[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC PATCH v3.1 1/2] xsplice: rfc.v3.1
*TODO*: - XSM add - Contributs/authors on xsplice.markdown - Figure out the preemption method (rfc.v4 target). Talk in Seattle? - Further work - write out an Wiki detailing what implementation pieces to be done for individual contributions. Do it before Seattle? Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx> --- docs/misc/xsplice.h | 80 +++ docs/misc/xsplice.markdown | 1230 +++++++++++++++++++++++++++++++++++++++++ docs/misc/xsplice_test.c | 78 +++ tools/libxc/include/xenctrl.h | 16 + tools/libxc/xc_misc.c | 183 ++++++ tools/misc/Makefile | 4 + tools/misc/xen-xsplice.c | 360 ++++++++++++ xen/common/Makefile | 1 + xen/common/keyhandler.c | 8 +- xen/common/sysctl.c | 5 + xen/common/xsplice.c | 405 ++++++++++++++ xen/include/public/sysctl.h | 66 +++ xen/include/xen/xsplice.h | 9 + 13 files changed, 2444 insertions(+), 1 deletion(-) create mode 100644 docs/misc/xsplice.h create mode 100644 docs/misc/xsplice.markdown create mode 100644 docs/misc/xsplice_test.c create mode 100644 tools/misc/xen-xsplice.c create mode 100644 xen/common/xsplice.c create mode 100644 xen/include/xen/xsplice.h diff --git a/docs/misc/xsplice.h b/docs/misc/xsplice.h new file mode 100644 index 0000000..00061fc --- /dev/null +++ b/docs/misc/xsplice.h @@ -0,0 +1,80 @@ + +#include <stdint.h> +#include <sys/types.h> + +#define XSPLICE_HOWTO_INLINE 0x1 /* It is an inline replacement. */ +#define XSPLICE_HOWTO_RELOC_PATCH 0x2 /* Add an trampoline. */ + +#define XSPLICE_HOWTO_FLAG_PC_REL 0x1 /* Is PC relative. */ +#define XSPLICE_HOWOT_FLAG_SIGN 0x2 /* Should the new value be treated as signed value. */ + +struct xsplice_reloc_howto { + uint32_t howto;/* XSPLICE_HOWTO_* */ + uint32_t flag; /* XSPLICE_HOWTO_FLAG_* */ + uint32_t size; /* Size, in bytes, of the item to be relocated. */ + uint32_t r_shift; /* The value the final relocation is shifted right by; used to drop unwanted data from the relocation. 
*/ + uint64_t mask; /* Bitmask for which parts of the instruction or data are replaced with the relocated value. */ + uint8_t pad[8]; /* Must be zero. */ +}; + +struct xsplice_symbol { + const char *name; /* The ELF name of the symbol. */ + const char *label; /* A unique xSplice name for the symbol. */ + uint8_t pad[16]; /* Must be zero. */ +}; +#define XSPLICE_PATCH_INLINE_TEXT 0x1 +#define XSPLICE_PATCH_INLINE_DATA 0x2 +#define XSPLICE_PATCH_RELOC_TEXT 0x3 + +struct xsplice_patch { + uint32_t type; /* XSPLICE_PATCH_* .*/ + uint32_t size; /* Size of patch. */ + uint64_t addr; /* The address of the inline new code (or data). */ + void *content; /* The bytes to be installed. */ + uint8_t pad[40]; /* Must be zero. */ +}; + +#define XSPLICE_SECTION_TEXT 0x00000001 /* Section is in .text */ +#define XSPLICE_SECTION_RODATA 0x00000002 /* Section is in .rodata */ +#define XSPLICE_SECTION_DATA 0x00000004 /* Section is in .data */ +#define XSPLICE_SECTION_STRING 0x00000008 /* Section is in .str */ + +#define XSPLICE_SECTION_TEXT_INLINE 0x00000200 /* Change is to be inline. */ +#define XSPLICE_SECTION_MATCH_EXACT 0x00000400 /* Must match exactly. */ +#define XSPLICE_SECTION_NO_STACKCHECK 0x00000800 /* Do not check the stack. */ + +struct xsplice_section { + struct xsplice_symbol *symbol; /* The symbol associated with this change. */ + uint64_t address; /* The address of the section (if known). */ + uint32_t size; /* The size of the section. */ + uint32_t flags; /* Various XSPLICE_SECTION_* flags. */ + uint8_t pad[12]; /* To be zero. */ +}; + +struct xsplice_reloc { + uint64_t addr; /* The address of the relocation (if known). */ + struct xsplice_symbol *symbol; /* Symbol for this relocation. */ + int64_t isns_target; /* rest of the ELF addend. This is equal to the offset against the symbol that the relocation refers to. */ + struct xsplice_reloc_howto *howto; /* Pointer to the above structure. 
*/ + int64_t isns_added; /* ELF addend resulting from quirks of instruction one of whose operands is the relocation. For example, this is -4 on x86 pc-relative jumps. */ + uint8_t pad[24]; /* Must be zero. */ +}; + +struct xsplice_code { + struct xsplice_reloc *relocs; /* How to patch it. */ + uint32_t n_relocs; + struct xsplice_section *sections; /* Safety data. */ + uint32_t n_sections; + struct xsplice_patch *patches; /* Patch code and data */ + uint32_t n_patches; + uint8_t pad[28]; /* Must be zero. */ +}; +struct xsplice { + uint32_t version; + const char *name; /* A sensible name for the patch. Up to 40 characters. */ + const char *id; /* ID of the hypervisor this binary was built against. */ + uint32_t id_size; + struct xsplice_code *new; /* Pointer to the new code to be patched. */ + struct xsplice_code *old; /* Pointer to the old code to be checked against. */ + uint8_t pad[24]; /* Must be zero. */ +}; diff --git a/docs/misc/xsplice.markdown b/docs/misc/xsplice.markdown new file mode 100644 index 0000000..02fd4d1 --- /dev/null +++ b/docs/misc/xsplice.markdown @@ -0,0 +1,1230 @@ +# xSplice Design v1 (EXTERNAL RFC v3) + +## Rationale + +A mechanism is required to binarily patch the running hypervisor with new +opcodes that have come about due to primarily security updates. + +This document describes the design of the API that would allow us to +upload to the hypervisor binary patches. + +The document is split in four sections: + - Detailed descriptions of the problem statement. + - Design of the data structures. + - Design of the hypercalls. + - Implementation notes that should be taken into consideration. + + +## Glossary + + * splice - patch in the binary code with new opcodes + * trampoline - a jump to a new instruction. + * payload - telemetries of the old code along with binary blob of the new + function (if needed). + * reloc - telemetries contained in the payload to construct proper trampoline. 
+ +## Multiple ways to patch + +The mechanism needs to be flexible to patch the hypervisor in multiple ways +and be as simple as possible. The compiled code is contiguous in memory with +no gaps - so we have no luxury of 'moving' existing code and must either +insert a trampoline to the new code to be executed - or only modify in-place +the code if there is sufficient space. The placement of new code has to be done +by hypervisor and the virtual address for the new code is allocated dynamically. + +This implies that the hypervisor must compute the new offsets when splicing +in the new trampoline code. Where the trampoline is added (inside +the function we are patching or just the callers?) is also important. + +To lessen the amount of code in hypervisor, the consumer of the API +is responsible for identifying which mechanism to employ and how many locations +to patch. Combinations of modifying in-place code, adding trampoline, etc +has to be supported. The API should allow read/write any memory within +the hypervisor virtual address space. + +We must also have a mechanism to query what has been applied and a mechanism +to revert it if needed. + +We must also have a mechanism to: (optional) provide an copy of the old code - so +that the hypervisor can verify it against the code in memory; the new code; +the symbol name of the function to be patched; or offset from the symbol; +or virtual address. + +The complications that this design will encounter are explained later +in this document. + +## Workflow + + +The expected workflows of higher-level tools that manage multiple patches +on production machines would be: + + * The first obvious task is loading all available / suggested + hotpatches around system start. + * Whenever new hotpatches are installed, they should be loaded too. + * One wants to query which modules have been loaded at runtime. 
+ * If unloading is deemed safe (see unloading below), one may want to + support a workflow where a specific hotpatch is marked as bad and + unloaded. + * If we do not restrict module activation order and want to report tboot + state on sequences, we might have a complexity explosion problem, in + what system hashes should be considered acceptable. + +## Patching code + +The first mechanism to patch that comes to mind is in-place replacement. +That is replace the affected code with new code. Unfortunately the x86 +ISA is variable size which places limits on how much space we have available +to replace the instructions. That is not a problem if the change is smaller +than the original opcode and we can fill it with nops. Problems will +appear if the replacement code is longer. + +The second mechanism is by replacing the call or jump to the +old function with the address of the new function. + +A third mechanism is to add a jump to the new function at the +start of the old function. + +### Example of trampoline and in-place splicing + +As example we will assume the hypervisor does not have XSA-132 (see +*domctl/sysctl: don't leak hypervisor stack to toolstacks* +4ff3449f0e9d175ceb9551d3f2aecb59273f639d) and we would like to binary patch +the hypervisor with it. The original code looks as so: + +<pre> + 48 89 e0 mov %rsp,%rax + 48 25 00 80 ff ff and $0xffffffffffff8000,%rax +</pre> + +while the new patched hypervisor would be: + +<pre> + 48 c7 45 b8 00 00 00 00 movq $0x0,-0x48(%rbp) + 48 c7 45 c0 00 00 00 00 movq $0x0,-0x40(%rbp) + 48 c7 45 c8 00 00 00 00 movq $0x0,-0x38(%rbp) + 48 89 e0 mov %rsp,%rax + 48 25 00 80 ff ff and $0xffffffffffff8000,%rax +</pre> + +This is inside the arch_do_domctl. This new change adds 21 extra +bytes of code which alters all the offsets inside the function. To alter +these offsets and add the extra 21 bytes of code we might not have enough +space in .text to squeeze this in. 
+ +As such we could simplify this problem by only patching the site +which calls arch_do_domctl: + +<pre> +<do_domctl>: + e8 4b b1 05 00 callq ffff82d08015fbb9 <arch_do_domctl> +</pre> + +with a new address for where the new `arch_do_domctl` would be (this +area would be allocated dynamically). + +Astute readers will wonder what we need to do if we were to patch `do_domctl` +- which is not called directly by hypervisor but on behalf of the guests via +the `compat_hypercall_table` and `hypercall_table`. +Patching the offset in `hypercall_table` for `do_domctl: +(ffff82d080103079 <do_domctl>:) +<pre> + + ffff82d08024d490: 79 30 + ffff82d08024d492: 10 80 d0 82 ff ff + +</pre> +with the new address where the new `do_domctl` is possible. The other +place where it is used is in `hvm_hypercall64_table` which would need +to be patched in a similar way. This would require an in-place splicing +of the new virtual address of `arch_do_domctl`. + +In summary this example patched the callee of the affected function by + * allocating memory for the new code to live in, + * changing the virtual address in all the functions which called the old + code (computing the new offset, patching the callq with a new callq). + * changing the function pointer tables with the new virtual address of + the function (splicing in the new virtual address). Since this table + resides in the .rodata section we would need to temporarily change the + page table permissions during this part. + + +However it has severe drawbacks - the safety checks which have to make sure +the function is not on the stack - must also check every caller. For some +patches this could mean - if there were an sufficient large amount of +callers - that we would never be able to apply the update. + +### Example of different trampoline patching. + +An alternative mechanism exists where we can insert a trampoline in the +existing function to be patched to jump directly to the new code. 
This +lessens the locations to be patched to one but it puts pressure on the +CPU branching logic (I-cache, but it is just one unconditional jump). + +For this example we will assume that the hypervisor has not been compiled +with fe2e079f642effb3d24a6e1a7096ef26e691d93e (XSA-125: *pre-fill structures +for certain HYPERVISOR_xen_version sub-ops*) which mem-sets a structure +in `xen_version` hypercall. This function is not called **anywhere** in +the hypervisor (it is called by the guest) but referenced in the +`compat_hypercall_table` and `hypercall_table` (and indirectly called +from that). Patching the offset in `hypercall_table` for the old +`do_xen_version` (ffff82d080112f9e <do_xen_version>) + +<pre> + ffff82d08024b270 <hypercall_table> + ... + ffff82d08024b2f8: 9e 2f 11 80 d0 82 ff ff + +</pre> +with the new address where the new `do_xen_version` is possible. The other +place where it is used is in `hvm_hypercall64_table` which would need +to be patched in a similar way. This would require an in-place splicing +of the new virtual address of `do_xen_version`. + +An alternative solution would be to insert a trampoline in the +old `do_xen_version` function to directly jump to the new `do_xen_version`. + +<pre> + ffff82d080112f9e <do_xen_version>: + ffff82d080112f9e: 48 c7 c0 da ff ff ff mov $0xffffffffffffffda,%rax + ffff82d080112fa5: 83 ff 09 cmp $0x9,%edi + ffff82d080112fa8: 0f 87 24 05 00 00 ja ffff82d0801134d2 <do_xen_version+0x534> +</pre> + +with: + +<pre> + ffff82d080112f9e <do_xen_version>: + ffff82d080112f9e: e9 XX YY ZZ QQ jmpq [new do_xen_version] +</pre> + +which would lessen the amount of patching to just one location. + +In summary this example patched the affected function to jump to the +new replacement function which required: + * allocating memory for the new code to live in, + * inserting trampoline with new offset in the old function to point to the + new function. 
+ * Optionally we can insert in the old function a trampoline jump to an function + providing an BUG_ON to catch errant code. + +The disadvantage of this are that the unconditional jump will consume a small +I-cache penalty. However the simplicity of the patching and higher chance +of passing safety checks make this a worthwhile option. + +### Security + +With this method we can re-write the hypervisor - and as such we **MUST** be +diligent in only allowing certain guests to perform this operation. + +Furthermore with SecureBoot or tboot, we **MUST** also verify the signature +of the payload to be certain it came from a trusted source and integrity +was intact. + +As such the hypercall **MUST** support an XSM policy to limit what the guest +is allowed to invoke. If the system is booted with signature checking the +signature checking will be enforced. + +## Design of payload format + +The payload **MUST** contain enough data to allow us to apply the update +and also safely reverse it. As such we **MUST** know: + + * (optional) What the old code is expected to be. We **MUST** be able verify it + against the runtime code if old code is included in the payload. + * Verify the build-id of hypervisor against the payload build-id. + * The locations in memory to be patched. This can be determined dynamically + via symbols or via virtual addresses. + * The new code (or data) that will be patched in. + * Signature to verify the payload. + +This binary format can be constructed using an custom binary format but +there are severe disadvantages of it: + + * The format might need to be changed and we need an mechanism to accommodate + that. + * It has to be platform agnostic. + * Easily constructed using existing tools. + +As such having the payload in an ELF file is the sensible way. We would be +carrying the various sets of structures (and data) in the ELF sections under +different names and with definitions. 
The prefix for the ELF section name +would always be: *.xsplice* to match up to the names of the structures. + +Note that every structure has padding. This is added so that the hypervisor +can re-use those fields as it sees fit. + +Earlier design attempted to ineptly explain the relations of the ELF sections +to each other without using proper ELF mechanism (sh_info, sh_link, data +structures using Elf_* types, etc). This design will explain in detail +the structures and how they are used together and not dig in the ELF +format - except mention that the section names should match the +structure names. + +### ASCII art of structures. + +The diagram below is omitting some entries to easy the relationship explanation. + +<pre> + /---------------------\ + +->| xsplice_reloc_howto | + / \---------------------/ + /---------------\ 1:1/ + +->| xsplice_reloc | / + / | - howto +--/ 1:1 /----------------\ + / | - symbol +-------->| xsplice_symbol | + 1:N / \---------------/ / \----------------/ +/----------\ /--------------\ / / +| xsplice | 1:1 | xsplice_code | / 1:1/ +| - new +------->| - relocs +---/ 1:N /-----------------\ / +| - old +------->| - sections +----------->| xsplice_section | / +\----------/ | - patches +--\ | - symbol +/ 1:1 /----------------\ + \--------------/ \ | - addr +------->| .text or .data | + \ \----------------/ \----------------/ + \ + 1:N \ + \ /----------------\ + +-->| xsplice_patch | 1:1 /----------------\ + | - content +------>| binary code or | + \----------------/ | data | + \----------------/ + +</pre> + +### xsplice structures + +From the top (or left in the above diagram) the structures are: + + * `xsplice`. The top most structure - contains the the name of the update, + the id to match against the hypervisor, the pointer to the metadata for + the new code and optionally the metadata for the old code. + + * `xsplice_code`. The structure that ties all of this together and defines + the payload. 
Contains arrays of `xsplice_reloc`, `xsplice_section`, and + `xsplice_patch`. + + * `xsplice_reloc` contains telemetry used for patching - which describes the + targets to be patched and how to do it. + + * `xsplice_section` - the safety data for the code. Contains pointer to the + symbol (`xsplice_symbols`) and pointer to the code (`.text`) or data (`.data`), + which are to be used during safety and dependency checking. + + * `xsplice_patch`: the description of the new function to be patched in + along with the binary code or data. + + * ` xsplice_reloc_howto`: the howto properly construct trampolines for an patch. + We may have multiple locations for which we need to insert a trampoline for a + payload and each location might require a different way of handling it. + + * `xsplice_symbols `. The symbol that will be patched. + +In short the *.xsplice* sections (with `xsplice` being the top) represent +various structures to define the new code and safety checks for the old +code (optional). The ELF provides the mechanism to glue it all together when +loaded in memory. + + +Note that a lot of these ideas are borrowed from kSplice which is +available at: https://github.com/jirislaby/ksplice + +### struct xsplice + +The top most structure is quite simple. It defines the name, the id +of the hypervisor, pointer to the new code & data and an pointer to +the old code & data (optional). + +The `new` code uses all of the `xsplice_*` structures while the +`old` does not use the `xsplice_reloc` structures. + +The sections defining the structures will explicitly state +when they are not used. + +<pre> +struct xsplice { + uint32_t version; /* Version of payload. */ + const char *name; /* A sensible name for the patch. Up to 40 characters. */ + const char *id; /* ID of the hypervisor this binary was built against. */ + uint32_t id_size; /* Size of the ID. */ + struct xsplice_code *new; /* Pointer to the new code & data to be patched. 
*/ + struct xsplice_code *old; /* Pointer to the old code & data to be checked against. */ + uint8_t pad[24]; /* Must be zero. */ +}; +</pre> + +The size of this structure should be 64 bytes. + +### xsplice_code + +The structure embedded within this section ties the other +structures together. It has a pointer and an element count +for each set of structures. This means that an update +can be split in multiple changes - for example to accommodate +an update that contains both code and data and will need patching +in both .text and .data sections. + +<pre> +struct xsplice_code { + struct xsplice_reloc *relocs; /* How to patch it. */ + uint32_t n_relocs; + struct xsplice_section *sections; /* Safety data. */ + uint32_t n_sections; + struct xsplice_patch *patches; /* Patch code and data */ + uint32_t n_patches; + uint8_t pad[28]; /* Must be zero. */ +}; +</pre> + +The size of this structure is 64 bytes. + +There can be at most two of those structures in the payload. +One for the `new` and another for the `old` (optional). + +If it is for the old code the `relocs` and `n_relocs` values will be ignored. + + +### xsplice_reloc + +The `xsplice_code` defines an array of these structures. As such +a single structure defines a single point where to patch the +hypervisor. + +The structure contains the address of the hypervisor (if known), +the symbol associated with this address, how the patching is to +be done, and platform specific details. + +The `isns_added` is a value to be used to compute the new offset +due to the quirks of the operands of the instruction. For example +to patch in a jump operation to the new code - the offset is relative +to the program counter of the next instruction - hence the offset +value has to be subtracted by four bytes - hence this would contain -4. + +The `isns_target` is the offset against the symbol. + +The relation of this structure with `xsplice_patch` is 1:1, even +for inline patches. 
See the section detailing the structure +`xsplice_reloc_howto`. + +The relation of this structure with `xsplice_section` is 1:1. + +This structure is as follows: + +<pre> +struct xsplice_reloc { + uint64_t addr; /* The address of the relocation (if known). */ + struct xsplice_symbol *symbol; /* Symbol for this relocation. */ + int64_t isns_target; /* rest of the ELF addend. This is equal to the offset against the symbol that the relocation refers to. */ + struct xsplice_reloc_howto *howto; /* Pointer to the above structure. */ + int64_t isns_added; /* ELF addend resulting from quirks of instruction one of whose operands is the relocation. For example, this is -4 on x86 pc-relative jumps. */ + uint8_t pad[24]; /* Must be zero. */ +}; + +</pre> + +The size of this structure is 64 bytes. + +### xsplice_section + +The structure defined in this section is used during pre-patching and +during patching. During pre-patching it is used to verify that it is safe +to update with the new changes - and contains safety data on the old code +and what kind of matching we are to expect. + +That is whether the address (either provided or resolved when payload is +loaded by referencing the symbols) is: + + * in memory, + * correct size, + * in its proper ELF section, + * has been already patched (or not), + * is expected not to be on any CPU stack - (or if it is OK for it to be on the CPU stack). + +with what we expect it to be. + +Some of the checks can be relaxed, as such the `flags` values +can be or-ed together. + +Depending on the time when patching is done, stack checking might not +be required. +<pre> + +#define XSPLICE_SECTION_TEXT 0x00000001 /* Section is in .text */ +#define XSPLICE_SECTION_RODATA 0x00000002 /* Section is in .rodata */ +#define XSPLICE_SECTION_DATA 0x00000004 /* Section is in .data */ +#define XSPLICE_SECTION_STRING 0x00000008 /* Section is in .str */ + +#define XSPLICE_SECTION_TEXT_INLINE 0x00000200 /* Change is to be inline. 
*/ +#define XSPLICE_SECTION_MATCH_EXACT 0x00000400 /* Must match exactly. */ +#define XSPLICE_SECTION_NO_STACKCHECK 0x00000800 /* Do not check the stack. */ + + +struct xsplice_section { + struct xsplice_symbol *symbol; /* The symbol associated with this change. */ + uint64_t address; /* The address of the section (if known). */ + uint32_t size; /* The size of the section. */ + uint32_t flags; /* Various XSPLICE_SECTION_* flags. */ + uint8_t pad[12]; /* To be zero. */ +}; + +</pre> + +The size of this structure is 32 bytes. + +### xsplice_patch + +This structure has the binary code (or data) to be patched. Depending on the +type it can either an inline patch (data or text) or require an relocation +change (which requires a trampoline). Naturally it also points to a blob +of the binary data to patch in, and the size of the patch. + +The `addr` is used when the patch is for inline change. It can be +an virtual address or an offset from the symbol start. + +If it is an relocation (requiring a trampoline), the `addr` should be zero. + +There must be an corresponding ` struct xsplice_reloc` and +`struct xsplice_section` describing this patch. + +<pre> +#define XSPLICE_PATCH_INLINE_TEXT 0x1 +#define XSPLICE_PATCH_INLINE_DATA 0x2 +#define XSPLICE_PATCH_RELOC_TEXT 0x3 + +struct xsplice_patch { + uint32_t type; /* XSPLICE_PATCH_* .*/ + uint32_t size; /* Size of patch. */ + uint64_t addr; /* The address (or offset from symbol) of the inline new code (or data). */ + void *content; /* The bytes to be installed. */ + uint8_t pad[40]; /* Must be zero. */ +}; + +</pre> + +The size of this structure is 64 bytes. + +### xsplice_symbols + +The structure contains an pointer to the name of the ELF symbol +to be patched and as well an unique name for the symbol. + +The `label` is used for diagnostic purposes - such as including the +name and the offset. + +The structure is as follow: + +<pre> +struct xsplice_symbol { + const char *name; /* The ELF name of the symbol. 
*/ + const char *label; /* A unique xSplice name for the symbol. */ + uint8_t pad[16]; /* Must be zero. */ +}; +</pre> + +The size of this structure is 32 bytes. + + +### xsplice_reloc_howto + +The howto defines in the detail the change. It contains the type, +whether the relocation is relative, the size of the relocation, +bitmask for which parts of the instruction or data are to be replaced, +amount the final relocation is shifted by (to drop unwanted data), and +whether the replacement should be interpreted as signed value. + +The structure is as follow: + +<pre> +#define XSPLICE_HOWTO_INLINE 0x1 /* It is an inline replacement. */ +#define XSPLICE_HOWTO_RELOC_PATCH 0x2 /* Add a trampoline. */ + +#define XSPLICE_HOWTO_FLAG_PC_REL 0x1 /* Is PC relative. */ +#define XSPLICE_HOWOT_FLAG_SIGN 0x2 /* Should the new value be treated as signed value. */ + +struct xsplice_reloc_howto { + uint32_t howto; /* XSPLICE_HOWTO_* */ + uint32_t flag; /* XSPLICE_HOWTO_FLAG_* */ + uint32_t size; /* Size, in bytes, of the item to be relocated. */ + uint32_t r_shift; /* The value the final relocation is shifted right by; used to drop unwanted data from the relocation. */ + uint64_t mask; /* Bitmask for which parts of the instruction or data are replaced with the relocated value. */ + uint8_t pad[8]; /* Must be zero. */ +}; + +</pre> + +The size of this structure is 32 bytes. + +### Example + +There is a wealth of information that the payload must have to define a simple +patch. For this example we will assume that the hypervisor has not been compiled +with fe2e079f642effb3d24a6e1a7096ef26e691d93e (XSA-125: *pre-fill structures +for certain HYPERVISOR_xen_version sub-ops*) which mem-sets an structure +in `xen_version` hypercall. This function is not called **anywhere** in +the hypervisor (it is called by the guest) but referenced in the +`compat_hypercall_table` and `hypercall_table` (and indirectly called +from that). 
There are two ways to patch this: +inline patch `hvm_hypercall64_table` and `hvm_hypercall` with a new +address for the new `do_xen_version`, or insert a +trampoline in `do_xen_version` code. The example will focus on the latter. + +The `do_xen_version` code is located at virtual address ffff82d080112f9e. + +<pre> +struct xsplice_code xsplice_xsa125; +struct xsplice_reloc relocs[1]; +struct xsplice_section sections[1]; +struct xsplice_patch patches[1]; +struct xsplice_symbol do_xen_version_symbol; +struct xsplice_reloc_howto do_xen_version_howto; +char do_xen_version_new_code[1728]; + +#ifndef HYPERVISOR_ID +#define HYPERVISOR_ID "92dd05a61556c554155b1508c9cf67d993336d28" +#endif + +struct xsplice xsa125 = { + .name = "xsa125", + .id = HYPERVISOR_ID, + .old = NULL, + .new = &xsplice_xsa125, +}; + +struct xsplice_code xsplice_xsa125 = { + .relocs = &relocs[0], + .n_relocs = 1, + .sections = &sections[0], + .n_sections = 1, + .patches = &patches[0], + .n_patches = 1, +}; + +struct xsplice_reloc relocs[1] = { + { + .addr = 0xffff82d080112f9e, + .symbol = &do_xen_version_symbol, + .isns_target = 0, + .howto = &do_xen_version_howto, + .isns_added = -4, + }, +}; + +struct xsplice_symbol do_xen_version_symbol = { + .name = "do_xen_version", + .label = "do_xen_version+<0x0>", +}; + +struct xsplice_reloc_howto do_xen_version_howto = { + .type = XSPLICE_HOWTO_RELOC_PATCH, + .flag = XSPLICE_HOWTO_FLAG_PC_REL, + .r_shift = 0, + .mask = (-1ULL), +}; + + +struct xsplice_section sections[1] = { + { + .symbol = &do_xen_version_symbol, + .address = 0xffff82d080112f9e, + .size = 1728, + .flags = XSPLICE_SECTION_TEXT, + }, +}; + +struct xsplice_patch patches[1] = { + { + .type = XSPLICE_PATCH_RELOC_TEXT, + .size = 1728, + .addr = 0, + .content = &do_xen_version_new_code, + }, +}; + +char do_xen_version_new_code[1728] = { 0x83, 0xff, 0x09, /* And more code. */}; +</pre> + + +## Signature checking requirements. 
+ +The signature checking requires that the layout of the data in memory +**MUST** be same for signature to be verified. This means that the payload +data layout in ELF format **MUST** match what the hypervisor would be +expecting such that it can properly do signature verification. + +The signature is based on the all of the payloads continuously laid out +in memory. The signature is to be appended at the end of the ELF payload +prefixed with the string '~Module signature appended~\n', followed by +an signature header then followed by the signature, key identifier, and signers +name. + +Specifically the signature header would be: + +<pre> +#define PKEY_ALGO_DSA 0 +#define PKEY_ALGO_RSA 1 + +#define PKEY_ID_PGP 0 /* OpenPGP generated key ID */ +#define PKEY_ID_X509 1 /* X.509 arbitrary subjectKeyIdentifier */ + +#define HASH_ALGO_MD4 0 +#define HASH_ALGO_MD5 1 +#define HASH_ALGO_SHA1 2 +#define HASH_ALGO_RIPE_MD_160 3 +#define HASH_ALGO_SHA256 4 +#define HASH_ALGO_SHA384 5 +#define HASH_ALGO_SHA512 6 +#define HASH_ALGO_SHA224 7 +#define HASH_ALGO_RIPE_MD_128 8 +#define HASH_ALGO_RIPE_MD_256 9 +#define HASH_ALGO_RIPE_MD_320 10 +#define HASH_ALGO_WP_256 11 +#define HASH_ALGO_WP_384 12 +#define HASH_ALGO_WP_512 13 +#define HASH_ALGO_TGR_128 14 +#define HASH_ALGO_TGR_160 15 +#define HASH_ALGO_TGR_192 16 + + +struct elf_payload_signature { + u8 algo; /* Public-key crypto algorithm PKEY_ALGO_*. */ + u8 hash; /* Digest algorithm: HASH_ALGO_*. */ + u8 id_type; /* Key identifier type PKEY_ID*. */ + u8 signer_len; /* Length of signer's name */ + u8 key_id_len; /* Length of key identifier */ + u8 __pad[3]; + __be32 sig_len; /* Length of signature data */ +}; + +</pre> +(Note that this has been borrowed from Linux module signature code.). + + +## Hypercalls + +We will employ the sub operations of the system management hypercall (sysctl). +There are to be four sub-operations: + + * upload the payloads. + * listing of payloads summary uploaded and their state. 
+ * getting a particular payload summary and its state. + * command to apply, delete, or revert the payload. + * querying of the hypervisor ID (TODO). + +Most of the actions are asynchronous therefore the caller is responsible +to verify that it has been applied properly by retrieving the summary of it +and verifying that there are no error codes associated with the payload. + +We **MUST** make some of them asynchronous due to the nature of patching: +it requires every physical CPU to be lock-step with each other. +The patching mechanism while an implementation detail, is not a short +operation and as such the design **MUST** assume it will be a long-running +operation. + +The sub-operations will spell out how preemption is to be handled (if at all). + +Furthermore it is possible to have multiple different payloads for the same +function. As such a unique id per payload has to be visible to allow proper manipulation. + +The hypercall is part of the `xen_sysctl`. The top level structure contains +one uint32_t to determine the sub-operations: + +<pre> +struct xen_sysctl_xsplice_op { + uint32_t cmd; + union { + ... see below ... + } u; +}; + +</pre> +while the rest of hypercall specific structures are part of this structure. + +### XEN_SYSCTL_XSPLICE_UPLOAD (0) + +Upload a payload to the hypervisor. The payload is verified and if there +are any issues the proper return code will be returned. The payload is +not applied at this time - that is controlled by *XEN_SYSCTL_XSPLICE_ACTION*. + +The caller provides: + + * `id` unique id. + * `payload` the virtual address of where the ELF payload is. + +The `id` could be a UUID that stays fixed forever for a given +hotpatch. It can be embedded into the Elf payload at creation time +and extracted by tools. + +The return value is zero if the payload was successfully uploaded and the +signature was verified. Otherwise an EXX return value is provided. +Duplicate `id` are not supported. 
+ +The `payload` is the ELF payload as mentioned in the `Payload format` section. + +This operation can be preempted by the hypercall returning EAGAIN. +This is due to the nature of signature verification - which may require +SecureBoot firmware calls which are unbounded. + +The structure is as follows: + +<pre> +struct xen_sysctl_xsplice_upload { + char id[40]; /* IN, name of the patch. */ + uint64_t size; /* IN, size of the ELF file. */ + XEN_GUEST_HANDLE_64(uint8) payload; /* ELF file. */ +}; +</pre> + +### XEN_SYSCTL_XSPLICE_GET (1) + +Retrieve a summary of a specific payload. The caller provides: + + * `id` the unique id. + * `status` *MUST* be set to zero. + +The `summary` structure contains a summary of the payload which includes: + + * `id` the unique id. + * `status` - whether it has been: + 1. *XSPLICE_STATUS_LOADED* (0x1) has been loaded. + 2. *XSPLICE_STATUS_PROGRESS* (0x2) acting on the **XEN_SYSCTL_XSPLICE_ACTION** command. + 3. *XSPLICE_STATUS_CHECKED* (0x4) the ELF payload safety checks passed. + 4. *XSPLICE_STATUS_APPLIED* (0x8) loaded, checked, and applied. + 5. *XSPLICE_STATUS_REVERTED* (0x10) loaded, checked, applied and then also reverted. + 6. A negative value is an error. The error would be of EXX format. + +The return value is zero on success and EXX on failure. This operation +is synchronous and does not require preemption. + +The structure is as follows: + +<pre> +#define XSPLICE_STATUS_LOADED 0x1 +#define XSPLICE_STATUS_PROGRESS 0x2 +#define XSPLICE_STATUS_CHECKED 0x4 +#define XSPLICE_STATUS_APPLIED 0x8 +#define XSPLICE_STATUS_REVERTED 0x10 + +struct xen_sysctl_xsplice_summary { + char id[40]; /* IN/OUT, name of the patch. */ + int32_t status; /* OUT */ +}; +</pre> + +### XEN_SYSCTL_XSPLICE_LIST (2) + +Retrieve an array of abbreviated summaries of payloads that are loaded in the +hypervisor. + +The caller provides: + + * `version`. Initially it *MUST* be zero. + * `idx` index iterator. On first call *MUST* be zero, on subsequent calls it varies. 
+ * `count` the max number of entries to populate. + * `summary` virtual address of where to write payload summaries. + +The hypercall returns zero on success and updates the `idx` (index) iterator +with the number of payloads returned, `count` with the number of remaining +payloads, and `summary` with an number of payload summaries. The `version` +is updated on every hypercall - if it varies from one hypercall to another +the data is stale and further calls could fail. + + +If the hypercall returns E2BIG the `count` is too big and should be +lowered. + +Note that due to the asynchronous nature of hypercalls the domain might have +added or removed the number of payloads making this information stale. It is +the responsibility of the toolstack to use the `version` field to check +between each invocation. if the version differs it should discard the stale +data and start from scratch. + +This operation is synchronous and does not require preemption. + +The `summary` structure contains an summary of payload which includes: + + * `version` version of the data. + * `id` unique id per payload. + * `status` - whether it has been: + 1. *XSPLICE_STATUS_LOADED* (0x1) has been loaded. + 2. *XSPLICE_STATUS_PROGRESS* (0x2) acting on the **XEN_SYSCTL_XSPLICE_ACTION** command. + 3. *XSPLICE_STATUS_CHECKED* (0x4) the ELF payload safety checks passed. + 4. *XSPLICE_STATUS_APPLIED* (0x8) loaded, checked, and applied. + 5. *XSPLICE_STATUS_REVERTED* (0x10) loaded, checked, applied and then also reverted. + 6. Any negative values means there has been error. The value is in EXX format. + +The structure is as follow: + +<pre> +struct xen_sysctl_xsplice_list { + uint32_t version; /* OUT */ + uint32_t idx; /* IN/OUT */ + uint32_t count; /* IN/OUT */ + XEN_GUEST_HANDLE_64(xen_sysctl_xsplice_summary) summary; /* OUT */ +}; + +struct xen_sysctl_xsplice_summary { + char id[40]; /* OUT, name of the patch. 
*/ + int32_t status; /* OUT */ +}; + +</pre> +### XEN_SYSCTL_XSPLICE_ACTION (3) + +Perform an operation on the payload structure referenced by the `id` field. +The operation request is asynchronous and the status should be retrieved +by using either **XEN_SYSCTL_XSPLICE_GET** or **XEN_SYSCTL_XSPLICE_LIST** hypercall. + +There are two ways of doing preemption: either by returning EBUSY +or by the mechanism outlined here. + +Doing it in userland would remove any tracking of states in +the hypervisor - except the simple commands apply, unload, and revert. + +However we would not be able to patch all the code that is invoked while +this hypercall is in progress. That is - the do_domctl, the spinlocks, +anything put on the stack, etc. + +The disadvantage of the mechanism outlined here is that the hypervisor +code has to keep the state atomic and have an upper bound of time +on actions. If within the time the operation does not succeed the +operation would go into an error state. + + * `id` the unique id. + * `time` the upper bound of time the cmd should take. Zero means infinite. + * `cmd` the command requested: + 1. *XSPLICE_ACTION_CHECK* (1) check that the payload will apply properly. + 2. *XSPLICE_ACTION_UNLOAD* (2) unload the payload. + Any further hypercalls against the `id` will result in failure unless + **XEN_SYSCTL_XSPLICE_UPLOAD** hypercall is performed with the same `id`. + 3. *XSPLICE_ACTION_REVERT* (3) revert the payload. If the operation takes + more time than the upper bound of time the `status` will be EBUSY. + 4. *XSPLICE_ACTION_APPLY* (4) apply the payload. If the operation takes + more time than the upper bound of time the `status` will be EBUSY. + 5. *XSPLICE_ACTION_LOADED* is an initial state and cannot be requested. + +The return value will be zero unless the provided fields are incorrect. 
+ +The structure is as follow: + +<pre> +#define XSPLICE_ACTION_CHECK 1 +#define XSPLICE_ACTION_UNLOAD 2 +#define XSPLICE_ACTION_REVERT 3 +#define XSPLICE_ACTION_APPLY 4 + +struct xen_sysctl_xsplice_action { + char id[40]; /* IN, name of the patch. */ + uint64_t time; /* IN, upper bound of time (ms) for the operation to take. */ + uint32_t cmd; /* IN */ +}; + +</pre> + +## State diagrams of XSPLICE_ACTION values. + +There is a strict ordering state of what the commands can be. +The XSPLICE_ACTION prefix has been dropped to easy reading: + +<pre> + /->\ + \ / + /-------< CHECK + | | + | + + | UNLOAD<--\ + | \ + | \ + /-> APPLY -----------> REVERT --\ + | | + \-------------------------------/ + +</pre> +Or an state transition table of valid states: +<pre> ++-------+-------+--------+--------+---------+-------+------------------+ +| CHECK | APPLY | REVERT | UNLOAD | Current | Next | Result | ++-------+-------+--------+--------+---------+-------+------------------+ +| x | | | | LOADED | CHECK | Check payload. | ++-------+-------+--------+--------+---------+-------+------------------+ +| | | | x | LOADED | UNLOAD| unload payload. | ++-------+-------+--------+--------+---------+-------+------------------+ +| x | | | | CHECK | CHECK | Check payload. | ++-------+-------+--------+--------+---------+-------+------------------+ +| | x | | | CHECK | APPLY | Apply payload. | ++-------+-------+--------+--------+---------+-------+------------------+ +| | | | x | CHECK | UNLOAD| Unload payload. | ++-------+-------+--------+--------+---------+-------+------------------+ +| | | x | | APPLY | REVERT| Revert payload. | ++-------+-------+--------+--------+---------+-------+------------------+ +| | x | | | REVERT | APPLY | Apply payload. | ++-------+-------+--------+--------+---------+-------+------------------+ +| | | | x | REVERT | UNLOAD| Unload payload. | ++-------+-------+--------+--------+---------+-------+------------------+ +</pre> +All the other state transitions are invalid. 
+ +## Sequence of events. + +The normal sequence of events is to: + + 1. *XEN_SYSCTL_XSPLICE_UPLOAD* to upload the payload. If there are errors *STOP* here. + 2. *XEN_SYSCTL_XSPLICE_GET* to check the `->status`. If in *XSPLICE_STATUS_PROGRESS* spin. If in *XSPLICE_STATUS_LOADED* go to next step. + 3. *XEN_SYSCTL_XSPLICE_ACTION* with *XSPLICE_ACTION_CHECK* command to verify that the payload can be successfully applied. + 4. *XEN_SYSCTL_XSPLICE_GET* to check the `->status`. If in *XSPLICE_STATUS_PROGRESS* spin. If in *XSPLICE_STATUS_CHECKED* go to next step. + 5. *XEN_SYSCTL_XSPLICE_ACTION* with *XSPLICE_ACTION_APPLY* to apply the patch. + 6. *XEN_SYSCTL_XSPLICE_GET* to check the `->status`. If in *XSPLICE_STATUS_PROGRESS* spin. If in *XSPLICE_STATUS_APPLIED* exit with success. + + +## Addendum + +Implementation quirks should not be discussed in a design document. + +However these observations can provide aid when developing against this +document. + + +### Alternative assembler + +Alternative assembler is a mechanism to use different instructions depending +on what the CPU supports. This is done by providing multiple streams of code +that can be patched in - or if the CPU does not support it - padded with +`nop` operations. The alternative assembler macros cause the compiler to +expand the code to place the most generic code in place and emit a special +ELF .section header to tag this location. During run-time the hypervisor +can leave the areas alone or patch them with better suited opcodes. + +However these sections are part of .init. and as such can't reasonably be +subject to patching. + +### .rodata sections + +The patching might require strings to be updated as well. As such we must +also be able to patch the strings as needed. This sounds simple - but the compiler +has a habit of coalescing strings that are the same - which means if we in-place +alter the strings - other users will be inadvertently affected as well. 
+ +This is also where pointers to functions live - and we may need to patch these +as well, along with switch-style jump tables. + +To guard against that we must be prepared to do patching similar to +trampoline patching or in-line depending on the flavour. If we can +do in-line patching we would need to: + + * alter `.rodata` to be writeable. + * inline patch. + * alter `.rodata` to be read-only. + +If we are doing trampoline patching we would need to: + + * allocate a new memory location for the string. + * all locations which use this string will have to be updated to use the + offset to the string. + * mark the region RO when we are done. + +### .bss and .data sections. + +Patching writable data is not suitable as it is unclear what should be done +depending on the current state of data. As such it should not be attempted. + + +### Patching code which is on the stack. + +We should not patch the code which is on the stack. That can lead +to corruption. + +### Inline patching + +The hypervisor should verify that the in-place patching would fit within +the code or data. + +### Trampoline (e9 opcode) + +The e9 opcode used for jmpq uses a 32-bit signed displacement. That means +we are limited to up to 2GB of virtual address space to place the new code +from the old code. That should not be a problem since the Xen hypervisor has +a very small footprint. + +However if we need to - we can always add two trampolines. One at the 2GB +limit that calls the next trampoline. + +Please note there is a small limitation for trampolines in +function entries: The target function (+ trailing padding) must be able +to accommodate the trampoline. On x86 with +-2 GB relative jumps, +this means 5 bytes are required. + +Depending on compiler settings, there are several functions in Xen that +are smaller (without inter-function padding). + +<pre> +readelf -sW xen-syms | grep " FUNC " | \ + awk '{ if ($3 < 5) print $3, $4, $5, $8 }' + +... +3 FUNC LOCAL wbinvd_ipi +3 FUNC LOCAL shadow_l1_index +... 
+</pre> +A compile-time check for, e.g., a minimum alignment of functions or a +runtime check that verifies symbol size (+ padding to next symbols) for +that in the hypervisor is advised. + +### When to patch + +During the discussion on the design two candidates bubbled up where +the call stack for each CPU would be deterministic. This would +minimize the chance of the patch not being applied due to safety +checks failing. + +#### Rendezvous code instead of stop_machine for patching + +The hypervisor's time rendezvous code runs synchronously across all CPUs +every second. Using the stop_machine to patch can stall the time rendezvous +code and result in an NMI. As such having the patching be done at the tail +of rendezvous code should avoid this problem. + +However the entrance point for that code is +do_softirq->timer_softirq_action->time_calibration +which ends up calling on_selected_cpus on remote CPUs. + +The remote CPUs receive CALL_FUNCTION_VECTOR IPI and execute the +desired function. + + +#### Before entering the guest code. + +Before we call VMXResume we check whether any soft IRQs need to be executed. +This is a good spot because all Xen stacks are effectively empty at +that point. + +To rendezvous all the CPUs a barrier with a maximum timeout (which +could be adjusted), combined with forcing all other CPUs through the +hypervisor with IPIs, can be utilized to have all the CPUs be in lockstep. + +The approach is similar in concept to stop_machine and the time rendezvous +but is time-bound. However the local CPU stack is much shorter and +a lot more deterministic. + +### Compiling the hypervisor code + +Hotpatch generation often requires support for compiling the target +with -ffunction-sections / -fdata-sections. Changes would have to +be done to the linker scripts to support this. + + +### Generation of xSplice ELF payloads + +The design of that is not discussed in this document. 
+ +The author of this design envisions objdump and objcopy along +with special GCC parameters (see above) to create .o.xsplice files +which can be used to splice an ELF with the new payload. + +The ksplice code can provide inspiration. + +### Exception tables and symbol tables growth + +We may need support for adapting or augmenting exception tables if +patching such code. Hotpatches may need to bring their own small +exception tables (similar to how Linux modules support this). + +If supporting hotpatches that introduce additional exception-locations +is not important, one could also change the exception table in-place +and reorder it afterwards. + + +### xSplice interdependencies + +Interdependencies between xSplice patches are tricky. + +There are three ways this can be addressed: + * A single large patch that subsumes and replaces all previous ones. + Over the life-time of patching the hypervisor this large patch + grows to accumulate all the code changes. + * Hotpatch stack - where a mechanism exists that loads the hotpatches + in the same order they were built in. We would need a build-id + of the hypervisor to make sure the hot-patches are built against the + correct build. + * Payload containing the old code to check against that. That allows + the hotpatches to be loaded independently (if they don't overlap) - or + if the old code also contains previously patched code - even if they + overlap. + +The disadvantage of the first large patch is that it can grow over +time and not provide a bisection mechanism to identify faulty patches. + +The hot-patch stack puts strict requirements on the order of the patches +being loaded and requires a hypervisor build-id to match against. + +The old code allows much more flexibility and an additional guard, +but is more complex to implement. 
+ +### Hypervisor ID (build-id) + +The build-id can help with: + + * Preventing the loading of wrong hotpatches (intended for other builds) + + * Identifying suitable hotpatches on disk and helping with runtime + tooling (if laid out using build ID) + +The build-id (aka hypervisor id) can be easily obtained by utilizing +the ld --build-id option, which is documented as follows (copied from ld): + +<pre> +--build-id + --build-id=style + Request creation of ".note.gnu.build-id" ELF note section. The contents of the note are unique bits identifying this + linked file. style can be "uuid" to use 128 random bits, "sha1" to use a 160-bit SHA1 hash on the normative parts of the + output contents, "md5" to use a 128-bit MD5 hash on the normative parts of the output contents, or "0xhexstring" to use a + chosen bit string specified as an even number of hexadecimal digits ("-" and ":" characters between digit pairs are + ignored). If style is omitted, "sha1" is used. + + The "md5" and "sha1" styles produces an identifier that is always the same in an identical output file, but will be + unique among all nonidentical output files. It is not intended to be compared as a checksum for the file's contents. A + linked file may be changed later by other tools, but the build ID bit string identifying the original linked file does + not change. + + Passing "none" for style disables the setting from any "--build-id" options earlier on the command line. + +</pre> + +### Symbol names + + +Xen, as it is now, has a couple of non-unique symbol names which will +make runtime symbol identification hard. Sometimes, static symbols +simply have the same name in C files, sometimes such symbols get +included via header files, and some C files are also compiled +multiple times and linked under different names (guest_walk.c). + +As such we need to modify the linker to make sure that the symbol +table also qualifies symbols by their source file name. 
+ +For the awkward situations in which C-files are compiled multiple +times patches we would need to some modification in the Xen code. + + +The convention for file-type symbols (that would allow to map many +symbols to their compilation unit) says that only the basename (i.e., +without directories) is embedded. This creates another layer of +confusion for duplicate file names in the build tree. + +That would have to be resolved. + +<pre> +> find . -name \*.c -print0 | xargs -0 -n1 basename | sort | uniq -c | sort -n | tail -n10 + 3 shutdown.c + 3 sysctl.c + 3 time.c + 3 xenoprof.c + 4 gdbstub.c + 4 irq.c + 5 domain.c + 5 mm.c + 5 pci.c + 5 traps.c +</pre> + +### Security + +Only the privileged domain should be allowed to do this operation. + diff --git a/docs/misc/xsplice_test.c b/docs/misc/xsplice_test.c new file mode 100644 index 0000000..6e0cf93 --- /dev/null +++ b/docs/misc/xsplice_test.c @@ -0,0 +1,78 @@ +#include "xsplice.h" +#include <stdio.h> + +struct xsplice_code xsplice_xsa125; +struct xsplice_reloc relocs[1]; +struct xsplice_section sections[1]; +struct xsplice_patch patches[1]; +struct xsplice_symbol do_xen_version_symbol; +struct xsplice_reloc_howto do_xen_version_howto; +char do_xen_version_new_code[1728]; + +#ifndef HYPERVISOR_ID +#define HYPERVISOR_ID "Xen 4.6-unstable-g9348394" +#endif + +struct xsplice xsa125 = { + .name = "xsa125", + .id = HYPERVISOR_ID, + .old = NULL, + .new = &xsplice_xsa125, +}; + +struct xsplice_code xsplice_xsa125 = { + .relocs = &relocs[0], + .n_relocs = 1, + .sections = §ions[0], + .n_sections = 1, + .patches = &patches[0], + .n_patches = 1, +}; + +struct xsplice_reloc relocs[1] = { + { + .addr = 0xffff82d080112f9e, + .symbol = &do_xen_version_symbol, + .isns_target = 0, + .howto = &do_xen_version_howto, + .isns_added = -4, + }, +}; + +struct xsplice_symbol do_xen_version_symbol = { + .name = "do_xen_version", + .label = "do_xen_version+<0x0>", +}; + +struct xsplice_reloc_howto do_xen_version_howto = { + .howto = 
XSPLICE_HOWTO_RELOC_PATCH, + .flag = XSPLICE_HOWTO_FLAG_PC_REL, + .r_shift = 0, + .mask = (-1ULL), +}; + + +struct xsplice_section sections[1] = { + { + .symbol = &do_xen_version_symbol, + .address = 0xffff82d080112f9e, + .size = 1728, + .flags = XSPLICE_SECTION_TEXT, + }, +}; + +struct xsplice_patch patches[1] = { + { + .type = XSPLICE_PATCH_RELOC_TEXT, + .size = 1728, + .addr = 0, + .content = &do_xen_version_new_code, + }, +}; + +char do_xen_version_new_code[1728] = { 0x83, 0xff, 0x09, }; + +void main() +{ + printf("%s\n", xsa125.name); +} diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h index ac7e5fd..9ef39fd 100644 --- a/tools/libxc/include/xenctrl.h +++ b/tools/libxc/include/xenctrl.h @@ -2828,6 +2828,22 @@ int xc_psr_cat_get_l3_info(xc_interface *xch, uint32_t socket, uint32_t *cos_max, uint32_t *cbm_len); #endif +int xc_xsplice_upload(xc_interface *xch, + char *id, char *payload, uint32_t size); + +int xc_xsplice_get(xc_interface *xch, + char *id, + xen_sysctl_xsplice_summary_t *summary); + +int xc_xsplice_list(xc_interface *xch, unsigned int max, unsigned int start, + xen_sysctl_xsplice_summary_t *info, unsigned int *done, + unsigned int *left); + +int xc_xsplice_apply(xc_interface *xch, char *id); +int xc_xsplice_revert(xc_interface *xch, char *id); +int xc_xsplice_unload(xc_interface *xch, char *id); +int xc_xsplice_check(xc_interface *xch, char *id); + #endif /* XENCTRL_H */ /* diff --git a/tools/libxc/xc_misc.c b/tools/libxc/xc_misc.c index b827bbb..bb91930 100644 --- a/tools/libxc/xc_misc.c +++ b/tools/libxc/xc_misc.c @@ -719,6 +719,189 @@ int xc_hvm_inject_trap( return rc; } +int xc_xsplice_upload(xc_interface *xch, + char *id, + char *payload, + uint32_t size) +{ + int rc; + DECLARE_SYSCTL; + DECLARE_HYPERCALL_BOUNCE(payload, size, XC_HYPERCALL_BUFFER_BOUNCE_IN); + + if ( !id || !payload ) + return -1; + + if ( xc_hypercall_bounce_pre(xch, payload) ) + return -1; + + sysctl.cmd = XEN_SYSCTL_xsplice_op; + 
sysctl.u.xsplice.cmd = XEN_SYSCTL_XSPLICE_UPLOAD; + sysctl.u.xsplice.u.upload.size = size; + memcpy(sysctl.u.xsplice.u.upload.id, id, XEN_XSPLICE_ID_SIZE); + set_xen_guest_handle(sysctl.u.xsplice.u.upload.payload, payload); + + rc = do_sysctl(xch, &sysctl); + + xc_hypercall_bounce_post(xch, payload); + + return rc; +} + +int xc_xsplice_get(xc_interface *xch, + char *id, + xen_sysctl_xsplice_summary_t *summary) +{ + int rc; + DECLARE_SYSCTL; + + if ( !id ) + return -1; + + sysctl.cmd = XEN_SYSCTL_xsplice_op; + sysctl.u.xsplice.cmd = XEN_SYSCTL_XSPLICE_GET; + sysctl.u.xsplice.u.get.status = 0; + memcpy(sysctl.u.xsplice.u.get.id, id, XEN_XSPLICE_ID_SIZE); + + rc = do_sysctl(xch, &sysctl); + + memcpy(summary, &sysctl.u.xsplice.u.get, sizeof(*summary)); + + return rc; +} + +int xc_xsplice_list(xc_interface *xch, unsigned int max, unsigned int start, + xen_sysctl_xsplice_summary_t *info, unsigned int *done, + unsigned int *left) +{ + int rc; + DECLARE_SYSCTL; + DECLARE_HYPERCALL_BOUNCE(info, 0 /* adjust later. */, XC_HYPERCALL_BUFFER_BOUNCE_OUT); + uint32_t max_batch_sz, nr; + uint32_t version = 0, retries = 0; + uint32_t adjust = 0; + + if ( !max ) + return -1; + + sysctl.cmd = XEN_SYSCTL_xsplice_op; + sysctl.u.xsplice.cmd = XEN_SYSCTL_XSPLICE_LIST; + sysctl.u.xsplice.u.list.version = 0; + sysctl.u.xsplice.u.list.idx = start; + + max_batch_sz = max; + + *done = 0; + *left = 0; + do { + if ( adjust ) + adjust = 0; /* Used when adjusting the 'max_batch_sz' or 'retries'. */ + + nr = min(max - *done, max_batch_sz); + + sysctl.u.xsplice.u.list.nr = nr; + HYPERCALL_BOUNCE_SET_SIZE(info, nr * sizeof(*info)); + + /* Move the pointer to proper offset into 'info'. 
*/ + (HYPERCALL_BUFFER(info))->ubuf = info + *done; + if ( xc_hypercall_bounce_pre(xch, info) ) + return -1; + + set_xen_guest_handle(sysctl.u.xsplice.u.list.summary, info); + + rc = do_sysctl(xch, &sysctl); + if ( rc < 0 && errno == E2BIG ) + { + if ( max_batch_sz <= 1 ) + break; + max_batch_sz >>= 1; + adjust = 1; /* For the loop conditional to let us loop again. */ + xc_hypercall_bounce_post(xch, info); /* No memory leaks! */ + continue; + } + if ( rc < 0 ) /* For all other errors we bail out. */ + break; + + if ( !version ) + version = sysctl.u.xsplice.u.list.version; + + if ( sysctl.u.xsplice.u.list.version != version ) + { + /* TODO: retries should be configurable? */ + if ( retries++ > 3 ) + { + rc = -1; + errno = EBUSY; + break; + } + *done = 0; /* Retry from scratch. */ + version = sysctl.u.xsplice.u.list.version; + adjust = 1; /* And make sure we continue in the loop. */ + xc_hypercall_bounce_post(xch, info); /* No memory leaks! */ + continue; + } + + /* We should never hit this, but just in case. */ + if ( rc > nr ) + { + errno = EINVAL; /* Overflow! */ + rc = -1; + break; + } + *left = sysctl.u.xsplice.u.list.nr; /* Total remaining count. */ + /* Copy only up 'rc' of data' - we could add 'min(rc,nr) if desired. */ + HYPERCALL_BOUNCE_SET_SIZE(info, (rc * sizeof(*info))); + /* Bounce the data and free the bounce buffer. */ + xc_hypercall_bounce_post(xch, info); + /* And update how many elements of info we have copied into. */ + *done += rc; + /* Update idx. */ + sysctl.u.xsplice.u.list.idx = rc; + } while ( adjust || (*done < max && *left != 0) ); + + return rc > 0 ? 
0 : rc; +} + +static int _xc_xsplice_action(xc_interface *xch, + char *id, + unsigned int action) +{ + int rc; + DECLARE_SYSCTL; + + if ( !id ) + return -1; + + sysctl.cmd = XEN_SYSCTL_xsplice_op; + sysctl.u.xsplice.cmd = XEN_SYSCTL_XSPLICE_ACTION; + sysctl.u.xsplice.u.action.cmd = action; + sysctl.u.xsplice.u.action.time = 0; /* TODO */ + memcpy(sysctl.u.xsplice.u.action.id, id, XEN_XSPLICE_ID_SIZE); + + rc = do_sysctl(xch, &sysctl); + + return rc; +} + +int xc_xsplice_apply(xc_interface *xch, char *id) +{ + return _xc_xsplice_action(xch, id, XSPLICE_ACTION_APPLY); +} + +int xc_xsplice_revert(xc_interface *xch, char *id) +{ + return _xc_xsplice_action(xch, id, XSPLICE_ACTION_REVERT); +} + +int xc_xsplice_unload(xc_interface *xch, char *id) +{ + return _xc_xsplice_action(xch, id, XSPLICE_ACTION_UNLOAD); +} + +int xc_xsplice_check(xc_interface *xch, char *id) +{ + return _xc_xsplice_action(xch, id, XSPLICE_ACTION_CHECK); +} + /* * Local variables: * mode: C diff --git a/tools/misc/Makefile b/tools/misc/Makefile index c4490f3..c46873e 100644 --- a/tools/misc/Makefile +++ b/tools/misc/Makefile @@ -30,6 +30,7 @@ INSTALL_SBIN += xenlockprof INSTALL_SBIN += xenperf INSTALL_SBIN += xenpm INSTALL_SBIN += xenwatchdogd +INSTALL_SBIN += xen-xsplice INSTALL_SBIN += $(INSTALL_SBIN-y) # Everything to be installed in a private bin/ @@ -98,6 +99,9 @@ xen-mfndump: xen-mfndump.o xenwatchdogd: xenwatchdogd.o $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS_libxenctrl) $(APPEND_LDFLAGS) +xen-xsplice: xen-xsplice.o + $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS_libxenctrl) $(APPEND_LDFLAGS) + xen-lowmemd: xen-lowmemd.o $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS_libxenctrl) $(LDLIBS_libxenstore) $(APPEND_LDFLAGS) diff --git a/tools/misc/xen-xsplice.c b/tools/misc/xen-xsplice.c new file mode 100644 index 0000000..7cf9879 --- /dev/null +++ b/tools/misc/xen-xsplice.c @@ -0,0 +1,360 @@ +#include <xenctrl.h> +#include <xenstore.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include 
<sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +static xc_interface *xch; + +void show_help(void) +{ + fprintf(stderr, + "xen-xsplice: Xsplice test tool\n" + "Usage: xen-xsplice <command> [args]\n" + " <id> An unique name of payload. Up to 40 characters.\n" + "Commands:\n" + " help display this help\n" + " upload <id> <file> upload file <cpuid> with <id> name\n" + " list list payloads uploaded.\n" + " apply <id> apply <id> patch.\n" + " revert <id> revert id <id> patch.\n" + " unload <id> unload id <id> patch.\n" + " check <id> check id <id> patch.\n" + ); +} + +/* wrapper function */ +static int help_func(int argc, char *argv[]) +{ + show_help(); + return 0; +} + +#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0])) + +static const char *status2str(long status) +{ +#define STATUS(x) [XSPLICE_STATUS_##x] = #x + static const char *const names[] = { + STATUS(LOADED), + STATUS(PROGRESS), + STATUS(CHECKED), + STATUS(APPLIED), + STATUS(REVERTED), + }; + + if (status >= ARRAY_SIZE(names)) + return "unknown"; + + if (status < 0) + return "-EXX"; + + if (!names[status]) + return "unknown"; + + return names[status]; +} + +#define MAX 11 +static int list_func(int argc, char *argv[]) +{ + unsigned int idx, done, left, rc, i; + xen_sysctl_xsplice_summary_t *info; + + if ( argc ) + { + show_help(); + return -1; + } + idx = left = 0; + info = malloc(sizeof(*info) * MAX); + if ( !info ) + { + fprintf(stderr, "Could not allocate buffer!\n"); + return ENOMEM; + } + fprintf(stdout," ID | status\n" + "----------------------------------------+------------\n"); + do { + done = 0; + memset(info, 'A', sizeof(*info) * MAX); /* Optional. 
*/ + rc = xc_xsplice_list(xch, MAX, idx, info, &done, &left); + if ( rc ) + { + fprintf(stderr, "Failed to list %d/%d: %d(%s)!\n", idx, left, errno, strerror(errno)); + return errno; + } + for ( i = 0; i < done; i++ ) + { + fprintf(stdout, "%-40s| ", info[i].id); + if ( info[i].status < 0 ) + fprintf(stdout, "%s\n", strerror(info[i].status)); + else + fprintf(stdout, "%s\n", status2str(info[i].status)); + } + idx += done; + } while ( left ); + + return 0; +} + +static int get_id(int argc, char *argv[], char *id) +{ + ssize_t len = strlen(argv[0]); + if ( len > XEN_XSPLICE_ID_SIZE ) + { + fprintf(stderr, "ID MUST be %d characters!\n", XEN_XSPLICE_ID_SIZE); + errno = EINVAL; + return errno; + } + /* Don't want any funny strings from the stack. */ + memset(id, 0, XEN_XSPLICE_ID_SIZE); + strncpy(id, argv[0], len); + return 0; +} + +static int upload_func(int argc, char *argv[]) +{ + char *filename; + char id[XEN_XSPLICE_ID_SIZE]; + int fd = 0, rc; + struct stat buf; + unsigned char *fbuf; + ssize_t len; + DECLARE_HYPERCALL_BUFFER(char, payload); + + if ( argc != 2 ) + { + show_help(); + return -1; + } + + if ( get_id(argc, argv, id) ) + return EINVAL; + + filename = argv[1]; + fd = open(filename, O_RDONLY); + if ( fd < 0 ) + { + fprintf(stderr, "Could not open %s, error: %d(%s)\n", + filename, errno, strerror(errno)); + return errno; + } + if ( stat(filename, &buf) != 0 ) + { + fprintf(stderr, "Could not get right size %s, error: %d(%s)\n", + filename, errno, strerror(errno)); + close(fd); + return errno; + } + + len = buf.st_size; + fbuf = mmap(0, len, PROT_READ, MAP_PRIVATE, fd, 0); + if ( fbuf == MAP_FAILED ) + { + fprintf(stderr,"Could not map: %s, error: %d(%s)\n", + filename, errno, strerror(errno)); + close (fd); + return errno; + } + printf("Uploading %s (%ld bytes)\n", filename, len); + payload = xc_hypercall_buffer_alloc(xch, payload, len); + memcpy(payload, fbuf, len); + + rc = xc_xsplice_upload(xch, id, payload, len); + if ( rc ) + { + fprintf(stderr, 
"Upload failed: %s, error: %d(%s)!\n", + filename, errno, strerror(errno)); + goto out; + } + xc_hypercall_buffer_free(xch, payload); + +out: + if ( munmap( fbuf, len) ) + { + fprintf(stderr, "Could not unmap!? error: %d(%s)!\n", + errno, strerror(errno)); + rc = errno; + } + close(fd); + + return rc; +} + +struct { + int allow; /* State it must be in to call function. */ + int expected; /* The state to be in after the function. */ + const char *name; + int (*function)(xc_interface *xch, char *id); + unsigned int executed; /* Has the function been called?. */ +} action_options[] = { + { .allow = XSPLICE_STATUS_CHECKED | XSPLICE_STATUS_REVERTED, + .expected = XSPLICE_STATUS_APPLIED, + .name = "apply", + .function = xc_xsplice_apply, + }, + { .allow = XSPLICE_STATUS_APPLIED, + .expected = XSPLICE_STATUS_REVERTED, + .name = "revert", + .function = xc_xsplice_revert, + }, + { .allow = XSPLICE_STATUS_CHECKED | XSPLICE_STATUS_REVERTED | XSPLICE_STATUS_LOADED, + .expected = ENOENT, + .name = "unload", + .function = xc_xsplice_unload, + }, + { .allow = XSPLICE_STATUS_CHECKED | XSPLICE_STATUS_LOADED, + .expected = XSPLICE_STATUS_CHECKED, + .name = "check", + .function = xc_xsplice_check + }, +}; + +int action_func(int argc, char *argv[], unsigned int idx) +{ + char id[40]; + int rc; + xen_sysctl_xsplice_summary_t summary; + unsigned int retry = 0; + + if ( argc != 1 ) + { + show_help(); + return -1; + } + + if ( get_id(argc, argv, id) ) + return EINVAL; + + do { + rc = xc_xsplice_get(xch, id, &summary); + /* N.B. Successfull unload will return ENOENT. */ + if ( rc ) + { + rc = errno; /* rc is just -1 and we want proper EXX. */ + break; + } + + if ( summary.status < 0 ) + { /* We report it outside the loop. 
*/ + rc = summary.status; + break; + } + if ( summary.status == XSPLICE_STATUS_PROGRESS ) + { + if ( !action_options[idx].executed ) + { + printf("%s is in progress and we didn't initiate it!\n", id); + errno = EBUSY; + rc = -1; + break; + } + if ( retry++ < 30 ) + { + printf("."); + sleep(1); + continue; + } + printf("%s: Waited more than 30 seconds! Bailing out.\n", id); + errno = EBUSY; + rc = -1; + break; + } + /* We use rc outside loop to deal with EXX type expected values. */ + rc = summary.status; + if ( action_options[idx].expected == rc ) /* Yeey! */ + break; + + if ( action_options[idx].allow & rc ) + { + if ( action_options[idx].executed ) + { + printf(" (0x%x vs 0x%x) state not reached!?\n", + action_options[idx].expected, rc); + errno = EINVAL; + break; + } + printf("%s: State is 0x%x, ok are 0x%x. Commencing %s:", + id, rc, action_options[idx].allow, + action_options[idx].name); + + rc = action_options[idx].function(xch, id); + if ( rc ) /* We report it outside the loop. */ + break; + + action_options[idx].executed = 1; + rc = 1; /* Loop again so we can display the dots. 
*/ + } else { + printf("%s: in wrong state (0x%x), expected 0x%x\n", + id, rc, action_options[idx].expected); + errno = EINVAL; + rc = -1; + break; + } + } while ( rc > 0 ); + + if ( action_options[idx].expected == rc ) + { + printf("completed!\n"); + rc = 0; + } else + printf("%s failed with %d(%s)\n", id, errno, strerror(errno)); + + return rc; +} +struct { + const char *name; + int (*function)(int argc, char *argv[]); +} main_options[] = { + { "help", help_func }, + { "list", list_func }, + { "upload", upload_func }, +}; + +int main(int argc, char *argv[]) +{ + int i, j, ret; + + if ( argc <= 1 ) + { + show_help(); + return 0; + } + for ( i = 0; i < ARRAY_SIZE(main_options); i++ ) + if (!strncmp(main_options[i].name, argv[1], strlen(argv[1]))) + break; + + if ( i == ARRAY_SIZE(main_options) ) + { + for ( j = 0; j < ARRAY_SIZE(action_options); j++ ) + if (!strncmp(action_options[j].name, argv[1], strlen(argv[1]))) + break; + + if ( j == ARRAY_SIZE(action_options) ) + { + fprintf(stderr, "Unrecognised command '%s' -- try " + "'xen-xsplice help'\n", argv[1]); + return 1; + } + } + + xch = xc_interface_open(0,0,0); + if ( !xch ) + { + fprintf(stderr, "failed to get the handler\n"); + return 0; + } + + if ( i == ARRAY_SIZE(main_options) ) + ret = action_func(argc -2, argv + 2, j); + else + ret = main_options[i].function(argc -2, argv + 2); + + xc_interface_close(xch); + + return !!ret; +} diff --git a/xen/common/Makefile b/xen/common/Makefile index 3fdf931..7769e5c 100644 --- a/xen/common/Makefile +++ b/xen/common/Makefile @@ -55,6 +55,7 @@ obj-y += vmap.o obj-y += vsprintf.o obj-y += wait.o obj-y += xmalloc_tlsf.o +obj-y += xsplice.o obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma unlzo unlz4 earlycpio,$(n).init.o) diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c index 5d21e48..1d4574a 100644 --- a/xen/common/keyhandler.c +++ b/xen/common/keyhandler.c @@ -18,6 +18,7 @@ #include <xen/mm.h> #include <xen/watchdog.h> #include 
<xen/init.h> +#include <xen/xsplice.h> #include <asm/debugger.h> #include <asm/div64.h> @@ -455,6 +456,11 @@ static struct keyhandler spinlock_reset_keyhandler = { .desc = "reset lock profile info" }; #endif +static struct keyhandler xsplice_printall_keyhandler = { + .diagnostic = 1, + .u.fn = xsplice_printall, + .desc = "print splicing information" +}; static void run_all_nonirq_keyhandlers(unsigned long unused) { @@ -567,7 +573,7 @@ void __init initialize_keytable(void) register_keyhandler('l', &spinlock_printall_keyhandler); register_keyhandler('L', &spinlock_reset_keyhandler); #endif - + register_keyhandler('x', &xsplice_printall_keyhandler); } /* diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c index f1c0c76..641bb25 100644 --- a/xen/common/sysctl.c +++ b/xen/common/sysctl.c @@ -27,6 +27,7 @@ #include <xsm/xsm.h> #include <xen/pmstat.h> #include <xen/gcov.h> +#include <xen/xsplice.h> long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl) { @@ -399,6 +400,10 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl) ret = sysctl_coverage_op(&op->u.coverage_op); break; #endif + case XEN_SYSCTL_xsplice_op: + ret = xsplice_control(&op->u.xsplice); + copyback = 1; + break; #ifdef HAS_PCI case XEN_SYSCTL_pcitopoinfo: diff --git a/xen/common/xsplice.c b/xen/common/xsplice.c new file mode 100644 index 0000000..e816394 --- /dev/null +++ b/xen/common/xsplice.c @@ -0,0 +1,405 @@ +/* + * xSplice - Copyright Oracle Corp. Inc 2015. 
+ * + * Author: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx> + */ + +/* TODO: Sort includes .*/ +#include <xen/smp.h> +#include <xen/keyhandler.h> +#include <xen/spinlock.h> +#include <xen/mm.h> +#include <xen/list.h> +#include <xen/guest_access.h> +#include <xen/stdbool.h> +#include <xen/sched.h> +#include <xen/lib.h> +#include <xen/xsplice.h> +#include <public/sysctl.h> + +#include <asm/event.h> + +static DEFINE_SPINLOCK(payload_list_lock); +static LIST_HEAD(payload_list); +static unsigned int payload_cnt; +static unsigned int payload_version = 1; +static int _debug = 1; +#define where() { if (_debug) printk("%s:%d\n", __func__, __LINE__); } + +struct payload { + char id[XEN_XSPLICE_ID_SIZE]; /* Unique id. */ + int32_t status; /* XSPLICE_STATUS_* or Exx type value. */ + int32_t old_status; /* XSPLICE_STATUS_* or Exx type value. */ + + uint32_t cmd; /* Action request. XSPLICE_ACTION_* */ + struct spinlock cmd_lock; /* Lock against the action. */ + + uint8_t *raw; /* Pointer to Elf file. */ + ssize_t size; /* Size of 'raw'. */ + + struct tasklet tasklet; + struct list_head list; /* Linked to 'payload_list'. 
*/ +}; + +static const char *status2str(int64_t status) +{ +#define STATUS(x) [XSPLICE_STATUS_##x] = #x + static const char *const names[] = { + STATUS(LOADED), + STATUS(PROGRESS), + STATUS(CHECKED), + STATUS(APPLIED), + STATUS(REVERTED), + }; + + if (status >= ARRAY_SIZE(names)) + return "unknown"; + + if (status < 0) + return "-EXX"; + + if (!names[status]) + return "unknown"; + + return names[status]; +} + +void xsplice_printall(unsigned char key) +{ + struct payload *data; + + spin_lock(&payload_list_lock); + + list_for_each_entry ( data, &payload_list, list ) + { + printk(" id=%s status=%s(%d,old=%d): \n", data->id, + status2str(data->status), data->status, data->old_status); + } + spin_unlock(&payload_list_lock); +} + +struct payload *find_payload(const char *id, bool_t need_lock) +{ + struct payload *data, *found = NULL; + + if ( need_lock ) + spin_lock(&payload_list_lock); + + list_for_each_entry ( data, &payload_list, list ) + { + if ( !strncmp(data->id, id, XEN_XSPLICE_ID_SIZE) ) + { + found = data; + break; + } + } + + if ( need_lock ) + spin_unlock(&payload_list_lock); + + return found; +} + +static int verify_payload(xen_sysctl_xsplice_upload_t *upload) +{ + if ( upload->id[0] == '\0' ) + { + where(); + return -EINVAL; + } + if ( upload->size == 0 ) + { + where(); + return -EINVAL; + } + if ( !guest_handle_okay(upload->payload, upload->size) ) + { + where(); + return -EFAULT; + } + + return 0; +} + +/* + * We MUST be holding the spinlock. + */ +static void __free_payload(struct payload *data) +{ + + free_xenheap_pages(data->raw, get_order_from_bytes(data->size)); + list_del(&data->list); + payload_cnt --; + payload_version ++; + tasklet_kill(&data->tasklet); + xfree(data); +} +#include <xen/delay.h> +static void xsplice_tasklet(unsigned long _data) +{ + struct payload *data = (struct payload *)_data; + + /* TODO: Remove it. 
*/ + mdelay(1000); + + spin_lock(&data->cmd_lock); + switch ( data->cmd ) { + case XSPLICE_ACTION_CHECK: + /* Do the operation here. */ + data->status = XSPLICE_STATUS_CHECKED; + break; + case XSPLICE_ACTION_APPLY: + /* TODO: Well, do the work :-) */ + data->status = XSPLICE_STATUS_APPLIED; + break; + case XSPLICE_ACTION_REVERT: + /* TODO: Well, do the work :-) */ + data->status = XSPLICE_STATUS_REVERTED; + break; + default: + data->status = -EINVAL; + } + spin_unlock(&data->cmd_lock); +} + +static int xsplice_upload(xen_sysctl_xsplice_upload_t *upload) +{ + struct payload *data; + uint8_t *raw = NULL; + int rc; + + rc = verify_payload(upload); + if ( rc ) + return rc; + + /* + * Compute the size of the structures which need to be verified. + */ + + data = find_payload(upload->id, true); + if ( data ) + { + where(); + return -EEXIST; + } + rc = -ENOMEM; + data = xzalloc(struct payload); + if ( !data ) + { + where(); + return rc; + } + + raw = alloc_xenheap_pages(get_order_from_bytes(upload->size), 0); + if ( !raw ) + { + printk("%s: alloc for %ld bytes, %d order\n", __func__, upload->size, get_order_from_bytes(upload->size)); + xfree(data); + return rc; + } + if ( copy_from_guest(raw, upload->payload, upload->size) ) + { + rc = -EFAULT; + goto err_out; + } + + printk("%s: size %ld %p [%02x %02x ..] \n", __func__, upload->size, + raw, (unsigned int)raw[0], (unsigned int)raw[1]); + + /* TODO: Verify signature . 
*/ + memcpy(data->id, upload->id, XEN_XSPLICE_ID_SIZE); + data->status = XSPLICE_STATUS_LOADED; + INIT_LIST_HEAD(&data->list); + data->raw = raw; + data->size = upload->size; + spin_lock_init(&data->cmd_lock); + data->cmd = 0; + tasklet_init(&data->tasklet, xsplice_tasklet, (unsigned long)data); + + spin_lock(&payload_list_lock); + list_add_tail(&data->list, &payload_list); + payload_cnt ++; + payload_version ++; + spin_unlock(&payload_list_lock); + + return 0; + + err_out: + if ( raw ) + free_xenheap_pages(raw, get_order_from_bytes(upload->size)); + if ( data ) + xfree(data); + return rc; +} + +static int xsplice_get(xen_sysctl_xsplice_summary_t *summary) +{ + struct payload *data; + + if ( summary->status ) + return -EINVAL; + + data = find_payload(summary->id, true); + if ( !data ) + return -ENOENT; + + summary->status = data->status; + + return 0; +} + +static int xsplice_list(xen_sysctl_xsplice_list_t *list) +{ + xen_sysctl_xsplice_summary_t summary; + struct payload *data; + unsigned int idx = 0, i = 0; + int rc = 0; + unsigned int ver = payload_version; + + // TODO: Increase to 64. Leave 4 for debug. + if ( list->nr > 4 ) + return -E2BIG; + + if ( guest_handle_is_null(list->summary) ) + return -EINVAL; + + spin_lock(&payload_list_lock); + if ( list->idx > payload_cnt ) + { + spin_unlock(&payload_list_lock); + where(); + return -EINVAL; + } + + list_for_each_entry( data, &payload_list, list ) + { + if ( list->idx > i++ ) + continue; + + /* Copy all of the bytes avoid leaking stack data. */ + memcpy(summary.id, data->id, XEN_XSPLICE_ID_SIZE); + summary.status = data->status; + + /* N.B. 'idx' != 'i'. */ + if ( copy_to_guest_offset(list->summary, idx++, &summary, 1) ) + { + rc = -EFAULT; + break; + } + if ( hypercall_preempt_check() || (idx + 1 > list->nr) ) + { + break; + } + } + list->nr = payload_cnt - i; /* Remaining amount. */ + spin_unlock(&payload_list_lock); + list->version = ver; + + /* And how many we have processed. */ + return rc ? 
rc : idx; +} + +static int xsplice_action(xen_sysctl_xsplice_action_t *action) +{ + struct payload *data; + int rc = -EINVAL; + + if ( action->id[0] == '\0' ) + return rc; + + spin_lock(&payload_list_lock); + data = find_payload(action->id, false /* we are holding the lock. */); + if ( !data ) + { + rc = -ENOENT; + goto out; + } + if ( action->cmd != XSPLICE_ACTION_UNLOAD ) + spin_lock(&data->cmd_lock); + + switch ( action->cmd ) + { + case XSPLICE_ACTION_CHECK: + if ( ( data->status == XSPLICE_STATUS_LOADED ) ) + { + data->old_status = data->status; + data->status = XSPLICE_STATUS_PROGRESS; + data->cmd = action->cmd; + tasklet_schedule(&data->tasklet); + rc = 0; + } else if ( data->status == XSPLICE_STATUS_CHECKED ) + { + rc = 0; + } + break; + case XSPLICE_ACTION_UNLOAD: + if ( ( data->status == XSPLICE_STATUS_REVERTED ) || + ( data->status == XSPLICE_STATUS_LOADED ) || + ( data->status == XSPLICE_STATUS_CHECKED ) ) + { + __free_payload(data); + /* No touching 'data' from here on! */ + rc = 0; + } + break; + case XSPLICE_ACTION_REVERT: + if ( data->status == XSPLICE_STATUS_APPLIED ) + { + data->old_status = data->status; + data->status = XSPLICE_STATUS_PROGRESS; + data->cmd = action->cmd; + rc = 0; + /* TODO: Tasklet is not good for this. We need a different vehicle. */ + tasklet_schedule(&data->tasklet); + } + break; + case XSPLICE_ACTION_APPLY: + if ( ( data->status == XSPLICE_STATUS_CHECKED ) || + ( data->status == XSPLICE_STATUS_REVERTED )) + { + data->old_status = data->status; + data->status = XSPLICE_STATUS_PROGRESS; + data->cmd = action->cmd; + rc = 0; + /* TODO: Tasklet is not good for this. We need a different vehicle. 
*/ + tasklet_schedule(&data->tasklet); + } + break; + default: + rc = -ENOSYS; + break; + } + + if ( action->cmd != XSPLICE_ACTION_UNLOAD ) + spin_unlock(&data->cmd_lock); + out: + spin_unlock(&payload_list_lock); + + return rc; +} + +int xsplice_control(xen_sysctl_xsplice_op_t *xsplice) +{ + int rc; + + switch ( xsplice->cmd ) + { + case XEN_SYSCTL_XSPLICE_UPLOAD: + rc = xsplice_upload(&xsplice->u.upload); + break; + case XEN_SYSCTL_XSPLICE_GET: + rc = xsplice_get(&xsplice->u.get); + break; + case XEN_SYSCTL_XSPLICE_LIST: + rc = xsplice_list(&xsplice->u.list); + break; + case XEN_SYSCTL_XSPLICE_ACTION: + rc = xsplice_action(&xsplice->u.action); + break; + default: + rc = -ENOSYS; + break; + } + + return rc; +} diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h index 58c9be2..48dd511 100644 --- a/xen/include/public/sysctl.h +++ b/xen/include/public/sysctl.h @@ -710,6 +710,70 @@ struct xen_sysctl_psr_cat_op { typedef struct xen_sysctl_psr_cat_op xen_sysctl_psr_cat_op_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_psr_cat_op_t); +/* + * XEN_SYSCTL_XSPLICE_op + * + */ +#define XEN_SYSCTL_XSPLICE_UPLOAD 0 +#define XEN_XSPLICE_ID_SIZE 40 + +struct xen_sysctl_xsplice_upload { + char id[XEN_XSPLICE_ID_SIZE]; /* IN, name of the patch. */ + uint64_t size; /* IN, size of the ELF file. */ + XEN_GUEST_HANDLE_64(uint8) payload; /* IN, the ELF file. */ +}; +typedef struct xen_sysctl_xsplice_upload xen_sysctl_xsplice_upload_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_xsplice_upload_t); + +#define XEN_SYSCTL_XSPLICE_GET 1 +struct xen_sysctl_xsplice_summary { + char id[XEN_XSPLICE_ID_SIZE]; /* IN, name of the patch. */ +#define XSPLICE_STATUS_LOADED 0x01 +#define XSPLICE_STATUS_PROGRESS 0x02 +#define XSPLICE_STATUS_CHECKED 0x04 +#define XSPLICE_STATUS_APPLIED 0x08 +#define XSPLICE_STATUS_REVERTED 0x10 + /* Any negative value is an error. The error would be in -EXX format. */ + int32_t status; /* OUT, On IN has to be zero. 
*/ +}; +typedef struct xen_sysctl_xsplice_summary xen_sysctl_xsplice_summary_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_xsplice_summary_t); + +#define XEN_SYSCTL_XSPLICE_LIST 2 +struct xen_sysctl_xsplice_list { + uint32_t version; /* OUT. */ + uint32_t idx; /* IN/OUT */ + uint32_t nr; /* IN/OUT */ + XEN_GUEST_HANDLE_64(xen_sysctl_xsplice_summary_t) summary; /* OUT */ +}; +typedef struct xen_sysctl_xsplice_list xen_sysctl_xsplice_list_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_xsplice_list_t); + +#define XEN_SYSCTL_XSPLICE_ACTION 3 +struct xen_sysctl_xsplice_action { + char id[XEN_XSPLICE_ID_SIZE]; /* IN, name of the patch. */ +#define XSPLICE_ACTION_CHECK 1 +#define XSPLICE_ACTION_UNLOAD 2 +#define XSPLICE_ACTION_REVERT 3 +#define XSPLICE_ACTION_APPLY 4 + uint32_t cmd; /* IN */ + uint64_aligned_t time; /* IN */ +}; +typedef struct xen_sysctl_xsplice_action xen_sysctl_xsplice_action_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_xsplice_action_t); + +struct xen_sysctl_xsplice_op { + uint32_t cmd; /* IN */ + union { + xen_sysctl_xsplice_upload_t upload; + xen_sysctl_xsplice_list_t list; + xen_sysctl_xsplice_summary_t get; + xen_sysctl_xsplice_action_t action; + } u; +}; +typedef struct xen_sysctl_xsplice_op xen_sysctl_xsplice_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_xsplice_op_t); + struct xen_sysctl { uint32_t cmd; #define XEN_SYSCTL_readconsole 1 @@ -734,6 +798,7 @@ struct xen_sysctl { #define XEN_SYSCTL_psr_cmt_op 21 #define XEN_SYSCTL_pcitopoinfo 22 #define XEN_SYSCTL_psr_cat_op 23 +#define XEN_SYSCTL_xsplice_op 24 uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */ union { struct xen_sysctl_readconsole readconsole; @@ -758,6 +823,7 @@ struct xen_sysctl { struct xen_sysctl_coverage_op coverage_op; struct xen_sysctl_psr_cmt_op psr_cmt_op; struct xen_sysctl_psr_cat_op psr_cat_op; + struct xen_sysctl_xsplice_op xsplice; uint8_t pad[128]; } u; }; diff --git a/xen/include/xen/xsplice.h b/xen/include/xen/xsplice.h new file mode 100644 index 0000000..41e28da --- /dev/null 
+++ b/xen/include/xen/xsplice.h @@ -0,0 +1,9 @@ +#ifndef __XEN_XSPLICE_H__ +#define __XEN_XSPLICE_H__ + +struct xen_sysctl_xsplice_op; +int xsplice_control(struct xen_sysctl_xsplice_op *); + +extern void xsplice_printall(unsigned char key); + +#endif /* __XEN_XSPLICE_H__ */ -- 1.8.4.2 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |