Hi Alex,
While speaking with Ian about the dom0 kernel crash caused by using
O_DIRECT in QEMU, we came up with a simple workaround that should turn
the crash into a data corruption problem (the same behaviour as on native).
The idea is that when we balloon out a page, we replace its original
mapping with a mapping of a scrub page, so that if the network stack
tries to access an old grant that no longer exists, it finds a valid
page mapped there (the scrub page) instead of nothing.
Could you please try the appended Linux patch, together with a QEMU that
uses O_DIRECT to open a file on NFS?
Thanks!
- Stefano
---
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 930fb68..0663fda 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -88,6 +88,7 @@ EXPORT_SYMBOL_GPL(balloon_stats);
/* We increase/decrease in batches which fit in a page */
static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)];
+static struct page* trade_page;
#ifdef CONFIG_HIGHMEM
#define inc_totalhigh_pages() (totalhigh_pages++)
@@ -423,7 +424,7 @@ static enum bp_state decrease_reservation(unsigned
long nr_pages, gfp_t gfp) if (xen_pv_domain() &&
!PageHighMem(page)) {
ret = HYPERVISOR_update_va_mapping(
(unsigned long)__va(pfn << PAGE_SHIFT),
- __pte_ma(0), 0);
+ pfn_pte(page_to_pfn(trade_page), PAGE_KERNEL),
0);
BUG_ON(ret);
}
#endif
@@ -436,7 +437,7 @@ static enum bp_state decrease_reservation(unsigned
long nr_pages, gfp_t gfp) /* No more mappings: invalidate P2M and add
to balloon. */
for (i = 0; i < nr_pages; i++) {
pfn = mfn_to_pfn(frame_list[i]);
- __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ __set_phys_to_machine(pfn, pfn_to_mfn(page_to_pfn(trade_page)));
balloon_append(pfn_to_page(pfn));
}
@@ -591,6 +592,10 @@ static int __init balloon_init(void)
if (!xen_domain())
return -ENODEV;
+ trade_page = alloc_page(GFP_KERNEL);
+ if (trade_page == NULL)
+ return -ENOMEM;
+
pr_info("xen/balloon: Initialising balloon driver.\n");
balloon_stats.current_pages = xen_pv_domain()