[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [linux-2.6.18-xen] blktap: refine mm tracking
# HG changeset patch # User Jan Beulich <jbeulich@xxxxxxxx> # Date 1447677635 -3600 # Node ID a0a79976ffebcfa5ff55feb09f833fc1adbeb2e8 # Parent 3ae3ae4c878fe95de3f895110806a428e88e90b1 blktap: refine mm tracking As already noted in c/s 1013:eb21d96a6aae ("xen/blktap: fix cleanup after unclean application exit") and 1015:2a4b455b1fba ("blktap: fix cleanup after unclean application exit #2"), the extra reference obtained on tapdisk's mm (from c/s 867:978499ee4f39 ["linux/blktap: fix vma_close() for partial munmap"]) is problematic. However, while the two c/s fixed what they claim to, the reconnect case got broken (tap_blkif_schedule() clearing init->mm left no way for it to get set again). Do away with the extra reference: The mm of interest can't go away as long as we have a VMA in it. To take care of VMA splitting, track the amount of outstanding mapped space, and zap info->mm when that value drops to zero. At the same time also fix the oversight in the first of the mentioned c/s of not keeping ring_ok up to date: The zapping of info->mm without clearing info->ring_ok led to NULL pointer accesses in down_read() called from dispatch_rw_block_io(). We don't really need the extra flag, we can use info->mm for that purpose. Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> --- diff -r 3ae3ae4c878f -r a0a79976ffeb drivers/xen/blktap/blktap.c --- a/drivers/xen/blktap/blktap.c Mon Nov 16 13:38:00 2015 +0100 +++ b/drivers/xen/blktap/blktap.c Mon Nov 16 13:40:35 2015 +0100 @@ -102,10 +102,10 @@ typedef struct domid_translate_ext { typedef struct tap_blkif { struct mm_struct *mm; /*User address space */ unsigned long rings_vstart; /*Kernel memory mapping */ + unsigned long rings_total; /*Kernel memory mapping size */ unsigned long user_vstart; /*User memory mapping */ unsigned long dev_inuse; /*One process opens device at a time. */ unsigned long dev_pending; /*In process of being opened */ - unsigned long ring_ok; /*make this ring->state */ blkif_front_ring_t ufe_ring; /*Rings up to user space. */ wait_queue_head_t wait; /*for poll */ unsigned long mode; /*current switching mode */ @@ -397,12 +397,22 @@ static void blktap_vma_open(struct vm_ar */ static void blktap_vma_close(struct vm_area_struct *vma) { + tap_blkif_t *info; struct vm_area_struct *next = vma->vm_next; + if (vma->vm_file == NULL) + return; + + info = vma->vm_file->private_data; + info->rings_total -= vma->vm_end - vma->vm_start; + if (info->rings_total == 0) { + info->mm = NULL; + return; + } + if (next == NULL || vma->vm_ops != next->vm_ops || vma->vm_end != next->vm_start || - vma->vm_file == NULL || vma->vm_file != next->vm_file) return; @@ -537,7 +547,6 @@ void signal_tapdisk(int idx) { tap_blkif_t *info; struct task_struct *ptask; - struct mm_struct *mm; /* * if the userland tools set things up wrong, this could be negative; @@ -556,10 +565,7 @@ void signal_tapdisk(int idx) info->status = CLEANSHUTDOWN; } info->blkif = NULL; - - mm = xchg(&info->mm, NULL); - if (mm) - mmput(mm); + info->mm = NULL; } static int blktap_open(struct inode *inode, struct file *filp) @@ -629,19 +635,16 @@ static int blktap_open(struct inode *ino static int blktap_release(struct inode *inode, struct file *filp) { tap_blkif_t *info = filp->private_data; - struct mm_struct *mm; /* check for control device */ if (!info) return 0; - info->ring_ok = 0; + info->mm = NULL; smp_wmb(); info->rings_vstart = 0; + info->rings_total = 0; - mm = xchg(&info->mm, NULL); - if (mm) - mmput(mm); kfree(info->foreign_maps->map); kfree(info->foreign_maps); info->foreign_maps = NULL; @@ -700,7 +703,7 @@ static int blktap_mmap(struct file *filp return -ENOMEM; } - if (info->rings_vstart) { + if (info->rings_total) { WPRINTK("mmap already called on filp %p (minor %d)\n", filp, info->minor); return -EPERM; @@ -718,6 +721,7 @@ static int blktap_mmap(struct file *filp size >>= PAGE_SHIFT; info->rings_vstart = vma->vm_start; + info->rings_total = vma->vm_end - vma->vm_start; info->user_vstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT); /* Map the ring pages to the start of the region and reserve it. */ @@ -754,15 +758,15 @@ static int blktap_mmap(struct file *filp vma->vm_mm->context.has_foreign_mappings = 1; #endif - info->mm = get_task_mm(current); smp_wmb(); - info->ring_ok = 1; + info->mm = vma->vm_mm; return 0; fail: /* Clear any active mappings. */ zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL); info->rings_vstart = 0; + info->rings_total = 0; return -ENOMEM; } @@ -1057,6 +1061,7 @@ static void fast_flush_area(pending_req_ unsigned long uvaddr; struct mm_struct *mm = info->mm; + smp_rmb(); if (mm != NULL) down_read(&mm->mmap_sem); @@ -1187,13 +1192,6 @@ int tap_blkif_schedule(void *arg) info = tapfds[blkif->dev_num]; blkif_put(blkif); - if (info) { - struct mm_struct *mm = xchg(&info->mm, NULL); - - if (mm) - mmput(mm); - } - return 0; } @@ -1474,11 +1472,12 @@ static void dispatch_rw_block_io(blkif_t } /* Make sure userspace is ready. */ - if (!info->ring_ok) { + mm = info->mm; + smp_rmb(); + if (!mm) { WPRINTK("ring not ready for requests!\n"); goto fail_response; } - smp_rmb(); if (RING_FULL(&info->ufe_ring)) { WPRINTK("fe_ring is full, " @@ -1496,7 +1495,6 @@ static void dispatch_rw_block_io(blkif_t if (req->operation == BLKIF_OP_WRITE) flags |= GNTMAP_readonly; op = 0; - mm = info->mm; if (!xen_feature(XENFEAT_auto_translated_physmap)) down_read(&mm->mmap_sem); for (i = 0; i < nseg; i++) { _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |