[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH] linux-2.6.18/blktap: fix locking (again)
>>> On 23.12.11 at 12:33, "Jan Beulich" <JBeulich@xxxxxxxx> wrote: > c/s 1090 wasn't really helping much - I was doing that under the wrong > impression that the zap_pte() hook would be called with the mmap_sem > held. That being wrong, none of the uses of mmap_sem here actually are > write ones (they only look up vma information), so convert them all to > read acquires/releases. > > A new spinlock needs to be introduced, protecting idx_map. This has to > nest inside mmap_sem, which requires some code movement. Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> > --- a/drivers/xen/blktap/blktap.c > +++ b/drivers/xen/blktap/blktap.c > @@ -113,6 +113,7 @@ typedef struct tap_blkif { > pid_t pid; /*tapdisk process id */ > enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace > shutdown */ > + spinlock_t map_lock; /*protects idx_map */ > struct idx_map { > u16 mem, req; > } *idx_map; /*Record the user ring id to kern > @@ -295,7 +296,7 @@ static pte_t blktap_clear_pte(struct vm_ > pte_t copy; > tap_blkif_t *info = NULL; > unsigned int seg, usr_idx, pending_idx, mmap_idx, count = 0; > - unsigned long offset, uvstart = 0; > + unsigned long offset; > struct page *pg; > struct grant_handle_pair *khandle; > struct gnttab_unmap_grant_ref unmap[2]; > @@ -304,25 +305,28 @@ static pte_t blktap_clear_pte(struct vm_ > * If the address is before the start of the grant mapped region or > * if vm_file is NULL (meaning mmap failed and we have nothing to do) > */ > - if (vma->vm_file != NULL) { > + if (vma->vm_file != NULL) > info = vma->vm_file->private_data; > - uvstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT); > - } > - if (vma->vm_file == NULL || uvaddr < uvstart) > + if (info == NULL || uvaddr < info->user_vstart) > return ptep_get_and_clear_full(vma->vm_mm, uvaddr, > ptep, is_fullmm); > > - /* TODO Should these be changed to if statements? */ > - BUG_ON(!info); > - BUG_ON(!info->idx_map); > - > - offset = (uvaddr - uvstart) >> PAGE_SHIFT; > + offset = (uvaddr - info->user_vstart) >> PAGE_SHIFT; > usr_idx = OFFSET_TO_USR_IDX(offset); > seg = OFFSET_TO_SEG(offset); > > + spin_lock(&info->map_lock); > + > pending_idx = info->idx_map[usr_idx].req; > mmap_idx = info->idx_map[usr_idx].mem; > > + /* fast_flush_area() may already have cleared this entry */ > + if (mmap_idx == INVALID_MIDX) { > + spin_unlock(&info->map_lock); > + return ptep_get_and_clear_full(vma->vm_mm, uvaddr, ptep, > + is_fullmm); > + } > + > pg = idx_to_page(mmap_idx, pending_idx, seg); > ClearPageReserved(pg); > info->foreign_map.map[offset + RING_PAGES] = NULL; > @@ -365,6 +369,8 @@ static pte_t blktap_clear_pte(struct vm_ > BUG(); > } > > + spin_unlock(&info->map_lock); > + > return copy; > } > > @@ -489,6 +495,7 @@ found: > } > > info->minor = minor; > + spin_lock_init(&info->map_lock); > /* > * Make sure that we have a minor before others can > * see us. > @@ -1029,25 +1036,30 @@ static void fast_flush_area(pending_req_ > unsigned int u_idx, tap_blkif_t *info) > { > struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; > - unsigned int i, mmap_idx, invcount = 0, locked = 0; > + unsigned int i, mmap_idx, invcount = 0; > struct grant_handle_pair *khandle; > uint64_t ptep; > int ret; > unsigned long uvaddr; > struct mm_struct *mm = info->mm; > > + if (mm != NULL) > + down_read(&mm->mmap_sem); > + > if (mm != NULL && xen_feature(XENFEAT_auto_translated_physmap)) { > - down_write(&mm->mmap_sem); > + slow: > blktap_zap_page_range(mm, > MMAP_VADDR(info->user_vstart, u_idx, 0), > req->nr_pages); > info->idx_map[u_idx].mem = INVALID_MIDX; > - up_write(&mm->mmap_sem); > + up_read(&mm->mmap_sem); > return; > } > > mmap_idx = req->mem_idx; > > + spin_lock(&info->map_lock); > + > for (i = 0; i < req->nr_pages; i++) { > uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i); > > @@ -1066,15 +1078,13 @@ static void fast_flush_area(pending_req_ > > if (mm != NULL && khandle->user != INVALID_GRANT_HANDLE) { > BUG_ON(xen_feature(XENFEAT_auto_translated_physmap)); > - if (!locked++) > - down_write(&mm->mmap_sem); > if (create_lookup_pte_addr( > mm, > MMAP_VADDR(info->user_vstart, u_idx, i), > &ptep) !=0) { > - up_write(&mm->mmap_sem); > + spin_unlock(&info->map_lock); > WPRINTK("Couldn't get a pte addr!\n"); > - return; > + goto slow; > } > > gnttab_set_unmap_op(&unmap[invcount], ptep, > @@ -1091,19 +1101,11 @@ static void fast_flush_area(pending_req_ > GNTTABOP_unmap_grant_ref, unmap, invcount); > BUG_ON(ret); > > - if (mm != NULL && !xen_feature(XENFEAT_auto_translated_physmap)) { > - if (!locked++) > - down_write(&mm->mmap_sem); > - blktap_zap_page_range(mm, > - MMAP_VADDR(info->user_vstart, u_idx, 0), > - req->nr_pages); > - } > - > - if (!locked && mm != NULL) > - down_write(&mm->mmap_sem); > info->idx_map[u_idx].mem = INVALID_MIDX; > + > + spin_unlock(&info->map_lock); > if (mm != NULL) > - up_write(&mm->mmap_sem); > + up_read(&mm->mmap_sem); > } > > /****************************************************************** > @@ -1406,7 +1408,9 @@ static void dispatch_rw_block_io(blkif_t > goto fail_response; > > /* Check we have space on user ring - should never fail. */ > + spin_lock(&info->map_lock); > usr_idx = GET_NEXT_REQ(info->idx_map); > + spin_unlock(&info->map_lock); > if (usr_idx >= MAX_PENDING_REQS) { > WARN_ON(1); > goto fail_response; > @@ -1445,7 +1449,7 @@ static void dispatch_rw_block_io(blkif_t > op = 0; > mm = info->mm; > if (!xen_feature(XENFEAT_auto_translated_physmap)) > - down_write(&mm->mmap_sem); > + down_read(&mm->mmap_sem); > for (i = 0; i < nseg; i++) { > unsigned long uvaddr; > unsigned long kvaddr; > @@ -1462,9 +1466,9 @@ static void dispatch_rw_block_io(blkif_t > /* Now map it to user. */ > ret = create_lookup_pte_addr(mm, uvaddr, &ptep); > if (ret) { > - up_write(&mm->mmap_sem); > + up_read(&mm->mmap_sem); > WPRINTK("Couldn't get a pte addr!\n"); > - goto fail_flush; > + goto fail_response; > } > > gnttab_set_map_op(&map[op], ptep, > @@ -1478,12 +1482,15 @@ static void dispatch_rw_block_io(blkif_t > req->seg[i].first_sect + 1); > } > > + if (xen_feature(XENFEAT_auto_translated_physmap)) > + down_read(&mm->mmap_sem); > + > + spin_lock(&info->map_lock); > + > ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op); > BUG_ON(ret); > > if (!xen_feature(XENFEAT_auto_translated_physmap)) { > - up_write(&mm->mmap_sem); > - > for (i = 0; i < (nseg*2); i+=2) { > unsigned long uvaddr; > unsigned long offset; > @@ -1548,18 +1555,18 @@ static void dispatch_rw_block_io(blkif_t > } > } > > + /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/ > + info->idx_map[usr_idx].mem = mmap_idx; > + info->idx_map[usr_idx].req = pending_idx; > + > + spin_unlock(&info->map_lock); > + > if (ret) > goto fail_flush; > > - if (xen_feature(XENFEAT_auto_translated_physmap)) > - down_write(&mm->mmap_sem); > - /* Mark mapped pages as reserved: */ > - for (i = 0; i < req->nr_segments; i++) { > - struct page *pg; > - > - pg = idx_to_page(mmap_idx, pending_idx, i); > - SetPageReserved(pg); > - if (xen_feature(XENFEAT_auto_translated_physmap)) { > + if (xen_feature(XENFEAT_auto_translated_physmap)) { > + for (i = 0; i < nseg; i++) { > + struct page *pg = idx_to_page(mmap_idx, pending_idx, i); > unsigned long uvaddr = MMAP_VADDR(info->user_vstart, > usr_idx, i); > if (vma && uvaddr >= vma->vm_end) { > @@ -1577,18 +1584,12 @@ static void dispatch_rw_block_io(blkif_t > continue; > } > ret = vm_insert_page(vma, uvaddr, pg); > - if (ret) { > - up_write(&mm->mmap_sem); > + if (ret) > goto fail_flush; > - } > } > } > - if (xen_feature(XENFEAT_auto_translated_physmap)) > - up_write(&mm->mmap_sem); > > - /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/ > - info->idx_map[usr_idx].mem = mmap_idx; > - info->idx_map[usr_idx].req = pending_idx; > + up_read(&mm->mmap_sem); > > blkif_get(blkif); > /* Finally, write the request message to the user ring. */ > @@ -1611,6 +1612,7 @@ static void dispatch_rw_block_io(blkif_t > return; > > fail_flush: > + up_read(&mm->mmap_sem); > WPRINTK("Reached Fail_flush\n"); > fast_flush_area(pending_req, pending_idx, usr_idx, info); > fail_response: _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |