
[Xen-devel] PING? RE: [PATCH] tmem (hypervisor-side): ABI v1 to handle long object-ids (XEN-4.0-TESTING and XEN-UNSTABLE)



Just returned from a week of vacation.

I see these patches have not yet been applied to xen-unstable or
xen-4.0-testing, for either the hypervisor or the tools. Did I
drop the ball on something?
(sorry if I am suffering from post-vacation amnesia).

Thanks,
Dan

> -----Original Message-----
> From: Dan Magenheimer
> Sent: Friday, September 03, 2010 9:48 AM
> To: Xen-Devel (xen-devel@xxxxxxxxxxxxxxxxxxx)
> Cc: Ian Jackson; Keir Fraser
> Subject: [PATCH] tmem (hypervisor-side): ABI v1 to handle long object-
> ids (XEN-4.0-TESTING and XEN-UNSTABLE)
> 
> [PATCH] tmem (hypervisor-side): move to new ABI version to handle
> long object-ids
> 
> Please apply this patch to both xen-4.0-testing and xen-unstable
> (the same patch applies cleanly to both).
> 
> (Note to Keir/Ian: These patches should be applied
> together, but I'm not clear on how to submit patches
> that cross MAINTAINERS boundaries as this one does.)
> 
> After a great deal of discussion and review with Linux
> kernel developers, it appears there are "next-generation"
> filesystems (such as btrfs, xfs, and Lustre) that will not
> be able to use tmem due to an ABI limitation: the field
> that represents a unique file identifier is 64 bits in
> the tmem ABI but may need to be as large as 192 bits.
> So to support these guest filesystems, the tmem ABI must
> be revised from "v0" to "v1".
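> 
> To illustrate (a sketch only, not part of the patch text;
> the real header uses the name "struct tmem_handle" for both
> versions), the guest-visible handle changes like this:
> 
>     /* v0: a handle names a page by (pool, object, index);
>      * the single 64-bit oid cannot hold a 192-bit file id */
>     struct tmem_handle_v0 {
>         uint32_t pool_id;
>         uint32_t index;
>         uint64_t oid;
>     };
> 
>     /* v1: the object-id widens to 3 x 64 = 192 bits */
>     struct tmem_handle_v1 {
>         uint32_t pool_id;
>         uint32_t index;
>         uint64_t oid[3];
>     };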
> 
> I *think* it is still the case that tmem is experimental
> and is not used anywhere yet in production.
> 
> The tmem ABI is designed to support multiple revisions,
> so the Xen tmem implementation could be updated to
> handle both v0 and v1.  However, this is a bit messy
> and would require data structures for both v0 and v1
> to appear in the public Xen header files.
> 
> I am inclined to update the Xen tmem implementation
> to support only v1 and to fail v0 gracefully.  This
> would result in only a performance loss (as if tmem
> were disabled) for newly launched tmem-v0-enabled
> guests, but live migration between an old tmem-v0 Xen
> and a new tmem-v1 Xen machine would fail, and saved
> tmem-v0 guests would not be restorable on a tmem-v1
> Xen machine.  I plan to update both pre-4.0.2 and
> unstable (the future 4.1) to support only v1.
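> 
> Concretely, a guest advertises the ABI version it was built
> against in the high byte of the TMEM_NEW_POOL flags
> (TMEM_POOL_VERSION_SHIFT/MASK in the public header), so the
> hypervisor can detect and refuse a v0 client at pool-creation
> time.  A sketch of such a check (illustrative only; this
> function is not part of the patch):
> 
>     static int check_pool_version(uint32_t flags)
>     {
>         uint32_t version = (flags >> TMEM_POOL_VERSION_SHIFT)
>                            & TMEM_POOL_VERSION_MASK;
>         if ( version != TMEM_SPEC_VERSION ) /* e.g. a v0 guest */
>             return -EPERM; /* as if tmem were disabled */
>         return 0;
>     }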
> 
> I believe these restrictions are reasonable at this
> point in the tmem lifecycle, though they may not
> be reasonable in the near future; should the tmem
> ABI need to be revised from v1 to v2, I understand
> backwards compatibility will be required.
> 
> Signed-off-by: Dan Magenheimer <dan.magenheimer@xxxxxxxxxx>
> 
> diff -r 07ac5459b250 xen/common/tmem.c
> --- a/xen/common/tmem.c       Wed Aug 25 09:23:31 2010 +0100
> +++ b/xen/common/tmem.c       Thu Sep 02 16:43:33 2010 -0600
> @@ -26,7 +26,7 @@
>  #define EXPORT /* indicates code other modules are dependent upon */
>  #define FORWARD
> 
> -#define TMEM_SPEC_VERSION 0
> +#define TMEM_SPEC_VERSION 1
> 
>  /************  INTERFACE TO TMEM HOST-DEPENDENT (tmh) CODE
> ************/
> 
> @@ -149,14 +149,13 @@ typedef struct share_list sharelist_t;
> 
>  #define OBJ_HASH_BUCKETS 256 /* must be power of two */
>  #define OBJ_HASH_BUCKETS_MASK (OBJ_HASH_BUCKETS-1)
> -#define OBJ_HASH(_oid) (tmh_hash(_oid, BITS_PER_LONG) & OBJ_HASH_BUCKETS_MASK)
> 
>  struct tm_pool {
>      bool_t shared;
>      bool_t persistent;
>      bool_t is_dying;
>      int pageshift; /* 0 == 2**12 */
> -    struct list_head pool_list; /* FIXME do we need this anymore? */
> +    struct list_head pool_list;
>      client_t *client;
>      uint64_t uuid[2]; /* 0 for private, non-zero for shared */
>      uint32_t pool_id;
> @@ -189,9 +188,14 @@ typedef struct tm_pool pool_t;
>  #define is_shared(_p)      (_p->shared)
>  #define is_private(_p)     (!(_p->shared))
> 
> +struct oid {
> +    uint64_t oid[3];
> +};
> +typedef struct oid OID;
> +
>  struct tmem_object_root {
>      DECL_SENTINEL
> -    uint64_t oid;
> +    OID oid;
>      struct rb_node rb_tree_node; /* protected by pool->pool_rwlock */
>      unsigned long objnode_count; /* atomicity depends on obj_spinlock */
>      long pgp_count; /* atomicity depends on obj_spinlock */
> @@ -217,12 +221,14 @@ struct tmem_page_descriptor {
>          struct list_head client_inv_pages;
>      };
>      union {
> -        struct list_head client_eph_pages;
> -        struct list_head pool_pers_pages;
> -    };
> -    union {
> -        obj_t *obj;
> -        uint64_t inv_oid;  /* used for invalid list only */
> +        struct {
> +            union {
> +                struct list_head client_eph_pages;
> +                struct list_head pool_pers_pages;
> +            };
> +            obj_t *obj;
> +        } us;
> +        OID inv_oid;  /* used for invalid list only */
>      };
>      pagesize_t size; /* 0 == PAGE_SIZE (pfp), -1 == data invalid,
>                      else compressed data (cdata) */
> @@ -467,9 +473,9 @@ static NOINLINE int pcd_associate(pgp_t
> 
>      if ( !tmh_dedup_enabled() )
>          return 0;
> -    ASSERT(pgp->obj != NULL);
> -    ASSERT(pgp->obj->pool != NULL);
> -    ASSERT(!pgp->obj->pool->persistent);
> +    ASSERT(pgp->us.obj != NULL);
> +    ASSERT(pgp->us.obj->pool != NULL);
> +    ASSERT(!pgp->us.obj->pool->persistent);
>      if ( cdata == NULL )
>      {
>          ASSERT(pgp->pfp != NULL);
> @@ -528,7 +534,7 @@ static NOINLINE int pcd_associate(pgp_t
>              /* match! if not compressed, free the no-longer-needed page */
>              /* but if compressed, data is assumed static so don't free! */
>              if ( cdata == NULL )
> -                tmem_page_free(pgp->obj->pool,pgp->pfp);
> +                tmem_page_free(pgp->us.obj->pool,pgp->pfp);
>              deduped_puts++;
>              goto match;
>          }
> @@ -540,7 +546,7 @@ static NOINLINE int pcd_associate(pgp_t
>          ret = -ENOMEM;
>          goto unlock;
>      } else if ( cdata != NULL ) {
> -        if ( (pcd->cdata = tmem_malloc_bytes(csize,pgp->obj->pool)) == NULL )
> +        if ( (pcd->cdata = tmem_malloc_bytes(csize,pgp->us.obj->pool)) == NULL )
>          {
>              tmem_free(pcd,sizeof(pcd_t),NULL);
>              ret = -ENOMEM;
> @@ -561,11 +567,11 @@ static NOINLINE int pcd_associate(pgp_t
>          pcd->size = 0;
>          pcd->tze = NULL;
>      } else if ( pfp_size < PAGE_SIZE &&
> -         ((pcd->tze = tmem_malloc_bytes(pfp_size,pgp->obj->pool)) != NULL) ) {
> +         ((pcd->tze = tmem_malloc_bytes(pfp_size,pgp->us.obj->pool)) != NULL) ) {
>          tmh_tze_copy_from_pfp(pcd->tze,pgp->pfp,pfp_size);
>          pcd->size = pfp_size;
>          pcd_tot_tze_size += pfp_size;
> -        tmem_page_free(pgp->obj->pool,pgp->pfp);
> +        tmem_page_free(pgp->us.obj->pool,pgp->pfp);
>      } else {
>          pcd->pfp = pgp->pfp;
>          pcd->size = PAGE_SIZE;
> @@ -602,9 +608,9 @@ static NOINLINE pgp_t *pgp_alloc(obj_t *
>      pool = obj->pool;
>      if ( (pgp = tmem_malloc(pgp_t, pool)) == NULL )
>          return NULL;
> -    pgp->obj = obj;
> +    pgp->us.obj = obj;
>      INIT_LIST_HEAD(&pgp->global_eph_pages);
> -    INIT_LIST_HEAD(&pgp->client_eph_pages);
> +    INIT_LIST_HEAD(&pgp->us.client_eph_pages);
>      pgp->pfp = NULL;
>      if ( tmh_dedup_enabled() )
>      {
> @@ -642,7 +648,7 @@ static NOINLINE void pgp_free_data(pgp_t
>      else if ( pgp_size )
>          tmem_free(pgp->cdata,pgp_size,pool);
>      else
> -        tmem_page_free(pgp->obj->pool,pgp->pfp);
> +        tmem_page_free(pgp->us.obj->pool,pgp->pfp);
>      if ( pool != NULL && pgp_size )
>      {
>          pool->client->compressed_pages--;
> @@ -657,18 +663,18 @@ static NOINLINE void pgp_free(pgp_t *pgp
>      pool_t *pool = NULL;
> 
>      ASSERT_SENTINEL(pgp,PGD);
> -    ASSERT(pgp->obj != NULL);
> -    ASSERT_SENTINEL(pgp->obj,OBJ);
> -    ASSERT_SENTINEL(pgp->obj->pool,POOL);
> -    ASSERT(pgp->obj->pool->client != NULL);
> +    ASSERT(pgp->us.obj != NULL);
> +    ASSERT_SENTINEL(pgp->us.obj,OBJ);
> +    ASSERT_SENTINEL(pgp->us.obj->pool,POOL);
> +    ASSERT(pgp->us.obj->pool->client != NULL);
>      if ( from_delete )
> -        ASSERT(pgp_lookup_in_obj(pgp->obj,pgp->index) == NULL);
> -    ASSERT(pgp->obj->pool != NULL);
> -    pool = pgp->obj->pool;
> +        ASSERT(pgp_lookup_in_obj(pgp->us.obj,pgp->index) == NULL);
> +    ASSERT(pgp->us.obj->pool != NULL);
> +    pool = pgp->us.obj->pool;
>      if ( is_ephemeral(pool) )
>      {
>          ASSERT(list_empty(&pgp->global_eph_pages));
> -        ASSERT(list_empty(&pgp->client_eph_pages));
> +        ASSERT(list_empty(&pgp->us.client_eph_pages));
>      }
>      pgp_free_data(pgp, pool);
>      atomic_dec_and_assert(global_pgp_count);
> @@ -676,12 +682,12 @@ static NOINLINE void pgp_free(pgp_t *pgp
>      pgp->size = -1;
>      if ( is_persistent(pool) && pool->client->live_migrating )
>      {
> -        pgp->inv_oid = pgp->obj->oid;
> +        pgp->inv_oid = pgp->us.obj->oid;
>          pgp->pool_id = pool->pool_id;
>          return;
>      }
>      INVERT_SENTINEL(pgp,PGD);
> -    pgp->obj = NULL;
> +    pgp->us.obj = NULL;
>      pgp->index = -1;
>      tmem_free(pgp,sizeof(pgp_t),pool);
>  }
> @@ -693,7 +699,7 @@ static NOINLINE void pgp_free_from_inv_l
>      ASSERT_SENTINEL(pool,POOL);
>      ASSERT_SENTINEL(pgp,PGD);
>      INVERT_SENTINEL(pgp,PGD);
> -    pgp->obj = NULL;
> +    pgp->us.obj = NULL;
>      pgp->index = -1;
>      tmem_free(pgp,sizeof(pgp_t),pool);
>  }
> @@ -704,18 +710,18 @@ static void pgp_delist(pgp_t *pgp, bool_
>      client_t *client;
> 
>      ASSERT(pgp != NULL);
> -    ASSERT(pgp->obj != NULL);
> -    ASSERT(pgp->obj->pool != NULL);
> -    client = pgp->obj->pool->client;
> +    ASSERT(pgp->us.obj != NULL);
> +    ASSERT(pgp->us.obj->pool != NULL);
> +    client = pgp->us.obj->pool->client;
>      ASSERT(client != NULL);
> -    if ( is_ephemeral(pgp->obj->pool) )
> +    if ( is_ephemeral(pgp->us.obj->pool) )
>      {
>          if ( !no_eph_lock )
>              tmem_spin_lock(&eph_lists_spinlock);
> -        if ( !list_empty(&pgp->client_eph_pages) )
> +        if ( !list_empty(&pgp->us.client_eph_pages) )
>              client->eph_count--;
>          ASSERT(client->eph_count >= 0);
> -        list_del_init(&pgp->client_eph_pages);
> +        list_del_init(&pgp->us.client_eph_pages);
>          if ( !list_empty(&pgp->global_eph_pages) )
>              global_eph_count--;
>          ASSERT(global_eph_count >= 0);
> @@ -728,12 +734,12 @@ static void pgp_delist(pgp_t *pgp, bool_
>              tmem_spin_lock(&pers_lists_spinlock);
>              list_add_tail(&pgp->client_inv_pages,
>                            &client->persistent_invalidated_list);
> -            if ( pgp != pgp->obj->pool->cur_pgp )
> -                list_del_init(&pgp->pool_pers_pages);
> +            if ( pgp != pgp->us.obj->pool->cur_pgp )
> +                list_del_init(&pgp->us.pool_pers_pages);
>              tmem_spin_unlock(&pers_lists_spinlock);
>          } else {
>              tmem_spin_lock(&pers_lists_spinlock);
> -            list_del_init(&pgp->pool_pers_pages);
> +            list_del_init(&pgp->us.pool_pers_pages);
>              tmem_spin_unlock(&pers_lists_spinlock);
>          }
>      }
> @@ -745,10 +751,10 @@ static NOINLINE void pgp_delete(pgp_t *p
>      uint64_t life;
> 
>      ASSERT(pgp != NULL);
> -    ASSERT(pgp->obj != NULL);
> -    ASSERT(pgp->obj->pool != NULL);
> +    ASSERT(pgp->us.obj != NULL);
> +    ASSERT(pgp->us.obj->pool != NULL);
>      life = get_cycles() - pgp->timestamp;
> -    pgp->obj->pool->sum_life_cycles += life;
> +    pgp->us.obj->pool->sum_life_cycles += life;
>      pgp_delist(pgp, no_eph_lock);
>      pgp_free(pgp,1);
>  }
> @@ -758,11 +764,11 @@ static NOINLINE void pgp_destroy(void *v
>  {
>      pgp_t *pgp = (pgp_t *)v;
> 
> -    ASSERT_SPINLOCK(&pgp->obj->obj_spinlock);
> +    ASSERT_SPINLOCK(&pgp->us.obj->obj_spinlock);
>      pgp_delist(pgp,0);
> -    ASSERT(pgp->obj != NULL);
> -    pgp->obj->pgp_count--;
> -    ASSERT(pgp->obj->pgp_count >= 0);
> +    ASSERT(pgp->us.obj != NULL);
> +    pgp->us.obj->pgp_count--;
> +    ASSERT(pgp->us.obj->pgp_count >= 0);
>      pgp_free(pgp,0);
>  }
> 
> @@ -849,37 +855,74 @@ static void rtn_free(rtn_t *rtn)
> 
>  /************ POOL OBJECT COLLECTION MANIPULATION ROUTINES
> *******************/
> 
> +int oid_compare(OID *left, OID *right)
> +{
> +    if ( left->oid[2] == right->oid[2] )
> +    {
> +        if ( left->oid[1] == right->oid[1] )
> +        {
> +            if ( left->oid[0] == right->oid[0] )
> +                return 0;
> +            else if ( left->oid[0] < right->oid[0] )
> +                return -1;
> +            else
> +                return 1;
> +        }
> +        else if ( left->oid[1] < right->oid[1] )
> +            return -1;
> +        else
> +            return 1;
> +    }
> +    else if ( left->oid[2] < right->oid[2] )
> +        return -1;
> +    else
> +        return 1;
> +}
> +
> +void oid_set_invalid(OID *oidp)
> +{
> +    oidp->oid[0] = oidp->oid[1] = oidp->oid[2] = -1UL;
> +}
> +
> +unsigned oid_hash(OID *oidp)
> +{
> +    return (tmh_hash(oidp->oid[0] ^ oidp->oid[1] ^ oidp->oid[2],
> +                     BITS_PER_LONG) & OBJ_HASH_BUCKETS_MASK);
> +}
> +
>  /* searches for object==oid in pool, returns locked object if found */
> -static NOINLINE obj_t * obj_find(pool_t *pool, uint64_t oid)
> +static NOINLINE obj_t * obj_find(pool_t *pool, OID *oidp)
>  {
>      struct rb_node *node;
>      obj_t *obj;
> 
>  restart_find:
>      tmem_read_lock(&pool->pool_rwlock);
> -    node = pool->obj_rb_root[OBJ_HASH(oid)].rb_node;
> +    node = pool->obj_rb_root[oid_hash(oidp)].rb_node;
>      while ( node )
>      {
>          obj = container_of(node, obj_t, rb_tree_node);
> -        if ( obj->oid == oid )
> +        switch ( oid_compare(&obj->oid, oidp) )
>          {
> -            if ( tmh_lock_all )
> -                obj->no_evict = 1;
> -            else
> -            {
> -                if ( !tmem_spin_trylock(&obj->obj_spinlock) )
> +            case 0: /* equal */
> +                if ( tmh_lock_all )
> +                    obj->no_evict = 1;
> +                else
>                  {
> +                    if ( !tmem_spin_trylock(&obj->obj_spinlock) )
> +                    {
> +                        tmem_read_unlock(&pool->pool_rwlock);
> +                        goto restart_find;
> +                    }
>                      tmem_read_unlock(&pool->pool_rwlock);
> -                    goto restart_find;
>                  }
> -                tmem_read_unlock(&pool->pool_rwlock);
> -            }
> -            return obj;
> +                return obj;
> +            case -1:
> +                node = node->rb_left;
> +                break;
> +            case 1:
> +                node = node->rb_right;
>          }
> -        else if ( oid < obj->oid )
> -            node = node->rb_left;
> -        else
> -            node = node->rb_right;
>      }
>      tmem_read_unlock(&pool->pool_rwlock);
>      return NULL;
> @@ -889,7 +932,7 @@ static NOINLINE void obj_free(obj_t *obj
>  static NOINLINE void obj_free(obj_t *obj, int no_rebalance)
>  {
>      pool_t *pool;
> -    uint64_t old_oid;
> +    OID old_oid;
> 
>      ASSERT_SPINLOCK(&obj->obj_spinlock);
>      ASSERT(obj != NULL);
> @@ -908,12 +951,12 @@ static NOINLINE void obj_free(obj_t *obj
>      INVERT_SENTINEL(obj,OBJ);
>      obj->pool = NULL;
>      old_oid = obj->oid;
> -    obj->oid = -1;
> +    oid_set_invalid(&obj->oid);
>      obj->last_client = CLI_ID_NULL;
>      atomic_dec_and_assert(global_obj_count);
>      /* use no_rebalance only if all objects are being destroyed anyway
> */
>      if ( !no_rebalance )
> -        rb_erase(&obj->rb_tree_node,&pool->obj_rb_root[OBJ_HASH(old_oid)]);
> +        rb_erase(&obj->rb_tree_node,&pool->obj_rb_root[oid_hash(&old_oid)]);
>      tmem_free(obj,sizeof(obj_t),pool);
>  }
> 
> @@ -927,12 +970,17 @@ static NOINLINE int obj_rb_insert(struct
>      {
>          this = container_of(*new, obj_t, rb_tree_node);
>          parent = *new;
> -        if ( obj->oid < this->oid )
> -            new = &((*new)->rb_left);
> -        else if ( obj->oid > this->oid )
> -            new = &((*new)->rb_right);
> -        else
> -            return 0;
> +        switch ( oid_compare(&obj->oid, &this->oid) )
> +        {
> +            case 0:
> +                return 0;
> +            case -1:
> +                new = &((*new)->rb_left);
> +                break;
> +            case 1:
> +                new = &((*new)->rb_right);
> +                break;
> +        }
>      }
>      rb_link_node(&obj->rb_tree_node, parent, new);
>      rb_insert_color(&obj->rb_tree_node, root);
> @@ -943,7 +991,7 @@ static NOINLINE int obj_rb_insert(struct
>   * allocate, initialize, and insert an tmem_object_root
>   * (should be called only if find failed)
>   */
> -static NOINLINE obj_t * obj_new(pool_t *pool, uint64_t oid)
> +static NOINLINE obj_t * obj_new(pool_t *pool, OID *oidp)
>  {
>      obj_t *obj;
> 
> @@ -958,13 +1006,13 @@ static NOINLINE obj_t * obj_new(pool_t *
>      INIT_RADIX_TREE(&obj->tree_root,0);
>      spin_lock_init(&obj->obj_spinlock);
>      obj->pool = pool;
> -    obj->oid = oid;
> +    obj->oid = *oidp;
>      obj->objnode_count = 0;
>      obj->pgp_count = 0;
>      obj->last_client = CLI_ID_NULL;
>      SET_SENTINEL(obj,OBJ);
>      tmem_spin_lock(&obj->obj_spinlock);
> -    obj_rb_insert(&pool->obj_rb_root[OBJ_HASH(oid)], obj);
> +    obj_rb_insert(&pool->obj_rb_root[oid_hash(oidp)], obj);
>      obj->no_evict = 1;
>      ASSERT_SPINLOCK(&obj->obj_spinlock);
>      return obj;
> @@ -1256,7 +1304,7 @@ static void client_freeze(client_t *clie
> 
>  static bool_t tmem_try_to_evict_pgp(pgp_t *pgp, bool_t *hold_pool_rwlock)
>  {
> -    obj_t *obj = pgp->obj;
> +    obj_t *obj = pgp->us.obj;
>      pool_t *pool = obj->pool;
>      client_t *client = pool->client;
>      uint16_t firstbyte = pgp->firstbyte;
> @@ -1280,8 +1328,8 @@ static bool_t tmem_try_to_evict_pgp(pgp_
>                  pgp->eviction_attempted++;
>                  list_del(&pgp->global_eph_pages);
>                  list_add_tail(&pgp->global_eph_pages,&global_ephemeral_page_list);
> -                list_del(&pgp->client_eph_pages);
> -                list_add_tail(&pgp->client_eph_pages,&client->ephemeral_page_list);
> +                list_del(&pgp->us.client_eph_pages);
> +                list_add_tail(&pgp->us.client_eph_pages,&client->ephemeral_page_list);
>                  goto pcd_unlock;
>              }
>          }
> @@ -1314,7 +1362,7 @@ static int tmem_evict(void)
>      if ( (client != NULL) && client_over_quota(client) &&
>           !list_empty(&client->ephemeral_page_list) )
>      {
> -        list_for_each_entry_safe(pgp,pgp2,&client->ephemeral_page_list,client_eph_pages)
> +        list_for_each_entry_safe(pgp,pgp2,&client->ephemeral_page_list,us.client_eph_pages)
>              if ( tmem_try_to_evict_pgp(pgp,&hold_pool_rwlock) )
>                  goto found;
>      } else if ( list_empty(&global_ephemeral_page_list) ) {
> @@ -1331,7 +1379,7 @@ found:
>  found:
>      ASSERT(pgp != NULL);
>      ASSERT_SENTINEL(pgp,PGD);
> -    obj = pgp->obj;
> +    obj = pgp->us.obj;
>      ASSERT(obj != NULL);
>      ASSERT(obj->no_evict == 0);
>      ASSERT(obj->pool != NULL);
> @@ -1407,16 +1455,16 @@ static NOINLINE int do_tmem_put_compress
>      DECL_LOCAL_CYC_COUNTER(compress);
> 
>      ASSERT(pgp != NULL);
> -    ASSERT(pgp->obj != NULL);
> -    ASSERT_SPINLOCK(&pgp->obj->obj_spinlock);
> -    ASSERT(pgp->obj->pool != NULL);
> -    ASSERT(pgp->obj->pool->client != NULL);
> +    ASSERT(pgp->us.obj != NULL);
> +    ASSERT_SPINLOCK(&pgp->us.obj->obj_spinlock);
> +    ASSERT(pgp->us.obj->pool != NULL);
> +    ASSERT(pgp->us.obj->pool->client != NULL);
>  #ifdef __i386__
>      return -ENOMEM;
>  #endif
> 
>      if ( pgp->pfp != NULL )
> -        pgp_free_data(pgp, pgp->obj->pool);
> +        pgp_free_data(pgp, pgp->us.obj->pool);
>      START_CYC_COUNTER(compress);
>      ret = tmh_compress_from_client(cmfn, &dst, &size, cva);
>      if ( (ret == -EFAULT) || (ret == 0) )
> @@ -1424,10 +1472,10 @@ static NOINLINE int do_tmem_put_compress
>      else if ( (size == 0) || (size >= tmem_subpage_maxsize()) ) {
>          ret = 0;
>          goto out;
> -    } else if ( tmh_dedup_enabled() && !is_persistent(pgp->obj->pool) ) {
> +    } else if ( tmh_dedup_enabled() && !is_persistent(pgp->us.obj->pool) ) {
>          if ( (ret = pcd_associate(pgp,dst,size)) == -ENOMEM )
>              goto out;
> -    } else if ( (p = tmem_malloc_bytes(size,pgp->obj->pool)) == NULL ) {
> +    } else if ( (p = tmem_malloc_bytes(size,pgp->us.obj->pool)) == NULL ) {
>          ret = -ENOMEM;
>          goto out;
>      } else {
> @@ -1435,8 +1483,8 @@ static NOINLINE int do_tmem_put_compress
>          pgp->cdata = p;
>      }
>      pgp->size = size;
> -    pgp->obj->pool->client->compressed_pages++;
> -    pgp->obj->pool->client->compressed_sum_size += size;
> +    pgp->us.obj->pool->client->compressed_pages++;
> +    pgp->us.obj->pool->client->compressed_sum_size += size;
>      ret = 1;
> 
>  out:
> @@ -1456,7 +1504,7 @@ static NOINLINE int do_tmem_dup_put(pgp_
>      ASSERT(pgp != NULL);
>      ASSERT(pgp->pfp != NULL);
>      ASSERT(pgp->size != -1);
> -    obj = pgp->obj;
> +    obj = pgp->us.obj;
>      ASSERT_SPINLOCK(&obj->obj_spinlock);
>      ASSERT(obj != NULL);
>      pool = obj->pool;
> @@ -1535,7 +1583,7 @@ cleanup:
> 
> 
>  static NOINLINE int do_tmem_put(pool_t *pool,
> -              uint64_t oid, uint32_t index,
> +              OID *oidp, uint32_t index,
>                tmem_cli_mfn_t cmfn, pagesize_t tmem_offset,
>                pagesize_t pfn_offset, pagesize_t len, void *cva)
>  {
> @@ -1547,7 +1595,7 @@ static NOINLINE int do_tmem_put(pool_t *
>      ASSERT(pool != NULL);
>      pool->puts++;
>      /* does page already exist (dup)?  if so, handle specially */
> -    if ( (obj = objfound = obj_find(pool,oid)) != NULL )
> +    if ( (obj = objfound = obj_find(pool,oidp)) != NULL )
>      {
>          ASSERT_SPINLOCK(&objfound->obj_spinlock);
>          if ((pgp = pgp_lookup_in_obj(objfound, index)) != NULL)
> @@ -1561,7 +1609,7 @@ static NOINLINE int do_tmem_put(pool_t *
>      if ( (objfound == NULL) )
>      {
>          tmem_write_lock(&pool->pool_rwlock);
> -        if ( (obj = objnew = obj_new(pool,oid)) == NULL )
> +        if ( (obj = objnew = obj_new(pool,oidp)) == NULL )
>          {
>              tmem_write_unlock(&pool->pool_rwlock);
>              return -ENOMEM;
> @@ -1627,14 +1675,14 @@ insert_page:
>              &global_ephemeral_page_list);
>          if (++global_eph_count > global_eph_count_max)
>              global_eph_count_max = global_eph_count;
> -        list_add_tail(&pgp->client_eph_pages,
> +        list_add_tail(&pgp->us.client_eph_pages,
>              &client->ephemeral_page_list);
>          if (++client->eph_count > client->eph_count_max)
>              client->eph_count_max = client->eph_count;
>          tmem_spin_unlock(&eph_lists_spinlock);
>      } else { /* is_persistent */
>          tmem_spin_lock(&pers_lists_spinlock);
> -        list_add_tail(&pgp->pool_pers_pages,
> +        list_add_tail(&pgp->us.pool_pers_pages,
>              &pool->persistent_page_list);
>          tmem_spin_unlock(&pers_lists_spinlock);
>      }
> @@ -1678,7 +1726,7 @@ free:
>      return ret;
>  }
> 
> -static NOINLINE int do_tmem_get(pool_t *pool, uint64_t oid, uint32_t index,
> +static NOINLINE int do_tmem_get(pool_t *pool, OID *oidp, uint32_t index,
>                tmem_cli_mfn_t cmfn, pagesize_t tmem_offset,
>                pagesize_t pfn_offset, pagesize_t len, void *cva)
>  {
> @@ -1691,7 +1739,7 @@ static NOINLINE int do_tmem_get(pool_t *
>          return -EEMPTY;
> 
>      pool->gets++;
> -    obj = obj_find(pool,oid);
> +    obj = obj_find(pool,oidp);
>      if ( obj == NULL )
>          return 0;
> 
> @@ -1737,8 +1785,8 @@ static NOINLINE int do_tmem_get(pool_t *
>              tmem_spin_lock(&eph_lists_spinlock);
>              list_del(&pgp->global_eph_pages);
>              list_add_tail(&pgp->global_eph_pages,&global_ephemeral_page_list);
> -            list_del(&pgp->client_eph_pages);
> -            list_add_tail(&pgp->client_eph_pages,&client->ephemeral_page_list);
> +            list_del(&pgp->us.client_eph_pages);
> +            list_add_tail(&pgp->us.client_eph_pages,&client->ephemeral_page_list);
>              tmem_spin_unlock(&eph_lists_spinlock);
>              ASSERT(obj != NULL);
>              obj->last_client = tmh_get_cli_id_from_current();
> @@ -1763,13 +1811,13 @@ bad_copy:
> 
>  }
> 
> -static NOINLINE int do_tmem_flush_page(pool_t *pool, uint64_t oid, uint32_t index)
> +static NOINLINE int do_tmem_flush_page(pool_t *pool, OID *oidp, uint32_t index)
>  {
>      obj_t *obj;
>      pgp_t *pgp;
> 
>      pool->flushs++;
> -    obj = obj_find(pool,oid);
> +    obj = obj_find(pool,oidp);
>      if ( obj == NULL )
>          goto out;
>      pgp = pgp_delete_from_obj(obj, index);
> @@ -1798,12 +1846,12 @@ out:
>          return 1;
>  }
> 
> -static NOINLINE int do_tmem_flush_object(pool_t *pool, uint64_t oid)
> +static NOINLINE int do_tmem_flush_object(pool_t *pool, OID *oidp)
>  {
>      obj_t *obj;
> 
>      pool->flush_objs++;
> -    obj = obj_find(pool,oid);
> +    obj = obj_find(pool,oidp);
>      if ( obj == NULL )
>          goto out;
>      tmem_write_lock(&pool->pool_rwlock);
> @@ -1863,6 +1911,16 @@ static NOINLINE int do_tmem_new_pool(cli
>      if ( pagebits != (PAGE_SHIFT - 12) )
>      {
>          printk("failed... unsupported pagesize
> %d\n",1<<(pagebits+12));
> +        return -EPERM;
> +    }
> +    if ( flags & TMEM_POOL_PRECOMPRESSED )
> +    {
> +        printk("failed... precompression flag set but unsupported\n");
> +        return -EPERM;
> +    }
> +    if ( flags & TMEM_POOL_RESERVED_BITS )
> +    {
> +        printk("failed... reserved bits must be zero\n");
>          return -EPERM;
>      }
>      if ( (pool = pool_alloc()) == NULL )
> @@ -2369,6 +2427,7 @@ static NOINLINE int tmemc_save_get_next_
>      pool_t *pool = (client == NULL || pool_id >= MAX_POOLS_PER_DOMAIN)
>                     ? NULL : client->pools[pool_id];
>      pgp_t *pgp;
> +    OID oid;
>      int ret = 0;
>      struct tmem_handle *h;
>      unsigned int pagesize = 1 << (pool->pageshift+12);
> @@ -2389,22 +2448,23 @@ static NOINLINE int tmemc_save_get_next_
>      {
>          /* process the first one */
>          pool->cur_pgp = pgp = list_entry((&pool->persistent_page_list)->next,
> -                         pgp_t,pool_pers_pages);
> -    } else if ( list_is_last(&pool->cur_pgp->pool_pers_pages,
> +                         pgp_t,us.pool_pers_pages);
> +    } else if ( list_is_last(&pool->cur_pgp->us.pool_pers_pages,
>                               &pool->persistent_page_list) )
>      {
>          /* already processed the last one in the list */
>          ret = -1;
>          goto out;
>      }
> -    pgp = list_entry((&pool->cur_pgp->pool_pers_pages)->next,
> -                         pgp_t,pool_pers_pages);
> +    pgp = list_entry((&pool->cur_pgp->us.pool_pers_pages)->next,
> +                         pgp_t,us.pool_pers_pages);
>      pool->cur_pgp = pgp;
> +    oid = pgp->us.obj->oid;
>      h = (struct tmem_handle *)buf.p;
> -    h->oid = pgp->obj->oid;
> +    *(OID *)&h->oid[0] = oid;
>      h->index = pgp->index;
>      buf.p = (void *)(h+1);
> -    ret = do_tmem_get(pool, h->oid, h->index,0,0,0,pagesize,buf.p);
> +    ret = do_tmem_get(pool, &oid, h->index,0,0,0,pagesize,buf.p);
> 
>  out:
>      tmem_spin_unlock(&pers_lists_spinlock);
> @@ -2444,7 +2504,7 @@ static NOINLINE int tmemc_save_get_next_
>      }
>      h = (struct tmem_handle *)buf.p;
>      h->pool_id = pgp->pool_id;
> -    h->oid = pgp->inv_oid;
> +    *(OID *)&h->oid = pgp->inv_oid;
>      h->index = pgp->index;
>      ret = 1;
>  out:
> @@ -2452,7 +2512,7 @@ out:
>      return ret;
>  }
> 
> -static int tmemc_restore_put_page(int cli_id, int pool_id, uint64_t oid,
> +static int tmemc_restore_put_page(int cli_id, int pool_id, OID *oidp,
>                        uint32_t index, tmem_cli_va_t buf, uint32_t bufsize)
>  {
>      client_t *client = tmh_client_from_cli_id(cli_id);
> @@ -2461,10 +2521,10 @@ static int tmemc_restore_put_page(int cl
> 
>      if ( pool == NULL )
>          return -1;
> -    return do_tmem_put(pool,oid,index,0,0,0,bufsize,buf.p);
> +    return do_tmem_put(pool,oidp,index,0,0,0,bufsize,buf.p);
>  }
> 
> -static int tmemc_restore_flush_page(int cli_id, int pool_id, uint64_t oid,
> +static int tmemc_restore_flush_page(int cli_id, int pool_id, OID *oidp,
>                          uint32_t index)
>  {
>      client_t *client = tmh_client_from_cli_id(cli_id);
> @@ -2473,7 +2533,7 @@ static int tmemc_restore_flush_page(int
> 
>      if ( pool == NULL )
>          return -1;
> -    return do_tmem_flush_page(pool,oid,index);
> +    return do_tmem_flush_page(pool,oidp,index);
>  }
> 
>  static NOINLINE int do_tmem_control(struct tmem_op *op)
> @@ -2481,6 +2541,7 @@ static NOINLINE int do_tmem_control(stru
>      int ret;
>      uint32_t pool_id = op->pool_id;
>      uint32_t subop = op->u.ctrl.subop;
> +    OID *oidp = (OID *)(&op->u.ctrl.oid[0]);
> 
>      if (!tmh_current_is_privileged())
>      {
> @@ -2533,12 +2594,12 @@ static NOINLINE int do_tmem_control(stru
>          break;
>      case TMEMC_RESTORE_PUT_PAGE:
>          ret = tmemc_restore_put_page(op->u.ctrl.cli_id,pool_id,
> -                                     op->u.ctrl.arg3, op->u.ctrl.arg2,
> +                                     oidp, op->u.ctrl.arg2,
>                                       op->u.ctrl.buf, op->u.ctrl.arg1);
>          break;
>      case TMEMC_RESTORE_FLUSH_PAGE:
>          ret = tmemc_restore_flush_page(op->u.ctrl.cli_id,pool_id,
> -                                       op->u.ctrl.arg3, op->u.ctrl.arg2);
> +                                       oidp, op->u.ctrl.arg2);
>          break;
>      default:
>          ret = -1;
> @@ -2553,6 +2614,7 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
>      struct tmem_op op;
>      client_t *client = tmh_client_from_current();
>      pool_t *pool = NULL;
> +    OID *oidp;
>      int rc = 0;
>      bool_t succ_get = 0, succ_put = 0;
>      bool_t non_succ_get = 0, non_succ_put = 0;
> @@ -2656,6 +2718,7 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
>          ASSERT_SENTINEL(pool,POOL);
>      }
> 
> +    oidp = (OID *)&op.u.gen.oid[0];
>      switch ( op.cmd )
>      {
>      case TMEM_NEW_POOL:
> @@ -2664,28 +2727,28 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
>          break;
>      case TMEM_NEW_PAGE:
>          tmem_ensure_avail_pages();
> -        rc = do_tmem_put(pool, op.u.gen.object,
> +        rc = do_tmem_put(pool, oidp,
>                           op.u.gen.index, op.u.gen.cmfn, 0, 0, 0, NULL);
>          break;
>      case TMEM_PUT_PAGE:
>          tmem_ensure_avail_pages();
> -        rc = do_tmem_put(pool, op.u.gen.object,
> +        rc = do_tmem_put(pool, oidp,
>                      op.u.gen.index, op.u.gen.cmfn, 0, 0, PAGE_SIZE, NULL);
>          if (rc == 1) succ_put = 1;
>          else non_succ_put = 1;
>          break;
>      case TMEM_GET_PAGE:
> -        rc = do_tmem_get(pool, op.u.gen.object, op.u.gen.index, op.u.gen.cmfn,
> +        rc = do_tmem_get(pool, oidp, op.u.gen.index, op.u.gen.cmfn,
>                           0, 0, PAGE_SIZE, 0);
>          if (rc == 1) succ_get = 1;
>          else non_succ_get = 1;
>          break;
>      case TMEM_FLUSH_PAGE:
>          flush = 1;
> -        rc = do_tmem_flush_page(pool, op.u.gen.object, op.u.gen.index);
> +        rc = do_tmem_flush_page(pool, oidp, op.u.gen.index);
>          break;
>      case TMEM_FLUSH_OBJECT:
> -        rc = do_tmem_flush_object(pool, op.u.gen.object);
> +        rc = do_tmem_flush_object(pool, oidp);
>          flush_obj = 1;
>          break;
>      case TMEM_DESTROY_POOL:
> @@ -2693,12 +2756,12 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
>          rc = do_tmem_destroy_pool(op.pool_id);
>          break;
>      case TMEM_READ:
> -        rc = do_tmem_get(pool, op.u.gen.object, op.u.gen.index, op.u.gen.cmfn,
> +        rc = do_tmem_get(pool, oidp, op.u.gen.index, op.u.gen.cmfn,
>                           op.u.gen.tmem_offset, op.u.gen.pfn_offset,
>                           op.u.gen.len,0);
>          break;
>      case TMEM_WRITE:
> -        rc = do_tmem_put(pool, op.u.gen.object,
> +        rc = do_tmem_put(pool, oidp,
>                           op.u.gen.index, op.u.gen.cmfn,
>                           op.u.gen.tmem_offset, op.u.gen.pfn_offset,
>                           op.u.gen.len, NULL);
> diff -r 07ac5459b250 xen/include/public/tmem.h
> --- a/xen/include/public/tmem.h       Wed Aug 25 09:23:31 2010 +0100
> +++ b/xen/include/public/tmem.h       Thu Sep 02 16:43:33 2010 -0600
> @@ -28,6 +28,9 @@
>  #define __XEN_PUBLIC_TMEM_H__
> 
>  #include "xen.h"
> +
> +/* version of ABI */
> +#define TMEM_SPEC_VERSION          1
> 
>  /* Commands to HYPERVISOR_tmem_op() */
>  #define TMEM_CONTROL               0
> @@ -75,10 +78,12 @@
>  /* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */
>  #define TMEM_POOL_PERSIST          1
>  #define TMEM_POOL_SHARED           2
> +#define TMEM_POOL_PRECOMPRESSED    4
>  #define TMEM_POOL_PAGESIZE_SHIFT   4
>  #define TMEM_POOL_PAGESIZE_MASK  0xf
>  #define TMEM_POOL_VERSION_SHIFT   24
>  #define TMEM_POOL_VERSION_MASK  0xff
> +#define TMEM_POOL_RESERVED_BITS  0x00ffff00
> 
>  /* Bits for client flags (save/restore) */
>  #define TMEM_CLIENT_COMPRESS       1
> @@ -106,12 +111,12 @@ struct tmem_op {
>              uint32_t cli_id;
>              uint32_t arg1;
>              uint32_t arg2;
> -            uint64_t arg3;
> +            uint64_t oid[3];
>              tmem_cli_va_t buf;
>          } ctrl; /* for cmd == TMEM_CONTROL */
>          struct {
> 
> -            uint64_t object;
> +            uint64_t oid[3];
>              uint32_t index;
>              uint32_t tmem_offset;
>              uint32_t pfn_offset;
> @@ -126,9 +131,8 @@ struct tmem_handle {
>  struct tmem_handle {
>      uint32_t pool_id;
>      uint32_t index;
> -    uint64_t oid;
> +    uint64_t oid[3];
>  };
> -
>  #endif
> 
>  #endif /* __XEN_PUBLIC_TMEM_H__ */
