[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH] 1. changes for vdiskadm on illumos-based platform
2. update ZFS in libfsimage from illumos for pygrub diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/Rules.mk --- a/tools/libfsimage/Rules.mk Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/Rules.mk Sat Oct 26 20:03:06 2013 +0400 @@ -2,11 +2,19 @@ include $(XEN_ROOT)/tools/Rules.mk CFLAGS += -Wno-unknown-pragmas -I$(XEN_ROOT)/tools/libfsimage/common/ -DFSIMAGE_FSDIR=\"$(FSDIR)\" CFLAGS += -Werror -D_GNU_SOURCE +# need for build illumos ZFS +CFLAGS += -Wno-parentheses +CFLAGS += -Wno-unused +# end LDFLAGS += -L../common/ PIC_OBJS := $(patsubst %.c,%.opic,$(LIB_SRCS-y)) -FSDIR = $(LIBDIR)/fs +FSDIR-y = $(LIBDIR)/fs/$(FS) +FSDIR-$(CONFIG_SunOS)-x86_64 = $(PREFIX)/lib/fs/$(FS)/64 +FSDIR-$(CONFIG_SunOS)-x86_32 = $(PREFIX)/lib/fs/$(FS)/ +FSDIR-$(CONFIG_SunOS) = $(FSDIR-$(CONFIG_SunOS)-$(XEN_TARGET_ARCH)) +FSDIR = $(FSDIR-y) FSLIB = fsimage.so @@ -15,11 +23,14 @@ fs-all: $(FSLIB) .PHONY: fs-install fs-install: fs-all - $(INSTALL_DIR) $(DESTDIR)$(FSDIR)/$(FS) - $(INSTALL_PROG) $(FSLIB) $(DESTDIR)$(FSDIR)/$(FS) + $(INSTALL_DIR) $(DESTDIR)$(FSDIR) + $(INSTALL_PROG) $(FSLIB) $(DESTDIR)$(FSDIR) + +BUILD_LINE-y = $(CC) $(LDFLAGS) $(SHLIB_LDFLAGS) -o $@ $^ -lfsimage $(FS_LIBDEPS) $(APPEND_LDFLAGS) +BUILD_LINE-$(CONFIG_SunOS) = $(CC) $(CFLAGS) $(LDFLAGS) $(SHLIB_LDFLAGS) -o $@ $^ -lfsimage $(FS_LIBDEPS) $(FSLIB): $(PIC_OBJS) - $(CC) $(LDFLAGS) $(SHLIB_LDFLAGS) -o $@ $^ -lfsimage $(FS_LIBDEPS) $(APPEND_LDFLAGS) + $(BUILD_LINE-y) clean distclean:: rm -f $(PIC_OBJS) $(FSLIB) $(DEPS) diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/common/Makefile --- a/tools/libfsimage/common/Makefile Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/common/Makefile Sat Oct 26 20:03:06 2013 +0400 @@ -4,11 +4,16 @@ include $(XEN_ROOT)/tools/libfsimage/Rul MAJOR = 1.0 MINOR = 0 -LDFLAGS-$(CONFIG_SunOS) = -Wl,-M -Wl,mapfile-SunOS +CFLAGS-ADDS-$(CONFIG_SunOS) += -Werror -Wp,-MD,.$(@F).d $(ADD_INCLUDES) +CFLAGS-ADDS-$(CONFIG_SunOS) += -I/usr/include/libxml2 
+CFLAGS-ADDS-$(CONFIG_Linux)= + +LDFLAGS-$(CONFIG_SunOS) = -Wl,-M -Wl,mapfile-SunOS $(ADD_PATH_LIBS) LDFLAGS-$(CONFIG_Linux) = -Wl,mapfile-GNU LDFLAGS = $(LDFLAGS-y) CFLAGS += $(PTHREAD_CFLAGS) +CFLAGS += $(CFLAGS-ADDS-y) LDFLAGS += $(PTHREAD_LDFLAGS) LIB_SRCS-y = fsimage.c fsimage_plugin.c fsimage_grub.c @@ -32,15 +37,18 @@ install: all $(INSTALL_DATA) fsimage_grub.h $(DESTDIR)$(INCLUDEDIR) clean distclean:: - rm -f $(LIB) + rm -f $(PIC_OBJS) $(LIB) $(DEPS) libfsimage.so: libfsimage.so.$(MAJOR) ln -sf $< $@ libfsimage.so.$(MAJOR): libfsimage.so.$(MAJOR).$(MINOR) ln -sf $< $@ +BUILD_LINE-y = $(CC) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libfsimage.so.$(MAJOR) $(SHLIB_LDFLAGS) -o $@ $^ $(PTHREAD_LIBS) +BUILD_LINE-$(CONFIG_SunOS) = $(CC) $(CFLAGS $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libfsimage.so.$(MAJOR) $(SHLIB_LDFLAGS) -o $@ $^ $(PTHREAD_LIBS) -lvdisk -lvboxdisk -lxml2 -lgen -lc + libfsimage.so.$(MAJOR).$(MINOR): $(PIC_OBJS) - $(CC) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libfsimage.so.$(MAJOR) $(SHLIB_LDFLAGS) -o $@ $^ $(PTHREAD_LIBS) + $(BUILD_LINE-y) -include $(DEPS) diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/common/fsimage.c --- a/tools/libfsimage/common/fsimage.c Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/common/fsimage.c Sat Oct 26 20:03:06 2013 +0400 @@ -36,22 +36,43 @@ static pthread_mutex_t fsi_lock = PTHREAD_MUTEX_INITIALIZER; +#ifdef _VDISK_ +#include "vdisk.h" +#endif + fsi_t *fsi_open_fsimage(const char *path, uint64_t off, const char *options) { fsi_t *fsi = NULL; - int fd; + int fd = -1; int err; + void *pvd = NULL; +#ifdef _VDISK_ + if (vdisk_check_vdisk(path)) { + if ((pvd = vdisk_open(path)) == NULL) + goto fail; + } else { + if ((fd = open(path, O_RDONLY)) == -1) + goto fail; + } +#else if ((fd = open(path, O_RDONLY)) == -1) goto fail; +#endif if ((fsi = malloc(sizeof(*fsi))) == NULL) goto fail; - fsi->f_fd = fd; fsi->f_off = off; fsi->f_data = NULL; fsi->f_bootstring = NULL; + if (pvd) { + fsi->f_fd = NULL; + fsi->f_pvdisk = pvd; + 
} else { + fsi->f_fd = fd; + fsi->f_pvdisk = NULL; + } pthread_mutex_lock(&fsi_lock); err = find_plugin(fsi, path, options); @@ -73,8 +94,16 @@ fail: void fsi_close_fsimage(fsi_t *fsi) { pthread_mutex_lock(&fsi_lock); - fsi->f_plugin->fp_ops->fpo_umount(fsi); - (void) close(fsi->f_fd); + fsi->f_plugin->fp_ops->fpo_umount(fsi); +#ifdef _VDISK_ + if (fsi->f_pvdisk) { + vdisk_close(fsi->f_pvdisk); + } else { + (void) close(fsi->f_fd); + } +#else + (void) close(fsi->f_fd); +#endif free(fsi); pthread_mutex_unlock(&fsi_lock); } diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/common/fsimage_grub.c --- a/tools/libfsimage/common/fsimage_grub.c Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/common/fsimage_grub.c Sat Oct 26 20:03:06 2013 +0400 @@ -31,6 +31,10 @@ #include "fsimage_grub.h" #include "fsimage_priv.h" +#ifdef _VDISK_ +#include "vdisk.h" +#endif + static char *disk_read_junk; typedef struct fsig_data { @@ -176,7 +180,17 @@ fsig_devread(fsi_file_t *ffi, unsigned i r = SECTOR_SIZE - n; if (r > bufsize) r = bufsize; +#ifdef _VDISK_ + if (ffi->ff_fsi->f_pvdisk) { + ret = vdisk_read(ffi->ff_fsi->f_pvdisk, + (off_t)(off - n), tmp, SECTOR_SIZE); + } else { + ret = pread(ffi->ff_fsi->f_fd, tmp, SECTOR_SIZE, + off - n); + } +#else ret = pread(ffi->ff_fsi->f_fd, tmp, SECTOR_SIZE, off - n); +#endif if (ret < n + r) return (0); memcpy(buf, tmp + n, r); @@ -187,7 +201,16 @@ fsig_devread(fsi_file_t *ffi, unsigned i n = (bufsize & ~(SECTOR_SIZE - 1)); if (n > 0) { +#ifdef _VDISK_ + if (ffi->ff_fsi->f_pvdisk) { + ret = vdisk_read(ffi->ff_fsi->f_pvdisk, (off_t)off, + buf, n); + } else { + ret = pread(ffi->ff_fsi->f_fd, buf, n, off); + } +#else ret = pread(ffi->ff_fsi->f_fd, buf, n, off); +#endif if (ret < n) return (0); buf += n; @@ -195,7 +218,16 @@ fsig_devread(fsi_file_t *ffi, unsigned i off += n; } if (bufsize > 0) { +#ifdef _VDISK_ + if (ffi->ff_fsi->f_pvdisk) { + ret = vdisk_read(ffi->ff_fsi->f_pvdisk, (off_t)off, + tmp, SECTOR_SIZE); + } else { + ret = 
pread(ffi->ff_fsi->f_fd, tmp, SECTOR_SIZE, off); + } +#else ret = pread(ffi->ff_fsi->f_fd, tmp, SECTOR_SIZE, off); +#endif if (ret < bufsize) return (0); memcpy(buf, tmp, bufsize); diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/common/fsimage_plugin.c --- a/tools/libfsimage/common/fsimage_plugin.c Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/common/fsimage_plugin.c Sat Oct 26 20:03:06 2013 +0400 @@ -122,6 +122,7 @@ fail: static int load_plugins(void) { const char *fsdir = getenv("FSIMAGE_FSDIR"); + const char *isadir = ""; struct dirent *dp = NULL; struct dirent *dpp; DIR *dir = NULL; @@ -130,8 +131,26 @@ static int load_plugins(void) int err; int ret = -1; +#if defined(FSIMAGE_FSDIR) if (fsdir == NULL) fsdir = FSIMAGE_FSDIR; +#elif defined(__sun__) + if (fsdir == NULL) + fsdir = "/usr/lib/fs"; + + if (sizeof(void *) == 8) + isadir = "64/"; +#elif defined(__ia64__) + if (fsdir == NULL) + fsdir = "/usr/lib/fs"; +#else + if (fsdir == NULL) { + if (sizeof(void *) == 8) + fsdir = "/usr/lib64/fs"; + else + fsdir = "/usr/lib/fs"; + } +#endif if ((name_max = pathconf(fsdir, _PC_NAME_MAX)) == -1) goto fail; @@ -153,8 +172,8 @@ static int load_plugins(void) if (strcmp(dpp->d_name, "..") == 0) continue; - (void) snprintf(tmp, name_max, "%s/%s/fsimage.so", fsdir, - dpp->d_name); + (void) snprintf(tmp, name_max, "%s/%s/%sfsimage.so", fsdir, + dpp->d_name, isadir); if (init_plugin(tmp) != 0) goto fail; diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/common/fsimage_priv.h --- a/tools/libfsimage/common/fsimage_priv.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/common/fsimage_priv.h Sat Oct 26 20:03:06 2013 +0400 @@ -47,6 +47,7 @@ struct fsi { void *f_data; fsi_plugin_t *f_plugin; char *f_bootstring; + void *f_pvdisk; }; struct fsi_file { diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/Makefile --- a/tools/libfsimage/zfs/Makefile Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/Makefile Sat Oct 26 20:03:06 2013 +0400 @@ -25,7 
+25,7 @@ XEN_ROOT = $(CURDIR)/../../.. CFLAGS += -DFSYS_ZFS -DFSIMAGE -I$(XEN_ROOT)/tools/libfsimage/zfs -LIB_SRCS-y = zfs_lzjb.c zfs_sha256.c zfs_fletcher.c fsi_zfs.c fsys_zfs.c +LIB_SRCS-y = zfs_lzjb.c zfs_sha256.c zfs_fletcher.c fsi_zfs.c fsys_zfs.c zfs_lz4.c FS = zfs diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/fsi_zfs.h --- a/tools/libfsimage/zfs/fsi_zfs.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/fsi_zfs.h Sat Oct 26 20:03:06 2013 +0400 @@ -36,6 +36,8 @@ /* Boot signature related defines for the findroot command */ #define BOOTSIGN_DIR "/boot/grub/bootsign" +#define BOOTSIGN_ARGLEN (MAXNAMELEN + 10) /* (<sign>,0,d) */ +#define BOOTSIGN_LEN (sizeof (BOOTSIGN_DIR) + 1 + BOOTSIGN_ARGLEN) #define BOOTSIGN_BACKUP "/etc/bootsign" /* Maybe redirect memory requests through grub_scratch_mem. */ @@ -60,6 +62,7 @@ #define grub_strstr strstr #define grub_strlen strlen #define grub_memmove memmove +#define grub_isspace isspace extern char current_bootpath[MAXPATHLEN]; extern char current_rootpool[MAXNAMELEN]; diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/fsys_zfs.c --- a/tools/libfsimage/zfs/fsys_zfs.c Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/fsys_zfs.c Sat Oct 26 20:03:06 2013 +0400 @@ -16,12 +16,18 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* + * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. 
+ */ + +/* * The zfs plug-in routines for GRUB are: * * zfs_mount() - locates a valid uberblock of the root pool and reads @@ -69,7 +75,18 @@ decomp_entry_t decomp_table[ZIO_COMPRESS {"on", lzjb_decompress}, /* ZIO_COMPRESS_ON */ {"off", 0}, /* ZIO_COMPRESS_OFF */ {"lzjb", lzjb_decompress}, /* ZIO_COMPRESS_LZJB */ - {"empty", 0} /* ZIO_COMPRESS_EMPTY */ + {"empty", 0}, /* ZIO_COMPRESS_EMPTY */ + {"gzip-1", 0}, /* ZIO_COMPRESS_GZIP_1 */ + {"gzip-2", 0}, /* ZIO_COMPRESS_GZIP_2 */ + {"gzip-3", 0}, /* ZIO_COMPRESS_GZIP_3 */ + {"gzip-4", 0}, /* ZIO_COMPRESS_GZIP_4 */ + {"gzip-5", 0}, /* ZIO_COMPRESS_GZIP_5 */ + {"gzip-6", 0}, /* ZIO_COMPRESS_GZIP_6 */ + {"gzip-7", 0}, /* ZIO_COMPRESS_GZIP_7 */ + {"gzip-8", 0}, /* ZIO_COMPRESS_GZIP_8 */ + {"gzip-9", 0}, /* ZIO_COMPRESS_GZIP_9 */ + {"zle", 0}, /* ZIO_COMPRESS_ZLE */ + {"lz4", lz4_decompress} /* ZIO_COMPRESS_LZ4 */ }; static int zio_read_data(blkptr_t *bp, void *buf, char *stack); @@ -80,8 +97,8 @@ static int zio_read_data(blkptr_t *bp, v static int zfs_bcmp(const void *s1, const void *s2, size_t n) { - const uint8_t *ps1 = s1; - const uint8_t *ps2 = s2; + const uchar_t *ps1 = s1; + const uchar_t *ps2 = s2; if (s1 != s2 && n != 0) { do { @@ -118,16 +135,16 @@ zio_checksum_off(const void *buf, uint64 /* Checksum Table and Values */ zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { - { { NULL, NULL }, 0, 0, "inherit" }, - { { NULL, NULL }, 0, 0, "on" }, - { { zio_checksum_off, zio_checksum_off }, 0, 0, "off" }, - { { zio_checksum_SHA256, zio_checksum_SHA256 }, 1, 1, "label" }, - { { zio_checksum_SHA256, zio_checksum_SHA256 }, 1, 1, "gang_header" }, - { { NULL, NULL }, 0, 0, "zilog" }, - { { fletcher_2_native, fletcher_2_byteswap }, 0, 0, "fletcher2" }, - { { fletcher_4_native, fletcher_4_byteswap }, 1, 0, "fletcher4" }, - { { zio_checksum_SHA256, zio_checksum_SHA256 }, 1, 0, "SHA256" }, - { { NULL, NULL }, 0, 0, "zilog2" } + {{NULL, NULL}, 0, 0, "inherit"}, + {{NULL, NULL}, 0, 0, "on"}, + {{zio_checksum_off, 
zio_checksum_off}, 0, 0, "off"}, + {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "label"}, + {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "gang_header"}, + {{NULL, NULL}, 0, 0, "zilog"}, + {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, "fletcher2"}, + {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, "fletcher4"}, + {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, "SHA256"}, + {{NULL, NULL}, 0, 0, "zilog2"}, }; /* @@ -217,18 +234,13 @@ vdev_uberblock_compare(uberblock_t *ub1, * Three pieces of information are needed to verify an uberblock: the magic * number, the version number, and the checksum. * - * Currently Implemented: version number, magic number - * Need to Implement: checksum - * * Return: * 0 - Success * -1 - Failure */ static int -uberblock_verify(uberblock_phys_t *ub, uint64_t offset) +uberblock_verify(uberblock_t *uber, uint64_t ub_size, uint64_t offset) { - - uberblock_t *uber = &ub->ubp_uberblock; blkptr_t bp; BP_ZERO(&bp); @@ -236,11 +248,11 @@ uberblock_verify(uberblock_phys_t *ub, u BP_SET_BYTEORDER(&bp, ZFS_HOST_BYTEORDER); ZIO_SET_CHECKSUM(&bp.blk_cksum, offset, 0, 0, 0); - if (zio_checksum_verify(&bp, (char *)ub, UBERBLOCK_SIZE) != 0) + if (zio_checksum_verify(&bp, (char *)uber, ub_size) != 0) return (-1); if (uber->ub_magic == UBERBLOCK_MAGIC && - uber->ub_version > 0 && uber->ub_version <= SPA_VERSION) + SPA_VERSION_IS_SUPPORTED(uber->ub_version)) return (0); return (-1); @@ -252,25 +264,28 @@ uberblock_verify(uberblock_phys_t *ub, u * Success - Pointer to the best uberblock. 
* Failure - NULL */ -static uberblock_phys_t * -find_bestub(uberblock_phys_t *ub_array, uint64_t sector) +static uberblock_t * +find_bestub(char *ub_array, uint64_t ashift, uint64_t sector) { - uberblock_phys_t *ubbest = NULL; - uint64_t offset; + uberblock_t *ubbest = NULL; + uberblock_t *ubnext; + uint64_t offset, ub_size; int i; - for (i = 0; i < (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT); i++) { + ub_size = VDEV_UBERBLOCK_SIZE(ashift); + + for (i = 0; i < VDEV_UBERBLOCK_COUNT(ashift); i++) { + ubnext = (uberblock_t *)ub_array; + ub_array += ub_size; offset = (sector << SPA_MINBLOCKSHIFT) + - VDEV_UBERBLOCK_OFFSET(i); - if (uberblock_verify(&ub_array[i], offset) == 0) { - if (ubbest == NULL) { - ubbest = &ub_array[i]; - } else if (vdev_uberblock_compare( - &(ub_array[i].ubp_uberblock), - &(ubbest->ubp_uberblock)) > 0) { - ubbest = &ub_array[i]; - } - } + VDEV_UBERBLOCK_OFFSET(ashift, i); + + if (uberblock_verify(ubnext, ub_size, offset) != 0) + continue; + + if (ubbest == NULL || + vdev_uberblock_compare(ubnext, ubbest) > 0) + ubbest = ubnext; } return (ubbest); @@ -295,7 +310,7 @@ zio_read_gang(blkptr_t *bp, dva_t *dva, zio_gb = (zio_gbh_phys_t *)stack; stack += SPA_GANGBLOCKSIZE; offset = DVA_GET_OFFSET(dva); - sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); + sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); /* read in the gang block header */ if (devread(sector, 0, SPA_GANGBLOCKSIZE, (char *)zio_gb) == 0) { @@ -354,8 +369,8 @@ zio_read_data(blkptr_t *bp, void *buf, c } else { /* read in a data block */ offset = DVA_GET_OFFSET(&bp->blk_dva[i]); - sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); - if (devread(sector, 0, psize, buf)) + sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); + if (devread(sector, 0, psize, buf) != 0) return (0); } } @@ -399,7 +414,7 @@ zio_read(blkptr_t *bp, void *buf, char * stack += psize; } - if (zio_read_data(bp, buf, stack)) { + if (zio_read_data(bp, buf, stack) != 0) { grub_printf("zio_read_data failed\n"); return (ERR_FSYS_CORRUPT); } @@ -409,8 
+424,13 @@ zio_read(blkptr_t *bp, void *buf, char * return (ERR_FSYS_CORRUPT); } - if (comp != ZIO_COMPRESS_OFF) - decomp_table[comp].decomp_func(buf, retbuf, psize, lsize); + if (comp != ZIO_COMPRESS_OFF) { + if (decomp_table[comp].decomp_func(buf, retbuf, psize, + lsize) != 0) { + grub_printf("zio_read decompression failed\n"); + return (ERR_FSYS_CORRUPT); + } + } return (0); } @@ -446,7 +466,7 @@ dmu_read(dnode_phys_t *dn, uint64_t blki grub_memset(buf, 0, dn->dn_datablkszsec << SPA_MINBLOCKSHIFT); break; - } else if ((errnum = zio_read(bp, tmpbuf, stack))) { + } else if (errnum = zio_read(bp, tmpbuf, stack)) { return (errnum); } @@ -465,13 +485,13 @@ dmu_read(dnode_phys_t *dn, uint64_t blki * errnum - failure */ static int -mzap_lookup(mzap_phys_t *zapobj, int objsize, char *name, +mzap_lookup(mzap_phys_t *zapobj, int objsize, const char *name, uint64_t *value) { int i, chunks; mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk; - chunks = objsize/MZAP_ENT_LEN - 1; + chunks = objsize / MZAP_ENT_LEN - 1; for (i = 0; i < chunks; i++) { if (grub_strcmp(mzap_ent[i].mze_name, name) == 0) { *value = mzap_ent[i].mze_value; @@ -511,8 +531,8 @@ zap_hash(uint64_t salt, const char *name /* * Only use 28 bits, since we need 4 bits in the cookie for the * collision differentiator. We MUST use the high bits, since - * those are the onces that we first pay attention to when - * chosing the bucket. + * those are the ones that we first pay attention to when + * choosing the bucket. 
*/ crc &= ~((1ULL << (64 - 28)) - 1); @@ -617,7 +637,7 @@ zap_leaf_lookup(zap_leaf_phys_t *l, int */ static int fzap_lookup(dnode_phys_t *zap_dnode, zap_phys_t *zap, - char *name, uint64_t *value, char *stack) + const char *name, uint64_t *value, char *stack) { zap_leaf_phys_t *l; uint64_t hash, idx, blkid; @@ -645,7 +665,7 @@ fzap_lookup(dnode_phys_t *zap_dnode, zap stack += 1<<blksft; if ((1<<blksft) < sizeof (zap_leaf_phys_t)) return (ERR_FSYS_CORRUPT); - if ((errnum = dmu_read(zap_dnode, blkid, l, stack))) + if (errnum = dmu_read(zap_dnode, blkid, l, stack)) return (errnum); return (zap_leaf_lookup(l, blksft, hash, name, value)); @@ -660,7 +680,8 @@ fzap_lookup(dnode_phys_t *zap_dnode, zap * errnum - failure */ static int -zap_lookup(dnode_phys_t *zap_dnode, char *name, uint64_t *val, char *stack) +zap_lookup(dnode_phys_t *zap_dnode, const char *name, uint64_t *val, + char *stack) { uint64_t block_type; int size; @@ -671,7 +692,7 @@ zap_lookup(dnode_phys_t *zap_dnode, char size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; stack += size; - if ((errnum = dmu_read(zap_dnode, 0, zapbuf, stack))) + if ((errnum = dmu_read(zap_dnode, 0, zapbuf, stack)) != 0) return (errnum); block_type = *((uint64_t *)zapbuf); @@ -687,6 +708,56 @@ zap_lookup(dnode_phys_t *zap_dnode, char return (ERR_FSYS_CORRUPT); } +typedef struct zap_attribute { + int za_integer_length; + uint64_t za_num_integers; + uint64_t za_first_integer; + char *za_name; +} zap_attribute_t; + +typedef int (zap_cb_t)(zap_attribute_t *za, void *arg, char *stack); + +static int +zap_iterate(dnode_phys_t *zap_dnode, zap_cb_t *cb, void *arg, char *stack) +{ + uint32_t size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; + zap_attribute_t za; + int i; + mzap_phys_t *mzp = (mzap_phys_t *)stack; + stack += size; + + if ((errnum = dmu_read(zap_dnode, 0, mzp, stack)) != 0) + return (errnum); + + /* + * Iteration over fatzap objects has not yet been implemented. 
+ * If we encounter a pool in which there are more features for + * read than can fit inside a microzap (i.e., more than 2048 + * features for read), we can add support for fatzap iteration. + * For now, fail. + */ + if (mzp->mz_block_type != ZBT_MICRO) { + grub_printf("feature information stored in fatzap, pool " + "version not supported\n"); + return (1); + } + + za.za_integer_length = 8; + za.za_num_integers = 1; + for (i = 0; i < size / MZAP_ENT_LEN - 1; i++) { + mzap_ent_phys_t *mzep = &mzp->mz_chunk[i]; + int err; + + za.za_first_integer = mzep->mze_value; + za.za_name = mzep->mze_name; + err = cb(&za, arg, stack); + if (err != 0) + return (err); + } + + return (0); +} + /* * Get the dnode of an object number from the metadnode of an object set. * @@ -731,7 +802,7 @@ dnode_get(dnode_phys_t *mdn, uint64_t ob stack += blksz; } - if ((errnum = dmu_read(mdn, blkid, (char *)dnbuf, stack))) + if (errnum = dmu_read(mdn, blkid, (char *)dnbuf, stack)) return (errnum); grub_memmove(buf, &dnbuf[idx], DNODE_SIZE); @@ -766,6 +837,24 @@ is_top_dataset_file(char *str) return (0); } +static int +check_feature(zap_attribute_t *za, void *arg, char *stack) +{ + const char **names = arg; + int i; + + if (za->za_first_integer == 0) + return (0); + + for (i = 0; names[i] != NULL; i++) { + if (grub_strcmp(za->za_name, names[i]) == 0) { + return (0); + } + } + grub_printf("missing feature for read '%s'\n", za->za_name); + return (ERR_NEWER_VERSION); +} + /* * Get the file dnode for a given file name where mdn is the meta dnode * for this ZFS object set. When found, place the file dnode in dn. 
@@ -782,40 +871,40 @@ dnode_get_path(dnode_phys_t *mdn, char * uint64_t objnum, version; char *cname, ch; - if ((errnum = dnode_get(mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE, - dn, stack))) + if (errnum = dnode_get(mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE, + dn, stack)) return (errnum); - if ((errnum = zap_lookup(dn, ZPL_VERSION_STR, &version, stack))) + if (errnum = zap_lookup(dn, ZPL_VERSION_STR, &version, stack)) return (errnum); if (version > ZPL_VERSION) return (-1); - if ((errnum = zap_lookup(dn, ZFS_ROOT_OBJ, &objnum, stack))) + if (errnum = zap_lookup(dn, ZFS_ROOT_OBJ, &objnum, stack)) return (errnum); - if ((errnum = dnode_get(mdn, objnum, DMU_OT_DIRECTORY_CONTENTS, - dn, stack))) + if (errnum = dnode_get(mdn, objnum, DMU_OT_DIRECTORY_CONTENTS, + dn, stack)) return (errnum); /* skip leading slashes */ while (*path == '/') path++; - while (*path && !isspace((uint8_t)*path)) { + while (*path && !grub_isspace(*path)) { /* get the next component name */ cname = path; - while (*path && !isspace((uint8_t)*path) && *path != '/') + while (*path && !grub_isspace(*path) && *path != '/') path++; ch = *path; *path = 0; /* ensure null termination */ - if ((errnum = zap_lookup(dn, cname, &objnum, stack))) + if (errnum = zap_lookup(dn, cname, &objnum, stack)) return (errnum); objnum = ZFS_DIRENT_OBJ(objnum); - if ((errnum = dnode_get(mdn, objnum, 0, dn, stack))) + if (errnum = dnode_get(mdn, objnum, 0, dn, stack)) return (errnum); *path = ch; @@ -843,8 +932,8 @@ get_default_bootfsobj(dnode_phys_t *mosm dnode_phys_t *dn = (dnode_phys_t *)stack; stack += DNODE_SIZE; - if ((errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, - DMU_OT_OBJECT_DIRECTORY, dn, stack))) + if (errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, + DMU_OT_OBJECT_DIRECTORY, dn, stack)) return (errnum); /* @@ -854,7 +943,7 @@ get_default_bootfsobj(dnode_phys_t *mosm if (zap_lookup(dn, DMU_POOL_PROPS, &objnum, stack)) return (ERR_FILESYSTEM_NOT_FOUND); - if ((errnum = dnode_get(mosmdn, objnum, 
DMU_OT_POOL_PROPS, dn, stack))) + if (errnum = dnode_get(mosmdn, objnum, DMU_OT_POOL_PROPS, dn, stack)) return (errnum); if (zap_lookup(dn, ZPOOL_PROP_BOOTFS, &objnum, stack)) @@ -868,6 +957,57 @@ get_default_bootfsobj(dnode_phys_t *mosm } /* + * List of pool features that the grub implementation of ZFS supports for + * read. Note that features that are only required for write do not need + * to be listed here since grub opens pools in read-only mode. + * + * When this list is updated the version number in usr/src/grub/capability + * must be incremented to ensure the new grub gets installed. + */ +static const char *spa_feature_names[] = { + "org.illumos:lz4_compress", + NULL +}; + +/* + * Checks whether the MOS features that are active are supported by this + * (GRUB's) implementation of ZFS. + * + * Return: + * 0: Success. + * errnum: Failure. + */ +static int +check_mos_features(dnode_phys_t *mosmdn, char *stack) +{ + uint64_t objnum; + dnode_phys_t *dn; + uint8_t error = 0; + + dn = (dnode_phys_t *)stack; + stack += DNODE_SIZE; + + if ((errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, + DMU_OT_OBJECT_DIRECTORY, dn, stack)) != 0) + return (errnum); + + /* + * Find the object number for 'features_for_read' and retrieve its + * corresponding dnode. Note that we don't check features_for_write + * because GRUB is not opening the pool for write. + */ + if ((errnum = zap_lookup(dn, DMU_POOL_FEATURES_FOR_READ, &objnum, + stack)) != 0) + return (errnum); + + if ((errnum = dnode_get(mosmdn, objnum, DMU_OTN_ZAP_METADATA, + dn, stack)) != 0) + return (errnum); + + return (zap_iterate(dn, check_feature, spa_feature_names, stack)); +} + +/* * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname), * e.g. pool/rootfs, or a given object number (obj), e.g. the object number * of pool/rootfs. 
@@ -896,15 +1036,15 @@ get_objset_mdn(dnode_phys_t *mosmdn, cha goto skip; } - if ((errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, - DMU_OT_OBJECT_DIRECTORY, mdn, stack))) + if (errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, + DMU_OT_OBJECT_DIRECTORY, mdn, stack)) return (errnum); - if ((errnum = zap_lookup(mdn, DMU_POOL_ROOT_DATASET, &objnum, - stack))) + if (errnum = zap_lookup(mdn, DMU_POOL_ROOT_DATASET, &objnum, + stack)) return (errnum); - if ((errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, mdn, stack))) + if (errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, mdn, stack)) return (errnum); if (fsname == NULL) { @@ -914,23 +1054,24 @@ get_objset_mdn(dnode_phys_t *mosmdn, cha } /* take out the pool name */ - while (*fsname && !isspace((uint8_t)*fsname) && *fsname != '/') + while (*fsname && !grub_isspace(*fsname) && *fsname != '/') fsname++; - while (*fsname && !isspace((uint8_t)*fsname)) { + while (*fsname && !grub_isspace(*fsname)) { uint64_t childobj; while (*fsname == '/') fsname++; cname = fsname; - while (*fsname && !isspace((uint8_t)*fsname) && *fsname != '/') + while (*fsname && !grub_isspace(*fsname) && *fsname != '/') fsname++; ch = *fsname; *fsname = 0; snapname = cname; - while (*snapname && !isspace((uint8_t)*snapname) && *snapname != '@') + while (*snapname && !grub_isspace(*snapname) && *snapname != + '@') snapname++; if (*snapname == '@') { issnapshot = 1; @@ -938,15 +1079,15 @@ get_objset_mdn(dnode_phys_t *mosmdn, cha } childobj = ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_child_dir_zapobj; - if ((errnum = dnode_get(mosmdn, childobj, - DMU_OT_DSL_DIR_CHILD_MAP, mdn, stack))) + if (errnum = dnode_get(mosmdn, childobj, + DMU_OT_DSL_DIR_CHILD_MAP, mdn, stack)) return (errnum); if (zap_lookup(mdn, cname, &objnum, stack)) return (ERR_FILESYSTEM_NOT_FOUND); - if ((errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, - mdn, stack))) + if (errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, + mdn, stack)) return (errnum); *fsname = ch; @@ 
-958,7 +1099,7 @@ get_objset_mdn(dnode_phys_t *mosmdn, cha *obj = headobj; skip: - if ((errnum = dnode_get(mosmdn, headobj, DMU_OT_DSL_DATASET, mdn, stack))) + if (errnum = dnode_get(mosmdn, headobj, DMU_OT_DSL_DATASET, mdn, stack)) return (errnum); if (issnapshot) { uint64_t snapobj; @@ -966,13 +1107,13 @@ skip: snapobj = ((dsl_dataset_phys_t *)DN_BONUS(mdn))-> ds_snapnames_zapobj; - if ((errnum = dnode_get(mosmdn, snapobj, - DMU_OT_DSL_DS_SNAP_MAP, mdn, stack))) + if (errnum = dnode_get(mosmdn, snapobj, + DMU_OT_DSL_DS_SNAP_MAP, mdn, stack)) return (errnum); if (zap_lookup(mdn, snapname + 1, &headobj, stack)) return (ERR_FILESYSTEM_NOT_FOUND); - if ((errnum = dnode_get(mosmdn, headobj, - DMU_OT_DSL_DATASET, mdn, stack))) + if (errnum = dnode_get(mosmdn, headobj, + DMU_OT_DSL_DATASET, mdn, stack)) return (errnum); if (obj) *obj = headobj; @@ -981,7 +1122,7 @@ skip: bp = &((dsl_dataset_phys_t *)DN_BONUS(mdn))->ds_bp; osp = (objset_phys_t *)stack; stack += sizeof (objset_phys_t); - if ((errnum = zio_read(bp, osp, stack))) + if (errnum = zio_read(bp, osp, stack)) return (errnum); grub_memmove((char *)mdn, (char *)&osp->os_meta_dnode, DNODE_SIZE); @@ -1019,8 +1160,7 @@ nvlist_unpack(char *nvlist, char **out) if (nvlist[0] != NV_ENCODE_XDR || nvlist[1] != HOST_ENDIAN) return (1); - nvlist += 4; - *out = nvlist; + *out = nvlist + 4; return (0); } @@ -1033,7 +1173,7 @@ nvlist_array(char *nvlist, int index) /* skip the header, nvl_version, and nvl_nvflag */ nvlist = nvlist + 4 * 2; - while ((encode_size = BSWAP_32(*(uint32_t *)nvlist))) + while (encode_size = BSWAP_32(*(uint32_t *)nvlist)) nvlist += encode_size; /* goto the next nvpair */ nvlist = nvlist + 4 * 2; /* skip the ending 2 zeros - 8 bytes */ @@ -1042,69 +1182,159 @@ nvlist_array(char *nvlist, int index) return (nvlist); } +/* + * The nvlist_next_nvpair() function returns a handle to the next nvpair in the + * list following nvpair. If nvpair is NULL, the first pair is returned. 
If + * nvpair is the last pair in the nvlist, NULL is returned. + */ +static char * +nvlist_next_nvpair(char *nvl, char *nvpair) +{ + char *cur, *prev; + int encode_size; + + if (nvl == NULL) + return (NULL); + + if (nvpair == NULL) { + /* skip over nvl_version and nvl_nvflag */ + nvpair = nvl + 4 * 2; + } else { + /* skip to the next nvpair */ + encode_size = BSWAP_32(*(uint32_t *)nvpair); + nvpair += encode_size; + } + + /* 8 bytes of 0 marks the end of the list */ + if (*(uint64_t *)nvpair == 0) + return (NULL); + + return (nvpair); +} + +/* + * This function returns 0 on success and 1 on failure. On success, a string + * containing the name of nvpair is saved in buf. + */ +static int +nvpair_name(char *nvp, char *buf, int buflen) +{ + int len; + + /* skip over encode/decode size */ + nvp += 4 * 2; + + len = BSWAP_32(*(uint32_t *)nvp); + if (buflen < len + 1) + return (1); + + grub_memmove(buf, nvp + 4, len); + buf[len] = '\0'; + + return (0); +} + +/* + * This function retrieves the value of the nvpair in the form of enumerated + * type data_type_t. This is used to determine the appropriate type to pass to + * nvpair_value(). 
+ */ +static int +nvpair_type(char *nvp) +{ + int name_len, type; + + /* skip over encode/decode size */ + nvp += 4 * 2; + + /* skip over name_len */ + name_len = BSWAP_32(*(uint32_t *)nvp); + nvp += 4; + + /* skip over name */ + nvp = nvp + ((name_len + 3) & ~3); /* align */ + + type = BSWAP_32(*(uint32_t *)nvp); + + return (type); +} + +static int +nvpair_value(char *nvp, void *val, int valtype, int *nelmp) +{ + int name_len, type, slen; + char *strval = val; + uint64_t *intval = val; + + /* skip over encode/decode size */ + nvp += 4 * 2; + + /* skip over name_len */ + name_len = BSWAP_32(*(uint32_t *)nvp); + nvp += 4; + + /* skip over name */ + nvp = nvp + ((name_len + 3) & ~3); /* align */ + + /* skip over type */ + type = BSWAP_32(*(uint32_t *)nvp); + nvp += 4; + + if (type == valtype) { + int nelm; + + nelm = BSWAP_32(*(uint32_t *)nvp); + if (valtype != DATA_TYPE_BOOLEAN && nelm < 1) + return (1); + nvp += 4; + + switch (valtype) { + case DATA_TYPE_BOOLEAN: + return (0); + + case DATA_TYPE_STRING: + slen = BSWAP_32(*(uint32_t *)nvp); + nvp += 4; + grub_memmove(strval, nvp, slen); + strval[slen] = '\0'; + return (0); + + case DATA_TYPE_UINT64: + *intval = BSWAP_64(*(uint64_t *)nvp); + return (0); + + case DATA_TYPE_NVLIST: + *(void **)val = (void *)nvp; + return (0); + + case DATA_TYPE_NVLIST_ARRAY: + *(void **)val = (void *)nvp; + if (nelmp) + *nelmp = nelm; + return (0); + } + } + + return (1); +} + static int nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype, int *nelmp) { - int name_len, type, slen, encode_size; - char *nvpair, *nvp_name, *strval = val; - uint64_t *intval = val; + char *nvpair; - /* skip the header, nvl_version, and nvl_nvflag */ - nvlist = nvlist + 4 * 2; - - /* - * Loop thru the nvpair list - * The XDR representation of an integer is in big-endian byte order. 
- */ - while ((encode_size = BSWAP_32(*(uint32_t *)nvlist))) { - - nvpair = nvlist + 4 * 2; /* skip the encode/decode size */ - - name_len = BSWAP_32(*(uint32_t *)nvpair); - nvpair += 4; - - nvp_name = nvpair; - nvpair = nvpair + ((name_len + 3) & ~3); /* align */ - - type = BSWAP_32(*(uint32_t *)nvpair); - nvpair += 4; + for (nvpair = nvlist_next_nvpair(nvlist, NULL); + nvpair != NULL; + nvpair = nvlist_next_nvpair(nvlist, nvpair)) { + int name_len = BSWAP_32(*(uint32_t *)(nvpair + 4 * 2)); + char *nvp_name = nvpair + 4 * 3; if ((grub_strncmp(nvp_name, name, name_len) == 0) && - type == valtype) { - int nelm; - - if ((nelm = BSWAP_32(*(uint32_t *)nvpair)) < 1) - return (1); - nvpair += 4; - - switch (valtype) { - case DATA_TYPE_STRING: - slen = BSWAP_32(*(uint32_t *)nvpair); - nvpair += 4; - grub_memmove(strval, nvpair, slen); - strval[slen] = '\0'; - return (0); - - case DATA_TYPE_UINT64: - *intval = BSWAP_64(*(uint64_t *)nvpair); - return (0); - - case DATA_TYPE_NVLIST: - *(void **)val = (void *)nvpair; - return (0); - - case DATA_TYPE_NVLIST_ARRAY: - *(void **)val = (void *)nvpair; - if (nelmp) - *nelmp = nelm; - return (0); - } + nvpair_type(nvpair) == valtype) { + return (nvpair_value(nvpair, val, valtype, nelmp)); } - - nvlist += encode_size; /* goto the next nvpair */ } - return (1); } @@ -1141,7 +1371,7 @@ vdev_get_bootpath(char *nv, uint64_t ing NULL)) return (ERR_FSYS_CORRUPT); - if (strcmp(type, VDEV_TYPE_DISK) == 0) { + if (grub_strcmp(type, VDEV_TYPE_DISK) == 0) { uint64_t guid; if (vdev_validate(nv) != 0) @@ -1171,15 +1401,15 @@ vdev_get_bootpath(char *nv, uint64_t ing devid, DATA_TYPE_STRING, NULL) != 0) devid[0] = '\0'; - if (strlen(bootpath) >= MAXPATHLEN || - strlen(devid) >= MAXPATHLEN) + if (grub_strlen(bootpath) >= MAXPATHLEN || + grub_strlen(devid) >= MAXPATHLEN) return (ERR_WONT_FIT); return (0); - } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 || - strcmp(type, VDEV_TYPE_REPLACING) == 0 || - (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 
0))) { + } else if (grub_strcmp(type, VDEV_TYPE_MIRROR) == 0 || + grub_strcmp(type, VDEV_TYPE_REPLACING) == 0 || + (is_spare = (grub_strcmp(type, VDEV_TYPE_SPARE) == 0))) { int nelm, i; char *child; @@ -1207,15 +1437,14 @@ vdev_get_bootpath(char *nv, uint64_t ing * 0 - success * ERR_* - failure */ -int +static int check_pool_label(uint64_t sector, char *stack, char *outdevid, - char *outpath, uint64_t *outguid) + char *outpath, uint64_t *outguid, uint64_t *outashift, uint64_t *outversion) { vdev_phys_t *vdev; uint64_t pool_state, txg = 0; - char *nvlist, *nv; + char *nvlist, *nv, *features; uint64_t diskguid; - uint64_t version; sector += (VDEV_SKIP_SIZE >> SPA_MINBLOCKSHIFT); @@ -1248,10 +1477,10 @@ check_pool_label(uint64_t sector, char * if (txg == 0) return (ERR_NO_BOOTPATH); - if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VERSION, &version, + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VERSION, outversion, DATA_TYPE_UINT64, NULL)) return (ERR_FSYS_CORRUPT); - if (version > SPA_VERSION) + if (!SPA_VERSION_IS_SUPPORTED(*outversion)) return (ERR_NEWER_VERSION); if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv, DATA_TYPE_NVLIST, NULL)) @@ -1259,11 +1488,38 @@ check_pool_label(uint64_t sector, char * if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_GUID, &diskguid, DATA_TYPE_UINT64, NULL)) return (ERR_FSYS_CORRUPT); + if (nvlist_lookup_value(nv, ZPOOL_CONFIG_ASHIFT, outashift, + DATA_TYPE_UINT64, NULL) != 0) + return (ERR_FSYS_CORRUPT); if (vdev_get_bootpath(nv, diskguid, outdevid, outpath, 0)) return (ERR_NO_BOOTPATH); if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_GUID, outguid, DATA_TYPE_UINT64, NULL)) return (ERR_FSYS_CORRUPT); + + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_FEATURES_FOR_READ, + &features, DATA_TYPE_NVLIST, NULL) == 0) { + char *nvp; + char *name = stack; + stack += MAXNAMELEN; + + for (nvp = nvlist_next_nvpair(features, NULL); + nvp != NULL; + nvp = nvlist_next_nvpair(features, nvp)) { + zap_attribute_t za; + + if (nvpair_name(nvp, 
name, MAXNAMELEN) != 0) + return (ERR_FSYS_CORRUPT); + + za.za_integer_length = 8; + za.za_num_integers = 1; + za.za_first_integer = 1; + za.za_name = name; + if (check_feature(&za, spa_feature_names, stack) != 0) + return (ERR_NEWER_VERSION); + } + } + return (0); } @@ -1278,15 +1534,16 @@ check_pool_label(uint64_t sector, char * int zfs_mount(void) { - char *stack; + char *stack, *ub_array; int label = 0; - uberblock_phys_t *ub_array, *ubbest; + uberblock_t *ubbest; objset_phys_t *osp; char tmp_bootpath[MAXNAMELEN]; char tmp_devid[MAXNAMELEN]; - uint64_t tmp_guid; + uint64_t tmp_guid, ashift, version; uint64_t adjpl = (uint64_t)part_length << SPA_MINBLOCKSHIFT; int err = errnum; /* preserve previous errnum state */ + uint64_t sector; /* if it's our first time here, zero the best uberblock out */ if (best_drive == 0 && best_part == 0 && find_best_root) { @@ -1296,7 +1553,7 @@ zfs_mount(void) stackbase = ZFS_SCRATCH; stack = stackbase; - ub_array = (uberblock_phys_t *)stack; + ub_array = stack; stack += VDEV_UBERBLOCK_RING; osp = (objset_phys_t *)stack; @@ -1305,8 +1562,6 @@ zfs_mount(void) for (label = 0; label < VDEV_LABELS; label++) { - uint64_t sector; - /* * some eltorito stacks don't give us a size and * we end up setting the size to MAXUINT, further @@ -1324,39 +1579,38 @@ zfs_mount(void) /* Read in the uberblock ring (128K). 
*/ if (devread(sector + - ((VDEV_SKIP_SIZE + VDEV_PHYS_SIZE) >> - SPA_MINBLOCKSHIFT), 0, VDEV_UBERBLOCK_RING, - (char *)ub_array) == 0) + ((VDEV_SKIP_SIZE + VDEV_PHYS_SIZE) >> SPA_MINBLOCKSHIFT), + 0, VDEV_UBERBLOCK_RING, ub_array) == 0) continue; - if ((ubbest = find_bestub(ub_array, sector)) != NULL && - zio_read(&ubbest->ubp_uberblock.ub_rootbp, osp, stack) - == 0) { + if (check_pool_label(sector, stack, tmp_devid, + tmp_bootpath, &tmp_guid, &ashift, &version)) + continue; - VERIFY_OS_TYPE(osp, DMU_OST_META); + if (pool_guid == 0) + pool_guid = tmp_guid; - if (check_pool_label(sector, stack, tmp_devid, - tmp_bootpath, &tmp_guid)) - continue; - if (pool_guid == 0) - pool_guid = tmp_guid; + if ((ubbest = find_bestub(ub_array, ashift, sector)) == NULL || + zio_read(&ubbest->ub_rootbp, osp, stack) != 0) + continue; - if (find_best_root && ((pool_guid != tmp_guid) || - vdev_uberblock_compare(&ubbest->ubp_uberblock, - &(current_uberblock)) <= 0)) - continue; + VERIFY_OS_TYPE(osp, DMU_OST_META); - /* Got the MOS. Save it at the memory addr MOS. */ - grub_memmove(MOS, &osp->os_meta_dnode, DNODE_SIZE); - grub_memmove(&current_uberblock, - &ubbest->ubp_uberblock, sizeof (uberblock_t)); - grub_memmove(current_bootpath, tmp_bootpath, - MAXNAMELEN); - grub_memmove(current_devid, tmp_devid, - grub_strlen(tmp_devid)); - is_zfs_mount = 1; - return (1); - } + if (version >= SPA_VERSION_FEATURES && + check_mos_features(&osp->os_meta_dnode, stack) != 0) + continue; + + if (find_best_root && ((pool_guid != tmp_guid) || + vdev_uberblock_compare(ubbest, &(current_uberblock)) <= 0)) + continue; + + /* Got the MOS. Save it at the memory addr MOS. 
*/ + grub_memmove(MOS, &osp->os_meta_dnode, DNODE_SIZE); + grub_memmove(&current_uberblock, ubbest, sizeof (uberblock_t)); + grub_memmove(current_bootpath, tmp_bootpath, MAXNAMELEN); + grub_memmove(current_devid, tmp_devid, grub_strlen(tmp_devid)); + is_zfs_mount = 1; + return (1); } /* @@ -1399,23 +1653,23 @@ zfs_open(char *filename) * do not goto 'current_bootfs'. */ if (is_top_dataset_file(filename)) { - if ((errnum = get_objset_mdn(MOS, NULL, NULL, mdn, stack))) + if (errnum = get_objset_mdn(MOS, NULL, NULL, mdn, stack)) return (0); current_bootfs_obj = 0; } else { if (current_bootfs[0] == '\0') { /* Get the default root filesystem object number */ - if ((errnum = get_default_bootfsobj(MOS, - &current_bootfs_obj, stack))) + if (errnum = get_default_bootfsobj(MOS, + &current_bootfs_obj, stack)) return (0); - if ((errnum = get_objset_mdn(MOS, NULL, - &current_bootfs_obj, mdn, stack))) + if (errnum = get_objset_mdn(MOS, NULL, + &current_bootfs_obj, mdn, stack)) return (0); } else { - if ((errnum = get_objset_mdn(MOS, current_bootfs, - &current_bootfs_obj, mdn, stack))) { + if (errnum = get_objset_mdn(MOS, current_bootfs, + &current_bootfs_obj, mdn, stack)) { grub_memset(current_bootfs, 0, MAXNAMELEN); return (0); } @@ -1515,7 +1769,7 @@ zfs_read(char *buf, int len) */ uint64_t blkid = filepos / blksz; - if ((errnum = dmu_read(DNODE, blkid, file_buf, stack))) + if (errnum = dmu_read(DNODE, blkid, file_buf, stack)) return (0); file_start = blkid * blksz; diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/fsys_zfs.h --- a/tools/libfsimage/zfs/fsys_zfs.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/fsys_zfs.h Sat Oct 26 20:03:06 2013 +0400 @@ -16,10 +16,17 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ + +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Saso Kiselkov. 
All rights reserved. + */ + #ifndef _FSYS_ZFS_H #define _FSYS_ZFS_H @@ -95,26 +102,8 @@ typedef unsigned int size_t; #define BSWAP_64(x) ((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32)) #define P2ROUNDUP(x, align) (-(-(x) & -(align))) -/* - * XXX Match these macro up with real zfs once we have nvlist support so that we - * can support large sector disks. - */ -#define UBERBLOCK_SIZE (1ULL << UBERBLOCK_SHIFT) -#define VDEV_UBERBLOCK_SHIFT UBERBLOCK_SHIFT -#include <stddef.h> -#define VDEV_UBERBLOCK_OFFSET(n) \ -offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT]) - typedef struct uberblock uberblock_t; -/* XXX Uberblock_phys_t is no longer in the kernel zfs */ -typedef struct uberblock_phys { - uberblock_t ubp_uberblock; - char ubp_pad[UBERBLOCK_SIZE - sizeof (uberblock_t) - - sizeof (zio_eck_t)]; - zio_eck_t ubp_zec; -} uberblock_phys_t; - /* * Macros to get fields in a bp or DVA. */ @@ -137,10 +126,36 @@ typedef struct uberblock_phys { #define NV_ENCODE_NATIVE 0 #define NV_ENCODE_XDR 1 #define HOST_ENDIAN 1 /* for x86 machine */ -#define DATA_TYPE_UINT64 8 -#define DATA_TYPE_STRING 9 -#define DATA_TYPE_NVLIST 19 -#define DATA_TYPE_NVLIST_ARRAY 20 +typedef enum { + DATA_TYPE_UNKNOWN = 0, + DATA_TYPE_BOOLEAN, + DATA_TYPE_BYTE, + DATA_TYPE_INT16, + DATA_TYPE_UINT16, + DATA_TYPE_INT32, + DATA_TYPE_UINT32, + DATA_TYPE_INT64, + DATA_TYPE_UINT64, + DATA_TYPE_STRING, + DATA_TYPE_BYTE_ARRAY, + DATA_TYPE_INT16_ARRAY, + DATA_TYPE_UINT16_ARRAY, + DATA_TYPE_INT32_ARRAY, + DATA_TYPE_UINT32_ARRAY, + DATA_TYPE_INT64_ARRAY, + DATA_TYPE_UINT64_ARRAY, + DATA_TYPE_STRING_ARRAY, + DATA_TYPE_HRTIME, + DATA_TYPE_NVLIST, + DATA_TYPE_NVLIST_ARRAY, + DATA_TYPE_BOOLEAN_VALUE, + DATA_TYPE_INT8, + DATA_TYPE_UINT8, + DATA_TYPE_BOOLEAN_ARRAY, + DATA_TYPE_INT8_ARRAY, + DATA_TYPE_UINT8_ARRAY, + DATA_TYPE_DOUBLE +} data_type_t; /* * Decompression Entry - lzjb @@ -199,6 +214,7 @@ extern void fletcher_4_native(const void extern void fletcher_4_byteswap(const void *, uint64_t, zio_cksum_t *); 
extern void zio_checksum_SHA256(const void *, uint64_t, zio_cksum_t *); extern int lzjb_decompress(void *, void *, size_t, size_t); +extern int lz4_decompress(void *, void *, size_t, size_t); #endif /* FSYS_ZFS */ diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/dmu.h --- a/tools/libfsimage/zfs/zfs-include/dmu.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs-include/dmu.h Sat Oct 26 20:03:06 2013 +0400 @@ -16,11 +16,16 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + #ifndef _SYS_DMU_H #define _SYS_DMU_H @@ -31,6 +36,41 @@ * The DMU also interacts with the SPA. That interface is described in * dmu_spa.h. */ + +#define B_FALSE 0 +#define B_TRUE 1 + +#define DMU_OT_NEWTYPE 0x80 +#define DMU_OT_METADATA 0x40 +#define DMU_OT_BYTESWAP_MASK 0x3f + +#define DMU_OT(byteswap, metadata) \ + (DMU_OT_NEWTYPE | \ + ((metadata) ? DMU_OT_METADATA : 0) | \ + ((byteswap) & DMU_OT_BYTESWAP_MASK)) + +#define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \ + ((ot) & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS : \ + (ot) < DMU_OT_NUMTYPES) + +#define DMU_OT_IS_METADATA(ot) (((ot) & DMU_OT_NEWTYPE) ? 
\ + ((ot) & DMU_OT_METADATA) : \ + dmu_ot[(ot)].ot_metadata) + +typedef enum dmu_object_byteswap { + DMU_BSWAP_UINT8, + DMU_BSWAP_UINT16, + DMU_BSWAP_UINT32, + DMU_BSWAP_UINT64, + DMU_BSWAP_ZAP, + DMU_BSWAP_DNODE, + DMU_BSWAP_OBJSET, + DMU_BSWAP_ZNODE, + DMU_BSWAP_OLDACL, + DMU_BSWAP_ACL, + DMU_BSWAP_NUMFUNCS +} dmu_object_byteswap_t; + typedef enum dmu_object_type { DMU_OT_NONE, /* general: */ @@ -38,8 +78,8 @@ typedef enum dmu_object_type { DMU_OT_OBJECT_ARRAY, /* UINT64 */ DMU_OT_PACKED_NVLIST, /* UINT8 (XDR by nvlist_pack/unpack) */ DMU_OT_PACKED_NVLIST_SIZE, /* UINT64 */ - DMU_OT_BPLIST, /* UINT64 */ - DMU_OT_BPLIST_HDR, /* UINT64 */ + DMU_OT_BPOBJ, /* UINT64 */ + DMU_OT_BPOBJ_HDR, /* UINT64 */ /* spa: */ DMU_OT_SPACE_MAP_HEADER, /* UINT64 */ DMU_OT_SPACE_MAP, /* UINT64 */ @@ -56,7 +96,7 @@ typedef enum dmu_object_type { DMU_OT_DSL_DATASET, /* UINT64 */ /* zpl: */ DMU_OT_ZNODE, /* ZNODE */ - DMU_OT_OLDACL, /* OLD ACL */ + DMU_OT_OLDACL, /* Old ACL */ DMU_OT_PLAIN_FILE_CONTENTS, /* UINT8 */ DMU_OT_DIRECTORY_CONTENTS, /* ZAP */ DMU_OT_MASTER_NODE, /* ZAP */ @@ -79,7 +119,7 @@ typedef enum dmu_object_type { DMU_OT_FUID, /* FUID table (Packed NVLIST UINT8) */ DMU_OT_FUID_SIZE, /* FUID table size UINT64 */ DMU_OT_NEXT_CLONES, /* ZAP */ - DMU_OT_SCRUB_QUEUE, /* ZAP */ + DMU_OT_SCAN_QUEUE, /* ZAP */ DMU_OT_USERGROUP_USED, /* ZAP */ DMU_OT_USERGROUP_QUOTA, /* ZAP */ DMU_OT_USERREFS, /* ZAP */ @@ -89,7 +129,24 @@ typedef enum dmu_object_type { DMU_OT_SA_MASTER_NODE, /* ZAP */ DMU_OT_SA_ATTR_REGISTRATION, /* ZAP */ DMU_OT_SA_ATTR_LAYOUTS, /* ZAP */ - DMU_OT_NUMTYPES + DMU_OT_SCAN_XLATE, /* ZAP */ + DMU_OT_DEDUP, /* fake dedup BP from ddt_bp_create() */ + DMU_OT_DEADLIST, /* ZAP */ + DMU_OT_DEADLIST_HDR, /* UINT64 */ + DMU_OT_DSL_CLONES, /* ZAP */ + DMU_OT_BPOBJ_SUBOBJ, /* UINT64 */ + DMU_OT_NUMTYPES, + + DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE), + DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE), + DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, 
B_FALSE), + DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE), + DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE), + DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE), + DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE), + DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE), + DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE), + DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE), } dmu_object_type_t; typedef enum dmu_objset_type { @@ -107,6 +164,9 @@ typedef enum dmu_objset_type { */ #define DMU_POOL_DIRECTORY_OBJECT 1 #define DMU_POOL_CONFIG "config" +#define DMU_POOL_FEATURES_FOR_READ "features_for_read" +#define DMU_POOL_FEATURES_FOR_WRITE "features_for_write" +#define DMU_POOL_FEATURE_DESCRIPTIONS "feature_descriptions" #define DMU_POOL_ROOT_DATASET "root_dataset" #define DMU_POOL_SYNC_BPLIST "sync_bplist" #define DMU_POOL_ERRLOG_SCRUB "errlog_scrub" diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/dsl_dataset.h --- a/tools/libfsimage/zfs/zfs-include/dsl_dataset.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs-include/dsl_dataset.h Sat Oct 26 20:03:06 2013 +0400 @@ -24,8 +24,6 @@ #ifndef _SYS_DSL_DATASET_H #define _SYS_DSL_DATASET_H -#pragma ident "%Z%%M% %I% %E% SMI" - typedef struct dsl_dataset_phys { uint64_t ds_dir_obj; uint64_t ds_prev_snap_obj; diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/dsl_dir.h --- a/tools/libfsimage/zfs/zfs-include/dsl_dir.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs-include/dsl_dir.h Sat Oct 26 20:03:06 2013 +0400 @@ -24,8 +24,6 @@ #ifndef _SYS_DSL_DIR_H #define _SYS_DSL_DIR_H -#pragma ident "%Z%%M% %I% %E% SMI" - typedef struct dsl_dir_phys { uint64_t dd_creation_time; /* not actually used */ uint64_t dd_head_dataset_obj; diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/spa.h --- a/tools/libfsimage/zfs/zfs-include/spa.h Thu Oct 24 22:46:20 2013 +0100 +++ 
b/tools/libfsimage/zfs/zfs-include/spa.h Sat Oct 26 20:03:06 2013 +0400 @@ -16,11 +16,16 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + #ifndef _SYS_SPA_H #define _SYS_SPA_H @@ -65,7 +70,7 @@ /* * Size of block to hold the configuration data (a packed nvlist) */ -#define SPA_CONFIG_BLOCKSIZE (1 << 14) +#define SPA_CONFIG_BLOCKSIZE (1ULL << 14) /* * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB. diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/uberblock_impl.h --- a/tools/libfsimage/zfs/zfs-include/uberblock_impl.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs-include/uberblock_impl.h Sat Oct 26 20:03:06 2013 +0400 @@ -24,8 +24,6 @@ #ifndef _SYS_UBERBLOCK_IMPL_H #define _SYS_UBERBLOCK_IMPL_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * The uberblock version is incremented whenever an incompatible on-disk * format change is made to the SPA, DMU, or ZAP. diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/vdev_impl.h --- a/tools/libfsimage/zfs/zfs-include/vdev_impl.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs-include/vdev_impl.h Sat Oct 26 20:03:06 2013 +0400 @@ -24,12 +24,30 @@ #ifndef _SYS_VDEV_IMPL_H #define _SYS_VDEV_IMPL_H +/* helper macros */ +#undef offsetof +#if defined(__GNUC__) +#define offsetof(s, m) __builtin_offsetof(s, m) +#else +#define offsetof(s, m) ((size_t)(&(((s *)0)->m))) +#endif +#define MAX(x, y) ((x) > (y) ? 
(x) : (y)) + #define VDEV_PAD_SIZE (8 << 10) /* 2 padding areas (vl_pad1 and vl_pad2) to skip */ #define VDEV_SKIP_SIZE VDEV_PAD_SIZE * 2 #define VDEV_PHYS_SIZE (112 << 10) #define VDEV_UBERBLOCK_RING (128 << 10) +#define VDEV_UBERBLOCK_SHIFT(sh) \ + MAX((sh), UBERBLOCK_SHIFT) +#define VDEV_UBERBLOCK_COUNT(sh) \ + (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT(sh)) +#define VDEV_UBERBLOCK_OFFSET(sh, n) \ + offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT(sh)]) +#define VDEV_UBERBLOCK_SIZE(sh) \ + (1ULL << VDEV_UBERBLOCK_SHIFT(sh)) + typedef struct vdev_phys { char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_eck_t)]; zio_eck_t vp_zbt; diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/zap_leaf.h --- a/tools/libfsimage/zfs/zfs-include/zap_leaf.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs-include/zap_leaf.h Sat Oct 26 20:03:06 2013 +0400 @@ -24,8 +24,6 @@ #ifndef _SYS_ZAP_LEAF_H #define _SYS_ZAP_LEAF_H -#pragma ident "%Z%%M% %I% %E% SMI" - #define ZAP_LEAF_MAGIC 0x2AB1EAF /* chunk size = 24 bytes */ diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/zfs.h --- a/tools/libfsimage/zfs/zfs-include/zfs.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs-include/zfs.h Sat Oct 26 20:03:06 2013 +0400 @@ -16,9 +16,10 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_FS_ZFS_H @@ -27,7 +28,14 @@ /* * On-disk version number. 
*/ -#define SPA_VERSION 24ULL +#define SPA_VERSION_INITIAL 1ULL +#define SPA_VERSION_BEFORE_FEATURES 28ULL +#define SPA_VERSION 5000ULL +#define SPA_VERSION_FEATURES 5000ULL + +#define SPA_VERSION_IS_SUPPORTED(v) \ + (((v) >= SPA_VERSION_INITIAL && (v) <= SPA_VERSION_BEFORE_FEATURES) || \ + ((v) >= SPA_VERSION_FEATURES && (v) <= SPA_VERSION)) /* * The following are configuration names used in the nvlist describing a pool's @@ -67,6 +75,7 @@ #define ZPOOL_CONFIG_DDT_HISTOGRAM "ddt_histogram" #define ZPOOL_CONFIG_DDT_OBJ_STATS "ddt_object_stats" #define ZPOOL_CONFIG_DDT_STATS "ddt_stats" +#define ZPOOL_CONFIG_FEATURES_FOR_READ "features_for_read" /* * The persistent vdev state is stored as separate values rather than a single * 'vdev_state' entry. This is because a device can be in multiple states, such diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/zfs_acl.h --- a/tools/libfsimage/zfs/zfs-include/zfs_acl.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs-include/zfs_acl.h Sat Oct 26 20:03:06 2013 +0400 @@ -24,8 +24,6 @@ #ifndef _SYS_FS_ZFS_ACL_H #define _SYS_FS_ZFS_ACL_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifndef _UID_T #define _UID_T typedef unsigned int uid_t; /* UID type */ diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/zio.h --- a/tools/libfsimage/zfs/zfs-include/zio.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs-include/zio.h Sat Oct 26 20:03:06 2013 +0400 @@ -20,6 +20,9 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright 2013 by Saso Kiselkov. All rights reserved. 
+ */ #ifndef _ZIO_H #define _ZIO_H @@ -73,6 +76,17 @@ enum zio_compress { ZIO_COMPRESS_OFF, ZIO_COMPRESS_LZJB, ZIO_COMPRESS_EMPTY, + ZIO_COMPRESS_GZIP_1, + ZIO_COMPRESS_GZIP_2, + ZIO_COMPRESS_GZIP_3, + ZIO_COMPRESS_GZIP_4, + ZIO_COMPRESS_GZIP_5, + ZIO_COMPRESS_GZIP_6, + ZIO_COMPRESS_GZIP_7, + ZIO_COMPRESS_GZIP_8, + ZIO_COMPRESS_GZIP_9, + ZIO_COMPRESS_ZLE, + ZIO_COMPRESS_LZ4, ZIO_COMPRESS_FUNCTIONS }; diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs_fletcher.c --- a/tools/libfsimage/zfs/zfs_fletcher.c Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs_fletcher.c Sat Oct 26 20:03:06 2013 +0400 @@ -21,8 +21,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include "fsys_zfs.h" diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs_lz4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libfsimage/zfs/zfs_lz4.c Sat Oct 26 20:03:06 2013 +0400 @@ -0,0 +1,313 @@ +/* + * LZ4 - Fast LZ compression algorithm + * Header File + * Copyright (C) 2011-2013, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + */ + +#include "fsys_zfs.h" +#include <string.h> + +static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, + int isize, int maxOutputSize); + +int +lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len) +{ + const uint8_t *src = s_start; + uint32_t bufsiz = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | + src[3]; + + /* invalid compressed buffer size encoded at start */ + if (bufsiz + 4 > s_len) + return (1); + + /* + * Returns 0 on success (decompression function returned non-negative) + * and non-zero on failure (decompression function returned negative). + */ + return (LZ4_uncompress_unknownOutputSize(s_start + 4, d_start, bufsiz, + d_len) < 0); +} + +/* + * CPU Feature Detection + */ + +/* 32 or 64 bits ? */ +#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \ + defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \ + defined(__LP64__) || defined(_LP64)) +#define LZ4_ARCH64 1 +#else +#define LZ4_ARCH64 0 +#endif + +/* + * Little Endian or Big Endian? + * Note: overwrite the below #define if you know your architecture endianess. 
+ */ +#if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \ + defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \ + defined(__PPC) || defined(PPC) || defined(__powerpc__) || \ + defined(__powerpc) || defined(powerpc) || \ + ((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))) +#define LZ4_BIG_ENDIAN 1 +#else + /* + * Little Endian assumed. PDP Endian and other very rare endian format + * are unsupported. + */ +#endif + +/* + * Compiler Options + */ +#if __STDC_VERSION__ >= 199901L /* C99 */ +/* "restrict" is a known keyword */ +#else +/* Disable restrict */ +#define restrict +#endif + +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) \ + | (((x) & 0xffu) << 8))) + +#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) +#define expect(expr, value) (__builtin_expect((expr), (value))) +#else +#define expect(expr, value) (expr) +#endif + +#define likely(expr) expect((expr) != 0, 1) +#define unlikely(expr) expect((expr) != 0, 0) + +/* Basic types */ +#define BYTE uint8_t +#define U16 uint16_t +#define U32 uint32_t +#define S32 int32_t +#define U64 uint64_t + +typedef struct _U16_S { + U16 v; +} U16_S; +typedef struct _U32_S { + U32 v; +} U32_S; +typedef struct _U64_S { + U64 v; +} U64_S; + +#define A64(x) (((U64_S *)(x))->v) +#define A32(x) (((U32_S *)(x))->v) +#define A16(x) (((U16_S *)(x))->v) + +/* + * Constants + */ +#define MINMATCH 4 + +#define COPYLENGTH 8 +#define LASTLITERALS 5 + +#define ML_BITS 4 +#define ML_MASK ((1U<<ML_BITS)-1) +#define RUN_BITS (8-ML_BITS) +#define RUN_MASK ((1U<<RUN_BITS)-1) + +/* + * Architecture-specific macros + */ +#if LZ4_ARCH64 +#define STEPSIZE 8 +#define UARCH U64 +#define AARCH A64 +#define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8; +#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d) +#define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e) +#define HTYPE U32 +#define 
INITBASE(base) const BYTE* const base = ip +#else +#define STEPSIZE 4 +#define UARCH U32 +#define AARCH A32 +#define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4; +#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d); +#define LZ4_SECURECOPY LZ4_WILDCOPY +#define HTYPE const BYTE* +#define INITBASE(base) const int base = 0 +#endif + +#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE)) +#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ + { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; } +#define LZ4_WRITE_LITTLEENDIAN_16(p, i) \ + { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; } +#else +#define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); } +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; } +#endif + +/* Macros */ +#define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e); + +/* Decompression functions */ + +static int +LZ4_uncompress_unknownOutputSize(const char *source, + char *dest, int isize, int maxOutputSize) +{ + /* Local Variables */ + const BYTE *restrict ip = (const BYTE *) source; + const BYTE *const iend = ip + isize; + const BYTE *restrict ref; + + BYTE *restrict op = (BYTE *) dest; + BYTE *const oend = op + maxOutputSize; + BYTE *cpy; + + size_t dec[] = { 0, 3, 2, 3, 0, 0, 0, 0 }; + + /* Main Loop */ + while (ip < iend) { + BYTE token; + int length; + + /* get runlength */ + token = *ip++; + if ((length = (token >> ML_BITS)) == RUN_MASK) { + int s = 255; + while ((ip < iend) && (s == 255)) { + s = *ip++; + length += s; + } + } + /* copy literals */ + cpy = op + length; + if ((cpy > oend - COPYLENGTH) || + (ip + length > iend - COPYLENGTH)) { + if (cpy > oend) + /* + * Error: request to write beyond destination + * buffer. + */ + goto _output_error; + if (ip + length > iend) + /* + * Error : request to read beyond source + * buffer. 
+ */ + goto _output_error; + memcpy(op, ip, length); + op += length; + ip += length; + if (ip < iend) + /* Error : LZ4 format violation */ + goto _output_error; + /* Necessarily EOF, due to parsing restrictions. */ + break; + } + LZ4_WILDCOPY(ip, op, cpy); + ip -= (op - cpy); + op = cpy; + + /* get offset */ + LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); + ip += 2; + if (ref < (BYTE * const) dest) + /* + * Error: offset creates reference outside of + * destination buffer. + */ + goto _output_error; + + /* get matchlength */ + if ((length = (token & ML_MASK)) == ML_MASK) { + while (ip < iend) { + int s = *ip++; + length += s; + if (s == 255) + continue; + break; + } + } + /* copy repeated sequence */ + if unlikely(op - ref < STEPSIZE) { +#if LZ4_ARCH64 + size_t dec2table[] = { 0, 0, 0, -1, 0, 1, 2, 3 }; + size_t dec2 = dec2table[op - ref]; +#else + const int dec2 = 0; +#endif + *op++ = *ref++; + *op++ = *ref++; + *op++ = *ref++; + *op++ = *ref++; + ref -= dec[op - ref]; + A32(op) = A32(ref); + op += STEPSIZE - 4; + ref -= dec2; + } else { + LZ4_COPYSTEP(ref, op); + } + cpy = op + length - (STEPSIZE - 4); + if (cpy > oend - COPYLENGTH) { + if (cpy > oend) + /* + * Error: request to write outside of + * destination buffer. + */ + goto _output_error; + LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); + while (op < cpy) + *op++ = *ref++; + op = cpy; + if (op == oend) + /* + * Check EOF (should never happen, since last + * 5 bytes are supposed to be literals). + */ + break; + continue; + } + LZ4_SECURECOPY(ref, op, cpy); + op = cpy; /* correction */ + } + + /* end of decoding */ + return (int)(((char *)op) - dest); + + /* write overflow error detected */ + _output_error: + return (int)(-(((char *)ip) - source)); +} diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs_lzjb.c --- a/tools/libfsimage/zfs/zfs_lzjb.c Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs_lzjb.c Sat Oct 26 20:03:06 2013 +0400 @@ -21,8 +21,6 @@ * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include "fsys_zfs.h" #define MATCH_BITS 6 @@ -34,10 +32,10 @@ int lzjb_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len) { - uint8_t *src = s_start; - uint8_t *dst = d_start; - uint8_t *d_end = (uint8_t *)d_start + d_len; - uint8_t *cpy, copymap = '\0'; + uchar_t *src = s_start; + uchar_t *dst = d_start; + uchar_t *d_end = (uchar_t *)d_start + d_len; + uchar_t *cpy, copymap = '\0'; int copymask = 1 << (NBBY - 1); while (dst < d_end) { @@ -49,7 +47,7 @@ lzjb_decompress(void *s_start, void *d_s int mlen = (src[0] >> (NBBY - MATCH_BITS)) + MATCH_MIN; int offset = ((src[0] << NBBY) | src[1]) & OFFSET_MASK; src += 2; - if ((cpy = dst - offset) < (uint8_t *)d_start) + if ((cpy = dst - offset) < (uchar_t *)d_start) return (-1); while (--mlen >= 0 && dst < d_end) *dst++ = *cpy++; diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs_sha256.c --- a/tools/libfsimage/zfs/zfs_sha256.c Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs_sha256.c Sat Oct 26 20:03:06 2013 +0400 @@ -21,8 +21,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include "fsys_zfs.h" /* _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |