[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-users] GlusterFS spewing errors with Xen tap:aio block driver



Greetings,

I'm trying to get to the bottom of a problem I'm having combining Xen and GlusterFS. I've googled this extensively with no success. Every once in a while (more frequently than I'd like in a production environment), the GlusterFS client will start spewing errors into glusterfs.log similar to:

2008-10-08 16:49:21 E [client-protocol.c:1158:client_writev] brick-gridcpu02: : returning EBADFD
2008-10-08 16:49:21 E [fuse-bridge.c:1645:fuse_writev_cbk] glusterfs-fuse: 352231064: WRITE => -1 (77)
2008-10-08 16:49:53 E [client-protocol.c:1158:client_writev] brick-gridcpu02: : returning EBADFD
2008-10-08 16:49:53 E [fuse-bridge.c:1645:fuse_writev_cbk] glusterfs-fuse: 352231241: WRITE => -1 (77)
2008-10-08 16:49:53 E [client-protocol.c:1158:client_writev] brick-gridcpu02: : returning EBADFD
2008-10-08 16:49:53 E [fuse-bridge.c:1645:fuse_writev_cbk] glusterfs-fuse: 352231243: WRITE => -1 (77)

It will literally fill the disk up in a matter of hours if we don't catch it early enough. This seems to only happen around disk images opened by the Xen tap:aio block driver, and it goes away if I "xm destroy" the right virtual instance or "xm migrate" it to another Xen server. The DomU itself shows a disk error in dmesg and remounts the disk read-only. How much functionality remains in the DomU tends to vary from being able to cleanly shut it down, to not even being able to log in to run the shutdown command.

I'm going to cross-post this in both Xen and GlusterFS user mailing lists, so I hope to get a response from one side or the other.

Specs are:
Hardware: Dell PowerEdge (1955 I think), with PERC3 SCSI disks in RAID1
OS: Ubuntu 8.04, amd64, Kernel 2.6.24-19-xen
GlusterFS 1.3.10, 1.3.10-0ubuntu1~hardy2 from https://launchpad.net/~neil-aldur/+ppa-packages
Xen 3.2.0, 3.2.0-0ubuntu10, from Ubuntu

GlusterFS client configuration:
# file: /etc/glusterfs/glusterfs-client.vol
volume brick-gridfs01
 type protocol/client
 option transport-type tcp/client
 option remote-host atl1gridfs01
 option remote-port 6997
 option remote-subvolume brick
end-volume

volume brick-gridcpu01
 type protocol/client
 option transport-type tcp/client
 option remote-host atl1gridcpu01
 option remote-port 6997
 option remote-subvolume brick
end-volume

volume brick-gridcpu02
 type protocol/client
 option transport-type tcp/client
 option remote-host atl1gridcpu02
 option remote-port 6997
 option remote-subvolume brick
end-volume

volume brick-gridcpu03
 type protocol/client
 option transport-type tcp/client
 option remote-host atl1gridcpu03
 option remote-port 6997
 option remote-subvolume brick
end-volume

volume brick-gridcpu04
 type protocol/client
 option transport-type tcp/client
 option remote-host atl1gridcpu04
 option remote-port 6997
 option remote-subvolume brick
end-volume

volume namespace-gridfs01
 type protocol/client
 option transport-type tcp/client
 option remote-host atl1gridfs01
 option remote-port 6997
 option remote-subvolume brick-ns
end-volume

volume unify0
 type cluster/unify
 option scheduler alu
 option alu.limits.min-free-disk  5%
 option alu.limits.max-open-files 10000
 option alu.order disk-usage:read-usage:write-usage:open-files-usage:disk-speed-usage
 option alu.disk-usage.entry-threshold 2GB # Kick in if the discrepancy in disk-usage between volumes is more than 2GB
 option alu.disk-usage.exit-threshold 60MB # Don't stop writing to the least-used volume until the discrepancy is 1988MB
 option alu.open-files-usage.entry-threshold 1024 # Kick in if the discrepancy in open files is 1024
 option alu.open-files-usage.exit-threshold 32 # Don't stop until 992 files have been written to the least-used volume
 # option alu.read-usage.entry-threshold 20% # Kick in when the read-usage discrepancy is 20%
 # option alu.read-usage.exit-threshold 4% # Don't stop until the discrepancy has been reduced to 16% (20% - 4%)
 # option alu.write-usage.entry-threshold 20% # Kick in when the write-usage discrepancy is 20%
 # option alu.write-usage.exit-threshold 4% # Don't stop until the discrepancy has been reduced to 16%
 # option alu.disk-speed-usage.entry-threshold # NEVER SET IT. SPEED IS CONSTANT!!!
 # option alu.disk-speed-usage.exit-threshold # NEVER SET IT. SPEED IS CONSTANT!!!
 option alu.stat-refresh.interval 10sec # Refresh the statistics used for decision-making every 10 seconds
# option alu.stat-refresh.num-file-create 10
 option namespace namespace-gridfs01
 subvolumes brick-gridfs01 brick-gridcpu01 brick-gridcpu02 brick-gridcpu03 brick-gridcpu04
end-volume


GlusterFS Server Config:
# file: /etc/glusterfs/glusterfs-server.vol
volume posix
 type storage/posix
 option directory /opt/gridfs/export
end-volume

volume plocks
 type features/posix-locks
 subvolumes posix
end-volume

volume brick
 type performance/io-threads
 option thread-count 4
 subvolumes plocks
end-volume

volume brick-ns
 type storage/posix
 option directory /opt/gridfs/namespace
end-volume

volume server
 type protocol/server
 option transport-type tcp/server
 option listen-port 6997
 option auth.ip.brick.allow *
 option auth.ip.brick-ns.allow *
 subvolumes brick brick-ns
end-volume


Jim Phillips
jim@xxxxxxxxxxxxxx


_______________________________________________
Xen-users mailing list
Xen-users@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-users


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.