[Pkg-ceph-commits] [ceph-dkms] 02/02: Imported Upstream version 3.14+git20140429
Dmitry Smirnov
onlyjob at moszumanska.debian.org
Thu May 8 20:10:42 UTC 2014
This is an automated email from the git hooks/post-receive script.
onlyjob pushed a commit to branch upstream
in repository ceph-dkms.
commit 29507ad (upstream)
Author: Dmitry Smirnov <onlyjob at member.fsf.org>
Date: Thu May 8 13:36:04 2014
Imported Upstream version 3.14+git20140429
---
ChangeLog | 18347 +++++++++++++++++++++++++++++++++++++++++++
ceph/Kconfig | 40 +
ceph/Makefile | 13 +
ceph/acl.c | 200 +
ceph/addr.c | 1345 ++++
ceph/cache.c | 402 +
ceph/cache.h | 182 +
ceph/caps.c | 3313 ++++++++
ceph/ceph_frag.c | 22 +
ceph/debugfs.c | 277 +
ceph/dir.c | 1349 ++++
ceph/export.c | 250 +
ceph/file.c | 1294 +++
ceph/inode.c | 1927 +++++
ceph/ioctl.c | 296 +
ceph/ioctl.h | 100 +
ceph/locks.c | 338 +
ceph/mds_client.c | 3665 +++++++++
ceph/mds_client.h | 393 +
ceph/mdsmap.c | 189 +
ceph/snap.c | 932 +++
ceph/strings.c | 124 +
ceph/super.c | 1061 +++
ceph/super.h | 890 +++
ceph/xattr.c | 1128 +++
keys/ceph-type.h | 8 +
libceph/Kconfig | 43 +
libceph/Makefile | 15 +
libceph/armor.c | 105 +
libceph/auth.c | 340 +
libceph/auth_none.c | 137 +
libceph/auth_none.h | 29 +
libceph/auth_x.c | 711 ++
libceph/auth_x.h | 51 +
libceph/auth_x_protocol.h | 90 +
libceph/buffer.c | 58 +
libceph/ceph_common.c | 664 ++
libceph/ceph_fs.c | 78 +
libceph/ceph_hash.c | 121 +
libceph/ceph_strings.c | 123 +
libceph/crush/crush.c | 129 +
libceph/crush/hash.c | 149 +
libceph/crush/mapper.c | 819 ++
libceph/crypto.c | 487 ++
libceph/crypto.h | 51 +
libceph/debugfs.c | 282 +
libceph/messenger.c | 3316 ++++++++
libceph/mon_client.c | 1102 +++
libceph/msgpool.c | 83 +
libceph/osd_client.c | 2904 +++++++
libceph/osdmap.c | 1724 ++++
libceph/pagelist.c | 147 +
libceph/pagevec.c | 231 +
libceph/snapshot.c | 78 +
linux/ceph/auth.h | 116 +
linux/ceph/buffer.h | 38 +
linux/ceph/ceph_debug.h | 38 +
linux/ceph/ceph_features.h | 104 +
linux/ceph/ceph_frag.h | 109 +
linux/ceph/ceph_fs.h | 789 ++
linux/ceph/ceph_hash.h | 13 +
linux/ceph/debugfs.h | 33 +
linux/ceph/decode.h | 259 +
linux/ceph/libceph.h | 230 +
linux/ceph/mdsmap.h | 63 +
linux/ceph/messenger.h | 304 +
linux/ceph/mon_client.h | 121 +
linux/ceph/msgpool.h | 26 +
linux/ceph/msgr.h | 176 +
linux/ceph/osd_client.h | 374 +
linux/ceph/osdmap.h | 225 +
linux/ceph/pagelist.h | 75 +
linux/ceph/rados.h | 436 +
linux/ceph/types.h | 29 +
linux/crush/crush.h | 201 +
linux/crush/hash.h | 17 +
linux/crush/mapper.h | 20 +
rbd/Kconfig | 560 ++
rbd/Makefile | 49 +
rbd/rbd.c | 5406 +++++++++++++
rbd/rbd_types.h | 81 +
81 files changed, 62044 insertions(+)
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..5991140
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,18347 @@
+2014-04-27 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: reserve caps for file layout/lock MDS requests
+
+2014-04-17 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: avoid releasing caps that are being used
+
+2014-04-14 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: clear directory's completeness when creating file
+
+2014-04-10 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: fix non-default values check in apply_primary_affinity()
+
+2014-04-09 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: use fpos_cmp() to compare dentry positions
+
+2014-04-08 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: check directory's completeness before emitting directory entry
+
+2014-04-06 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: skip invalid dentry during dcache readdir
+
+2014-04-04 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: dump pool {read,write}_tier to debugfs
+
+2014-04-02 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: output primary affinity values on osdmap updates
+
+2014-04-01 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: flush cap release queue when trimming session caps
+
+2014-04-01 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: don't grab open file reference for aborted request
+
+2014-04-01 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: drop extra open file reference in ceph_atomic_open()
+
+2014-03-29 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: preallocate buffer for readdir reply
+
+2014-03-24 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: enable PRIMARY_AFFINITY feature bit
+
+2014-03-24 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: redo ceph_calc_pg_primary() in terms of ceph_calc_pg_acting()
+
+2014-03-24 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: add support for osd primary affinity
+
+2014-03-24 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: add support for primary_temp mappings
+
+2014-03-24 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: return primary from ceph_calc_pg_acting()
+
+2014-03-24 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: switch ceph_calc_pg_acting() to new helpers
+
+2014-03-24 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: introduce apply_temps() helper
+
+2014-03-24 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: introduce pg_to_raw_osds() and raw_to_up_osds() helpers
+
+2014-03-24 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: ceph_can_shift_osds(pool) and pool type defines
+
+2014-03-24 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: ceph_osd_{exists,is_up,is_down}(osd) definitions
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: enable OSDMAP_ENC feature bit
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: primary_affinity decode bits
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: primary_affinity infrastructure
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: primary_temp decode bits
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: primary_temp infrastructure
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: generalize ceph_pg_mapping
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: introduce get_osdmap_client_data_v()
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: introduce decode{,_new}_pg_temp() and switch to them
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: switch osdmap_set_max_osd() to krealloc()
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: introduce decode{,_new}_pools() and switch to them
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: rename __decode_pool{,_names}() to decode_pool{,_names}()
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: fix and clarify ceph_decode_need() sizes
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: nuke bogus encoding version check in osdmap_apply_incremental()
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: fixup error handling in osdmap_apply_incremental()
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: fix crush_decode() call site in osdmap_decode()
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: check length of osdmap osd arrays
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: safely decode max_osd value in osdmap_decode()
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: fixup error handling in osdmap_decode()
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: split osdmap allocation and decode steps
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: dump osdmap and enhance output on decode errors
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: dump pg_temp mappings to debugfs
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: do not prefix osd lines with \t in debugfs output
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: refer to osdmap directly in osdmap_show()
+
+2014-03-19 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * crush: support chooseleaf_vary_r tunable (tunables3) by default
+
+2014-03-19 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * crush: add SET_CHOOSELEAF_VARY_R step
+
+2014-03-19 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * crush: add chooseleaf_vary_r tunable
+
+2014-03-19 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * crush: allow crush rules to set (re)tries counts to 0
+
+2014-03-19 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * crush: fix off-by-one errors in total_tries refactor
+
+2014-03-24 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: don't include ceph.{file,dir}.layout vxattr in listxattr()
+
+2014-03-24 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: check buffer size in ceph_vxattrcb_layout()
+
+2014-03-24 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: fix null pointer dereference in discard_cap_releases()
+
+2014-03-23 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * libceph: fix oops in ceph_msg_data_{pages,pagelist}_advance()
+
+2014-03-21 Fabian Frederick <fabf at skynet.be>
+
+ * ceph: Remove get/set acl on symlinks
+
+2014-03-18 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: set mds_wanted when MDS reply changes a cap to auth cap
+
+2014-03-09 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: use fl->fl_file as owner identifier of flock and posix lock
+
+2014-03-04 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: forbid mandatory file lock
+
+2014-03-04 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: use fl->fl_type to decide flock operation
+
+2014-03-08 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: update i_max_size even if inode version does not change
+
+2014-03-08 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: make sure write caps are registered with auth MDS
+
+2014-03-01 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: print inode number for LOOKUPINO request
+
+2014-03-06 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: add get_name() NFS export callback
+
+2014-03-01 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: fix ceph_fh_to_parent()
+
+2014-03-01 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: add get_parent() NFS export callback
+
+2014-03-01 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: simplify ceph_fh_to_dentry()
+
+2013-12-26 Yunchuan Wen <yunchuanwen at ubuntukylin.com>
+
+ * ceph: fscache: Wait for completion of object initialization
+
+2013-12-26 Yunchuan Wen <yunchuanwen at ubuntukylin.com>
+
+ * ceph: fscache: Update object store limit after file writing
+
+2013-12-26 Yunchuan Wen <yunchuanwen at ubuntukylin.com>
+
+ * ceph: fscache: add an interface to synchronize object store limit
+
+2013-02-05 Sage Weil <sage at inktank.com>
+
+ * ceph: do not set r_old_dentry_dir on link()
+
+2013-02-05 Sage Weil <sage at inktank.com>
+
+ * ceph: do not assume r_old_dentry[_dir] always set together
+
+2013-02-05 Sage Weil <sage at inktank.com>
+
+ * ceph: do not chain inode updates to parent fsync
+
+2013-02-05 Sage Weil <sage at inktank.com>
+
+ * ceph: avoid useless ceph_get_dentry_parent_inode() in ceph_rename()
+
+2014-03-03 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: let MDS adjust readdir 'frag'
+
+2014-02-28 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: fix reset_readdir()
+
+2014-02-27 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: fix ceph_dir_llseek()
+
+2014-02-25 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * rbd: prefix rbd writes with CEPH_OSD_OP_SETALLOCHINT osd op
+
+2014-02-25 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * rbd: num_ops parameter for rbd_osd_req_create()
+
+2014-02-25 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: bump CEPH_OSD_MAX_OP to 3
+
+2014-02-25 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: add support for CEPH_OSD_OP_SETALLOCHINT osd op
+
+2014-02-25 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: encode CEPH_OSD_OP_FLAG_* op flags
+
+2014-03-04 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * rbd: fix error paths in rbd_img_request_fill()
+
+2014-03-04 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * rbd: remove out_partial label in rbd_img_request_fill()
+
+2014-01-31 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: a per-osdc crush scratch buffer
+
+2014-03-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.14
+
+2014-03-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
+
+2014-03-28 Randy Dunlap <rdunlap at infradead.org>
+
+ * MAINTAINERS: resume as Documentation maintainer
+
+2014-03-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
+
+2014-03-30 Eric Paris <eparis at redhat.com>
+
+ * AUDIT: Allow login in non-init namespaces
+
+2014-03-30 Theodore Ts'o <tytso at mit.edu>
+
+ * ext4: atomically set inode->i_flags in ext4_set_inode_flags()
+
+2014-03-20 Al Viro <viro at zeniv.linux.org.uk>
+
+ * switch mnt_hash to hlist
+
+2014-03-21 Al Viro <viro at zeniv.linux.org.uk>
+
+ * don't bother with propagate_mnt() unless the target is shared
+
+2014-03-20 Al Viro <viro at zeniv.linux.org.uk>
+
+ * keep shadowed vfsmounts together
+
+2014-02-28 Al Viro <viro at zeniv.linux.org.uk>
+
+ * resizable namespace.c hashes
+
+2014-03-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
+
+2014-03-06 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Input: mousedev - fix race when creating mixed device
+
+2014-03-29 Elias Vanderstuyft <elias.vds at gmail.com>
+
+ * Input: don't modify the id of ioctl-provided ff effect on upload failure
+
+2014-03-25 Alex Elder <elder at linaro.org>
+
+ * rbd: drop an unsafe assertion
+
+2014-03-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2014-03-28 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'vlan_offloads'
+
+2014-03-27 Vlad Yasevich <vyasevic at redhat.com>
+
+ * vlan: Warn the user if lowerdev has bad vlan features.
+
+2014-03-27 Vlad Yasevich <vyasevic at redhat.com>
+
+ * veth: Turn off vlan rx acceleration in vlan_features
+
+2014-03-27 Vlad Yasevich <vyasevic at redhat.com>
+
+ * ifb: Remove vlan acceleration from vlan_features
+
+2014-03-27 Vlad Yasevich <vyasevic at redhat.com>
+
+ * qlge: Do not propagate vlan tag offloads to vlans
+
+2014-03-27 Vlad Yasevich <vyasevic at redhat.com>
+
+ * bridge: Fix crash with vlan filtering and tcpdump
+
+2014-03-27 Vlad Yasevich <vyasevic at redhat.com>
+
+ * net: Account for all vlan headers in skb_mac_gso_segment
+
+2014-03-27 Veaceslav Falico <vfalico at redhat.com>
+
+ * MAINTAINERS: bonding: change email address
+
+2014-03-27 Jay Vosburgh <fubar at us.ibm.com>
+
+ * MAINTAINERS: bonding: change email address
+
+2014-03-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (patches from Andrew Morton)
+
+2014-03-28 Artem Fetishev <artem_fetishev at epam.com>
+
+ * x86: fix boot on uniprocessor systems
+
+2014-03-28 Sasha Levin <sasha.levin at oracle.com>
+
+ * ocfs2: check if cluster name exists before deref
+
+2014-03-27 Hannes Frederic Sowa <hannes at stressinduktion.org>
+
+ * ipv6: move DAD and addrconf_verify processing to workqueue
+
+2014-03-27 Eric Dumazet <edumazet at google.com>
+
+ * tcp: fix get_timewait4_sock() delay computation on 64bit
+
+2014-03-27 Flavio Leitner <fbl at redhat.com>
+
+ * openvswitch: fix a possible deadlock and lockdep warning
+
+2014-03-27 Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>
+
+ * bridge: Fix handling stacked vlan tags
+
+2014-03-27 Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>
+
+ * bridge: Fix inability to retrieve vlan tags when tx offload is disabled
+
+2014-03-27 Michael S. Tsirkin <mst at redhat.com>
+
+ * vhost: validate vhost_get_vq_desc return value
+
+2014-03-27 Michael S. Tsirkin <mst at redhat.com>
+
+ * vhost: fix total length when packets are too short
+
+2014-03-28 Sasha Levin <sasha.levin at oracle.com>
+
+ * random32: avoid attempt to late reseed if in the middle of seeding
+
+2014-03-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
+
+2014-03-27 Sasha Levin <sasha.levin at oracle.com>
+
+ * random32: assign to network folks in MAINTAINERS
+
+2014-03-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2014-03-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'i2c/for-current' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux
+
+2014-03-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'stable/for-linus-3.14-rc8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
+
+2014-03-28 Hans de Goede <hdegoede at redhat.com>
+
+ * Input: synaptics - add manual min/max quirk for ThinkPad X240
+
+2014-03-28 Benjamin Tissoires <benjamin.tissoires at redhat.com>
+
+ * Input: synaptics - add manual min/max quirk
+
+2014-03-27 John Stultz <john.stultz at linaro.org>
+
+ * time: Revert to calling clock_was_set_delayed() while in irq context
+
+2014-03-26 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915: Undo gtt scratch pte unmapping again
+
+2014-03-27 Dave Airlie <airlied at redhat.com>
+
+ * drm/radeon: fix runtime suspend breaking secondary GPUs
+
+2014-03-27 Wei Yang <weiyang at linux.vnet.ibm.com>
+
+ * net/mlx4_core: pass pci_device_id.driver_data to __mlx4_init_one during reset
+
+2014-03-26 Zoltan Kiss <zoltan.kiss at citrix.com>
+
+ * core, nfqueue, openvswitch: Orphan frags in skb_zerocopy and handle errors
+
+2014-03-26 Vlad Yasevich <vyasevic at redhat.com>
+
+ * vlan: Set hard_header_len according to available acceleration
+
+2014-03-26 Oliver Neukum <oneukum at suse.de>
+
+ * usbnet: include wait queue head in device structure
+
+2014-03-26 Jason Wang <jasowang at redhat.com>
+
+ * virtio-net: correct error handling of virtqueue_kick()
+
+2014-03-26 Jan Kara <jack at suse.cz>
+
+ * vfs: Allocate anon_inode_inode in anon_inode_init()
+
+2014-03-26 Dave Airlie <airlied at redhat.com>
+
+ * drm/nouveau: fail runtime pm properly.
+
+2014-03-25 Dave Airlie <airlied at redhat.com>
+
+ * drm/udl: take reference to device struct for dma-bufs
+
+2014-03-25 Eric Dumazet <edumazet at google.com>
+
+ * net: unix: non blocking recvmsg() should not return -EINTR
+
+2014-03-26 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'mvneta'
+
+2014-03-26 Thomas Petazzoni <thomas.petazzoni at free-electrons.com>
+
+ * net: mvneta: use devm_ioremap_resource() instead of of_iomap()
+
+2014-03-26 Thomas Petazzoni <thomas.petazzoni at free-electrons.com>
+
+ * net: mvneta: fix usage as a module on RGMII configurations
+
+2014-03-26 Thomas Petazzoni <thomas.petazzoni at free-electrons.com>
+
+ * net: mvneta: rename MVNETA_GMAC2_PSC_ENABLE to MVNETA_GMAC2_PCS_ENABLE
+
+2014-03-26 Hans de Goede <hdegoede at redhat.com>
+
+ * Input: cypress_ps2 - don't report as button pads
+
+2014-03-24 Vlad Yasevich <vyasevic at redhat.com>
+
+ * tg3: Do not include vlan acceleration features in vlan_features
+
+2014-03-23 Pravin B Shelar <pshelar at nicira.com>
+
+ * ip_tunnel: Fix dst ref-count.
+
+2014-03-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'trace-fixes-v3.14-rc7-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
+
+2014-03-25 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * tracing: Fix traceon trigger condition to actually turn tracing on
+
+2014-03-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * fs: remove now stale label in anon_inode_init()
+
+2014-03-25 Jan Kara <jack at suse.cz>
+
+ * fs: Avoid userspace mounting anon_inodefs filesystem
+
+2014-03-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'nfsd-next' of git://linux-nfs.org/~bfields/linux
+
+2014-03-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
+
+2014-03-25 David Vrabel <david.vrabel at citrix.com>
+
+ * Revert "xen: properly account for _PAGE_NUMA during xen pte translations"
+
+2014-03-15 Wei Liu <wei.liu2 at citrix.com>
+
+ * xen/balloon: flush persistent kmaps in correct position
+
+2014-03-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.14-rc8
+
+2014-03-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'parisc-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
+
+2014-03-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc
+
+2014-03-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2014-03-24 Erik Hugne <erik.hugne at ericsson.com>
+
+ * tipc: fix spinlock recursion bug for failed subscriptions
+
+2014-03-24 David Stevens <dlstevens at us.ibm.com>
+
+ * vxlan: fix nonfunctional neigh_reduce()
+
+2014-03-24 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'davinci_emac'
+
+2014-03-24 Christian Riesch <christian.riesch at omicron.at>
+
+ * net: davinci_emac: Fix rollback of emac_dev_open()
+
+2014-03-24 Christian Riesch <christian.riesch at omicron.at>
+
+ * net: davinci_emac: Replace devm_request_irq with request_irq
+
+2014-03-21 Li RongQing <roy.qing.li at gmail.com>
+
+ * netpoll: fix the skb check in pkt_is_ns
+
+2014-03-24 David S. Miller <davem at davemloft.net>
+
+ * sparc64: Make sure %pil interrupts are enabled during hypervisor yield.
+
+2014-03-18 Scott Wood <scottwood at freescale.com>
+
+ * i2c: cpm: Fix build by adding of_address.h and of_irq.h
+
+2014-03-21 Nishanth Menon <nm at ti.com>
+
+ * net: micrel : ks8851-ml: add vdd-supply support
+
+2014-02-21 Will Deacon <will.deacon at arm.com>
+
+ * parisc: locks: remove redundant arch_*_relax operations
+
+2014-03-23 Helge Deller <deller at gmx.de>
+
+ * parisc: wire up sys_utimes
+
+2014-03-01 John David Anglin <dave.anglin at bell.net>
+
+ * parisc: Remove unused CONFIG_PARISC_TMPALIAS code
+
+2014-03-23 Helge Deller <deller at gmx.de>
+
+ * partly revert commit 8a10bc9: parisc/sti_console: prefer Linux fonts over built-in ROM fonts
+
+2014-03-20 Al Viro <viro at zeniv.linux.org.uk>
+
+ * rcuwalk: recheck mount_lock after mountpoint crossing attempts
+
+2014-03-23 Al Viro <viro at zeniv.linux.org.uk>
+
+ * make prepend_name() work correctly when called with negative *buflen
+
+2014-03-16 Eric Biggers <ebiggers3 at gmail.com>
+
+ * vfs: Don't let __fdget_pos() get FMODE_PATH files
+
+2014-03-16 Eric Biggers <ebiggers3 at gmail.com>
+
+ * vfs: atomic f_pos access in llseek()
+
+2014-03-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2014-03-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2014-03-20 Dave Jones <davej at redhat.com>
+
+ * block: free q->flush_rq in blk_init_allocated_queue error paths
+
+2014-03-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * futex: revert back to the explicit waiter counting code
+
+2014-03-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'trace-fixes-v3.14-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
+
+2014-03-20 Hugh Dickins <hughd at google.com>
+
+ * mm: fix swapops.h:131 bug if remap_file_pages raced migration
+
+2014-03-20 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jesse/openvswitch
+
+2014-03-19 Nicolas Dichtel <nicolas.dichtel at 6wind.com>
+
+ * ip6mr: fix mfc notification flags
+
+2014-03-19 Nicolas Dichtel <nicolas.dichtel at 6wind.com>
+
+ * ipmr: fix mfc notification flags
+
+2014-03-19 Nicolas Dichtel <nicolas.dichtel at 6wind.com>
+
+ * rtnetlink: fix fdb notification flags
+
+2014-03-19 Eric Dumazet <edumazet at google.com>
+
+ * tcp: syncookies: do not use getnstimeofday()
+
+2014-03-19 stephen hemminger <shemming at brocade.com>
+
+ * netlink: fix setsockopt in mmap examples in documentation
+
+2014-03-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'upstream' of git://git.linux-mips.org/pub/scm/ralf/upstream-linus
+
+2014-03-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2014-03-20 Ben Pfaff <blp at nicira.com>
+
+ * openvswitch: Correctly report flow used times for first 5 minutes after boot.
+
+2014-02-13 Vaibhav Nagarnaik <vnagarnaik at google.com>
+
+ * tracing: Fix array size mismatch in format string
+
+2013-11-27 Jim Quinlan <jim2101024 at gmail.com>
+
+ * MIPS: Make local_irq_disable macro safe for non-Mipsr2
+
+2014-03-20 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'exynos-drm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/daeinki/drm-exynos into drm-fixes
+
+2014-03-17 Daniel Kurtz <djkurtz at chromium.org>
+
+ * drm/exynos: Fix (more) freeing issues in exynos_drm_drv.c
+
+2014-03-18 Hugh Dickins <hughd at google.com>
+
+ * mm: fix bad rss-counter if remap_file_pages raced migration
+
+2014-03-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pci-v3.14-fixes-3' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
+
+2014-03-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
+
+2014-03-19 Andreas Herrmann <andreas.herrmann at caviumnetworks.com>
+
+ * MIPS: Octeon: Fix warning in of_device_alloc on cn3xxx
+
+2014-03-18 Viller Hsiao <villerhsiao at gmail.com>
+
+ * MIPS: ftrace: Tweak safe_load()/safe_store() macros
+
+2014-03-20 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm-intel-fixes-2014-03-19' of git://anongit.freedesktop.org/drm-intel into drm-fixes
+
+2014-03-18 Roger Luethi <rl at hellgate.ch>
+
+ * via-rhine: Disable device in error path
+
+2014-03-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-19 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ALSA: compress: Pass through return value of open ops callback
+
+2014-02-13 Rafał Miłecki <zajec5 at gmail.com>
+
+ * MIPS: BCM47XX: Check all (32) GPIOs when looking for a pin
+
+2014-03-18 Chris Wilson <chris at chris-wilson.co.uk>
+
+ * drm/i915: Disable stolen memory when DMAR is active
+
+2014-03-17 Jani Nikula <jani.nikula at intel.com>
+
+ * Revert "drm/i915: don't touch the VDD when disabling the panel"
+
+2014-03-18 Li Zefan <lizefan at huawei.com>
+
+ * cgroup: fix a failure path in create_css()
+
+2014-03-18 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Revert "[PATCH] Insert GART region into resource map"
+
+2014-03-18 Peter Senna Tschudin <peter.senna at gmail.com>
+
+ * ATHEROS-ATL1E: Convert iounmap to pci_iounmap
+
+2014-03-18 David Stevens <dlstevens at us.ibm.com>
+
+ * vxlan: fix potential NULL dereference in arp_reduce()
+
+2014-03-18 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'cnic-net'
+
+2014-03-17 Michael Chan <mchan at broadcom.com>
+
+ * cnic: Update version to 2.5.20 and copyright year.
+
+2014-03-17 Michael Chan <mchan at broadcom.com>
+
+ * cnic,bnx2i,bnx2fc: Fix inconsistent use of page size
+
+2014-03-17 Michael Chan <mchan at broadcom.com>
+
+ * cnic: Use proper ulp_ops for per device operations.
+
+2014-03-17 Bjørn Mork <bjorn at mork.no>
+
+ * net: cdc_ncm: fix control message ordering
+
+2014-03-17 lucien <lucien.xin at gmail.com>
+
+ * ipv6: ip6_append_data_mtu do not handle the mtu of the second fragment properly
+
+2014-03-16 Paul Bolle <pebolle at tiscali.nl>
+
+ * isdn/capi: Make Middleware depend on CAPI2.0
+
+2014-03-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
+
+2014-03-18 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec
+
+2014-03-18 Clemens Ladisch <clemens at ladisch.de>
+
+ * ALSA: oxygen: Xonar DG(X): fix Stereo Upmixing regression
+
+2014-03-18 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2014-01-21 Alex Smith <alex.smith at imgtec.com>
+
+ * MIPS: Fix possible build error with transparent hugepages enabled
+
+2014-01-30 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm: Fix use-after-free in the shadow-attache exit code
+
+2014-03-18 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm-intel-fixes-2014-03-17' of git://anongit.freedesktop.org/drm-intel into drm-fixes
+
+2014-03-16 Benedikt Spranger <b.spranger at linutronix.de>
+
+ * net: cpsw: do not register cpts twice
+
+2014-03-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.dk/linux-block
+
+2014-03-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'upstream' of git://git.linux-mips.org/pub/scm/ralf/upstream-linus
+
+2014-03-16 Peter Senna Tschudin <peter.senna at gmail.com>
+
+ * ATHEROS-ALX: Use dma_set_mask_and_coherent and fix a bug
+
+2014-03-07 Doug Wilson <doug.lkml at gmail.com>
+
+ * sparc64:tsb.c:use array size macro rather than number
+
+2014-03-14 Dave Kleikamp <dave.kleikamp at oracle.com>
+
+ * sparc64: don't treat 64-bit syscall return codes as 32-bit
+
+2014-02-14 Paul Burton <paul.burton at imgtec.com>
+
+ * MIPS: mark O32+FP64 experimental for now
+
+2014-03-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
+
+2014-02-22 Viller Hsiao <villerhsiao at gmail.com>
+
+ * MIPS: ftrace: Fix icache flush range error
+
+2014-03-17 Lars Persson <lars.persson at axis.com>
+
+ * MIPS: Fix syscall tracing interface
+
+2014-01-22 Markos Chandras <markos.chandras at imgtec.com>
+
+ * MIPS: asm: syscall: Fix copying system call arguments
+
+2014-03-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.14-rc7
+
+2014-03-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-10 Michael Kerrisk <mtk.manpages at gmail.com>
+
+ * ipc: Fix 2 bugs in msgrcv() MSG_COPY implementation
+
+2014-03-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
+
+2014-03-14 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'for-davem' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless
+
+2014-03-14 Sebastian Hesselbarth <sebastian.hesselbarth at gmail.com>
+
+ * net: phy: fix uninitalized ethtool_wolinfo in phy_suspend
+
+2014-03-13 Joe Perches <joe at perches.com>
+
+ * MAINTAINERS: Add linux.nics at intel.com to INTEL ETHERNET DRIVERS
+
+2014-03-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.14-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2014-03-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dm-3.14-fixes-4' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
+
+2014-02-10 Colin Ian King <colin.king at canonical.com>
+
+ * MIPS: Octeon: Fix fall through on bar type OCTEON_DMA_BAR_TYPE_SMALL
+
+2014-03-14 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless into for-davem
+
+2014-02-07 Huacai Chen <chenhc at lemote.com>
+
+ * MIPS: FPU: Fix conflict of register usage
+
+2014-02-09 Paul Bolle <pebolle at tiscali.nl>
+
+ * MIPS: Replace CONFIG_MIPS64 and CONFIG_MIPS32_R2
+
+2014-03-12 Patrick Palka <patrick at parcs.ath.cx>
+
+ * perf bench: Fix NULL pointer dereference in "perf bench all"
+
+2014-03-13 Simon Wood <simon at mungewell.org>
+
+ * HID: hid-lg4ff: Support new version of G27
+
+2014-03-13 Arnaldo Carvalho de Melo <acme at redhat.com>
+
+ * perf bench numa: Make no args mean 'run all tests'
+
+2014-03-13 Daniel J Blueman <daniel at numascale.com>
+
+ * x86/amd/numa: Fix northbridge quirk to assign correct NUMA node
+
+2014-03-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2014-03-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'i2c/for-current' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux
+
+2014-03-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2014-02-09 Richard Weinberger <richard at nod.at>
+
+ * i2c: Remove usage of orphaned symbol OF_I2C
+
+2014-03-13 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'pnp', 'acpi-init', 'acpi-sleep' and 'pm-cpufreq'
+
+2014-03-13 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / sleep: Add extra checks for HW Reduced ACPI mode sleep states
+
+2014-03-12 Heiner Kallweit <heiner.kallweit at web.de>
+
+ * ipv6: Avoid unnecessary temporary addresses being generated
+
+2014-03-12 Stefan Wahren <stefan.wahren at i2se.com>
+
+ * eth: fec: Fix lost promiscuous mode after reconnecting cable
+
+2014-03-12 dingtianhong <dingtianhong at huawei.com>
+
+ * bonding: set correct vlan id for alb xmit path
+
+2014-03-12 Alexander Aring <alex.aring at gmail.com>
+
+ * at86rf230: fix lockdep splats
+
+2014-03-12 Stanislaw Gruszka <sgruszka at redhat.com>
+
+ * Revert "rt2x00: rt2800lib: Update BBP register initialization for RT53xx"
+
+2014-03-12 Helmut Schaa <helmut.schaa at googlemail.com>
+
+ * ath9k: Fix sequence number assignment for non-data frames
+
+2014-03-13 Or Gerlitz <ogerlitz at mellanox.com>
+
+ * net/mlx4_en: Deregister multicast vxlan steering rules when going down
+
+2014-03-13 Arnd Bergmann <arnd at arndb.de>
+
+ * vmxnet3: fix building without CONFIG_PCI_MSI
+
+2014-03-13 Daniel Borkmann <dborkman at redhat.com>
+
+ * MAINTAINERS: add networking selftests to NETWORKING
+
+2014-03-13 Paul Mackerras <paulus at samba.org>
+
+ * KVM: PPC: Book3S HV: Fix register usage when loading/saving VRSAVE
+
+2014-03-13 Paul Mackerras <paulus at samba.org>
+
+ * KVM: PPC: Book3S HV: Remove bogus duplicate code
+
+2014-03-13 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'ttm-fixes-3.14-2014-03-12' of git://people.freedesktop.org/~thomash/linux into drm-fixes
+
+2014-03-13 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-fixes-3.14' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
+
+2014-03-13 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'vmwgfx-fixes-3.14-2014-03-13' of git://people.freedesktop.org/~thomash/linux into drm-fixes
+
+2014-03-12 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Fix a surface reference corner-case in legacy emulation mode
+
+2014-03-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pci-v3.14-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
+
+2014-03-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
+
+2014-03-13 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / init: Invoke early ACPI initialization later
+
+2014-03-12 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * cpufreq: Skip current frequency initialization for ->setpolicy drivers
+
+2014-03-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.14-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2014-03-11 Matthew Leach <matthew.leach at arm.com>
+
+ * net: socket: error on a negative msg_namelen
+
+2014-03-12 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/cik: properly set compute ring status on disable
+
+2014-03-11 Tobias Klauser <tklauser at distanz.ch>
+
+ * MAINTAINERS: Add tools/net to NETWORKING [GENERAL]
+
+2014-03-11 Geert Uytterhoeven <geert+renesas at linux-m68k.org>
+
+ * packet: doc: Spelling s/than/that/
+
+2014-03-12 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'mlx4'
+
+2014-03-12 Or Gerlitz <ogerlitz at mellanox.com>
+
+ * net/mlx4_core: Load the IB driver when the device supports IBoE
+
+2014-03-12 Or Gerlitz <ogerlitz at mellanox.com>
+
+ * net/mlx4_en: Handle vxlan steering rules for mac address changes
+
+2014-03-12 Or Gerlitz <ogerlitz at mellanox.com>
+
+ * net/mlx4_core: Fix wrong dump of the vxlan offloads device capability
+
+2014-03-12 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/cik: stop the sdma engines in the enable() function
+
+2014-03-12 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/cik: properly set sdma ring status on disable
+
+2014-03-11 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: fix runpm disabling on non-PX harder
+
+2014-03-11 Wei Liu <wei.liu2 at citrix.com>
+
+ * xen-netback: use skb_is_gso in xenvif_start_xmit
+
+2014-03-12 Rob Clark <rclark at redhat.com>
+
+ * drm/ttm: don't oops if no invalidate_caches()
+
+2014-03-12 Heinz Mauelshagen <heinzm at redhat.com>
+
+ * dm cache: fix access beyond end of origin device
+
+2014-03-12 Heinz Mauelshagen <heinzm at redhat.com>
+
+ * dm cache: fix truncation bug when copying a block to/from >2TB fast device
+
+2014-03-11 Radim Krčmář <rkrcmar at redhat.com>
+
+ * KVM: SVM: fix cr8 intercept window
+
+2014-03-11 Bjorn Helgaas <bhelgaas at google.com>
+
+ * PCI: Don't check resource_size() in pci_bus_alloc_resource()
+
+2014-03-11 Bjorn Helgaas <bhelgaas at google.com>
+
+ * PCI: Enable INTx in pci_reenable_device() only when MSI/MSI-X not enabled
+
+2014-03-07 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915: Don't enable display error interrupts from the start
+
+2014-03-11 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Fix scanline counter fixup on BDW
+
+2014-03-11 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Add a workaround for HSW scanline counter weirdness
+
+2014-03-12 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/ttm: Work around performance regression with VM_PFNMAP
+
+2014-02-27 Ales Novak <alnovak at suse.cz>
+
+ * [SCSI] storvsc: NULL pointer dereference fix
+
+2014-03-11 hayeswang <hayeswang at realtek.com>
+
+ * r8169: fix the incorrect tx descriptor version
+
+2014-03-11 Markos Chandras <markos.chandras at imgtec.com>
+
+ * tools/net/Makefile: Define PACKAGE to fix build problems
+
+2014-03-10 Alexei Starovoitov <ast at plumgrid.com>
+
+ * x86: bpf_jit: support negative offsets
+
+2014-03-10 Linus Lüssing <linus.luessing at web.de>
+
+ * bridge: multicast: enable snooping on general queries only
+
+2014-03-10 Linus Lüssing <linus.luessing at web.de>
+
+ * bridge: multicast: add sanity check for general query destination
+
+2014-03-06 Deng-Cheng Zhu <dengcheng.zhu at imgtec.com>
+
+ * MIPS: math-emu: Fix prefx detection and COP1X function field definition
+
+2014-03-10 Eric Dumazet <eric.dumazet at gmail.com>
+
+ * tcp: tcp_release_cb() should release socket ownership
+
+2014-03-11 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'skb_frags'
+
+2014-03-10 Michael S. Tsirkin <mst at redhat.com>
+
+ * skbuff: skb_segment: orphan frags before copying
+
+2014-03-11 Zhang Rui <rui.zhang at intel.com>
+
+ * PNP / ACPI: proper handling of ACPI IO/Memory resource parsing failures
+
+2014-03-11 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'stmmac'
+
+2014-03-10 Boris BREZILLON <b.brezillon.dev at gmail.com>
+
+ * ARM: at91: fix network interface ordering for sama5d36
+
+2014-03-11 Shawn Guo <shawn.guo at linaro.org>
+
+ * MAINTAINERS: update IMX kernel git tree
+
+2014-02-02 Suresh Siddha <sbsiddha at gmail.com>
+
+ * x86, fpu: Check tsk_used_math() in kernel_fpu_end() for eager FPU
+
+2014-03-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-next' of git://git.samba.org/sfrench/cifs-2.6
+
+2014-03-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
+
+2014-03-10 Dave Jones <davej at redhat.com>
+
+ * x86: Remove CONFIG_X86_OOSTORE
+
+2014-03-06 Dave Jones <davej at redhat.com>
+
+ * perf/x86: Fix leak in uncore_type_init failure paths
+
+2014-03-06 Fernando Luis Vazquez Cao <fernando at oss.ntt.co.jp>
+
+ * sched/clock: Prevent tracing recursion in sched_clock_cpu()
+
+2014-02-28 Peter Zijlstra <peterz at infradead.org>
+
+ * stop_machine: Fix^2 race between stop_two_cpus() and stop_cpus()
+
+2014-03-03 Juri Lelli <juri.lelli at gmail.com>
+
+ * sched/deadline: Deny unprivileged users to set/change SCHED_DEADLINE policy
+
+2014-03-11 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2014-03-11 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.14-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2014-03-10 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (patches from Andrew Morton)
+
+2014-03-10 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * cris: convert ffs from an object-like macro to a function-like macro
+
+2014-03-10 Sergei Antonov <saproj at gmail.com>
+
+ * hfsplus: add HFSX subfolder count support
+
+2014-03-10 Colin Ian King <colin.king at canonical.com>
+
+ * tools/testing/selftests/ipc/msgque.c: handle msgget failure return correctly
+
+2014-03-10 Michael Opdenacker <michael.opdenacker at free-electrons.com>
+
+ * MAINTAINERS: blackfin: add git repository
+
+2014-03-10 Andrew Morton <akpm at linux-foundation.org>
+
+ * revert "kallsyms: fix absolute addresses for kASLR"
+
+2014-03-10 Ben Hutchings <ben at decadent.org.uk>
+
+ * mm/Kconfig: fix URL for zsmalloc benchmark
+
+2014-03-10 Artem Fetishev <artem_fetishev at epam.com>
+
+ * fs/proc/base.c: fix GPF in /proc/$PID/map_files
+
+2014-03-10 Laura Abbott <lauraa at codeaurora.org>
+
+ * mm/compaction: break out of loop on !PageBuddy in isolate_freepages_block
+
+2014-03-10 Johannes Weiner <hannes at cmpxchg.org>
+
+ * mm: fix GFP_THISNODE callers and clarify
+
+2014-03-10 Jens Axboe <axboe at fb.com>
+
+ * mtip32xx: fix bad use of smp_processor_id()
+
+2014-03-10 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
+
+2014-03-10 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata
+
+2014-03-04 Al Viro <viro at zeniv.linux.org.uk>
+
+ * get rid of fget_light()
+
+2014-03-03 Al Viro <viro at zeniv.linux.org.uk>
+
+ * sockfd_lookup_light(): switch to fdget^W^Waway from fget_light
+
+2014-03-03 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * vfs: atomic f_pos accesses as per POSIX
+
+2014-02-10 Al Viro <viro at zeniv.linux.org.uk>
+
+ * ocfs2 syncs the wrong range...
+
+2014-03-10 Tejun Heo <tj at kernel.org>
+
+ * libata: use wider match for blacklisting Crucial M500
+
+2014-02-25 Don Zickus <dzickus at redhat.com>
+
+ * perf machine: Use map as success in ip__resolve_ams
+
+2014-03-02 Jiri Olsa <jolsa at redhat.com>
+
+ * perf symbols: Fix crash in elf_section_by_name
+
+2014-02-06 Ben Hutchings <ben at decadent.org.uk>
+
+ * perf trace: Decode architecture-specific signal numbers
+
+2014-03-10 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/88pm860x', 'asoc/fix/omap' and 'asoc/fix/si476x' into asoc-linus
+
+2014-03-10 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/pcm' into asoc-linus
+
+2014-03-04 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: 88pm860x: Fix IO setup
+
+2014-03-04 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: si476x: Fix IO setup
+
+2014-02-26 Giridhar Malavali <giridhar.malavali at qlogic.com>
+
+ * [SCSI] qla2xxx: Poll during initialization for ISP25xx and ISP83xx
+
+2014-02-06 Lukasz Dorau <lukasz.dorau at intel.com>
+
+ * [SCSI] isci: correct erroneous for_each_isci_host macro
+
+2014-02-06 Dan Williams <dan.j.williams at intel.com>
+
+ * [SCSI] isci: fix reset timeout handling
+
+2013-12-19 Mike Christie <michaelc at cs.wisc.edu>
+
+ * [SCSI] be2iscsi: fix bad if expression
+
+2014-02-26 Chad Dupuis <chad.dupuis at qlogic.com>
+
+ * [SCSI] qla2xxx: Fix multiqueue MSI-X registration.
+
+2014-03-07 Nikolay Aleksandrov <nikolay at redhat.com>
+
+ * selinux: add gfp argument to security_xfrm_policy_alloc and fix callers
+
+2014-03-07 Nikolay Aleksandrov <nikolay at redhat.com>
+
+ * net: af_key: fix sleeping under rcu
+
+2014-03-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.14-rc6
+
+2014-03-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2014-03-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'nfs-for-3.14-5' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
+
+2014-03-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'usb-3.14-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
+
+2014-03-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'staging-3.14-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
+
+2014-03-09 David Howells <dhowells at redhat.com>
+
+ * KEYS: Make the keyring cycle detector ignore other keyrings of the same name
+
+2014-03-09 Michael Chan <mchan at broadcom.com>
+
+ * bnx2: Fix shutdown sequence
+
+2014-03-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux
+
+2014-03-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'spi-v3.14-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi
+
+2014-03-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending
+
+2014-03-09 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Revert "ACPI / sleep: pm_power_off needs more sanity checks to be installed"
+
+2014-03-08 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'omap-for-v3.14/fixes-dt-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes
+
+2014-03-08 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'bcm-for-3.14-pinctrl-reduced-rename' of git://github.com/broadcom/bcm11351 into fixes
+
+2014-03-08 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'sunxi-fixes-for-3.14' of https://github.com/mripard/linux into fixes
+
+2014-03-08 Mike Snitzer <snitzer at redhat.com>
+
+ * block: change flush sequence list addition back to front add
+
+2014-03-08 Mike Snitzer <snitzer at redhat.com>
+
+ * block: fix q->flush_rq NULL pointer crash on dm-mpath flush
+
+2014-03-08 Eric W. Biederman <ebiederm at xmission.com>
+
+ * audit: Update kdoc for audit_send_reply and audit_list_rules_send
+
+2014-03-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
+
+2014-03-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata
+
+2014-03-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
+
+2014-03-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'firewire-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394
+
+2014-03-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'clk-fixes-for-linus' of git://git.linaro.org/people/mike.turquette/linux
+
+2014-03-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.14-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2014-03-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * x86: fix compile error due to X86_TRAP_NMI use in asm files
+
+2014-03-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-arm
+
+2014-03-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
+
+2014-03-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'trace-fixes-v3.14-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
+
+2014-03-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'irq-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-07 Ditang Chen <chendt.fnst at cn.fujitsu.com>
+
+ * SUNRPC: Fix oops when trace sunrpc_task events in nfs client
+
+2014-03-08 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-cpufreq'
+
+2014-03-08 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'acpi-resources', 'acpi-ec' and 'acpi-sleep'
+
+2014-03-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dm-3.14-fixes-3' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
+
+2014-03-07 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86: Ignore NMIs that come in during early boot
+
+2014-02-26 Mark Rutland <mark.rutland at arm.com>
+
+ * ARM: 7992/1: boot: compressed: ignore bswapsdi2.S
+
+2014-02-25 Linus Walleij <linus.walleij at linaro.org>
+
+ * ARM: 7991/1: sa1100: fix compile problem on Collie
+
+2014-02-26 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * ARM: fix noMMU kallsyms symbol filtering
+
+2014-03-07 Mathias Nyman <mathias.nyman at linux.intel.com>
+
+ * Revert "USBNET: ax88179_178a: enable tso if usb host supports sg dma"
+
+2014-03-07 Mathias Nyman <mathias.nyman at linux.intel.com>
+
+ * Revert "xhci 1.0: Limit arbitrarily-aligned scatter gather."
+
+2014-03-04 Julius Werner <jwerner at chromium.org>
+
+ * usb: Make DELAY_INIT quirk wait 100ms between Get Configuration requests
+
+2014-03-04 Julius Werner <jwerner at chromium.org>
+
+ * usb: Add device quirk for Logitech HD Pro Webcams C920 and C930e
+
+2014-03-07 Michele Baldessari <michele at acksyn.org>
+
+ * libata: add ATA_HORKAGE_BROKEN_FPDMA_AA quirk for Seagate Momentus SpinPoint M8 (2BA30001)
+
+2014-03-07 Joe Thornber <ejt at redhat.com>
+
+ * dm space map metadata: fix refcount decrement below 0 which caused corruption
+
+2014-03-07 Tejun Heo <tj at kernel.org>
+
+ * firewire: don't use PREPARE_DELAYED_WORK
+
+2014-03-07 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix loud click noise with IdeaPad 410Y
+
+2014-03-05 Sagi Grimberg <sagig at mellanox.com>
+
+ * Target/sbc: Fix sbc_copy_prot for offset scatters
+
+2014-03-07 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'spi/fix/ath79', 'spi/fix/atmel', 'spi/fix/coldfire', 'spi/fix/fsl-dspi', 'spi/fix/imx' and 'spi/fix/topcliff-pch' into spi-linus
+
+2014-03-04 Anton Blanchard <anton at samba.org>
+
+ * powerpc: Align p_dyn, p_rela and p_st symbols
+
+2014-03-03 Michael Neuling <mikey at neuling.org>
+
+ * powerpc/tm: Fix crash when forking inside a transaction
+
+2014-03-07 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-fixes-3.14' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
+
+2014-03-06 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/atom: select the proper number of lanes in transmitter setup
+
+2014-03-03 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * MAINTAINERS: add maintainer entry for TDA998x driver
+
+2014-03-06 Gerd Hoffmann <kraxel at redhat.com>
+
+ * drm: fix bochs kconfig dependencies
+
+2014-03-07 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-armada-fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-cubox into drm-fixes
+
+2014-03-07 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-fixes-3.14' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
+
+2014-03-06 Sabrina Dubroca <sd at queasysnail.net>
+
+ * ipv6: don't set DST_NOCOUNT for remotely added routes
+
+2014-03-06 Amir Vadai <amirv at mellanox.com>
+
+ * net/mlx4_core: mlx4_init_slave() shouldn't access comm channel before PF is ready
+
+2014-03-06 Amir Vadai <amirv at mellanox.com>
+
+ * net/mlx4_core: Fix memory access error in mlx4_QUERY_DEV_CAP_wrapper()
+
+2014-03-06 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/dpm: fix typo in EVERGREEN_SMC_FIRMWARE_HEADER_softRegisters
+
+2014-03-04 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/cik: fix typo in documentation
+
+2014-03-04 Paul Bolle <pebolle at tiscali.nl>
+
+ * drm/radeon: silence GCC warning on 32 bit
+
+2014-02-25 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: resume old pm late
+
+2014-02-28 Lauri Kasanen <cand at gmx.com>
+
+ * drm/radeon: TTM must be init with cpu-visible VRAM, v2
+
+2014-03-04 David Miller <davem at davemloft.net>
+
+ * sparc: serial: Clean up the locking for -rt
+
+2014-03-06 Stefan Richter <stefanr at s5r6.in-berlin.de>
+
+ * firewire: ohci: fix probe failure with Agere/LSI controllers
+
+2014-02-28 Deng-Cheng Zhu <dengcheng.zhu at imgtec.com>
+
+ * MIPS: APRP: Choose the correct VPE loader by fixing the linking
+
+2014-02-28 Deng-Cheng Zhu <dengcheng.zhu at imgtec.com>
+
+ * MIPS: APRP: Unregister rtlx interrupt hook at module exit
+
+2014-03-06 Mike Snitzer <snitzer at redhat.com>
+
+ * dm thin: fix Documentation for held metadata root feature
+
+2014-03-05 Peter Zijlstra <peterz at infradead.org>
+
+ * x86, trace: Further robustify CR2 handling vs tracing
+
+2014-03-01 Kieran Clancy <clancy.kieran at gmail.com>
+
+ * ACPI / EC: Clear stale EC events on Samsung systems
+
+2014-03-04 Viresh Kumar <viresh.kumar at linaro.org>
+
+ * cpufreq: Initialize governor for a new policy under policy->rwsem
+
+2014-03-04 Viresh Kumar <viresh.kumar at linaro.org>
+
+ * cpufreq: Initialize policy before making it available for others to use
+
+2014-03-04 Aaron Plattner <aplattner at nvidia.com>
+
+ * cpufreq: use cpufreq_cpu_get() to avoid cpufreq_get() race conditions
+
+2014-03-04 Ben Widawsky <benjamin.widawsky at intel.com>
+
+ * drm/i915: Fix PSR programming
+
+2014-03-05 Stefan Agner <stefan at agner.ch>
+
+ * clocksource: vf_pit_timer: use complement for sched_clock reading
+
+2014-03-06 Marc Zyngier <marc.zyngier at arm.com>
+
+ * ARM: KVM: fix non-VGIC compilation
+
+2014-02-28 Benoit Cousson <bcousson at baylibre.com>
+
+ * clk: shmobile: rcar-gen2: Use kick bit to allow Z clock frequency change
+
+2014-03-03 Joe Thornber <ejt at redhat.com>
+
+ * dm thin: fix noflush suspend IO queueing
+
+2014-03-03 Joe Thornber <ejt at redhat.com>
+
+ * dm thin: fix deadlock in __requeue_bio_list
+
+2014-03-03 Joe Thornber <ejt at redhat.com>
+
+ * dm thin: fix out of data space handling
+
+2014-02-14 Mike Snitzer <snitzer at redhat.com>
+
+ * dm thin: ensure user takes action to validate data and metadata consistency
+
+2014-03-04 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFSv4: Fail the truncate() if the lock/open stateid is invalid
+
+2014-03-04 Andy Adamson <andros at netapp.com>
+
+ * NFSv4.1 Fail data server I/O if stateid represents a lost lock
+
+2014-03-04 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFSv4: Fix the return value of nfs4_select_rw_stateid
+
+2014-03-05 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFSv4: nfs4_stateid_is_current should return 'true' for an invalid stateid
+
+2014-03-05 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: usb-audio: Add quirk for Logitech Webcam C500
+
+2014-03-05 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Use analog beep for Thinkpads with AD1984 codecs
+
+2014-03-05 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Add missing loopback merge path for AD1884/1984 codecs
+
+2014-03-05 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm-intel-fixes-2014-03-04' of ssh://git.freedesktop.org/git/drm-intel into drm-fixes
+
+2014-03-05 Wenyou Yang <wenyou.yang at atmel.com>
+
+ * spi: atmel: add missing spi_master_{resume,suspend} calls to PM callbacks
+
+2014-02-14 Axel Lin <axel.lin at ingics.com>
+
+ * spi: coldfire-qspi: Fix getting correct address for *mcfqspi
+
+2014-02-14 Axel Lin <axel.lin at ingics.com>
+
+ * spi: fsl-dspi: Fix getting correct address for master
+
+2014-03-02 Patrick Lai <plai at codeaurora.org>
+
+ * ASoC: pcm: free path list before exiting from error conditions
+
+2014-03-02 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iser-target: Fix command leak for tx_desc->comp_llnode_batch
+
+2014-02-27 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iser-target: Ignore completions for FRWRs in isert_cq_tx_work
+
+2014-02-27 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iser-target: Fix post_send_buf_count for RDMA READ/WRITE
+
+2014-02-03 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi/iser-target: Fix isert_conn->state hung shutdown issues
+
+2014-02-03 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi/iser-target: Use list_del_init for ->i_conn_node
+
+2014-02-26 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi-target: Fix iscsit_get_tpg_from_np tpg_state bug
+
+2014-03-03 Salva Peiró <speiro at ai2.upv.es>
+
+ * staging/cxt1e1/linux.c: Correct arbitrary memory write in c4_ioctl()
+
+2014-02-28 Jiri Olsa <jolsa at redhat.com>
+
+ * x86, trace: Fix CR2 corruption when tracing page faults
+
+2014-03-04 H. Peter Anvin <hpa at linux.intel.com>
+
+ * Merge tag 'efi-urgent' into x86/urgent
+
+2014-03-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2014-03-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'regulator-v3.14-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
+
+2014-03-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (patches from Andrew Morton)
+
+2014-02-14 Mike Snitzer <snitzer at redhat.com>
+
+ * dm thin: synchronize the pool mode during suspend
+
+2014-03-03 Johannes Weiner <hannes at cmpxchg.org>
+
+ * mm: page_alloc: exempt GFP_THISNODE allocations from zone fairness
+
+2014-03-03 Liu Ping Fan <pingfank at linux.vnet.ibm.com>
+
+ * mm: numa: bugfix for LAST_CPUPID_NOT_IN_PAGE_FLAGS
+
+2014-03-03 Joe Perches <joe at perches.com>
+
+ * MAINTAINERS: add and correct types of some "T:" entries
+
+2014-03-03 Joe Perches <joe at perches.com>
+
+ * MAINTAINERS: use tab for separator
+
+2014-03-03 Alexandre Bounine <alexandre.bounine at idt.com>
+
+ * rapidio/tsi721: fix tasklet termination in dma channel release
+
+2014-03-03 Vyacheslav Dubeyko <slava at dubeyko.com>
+
+ * hfsplus: fix remount issue
+
+2014-03-03 Minchan Kim <minchan at kernel.org>
+
+ * zram: avoid null access when fail to alloc meta
+
+2014-03-03 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * sh: prefix sh-specific "CCR" and "CCR2" by "SH_"
+
+2014-03-03 Jan Kara <jack at suse.cz>
+
+ * ocfs2: fix quota file corruption
+
+2014-03-03 Vikas Sajjan <vikas.sajjan at linaro.org>
+
+ * drivers/rtc/rtc-s3c.c: fix incorrect way of save/restore of S3C2410_TICNT for TYPE_S3C64XX
+
+2014-03-03 Andy Honig <ahonig at google.com>
+
+ * kallsyms: fix absolute addresses for kASLR
+
+2014-03-03 Daniel M. Weeks <dan at danweeks.net>
+
+ * scripts/gen_initramfs_list.sh: fix flags for initramfs LZ4 compression
+
+2014-03-03 Vlastimil Babka <vbabka at suse.cz>
+
+ * mm: include VM_MIXEDMAP flag in the VM_SPECIAL list to avoid m(un)locking
+
+2014-03-03 Filipe Brandenburger <filbranden at google.com>
+
+ * memcg: reparent charges of children before processing parent
+
+2014-03-03 Hugh Dickins <hughd at google.com>
+
+ * memcg: fix endless loop in __mem_cgroup_iter_next()
+
+2014-03-03 Hugh Dickins <hughd at google.com>
+
+ * lib/radix-tree.c: swapoff tmpfs radix_tree: remember to rcu_read_unlock
+
+2014-03-03 Dan Williams <dan.j.williams at intel.com>
+
+ * dma debug: account for cachelines and read-only mappings in overlap tracking
+
+2014-03-03 David Rientjes <rientjes at google.com>
+
+ * mm: close PageTail race
+
+2014-03-03 Borislav Petkov <bp at suse.de>
+
+ * MAINTAINERS: EDAC: add Mauro and Borislav as interim patch collectors
+
+2014-03-04 Hui Wang <hui.wang at canonical.com>
+
+ * ALSA: hda - add automute fix for another dell AIO model
+
+2014-03-04 Aaro Koskinen <aaro.koskinen at iki.fi>
+
+ * ASoC: n810: fix init with DT boot
+
+2014-02-26 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * tracing: Do not add event files for modules that fail tracepoints
+
+2014-03-03 Mikulas Patocka <mpatocka at redhat.com>
+
+ * dm snapshot: fix metadata corruption
+
+2014-03-03 Marios Andreopoulos <opensource at andmarios.com>
+
+ * libata: disable queued TRIM for Crucial M500 mSATA SSDs
+
+2014-03-03 Mike Snitzer <snitzer at redhat.com>
+
+ * dm: fix Kconfig indentation
+
+2014-03-03 Vlad Yasevich <vyasevic at redhat.com>
+
+ * macvlan: Add support for 'always_on' offload features
+
+2014-03-03 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'for-davem' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless
+
+2014-03-03 Daniel Borkmann <dborkman at redhat.com>
+
+ * net: sctp: fix sctp_sf_do_5_1D_ce to verify if we/peer is AUTH capable
+
+2014-03-03 David S. Miller <davem at davemloft.net>
+
+ * Merge tag 'linux-can-fixes-for-3.14-20140303' of git://gitorious.org/linux-can/linux-can
+
+2014-03-03 Xin Long <lucien.xin at gmail.com>
+
+ * ip_tunnel:multicast process cause panic due to skb->_skb_refdst NULL pointer
+
+2014-03-03 Schuyler Patton <spatton at ti.com>
+
+ * net: cpsw: fix cpdma rx descriptor leak on down interface
+
+2014-03-03 Vasundhara Volam <vasundhara.volam at emulex.com>
+
+ * be2net: isolate TX workarounds not applicable to Skyhawk-R
+
+2014-03-03 Vasundhara Volam <vasundhara.volam at emulex.com>
+
+ * be2net: Fix skb double free in be_xmit_wrokarounds() failure path
+
+2014-03-03 Somnath kotur <somnath.kotur at emulex.com>
+
+ * be2net: clear promiscuous bits in adapter->flags while disabling promiscuous mode
+
+2014-03-03 Somnath Kotur <somnath.kotur at emulex.com>
+
+ * be2net: Fix to reset transparent vlan tagging
+
+2014-03-01 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * qlcnic: dcb: a couple off by one bugs
+
+2014-02-28 Yuchung Cheng <ycheng at google.com>
+
+ * tcp: fix bogus RTT on special retransmission
+
+2014-03-01 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * hsr: off by one sanity check in hsr_register_frame_in()
+
+2014-03-03 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless into for-davem
+
+2014-03-03 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-03 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-03 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'clk-fixes-for-linus' of git://git.linaro.org/people/mike.turquette/linux
+
+2014-03-03 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * DRM: armada: fix use of kfifo_put()
+
+2014-03-03 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Reject >165MHz modes w/ DVI monitors
+
+2014-02-27 Paulo Zanoni <paulo.r.zanoni at intel.com>
+
+ * drm/i915: fix assert_cursor on BDW
+
+2014-02-11 Imre Deak <imre.deak at intel.com>
+
+ * drm/i915: vlv: reserve GT power context early
+
+2014-01-24 Zhang Rui <rui.zhang at intel.com>
+
+ * Thermal: thermal zone governor fix
+
+2014-02-17 Ni Wade <wni at nvidia.com>
+
+ * Thermal: Allow first update of cooling device state
+
+2014-01-27 Richard Weinberger <richard at nod.at>
+
+ * thermal,rcar_thermal: Add dependency on HAS_IOMEM
+
+2014-03-02 Jean Delvare <jdelvare at suse.de>
+
+ * x86_pkg_temp_thermal: Fix the thermal zone type
+
+2014-03-02 Jean Delvare <jdelvare at suse.de>
+
+ * x86_pkg_temp_thermal: Do not expose as a hwmon device
+
+2014-03-03 Zhang Rui <rui.zhang at intel.com>
+
+ * Thermal: update INT3404 thermal driver help text
+
+2014-03-01 Oliver Hartkopp <socketcan at hartkopp.net>
+
+ * can: remove CAN FD compatibility for CAN 2.0 sockets
+
+2014-02-28 Marc Kleine-Budde <mkl at pengutronix.de>
+
+ * can: flexcan: factor out soft reset into seperate funtion
+
+2014-02-28 Marc Kleine-Budde <mkl at pengutronix.de>
+
+ * can: flexcan: flexcan_remove(): add missing netif_napi_del()
+
+2014-02-28 Marc Kleine-Budde <mkl at pengutronix.de>
+
+ * can: flexcan: fix transition from and to freeze mode in chip_{,un}freeze
+
+2014-02-28 Marc Kleine-Budde <mkl at pengutronix.de>
+
+ * can: flexcan: factor out transceiver {en,dis}able into seperate functions
+
+2014-02-28 Marc Kleine-Budde <mkl at pengutronix.de>
+
+ * can: flexcan: fix transition from and to low power mode in chip_{en,dis}able
+
+2014-02-28 Marc Kleine-Budde <mkl at pengutronix.de>
+
+ * can: flexcan: flexcan_open(): fix error path if flexcan_chip_start() fails
+
+2014-02-19 Marc Kleine-Budde <mkl at pengutronix.de>
+
+ * can: flexcan: fix shutdown: first disable chip, then all interrupts
+
+2014-02-14 Imre Deak <imre.deak at intel.com>
+
+ * drm/i915: fix pch pci device enumeration
+
+2014-01-13 Akash Goel <akash.goel at intel.com>
+
+ * drm/i915: Resolving the memory region conflict for Stolen area
+
+2014-02-25 Jani Nikula <jani.nikula at intel.com>
+
+ * drm/i915: use backlight legacy combination mode also for i915gm/i945gm
+
+2014-03-03 Marius Knaust <marius.knaust at gmail.com>
+
+ * ALSA: hda - Added inverted digital-mic handling for Acer TravelMate 8371
+
+2014-03-02 Gabor Juhos <juhosg at openwrt.org>
+
+ * spi: spi-ath79: fix initial GPIO CS line setup
+
+2014-02-27 Dave Airlie <airlied at redhat.com>
+
+ * MAINTAINERS: update AGP tree to point at drm tree
+
+2014-03-02 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFS: Fix a delegation callback race
+
+2014-03-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.14-rc5
+
+2014-02-28 Gerry Demaret <gerry at tigron.be>
+
+ * USB AX88179/178A: Support D-Link DUB-1312
+
+2014-03-02 Hauke Mehrtens <hauke at hauke-m.de>
+
+ * b44: always set duplex mode why phy changes
+
+2014-03-02 Hauke Mehrtens <hauke at hauke-m.de>
+
+ * b44: add calls to phy_{start,stop}
+
+2014-03-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2014-03-02 Li, Aubrey <aubrey.li at linux.intel.com>
+
+ * ACPI / sleep: pm_power_off needs more sanity checks to be installed
+
+2014-03-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'usb-3.14-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
+
+2014-03-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'driver-core-3.14-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core
+
+2014-03-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'staging-3.14-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
+
+2014-03-03 Dave Airlie <airlied at gmail.com>
+
+ * Merge branch 'drm-fixes-3.14' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
+
+2014-03-02 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge iio fixes into staging-linus
+
+2014-03-01 Marek Belisko <marek at goldelico.com>
+
+ * ARM: dts: omap3-gta04: Add ti,omap36xx to compatible property to avoid problems with booting
+
+2014-03-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-02 Dave Airlie <airlied at gmail.com>
+
+ * Merge tag 'vmwgfx-fixes-3.14-2014-03-02' of git://people.freedesktop.org/~thomash/linux into drm-fixes
+
+2014-03-01 Alexey Khoroshilov <khoroshilov at ispras.ru>
+
+ * drm/vmwgfx: avoid null pointer dereference at failure paths
+
+2014-02-28 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Make sure backing mobs are cleared when allocated. Update driver date.
+
+2014-02-28 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Remove some unused surface formats
+
+2014-03-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending
+
+2014-03-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://git.infradead.org/users/vkoul/slave-dma
+
+2014-03-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.14-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2014-02-27 Zhang Rui <rui.zhang at intel.com>
+
+ * ACPI / resources: ignore invalid ACPI device resources
+
+2014-02-26 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFSv4: Fix another nfs4_sequence corruptor
+
+2014-03-01 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2014-02-28 Eric W. Biederman <ebiederm at xmission.com>
+
+ * audit: Send replies in the proper network namespace.
+
+2014-02-28 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'fixes-for-3.14d' of git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio into staging-linus
+
+2014-02-28 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * MAINTAINERS: add maintainer entry for Armada DRM driver
+
+2014-02-28 Ivan Vecera <ivecera at redhat.com>
+
+ * bna: fix vlan tag stripping and implement its toggling
+
+2014-02-28 Michael Chan <mchan at broadcom.com>
+
+ * tg3: Don't check undefined error bits in RXBD
+
+2014-02-20 Stephen Warren <swarren at nvidia.com>
+
+ * ARM: tegra: add LED options back into tegra_defconfig
+
+2014-02-22 Jeff Layton <jlayton at redhat.com>
+
+ * cifs: mask off top byte in get_rfc1002_length()
+
+2014-02-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dm-3.14-fixes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
+
+2014-02-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.14-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2014-02-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'edac_fixes_for_3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp
+
+2014-02-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
+
+2014-02-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
+
+2014-02-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
+
+2014-02-22 Javier Martinez Canillas <javier.martinez at collabora.co.uk>
+
+ * ARM: dts: omap3-igep: fix boot fail due wrong compatible match
+
+2014-02-26 Bing Zhao <bzhao at marvell.com>
+
+ * mwifiex: do not advertise usb autosuspend support
+
+2014-02-28 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'for-john' of git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211
+
+2014-02-28 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi-fixes
+
+2014-02-28 Heinz Mauelshagen <heinzm at redhat.com>
+
+ * dm cache mq: fix memory allocation failure for large cache devices
+
+2014-02-28 Catalin Marinas <catalin.marinas at arm.com>
+
+ * arm64: Fix !CONFIG_SMP kernel build
+
+2014-02-25 Steve Capper <steve.capper at linaro.org>
+
+ * arm64: mm: Add double logical invert to pte accessors
+
+2014-02-28 Arnd Bergmann <arnd at arndb.de>
+
+ * Merge tag 'omap-for-v3.14/fixes-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes
+
+2014-02-27 Heinz Mauelshagen <heinzm at redhat.com>
+
+ * dm cache: fix truncation bug when mapping I/O to >2TB fast device
+
+2014-02-26 Jiri Olsa <jolsa at redhat.com>
+
+ * perf tools: Fix strict alias issue for find_first_bit
+
+2014-02-03 Eric W. Biederman <ebiederm at xmission.com>
+
+ * audit: Use struct net not pid_t to remember the network namespce to reply in
+
+2014-02-18 Stefan Richter <stefanr at s5r6.in-berlin.de>
+
+ * firewire: net: fix use after free
+
+2014-02-28 Benjamin Herrenschmidt <benh at kernel.crashing.org>
+
+ * powerpc/powernv: Fix indirect XSCOM unmangling
+
+2014-02-28 Benjamin Herrenschmidt <benh at kernel.crashing.org>
+
+ * powerpc/powernv: Fix opal_xscom_{read,write} prototype
+
+2014-02-25 Gavin Shan <shangw at linux.vnet.ibm.com>
+
+ * powerpc/powernv: Refactor PHB diag-data dump
+
+2014-02-25 Gavin Shan <shangw at linux.vnet.ibm.com>
+
+ * powerpc/powernv: Dump PHB diag-data immediately
+
+2014-02-26 Paul Mackerras <paulus at samba.org>
+
+ * powerpc: Increase stack redzone for 64-bit userspace to 512 bytes
+
+2014-02-26 Liu Ping Fan <kernelfans at gmail.com>
+
+ * powerpc/ftrace: bugfix for test_24bit_addr
+
+2014-02-24 Laurent Dufour <ldufour at linux.vnet.ibm.com>
+
+ * powerpc/crashdump : Fix page frame number check in copy_oldmem_page
+
+2014-02-20 Tony Breeds <tony at bakeyournoodle.com>
+
+ * powerpc/le: Ensure that the 'stop-self' RTAS token is handled correctly
+
+2014-02-27 Philippe De Muyter <phdm at macqel.be>
+
+ * spi: spi-imx: spi_imx_remove: do not disable disabled clocks
+
+2014-02-27 Tony Lindgren <tony at atomide.com>
+
+ * ARM: OMAP3: Fix pinctrl interrupts for core2
+
+2014-02-27 Hans Schillstrom <hans at schillstrom.com>
+
+ * ipv6: ipv6_find_hdr restore prev functionality
+
+2014-02-27 Duan Jiong <duanj.fnst at cn.fujitsu.com>
+
+ * neigh: recompute reachabletime before returning from neigh_periodic_work()
+
+2014-02-28 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'pm-cpufreq', 'pm-hibernate' and 'acpi-processor'
+
+2014-02-27 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'for-davem' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless
+
+2014-02-27 Yuval Mintz <yuvalmin at broadcom.com>
+
+ * bnx2x: Add missing bit in default Tx switching
+
+2014-02-27 Paolo Bonzini <pbonzini at redhat.com>
+
+ * kvm, vmx: Really fix lazy FPU on nested guest
+
+2014-01-11 Andi Kleen <ak at linux.intel.com>
+
+ * perf tools: fix BFD detection on opensuse
+
+2014-02-27 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec
+
+2014-02-27 Lorenzo Colitti <lorenzo at google.com>
+
+ * net: ipv6: ping: Use socket mark in routing lookup
+
+2014-02-27 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless into for-davem
+
+2014-02-27 Johannes Berg <johannes.berg at intel.com>
+
+ * mac80211: fix association to 20/40 MHz VHT networks
+
+2014-02-18 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: enable speaker allocation setup on dce3.2
+
+2014-02-18 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: change audio enable logic
+
+2014-02-18 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: fix audio disable on dce6+
+
+2014-02-26 Jerome Glisse <jglisse at redhat.com>
+
+ * drm/radeon: free uvd ring on unload
+
+2014-02-25 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: disable pll sharing for DP on DCE4.1
+
+2014-02-20 Christian König <christian.koenig at amd.com>
+
+ * drm/radeon: fix missing bo reservation
+
+2014-02-20 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: print the supported atpx function mask
+
+2014-02-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'metag-fixes-v3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/metag
+
+2014-02-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pwm/for-3.14-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/thierry.reding/linux-pwm
+
+2014-02-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
+
+2014-02-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'upstream-3.14-rc5' of git://git.infradead.org/linux-ubifs
+
+2014-02-27 Andrew Honig <ahonig at google.com>
+
+ * kvm: x86: fix emulator buffer overflow (CVE-2014-0049)
+
+2014-02-26 Marc Zyngier <marc.zyngier at arm.com>
+
+ * arm/arm64: KVM: detect CPU reset on CPU_PM_EXIT
+
+2014-02-26 Hiroaki SHIMODA <shimoda.hiroaki at gmail.com>
+
+ * sch_tbf: Fix potential memory leak in tbf_change().
+
+2014-02-12 Mike Snitzer <snitzer at redhat.com>
+
+ * dm thin: allow metadata space larger than supported to go unused
+
+2014-02-27 Li Zefan <lizefan at huawei.com>
+
+ * cpuset: fix a race condition in __cpuset_node_allowed_softwall()
+
+2014-02-27 Li Zefan <lizefan at huawei.com>
+
+ * cpuset: fix a locking issue in cpuset_migrate_mm()
+
+2014-02-27 Rashika Kheria <rashika.kheria at gmail.com>
+
+ * genirq: Include missing header file in irqdomain.c
+
+2014-02-27 Ingo Molnar <mingo at kernel.org>
+
+ * Merge branch 'liblockdep-fixes' of https://github.com/sashalevin/liblockdep into core/urgent
+
+2014-02-27 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2014-02-27 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.14-rc4-2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2014-02-24 Peter Zijlstra <peterz at infradead.org>
+
+ * perf: Fix hotplug splat
+
+2014-02-21 Peter Zijlstra <peterz at infradead.org>
+
+ * perf/x86: Fix event scheduling
+
+2014-02-21 Juri Lelli <juri.lelli at gmail.com>
+
+ * sched/deadline: Prevent rt_time growth to infinity
+
+2014-02-24 Juri Lelli <juri.lelli at gmail.com>
+
+ * sched/deadline: Switch CPU's presence test order
+
+2014-02-25 Kirill Tkhai <ktkhai at parallels.com>
+
+ * sched/deadline: Cleanup RT leftovers from {inc/dec}_dl_migration
+
+2014-02-18 George McCollister <george.mccollister at gmail.com>
+
+ * sched: Fix double normalization of vruntime
+
+2014-02-27 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/wm8958' into asoc-linus
+
+2014-02-27 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/da732x' and 'asoc/fix/sta32x' into asoc-linus
+
+2014-02-27 Mark Brown <broonie at linaro.org>
+
+ * Merge tag 'asoc-v3.14-rc4' into asoc-linus
+
+2014-02-27 Mark Brown <broonie at linaro.org>
+
+ * Merge tag 'asoc-v3.14-rc3' into asoc-linus
+
+2014-02-25 Johannes Berg <johannes.berg at intel.com>
+
+ * iwlwifi: fix TX status for aggregated packets
+
+2014-02-27 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: sta32x: Fix wrong enum for limiter2 release rate
+
+2014-02-16 Max Stepanov <Max.Stepanov at intel.com>
+
+ * iwlwifi: mvm: change of listen interval from 70 to 10
+
+2014-02-27 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2014-02-18 Alex Deucher <alexdeucher at gmail.com>
+
+ * MAINTAINERS: update drm git tree entry
+
+2014-02-18 Alex Deucher <alexdeucher at gmail.com>
+
+ * MAINTAINERS: add entry for drm radeon driver
+
+2014-02-26 Alexander Stein <alexander.stein at systec-electronic.com>
+
+ * spi-topcliff-pch: Fix probing when DMA mode is used
+
+2014-02-26 Jiri Bohac <jbohac at suse.cz>
+
+ * bonding: disallow enslaving a bond to itself
+
+2014-02-08 Wang Nan <wangnan0 at huawei.com>
+
+ * tools/liblockdep: Use realpath for srctree and objtree
+
+2014-02-05 Sasha Levin <sasha.levin at oracle.com>
+
+ * tools/liblockdep: Add a stub for new rcu_is_watching
+
+2014-02-05 Sasha Levin <sasha.levin at oracle.com>
+
+ * tools/liblockdep: Mark runtests.sh as executable
+
+2014-01-31 Ira W. Snyder <iws at ovro.caltech.edu>
+
+ * tools/liblockdep: Add include directory to allow tests to compile
+
+2014-01-31 Ira W. Snyder <iws at ovro.caltech.edu>
+
+ * tools/liblockdep: Fix include of asm/hash.h
+
+2014-01-31 Ira W. Snyder <iws at ovro.caltech.edu>
+
+ * tools/liblockdep: Fix initialization code path
+
+2014-02-11 Masanari Iida <standby24x7 at gmail.com>
+
+ * clk:at91: Fix memory leak in of_at91_clk_master_setup()
+
+2014-02-19 Stanislaw Gruszka <sgruszka at redhat.com>
+
+ * usb: ehci: fix deadlock when threadirqs option is used
+
+2014-02-21 Joerg Dorchain <joerg at dorchain.net>
+
+ * USB: ftdi_sio: add Cressi Leonardo PID
+
+2014-02-26 Lan Tianyu <tianyu.lan at intel.com>
+
+ * ACPI / processor: Rework processor throttling with work_on_cpu()
+
+2014-02-26 Nikolay Aleksandrov <nikolay at redhat.com>
+
+ * bonding: fix a div error caused by the slave release path
+
+2014-02-26 Freddy Xin <freddy at asix.com.tw>
+
+ * AX88179_178A: Add VID:DID for Lenovo OneLinkDock Gigabit LAN
+
+2014-02-26 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'bonding_rtnl'
+
+2014-02-26 dingtianhong <dingtianhong at huawei.com>
+
+ * bonding: Fix RTNL: assertion failed at net/core/rtnetlink.c for ab arp monitor
+
+2014-02-26 dingtianhong <dingtianhong at huawei.com>
+
+ * bonding: Fix RTNL: assertion failed at net/core/rtnetlink.c for 802.3ad mode
+
+2014-02-25 Joe Perches <joe at perches.com>
+
+ * MAINTAINERS: Intel nic drivers
+
+2014-02-25 Edward Cree <ecree at solarflare.com>
+
+ * sfc: check for NULL efx->ptp_data in efx_ptp_event
+
+2014-02-25 Eric Dumazet <edumazet at google.com>
+
+ * net: tcp: use NET_INC_STATS()
+
+2014-01-21 Linus Walleij <linus.walleij at linaro.org>
+
+ * clk: nomadik: fix multiplatform problem
+
+2014-02-24 Marcelo Tosatti <mtosatti at redhat.com>
+
+ * KVM: MMU: drop read-only large sptes when creating lower level sptes
+
+2014-01-23 Christian Engelmayer <cengelma at gmx.at>
+
+ * pwm: lp3943: Fix potential memory leak during request
+
+2014-02-26 Hannes Reinecke <hare at suse.de>
+
+ * dm mpath: fix stalls when handling invalid ioctls
+
+2014-02-24 Mark Brown <broonie at linaro.org>
+
+ * ASoC: da732x: Mark DC offset control registers volatile
+
+2014-02-26 Fernando Luis Vázquez Cao <fernando_b1 at lab.ntt.co.jp>
+
+ * HID: hidraw: fix warning destroying hidraw device files after parent
+
+2014-02-26 Kailang Yang <kailang at realtek.com>
+
+ * ALSA: hda/realtek - Add more entry for enable HP mute led
+
+2014-02-19 Steffen Klassert <steffen.klassert at secunet.com>
+
+ * xfrm: Fix unlink race when policies are deleted.
+
+2014-02-09 Kees Cook <keescook at chromium.org>
+
+ * x86, kaslr: add missed "static" declarations
+
+2014-01-23 Eugene Surovegin <surovegin at google.com>
+
+ * x86, kaslr: export offset in VMCOREINFO ELF notes
+
+2014-02-18 Sebastian Capella <sebastian.capella at linaro.org>
+
+ * PM / hibernate: Fix restore hang in freeze_processes()
+
+2014-02-25 Dirk Brandewie <dirk.j.brandewie at intel.com>
+
+ * intel_pstate: Change busy calculation to use fixed point math.
+
+2014-02-25 Cristian Bercaru <cristian.bercaru at freescale.com>
+
+ * phy: unmask link partner capabilities
+
+2014-02-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (patches from Andrew Morton)
+
+2014-02-25 Tobias Klauser <tklauser at distanz.ch>
+
+ * MAINTAINERS: change mailing list address for Altera UART drivers
+
+2014-02-25 Jan Beulich <JBeulich at suse.com>
+
+ * Makefile: fix build with make 3.80 again
+
+2014-02-25 Joe Perches <joe at perches.com>
+
+ * MAINTAINERS: update L: misuses
+
+2014-02-25 Fathi Boudra <fathi.boudra at linaro.org>
+
+ * Makefile: fix extra parenthesis typo when CC_STACKPROTECTOR_REGULAR is enabled
+
+2014-02-25 Davidlohr Bueso <davidlohr at hp.com>
+
+ * ipc,mqueue: remove limits for the amount of system-wide queues
+
+2014-02-25 Michal Hocko <mhocko at suse.cz>
+
+ * memcg: change oom_info_lock to mutex
+
+2014-02-25 Kirill A. Shutemov <kirill.shutemov at linux.intel.com>
+
+ * mm, thp: fix infinite loop on memcg OOM
+
+2014-02-25 Joe Perches <joe at perches.com>
+
+ * drivers/fmc/fmc-write-eeprom.c: fix decimal permissions
+
+2014-02-25 Joe Perches <joe at perches.com>
+
+ * drivers/iommu/omap-iommu-debug.c: fix decimal permissions
+
+2014-02-25 Kirill A. Shutemov <kirill.shutemov at linux.intel.com>
+
+ * mm, hwpoison: release page on PageHWPoison() in __do_fault()
+
+2014-02-25 James Hogan <james.hogan at imgtec.com>
+
+ * irq-metag*: stop set_affinity vectoring to offline cpus
+
+2014-02-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dmaengine-fixes-3.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/dmaengine
+
+2014-02-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus-20140225' of git://git.infradead.org/linux-mtd
+
+2014-02-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/geert/linux-m68k
+
+2014-02-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'xtensa-next-20140224' of git://github.com/czankel/xtensa-linux
+
+2014-02-24 Felix Fietkau <nbd at openwrt.org>
+
+ * ath9k: fix invalid descriptor discarding
+
+2014-02-24 Felix Fietkau <nbd at openwrt.org>
+
+ * ath9k: reduce baseband hang detection false positive rate
+
+2014-02-19 Dan Williams <dan.j.williams at intel.com>
+
+ * ioat: fix tasklet tear down
+
+2014-02-25 Li Zefan <lizefan at huawei.com>
+
+ * sysfs: fix namespace refcnt leak
+
+2014-02-14 Janusz Dziedzic <janusz.dziedzic at tieto.com>
+
+ * cfg80211: regulatory: reset regdomain in case of error
+
+2014-02-21 Jan Kara <jack at suse.cz>
+
+ * fsnotify: Allocate overflow events with proper type
+
+2014-02-21 Jan Kara <jack at suse.cz>
+
+ * fanotify: Handle overflow in case of permission events
+
+2014-02-21 Jan Kara <jack at suse.cz>
+
+ * fsnotify: Fix detection whether overflow event is queued
+
+2014-02-25 Jean Delvare <jdelvare at suse.de>
+
+ * i7300_edac: Fix device reference count
+
+2014-02-24 Jean Delvare <jdelvare at suse.de>
+
+ * i7core_edac: Fix PCI device reference count
+
+2014-02-24 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Add a fixup for HP Folio 13 mute LED
+
+2014-02-24 Mike Turquette <mturquette at linaro.org>
+
+ * Merge branch 'clocks/fixes/drivers' of git://linuxtv.org/pinchartl/fbdev into clk-fixes
+
+2014-01-07 Sylwester Nawrocki <s.nawrocki at samsung.com>
+
+ * clk: Correct handling of NULL clk in __clk_{get, put}
+
+2014-02-22 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: wm8958-dsp: Fix firmware block loading
+
+2014-01-23 Sherman Yin <syin at broadcom.com>
+
+ * pinctrl: Rename Broadcom Capri pinctrl binding
+
+2014-02-24 Christian Daudt <bcm at fixthebug.org>
+
+ * pinctrl: refer to updated dt binding string.
+
+2014-01-23 Sherman Yin <syin at broadcom.com>
+
+ * Update dtsi with new pinctrl compatible string
+
+2014-02-20 Markus Pargmann <mpa at pengutronix.de>
+
+ * regulator: core: Replace direct ops->disable usage
+
+2014-02-20 Markus Pargmann <mpa at pengutronix.de>
+
+ * regulator: core: Replace direct ops->enable usage
+
+2014-02-24 Manu Gupta <manugupt1 at gmail.com>
+
+ * staging: r8188eu: Add new device ID
+
+2014-02-24 Venkatesh Srinivas <venkateshs at google.com>
+
+ * vhost/scsi: Check LUN structure byte 0 is set to 1, per spec
+
+2014-02-24 Mike Turquette <mturquette at linaro.org>
+
+ * Merge branch 'clk-tegra-more-fixes-3.14' of git://nv-tegra.nvidia.com/user/pdeschrijver/linux into clk-fixes
+
+2014-02-24 Juergen Beisert <jbe at pengutronix.de>
+
+ * staging:iio:adc:MXS:LRADC: fix touchscreen statemachine
+
+2014-02-24 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge 3.14-rc4 into char-misc-linus
+
+2014-02-24 Dr. Greg Wettstein <greg at enjellic.com>
+
+ * qla2xxx: Fix kernel panic on selective retransmission request
+
+2014-02-22 Felix Fietkau <nbd at openwrt.org>
+
+ * ath9k: fix ps-poll responses under a-mpdu sessions
+
+2014-02-21 Bing Zhao <bzhao at marvell.com>
+
+ * mwifiex: rename usb driver name registerring to usb core
+
+2014-02-24 John W. Linville <linville at tuxdriver.com>
+
+ * Merge tag 'nfc-fixes-3.14-1' of git://git.kernel.org/pub/scm/linux/kernel/git/sameo/nfc-fixes
+
+2014-02-24 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi-fixes
+
+2014-02-19 Mike Snitzer <snitzer at redhat.com>
+
+ * dm thin: fix the error path for the thin device constructor
+
+2014-02-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security
+
+2014-02-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
+
+2014-02-20 Namhyung Kim <namhyung at kernel.org>
+
+ * perf symbols: Destroy unused symsrcs
+
+2014-02-20 Namhyung Kim <namhyung at kernel.org>
+
+ * perf annotate: Check availability of annotate when processing samples
+
+2014-02-19 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * clk: shmobile: Fix typo in MSTP clock DT bindings
+
+2014-01-07 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * clk: shmobile: rcar-gen2: Fix qspi divisor
+
+2014-01-07 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * clk: shmobile: rcar-gen2: Fix clock parent for all non-PLL clocks
+
+2014-02-03 James Hogan <james.hogan at imgtec.com>
+
+ * asm-generic: add sched_setattr/sched_getattr syscalls
+
+2014-02-21 Johannes Berg <johannes.berg at intel.com>
+
+ * mac80211: don't validate unchanged AP bandwidth while tracking
+
+2014-02-24 Chris Zankel <chris at zankel.net>
+
+ * Merge tag 'xtensa-for-next-20140221-1' into for_next
+
+2014-02-24 James Morris <james.l.morris at oracle.com>
+
+ * Merge branch 'stable-3.14' of git://git.infradead.org/users/pcmoore/selinux into for-linus
+
+2014-02-14 Jeff Layton <jlayton at redhat.com>
+
+ * cifs: sanity check length of data to send before sending
+
+2014-02-14 Pavel Shilovsky <piastry at etersoft.ru>
+
+ * CIFS: Fix wrong pos argument of cifs_find_lock_conflict
+
+2014-02-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.14-rc4
+
+2014-02-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2014-02-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'regulator-v3.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
+
+2014-02-23 Sagi Grimberg <sagig at mellanox.com>
+
+ * Target/sbc: Don't use sg as iterator in sbc_verify_read
+
+2014-02-23 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Add DIF sense codes in transport_generic_request_failure
+
+2014-02-23 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target/sbc: Fix sbc_dif_copy_prot addr offset bug
+
+2014-02-17 Pekon Gupta <pekon at ti.com>
+
+ * mtd: nand: omap: fix ecclayout->oobfree->length
+
+2014-02-17 Pekon Gupta <pekon at ti.com>
+
+ * mtd: nand: omap: fix ecclayout->oobfree->offset
+
+2014-02-17 Pekon Gupta <pekon at ti.com>
+
+ * mtd: nand: omap: fix ecclayout to be in sync with u-boot NAND driver
+
+2014-02-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'irq-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-06 Amitkumar Karwar <akarwar at marvell.com>
+
+ * NFC: NCI: Fix NULL pointer dereference
+
+2014-02-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'usb-3.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
+
+2014-02-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'tty-3.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
+
+2014-02-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'staging-3.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
+
+2014-02-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'char-misc-3.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
+
+2014-02-14 Matt Porter <mporter at linaro.org>
+
+ * MAINTAINERS: add additional ARM BCM281xx/BCM11xxx maintainer
+
+2014-02-23 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'regulator/fix/da9063', 'regulator/fix/max14577' and 'regulator/fix/s5m8767' into regulator-linus
+
+2014-02-23 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/fix/core' into regulator-linus
+
+2014-02-23 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/sta32x', 'asoc/fix/wm8400', 'asoc/fix/wm8770', 'asoc/fix/wm8900' and 'asoc/fix/wm8994' into asoc-linus
+
+2014-02-23 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/ad1980' and 'asoc/fix/isabelle' into asoc-linus
+
+2014-02-23 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/dapm' into asoc-linus
+
+2014-02-23 Mark Brown <broonie at linaro.org>
+
+ * Merge tag 'asoc-v3.14-rc3' into asoc-linus
+
+2014-02-22 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: sta32x: Fix cache sync
+
+2014-02-22 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "tty: Set correct tty name in 'active' sysfs attribute"
+
+2014-02-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging
+
+2014-02-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'xfs-fixes-for-3.14-rc4' of git://oss.sgi.com/xfs/xfs
+
+2014-02-22 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2014-02-21 Krzysztof Kozlowski <k.kozlowski at samsung.com>
+
+ * regulator: max14577: Fix invalid return value on DT parse success
+
+2014-02-21 Jan Kara <jack at suse.cz>
+
+ * Revert "writeback: do not sync data dirtied after sync start"
+
+2014-02-17 Santosh Shilimkar <santosh.shilimkar at ti.com>
+
+ * ARM: OMAP: Kill warning in CPUIDLE code with !CONFIG_SMP
+
+2014-02-17 Sebastian Reichel <sre at debian.org>
+
+ * ARM: OMAP2+: Add support for thumb mode on DT booted N900
+
+2014-02-21 Tony Lindgren <tony at atomide.com>
+
+ * Merge tag 'for-v3.14-rc/omap-fixes-a' of git://git.kernel.org/pub/scm/linux/kernel/git/pjw/omap-pending into omap-for-v3.14/fixes
+
+2014-02-21 Thomas Gleixner <tglx at linutronix.de>
+
+ * Merge tag 'irqchip-mvebu-fixes-3.14' of git://git.infradead.org/linux-mvebu into irq/urgent
+
+2014-02-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dt-for-linus' of git://git.secretlab.ca/git/linux
+
+2014-02-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://www.linux-watchdog.org/linux-watchdog
+
+2014-02-07 Andrew Lunn <andrew at lunn.ch>
+
+ * irqchip: orion: Fix getting generic chip pointer.
+
+2014-02-19 Stephane Eranian <eranian at google.com>
+
+ * perf/x86/uncore: Fix IVT/SNB-EP uncore CBOX NID filter table
+
+2014-02-03 Peter Zijlstra <peterz at infradead.org>
+
+ * perf/x86: Correctly use FEATURE_PDCM
+
+2014-02-14 Markus Metzger <markus.t.metzger at intel.com>
+
+ * perf, nmi: Fix unknown NMI warning
+
+2014-02-19 Matthieu CASTET <matthieu.castet at parrot.com>
+
+ * usb: chipidea: need to mask when writting endptflush and endptprime
+
+2014-02-17 Arve Hjønnevåg <arve at android.com>
+
+ * staging: binder: Fix death notifications
+
+2014-02-18 Kirill Tkhai <tkhai at yandex.ru>
+
+ * sched/deadline: Remove useless dl_nr_total
+
+2014-02-17 Boris Ostrovsky <boris.ostrovsky at oracle.com>
+
+ * sched/deadline: Test for CPU's presence explicitly
+
+2014-02-14 Peter Zijlstra <peterz at infradead.org>
+
+ * sched: Add 'flags' argument to sched_{set,get}attr() syscalls
+
+2014-02-16 Vegard Nossum <vegard.nossum at oracle.com>
+
+ * sched: Fix information leak in sys_sched_getattr()
+
+2014-02-18 Rik van Riel <riel at redhat.com>
+
+ * sched,numa: add cond_resched to task_numa_work
+
+2014-02-11 Juri Lelli <juri.lelli at gmail.com>
+
+ * sched/core: Make dl_b->lock IRQ safe
+
+2014-02-11 Juri Lelli <juri.lelli at gmail.com>
+
+ * sched/core: Fix sched_rt_global_validate
+
+2014-02-19 Steven Rostedt <rostedt at goodmis.org>
+
+ * sched/deadline: Fix overflow to handle period==0 and deadline!=0
+
+2014-02-20 Juri Lelli <juri.lelli at gmail.com>
+
+ * sched/deadline: Fix bad accounting of nr_running
+
+2014-01-27 Stanislav Kholmanskikh <stanislav.kholmanskikh at oracle.com>
+
+ * watchdog: w83697hf_wdt: return ENODEV if no device was found
+
+2014-02-18 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: wire up sched_setattr and sched_getattr syscalls
+
+2014-02-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc
+
+2014-02-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2014-02-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'iommu-fixes-v3.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
+
+2014-02-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2014-01-29 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: xtfpga: set ethoc clock frequency
+
+2014-01-29 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: xtfpga: use common clock framework
+
+2014-01-29 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: support common clock framework
+
+2014-02-09 Paul Bolle <pebolle at tiscali.nl>
+
+ * xtensa: no need to select USE_GENERIC_SMP_HELPERS
+
+2014-02-07 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: fsf: drop nonexistent GPIO32 support
+
+2014-01-31 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: don't pass high memory to bootmem allocator
+
+2014-02-18 Thomas Petazzoni <thomas.petazzoni at free-electrons.com>
+
+ * ARM: 7980/1: kernel: improve error message when LPAE config doesn't match CPU
+
+2014-02-21 Kailang Yang <kailang at realtek.com>
+
+ * ALSA: hda/realtek - Add more entry for enable HP mute led
+
+2014-02-19 Peter Oberparleiter <oberpar at linux.vnet.ibm.com>
+
+ * s390/cio: Fix missing subchannels after CHPID configure on
+
+2014-02-18 Gerald Schaefer <gerald.schaefer at de.ibm.com>
+
+ * s390/pci/dma: use correct segment boundary size
+
+2014-02-10 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * s390/compat: fix sys_sched_getattr compat wrapper
+
+2014-02-21 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-cpufreq'
+
+2014-02-21 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'acpi-pm' and 'acpi-video'
+
+2014-02-21 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'acpi-cleanup', 'acpi-dock', 'acpi-pci' and 'acpi-dsm'
+
+2014-02-12 Dirk Brandewie <dirk.j.brandewie at intel.com>
+
+ * intel_pstate: Add support for Baytrail turbo P states
+
+2014-02-12 Dirk Brandewie <dirk.j.brandewie at intel.com>
+
+ * intel_pstate: Use LFM bus ratio as min ratio/P state
+
+2014-02-20 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'iio-fixes-for-3.14c' of git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio into char-misc-linus
+
+2014-02-20 Shuah Khan <shuah.kh at samsung.com>
+
+ * regulator: core: Change dummy supplies error message to a warning
+
+2014-02-20 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'fixes-for-v3.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb into usb-linus
+
+2014-01-30 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * tcm_qla2xxx: Fix NAA formatted name for NPIV WWPNs
+
+2014-02-19 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * tcm_qla2xxx: Perform configfs depend/undepend for base_tpg
+
+2014-02-19 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * tcm_qla2xxx: Add NPIV specific enable/disable attribute logic
+
+2014-02-19 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * qla2xxx: Check + fail when npiv_vports_inuse exists in shutdown
+
+2014-02-19 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * qla2xxx: Fix qlt_lport_register base_vha callback race
+
+2014-02-20 Jan Kara <jack at suse.cz>
+
+ * quota: Fix race between dqput() and dquot_scan_active()
+
+2014-02-18 Jan Kara <jack at suse.cz>
+
+ * udf: Fix data corruption on file type conversion
+
+2014-02-14 Sujith Manoharan <c_manoha at qca.qualcomm.com>
+
+ * ath9k: Fix ETSI compliance for AR9462 2.0
+
+2014-02-20 Arend van Spriel <arend at broadcom.com>
+
+ * brcmfmac: fix txglomming scatter-gather packet transfers
+
+2014-02-19 Stanislaw Gruszka <sgruszka at redhat.com>
+
+ * ath9k: protect tid->sched check
+
+2014-02-18 Amitkumar Karwar <akarwar at marvell.com>
+
+ * mwifiex: fix cmd and Tx data timeout issue for PCIe cards
+
+2014-02-18 Amitkumar Karwar <akarwar at marvell.com>
+
+ * mwifiex: add NULL check for PCIe Rx skb
+
+2014-02-18 Avinash Patil <patila at marvell.com>
+
+ * mwifiex: clean pcie ring only when device is present
+
+2014-02-17 James Cameron <quozl at laptop.org>
+
+ * libertas: fix scan result loss if SSID IE len 0
+
+2014-02-14 Kirill Tkhai <ktkhai at parallels.com>
+
+ * hostap: Do not free priv until timer handler has actually stopped using it
+
+2014-02-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pci-v3.14-fixes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
+
+2014-02-20 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'for-john' of git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211
+
+2014-02-20 Jiang Liu <jiang.liu at linux.intel.com>
+
+ * ACPI / nouveau: fix probing regression related to _DSM
+
+2014-02-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata
+
+2014-02-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
+
+2014-02-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
+
+2014-02-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes-for-v3.14' of git://git.linaro.org/people/mszyprowski/linux-dma-mapping
+
+2014-02-16 Brian Campbell <brian.campbell at editshare.com>
+
+ * user_namespace.c: Remove duplicated word in comment
+
+2014-02-20 David Howells <dhowells at redhat.com>
+
+ * Sparc: sparc_cpu_model isn't in asm/system.h any more [ver #2]
+
+2014-02-18 Emmanuel Grumbach <emmanuel.grumbach at intel.com>
+
+ * iwlwifi: dvm: clear IWL_STA_UCODE_INPROGRESS when assoc fails
+
+2014-02-20 Peter De Schrijver <pdeschrijver at nvidia.com>
+
+ * clk: tegra124: remove gr2d and gr3d clocks
+
+2014-02-20 Eric Paris <eparis at redhat.com>
+
+ * SELinux: bigendian problems with filename trans rules
+
+2014-02-05 Daniel Mack <zonque at gmail.com>
+
+ * usb: musb: correct use of schedule_delayed_work()
+
+2014-02-18 Josh Cartwright <joshc at codeaurora.org>
+
+ * usb: phy: msm: fix compilation errors when !CONFIG_PM_SLEEP
+
+2014-01-20 Andrzej Pietrasiewicz <andrzej.p at samsung.com>
+
+ * usb: gadget: fix NULL pointer dereference
+
+2014-01-03 Peter Chen <peter.chen at freescale.com>
+
+ * usb: gadget: printer: using gadget_is_otg to check otg support at runtime
+
+2014-02-19 Steffen Klassert <steffen.klassert at secunet.com>
+
+ * xfrm: Clone states properly on migration
+
+2014-02-19 Steffen Klassert <steffen.klassert at secunet.com>
+
+ * xfrm: Take xfrm_state_lock in xfrm_migrate_state_find
+
+2014-02-19 Steffen Klassert <steffen.klassert at secunet.com>
+
+ * xfrm: Fix NULL pointer dereference on sub policy usage
+
+2014-02-19 Steffen Klassert <steffen.klassert at secunet.com>
+
+ * ip6_vti: Fix build when NET_IP_TUNNEL is not set.
+
+2014-02-19 Frank Praznik <frank.praznik at oh.rr.com>
+
+ * HID: sony: Fix work queue issues.
+
+2014-02-20 Joerg Roedel <joro at 8bytes.org>
+
+ * arm/smmu: Use irqsafe spinlock for domain lock
+
+2014-02-18 Grant Likely <grant.likely at linaro.org>
+
+ * of: Add self test for of_match_node()
+
+2014-02-18 Grant Likely <grant.likely at linaro.org>
+
+ * of: Move testcase FDT data into drivers/of
+
+2014-02-19 Kevin Hao <haokexin at gmail.com>
+
+ * of: reimplement the matching method for __of_match_node()
+
+2014-02-20 Johannes Berg <johannes.berg at intel.com>
+
+ * mac80211: fix station wakeup powersave race
+
+2014-02-18 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: dapm: Add locking to snd_soc_dapm_xxxx_pin functions
+
+2014-02-18 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * Input - arizona-haptics: Fix double lock of dapm_mutex
+
+2014-02-17 Johannes Berg <johannes.berg at intel.com>
+
+ * mac80211: insert stations before adding to driver
+
+2014-02-20 Emmanuel Grumbach <emmanuel.grumbach at intel.com>
+
+ * mac80211: fix AP powersave TX vs. wakeup race
+
+2014-02-19 Peter De Schrijver <pdeschrijver at nvidia.com>
+
+ * clk: tegra: Fix vic03 mux index
+
+2014-02-20 Hui Wang <hui.wang at canonical.com>
+
+ * ALSA: hda - Enable front audio jacks on one HP desktop model
+
+2014-02-20 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2014-02-20 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm/for-3.14-rc3' of git://anongit.freedesktop.org/tegra/linux into drm-fixes
+
+2014-02-18 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: wm8400: Fix the wrong number of enum items
+
+2014-02-18 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: isabelle: Fix the wrong number of items in enum ctls
+
+2014-02-18 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: ad1980: Fix wrong number of items for capture source
+
+2014-02-18 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: wm8994: Fix the wrong number of enum items
+
+2014-02-18 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: wm8900: Fix the wrong number of enum items
+
+2014-02-18 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: wm8770: Fix wrong number of enum items
+
+2014-02-13 Paul Gortmaker <paul.gortmaker at windriver.com>
+
+ * sparc32: make copy_to/from_user_page() usable from modular code
+
+2014-02-13 Paul Gortmaker <paul.gortmaker at windriver.com>
+
+ * sparc32: fix build failure for arch_jump_label_transform
+
+2014-02-18 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Revert "ACPI: Blacklist Win8 OSI for some HP laptop 2013 models"
+
+2014-02-18 Aaron Lu <aaron.lu at intel.com>
+
+ * ACPI / video: Add systems that should favour native backlight interface
+
+2014-02-13 Hans de Goede <hdegoede at redhat.com>
+
+ * ACPI / video: Filter the _BCL table for duplicate brightness values
+
+2014-02-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'nfs-for-3.14-4' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
+
+2014-02-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'mfd-fixes-3.14-1' of git://git.linaro.org/people/lee.jones/mfd
+
+2014-01-30 Tomi Valkeinen <tomi.valkeinen at ti.com>
+
+ * ARM: OMAP2+: clock: fix clkoutx2 with CLK_SET_RATE_PARENT
+
+2014-02-05 Illia Smyrnov <illia.smyrnov at globallogic.com>
+
+ * ARM: OMAP4: hwmod: Fix SOFTRESET logic for OMAP4
+
+2014-01-10 Suman Anna <s-anna at ti.com>
+
+ * ARM: DRA7: hwmod data: correct the sysc data for spinlock
+
+2014-02-16 Vaibhav Bedia <vaibhav.bedia at gmail.com>
+
+ * ARM: OMAP5: PRM: Fix reboot handling
+
+2014-02-18 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: sta32x: Fix array access overflow
+
+2014-02-19 Mika Westerberg <mika.westerberg at linux.intel.com>
+
+ * x86: tsc: Add missing Baytrail frequency to the table
+
+2014-02-19 Thomas Gleixner <tglx at linutronix.de>
+
+ * x86, tsc: Fallback to normal calibration if fast MSR calibration fails
+
+2014-02-17 Stephen Boyd <sboyd at codeaurora.org>
+
+ * sched_clock: Prevent callers from seeing half-updated data
+
+2014-02-18 Andy Adamson <andros at netapp.com>
+
+ * NFS fix error return in nfs4_select_rw_stateid
+
+2014-01-26 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * mfd: sec-core: sec_pmic_{suspend,resume}() should depend on CONFIG_PM_SLEEP
+
+2014-01-26 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * mfd: max14577: max14577_{suspend,resume}() should depend on CONFIG_PM_SLEEP
+
+2014-02-03 Lee Jones <lee.jones at linaro.org>
+
+ * mfd: tps65217: Naturalise cross-architecture discrepancies
+
+2014-01-23 Lee Jones <lee.jones at linaro.org>
+
+ * mfd: wm8994-core: Naturalise cross-architecture discrepancies
+
+2014-02-03 Lee Jones <lee.jones at linaro.org>
+
+ * mfd: max8998: Naturalise cross-architecture discrepancies
+
+2014-01-23 Lee Jones <lee.jones at linaro.org>
+
+ * mfd: max8997: Naturalise cross-architecture discrepancies
+
+2014-02-19 Alexander Stein <alexander.stein at systec-electronic.com>
+
+ * spi/topcliff-pch: Fix DMA channel
+
+2014-02-12 Inbal Hacohen <Inbal.Hacohen at intel.com>
+
+ * cfg80211: bugfix in regulatory user hint process
+
+2014-02-19 Hsin-Yu Chao <hychao at chromium.org>
+
+ * ALSA: hda/ca0132 - Fix recording from mode id 0x8
+
+2014-02-19 Hsin-Yu Chao <hychao at chromium.org>
+
+ * ALSA: hda/ca0132 - setup/cleanup streams
+
+2014-02-18 Mike Turquette <mturquette at linaro.org>
+
+ * Merge branch 'for_3.14-rcx/clk-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone into clk-fixes
+
+2014-02-18 Mike Turquette <mturquette at linaro.org>
+
+ * Merge tag 'mvebu-clk-fixes-3.14' of git://git.infradead.org/linux-mvebu into clk-fixes
+
+2014-01-07 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * clk: shmobile: rcar-gen2: Fix qspi divisor
+
+2014-01-07 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * clk: shmobile: rcar-gen2: Fix clock parent all non-PLL clocks
+
+2014-02-19 Eric Sandeen <sandeen at redhat.com>
+
+ * xfs: limit superblock corruption errors to actual corruption
+
+2014-02-19 Eric Sandeen <sandeen at redhat.com>
+
+ * xfs: skip verification on initial "guess" superblock read
+
+2014-02-19 Ben Myers <bpm at sgi.com>
+
+ * MAINTAINERS: SGI no longer maintaining XFS
+
+2014-02-19 Eric Sandeen <sandeen at redhat.com>
+
+ * xfs: xfs_sb_read_verify() doesn't flag bad crcs on primary sb
+
+2014-02-18 Stephen Warren <swarren at nvidia.com>
+
+ * ARM: tegra: only run PL310 init on systems with one
+
+2014-02-18 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: dapm: Correct regulator bypass error messages
+
+2014-02-19 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/wm8993' into asoc-linus
+
+2014-02-19 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/blackfin', 'asoc/fix/da9055', 'asoc/fix/davinci', 'asoc/fix/fsl', 'asoc/fix/fsl-esai', 'asoc/fix/max98090', 'asoc/fix/rt5640', 'asoc/fix/samsung' and 'asoc/fix/txx9aclc-ac97' into asoc-linus
+
+2014-02-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2014-02-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
+
+2014-02-18 Thierry Reding <treding at nvidia.com>
+
+ * ARM: tegra: Add head numbers to display controllers
+
+2014-02-18 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'mvebu-dt-fixes-3.14' of git://git.infradead.org/linux-mvebu into fixes
+
+2014-02-17 Srivatsa S. Bhat <srivatsa.bhat at linux.vnet.ibm.com>
+
+ * cpufreq: powernow-k8: Initialize per-cpu data-structures properly
+
+2014-02-17 viresh kumar <viresh.kumar at linaro.org>
+
+ * cpufreq: remove sysfs link when a cpu != policy->cpu, is removed
+
+2014-02-18 Shawn Guo <shawn.guo at linaro.org>
+
+ * ARM: imx6: build pm-imx6q.c independently of CONFIG_PM
+
+2014-02-18 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'omap-for-v3.14/fixes-against-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes
+
+2014-02-13 Stephen Warren <swarren at nvidia.com>
+
+ * ARM: tegra: fix RTC0 alias for Cardhu
+
+2014-02-15 Guenter Roeck <linux at roeck-us.net>
+
+ * hwmon: (max1668) Fix writing the minimum temperature
+
+2014-02-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2014-02-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-arm
+
+2014-02-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'jfs-3.14-rc4' of git://github.com/kleikamp/linux-shaggy
+
+2014-02-18 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'pwm_pxa_for_v3.14' of https://git.kernel.org/pub/scm/linux/kernel/git/hzhuang1/linux into fixes
+
+2014-02-13 Tejun Heo <tj at kernel.org>
+
+ * cgroup: update cgroup_enable_task_cg_lists() to grab siglock
+
+2014-02-18 Florian Fainelli <f.fainelli at gmail.com>
+
+ * MAINTAINERS: add entry for the PHY library
+
+2014-02-18 Ben Dooks <ben.dooks at codethink.co.uk>
+
+ * of_mdio: fix phy interrupt passing
+
+2014-02-18 Thomas Petazzoni <thomas.petazzoni at free-electrons.com>
+
+ * net: ethernet: update dependency and help text of mvneta
+
+2014-02-18 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * NET: fec: only enable napi if we are successful
+
+2014-02-18 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * af_packet: remove a stray tab in packet_set_ring()
+
+2014-02-18 Kevin Hao <haokexin at gmail.com>
+
+ * Revert "of: search the best compatible match first in __of_match_node()"
+
+2014-02-19 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'ttm-fixes-3.14-2014-02-18' of git://people.freedesktop.org/~thomash/linux into drm-fixes
+
+2014-02-19 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'vmwgfx-fixes-3.14-2014-02-18' of git://people.freedesktop.org/~thomash/linux into drm-fixes
+
+2014-02-19 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-fixes-3.14' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
+
+2014-02-18 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'for-davem' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless
+
+2014-02-15 Lai Jiangshan <laijs at cn.fujitsu.com>
+
+ * workqueue: ensure @task is valid across kthread_stop()
+
+2014-02-17 Daniel Borkmann <dborkman at redhat.com>
+
+ * net: sctp: fix sctp_connectx abi for ia32 emulation/compat mode
+
+2014-02-18 David S. Miller <davem at davemloft.net>
+
+ * Merge tag 'batman-adv-fix-for-davem' of git://git.open-mesh.org/linux-merge
+
+2014-02-17 Kishon Vijay Abraham I <kishon at ti.com>
+
+ * phy: let phy_provider_register be the last step in registering PHY
+
+2014-02-17 Hans de Goede <hdegoede at redhat.com>
+
+ * phy-core: Don't allow building phy-core as a module
+
+2014-02-17 Hans de Goede <hdegoede at redhat.com>
+
+ * phy-core: Don't propagate -ENOSUPP from phy_pm_runtime_get_sync to caller
+
+2014-02-17 Hans de Goede <hdegoede at redhat.com>
+
+ * phy-core: phy_get: Leave error logging to the caller
+
+2014-02-17 Richard Weinberger <richard at nod.at>
+
+ * phy,phy-bcm-kona-usb2.c: Add dependency on HAS_IOMEM
+
+2014-02-14 Daniel Mack <zonque at gmail.com>
+
+ * usb: musb: correct use of schedule_delayed_work()
+
+2014-02-14 Daniel Mack <zonque at gmail.com>
+
+ * usb: musb: do not sleep in atomic context
+
+2014-02-12 Aleksander Morgado <aleksander at aleksander.es>
+
+ * USB: serial: option: blacklist interface 4 for Cinterion PHS8 and PXS8
+
+2014-02-13 Alan Stern <stern at rowland.harvard.edu>
+
+ * USB: EHCI: add delay during suspend to prevent erroneous wakeups
+
+2014-02-14 David A. Long <dave.long at linaro.org>
+
+ * ARM: 7964/1: Detect section mismatches in thumb relocations
+
+2014-02-18 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/ni: fix typo in dpm sq ramping setup
+
+2014-02-18 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/si: fix typo in dpm sq ramping setup
+
+2014-02-18 Christian König <christian.koenig at amd.com>
+
+ * drm/radeon: fix CP semaphores on CIK
+
+2014-02-17 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * drm/radeon: delete a stray tab
+
+2014-02-17 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: fix display tiling setup on SI
+
+2014-02-17 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/dpm: reduce r7xx vblank mclk threshold to 200
+
+2014-02-12 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: fill in DRM_CAPs for cursor size
+
+2014-02-12 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm: add DRM_CAPs for cursor size
+
+2014-02-03 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: unify bpc handling
+
+2014-02-12 Chao Bi <chao.bi at intel.com>
+
+ * mei: set client's read_cb to NULL when flow control fails
+
+2014-02-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
+
+2014-02-19 J. R. Okajima <hooanon05g at gmail.com>
+
+ * nfsd: fix lost nfserrno() call in nfsd_setattr()
+
+2014-01-14 Florian Fainelli <florian at openwrt.org>
+
+ * usb: gadget: bcm63xx_udc: fix build failure on DMA channel code
+
+2014-01-15 Daniel Mack <zonque at gmail.com>
+
+ * usb: musb: do not sleep in atomic context
+
+2014-02-03 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * usb: gadget: s3c2410_udc: Fix build error
+
+2014-02-04 Roger Quadros <rogerq at ti.com>
+
+ * usb: musb: core: Fix remote-wakeup resume
+
+2014-02-04 Ajay Kumar Gupta <ajay.gupta at ti.com>
+
+ * usb: musb: host: Fix SuperSpeed hub enumeration
+
+2014-02-17 Jason Cooper <jason at lakedaemon.net>
+
+ * ARM: dove: dt: revert PMU interrupt controller node
+
+2014-02-18 Mikulas Patocka <mpatocka at redhat.com>
+
+ * dm raid1: fix immutable biovec related BUG when retrying read bio
+
+2014-02-06 Felipe Balbi <balbi at ti.com>
+
+ * usb: musb: fix obex in g_nokia.ko causing kernel panic
+
+2014-02-18 Levente Kurusa <levex at linux.com>
+
+ * ahci: disable NCQ on Samsung pci-e SSDs on macbooks
+
+2014-02-10 Tomasz Nowicki <tomasz.nowicki at linaro.org>
+
+ * ACPI / PCI: Fix memory leak in acpi_pci_irq_enable()
+
+2014-02-12 Masanari Iida <standby24x7 at gmail.com>
+
+ * drm/ttm: Fix memory leak in ttm_agp_backend.c
+
+2014-02-09 Alexandre Courbot <acourbot at nvidia.com>
+
+ * drm/ttm: declare 'struct device' in ttm_page_alloc.h
+
+2014-02-16 Markus Pargmann <mpa at pengutronix.de>
+
+ * dma: sdma: Add imx25 compatible
+
+2014-02-18 Joerg Roedel <joro at 8bytes.org>
+
+ * Merge branch 'for-joerg/arm-smmu/fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into iommu/fixes
+
+2014-02-14 Denis CIOCCA <denis.ciocca at st.com>
+
+ * iio:gyro: bug on L3GD20H gyroscope support
+
+2014-02-14 Beomho Seo <beomho.seo at samsung.com>
+
+ * iio: cm32181: Change cm32181 ambient light sensor driver
+
+2014-02-14 Beomho Seo <beomho.seo at samsung.com>
+
+ * iio: cm36651: Fix read/write integration time function.
+
+2014-02-17 Jan Kara <jack at suse.cz>
+
+ * inotify: Fix reporting of cookies for inotify events
+
+2014-02-18 Hui Wang <hui.wang at canonical.com>
+
+ * ALSA: hda - add headset mic detect quirks for two Dell laptops
+
+2014-02-18 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-nouveau-next' of git://anongit.freedesktop.org/git/nouveau/linux-2.6 into drm-fixes
+
+2014-02-18 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm-intel-fixes-2014-02-14' of ssh://git.freedesktop.org/git/drm-intel into drm-fixes
+
+2014-02-18 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'tda998x-fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-cubox into drm-fixes
+
+2014-02-17 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * jbd2: fix use after free in jbd2_journal_start_reserved()
+
+2014-02-15 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nouveau: fix TTM_PL_TT memtype on pre-nv50
+
+2014-02-13 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nv50/disp: use correct register to determine DP display bpp
+
+2014-02-12 Emil Velikov <emil.l.velikov at gmail.com>
+
+ * drm/nouveau/fb: use correct ram oclass for nv1a hardware
+
+2014-02-08 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nv50/gr: add missing nv_error parameter priv
+
+2014-02-07 Alexandre Courbot <acourbot at nvidia.com>
+
+ * drm/nouveau: fix ENG_RUNLIST register address
+
+2014-02-05 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nv4c/bios: disallow retrieving from prom on nv4x igp's
+
+2014-02-05 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nv4c/vga: decode register is in a different place on nv4x igp's
+
+2014-02-05 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nv4c/mc: nv4x igp's have a different msi rearm register
+
+2014-01-29 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nouveau: set irq_enabled manually
+
+2014-02-12 Vinayak Kale <vkale at apm.com>
+
+ * ARM: 7957/1: add DSB after icache flush in __flush_icache_all()
+
+2014-02-11 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * Fix uses of dma_max_pfn() when converting to a limiting address
+
+2014-02-17 Duan Jiong <duanj.fnst at cn.fujitsu.com>
+
+ * ipv4: fix counter in_slow_tot
+
+2014-02-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
+
+2014-02-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.samba.org/sfrench/cifs-2.6
+
+2014-02-17 David Howells <dhowells at redhat.com>
+
+ * FS-Cache: Handle removal of unadded object to the fscache_object_list rb tree
+
+2014-02-17 Dave Jones <davej at redhat.com>
+
+ * reiserfs: fix utterly brain-damaged indentation.
+
+2014-02-17 Tommie Gannert <tommie at gannert.se>
+
+ * irtty-sir.c: Do not set_termios() on irtty_close()
+
+2014-02-17 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless into for-davem
+
+2014-02-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dma-buf-for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/sumits/dma-buf
+
+2014-02-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/egtvedt/linux-avr32
+
+2014-02-13 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: fix __dcache_readdir()
+
+2014-02-16 Sage Weil <sage at inktank.com>
+
+ * ceph: add acl, noacl options for cephfs mount
+
+2014-02-16 Guangliang Zhao <lucienchao at gmail.com>
+
+ * ceph: make ceph_forget_all_cached_acls() static inline
+
+2014-02-11 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: add missing init_acl() for mkdir() and atomic_open()
+
+2014-02-11 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: fix ceph_set_acl()
+
+2014-02-11 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: fix ceph_removexattr()
+
+2014-02-11 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: remove xattr when null value is given to setxattr()
+
+2014-02-11 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: properly handle XATTR_CREATE and XATTR_REPLACE
+
+2014-02-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
+
+2014-02-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * printk: fix syslog() overflowing user buffer
+
+2013-12-19 David Herrmann <dh.herrmann at gmail.com>
+
+ * HID: hyperv: make sure input buffer is big enough
+
+2013-12-19 David Herrmann <dh.herrmann at gmail.com>
+
+ * HID: Bluetooth: hidp: make sure input buffers are big enough
+
+2014-02-14 Jiri Bohac <jiri at boha.cz>
+
+ * bonding: 802.3ad: make aggregator_identifier bond-private
+
+2014-02-13 Emil Goode <emilgoode at gmail.com>
+
+ * usbnet: remove generic hard_header_len check
+
+2014-02-17 Anthony Olech <anthony.olech.opensource at diasemi.com>
+
+ * Input: da9052_onkey - use correct register bit for key status
+
+2014-02-16 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFSv4: Use the correct net namespace in nfs4_update_server
+
+2014-02-17 Nicolas Dichtel <nicolas.dichtel at 6wind.com>
+
+ * gre: add link local route when local addr is any
+
+2014-02-15 Antonio Quartulli <antonio at meshcoding.com>
+
+ * batman-adv: fix potential kernel paging error for unicast transmissions
+
+2014-02-15 Antonio Quartulli <antonio at meshcoding.com>
+
+ * batman-adv: avoid double free when orig_node initialization fails
+
+2014-02-11 Antonio Quartulli <antonio at open-mesh.com>
+
+ * batman-adv: free skb on TVLV parsing success
+
+2014-02-11 Antonio Quartulli <antonio at open-mesh.com>
+
+ * batman-adv: fix TT CRC computation by ensuring byte order
+
+2014-02-08 Simon Wunderlich <sw at simonwunderlich.de>
+
+ * batman-adv: fix potential orig_node reference leak
+
+2014-01-29 Antonio Quartulli <antonio at open-mesh.com>
+
+ * batman-adv: avoid potential race condition when adding a new neighbour
+
+2014-01-30 Antonio Quartulli <antonio at meshcoding.com>
+
+ * batman-adv: properly check pskb_may_pull return value
+
+2014-01-28 Antonio Quartulli <antonio at meshcoding.com>
+
+ * batman-adv: release vlan object after checking the CRC
+
+2014-01-27 Antonio Quartulli <antonio at meshcoding.com>
+
+ * batman-adv: fix TT-TVLV parsing on OGM reception
+
+2014-02-12 Mikulas Patocka <mpatocka at redhat.com>
+
+ * dm io: fix I/O to multiple destinations
+
+2014-02-06 Mike Snitzer <snitzer at redhat.com>
+
+ * dm thin: avoid metadata commit if a pool's thin devices haven't changed
+
+2014-01-31 Mike Snitzer <snitzer at redhat.com>
+
+ * dm cache: do not add migration to completed list before unhooking bio
+
+2014-01-31 Mike Snitzer <snitzer at redhat.com>
+
+ * dm cache: move hook_info into common portion of per_bio_data structure
+
+2013-12-26 Andrew Bresticker <abrestic at chromium.org>
+
+ * clk: tegra: use max divider if divider overflows
+
+2013-12-26 Andrew Bresticker <abrestic at chromium.org>
+
+ * clk: tegra: cclk_lp has a pllx/2 divider
+
+2013-12-26 Andrew Bresticker <abrestic at chromium.org>
+
+ * clk: tegra: fix sdmmc clks on Tegra1x4
+
+2013-12-26 Mark Zhang <markz at nvidia.com>
+
+ * clk: tegra: fix host1x clock on Tegra124
+
+2013-12-26 David Ung <davidu at nvidia.com>
+
+ * clk: tegra: PLLD2 fixes for hdmi
+
+2013-12-26 Rhyland Klein <rklein at nvidia.com>
+
+ * clk: tegra: Fix PLLD mnp table
+
+2013-12-26 Gabe Black <gabeblack at chromium.org>
+
+ * clk: tegra: Fix PLLP rate table
+
+2013-12-02 Thierry Reding <thierry.reding at gmail.com>
+
+ * clk: tegra: Correct clock number for UARTE
+
+2013-12-19 Peter De Schrijver <pdeschrijver at nvidia.com>
+
+ * clk: tegra: Add missing Tegra20 fuse clks
+
+2014-02-03 Archana Patni <archana.patni at linux.intel.com>
+
+ * HID: hid-sensor-hub: quirk for STM Sensor hub
+
+2014-02-16 Chen Gang <gang.chen.5i5j at gmail.com>
+
+ * avr32: add generic vga.h to Kbuild
+
+2014-02-16 Chen Gang <gang.chen.5i5j at gmail.com>
+
+ * avr32: add generic ioremap_wc() definition in io.h
+
+2014-02-01 Chen Gang <gang.chen.5i5j at gmail.com>
+
+ * avr32: Makefile: add '-D__linux__' flag for gcc-4.4.7 use
+
+2014-01-10 Paul Gortmaker <paul.gortmaker at windriver.com>
+
+ * avr32: fix missing module.h causing build failure in mimc200/fram.c
+
+2014-02-14 Olof Johansson <olof at lixom.net>
+
+ * ARM64: unwind: Fix PC calculation
+
+2014-02-16 Clemens Ladisch <clemens at ladisch.de>
+
+ * ALSA: usb-audio: work around KEF X300A firmware bug
+
+2014-02-13 Linus Walleij <linus.walleij at linaro.org>
+
+ * dma: ste_dma40: don't dereference free:d descriptor
+
+2014-02-16 Daniel Borkmann <dborkman at redhat.com>
+
+ * packet: check for ndo_select_queue during queue selection
+
+2014-02-16 Daniel Borkmann <dborkman at redhat.com>
+
+ * netdevice: move netdev_cap_txqueue for shared usage to header
+
+2014-02-16 Daniel Borkmann <dborkman at redhat.com>
+
+ * netdevice: add queue selection fallback handler for ndo_select_queue
+
+2014-02-14 Ingo Molnar <mingo at elte.hu>
+
+ * drivers/net: tulip_remove_one needs to call pci_disable_device()
+
+2014-02-14 Matija Glavinic Pecotic <matija.glavinic-pecotic.ext at nsn.com>
+
+ * net: sctp: Fix a_rwnd/rwnd management to reflect real state of the receiver's buffer
+
+2014-02-14 Duan Jiong <duanj.fnst at cn.fujitsu.com>
+
+ * ipv4: distinguish EHOSTUNREACH from the ENETUNREACH
+
+2014-02-12 Haiyang Zhang <haiyangz at microsoft.com>
+
+ * hyperv: Fix the carrier status setting
+
+2014-02-13 Gerrit Renker <gerrit at erg.abdn.ac.uk>
+
+ * dccp: re-enable debug macro
+
+2014-02-16 Theodore Ts'o <tytso at mit.edu>
+
+ * ext4: don't leave i_crtime.tv_sec uninitialized
+
+2014-02-12 Gavin Shan <shangw at linux.vnet.ibm.com>
+
+ * powerpc/eeh: Disable EEH on reboot
+
+2014-02-12 Gavin Shan <shangw at linux.vnet.ibm.com>
+
+ * powerpc/eeh: Cleanup on eeh_subsystem_enabled
+
+2014-02-12 Gavin Shan <shangw at linux.vnet.ibm.com>
+
+ * powerpc/powernv: Rework EEH reset
+
+2014-02-12 Anton Blanchard <anton at au1.ibm.com>
+
+ * powerpc: Use unstripped VDSO image for more accurate profiling data
+
+2014-02-12 Anton Blanchard <anton at au1.ibm.com>
+
+ * powerpc: Link VDSOs at 0x0
+
+2014-02-12 Aneesh Kumar K.V <aneesh.kumar at linux.vnet.ibm.com>
+
+ * mm: Use ptep/pmdp_set_numa() for updating _PAGE_NUMA bit
+
+2014-02-12 Aneesh Kumar K.V <aneesh.kumar at linux.vnet.ibm.com>
+
+ * mm: Dirty accountable change only apply to non prot numa case
+
+2014-02-12 Aneesh Kumar K.V <aneesh.kumar at linux.vnet.ibm.com>
+
+ * powerpc/mm: Add new "set" flag argument to pte/pmd update function
+
+2014-01-17 Kleber Sacilotto de Souza <klebers at linux.vnet.ibm.com>
+
+ * powerpc/pseries: Add Gen3 definitions for PCIE link speed
+
+2014-01-17 Kleber Sacilotto de Souza <klebers at linux.vnet.ibm.com>
+
+ * powerpc/pseries: Fix regression on PCI link speed
+
+2014-01-17 Kevin Hao <haokexin at gmail.com>
+
+ * powerpc: Set the correct ksp_limit on ppc32 when switching to irq stack
+
+2014-02-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.14-rc3
+
+2014-02-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
+
+2014-02-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dt-fixes-for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux
+
+2014-02-16 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * SUNRPC: Fix a pipe_version reference leak
+
+2014-02-16 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * SUNRPC: Ensure that gss_auth isn't freed before its upcall messages
+
+2014-02-16 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * ata: sata_mv: Cleanup only the initialized ports
+
+2014-02-15 Theodore Ts'o <tytso at mit.edu>
+
+ * ext4: fix online resize with a non-standard blocks per group setting
+
+2014-02-15 Theodore Ts'o <tytso at mit.edu>
+
+ * ext4: fix online resize with very large inode tables
+
+2014-02-15 Jarno Rajahalme <jrajahalme at nicira.com>
+
+ * openvswitch: Fix race.
+
+2014-02-15 Jarno Rajahalme <jrajahalme at nicira.com>
+
+ * openvswitch: Read tcp flags only then the tranport header is present.
+
+2014-02-14 Jiri Pirko <jiri at resnulli.us>
+
+ * ovs: fix dp check in ovs_dp_reset_user_features
+
+2014-02-13 Stephen Warren <swarren at nvidia.com>
+
+ * ASoC: max98090: make REVISION_ID readable
+
+2014-02-14 Kevin Hao <haokexin at gmail.com>
+
+ * of: search the best compatible match first in __of_match_node()
+
+2014-02-15 Alexander Shiyan <shc_work at mail.ru>
+
+ * ASoC: txx9aclc_ac97: Fix kernel crash on probe
+
+2014-02-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending
+
+2014-02-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'i2c/for-current' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux
+
+2014-02-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branches 'irq-urgent-for-linus' and 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'trace-fixes-v3.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
+
+2014-02-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2014-02-12 Jean-Francois Dagenais <jeff.dagenais at gmail.com>
+
+ * Input: adp5588-keys - get value from data out when dir is out
+
+2014-02-15 Wolfram Sang <wsa at the-dreams.de>
+
+ * Documentation: i2c: mention ACPI method for instantiating devices
+
+2014-02-10 Wolfram Sang <wsa at the-dreams.de>
+
+ * Documentation: i2c: describe devicetree method for instantiating devices
+
+2014-02-15 Filipe David Borba Manana <fdmanana at gmail.com>
+
+ * Btrfs: use right clone root offset for compressed extents
+
+2014-01-15 Anand Jain <Anand.Jain at oracle.com>
+
+ * btrfs: fix null pointer deference at btrfs_sysfs_add_one+0x105
+
+2014-02-13 Wolfram Sang <wsa at the-dreams.de>
+
+ * i2c: mv64xxx: refactor message start to ensure proper initialization
+
+2014-02-15 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / dock: Make 'docked' sysfs attribute work as documented
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'usb-3.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'tty-3.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'staging-3.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'driver-core-3.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'char-misc-3.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
+
+2014-02-14 Pavel Shilovsky <piastry at etersoft.ru>
+
+ * CIFS: Fix too big maxBuf size for SMB3 mounts
+
+2014-02-14 Jeff Layton <jlayton at redhat.com>
+
+ * cifs: ensure that uncached writes handle unmapped areas correctly
+
+2014-02-14 Josef Bacik <jbacik at fb.com>
+
+ * Btrfs: unset DCACHE_DISCONNECTED when mounting default subvol
+
+2014-02-13 Mitch Harder <mitch.harder at sabayonlinux.org>
+
+ * Btrfs: fix max_inline mount option
+
+2014-02-08 Liu Bo <bo.li.liu at oracle.com>
+
+ * Btrfs: fix a lockdep warning when cleaning up aborted transaction
+
+2014-02-14 Chris Mason <clm at fb.com>
+
+ * Revert "btrfs: add ioctl to export size of global metadata reservation"
+
+2014-02-14 Alexander Gordeev <agordeev at redhat.com>
+
+ * ahci: Fix broken fallback to single MSI mode
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14' of git://linux-nfs.org/~bfields/linux
+
+2014-02-14 Bjorn Helgaas <bhelgaas at google.com>
+
+ * PCI: Enable INTx if BIOS left them disabled
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'md/3.14-fixes' of git://neil.brown.name/md
+
+2014-02-12 Brian Norris <computersforpeace at gmail.com>
+
+ * mtd: nand: fix off-by-one read retry mode counting
+
+2014-02-14 Kevin Hao <haokexin at gmail.com>
+
+ * Revert "OF: base: match each node compatible against all given matches first"
+
+2014-02-14 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "misc: eeprom: sunxi: Add new compatibles"
+
+2014-02-14 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "ARM: sunxi: dt: Convert to the new SID compatibles"
+
+2014-02-14 H. Peter Anvin <hpa at linux.intel.com>
+
+ * Merge remote-tracking branch 'efi/urgent' into x86/urgent
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'edac_for_3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fbdev-fixes-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tomba/linux
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.dk/linux-block
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-v3.14-fixes' of git://git.infradead.org/battery-2.6
+
+2014-02-14 Roland Dreier <roland at purestorage.com>
+
+ * Merge branches 'cma', 'cxgb4', 'iser', 'misc', 'mlx4', 'mlx5', 'nes', 'ocrdma', 'qib' and 'usnic' into for-next
+
+2014-02-04 Devesh Sharma <devesh.sharma at emulex.com>
+
+ * RDMA/ocrdma: Fix load time panic during GID table init
+
+2014-02-10 Devesh Sharma <devesh.sharma at emulex.com>
+
+ * RDMA/ocrdma: Fix traffic class shift
+
+2014-01-29 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * IB/iser: Fix use after free in iser_snd_completion()
+
+2014-02-04 Roi Dayan <roid at mellanox.com>
+
+ * IB/iser: Avoid dereferencing iscsi_iser conn object when not bound to iser connection
+
+2014-01-23 Upinder Malhi <umalhi at cisco.com>
+
+ * IB/usnic: Fix smatch endianness error
+
+2014-02-13 Florian Vaussard <florian.vaussard at epfl.ch>
+
+ * Documentation: dt: OMAP: Update Overo/Tobi
+
+2014-02-14 Li Zhong <zhong at linux.vnet.ibm.com>
+
+ * workqueue: add args to workqueue lockdep name
+
+2014-02-02 Christoffer Dall <christoffer.dall at linaro.org>
+
+ * arm64: KVM: Add VGIC device control for arm64
+
+2014-02-13 Matt Fleming <matt.fleming at intel.com>
+
+ * x86/efi: Check status field to validate BGRT header
+
+2014-02-12 Borislav Petkov <bp at suse.de>
+
+ * EDAC: Correct workqueue setup path
+
+2014-02-03 Borislav Petkov <bp at suse.de>
+
+ * EDAC: Poll timeout cannot be zero, p2
+
+2014-02-14 Borislav Petkov <bp at suse.de>
+
+ * x86/efi: Fix 32-bit fallout
+
+2014-01-22 Denis Carikli <denis at eukrea.com>
+
+ * video: Kconfig: Allow more broad selection of the imxfb framebuffer driver.
+
+2014-02-12 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * video: exynos: Fix S6E8AX0 LCD driver build error
+
+2014-02-14 Eli Cohen <eli at mellanox.com>
+
+ * IB/mlx5: Remove dependency on X86
+
+2014-02-13 Roland Dreier <roland at purestorage.com>
+
+ * mlx5: Add include of <linux/slab.h> because of kzalloc()/kfree() use
+
+2014-02-13 Doug Anderson <dianders at chromium.org>
+
+ * hwmon: (ntc_thermistor) Avoid math overflow
+
+2014-02-13 Florian Vaussard <florian.vaussard at epfl.ch>
+
+ * ARM: dts: Add support for both OMAP35xx and OMAP36xx Overo/Tobi
+
+2014-02-13 Florian Vaussard <florian.vaussard at epfl.ch>
+
+ * ARM: dts: omap3-tobi: Use the correct vendor prefix
+
+2014-02-13 Florian Vaussard <florian.vaussard at epfl.ch>
+
+ * ARM: dts: omap3-tobi: Fix boot with OMAP36xx-based Overo
+
+2014-02-12 Paul Bolle <pebolle at tiscali.nl>
+
+ * ARM: OMAP2+: Remove legacy macros for zoom platforms
+
+2014-02-09 Paul Bolle <pebolle at tiscali.nl>
+
+ * ARM: OMAP2+: Remove MACH_NOKIA_N800
+
+2014-02-09 Aaro Koskinen <aaro.koskinen at iki.fi>
+
+ * ARM: dts: N900: add missing compatible property
+
+2014-02-09 Aaro Koskinen <aaro.koskinen at iki.fi>
+
+ * ARM: dts: N9/N950: fix boot hang with 3.14-rc1
+
+2014-02-08 Aaro Koskinen <aaro.koskinen at iki.fi>
+
+ * ARM: OMAP1: nokia770: enable tahvo-usb
+
+2014-01-28 Pekon Gupta <pekon at ti.com>
+
+ * ARM: OMAP2+: gpmc: fix: DT ONENAND child nodes not probed when MTD_ONENAND is built as module
+
+2014-01-28 Pekon Gupta <pekon at ti.com>
+
+ * ARM: OMAP2+: gpmc: fix: DT NAND child nodes not probed when MTD_NAND is built as module
+
+2014-01-25 Marek Belisko <marek at goldelico.com>
+
+ * ARM: dts: omap3-gta04: Fix mmc1 properties.
+
+2014-01-25 NeilBrown <neilb at suse.de>
+
+ * ARM: dts: omap3-gta04: Fix 'aux' gpio key flags.
+
+2014-01-15 Nishanth Menon <nm at ti.com>
+
+ * ARM: OMAP2+: add missing ARCH_HAS_OPP
+
+2014-02-12 Mike Marciniszyn <mike.marciniszyn at intel.com>
+
+ * IB/qib: Add missing serdes init sequence
+
+2014-02-06 Kumar Sanghvi <kumaras at chelsio.com>
+
+ * RDMA/cxgb4: Add missing neigh_release in LE-Workaround path
+
+2014-02-09 Moni Shoua <monis at mellanox.co.il>
+
+ * IB: Report using RoCE IP based gids in port caps
+
+2013-12-23 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ARM: dts: am335x-evmsk: Fix mmc1 support
+
+2013-12-23 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ARM: DTS: am335x-evmsk: Correct audio clock frequency
+
+2013-12-21 Marek Belisko <marek at goldelico.com>
+
+ * ARM: dts: omap3-gta04: Add EOC irq gpio line handling.
+
+2014-02-05 Moni Shoua <monis at mellanox.co.il>
+
+ * IB/mlx4: Build the port IBoE GID table properly under bonding
+
+2014-02-05 Moni Shoua <monis at mellanox.co.il>
+
+ * IB/mlx4: Do IBoE GID table resets per-port
+
+2014-02-05 Moni Shoua <monis at mellanox.co.il>
+
+ * IB/mlx4: Do IBoE locking earlier when initializing the GID table
+
+2014-02-13 Dave Kleikamp <dave.kleikamp at oracle.com>
+
+ * jfs: set i_ctime when setting ACL
+
+2014-02-11 Thomas Gleixner <tglx at linutronix.de>
+
+ * tick: Clear broadcast pending bit when switching to oneshot
+
+2014-02-10 Arnaldo Carvalho de Melo <acme at redhat.com>
+
+ * perf trace: Fix ioctl 'request' beautifier build problems on !(i386 || x86_64) arches
+
+2014-02-12 Russell King - ARM Linux <linux at arm.linux.org.uk>
+
+ * hostap: fix "hostap: proc: Use remove_proc_subtree()"
+
+2014-02-10 Stanislaw Gruszka <stf_xl at wp.pl>
+
+ * rtl8187: fix regression on MIPS without coherent DMA
+
+2014-02-07 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * ath5k: shifting the wrong variable for AR5K_AR5210
+
+2014-02-01 Olivier Langlois <olivier at trillion01.com>
+
+ * rtlwifi: Fix incorrect return from rtl_ps_enable_nic()
+
+2014-02-01 Olivier Langlois <olivier at trillion01.com>
+
+ * rtlwifi: rtl8192ce: Fix too long disable of IRQs
+
+2014-02-13 John W. Linville <linville at tuxdriver.com>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi-fixes
+
+2014-02-07 NeilBrown <neilb at suse.de>
+
+ * lockd: send correct lock when granting a delayed lock.
+
+2014-02-12 Dave Jones <davej at redhat.com>
+
+ * drm/i2c: tda998x: Fix memory leak in tda998x_encoder_init error path.
+
+2014-02-06 Petr Písař <petr.pisar at atlas.cz>
+
+ * vt: Fix secure clear screen
+
+2014-02-13 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * regulator: s5m8767: Add missing of_node_put
+
+2014-02-13 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * regulator: s5m8767: Use of_get_child_by_name
+
+2014-02-11 Joe Schultz <jschultz at xes-inc.com>
+
+ * serial: 8250: Support XR17V35x fraction divisor
+
+2014-02-11 Peter Hurley <peter at hurleysoftware.com>
+
+ * n_tty: Fix stale echo output
+
+2014-01-27 Qipan Li <Qipan.Li at csr.com>
+
+ * serial: sirf: fix kernel panic caused by unpaired spinlock
+
+2014-02-11 Dmitry Eremin-Solenikov <dbaryshkov at gmail.com>
+
+ * serial: 8250_pci: unbreak last serial ports on NetMos 9865 cards
+
+2014-02-11 Peter Hurley <peter at hurleysoftware.com>
+
+ * n_tty: Fix poll() when TIME_CHAR and MIN_CHAR == 0
+
+2014-02-13 Michael Grzeschik <m.grzeschik at pengutronix.de>
+
+ * serial: omap: fix rs485 probe on defered pinctrl
+
+2014-01-16 Mika Westerberg <mika.westerberg at linux.intel.com>
+
+ * serial: 8250_dw: fix compilation warning when !CONFIG_PM_SLEEP
+
+2014-01-24 Markus Pargmann <mpa at pengutronix.de>
+
+ * serial: omap-serial: Move info message to probe function
+
+2014-02-13 Alexander Gordeev <agordeev at redhat.com>
+
+ * PCI/MSI: Add pci_enable_msi_exact() and pci_enable_msix_exact()
+
+2014-02-13 Alexander Gordeev <agordeev at redhat.com>
+
+ * PCI/MSI: Fix cut-and-paste errors in documentation
+
+2014-02-13 Alexander Gordeev <agordeev at redhat.com>
+
+ * PCI/MSI: Add pci_enable_msi() documentation back
+
+2014-02-13 Masanari Iida <standby24x7 at gmail.com>
+
+ * PCI/MSI: Fix pci_msix_vec_count() htmldocs failure
+
+2014-02-13 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * PCI/MSI: Fix leak of msi_attrs
+
+2014-02-13 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * PCI/MSI: Check kmalloc() return value, fix leak of name
+
+2014-02-13 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86, smap: smap_violation() is bogus if CONFIG_X86_SMAP is off
+
+2014-02-13 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86, smap: Don't enable SMAP if CONFIG_X86_SMAP is disabled
+
+2014-02-11 Jani Nikula <jani.nikula at intel.com>
+
+ * drm/i915/dp: add native aux defer retry limit
+
+2014-02-11 Jani Nikula <jani.nikula at intel.com>
+
+ * drm/i915/dp: increase native aux defer retry timeout
+
+2014-02-10 Luis G.F <luisgf at luisgf.es>
+
+ * ACPI / SBS: Fix incorrect sscanf() string
+
+2014-02-12 Shuah Khan <shuah.kh at samsung.com>
+
+ * ACPI / thermal: fix thermal driver compile error when CONFIG_PM_SLEEP is undefined
+
+2014-02-12 Shuah Khan <shuah.kh at samsung.com>
+
+ * ACPI / SBS: fix SBS driver compile error when CONFIG_PM_SLEEP is undefined
+
+2014-02-12 Shuah Khan <shuah.kh at samsung.com>
+
+ * ACPI / fan: fix fan driver compile error when CONFIG_PM_SLEEP is undefined
+
+2014-02-12 Shuah Khan <shuah.kh at samsung.com>
+
+ * ACPI / button: fix button driver compile error when CONFIG_PM_SLEEP is undefined
+
+2014-02-12 Shuah Khan <shuah.kh at samsung.com>
+
+ * ACPI / battery: fix battery driver compile error when CONFIG_PM_SLEEP is undefined
+
+2014-02-12 Shuah Khan <shuah.kh at samsung.com>
+
+ * ACPI / AC: fix AC driver compile error when CONFIG_PM_SLEEP is undefined
+
+2014-02-12 Emmanuel Grumbach <emmanuel.grumbach at intel.com>
+
+ * iwlwifi: disable TX AMPDU by default for iwldvm
+
+2014-02-12 Steven Noonan <steven at uplinklabs.net>
+
+ * compiler/gcc4: Make quirk for asm_volatile_goto() unconditional
+
+2014-02-03 Sumit Semwal <sumit.semwal at linaro.org>
+
+ * dma-buf: update debugfs output
+
+2014-02-06 Oleg Nesterov <oleg at redhat.com>
+
+ * md/raid5: Fix CPU hotplug callback registration
+
+2014-02-13 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-cpufreq'
+
+2014-02-11 Sudeep Holla <sudeep.holla at arm.com>
+
+ * MAINTAINERS / cpufreq: update Sudeep's email address
+
+2014-02-12 Dirk Brandewie <dirk.j.brandewie at intel.com>
+
+ * intel_pstate: Remove energy reporting from pstate_sample tracepoint
+
+2014-02-03 Olof Johansson <olof at lixom.net>
+
+ * dma: mv_xor: Silence a bunch of LPAE-related warnings
+
+2014-02-12 Tejun Heo <tj at kernel.org>
+
+ * Revert "cgroup: use an ordered workqueue for cgroup destruction"
+
+2014-02-12 Sagi Grimberg <sagig at mellanox.com>
+
+ * Target/sbc: Fix protection copy routine
+
+2014-02-05 Jingoo Han <jg1.han at samsung.com>
+
+ * IB/srpt: replace strict_strtoul() with kstrtoul()
+
+2014-02-03 Roland Dreier <roland at purestorage.com>
+
+ * target: Simplify command completion by removing CMD_T_FAILED flag
+
+2014-02-03 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iser-target: Fix leak on failure in isert_conn_create_fastreg_pool
+
+2014-02-03 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi-target: Fix SNACK Type 1 + BegRun=0 handling
+
+2014-02-03 Roland Dreier <roland at purestorage.com>
+
+ * target: Fix missing length check in spc_emulate_evpd_83()
+
+2014-01-31 Roland Dreier <roland at purestorage.com>
+
+ * qla2xxx: Remove last vestiges of qla_tgt_cmd.cmd_list
+
+2014-01-30 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Fix 32-bit + CONFIG_LBDAF=n link error w/ sector_div
+
+2014-01-30 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Fix free-after-use regression in PR unregister
+
+2014-02-05 Andrew Lunn <andrew at lunn.ch>
+
+ * PCI: mvebu: Use Device ID and revision from underlying endpoint
+
+2014-02-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'stable/for-linus-3.14-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
+
+2014-02-12 Dylan Reid <dgreid at chromium.org>
+
+ * ASoC: max98090: sync regcache on entering STANDBY
+
+2013-12-29 Julia Lawall <Julia.Lawall at lip6.fr>
+
+ * RDMA/amso1100: Fix error return code
+
+2013-12-29 Julia Lawall <Julia.Lawall at lip6.fr>
+
+ * RDMA/nes: Fix error return code
+
+2014-02-12 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Fix command defines and checks
+
+2014-02-12 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Fix possible integer overflow
+
+2014-02-12 Theodore Ts'o <tytso at mit.edu>
+
+ * ext4: don't try to modify s_flags if the the file system is read-only
+
+2014-02-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'regulator-v3.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
+
+2014-02-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'gpio-v3.14-2' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio
+
+2014-02-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'spi-v3.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi
+
+2014-02-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2014-02-12 Zheng Liu <wenqing.lz at taobao.com>
+
+ * ext4: fix error paths in swap_inode_boot_loader()
+
+2014-02-12 Martin Kepplinger <martink at posteo.de>
+
+ * ALSA: Revert "ALSA: hda/realtek - Avoid invalid COEFs for ALC271X"
+
+2014-02-12 Jens Axboe <axboe at fb.com>
+
+ * block: add cond_resched() to potentially long running ioctl discard loop
+
+2014-02-12 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: blackfin: Fix machine driver Kconfig dependencies
+
+2014-02-12 Eric Whitney <enwlinux at gmail.com>
+
+ * ext4: fix xfstest generic/299 block validity failures
+
+2014-02-12 Steve Twiss <stwiss.opensource at diasemi.com>
+
+ * regulator: da9063: Bug fix when setting max voltage on LDOs 5-11
+
+2014-02-12 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'acpi-dock', 'acpi-scan' and 'acpi-pci-hotplug'
+
+2014-02-12 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / container: Fix error code path in container_device_attach()
+
+2014-02-12 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Remove stray const
+
+2014-02-12 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'regulator/fix/da9055' and 'regulator/fix/max14577' into regulator-linus
+
+2014-02-11 Mika Westerberg <mika.westerberg at linux.intel.com>
+
+ * ACPI / hotplug / PCI: Relax the checking of _STA return values
+
+2014-02-11 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * drm/vmwgfx: unlock on error path in vmw_execbuf_process()
+
+2014-02-12 Charmaine Lee <charmainel at vmware.com>
+
+ * drm/vmwgfx: Get maximum mob size from register SVGA_REG_MOB_MAX_SIZE
+
+2014-02-06 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Fix a couple of sparse warnings and errors
+
+2014-02-11 Felix Fietkau <nbd at openwrt.org>
+
+ * mac80211: send control port protocol frames to the VO queue
+
+2014-02-05 Ingo Tuchscherer <ingo.tuchscherer at linux.vnet.ibm.com>
+
+ * s390/zcrypt: additional check to avoid overflow in msg-type 6 requests
+
+2014-02-11 Dmitry Osipenko <digetx at gmail.com>
+
+ * drm/tegra: Add guard to avoid double disable/enable of RGB outputs
+
+2014-01-07 Erik Faye-Lund <kusmabite at gmail.com>
+
+ * gpu: host1x: do not check previously handled gathers
+
+2014-02-09 Paul Bolle <pebolle at tiscali.nl>
+
+ * drm/tegra: fix typo 'CONFIG_TEGRA_DRM_FBDEV'
+
+2014-02-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
+
+2014-02-11 Roger Pau Monne <roger.pau at citrix.com>
+
+ * xen-blkback: init persistent_purge_work work_struct
+
+2014-02-11 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * ftrace/x86: Use breakpoints for converting function graph caller
+
+2014-02-11 Randy Dunlap <rdunlap at infradead.org>
+
+ * staging/rtl8821ae: fix build, depends on MAC80211
+
+2014-02-12 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'tda998x-fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-cubox into drm-next
+
+2014-02-09 Raymond Wanyoike <raymond.wanyoike at gmail.com>
+
+ * usb: option: blacklist ZTE MF667 net interface
+
+2014-02-11 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'for-usb-linus-2014-02-11' of git://git.kernel.org/pub/scm/linux/kernel/git/sarah/xhci into usb-linus
+
+2014-02-11 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Prevent MI_DISPLAY_FLIP straddling two cachelines on IVB
+
+2014-02-11 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Add intel_ring_cachline_align()
+
+2014-02-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dt-fixes-for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux
+
+2014-02-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'microblaze-3.14-rc3' of git://git.monstr.eu/linux-2.6-microblaze
+
+2014-02-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
+
+2014-02-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2014-02-11 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * SUNRPC: Fix potential memory scribble in xprt_free_bc_request()
+
+2014-02-11 J. Bruce Fields <bfields at redhat.com>
+
+ * nfsd4: fix acl buffer overrun
+
+2014-02-11 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * ring-buffer: Fix first commit on sub-buffer having non-zero delta
+
+2014-02-11 Linus Walleij <linus.walleij at linaro.org>
+
+ * ARM: ux500: disable msp2 device tree node
+
+2014-02-11 Christoph Hellwig <hch at infradead.org>
+
+ * blk-mq: pair blk_mq_start_request / blk_mq_requeue_request
+
+2014-02-11 Christoph Hellwig <hch at infradead.org>
+
+ * blk-mq: dont assume rq->errors is set when returning an error from ->queue_rq
+
+2014-02-10 Kent Overstreet <kmo at daterainc.com>
+
+ * block: Fix cloning of discard/write same bios
+
+2014-02-11 Li Zefan <lizefan at huawei.com>
+
+ * cgroup: protect modifications to cgroup_idr with cgroup_mutex
+
+2014-02-08 Paul Bolle <pebolle at tiscali.nl>
+
+ * ia64/xen: Remove Xen support for ia64 even more
+
+2014-02-10 David Vrabel <david.vrabel at citrix.com>
+
+ * xen: install xen/gntdev.h and xen/gntalloc.h
+
+2014-02-05 David Vrabel <david.vrabel at citrix.com>
+
+ * xen/events: bind all new interdomain events to VCPU0
+
+2014-02-11 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * SUNRPC: Fix races in xs_nospace()
+
+2014-01-28 Tomi Valkeinen <tomi.valkeinen at ti.com>
+
+ * OMAPDSS: fix fck field types
+
+2014-01-27 Tomi Valkeinen <tomi.valkeinen at ti.com>
+
+ * OMAPDSS: DISPC: decimation rounding fix
+
+2014-02-11 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'spi/fix/doc', 'spi/fix/nuc900' and 'spi/fix/rspi' into spi-linus
+
+2014-02-11 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'spi/fix/core' into spi-linus
+
+2014-02-06 Eytan Lifshitz <eytan.lifshitz at intel.com>
+
+ * mac80211: fix memory leak
+
+2014-02-11 Arik Nemtsov <arik at wizery.com>
+
+ * mac80211: fix sched_scan restart on recovery
+
+2014-02-07 Mika Kuoppala <mika.kuoppala at linux.intel.com>
+
+ * drm/i915: Pair va_copy with va_end in i915_error_vprintf
+
+2014-02-07 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915: Fix intel_pipe_to_cpu_transcoder for UMS
+
+2014-02-10 Paul Gortmaker <paul.gortmaker at windriver.com>
+
+ * genirq: Add missing irq_to_desc export for CONFIG_SPARSE_IRQ=n
+
+2014-01-24 Marek Szyprowski <m.szyprowski at samsung.com>
+
+ * x86: dma-mapping: fix GFP_ATOMIC macro usage
+
+2014-01-16 Marek Szyprowski <m.szyprowski at samsung.com>
+
+ * ARM: dma-mapping: fix GFP_ATOMIC macro usage
+
+2014-02-11 Benjamin Herrenschmidt <benh at kernel.crashing.org>
+
+ * powerpc/powernv: Add iommu DMA bypass support for IODA2
+
+2013-12-20 Thierry Reding <thierry.reding at gmail.com>
+
+ * ARM: pxa: Add dummy backlight power supply on Mitac Mio A701
+
+2014-02-10 Eric Dumazet <edumazet at google.com>
+
+ * 6lowpan: fix lockdep splats
+
+2014-02-10 John Greene <jogreene at redhat.com>
+
+ * alx: add missing stats_lock spinlock init
+
+2014-02-08 Richard Yao <ryao at gentoo.org>
+
+ * 9p/trans_virtio.c: Fix broken zero-copy on vmalloc() buffers
+
+2014-02-10 dingtianhong <dingtianhong at huawei.com>
+
+ * bonding: remove unwanted bond lock for enslave processing
+
+2014-02-10 Liu Junliang <liujunliang_ljl at 163.com>
+
+ * USB2NET : SR9800 : One chip USB2.0 USB2NET SR9800 Device Driver Support
+
+2014-01-22 Anton Blanchard <anton at samba.org>
+
+ * powerpc: Fix endian issues in kexec and crash dump code
+
+2014-01-29 Kevin Hao <haokexin at gmail.com>
+
+ * powerpc/ppc32: Fix the bug in the init of non-base exception stack for UP
+
+2013-12-23 Michael Ellerman <mpe at ellerman.id.au>
+
+ * powerpc/xmon: Don't signal we've entered until we're finished printing
+
+2013-12-23 Michael Ellerman <mpe at ellerman.id.au>
+
+ * powerpc/xmon: Fix timeout loop in get_output_lock()
+
+2013-12-23 Michael Ellerman <michael at ellerman.id.au>
+
+ * powerpc/xmon: Don't loop forever in get_output_lock()
+
+2013-12-18 Anshuman Khandual <khandual at linux.vnet.ibm.com>
+
+ * powerpc/perf: Configure BHRB filter before enabling PMU interrupts
+
+2014-01-29 Nathan Fontenot <nfont at linux.vnet.ibm.com>
+
+ * crypto/nx/nx-842: Fix handling of vmalloc addresses
+
+2013-12-10 Michael Ellerman <mpe at ellerman.id.au>
+
+ * powerpc/pseries: Select ARCH_RANDOM on pseries
+
+2014-01-24 Michael Ellerman <mpe at ellerman.id.au>
+
+ * powerpc/perf: Add Power8 cache & TLB events
+
+2014-01-30 Laurent Dufour <ldufour at linux.vnet.ibm.com>
+
+ * powerpc/relocate fix relocate processing in LE mode
+
+2014-01-31 Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>
+
+ * powerpc: Fix kdump hang issue on p8 with relocation on exception enabled.
+
+2014-01-31 Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>
+
+ * powerpc/pseries: Disable relocation on exception while going down during crash.
+
+2014-02-05 Thadeu Lima de Souza Cascardo <cascardo at linux.vnet.ibm.com>
+
+ * powerpc/eeh: Drop taken reference to driver on eeh_rmv_device
+
+2014-02-07 Paul Gortmaker <paul.gortmaker at windriver.com>
+
+ * powerpc: Fix build failure in sysdev/mpic.c for MPIC_WEIRD=y
+
+2014-02-10 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (patches from Andrew Morton)
+
+2014-02-10 Xue jiufei <xuejiufei at huawei.com>
+
+ * ocfs2: check existence of old dentry in ocfs2_link()
+
+2014-02-10 Junxiao Bi <junxiao.bi at oracle.com>
+
+ * ocfs2: update inode size after zeroing the hole
+
+2014-02-10 Younger Liu <younger.liu at huawei.com>
+
+ * ocfs2: fix issue that ocfs2_setattr() does not deal with new_i_size==i_size
+
+2014-02-10 Naoya Horiguchi <n-horiguchi at ah.jp.nec.com>
+
+ * mm/memory-failure.c: move refcount only in !MF_COUNT_INCREASED
+
+2014-02-10 Paul Gortmaker <paul.gortmaker at windriver.com>
+
+ * smp.h: fix x86+cpu.c sparse warnings about arch nonboot CPU calls
+
+2014-02-10 Rafael Aquini <aquini at redhat.com>
+
+ * mm: fix page leak at nfs_symlink()
+
+2014-02-10 Steven Rostedt <rostedt at goodmis.org>
+
+ * slub: do not assert not having lock in removing freed partial
+
+2014-02-10 Borislav Petkov <bp at suse.de>
+
+ * gitignore: add all.config
+
+2014-02-10 Younger Liu <younger.liucn at gmail.com>
+
+ * ocfs2: fix ocfs2_sync_file() if filesystem is readonly
+
+2014-02-10 Prarit Bhargava <prarit at redhat.com>
+
+ * drivers/edac/edac_mc_sysfs.c: poll timeout cannot be zero
+
+2014-02-10 Eric W. Biederman <ebiederm at xmission.com>
+
+ * fs/file.c:fdtable: avoid triggering OOMs from alloc_fdmem
+
+2014-02-10 Mel Gorman <mgorman at suse.de>
+
+ * xen: properly account for _PAGE_NUMA during xen pte translations
+
+2014-02-10 David Rientjes <rientjes at google.com>
+
+ * mm/slub.c: list_lock may not be held in some circumstances
+
+2014-02-09 John Ogness <john.ogness at linutronix.de>
+
+ * tcp: tsq: fix nonagle handling
+
+2014-02-10 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'bridge'
+
+2014-02-07 Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>
+
+ * bridge: Prevent possible race condition in br_fdb_change_mac_address
+
+2014-02-07 Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>
+
+ * bridge: Properly check if local fdb entry can be deleted when deleting vlan
+
+2014-02-07 Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>
+
+ * bridge: Properly check if local fdb entry can be deleted in br_fdb_delete_by_port
+
+2014-02-07 Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>
+
+ * bridge: Properly check if local fdb entry can be deleted in br_fdb_change_mac_address
+
+2014-02-07 Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>
+
+ * bridge: Fix the way to check if a local fdb entry can be deleted
+
+2014-02-07 Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>
+
+ * bridge: Change local fdb entries whenever mac address of bridge device changes
+
+2014-02-07 Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>
+
+ * bridge: Fix the way to find old local fdb entries in br_fdb_change_mac_address
+
+2014-02-10 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * SUNRPC: Don't create a gss auth cache unless rpc.gssd is running
+
+2014-02-10 Ivan Khoronzhuk <ivan.khoronzhuk at ti.com>
+
+ * ARM: keystone: dts: fix clkvcp3 control register address
+
+2014-01-28 Ivan Khoronzhuk <ivan.khoronzhuk at ti.com>
+
+ * clk: keystone: gate: fix clk_init_data initialization
+
+2014-02-10 Steve French <smfrench at gmail.com>
+
+ * [CIFS] Fix cifsacl mounts over smb2 to not call cifs
+
+2014-02-10 Jens Axboe <axboe at fb.com>
+
+ * Merge branch 'stable/for-jens-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip into for-linus
+
+2014-02-03 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * m68k: Wire up sched_setattr and sched_getattr
+
+2014-02-03 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * m68k: Switch to asm-generic/barrier.h
+
+2014-02-03 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * m68k: Sort arch/m68k/include/asm/Kbuild
+
+2014-01-31 Michal Simek <michal.simek at xilinx.com>
+
+ * ARM: zynq: Reserve not DMAable space in front of the kernel
+
+2014-02-10 Kevin Hilman <khilman at linaro.org>
+
+ * Merge tag 'at91-fixes' of git://github.com/at91linux/linux-at91 into fixes
+
+2014-02-06 Nishanth Menon <nm at ti.com>
+
+ * ARM: multi_v7_defconfig: Select CONFIG_SOC_DRA7XX
+
+2014-01-29 Philipp Zabel <p.zabel at pengutronix.de>
+
+ * ARM: imx6: Initialize low-power mode early again
+
+2014-02-10 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-next' of git://git.samba.org/sfrench/cifs-2.6
+
+2014-02-04 Linus Walleij <linus.walleij at linaro.org>
+
+ * ARM: pxa: fix various compilation problems
+
+2014-02-04 Linus Walleij <linus.walleij at linaro.org>
+
+ * ARM: pxa: fix compilation problem on AM300EPD board
+
+2014-02-10 Kevin Hilman <khilman at linaro.org>
+
+ * Merge tag 'mvebu-phy_ata-fixes-3.14' of git://git.infradead.org/linux-mvebu into fixes
+
+2014-02-04 Will Deacon <will.deacon at arm.com>
+
+ * iommu/arm-smmu: fix compilation issue when !CONFIG_ARM_AMBA
+
+2014-02-06 Will Deacon <will.deacon at arm.com>
+
+ * iommu/arm-smmu: set CBARn.BPSHCFG to NSH for s1-s2-bypass contexts
+
+2014-02-05 Will Deacon <will.deacon at arm.com>
+
+ * iommu/arm-smmu: fix table flushing during initial allocations
+
+2014-02-04 Will Deacon <will.deacon at arm.com>
+
+ * iommu/arm-smmu: really fix page table locking
+
+2014-01-03 Yifan Zhang <zhangyf at marvell.com>
+
+ * iommu/arm-smmu: fix pud/pmd entry fill sequence
+
+2014-02-06 Ben Hutchings <ben at decadent.org.uk>
+
+ * perf trace: Add fallback definition of EFD_SEMAPHORE
+
+2013-12-30 Vince Weaver <vincent.weaver at maine.edu>
+
+ * perf list: Fix checking for supported events on older kernels
+
+2014-02-04 Jiri Olsa <jolsa at redhat.com>
+
+ * perf tools: Handle PERF_RECORD_HEADER_EVENT_TYPE properly
+
+2014-02-05 Masami Hiramatsu <masami.hiramatsu.pt at hitachi.com>
+
+ * perf probe: Do not add offset twice to uprobe address
+
+2014-02-06 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFS: Do not set NFS_INO_INVALID_LABEL unless server supports labeled NFS
+
+2014-02-06 Adam Thomson <Adam.Thomson.Opensource at diasemi.com>
+
+ * ASoC: da9055: Fix device registration of PMIC and CODEC devices
+
+2014-02-10 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: fsl-esai: fix ESAI TDM slot setting
+
+2014-02-08 Shawn Guo <shawn.guo at linaro.org>
+
+ * ASoC: fsl: fix pm support of machine drivers
+
+2014-02-10 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / dock: Use acpi_device_enumerated() to check if dock is present
+
+2014-02-07 Will Deacon <will.deacon at arm.com>
+
+ * ARM: 7955/1: spinlock: ensure we have a compiler barrier before sev
+
+2014-02-07 Will Deacon <will.deacon at arm.com>
+
+ * ARM: 7953/1: mm: ensure TLB invalidation is complete before enabling MMU
+
+2014-02-06 Santosh Shilimkar <santosh.shilimkar at ti.com>
+
+ * ARM: 7952/1: mm: Fix the memblock allocation for LPAE machines
+
+2014-02-02 Christoffer Dall <christoffer.dall at linaro.org>
+
+ * ARM: 7950/1: mm: Fix stage-2 device memory attributes
+
+2014-02-10 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix undefined symbol due to builtin/module mixup
+
+2014-02-08 Edgar E. Iglesias <edgar.iglesias at gmail.com>
+
+ * microblaze: Fix a typo when disabling stack protection
+
+2014-01-30 Michal Simek <michal.simek at xilinx.com>
+
+ * microblaze: Define readq and writeq IO helper function
+
+2014-01-30 Michal Simek <michal.simek at xilinx.com>
+
+ * microblaze: Fix missing HZ macro
+
+2014-02-09 Jesper Juhl <jj at chaosbits.net>
+
+ * tcp: correct code comment stating 3 min timeout for FIN_WAIT2, we only do 1 min
+
+2014-02-09 Christian Engelmayer <cengelma at gmx.at>
+
+ * net: vxge: Remove unused device pointer
+
+2014-02-09 Raymond Wanyoike <raymond.wanyoike at gmail.com>
+
+ * net: qmi_wwan: add ZTE MF667
+
+2014-02-08 Christian Engelmayer <cengelma at gmx.at>
+
+ * 3c59x: Remove unused pointer in vortex_eisa_cleanup()
+
+2014-02-07 Maciej Żenczykowski <maze at google.com>
+
+ * net: fix 'ip rule' iif/oif device rename
+
+2014-02-08 Christian Engelmayer <cengelma at gmx.at>
+
+ * wan: dlci: Remove unused netdev_priv pointer
+
+2014-02-07 Christian Engelmayer <cengelma at gmx.at>
+
+ * 6lowpan: Remove unused pointer in lowpan_header_create()
+
+2014-02-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.14-rc2
+
+2014-02-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security
+
+2014-02-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
+
+2014-02-10 James Morris <james.l.morris at oracle.com>
+
+ * Merge branch 'stable-3.14' of git://git.infradead.org/users/pcmoore/selinux into for-linus
+
+2014-02-10 Dave Chinner <dchinner at redhat.com>
+
+ * xfs: ensure correct log item buffer alignment
+
+2014-02-10 Christoph Hellwig <hch at infradead.org>
+
+ * xfs: ensure correct timestamp updates from truncate
+
+2014-02-02 Al Viro <viro at zeniv.linux.org.uk>
+
+ * fix a kmap leak in virtio_console
+
+2014-02-09 Al Viro <viro at zeniv.linux.org.uk>
+
+ * fix O_SYNC|O_APPEND syncing the wrong range on write()
+
+2014-02-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
+
+2014-02-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-02 Stephen Boyd <sboyd at codeaurora.org>
+
+ * genirq: Add devm_request_any_context_irq()
+
+2014-02-04 Steven Rostedt <rostedt at goodmis.org>
+
+ * x86: Use preempt_disable_notrace() in cycles_2_ns()
+
+2014-02-05 Peter Zijlstra <peterz at infradead.org>
+
+ * perf/x86: Fix Userspace RDPMC switch
+
+2014-02-05 Peter Zijlstra <peterz at infradead.org>
+
+ * perf/x86/intel/p6: Add userspace RDPMC quirk for PPro
+
+2014-02-08 Filipe David Borba Manana <fdmanana at gmail.com>
+
+ * Btrfs: fix data corruption when reading/updating compressed extents
+
+2014-02-07 Josef Bacik <jbacik at fb.com>
+
+ * Btrfs: don't loop forever if we can't run because of the tree mod log
+
+2014-02-07 David Sterba <dsterba at suse.cz>
+
+ * btrfs: reserve no transaction units in btrfs_ioctl_set_features
+
+2014-02-07 Jeff Mahoney <jeffm at suse.com>
+
+ * btrfs: commit transaction after setting label and features
+
+2014-02-05 Josef Bacik <jbacik at fb.com>
+
+ * Btrfs: fix assert screwup for the pending move stuff
+
+2014-02-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pinctrl-v3.14-2' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl
+
+2014-02-08 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'iio-fixes-for-3.14b' of git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio into staging-linus
+
+2014-02-08 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * ARM: dts: fix spdif pinmux configuration
+
+2014-02-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'irq-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'jfs-3.14-rc2' of git://github.com/kleikamp/linux-shaggy
+
+2014-02-07 Dave Kleikamp <dave.kleikamp at oracle.com>
+
+ * jfs: fix generic posix ACL regression
+
+2014-02-08 Tejun Heo <tj at kernel.org>
+
+ * cgroup: fix locking in cgroup_cfts_commit()
+
+2014-02-08 Tejun Heo <tj at kernel.org>
+
+ * cgroup: fix error return from cgroup_create()
+
+2014-02-08 Tejun Heo <tj at kernel.org>
+
+ * cgroup: fix error return value in cgroup_mount()
+
+2014-01-27 Guenter Roeck <linux at roeck-us.net>
+
+ * iio: max1363: Use devm_regulator_get_optional for optional regulator
+
+2014-01-10 Peter Meerwald <pmeerw at pmeerw.net>
+
+ * iio:accel:bma180: Use modifier instead of index in channel specification
+
+2014-01-24 Marcus Folkesson <marcus.folkesson at gmail.com>
+
+ * iio: adis16400: Set timestamp as the last element in chan_spec
+
+2014-02-04 Beomho Seo <beomho.seo at samsung.com>
+
+ * iio: ak8975: Fix calculation formula for convert micro tesla to gauss unit
+
+2014-02-08 Hartmut Knaack <knaack.h at gmx.de>
+
+ * staging:iio:ad799x fix typo in ad799x_events[]
+
+2014-01-13 Alexandre Belloni <alexandre.belloni at free-electrons.com>
+
+ * iio: mxs-lradc: remove useless scale_available files
+
+2014-01-13 Alexandre Belloni <alexandre.belloni at free-electrons.com>
+
+ * iio: mxs-lradc: fix buffer overflow
+
+2014-01-10 Peter Meerwald <pmeerw at pmeerw.net>
+
+ * iio:magnetometer:mag3110: Fix output of decimal digits in show_int_plus_micros()
+
+2014-01-10 Peter Meerwald <pmeerw at pmeerw.net>
+
+ * iio:magnetometer:mag3110: Report busy in _read_raw() / write_raw() when buffer is enabled
+
+2014-01-31 Richard Weinberger <richard at nod.at>
+
+ * watchdog: dw_wdt: Add dependency on HAS_IOMEM
+
+2014-02-07 Steve French <smfrench at gmail.com>
+
+ * [CIFS] clean up page array when uncached write send fails
+
+2014-02-07 Jeff Layton <jlayton at redhat.com>
+
+ * cifs: use a flexarray in cifs_writedata
+
+2014-02-07 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * drivers/base: fix devres handling for master device
+
+2014-02-03 Sudeep Dutt <sudeep.dutt at intel.com>
+
+ * misc: mic: fix possible signed underflow (undefined behavior) in userspace API
+
+2014-02-02 Maxime Ripard <maxime.ripard at free-electrons.com>
+
+ * ARM: sunxi: dt: Convert to the new SID compatibles
+
+2014-02-02 Maxime Ripard <maxime.ripard at free-electrons.com>
+
+ * misc: eeprom: sunxi: Add new compatibles
+
+2014-01-25 Christian Engelmayer <cengelma at gmx.at>
+
+ * misc: genwqe: Fix potential memory leak when pinning memory
+
+2014-01-28 Fu Wei <wefu at redhat.com>
+
+ * Documentation:Update Documentation/zh_CN/arm64/memory.txt
+
+2014-01-28 Fu Wei <wefu at redhat.com>
+
+ * Documentation:Update Documentation/zh_CN/arm64/booting.txt
+
+2014-01-28 Fu Wei <wefu at redhat.com>
+
+ * Documentation:Chinese translation of Documentation/arm64/tagged-pointers.txt
+
+2014-01-31 Sarah Sharp <sarah.a.sharp at linux.intel.com>
+
+ * Revert "usb: xhci: Link TRB must not occur within a USB payload burst"
+
+2014-01-31 Sarah Sharp <sarah.a.sharp at linux.intel.com>
+
+ * Revert "xhci: Avoid infinite loop when sg urb requires too many trbs"
+
+2014-01-31 Sarah Sharp <sarah.a.sharp at linux.intel.com>
+
+ * Revert "xhci: Set scatter-gather limit to avoid failed block writes."
+
+2014-01-31 Sarah Sharp <sarah.a.sharp at linux.intel.com>
+
+ * xhci 1.0: Limit arbitrarily-aligned scatter gather.
+
+2014-02-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'driver-core-3.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core
+
+2014-02-02 Maxime Ripard <maxime.ripard at free-electrons.com>
+
+ * ARM: sunxi: dt: Change the touchscreen compatibles
+
+2014-02-01 Maxime Ripard <maxime.ripard at free-electrons.com>
+
+ * ARM: sun7i: dt: Fix interrupt trigger types
+
+2014-02-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
+
+2014-02-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
+
+2014-02-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'upstream' of git://git.linux-mips.org/pub/scm/ralf/upstream-linus
+
+2014-02-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'v4l_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
+
+2014-02-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging
+
+2014-02-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2014-02-05 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: do not dereference a NULL bio pointer
+
+2014-02-07 H. Peter Anvin <hpa at linux.intel.com>
+
+ * Merge tag 'efi-urgent' into x86/urgent
+
+2014-02-07 Jan Moskyto Matejka <mq at suse.cz>
+
+ * Modpost: fixed USB alias generation for ranges including 0x9 and 0xA
+
+2014-02-05 Maurizio Lombardi <mlombard at redhat.com>
+
+ * wlags49_h2: Fix overflow in wireless_set_essid()
+
+2014-02-05 Alan Cox <alan at linux.intel.com>
+
+ * xlr_net: Fix missing trivial allocation check
+
+2014-02-04 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * staging: r8188eu: overflow in rtw_p2p_get_go_device_address()
+
+2014-02-04 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * staging: r8188eu: array overflow in rtw_mp_ioctl_hdl()
+
+2014-02-02 Larry Finger <Larry.Finger at lwfinger.net>
+
+ * staging: r8188eu: Fix typo in USB_DEVICE list
+
+2014-01-28 Heinrich Schuchardt <xypron.glpk at gmx.de>
+
+ * usbip/userspace/libsrc/names.c: memory leak
+
+2014-01-22 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * gpu: ion: dereferencing an ERR_PTR
+
+2014-01-21 Ian Abbott <abbotti at mev.co.uk>
+
+ * staging: comedi: usbduxsigma: fix unaligned dereferences
+
+2014-01-21 Ian Abbott <abbotti at mev.co.uk>
+
+ * staging: comedi: fix too early cleanup in comedi_auto_config()
+
+2014-01-20 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * staging: android: ion: dummy: fix an error code
+
+2014-02-03 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: take map_sem for read in handle_reply()
+
+2014-01-31 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: factor out logic from ceph_osdc_start_request()
+
+2014-01-24 Cédric Dufour - Idiap Research Institute <cedric.dufour at idiap.ch>
+
+ * staging: lustre: fix quotactl permission denied (LU-4530)
+
+2014-02-04 Prakash Kamliya <pkamliya at codeaurora.org>
+
+ * staging: android: sync: Signal pt before sync_timeline object gets destroyed
+
+2014-02-05 H Hartley Sweeten <hsweeten at visionengravers.com>
+
+ * staging: comedi: adv_pci1710: fix analog output readback value
+
+2014-02-06 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * staging: r8188eu: memory corruption handling long ssids
+
+2014-02-07 Mark Rutland <mark.rutland at arm.com>
+
+ * arm64: defconfig: Expand default enabled features
+
+2014-02-02 Steve French <smfrench at gmail.com>
+
+ * retrieving CIFS ACLs when mounted with SMB2 fails dropping session
+
+2014-02-01 Steve French <smfrench at gmail.com>
+
+ * Add protocol specific operation for CIFS xattrs
+
+2014-02-04 Will Deacon <will.deacon at arm.com>
+
+ * arm64: asm: remove redundant "cc" clobbers
+
+2014-02-04 Will Deacon <will.deacon at arm.com>
+
+ * arm64: atomics: fix use of acquire + release for full barrier semantics
+
+2014-02-07 Hannes Reinecke <hare at suse.de>
+
+ * tty: Set correct tty name in 'active' sysfs attribute
+
+2014-01-07 Lars Poeschel <poeschel at lemonage.de>
+
+ * tty: n_gsm: Fix for modems with brk in modem status control
+
+2014-01-15 Paul Gortmaker <paul.gortmaker at windriver.com>
+
+ * drivers/tty/hvc: don't use module_init in non-modular hyp. console code
+
+2014-02-04 Paul Bolle <pebolle at tiscali.nl>
+
+ * raw: set range for MAX_RAW_DEVS
+
+2014-02-04 Paul Bolle <pebolle at tiscali.nl>
+
+ * raw: test against runtime value of max_raw_minors
+
+2014-02-06 Adam Thomson <Adam.Thomson.Opensource at diasemi.com>
+
+ * regulator: da9055: Remove use of regmap_irq_get_virq()
+
+2014-01-16 K. Y. Srinivasan <kys at microsoft.com>
+
+ * Drivers: hv: vmbus: Don't timeout during the initial connection with host
+
+2014-01-15 K. Y. Srinivasan <kys at microsoft.com>
+
+ * Drivers: hv: vmbus: Specify the target CPU that should receive notification
+
+2014-02-04 Nicolas Ferre <nicolas.ferre at atmel.com>
+
+ * ARM: at91: add Atmel's SAMA5D3 Xplained board
+
+2013-12-17 Boris BREZILLON <b.brezillon at overkiz.com>
+
+ * spi/atmel: document clock properties
+
+2013-12-17 Boris BREZILLON <b.brezillon at overkiz.com>
+
+ * mmc: atmel-mci: document clock properties
+
+2014-01-14 Bo Shen <voice.shen at atmel.com>
+
+ * ARM: at91: enable USB host on at91sam9n12ek board
+
+2014-01-16 Boris BREZILLON <b.brezillon at overkiz.com>
+
+ * ARM: at91/dt: fix sama5d3 ohci hclk clock reference
+
+2014-01-15 Jean-Jacques Hiblot <jjhiblot at traphandler.com>
+
+ * ARM: at91/dt: sam9263: fix compatibility string for the I2C
+
+2014-02-07 Martyn Welch <martyn.welch at ge.com>
+
+ * VME: Correct read/write alignment algorithm
+
+2014-02-07 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915: Disable dp aux irq on g4x
+
+2014-02-06 Hugh Dickins <hughd at google.com>
+
+ * cgroup: use an ordered workqueue for cgroup destruction
+
+2014-02-07 Jarkko Nikula <jarkko.nikula at linux.intel.com>
+
+ * ASoC: rt5640: Add ACPI ID for Intel Baytrail
+
+2014-02-07 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix mic capture on Sony VAIO Pro 11
+
+2014-02-07 David Henningsson <david.henningsson at canonical.com>
+
+ * ALSA: hda - Add a headset quirk for Dell XPS 13
+
+2014-01-30 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix inconsistent Mic mute LED
+
+2014-02-06 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix leftover ifdef checks after modularization
+
+2014-02-06 Eli Cohen <eli at dev.mellanox.co.il>
+
+ * IB/mlx5: Don't set "block multicast loopback" capability
+
+2014-02-06 Adam Thomson <Adam.Thomson.Opensource at diasemi.com>
+
+ * hwmon: (da9055) Remove use of regmap_irq_get_virq()
+
+2014-02-06 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'acpi-cleanup' and 'acpi-video'
+
+2014-02-06 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-cpufreq'
+
+2014-02-06 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'acpi-pci-hotplug' and 'acpi-hotplug'
+
+2014-02-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (patches from Andrew Morton)
+
+2014-02-06 KOSAKI Motohiro <kosaki.motohiro at jp.fujitsu.com>
+
+ * mm: __set_page_dirty uses spin_lock_irqsave instead of spin_lock_irq
+
+2014-02-06 Tang Chen <tangchen at cn.fujitsu.com>
+
+ * arch/x86/mm/numa.c: fix array index overflow when synchronizing nid to memblock.reserved.
+
+2014-02-06 Tang Chen <tangchen at cn.fujitsu.com>
+
+ * arch/x86/mm/numa.c: initialize numa_kernel_nodes in numa_clear_kernel_node_hotplug()
+
+2014-02-06 KOSAKI Motohiro <kosaki.motohiro at jp.fujitsu.com>
+
+ * mm: __set_page_dirty_nobuffers() uses spin_lock_irqsave() instead of spin_lock_irq()
+
+2014-02-06 Weijie Yang <weijie.yang at samsung.com>
+
+ * mm/swap: fix race on swap_info reuse between swapoff and swapon
+
+2014-02-06 Shaohua Li <shli at kernel.org>
+
+ * swap: add a simple detector for inappropriate swapin readahead
+
+2014-02-06 Zongxun Wang <wangzongxun at huawei.com>
+
+ * ocfs2: free allocated clusters if error occurs after ocfs2_claim_clusters
+
+2014-02-06 Randy Dunlap <rdunlap at infradead.org>
+
+ * Documentation/kernel-parameters.txt: fix memmap= language
+
+2014-02-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2014-02-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2014-02-05 andrea.merello <andrea.merello at gmail.com>
+
+ * rtl8180: Add error check for pci_map_single return value in TX path
+
+2014-02-05 andrea.merello <andrea.merello at gmail.com>
+
+ * rtl8180: Add error check for pci_map_single return value in RX path
+
+2014-02-06 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'for-john' of git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211
+
+2014-02-03 Borislav Petkov <bp at suse.de>
+
+ * x86, microcode, AMD: Unify valid container checks
+
+2014-01-25 Sebastian Hesselbarth <sebastian.hesselbarth at gmail.com>
+
+ * clk: mvebu: kirkwood: maintain clock init order
+
+2014-01-25 Sebastian Hesselbarth <sebastian.hesselbarth at gmail.com>
+
+ * clk: mvebu: dove: maintain clock init order
+
+2014-01-25 Sebastian Hesselbarth <sebastian.hesselbarth at gmail.com>
+
+ * clk: mvebu: armada-xp: maintain clock init order
+
+2014-01-25 Sebastian Hesselbarth <sebastian.hesselbarth at gmail.com>
+
+ * clk: mvebu: armada-370: maintain clock init order
+
+2014-01-24 Sebastian Hesselbarth <sebastian.hesselbarth at gmail.com>
+
+ * irqchip: orion: clear stale interrupts in irq_startup
+
+2014-01-23 Sebastian Hesselbarth <sebastian.hesselbarth at gmail.com>
+
+ * irqchip: orion: use handle_edge_irq on bridge irqs
+
+2014-01-23 Sebastian Hesselbarth <sebastian.hesselbarth at gmail.com>
+
+ * irqchip: orion: clear bridge cause register on init
+
+2014-02-06 Peter Oberparleiter <oberpar at linux.vnet.ibm.com>
+
+ * x86, hweight: Fix BUG when booting with CONFIG_GCOV_PROFILE_ALL=y
+
+2014-02-04 Tim Kryger <tim.kryger at linaro.org>
+
+ * clocksource: Kona: Print warning rather than panic
+
+2014-01-24 Mikulas Patocka <mpatocka at redhat.com>
+
+ * time: Fix overflow when HZ is smaller than 60
+
+2014-02-06 Huei-Horng Yo <hiroshi at ghostsinthelab.org>
+
+ * HID: apple: add Apple wireless keyboard 2011 JIS model support
+
+2014-02-05 Laxman Dewangan <ldewangan at nvidia.com>
+
+ * pinctrl: tegra: return correct error type
+
+2014-02-05 Florian Vaussard <florian.vaussard at epfl.ch>
+
+ * pinctrl: do not init debugfs entries for unimplemented functionalities
+
+2014-02-05 Aaro Koskinen <aaro.koskinen at iki.fi>
+
+ * MIPS: fpu.h: Fix build when CONFIG_BUG is not set
+
+2014-02-06 Will Deacon <will.deacon at arm.com>
+
+ * arm64: barriers: allow dsb macro to take option parameter
+
+2014-02-05 Sebastian Ott <sebott at linux.vnet.ibm.com>
+
+ * s390/cio: improve cio_commit_config
+
+2014-02-04 Lars-Peter Clausen <lars at metafoo.de>
+
+ * gpio: consumer.h: Move forward declarations outside #ifdef
+
+2014-01-29 Johannes Berg <johannes.berg at intel.com>
+
+ * mac80211: fix virtual monitor interface iteration
+
+2014-02-01 Johannes Berg <johannes.berg at intel.com>
+
+ * mac80211: fix fragmentation code, particularly for encryption
+
+2014-01-30 Sujith Manoharan <c_manoha at qca.qualcomm.com>
+
+ * mac80211: Fix IBSS disconnect
+
+2014-01-27 Emmanuel Grumbach <emmanuel.grumbach at intel.com>
+
+ * mac80211: release the channel in error path in start_ap
+
+2014-01-22 Johannes Berg <johannes.berg at intel.com>
+
+ * cfg80211: send scan results from work queue
+
+2014-01-22 Johannes Berg <johannes.berg at intel.com>
+
+ * cfg80211: fix scan done race
+
+2014-01-30 Dave Airlie <airlied at redhat.com>
+
+ * drm/radeon: allow geom rings to be setup on r600/r700 (v2)
+
+2014-02-06 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'vmwgfx-fixes-3.14-2014-02-05' of git://people.freedesktop.org/~thomash/linux into drm-next
+
+2014-02-06 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'ttm-fixes-3.14-2014-02-05' of git://people.freedesktop.org/~thomash/linux into drm-next
+
+2014-02-05 Dave Airlie <airlied at redhat.com>
+
+ * drm/mgag200,ast,cirrus: fix regression with drm_can_sleep conversion
+
+2014-02-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'stable/for-linus-3.14-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
+
+2014-02-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'please-pull-ia64-syscalls' of git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux
+
+2014-02-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.infradead.org/users/willy/linux-nvme
+
+2014-02-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'regulator-v3.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
+
+2014-02-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
+
+2014-01-14 Matt Fleming <matt.fleming at intel.com>
+
+ * x86/efi: Allow mapping BGRT on x86-32
+
+2014-02-05 Ingo Molnar <mingo at kernel.org>
+
+ * x86: Disable CONFIG_X86_DECODER_SELFTEST in allmod/allyesconfigs
+
+2014-02-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * execve: use 'struct filename *' for executable name passing
+
+2014-01-29 Tejun Heo <tj at kernel.org>
+
+ * kernfs: make kernfs_deactivate() honor KERNFS_LOCKDEP flag
+
+2014-01-28 Christian Engelmayer <cengelma at gmx.at>
+
+ * usb: core: Fix potential memory leak adding dyn USBdevice IDs
+
+2014-02-02 Ulrich Hahn <uhahn at eanco.de>
+
+ * USB: ftdi_sio: add Tagsys RFID Reader IDs
+
+2014-02-04 Bjørn Mork <bjorn at mork.no>
+
+ * usb: qcserial: add Netgear Aircard 340U
+
+2014-01-30 Stephen Smalley <sds at tycho.nsa.gov>
+
+ * SELinux: Fix kernel BUG on empty security contexts.
+
+2014-01-28 Paul Moore <pmoore at redhat.com>
+
+ * selinux: add SOCK_DIAG_BY_FAMILY to the list of netlink message types
+
+2014-02-05 Krzysztof Kozlowski <k.kozlowski at samsung.com>
+
+ * regulator: max14577: Add missing of_node_put
+
+2014-02-04 Geert Uytterhoeven <geert+renesas at linux-m68k.org>
+
+ * DT: Add vendor prefix for Spansion Inc.
+
+2014-02-03 Geert Uytterhoeven <geert+renesas at linux-m68k.org>
+
+ * of/device: Nullify match table in of_match_device() for CONFIG_OF=n
+
+2014-01-30 Heiko Stuebner <heiko.stuebner at bqreaders.com>
+
+ * dt-bindings: add vendor-prefix for neonode
+
+2014-02-03 Kleber Sacilotto de Souza <klebers at linux.vnet.ibm.com>
+
+ * of: fix PCI bus match for PCIe slots
+
+2014-02-03 Rob Herring <robh at kernel.org>
+
+ * of: restructure for_each macros to fix compile warnings
+
+2014-01-28 Paul Moore <pmoore at redhat.com>
+
+ * Merge tag 'v3.13' into stable-3.14
+
+2014-02-05 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/ttm: Don't clear page metadata of imported sg pages
+
+2014-02-04 Colin Cross <ccross at android.com>
+
+ * security: select correct default LSM_MMAP_MIN_ADDR on arm on arm64
+
+2014-02-05 Catalin Marinas <catalin.marinas at arm.com>
+
+ * arm64: compat: Wire up new AArch32 syscalls
+
+2014-02-03 Nathan Lynch <nathan_lynch at mentor.com>
+
+ * arm64: vdso: update wtm fields for CLOCK_MONOTONIC_COARSE
+
+2014-02-05 Nathan Lynch <nathan_lynch at mentor.com>
+
+ * arm64: vdso: fix coarse clock handling
+
+2014-02-04 Toshi Kani <toshi.kani at hp.com>
+
+ * ACPI / hotplug: Fix panic on eject to ejected device
+
+2014-02-05 Mark Rutland <mark.rutland at arm.com>
+
+ * arm64: simplify pgd_alloc
+
+2014-02-05 Mark Rutland <mark.rutland at arm.com>
+
+ * arm64: fix typo: s/SERRROR/SERROR/
+
+2014-02-04 Catalin Marinas <catalin.marinas at arm.com>
+
+ * arm64: Invalidate the TLB when replacing pmd entries during boot
+
+2014-02-04 Laura Abbott <lauraa at codeaurora.org>
+
+ * arm64: Align CMA sizes to PAGE_SIZE
+
+2014-02-05 Vinayak Kale <vkale at apm.com>
+
+ * arm64: add DSB after icache flush in __flush_icache_all()
+
+2014-02-04 Axel Lin <axel.lin at ingics.com>
+
+ * gpio: tb10x: GPIO_TB10X needs to select GENERIC_IRQ_CHIP
+
+2014-02-04 Axel Lin <axel.lin at ingics.com>
+
+ * gpio: clps711x: Add module alias to support module auto loading
+
+2014-02-03 Martin Schwidefsky <schwidefsky at de.ibm.com>
+
+ * s390: fix kernel crash due to linkage stack instructions
+
+2014-01-30 Nitin A Kamble <nitin.a.kamble at intel.com>
+
+ * genirq: Generic irq chip requires IRQ_DOMAIN
+
+2014-01-24 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/ttm: Fix TTM object open regression
+
+2014-01-30 Dave Jones <davej at redhat.com>
+
+ * vmwgfx: Fix unitialized stack read in vmw_setup_otable_base
+
+2014-02-05 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Improve loopback path lookups for AD1983
+
+2014-02-05 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Reemit context bindings when necessary v2
+
+2014-01-31 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Detect old user-space drivers and set up legacy emulation v2
+
+2014-01-31 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Emulate legacy shaders on guest-backed devices v2
+
+2014-01-30 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Fix legacy surface reference size copyback
+
+2014-01-30 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Fix SET_SHADER_CONST emulation on guest-backed devices
+
+2014-01-30 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Fix regression caused by "drm/ttm: make ttm reservation calls behave like reservation calls"
+
+2014-01-30 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Don't commit staged bindings if execbuf fails
+
+2014-02-03 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix missing VREF setup for Mac Pro 1,1
+
+2014-02-02 Andy Zhou <azhou at nicira.com>
+
+ * openvswitch: Suppress error messages on megaflow updates
+
+2014-02-05 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Add missing mixer widget for AD1983
+
+2014-01-31 Pravin B Shelar <pshelar at nicira.com>
+
+ * openvswitch: Fix ovs_flow_free() ovs-lock assert.
+
+2014-01-23 Daniele Di Proietto <daniele.di.proietto at gmail.com>
+
+ * openvswitch: Fix ovs_dp_cmd_msg_size()
+
+2014-01-21 Andy Zhou <azhou at nicira.com>
+
+ * openvswitch: Fix kernel panic on ovs_flow_free
+
+2014-01-14 Thomas Graf <tgraf at suug.ch>
+
+ * openvswitch: Pad OVS_PACKET_ATTR_PACKET if linear copy was performed
+
+2014-02-03 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda/realtek - Avoid invalid COEFs for ALC271X
+
+2014-02-04 Andrew Lunn <andrew at lunn.ch>
+
+ * ata: sata_mv: Fix probe failures with optional phys
+
+2014-02-04 Andrew Lunn <andrew at lunn.ch>
+
+ * drivers: phy: Add support for optional phys
+
+2014-02-04 Andrew Lunn <andrew at lunn.ch>
+
+ * drivers: phy: Make NULL a valid phy reference
+
+2014-02-04 David Vrabel <david.vrabel at citrix.com>
+
+ * xen-netfront: handle backend CLOSED without CLOSING
+
+2014-02-04 Dmitry Kravkov <dmitry at broadcom.com>
+
+ * bnx2x: fix L2-GRE TCP issues
+
+2014-02-04 Bjørn Mork <bjorn at mork.no>
+
+ * net: qmi_wwan: add Netgear Aircard 340U
+
+2014-02-05 Dave Airlie <airlied at redhat.com>
+
+ * drm/mgag200: fix typo causing bw limits to be ignored on some chips
+
+2014-02-04 Fernando Luis Vazquez Cao <fernando_b1 at lab.ntt.co.jp>
+
+ * rtnetlink: fix oops in rtnl_link_get_slave_info_data_size
+
+2014-02-04 Stefan Sørensen <stefan.sorensen at spectralink.com>
+
+ * ptp: Allow selecting trigger/event index in testptp
+
+2014-02-04 Max Filippov <jcmvbkbc at gmail.com>
+
+ * net: ethoc: set up MII management bus clock
+
+2014-02-04 Max Filippov <jcmvbkbc at gmail.com>
+
+ * net: ethoc: don't advertise gigabit speed on attached PHY
+
+2014-02-03 Florian Fainelli <f.fainelli at gmail.com>
+
+ * net: phy: ensure Gigabit features are masked off if requested
+
+2014-02-03 Stefan Sørensen <stefan.sorensen at spectralink.com>
+
+ * net:phy:dp83640: Initialize PTP clocks at device init.
+
+2014-02-03 Stefan Sørensen <stefan.sorensen at spectralink.com>
+
+ * net:phy:dp83640: Do not hardcode timestamping event edge
+
+2014-01-12 Willy Tarreau <w at 1wt.eu>
+
+ * ARM: mvebu: dt: add missing alias 'eth3' on Armada XP mv78260
+
+2014-02-05 NeilBrown <neilb at suse.de>
+
+ * md/raid1: restore ability for check and repair to fix read errors.
+
+2014-01-21 Luis G.F <luisgf at luisgf.es>
+
+ * ACPI / battery: Fix incorrect sscanf() string in acpi_battery_init_alarm()
+
+2014-01-30 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * ACPI / proc: remove unneeded NULL check
+
+2014-01-30 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * ACPI / utils: remove a pointless NULL check
+
+2014-02-03 Mika Westerberg <mika.westerberg at linux.intel.com>
+
+ * ACPI / video: Add HP EliteBook Revolve 810 to the blacklist
+
+2014-02-04 Stanislaw Gruszka <sgruszka at redhat.com>
+
+ * pinctrl: protect pinctrl_list add
+
+2014-01-30 Alan Stern <stern at rowland.harvard.edu>
+
+ * usb-storage: enable multi-LUN scanning when needed
+
+2014-01-24 Kristóf Ralovich <kristof.ralovich at gmail.com>
+
+ * USB: simple: add Dynastream ANT USB-m Stick device support
+
+2014-01-21 Alan Stern <stern at rowland.harvard.edu>
+
+ * usb-storage: add unusual-devs entry for BlackBerry 9000
+
+2014-01-30 Alan Stern <stern at rowland.harvard.edu>
+
+ * usb-storage: restrict bcdDevice range for Super Top in Cypress ATACB
+
+2014-01-28 Josh Boyer <jwboyer at fedoraproject.org>
+
+ * usb: phy: move some error messages to debug
+
+2014-01-14 Bjørn Mork <bjorn at mork.no>
+
+ * usb: ftdi_sio: add Mindstorms EV3 console adapter
+
+2014-02-04 Paul Zimmerman <Paul.Zimmerman at synopsys.com>
+
+ * usb: dwc2: fix memory corruption in dwc2 driver
+
+2014-02-04 Paul Zimmerman <Paul.Zimmerman at synopsys.com>
+
+ * usb: dwc2: fix role switch breakage
+
+2014-02-04 Andre Heider <a.heider at gmail.com>
+
+ * usb: dwc2: bail out early when booting with "nousb"
+
+2014-02-04 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'for-usb-linus-2014-02-04' of git://git.kernel.org/pub/scm/linux/kernel/git/sarah/xhci into usb-linus
+
+2014-02-03 Dirk Brandewie <dirk.j.brandewie at intel.com>
+
+ * intel_pstate: Take core C0 time into account for core busy calculation
+
+2014-01-20 Axel Lin <axel.lin at ingics.com>
+
+ * spi: nuc900: Set SPI_LSB_FIRST for master->mode_bits if hw->pdata->lsb is true
+
+2014-02-04 Sujith Manoharan <c_manoha at qca.qualcomm.com>
+
+ * ath9k: Fix TX power calculation
+
+2014-02-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
+
+2014-01-31 Jani Nikula <jani.nikula at intel.com>
+
+ * drm/i915: demote opregion excessive timeout WARN_ONCE to DRM_INFO_ONCE
+
+2014-01-31 Jani Nikula <jani.nikula at intel.com>
+
+ * drm: add DRM_INFO_ONCE() to print a one-time DRM_INFO() message
+
+2014-02-04 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * MAINTAINERS: Update drm/i915 git repo
+
+2014-01-21 Geert Uytterhoeven <geert+renesas at linux-m68k.org>
+
+ * spi: rspi: Document support for Renesas QSPI in Kconfig
+
+2014-02-04 Will Deacon <will.deacon at arm.com>
+
+ * arm64: vdso: prevent ld from aligning PT_LOAD segments to 64k
+
+2014-01-30 Michael Holzheu <holzheu at linux.vnet.ibm.com>
+
+ * s390/dump: Fix dump memory detection
+
+2014-02-04 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'regulator/fix/ab3100' and 'regulator/fix/s2mps11' into regulator-linus
+
+2014-02-04 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/fix/core' into regulator-linus
+
+2014-02-04 James Hogan <james.hogan at imgtec.com>
+
+ * MIPS: Wire up sched_setattr/sched_getattr syscalls
+
+2014-01-29 Manuel Lauss <manuel.lauss at gmail.com>
+
+ * MIPS: Alchemy: Fix DB1100 GPIO registration
+
+2014-01-29 Martin Bugge <marbugge at cisco.com>
+
+ * [media] adv7842: Composite free-run platfrom-data fix
+
+2014-01-23 Martin Bugge <marbugge at cisco.com>
+
+ * [media] v4l2-dv-timings: fix GTF calculation
+
+2014-01-17 Masanari Iida <standby24x7 at gmail.com>
+
+ * [media] hdpvr: Fix memory leak in debug
+
+2014-01-16 Antti Palosaari <crope at iki.fi>
+
+ * [media] af9035: add ID [2040:f900] Hauppauge WinTV-MiniStick 2
+
+2014-01-30 Dave Jones <davej at fedoraproject.org>
+
+ * [media] mxl111sf: Fix compile when CONFIG_DVB_USB_MXL111SF is unset
+
+2014-01-30 Dave Jones <davej at fedoraproject.org>
+
+ * [media] mxl111sf: Fix unintentional garbage stack read
+
+2014-01-30 Andi Shyti <andi at etezian.org>
+
+ * [media] cx24117: use a valid dev pointer for dev_err printout
+
+2014-01-30 Andi Shyti <andi at etezian.org>
+
+ * [media] cx24117: remove dead code in always 'false' if statement
+
+2014-01-29 Michael Krufky <mkrufky at linuxtv.org>
+
+ * [media] update Michael Krufky's email address
+
+2014-01-08 Ricardo Ribalda <ricardo.ribalda at gmail.com>
+
+ * [media] vb2: Check if there are buffers before streamon
+
+2014-01-03 Hans Verkuil <hverkuil at xs4all.nl>
+
+ * [media] Revert "[media] videobuf_vm_{open,close} race fixes"
+
+2013-12-20 Alexey Khoroshilov <khoroshilov at ispras.ru>
+
+ * [media] go7007-loader: fix usb_dev leak
+
+2013-12-19 Levente Kurusa <levex at linux.com>
+
+ * [media] media: bt8xx: add missing put_device call
+
+2014-01-29 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * UBI: fix some use after free bugs
+
+2014-02-04 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix silent output on Toshiba Satellite L40
+
+2014-02-03 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / hotplug / PCI: Fix bridge removal race vs dock events
+
+2014-02-03 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / hotplug / PCI: Fix bridge removal race in handle_hotplug_event()
+
+2014-02-03 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / hotplug / PCI: Scan root bus under the PCI rescan-remove lock
+
+2014-02-03 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / hotplug / PCI: Move PCI rescan-remove locking to hotplug_event()
+
+2014-02-03 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / hotplug / PCI: Remove entries from bus->devices in reverse order
+
+2014-01-29 Mukesh Rathor <mukesh.rathor at oracle.com>
+
+ * xen/pvh: set CR4 flags for APs
+
+2014-02-03 Tejun Heo <tj at kernel.org>
+
+ * nfs: include xattr.h from fs/nfs/nfs3proc.c
+
+2014-01-28 Li Zefan <lizefan at huawei.com>
+
+ * cpuset: update MAINTAINERS entry
+
+2014-01-28 Tejun Heo <tj at kernel.org>
+
+ * arm, pm, vmpressure: add missing slab.h includes
+
+2014-01-31 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: fix error handling in ceph_osdc_init()
+
+2014-02-01 Filipe David Borba Manana <fdmanana at gmail.com>
+
+ * Btrfs: use late_initcall instead of module_init
+
+2014-01-29 Filipe David Borba Manana <fdmanana at gmail.com>
+
+ * Btrfs: use btrfs_crc32c everywhere instead of libcrc32c
+
+2014-01-29 Josef Bacik <jbacik at fb.com>
+
+ * Btrfs: disable snapshot aware defrag for now
+
+2014-02-03 Tejun Heo <tj at kernel.org>
+
+ * sata_sil: apply MOD15WRITE quirk to TOSHIBA MK2561GSYN
+
+2014-01-25 Marek Belisko <marek at goldelico.com>
+
+ * of: add vendor prefix for Honeywell
+
+2013-12-18 Kumar Gala <galak at codeaurora.org>
+
+ * of: Update qcom vendor prefix description
+
+2014-01-29 Emilio López <emilio at elopez.com.ar>
+
+ * of: add vendor prefix for Allwinner Technology
+
+2014-02-03 Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+
+ * Revert "xen/grant-table: Avoid m2p_override during mapping"
+
+2014-02-01 Benjamin Tissoires <benjamin.tissoires at redhat.com>
+
+ * HID: fix buffer allocations
+
+2014-02-03 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: usb-audio: Add missing kconfig dependecy
+
+2014-01-27 Qipan Li <Qipan.Li at csr.com>
+
+ * pinctrl: sirf: correct the pin index of ac97_pins group
+
+2014-01-22 Chris Ruehl <chris.ruehl at gtsys.com.hk>
+
+ * pinctrl: imx27: fix offset calculation in imx_read_2bit
+
+2014-01-23 Tony Prisk <linux at prisktech.co.nz>
+
+ * pinctrl: vt8500: Change devicetree data parsing
+
+2014-01-22 Chris Ruehl <chris.ruehl at gtsys.com.hk>
+
+ * pinctrl: imx27: fix wrong offset to ICONFB
+
+2014-01-21 Nicolas Ferre <nicolas.ferre at atmel.com>
+
+ * pinctrl: at91: use locked variant of irq_set_handler
+
+2014-01-30 Guenter Roeck <linux at roeck-us.net>
+
+ * hwmon: (pmbus) Support per-page exponent in linear mode
+
+2014-02-01 Rob Herring <robh at kernel.org>
+
+ * ARM: fix HAVE_ARM_TWD selection for OMAP and shmobile
+
+2014-02-01 Rob Herring <robh at kernel.org>
+
+ * ARM: moxart: move DMA_OF selection to driver
+
+2014-02-01 Rob Herring <robh at kernel.org>
+
+ * ARM: hisi: fix kconfig warning on HAVE_ARM_TWD
+
+2014-02-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linus 3.14-rc1
+
+2014-02-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'parisc-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
+
+2014-01-29 Mikulas Patocka <mikulas at artax.karlin.mff.cuni.cz>
+
+ * hpfs: optimize quad buffer loading
+
+2014-01-29 Mikulas Patocka <mikulas at artax.karlin.mff.cuni.cz>
+
+ * hpfs: remember free space
+
+2014-01-31 Helge Deller <deller at gmx.de>
+
+ * parisc: add flexible mmap memory layout support
+
+2014-01-16 Guy Martin <gmsoft at tuxicoman.be>
+
+ * parisc: Make EWOULDBLOCK be equal to EAGAIN on parisc
+
+2014-01-31 Helge Deller <deller at gmx.de>
+
+ * parisc: convert uapi/asm/stat.h to use native types only
+
+2014-01-31 Helge Deller <deller at gmx.de>
+
+ * parisc: wire up sched_setattr and sched_getattr
+
+2014-01-31 Helge Deller <deller at gmx.de>
+
+ * parisc: fix cache-flushing
+
+2014-01-31 Helge Deller <deller at gmx.de>
+
+ * parisc/sti_console: prefer Linux fonts over built-in ROM fonts
+
+2014-02-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jdelvare/staging
+
+2014-02-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'slab/next' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux
+
+2014-02-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux
+
+2014-02-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2014-01-31 Keith Busch <keith.busch at intel.com>
+
+ * NVMe: Namespace use after free on surprise removal
+
+2014-01-25 Jean-Francois Moine <moinejf at free.fr>
+
+ * drm/i2c: tda998x: fix the ENABLE_SPACE register
+
+2014-02-02 Jean Delvare <khali at linux-fr.org>
+
+ * hwmon: Fix SENSORS_TMP102 dependencies to eliminate build errors
+
+2014-02-02 Jean Delvare <khali at linux-fr.org>
+
+ * hwmon: Fix SENSORS_LM75 dependencies to eliminate build errors
+
+2014-01-25 Jean-Francois Moine <moinejf at free.fr>
+
+ * drm/i2c: tda998x: set the PLL division factor in range 0..3
+
+2014-01-25 Jean-Francois Moine <moinejf at free.fr>
+
+ * drm/i2c: tda998x: force the page register at startup time
+
+2014-01-25 Jean-Francois Moine <moinejf at free.fr>
+
+ * drm/i2c: tda998x: free the CEC device on encoder_destroy
+
+2014-01-25 Jean-Francois Moine <moinejf at free.fr>
+
+ * drm/i2c: tda998x: check the CEC device creation
+
+2014-01-25 Jean-Francois Moine <moinejf at free.fr>
+
+ * drm/i2c: tda998x: fix bad value in the AIF
+
+2013-12-05 Emmanuel Grumbach <emmanuel.grumbach at intel.com>
+
+ * iwlwifi: mvm: don't allow A band if SKU forbids it
+
+2014-01-28 Emmanuel Grumbach <emmanuel.grumbach at intel.com>
+
+ * iwlwifi: mvm: BT Coex - disable BT when TXing probe request in scan
+
+2014-02-02 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2014-02-01 Petr Tesarik <ptesarik at suse.cz>
+
+ * x86: Fix the initialization of physnode_map
+
+2014-01-23 Andy Shevchenko <andriy.shevchenko at linux.intel.com>
+
+ * tools/power turbostat: introduce -s to dump counters
+
+2014-01-23 Andy Shevchenko <andriy.shevchenko at linux.intel.com>
+
+ * tools/power turbostat: remove unused command line option
+
+2014-02-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'misc' of git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild
+
+2014-01-28 Pali Rohár <pali.rohar at gmail.com>
+
+ * afs: proc cells and rootcell are writeable
+
+2014-01-31 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * tile: remove compat_sys_lookup_dcookie declaration to fix compile error
+
+2014-02-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.samba.org/sfrench/cifs-2.6
+
+2014-02-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
+
+2014-02-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'staging-3.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
+
+2014-02-01 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Revert "PCI: Remove from bus_list and release resources in pci_release_dev()"
+
+2014-01-30 Krzysztof Kozlowski <k.kozlowski at samsung.com>
+
+ * power: max17040: Fix NULL pointer dereference when there is no platform_data
+
+2014-01-31 Olof Johansson <olof at lixom.net>
+
+ * ARM: multi_v7_defconfig: remove redundant entries and re-enable TI_EDMA
+
+2014-01-31 Olof Johansson <olof at lixom.net>
+
+ * ARM: multi_v7_defconfig: add mvebu drivers
+
+2013-12-05 Tim Kryger <tim.kryger at linaro.org>
+
+ * clocksource: kona: Add basic use of external clock
+
+2014-01-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'nfs-for-3.14-2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
+
+2014-01-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-fix-3.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2014-01-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending
+
+2014-01-27 Lorenzo Pieralisi <lorenzo.pieralisi at arm.com>
+
+ * drivers: bus: fix CCI driver kcalloc call parameters swap
+
+2014-01-07 Tim Kryger <tim.kryger at linaro.org>
+
+ * ARM: dts: bcm28155-ap: Fix Card Detection GPIO
+
+2014-01-31 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'renesas-dt-fixes2-for-v3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas into fixes
+
+2014-01-11 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * ARM: multi_v7_defconfig: Select CONFIG_AT803X_PHY
+
+2014-01-09 Grygorii Strashko <grygorii.strashko at ti.com>
+
+ * ARM: keystone: config: fix build warning when CONFIG_DMADEVICES is not set
+
+2014-01-03 Barry Song <Baohua.Song at csr.com>
+
+ * MAINTAINERS: ARM: SiRF: use regex patterns to involve all SiRF drivers
+
+2014-01-31 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'mvebu-fixes-3.13-2' of git://git.infradead.org/linux-mvebu into fixes
+
+2013-12-02 Soren Brinkmann <soren.brinkmann at xilinx.com>
+
+ * ARM: dts: zynq: Add SDHCI nodes
+
+2014-01-31 Rob Herring <robh at kernel.org>
+
+ * ARM: hisi: don't select SMP
+
+2014-01-24 Stephen Warren <swarren at nvidia.com>
+
+ * ARM: tegra: rebuild tegra_defconfig to add DEBUG_FS
+
+2013-12-20 Stephen Warren <swarren at nvidia.com>
+
+ * ARM: multi_v7: copy most options from tegra_defconfig
+
+2014-01-29 Linus Walleij <linus.walleij at linaro.org>
+
+ * ARM: iop32x: fix power off handling for the EM7210 board
+
+2014-01-24 Linus Walleij <linus.walleij at linaro.org>
+
+ * ARM: integrator: restore static map on the CP
+
+2014-01-31 Oleg Drokin <green at linuxhacker.ru>
+
+ * Fix mountpoint reference leakage in linkat
+
+2014-01-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
+
+2014-01-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mattst88/alpha
+
+2014-01-29 Adrian Hunter <adrian.hunter at intel.com>
+
+ * perf buildid-cache: Check relocation when checking for existing kcore
+
+2014-01-29 Adrian Hunter <adrian.hunter at intel.com>
+
+ * perf tools: Adjust kallsyms for relocated kernel
+
+2014-01-29 Adrian Hunter <adrian.hunter at intel.com>
+
+ * perf tests: No need to set up ref_reloc_sym
+
+2014-01-29 Adrian Hunter <adrian.hunter at intel.com>
+
+ * perf symbols: Prevent the use of kcore if the kernel has moved
+
+2014-01-29 Adrian Hunter <adrian.hunter at intel.com>
+
+ * perf record: Get ref_reloc_sym from kernel map
+
+2014-01-29 Adrian Hunter <adrian.hunter at intel.com>
+
+ * perf machine: Set up ref_reloc_sym in machine__create_kernel_maps()
+
+2014-01-29 Adrian Hunter <adrian.hunter at intel.com>
+
+ * perf machine: Add machine__get_kallsyms_filename()
+
+2014-01-29 Adrian Hunter <adrian.hunter at intel.com>
+
+ * perf tools: Add kallsyms__get_function_start()
+
+2014-01-29 Adrian Hunter <adrian.hunter at intel.com>
+
+ * perf symbols: Fix symbol annotation for relocated kernel
+
+2014-01-27 Francesco Fusco <ffusco at redhat.com>
+
+ * perf tools: Fix include for non x86 architectures
+
+2014-01-29 Christoph Hellwig <hch at infradead.org>
+
+ * hfsplus: use xattr handlers for removexattr
+
+2014-01-30 Stephan Springl <springl-kernel at bfw-online.de>
+
+ * Typo in compat_sys_lseek() declaration
+
+2014-01-30 Andrew Ruder <andrew.ruder at elecsyscorp.com>
+
+ * fs/super.c: sync ro remount after blocking writers
+
+2014-01-27 Jeff Layton <jlayton at redhat.com>
+
+ * vfs: unexport the getname() symbol
+
+2014-01-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'v4l_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
+
+2014-01-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.14-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2014-01-22 Mikulas Patocka <mpatocka at redhat.com>
+
+ * alpha: fix broken network checksum
+
+2013-12-20 蔡正龙 <zhenglong.cai at cs2c.com.cn>
+
+ * alpha: Enable system-call auditing support.
+
+2014-01-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-30 Stephen Warren <swarren at nvidia.com>
+
+ * ALSA: hda/hdmi - allow PIN_OUT to be dynamically enabled
+
+2014-01-30 Krzysztof Kozlowski <k.kozlowski at samsung.com>
+
+ * regulator: s2mps11: Fix NULL pointer of_node value when using platform data
+
+2014-01-30 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ASoC: davinci-evm: Add pm callbacks to platform driver
+
+2014-01-30 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ASoC: davinci-mcasp: Consolidate pm_runtime_get/put() use in the driver
+
+2014-01-30 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ASoC: davinci-mcasp: Configure xxTDM, xxFMT and xxFMCT registers synchronously
+
+2014-01-30 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ASoC: davinci-mcasp: Harmonize the sub hw_params function names
+
+2014-01-31 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: Fix check for regular file in couldbe_mf_symlink()
+
+2014-01-27 Dave Jones <davej at fedoraproject.org>
+
+ * xen/pvh: Fix misplaced kfree from xlated_setup_gnttab_pages
+
+2014-01-22 Bob Liu <lliubbo at gmail.com>
+
+ * drivers: xen: deaggressive selfballoon driver
+
+2014-01-23 Zoltan Kiss <zoltan.kiss at citrix.com>
+
+ * xen/grant-table: Avoid m2p_override during mapping
+
+2014-01-27 Malahal Naineni <malahal at us.ibm.com>
+
+ * nfs: initialize the ACL support bits to zero.
+
+2014-01-30 Denis V. Lunev <den at openvz.org>
+
+ * ata: enable quirk from jmicron JMB350 for JMB394
+
+2014-01-28 Masanari Iida <standby24x7 at gmail.com>
+
+ * mm: Fix warning on make htmldocs caused by slab.c
+
+2014-01-24 Dave Hansen <dave.hansen at linux.intel.com>
+
+ * mm: slub: work around unneeded lockdep warning
+
+2014-01-28 Dave Hansen <dave.hansen at linux.intel.com>
+
+ * mm: sl[uo]b: fix misleading comments
+
+2014-01-15 Steve Capper <Steve.Capper at arm.com>
+
+ * arm64: mm: Introduce PTE_WRITE
+
+2014-01-15 Steve Capper <Steve.Capper at arm.com>
+
+ * arm64: mm: Remove PTE_BIT_FUNC macro
+
+2014-01-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
+
+2014-01-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'upstream-3.14-rc1' of git://git.infradead.org/linux-ubifs
+
+2014-01-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
+
+2014-01-28 Prarit Bhargava <prarit at redhat.com>
+
+ * x86, cpu hotplug: Fix stack frame warning in check_irq_vectors_for_cpu_disable()
+
+2014-01-30 Sarah Sharp <sarah.a.sharp at linux.intel.com>
+
+ * Revert "xhci: replace xhci_read_64() with readq()"
+
+2014-01-20 Emmanuel Grumbach <emmanuel.grumbach at intel.com>
+
+ * iwlwifi: mvm: don't leak a station when we drain
+
+2013-12-30 David Spinadel <david.spinadel at intel.com>
+
+ * iwlwifi: mvm: notify match found without filtering
+
+2014-01-23 Oren Givon <oren.givon at intel.com>
+
+ * iwlwifi: add more 7265 HW IDs
+
+2014-01-23 Emmanuel Grumbach <emmanuel.grumbach at intel.com>
+
+ * iwlwifi: mvm: print the version of the firmware when it asserts
+
+2014-01-16 Johannes Berg <johannes.berg at intel.com>
+
+ * iwlwifi: mvm: disable scheduled scan
+
+2014-01-20 Johannes Berg <johannes.berg at intel.com>
+
+ * iwlwifi: mvm: make local pointer non-static
+
+2014-01-30 Imre Deak <imre.deak at intel.com>
+
+ * drm/i915: vlv: fix DP PHY lockup due to invalid PP sequencer setup
+
+2014-01-29 Nicolas Pitre <nicolas.pitre at linaro.org>
+
+ * arm64: FIQs are unused
+
+2014-01-22 Harald Freudenberger <freude at linux.vnet.ibm.com>
+
+ * crypto: s390 - fix des and des3_ede ctr concurrency issue
+
+2014-01-22 Harald Freudenberger <freude at linux.vnet.ibm.com>
+
+ * crypto: s390 - fix des and des3_ede cbc concurrency issue
+
+2014-01-16 Harald Freudenberger <freude at linux.vnet.ibm.com>
+
+ * crypto: s390 - fix concurrency issue in aes-ctr mode
+
+2014-01-30 Julien Grall <julien.grall at linaro.org>
+
+ * xen/gnttab: Use phys_addr_t to describe the grant frame base address
+
+2014-01-20 Ian Campbell <ian.campbell at citrix.com>
+
+ * xen: swiotlb: handle sizeof(dma_addr_t) != sizeof(phys_addr_t)
+
+2014-01-28 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Fix percpu_ref_put race in transport_lun_remove_cmd
+
+2014-01-24 Andy Grover <agrover at redhat.com>
+
+ * target/iscsi: Fix network portal creation race
+
+2014-01-30 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * ASoC: samsung: Fix trivial typo
+
+2014-01-30 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * ASoC: samsung: Remove invalid dependencies
+
+2014-01-30 Hui Wang <hui.wang at canonical.com>
+
+ * ALSA: hda - add headset mic detect quirks for another Dell laptop
+
+2014-01-30 Takashi Iwai <tiwai at suse.de>
+
+ * Merge branch 'xonar-dg' of git://git.alsa-project.org/alsa-kprivate into for-next
+
+2014-01-29 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * KVM: return an error code in kvm_vm_ioctl_register_coalesced_mmio()
+
+2014-01-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
+
+2014-01-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.infradead.org/users/vkoul/slave-dma
+
+2014-01-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/olof/chrome-platform
+
+2014-01-29 Sarah Sharp <sarah.a.sharp at linux.intel.com>
+
+ * Revert "xhci: replace xhci_write_64() with writeq()"
+
+2014-01-30 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-nouveau-next' of git://anongit.freedesktop.org/git/nouveau/linux-2.6 into drm-next
+
+2014-01-30 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-next-3.14' of git://people.freedesktop.org/~agd5f/linux into drm-next
+
+2014-01-29 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * fs/compat: fix parameter handling for compat readv/writev syscalls
+
+2014-01-29 Andrew Morton <akpm at linux-foundation.org>
+
+ * mm/mempolicy.c: convert to pr_foo()
+
+2014-01-29 Mel Gorman <mgorman at suse.de>
+
+ * mm: numa: initialise numa balancing after jump label initialisation
+
+2014-01-29 Johannes Weiner <hannes at cmpxchg.org>
+
+ * mm/page-writeback.c: do not count anon pages as dirtyable memory
+
+2014-01-29 Johannes Weiner <hannes at cmpxchg.org>
+
+ * mm/page-writeback.c: fix dirty_balance_reserve subtraction from dirtyable memory
+
+2014-01-29 Aaron Tomlin <atomlin at redhat.com>
+
+ * mm: document improved handling of swappiness==0
+
+2014-01-29 Lad, Prabhakar <prabhakar.csengg at gmail.com>
+
+ * lib/genalloc.c: add check gen_pool_dma_alloc() if dma pointer is not NULL
+
+2014-01-23 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nouveau: resume display if any later suspend bits fail
+
+2014-01-29 Maarten Lankhorst <maarten.lankhorst at canonical.com>
+
+ * drm/nouveau: fix lock unbalance in nouveau_crtc_page_flip
+
+2013-11-14 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau: implement hooks for needed for drm vblank timestamping support
+
+2013-11-14 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau/disp: add a method to fetch info needed by drm vblank timestamping
+
+2014-01-24 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nv50: fill in crtc mode struct members from crtc_mode_fixup
+
+2014-01-28 Chris Zankel <chris at zankel.net>
+
+ * xtensa: fix fast_syscall_spill_registers
+
+2014-01-28 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/dce8: workaround for atom BlankCrtc table
+
+2014-01-27 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/DCE4+: clear bios scratch dpms bit (v2)
+
+2014-01-27 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: set si_notify_smc_display_change properly
+
+2014-01-27 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: fix DAC interrupt handling on DCE5+
+
+2014-01-27 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: clean up active vram sizing
+
+2014-01-27 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: skip async dma init on r6xx
+
+2014-01-24 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/runpm: don't runtime suspend non-PX cards
+
+2014-01-24 Roman Volkov <v1ron at mail.ru>
+
+ * ALSA: oxygen: Xonar DG(X): cleanup and minor changes
+
+2014-01-24 Roman Volkov <v1ron at mail.ru>
+
+ * ALSA: oxygen: Xonar DG(X): modify high-pass filter control
+
+2014-01-24 Roman Volkov <v1ron at mail.ru>
+
+ * ALSA: oxygen: Xonar DG(X): modify input select functions
+
+2014-01-24 Roman Volkov <v1ron at mail.ru>
+
+ * ALSA: oxygen: Xonar DG(X): modify capture volume functions
+
+2014-01-24 Roman Volkov <v1ron at mail.ru>
+
+ * ALSA: oxygen: Xonar DG(X): use headphone volume control
+
+2014-01-24 Peter Zijlstra <peterz at infradead.org>
+
+ * perf tools: Fix AAAAARGH64 memory barriers
+
+2014-01-22 Andrew Lunn <andrew at lunn.ch>
+
+ * ATA: SATA_MV: Add missing Kconfig select statememnt
+
+2014-01-25 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * ata: pata_imx: Check the return value from clk_prepare_enable()
+
+2014-01-29 Paolo Bonzini <pbonzini at redhat.com>
+
+ * Merge branch 'kvm-ppc-next' of git://github.com/agraf/linux-2.6 into kvm-queue
+
+2014-01-29 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFSv4.1: Cleanup
+
+2014-01-29 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFSv4.1: Clean up nfs41_sequence_done
+
+2014-01-29 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFSv4: Fix a slot leak in nfs40_sequence_done
+
+2014-01-27 Paolo Bonzini <pbonzini at redhat.com>
+
+ * x86, kvm: correctly access the KVM_CPUID_FEATURES leaf at 0x40000101
+
+2014-01-27 Paolo Bonzini <pbonzini at redhat.com>
+
+ * x86, kvm: cache the base of the KVM cpuid leaves
+
+2014-01-29 Paolo Bonzini <pbonzini at redhat.com>
+
+ * kvm: x86: move KVM_CAP_HYPERV_TIME outside #ifdef
+
+2014-01-29 Andy Adamson <andros at netapp.com>
+
+ * NFSv4.1 free slot before resending I/O to MDS
+
+2014-01-29 Matthew Wilcox <matthew.r.wilcox at intel.com>
+
+ * NVMe: Correct uses of INIT_WORK
+
+2014-01-29 David Henningsson <david.henningsson at canonical.com>
+
+ * ALSA: hda - Add parameter for dumping processing coefficients
+
+2014-01-29 Chris Mason <clm at fb.com>
+
+ * Btrfs: fix spin_unlock in check_ref_cleanup
+
+2014-01-09 Chris Mason <clm at fb.com>
+
+ * Btrfs: setup inode location during btrfs_init_inode_locked
+
+2014-01-03 Chris Mason <clm at fb.com>
+
+ * Btrfs: don't use ram_bytes for uncompressed inline items
+
+2014-01-11 Filipe David Borba Manana <fdmanana at gmail.com>
+
+ * Btrfs: fix btrfs_search_slot_for_read backwards iteration
+
+2014-01-29 Wang Shilong <wangsl.fnst at cn.fujitsu.com>
+
+ * Btrfs: do not export ulist functions
+
+2014-01-29 Wang Shilong <wangsl.fnst at cn.fujitsu.com>
+
+ * Btrfs: rework ulist with list+rb_tree
+
+2014-01-28 Wang Shilong <wangsl.fnst at cn.fujitsu.com>
+
+ * Btrfs: fix memory leaks on walking backrefs failure
+
+2014-01-28 Filipe David Borba Manana <fdmanana at gmail.com>
+
+ * Btrfs: fix send file hole detection leading to data corruption
+
+2014-01-26 Wang Shilong <wangsl.fnst at cn.fujitsu.com>
+
+ * Btrfs: add a reschedule point in btrfs_find_all_roots()
+
+2014-01-24 Filipe David Borba Manana <fdmanana at gmail.com>
+
+ * Btrfs: make send's file extent item search more efficient
+
+2014-01-23 Wang Shilong <wangsl.fnst at cn.fujitsu.com>
+
+ * Btrfs: fix to catch all errors when resolving indirect ref
+
+2014-01-23 Wang Shilong <wangsl.fnst at cn.fujitsu.com>
+
+ * Btrfs: fix protection between walking backrefs and root deletion
+
+2014-01-23 Gui Hecheng <guihc.fnst at cn.fujitsu.com>
+
+ * btrfs: fix warning while merging two adjacent extents
+
+2014-01-22 Filipe David Borba Manana <fdmanana at gmail.com>
+
+ * Btrfs: fix infinite path build loops in incremental send
+
+2014-01-28 Arnd Bergmann <arnd at arndb.de>
+
+ * dmaengine: mmp_pdma: fix mismerge
+
+2014-01-29 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'pm-cpufreq' and 'pm-devfreq'
+
+2014-01-29 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'acpi-processor', 'acpi-hotplug', 'acpi-init', 'acpi-pm' and 'acpica'
+
+2014-01-27 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / scan: Clear match_driver flag in acpi_bus_trim()
+
+2014-01-28 Gerald Schaefer <gerald.schaefer at de.ibm.com>
+
+ * s390/appldata: restore missing init_virt_timer()
+
+2014-01-28 Ursula Braun <ursula.braun at de.ibm.com>
+
+ * s390/qdio: correct program-controlled interruption checking
+
+2014-01-26 Jose Alonso <joalonsof at gmail.com>
+
+ * s390/qdio: for_each macro correctness
+
+2014-01-29 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2014-01-28 Chris Zankel <chris at zankel.net>
+
+ * xtensa: fix fast_syscall_spill_registers
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus-20140127' of git://git.infradead.org/linux-mtd
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/cooloney/linux-leds
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'tty-3.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'clk-for-linus-3.14-part2' of git://git.linaro.org/people/mike.turquette/linux
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'xfs-for-linus-v3.14-rc1-2' of git://oss.sgi.com/xfs/xfs
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * ceph: Fix up after semantic merge conflict
+
+2014-01-29 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm/for-3.14-rc1-20140123' of git://anongit.freedesktop.org/tegra/linux into drm-next
+
+2014-01-24 Dave Airlie <airlied at redhat.com>
+
+ * drm: ast,cirrus,mgag200: use drm_can_sleep
+
+2014-01-29 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm-intel-fixes-2014-01-28' of git://people.freedesktop.org/~danvet/drm-intel into drm-next
+
+2014-01-29 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-armada-fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-cubox into drm-next
+
+2014-01-29 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'omapdrm-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tomba/linux into drm-next
+
+2014-01-29 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'gma500-next' of git://github.com/patjak/drm-gma500 into drm-next
+
+2014-01-27 Mark Brown <broonie at linaro.org>
+
+ * ACPI / init: Flag use of ACPI and ACPI idioms for power supplies to regulator API
+
+2014-01-27 Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+
+ * acpi-cpufreq: De-register CPU notifier and free struct msr on error.
+
+2014-01-22 Anand Jain <Anand.Jain at oracle.com>
+
+ * btrfs: undo sysfs when open_ctree() fails
+
+2014-01-23 Benjamin Tissoires <benjamin.tissoires at redhat.com>
+
+ * HID: multitouch: add FocalTech FTxxxx support
+
+2014-01-28 Jeff Layton <jlayton at redhat.com>
+
+ * nfs: add memory barriers around NFS_INO_INVALID_DATA and NFS_INO_INVALIDATING
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.open-osd.org/linux-open-osd
+
+2014-01-28 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * ceph: cast PAGE_SIZE to size_t in ceph_sync_write()
+
+2014-01-28 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * ceph: fix dout() compile warnings in ceph_filemap_fault()
+
+2014-01-28 Tony Luck <tony.luck at intel.com>
+
+ * [IA64] Wire up new sched_setattr and sched_getattr syscalls
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'microblaze-3.14-rc1' of git://git.monstr.eu/linux-2.6-microblaze
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'cris-correction-for-3.14' of git://jni.nu/cris
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'nfs-for-3.14-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
+
+2014-01-28 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFS: Fix races in nfs_revalidate_mapping
+
+2014-01-28 Reyad Attiyat <reyad.attiyat at gmail.com>
+
+ * HID: microsoft: Add ID's for Surface Type/Touch Cover 2
+
+2014-01-27 Yufeng Shen <miletus at chromium.org>
+
+ * HID: usbhid: quirk for CY-TM75 75 inch Touch Overlay
+
+2014-01-28 Jesper Nilsson <jespern at axis.com>
+
+ * CRISv10: Readd missing header
+
+2014-01-27 Peter Zijlstra <peterz at infradead.org>
+
+ * sched: Make sched_class::get_rr_interval() optional
+
+2014-01-08 Patrik Jakobsson <patrik.r.jakobsson at gmail.com>
+
+ * drm/gma500: Lock struct_mutex around cursor updates
+
+2014-01-27 Mark Brown <broonie at linaro.org>
+
+ * regulator: core: Correct default return value for full constraints
+
+2014-01-13 Akash Goel <akash.goel at intel.com>
+
+ * drm/i915: Fix the offset issue for the stolen GEM objects
+
+2013-12-18 Huang Shijie <shijie8 at gmail.com>
+
+ * mtd: gpmi: add sanity check when mapping DMA for read_buf/write_buf
+
+2013-12-18 Huang Shijie <shijie8 at gmail.com>
+
+ * mtd: gpmi: allocate a proper buffer for non ECC read/write
+
+2014-01-21 Geert Uytterhoeven <geert+renesas at linux-m68k.org>
+
+ * mtd: m25p80: Set rx_nbits for Quad SPI transfers
+
+2014-01-21 Geert Uytterhoeven <geert+renesas at linux-m68k.org>
+
+ * mtd: m25p80: Enable Quad SPI read transfers for s25fl512s
+
+2014-01-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (incoming from Andrew)
+
+2014-01-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
+
+2014-01-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
+
+2014-01-27 Ard Biesheuvel <ard.biesheuvel at linaro.org>
+
+ * firmware/google: drop 'select EFI' to avoid recursive dependency
+
+2014-01-27 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * compat: fix sys_fanotify_mark
+
+2014-01-27 Joe Perches <joe at perches.com>
+
+ * checkpatch.pl: check for function declarations without arguments
+
+2014-01-27 Wanpeng Li <liwanp at linux.vnet.ibm.com>
+
+ * mm/migrate.c: fix setting of cpupid on page migration twice against normal page
+
+2014-01-15 Michael Ellerman <mpe at ellerman.id.au>
+
+ * powerpc: Implement arch_spin_is_locked() using arch_spin_value_unlocked()
+
+2014-01-15 Michael Ellerman <mpe at ellerman.id.au>
+
+ * powerpc: Add support for the optimised lockref implementation
+
+2014-01-02 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * leds: s3c24xx: Remove hardware.h inclusion
+
+2013-12-28 ZHAO Gang <gamerh2o at gmail.com>
+
+ * leds: replace list_for_each with list_for_each_entry
+
+2014-01-02 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * leds: kirkwood: Cleanup in header files
+
+2013-12-11 Olof Johansson <olof at lixom.net>
+
+ * leds: pwm: Remove a warning on non-DT platforms
+
+2013-12-11 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * leds: leds-pwm: fix duty time overflow.
+
+2013-12-06 Alexander Shiyan <shc_work at mail.ru>
+
+ * leds: leds-mc13783: Remove unneeded mc13xxx_{un}lock
+
+2013-12-06 Alexander Shiyan <shc_work at mail.ru>
+
+ * leds: leds-mc13783: Remove duplicate field in platform data
+
+2013-12-08 Chen Gang <gang.chen.5i5j at gmail.com>
+
+ * drivers: leds: leds-tca6507: check CONFIG_GPIOLIB whether defined for 'gpio_base'
+
+2013-11-20 Milo Kim <milo.kim at ti.com>
+
+ * leds: lp5523: Support LED MUX configuration on running a pattern
+
+2013-11-20 Milo Kim <milo.kim at ti.com>
+
+ * leds: lp5521/5523: Fix multiple engine usage bug
+
+2013-11-12 NeilBrown <neilb at suse.de>
+
+ * LEDS: tca6507 - fix up some comments.
+
+2013-10-31 NeilBrown <neilb at suse.de>
+
+ * LEDS: tca6507: add device-tree support for GPIO configuration.
+
+2014-01-27 Matthew Wilcox <matthew.r.wilcox at intel.com>
+
+ * NVMe: Include device and queue numbers in interrupt name
+
+2014-01-27 Keith Busch <keith.busch at intel.com>
+
+ * NVMe: Add a pci_driver shutdown method
+
+2013-12-16 Keith Busch <keith.busch at intel.com>
+
+ * NVMe: Disable admin queue on init failure
+
+2014-01-27 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * DRM: armada: fix missing DRM_KMS_FB_HELPER select
+
+2013-12-03 Marek Szyprowski <m.szyprowski at samsung.com>
+
+ * [media] media: v4l2-dev: fix video device index assignment
+
+2014-01-24 Lv Zheng <lv.zheng at intel.com>
+
+ * ACPICA: Remove bool usage from ACPICA.
+
+2014-01-22 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * PM / devfreq: Disable Exynos4 driver build on multiplatform
+
+2014-01-27 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / PM: Use ACPI_COMPANION() to get ACPI companions of devices
+
+2014-01-27 Jiang Liu <jiang.liu at linux.intel.com>
+
+ * ACPI / scan: reduce log level of "ACPI: \_PR_.CPU4: failed to get CPU APIC ID"
+
+2014-01-27 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: support CEPH_FEATURE_OSD_CACHEPOOL feature
+
+2014-01-27 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: follow redirect replies from osds
+
+2014-01-27 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: rename ceph_osd_request::r_{oloc,oid} to r_base_{oloc,oid}
+
+2014-01-27 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: follow {read,write}_tier fields on osd request submission
+
+2014-01-27 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: add ceph_pg_pool_by_id()
+
+2014-01-27 Mike Turquette <mturquette at linaro.org>
+
+ * clk: sort Makefile
+
+2014-01-27 Jeff Layton <jlayton at redhat.com>
+
+ * sunrpc: turn warn_gssd() log message into a dprintk()
+
+2014-01-27 Jeff Layton <jlayton at redhat.com>
+
+ * NFS: fix the handling of NFS_INO_INVALID_DATA flag in nfs_revalidate_mapping
+
+2014-01-24 Emilio López <emilio at elopez.com.ar>
+
+ * clk: sunxi: fix overflow when setting up divided factors
+
+2014-01-17 Stephen Boyd <sboyd at codeaurora.org>
+
+ * clk: Export more clk-provider functions
+
+2014-01-17 Stephen Boyd <sboyd at codeaurora.org>
+
+ * dt-bindings: qcom: Fix warning with duplicate dt define
+
+2014-01-25 Sebastian Hesselbarth <sebastian.hesselbarth at gmail.com>
+
+ * clk: si5351: remove variant from platform_data
+
+2014-01-25 Baruch Siach <baruch at tkos.co.il>
+
+ * spi: correct the transfer_one_message documentation wording
+
+2014-01-27 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/arizona', 'asoc/fix/fsl', 'asoc/fix/omap', 'asoc/fix/samsung', 'asoc/fix/simple', 'asoc/fix/tlv320aic32x4' and 'asoc/fix/wm5100' into asoc-linus
+
+2014-01-27 Markus Pargmann <mpa at pengutronix.de>
+
+ * ASoC: tlv320aic32x4: Fix MICPGA input configuration
+
+2014-01-27 Markus Pargmann <mpa at pengutronix.de>
+
+ * ASoC: tlv320aic32x4: Fix mono playback
+
+2014-01-27 Larry Finger <Larry.Finger at lwfinger.net>
+
+ * staging: r8821ae: Enable build by reverting BROKEN marking
+
+2014-01-26 Larry Finger <Larry.Finger at lwfinger.net>
+
+ * staging: r8821ae: Fix build problems
+
+2014-01-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'trace-fixes-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
+
+2014-01-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'stable/for-linus-3.14-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb
+
+2014-01-27 Chris Wilson <chris at chris-wilson.co.uk>
+
+ * drm/i915: Decouple GPU error reporting from ring initialisation
+
+2014-01-27 Jingoo Han <jg1.han at samsung.com>
+
+ * arm64: mm: fix the function name in comment of cpu_do_switch_mm
+
+2014-01-08 Paul Mackerras <paulus at samba.org>
+
+ * KVM: PPC: Book3S PR: Cope with doorbell interrupts
+
+2014-01-08 Michael Neuling <mikey at neuling.org>
+
+ * KVM: PPC: Book3S HV: Add software abort codes for transactional memory
+
+2014-01-08 Michael Neuling <mikey at neuling.org>
+
+ * KVM: PPC: Book3S HV: Add new state for transactional memory
+
+2014-01-08 Michael Neuling <mikey at neuling.org>
+
+ * powerpc/Kconfig: Make TM select VSX and VMX
+
+2014-01-08 Anton Blanchard <anton at samba.org>
+
+ * KVM: PPC: Book3S HV: Basic little-endian guest support
+
+2014-01-08 Paul Mackerras <paulus at samba.org>
+
+ * KVM: PPC: Book3S HV: Add support for DABRX register on POWER7
+
+2014-01-08 Paul Mackerras <paulus at samba.org>
+
+ * KVM: PPC: Book3S HV: Prepare for host using hypervisor doorbells
+
+2014-01-08 Paul Mackerras <paulus at samba.org>
+
+ * KVM: PPC: Book3S HV: Handle new LPCR bits on POWER8
+
+2014-01-22 Avi Kivity <avi at cloudius-systems.com>
+
+ * perf tools: Demangle kernel and kernel module symbols too
+
+2014-01-27 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge branch 'master' into staging-next
+
+2014-01-24 Stephen Rothwell <sfr at canb.auug.org.au>
+
+ * Staging: rtl8812ae: disable due to build errors
+
+2014-01-24 Jan Kiszka <jan.kiszka at siemens.com>
+
+ * KVM: x86: Validate guest writes to MSR_IA32_APICBASE
+
+2014-01-24 Pankaj Dubey <pankaj.dubey at samsung.com>
+
+ * arm64: fix build error if DMA_CMA is enabled
+
+2013-11-22 Michal Simek <michal.simek at xilinx.com>
+
+ * microblaze: Add missing v8.50.a version
+
+2013-11-19 Michal Simek <michal.simek at xilinx.com>
+
+ * microblaze: Fix missing bracket in printk
+
+2013-11-19 Michal Simek <michal.simek at xilinx.com>
+
+ * microblaze: Fix compilation error for BS=0
+
+2013-08-23 Michal Simek <michal.simek at xilinx.com>
+
+ * microblaze: Disable stack protection from bootloader
+
+2013-11-20 Michal Simek <michal.simek at xilinx.com>
+
+ * microblaze: Define read/write{b,w,l}_relaxed MMIO
+
+2014-01-26 Steve French <smfrench at gmail.com>
+
+ * [CIFS] Fix SMB2 mounts so they don't try to set or get xattrs via cifs
+
+2014-01-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml
+
+2014-01-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'mmc-updates-for-3.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/cjb/mmc
+
+2014-01-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-3.14-merge-window' of git://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs
+
+2013-11-14 James Hogan <james.hogan at imgtec.com>
+
+ * um: hostfs: make functions static
+
+2014-01-15 Richard Weinberger <richard at nod.at>
+
+ * um: Include generic barrier.h
+
+2013-09-13 Richard Weinberger <richard at nod.at>
+
+ * um: Removed unused attributes from thread_struct
+
+2014-01-25 Baruch Siach <baruch at tkos.co.il>
+
+ * perf/doc: Remove mention of non-existent set_perf_event_pending() from design.txt
+
+2014-01-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'ipmi' (ipmi patches from Corey Minyard)
+
+2014-01-24 Corey Minyard <cminyard at mvista.com>
+
+ * ipmi: Cleanup error return
+
+2014-01-24 Xie XiuQi <xiexiuqi at huawei.com>
+
+ * ipmi: fix timeout calculation when bmc is disconnected
+
+2014-01-24 Xie XiuQi <xiexiuqi at huawei.com>
+
+ * ipmi: use USEC_PER_SEC instead of 1000000 for more meaningful
+
+2014-01-24 Michael Opdenacker <michael.opdenacker at free-electrons.com>
+
+ * ipmi: remove deprecated IRQF_DISABLED
+
+2014-01-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'spi-v3.14-2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi
+
+2014-01-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'regulator-v3.14-2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
+
+2014-01-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'regmap-v3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regmap
+
+2013-10-30 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: save current register frame in fast_syscall_spill_registers_fixup
+
+2014-01-22 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: introduce spill_registers_kernel macro
+
+2014-01-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
+
+2014-01-25 Stanislaw Gruszka <sgruszka at redhat.com>
+
+ * i915: remove pm_qos request on error
+
+2014-01-11 Sebastian Reichel <sre at debian.org>
+
+ * dt: binding documentation for bq2415x charger
+
+2014-01-24 Adrien Vergé <adrienverge at gmail.com>
+
+ * ALSA: hda - Fix silent output on MacBook Air 1,1
+
+2014-01-21 David Cohen <david.a.cohen at linux.intel.com>
+
+ * x86/intel/mid: Fix X86_INTEL_MID dependencies
+
+2014-01-25 Ingo Molnar <mingo at kernel.org>
+
+ * Merge branch 'linus' into x86/urgent
+
+2014-01-21 Toshi Kani <toshi.kani at hp.com>
+
+ * arch/x86/mm/srat: Skip NUMA_NO_NODE while parsing SLIT
+
+2014-01-21 Mel Gorman <mgorman at suse.de>
+
+ * mm, x86: Revisit tlb_flushall_shift tuning for page flushes except on IvyBridge
+
+2014-01-21 Mel Gorman <mgorman at suse.de>
+
+ * x86: mm: change tlb_flushall_shift for IvyBridge
+
+2014-01-21 Mel Gorman <mgorman at suse.de>
+
+ * x86/mm: Eliminate redundant page table walk during TLB range flushing
+
+2014-01-21 Mel Gorman <mgorman at suse.de>
+
+ * x86/mm: Clean up inconsistencies when flushing TLB ranges
+
+2014-01-21 Mel Gorman <mgorman at suse.de>
+
+ * mm, x86: Account for TLB flushes only when debugging
+
+2014-01-21 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * x86/AMD/NB: Fix amd_set_subcaches() parameter type
+
+2014-01-23 Aravind Gopalakrishnan <Aravind.Gopalakrishnan at amd.com>
+
+ * x86/quirks: Add workaround for AMD F16h Erratum792
+
+2014-01-25 Ingo Molnar <mingo at kernel.org>
+
+ * Merge branch 'timers/core' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into timers/urgent
+
+2014-01-25 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2014-01-23 Sagi Grimberg <sagig at mellanox.com>
+
+ * target: Report bad sector in sense data for DIF errors
+
+2014-01-20 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi-target: Convert gfp_t parameter to task state bitmask
+
+2014-01-20 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi-target: Fix connection reset hang with percpu_ida_alloc
+
+2014-01-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2014-01-21 Eric Sandeen <sandeen at sandeen.net>
+
+ * xfs: allow logical-sector sized O_DIRECT
+
+2014-01-21 Eric Sandeen <sandeen at sandeen.net>
+
+ * xfs: rename xfs_buftarg structure members
+
+2014-01-21 Eric Sandeen <sandeen at sandeen.net>
+
+ * xfs: clean up xfs_buftarg
+
+2014-01-24 Eric Van Hensbergen <ericvh at gmail.com>
+
+ * 9p: update documentation
+
+2014-01-24 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * Revert "drm/i915: Mask reserved bits in display/sprite address registers"
+
+2014-01-23 Vadim Rozenfeld <vrozenfe at redhat.com>
+
+ * KVM: x86: mark hyper-v vapic assist page as dirty
+
+2014-01-24 Lorenzo Pieralisi <Lorenzo.Pieralisi at arm.com>
+
+ * arm64: kernel: fix per-cpu offset restore on resume
+
+2014-01-24 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * ASoC: samsung: Remove dma.h inclusion
+
+2014-01-24 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * ASoC: samsung: Add NULL check in i2s.c
+
+2014-01-24 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * ASoC: Samsung: Fix build error due to missing dependency
+
+2014-01-24 Martin Schwidefsky <schwidefsky at de.ibm.com>
+
+ * s390/hypfs: add interface for diagnose 0x304
+
+2014-01-10 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: simple-card: fix simple card widgets routing property name usage
+
+2014-01-19 Kent Overstreet <kmo at daterainc.com>
+
+ * percpu_ida: Make percpu_ida_alloc + callers accept task state bitmask
+
+2014-01-23 Linus Walleij <linus.walleij at linaro.org>
+
+ * regulator: ab3100: cast fix
+
+2014-01-23 Masami Hiramatsu <masami.hiramatsu.pt at hitachi.com>
+
+ * perf symbols: Load map before using map->map_ip()
+
+2014-01-22 Josh Boyer <jwboyer at fedoraproject.org>
+
+ * perf tools: Fix traceevent plugin path definitions
+
+2014-01-23 Vadim Rozenfeld <vrozenfe at redhat.com>
+
+ * KVM: x86: mark hyper-v hypercall page as dirty
+
+2014-01-23 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Staging: rtl8821ae: add TODO file
+
+2014-01-23 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Staging: rtl8821ae: removed unused functions and variables
+
+2014-01-23 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Staging: rtl8821ae: rc.c: fix up function prototypes
+
+2014-01-21 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Staging: rtl8812ae: Add Realtek 8821 PCI WIFI driver
+
+2014-01-23 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * tracing: Check if tracing is enabled in trace_puts()
+
+2014-01-14 Boaz Harrosh <bharrosh at panasas.com>
+
+ * exofs: Print less in r4w
+
+2014-01-14 Boaz Harrosh <bharrosh at panasas.com>
+
+ * exofs: Allow corrupted directory entry to be empty file
+
+2014-01-13 Boaz Harrosh <bharrosh at panasas.com>
+
+ * exofs: Allow O_DIRECT open
+
+2014-01-23 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2014-01-13 Boaz Harrosh <bharrosh at panasas.com>
+
+ * ore: Don't crash on NULL bio in _clear_bio
+
+2014-01-23 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Merge branch 'next' into for-linus
+
+2014-01-21 Adrian Hunter <adrian.hunter at intel.com>
+
+ * mmc: sdhci-pci: Fix possibility of chip->fixes being null
+
+2014-01-09 Thierry Reding <treding at nvidia.com>
+
+ * drm/tegra: Obtain head number from DT
+
+2014-01-23 Mark Brown <broonie at linaro.org>
+
+ * Merge commit 'spi/topic/sc18is602' into spi-linus
+
+2014-01-23 Mark Brown <broonie at linaro.org>
+
+ * Merge commit 'spi/fix/rcar' into spi-linus
+
+2014-01-23 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'spi/topic/pxa2xx', 'spi/topic/qspi', 'spi/topic/s3c24xx', 'spi/topic/s3c64xx', 'spi/topic/sh', 'spi/topic/tegra114', 'spi/topic/tegra20-sflash', 'spi/topic/tegra20-slink', 'spi/topic/txx9' and 'spi/topic/xcomm' into spi-linus
+
+2014-01-23 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * ASoC: samsung: Fix Kconfig dependency
+
+2014-01-23 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * ASoC: wm5100: Export wm5100_detect
+
+2014-01-23 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'regulator/topic/s2mps11', 'regulator/topic/s5m8767', 'regulator/topic/stw481x-vmmc', 'regulator/topic/tps51632', 'regulator/topic/tps62360', 'regulator/topic/tps65910', 'regulator/topic/twl' and 'regulator/topic/wm831x' into regulator-linus
+
+2014-01-23 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'regulator/topic/db8500', 'regulator/topic/gpio', 'regulator/topic/lp3971', 'regulator/topic/lp3972', 'regulator/topic/max14577', 'regulator/topic/max77693', 'regulator/topic/mc13892', 'regulator/topic/pcf50633' and 'regulator/topic/pfuze100' into regulator-linus
+
+2014-01-23 Todd Previte <tprevite at gmail.com>
+
+ * drm/i915: VLV2 - Fix hotplug detect bits
+
+2014-01-23 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: Refactor slot assignment code
+
+2014-01-23 Paolo Bonzini <pbonzini at redhat.com>
+
+ * Merge tag 'kvm-s390-20140117' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into kvm-queue
+
+2014-01-23 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * ALSA: bits vs bytes bug in snd_card_create()
+
+2013-11-21 Boaz Harrosh <bharrosh at panasas.com>
+
+ * ore: Fix wrong math in allocation of per device BIO
+
+2014-01-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'modules-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux
+
+2014-01-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
+
+2014-01-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dm-3.14-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
+
+2014-01-23 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-nouveau-next' of git://anongit.freedesktop.org/git/nouveau/linux-2.6 into drm-next
+
+2014-01-23 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau: call drm_vblank_cleanup() earlier
+
+2014-01-22 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau: create base display from common code
+
+2014-01-17 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nv50/gr: print mpc trap name when it's not an mp trap
+
+2014-01-17 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nv50/gr: update list of mp errors, make it a bitfield
+
+2014-01-16 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nv50/gr: add more trap names to print on error
+
+2014-01-19 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nouveau/devinit: lock/unlock crtc regs for all devices, not just pre-nv50
+
+2014-01-14 Maarten Lankhorst <maarten.lankhorst at canonical.com>
+
+ * drm/nouveau: hold mutex while syncing to kernel channel
+
+2014-01-14 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nv50-/devinit: prevent use of engines marked as disabled by hw/vbios
+
+2014-01-09 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nouveau/device: provide a way for devinit to mark engines as disabled
+
+2014-01-14 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau/devinit: tidy up the subdev class definition
+
+2013-12-23 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau/bar: tidy up the subdev and object class definitions
+
+2013-12-23 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau/instmem: tidy up the object class definition
+
+2013-12-23 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau/instmem: tidy up the subdev class definition
+
+2014-01-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'scsi-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
+
+2014-01-10 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * mtd: s3c2410: Merge plat/regs-nand.h into s3c2410.c
+
+2014-01-22 Boaz Harrosh <bharrosh at panasas.com>
+
+ * pnfs: Proper delay for NFS4ERR_RECALLCONFLICT in layout_get_done
+
+2014-01-21 Takashi Iwai <tiwai at suse.de>
+
+ * drm/cirrus: correct register values for 16bpp
+
+2014-01-21 Jeff Mahoney <jeffm at suse.com>
+
+ * drm/nouveau: make vga_switcheroo code depend on VGA_SWITCHEROO
+
+2014-01-21 Dave Airlie <airlied at redhat.com>
+
+ * drm/mgag200: on cards with < 2MB VRAM default to 16-bit
+
+2014-01-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pci-v3.14-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
+
+2014-01-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'trace-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
+
+2014-01-22 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge branch 'fixes' into tty-next
+
+2014-01-22 Thomas Gleixner <tglx at linutronix.de>
+
+ * Merge tag 'mvebu-irqchip-fixes-3.13' of git://git.infradead.org/linux-mvebu into irq/core
+
+2013-12-05 Daniel Tang <dt.tangr at gmail.com>
+
+ * irqchip: Add support for TI-NSPIRE irqchip
+
+2013-12-04 Magnus Damm <damm at opensource.se>
+
+ * irqchip: renesas-irqc: Enable mask on suspend
+
+2013-12-04 Magnus Damm <damm at opensource.se>
+
+ * irqchip: renesas-irqc: Use lazy disable
+
+2014-01-22 James Bottomley <JBottomley at Parallels.com>
+
+ * Merge branch 'misc' into for-linus
+
+2014-01-22 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * ASoC: samsung: smdk_wm8994: Fix build error
+
+2014-01-22 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * ASoC: Samsung: s3c-i2s-v2: Fix build error
+
+2014-01-22 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * ASoC: samsung: Fix build regressions due to gpio re-org
+
+2014-01-22 Jiri Kosina <jkosina at suse.cz>
+
+ * Merge branches 'for-3.13/upstream-fixes', 'for-3.14/i2c-hid', 'for-3.14/sensor-hub', 'for-3.14/sony' and 'for-3.14/upstream' into for-linus
+
+2014-01-22 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * s390: wire up sys_sched_setattr/sys_sched_getattr
+
+2014-01-21 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * s390/uapi: fix struct statfs64 definition
+
+2014-01-20 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * s390/uaccess: remove dead extern declarations, make functions static
+
+2014-01-21 Kenneth Graunke <kenneth at whitecape.org>
+
+ * drm/i915: Allow reading the TIMESTAMP register on Gen8.
+
+2014-01-20 Chris Wilson <chris at chris-wilson.co.uk>
+
+ * drm/i915: Repeat evictions whilst pageflip completions are outstanding
+
+2014-01-20 Chris Wilson <chris at chris-wilson.co.uk>
+
+ * drm/i915: Wait for completion of pending flips when starved of fences
+
+2014-01-17 Imre Deak <imre.deak at intel.com>
+
+ * drm/i915: don't disable DP port after a failed link training
+
+2014-01-16 Imre Deak <imre.deak at intel.com>
+
+ * drm/i915: don't disable the DP port if the link is lost
+
+2014-01-16 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Eliminate lots of WARNs when there's no backlight present
+
+2014-01-15 Dongmao Zhang <dmzhang at suse.com>
+
+ * dm log userspace: allow mark requests to piggyback on flush requests
+
+2014-01-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (incoming from Andrew)
+
+2014-01-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata
+
+2014-01-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
+
+2014-01-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
+
+2014-01-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
+
+2014-01-21 Joonsoo Kim <iamjoonsoo.kim at lge.com>
+
+ * mm/migrate: remove unused function, fail_migrate_page()
+
+2014-01-21 Joonsoo Kim <iamjoonsoo.kim at lge.com>
+
+ * mm/migrate: remove putback_lru_pages, fix comment on putback_movable_pages
+
+2014-01-21 Joonsoo Kim <iamjoonsoo.kim at lge.com>
+
+ * mm/migrate: correct failure handling if !hugepage_migration_support()
+
+2014-01-21 Naoya Horiguchi <n-horiguchi at ah.jp.nec.com>
+
+ * mm/migrate: add comment about permanent failure path
+
+2014-01-22 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-vbl-timestamp' of git://gitorious.org/vsyrjala/linux into drm-next
+
+2014-01-22 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'topic/core-stuff' of git://people.freedesktop.org/~danvet/drm-intel into drm-next
+
+2014-01-22 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'vmwgfx-next' of git://people.freedesktop.org/~thomash/linux into drm-next
+
+2014-01-20 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * ASoC: fsl_ssi: Do not print 'baud clock' error message all the time
+
+2014-01-20 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * ASoC: fsl_ssi: We do support master mode now
+
+2014-01-21 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: wm5110: Extend SYSCLK patch file for rev D
+
+2014-01-21 Joe Thornber <ejt at redhat.com>
+
+ * dm space map metadata: fix bug in resizing of thin metadata
+
+2014-01-16 Namhyung Kim <namhyung at kernel.org>
+
+ * perf symbols: Fix JIT symbol resolution on heap
+
+2014-01-20 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Fix recently introduced sparse / smatch warnings and errors
+
+2014-01-20 Mark Brown <broonie at linaro.org>
+
+ * ASoC: omap: Make RX51 depend on GPIOLIB due to jack usage
+
+2014-01-02 Laura Abbott <lauraa at codeaurora.org>
+
+ * percpu: use VMALLOC_TOTAL instead of VMALLOC_END - VMALLOC_START
+
+2014-01-20 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/gem: Always initialize the gem object in object_init
+
+2014-01-20 Frank Praznik <frank.praznik at oh.rr.com>
+
+ * HID: sony: Use colors for the Dualshock 4 LED names
+
+2014-01-20 Frank Praznik <frank.praznik at oh.rr.com>
+
+ * HID: sony: Add annotated HID descriptor for the Dualshock 4
+
+2014-01-20 Ping Cheng <pinglinux at gmail.com>
+
+ * Input: wacom - add support for DTU-1031
+
+2014-01-16 Ping Cheng <pinglinux at gmail.com>
+
+ * Input: wacom - fix wacom->shared guards for dual input devices
+
+2014-01-16 Lothar Waßmann <LW at KARO-electronics.de>
+
+ * Input: edt_ft5x06 - use devm_* functions where appropriate
+
+2014-01-20 Arnaud Ebalard <arno at natisbad.org>
+
+ * ARM: mvebu: fix compilation warning on Armada 370 (i.e. non-SMP)
+
+2014-01-20 Ben Dooks <ben.dooks at codethink.co.uk>
+
 + * ARM: shmobile: r8a7790.dtsi: fix i2c[0-3] clock reference
+
+2014-01-21 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-next-3.14' of git://people.freedesktop.org/~agd5f/linux into drm-next
+
+2014-01-20 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: add UVD support for OLAND
+
+2014-01-17 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: fix minor typos in si_dpm.c
+
+2014-01-16 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: set the full cache bit for fences on r7xx+
+
+2014-01-16 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: fix surface sync in fence on cayman (v2)
+
+2014-01-07 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/dpm: disable mclk switching on desktop RV770
+
+2014-01-16 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: fix endian handling in radeon_atom_init_mc_reg_table
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-x32-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86/mpx' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-kaslr-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Gregory CLEMENT <gregory.clement at free-electrons.com>
+
+ * ARM: mvebu: Fix kernel hang in mvebu_soc_id_init() when of_iomap failed
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-platform-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-microcode-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-intel-mid-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-efi-kexec-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-cpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-cleanups-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-build-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-11 Brian Norris <computersforpeace at gmail.com>
+
+ * mtd: mtdram: add missing 'const'
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-apic-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-12-04 Brian Norris <computersforpeace at gmail.com>
+
+ * mtd: m25p80: assign default read command
+
+2014-01-07 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * mtd: nuc900_nand: remove redundant return value check of platform_get_resource()
+
+2014-01-07 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * mtd: plat_nand: remove redundant return value check of platform_get_resource()
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-03 Huang Shijie <b32955 at freescale.com>
+
+ * mtd: nand: add Intel manufacturer ID
+
+2013-12-26 Huang Shijie <b32955 at freescale.com>
+
+ * mtd: nand: add SanDisk manufacturer ID
+
+2013-12-25 Huang Shijie <b32955 at freescale.com>
+
+ * mtd: nand: add support for Samsung K9LCG08U0B
+
+2014-01-13 Rodolfo Giometti <giometti at linux.it>
+
+ * mtd: nand: pxa3xx: Add support for 2048 bytes page size devices
+
+2014-01-17 Stephane Eranian <eranian at google.com>
+
+ * perf stat: Fix memory corruption of xyarray when cpumask is used
+
+2014-01-20 Stephane Eranian <eranian at google.com>
+
+ * perf evsel: Remove duplicate member zeroing after free
+
+2014-01-20 Alan Cox <alan at linux.intel.com>
+
+ * perf tools: Ensure sscanf does not overrun the "mem" field
+
+2014-01-17 Stephane Eranian <eranian at google.com>
+
+ * perf stat: fix NULL pointer reference bug with event unit
+
+2014-01-13 Baruch Siach <baruch at tkos.co.il>
+
+ * perf tools: Add support for the xtensa architecture
+
+2014-01-20 Stanislav Fomichev <stfomichev at yandex-team.ru>
+
+ * perf session: Free cpu_map in perf_session__cpu_bitmap
+
+2014-01-20 Stanislav Fomichev <stfomichev at yandex-team.ru>
+
+ * perf timechart: Fix wrong SVG height
+
+2014-01-20 Ingo Molnar <mingo at kernel.org>
+
+ * x86/intel/mpx: Remove unused LWP structure
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Alan <gnomes at lxorguk.ukuu.org.uk>
+
+ * x86, doc, kconfig: Fix dud URL for Microcode data
+
+2013-12-19 Vandana Kannan <vandana.kannan at intel.com>
+
+ * drm/edid: Populate picture aspect ratio for CEA modes
+
+2013-11-29 Thomas Wood <thomas.wood at intel.com>
+
+ * drm/edid: parse the list of additional 3D modes
+
+2013-11-29 Thomas Wood <thomas.wood at intel.com>
+
+ * drm/edid: split VIC display mode lookup into a separate function
+
+2013-11-28 Damien Lespiau <damien.lespiau at intel.com>
+
+ * drm: Make the connector mode_valid() vfunc return a drm_mode_status enum
+
+2014-01-16 Pavel Shilovsky <piastry at etersoft.ru>
+
+ * CIFS: Cleanup cifs open codepath
+
+2014-01-16 Pavel Shilovsky <piastry at etersoft.ru>
+
+ * CIFS: Remove extra indentation in cifs_sfu_type
+
+2014-01-16 Pavel Shilovsky <piastry at etersoft.ru>
+
+ * CIFS: Cleanup cifs_mknod
+
+2014-01-16 Pavel Shilovsky <piastry at etersoft.ru>
+
+ * CIFS: Cleanup CIFSSMBOpen
+
+2014-01-13 Adrian Hunter <adrian.hunter at intel.com>
+
+ * mmc: sdhci-pci: Fix BYT sd card getting stuck in runtime suspend
+
+2013-11-14 Adrian Hunter <adrian.hunter at intel.com>
+
+ * mmc: sdhci: Allow for long command timeouts
+
+2014-01-20 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.14-3' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2013-12-26 Andrew Lunn <andrew at lunn.ch>
+
+ * SATA: MV: Add support for the optional PHYs
+
+2014-01-17 Frank Praznik <frank.praznik at oh.rr.com>
+
+ * HID: sony: Cache the output report for the Dualshock 4
+
+2014-01-20 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/compress' into asoc-next
+
+2014-01-20 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/dma' into asoc-next
+
+2014-01-20 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/dapm' into asoc-next
+
+2013-10-29 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Add a kludge for DSL incrementing too late and ISR not working
+
+2013-10-28 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/radeon: Move the early vblank IRQ fixup to radeon_get_crtc_scanoutpos()
+
+2013-10-28 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm: Pass 'flags' from the caller to .get_scanout_position()
+
+2013-10-28 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm: Fix vblank timestamping constants for interlaced modes
+
+2014-01-20 Takashi Iwai <tiwai at suse.de>
+
+ * Merge branch 'for-next' into for-linus
+
+2013-10-28 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Fix scanoutpos calculations for interlaced modes
+
+2013-10-26 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm: Change {pixel,line,frame}dur_ns from s64 to int
+
+2013-10-27 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm: Use crtc_clock in drm_calc_timestamping_constants()
+
+2013-10-27 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/radeon: Populate crtc_clock in radeon_atom_get_tv_timings()
+
+2013-10-26 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm: Simplify the math in drm_calc_timestamping_constants()
+
+2013-12-02 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: Add support for follow_link on dfs shares under posix extensions
+
+2013-11-27 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: move unix extension call to cifs_query_symlink()
+
+2013-11-25 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: Re-order M-F Symlink code
+
+2013-11-25 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: Add create MFSymlinks to protocol ops struct
+
+2013-11-25 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: use protocol specific call for query_mf_symlink()
+
+2013-11-25 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: Rename MF symlink function names
+
+2013-11-25 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: Rename and cleanup open_query_close_cifs_symlink()
+
+2014-01-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.13
+
+2014-01-19 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nouveau/mxm: fix null deref on load
+
+2014-01-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'acpi-3.13-fixup' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2014-01-20 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-intel-next' of git://people.freedesktop.org/~danvet/drm-intel into drm-next
+
+2014-01-20 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'vmwgfx-next-2014-01-17' of git://people.freedesktop.org/~thomash/linux into drm-next
+
+2014-01-17 Al Viro <viro at ZenIV.linux.org.uk>
+
+ * tracing: Fix buggered tee(2) on tracing_pipe
+
+2014-01-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-19 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: export ccount_freq
+
+2014-01-16 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: fix warning '"CONFIG_OF" is not defined'
+
+2014-01-16 Stephen Boyd <sboyd at codeaurora.org>
+
+ * clocksource: Timer-sun5i: Switch to sched_clock_register()
+
+2014-01-19 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
+
+2014-01-19 Hans de Goede <hdegoede at redhat.com>
+
+ * sata-highbank: Remove unnecessary ahci_platform.h include
+
+2014-01-10 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi-target: Pre-allocate more tags to avoid ack starvation
+
+2013-12-16 Dirk Brandewie <dirk.j.brandewie at intel.com>
+
+ * turbostat: Add option to report joules consumed per sample
+
+2013-12-03 Len Brown <len.brown at intel.com>
+
+ * turbostat: run on HSX
+
+2013-08-20 Josh Triplett <josh at joshtriplett.org>
+
+ * turbostat: Add a .gitignore to ignore the compiled turbostat binary
+
+2013-08-20 Josh Triplett <josh at joshtriplett.org>
+
+ * turbostat: Clean up error handling; disambiguate error messages; use err and errx
+
+2013-08-20 Josh Triplett <josh at joshtriplett.org>
+
+ * turbostat: Factor out common function to open file and exit on failure
+
+2013-08-20 Josh Triplett <josh at joshtriplett.org>
+
+ * turbostat: Add a helper to parse a single int out of a file
+
+2013-08-20 Josh Triplett <josh at joshtriplett.org>
+
+ * turbostat: Check return value of fscanf
+
+2013-08-20 Josh Triplett <josh at joshtriplett.org>
+
+ * turbostat: Use GCC's CPUID functions to support PIC
+
+2013-08-20 Josh Triplett <josh at joshtriplett.org>
+
+ * turbostat: Don't attempt to printf an off_t with %zx
+
+2013-08-20 Josh Triplett <josh at joshtriplett.org>
+
+ * turbostat: Don't put unprocessed uapi headers in the include path
+
+2014-01-17 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * ARM: kirkwood: kirkwood_pm_init() should return void
+
+2014-01-18 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Merge branch 'eisa' into next
+
+2014-01-18 SeongJae Park <sj38.park at gmail.com>
+
+ * cgroup: trivial style updates
+
+2014-01-17 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Revert "ACPI: Add BayTrail SoC GPIO and LPSS ACPI IDs"
+
+2014-01-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2013-11-26 Bing Zhao <bzhao at marvell.com>
+
+ * mmc: sdio: add a quirk for broken SDIO_CCCR_INTx polling
+
+2013-12-23 Aisheng Dong <b29396 at freescale.com>
+
+ * mmc: sdhci: fix lockdep error in tuning routine
+
+2014-01-17 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * s390/bpf,jit: fix 32 bit divisions, use unsigned divide instructions
+
+2014-01-16 Eric Dumazet <edumazet at google.com>
+
+ * parisc: fix SO_MAX_PACING_RATE typo
+
+2014-01-16 Hannes Frederic Sowa <hannes at stressinduktion.org>
+
+ * ipv6: simplify detection of first operational link-local address on interface
+
+2014-01-16 Christoph Paasch <christoph.paasch at uclouvain.be>
+
+ * tcp: metrics: Avoid duplicate entries with the same destination-IP
+
+2014-01-16 Gerald Schaefer <gerald.schaefer at de.ibm.com>
+
+ * net: rds: fix per-cpu helper usage
+
+2014-01-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
+
+2014-01-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
+
+2014-01-17 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * clk: samsung: Remove unneeded semicolon
+
+2014-01-13 Zhangfei Gao <zhangfei.gao at linaro.org>
+
+ * mmc: dw_mmc: k3: remove clk_table
+
+2014-01-17 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Revert "EISA: Initialize device before its resources"
+
+2014-01-17 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Revert "EISA: Log device resources in dmesg"
+
+2014-01-17 Stephen Boyd <sboyd at codeaurora.org>
+
+ * clk: qcom: Fix modular build
+
+2013-08-02 Tero Kristo <t-kristo at ti.com>
+
+ * ARM: OMAP3: use DT clock init if DT data is available
+
+2013-07-19 Tero Kristo <t-kristo at ti.com>
+
+ * ARM: AM33xx: remove old clock data and link in new clock init code
+
+2013-11-21 Tero Kristo <t-kristo at ti.com>
+
+ * ARM: AM43xx: Enable clock init
+
+2014-01-12 Qiaowei Ren <qiaowei.ren at intel.com>
+
+ * x86, mpx: Add MPX related opcodes to the x86 opcode map
+
+2014-01-15 Arun Shamanna Lakshmi <aruns at nvidia.com>
+
+ * ASoC: dapm: Fix double prefix addition
+
+2014-01-17 Liam Girdwood <liam.r.girdwood at linux.intel.com>
+
 + * ASoC: compress: Add support for DPCM into compressed audio
+
+2014-01-17 Liam Girdwood <liam.r.girdwood at linux.intel.com>
+
+ * ASoC: DPCM: make some DPCM API calls non static for compressed usage
+
+2014-01-17 Axel Lin <axel.lin at ingics.com>
+
+ * spi: sc18is602: Convert to use bits_per_word_mask
+
+2014-01-14 Frederic Weisbecker <fweisbec at gmail.com>
+
+ * perf tools: Remove unnecessary callchain cursor state restore on unmatch
+
+2014-01-14 Frederic Weisbecker <fweisbec at gmail.com>
+
+ * perf callchain: Spare double comparison of callchain first entry
+
+2013-06-04 Cornelia Huck <cornelia.huck at de.ibm.com>
+
+ * KVM: s390: virtio-ccw: Handle command rejects.
+
+2014-01-16 Frank Praznik <frank.praznik at oh.rr.com>
+
+ * HID: sony: Map gyroscopes and accelerometers to axes
+
+2014-01-16 Frank Praznik <frank.praznik at oh.rr.com>
+
+ * HID: sony: Fix spacing in the device definitions.
+
+2014-01-16 Frank Praznik <frank.praznik at oh.rr.com>
+
+ * HID: sony: Use standard output reports instead of raw reports to send data to the Dualshock 4.
+
+2014-01-16 Frank Praznik <frank.praznik at oh.rr.com>
+
+ * HID: sony: Use separate identifiers for USB and Bluetooth connected Dualshock 4 controllers.
+
+2014-01-17 Jakob Bornecrantz <jakob at vmware.com>
+
+ * drm/vmwgfx: Invalidate surface on non-readback unbind
+
+2014-01-16 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Silence the device command verifier
+
+2014-01-15 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Implement 64-bit Otable- and MOB binding v2
+
+2014-01-15 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Fix surface framebuffer check for guest-backed surfaces
+
+2014-01-16 David S. Miller <davem at davemloft.net>
+
+ * Merge tag 'batman-adv-fix-for-davem' of git://git.open-mesh.org/linux-merge
+
+2013-12-20 Lukasz Majewski <l.majewski at samsung.com>
+
+ * thermal: exynos: boost: Automatic enable/disable of BOOST feature (at Exynos4412)
+
+2014-01-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
+
+2014-01-16 Hugh Dickins <hughd at google.com>
+
+ * percpu_counter: unbreak __percpu_counter_add()
+
+2014-01-16 Fengguang Wu <fengguang.wu at intel.com>
+
+ * x86, intel_mid: Replace memcpy with struct assignment
+
+2014-01-16 Mika Westerberg <mika.westerberg at linux.intel.com>
+
+ * e1000e: Fix compilation warning when !CONFIG_PM_SLEEP
+
+2014-01-16 David Cohen <david.a.cohen at linux.intel.com>
+
+ * x86, intel-mid: Return proper error code from get_gpio_by_name()
+
+2014-01-16 David Cohen <david.a.cohen at linux.intel.com>
+
+ * x86, intel-mid: Check get_gpio_by_name() error code on platform code
+
+2014-01-16 David Cohen <david.a.cohen at linux.intel.com>
+
+ * x86, intel-mid: sfi_handle_*_dev() should check for pdata error code
+
+2014-01-14 Kharlamov Alexey <derlafff at yandex.ru>
+
+ * HID: hid-holtek-mouse: add new a070 mouse
+
+2014-01-14 Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
+
+ * HID: hid-sensor-hub: Fix buggy report descriptors
+
+2014-01-08 Benjamin Tissoires <benjamin.tissoires at redhat.com>
+
+ * HID: logitech-dj: Fix USB 3.0 issue
+
+2014-01-11 Frank Praznik <frank.praznik at oh.rr.com>
+
+ * HID: sony: Rename worker function
+
+2014-01-16 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * Merge commit origin/master into drm-intel-next
+
+2014-01-16 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86, tsc, apic: Unbreak static (MSR) calibration when CONFIG_X86_LOCAL_APIC=n
+
+2014-01-14 Frederic Weisbecker <fweisbec at gmail.com>
+
+ * perf tools: Do proper comm override error handling
+
+2014-01-16 Masami Hiramatsu <masami.hiramatsu.pt at hitachi.com>
+
+ * perf symbols: Export elf_section_by_name and reuse
+
+2014-01-16 Masami Hiramatsu <masami.hiramatsu.pt at hitachi.com>
+
+ * perf probe: Release all dynamically allocated parameters
+
+2014-01-16 Masami Hiramatsu <masami.hiramatsu.pt at hitachi.com>
+
+ * perf probe: Release allocated probe_trace_event if failed
+
+2014-01-16 Namhyung Kim <namhyung at kernel.org>
+
+ * perf tools: Add 'build-test' make target
+
+2014-01-16 Namhyung Kim <namhyung at kernel.org>
+
+ * tools lib traceevent: Unregister handler when xen plugin is unloaded
+
+2014-01-16 Namhyung Kim <namhyung at kernel.org>
+
+ * tools lib traceevent: Unregister handler when scsi plugin is unloaded
+
+2014-01-15 Mike Snitzer <snitzer at redhat.com>
+
+ * dm cache: add policy name to status output
+
+2014-01-16 Catalin Marinas <catalin.marinas at arm.com>
+
+ * Revert "arm64: Fix memory shareability attribute for ioremap_wc/cache"
+
+2014-01-16 Peter Zijlstra <peterz at infradead.org>
+
+ * sched: Fix __sched_setscheduler() nice test
+
+2014-01-16 Sebastian Hesselbarth <sebastian.hesselbarth at gmail.com>
+
+ * ARM: orion: provide C-style interrupt handler for MULTI_IRQ_HANDLER
+
+2014-01-16 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.13-rc8-2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2014-01-16 Tejun Heo <tj at kernel.org>
+
+ * libata: disable LPM for some WD SATA-I devices
+
+2014-01-16 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: core: Fix possible NULL pointer dereference of pcm->config
+
+2014-01-16 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/adau1701' and 'asoc/fix/tlv320aic32x4' into asoc-linus
+
+2014-01-16 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.14-2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-next
+
+2014-01-16 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/topic/adsp', 'asoc/topic/atmel', 'asoc/topic/bcm2835', 'asoc/topic/docs', 'asoc/topic/fsl', 'asoc/topic/generic', 'asoc/topic/kirkwood', 'asoc/topic/mc13783', 'asoc/topic/mxs', 'asoc/topic/nuc900', 'asoc/topic/sai', 'asoc/topic/sh', 'asoc/topic/ssm2602', 'asoc/topic/tlv320aic3x', 'asoc/topic/twl4030', 'asoc/topic/ux500', 'asoc/topic/width' and 'asoc/topic/x86' into for-tiwai
+
+2014-01-16 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/arizona' into for-tiwai
+
+2014-01-16 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/pcm' into for-tiwai
+
+2014-01-16 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/dma' into for-tiwai
+
+2014-01-16 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/dapm' into for-tiwai
+
+2014-01-16 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/core' into for-tiwai
+
+2014-01-16 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/adau1701' and 'asoc/fix/tlv320aic32x4' into for-tiwai
+
+2014-01-16 Hui Wang <hui.wang at canonical.com>
+
+ * ALSA: hda - add headset mic detect quirks for some Dell machines
+
+2014-01-16 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
+
+2014-01-16 Ingo Molnar <mingo at kernel.org>
+
+ * Merge branch 'perf/urgent' into perf/core
+
+2014-01-15 Robert Richter <rric at kernel.org>
+
+ * perf/x86/amd/ibs: Fix waking up from S3 for AMD family 10h
+
+2014-01-10 Peter Zijlstra <peterz at infradead.org>
+
+ * x86, mm, perf: Allow recursive faults from interrupts
+
+2013-10-21 Bin Gao <bin.gao at intel.com>
+
+ * x86, tsc: Add static (MSR) TSC calibration on Intel Atom SoCs
+
+2014-01-13 Prarit Bhargava <prarit at redhat.com>
+
+ * x86: Add check for number of available vectors before CPU down
+
+2014-01-16 Dave Airlie <airlied at redhat.com>
+
+ * drm/mgag200: fix oops in cursor code.
+
+2014-01-15 Mike Snitzer <snitzer at redhat.com>
+
+ * dm thin: fix pool feature parsing
+
+2014-01-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branches 'sched-urgent-for-linus' and 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging
+
+2014-01-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-arm
+
+2014-01-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'writeback-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/linux
+
+2014-01-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'i2c/for-current' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux
+
+2014-01-15 Eric Dumazet <edumazet at google.com>
+
+ * bpf: do not use reciprocal divide
+
+2014-01-15 Ivan Vecera <ivecera at redhat.com>
+
+ * be2net: add dma_mapping_error() check for dma_map_page()
+
+2014-01-15 Yuval Mintz <yuvalmin at broadcom.com>
+
+ * bnx2x: Don't release PCI bars on shutdown
+
+2014-01-14 Richard Weinberger <richard at nod.at>
+
+ * net,via-rhine: Fix tx_timeout handling
+
+2014-01-15 Markus Pargmann <mpa at pengutronix.de>
+
+ * ASoC: tlv320aic32x4: Fix regmap range_min
+
+2014-01-15 Kevin Hilman <khilman at linaro.org>
+
+ * sched/nohz: Fix overflow error in scheduler_tick_max_deferment()
+
+2014-01-15 Mark Brown <broonie at linaro.org>
+
+ * ASoC: core: Return -ENOTSUPP from set_sysclk() if no operation provided
+
+2014-01-15 Marek Lindner <mareklindner at neomailbox.ch>
+
+ * batman-adv: fix batman-adv header overhead calculation
+
+2013-12-16 David Cohen <david.a.cohen at linux.intel.com>
+
+ * x86, intel-mid: Remove deprecated X86_MDFLD and X86_WANT_INTEL_MID configs
+
+2013-12-16 David Cohen <david.a.cohen at linux.intel.com>
+
+ * x86, intel-mid: Add Merrifield platform support
+
+2013-12-16 Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy at linux.intel.com>
+
+ * x86, intel-mid: Add Clovertrail platform support
+
+2013-12-16 David Cohen <david.a.cohen at linux.intel.com>
+
+ * x86, intel-mid: Move Medfield code out of intel-mid.c core file
+
+2014-01-15 H. Peter Anvin <hpa at zytor.com>
+
+ * x86, apic: Make disabled_cpu_apicid static read_mostly, fix typos
+
+2014-01-15 Mark Rutland <mark.rutland at arm.com>
+
+ * tools lib traceevent: fix pointer-integer size mismatch
+
+2014-01-14 Namhyung Kim <namhyung at kernel.org>
+
+ * perf hists: Convert hist entry functions to use struct he_stat
+
+2014-01-14 Namhyung Kim <namhyung at kernel.org>
+
+ * perf tools: Factor out sample__resolve_callchain()
+
+2014-01-14 Namhyung Kim <namhyung at kernel.org>
+
+ * perf tools: Remove symbol_conf.use_callchain check
+
+2014-01-15 Mark Rutland <mark.rutland at arm.com>
+
+ * perf: tools: Fix cross building
+
+2014-01-15 Namhyung Kim <namhyung at kernel.org>
+
+ * tools lib traceevent: Make plugin unload function receive pevent
+
+2014-01-15 Namhyung Kim <namhyung at kernel.org>
+
+ * tools lib traceevent: Get rid of die() finally!!
+
+2014-01-15 Namhyung Kim <namhyung at kernel.org>
+
+ * tools lib traceevent: Get rid of malloc_or_die() in trace_seq_init()
+
+2014-01-15 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Merge branch 'pci/reset' into next
+
+2014-01-15 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Merge branch 'pci/locking' into next
+
+2014-01-15 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Merge branch 'pci/misc' into next
+
+2014-01-14 Alex Williamson <alex.williamson at redhat.com>
+
+ * vfio-pci: Use pci "try" reset interface
+
+2014-01-14 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * PCI: Check parent kobject in pci_destroy_dev()
+
+2014-01-10 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * xen/pcifront: Use global PCI rescan-remove locking
+
+2014-01-15 Rafael J. Wysocki <rjw at rjwysocki.net>
+
+ * powerpc/eeh: Use global PCI rescan-remove locking
+
+2014-01-15 HATAYAMA Daisuke <d.hatayama at jp.fujitsu.com>
+
+ * x86, apic, kexec: Add disable_cpu_apicid kernel parameter
+
+2014-01-14 Mauro Carvalho Chehab <m.chehab at samsung.com>
+
+ * [media] rc-core: reuse device numbers
+
+2014-01-14 Mauro Carvalho Chehab <m.chehab at samsung.com>
+
+ * [media] em28xx-cards: properly initialize the device bitmap
+
+2014-01-14 Monam Agarwal <monamagarwal123 at gmail.com>
+
+ * [media] Staging: media: Fix line length exceeding 80 characters in as102_drv.c
+
+2014-01-15 Andrew Jones <drjones at redhat.com>
+
+ * kvm: x86: fix apic_base enable check
+
+2014-01-15 Borislav Petkov <bp at suse.de>
+
+ * x86, cpu, amd: Fix a shadowed variable situation
+
+2014-01-14 Arun Shamanna Lakshmi <aruns at nvidia.com>
+
+ * ASoC: dapm: Change prototype of soc_widget_read
+
+2014-01-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (incoming from Andrew)
+
+2014-01-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'md/3.13-fixes' of git://neil.brown.name/md
+
+2014-01-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2014-01-14 Ming Lei <tom.leiming at gmail.com>
+
+ * lib/percpu_counter.c: fix __percpu_counter_add()
+
+2014-01-14 Qais Yousef <qais.yousef at imgtec.com>
+
+ * crash_dump: fix compilation error (on MIPS at least)
+
+2014-01-14 Mikulas Patocka <mpatocka at redhat.com>
+
+ * mm: fix crash when using XFS on loopback
+
+2014-01-14 Aaro Koskinen <aaro.koskinen at iki.fi>
+
+ * MIPS: fix blast_icache32 on loongson2
+
+2014-01-14 Huacai Chen <chenhc at lemote.com>
+
+ * MIPS: fix case mismatch in local_r4k_flush_icache_range()
+
+2014-01-14 Andreas Rohner <andreas.rohner at gmx.net>
+
+ * nilfs2: fix segctor bug that causes file system corruption
+
+2014-01-15 Ingo Molnar <mingo at kernel.org>
+
+ * Merge branch 'clockevents/3.13-fixes' of git://git.linaro.org/people/daniel.lezcano/linux into timers/urgent
+
+2014-01-15 Vasant Hegde <hegdevasant at linux.vnet.ibm.com>
+
+ * powerpc/powernv: Call OPAL sync before kexec'ing
+
+2014-01-15 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-nouveau-next' of git://git.freedesktop.org/git/nouveau/linux-2.6 into drm-fixes
+
+2014-01-13 Aneesh Kumar K.V <aneesh.kumar at linux.vnet.ibm.com>
+
+ * powerpc/thp: Fix crash on mremap
+
+2014-01-14 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau: fix null ptr dereferences on some boards
+
+2014-01-13 Mikulas Patocka <mpatocka at redhat.com>
+
+ * dm sysfs: fix a module unload race
+
+2014-01-13 Mikulas Patocka <mpatocka at redhat.com>
+
+ * dm snapshot: use dm-bufio prefetch
+
+2014-01-13 Mikulas Patocka <mpatocka at redhat.com>
+
+ * dm snapshot: use dm-bufio
+
+2014-01-14 Jitendra Kalsaria <jitendra.kalsaria at qlogic.com>
+
+ * qlge: Fix vlan netdev features.
+
+2014-01-13 Hannes Frederic Sowa <hannes at stressinduktion.org>
+
+ * net: avoid reference counter overflows on fib_rules in multicast forwarding
+
+2014-01-12 Peter Korsgaard <peter at korsgaard.com>
+
+ * dm9601: add USB IDs for new dm96xx variants
+
+2014-01-12 Michael S. Tsirkin <mst at redhat.com>
+
+ * MAINTAINERS: add virtio-dev ML for virtio
+
+2014-01-15 Borislav Petkov <bp at suse.de>
+
+ * x86, cpu, amd: Add workaround for family 16h, erratum 793
+
+2014-01-14 Bjorn Helgaas <bhelgaas at google.com>
+
+ * PCI: Fix pci_check_and_unmask_intx() comment typos
+
+2014-01-11 Christian Engelmayer <cengelma at gmx.at>
+
+ * ieee802154: Fix memory leak in ieee802154_add_iface()
+
+2013-12-16 Alex Williamson <alex.williamson at redhat.com>
+
+ * PCI: Add pci_try_reset_function(), pci_try_reset_slot(), pci_try_reset_bus()
+
+2014-01-11 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: samsung: Remove SND_DMAENGINE_PCM_FLAG_NO_RESIDUE flag
+
+2014-01-11 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: axi-{spdif,i2s}: Remove SND_DMAENGINE_PCM_FLAG_NO_RESIDUE flag
+
+2014-01-11 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: generic-dmaengine-pcm: Check DMA residue granularity
+
+2014-01-11 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: generic-dmaengine-pcm: Check NO_RESIDUE flag at runtime
+
+2014-01-11 Lars-Peter Clausen <lars at metafoo.de>
+
+ * dma: pl330: Set residue_granularity
+
+2014-01-11 Lars-Peter Clausen <lars at metafoo.de>
+
+ * dma: Indicate residue granularity in dma_slave_caps
+
+2014-01-14 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: simple-card: fix one bug to writing to the platform data
+
+2014-01-11 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: pcm: Use snd_pcm_rate_mask_intersect() helper
+
+2014-01-11 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ALSA: Add helper function for intersecting two rate masks
+
+2014-01-11 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: s6000: Don't mix SNDRV_PCM_RATE_CONTINUOUS with specific rates
+
+2014-01-11 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: fsl: Don't mix SNDRV_PCM_RATE_CONTINUOUS with specific rates
+
+2014-01-10 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: implement ndelay
+
+2014-01-10 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: clean up udelay
+
+2013-11-28 Baruch Siach <baruch at tkos.co.il>
+
+ * xtensa: enable HAVE_PERF_EVENTS
+
+2013-12-29 Baruch Siach <baruch at tkos.co.il>
+
+ * xtensa: remap io area defined in device tree
+
+2013-12-23 Baruch Siach <baruch at tkos.co.il>
+
+ * xtensa: support default device tree buses
+
+2013-12-23 Baruch Siach <baruch at tkos.co.il>
+
+ * xtensa: initialize device tree clock sources
+
+2013-12-25 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: xtfpga: fix definitions of platform devices
+
+2013-12-01 Baruch Siach <baruch at tkos.co.il>
+
+ * xtensa: standardize devicetree cpu compatible strings
+
+2013-11-17 Baruch Siach <baruch at tkos.co.il>
+
+ * xtensa: avoid duplicate of IO range definitions
+
+2013-11-11 Baruch Siach <baruch at tkos.co.il>
+
+ * xtensa: fix ATOMCTL register documentation
+
+2013-12-12 Kirill Tkhai <tkhai at yandex.ru>
+
+ * xtensa: Enable irqs after cpu is set online
+
+2013-11-10 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: ISS: raise network polling rate to 10 times/sec
+
+2013-12-10 Kees Cook <keescook at chromium.org>
+
+ * x86, kaslr: Clarify RANDOMIZE_BASE_MAX_OFFSET
+
+2013-12-07 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * x86, kaslr: Remove unused including <linux/version.h>
+
+2014-01-13 Mikulas Patocka <mpatocka at redhat.com>
+
+ * dm snapshot: prepare for switch to using dm-bufio
+
+2014-01-14 Jean Delvare <khali at linux-fr.org>
+
+ * hwmon: (coretemp) Fix truncated name of alarm attributes
+
+2014-01-14 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
+
+2014-01-13 Stephen Warren <swarren at nvidia.com>
+
+ * i2c: Re-instate body of i2c_parent_is_i2c_adapter()
+
+2014-01-14 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * tracing: Have trace buffer point back to trace_array
+
+2014-01-02 Arnd Bergmann <arnd at arndb.de>
+
+ * sound: oss: remove last sleep_on users
+
+2014-01-02 Arnd Bergmann <arnd at arndb.de>
+
+ * sound: oss: dmasound: kill SLEEP() macro to avoid race
+
+2014-01-02 Arnd Bergmann <arnd at arndb.de>
+
+ * sound: oss: midibuf: fix sleep_on races
+
+2014-01-09 Stephen Warren <swarren at nvidia.com>
+
+ * drm/panel: update EDID BLOB in panel_simple_get_modes()
+
+2014-01-02 Arnd Bergmann <arnd at arndb.de>
+
+ * sound: oss: vwsnd: avoid interruptible_sleep_on
+
+2014-01-02 Arnd Bergmann <arnd at arndb.de>
+
+ * sound: oss: msnd_pinnacle: avoid interruptible_sleep_on_timeout
+
+2014-01-14 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix endless vmaster hook call in thinkpad_helper.c
+
+2014-01-13 Thierry Reding <treding at nvidia.com>
+
+ * gpu: host1x: Remove unnecessary include
+
+2014-01-13 Thierry Reding <treding at nvidia.com>
+
+ * drm/tegra: Use proper data type
+
+2014-01-13 Thierry Reding <treding at nvidia.com>
+
+ * drm/tegra: Clarify how panel modes override others
+
+2014-01-10 Thierry Reding <treding at nvidia.com>
+
+ * drm/tegra: Fix possible CRTC mask for RGB outputs
+
+2014-01-12 Ingo Molnar <mingo at kernel.org>
+
+ * Merge branch 'clockevents/3.14' of git://git.linaro.org/people/daniel.lezcano/linux into timers/core
+
+2014-01-14 Richard Weinberger <richard at nod.at>
+
+ * x86/apic: Read Error Status Register correctly
+
+2014-01-14 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'amd_ucode_for_3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp into x86/microcode
+
+2014-01-10 Bjørn Mork <bjorn at mork.no>
+
+ * net: usbnet: fix SG initialisation
+
+2014-01-10 Neal Cardwell <ncardwell at google.com>
+
+ * inet_diag: fix inet_diag_dump_icsk() to use correct state for timewait sockets
+
+2013-12-12 NeilBrown <neilb at suse.de>
+
+ * md: fix problem when adding device to read-only array with bitmap.
+
+2014-01-06 NeilBrown <neilb at suse.de>
+
+ * md/raid10: fix bug when raid10 recovery fails to recover a block.
+
+2014-01-14 NeilBrown <neilb at suse.de>
+
+ * md/raid5: fix a recently broken BUG_ON().
+
+2014-01-14 NeilBrown <neilb at suse.de>
+
+ * md/raid1: fix request counting bug in new 'barrier' code.
+
+2014-01-14 NeilBrown <neilb at suse.de>
+
+ * md/raid10: fix two bugs in handling of known-bad-blocks.
+
+2014-01-06 NeilBrown <neilb at suse.de>
+
+ * md/raid5: Fix possible confusion when multiple write errors occur.
+
+2013-12-20 Randy Dunlap <rdunlap at infradead.org>
+
+ * gpu: fix qxl missing crc32_le
+
+2014-01-06 Rashika <rashika.kheria at gmail.com>
+
+ * drivers: gpu: Include appropriate header file in r128_ioc32.c
+
+2014-01-06 Rashika <rashika.kheria at gmail.com>
+
+ * drivers: gpu: Mark function as static in via_drv.c
+
+2014-01-14 Dave Airlie <airlied at redhat.com>
+
+ * Revert "drm: copy mode type in drm_mode_connector_list_update()"
+
+2014-01-14 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm-intel-fixes-2014-01-13' of git://people.freedesktop.org/~danvet/drm-intel into drm-fixes
+
+2013-12-31 Gregory CLEMENT <gregory.clement at free-electrons.com>
+
+ * i2c: mv64xxx: Document the newly introduced Armada XP A0 compatible
+
+2013-12-31 Gregory CLEMENT <gregory.clement at free-electrons.com>
+
+ * i2c: mv64xxx: Fix bus hang on A0 version of the Armada XP SoCs
+
+2014-01-13 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Merge branch 'pci/ifndefs' into next
+
+2014-01-13 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * lustre: delete linux/lustre_debug.h
+
+2014-01-13 Bjorn Helgaas <bhelgaas at google.com>
+
+ * PCI: Cleanup pci.h whitespace
+
+2014-01-13 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * staging: lustre: remove some unused debug macros
+
+2014-01-10 Marek Roszko <mark.roszko at gmail.com>
+
+ * tty/serial: at91: disable uart timer at start of shutdown
+
+2014-01-13 Paul Zimmerman <Paul.Zimmerman at synopsys.com>
+
+ * usb: dwc2: move device tree bindings doc to correct place
+
+2014-01-13 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Merge branch 'pci/dead-code' into next
+
+2014-01-11 Yann Droneaud <ydroneaud at opteya.com>
+
+ * perf tools: Remove unused test-volatile-register-var.c
+
+2014-01-04 Tetsuo Handa <penguin-kernel at I-love.SAKURA.ne.jp>
+
+ * slub: Fix possible format string bug.
+
+2014-01-10 Peter Zijlstra <peterz at infradead.org>
+
+ * slub: use lockdep_assert_held
+
+2014-01-09 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * cxgb4: silence shift wrapping static checker warning
+
+2013-12-01 Borislav Petkov <bp at suse.de>
+
+ * x86, microcode: Move to a proper location
+
+2013-11-29 Borislav Petkov <bp at suse.de>
+
+ * x86, microcode, AMD: Fix early ucode loading
+
+2013-12-04 Borislav Petkov <bp at suse.de>
+
+ * x86, microcode: Share native MSR accessing variants
+
+2013-12-04 Borislav Petkov <bp at suse.de>
+
+ * x86, ramdisk: Export relocated ramdisk VA
+
+2014-01-13 Arnaldo Carvalho de Melo <acme at redhat.com>
+
+ * perf probe: Fix build when DWARF support libraries not present
+
+2014-01-13 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * ftrace: Fix synchronization location disabling and freeing ftrace_ops
+
+2014-01-13 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: simple-card: use snd_soc_card_set/get_drvdata
+
+2014-01-13 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * ftrace: Have function graph only trace based on global_ops filters
+
+2014-01-12 Hugh Dickins <hughd at google.com>
+
+ * cgroup: remove stray references to css_id
+
+2013-12-30 Ramkumar Ramachandra <artagnon at gmail.com>
+
+ * perf diff: Color the Weighted Diff column
+
+2013-12-30 Ramkumar Ramachandra <artagnon at gmail.com>
+
+ * perf diff: Color the Ratio column
+
+2013-12-30 Ramkumar Ramachandra <artagnon at gmail.com>
+
+ * perf diff: Color the Delta column
+
+2014-01-13 Prarit Bhargava <prarit at redhat.com>
+
+ * x86/irq: Fix kbuild warning in smp_irq_move_cleanup_interrupt()
+
+2013-12-30 Ramkumar Ramachandra <artagnon at gmail.com>
+
+ * perf tools: Generalize percent_color_snprintf()
+
+2014-01-11 Markus Pargmann <mpa at pengutronix.de>
+
+ * ASoC: tlv320aic3x: Add tlv320aic32x4 as compatible
+
+2014-01-11 Markus Pargmann <mpa at pengutronix.de>
+
+ * ASoC: codec: tlv320aic32x4: Fix regmap range config
+
+2014-01-08 Mark Brown <broonie at linaro.org>
+
+ * ASoC: max9850: Use params_width() rather than memory format
+
+2014-01-08 Mark Brown <broonie at linaro.org>
+
+ * ASoC: max98095: Use params_width() rather than memory format
+
+2014-01-08 Mark Brown <broonie at linaro.org>
+
+ * ASoC: max98090: Use params_width() rather than memory format
+
+2014-01-08 Mark Brown <broonie at linaro.org>
+
+ * ASoC: max98088: Use params_width() rather than memory format
+
+2014-01-08 Mark Brown <broonie at linaro.org>
+
+ * ASoC: isabelle: Use params_width() rather than memory format
+
+2014-01-08 Mark Brown <broonie at linaro.org>
+
+ * ASoC: da9055: Use params_width() rather than memory format
+
+2014-01-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
+
+2014-01-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-13 Benjamin Herrenschmidt <benh at kernel.crashing.org>
+
+ * powerpc: Check return value of instance-to-package OF call
+
+2014-01-12 K. Y. Srinivasan <kys at microsoft.com>
+
+ * Input: hyperv-keyboard - pass through 0xE1 prefix
+
+2014-01-12 Geert Uytterhoeven <geert+renesas at linux-m68k.org>
+
+ * Input: logips2pp - fix spelling s/reciver/receiver/
+
+2014-01-12 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'ras_for_3.14_p2' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras into x86/ras
+
+2014-01-12 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'v3.13-rc8' into x86/ras, to pick up fixes.
+
+2014-01-12 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
+
+2014-01-12 Richard Weinberger <richard at nod.at>
+
+ * um, x86: Fix vDSO build
+
+2013-12-23 Borislav Petkov <bp at suse.de>
+
+ * x86, mce: Fix mce_start_timer semantics
+
+2014-01-10 Taras Kondratiuk <taras.kondratiuk at linaro.org>
+
+ * ARM: 7938/1: OMAP4/highbank: Flush L2 cache before disabling
+
+2014-01-05 Prarit Bhargava <prarit at redhat.com>
+
+ * x86/irq: Fix do_IRQ() interrupt warning for cpu hotplug retriggered irqs
+
+2014-01-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.13-rc8
+
+2014-01-09 Steven Rostedt <rostedt at goodmis.org>
+
+ * SELinux: Fix possible NULL pointer dereference in selinux_inode_permission()
+
+2014-01-12 Hugh Dickins <hughd at google.com>
+
+ * thp: fix copy_page_rep GPF by testing is_huge_zero_pmd once only
+
+2013-12-26 Ming Lei <tom.leiming at gmail.com>
+
+ * block: null_blk: fix queue leak inside removing device
+
+2014-01-05 Yann Droneaud <ydroneaud at opteya.com>
+
+ * perf: Introduce a flag to enable close-on-exec in perf_event_open()
+
+2014-01-08 Stephane Eranian <eranian at google.com>
+
+ * perf/x86/intel: Add Intel RAPL PP1 energy counter support
+
+2014-01-08 Stephane Eranian <eranian at google.com>
+
+ * perf/x86: Fix active_entry initialization
+
+2014-01-02 John Stultz <john.stultz at linaro.org>
+
+ * sched_clock: Disable seqlock lockdep usage in sched_clock()
+
+2014-01-02 John Stultz <john.stultz at linaro.org>
+
+ * seqlock: Use raw_ prefix instead of _no_lockdep
+
+2014-01-06 Rik van Riel <riel at redhat.com>
+
+ * sched: Calculate effective load even if local weight is 0
+
+2014-01-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * x86, fpu, amd: Clear exceptions in AMD FXSAVE workaround
+
+2014-01-11 jon ernst <jonernst07 at gmail.com>
+
+ * ext4: delete "set but not used" variables
+
+2014-01-10 Taras Kondratiuk <taras.kondratiuk at linaro.org>
+
+ * ARM: 7939/1: traps: fix opcode endianness when read from user memory
+
+2014-01-10 Stephen Boyd <sboyd at codeaurora.org>
+
+ * ARM: 7937/1: perf_event: Silence sparse warning
+
+2014-01-08 Sudeep Holla <sudeep.holla at arm.com>
+
+ * ARM: 7934/1: DT/kernel: fix arch_match_cpu_phys_id to avoid erroneous match
+
+2014-01-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2014-01-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'xfs-for-linus-v3.13-rc8' of git://oss.sgi.com/xfs/xfs
+
+2014-01-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'leds-fixes-for-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/cooloney/linux-leds
+
+2014-01-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.13-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2014-01-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'mfd-fixes-3.13-2' of git://git.kernel.org/pub/scm/linux/kernel/git/sameo/mfd-fixes
+
+2013-12-02 Milo Kim <milo.kim at ti.com>
+
+ * leds: lp5521/5523: Remove duplicate mutex
+
+2014-01-10 Jesse Barnes <jbarnes at virtuousgeek.org>
+
+ * drm/i915/bdw: make sure south port interrupts are enabled properly v2
+
+2014-01-10 Chris Wilson <chris at chris-wilson.co.uk>
+
+ * drm/i915: Include more information in disabled hotplug interrupt warning
+
+2014-01-10 Chris Wilson <chris at chris-wilson.co.uk>
+
+ * drm/i915: Only complain about a rogue hotplug IRQ after disabling
+
+2014-01-10 Chris Wilson <chris at chris-wilson.co.uk>
+
+ * drm/i915: Only WARN about a stuck hotplug irq ONCE
+
+2013-12-18 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915: s/hotplugt_status_gen4/hotplug_status_g4x/
+
+2014-01-07 Chuansheng Liu <chuansheng.liu at intel.com>
+
+ * xfs: Calling destroy_work_on_stack() to pair with INIT_WORK_ONSTACK()
+
+2014-01-01 Jie Liu <jeff.liu at oracle.com>
+
+ * xfs: fix off-by-one error in xfs_attr3_rmt_verify
+
+2014-01-09 Shahed Shaikh <shahed.shaikh at qlogic.com>
+
+ * qlcnic: Fix ethtool statistics length calculation
+
+2014-01-09 Manish Chopra <manish.chopra at qlogic.com>
+
+ * qlcnic: Fix bug in TX statistics
+
+2014-01-10 Jason Wang <jasowang at redhat.com>
+
+ * net: core: explicitly select a txq before doing l2 forwarding
+
+2014-01-10 Jason Wang <jasowang at redhat.com>
+
+	* macvlan: forbid L2 forwarding offload for macvtap
+
+2014-01-10 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'for-davem' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless
+
+2014-01-09 Michal Schmidt <mschmidt at redhat.com>
+
+ * bnx2x: fix DMA unmapping of TSO split BDs
+
+2014-01-09 Alex Williamson <alex.williamson at redhat.com>
+
+ * PCI: Never treat a VF as a multifunction device
+
+2014-01-10 Dominique Martinet <dominique.martinet at cea.fr>
+
+ * 9P: introduction of a new cache=mmap model.
+
+2014-01-10 Nicolin Chen <Guangyu.Chen at freescale.com>
+
+ * ASoC: fsl_esai: Add ESAI CPU DAI driver
+
+2014-01-09 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: wm5110: Add controls for headphone short circuit protection
+
+2014-01-09 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * mfd: wm5110: Add registers for headphone short circuit control
+
+2014-01-10 Mark Brown <broonie at linaro.org>
+
+ * Merge tag 'v3.13-rc3' into asoc-arizona
+
+2014-01-10 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'clk-fixes-for-linus' of git://git.linaro.org/people/mike.turquette/linux
+
+2014-01-10 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2014-01-07 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Don't grab crtc mutexes in intel_modeset_gem_init()
+
+2014-01-08 Hannes Frederic Sowa <hannes at stressinduktion.org>
+
+ * ipv6: add link-local, sit and loopback address with INFINITY_LIFE_TIME
+
+2013-11-08 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * ftrace: Synchronize setting function_trace_op with ftrace_trace_function
+
+2014-01-07 Yuval Mintz <yuvalmin at broadcom.com>
+
+ * bnx2x: prevent WARN during driver unload
+
+2014-01-10 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-cpuidle'
+
+2014-01-09 Jiang Liu <jiang.liu at linux.intel.com>
+
+ * intel_idle: close avn_cstates array with correct marker
+
+2014-01-09 Ben Myers <bpm at sgi.com>
+
+ * Merge branch 'xfs-extent-list-locking-fixes' into for-next
+
+2014-01-09 Ben Myers <bpm at sgi.com>
+
+ * Merge branch 'xfs-misc' into for-next
+
+2014-01-07 Chuansheng Liu <chuansheng.liu at intel.com>
+
+ * xfs: Calling destroy_work_on_stack() to pair with INIT_WORK_ONSTACK()
+
+2013-11-13 Steven Rostedt <rostedt at goodmis.org>
+
+ * ftrace/x86: Load ftrace_ops in parameter not the variable holding it
+
+2014-01-09 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: simple-card: fix the cinfo error check
+
+2014-01-09 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: simple-card: fix a bug where cinfo will be NULL before using it
+
+2014-01-09 Nicolin Chen <Guangyu.Chen at freescale.com>
+
+ * ASoC: fsl_ssi: Set default slot number for common cases
+
+2014-01-09 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * ASoC: fsl-ssi: Add missing clk_disable_unprepare() on error in fsl_ssi_probe()
+
+2014-01-01 Qi Wang 王起 (qiwang) <qiwang at micron.com>
+
+ * UBI: avoid program operation on NOR flash after erasure interrupted
+
+2014-01-09 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless into for-davem
+
+2014-01-09 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regmap/topic/ack' into regmap-next
+
+2014-01-06 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: ux500: Don't set unused struct snd_pcm_hardware fields
+
+2014-01-06 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: sh: Don't set unused struct snd_pcm_hardware fields
+
+2014-01-06 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: nuc900: Don't set unused struct snd_pcm_hardware fields
+
+2014-01-06 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: kirkwood: Don't set unused struct snd_pcm_hardware fields
+
+2014-01-06 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: intel: Don't set unused struct snd_pcm_hardware fields
+
+2014-01-09 Markus Pargmann <mpa at pengutronix.de>
+
+ * ASoC: fsl-ssi: Fix stats compile warning
+
+2014-01-09 Markus Pargmann <mpa at pengutronix.de>
+
+ * ASoC: fsl-ssi doc: Add list of supported compatibles
+
+2014-01-09 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: wm_adsp: Mark wm_adsp2_boot_work as static
+
+2013-10-15 Archit Taneja <archit at ti.com>
+
+ * drm/omap: Enable DT support for DMM
+
+2014-01-02 Archit Taneja <archit at ti.com>
+
+ * drm/omap: fix: change dev_unload order
+
+2014-01-09 Jiang Liu <jiang.liu at linux.intel.com>
+
+ * Revert "intel_idle: mark states tables with __initdata tag"
+
+2014-01-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'parisc-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
+
+2014-01-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata
+
+2014-01-08 David E. Box <david.e.box at linux.intel.com>
+
+ * arch: x86: New MailBox support driver for Intel SOC's
+
+2014-01-05 John David Anglin <dave.anglin at bell.net>
+
+ * parisc: Ensure full cache coherency for kmap/kunmap
+
+2014-01-08 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: bcm: Remove obsoleted Kconfig dependency
+
+2014-01-08 John W. Linville <linville at tuxdriver.com>
+
+ * Merge tag 'nfc-fixes-3.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/sameo/nfc-fixes
+
+2014-01-08 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: wm_adsp: Start DSP booting earlier in the DAPM process
+
+2014-01-08 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+	* ASoC: wm_adsp: Factor out ADSP2 boot procedure
+
+2013-12-20 Markus Pargmann <mpa at pengutronix.de>
+
+ * ASoC: fsl-ssi: Drop ac97 specific trigger function
+
+2013-12-20 Markus Pargmann <mpa at pengutronix.de>
+
+	* ASoC: fsl-ssi: Move RX/TX configuration to separate functions
+
+2014-01-08 Jingoo Han <jg1.han at samsung.com>
+
+ * regulator: twl: Fix checkpatch issue
+
+2013-12-16 James Hogan <james.hogan at imgtec.com>
+
+ * clk: clk-divider: fix divisor > 255 bug
+
+2014-01-08 Paulo Zanoni <paulo.r.zanoni at intel.com>
+
+ * drm/i915: fix DDI PLLs HW state readout code
+
+2014-01-07 Andreas Pretzsch <apr at cn-eng.de>
+
+ * ASoC: ssm2602: add 16kHz sampling rate support
+
+2014-01-03 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ASoC: twl4030: Pass the twl4030_priv directly to twl4030_can_write_to_chip()
+
+2014-01-03 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ASoC: twl4030: Move the ctl cache update local to twl4030_write() function
+
+2014-01-03 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ASoC: twl4030: Parameter alignment fixes (for code consistency)
+
+2014-01-03 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ASoC: twl4030: Remove local reg cache
+
+2014-01-03 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ASoC: twl4030: Introduce local ctl register cache
+
+2014-01-03 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ASoC: twl4030: Remove reset registers functionality
+
+2014-01-08 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: fsl-sai: Clean up the code
+
+2014-01-08 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * ASoC: ux500: Fix sparse non static symbol warning
+
+2014-01-08 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: adau1701: Fix ADAU1701_SEROCTL_WORD_LEN_16 constant
+
+2014-01-08 Liam Girdwood <liam.r.girdwood at linux.intel.com>
+
+ * ASoC: sapm: Automatically connect DAI link widgets in DAPM graph.
+
+2014-01-08 Liam Girdwood <liam.r.girdwood at linux.intel.com>
+
+ * ASoC: utils: Add internal call to determine if DAI is dummy.
+
+2014-01-08 Dave Airlie <airlied at gmail.com>
+
+ * Merge branch 'drm-nouveau-next' of git://anongit.freedesktop.org/nouveau/linux-2.6 into drm-fixes
+
+2014-01-08 Dave Airlie <airlied at gmail.com>
+
+ * Merge tag 'drm-intel-fixes-2014-01-08' of git://people.freedesktop.org/~danvet/drm-intel into drm-fixes
+
+2013-12-29 Christian Engelmayer <cengelma at gmx.at>
+
+ * drm/nouveau/nouveau: fix memory leak in nouveau_crtc_page_flip()
+
+2014-01-07 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nouveau/bios: fix offset calculation for BMPv1 bioses
+
+2014-01-07 Stephen Warren <swarren at nvidia.com>
+
+ * serial: 8250: enable UART_BUG_NOMSR for Tegra
+
+2014-01-07 Mark Deneen <mdeneen at gmail.com>
+
+ * tty/serial: at91: reset rx_ring when port is shutdown
+
+2014-01-07 Marek Roszko <mark.roszko at gmail.com>
+
+ * tty/serial: at91: fix race condition in atmel_serial_remove
+
+2014-01-07 Marek Roszko <mark.roszko at gmail.com>
+
+ * tty/serial: at91: Handle shutdown more safely
+
+2014-01-03 Qipan Li <Qipan.Li at csr.com>
+
+ * serial: sirf: correct condition for fetching dma buffer into tty
+
+2014-01-03 Qipan Li <Qipan.Li at csr.com>
+
+ * serial: sirf: provide pm entries of uart_ops
+
+2014-01-03 Qipan Li <Qipan.Li at csr.com>
+
+ * serial: sirf: use PM macro initialize PM functions
+
+2013-12-31 Alexander Shiyan <shc_work at mail.ru>
+
+ * serial: clps711x: Enable driver compilation with COMPILE_TEST
+
+2013-12-31 Alexander Shiyan <shc_work at mail.ru>
+
+ * serial: clps711x: Add support for N_IRDA line discipline
+
+2014-01-07 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
+
+2014-01-07 Erik Hugne <erik.hugne at ericsson.com>
+
+ * tipc: correctly unlink packets from deferred packet queue
+
+2014-01-07 Li RongQing <roy.qing.li at gmail.com>
+
+ * ipv6: pcpu_tstats.syncp should be initialised in ip6_vti.c
+
+2014-01-07 Theodore Ts'o <tytso at mit.edu>
+
+ * ext4: don't pass freed handle to ext4_walk_page_buffers
+
+2014-01-07 Liam Girdwood <liam.r.girdwood at linux.intel.com>
+
+ * ASoC: docs: Update the Overview document
+
+2014-01-07 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * Revert "ARM: 7908/1: mm: Fix the arm_dma_limit calculation"
+
+2014-01-07 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: simple-card: keep the property's name the same pattern
+
+2014-01-07 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: simple-card: fix the DAPM routes map parsing
+
+2013-12-19 Lee Jones <lee.jones at linaro.org>
+
+ * ASoC: ux500: Dramatically reduce the size of the DAI driver data struct
+
+2013-12-19 Lee Jones <lee.jones at linaro.org>
+
+ * ASoC: ux500_pcm: Differentiate between pdata and DT initialisation
+
+2013-12-19 Lee Jones <lee.jones at linaro.org>
+
+ * ASoC: ux500_pcm: Take out pointless dev_dbg() call
+
+2013-12-19 Lee Jones <lee.jones at linaro.org>
+
+ * ASoC: ux500: Store DMA data in the DAI differently in the pdata and DT case
+
+2014-01-06 Tetsuo Handa <penguin-kernel at I-love.SAKURA.ne.jp>
+
+ * SELinux: Fix memory leak upon loading policy
+
+2014-01-06 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: mxs: Remove SND_DMAENGINE_PCM_FLAG_NO_RESIDUE flag
+
+2014-01-06 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: mxs: Don't set unused struct snd_pcm_hardware fields
+
+2014-01-07 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * MAINTAINERS: Updates for drm/i915
+
+2014-01-06 Paul Gortmaker <paul.gortmaker at windriver.com>
+
+ * Input: delete non-required instances of include <linux/init.h>
+
+2014-01-04 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Input: twl4030-keypad - convert to using managed resources
+
+2014-01-04 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Input: twl6040-vibra - remove unneeded check for CONFIG_OF
+
+2014-01-06 Paul Gortmaker <paul.gortmaker at windriver.com>
+
+ * x86: Delete non-required instances of include <linux/init.h>
+
+2014-01-06 Bob Gleitsmann <rjgleits at bellsouth.net>
+
+ * drm/nouveau: return offset of allocated notifier
+
+2014-01-05 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nouveau/bios: make jump conditional
+
+2014-01-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
+
+2014-01-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2014-01-06 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'acpi-battery' and 'pm-cpufreq'
+
+2014-01-06 Curt Brune <curt at cumulusnetworks.com>
+
+ * bridge: use spin_lock_bh() in br_multicast_set_hash_max
+
+2014-01-06 Hannes Frederic Sowa <hannes at stressinduktion.org>
+
+ * ipv6: don't install anycast address for /128 addresses on routers
+
+2014-01-06 Lan Tianyu <tianyu.lan at intel.com>
+
+ * ACPI / Battery: Add a _BIX quirk for NEC LZ750/LS
+
+2014-01-06 Dirk Brandewie <dirk.j.brandewie at intel.com>
+
+ * intel_pstate: Add X86_FEATURE_APERFMPERF to cpu match parameters.
+
+2014-01-01 Jie Liu <jeff.liu at oracle.com>
+
+ * xfs: fix off-by-one error in xfs_attr3_rmt_verify
+
+2014-01-06 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'for-john' of git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi-fixes
+
+2014-01-06 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'for-john' of git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211
+
+2014-01-06 Eric Whitney <enwlinux at gmail.com>
+
+ * ext4: fix bigalloc regression
+
+2013-12-20 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: atmel: Don't set unused struct snd_pcm_hardware fields
+
+2014-01-06 Jiri Kosina <jkosina at suse.cz>
+
+ * HID: hidraw: make comment more accurate and nicer
+
+2014-01-05 Alexander Shiyan <shc_work at mail.ru>
+
+ * ASoC: mc13783: trivial: Cleanup module
+
+2014-01-05 Alexander Shiyan <shc_work at mail.ru>
+
+ * ASoC: mc13783: Drop fixed ADC & DAC ports usage
+
+2014-01-05 Alexander Shiyan <shc_work at mail.ru>
+
+ * ASoC: mc13783: Use core error messages if registration fails
+
+2014-01-05 Alexander Shiyan <shc_work at mail.ru>
+
+ * ASoC: mc13783: Use module_platform_driver_probe()
+
+2014-01-04 Jesper Dangaard Brouer <brouer at redhat.com>
+
+ * netfilter: only warn once on wrong seqadj usage
+
+2013-12-31 Daniel Borkmann <dborkman at redhat.com>
+
+ * netfilter: nf_nat: fix access to uninitialized buffer in IRC NAT helper
+
+2014-01-03 David Howells <dhowells at redhat.com>
+
+ * regulator: tps62360: Fix up a pointer-integer size mismatch warning
+
+2013-12-28 Alexander van Heukelum <heukelum at fastmail.fm>
+
+ * Revert "drm/i915: assume all GM45 Acer laptops use inverted backlight PWM"
+
+2014-01-05 Mike Turquette <mturquette at linaro.org>
+
+ * Merge tag 'samsung-clk-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tfiga/samsung-clk into clk-fixes
+
+2014-01-05 Josh Boyer <jwboyer at redhat.com>
+
+ * xen-netback: Include header for vmalloc
+
+2014-01-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-arm
+
+2014-01-05 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'acpi-ac' and 'acpi-tpm'
+
+2013-12-19 Jiang Liu <jiang.liu at linux.intel.com>
+
+ * ACPI / TPM: fix memory leak when walking ACPI namespace
+
+2014-01-04 Alexander Mezin <mezin.alexander at gmail.com>
+
+ * ACPI / AC: change notification handler type to ACPI_ALL_NOTIFY
+
+2014-01-03 Rob Herring <rob.herring at calxeda.com>
+
+ * ARM: 7933/1: rename ioremap_cached to ioremap_cache
+
+2014-01-03 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * ARM: fix "bad mode in ... handler" message for undefined instructions
+
+2014-01-02 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * CRYPTO: Fix more AES build errors
+
+2014-01-05 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'v3.13-rc7' into x86/efi-kexec to resolve conflicts
+
+2014-01-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc
+
+2014-01-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.13-rc7
+
+2014-01-04 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86, boot: Move intcall() to the .inittext section
+
+2014-01-03 David Woodhouse <dwmw2 at infradead.org>
+
+ * x86, boot: Use .code16 instead of .code16gcc
+
+2014-01-03 Steven Rostedt <rostedt at goodmis.org>
+
+ * x86, sparse: Do not force removal of __user when calling copy_to/from_user_nocheck()
+
+2014-01-04 Sebastian Reichel <sre at debian.org>
+
+ * Input: twl4030-keypad - add device tree support
+
+2014-01-04 Libo Chen <clbchenlibo.chen at huawei.com>
+
+ * Input: twl6040-vibra - add missing of_node_put
+
+2014-01-03 Libo Chen <clbchenlibo.chen at huawei.com>
+
+ * Input: twl4030-vibra - add missing of_node_put
+
+2013-11-14 Arron Wang <arron.wang at intel.com>
+
+ * NFC: Fix target mode p2p link establishment
+
+2013-12-23 Kirill A. Shutemov <kirill.shutemov at linux.intel.com>
+
+ * x86, cpu: Detect more TLB configuration
+
+2014-01-03 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-v3.13-fixes' of git://git.infradead.org/battery-2.6
+
+2014-01-03 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.13-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2013-12-05 Ben Hutchings <ben at decadent.org.uk>
+
+ * deb-pkg: Fix building for MIPS big-endian or ARM OABI
+
+2013-12-05 Ben Hutchings <ben at decadent.org.uk>
+
+ * deb-pkg: Fix cross-building linux-headers package
+
+2014-01-03 Dave Young <dyoung at redhat.com>
+
+ * x86/efi: parse_efi_setup() build fix
+
+2014-01-03 Dave Young <dyoung at redhat.com>
+
+ * x86: ksysfs.c build fix
+
+2013-12-02 Nishanth Menon <nm at ti.com>
+
+ * scripts: Coccinelle script for pm_runtime_* return checks with IS_ERR_VALUE
+
+2014-01-01 Mark Salter <msalter at redhat.com>
+
+ * Input: i8042 - cleanup SERIO_I8042 dependencies
+
+2014-01-01 Mark Salter <msalter at redhat.com>
+
+ * Input: i8042 - select ARCH_MIGHT_HAVE_PC_SERIO on x86
+
+2014-01-01 Mark Salter <msalter at redhat.com>
+
+ * Input: i8042 - select ARCH_MIGHT_HAVE_PC_SERIO on unicore32
+
+2014-01-01 Mark Salter <msalter at redhat.com>
+
+ * Input: i8042 - select ARCH_MIGHT_HAVE_PC_SERIO on sparc
+
+2014-01-01 Mark Salter <msalter at redhat.com>
+
+ * Input: i8042 - select ARCH_MIGHT_HAVE_PC_SERIO for SH_CAYMAN
+
+2014-01-01 Mark Salter <msalter at redhat.com>
+
+ * Input: i8042 - select ARCH_MIGHT_HAVE_PC_SERIO on powerpc
+
+2014-01-01 Mark Salter <msalter at redhat.com>
+
+ * Input: i8042 - select ARCH_MIGHT_HAVE_PC_SERIO on mips
+
+2014-01-01 Mark Salter <msalter at redhat.com>
+
+ * Input: i8042 - select ARCH_MIGHT_HAVE_PC_SERIO on IA64
+
+2014-01-01 Mark Salter <msalter at redhat.com>
+
+ * Input: i8042 - select ARCH_MIGHT_HAVE_PC_SERIO on ARM/Footbridge
+
+2014-01-01 Mark Salter <msalter at redhat.com>
+
+ * Input: i8042 - select ARCH_MIGHT_HAVE_PC_SERIO on alpha
+
+2014-01-02 Petr Sebor <petr at scssoft.com>
+
+ * Input: xpad - add new USB IDs for Logitech F310 and F710
+
+2014-01-02 Thomaz de Oliveira dos Reis <thor27 at gmail.com>
+
+ * Input: xpad - change D-PAD mapping on Razer devices
+
+2014-01-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/virt/kvm/kvm
+
+2014-01-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (incoming from Andrew)
+
+2014-01-02 Jason Baron <jbaron at akamai.com>
+
+ * epoll: do not take the nested ep->mtx on EPOLL_CTL_DEL
+
+2014-01-02 Nobuhiro Iwamatsu <nobuhiro.iwamatsu.yj at renesas.com>
+
+ * sh: add EXPORT_SYMBOL(min_low_pfn) and EXPORT_SYMBOL(max_low_pfn) to sh_ksyms_32.c
+
+2014-01-02 Jiang Liu <jiang.liu at linux.intel.com>
+
+ * drivers/dma/ioat/dma.c: check DMA mapping error in ioat_dma_self_test()
+
+2014-01-02 Naoya Horiguchi <n-horiguchi at ah.jp.nec.com>
+
+ * mm/memory-failure.c: transfer page count from head page to tail page after split thp
+
+2014-01-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'gfs2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-fixes
+
+2014-01-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
+
+2014-01-02 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'renesas-fixes3-for-v3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas into fixes
+
+2013-12-28 Jan Kiszka <jan.kiszka at web.de>
+
+ * KVM: nVMX: Unconditionally uninit the MMU on nested vmexit
+
+2014-01-02 Michal Marek <mmarek at suse.cz>
+
+ * Merge commit v3.13-rc1 into kbuild/misc
+
+2014-01-02 Tetsuo Handa <penguin-kernel at I-love.SAKURA.ne.jp>
+
+ * GFS2: Fix unsafe dereference in dump_holder()
+
+2014-01-01 Alan <gnomes at lxorguk.ukuu.org.uk>
+
+ * sata_sis: missing PM support
+
+2014-01-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2014-01-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
+
+2014-01-01 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-fixes-3.13' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
+
+2013-12-31 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'pm-cpufreq' and 'pm-cpuidle'
+
+2013-12-31 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'cpuidle/3.13-fixes' of git://git.linaro.org/people/daniel.lezcano/linux into pm-cpuidle
+
+2013-12-31 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'acpi-pci-pm' and 'acpi-pci-hotplug'
+
+2013-12-26 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Input: allocate absinfo data when setting ABS capability
+
+2013-12-29 Doug Anderson <dianders at chromium.org>
+
+ * Input: cros_ec_keyb - fix problems with backslash
+
+2013-12-19 Oren Givon <oren.givon at intel.com>
+
+ * iwlwifi: add new devices for 7265 series
+
+2013-12-30 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * regulator: wm831x-dcdc: Remove unneeded 'err' label
+
+2013-12-31 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug
+
+2013-12-31 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * intel_pstate: Fail initialization if P-state information is missing
+
+2013-11-27 Bo Shen <voice.shen at atmel.com>
+
+ * ASoC: atmel: sam9x5_wm8731: remove platform_set_drvdata
+
+2013-12-23 Simon Guinot <sguinot at lacie.com>
+
+ * ahci: add PCI ID for Marvell 88SE9170 SATA controller
+
+2013-12-31 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: fsl_sai: fix the endianess for SAI fifo data.
+
+2013-12-31 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: fsl_sai: Fix one bug for hardware limitation.
+
+2013-12-29 Jan Kiszka <jan.kiszka at siemens.com>
+
+ * KVM: x86: Fix APIC map calculation after re-enabling
+
+2013-12-17 Krzysztof Kozlowski <k.kozlowski at samsung.com>
+
+ * clk: exynos: File scope reg_save array should depend on PM_SLEEP
+
+2013-12-11 Abhilash Kesavan <a.kesavan at samsung.com>
+
+ * clk: samsung: exynos5250: Add CLK_IGNORE_UNUSED flag for the sysreg clock
+
+2013-12-12 Abhilash Kesavan <a.kesavan at samsung.com>
+
+ * ARM: dts: exynos5250: Fix MDMA0 clock number
+
+2013-12-12 Abhilash Kesavan <a.kesavan at samsung.com>
+
+ * clk: samsung: exynos5250: Add MDMA0 clocks
+
+2013-12-12 Abhilash Kesavan <a.kesavan at samsung.com>
+
+ * clk: samsung: exynos5250: Fix ACP gate register offset
+
+2013-12-25 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: fsl_sai: Add disable operation for the corresponding data channel.
+
+2013-12-25 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: fsl_sai: Move the global registers setting to _dai_probe()
+
+2013-12-26 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * regulator: s2mps11: Clean up redundant code
+
+2013-12-29 Axel Lin <axel.lin at ingics.com>
+
+ * regulator: tps65910: Simplify setting enable_mask for regulators
+
+2013-12-13 Andre Przywara <andre.przywara at linaro.org>
+
+ * ARM/cpuidle: remove __init tag from Calxeda cpuidle probe function
+
+2013-11-26 Soren Brinkmann <soren.brinkmann at xilinx.com>
+
+ * clocksource: cadence_ttc: Fix mutex taken inside interrupt context
+
+2013-12-18 Sebastian Ott <sebott at linux.vnet.ibm.com>
+
+ * s390/pci: obtain function handle in hotplug notifier
+
+2013-12-30 Benjamin Herrenschmidt <benh at kernel.crashing.org>
+
+ * Merge remote-tracking branch 'agust/merge' into merge
+
+2013-12-28 Olof Johansson <olof at lixom.net>
+
+ * powerpc: Fix alignment of secondary cpu spin vars
+
+2013-12-23 Anton Blanchard <anton at samba.org>
+
+ * powerpc: Align p_end
+
+2013-12-20 Brian W Hart <hartb at linux.vnet.ibm.com>
+
+ * powernv/eeh: Add buffer for P7IOC hub error data
+
+2013-12-19 Brian W Hart <hartb at linux.vnet.ibm.com>
+
+ * powernv/eeh: Fix possible buffer overrun in ioda_eeh_phb_diag()
+
+2013-12-18 Paul E. McKenney <paulmck at linux.vnet.ibm.com>
+
+ * powerpc: Make 64-bit non-VMX __copy_tofrom_user bi-endian
+
+2013-12-16 Rajesh B Prathipati <rprathip at linux.vnet.ibm.com>
+
+ * powerpc: Make unaligned accesses endian-safe for powerpc
+
+2013-12-16 Michael Neuling <mikey at neuling.org>
+
+ * powerpc: Fix bad stack check in exception entry
+
+2013-12-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.13-rc6
+
+2013-12-29 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * PCI / ACPI: Install wakeup notify handlers for all PCI devs with ACPI
+
+2013-12-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2013-12-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-12-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.13-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2013-12-24 Viresh Kumar <viresh.kumar at linaro.org>
+
+ * cpufreq: preserve user_policy across suspend/resume
+
+2013-12-27 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * cpufreq: Clean up after a failing light-weight initialization
+
+2013-12-21 Matt Fleming <matt.fleming at intel.com>
+
+ * x86/efi: Delete superfluous global variables
+
+2013-12-20 Dave Young <dyoung at redhat.com>
+
+ * x86: Reserve setup_data ranges late after parsing memmap cmdline
+
+2013-12-20 Dave Young <dyoung at redhat.com>
+
+ * x86: Export x86 boot_params to sysfs
+
+2013-12-20 Dave Young <dyoung at redhat.com>
+
+ * x86: Add xloadflags bit for EFI runtime support on kexec
+
+2013-12-20 Dave Young <dyoung at redhat.com>
+
+ * x86/efi: Pass necessary EFI data for kexec via setup_data
+
+2013-12-21 Laura Abbott <lauraa at codeaurora.org>
+
+ * ARM: 7931/1: Correct virt_addr_valid
+
+2013-12-16 Steven Capper <steve.capper at linaro.org>
+
+ * ARM: 7923/1: mm: fix dcache flush logic for compound high pages
+
+2013-12-29 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * ARM: fix footbridge clockevent device
+
+2013-09-10 Li Zefan <lizefan at huawei.com>
+
+ * slub: Fix calculation of cpu slabs
+
+2013-12-28 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'omap-for-v3.13/intc-ldp-fix' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes
+
+2013-12-28 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'renesas-fixes2-for-v3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas into fixes
+
+2013-12-11 Linus Walleij <linus.walleij at linaro.org>
+
+ * ARM: pxa: fix USB gadget driver compilation regression
+
+2013-12-27 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * Input: keypad-omap - cleanup header file
+
+2013-12-27 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * Input: keypad-ep93xx - cleanup header file
+
+2013-12-15 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Input: pmic8xxx-pwrkey - switch to using managed resources
+
+2013-12-17 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * Input: pmic8xxx-pwrkey - pass correct device identity to free_irq()
+
+2013-12-27 H. Peter Anvin <hpa at zytor.com>
+
+ * x86: Slightly tweak the access_ok() C variant for better code
+
+2013-12-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * x86: Replace assembly access_ok() with a C variant
+
+2013-12-27 Arnaldo Carvalho de Melo <acme at redhat.com>
+
+ * perf tools: Use zfree to help detect use after free bugs
+
+2013-12-26 Arnaldo Carvalho de Melo <acme at redhat.com>
+
+ * perf tools: Introduce zfree
+
+2013-12-27 Tony Lindgren <tony at atomide.com>
+
+ * Merge tag 'for-v3.13-rc/hwmod-fixes-b' of git://git.kernel.org/pub/scm/linux/kernel/git/pjw/omap-pending into debug-ll-and-ldp-backlight-fix
+
+2013-12-27 Tony Lindgren <tony at atomide.com>
+
+ * ARM: OMAP2+: Fix LCD panel backlight regression for LDP legacy booting
+
+2013-12-26 Yunkang Tang <tommywill2011 at gmail.com>
+
+ * Input: ALPS - add support for "Dolphin" devices
+
+2013-12-27 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'powercap' and 'acpi-lpss' with new device IDs
+
+2013-12-27 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'pm-cpufreq' and 'pm-sleep' containing PM fixes
+
+2013-12-26 Arnaldo Carvalho de Melo <acme at redhat.com>
+
+ * perf tools: No need to test against NULL before calling free()
+
+2013-12-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
+
+2013-12-26 Namhyung Kim <namhyung.kim at lge.com>
+
+ * perf ui/tui: Implement header window
+
+2013-12-26 Namhyung Kim <namhyung.kim at lge.com>
+
+ * perf ui/tui: Split help message for perf top and report
+
+2013-12-16 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: mackerel: Fix coherent DMA mask
+
+2013-12-16 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: kzm9g: Fix coherent DMA mask
+
+2013-12-16 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: armadillo: Fix coherent DMA mask
+
+2013-12-26 Simon Horman <horms+renesas at verge.net.au>
+
+ * Revert "ARM: shmobile: r8a7791: Add SSI clocks in device tree"
+
+2013-12-26 Simon Horman <horms+renesas at verge.net.au>
+
+ * Revert "ARM: shmobile: r8a7790: Add SSI clocks in device tree"
+
+2013-12-23 Suman Anna <s-anna at ti.com>
+
+ * ARM: OMAP2+: hwmod_data: fix missing OMAP_INTC_START in irq data
+
+2013-12-12 Rajendra Nayak <rnayak at ti.com>
+
+ * ARM: DRA7: hwmod: Fix boot crash with DEBUG_LL
+
+2013-12-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.dk/linux-block
+
+2013-12-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
+
+2013-12-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
+
+2013-12-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata
+
+2013-12-19 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: r8a7791: Add SSI clocks in device tree
+
+2013-12-19 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: r8a7790: Add SSI clocks in device tree
+
+2013-12-19 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: r8a7791: Add QSPI module clock in device tree
+
+2013-12-19 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: r8a7790: Add QSPI module clock in device tree
+
+2013-12-19 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: r8a7791: Add MSIOF clocks in device tree
+
+2013-12-19 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: r8a7790: Add MSIOF clocks in device tree
+
+2013-12-19 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: Remove Koelsch reference DTS
+
+2013-12-23 Geert Uytterhoeven <geert+renesas at linux-m68k.org>
+
+ * spi: rspi: Fix typo when clearing SPSR_OVRF
+
+2013-12-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.13-rc5
+
+2013-12-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2013-12-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'firewire-fix' of git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394
+
+2013-12-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending
+
+2013-12-11 Jacob Pan <jacob.jun.pan at linux.intel.com>
+
+ * powercap / RAPL: add support for ValleyView Soc
+
+2013-12-19 Masami Ichikawa <masami256 at gmail.com>
+
+ * PM / sleep: Fix memory leak in pm_vt_switch_unregister().
+
+2013-12-19 Jason Baron <jbaron at akamai.com>
+
+ * cpufreq: Use CONFIG_CPU_FREQ_DEFAULT_* to set initial policy for setpolicy drivers
+
+2013-12-20 Viresh Kumar <viresh.kumar at linaro.org>
+
+ * cpufreq: remove sysfs files for CPUs which failed to come back after resume
+
+2013-12-21 Matias Bjørling <m at bjorling.me>
+
+ * null_blk: support submit_queues on use_per_node_hctx
+
+2013-12-21 Matias Bjørling <m at bjorling.me>
+
+ * null_blk: set use_per_node_hctx param to false
+
+2013-12-21 Matias Bjørling <m at bjorling.me>
+
+ * null_blk: corrections to documentation
+
+2013-12-20 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * regulator: tps51632-regulator: Fix spelling
+
+2013-11-25 Chen, Gong <gong.chen at linux.intel.com>
+
+ * ACPI, APEI, GHES: Cleanup ghes memory error handling
+
+2013-12-18 Chen, Gong <gong.chen at linux.intel.com>
+
+ * ACPI, APEI: Cleanup alignment-aware accesses
+
+2013-11-25 Chen, Gong <gong.chen at linux.intel.com>
+
+ * ACPI, APEI, GHES: Do not report only correctable errors with SCI
+
+2013-12-16 H.J. Lu <hjl.tools at gmail.com>
+
+ * x86, x32: Use __kernel_long_t for __statfs_word
+
+2013-12-16 H.J. Lu <hjl.tools at gmail.com>
+
+ * x86, x32: Use __kernel_long_t/__kernel_ulong_t in x86-64 stat.h
+
+2013-12-19 Benjamin Tissoires <benjamin.tissoires at redhat.com>
+
+ * HID: input: fix input sysfs path for hid devices
+
+2013-12-20 Matteo Facchinetti <matteo.facchinetti at sirius-es.it>
+
+ * powerpc/512x: dts: disable MPC5125 usb module
+
+2013-12-20 Kevin Hilman <khilman at linaro.org>
+
+ * Merge tag 'renesas-fixes-for-v3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas into fixes
+
+2013-12-20 Kevin Hilman <khilman at linaro.org>
+
+ * Merge tag 'omap-for-v3.13/display-fix' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes
+
+2013-12-20 Theodore Ts'o <tytso at mit.edu>
+
+ * ext4: add explicit casts when masking cluster sizes
+
+2013-12-19 Steven Whitehouse <swhiteho at redhat.com>
+
+ * GFS2: Wait for async DIO in glock state changes
+
+2013-12-18 Steven Whitehouse <swhiteho at redhat.com>
+
+ * GFS2: Fix incorrect invalidation for DIO/buffered I/O
+
+2013-12-04 Matt Gates <matthew.gates at hp.com>
+
+ * [SCSI] hpsa: allow SCSI mid layer to handle unit attention
+
+2013-12-04 Stephen M. Cameron <scameron at beardog.cce.hp.com>
+
+ * [SCSI] hpsa: do not require board "not ready" status after hard reset
+
+2013-12-04 Stephen M. Cameron <scameron at beardog.cce.hp.com>
+
+ * [SCSI] hpsa: enable unit attention reporting
+
+2013-12-04 Stephen M. Cameron <scameron at beardog.cce.hp.com>
+
+ * [SCSI] hpsa: rename scsi prefetch field
+
+2013-12-04 Stephen M. Cameron <scameron at beardog.cce.hp.com>
+
+ * [SCSI] hpsa: use workqueue instead of kernel thread for lockup detection
+
+2013-12-04 wenxiong at linux.vnet.ibm.com <wenxiong at linux.vnet.ibm.com>
+
+ * [SCSI] ipr: increase dump size in ipr driver
+
+2013-12-19 Kevin Hilman <khilman at linaro.org>
+
+ * Merge tag 'keystone/maintainer-file' of git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone into fixes
+
+2013-12-16 Jan Beulich <JBeulich at suse.com>
+
+ * x86/efi: Don't select EFI from certain special ACPI drivers
+
+2013-12-18 Len Brown <len.brown at intel.com>
+
+ * x86 idle: Repair large-server 50-watt idle-power regression
+
+2013-12-18 Tejun Heo <tj at kernel.org>
+
+ * libata, freezer: avoid block device removal while system is frozen
+
+2013-12-17 Will Deacon <will.deacon at arm.com>
+
+ * arm64: ptrace: avoid using HW_BREAKPOINT_EMPTY for disabled events
+
+2013-12-06 Lans Zhang <jia.zhang at windriver.com>
+
+ * x86/mm/numa: Fix 32-bit kernel NUMA boot
+
+2013-12-19 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'v3.13-rc4' into x86/mm
+
+2013-12-19 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.13-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2013-12-19 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/adsp', 'asoc/fix/arizona', 'asoc/fix/atmel', 'asoc/fix/fsl', 'asoc/fix/kirkwood', 'asoc/fix/tegra', 'asoc/fix/wm8904' and 'asoc/fix/wm8962' into asoc-linus
+
+2013-12-19 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/dma' into asoc-linus
+
+2013-12-19 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/core' into asoc-linus
+
+2013-12-19 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: wm5110: Add support for ASRC RATE 1
+
+2013-12-19 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: wm5110: Add FSH for ISRCs
+
+2013-12-16 Ben Dooks <ben.dooks at codethink.co.uk>
+
+ * ARM: shmobile: r8a7790: fix shdi resource sizes
+
+2013-12-16 Kuninori Morimoto <kuninori.morimoto.gx at renesas.com>
+
+ * ARM: shmobile: bockw: fixup DMA mask
+
+2013-12-11 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: armadillo: Add PWM backlight power supply
+
+2013-12-18 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86, realmode: Pointer walk cleanups, pull out invariant use of __pa()
+
+2013-12-06 Christoph Hellwig <hch at infradead.org>
+
+ * xfs: assert that we hold the ilock for extent map access
+
+2013-12-06 Christoph Hellwig <hch at infradead.org>
+
+ * xfs: use xfs_ilock_attr_map_shared in xfs_attr_list_int
+
+2013-12-06 Christoph Hellwig <hch at infradead.org>
+
+ * xfs: use xfs_ilock_attr_map_shared in xfs_attr_get
+
+2013-12-06 Christoph Hellwig <hch at infradead.org>
+
+ * xfs: use xfs_ilock_data_map_shared in xfs_qm_dqiterate
+
+2013-12-06 Christoph Hellwig <hch at infradead.org>
+
+ * xfs: use xfs_ilock_data_map_shared in xfs_qm_dqtobp
+
+2013-12-10 Gerhard Sittig <gsi at denx.de>
+
+ * powerpc/512x: dts: remove misplaced IRQ spec from 'soc' node (5125)
+
+2013-12-18 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'for-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth
+
+2013-12-17 Krzysztof Kozlowski <k.kozlowski at samsung.com>
+
+ * mfd: sec: Remove sec_reg* regmap helpers
+
+2013-12-18 Antonio Ospite <ospite at studenti.unina.it>
+
+ * Input: fix typos in Documentation/input/gamepad.txt
+
+2013-12-17 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * Input: zforce - fix error return code in zforce_start()
+
+2013-12-16 Hans de Goede <hdegoede at redhat.com>
+
+ * Input: elantech - improve clickpad detection
+
+2013-12-18 Ben Myers <bpm at sgi.com>
+
+ * Merge branch 'xfs-for-linus-v3.13-rc5' into for-next
+
+2013-12-18 Martin Schwidefsky <schwidefsky at de.ibm.com>
+
+ * s390/3270: fix allocation of tty3270_screen structure
+
+2013-12-04 Vijaya Mohan Guvva <vmohan at brocade.com>
+
+ * [SCSI] bfa: Chinook quad port 16G FC HBA claim issue
+
+2013-12-18 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
+
+2013-12-18 Hui Wang <hui.wang at canonical.com>
+
+ * ALSA: hda - Add Dell headset detection quirk for one more laptop model
+
+2013-12-18 Bo Shen <voice.shen at atmel.com>
+
+ * ASoC: wm8904: fix DSP mode B configuration
+
+2013-12-18 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: wm_adsp: Add small delay while polling DSP RAM start
+
+2013-12-18 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: wm_adsp: Remove duplicate info message for DSP RAM ready
+
+2013-12-18 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'please-pull-einj' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras into x86/ras
+
+2013-11-22 Randy Dunlap <rdunlap at infradead.org>
+
+ * slab.h: remove duplicate kmalloc declaration and fix kernel-doc warnings
+
+2013-12-18 Jan Kara <jack at suse.cz>
+
+ * ext4: fix deadlock when writing in ENOSPC conditions
+
+2013-12-16 Tomi Valkeinen <tomi.valkeinen at ti.com>
+
+ * Revert "ARM: OMAP2+: Remove legacy mux code for display.c"
+
+2013-11-06 Tony Luck <tony.luck at intel.com>
+
+ * ACPI, APEI, EINJ: Changes to the ACPI/APEI/EINJ debugfs interface
+
+2013-12-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 's2mps11-build' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
+
+2013-12-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-12-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-12-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-12-17 Namhyung Kim <namhyung at kernel.org>
+
+ * tools lib traceevent: Get rid of die() in some string conversion functions
+
+2013-12-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'gpio-v3.13-4' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio
+
+2013-12-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
+
+2013-12-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
+
+2013-10-11 Josh Boyer <jwboyer at redhat.com>
+
+ * cpupower: Fix segfault due to incorrect getopt_long arugments
+
+2013-12-16 Sujith Manoharan <c_manoha at qca.qualcomm.com>
+
+ * ath9k: Fix interrupt handling for the AR9002 family
+
+2013-12-11 Larry Finger <Larry.Finger at lwfinger.net>
+
+ * rtlwifi: pci: Fix oops on driver unload
+
+2013-11-28 Mathy Vanhoef <vanhoefm at gmail.com>
+
+ * ath9k_htc: properly set MAC address and BSSID mask
+
+2013-12-17 JongHo Kim <furmuwon at gmail.com>
+
+ * ALSA: Add SNDRV_PCM_STATE_PAUSED case in wait_for_avail function
+
+2013-12-12 Dave Chinner <dchinner at redhat.com>
+
+ * xfs: abort metadata writeback on permanent errors
+
+2013-12-12 Dave Chinner <dchinner at redhat.com>
+
+ * xfs: swalloc doesn't align allocations properly
+
+2013-12-17 Christoph Hellwig <hch at infradead.org>
+
+ * xfs: remove xfsbdstrat error
+
+2013-11-22 Dave Chinner <dchinner at redhat.com>
+
+ * xfs: align initial file allocations correctly
+
+2013-12-08 Namjae Jeon <namjae.jeon at samsung.com>
+
+ * MAINTAINERS: fix incorrect mail address of XFS maintainer
+
+2013-11-26 Jie Liu <jeff.liu at oracle.com>
+
+ * xfs: fix infinite loop by detaching the group/project hints from user dquot
+
+2013-11-26 Jie Liu <jeff.liu at oracle.com>
+
+ * xfs: fix assertion failure at xfs_setattr_nonsize
+
+2013-11-26 Jie Liu <jeff.liu at oracle.com>
+
+ * xfs: fix false assertion at xfs_qm_vop_create_dqattach
+
+2013-10-05 Mark Tinguely <tinguely at sgi.com>
+
+ * xfs: fix memory leak in xfs_dir2_node_removename
+
+2013-11-27 Will Deacon <will.deacon at arm.com>
+
+ * Revert "ARM: 7556/1: perf: fix updated event period in response to PERF_EVENT_IOC_PERIOD"
+
+2013-11-27 Jean-Francois Moine <moinejf at free.fr>
+
+ * ASoC: kirkwood: Fix the CPU DAI rates
+
+2013-12-17 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: wm5110: Correct HPOUT3 DAPM route typo
+
+2013-12-17 Li Zefan <lizefan at huawei.com>
+
+ * cgroup: don't recycle cgroup id until all csses' have been destroyed
+
+2013-12-17 Antonio Ospite <ospite at studenti.unina.it>
+
+ * HID: debug: add labels for some new buttons
+
+2013-12-16 Marc Carino <marc.ceeeee at gmail.com>
+
+ * libata: implement ATA_HORKAGE_NO_NCQ_TRIM and apply it to Micro M500 SSDs
+
+2013-12-17 Marcel Holtmann <marcel at holtmann.org>
+
+ * Bluetooth: Fix HCI User Channel permission check in hci_sock_sendmsg
+
+2013-11-23 Santosh Shilimkar <santosh.shilimkar at ti.com>
+
+ * MAINTAINERS: Add keystone clock drivers
+
+2013-12-14 Oleg Nesterov <oleg at redhat.com>
+
+ * selinux: selinux_setprocattr()->ptrace_parent() needs rcu_read_lock()
+
+2013-12-16 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * SELinux: remove duplicated include from hooks.c
+
+2013-12-16 Felix Fietkau <nbd at openwrt.org>
+
+ * mac80211: move "bufferable MMPDU" check to fix AP mode scan
+
+2013-12-16 Javier Lopez <jlopex at cozybit.com>
+
+ * mac80211_hwsim: Fix NULL pointer dereference
+
+2013-12-13 Nenghua Cao <nhcao at marvell.com>
+
+ * ASoC: dapm: update DPCM runtime when mixer/mux changes
+
+2013-12-14 Qiaowei Ren <qiaowei.ren at intel.com>
+
+ * x86: replace futex_atomic_cmpxchg_inatomic() with user_atomic_cmpxchg_inatomic
+
+2013-12-14 Qiaowei Ren <qiaowei.ren at intel.com>
+
+ * x86: add user_atomic_cmpxchg_inatomic at uaccess.h
+
+2013-12-16 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'ras_for_3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp into x86/ras
+
+2013-12-13 Jiri Kosina <jkosina at suse.cz>
+
+ * HID: remove SIS entries from hid_have_special_driver[]
+
+2013-12-11 Krzysztof Kozlowski <k.kozlowski at samsung.com>
+
+ * mfd: s2mps11: Fix build after regmap field rename in sec-core.c
+
+2013-12-16 Johannes Berg <johannes.berg at intel.com>
+
+ * radiotap: fix bitmap-end-finding buffer overrun
+
+2013-12-15 Rafał Miłecki <zajec5 at gmail.com>
+
+ * Input: define KEY_WWAN for Wireless WAN
+
+2013-11-23 Aleksej Makarov <aleksej.makarov at sonymobile.com>
+
+ * Input: don't call input_dev_release_keys() in resume
+
+2013-12-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.13-rc4
+
+2013-12-10 Matias Bjorling <m at bjorling.me>
+
+ * null_blk: mem garbage on NUMA systems during init
+
+2013-12-13 Sergey Senozhatsky <sergey.senozhatsky at gmail.com>
+
+ * radeon_pm: fix oops in hwmon_attributes_visible() and radeon_hwmon_show_temp_thresh()
+
+2013-12-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2013-12-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-12-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pci-v3.13-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
+
+2013-12-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security
+
+2013-12-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Revert "selinux: consider filesystem subtype in policies"
+
+2013-12-15 Jiri Kosina <jkosina at suse.cz>
+
+ * HID: microsoft: no fallthrough in MS ergonomy 0xff05 usage
+
+2013-12-15 Stefan Richter <stefanr at s5r6.in-berlin.de>
+
+ * firewire: sbp2: bring back WRITE SAME support
+
+2013-12-14 Heiko Stübner <heiko at sntech.de>
+
+ * Input: zforce - fix possible driver hang during suspend
+
+2013-12-14 Carolyn Wyborny <carolyn.wyborny at intel.com>
+
+ * igb: Fix for issue where values could be too high for udelay function.
+
+2013-12-14 Jesse Brandeburg <jesse.brandeburg at intel.com>
+
+ * i40e: fix null dereference
+
+2013-12-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'edac_fixes_for_3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp
+
+2013-12-13 Tomasz Figa <tomasz.figa at gmail.com>
+
+ * ARM: s3c64xx: dt: Fix boot failure due to double clock initialization
+
+2013-12-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-arm
+
+2013-12-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'arc-fixes-for-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc
+
+2013-12-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dm-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
+
+2013-12-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
+
+2013-12-10 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * ARM: fix asm/memory.h build error
+
+2013-12-14 Jan Kara <jack at suse.cz>
+
+ * writeback: Fix data corruption on NFS
+
+2013-12-13 Ben Myers <bpm at sgi.com>
+
+ * Merge branch 'xfs-factor-icluster-macros' into for-next
+
+2013-12-13 Paul Moore <pmoore at redhat.com>
+
+ * selinux: revert 102aefdda4d8275ce7d7100bc16c88c74272b260
+
+2013-12-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'regulator-v3.13-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
+
+2013-12-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'regmap-v3.13-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regmap
+
+2013-12-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'i2c/for-current' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux
+
+2013-12-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus-20131212' of git://git.infradead.org/linux-mtd
+
+2013-12-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://git.infradead.org/users/vkoul/slave-dma
+
+2013-12-13 Joe Thornber <ejt at redhat.com>
+
+ * dm array: fix a reference counting bug in shadow_ablock
+
+2013-12-13 Joe Thornber <ejt at redhat.com>
+
+ * dm space map: disallow decrementing a reference count below zero
+
+2013-12-13 Levente Kurusa <levex at linux.com>
+
+ * EISA: Call put_device() if device_register() fails
+
+2013-11-13 Li Wang <liwang at ubuntukylin.com>
+
+ * ceph: Avoid data inconsistency due to d-cache aliasing in readpage()
+
+2013-12-05 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: initialize inode before instantiating dentry
+
+2013-11-25 Lior Amsalem <alior at marvell.com>
+
+ * irqchip: armada-370-xp: fix MSI race condition
+
+2013-11-25 Lior Amsalem <alior at marvell.com>
+
+ * irqchip: armada-370-xp: fix IPI race condition
+
+2013-12-13 Emanuel Krenz <emanuelkrenz at web.de>
+
+ * HID: add support for SiS multitouch panel in the touch monitor LG 23ET83V
+
+2013-12-13 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.13-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2013-12-13 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/topic/constraints' into regulator-linus
+
+2013-12-13 Hui Wang <hui.wang at canonical.com>
+
+ * ALSA: hda - Add Dell headset detection quirk for three laptop models
+
+2013-12-13 Benjamin Herrenschmidt <benh at kernel.crashing.org>
+
+ * powerpc/powernv: Fix OPAL LPC access in Little Endian
+
+2013-12-13 Anton Blanchard <anton at samba.org>
+
+ * powerpc/powernv: Fix endian issue in opal_xscom_read
+
+2013-12-13 Jie Liu <jeff.liu at oracle.com>
+
+ * xfs: use xfs_icluster_size_fsb in xfs_imap
+
+2013-12-13 Jie Liu <jeff.liu at oracle.com>
+
+ * xfs: use xfs_icluster_size_fsb in xfs_ifree_cluster
+
+2013-12-12 Anton Blanchard <anton at samba.org>
+
+ * powerpc: Fix endian issues in crash dump code
+
+2013-12-12 Anton Blanchard <anton at samba.org>
+
+ * powerpc/pseries: Fix endian issues in MSI code
+
+2013-12-12 Anton Blanchard <anton at samba.org>
+
+ * powerpc/pseries: Fix PCIE link speed endian issue
+
+2013-12-12 Anton Blanchard <anton at samba.org>
+
+ * powerpc/pseries: Fix endian issues in nvram code
+
+2013-12-12 Anton Blanchard <anton at samba.org>
+
+ * powerpc/pseries: Fix endian issues in /proc/ppc64/lparcfg
+
+2013-12-12 Anton Blanchard <anton at samba.org>
+
+ * powerpc: Fix topology core_id endian issue on LE builds
+
+2013-12-13 James Morris <james.l.morris at oracle.com>
+
+ * Merge branch 'master' of git://git.infradead.org/users/pcmoore/selinux_fixes into for-linus
+
+2013-12-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (fixes from Andrew)
+
+2013-12-13 Christoph Hellwig <hch at infradead.org>
+
+ * xfs: remove the quotaoff log format from the quotaoff log item
+
+2013-12-13 Christoph Hellwig <hch at infradead.org>
+
+ * xfs: remove the dquot log format from the dquot log item
+
+2013-12-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
+
+2013-12-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2013-12-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
+
+2013-12-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.13' of git://linux-nfs.org/~bfields/linux
+
+2013-12-10 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * mtd: nand: pxa3xx: Use info->use_dma to release DMA resources
+
+2013-12-09 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * Partially revert "mtd: nand: pxa3xx: Introduce 'marvell,armada370-nand' compatible string"
+
+2013-12-10 Paul Moore <pmoore at redhat.com>
+
+ * selinux: process labeled IPsec TCP SYN-ACK packets properly in selinux_ip_postroute()
+
+2013-12-10 Paul Moore <pmoore at redhat.com>
+
+ * selinux: look for IPsec labels on both inbound and outbound packets
+
+2013-12-04 Paul Moore <pmoore at redhat.com>
+
+ * selinux: handle TCP SYN-ACK packets correctly in selinux_ip_postroute()
+
+2013-12-04 Paul Moore <pmoore at redhat.com>
+
+ * selinux: handle TCP SYN-ACK packets correctly in selinux_ip_output()
+
+2013-12-04 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * i2c: imx: Check the return value from clk_prepare_enable()
+
+2013-12-12 Gleb Natapov <gleb at redhat.com>
+
+ * KVM: x86: fix guest-initiated crash with x2apic (CVE-2013-6376)
+
+2013-11-20 Andy Honig <ahonig at google.com>
+
+ * KVM: x86: Convert vapic synchronization to _cached functions (CVE-2013-6368)
+
+2013-11-19 Andy Honig <ahonig at google.com>
+
+ * KVM: x86: Fix potential divide by 0 in lapic (CVE-2013-6367)
+
+2013-11-18 Andy Honig <ahonig at google.com>
+
+ * KVM: Improve create VCPU parameter (CVE-2013-4587)
+
+2013-12-09 Elie De Brauwer <eliedebrauwer at gmail.com>
+
+ * i2c: mux: Inherit retry count and timeout from parent for muxed bus
+
+2013-12-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.13-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2013-12-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
+
+2013-12-12 Paul Durrant <Paul.Durrant at citrix.com>
+
+ * xen-netback: fix gso_prefix check
+
+2013-12-12 Sebastian Siewior <bigeasy at linutronix.de>
+
+ * net: make neigh_priv_len in struct net_device 16bit instead of 8bit
+
+2013-11-29 Valentine Barshak <valentine.barshak at cogentembedded.com>
+
+ * gpio: rcar: Fix level interrupt handling
+
+2013-12-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'v4l_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
+
+2013-12-10 Stephen Boyd <sboyd at codeaurora.org>
+
+ * gpio: msm: Fix irq mask/unmask by writing bits instead of numbers
+
+2013-12-11 Mugunthan V N <mugunthanvnm at ti.com>
+
+ * drivers: net: cpsw: fix for cpsw crash when build as modules
+
+2013-12-11 Paul Durrant <Paul.Durrant at citrix.com>
+
+ * xen-netback: napi: don't prematurely request a tx event
+
+2013-12-11 Paul Durrant <Paul.Durrant at citrix.com>
+
+ * xen-netback: napi: fix abuse of budget
+
+2013-12-12 David Henningsson <david.henningsson at canonical.com>
+
+ * ALSA: hda - Add enable_msi=0 workaround for four HP machines
+
+2013-01-10 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * Btrfs: fix access_ok() check in btrfs_ioctl_send()
+
+2013-12-11 Wang Shilong <wangsl.fnst at cn.fujitsu.com>
+
+ * Btrfs: make sure we cleanup all reloc roots if error happens
+
+2013-12-10 Wang Shilong <wangsl.fnst at cn.fujitsu.com>
+
+ * Btrfs: skip building backref tree for uuid and quota tree when doing balance relocation
+
+2013-12-11 Wang Shilong <wangsl.fnst at cn.fujitsu.com>
+
+ * Btrfs: fix an oops when doing balance relocation
+
+2013-12-12 Ingo Molnar <mingo at kernel.org>
+
+ * x86/traps: Clean up error exception handler definitions
+
+2013-12-12 Yang Yingliang <yangyingliang at huawei.com>
+
+ * sch_tbf: use do_div() for 64-bit divide
+
+2013-12-11 Eric Dumazet <edumazet at google.com>
+
+ * udp: ipv4: must add synchronization in udp_sk_rx_dst_set()
+
+2013-12-11 Philippe De Muyter <phdm at macqel.be>
+
+ * net:fec: remove duplicate lines in comment about errata ERR006358
+
+2013-12-10 Maxime Ripard <maxime.ripard at free-electrons.com>
+
+ * ARM: sun6i: dt: Fix interrupt trigger types
+
+2013-12-10 Maxime Ripard <maxime.ripard at free-electrons.com>
+
+ * ARM: sun7i: dt: Fix interrupt trigger types
+
+2013-12-10 Shawn Guo <shawn.guo at linaro.org>
+
+ * MAINTAINERS: merge IMX6 entry into IMX
+
+2013-12-06 Stephen Warren <swarren at nvidia.com>
+
+ * ARM: tegra: add missing break to fuse initialization code
+
+2013-12-10 Sergei Ianovich <ynvich at gmail.com>
+
+ * ARM: pxa: prevent PXA270 occasional reboot freezes
+
+2013-12-12 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/atmel', 'asoc/fix/fsl', 'asoc/fix/tegra' and 'asoc/fix/wm8962' into asoc-linus
+
+2013-12-12 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/dma' into asoc-linus
+
+2013-12-12 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/core' into asoc-linus
+
+2013-12-11 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'regulator/fix/as3722' and 'regulator/fix/pfuze100' into regulator-linus
+
+2013-12-11 David S. Miller <davem at davemloft.net>
+
+ * Revert "8390 : Replace ei_debug with msg_enable/NETIF_MSG_* feature"
+
+2013-12-11 Chad Hanson <chanson at trustedcs.com>
+
+ * selinux: fix broken peer recv check
+
+2013-11-22 Dave Chinner <dchinner at redhat.com>
+
+ * xfs: align initial file allocations correctly
+
+2013-12-10 Ben Myers <bpm at sgi.com>
+
+ * xfs: fix calculation of freed inode cluster blocks
+
+2013-12-06 Chen, Gong <gong.chen at linux.intel.com>
+
+ * ACPI, eMCA: Combine eMCA/EDAC event reporting priority
+
+2013-12-11 Bjorn Helgaas <bhelgaas at google.com>
+
+ * MAINTAINERS: Add DesignWare, i.MX6, Armada, R-Car PCI host maintainers
+
+2013-12-06 Chen, Gong <gong.chen at linux.intel.com>
+
+ * EDAC, sb_edac: Modify H/W event reporting policy
+
+2013-12-06 Chen, Gong <gong.chen at linux.intel.com>
+
+ * EDAC: Add an edac_report parameter to EDAC
+
+2013-11-18 Peter Zijlstra <peterz at infradead.org>
+
+ * sched/fair: Rework sched_fair time accounting
+
+2013-11-18 Peter Zijlstra <peterz at infradead.org>
+
+ * math64: Add mul_u64_u32_shr()
+
+2013-11-28 Peter Zijlstra <peterz at infradead.org>
+
+ * sched: Remove PREEMPT_NEED_RESCHED from generic code
+
+2013-12-11 Peter Zijlstra <peterz at infradead.org>
+
+ * sched: Initialize power_orig for overlapping groups
+
+2013-12-10 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Add static DAC/pin mapping for AD1986A codec
+
+2013-12-11 Hui Wang <hui.wang at canonical.com>
+
+ * ALSA: hda - One more Dell headset detection quirk
+
+2013-12-02 Jeff Layton <jlayton at redhat.com>
+
+ * nfsd: when reusing an existing repcache entry, unhash it first
+
+2013-12-05 Mikulas Patocka <mpatocka at redhat.com>
+
+ * dm stats: initialize read-only module parameter
+
+2013-11-29 Matthew Garrett <matthew.garrett at nebula.com>
+
+ * x86, efi: Don't use (U)EFI time services on 32 bit
+
+2013-12-10 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86, build, icc: Remove uninitialized_var() from compiler-intel.h
+
+2013-12-09 Paul Moore <pmoore at redhat.com>
+
+ * selinux: process labeled IPsec TCP SYN-ACK packets properly in selinux_ip_postroute()
+
+2013-12-10 Ingo Molnar <mingo at kernel.org>
+
+ * Merge branch 'clockevents/fixes' of git://git.linaro.org/people/daniel.lezcano/linux into timers/urgent
+
+2013-12-10 Dinh Nguyen <dinguyen at altera.com>
+
+ * clocksource: dw_apb_timer_of: Fix support for dts binding "snps,dw-apb-timer"
+
+2013-12-10 Dinh Nguyen <dinguyen at altera.com>
+
+ * clocksource: dw_apb_timer_of: Fix read_sched_clock
+
+2013-12-02 Marc Zyngier <marc.zyngier at arm.com>
+
+ * clocksource: sunxi: Stop timer from ticking before enabling interrupts
+
+2013-10-19 Thierry Reding <thierry.reding at gmail.com>
+
+ * clocksource: clksrc-of: Do not drop unheld reference on device node
+
+2013-11-26 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * clocksource: armada-370-xp: Register sched_clock after the counter reset
+
+2013-11-20 Axel Lin <axel.lin at ingics.com>
+
+ * clocksource: time-efm32: Select CLKSRC_MMIO
+
+2013-12-05 Roger Quadros <rogerq at ti.com>
+
+ * gpio: twl4030: Fix regression for twl gpio LED output
+
+2013-11-26 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * sh-pfc: Fix PINMUX_GPIO macro
+
+2013-12-10 Linus Walleij <linus.walleij at linaro.org>
+
+ * MAINTAINERS: update GPIO maintainers entry
+
+2013-12-02 Thomas Gleixner <tglx at linutronix.de>
+
+ * mfd: rtsx_pcr: Disable interrupts before cancelling delayed works
+
+2013-12-03 cpw <cpw at sgi.com>
+
+ * x86/UV: Fix NULL pointer dereference in uv_flush_tlb_others() if the 'nobau' boot option is used
+
+2013-12-09 Michael Hennerich <michael.hennerich at analog.com>
+
+ * Input: adxl34x - Fix bug in definition of ADXL346_2D_ORIENT
+
+2013-12-07 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Input: serio - fix sysfs layout
+
+2013-12-09 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Merge tag 'v3.13-rc3' into for-linus
+
+2013-12-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
+
+2013-12-09 Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>
+
+ * powerpc: Fix up the kdump base cap to 128M
+
+2013-12-09 Thadeu Lima de Souza Cascardo <cascardo at linux.vnet.ibm.com>
+
+ * powernv: Fix VFIO support with PHB3
+
+2013-12-09 Anatolij Gustschin <agust at denx.de>
+
+ * powerpc/52xx: Re-enable bestcomm driver in defconfigs
+
+2013-12-09 Olof Johansson <olof at lixom.net>
+
+ * powerpc/pasemi: Turn on devtmpfs in defconfig
+
+2013-12-04 Cedric Le Goater <clg at fr.ibm.com>
+
+ * offb: Add palette hack for little endian
+
+2013-12-04 Cedric Le Goater <clg at fr.ibm.com>
+
+ * offb: Little endian fixes
+
+2013-12-07 Hong H. Pham <hong.pham at windriver.com>
+
+ * powerpc: Fix PTE page address mismatch in pgtable ctor/dtor
+
+2013-12-06 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * powerpc/44x: Fix ocm_block allocation
+
+2013-12-09 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86, build: Pass in additional -mno-mmx, -mno-sse options
+
+2013-12-09 Jon Medhurst <tixy at linaro.org>
+
+ * ARM: 7917/1: cacheflush: correctly limit range of memory region being flushed
+
+2013-12-05 Konstantin Khlebnikov <k.khlebnikov at samsung.com>
+
+ * ARM: 7913/1: fix framepointer check in unwind_frame
+
+2013-12-05 Konstantin Khlebnikov <k.khlebnikov at samsung.com>
+
+ * ARM: 7912/1: check stack pointer in get_wchan
+
+2013-12-02 Santosh Shilimkar <santosh.shilimkar at ti.com>
+
+ * ARM: 7909/1: mm: Call setup_dma_zone() post early_paging_init()
+
+2013-12-09 Paul Moore <pmoore at redhat.com>
+
+ * selinux: look for IPsec labels on both inbound and outbound packets
+
+2013-12-03 Stephen Warren <swarren at nvidia.com>
+
+ * ASoC: don't leak on error in snd_dmaengine_pcm_register
+
+2013-12-05 Jan Weitzel <j.weitzel at phytec.de>
+
+ * ASoC: tlv320aic3x: no mono controls 3007 model
+
+2013-12-06 Nicolin Chen <b42378 at freescale.com>
+
+ * ASoC: fsl: imx-wm8962: Don't update bias_level in machine driver
+
+2013-12-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2013-12-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'mfd-fixes-3.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/sameo/mfd-fixes
+
+2013-12-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm-3.13-rc3-fixup' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2013-12-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
+
+2013-12-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
+
+2013-12-06 Jingoo Han <jg1.han at samsung.com>
+
+ * regulator: stw481x-vmmc: use devm_regulator_register()
+
+2013-11-05 Tim Harvey <tharvey at gateworks.com>
+
+ * regulator: pfuze100: allow misprogrammed ID
+
+2013-12-09 Axel Lin <axel.lin at ingics.com>
+
+ * regulator: pfuze100: Fix address of FABID
+
+2013-12-06 Stephen Warren <swarren at nvidia.com>
+
+ * ASoC: tegra: fix uninitialized variables in set_fmt
+
+2013-12-05 Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
+
+ * HID: hid-sensor-hub: fix duplicate sysfs entry error
+
+2013-12-05 Tom Lendacky <thomas.lendacky at amd.com>
+
+ * crypto: scatterwalk - Use sg_chain_ptr on chain entries
+
+2013-12-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'tty-3.13-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
+
+2013-12-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'staging-3.13-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
+
+2013-12-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'char-misc-3.13-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
+
+2013-12-08 Dmitry Monakhov <dmonakhov at openvz.org>
+
+ * jbd2: rename obsoleted msg JBD->JBD2
+
+2013-12-08 Jan Kara <jack at suse.cz>
+
+ * jbd2: revise KERN_EMERG error messages
+
+2013-12-02 Soren Brinkmann <soren.brinkmann at xilinx.com>
+
+ * tty: xuartps: Properly guard sysrq specific code
+
+2013-10-31 Jonathan Woithe <jwoithe at atrad.com.au>
+
+ * serial: 8250: Fix initialisation of Quatech cards with the AMCC PCI chip
+
+2013-11-08 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * serial: icom: dereference after free in load_code()
+
+2013-11-12 Mika Westerberg <mika.westerberg at linux.intel.com>
+
+ * serial: 8250_dw: add new ACPI IDs
+
+2013-12-04 Sebastian Andrzej Siewior <bigeasy at linutronix.de>
+
+ * tty: serial: pch: don't crash if DMA enabled but not loaded
+
+2013-12-05 Heiko Stübner <heiko at sntech.de>
+
+ * serial: samsung: move clock deactivation below uart registration
+
+2013-12-08 Rui Wang <ruiv.wang at gmail.com>
+
+ * PCI, AER: Fix severity usage in aer trace event
+
+2013-12-08 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Revert "cpufreq: fix garbage kobjects on errors during suspend/resume"
+
+2013-12-08 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Revert "cpufreq: suspend governors on system suspend/hibernate"
+
+2013-11-27 Khalid Aziz <khalid.aziz at oracle.com>
+
+ * PCI: Disable Bus Master only on kexec reboot
+
+2013-12-05 Qiaowei Ren <qiaowei.ren at intel.com>
+
+ * x86, xsave: Support eager-only xsave features, add MPX support
+
+2013-12-06 Tejun Heo <tj at kernel.org>
+
+ * cgroup: fix cgroup_create() error handling path
+
+2013-12-07 Qiaowei Ren <qiaowei.ren at intel.com>
+
+ * x86, cpufeature: Define the Intel MPX feature flag
+
+2013-12-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.13-rc3
+
+2013-12-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'trace-fixes-3.13-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
+
+2013-12-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kvack.org/~bcrl/aio-next
+
+2013-12-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
+
+2013-12-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security
+
+2013-12-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dt-fixes-for-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux
+
+2013-12-04 Gu Zheng <guz.fnst at cn.fujitsu.com>
+
+ * aio: clean up aio ring in the fail path
+
+2013-12-07 James Morris <james.l.morris at oracle.com>
+
+ * Merge branch 'free-memory' of git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity into for-linus
+
+2013-12-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm-3.13-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2013-12-06 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'pm-epoll', 'pnp' and 'powercap'
+
+2013-12-06 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'pm-cpuidle' and 'pm-cpufreq'
+
+2013-12-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'stable' of git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile
+
+2013-12-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.dk/linux-block
+
+2013-12-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'nfs-for-3.13-3' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
+
+2013-12-05 Tony Lu <zlu at tilera.com>
+
+ * ftrace: default to tilegx if ARCH=tile is specified
+
+2013-12-01 Yunkang Tang <tommywill2011 at gmail.com>
+
+ * Input: ALPS - add support for DualPoint device on Dell XT2 model
+
+2013-12-05 Matt Walker <matt.g.d.walker at gmail.com>
+
+ * Input: elantech - add support for newer (August 2013) devices
+
+2013-12-05 Ping Cheng <pinglinux at gmail.com>
+
+ * Input: add SW_MUTE_DEVICE switch definition
+
+2013-12-03 Steven Rostedt <rostedt at goodmis.org>
+
+ * tracing: Only run synchronize_sched() at instance deletion time
+
+2013-12-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
+
+2013-12-04 Tejun Heo <tj at kernel.org>
+
+ * percpu: fix spurious sparse warnings from DEFINE_PER_CPU()
+
+2013-12-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fbdev-fixes-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tomba/linux
+
+2013-12-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.13-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2013-12-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pinctrl-v3.13-2' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl
+
+2013-12-05 Ming Lei <tom.leiming at gmail.com>
+
+ * blk-mq: fix use-after-free of request
+
+2013-12-04 Nicolin Chen <b42378 at freescale.com>
+
+ * ASoC: wm8962: Enable SYSCLK provisionally before fetching generated DSPCLK_DIV
+
+2013-12-05 Maria Dimakopoulou <maria.n.dimakopoulou at gmail.com>
+
+ * perf/x86: Fix constraint table end marker bug
+
+2013-12-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-12-04 Fenghua Yu <fenghua.yu at intel.com>
+
+ * x86/apic, doc: Justification for disabling IO APIC before Local APIC
+
+2013-12-05 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-fixes-3.13' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
+
+2013-12-04 Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
+
+ * PowerCap: Fix mode for energy counter
+
+2013-12-05 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * PNP: fix restoring devices after hibernation
+
+2013-11-28 Thomas Wood <thomas.wood at intel.com>
+
+ * drm: fix the addition of the side-by-side (half) flag for extra 3D modes
+
+2013-11-29 Thomas Wood <thomas.wood at intel.com>
+
+ * drm/edid: fix length check when adding extra 3D modes
+
+2013-12-03 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/atom: fix bus probes when hw_i2c is set (v2)
+
+2013-12-03 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: fix null pointer dereference in dce6+ audio code
+
+2013-12-03 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: fixup bad vram size on SI
+
+2013-12-05 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm-intel-fixes-2013-12-02' of git://people.freedesktop.org/~danvet/drm-intel into drm-fixes
+
+2013-12-05 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'exynos-drm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/daeinki/drm-exynos into drm-fixes
+
+2013-12-05 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm/for-3.13-rc3' of git://anongit.freedesktop.org/tegra/linux into drm-fixes
+
+2013-12-04 Rob Clark <robdclark at gmail.com>
+
+ * udl: fix issue with imported prime buffers
+
+2013-12-04 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86, bitops: Correct the assembly constraints to testing bitops
+
+2013-12-04 Geyslan G. Bem <geyslan at gmail.com>
+
+ * selinux: fix possible memory leak
+
+2013-12-03 Paul Moore <pmoore at redhat.com>
+
+ * selinux: pull address family directly from the request_sock struct
+
+2013-12-03 Paul Moore <pmoore at redhat.com>
+
+ * selinux: ensure that the cached NetLabel secattr matches the desired SID
+
+2013-12-03 Paul Moore <pmoore at redhat.com>
+
+ * selinux: handle TCP SYN-ACK packets correctly in selinux_ip_postroute()
+
+2013-12-03 Paul Moore <pmoore at redhat.com>
+
+ * selinux: handle TCP SYN-ACK packets correctly in selinux_ip_output()
+
+2013-12-02 Helge Deller <deller at gmx.de>
+
+ * nfs: fix do_div() warning by instead using sector_div()
+
+2013-12-04 Trond Myklebust <Trond.Myklebust at netapp.com>
+
+ * MAINTAINERS: Update contact information for Trond Myklebust
+
+2013-12-04 Trond Myklebust <Trond.Myklebust at netapp.com>
+
+ * NFSv4.1: Prevent a 3-way deadlock between layoutreturn, open and state recovery
+
+2013-12-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'gpio-v3.13-3' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio
+
+2013-12-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2013-12-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'parisc-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
+
+2013-12-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'squashfs-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/pkl/squashfs-next
+
+2013-12-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
+
+2013-12-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-28 Mark Brown <broonie at linaro.org>
+
+ * ASoC: adsp: Use async writes where possible
+
+2013-11-28 Marco Piazza <mpiazza at gmail.com>
+
+ * Bluetooth: Add support for Toshiba Bluetooth device [0930:0220]
+
+2013-12-04 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix silent output on MacBook Air 2,1
+
+2013-12-04 Jingoo Han <jg1.han at samsung.com>
+
+ * spi: sc18is602: Use devm_spi_register_master()
+
+2013-12-04 Bo Shen <voice.shen at atmel.com>
+
+ * ASoC: sam9x5_wm8731: change to work in DSP A mode
+
+2013-12-04 Bo Shen <voice.shen at atmel.com>
+
+ * ASoC: atmel_ssc_dai: add dai trigger ops
+
+2013-12-04 Nicolin Chen <b42378 at freescale.com>
+
+ * ASoC: soc-pcm: Use valid condition for snd_soc_dai_digital_mute() in hw_free()
+
+2013-12-04 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.13-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2013-12-02 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * video: vt8500: fix error handling in probe()
+
+2013-10-22 Johan Hovold <jhovold at gmail.com>
+
+ * atmel_lcdfb: fix module autoload
+
+2013-12-04 David Henningsson <david.henningsson at canonical.com>
+
+ * ALSA: hda - Fix missing ELD info when using jackpoll_ms parameter
+
+2013-12-04 Kailang Yang <kailang at realtek.com>
+
+ * ALSA: hda/realtek - remove hp_automute_hook from alc283_fixup_chromebook
+
+2013-12-03 Dan Williams <dan.j.williams at intel.com>
+
+ * dma: fix build breakage in s3c24xx-dma
+
+2013-12-03 Chris Mason <clm at fb.com>
+
+ * Btrfs: update the MAINTAINERS file
+
+2013-11-20 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86-64, build: Always pass in -mno-sse
+
+2013-12-03 Helge Deller <deller at gmx.de>
+
+ * parisc: update 64bit defconfigs and use SIL680 instead of SIIMAGE driver
+
+2013-12-03 Dinh Nguyen <dinguyen at altera.com>
+
+ * arm: dts: socfpga: Change some clocks of gate-clk type to perip-clk
+
+2013-07-17 Dinh Nguyen <dinguyen at altera.com>
+
+ * arm: socfpga: Enable ARM_TWD for socfpga
+
+2013-12-03 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'iio-fixes-for-3.13b' of git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio into staging-linus
+
+2013-12-03 Jeff Moyer <jmoyer at redhat.com>
+
+ * blk-mq: fix dereference of rq->mq_ctx if allocation fails
+
+2013-12-03 Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+
+ * cpuidle: Check for dev before deregistering it.
+
+2013-12-03 Olof Johansson <olof at lixom.net>
+
+ * ARM: multi_v7_defconfig: enable SDHCI_BCM_KONA and MMC_BLOCK_MINORS=16
+
+2013-12-02 Olof Johansson <olof at lixom.net>
+
+ * ARM: sunxi_defconfig: enable NFS, TMPFS, PRINTK_TIME and nfsroot support
+
+2013-12-02 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'omap-for-v3.13/more-dt-regressions' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes
+
+2013-12-02 Olof Johansson <olof at lixom.net>
+
+ * ARM: multi_v7_defconfig: enable network for BeagleBone Black
+
+2013-12-03 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/arizona', 'asoc/fix/atmel', 'asoc/fix/fsl', 'asoc/fix/kirkwood', 'asoc/fix/omap', 'asoc/fix/rcar', 'asoc/fix/wm8731' and 'asoc/fix/wm8990' into asoc-linus
+
+2013-12-03 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/dapm' into asoc-linus
+
+2013-12-03 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/core' into asoc-linus
+
+2013-12-03 Bo Shen <voice.shen at atmel.com>
+
+ * ASoC: wm8731: fix dsp mode configuration
+
+2013-12-02 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: ssm2602: Use core for applying symmetry constraints
+
+2013-12-03 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/symmetry' into asoc-ssm2602
+
+2013-11-21 Vijaya Mohan Guvva <vmohan at brocade.com>
+
+ * [SCSI] bfa: Fix crash when symb name set for offline vport
+
+2013-11-15 Amit Pundir <amit.pundir at linaro.org>
+
+ * epoll: drop EPOLLWAKEUP if PM_SLEEP is disabled
+
+2013-12-03 Bjørn Mork <bjorn at mork.no>
+
+ * cpufreq: fix garbage kobjects on errors during suspend/resume
+
+2013-12-03 Olivier Gay <ogay at logitech.com>
+
+ * HID: logitech-dj: add HIDRAW dependency in Kconfig
+
+2013-11-29 Heikki Krogerus <heikki.krogerus at linux.intel.com>
+
+ * gpiolib: change a warning to debug message when failing to get gpio
+
+2013-11-22 Liu Gang <Gang.Liu at freescale.com>
+
+ * powerpc/gpio: Fix the wrong GPIO input data on MPC8572/MPC8536
+
+2013-11-23 Alexandre Courbot <acourbot at nvidia.com>
+
+ * gpiolib: use platform GPIO mappings as fallback
+
+2013-11-23 Alexandre Courbot <acourbot at nvidia.com>
+
+ * Documentation: gpiolib: add 00-INDEX file
+
+2013-11-23 Alexandre Courbot <acourbot at nvidia.com>
+
+ * gpiolib: fix lookup of platform-mapped GPIOs
+
+2013-11-25 Alexandre Courbot <acourbot at nvidia.com>
+
+ * gpiolib: add missing declarations
+
+2013-11-28 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * sh-pfc: sh7372: Fix pin bias setup
+
+2013-11-28 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * sh-pfc: r8a7740: Fix pin bias setup
+
+2013-11-08 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * drm/tegra: return -EFAULT if copy_from_user() fails
+
+2013-12-02 Kailang Yang <kailang at realtek.com>
+
+ * ALSA: hda/realtek - Independent of model for HP
+
+2013-12-02 David Henningsson <david.henningsson at canonical.com>
+
+ * ALSA: hda - Fix headset mic input after muted internal mic (Dell/Realtek)
+
+2013-11-21 Gerhard Sittig <gsi at denx.de>
+
+ * dt: binding: reword PowerPC 8xxx GPIO documentation
+
+2013-11-25 Stephen Warren <swarren at nvidia.com>
+
+ * ARM: tegra: delete nvidia,tegra20-spi.txt binding
+
+2013-10-23 Chanwoo Choi <cw00.choi at samsung.com>
+
+ * hwmon: ntc_thermistor: Fix typo (pullup-uV -> pullup-uv)
+
+2013-11-13 Wei Ni <wni at nvidia.com>
+
+ * of: add vendor prefix for GMT
+
+2013-11-18 Laurent Pinchart <laurent.pinchart at ideasonboard.com>
+
+ * clk: exynos: Fix typos in DT bindings documentation
+
+2013-11-18 Thierry Reding <thierry.reding at gmail.com>
+
+ * of: Add vendor prefix for LG Corporation
+
+2013-11-18 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * Documentation: net: fsl-fec.txt: Add phy-supply entry
+
+2013-11-08 Sricharan R <r.sricharan at ti.com>
+
+ * ARM: dts: doc: Document missing binding for omap5-mpu
+
+2013-11-07 Rob Herring <rob.herring at calxeda.com>
+
+ * dt-bindings: add ARMv8 PMU binding
+
+2013-10-08 Stephen Warren <swarren at nvidia.com>
+
+ * MAINTAINERS: remove swarren from DT bindings
+
+2013-08-08 Kumar Gala <galak at codeaurora.org>
+
+ * MAINTAINERS: Add Kumar to Device Tree Binding maintainers group
+
+2013-12-02 Roberto Sassu <roberto.sassu at polito.it>
+
+ * ima: properly free ima_template_entry structures
+
+2013-12-02 Christoph Paasch <christoph.paasch at uclouvain.be>
+
+ * ima: Do not free 'entry' before it is initialized
+
+2013-12-02 Dave Airlie <airlied at redhat.com>
+
+ * drm/radeon: fix VGT_GS_INSTANCE_CNT register
+
+2013-12-02 Alexandre Demers <alexandre.f.demers at gmail.com>
+
+ * drm/radeon: Fix a typo in Cayman and Evergreen registers
+
+2013-11-26 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/dpm: simplify state adjust logic for NI
+
+2013-12-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'leds-fixes-for-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/cooloney/linux-leds
+
+2013-11-28 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * leds: pwm: Fix for deferred probe in DT booted mode
+
+2013-12-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * uio: we cannot mmap unaligned page contents
+
+2013-12-02 Florian Vaussard <florian.vaussard at epfl.ch>
+
+ * ARM: dts: Fix the name of supplies for smsc911x shared by OMAP
+
+2013-11-15 James Bottomley <JBottomley at Parallels.com>
+
+ * [SCSI] enclosure: fix WARN_ON in dual path device removing
+
+2013-11-11 Nikith Ganigarakoppal <Nikith.Ganigarakoppal at pmcs.com>
+
+ * [SCSI] pm80xx: Tasklets synchronization fix.
+
+2013-10-30 Nikith Ganigarakoppal <Nikith.Ganigarakoppal at pmcs.com>
+
+ * [SCSI] pm80xx: Resetting the phy state.
+
+2013-10-30 Nikith Ganigarakoppal <Nikith.Ganigarakoppal at pmcs.com>
+
+ * [SCSI] pm80xx: Fix for direct attached device.
+
+2013-11-13 Nikith Ganigarakoppal <Nikith.Ganigarakoppal at pmcs.com>
+
+ * [SCSI] pm80xx: Module author addition
+
+2013-12-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'irq-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-12-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-12-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-12-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'stable/for-linus-3.13-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
+
+2013-12-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'spi-v3.13-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi
+
+2013-12-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2013-11-28 Vince Hsu <vinceh at nvidia.com>
+
+ * regulator: as3722: set the correct current limit
+
+2013-12-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * vfs: fix subtle use-after-free of pipe_inode_info
+
+2013-12-02 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Use always amps for auto-mute on AD1986A codec
+
+2013-12-02 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda/analog - Handle inverted EAPD properly in vmaster hook
+
+2013-12-02 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Another fixup for ASUS laptop with ALC660 codec
+
+2013-11-30 Ben Hutchings <ben at decadent.org.uk>
+
+ * HID: kye: Fix missing break in kye_report_fixup()
+
+2013-12-02 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: atmel: Fix possible array overflow
+
+2013-10-01 Inki Dae <inki.dae at samsung.com>
+
+ * drm/exynos: release unhandled page flip events at postclose.
+
+2013-09-19 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * drm/exynos: Fix trivial typo in exynos_drm_fimd.c
+
+2013-12-02 Shawn Guo <shawn.guo at linaro.org>
+
+ * ASoC: core: fix devres parameter in devm_snd_soc_register_card()
+
+2013-11-28 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: mxs: Use devm_snd_dmaengine_pcm_register()
+
+2013-11-28 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: bcm2835-i2s: Use devm_snd_dmaengine_pcm_register()
+
+2013-12-02 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/dma' into asoc-bcm2835
+
+2013-11-30 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: Set SNDRV_PCM_INFO_JOINT_DUPLEX for PCMs with symmetry constraints
+
+2013-11-30 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: generic-dmaengine-pcm: Set BATCH flag when residue reporting is not supported
+
+2013-11-30 Jarkko Nikula <jarkko.nikula at bitmer.com>
+
+ * ASoC: omap: n810: Convert to clk_prepare_enable/clk_disable_unprepare
+
+2013-11-09 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * ASoC: fsl: set correct platform drvdata in pcm030_fabric_probe()
+
+2013-11-08 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * ASoC: fsl: imx-pcm-fiq: Remove unused 'runtime' variable
+
+2013-11-05 Oskar Schirmer <oskar at scara.com>
+
+ * ASoC: fsl: imx-pcm-fiq: remove bogus period delta calculation
+
+2013-12-01 Axel Lin <axel.lin at ingics.com>
+
+ * pinctrl: abx500: Fix header file include guard
+
+2013-11-26 Christian Engelmayer <christian.engelmayer at frequentis.com>
+
+ * Input: usbtouchscreen - separate report and transmit buffer size handling
+
+2013-11-26 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Input: sur40 - suppress false uninitialized variable warning
+
+2013-12-01 Eugenia Emantayev <eugenia at mellanox.com>
+
+ * net/mlx4_en: Remove selftest TX queues empty condition
+
+2013-12-01 fan.du <fan.du at windriver.com>
+
+ * {pktgen, xfrm} Update IPv4 header total len and checksum after tranformation
+
+2013-11-28 Michael S. Tsirkin <mst at redhat.com>
+
+ * virtio_net: make all RX paths handle erors consistently
+
+2013-11-28 Michael S. Tsirkin <mst at redhat.com>
+
+ * virtio_net: fix error handling for mergeable buffers
+
+2013-12-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml
+
+2013-12-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-arm
+
+2013-11-24 Austin Boyle <boyle.austin at gmail.com>
+
+ * max17042_battery: Fix build errors caused by missing REGMAP_I2C config
+
+2013-11-22 Shuah Khan <shuah.kh at samsung.com>
+
+ * power_supply: Fix Oops from NULL pointer dereference from wakeup_source_activate
+
+2013-11-29 Richard Weinberger <richard at nod.at>
+
+ * um: Build always with -mcmodel=large on 64bit
+
+2013-11-21 Richard Weinberger <richard at nod.at>
+
+ * um: Rename print_stack_trace to do_stack_trace
+
+2013-11-01 Stephen M. Cameron <scameron at beardog.cce.hp.com>
+
+ * [SCSI] hpsa: return 0 from driver probe function on success, not 1
+
+2013-11-30 Fabio Estevam <festevam at gmail.com>
+
+ * ARM: 7907/1: lib: delay-loop: Add align directive to fix BogoMIPS calculation
+
+2013-11-25 Dave Martin <dave.martin at linaro.org>
+
+ * ARM: 7897/1: kexec: Use the right ISA for relocate_new_kernel
+
+2013-11-21 Victor Kamensky <victor.kamensky at linaro.org>
+
+ * ARM: 7895/1: signal: fix armv7-m build issue in sigreturn_codes.S
+
+2013-11-29 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * ARM: footbridge: fix EBSA285 LEDs
+
+2013-09-23 Stephen M. Cameron <scameron at beardog.cce.hp.com>
+
+ * [SCSI] hpsa: do not discard scsi status on aborted commands
+
+2013-11-30 Helge Deller <deller at gmx.de>
+
+ * parisc: remove CONFIG_MLONGCALLS=y from defconfigs
+
+2013-11-29 Thomas Huth <thuth at linux.vnet.ibm.com>
+
+ * virtio_net: Fixed a trivial typo (fitler --> filter)
+
+2013-11-30 Helge Deller <deller at gmx.de>
+
+ * parisc: fix kernel memory layout in vmlinux.ld.S
+
+2013-11-30 Helge Deller <deller at gmx.de>
+
+ * parisc: use kernel_text_address() in unwind functions
+
+2013-10-31 Chen Gang <gang.chen at asianux.com>
+
+ * parisc: remove empty SERIAL_PORT_DFNS in serial.h
+
+2013-11-29 stephen hemminger <stephen at networkplumber.org>
+
+ * netem: fix gemodel loss generator
+
+2013-11-29 stephen hemminger <stephen at networkplumber.org>
+
+ * netem: fix loss 4 state model
+
+2013-11-29 stephen hemminger <stephen at networkplumber.org>
+
+ * netem: missing break in ge loss generator
+
+2013-11-29 Arvid Brodin <arvid.brodin at alten.se>
+
+ * net/hsr: Support iproute print_opt ('ip -details ...')
+
+2013-11-28 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * ARM: footbridge: fix VGA initialisation
+
+2013-11-28 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * ARM: fix booting low-vectors machines
+
+2013-11-25 Paul Drews <paul.drews at intel.com>
+
+ * ACPI: Add BayTrail SoC GPIO and LPSS ACPI IDs
+
+2013-11-21 Aristeu Rozanski <aris at redhat.com>
+
+ * sb_edac: Shut up compiler warning when EDAC_DEBUG is enabled
+
+2013-11-29 Levente Kurusa <levex at linux.com>
+
+ * x86, mce: Call put_device on device_register failure
+
+2013-11-27 Roberto Sassu <roberto.sassu at polito.it>
+
+ * ima: store address of template_fmt_copy in a pointer before calling strsep
+
+2013-11-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.13-rc2
+
+2013-11-29 Peter Hurley <peter at hurleysoftware.com>
+
+ * n_tty: Fix missing newline echo
+
+2013-11-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'arm64-stable' of git://git.kernel.org/pub/scm/linux/kernel/git/cmarinas/linux-aarch64
+
+2013-11-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
+
+2013-11-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'i2c/for-current' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux
+
+2013-11-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
+
+2013-11-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata
+
+2013-11-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
+
+2013-11-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.13-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2013-11-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
+
+2013-11-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2013-11-27 Catalin Marinas <catalin.marinas at arm.com>
+
+ * arm64: Move PTE_PROT_NONE higher up
+
+2013-11-29 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: core: Use consistent byte ordering in snd_soc_bytes_get
+
+2013-11-29 Catalin Marinas <catalin.marinas at arm.com>
+
+ * arm64: Use Normal NonCacheable memory for writecombine
+
+2013-11-29 Chris Wilson <chris at chris-wilson.co.uk>
+
+ * drm/i915: Pin pages whilst allocating for dma-buf vmap()
+
+2013-11-29 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: MI_PREDICATE_RESULT_2 is HSW only
+
+2013-11-29 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Make the DERRMR SRM target global GTT
+
+2013-11-29 Thomas Gleixner <tglx at linutronix.de>
+
+ * nohz: Fix another inconsistency between CONFIG_NO_HZ=n and nohz=off
+
+2013-11-29 Madper Xie <cxie at redhat.com>
+
+ * efi-pstore: Make efi-pstore return a unique id
+
+2013-11-29 Al Viro <viro at zeniv.linux.org.uk>
+
+ * fix bogus path_put() of nd->root after some unlazy_walk() failures
+
+2013-10-23 Martin K. Petersen <martin.petersen at oracle.com>
+
+ * [SCSI] Disable WRITE SAME for RAID and virtual host adapter drivers
+
+2013-11-28 Dave Airlie <airlied at redhat.com>
+
+ * drm/qxl: fix memory leak in release list handling
+
+2013-11-11 Matt Fleming <matt.fleming at intel.com>
+
+ * x86/efi: Fix earlyprintk off-by-one bug
+
+2013-10-30 Seiji Aguchi <seiji.aguchi at hds.com>
+
+ * efivars, efi-pstore: Hold off deletion of sysfs entry until the scan is completed
+
+2013-11-28 Matthew Leach <Matthew.Leach at arm.com>
+
+ * arm64: debug: make aarch32 bkpt checking endian clean
+
+2013-11-28 Matthew Leach <Matthew.Leach at arm.com>
+
+ * arm64: ptrace: fix compat registes get/set to be endian clean
+
+2013-11-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'gpio-v3.13-2' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio
+
+2013-11-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'md/3.13-fixes' of git://neil.brown.name/md
+
+2013-11-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-next' of git://git.samba.org/sfrench/cifs-2.6
+
+2013-11-28 Helge Deller <deller at gmx.de>
+
+ * kernel/extable: fix address-checks for core_kernel and init areas
+
+2013-11-28 Mark Rutland <mark.rutland at arm.com>
+
+ * irqchip: Gic: fix boot for chained gics
+
+2013-11-28 Horia Geanta <horia.geanta at freescale.com>
+
+ * crypto: testmgr - fix sglen in test_aead for case 'dst != src'
+
+2013-11-28 Horia Geanta <horia.geanta at freescale.com>
+
+ * crypto: talitos - fix aead sglen for case 'dst != src'
+
+2013-11-28 Horia Geanta <horia.geanta at freescale.com>
+
+ * crypto: caam - fix aead sglen for case 'dst != src'
+
+2013-11-28 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Add LFE chmap to ASUS ET2700
+
+2013-11-28 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Initialize missing bass speaker pin for ASUS AIO ET2700
+
+2013-11-27 Viresh Kumar <viresh.kumar at linaro.org>
+
+ * cpufreq: suspend governors on system suspend/hibernate
+
+2013-11-28 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: Add resource managed snd_dmaengine_pcm_register()
+
+2013-11-26 Bockholdt Arne <a.bockholdt at precitec-optronik.de>
+
+ * intel_idle: Fixed C6 state on Avoton/Rangeley processors
+
+2013-11-27 Toshi Kani <toshi.kani at hp.com>
+
+ * ACPI / PCI / hotplug: Avoid warning when _ADR not present
+
+2013-11-28 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'spi/fix/bcm2835', 'spi/fix/bcm63xx', 'spi/fix/mpc512x-psc', 'spi/fix/mxs', 'spi/fix/pxa2xx', 'spi/fix/qspi', 'spi/fix/rspi' and 'spi/fix/txx9' into spi-linus
+
+2013-11-28 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'spi/fix/core' into spi-linus
+
+2013-11-29 Chew, Chiau Ee <chiau.ee.chew at intel.com>
+
+ * spi/pxa2xx: Restore private register bits.
+
+2013-11-27 Oleksij Rempel <linux at rempel-privat.de>
+
+ * ALSA: hda - limit mic boost on Asus UX31[A,E]
+
+2013-11-28 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Check leaf nodes to find aamix amps
+
+2013-11-28 Vineet Gupta <vgupta at synopsys.com>
+
+ * ARC: [perf] Fix a few thinkos
+
+2013-11-25 Florian Meier <florian.meier at koalo.de>
+
+ * i2c: bcm2835: Linking platform nodes to adapter nodes
+
+2013-11-21 Magnus Damm <damm at opensource.se>
+
+ * ARM: shmobile: r8a7790: Fix GPIO resources in DTS
+
+2013-11-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2013-11-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'tty-3.13-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
+
+2013-11-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'staging-3.13-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
+
+2013-11-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'driver-core-3.13-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core
+
+2013-11-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
+
+2013-11-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.13-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2013-11-27 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nouveau/hwmon: fix compilation without CONFIG_HWMON
+
+2013-11-26 David Herrmann <dh.herrmann at gmail.com>
+
+ * drm/sysfs: fix OOM verification
+
+2013-11-27 Jens Axboe <axboe at kernel.dk>
+
+ * Merge branch 'stable/for-jens-3.13-take-two' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip into for-linus
+
+2013-11-28 NeilBrown <neilb at suse.de>
+
+ * md/raid5: fix newly-broken locking in get_active_stripe.
+
+2013-11-28 NeilBrown <neilb at suse.de>
+
+ * md: test mddev->flags more safely in md_check_recovery.
+
+2013-11-25 NeilBrown <neilb at suse.de>
+
+ * md/raid5: fix new memory-reference bug in alloc_thread_groups.
+
+2013-11-27 Tejun Heo <tj at kernel.org>
+
+ * cgroup: fix cgroup_subsys_state leak for seq_files
+
+2013-11-25 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ATA: Fix port removal ordering
+
+2013-11-26 Peter Zijlstra <peterz at infradead.org>
+
+ * cpuset: Fix memory allocator deadlock
+
+2013-11-27 Victor Kamensky <victor.kamensky at linaro.org>
+
+ * i2c: omap: raw read and write endian fix
+
+2013-11-21 Fabio Estevam <festevam at gmail.com>
+
+ * ASoC: ssm2602: Use IS_ENABLED() macro
+
+2013-11-27 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "sysfs: handle duplicate removal attempts in sysfs_remove_group()"
+
+2013-11-27 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Staging: tidspbridge: disable driver
+
+2013-11-25 Jean-Francois Moine <moinejf at free.fr>
+
+ * ASoC: kirkwood: Fix erroneous double output while playing
+
+2013-11-27 Mark Brown <broonie at linaro.org>
+
+ * regulator: core: Check for DT every time we check full constraints
+
+2013-11-26 Jean-Francois Moine <moinejf at free.fr>
+
+ * ASoC: kirkwood: Fix invalid S/PDIF format
+
+2013-11-27 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: pcm: Always honor DAI min and max sample rate constraints
+
+2013-11-27 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: pcm: Fix rate_max calculation
+
+2013-11-27 Mark Brown <broonie at linaro.org>
+
+ * regulator: core: Replace checks of have_full_constraints with a function
+
+2013-11-27 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: wm5110: Remove output OSR and PGA volume controls
+
+2013-11-27 Bo Shen <voice.shen at atmel.com>
+
+ * ASoC: atmel: sam9x5_wm8731: fix oops when unload module
+
+2013-11-21 Markus Mayer <markus.mayer at linaro.org>
+
+ * gpio: bcm281xx: Fix return value of bcm_kona_gpio_get()
+
+2013-11-26 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix hp-mic mode without VREF bits
+
+2013-11-26 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Create Headhpone Mic Jack Mode when really needed
+
+2013-11-27 Linus Walleij <linus.walleij at linaro.org>
+
+ * gpio: pl061: move irqdomain initialization
+
+2013-11-26 Thomas Pugliese <thomas.pugliese at gmail.com>
+
+ * ALSA: usb: use multiple packets per urb for Wireless USB inbound audio
+
+2013-11-20 Nicolas Dichtel <nicolas.dichtel at 6wind.com>
+
+ * sched/doc: Fix generation of device-drivers
+
+2013-11-21 Thomas Gleixner <tglx at linutronix.de>
+
+ * sched: Expose preempt_schedule_irq()
+
+2013-11-26 David Herrmann <dh.herrmann at gmail.com>
+
+ * HID: uhid: fix leak for 64/32 UHID_CREATE
+
+2013-11-27 Samuel Ortiz <sameo at linux.intel.com>
+
+ * Merge tag 'mfd-lee-3.13-fixes-1' of git://git.linaro.org/people/ljones/mfd
+
+2013-11-27 James Ralston <james.d.ralston at intel.com>
+
+ * mfd: lpc_ich: Fix Wildcat Point info name field
+
+2013-11-17 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * mfd: ti-ssp: Fix build
+
+2013-11-27 Hui Wang <jason77.wang at gmail.com>
+
+ * ALSA: hda - Enable mute/mic-mute LEDs for more Thinkpads with Conexant codec
+
+2013-10-22 Dan Williams <dan.j.williams at intel.com>
+
+ * [SCSI] libsas: fix usage of ata_tf_to_fis
+
+2013-11-14 Eric W. Biederman <ebiederm at xmission.com>
+
+ * vfs: Fix a regression in mounting proc
+
+2013-11-14 Eric W. Biederman <ebiederm at xmission.com>
+
+ * fork: Allow CLONE_PARENT after setns(CLONE_NEWPID)
+
+2013-11-08 Eric W. Biederman <ebiederm at xmission.com>
+
+ * vfs: In d_path don't call d_dname on a mount point
+
+2013-11-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'trace-fixes-v3.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
+
+2013-11-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus-bugs' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
+
+2013-11-26 Pali Rohár <pali.rohar at gmail.com>
+
+ * Input: add key code for ambient light sensor button
+
+2013-11-26 Paul Moore <pmoore at redhat.com>
+
+ * Merge tag 'v3.12'
+
+2013-11-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2013-11-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security
+
+2013-11-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'ntb-3.13' of git://github.com/jonmason/ntb
+
+2013-11-26 Jason Gunthorpe <jgunthorpe at obsidianresearch.com>
+
+ * PCI: mvebu: Return 'unsupported' for Interrupt Line and Interrupt Pin
+
+2013-11-20 Matt Wilson <msw at amazon.com>
+
+ * xen/gnttab: leave lazy MMU mode in the case of a m2p override failure
+
+2013-11-26 Thomas Gleixner <tglx at linutronix.de>
+
+ * Merge branch 'clockevents/fixes' of git://git.linaro.org/people/dlezcano/linux into timers/urgent
+
+2013-11-26 Takashi Iwai <tiwai at suse.de>
+
+ * Merge branch 'fix/firewire' into for-linus
+
+2013-11-20 Andy Adamson <andros at netapp.com>
+
+ * SUNRPC: do not fail gss proc NULL calls with EACCES
+
+2013-11-09 Felipe Pena <felipensp at gmail.com>
+
+ * block: xen-blkfront: Fix possible NULL ptr dereference
+
+2013-11-14 Tim Gardner <tim.gardner at canonical.com>
+
+ * xen-blkfront: Silence pfn maybe-uninitialized warning
+
+2013-11-25 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * ftrace: Fix function graph with loading of modules
+
+2013-11-26 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * tracing: Allow events to have NULL strings
+
+2013-11-06 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * fbdev: sh_mobile_meram: Fix defined but not used compiler warnings
+
+2013-11-19 Sasha Levin <sasha.levin at oracle.com>
+
+ * video: kyro: fix incorrect sizes when copying to userspace
+
+2013-11-26 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'regmap/fix/doc' and 'regmap/fix/mmio' into regmap-linus
+
+2013-11-26 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regmap/fix/core' into regmap-linus
+
+2013-11-25 Stephen Warren <swarren at nvidia.com>
+
+ * regmap: use IS_ERR() to check clk_get() results
+
+2013-11-25 Tim Kryger <tim.kryger at linaro.org>
+
+ * i2c: i2c-bcm-kona: Fix module build
+
+2013-11-24 Martin Vogt <mvogt1 at gmail.com>
+
+ * i2c: i2c-diolan-u2c: different usb endpoints for DLN-2-U2C
+
+2013-11-21 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * i2c: bcm-kona: remove duplicated include
+
+2013-11-26 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Drop bus->avoid_link_reset flag
+
+2013-11-26 Kailang Yang <kailang at realtek.com>
+
+ * ALSA: hda/realtek - Set pcbeep amp for ALC668
+
+2013-11-26 Kailang Yang <kailang at realtek.com>
+
+ * ALSA: hda/realtek - Add support of ALC231 codec
+
+2013-11-20 Taras Kondratiuk <taras.kondratiuk at linaro.org>
+
+ * i2c: davinci: raw read and write endian fix
+
+2013-11-11 Aaro Koskinen <aaro.koskinen at iki.fi>
+
+ * ARM: OMAPFB: panel-sony-acx565akm: fix bad unlock balance
+
+2013-11-18 Michal Marek <mmarek at suse.cz>
+
+ * mfd: Make MFD_AS3722 depend on I2C=y
+
+2013-11-25 Stephen Warren <swarren at wwwdotorg.org>
+
+ * ARM: bcm2835: add missing #xxx-cells to I2C nodes
+
+2013-11-26 Mattia Dongili <malattia at linux.it>
+
+ * sony-laptop: do not scribble keyboard backlight registers on resume
+
+2013-11-25 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "n_gsm: race between ld close and gsmtty open"
+
+2013-11-23 Andrew Liu <andrew.liu200917 at gmail.com>
+
+ * Input: keyboard - "keycode & KEY_MAX" changes some keycode values
+
+2013-11-26 James Morris <james.l.morris at oracle.com>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity into for-linus
+
+2013-11-25 Kevin Hilman <khilman at linaro.org>
+
+ * Merge tag 'imx-fixes-3.13-2' of git://git.linaro.org/people/shawnguo/linux-2.6 into fixes
+
+2013-10-23 Doug Anderson <dianders at chromium.org>
+
+ * ARM: dts: Add max77686 RTC interrupt to cros5250-common
+
+2013-11-13 Mika Westerberg <mika.westerberg at linux.intel.com>
+
+ * HID: i2c-hid: disable interrupt on suspend
+
+2013-11-25 Dave Martin <Dave.Martin at arm.com>
+
+ * ARM: vexpress/TC2: Implement MCPM power_down_finish()
+
+2013-11-11 Michal Marek <mmarek at suse.cz>
+
+ * PCI: Omit PCI ID macro strings to shorten quirk names
+
+2013-11-24 Geyslan G. Bem <geyslan at gmail.com>
+
+ * selinux: fix possible memory leak
+
+2013-11-24 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * PCI: Move device_del() from pci_stop_dev() to pci_destroy_dev()
+
+2013-11-18 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Revert "workqueue: allow work_on_cpu() to be called recursively"
+
+2013-11-25 Thierry Reding <thierry.reding at gmail.com>
+
+ * ARM: tegra: Provide dummy powergate implementation
+
+2013-11-19 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'omap-for-v3.13/more-fixes-for-merge-window-take2' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes
+
+2013-11-12 Olof Johansson <olof at lixom.net>
+
+ * ARM: omap: fix warning with LPAE build
+
+2013-11-18 Alexander Duyck <alexander.h.duyck at intel.com>
+
+ * PCI: Avoid unnecessary CPU switch when calling driver .probe() method
+
+2013-11-25 Laxman Dewangan <ldewangan at nvidia.com>
+
+ * irq: Enable all irqs unconditionally in irq_resume
+
+2013-11-19 Luciano Coelho <luciano.coelho at intel.com>
+
+ * iwlwifi: mvm: use a cast to calculate the last seqno from the next one
+
+2013-11-19 Luciano Coelho <luciano.coelho at intel.com>
+
+ * iwlwifi: mvm: set seqno also when no keys are set
+
+2013-11-05 Alexander Bondar <alexander.bondar at intel.com>
+
+ * iwlwifi: pcie: stop sending commands to dead firmware
+
+2013-11-22 Olav Haugan <ohaugan at codeaurora.org>
+
+ * staging: zsmalloc: Ensure handle is never 0 on success
+
+2013-11-21 Peng Tao <bergwolf at gmail.com>
+
+ * staging/lustre/ptlrpc: fix ptlrpc_stop_pinger logic
+
+2013-11-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'regulator-v3.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
+
+2013-11-25 Roberto Sassu <roberto.sassu at polito.it>
+
+ * ima: make a copy of template_fmt in template_desc_init_fields()
+
+2013-11-07 Peter Hurley <peter at hurleysoftware.com>
+
+ * n_tty: Protect minimum_to_wake reset for concurrent readers
+
+2013-11-22 Florian Meier <florian.meier at koalo.de>
+
+ * ASoC: Add support for BCM2835
+
+2013-11-19 Peter Hurley <peter at hurleysoftware.com>
+
+ * tty: Reset hupped state on open
+
+2013-11-22 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * TTY: amiserial, add missing platform check
+
+2013-11-21 Catalin Marinas <catalin.marinas at arm.com>
+
+ * arm64: Unmask asynchronous aborts when in kernel mode
+
+2013-11-14 Catalin Marinas <catalin.marinas at arm.com>
+
+ * arm64: dts: Reserve the memory used for secondary CPU release address
+
+2013-11-12 Marc Zyngier <Marc.Zyngier at arm.com>
+
+ * arm64: let the core code deal with preempt_count
+
+2013-11-24 Steve French <smfrench at gmail.com>
+
+ * [CIFS] Do not use btrfs refcopy ioctl for SMB2 copy offload
+
+2013-11-22 Mark Brown <broonie at linaro.org>
+
+ * ASoC: wm8990: Mark the register map as dirty when powering down
+
+2013-11-08 Roberto Sassu <roberto.sassu at polito.it>
+
+ * ima: do not send field length to userspace for digest of ima template
+
+2013-11-08 Roberto Sassu <roberto.sassu at polito.it>
+
+ * ima: do not include field length in template digest calc for ima template
+
+2013-11-22 Martin Schwidefsky <schwidefsky at de.ibm.com>
+
+ * s390/mm: handle asce-type exceptions as normal page fault
+
+2013-11-22 Martin Schwidefsky <schwidefsky at de.ibm.com>
+
+ * s390,time: revert direct ktime path for s390 clockevent device
+
+2013-11-22 Martin Schwidefsky <schwidefsky at de.ibm.com>
+
+ * s390/time,vdso: convert to the new update_vsyscall interface
+
+2013-11-21 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * s390/uaccess: add missing page table walk range check
+
+2013-11-14 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * pinctrl: rockchip: missing unlock on error in rockchip_set_pull()
+
+2013-11-10 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * pinctrl: abx500: fix some more bitwise AND tests
+
+2013-11-08 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * pinctrl: rockchip: testing the wrong variable
+
+2013-11-19 Axel Lin <axel.lin at ingics.com>
+
+ * gpio: ucb1400: Add MODULE_ALIAS
+
+2013-11-19 Alexandre Courbot <acourbot at nvidia.com>
+
+ * gpiolib: fix of_find_gpio() when OF not defined
+
+2013-11-13 Michal Nazarewicz <mina86 at mina86.com>
+
+ * gpio: fix memory leak in error path
+
+2013-11-07 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * gpio: rcar: NULL dereference on error in probe()
+
+2013-11-07 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * gpio: msm: make msm_gpio.summary_irq signed for error handling
+
+2013-11-07 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * gpio: mvebu: make mvchip->irqbase signed for error handling
+
+2013-11-16 Alexandre Courbot <acourbot at nvidia.com>
+
+ * gpiolib: use dedicated flags for GPIO properties
+
+2013-11-21 Vineet Gupta <vgupta at synopsys.com>
+
+ * ARC: Add guard macro to uapi/asm/unistd.h
+
+2013-11-15 Vineet Gupta <vgupta at synopsys.com>
+
+ * ARC: extable: Enable sorting at build time
+
+2013-11-24 Kent Overstreet <kmo at daterainc.com>
+
+ * block: submit_bio_wait() conversions
+
+2013-11-22 Randy Dunlap <rdunlap at infradead.org>
+
+ * slab.h: remove duplicate kmalloc declaration and fix kernel-doc warnings
+
+2013-11-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
+
+2013-11-19 Sourav Poddar <sourav.poddar at ti.com>
+
+ * spi/qspi: Fix qspi remove path.
+
+2013-11-19 Sourav Poddar <sourav.poddar at ti.com>
+
+ * spi/qspi: cleanup pm_runtime error check.
+
+2013-11-11 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * spi/qspi: set correct platform drvdata in ti_qspi_probe()
+
+2013-11-12 Mika Westerberg <mika.westerberg at linux.intel.com>
+
+ * spi/pxa2xx: add new ACPI IDs
+
+2013-11-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/fix/pfuze100' into regulator-linus
+
+2013-11-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/fix/gpio' into regulator-linus
+
+2013-11-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/fix/fixed' into regulator-linus
+
+2013-11-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/fix/arizona' into regulator-linus
+
+2013-11-22 Stephen Warren <swarren at nvidia.com>
+
+ * ASoC: dapm: Use SND_SOC_DAPM_INIT_REG_VAL in SND_SOC_DAPM_MUX
+
+2013-11-21 Jarkko Nikula <jarkko.nikula at linux.intel.com>
+
+ * ASoC: Rename mid-x86 directory to intel
+
+2013-11-07 Kuninori Morimoto <kuninori.morimoto.gx at renesas.com>
+
+ * ASoC: rcar: select REGMAP
+
+2013-11-20 Nicolin Chen <b42378 at freescale.com>
+
+ * ASoC: soc-pcm: move DAIs parameters cleaning into hw_free()
+
+2013-11-13 Nicolin Chen <b42378 at freescale.com>
+
+ * ASoC: soc-pcm: add symmetry for channels and sample bits
+
+2013-11-09 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * irqchip: renesas-intc-irqpin: Fix register bitfield shift calculation
+
+2013-11-06 Simon Horman <horms+renesas at verge.net.au>
+
+ * ARM: shmobile: lager: phy fixup needs CONFIG_PHYLIB
+
+2013-11-24 Phillip Lougher <phillip at squashfs.org.uk>
+
+ * Squashfs: fix failure to unlock pages on decompress error
+
+2013-11-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Revert "KEYS: verify a certificate is signed by a 'trusted' key"
+
+2013-11-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Revert "ima: define '_ima' as a builtin 'trusted' keyring"
+
+2013-11-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
+
+2013-11-12 Eric Van Hensbergen <ericvh at gmail.com>
+
+ * net/9p: remove virtio default hack and set appropriate bits instead
+
+2013-10-21 Geyslan G. Bem <geyslan at gmail.com>
+
+ * 9p: remove useless 'name' variable and assignment
+
+2013-10-21 Geyslan G. Bem <geyslan at gmail.com>
+
+ * 9p: fix return value in case in v9fs_fid_xattr_set()
+
+2013-11-09 Li Wang <liwang at ubuntukylin.com>
+
+ * ceph: allocate non-zero page to fscache in readpage()
+
+2013-10-31 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: wake up 'safe' waiters when unregistering request
+
+2013-09-26 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: cleanup aborted requests when re-sending requests.
+
+2013-09-22 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: handle race between cap reconnect and cap release
+
+2013-09-22 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: set caps count after composing cap reconnect message
+
+2013-11-17 Tejun Heo <tj at kernel.org>
+
+ * sysfs: use a separate locking class for open files depending on mmap
+
+2013-11-17 Samir Benmendil <samir.benmendil at gmail.com>
+
+ * ahci: add Marvell 9230 to the AHCI PCI device list
+
+2013-11-20 Yijing Wang <wangyijing at huawei.com>
+
+ * ata: fix acpi_bus_get_device() return value check
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for_linus' of git://cavan.codon.org.uk/platform-drivers-x86
+
+2013-09-10 Li Bin <huawei.libin at huawei.com>
+
+ * workqueue: fix pool ID allocation leakage and remove BUILD_BUG_ON() in init_workqueues
+
+2013-09-09 Li Bin <huawei.libin at huawei.com>
+
+ * workqueue: fix comment typo for __queue_work()
+
+2013-09-05 Tejun Heo <tj at kernel.org>
+
+ * workqueue: fix ordered workqueues in NUMA setups
+
+2013-11-14 Oleg Nesterov <oleg at redhat.com>
+
+ * workqueue: swap set_cpus_allowed_ptr() and PF_NO_SETAFFINITY
+
+2013-11-21 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * pata_arasan_cf: add missing clk_disable_unprepare() on error path
+
+2013-11-22 Alistair Popple <alistair at popple.id.au>
+
+ * ahci: add support for IBM Akebono platform device
+
+2013-11-22 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Merge branch 'next' into for-linus
+
+2013-11-22 Tejun Heo <tj at kernel.org>
+
+ * cgroup: use a dedicated workqueue for cgroup destruction
+
+2013-11-22 Martin Schwidefsky <schwidefsky at de.ibm.com>
+
+ * time: Fix 1ns/tick drift w/ GENERIC_TIME_VSYSCALL_OLD
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.13-rc1
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'ecryptfs-3.13-rc1-quiet-checkers' of git://git.kernel.org/pub/scm/linux/kernel/git/tyhicks/ecryptfs
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-fix2-3.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pci-v3.13-fixes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://git.linaro.org/people/rmk/linux-arm
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'next' of git://git.kernel.org/pub/scm/virt/kvm/kvm
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kvack.org/~bcrl/aio-next
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.13' of git://linux-nfs.org/~bfields/linux
+
+2013-11-21 Takashi Sakamoto <o-takashi at sakamocchi.jp>
+
+ * ALSA: firewire-lib: fix wrong value for FDF field as an empty packet
+
+2013-11-22 David Henningsson <david.henningsson at canonical.com>
+
+ * ALSA: hda - Set current_headset_type to ALC_HEADSET_TYPE_ENUM (janitorial)
+
+2013-11-22 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Provide missing pin configs for VAIO with ALC260
+
+2013-11-21 Stephen Warren <swarren at nvidia.com>
+
+ * spi: core: invert success test in devm_spi_register_master
+
+2013-11-21 Herbert Xu <herbert at gondor.apana.org.au>
+
+ * gso: handle new frag_list of frags GRO packets
+
+2013-11-21 Johannes Berg <johannes.berg at intel.com>
+
+ * genetlink: fix genl_set_err() group ID
+
+2013-11-21 Johannes Berg <johannes.berg at intel.com>
+
+ * genetlink: fix genlmsg_multicast() bug
+
+2013-11-21 Daniel Borkmann <dborkman at redhat.com>
+
+ * packet: fix use after free race in send path when dev is released
+
+2013-11-20 David Sterba <dsterba at suse.cz>
+
+ * Documentation: filesystems: update btrfs tools section
+
+2013-11-20 David Sterba <dsterba at suse.cz>
+
+ * Documentation: filesystems: add new btrfs mount options
+
+2013-11-21 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.13-5' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2013-11-20 Courtney Cavin <courtney.cavin at sonymobile.com>
+
+ * regmap: make sure we unlock on failure in regmap_bulk_write
+
+2013-11-21 David Henningsson <david.henningsson at canonical.com>
+
+ * ALSA: hda - Add headset quirk for Dell Inspiron 3135
+
+2013-11-21 David Herrmann <dh.herrmann at gmail.com>
+
+ * drm/sysfs: fix hotplug regression since lifetime changes
+
+2013-11-20 Benjamin Tissoires <benjamin.tissoires at redhat.com>
+
+ * HID: kye: fix unresponsive keyboard
+
+2013-11-20 Benjamin Tissoires <benjamin.tissoires at redhat.com>
+
+ * HID: kye: Add report fixup for Genius Manticore Keyboard
+
+2013-11-18 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * KVM: kvm_clear_guest_page(): fix empty_zero_page usage
+
+2013-11-21 KaiChung Cheng <kenny_cheng at wistron.com>
+
+	* HID: multitouch: add PID VID to support 1 new Wistron optical touch device
+
+2013-11-07 Benjamin Tissoires <benjamin.tissoires at redhat.com>
+
+ * HID: appleir: force input to be set
+
+2013-11-21 Inki Dae <inki.dae at samsung.com>
+
+ * drm/exynos: g2d: fix memory leak to userptr
+
+2013-11-21 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'ttm-fixes-3.13' of git://people.freedesktop.org/~thomash/linux into drm-fixes
+
+2013-11-21 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'vmwgfx-fixes-3.13' of git://people.freedesktop.org/~thomash/linux into drm-fixes
+
+2013-11-21 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm-intel-fixes-2013-11-20' of git://people.freedesktop.org/~danvet/drm-intel into drm-fixes
+
+2013-11-21 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-next-3.13' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
+
+2013-11-21 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix the headphone jack detection on Sony VAIO TX
+
+2013-11-21 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix missing bass speaker on ASUS N550
+
+2013-11-20 Eric Seppanen <eric at purestorage.com>
+
+ * iscsi-target: chap auth shouldn't match username with trailing garbage
+
+2013-11-20 Eric Seppanen <eric at purestorage.com>
+
+ * iscsi-target: fix extract_param to handle buffer length corner case
+
+2013-11-20 Haiyang Zhang <haiyangz at microsoft.com>
+
+ * MAINTAINERS - add keyboard driver to Hyper-V file list
+
+2013-11-19 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Input: atmel-wm97xx - fix compile error
+
+2013-11-19 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * Input: hp_sdc_rtc - unlock on error in hp_sdc_rtc_read_i8042timer()
+
+2013-11-19 Xie XiuQi <xiexiuqi at huawei.com>
+
+ * Input: cyttsp4 - remove unnecessary work pending test
+
+2013-11-20 David Sterba <dsterba at suse.cz>
+
+ * btrfs: update kconfig help text
+
+2013-11-18 Akinobu Mita <akinobu.mita at gmail.com>
+
+ * btrfs: fix bio_size_ok() for max_sectors > 0xffff
+
+2013-11-14 Steven Rostedt <rostedt at goodmis.org>
+
+ * btrfs: Use trace condition for get_extent tracepoint
+
+2013-11-14 Anand Jain <Anand.Jain at oracle.com>
+
+ * btrfs: fix typo in the log message
+
+2013-11-14 Miao Xie <miaox at cn.fujitsu.com>
+
+ * Btrfs: fix list delete warning when removing ordered root from the list
+
+2013-11-13 Stefan Behrens <sbehrens at giantdisaster.de>
+
+ * Btrfs: print bytenr instead of page pointer in check-int
+
+2013-11-12 Wang Shilong <wangsl.fnst at cn.fujitsu.com>
+
+ * Btrfs: remove dead codes from ctree.h
+
+2013-11-06 Filipe David Borba Manana <fdmanana at gmail.com>
+
+ * Btrfs: don't wait for ordered data outside desired range
+
+2013-11-06 Liu Bo <bo.li.liu at oracle.com>
+
+ * Btrfs: fix lockdep error in async commit
+
+2013-10-03 Prarit Bhargava <prarit at redhat.com>
+
+ * x86, wmi fix modalias_show return values
+
+2013-11-15 Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy at linux.intel.com>
+
+ * ipc: Added support for IPC interrupt mode
+
+2013-11-14 Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy at linux.intel.com>
+
+ * ipc: Handle error conditions in ipc command
+
+2013-11-14 Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy at linux.intel.com>
+
+ * ipc: Enabled ipc support for additional intel platforms
+
+2013-11-14 Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy at linux.intel.com>
+
+ * ipc: Added platform data structure
+
+2013-10-24 Takashi Iwai <tiwai at suse.de>
+
+ * thinkpad_acpi: Fix build error when CONFIG_SND_MAX_CARDS > 32
+
+2013-11-07 Olof Johansson <olof at lixom.net>
+
+ * platform: add chrome platform directory
+
+2013-10-29 Alex Hung <alex.hung at canonical.com>
+
+ * hp-wmi: detect "2009 BIOS or later" flag by WMI 0x0d for wireless cmd
+
+2013-11-12 Alex Hung <alex.hung at canonical.com>
+
+ * dell-wmi: Add KEY_MICMUTE to bios_to_linux_keycode
+
+2013-11-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
+
+2013-11-20 Stephen Boyd <sboyd at codeaurora.org>
+
+ * clocksource: arm_arch_timer: Hide eventstream Kconfig on non-ARM
+
+2013-11-19 Will Deacon <will.deacon at arm.com>
+
+ * ARM: 7894/1: kconfig: select GENERIC_CLOCKEVENTS if HAVE_ARM_ARCH_TIMER
+
+2013-11-19 Will Deacon <will.deacon at arm.com>
+
+ * ARM: 7893/1: bitops: only emit .arch_extension mp if CONFIG_SMP
+
+2013-11-18 Yinghai Lu <yinghai at kernel.org>
+
+ * PCI: Remove duplicate pci_disable_device() from pcie_portdrv_remove()
+
+2013-11-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mattst88/alpha
+
+2013-11-18 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * cpufreq: exynos: Remove unwanted EXPORT_SYMBOL
+
+2013-11-20 Hong Zhiguo <zhiguohong at tencent.com>
+
+ * Update of blkg_stat and blkg_rwstat may happen in bh context. While u64_stats_fetch_retry is only preempt_disable on 32bit UP system. This is not enough to avoid preemption by bh and may read strange 64 bit value.
+
+2013-11-20 Stephen Warren <swarren at nvidia.com>
+
+ * cpufreq: tegra: don't error target() when suspended
+
+2013-11-21 Ulrich Weigand <Ulrich.Weigand at de.ibm.com>
+
+ * powerpc: Wrong DWARF CFI in the kernel vdso for little-endian / ELFv2
+
+2013-11-20 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86-64, copy_user: Use leal to produce 32-bit results
+
+2013-11-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-2-3.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2013-11-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'next' of git://git.infradead.org/users/vkoul/slave-dma
+
+2013-11-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.dk/linux-block
+
+2013-11-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'md/3.13' of git://neil.brown.name/md
+
+2013-11-20 Trond Myklebust <Trond.Myklebust at netapp.com>
+
+ * NFSv4: close needs to handle NFS4ERR_ADMIN_REVOKED
+
+2013-11-19 Trond Myklebust <Trond.Myklebust at netapp.com>
+
+ * NFSv4: Update list of irrecoverable errors on DELEGRETURN
+
+2013-11-15 Andy Adamson <andros at netapp.com>
+
+ * NFSv4 wait on recovery for async session errors
+
+2013-11-20 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi-target: Expose default_erl as TPG attribute
+
+2013-11-19 Hannes Reinecke <hare at suse.de>
+
+ * target_core_configfs: split up ALUA supported states
+
+2013-11-19 Hannes Reinecke <hare at suse.de>
+
+ * target_core_alua: Make supported states configurable
+
+2013-11-19 Hannes Reinecke <hare at suse.de>
+
+ * target_core_alua: Store supported ALUA states
+
+2013-04-18 Jon Mason <jon.mason at intel.com>
+
+ * NTB: Disable interrupts and poll under high load
+
+2013-10-03 Jon Mason <jon.mason at intel.com>
+
+ * NTB: Enable Snoop on Primary Side
+
+2013-11-01 Jon Mason <jon.mason at intel.com>
+
+ * NTB: Document HW errata
+
+2013-10-21 Michael Opdenacker <michael.opdenacker at free-electrons.com>
+
+ * NTB: remove duplicate defines
+
+2013-11-19 Jon Mason <jon.mason at intel.com>
+
+ * NTB: correct dmaengine_get/put usage
+
+2013-09-09 Jon Mason <jon.mason at intel.com>
+
+ * NTB: Fix ntb_transport link down race
+
+2013-10-02 Alexander Gordeev <agordeev at redhat.com>
+
+ * ntb: Fix missed call to pci_enable_msix()
+
+2013-09-13 Jon Mason <jon.mason at intel.com>
+
+ * NTB: Fix NTB-RP Link Up
+
+2013-09-06 Jon Mason <jon.mason at intel.com>
+
+ * NTB: Xeon Doorbell errata workaround
+
+2013-11-20 Yijing Wang <wangyijing at huawei.com>
+
+ * hwmon: (acpi_power_meter) Fix acpi_bus_get_device() return value check
+
+2013-11-20 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix unbalanced runtime PM notification at resume
+
+2013-11-20 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/wm8962' into asoc-linus
+
+2013-11-20 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/rcar' into asoc-linus
+
+2013-11-20 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/fsl' into asoc-linus
+
+2013-11-20 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/dma' into asoc-linus
+
+2013-11-20 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/cs42l52' into asoc-linus
+
+2013-11-20 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/blackfin' into asoc-linus
+
+2013-11-20 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915: Fix gen3 self-refresh watermarks
+
+2013-11-08 Florian Echtler <floe at butterbrot.org>
+
+ * Input: add sur40 driver for Samsung SUR40 (aka MS Surface 2.0/Pixelsense)
+
+2013-11-20 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2013-11-20 David Henningsson <david.henningsson at canonical.com>
+
+ * ALSA: hda - A casual Dell Headset quirk
+
+2013-11-14 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/ttm: Remove set_need_resched from the ttm fault handler
+
+2013-11-17 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/ttm: Don't move non-existing data
+
+2013-11-19 Sasha Levin <sasha.levin at oracle.com>
+
+ * kvm: mmu: delay mmu audit activation
+
+2013-11-19 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * s390/mm: optimize copy_page
+
+2013-11-19 Stefan Weinhuber <wein at de.ibm.com>
+
+ * s390/dasd: validate request size before building CCW/TCW request
+
+2013-11-19 Hendrik Brueckner <brueckner at linux.vnet.ibm.com>
+
+ * s390/signal: always restore saved runtime instrumentation psw bit
+
+2013-11-19 Steve French <smfrench at gmail.com>
+
+ * Check SMB3 dialects against downgrade attacks
+
+2013-11-10 Phillip Lougher <phillip at squashfs.org.uk>
+
+ * Squashfs: Check stream is not NULL in decompressor_multi.c
+
+2013-11-13 Phillip Lougher <phillip at squashfs.org.uk>
+
+ * Squashfs: Directly decompress into the page cache for file data
+
+2013-10-31 Phillip Lougher <phillip at squashfs.org.uk>
+
+ * Squashfs: Restructure squashfs_readpage()
+
+2013-11-19 Jens Axboe <axboe at kernel.dk>
+
+ * blk-mq: add blktrace insert event trace
+
+2013-11-19 Shaohua Li <shli at fusionio.com>
+
+ * virtio-blk: virtqueue_kick() must be ordered with other virtqueue operations
+
+2013-10-31 Mahesh Rajashekhara <Mahesh.Rajashekhara at pmcs.com>
+
+ * aacraid: prevent invalid pointer dereference
+
+2013-11-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2013-11-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc
+
+2013-11-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'please-pull-fixia64' of git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux
+
+2013-11-19 Sasha Levin <sasha.levin at oracle.com>
+
+ * aio: nullify aio->ring_pages after freeing it
+
+2013-11-19 Sasha Levin <sasha.levin at oracle.com>
+
+ * aio: prevent double free in ioctx_alloc
+
+2013-11-14 Tim Gardner <tim.gardner at canonical.com>
+
+ * SELinux: security_load_policy: Silence frame-larger-than warning
+
+2013-11-19 Richard Haines <richard_c_haines at btinternet.com>
+
+ * SELinux: Update policy version to support constraints info
+
+2013-11-18 Kirill A. Shutemov <kirill.shutemov at linux.intel.com>
+
+ * kernel/bounds: avoid circular dependencies in generated headers
+
+2013-11-19 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'genetlink_mcast'
+
+2013-11-19 Johannes Berg <johannes.berg at intel.com>
+
+ * genetlink: make multicast groups const, prevent abuse
+
+2013-11-19 Johannes Berg <johannes.berg at intel.com>
+
+ * genetlink: pass family to functions using groups
+
+2013-11-19 Johannes Berg <johannes.berg at intel.com>
+
+ * genetlink: add and use genl_set_err()
+
+2013-11-19 Johannes Berg <johannes.berg at intel.com>
+
+ * genetlink: remove family pointer from genl_multicast_group
+
+2013-11-19 Johannes Berg <johannes.berg at intel.com>
+
+ * genetlink: remove genl_unregister_mc_group()
+
+2013-11-19 Johannes Berg <johannes.berg at intel.com>
+
+ * hsr: don't call genl_unregister_mc_group()
+
+2013-11-19 Johannes Berg <johannes.berg at intel.com>
+
+ * quota/genetlink: use proper genetlink multicast APIs
+
+2013-11-18 Trond Myklebust <Trond.Myklebust at netapp.com>
+
+ * NFS: Fix a warning in nfs_setsecurity
+
+2013-11-13 Anna Schumaker <bjschuma at netapp.com>
+
+ * NFS: Enabling v4.2 should not recompile nfsd and lockd
+
+2013-11-19 Andrey Vagin <avagin at openvz.org>
+
+ * tcp: don't update snd_nxt, when a socket is switched from repair mode
+
+2013-11-19 Samuel Li <samuel.li at amd.com>
+
+ * drm/radeon: hook up backlight functions for CI and KV family.
+
+2013-11-19 Ying Xue <ying.xue at windriver.com>
+
+ * atm: idt77252: fix dev refcnt leak
+
+2013-11-19 fan.du <fan.du at windriver.com>
+
+ * xfrm: Release dst if this dst is improper for vti tunnel
+
+2013-11-19 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'acpi-hotplug'
+
+2013-11-19 Mika Westerberg <mika.westerberg at linux.intel.com>
+
+ * PCI / hotplug / ACPI: Drop unused acpiphp_debug declaration
+
+2013-11-19 Johannes Berg <johannes.berg at intel.com>
+
+ * netlink: fix documentation typo in netlink_set_err()
+
+2013-11-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'arc-v3.13-rc1-part2' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc
+
+2013-11-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
+
+2013-11-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml
+
+2013-11-19 Arnaldo Carvalho de Melo <acme at redhat.com>
+
+ * tools lib traceevent: Fix conversion of pointer to integer of different size
+
+2013-11-17 Kuninori Morimoto <kuninori.morimoto.gx at renesas.com>
+
+ * ASoC: rcar: fixup dma_async_issue_pending() timing
+
+2013-11-08 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * ASoC: rcar: off by one in rsnd_scu_set_route()
+
+2013-11-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'irq-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-15 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * genirq: Correct fuzzy and fragile IRQ_RETVAL() definition
+
+2013-11-19 Jens Axboe <axboe at kernel.dk>
+
+ * blk-mq: ensure that we set REQ_IO_STAT so diskstats work
+
+2013-11-17 Shigeru Yoshida <shigeru.yoshida at gmail.com>
+
+ * sched: Fix a trivial typo in comments
+
+2013-11-19 Alex Shi <alex.shi at linaro.org>
+
+ * sched: Remove unused variable in 'struct sched_domain'
+
+2013-11-19 Peter Zijlstra <peterz at infradead.org>
+
+ * sched: Avoid NULL dereference on sd_busy
+
+2013-11-12 Srikar Dronamraju <srikar at linux.vnet.ibm.com>
+
+ * sched: Check sched_domain before computing group power
+
+2013-11-18 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915: Replicate BIOS eDP bpp clamping hack for hsw
+
+2013-11-15 Vince Weaver <vincent.weaver at maine.edu>
+
+ * perf/trace: Properly use u64 to hold event_id
+
+2013-09-13 Peter Zijlstra <peterz at infradead.org>
+
+ * perf: Remove fragile swevent hlist optimization
+
+2013-11-14 Peter Zijlstra <peterz at infradead.org>
+
+ * ftrace, perf: Avoid infinite event generation loop
+
+2013-11-18 Steven Rostedt <rostedt at goodmis.org>
+
+ * tools lib traceevent: Fix use of multiple options in processing field
+
+2013-11-18 Namhyung Kim <namhyung.kim at lge.com>
+
+ * perf header: Fix possible memory leaks in process_group_desc()
+
+2013-11-18 Namhyung Kim <namhyung.kim at lge.com>
+
+ * perf header: Fix bogus group name
+
+2013-11-16 Frederic Weisbecker <fweisbec at gmail.com>
+
+ * perf tools: Tag thread comm as overriden
+
+2013-11-19 Kirill A. Shutemov <kirill.shutemov at linux.intel.com>
+
+ * x86/mm: Implement ASLR for hugetlb mappings
+
+2013-11-15 Cyrill Gorcunov <gorcunov at gmail.com>
+
+ * x86/mm: Unify pte_to_pgoff() and pgoff_to_pte() helpers
+
+2013-11-18 Chris Wilson <chris at chris-wilson.co.uk>
+
+ * drm/i915: Do not enable package C8 on unsupported hardware
+
+2013-11-19 David Henningsson <david.henningsson at canonical.com>
+
+ * ALSA: hda - Also enable mute/micmute LED control for "Lenovo dock" fixup
+
+2013-11-14 majianpeng <majianpeng at gmail.com>
+
+ * md/raid5: Use conf->device_lock protect changing of multi-thread resources.
+
+2013-11-14 majianpeng <majianpeng at gmail.com>
+
+ * md/raid5: Before freeing old multi-thread worker, it should flush them.
+
+2013-11-14 majianpeng <majianpeng at gmail.com>
+
+ * md/raid5: For stripe with R5_ReadNoMerge, we replace REQ_FLUSH with REQ_NOMERGE.
+
+2013-11-14 Aurelien Jarno <aurelien at aurel32.net>
+
+ * UAPI: include <asm/byteorder.h> in linux/raid/md_p.h
+
+2013-11-15 majianpeng <majianpeng at gmail.com>
+
+ * raid1: Rewrite the implementation of iobarrier.
+
+2013-11-19 Al Viro <viro at ZenIV.linux.org.uk>
+
+ * seq_file: always clear m->count when we free m->buf
+
+2013-11-18 Olof Johansson <olof at lixom.net>
+
+ * ARM: 7892/1: Fix warning for V7M builds
+
+2013-11-18 Tony Lindgren <tony at atomide.com>
+
+ * ARM: OMAP2+: Remove legacy omap4_twl6030_hsmmc_init
+
+2013-11-18 Tony Lindgren <tony at atomide.com>
+
+ * ARM: OMAP2+: Remove legacy mux code for display.c
+
+2013-11-19 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-sleep'
+
+2013-11-19 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-cpufreq'
+
+2013-11-19 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-runtime'
+
+2013-11-19 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-tools'
+
+2013-11-19 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-cpuidle'
+
+2013-11-19 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'acpi-video'
+
+2013-11-19 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'acpi-ec'
+
+2013-11-18 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / scan: Set flags.match_driver in acpi_bus_scan_fixed()
+
+2013-11-14 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / PCI root: Clear driver_data before failing enumeration
+
+2013-11-14 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / hotplug: Fix PCI host bridge hot removal
+
+2013-11-14 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / hotplug: Fix acpi_bus_get_device() return value check
+
+2013-11-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://www.linux-watchdog.org/linux-watchdog
+
+2013-11-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'i2c/for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux
+
+2013-11-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
+
+2013-11-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-v3.13' of git://git.infradead.org/battery-2.6
+
+2013-11-18 Tony Lindgren <tony at atomide.com>
+
+ * ARM: OMAP2+: Fix undefined reference to set_cntfreq
+
+2013-11-18 Tony Lindgren <tony at atomide.com>
+
+ * gpio: twl4030: Fix passing of pdata in the device tree case
+
+2013-11-18 Steve French <smfrench at gmail.com>
+
+ * Removed duplicated (and unneeded) goto
+
+2013-11-16 Steve French <smfrench at gmail.com>
+
+ * CIFS: Fix SMB2/SMB3 Copy offload support (refcopy) for large files
+
+2013-11-18 Tony Lindgren <tony at atomide.com>
+
+ * gpio: twl4030: Fix regression for twl gpio output
+
+2013-11-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'topic/kbuild-fixes-for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
+
+2013-11-18 Stephen Rothwell <sfr at canb.auug.org.au>
+
+ * sparc64: merge fix
+
+2013-11-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'v4l_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
+
+2013-11-18 Kirill A. Shutemov <kirill.shutemov at linux.intel.com>
+
+ * sparc64: fix build regression
+
+2013-11-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'linux_next' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac
+
+2013-11-13 Guenter Roeck <linux at roeck-us.net>
+
+ * hwmon: (nct6775) NCT6791 supports weight control only for CPUFAN
+
+2013-11-13 Guenter Roeck <linux at roeck-us.net>
+
+ * hwmon: (nct6775) Monitor additional temperature registers
+
+2013-11-09 Arnaud Ebalard <arno at natisbad.org>
+
+ * hwmon: (lm75) Add support for GMT G751 chip
+
+2013-11-18 Ajit Khaparde <ajit.khaparde at emulex.com>
+
+ * be2net: Delete secondary unicast MAC addresses during be_close
+
+2013-11-18 Ajit Khaparde <ajit.khaparde at emulex.com>
+
+ * be2net: Fix unconditional enabling of Rx interface options
+
+2013-11-18 Zhi Yong Wu <wuzhy at linux.vnet.ibm.com>
+
+ * net, virtio_net: replace the magic value
+
+2013-08-17 Guenter Roeck <linux at roeck-us.net>
+
+ * watchdog: w83627hf: Use helper functions to access superio registers
+
+2013-08-17 Guenter Roeck <linux at roeck-us.net>
+
+ * watchdog: w83627hf: Enable watchdog device only if not already enabled
+
+2013-08-17 Guenter Roeck <linux at roeck-us.net>
+
+ * watchdog: w83627hf: Enable watchdog only once
+
+2013-10-28 Guenter Roeck <linux at roeck-us.net>
+
+ * watchdog: w83627hf: Convert to watchdog infrastructure
+
+2013-11-18 Akinobu Mita <akinobu.mita at gmail.com>
+
+ * bio: fix argument of __bio_add_page() for max_sectors > 0xffff
+
+2013-11-18 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * i2c: bcm-kona: fix error return code in bcm_kona_i2c_probe()
+
+2013-11-17 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * xen/arm: p2m_init and p2m_lock should be static
+
+2013-11-18 Josh Boyer <jwboyer at redhat.com>
+
+ * arm/xen: Export phys_to_mach to fix Xen module link errors
+
+2013-11-18 Michel Dänzer <michel.daenzer at amd.com>
+
+ * drm/radeon/cik: Add macrotile mode array query
+
+2013-11-15 Ben Hutchings <ben at decadent.org.uk>
+
+ * deb-pkg: Inhibit initramfs builders if CONFIG_BLK_DEV_INITRD is not set
+
+2013-11-08 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Make vmwgfx dma buffers prime aware
+
+2013-11-08 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Make surfaces prime-aware
+
+2013-11-13 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Hook up the prime ioctls
+
+2013-11-15 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * spi: spi-mxs: fix reference leak to master in mxs_spi_remove()
+
+2013-11-10 Kuninori Morimoto <kuninori.morimoto.gx at renesas.com>
+
+ * ASoC: rcar: fixup mod access before checking
+
+2013-11-13 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/ttm: Add a minimal prime implementation for ttm base objects
+
+2013-11-16 Shawn Guo <shawn.guo at linaro.org>
+
+ * ARM: dts: imx6qdl: disable spdif "rxtx5" clock option
+
+2013-11-14 H. Peter Anvin <hpa at zytor.com>
+
+ * Revert "init/Kconfig: add option to disable kernel compression"
+
+2013-11-16 Victor Kamensky <victor.kamensky at linaro.org>
+
+ * watchdog: omap_wdt: raw read and write endian fix
+
+2013-11-11 Uwe Kleine-König <u.kleine-koenig at pengutronix.de>
+
+ * watchdog: sirf: don't depend on dummy value of CLOCK_TICK_RATE
+
+2013-11-17 Andreas Werner <wernerandy at gmx.de>
+
+ * i2c: i2c-eg20t: do not print error message in syslog if no ACK received
+
+2013-11-17 Roland Dreier <roland at purestorage.com>
+
+ * Merge branches 'cma', 'cxgb4', 'flowsteer', 'ipoib', 'misc', 'mlx4', 'mlx5', 'nes', 'ocrdma', 'qib' and 'srp' into for-next
+
+2013-11-06 Matan Barak <matanb at mellanox.com>
+
+ * IB/core: Re-enable create_flow/destroy_flow uverbs
+
+2013-11-06 Yann Droneaud <ydroneaud at opteya.com>
+
+ * IB/core: extended command: an improved infrastructure for uverbs commands
+
+2013-11-06 Yann Droneaud <ydroneaud at opteya.com>
+
+ * IB/core: Remove ib_uverbs_flow_spec structure from userspace
+
+2013-11-12 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * um: Remove unused declarations from <as-layout.h>
+
+2013-10-27 Michael Opdenacker <michael.opdenacker at free-electrons.com>
+
+ * um: remove unused STDIO_CONSOLE Kconfig param
+
+2013-09-27 Ramkumar Ramachandra <artagnon at gmail.com>
+
+ * um/vdso: add .gitignore for a couple of targets
+
+2013-11-16 Fenghua Yu <fenghua.yu at intel.com>
+
+ * x86-64, copy_user: Remove zero byte check before copy user buffer.
+
+2013-11-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.samba.org/sfrench/cifs-2.6
+
+2013-11-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'nfs-for-3.13-2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
+
+2013-11-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-fix-3.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2013-11-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2013-10-22 Joe Perches <joe at perches.com>
+
+ * IB/ucma: Convert use of typedef ctl_table to struct ctl_table
+
+2013-11-15 Zhao Hongjiang <zhaohongjiang at huawei.com>
+
+ * IB/cm: Convert to using idr_alloc_cyclic()
+
+2013-11-16 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.13-4' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2013-11-16 Vinod Koul <vinod.koul at intel.com>
+
+ * Merge commit 'dmaengine-3.13-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/dmaengine
+
+2013-11-15 Steve French <smfrench at gmail.com>
+
+ * [CIFS] Warn if SMB3 encryption required by server
+
+2013-11-15 Steve French <smfrench at gmail.com>
+
+ * setfacl removes part of ACL when setting POSIX ACLs to Samba
+
+2013-11-15 lan,Tianyu <tianyu.lan at intel.com>
+
+ * cpufreq: governor: Remove fossil comment in the cpufreq_governor_dbs()
+
+2013-11-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
+
+2013-11-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial
+
+2013-11-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
+
+2013-11-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'mfd-3.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/sameo/mfd-next
+
+2013-11-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jdelvare/staging
+
+2013-11-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.dk/linux-block
+
+2013-11-16 Jiri Kosina <jkosina at suse.cz>
+
+ * Merge branches 'for-3.12/upstream-fixes', 'for-3.13/holtek', 'for-3.13/i2c-hid', 'for-3.13/logitech', 'for-3.13/multitouch', 'for-3.13/roccat', 'for-3.13/upstream' and 'for-3.13/wiimote' into for-linus
+
+2013-11-14 Tim Kryger <tim.kryger at linaro.org>
+
+ * i2c: bcm-kona: Introduce Broadcom I2C Driver
+
+2013-10-31 Eli Cohen <eli at dev.mellanox.co.il>
+
+ * IB/mlx5: Fix page shift in create CQ for userspace
+
+2013-10-31 Eli Cohen <eli at dev.mellanox.co.il>
+
+ * IB/mlx4: Fix device max capabilities check
+
+2013-10-31 Eli Cohen <eli at dev.mellanox.co.il>
+
+ * IB/mlx5: Fix list_del of empty list
+
+2013-11-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
+
+2013-11-14 Tony Lindgren <tony at atomide.com>
+
+ * i2c: cbus-gpio: Fix device tree binding
+
+2013-11-15 Aaron Lu <aaron.lu at intel.com>
+
+ * ACPI / video: clean up DMI table for initial black screen problem
+
+2013-11-15 Steve French <smfrench at gmail.com>
+
+ * [CIFS] Set copychunk defaults
+
+2013-11-10 Larry Finger <Larry.Finger at lwfinger.net>
+
+ * rtlwifi: rtl8192cu: Fix more pointer arithmetic errors
+
+2013-11-13 Christoph Hellwig <hch at infradead.org>
+
+ * nfs: fix pnfs Kconfig defaults
+
+2013-11-14 NeilBrown <neilb at suse.de>
+
+ * NFS: correctly report misuse of "migration" mount option.
+
+2013-10-31 Eli Cohen <eli at dev.mellanox.co.il>
+
+ * IB/core: Enforce MR access rights rules on kernel consumers
+
+2013-10-31 Eli Cohen <eli at dev.mellanox.co.il>
+
+ * IB/mlx4: Fix endless loop in resize CQ
+
+2013-11-15 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * regulator: arizona-micsupp: Correct wm5110 voltage selection
+
+2013-11-15 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * swiotlb-xen: add missing xen_dma_map_page call
+
+2013-11-15 Sebastian Ott <sebott at linux.vnet.ibm.com>
+
+ * s390/pci: implement hotplug notifications
+
+2013-11-14 Sebastian Ott <sebott at linux.vnet.ibm.com>
+
+ * s390/scm_block: do not hide eadm subchannel dependency
+
+2013-11-13 Michael Holzheu <holzheu at linux.vnet.ibm.com>
+
+ * s390/sclp: Consolidate early sclp init calls to sclp_early_detect()
+
+2013-11-13 Michael Holzheu <holzheu at linux.vnet.ibm.com>
+
+ * s390/sclp: Move early code from sclp_cmd.c to sclp_early.c
+
+2013-11-13 David Herrmann <dh.herrmann at gmail.com>
+
+ * drm: check for !kdev in drm_unplug_minor()
+
+2013-11-15 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * spi: bcm63xx: fix reference leak to master in bcm63xx_spi_remove()
+
+2013-11-15 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * spi: txx9: fix reference leak to master in txx9spi_remove()
+
+2013-11-15 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * spi: mpc512x: fix reference leak to master in mpc512x_psc_spi_do_remove()
+
+2013-11-15 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * spi: rspi: use platform drvdata correctly in rspi_remove()
+
+2013-11-15 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * spi: bcm2835: fix reference leak to master in bcm2835_spi_remove()
+
+2013-11-14 Brian Austin <brian.austin at cirrus.com>
+
+ * ASoC: cs42l52: Correct MIC CTL mask
+
+2013-11-15 Wei Ni <wni at nvidia.com>
+
+ * Documentation: dt: hwmon: Add OF document for LM90
+
+2013-11-15 Wei Ni <wni at nvidia.com>
+
+ * hwmon: (lm90) Add power control
+
+2013-11-14 David Rientjes <rientjes at google.com>
+
+ * x86: Export 'boot_cpu_physical_apicid' to modules
+
+2013-11-14 Joe Perches <joe at perches.com>
+
+ * MAINTAINERS: Update file patterns in the lockdep and scheduler entries
+
+2013-11-13 Mischa Jonker <mjonker at synopsys.com>
+
+ * ARC: [plat-arcfpga] Add defconfig without initramfs location
+
+2013-11-13 Mischa Jonker <mjonker at synopsys.com>
+
+ * ARC: perf: ARC 700 PMU doesn't support sampling events
+
+2013-11-15 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-nouveau-next' of git://anongit.freedesktop.org/git/nouveau/linux-2.6 into drm-next
+
+2013-11-14 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Merge branch 'next' into for-linus
+
+2013-11-14 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Revert "Input: ALPS - add support for model found on Dell XT2"
+
+2013-11-14 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * PM / Hibernate: Do not crash kernel in free_basic_memory_bitmaps()
+
+2013-11-13 Tyler Hicks <tyhicks at canonical.com>
+
+ * eCryptfs: file->private_data is always valid
+
+2013-10-30 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * dma: mv_xor: Fix mis-usage of mmio 'base' and 'high_base' registers
+
+2013-10-30 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * dma: mv_xor: Remove unneeded NULL address check
+
+2013-11-13 Dan Williams <dan.j.williams at intel.com>
+
+ * ioat: fix ioat3_irq_reinit
+
+2013-11-13 Dan Williams <dan.j.williams at intel.com>
+
+ * ioat: kill msix_single_vector support
+
+2013-11-13 Dan Williams <dan.j.williams at intel.com>
+
+ * raid6test: add new corner case for ioatdma driver
+
+2013-11-13 Dan Williams <dan.j.williams at intel.com>
+
+ * ioatdma: clean up sed pool kmem_cache
+
+2013-11-14 Bjorn Helgaas <bhelgaas at google.com>
+
+ * PCI: Fix whitespace, capitalization, and spelling errors
+
+2013-11-11 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * i2c: wmt: add missing clk_disable_unprepare() on error
+
+2013-11-14 Nicolin Chen <b42378 at freescale.com>
+
+ * ASoC: wm8962: Turn on regcache_cache_only before disabling regulator
+
+2013-11-01 Jens Axboe <axboe at kernel.dk>
+
+ * virtio_blk: blk-mq support
+
+2013-11-12 Oskar Schirmer <oskar at scara.com>
+
+ * ASoC: fsl: imx-pcm-fiq: omit fiq counter to avoid harm in unbalanced situations
+
+2013-11-05 Tim Harvey <tharvey at gateworks.com>
+
+ * regulator: pfuze100: allow misprogrammed ID
+
+2013-11-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
+
+2013-11-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'xfs-for-linus-v3.13-rc1' of git://oss.sgi.com/xfs/xfs
+
+2013-11-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-trace-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fbdev-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tomba/linux
+
+2013-11-12 Maarten Lankhorst <maarten.lankhorst at canonical.com>
+
+ * drm/nouveau: do not map evicted vram buffers in nouveau_bo_vma_add
+
+2013-11-13 Jeff Layton <jlayton at redhat.com>
+
+ * nfs: don't retry detect_trunking with RPC_AUTH_UNIX more than once
+
+2013-11-14 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-next-3.13' of git://people.freedesktop.org/~agd5f/linux into drm-next
+
+2013-11-13 viresh kumar <viresh.kumar at linaro.org>
+
+ * cpufreq: OMAP: Fix compilation error 'r & ret undeclared'
+
+2013-11-13 Ulf Hansson <ulf.hansson at linaro.org>
+
+ * PM / Runtime: Fix error path for prepare
+
+2013-09-17 Thomas Gleixner <tglx at linutronix.de>
+
+ * preempt: Make PREEMPT_ACTIVE generic
+
+2013-09-17 Thomas Gleixner <tglx at linutronix.de>
+
+ * sparc: Use preempt_schedule_irq
+
+2013-09-17 Thomas Gleixner <tglx at linutronix.de>
+
+ * ia64: Use preempt_schedule_irq
+
+2013-09-17 Thomas Gleixner <tglx at linutronix.de>
+
+ * m32r: Use preempt_schedule_irq
+
+2013-09-17 Thomas Gleixner <tglx at linutronix.de>
+
+ * hardirq: Make hardirq bits generic
+
+2013-11-11 Thomas Gleixner <tglx at linutronix.de>
+
+ * m68k: Simplify low level interrupt handling code
+
+2013-11-06 Thomas Gleixner <tglx at linutronix.de>
+
+ * genirq: Prevent spurious detection for unconditionally polled interrupts
+
+2013-11-13 Guennadi Liakhovetski <g.liakhovetski at gmx.de>
+
+ * regulator: fixed: fix regulator_list_voltage() for regression
+
+2013-11-08 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * dma: pl330: silence a compile warning
+
+2013-11-08 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * dma: pl330: off by one in pl330_probe()
+
+2013-11-12 Peter Zijlstra <peterz at infradead.org>
+
+ * block: Use u64_stats_init() to initialize seqcounts
+
+2013-11-09 Fengguang Wu <fengguang.wu at intel.com>
+
+ * locking/lockdep: Mark __lockdep_count_forward_deps() as static
+
+2013-11-10 Michal Nazarewicz <mina86 at mina86.com>
+
+ * sched/fair: Avoid integer overflow
+
+2013-11-11 Peter Zijlstra <peterz at infradead.org>
+
+ * sched: Optimize task_sched_runtime()
+
+2013-11-06 Peter Zijlstra <peterz at infradead.org>
+
+ * sched/numa: Cure update_numa_stats() vs. hotplug
+
+2013-10-29 Markus Pargmann <mpa at pengutronix.de>
+
+ * dma: mxs-dma: Use semaphores for cyclic DMA
+
+2013-10-29 Markus Pargmann <mpa at pengutronix.de>
+
+ * dma: mxs-dma: Update state after channel reset
+
+2013-10-29 Markus Pargmann <mpa at pengutronix.de>
+
+ * dma: mxs-dma: Fix channel reset hardware bug
+
+2013-10-29 Markus Pargmann <mpa at pengutronix.de>
+
+ * dma: mxs-dma: Report correct residue for cyclic DMA
+
+2013-11-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
+
+2013-11-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (patches from Andrew Morton)
+
+2013-11-07 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * pm2301-charger: Remove unneeded NULL checks
+
+2013-10-31 NeilBrown <neilb at suse.de>
+
+ * twl4030_charger: Add devicetree support
+
+2013-11-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
+
+2013-11-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dlm-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm
+
+2013-11-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'upstream-3.13-rc1' of git://git.infradead.org/linux-ubi
+
+2013-11-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'upstream-3.13-rc1' of git://git.infradead.org/linux-ubifs
+
+2013-11-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse
+
+2013-11-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
+
+2013-11-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-f2fs-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
+
+2013-11-12 Mathias Krause <minipli at googlemail.com>
+
+ * ipc, msg: fix message length check for negative values
+
+2013-11-12 Xie XiuQi <xiexiuqi at huawei.com>
+
+ * ipc/util.c: remove unnecessary work pending test
+
+2013-11-12 Ilija Hadzic <ihadzic at research.bell-labs.com>
+
+ * devpts: plug the memory leak in kill_sb
+
+2013-11-12 P J P <ppandit at redhat.com>
+
+ * ./Makefile: export initial ramdisk compression config option
+
+2013-11-12 Christian Ruppert <christian.ruppert at abilis.com>
+
+ * init/Kconfig: add option to disable kernel compression
+
+2013-11-12 Michal Nazarewicz <mina86 at mina86.com>
+
+ * drivers: w1: make w1_slave::flags long to avoid memory corruption
+
+2013-11-12 Jingoo Han <jg1.han at samsung.com>
+
+ * drivers/w1/masters/ds1wm.c: use dev_get_platdata()
+
+2013-11-12 Trond Myklebust <Trond.Myklebust at netapp.com>
+
+ * SUNRPC: Avoid deep recursion in rpc_release_client
+
+2013-11-08 Ulf Hansson <ulf.hansson at linaro.org>
+
+ * PM / Runtime: Update documentation around probe|remove|suspend
+
+2013-11-08 Xiaoguang Chen <chenxg at marvell.com>
+
+ * cpufreq: conservative: set requested_freq to policy max when it is over policy max
+
+2013-11-09 Len Brown <len.brown at intel.com>
+
+ * tools / power turbostat: Support Silvermont
+
+2013-11-09 Len Brown <len.brown at intel.com>
+
+ * intel_idle: Support Intel Atom Processor C2000 Product Family
+
+2013-11-12 Thomas Renninger <trenn at suse.de>
+
+ * x86/microcode/amd: Tone down printk(), don't treat a missing firmware file as an error
+
+2013-11-12 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2013-11-11 Steven Rostedt <rostedt at goodmis.org>
+
+ * tools lib traceevent: Add direct access to dynamic arrays
+
+2013-10-25 Jiri Slaby <jslaby at suse.cz>
+
+ * x86/dumpstack: Fix printk_address for direct addresses
+
+2013-11-12 Arnaldo Carvalho de Melo <acme at redhat.com>
+
+ * perf target: Shorten perf_target__ to target__
+
+2013-11-12 Adrian Hunter <adrian.hunter at intel.com>
+
+ * perf tests: Handle throttle events in 'object code reading' test
+
+2013-11-12 David Ahern <dsahern at gmail.com>
+
+ * perf evlist: Refactor mmap_pages parsing
+
+2013-11-12 Tristan Rice <rice at outerearth.net>
+
+ * HID: enable Mayflash USB Gamecube Adapter
+
+2013-11-12 Kees Cook <keescook at chromium.org>
+
+ * x86, kaslr: Use char array to gain sizeof sanity
+
+2013-11-11 H. Peter Anvin <hpa at zytor.com>
+
+ * x86, kaslr: Add a circular multiply for better bit diffusion
+
+2013-11-11 Kees Cook <keescook at chromium.org>
+
+ * x86, kaslr: Mix entropy sources together as needed
+
+2013-11-07 Mischa Jonker <mjonker at synopsys.com>
+
+ * ARC: Add documentation on DT binding for ARC700 PMU
+
+2013-11-07 Mischa Jonker <mjonker at synopsys.com>
+
+ * ARC: Add perf support for ARC700 cores
+
+2013-11-11 Andreas Dilger <adilger at dilger.ca>
+
+ * ext4: add prototypes for macro-generated functions
+
+2013-11-11 Andreas Dilger <andreas.dilger at intel.com>
+
+ * ext4: return non-zero st_blocks for inline data
+
+2013-11-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-uv-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-uaccess-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-reboot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-platform-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-11 Gerhard Sittig <gsi at denx.de>
+
+ * regmap: trivial comment fix (copy'n'paste error)
+
+2013-11-11 Samuel Ortiz <sameo at linux.intel.com>
+
+ * Merge tag 'mfd-lee-3.13-3' of git://git.linaro.org/people/ljones/mfd
+
+2013-11-11 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'prandom'
+
+2013-11-11 Daniel Borkmann <dborkman at redhat.com>
+
+ * random32: add test cases for taus113 implementation
+
+2013-11-11 Daniel Borkmann <dborkman at redhat.com>
+
+ * random32: upgrade taus88 generator to taus113 from errata paper
+
+2013-11-11 Daniel Borkmann <dborkman at redhat.com>
+
+ * random32: move rnd_state to linux/random.h
+
+2013-11-11 Hannes Frederic Sowa <hannes at stressinduktion.org>
+
+ * random32: add prandom_reseed_late() and call when nonblocking pool becomes initialized
+
+2013-11-11 Hannes Frederic Sowa <hannes at stressinduktion.org>
+
+ * random32: add periodic reseeding
+
+2013-11-11 Daniel Borkmann <dborkman at redhat.com>
+
+ * random32: fix off-by-one in seeding requirement
+
+2013-11-11 Jonas Jensen <jonas.jensen at gmail.com>
+
+ * PHY: Add RTL8201CP phy_driver to realtek
+
+2013-11-11 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * xtsonic: add missing platform_set_drvdata() in xtsonic_probe()
+
+2013-11-11 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * macmace: add missing platform_set_drvdata() in mace_probe()
+
+2013-11-11 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * ethernet/arc/arc_emac: add missing platform_set_drvdata() in arc_emac_probe()
+
+2013-11-11 Ingo Molnar <mingo at kernel.org>
+
+ * Revert "x86/UV: Add uvtrace support"
+
+2013-11-10 Michal Nazarewicz <mina86 at mina86.com>
+
+ * RDMA/cma: Remove unused argument and minor dead code
+
+2013-11-01 Sean Hefty <sean.hefty at intel.com>
+
+ * RDMA/ucma: Discard events for IDs not yet claimed by user space
+
+2013-11-11 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * xen/arm: pfn_to_mfn and mfn_to_pfn return the argument if nothing is in the p2m
+
+2013-11-11 H. Peter Anvin <hpa at zytor.com>
+
+ * x86, trace: Change user|kernel_page_fault to page_fault_user|kernel
+
+2013-11-05 Alexander Shiyan <shc_work at mail.ru>
+
+ * ARM: dts: i.MX51: Fix OTG PHY clock
+
+2013-10-31 Shawn Guo <shawn.guo at linaro.org>
+
+ * ARM: imx: set up pllv3 POWER and BYPASS sequentially
+
+2013-10-30 Shawn Guo <shawn.guo at linaro.org>
+
+ * ARM: imx: pllv3 needs relock in .set_rate() call
+
+2013-10-30 Shawn Guo <shawn.guo at linaro.org>
+
+ * ARM: imx: add sleep for pllv3 relock
+
+2013-10-31 Lothar Waßmann <LW at KARO-electronics.de>
+
+ * ARM: imx6q: add missing sentinel to divider table
+
+2013-10-31 Shawn Guo <shawn.guo at linaro.org>
+
+ * ARM: imx: v7_cpu_resume() is needed by imx6sl build
+
+2013-10-31 Shawn Guo <shawn.guo at linaro.org>
+
+ * ARM: imx: improve mxc_restart() on the SRC bit writes
+
+2013-10-28 Shawn Guo <shawn.guo at linaro.org>
+
+ * ARM: imx: remove imx_src_prepare_restart() call
+
+2013-11-08 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * Documentation: mfd: Update s2mps11.txt
+
+2013-11-07 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * mfd: pm8921: Potential NULL dereference in pm8921_remove()
+
+2013-11-09 Sven Eckelmann <sven at narfation.org>
+
+ * HID: sony: Add force feedback support for Dualshock3 USB
+
+2013-10-21 Forest Bond <forest.bond at rapidrollout.com>
+
+ * Input: usbtouchscreen: ignore eGalax/D-Wav/EETI HIDs
+
+2013-10-21 Forest Bond <forest.bond at rapidrollout.com>
+
+ * HID: don't ignore eGalax/D-Wav/EETI HIDs
+
+2013-11-10 Felipe Balbi <balbi at ti.com>
+
+ * arm: dts: am335x sk: add touchscreen support
+
+2013-11-10 Felipe Balbi <balbi at ti.com>
+
+ * Input: ti_am335x_tsc - fix spelling mistake in TSC/ADC DT binding
+
+2013-11-10 Duan Jiong <duanj.fnst at cn.fujitsu.com>
+
+ * Input: cyttsp4 - replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
+
+2013-11-10 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * Input: mma8450 - add missing i2c_set_clientdata() in mma8450_probe()
+
+2013-11-10 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * Input: mpu3050 - add missing i2c_set_clientdata() in mpu3050_probe()
+
+2013-11-10 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * Input: tnetv107x-keypad - make irqs signed for error handling
+
+2013-11-08 Hannes Frederic Sowa <hannes at stressinduktion.org>
+
+ * ipv6: protect for_each_sk_fl_rcu in mem_check with rcu_read_lock_bh
+
+2013-11-11 David S. Miller <davem at davemloft.net>
+
+ * vlan: Implement vlan_dev_get_egress_qos_mask as an inline.
+
+2013-11-09 Jacob Keller <jacob.e.keller at intel.com>
+
+ * ixgbe: add warning when max_vfs is out of range.
+
+2013-10-31 Christian Ruppert <christian.ruppert at abilis.com>
+
+ * ARC: [TB10x] Updates for GPIO and pinctrl
+
+2013-11-09 Al Viro <viro at zeniv.linux.org.uk>
+
+ * ecryptfs: ->f_op is never NULL
+
+2013-11-09 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * regulator: gpio-regulator: Don't oops on missing regulator-type property
+
+2013-09-17 Dave Jones <davej at redhat.com>
+
+ * RDMA/nes: Remove self-assignment from nes_query_qp()
+
+2011-09-20 J. Bruce Fields <bfields at redhat.com>
+
+ * locks: break delegations on any attribute modification
+
+2011-09-20 J. Bruce Fields <bfields at redhat.com>
+
+ * locks: break delegations on link
+
+2011-09-20 J. Bruce Fields <bfields at redhat.com>
+
+ * locks: break delegations on rename
+
+2012-08-28 J. Bruce Fields <bfields at redhat.com>
+
+ * locks: helper functions for delegation breaking
+
+2011-09-20 J. Bruce Fields <bfields at redhat.com>
+
+ * locks: break delegations on unlink
+
+2013-10-10 Bart Van Assche <bvanassche at acm.org>
+
+ * IB/srp: Report receive errors correctly
+
+2013-10-10 Bart Van Assche <bvanassche at acm.org>
+
+ * IB/srp: Avoid offlining operational SCSI devices
+
+2013-10-10 Vu Pham <vuhuong at mellanox.com>
+
+ * IB/srp: Remove target from list before freeing Scsi_Host structure
+
+2013-10-25 Mike Marciniszyn <mike.marciniszyn at intel.com>
+
+ * IB/qib: Fix txselect regression
+
+2013-10-24 Mike Marciniszyn <mike.marciniszyn at intel.com>
+
+ * IB/qib: Fix checkpatch __packed warnings
+
+2013-10-04 Jan Kara <jack at suse.cz>
+
+ * IB/qib: Convert qib_user_sdma_pin_pages() to use get_user_pages_fast()
+
+2013-10-28 Naresh Gottumukkala <bgottumukkala at emulex.com>
+
+ * RDMA/ocrdma: Remove redundant check in ocrdma_build_fr()
+
+2013-09-06 Naresh Gottumukkala <bgottumukkala at emulex.com>
+
+ * RDMA/ocrdma: Fix a crash in rmmod
+
+2013-09-06 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * RDMA/ocrdma: Silence an integer underflow warning
+
+2013-08-21 Michal Schmidt <mschmidt at redhat.com>
+
+ * IPoIB: lower NAPI weight
+
+2013-10-16 Erez Shitrit <erezsh at mellanox.com>
+
+ * IPoIB: Start multicast join process only on active ports
+
+2013-10-16 Erez Shitrit <erezsh at mellanox.com>
+
+ * IPoIB: Add path query flushing in ipoib_ib_dev_cleanup
+
+2013-10-27 Ben Hutchings <ben at decadent.org.uk>
+
+ * IB/cxgb4: Fix formatting of physical address
+
+2013-09-24 Doug Ledford <dledford at redhat.com>
+
+ * IB/cma: Check for GID on listening device first
+
+2013-11-06 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * arm,arm64/include/asm/io.h: define struct bio_vec
+
+2013-11-08 Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+
+ * Merge remote-tracking branch 'stefano/swiotlb-xen-9.1' into stable/for-linus-3.13
+
+2013-11-08 Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+
+ * Merge tag 'v3.12-rc5' into stable/for-linus-3.13
+
+2013-11-04 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * swiotlb-xen: missing include dma-direction.h
+
+2013-11-04 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * pci-swiotlb-xen: call pci_request_acs only ifdef CONFIG_PCI
+
+2013-10-30 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * arm: make SWIOTLB available
+
+2013-11-08 Paul Moore <pmoore at redhat.com>
+
+ * Merge tag 'v3.12'
+
+2013-10-18 Ben Harris <bjh21 at cam.ac.uk>
+
+ * floppy: Correct documentation of driver options when used as a module.
+
+2013-11-06 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * pktcdvd: debugfs functions return NULL on error
+
+2013-10-29 Roger Pau Monne <roger.pau at citrix.com>
+
+ * xen-blkfront: restore the non-persistent data path
+
+2013-11-05 Bartlomiej Zolnierkiewicz <b.zolnierkie at samsung.com>
+
+ * skd: fix formatting in skd_s1120.h
+
+2013-11-05 Bartlomiej Zolnierkiewicz <b.zolnierkie at samsung.com>
+
+ * skd: reorder construct/destruct code
+
+2013-11-05 Bartlomiej Zolnierkiewicz <b.zolnierkie at samsung.com>
+
+ * skd: cleanup skd_do_inq_page_da()
+
+2013-11-05 Bartlomiej Zolnierkiewicz <b.zolnierkie at samsung.com>
+
+ * skd: remove SKD_OMIT_FROM_SRC_DIST ifdefs
+
+2013-11-05 Bartlomiej Zolnierkiewicz <b.zolnierkie at samsung.com>
+
+ * skd: remove redundant skdev->pdev assignment from skd_pci_probe()
+
+2013-11-05 Bartlomiej Zolnierkiewicz <b.zolnierkie at samsung.com>
+
+ * skd: use <asm/unaligned.h>
+
+2013-11-08 Stefan Achatz <erazor_de at users.sourceforge.net>
+
+ * HID: roccat: add missing special driver declarations
+
+2013-11-06 Simon Wood <simon at mungewell.org>
+
+ * HID:hid-lg4ff: Correct Auto-center strength for wheels other than MOMO and MOMO2
+
+2013-11-06 Simon Wood <simon at mungewell.org>
+
+ * HID:hid-lg4ff: Initialize device properties before we touch autocentering.
+
+2013-11-06 Simon Wood <simon at mungewell.org>
+
+ * HID:hid-lg4ff: ensure ConstantForce is disabled when set to 0
+
+2013-11-06 Simon Wood <simon at mungewell.org>
+
+ * HID:hid-lg4ff: Switch autocentering off when strength is set to zero.
+
+2013-11-06 Simon Wood <simon at mungewell.org>
+
+ * HID:hid-lg4ff: Scale autocentering force properly on Logitech wheel
+
+2013-11-01 Mauro Carvalho Chehab <m.chehab at samsung.com>
+
+ * [media] platform drivers: Fix build on frv arch
+
+2013-11-02 Mauro Carvalho Chehab <m.chehab at samsung.com>
+
+ * [media] lirc_zilog: Don't use dynamic static allocation
+
+2013-11-07 Michael Opdenacker <michael.opdenacker at free-electrons.com>
+
+ * scripts/tags.sh: remove obsolete __devinit[const|data]
+
+2013-11-08 Theodore Ts'o <tytso at mit.edu>
+
+ * ext4: use prandom_u32() instead of get_random_bytes()
+
+2013-11-07 Eric Sandeen <sandeen at redhat.com>
+
+ * ext4: remove unreachable code after ext4_can_extents_be_merged()
+
+2013-11-07 Xiaoguang Chen <chenxg at marvell.com>
+
+ * cpufreq: conservative: fix requested_freq reduction issue
+
+2013-11-07 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / hotplug: Consolidate deferred execution of ACPI hotplug routines
+
+2013-11-07 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * PM / runtime: Use pm_runtime_put_sync() in __device_release_driver()
+
+2013-11-06 Lee Jones <lee.jones at linaro.org>
+
+ * ASoC: generic-dmaengine-pcm: Clear slave_config memory
+
+2013-10-31 Adrian Huang <adrianhuang0701 at gmail.com>
+
+ * intel_pstate: skip the driver if ACPI has power mgmt option
+
+2013-11-07 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / hotplug: Do not execute "insert in progress" _OST
+
+2013-11-04 Gu Zheng <guz.fnst at cn.fujitsu.com>
+
+ * xfs: simplify kmem_{zone_}zalloc
+
+2013-10-17 Randy Dunlap <rdunlap at infradead.org>
+
+ * scripts/kernel-doc: make unknown function prototype a Warning instead of an Error
+
+2013-11-01 Dave Chinner <dchinner at redhat.com>
+
+ * xfs: add tracepoints to AGF/AGI read operations
+
+2013-11-01 Dave Chinner <dchinner at redhat.com>
+
+ * xfs: trace AIL manipulations
+
+2013-11-05 Herbert Xu <herbert at gondor.apana.org.au>
+
+ * crypto: s390 - Fix aes-cbc IV corruption
+
+2013-11-05 Jan Kara <jack at suse.cz>
+
+ * ext2: Fix fs corruption in ext2_get_xip_mem()
+
+2013-10-02 Maxim Patlasov <MPatlasov at parallels.com>
+
+ * fuse: writepages: protect secondary requests from fuse file release
+
+2013-10-02 Maxim Patlasov <MPatlasov at parallels.com>
+
+ * fuse: writepages: update bdi writeout when deleting secondary request
+
+2013-11-04 Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
+
+ * PowerCap: Fix build error with option -Werror=format-security
+
+2013-11-04 Benjamin LaHaise <bcrl at kvack.org>
+
+ * Merge branch 'aio-fix' of http://evilpiepirate.org/git/linux-bcache
+
+2013-11-04 James Ralston <james.d.ralston at intel.com>
+
+ * mfd: lpc_ich: Add Device IDs for Intel Wildcat Point-LP PCH
+
+2013-11-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2013-11-03 Stefan Achatz <erazor_de at users.sourceforge.net>
+
+ * HID: roccat: fix Coverity CID 141438
+
+2013-10-10 Chanwoo Choi <cw00.choi at samsung.com>
+
+ * mfd: max77693: Fix up bug of wrong interrupt number
+
+2013-11-03 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/wm8996' into asoc-next
+
+2013-11-03 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/wm8962' into asoc-next
+
+2013-11-03 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/wm0010' into asoc-next
+
+2013-11-03 Jack Morgenstein <jackm at dev.mellanox.co.il>
+
+ * net/mlx4_core: Fix call to __mlx4_unregister_mac
+
+2013-11-04 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'fixes-for-3.12' of git://gitorious.org/linux-can/linux-can
+
+2013-10-31 Daniel Borkmann <dborkman at redhat.com>
+
+ * net: sctp: do not trigger BUG_ON in sctp_cmd_delete_tcb
+
+2013-11-03 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.12
+
+2013-11-03 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'upstream' of git://git.linux-mips.org/pub/scm/ralf/upstream-linus
+
+2013-11-03 Mathias Krause <minipli at googlemail.com>
+
+ * ipc, msg: forbid negative values for "msg{max,mnb,mni}"
+
+2013-11-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux
+
+2013-11-02 Vineet Gupta <Vineet.Gupta1 at synopsys.com>
+
+ * ARC: Incorrect mm reference used in vmalloc fault handler
+
+2013-11-01 Jason Wang <jasowang at redhat.com>
+
+ * net: flow_dissector: fail on evil iph->ihl
+
+2013-11-02 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec
+
+2013-11-02 Ming Lei <tom.leiming at gmail.com>
+
+ * scripts/kallsyms: filter symbols not in kernel address space
+
+2013-11-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'usb-3.12-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
+
+2013-11-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2013-11-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'clk-fixes-for-linus' of git://git.linaro.org/people/mturquette/linux
+
+2013-11-01 Greg Thelen <gthelen at google.com>
+
+ * memcg: remove incorrect underflow check
+
+2013-11-01 Richard Fitzgerald <rf at opensource.wolfsonmicro.com>
+
+ * ASoC: wm8962: Add EQ coefficient support
+
+2013-11-01 Алексей Крамаренко <alexeyk13 at yandex.ru>
+
+ * USB: serial: ftdi_sio: add id for Z3X Box device
+
+2013-10-30 Greg KH <gregkh at linuxfoundation.org>
+
+ * USB: Maintainers change for usb serial drivers
+
+2013-11-01 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "USB: pl2303: restrict the divisor based baud rate encoding method to the "HX" chip type"
+
+2013-11-01 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "usb: pl2303: fix+improve the divsor based baud rate encoding method"
+
+2013-11-01 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "usb: pl2303: do not round to the next nearest standard baud rate for the divisor based baud rate encoding method"
+
+2013-11-01 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "usb: pl2303: remove 500000 baud from the list of standard baud rates"
+
+2013-11-01 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "usb: pl2303: move the two baud rate encoding methods to separate functions"
+
+2013-11-01 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "usb: pl2303: increase the allowed baud rate range for the divisor based encoding method"
+
+2013-10-30 Steffen Klassert <steffen.klassert at secunet.com>
+
+ * xfrm: Fix null pointer dereference when decoding sessions
+
+2013-10-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (fixes from Andrew Morton)
+
+2013-10-31 Ming Lei <ming.lei at canonical.com>
+
+ * lib/scatterlist.c: don't flush_kernel_dcache_page on slab page
+
+2013-10-31 Johannes Weiner <hannes at cmpxchg.org>
+
+ * mm: memcg: fix test for child groups
+
+2013-10-31 Johannes Weiner <hannes at cmpxchg.org>
+
+ * mm: memcg: lockdep annotation for memcg OOM lock
+
+2013-10-31 Johannes Weiner <hannes at cmpxchg.org>
+
+ * mm: memcg: use proper memcg in limit bypass
+
+2013-10-31 Stratos Karafotis <stratosk at semaphore.gr>
+
+ * cpufreq: ondemand: Remove redundant return statement
+
+2013-10-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * vfs: decrapify dput(), fix cache behavior under normal load
+
+2013-10-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * i915: fix compiler warning
+
+2013-10-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-10-27 Olivier Sobrie <olivier at sobrie.be>
+
+ * can: kvaser_usb: fix usb endpoints detection
+
+2013-10-28 Markus Pargmann <mpa at pengutronix.de>
+
+ * can: c_can: Fix RX message handling, handle lost message before EOB
+
+2013-10-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
+
+2013-10-31 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * ALSA: fix oops in snd_pcm_info() caused by ASoC DPCM
+
+2013-10-31 Heiko Stübner <heiko at sntech.de>
+
+ * Input: add driver for Neonode zForce based touchscreens
+
+2013-10-31 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * Input: sh_keysc - enable the driver on all ARM platforms
+
+2013-10-31 Kang Hu <hukangustc at gmail.com>
+
+ * Input: remove a redundant max() call
+
+2013-10-31 Tom Gundersen <teg at jklm.no>
+
+ * Input: mousedev - allow disabling even without CONFIG_EXPERT
+
+2013-10-31 Tom Gundersen <teg at jklm.no>
+
+ * Input: allow deselecting serio drivers even without CONFIG_EXPERT
+
+2013-10-31 Tom Gundersen <teg at jklm.no>
+
+ * Input: i8042 - add PNP modaliases
+
+2013-10-31 Daniel Stone <daniel at fooishbar.org>
+
+ * Input: evdev - fall back to vmalloc for client event buffer
+
+2013-10-16 Joseph Salisbury <joseph.salisbury at canonical.com>
+
+ * Input: cypress_ps2 - do not consider data bad if palm is detected
+
+2013-10-31 Masanari Iida <standby24x7 at gmail.com>
+
+ * doc: usb: Fix typo in Documentation/usb/gadget_configs.txt
+
+2013-10-31 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * MIPS: ralink: fix return value check in rt_timer_probe()
+
+2013-10-18 Srinivas Kandagatla <srinivas.kandagatla at st.com>
+
+ * [media] media: st-rc: Add ST remote control driver
+
+2013-10-31 Yunkang Tang <tommywill2011 at gmail.com>
+
+ * Input: ALPS - add support for model found on Dell XT2
+
+2013-08-14 Viresh Kumar <viresh.kumar at linaro.org>
+
+ * cpufreq: move freq change notifications to cpufreq core
+
+2013-10-29 Viresh Kumar <viresh.kumar at linaro.org>
+
+ * cpufreq: distinguish drivers that do asynchronous notifications
+
+2013-10-30 Dirk Brandewie <dirk.j.brandewie at intel.com>
+
+ * cpufreq/intel_pstate: Add static declarations to internal functions
+
+2013-10-30 Nicolas Pitre <nicolas.pitre at linaro.org>
+
+ * cpufreq: arm_big_little: reconfigure switcher behavior at run time
+
+2013-10-21 Jingoo Han <jg1.han at samsung.com>
+
+ * ARM: EXYNOS: Remove incorrect __init annotation from cpuidle driver
+
+2013-10-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (fixes from Andrew Morton)
+
+2013-10-30 Greg Thelen <gthelen at google.com>
+
+ * memcg: use __this_cpu_sub() to dec stats to avoid incorrect subtrahend casting
+
+2013-10-30 Greg Thelen <gthelen at google.com>
+
+ * percpu: fix this_cpu_sub() subtrahend casting for unsigneds
+
+2013-10-30 Chen LinX <linx.z.chen at intel.com>
+
+ * mm/pagewalk.c: fix walk_page_range() access of wrong PTEs
+
+2013-10-30 Masanari Iida <standby24x7 at gmail.com>
+
+ * doc:net: Fix typo in Documentation/networking
+
+2013-10-30 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * mm: list_lru: fix almost infinite loop causing effective livelock
+
+2013-10-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'tty-3.12-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
+
+2013-10-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2013-10-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2013-10-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
+
+2013-10-29 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * Staging: sb105x: info leak in mp_get_count()
+
+2013-10-29 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * Staging: bcm: info leak in ioctl
+
+2013-10-29 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * staging: wlags49_h2: buffer overflow setting station name
+
+2013-10-29 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * aacraid: missing capable() check in compat ioctl
+
+2013-10-30 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-fix-v3.12-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2013-10-30 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/wm8994' into asoc-linus
+
+2013-10-30 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: wm8996: Fix negative array index read
+
+2013-10-30 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: wm_hubs: Add missing break in hp_supply_event()
+
+2013-10-30 Markos Chandras <markos.chandras at imgtec.com>
+
+ * MIPS: malta: Fix GIC interrupt offsets
+
+2013-10-26 Jan Matějka <yac at blesmrt.net>
+
+ * HID: multitouch: add manufacturer to Kconfig help text
+
+2013-10-19 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * HID: logitech-dj: small cleanup in rdcat()
+
+2013-10-21 Bibek Basu <bbasu at nvidia.com>
+
+ * HID: i2c-hid: Stop querying for init reports
+
+2013-10-28 Stefan Achatz <erazor_de at users.sourceforge.net>
+
+ * HID: roccat: add support for Ryos MK keyboards
+
+2013-10-28 Stefan Achatz <erazor_de at users.sourceforge.net>
+
+ * HID: roccat: generalize some common code
+
+2013-10-28 Stefan Achatz <erazor_de at users.sourceforge.net>
+
+ * HID: roccat: add new device return value
+
+2013-10-28 David Herrmann <dh.herrmann at gmail.com>
+
+ * HID: wiimote: add pro-controller analog stick calibration
+
+2013-10-28 David Herrmann <dh.herrmann at gmail.com>
+
+ * HID: wiimote: fix inverted pro-controller axes
+
+2013-10-30 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Add a fixup for ASUS N76VZ
+
+2013-10-30 Paolo Bonzini <pbonzini at redhat.com>
+
+ * KVM: use a more sensible error number when debugfs directory creation fails
+
+2013-10-29 Tim Gardner <tim.gardner at canonical.com>
+
+ * KVM: Fix modprobe failure for kvm_intel/kvm_amd
+
+2013-10-28 David Herrmann <dh.herrmann at gmail.com>
+
+ * drm: allow DRM_IOCTL_VERSION on render-nodes
+
+2013-10-29 Joel Fernandes <joelf at ti.com>
+
+ * crypto: omap-aes - Fix CTR mode counter length
+
+2013-10-26 Joni Lapilainen <joni.lapilainen at gmail.com>
+
+ * crypto: omap-sham - Add missing modalias
+
+2013-10-25 Mathias Krause <mathias.krause at secunet.com>
+
+ * padata: make the sequence counter an atomic_t
+
+2013-10-29 Nathan Hintz <nlhintz at hotmail.com>
+
+ * bgmac: don't update slot on skb alloc/dma mapping error
+
+2013-10-30 Alistair Popple <alistair at popple.id.au>
+
+ * ibm emac: Fix locking for enable/disable eob irq
+
+2013-10-30 Alistair Popple <alistair at popple.id.au>
+
+ * ibm emac: Don't call napi_complete if napi_reschedule failed
+
+2013-10-29 Jason Wang <jasowang at redhat.com>
+
+ * virtio-net: correctly handle cpu hotplug notifier during resuming
+
+2013-10-30 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm-intel-fixes-2013-10-29' of git://people.freedesktop.org/~danvet/drm-intel into drm-fixes
+
+2013-10-28 Vlad Yasevich <vyasevic at redhat.com>
+
+ * bridge: pass correct vlan id to multicast code
+
+2013-10-29 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jesse/openvswitch
+
+2013-10-28 Michael Drüing <michael at drueing.de>
+
+ * net: x25: Fix dead URLs in Kconfig
+
+2013-10-29 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
+
+2013-10-08 Deng-Cheng Zhu <dengcheng.zhu at imgtec.com>
+
+ * MIPS: Perf: Fix 74K cache map
+
+2013-10-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Fix a few incorrectly checked [io_]remap_pfn_range() calls
+
+2013-10-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-10-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Kconfig: make KOBJECT_RELEASE debugging require timer debugging
+
+2013-10-29 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915: Fix the PPT fdi lane bifurcate state handling on ivb
+
+2013-10-28 Holger Eitzenberger <holger at eitzenberger.org>
+
+ * netfilter: xt_NFQUEUE: fix --queue-bypass regression
+
+2013-10-17 Peter Zijlstra <peterz at infradead.org>
+
+ * perf/x86: Fix NMI measurements
+
+2013-10-28 Peter Zijlstra <peterz at infradead.org>
+
+ * perf: Fix perf ring buffer memory ordering
+
+2013-10-07 Mel Gorman <mgorman at suse.de>
+
+ * mm: Account for a THP NUMA hinting update as one PTE update
+
+2013-10-29 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2013-10-28 Wei Liu <wei.liu2 at citrix.com>
+
+ * xen-netback: use jiffies_64 value to calculate credit timeout
+
+2013-10-27 Ben Hutchings <ben at decadent.org.uk>
+
+ * cxgb3: Fix length calculation in write_ofld_wr() on 32-bit architectures
+
+2013-10-27 Anton Vorontsov <anton at enomsg.org>
+
+ * power_supply: Fix documentation for TEMP_*ALERT* properties
+
+2013-10-29 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * swiotlb-xen: fix error code returned by xen_swiotlb_map_sg_attrs
+
+2013-10-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'xtensa-next-20131015' of git://github.com/czankel/xtensa-linux
+
+2013-10-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
+
+2013-10-28 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regmap/topic/spmi' into regmap-next
+
+2013-10-28 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regmap/topic/range' into regmap-next
+
+2013-10-28 Josh Cartwright <joshc at codeaurora.org>
+
+ * regmap: add SPMI support
+
+2013-10-24 Zhouyi Zhou <zhouzhouyi at gmail.com>
+
+ * perf tools: Fixup mmap event consumption
+
+2013-10-26 Jiri Olsa <jolsa at redhat.com>
+
+ * perf top: Split -G and --call-graph
+
+2013-10-26 Jiri Olsa <jolsa at redhat.com>
+
+ * perf record: Split -g and --call-graph
+
+2013-10-25 Jiri Olsa <jolsa at redhat.com>
+
+ * perf hists: Add color overhead for stdio output buffer
+
+2013-10-27 Rob Pearce <rob at flitspace.org.uk>
+
+ * drm/i915: No LVDS hardware on Intel D410PT and D425KT
+
+2013-10-21 Jani Nikula <jani.nikula at intel.com>
+
+ * drm/i915/dp: workaround BIOS eDP bpp clamping issue
+
+2013-09-24 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Add HSW CRT output readout support
+
+2013-10-28 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: dapm: Return -ENOMEM in snd_soc_dapm_new_dai_widgets()
+
+2013-10-28 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: dapm: Fix source list debugfs outputs
+
+2013-10-28 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2013-10-28 Arnaldo Carvalho de Melo <acme at redhat.com>
+
+ * perf tools: Fix up /proc/PID/maps parsing
+
+2013-10-25 Steffen Klassert <steffen.klassert at secunet.com>
+
+ * xfrm: Increase the garbage collector threshold
+
+2013-10-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.12-rc7
+
+2013-10-26 Henrik Austad <haustad at cisco.com>
+
+ * doc: add missing files to timers/00-INDEX
+
+2013-10-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'parisc-3.12' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
+
+2013-10-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-10-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-10-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-10-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending
+
+2013-10-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://git.infradead.org/users/vkoul/slave-dma
+
+2013-10-26 Helge Deller <deller at gmx.de>
+
+ * parisc: Do not crash 64bit SMP kernels on machines with >= 4GB RAM
+
+2013-08-21 Mats Kärrman <Mats.Karrman at tritech.se>
+
+ * UBIFS: correct data corruption range
+
+2013-06-07 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * UBIFS: fix return code
+
+2013-10-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.12-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2013-10-26 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix silent headphone on Thinkpads with AD1984A codec
+
+2013-10-25 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Add missing initial vmaster hook at build_controls callback
+
+2013-10-23 Thierry Reding <thierry.reding at gmail.com>
+
+ * PowerCap: Convert class code to use dev_groups
+
+2013-10-25 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: dmaengine: Use SNDRV_PCM_STREAM_LAST for array size
+
+2013-10-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus-20131025' of git://git.infradead.org/linux-mtd
+
+2013-10-25 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * vhost/scsi: Fix incorrect usage of get_user_pages_fast write parameter
+
+2013-10-25 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * target/pscsi: fix return value check
+
+2013-10-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
+
+2013-10-25 David Woodhouse <David.Woodhouse at intel.com>
+
+ * mtd: gpmi: fix ECC regression
+
+2013-10-25 Gu Zheng <guz.fnst at cn.fujitsu.com>
+
+ * seq_file: always update file->f_pos in seq_lseek()
+
+2013-10-25 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * acpi-cpufreq: Fail initialization if driver cannot be registered
+
+2013-10-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2013-10-24 Lan Tianyu <tianyu.lan at intel.com>
+
+ * x86/reboot: Correct pr_info() log message in the set_bios/pci/kbd_reboot()
+
+2013-10-25 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * swiotlb-xen: static inline xen_phys_to_bus, xen_bus_to_phys, xen_virt_to_bus and range_straddles_page_boundary
+
+2013-10-25 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * grant-table: call set_phys_to_machine after mapping grant refs
+
+2013-10-25 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * arm,arm64: do not always merge biovec if we are running on Xen
+
+2013-10-25 James Bottomley <JBottomley at Parallels.com>
+
+ * [SCSI] Revert "sg: use rwsem to solve race during exclusive open"
+
+2013-10-21 Anders F. U. Kiær <ablacksheep at gmail.com>
+
+ * HID: add support for LEETGION Hellion Gaming Mouse
+
+2013-10-25 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'spi/topic/wr' into spi-next
+
+2013-10-25 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'spi/topic/txx9' into spi-next
+
+2013-10-25 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'spi/topic/topcliff' into spi-next
+
+2013-10-25 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'spi/topic/tegra114' into spi-next
+
+2013-10-25 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'spi/topic/tegra-slink' into spi-next
+
+2013-10-25 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'spi/topic/tegra' into spi-next
+
+2013-10-25 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'spi/topic/s3c64xx' into spi-next
+
+2013-10-25 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'spi/topic/s3c24xx' into spi-next
+
+2013-10-21 Forest Bond <forest.bond at rapidrollout.com>
+
+ * HID: hid-multitouch: add support for SiS panels
+
+2013-10-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'ecryptfs-3.12-rc7-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tyhicks/ecryptfs
+
+2013-10-24 Russ Dill <Russ.Dill at ti.com>
+
+ * PM / hibernate: Move software_resume to late_initcall_sync
+
+2013-10-19 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * mtd: nand: pxa3xx: Fix registered MTD name
+
+2013-10-24 Colin Ian King <colin.king at canonical.com>
+
+ * eCryptfs: fix 32 bit corruption issue
+
+2013-10-24 Vinod Koul <vinod.koul at intel.com>
+
+ * dmaengine: edma: fix another memory leak
+
+2013-10-24 Valentin Ilie <valentin.ilie at gmail.com>
+
+ * dma: edma: Fix memory leak
+
+2013-10-24 Joseph Schuchart <joseph.schuchart at tu-dresden.de>
+
+ * perf script python: Fix mem leak due to missing Py_DECREFs on dict entries
+
+2013-10-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/wm8962' into asoc-next
+
+2013-10-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/wm8400' into asoc-next
+
+2013-10-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/twl6040' into asoc-next
+
+2013-10-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/twl4030' into asoc-next
+
+2013-10-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/topic/tps65910' into regulator-next
+
+2013-10-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/topic/tps6586x' into regulator-next
+
+2013-10-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/topic/tps65090' into regulator-next
+
+2013-10-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/topic/tps65023' into regulator-next
+
+2013-10-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/topic/tps6105x' into regulator-next
+
+2013-10-24 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Fail XCOPY for non matching source + destination block_size
+
+2013-10-24 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Generate failure for XCOPY I/O with non-zero scsi_status
+
+2013-10-24 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix unbalanced runtime PM refcount after S3/S4
+
+2013-10-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'md/3.12-fixes' of git://neil.brown.name/md
+
+2013-10-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
+
+2013-10-19 Shaohua Li <shli at kernel.org>
+
+ * raid5: avoid finding "discard" stripe
+
+2013-10-22 Daniel Borkmann <dborkman at redhat.com>
+
+ * net: sctp: fix ASCONF to allow non SCTP_ADDR_SRC addresses in ipv6
+
+2013-10-22 Dave Jiang <dave.jiang at intel.com>
+
+ * MAINTAINERS: add to ioatdma maintainer list
+
+2013-10-09 Mike Pagano <mpagano at gentoo.org>
+
+ * show_delta: Update script to support python versions 2.5 through 3.3
+
+2013-08-15 Wolfram Sang <wsa at the-dreams.de>
+
+ * scripts/coccinelle/api: remove devm_request_and_ioremap.cocci
+
+2013-10-23 Kirill Tkhai <tkhai at yandex.ru>
+
+ * scripts/tags.sh: Increase identifier list
+
+2013-09-24 Thomas Gleixner <tglx at linutronix.de>
+
+ * clockevents: Sanitize ticks to nsec conversion
+
+2013-10-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
+
+2013-10-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-rc' of git://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux
+
+2013-09-16 Randy Dunlap <rdunlap at infradead.org>
+
+ * platform/x86: fix asus-wmi build error
+
+2013-10-22 Kent Overstreet <kmo at daterainc.com>
+
+ * bcache: Fixed incorrect order of arguments to bio_alloc_bioset()
+
+2013-10-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'v4l_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
+
+2013-10-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
+
+2013-10-22 Jason Gerecke <killertofu at gmail.com>
+
+ * Input: wacom - add support for ISDv4 0x10E sensor
+
+2013-10-15 Jason Gerecke <killertofu at gmail.com>
+
+ * Input: wacom - add support for ISDv4 0x10F sensor
+
+2013-08-16 wang.bo116 at zte.com.cn <wang.bo116 at zte.com.cn>
+
+ * UBIFS: remove unnecessary code in ubifs_garbage_collect
+
+2013-10-21 Krzysztof Kozlowski <k.kozlowski at samsung.com>
+
+ * spi/s3c64xx: Fix doubled clock disable on suspend
+
+2013-10-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
+
+2013-10-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.12-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata
+
+2013-10-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.12-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
+
+2013-10-17 Martin Schwidefsky <schwidefsky at de.ibm.com>
+
+ * s390/time: correct use of store clock fast
+
+2013-10-21 Dirk Brandewie <dirk.j.brandewie at intel.com>
+
+ * intel_pstate: Correct calculation of min pstate value
+
+2013-10-21 Brennan Shacklett <brennan at genyes.org>
+
+ * intel_pstate: Improve accuracy by not truncating until final result
+
+2013-10-18 David Herrmann <dh.herrmann at gmail.com>
+
+ * HID: wiimote: add LEGO-wiimote VID
+
+2013-10-21 Zhang Rui <rui.zhang at intel.com>
+
+ * Merge branch 'x86_pkg_temp' of .git into for-rc
+
+2013-10-21 Zhang Rui <rui.zhang at intel.com>
+
+ * Revert "drivers: thermal: parent virtual hwmon with thermal zone"
+
+2013-10-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'parisc-3.12' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
+
+2013-10-20 Al Viro <viro at zeniv.linux.org.uk>
+
+ * nfsd regression since delayed fput()
+
+2013-10-20 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2013-10-19 Mark Brown <broonie at linaro.org>
+
+ * ALSA: Add MAINTAINERS entry for dmaengine helpers
+
+2013-10-19 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.12-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2013-10-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.12-rc6
+
+2013-10-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
+
+2013-10-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.12-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2013-10-17 Tetsuo Handa <penguin-kernel at I-love.SAKURA.ne.jp>
+
+ * mutex: Avoid gcc version dependent __builtin_constant_p() usage
+
+2013-10-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-10-18 Josef Bacik <jbacik at fusionio.com>
+
+ * Btrfs: release path before starting transaction in can_nocow_extent
+
+2013-10-14 Heiko Stuebner <heiko at sntech.de>
+
+ * MAINTAINERS: Add maintainers entry for Rockchip SoCs
+
+2013-09-13 Stephen Warren <swarren at nvidia.com>
+
+ * MAINTAINERS: Tegra updates, and driver ownership
+
+2013-10-13 Nikolai Kondrashov <spbnick at gmail.com>
+
+ * HID: Fix unit exponent parsing again
+
+2013-10-18 Xie XiuQi <xiexiuqi at huawei.com>
+
+ * timekeeping: Fix some trivial typos in comments
+
+2013-10-18 Xie XiuQi <xiexiuqi at huawei.com>
+
+ * mm: Fix some trivial typos in comments
+
+2013-10-18 Xie XiuQi <xiexiuqi at huawei.com>
+
+ * irq: Fix some trivial typos in comments
+
+2013-10-18 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'acpi-fixes'
+
+2013-10-18 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-fixes'
+
+2013-10-17 Jacob Pan <jacob.jun.pan at linux.intel.com>
+
+ * PowerCap: Introduce Intel RAPL power capping driver
+
+2013-10-17 Mark Brown <broonie at linaro.org>
+
+ * ASoC: wm8962: Move register initialisation to I2C probe()
+
+2013-10-17 Mark Brown <broonie at linaro.org>
+
+ * ASoC: wm8962: Move interrupt initalisation to probe()
+
+2013-10-17 Michal Kubecek <mkubecek at suse.cz>
+
+ * xfrm: prevent ipcomp scratch buffer race condition
+
+2013-10-15 Kees Cook <keescook at chromium.org>
+
+ * x86/relocs: Add percpu fixup for GNU ld 2.23
+
+2013-10-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.samba.org/sfrench/cifs-2.6
+
+2013-10-17 Nicolas Ferre <nicolas.ferre at atmel.com>
+
+ * tty/serial: at91: fix uart/usart selection for older products
+
+2013-10-17 Stephane Eranian <eranian at google.com>
+
+ * perf: Disable PERF_RECORD_MMAP2 support
+
+2013-10-14 Arnaldo Carvalho de Melo <acme at redhat.com>
+
+ * perf scripting perl: Fix build error on Fedora 12
+
+2013-10-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'driver-core-3.12-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core
+
+2013-10-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'usb-3.12-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
+
+2013-10-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'tty-3.12-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
+
+2013-10-16 Guenter Roeck <linux at roeck-us.net>
+
+ * usb: usb_phy_gen: refine conditional declaration of usb_nop_xceiv_register
+
+2013-10-17 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / PM: Drop two functions that are not used any more
+
+2013-10-05 Mark Brown <broonie at linaro.org>
+
+ * spi/tegra20-slink: Move first transfer preparation to prepare_message
+
+2013-10-05 Mark Brown <broonie at linaro.org>
+
+ * spi/tegra20-slink: Crude refactoring to use core message parsing
+
+2013-10-11 Yasuaki Ishimatsu <isimatu.yasuaki at jp.fujitsu.com>
+
+ * driver core: Release device_hotplug_lock when store_mem_state returns EINVAL
+
+2013-10-11 Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
+
+ * bitops: Introduce BIT_ULL
+
+2013-10-11 Jacob Pan <jacob.jun.pan at linux.intel.com>
+
+ * x86 / msr: add 64bit _on_cpu access functions
+
+2013-10-11 Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
+
+ * PowerCap: Add to drivers Kconfig and Makefile
+
+2013-10-11 Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
+
+ * PowerCap: Add class driver
+
+2013-10-11 Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
+
+ * PowerCap: Documentation
+
+2013-10-11 Geyslan G. Bem <geyslan at gmail.com>
+
+ * ecryptfs: Fix memory leakage in keystore.c
+
+2013-10-15 Alexei Starovoitov <ast at plumgrid.com>
+
+ * openvswitch: fix vport-netdev unregister
+
+2013-10-14 Roel Kluin <roel.kluin at gmail.com>
+
+ * serial: vt8500: add missing braces
+
+2013-09-19 Thomas Meyer <thomas at m3y3r.de>
+
+ * xtensa: Cocci spatch "noderef"
+
+2013-10-16 Bart Van Assche <bvanassche at acm.org>
+
+ * dlm: Avoid that dlm_release_lockspace() incorrectly returns -EBUSY
+
+2013-10-15 Bastien Nocera <hadess at hadess.net>
+
+ * Input: wacom - export battery scope
+
+2013-10-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'devicetree-for-linus' of git://git.secretlab.ca/git/linux
+
+2013-10-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes-for-v3.12' of git://git.linaro.org/people/mszyprowski/linux-dma-mapping
+
+2013-10-15 Ulf Hansson <ulf.hansson at linaro.org>
+
+ * PM / Runtime: Respect autosuspend when idle triggers suspend
+
+2013-10-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/virt/kvm/kvm
+
+2013-10-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'stable/for-linus-3.12-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
+
+2013-10-15 Baruch Siach <baruch at tkos.co.il>
+
+ * xtensa: don't use alternate signal stack on threads
+
+2013-10-11 Masami Hiramatsu <masami.hiramatsu.pt at hitachi.com>
+
+ * perf probe: Fix to initialize fname always before use it
+
+2013-10-10 Miklos Szeredi <mszeredi at suse.cz>
+
+ * ext[34]: fix double put in tmpfile
+
+2013-09-15 Eduardo Valentin <eduardo.valentin at ti.com>
+
+ * drivers: thermal: allow ti-soc-thermal run without pcb zone
+
+2013-10-09 Lukasz Majewski <l.majewski at samsung.com>
+
+ * thermal: exynos: Provide initial setting for TMU's test MUX address at Exynos4412
+
+2013-10-14 Lukasz Dorau <lukasz.dorau at intel.com>
+
+ * libahci: fix turning on LEDs in ahci_start_port()
+
+2013-10-14 Jingoo Han <jg1.han at samsung.com>
+
+ * regulator: tps65910: Fix checkpatch issue
+
+2013-10-14 Jingoo Han <jg1.han at samsung.com>
+
+ * regulator: tps65023: Fix checkpatch issue
+
+2013-10-14 Jingoo Han <jg1.han at samsung.com>
+
+ * spi: txx9: Fix checkpatch issue
+
+2013-10-14 Jingoo Han <jg1.han at samsung.com>
+
+ * spi: tegra20-slink: Fix checkpatch issue
+
+2013-10-14 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * s390/vmlogrdr: fix array access in vmlogrdr_open()
+
+2013-10-14 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * s390/compat,signal: fix return value of copy_siginfo_(to|from)_user32()
+
+2013-10-09 Stefan Haberland <stefan.haberland at de.ibm.com>
+
+ * s390/dasd: check for availability of prefix command during format
+
+2013-10-07 Martin Schwidefsky <schwidefsky at de.ibm.com>
+
+ * s390/mm,kvm: fix software dirty bits vs. kvm for old machines
+
+2013-10-09 Raghavendra K T <raghavendra.kt at linux.vnet.ibm.com>
+
+ * KVM: Enable pvspinlock after jump_label_init() to avoid VM hang
+
+2013-10-11 Marek Szyprowski <m.szyprowski at samsung.com>
+
+ * Revert "drivers: of: add initialization code for dma reserved memory"
+
+2013-10-11 Marek Szyprowski <m.szyprowski at samsung.com>
+
+ * Revert "ARM: init: add support for reserved memory defined by device tree"
+
+2013-10-14 Russ Anderson <rja at sgi.com>
+
+ * x86: Update UV3 hub revision ID
+
+2013-10-14 Jason Cooper <jason at lakedaemon.net>
+
+ * MAINTAINERS: ARM: mvebu: add Sebastian Hesselbarth
+
+2013-10-13 Tim Gardner <tim.gardner at canonical.com>
+
+ * cifs: ntstatus_to_dos_map[] is not terminated
+
+2013-10-14 Grant Likely <grant.likely at linaro.org>
+
+ * Revert "of: Feed entire flattened device tree into the random pool"
+
+2013-09-20 Simon Farnsworth <simon.farnsworth at onelan.co.uk>
+
+ * [media] saa7134: Fix crash when device is closed before streamoff
+
+2013-10-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.12-rc5
+
+2013-10-12 Anjana V Kumar <anjanavk12 at gmail.com>
+
+ * cgroup: fix to break the while loop in cgroup_attach_task() correctly
+
+2013-10-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://www.linux-watchdog.org/linux-watchdog
+
+2013-10-05 Maxime Ripard <maxime.ripard at free-electrons.com>
+
+ * watchdog: sunxi: Fix section mismatch
+
+2013-09-23 Jingoo Han <jg1.han at samsung.com>
+
+ * watchdog: kempld_wdt: Fix bit mask definition
+
+2013-08-23 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * watchdog: ts72xx_wdt: locking bug in ioctl
+
+2013-10-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2013-09-18 Yuvaraj Kumar C D <yuvaraj.cd at gmail.com>
+
+ * ARM: exynos: dts: Update 5250 arch timer node with clock frequency
+
+2013-10-13 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'fixes-against-v3.12-rc3-take2' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes
+
+2013-10-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'parisc-3.12' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
+
+2013-10-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://git.infradead.org/users/vkoul/slave-dma
+
+2013-10-09 Helge Deller <deller at gmx.de>
+
+ * parisc: let probe_kernel_read() capture access to page zero
+
+2013-10-05 John David Anglin <dave.anglin at bell.net>
+
+ * parisc: optimize variable initialization in do_page_fault
+
+2013-10-13 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86, boot: Rename get_flags() and check_flags() to *_cpuflags()
+
+2013-10-10 Kees Cook <keescook at chromium.org>
+
+ * x86, kaslr: Raise the maximum virtual address to -1 GiB on x86_64
+
+2013-10-10 Kees Cook <keescook at chromium.org>
+
+ * x86, kaslr: Report kernel offset on panic
+
+2013-10-10 Kees Cook <keescook at chromium.org>
+
+ * x86, kaslr: Select random position from e820 maps
+
+2013-10-10 Kees Cook <keescook at chromium.org>
+
+ * x86, kaslr: Provide randomness functions
+
+2013-10-10 Kees Cook <keescook at chromium.org>
+
+ * x86, kaslr: Return location from decompress_kernel
+
+2013-10-10 Kees Cook <keescook at chromium.org>
+
+ * x86, boot: Move CPU flags out of cpucheck
+
+2013-10-10 Michael Davidson <md at google.com>
+
+ * x86, relocs: Add more per-cpu gold special cases
+
+2013-09-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * vfs: allow O_PATH file descriptors for fstatfs()
+
+2013-10-11 Will Deacon <will.deacon at arm.com>
+
+ * net: smc91x: dont't use SMC_outw for fixing up halfword-aligned data
+
+2013-10-11 Salva Peiró <speiro at ai2.upv.es>
+
+ * farsync: fix info leak in ioctl
+
+2013-10-10 Oussama Ghorbel <ou.ghorbel at gmail.com>
+
+ * ipv6: Initialize ip6_tnl.hlen in gre tunnel even if no route is found
+
+2013-10-06 stephen hemminger <stephen at networkplumber.org>
+
+ * netem: free skb's in tree on reset
+
+2013-10-06 stephen hemminger <stephen at networkplumber.org>
+
+ * netem: update backlog after drop
+
+2013-10-10 Eric Dumazet <edumazet at google.com>
+
+ * l2tp: must disable bh before calling l2tp_xmit_skb()
+
+2013-10-10 Simon Horman <horms+renesas at verge.net.au>
+
+ * net: sh_eth: Correct fix for RX packet errors on R8A7740
+
+2013-10-10 Kent Overstreet <kmo at daterainc.com>
+
+ * aio: Fix a trinity splat
+
+2013-10-07 Geyslan G. Bem <geyslan at gmail.com>
+
+ * dma: edma.c: remove edma_desc leakage
+
+2013-09-25 Miao Xie <miaox at cn.fujitsu.com>
+
+ * Btrfs: fix oops caused by the space balance and dead roots
+
+2013-09-25 Miao Xie <miaox at cn.fujitsu.com>
+
+ * Btrfs: insert orphan roots into fs radix tree
+
+2013-10-10 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus-20131008' of git://git.infradead.org/linux-mtd
+
+2013-10-08 Laxman Dewangan <ldewangan at nvidia.com>
+
+ * regulator: tps65910: get regulators node from parent node only
+
+2013-10-08 Laxman Dewangan <ldewangan at nvidia.com>
+
+ * regulator: tps6586x: get regulators node from parent node only
+
+2013-10-08 Laxman Dewangan <ldewangan at nvidia.com>
+
+ * regulator: tps65090: get regulators node from parent node only
+
+2013-10-02 AceLan Kao <acelan.kao at canonical.com>
+
+ * HID: usbhid: quirk for SiS Touchscreen
+
+2013-09-23 Pali Rohár <pali.rohar at gmail.com>
+
+ * ARM: OMAP2: RX-51: Add missing max_current to rx51_lp5523_led_config
+
+2013-10-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-07-23 Jonathan Austin <jonathan.austin at arm.com>
+
+ * clk: fixup argument order when setting VCO parameters
+
+2013-10-08 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2013-10-07 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "serial: i.MX: evaluate linux,stdout-path property"
+
+2013-09-17 Dinh Nguyen <dinguyen at altera.com>
+
+ * clk: socfpga: Fix incorrect sdmmc clock name
+
+2013-10-08 Benjamin Herrenschmidt <benh at kernel.crashing.org>
+
+ * powerpc/irq: Don't switch to irq stack from softirq stack
+
+2009-08-07 Gwendal Grignou <gwendal at google.com>
+
+ * libata: make ata_eh_qc_retry() bump scmd->allowed on bogus failures
+
+2013-10-02 Luosong <android at generaltouch.com>
+
+ * HID: multitouch: Fix GeneralTouch products and add more PIDs
+
+2013-09-27 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: Allow LANMAN auth method for servers supporting unencapsulated authentication methods
+
+2013-10-06 Jan Klos <honza.klos at gmail.com>
+
+ * cifs: Fix inability to write files >2GB to SMB2/3 shares
+
+2013-10-06 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: twl6040: Use virtual DAPM mixer controls
+
+2013-10-03 Simon Guinot <simon.guinot at sequanux.org>
+
+ * clk: armada-370: fix tclk frequencies
+
+2013-10-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.12-rc4
+
+2013-10-05 Eric W. Biederman <ebiederm at xmission.com>
+
+ * net: Update the sysctl permissions handler to test effective uid/gid
+
+2013-10-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending
+
+2013-10-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://git.infradead.org/users/vkoul/slave-dma
+
+2013-09-19 Tim Gardner <tim.gardner at canonical.com>
+
+ * Input: cm109 - convert high volume dev_err() to dev_err_ratelimited()
+
+2013-10-06 David Herrmann <dh.herrmann at gmail.com>
+
+ * Input: move name/timer init to input_alloc_dev()
+
+2013-10-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
+
+2013-10-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'gpio-v3.12-2' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio
+
+2013-10-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'usb-3.12-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
+
+2013-10-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'tty-3.12-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
+
+2013-10-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'staging-3.12-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
+
+2013-09-19 Darrick J. Wong <darrick.wong at oracle.com>
+
+ * btrfs: Fix crash due to not allocating integrity data for a bioset
+
+2013-10-05 Chris Mason <chris.mason at fusionio.com>
+
+ * Merge branch 'for-linus' into for-linus-3.12
+
+2013-10-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.samba.org/sfrench/cifs-2.6
+
+2013-10-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pci-v3.12-fixes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
+
+2013-10-04 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Revert "x86/PCI: MMCONFIG: Check earlier for MMCONFIG region at address zero"
+
+2013-10-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.12-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2013-10-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'xfs-for-linus-v3.12-rc4' of git://oss.sgi.com/xfs/xfs
+
+2013-10-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * selinux: remove 'flags' parameter from avc_audit()
+
+2013-10-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * selinux: avc_has_perm_flags has no more users
+
+2013-10-02 Ilya Dryomov <idryomov at gmail.com>
+
+ * Btrfs: fix a use-after-free bug in btrfs_dev_replace_finishing
+
+2013-10-02 Ilya Dryomov <idryomov at gmail.com>
+
+ * Btrfs: eliminate races in worker stopping code
+
+2013-10-01 Liu Bo <bo.li.liu at oracle.com>
+
+ * Btrfs: fix crash of compressed writes
+
+2013-09-30 Josef Bacik <jbacik at fusionio.com>
+
+ * Btrfs: fix transid verify errors when recovering log tree
+
+2013-10-01 Thierry Reding <thierry.reding at gmail.com>
+
+ * xfs: Use kmem_free() instead of free()
+
+2013-09-27 tinguely at sgi.com <tinguely at sgi.com>
+
+ * xfs: fix memory leak in xlog_recover_add_to_trans
+
+2013-09-30 Namhyung Kim <namhyung.kim at lge.com>
+
+ * perf session: Fix infinite loop on invalid perf.data file
+
+2013-09-17 Michael Grzeschik <m.grzeschik at pengutronix.de>
+
+ * dmaengine: imx-dma: fix callback path in tasklet
+
+2013-09-17 Michael Grzeschik <m.grzeschik at pengutronix.de>
+
+ * dmaengine: imx-dma: fix lockdep issue between irqhandler and tasklet
+
+2013-09-17 Michael Grzeschik <m.grzeschik at pengutronix.de>
+
+ * dmaengine: imx-dma: fix slow path issue in prep_dma_cyclic
+
+2013-10-03 Peter Zijlstra <peterz at infradead.org>
+
+ * perf/x86: Clean up cap_user_time* setting
+
+2013-10-01 David Vrabel <david.vrabel at citrix.com>
+
+ * xen/hvc: allow xenboot console to be used again
+
+2013-10-01 David Cohen <david.a.cohen at linux.intel.com>
+
+ * usb: chipidea: add Intel Clovertrail pci id
+
+2013-10-02 Ian Abbott <abbotti at mev.co.uk>
+
+ * staging: comedi: ni_65xx: (bug fix) confine insn_bits to one subdevice
+
+2013-10-03 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi-target; Allow an extra tag_num / 2 number of percpu_ida tags
+
+2013-10-03 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi-target: Perform release of acknowledged tags from RX context
+
+2013-10-03 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi-target: Only perform wait_for_tasks when performing shutdown
+
+2013-10-02 Mike Travis <travis at sgi.com>
+
+ * x86/UV: Add call to KGDB/KDB from NMI handler
+
+2013-10-02 Mike Travis <travis at sgi.com>
+
+ * kdb: Add support for external NMI handler to call KGDB/KDB
+
+2013-09-28 Richard Weinberger <richard at nod.at>
+
+ * UBI: Add some asserts to ubi_attach_fastmap()
+
+2013-09-28 Richard Weinberger <richard at nod.at>
+
+ * UBI: Fix memory leak in ubi_attach_fastmap() error path
+
+2013-09-28 Richard Genoud <richard.genoud at gmail.com>
+
+ * UBI: simplify image sequence test
+
+2013-09-28 Richard Genoud <richard.genoud at gmail.com>
+
+ * UBI: fastmap: fix backward compatibility with image_seq
+
+2013-09-27 Lars-Peter Clausen <lars at metafoo.de>
+
+ * staging:iio:ade7753/ade7754/ade7759: Use spi_w8r16be() instead of spi_w8r16()
+
+2013-09-27 Lars-Peter Clausen <lars at metafoo.de>
+
+ * hwmon: (adt7310) Use spi_w8r16be() instead spi_w8r16()
+
+2013-09-27 Lars-Peter Clausen <lars at metafoo.de>
+
+ * spi: Add a spi_w8r16be() helper
+
+2013-10-01 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Fail on non zero scsi_status in compare_and_write_callback
+
+2013-10-01 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Fix recursive COMPARE_AND_WRITE callback failure
+
+2013-10-01 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Reset data_length for COMPARE_AND_WRITE to NoLB * block_size
+
+2013-09-30 Jack Wang <jinpu.wang at profitbricks.com>
+
+ * ib_srpt: always set response for task management
+
+2013-10-02 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-fixes'
+
+2013-10-02 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'acpi-fixes'
+
+2013-10-02 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-cpufreq-fixes' into pm-fixes
+
+2013-09-27 Andreas Herrmann <andreas.herrmann at calxeda.com>
+
+ * ARM: dma-mapping: Always pass proper prot flags to iommu_map()
+
+2013-09-13 Linus Walleij <linus.walleij at linaro.org>
+
+ * clk: nomadik: set all timers to use 2.4 MHz TIMCLK
+
+2013-09-23 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Fall back to vzalloc upon ->sess_cmd_map kzalloc failure
+
+2013-09-23 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * vhost/scsi: Use GFP_ATOMIC with percpu_ida_alloc for obtaining tag
+
+2013-09-18 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * ib_srpt: Destroy cm_id before destroying QP.
+
+2013-09-18 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Fix xop->dbl assignment in target_xcopy_parse_segdesc_02
+
+2013-10-01 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'fixes-for-v3.12-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb into usb-linus
+
+2013-10-01 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI: Use EXPORT_SYMBOL() for acpi_bus_get_device()
+
+2013-10-01 Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
+
+ * intel_pstate: fix no_turbo
+
+2013-09-24 Robert Baldyga <r.baldyga at samsung.com>
+
+ * usb: gadget: s3c-hsotg: fix can_write limit for non-periodic endpoints
+
+2013-09-27 Robert Baldyga <r.baldyga at samsung.com>
+
+ * usb: gadget: f_fs: fix error handling
+
+2013-10-01 Sebastian Andrzej Siewior <bigeasy at linutronix.de>
+
+ * usb: musb: dsps: do not bind to "musb-hdrc"
+
+2013-09-25 Javier Martinez Canillas <javier.martinez at collabora.co.uk>
+
+ * gpio/omap: auto-setup a GPIO when used as an IRQ
+
+2013-09-25 Javier Martinez Canillas <javier.martinez at collabora.co.uk>
+
+ * gpio/omap: maintain GPIO and IRQ usage separately
+
+2013-09-30 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'iio-fixes-for-3.12b2' of git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio into staging-linus
+
+2013-09-27 Peter Hurley <peter at hurleysoftware.com>
+
+ * tty: Fix pty master read() after slave closes
+
+2013-09-28 Michal Malý <madcatxster at prifuk.cz>
+
+ * USB: serial: option: Ignore card reader interface on Huawei E1750
+
+2013-09-24 Denis CIOCCA <denis.ciocca at st.com>
+
+ * iio:magnetometer: Bugfix magnetometer default output registers
+
+2013-09-21 Lars-Peter Clausen <lars at metafoo.de>
+
+ * iio: Remove debugfs entries in iio_device_unregister()
+
+2013-09-30 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * PM / hibernate: Fix user space driven resume regression
+
+2013-09-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.12-rc3
+
+2013-09-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'usb-3.12-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
+
+2013-09-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'tty-3.12-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
+
+2013-09-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'staging-3.12-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
+
+2013-09-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'driver-core-3.12-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core
+
+2013-09-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'char-misc-3.12-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
+
+2013-09-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-09-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2013-09-29 Ingo Molnar <mingo at kernel.org>
+
+ * Revert "perf symbols: Demangle cloned functions"
+
+2013-09-24 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * iio: amplifiers: ad8366: Remove regulator_put
+
+2013-09-26 Rhyland Klein <rklein at nvidia.com>
+
+ * spi/tegra114: Correct support for cs_change
+
+2013-09-17 Elie De Brauwer <eliedebrauwer at gmail.com>
+
+ * mtd: m25p80: Fix 4 byte addressing mode for Micron devices.
+
+2013-09-16 Brian Norris <computersforpeace at gmail.com>
+
+ * mtd: nand: fix memory leak in ONFI extended parameter page
+
+2013-09-27 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * ASoC: wm8993: drop regulator_bulk_free of devm_ allocated data
+
+2013-09-26 Larry Finger <Larry.Finger at lwfinger.net>
+
+ * staging: r8188eu: Add new device ID
+
+2013-09-26 David Cohen <david.a.cohen at linux.intel.com>
+
+ * usb: dwc3: add support for Merrifield
+
+2013-09-02 Shengzhou Liu <Shengzhou.Liu at freescale.com>
+
+ * USB: fsl/ehci: fix failure of checking PHY_CLK_VALID during reinitialization
+
+2013-09-20 Al Viro <viro at ZenIV.linux.org.uk>
+
+ * USB: Fix breakage in ffs_fs_mount()
+
+2013-09-24 Benson Leung <bleung at chromium.org>
+
+ * driver core : Fix use after free of dev->parent in device_shutdown
+
+2013-09-23 Eric W. Biederman <ebiederm at xmission.com>
+
+ * sysfs: Allow mounting without CONFIG_NET
+
+2013-09-04 K. Y. Srinivasan <kys at microsoft.com>
+
+ * Drivers: hv: vmbus: Terminate vmbus version negotiation on timeout
+
+2013-09-06 K. Y. Srinivasan <kys at microsoft.com>
+
+ * Drivers: hv: util: Correctly support ws2008R2 and earlier
+
+2013-09-26 Paul Moore <pmoore at redhat.com>
+
+ * selinux: correct locking in selinux_netlbl_socket_connect)
+
+2013-09-26 Duan Jiong <duanj.fnst at cn.fujitsu.com>
+
+ * selinux: Use kmemdup instead of kmalloc + memcpy
+
+2013-08-31 Gabor Juhos <juhosg at openwrt.org>
+
+ * tty: ar933x_uart: move devicetree binding documentation
+
+2013-09-16 Ramneek Mehresh <ramneek.mehresh at freescale.com>
+
+ * fsl/usb: Resolve PHY_CLK_VLD instability issue for ULPI phy
+
+2013-09-25 Peter Hurley <peter at hurleysoftware.com>
+
+ * tty: Fix SIGTTOU not sent with tcflush()
+
+2013-09-24 Kurt Garloff <kurt at garloff.de>
+
+ * usb/core/devio.c: Don't reject control message to endpoint with wrong direction bit
+
+2013-09-17 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * usb: chipidea: USB_CHIPIDEA should depend on HAS_DMA
+
+2013-09-25 Steve French <smfrench at gmail.com>
+
+ * [CIFS] update cifs.ko version
+
+2013-09-25 Steve French <smfrench at gmail.com>
+
+ * [CIFS] Remove ext2 flags that have been moved to fs.h
+
+2013-09-17 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * staging: imx-drm: Fix probe failure
+
+2013-09-23 Malcolm Priestley <tvboxspy at gmail.com>
+
+ * staging: vt6656: [BUG] iwctl_siwencodeext return if device not open
+
+2013-09-22 Malcolm Priestley <tvboxspy at gmail.com>
+
+ * staging: vt6656: [BUG] main_usb.c oops on device_close move flag earlier.
+
+2013-09-17 Bin Liu <b-liu at ti.com>
+
+ * usb: musb: gadget: fix otg active status flag
+
+2013-09-24 Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
+
+ * Thermal: x86_pkg_temp: change spin lock
+
+2013-09-24 Paul E. McKenney <paulmck at linux.vnet.ibm.com>
+
+ * mm: Place preemption point in do_mlockall() loop
+
+2013-09-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (patches from Andrew Morton)
+
+2013-09-24 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * spi: spi-topcliff-pch: fix a pci_iomap() check
+
+2013-09-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.12-rc2
+
+2013-09-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'staging-3.12-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
+
+2013-09-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'usb-3.12-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
+
+2013-09-23 Johan Hovold <jhovold at gmail.com>
+
+ * usb: phy: gpio-vbus: fix deferred probe from __init
+
+2013-09-23 Johan Hovold <jhovold at gmail.com>
+
+ * usb: gadget: pxa25x_udc: fix deferred probe from __init
+
+2013-09-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2013-09-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.12/core' of git://git.kernel.dk/linux-block
+
+2013-09-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
+
+2013-09-22 Anatol Pomozov <anatol.pomozov at gmail.com>
+
+ * cfq: explicitly use 64bit divide operation for 64bit arguments
+
+2013-09-21 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'iio-fixes-for-3.12a' of git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio into staging-linus
+
+2013-09-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'nfs-for-3.12-3' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
+
+2013-09-21 Jun'ichi Nomura <j-nomura at ce.jp.nec.com>
+
+ * block: Add nr_bios to block_rq_remap tracepoint
+
+2013-09-20 Josef Bacik <jbacik at fusionio.com>
+
+ * Btrfs: create the uuid tree on remount rw
+
+2013-09-21 Jim McDonough <jmcd at samba.org>
+
+ * [CIFS] Provide sane values for nlink
+
+2013-09-17 Mark Fasheh <mfasheh at suse.de>
+
+ * btrfs: change extent-same to copy entire argument struct
+
+2013-09-16 Guangyu Sun <guangyu.sun at oracle.com>
+
+ * Btrfs: dir_inode_operations should use btrfs_update_time also
+
+2013-09-13 Frank Holton <fholton at gmail.com>
+
+ * btrfs: Add btrfs: prefix to kernel log output
+
+2013-09-13 David Sterba <dsterba at suse.cz>
+
+ * btrfs: refuse to remount read-write after abort
+
+2013-09-13 chandan <chandan at linux.vnet.ibm.com>
+
+ * Btrfs: btrfs_ioctl_default_subvol: Revert back to toplevel subvolume when arg is 0
+
+2013-09-11 Filipe David Borba Manana <fdmanana at gmail.com>
+
+ * Btrfs: don't leak transaction in btrfs_sync_file()
+
+2013-09-11 Stefan Behrens <sbehrens at giantdisaster.de>
+
+ * Btrfs: add the missing mutex unlock in write_all_supers()
+
+2013-09-18 Lars-Peter Clausen <lars at metafoo.de>
+
+ * iio:buffer_cb: Add missing iio_buffer_init()
+
+2013-09-18 Lars-Peter Clausen <lars at metafoo.de>
+
+ * iio: Prevent race between IIO chardev opening and IIO device free
+
+2013-09-18 Lars-Peter Clausen <lars at metafoo.de>
+
+ * iio: fix: Keep a reference to the IIO device for open file descriptors
+
+2013-09-18 Lars-Peter Clausen <lars at metafoo.de>
+
+ * iio: Stop sampling when the device is removed
+
+2013-09-18 Peter Meerwald <pmeerw at pmeerw.net>
+
+ * iio: Fix crash when scan_bytes is computed with active_scan_mask == NULL
+
+2013-09-18 Peter Meerwald <pmeerw at pmeerw.net>
+
+ * iio: Fix mcp4725 dev-to-indio_dev conversion in suspend/resume
+
+2013-09-18 Peter Meerwald <pmeerw at pmeerw.net>
+
+ * iio: Fix bma180 dev-to-indio_dev conversion in suspend/resume
+
+2013-09-18 Peter Meerwald <pmeerw at pmeerw.net>
+
+ * iio: Fix tmp006 dev-to-indio_dev conversion in suspend/resume
+
+2013-09-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.12-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2013-09-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
+
+2013-09-20 David Howells <dhowells at redhat.com>
+
+ * CacheFiles: Don't try to dump the index key if the cookie has been cleared
+
+2013-09-20 Josh Boyer <jwboyer at redhat.com>
+
+ * CacheFiles: Fix memory leak in cachefiles_check_auxdata error paths
+
+2013-09-19 Will Deacon <will.deacon at arm.com>
+
+ * lockref: use cmpxchg64 explicitly for lockless updates
+
+2013-09-20 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-cpufreq'
+
+2013-09-20 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'acpi-pci'
+
+2013-09-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'arm64-stable' of git://git.kernel.org/pub/scm/linux/kernel/git/cmarinas/linux-aarch64
+
+2013-09-18 Steve Capper <Steve.Capper at arm.com>
+
+ * arm64: Widen hwcap to be 64 bit
+
+2013-09-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2013-09-20 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'msm-fixes-3.12' of git://people.freedesktop.org/~robclark/linux into drm-fixes
+
+2013-09-20 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'exynos-drm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/daeinki/drm-exynos into drm-fixes
+
+2013-09-20 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm-intel-fixes-2013-09-19' of git://people.freedesktop.org/~danvet/drm-intel into drm-fixes
+
+2013-09-18 Yinghai Lu <yinghai at kernel.org>
+
+ * cpufreq: return EEXIST instead of EBUSY for second registering
+
+2013-09-20 Dave Airlie <airlied at redhat.com>
+
+ * Revert "drm: mark context support as a legacy subsystem"
+
+2013-09-14 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * PCI / ACPI / PM: Clear pme_poll for devices in D3cold on wakeup
+
+2013-09-13 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/fb-helper: don't sleep for screen unblank when an oops is in progress
+
+2013-09-10 Sudeep KarkadaNagesha <sudeep.karkadanagesha at arm.com>
+
+ * ARM: shmobile: change dev_id to cpu0 while registering cpu clock
+
+2013-09-10 Sudeep KarkadaNagesha <sudeep.karkadanagesha at arm.com>
+
+ * ARM: i.MX: change dev_id to cpu0 while registering cpu clock
+
+2013-09-10 Sudeep KarkadaNagesha <sudeep.karkadanagesha at arm.com>
+
+ * cpufreq: imx6q-cpufreq: assign cpu_dev correctly to cpu0 device
+
+2013-09-10 Sudeep KarkadaNagesha <sudeep.karkadanagesha at arm.com>
+
+ * cpufreq: cpufreq-cpu0: assign cpu_dev correctly to cpu0 device
+
+2013-09-13 Prarit Bhargava <prarit at redhat.com>
+
+ * drm, ttm Fix uninitialized warning
+
+2013-09-17 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/ttm: fix the tt_populated check in ttm_tt_destroy()
+
+2013-09-19 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-nouveau-next' of git://anongit.freedesktop.org/git/nouveau/linux-2.6 into drm-fixes
+
+2013-09-16 Andrey Moiseev <o2g.org.ru at gmail.com>
+
+ * Input: i8042 - i8042_flush fix for a full 8042 buffer
+
+2013-09-18 Lukasz Czerwinski <l.czerwinski at samsung.com>
+
+ * iio: iio_device_add_event_sysfs() bugfix
+
+2013-09-11 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * staging: iio: ade7854-spi: Fix return value
+
+2013-09-03 Peter Meerwald <pmeerw at pmeerw.net>
+
+ * staging:iio:hmc5843: Fix measurement conversion
+
+2013-09-18 Paul Moore <pmoore at redhat.com>
+
+ * selinux: add Paul Moore as a SELinux maintainer
+
+2013-09-18 Paul Moore <pmoore at redhat.com>
+
+ * Merge git://git.infradead.org/users/eparis/selinux
+
+2013-09-16 Jeff Layton <jlayton at redhat.com>
+
+ * cifs: stop trying to use virtual circuits
+
+2013-09-18 J. Bruce Fields <bfields at redhat.com>
+
+ * RPCSEC_GSS: fix crash on destroying gss auth
+
+2013-09-04 David Howells <dhowells at redhat.com>
+
+ * CIFS: FS-Cache: Uncache unread pages in cifs_readpages() before freeing them
+
+2013-09-18 Mike Dunn <mikedunn at newsguy.com>
+
+ * Input: pxa27x_keypad - fix NULL pointer dereference
+
+2013-09-18 Mike Christie <michaelc at cs.wisc.edu>
+
+ * If the queue is dying then we only call the rq->end_io callout. This leaves bios setup on the request, because the caller assumes when the blk_execute_rq_nowait/blk_execute_rq call has completed that the rq->bios have been cleaned up.
+
+2013-09-17 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Don't enable the cursor on a disable pipe
+
+2013-09-17 Jani Nikula <jani.nikula at intel.com>
+
+ * drm/i915: do not update cursor in crtc mode set
+
+2013-09-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
+
+2013-09-15 Oleg Nesterov <oleg at redhat.com>
+
+ * tty: disassociate_ctty() sends the extra SIGCONT
+
+2013-09-17 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'fixes-for-v3.12-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb into usb-linus
+
+2013-09-17 Michael S. Tsirkin <mst at redhat.com>
+
+ * vhost-scsi: whitespace tweak
+
+2013-09-17 Michael S. Tsirkin <mst at redhat.com>
+
+ * vhost/scsi: use vmalloc for order-10 allocation
+
+2013-05-29 Bjorn Helgaas <bhelgaas at google.com>
+
+ * bio-integrity: Fix use of bs->bio_integrity_pool after free
+
+2013-09-17 Peter Hurley <peter at hurleysoftware.com>
+
+ * n_tty: Fix EOF push index when termios changes
+
+2013-09-10 Johan Hovold <jhovold at gmail.com>
+
+ * serial: pch_uart: remove unnecessary tty_port_tty_get
+
+2013-09-10 Johan Hovold <jhovold at gmail.com>
+
+ * serial: pch_uart: fix tty-kref leak in dma-rx path
+
+2013-09-14 Frank Schäfer <fschaefer.oss at googlemail.com>
+
+ * USB: pl2303: distinguish between original and cloned HX chips
+
+2013-09-03 Dave Jones <davej at redhat.com>
+
+ * USB: Faraday fotg210: fix email addresses
+
+2013-09-03 Dave Jones <davej at redhat.com>
+
+ * USB: fix typo in usb serial simple driver Kconfig
+
+2013-09-12 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "USB: EHCI: support running URB giveback in tasklet context"
+
+2013-09-12 Chanho Park <chanho61.park at samsung.com>
+
+ * usb: s3c-hsotg: do not disconnect gadget when receiving ErlySusp intr
+
+2013-09-12 Marek Szyprowski <m.szyprowski at samsung.com>
+
+ * usb: s3c-hsotg: fix unregistration function
+
+2013-09-16 Peter Oh <poh at broadcom.com>
+
+ * usb: gadget: f_mass_storage: reset endpoint driver data when disabled
+
+2013-09-16 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * usb: host: fsl-mph-dr-of: Staticize local symbols
+
+2013-09-16 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * usb: gadget: f_eem: Staticize eem_alloc
+
+2013-09-16 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * usb: gadget: f_ecm: Staticize ecm_alloc
+
+2013-09-16 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * usb: phy: omap-usb3: Fix return value
+
+2013-09-11 David Cohen <david.a.cohen at linux.intel.com>
+
+ * usb: dwc3: gadget: avoid memory leak when failing to allocate all eps
+
+2013-09-10 Heikki Krogerus <heikki.krogerus at linux.intel.com>
+
+ * usb: dwc3: remove extcon dependency
+
+2013-09-02 Chen Gang <gang.chen at asianux.com>
+
+ * usb: gadget: add '__ref' for rndis_config_register() and cdc_config_register()
+
+2013-09-13 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * staging: line6: add bounds check in snd_toneport_source_put()
+
+2013-09-01 Ben Hutchings <ben at decadent.org.uk>
+
+ * Staging: comedi: Fix dependencies for drivers misclassified as PCI
+
+2013-09-04 Larry Finger <Larry.Finger at lwfinger.net>
+
+ * staging: r8188eu: Adjust RX gain
+
+2013-08-31 Larry Finger <Larry.Finger at lwfinger.net>
+
+ * staging: r8188eu: Fix smatch warning in core/rtw_ieee80211.
+
+2013-08-31 Larry Finger <Larry.Finger at lwfinger.net>
+
+ * staging: r8188eu: Fix smatch error in core/rtw_mlme_ext.c
+
+2013-08-31 Larry Finger <Larry.Finger at lwfinger.net>
+
+ * staging: r8188eu: Fix Smatch off-by-one warning in hal/rtl8188e_hal_init.c
+
+2013-09-08 Guenter Roeck <linux at roeck-us.net>
+
+ * staging: Disable lustre file system for MIPS, SH, and XTENSA
+
+2013-09-12 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "staging: zram: Add auto loading of module if user opens /dev/zram."
+
+2013-09-05 Aaro Koskinen <aaro.koskinen at iki.fi>
+
+ * staging: octeon-ethernet: rgmii: enable interrupts that we can handle
+
+2013-09-05 Aaro Koskinen <aaro.koskinen at iki.fi>
+
+ * staging: octeon-ethernet: remove skb alloc failure warnings
+
+2013-09-05 Aaro Koskinen <aaro.koskinen at iki.fi>
+
+ * staging: octeon-ethernet: make dropped packets to consume NAPI budget
+
+2013-09-17 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau/ttm: prevent double-free in nouveau_sgdma_create_ttm() failure path
+
+2013-09-17 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau/bios/init: fix thinko in INIT_CONFIGURE_MEM
+
+2013-06-07 Qin Chuanyu <qinchuanyu at huawei.com>
+
+ * vhost: wake up worker outside spin_lock
+
+2013-09-04 Josh Boyer <jwboyer at redhat.com>
+
+ * dma/Kconfig: Make TI_EDMA select TI_PRIV_EDMA
+
+2013-09-04 Josh Boyer <jwboyer at redhat.com>
+
+ * edma: Update author email address
+
+2013-08-31 Mark Brown <broonie at linaro.org>
+
+ * ASoC: wm8400: Use regmap for I/O
+
+2013-08-31 Mark Brown <broonie at linaro.org>
+
+ * ASoC: wm8400: Use supplies to manage input power
+
+2013-08-30 Axel Lin <axel.lin at ingics.com>
+
+ * spi: tegra: Use DIV_ROUND_UP instead of open coded
+
+2013-09-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.12-rc1
+
+2013-09-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'timers/core' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-09-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-next' of git://git.samba.org/sfrench/cifs-2.6
+
+2013-09-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'upstream-3.12-rc1' of git://git.infradead.org/linux-ubi
+
+2013-09-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'upstream-3.12-rc1' of git://git.infradead.org/linux-ubifs
+
+2013-09-11 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * drm/exynos: fix return value check in lowlevel_buffer_allocate()
+
+2013-09-05 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * drm/exynos: Fix address space warnings in exynos_drm_fbdev.c
+
+2013-09-05 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * drm/exynos: Fix address space warning in exynos_drm_buf.c
+
+2013-09-05 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * drm/exynos: Remove redundant OF dependency
+
+2013-09-14 Rob Clark <robdclark at gmail.com>
+
+ * drm/msm: drop unnecessary set_need_resched()
+
+2013-09-16 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-fixes-3.12' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
+
+2013-09-15 Christian König <christian.koenig at amd.com>
+
+ * drm/radeon: avoid UVD corruptions on AGP cards
+
+2013-09-12 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/udl: rip out set_need_resched
+
+2013-09-16 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-fixes-3.12' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
+
+2013-09-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'upstream' of git://git.linux-mips.org/pub/scm/ralf/upstream-linus
+
+2013-09-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for_linus' of git://cavan.codon.org.uk/platform-drivers-x86
+
+2013-09-10 Björn Jacke <bj at sernet.de>
+
+ * cifs: update cifs.txt and remove some outdated infos
+
+2013-09-13 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: Avoid calling unlock_page() twice in cifs_readpage() when using fscache
+
+2013-09-13 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: Do not take a reference to the page in cifs_readpage_worker()
+
+2013-09-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging
+
+2013-09-11 Benjamin Tissoires <benjamin.tissoires at redhat.com>
+
+ * HID: lenovo-tpkbd: fix leak if tpkbd_probe_tp fails
+
+2013-09-11 Markos Chandras <markos.chandras at imgtec.com>
+
+ * MIPS: kernel: vpe: Make vpe_attrs an array of pointers.
+
+2013-09-12 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915: kill set_need_resched
+
+2013-09-11 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * drm/msm: fix potential NULL pointer dereference
+
+2013-09-12 Dave Airlie <airlied at redhat.com>
+
+ * drm/ast: fix the ast open key function
+
+2013-09-11 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915/dvo: set crtc timings again for panel fixed modes
+
+2013-09-11 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915/sdvo: Robustify the dtd<->drm_mode conversions
+
+2013-09-11 Rob Clark <robdclark at gmail.com>
+
+ * drm/msm: workaround for missing irq
+
+2013-09-11 Rob Clark <robdclark at gmail.com>
+
+ * drm/msm: return -EBUSY if bo still active
+
+2013-09-11 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * drm/msm: fix return value check in ERR_PTR()
+
+2013-08-28 Tejun Heo <tj at kernel.org>
+
+ * blkcg: relocate root_blkg setting and clearing
+
+2013-08-29 Joe Perches <joe at perches.com>
+
+ * block: Convert kmalloc_node(...GFP_ZERO...) to kzalloc_node(...)
+
+2013-09-11 Jianpeng Ma <majianpeng at gmail.com>
+
+ * block: trace all devices plug operation
+
+2013-09-09 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/dpm: add bapm callback for kb/kv
+
+2013-09-09 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/dpm: add bapm callback for trinity
+
+2013-08-16 Andi Kleen <ak at linux.intel.com>
+
+ * x86: Add 1/2/4/8 byte optimization to 64bit __copy_{from,to}_user_inatomic
+
+2013-09-06 Rob Clark <robdclark at gmail.com>
+
+ * drm/msm: fix cmdstream size check
+
+2013-09-03 Rob Clark <robdclark at gmail.com>
+
+ * drm/msm: hangcheck harder
+
+2013-09-01 Rob Clark <robdclark at gmail.com>
+
+ * drm/msm: handle read vs write fences
+
+2013-09-10 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915/sdvo: Fully translate sync flags in the dtd->mode conversion
+
+2013-09-10 Takashi Iwai <tiwai at suse.de>
+
+ * drm/i915: Use proper print format for debug prints
+
+2013-09-09 Dave Airlie <airlied at gmail.com>
+
+ * drm/nouveau: fix oops on runtime suspend/resume
+
+2013-09-07 David Herrmann <dh.herrmann at gmail.com>
+
+ * Input: evdev - add EVIOCREVOKE ioctl
+
+2013-09-06 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Merge branch 'next' into for-linus
+
+2013-09-05 Dave Airlie <airlied at gmail.com>
+
+ * Merge branch 'exynos-drm-next' of git://git.kernel.org/pub/scm/linux/kernel/git/daeinki/drm-exynos into drm-next
+
+2013-09-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.11
+
+2013-08-20 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * clocksource: armada-370-xp: Add detailed clock requirements in devicetree binding
+
+2013-08-20 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * clocksource: armada-370-xp: Get reference fixed-clock by name
+
+2013-08-20 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * clocksource: armada-370-xp: Replace WARN_ON with BUG_ON
+
+2013-08-13 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * clocksource: armada-370-xp: Fix device-tree binding
+
+2013-08-13 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * clocksource: armada-370-xp: Introduce new compatibles
+
+2013-09-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
+
+2013-08-29 David Herrmann <dh.herrmann at gmail.com>
+
+ * Input: add SYN_MAX and SYN_CNT constants
+
+2013-08-29 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * Input: max11801_ts - convert to devm
+
+2013-08-29 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * Input: egalax-ts - fix typo and improve text
+
+2013-08-29 Mischa Jonker <Mischa.Jonker at synopsys.com>
+
+ * Input: i8042 - disable the driver on ARC platforms
+
+2013-08-28 Eric Paris <eparis at redhat.com>
+
+ * Revert "SELinux: do not handle seclabel as a special flag"
+
+2013-04-16 Anand Avati <avati at redhat.com>
+
+ * selinux: consider filesystem subtype in policies
+
+2013-08-26 Mag <magissia at magissia.com>
+
+ * Input: xpad - add signature for Razer Onza Classic Edition
+
+2013-08-15 Matteo Delfino <kendatsuba at gmail.com>
+
+ * Input: elantech - fix packet check for v3 and v4 hardware
+
+2013-08-19 Richard Weinberger <richard at nod.at>
+
+ * UBI: Fix invalidate_fastmap()
+
+2013-08-19 Richard Weinberger <richard at nod.at>
+
+ * UBI: Fix PEB leak in wear_leveling_worker()
+
+2013-08-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.11-rc6
+
+2013-08-14 Mats Kärrman <Mats.Karrman at tritech.se>
+
+ * UBIFS: remove invalid warn msg with tst_recovery enabled
+
+2013-08-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.11-rc5
diff --git a/ceph/Kconfig b/ceph/Kconfig
new file mode 100644
index 0000000..264e9bf
--- /dev/null
+++ b/ceph/Kconfig
@@ -0,0 +1,40 @@
+config CEPH_FS
+ tristate "Ceph distributed file system"
+ depends on INET
+ select CEPH_LIB
+ select LIBCRC32C
+ select CRYPTO_AES
+ select CRYPTO
+ default n
+ help
+ Choose Y or M here to include support for mounting the
+ experimental Ceph distributed file system. Ceph is an extremely
+ scalable file system designed to provide high performance,
+ reliable access to petabytes of storage.
+
+ More information at http://ceph.newdream.net/.
+
+ If unsure, say N.
+
+if CEPH_FS
+config CEPH_FSCACHE
+ bool "Enable Ceph client caching support"
+ depends on CEPH_FS=m && FSCACHE || CEPH_FS=y && FSCACHE=y
+ help
+ Choose Y here to enable persistent, read-only local
+ caching support for Ceph clients using FS-Cache
+
+endif
+
+config CEPH_FS_POSIX_ACL
+ bool "Ceph POSIX Access Control Lists"
+ depends on CEPH_FS
+ select FS_POSIX_ACL
+ help
+ POSIX Access Control Lists (ACLs) support permissions for users and
+ groups beyond the owner/group/world scheme.
+
+ To learn more about Access Control Lists, visit the POSIX ACLs for
+ Linux website <http://acl.bestbits.at/>.
+
+ If you don't know what Access Control Lists are, say N
diff --git a/ceph/Makefile b/ceph/Makefile
new file mode 100644
index 0000000..85a4230
--- /dev/null
+++ b/ceph/Makefile
@@ -0,0 +1,13 @@
+#
+# Makefile for CEPH filesystem.
+#
+
+obj-$(CONFIG_CEPH_FS) += ceph.o
+
+ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
+ export.o caps.o snap.o xattr.o \
+ mds_client.o mdsmap.o strings.o ceph_frag.o \
+ debugfs.o
+
+ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
+ceph-$(CONFIG_CEPH_FS_POSIX_ACL) += acl.o
diff --git a/ceph/acl.c b/ceph/acl.c
new file mode 100644
index 0000000..21887d6
--- /dev/null
+++ b/ceph/acl.c
@@ -0,0 +1,200 @@
+/*
+ * linux/fs/ceph/acl.c
+ *
+ * Copyright (C) 2013 Guangliang Zhao, <lucienchao at gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/ceph/ceph_debug.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/posix_acl.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include "super.h"
+
+static inline void ceph_set_cached_acl(struct inode *inode,
+ int type, struct posix_acl *acl)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ spin_lock(&ci->i_ceph_lock);
+ if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0))
+ set_cached_acl(inode, type, acl);
+ spin_unlock(&ci->i_ceph_lock);
+}
+
+static inline struct posix_acl *ceph_get_cached_acl(struct inode *inode,
+ int type)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct posix_acl *acl = ACL_NOT_CACHED;
+
+ spin_lock(&ci->i_ceph_lock);
+ if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0))
+ acl = get_cached_acl(inode, type);
+ spin_unlock(&ci->i_ceph_lock);
+
+ return acl;
+}
+
+struct posix_acl *ceph_get_acl(struct inode *inode, int type)
+{
+ int size;
+ const char *name;
+ char *value = NULL;
+ struct posix_acl *acl;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ name = POSIX_ACL_XATTR_ACCESS;
+ break;
+ case ACL_TYPE_DEFAULT:
+ name = POSIX_ACL_XATTR_DEFAULT;
+ break;
+ default:
+ BUG();
+ }
+
+ size = __ceph_getxattr(inode, name, "", 0);
+ if (size > 0) {
+ value = kzalloc(size, GFP_NOFS);
+ if (!value)
+ return ERR_PTR(-ENOMEM);
+ size = __ceph_getxattr(inode, name, value, size);
+ }
+
+ if (size > 0)
+ acl = posix_acl_from_xattr(&init_user_ns, value, size);
+ else if (size == -ERANGE || size == -ENODATA || size == 0)
+ acl = NULL;
+ else
+ acl = ERR_PTR(-EIO);
+
+ kfree(value);
+
+ if (!IS_ERR(acl))
+ ceph_set_cached_acl(inode, type, acl);
+
+ return acl;
+}
+
+int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+ int ret = 0, size = 0;
+ const char *name = NULL;
+ char *value = NULL;
+ struct iattr newattrs;
+ umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;
+ struct dentry *dentry;
+
+ if (acl) {
+ ret = posix_acl_valid(acl);
+ if (ret < 0)
+ goto out;
+ }
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ name = POSIX_ACL_XATTR_ACCESS;
+ if (acl) {
+ ret = posix_acl_equiv_mode(acl, &new_mode);
+ if (ret < 0)
+ goto out;
+ if (ret == 0)
+ acl = NULL;
+ }
+ break;
+ case ACL_TYPE_DEFAULT:
+ if (!S_ISDIR(inode->i_mode)) {
+ ret = acl ? -EINVAL : 0;
+ goto out;
+ }
+ name = POSIX_ACL_XATTR_DEFAULT;
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (acl) {
+ size = posix_acl_xattr_size(acl->a_count);
+ value = kmalloc(size, GFP_NOFS);
+ if (!value) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
+ if (ret < 0)
+ goto out_free;
+ }
+
+ dentry = d_find_alias(inode);
+ if (new_mode != old_mode) {
+ newattrs.ia_mode = new_mode;
+ newattrs.ia_valid = ATTR_MODE;
+ ret = ceph_setattr(dentry, &newattrs);
+ if (ret)
+ goto out_dput;
+ }
+
+ ret = __ceph_setxattr(dentry, name, value, size, 0);
+ if (ret) {
+ if (new_mode != old_mode) {
+ newattrs.ia_mode = old_mode;
+ newattrs.ia_valid = ATTR_MODE;
+ ceph_setattr(dentry, &newattrs);
+ }
+ goto out_dput;
+ }
+
+ ceph_set_cached_acl(inode, type, acl);
+
+out_dput:
+ dput(dentry);
+out_free:
+ kfree(value);
+out:
+ return ret;
+}
+
+int ceph_init_acl(struct dentry *dentry, struct inode *inode, struct inode *dir)
+{
+ struct posix_acl *default_acl, *acl;
+ int error;
+
+ error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
+ if (error)
+ return error;
+
+ if (!default_acl && !acl)
+ cache_no_acl(inode);
+
+ if (default_acl) {
+ error = ceph_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
+ posix_acl_release(default_acl);
+ }
+ if (acl) {
+ if (!error)
+ error = ceph_set_acl(inode, acl, ACL_TYPE_ACCESS);
+ posix_acl_release(acl);
+ }
+ return error;
+}
diff --git a/ceph/addr.c b/ceph/addr.c
new file mode 100644
index 0000000..b53278c
--- /dev/null
+++ b/ceph/addr.c
@@ -0,0 +1,1345 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/backing-dev.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/writeback.h> /* generic_writepages */
+#include <linux/slab.h>
+#include <linux/pagevec.h>
+#include <linux/task_io_accounting_ops.h>
+
+#include "super.h"
+#include "mds_client.h"
+#include "cache.h"
+#include <linux/ceph/osd_client.h>
+
+/*
+ * Ceph address space ops.
+ *
+ * There are a few funny things going on here.
+ *
+ * The page->private field is used to reference a struct
+ * ceph_snap_context for _every_ dirty page. This indicates which
+ * snapshot the page was logically dirtied in, and thus which snap
+ * context needs to be associated with the osd write during writeback.
+ *
+ * Similarly, struct ceph_inode_info maintains a set of counters to
+ * count dirty pages on the inode. In the absence of snapshots,
+ * i_wrbuffer_ref == i_wrbuffer_ref_head == the dirty page count.
+ *
+ * When a snapshot is taken (that is, when the client receives
+ * notification that a snapshot was taken), each inode with caps and
+ * with dirty pages (dirty pages implies there is a cap) gets a new
+ * ceph_cap_snap in the i_cap_snaps list (which is sorted in ascending
+ * order, new snaps go to the tail). The i_wrbuffer_ref_head count is
+ * moved to capsnap->dirty. (Unless a sync write is currently in
+ * progress. In that case, the capsnap is said to be "pending", new
+ * writes cannot start, and the capsnap isn't "finalized" until the
+ * write completes (or fails) and a final size/mtime for the inode for
+ * that snap can be settled upon.) i_wrbuffer_ref_head is reset to 0.
+ *
+ * On writeback, we must submit writes to the osd IN SNAP ORDER. So,
+ * we look for the first capsnap in i_cap_snaps and write out pages in
+ * that snap context _only_. Then we move on to the next capsnap,
+ * eventually reaching the "live" or "head" context (i.e., pages that
+ * are not yet snapped) and are writing the most recently dirtied
+ * pages.
+ *
+ * Invalidate and so forth must take care to ensure the dirty page
+ * accounting is preserved.
+ */
+
+#define CONGESTION_ON_THRESH(congestion_kb) (congestion_kb >> (PAGE_SHIFT-10))
+#define CONGESTION_OFF_THRESH(congestion_kb) \
+ (CONGESTION_ON_THRESH(congestion_kb) - \
+ (CONGESTION_ON_THRESH(congestion_kb) >> 2))
+
+static inline struct ceph_snap_context *page_snap_context(struct page *page)
+{
+ if (PagePrivate(page))
+ return (void *)page->private;
+ return NULL;
+}
+
+/*
+ * Dirty a page. Optimistically adjust accounting, on the assumption
+ * that we won't race with invalidate. If we do, readjust.
+ */
+static int ceph_set_page_dirty(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ struct inode *inode;
+ struct ceph_inode_info *ci;
+ struct ceph_snap_context *snapc;
+ int ret;
+
+ if (unlikely(!mapping))
+ return !TestSetPageDirty(page);
+
+ if (PageDirty(page)) {
+ dout("%p set_page_dirty %p idx %lu -- already dirty\n",
+ mapping->host, page, page->index);
+ BUG_ON(!PagePrivate(page));
+ return 0;
+ }
+
+ inode = mapping->host;
+ ci = ceph_inode(inode);
+
+ /*
+ * Note that we're grabbing a snapc ref here without holding
+ * any locks!
+ */
+ snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);
+
+ /* dirty the head */
+ spin_lock(&ci->i_ceph_lock);
+ if (ci->i_head_snapc == NULL)
+ ci->i_head_snapc = ceph_get_snap_context(snapc);
+ ++ci->i_wrbuffer_ref_head;
+ if (ci->i_wrbuffer_ref == 0)
+ ihold(inode);
+ ++ci->i_wrbuffer_ref;
+ dout("%p set_page_dirty %p idx %lu head %d/%d -> %d/%d "
+ "snapc %p seq %lld (%d snaps)\n",
+ mapping->host, page, page->index,
+ ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1,
+ ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
+ snapc, snapc->seq, snapc->num_snaps);
+ spin_unlock(&ci->i_ceph_lock);
+
+ /*
+ * Reference snap context in page->private. Also set
+ * PagePrivate so that we get invalidatepage callback.
+ */
+ BUG_ON(PagePrivate(page));
+ page->private = (unsigned long)snapc;
+ SetPagePrivate(page);
+
+ ret = __set_page_dirty_nobuffers(page);
+ WARN_ON(!PageLocked(page));
+ WARN_ON(!page->mapping);
+
+ return ret;
+}
+
+/*
+ * If we are truncating the full page (i.e. offset == 0), adjust the
+ * dirty page counters appropriately. Only called if there is private
+ * data on the page.
+ */
+static void ceph_invalidatepage(struct page *page, unsigned int offset,
+ unsigned int length)
+{
+ struct inode *inode;
+ struct ceph_inode_info *ci;
+ struct ceph_snap_context *snapc = page_snap_context(page);
+
+ inode = page->mapping->host;
+ ci = ceph_inode(inode);
+
+ if (offset != 0 || length != PAGE_CACHE_SIZE) {
+ dout("%p invalidatepage %p idx %lu partial dirty page %u~%u\n",
+ inode, page, page->index, offset, length);
+ return;
+ }
+
+ ceph_invalidate_fscache_page(inode, page);
+
+ if (!PagePrivate(page))
+ return;
+
+ /*
+ * We can get non-dirty pages here due to races between
+ * set_page_dirty and truncate_complete_page; just spit out a
+ * warning, in case we end up with accounting problems later.
+ */
+ if (!PageDirty(page))
+ pr_err("%p invalidatepage %p page not dirty\n", inode, page);
+
+ ClearPageChecked(page);
+
+ dout("%p invalidatepage %p idx %lu full dirty page\n",
+ inode, page, page->index);
+
+ ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
+ ceph_put_snap_context(snapc);
+ page->private = 0;
+ ClearPagePrivate(page);
+}
+
+static int ceph_releasepage(struct page *page, gfp_t g)
+{
+ struct inode *inode = page->mapping ? page->mapping->host : NULL;
+ dout("%p releasepage %p idx %lu\n", inode, page, page->index);
+ WARN_ON(PageDirty(page));
+
+ /* Can we release the page from the cache? */
+ if (!ceph_release_fscache_page(page, g))
+ return 0;
+
+ return !PagePrivate(page);
+}
+
+/*
+ * read a single page, without unlocking it.
+ */
+static int readpage_nounlock(struct file *filp, struct page *page)
+{
+ struct inode *inode = file_inode(filp);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_osd_client *osdc =
+ &ceph_inode_to_client(inode)->client->osdc;
+ int err = 0;
+ u64 len = PAGE_CACHE_SIZE;
+
+ err = ceph_readpage_from_fscache(inode, page);
+
+ if (err == 0)
+ goto out;
+
+ dout("readpage inode %p file %p page %p index %lu\n",
+ inode, filp, page, page->index);
+ err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
+ (u64) page_offset(page), &len,
+ ci->i_truncate_seq, ci->i_truncate_size,
+ &page, 1, 0);
+ if (err == -ENOENT)
+ err = 0;
+ if (err < 0) {
+ SetPageError(page);
+ ceph_fscache_readpage_cancel(inode, page);
+ goto out;
+ } else {
+ if (err < PAGE_CACHE_SIZE) {
+ /* zero fill remainder of page */
+ zero_user_segment(page, err, PAGE_CACHE_SIZE);
+ } else {
+ flush_dcache_page(page);
+ }
+ }
+ SetPageUptodate(page);
+
+ if (err >= 0)
+ ceph_readpage_to_fscache(inode, page);
+
+out:
+ return err < 0 ? err : 0;
+}
+
+static int ceph_readpage(struct file *filp, struct page *page)
+{
+ int r = readpage_nounlock(filp, page);
+ unlock_page(page);
+ return r;
+}
+
+/*
+ * Finish an async read(ahead) op: for every page in the request, mark
+ * it uptodate (zero-filling any tail the OSDs did not return) on
+ * success, then unlock and release it.  On error pages are simply
+ * unlocked, leaving them !PageUptodate.
+ */
+static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
+{
+	struct inode *inode = req->r_inode;
+	struct ceph_osd_data *osd_data;
+	int rc = req->r_result;
+	int bytes = le32_to_cpu(msg->hdr.data_len);
+	int num_pages;
+	int i;
+
+	dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
+
+	/* unlock all pages, zeroing any data we didn't read */
+	osd_data = osd_req_op_extent_osd_data(req, 0);
+	BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
+	num_pages = calc_pages_for((u64)osd_data->alignment,
+				   (u64)osd_data->length);
+	for (i = 0; i < num_pages; i++) {
+		struct page *page = osd_data->pages[i];
+
+		/* on error, just unlock; readers see !PageUptodate */
+		if (rc < 0)
+			goto unlock;
+		if (bytes < (int)PAGE_CACHE_SIZE) {
+			/* zero (remainder of) page */
+			int s = bytes < 0 ? 0 : bytes;
+			zero_user_segment(page, s, PAGE_CACHE_SIZE);
+		}
+		dout("finish_read %p uptodate %p idx %lu\n", inode, page,
+		     page->index);
+		flush_dcache_page(page);
+		SetPageUptodate(page);
+		ceph_readpage_to_fscache(inode, page);
+unlock:
+		unlock_page(page);
+		page_cache_release(page);
+		bytes -= PAGE_CACHE_SIZE;	/* may go negative past the data */
+	}
+	kfree(osd_data->pages);
+}
+
+/* drop the page lock on every page in the vector */
+static void ceph_unlock_page_vector(struct page **pages, int num_pages)
+{
+	int idx = 0;
+
+	while (idx < num_pages) {
+		unlock_page(pages[idx]);
+		idx++;
+	}
+}
+
+/*
+ * start an async read(ahead) operation.  return nr_pages we submitted
+ * a read for on success, or negative error code.
+ *
+ * Consumes (list_del's) the pages it submits from @page_list; pages it
+ * does not take stay on the list for the caller.
+ */
+static int start_read(struct inode *inode, struct list_head *page_list, int max)
+{
+	struct ceph_osd_client *osdc =
+		&ceph_inode_to_client(inode)->client->osdc;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct page *page = list_entry(page_list->prev, struct page, lru);
+	struct ceph_vino vino;
+	struct ceph_osd_request *req;
+	u64 off;
+	u64 len;
+	int i;
+	struct page **pages;
+	pgoff_t next_index;
+	int nr_pages = 0;
+	int ret;
+
+	/* the lowest-index page sits at the tail of the readahead list */
+	off = (u64) page_offset(page);
+
+	/* count pages: take only a consecutive run of indices, capped at max */
+	next_index = page->index;
+	list_for_each_entry_reverse(page, page_list, lru) {
+		if (page->index != next_index)
+			break;
+		nr_pages++;
+		next_index++;
+		if (max && nr_pages == max)
+			break;
+	}
+	len = nr_pages << PAGE_CACHE_SHIFT;
+	dout("start_read %p nr_pages %d is %lld~%lld\n", inode, nr_pages,
+	     off, len);
+	vino = ceph_vino(inode);
+	req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len,
+				    1, CEPH_OSD_OP_READ,
+				    CEPH_OSD_FLAG_READ, NULL,
+				    ci->i_truncate_seq, ci->i_truncate_size,
+				    false);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	/* build page vector */
+	/* recompute from len: ceph_osdc_new_request may have reduced it */
+	nr_pages = calc_pages_for(0, len);
+	pages = kmalloc(sizeof(*pages) * nr_pages, GFP_NOFS);
+	ret = -ENOMEM;
+	if (!pages)
+		goto out;
+	for (i = 0; i < nr_pages; ++i) {
+		page = list_entry(page_list->prev, struct page, lru);
+		BUG_ON(PageLocked(page));
+		list_del(&page->lru);
+
+		dout("start_read %p adding %p idx %lu\n", inode, page,
+		     page->index);
+		/* add_to_page_cache_lru locks the page on success */
+		if (add_to_page_cache_lru(page, &inode->i_data, page->index,
+					  GFP_NOFS)) {
+			ceph_fscache_uncache_page(inode, page);
+			page_cache_release(page);
+			dout("start_read %p add_to_page_cache failed %p\n",
+			     inode, page);
+			nr_pages = i;	/* unwind only the pages added so far */
+			goto out_pages;
+		}
+		pages[i] = page;
+	}
+	osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false);
+	req->r_callback = finish_read;
+	req->r_inode = inode;
+
+	ceph_osdc_build_request(req, off, NULL, vino.snap, NULL);
+
+	dout("start_read %p starting %p %lld~%lld\n", inode, req, off, len);
+	ret = ceph_osdc_start_request(osdc, req, false);
+	if (ret < 0)
+		goto out_pages;
+	ceph_osdc_put_request(req);
+	return nr_pages;
+
+out_pages:
+	ceph_unlock_page_vector(pages, nr_pages);
+	ceph_release_pages(pages, nr_pages);
+out:
+	ceph_osdc_put_request(req);
+	return ret;
+}
+
+
+/*
+ * Read multiple pages.  Leave pages we don't read + unlock in page_list;
+ * the caller (VM) cleans them up.
+ */
+static int ceph_readpages(struct file *file, struct address_space *mapping,
+			  struct list_head *page_list, unsigned nr_pages)
+{
+	struct inode *inode = file_inode(file);
+	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	int rc = 0;
+	int max = 0;
+
+	/* let fscache try to satisfy the whole list first */
+	rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
+					 &nr_pages);
+
+	if (rc == 0)
+		goto out;
+
+	/*
+	 * Cap each async read at the rsize mount option, rounded up to
+	 * whole pages.  Use PAGE_CACHE_SHIFT to match the
+	 * PAGE_CACHE_SIZE rounding above (the original mixed in
+	 * PAGE_SHIFT, which is only coincidentally the same value).
+	 */
+	if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
+		max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
+			>> PAGE_CACHE_SHIFT;
+
+	dout("readpages %p file %p nr_pages %d max %d\n", inode,
+	     file, nr_pages,
+	     max);
+	/* submit one async read per consecutive run of pages */
+	while (!list_empty(page_list)) {
+		rc = start_read(inode, page_list, max);
+		if (rc < 0)
+			goto out;
+		BUG_ON(rc == 0);
+	}
+out:
+	ceph_fscache_readpages_cancel(inode, page_list);
+
+	dout("readpages %p file %p ret %d\n", inode, file, rc);
+	return rc;
+}
+
+/*
+ * Get ref for the oldest snapc for an inode with dirty data... that is, the
+ * only snap context we are allowed to write back.
+ *
+ * Returns a referenced snap context (caller must ceph_put_snap_context())
+ * or NULL if nothing is dirty.  If @snap_size is non-NULL and the oldest
+ * dirty data belongs to a cap snapshot, *snap_size is set to the file
+ * size captured by that snapshot; otherwise it is left untouched.
+ */
+static struct ceph_snap_context *get_oldest_context(struct inode *inode,
+						    u64 *snap_size)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_snap_context *snapc = NULL;
+	struct ceph_cap_snap *capsnap = NULL;
+
+	spin_lock(&ci->i_ceph_lock);
+	/* take the first cap snap that still has dirty pages */
+	list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
+		dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
+		     capsnap->context, capsnap->dirty_pages);
+		if (capsnap->dirty_pages) {
+			snapc = ceph_get_snap_context(capsnap->context);
+			if (snap_size)
+				*snap_size = capsnap->size;
+			break;
+		}
+	}
+	if (!snapc && ci->i_wrbuffer_ref_head) {
+		/* no dirty cap snap; fall back to the live head context */
+		snapc = ceph_get_snap_context(ci->i_head_snapc);
+		dout(" head snapc %p has %d dirty pages\n",
+		     snapc, ci->i_wrbuffer_ref_head);
+	}
+	spin_unlock(&ci->i_ceph_lock);
+	return snapc;
+}
+
+/*
+ * Write a single page, but leave the page locked.
+ *
+ * If we get a write error, set the page error bit, but still adjust the
+ * dirty page accounting (i.e., page is no longer dirty).
+ *
+ * The write is synchronous (ceph_osdc_writepages blocks until the OSD
+ * replies).  Only the oldest dirty snap context may be written; a page
+ * dirtied under a newer context is left untouched (noop).
+ */
+static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
+{
+	struct inode *inode;
+	struct ceph_inode_info *ci;
+	struct ceph_fs_client *fsc;
+	struct ceph_osd_client *osdc;
+	struct ceph_snap_context *snapc, *oldest;
+	loff_t page_off = page_offset(page);
+	long writeback_stat;
+	u64 truncate_size, snap_size = 0;
+	u32 truncate_seq;
+	int err = 0, len = PAGE_CACHE_SIZE;
+
+	dout("writepage %p idx %lu\n", page, page->index);
+
+	if (!page->mapping || !page->mapping->host) {
+		dout("writepage %p - no mapping\n", page);
+		return -EFAULT;
+	}
+	inode = page->mapping->host;
+	ci = ceph_inode(inode);
+	fsc = ceph_inode_to_client(inode);
+	osdc = &fsc->client->osdc;
+
+	/* verify this is a writeable snap context */
+	snapc = page_snap_context(page);
+	if (snapc == NULL) {
+		dout("writepage %p page %p not dirty?\n", inode, page);
+		goto out;
+	}
+	oldest = get_oldest_context(inode, &snap_size);
+	if (snapc->seq > oldest->seq) {
+		dout("writepage %p page %p snapc %p not writeable - noop\n",
+		     inode, page, snapc);
+		/* we should only noop if called by kswapd */
+		WARN_ON((current->flags & PF_MEMALLOC) == 0);
+		ceph_put_snap_context(oldest);
+		goto out;
+	}
+	ceph_put_snap_context(oldest);
+
+	/* sample truncate state under the inode spinlock */
+	spin_lock(&ci->i_ceph_lock);
+	truncate_seq = ci->i_truncate_seq;
+	truncate_size = ci->i_truncate_size;
+	if (!snap_size)
+		snap_size = i_size_read(inode);
+	spin_unlock(&ci->i_ceph_lock);
+
+	/* is this a partial page at end of file? */
+	if (page_off >= snap_size) {
+		dout("%p page eof %llu\n", page, snap_size);
+		goto out;
+	}
+	if (snap_size < page_off + len)
+		len = snap_size - page_off;
+
+	dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
+	     inode, page, page->index, page_off, len, snapc);
+
+	writeback_stat = atomic_long_inc_return(&fsc->writeback_count);
+	if (writeback_stat >
+	    CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
+		set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
+
+	ceph_readpage_to_fscache(inode, page);
+
+	set_page_writeback(page);
+	/* synchronous OSD write of [page_off, page_off + len) */
+	err = ceph_osdc_writepages(osdc, ceph_vino(inode),
+				   &ci->i_layout, snapc,
+				   page_off, len,
+				   truncate_seq, truncate_size,
+				   &inode->i_mtime, &page, 1);
+	if (err < 0) {
+		dout("writepage setting page/mapping error %d %p\n", err, page);
+		SetPageError(page);
+		mapping_set_error(&inode->i_data, err);
+		if (wbc)
+			wbc->pages_skipped++;
+	} else {
+		dout("writepage cleaned page %p\n", page);
+		err = 0;  /* vfs expects us to return 0 */
+	}
+	/* drop the page's snapc reference and dirty-page accounting */
+	page->private = 0;
+	ClearPagePrivate(page);
+	end_page_writeback(page);
+	ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
+	ceph_put_snap_context(snapc);  /* page's reference */
+out:
+	return err;
+}
+
+static int ceph_writepage(struct page *page, struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	int ret;
+
+	BUG_ON(!inode);
+	/* pin the inode while the synchronous write is in flight */
+	ihold(inode);
+	ret = writepage_nounlock(page, wbc);
+	unlock_page(page);
+	iput(inode);
+	return ret;
+}
+
+
+/*
+ * lame release_pages helper.  release_pages() isn't exported to
+ * modules, so batch the puts through a pagevec instead.
+ */
+static void ceph_release_pages(struct page **pages, int num)
+{
+	struct pagevec pvec;
+	int idx;
+
+	pagevec_init(&pvec, 0);
+	for (idx = 0; idx < num; idx++)
+		if (!pagevec_add(&pvec, pages[idx]))
+			pagevec_release(&pvec);
+	pagevec_release(&pvec);	/* flush any partial batch */
+}
+
+/*
+ * async writeback completion handler.
+ *
+ * If we get an error, set the mapping error bit, but not the individual
+ * page error bits.
+ */
+static void writepages_finish(struct ceph_osd_request *req,
+			      struct ceph_msg *msg)
+{
+	struct inode *inode = req->r_inode;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_osd_data *osd_data;
+	unsigned wrote;
+	struct page *page;
+	int num_pages;
+	int i;
+	struct ceph_snap_context *snapc = req->r_snapc;
+	struct address_space *mapping = inode->i_mapping;
+	int rc = req->r_result;
+	u64 bytes = req->r_ops[0].extent.length;
+	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	long writeback_stat;
+	unsigned issued = ceph_caps_issued(ci);
+
+	osd_data = osd_req_op_extent_osd_data(req, 0);
+	BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
+	num_pages = calc_pages_for((u64)osd_data->alignment,
+				   (u64)osd_data->length);
+	if (rc >= 0) {
+		/*
+		 * Assume we wrote the pages we originally sent.  The
+		 * osd might reply with fewer pages if our writeback
+		 * raced with a truncation and was adjusted at the osd,
+		 * so don't believe the reply.
+		 */
+		wrote = num_pages;
+	} else {
+		wrote = 0;
+		mapping_set_error(mapping, rc);
+	}
+	dout("writepages_finish %p rc %d bytes %llu wrote %d (pages)\n",
+	     inode, rc, bytes, wrote);
+
+	/* clean all pages */
+	for (i = 0; i < num_pages; i++) {
+		page = osd_data->pages[i];
+		BUG_ON(!page);
+		WARN_ON(!PageUptodate(page));
+
+		writeback_stat =
+			atomic_long_dec_return(&fsc->writeback_count);
+		if (writeback_stat <
+		    CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb))
+			clear_bdi_congested(&fsc->backing_dev_info,
+					    BLK_RW_ASYNC);
+
+		/* drop the snapc ref held via page->private */
+		ceph_put_snap_context(page_snap_context(page));
+		page->private = 0;
+		ClearPagePrivate(page);
+		dout("unlocking %d %p\n", i, page);
+		end_page_writeback(page);
+
+		/*
+		 * We lost the cache cap, need to truncate the page before
+		 * it is unlocked, otherwise we'd truncate it later in the
+		 * page truncation thread, possibly losing some data that
+		 * raced its way in
+		 */
+		if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0)
+			generic_error_remove_page(inode->i_mapping, page);
+
+		unlock_page(page);
+	}
+	dout("%p wrote+cleaned %d pages\n", inode, wrote);
+	ceph_put_wrbuffer_cap_refs(ci, num_pages, snapc);
+
+	ceph_release_pages(osd_data->pages, num_pages);
+	/* the page array came from kmalloc or the wb mempool; free to match */
+	if (osd_data->pages_from_pool)
+		mempool_free(osd_data->pages,
+			     ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool);
+	else
+		kfree(osd_data->pages);
+	ceph_osdc_put_request(req);
+}
+
+/*
+ * initiate async writeback
+ *
+ * Walk the dirty pages of @mapping (honoring wbc's range and sync mode),
+ * batch runs of consecutive dirty pages belonging to the oldest
+ * writeable snap context into OSD write requests, and submit them
+ * asynchronously; writepages_finish() completes the accounting.
+ */
+static int ceph_writepages_start(struct address_space *mapping,
+				 struct writeback_control *wbc)
+{
+	struct inode *inode = mapping->host;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	struct ceph_vino vino = ceph_vino(inode);
+	pgoff_t index, start, end;
+	int range_whole = 0;
+	int should_loop = 1;
+	pgoff_t max_pages = 0, max_pages_ever = 0;
+	struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
+	struct pagevec pvec;
+	int done = 0;
+	int rc = 0;
+	unsigned wsize = 1 << inode->i_blkbits;
+	struct ceph_osd_request *req = NULL;
+	int do_sync = 0;	/* FIX: was uninitialized, but read below
+				 * (num_ops, dout) even when no sync write
+				 * is needed -- undefined behavior */
+	u64 truncate_size, snap_size;
+	u32 truncate_seq;
+
+	/*
+	 * Include a 'sync' in the OSD request if this is a data
+	 * integrity write (e.g., O_SYNC write or fsync()), or if our
+	 * cap is being revoked.
+	 */
+	if ((wbc->sync_mode == WB_SYNC_ALL) ||
+	    ceph_caps_revoking(ci, CEPH_CAP_FILE_BUFFER))
+		do_sync = 1;
+	dout("writepages_start %p dosync=%d (mode=%s)\n",
+	     inode, do_sync,
+	     wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
+	     (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
+
+	if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) {
+		pr_warning("writepage_start %p on forced umount\n", inode);
+		return -EIO; /* we're in a forced umount, don't write! */
+	}
+	if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize)
+		wsize = fsc->mount_options->wsize;
+	if (wsize < PAGE_CACHE_SIZE)
+		wsize = PAGE_CACHE_SIZE;
+	max_pages_ever = wsize >> PAGE_CACHE_SHIFT;
+
+	pagevec_init(&pvec, 0);
+
+	/* where to start/end? */
+	if (wbc->range_cyclic) {
+		start = mapping->writeback_index; /* Start from prev offset */
+		end = -1;
+		dout(" cyclic, start at %lu\n", start);
+	} else {
+		start = wbc->range_start >> PAGE_CACHE_SHIFT;
+		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+			range_whole = 1;
+		should_loop = 0;
+		dout(" not cyclic, %lu to %lu\n", start, end);
+	}
+	index = start;
+
+retry:
+	/* find oldest snap context with dirty data */
+	ceph_put_snap_context(snapc);
+	snap_size = 0;
+	snapc = get_oldest_context(inode, &snap_size);
+	if (!snapc) {
+		/* hmm, why does writepages get called when there
+		   is no dirty data? */
+		dout(" no snap context with dirty data?\n");
+		goto out;
+	}
+	if (snap_size == 0)
+		snap_size = i_size_read(inode);
+	dout(" oldest snapc is %p seq %lld (%d snaps)\n",
+	     snapc, snapc->seq, snapc->num_snaps);
+
+	spin_lock(&ci->i_ceph_lock);
+	truncate_seq = ci->i_truncate_seq;
+	truncate_size = ci->i_truncate_size;
+	if (!snap_size)
+		snap_size = i_size_read(inode);	/* redundant: defaulted above */
+	spin_unlock(&ci->i_ceph_lock);
+
+	if (last_snapc && snapc != last_snapc) {
+		/* if we switched to a newer snapc, restart our scan at the
+		 * start of the original file range. */
+		dout(" snapc differs from last pass, restarting at %lu\n",
+		     index);
+		index = start;
+	}
+	last_snapc = snapc;
+
+	while (!done && index <= end) {
+		int num_ops = do_sync ? 2 : 1;
+		unsigned i;
+		int first;
+		pgoff_t next;
+		int pvec_pages, locked_pages;
+		struct page **pages = NULL;
+		mempool_t *pool = NULL;	/* Becomes non-null if mempool used */
+		struct page *page;
+		int want;
+		u64 offset, len;
+		long writeback_stat;
+
+		next = 0;
+		locked_pages = 0;
+		max_pages = max_pages_ever;
+
+get_more_pages:
+		first = -1;
+		want = min(end - index,
+			   min((pgoff_t)PAGEVEC_SIZE,
+			       max_pages - (pgoff_t)locked_pages) - 1)
+			+ 1;
+		pvec_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+						PAGECACHE_TAG_DIRTY,
+						want);
+		dout("pagevec_lookup_tag got %d\n", pvec_pages);
+		if (!pvec_pages && !locked_pages)
+			break;
+		for (i = 0; i < pvec_pages && locked_pages < max_pages; i++) {
+			page = pvec.pages[i];
+			dout("? %p idx %lu\n", page, page->index);
+			if (locked_pages == 0)
+				lock_page(page);  /* first page */
+			else if (!trylock_page(page))
+				break;
+
+			/* only dirty pages, or our accounting breaks */
+			if (unlikely(!PageDirty(page)) ||
+			    unlikely(page->mapping != mapping)) {
+				dout("!dirty or !mapping %p\n", page);
+				unlock_page(page);
+				break;
+			}
+			if (!wbc->range_cyclic && page->index > end) {
+				dout("end of range %p\n", page);
+				done = 1;
+				unlock_page(page);
+				break;
+			}
+			if (next && (page->index != next)) {
+				dout("not consecutive %p\n", page);
+				unlock_page(page);
+				break;
+			}
+			if (wbc->sync_mode != WB_SYNC_NONE) {
+				dout("waiting on writeback %p\n", page);
+				wait_on_page_writeback(page);
+			}
+			if (page_offset(page) >= snap_size) {
+				dout("%p page eof %llu\n", page, snap_size);
+				done = 1;
+				unlock_page(page);
+				break;
+			}
+			if (PageWriteback(page)) {
+				dout("%p under writeback\n", page);
+				unlock_page(page);
+				break;
+			}
+
+			/* only if matching snap context */
+			pgsnapc = page_snap_context(page);
+			if (pgsnapc->seq > snapc->seq) {
+				dout("page snapc %p %lld > oldest %p %lld\n",
+				     pgsnapc, pgsnapc->seq, snapc, snapc->seq);
+				unlock_page(page);
+				if (!locked_pages)
+					continue; /* keep looking for snap */
+				break;
+			}
+
+			if (!clear_page_dirty_for_io(page)) {
+				dout("%p !clear_page_dirty_for_io\n", page);
+				unlock_page(page);
+				break;
+			}
+
+			/*
+			 * We have something to write.  If this is
+			 * the first locked page this time through,
+			 * allocate an osd request and a page array
+			 * that it will use.
+			 */
+			if (locked_pages == 0) {
+				BUG_ON(pages);
+				/* prepare async write request */
+				offset = (u64)page_offset(page);
+				len = wsize;
+				req = ceph_osdc_new_request(&fsc->client->osdc,
+							&ci->i_layout, vino,
+							offset, &len, num_ops,
+							CEPH_OSD_OP_WRITE,
+							CEPH_OSD_FLAG_WRITE |
+							CEPH_OSD_FLAG_ONDISK,
+							snapc, truncate_seq,
+							truncate_size, true);
+				if (IS_ERR(req)) {
+					rc = PTR_ERR(req);
+					unlock_page(page);
+					break;
+				}
+
+				req->r_callback = writepages_finish;
+				req->r_inode = inode;
+
+				max_pages = calc_pages_for(0, (u64)len);
+				pages = kmalloc(max_pages * sizeof (*pages),
+						GFP_NOFS);
+				if (!pages) {
+					/* fall back to the emergency mempool */
+					pool = fsc->wb_pagevec_pool;
+					pages = mempool_alloc(pool, GFP_NOFS);
+					BUG_ON(!pages);
+				}
+			}
+
+			/* note position of first page in pvec */
+			if (first < 0)
+				first = i;
+			dout("%p will write page %p idx %lu\n",
+			     inode, page, page->index);
+
+			writeback_stat =
+			       atomic_long_inc_return(&fsc->writeback_count);
+			if (writeback_stat > CONGESTION_ON_THRESH(
+				    fsc->mount_options->congestion_kb)) {
+				set_bdi_congested(&fsc->backing_dev_info,
+						  BLK_RW_ASYNC);
+			}
+
+			set_page_writeback(page);
+			pages[locked_pages] = page;
+			locked_pages++;
+			next = page->index + 1;
+		}
+
+		/* did we get anything? */
+		if (!locked_pages)
+			goto release_pvec_pages;
+		if (i) {
+			int j;
+			BUG_ON(!locked_pages || first < 0);
+
+			if (pvec_pages && i == pvec_pages &&
+			    locked_pages < max_pages) {
+				dout("reached end pvec, trying for more\n");
+				pagevec_reinit(&pvec);
+				goto get_more_pages;
+			}
+
+			/* shift unused pages over in the pvec...  we
+			 * will need to release them below. */
+			for (j = i; j < pvec_pages; j++) {
+				dout(" pvec leftover page %p\n",
+				     pvec.pages[j]);
+				pvec.pages[j-i+first] = pvec.pages[j];
+			}
+			pvec.nr -= i-first;
+		}
+
+		/* Format the osd request message and submit the write */
+
+		offset = page_offset(pages[0]);
+		len = min(snap_size - offset,
+			  (u64)locked_pages << PAGE_CACHE_SHIFT);
+		dout("writepages got %d pages at %llu~%llu\n",
+		     locked_pages, offset, len);
+
+		osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
+						 !!pool, false);
+
+		pages = NULL;	/* request message now owns the pages array */
+		pool = NULL;
+
+		/* Update the write op length in case we changed it */
+
+		osd_req_op_extent_update(req, 0, len);
+
+		vino = ceph_vino(inode);
+		ceph_osdc_build_request(req, offset, snapc, vino.snap,
+					&inode->i_mtime);
+
+		rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
+		BUG_ON(rc);
+		req = NULL;
+
+		/* continue? */
+		index = next;
+		wbc->nr_to_write -= locked_pages;
+		if (wbc->nr_to_write <= 0)
+			done = 1;
+
+release_pvec_pages:
+		dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr,
+		     pvec.nr ? pvec.pages[0] : NULL);
+		pagevec_release(&pvec);
+
+		if (locked_pages && !done)
+			goto retry;
+	}
+
+	if (should_loop && !done) {
+		/* more to do; loop back to beginning of file */
+		dout("writepages looping back to beginning of file\n");
+		should_loop = 0;
+		index = 0;
+		goto retry;
+	}
+
+	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+		mapping->writeback_index = index;
+
+out:
+	if (req)
+		ceph_osdc_put_request(req);
+	ceph_put_snap_context(snapc);
+	dout("writepages done, rc = %d\n", rc);
+	return rc;
+}
+
+
+
+/*
+ * See if a given @snapc is either writeable, or already written.
+ */
+static int context_is_writeable_or_written(struct inode *inode,
+					   struct ceph_snap_context *snapc)
+{
+	struct ceph_snap_context *oldest;
+	int writeable;
+
+	/* no oldest context at all means there is no dirty data left */
+	oldest = get_oldest_context(inode, NULL);
+	writeable = (oldest == NULL) || (snapc->seq <= oldest->seq);
+	ceph_put_snap_context(oldest);
+	return writeable;
+}
+
+/*
+ * We are only allowed to write into/dirty the page if the page is
+ * clean, or already dirty within the same snap context.
+ *
+ * called with page locked.
+ * return success with page locked AND mdsc->snap_rwsem held for read
+ * (the caller drops it: see ceph_write_end / ceph_page_mkwrite),
+ * or any failure (incl -EAGAIN) with page unlocked and snap_rwsem
+ * released.
+ */
+static int ceph_update_writeable_page(struct file *file,
+			    loff_t pos, unsigned len,
+			    struct page *page)
+{
+	struct inode *inode = file_inode(file);
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+	loff_t page_off = pos & PAGE_CACHE_MASK;
+	int pos_in_page = pos & ~PAGE_CACHE_MASK;
+	int end_in_page = pos_in_page + len;
+	loff_t i_size;
+	int r;
+	struct ceph_snap_context *snapc, *oldest;
+
+retry_locked:
+	/* writepages currently holds page lock, but if we change that later, */
+	wait_on_page_writeback(page);
+
+	/* check snap context */
+	BUG_ON(!ci->i_snap_realm);
+	down_read(&mdsc->snap_rwsem);
+	BUG_ON(!ci->i_snap_realm->cached_context);
+	snapc = page_snap_context(page);
+	if (snapc && snapc != ci->i_head_snapc) {
+		/*
+		 * this page is already dirty in another (older) snap
+		 * context!  is it writeable now?
+		 */
+		oldest = get_oldest_context(inode, NULL);
+		up_read(&mdsc->snap_rwsem);
+
+		if (snapc->seq > oldest->seq) {
+			ceph_put_snap_context(oldest);
+			dout(" page %p snapc %p not current or oldest\n",
+			     page, snapc);
+			/*
+			 * queue for writeback, and wait for snapc to
+			 * be writeable or written
+			 */
+			snapc = ceph_get_snap_context(snapc);
+			unlock_page(page);
+			ceph_queue_writeback(inode);
+			r = wait_event_interruptible(ci->i_cap_wq,
+			       context_is_writeable_or_written(inode, snapc));
+			ceph_put_snap_context(snapc);
+			if (r == -ERESTARTSYS)
+				return r;
+			/* tell the caller to start over from the top */
+			return -EAGAIN;
+		}
+		ceph_put_snap_context(oldest);
+
+		/* yay, writeable, do it now (without dropping page lock) */
+		dout(" page %p snapc %p not current, but oldest\n",
+		     page, snapc);
+		if (!clear_page_dirty_for_io(page))
+			goto retry_locked;
+		r = writepage_nounlock(page, NULL);
+		if (r < 0)
+			goto fail_nosnap;
+		goto retry_locked;
+	}
+
+	if (PageUptodate(page)) {
+		dout(" page %p already uptodate\n", page);
+		return 0;
+	}
+
+	/* full page? */
+	if (pos_in_page == 0 && len == PAGE_CACHE_SIZE)
+		return 0;
+
+	/* past end of file? */
+	i_size = inode->i_size;   /* caller holds i_mutex */
+
+	if (i_size + len > inode->i_sb->s_maxbytes) {
+		/* file is too big */
+		r = -EINVAL;
+		goto fail;
+	}
+
+	/* a partial write wholly past EOF needs no read: zero around it */
+	if (page_off >= i_size ||
+	    (pos_in_page == 0 && (pos+len) >= i_size &&
+	     end_in_page - pos_in_page != PAGE_CACHE_SIZE)) {
+		dout(" zeroing %p 0 - %d and %d - %d\n",
+		     page, pos_in_page, end_in_page, (int)PAGE_CACHE_SIZE);
+		zero_user_segments(page,
+				   0, pos_in_page,
+				   end_in_page, PAGE_CACHE_SIZE);
+		return 0;
+	}
+
+	/* we need to read it. */
+	up_read(&mdsc->snap_rwsem);
+	r = readpage_nounlock(file, page);
+	if (r < 0)
+		goto fail_nosnap;
+	goto retry_locked;
+
+fail:
+	up_read(&mdsc->snap_rwsem);
+fail_nosnap:
+	unlock_page(page);
+	return r;
+}
+
+/*
+ * We are only allowed to write into/dirty the page if the page is
+ * clean, or already dirty within the same snap context.
+ *
+ * On success *pagep holds the locked page (and snap_rwsem is held, see
+ * ceph_update_writeable_page()); on failure a negative errno is
+ * returned and the page reference has been dropped.
+ */
+static int ceph_write_begin(struct file *file, struct address_space *mapping,
+			    loff_t pos, unsigned len, unsigned flags,
+			    struct page **pagep, void **fsdata)
+{
+	struct inode *inode = file_inode(file);
+	struct page *page;
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	int r;
+
+	do {
+		/* get a page */
+		page = grab_cache_page_write_begin(mapping, index, 0);
+		if (!page)
+			return -ENOMEM;
+		*pagep = page;
+
+		dout("write_begin file %p inode %p page %p %d~%d\n", file,
+		     inode, page, (int)pos, (int)len);
+
+		r = ceph_update_writeable_page(file, pos, len, page);
+		/*
+		 * On any failure (including -EAGAIN, which loops and
+		 * grabs the page again) the page was unlocked by
+		 * ceph_update_writeable_page(); drop the reference
+		 * grab_cache_page_write_begin() took, which the
+		 * original code leaked.
+		 */
+		if (r < 0)
+			page_cache_release(page);
+	} while (r == -EAGAIN);
+
+	return r;
+}
+
+/*
+ * we don't do anything in here that simple_write_end doesn't do
+ * except adjust dirty page accounting and drop read lock on
+ * mdsc->snap_rwsem.
+ */
+static int ceph_write_end(struct file *file, struct address_space *mapping,
+			  loff_t pos, unsigned len, unsigned copied,
+			  struct page *page, void *fsdata)
+{
+	struct inode *inode = file_inode(file);
+	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
+	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+	int check_cap = 0;
+
+	dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
+	     inode, page, (int)pos, (int)copied, (int)len);
+
+	/* zero the stale part of the page if we did a short copy */
+	/*
+	 * NOTE(review): the upper bound here is 'len', not 'from + len';
+	 * when 'from' > 0 this zeroes less than the dirtied span (and the
+	 * bounds can even invert).  Looks wrong -- confirm against the
+	 * zero_user_segment() contract before relying on this path.
+	 */
+	if (copied < len)
+		zero_user_segment(page, from+copied, len);
+
+	/* did file size increase? */
+	/* (no need for i_size_read(); we caller holds i_mutex */
+	if (pos+copied > inode->i_size)
+		check_cap = ceph_inode_set_size(inode, pos+copied);
+
+	if (!PageUptodate(page))
+		SetPageUptodate(page);
+
+	set_page_dirty(page);
+
+	unlock_page(page);
+	/* pairs with the down_read() in ceph_update_writeable_page() */
+	up_read(&mdsc->snap_rwsem);
+	page_cache_release(page);
+
+	if (check_cap)
+		ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL);
+
+	return copied;
+}
+
+/*
+ * we set .direct_IO to indicate direct io is supported, but since we
+ * intercept O_DIRECT reads and writes early, this function should
+ * never get called.
+ */
+static ssize_t ceph_direct_io(int rw, struct kiocb *iocb,
+			      const struct iovec *iov,
+			      loff_t pos, unsigned long nr_segs)
+{
+	/* reaching here means the early O_DIRECT interception was bypassed */
+	WARN_ON(1);
+	return -EINVAL;
+}
+
+/* address_space operations wiring the VFS page cache paths defined above */
+const struct address_space_operations ceph_aops = {
+	.readpage = ceph_readpage,
+	.readpages = ceph_readpages,
+	.writepage = ceph_writepage,
+	.writepages = ceph_writepages_start,
+	.write_begin = ceph_write_begin,
+	.write_end = ceph_write_end,
+	.set_page_dirty = ceph_set_page_dirty,
+	.invalidatepage = ceph_invalidatepage,
+	.releasepage = ceph_releasepage,
+	.direct_IO = ceph_direct_io,
+};
+
+
+/*
+ * vm ops
+ */
+/*
+ * Read fault handler: take a FILE_RD cap reference, let the generic
+ * filemap_fault() do the actual page-in, then drop the cap refs.
+ */
+static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct inode *inode = file_inode(vma->vm_file);
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_file_info *fi = vma->vm_file->private_data;
+	loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT;
+	int want, got, ret;
+
+	dout("filemap_fault %p %llx.%llx %llu~%zd trying to get caps\n",
+	     inode, ceph_vinop(inode), off, (size_t)PAGE_CACHE_SIZE);
+	/* lazy-io opens are satisfied by LAZYIO as well as CACHE caps */
+	if (fi->fmode & CEPH_FILE_MODE_LAZY)
+		want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
+	else
+		want = CEPH_CAP_FILE_CACHE;
+	/* loop until we hold a read cap; only -ERESTARTSYS is retried */
+	while (1) {
+		got = 0;
+		ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
+		if (ret == 0)
+			break;
+		if (ret != -ERESTARTSYS) {
+			WARN_ON(1);
+			return VM_FAULT_SIGBUS;
+		}
+	}
+	dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
+	     inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got));
+
+	ret = filemap_fault(vma, vmf);
+
+	dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
+	     inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret);
+	ceph_put_cap_refs(ci, got);
+
+	return ret;
+}
+
+/*
+ * Reuse write_begin here for simplicity.
+ *
+ * Write fault handler: take a FILE_WR cap reference, make the page
+ * writeable via ceph_update_writeable_page(), and mark the cap dirty.
+ */
+static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct inode *inode = file_inode(vma->vm_file);
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_file_info *fi = vma->vm_file->private_data;
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+	struct page *page = vmf->page;
+	loff_t off = page_offset(page);
+	loff_t size = i_size_read(inode);
+	size_t len;
+	int want, got, ret;
+
+	/* only the part of a tail page below i_size is writeable */
+	if (off + PAGE_CACHE_SIZE <= size)
+		len = PAGE_CACHE_SIZE;
+	else
+		len = size & ~PAGE_CACHE_MASK;
+
+	dout("page_mkwrite %p %llx.%llx %llu~%zd getting caps i_size %llu\n",
+	     inode, ceph_vinop(inode), off, len, size);
+	if (fi->fmode & CEPH_FILE_MODE_LAZY)
+		want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
+	else
+		want = CEPH_CAP_FILE_BUFFER;
+	/* loop until we hold a write cap; only -ERESTARTSYS is retried */
+	while (1) {
+		got = 0;
+		ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, off + len);
+		if (ret == 0)
+			break;
+		if (ret != -ERESTARTSYS) {
+			WARN_ON(1);
+			return VM_FAULT_SIGBUS;
+		}
+	}
+	dout("page_mkwrite %p %llu~%zd got cap refs on %s\n",
+	     inode, off, len, ceph_cap_string(got));
+
+	/* Update time before taking page lock */
+	file_update_time(vma->vm_file);
+
+	lock_page(page);
+
+	if ((off > size) ||
+	    (page->mapping != inode->i_mapping)) {
+		unlock_page(page);
+		ret = VM_FAULT_NOPAGE;
+		goto out;
+	}
+
+	ret = ceph_update_writeable_page(vma->vm_file, off, len, page);
+	if (ret == 0) {
+		int dirty;
+
+		/* success.  we'll keep the page locked. */
+		set_page_dirty(page);
+		/* ceph_update_writeable_page() returned with snap_rwsem held */
+		up_read(&mdsc->snap_rwsem);
+		ret = VM_FAULT_LOCKED;
+
+		/* tell the MDS about the dirtied FILE_WR data */
+		spin_lock(&ci->i_ceph_lock);
+		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
+		spin_unlock(&ci->i_ceph_lock);
+		if (dirty)
+			__mark_inode_dirty(inode, dirty);
+	} else {
+		/*
+		 * ceph_update_writeable_page() returns with the page
+		 * already unlocked on any failure; do NOT unlock it
+		 * again here (the original code did, a double unlock).
+		 */
+		if (ret == -ENOMEM)
+			ret = VM_FAULT_OOM;
+		else
+			ret = VM_FAULT_SIGBUS;
+	}
+out:
+	dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n",
+	     inode, off, len, ceph_cap_string(got), ret);
+	ceph_put_cap_refs(ci, got);
+
+	return ret;
+}
+
+/* vm operations for ceph mmap()ed files */
+static struct vm_operations_struct ceph_vmops = {
+	.fault		= ceph_filemap_fault,
+	.page_mkwrite	= ceph_page_mkwrite,
+	.remap_pages	= generic_file_remap_pages,
+};
+
+int ceph_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct address_space *mapping = file->f_mapping;
+
+	/* mmap needs a working ->readpage to fault pages in */
+	if (mapping->a_ops->readpage == NULL)
+		return -ENOEXEC;
+
+	file_accessed(file);
+	vma->vm_ops = &ceph_vmops;
+	return 0;
+}
diff --git a/ceph/cache.c b/ceph/cache.c
new file mode 100644
index 0000000..834f9f3
--- /dev/null
+++ b/ceph/cache.c
@@ -0,0 +1,402 @@
+/*
+ * Ceph cache definitions.
+ *
+ * Copyright (C) 2013 by Adfin Solutions, Inc. All Rights Reserved.
+ * Written by Milosz Tanski (milosz at adfin.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include "super.h"
+#include "cache.h"
+
+/*
+ * Auxiliary data stored alongside each inode's fscache cookie; the
+ * check_aux callback compares it against the live inode to detect a
+ * stale cached object (mtime/size mismatch).
+ */
+struct ceph_aux_inode {
+ struct timespec mtime;
+ loff_t size;
+};
+
+/* Top-level fscache netfs definition for the ceph filesystem. */
+struct fscache_netfs ceph_cache_netfs = {
+ .name = "ceph",
+ .version = 0,
+};
+
+/*
+ * Index key for the per-filesystem fscache object: the cluster fsid.
+ * Returns the key length, or 0 if the caller's buffer is too small.
+ */
+static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t maxbuf)
+{
+ const struct ceph_fs_client* fsc = cookie_netfs_data;
+ const uint16_t keylen = sizeof(fsc->client->fsid);
+
+ if (keylen > maxbuf)
+ return 0;
+
+ memcpy(buffer, &fsc->client->fsid, keylen);
+ return keylen;
+}
+
+/* Cookie definition for the per-filesystem (fsid) index object. */
+static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
+ .name = "CEPH.fsid",
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
+ .get_key = ceph_fscache_session_get_key,
+};
+
+/* Register the ceph netfs with the fscache core (module init). */
+int ceph_fscache_register(void)
+{
+ return fscache_register_netfs(&ceph_cache_netfs);
+}
+
+/* Undo ceph_fscache_register() (module exit). */
+void ceph_fscache_unregister(void)
+{
+ fscache_unregister_netfs(&ceph_cache_netfs);
+}
+
+/*
+ * Acquire the per-filesystem fscache cookie and create the workqueue
+ * used to revalidate cached inodes. Failure to get a cookie is not
+ * fatal: caching is simply disabled for this mount (returns 0).
+ * Fix: "resgister" typo and missing newline in the pr_err format.
+ */
+int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
+{
+ fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
+ &ceph_fscache_fsid_object_def,
+ fsc, true);
+
+ if (fsc->fscache == NULL) {
+ pr_err("Unable to register fsid: %p fscache cookie\n", fsc);
+ return 0;
+ }
+
+ fsc->revalidate_wq = alloc_workqueue("ceph-revalidate", 0, 1);
+ if (fsc->revalidate_wq == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/*
+ * Index key for a cached inode: the ceph virtual inode number
+ * (inode id + snapshot id). Returns 0 if the buffer is too small.
+ */
+static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t maxbuf)
+{
+ const struct ceph_inode_info* ci = cookie_netfs_data;
+ const uint16_t keylen = sizeof(ci->i_vino);
+
+ if (keylen > maxbuf)
+ return 0;
+
+ memcpy(buffer, &ci->i_vino, keylen);
+ return keylen;
+}
+
+/*
+ * Emit the auxiliary data (mtime + size) for the inode cookie.
+ * Fix: honour @bufmax -- the original copied sizeof(aux) bytes
+ * unconditionally, which could overrun a smaller buffer supplied by
+ * the fscache core. Returning 0 means "no aux data".
+ */
+static uint16_t ceph_fscache_inode_get_aux(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax)
+{
+ struct ceph_aux_inode aux;
+ const struct ceph_inode_info* ci = cookie_netfs_data;
+ const struct inode* inode = &ci->vfs_inode;
+
+ if (bufmax < sizeof(aux))
+ return 0;
+
+ memset(&aux, 0, sizeof(aux));
+ aux.mtime = inode->i_mtime;
+ aux.size = inode->i_size;
+
+ memcpy(buffer, &aux, sizeof(aux));
+
+ return sizeof(aux);
+}
+
+/* Report the cached object's size (current i_size) to fscache. */
+static void ceph_fscache_inode_get_attr(const void *cookie_netfs_data,
+ uint64_t *size)
+{
+ const struct ceph_inode_info* ci = cookie_netfs_data;
+
+ *size = ci->vfs_inode.i_size;
+}
+
+/*
+ * Compare the aux data recorded in the cache against the inode's
+ * current mtime and size; any mismatch marks the object obsolete.
+ */
+static enum fscache_checkaux ceph_fscache_inode_check_aux(
+ void *cookie_netfs_data, const void *data, uint16_t dlen)
+{
+ struct ceph_aux_inode aux;
+ struct ceph_inode_info* ci = cookie_netfs_data;
+ struct inode* inode = &ci->vfs_inode;
+
+ if (dlen != sizeof(aux))
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ /* memset() zeroes struct padding so the whole-struct memcmp below
+ * is reliable against the aux blob built the same way in get_aux */
+ memset(&aux, 0, sizeof(aux));
+ aux.mtime = inode->i_mtime;
+ aux.size = inode->i_size;
+
+ if (memcmp(data, &aux, sizeof(aux)) != 0)
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ dout("ceph inode 0x%p cached okay", ci);
+ return FSCACHE_CHECKAUX_OKAY;
+}
+
+/*
+ * fscache is withdrawing caching for this inode: clear PG_fscache on
+ * every page of the mapping, one pagevec batch at a time.
+ */
+static void ceph_fscache_inode_now_uncached(void* cookie_netfs_data)
+{
+ struct ceph_inode_info* ci = cookie_netfs_data;
+ struct pagevec pvec;
+ pgoff_t first;
+ int loop, nr_pages;
+
+ pagevec_init(&pvec, 0);
+ first = 0;
+
+ dout("ceph inode 0x%p now uncached", ci);
+
+ while (1) {
+ nr_pages = pagevec_lookup(&pvec, ci->vfs_inode.i_mapping, first,
+ PAGEVEC_SIZE - pagevec_count(&pvec));
+
+ if (!nr_pages)
+ break;
+
+ for (loop = 0; loop < nr_pages; loop++)
+ ClearPageFsCache(pvec.pages[loop]);
+
+ /* resume the lookup just past the last page seen */
+ first = pvec.pages[nr_pages - 1]->index + 1;
+
+ pvec.nr = nr_pages;
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+}
+
+/* Cookie definition for per-inode data objects. */
+static const struct fscache_cookie_def ceph_fscache_inode_object_def = {
+ .name = "CEPH.inode",
+ .type = FSCACHE_COOKIE_TYPE_DATAFILE,
+ .get_key = ceph_fscache_inode_get_key,
+ .get_attr = ceph_fscache_inode_get_attr,
+ .get_aux = ceph_fscache_inode_get_aux,
+ .check_aux = ceph_fscache_inode_check_aux,
+ .now_uncached = ceph_fscache_inode_now_uncached,
+};
+
+/*
+ * Acquire an fscache cookie for @ci. Only regular files are cached.
+ * Fix: use S_ISREG() -- the original tested (i_mode & S_IFREG) != 0,
+ * which mis-classifies modes whose S_IFMT field merely contains the
+ * S_IFREG bits (e.g. S_IFSOCK = S_IFREG|S_IFCHR would pass).
+ */
+void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
+ struct ceph_inode_info* ci)
+{
+ struct inode* inode = &ci->vfs_inode;
+
+ /* No caching for filesystem */
+ if (fsc->fscache == NULL)
+ return;
+
+ /* Only cache for regular files that are read only */
+ if (!S_ISREG(inode->i_mode))
+ return;
+
+ /* Avoid multiple racing open requests */
+ mutex_lock(&inode->i_mutex);
+
+ if (ci->fscache)
+ goto done;
+
+ ci->fscache = fscache_acquire_cookie(fsc->fscache,
+ &ceph_fscache_inode_object_def,
+ ci, true);
+ fscache_check_consistency(ci->fscache);
+done:
+ mutex_unlock(&inode->i_mutex);
+}
+
+/*
+ * Drop the inode's fscache cookie: uncache every page it covers and
+ * relinquish the cookie without retiring the on-disk object.
+ */
+void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
+{
+ struct fscache_cookie* cookie = ci->fscache;
+
+ if (cookie == NULL)
+ return;
+
+ ci->fscache = NULL;
+
+ fscache_uncache_all_inode_pages(cookie, &ci->vfs_inode);
+ fscache_relinquish_cookie(cookie, 0);
+}
+
+/* Read-completion callback: mark the page uptodate on success.
+ * The page lock is left to the caller (readpage_nounlock path). */
+static void ceph_vfs_readpage_complete(struct page *page, void *data, int error)
+{
+ if (error == 0)
+ SetPageUptodate(page);
+}
+
+/* As above, but also drops the page lock (readpages path). */
+static void ceph_vfs_readpage_complete_unlock(struct page *page, void *data, int error)
+{
+ if (error == 0)
+ SetPageUptodate(page);
+
+ unlock_page(page);
+}
+
+/* The cache is usable only while FILE_CACHE is issued and our fscache
+ * generation matches the inode's read-cache generation. */
+static inline int cache_valid(struct ceph_inode_info *ci)
+{
+ if (!(ceph_caps_issued(ci) & CEPH_CAP_FILE_CACHE))
+ return 0;
+ return ci->i_fscache_gen == ci->i_rdcache_gen;
+}
+
+
+/* Attempt to read from the fscache.
+ *
+ * This function is called from the readpage_nounlock context. DO NOT attempt to
+ * unlock the page here (or in the callback).
+ */
+int ceph_readpage_from_fscache(struct inode *inode, struct page *page)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int ret;
+
+ if (!cache_valid(ci))
+ return -ENOBUFS;
+
+ ret = fscache_read_or_alloc_page(ci->fscache, page,
+ ceph_vfs_readpage_complete, NULL,
+ GFP_KERNEL);
+
+ if (ret == 0) {
+ /* Page found; read submitted asynchronously */
+ dout("page read submitted\n");
+ return 0;
+ }
+ if (ret == -ENOBUFS || ret == -ENODATA) {
+ /* Page was not found, or cannot be cached */
+ dout("page/inode not in cache\n");
+ return ret;
+ }
+ dout("%s: unknown error ret = %i\n", __func__, ret);
+ return ret;
+}
+
+/* Batched variant of ceph_readpage_from_fscache() for ->readpages.
+ * On return, *nr_pages counts the pages fscache did NOT handle. */
+int ceph_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int ret;
+
+ if (!cache_valid(ci))
+ return -ENOBUFS;
+
+ ret = fscache_read_or_alloc_pages(ci->fscache, mapping, pages, nr_pages,
+ ceph_vfs_readpage_complete_unlock,
+ NULL, mapping_gfp_mask(mapping));
+
+ if (ret == 0) {
+ /* Every page found; reads submitted */
+ dout("all-page read submitted\n");
+ return 0;
+ }
+ if (ret == -ENOBUFS || ret == -ENODATA) {
+ /* Some pages missing, or cannot be cached */
+ dout("page/inode not in cache\n");
+ return ret;
+ }
+ dout("%s: unknown error ret = %i\n", __func__, ret);
+ return ret;
+}
+
+/* Push a freshly-read page into fscache; on a failed store, drop the
+ * page's cache reservation so PG_fscache does not leak. */
+void ceph_readpage_to_fscache(struct inode *inode, struct page *page)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ if (!PageFsCache(page))
+ return;
+
+ if (!cache_valid(ci))
+ return;
+
+ if (fscache_write_page(ci->fscache, page, GFP_KERNEL) != 0)
+ fscache_uncache_page(ci->fscache, page);
+}
+
+/*
+ * Called when a page is being invalidated: wait for any in-flight
+ * fscache write to it, then remove it from the cache.
+ */
+void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ if (!PageFsCache(page))
+ return;
+
+ fscache_wait_on_page_write(ci->fscache, page);
+ fscache_uncache_page(ci->fscache, page);
+}
+
+/*
+ * Tear down per-fs caching state: flush/destroy the revalidation
+ * workqueue first (so no work runs afterwards), then drop the cookie.
+ * fscache_relinquish_cookie() tolerates a NULL cookie.
+ */
+void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
+{
+ if (fsc->revalidate_wq)
+ destroy_workqueue(fsc->revalidate_wq);
+
+ fscache_relinquish_cookie(fsc->fscache, 0);
+ fsc->fscache = NULL;
+}
+
+/*
+ * Deferred cache revalidation: if FILE_CACHE is still issued, ask
+ * fscache whether the backing object is consistent and invalidate it
+ * if not, then record the generation we validated against.
+ * Releases the inode reference taken by ceph_queue_revalidate().
+ */
+static void ceph_revalidate_work(struct work_struct *work)
+{
+ int issued;
+ u32 orig_gen;
+ struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
+ i_revalidate_work);
+ struct inode *inode = &ci->vfs_inode;
+
+ /* snapshot caps and generation under the inode spinlock */
+ spin_lock(&ci->i_ceph_lock);
+ issued = __ceph_caps_issued(ci, NULL);
+ orig_gen = ci->i_rdcache_gen;
+ spin_unlock(&ci->i_ceph_lock);
+
+ if (!(issued & CEPH_CAP_FILE_CACHE)) {
+ dout("revalidate_work lost cache before validation %p\n",
+ inode);
+ goto out;
+ }
+
+ if (!fscache_check_consistency(ci->fscache))
+ fscache_invalidate(ci->fscache);
+
+ spin_lock(&ci->i_ceph_lock);
+ /* Update the new valid generation (backwards sanity check too) */
+ if (orig_gen > ci->i_fscache_gen) {
+ ci->i_fscache_gen = orig_gen;
+ }
+ spin_unlock(&ci->i_ceph_lock);
+
+out:
+ iput(&ci->vfs_inode);
+}
+
+/*
+ * Queue asynchronous cache revalidation for @inode. Takes an inode
+ * reference that ceph_revalidate_work() releases; drops it here if
+ * the work was already queued.
+ * Fixes: stray ')' inside the failure dout() format string, and the
+ * workqueue is looked up via the already-computed fsc pointer instead
+ * of re-deriving it from the superblock.
+ */
+void ceph_queue_revalidate(struct inode *inode)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ if (fsc->revalidate_wq == NULL || ci->fscache == NULL)
+ return;
+
+ ihold(inode);
+
+ if (queue_work(fsc->revalidate_wq, &ci->i_revalidate_work)) {
+ dout("ceph_queue_revalidate %p\n", inode);
+ } else {
+ dout("ceph_queue_revalidate %p failed\n", inode);
+ iput(inode);
+ }
+}
+
+/* Initialise the fscache state embedded in a newly-created ceph inode. */
+void ceph_fscache_inode_init(struct ceph_inode_info *ci)
+{
+ ci->fscache = NULL;
+ /* The first load is verified cookie open time */
+ ci->i_fscache_gen = 1;
+ INIT_WORK(&ci->i_revalidate_work, ceph_revalidate_work);
+}
diff --git a/ceph/cache.h b/ceph/cache.h
new file mode 100644
index 0000000..5ac591b
--- /dev/null
+++ b/ceph/cache.h
@@ -0,0 +1,182 @@
+/*
+ * Ceph cache definitions.
+ *
+ * Copyright (C) 2013 by Adfin Solutions, Inc. All Rights Reserved.
+ * Written by Milosz Tanski (milosz at adfin.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#ifndef _CEPH_CACHE_H
+#define _CEPH_CACHE_H
+
+#ifdef CONFIG_CEPH_FSCACHE
+
+extern struct fscache_netfs ceph_cache_netfs;
+
+int ceph_fscache_register(void);
+void ceph_fscache_unregister(void);
+
+int ceph_fscache_register_fs(struct ceph_fs_client* fsc);
+void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc);
+
+void ceph_fscache_inode_init(struct ceph_inode_info *ci);
+void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
+ struct ceph_inode_info* ci);
+void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci);
+
+int ceph_readpage_from_fscache(struct inode *inode, struct page *page);
+int ceph_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages);
+void ceph_readpage_to_fscache(struct inode *inode, struct page *page);
+void ceph_invalidate_fscache_page(struct inode* inode, struct page *page);
+void ceph_queue_revalidate(struct inode *inode);
+
+/* Tell fscache the backing object's attributes (i_size) changed. */
+static inline void ceph_fscache_update_objectsize(struct inode *inode)
+{
+ fscache_attr_changed(ceph_inode(inode)->fscache);
+}
+
+/* Invalidate all data cached for @inode. */
+static inline void ceph_fscache_invalidate(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ fscache_invalidate(ci->fscache);
+}
+
+/*
+ * Release the page's fscache reservation.
+ * Fix: drop the 'return' of a void expression from a void function --
+ * a C constraint violation (C99 6.8.6.4), accepted only as a GNU
+ * extension.
+ */
+static inline void ceph_fscache_uncache_page(struct inode *inode,
+ struct page *page)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ fscache_uncache_page(ci->fscache, page);
+}
+
+/* Ask fscache whether @page may be released (->releasepage helper). */
+static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp)
+{
+ struct ceph_inode_info *ci = ceph_inode(page->mapping->host);
+
+ return fscache_maybe_release_page(ci->fscache, page, gfp);
+}
+
+/* A single-page read was aborted: drop the page's cache mark. */
+static inline void ceph_fscache_readpage_cancel(struct inode *inode,
+ struct page *page)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ if (fscache_cookie_valid(ci->fscache) && PageFsCache(page))
+ __fscache_uncache_page(ci->fscache, page);
+}
+
+/*
+ * Cancel caching of a batch of pages that will not be read after all.
+ * Fix: drop the 'return' of a void expression from a void function
+ * (C constraint violation, C99 6.8.6.4).
+ */
+static inline void ceph_fscache_readpages_cancel(struct inode *inode,
+ struct list_head *pages)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ fscache_readpages_cancel(ci->fscache, pages);
+}
+
+#else
+
+/*
+ * CONFIG_CEPH_FSCACHE disabled: no-op stubs so callers need no #ifdefs.
+ */
+static inline int ceph_fscache_register(void)
+{
+ return 0;
+}
+
+static inline void ceph_fscache_unregister(void)
+{
+}
+
+static inline int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
+{
+ return 0;
+}
+
+static inline void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
+{
+}
+
+static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci)
+{
+}
+
+static inline void ceph_fscache_register_inode_cookie(struct ceph_fs_client* parent_fsc,
+ struct ceph_inode_info* ci)
+{
+}
+
+/* I/O and invalidation stubs: without fscache, reads fall back to the
+ * normal page-cache path (-ENOBUFS) and everything else is a no-op. */
+static inline void ceph_fscache_uncache_page(struct inode *inode,
+ struct page *pages)
+{
+}
+
+static inline int ceph_readpage_from_fscache(struct inode* inode,
+ struct page *page)
+{
+ return -ENOBUFS;
+}
+
+static inline int ceph_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ return -ENOBUFS;
+}
+
+static inline void ceph_readpage_to_fscache(struct inode *inode,
+ struct page *page)
+{
+}
+
+static inline void ceph_fscache_update_objectsize(struct inode *inode)
+{
+}
+
+static inline void ceph_fscache_invalidate(struct inode *inode)
+{
+}
+
+static inline void ceph_invalidate_fscache_page(struct inode *inode,
+ struct page *page)
+{
+}
+
+static inline void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
+{
+}
+
+/* no cache: the page is always releasable */
+static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp)
+{
+ return 1;
+}
+
+static inline void ceph_fscache_readpage_cancel(struct inode *inode,
+ struct page *page)
+{
+}
+
+static inline void ceph_fscache_readpages_cancel(struct inode *inode,
+ struct list_head *pages)
+{
+}
+
+static inline void ceph_queue_revalidate(struct inode *inode)
+{
+}
+
+#endif
+
+#endif
diff --git a/ceph/caps.c b/ceph/caps.c
new file mode 100644
index 0000000..c561b62
--- /dev/null
+++ b/ceph/caps.c
@@ -0,0 +1,3313 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/writeback.h>
+
+#include "super.h"
+#include "mds_client.h"
+#include "cache.h"
+#include <linux/ceph/decode.h>
+#include <linux/ceph/messenger.h>
+
+/*
+ * Capability management
+ *
+ * The Ceph metadata servers control client access to inode metadata
+ * and file data by issuing capabilities, granting clients permission
+ * to read and/or write both inode field and file data to OSDs
+ * (storage nodes). Each capability consists of a set of bits
+ * indicating which operations are allowed.
+ *
+ * If the client holds a *_SHARED cap, the client has a coherent value
+ * that can be safely read from the cached inode.
+ *
+ * In the case of a *_EXCL (exclusive) or FILE_WR capabilities, the
+ * client is allowed to change inode attributes (e.g., file size,
+ * mtime), note its dirty state in the ceph_cap, and asynchronously
+ * flush that metadata change to the MDS.
+ *
+ * In the event of a conflicting operation (perhaps by another
+ * client), the MDS will revoke the conflicting client capabilities.
+ *
+ * In order for a client to cache an inode, it must hold a capability
+ * with at least one MDS server. When inodes are released, release
+ * notifications are batched and periodically sent en masse to the MDS
+ * cluster to release server state.
+ */
+
+
+/*
+ * Generate readable cap strings for debugging output.
+ */
+#define MAX_CAP_STR 20
+static char cap_str[MAX_CAP_STR][40]; /* small ring of static buffers */
+static DEFINE_SPINLOCK(cap_str_lock); /* protects last_cap_str only */
+static int last_cap_str; /* next ring slot to hand out */
+
+/* Append one-letter codes for the generic cap bits set in @c to @s;
+ * returns the advanced write pointer (no NUL terminator is written). */
+static char *gcap_string(char *s, int c)
+{
+ static const int bits[] = {
+ CEPH_CAP_GSHARED, CEPH_CAP_GEXCL, CEPH_CAP_GCACHE,
+ CEPH_CAP_GRD, CEPH_CAP_GWR, CEPH_CAP_GBUFFER,
+ CEPH_CAP_GLAZYIO
+ };
+ static const char chars[] = "sxcrwbl";
+ int i;
+
+ for (i = 0; i < 7; i++)
+ if (c & bits[i])
+ *s++ = chars[i];
+ return s;
+}
+
+/*
+ * Render @caps as a human-readable string in one of MAX_CAP_STR
+ * rotating static buffers. The returned pointer is only valid until
+ * the ring wraps; concurrent callers may clobber old results -- this
+ * is accepted for debug output.
+ */
+const char *ceph_cap_string(int caps)
+{
+ int i;
+ char *s;
+ int c;
+
+ /* claim the next ring slot */
+ spin_lock(&cap_str_lock);
+ i = last_cap_str++;
+ if (last_cap_str == MAX_CAP_STR)
+ last_cap_str = 0;
+ spin_unlock(&cap_str_lock);
+
+ s = cap_str[i];
+
+ if (caps & CEPH_CAP_PIN)
+ *s++ = 'p';
+
+ c = (caps >> CEPH_CAP_SAUTH) & 3;
+ if (c) {
+ *s++ = 'A';
+ s = gcap_string(s, c);
+ }
+
+ c = (caps >> CEPH_CAP_SLINK) & 3;
+ if (c) {
+ *s++ = 'L';
+ s = gcap_string(s, c);
+ }
+
+ c = (caps >> CEPH_CAP_SXATTR) & 3;
+ if (c) {
+ *s++ = 'X';
+ s = gcap_string(s, c);
+ }
+
+ c = caps >> CEPH_CAP_SFILE;
+ if (c) {
+ *s++ = 'F';
+ s = gcap_string(s, c);
+ }
+
+ /* no bits at all: print '-' */
+ if (s == cap_str[i])
+ *s++ = '-';
+ *s = 0;
+ return cap_str[i];
+}
+
+/* Initialise the mds client's preallocated-cap pool bookkeeping. */
+void ceph_caps_init(struct ceph_mds_client *mdsc)
+{
+ INIT_LIST_HEAD(&mdsc->caps_list);
+ spin_lock_init(&mdsc->caps_list_lock);
+}
+
+/* Free every cap left in the preallocated pool and zero all counters. */
+void ceph_caps_finalize(struct ceph_mds_client *mdsc)
+{
+ struct ceph_cap *cap, *tmp;
+
+ spin_lock(&mdsc->caps_list_lock);
+ list_for_each_entry_safe(cap, tmp, &mdsc->caps_list, caps_item) {
+ list_del(&cap->caps_item);
+ kmem_cache_free(ceph_cap_cachep, cap);
+ }
+ mdsc->caps_total_count = 0;
+ mdsc->caps_avail_count = 0;
+ mdsc->caps_use_count = 0;
+ mdsc->caps_reserve_count = 0;
+ mdsc->caps_min_count = 0;
+ spin_unlock(&mdsc->caps_list_lock);
+}
+
+/* Adjust the floor of preallocated caps kept around; @delta may be
+ * negative, but the floor must never go below zero. */
+void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta)
+{
+ spin_lock(&mdsc->caps_list_lock);
+ mdsc->caps_min_count += delta;
+ BUG_ON(mdsc->caps_min_count < 0);
+ spin_unlock(&mdsc->caps_list_lock);
+}
+
+/*
+ * Reserve @need cap structs for @ctx: take what is available from the
+ * preallocated pool, then allocate the remainder.
+ * NOTE(review): ctx->count is set to @need even if allocation fell
+ * short (only a warning is printed) -- presumably callers tolerate
+ * this or the subsequent get_cap() BUGs; confirm against callers.
+ */
+void ceph_reserve_caps(struct ceph_mds_client *mdsc,
+ struct ceph_cap_reservation *ctx, int need)
+{
+ int i;
+ struct ceph_cap *cap;
+ int have;
+ int alloc = 0;
+ LIST_HEAD(newcaps);
+
+ dout("reserve caps ctx=%p need=%d\n", ctx, need);
+
+ /* first reserve any caps that are already allocated */
+ spin_lock(&mdsc->caps_list_lock);
+ if (mdsc->caps_avail_count >= need)
+ have = need;
+ else
+ have = mdsc->caps_avail_count;
+ mdsc->caps_avail_count -= have;
+ mdsc->caps_reserve_count += have;
+ BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+ mdsc->caps_reserve_count +
+ mdsc->caps_avail_count);
+ spin_unlock(&mdsc->caps_list_lock);
+
+ /* allocate the shortfall outside the lock */
+ for (i = have; i < need; i++) {
+ cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
+ if (!cap)
+ break;
+ list_add(&cap->caps_item, &newcaps);
+ alloc++;
+ }
+ /* we didn't manage to reserve as much as we needed */
+ if (have + alloc != need)
+ pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n",
+ ctx, need, have + alloc);
+
+ spin_lock(&mdsc->caps_list_lock);
+ mdsc->caps_total_count += alloc;
+ mdsc->caps_reserve_count += alloc;
+ list_splice(&newcaps, &mdsc->caps_list);
+
+ BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+ mdsc->caps_reserve_count +
+ mdsc->caps_avail_count);
+ spin_unlock(&mdsc->caps_list_lock);
+
+ ctx->count = need;
+ dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n",
+ ctx, mdsc->caps_total_count, mdsc->caps_use_count,
+ mdsc->caps_reserve_count, mdsc->caps_avail_count);
+}
+
+/* Return any caps still reserved under @ctx to the available pool.
+ * Always returns 0. */
+int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
+ struct ceph_cap_reservation *ctx)
+{
+ dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count);
+ if (ctx->count) {
+ spin_lock(&mdsc->caps_list_lock);
+ BUG_ON(mdsc->caps_reserve_count < ctx->count);
+ mdsc->caps_reserve_count -= ctx->count;
+ mdsc->caps_avail_count += ctx->count;
+ ctx->count = 0;
+ dout("unreserve caps %d = %d used + %d resv + %d avail\n",
+ mdsc->caps_total_count, mdsc->caps_use_count,
+ mdsc->caps_reserve_count, mdsc->caps_avail_count);
+ BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+ mdsc->caps_reserve_count +
+ mdsc->caps_avail_count);
+ spin_unlock(&mdsc->caps_list_lock);
+ }
+ return 0;
+}
+
+/*
+ * Take one cap struct: from @ctx's reservation if given, otherwise by
+ * direct allocation (may return NULL). Pool accounting is updated
+ * under caps_list_lock either way.
+ */
+static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc,
+ struct ceph_cap_reservation *ctx)
+{
+ struct ceph_cap *cap = NULL;
+
+ /* temporary, until we do something about cap import/export */
+ if (!ctx) {
+ cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
+ if (cap) {
+ spin_lock(&mdsc->caps_list_lock);
+ mdsc->caps_use_count++;
+ mdsc->caps_total_count++;
+ spin_unlock(&mdsc->caps_list_lock);
+ }
+ return cap;
+ }
+
+ spin_lock(&mdsc->caps_list_lock);
+ dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n",
+ ctx, ctx->count, mdsc->caps_total_count, mdsc->caps_use_count,
+ mdsc->caps_reserve_count, mdsc->caps_avail_count);
+ BUG_ON(!ctx->count);
+ BUG_ON(ctx->count > mdsc->caps_reserve_count);
+ BUG_ON(list_empty(&mdsc->caps_list));
+
+ ctx->count--;
+ mdsc->caps_reserve_count--;
+ mdsc->caps_use_count++;
+
+ cap = list_first_entry(&mdsc->caps_list, struct ceph_cap, caps_item);
+ list_del(&cap->caps_item);
+
+ BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+ mdsc->caps_reserve_count + mdsc->caps_avail_count);
+ spin_unlock(&mdsc->caps_list_lock);
+ return cap;
+}
+
+/*
+ * Release a cap struct back to the pool, or free it outright if we
+ * already hold enough spares (reserved + min floor).
+ */
+void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap)
+{
+ spin_lock(&mdsc->caps_list_lock);
+ dout("put_cap %p %d = %d used + %d resv + %d avail\n",
+ cap, mdsc->caps_total_count, mdsc->caps_use_count,
+ mdsc->caps_reserve_count, mdsc->caps_avail_count);
+ mdsc->caps_use_count--;
+ /*
+ * Keep some preallocated caps around (ceph_min_count), to
+ * avoid lots of free/alloc churn.
+ */
+ if (mdsc->caps_avail_count >= mdsc->caps_reserve_count +
+ mdsc->caps_min_count) {
+ mdsc->caps_total_count--;
+ kmem_cache_free(ceph_cap_cachep, cap);
+ } else {
+ mdsc->caps_avail_count++;
+ list_add(&cap->caps_item, &mdsc->caps_list);
+ }
+
+ BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+ mdsc->caps_reserve_count + mdsc->caps_avail_count);
+ spin_unlock(&mdsc->caps_list_lock);
+}
+
+/*
+ * Report cap pool counters (any out-pointer may be NULL).
+ * NOTE(review): counters are read without caps_list_lock, so the
+ * values are an unsynchronised snapshot -- fine for debugfs output.
+ */
+void ceph_reservation_status(struct ceph_fs_client *fsc,
+ int *total, int *avail, int *used, int *reserved,
+ int *min)
+{
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+
+ if (total)
+ *total = mdsc->caps_total_count;
+ if (avail)
+ *avail = mdsc->caps_avail_count;
+ if (used)
+ *used = mdsc->caps_use_count;
+ if (reserved)
+ *reserved = mdsc->caps_reserve_count;
+ if (min)
+ *min = mdsc->caps_min_count;
+}
+
+/*
+ * Find ceph_cap for given mds, if any.
+ *
+ * Called with i_ceph_lock held.
+ */
+static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
+{
+ struct rb_node *node = ci->i_caps.rb_node;
+
+ while (node) {
+ struct ceph_cap *cap = rb_entry(node, struct ceph_cap,
+ ci_node);
+
+ if (mds < cap->mds)
+ node = node->rb_left;
+ else if (mds > cap->mds)
+ node = node->rb_right;
+ else
+ return cap;
+ }
+ return NULL;
+}
+
+/* Locked wrapper around __get_cap_for_mds(). */
+struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds)
+{
+ struct ceph_cap *cap;
+
+ spin_lock(&ci->i_ceph_lock);
+ cap = __get_cap_for_mds(ci, mds);
+ spin_unlock(&ci->i_ceph_lock);
+ return cap;
+}
+
+/*
+ * Return id of any MDS with a cap, preferably FILE_WR|BUFFER|EXCL, else -1.
+ */
+static int __ceph_get_cap_mds(struct ceph_inode_info *ci)
+{
+ int mds = -1;
+ struct rb_node *p;
+
+ /* prefer mds with WR|BUFFER|EXCL caps */
+ for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
+ struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
+
+ mds = cap->mds;
+ if (cap->issued & (CEPH_CAP_FILE_WR |
+ CEPH_CAP_FILE_BUFFER |
+ CEPH_CAP_FILE_EXCL))
+ break;
+ }
+ return mds;
+}
+
+/* Locked wrapper around __ceph_get_cap_mds(). */
+int ceph_get_cap_mds(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int mds;
+
+ spin_lock(&ci->i_ceph_lock);
+ mds = __ceph_get_cap_mds(ci);
+ spin_unlock(&ci->i_ceph_lock);
+ return mds;
+}
+
+/*
+ * Called under i_ceph_lock.
+ */
+static void __insert_cap_node(struct ceph_inode_info *ci,
+ struct ceph_cap *new)
+{
+ struct rb_node **p = &ci->i_caps.rb_node;
+ struct rb_node *parent = NULL;
+ struct ceph_cap *cap = NULL;
+
+ /* standard rbtree insert, keyed by mds id; the caller guarantees
+ * no cap for this mds exists yet (BUG otherwise) */
+ while (*p) {
+ parent = *p;
+ cap = rb_entry(parent, struct ceph_cap, ci_node);
+ if (new->mds < cap->mds)
+ p = &(*p)->rb_left;
+ else if (new->mds > cap->mds)
+ p = &(*p)->rb_right;
+ else
+ BUG();
+ }
+
+ rb_link_node(&new->ci_node, parent, p);
+ rb_insert_color(&new->ci_node, &ci->i_caps);
+}
+
+/*
+ * (re)set cap hold timeouts, which control the delayed release
+ * of unused caps back to the MDS. Should be called on cap use.
+ */
+static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
+ struct ceph_inode_info *ci)
+{
+ struct ceph_mount_options *ma = mdsc->fsc->mount_options;
+
+ /* mount-configurable min/max delays, rounded to whole jiffies */
+ ci->i_hold_caps_min = round_jiffies(jiffies +
+ ma->caps_wanted_delay_min * HZ);
+ ci->i_hold_caps_max = round_jiffies(jiffies +
+ ma->caps_wanted_delay_max * HZ);
+ dout("__cap_set_timeouts %p min %lu max %lu\n", &ci->vfs_inode,
+ ci->i_hold_caps_min - jiffies, ci->i_hold_caps_max - jiffies);
+}
+
+/*
+ * (Re)queue cap at the end of the delayed cap release list.
+ *
+ * If I_FLUSH is set, leave the inode at the front of the list.
+ *
+ * Caller holds i_ceph_lock
+ * -> we take mdsc->cap_delay_lock
+ */
+static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
+ struct ceph_inode_info *ci)
+{
+ __cap_set_timeouts(mdsc, ci);
+ dout("__cap_delay_requeue %p flags %d at %lu\n", &ci->vfs_inode,
+ ci->i_ceph_flags, ci->i_hold_caps_max);
+ if (!mdsc->stopping) {
+ spin_lock(&mdsc->cap_delay_lock);
+ if (!list_empty(&ci->i_cap_delay_list)) {
+ /* flush-marked inodes keep their (front) position */
+ if (ci->i_ceph_flags & CEPH_I_FLUSH)
+ goto no_change;
+ list_del_init(&ci->i_cap_delay_list);
+ }
+ list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
+no_change:
+ spin_unlock(&mdsc->cap_delay_lock);
+ }
+}
+
+/*
+ * Queue an inode for immediate writeback. Mark inode with I_FLUSH,
+ * indicating we should send a cap message to flush dirty metadata
+ * asap, and move to the front of the delayed cap list.
+ */
+static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc,
+ struct ceph_inode_info *ci)
+{
+ dout("__cap_delay_requeue_front %p\n", &ci->vfs_inode);
+ spin_lock(&mdsc->cap_delay_lock);
+ ci->i_ceph_flags |= CEPH_I_FLUSH;
+ if (!list_empty(&ci->i_cap_delay_list))
+ list_del_init(&ci->i_cap_delay_list);
+ /* head of the list == processed first */
+ list_add(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
+ spin_unlock(&mdsc->cap_delay_lock);
+}
+
+/*
+ * Cancel delayed work on cap.
+ *
+ * Caller must hold i_ceph_lock.
+ */
+static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
+ struct ceph_inode_info *ci)
+{
+ dout("__cap_delay_cancel %p\n", &ci->vfs_inode);
+ /* unlocked emptiness check is an optimisation; the locked
+ * list_del_init below is what actually dequeues */
+ if (list_empty(&ci->i_cap_delay_list))
+ return;
+ spin_lock(&mdsc->cap_delay_lock);
+ list_del_init(&ci->i_cap_delay_list);
+ spin_unlock(&mdsc->cap_delay_lock);
+}
+
+/*
+ * Common issue checks for add_cap, handle_cap_grant.
+ */
+static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
+ unsigned issued)
+{
+ unsigned had = __ceph_caps_issued(ci, NULL);
+
+ /*
+ * Each time we receive FILE_CACHE anew, we increment
+ * i_rdcache_gen.
+ */
+ if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
+ (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) {
+ ci->i_rdcache_gen++;
+ }
+
+ /*
+ * if we are newly issued FILE_SHARED, mark dir not complete; we
+ * don't know what happened to this directory while we didn't
+ * have the cap.
+ */
+ if ((issued & CEPH_CAP_FILE_SHARED) &&
+ (had & CEPH_CAP_FILE_SHARED) == 0) {
+ ci->i_shared_gen++;
+ if (S_ISDIR(ci->vfs_inode.i_mode)) {
+ dout(" marking %p NOT complete\n", &ci->vfs_inode);
+ __ceph_dir_clear_complete(ci);
+ }
+ }
+}
+
+/*
+ * Add a capability under the given MDS session.
+ *
+ * Caller should hold session snap_rwsem (read) and s_mutex.
+ *
+ * @fmode is the open file mode, if we are opening a file, otherwise
+ * it is < 0. (This is so we can atomically add the cap and add an
+ * open file reference to it.)
+ */
+int ceph_add_cap(struct inode *inode,
+ struct ceph_mds_session *session, u64 cap_id,
+ int fmode, unsigned issued, unsigned wanted,
+ unsigned seq, unsigned mseq, u64 realmino, int flags,
+ struct ceph_cap_reservation *caps_reservation)
+{
+ struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_cap *new_cap = NULL;
+ struct ceph_cap *cap;
+ int mds = session->s_mds;
+ int actual_wanted;
+
+ dout("add_cap %p mds%d cap %llx %s seq %d\n", inode,
+ session->s_mds, cap_id, ceph_cap_string(issued), seq);
+
+ /*
+ * If we are opening the file, include file mode wanted bits
+ * in wanted.
+ */
+ if (fmode >= 0)
+ wanted |= ceph_caps_for_mode(fmode);
+
+retry:
+ spin_lock(&ci->i_ceph_lock);
+ cap = __get_cap_for_mds(ci, mds);
+ if (!cap) {
+ /* no cap from this mds yet: allocate one outside the lock
+ * and retry, since get_cap() may sleep */
+ if (new_cap) {
+ cap = new_cap;
+ new_cap = NULL;
+ } else {
+ spin_unlock(&ci->i_ceph_lock);
+ new_cap = get_cap(mdsc, caps_reservation);
+ if (new_cap == NULL)
+ return -ENOMEM;
+ goto retry;
+ }
+
+ cap->issued = 0;
+ cap->implemented = 0;
+ cap->mds = mds;
+ cap->mds_wanted = 0;
+ cap->mseq = 0;
+
+ cap->ci = ci;
+ __insert_cap_node(ci, cap);
+
+ /* add to session cap list */
+ cap->session = session;
+ spin_lock(&session->s_cap_lock);
+ list_add_tail(&cap->session_caps, &session->s_caps);
+ session->s_nr_caps++;
+ spin_unlock(&session->s_cap_lock);
+ } else {
+ if (new_cap)
+ ceph_put_cap(mdsc, new_cap);
+
+ /*
+ * auth mds of the inode changed. we received the cap export
+ * message, but still haven't received the cap import message.
+ * handle_cap_export() updated the new auth MDS' cap.
+ *
+ * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing
+ * a message that was send before the cap import message. So
+ * don't remove caps.
+ */
+ if (ceph_seq_cmp(seq, cap->seq) <= 0) {
+ WARN_ON(cap != ci->i_auth_cap);
+ WARN_ON(cap->cap_id != cap_id);
+ seq = cap->seq;
+ mseq = cap->mseq;
+ issued |= cap->issued;
+ flags |= CEPH_CAP_FLAG_AUTH;
+ }
+ }
+
+ if (!ci->i_snap_realm) {
+ /*
+ * add this inode to the appropriate snap realm
+ */
+ struct ceph_snap_realm *realm = ceph_lookup_snap_realm(mdsc,
+ realmino);
+ if (realm) {
+ ceph_get_snap_realm(mdsc, realm);
+ spin_lock(&realm->inodes_with_caps_lock);
+ ci->i_snap_realm = realm;
+ list_add(&ci->i_snap_realm_item,
+ &realm->inodes_with_caps);
+ spin_unlock(&realm->inodes_with_caps_lock);
+ } else {
+ pr_err("ceph_add_cap: couldn't find snap realm %llx\n",
+ realmino);
+ WARN_ON(!realm);
+ }
+ }
+
+ __check_cap_issue(ci, cap, issued);
+
+ /*
+ * If we are issued caps we don't want, or the mds' wanted
+ * value appears to be off, queue a check so we'll release
+ * later and/or update the mds wanted value.
+ */
+ actual_wanted = __ceph_caps_wanted(ci);
+ if ((wanted & ~actual_wanted) ||
+ (issued & ~actual_wanted & CEPH_CAP_ANY_WR)) {
+ dout(" issued %s, mds wanted %s, actual %s, queueing\n",
+ ceph_cap_string(issued), ceph_cap_string(wanted),
+ ceph_cap_string(actual_wanted));
+ __cap_delay_requeue(mdsc, ci);
+ }
+
+ if (flags & CEPH_CAP_FLAG_AUTH) {
+ /* adopt this cap as the auth cap if it is newer (by mseq)
+ * than the one we currently track */
+ if (ci->i_auth_cap == NULL ||
+ ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) {
+ ci->i_auth_cap = cap;
+ cap->mds_wanted = wanted;
+ }
+ ci->i_cap_exporting_issued = 0;
+ } else {
+ WARN_ON(ci->i_auth_cap == cap);
+ }
+
+ dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n",
+ inode, ceph_vinop(inode), cap, ceph_cap_string(issued),
+ ceph_cap_string(issued|cap->issued), seq, mds);
+ cap->cap_id = cap_id;
+ cap->issued = issued;
+ cap->implemented |= issued;
+ /* older mseq: merge wanted bits; newer: replace them */
+ if (ceph_seq_cmp(mseq, cap->mseq) > 0)
+ cap->mds_wanted = wanted;
+ else
+ cap->mds_wanted |= wanted;
+ cap->seq = seq;
+ cap->issue_seq = seq;
+ cap->mseq = mseq;
+ cap->cap_gen = session->s_cap_gen;
+
+ if (fmode >= 0)
+ __ceph_get_fmode(ci, fmode);
+ spin_unlock(&ci->i_ceph_lock);
+ wake_up_all(&ci->i_cap_wq);
+ return 0;
+}
+
+/*
+ * Return true if cap has not timed out and belongs to the current
+ * generation of the MDS session (i.e. has not gone 'stale' due to
+ * us losing touch with the mds).
+ */
+static int __cap_is_valid(struct ceph_cap *cap)
+{
+ unsigned long ttl;
+ u32 gen;
+
+ /* snapshot the session's generation and ttl under s_gen_ttl_lock */
+ spin_lock(&cap->session->s_gen_ttl_lock);
+ gen = cap->session->s_cap_gen;
+ ttl = cap->session->s_cap_ttl;
+ spin_unlock(&cap->session->s_gen_ttl_lock);
+
+ /* stale if issued in an earlier session generation, or past the ttl */
+ if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) {
+ dout("__cap_is_valid %p cap %p issued %s "
+ "but STALE (gen %u vs %u)\n", &cap->ci->vfs_inode,
+ cap, ceph_cap_string(cap->issued), cap->cap_gen, gen);
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Return set of valid cap bits issued to us. Note that caps time
+ * out, and may be invalidated in bulk if the client session times out
+ * and session->s_cap_gen is bumped.
+ *
+ * If @implemented is non-NULL, it is also filled in with the union of
+ * the implemented bits of all valid caps.
+ */
+int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
+{
+ /* start from snap caps and any caps mid-export to another MDS */
+ int have = ci->i_snap_caps | ci->i_cap_exporting_issued;
+ struct ceph_cap *cap;
+ struct rb_node *p;
+
+ if (implemented)
+ *implemented = 0;
+ for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
+ cap = rb_entry(p, struct ceph_cap, ci_node);
+ if (!__cap_is_valid(cap))
+ continue;
+ dout("__ceph_caps_issued %p cap %p issued %s\n",
+ &ci->vfs_inode, cap, ceph_cap_string(cap->issued));
+ have |= cap->issued;
+ if (implemented)
+ *implemented |= cap->implemented;
+ }
+ /*
+ * exclude caps issued by non-auth MDS, but that are being revoked
+ * by the auth MDS. The non-auth MDS should be revoking/exporting
+ * these caps, but the message is delayed.
+ */
+ if (ci->i_auth_cap) {
+ cap = ci->i_auth_cap;
+ have &= ~cap->implemented | cap->issued;
+ }
+ return have;
+}
+
+/*
+ * Get cap bits issued by caps other than @ocap
+ *
+ * Used to decide what we would still hold if @ocap went away.
+ */
+int __ceph_caps_issued_other(struct ceph_inode_info *ci, struct ceph_cap *ocap)
+{
+ int have = ci->i_snap_caps;
+ struct ceph_cap *cap;
+ struct rb_node *p;
+
+ for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
+ cap = rb_entry(p, struct ceph_cap, ci_node);
+ if (cap == ocap)
+ continue;
+ if (!__cap_is_valid(cap))
+ continue;
+ have |= cap->issued;
+ }
+ return have;
+}
+
+/*
+ * Move a cap to the end of the LRU (oldest caps at list head, newest
+ * at list tail).
+ *
+ * The move is skipped while iterate_session_caps is walking the
+ * session cap list (s_cap_iterator set), to avoid perturbing the
+ * iteration; see the s_cap_lock protocol in mds_client.c.
+ */
+static void __touch_cap(struct ceph_cap *cap)
+{
+ struct ceph_mds_session *s = cap->session;
+
+ spin_lock(&s->s_cap_lock);
+ if (s->s_cap_iterator == NULL) {
+ dout("__touch_cap %p cap %p mds%d\n", &cap->ci->vfs_inode, cap,
+ s->s_mds);
+ list_move_tail(&cap->session_caps, &s->s_caps);
+ } else {
+ dout("__touch_cap %p cap %p mds%d NOP, iterating over caps\n",
+ &cap->ci->vfs_inode, cap, s->s_mds);
+ }
+ spin_unlock(&s->s_cap_lock);
+}
+
+/*
+ * Check if we hold the given mask. If so, move the cap(s) to the
+ * front of their respective LRUs. (This is the preferred way for
+ * callers to check for caps they want.)
+ *
+ * Returns 1 if @mask is fully covered either by snap caps, a single
+ * cap, or a combination of caps from several MDSs; 0 otherwise.
+ * If @touch is set, the contributing cap(s) are LRU-touched.
+ */
+int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
+{
+ struct ceph_cap *cap;
+ struct rb_node *p;
+ int have = ci->i_snap_caps;
+
+ /* snap caps alone may already satisfy the mask */
+ if ((have & mask) == mask) {
+ dout("__ceph_caps_issued_mask %p snap issued %s"
+ " (mask %s)\n", &ci->vfs_inode,
+ ceph_cap_string(have),
+ ceph_cap_string(mask));
+ return 1;
+ }
+
+ for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
+ cap = rb_entry(p, struct ceph_cap, ci_node);
+ if (!__cap_is_valid(cap))
+ continue;
+ /* does this single cap satisfy the mask? */
+ if ((cap->issued & mask) == mask) {
+ dout("__ceph_caps_issued_mask %p cap %p issued %s"
+ " (mask %s)\n", &ci->vfs_inode, cap,
+ ceph_cap_string(cap->issued),
+ ceph_cap_string(mask));
+ if (touch)
+ __touch_cap(cap);
+ return 1;
+ }
+
+ /* does a combination of caps satisfy mask? */
+ have |= cap->issued;
+ if ((have & mask) == mask) {
+ dout("__ceph_caps_issued_mask %p combo issued %s"
+ " (mask %s)\n", &ci->vfs_inode,
+ ceph_cap_string(cap->issued),
+ ceph_cap_string(mask));
+ if (touch) {
+ struct rb_node *q;
+
+ /* touch this + preceding caps */
+ __touch_cap(cap);
+ for (q = rb_first(&ci->i_caps); q != p;
+ q = rb_next(q)) {
+ cap = rb_entry(q, struct ceph_cap,
+ ci_node);
+ if (!__cap_is_valid(cap))
+ continue;
+ __touch_cap(cap);
+ }
+ }
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Return true if mask caps are currently being revoked by an MDS,
+ * considering every cap except @ocap (pass NULL to consider them all).
+ * A cap is "revoking" a bit when it is implemented but no longer issued.
+ */
+int __ceph_caps_revoking_other(struct ceph_inode_info *ci,
+ struct ceph_cap *ocap, int mask)
+{
+ struct ceph_cap *cap;
+ struct rb_node *p;
+
+ for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
+ cap = rb_entry(p, struct ceph_cap, ci_node);
+ if (cap != ocap &&
+ (cap->implemented & ~cap->issued & mask))
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Locked wrapper: return true if any of @mask caps are being revoked
+ * on @ci by any MDS. Takes and releases i_ceph_lock.
+ */
+int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
+{
+ struct inode *inode = &ci->vfs_inode;
+ int ret;
+
+ spin_lock(&ci->i_ceph_lock);
+ ret = __ceph_caps_revoking_other(ci, NULL, mask);
+ spin_unlock(&ci->i_ceph_lock);
+ dout("ceph_caps_revoking %p %s = %d\n", inode,
+ ceph_cap_string(mask), ret);
+ return ret;
+}
+
+/*
+ * Return the set of caps currently in active use, derived from the
+ * inode's reference counters (pin, read, readcache, write, buffer).
+ * Cached pages alone keep FILE_CACHE "used". Caller holds i_ceph_lock.
+ */
+int __ceph_caps_used(struct ceph_inode_info *ci)
+{
+ int used = 0;
+ if (ci->i_pin_ref)
+ used |= CEPH_CAP_PIN;
+ if (ci->i_rd_ref)
+ used |= CEPH_CAP_FILE_RD;
+ if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages)
+ used |= CEPH_CAP_FILE_CACHE;
+ if (ci->i_wr_ref)
+ used |= CEPH_CAP_FILE_WR;
+ if (ci->i_wb_ref || ci->i_wrbuffer_ref)
+ used |= CEPH_CAP_FILE_BUFFER;
+ return used;
+}
+
+/*
+ * wanted, by virtue of open file modes
+ *
+ * Union of the cap bits implied by each file mode that currently has
+ * at least one open count in i_nr_by_mode[].
+ */
+int __ceph_caps_file_wanted(struct ceph_inode_info *ci)
+{
+ int want = 0;
+ int mode;
+ for (mode = 0; mode < CEPH_FILE_MODE_NUM; mode++)
+ if (ci->i_nr_by_mode[mode])
+ want |= ceph_caps_for_mode(mode);
+ return want;
+}
+
+/*
+ * Return caps we have registered with the MDS(s) as 'wanted'.
+ *
+ * File write caps wanted via a non-auth MDS are masked out: only the
+ * auth MDS's wanted set counts for those.
+ */
+int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
+{
+ struct ceph_cap *cap;
+ struct rb_node *p;
+ int mds_wanted = 0;
+
+ for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
+ cap = rb_entry(p, struct ceph_cap, ci_node);
+ if (!__cap_is_valid(cap))
+ continue;
+ if (cap == ci->i_auth_cap)
+ mds_wanted |= cap->mds_wanted;
+ else
+ mds_wanted |= (cap->mds_wanted & ~CEPH_CAP_ANY_FILE_WR);
+ }
+ return mds_wanted;
+}
+
+/*
+ * called under i_ceph_lock
+ *
+ * True if the inode holds any cap at all, including caps that are
+ * mid-export to another MDS (i_cap_exporting_issued).
+ */
+static int __ceph_is_any_caps(struct ceph_inode_info *ci)
+{
+ return !RB_EMPTY_ROOT(&ci->i_caps) || ci->i_cap_exporting_issued;
+}
+
+/*
+ * Locked wrapper for __ceph_is_any_caps(): takes i_ceph_lock.
+ */
+int ceph_is_any_caps(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int ret;
+
+ spin_lock(&ci->i_ceph_lock);
+ ret = __ceph_is_any_caps(ci);
+ spin_unlock(&ci->i_ceph_lock);
+
+ return ret;
+}
+
+/*
+ * Remove a cap. Take steps to deal with a racing iterate_session_caps.
+ *
+ * caller should hold i_ceph_lock.
+ * caller will not hold session s_mutex if called from destroy_inode.
+ *
+ * If @queue_release is true, a cap release message is queued for the
+ * MDS (unless the session is reconnecting and the cap belongs to an
+ * older generation, in which case the reconnect supersedes it).
+ */
+void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
+{
+ struct ceph_mds_session *session = cap->session;
+ struct ceph_inode_info *ci = cap->ci;
+ struct ceph_mds_client *mdsc =
+ ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
+ int removed = 0;
+
+ dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
+
+ /* remove from session list */
+ spin_lock(&session->s_cap_lock);
+ /*
+ * s_cap_reconnect is protected by s_cap_lock. no one changes
+ * s_cap_gen while session is in the reconnect state.
+ */
+ if (queue_release &&
+ (!session->s_cap_reconnect ||
+ cap->cap_gen == session->s_cap_gen))
+ __queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
+ cap->mseq, cap->issue_seq);
+
+ if (session->s_cap_iterator == cap) {
+ /* not yet, we are iterating over this very cap */
+ dout("__ceph_remove_cap delaying %p removal from session %p\n",
+ cap, cap->session);
+ } else {
+ list_del_init(&cap->session_caps);
+ session->s_nr_caps--;
+ cap->session = NULL;
+ removed = 1;
+ }
+ /* protect backpointer with s_cap_lock: see iterate_session_caps */
+ cap->ci = NULL;
+ spin_unlock(&session->s_cap_lock);
+
+ /* remove from inode list */
+ rb_erase(&cap->ci_node, &ci->i_caps);
+ if (ci->i_auth_cap == cap)
+ ci->i_auth_cap = NULL;
+
+ /* if the iterator holds the cap, it will do the final put later */
+ if (removed)
+ ceph_put_cap(mdsc, cap);
+
+ /* last cap gone: detach the inode from its snap realm */
+ if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
+ struct ceph_snap_realm *realm = ci->i_snap_realm;
+ spin_lock(&realm->inodes_with_caps_lock);
+ list_del_init(&ci->i_snap_realm_item);
+ ci->i_snap_realm_counter++;
+ ci->i_snap_realm = NULL;
+ spin_unlock(&realm->inodes_with_caps_lock);
+ ceph_put_snap_realm(mdsc, realm);
+ }
+ if (!__ceph_is_any_real_caps(ci))
+ __cap_delay_cancel(mdsc, ci);
+}
+
+/*
+ * Build and send a cap message to the given MDS.
+ *
+ * Caller should be holding s_mutex.
+ *
+ * Encodes the full cap state (issued/wanted/dirty bits, seq numbers,
+ * size, times, ownership) into a CEPH_MSG_CLIENT_CAPS message; the
+ * optional xattr blob travels as the message 'middle'. Returns 0 or
+ * -ENOMEM if the message could not be allocated.
+ */
+static int send_cap_msg(struct ceph_mds_session *session,
+ u64 ino, u64 cid, int op,
+ int caps, int wanted, int dirty,
+ u32 seq, u64 flush_tid, u32 issue_seq, u32 mseq,
+ u64 size, u64 max_size,
+ struct timespec *mtime, struct timespec *atime,
+ u64 time_warp_seq,
+ kuid_t uid, kgid_t gid, umode_t mode,
+ u64 xattr_version,
+ struct ceph_buffer *xattrs_buf,
+ u64 follows)
+{
+ struct ceph_mds_caps *fc;
+ struct ceph_msg *msg;
+
+ dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s"
+ " seq %u/%u mseq %u follows %lld size %llu/%llu"
+ " xattr_ver %llu xattr_len %d\n", ceph_cap_op_name(op),
+ cid, ino, ceph_cap_string(caps), ceph_cap_string(wanted),
+ ceph_cap_string(dirty),
+ seq, issue_seq, mseq, follows, size, max_size,
+ xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0);
+
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS, false);
+ if (!msg)
+ return -ENOMEM;
+
+ /* the flush tid rides in the message header */
+ msg->hdr.tid = cpu_to_le64(flush_tid);
+
+ fc = msg->front.iov_base;
+ memset(fc, 0, sizeof(*fc));
+
+ fc->cap_id = cpu_to_le64(cid);
+ fc->op = cpu_to_le32(op);
+ fc->seq = cpu_to_le32(seq);
+ fc->issue_seq = cpu_to_le32(issue_seq);
+ fc->migrate_seq = cpu_to_le32(mseq);
+ fc->caps = cpu_to_le32(caps);
+ fc->wanted = cpu_to_le32(wanted);
+ fc->dirty = cpu_to_le32(dirty);
+ fc->ino = cpu_to_le64(ino);
+ fc->snap_follows = cpu_to_le64(follows);
+
+ fc->size = cpu_to_le64(size);
+ fc->max_size = cpu_to_le64(max_size);
+ if (mtime)
+ ceph_encode_timespec(&fc->mtime, mtime);
+ if (atime)
+ ceph_encode_timespec(&fc->atime, atime);
+ fc->time_warp_seq = cpu_to_le32(time_warp_seq);
+
+ /* wire format carries raw (init_user_ns) uid/gid values */
+ fc->uid = cpu_to_le32(from_kuid(&init_user_ns, uid));
+ fc->gid = cpu_to_le32(from_kgid(&init_user_ns, gid));
+ fc->mode = cpu_to_le32(mode);
+
+ fc->xattr_version = cpu_to_le64(xattr_version);
+ if (xattrs_buf) {
+ msg->middle = ceph_buffer_get(xattrs_buf);
+ fc->xattr_len = cpu_to_le32(xattrs_buf->vec.iov_len);
+ msg->hdr.middle_len = cpu_to_le32(xattrs_buf->vec.iov_len);
+ }
+
+ ceph_con_send(&session->s_con, msg);
+ return 0;
+}
+
+/*
+ * Append one cap release record to the session's current (partially
+ * filled) release message; when the message reaches
+ * CEPH_CAPS_PER_RELEASE entries it is moved to the 'done' list for
+ * sending. Caller holds s_cap_lock (see __ceph_remove_cap).
+ */
+void __queue_cap_release(struct ceph_mds_session *session,
+ u64 ino, u64 cap_id, u32 migrate_seq,
+ u32 issue_seq)
+{
+ struct ceph_msg *msg;
+ struct ceph_mds_cap_release *head;
+ struct ceph_mds_cap_item *item;
+
+ BUG_ON(!session->s_num_cap_releases);
+ msg = list_first_entry(&session->s_cap_releases,
+ struct ceph_msg, list_head);
+
+ dout(" adding %llx release to mds%d msg %p (%d left)\n",
+ ino, session->s_mds, msg, session->s_num_cap_releases);
+
+ BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
+ head = msg->front.iov_base;
+ le32_add_cpu(&head->num, 1);
+ item = msg->front.iov_base + msg->front.iov_len;
+ item->ino = cpu_to_le64(ino);
+ item->cap_id = cpu_to_le64(cap_id);
+ item->migrate_seq = cpu_to_le32(migrate_seq);
+ item->seq = cpu_to_le32(issue_seq);
+
+ session->s_num_cap_releases--;
+
+ msg->front.iov_len += sizeof(*item);
+ if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
+ dout(" release msg %p full\n", msg);
+ list_move_tail(&msg->list_head, &session->s_cap_releases_done);
+ } else {
+ dout(" release msg %p at %d/%d (%d)\n", msg,
+ (int)le32_to_cpu(head->num),
+ (int)CEPH_CAPS_PER_RELEASE,
+ (int)msg->front.iov_len);
+ }
+}
+
+/*
+ * Queue cap releases when an inode is dropped from our cache. Since
+ * inode is about to be destroyed, there is no need for i_ceph_lock.
+ *
+ * Advance the RB iterator before removing, since __ceph_remove_cap
+ * erases the current node from i_caps.
+ */
+void ceph_queue_caps_release(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct rb_node *p;
+
+ p = rb_first(&ci->i_caps);
+ while (p) {
+ struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
+ p = rb_next(p);
+ __ceph_remove_cap(cap, true);
+ }
+}
+
+/*
+ * Send a cap msg on the given inode. Update our caps state, then
+ * drop i_ceph_lock and send the message.
+ *
+ * Make note of max_size reported/requested from mds, revoked caps
+ * that have now been implemented.
+ *
+ * Make half-hearted attempt to invalidate page cache if we are
+ * dropping RDCACHE. Note that this will leave behind locked pages
+ * that we'll then need to deal with elsewhere.
+ *
+ * Return non-zero if delayed release, or we experienced an error
+ * such that the caller should requeue + retry later.
+ *
+ * called with i_ceph_lock, then drops it.
+ * caller should hold snap_rwsem (read), s_mutex.
+ */
+static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
+ int op, int used, int want, int retain, int flushing,
+ unsigned *pflush_tid)
+ __releases(cap->ci->i_ceph_lock)
+{
+ struct ceph_inode_info *ci = cap->ci;
+ struct inode *inode = &ci->vfs_inode;
+ u64 cap_id = cap->cap_id;
+ int held, revoking, dropping, keep;
+ u64 seq, issue_seq, mseq, time_warp_seq, follows;
+ u64 size, max_size;
+ struct timespec mtime, atime;
+ int wake = 0;
+ umode_t mode;
+ kuid_t uid;
+ kgid_t gid;
+ struct ceph_mds_session *session;
+ u64 xattr_version = 0;
+ struct ceph_buffer *xattr_blob = NULL;
+ int delayed = 0;
+ u64 flush_tid = 0;
+ int i;
+ int ret;
+
+ held = cap->issued | cap->implemented;
+ revoking = cap->implemented & ~cap->issued;
+ /* never retain a bit currently being revoked */
+ retain &= ~revoking;
+ dropping = cap->issued & ~retain;
+
+ dout("__send_cap %p cap %p session %p %s -> %s (revoking %s)\n",
+ inode, cap, cap->session,
+ ceph_cap_string(held), ceph_cap_string(held & retain),
+ ceph_cap_string(revoking));
+ BUG_ON((retain & CEPH_CAP_PIN) == 0);
+
+ session = cap->session;
+
+ /* don't release wanted unless we've waited a bit. */
+ if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0 &&
+ time_before(jiffies, ci->i_hold_caps_min)) {
+ dout(" delaying issued %s -> %s, wanted %s -> %s on send\n",
+ ceph_cap_string(cap->issued),
+ ceph_cap_string(cap->issued & retain),
+ ceph_cap_string(cap->mds_wanted),
+ ceph_cap_string(want));
+ want |= cap->mds_wanted;
+ retain |= cap->issued;
+ delayed = 1;
+ }
+ ci->i_ceph_flags &= ~(CEPH_I_NODELAY | CEPH_I_FLUSH);
+
+ cap->issued &= retain; /* drop bits we don't want */
+ if (cap->implemented & ~cap->issued) {
+ /*
+ * Wake up any waiters on wanted -> needed transition.
+ * This is due to the weird transition from buffered
+ * to sync IO... we need to flush dirty pages _before_
+ * allowing sync writes to avoid reordering.
+ */
+ wake = 1;
+ }
+ cap->implemented &= cap->issued | used;
+ cap->mds_wanted = want;
+
+ if (flushing) {
+ /*
+ * assign a tid for flush operations so we can avoid
+ * flush1 -> dirty1 -> flush2 -> flushack1 -> mark
+ * clean type races. track latest tid for every bit
+ * so we can handle flush AxFw, flush Fw, and have the
+ * first ack clean Ax.
+ */
+ flush_tid = ++ci->i_cap_flush_last_tid;
+ if (pflush_tid)
+ *pflush_tid = flush_tid;
+ dout(" cap_flush_tid %d\n", (int)flush_tid);
+ for (i = 0; i < CEPH_CAP_BITS; i++)
+ if (flushing & (1 << i))
+ ci->i_cap_flush_tid[i] = flush_tid;
+
+ follows = ci->i_head_snapc->seq;
+ } else {
+ follows = 0;
+ }
+
+ /* snapshot all state to send while we still hold i_ceph_lock */
+ keep = cap->implemented;
+ seq = cap->seq;
+ issue_seq = cap->issue_seq;
+ mseq = cap->mseq;
+ size = inode->i_size;
+ ci->i_reported_size = size;
+ max_size = ci->i_wanted_max_size;
+ ci->i_requested_max_size = max_size;
+ mtime = inode->i_mtime;
+ atime = inode->i_atime;
+ time_warp_seq = ci->i_time_warp_seq;
+ uid = inode->i_uid;
+ gid = inode->i_gid;
+ mode = inode->i_mode;
+
+ if (flushing & CEPH_CAP_XATTR_EXCL) {
+ __ceph_build_xattrs_blob(ci);
+ xattr_blob = ci->i_xattrs.blob;
+ xattr_version = ci->i_xattrs.version;
+ }
+
+ spin_unlock(&ci->i_ceph_lock);
+
+ ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
+ op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
+ size, max_size, &mtime, &atime, time_warp_seq,
+ uid, gid, mode, xattr_version, xattr_blob,
+ follows);
+ if (ret < 0) {
+ dout("error sending cap msg, must requeue %p\n", inode);
+ delayed = 1;
+ }
+
+ if (wake)
+ wake_up_all(&ci->i_cap_wq);
+
+ return delayed;
+}
+
+/*
+ * When a snapshot is taken, clients accumulate dirty metadata on
+ * inodes with capabilities in ceph_cap_snaps to describe the file
+ * state at the time the snapshot was taken. This must be flushed
+ * asynchronously back to the MDS once sync writes complete and dirty
+ * data is written out.
+ *
+ * Unless @again is true, skip cap_snaps that were already sent to
+ * the MDS (i.e., during this session).
+ *
+ * Called under i_ceph_lock. Takes s_mutex as needed.
+ * If @psession is non-NULL, *psession may carry an already-locked
+ * session in and the session we end up holding out.
+ */
+void __ceph_flush_snaps(struct ceph_inode_info *ci,
+ struct ceph_mds_session **psession,
+ int again)
+ __releases(ci->i_ceph_lock)
+ __acquires(ci->i_ceph_lock)
+{
+ struct inode *inode = &ci->vfs_inode;
+ int mds;
+ struct ceph_cap_snap *capsnap;
+ u32 mseq;
+ struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+ struct ceph_mds_session *session = NULL; /* if session != NULL, we hold
+ session->s_mutex */
+ u64 next_follows = 0; /* keep track of how far we've gotten through the
+ i_cap_snaps list, and skip these entries next time
+ around to avoid an infinite loop */
+
+ if (psession)
+ session = *psession;
+
+ dout("__flush_snaps %p\n", inode);
+retry:
+ list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
+ /* avoid an infinite loop after retry */
+ if (capsnap->follows < next_follows)
+ continue;
+ /*
+ * we need to wait for sync writes to complete and for dirty
+ * pages to be written out.
+ */
+ if (capsnap->dirty_pages || capsnap->writing)
+ break;
+
+ /*
+ * if cap writeback already occurred, we should have dropped
+ * the capsnap in ceph_put_wrbuffer_cap_refs.
+ */
+ BUG_ON(capsnap->dirty == 0);
+
+ /* pick mds, take s_mutex */
+ if (ci->i_auth_cap == NULL) {
+ dout("no auth cap (migrating?), doing nothing\n");
+ goto out;
+ }
+
+ /* only flush each capsnap once */
+ if (!again && !list_empty(&capsnap->flushing_item)) {
+ dout("already flushed %p, skipping\n", capsnap);
+ continue;
+ }
+
+ mds = ci->i_auth_cap->session->s_mds;
+ mseq = ci->i_auth_cap->mseq;
+
+ /* auth mds changed since we took this session? drop it */
+ if (session && session->s_mds != mds) {
+ dout("oops, wrong session %p mutex\n", session);
+ mutex_unlock(&session->s_mutex);
+ ceph_put_mds_session(session);
+ session = NULL;
+ }
+ if (!session) {
+ /*
+ * lock inversion: s_mutex must be taken before
+ * i_ceph_lock, so drop i_ceph_lock, lock the
+ * session, then retry the whole scan.
+ */
+ spin_unlock(&ci->i_ceph_lock);
+ mutex_lock(&mdsc->mutex);
+ session = __ceph_lookup_mds_session(mdsc, mds);
+ mutex_unlock(&mdsc->mutex);
+ if (session) {
+ dout("inverting session/ino locks on %p\n",
+ session);
+ mutex_lock(&session->s_mutex);
+ }
+ /*
+ * if session == NULL, we raced against a cap
+ * deletion or migration. retry, and we'll
+ * get a better @mds value next time.
+ */
+ spin_lock(&ci->i_ceph_lock);
+ goto retry;
+ }
+
+ capsnap->flush_tid = ++ci->i_cap_flush_last_tid;
+ atomic_inc(&capsnap->nref);
+ if (!list_empty(&capsnap->flushing_item))
+ list_del_init(&capsnap->flushing_item);
+ list_add_tail(&capsnap->flushing_item,
+ &session->s_cap_snaps_flushing);
+ spin_unlock(&ci->i_ceph_lock);
+
+ dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
+ inode, capsnap, capsnap->follows, capsnap->flush_tid);
+ send_cap_msg(session, ceph_vino(inode).ino, 0,
+ CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0,
+ capsnap->dirty, 0, capsnap->flush_tid, 0, mseq,
+ capsnap->size, 0,
+ &capsnap->mtime, &capsnap->atime,
+ capsnap->time_warp_seq,
+ capsnap->uid, capsnap->gid, capsnap->mode,
+ capsnap->xattr_version, capsnap->xattr_blob,
+ capsnap->follows);
+
+ next_follows = capsnap->follows + 1;
+ ceph_put_cap_snap(capsnap);
+
+ /* list may have changed while unlocked; rescan */
+ spin_lock(&ci->i_ceph_lock);
+ goto retry;
+ }
+
+ /* we flushed them all; remove this inode from the queue */
+ spin_lock(&mdsc->snap_flush_lock);
+ list_del_init(&ci->i_snap_flush_item);
+ spin_unlock(&mdsc->snap_flush_lock);
+
+out:
+ if (psession)
+ *psession = session;
+ else if (session) {
+ mutex_unlock(&session->s_mutex);
+ ceph_put_mds_session(session);
+ }
+}
+
+/*
+ * Locked convenience wrapper: flush cap snaps for @ci, taking and
+ * releasing i_ceph_lock (no caller-provided session, no re-flush).
+ */
+static void ceph_flush_snaps(struct ceph_inode_info *ci)
+{
+ spin_lock(&ci->i_ceph_lock);
+ __ceph_flush_snaps(ci, NULL, 0);
+ spin_unlock(&ci->i_ceph_lock);
+}
+
+/*
+ * Mark caps dirty. If inode is newly dirty, return the dirty flags.
+ * Caller is then responsible for calling __mark_inode_dirty with the
+ * returned flags value.
+ *
+ * Called under i_ceph_lock. On the clean->dirty transition the inode
+ * is pinned (ihold) until its flush completes, pins the head snap
+ * context, and is added to mdsc->cap_dirty.
+ */
+int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
+{
+ struct ceph_mds_client *mdsc =
+ ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
+ struct inode *inode = &ci->vfs_inode;
+ int was = ci->i_dirty_caps;
+ int dirty = 0;
+
+ dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode,
+ ceph_cap_string(mask), ceph_cap_string(was),
+ ceph_cap_string(was | mask));
+ ci->i_dirty_caps |= mask;
+ if (was == 0) {
+ if (!ci->i_head_snapc)
+ ci->i_head_snapc = ceph_get_snap_context(
+ ci->i_snap_realm->cached_context);
+ dout(" inode %p now dirty snapc %p auth cap %p\n",
+ &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap);
+ WARN_ON(!ci->i_auth_cap);
+ BUG_ON(!list_empty(&ci->i_dirty_item));
+ spin_lock(&mdsc->cap_dirty_lock);
+ list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
+ spin_unlock(&mdsc->cap_dirty_lock);
+ if (ci->i_flushing_caps == 0) {
+ /* pin inode until flush completes */
+ ihold(inode);
+ dirty |= I_DIRTY_SYNC;
+ }
+ }
+ BUG_ON(list_empty(&ci->i_dirty_item));
+ if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
+ (mask & CEPH_CAP_FILE_BUFFER))
+ dirty |= I_DIRTY_DATASYNC;
+ __cap_delay_requeue(mdsc, ci);
+ return dirty;
+}
+
+/*
+ * Add dirty inode to the flushing list. Assigned a seq number so we
+ * can wait for caps to flush without starving.
+ *
+ * Called under i_ceph_lock.
+ *
+ * Moves the inode from the mdsc dirty list to the session's flushing
+ * list and returns the cap bits now being flushed.
+ */
+static int __mark_caps_flushing(struct inode *inode,
+ struct ceph_mds_session *session)
+{
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int flushing;
+
+ BUG_ON(ci->i_dirty_caps == 0);
+ BUG_ON(list_empty(&ci->i_dirty_item));
+
+ /* dirty bits become flushing bits */
+ flushing = ci->i_dirty_caps;
+ dout("__mark_caps_flushing flushing %s, flushing_caps %s -> %s\n",
+ ceph_cap_string(flushing),
+ ceph_cap_string(ci->i_flushing_caps),
+ ceph_cap_string(ci->i_flushing_caps | flushing));
+ ci->i_flushing_caps |= flushing;
+ ci->i_dirty_caps = 0;
+ dout(" inode %p now !dirty\n", inode);
+
+ spin_lock(&mdsc->cap_dirty_lock);
+ list_del_init(&ci->i_dirty_item);
+
+ ci->i_cap_flush_seq = ++mdsc->cap_flush_seq;
+ if (list_empty(&ci->i_flushing_item)) {
+ list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing);
+ mdsc->num_cap_flushing++;
+ dout(" inode %p now flushing seq %lld\n", inode,
+ ci->i_cap_flush_seq);
+ } else {
+ list_move_tail(&ci->i_flushing_item, &session->s_cap_flushing);
+ dout(" inode %p now flushing (more) seq %lld\n", inode,
+ ci->i_cap_flush_seq);
+ }
+ spin_unlock(&mdsc->cap_dirty_lock);
+
+ return flushing;
+}
+
+/*
+ * try to invalidate mapping pages without blocking.
+ *
+ * Called (and returns) with i_ceph_lock held; the lock is dropped
+ * around the actual invalidation. Returns 0 on success, -1 if some
+ * pages could not be dropped or new reads raced in (rdcache_gen
+ * changed while unlocked).
+ */
+static int try_nonblocking_invalidate(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ u32 invalidating_gen = ci->i_rdcache_gen;
+
+ spin_unlock(&ci->i_ceph_lock);
+ invalidate_mapping_pages(&inode->i_data, 0, -1);
+ spin_lock(&ci->i_ceph_lock);
+
+ if (inode->i_data.nrpages == 0 &&
+ invalidating_gen == ci->i_rdcache_gen) {
+ /* success. */
+ dout("try_nonblocking_invalidate %p success\n", inode);
+ /* save any racing async invalidate some trouble */
+ ci->i_rdcache_revoking = ci->i_rdcache_gen - 1;
+ return 0;
+ }
+ dout("try_nonblocking_invalidate %p failed\n", inode);
+ return -1;
+}
+
+/*
+ * Swiss army knife function to examine currently used and wanted
+ * versus held caps. Release, flush, ack revoked caps to mds as
+ * appropriate.
+ *
+ * CHECK_CAPS_NODELAY - caller is delayed work and we should not delay
+ * cap release further.
+ * CHECK_CAPS_AUTHONLY - we should only check the auth cap
+ * CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
+ * further delay.
+ *
+ * @session, if non-NULL, is a session whose s_mutex the caller already
+ * holds; it is unlocked before return.
+ */
+void ceph_check_caps(struct ceph_inode_info *ci, int flags,
+ struct ceph_mds_session *session)
+{
+ struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct inode *inode = &ci->vfs_inode;
+ struct ceph_cap *cap;
+ int file_wanted, used, cap_used;
+ int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */
+ int issued, implemented, want, retain, revoking, flushing = 0;
+ int mds = -1; /* keep track of how far we've gone through i_caps list
+ to avoid an infinite loop on retry */
+ struct rb_node *p;
+ int tried_invalidate = 0;
+ int delayed = 0, sent = 0, force_requeue = 0, num;
+ int queue_invalidate = 0;
+ int is_delayed = flags & CHECK_CAPS_NODELAY;
+
+ /* if we are unmounting, flush any unused caps immediately. */
+ if (mdsc->stopping)
+ is_delayed = 1;
+
+ spin_lock(&ci->i_ceph_lock);
+
+ if (ci->i_ceph_flags & CEPH_I_FLUSH)
+ flags |= CHECK_CAPS_FLUSH;
+
+ /* flush snaps first time around only */
+ if (!list_empty(&ci->i_cap_snaps))
+ __ceph_flush_snaps(ci, &session, 0);
+ goto retry_locked;
+retry:
+ spin_lock(&ci->i_ceph_lock);
+retry_locked:
+ file_wanted = __ceph_caps_file_wanted(ci);
+ used = __ceph_caps_used(ci);
+ want = file_wanted | used;
+ issued = __ceph_caps_issued(ci, &implemented);
+ revoking = implemented & ~issued;
+
+ retain = want | CEPH_CAP_PIN;
+ if (!mdsc->stopping && inode->i_nlink > 0) {
+ if (want) {
+ retain |= CEPH_CAP_ANY; /* be greedy */
+ } else {
+ retain |= CEPH_CAP_ANY_SHARED;
+ /*
+ * keep RD only if we didn't have the file open RW,
+ * because then the mds would revoke it anyway to
+ * journal max_size=0.
+ */
+ if (ci->i_max_size == 0)
+ retain |= CEPH_CAP_ANY_RD;
+ }
+ }
+
+ dout("check_caps %p file_want %s used %s dirty %s flushing %s"
+ " issued %s revoking %s retain %s %s%s%s\n", inode,
+ ceph_cap_string(file_wanted),
+ ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
+ ceph_cap_string(ci->i_flushing_caps),
+ ceph_cap_string(issued), ceph_cap_string(revoking),
+ ceph_cap_string(retain),
+ (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "",
+ (flags & CHECK_CAPS_NODELAY) ? " NODELAY" : "",
+ (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "");
+
+ /*
+ * If we no longer need to hold onto old our caps, and we may
+ * have cached pages, but don't want them, then try to invalidate.
+ * If we fail, it's because pages are locked.... try again later.
+ */
+ if ((!is_delayed || mdsc->stopping) &&
+ ci->i_wrbuffer_ref == 0 && /* no dirty pages... */
+ inode->i_data.nrpages && /* have cached pages */
+ (file_wanted == 0 || /* no open files */
+ (revoking & (CEPH_CAP_FILE_CACHE|
+ CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */
+ !tried_invalidate) {
+ dout("check_caps trying to invalidate on %p\n", inode);
+ if (try_nonblocking_invalidate(inode) < 0) {
+ if (revoking & (CEPH_CAP_FILE_CACHE|
+ CEPH_CAP_FILE_LAZYIO)) {
+ dout("check_caps queuing invalidate\n");
+ queue_invalidate = 1;
+ ci->i_rdcache_revoking = ci->i_rdcache_gen;
+ } else {
+ dout("check_caps failed to invalidate pages\n");
+ /* we failed to invalidate pages. check these
+ caps again later. */
+ force_requeue = 1;
+ __cap_set_timeouts(mdsc, ci);
+ }
+ }
+ tried_invalidate = 1;
+ goto retry_locked;
+ }
+
+ num = 0;
+ for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
+ cap = rb_entry(p, struct ceph_cap, ci_node);
+ num++;
+
+ /* avoid looping forever */
+ if (mds >= cap->mds ||
+ ((flags & CHECK_CAPS_AUTHONLY) && cap != ci->i_auth_cap))
+ continue;
+
+ /* NOTE: no side-effects allowed, until we take s_mutex */
+
+ /* non-auth caps don't count use of bits the auth cap holds */
+ cap_used = used;
+ if (ci->i_auth_cap && cap != ci->i_auth_cap)
+ cap_used &= ~ci->i_auth_cap->issued;
+
+ revoking = cap->implemented & ~cap->issued;
+ dout(" mds%d cap %p used %s issued %s implemented %s revoking %s\n",
+ cap->mds, cap, ceph_cap_string(cap->issued),
+ ceph_cap_string(cap_used),
+ ceph_cap_string(cap->implemented),
+ ceph_cap_string(revoking));
+
+ if (cap == ci->i_auth_cap &&
+ (cap->issued & CEPH_CAP_FILE_WR)) {
+ /* request larger max_size from MDS? */
+ if (ci->i_wanted_max_size > ci->i_max_size &&
+ ci->i_wanted_max_size > ci->i_requested_max_size) {
+ dout("requesting new max_size\n");
+ goto ack;
+ }
+
+ /* approaching file_max? */
+ if ((inode->i_size << 1) >= ci->i_max_size &&
+ (ci->i_reported_size << 1) < ci->i_max_size) {
+ dout("i_size approaching max_size\n");
+ goto ack;
+ }
+ }
+ /* flush anything dirty? */
+ if (cap == ci->i_auth_cap && (flags & CHECK_CAPS_FLUSH) &&
+ ci->i_dirty_caps) {
+ dout("flushing dirty caps\n");
+ goto ack;
+ }
+
+ /* completed revocation? going down and there are no caps? */
+ if (revoking && (revoking & cap_used) == 0) {
+ dout("completed revocation of %s\n",
+ ceph_cap_string(cap->implemented & ~cap->issued));
+ goto ack;
+ }
+
+ /* want more caps from mds? */
+ if (want & ~(cap->mds_wanted | cap->issued))
+ goto ack;
+
+ /* things we might delay */
+ if ((cap->issued & ~retain) == 0 &&
+ cap->mds_wanted == want)
+ continue; /* nope, all good */
+
+ if (is_delayed)
+ goto ack;
+
+ /* delay? */
+ if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0 &&
+ time_before(jiffies, ci->i_hold_caps_max)) {
+ dout(" delaying issued %s -> %s, wanted %s -> %s\n",
+ ceph_cap_string(cap->issued),
+ ceph_cap_string(cap->issued & retain),
+ ceph_cap_string(cap->mds_wanted),
+ ceph_cap_string(want));
+ delayed++;
+ continue;
+ }
+
+ack:
+ if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
+ dout(" skipping %p I_NOFLUSH set\n", inode);
+ continue;
+ }
+
+ if (session && session != cap->session) {
+ dout("oops, wrong session %p mutex\n", session);
+ mutex_unlock(&session->s_mutex);
+ session = NULL;
+ }
+ if (!session) {
+ session = cap->session;
+ if (mutex_trylock(&session->s_mutex) == 0) {
+ /*
+ * lock inversion: s_mutex ranks above
+ * i_ceph_lock, so drop our locks, take
+ * s_mutex, and rescan from the top.
+ */
+ dout("inverting session/ino locks on %p\n",
+ session);
+ spin_unlock(&ci->i_ceph_lock);
+ if (took_snap_rwsem) {
+ up_read(&mdsc->snap_rwsem);
+ took_snap_rwsem = 0;
+ }
+ mutex_lock(&session->s_mutex);
+ goto retry;
+ }
+ }
+ /* take snap_rwsem after session mutex */
+ if (!took_snap_rwsem) {
+ if (down_read_trylock(&mdsc->snap_rwsem) == 0) {
+ dout("inverting snap/in locks on %p\n",
+ inode);
+ spin_unlock(&ci->i_ceph_lock);
+ down_read(&mdsc->snap_rwsem);
+ took_snap_rwsem = 1;
+ goto retry;
+ }
+ took_snap_rwsem = 1;
+ }
+
+ if (cap == ci->i_auth_cap && ci->i_dirty_caps)
+ flushing = __mark_caps_flushing(inode, session);
+ else
+ flushing = 0;
+
+ mds = cap->mds; /* remember mds, so we don't repeat */
+ sent++;
+
+ /* __send_cap drops i_ceph_lock */
+ delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used,
+ want, retain, flushing, NULL);
+ goto retry; /* retake i_ceph_lock and restart our cap scan. */
+ }
+
+ /*
+ * Reschedule delayed caps release if we delayed anything,
+ * otherwise cancel.
+ */
+ if (delayed && is_delayed)
+ force_requeue = 1; /* __send_cap delayed release; requeue */
+ if (!delayed && !is_delayed)
+ __cap_delay_cancel(mdsc, ci);
+ else if (!is_delayed || force_requeue)
+ __cap_delay_requeue(mdsc, ci);
+
+ spin_unlock(&ci->i_ceph_lock);
+
+ if (queue_invalidate)
+ ceph_queue_invalidate(inode);
+
+ if (session)
+ mutex_unlock(&session->s_mutex);
+ if (took_snap_rwsem)
+ up_read(&mdsc->snap_rwsem);
+}
+
+/*
+ * Try to flush dirty caps back to the auth mds.
+ *
+ * Returns the set of caps now flushing (0 if nothing was dirty or
+ * flushing was skipped); if @flush_tid is non-NULL it receives the
+ * flush tid assigned by __send_cap for the caller to wait on.
+ */
+static int try_flush_caps(struct inode *inode, unsigned *flush_tid)
+{
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int flushing = 0;
+ struct ceph_mds_session *session = NULL;
+
+retry:
+ spin_lock(&ci->i_ceph_lock);
+ if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
+ dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode);
+ goto out;
+ }
+ if (ci->i_dirty_caps && ci->i_auth_cap) {
+ struct ceph_cap *cap = ci->i_auth_cap;
+ int used = __ceph_caps_used(ci);
+ int want = __ceph_caps_wanted(ci);
+ int delayed;
+
+ /* take the auth session's s_mutex before i_ceph_lock */
+ if (!session || session != cap->session) {
+ spin_unlock(&ci->i_ceph_lock);
+ if (session)
+ mutex_unlock(&session->s_mutex);
+ session = cap->session;
+ mutex_lock(&session->s_mutex);
+ goto retry;
+ }
+ if (cap->session->s_state < CEPH_MDS_SESSION_OPEN)
+ goto out;
+
+ flushing = __mark_caps_flushing(inode, session);
+
+ /* __send_cap drops i_ceph_lock */
+ delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want,
+ cap->issued | cap->implemented, flushing,
+ flush_tid);
+ if (!delayed)
+ goto out_unlocked;
+
+ /* send was delayed; come back to this inode later */
+ spin_lock(&ci->i_ceph_lock);
+ __cap_delay_requeue(mdsc, ci);
+ }
+out:
+ spin_unlock(&ci->i_ceph_lock);
+out_unlocked:
+ if (session)
+ mutex_unlock(&session->s_mutex);
+ return flushing;
+}
+
+/*
+ * Return true if we've flushed caps through the given flush_tid.
+ *
+ * A cap bit is still in flight if it is marked flushing and its
+ * recorded flush tid is at or before @tid.
+ */
+static int caps_are_flushed(struct inode *inode, unsigned tid)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int i, ret = 1;
+
+ spin_lock(&ci->i_ceph_lock);
+ for (i = 0; i < CEPH_CAP_BITS; i++) {
+ if (!(ci->i_flushing_caps & (1 << i)))
+ continue;
+ if (ci->i_cap_flush_tid[i] <= tid) {
+ /* still flushing this bit */
+ ret = 0;
+ break;
+ }
+ }
+ spin_unlock(&ci->i_ceph_lock);
+ return ret;
+}
+
+/*
+ * Wait on any unsafe replies for the given inode. First wait on the
+ * newest request, and make that the upper bound. Then, if there are
+ * more requests, keep waiting on the oldest as long as it is still older
+ * than the original request.
+ *
+ * We take a ref on each request and drop i_unsafe_lock around the
+ * completion wait, so the list may change under us between iterations.
+ */
+static void sync_write_wait(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct list_head *head = &ci->i_unsafe_writes;
+ struct ceph_osd_request *req;
+ u64 last_tid;
+
+ spin_lock(&ci->i_unsafe_lock);
+ if (list_empty(head))
+ goto out;
+
+ /* set upper bound as _last_ entry in chain */
+ req = list_entry(head->prev, struct ceph_osd_request,
+ r_unsafe_item);
+ last_tid = req->r_tid;
+
+ do {
+ /* hold a ref so req stays valid while we sleep unlocked */
+ ceph_osdc_get_request(req);
+ spin_unlock(&ci->i_unsafe_lock);
+ dout("sync_write_wait on tid %llu (until %llu)\n",
+ req->r_tid, last_tid);
+ wait_for_completion(&req->r_safe_completion);
+ spin_lock(&ci->i_unsafe_lock);
+ ceph_osdc_put_request(req);
+
+ /*
+ * from here on look at first entry in chain, since we
+ * only want to wait for anything older than last_tid
+ */
+ if (list_empty(head))
+ break;
+ req = list_entry(head->next, struct ceph_osd_request,
+ r_unsafe_item);
+ } while (req->r_tid < last_tid);
+out:
+ spin_unlock(&ci->i_unsafe_lock);
+}
+
+/*
+ * fsync/fdatasync: wait out unsafe OSD writes, flush dirty pages, then
+ * flush dirty caps to the MDS and (for full fsync) wait for the flush
+ * ack.  For datasync we skip the cap wait because the MDS can recover
+ * size/mtime on its own.
+ */
+int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+ struct inode *inode = file->f_mapping->host;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ unsigned flush_tid;
+ int ret;
+ int dirty;
+
+ dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
+ sync_write_wait(inode);
+
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret < 0)
+ return ret;
+ mutex_lock(&inode->i_mutex);
+
+ dirty = try_flush_caps(inode, &flush_tid);
+ dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
+
+ /*
+ * only wait on non-file metadata writeback (the mds
+ * can recover size and mtime, so we don't need to
+ * wait for that)
+ */
+ if (!datasync && (dirty & ~CEPH_CAP_ANY_FILE_WR)) {
+ dout("fsync waiting for flush_tid %u\n", flush_tid);
+ ret = wait_event_interruptible(ci->i_cap_wq,
+ caps_are_flushed(inode, flush_tid));
+ }
+
+ dout("fsync %p%s done\n", inode, datasync ? " datasync" : "");
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+}
+
+/*
+ * Flush any dirty caps back to the mds. If we aren't asked to wait,
+ * queue inode for flush but don't do so immediately, because we can
+ * get by with fewer MDS messages if we wait for data writeback to
+ * complete first.
+ *
+ * Returns 0, or -ERESTARTSYS if the synchronous wait is interrupted.
+ */
+int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ unsigned flush_tid;
+ int err = 0;
+ int dirty;
+ int wait = wbc->sync_mode == WB_SYNC_ALL;
+
+ dout("write_inode %p wait=%d\n", inode, wait);
+ if (wait) {
+ dirty = try_flush_caps(inode, &flush_tid);
+ if (dirty)
+ err = wait_event_interruptible(ci->i_cap_wq,
+ caps_are_flushed(inode, flush_tid));
+ } else {
+ struct ceph_mds_client *mdsc =
+ ceph_sb_to_client(inode->i_sb)->mdsc;
+
+ /* requeue at the front so the flush happens soon-ish */
+ spin_lock(&ci->i_ceph_lock);
+ if (__ceph_caps_dirty(ci))
+ __cap_delay_requeue_front(mdsc, ci);
+ spin_unlock(&ci->i_ceph_lock);
+ }
+ return err;
+}
+
+/*
+ * After a recovering MDS goes active, we need to resend any caps
+ * we were flushing.
+ *
+ * Caller holds session->s_mutex.
+ */
+static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ struct ceph_cap_snap *capsnap;
+
+ dout("kick_flushing_capsnaps mds%d\n", session->s_mds);
+ list_for_each_entry(capsnap, &session->s_cap_snaps_flushing,
+ flushing_item) {
+ struct ceph_inode_info *ci = capsnap->ci;
+ struct inode *inode = &ci->vfs_inode;
+ struct ceph_cap *cap;
+
+ spin_lock(&ci->i_ceph_lock);
+ cap = ci->i_auth_cap;
+ if (cap && cap->session == session) {
+ dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
+ cap, capsnap);
+ /* session passed by reference: flush may switch it */
+ __ceph_flush_snaps(ci, &session, 1);
+ } else {
+ /* auth cap migrated away; nothing we can resend here */
+ pr_err("%p auth cap %p not mds%d ???\n", inode,
+ cap, session->s_mds);
+ }
+ spin_unlock(&ci->i_ceph_lock);
+ }
+}
+
+/*
+ * Resend in-flight cap flushes (and capsnap flushes) for all inodes
+ * on @session's flushing list, e.g. after the MDS recovered.
+ *
+ * Caller holds session->s_mutex.
+ */
+void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ struct ceph_inode_info *ci;
+
+ kick_flushing_capsnaps(mdsc, session);
+
+ dout("kick_flushing_caps mds%d\n", session->s_mds);
+ list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
+ struct inode *inode = &ci->vfs_inode;
+ struct ceph_cap *cap;
+ int delayed = 0;
+
+ spin_lock(&ci->i_ceph_lock);
+ cap = ci->i_auth_cap;
+ if (cap && cap->session == session) {
+ dout("kick_flushing_caps %p cap %p %s\n", inode,
+ cap, ceph_cap_string(ci->i_flushing_caps));
+ /* __send_cap drops i_ceph_lock */
+ delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
+ __ceph_caps_used(ci),
+ __ceph_caps_wanted(ci),
+ cap->issued | cap->implemented,
+ ci->i_flushing_caps, NULL);
+ if (delayed) {
+ /* retake lock to requeue the delayed flush */
+ spin_lock(&ci->i_ceph_lock);
+ __cap_delay_requeue(mdsc, ci);
+ spin_unlock(&ci->i_ceph_lock);
+ }
+ } else {
+ pr_err("%p auth cap %p not mds%d ???\n", inode,
+ cap, session->s_mds);
+ spin_unlock(&ci->i_ceph_lock);
+ }
+ }
+}
+
+/*
+ * Resend any pending cap/snap flushes for a single inode, e.g. after
+ * its caps were imported to this session.
+ *
+ * Caller holds session->s_mutex; i_ceph_lock is taken and released
+ * here (directly or via __send_cap, which drops it).
+ */
+static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session,
+ struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_cap *cap;
+ int delayed = 0;
+
+ spin_lock(&ci->i_ceph_lock);
+ cap = ci->i_auth_cap;
+ dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
+ ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
+
+ __ceph_flush_snaps(ci, &session, 1);
+
+ if (ci->i_flushing_caps) {
+ /* move to the (new) auth session's flushing list */
+ spin_lock(&mdsc->cap_dirty_lock);
+ list_move_tail(&ci->i_flushing_item,
+ &cap->session->s_cap_flushing);
+ spin_unlock(&mdsc->cap_dirty_lock);
+
+ /* __send_cap drops i_ceph_lock */
+ delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
+ __ceph_caps_used(ci),
+ __ceph_caps_wanted(ci),
+ cap->issued | cap->implemented,
+ ci->i_flushing_caps, NULL);
+ if (delayed) {
+ spin_lock(&ci->i_ceph_lock);
+ __cap_delay_requeue(mdsc, ci);
+ spin_unlock(&ci->i_ceph_lock);
+ }
+ } else {
+ spin_unlock(&ci->i_ceph_lock);
+ }
+}
+
+
+/*
+ * Take references to capabilities we hold, so that we don't release
+ * them to the MDS prematurely.
+ *
+ * Protected by i_ceph_lock.
+ */
+static void __take_cap_refs(struct ceph_inode_info *ci, int got)
+{
+ struct inode *inode = &ci->vfs_inode;
+
+ if (got & CEPH_CAP_PIN)
+ ci->i_pin_ref++;
+ if (got & CEPH_CAP_FILE_RD)
+ ci->i_rd_ref++;
+ if (got & CEPH_CAP_FILE_CACHE)
+ ci->i_rdcache_ref++;
+ if (got & CEPH_CAP_FILE_BUFFER) {
+ /* the first buffered-write ref pins the inode */
+ if (ci->i_wb_ref++ == 0)
+ ihold(inode);
+ dout("__take_cap_refs %p wb %d -> %d (?)\n",
+ inode, ci->i_wb_ref-1, ci->i_wb_ref);
+ }
+ if (got & CEPH_CAP_FILE_WR)
+ ci->i_wr_ref++;
+}
+
+/*
+ * Try to grab cap references. Specify those refs we @want, and the
+ * minimal set we @need. Also include the larger offset we are writing
+ * to (when applicable), and check against max_size here as well.
+ * Note that caller is responsible for ensuring max_size increases are
+ * requested from the MDS.
+ *
+ * Returns nonzero when the wait is over: either we took the refs
+ * (*got set), or the caller must act on *err (e.g. -EBADF) or
+ * *check_max (re-request a larger max_size).  Returns 0 to keep
+ * waiting (used as a wait_event condition by ceph_get_caps).
+ */
+static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
+ int *got, loff_t endoff, int *check_max, int *err)
+{
+ struct inode *inode = &ci->vfs_inode;
+ int ret = 0;
+ int have, implemented;
+ int file_wanted;
+
+ dout("get_cap_refs %p need %s want %s\n", inode,
+ ceph_cap_string(need), ceph_cap_string(want));
+ spin_lock(&ci->i_ceph_lock);
+
+ /* make sure file is actually open */
+ file_wanted = __ceph_caps_file_wanted(ci);
+ if ((file_wanted & need) == 0) {
+ dout("try_get_cap_refs need %s file_wanted %s, EBADF\n",
+ ceph_cap_string(need), ceph_cap_string(file_wanted));
+ *err = -EBADF;
+ ret = 1;
+ goto out;
+ }
+
+ /* finish pending truncate */
+ while (ci->i_truncate_pending) {
+ /* must drop the spinlock; the truncate may sleep */
+ spin_unlock(&ci->i_ceph_lock);
+ __ceph_do_pending_vmtruncate(inode);
+ spin_lock(&ci->i_ceph_lock);
+ }
+
+ have = __ceph_caps_issued(ci, &implemented);
+
+ if (have & need & CEPH_CAP_FILE_WR) {
+ if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) {
+ dout("get_cap_refs %p endoff %llu > maxsize %llu\n",
+ inode, endoff, ci->i_max_size);
+ if (endoff > ci->i_requested_max_size) {
+ *check_max = 1;
+ ret = 1;
+ }
+ goto out;
+ }
+ /*
+ * If a sync write is in progress, we must wait, so that we
+ * can get a final snapshot value for size+mtime.
+ */
+ if (__ceph_have_pending_cap_snap(ci)) {
+ dout("get_cap_refs %p cap_snap_pending\n", inode);
+ goto out;
+ }
+ }
+
+ if ((have & need) == need) {
+ /*
+ * Look at (implemented & ~have & not) so that we keep waiting
+ * on transition from wanted -> needed caps. This is needed
+ * for WRBUFFER|WR -> WR to avoid a new WR sync write from
+ * going before a prior buffered writeback happens.
+ */
+ int not = want & ~(have & need);
+ int revoking = implemented & ~have;
+ dout("get_cap_refs %p have %s but not %s (revoking %s)\n",
+ inode, ceph_cap_string(have), ceph_cap_string(not),
+ ceph_cap_string(revoking));
+ if ((revoking & not) == 0) {
+ *got = need | (have & want);
+ __take_cap_refs(ci, *got);
+ ret = 1;
+ }
+ } else {
+ dout("get_cap_refs %p have %s needed %s\n", inode,
+ ceph_cap_string(have), ceph_cap_string(need));
+ }
+out:
+ spin_unlock(&ci->i_ceph_lock);
+ dout("get_cap_refs %p ret %d got %s\n", inode,
+ ret, ceph_cap_string(*got));
+ return ret;
+}
+
+/*
+ * Check the offset we are writing up to against our current
+ * max_size. If necessary, tell the MDS we want to write to
+ * a larger offset.
+ */
+static void check_max_size(struct inode *inode, loff_t endoff)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int check = 0;
+
+ /* do we need to explicitly request a larger max_size? */
+ spin_lock(&ci->i_ceph_lock);
+ if (endoff >= ci->i_max_size && endoff > ci->i_wanted_max_size) {
+ dout("write %p at large endoff %llu, req max_size\n",
+ inode, endoff);
+ ci->i_wanted_max_size = endoff;
+ }
+ /* duplicate ceph_check_caps()'s logic */
+ if (ci->i_auth_cap &&
+ (ci->i_auth_cap->issued & CEPH_CAP_FILE_WR) &&
+ ci->i_wanted_max_size > ci->i_max_size &&
+ ci->i_wanted_max_size > ci->i_requested_max_size)
+ check = 1;
+ spin_unlock(&ci->i_ceph_lock);
+ /* send the request outside the spinlock */
+ if (check)
+ ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
+}
+
+/*
+ * Wait for caps, and take cap references. If we can't get a WR cap
+ * due to a small max_size, make sure we check_max_size (and possibly
+ * ask the mds) so we don't get hung up indefinitely.
+ */
+int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, int *got,
+ loff_t endoff)
+{
+ int ret;
+
+ for (;;) {
+ int check_max = 0;
+ int err = 0;
+
+ if (endoff > 0)
+ check_max_size(&ci->vfs_inode, endoff);
+ ret = wait_event_interruptible(ci->i_cap_wq,
+ try_get_cap_refs(ci, need, want,
+ got, endoff,
+ &check_max, &err));
+ if (err)
+ ret = err;
+ /* retry only if we need a larger max_size from the mds */
+ if (!check_max)
+ break;
+ }
+ return ret;
+}
+
+/*
+ * Take cap refs. Caller must already know we hold at least one ref
+ * on the caps in question or we don't know this is safe.
+ */
+void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps)
+{
+ spin_lock(&ci->i_ceph_lock);
+ __take_cap_refs(ci, caps);
+ spin_unlock(&ci->i_ceph_lock);
+}
+
+/*
+ * Release cap refs.
+ *
+ * If we released the last ref on any given cap, call ceph_check_caps
+ * to release (or schedule a release).
+ *
+ * If we are releasing a WR cap (from a sync write), finalize any affected
+ * cap_snap, and wake up any waiters.
+ *
+ * "put" below means we must drop the inode reference that the first
+ * FILE_BUFFER ref took in __take_cap_refs().
+ */
+void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
+{
+ struct inode *inode = &ci->vfs_inode;
+ int last = 0, put = 0, flushsnaps = 0, wake = 0;
+ struct ceph_cap_snap *capsnap;
+
+ spin_lock(&ci->i_ceph_lock);
+ if (had & CEPH_CAP_PIN)
+ --ci->i_pin_ref;
+ if (had & CEPH_CAP_FILE_RD)
+ if (--ci->i_rd_ref == 0)
+ last++;
+ if (had & CEPH_CAP_FILE_CACHE)
+ if (--ci->i_rdcache_ref == 0)
+ last++;
+ if (had & CEPH_CAP_FILE_BUFFER) {
+ if (--ci->i_wb_ref == 0) {
+ last++;
+ put++;
+ }
+ dout("put_cap_refs %p wb %d -> %d (?)\n",
+ inode, ci->i_wb_ref+1, ci->i_wb_ref);
+ }
+ if (had & CEPH_CAP_FILE_WR)
+ if (--ci->i_wr_ref == 0) {
+ last++;
+ /* last writer: finalize any cap_snap awaiting us */
+ if (!list_empty(&ci->i_cap_snaps)) {
+ capsnap = list_first_entry(&ci->i_cap_snaps,
+ struct ceph_cap_snap,
+ ci_item);
+ if (capsnap->writing) {
+ capsnap->writing = 0;
+ flushsnaps =
+ __ceph_finish_cap_snap(ci,
+ capsnap);
+ wake = 1;
+ }
+ }
+ }
+ spin_unlock(&ci->i_ceph_lock);
+
+ dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
+ last ? " last" : "", put ? " put" : "");
+
+ if (last && !flushsnaps)
+ ceph_check_caps(ci, 0, NULL);
+ else if (flushsnaps)
+ ceph_flush_snaps(ci);
+ if (wake)
+ wake_up_all(&ci->i_cap_wq);
+ if (put)
+ iput(inode);
+}
+
+/*
+ * Release @nr WRBUFFER refs on dirty pages for the given @snapc snap
+ * context. Adjust per-snap dirty page accounting as appropriate.
+ * Once all dirty data for a cap_snap is flushed, flush snapped file
+ * metadata back to the MDS. If we dropped the last ref, call
+ * ceph_check_caps.
+ */
+void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
+ struct ceph_snap_context *snapc)
+{
+ struct inode *inode = &ci->vfs_inode;
+ int last = 0;
+ int complete_capsnap = 0;
+ int drop_capsnap = 0;
+ int found = 0;
+ struct ceph_cap_snap *capsnap = NULL;
+
+ spin_lock(&ci->i_ceph_lock);
+ ci->i_wrbuffer_ref -= nr;
+ last = !ci->i_wrbuffer_ref;
+
+ if (ci->i_head_snapc == snapc) {
+ /* refs belong to the live (head) snap context */
+ ci->i_wrbuffer_ref_head -= nr;
+ if (ci->i_wrbuffer_ref_head == 0 &&
+ ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) {
+ BUG_ON(!ci->i_head_snapc);
+ ceph_put_snap_context(ci->i_head_snapc);
+ ci->i_head_snapc = NULL;
+ }
+ dout("put_wrbuffer_cap_refs on %p head %d/%d -> %d/%d %s\n",
+ inode,
+ ci->i_wrbuffer_ref+nr, ci->i_wrbuffer_ref_head+nr,
+ ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
+ last ? " LAST" : "");
+ } else {
+ /* refs belong to an older cap_snap's context */
+ list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
+ if (capsnap->context == snapc) {
+ found = 1;
+ break;
+ }
+ }
+ BUG_ON(!found);
+ capsnap->dirty_pages -= nr;
+ if (capsnap->dirty_pages == 0) {
+ complete_capsnap = 1;
+ if (capsnap->dirty == 0)
+ /* cap writeback completed before we created
+ * the cap_snap; no FLUSHSNAP is needed */
+ drop_capsnap = 1;
+ }
+ dout("put_wrbuffer_cap_refs on %p cap_snap %p "
+ " snap %lld %d/%d -> %d/%d %s%s%s\n",
+ inode, capsnap, capsnap->context->seq,
+ ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr,
+ ci->i_wrbuffer_ref, capsnap->dirty_pages,
+ last ? " (wrbuffer last)" : "",
+ complete_capsnap ? " (complete capsnap)" : "",
+ drop_capsnap ? " (drop capsnap)" : "");
+ if (drop_capsnap) {
+ ceph_put_snap_context(capsnap->context);
+ list_del(&capsnap->ci_item);
+ list_del(&capsnap->flushing_item);
+ ceph_put_cap_snap(capsnap);
+ }
+ }
+
+ spin_unlock(&ci->i_ceph_lock);
+
+ if (last) {
+ ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
+ iput(inode);
+ } else if (complete_capsnap) {
+ ceph_flush_snaps(ci);
+ wake_up_all(&ci->i_cap_wq);
+ }
+ if (drop_capsnap)
+ iput(inode);
+}
+
+/*
+ * Invalidate unlinked inode's aliases, so we can drop the inode ASAP.
+ */
+static void invalidate_aliases(struct inode *inode)
+{
+ struct dentry *dn, *prev = NULL;
+
+ dout("invalidate_aliases inode %p\n", inode);
+ d_prune_aliases(inode);
+ /*
+ * For non-directory inode, d_find_alias() only returns
+ * hashed dentry. After calling d_invalidate(), the
+ * dentry becomes unhashed.
+ *
+ * For directory inode, d_find_alias() can return
+ * unhashed dentry. But directory inode should have
+ * one alias at most.
+ */
+ while ((dn = d_find_alias(inode))) {
+ /* same dentry twice: d_invalidate had no effect; stop */
+ if (dn == prev) {
+ dput(dn);
+ break;
+ }
+ d_invalidate(dn);
+ if (prev)
+ dput(prev);
+ /* hold the ref so the loop-detection compare stays valid */
+ prev = dn;
+ }
+ if (prev)
+ dput(prev);
+}
+
+/*
+ * Handle a cap GRANT message from the MDS. (Note that a GRANT may
+ * actually be a revocation if it specifies a smaller cap set.)
+ *
+ * caller holds s_mutex and i_ceph_lock, we drop both.
+ *
+ * The local check_caps disposition (the function itself returns void):
+ * 0 - ok
+ * 1 - check_caps on auth cap only (writeback)
+ * 2 - check_caps (ack revoke)
+ */
+static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
+ struct ceph_mds_session *session,
+ struct ceph_cap *cap,
+ struct ceph_buffer *xattr_buf)
+ __releases(ci->i_ceph_lock)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int mds = session->s_mds;
+ int seq = le32_to_cpu(grant->seq);
+ int newcaps = le32_to_cpu(grant->caps);
+ int issued, implemented, used, wanted, dirty;
+ u64 size = le64_to_cpu(grant->size);
+ u64 max_size = le64_to_cpu(grant->max_size);
+ struct timespec mtime, atime, ctime;
+ int check_caps = 0;
+ int wake = 0;
+ int writeback = 0;
+ int queue_invalidate = 0;
+ int deleted_inode = 0;
+ int queue_revalidate = 0;
+
+ dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
+ inode, cap, mds, seq, ceph_cap_string(newcaps));
+ dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
+ inode->i_size);
+
+
+ /*
+ * auth mds of the inode changed. we received the cap export message,
+ * but still haven't received the cap import message. handle_cap_export
+ * updated the new auth MDS' cap.
+ *
+ * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing a message
+ * that was sent before the cap import message. So don't remove caps.
+ */
+ if (ceph_seq_cmp(seq, cap->seq) <= 0) {
+ WARN_ON(cap != ci->i_auth_cap);
+ WARN_ON(cap->cap_id != le64_to_cpu(grant->cap_id));
+ seq = cap->seq;
+ newcaps |= cap->issued;
+ }
+
+ /*
+ * If CACHE is being revoked, and we have no dirty buffers,
+ * try to invalidate (once). (If there are dirty buffers, we
+ * will invalidate _after_ writeback.)
+ */
+ if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) &&
+ (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
+ !ci->i_wrbuffer_ref) {
+ if (try_nonblocking_invalidate(inode)) {
+ /* there were locked pages.. invalidate later
+ in a separate thread. */
+ if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
+ queue_invalidate = 1;
+ ci->i_rdcache_revoking = ci->i_rdcache_gen;
+ }
+ }
+
+ ceph_fscache_invalidate(inode);
+ }
+
+ /* side effects now are allowed */
+
+ issued = __ceph_caps_issued(ci, &implemented);
+ issued |= implemented | __ceph_caps_dirty(ci);
+
+ cap->cap_gen = session->s_cap_gen;
+ cap->seq = seq;
+
+ __check_cap_issue(ci, cap, newcaps);
+
+ /* trust MDS metadata only for fields we don't hold EXCL on */
+ if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
+ inode->i_mode = le32_to_cpu(grant->mode);
+ inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid));
+ inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid));
+ dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
+ from_kuid(&init_user_ns, inode->i_uid),
+ from_kgid(&init_user_ns, inode->i_gid));
+ }
+
+ if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
+ set_nlink(inode, le32_to_cpu(grant->nlink));
+ if (inode->i_nlink == 0 &&
+ (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
+ deleted_inode = 1;
+ }
+
+ if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) {
+ int len = le32_to_cpu(grant->xattr_len);
+ u64 version = le64_to_cpu(grant->xattr_version);
+
+ if (version > ci->i_xattrs.version) {
+ dout(" got new xattrs v%llu on %p len %d\n",
+ version, inode, len);
+ if (ci->i_xattrs.blob)
+ ceph_buffer_put(ci->i_xattrs.blob);
+ ci->i_xattrs.blob = ceph_buffer_get(xattr_buf);
+ ci->i_xattrs.version = version;
+ ceph_forget_all_cached_acls(inode);
+ }
+ }
+
+ /* Do we need to revalidate our fscache cookie. Don't bother on the
+ * first cache cap as we already validate at cookie creation time. */
+ if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1)
+ queue_revalidate = 1;
+
+ /* size/ctime/mtime/atime? */
+ ceph_fill_file_size(inode, issued,
+ le32_to_cpu(grant->truncate_seq),
+ le64_to_cpu(grant->truncate_size), size);
+ ceph_decode_timespec(&mtime, &grant->mtime);
+ ceph_decode_timespec(&atime, &grant->atime);
+ ceph_decode_timespec(&ctime, &grant->ctime);
+ ceph_fill_file_time(inode, issued,
+ le32_to_cpu(grant->time_warp_seq), &ctime, &mtime,
+ &atime);
+
+
+ /* file layout may have changed */
+ ci->i_layout = grant->layout;
+
+ /* max size increase? */
+ if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
+ dout("max_size %lld -> %llu\n", ci->i_max_size, max_size);
+ ci->i_max_size = max_size;
+ if (max_size >= ci->i_wanted_max_size) {
+ ci->i_wanted_max_size = 0; /* reset */
+ ci->i_requested_max_size = 0;
+ }
+ wake = 1;
+ }
+
+ /* check cap bits */
+ wanted = __ceph_caps_wanted(ci);
+ used = __ceph_caps_used(ci);
+ dirty = __ceph_caps_dirty(ci);
+ dout(" my wanted = %s, used = %s, dirty %s\n",
+ ceph_cap_string(wanted),
+ ceph_cap_string(used),
+ ceph_cap_string(dirty));
+ if (wanted != le32_to_cpu(grant->wanted)) {
+ dout("mds wanted %s -> %s\n",
+ ceph_cap_string(le32_to_cpu(grant->wanted)),
+ ceph_cap_string(wanted));
+ /* imported cap may not have correct mds_wanted */
+ if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT)
+ check_caps = 1;
+ }
+
+ /* revocation, grant, or no-op? */
+ if (cap->issued & ~newcaps) {
+ int revoking = cap->issued & ~newcaps;
+
+ dout("revocation: %s -> %s (revoking %s)\n",
+ ceph_cap_string(cap->issued),
+ ceph_cap_string(newcaps),
+ ceph_cap_string(revoking));
+ if (revoking & used & CEPH_CAP_FILE_BUFFER)
+ writeback = 1; /* initiate writeback; will delay ack */
+ else if (revoking == CEPH_CAP_FILE_CACHE &&
+ (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
+ queue_invalidate)
+ ; /* do nothing yet, invalidation will be queued */
+ else if (cap == ci->i_auth_cap)
+ check_caps = 1; /* check auth cap only */
+ else
+ check_caps = 2; /* check all caps */
+ cap->issued = newcaps;
+ cap->implemented |= newcaps;
+ } else if (cap->issued == newcaps) {
+ dout("caps unchanged: %s -> %s\n",
+ ceph_cap_string(cap->issued), ceph_cap_string(newcaps));
+ } else {
+ dout("grant: %s -> %s\n", ceph_cap_string(cap->issued),
+ ceph_cap_string(newcaps));
+ /* non-auth MDS is revoking the newly grant caps ? */
+ if (cap == ci->i_auth_cap &&
+ __ceph_caps_revoking_other(ci, cap, newcaps))
+ check_caps = 2;
+
+ cap->issued = newcaps;
+ cap->implemented |= newcaps; /* add bits only, to
+ * avoid stepping on a
+ * pending revocation */
+ wake = 1;
+ }
+ BUG_ON(cap->issued & ~cap->implemented);
+
+ spin_unlock(&ci->i_ceph_lock);
+
+ /* deferred side effects that must run without i_ceph_lock */
+ if (writeback)
+ /*
+ * queue inode for writeback: we can't actually call
+ * filemap_write_and_wait, etc. from message handler
+ * context.
+ */
+ ceph_queue_writeback(inode);
+ if (queue_invalidate)
+ ceph_queue_invalidate(inode);
+ if (deleted_inode)
+ invalidate_aliases(inode);
+ if (queue_revalidate)
+ ceph_queue_revalidate(inode);
+ if (wake)
+ wake_up_all(&ci->i_cap_wq);
+
+ /* s_mutex is released here, directly or inside ceph_check_caps */
+ if (check_caps == 1)
+ ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
+ session);
+ else if (check_caps == 2)
+ ceph_check_caps(ci, CHECK_CAPS_NODELAY, session);
+ else
+ mutex_unlock(&session->s_mutex);
+}
+
+/*
+ * Handle FLUSH_ACK from MDS, indicating that metadata we sent to the
+ * MDS has been safely committed.
+ *
+ * Caller holds i_ceph_lock; we drop it.  "drop" means the inode became
+ * clean and we must release the ref held while it was dirty.
+ */
+static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
+ struct ceph_mds_caps *m,
+ struct ceph_mds_session *session,
+ struct ceph_cap *cap)
+ __releases(ci->i_ceph_lock)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+ unsigned seq = le32_to_cpu(m->seq);
+ int dirty = le32_to_cpu(m->dirty);
+ int cleaned = 0;
+ int drop = 0;
+ int i;
+
+ /* only bits whose recorded flush tid matches this ack are clean */
+ for (i = 0; i < CEPH_CAP_BITS; i++)
+ if ((dirty & (1 << i)) &&
+ flush_tid == ci->i_cap_flush_tid[i])
+ cleaned |= 1 << i;
+
+ dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s,"
+ " flushing %s -> %s\n",
+ inode, session->s_mds, seq, ceph_cap_string(dirty),
+ ceph_cap_string(cleaned), ceph_cap_string(ci->i_flushing_caps),
+ ceph_cap_string(ci->i_flushing_caps & ~cleaned));
+
+ if (ci->i_flushing_caps == (ci->i_flushing_caps & ~cleaned))
+ goto out;
+
+ ci->i_flushing_caps &= ~cleaned;
+
+ spin_lock(&mdsc->cap_dirty_lock);
+ if (ci->i_flushing_caps == 0) {
+ list_del_init(&ci->i_flushing_item);
+ if (!list_empty(&session->s_cap_flushing))
+ dout(" mds%d still flushing cap on %p\n",
+ session->s_mds,
+ &list_entry(session->s_cap_flushing.next,
+ struct ceph_inode_info,
+ i_flushing_item)->vfs_inode);
+ mdsc->num_cap_flushing--;
+ wake_up_all(&mdsc->cap_flushing_wq);
+ dout(" inode %p now !flushing\n", inode);
+
+ if (ci->i_dirty_caps == 0) {
+ dout(" inode %p now clean\n", inode);
+ BUG_ON(!list_empty(&ci->i_dirty_item));
+ drop = 1;
+ if (ci->i_wrbuffer_ref_head == 0) {
+ BUG_ON(!ci->i_head_snapc);
+ ceph_put_snap_context(ci->i_head_snapc);
+ ci->i_head_snapc = NULL;
+ }
+ } else {
+ BUG_ON(list_empty(&ci->i_dirty_item));
+ }
+ }
+ spin_unlock(&mdsc->cap_dirty_lock);
+ wake_up_all(&ci->i_cap_wq);
+
+out:
+ spin_unlock(&ci->i_ceph_lock);
+ if (drop)
+ iput(inode);
+}
+
+/*
+ * Handle FLUSHSNAP_ACK. MDS has flushed snap data to disk and we can
+ * throw away our cap_snap.
+ *
+ * Caller hold s_mutex.
+ */
+static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
+ struct ceph_mds_caps *m,
+ struct ceph_mds_session *session)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ u64 follows = le64_to_cpu(m->snap_follows);
+ struct ceph_cap_snap *capsnap;
+ int drop = 0;
+
+ dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n",
+ inode, ci, session->s_mds, follows);
+
+ spin_lock(&ci->i_ceph_lock);
+ list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
+ if (capsnap->follows == follows) {
+ /* stale ack for a resent flush: ignore it */
+ if (capsnap->flush_tid != flush_tid) {
+ dout(" cap_snap %p follows %lld tid %lld !="
+ " %lld\n", capsnap, follows,
+ flush_tid, capsnap->flush_tid);
+ break;
+ }
+ WARN_ON(capsnap->dirty_pages || capsnap->writing);
+ dout(" removing %p cap_snap %p follows %lld\n",
+ inode, capsnap, follows);
+ ceph_put_snap_context(capsnap->context);
+ list_del(&capsnap->ci_item);
+ list_del(&capsnap->flushing_item);
+ ceph_put_cap_snap(capsnap);
+ /* drop the inode ref the cap_snap held */
+ drop = 1;
+ break;
+ } else {
+ dout(" skipping cap_snap %p follows %lld\n",
+ capsnap, capsnap->follows);
+ }
+ }
+ spin_unlock(&ci->i_ceph_lock);
+ if (drop)
+ iput(inode);
+}
+
+/*
+ * Handle TRUNC from MDS, indicating file truncation.
+ *
+ * caller hold s_mutex, and i_ceph_lock which we drop.
+ */
+static void handle_cap_trunc(struct inode *inode,
+ struct ceph_mds_caps *trunc,
+ struct ceph_mds_session *session)
+ __releases(ci->i_ceph_lock)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int mds = session->s_mds;
+ int seq = le32_to_cpu(trunc->seq);
+ u32 truncate_seq = le32_to_cpu(trunc->truncate_seq);
+ u64 truncate_size = le64_to_cpu(trunc->truncate_size);
+ u64 size = le64_to_cpu(trunc->size);
+ int implemented = 0;
+ int dirty = __ceph_caps_dirty(ci);
+ int issued = __ceph_caps_issued(ceph_inode(inode), &implemented);
+ int queue_trunc = 0;
+
+ issued |= implemented | dirty;
+
+ dout("handle_cap_trunc inode %p mds%d seq %d to %lld seq %d\n",
+ inode, mds, seq, truncate_size, truncate_seq);
+ /* nonzero if the local truncation must run in a worker thread */
+ queue_trunc = ceph_fill_file_size(inode, issued,
+ truncate_seq, truncate_size, size);
+ spin_unlock(&ci->i_ceph_lock);
+
+ if (queue_trunc) {
+ ceph_queue_vmtruncate(inode);
+ ceph_fscache_invalidate(inode);
+ }
+}
+
+/*
+ * Handle EXPORT from MDS. Cap is being migrated _from_ this mds to a
+ * different one. If we are the most recent migration we've seen (as
+ * indicated by mseq), make note of the migrating cap bits for the
+ * duration (until we see the corresponding IMPORT).
+ *
+ * caller holds s_mutex
+ *
+ * We may have to drop all locks to open the target session and/or add
+ * a placeholder cap for it, then start over; lock ordering between the
+ * two sessions is by mds rank to avoid ABBA deadlock.
+ */
+static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
+ struct ceph_mds_cap_peer *ph,
+ struct ceph_mds_session *session)
+{
+ struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+ struct ceph_mds_session *tsession = NULL;
+ struct ceph_cap *cap, *tcap;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ u64 t_cap_id;
+ unsigned mseq = le32_to_cpu(ex->migrate_seq);
+ unsigned t_seq, t_mseq;
+ int target, issued;
+ int mds = session->s_mds;
+
+ if (ph) {
+ t_cap_id = le64_to_cpu(ph->cap_id);
+ t_seq = le32_to_cpu(ph->seq);
+ t_mseq = le32_to_cpu(ph->mseq);
+ target = le32_to_cpu(ph->mds);
+ } else {
+ /* no peer info: the cap is simply being dropped */
+ t_cap_id = t_seq = t_mseq = 0;
+ target = -1;
+ }
+
+ dout("handle_cap_export inode %p ci %p mds%d mseq %d target %d\n",
+ inode, ci, mds, mseq, target);
+retry:
+ spin_lock(&ci->i_ceph_lock);
+ cap = __get_cap_for_mds(ci, mds);
+ if (!cap)
+ goto out_unlock;
+
+ if (target < 0) {
+ __ceph_remove_cap(cap, false);
+ goto out_unlock;
+ }
+
+ /*
+ * now we know we haven't received the cap import message yet
+ * because the exported cap still exist.
+ */
+
+ issued = cap->issued;
+ WARN_ON(issued != cap->implemented);
+
+ tcap = __get_cap_for_mds(ci, target);
+ if (tcap) {
+ /* already have caps from the target */
+ if (tcap->cap_id != t_cap_id ||
+ ceph_seq_cmp(tcap->seq, t_seq) < 0) {
+ dout(" updating import cap %p mds%d\n", tcap, target);
+ tcap->cap_id = t_cap_id;
+ tcap->seq = t_seq - 1;
+ tcap->issue_seq = t_seq - 1;
+ tcap->mseq = t_mseq;
+ tcap->issued |= issued;
+ tcap->implemented |= issued;
+ if (cap == ci->i_auth_cap)
+ ci->i_auth_cap = tcap;
+ if (ci->i_flushing_caps && ci->i_auth_cap == tcap) {
+ spin_lock(&mdsc->cap_dirty_lock);
+ list_move_tail(&ci->i_flushing_item,
+ &tcap->session->s_cap_flushing);
+ spin_unlock(&mdsc->cap_dirty_lock);
+ }
+ }
+ __ceph_remove_cap(cap, false);
+ goto out_unlock;
+ }
+
+ if (tsession) {
+ /* second pass: target session opened below; add the cap */
+ int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
+ spin_unlock(&ci->i_ceph_lock);
+ /* add placeholder for the export tagert */
+ ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0,
+ t_seq - 1, t_mseq, (u64)-1, flag, NULL);
+ goto retry;
+ }
+
+ spin_unlock(&ci->i_ceph_lock);
+ mutex_unlock(&session->s_mutex);
+
+ /* open target session */
+ tsession = ceph_mdsc_open_export_target_session(mdsc, target);
+ if (!IS_ERR(tsession)) {
+ /* always lock the lower-ranked session's mutex first */
+ if (mds > target) {
+ mutex_lock(&session->s_mutex);
+ mutex_lock_nested(&tsession->s_mutex,
+ SINGLE_DEPTH_NESTING);
+ } else {
+ mutex_lock(&tsession->s_mutex);
+ mutex_lock_nested(&session->s_mutex,
+ SINGLE_DEPTH_NESTING);
+ }
+ ceph_add_cap_releases(mdsc, tsession);
+ } else {
+ WARN_ON(1);
+ tsession = NULL;
+ target = -1;
+ }
+ goto retry;
+
+out_unlock:
+ spin_unlock(&ci->i_ceph_lock);
+ mutex_unlock(&session->s_mutex);
+ if (tsession) {
+ mutex_unlock(&tsession->s_mutex);
+ ceph_put_mds_session(tsession);
+ }
+}
+
+/*
+ * Handle cap IMPORT. If there are temp bits from an older EXPORT,
+ * clean them up.
+ *
+ * caller holds s_mutex.
+ */
+static void handle_cap_import(struct ceph_mds_client *mdsc,
+ struct inode *inode, struct ceph_mds_caps *im,
+ struct ceph_mds_cap_peer *ph,
+ struct ceph_mds_session *session,
+ void *snaptrace, int snaptrace_len)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_cap *cap;
+ int mds = session->s_mds;
+ unsigned issued = le32_to_cpu(im->caps);
+ unsigned wanted = le32_to_cpu(im->wanted);
+ unsigned seq = le32_to_cpu(im->seq);
+ unsigned mseq = le32_to_cpu(im->migrate_seq);
+ u64 realmino = le64_to_cpu(im->realm);
+ u64 cap_id = le64_to_cpu(im->cap_id);
+ u64 p_cap_id;
+ int peer;
+
+ if (ph) {
+ p_cap_id = le64_to_cpu(ph->cap_id);
+ peer = le32_to_cpu(ph->mds);
+ } else {
+ p_cap_id = 0;
+ peer = -1;
+ }
+
+ dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n",
+ inode, ci, mds, mseq, peer);
+
+ spin_lock(&ci->i_ceph_lock);
+ cap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL;
+ if (cap && cap->cap_id == p_cap_id) {
+ /* remove the stale cap left over from the EXPORT */
+ dout(" remove export cap %p mds%d flags %d\n",
+ cap, peer, ph->flags);
+ if ((ph->flags & CEPH_CAP_FLAG_AUTH) &&
+ (cap->seq != le32_to_cpu(ph->seq) ||
+ cap->mseq != le32_to_cpu(ph->mseq))) {
+ pr_err("handle_cap_import: mismatched seq/mseq: "
+ "ino (%llx.%llx) mds%d seq %d mseq %d "
+ "importer mds%d has peer seq %d mseq %d\n",
+ ceph_vinop(inode), peer, cap->seq,
+ cap->mseq, mds, le32_to_cpu(ph->seq),
+ le32_to_cpu(ph->mseq));
+ }
+ ci->i_cap_exporting_issued = cap->issued;
+ __ceph_remove_cap(cap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
+ }
+
+ /* make sure we re-request max_size, if necessary */
+ ci->i_wanted_max_size = 0;
+ ci->i_requested_max_size = 0;
+ spin_unlock(&ci->i_ceph_lock);
+
+ /* snap trace update needs write; downgrade to read for add/kick */
+ down_write(&mdsc->snap_rwsem);
+ ceph_update_snap_trace(mdsc, snaptrace, snaptrace+snaptrace_len,
+ false);
+ downgrade_write(&mdsc->snap_rwsem);
+ ceph_add_cap(inode, session, cap_id, -1,
+ issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH,
+ NULL /* no caps context */);
+ kick_flushing_inode_caps(mdsc, session, inode);
+ up_read(&mdsc->snap_rwsem);
+
+}
+
/*
 * Handle a caps message from the MDS.
 *
 * Identify the appropriate session, inode, and call the right handler
 * based on the cap op.
 *
 * Decodes the message front (header, optional snap trace, optional
 * flock section for v2+, optional migration peer record for v3+
 * IMPORT/EXPORT), then dispatches.  Locking is asymmetric on exit:
 * paths reaching "done" still hold session->s_mutex, while paths
 * jumping to "done_unlocked" expect the handler to have released it
 * (visible for handle_cap_export; NOTE(review): assumed also true for
 * handle_cap_grant -- confirm against its definition).
 */
void ceph_handle_caps(struct ceph_mds_session *session,
		      struct ceph_msg *msg)
{
	struct ceph_mds_client *mdsc = session->s_mdsc;
	struct super_block *sb = mdsc->fsc->sb;
	struct inode *inode;
	struct ceph_inode_info *ci;
	struct ceph_cap *cap;
	struct ceph_mds_caps *h;
	struct ceph_mds_cap_peer *peer = NULL;
	int mds = session->s_mds;
	int op;
	u32 seq, mseq;
	struct ceph_vino vino;
	u64 cap_id;
	u64 size, max_size;
	u64 tid;
	void *snaptrace;
	size_t snaptrace_len;
	void *flock;
	void *end;
	u32 flock_len;

	dout("handle_caps from mds%d\n", mds);

	/* decode */
	end = msg->front.iov_base + msg->front.iov_len;
	tid = le64_to_cpu(msg->hdr.tid);
	if (msg->front.iov_len < sizeof(*h))
		goto bad;
	h = msg->front.iov_base;
	op = le32_to_cpu(h->op);
	vino.ino = le64_to_cpu(h->ino);
	vino.snap = CEPH_NOSNAP;
	cap_id = le64_to_cpu(h->cap_id);
	seq = le32_to_cpu(h->seq);
	mseq = le32_to_cpu(h->migrate_seq);
	size = le64_to_cpu(h->size);
	max_size = le64_to_cpu(h->max_size);

	/* snap trace immediately follows the fixed header */
	snaptrace = h + 1;
	snaptrace_len = le32_to_cpu(h->snap_trace_len);

	/* v2+ messages append a 32-bit-counted flock section */
	if (le16_to_cpu(msg->hdr.version) >= 2) {
		void *p = snaptrace + snaptrace_len;
		ceph_decode_32_safe(&p, end, flock_len, bad);
		if (p + flock_len > end)
			goto bad;
		flock = p;
	} else {
		flock = NULL;
		flock_len = 0;
	}

	/* v3+ IMPORT/EXPORT messages carry a cap migration peer record */
	if (le16_to_cpu(msg->hdr.version) >= 3) {
		if (op == CEPH_CAP_OP_IMPORT) {
			void *p = flock + flock_len;
			if (p + sizeof(*peer) > end)
				goto bad;
			peer = p;
		} else if (op == CEPH_CAP_OP_EXPORT) {
			/* recorded in unused fields */
			peer = (void *)&h->size;
		}
	}

	mutex_lock(&session->s_mutex);
	session->s_seq++;
	dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
	     (unsigned)seq);

	if (op == CEPH_CAP_OP_IMPORT)
		ceph_add_cap_releases(mdsc, session);

	/* lookup ino */
	inode = ceph_find_inode(sb, vino);
	ci = ceph_inode(inode);	/* pointer math only; not used unless inode != NULL */
	dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino,
	     vino.snap, inode);
	if (!inode) {
		dout(" i don't have ino %llx\n", vino.ino);

		/* queue a release so the mds stops resending the import */
		if (op == CEPH_CAP_OP_IMPORT) {
			spin_lock(&session->s_cap_lock);
			__queue_cap_release(session, vino.ino, cap_id,
					    mseq, seq);
			spin_unlock(&session->s_cap_lock);
		}
		goto flush_cap_releases;
	}

	/* these will work even if we don't have a cap yet */
	switch (op) {
	case CEPH_CAP_OP_FLUSHSNAP_ACK:
		handle_cap_flushsnap_ack(inode, tid, h, session);
		goto done;

	case CEPH_CAP_OP_EXPORT:
		/* handle_cap_export drops s_mutex on its own */
		handle_cap_export(inode, h, peer, session);
		goto done_unlocked;

	case CEPH_CAP_OP_IMPORT:
		handle_cap_import(mdsc, inode, h, peer, session,
				  snaptrace, snaptrace_len);
		/* no goto: IMPORT falls through to the grant handling below */
	}

	/* the rest require a cap */
	spin_lock(&ci->i_ceph_lock);
	cap = __get_cap_for_mds(ceph_inode(inode), mds);
	if (!cap) {
		dout(" no cap on %p ino %llx.%llx from mds%d\n",
		     inode, ceph_ino(inode), ceph_snap(inode), mds);
		spin_unlock(&ci->i_ceph_lock);
		goto flush_cap_releases;
	}

	/* note that each of these drops i_ceph_lock for us */
	switch (op) {
	case CEPH_CAP_OP_REVOKE:
	case CEPH_CAP_OP_GRANT:
	case CEPH_CAP_OP_IMPORT:
		handle_cap_grant(inode, h, session, cap, msg->middle);
		goto done_unlocked;

	case CEPH_CAP_OP_FLUSH_ACK:
		handle_cap_flush_ack(inode, tid, h, session, cap);
		break;

	case CEPH_CAP_OP_TRUNC:
		handle_cap_trunc(inode, h, session);
		break;

	default:
		spin_unlock(&ci->i_ceph_lock);
		pr_err("ceph_handle_caps: unknown cap op %d %s\n", op,
		       ceph_cap_op_name(op));
	}

	goto done;

flush_cap_releases:
	/*
	 * send any full release message to try to move things
	 * along for the mds (who clearly thinks we still have this
	 * cap).
	 */
	ceph_add_cap_releases(mdsc, session);
	ceph_send_cap_releases(mdsc, session);

done:
	mutex_unlock(&session->s_mutex);
done_unlocked:
	if (inode)
		iput(inode);
	return;

bad:
	/* only reachable before s_mutex is taken */
	pr_err("ceph_handle_caps: corrupt message\n");
	ceph_msg_dump(msg);
	return;
}
+
/*
 * Delayed work handler to process end of delayed cap release LRU list.
 *
 * Walk mdsc->cap_delay_list from the front, running ceph_check_caps()
 * on each inode whose hold time has expired (or that is flagged
 * CEPH_I_FLUSH).  cap_delay_lock is dropped around each
 * ceph_check_caps() call; both break paths exit the loop with the
 * lock still held, so the spin_unlock() after the loop is the
 * matching release.
 */
void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
{
	struct ceph_inode_info *ci;
	int flags = CHECK_CAPS_NODELAY;

	dout("check_delayed_caps\n");
	while (1) {
		spin_lock(&mdsc->cap_delay_lock);
		if (list_empty(&mdsc->cap_delay_list))
			break;
		ci = list_first_entry(&mdsc->cap_delay_list,
				      struct ceph_inode_info,
				      i_cap_delay_list);
		/* list is in hold-time order; stop at the first not-yet-due entry */
		if ((ci->i_ceph_flags & CEPH_I_FLUSH) == 0 &&
		    time_before(jiffies, ci->i_hold_caps_max))
			break;
		list_del_init(&ci->i_cap_delay_list);
		spin_unlock(&mdsc->cap_delay_lock);
		dout("check_delayed_caps on %p\n", &ci->vfs_inode);
		ceph_check_caps(ci, flags, NULL);
	}
	spin_unlock(&mdsc->cap_delay_lock);
}
+
/*
 * Flush all dirty caps to the mds
 *
 * Repeatedly take the first entry of mdsc->cap_dirty; an ihold()
 * keeps the inode alive while cap_dirty_lock is dropped for the
 * ceph_check_caps() call.
 * NOTE(review): termination relies on ceph_check_caps(..FLUSH..)
 * removing ci from cap_dirty before we loop again -- confirm.
 */
void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
{
	struct ceph_inode_info *ci;
	struct inode *inode;

	dout("flush_dirty_caps\n");
	spin_lock(&mdsc->cap_dirty_lock);
	while (!list_empty(&mdsc->cap_dirty)) {
		ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info,
				      i_dirty_item);
		inode = &ci->vfs_inode;
		ihold(inode);
		dout("flush_dirty_caps %p\n", inode);
		spin_unlock(&mdsc->cap_dirty_lock);
		ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, NULL);
		iput(inode);
		spin_lock(&mdsc->cap_dirty_lock);
	}
	spin_unlock(&mdsc->cap_dirty_lock);
	dout("flush_dirty_caps done\n");
}
+
+/*
+ * Drop open file reference. If we were the last open file,
+ * we may need to release capabilities to the MDS (or schedule
+ * their delayed release).
+ */
+void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
+{
+ struct inode *inode = &ci->vfs_inode;
+ int last = 0;
+
+ spin_lock(&ci->i_ceph_lock);
+ dout("put_fmode %p fmode %d %d -> %d\n", inode, fmode,
+ ci->i_nr_by_mode[fmode], ci->i_nr_by_mode[fmode]-1);
+ BUG_ON(ci->i_nr_by_mode[fmode] == 0);
+ if (--ci->i_nr_by_mode[fmode] == 0)
+ last++;
+ spin_unlock(&ci->i_ceph_lock);
+
+ if (last && ci->i_vino.snap == CEPH_NOSNAP)
+ ceph_check_caps(ci, 0, NULL);
+}
+
+/*
+ * Helpers for embedding cap and dentry lease releases into mds
+ * requests.
+ *
+ * @force is used by dentry_release (below) to force inclusion of a
+ * record for the directory inode, even when there aren't any caps to
+ * drop.
+ */
+int ceph_encode_inode_release(void **p, struct inode *inode,
+ int mds, int drop, int unless, int force)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_cap *cap;
+ struct ceph_mds_request_release *rel = *p;
+ int used, dirty;
+ int ret = 0;
+
+ spin_lock(&ci->i_ceph_lock);
+ used = __ceph_caps_used(ci);
+ dirty = __ceph_caps_dirty(ci);
+
+ dout("encode_inode_release %p mds%d used|dirty %s drop %s unless %s\n",
+ inode, mds, ceph_cap_string(used|dirty), ceph_cap_string(drop),
+ ceph_cap_string(unless));
+
+ /* only drop unused, clean caps */
+ drop &= ~(used | dirty);
+
+ cap = __get_cap_for_mds(ci, mds);
+ if (cap && __cap_is_valid(cap)) {
+ if (force ||
+ ((cap->issued & drop) &&
+ (cap->issued & unless) == 0)) {
+ if ((cap->issued & drop) &&
+ (cap->issued & unless) == 0) {
+ int wanted = __ceph_caps_wanted(ci);
+ if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0)
+ wanted |= cap->mds_wanted;
+ dout("encode_inode_release %p cap %p "
+ "%s -> %s, wanted %s -> %s\n", inode, cap,
+ ceph_cap_string(cap->issued),
+ ceph_cap_string(cap->issued & ~drop),
+ ceph_cap_string(cap->mds_wanted),
+ ceph_cap_string(wanted));
+
+ cap->issued &= ~drop;
+ cap->implemented &= ~drop;
+ cap->mds_wanted = wanted;
+ } else {
+ dout("encode_inode_release %p cap %p %s"
+ " (force)\n", inode, cap,
+ ceph_cap_string(cap->issued));
+ }
+
+ rel->ino = cpu_to_le64(ceph_ino(inode));
+ rel->cap_id = cpu_to_le64(cap->cap_id);
+ rel->seq = cpu_to_le32(cap->seq);
+ rel->issue_seq = cpu_to_le32(cap->issue_seq),
+ rel->mseq = cpu_to_le32(cap->mseq);
+ rel->caps = cpu_to_le32(cap->implemented);
+ rel->wanted = cpu_to_le32(cap->mds_wanted);
+ rel->dname_len = 0;
+ rel->dname_seq = 0;
+ *p += sizeof(*rel);
+ ret = 1;
+ } else {
+ dout("encode_inode_release %p cap %p %s\n",
+ inode, cap, ceph_cap_string(cap->issued));
+ }
+ }
+ spin_unlock(&ci->i_ceph_lock);
+ return ret;
+}
+
/*
 * Encode a dentry lease release (plus the parent-directory inode
 * release) into an mds request at *p.  Returns the result of
 * ceph_encode_inode_release() on the parent dir.
 */
int ceph_encode_dentry_release(void **p, struct dentry *dentry,
			       int mds, int drop, int unless)
{
	struct inode *dir = dentry->d_parent->d_inode;
	struct ceph_mds_request_release *rel = *p;
	struct ceph_dentry_info *di = ceph_dentry(dentry);
	int force = 0;
	int ret;

	/*
	 * force a record for the directory caps if we have a dentry lease.
	 * this is racy (can't take i_ceph_lock and d_lock together), but it
	 * doesn't have to be perfect; the mds will revoke anything we don't
	 * release.
	 */
	spin_lock(&dentry->d_lock);
	if (di->lease_session && di->lease_session->s_mds == mds)
		force = 1;
	spin_unlock(&dentry->d_lock);

	ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force);

	/* re-check the lease under d_lock; it may have changed since above */
	spin_lock(&dentry->d_lock);
	if (ret && di->lease_session && di->lease_session->s_mds == mds) {
		dout("encode_dentry_release %p mds%d seq %d\n",
		     dentry, mds, (int)di->lease_seq);
		rel->dname_len = cpu_to_le32(dentry->d_name.len);
		memcpy(*p, dentry->d_name.name, dentry->d_name.len);
		*p += dentry->d_name.len;
		rel->dname_seq = cpu_to_le32(di->lease_seq);
		__ceph_mdsc_drop_dentry_lease(dentry);
	}
	spin_unlock(&dentry->d_lock);
	return ret;
}
diff --git a/ceph/ceph_frag.c b/ceph/ceph_frag.c
new file mode 100644
index 0000000..bdce8b1
--- /dev/null
+++ b/ceph/ceph_frag.c
@@ -0,0 +1,22 @@
+/*
+ * Ceph 'frag' type
+ */
+#include <linux/module.h>
+#include <linux/ceph/types.h>
+
+int ceph_frag_compare(__u32 a, __u32 b)
+{
+ unsigned va = ceph_frag_value(a);
+ unsigned vb = ceph_frag_value(b);
+ if (va < vb)
+ return -1;
+ if (va > vb)
+ return 1;
+ va = ceph_frag_bits(a);
+ vb = ceph_frag_bits(b);
+ if (va < vb)
+ return -1;
+ if (va > vb)
+ return 1;
+ return 0;
+}
diff --git a/ceph/debugfs.c b/ceph/debugfs.c
new file mode 100644
index 0000000..16b54aa
--- /dev/null
+++ b/ceph/debugfs.c
@@ -0,0 +1,277 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/debugfs.h>
+
+#include "super.h"
+
+#ifdef CONFIG_DEBUG_FS
+
+#include "mds_client.h"
+
+static int mdsmap_show(struct seq_file *s, void *p)
+{
+ int i;
+ struct ceph_fs_client *fsc = s->private;
+
+ if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL)
+ return 0;
+ seq_printf(s, "epoch %d\n", fsc->mdsc->mdsmap->m_epoch);
+ seq_printf(s, "root %d\n", fsc->mdsc->mdsmap->m_root);
+ seq_printf(s, "session_timeout %d\n",
+ fsc->mdsc->mdsmap->m_session_timeout);
+ seq_printf(s, "session_autoclose %d\n",
+ fsc->mdsc->mdsmap->m_session_autoclose);
+ for (i = 0; i < fsc->mdsc->mdsmap->m_max_mds; i++) {
+ struct ceph_entity_addr *addr =
+ &fsc->mdsc->mdsmap->m_info[i].addr;
+ int state = fsc->mdsc->mdsmap->m_info[i].state;
+
+ seq_printf(s, "\tmds%d\t%s\t(%s)\n", i,
+ ceph_pr_addr(&addr->in_addr),
+ ceph_mds_state_name(state));
+ }
+ return 0;
+}
+
/*
 * mdsc debugfs
 *
 * Dump every in-flight MDS request (tid, target mds, op, and the
 * inode/dentry/path it refers to) to the "mdsc" debugfs file.
 */
static int mdsc_show(struct seq_file *s, void *p)
{
	struct ceph_fs_client *fsc = s->private;
	struct ceph_mds_client *mdsc = fsc->mdsc;
	struct ceph_mds_request *req;
	struct rb_node *rp;
	int pathlen;
	u64 pathbase;
	char *path;

	mutex_lock(&mdsc->mutex);
	for (rp = rb_first(&mdsc->request_tree); rp; rp = rb_next(rp)) {
		req = rb_entry(rp, struct ceph_mds_request, r_node);

		/* tid and destination mds (if the request has been sent) */
		if (req->r_request && req->r_session)
			seq_printf(s, "%lld\tmds%d\t", req->r_tid,
				   req->r_session->s_mds);
		else if (!req->r_request)
			seq_printf(s, "%lld\t(no request)\t", req->r_tid);
		else
			seq_printf(s, "%lld\t(no session)\t", req->r_tid);

		seq_printf(s, "%s", ceph_mds_op_name(req->r_op));

		if (req->r_got_unsafe)
			seq_printf(s, "\t(unsafe)");
		else
			seq_printf(s, "\t");

		/* primary target: inode, dentry (with built path), or raw path */
		if (req->r_inode) {
			seq_printf(s, " #%llx", ceph_ino(req->r_inode));
		} else if (req->r_dentry) {
			path = ceph_mdsc_build_path(req->r_dentry, &pathlen,
						    &pathbase, 0);
			if (IS_ERR(path))
				path = NULL;	/* print name without path */
			spin_lock(&req->r_dentry->d_lock);
			seq_printf(s, " #%llx/%.*s (%s)",
				   ceph_ino(req->r_dentry->d_parent->d_inode),
				   req->r_dentry->d_name.len,
				   req->r_dentry->d_name.name,
				   path ? path : "");
			spin_unlock(&req->r_dentry->d_lock);
			kfree(path);
		} else if (req->r_path1) {
			seq_printf(s, " #%llx/%s", req->r_ino1.ino,
				   req->r_path1);
		} else {
			seq_printf(s, " #%llx", req->r_ino1.ino);
		}

		/* secondary target (rename/link source etc.), if any */
		if (req->r_old_dentry) {
			path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen,
						    &pathbase, 0);
			if (IS_ERR(path))
				path = NULL;
			spin_lock(&req->r_old_dentry->d_lock);
			seq_printf(s, " #%llx/%.*s (%s)",
				   req->r_old_dentry_dir ?
				   ceph_ino(req->r_old_dentry_dir) : 0,
				   req->r_old_dentry->d_name.len,
				   req->r_old_dentry->d_name.name,
				   path ? path : "");
			spin_unlock(&req->r_old_dentry->d_lock);
			kfree(path);
		} else if (req->r_path2) {
			if (req->r_ino2.ino)
				seq_printf(s, " #%llx/%s", req->r_ino2.ino,
					   req->r_path2);
			else
				seq_printf(s, " %s", req->r_path2);
		}

		seq_printf(s, "\n");
	}
	mutex_unlock(&mdsc->mutex);

	return 0;
}
+
+static int caps_show(struct seq_file *s, void *p)
+{
+ struct ceph_fs_client *fsc = s->private;
+ int total, avail, used, reserved, min;
+
+ ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min);
+ seq_printf(s, "total\t\t%d\n"
+ "avail\t\t%d\n"
+ "used\t\t%d\n"
+ "reserved\t%d\n"
+ "min\t%d\n",
+ total, avail, used, reserved, min);
+ return 0;
+}
+
+static int dentry_lru_show(struct seq_file *s, void *ptr)
+{
+ struct ceph_fs_client *fsc = s->private;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_dentry_info *di;
+
+ spin_lock(&mdsc->dentry_lru_lock);
+ list_for_each_entry(di, &mdsc->dentry_lru, lru) {
+ struct dentry *dentry = di->dentry;
+ seq_printf(s, "%p %p\t%.*s\n",
+ di, dentry, dentry->d_name.len, dentry->d_name.name);
+ }
+ spin_unlock(&mdsc->dentry_lru_lock);
+
+ return 0;
+}
+
/*
 * Generate the <name>_fops file_operations for each seq_file show
 * function above; the macro supplies the open/read/release
 * boilerplate.
 */
CEPH_DEFINE_SHOW_FUNC(mdsmap_show)
CEPH_DEFINE_SHOW_FUNC(mdsc_show)
CEPH_DEFINE_SHOW_FUNC(caps_show)
CEPH_DEFINE_SHOW_FUNC(dentry_lru_show)
+
+
+/*
+ * debugfs
+ */
+static int congestion_kb_set(void *data, u64 val)
+{
+ struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
+
+ fsc->mount_options->congestion_kb = (int)val;
+ return 0;
+}
+
+static int congestion_kb_get(void *data, u64 *val)
+{
+ struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
+
+ *val = (u64)fsc->mount_options->congestion_kb;
+ return 0;
+}
+
/* simple numeric attribute file backed by the get/set pair above */
DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get,
			congestion_kb_set, "%llu\n");
+
+
/*
 * Remove all per-superblock debugfs entries.  Also used as the error
 * path of ceph_fs_debugfs_init() with only some entries created, so
 * it relies on debugfs_remove() tolerating unset (NULL) dentries.
 */
void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
{
	dout("ceph_fs_debugfs_cleanup\n");
	debugfs_remove(fsc->debugfs_bdi);
	debugfs_remove(fsc->debugfs_congestion_kb);
	debugfs_remove(fsc->debugfs_mdsmap);
	debugfs_remove(fsc->debugfs_caps);
	debugfs_remove(fsc->debugfs_mdsc);
	debugfs_remove(fsc->debugfs_dentry_lru);
}
+
/*
 * Create the per-superblock debugfs entries under the client's
 * debugfs directory: writeback_congestion_kb, a "bdi" symlink, and
 * the mdsmap/mdsc/caps/dentry_lru dump files.
 *
 * Returns 0 on success or -ENOMEM on any creation failure; on
 * failure, already-created entries are torn down via
 * ceph_fs_debugfs_cleanup().
 */
int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
{
	char name[100];
	int err = -ENOMEM;

	dout("ceph_fs_debugfs_init\n");
	BUG_ON(!fsc->client->debugfs_dir);
	fsc->debugfs_congestion_kb =
		debugfs_create_file("writeback_congestion_kb",
				    0600,
				    fsc->client->debugfs_dir,
				    fsc,
				    &congestion_kb_fops);
	if (!fsc->debugfs_congestion_kb)
		goto out;

	/* relative symlink from our debugfs dir to the bdi's dir */
	snprintf(name, sizeof(name), "../../bdi/%s",
		 dev_name(fsc->backing_dev_info.dev));
	fsc->debugfs_bdi =
		debugfs_create_symlink("bdi",
				       fsc->client->debugfs_dir,
				       name);
	if (!fsc->debugfs_bdi)
		goto out;

	fsc->debugfs_mdsmap = debugfs_create_file("mdsmap",
					0600,
					fsc->client->debugfs_dir,
					fsc,
					&mdsmap_show_fops);
	if (!fsc->debugfs_mdsmap)
		goto out;

	fsc->debugfs_mdsc = debugfs_create_file("mdsc",
						0600,
						fsc->client->debugfs_dir,
						fsc,
						&mdsc_show_fops);
	if (!fsc->debugfs_mdsc)
		goto out;

	/* caps is read-only; the others are 0600 */
	fsc->debugfs_caps = debugfs_create_file("caps",
						   0400,
						   fsc->client->debugfs_dir,
						   fsc,
						   &caps_show_fops);
	if (!fsc->debugfs_caps)
		goto out;

	fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru",
					0600,
					fsc->client->debugfs_dir,
					fsc,
					&dentry_lru_show_fops);
	if (!fsc->debugfs_dentry_lru)
		goto out;

	return 0;

out:
	ceph_fs_debugfs_cleanup(fsc);
	return err;
}
+
+
+#else /* CONFIG_DEBUG_FS */
+
/* CONFIG_DEBUG_FS disabled: debugfs setup is a no-op */
int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
{
	return 0;
}
+
/* CONFIG_DEBUG_FS disabled: debugfs teardown is a no-op */
void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
{
}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/ceph/dir.c b/ceph/dir.c
new file mode 100644
index 0000000..c29d6ae
--- /dev/null
+++ b/ceph/dir.c
@@ -0,0 +1,1349 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/spinlock.h>
+#include <linux/fs_struct.h>
+#include <linux/namei.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+#include "super.h"
+#include "mds_client.h"
+
+/*
+ * Directory operations: readdir, lookup, create, link, unlink,
+ * rename, etc.
+ */
+
+/*
+ * Ceph MDS operations are specified in terms of a base ino and
+ * relative path. Thus, the client can specify an operation on a
+ * specific inode (e.g., a getattr due to fstat(2)), or as a path
+ * relative to, say, the root directory.
+ *
+ * Normally, we limit ourselves to strict inode ops (no path component)
+ * or dentry operations (a single path component relative to an ino). The
+ * exception to this is open_root_dentry(), which will open the mount
+ * point by name.
+ */
+
+const struct inode_operations ceph_dir_iops;
+const struct file_operations ceph_dir_fops;
+const struct dentry_operations ceph_dentry_ops;
+
/*
 * Initialize ceph dentry state.
 *
 * Allocate and attach a ceph_dentry_info to @dentry->d_fsdata and
 * select the dentry_operations based on the parent's snap context.
 * Returns 0 on success (including when another thread won the race
 * and d_fsdata is already set) or -ENOMEM on allocation failure.
 */
int ceph_init_dentry(struct dentry *dentry)
{
	struct ceph_dentry_info *di;

	/* fast path: already initialized (unlocked check; re-checked below) */
	if (dentry->d_fsdata)
		return 0;

	di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
	if (!di)
		return -ENOMEM;          /* oh well */

	spin_lock(&dentry->d_lock);
	if (dentry->d_fsdata) {
		/* lost a race */
		kmem_cache_free(ceph_dentry_cachep, di);
		goto out_unlock;
	}

	/* live, snapdir, and snapped dentries get different d_ops */
	if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
		d_set_d_op(dentry, &ceph_dentry_ops);
	else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
		d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
	else
		d_set_d_op(dentry, &ceph_snap_dentry_ops);

	di->dentry = dentry;
	di->lease_session = NULL;
	dentry->d_time = jiffies;
	/* avoid reordering d_fsdata setup so that the check above is safe */
	smp_mb();
	dentry->d_fsdata = di;
	ceph_dentry_lru_add(dentry);
out_unlock:
	spin_unlock(&dentry->d_lock);
	return 0;
}
+
+struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry)
+{
+ struct inode *inode = NULL;
+
+ if (!dentry)
+ return NULL;
+
+ spin_lock(&dentry->d_lock);
+ if (!IS_ROOT(dentry)) {
+ inode = dentry->d_parent->d_inode;
+ ihold(inode);
+ }
+ spin_unlock(&dentry->d_lock);
+ return inode;
+}
+
+
+/*
+ * for readdir, we encode the directory frag and offset within that
+ * frag into f_pos.
+ */
+static unsigned fpos_frag(loff_t p)
+{
+ return p >> 32;
+}
+static unsigned fpos_off(loff_t p)
+{
+ return p & 0xffffffff;
+}
+
+static int fpos_cmp(loff_t l, loff_t r)
+{
+ int v = ceph_frag_compare(fpos_frag(l), fpos_frag(r));
+ if (v)
+ return v;
+ return (int)(fpos_off(l) - fpos_off(r));
+}
+
/*
 * When possible, we try to satisfy a readdir by peeking at the
 * dcache.  We make this work by carefully ordering dentries on
 * d_u.d_child when we initially get results back from the MDS, and
 * falling back to a "normal" sync readdir if any dentries in the dir
 * are dropped.
 *
 * Complete dir indicates that we have all dentries in the dir.  It is
 * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
 * the MDS if/when the directory is modified).
 *
 * Walks the parent's d_subdirs list backwards (prev direction),
 * emitting each dentry whose lease generation matches @shared_gen.
 * Returns 0 normally, -EAGAIN when the dir lost "complete" status
 * mid-walk and the caller must fall back to an MDS readdir.
 */
static int __dcache_readdir(struct file *file, struct dir_context *ctx,
			    u32 shared_gen)
{
	struct ceph_file_info *fi = file->private_data;
	struct dentry *parent = file->f_dentry;
	struct inode *dir = parent->d_inode;
	struct list_head *p;
	struct dentry *dentry, *last;
	struct ceph_dentry_info *di;
	int err = 0;

	/* claim ref on last dentry we returned */
	last = fi->dentry;
	fi->dentry = NULL;

	dout("__dcache_readdir %p v%u at %llu (last %p)\n",
	     dir, shared_gen, ctx->pos, last);

	spin_lock(&parent->d_lock);

	/* start at beginning? */
	if (ctx->pos == 2 || last == NULL ||
	    fpos_cmp(ctx->pos, ceph_dentry(last)->offset) < 0) {
		if (list_empty(&parent->d_subdirs))
			goto out_unlock;
		p = parent->d_subdirs.prev;
		dout(" initial p %p/%p\n", p->prev, p->next);
	} else {
		/* resume just past the last dentry we emitted */
		p = last->d_u.d_child.prev;
	}

more:
	dentry = list_entry(p, struct dentry, d_u.d_child);
	di = ceph_dentry(dentry);
	/* scan backwards for the next emit-worthy dentry at or past ctx->pos */
	while (1) {
		dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next,
		     d_unhashed(dentry) ? "!hashed" : "hashed",
		     parent->d_subdirs.prev, parent->d_subdirs.next);
		if (p == &parent->d_subdirs) {
			/* wrapped back to the list head: done */
			fi->flags |= CEPH_F_ATEND;
			goto out_unlock;
		}
		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
		if (di->lease_shared_gen == shared_gen &&
		    !d_unhashed(dentry) && dentry->d_inode &&
		    ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
		    ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
		    fpos_cmp(ctx->pos, di->offset) <= 0)
			break;	/* found one; keep its d_lock held */
		dout(" skipping %p %.*s at %llu (%llu)%s%s\n", dentry,
		     dentry->d_name.len, dentry->d_name.name, di->offset,
		     ctx->pos, d_unhashed(dentry) ? " unhashed" : "",
		     !dentry->d_inode ? " null" : "");
		spin_unlock(&dentry->d_lock);
		p = p->prev;
		dentry = list_entry(p, struct dentry, d_u.d_child);
		di = ceph_dentry(dentry);
	}

	dget_dlock(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&parent->d_lock);

	/* make sure a dentry wasn't dropped while we didn't have parent lock */
	if (!ceph_dir_is_complete(dir)) {
		dout(" lost dir complete on %p; falling back to mds\n", dir);
		dput(dentry);
		err = -EAGAIN;
		goto out;
	}

	dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, ctx->pos,
	     dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
	if (!dir_emit(ctx, dentry->d_name.name,
		      dentry->d_name.len,
		      ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino),
		      dentry->d_inode->i_mode >> 12)) {
		/* user buffer full: stop here, resumable from 'last' */
		if (last) {
			/* remember our position */
			fi->dentry = last;
			fi->next_offset = fpos_off(di->offset);
		}
		dput(dentry);
		return 0;
	}

	ctx->pos = di->offset + 1;

	if (last)
		dput(last);
	last = dentry;

	spin_lock(&parent->d_lock);
	p = p->prev;	/* advance to next dentry */
	goto more;

out_unlock:
	spin_unlock(&parent->d_lock);
out:
	if (last)
		dput(last);
	return err;
}
+
+/*
+ * make note of the last dentry we read, so we can
+ * continue at the same lexicographical point,
+ * regardless of what dir changes take place on the
+ * server.
+ */
+static int note_last_dentry(struct ceph_file_info *fi, const char *name,
+ int len)
+{
+ kfree(fi->last_name);
+ fi->last_name = kmalloc(len+1, GFP_NOFS);
+ if (!fi->last_name)
+ return -ENOMEM;
+ memcpy(fi->last_name, name, len);
+ fi->last_name[len] = 0;
+ dout("note_last_dentry '%s'\n", fi->last_name);
+ return 0;
+}
+
/*
 * Main readdir entry point.
 *
 * Emits "." and ".." synthetically, then tries the dcache fast path
 * (__dcache_readdir) when the dir is known complete; otherwise pages
 * directory fragments from the MDS one request at a time, caching the
 * last reply in fi->last_readdir and resuming via fi->last_name.
 * f_pos encodes (frag, offset) -- see fpos_frag()/fpos_off().
 */
static int ceph_readdir(struct file *file, struct dir_context *ctx)
{
	struct ceph_file_info *fi = file->private_data;
	struct inode *inode = file_inode(file);
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
	struct ceph_mds_client *mdsc = fsc->mdsc;
	unsigned frag = fpos_frag(ctx->pos);
	int off = fpos_off(ctx->pos);
	int err;
	u32 ftype;
	struct ceph_mds_reply_info_parsed *rinfo;

	dout("readdir %p file %p frag %u off %u\n", inode, file, frag, off);
	if (fi->flags & CEPH_F_ATEND)
		return 0;

	/* always start with . and .. */
	if (ctx->pos == 0) {
		/* note dir version at start of readdir so we can tell
		 * if any dentries get dropped */
		fi->dir_release_count = atomic_read(&ci->i_release_count);

		dout("readdir off 0 -> '.'\n");
		if (!dir_emit(ctx, ".", 1,
			    ceph_translate_ino(inode->i_sb, inode->i_ino),
			    inode->i_mode >> 12))
			return 0;
		ctx->pos = 1;
		off = 1;
	}
	if (ctx->pos == 1) {
		ino_t ino = parent_ino(file->f_dentry);
		dout("readdir off 1 -> '..'\n");
		if (!dir_emit(ctx, "..", 2,
			    ceph_translate_ino(inode->i_sb, ino),
			    inode->i_mode >> 12))
			return 0;
		ctx->pos = 2;
		off = 2;
	}

	/* can we use the dcache? */
	spin_lock(&ci->i_ceph_lock);
	if ((ctx->pos == 2 || fi->dentry) &&
	    !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
	    ceph_snap(inode) != CEPH_SNAPDIR &&
	    __ceph_dir_is_complete(ci) &&
	    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
		u32 shared_gen = ci->i_shared_gen;
		spin_unlock(&ci->i_ceph_lock);
		err = __dcache_readdir(file, ctx, shared_gen);
		if (err != -EAGAIN)
			return err;
		/* -EAGAIN: fall back to mds readdir from current pos */
		frag = fpos_frag(ctx->pos);
		off = fpos_off(ctx->pos);
	} else {
		spin_unlock(&ci->i_ceph_lock);
	}
	if (fi->dentry) {
		/* stale dcache-resume point; record its name for the mds */
		err = note_last_dentry(fi, fi->dentry->d_name.name,
				       fi->dentry->d_name.len);
		if (err)
			return err;
		dput(fi->dentry);
		fi->dentry = NULL;
	}

	/* proceed with a normal readdir */

more:
	/* do we have the correct frag content buffered? */
	if (fi->frag != frag || fi->last_readdir == NULL) {
		struct ceph_mds_request *req;
		int op = ceph_snap(inode) == CEPH_SNAPDIR ?
			CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;

		/* discard old result, if any */
		if (fi->last_readdir) {
			ceph_mdsc_put_request(fi->last_readdir);
			fi->last_readdir = NULL;
		}

		dout("readdir fetching %llx.%llx frag %x offset '%s'\n",
		     ceph_vinop(inode), frag, fi->last_name);
		req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
		if (IS_ERR(req))
			return PTR_ERR(req);
		err = ceph_alloc_readdir_reply_buffer(req, inode);
		if (err) {
			ceph_mdsc_put_request(req);
			return err;
		}
		req->r_inode = inode;
		ihold(inode);
		req->r_dentry = dget(file->f_dentry);
		/* hints to request -> mds selection code */
		req->r_direct_mode = USE_AUTH_MDS;
		req->r_direct_hash = ceph_frag_value(frag);
		req->r_direct_is_hash = true;
		req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
		req->r_readdir_offset = fi->next_offset;
		req->r_args.readdir.frag = cpu_to_le32(frag);
		err = ceph_mdsc_do_request(mdsc, NULL, req);
		if (err < 0) {
			ceph_mdsc_put_request(req);
			return err;
		}
		dout("readdir got and parsed readdir result=%d"
		     " on frag %x, end=%d, complete=%d\n", err, frag,
		     (int)req->r_reply_info.dir_end,
		     (int)req->r_reply_info.dir_complete);

		if (!req->r_did_prepopulate) {
			dout("readdir !did_prepopulate");
			/* preclude from marking dir complete */
			fi->dir_release_count--;
		}

		/* note next offset and last dentry name */
		rinfo = &req->r_reply_info;
		if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
			/* mds returned a different (split/merged) frag */
			frag = le32_to_cpu(rinfo->dir_dir->frag);
			if (ceph_frag_is_leftmost(frag))
				fi->next_offset = 2;	/* skip . and .. */
			else
				fi->next_offset = 0;
			off = fi->next_offset;
		}
		fi->frag = frag;
		fi->offset = fi->next_offset;
		fi->last_readdir = req;

		if (req->r_reply_info.dir_end) {
			kfree(fi->last_name);
			fi->last_name = NULL;
			if (ceph_frag_is_rightmost(frag))
				fi->next_offset = 2;
			else
				fi->next_offset = 0;
		} else {
			/* remember where to resume within this frag */
			err = note_last_dentry(fi,
				       rinfo->dir_dname[rinfo->dir_nr-1],
				       rinfo->dir_dname_len[rinfo->dir_nr-1]);
			if (err)
				return err;
			fi->next_offset += rinfo->dir_nr;
		}
	}

	rinfo = &fi->last_readdir->r_reply_info;
	dout("readdir frag %x num %d off %d chunkoff %d\n", frag,
	     rinfo->dir_nr, off, fi->offset);

	/* emit entries from the cached reply chunk */
	ctx->pos = ceph_make_fpos(frag, off);
	while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) {
		struct ceph_mds_reply_inode *in =
			rinfo->dir_in[off - fi->offset].in;
		struct ceph_vino vino;
		ino_t ino;

		dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n",
		     off, off - fi->offset, rinfo->dir_nr, ctx->pos,
		     rinfo->dir_dname_len[off - fi->offset],
		     rinfo->dir_dname[off - fi->offset], in);
		BUG_ON(!in);
		ftype = le32_to_cpu(in->mode) >> 12;
		vino.ino = le64_to_cpu(in->ino);
		vino.snap = le64_to_cpu(in->snapid);
		ino = ceph_vino_to_ino(vino);
		if (!dir_emit(ctx,
			    rinfo->dir_dname[off - fi->offset],
			    rinfo->dir_dname_len[off - fi->offset],
			    ceph_translate_ino(inode->i_sb, ino), ftype)) {
			dout("filldir stopping us...\n");
			return 0;
		}
		off++;
		ctx->pos++;
	}

	/* last_name set => this frag has more entries; fetch next chunk */
	if (fi->last_name) {
		ceph_mdsc_put_request(fi->last_readdir);
		fi->last_readdir = NULL;
		goto more;
	}

	/* more frags? */
	if (!ceph_frag_is_rightmost(frag)) {
		frag = ceph_frag_next(frag);
		off = 0;
		ctx->pos = ceph_make_fpos(frag, off);
		dout("readdir next frag is %x\n", frag);
		goto more;
	}
	fi->flags |= CEPH_F_ATEND;

	/*
	 * if dir_release_count still matches the dir, no dentries
	 * were released during the whole readdir, and we should have
	 * the complete dir contents in our cache.
	 */
	spin_lock(&ci->i_ceph_lock);
	if (atomic_read(&ci->i_release_count) == fi->dir_release_count) {
		dout(" marking %p complete\n", inode);
		__ceph_dir_set_complete(ci, fi->dir_release_count);
	}
	spin_unlock(&ci->i_ceph_lock);

	dout("readdir %p file %p done.\n", inode, file);
	return 0;
}
+
+static void reset_readdir(struct ceph_file_info *fi, unsigned frag)
+{
+ if (fi->last_readdir) {
+ ceph_mdsc_put_request(fi->last_readdir);
+ fi->last_readdir = NULL;
+ }
+ kfree(fi->last_name);
+ fi->last_name = NULL;
+ if (ceph_frag_is_leftmost(frag))
+ fi->next_offset = 2; /* compensate for . and .. */
+ else
+ fi->next_offset = 0;
+ if (fi->dentry) {
+ dput(fi->dentry);
+ fi->dentry = NULL;
+ }
+ fi->flags &= ~CEPH_F_ATEND;
+}
+
/*
 * llseek for directories: positions are (frag, offset) pairs, so a
 * seek that crosses a frag boundary or moves before the buffered
 * chunk must drop the cached readdir state.
 */
static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
{
	struct ceph_file_info *fi = file->private_data;
	struct inode *inode = file->f_mapping->host;
	loff_t old_offset = ceph_make_fpos(fi->frag, fi->next_offset);
	loff_t retval;

	mutex_lock(&inode->i_mutex);
	retval = -EINVAL;
	switch (whence) {
	case SEEK_END:
		offset += inode->i_size + 2;	/* FIXME */
		break;
	case SEEK_CUR:
		offset += file->f_pos;
		/* fall through */
	case SEEK_SET:
		break;
	default:
		goto out;
	}

	if (offset >= 0) {
		if (offset != file->f_pos) {
			file->f_pos = offset;
			file->f_version = 0;
			fi->flags &= ~CEPH_F_ATEND;
		}
		retval = offset;

		/*
		 * discard buffered readdir content on seekdir(0), or
		 * seek to new frag, or seek prior to current chunk.
		 */
		if (offset == 0 ||
		    fpos_frag(offset) != fi->frag ||
		    fpos_off(offset) < fi->offset) {
			dout("dir_llseek dropping %p content\n", file);
			reset_readdir(fi, fpos_frag(offset));
		}

		/* bump dir_release_count if we did a forward seek */
		if (fpos_cmp(offset, old_offset) > 0)
			fi->dir_release_count--;
	}
out:
	mutex_unlock(&inode->i_mutex);
	return retval;
}
+
+/*
+ * Handle lookups for the hidden .snap directory.
+ *
+ * If the MDS returned -ENOENT for a name that matches the configured
+ * snapdir name in an unsnapped parent, splice in the synthetic
+ * snapdir inode and clear the error. Returns the (possibly cleared)
+ * error code.
+ */
+int ceph_handle_snapdir(struct ceph_mds_request *req,
+ struct dentry *dentry, int err)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
+ struct inode *parent = dentry->d_parent->d_inode; /* we hold i_mutex */
+
+ /* .snap dir? */
+ if (err == -ENOENT &&
+ ceph_snap(parent) == CEPH_NOSNAP &&
+ strcmp(dentry->d_name.name,
+ fsc->mount_options->snapdir_name) == 0) {
+ struct inode *inode = ceph_get_snapdir(parent);
+ dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n",
+ dentry, dentry->d_name.len, dentry->d_name.name, inode);
+ BUG_ON(!d_unhashed(dentry));
+ d_add(dentry, inode);
+ err = 0;
+ }
+ return err;
+}
+
+/*
+ * Figure out final result of a lookup/open request.
+ *
+ * Mainly, make sure we return the final req->r_dentry (if it already
+ * existed) in place of the original VFS-provided dentry when they
+ * differ.
+ *
+ * Gracefully handle the case where the MDS replies with -ENOENT and
+ * no trace (which it may do, at its discretion, e.g., if it doesn't
+ * care to issue a lease on the negative dentry).
+ *
+ * Returns NULL when the caller's dentry was used as-is, a new dentry
+ * reference when the lookup was spliced, or ERR_PTR on error.
+ */
+struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
+ struct dentry *dentry, int err)
+{
+ if (err == -ENOENT) {
+ /* no trace? */
+ err = 0;
+ if (!req->r_reply_info.head->is_dentry) {
+ dout("ENOENT and no trace, dentry %p inode %p\n",
+ dentry, dentry->d_inode);
+ if (dentry->d_inode) {
+ /* stale positive dentry: unhash it */
+ d_drop(dentry);
+ err = -ENOENT;
+ } else {
+ /* cache the negative result */
+ d_add(dentry, NULL);
+ }
+ }
+ }
+ if (err)
+ dentry = ERR_PTR(err);
+ else if (dentry != req->r_dentry)
+ dentry = dget(req->r_dentry); /* we got spliced */
+ else
+ dentry = NULL;
+ return dentry;
+}
+
+/*
+ * Is this the special ".ceph" entry in the filesystem root? Note
+ * the strncmp(..., 5): any root name *beginning* with ".ceph"
+ * matches, not only the exact name.
+ */
+static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry)
+{
+ return ceph_ino(inode) == CEPH_INO_ROOT &&
+ strncmp(dentry->d_name.name, ".ceph", 5) == 0;
+}
+
+/*
+ * Look up a single dir entry. If there is a lookup intent, inform
+ * the MDS so that it gets our 'caps wanted' value in a single op.
+ *
+ * Returns NULL when the (negative) result was concluded locally,
+ * the spliced dentry, or ERR_PTR on failure.
+ */
+static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
+ unsigned int flags)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_mds_request *req;
+ int op;
+ int err;
+
+ dout("lookup %p dentry %p '%.*s'\n",
+ dir, dentry, dentry->d_name.len, dentry->d_name.name);
+
+ if (dentry->d_name.len > NAME_MAX)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ err = ceph_init_dentry(dentry);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ /* can we conclude ENOENT locally? */
+ if (dentry->d_inode == NULL) {
+ struct ceph_inode_info *ci = ceph_inode(dir);
+ struct ceph_dentry_info *di = ceph_dentry(dentry);
+
+ spin_lock(&ci->i_ceph_lock);
+ dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
+ if (strncmp(dentry->d_name.name,
+ fsc->mount_options->snapdir_name,
+ dentry->d_name.len) &&
+ !is_root_ceph_dentry(dir, dentry) &&
+ __ceph_dir_is_complete(ci) &&
+ (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
+ spin_unlock(&ci->i_ceph_lock);
+ dout(" dir %p complete, -ENOENT\n", dir);
+ /* whole dir is cached and the name is absent */
+ d_add(dentry, NULL);
+ di->lease_shared_gen = ci->i_shared_gen;
+ return NULL;
+ }
+ spin_unlock(&ci->i_ceph_lock);
+ }
+
+ /* otherwise, ask the MDS */
+ op = ceph_snap(dir) == CEPH_SNAPDIR ?
+ CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
+ req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
+ if (IS_ERR(req))
+ return ERR_CAST(req);
+ req->r_dentry = dget(dentry);
+ req->r_num_caps = 2;
+ /* we only need inode linkage */
+ req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
+ req->r_locked_dir = dir;
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+ err = ceph_handle_snapdir(req, dentry, err);
+ dentry = ceph_finish_lookup(req, dentry, err);
+ ceph_mdsc_put_request(req); /* will dput(dentry) */
+ dout("lookup result=%p\n", dentry);
+ return dentry;
+}
+
+/*
+ * If we do a create but get no trace back from the MDS, follow up with
+ * a lookup (the VFS expects us to link up the provided dentry).
+ *
+ * Returns 0 on success, negative errno from the lookup otherwise.
+ */
+int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
+{
+ struct dentry *result = ceph_lookup(dir, dentry, 0);
+
+ if (result && !IS_ERR(result)) {
+ /*
+ * We created the item, then did a lookup, and found
+ * it was already linked to another inode we already
+ * had in our cache (and thus got spliced). Link our
+ * dentry to that inode, but don't hash it, just in
+ * case the VFS wants to dereference it.
+ */
+ BUG_ON(!result->d_inode);
+ d_instantiate(dentry, result->d_inode);
+ return 0;
+ }
+ /* NULL result means the dentry was linked up in place: success */
+ return PTR_ERR(result);
+}
+
+/*
+ * mknod: send a MKNOD request to the auth MDS (also the workhorse
+ * for ceph_create, with rdev == 0). Initializes ACLs on success;
+ * on failure the dentry is dropped. Snapshots are read-only.
+ */
+static int ceph_mknod(struct inode *dir, struct dentry *dentry,
+ umode_t mode, dev_t rdev)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_mds_request *req;
+ int err;
+
+ if (ceph_snap(dir) != CEPH_NOSNAP)
+ return -EROFS;
+
+ dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n",
+ dir, dentry, mode, rdev);
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS);
+ if (IS_ERR(req)) {
+ d_drop(dentry);
+ return PTR_ERR(req);
+ }
+ req->r_dentry = dget(dentry);
+ req->r_num_caps = 2;
+ req->r_locked_dir = dir;
+ req->r_args.mknod.mode = cpu_to_le32(mode);
+ req->r_args.mknod.rdev = cpu_to_le32(rdev);
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
+ err = ceph_mdsc_do_request(mdsc, dir, req);
+ if (!err && !req->r_reply_info.head->is_dentry)
+ err = ceph_handle_notrace_create(dir, dentry);
+ ceph_mdsc_put_request(req);
+
+ if (!err)
+ ceph_init_acl(dentry, dentry->d_inode, dir);
+ else
+ d_drop(dentry);
+ return err;
+}
+
+/*
+ * create(2): implemented as a mknod with rdev == 0. The excl flag
+ * is unused here.
+ */
+static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+ bool excl)
+{
+ return ceph_mknod(dir, dentry, mode, 0);
+}
+
+/*
+ * symlink: send a SYMLINK request to the auth MDS with the link
+ * target in r_path2. Snapshots are read-only.
+ */
+static int ceph_symlink(struct inode *dir, struct dentry *dentry,
+ const char *dest)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_mds_request *req;
+ int err;
+
+ if (ceph_snap(dir) != CEPH_NOSNAP)
+ return -EROFS;
+
+ dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest);
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS);
+ if (IS_ERR(req)) {
+ d_drop(dentry);
+ return PTR_ERR(req);
+ }
+ req->r_path2 = kstrdup(dest, GFP_NOFS); /* NOTE(review): allocation failure not checked */
+ req->r_dentry = dget(dentry);
+ req->r_num_caps = 2;
+ req->r_locked_dir = dir;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
+ err = ceph_mdsc_do_request(mdsc, dir, req);
+ if (!err && !req->r_reply_info.head->is_dentry)
+ err = ceph_handle_notrace_create(dir, dentry);
+ ceph_mdsc_put_request(req);
+ if (!err)
+ ceph_init_acl(dentry, dentry->d_inode, dir);
+ else
+ d_drop(dentry);
+ return err;
+}
+
+/*
+ * mkdir: create a directory, or — when the parent is the virtual
+ * .snap dir — create a snapshot (MKSNAP). Any other snapped parent
+ * is read-only.
+ */
+static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_mds_request *req;
+ int err = -EROFS;
+ int op;
+
+ if (ceph_snap(dir) == CEPH_SNAPDIR) {
+ /* mkdir .snap/foo is a MKSNAP */
+ op = CEPH_MDS_OP_MKSNAP;
+ dout("mksnap dir %p snap '%.*s' dn %p\n", dir,
+ dentry->d_name.len, dentry->d_name.name, dentry);
+ } else if (ceph_snap(dir) == CEPH_NOSNAP) {
+ dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode);
+ op = CEPH_MDS_OP_MKDIR;
+ } else {
+ goto out;
+ }
+ req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto out;
+ }
+
+ req->r_dentry = dget(dentry);
+ req->r_num_caps = 2;
+ req->r_locked_dir = dir;
+ req->r_args.mkdir.mode = cpu_to_le32(mode);
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
+ err = ceph_mdsc_do_request(mdsc, dir, req);
+ if (!err && !req->r_reply_info.head->is_dentry)
+ err = ceph_handle_notrace_create(dir, dentry);
+ ceph_mdsc_put_request(req);
+out:
+ if (!err)
+ ceph_init_acl(dentry, dentry->d_inode, dir);
+ else
+ d_drop(dentry);
+ return err;
+}
+
+/*
+ * link: create a hard link via the auth MDS. If the reply carries
+ * no dentry trace, instantiate the new dentry locally with an extra
+ * inode reference. Snapshots are read-only.
+ */
+static int ceph_link(struct dentry *old_dentry, struct inode *dir,
+ struct dentry *dentry)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_mds_request *req;
+ int err;
+
+ if (ceph_snap(dir) != CEPH_NOSNAP)
+ return -EROFS;
+
+ dout("link in dir %p old_dentry %p dentry %p\n", dir,
+ old_dentry, dentry);
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS);
+ if (IS_ERR(req)) {
+ d_drop(dentry);
+ return PTR_ERR(req);
+ }
+ req->r_dentry = dget(dentry);
+ req->r_num_caps = 2;
+ req->r_old_dentry = dget(old_dentry);
+ req->r_locked_dir = dir;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
+ /* release LINK_SHARED on source inode (mds will lock it) */
+ req->r_old_inode_drop = CEPH_CAP_LINK_SHARED;
+ err = ceph_mdsc_do_request(mdsc, dir, req);
+ if (err) {
+ d_drop(dentry);
+ } else if (!req->r_reply_info.head->is_dentry) {
+ /* no trace: link up the dentry ourselves */
+ ihold(old_dentry->d_inode);
+ d_instantiate(dentry, old_dentry->d_inode);
+ }
+ ceph_mdsc_put_request(req);
+ return err;
+}
+
+/*
+ * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps. If it
+ * looks like the link count will hit 0, drop any other caps (other
+ * than PIN) we don't specifically want (due to the file still being
+ * open).
+ *
+ * Returns the cap mask to release with the unlink request.
+ */
+static int drop_caps_for_unlink(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
+
+ spin_lock(&ci->i_ceph_lock);
+ if (inode->i_nlink == 1) {
+ /* last link: release everything we don't want or pin */
+ drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
+ ci->i_ceph_flags |= CEPH_I_NODELAY;
+ }
+ spin_unlock(&ci->i_ceph_lock);
+ return drop;
+}
+
+/*
+ * rmdir and unlink differ only in the metadata op code. Removing a
+ * name inside the virtual .snap dir deletes the snapshot (RMSNAP).
+ */
+static int ceph_unlink(struct inode *dir, struct dentry *dentry)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct inode *inode = dentry->d_inode;
+ struct ceph_mds_request *req;
+ int err = -EROFS;
+ int op;
+
+ if (ceph_snap(dir) == CEPH_SNAPDIR) {
+ /* rmdir .snap/foo is RMSNAP */
+ dout("rmsnap dir %p '%.*s' dn %p\n", dir, dentry->d_name.len,
+ dentry->d_name.name, dentry);
+ op = CEPH_MDS_OP_RMSNAP;
+ } else if (ceph_snap(dir) == CEPH_NOSNAP) {
+ dout("unlink/rmdir dir %p dn %p inode %p\n",
+ dir, dentry, inode);
+ op = S_ISDIR(dentry->d_inode->i_mode) ?
+ CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK;
+ } else
+ goto out;
+ req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto out;
+ }
+ req->r_dentry = dget(dentry);
+ req->r_num_caps = 2;
+ req->r_locked_dir = dir;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
+ req->r_inode_drop = drop_caps_for_unlink(inode);
+ err = ceph_mdsc_do_request(mdsc, dir, req);
+ if (!err && !req->r_reply_info.head->is_dentry)
+ d_delete(dentry);
+ ceph_mdsc_put_request(req);
+out:
+ return err;
+}
+
+/*
+ * rename: send a RENAME request to the auth MDS. Cross-snap renames
+ * are -EXDEV, and snapped directories are read-only. If the reply
+ * carries no trace, perform the d_move and cache invalidation here.
+ */
+static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_mds_request *req;
+ int err;
+
+ if (ceph_snap(old_dir) != ceph_snap(new_dir))
+ return -EXDEV;
+ if (ceph_snap(old_dir) != CEPH_NOSNAP ||
+ ceph_snap(new_dir) != CEPH_NOSNAP)
+ return -EROFS;
+ dout("rename dir %p dentry %p to dir %p dentry %p\n",
+ old_dir, old_dentry, new_dir, new_dentry);
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ ihold(old_dir); /* reference for r_old_dentry_dir; presumably dropped with the request — TODO confirm */
+ req->r_dentry = dget(new_dentry);
+ req->r_num_caps = 2;
+ req->r_old_dentry = dget(old_dentry);
+ req->r_old_dentry_dir = old_dir;
+ req->r_locked_dir = new_dir;
+ req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
+ /* release LINK_RDCACHE on source inode (mds will lock it) */
+ req->r_old_inode_drop = CEPH_CAP_LINK_SHARED;
+ if (new_dentry->d_inode)
+ req->r_inode_drop = drop_caps_for_unlink(new_dentry->d_inode);
+ err = ceph_mdsc_do_request(mdsc, old_dir, req);
+ if (!err && !req->r_reply_info.head->is_dentry) {
+ /*
+ * Normally d_move() is done by fill_trace (called by
+ * do_request, above). If there is no trace, we need
+ * to do it here.
+ */
+
+ d_move(old_dentry, new_dentry);
+
+ /* ensure target dentry is invalidated, despite
+ rehashing bug in vfs_rename_dir */
+ ceph_invalidate_dentry_lease(new_dentry);
+
+ /* d_move screws up sibling dentries' offsets */
+ ceph_dir_clear_complete(old_dir);
+ ceph_dir_clear_complete(new_dir);
+
+ }
+ ceph_mdsc_put_request(req);
+ return err;
+}
+
+/*
+ * Ensure a dentry lease will no longer revalidate: expire the
+ * time-based lease (d_time = now) and zero the shared gen so the
+ * dir-wide lease check fails too.
+ */
+void ceph_invalidate_dentry_lease(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ dentry->d_time = jiffies;
+ ceph_dentry(dentry)->lease_shared_gen = 0;
+ spin_unlock(&dentry->d_lock);
+}
+
+/*
+ * Check if dentry lease is valid. If not, delete the lease. Try to
+ * renew if the lease is more than half up.
+ *
+ * Returns 1 if the lease is valid, 0 otherwise. The renewal message,
+ * if any, is sent after d_lock is dropped.
+ */
+static int dentry_lease_is_valid(struct dentry *dentry)
+{
+ struct ceph_dentry_info *di;
+ struct ceph_mds_session *s;
+ int valid = 0;
+ u32 gen;
+ unsigned long ttl;
+ struct ceph_mds_session *session = NULL;
+ struct inode *dir = NULL;
+ u32 seq = 0;
+
+ spin_lock(&dentry->d_lock);
+ di = ceph_dentry(dentry);
+ if (di->lease_session) {
+ s = di->lease_session;
+ /* snapshot the session's cap gen/ttl under its lock */
+ spin_lock(&s->s_gen_ttl_lock);
+ gen = s->s_cap_gen;
+ ttl = s->s_cap_ttl;
+ spin_unlock(&s->s_gen_ttl_lock);
+
+ if (di->lease_gen == gen &&
+ time_before(jiffies, dentry->d_time) &&
+ time_before(jiffies, ttl)) {
+ valid = 1;
+ if (di->lease_renew_after &&
+ time_after(jiffies, di->lease_renew_after)) {
+ /* we should renew */
+ dir = dentry->d_parent->d_inode;
+ session = ceph_get_mds_session(s);
+ seq = di->lease_seq;
+ di->lease_renew_after = 0;
+ di->lease_renew_from = jiffies;
+ }
+ }
+ }
+ spin_unlock(&dentry->d_lock);
+
+ if (session) {
+ /* send the renewal outside of d_lock */
+ ceph_mdsc_lease_send_msg(session, dir, dentry,
+ CEPH_MDS_LEASE_RENEW, seq);
+ ceph_put_mds_session(session);
+ }
+ dout("dentry_lease_is_valid - dentry %p = %d\n", dentry, valid);
+ return valid;
+}
+
+/*
+ * Check if directory-wide content lease/cap is valid: the dentry's
+ * cached shared gen must match the dir's and we must hold (or be
+ * able to touch) FILE_SHARED caps on the dir. Returns 1 if valid.
+ */
+static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
+{
+ struct ceph_inode_info *ci = ceph_inode(dir);
+ struct ceph_dentry_info *di = ceph_dentry(dentry);
+ int valid = 0;
+
+ spin_lock(&ci->i_ceph_lock);
+ if (ci->i_shared_gen == di->lease_shared_gen)
+ valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
+ spin_unlock(&ci->i_ceph_lock);
+ dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n",
+ dir, (unsigned)ci->i_shared_gen, dentry,
+ (unsigned)di->lease_shared_gen, valid);
+ return valid;
+}
+
+/*
+ * Check if cached dentry can be trusted. Returns 1 (valid),
+ * 0 (invalid, dentry dropped), or -ECHILD in RCU-walk mode.
+ */
+static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ int valid = 0;
+ struct inode *dir;
+
+ /* we take references and may sleep; bail out of RCU walk */
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
+ dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
+ ceph_dentry(dentry)->offset);
+
+ dir = ceph_get_dentry_parent_inode(dentry);
+
+ /* always trust cached snapped dentries, snapdir dentry */
+ if (ceph_snap(dir) != CEPH_NOSNAP) {
+ dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry,
+ dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
+ valid = 1;
+ } else if (dentry->d_inode &&
+ ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) {
+ valid = 1;
+ } else if (dentry_lease_is_valid(dentry) ||
+ dir_lease_is_valid(dir, dentry)) {
+ /* a positive dentry also needs caps on the inode */
+ if (dentry->d_inode)
+ valid = ceph_is_any_caps(dentry->d_inode);
+ else
+ valid = 1;
+ }
+
+ dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid");
+ if (valid) {
+ ceph_dentry_lru_touch(dentry);
+ } else {
+ /* dir contents are no longer fully known */
+ ceph_dir_clear_complete(dir);
+ d_drop(dentry);
+ }
+ iput(dir); /* drop ref from ceph_get_dentry_parent_inode */
+ return valid;
+}
+
+/*
+ * Release our ceph_dentry_info: remove from the private LRU, drop
+ * the lease session reference, and free the per-dentry struct.
+ */
+static void ceph_d_release(struct dentry *dentry)
+{
+ struct ceph_dentry_info *di = ceph_dentry(dentry);
+
+ dout("d_release %p\n", dentry);
+ ceph_dentry_lru_del(dentry);
+ if (di->lease_session)
+ ceph_put_mds_session(di->lease_session);
+ kmem_cache_free(ceph_dentry_cachep, di);
+ dentry->d_fsdata = NULL;
+}
+
+/* Snapped dentries are always trusted (no revalidation for now). */
+static int ceph_snapdir_d_revalidate(struct dentry *dentry,
+ unsigned int flags)
+{
+ /*
+ * Eventually, we'll want to revalidate snapped metadata
+ * too... probably...
+ */
+ return 1;
+}
+
+/*
+ * When the VFS prunes a dentry from the cache, we need to clear the
+ * complete flag on the parent directory.
+ *
+ * Called under dentry->d_lock.
+ */
+static void ceph_d_prune(struct dentry *dentry)
+{
+ dout("ceph_d_prune %p\n", dentry);
+
+ /* do we have a valid parent? */
+ if (IS_ROOT(dentry))
+ return;
+
+ /* if we are not hashed, we don't affect dir's completeness */
+ if (d_unhashed(dentry))
+ return;
+
+ /*
+ * we hold d_lock, so d_parent is stable, and d_fsdata is never
+ * cleared until d_release
+ */
+ ceph_dir_clear_complete(dentry->d_parent->d_inode);
+}
+
+/*
+ * read() on a dir. This weird interface hack only works if mounted
+ * with '-o dirstat': formats the recursive directory statistics into
+ * a per-file text buffer on first read, then serves it like a
+ * regular file. The buffer presumably lives until file release —
+ * freed elsewhere; verify against ceph_release.
+ */
+static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
+ loff_t *ppos)
+{
+ struct ceph_file_info *cf = file->private_data;
+ struct inode *inode = file_inode(file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int left;
+ const int bufsize = 1024;
+
+ if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
+ return -EISDIR;
+
+ /* format the stats once, on first read */
+ if (!cf->dir_info) {
+ cf->dir_info = kmalloc(bufsize, GFP_NOFS);
+ if (!cf->dir_info)
+ return -ENOMEM;
+ cf->dir_info_len =
+ snprintf(cf->dir_info, bufsize,
+ "entries: %20lld\n"
+ " files: %20lld\n"
+ " subdirs: %20lld\n"
+ "rentries: %20lld\n"
+ " rfiles: %20lld\n"
+ " rsubdirs: %20lld\n"
+ "rbytes: %20lld\n"
+ "rctime: %10ld.%09ld\n",
+ ci->i_files + ci->i_subdirs,
+ ci->i_files,
+ ci->i_subdirs,
+ ci->i_rfiles + ci->i_rsubdirs,
+ ci->i_rfiles,
+ ci->i_rsubdirs,
+ ci->i_rbytes,
+ (long)ci->i_rctime.tv_sec,
+ (long)ci->i_rctime.tv_nsec);
+ }
+
+ if (*ppos >= cf->dir_info_len)
+ return 0;
+ size = min_t(unsigned, size, cf->dir_info_len-*ppos);
+ left = copy_to_user(buf, cf->dir_info + *ppos, size);
+ if (left == size)
+ return -EFAULT; /* nothing copied at all */
+ *ppos += (size - left);
+ return size - left;
+}
+
+/*
+ * an fsync() on a dir will wait for any uncommitted directory
+ * operations to commit: flush the mapping, then walk the inode's
+ * unsafe-dirop list and wait for each request's safe completion, up
+ * to the newest tid present when we started.
+ */
+static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
+{
+ struct inode *inode = file_inode(file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct list_head *head = &ci->i_unsafe_dirops;
+ struct ceph_mds_request *req;
+ u64 last_tid;
+ int ret = 0;
+
+ dout("dir_fsync %p\n", inode);
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+ mutex_lock(&inode->i_mutex);
+
+ spin_lock(&ci->i_unsafe_lock);
+ if (list_empty(head))
+ goto out;
+
+ /* remember the newest outstanding tid; don't wait past it */
+ req = list_entry(head->prev,
+ struct ceph_mds_request, r_unsafe_dir_item);
+ last_tid = req->r_tid;
+
+ do {
+ /* hold a ref so the request survives while we wait unlocked */
+ ceph_mdsc_get_request(req);
+ spin_unlock(&ci->i_unsafe_lock);
+
+ dout("dir_fsync %p wait on tid %llu (until %llu)\n",
+ inode, req->r_tid, last_tid);
+ if (req->r_timeout) {
+ ret = wait_for_completion_timeout(
+ &req->r_safe_completion, req->r_timeout);
+ if (ret > 0)
+ ret = 0;
+ else if (ret == 0)
+ ret = -EIO; /* timed out */
+ } else {
+ wait_for_completion(&req->r_safe_completion);
+ }
+ ceph_mdsc_put_request(req);
+
+ spin_lock(&ci->i_unsafe_lock);
+ if (ret || list_empty(head))
+ break;
+ req = list_entry(head->next,
+ struct ceph_mds_request, r_unsafe_dir_item);
+ } while (req->r_tid < last_tid);
+out:
+ spin_unlock(&ci->i_unsafe_lock);
+ mutex_unlock(&inode->i_mutex);
+
+ return ret;
+}
+
+/*
+ * We maintain a private dentry LRU.
+ *
+ * FIXME: this needs to be changed to a per-mds lru to be useful.
+ */
+/* Add a dentry to the tail (most-recently-used end) of the LRU. */
+void ceph_dentry_lru_add(struct dentry *dn)
+{
+ struct ceph_dentry_info *di = ceph_dentry(dn);
+ struct ceph_mds_client *mdsc;
+
+ dout("dentry_lru_add %p %p '%.*s'\n", di, dn,
+ dn->d_name.len, dn->d_name.name);
+ mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
+ spin_lock(&mdsc->dentry_lru_lock);
+ list_add_tail(&di->lru, &mdsc->dentry_lru);
+ mdsc->num_dentry++;
+ spin_unlock(&mdsc->dentry_lru_lock);
+}
+
+/* Move a dentry to the most-recently-used end of the LRU. */
+void ceph_dentry_lru_touch(struct dentry *dn)
+{
+ struct ceph_dentry_info *di = ceph_dentry(dn);
+ struct ceph_mds_client *mdsc;
+
+ dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn,
+ dn->d_name.len, dn->d_name.name, di->offset);
+ mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
+ spin_lock(&mdsc->dentry_lru_lock);
+ list_move_tail(&di->lru, &mdsc->dentry_lru);
+ spin_unlock(&mdsc->dentry_lru_lock);
+}
+
+/* Remove a dentry from the LRU and decrement the count. */
+void ceph_dentry_lru_del(struct dentry *dn)
+{
+ struct ceph_dentry_info *di = ceph_dentry(dn);
+ struct ceph_mds_client *mdsc;
+
+ dout("dentry_lru_del %p %p '%.*s'\n", di, dn,
+ dn->d_name.len, dn->d_name.name);
+ mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
+ spin_lock(&mdsc->dentry_lru_lock);
+ list_del_init(&di->lru);
+ mdsc->num_dentry--;
+ spin_unlock(&mdsc->dentry_lru_lock);
+}
+
+/*
+ * Return name hash for a given dentry. This is dependent on
+ * the parent directory's hash function.
+ */
+unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
+{
+ struct ceph_inode_info *dci = ceph_inode(dir);
+
+ switch (dci->i_dir_layout.dl_dir_hash) {
+ case 0: /* for backward compat */
+ case CEPH_STR_HASH_LINUX:
+ /* kernel already hashed the name for us */
+ return dn->d_name.hash;
+
+ default:
+ return ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
+ dn->d_name.name, dn->d_name.len);
+ }
+}
+
+/* file operations for directories */
+const struct file_operations ceph_dir_fops = {
+ .read = ceph_read_dir,
+ .iterate = ceph_readdir,
+ .llseek = ceph_dir_llseek,
+ .open = ceph_open,
+ .release = ceph_release,
+ .unlocked_ioctl = ceph_ioctl,
+ .fsync = ceph_dir_fsync,
+};
+
+/* inode operations for directories (note: rmdir shares ceph_unlink) */
+const struct inode_operations ceph_dir_iops = {
+ .lookup = ceph_lookup,
+ .permission = ceph_permission,
+ .getattr = ceph_getattr,
+ .setattr = ceph_setattr,
+ .setxattr = ceph_setxattr,
+ .getxattr = ceph_getxattr,
+ .listxattr = ceph_listxattr,
+ .removexattr = ceph_removexattr,
+ .get_acl = ceph_get_acl,
+ .set_acl = ceph_set_acl,
+ .mknod = ceph_mknod,
+ .symlink = ceph_symlink,
+ .mkdir = ceph_mkdir,
+ .link = ceph_link,
+ .unlink = ceph_unlink,
+ .rmdir = ceph_unlink,
+ .rename = ceph_rename,
+ .create = ceph_create,
+ .atomic_open = ceph_atomic_open,
+};
+
+/* dentry operations for regular (non-snapped) dentries */
+const struct dentry_operations ceph_dentry_ops = {
+ .d_revalidate = ceph_d_revalidate,
+ .d_release = ceph_d_release,
+ .d_prune = ceph_d_prune,
+};
+
+/* dentry operations for the virtual .snap directory */
+const struct dentry_operations ceph_snapdir_dentry_ops = {
+ .d_revalidate = ceph_snapdir_d_revalidate,
+ .d_release = ceph_d_release,
+};
+
+/* dentry operations for snapped dentries (no revalidation) */
+const struct dentry_operations ceph_snap_dentry_ops = {
+ .d_release = ceph_d_release,
+ .d_prune = ceph_d_prune,
+};
diff --git a/ceph/export.c b/ceph/export.c
new file mode 100644
index 0000000..00d6af6
--- /dev/null
+++ b/ceph/export.c
@@ -0,0 +1,250 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/exportfs.h>
+#include <linux/slab.h>
+#include <asm/unaligned.h>
+
+#include "super.h"
+#include "mds_client.h"
+
+/*
+ * Basic fh: just the inode number (FILEID_INO32_GEN).
+ */
+struct ceph_nfs_fh {
+ u64 ino;
+} __attribute__ ((packed));
+
+/*
+ * Larger fh that includes parent ino (FILEID_INO32_GEN_PARENT).
+ */
+struct ceph_nfs_confh {
+ u64 ino, parent_ino;
+} __attribute__ ((packed));
+
+/*
+ * Encode an NFS file handle for @inode into @rawfh. Handle lengths
+ * (*max_len) are counted in 4-byte words, per the exportfs
+ * convention. Returns the FILEID_* type, or FILEID_INVALID when the
+ * caller's buffer is too small (with *max_len set to what we need).
+ */
+static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
+ struct inode *parent_inode)
+{
+ int type;
+ struct ceph_nfs_fh *fh = (void *)rawfh;
+ struct ceph_nfs_confh *cfh = (void *)rawfh;
+ int connected_handle_length = sizeof(*cfh)/4;
+ int handle_length = sizeof(*fh)/4;
+
+ /* don't re-export snaps */
+ if (ceph_snap(inode) != CEPH_NOSNAP)
+ return -EINVAL;
+
+ if (parent_inode && (*max_len < connected_handle_length)) {
+ *max_len = connected_handle_length;
+ return FILEID_INVALID;
+ } else if (*max_len < handle_length) {
+ *max_len = handle_length;
+ return FILEID_INVALID;
+ }
+
+ if (parent_inode) {
+ dout("encode_fh %llx with parent %llx\n",
+ ceph_ino(inode), ceph_ino(parent_inode));
+ cfh->ino = ceph_ino(inode);
+ cfh->parent_ino = ceph_ino(parent_inode);
+ *max_len = connected_handle_length;
+ type = FILEID_INO32_GEN_PARENT;
+ } else {
+ dout("encode_fh %llx\n", ceph_ino(inode));
+ fh->ino = ceph_ino(inode);
+ *max_len = handle_length;
+ type = FILEID_INO32_GEN;
+ }
+ return type;
+}
+
+/*
+ * Resolve an inode number to a dentry: try the local inode cache
+ * first, otherwise ask an MDS with LOOKUPINO. Returns a referenced
+ * dentry or ERR_PTR (-ESTALE if the MDS doesn't know the ino).
+ */
+static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
+{
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
+ struct inode *inode;
+ struct dentry *dentry;
+ struct ceph_vino vino;
+ int err;
+
+ vino.ino = ino;
+ vino.snap = CEPH_NOSNAP;
+ inode = ceph_find_inode(sb, vino);
+ if (!inode) {
+ struct ceph_mds_request *req;
+
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
+ USE_ANY_MDS);
+ if (IS_ERR(req))
+ return ERR_CAST(req);
+
+ req->r_ino1 = vino;
+ req->r_num_caps = 1;
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+ inode = req->r_target_inode;
+ if (inode)
+ ihold(inode); /* take our own ref before the request drops its */
+ ceph_mdsc_put_request(req);
+ if (!inode)
+ return ERR_PTR(-ESTALE);
+ }
+
+ dentry = d_obtain_alias(inode);
+ if (IS_ERR(dentry)) {
+ iput(inode);
+ return dentry;
+ }
+ err = ceph_init_dentry(dentry);
+ if (err < 0) {
+ dput(dentry);
+ return ERR_PTR(err);
+ }
+ dout("__fh_to_dentry %llx %p dentry %p\n", ino, inode, dentry);
+ return dentry;
+}
+
+/*
+ * convert regular fh to dentry
+ *
+ * Validates the fh type/length (length in 4-byte words) before
+ * decoding; returns NULL for handles we can't interpret.
+ */
+static struct dentry *ceph_fh_to_dentry(struct super_block *sb,
+ struct fid *fid,
+ int fh_len, int fh_type)
+{
+ struct ceph_nfs_fh *fh = (void *)fid->raw;
+
+ if (fh_type != FILEID_INO32_GEN &&
+ fh_type != FILEID_INO32_GEN_PARENT)
+ return NULL;
+ if (fh_len < sizeof(*fh) / 4)
+ return NULL;
+
+ dout("fh_to_dentry %llx\n", fh->ino);
+ return __fh_to_dentry(sb, fh->ino);
+}
+
+/*
+ * Ask the MDS for the parent of @child (or, when @child is NULL, of
+ * the inode numbered @ino) via LOOKUPPARENT. Returns a referenced
+ * dentry for the parent or ERR_PTR (-ENOENT if unknown).
+ */
+static struct dentry *__get_parent(struct super_block *sb,
+ struct dentry *child, u64 ino)
+{
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
+ struct ceph_mds_request *req;
+ struct inode *inode;
+ struct dentry *dentry;
+ int err;
+
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT,
+ USE_ANY_MDS);
+ if (IS_ERR(req))
+ return ERR_CAST(req);
+
+ if (child) {
+ req->r_inode = child->d_inode;
+ ihold(child->d_inode);
+ } else {
+ req->r_ino1 = (struct ceph_vino) {
+ .ino = ino,
+ .snap = CEPH_NOSNAP,
+ };
+ }
+ req->r_num_caps = 1;
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+ inode = req->r_target_inode;
+ if (inode)
+ ihold(inode); /* take our own ref before the request drops its */
+ ceph_mdsc_put_request(req);
+ if (!inode)
+ return ERR_PTR(-ENOENT);
+
+ dentry = d_obtain_alias(inode);
+ if (IS_ERR(dentry)) {
+ iput(inode);
+ return dentry;
+ }
+ err = ceph_init_dentry(dentry);
+ if (err < 0) {
+ dput(dentry);
+ return ERR_PTR(err);
+ }
+ dout("__get_parent ino %llx parent %p ino %llx.%llx\n",
+ child ? ceph_ino(child->d_inode) : ino,
+ dentry, ceph_vinop(inode));
+ return dentry;
+}
+
+/*
+ * exportfs get_parent: look up the parent of a connected child
+ * dentry. Snapshots are never re-exported.
+ */
+struct dentry *ceph_get_parent(struct dentry *child)
+{
+ /* don't re-export snaps */
+ if (ceph_snap(child->d_inode) != CEPH_NOSNAP)
+ return ERR_PTR(-EINVAL);
+
+ dout("get_parent %p ino %llx.%llx\n",
+ child, ceph_vinop(child->d_inode));
+ return __get_parent(child->d_sb, child, 0);
+}
+
+/*
+ * convert regular fh to parent
+ *
+ * Prefer asking the MDS for the child's parent; if the child ino is
+ * unknown (-ENOENT), fall back to resolving the parent ino stored
+ * in the connectable handle directly.
+ */
+static struct dentry *ceph_fh_to_parent(struct super_block *sb,
+ struct fid *fid,
+ int fh_len, int fh_type)
+{
+ struct ceph_nfs_confh *cfh = (void *)fid->raw;
+ struct dentry *dentry;
+
+ if (fh_type != FILEID_INO32_GEN_PARENT)
+ return NULL;
+ if (fh_len < sizeof(*cfh) / 4)
+ return NULL;
+
+ dout("fh_to_parent %llx\n", cfh->parent_ino);
+ dentry = __get_parent(sb, NULL, cfh->ino);
+ if (IS_ERR(dentry) && PTR_ERR(dentry) == -ENOENT)
+ dentry = __fh_to_dentry(sb, cfh->parent_ino);
+ return dentry;
+}
+
+/*
+ * exportfs get_name: ask the MDS (LOOKUPNAME) for @child's name
+ * within @parent and copy it, NUL-terminated, into @name. Assumes
+ * @name is large enough for the reply (exportfs supplies a
+ * NAME_MAX-sized buffer — TODO confirm). Takes the parent's i_mutex
+ * for the duration of the request.
+ */
+static int ceph_get_name(struct dentry *parent, char *name,
+ struct dentry *child)
+{
+ struct ceph_mds_client *mdsc;
+ struct ceph_mds_request *req;
+ int err;
+
+ mdsc = ceph_inode_to_client(child->d_inode)->mdsc;
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
+ USE_ANY_MDS);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ mutex_lock(&parent->d_inode->i_mutex);
+
+ req->r_inode = child->d_inode;
+ ihold(child->d_inode);
+ req->r_ino2 = ceph_vino(parent->d_inode);
+ req->r_locked_dir = parent->d_inode;
+ req->r_num_caps = 2;
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+
+ mutex_unlock(&parent->d_inode->i_mutex);
+
+ if (!err) {
+ struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
+ memcpy(name, rinfo->dname, rinfo->dname_len);
+ name[rinfo->dname_len] = 0;
+ dout("get_name %p ino %llx.%llx name %s\n",
+ child, ceph_vinop(child->d_inode), name);
+ } else {
+ dout("get_name %p ino %llx.%llx err %d\n",
+ child, ceph_vinop(child->d_inode), err);
+ }
+
+ ceph_mdsc_put_request(req);
+ return err;
+}
+
+/* NFS export operations for ceph */
+const struct export_operations ceph_export_ops = {
+ .encode_fh = ceph_encode_fh,
+ .fh_to_dentry = ceph_fh_to_dentry,
+ .fh_to_parent = ceph_fh_to_parent,
+ .get_parent = ceph_get_parent,
+ .get_name = ceph_get_name,
+};
diff --git a/ceph/file.c b/ceph/file.c
new file mode 100644
index 0000000..66075a4
--- /dev/null
+++ b/ceph/file.c
@@ -0,0 +1,1294 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/writeback.h>
+#include <linux/aio.h>
+#include <linux/falloc.h>
+
+#include "super.h"
+#include "mds_client.h"
+#include "cache.h"
+
+/*
+ * Ceph file operations
+ *
+ * Implement basic open/close functionality, and implement
+ * read/write.
+ *
+ * We implement three modes of file I/O:
+ * - buffered uses the generic_file_aio_{read,write} helpers
+ *
+ * - synchronous is used when there is multi-client read/write
+ * sharing, avoids the page cache, and synchronously waits for an
+ * ack from the OSD.
+ *
+ * - direct io takes the variant of the sync path that references
+ * user pages directly.
+ *
+ * fsync() flushes and waits on dirty pages, but just queues metadata
+ * for writeback: since the MDS can recover size and mtime there is no
+ * need to wait for MDS acknowledgement.
+ */
+
+
+/*
+ * Prepare an open request. Preallocate ceph_cap to avoid an
+ * inopportune ENOMEM later.
+ */
+static struct ceph_mds_request *
+prepare_open_request(struct super_block *sb, int flags, int create_mode)
+{
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
+ struct ceph_mds_request *req;
+ int op;
+ int want_auth;
+
+ /* creates go through a distinct MDS op */
+ op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN;
+ /* anything that may modify the file must go to the auth MDS */
+ want_auth = (flags & (O_WRONLY|O_RDWR|O_CREAT|O_TRUNC)) ?
+ USE_AUTH_MDS : USE_ANY_MDS;
+
+ req = ceph_mdsc_create_request(mdsc, op, want_auth);
+ if (!IS_ERR(req)) {
+ req->r_fmode = ceph_flags_to_mode(flags);
+ req->r_args.open.flags = cpu_to_le32(flags);
+ req->r_args.open.mode = cpu_to_le32(create_mode);
+ }
+ return req;
+}
+
+/*
+ * initialize private struct file data.
+ * if we fail, clean up by dropping fmode reference on the ceph_inode
+ */
+static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
+{
+ struct ceph_file_info *cf;
+ int ret = 0;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+
+ switch (inode->i_mode & S_IFMT) {
+ case S_IFREG:
+ /* First file open request creates the cookie, we want to keep
+ * this cookie around for the filetime of the inode as not to
+ * have to worry about fscache register / revoke / operation
+ * races.
+ *
+ * Also, if we know the operation is going to invalidate data
+ * (non readonly) just nuke the cache right away.
+ */
+ ceph_fscache_register_inode_cookie(mdsc->fsc, ci);
+ if ((fmode & CEPH_FILE_MODE_WR))
+ ceph_fscache_invalidate(inode);
+ /* fallthrough: regular files and directories share the
+ * ceph_file_info allocation below. (NB: the dout below says
+ * "(regular)" for directories too.) */
+ case S_IFDIR:
+ dout("init_file %p %p 0%o (regular)\n", inode, file,
+ inode->i_mode);
+ cf = kmem_cache_alloc(ceph_file_cachep, GFP_NOFS | __GFP_ZERO);
+ if (cf == NULL) {
+ ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+ return -ENOMEM;
+ }
+ cf->fmode = fmode;
+ /* readdir position 2 skips the "." and ".." entries */
+ cf->next_offset = 2;
+ file->private_data = cf;
+ BUG_ON(inode->i_fop->release != ceph_release);
+ break;
+
+ case S_IFLNK:
+ dout("init_file %p %p 0%o (symlink)\n", inode, file,
+ inode->i_mode);
+ ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+ break;
+
+ default:
+ dout("init_file %p %p 0%o (special)\n", inode, file,
+ inode->i_mode);
+ /*
+ * we need to drop the open ref now, since we don't
+ * have .release set to ceph_release.
+ */
+ ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+ BUG_ON(inode->i_fop->release == ceph_release);
+
+ /* call the proper open fop */
+ ret = inode->i_fop->open(inode, file);
+ }
+ return ret;
+}
+
+/*
+ * If we already have the requisite capabilities, we can satisfy
+ * the open request locally (no need to request new caps from the
+ * MDS). We do, however, need to inform the MDS (asynchronously)
+ * if our wanted caps set expands.
+ */
+int ceph_open(struct inode *inode, struct file *file)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_mds_request *req;
+ struct ceph_file_info *cf = file->private_data;
+ struct inode *parent_inode = NULL;
+ int err;
+ int flags, fmode, wanted;
+
+ /* atomic_open may have already initialized this struct file */
+ if (cf) {
+ dout("open file %p is already opened\n", file);
+ return 0;
+ }
+
+ /* filter out O_CREAT|O_EXCL; vfs did that already. yuck. */
+ flags = file->f_flags & ~(O_CREAT|O_EXCL);
+ if (S_ISDIR(inode->i_mode))
+ flags = O_DIRECTORY; /* mds likes to know */
+
+ dout("open inode %p ino %llx.%llx file %p flags %d (%d)\n", inode,
+ ceph_vinop(inode), file, flags, file->f_flags);
+ fmode = ceph_flags_to_mode(flags);
+ wanted = ceph_caps_for_mode(fmode);
+
+ /* snapped files are read-only */
+ if (ceph_snap(inode) != CEPH_NOSNAP && (file->f_mode & FMODE_WRITE))
+ return -EROFS;
+
+ /* trivially open snapdir */
+ if (ceph_snap(inode) == CEPH_SNAPDIR) {
+ spin_lock(&ci->i_ceph_lock);
+ __ceph_get_fmode(ci, fmode);
+ spin_unlock(&ci->i_ceph_lock);
+ return ceph_init_file(inode, file, fmode);
+ }
+
+ /*
+ * No need to block if we have caps on the auth MDS (for
+ * write) or any MDS (for read). Update wanted set
+ * asynchronously.
+ */
+ spin_lock(&ci->i_ceph_lock);
+ if (__ceph_is_any_real_caps(ci) &&
+ (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
+ int mds_wanted = __ceph_caps_mds_wanted(ci);
+ int issued = __ceph_caps_issued(ci, NULL);
+
+ dout("open %p fmode %d want %s issued %s using existing\n",
+ inode, fmode, ceph_cap_string(wanted),
+ ceph_cap_string(issued));
+ __ceph_get_fmode(ci, fmode);
+ spin_unlock(&ci->i_ceph_lock);
+
+ /* adjust wanted? */
+ if ((issued & wanted) != wanted &&
+ (mds_wanted & wanted) != wanted &&
+ ceph_snap(inode) != CEPH_SNAPDIR)
+ ceph_check_caps(ci, 0, NULL);
+
+ return ceph_init_file(inode, file, fmode);
+ } else if (ceph_snap(inode) != CEPH_NOSNAP &&
+ (ci->i_snap_caps & wanted) == wanted) {
+ /* snapped inode whose cached snap caps already cover us */
+ __ceph_get_fmode(ci, fmode);
+ spin_unlock(&ci->i_ceph_lock);
+ return ceph_init_file(inode, file, fmode);
+ }
+
+ spin_unlock(&ci->i_ceph_lock);
+
+ /* slow path: ask the MDS to open (and grant caps) */
+ dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
+ req = prepare_open_request(inode->i_sb, flags, 0);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto out;
+ }
+ /* the request owns this inode ref until it is put */
+ req->r_inode = inode;
+ ihold(inode);
+
+ req->r_num_caps = 1;
+ if (flags & O_CREAT)
+ parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
+ err = ceph_mdsc_do_request(mdsc, parent_inode, req);
+ iput(parent_inode);
+ if (!err)
+ err = ceph_init_file(inode, file, req->r_fmode);
+ ceph_mdsc_put_request(req);
+ dout("open result=%d on %llx.%llx\n", err, ceph_vinop(inode));
+out:
+ return err;
+}
+
+
+/*
+ * Do a lookup + open with a single request. If we get a non-existent
+ * file or symlink, return 1 so the VFS can retry.
+ */
+int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
+ struct file *file, unsigned flags, umode_t mode,
+ int *opened)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_mds_request *req;
+ struct dentry *dn;
+ int err;
+
+ dout("atomic_open %p dentry %p '%.*s' %s flags %d mode 0%o\n",
+ dir, dentry, dentry->d_name.len, dentry->d_name.name,
+ d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode);
+
+ if (dentry->d_name.len > NAME_MAX)
+ return -ENAMETOOLONG;
+
+ err = ceph_init_dentry(dentry);
+ if (err < 0)
+ return err;
+
+ /* do the open */
+ req = prepare_open_request(dir->i_sb, flags, mode);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ req->r_dentry = dget(dentry);
+ req->r_num_caps = 2;
+ if (flags & O_CREAT) {
+ /* ask the MDS to invalidate others' dentry lease on create */
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
+ }
+ req->r_locked_dir = dir; /* caller holds dir->i_mutex */
+ err = ceph_mdsc_do_request(mdsc,
+ (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
+ req);
+ if (err)
+ goto out_err;
+
+ /* ENOENT inside .snap maps to a synthetic snapdir lookup */
+ err = ceph_handle_snapdir(req, dentry, err);
+ if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
+ err = ceph_handle_notrace_create(dir, dentry);
+
+ if (d_unhashed(dentry)) {
+ /* splice the reply inode into the dcache */
+ dn = ceph_finish_lookup(req, dentry, err);
+ if (IS_ERR(dn))
+ err = PTR_ERR(dn);
+ } else {
+ /* we were given a hashed negative dentry */
+ dn = NULL;
+ }
+ if (err)
+ goto out_err;
+ if (dn || dentry->d_inode == NULL || S_ISLNK(dentry->d_inode->i_mode)) {
+ /* make vfs retry on splice, ENOENT, or symlink */
+ dout("atomic_open finish_no_open on dn %p\n", dn);
+ err = finish_no_open(file, dn);
+ } else {
+ dout("atomic_open finish_open on dn %p\n", dn);
+ if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
+ ceph_init_acl(dentry, dentry->d_inode, dir);
+ *opened |= FILE_CREATED;
+ }
+ err = finish_open(file, dentry, ceph_open, opened);
+ }
+out_err:
+ /* drop the open ref granted by the MDS if we are bailing out */
+ if (!req->r_err && req->r_target_inode)
+ ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode);
+ ceph_mdsc_put_request(req);
+ dout("atomic_open result=%d\n", err);
+ return err;
+}
+
+int ceph_release(struct inode *inode, struct file *file)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_file_info *fi = file->private_data;
+
+ dout("release inode %p file %p\n", inode, file);
+
+ /* drop the open reference taken at open time */
+ ceph_put_fmode(ci, fi->fmode);
+
+ /* tear down cached readdir state hanging off the file */
+ if (fi->last_readdir)
+ ceph_mdsc_put_request(fi->last_readdir);
+ kfree(fi->dir_info);
+ kfree(fi->last_name);
+ dput(fi->dentry);
+
+ kmem_cache_free(ceph_file_cachep, fi);
+
+ /* wake up anyone waiting for caps on this inode */
+ wake_up_all(&ci->i_cap_wq);
+ return 0;
+}
+
+/*
+ * Read a range of bytes striped over one or more objects. Iterate over
+ * objects we stripe over. (That's not atomic, but good enough for now.)
+ *
+ * If we get a short result from the OSD, check against i_size; we need to
+ * only return a short read to the caller if we hit EOF.
+ */
+static int striped_read(struct inode *inode,
+ u64 off, u64 len,
+ struct page **pages, int num_pages,
+ int *checkeof, bool o_direct,
+ unsigned long buf_align)
+{
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ u64 pos, this_len, left;
+ int io_align, page_align;
+ int pages_left;
+ int read; /* bytes accumulated so far across sub-reads */
+ struct page **page_pos;
+ int ret;
+ bool hit_stripe, was_short;
+
+ /*
+ * we may need to do multiple reads. not atomic, unfortunately.
+ */
+ pos = off;
+ left = len;
+ page_pos = pages;
+ pages_left = num_pages;
+ read = 0;
+ /* offset of the start of the read within its first page */
+ io_align = off & ~PAGE_MASK;
+
+more:
+ /* for O_DIRECT, alignment follows the user buffer, not the file */
+ if (o_direct)
+ page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
+ else
+ page_align = pos & ~PAGE_MASK;
+ this_len = left;
+ /* readpages clamps this_len to the current object/stripe extent */
+ ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
+ &ci->i_layout, pos, &this_len,
+ ci->i_truncate_seq,
+ ci->i_truncate_size,
+ page_pos, pages_left, page_align);
+ if (ret == -ENOENT)
+ ret = 0; /* sparse object: treat as zeroes */
+ hit_stripe = this_len < left;
+ was_short = ret >= 0 && ret < this_len;
+ dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read,
+ ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
+
+ if (ret >= 0) {
+ int didpages;
+ /* short read inside i_size means a hole: zero-fill the gap */
+ if (was_short && (pos + ret < inode->i_size)) {
+ u64 tmp = min(this_len - ret,
+ inode->i_size - pos - ret);
+ dout(" zero gap %llu to %llu\n",
+ pos + ret, pos + ret + tmp);
+ ceph_zero_page_vector_range(page_align + read + ret,
+ tmp, pages);
+ ret += tmp;
+ }
+
+ /* advance page cursor past fully-consumed pages */
+ didpages = (page_align + ret) >> PAGE_CACHE_SHIFT;
+ pos += ret;
+ read = pos - off;
+ left -= ret;
+ page_pos += didpages;
+ pages_left -= didpages;
+
+ /* hit stripe and need continue*/
+ if (left && hit_stripe && pos < inode->i_size)
+ goto more;
+ }
+
+ if (read > 0) {
+ ret = read;
+ /* did we bounce off eof? */
+ if (pos + left > inode->i_size)
+ *checkeof = 1;
+ }
+
+ dout("striped_read returns %d\n", ret);
+ return ret;
+}
+
+/*
+ * Completely synchronous read and write methods. Direct from __user
+ * buffer to osd, or directly to user pages (if O_DIRECT).
+ *
+ * If the read spans object boundary, just do multiple reads.
+ */
+static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
+ int *checkeof)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ struct page **pages;
+ u64 off = iocb->ki_pos;
+ int num_pages, ret;
+ size_t len = i->count;
+
+ dout("sync_read on file %p %llu~%u %s\n", file, off,
+ (unsigned)len,
+ (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
+ /*
+ * flush any page cache pages in this range. this
+ * will make concurrent normal and sync io slow,
+ * but it will at least behave sensibly when they are
+ * in sequence.
+ */
+ ret = filemap_write_and_wait_range(inode->i_mapping, off,
+ off + len);
+ if (ret < 0)
+ return ret;
+
+ if (file->f_flags & O_DIRECT) {
+ /* read segment by segment straight into user pages */
+ while (iov_iter_count(i)) {
+ void __user *data = i->iov[0].iov_base + i->iov_offset;
+ size_t len = i->iov[0].iov_len - i->iov_offset;
+
+ num_pages = calc_pages_for((unsigned long)data, len);
+ pages = ceph_get_direct_page_vector(data,
+ num_pages, true);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ ret = striped_read(inode, off, len,
+ pages, num_pages, checkeof,
+ 1, (unsigned long)data & ~PAGE_MASK);
+ ceph_put_page_vector(pages, num_pages, true);
+
+ if (ret <= 0)
+ break;
+ off += ret;
+ iov_iter_advance(i, ret);
+ if (ret < len)
+ break; /* short read: stop here */
+ }
+ } else {
+ /* read into kernel pages, then copy out to the iovec */
+ num_pages = calc_pages_for(off, len);
+ pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+ ret = striped_read(inode, off, len, pages,
+ num_pages, checkeof, 0, 0);
+ if (ret > 0) {
+ int l, k = 0;
+ size_t left = len = ret; /* NB: len now = bytes read */
+
+ while (left) {
+ void __user *data = i->iov[0].iov_base
+ + i->iov_offset;
+ l = min(i->iov[0].iov_len - i->iov_offset,
+ left);
+
+ ret = ceph_copy_page_vector_to_user(&pages[k],
+ data, off,
+ l);
+ if (ret > 0) {
+ iov_iter_advance(i, ret);
+ left -= ret;
+ off += ret;
+ /* recompute which page we're on */
+ k = calc_pages_for(iocb->ki_pos,
+ len - left + 1) - 1;
+ BUG_ON(k >= num_pages && left);
+ } else
+ break;
+ }
+ }
+ ceph_release_page_vector(pages, num_pages);
+ }
+
+ /* report progress (not the last sub-ret) and advance the file pos */
+ if (off > iocb->ki_pos) {
+ ret = off - iocb->ki_pos;
+ iocb->ki_pos = off;
+ }
+
+ dout("sync_read result %d\n", ret);
+ return ret;
+}
+
+/*
+ * Write commit request unsafe callback, called to tell us when a
+ * request is unsafe (that is, in flight--has been handed to the
+ * messenger to send to its target osd). It is called again when
+ * we've received a response message indicating the request is
+ * "safe" (its CEPH_OSD_FLAG_ONDISK flag is set), or when a request
+ * is completed early (and unsuccessfully) due to a timeout or
+ * interrupt.
+ *
+ * This is used if we requested both an ACK and ONDISK commit reply
+ * from the OSD.
+ */
+static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
+{
+ struct ceph_inode_info *ci = ceph_inode(req->r_inode);
+
+ dout("%s %p tid %llu %ssafe\n", __func__, req, req->r_tid,
+ unsafe ? "un" : "");
+
+ if (!unsafe) {
+ /* committed (or aborted): unhook and release the Fw ref */
+ spin_lock(&ci->i_unsafe_lock);
+ list_del_init(&req->r_unsafe_item);
+ spin_unlock(&ci->i_unsafe_lock);
+ ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR);
+ return;
+ }
+
+ /* in flight: pin Fw and track the request on the inode */
+ ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
+ spin_lock(&ci->i_unsafe_lock);
+ list_add_tail(&req->r_unsafe_item, &ci->i_unsafe_writes);
+ spin_unlock(&ci->i_unsafe_lock);
+}
+
+
+/*
+ * Synchronous write, straight from __user pointer or user pages.
+ *
+ * If write spans object boundary, just do multiple writes. (For a
+ * correct atomic write, we should e.g. take write locks on all
+ * objects, rollback on failure, etc.)
+ */
+static ssize_t
+ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, size_t count)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_snap_context *snapc;
+ struct ceph_vino vino;
+ struct ceph_osd_request *req;
+ struct page **pages;
+ int num_pages;
+ int written = 0;
+ int flags;
+ int check_caps = 0;
+ int page_align;
+ int ret;
+ struct timespec mtime = CURRENT_TIME;
+ loff_t pos = iocb->ki_pos;
+ struct iov_iter i;
+
+ /* snapshots are immutable */
+ if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
+ return -EROFS;
+
+ dout("sync_direct_write on file %p %lld~%u\n", file, pos,
+ (unsigned)count);
+
+ /* flush dirty pagecache first so the OSD write can't be overtaken */
+ ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + count);
+ if (ret < 0)
+ return ret;
+
+ ret = invalidate_inode_pages2_range(inode->i_mapping,
+ pos >> PAGE_CACHE_SHIFT,
+ (pos + count) >> PAGE_CACHE_SHIFT);
+ if (ret < 0)
+ dout("invalidate_inode_pages2_range returned %d\n", ret);
+
+ flags = CEPH_OSD_FLAG_ORDERSNAP |
+ CEPH_OSD_FLAG_ONDISK |
+ CEPH_OSD_FLAG_WRITE;
+
+ iov_iter_init(&i, iov, nr_segs, count, 0);
+
+ while (iov_iter_count(&i) > 0) {
+ void __user *data = i.iov->iov_base + i.iov_offset;
+ u64 len = i.iov->iov_len - i.iov_offset;
+
+ page_align = (unsigned long)data & ~PAGE_MASK;
+
+ snapc = ci->i_snap_realm->cached_context;
+ vino = ceph_vino(inode);
+ req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
+ vino, pos, &len,
+ 2,/*include a 'startsync' command*/
+ CEPH_OSD_OP_WRITE, flags, snapc,
+ ci->i_truncate_seq,
+ ci->i_truncate_size,
+ false);
+ if (IS_ERR(req)) {
+ /*
+ * Do NOT "goto out" here: the out label calls
+ * ceph_osdc_put_request() and req is an ERR_PTR,
+ * so that would dereference an invalid pointer.
+ */
+ ret = PTR_ERR(req);
+ break;
+ }
+
+ num_pages = calc_pages_for(page_align, len);
+ pages = ceph_get_direct_page_vector(data, num_pages, false);
+ if (IS_ERR(pages)) {
+ ret = PTR_ERR(pages);
+ goto out;
+ }
+
+ /*
+ * throw out any page cache pages in this range. this
+ * may block.
+ */
+ truncate_inode_pages_range(inode->i_mapping, pos,
+ (pos+len) | (PAGE_CACHE_SIZE-1));
+ osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align,
+ false, false);
+
+ /* BUG_ON(vino.snap != CEPH_NOSNAP); */
+ ceph_osdc_build_request(req, pos, snapc, vino.snap, &mtime);
+
+ ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+ if (!ret)
+ ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+
+ ceph_put_page_vector(pages, num_pages, false);
+
+out:
+ ceph_osdc_put_request(req);
+ if (ret == 0) {
+ /* segment written; advance and maybe grow i_size */
+ pos += len;
+ written += len;
+ iov_iter_advance(&i, (size_t)len);
+
+ if (pos > i_size_read(inode)) {
+ check_caps = ceph_inode_set_size(inode, pos);
+ if (check_caps)
+ ceph_check_caps(ceph_inode(inode),
+ CHECK_CAPS_AUTHONLY,
+ NULL);
+ }
+ } else
+ break;
+ }
+
+ /* report partial progress unless the caller must retry (-EOLDSNAPC) */
+ if (ret != -EOLDSNAPC && written > 0) {
+ iocb->ki_pos = pos;
+ ret = written;
+ }
+ return ret;
+}
+
+
+/*
+ * Synchronous write, straight from __user pointer or user pages.
+ *
+ * If write spans object boundary, just do multiple writes. (For a
+ * correct atomic write, we should e.g. take write locks on all
+ * objects, rollback on failure, etc.)
+ */
+static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, size_t count)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_snap_context *snapc;
+ struct ceph_vino vino;
+ struct ceph_osd_request *req;
+ struct page **pages;
+ u64 len;
+ int num_pages;
+ int written = 0;
+ int flags;
+ int check_caps = 0;
+ int ret;
+ struct timespec mtime = CURRENT_TIME;
+ loff_t pos = iocb->ki_pos;
+ struct iov_iter i;
+
+ /* snapshots are immutable */
+ if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
+ return -EROFS;
+
+ dout("sync_write on file %p %lld~%u\n", file, pos, (unsigned)count);
+
+ /* flush dirty pagecache first so the OSD write can't be overtaken */
+ ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + count);
+ if (ret < 0)
+ return ret;
+
+ ret = invalidate_inode_pages2_range(inode->i_mapping,
+ pos >> PAGE_CACHE_SHIFT,
+ (pos + count) >> PAGE_CACHE_SHIFT);
+ if (ret < 0)
+ dout("invalidate_inode_pages2_range returned %d\n", ret);
+
+ /* request both an ACK and an ONDISK commit reply */
+ flags = CEPH_OSD_FLAG_ORDERSNAP |
+ CEPH_OSD_FLAG_ONDISK |
+ CEPH_OSD_FLAG_WRITE |
+ CEPH_OSD_FLAG_ACK;
+
+ iov_iter_init(&i, iov, nr_segs, count, 0);
+
+ while ((len = iov_iter_count(&i)) > 0) {
+ size_t left;
+ int n;
+
+ snapc = ci->i_snap_realm->cached_context;
+ vino = ceph_vino(inode);
+ req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
+ vino, pos, &len, 1,
+ CEPH_OSD_OP_WRITE, flags, snapc,
+ ci->i_truncate_seq,
+ ci->i_truncate_size,
+ false);
+ if (IS_ERR(req)) {
+ /*
+ * Do NOT "goto out" here: the out label calls
+ * ceph_osdc_put_request() and req is an ERR_PTR,
+ * so that would dereference an invalid pointer.
+ */
+ ret = PTR_ERR(req);
+ break;
+ }
+
+ /*
+ * write from beginning of first page,
+ * regardless of io alignment
+ */
+ num_pages = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+ pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
+ if (IS_ERR(pages)) {
+ ret = PTR_ERR(pages);
+ goto out;
+ }
+
+ /* copy the user data into the bounce pages */
+ left = len;
+ for (n = 0; n < num_pages; n++) {
+ size_t plen = min_t(size_t, left, PAGE_SIZE);
+ ret = iov_iter_copy_from_user(pages[n], &i, 0, plen);
+ if (ret != plen) {
+ ret = -EFAULT;
+ break;
+ }
+ left -= ret;
+ iov_iter_advance(&i, ret);
+ }
+
+ if (ret < 0) {
+ ceph_release_page_vector(pages, num_pages);
+ goto out;
+ }
+
+ /* get a second commit callback */
+ req->r_unsafe_callback = ceph_sync_write_unsafe;
+ req->r_inode = inode;
+
+ osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
+ false, true);
+
+ /* BUG_ON(vino.snap != CEPH_NOSNAP); */
+ ceph_osdc_build_request(req, pos, snapc, vino.snap, &mtime);
+
+ ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+ if (!ret)
+ ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+
+out:
+ ceph_osdc_put_request(req);
+ if (ret == 0) {
+ /* segment written; advance and maybe grow i_size */
+ pos += len;
+ written += len;
+
+ if (pos > i_size_read(inode)) {
+ check_caps = ceph_inode_set_size(inode, pos);
+ if (check_caps)
+ ceph_check_caps(ceph_inode(inode),
+ CHECK_CAPS_AUTHONLY,
+ NULL);
+ }
+ } else
+ break;
+ }
+
+ /* report partial progress unless the caller must retry (-EOLDSNAPC) */
+ if (ret != -EOLDSNAPC && written > 0) {
+ ret = written;
+ iocb->ki_pos = pos;
+ }
+ return ret;
+}
+
+/*
+ * Wrap generic_file_aio_read with checks for cap bits on the inode.
+ * Atomically grab references, so that those bits are not released
+ * back to the MDS mid-read.
+ *
+ * Hmm, the sync read case isn't actually async... should it be?
+ */
+static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct file *filp = iocb->ki_filp;
+ struct ceph_file_info *fi = filp->private_data;
+ size_t len = iocb->ki_nbytes;
+ struct inode *inode = file_inode(filp);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ ssize_t ret;
+ int want, got = 0;
+ int checkeof = 0, read = 0; /* read = bytes from earlier passes */
+
+again:
+ dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
+ inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, inode);
+
+ if (fi->fmode & CEPH_FILE_MODE_LAZY)
+ want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
+ else
+ want = CEPH_CAP_FILE_CACHE;
+ ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
+ if (ret < 0)
+ return ret;
+
+ /* no cache cap, O_DIRECT, or explicit sync: bypass the page cache */
+ if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
+ (iocb->ki_filp->f_flags & O_DIRECT) ||
+ (fi->flags & CEPH_F_SYNC)) {
+ struct iov_iter i;
+
+ dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n",
+ inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
+ ceph_cap_string(got));
+
+ if (!read) {
+ /* validate the iovec on the first pass only */
+ ret = generic_segment_checks(iov, &nr_segs,
+ &len, VERIFY_WRITE);
+ if (ret)
+ goto out;
+ }
+
+ iov_iter_init(&i, iov, nr_segs, len, read);
+
+ /* hmm, this isn't really async... */
+ ret = ceph_sync_read(iocb, &i, &checkeof);
+ } else {
+ /*
+ * We can't modify the content of iov,
+ * so we only read from beginning.
+ */
+ if (read) {
+ iocb->ki_pos = pos;
+ len = iocb->ki_nbytes;
+ read = 0;
+ }
+ dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
+ inode, ceph_vinop(inode), pos, (unsigned)len,
+ ceph_cap_string(got));
+
+ ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
+ }
+out:
+ dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
+ inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
+ ceph_put_cap_refs(ci, got);
+
+ /* a short sync read may have hit a hole: refresh size and retry */
+ if (checkeof && ret >= 0) {
+ int statret = ceph_do_getattr(inode,
+ CEPH_STAT_CAP_SIZE);
+
+ /* hit EOF or hole? */
+ if (statret == 0 && iocb->ki_pos < inode->i_size &&
+ ret < len) {
+ dout("sync_read hit hole, ppos %lld < size %lld"
+ ", reading more\n", iocb->ki_pos,
+ inode->i_size);
+
+ read += ret;
+ len -= ret;
+ checkeof = 0;
+ goto again;
+ }
+ }
+
+ /* fold in the bytes collected by earlier passes */
+ if (ret >= 0)
+ ret += read;
+
+ return ret;
+}
+
+/*
+ * Take cap references to avoid releasing caps to MDS mid-write.
+ *
+ * If we are synchronous, and write with an old snap context, the OSD
+ * may return EOLDSNAPC. In that case, retry the write.. _after_
+ * dropping our cap refs and allowing the pending snap to logically
+ * complete _before_ this write occurs.
+ *
+ * If we are near ENOSPC, write synchronously.
+ */
+static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct file *file = iocb->ki_filp;
+ struct ceph_file_info *fi = file->private_data;
+ struct inode *inode = file_inode(file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_osd_client *osdc =
+ &ceph_sb_to_client(inode->i_sb)->client->osdc;
+ ssize_t count, written = 0;
+ int err, want, got;
+
+ if (ceph_snap(inode) != CEPH_NOSNAP)
+ return -EROFS;
+
+ mutex_lock(&inode->i_mutex);
+
+ err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
+ if (err)
+ goto out;
+
+ /* We can write back this queue in page reclaim */
+ current->backing_dev_info = file->f_mapping->backing_dev_info;
+
+ /* applies O_APPEND, rlimits, s_maxbytes; may adjust pos/count */
+ err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
+ if (err)
+ goto out;
+
+ if (count == 0)
+ goto out;
+
+ err = file_remove_suid(file);
+ if (err)
+ goto out;
+
+ err = file_update_time(file);
+ if (err)
+ goto out;
+
+retry_snap:
+ /* refuse writes when the cluster reports itself full */
+ if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) {
+ err = -ENOSPC;
+ goto out;
+ }
+
+ dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
+ inode, ceph_vinop(inode), pos, count, inode->i_size);
+ if (fi->fmode & CEPH_FILE_MODE_LAZY)
+ want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
+ else
+ want = CEPH_CAP_FILE_BUFFER;
+ got = 0;
+ err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos + count);
+ if (err < 0)
+ goto out;
+
+ dout("aio_write %p %llx.%llx %llu~%zd got cap refs on %s\n",
+ inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));
+
+ /* no buffer cap, O_DIRECT, or explicit sync: write synchronously */
+ if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
+ (file->f_flags & O_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
+ mutex_unlock(&inode->i_mutex);
+ if (file->f_flags & O_DIRECT)
+ written = ceph_sync_direct_write(iocb, iov,
+ nr_segs, count);
+ else
+ written = ceph_sync_write(iocb, iov, nr_segs, count);
+ /* stale snap context: let the pending snap settle, retry */
+ if (written == -EOLDSNAPC) {
+ dout("aio_write %p %llx.%llx %llu~%u"
+ "got EOLDSNAPC, retrying\n",
+ inode, ceph_vinop(inode),
+ pos, (unsigned)iov->iov_len);
+ mutex_lock(&inode->i_mutex);
+ goto retry_snap;
+ }
+ } else {
+ loff_t old_size = inode->i_size;
+ /*
+ * No need to acquire the i_truncate_mutex. Because
+ * the MDS revokes Fwb caps before sending truncate
+ * message to us. We can't get Fwb cap while there
+ * are pending vmtruncate. So write and vmtruncate
+ * can not run at the same time
+ */
+ written = generic_file_buffered_write(iocb, iov, nr_segs,
+ pos, &iocb->ki_pos,
+ count, 0);
+ if (inode->i_size > old_size)
+ ceph_fscache_update_objectsize(inode);
+ mutex_unlock(&inode->i_mutex);
+ }
+
+ if (written >= 0) {
+ /* mark Fw dirty so the caps layer flushes size/mtime to MDS */
+ int dirty;
+ spin_lock(&ci->i_ceph_lock);
+ dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
+ spin_unlock(&ci->i_ceph_lock);
+ if (dirty)
+ __mark_inode_dirty(inode, dirty);
+ }
+
+ dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n",
+ inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
+ ceph_cap_string(got));
+ ceph_put_cap_refs(ci, got);
+
+ /* O_SYNC / near-full cluster: flush what we just wrote */
+ if (written >= 0 &&
+ ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) ||
+ ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
+ err = vfs_fsync_range(file, pos, pos + written - 1, 1);
+ if (err < 0)
+ written = err;
+ }
+
+ goto out_unlocked;
+
+out:
+ mutex_unlock(&inode->i_mutex);
+out_unlocked:
+ current->backing_dev_info = NULL;
+ return written ? written : err;
+}
+
+/*
+ * llseek. be sure to verify file size on SEEK_END.
+ */
+static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
+{
+ struct inode *inode = file->f_mapping->host;
+ int ret;
+
+ mutex_lock(&inode->i_mutex);
+
+ /* these modes depend on i_size; refresh it from the MDS first */
+ if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
+ ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
+ if (ret < 0) {
+ offset = ret;
+ goto out;
+ }
+ }
+
+ switch (whence) {
+ case SEEK_END:
+ offset += inode->i_size;
+ break;
+ case SEEK_CUR:
+ /*
+ * Here we special-case the lseek(fd, 0, SEEK_CUR)
+ * position-querying operation. Avoid rewriting the "same"
+ * f_pos value back to the file because a concurrent read(),
+ * write() or lseek() might have altered it
+ */
+ if (offset == 0) {
+ offset = file->f_pos;
+ goto out;
+ }
+ offset += file->f_pos;
+ break;
+ case SEEK_DATA:
+ /* no hole tracking: the whole file is treated as data */
+ if (offset >= inode->i_size) {
+ ret = -ENXIO;
+ goto out;
+ }
+ break;
+ case SEEK_HOLE:
+ /* ... and the only "hole" is the one at EOF */
+ if (offset >= inode->i_size) {
+ ret = -ENXIO;
+ goto out;
+ }
+ offset = inode->i_size;
+ break;
+ }
+
+ /* clamps to [0, s_maxbytes] and stores the new f_pos */
+ offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
+
+out:
+ mutex_unlock(&inode->i_mutex);
+ return offset;
+}
+
+/*
+ * Zero @size bytes starting at @offset within the (single) page that
+ * contains @offset, but only if that page is already in the page cache.
+ */
+static inline void ceph_zero_partial_page(
+ struct inode *inode, loff_t offset, unsigned size)
+{
+ pgoff_t index = offset >> PAGE_CACHE_SHIFT;
+ struct page *page = find_lock_page(inode->i_mapping, index);
+
+ if (!page)
+ return;
+
+ wait_on_page_writeback(page);
+ zero_user(page, offset & (PAGE_CACHE_SIZE - 1), size);
+ unlock_page(page);
+ page_cache_release(page);
+}
+
+/*
+ * Zero the page cache over [offset, offset+length): partially zero the
+ * unaligned head and tail pages, and drop the whole pages in between.
+ */
+static void ceph_zero_pagecache_range(struct inode *inode, loff_t offset,
+ loff_t length)
+{
+ loff_t head_end = round_up(offset, PAGE_CACHE_SIZE);
+ loff_t chunk;
+
+ /* unaligned head: zero up to the next page boundary */
+ if (offset < head_end) {
+ chunk = min(length, head_end - offset);
+ ceph_zero_partial_page(inode, offset, chunk);
+ offset += chunk;
+ length -= chunk;
+ }
+
+ /* whole pages in the middle: just toss them */
+ if (length >= PAGE_CACHE_SIZE) {
+ chunk = round_down(length, PAGE_CACHE_SIZE);
+ truncate_pagecache_range(inode, offset, offset + chunk - 1);
+ offset += chunk;
+ length -= chunk;
+ }
+
+ /* unaligned tail */
+ if (length)
+ ceph_zero_partial_page(inode, offset, length);
+}
+
+/*
+ * Zero a range within a single RADOS object. A NULL @length means
+ * "the rest of the object": the object is then deleted or truncated
+ * instead of zeroed. -ENOENT from the OSD (object absent, i.e.
+ * already a hole) is treated as success.
+ */
+static int ceph_zero_partial_object(struct inode *inode,
+ loff_t offset, loff_t *length)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_osd_request *req;
+ int ret = 0;
+ loff_t zero = 0;
+ int op;
+
+ if (!length) {
+ /* whole object: presumably offset!=0 selects DELETE of the
+ * object vs TRUNCATE at its start — verify against the
+ * layout mapping done in ceph_osdc_new_request() */
+ op = offset ? CEPH_OSD_OP_DELETE : CEPH_OSD_OP_TRUNCATE;
+ length = &zero;
+ } else {
+ op = CEPH_OSD_OP_ZERO;
+ }
+
+ req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
+ ceph_vino(inode),
+ offset, length,
+ 1, op,
+ CEPH_OSD_FLAG_WRITE |
+ CEPH_OSD_FLAG_ONDISK,
+ NULL, 0, 0, false);
+ if (IS_ERR(req)) {
+ ret = PTR_ERR(req);
+ goto out;
+ }
+
+ ceph_osdc_build_request(req, offset, NULL, ceph_vino(inode).snap,
+ &inode->i_mtime);
+
+ ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+ if (!ret) {
+ ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+ if (ret == -ENOENT)
+ ret = 0;
+ }
+ ceph_osdc_put_request(req);
+
+out:
+ return ret;
+}
+
+/*
+ * Zero [offset, offset+length) object by object: partially zero the
+ * leading objects up to the next period (stripe set) boundary, delete
+ * or truncate whole stripe sets in the middle, then zero the tail.
+ */
+static int ceph_zero_objects(struct inode *inode, loff_t offset, loff_t length)
+{
+ int ret = 0;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ s32 stripe_unit = ceph_file_layout_su(ci->i_layout);
+ s32 stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
+ s32 object_size = ceph_file_layout_object_size(ci->i_layout);
+ /* widen before multiplying: s32 * s32 would overflow in int
+ * arithmetic for large object_size * stripe_count products */
+ u64 object_set_size = (u64)object_size * stripe_count;
+ u64 nearly, t;
+
+ /* round offset up to next period boundary */
+ nearly = offset + object_set_size - 1;
+ t = nearly;
+ nearly -= do_div(t, object_set_size);
+
+ /* head: partial objects up to the period boundary */
+ while (length && offset < nearly) {
+ loff_t size = length;
+ ret = ceph_zero_partial_object(inode, offset, &size);
+ if (ret < 0)
+ return ret;
+ offset += size;
+ length -= size;
+ }
+ /* middle: whole stripe sets, one object per stripe */
+ while (length >= object_set_size) {
+ int i;
+ loff_t pos = offset;
+ for (i = 0; i < stripe_count; ++i) {
+ /* NULL length: drop/truncate the whole object */
+ ret = ceph_zero_partial_object(inode, pos, NULL);
+ if (ret < 0)
+ return ret;
+ pos += stripe_unit;
+ }
+ offset += object_set_size;
+ length -= object_set_size;
+ }
+ /* tail: remaining partial objects */
+ while (length) {
+ loff_t size = length;
+ ret = ceph_zero_partial_object(inode, offset, &size);
+ if (ret < 0)
+ return ret;
+ offset += size;
+ length -= size;
+ }
+ return ret;
+}
+
+/*
+ * fallocate: supports plain preallocation (size extension only — no
+ * actual space reservation), FALLOC_FL_KEEP_SIZE, and
+ * FALLOC_FL_PUNCH_HOLE. Any other mode bit is rejected.
+ */
+static long ceph_fallocate(struct file *file, int mode,
+ loff_t offset, loff_t length)
+{
+ struct ceph_file_info *fi = file->private_data;
+ struct inode *inode = file_inode(file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_osd_client *osdc =
+ &ceph_inode_to_client(inode)->client->osdc;
+ int want, got = 0;
+ int dirty;
+ int ret = 0;
+ loff_t endoff = 0;
+ loff_t size;
+
+ /*
+ * Reject unsupported modes explicitly; otherwise e.g.
+ * FALLOC_FL_ZERO_RANGE would silently be treated as a plain
+ * allocate. The VFS contract expects -EOPNOTSUPP here.
+ */
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ return -EOPNOTSUPP;
+
+ if (!S_ISREG(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ if (IS_SWAPFILE(inode))
+ return -ETXTBSY;
+
+ mutex_lock(&inode->i_mutex);
+
+ if (ceph_snap(inode) != CEPH_NOSNAP) {
+ ret = -EROFS;
+ goto unlock;
+ }
+
+ /* allocation on a full cluster fails; punching holes frees space */
+ if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) &&
+ !(mode & FALLOC_FL_PUNCH_HOLE)) {
+ ret = -ENOSPC;
+ goto unlock;
+ }
+
+ size = i_size_read(inode);
+ if (!(mode & FALLOC_FL_KEEP_SIZE))
+ endoff = offset + length;
+
+ if (fi->fmode & CEPH_FILE_MODE_LAZY)
+ want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
+ else
+ want = CEPH_CAP_FILE_BUFFER;
+
+ ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
+ if (ret < 0)
+ goto unlock;
+
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
+ if (offset < size)
+ ceph_zero_pagecache_range(inode, offset, length);
+ ret = ceph_zero_objects(inode, offset, length);
+ } else if (endoff > size) {
+ /* extending: invalidate stale pages past old EOF, grow size */
+ truncate_pagecache_range(inode, size, -1);
+ if (ceph_inode_set_size(inode, endoff))
+ ceph_check_caps(ceph_inode(inode),
+ CHECK_CAPS_AUTHONLY, NULL);
+ }
+
+ if (!ret) {
+ /* mark Fw dirty so size/mtime get flushed to the MDS */
+ spin_lock(&ci->i_ceph_lock);
+ dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
+ spin_unlock(&ci->i_ceph_lock);
+ if (dirty)
+ __mark_inode_dirty(inode, dirty);
+ }
+
+ ceph_put_cap_refs(ci, got);
+unlock:
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+}
+
+/*
+ * file operations for regular cephfs files; sync read/write are
+ * implemented via the generic do_sync_* wrappers over the aio entry
+ * points above.
+ */
+const struct file_operations ceph_file_fops = {
+ .open = ceph_open,
+ .release = ceph_release,
+ .llseek = ceph_llseek,
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .aio_read = ceph_aio_read,
+ .aio_write = ceph_aio_write,
+ .mmap = ceph_mmap,
+ .fsync = ceph_fsync,
+ .lock = ceph_lock,
+ .flock = ceph_flock,
+ .splice_read = generic_file_splice_read,
+ .splice_write = generic_file_splice_write,
+ .unlocked_ioctl = ceph_ioctl,
+ .compat_ioctl = ceph_ioctl,
+ .fallocate = ceph_fallocate,
+};
+
diff --git a/ceph/inode.c b/ceph/inode.c
new file mode 100644
index 0000000..233c6f9
--- /dev/null
+++ b/ceph/inode.c
@@ -0,0 +1,1927 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/namei.h>
+#include <linux/writeback.h>
+#include <linux/vmalloc.h>
+#include <linux/posix_acl.h>
+
+#include "super.h"
+#include "mds_client.h"
+#include "cache.h"
+#include <linux/ceph/decode.h>
+
+/*
+ * Ceph inode operations
+ *
+ * Implement basic inode helpers (get, alloc) and inode ops (getattr,
+ * setattr, etc.), xattr helpers, and helpers for assimilating
+ * metadata returned by the MDS into our cache.
+ *
+ * Also define helpers for doing asynchronous writeback, invalidation,
+ * and truncation for the benefit of those who can't afford to block
+ * (typically because they are in the message handler path).
+ */
+
+static const struct inode_operations ceph_symlink_iops;
+
+static void ceph_invalidate_work(struct work_struct *work);
+static void ceph_writeback_work(struct work_struct *work);
+static void ceph_vmtruncate_work(struct work_struct *work);
+
+/*
+ * find or create an inode, given the ceph ino number
+ */
+static int ceph_set_ino_cb(struct inode *inode, void *data)
+{
+ ceph_inode(inode)->i_vino = *(struct ceph_vino *)data;
+ inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data);
+ return 0;
+}
+
/*
 * Fetch (or create) the in-core inode for a given ceph vino.
 *
 * iget5_locked() hashes on the squashed ino_t value; ceph_ino_compare
 * presumably matches on the full vino so distinct snapshots of the
 * same ino map to distinct inodes — confirm against its definition.
 *
 * Returns a referenced inode, or ERR_PTR(-ENOMEM).
 */
struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino)
{
	struct inode *inode;
	ino_t t = ceph_vino_to_ino(vino);

	inode = iget5_locked(sb, t, ceph_ino_compare, ceph_set_ino_cb, &vino);
	if (inode == NULL)
		return ERR_PTR(-ENOMEM);
	if (inode->i_state & I_NEW) {
		dout("get_inode created new inode %p %llx.%llx ino %llx\n",
		     inode, ceph_vinop(inode), (u64)inode->i_ino);
		/* make the new inode visible to concurrent lookups */
		unlock_new_inode(inode);
	}

	dout("get_inode on %lu=%llx.%llx got %p\n", inode->i_ino, vino.ino,
	     vino.snap, inode);
	return inode;
}
+
+/*
+ * get/construct snapdir inode for a given directory
+ */
+struct inode *ceph_get_snapdir(struct inode *parent)
+{
+ struct ceph_vino vino = {
+ .ino = ceph_ino(parent),
+ .snap = CEPH_SNAPDIR,
+ };
+ struct inode *inode = ceph_get_inode(parent->i_sb, vino);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ BUG_ON(!S_ISDIR(parent->i_mode));
+ if (IS_ERR(inode))
+ return inode;
+ inode->i_mode = parent->i_mode;
+ inode->i_uid = parent->i_uid;
+ inode->i_gid = parent->i_gid;
+ inode->i_op = &ceph_dir_iops;
+ inode->i_fop = &ceph_dir_fops;
+ ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */
+ ci->i_rbytes = 0;
+ return inode;
+}
+
/*
 * Inode operations for regular files; fill_inode() also installs this
 * table on special files (fifo/blk/chr/sock).  Attribute and xattr
 * handling plus POSIX ACL get/set route to ceph implementations.
 */
const struct inode_operations ceph_file_iops = {
	.permission = ceph_permission,
	.setattr = ceph_setattr,
	.getattr = ceph_getattr,
	.setxattr = ceph_setxattr,
	.getxattr = ceph_getxattr,
	.listxattr = ceph_listxattr,
	.removexattr = ceph_removexattr,
	.get_acl = ceph_get_acl,
	.set_acl = ceph_set_acl,
};
+
+
+/*
+ * We use a 'frag tree' to keep track of the MDS's directory fragments
+ * for a given inode (usually there is just a single fragment). We
+ * need to know when a child frag is delegated to a new MDS, or when
+ * it is flagged as replicated, so we can direct our requests
+ * accordingly.
+ */
+
+/*
+ * find/create a frag in the tree
+ */
+static struct ceph_inode_frag *__get_or_create_frag(struct ceph_inode_info *ci,
+ u32 f)
+{
+ struct rb_node **p;
+ struct rb_node *parent = NULL;
+ struct ceph_inode_frag *frag;
+ int c;
+
+ p = &ci->i_fragtree.rb_node;
+ while (*p) {
+ parent = *p;
+ frag = rb_entry(parent, struct ceph_inode_frag, node);
+ c = ceph_frag_compare(f, frag->frag);
+ if (c < 0)
+ p = &(*p)->rb_left;
+ else if (c > 0)
+ p = &(*p)->rb_right;
+ else
+ return frag;
+ }
+
+ frag = kmalloc(sizeof(*frag), GFP_NOFS);
+ if (!frag) {
+ pr_err("__get_or_create_frag ENOMEM on %p %llx.%llx "
+ "frag %x\n", &ci->vfs_inode,
+ ceph_vinop(&ci->vfs_inode), f);
+ return ERR_PTR(-ENOMEM);
+ }
+ frag->frag = f;
+ frag->split_by = 0;
+ frag->mds = -1;
+ frag->ndist = 0;
+
+ rb_link_node(&frag->node, parent, p);
+ rb_insert_color(&frag->node, &ci->i_fragtree);
+
+ dout("get_or_create_frag added %llx.%llx frag %x\n",
+ ceph_vinop(&ci->vfs_inode), f);
+ return frag;
+}
+
+/*
+ * find a specific frag @f
+ */
+struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, u32 f)
+{
+ struct rb_node *n = ci->i_fragtree.rb_node;
+
+ while (n) {
+ struct ceph_inode_frag *frag =
+ rb_entry(n, struct ceph_inode_frag, node);
+ int c = ceph_frag_compare(f, frag->frag);
+ if (c < 0)
+ n = n->rb_left;
+ else if (c > 0)
+ n = n->rb_right;
+ else
+ return frag;
+ }
+ return NULL;
+}
+
+/*
+ * Choose frag containing the given value @v. If @pfrag is
+ * specified, copy the frag delegation info to the caller if
+ * it is present.
+ */
/*
 * Walk the frag tree from the root to the leaf that contains value @v.
 *
 * @pfrag: if non-NULL and a frag node with delegation info is found,
 *         it is copied out here.
 * @found: if non-NULL, set to 1 when such a node was found, else 0.
 *
 * Returns the chosen (leaf) frag id.
 */
u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
		     struct ceph_inode_frag *pfrag,
		     int *found)
{
	u32 t = ceph_frag_make(0, 0);	/* start at the root frag */
	struct ceph_inode_frag *frag;
	unsigned nway, i;
	u32 n;

	if (found)
		*found = 0;

	mutex_lock(&ci->i_fragtree_mutex);
	while (1) {
		WARN_ON(!ceph_frag_contains_value(t, v));
		frag = __ceph_find_frag(ci, t);
		if (!frag)
			break; /* t is a leaf */
		if (frag->split_by == 0) {
			/* unsplit node: this is our answer */
			if (pfrag)
				memcpy(pfrag, frag, sizeof(*pfrag));
			if (found)
				*found = 1;
			break;
		}

		/* choose child */
		nway = 1 << frag->split_by;
		dout("choose_frag(%x) %x splits by %d (%d ways)\n", v, t,
		     frag->split_by, nway);
		for (i = 0; i < nway; i++) {
			n = ceph_frag_make_child(t, frag->split_by, i);
			if (ceph_frag_contains_value(n, v)) {
				t = n;
				break;
			}
		}
		/* exactly one child must contain v */
		BUG_ON(i == nway);
	}
	dout("choose_frag(%x) = %x\n", v, t);

	mutex_unlock(&ci->i_fragtree_mutex);
	return t;
}
+
+/*
+ * Process dirfrag (delegation) info from the mds. Include leaf
+ * fragment in tree ONLY if ndist > 0. Otherwise, only
+ * branches/splits are included in i_fragtree)
+ */
/*
 * Process dirfrag (delegation) info from the mds.  Include leaf
 * fragment in tree ONLY if ndist > 0.  Otherwise, only
 * branches/splits are included in i_fragtree.
 *
 * Returns 0, or -ENOMEM if the frag node could not be allocated
 * (treated as non-fatal by this function's design; see comment below).
 */
static int ceph_fill_dirfrag(struct inode *inode,
			     struct ceph_mds_reply_dirfrag *dirinfo)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_inode_frag *frag;
	u32 id = le32_to_cpu(dirinfo->frag);
	int mds = le32_to_cpu(dirinfo->auth);
	int ndist = le32_to_cpu(dirinfo->ndist);
	int i;
	int err = 0;

	mutex_lock(&ci->i_fragtree_mutex);
	if (ndist == 0) {
		/* no delegation info needed. */
		frag = __ceph_find_frag(ci, id);
		if (!frag)
			goto out;
		if (frag->split_by == 0) {
			/* tree leaf, remove */
			dout("fill_dirfrag removed %llx.%llx frag %x"
			     " (no ref)\n", ceph_vinop(inode), id);
			rb_erase(&frag->node, &ci->i_fragtree);
			kfree(frag);
		} else {
			/* tree branch, keep and clear */
			dout("fill_dirfrag cleared %llx.%llx frag %x"
			     " referral\n", ceph_vinop(inode), id);
			frag->mds = -1;
			frag->ndist = 0;
		}
		goto out;
	}


	/* find/add this frag to store mds delegation info */
	frag = __get_or_create_frag(ci, id);
	if (IS_ERR(frag)) {
		/* this is not the end of the world; we can continue
		   with bad/inaccurate delegation info */
		pr_err("fill_dirfrag ENOMEM on mds ref %llx.%llx fg %x\n",
		       ceph_vinop(inode), le32_to_cpu(dirinfo->frag));
		err = -ENOMEM;
		goto out;
	}

	/* record the authoritative mds and replica list (capped) */
	frag->mds = mds;
	frag->ndist = min_t(u32, ndist, CEPH_MAX_DIRFRAG_REP);
	for (i = 0; i < frag->ndist; i++)
		frag->dist[i] = le32_to_cpu(dirinfo->dist[i]);
	dout("fill_dirfrag %llx.%llx frag %x ndist=%d\n",
	     ceph_vinop(inode), frag->frag, frag->ndist);

out:
	mutex_unlock(&ci->i_fragtree_mutex);
	return err;
}
+
+
+/*
+ * initialize a newly allocated inode.
+ */
/*
 * Allocate and initialize a new ceph inode from the inode slab cache.
 * Every field of the ceph-private part is initialized explicitly here;
 * the VFS part of the inode is set up later by the caller/VFS.
 *
 * Returns the embedded struct inode, or NULL on allocation failure.
 */
struct inode *ceph_alloc_inode(struct super_block *sb)
{
	struct ceph_inode_info *ci;
	int i;

	ci = kmem_cache_alloc(ceph_inode_cachep, GFP_NOFS);
	if (!ci)
		return NULL;

	dout("alloc_inode %p\n", &ci->vfs_inode);

	spin_lock_init(&ci->i_ceph_lock);

	/* versioning / completion bookkeeping */
	ci->i_version = 0;
	ci->i_time_warp_seq = 0;
	ci->i_ceph_flags = 0;
	atomic_set(&ci->i_release_count, 1);
	atomic_set(&ci->i_complete_count, 0);
	ci->i_symlink = NULL;

	memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));

	/* directory fragment tree */
	ci->i_fragtree = RB_ROOT;
	mutex_init(&ci->i_fragtree_mutex);

	/* xattr cache */
	ci->i_xattrs.blob = NULL;
	ci->i_xattrs.prealloc_blob = NULL;
	ci->i_xattrs.dirty = false;
	ci->i_xattrs.index = RB_ROOT;
	ci->i_xattrs.count = 0;
	ci->i_xattrs.names_size = 0;
	ci->i_xattrs.vals_size = 0;
	ci->i_xattrs.version = 0;
	ci->i_xattrs.index_version = 0;

	/* capability state */
	ci->i_caps = RB_ROOT;
	ci->i_auth_cap = NULL;
	ci->i_dirty_caps = 0;
	ci->i_flushing_caps = 0;
	INIT_LIST_HEAD(&ci->i_dirty_item);
	INIT_LIST_HEAD(&ci->i_flushing_item);
	ci->i_cap_flush_seq = 0;
	ci->i_cap_flush_last_tid = 0;
	memset(&ci->i_cap_flush_tid, 0, sizeof(ci->i_cap_flush_tid));
	init_waitqueue_head(&ci->i_cap_wq);
	ci->i_hold_caps_min = 0;
	ci->i_hold_caps_max = 0;
	INIT_LIST_HEAD(&ci->i_cap_delay_list);
	INIT_LIST_HEAD(&ci->i_cap_snaps);
	ci->i_head_snapc = NULL;
	ci->i_snap_caps = 0;
	ci->i_cap_exporting_issued = 0;

	for (i = 0; i < CEPH_FILE_MODE_NUM; i++)
		ci->i_nr_by_mode[i] = 0;

	/* truncation state */
	mutex_init(&ci->i_truncate_mutex);
	ci->i_truncate_seq = 0;
	ci->i_truncate_size = 0;
	ci->i_truncate_pending = 0;

	/* size/max-size negotiation with the MDS */
	ci->i_max_size = 0;
	ci->i_reported_size = 0;
	ci->i_wanted_max_size = 0;
	ci->i_requested_max_size = 0;

	/* reference counts by usage type */
	ci->i_pin_ref = 0;
	ci->i_rd_ref = 0;
	ci->i_rdcache_ref = 0;
	ci->i_wr_ref = 0;
	ci->i_wb_ref = 0;
	ci->i_wrbuffer_ref = 0;
	ci->i_wrbuffer_ref_head = 0;
	ci->i_shared_gen = 0;
	ci->i_rdcache_gen = 0;
	ci->i_rdcache_revoking = 0;

	INIT_LIST_HEAD(&ci->i_unsafe_writes);
	INIT_LIST_HEAD(&ci->i_unsafe_dirops);
	spin_lock_init(&ci->i_unsafe_lock);

	/* snapshot context */
	ci->i_snap_realm = NULL;
	INIT_LIST_HEAD(&ci->i_snap_realm_item);
	INIT_LIST_HEAD(&ci->i_snap_flush_item);

	/* deferred work (see the work handlers in this file) */
	INIT_WORK(&ci->i_wb_work, ceph_writeback_work);
	INIT_WORK(&ci->i_pg_inv_work, ceph_invalidate_work);

	INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);

	ceph_fscache_inode_init(ci);

	return &ci->vfs_inode;
}
+
+static void ceph_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ kmem_cache_free(ceph_inode_cachep, ci);
+}
+
/*
 * Tear down a ceph inode: release caps, drop any residual snap realm
 * reference, free the symlink target, frag tree and xattr buffers,
 * then hand the memory back via an RCU-deferred free.
 */
void ceph_destroy_inode(struct inode *inode)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_inode_frag *frag;
	struct rb_node *n;

	dout("destroy_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode));

	ceph_fscache_unregister_inode_cookie(ci);

	ceph_queue_caps_release(inode);

	/*
	 * we may still have a snap_realm reference if there are stray
	 * caps in i_cap_exporting_issued or i_snap_caps.
	 */
	if (ci->i_snap_realm) {
		struct ceph_mds_client *mdsc =
			ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
		struct ceph_snap_realm *realm = ci->i_snap_realm;

		dout(" dropping residual ref to snap realm %p\n", realm);
		spin_lock(&realm->inodes_with_caps_lock);
		list_del_init(&ci->i_snap_realm_item);
		spin_unlock(&realm->inodes_with_caps_lock);
		ceph_put_snap_realm(mdsc, realm);
	}

	kfree(ci->i_symlink);
	/* drain and free the whole frag rbtree */
	while ((n = rb_first(&ci->i_fragtree)) != NULL) {
		frag = rb_entry(n, struct ceph_inode_frag, node);
		rb_erase(n, &ci->i_fragtree);
		kfree(frag);
	}

	__ceph_destroy_xattrs(ci);
	if (ci->i_xattrs.blob)
		ceph_buffer_put(ci->i_xattrs.blob);
	if (ci->i_xattrs.prealloc_blob)
		ceph_buffer_put(ci->i_xattrs.prealloc_blob);

	/* defer the actual free until after the RCU grace period */
	call_rcu(&inode->i_rcu, ceph_i_callback);
}
+
/*
 * VFS ->drop_inode: returning 1 tells the VFS to evict the inode as
 * soon as its last reference is dropped, rather than caching it.
 */
int ceph_drop_inode(struct inode *inode)
{
	/*
	 * Positive dentry and corresponding inode are always accompanied
	 * in MDS reply. So no need to keep inode in the cache after
	 * dropping all its aliases.
	 */
	return 1;
}
+
+/*
+ * Helpers to fill in size, ctime, mtime, and atime. We have to be
+ * careful because either the client or MDS may have more up to date
+ * info, depending on which capabilities are held, and whether
+ * time_warp_seq or truncate_seq have increased. (Ordinarily, mtime
+ * and size are monotonically increasing, except when utimes() or
+ * truncate() increments the corresponding _seq values.)
+ */
/*
 * Update i_size/truncate state from MDS-supplied values, respecting
 * truncate_seq ordering (see the comment block above for the
 * monotonicity rules).
 *
 * Returns 1 if a vmtruncate needs to be queued by the caller, else 0.
 * Caller holds i_ceph_lock (NOTE(review): inferred from callers in
 * this file — confirm).
 */
int ceph_fill_file_size(struct inode *inode, int issued,
			u32 truncate_seq, u64 truncate_size, u64 size)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	int queue_trunc = 0;

	/* take the new size if the MDS's view is newer (or grew) */
	if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 ||
	    (truncate_seq == ci->i_truncate_seq && size > inode->i_size)) {
		dout("size %lld -> %llu\n", inode->i_size, size);
		inode->i_size = size;
		inode->i_blocks = (size + (1<<9) - 1) >> 9;
		ci->i_reported_size = size;
		if (truncate_seq != ci->i_truncate_seq) {
			dout("truncate_seq %u -> %u\n",
			     ci->i_truncate_seq, truncate_seq);
			ci->i_truncate_seq = truncate_seq;

			/* the MDS should have revoked these caps */
			WARN_ON_ONCE(issued & (CEPH_CAP_FILE_EXCL |
					       CEPH_CAP_FILE_RD |
					       CEPH_CAP_FILE_WR |
					       CEPH_CAP_FILE_LAZYIO));
			/*
			 * If we hold relevant caps, or in the case where we're
			 * not the only client referencing this file and we
			 * don't hold those caps, then we need to check whether
			 * the file is either opened or mmaped
			 */
			if ((issued & (CEPH_CAP_FILE_CACHE|
				       CEPH_CAP_FILE_BUFFER)) ||
			    mapping_mapped(inode->i_mapping) ||
			    __ceph_caps_file_wanted(ci)) {
				ci->i_truncate_pending++;
				queue_trunc = 1;
			}
		}
	}
	/* truncate_size only advances with (or at) the current seq */
	if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) >= 0 &&
	    ci->i_truncate_size != truncate_size) {
		dout("truncate_size %lld -> %llu\n", ci->i_truncate_size,
		     truncate_size);
		ci->i_truncate_size = truncate_size;
	}

	if (queue_trunc)
		ceph_fscache_invalidate(inode);

	return queue_trunc;
}
+
/*
 * Merge MDS-supplied c/m/atime into the inode, arbitrating between
 * local and MDS values via time_warp_seq and the caps we hold:
 *  - with write/excl caps, only move times forward (or take the MDS
 *    values wholesale on a time warp);
 *  - without such caps, the MDS values are authoritative.
 */
void ceph_fill_file_time(struct inode *inode, int issued,
			 u64 time_warp_seq, struct timespec *ctime,
			 struct timespec *mtime, struct timespec *atime)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	int warn = 0;

	if (issued & (CEPH_CAP_FILE_EXCL|
		      CEPH_CAP_FILE_WR|
		      CEPH_CAP_FILE_BUFFER|
		      CEPH_CAP_AUTH_EXCL|
		      CEPH_CAP_XATTR_EXCL)) {
		/* ctime only ever moves forward here */
		if (timespec_compare(ctime, &inode->i_ctime) > 0) {
			dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n",
			     inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
			     ctime->tv_sec, ctime->tv_nsec);
			inode->i_ctime = *ctime;
		}
		if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) {
			/* the MDS did a utimes() */
			dout("mtime %ld.%09ld -> %ld.%09ld "
			     "tw %d -> %d\n",
			     inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
			     mtime->tv_sec, mtime->tv_nsec,
			     ci->i_time_warp_seq, (int)time_warp_seq);

			inode->i_mtime = *mtime;
			inode->i_atime = *atime;
			ci->i_time_warp_seq = time_warp_seq;
		} else if (time_warp_seq == ci->i_time_warp_seq) {
			/* nobody did utimes(); take the max */
			if (timespec_compare(mtime, &inode->i_mtime) > 0) {
				dout("mtime %ld.%09ld -> %ld.%09ld inc\n",
				     inode->i_mtime.tv_sec,
				     inode->i_mtime.tv_nsec,
				     mtime->tv_sec, mtime->tv_nsec);
				inode->i_mtime = *mtime;
			}
			if (timespec_compare(atime, &inode->i_atime) > 0) {
				dout("atime %ld.%09ld -> %ld.%09ld inc\n",
				     inode->i_atime.tv_sec,
				     inode->i_atime.tv_nsec,
				     atime->tv_sec, atime->tv_nsec);
				inode->i_atime = *atime;
			}
		} else if (issued & CEPH_CAP_FILE_EXCL) {
			/* we did a utimes(); ignore mds values */
		} else {
			warn = 1;
		}
	} else {
		/* we have no write|excl caps; whatever the MDS says is true */
		if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) {
			inode->i_ctime = *ctime;
			inode->i_mtime = *mtime;
			inode->i_atime = *atime;
			ci->i_time_warp_seq = time_warp_seq;
		} else {
			warn = 1;
		}
	}
	if (warn) /* time_warp_seq shouldn't go backwards */
		dout("%p mds time_warp_seq %llu < %u\n",
		     inode, time_warp_seq, ci->i_time_warp_seq);
}
+
+/*
+ * Populate an inode based on info from mds. May be called on new or
+ * existing inodes.
+ */
/*
 * Populate an inode based on info from mds.  May be called on new or
 * existing inodes.
 *
 * @iinfo: parsed per-inode info from the MDS reply
 * @dirinfo: optional dirfrag delegation info (may be NULL)
 * @session: MDS session the reply arrived on
 * @ttl_from: request start time (NOTE(review): unused in this body —
 *            presumably kept for symmetry with lease handling; confirm)
 * @cap_fmode: file mode to take a cap/fmode reference for, or -1
 * @caps_reservation: preallocated cap reservation for ceph_add_cap()
 *
 * Returns 0 on success or a negative errno.
 */
static int fill_inode(struct inode *inode,
		      struct ceph_mds_reply_info_in *iinfo,
		      struct ceph_mds_reply_dirfrag *dirinfo,
		      struct ceph_mds_session *session,
		      unsigned long ttl_from, int cap_fmode,
		      struct ceph_cap_reservation *caps_reservation)
{
	struct ceph_mds_reply_inode *info = iinfo->in;
	struct ceph_inode_info *ci = ceph_inode(inode);
	int i;
	int issued = 0, implemented;
	struct timespec mtime, atime, ctime;
	u32 nsplits;
	struct ceph_inode_frag *frag;
	struct rb_node *rb_node;
	struct ceph_buffer *xattr_blob = NULL;
	int err = 0;
	int queue_trunc = 0;

	dout("fill_inode %p ino %llx.%llx v %llu had %llu\n",
	     inode, ceph_vinop(inode), le64_to_cpu(info->version),
	     ci->i_version);

	/*
	 * prealloc xattr data, if it looks like we'll need it.  only
	 * if len > 4 (meaning there are actually xattrs; the first 4
	 * bytes are the xattr count).
	 */
	if (iinfo->xattr_len > 4) {
		xattr_blob = ceph_buffer_new(iinfo->xattr_len, GFP_NOFS);
		if (!xattr_blob)
			pr_err("fill_inode ENOMEM xattr blob %d bytes\n",
			       iinfo->xattr_len);
	}

	spin_lock(&ci->i_ceph_lock);

	/*
	 * provided version will be odd if inode value is projected,
	 * even if stable.  skip the update if we have newer stable
	 * info (ours>=theirs, e.g. due to racing mds replies), unless
	 * we are getting projected (unstable) info (in which case the
	 * version is odd, and we want ours>theirs).
	 *   us   them
	 *   2    2     skip
	 *   3    2     skip
	 *   3    3     update
	 */
	if (le64_to_cpu(info->version) > 0 &&
	    (ci->i_version & ~1) >= le64_to_cpu(info->version))
		goto no_change;

	issued = __ceph_caps_issued(ci, &implemented);
	issued |= implemented | __ceph_caps_dirty(ci);

	/* update inode */
	ci->i_version = le64_to_cpu(info->version);
	inode->i_version++;
	inode->i_rdev = le32_to_cpu(info->rdev);

	/* only trust MDS ownership info if we don't hold AUTH_EXCL */
	if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
		inode->i_mode = le32_to_cpu(info->mode);
		inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid));
		inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid));
		dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
		     from_kuid(&init_user_ns, inode->i_uid),
		     from_kgid(&init_user_ns, inode->i_gid));
	}

	if ((issued & CEPH_CAP_LINK_EXCL) == 0)
		set_nlink(inode, le32_to_cpu(info->nlink));

	/* be careful with mtime, atime, size */
	ceph_decode_timespec(&atime, &info->atime);
	ceph_decode_timespec(&mtime, &info->mtime);
	ceph_decode_timespec(&ctime, &info->ctime);
	queue_trunc = ceph_fill_file_size(inode, issued,
					  le32_to_cpu(info->truncate_seq),
					  le64_to_cpu(info->truncate_size),
					  le64_to_cpu(info->size));
	ceph_fill_file_time(inode, issued,
			    le32_to_cpu(info->time_warp_seq),
			    &ctime, &mtime, &atime);

	ci->i_layout = info->layout;
	inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;

	/* xattrs */
	/* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */
	if ((issued & CEPH_CAP_XATTR_EXCL) == 0 &&
	    le64_to_cpu(info->xattr_version) > ci->i_xattrs.version) {
		if (ci->i_xattrs.blob)
			ceph_buffer_put(ci->i_xattrs.blob);
		ci->i_xattrs.blob = xattr_blob;
		if (xattr_blob)
			memcpy(ci->i_xattrs.blob->vec.iov_base,
			       iinfo->xattr_data, iinfo->xattr_len);
		ci->i_xattrs.version = le64_to_cpu(info->xattr_version);
		ceph_forget_all_cached_acls(inode);
		xattr_blob = NULL;	/* ownership transferred above */
	}

	inode->i_mapping->a_ops = &ceph_aops;
	inode->i_mapping->backing_dev_info =
		&ceph_sb_to_client(inode->i_sb)->backing_dev_info;

	/* install type-specific ops */
	switch (inode->i_mode & S_IFMT) {
	case S_IFIFO:
	case S_IFBLK:
	case S_IFCHR:
	case S_IFSOCK:
		init_special_inode(inode, inode->i_mode, inode->i_rdev);
		inode->i_op = &ceph_file_iops;
		break;
	case S_IFREG:
		inode->i_op = &ceph_file_iops;
		inode->i_fop = &ceph_file_fops;
		break;
	case S_IFLNK:
		inode->i_op = &ceph_symlink_iops;
		if (!ci->i_symlink) {
			u32 symlen = iinfo->symlink_len;
			char *sym;

			/* drop the lock around the allocation */
			spin_unlock(&ci->i_ceph_lock);

			err = -EINVAL;
			if (WARN_ON(symlen != inode->i_size))
				goto out;

			err = -ENOMEM;
			sym = kstrndup(iinfo->symlink, symlen, GFP_NOFS);
			if (!sym)
				goto out;

			spin_lock(&ci->i_ceph_lock);
			if (!ci->i_symlink)
				ci->i_symlink = sym;
			else
				kfree(sym); /* lost a race */
		}
		break;
	case S_IFDIR:
		inode->i_op = &ceph_dir_iops;
		inode->i_fop = &ceph_dir_fops;

		ci->i_dir_layout = iinfo->dir_layout;

		/* recursive stats from the MDS */
		ci->i_files = le64_to_cpu(info->files);
		ci->i_subdirs = le64_to_cpu(info->subdirs);
		ci->i_rbytes = le64_to_cpu(info->rbytes);
		ci->i_rfiles = le64_to_cpu(info->rfiles);
		ci->i_rsubdirs = le64_to_cpu(info->rsubdirs);
		ceph_decode_timespec(&ci->i_rctime, &info->rctime);
		break;
	default:
		pr_err("fill_inode %llx.%llx BAD mode 0%o\n",
		       ceph_vinop(inode), inode->i_mode);
	}

	/* set dir completion flag? */
	if (S_ISDIR(inode->i_mode) &&
	    ci->i_files == 0 && ci->i_subdirs == 0 &&
	    ceph_snap(inode) == CEPH_NOSNAP &&
	    (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
	    (issued & CEPH_CAP_FILE_EXCL) == 0 &&
	    !__ceph_dir_is_complete(ci)) {
		dout(" marking %p complete (empty)\n", inode);
		__ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
	}
no_change:
	/* only update max_size on auth cap */
	if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
	    ci->i_max_size != le64_to_cpu(info->max_size)) {
		dout("max_size %lld -> %llu\n", ci->i_max_size,
		     le64_to_cpu(info->max_size));
		ci->i_max_size = le64_to_cpu(info->max_size);
	}

	spin_unlock(&ci->i_ceph_lock);

	/* queue truncate if we saw i_size decrease */
	if (queue_trunc)
		ceph_queue_vmtruncate(inode);

	/* populate frag tree */
	/* FIXME: move me up, if/when version reflects fragtree changes */
	nsplits = le32_to_cpu(info->fragtree.nsplits);
	mutex_lock(&ci->i_fragtree_mutex);
	rb_node = rb_first(&ci->i_fragtree);
	/*
	 * Merge the sorted split list against the sorted rbtree:
	 * drop stale local frags, update or insert the reported ones.
	 */
	for (i = 0; i < nsplits; i++) {
		u32 id = le32_to_cpu(info->fragtree.splits[i].frag);
		frag = NULL;
		while (rb_node) {
			frag = rb_entry(rb_node, struct ceph_inode_frag, node);
			if (ceph_frag_compare(frag->frag, id) >= 0) {
				if (frag->frag != id)
					frag = NULL;
				else
					rb_node = rb_next(rb_node);
				break;
			}
			rb_node = rb_next(rb_node);
			/* local frag not in the reply: discard it */
			rb_erase(&frag->node, &ci->i_fragtree);
			kfree(frag);
			frag = NULL;
		}
		if (!frag) {
			frag = __get_or_create_frag(ci, id);
			if (IS_ERR(frag))
				continue;
		}
		frag->split_by = le32_to_cpu(info->fragtree.splits[i].by);
		dout(" frag %x split by %d\n", frag->frag, frag->split_by);
	}
	/* anything left in the tree is stale */
	while (rb_node) {
		frag = rb_entry(rb_node, struct ceph_inode_frag, node);
		rb_node = rb_next(rb_node);
		rb_erase(&frag->node, &ci->i_fragtree);
		kfree(frag);
	}
	mutex_unlock(&ci->i_fragtree_mutex);

	/* were we issued a capability? */
	if (info->cap.caps) {
		if (ceph_snap(inode) == CEPH_NOSNAP) {
			ceph_add_cap(inode, session,
				     le64_to_cpu(info->cap.cap_id),
				     cap_fmode,
				     le32_to_cpu(info->cap.caps),
				     le32_to_cpu(info->cap.wanted),
				     le32_to_cpu(info->cap.seq),
				     le32_to_cpu(info->cap.mseq),
				     le64_to_cpu(info->cap.realm),
				     info->cap.flags,
				     caps_reservation);
		} else {
			/* snapshot inodes track caps separately */
			spin_lock(&ci->i_ceph_lock);
			dout(" %p got snap_caps %s\n", inode,
			     ceph_cap_string(le32_to_cpu(info->cap.caps)));
			ci->i_snap_caps |= le32_to_cpu(info->cap.caps);
			if (cap_fmode >= 0)
				__ceph_get_fmode(ci, cap_fmode);
			spin_unlock(&ci->i_ceph_lock);
		}
	} else if (cap_fmode >= 0) {
		pr_warning("mds issued no caps on %llx.%llx\n",
			   ceph_vinop(inode));
		__ceph_get_fmode(ci, cap_fmode);
	}

	/* update delegation info? */
	if (dirinfo)
		ceph_fill_dirfrag(inode, dirinfo);

	err = 0;

out:
	if (xattr_blob)
		ceph_buffer_put(xattr_blob);
	return err;
}
+
+/*
+ * caller should hold session s_mutex.
+ */
/*
 * caller should hold session s_mutex.
 *
 * Record/refresh the MDS dentry lease on @dentry.  The lease TTLs
 * are computed from @from_time (when the request was started) plus
 * the MDS-granted duration; half_ttl is when we should start renewing.
 */
static void update_dentry_lease(struct dentry *dentry,
				struct ceph_mds_reply_lease *lease,
				struct ceph_mds_session *session,
				unsigned long from_time)
{
	struct ceph_dentry_info *di = ceph_dentry(dentry);
	long unsigned duration = le32_to_cpu(lease->duration_ms);
	long unsigned ttl = from_time + (duration * HZ) / 1000;
	long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000;
	struct inode *dir;

	/* only track leases on regular dentries */
	if (dentry->d_op != &ceph_dentry_ops)
		return;

	spin_lock(&dentry->d_lock);
	dout("update_dentry_lease %p duration %lu ms ttl %lu\n",
	     dentry, duration, ttl);

	/* make lease_rdcache_gen match directory */
	dir = dentry->d_parent->d_inode;
	di->lease_shared_gen = ceph_inode(dir)->i_shared_gen;

	if (duration == 0)
		goto out_unlock;

	if (di->lease_gen == session->s_cap_gen &&
	    time_before(ttl, dentry->d_time))
		goto out_unlock;  /* we already have a newer lease. */

	/* don't steal a lease held via a different session */
	if (di->lease_session && di->lease_session != session)
		goto out_unlock;

	ceph_dentry_lru_touch(dentry);

	if (!di->lease_session)
		di->lease_session = ceph_get_mds_session(session);
	di->lease_gen = session->s_cap_gen;
	di->lease_seq = le32_to_cpu(lease->seq);
	di->lease_renew_after = half_ttl;
	di->lease_renew_from = 0;
	dentry->d_time = ttl;
out_unlock:
	spin_unlock(&dentry->d_lock);
	return;
}
+
+/*
+ * splice a dentry to an inode.
+ * caller must hold directory i_mutex for this to be safe.
+ *
+ * we will only rehash the resulting dentry if @prehash is
+ * true; @prehash will be set to false (for the benefit of
+ * the caller) if we fail.
+ */
/*
 * splice a dentry to an inode.
 * caller must hold directory i_mutex for this to be safe.
 *
 * we will only rehash the resulting dentry if @prehash is
 * true; @prehash will be set to false (for the benefit of
 * the caller) if we fail.
 *
 * Returns the (possibly different) dentry now bound to @in, or an
 * ERR_PTR on failure.  On success the caller's reference moves to the
 * returned dentry.
 */
static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
				    bool *prehash)
{
	struct dentry *realdn;

	BUG_ON(dn->d_inode);

	/* dn must be unhashed */
	if (!d_unhashed(dn))
		d_drop(dn);
	realdn = d_materialise_unique(dn, in);
	if (IS_ERR(realdn)) {
		pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
		       PTR_ERR(realdn), dn, in, ceph_vinop(in));
		if (prehash)
			*prehash = false; /* don't rehash on error */
		dn = realdn; /* note realdn contains the error */
		goto out;
	} else if (realdn) {
		/* an existing alias for @in was found and used instead */
		dout("dn %p (%d) spliced with %p (%d) "
		     "inode %p ino %llx.%llx\n",
		     dn, d_count(dn),
		     realdn, d_count(realdn),
		     realdn->d_inode, ceph_vinop(realdn->d_inode));
		dput(dn);
		dn = realdn;
	} else {
		BUG_ON(!ceph_dentry(dn));
		dout("dn %p attached to %p ino %llx.%llx\n",
		     dn, dn->d_inode, ceph_vinop(dn->d_inode));
	}
	if ((!prehash || *prehash) && d_unhashed(dn))
		d_rehash(dn);
out:
	return dn;
}
+
+/*
+ * Incorporate results into the local cache. This is either just
+ * one inode, or a directory, dentry, and possibly linked-to inode (e.g.,
+ * after a lookup).
+ *
+ * A reply may contain
+ * a directory inode along with a dentry.
+ * and/or a target inode
+ *
+ * Called with snap_rwsem (read).
+ */
/*
 * Incorporate an MDS reply trace into the local cache: fill the
 * directory inode, resolve/splice the dentry, fill the target inode,
 * and refresh dentry leases.  See the comment block above for the
 * reply shapes handled.
 *
 * Returns 0 on success or a negative errno.
 */
int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
		    struct ceph_mds_session *session)
{
	struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
	struct inode *in = NULL;
	struct ceph_vino vino;
	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
	int err = 0;

	dout("fill_trace %p is_dentry %d is_target %d\n", req,
	     rinfo->head->is_dentry, rinfo->head->is_target);

#if 0
	/*
	 * Debugging hook:
	 *
	 * If we resend completed ops to a recovering mds, we get no
	 * trace.  Since that is very rare, pretend this is the case
	 * to ensure the 'no trace' handlers in the callers behave.
	 *
	 * Fill in inodes unconditionally to avoid breaking cap
	 * invariants.
	 */
	if (rinfo->head->op & CEPH_MDS_OP_WRITE) {
		pr_info("fill_trace faking empty trace on %lld %s\n",
			req->r_tid, ceph_mds_op_name(rinfo->head->op));
		if (rinfo->head->is_dentry) {
			rinfo->head->is_dentry = 0;
			err = fill_inode(req->r_locked_dir,
					 &rinfo->diri, rinfo->dirfrag,
					 session, req->r_request_started, -1);
		}
		if (rinfo->head->is_target) {
			rinfo->head->is_target = 0;
			ininfo = rinfo->targeti.in;
			vino.ino = le64_to_cpu(ininfo->ino);
			vino.snap = le64_to_cpu(ininfo->snapid);
			in = ceph_get_inode(sb, vino);
			err = fill_inode(in, &rinfo->targeti, NULL,
					 session, req->r_request_started,
					 req->r_fmode);
			iput(in);
		}
	}
#endif

	if (!rinfo->head->is_target && !rinfo->head->is_dentry) {
		dout("fill_trace reply is empty!\n");
		if (rinfo->head->result == 0 && req->r_locked_dir)
			ceph_invalidate_dir_request(req);
		return 0;
	}

	if (rinfo->head->is_dentry) {
		struct inode *dir = req->r_locked_dir;

		if (dir) {
			err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag,
					 session, req->r_request_started, -1,
					 &req->r_caps_reservation);
			if (err < 0)
				goto done;
		} else {
			WARN_ON_ONCE(1);
		}

		/* LOOKUPNAME: the MDS told us the name; build the dentry */
		if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME) {
			struct qstr dname;
			struct dentry *dn, *parent;

			BUG_ON(!rinfo->head->is_target);
			BUG_ON(req->r_dentry);

			parent = d_find_any_alias(dir);
			BUG_ON(!parent);

			dname.name = rinfo->dname;
			dname.len = rinfo->dname_len;
			dname.hash = full_name_hash(dname.name, dname.len);
			vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
			vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
retry_lookup:
			dn = d_lookup(parent, &dname);
			dout("d_lookup on parent=%p name=%.*s got %p\n",
			     parent, dname.len, dname.name, dn);

			if (!dn) {
				dn = d_alloc(parent, &dname);
				dout("d_alloc %p '%.*s' = %p\n", parent,
				     dname.len, dname.name, dn);
				if (dn == NULL) {
					dput(parent);
					err = -ENOMEM;
					goto done;
				}
				err = ceph_init_dentry(dn);
				if (err < 0) {
					dput(dn);
					dput(parent);
					goto done;
				}
			} else if (dn->d_inode &&
				   (ceph_ino(dn->d_inode) != vino.ino ||
				    ceph_snap(dn->d_inode) != vino.snap)) {
				/* stale dentry: drop it and retry */
				dout(" dn %p points to wrong inode %p\n",
				     dn, dn->d_inode);
				d_delete(dn);
				dput(dn);
				goto retry_lookup;
			}

			req->r_dentry = dn;
			dput(parent);
		}
	}

	if (rinfo->head->is_target) {
		vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
		vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);

		in = ceph_get_inode(sb, vino);
		if (IS_ERR(in)) {
			err = PTR_ERR(in);
			goto done;
		}
		req->r_target_inode = in;

		/* take fmode/cap refs only for successful, live requests */
		err = fill_inode(in, &rinfo->targeti, NULL,
				session, req->r_request_started,
				(!req->r_aborted && rinfo->head->result == 0) ?
				req->r_fmode : -1,
				&req->r_caps_reservation);
		if (err < 0) {
			pr_err("fill_inode badness %p %llx.%llx\n",
				in, ceph_vinop(in));
			goto done;
		}
	}

	/*
	 * ignore null lease/binding on snapdir ENOENT, or else we
	 * will have trouble splicing in the virtual snapdir later
	 */
	if (rinfo->head->is_dentry && !req->r_aborted &&
	    req->r_locked_dir &&
	    (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
					       fsc->mount_options->snapdir_name,
					       req->r_dentry->d_name.len))) {
		/*
		 * lookup link rename   : null -> possibly existing inode
		 * mknod symlink mkdir  : null -> new inode
		 * unlink               : linked -> null
		 */
		struct inode *dir = req->r_locked_dir;
		struct dentry *dn = req->r_dentry;
		bool have_dir_cap, have_lease;

		BUG_ON(!dn);
		BUG_ON(!dir);
		BUG_ON(dn->d_parent->d_inode != dir);
		BUG_ON(ceph_ino(dir) !=
		       le64_to_cpu(rinfo->diri.in->ino));
		BUG_ON(ceph_snap(dir) !=
		       le64_to_cpu(rinfo->diri.in->snapid));

		/* do we have a lease on the whole dir? */
		have_dir_cap =
			(le32_to_cpu(rinfo->diri.in->cap.caps) &
			 CEPH_CAP_FILE_SHARED);

		/* do we have a dn lease? */
		have_lease = have_dir_cap ||
			le32_to_cpu(rinfo->dlease->duration_ms);
		if (!have_lease)
			dout("fill_trace  no dentry lease or dir cap\n");

		/* rename? */
		if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) {
			struct inode *olddir = req->r_old_dentry_dir;
			BUG_ON(!olddir);

			dout(" src %p '%.*s' dst %p '%.*s'\n",
			     req->r_old_dentry,
			     req->r_old_dentry->d_name.len,
			     req->r_old_dentry->d_name.name,
			     dn, dn->d_name.len, dn->d_name.name);
			dout("fill_trace doing d_move %p -> %p\n",
			     req->r_old_dentry, dn);

			d_move(req->r_old_dentry, dn);
			dout(" src %p '%.*s' dst %p '%.*s'\n",
			     req->r_old_dentry,
			     req->r_old_dentry->d_name.len,
			     req->r_old_dentry->d_name.name,
			     dn, dn->d_name.len, dn->d_name.name);

			/* ensure target dentry is invalidated, despite
			   rehashing bug in vfs_rename_dir */
			ceph_invalidate_dentry_lease(dn);

			/* d_move screws up sibling dentries' offsets */
			ceph_dir_clear_complete(dir);
			ceph_dir_clear_complete(olddir);

			dout("dn %p gets new offset %lld\n", req->r_old_dentry,
			     ceph_dentry(req->r_old_dentry)->offset);

			dn = req->r_old_dentry;  /* use old_dentry */
		}

		/* null dentry? */
		if (!rinfo->head->is_target) {
			dout("fill_trace null dentry\n");
			if (dn->d_inode) {
				dout("d_delete %p\n", dn);
				d_delete(dn);
			} else {
				dout("d_instantiate %p NULL\n", dn);
				d_instantiate(dn, NULL);
				if (have_lease && d_unhashed(dn))
					d_rehash(dn);
				update_dentry_lease(dn, rinfo->dlease,
						    session,
						    req->r_request_started);
			}
			goto done;
		}

		/* attach proper inode */
		if (!dn->d_inode) {
			ceph_dir_clear_complete(dir);
			ihold(in);
			dn = splice_dentry(dn, in, &have_lease);
			if (IS_ERR(dn)) {
				err = PTR_ERR(dn);
				goto done;
			}
			req->r_dentry = dn;  /* may have spliced */
		} else if (dn->d_inode && dn->d_inode != in) {
			/* stale binding; don't lease it */
			dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
			     dn, dn->d_inode, ceph_vinop(dn->d_inode),
			     ceph_vinop(in));
			have_lease = false;
		}

		if (have_lease)
			update_dentry_lease(dn, rinfo->dlease, session,
					    req->r_request_started);
		dout(" final dn %p\n", dn);
	} else if (!req->r_aborted &&
		   (req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
		    req->r_op == CEPH_MDS_OP_MKSNAP)) {
		struct dentry *dn = req->r_dentry;
		struct inode *dir = req->r_locked_dir;

		/* fill out a snapdir LOOKUPSNAP dentry */
		BUG_ON(!dn);
		BUG_ON(!dir);
		BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
		dout(" linking snapped dir %p to dn %p\n", in, dn);
		ceph_dir_clear_complete(dir);
		ihold(in);
		dn = splice_dentry(dn, in, NULL);
		if (IS_ERR(dn)) {
			err = PTR_ERR(dn);
			goto done;
		}
		req->r_dentry = dn;  /* may have spliced */
	}
done:
	dout("fill_trace done err=%d\n", err);
	return err;
}
+
+/*
+ * Prepopulate the inode cache with the inodes from a readdir reply,
+ * without touching the dcache.  Used when the request was aborted and
+ * the dentry results can no longer be trusted, but we still want to
+ * absorb the caps/metadata the MDS sent us.
+ *
+ * Returns 0, or the last error encountered (bad entries are skipped).
+ */
+static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
+					   struct ceph_mds_session *session)
+{
+	struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
+	int i, err = 0;
+
+	for (i = 0; i < rinfo->dir_nr; i++) {
+		struct ceph_vino vino;
+		struct inode *in;
+		int rc;
+
+		vino.ino = le64_to_cpu(rinfo->dir_in[i].in->ino);
+		vino.snap = le64_to_cpu(rinfo->dir_in[i].in->snapid);
+
+		in = ceph_get_inode(req->r_dentry->d_sb, vino);
+		if (IS_ERR(in)) {
+			err = PTR_ERR(in);
+			dout("new_inode badness got %d\n", err);
+			continue;
+		}
+		rc = fill_inode(in, &rinfo->dir_in[i], NULL, session,
+				req->r_request_started, -1,
+				&req->r_caps_reservation);
+		if (rc < 0) {
+			pr_err("fill_inode badness on %p got %d\n", in, rc);
+			err = rc;
+		}
+		/* drop the reference taken by ceph_get_inode() */
+		iput(in);
+	}
+
+	return err;
+}
+
+/*
+ * Prepopulate the dcache and icache with readdir results: create or
+ * verify a dentry for each entry, instantiate its inode, and record
+ * dentry leases and readdir offsets.
+ *
+ * Returns 0 or a negative errno; on success req->r_did_prepopulate
+ * is set.
+ */
+int ceph_readdir_prepopulate(struct ceph_mds_request *req,
+			     struct ceph_mds_session *session)
+{
+	struct dentry *parent = req->r_dentry;
+	struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
+	struct qstr dname;
+	struct dentry *dn;
+	struct inode *in;
+	int err = 0, ret, i;
+	struct inode *snapdir = NULL;
+	struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
+	struct ceph_dentry_info *di;
+	u64 r_readdir_offset = req->r_readdir_offset;
+	u32 frag = le32_to_cpu(rhead->args.readdir.frag);
+
+	/* the MDS may have re-split the dirfrag; restart offsets if so */
+	if (rinfo->dir_dir &&
+	    le32_to_cpu(rinfo->dir_dir->frag) != frag) {
+		dout("readdir_prepopulate got new frag %x -> %x\n",
+		     frag, le32_to_cpu(rinfo->dir_dir->frag));
+		frag = le32_to_cpu(rinfo->dir_dir->frag);
+		if (ceph_frag_is_leftmost(frag))
+			r_readdir_offset = 2;	/* skip "." and ".." */
+		else
+			r_readdir_offset = 0;
+	}
+
+	/* aborted request: dentries untrustworthy, update inodes only */
+	if (req->r_aborted)
+		return readdir_prepopulate_inodes_only(req, session);
+
+	if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) {
+		/* entries live under the virtual .snap dir, not parent */
+		snapdir = ceph_get_snapdir(parent->d_inode);
+		parent = d_find_alias(snapdir);
+		dout("readdir_prepopulate %d items under SNAPDIR dn %p\n",
+		     rinfo->dir_nr, parent);
+	} else {
+		dout("readdir_prepopulate %d items under dn %p\n",
+		     rinfo->dir_nr, parent);
+		if (rinfo->dir_dir)
+			ceph_fill_dirfrag(parent->d_inode, rinfo->dir_dir);
+	}
+
+	/* FIXME: release caps/leases if error occurs */
+	for (i = 0; i < rinfo->dir_nr; i++) {
+		struct ceph_vino vino;
+
+		dname.name = rinfo->dir_dname[i];
+		dname.len = rinfo->dir_dname_len[i];
+		dname.hash = full_name_hash(dname.name, dname.len);
+
+		vino.ino = le64_to_cpu(rinfo->dir_in[i].in->ino);
+		vino.snap = le64_to_cpu(rinfo->dir_in[i].in->snapid);
+
+retry_lookup:
+		dn = d_lookup(parent, &dname);
+		dout("d_lookup on parent=%p name=%.*s got %p\n",
+		     parent, dname.len, dname.name, dn);
+
+		if (!dn) {
+			/* no dentry yet: allocate a negative one */
+			dn = d_alloc(parent, &dname);
+			dout("d_alloc %p '%.*s' = %p\n", parent,
+			     dname.len, dname.name, dn);
+			if (dn == NULL) {
+				dout("d_alloc badness\n");
+				err = -ENOMEM;
+				goto out;
+			}
+			ret = ceph_init_dentry(dn);
+			if (ret < 0) {
+				dput(dn);
+				err = ret;
+				goto out;
+			}
+		} else if (dn->d_inode &&
+			   (ceph_ino(dn->d_inode) != vino.ino ||
+			    ceph_snap(dn->d_inode) != vino.snap)) {
+			/* stale dentry: points at a different inode */
+			dout(" dn %p points to wrong inode %p\n",
+			     dn, dn->d_inode);
+			d_delete(dn);
+			dput(dn);
+			goto retry_lookup;
+		} else {
+			/* reorder parent's d_subdirs */
+			spin_lock(&parent->d_lock);
+			spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
+			list_move(&dn->d_u.d_child, &parent->d_subdirs);
+			spin_unlock(&dn->d_lock);
+			spin_unlock(&parent->d_lock);
+		}
+
+		/* inode */
+		if (dn->d_inode) {
+			in = dn->d_inode;
+		} else {
+			in = ceph_get_inode(parent->d_sb, vino);
+			if (IS_ERR(in)) {
+				dout("new_inode badness\n");
+				d_drop(dn);
+				dput(dn);
+				err = PTR_ERR(in);
+				goto out;
+			}
+		}
+
+		if (fill_inode(in, &rinfo->dir_in[i], NULL, session,
+			       req->r_request_started, -1,
+			       &req->r_caps_reservation) < 0) {
+			pr_err("fill_inode badness on %p\n", in);
+			if (!dn->d_inode)
+				iput(in);	/* drop ref from ceph_get_inode */
+			d_drop(dn);
+			goto next_item;
+		}
+
+		if (!dn->d_inode) {
+			/* attach inode; may return a different (aliased) dentry */
+			dn = splice_dentry(dn, in, NULL);
+			if (IS_ERR(dn)) {
+				err = PTR_ERR(dn);
+				dn = NULL;
+				goto next_item;
+			}
+		}
+
+		di = dn->d_fsdata;
+		di->offset = ceph_make_fpos(frag, i + r_readdir_offset);
+
+		update_dentry_lease(dn, rinfo->dir_dlease[i],
+				    req->r_session,
+				    req->r_request_started);
+next_item:
+		if (dn)
+			dput(dn);
+	}
+	if (err == 0)
+		req->r_did_prepopulate = true;
+
+out:
+	if (snapdir) {
+		iput(snapdir);
+		dput(parent);	/* parent was switched to the snapdir alias */
+	}
+	dout("readdir_prepopulate done\n");
+	return err;
+}
+
+/*
+ * Set the local i_size/i_blocks.
+ *
+ * Returns 1 if the caller should report the new size to the MDS (we
+ * are approaching max_size and have not reported recently), else 0.
+ */
+int ceph_inode_set_size(struct inode *inode, loff_t size)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	int ret = 0;
+
+	spin_lock(&ci->i_ceph_lock);
+	dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
+	inode->i_size = size;
+	/* round up to 512-byte blocks */
+	inode->i_blocks = (size + (1 << 9) - 1) >> 9;
+
+	/* tell the MDS if we are approaching max_size */
+	if ((size << 1) >= ci->i_max_size &&
+	    (ci->i_reported_size << 1) < ci->i_max_size)
+		ret = 1;
+
+	spin_unlock(&ci->i_ceph_lock);
+	return ret;
+}
+
+/*
+ * Kick off inode writeback on the client's writeback workqueue.  The
+ * inode reference taken here is dropped by ceph_writeback_work(), or
+ * immediately if the work item was already pending and could not be
+ * queued again.  (Writeback can't run in message handler context.)
+ */
+void ceph_queue_writeback(struct inode *inode)
+{
+	ihold(inode);
+	if (!queue_work(ceph_inode_to_client(inode)->wb_wq,
+			&ceph_inode(inode)->i_wb_work)) {
+		dout("ceph_queue_writeback %p failed\n", inode);
+		iput(inode);
+		return;
+	}
+	dout("ceph_queue_writeback %p\n", inode);
+}
+
+/* worker: flush dirty pages, then drop the ref taken when queued */
+static void ceph_writeback_work(struct work_struct *work)
+{
+	struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
+						  i_wb_work);
+	struct inode *inode = &ci->vfs_inode;
+
+	dout("writeback %p\n", inode);
+	filemap_fdatawrite(&inode->i_data);
+	iput(inode);	/* matches ihold() in ceph_queue_writeback() */
+}
+
+/*
+ * Queue an async page-cache invalidation for this inode.  Holds an
+ * inode reference that ceph_invalidate_work() releases; if the work
+ * item was already pending we drop the reference right away.
+ */
+void ceph_queue_invalidate(struct inode *inode)
+{
+	ihold(inode);
+	if (!queue_work(ceph_inode_to_client(inode)->pg_inv_wq,
+			&ceph_inode(inode)->i_pg_inv_work)) {
+		dout("ceph_queue_invalidate %p failed\n", inode);
+		iput(inode);
+		return;
+	}
+	dout("ceph_queue_invalidate %p\n", inode);
+}
+
+/*
+ * Invalidate inode pages in a worker thread. (This can't be done
+ * in the message handler context.)
+ */
+static void ceph_invalidate_work(struct work_struct *work)
+{
+	struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
+						  i_pg_inv_work);
+	struct inode *inode = &ci->vfs_inode;
+	u32 orig_gen;
+	int check = 0;	/* re-check caps before returning? */
+
+	mutex_lock(&ci->i_truncate_mutex);
+	spin_lock(&ci->i_ceph_lock);
+	dout("invalidate_pages %p gen %d revoking %d\n", inode,
+	     ci->i_rdcache_gen, ci->i_rdcache_revoking);
+	if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
+		/* a cap grant raced us; this revocation is obsolete */
+		if (__ceph_caps_revoking_other(ci, NULL, CEPH_CAP_FILE_CACHE))
+			check = 1;
+		spin_unlock(&ci->i_ceph_lock);
+		mutex_unlock(&ci->i_truncate_mutex);
+		goto out;
+	}
+	orig_gen = ci->i_rdcache_gen;
+	spin_unlock(&ci->i_ceph_lock);
+
+	truncate_inode_pages(inode->i_mapping, 0);
+
+	spin_lock(&ci->i_ceph_lock);
+	if (orig_gen == ci->i_rdcache_gen &&
+	    orig_gen == ci->i_rdcache_revoking) {
+		dout("invalidate_pages %p gen %d successful\n", inode,
+		     ci->i_rdcache_gen);
+		ci->i_rdcache_revoking--;
+		check = 1;
+	} else {
+		/* generation moved while we truncated; don't ack revoke */
+		dout("invalidate_pages %p gen %d raced, now %d revoking %d\n",
+		     inode, orig_gen, ci->i_rdcache_gen,
+		     ci->i_rdcache_revoking);
+		if (__ceph_caps_revoking_other(ci, NULL, CEPH_CAP_FILE_CACHE))
+			check = 1;
+	}
+	spin_unlock(&ci->i_ceph_lock);
+	mutex_unlock(&ci->i_truncate_mutex);
+out:
+	if (check)
+		ceph_check_caps(ci, 0, NULL);
+	iput(inode);	/* matches ihold() in ceph_queue_invalidate() */
+}
+
+
+/*
+ * called by trunc_wq;
+ *
+ * We also truncate in a separate thread as well.
+ */
+static void ceph_vmtruncate_work(struct work_struct *work)
+{
+	struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
+						  i_vmtruncate_work);
+	struct inode *inode = &ci->vfs_inode;
+
+	dout("vmtruncate_work %p\n", inode);
+	__ceph_do_pending_vmtruncate(inode);
+	iput(inode);	/* matches ihold() in ceph_queue_vmtruncate() */
+}
+
+/*
+ * Queue an async vmtruncate.  If we fail to queue work, we will handle
+ * the truncation the next time we call __ceph_do_pending_vmtruncate.
+ * The inode reference taken here is released by ceph_vmtruncate_work()
+ * (or immediately if the work item was already pending).
+ */
+void ceph_queue_vmtruncate(struct inode *inode)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+
+	ihold(inode);
+
+	if (!queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
+			&ci->i_vmtruncate_work)) {
+		dout("ceph_queue_vmtruncate %p failed, pending=%d\n",
+		     inode, ci->i_truncate_pending);
+		iput(inode);
+		return;
+	}
+	dout("ceph_queue_vmtruncate %p\n", inode);
+}
+
+/*
+ * Make sure any pending truncation is applied before doing anything
+ * that may depend on it.
+ *
+ * Takes i_truncate_mutex for the duration and retries until the
+ * truncation we performed still matches the latest i_truncate_size
+ * (a new MDS-driven truncate may race with us).
+ */
+void __ceph_do_pending_vmtruncate(struct inode *inode)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	u64 to;
+	int wrbuffer_refs, finish = 0;
+
+	mutex_lock(&ci->i_truncate_mutex);
+retry:
+	spin_lock(&ci->i_ceph_lock);
+	if (ci->i_truncate_pending == 0) {
+		dout("__do_pending_vmtruncate %p none pending\n", inode);
+		spin_unlock(&ci->i_ceph_lock);
+		mutex_unlock(&ci->i_truncate_mutex);
+		return;
+	}
+
+	/*
+	 * make sure any dirty snapped pages are flushed before we
+	 * possibly truncate them.. so write AND block!
+	 */
+	if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
+		dout("__do_pending_vmtruncate %p flushing snaps first\n",
+		     inode);
+		spin_unlock(&ci->i_ceph_lock);
+		filemap_write_and_wait_range(&inode->i_data, 0,
+					     inode->i_sb->s_maxbytes);
+		goto retry;
+	}
+
+	/* there should be no reader or writer */
+	WARN_ON_ONCE(ci->i_rd_ref || ci->i_wr_ref);
+
+	to = ci->i_truncate_size;
+	wrbuffer_refs = ci->i_wrbuffer_ref;
+	dout("__do_pending_vmtruncate %p (%d) to %lld\n", inode,
+	     ci->i_truncate_pending, to);
+	spin_unlock(&ci->i_ceph_lock);
+
+	truncate_inode_pages(inode->i_mapping, to);
+
+	spin_lock(&ci->i_ceph_lock);
+	if (to == ci->i_truncate_size) {
+		/* no new truncate raced in; we are done */
+		ci->i_truncate_pending = 0;
+		finish = 1;
+	}
+	spin_unlock(&ci->i_ceph_lock);
+	if (!finish)
+		goto retry;
+
+	mutex_unlock(&ci->i_truncate_mutex);
+
+	if (wrbuffer_refs == 0)
+		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
+
+	wake_up_all(&ci->i_cap_wq);
+}
+
+/*
+ * symlinks
+ */
+/* ->follow_link: hand the VFS our cached symlink target (i_symlink) */
+static void *ceph_sym_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	struct ceph_inode_info *ci = ceph_inode(dentry->d_inode);
+	nd_set_link(nd, ci->i_symlink);
+	return NULL;	/* no cookie needed by put_link */
+}
+
+/* inode operations for symlink inodes */
+static const struct inode_operations ceph_symlink_iops = {
+	.readlink = generic_readlink,
+	.follow_link = ceph_sym_follow_link,
+	.setattr = ceph_setattr,
+	.getattr = ceph_getattr,
+	.setxattr = ceph_setxattr,
+	.getxattr = ceph_getxattr,
+	.listxattr = ceph_listxattr,
+	.removexattr = ceph_removexattr,
+};
+
+/*
+ * setattr: apply attribute changes.
+ *
+ * For each requested attribute: if we hold the relevant exclusive cap
+ * we apply the change locally and mark that cap dirty; otherwise we
+ * add the change to a SETATTR request for the MDS and note which
+ * shared caps to release along with it.
+ *
+ * Returns 0 or a negative errno.
+ */
+int ceph_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	const unsigned int ia_valid = attr->ia_valid;
+	struct ceph_mds_request *req;
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
+	int issued;
+	int release = 0, dirtied = 0;
+	int mask = 0;	/* CEPH_SETATTR_* bits sent to the MDS */
+	int err = 0;
+	int inode_dirty_flags = 0;
+
+	/* snapshots are read-only */
+	if (ceph_snap(inode) != CEPH_NOSNAP)
+		return -EROFS;
+
+	err = inode_change_ok(inode, attr);
+	if (err != 0)
+		return err;
+
+	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETATTR,
+				       USE_AUTH_MDS);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	spin_lock(&ci->i_ceph_lock);
+	issued = __ceph_caps_issued(ci, NULL);
+	dout("setattr %p issued %s\n", inode, ceph_cap_string(issued));
+
+	if (ia_valid & ATTR_UID) {
+		dout("setattr %p uid %d -> %d\n", inode,
+		     from_kuid(&init_user_ns, inode->i_uid),
+		     from_kuid(&init_user_ns, attr->ia_uid));
+		if (issued & CEPH_CAP_AUTH_EXCL) {
+			inode->i_uid = attr->ia_uid;
+			dirtied |= CEPH_CAP_AUTH_EXCL;
+		} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
+			   !uid_eq(attr->ia_uid, inode->i_uid)) {
+			req->r_args.setattr.uid = cpu_to_le32(
+				from_kuid(&init_user_ns, attr->ia_uid));
+			mask |= CEPH_SETATTR_UID;
+			release |= CEPH_CAP_AUTH_SHARED;
+		}
+	}
+	if (ia_valid & ATTR_GID) {
+		dout("setattr %p gid %d -> %d\n", inode,
+		     from_kgid(&init_user_ns, inode->i_gid),
+		     from_kgid(&init_user_ns, attr->ia_gid));
+		if (issued & CEPH_CAP_AUTH_EXCL) {
+			inode->i_gid = attr->ia_gid;
+			dirtied |= CEPH_CAP_AUTH_EXCL;
+		} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
+			   !gid_eq(attr->ia_gid, inode->i_gid)) {
+			req->r_args.setattr.gid = cpu_to_le32(
+				from_kgid(&init_user_ns, attr->ia_gid));
+			mask |= CEPH_SETATTR_GID;
+			release |= CEPH_CAP_AUTH_SHARED;
+		}
+	}
+	if (ia_valid & ATTR_MODE) {
+		dout("setattr %p mode 0%o -> 0%o\n", inode, inode->i_mode,
+		     attr->ia_mode);
+		if (issued & CEPH_CAP_AUTH_EXCL) {
+			inode->i_mode = attr->ia_mode;
+			dirtied |= CEPH_CAP_AUTH_EXCL;
+		} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
+			   attr->ia_mode != inode->i_mode) {
+			inode->i_mode = attr->ia_mode;
+			req->r_args.setattr.mode = cpu_to_le32(attr->ia_mode);
+			mask |= CEPH_SETATTR_MODE;
+			release |= CEPH_CAP_AUTH_SHARED;
+		}
+	}
+
+	if (ia_valid & ATTR_ATIME) {
+		dout("setattr %p atime %ld.%ld -> %ld.%ld\n", inode,
+		     inode->i_atime.tv_sec, inode->i_atime.tv_nsec,
+		     attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
+		if (issued & CEPH_CAP_FILE_EXCL) {
+			/* bump time_warp_seq: we warped the timestamp */
+			ci->i_time_warp_seq++;
+			inode->i_atime = attr->ia_atime;
+			dirtied |= CEPH_CAP_FILE_EXCL;
+		} else if ((issued & CEPH_CAP_FILE_WR) &&
+			   timespec_compare(&inode->i_atime,
+					    &attr->ia_atime) < 0) {
+			/* with the WR cap we may move atime forward locally */
+			inode->i_atime = attr->ia_atime;
+			dirtied |= CEPH_CAP_FILE_WR;
+		} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
+			   !timespec_equal(&inode->i_atime, &attr->ia_atime)) {
+			ceph_encode_timespec(&req->r_args.setattr.atime,
+					     &attr->ia_atime);
+			mask |= CEPH_SETATTR_ATIME;
+			release |= CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_RD |
+				CEPH_CAP_FILE_WR;
+		}
+	}
+	if (ia_valid & ATTR_MTIME) {
+		dout("setattr %p mtime %ld.%ld -> %ld.%ld\n", inode,
+		     inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
+		     attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
+		if (issued & CEPH_CAP_FILE_EXCL) {
+			ci->i_time_warp_seq++;
+			inode->i_mtime = attr->ia_mtime;
+			dirtied |= CEPH_CAP_FILE_EXCL;
+		} else if ((issued & CEPH_CAP_FILE_WR) &&
+			   timespec_compare(&inode->i_mtime,
+					    &attr->ia_mtime) < 0) {
+			inode->i_mtime = attr->ia_mtime;
+			dirtied |= CEPH_CAP_FILE_WR;
+		} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
+			   !timespec_equal(&inode->i_mtime, &attr->ia_mtime)) {
+			ceph_encode_timespec(&req->r_args.setattr.mtime,
+					     &attr->ia_mtime);
+			mask |= CEPH_SETATTR_MTIME;
+			release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD |
+				CEPH_CAP_FILE_WR;
+		}
+	}
+	if (ia_valid & ATTR_SIZE) {
+		dout("setattr %p size %lld -> %lld\n", inode,
+		     inode->i_size, attr->ia_size);
+		if (attr->ia_size > inode->i_sb->s_maxbytes) {
+			err = -EINVAL;
+			goto out;
+		}
+		/* only size *extension* can be done locally (with EXCL) */
+		if ((issued & CEPH_CAP_FILE_EXCL) &&
+		    attr->ia_size > inode->i_size) {
+			inode->i_size = attr->ia_size;
+			inode->i_blocks =
+				(attr->ia_size + (1 << 9) - 1) >> 9;
+			inode->i_ctime = attr->ia_ctime;
+			ci->i_reported_size = attr->ia_size;
+			dirtied |= CEPH_CAP_FILE_EXCL;
+		} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
+			   attr->ia_size != inode->i_size) {
+			req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
+			req->r_args.setattr.old_size =
+				cpu_to_le64(inode->i_size);
+			mask |= CEPH_SETATTR_SIZE;
+			release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD |
+				CEPH_CAP_FILE_WR;
+		}
+	}
+
+	/* these do nothing */
+	if (ia_valid & ATTR_CTIME) {
+		bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME|
+					 ATTR_MODE|ATTR_UID|ATTR_GID)) == 0;
+		dout("setattr %p ctime %ld.%ld -> %ld.%ld (%s)\n", inode,
+		     inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+		     attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
+		     only ? "ctime only" : "ignored");
+		inode->i_ctime = attr->ia_ctime;
+		if (only) {
+			/*
+			 * if kernel wants to dirty ctime but nothing else,
+			 * we need to choose a cap to dirty under, or do
+			 * a almost-no-op setattr
+			 */
+			if (issued & CEPH_CAP_AUTH_EXCL)
+				dirtied |= CEPH_CAP_AUTH_EXCL;
+			else if (issued & CEPH_CAP_FILE_EXCL)
+				dirtied |= CEPH_CAP_FILE_EXCL;
+			else if (issued & CEPH_CAP_XATTR_EXCL)
+				dirtied |= CEPH_CAP_XATTR_EXCL;
+			else
+				mask |= CEPH_SETATTR_CTIME;
+		}
+	}
+	if (ia_valid & ATTR_FILE)
+		dout("setattr %p ATTR_FILE ... hrm!\n", inode);
+
+	if (dirtied) {
+		inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied);
+		inode->i_ctime = CURRENT_TIME;
+	}
+
+	/* never release caps we don't actually hold */
+	release &= issued;
+	spin_unlock(&ci->i_ceph_lock);
+
+	if (inode_dirty_flags)
+		__mark_inode_dirty(inode, inode_dirty_flags);
+
+	if (ia_valid & ATTR_MODE) {
+		err = posix_acl_chmod(inode, attr->ia_mode);
+		if (err)
+			goto out_put;
+	}
+
+	if (mask) {
+		/* something couldn't be done locally; ask the MDS */
+		req->r_inode = inode;
+		ihold(inode);
+		req->r_inode_drop = release;
+		req->r_args.setattr.mask = cpu_to_le32(mask);
+		req->r_num_caps = 1;
+		err = ceph_mdsc_do_request(mdsc, NULL, req);
+	}
+	dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
+	     ceph_cap_string(dirtied), mask);
+
+	ceph_mdsc_put_request(req);
+	if (mask & CEPH_SETATTR_SIZE)
+		__ceph_do_pending_vmtruncate(inode);
+	return err;
+out:
+	spin_unlock(&ci->i_ceph_lock);
+out_put:
+	ceph_mdsc_put_request(req);
+	return err;
+}
+
+/*
+ * Verify that we have a lease on the given mask.  If not,
+ * do a getattr against an mds.
+ *
+ * Returns 0 if the caps in 'mask' are already valid (or the inode is
+ * the virtual snapdir), otherwise the result of the GETATTR request.
+ */
+int ceph_do_getattr(struct inode *inode, int mask)
+{
+	struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_mds_request *req;
+	int err;
+
+	if (ceph_snap(inode) == CEPH_SNAPDIR) {
+		/* the .snap dir is synthetic; nothing to fetch */
+		dout("do_getattr inode %p SNAPDIR\n", inode);
+		return 0;
+	}
+
+	dout("do_getattr inode %p mask %s mode 0%o\n",
+	     inode, ceph_cap_string(mask), inode->i_mode);
+	if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
+		return 0;
+
+	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+	req->r_inode = inode;
+	ihold(inode);
+	req->r_num_caps = 1;
+	req->r_args.getattr.mask = cpu_to_le32(mask);
+	err = ceph_mdsc_do_request(mdsc, NULL, req);
+	ceph_mdsc_put_request(req);
+	dout("do_getattr result=%d\n", err);
+	return err;
+}
+
+
+/*
+ * Check inode permissions.  We need fresh AUTH_SHARED metadata
+ * (mode/uid/gid) before deferring to the generic handler.
+ */
+int ceph_permission(struct inode *inode, int mask)
+{
+	int err;
+
+	/* refreshing caps may block on an MDS round trip */
+	if (mask & MAY_NOT_BLOCK)
+		return -ECHILD;
+
+	err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED);
+	if (err)
+		return err;
+
+	return generic_permission(inode, mask);
+}
+
+/*
+ * Get all attributes. Hopefully someday we'll have a statlite()
+ * and can limit the fields we require to be accurate.
+ */
+int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		 struct kstat *stat)
+{
+	struct inode *inode = dentry->d_inode;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	int err;
+
+	err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL);
+	if (!err) {
+		generic_fillattr(inode, stat);
+		stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino);
+		/* expose the snapshot id as the device for snapped inodes */
+		if (ceph_snap(inode) != CEPH_NOSNAP)
+			stat->dev = ceph_snap(inode);
+		else
+			stat->dev = 0;
+		if (S_ISDIR(inode->i_mode)) {
+			/* dir "size": recursive bytes, or entry count */
+			if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb),
+						RBYTES))
+				stat->size = ci->i_rbytes;
+			else
+				stat->size = ci->i_files + ci->i_subdirs;
+			stat->blocks = 0;
+			stat->blksize = 65536;
+		}
+	}
+	return err;
+}
diff --git a/ceph/ioctl.c b/ceph/ioctl.c
new file mode 100644
index 0000000..2042fd1
--- /dev/null
+++ b/ceph/ioctl.c
@@ -0,0 +1,296 @@
+#include <linux/in.h>
+
+#include "super.h"
+#include "mds_client.h"
+#include <linux/ceph/ceph_debug.h>
+
+#include "ioctl.h"
+
+
+/*
+ * ioctls
+ */
+
+/*
+ * get and set the file layout
+ */
+static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
+{
+	struct ceph_inode_info *ci = ceph_inode(file_inode(file));
+	struct ceph_ioctl_layout l;
+	int err;
+
+	/* refresh the cached layout before reporting it */
+	err = ceph_do_getattr(file_inode(file), CEPH_STAT_CAP_LAYOUT);
+	if (err)
+		return err;
+
+	l.stripe_unit = ceph_file_layout_su(ci->i_layout);
+	l.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
+	l.object_size = ceph_file_layout_object_size(ci->i_layout);
+	l.data_pool = le32_to_cpu(ci->i_layout.fl_pg_pool);
+	l.preferred_osd = (s32)-1;	/* obsolete field; always -1 */
+	if (copy_to_user(arg, &l, sizeof(l)))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * Sanity-check a layout: striping parameters must be page-aligned and
+ * consistent, and the data pool must exist in the current mdsmap.
+ * Returns 0 if valid, -EINVAL otherwise.
+ */
+static long __validate_layout(struct ceph_mds_client *mdsc,
+			      struct ceph_ioctl_layout *l)
+{
+	int i, err;
+
+	/* object size and stripe unit must be page multiples, and the
+	 * stripe unit (when set) must evenly divide the object size */
+	if ((l->object_size & ~PAGE_MASK) ||
+	    (l->stripe_unit & ~PAGE_MASK) ||
+	    (l->stripe_unit != 0 &&
+	     ((unsigned)l->object_size % (unsigned)l->stripe_unit)))
+		return -EINVAL;
+
+	/* make sure it's a valid data pool */
+	err = -EINVAL;
+	mutex_lock(&mdsc->mutex);
+	for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++) {
+		if (mdsc->mdsmap->m_data_pg_pools[i] == l->data_pool) {
+			err = 0;
+			break;
+		}
+	}
+	mutex_unlock(&mdsc->mutex);
+
+	return err;
+}
+
+/* CEPH_IOC_SET_LAYOUT: change the layout of a (newly created) file */
+static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
+{
+	struct inode *inode = file_inode(file);
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+	struct ceph_mds_request *req;
+	struct ceph_ioctl_layout l;
+	struct ceph_inode_info *ci = ceph_inode(file_inode(file));
+	struct ceph_ioctl_layout nl;
+	int err;
+
+	if (copy_from_user(&l, arg, sizeof(l)))
+		return -EFAULT;
+
+	/* validate changed params against current layout */
+	err = ceph_do_getattr(file_inode(file), CEPH_STAT_CAP_LAYOUT);
+	if (err)
+		return err;
+
+	/* merge: zero in a user-supplied field means "keep current value" */
+	memset(&nl, 0, sizeof(nl));
+	if (l.stripe_count)
+		nl.stripe_count = l.stripe_count;
+	else
+		nl.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
+	if (l.stripe_unit)
+		nl.stripe_unit = l.stripe_unit;
+	else
+		nl.stripe_unit = ceph_file_layout_su(ci->i_layout);
+	if (l.object_size)
+		nl.object_size = l.object_size;
+	else
+		nl.object_size = ceph_file_layout_object_size(ci->i_layout);
+	if (l.data_pool)
+		nl.data_pool = l.data_pool;
+	else
+		nl.data_pool = ceph_file_layout_pg_pool(ci->i_layout);
+
+	/* this is obsolete, and always -1 (all ones, so byte order is moot) */
+	nl.preferred_osd = le64_to_cpu(-1);
+
+	err = __validate_layout(mdsc, &nl);
+	if (err)
+		return err;
+
+	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETLAYOUT,
+				       USE_AUTH_MDS);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+	req->r_inode = inode;
+	ihold(inode);
+	req->r_num_caps = 1;
+
+	req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL;
+
+	/*
+	 * NOTE(review): the request carries the raw user values (0 meaning
+	 * "leave unchanged"), while only the merged result was validated
+	 * above -- presumably the MDS applies the same merge; verify.
+	 */
+	req->r_args.setlayout.layout.fl_stripe_unit =
+		cpu_to_le32(l.stripe_unit);
+	req->r_args.setlayout.layout.fl_stripe_count =
+		cpu_to_le32(l.stripe_count);
+	req->r_args.setlayout.layout.fl_object_size =
+		cpu_to_le32(l.object_size);
+	req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool);
+
+	err = ceph_mdsc_do_request(mdsc, NULL, req);
+	ceph_mdsc_put_request(req);
+	return err;
+}
+
+/*
+ * Set a layout policy on a directory inode.  All items in the tree
+ * rooted at this inode will inherit this layout on creation
+ * (it doesn't apply retroactively), unless a subdirectory has its
+ * own layout policy.
+ */
+static long ceph_ioctl_set_layout_policy(struct file *file, void __user *arg)
+{
+	struct inode *inode = file_inode(file);
+	struct ceph_mds_request *req;
+	struct ceph_ioctl_layout l;
+	int err;
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+
+	/* copy and validate */
+	if (copy_from_user(&l, arg, sizeof(l)))
+		return -EFAULT;
+
+	err = __validate_layout(mdsc, &l);
+	if (err)
+		return err;
+
+	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETDIRLAYOUT,
+				       USE_AUTH_MDS);
+
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+	req->r_inode = inode;
+	ihold(inode);
+	req->r_num_caps = 1;
+
+	req->r_args.setlayout.layout.fl_stripe_unit =
+			cpu_to_le32(l.stripe_unit);
+	req->r_args.setlayout.layout.fl_stripe_count =
+			cpu_to_le32(l.stripe_count);
+	req->r_args.setlayout.layout.fl_object_size =
+			cpu_to_le32(l.object_size);
+	req->r_args.setlayout.layout.fl_pg_pool =
+			cpu_to_le32(l.data_pool);
+
+	err = ceph_mdsc_do_request(mdsc, inode, req);
+	ceph_mdsc_put_request(req);
+	return err;
+}
+
+/*
+ * Return object name, size/offset information, and location (OSD
+ * number, network address) for a given file offset.
+ */
+static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
+{
+	struct ceph_ioctl_dataloc dl;
+	struct inode *inode = file_inode(file);
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_osd_client *osdc =
+		&ceph_sb_to_client(inode->i_sb)->client->osdc;
+	struct ceph_object_locator oloc;
+	struct ceph_object_id oid;
+	u64 len = 1, olen;
+	u64 tmp;
+	struct ceph_pg pgid;
+	int r;
+
+	/* copy and validate */
+	if (copy_from_user(&dl, arg, sizeof(dl)))
+		return -EFAULT;
+
+	/* hold the osdmap steady while we map and look up placement */
+	down_read(&osdc->map_sem);
+	r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
+					  &dl.object_no, &dl.object_offset,
+					  &olen);
+	if (r < 0) {
+		up_read(&osdc->map_sem);
+		return -EIO;
+	}
+	/* round file_offset down to the start of the containing object */
+	dl.file_offset -= dl.object_offset;
+	dl.object_size = ceph_file_layout_object_size(ci->i_layout);
+	dl.block_size = ceph_file_layout_su(ci->i_layout);
+
+	/* block_offset = object_offset % block_size */
+	tmp = dl.object_offset;
+	dl.block_offset = do_div(tmp, dl.block_size);
+
+	snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx",
+		 ceph_ino(inode), dl.object_no);
+
+	oloc.pool = ceph_file_layout_pg_pool(ci->i_layout);
+	ceph_oid_set_name(&oid, dl.object_name);
+
+	r = ceph_oloc_oid_to_pg(osdc->osdmap, &oloc, &oid, &pgid);
+	if (r < 0) {
+		up_read(&osdc->map_sem);
+		return r;
+	}
+
+	/* primary OSD for the pg, or < 0 if none is currently mapped */
+	dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
+	if (dl.osd >= 0) {
+		struct ceph_entity_addr *a =
+			ceph_osd_addr(osdc->osdmap, dl.osd);
+		if (a)
+			memcpy(&dl.osd_addr, &a->in_addr, sizeof(dl.osd_addr));
+	} else {
+		memset(&dl.osd_addr, 0, sizeof(dl.osd_addr));
+	}
+	up_read(&osdc->map_sem);
+
+	/* send result back to user */
+	if (copy_to_user(arg, &dl, sizeof(dl)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * CEPH_IOC_LAZYIO: mark this fd LAZY, relaxing consistency so buffered
+ * IO remains allowed even with multiple writers.  Moves the open-mode
+ * accounting into the LAZY bucket and re-evaluates wanted caps.
+ */
+static long ceph_ioctl_lazyio(struct file *file)
+{
+	struct ceph_file_info *fi = file->private_data;
+	struct inode *inode = file_inode(file);
+	struct ceph_inode_info *ci = ceph_inode(inode);
+
+	if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
+		spin_lock(&ci->i_ceph_lock);
+		ci->i_nr_by_mode[fi->fmode]--;
+		fi->fmode |= CEPH_FILE_MODE_LAZY;
+		ci->i_nr_by_mode[fi->fmode]++;
+		spin_unlock(&ci->i_ceph_lock);
+		dout("ioctl_lazyio: file %p marked lazy\n", file);
+
+		/* re-evaluate which caps we want for the new mode */
+		ceph_check_caps(ci, 0, NULL);
+	} else {
+		dout("ioctl_lazyio: file %p already lazy\n", file);
+	}
+	return 0;
+}
+
+/* CEPH_IOC_SYNCIO: force the sync (page-cache-bypassing) IO path */
+static long ceph_ioctl_syncio(struct file *file)
+{
+	struct ceph_file_info *fi = file->private_data;
+
+	fi->flags |= CEPH_F_SYNC;
+	return 0;
+}
+
+/* unlocked_ioctl entry point: dispatch CEPH_IOC_* commands */
+long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg);
+	switch (cmd) {
+	case CEPH_IOC_GET_LAYOUT:
+		return ceph_ioctl_get_layout(file, (void __user *)arg);
+
+	case CEPH_IOC_SET_LAYOUT:
+		return ceph_ioctl_set_layout(file, (void __user *)arg);
+
+	case CEPH_IOC_SET_LAYOUT_POLICY:
+		return ceph_ioctl_set_layout_policy(file, (void __user *)arg);
+
+	case CEPH_IOC_GET_DATALOC:
+		return ceph_ioctl_get_dataloc(file, (void __user *)arg);
+
+	case CEPH_IOC_LAZYIO:
+		return ceph_ioctl_lazyio(file);
+
+	case CEPH_IOC_SYNCIO:
+		return ceph_ioctl_syncio(file);
+	}
+
+	/* unknown command */
+	return -ENOTTY;
+}
diff --git a/ceph/ioctl.h b/ceph/ioctl.h
new file mode 100644
index 0000000..c77028a
--- /dev/null
+++ b/ceph/ioctl.h
@@ -0,0 +1,100 @@
+#ifndef FS_CEPH_IOCTL_H
+#define FS_CEPH_IOCTL_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#define CEPH_IOCTL_MAGIC 0x97
+
+/*
+ * CEPH_IOC_GET_LAYOUT - get file layout or dir layout policy
+ * CEPH_IOC_SET_LAYOUT - set file layout
+ * CEPH_IOC_SET_LAYOUT_POLICY - set dir layout policy
+ *
+ * The file layout specifies how file data is striped over objects in
+ * the distributed object store, which object pool they belong to (if
+ * it differs from the default), and an optional 'preferred osd' to
+ * store them on.
+ *
+ * Files get a new layout based on the policy set on the containing
+ * directory or one of its ancestors. The GET_LAYOUT ioctl will let
+ * you examine the layout for a file or the policy on a directory.
+ *
+ * SET_LAYOUT will let you set a layout on a newly created file. This
+ * only works immediately after the file is created and before any
+ * data is written to it.
+ *
+ * SET_LAYOUT_POLICY will let you set a layout policy (default layout)
+ * on a directory that will apply to any new files created in that
+ * directory (or any child directory that doesn't specify a layout of
+ * its own).
+ */
+
+/* use u64 to align sanely on all archs */
+struct ceph_ioctl_layout {
+	__u64 stripe_unit, stripe_count, object_size;	/* striping, bytes */
+	__u64 data_pool;	/* RADOS pool id */
+
+	/* obsolete. new values ignored, always return -1 */
+	__s64 preferred_osd;
+};
+
+#define CEPH_IOC_GET_LAYOUT _IOR(CEPH_IOCTL_MAGIC, 1, \
+ struct ceph_ioctl_layout)
+#define CEPH_IOC_SET_LAYOUT _IOW(CEPH_IOCTL_MAGIC, 2, \
+ struct ceph_ioctl_layout)
+#define CEPH_IOC_SET_LAYOUT_POLICY _IOW(CEPH_IOCTL_MAGIC, 5, \
+ struct ceph_ioctl_layout)
+
+/*
+ * CEPH_IOC_GET_DATALOC - get location of file data in the cluster
+ *
+ * Extract identity, address of the OSD and object storing a given
+ * file offset.
+ */
+/* only file_offset is an input; all other fields are ioctl outputs */
+struct ceph_ioctl_dataloc {
+	__u64 file_offset;           /* in+out: file offset */
+	__u64 object_offset;         /* out: offset in object */
+	__u64 object_no;             /* out: object # */
+	__u64 object_size;           /* out: object size */
+	char object_name[64];        /* out: object name */
+	__u64 block_offset;          /* out: offset in block */
+	__u64 block_size;            /* out: block length */
+	__s64 osd;                   /* out: osd # */
+	struct sockaddr_storage osd_addr; /* out: osd address */
+};
+
+#define CEPH_IOC_GET_DATALOC _IOWR(CEPH_IOCTL_MAGIC, 3, \
+ struct ceph_ioctl_dataloc)
+
+/*
+ * CEPH_IOC_LAZYIO - relax consistency
+ *
+ * Normally Ceph switches to synchronous IO when multiple clients have
+ * the file open (and one or more of them for write). Reads and writes bypass the
+ * page cache and go directly to the OSD. Setting this flag on a file
+ * descriptor will allow buffered IO for this file in cases where the
+ * application knows it won't interfere with other nodes (or doesn't
+ * care).
+ */
+#define CEPH_IOC_LAZYIO _IO(CEPH_IOCTL_MAGIC, 4)
+
+/*
+ * CEPH_IOC_SYNCIO - force synchronous IO
+ *
+ * This ioctl sets a file flag that forces the synchronous IO that
+ * bypasses the page cache, even if it is not necessary. This is
+ * essentially the opposite behavior of IOC_LAZYIO. This forces the
+ * same read/write path as a file opened by multiple clients when one
+ * or more of those clients is opened for write.
+ *
+ * Note that this type of sync IO takes a different path than a file
+ * opened with O_SYNC/D_SYNC (writes hit the page cache and are
+ * immediately flushed on page boundaries). It is very similar to
+ * O_DIRECT (writes bypass the page cache) except that O_DIRECT writes
+ * are not copied (user page must remain stable) and O_DIRECT writes
+ * have alignment restrictions (on the buffer and file offset).
+ */
+#define CEPH_IOC_SYNCIO _IO(CEPH_IOCTL_MAGIC, 5)
+
+#endif
diff --git a/ceph/locks.c b/ceph/locks.c
new file mode 100644
index 0000000..1913988
--- /dev/null
+++ b/ceph/locks.c
@@ -0,0 +1,338 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/random.h>
+
+#include "super.h"
+#include "mds_client.h"
+#include <linux/ceph/pagelist.h>
+
+static u64 lock_secret;
+
+static inline u64 secure_addr(void *addr)
+{
+ /*
+ * Obfuscate the kernel pointer with the per-boot secret and force
+ * the most significant bit on, so that the MDS knows 'owner' alone
+ * is sufficient to identify the lock owner (old clients sent both
+ * 'owner' and 'pid').
+ */
+ return (lock_secret ^ (u64)(unsigned long)addr) | (1ULL << 63);
+}
+
+/* Seed the per-boot secret used by secure_addr() to obfuscate lock owners. */
+void __init ceph_flock_init(void)
+{
+ get_random_bytes(&lock_secret, sizeof(lock_secret));
+}
+
+/**
+ * Implement fcntl and flock locking functions.
+ */
+static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
+ int cmd, u8 wait, struct file_lock *fl)
+{
+ struct inode *inode = file_inode(file);
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+ struct ceph_mds_request *req;
+ int err;
+ u64 length = 0;
+ u64 owner;
+
+ req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ req->r_inode = inode;
+ ihold(inode);
+ req->r_num_caps = 1;
+
+ /* mds requires start and length rather than start and end */
+ if (LLONG_MAX == fl->fl_end)
+ length = 0;
+ else
+ length = fl->fl_end - fl->fl_start + 1;
+
+ /* owner must be stable for the life of the lock: fl_owner for
+ * POSIX locks, the struct file for flock locks */
+ if (lock_type == CEPH_LOCK_FCNTL)
+ owner = secure_addr(fl->fl_owner);
+ else
+ owner = secure_addr(fl->fl_file);
+
+ dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, "
+ "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type,
+ (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length,
+ wait, fl->fl_type);
+
+ req->r_args.filelock_change.rule = lock_type;
+ req->r_args.filelock_change.type = cmd;
+ req->r_args.filelock_change.owner = cpu_to_le64(owner);
+ req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
+ req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
+ req->r_args.filelock_change.length = cpu_to_le64(length);
+ req->r_args.filelock_change.wait = wait;
+
+ err = ceph_mdsc_do_request(mdsc, inode, req);
+
+ /*
+ * Only decode the GETFILELOCK reply when the request succeeded:
+ * on error r_reply_info.filelock_reply may never have been set
+ * up, and dereferencing it would be a NULL pointer dereference.
+ */
+ if (operation == CEPH_MDS_OP_GETFILELOCK && !err) {
+ fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
+ if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
+ fl->fl_type = F_RDLCK;
+ else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
+ fl->fl_type = F_WRLCK;
+ else
+ fl->fl_type = F_UNLCK;
+
+ fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
+ length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
+ le64_to_cpu(req->r_reply_info.filelock_reply->length);
+ if (length >= 1)
+ fl->fl_end = length - 1;
+ else
+ fl->fl_end = 0;
+
+ }
+ ceph_mdsc_put_request(req);
+ dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
+ "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type,
+ (int)operation, (u64)fl->fl_pid, fl->fl_start,
+ length, wait, fl->fl_type, err);
+ return err;
+}
+
+/**
+ * Attempt to set an fcntl lock.
+ * For now, this just goes away to the server. Later it may be more awesome.
+ */
+int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
+{
+ u8 lock_cmd;
+ int err;
+ u8 wait = 0;
+ u16 op = CEPH_MDS_OP_SETFILELOCK;
+
+ if (!(fl->fl_flags & FL_POSIX))
+ return -ENOLCK;
+ /* No mandatory locks */
+ if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
+ return -ENOLCK;
+
+ dout("ceph_lock, fl_owner: %p", fl->fl_owner);
+
+ /* set wait bit as appropriate, then make command as Ceph expects it*/
+ if (IS_GETLK(cmd))
+ op = CEPH_MDS_OP_GETFILELOCK;
+ else if (IS_SETLKW(cmd))
+ wait = 1;
+
+ /* translate the VFS lock type to the Ceph wire encoding */
+ if (F_RDLCK == fl->fl_type)
+ lock_cmd = CEPH_LOCK_SHARED;
+ else if (F_WRLCK == fl->fl_type)
+ lock_cmd = CEPH_LOCK_EXCL;
+ else
+ lock_cmd = CEPH_LOCK_UNLOCK;
+
+ /* take (or query) the lock on the MDS first ... */
+ err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
+ if (!err) {
+ if (op != CEPH_MDS_OP_GETFILELOCK) {
+ /* ... then mirror it into the local VFS lock table */
+ dout("mds locked, locking locally");
+ err = posix_lock_file(file, fl, NULL);
+ if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
+ /* undo! This should only happen if
+ * the kernel detects local
+ * deadlock. */
+ ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
+ CEPH_LOCK_UNLOCK, 0, fl);
+ dout("got %d on posix_lock_file, undid lock",
+ err);
+ }
+ }
+
+ } else if (err == -ERESTARTSYS) {
+ /* interrupted while waiting: drop the server-side lock so
+ * we don't leave it held with no local record */
+ dout("undoing lock\n");
+ ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
+ CEPH_LOCK_UNLOCK, 0, fl);
+ }
+ return err;
+}
+
+/*
+ * Attempt to set a flock (BSD-style) lock: lock on the MDS first, then
+ * mirror it locally with flock_lock_file_wait(), undoing the MDS lock
+ * if the local step fails or the wait is interrupted.
+ */
+int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
+{
+ u8 lock_cmd;
+ int err;
+ u8 wait = 0;
+
+ if (!(fl->fl_flags & FL_FLOCK))
+ return -ENOLCK;
+ /* No mandatory locks */
+ if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
+ return -ENOLCK;
+
+ dout("ceph_flock, fl_file: %p", fl->fl_file);
+
+ if (IS_SETLKW(cmd))
+ wait = 1;
+
+ /* translate the VFS lock type to the Ceph wire encoding */
+ if (F_RDLCK == fl->fl_type)
+ lock_cmd = CEPH_LOCK_SHARED;
+ else if (F_WRLCK == fl->fl_type)
+ lock_cmd = CEPH_LOCK_EXCL;
+ else
+ lock_cmd = CEPH_LOCK_UNLOCK;
+
+ err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
+ file, lock_cmd, wait, fl);
+ if (!err) {
+ err = flock_lock_file_wait(file, fl);
+ if (err) {
+ /* local step failed: release the MDS-side lock */
+ ceph_lock_message(CEPH_LOCK_FLOCK,
+ CEPH_MDS_OP_SETFILELOCK,
+ file, CEPH_LOCK_UNLOCK, 0, fl);
+ dout("got %d on flock_lock_file_wait, undid lock", err);
+ }
+ } else if (err == -ERESTARTSYS) {
+ dout("undoing lock\n");
+ ceph_lock_message(CEPH_LOCK_FLOCK,
+ CEPH_MDS_OP_SETFILELOCK,
+ file, CEPH_LOCK_UNLOCK, 0, fl);
+ }
+ return err;
+}
+
+/**
+ * Must be called with inode->i_lock already held. Fills in the passed
+ * counter variables, so you can prepare pagelist metadata before calling
+ * ceph_encode_locks.
+ */
+void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
+{
+ struct file_lock *fl;
+
+ *fcntl_count = 0;
+ *flock_count = 0;
+
+ /* Walk the inode's lock list once, classifying each entry. */
+ for (fl = inode->i_flock; fl; fl = fl->fl_next) {
+ if (fl->fl_flags & FL_POSIX)
+ (*fcntl_count)++;
+ else if (fl->fl_flags & FL_FLOCK)
+ (*flock_count)++;
+ }
+ dout("counted %d flock locks and %d fcntl locks",
+ *flock_count, *fcntl_count);
+}
+
+/**
+ * Encode the flock and fcntl locks for the given inode into the ceph_filelock
+ * array. Must be called with inode->i_lock already held.
+ * If we encounter more of a specific lock type than expected, return -ENOSPC.
+ */
+int ceph_encode_locks_to_buffer(struct inode *inode,
+ struct ceph_filelock *flocks,
+ int num_fcntl_locks, int num_flock_locks)
+{
+ struct file_lock *lock;
+ int err = 0;
+ int seen_fcntl = 0;
+ int seen_flock = 0;
+ int l = 0; /* next free slot in flocks[] */
+
+ dout("encoding %d flock and %d fcntl locks", num_flock_locks,
+ num_fcntl_locks);
+
+ /* two passes so all fcntl locks land before all flock locks,
+ * matching the layout ceph_locks_to_pagelist() expects */
+ for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
+ if (lock->fl_flags & FL_POSIX) {
+ ++seen_fcntl;
+ if (seen_fcntl > num_fcntl_locks) {
+ err = -ENOSPC;
+ goto fail;
+ }
+ err = lock_to_ceph_filelock(lock, &flocks[l]);
+ if (err)
+ goto fail;
+ ++l;
+ }
+ }
+ for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
+ if (lock->fl_flags & FL_FLOCK) {
+ ++seen_flock;
+ if (seen_flock > num_flock_locks) {
+ err = -ENOSPC;
+ goto fail;
+ }
+ err = lock_to_ceph_filelock(lock, &flocks[l]);
+ if (err)
+ goto fail;
+ ++l;
+ }
+ }
+fail:
+ return err;
+}
+
+/**
+ * Copy the encoded flock and fcntl locks into the pagelist.
+ * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
+ * sequential flock locks.
+ * Returns zero on success.
+ */
+int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
+ struct ceph_pagelist *pagelist,
+ int num_fcntl_locks, int num_flock_locks)
+{
+ __le32 nlocks;
+ int err;
+
+ /* fcntl lock count, then the fcntl lock records ... */
+ nlocks = cpu_to_le32(num_fcntl_locks);
+ err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
+ if (err)
+ return err;
+
+ err = ceph_pagelist_append(pagelist, flocks,
+ num_fcntl_locks * sizeof(*flocks));
+ if (err)
+ return err;
+
+ /* ... then the flock count and the flock records */
+ nlocks = cpu_to_le32(num_flock_locks);
+ err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
+ if (err)
+ return err;
+
+ return ceph_pagelist_append(pagelist,
+ &flocks[num_fcntl_locks],
+ num_flock_locks * sizeof(*flocks));
+}
+
+/*
+ * Given a pointer to a lock, convert it to a ceph filelock
+ */
+int lock_to_ceph_filelock(struct file_lock *lock,
+ struct ceph_filelock *cephlock)
+{
+ int err = 0;
+ cephlock->start = cpu_to_le64(lock->fl_start);
+ cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
+ /* client id is filled in by the MDS, not by us */
+ cephlock->client = cpu_to_le64(0);
+ cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
+ /* same owner derivation as ceph_lock_message() so the MDS can
+ * match records from both paths */
+ if (lock->fl_flags & FL_POSIX)
+ cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
+ else
+ cephlock->owner = cpu_to_le64(secure_addr(lock->fl_file));
+
+ switch (lock->fl_type) {
+ case F_RDLCK:
+ cephlock->type = CEPH_LOCK_SHARED;
+ break;
+ case F_WRLCK:
+ cephlock->type = CEPH_LOCK_EXCL;
+ break;
+ case F_UNLCK:
+ cephlock->type = CEPH_LOCK_UNLOCK;
+ break;
+ default:
+ dout("Have unknown lock type %d", lock->fl_type);
+ err = -EINVAL;
+ }
+
+ return err;
+}
diff --git a/ceph/mds_client.c b/ceph/mds_client.c
new file mode 100644
index 0000000..2b4d093
--- /dev/null
+++ b/ceph/mds_client.c
@@ -0,0 +1,3665 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/fs.h>
+#include <linux/wait.h>
+#include <linux/slab.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "super.h"
+#include "mds_client.h"
+
+#include <linux/ceph/ceph_features.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/pagelist.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/debugfs.h>
+
+/*
+ * A cluster of MDS (metadata server) daemons is responsible for
+ * managing the file system namespace (the directory hierarchy and
+ * inodes) and for coordinating shared access to storage. Metadata is
+ * partitioned hierarchically across a number of servers, and that
+ * partition varies over time as the cluster adjusts the distribution
+ * in order to balance load.
+ *
+ * The MDS client is primarily responsible for managing synchronous
+ * metadata requests for operations like open, unlink, and so forth.
+ * If there is a MDS failure, we find out about it when we (possibly
+ * request and) receive a new MDS map, and can resubmit affected
+ * requests.
+ *
+ * For the most part, though, we take advantage of a lossless
+ * communications channel to the MDS, and do not need to worry about
+ * timing out or resubmitting requests.
+ *
+ * We maintain a stateful "session" with each MDS we interact with.
+ * Within each session, we send periodic heartbeat messages to ensure
+ * any capabilities or leases we have been issued remain valid. If
+ * the session times out and goes stale, our leases and capabilities
+ * are no longer valid.
+ */
+
+/* State carried through the MDS reconnect encoding path. */
+struct ceph_reconnect_state {
+ int nr_caps; /* presumably number of caps encoded so far -- verify at use sites */
+ struct ceph_pagelist *pagelist; /* destination for the reconnect payload */
+ bool flock; /* NOTE(review): looks like "peer understands file-lock records" -- confirm */
+};
+
+static void __wake_requests(struct ceph_mds_client *mdsc,
+ struct list_head *head);
+
+static const struct ceph_connection_operations mds_con_ops;
+
+
+/*
+ * mds reply parsing
+ */
+
+/*
+ * parse individual inode info
+ */
+static int parse_reply_info_in(void **p, void *end,
+ struct ceph_mds_reply_info_in *info,
+ u64 features)
+{
+ int err = -EIO;
+
+ /* the inode record plus its variable-length fragtree splits */
+ info->in = *p;
+ *p += sizeof(struct ceph_mds_reply_inode) +
+ sizeof(*info->in->fragtree.splits) *
+ le32_to_cpu(info->in->fragtree.nsplits);
+
+ ceph_decode_32_safe(p, end, info->symlink_len, bad);
+ ceph_decode_need(p, end, info->symlink_len, bad);
+ info->symlink = *p;
+ *p += info->symlink_len;
+
+ /* dir layout is only present when the peer advertises the feature */
+ if (features & CEPH_FEATURE_DIRLAYOUTHASH)
+ ceph_decode_copy_safe(p, end, &info->dir_layout,
+ sizeof(info->dir_layout), bad);
+ else
+ memset(&info->dir_layout, 0, sizeof(info->dir_layout));
+
+ ceph_decode_32_safe(p, end, info->xattr_len, bad);
+ ceph_decode_need(p, end, info->xattr_len, bad);
+ info->xattr_data = *p;
+ *p += info->xattr_len;
+ return 0;
+bad:
+ return err;
+}
+
+/*
+ * parse a normal reply, which may contain a (dir+)dentry and/or a
+ * target inode.
+ */
+static int parse_reply_info_trace(void **p, void *end,
+ struct ceph_mds_reply_info_parsed *info,
+ u64 features)
+{
+ int err;
+
+ /* optional (dir inode, dirfrag, dentry name, lease) section */
+ if (info->head->is_dentry) {
+ err = parse_reply_info_in(p, end, &info->diri, features);
+ if (err < 0)
+ goto out_bad;
+
+ if (unlikely(*p + sizeof(*info->dirfrag) > end))
+ goto bad;
+ info->dirfrag = *p;
+ *p += sizeof(*info->dirfrag) +
+ sizeof(u32)*le32_to_cpu(info->dirfrag->ndist);
+ if (unlikely(*p > end))
+ goto bad;
+
+ ceph_decode_32_safe(p, end, info->dname_len, bad);
+ ceph_decode_need(p, end, info->dname_len, bad);
+ info->dname = *p;
+ *p += info->dname_len;
+ info->dlease = *p;
+ *p += sizeof(*info->dlease);
+ }
+
+ /* optional target inode section */
+ if (info->head->is_target) {
+ err = parse_reply_info_in(p, end, &info->targeti, features);
+ if (err < 0)
+ goto out_bad;
+ }
+
+ /* the trace must be consumed exactly; trailing bytes are an error */
+ if (unlikely(*p != end))
+ goto bad;
+ return 0;
+
+bad:
+ err = -EIO;
+out_bad:
+ pr_err("problem parsing mds trace %d\n", err);
+ return err;
+}
+
+/*
+ * parse readdir results
+ */
+static int parse_reply_info_dir(void **p, void *end,
+ struct ceph_mds_reply_info_parsed *info,
+ u64 features)
+{
+ u32 num, i = 0;
+ int err;
+
+ /* dirfrag header plus its variable-length replica list */
+ info->dir_dir = *p;
+ if (*p + sizeof(*info->dir_dir) > end)
+ goto bad;
+ *p += sizeof(*info->dir_dir) +
+ sizeof(u32)*le32_to_cpu(info->dir_dir->ndist);
+ if (*p > end)
+ goto bad;
+
+ /* entry count (u32) plus the dir_end and dir_complete flag bytes */
+ ceph_decode_need(p, end, sizeof(num) + 2, bad);
+ num = ceph_decode_32(p);
+ info->dir_end = ceph_decode_8(p);
+ info->dir_complete = ceph_decode_8(p);
+ if (num == 0)
+ goto done;
+
+ /* carve the preallocated dir_in buffer into parallel arrays:
+ * [inode infos][dname ptrs][dname lens][lease ptrs] */
+ BUG_ON(!info->dir_in);
+ info->dir_dname = (void *)(info->dir_in + num);
+ info->dir_dname_len = (void *)(info->dir_dname + num);
+ info->dir_dlease = (void *)(info->dir_dname_len + num);
+ if ((unsigned long)(info->dir_dlease + num) >
+ (unsigned long)info->dir_in + info->dir_buf_size) {
+ pr_err("dir contents are larger than expected\n");
+ WARN_ON(1);
+ goto bad;
+ }
+
+ info->dir_nr = num;
+ while (num) {
+ /* dentry */
+ ceph_decode_need(p, end, sizeof(u32)*2, bad);
+ info->dir_dname_len[i] = ceph_decode_32(p);
+ ceph_decode_need(p, end, info->dir_dname_len[i], bad);
+ info->dir_dname[i] = *p;
+ *p += info->dir_dname_len[i];
+ dout("parsed dir dname '%.*s'\n", info->dir_dname_len[i],
+ info->dir_dname[i]);
+ info->dir_dlease[i] = *p;
+ *p += sizeof(struct ceph_mds_reply_lease);
+
+ /* inode */
+ err = parse_reply_info_in(p, end, &info->dir_in[i], features);
+ if (err < 0)
+ goto out_bad;
+ i++;
+ num--;
+ }
+
+done:
+ if (*p != end)
+ goto bad;
+ return 0;
+
+bad:
+ err = -EIO;
+out_bad:
+ pr_err("problem parsing dir contents %d\n", err);
+ return err;
+}
+
+/*
+ * parse fcntl F_GETLK results
+ */
+static int parse_reply_info_filelock(void **p, void *end,
+ struct ceph_mds_reply_info_parsed *info,
+ u64 features)
+{
+ /* The payload must be exactly one filelock reply record. */
+ if (*p + sizeof(*info->filelock_reply) > end)
+ return -EIO;
+
+ info->filelock_reply = *p;
+ *p += sizeof(*info->filelock_reply);
+
+ if (unlikely(*p != end))
+ return -EIO;
+
+ return 0;
+}
+
+/*
+ * parse create results
+ */
+static int parse_reply_info_create(void **p, void *end,
+ struct ceph_mds_reply_info_parsed *info,
+ u64 features)
+{
+ /* an empty payload is valid: it just means no created ino */
+ if (features & CEPH_FEATURE_REPLY_CREATE_INODE) {
+ if (*p == end) {
+ info->has_create_ino = false;
+ } else {
+ info->has_create_ino = true;
+ info->ino = ceph_decode_64(p);
+ }
+ }
+
+ /* without the feature, any trailing bytes are a protocol error */
+ if (unlikely(*p != end))
+ goto bad;
+ return 0;
+
+bad:
+ return -EIO;
+}
+
+/*
+ * parse extra results
+ */
+static int parse_reply_info_extra(void **p, void *end,
+ struct ceph_mds_reply_info_parsed *info,
+ u64 features)
+{
+ /* Dispatch on the operation recorded in the reply head. */
+ switch (info->head->op) {
+ case CEPH_MDS_OP_GETFILELOCK:
+ return parse_reply_info_filelock(p, end, info, features);
+ case CEPH_MDS_OP_READDIR:
+ case CEPH_MDS_OP_LSSNAP:
+ return parse_reply_info_dir(p, end, info, features);
+ case CEPH_MDS_OP_CREATE:
+ return parse_reply_info_create(p, end, info, features);
+ default:
+ return -EIO;
+ }
+}
+
+/*
+ * parse entire mds reply
+ */
+static int parse_reply_info(struct ceph_msg *msg,
+ struct ceph_mds_reply_info_parsed *info,
+ u64 features)
+{
+ void *p, *end;
+ u32 len;
+ int err;
+
+ /* reply layout: head, then three length-prefixed sections */
+ info->head = msg->front.iov_base;
+ p = msg->front.iov_base + sizeof(struct ceph_mds_reply_head);
+ end = p + msg->front.iov_len - sizeof(struct ceph_mds_reply_head);
+
+ /* trace */
+ ceph_decode_32_safe(&p, end, len, bad);
+ if (len > 0) {
+ ceph_decode_need(&p, end, len, bad);
+ err = parse_reply_info_trace(&p, p+len, info, features);
+ if (err < 0)
+ goto out_bad;
+ }
+
+ /* extra */
+ ceph_decode_32_safe(&p, end, len, bad);
+ if (len > 0) {
+ ceph_decode_need(&p, end, len, bad);
+ err = parse_reply_info_extra(&p, p+len, info, features);
+ if (err < 0)
+ goto out_bad;
+ }
+
+ /* snap blob */
+ ceph_decode_32_safe(&p, end, len, bad);
+ info->snapblob_len = len;
+ info->snapblob = p;
+ p += len;
+
+ /* the message must be consumed exactly */
+ if (p != end)
+ goto bad;
+ return 0;
+
+bad:
+ err = -EIO;
+out_bad:
+ pr_err("mds parse_reply err %d\n", err);
+ return err;
+}
+
+static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
+{
+ /* dir_in, when set, is the base of the page allocation that backs
+ * the parsed readdir arrays (see parse_reply_info_dir). */
+ if (info->dir_in)
+ free_pages((unsigned long)info->dir_in,
+ get_order(info->dir_buf_size));
+}
+
+
+/*
+ * sessions
+ */
+/* Human-readable name for a CEPH_MDS_SESSION_* state, for debug output. */
+static const char *session_state_name(int s)
+{
+ switch (s) {
+ case CEPH_MDS_SESSION_NEW: return "new";
+ case CEPH_MDS_SESSION_OPENING: return "opening";
+ case CEPH_MDS_SESSION_OPEN: return "open";
+ case CEPH_MDS_SESSION_HUNG: return "hung";
+ case CEPH_MDS_SESSION_CLOSING: return "closing";
+ case CEPH_MDS_SESSION_RESTARTING: return "restarting";
+ case CEPH_MDS_SESSION_RECONNECTING: return "reconnecting";
+ default: return "???";
+ }
+}
+
+/*
+ * Take a reference on a session, but only if it is still live
+ * (s_ref != 0). Returns the session on success, NULL if the last
+ * reference was already dropped.
+ */
+static struct ceph_mds_session *get_session(struct ceph_mds_session *s)
+{
+ if (atomic_inc_not_zero(&s->s_ref)) {
+ dout("mdsc get_session %p %d -> %d\n", s,
+ atomic_read(&s->s_ref)-1, atomic_read(&s->s_ref));
+ return s;
+ } else {
+ dout("mdsc get_session %p 0 -- FAIL", s);
+ return NULL;
+ }
+}
+
+/* Drop a session reference; on the last put, destroy the authorizer
+ * and free the session. */
+void ceph_put_mds_session(struct ceph_mds_session *s)
+{
+ dout("mdsc put_session %p %d -> %d\n", s,
+ atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1);
+ if (atomic_dec_and_test(&s->s_ref)) {
+ if (s->s_auth.authorizer)
+ ceph_auth_destroy_authorizer(
+ s->s_mdsc->fsc->client->monc.auth,
+ s->s_auth.authorizer);
+ kfree(s);
+ }
+}
+
+/*
+ * called under mdsc->mutex
+ */
+struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc,
+ int mds)
+{
+ struct ceph_mds_session *session;
+
+ /* sessions[] is indexed directly by mds rank */
+ if (mds >= mdsc->max_sessions || mdsc->sessions[mds] == NULL)
+ return NULL;
+ session = mdsc->sessions[mds];
+ dout("lookup_mds_session %p %d\n", session,
+ atomic_read(&session->s_ref));
+ get_session(session);
+ return session;
+}
+
+static bool __have_session(struct ceph_mds_client *mdsc, int mds)
+{
+ /* True iff a session struct is registered for this mds rank. */
+ return mds < mdsc->max_sessions && mdsc->sessions[mds] != NULL;
+}
+
+static int __verify_registered_session(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *s)
+{
+ /* 0 if s is still the registered session for its rank, else -ENOENT. */
+ if (s->s_mds < mdsc->max_sessions && mdsc->sessions[s->s_mds] == s)
+ return 0;
+ return -ENOENT;
+}
+
+/*
+ * create+register a new session for given mds.
+ * called under mdsc->mutex.
+ */
+static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
+ int mds)
+{
+ struct ceph_mds_session *s;
+
+ if (mds >= mdsc->mdsmap->m_max_mds)
+ return ERR_PTR(-EINVAL);
+
+ s = kzalloc(sizeof(*s), GFP_NOFS);
+ if (!s)
+ return ERR_PTR(-ENOMEM);
+ s->s_mdsc = mdsc;
+ s->s_mds = mds;
+ s->s_state = CEPH_MDS_SESSION_NEW;
+ s->s_ttl = 0;
+ s->s_seq = 0;
+ mutex_init(&s->s_mutex);
+
+ ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr);
+
+ spin_lock_init(&s->s_gen_ttl_lock);
+ s->s_cap_gen = 0;
+ /* already-expired ttl so nothing is trusted until first renew */
+ s->s_cap_ttl = jiffies - 1;
+
+ spin_lock_init(&s->s_cap_lock);
+ s->s_renew_requested = 0;
+ s->s_renew_seq = 0;
+ INIT_LIST_HEAD(&s->s_caps);
+ s->s_nr_caps = 0;
+ s->s_trim_caps = 0;
+ atomic_set(&s->s_ref, 1);
+ INIT_LIST_HEAD(&s->s_waiting);
+ INIT_LIST_HEAD(&s->s_unsafe);
+ s->s_num_cap_releases = 0;
+ s->s_cap_reconnect = 0;
+ s->s_cap_iterator = NULL;
+ INIT_LIST_HEAD(&s->s_cap_releases);
+ INIT_LIST_HEAD(&s->s_cap_releases_done);
+ INIT_LIST_HEAD(&s->s_cap_flushing);
+ INIT_LIST_HEAD(&s->s_cap_snaps_flushing);
+
+ dout("register_session mds%d\n", mds);
+ if (mds >= mdsc->max_sessions) {
+ /* grow sessions[] to the next power of two above mds */
+ int newmax = 1 << get_count_order(mds+1);
+ struct ceph_mds_session **sa;
+
+ dout("register_session realloc to %d\n", newmax);
+ sa = kcalloc(newmax, sizeof(void *), GFP_NOFS);
+ if (sa == NULL)
+ goto fail_realloc;
+ if (mdsc->sessions) {
+ memcpy(sa, mdsc->sessions,
+ mdsc->max_sessions * sizeof(void *));
+ kfree(mdsc->sessions);
+ }
+ mdsc->sessions = sa;
+ mdsc->max_sessions = newmax;
+ }
+ mdsc->sessions[mds] = s;
+ atomic_inc(&s->s_ref); /* one ref to sessions[], one to caller */
+
+ ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds,
+ ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
+
+ return s;
+
+fail_realloc:
+ kfree(s);
+ return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * called under mdsc->mutex
+ */
+static void __unregister_session(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *s)
+{
+ dout("__unregister_session mds%d %p\n", s->s_mds, s);
+ BUG_ON(mdsc->sessions[s->s_mds] != s);
+ mdsc->sessions[s->s_mds] = NULL;
+ ceph_con_close(&s->s_con);
+ /* drop the sessions[] reference taken in register_session() */
+ ceph_put_mds_session(s);
+}
+
+/*
+ * drop session refs in request.
+ *
+ * should be last request ref, or hold mdsc->mutex
+ */
+static void put_request_session(struct ceph_mds_request *req)
+{
+ struct ceph_mds_session *s = req->r_session;
+
+ /* Drop the request's session reference, if any, and clear it. */
+ if (s) {
+ req->r_session = NULL;
+ ceph_put_mds_session(s);
+ }
+}
+
+/* kref release callback: drop every reference and pin the request holds. */
+void ceph_mdsc_release_request(struct kref *kref)
+{
+ struct ceph_mds_request *req = container_of(kref,
+ struct ceph_mds_request,
+ r_kref);
+ destroy_reply_info(&req->r_reply_info);
+ if (req->r_request)
+ ceph_msg_put(req->r_request);
+ if (req->r_reply)
+ ceph_msg_put(req->r_reply);
+ if (req->r_inode) {
+ ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
+ iput(req->r_inode);
+ }
+ if (req->r_locked_dir)
+ ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
+ if (req->r_target_inode)
+ iput(req->r_target_inode);
+ if (req->r_dentry)
+ dput(req->r_dentry);
+ if (req->r_old_dentry)
+ dput(req->r_old_dentry);
+ if (req->r_old_dentry_dir) {
+ /*
+ * track (and drop pins for) r_old_dentry_dir
+ * separately, since r_old_dentry's d_parent may have
+ * changed between the dir mutex being dropped and
+ * this request being freed.
+ */
+ ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
+ CEPH_CAP_PIN);
+ iput(req->r_old_dentry_dir);
+ }
+ kfree(req->r_path1);
+ kfree(req->r_path2);
+ put_request_session(req);
+ ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation);
+ kfree(req);
+}
+
+/*
+ * lookup request, bump ref if found.
+ *
+ * called under mdsc->mutex.
+ */
+static struct ceph_mds_request *__lookup_request(struct ceph_mds_client *mdsc,
+ u64 tid)
+{
+ struct rb_node *n = mdsc->request_tree.rb_node;
+
+ /* Standard rbtree search keyed on r_tid; take a ref on a hit. */
+ while (n) {
+ struct ceph_mds_request *req =
+ rb_entry(n, struct ceph_mds_request, r_node);
+
+ if (tid < req->r_tid) {
+ n = n->rb_left;
+ } else if (tid > req->r_tid) {
+ n = n->rb_right;
+ } else {
+ ceph_mdsc_get_request(req);
+ return req;
+ }
+ }
+ return NULL;
+}
+
+/* Insert a request into the tid-ordered rbtree; tids are unique, so a
+ * duplicate is a bug. Caller holds mdsc->mutex. */
+static void __insert_request(struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *new)
+{
+ struct rb_node **p = &mdsc->request_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct ceph_mds_request *req = NULL;
+
+ while (*p) {
+ parent = *p;
+ req = rb_entry(parent, struct ceph_mds_request, r_node);
+ if (new->r_tid < req->r_tid)
+ p = &(*p)->rb_left;
+ else if (new->r_tid > req->r_tid)
+ p = &(*p)->rb_right;
+ else
+ BUG();
+ }
+
+ rb_link_node(&new->r_node, parent, p);
+ rb_insert_color(&new->r_node, &mdsc->request_tree);
+}
+
+/*
+ * Register an in-flight request, and assign a tid. Link to the
+ * directory we are modifying (if any).
+ *
+ * Called under mdsc->mutex.
+ */
+static void __register_request(struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req,
+ struct inode *dir)
+{
+ req->r_tid = ++mdsc->last_tid;
+ if (req->r_num_caps)
+ ceph_reserve_caps(mdsc, &req->r_caps_reservation,
+ req->r_num_caps);
+ dout("__register_request %p tid %lld\n", req, req->r_tid);
+ /* tree reference, dropped in __unregister_request() */
+ ceph_mdsc_get_request(req);
+ __insert_request(mdsc, req);
+
+ req->r_uid = current_fsuid();
+ req->r_gid = current_fsgid();
+
+ if (dir) {
+ struct ceph_inode_info *ci = ceph_inode(dir);
+
+ /* pin the dir and track this as an unsafe dir operation */
+ ihold(dir);
+ spin_lock(&ci->i_unsafe_lock);
+ req->r_unsafe_dir = dir;
+ list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops);
+ spin_unlock(&ci->i_unsafe_lock);
+ }
+}
+
+/* Remove a request from the tree and drop its unsafe-dir tracking.
+ * Called under mdsc->mutex; drops the tree's request reference. */
+static void __unregister_request(struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req)
+{
+ dout("__unregister_request %p tid %lld\n", req, req->r_tid);
+ rb_erase(&req->r_node, &mdsc->request_tree);
+ RB_CLEAR_NODE(&req->r_node);
+
+ if (req->r_unsafe_dir) {
+ struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir);
+
+ spin_lock(&ci->i_unsafe_lock);
+ list_del_init(&req->r_unsafe_dir_item);
+ spin_unlock(&ci->i_unsafe_lock);
+
+ iput(req->r_unsafe_dir);
+ req->r_unsafe_dir = NULL;
+ }
+
+ complete_all(&req->r_safe_completion);
+
+ ceph_mdsc_put_request(req);
+}
+
+/*
+ * Choose mds to send request to next. If there is a hint set in the
+ * request (e.g., due to a prior forward hint from the mds), use that.
+ * Otherwise, consult frag tree and/or caps to identify the
+ * appropriate mds. If all else fails, choose randomly.
+ *
+ * Called under mdsc->mutex.
+ */
+/* Walk up from a snapped dentry to the nearest non-snap ancestor. */
+static struct dentry *get_nonsnap_parent(struct dentry *dentry)
+{
+ /*
+ * we don't need to worry about protecting the d_parent access
+ * here because we never rename inside the snapped namespace
+ * except to resplice to another snapdir, and either the old or new
+ * result is a valid result.
+ */
+ while (!IS_ROOT(dentry) && ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
+ dentry = dentry->d_parent;
+ return dentry;
+}
+
+/*
+ * Pick the mds to send a request to: honor a resend hint if usable,
+ * otherwise derive an inode (possibly a parent dir + name hash) and
+ * consult its frag tree and caps; fall back to a random mds.
+ * Called under mdsc->mutex.
+ */
+static int __choose_mds(struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req)
+{
+ struct inode *inode;
+ struct ceph_inode_info *ci;
+ struct ceph_cap *cap;
+ int mode = req->r_direct_mode;
+ int mds = -1;
+ u32 hash = req->r_direct_hash;
+ bool is_hash = req->r_direct_is_hash;
+
+ /*
+ * is there a specific mds we should try? ignore hint if we have
+ * no session and the mds is not up (active or recovering).
+ */
+ if (req->r_resend_mds >= 0 &&
+ (__have_session(mdsc, req->r_resend_mds) ||
+ ceph_mdsmap_get_state(mdsc->mdsmap, req->r_resend_mds) > 0)) {
+ dout("choose_mds using resend_mds mds%d\n",
+ req->r_resend_mds);
+ return req->r_resend_mds;
+ }
+
+ if (mode == USE_RANDOM_MDS)
+ goto random;
+
+ /* pick the inode that determines placement */
+ inode = NULL;
+ if (req->r_inode) {
+ inode = req->r_inode;
+ } else if (req->r_dentry) {
+ /* ignore race with rename; old or new d_parent is okay */
+ struct dentry *parent = req->r_dentry->d_parent;
+ struct inode *dir = parent->d_inode;
+
+ if (dir->i_sb != mdsc->fsc->sb) {
+ /* not this fs! */
+ inode = req->r_dentry->d_inode;
+ } else if (ceph_snap(dir) != CEPH_NOSNAP) {
+ /* direct snapped/virtual snapdir requests
+ * based on parent dir inode */
+ struct dentry *dn = get_nonsnap_parent(parent);
+ inode = dn->d_inode;
+ dout("__choose_mds using nonsnap parent %p\n", inode);
+ } else {
+ /* dentry target */
+ inode = req->r_dentry->d_inode;
+ if (!inode || mode == USE_AUTH_MDS) {
+ /* dir + name */
+ inode = dir;
+ hash = ceph_dentry_hash(dir, req->r_dentry);
+ is_hash = true;
+ }
+ }
+ }
+
+ dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash,
+ (int)hash, mode);
+ if (!inode)
+ goto random;
+ ci = ceph_inode(inode);
+
+ /* for hashed dir lookups, consult the inode's frag tree */
+ if (is_hash && S_ISDIR(inode->i_mode)) {
+ struct ceph_inode_frag frag;
+ int found;
+
+ ceph_choose_frag(ci, hash, &frag, &found);
+ if (found) {
+ if (mode == USE_ANY_MDS && frag.ndist > 0) {
+ u8 r;
+
+ /* choose a random replica */
+ get_random_bytes(&r, 1);
+ r %= frag.ndist;
+ mds = frag.dist[r];
+ dout("choose_mds %p %llx.%llx "
+ "frag %u mds%d (%d/%d)\n",
+ inode, ceph_vinop(inode),
+ frag.frag, mds,
+ (int)r, frag.ndist);
+ if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
+ CEPH_MDS_STATE_ACTIVE)
+ return mds;
+ }
+
+ /* since this file/dir wasn't known to be
+ * replicated, then we want to look for the
+ * authoritative mds. */
+ mode = USE_AUTH_MDS;
+ if (frag.mds >= 0) {
+ /* choose auth mds */
+ mds = frag.mds;
+ dout("choose_mds %p %llx.%llx "
+ "frag %u mds%d (auth)\n",
+ inode, ceph_vinop(inode), frag.frag, mds);
+ if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
+ CEPH_MDS_STATE_ACTIVE)
+ return mds;
+ }
+ }
+ }
+
+ /* otherwise follow whichever mds issued us a cap (auth if possible) */
+ spin_lock(&ci->i_ceph_lock);
+ cap = NULL;
+ if (mode == USE_AUTH_MDS)
+ cap = ci->i_auth_cap;
+ if (!cap && !RB_EMPTY_ROOT(&ci->i_caps))
+ cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
+ if (!cap) {
+ spin_unlock(&ci->i_ceph_lock);
+ goto random;
+ }
+ mds = cap->session->s_mds;
+ dout("choose_mds %p %llx.%llx mds%d (%scap %p)\n",
+ inode, ceph_vinop(inode), mds,
+ cap == ci->i_auth_cap ? "auth " : "", cap);
+ spin_unlock(&ci->i_ceph_lock);
+ return mds;
+
+random:
+ mds = ceph_mdsmap_get_random_mds(mdsc->mdsmap);
+ dout("choose_mds chose random mds%d\n", mds);
+ return mds;
+}
+
+
+/*
+ * session messages
+ */
+/* Allocate a CLIENT_SESSION message containing just a session head
+ * with the given op and seq. Returns NULL on allocation failure. */
+static struct ceph_msg *create_session_msg(u32 op, u64 seq)
+{
+ struct ceph_msg *msg;
+ struct ceph_mds_session_head *h;
+
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS,
+ false);
+ if (!msg) {
+ pr_err("create_session_msg ENOMEM creating msg\n");
+ return NULL;
+ }
+ h = msg->front.iov_base;
+ h->op = cpu_to_le32(op);
+ h->seq = cpu_to_le64(seq);
+ return msg;
+}
+
+/*
+ * send session open request.
+ *
+ * called under mdsc->mutex
+ */
+static int __open_session(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ struct ceph_msg *msg;
+ int mstate;
+ int mds = session->s_mds;
+
+ /* wait for mds to go active? */
+ mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds);
+ dout("open_session to mds%d (%s)\n", mds,
+ ceph_mds_state_name(mstate));
+ session->s_state = CEPH_MDS_SESSION_OPENING;
+ /* timestamp used by the renew/timeout logic */
+ session->s_renew_requested = jiffies;
+
+ /* send connect message */
+ msg = create_session_msg(CEPH_SESSION_REQUEST_OPEN, session->s_seq);
+ if (!msg)
+ return -ENOMEM;
+ ceph_con_send(&session->s_con, msg);
+ return 0;
+}
+
+/*
+ * open sessions for any export targets for the given mds
+ *
+ * called under mdsc->mutex
+ */
+static struct ceph_mds_session *
+__open_export_target_session(struct ceph_mds_client *mdsc, int target)
+{
+ struct ceph_mds_session *session;
+
+ /* reuse an existing session for this rank, or register a new one */
+ session = __ceph_lookup_mds_session(mdsc, target);
+ if (!session) {
+ session = register_session(mdsc, target);
+ if (IS_ERR(session))
+ return session;
+ }
+ /* only kick off an open if the session isn't already live */
+ if (session->s_state == CEPH_MDS_SESSION_NEW ||
+ session->s_state == CEPH_MDS_SESSION_CLOSING)
+ __open_session(mdsc, session);
+
+ return session;
+}
+
+struct ceph_mds_session *
+ceph_mdsc_open_export_target_session(struct ceph_mds_client *mdsc, int target)
+{
+ struct ceph_mds_session *s;
+
+ dout("open_export_target_session to mds%d\n", target);
+
+ /* Take mdsc->mutex around the locked helper. */
+ mutex_lock(&mdsc->mutex);
+ s = __open_export_target_session(mdsc, target);
+ mutex_unlock(&mdsc->mutex);
+
+ return s;
+}
+
+static void __open_export_target_sessions(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ struct ceph_mds_info *mi;
+ struct ceph_mds_session *ts;
+ int i, mds = session->s_mds;
+
+ if (mds >= mdsc->mdsmap->m_max_mds)
+ return;
+
+ mi = &mdsc->mdsmap->m_info[mds];
+ dout("open_export_target_sessions for mds%d (%d targets)\n",
+ session->s_mds, mi->num_export_targets);
+
+ /* open a session to each export target; we only needed the open
+ * to be initiated, so drop the reference right away */
+ for (i = 0; i < mi->num_export_targets; i++) {
+ ts = __open_export_target_session(mdsc, mi->export_targets[i]);
+ if (!IS_ERR(ts))
+ ceph_put_mds_session(ts);
+ }
+}
+
+/* Locked wrapper around __open_export_target_sessions(). */
+void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ mutex_lock(&mdsc->mutex);
+ __open_export_target_sessions(mdsc, session);
+ mutex_unlock(&mdsc->mutex);
+}
+
+/*
+ * session caps
+ */
+
+/*
+ * Free preallocated cap messages assigned to this session
+ */
+static void cleanup_cap_releases(struct ceph_mds_session *session)
+{
+ struct ceph_msg *msg;
+
+ /* drain both the pending and the ready-to-send release queues */
+ spin_lock(&session->s_cap_lock);
+ while (!list_empty(&session->s_cap_releases)) {
+ msg = list_first_entry(&session->s_cap_releases,
+ struct ceph_msg, list_head);
+ list_del_init(&msg->list_head);
+ ceph_msg_put(msg);
+ }
+ while (!list_empty(&session->s_cap_releases_done)) {
+ msg = list_first_entry(&session->s_cap_releases_done,
+ struct ceph_msg, list_head);
+ list_del_init(&msg->list_head);
+ ceph_msg_put(msg);
+ }
+ spin_unlock(&session->s_cap_lock);
+}
+
+/*
+ * Helper to safely iterate over all caps associated with a session, with
+ * special care taken to handle a racing __ceph_remove_cap().
+ *
+ * Caller must hold session s_mutex.
+ */
+static int iterate_session_caps(struct ceph_mds_session *session,
+				int (*cb)(struct inode *, struct ceph_cap *,
+					  void *), void *arg)
+{
+	struct list_head *p;
+	struct ceph_cap *cap;
+	struct inode *inode, *last_inode = NULL;
+	struct ceph_cap *old_cap = NULL;
+	int ret;
+
+	dout("iterate_session_caps %p mds%d\n", session, session->s_mds);
+	spin_lock(&session->s_cap_lock);
+	p = session->s_caps.next;
+	while (p != &session->s_caps) {
+		cap = list_entry(p, struct ceph_cap, session_caps);
+		/* skip caps whose inode is going away (igrab failed) */
+		inode = igrab(&cap->ci->vfs_inode);
+		if (!inode) {
+			p = p->next;
+			continue;
+		}
+		/*
+		 * Publish this cap as the iteration cursor so a racing
+		 * __ceph_remove_cap() leaves it on the list for us (we
+		 * detect that below via cap->ci == NULL), then drop the
+		 * lock to call the callback.
+		 */
+		session->s_cap_iterator = cap;
+		spin_unlock(&session->s_cap_lock);
+
+		/* drop the previous round's references without locks held */
+		if (last_inode) {
+			iput(last_inode);
+			last_inode = NULL;
+		}
+		if (old_cap) {
+			ceph_put_cap(session->s_mdsc, old_cap);
+			old_cap = NULL;
+		}
+
+		ret = cb(inode, cap, arg);
+		last_inode = inode;
+
+		spin_lock(&session->s_cap_lock);
+		p = p->next;
+		if (cap->ci == NULL) {
+			/* raced with __ceph_remove_cap(): finish removal */
+			dout("iterate_session_caps finishing cap %p removal\n",
+			     cap);
+			BUG_ON(cap->session != session);
+			list_del_init(&cap->session_caps);
+			session->s_nr_caps--;
+			cap->session = NULL;
+			old_cap = cap; /* put_cap it w/o locks held */
+		}
+		if (ret < 0)
+			goto out;
+	}
+	ret = 0;
+out:
+	session->s_cap_iterator = NULL;
+	spin_unlock(&session->s_cap_lock);
+
+	/* release references left over from the final iteration */
+	if (last_inode)
+		iput(last_inode);
+	if (old_cap)
+		ceph_put_cap(session->s_mdsc, old_cap);
+
+	return ret;
+}
+
+/*
+ * iterate_session_caps() callback: detach @cap from @inode and, if that
+ * leaves the inode with no real caps, discard its dirty/flushing state.
+ * 'drop' counts the discarded states; the trailing loop does one iput()
+ * per count (NOTE(review): presumably each discarded state held an inode
+ * reference when it was queued -- verify against the queueing code).
+ */
+static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
+				  void *arg)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	int drop = 0;
+
+	dout("removing cap %p, ci is %p, inode is %p\n",
+	     cap, ci, &ci->vfs_inode);
+	spin_lock(&ci->i_ceph_lock);
+	__ceph_remove_cap(cap, false);
+	if (!__ceph_is_any_real_caps(ci)) {
+		struct ceph_mds_client *mdsc =
+			ceph_sb_to_client(inode->i_sb)->mdsc;
+
+		/* cap_dirty_lock nests inside i_ceph_lock here */
+		spin_lock(&mdsc->cap_dirty_lock);
+		if (!list_empty(&ci->i_dirty_item)) {
+			pr_info(" dropping dirty %s state for %p %lld\n",
+				ceph_cap_string(ci->i_dirty_caps),
+				inode, ceph_ino(inode));
+			ci->i_dirty_caps = 0;
+			list_del_init(&ci->i_dirty_item);
+			drop = 1;
+		}
+		if (!list_empty(&ci->i_flushing_item)) {
+			pr_info(" dropping dirty+flushing %s state for %p %lld\n",
+				ceph_cap_string(ci->i_flushing_caps),
+				inode, ceph_ino(inode));
+			ci->i_flushing_caps = 0;
+			list_del_init(&ci->i_flushing_item);
+			mdsc->num_cap_flushing--;
+			drop = 1;
+		}
+		if (drop && ci->i_wrbuffer_ref) {
+			pr_info(" dropping dirty data for %p %lld\n",
+				inode, ceph_ino(inode));
+			ci->i_wrbuffer_ref = 0;
+			ci->i_wrbuffer_ref_head = 0;
+			drop++;
+		}
+		spin_unlock(&mdsc->cap_dirty_lock);
+	}
+	spin_unlock(&ci->i_ceph_lock);
+	/* one iput per discarded state; see header comment */
+	while (drop--)
+		iput(inode);
+	return 0;
+}
+
+/*
+ * Remove every cap attached to @session, then wait for caps held by
+ * inodes that are mid-deletion to disappear before returning.
+ *
+ * caller must hold session s_mutex
+ */
+static void remove_session_caps(struct ceph_mds_session *session)
+{
+	dout("remove_session_caps on %p\n", session);
+	iterate_session_caps(session, remove_session_caps_cb, NULL);
+
+	spin_lock(&session->s_cap_lock);
+	if (session->s_nr_caps > 0) {
+		struct super_block *sb = session->s_mdsc->fsc->sb;
+		struct inode *inode;
+		struct ceph_cap *cap, *prev = NULL;
+		struct ceph_vino vino;
+		/*
+		 * iterate_session_caps() skips inodes that are being
+		 * deleted, we need to wait until deletions are complete.
+		 * __wait_on_freeing_inode() is designed for the job,
+		 * but it is not exported, so use lookup inode function
+		 * to access it.
+		 */
+		while (!list_empty(&session->s_caps)) {
+			cap = list_entry(session->s_caps.next,
+					 struct ceph_cap, session_caps);
+			/* same cap still at the head: no progress, stop */
+			if (cap == prev)
+				break;
+			prev = cap;
+			vino = cap->ci->i_vino;
+			/* drop the lock: ceph_find_inode may sleep */
+			spin_unlock(&session->s_cap_lock);
+
+			inode = ceph_find_inode(sb, vino);
+			iput(inode);
+
+			spin_lock(&session->s_cap_lock);
+		}
+	}
+	spin_unlock(&session->s_cap_lock);
+
+	BUG_ON(session->s_nr_caps > 0);
+	BUG_ON(!list_empty(&session->s_cap_flushing));
+	cleanup_cap_releases(session);
+}
+
+/*
+ * iterate_session_caps() callback: wake anyone waiting on this inode's
+ * caps.  A non-NULL @arg marks a reconnect, in which case the inode's
+ * wanted/requested max_size are also reset.
+ *
+ * caller must hold s_mutex.
+ */
+static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
+			      void *arg)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	int reconnect = arg != NULL;
+
+	wake_up_all(&ci->i_cap_wq);
+	if (!reconnect)
+		return 0;
+
+	spin_lock(&ci->i_ceph_lock);
+	ci->i_wanted_max_size = 0;
+	ci->i_requested_max_size = 0;
+	spin_unlock(&ci->i_ceph_lock);
+	return 0;
+}
+
+/*
+ * Wake cap waiters on every inode with caps on this session; when
+ * @reconnect is non-zero the callback also resets each inode's
+ * wanted/requested max_size (see wake_up_session_cb).
+ */
+static void wake_up_session_caps(struct ceph_mds_session *session,
+				 int reconnect)
+{
+	dout("wake_up_session_caps %p mds%d\n", session, session->s_mds);
+	iterate_session_caps(session, wake_up_session_cb,
+			     (void *)(unsigned long)reconnect);
+}
+
+/*
+ * Send periodic message to MDS renewing all currently held caps. The
+ * ack will reset the expiration for all caps from this session.
+ *
+ * caller holds s_mutex
+ */
+static int send_renew_caps(struct ceph_mds_client *mdsc,
+			   struct ceph_mds_session *session)
+{
+	struct ceph_msg *msg;
+	int state;
+
+	/*
+	 * Report stale caps when the ttl has expired and the ttl was
+	 * set at-or-after our previous renew request (i.e. no renewal
+	 * appears to still be in flight).
+	 */
+	if (time_after_eq(jiffies, session->s_cap_ttl) &&
+	    time_after_eq(session->s_cap_ttl, session->s_renew_requested))
+		pr_info("mds%d caps stale\n", session->s_mds);
+	/* record the request time even if we bail out below */
+	session->s_renew_requested = jiffies;
+
+	/* do not try to renew caps until a recovering mds has reconnected
+	 * with its clients. */
+	state = ceph_mdsmap_get_state(mdsc->mdsmap, session->s_mds);
+	if (state < CEPH_MDS_STATE_RECONNECT) {
+		dout("send_renew_caps ignoring mds%d (%s)\n",
+		     session->s_mds, ceph_mds_state_name(state));
+		return 0;
+	}
+
+	dout("send_renew_caps to mds%d (%s)\n", session->s_mds,
+	     ceph_mds_state_name(state));
+	msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS,
+				 ++session->s_renew_seq);
+	if (!msg)
+		return -ENOMEM;
+	ceph_con_send(&session->s_con, msg);
+	return 0;
+}
+
+/*
+ * Acknowledge a CEPH_SESSION_FLUSHMSG from the MDS by echoing back the
+ * given sequence number.
+ *
+ * Returns 0 on success, -ENOMEM if the ack message cannot be allocated.
+ */
+static int send_flushmsg_ack(struct ceph_mds_client *mdsc,
+			     struct ceph_mds_session *session, u64 seq)
+{
+	struct ceph_msg *msg;
+
+	/* fixed debug format: was "(%s)s seq" with a stray 's' */
+	dout("send_flushmsg_ack to mds%d (%s) seq %lld\n",
+	     session->s_mds, session_state_name(session->s_state), seq);
+	msg = create_session_msg(CEPH_SESSION_FLUSHMSG_ACK, seq);
+	if (!msg)
+		return -ENOMEM;
+	ceph_con_send(&session->s_con, msg);
+	return 0;
+}
+
+
+/*
+ * Note new cap ttl, and any transition from stale -> not stale (fresh?).
+ *
+ * Called under session->s_mutex
+ */
+static void renewed_caps(struct ceph_mds_client *mdsc,
+			 struct ceph_mds_session *session, int is_renew)
+{
+	int was_stale;
+	int wake = 0;
+
+	spin_lock(&session->s_cap_lock);
+	/* only a renewal ack can transition us out of "stale" */
+	was_stale = is_renew && time_after_eq(jiffies, session->s_cap_ttl);
+
+	/* new ttl is measured from when we sent the renew request */
+	session->s_cap_ttl = session->s_renew_requested +
+		mdsc->mdsmap->m_session_timeout*HZ;
+
+	if (was_stale) {
+		if (time_before(jiffies, session->s_cap_ttl)) {
+			pr_info("mds%d caps renewed\n", session->s_mds);
+			/* wake waiters outside the spinlock, below */
+			wake = 1;
+		} else {
+			pr_info("mds%d caps still stale\n", session->s_mds);
+		}
+	}
+	dout("renewed_caps mds%d ttl now %lu, was %s, now %s\n",
+	     session->s_mds, session->s_cap_ttl, was_stale ? "stale" : "fresh",
+	     time_before(jiffies, session->s_cap_ttl) ? "stale" : "fresh");
+	spin_unlock(&session->s_cap_lock);
+
+	if (wake)
+		wake_up_session_caps(session, 0);
+}
+
+/*
+ * Ask the MDS to close this session.
+ */
+static int request_close_session(struct ceph_mds_client *mdsc,
+				 struct ceph_mds_session *session)
+{
+	struct ceph_msg *close_msg;
+
+	dout("request_close_session mds%d state %s seq %lld\n",
+	     session->s_mds, session_state_name(session->s_state),
+	     session->s_seq);
+	close_msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE,
+				       session->s_seq);
+	if (!close_msg)
+		return -ENOMEM;
+	ceph_con_send(&session->s_con, close_msg);
+	return 0;
+}
+
+/*
+ * Called with s_mutex held.
+ *
+ * Move the session to CLOSING and send the close request.  A no-op
+ * (returns 0) if the session is already CLOSING or further along.
+ */
+static int __close_session(struct ceph_mds_client *mdsc,
+			 struct ceph_mds_session *session)
+{
+	if (session->s_state >= CEPH_MDS_SESSION_CLOSING)
+		return 0;
+	session->s_state = CEPH_MDS_SESSION_CLOSING;
+	return request_close_session(mdsc, session);
+}
+
+/*
+ * Trim old(er) caps.
+ *
+ * Because we can't cache an inode without one or more caps, we do
+ * this indirectly: if a cap is unused, we prune its aliases, at which
+ * point the inode will hopefully get dropped to.
+ *
+ * Yes, this is a bit sloppy. Our only real goal here is to respond to
+ * memory pressure from the MDS, though, so it needn't be perfect.
+ */
+static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
+{
+	struct ceph_mds_session *session = arg;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	int used, wanted, oissued, mine;
+
+	/* trim quota exhausted: returning -1 stops the iteration */
+	if (session->s_trim_caps <= 0)
+		return -1;
+
+	spin_lock(&ci->i_ceph_lock);
+	mine = cap->issued | cap->implemented;
+	used = __ceph_caps_used(ci);
+	wanted = __ceph_caps_file_wanted(ci);
+	oissued = __ceph_caps_issued_other(ci, cap);
+
+	dout("trim_caps_cb %p cap %p mine %s oissued %s used %s wanted %s\n",
+	     inode, cap, ceph_cap_string(mine), ceph_cap_string(oissued),
+	     ceph_cap_string(used), ceph_cap_string(wanted));
+	if (cap == ci->i_auth_cap) {
+		/* never trim the auth cap while there is dirty/flushing
+		 * state or any write use/want on the inode */
+		if (ci->i_dirty_caps | ci->i_flushing_caps)
+			goto out;
+		if ((used | wanted) & CEPH_CAP_ANY_WR)
+			goto out;
+	}
+	if ((used | wanted) & ~oissued & mine)
+		goto out;   /* we need these caps */
+
+	session->s_trim_caps--;
+	if (oissued) {
+		/* we aren't the only cap.. just remove us */
+		__ceph_remove_cap(cap, true);
+	} else {
+		/* try to drop referring dentries */
+		spin_unlock(&ci->i_ceph_lock);
+		d_prune_aliases(inode);
+		dout("trim_caps_cb %p cap %p pruned, count now %d\n",
+		     inode, cap, atomic_read(&inode->i_count));
+		return 0;
+	}
+
+out:
+	spin_unlock(&ci->i_ceph_lock);
+	return 0;
+}
+
+/*
+ * Trim session cap count down to some max number.
+ */
+static int trim_caps(struct ceph_mds_client *mdsc,
+		     struct ceph_mds_session *session,
+		     int max_caps)
+{
+	int excess = session->s_nr_caps - max_caps;
+
+	dout("trim_caps mds%d start: %d / %d, trim %d\n",
+	     session->s_mds, session->s_nr_caps, max_caps, excess);
+	if (excess > 0) {
+		session->s_trim_caps = excess;
+		iterate_session_caps(session, trim_caps_cb, session);
+		dout("trim_caps mds%d done: %d / %d, trimmed %d\n",
+		     session->s_mds, session->s_nr_caps, max_caps,
+		     excess - session->s_trim_caps);
+		session->s_trim_caps = 0;
+	}
+
+	/* queue and send release messages for anything we trimmed */
+	ceph_add_cap_releases(mdsc, session);
+	ceph_send_cap_releases(mdsc, session);
+	return 0;
+}
+
+/*
+ * Allocate cap_release messages.  If there is a partially full message
+ * in the queue, try to allocate enough to cover it's remainder, so that
+ * we can send it immediately.
+ *
+ * Called under s_mutex.
+ */
+int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
+			  struct ceph_mds_session *session)
+{
+	struct ceph_msg *msg, *partial = NULL;
+	struct ceph_mds_cap_release *head;
+	int err = -ENOMEM;
+	int extra = mdsc->fsc->mount_options->cap_release_safety;
+	int num;
+
+	dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds,
+	     extra);
+
+	spin_lock(&session->s_cap_lock);
+
+	/* head of s_cap_releases with a nonzero count is a partial msg */
+	if (!list_empty(&session->s_cap_releases)) {
+		msg = list_first_entry(&session->s_cap_releases,
+				       struct ceph_msg,
+				       list_head);
+		head = msg->front.iov_base;
+		num = le32_to_cpu(head->num);
+		if (num) {
+			dout(" partial %p with (%d/%d)\n", msg, num,
+			     (int)CEPH_CAPS_PER_RELEASE);
+			/* allocate enough to fill and flush the partial */
+			extra += CEPH_CAPS_PER_RELEASE - num;
+			partial = msg;
+		}
+	}
+	/*
+	 * The lock is dropped around each GFP_NOFS allocation; the loop
+	 * condition is re-evaluated after it is retaken.
+	 */
+	while (session->s_num_cap_releases < session->s_nr_caps + extra) {
+		spin_unlock(&session->s_cap_lock);
+		msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE,
+				   GFP_NOFS, false);
+		if (!msg)
+			goto out_unlocked;
+		dout("add_cap_releases %p msg %p now %d\n", session, msg,
+		     (int)msg->front.iov_len);
+		head = msg->front.iov_base;
+		head->num = cpu_to_le32(0);
+		msg->front.iov_len = sizeof(*head);
+		spin_lock(&session->s_cap_lock);
+		list_add(&msg->list_head, &session->s_cap_releases);
+		session->s_num_cap_releases += CEPH_CAPS_PER_RELEASE;
+	}
+
+	/* move the partial message to the done queue for sending */
+	if (partial) {
+		head = partial->front.iov_base;
+		num = le32_to_cpu(head->num);
+		dout(" queueing partial %p with %d/%d\n", partial, num,
+		     (int)CEPH_CAPS_PER_RELEASE);
+		list_move_tail(&partial->list_head,
+			       &session->s_cap_releases_done);
+		session->s_num_cap_releases -= CEPH_CAPS_PER_RELEASE - num;
+	}
+	err = 0;
+	spin_unlock(&session->s_cap_lock);
+out_unlocked:
+	return err;
+}
+
+/*
+ * flush all dirty inode data to disk.
+ *
+ * returns true if we've flushed through want_flush_seq
+ */
+static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
+{
+	int mds, ret = 1;
+
+	dout("check_cap_flush want %lld\n", want_flush_seq);
+	mutex_lock(&mdsc->mutex);
+	for (mds = 0; ret && mds < mdsc->max_sessions; mds++) {
+		struct ceph_mds_session *session = mdsc->sessions[mds];
+
+		if (!session)
+			continue;
+		/* take a session ref so we can drop mdsc->mutex before
+		 * acquiring s_mutex (lock ordering) */
+		get_session(session);
+		mutex_unlock(&mdsc->mutex);
+
+		mutex_lock(&session->s_mutex);
+		if (!list_empty(&session->s_cap_flushing)) {
+			/* only the first entry on s_cap_flushing is checked */
+			struct ceph_inode_info *ci =
+				list_entry(session->s_cap_flushing.next,
+					   struct ceph_inode_info,
+					   i_flushing_item);
+			struct inode *inode = &ci->vfs_inode;
+
+			spin_lock(&ci->i_ceph_lock);
+			if (ci->i_cap_flush_seq <= want_flush_seq) {
+				dout("check_cap_flush still flushing %p "
+				     "seq %lld <= %lld to mds%d\n", inode,
+				     ci->i_cap_flush_seq, want_flush_seq,
+				     session->s_mds);
+				ret = 0;
+			}
+			spin_unlock(&ci->i_ceph_lock);
+		}
+		mutex_unlock(&session->s_mutex);
+		ceph_put_mds_session(session);
+
+		/* mdsc->mutex was dropped; bail now or retake for next mds */
+		if (!ret)
+			return ret;
+		mutex_lock(&mdsc->mutex);
+	}
+
+	mutex_unlock(&mdsc->mutex);
+	dout("check_cap_flush ok, flushed thru %lld\n", want_flush_seq);
+	return ret;
+}
+
+/*
+ * Send every completed cap release message queued on
+ * s_cap_releases_done to the MDS.
+ *
+ * called under s_mutex
+ */
+void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
+			    struct ceph_mds_session *session)
+{
+	struct ceph_msg *msg;
+
+	dout("send_cap_releases mds%d\n", session->s_mds);
+	spin_lock(&session->s_cap_lock);
+	while (!list_empty(&session->s_cap_releases_done)) {
+		msg = list_first_entry(&session->s_cap_releases_done,
+				 struct ceph_msg, list_head);
+		list_del_init(&msg->list_head);
+		/* detach first, then drop the lock to send */
+		spin_unlock(&session->s_cap_lock);
+		msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+		dout("send_cap_releases mds%d %p\n", session->s_mds, msg);
+		ceph_con_send(&session->s_con, msg);
+		spin_lock(&session->s_cap_lock);
+	}
+	spin_unlock(&session->s_cap_lock);
+}
+
+/*
+ * Reset the session's cap release state: zero out the in-progress
+ * message at the head of s_cap_releases, and move every completed
+ * message from s_cap_releases_done back onto s_cap_releases, crediting
+ * their cap slots back to s_num_cap_releases.
+ *
+ * NOTE(review): the lists are walked without s_cap_lock here --
+ * presumably callers serialize via s_mutex; confirm before reusing
+ * this pattern elsewhere.
+ */
+static void discard_cap_releases(struct ceph_mds_client *mdsc,
+				 struct ceph_mds_session *session)
+{
+	struct ceph_msg *msg;
+	struct ceph_mds_cap_release *head;
+	unsigned num;
+
+	dout("discard_cap_releases mds%d\n", session->s_mds);
+
+	if (!list_empty(&session->s_cap_releases)) {
+		/* zero out the in-progress message */
+		msg = list_first_entry(&session->s_cap_releases,
+				       struct ceph_msg, list_head);
+		head = msg->front.iov_base;
+		num = le32_to_cpu(head->num);
+		dout("discard_cap_releases mds%d %p %u\n",
+		     session->s_mds, msg, num);
+		head->num = cpu_to_le32(0);
+		msg->front.iov_len = sizeof(*head);
+		session->s_num_cap_releases += num;
+	}
+
+	/* requeue completed messages */
+	while (!list_empty(&session->s_cap_releases_done)) {
+		msg = list_first_entry(&session->s_cap_releases_done,
+				 struct ceph_msg, list_head);
+		list_del_init(&msg->list_head);
+
+		head = msg->front.iov_base;
+		num = le32_to_cpu(head->num);
+		dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg,
+		     num);
+		session->s_num_cap_releases += num;
+		head->num = cpu_to_le32(0);
+		msg->front.iov_len = sizeof(*head);
+		list_add(&msg->list_head, &session->s_cap_releases);
+	}
+}
+
+/*
+ * requests
+ */
+
+/*
+ * Size and allocate the buffer that will hold a parsed readdir reply
+ * for @dir, based on the directory's current file + subdir count
+ * (clamped to [1, max_readdir]).  Falls back to smaller allocations
+ * under memory pressure.  On success, sets rinfo->dir_in/dir_buf_size,
+ * req->r_num_caps and the readdir max_entries/max_bytes request args.
+ * Returns 0 or -ENOMEM.
+ */
+int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
+				    struct inode *dir)
+{
+	struct ceph_inode_info *ci = ceph_inode(dir);
+	struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
+	struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options;
+	/* per-entry footprint in the parsed reply arrays */
+	size_t size = sizeof(*rinfo->dir_in) + sizeof(*rinfo->dir_dname_len) +
+		      sizeof(*rinfo->dir_dname) + sizeof(*rinfo->dir_dlease);
+	int order, num_entries;
+
+	spin_lock(&ci->i_ceph_lock);
+	num_entries = ci->i_files + ci->i_subdirs;
+	spin_unlock(&ci->i_ceph_lock);
+	num_entries = max(num_entries, 1);
+	num_entries = min(num_entries, opt->max_readdir);
+
+	/* try progressively smaller orders until an allocation succeeds */
+	order = get_order(size * num_entries);
+	while (order >= 0) {
+		rinfo->dir_in = (void*)__get_free_pages(GFP_NOFS | __GFP_NOWARN,
+							order);
+		if (rinfo->dir_in)
+			break;
+		order--;
+	}
+	if (!rinfo->dir_in)
+		return -ENOMEM;
+
+	/* recompute how many entries actually fit in what we got */
+	num_entries = (PAGE_SIZE << order) / size;
+	num_entries = min(num_entries, opt->max_readdir);
+
+	rinfo->dir_buf_size = PAGE_SIZE << order;
+	req->r_num_caps = num_entries + 1;
+	req->r_args.readdir.max_entries = cpu_to_le32(num_entries);
+	req->r_args.readdir.max_bytes = cpu_to_le32(opt->max_readdir_bytes);
+	return 0;
+}
+
+/*
+ * Allocate and initialize a new mds request for operation @op using
+ * direct mode @mode.  Returns the request or ERR_PTR(-ENOMEM).
+ */
+struct ceph_mds_request *
+ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
+{
+	struct ceph_mds_request *req;
+
+	req = kzalloc(sizeof(*req), GFP_NOFS);
+	if (!req)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&req->r_kref);
+	mutex_init(&req->r_fill_mutex);
+	init_completion(&req->r_completion);
+	init_completion(&req->r_safe_completion);
+	INIT_LIST_HEAD(&req->r_unsafe_dir_item);
+	INIT_LIST_HEAD(&req->r_wait);
+	INIT_LIST_HEAD(&req->r_unsafe_item);
+
+	req->r_mdsc = mdsc;
+	req->r_op = op;
+	req->r_direct_mode = mode;
+	req->r_started = jiffies;
+	req->r_resend_mds = -1;	/* no mds hint yet */
+	req->r_fmode = -1;
+
+	return req;
+}
+
+/*
+ * return oldest (lowest) request, tid in request tree, 0 if none.
+ *
+ * called under mdsc->mutex.
+ */
+static struct ceph_mds_request *__get_oldest_req(struct ceph_mds_client *mdsc)
+{
+	struct rb_node *first = rb_first(&mdsc->request_tree);
+
+	if (!first)
+		return NULL;
+	return rb_entry(first, struct ceph_mds_request, r_node);
+}
+
+/* tid of the oldest in-flight request, or 0 when the tree is empty */
+static u64 __get_oldest_tid(struct ceph_mds_client *mdsc)
+{
+	struct ceph_mds_request *oldest = __get_oldest_req(mdsc);
+
+	return oldest ? oldest->r_tid : 0;
+}
+
+/*
+ * Build a dentry's path. Allocate on heap; caller must kfree. Based
+ * on build_path_from_dentry in fs/cifs/dir.c.
+ *
+ * If @stop_on_nosnap, generate path relative to the first non-snapped
+ * inode.
+ *
+ * Encode hidden .snap dirs as a double /, i.e.
+ * foo/.snap/bar -> foo//bar
+ */
+char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
+			   int stop_on_nosnap)
+{
+	struct dentry *temp;
+	char *path;
+	int len, pos;
+	unsigned seq;
+
+	if (dentry == NULL)
+		return ERR_PTR(-EINVAL);
+
+retry:
+	len = 0;
+	/* sample rename_lock so a concurrent rename is detected below */
+	seq = read_seqbegin(&rename_lock);
+	rcu_read_lock();
+	/* pass 1: measure the path length, walking towards the root */
+	for (temp = dentry; !IS_ROOT(temp);) {
+		struct inode *inode = temp->d_inode;
+		if (inode && ceph_snap(inode) == CEPH_SNAPDIR)
+			len++; /* slash only */
+		else if (stop_on_nosnap && inode &&
+			 ceph_snap(inode) == CEPH_NOSNAP)
+			break;
+		else
+			len += 1 + temp->d_name.len;
+		temp = temp->d_parent;
+	}
+	rcu_read_unlock();
+	if (len)
+		len--; /* no leading '/' */
+
+	path = kmalloc(len+1, GFP_NOFS);
+	if (path == NULL)
+		return ERR_PTR(-ENOMEM);
+	pos = len;
+	path[pos] = 0; /* trailing null */
+	rcu_read_lock();
+	/* pass 2: fill the buffer backwards from the terminator */
+	for (temp = dentry; !IS_ROOT(temp) && pos != 0; ) {
+		struct inode *inode;
+
+		spin_lock(&temp->d_lock);
+		inode = temp->d_inode;
+		if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
+			dout("build_path path+%d: %p SNAPDIR\n",
+			     pos, temp);
+		} else if (stop_on_nosnap && inode &&
+			   ceph_snap(inode) == CEPH_NOSNAP) {
+			spin_unlock(&temp->d_lock);
+			break;
+		} else {
+			pos -= temp->d_name.len;
+			if (pos < 0) {
+				/* a name grew since pass 1; fall through
+				 * to the retry check below */
+				spin_unlock(&temp->d_lock);
+				break;
+			}
+			strncpy(path + pos, temp->d_name.name,
+				temp->d_name.len);
+		}
+		spin_unlock(&temp->d_lock);
+		if (pos)
+			path[--pos] = '/';
+		temp = temp->d_parent;
+	}
+	rcu_read_unlock();
+	if (pos != 0 || read_seqretry(&rename_lock, seq)) {
+		pr_err("build_path did not end path lookup where "
+		       "expected, namelen is %d, pos is %d\n", len, pos);
+		/* presumably this is only possible if racing with a
+		   rename of one of the parent directories (we can not
+		   lock the dentries above us to prevent this, but
+		   retrying should be harmless) */
+		kfree(path);
+		goto retry;
+	}
+
+	*base = ceph_ino(temp->d_inode);
+	*plen = len;
+	dout("build_path on %p %d built %llx '%.*s'\n",
+	     dentry, d_count(dentry), *base, len, path);
+	return path;
+}
+
+/*
+ * Express @dentry either as (parent ino, name) when the parent is not
+ * snapped, or as a full path from ceph_mdsc_build_path().  *pfreepath
+ * is set when the caller owns *ppath and must kfree it.
+ */
+static int build_dentry_path(struct dentry *dentry,
+			     const char **ppath, int *ppathlen, u64 *pino,
+			     int *pfreepath)
+{
+	char *built;
+
+	if (ceph_snap(dentry->d_parent->d_inode) != CEPH_NOSNAP) {
+		built = ceph_mdsc_build_path(dentry, ppathlen, pino, 1);
+		if (IS_ERR(built))
+			return PTR_ERR(built);
+		*ppath = built;
+		*pfreepath = 1;
+		return 0;
+	}
+
+	/* fast path: live parent, send its ino plus the dentry name */
+	*pino = ceph_ino(dentry->d_parent->d_inode);
+	*ppath = dentry->d_name.name;
+	*ppathlen = dentry->d_name.len;
+	return 0;
+}
+
+/*
+ * Express @inode either as a bare ino (not snapped) or as a path built
+ * from one of its dentry aliases.  *pfreepath is set when the caller
+ * owns *ppath and must kfree it.
+ */
+static int build_inode_path(struct inode *inode,
+			    const char **ppath, int *ppathlen, u64 *pino,
+			    int *pfreepath)
+{
+	struct dentry *alias;
+	char *built;
+
+	if (ceph_snap(inode) == CEPH_NOSNAP) {
+		*pino = ceph_ino(inode);
+		*ppathlen = 0;
+		return 0;
+	}
+
+	alias = d_find_alias(inode);
+	built = ceph_mdsc_build_path(alias, ppathlen, pino, 1);
+	dput(alias);
+	if (IS_ERR(built))
+		return PTR_ERR(built);
+	*ppath = built;
+	*pfreepath = 1;
+	return 0;
+}
+
+/*
+ * request arguments may be specified via an inode *, a dentry *, or
+ * an explicit ino+path.
+ */
+static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
+				 const char *rpath, u64 rino,
+				 const char **ppath, int *pathlen,
+				 u64 *ino, int *freepath)
+{
+	int err;
+
+	if (rinode) {
+		err = build_inode_path(rinode, ppath, pathlen, ino, freepath);
+		dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode),
+		     ceph_snap(rinode));
+		return err;
+	}
+	if (rdentry) {
+		err = build_dentry_path(rdentry, ppath, pathlen, ino,
+					freepath);
+		dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
+		     *ppath);
+		return err;
+	}
+	if (rpath || rino) {
+		*ino = rino;
+		*ppath = rpath;
+		*pathlen = rpath ? strlen(rpath) : 0;
+		dout(" path %.*s\n", *pathlen, rpath);
+	}
+	return 0;
+}
+
+/*
+ * Build the CEPH_MSG_CLIENT_REQUEST message for @req: head, the two
+ * encoded filepaths, then any cap/dentry releases, then optional
+ * outbound data pages.  Returns the message or an ERR_PTR.
+ *
+ * called under mdsc->mutex
+ */
+static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
+					       struct ceph_mds_request *req,
+					       int mds)
+{
+	struct ceph_msg *msg;
+	struct ceph_mds_request_head *head;
+	const char *path1 = NULL;
+	const char *path2 = NULL;
+	u64 ino1 = 0, ino2 = 0;
+	int pathlen1 = 0, pathlen2 = 0;
+	int freepath1 = 0, freepath2 = 0;
+	int len;
+	u16 releases;
+	void *p, *end;
+	int ret;
+
+	/* path1: the primary target (inode or dentry) */
+	ret = set_request_path_attr(req->r_inode, req->r_dentry,
+			      req->r_path1, req->r_ino1.ino,
+			      &path1, &pathlen1, &ino1, &freepath1);
+	if (ret < 0) {
+		msg = ERR_PTR(ret);
+		goto out;
+	}
+
+	/* path2: the secondary target (e.g. rename old dentry) */
+	ret = set_request_path_attr(NULL, req->r_old_dentry,
+			      req->r_path2, req->r_ino2.ino,
+			      &path2, &pathlen2, &ino2, &freepath2);
+	if (ret < 0) {
+		msg = ERR_PTR(ret);
+		goto out_free1;
+	}
+
+	/* head + two filepaths (each: u8 + u32 len + u64 ino) */
+	len = sizeof(*head) +
+		pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64));
+
+	/* calculate (max) length for cap releases */
+	len += sizeof(struct ceph_mds_request_release) *
+		(!!req->r_inode_drop + !!req->r_dentry_drop +
+		 !!req->r_old_inode_drop + !!req->r_old_dentry_drop);
+	if (req->r_dentry_drop)
+		len += req->r_dentry->d_name.len;
+	if (req->r_old_dentry_drop)
+		len += req->r_old_dentry->d_name.len;
+
+	msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, GFP_NOFS, false);
+	if (!msg) {
+		msg = ERR_PTR(-ENOMEM);
+		goto out_free2;
+	}
+
+	msg->hdr.tid = cpu_to_le64(req->r_tid);
+
+	head = msg->front.iov_base;
+	p = msg->front.iov_base + sizeof(*head);
+	end = msg->front.iov_base + msg->front.iov_len;
+
+	head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
+	head->op = cpu_to_le32(req->r_op);
+	head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, req->r_uid));
+	head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, req->r_gid));
+	head->args = req->r_args;
+
+	ceph_encode_filepath(&p, end, ino1, path1);
+	ceph_encode_filepath(&p, end, ino2, path2);
+
+	/* make note of release offset, in case we need to replay */
+	req->r_request_release_offset = p - msg->front.iov_base;
+
+	/* cap releases */
+	releases = 0;
+	if (req->r_inode_drop)
+		releases += ceph_encode_inode_release(&p,
+		      req->r_inode ? req->r_inode : req->r_dentry->d_inode,
+		      mds, req->r_inode_drop, req->r_inode_unless, 0);
+	if (req->r_dentry_drop)
+		releases += ceph_encode_dentry_release(&p, req->r_dentry,
+		       mds, req->r_dentry_drop, req->r_dentry_unless);
+	if (req->r_old_dentry_drop)
+		releases += ceph_encode_dentry_release(&p, req->r_old_dentry,
+		       mds, req->r_old_dentry_drop, req->r_old_dentry_unless);
+	if (req->r_old_inode_drop)
+		releases += ceph_encode_inode_release(&p,
+		      req->r_old_dentry->d_inode,
+		      mds, req->r_old_inode_drop, req->r_old_inode_unless, 0);
+	head->num_releases = cpu_to_le16(releases);
+
+	BUG_ON(p > end);
+	/* trim front to what was actually encoded */
+	msg->front.iov_len = p - msg->front.iov_base;
+	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+
+	if (req->r_data_len) {
+		/* outbound data set only by ceph_sync_setxattr() */
+		BUG_ON(!req->r_pages);
+		ceph_msg_data_add_pages(msg, req->r_pages, req->r_data_len, 0);
+	}
+
+	msg->hdr.data_len = cpu_to_le32(req->r_data_len);
+	msg->hdr.data_off = cpu_to_le16(0);
+
+out_free2:
+	if (freepath2)
+		kfree((char *)path2);
+out_free1:
+	if (freepath1)
+		kfree((char *)path1);
+out:
+	return msg;
+}
+
+/*
+ * called under mdsc->mutex if error, under no mutex if
+ * success.
+ */
+static void complete_request(struct ceph_mds_client *mdsc,
+			     struct ceph_mds_request *req)
+{
+	/* prefer the caller-supplied callback; otherwise wake waiters */
+	if (!req->r_callback)
+		complete_all(&req->r_completion);
+	else
+		req->r_callback(mdsc, req);
+}
+
+/*
+ * Prepare req->r_request for (re)transmission to @mds: for a replay of
+ * an unsafely-acked request, reuse the original message; otherwise
+ * build a fresh one.  Returns 0 or a negative error (after completing
+ * the request).
+ *
+ * called under mdsc->mutex
+ */
+static int __prepare_send_request(struct ceph_mds_client *mdsc,
+				  struct ceph_mds_request *req,
+				  int mds)
+{
+	struct ceph_mds_request_head *rhead;
+	struct ceph_msg *msg;
+	int flags = 0;
+
+	req->r_attempts++;
+	if (req->r_inode) {
+		/* remember the cap mseq this attempt was sent under */
+		struct ceph_cap *cap =
+			ceph_get_cap_for_mds(ceph_inode(req->r_inode), mds);
+
+		if (cap)
+			req->r_sent_on_mseq = cap->mseq;
+		else
+			req->r_sent_on_mseq = -1;
+	}
+	dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req,
+	     req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
+
+	if (req->r_got_unsafe) {
+		/*
+		 * Replay.  Do not regenerate message (and rebuild
+		 * paths, etc.); just use the original message.
+		 * Rebuilding paths will break for renames because
+		 * d_move mangles the src name.
+		 */
+		msg = req->r_request;
+		rhead = msg->front.iov_base;
+
+		flags = le32_to_cpu(rhead->flags);
+		flags |= CEPH_MDS_FLAG_REPLAY;
+		rhead->flags = cpu_to_le32(flags);
+
+		if (req->r_target_inode)
+			rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
+
+		rhead->num_retry = req->r_attempts - 1;
+
+		/* remove cap/dentry releases from message */
+		rhead->num_releases = 0;
+		msg->hdr.front_len = cpu_to_le32(req->r_request_release_offset);
+		msg->front.iov_len = req->r_request_release_offset;
+		return 0;
+	}
+
+	/* not a replay: drop any stale message and build a new one */
+	if (req->r_request) {
+		ceph_msg_put(req->r_request);
+		req->r_request = NULL;
+	}
+	msg = create_request_message(mdsc, req, mds);
+	if (IS_ERR(msg)) {
+		req->r_err = PTR_ERR(msg);
+		complete_request(mdsc, req);
+		return PTR_ERR(msg);
+	}
+	req->r_request = msg;
+
+	rhead = msg->front.iov_base;
+	rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
+	if (req->r_got_unsafe)
+		flags |= CEPH_MDS_FLAG_REPLAY;
+	if (req->r_locked_dir)
+		flags |= CEPH_MDS_FLAG_WANT_DENTRY;
+	rhead->flags = cpu_to_le32(flags);
+	rhead->num_fwd = req->r_num_fwd;
+	rhead->num_retry = req->r_attempts - 1;
+	rhead->ino = 0;
+
+	dout(" r_locked_dir = %p\n", req->r_locked_dir);
+	return 0;
+}
+
+/*
+ * send request, or put it on the appropriate wait list.
+ */
+static int __do_request(struct ceph_mds_client *mdsc,
+			struct ceph_mds_request *req)
+{
+	struct ceph_mds_session *session = NULL;
+	int mds = -1;
+	int err = -EAGAIN;
+
+	/* already finished (or aborted)?  nothing to send */
+	if (req->r_err || req->r_got_result) {
+		if (req->r_aborted)
+			__unregister_request(mdsc, req);
+		goto out;
+	}
+
+	if (req->r_timeout &&
+	    time_after_eq(jiffies, req->r_started + req->r_timeout)) {
+		dout("do_request timed out\n");
+		err = -EIO;
+		goto finish;
+	}
+
+	/* drop any session ref from a previous attempt */
+	put_request_session(req);
+
+	mds = __choose_mds(mdsc, req);
+	if (mds < 0 ||
+	    ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) {
+		/* parked on waiting_for_map until a new mdsmap arrives */
+		dout("do_request no mds or not active, waiting for map\n");
+		list_add(&req->r_wait, &mdsc->waiting_for_map);
+		goto out;
+	}
+
+	/* get, open session */
+	session = __ceph_lookup_mds_session(mdsc, mds);
+	if (!session) {
+		session = register_session(mdsc, mds);
+		if (IS_ERR(session)) {
+			err = PTR_ERR(session);
+			goto finish;
+		}
+	}
+	req->r_session = get_session(session);
+
+	dout("do_request mds%d session %p state %s\n", mds, session,
+	     session_state_name(session->s_state));
+	if (session->s_state != CEPH_MDS_SESSION_OPEN &&
+	    session->s_state != CEPH_MDS_SESSION_HUNG) {
+		/* session not usable yet: kick an open and park the
+		 * request on the session's wait list */
+		if (session->s_state == CEPH_MDS_SESSION_NEW ||
+		    session->s_state == CEPH_MDS_SESSION_CLOSING)
+			__open_session(mdsc, session);
+		list_add(&req->r_wait, &session->s_waiting);
+		goto out_session;
+	}
+
+	/* send request */
+	req->r_resend_mds = -1;   /* forget any previous mds hint */
+
+	if (req->r_request_started == 0)   /* note request start time */
+		req->r_request_started = jiffies;
+
+	err = __prepare_send_request(mdsc, req, mds);
+	if (!err) {
+		/* extra ref: the con owns one while the msg is in flight */
+		ceph_msg_get(req->r_request);
+		ceph_con_send(&session->s_con, req->r_request);
+	}
+
+out_session:
+	ceph_put_mds_session(session);
+out:
+	return err;
+
+finish:
+	req->r_err = err;
+	complete_request(mdsc, req);
+	goto out;
+}
+
+/*
+ * called under mdsc->mutex
+ */
+static void __wake_requests(struct ceph_mds_client *mdsc,
+			    struct list_head *head)
+{
+	LIST_HEAD(tmp_list);
+
+	/* detach the whole list first; __do_request may requeue entries */
+	list_splice_init(head, &tmp_list);
+
+	while (!list_empty(&tmp_list)) {
+		struct ceph_mds_request *req =
+			list_first_entry(&tmp_list,
+					 struct ceph_mds_request, r_wait);
+
+		list_del_init(&req->r_wait);
+		dout(" wake request %p tid %llu\n", req, req->r_tid);
+		__do_request(mdsc, req);
+	}
+}
+
+/*
+ * Wake up threads with requests pending for @mds, so that they can
+ * resubmit their requests to a possibly different mds.
+ */
+static void kick_requests(struct ceph_mds_client *mdsc, int mds)
+{
+	struct rb_node *node;
+
+	dout("kick_requests mds%d\n", mds);
+	for (node = rb_first(&mdsc->request_tree); node;
+	     node = rb_next(node)) {
+		struct ceph_mds_request *req =
+			rb_entry(node, struct ceph_mds_request, r_node);
+
+		/* skip requests that already got an unsafe reply */
+		if (req->r_got_unsafe)
+			continue;
+		if (req->r_session && req->r_session->s_mds == mds) {
+			dout(" kicking tid %llu\n", req->r_tid);
+			__do_request(mdsc, req);
+		}
+	}
+}
+
+/* register a request and try to send it; does not wait for completion */
+void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
+			      struct ceph_mds_request *req)
+{
+	dout("submit_request on %p\n", req);
+
+	mutex_lock(&mdsc->mutex);
+	__register_request(mdsc, req, NULL);
+	__do_request(mdsc, req);
+	mutex_unlock(&mdsc->mutex);
+}
+
+/*
+ * Synchrously perform an mds request.  Take care of all of the
+ * session setup, forwarding, retry details.
+ *
+ * Returns the MDS reply result, or a negative error on timeout/abort.
+ */
+int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
+			 struct inode *dir,
+			 struct ceph_mds_request *req)
+{
+	int err;
+
+	dout("do_request on %p\n", req);
+
+	/* take CAP_PIN refs for r_inode, r_locked_dir, r_old_dentry */
+	if (req->r_inode)
+		ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
+	if (req->r_locked_dir)
+		ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
+	if (req->r_old_dentry_dir)
+		ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
+				  CEPH_CAP_PIN);
+
+	/* issue */
+	mutex_lock(&mdsc->mutex);
+	__register_request(mdsc, req, dir);
+	__do_request(mdsc, req);
+
+	/* __do_request failed before anything was sent */
+	if (req->r_err) {
+		err = req->r_err;
+		__unregister_request(mdsc, req);
+		dout("do_request early error %d\n", err);
+		goto out;
+	}
+
+	/* wait */
+	mutex_unlock(&mdsc->mutex);
+	dout("do_request waiting\n");
+	if (req->r_timeout) {
+		err = (long)wait_for_completion_killable_timeout(
+			&req->r_completion, req->r_timeout);
+		if (err == 0)
+			err = -EIO;   /* timed out */
+	} else {
+		err = wait_for_completion_killable(&req->r_completion);
+	}
+	dout("do_request waited, got %d\n", err);
+	mutex_lock(&mdsc->mutex);
+
+	/* only abort if we didn't race with a real reply */
+	if (req->r_got_result) {
+		err = le32_to_cpu(req->r_reply_info.head->result);
+	} else if (err < 0) {
+		/* interrupted (or timed out): abort the request */
+		dout("aborted request %lld with %d\n", req->r_tid, err);
+
+		/*
+		 * ensure we aren't running concurrently with
+		 * ceph_fill_trace or ceph_readdir_prepopulate, which
+		 * rely on locks (dir mutex) held by our caller.
+		 */
+		mutex_lock(&req->r_fill_mutex);
+		req->r_err = err;
+		req->r_aborted = true;
+		mutex_unlock(&req->r_fill_mutex);
+
+		if (req->r_locked_dir &&
+		    (req->r_op & CEPH_MDS_OP_WRITE))
+			ceph_invalidate_dir_request(req);
+	} else {
+		err = req->r_err;
+	}
+
+out:
+	mutex_unlock(&mdsc->mutex);
+	dout("do_request %p done, result %d\n", req, err);
+	return err;
+}
+
+/*
+ * Invalidate dir's completeness, dentry lease state on an aborted MDS
+ * namespace request.
+ */
+void ceph_invalidate_dir_request(struct ceph_mds_request *req)
+{
+ struct inode *inode = req->r_locked_dir;
+
+ dout("invalidate_dir_request %p (complete, lease(s))\n", inode);
+
+ /* the dir contents may no longer match the MDS; drop the
+ * "complete" flag and any dentry leases attached to the request */
+ ceph_dir_clear_complete(inode);
+ if (req->r_dentry)
+ ceph_invalidate_dentry_lease(req->r_dentry);
+ if (req->r_old_dentry)
+ ceph_invalidate_dentry_lease(req->r_old_dentry);
+}
+
+/*
+ * Handle mds reply.
+ *
+ * We take the session mutex and parse and process the reply immediately.
+ * This preserves the logical ordering of replies, capabilities, etc., sent
+ * by the MDS as they are applied to our local cache.
+ *
+ * A request may receive two replies: first an "unsafe" one (applied but
+ * not yet durable on the MDS), then a "safe" one once it is committed.
+ */
+static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
+{
+ struct ceph_mds_client *mdsc = session->s_mdsc;
+ struct ceph_mds_request *req;
+ struct ceph_mds_reply_head *head = msg->front.iov_base;
+ struct ceph_mds_reply_info_parsed *rinfo; /* parsed reply info */
+ u64 tid;
+ int err, result;
+ int mds = session->s_mds;
+
+ if (msg->front.iov_len < sizeof(*head)) {
+ pr_err("mdsc_handle_reply got corrupt (short) reply\n");
+ ceph_msg_dump(msg);
+ return;
+ }
+
+ /* get request, session */
+ tid = le64_to_cpu(msg->hdr.tid);
+ mutex_lock(&mdsc->mutex);
+ req = __lookup_request(mdsc, tid);
+ if (!req) {
+ dout("handle_reply on unknown tid %llu\n", tid);
+ mutex_unlock(&mdsc->mutex);
+ return;
+ }
+ dout("handle_reply %p\n", req);
+
+ /* correct session? */
+ if (req->r_session != session) {
+ pr_err("mdsc_handle_reply got %llu on session mds%d"
+ " not mds%d\n", tid, session->s_mds,
+ req->r_session ? req->r_session->s_mds : -1);
+ mutex_unlock(&mdsc->mutex);
+ goto out;
+ }
+
+ /* dup? */
+ if ((req->r_got_unsafe && !head->safe) ||
+ (req->r_got_safe && head->safe)) {
+ pr_warning("got a dup %s reply on %llu from mds%d\n",
+ head->safe ? "safe" : "unsafe", tid, mds);
+ mutex_unlock(&mdsc->mutex);
+ goto out;
+ }
+ if (req->r_got_safe && !head->safe) {
+ pr_warning("got unsafe after safe on %llu from mds%d\n",
+ tid, mds);
+ mutex_unlock(&mdsc->mutex);
+ goto out;
+ }
+
+ result = le32_to_cpu(head->result);
+
+ /*
+ * Handle an ESTALE
+ * if we're not talking to the authority, send to them
+ * if the authority has changed while we weren't looking,
+ * send to new authority
+ * Otherwise we just have to return an ESTALE
+ */
+ if (result == -ESTALE) {
+ dout("got ESTALE on request %llu", req->r_tid);
+ if (req->r_direct_mode != USE_AUTH_MDS) {
+ dout("not using auth, setting for that now");
+ req->r_direct_mode = USE_AUTH_MDS;
+ __do_request(mdsc, req);
+ mutex_unlock(&mdsc->mutex);
+ goto out;
+ } else {
+ int mds = __choose_mds(mdsc, req);
+ if (mds >= 0 && mds != req->r_session->s_mds) {
+ dout("but auth changed, so resending");
+ __do_request(mdsc, req);
+ mutex_unlock(&mdsc->mutex);
+ goto out;
+ }
+ }
+ dout("have to return ESTALE on request %llu", req->r_tid);
+ }
+
+
+ if (head->safe) {
+ req->r_got_safe = true;
+ __unregister_request(mdsc, req);
+
+ if (req->r_got_unsafe) {
+ /*
+ * We already handled the unsafe response, now do the
+ * cleanup. No need to examine the response; the MDS
+ * doesn't include any result info in the safe
+ * response. And even if it did, there is nothing
+ * useful we could do with a revised return value.
+ */
+ dout("got safe reply %llu, mds%d\n", tid, mds);
+ list_del_init(&req->r_unsafe_item);
+
+ /* last unsafe request during umount? */
+ if (mdsc->stopping && !__get_oldest_req(mdsc))
+ complete_all(&mdsc->safe_umount_waiters);
+ mutex_unlock(&mdsc->mutex);
+ goto out;
+ }
+ } else {
+ /* first (unsafe) reply: track it on the session so it can be
+ * replayed if the MDS restarts before the safe reply arrives */
+ req->r_got_unsafe = true;
+ list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe);
+ }
+
+ dout("handle_reply tid %lld result %d\n", tid, result);
+ rinfo = &req->r_reply_info;
+ err = parse_reply_info(msg, rinfo, session->s_con.peer_features);
+ mutex_unlock(&mdsc->mutex);
+
+ /* mdsc->mutex is dropped; only the session mutex is held below */
+ mutex_lock(&session->s_mutex);
+ if (err < 0) {
+ pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
+ ceph_msg_dump(msg);
+ goto out_err;
+ }
+
+ /* snap trace */
+ if (rinfo->snapblob_len) {
+ down_write(&mdsc->snap_rwsem);
+ ceph_update_snap_trace(mdsc, rinfo->snapblob,
+ rinfo->snapblob + rinfo->snapblob_len,
+ le32_to_cpu(head->op) == CEPH_MDS_OP_RMSNAP);
+ downgrade_write(&mdsc->snap_rwsem);
+ } else {
+ down_read(&mdsc->snap_rwsem);
+ }
+
+ /* insert trace into our cache */
+ mutex_lock(&req->r_fill_mutex);
+ err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
+ if (err == 0) {
+ if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
+ req->r_op == CEPH_MDS_OP_LSSNAP))
+ ceph_readdir_prepopulate(req, req->r_session);
+ ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
+ }
+ mutex_unlock(&req->r_fill_mutex);
+
+ up_read(&mdsc->snap_rwsem);
+out_err:
+ mutex_lock(&mdsc->mutex);
+ if (!req->r_aborted) {
+ if (err) {
+ req->r_err = err;
+ } else {
+ req->r_reply = msg;
+ ceph_msg_get(msg);
+ req->r_got_result = true;
+ }
+ } else {
+ dout("reply arrived after request %lld was aborted\n", tid);
+ }
+ mutex_unlock(&mdsc->mutex);
+
+ ceph_add_cap_releases(mdsc, req->r_session);
+ mutex_unlock(&session->s_mutex);
+
+ /* kick calling process */
+ complete_request(mdsc, req);
+out:
+ ceph_mdsc_put_request(req);
+ return;
+}
+
+
+
+/*
+ * handle mds notification that our request has been forwarded.
+ */
+static void handle_forward(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session,
+ struct ceph_msg *msg)
+{
+ struct ceph_mds_request *req;
+ u64 tid = le64_to_cpu(msg->hdr.tid);
+ u32 next_mds;
+ u32 fwd_seq;
+ int err = -EINVAL;
+ void *p = msg->front.iov_base;
+ void *end = p + msg->front.iov_len;
+
+ /* payload: target mds rank followed by a forward sequence number */
+ ceph_decode_need(&p, end, 2*sizeof(u32), bad);
+ next_mds = ceph_decode_32(&p);
+ fwd_seq = ceph_decode_32(&p);
+
+ mutex_lock(&mdsc->mutex);
+ req = __lookup_request(mdsc, tid);
+ if (!req) {
+ dout("forward tid %llu to mds%d - req dne\n", tid, next_mds);
+ goto out; /* dup reply? */
+ }
+
+ if (req->r_aborted) {
+ dout("forward tid %llu aborted, unregistering\n", tid);
+ __unregister_request(mdsc, req);
+ } else if (fwd_seq <= req->r_num_fwd) {
+ /* stale forward notification; we already acted on a newer one */
+ dout("forward tid %llu to mds%d - old seq %d <= %d\n",
+ tid, next_mds, req->r_num_fwd, fwd_seq);
+ } else {
+ /* resend. forward race not possible; mds would drop */
+ dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
+ BUG_ON(req->r_err);
+ BUG_ON(req->r_got_result);
+ req->r_num_fwd = fwd_seq;
+ req->r_resend_mds = next_mds;
+ put_request_session(req);
+ __do_request(mdsc, req);
+ }
+ ceph_mdsc_put_request(req);
+out:
+ mutex_unlock(&mdsc->mutex);
+ return;
+
+bad:
+ pr_err("mdsc_handle_forward decode error err=%d\n", err);
+}
+
+/*
+ * handle a mds session control message
+ */
+static void handle_session(struct ceph_mds_session *session,
+ struct ceph_msg *msg)
+{
+ struct ceph_mds_client *mdsc = session->s_mdsc;
+ u32 op;
+ u64 seq;
+ int mds = session->s_mds;
+ struct ceph_mds_session_head *h = msg->front.iov_base;
+ int wake = 0;
+
+ /* decode */
+ if (msg->front.iov_len != sizeof(*h))
+ goto bad;
+ op = le32_to_cpu(h->op);
+ seq = le64_to_cpu(h->seq);
+
+ mutex_lock(&mdsc->mutex);
+ if (op == CEPH_SESSION_CLOSE)
+ __unregister_session(mdsc, session);
+ /* FIXME: this ttl calculation is generous */
+ session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose;
+ mutex_unlock(&mdsc->mutex);
+
+ mutex_lock(&session->s_mutex);
+
+ dout("handle_session mds%d %s %p state %s seq %llu\n",
+ mds, ceph_session_op_name(op), session,
+ session_state_name(session->s_state), seq);
+
+ /* any message from the MDS proves a HUNG session is alive again */
+ if (session->s_state == CEPH_MDS_SESSION_HUNG) {
+ session->s_state = CEPH_MDS_SESSION_OPEN;
+ pr_info("mds%d came back\n", session->s_mds);
+ }
+
+ switch (op) {
+ case CEPH_SESSION_OPEN:
+ if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
+ pr_info("mds%d reconnect success\n", session->s_mds);
+ session->s_state = CEPH_MDS_SESSION_OPEN;
+ renewed_caps(mdsc, session, 0);
+ wake = 1;
+ if (mdsc->stopping)
+ __close_session(mdsc, session);
+ break;
+
+ case CEPH_SESSION_RENEWCAPS:
+ if (session->s_renew_seq == seq)
+ renewed_caps(mdsc, session, 1);
+ break;
+
+ case CEPH_SESSION_CLOSE:
+ if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
+ pr_info("mds%d reconnect denied\n", session->s_mds);
+ remove_session_caps(session);
+ wake = 1; /* for good measure */
+ wake_up_all(&mdsc->session_close_wq);
+ kick_requests(mdsc, mds);
+ break;
+
+ case CEPH_SESSION_STALE:
+ pr_info("mds%d caps went stale, renewing\n",
+ session->s_mds);
+ /* bump the cap generation so stale caps/leases are rejected */
+ spin_lock(&session->s_gen_ttl_lock);
+ session->s_cap_gen++;
+ session->s_cap_ttl = jiffies - 1;
+ spin_unlock(&session->s_gen_ttl_lock);
+ send_renew_caps(mdsc, session);
+ break;
+
+ case CEPH_SESSION_RECALL_STATE:
+ trim_caps(mdsc, session, le32_to_cpu(h->max_caps));
+ break;
+
+ case CEPH_SESSION_FLUSHMSG:
+ send_flushmsg_ack(mdsc, session, seq);
+ break;
+
+ default:
+ pr_err("mdsc_handle_session bad op %d mds%d\n", op, mds);
+ WARN_ON(1);
+ }
+
+ mutex_unlock(&session->s_mutex);
+ if (wake) {
+ mutex_lock(&mdsc->mutex);
+ __wake_requests(mdsc, &session->s_waiting);
+ mutex_unlock(&mdsc->mutex);
+ }
+ return;
+
+bad:
+ pr_err("mdsc_handle_session corrupt message mds%d len %d\n", mds,
+ (int)msg->front.iov_len);
+ ceph_msg_dump(msg);
+ return;
+}
+
+
+/*
+ * Resend every request on the session's s_unsafe list (requests for
+ * which only an unsafe reply has been seen) so a recovering MDS can
+ * replay them.
+ *
+ * called under session->mutex.
+ */
+static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ struct ceph_mds_request *req, *nreq;
+ int err;
+
+ dout("replay_unsafe_requests mds%d\n", session->s_mds);
+
+ mutex_lock(&mdsc->mutex);
+ list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) {
+ err = __prepare_send_request(mdsc, req, session->s_mds);
+ if (!err) {
+ ceph_msg_get(req->r_request);
+ ceph_con_send(&session->s_con, req->r_request);
+ }
+ }
+ mutex_unlock(&mdsc->mutex);
+}
+
+/*
+ * Encode information about a cap for a reconnect with the MDS.
+ * Appends the inode number, path, cap record and (if the peer supports
+ * the FLOCK feature) the file-lock records to recon_state->pagelist.
+ */
+static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
+ void *arg)
+{
+ union {
+ struct ceph_mds_cap_reconnect v2;
+ struct ceph_mds_cap_reconnect_v1 v1;
+ } rec;
+ size_t reclen;
+ struct ceph_inode_info *ci;
+ struct ceph_reconnect_state *recon_state = arg;
+ struct ceph_pagelist *pagelist = recon_state->pagelist;
+ char *path;
+ int pathlen, err;
+ u64 pathbase;
+ struct dentry *dentry;
+
+ ci = cap->ci;
+
+ dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
+ inode, ceph_vinop(inode), cap, cap->cap_id,
+ ceph_cap_string(cap->issued));
+ err = ceph_pagelist_encode_64(pagelist, ceph_ino(inode));
+ if (err)
+ return err;
+
+ dentry = d_find_alias(inode);
+ if (dentry) {
+ path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
+ goto out_dput;
+ }
+ } else {
+ /* no dentry for this inode; send an empty path */
+ path = NULL;
+ pathlen = 0;
+ }
+ err = ceph_pagelist_encode_string(pagelist, path, pathlen);
+ if (err)
+ goto out_free;
+
+ spin_lock(&ci->i_ceph_lock);
+ cap->seq = 0; /* reset cap seq */
+ cap->issue_seq = 0; /* and issue_seq */
+ cap->mseq = 0; /* and migrate_seq */
+ cap->cap_gen = cap->session->s_cap_gen;
+
+ if (recon_state->flock) {
+ rec.v2.cap_id = cpu_to_le64(cap->cap_id);
+ rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
+ rec.v2.issued = cpu_to_le32(cap->issued);
+ rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
+ rec.v2.pathbase = cpu_to_le64(pathbase);
+ rec.v2.flock_len = 0;
+ reclen = sizeof(rec.v2);
+ } else {
+ rec.v1.cap_id = cpu_to_le64(cap->cap_id);
+ rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
+ rec.v1.issued = cpu_to_le32(cap->issued);
+ rec.v1.size = cpu_to_le64(inode->i_size);
+ ceph_encode_timespec(&rec.v1.mtime, &inode->i_mtime);
+ ceph_encode_timespec(&rec.v1.atime, &inode->i_atime);
+ rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
+ rec.v1.pathbase = cpu_to_le64(pathbase);
+ reclen = sizeof(rec.v1);
+ }
+ spin_unlock(&ci->i_ceph_lock);
+
+ if (recon_state->flock) {
+ int num_fcntl_locks, num_flock_locks;
+ struct ceph_filelock *flocks;
+
+ /*
+ * The lock count can change between counting and encoding
+ * (i_lock is dropped in between so we can kmalloc); on
+ * -ENOSPC the buffer was too small -- recount and retry.
+ */
+encode_again:
+ spin_lock(&inode->i_lock);
+ ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
+ spin_unlock(&inode->i_lock);
+ flocks = kmalloc((num_fcntl_locks+num_flock_locks) *
+ sizeof(struct ceph_filelock), GFP_NOFS);
+ if (!flocks) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+ spin_lock(&inode->i_lock);
+ err = ceph_encode_locks_to_buffer(inode, flocks,
+ num_fcntl_locks,
+ num_flock_locks);
+ spin_unlock(&inode->i_lock);
+ if (err) {
+ kfree(flocks);
+ if (err == -ENOSPC)
+ goto encode_again;
+ goto out_free;
+ }
+ /*
+ * number of encoded locks is stable, so copy to pagelist
+ */
+ rec.v2.flock_len = cpu_to_le32(2*sizeof(u32) +
+ (num_fcntl_locks+num_flock_locks) *
+ sizeof(struct ceph_filelock));
+ err = ceph_pagelist_append(pagelist, &rec, reclen);
+ if (!err)
+ err = ceph_locks_to_pagelist(flocks, pagelist,
+ num_fcntl_locks,
+ num_flock_locks);
+ kfree(flocks);
+ } else {
+ err = ceph_pagelist_append(pagelist, &rec, reclen);
+ }
+
+ recon_state->nr_caps++;
+out_free:
+ kfree(path);
+out_dput:
+ dput(dentry);
+ return err;
+}
+
+
+/*
+ * If an MDS fails and recovers, clients need to reconnect in order to
+ * reestablish shared state. This includes all caps issued through
+ * this session _and_ the snap_realm hierarchy. Because it's not
+ * clear which snap realms the mds cares about, we send everything we
+ * know about.. that ensures we'll then get any new info the
+ * recovering MDS might have.
+ *
+ * This is a relatively heavyweight operation, but it's rare.
+ *
+ * called with mdsc->mutex held.
+ */
+static void send_mds_reconnect(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ struct ceph_msg *reply;
+ struct rb_node *p;
+ int mds = session->s_mds;
+ int err = -ENOMEM;
+ int s_nr_caps;
+ struct ceph_pagelist *pagelist;
+ struct ceph_reconnect_state recon_state;
+
+ pr_info("mds%d reconnect start\n", mds);
+
+ pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
+ if (!pagelist)
+ goto fail_nopagelist;
+ ceph_pagelist_init(pagelist);
+
+ reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, GFP_NOFS, false);
+ if (!reply)
+ goto fail_nomsg;
+
+ mutex_lock(&session->s_mutex);
+ session->s_state = CEPH_MDS_SESSION_RECONNECTING;
+ session->s_seq = 0;
+
+ /* re-open the connection to the (possibly relocated) MDS */
+ ceph_con_close(&session->s_con);
+ ceph_con_open(&session->s_con,
+ CEPH_ENTITY_TYPE_MDS, mds,
+ ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
+
+ /* replay unsafe requests */
+ replay_unsafe_requests(mdsc, session);
+
+ down_read(&mdsc->snap_rwsem);
+
+ dout("session %p state %s\n", session,
+ session_state_name(session->s_state));
+
+ spin_lock(&session->s_gen_ttl_lock);
+ session->s_cap_gen++;
+ spin_unlock(&session->s_gen_ttl_lock);
+
+ spin_lock(&session->s_cap_lock);
+ /*
+ * notify __ceph_remove_cap() that we are composing cap reconnect.
+ * If a cap get released before being added to the cap reconnect,
+ * __ceph_remove_cap() should skip queuing cap release.
+ */
+ session->s_cap_reconnect = 1;
+ /* drop old cap expires; we're about to reestablish that state */
+ discard_cap_releases(mdsc, session);
+ spin_unlock(&session->s_cap_lock);
+
+ /* traverse this session's caps */
+ s_nr_caps = session->s_nr_caps;
+ err = ceph_pagelist_encode_32(pagelist, s_nr_caps);
+ if (err)
+ goto fail;
+
+ recon_state.nr_caps = 0;
+ recon_state.pagelist = pagelist;
+ recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK;
+ err = iterate_session_caps(session, encode_caps_cb, &recon_state);
+ if (err < 0)
+ goto fail;
+
+ spin_lock(&session->s_cap_lock);
+ session->s_cap_reconnect = 0;
+ spin_unlock(&session->s_cap_lock);
+
+ /*
+ * snaprealms. we provide mds with the ino, seq (version), and
+ * parent for all of our realms. If the mds has any newer info,
+ * it will tell us.
+ */
+ for (p = rb_first(&mdsc->snap_realms); p; p = rb_next(p)) {
+ struct ceph_snap_realm *realm =
+ rb_entry(p, struct ceph_snap_realm, node);
+ struct ceph_mds_snaprealm_reconnect sr_rec;
+
+ dout(" adding snap realm %llx seq %lld parent %llx\n",
+ realm->ino, realm->seq, realm->parent_ino);
+ sr_rec.ino = cpu_to_le64(realm->ino);
+ sr_rec.seq = cpu_to_le64(realm->seq);
+ sr_rec.parent = cpu_to_le64(realm->parent_ino);
+ err = ceph_pagelist_append(pagelist, &sr_rec, sizeof(sr_rec));
+ if (err)
+ goto fail;
+ }
+
+ if (recon_state.flock)
+ reply->hdr.version = cpu_to_le16(2);
+
+ /* raced with cap release? */
+ if (s_nr_caps != recon_state.nr_caps) {
+ /* the cap count was encoded first (at the start of the first
+ * page); patch it in place with the number actually added */
+ struct page *page = list_first_entry(&pagelist->head,
+ struct page, lru);
+ __le32 *addr = kmap_atomic(page);
+ *addr = cpu_to_le32(recon_state.nr_caps);
+ kunmap_atomic(addr);
+ }
+
+ reply->hdr.data_len = cpu_to_le32(pagelist->length);
+ ceph_msg_data_add_pagelist(reply, pagelist);
+ ceph_con_send(&session->s_con, reply);
+
+ mutex_unlock(&session->s_mutex);
+
+ mutex_lock(&mdsc->mutex);
+ __wake_requests(mdsc, &session->s_waiting);
+ mutex_unlock(&mdsc->mutex);
+
+ up_read(&mdsc->snap_rwsem);
+ return;
+
+fail:
+ ceph_msg_put(reply);
+ up_read(&mdsc->snap_rwsem);
+ mutex_unlock(&session->s_mutex);
+fail_nomsg:
+ ceph_pagelist_release(pagelist);
+ kfree(pagelist);
+fail_nopagelist:
+ pr_err("error %d preparing reconnect for mds%d\n", err, mds);
+ return;
+}
+
+
+/*
+ * compare old and new mdsmaps, kicking requests
+ * and closing out old connections as necessary
+ *
+ * called under mdsc->mutex.
+ */
+static void check_new_map(struct ceph_mds_client *mdsc,
+ struct ceph_mdsmap *newmap,
+ struct ceph_mdsmap *oldmap)
+{
+ int i;
+ int oldstate, newstate;
+ struct ceph_mds_session *s;
+
+ dout("check_new_map new %u old %u\n",
+ newmap->m_epoch, oldmap->m_epoch);
+
+ for (i = 0; i < oldmap->m_max_mds && i < mdsc->max_sessions; i++) {
+ if (mdsc->sessions[i] == NULL)
+ continue;
+ s = mdsc->sessions[i];
+ oldstate = ceph_mdsmap_get_state(oldmap, i);
+ newstate = ceph_mdsmap_get_state(newmap, i);
+
+ dout("check_new_map mds%d state %s%s -> %s%s (session %s)\n",
+ i, ceph_mds_state_name(oldstate),
+ ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "",
+ ceph_mds_state_name(newstate),
+ ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
+ session_state_name(s->s_state));
+
+ /* mds rank gone, or its address changed? */
+ if (i >= newmap->m_max_mds ||
+ memcmp(ceph_mdsmap_get_addr(oldmap, i),
+ ceph_mdsmap_get_addr(newmap, i),
+ sizeof(struct ceph_entity_addr))) {
+ if (s->s_state == CEPH_MDS_SESSION_OPENING) {
+ /* the session never opened, just close it
+ * out now */
+ __wake_requests(mdsc, &s->s_waiting);
+ __unregister_session(mdsc, s);
+ } else {
+ /* just close it */
+ /* drop mdsc->mutex so s_mutex is taken first,
+ * then re-take mdsc->mutex under it */
+ mutex_unlock(&mdsc->mutex);
+ mutex_lock(&s->s_mutex);
+ mutex_lock(&mdsc->mutex);
+ ceph_con_close(&s->s_con);
+ mutex_unlock(&s->s_mutex);
+ s->s_state = CEPH_MDS_SESSION_RESTARTING;
+ }
+
+ /* kick any requests waiting on the recovering mds */
+ kick_requests(mdsc, i);
+ } else if (oldstate == newstate) {
+ continue; /* nothing new with this mds */
+ }
+
+ /*
+ * send reconnect?
+ */
+ if (s->s_state == CEPH_MDS_SESSION_RESTARTING &&
+ newstate >= CEPH_MDS_STATE_RECONNECT) {
+ mutex_unlock(&mdsc->mutex);
+ send_mds_reconnect(mdsc, s);
+ mutex_lock(&mdsc->mutex);
+ }
+
+ /*
+ * kick request on any mds that has gone active.
+ */
+ if (oldstate < CEPH_MDS_STATE_ACTIVE &&
+ newstate >= CEPH_MDS_STATE_ACTIVE) {
+ if (oldstate != CEPH_MDS_STATE_CREATING &&
+ oldstate != CEPH_MDS_STATE_STARTING)
+ pr_info("mds%d recovery completed\n", s->s_mds);
+ kick_requests(mdsc, i);
+ ceph_kick_flushing_caps(mdsc, s);
+ wake_up_session_caps(s, 1);
+ }
+ }
+
+ /* second pass: open export-target sessions for laggy MDSes */
+ for (i = 0; i < newmap->m_max_mds && i < mdsc->max_sessions; i++) {
+ s = mdsc->sessions[i];
+ if (!s)
+ continue;
+ if (!ceph_mdsmap_is_laggy(newmap, i))
+ continue;
+ if (s->s_state == CEPH_MDS_SESSION_OPEN ||
+ s->s_state == CEPH_MDS_SESSION_HUNG ||
+ s->s_state == CEPH_MDS_SESSION_CLOSING) {
+ dout(" connecting to export targets of laggy mds%d\n",
+ i);
+ __open_export_target_sessions(mdsc, s);
+ }
+ }
+}
+
+
+
+/*
+ * leases
+ */
+
+/*
+ * Drop the lease session reference held by @dentry's ceph_dentry_info.
+ *
+ * caller must hold session s_mutex, dentry->d_lock
+ */
+void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry)
+{
+ struct ceph_dentry_info *di = ceph_dentry(dentry);
+
+ ceph_put_mds_session(di->lease_session);
+ di->lease_session = NULL;
+}
+
+/*
+ * Handle a dentry lease message from the MDS (revoke or renew).
+ * For a revoke (or any case where we can't find the dentry) we reply
+ * with a REVOKE_ACK by reusing the incoming message.
+ */
+static void handle_lease(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session,
+ struct ceph_msg *msg)
+{
+ struct super_block *sb = mdsc->fsc->sb;
+ struct inode *inode;
+ struct dentry *parent, *dentry;
+ struct ceph_dentry_info *di;
+ int mds = session->s_mds;
+ struct ceph_mds_lease *h = msg->front.iov_base;
+ u32 seq;
+ struct ceph_vino vino;
+ struct qstr dname;
+ int release = 0;
+
+ dout("handle_lease from mds%d\n", mds);
+
+ /* decode */
+ if (msg->front.iov_len < sizeof(*h) + sizeof(u32))
+ goto bad;
+ vino.ino = le64_to_cpu(h->ino);
+ vino.snap = CEPH_NOSNAP;
+ seq = le32_to_cpu(h->seq);
+ /* the dentry name (length-prefixed) follows the fixed header */
+ dname.name = (void *)h + sizeof(*h) + sizeof(u32);
+ dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32);
+ if (dname.len != get_unaligned_le32(h+1))
+ goto bad;
+
+ mutex_lock(&session->s_mutex);
+ session->s_seq++;
+
+ /* lookup inode */
+ inode = ceph_find_inode(sb, vino);
+ dout("handle_lease %s, ino %llx %p %.*s\n",
+ ceph_lease_op_name(h->action), vino.ino, inode,
+ dname.len, dname.name);
+ if (inode == NULL) {
+ dout("handle_lease no inode %llx\n", vino.ino);
+ goto release;
+ }
+
+ /* dentry */
+ parent = d_find_alias(inode);
+ if (!parent) {
+ dout("no parent dentry on inode %p\n", inode);
+ WARN_ON(1);
+ goto release; /* hrm... */
+ }
+ dname.hash = full_name_hash(dname.name, dname.len);
+ dentry = d_lookup(parent, &dname);
+ dput(parent);
+ if (!dentry)
+ goto release;
+
+ spin_lock(&dentry->d_lock);
+ di = ceph_dentry(dentry);
+ switch (h->action) {
+ case CEPH_MDS_LEASE_REVOKE:
+ if (di->lease_session == session) {
+ /* ack with our (possibly newer) lease seq */
+ if (ceph_seq_cmp(di->lease_seq, seq) > 0)
+ h->seq = cpu_to_le32(di->lease_seq);
+ __ceph_mdsc_drop_dentry_lease(dentry);
+ }
+ release = 1;
+ break;
+
+ case CEPH_MDS_LEASE_RENEW:
+ if (di->lease_session == session &&
+ di->lease_gen == session->s_cap_gen &&
+ di->lease_renew_from &&
+ di->lease_renew_after == 0) {
+ unsigned long duration =
+ le32_to_cpu(h->duration_ms) * HZ / 1000;
+
+ di->lease_seq = seq;
+ dentry->d_time = di->lease_renew_from + duration;
+ di->lease_renew_after = di->lease_renew_from +
+ (duration >> 1);
+ di->lease_renew_from = 0;
+ }
+ break;
+ }
+ spin_unlock(&dentry->d_lock);
+ dput(dentry);
+
+ /* fall through into "release" when release == 1 (REVOKE case) */
+ if (!release)
+ goto out;
+
+release:
+ /* let's just reuse the same message */
+ h->action = CEPH_MDS_LEASE_REVOKE_ACK;
+ ceph_msg_get(msg);
+ ceph_con_send(&session->s_con, msg);
+
+out:
+ iput(inode);
+ mutex_unlock(&session->s_mutex);
+ return;
+
+bad:
+ pr_err("corrupt lease message\n");
+ ceph_msg_dump(msg);
+}
+
+/*
+ * Build and send a CEPH_MSG_CLIENT_LEASE message for @dentry to the
+ * session's MDS.  The message body is the lease header followed by the
+ * length-prefixed dentry name.
+ */
+void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
+ struct inode *inode,
+ struct dentry *dentry, char action,
+ u32 seq)
+{
+ struct ceph_msg *msg;
+ struct ceph_mds_lease *lease;
+ int len = sizeof(*lease) + sizeof(u32);
+ int dnamelen = 0;
+
+ dout("lease_send_msg inode %p dentry %p %s to mds%d\n",
+ inode, dentry, ceph_lease_op_name(action), session->s_mds);
+ dnamelen = dentry->d_name.len;
+ len += dnamelen;
+
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS, false);
+ if (!msg)
+ return;
+ lease = msg->front.iov_base;
+ lease->action = action;
+ lease->ino = cpu_to_le64(ceph_vino(inode).ino);
+ lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap);
+ lease->seq = cpu_to_le32(seq);
+ /* name length (u32) then the name bytes, directly after the header */
+ put_unaligned_le32(dnamelen, lease + 1);
+ memcpy((void *)(lease + 1) + 4, dentry->d_name.name, dnamelen);
+
+ /*
+ * if this is a preemptive lease RELEASE, no need to
+ * flush request stream, since the actual request will
+ * soon follow.
+ */
+ msg->more_to_follow = (action == CEPH_MDS_LEASE_RELEASE);
+
+ ceph_con_send(&session->s_con, msg);
+}
+
+/*
+ * Preemptively release a lease we expect to invalidate anyway.
+ * Pass @inode always, @dentry is optional.
+ */
+void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode,
+ struct dentry *dentry)
+{
+ struct ceph_dentry_info *di;
+ struct ceph_mds_session *session;
+ u32 seq;
+
+ BUG_ON(inode == NULL);
+ BUG_ON(dentry == NULL);
+
+ /* is dentry lease valid? */
+ spin_lock(&dentry->d_lock);
+ di = ceph_dentry(dentry);
+ if (!di || !di->lease_session ||
+ di->lease_session->s_mds < 0 ||
+ di->lease_gen != di->lease_session->s_cap_gen ||
+ !time_before(jiffies, dentry->d_time)) {
+ dout("lease_release inode %p dentry %p -- "
+ "no lease\n",
+ inode, dentry);
+ spin_unlock(&dentry->d_lock);
+ return;
+ }
+
+ /* we do have a lease on this dentry; note mds and seq */
+ session = ceph_get_mds_session(di->lease_session);
+ seq = di->lease_seq;
+ __ceph_mdsc_drop_dentry_lease(dentry);
+ spin_unlock(&dentry->d_lock);
+
+ dout("lease_release inode %p dentry %p to mds%d\n",
+ inode, dentry, session->s_mds);
+ ceph_mdsc_lease_send_msg(session, inode, dentry,
+ CEPH_MDS_LEASE_RELEASE, seq);
+ ceph_put_mds_session(session);
+}
+
+/*
+ * drop all leases (and dentry refs) in preparation for umount
+ */
+static void drop_leases(struct ceph_mds_client *mdsc)
+{
+ int i;
+
+ dout("drop_leases\n");
+ mutex_lock(&mdsc->mutex);
+ for (i = 0; i < mdsc->max_sessions; i++) {
+ struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
+ if (!s)
+ continue;
+ mutex_unlock(&mdsc->mutex);
+ /* take and immediately drop each session mutex -- NOTE(review):
+ * looks like a barrier to wait out in-flight handlers holding
+ * s_mutex; confirm */
+ mutex_lock(&s->s_mutex);
+ mutex_unlock(&s->s_mutex);
+ ceph_put_mds_session(s);
+ mutex_lock(&mdsc->mutex);
+ }
+ mutex_unlock(&mdsc->mutex);
+}
+
+
+
+/*
+ * delayed work -- periodically trim expired leases, renew caps with mds
+ */
+static void schedule_delayed(struct ceph_mds_client *mdsc)
+{
+ int delay = 5;
+ /* note: despite the name, 'hz' holds a jiffies delay (~5s),
+ * rounded so wakeups batch with other timers */
+ unsigned hz = round_jiffies_relative(HZ * delay);
+ schedule_delayed_work(&mdsc->delayed_work, hz);
+}
+
+/*
+ * Periodic work: flush delayed caps, renew caps with each open MDS
+ * session (or send a keepalive), push queued cap releases, and mark
+ * sessions HUNG when their ttl has expired.  Re-arms itself.
+ */
+static void delayed_work(struct work_struct *work)
+{
+ int i;
+ struct ceph_mds_client *mdsc =
+ container_of(work, struct ceph_mds_client, delayed_work.work);
+ int renew_interval;
+ int renew_caps;
+
+ dout("mdsc delayed_work\n");
+ ceph_check_delayed_caps(mdsc);
+
+ mutex_lock(&mdsc->mutex);
+ /* renew caps every quarter of the session timeout */
+ renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
+ renew_caps = time_after_eq(jiffies, HZ*renew_interval +
+ mdsc->last_renew_caps);
+ if (renew_caps)
+ mdsc->last_renew_caps = jiffies;
+
+ for (i = 0; i < mdsc->max_sessions; i++) {
+ struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
+ if (s == NULL)
+ continue;
+ if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
+ dout("resending session close request for mds%d\n",
+ s->s_mds);
+ request_close_session(mdsc, s);
+ ceph_put_mds_session(s);
+ continue;
+ }
+ if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
+ if (s->s_state == CEPH_MDS_SESSION_OPEN) {
+ s->s_state = CEPH_MDS_SESSION_HUNG;
+ pr_info("mds%d hung\n", s->s_mds);
+ }
+ }
+ if (s->s_state < CEPH_MDS_SESSION_OPEN) {
+ /* this mds is failed or recovering, just wait */
+ ceph_put_mds_session(s);
+ continue;
+ }
+ /* drop mdsc->mutex before taking the session mutex */
+ mutex_unlock(&mdsc->mutex);
+
+ mutex_lock(&s->s_mutex);
+ if (renew_caps)
+ send_renew_caps(mdsc, s);
+ else
+ ceph_con_keepalive(&s->s_con);
+ ceph_add_cap_releases(mdsc, s);
+ if (s->s_state == CEPH_MDS_SESSION_OPEN ||
+ s->s_state == CEPH_MDS_SESSION_HUNG)
+ ceph_send_cap_releases(mdsc, s);
+ mutex_unlock(&s->s_mutex);
+ ceph_put_mds_session(s);
+
+ mutex_lock(&mdsc->mutex);
+ }
+ mutex_unlock(&mdsc->mutex);
+
+ schedule_delayed(mdsc);
+}
+
+/*
+ * Allocate and initialize the MDS client state for @fsc.
+ * Returns 0 on success or -ENOMEM.
+ */
+int ceph_mdsc_init(struct ceph_fs_client *fsc)
+
+{
+ struct ceph_mds_client *mdsc;
+
+ mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS);
+ if (!mdsc)
+ return -ENOMEM;
+ mdsc->fsc = fsc;
+ fsc->mdsc = mdsc;
+ mutex_init(&mdsc->mutex);
+ /* start with an empty mdsmap; replaced when a real map arrives */
+ mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
+ if (mdsc->mdsmap == NULL) {
+ kfree(mdsc);
+ return -ENOMEM;
+ }
+
+ init_completion(&mdsc->safe_umount_waiters);
+ init_waitqueue_head(&mdsc->session_close_wq);
+ INIT_LIST_HEAD(&mdsc->waiting_for_map);
+ mdsc->sessions = NULL;
+ mdsc->max_sessions = 0;
+ mdsc->stopping = 0;
+ init_rwsem(&mdsc->snap_rwsem);
+ mdsc->snap_realms = RB_ROOT;
+ INIT_LIST_HEAD(&mdsc->snap_empty);
+ spin_lock_init(&mdsc->snap_empty_lock);
+ mdsc->last_tid = 0;
+ mdsc->request_tree = RB_ROOT;
+ INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work);
+ mdsc->last_renew_caps = jiffies;
+ INIT_LIST_HEAD(&mdsc->cap_delay_list);
+ spin_lock_init(&mdsc->cap_delay_lock);
+ INIT_LIST_HEAD(&mdsc->snap_flush_list);
+ spin_lock_init(&mdsc->snap_flush_lock);
+ mdsc->cap_flush_seq = 0;
+ INIT_LIST_HEAD(&mdsc->cap_dirty);
+ INIT_LIST_HEAD(&mdsc->cap_dirty_migrating);
+ mdsc->num_cap_flushing = 0;
+ spin_lock_init(&mdsc->cap_dirty_lock);
+ init_waitqueue_head(&mdsc->cap_flushing_wq);
+ spin_lock_init(&mdsc->dentry_lru_lock);
+ INIT_LIST_HEAD(&mdsc->dentry_lru);
+
+ ceph_caps_init(mdsc);
+ ceph_adjust_min_caps(mdsc, fsc->min_caps);
+
+ return 0;
+}
+
+/*
+ * Wait for safe replies on open mds requests. If we time out, drop
+ * all requests from the tree to avoid dangling dentry refs.
+ */
+static void wait_requests(struct ceph_mds_client *mdsc)
+{
+ struct ceph_mds_request *req;
+ struct ceph_fs_client *fsc = mdsc->fsc;
+
+ mutex_lock(&mdsc->mutex);
+ if (__get_oldest_req(mdsc)) {
+ mutex_unlock(&mdsc->mutex);
+
+ dout("wait_requests waiting for requests\n");
+ /* woken by handle_reply when the last request completes */
+ wait_for_completion_timeout(&mdsc->safe_umount_waiters,
+ fsc->client->options->mount_timeout * HZ);
+
+ /* tear down remaining requests */
+ mutex_lock(&mdsc->mutex);
+ while ((req = __get_oldest_req(mdsc))) {
+ dout("wait_requests timed out on tid %llu\n",
+ req->r_tid);
+ __unregister_request(mdsc, req);
+ }
+ }
+ mutex_unlock(&mdsc->mutex);
+ dout("wait_requests done\n");
+}
+
+/*
+ * called before mount is ro, and before dentries are torn down.
+ * (hmm, does this still race with new lookups?)
+ */
+void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
+{
+ dout("pre_umount\n");
+ mdsc->stopping = 1;
+
+ drop_leases(mdsc);
+ ceph_flush_dirty_caps(mdsc);
+ wait_requests(mdsc);
+
+ /*
+ * wait for reply handlers to drop their request refs and
+ * their inode/dcache refs
+ */
+ ceph_msgr_flush();
+}
+
+/*
+ * wait for all write mds requests to flush.
+ */
+static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
+{
+ struct ceph_mds_request *req = NULL, *nextreq;
+ struct rb_node *n;
+
+ mutex_lock(&mdsc->mutex);
+ dout("wait_unsafe_requests want %lld\n", want_tid);
+restart:
+ req = __get_oldest_req(mdsc);
+ while (req && req->r_tid <= want_tid) {
+ /* find next request */
+ n = rb_next(&req->r_node);
+ if (n)
+ nextreq = rb_entry(n, struct ceph_mds_request, r_node);
+ else
+ nextreq = NULL;
+ if ((req->r_op & CEPH_MDS_OP_WRITE)) {
+ /* write op */
+ /* pin both req and nextreq so they survive the
+ * mutex drop while we sleep */
+ ceph_mdsc_get_request(req);
+ if (nextreq)
+ ceph_mdsc_get_request(nextreq);
+ mutex_unlock(&mdsc->mutex);
+ dout("wait_unsafe_requests wait on %llu (want %llu)\n",
+ req->r_tid, want_tid);
+ wait_for_completion(&req->r_safe_completion);
+ mutex_lock(&mdsc->mutex);
+ ceph_mdsc_put_request(req);
+ if (!nextreq)
+ break; /* next dne before, so we're done! */
+ if (RB_EMPTY_NODE(&nextreq->r_node)) {
+ /* next request was removed from tree */
+ ceph_mdsc_put_request(nextreq);
+ goto restart;
+ }
+ ceph_mdsc_put_request(nextreq); /* won't go away */
+ }
+ req = nextreq;
+ }
+ mutex_unlock(&mdsc->mutex);
+ dout("wait_unsafe_requests done\n");
+}
+
+/*
+ * Flush dirty caps, then wait for (1) every write MDS request with a
+ * tid <= the last tid issued so far to complete safely, and (2) the
+ * cap flush sequence to catch up to the value sampled here.
+ */
+void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
+{
+	u64 want_tid, want_flush;
+
+	/* nothing to wait for on a forced/failed mount */
+	if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
+		return;
+
+	dout("sync\n");
+	mutex_lock(&mdsc->mutex);
+	/* sample both targets under the mutex so they are consistent */
+	want_tid = mdsc->last_tid;
+	want_flush = mdsc->cap_flush_seq;
+	mutex_unlock(&mdsc->mutex);
+	dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush);
+
+	ceph_flush_dirty_caps(mdsc);
+
+	wait_unsafe_requests(mdsc, want_tid);
+	wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush));
+}
+
+/*
+ * true if all sessions are closed, or we force unmount
+ */
+/*
+ * true if all sessions are closed, or we force unmount
+ */
+static bool done_closing_sessions(struct ceph_mds_client *mdsc)
+{
+	bool any_open = false;
+	int i;
+
+	/* a forced unmount never waits for session teardown */
+	if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
+		return true;
+
+	mutex_lock(&mdsc->mutex);
+	for (i = 0; i < mdsc->max_sessions; i++) {
+		if (mdsc->sessions[i]) {
+			any_open = true;
+			break;
+		}
+	}
+	mutex_unlock(&mdsc->mutex);
+	return !any_open;
+}
+
+/*
+ * called after sb is ro.
+ */
+void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
+{
+	struct ceph_mds_session *session;
+	int i;
+	struct ceph_fs_client *fsc = mdsc->fsc;
+	unsigned long timeout = fsc->client->options->mount_timeout * HZ;
+
+	dout("close_sessions\n");
+
+	/* close sessions */
+	mutex_lock(&mdsc->mutex);
+	for (i = 0; i < mdsc->max_sessions; i++) {
+		session = __ceph_lookup_mds_session(mdsc, i);
+		if (!session)
+			continue;
+		/*
+		 * drop mdsc->mutex before taking s_mutex: s_mutex is the
+		 * outer lock (see ordering comment in mds_client.h)
+		 */
+		mutex_unlock(&mdsc->mutex);
+		mutex_lock(&session->s_mutex);
+		__close_session(mdsc, session);
+		mutex_unlock(&session->s_mutex);
+		ceph_put_mds_session(session);	/* drop ref from lookup */
+		mutex_lock(&mdsc->mutex);
+	}
+	mutex_unlock(&mdsc->mutex);
+
+	dout("waiting for sessions to close\n");
+	wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc),
+			   timeout);
+
+	/* tear down remaining sessions */
+	mutex_lock(&mdsc->mutex);
+	for (i = 0; i < mdsc->max_sessions; i++) {
+		if (mdsc->sessions[i]) {
+			session = get_session(mdsc->sessions[i]);
+			__unregister_session(mdsc, session);
+			/* same lock-ordering dance as above */
+			mutex_unlock(&mdsc->mutex);
+			mutex_lock(&session->s_mutex);
+			remove_session_caps(session);
+			mutex_unlock(&session->s_mutex);
+			ceph_put_mds_session(session);
+			mutex_lock(&mdsc->mutex);
+		}
+	}
+	WARN_ON(!list_empty(&mdsc->cap_delay_list));
+	mutex_unlock(&mdsc->mutex);
+
+	ceph_cleanup_empty_realms(mdsc);
+
+	cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
+
+	dout("stopped\n");
+}
+
+/*
+ * Stop background work and free mdsc sub-structures.  The delayed
+ * work is cancelled synchronously first, so none of it can still be
+ * running while the state below is torn down.
+ */
+static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
+{
+	dout("stop\n");
+	cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
+	if (mdsc->mdsmap)
+		ceph_mdsmap_destroy(mdsc->mdsmap);
+	kfree(mdsc->sessions);
+	ceph_caps_finalize(mdsc);
+}
+
+/*
+ * Final teardown of the mds client: stop background work, flush the
+ * messenger workqueues (which may still hold references to us), then
+ * free the mdsc itself.
+ */
+void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
+{
+	struct ceph_mds_client *mdsc = fsc->mdsc;
+
+	dout("mdsc_destroy %p\n", mdsc);
+	ceph_mdsc_stop(mdsc);
+
+	/* flush out any connection work with references to us */
+	ceph_msgr_flush();
+
+	fsc->mdsc = NULL;
+	/*
+	 * log before kfree(): the original printed mdsc after freeing it,
+	 * and using a freed pointer's value is undefined behavior
+	 */
+	dout("mdsc_destroy %p done\n", mdsc);
+	kfree(mdsc);
+}
+
+
+/*
+ * handle mds map update.
+ */
+void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
+{
+	u32 epoch;
+	u32 maplen;
+	void *p = msg->front.iov_base;
+	void *end = p + msg->front.iov_len;
+	struct ceph_mdsmap *newmap, *oldmap;
+	struct ceph_fsid fsid;
+	int err = -EINVAL;
+
+	/* fsid + epoch + map length must all be present up front */
+	ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad);
+	ceph_decode_copy(&p, &fsid, sizeof(fsid));
+	if (ceph_check_fsid(mdsc->fsc->client, &fsid) < 0)
+		return;	/* not our cluster: silently ignore */
+	epoch = ceph_decode_32(&p);
+	maplen = ceph_decode_32(&p);
+	dout("handle_map epoch %u len %d\n", epoch, (int)maplen);
+
+	/* do we need it? */
+	ceph_monc_got_mdsmap(&mdsc->fsc->client->monc, epoch);
+	mutex_lock(&mdsc->mutex);
+	if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) {
+		/* we already have this epoch (or newer) */
+		dout("handle_map epoch %u <= our %u\n",
+		     epoch, mdsc->mdsmap->m_epoch);
+		mutex_unlock(&mdsc->mutex);
+		return;
+	}
+
+	newmap = ceph_mdsmap_decode(&p, end);
+	if (IS_ERR(newmap)) {
+		err = PTR_ERR(newmap);
+		goto bad_unlock;
+	}
+
+	/* swap into place */
+	if (mdsc->mdsmap) {
+		oldmap = mdsc->mdsmap;
+		mdsc->mdsmap = newmap;
+		/* let check_new_map react to differences vs the old map */
+		check_new_map(mdsc, newmap, oldmap);
+		ceph_mdsmap_destroy(oldmap);
+	} else {
+		mdsc->mdsmap = newmap;  /* first mds map */
+	}
+	mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size;
+
+	/* wake requests parked on waiting_for_map */
+	__wake_requests(mdsc, &mdsc->waiting_for_map);
+
+	mutex_unlock(&mdsc->mutex);
+	schedule_delayed(mdsc);
+	return;
+
+bad_unlock:
+	mutex_unlock(&mdsc->mutex);
+bad:
+	pr_err("error decoding mdsmap %d\n", err);
+	return;
+}
+
+/* take a session ref for the messenger; NULL if the session is dying */
+static struct ceph_connection *con_get(struct ceph_connection *con)
+{
+	struct ceph_mds_session *session = con->private;
+
+	if (!get_session(session)) {
+		dout("mdsc con_get %p FAIL\n", session);
+		return NULL;
+	}
+	dout("mdsc con_get %p ok (%d)\n", session,
+	     atomic_read(&session->s_ref));
+	return con;
+}
+
+/* drop the session ref held on behalf of the messenger */
+static void con_put(struct ceph_connection *con)
+{
+	struct ceph_mds_session *session = con->private;
+
+	dout("mdsc con_put %p (%d)\n", session,
+	     atomic_read(&session->s_ref) - 1);
+	ceph_put_mds_session(session);
+}
+
+/*
+ * if the client is unresponsive for long enough, the mds will kill
+ * the session entirely.
+ */
+/*
+ * if the client is unresponsive for long enough, the mds will kill
+ * the session entirely.
+ */
+static void peer_reset(struct ceph_connection *con)
+{
+	struct ceph_mds_session *session = con->private;
+
+	pr_warning("mds%d closed our session\n", session->s_mds);
+	send_mds_reconnect(session->s_mdsc, session);
+}
+
+/*
+ * Route an incoming message from an MDS session to the appropriate
+ * handler based on its message type.  The message is always consumed.
+ */
+static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
+{
+	struct ceph_mds_session *s = con->private;
+	struct ceph_mds_client *mdsc = s->s_mdsc;
+	int type = le16_to_cpu(msg->hdr.type);
+
+	/* ignore messages for sessions that are no longer registered */
+	mutex_lock(&mdsc->mutex);
+	if (__verify_registered_session(mdsc, s) < 0) {
+		mutex_unlock(&mdsc->mutex);
+		goto out;
+	}
+	mutex_unlock(&mdsc->mutex);
+
+	switch (type) {
+	case CEPH_MSG_MDS_MAP:
+		ceph_mdsc_handle_map(mdsc, msg);
+		break;
+	case CEPH_MSG_CLIENT_SESSION:
+		handle_session(s, msg);
+		break;
+	case CEPH_MSG_CLIENT_REPLY:
+		handle_reply(s, msg);
+		break;
+	case CEPH_MSG_CLIENT_REQUEST_FORWARD:
+		handle_forward(mdsc, s, msg);
+		break;
+	case CEPH_MSG_CLIENT_CAPS:
+		ceph_handle_caps(s, msg);
+		break;
+	case CEPH_MSG_CLIENT_SNAP:
+		ceph_handle_snap(mdsc, s, msg);
+		break;
+	case CEPH_MSG_CLIENT_LEASE:
+		handle_lease(mdsc, s, msg);
+		break;
+
+	default:
+		pr_err("received unknown message type %d %s\n", type,
+		       ceph_msg_type_name(type));
+	}
+out:
+	ceph_msg_put(msg);	/* consume the message in all cases */
+}
+
+/*
+ * authentication
+ */
+
+/*
+ * Note: returned pointer is the address of a structure that's
+ * managed separately. Caller must *not* attempt to free it.
+ */
+/*
+ * Note: returned pointer is the address of a structure that's
+ * managed separately. Caller must *not* attempt to free it.
+ */
+static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
+					int *proto, int force_new)
+{
+	struct ceph_mds_session *s = con->private;
+	struct ceph_mds_client *mdsc = s->s_mdsc;
+	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
+	struct ceph_auth_handshake *auth = &s->s_auth;
+	int ret;
+
+	/* throw away any existing authorizer if a fresh one was requested */
+	if (force_new && auth->authorizer) {
+		ceph_auth_destroy_authorizer(ac, auth->authorizer);
+		auth->authorizer = NULL;
+	}
+
+	/* create a new authorizer, or refresh the one we already hold */
+	ret = auth->authorizer ?
+		ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_MDS, auth) :
+		ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_MDS, auth);
+	if (ret)
+		return ERR_PTR(ret);
+
+	*proto = ac->protocol;
+	return auth;
+}
+
+
+/* check the MDS's reply to our authorizer against what we sent */
+static int verify_authorizer_reply(struct ceph_connection *con, int len)
+{
+	struct ceph_mds_session *session = con->private;
+	struct ceph_auth_client *ac =
+		session->s_mdsc->fsc->client->monc.auth;
+
+	return ceph_auth_verify_authorizer_reply(ac,
+					session->s_auth.authorizer, len);
+}
+
+/* our MDS authorizer was rejected: invalidate it and revalidate auth */
+static int invalidate_authorizer(struct ceph_connection *con)
+{
+	struct ceph_mds_session *session = con->private;
+	struct ceph_mds_client *mdsc = session->s_mdsc;
+
+	ceph_auth_invalidate_authorizer(mdsc->fsc->client->monc.auth,
+					CEPH_ENTITY_TYPE_MDS);
+	return ceph_monc_validate_auth(&mdsc->fsc->client->monc);
+}
+
+/*
+ * Allocate a message to receive an incoming frame of front_len bytes,
+ * or hand back the message already prepared on this connection.
+ * NOTE(review): *skip is only initialized on the allocation path; the
+ * messenger presumably pre-initializes it before calling us — confirm
+ * against libceph/messenger.c.
+ */
+static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con,
+				struct ceph_msg_header *hdr, int *skip)
+{
+	struct ceph_msg *msg;
+	int type = (int) le16_to_cpu(hdr->type);
+	int front_len = (int) le32_to_cpu(hdr->front_len);
+
+	if (con->in_msg)
+		return con->in_msg;
+
+	*skip = 0;
+	msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
+	if (!msg) {
+		pr_err("unable to allocate msg type %d len %d\n",
+		       type, front_len);
+		return NULL;
+	}
+
+	return msg;
+}
+
+/* messenger callbacks for MDS session connections */
+static const struct ceph_connection_operations mds_con_ops = {
+	.get = con_get,
+	.put = con_put,
+	.dispatch = dispatch,
+	.get_authorizer = get_authorizer,
+	.verify_authorizer_reply = verify_authorizer_reply,
+	.invalidate_authorizer = invalidate_authorizer,
+	.peer_reset = peer_reset,
+	.alloc_msg = mds_alloc_msg,
+};
+
+/* eof */
diff --git a/ceph/mds_client.h b/ceph/mds_client.h
new file mode 100644
index 0000000..e90cfcc
--- /dev/null
+++ b/ceph/mds_client.h
@@ -0,0 +1,393 @@
+#ifndef _FS_CEPH_MDS_CLIENT_H
+#define _FS_CEPH_MDS_CLIENT_H
+
+#include <linux/completion.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+
+#include <linux/ceph/types.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/mdsmap.h>
+#include <linux/ceph/auth.h>
+
+/*
+ * Some lock dependencies:
+ *
+ * session->s_mutex
+ * mdsc->mutex
+ *
+ * mdsc->snap_rwsem
+ *
+ * ci->i_ceph_lock
+ * mdsc->snap_flush_lock
+ * mdsc->cap_delay_lock
+ *
+ */
+
+struct ceph_fs_client;
+struct ceph_cap;
+
+/*
+ * parsed info about a single inode. pointers are into the encoded
+ * on-wire structures within the mds reply message payload.
+ */
+struct ceph_mds_reply_info_in {
+ struct ceph_mds_reply_inode *in;
+ struct ceph_dir_layout dir_layout;
+ u32 symlink_len;
+ char *symlink;
+ u32 xattr_len;
+ char *xattr_data;
+};
+
+/*
+ * parsed info about an mds reply, including information about
+ * either: 1) the target inode and/or its parent directory and dentry,
+ * and directory contents (for readdir results), or
+ * 2) the file range lock info (for fcntl F_GETLK results).
+ */
+struct ceph_mds_reply_info_parsed {
+ struct ceph_mds_reply_head *head;
+
+ /* trace */
+ struct ceph_mds_reply_info_in diri, targeti;
+ struct ceph_mds_reply_dirfrag *dirfrag;
+ char *dname;
+ u32 dname_len;
+ struct ceph_mds_reply_lease *dlease;
+
+ /* extra */
+ union {
+ /* for fcntl F_GETLK results */
+ struct ceph_filelock *filelock_reply;
+
+ /* for readdir results */
+ struct {
+ struct ceph_mds_reply_dirfrag *dir_dir;
+ size_t dir_buf_size;
+ int dir_nr;
+ char **dir_dname;
+ u32 *dir_dname_len;
+ struct ceph_mds_reply_lease **dir_dlease;
+ struct ceph_mds_reply_info_in *dir_in;
+ u8 dir_complete, dir_end;
+ };
+
+ /* for create results */
+ struct {
+ bool has_create_ino;
+ u64 ino;
+ };
+ };
+
+ /* encoded blob describing snapshot contexts for certain
+ operations (e.g., open) */
+ void *snapblob;
+ int snapblob_len;
+};
+
+
+/*
+ * cap releases are batched and sent to the MDS en masse.
+ */
+#define CEPH_CAPS_PER_RELEASE ((PAGE_CACHE_SIZE - \
+ sizeof(struct ceph_mds_cap_release)) / \
+ sizeof(struct ceph_mds_cap_item))
+
+
+/*
+ * state associated with each MDS<->client session
+ */
+enum {
+ CEPH_MDS_SESSION_NEW = 1,
+ CEPH_MDS_SESSION_OPENING = 2,
+ CEPH_MDS_SESSION_OPEN = 3,
+ CEPH_MDS_SESSION_HUNG = 4,
+ CEPH_MDS_SESSION_CLOSING = 5,
+ CEPH_MDS_SESSION_RESTARTING = 6,
+ CEPH_MDS_SESSION_RECONNECTING = 7,
+};
+
+struct ceph_mds_session {
+ struct ceph_mds_client *s_mdsc;
+ int s_mds;
+ int s_state;
+ unsigned long s_ttl; /* time until mds kills us */
+ u64 s_seq; /* incoming msg seq # */
+ struct mutex s_mutex; /* serialize session messages */
+
+ struct ceph_connection s_con;
+
+ struct ceph_auth_handshake s_auth;
+
+ /* protected by s_gen_ttl_lock */
+ spinlock_t s_gen_ttl_lock;
+ u32 s_cap_gen; /* inc each time we get mds stale msg */
+ unsigned long s_cap_ttl; /* when session caps expire */
+
+ /* protected by s_cap_lock */
+ spinlock_t s_cap_lock;
+ struct list_head s_caps; /* all caps issued by this session */
+ int s_nr_caps, s_trim_caps;
+ int s_num_cap_releases;
+ int s_cap_reconnect;
+ struct list_head s_cap_releases; /* waiting cap_release messages */
+ struct list_head s_cap_releases_done; /* ready to send */
+ struct ceph_cap *s_cap_iterator;
+
+ /* protected by mutex */
+ struct list_head s_cap_flushing; /* inodes w/ flushing caps */
+ struct list_head s_cap_snaps_flushing;
+ unsigned long s_renew_requested; /* last time we sent a renew req */
+ u64 s_renew_seq;
+
+ atomic_t s_ref;
+ struct list_head s_waiting; /* waiting requests */
+ struct list_head s_unsafe; /* unsafe requests */
+};
+
+/*
+ * modes of choosing which MDS to send a request to
+ */
+enum {
+ USE_ANY_MDS,
+ USE_RANDOM_MDS,
+ USE_AUTH_MDS, /* prefer authoritative mds for this metadata item */
+};
+
+struct ceph_mds_request;
+struct ceph_mds_client;
+
+/*
+ * request completion callback
+ */
+typedef void (*ceph_mds_request_callback_t) (struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req);
+
+/*
+ * an in-flight mds request
+ */
+struct ceph_mds_request {
+ u64 r_tid; /* transaction id */
+ struct rb_node r_node;
+ struct ceph_mds_client *r_mdsc;
+
+ int r_op; /* mds op code */
+
+ /* operation on what? */
+ struct inode *r_inode; /* arg1 */
+ struct dentry *r_dentry; /* arg1 */
+ struct dentry *r_old_dentry; /* arg2: rename from or link from */
+ struct inode *r_old_dentry_dir; /* arg2: old dentry's parent dir */
+ char *r_path1, *r_path2;
+ struct ceph_vino r_ino1, r_ino2;
+
+ struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */
+ struct inode *r_target_inode; /* resulting inode */
+
+ struct mutex r_fill_mutex;
+
+ union ceph_mds_request_args r_args;
+ int r_fmode; /* file mode, if expecting cap */
+ kuid_t r_uid;
+ kgid_t r_gid;
+
+ /* for choosing which mds to send this request to */
+ int r_direct_mode;
+ u32 r_direct_hash; /* choose dir frag based on this dentry hash */
+ bool r_direct_is_hash; /* true if r_direct_hash is valid */
+
+ /* data payload is used for xattr ops */
+ struct page **r_pages;
+ int r_num_pages;
+ int r_data_len;
+
+ /* what caps shall we drop? */
+ int r_inode_drop, r_inode_unless;
+ int r_dentry_drop, r_dentry_unless;
+ int r_old_dentry_drop, r_old_dentry_unless;
+ struct inode *r_old_inode;
+ int r_old_inode_drop, r_old_inode_unless;
+
+ struct ceph_msg *r_request; /* original request */
+ int r_request_release_offset;
+ struct ceph_msg *r_reply;
+ struct ceph_mds_reply_info_parsed r_reply_info;
+ int r_err;
+ bool r_aborted;
+
+ unsigned long r_timeout; /* optional. jiffies */
+ unsigned long r_started; /* start time to measure timeout against */
+ unsigned long r_request_started; /* start time for mds request only,
+ used to measure lease durations */
+
+ /* link unsafe requests to parent directory, for fsync */
+ struct inode *r_unsafe_dir;
+ struct list_head r_unsafe_dir_item;
+
+ struct ceph_mds_session *r_session;
+
+ int r_attempts; /* resend attempts */
+ int r_num_fwd; /* number of forward attempts */
+ int r_resend_mds; /* mds to resend to next, if any*/
+ u32 r_sent_on_mseq; /* cap mseq request was sent at*/
+
+ struct kref r_kref;
+ struct list_head r_wait;
+ struct completion r_completion;
+ struct completion r_safe_completion;
+ ceph_mds_request_callback_t r_callback;
+ struct list_head r_unsafe_item; /* per-session unsafe list item */
+ bool r_got_unsafe, r_got_safe, r_got_result;
+
+ bool r_did_prepopulate;
+ u32 r_readdir_offset;
+
+ struct ceph_cap_reservation r_caps_reservation;
+ int r_num_caps;
+};
+
+/*
+ * mds client state
+ */
+struct ceph_mds_client {
+ struct ceph_fs_client *fsc;
+ struct mutex mutex; /* all nested structures */
+
+ struct ceph_mdsmap *mdsmap;
+ struct completion safe_umount_waiters;
+ wait_queue_head_t session_close_wq;
+ struct list_head waiting_for_map;
+
+ struct ceph_mds_session **sessions; /* NULL for mds if no session */
+ int max_sessions; /* len of s_mds_sessions */
+ int stopping; /* true if shutting down */
+
+ /*
+ * snap_rwsem will cover cap linkage into snaprealms, and
+ * realm snap contexts. (later, we can do per-realm snap
+ * contexts locks..) the empty list contains realms with no
+ * references (implying they contain no inodes with caps) that
+ * should be destroyed.
+ */
+ struct rw_semaphore snap_rwsem;
+ struct rb_root snap_realms;
+ struct list_head snap_empty;
+ spinlock_t snap_empty_lock; /* protect snap_empty */
+
+ u64 last_tid; /* most recent mds request */
+ struct rb_root request_tree; /* pending mds requests */
+ struct delayed_work delayed_work; /* delayed work */
+ unsigned long last_renew_caps; /* last time we renewed our caps */
+ struct list_head cap_delay_list; /* caps with delayed release */
+ spinlock_t cap_delay_lock; /* protects cap_delay_list */
+ struct list_head snap_flush_list; /* cap_snaps ready to flush */
+ spinlock_t snap_flush_lock;
+
+ u64 cap_flush_seq;
+ struct list_head cap_dirty; /* inodes with dirty caps */
+ struct list_head cap_dirty_migrating; /* ...that are migration... */
+ int num_cap_flushing; /* # caps we are flushing */
+ spinlock_t cap_dirty_lock; /* protects above items */
+ wait_queue_head_t cap_flushing_wq;
+
+ /*
+ * Cap reservations
+ *
+ * Maintain a global pool of preallocated struct ceph_caps, referenced
+ * by struct ceph_caps_reservations. This ensures that we preallocate
+ * memory needed to successfully process an MDS response. (If an MDS
+ * sends us cap information and we fail to process it, we will have
+ * problems due to the client and MDS being out of sync.)
+ *
+ * Reservations are 'owned' by a ceph_cap_reservation context.
+ */
+ spinlock_t caps_list_lock;
+ struct list_head caps_list; /* unused (reserved or
+ unreserved) */
+ int caps_total_count; /* total caps allocated */
+ int caps_use_count; /* in use */
+ int caps_reserve_count; /* unused, reserved */
+ int caps_avail_count; /* unused, unreserved */
+ int caps_min_count; /* keep at least this many
+ (unreserved) */
+ spinlock_t dentry_lru_lock;
+ struct list_head dentry_lru;
+ int num_dentry;
+};
+
+extern const char *ceph_mds_op_name(int op);
+
+extern struct ceph_mds_session *
+__ceph_lookup_mds_session(struct ceph_mds_client *, int mds);
+
+/*
+ * Take an extra reference on an MDS session.  Returns the session so
+ * the call can be embedded in an expression; release with
+ * ceph_put_mds_session().
+ */
+static inline struct ceph_mds_session *
+ceph_get_mds_session(struct ceph_mds_session *s)
+{
+	atomic_inc(&s->s_ref);
+	return s;
+}
+
+extern void ceph_put_mds_session(struct ceph_mds_session *s);
+
+extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc,
+ struct ceph_msg *msg, int mds);
+
+extern int ceph_mdsc_init(struct ceph_fs_client *fsc);
+extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc);
+extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc);
+
+extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc);
+
+extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc,
+ struct inode *inode,
+ struct dentry *dn);
+
+extern void ceph_invalidate_dir_request(struct ceph_mds_request *req);
+extern int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
+ struct inode *dir);
+extern struct ceph_mds_request *
+ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode);
+extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req);
+extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
+ struct inode *dir,
+ struct ceph_mds_request *req);
+/* take a reference on an in-flight mds request */
+static inline void ceph_mdsc_get_request(struct ceph_mds_request *req)
+{
+	kref_get(&req->r_kref);
+}
+extern void ceph_mdsc_release_request(struct kref *kref);
+/*
+ * drop a request reference; ceph_mdsc_release_request() runs (and
+ * frees the request) when the last reference is dropped
+ */
+static inline void ceph_mdsc_put_request(struct ceph_mds_request *req)
+{
+	kref_put(&req->r_kref, ceph_mdsc_release_request);
+}
+
+extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session);
+extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session);
+
+extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
+
+extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
+ int stop_on_nosnap);
+
+extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry);
+extern void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
+ struct inode *inode,
+ struct dentry *dentry, char action,
+ u32 seq);
+
+extern void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc,
+ struct ceph_msg *msg);
+
+extern struct ceph_mds_session *
+ceph_mdsc_open_export_target_session(struct ceph_mds_client *mdsc, int target);
+extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session);
+
+#endif
diff --git a/ceph/mdsmap.c b/ceph/mdsmap.c
new file mode 100644
index 0000000..132b64e
--- /dev/null
+++ b/ceph/mdsmap.c
@@ -0,0 +1,189 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include <linux/ceph/mdsmap.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/decode.h>
+
+#include "super.h"
+
+
+/*
+ * choose a random mds that is "up" (i.e. has a state > 0), or -1.
+ */
+int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
+{
+	int n = 0;
+	int i;
+
+	/* special case for one mds */
+	if (1 == m->m_max_mds && m->m_info[0].state > 0)
+		return 0;
+
+	/* count the mds daemons that are up */
+	for (i = 0; i < m->m_max_mds; i++)
+		if (m->m_info[i].state > 0)
+			n++;
+	if (n == 0)
+		return -1;
+
+	/*
+	 * pick: return the slot of the (n+1)th "up" mds.  The old scan
+	 * only skipped down slots while n > 0, so a random pick of 0
+	 * could return slot 0 even when that mds was down; always skip
+	 * down slots before stopping.
+	 */
+	n = prandom_u32() % n;
+	for (i = 0; ; i++) {
+		if (m->m_info[i].state > 0) {
+			if (n == 0)
+				break;
+			n--;
+		}
+	}
+
+	return i;
+}
+
+/*
+ * Decode an MDS map
+ *
+ * Ignore any fields we don't care about (there are quite a few of
+ * them).
+ */
+struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
+{
+	struct ceph_mdsmap *m;
+	const void *start = *p;
+	int i, j, n;
+	int err = -EINVAL;
+	u16 version;
+
+	m = kzalloc(sizeof(*m), GFP_NOFS);
+	if (m == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	ceph_decode_16_safe(p, end, version, bad);
+	if (version > 3) {
+		pr_warning("got mdsmap version %d > 3, failing", version);
+		goto bad;
+	}
+
+	ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad);
+	m->m_epoch = ceph_decode_32(p);
+	m->m_client_epoch = ceph_decode_32(p);
+	m->m_last_failure = ceph_decode_32(p);
+	m->m_root = ceph_decode_32(p);
+	m->m_session_timeout = ceph_decode_32(p);
+	m->m_session_autoclose = ceph_decode_32(p);
+	m->m_max_file_size = ceph_decode_64(p);
+	m->m_max_mds = ceph_decode_32(p);
+
+	m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS);
+	if (m->m_info == NULL)
+		goto badmem;
+
+	/* pick out active nodes from mds_info (state > 0) */
+	n = ceph_decode_32(p);
+	for (i = 0; i < n; i++) {
+		u64 global_id;
+		u32 namelen;
+		s32 mds, inc, state;
+		u64 state_seq;
+		u8 infoversion;
+		struct ceph_entity_addr addr;
+		u32 num_export_targets;
+		void *pexport_targets = NULL;
+		struct ceph_timespec laggy_since;
+		struct ceph_mds_info *info;
+
+		ceph_decode_need(p, end, sizeof(u64)*2 + 1 + sizeof(u32), bad);
+		global_id = ceph_decode_64(p);
+		infoversion = ceph_decode_8(p);
+		*p += sizeof(u64);
+		namelen = ceph_decode_32(p);  /* skip mds name */
+		/* namelen comes off the wire: bounds-check the skip */
+		ceph_decode_need(p, end, namelen, bad);
+		*p += namelen;
+
+		ceph_decode_need(p, end,
+				 4*sizeof(u32) + sizeof(u64) +
+				 sizeof(addr) + sizeof(struct ceph_timespec),
+				 bad);
+		mds = ceph_decode_32(p);
+		inc = ceph_decode_32(p);
+		state = ceph_decode_32(p);
+		state_seq = ceph_decode_64(p);
+		ceph_decode_copy(p, &addr, sizeof(addr));
+		ceph_decode_addr(&addr);
+		ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
+		*p += sizeof(u32);
+		ceph_decode_32_safe(p, end, namelen, bad);
+		/* bounds-check this wire-supplied skip as well */
+		ceph_decode_need(p, end, namelen, bad);
+		*p += namelen;
+		if (infoversion >= 2) {
+			ceph_decode_32_safe(p, end, num_export_targets, bad);
+			/*
+			 * validate the export target array length before
+			 * skipping it (64-bit product avoids overflow on
+			 * 32-bit builds)
+			 */
+			ceph_decode_need(p, end,
+					 (u64)num_export_targets * sizeof(u32),
+					 bad);
+			pexport_targets = *p;
+			*p += num_export_targets * sizeof(u32);
+		} else {
+			num_export_targets = 0;
+		}
+
+		dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n",
+		     i+1, n, global_id, mds, inc,
+		     ceph_pr_addr(&addr.in_addr),
+		     ceph_mds_state_name(state));
+
+		/* only record slots that are in range and active */
+		if (mds < 0 || mds >= m->m_max_mds || state <= 0)
+			continue;
+
+		info = &m->m_info[mds];
+		info->global_id = global_id;
+		info->state = state;
+		info->addr = addr;
+		info->laggy = (laggy_since.tv_sec != 0 ||
+			       laggy_since.tv_nsec != 0);
+		info->num_export_targets = num_export_targets;
+		if (num_export_targets) {
+			info->export_targets = kcalloc(num_export_targets,
+						       sizeof(u32), GFP_NOFS);
+			if (info->export_targets == NULL)
+				goto badmem;
+			for (j = 0; j < num_export_targets; j++)
+				info->export_targets[j] =
+				       ceph_decode_32(&pexport_targets);
+		} else {
+			info->export_targets = NULL;
+		}
+	}
+
+	/* pg_pools */
+	ceph_decode_32_safe(p, end, n, bad);
+	m->m_num_data_pg_pools = n;
+	m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS);
+	if (!m->m_data_pg_pools)
+		goto badmem;
+	ceph_decode_need(p, end, sizeof(u64)*(n+1), bad);
+	for (i = 0; i < n; i++)
+		m->m_data_pg_pools[i] = ceph_decode_64(p);
+	m->m_cas_pg_pool = ceph_decode_64(p);
+
+	/* ok, we don't care about the rest. */
+	dout("mdsmap_decode success epoch %u\n", m->m_epoch);
+	return m;
+
+badmem:
+	err = -ENOMEM;
+bad:
+	pr_err("corrupt mdsmap\n");
+	print_hex_dump(KERN_DEBUG, "mdsmap: ",
+		       DUMP_PREFIX_OFFSET, 16, 1,
+		       start, end - start, true);
+	ceph_mdsmap_destroy(m);
+	return ERR_PTR(err);
+}
+
+void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
+{
+	int i;
+
+	/*
+	 * May be called on a partially constructed map from the decode
+	 * error path: m_max_mds can already be set while m_info is still
+	 * NULL (kcalloc failure), so guard before dereferencing m_info.
+	 */
+	if (m->m_info) {
+		for (i = 0; i < m->m_max_mds; i++)
+			kfree(m->m_info[i].export_targets);
+		kfree(m->m_info);
+	}
+	kfree(m->m_data_pg_pools);	/* kfree(NULL) is a no-op */
+	kfree(m);
+}
diff --git a/ceph/snap.c b/ceph/snap.c
new file mode 100644
index 0000000..f01645a
--- /dev/null
+++ b/ceph/snap.c
@@ -0,0 +1,932 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/sort.h>
+#include <linux/slab.h>
+
+#include "super.h"
+#include "mds_client.h"
+
+#include <linux/ceph/decode.h>
+
+/*
+ * Snapshots in ceph are driven in large part by cooperation from the
+ * client. In contrast to local file systems or file servers that
+ * implement snapshots at a single point in the system, ceph's
+ * distributed access to storage requires clients to help decide
+ * whether a write logically occurs before or after a recently created
+ * snapshot.
+ *
+ * This provides a perfect instantaneous client-wide snapshot. Between
+ * clients, however, snapshots may appear to be applied at slightly
+ * different points in time, depending on delays in delivering the
+ * snapshot notification.
+ *
+ * Snapshots are _not_ file system-wide. Instead, each snapshot
+ * applies to the subdirectory nested beneath some directory. This
+ * effectively divides the hierarchy into multiple "realms," where all
+ * of the files contained by each realm share the same set of
+ * snapshots. An individual realm's snap set contains snapshots
+ * explicitly created on that realm, as well as any snaps in its
+ * parent's snap set _after_ the point at which the parent became its
+ * parent (due to, say, a rename). Similarly, snaps from prior parents
+ * during the time intervals during which they were the parent are included.
+ *
+ * The client is spared most of this detail, fortunately... it need only
+ * maintain a hierarchy of realms reflecting the current parent/child
+ * realm relationship, and for each realm has an explicit list of snaps
+ * inherited from prior parents.
+ *
+ * A snap_realm struct is maintained for realms containing every inode
+ * with an open cap in the system. (The needed snap realm information is
+ * provided by the MDS whenever a cap is issued, i.e., on open.) A 'seq'
+ * version number is used to ensure that as realm parameters change (new
+ * snapshot, new parent, etc.) the client's realm hierarchy is updated.
+ *
+ * The realm hierarchy drives the generation of a 'snap context' for each
+ * realm, which simply lists the resulting set of snaps for the realm. This
+ * is attached to any writes sent to OSDs.
+ */
+/*
+ * Unfortunately error handling is a bit mixed here. If we get a snap
+ * update, but don't have enough memory to update our realm hierarchy,
+ * it's not clear what we can do about it (besides complaining to the
+ * console).
+ */
+
+
+/*
+ * increase ref count for the realm
+ *
+ * caller must hold snap_rwsem for write.
+ */
+void ceph_get_snap_realm(struct ceph_mds_client *mdsc,
+ struct ceph_snap_realm *realm)
+{
+ dout("get_realm %p %d -> %d\n", realm,
+ atomic_read(&realm->nref), atomic_read(&realm->nref)+1);
+ /*
+ * since we _only_ increment realm refs or empty the empty
+ * list with snap_rwsem held, adjusting the empty list here is
+ * safe. we do need to protect against concurrent empty list
+ * additions, however.
+ */
+ if (atomic_read(&realm->nref) == 0) {
+ spin_lock(&mdsc->snap_empty_lock);
+ list_del_init(&realm->empty_item);
+ spin_unlock(&mdsc->snap_empty_lock);
+ }
+
+ atomic_inc(&realm->nref);
+}
+
+static void __insert_snap_realm(struct rb_root *root,
+ struct ceph_snap_realm *new)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct ceph_snap_realm *r = NULL;
+
+ while (*p) {
+ parent = *p;
+ r = rb_entry(parent, struct ceph_snap_realm, node);
+ if (new->ino < r->ino)
+ p = &(*p)->rb_left;
+ else if (new->ino > r->ino)
+ p = &(*p)->rb_right;
+ else
+ BUG();
+ }
+
+ rb_link_node(&new->node, parent, p);
+ rb_insert_color(&new->node, root);
+}
+
+/*
+ * create and get the realm rooted at @ino and bump its ref count.
+ *
+ * caller must hold snap_rwsem for write.
+ */
+static struct ceph_snap_realm *ceph_create_snap_realm(
+ struct ceph_mds_client *mdsc,
+ u64 ino)
+{
+ struct ceph_snap_realm *realm;
+
+ realm = kzalloc(sizeof(*realm), GFP_NOFS);
+ if (!realm)
+ return ERR_PTR(-ENOMEM);
+
+ atomic_set(&realm->nref, 0); /* tree does not take a ref */
+ realm->ino = ino;
+ INIT_LIST_HEAD(&realm->children);
+ INIT_LIST_HEAD(&realm->child_item);
+ INIT_LIST_HEAD(&realm->empty_item);
+ INIT_LIST_HEAD(&realm->dirty_item);
+ INIT_LIST_HEAD(&realm->inodes_with_caps);
+ spin_lock_init(&realm->inodes_with_caps_lock);
+ __insert_snap_realm(&mdsc->snap_realms, realm);
+ dout("create_snap_realm %llx %p\n", realm->ino, realm);
+ return realm;
+}
+
+/*
+ * lookup the realm rooted at @ino.
+ *
+ * caller must hold snap_rwsem for write.
+ */
+struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc,
+ u64 ino)
+{
+ struct rb_node *n = mdsc->snap_realms.rb_node;
+ struct ceph_snap_realm *r;
+
+ while (n) {
+ r = rb_entry(n, struct ceph_snap_realm, node);
+ if (ino < r->ino)
+ n = n->rb_left;
+ else if (ino > r->ino)
+ n = n->rb_right;
+ else {
+ dout("lookup_snap_realm %llx %p\n", r->ino, r);
+ return r;
+ }
+ }
+ return NULL;
+}
+
+static void __put_snap_realm(struct ceph_mds_client *mdsc,
+ struct ceph_snap_realm *realm);
+
/*
 * Tear a realm down: unlink it from the realm tree, drop the reference
 * it holds on its parent (if any), and free everything it owns.
 *
 * called with snap_rwsem (write)
 */
static void __destroy_snap_realm(struct ceph_mds_client *mdsc,
				 struct ceph_snap_realm *realm)
{
	dout("__destroy_snap_realm %p %llx\n", realm, realm->ino);

	/* unlink from the realm tree before tearing anything down */
	rb_erase(&realm->node, &mdsc->snap_realms);

	if (realm->parent) {
		/* detach from the parent's children list, then release
		 * the ref this realm held on the parent */
		list_del_init(&realm->child_item);
		__put_snap_realm(mdsc, realm->parent);
	}

	kfree(realm->prior_parent_snaps);
	kfree(realm->snaps);
	ceph_put_snap_context(realm->cached_context);
	kfree(realm);
}
+
+/*
+ * caller holds snap_rwsem (write)
+ */
+static void __put_snap_realm(struct ceph_mds_client *mdsc,
+ struct ceph_snap_realm *realm)
+{
+ dout("__put_snap_realm %llx %p %d -> %d\n", realm->ino, realm,
+ atomic_read(&realm->nref), atomic_read(&realm->nref)-1);
+ if (atomic_dec_and_test(&realm->nref))
+ __destroy_snap_realm(mdsc, realm);
+}
+
/*
 * Drop a reference to @realm.  On the final put, destroy it right away
 * if snap_rwsem can be acquired; otherwise park it on the snap_empty
 * list so __cleanup_empty_realms() reaps it later.
 *
 * caller needn't hold any locks
 */
void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
			 struct ceph_snap_realm *realm)
{
	dout("put_snap_realm %llx %p %d -> %d\n", realm->ino, realm,
	     atomic_read(&realm->nref), atomic_read(&realm->nref)-1);
	if (!atomic_dec_and_test(&realm->nref))
		return;

	/* destruction requires snap_rwsem for write */
	if (down_write_trylock(&mdsc->snap_rwsem)) {
		__destroy_snap_realm(mdsc, realm);
		up_write(&mdsc->snap_rwsem);
	} else {
		/* can't block here; defer to the empty list */
		spin_lock(&mdsc->snap_empty_lock);
		list_add(&realm->empty_item, &mdsc->snap_empty);
		spin_unlock(&mdsc->snap_empty_lock);
	}
}
+
/*
 * Clean up any realms whose ref counts have dropped to zero. Note
 * that this does not include realms who were created but not yet
 * used.
 *
 * Called under snap_rwsem (write)
 */
static void __cleanup_empty_realms(struct ceph_mds_client *mdsc)
{
	struct ceph_snap_realm *realm;

	spin_lock(&mdsc->snap_empty_lock);
	while (!list_empty(&mdsc->snap_empty)) {
		realm = list_first_entry(&mdsc->snap_empty,
				   struct ceph_snap_realm, empty_item);
		list_del(&realm->empty_item);
		/* do the actual destruction outside snap_empty_lock;
		 * __destroy_snap_realm() may drop the parent's ref and
		 * cascade into further destruction */
		spin_unlock(&mdsc->snap_empty_lock);
		__destroy_snap_realm(mdsc, realm);
		spin_lock(&mdsc->snap_empty_lock);
	}
	spin_unlock(&mdsc->snap_empty_lock);
}
+
/* public wrapper: take snap_rwsem (as __cleanup_empty_realms requires)
 * and reap zero-ref realms from the empty list */
void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc)
{
	down_write(&mdsc->snap_rwsem);
	__cleanup_empty_realms(mdsc);
	up_write(&mdsc->snap_rwsem);
}
+
+/*
+ * adjust the parent realm of a given @realm. adjust child list, and parent
+ * pointers, and ref counts appropriately.
+ *
+ * return 1 if parent was changed, 0 if unchanged, <0 on error.
+ *
+ * caller must hold snap_rwsem for write.
+ */
static int adjust_snap_realm_parent(struct ceph_mds_client *mdsc,
				    struct ceph_snap_realm *realm,
				    u64 parentino)
{
	struct ceph_snap_realm *parent;

	/* nothing to do if the parent is already correct */
	if (realm->parent_ino == parentino)
		return 0;

	/* find (or create) the new parent realm */
	parent = ceph_lookup_snap_realm(mdsc, parentino);
	if (!parent) {
		parent = ceph_create_snap_realm(mdsc, parentino);
		if (IS_ERR(parent))
			return PTR_ERR(parent);
	}
	dout("adjust_snap_realm_parent %llx %p: %llx %p -> %llx %p\n",
	     realm->ino, realm, realm->parent_ino, realm->parent,
	     parentino, parent);
	/* detach from and drop our ref on the old parent... */
	if (realm->parent) {
		list_del_init(&realm->child_item);
		ceph_put_snap_realm(mdsc, realm->parent);
	}
	/* ...then take a ref on and link under the new parent */
	realm->parent_ino = parentino;
	realm->parent = parent;
	ceph_get_snap_realm(mdsc, parent);
	list_add(&realm->child_item, &parent->children);
	return 1;
}
+
+
+static int cmpu64_rev(const void *a, const void *b)
+{
+ if (*(u64 *)a < *(u64 *)b)
+ return 1;
+ if (*(u64 *)a > *(u64 *)b)
+ return -1;
+ return 0;
+}
+
/*
 * build the snap context for a given realm.
 *
 * The snap context is the realm's own snaps, plus snaps inherited from
 * prior parents, plus any of the current parent's snaps taken since it
 * became our parent, sorted in descending order.
 *
 * Returns 0 on success (or when the cached context is already current),
 * negative errno on failure; on failure any stale cached context is
 * dropped so a later rebuild starts clean.
 */
static int build_snap_context(struct ceph_snap_realm *realm)
{
	struct ceph_snap_realm *parent = realm->parent;
	struct ceph_snap_context *snapc;
	int err = 0;
	u32 num = realm->num_prior_parent_snaps + realm->num_snaps;

	/*
	 * build parent context, if it hasn't been built.
	 * conservatively estimate that all parent snaps might be
	 * included by us.
	 */
	if (parent) {
		if (!parent->cached_context) {
			err = build_snap_context(parent);	/* recurse upward */
			if (err)
				goto fail;
		}
		num += parent->cached_context->num_snaps;
	}

	/* do i actually need to update?  not if my context seq
	   matches realm seq, and my parent's does too.  (this works
	   because we rebuild_snap_realms() works _downward_ in
	   hierarchy after each update.) */
	if (realm->cached_context &&
	    realm->cached_context->seq == realm->seq &&
	    (!parent ||
	     realm->cached_context->seq >= parent->cached_context->seq)) {
		dout("build_snap_context %llx %p: %p seq %lld (%u snaps)"
		     " (unchanged)\n",
		     realm->ino, realm, realm->cached_context,
		     realm->cached_context->seq,
		     (unsigned int) realm->cached_context->num_snaps);
		return 0;
	}

	/* alloc new snap context */
	err = -ENOMEM;
	/* guard against integer overflow when sizing the allocation */
	if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64))
		goto fail;
	snapc = ceph_create_snap_context(num, GFP_NOFS);
	if (!snapc)
		goto fail;

	/* build (reverse sorted) snap vector */
	num = 0;
	snapc->seq = realm->seq;
	if (parent) {
		u32 i;

		/* include any of parent's snaps occurring _after_ my
		   parent became my parent */
		for (i = 0; i < parent->cached_context->num_snaps; i++)
			if (parent->cached_context->snaps[i] >=
			    realm->parent_since)
				snapc->snaps[num++] =
					parent->cached_context->snaps[i];
		/* context seq is the max over us and our ancestors */
		if (parent->cached_context->seq > snapc->seq)
			snapc->seq = parent->cached_context->seq;
	}
	/* then our own snaps and those inherited from prior parents */
	memcpy(snapc->snaps + num, realm->snaps,
	       sizeof(u64)*realm->num_snaps);
	num += realm->num_snaps;
	memcpy(snapc->snaps + num, realm->prior_parent_snaps,
	       sizeof(u64)*realm->num_prior_parent_snaps);
	num += realm->num_prior_parent_snaps;

	/* descending order, via cmpu64_rev */
	sort(snapc->snaps, num, sizeof(u64), cmpu64_rev, NULL);
	snapc->num_snaps = num;
	dout("build_snap_context %llx %p: %p seq %lld (%u snaps)\n",
	     realm->ino, realm, snapc, snapc->seq,
	     (unsigned int) snapc->num_snaps);

	/* swap in the new context, dropping the old one */
	if (realm->cached_context)
		ceph_put_snap_context(realm->cached_context);
	realm->cached_context = snapc;
	return 0;

fail:
	/*
	 * if we fail, clear old (incorrect) cached_context... hopefully
	 * we'll have better luck building it later
	 */
	if (realm->cached_context) {
		ceph_put_snap_context(realm->cached_context);
		realm->cached_context = NULL;
	}
	pr_err("build_snap_context %llx %p fail %d\n", realm->ino,
	       realm, err);
	return err;
}
+
/*
 * rebuild snap context for the given realm and all of its children,
 * depth-first.
 *
 * NOTE(review): build_snap_context()'s error return is ignored here;
 * on ENOMEM the realm is left without a cached context (see the
 * error-handling caveat at the top of this file).
 */
static void rebuild_snap_realms(struct ceph_snap_realm *realm)
{
	struct ceph_snap_realm *child;

	dout("rebuild_snap_realms %llx %p\n", realm->ino, realm);
	build_snap_context(realm);

	list_for_each_entry(child, &realm->children, child_item)
		rebuild_snap_realms(child);
}
+
+
+/*
+ * helper to allocate and decode an array of snapids. free prior
+ * instance, if any.
+ */
+static int dup_array(u64 **dst, __le64 *src, u32 num)
+{
+ u32 i;
+
+ kfree(*dst);
+ if (num) {
+ *dst = kcalloc(num, sizeof(u64), GFP_NOFS);
+ if (!*dst)
+ return -ENOMEM;
+ for (i = 0; i < num; i++)
+ (*dst)[i] = get_unaligned_le64(src + i);
+ } else {
+ *dst = NULL;
+ }
+ return 0;
+}
+
+
/*
 * When a snapshot is applied, the size/mtime inode metadata is queued
 * in a ceph_cap_snap (one for each snapshot) until writeback
 * completes and the metadata can be flushed back to the MDS.
 *
 * However, if a (sync) write is currently in-progress when we apply
 * the snapshot, we have to wait until the write succeeds or fails
 * (and a final size/mtime is known).  In this case the
 * cap_snap->writing = 1, and is said to be "pending."  When the write
 * finishes, we __ceph_finish_cap_snap().
 *
 * Caller must hold snap_rwsem for read (i.e., the realm topology won't
 * change).
 */
void ceph_queue_cap_snap(struct ceph_inode_info *ci)
{
	struct inode *inode = &ci->vfs_inode;
	struct ceph_cap_snap *capsnap;
	int used, dirty;

	/* allocate before taking i_ceph_lock (allocation may sleep) */
	capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS);
	if (!capsnap) {
		pr_err("ENOMEM allocating ceph_cap_snap on %p\n", inode);
		return;
	}

	spin_lock(&ci->i_ceph_lock);
	used = __ceph_caps_used(ci);
	dirty = __ceph_caps_dirty(ci);

	/*
	 * If there is a write in progress, treat that as a dirty Fw,
	 * even though it hasn't completed yet; by the time we finish
	 * up this capsnap it will be.
	 */
	if (used & CEPH_CAP_FILE_WR)
		dirty |= CEPH_CAP_FILE_WR;

	if (__ceph_have_pending_cap_snap(ci)) {
		/* there is no point in queuing multiple "pending" cap_snaps,
		   as no new writes are allowed to start when pending, so any
		   writes in progress now were started before the previous
		   cap_snap.  lucky us. */
		dout("queue_cap_snap %p already pending\n", inode);
		kfree(capsnap);
	} else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
			    CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) {
		struct ceph_snap_context *snapc = ci->i_head_snapc;

		/*
		 * if we are a sync write, we may need to go to the snaprealm
		 * to get the current snapc.
		 */
		if (!snapc)
			snapc = ci->i_snap_realm->cached_context;

		dout("queue_cap_snap %p cap_snap %p queuing under %p %s\n",
		     inode, capsnap, snapc, ceph_cap_string(dirty));
		/* inode ref held while the cap_snap is outstanding */
		ihold(inode);

		atomic_set(&capsnap->nref, 1);
		capsnap->ci = ci;
		INIT_LIST_HEAD(&capsnap->ci_item);
		INIT_LIST_HEAD(&capsnap->flushing_item);

		capsnap->follows = snapc->seq;
		capsnap->issued = __ceph_caps_issued(ci, NULL);
		capsnap->dirty = dirty;

		/* capture the inode metadata this client is responsible for */
		capsnap->mode = inode->i_mode;
		capsnap->uid = inode->i_uid;
		capsnap->gid = inode->i_gid;

		if (dirty & CEPH_CAP_XATTR_EXCL) {
			/* snapshot the xattr blob as well */
			__ceph_build_xattrs_blob(ci);
			capsnap->xattr_blob =
				ceph_buffer_get(ci->i_xattrs.blob);
			capsnap->xattr_version = ci->i_xattrs.version;
		} else {
			capsnap->xattr_blob = NULL;
			capsnap->xattr_version = 0;
		}

		/* dirty page count moved from _head to this cap_snap;
		   all subsequent writes page dirties occur _after_ this
		   snapshot. */
		capsnap->dirty_pages = ci->i_wrbuffer_ref_head;
		ci->i_wrbuffer_ref_head = 0;
		capsnap->context = snapc;
		/* head writes now accrue against the realm's current context */
		ci->i_head_snapc =
			ceph_get_snap_context(ci->i_snap_realm->cached_context);
		dout(" new snapc is %p\n", ci->i_head_snapc);
		list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);

		if (used & CEPH_CAP_FILE_WR) {
			dout("queue_cap_snap %p cap_snap %p snapc %p"
			     " seq %llu used WR, now pending\n", inode,
			     capsnap, snapc, snapc->seq);
			/* a write is in flight; finalized later via
			 * __ceph_finish_cap_snap() */
			capsnap->writing = 1;
		} else {
			/* note mtime, size NOW. */
			__ceph_finish_cap_snap(ci, capsnap);
		}
	} else {
		dout("queue_cap_snap %p nothing dirty|writing\n", inode);
		kfree(capsnap);
	}

	spin_unlock(&ci->i_ceph_lock);
}
+
/*
 * Finalize the size, mtime for a cap_snap.. that is, settle on final values
 * to be used for the snapshot, to be flushed back to the mds.
 *
 * If capsnap can now be flushed, add to snap_flush list, and return 1.
 * Returns 0 if dirty pages still prevent flushing.
 *
 * Caller must hold i_ceph_lock.
 */
int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
			   struct ceph_cap_snap *capsnap)
{
	struct inode *inode = &ci->vfs_inode;
	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;

	/* must not be called while a write is still pending */
	BUG_ON(capsnap->writing);
	/* record the final inode metadata for this snapshot */
	capsnap->size = inode->i_size;
	capsnap->mtime = inode->i_mtime;
	capsnap->atime = inode->i_atime;
	capsnap->ctime = inode->i_ctime;
	capsnap->time_warp_seq = ci->i_time_warp_seq;
	if (capsnap->dirty_pages) {
		/* can't flush until the snap's dirty pages are written back */
		dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu "
		     "still has %d dirty pages\n", inode, capsnap,
		     capsnap->context, capsnap->context->seq,
		     ceph_cap_string(capsnap->dirty), capsnap->size,
		     capsnap->dirty_pages);
		return 0;
	}
	dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu\n",
	     inode, capsnap, capsnap->context,
	     capsnap->context->seq, ceph_cap_string(capsnap->dirty),
	     capsnap->size);

	/* queue the inode for flushing back to the MDS */
	spin_lock(&mdsc->snap_flush_lock);
	list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
	spin_unlock(&mdsc->snap_flush_lock);
	return 1;  /* caller may want to ceph_flush_snaps */
}
+
/*
 * Queue cap_snaps for snap writeback for this realm and its children.
 * Called under snap_rwsem, so realm topology won't change.
 */
static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
{
	struct ceph_inode_info *ci;
	struct inode *lastinode = NULL;
	struct ceph_snap_realm *child;

	dout("queue_realm_cap_snaps %p %llx inodes\n", realm, realm->ino);

	spin_lock(&realm->inodes_with_caps_lock);
	list_for_each_entry(ci, &realm->inodes_with_caps,
			    i_snap_realm_item) {
		/* igrab fails if the inode is being freed; skip those */
		struct inode *inode = igrab(&ci->vfs_inode);
		if (!inode)
			continue;
		/* drop the spinlock to call ceph_queue_cap_snap() (it
		 * takes i_ceph_lock and allocates); the held inode ref
		 * presumably keeps our list entry valid meanwhile --
		 * NOTE(review): confirm this is what pins the entry */
		spin_unlock(&realm->inodes_with_caps_lock);
		if (lastinode)
			iput(lastinode);	/* drop previous ref outside the lock */
		lastinode = inode;
		ceph_queue_cap_snap(ci);
		spin_lock(&realm->inodes_with_caps_lock);
	}
	spin_unlock(&realm->inodes_with_caps_lock);
	if (lastinode)
		iput(lastinode);

	/*
	 * Chain our children onto our own dirty_item entry.  Since we
	 * are linked on the caller's dirty_realms list, this splices
	 * the children in after us; they will be processed by the
	 * caller's loop, and removing ourselves below leaves them
	 * linked into that list.
	 */
	list_for_each_entry(child, &realm->children, child_item) {
		dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n",
		     realm, realm->ino, child, child->ino);
		list_del_init(&child->dirty_item);
		list_add(&child->dirty_item, &realm->dirty_item);
	}

	/* done with this realm; unlink it from the dirty list */
	list_del_init(&realm->dirty_item);
	dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino);
}
+
/*
 * Parse and apply a snapblob "snap trace" from the MDS.  This specifies
 * the snap realm parameters from a given realm and all of its ancestors,
 * up to the root.
 *
 * @p..@e: encoded trace -- a sequence of ceph_mds_snap_realm records,
 * each followed by its snaps and prior_parent_snaps arrays, ordered
 * from the affected realm up toward the root.
 * @deletion: true when applying a snap deletion.  (Note: only used in
 * debug output below; dirty realms are queued either way.)
 *
 * Returns 0 on success, -EINVAL for a corrupt trace, -ENOMEM on
 * allocation failure.
 *
 * Caller must hold snap_rwsem for write.
 */
int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
			   void *p, void *e, bool deletion)
{
	struct ceph_mds_snap_realm *ri;    /* encoded */
	__le64 *snaps;                     /* encoded */
	__le64 *prior_parent_snaps;        /* encoded */
	struct ceph_snap_realm *realm;
	int invalidate = 0;
	int err = -ENOMEM;
	LIST_HEAD(dirty_realms);

	dout("update_snap_trace deletion=%d\n", deletion);
more:
	/* decode the next realm record and its two snapid arrays */
	ceph_decode_need(&p, e, sizeof(*ri), bad);
	ri = p;
	p += sizeof(*ri);
	ceph_decode_need(&p, e, sizeof(u64)*(le32_to_cpu(ri->num_snaps) +
			    le32_to_cpu(ri->num_prior_parent_snaps)), bad);
	snaps = p;
	p += sizeof(u64) * le32_to_cpu(ri->num_snaps);
	prior_parent_snaps = p;
	p += sizeof(u64) * le32_to_cpu(ri->num_prior_parent_snaps);

	/* find or create the realm this record describes */
	realm = ceph_lookup_snap_realm(mdsc, le64_to_cpu(ri->ino));
	if (!realm) {
		realm = ceph_create_snap_realm(mdsc, le64_to_cpu(ri->ino));
		if (IS_ERR(realm)) {
			err = PTR_ERR(realm);
			goto fail;
		}
	}

	/* ensure the parent is correct */
	err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent));
	if (err < 0)
		goto fail;
	invalidate += err;	/* 1 if the parent changed */

	if (le64_to_cpu(ri->seq) > realm->seq) {
		dout("update_snap_trace updating %llx %p %lld -> %lld\n",
		     realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
		/* update realm parameters, snap lists */
		realm->seq = le64_to_cpu(ri->seq);
		realm->created = le64_to_cpu(ri->created);
		realm->parent_since = le64_to_cpu(ri->parent_since);

		realm->num_snaps = le32_to_cpu(ri->num_snaps);
		err = dup_array(&realm->snaps, snaps, realm->num_snaps);
		if (err < 0)
			goto fail;

		realm->num_prior_parent_snaps =
			le32_to_cpu(ri->num_prior_parent_snaps);
		err = dup_array(&realm->prior_parent_snaps, prior_parent_snaps,
				realm->num_prior_parent_snaps);
		if (err < 0)
			goto fail;

		/* queue realm for cap_snap creation */
		list_add(&realm->dirty_item, &dirty_realms);

		invalidate = 1;
	} else if (!realm->cached_context) {
		/* realm exists but never had a snap context built */
		dout("update_snap_trace %llx %p seq %lld new\n",
		     realm->ino, realm, realm->seq);
		invalidate = 1;
	} else {
		dout("update_snap_trace %llx %p seq %lld unchanged\n",
		     realm->ino, realm, realm->seq);
	}

	dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino,
	     realm, invalidate, p, e);

	/* more records follow, walking toward the root realm */
	if (p < e)
		goto more;

	/* invalidate when we reach the _end_ (root) of the trace */
	if (invalidate)
		rebuild_snap_realms(realm);

	/*
	 * queue cap snaps _after_ we've built the new snap contexts,
	 * so that i_head_snapc can be set appropriately.
	 */
	while (!list_empty(&dirty_realms)) {
		realm = list_first_entry(&dirty_realms, struct ceph_snap_realm,
					 dirty_item);
		/* this also unlinks realm (and splices in its children) */
		queue_realm_cap_snaps(realm);
	}

	__cleanup_empty_realms(mdsc);
	return 0;

bad:
	err = -EINVAL;
fail:
	pr_err("update_snap_trace error %d\n", err);
	return err;
}
+
+
/*
 * Send any cap_snaps that are queued for flush.  Try to carry
 * s_mutex across multiple snap flushes to avoid locking overhead.
 *
 * Caller holds no locks.
 */
static void flush_snaps(struct ceph_mds_client *mdsc)
{
	struct ceph_inode_info *ci;
	struct inode *inode;
	struct ceph_mds_session *session = NULL;

	dout("flush_snaps\n");
	spin_lock(&mdsc->snap_flush_lock);
	while (!list_empty(&mdsc->snap_flush_list)) {
		ci = list_first_entry(&mdsc->snap_flush_list,
				struct ceph_inode_info, i_snap_flush_item);
		inode = &ci->vfs_inode;
		/* pin the inode while we drop snap_flush_lock */
		ihold(inode);
		spin_unlock(&mdsc->snap_flush_lock);
		spin_lock(&ci->i_ceph_lock);
		/* session is passed by reference so the locked session
		 * can be reused across consecutive inodes */
		__ceph_flush_snaps(ci, &session, 0);
		spin_unlock(&ci->i_ceph_lock);
		iput(inode);
		spin_lock(&mdsc->snap_flush_lock);
	}
	spin_unlock(&mdsc->snap_flush_lock);

	/* release the last session __ceph_flush_snaps() left us holding */
	if (session) {
		mutex_unlock(&session->s_mutex);
		ceph_put_mds_session(session);
	}
	dout("flush_snaps done\n");
}
+
+
/*
 * Handle a snap notification from the MDS.
 *
 * This can take two basic forms: the simplest is just a snap creation
 * or deletion notification on an existing realm.  This should update the
 * realm and its children.
 *
 * The more difficult case is realm creation, due to snap creation at a
 * new point in the file hierarchy, or due to a rename that moves a file or
 * directory into another realm.
 */
void ceph_handle_snap(struct ceph_mds_client *mdsc,
		      struct ceph_mds_session *session,
		      struct ceph_msg *msg)
{
	struct super_block *sb = mdsc->fsc->sb;
	int mds = session->s_mds;
	u64 split;
	int op;
	int trace_len;
	struct ceph_snap_realm *realm = NULL;
	void *p = msg->front.iov_base;
	void *e = p + msg->front.iov_len;
	struct ceph_mds_snap_head *h;
	int num_split_inos, num_split_realms;
	__le64 *split_inos = NULL, *split_realms = NULL;
	int i;
	int locked_rwsem = 0;

	/* decode */
	if (msg->front.iov_len < sizeof(*h))
		goto bad;
	h = p;
	op = le32_to_cpu(h->op);
	split = le64_to_cpu(h->split); /* non-zero if we are splitting an
					* existing realm */
	num_split_inos = le32_to_cpu(h->num_split_inos);
	num_split_realms = le32_to_cpu(h->num_split_realms);
	trace_len = le32_to_cpu(h->trace_len);
	p += sizeof(*h);

	dout("handle_snap from mds%d op %s split %llx tracelen %d\n", mds,
	     ceph_snap_op_name(op), split, trace_len);

	/* bump the per-session message sequence number */
	mutex_lock(&session->s_mutex);
	session->s_seq++;
	mutex_unlock(&session->s_mutex);

	down_write(&mdsc->snap_rwsem);
	locked_rwsem = 1;

	if (op == CEPH_SNAP_OP_SPLIT) {
		struct ceph_mds_snap_realm *ri;

		/*
		 * A "split" breaks part of an existing realm off into
		 * a new realm.  The MDS provides a list of inodes
		 * (with caps) and child realms that belong to the new
		 * child.
		 */
		split_inos = p;
		p += sizeof(u64) * num_split_inos;
		split_realms = p;
		p += sizeof(u64) * num_split_realms;
		ceph_decode_need(&p, e, sizeof(*ri), bad);
		/* we will peek at realm info here, but will _not_
		 * advance p, as the realm update will occur below in
		 * ceph_update_snap_trace. */
		ri = p;

		realm = ceph_lookup_snap_realm(mdsc, split);
		if (!realm) {
			realm = ceph_create_snap_realm(mdsc, split);
			if (IS_ERR(realm))
				goto out;	/* NOTE(review): ENOMEM is
						 * dropped without a log here */
		}
		/* hold a ref across the split; dropped below */
		ceph_get_snap_realm(mdsc, realm);

		dout("splitting snap_realm %llx %p\n", realm->ino, realm);
		for (i = 0; i < num_split_inos; i++) {
			struct ceph_vino vino = {
				.ino = le64_to_cpu(split_inos[i]),
				.snap = CEPH_NOSNAP,
			};
			struct inode *inode = ceph_find_inode(sb, vino);
			struct ceph_inode_info *ci;
			struct ceph_snap_realm *oldrealm;

			/* not in our inode cache: nothing to move */
			if (!inode)
				continue;
			ci = ceph_inode(inode);

			spin_lock(&ci->i_ceph_lock);
			if (!ci->i_snap_realm)
				goto skip_inode;
			/*
			 * If this inode belongs to a realm that was
			 * created after our new realm, we experienced
			 * a race (due to another split notifications
			 * arriving from a different MDS).  So skip
			 * this inode.
			 */
			if (ci->i_snap_realm->created >
			    le64_to_cpu(ri->created)) {
				dout(" leaving %p in newer realm %llx %p\n",
				     inode, ci->i_snap_realm->ino,
				     ci->i_snap_realm);
				goto skip_inode;
			}
			dout(" will move %p to split realm %llx %p\n",
			     inode, realm->ino, realm);
			/*
			 * Move the inode to the new realm
			 */
			spin_lock(&realm->inodes_with_caps_lock);
			list_del_init(&ci->i_snap_realm_item);
			list_add(&ci->i_snap_realm_item,
				 &realm->inodes_with_caps);
			oldrealm = ci->i_snap_realm;
			ci->i_snap_realm = realm;
			spin_unlock(&realm->inodes_with_caps_lock);
			spin_unlock(&ci->i_ceph_lock);

			/* the new realm gains a ref for this inode;
			 * the old realm loses one */
			ceph_get_snap_realm(mdsc, realm);
			ceph_put_snap_realm(mdsc, oldrealm);

			iput(inode);
			continue;

skip_inode:
			spin_unlock(&ci->i_ceph_lock);
			iput(inode);
		}

		/* we may have taken some of the old realm's children. */
		for (i = 0; i < num_split_realms; i++) {
			struct ceph_snap_realm *child =
				ceph_lookup_snap_realm(mdsc,
					   le64_to_cpu(split_realms[i]));
			if (!child)
				continue;
			adjust_snap_realm_parent(mdsc, child, realm->ino);
		}
	}

	/*
	 * update using the provided snap trace. if we are deleting a
	 * snap, we can avoid queueing cap_snaps.
	 *
	 * NOTE(review): the return value is ignored; a decode failure
	 * is only logged inside ceph_update_snap_trace().
	 */
	ceph_update_snap_trace(mdsc, p, e,
			       op == CEPH_SNAP_OP_DESTROY);

	if (op == CEPH_SNAP_OP_SPLIT)
		/* we took a reference when we created the realm, above */
		ceph_put_snap_realm(mdsc, realm);

	__cleanup_empty_realms(mdsc);

	up_write(&mdsc->snap_rwsem);

	flush_snaps(mdsc);
	return;

bad:
	pr_err("corrupt snap message from mds%d\n", mds);
	ceph_msg_dump(msg);
out:
	if (locked_rwsem)
		up_write(&mdsc->snap_rwsem);
	return;
}
+
+
+
diff --git a/ceph/strings.c b/ceph/strings.c
new file mode 100644
index 0000000..51cc23e
--- /dev/null
+++ b/ceph/strings.c
@@ -0,0 +1,124 @@
+/*
+ * Ceph fs string constants
+ */
+#include <linux/module.h>
+#include <linux/ceph/types.h>
+
+
+const char *ceph_mds_state_name(int s)
+{
+ switch (s) {
+ /* down and out */
+ case CEPH_MDS_STATE_DNE: return "down:dne";
+ case CEPH_MDS_STATE_STOPPED: return "down:stopped";
+ /* up and out */
+ case CEPH_MDS_STATE_BOOT: return "up:boot";
+ case CEPH_MDS_STATE_STANDBY: return "up:standby";
+ case CEPH_MDS_STATE_STANDBY_REPLAY: return "up:standby-replay";
+ case CEPH_MDS_STATE_REPLAYONCE: return "up:oneshot-replay";
+ case CEPH_MDS_STATE_CREATING: return "up:creating";
+ case CEPH_MDS_STATE_STARTING: return "up:starting";
+ /* up and in */
+ case CEPH_MDS_STATE_REPLAY: return "up:replay";
+ case CEPH_MDS_STATE_RESOLVE: return "up:resolve";
+ case CEPH_MDS_STATE_RECONNECT: return "up:reconnect";
+ case CEPH_MDS_STATE_REJOIN: return "up:rejoin";
+ case CEPH_MDS_STATE_CLIENTREPLAY: return "up:clientreplay";
+ case CEPH_MDS_STATE_ACTIVE: return "up:active";
+ case CEPH_MDS_STATE_STOPPING: return "up:stopping";
+ }
+ return "???";
+}
+
+const char *ceph_session_op_name(int op)
+{
+ switch (op) {
+ case CEPH_SESSION_REQUEST_OPEN: return "request_open";
+ case CEPH_SESSION_OPEN: return "open";
+ case CEPH_SESSION_REQUEST_CLOSE: return "request_close";
+ case CEPH_SESSION_CLOSE: return "close";
+ case CEPH_SESSION_REQUEST_RENEWCAPS: return "request_renewcaps";
+ case CEPH_SESSION_RENEWCAPS: return "renewcaps";
+ case CEPH_SESSION_STALE: return "stale";
+ case CEPH_SESSION_RECALL_STATE: return "recall_state";
+ case CEPH_SESSION_FLUSHMSG: return "flushmsg";
+ case CEPH_SESSION_FLUSHMSG_ACK: return "flushmsg_ack";
+ }
+ return "???";
+}
+
+const char *ceph_mds_op_name(int op)
+{
+ switch (op) {
+ case CEPH_MDS_OP_LOOKUP: return "lookup";
+ case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash";
+ case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent";
+ case CEPH_MDS_OP_LOOKUPINO: return "lookupino";
+ case CEPH_MDS_OP_LOOKUPNAME: return "lookupname";
+ case CEPH_MDS_OP_GETATTR: return "getattr";
+ case CEPH_MDS_OP_SETXATTR: return "setxattr";
+ case CEPH_MDS_OP_SETATTR: return "setattr";
+ case CEPH_MDS_OP_RMXATTR: return "rmxattr";
+ case CEPH_MDS_OP_SETLAYOUT: return "setlayou";
+ case CEPH_MDS_OP_SETDIRLAYOUT: return "setdirlayout";
+ case CEPH_MDS_OP_READDIR: return "readdir";
+ case CEPH_MDS_OP_MKNOD: return "mknod";
+ case CEPH_MDS_OP_LINK: return "link";
+ case CEPH_MDS_OP_UNLINK: return "unlink";
+ case CEPH_MDS_OP_RENAME: return "rename";
+ case CEPH_MDS_OP_MKDIR: return "mkdir";
+ case CEPH_MDS_OP_RMDIR: return "rmdir";
+ case CEPH_MDS_OP_SYMLINK: return "symlink";
+ case CEPH_MDS_OP_CREATE: return "create";
+ case CEPH_MDS_OP_OPEN: return "open";
+ case CEPH_MDS_OP_LOOKUPSNAP: return "lookupsnap";
+ case CEPH_MDS_OP_LSSNAP: return "lssnap";
+ case CEPH_MDS_OP_MKSNAP: return "mksnap";
+ case CEPH_MDS_OP_RMSNAP: return "rmsnap";
+ case CEPH_MDS_OP_SETFILELOCK: return "setfilelock";
+ case CEPH_MDS_OP_GETFILELOCK: return "getfilelock";
+ }
+ return "???";
+}
+
+const char *ceph_cap_op_name(int op)
+{
+ switch (op) {
+ case CEPH_CAP_OP_GRANT: return "grant";
+ case CEPH_CAP_OP_REVOKE: return "revoke";
+ case CEPH_CAP_OP_TRUNC: return "trunc";
+ case CEPH_CAP_OP_EXPORT: return "export";
+ case CEPH_CAP_OP_IMPORT: return "import";
+ case CEPH_CAP_OP_UPDATE: return "update";
+ case CEPH_CAP_OP_DROP: return "drop";
+ case CEPH_CAP_OP_FLUSH: return "flush";
+ case CEPH_CAP_OP_FLUSH_ACK: return "flush_ack";
+ case CEPH_CAP_OP_FLUSHSNAP: return "flushsnap";
+ case CEPH_CAP_OP_FLUSHSNAP_ACK: return "flushsnap_ack";
+ case CEPH_CAP_OP_RELEASE: return "release";
+ case CEPH_CAP_OP_RENEW: return "renew";
+ }
+ return "???";
+}
+
+const char *ceph_lease_op_name(int o)
+{
+ switch (o) {
+ case CEPH_MDS_LEASE_REVOKE: return "revoke";
+ case CEPH_MDS_LEASE_RELEASE: return "release";
+ case CEPH_MDS_LEASE_RENEW: return "renew";
+ case CEPH_MDS_LEASE_REVOKE_ACK: return "revoke_ack";
+ }
+ return "???";
+}
+
+const char *ceph_snap_op_name(int o)
+{
+ switch (o) {
+ case CEPH_SNAP_OP_UPDATE: return "update";
+ case CEPH_SNAP_OP_CREATE: return "create";
+ case CEPH_SNAP_OP_DESTROY: return "destroy";
+ case CEPH_SNAP_OP_SPLIT: return "split";
+ }
+ return "???";
+}
diff --git a/ceph/super.c b/ceph/super.c
new file mode 100644
index 0000000..06150fd
--- /dev/null
+++ b/ceph/super.c
@@ -0,0 +1,1061 @@
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/backing-dev.h>
+#include <linux/ctype.h>
+#include <linux/fs.h>
+#include <linux/inet.h>
+#include <linux/in6.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/parser.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/statfs.h>
+#include <linux/string.h>
+
+#include "super.h"
+#include "mds_client.h"
+#include "cache.h"
+
+#include <linux/ceph/ceph_features.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/debugfs.h>
+
+/*
+ * Ceph superblock operations
+ *
+ * Handle the basics of mounting, unmounting.
+ */
+
+/*
+ * super ops
+ */
+/*
+ * Release the superblock: close all MDS sessions, then detach and
+ * unregister our private bdi before the anon super releases the
+ * device name.
+ */
+static void ceph_put_super(struct super_block *s)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(s);
+
+ dout("put_super\n");
+ ceph_mdsc_close_sessions(fsc->mdsc);
+
+ /*
+ * ensure we release the bdi before put_anon_super releases
+ * the device name.
+ */
+ if (s->s_bdi == &fsc->backing_dev_info) {
+ bdi_unregister(&fsc->backing_dev_info);
+ s->s_bdi = NULL;
+ }
+
+ return;
+}
+
+/*
+ * statfs: query the monitors for cluster-wide usage and translate it
+ * into a kstatfs.  Counts are expressed in CEPH_BLOCK-sized units to
+ * avoid 32-bit overflow; st.kb values are little-endian on the wire.
+ * Returns 0 or a negative errno from the mon request.
+ */
+static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+ struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode);
+ struct ceph_monmap *monmap = fsc->client->monc.monmap;
+ struct ceph_statfs st;
+ u64 fsid;
+ int err;
+
+ dout("statfs\n");
+ err = ceph_monc_do_statfs(&fsc->client->monc, &st);
+ if (err < 0)
+ return err;
+
+ /* fill in kstatfs */
+ buf->f_type = CEPH_SUPER_MAGIC; /* ?? */
+
+ /*
+ * express utilization in terms of large blocks to avoid
+ * overflow on 32-bit machines.
+ *
+ * NOTE: for the time being, we make bsize == frsize to humor
+ * not-yet-ancient versions of glibc that are broken.
+ * Someday, we will probably want to report a real block
+ * size... whatever that may mean for a network file system!
+ */
+ buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
+ buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
+ /* st.kb is in KiB, so shift by (BLOCK_SHIFT - 10) to get blocks */
+ buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
+ buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
+ buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
+
+ buf->f_files = le64_to_cpu(st.num_objects);
+ buf->f_ffree = -1;
+ buf->f_namelen = NAME_MAX;
+
+ /* leave fsid little-endian, regardless of host endianness */
+ fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1);
+ buf->f_fsid.val[0] = fsid & 0xffffffff;
+ buf->f_fsid.val[1] = fsid >> 32;
+
+ return 0;
+}
+
+
+/*
+ * sync_fs: non-blocking mode just kicks off a flush of dirty caps;
+ * blocking mode waits for both the OSD and MDS clients to drain.
+ * Always returns 0.
+ */
+static int ceph_sync_fs(struct super_block *sb, int wait)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
+
+ if (!wait) {
+ dout("sync_fs (non-blocking)\n");
+ ceph_flush_dirty_caps(fsc->mdsc);
+ dout("sync_fs (non-blocking) done\n");
+ return 0;
+ }
+
+ dout("sync_fs (blocking)\n");
+ ceph_osdc_sync(&fsc->client->osdc);
+ ceph_mdsc_sync(fsc->mdsc);
+ dout("sync_fs (blocking) done\n");
+ return 0;
+}
+
+/*
+ * mount options
+ */
+/*
+ * fs-level mount option tokens.  Ordering matters: tokens below
+ * Opt_last_int take an integer argument, tokens between Opt_last_int
+ * and Opt_last_string take a string, the rest are bare flags (see
+ * parse_fsopt_token()).
+ */
+enum {
+ Opt_wsize,
+ Opt_rsize,
+ Opt_rasize,
+ Opt_caps_wanted_delay_min,
+ Opt_caps_wanted_delay_max,
+ Opt_cap_release_safety,
+ Opt_readdir_max_entries,
+ Opt_readdir_max_bytes,
+ Opt_congestion_kb,
+ Opt_last_int,
+ /* int args above */
+ Opt_snapdirname,
+ Opt_last_string,
+ /* string args above */
+ Opt_dirstat,
+ Opt_nodirstat,
+ Opt_rbytes,
+ Opt_norbytes,
+ Opt_asyncreaddir,
+ Opt_noasyncreaddir,
+ Opt_dcache,
+ Opt_nodcache,
+ Opt_ino32,
+ Opt_noino32,
+ Opt_fscache,
+ Opt_nofscache,
+#ifdef CONFIG_CEPH_FS_POSIX_ACL
+ Opt_acl,
+#endif
+ Opt_noacl
+};
+
+/* token -> pattern table consumed by match_token() */
+static match_table_t fsopt_tokens = {
+ {Opt_wsize, "wsize=%d"},
+ {Opt_rsize, "rsize=%d"},
+ {Opt_rasize, "rasize=%d"},
+ {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
+ {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
+ {Opt_cap_release_safety, "cap_release_safety=%d"},
+ {Opt_readdir_max_entries, "readdir_max_entries=%d"},
+ {Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
+ {Opt_congestion_kb, "write_congestion_kb=%d"},
+ /* int args above */
+ {Opt_snapdirname, "snapdirname=%s"},
+ /* string args above */
+ {Opt_dirstat, "dirstat"},
+ {Opt_nodirstat, "nodirstat"},
+ {Opt_rbytes, "rbytes"},
+ {Opt_norbytes, "norbytes"},
+ {Opt_asyncreaddir, "asyncreaddir"},
+ {Opt_noasyncreaddir, "noasyncreaddir"},
+ {Opt_dcache, "dcache"},
+ {Opt_nodcache, "nodcache"},
+ {Opt_ino32, "ino32"},
+ {Opt_noino32, "noino32"},
+ {Opt_fscache, "fsc"},
+ {Opt_nofscache, "nofsc"},
+#ifdef CONFIG_CEPH_FS_POSIX_ACL
+ {Opt_acl, "acl"},
+#endif
+ {Opt_noacl, "noacl"},
+ {-1, NULL}
+};
+
+/*
+ * Parse a single fs-level mount option token into @private (a
+ * struct ceph_mount_options).  Integer tokens have their argument
+ * decoded first; string tokens are duplicated out of argstr.
+ * Returns 0 on success or a negative errno.
+ */
+static int parse_fsopt_token(char *c, void *private)
+{
+ struct ceph_mount_options *fsopt = private;
+ substring_t argstr[MAX_OPT_ARGS];
+ int token, intval, ret;
+
+ token = match_token((char *)c, fsopt_tokens, argstr);
+ if (token < 0)
+ return -EINVAL;
+
+ if (token < Opt_last_int) {
+ ret = match_int(&argstr[0], &intval);
+ if (ret < 0) {
+ pr_err("bad mount option arg (not int) "
+ "at '%s'\n", c);
+ return ret;
+ }
+ dout("got int token %d val %d\n", token, intval);
+ } else if (token > Opt_last_int && token < Opt_last_string) {
+ dout("got string token %d val %s\n", token,
+ argstr[0].from);
+ } else {
+ dout("got token %d\n", token);
+ }
+
+ switch (token) {
+ case Opt_snapdirname:
+ kfree(fsopt->snapdir_name);
+ fsopt->snapdir_name = kstrndup(argstr[0].from,
+ argstr[0].to-argstr[0].from,
+ GFP_KERNEL);
+ if (!fsopt->snapdir_name)
+ return -ENOMEM;
+ break;
+
+ /* misc */
+ case Opt_wsize:
+ fsopt->wsize = intval;
+ break;
+ case Opt_rsize:
+ fsopt->rsize = intval;
+ break;
+ case Opt_rasize:
+ fsopt->rasize = intval;
+ break;
+ case Opt_caps_wanted_delay_min:
+ fsopt->caps_wanted_delay_min = intval;
+ break;
+ case Opt_caps_wanted_delay_max:
+ fsopt->caps_wanted_delay_max = intval;
+ break;
+ case Opt_cap_release_safety:
+ /*
+ * This case was missing even though the token is declared
+ * in fsopt_tokens: a "cap_release_safety=N" mount option
+ * would fall through to the BUG_ON() in the default arm.
+ */
+ fsopt->cap_release_safety = intval;
+ break;
+ case Opt_readdir_max_entries:
+ fsopt->max_readdir = intval;
+ break;
+ case Opt_readdir_max_bytes:
+ fsopt->max_readdir_bytes = intval;
+ break;
+ case Opt_congestion_kb:
+ fsopt->congestion_kb = intval;
+ break;
+ case Opt_dirstat:
+ fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT;
+ break;
+ case Opt_nodirstat:
+ fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
+ break;
+ case Opt_rbytes:
+ fsopt->flags |= CEPH_MOUNT_OPT_RBYTES;
+ break;
+ case Opt_norbytes:
+ fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
+ break;
+ case Opt_asyncreaddir:
+ fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR;
+ break;
+ case Opt_noasyncreaddir:
+ fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
+ break;
+ case Opt_dcache:
+ fsopt->flags |= CEPH_MOUNT_OPT_DCACHE;
+ break;
+ case Opt_nodcache:
+ fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE;
+ break;
+ case Opt_ino32:
+ fsopt->flags |= CEPH_MOUNT_OPT_INO32;
+ break;
+ case Opt_noino32:
+ fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
+ break;
+ case Opt_fscache:
+ fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
+ break;
+ case Opt_nofscache:
+ fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
+ break;
+#ifdef CONFIG_CEPH_FS_POSIX_ACL
+ case Opt_acl:
+ fsopt->sb_flags |= MS_POSIXACL;
+ break;
+#endif
+ case Opt_noacl:
+ fsopt->sb_flags &= ~MS_POSIXACL;
+ break;
+ default:
+ BUG_ON(token);
+ }
+ return 0;
+}
+
+/* Free a ceph_mount_options and its owned snapdir_name string. */
+static void destroy_mount_options(struct ceph_mount_options *args)
+{
+ dout("destroy_mount_options %p\n", args);
+ kfree(args->snapdir_name);
+ kfree(args);
+}
+
+/*
+ * NULL-tolerant strcmp: two NULLs compare equal; a NULL compares
+ * after a non-NULL string (s1 non-NULL/s2 NULL -> -1, and vice versa).
+ */
+static int strcmp_null(const char *s1, const char *s2)
+{
+ if (!s1 && !s2)
+ return 0;
+ if (s1 && !s2)
+ return -1;
+ if (!s1 && s2)
+ return 1;
+ return strcmp(s1, s2);
+}
+
+/*
+ * Compare a candidate set of mount options against an existing fs
+ * client's.  The leading scalar fields (everything before
+ * snapdir_name in struct ceph_mount_options) are compared as a raw
+ * memcmp; snapdir_name and the libceph options are compared
+ * separately.  Returns 0 iff everything matches.
+ */
+static int compare_mount_options(struct ceph_mount_options *new_fsopt,
+ struct ceph_options *new_opt,
+ struct ceph_fs_client *fsc)
+{
+ struct ceph_mount_options *fsopt1 = new_fsopt;
+ struct ceph_mount_options *fsopt2 = fsc->mount_options;
+ int ofs = offsetof(struct ceph_mount_options, snapdir_name);
+ int ret;
+
+ ret = memcmp(fsopt1, fsopt2, ofs);
+ if (ret)
+ return ret;
+
+ ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
+ if (ret)
+ return ret;
+
+ return ceph_compare_options(new_opt, fsc->client);
+}
+
+/*
+ * Allocate and fill a ceph_mount_options from the mount syscall's
+ * flags/options/dev_name, splitting dev_name into the monitor list
+ * (handed to ceph_parse_options()) and the optional server-side
+ * path (*path, without the leading '/').  On success *pfsopt and
+ * *popt are owned by the caller; on error everything allocated here
+ * is freed and a negative errno is returned.
+ */
+static int parse_mount_options(struct ceph_mount_options **pfsopt,
+ struct ceph_options **popt,
+ int flags, char *options,
+ const char *dev_name,
+ const char **path)
+{
+ struct ceph_mount_options *fsopt;
+ const char *dev_name_end;
+ int err;
+
+ if (!dev_name || !*dev_name)
+ return -EINVAL;
+
+ fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL);
+ if (!fsopt)
+ return -ENOMEM;
+
+ dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name);
+
+ fsopt->sb_flags = flags;
+ fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
+
+ /* defaults; individual options may override via parse_fsopt_token */
+ fsopt->rsize = CEPH_RSIZE_DEFAULT;
+ fsopt->rasize = CEPH_RASIZE_DEFAULT;
+ fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
+ fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
+ fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
+ fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
+ fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
+ fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
+ fsopt->congestion_kb = default_congestion_kb();
+
+ /*
+ * Distinguish the server list from the path in "dev_name".
+ * Internally we do not include the leading '/' in the path.
+ *
+ * "dev_name" will look like:
+ * <server_spec>[,<server_spec>...]:[<path>]
+ * where
+ * <server_spec> is <ip>[:<port>]
+ * <path> is optional, but if present must begin with '/'
+ */
+ dev_name_end = strchr(dev_name, '/');
+ if (dev_name_end) {
+ /* skip over leading '/' for path */
+ *path = dev_name_end + 1;
+ } else {
+ /* path is empty */
+ dev_name_end = dev_name + strlen(dev_name);
+ *path = dev_name_end;
+ }
+ err = -EINVAL;
+ dev_name_end--; /* back up to ':' separator */
+ if (dev_name_end < dev_name || *dev_name_end != ':') {
+ pr_err("device name is missing path (no : separator in %s)\n",
+ dev_name);
+ goto out;
+ }
+ dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
+ dout("server path '%s'\n", *path);
+
+ *popt = ceph_parse_options(options, dev_name, dev_name_end,
+ parse_fsopt_token, (void *)fsopt);
+ if (IS_ERR(*popt)) {
+ err = PTR_ERR(*popt);
+ goto out;
+ }
+
+ /* success */
+ *pfsopt = fsopt;
+ return 0;
+
+out:
+ destroy_mount_options(fsopt);
+ return err;
+}
+
+/**
+ * ceph_show_options - Show mount options in /proc/mounts
+ * @m: seq_file to write to
+ * @root: root of that (sub)tree
+ */
+static int ceph_show_options(struct seq_file *m, struct dentry *root)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
+ struct ceph_mount_options *fsopt = fsc->mount_options;
+ struct ceph_options *opt = fsc->client->options;
+
+ /* libceph-level options first; only non-default values are shown */
+ if (opt->flags & CEPH_OPT_FSID)
+ seq_printf(m, ",fsid=%pU", &opt->fsid);
+ if (opt->flags & CEPH_OPT_NOSHARE)
+ seq_puts(m, ",noshare");
+ if (opt->flags & CEPH_OPT_NOCRC)
+ seq_puts(m, ",nocrc");
+
+ if (opt->name)
+ seq_printf(m, ",name=%s", opt->name);
+ if (opt->key)
+ seq_puts(m, ",secret=<hidden>"); /* never expose the key */
+
+ if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
+ seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
+ if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
+ seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
+ if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
+ seq_printf(m, ",osdkeepalivetimeout=%d",
+ opt->osd_keepalive_timeout);
+
+ /* fs-level flags */
+ if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
+ seq_puts(m, ",dirstat");
+ if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0)
+ seq_puts(m, ",norbytes");
+ if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
+ seq_puts(m, ",noasyncreaddir");
+ if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE)
+ seq_puts(m, ",dcache");
+ else
+ seq_puts(m, ",nodcache");
+ if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
+ seq_puts(m, ",fsc");
+ else
+ seq_puts(m, ",nofsc");
+
+#ifdef CONFIG_CEPH_FS_POSIX_ACL
+ if (fsopt->sb_flags & MS_POSIXACL)
+ seq_puts(m, ",acl");
+ else
+ seq_puts(m, ",noacl");
+#endif
+
+ /* integer-valued fs options, again only when non-default */
+ if (fsopt->wsize)
+ seq_printf(m, ",wsize=%d", fsopt->wsize);
+ if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
+ seq_printf(m, ",rsize=%d", fsopt->rsize);
+ if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
+ seq_printf(m, ",rasize=%d", fsopt->rasize);
+ if (fsopt->congestion_kb != default_congestion_kb())
+ seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
+ if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
+ seq_printf(m, ",caps_wanted_delay_min=%d",
+ fsopt->caps_wanted_delay_min);
+ if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
+ seq_printf(m, ",caps_wanted_delay_max=%d",
+ fsopt->caps_wanted_delay_max);
+ if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
+ seq_printf(m, ",cap_release_safety=%d",
+ fsopt->cap_release_safety);
+ if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
+ seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir)<br/>;
+ if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
+ seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
+ if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
+ seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
+ return 0;
+}
+
+/*
+ * handle any mon messages the standard library doesn't understand.
+ * return error if we don't either.
+ */
+/*
+ * Dispatch fs-specific monitor messages (currently only the MDS map).
+ * Returns 0 when handled, -1 so libceph reports an unknown message.
+ */
+static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
+{
+ struct ceph_fs_client *fsc = client->private;
+ int type = le16_to_cpu(msg->hdr.type);
+
+ switch (type) {
+ case CEPH_MSG_MDS_MAP:
+ ceph_mdsc_handle_map(fsc->mdsc, msg);
+ return 0;
+
+ default:
+ return -1;
+ }
+}
+
+/*
+ * create a new fs client
+ */
+static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
+ struct ceph_options *opt)
+{
+ struct ceph_fs_client *fsc;
+ const u64 supported_features =
+ CEPH_FEATURE_FLOCK |
+ CEPH_FEATURE_DIRLAYOUTHASH;
+ const u64 required_features = 0;
+ int page_count;
+ size_t size;
+ int err = -ENOMEM;
+
+ /*
+ * Build the fs client: libceph client, bdi, three single-threaded
+ * workqueues, the writeback pagevec mempool, and (optionally)
+ * fscache.  On success the client owns @fsopt; on failure resources
+ * are unwound in reverse order via the goto chain and an ERR_PTR is
+ * returned (the caller still owns @fsopt/@opt in that case).
+ */
+ fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
+ if (!fsc)
+ return ERR_PTR(-ENOMEM);
+
+ fsc->client = ceph_create_client(opt, fsc, supported_features,
+ required_features);
+ if (IS_ERR(fsc->client)) {
+ err = PTR_ERR(fsc->client);
+ goto fail;
+ }
+ fsc->client->extra_mon_dispatch = extra_mon_dispatch;
+ fsc->client->monc.want_mdsmap = 1;
+
+ fsc->mount_options = fsopt;
+
+ fsc->sb = NULL;
+ fsc->mount_state = CEPH_MOUNT_MOUNTING;
+
+ atomic_long_set(&fsc->writeback_count, 0);
+
+ err = bdi_init(&fsc->backing_dev_info);
+ if (err < 0)
+ goto fail_client;
+
+ err = -ENOMEM;
+ /*
+ * The number of concurrent works can be high but they don't need
+ * to be processed in parallel, limit concurrency.
+ */
+ fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1);
+ if (fsc->wb_wq == NULL)
+ goto fail_bdi;
+ fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
+ if (fsc->pg_inv_wq == NULL)
+ goto fail_wb_wq;
+ fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1);
+ if (fsc->trunc_wq == NULL)
+ goto fail_pg_inv_wq;
+
+ /* set up mempools */
+ err = -ENOMEM;
+ page_count = fsc->mount_options->wsize >> PAGE_CACHE_SHIFT;
+ size = sizeof (struct page *) * (page_count ? page_count : 1);
+ fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size);
+ if (!fsc->wb_pagevec_pool)
+ goto fail_trunc_wq;
+
+ /* setup fscache */
+ if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) &&
+ (ceph_fscache_register_fs(fsc) != 0))
+ goto fail_fscache;
+
+ /* caps */
+ fsc->min_caps = fsopt->max_readdir;
+
+ return fsc;
+
+fail_fscache:
+ ceph_fscache_unregister_fs(fsc);
+fail_trunc_wq:
+ destroy_workqueue(fsc->trunc_wq);
+fail_pg_inv_wq:
+ destroy_workqueue(fsc->pg_inv_wq);
+fail_wb_wq:
+ destroy_workqueue(fsc->wb_wq);
+fail_bdi:
+ bdi_destroy(&fsc->backing_dev_info);
+fail_client:
+ ceph_destroy_client(fsc->client);
+fail:
+ kfree(fsc);
+ return ERR_PTR(err);
+}
+
+/*
+ * Tear down everything create_fs_client() built, in reverse order,
+ * including the mount options it took ownership of and the libceph
+ * client.  The final dout only prints the (now-freed) pointer value.
+ */
+static void destroy_fs_client(struct ceph_fs_client *fsc)
+{
+ dout("destroy_fs_client %p\n", fsc);
+
+ ceph_fscache_unregister_fs(fsc);
+
+ destroy_workqueue(fsc->wb_wq);
+ destroy_workqueue(fsc->pg_inv_wq);
+ destroy_workqueue(fsc->trunc_wq);
+
+ bdi_destroy(&fsc->backing_dev_info);
+
+ mempool_destroy(fsc->wb_pagevec_pool);
+
+ destroy_mount_options(fsc->mount_options);
+
+ ceph_fs_debugfs_cleanup(fsc);
+
+ ceph_destroy_client(fsc->client);
+
+ kfree(fsc);
+ dout("destroy_fs_client %p done\n", fsc);
+}
+
+/*
+ * caches
+ */
+struct kmem_cache *ceph_inode_cachep;
+struct kmem_cache *ceph_cap_cachep;
+struct kmem_cache *ceph_dentry_cachep;
+struct kmem_cache *ceph_file_cachep;
+
+/* slab constructor: initialize the embedded VFS inode exactly once */
+static void ceph_inode_init_once(void *foo)
+{
+ struct ceph_inode_info *ci = foo;
+ inode_init_once(&ci->vfs_inode);
+}
+
+/*
+ * Create the four slab caches used by the filesystem and register
+ * with fscache.  On failure, caches created so far are destroyed via
+ * the fall-through labels.
+ *
+ * NOTE(review): if ceph_fscache_register() fails we goto bad_file,
+ * which does not destroy ceph_file_cachep (created just above) —
+ * looks like a leak on that one path; confirm against upstream.
+ */
+static int __init init_caches(void)
+{
+ int error = -ENOMEM;
+
+ ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
+ sizeof(struct ceph_inode_info),
+ __alignof__(struct ceph_inode_info),
+ (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
+ ceph_inode_init_once);
+ if (ceph_inode_cachep == NULL)
+ return -ENOMEM;
+
+ ceph_cap_cachep = KMEM_CACHE(ceph_cap,
+ SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
+ if (ceph_cap_cachep == NULL)
+ goto bad_cap;
+
+ ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
+ SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
+ if (ceph_dentry_cachep == NULL)
+ goto bad_dentry;
+
+ ceph_file_cachep = KMEM_CACHE(ceph_file_info,
+ SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
+ if (ceph_file_cachep == NULL)
+ goto bad_file;
+
+ if ((error = ceph_fscache_register()))
+ goto bad_file;
+
+ return 0;
+bad_file:
+ kmem_cache_destroy(ceph_dentry_cachep);
+bad_dentry:
+ kmem_cache_destroy(ceph_cap_cachep);
+bad_cap:
+ kmem_cache_destroy(ceph_inode_cachep);
+ return error;
+}
+
+/* Destroy the slab caches and unregister from fscache at module exit. */
+static void destroy_caches(void)
+{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
+
+ kmem_cache_destroy(ceph_inode_cachep);
+ kmem_cache_destroy(ceph_cap_cachep);
+ kmem_cache_destroy(ceph_dentry_cachep);
+ kmem_cache_destroy(ceph_file_cachep);
+
+ ceph_fscache_unregister();
+}
+
+
+/*
+ * ceph_umount_begin - initiate forced umount. Tear down down the
+ * mount, skipping steps that may hang while waiting for server(s).
+ */
+static void ceph_umount_begin(struct super_block *sb)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
+
+ dout("ceph_umount_begin - starting forced umount\n");
+ if (!fsc)
+ return;
+ /* flag shutdown so in-flight ops stop waiting on servers */
+ fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
+ return;
+}
+
+/* superblock operations vector installed by ceph_set_super() */
+static const struct super_operations ceph_super_ops = {
+ .alloc_inode = ceph_alloc_inode,
+ .destroy_inode = ceph_destroy_inode,
+ .write_inode = ceph_write_inode,
+ .drop_inode = ceph_drop_inode,
+ .sync_fs = ceph_sync_fs,
+ .put_super = ceph_put_super,
+ .show_options = ceph_show_options,
+ .statfs = ceph_statfs,
+ .umount_begin = ceph_umount_begin,
+};
+
+/*
+ * Bootstrap mount by opening the root directory. Note the mount
+ * @started time from caller, and time out if this takes too long.
+ */
+static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
+ const char *path,
+ unsigned long started)
+{
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_mds_request *req = NULL;
+ int err;
+ struct dentry *root;
+
+ /* open dir: GETATTR on @path relative to the root inode */
+ dout("open_root_inode opening '%s'\n", path);
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
+ if (IS_ERR(req))
+ return ERR_CAST(req);
+ req->r_path1 = kstrdup(path, GFP_NOFS);
+ req->r_ino1.ino = CEPH_INO_ROOT;
+ req->r_ino1.snap = CEPH_NOSNAP;
+ req->r_started = started;
+ req->r_timeout = fsc->client->options->mount_timeout * HZ;
+ req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
+ req->r_num_caps = 2;
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+ if (err == 0) {
+ /* take the inode reference out of the request */
+ struct inode *inode = req->r_target_inode;
+ req->r_target_inode = NULL;
+ dout("open_root_inode success\n");
+ if (ceph_ino(inode) == CEPH_INO_ROOT &&
+ fsc->sb->s_root == NULL) {
+ root = d_make_root(inode);
+ if (!root) {
+ root = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+ } else {
+ root = d_obtain_alias(inode);
+ }
+ /*
+ * NOTE(review): d_obtain_alias() may return ERR_PTR; this
+ * passes it straight to ceph_init_dentry() — presumably that
+ * tolerates it, but worth confirming.
+ */
+ ceph_init_dentry(root);
+ dout("open_root_inode success, root dentry is %p\n", root);
+ } else {
+ root = ERR_PTR(err);
+ }
+out:
+ ceph_mdsc_put_request(req);
+ return root;
+}
+
+
+
+
+/*
+ * mount: join the ceph cluster, and open root directory.
+ */
+static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
+ const char *path)
+{
+ int err;
+ unsigned long started = jiffies; /* note the start time */
+ struct dentry *root;
+ int first = 0; /* first vfsmount for this super_block */
+
+ dout("mount start\n");
+ mutex_lock(&fsc->client->mount_mutex);
+
+ err = __ceph_open_session(fsc->client, started);
+ if (err < 0)
+ goto out;
+
+ /* always open the true fs root first, then the requested subpath */
+ dout("mount opening root\n");
+ root = open_root_dentry(fsc, "", started);
+ if (IS_ERR(root)) {
+ err = PTR_ERR(root);
+ goto out;
+ }
+ if (fsc->sb->s_root) {
+ dput(root);
+ } else {
+ fsc->sb->s_root = root;
+ first = 1;
+
+ err = ceph_fs_debugfs_init(fsc);
+ if (err < 0)
+ goto fail;
+ }
+
+ if (path[0] == 0) {
+ dget(root);
+ } else {
+ dout("mount opening base mountpoint\n");
+ root = open_root_dentry(fsc, path, started);
+ if (IS_ERR(root)) {
+ err = PTR_ERR(root);
+ goto fail;
+ }
+ }
+
+ fsc->mount_state = CEPH_MOUNT_MOUNTED;
+ dout("mount success\n");
+ mutex_unlock(&fsc->client->mount_mutex);
+ return root;
+
+out:
+ mutex_unlock(&fsc->client->mount_mutex);
+ return ERR_PTR(err);
+
+fail:
+ /* undo the s_root we installed if this was the first mount */
+ if (first) {
+ dput(fsc->sb->s_root);
+ fsc->sb->s_root = NULL;
+ }
+ goto out;
+}
+
+/*
+ * sget() callback: initialize a freshly allocated super_block from
+ * our fs client (ops, xattr handlers, export ops, flags) and link
+ * the two together.  Unwinds the linkage if set_anon_super() fails.
+ */
+static int ceph_set_super(struct super_block *s, void *data)
+{
+ struct ceph_fs_client *fsc = data;
+ int ret;
+
+ dout("set_super %p data %p\n", s, data);
+
+ s->s_flags = fsc->mount_options->sb_flags;
+ s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */
+
+ s->s_xattr = ceph_xattr_handlers;
+ s->s_fs_info = fsc;
+ fsc->sb = s;
+
+ s->s_op = &ceph_super_ops;
+ s->s_export_op = &ceph_export_ops;
+
+ s->s_time_gran = 1000; /* 1000 ns == 1 us */
+
+ ret = set_anon_super(s, NULL); /* what is that second arg for? */
+ if (ret != 0)
+ goto fail;
+
+ return ret;
+
+fail:
+ s->s_fs_info = NULL;
+ fsc->sb = NULL;
+ return ret;
+}
+
+/*
+ * share superblock if same fs AND options
+ */
+/*
+ * sget() comparator: returns 1 when @sb can be shared with the
+ * candidate client in @data (same mount options, same fsid when one
+ * was pinned, same sb flags), 0 otherwise.
+ */
+static int ceph_compare_super(struct super_block *sb, void *data)
+{
+ struct ceph_fs_client *new = data;
+ struct ceph_mount_options *fsopt = new->mount_options;
+ struct ceph_options *opt = new->client->options;
+ struct ceph_fs_client *other = ceph_sb_to_client(sb);
+
+ dout("ceph_compare_super %p\n", sb);
+
+ if (compare_mount_options(fsopt, opt, other)) {
+ dout("monitor(s)/mount options don't match\n");
+ return 0;
+ }
+ if ((opt->flags & CEPH_OPT_FSID) &&
+ ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
+ dout("fsid doesn't match\n");
+ return 0;
+ }
+ if (fsopt->sb_flags != other->mount_options->sb_flags) {
+ dout("flags differ\n");
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * construct our own bdi so we can control readahead, etc.
+ */
+static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
+
+/*
+ * Register our private bdi (named ceph-<seq>) and attach it to the
+ * superblock, sizing ra_pages from the rasize mount option when it
+ * is at least one page, else from the system default.
+ */
+static int ceph_register_bdi(struct super_block *sb,
+ struct ceph_fs_client *fsc)
+{
+ int err;
+
+ /* set ra_pages based on rasize mount option? */
+ if (fsc->mount_options->rasize >= PAGE_CACHE_SIZE)
+ fsc->backing_dev_info.ra_pages =
+ (fsc->mount_options->rasize + PAGE_CACHE_SIZE - 1)
+ >> PAGE_SHIFT;
+ else
+ fsc->backing_dev_info.ra_pages =
+ default_backing_dev_info.ra_pages;
+
+ err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld",
+ atomic_long_inc_return(&bdi_seq));
+ if (!err)
+ sb->s_bdi = &fsc->backing_dev_info;
+ return err;
+}
+
+/*
+ * mount entry point: parse options, build a candidate fs client,
+ * find or create a matching superblock via sget() (the candidate is
+ * discarded if an existing sb is reused), then perform the actual
+ * mount.  Returns the root dentry or an ERR_PTR.
+ */
+static struct dentry *ceph_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
+{
+ struct super_block *sb;
+ struct ceph_fs_client *fsc;
+ struct dentry *res;
+ int err;
+ int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
+ const char *path = NULL;
+ struct ceph_mount_options *fsopt = NULL;
+ struct ceph_options *opt = NULL;
+
+ dout("ceph_mount\n");
+
+#ifdef CONFIG_CEPH_FS_POSIX_ACL
+ flags |= MS_POSIXACL;
+#endif
+ err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
+ if (err < 0) {
+ res = ERR_PTR(err);
+ goto out_final;
+ }
+
+ /* create client (which we may/may not use) */
+ fsc = create_fs_client(fsopt, opt);
+ if (IS_ERR(fsc)) {
+ res = ERR_CAST(fsc);
+ /* create_fs_client failed, so fsopt/opt are still ours */
+ destroy_mount_options(fsopt);
+ ceph_destroy_options(opt);
+ goto out_final;
+ }
+
+ err = ceph_mdsc_init(fsc);
+ if (err < 0) {
+ res = ERR_PTR(err);
+ goto out;
+ }
+
+ /* "noshare" disables superblock sharing entirely */
+ if (ceph_test_opt(fsc->client, NOSHARE))
+ compare_super = NULL;
+ sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc);
+ if (IS_ERR(sb)) {
+ res = ERR_CAST(sb);
+ goto out;
+ }
+
+ if (ceph_sb_to_client(sb) != fsc) {
+ /* reused an existing sb; drop our candidate client */
+ ceph_mdsc_destroy(fsc);
+ destroy_fs_client(fsc);
+ fsc = ceph_sb_to_client(sb);
+ dout("get_sb got existing client %p\n", fsc);
+ } else {
+ dout("get_sb using new client %p\n", fsc);
+ err = ceph_register_bdi(sb, fsc);
+ if (err < 0) {
+ res = ERR_PTR(err);
+ goto out_splat;
+ }
+ }
+
+ res = ceph_real_mount(fsc, path);
+ if (IS_ERR(res))
+ goto out_splat;
+ dout("root %p inode %p ino %llx.%llx\n", res,
+ res->d_inode, ceph_vinop(res->d_inode));
+ return res;
+
+out_splat:
+ ceph_mdsc_close_sessions(fsc->mdsc);
+ deactivate_locked_super(sb);
+ goto out_final;
+
+out:
+ ceph_mdsc_destroy(fsc);
+ destroy_fs_client(fsc);
+out_final:
+ dout("ceph_mount fail %ld\n", PTR_ERR(res));
+ return res;
+}
+
+/*
+ * Kill the superblock: flush MDS state first so the generic teardown
+ * (which calls our put_super once the sb is read-only) can't block,
+ * then destroy the mds client and fs client.
+ */
+static void ceph_kill_sb(struct super_block *s)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(s);
+ dout("kill_sb %p\n", s);
+ ceph_mdsc_pre_umount(fsc->mdsc);
+ kill_anon_super(s); /* will call put_super after sb is r/o */
+ ceph_mdsc_destroy(fsc);
+ destroy_fs_client(fsc);
+}
+
+/* filesystem type registration; MDS rename semantics need D_MOVE */
+static struct file_system_type ceph_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "ceph",
+ .mount = ceph_mount,
+ .kill_sb = ceph_kill_sb,
+ .fs_flags = FS_RENAME_DOES_D_MOVE,
+};
+MODULE_ALIAS_FS("ceph");
+
+#define _STRINGIFY(x) #x
+#define STRINGIFY(x) _STRINGIFY(x)
+
+/*
+ * Module init: slab caches, flock/xattr subsystems, then filesystem
+ * registration; unwinds caches (and xattrs) if registration fails.
+ */
+static int __init init_ceph(void)
+{
+ int ret = init_caches();
+ if (ret)
+ goto out;
+
+ ceph_flock_init();
+ ceph_xattr_init();
+ ret = register_filesystem(&ceph_fs_type);
+ if (ret)
+ goto out_icache;
+
+ pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
+
+ return 0;
+
+out_icache:
+ ceph_xattr_exit();
+ destroy_caches();
+out:
+ return ret;
+}
+
+/* Module exit: unregister the fs, then tear down xattrs and caches. */
+static void __exit exit_ceph(void)
+{
+ dout("exit_ceph\n");
+ unregister_filesystem(&ceph_fs_type);
+ ceph_xattr_exit();
+ destroy_caches();
+}
+
+module_init(init_ceph);
+module_exit(exit_ceph);
+
+MODULE_AUTHOR("Sage Weil <sage at newdream.net>");
+MODULE_AUTHOR("Yehuda Sadeh <yehuda at hq.newdream.net>");
+MODULE_AUTHOR("Patience Warnick <patience at newdream.net>");
+MODULE_DESCRIPTION("Ceph filesystem for Linux");
+MODULE_LICENSE("GPL");
diff --git a/ceph/super.h b/ceph/super.h
new file mode 100644
index 0000000..ead05cc
--- /dev/null
+++ b/ceph/super.h
@@ -0,0 +1,890 @@
+#ifndef _FS_CEPH_SUPER_H
+#define _FS_CEPH_SUPER_H
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <asm/unaligned.h>
+#include <linux/backing-dev.h>
+#include <linux/completion.h>
+#include <linux/exportfs.h>
+#include <linux/fs.h>
+#include <linux/mempool.h>
+#include <linux/pagemap.h>
+#include <linux/wait.h>
+#include <linux/writeback.h>
+#include <linux/slab.h>
+#include <linux/posix_acl.h>
+
+#include <linux/ceph/libceph.h>
+
+#ifdef CONFIG_CEPH_FSCACHE
+#include <linux/fscache.h>
+#endif
+
+/* f_type in struct statfs */
+#define CEPH_SUPER_MAGIC 0x00c36400
+
+/* large granularity for statfs utilization stats to facilitate
+ * large volume sizes on 32-bit machines. */
+#define CEPH_BLOCK_SHIFT 22 /* 4 MB */
+#define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT)
+
+#define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */
+#define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */
+#define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */
+#define CEPH_MOUNT_OPT_INO32 (1<<8) /* 32 bit inos */
+#define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */
+#define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */
+
+#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES)
+
+#define ceph_set_mount_opt(fsc, opt) \
+ (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt;
+#define ceph_test_mount_opt(fsc, opt) \
+ (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
+
+#define CEPH_RSIZE_DEFAULT 0 /* max read size */
+#define CEPH_RASIZE_DEFAULT (8192*1024) /* readahead */
+#define CEPH_MAX_READDIR_DEFAULT 1024
+#define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024)
+#define CEPH_SNAPDIRNAME_DEFAULT ".snap"
+
/*
 * Options parsed at mount time; one instance hangs off each
 * ceph_fs_client.
 */
struct ceph_mount_options {
        int flags;              /* CEPH_MOUNT_OPT_* bitmask */
        int sb_flags;           /* superblock flags handed in at mount */

        int wsize;              /* max write size */
        int rsize;              /* max read size */
        int rasize;             /* max readahead */
        int congestion_kb;      /* max writeback in flight */
        int caps_wanted_delay_min, caps_wanted_delay_max;
                                /* delay before releasing wanted caps;
                                 * NOTE(review): units (secs?) not
                                 * visible here - confirm */
        int cap_release_safety;
        int max_readdir;        /* max readdir result (entries) */
        int max_readdir_bytes;  /* max readdir result (bytes) */

        /*
         * everything above this point can be memcmp'd; everything below
         * is handled in compare_mount_options()
         */

        char *snapdir_name;     /* default ".snap" */
};
+
/*
 * Per-superblock ceph filesystem state.  Reached from the superblock
 * via sb->s_fs_info (see ceph_sb_to_client()).
 */
struct ceph_fs_client {
        struct super_block *sb;         /* back-pointer to our sb */

        struct ceph_mount_options *mount_options;
        struct ceph_client *client;     /* cluster client (holds ->osdc) */

        unsigned long mount_state;      /* mount state machine -
                                         * NOTE(review): confirm values */
        int min_caps;                   /* min caps i added */

        struct ceph_mds_client *mdsc;   /* mds session/request state */

        /* writeback */
        mempool_t *wb_pagevec_pool;
        struct workqueue_struct *wb_wq;
        struct workqueue_struct *pg_inv_wq;
        struct workqueue_struct *trunc_wq;
        atomic_long_t writeback_count;

        struct backing_dev_info backing_dev_info;

#ifdef CONFIG_DEBUG_FS
        /* debugfs entries created for this mount */
        struct dentry *debugfs_dentry_lru, *debugfs_caps;
        struct dentry *debugfs_congestion_kb;
        struct dentry *debugfs_bdi;
        struct dentry *debugfs_mdsc, *debugfs_mdsmap;
#endif

#ifdef CONFIG_CEPH_FSCACHE
        struct fscache_cookie *fscache;
        struct workqueue_struct *revalidate_wq;
#endif
};
+
+
+/*
+ * File i/o capability. This tracks shared state with the metadata
+ * server that allows us to cache or writeback attributes or to read
+ * and write data. For any given inode, we should have one or more
+ * capabilities, one issued by each metadata server, and our
+ * cumulative access is the OR of all issued capabilities.
+ *
+ * Each cap is referenced by the inode's i_caps rbtree and by per-mds
+ * session capability lists.
+ */
+struct ceph_cap {
+ struct ceph_inode_info *ci;
+ struct rb_node ci_node; /* per-ci cap tree */
+ struct ceph_mds_session *session;
+ struct list_head session_caps; /* per-session caplist */
+ int mds;
+ u64 cap_id; /* unique cap id (mds provided) */
+ int issued; /* latest, from the mds */
+ int implemented; /* implemented superset of issued (for revocation) */
+ int mds_wanted;
+ u32 seq, issue_seq, mseq;
+ u32 cap_gen; /* active/stale cycle */
+ unsigned long last_used;
+ struct list_head caps_item;
+};
+
+#define CHECK_CAPS_NODELAY 1 /* do not delay any further */
+#define CHECK_CAPS_AUTHONLY 2 /* only check auth cap */
+#define CHECK_CAPS_FLUSH 4 /* flush any dirty caps */
+
+/*
+ * Snapped cap state that is pending flush to mds. When a snapshot occurs,
+ * we first complete any in-process sync writes and writeback any dirty
+ * data before flushing the snapped state (tracked here) back to the MDS.
+ */
+struct ceph_cap_snap {
+ atomic_t nref;
+ struct ceph_inode_info *ci;
+ struct list_head ci_item, flushing_item;
+
+ u64 follows, flush_tid;
+ int issued, dirty;
+ struct ceph_snap_context *context;
+
+ umode_t mode;
+ kuid_t uid;
+ kgid_t gid;
+
+ struct ceph_buffer *xattr_blob;
+ u64 xattr_version;
+
+ u64 size;
+ struct timespec mtime, atime, ctime;
+ u64 time_warp_seq;
+ int writing; /* a sync write is still in progress */
+ int dirty_pages; /* dirty pages awaiting writeback */
+};
+
/*
 * Drop a reference on a cap_snap.  When the last reference goes away,
 * release the xattr blob reference (if any) and free the structure.
 */
static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
{
        if (atomic_dec_and_test(&capsnap->nref)) {
                if (capsnap->xattr_blob)
                        ceph_buffer_put(capsnap->xattr_blob);
                kfree(capsnap);
        }
}
+
+/*
+ * The frag tree describes how a directory is fragmented, potentially across
+ * multiple metadata servers. It is also used to indicate points where
+ * metadata authority is delegated, and whether/where metadata is replicated.
+ *
+ * A _leaf_ frag will be present in the i_fragtree IFF there is
+ * delegation info. That is, if mds >= 0 || ndist > 0.
+ */
+#define CEPH_MAX_DIRFRAG_REP 4
+
+struct ceph_inode_frag {
+ struct rb_node node;
+
+ /* fragtree state */
+ u32 frag;
+ int split_by; /* i.e. 2^(split_by) children */
+
+ /* delegation and replication info */
+ int mds; /* -1 if same authority as parent */
+ int ndist; /* >0 if replicated */
+ int dist[CEPH_MAX_DIRFRAG_REP];
+};
+
+/*
+ * We cache inode xattrs as an encoded blob until they are first used,
+ * at which point we parse them into an rbtree.
+ */
+struct ceph_inode_xattr {
+ struct rb_node node;
+
+ const char *name;
+ int name_len;
+ const char *val;
+ int val_len;
+ int dirty;
+
+ int should_free_name;
+ int should_free_val;
+};
+
+/*
+ * Ceph dentry state
+ */
+struct ceph_dentry_info {
+ struct ceph_mds_session *lease_session;
+ u32 lease_gen, lease_shared_gen;
+ u32 lease_seq;
+ unsigned long lease_renew_after, lease_renew_from;
+ struct list_head lru;
+ struct dentry *dentry;
+ u64 time;
+ u64 offset;
+};
+
+struct ceph_inode_xattrs_info {
+ /*
+ * (still encoded) xattr blob. we avoid the overhead of parsing
+ * this until someone actually calls getxattr, etc.
+ *
+ * blob->vec.iov_len == 4 implies there are no xattrs; blob ==
+ * NULL means we don't know.
+ */
+ struct ceph_buffer *blob, *prealloc_blob;
+
+ struct rb_root index;
+ bool dirty;
+ int count;
+ int names_size;
+ int vals_size;
+ u64 version, index_version;
+};
+
+/*
+ * Ceph inode.
+ */
+struct ceph_inode_info {
+ struct ceph_vino i_vino; /* ceph ino + snap */
+
+ spinlock_t i_ceph_lock;
+
+ u64 i_version;
+ u32 i_time_warp_seq;
+
+ unsigned i_ceph_flags;
+ atomic_t i_release_count;
+ atomic_t i_complete_count;
+
+ struct ceph_dir_layout i_dir_layout;
+ struct ceph_file_layout i_layout;
+ char *i_symlink;
+
+ /* for dirs */
+ struct timespec i_rctime;
+ u64 i_rbytes, i_rfiles, i_rsubdirs;
+ u64 i_files, i_subdirs;
+
+ struct rb_root i_fragtree;
+ struct mutex i_fragtree_mutex;
+
+ struct ceph_inode_xattrs_info i_xattrs;
+
+ /* capabilities. protected _both_ by i_ceph_lock and cap->session's
+ * s_mutex. */
+ struct rb_root i_caps; /* cap list */
+ struct ceph_cap *i_auth_cap; /* authoritative cap, if any */
+ unsigned i_dirty_caps, i_flushing_caps; /* mask of dirtied fields */
+ struct list_head i_dirty_item, i_flushing_item;
+ u64 i_cap_flush_seq;
+ /* we need to track cap writeback on a per-cap-bit basis, to allow
+ * overlapping, pipelined cap flushes to the mds. we can probably
+ * reduce the tid to 8 bits if we're concerned about inode size. */
+ u16 i_cap_flush_last_tid, i_cap_flush_tid[CEPH_CAP_BITS];
+ wait_queue_head_t i_cap_wq; /* threads waiting on a capability */
+ unsigned long i_hold_caps_min; /* jiffies */
+ unsigned long i_hold_caps_max; /* jiffies */
+ struct list_head i_cap_delay_list; /* for delayed cap release to mds */
+ struct ceph_cap_reservation i_cap_migration_resv;
+ struct list_head i_cap_snaps; /* snapped state pending flush to mds */
+ struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or
+ dirty|flushing caps */
+ unsigned i_snap_caps; /* cap bits for snapped files */
+ unsigned i_cap_exporting_issued;
+
+ int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */
+
+ struct mutex i_truncate_mutex;
+ u32 i_truncate_seq; /* last truncate to smaller size */
+ u64 i_truncate_size; /* and the size we last truncated down to */
+ int i_truncate_pending; /* still need to call vmtruncate */
+
+ u64 i_max_size; /* max file size authorized by mds */
+ u64 i_reported_size; /* (max_)size reported to or requested of mds */
+ u64 i_wanted_max_size; /* offset we'd like to write too */
+ u64 i_requested_max_size; /* max_size we've requested */
+
+ /* held references to caps */
+ int i_pin_ref;
+ int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
+ int i_wrbuffer_ref, i_wrbuffer_ref_head;
+ u32 i_shared_gen; /* increment each time we get FILE_SHARED */
+ u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */
+ u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
+
+ struct list_head i_unsafe_writes; /* uncommitted sync writes */
+ struct list_head i_unsafe_dirops; /* uncommitted mds dir ops */
+ spinlock_t i_unsafe_lock;
+
+ struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */
+ int i_snap_realm_counter; /* snap realm (if caps) */
+ struct list_head i_snap_realm_item;
+ struct list_head i_snap_flush_item;
+
+ struct work_struct i_wb_work; /* writeback work */
+ struct work_struct i_pg_inv_work; /* page invalidation work */
+
+ struct work_struct i_vmtruncate_work;
+
+#ifdef CONFIG_CEPH_FSCACHE
+ struct fscache_cookie *fscache;
+ u32 i_fscache_gen; /* sequence, for delayed fscache validate */
+ struct work_struct i_revalidate_work;
+#endif
+ struct inode vfs_inode; /* at end */
+};
+
/* map a VFS inode to its containing ceph_inode_info */
static inline struct ceph_inode_info *ceph_inode(struct inode *inode)
{
        return container_of(inode, struct ceph_inode_info, vfs_inode);
}

/* fs client for an inode, via its superblock's s_fs_info */
static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode)
{
        return (struct ceph_fs_client *)inode->i_sb->s_fs_info;
}

/* fs client stored in sb->s_fs_info */
static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb)
{
        return (struct ceph_fs_client *)sb->s_fs_info;
}

/* the ceph ino+snap pair identifying this inode */
static inline struct ceph_vino ceph_vino(struct inode *inode)
{
        return ceph_inode(inode)->i_vino;
}
+
+/*
+ * ino_t is <64 bits on many architectures, blech.
+ *
+ * i_ino (kernel inode) st_ino (userspace)
+ * i386 32 32
+ * x86_64+ino32 64 32
+ * x86_64 64 64
+ */
/*
 * Fold a 64-bit ceph ino into 32 bits by XORing the high and low
 * halves.  Never returns 0: a zero fold is remapped to 2 so the
 * result is always usable as an inode number.
 */
static inline u32 ceph_ino_to_ino32(__u64 vino)
{
        u32 folded = (u32)(vino ^ (vino >> 32));

        return folded ? folded : 2;
}
+
/*
 * kernel i_ino value
 *
 * ino_t is narrower than 64 bits on 32-bit kernels (see the table
 * above), so fold the ceph ino with ceph_ino_to_ino32() there; on
 * 64-bit kernels the ino passes through unchanged.
 */
static inline ino_t ceph_vino_to_ino(struct ceph_vino vino)
{
#if BITS_PER_LONG == 32
        return ceph_ino_to_ino32(vino.ino);
#else
        return (ino_t)vino.ino;
#endif
}
+
/*
 * user-visible ino (stat, filldir)
 *
 * On 64-bit kernels the ino32 mount option asks for 32-bit inos to be
 * reported to userspace; on 32-bit kernels the ino is already 32 bits
 * and is returned unchanged.
 */
#if BITS_PER_LONG == 32
static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino)
{
        return ino;
}
#else
static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino)
{
        if (ceph_test_mount_opt(ceph_sb_to_client(sb), INO32))
                ino = ceph_ino_to_ino32(ino);
        return ino;
}
#endif
+
+
+/* for printf-style formatting */
+#define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap
+
+static inline u64 ceph_ino(struct inode *inode)
+{
+ return ceph_inode(inode)->i_vino.ino;
+}
+static inline u64 ceph_snap(struct inode *inode)
+{
+ return ceph_inode(inode)->i_vino.snap;
+}
+
+static inline int ceph_ino_compare(struct inode *inode, void *data)
+{
+ struct ceph_vino *pvino = (struct ceph_vino *)data;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ return ci->i_vino.ino == pvino->ino &&
+ ci->i_vino.snap == pvino->snap;
+}
+
/*
 * Look up an inode in the sb's inode cache by vino.  The hash key is
 * the (possibly folded) kernel ino; ceph_ino_compare() resolves hash
 * collisions using the full ino+snap pair.
 */
static inline struct inode *ceph_find_inode(struct super_block *sb,
                                            struct ceph_vino vino)
{
        ino_t t = ceph_vino_to_ino(vino);
        return ilookup5(sb, t, ceph_ino_compare, &vino);
}
+
+
/*
 * Per-inode behavior flags (NOTE(review): presumably stored in
 * ci->i_ceph_flags - confirm against caps.c).
 */
#define CEPH_I_NODELAY   4  /* do not delay cap release */
#define CEPH_I_FLUSH     8  /* do not delay flush of dirty metadata */
#define CEPH_I_NOFLUSH  16  /* do not flush dirty caps */

/*
 * Directory "completeness" is tracked with two counters:
 * i_release_count is bumped whenever the cached contents are
 * invalidated, and the dir counts as complete only while
 * i_complete_count has caught up with it.  (Presumably "complete"
 * means the cached dentries cover the whole directory - confirm
 * against dir.c.)
 */
static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
                                           int release_count)
{
        atomic_set(&ci->i_complete_count, release_count);
}

/* invalidate: bump release_count so the counters no longer match */
static inline void __ceph_dir_clear_complete(struct ceph_inode_info *ci)
{
        atomic_inc(&ci->i_release_count);
}

static inline bool __ceph_dir_is_complete(struct ceph_inode_info *ci)
{
        return atomic_read(&ci->i_complete_count) ==
                atomic_read(&ci->i_release_count);
}

/* inode-based wrappers for the above */
static inline void ceph_dir_clear_complete(struct inode *inode)
{
        __ceph_dir_clear_complete(ceph_inode(inode));
}

static inline bool ceph_dir_is_complete(struct inode *inode)
{
        return __ceph_dir_is_complete(ceph_inode(inode));
}
+
+
+/* find a specific frag @f */
+extern struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci,
+ u32 f);
+
+/*
+ * choose fragment for value @v. copy frag content to pfrag, if leaf
+ * exists
+ */
+extern u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
+ struct ceph_inode_frag *pfrag,
+ int *found);
+
+static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry)
+{
+ return (struct ceph_dentry_info *)dentry->d_fsdata;
+}
+
/*
 * Pack a readdir position: the directory fragment goes in the high 32
 * bits of the file offset, the offset within the fragment in the low
 * 32 bits.
 */
static inline loff_t ceph_make_fpos(unsigned frag, unsigned off)
{
        loff_t fpos = (loff_t)frag;

        fpos <<= 32;
        fpos |= (loff_t)off;
        return fpos;
}
+
+/*
+ * caps helpers
+ */
/* true iff this inode currently holds at least one mds capability */
static inline bool __ceph_is_any_real_caps(struct ceph_inode_info *ci)
{
        return !RB_EMPTY_ROOT(&ci->i_caps);
}

extern int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented);
extern int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int t);
extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
                                    struct ceph_cap *cap);

/* take i_ceph_lock around __ceph_caps_issued() */
static inline int ceph_caps_issued(struct ceph_inode_info *ci)
{
        int issued;
        spin_lock(&ci->i_ceph_lock);
        issued = __ceph_caps_issued(ci, NULL);
        spin_unlock(&ci->i_ceph_lock);
        return issued;
}

/* take i_ceph_lock around __ceph_caps_issued_mask() */
static inline int ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask,
                                        int touch)
{
        int r;
        spin_lock(&ci->i_ceph_lock);
        r = __ceph_caps_issued_mask(ci, mask, touch);
        spin_unlock(&ci->i_ceph_lock);
        return r;
}

/*
 * cap bits that are dirty or in flight to the mds.  NOTE(review): the
 * __-prefixed helpers here appear to expect i_ceph_lock held by the
 * caller (the locked wrappers above suggest that convention) - confirm.
 */
static inline int __ceph_caps_dirty(struct ceph_inode_info *ci)
{
        return ci->i_dirty_caps | ci->i_flushing_caps;
}
+extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask);
+
+extern int __ceph_caps_revoking_other(struct ceph_inode_info *ci,
+ struct ceph_cap *ocap, int mask);
+extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask);
+extern int __ceph_caps_used(struct ceph_inode_info *ci);
+
+extern int __ceph_caps_file_wanted(struct ceph_inode_info *ci);
+
/*
 * wanted, by virtue of open file modes AND cap refs (buffered/cached data)
 *
 * If there is buffered/dirty data we also want EXCL so we can keep
 * caching it.
 */
static inline int __ceph_caps_wanted(struct ceph_inode_info *ci)
{
        int w = __ceph_caps_file_wanted(ci) | __ceph_caps_used(ci);
        if (w & CEPH_CAP_FILE_BUFFER)
                w |= CEPH_CAP_FILE_EXCL; /* we want EXCL if dirty data */
        return w;
}
+
+/* what the mds thinks we want */
+extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci);
+
+extern void ceph_caps_init(struct ceph_mds_client *mdsc);
+extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
+extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta);
+extern void ceph_reserve_caps(struct ceph_mds_client *mdsc,
+ struct ceph_cap_reservation *ctx, int need);
+extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
+ struct ceph_cap_reservation *ctx);
+extern void ceph_reservation_status(struct ceph_fs_client *client,
+ int *total, int *avail, int *used,
+ int *reserved, int *min);
+
+
+
+/*
+ * we keep buffered readdir results attached to file->private_data
+ */
+#define CEPH_F_SYNC 1
+#define CEPH_F_ATEND 2
+
+struct ceph_file_info {
+ short fmode; /* initialized on open */
+ short flags; /* CEPH_F_* */
+
+ /* readdir: position within the dir */
+ u32 frag;
+ struct ceph_mds_request *last_readdir;
+
+ /* readdir: position within a frag */
+ unsigned offset; /* offset of last chunk, adjusted for . and .. */
+ unsigned next_offset; /* offset of next chunk (last_name's + 1) */
+ char *last_name; /* last entry in previous chunk */
+ struct dentry *dentry; /* next dentry (for dcache readdir) */
+ int dir_release_count;
+
+ /* used for -o dirstat read() on directory thing */
+ char *dir_info;
+ int dir_info_len;
+};
+
+
+
+/*
+ * A "snap realm" describes a subset of the file hierarchy sharing
+ * the same set of snapshots that apply to it. The realms themselves
+ * are organized into a hierarchy, such that children inherit (some of)
+ * the snapshots of their parents.
+ *
+ * All inodes within the realm that have capabilities are linked into a
+ * per-realm list.
+ */
+struct ceph_snap_realm {
+ u64 ino;
+ atomic_t nref;
+ struct rb_node node;
+
+ u64 created, seq;
+ u64 parent_ino;
+ u64 parent_since; /* snapid when our current parent became so */
+
+ u64 *prior_parent_snaps; /* snaps inherited from any parents we */
+ u32 num_prior_parent_snaps; /* had prior to parent_since */
+ u64 *snaps; /* snaps specific to this realm */
+ u32 num_snaps;
+
+ struct ceph_snap_realm *parent;
+ struct list_head children; /* list of child realms */
+ struct list_head child_item;
+
+ struct list_head empty_item; /* if i have ref==0 */
+
+ struct list_head dirty_item; /* if realm needs new context */
+
+ /* the current set of snaps for this realm */
+ struct ceph_snap_context *cached_context;
+
+ struct list_head inodes_with_caps;
+ spinlock_t inodes_with_caps_lock;
+};
+
/*
 * Default writeback congestion threshold in KB, scaled with total RAM
 * (16 * sqrt(RAM pages), converted to KB) and capped at 256MB.
 */
static inline int default_congestion_kb(void)
{
        int congestion_kb;

        /*
         * Copied from NFS
         *
         * congestion size, scale with available memory.
         *
         *  64MB:    8192k
         * 128MB:   11585k
         * 256MB:   16384k
         * 512MB:   23170k
         *   1GB:   32768k
         *   2GB:   46340k
         *   4GB:   65536k
         *   8GB:   92681k
         *  16GB:  131072k
         *
         * This allows larger machines to have larger/more transfers.
         * Limit the default to 256M
         */
        congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
        if (congestion_kb > 256*1024)
                congestion_kb = 256*1024;

        return congestion_kb;
}
+
+
+
+/* snap.c */
+struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc,
+ u64 ino);
+extern void ceph_get_snap_realm(struct ceph_mds_client *mdsc,
+ struct ceph_snap_realm *realm);
+extern void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
+ struct ceph_snap_realm *realm);
+extern int ceph_update_snap_trace(struct ceph_mds_client *m,
+ void *p, void *e, bool deletion);
+extern void ceph_handle_snap(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session,
+ struct ceph_msg *msg);
+extern void ceph_queue_cap_snap(struct ceph_inode_info *ci);
+extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
+ struct ceph_cap_snap *capsnap);
+extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
+
+/*
+ * a cap_snap is "pending" if it is still awaiting an in-progress
+ * sync write (that may/may not still update size, mtime, etc.).
+ */
+static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci)
+{
+ return !list_empty(&ci->i_cap_snaps) &&
+ list_entry(ci->i_cap_snaps.prev, struct ceph_cap_snap,
+ ci_item)->writing;
+}
+
+/* inode.c */
+extern const struct inode_operations ceph_file_iops;
+
+extern struct inode *ceph_alloc_inode(struct super_block *sb);
+extern void ceph_destroy_inode(struct inode *inode);
+extern int ceph_drop_inode(struct inode *inode);
+
+extern struct inode *ceph_get_inode(struct super_block *sb,
+ struct ceph_vino vino);
+extern struct inode *ceph_get_snapdir(struct inode *parent);
+extern int ceph_fill_file_size(struct inode *inode, int issued,
+ u32 truncate_seq, u64 truncate_size, u64 size);
+extern void ceph_fill_file_time(struct inode *inode, int issued,
+ u64 time_warp_seq, struct timespec *ctime,
+ struct timespec *mtime, struct timespec *atime);
+extern int ceph_fill_trace(struct super_block *sb,
+ struct ceph_mds_request *req,
+ struct ceph_mds_session *session);
+extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
+ struct ceph_mds_session *session);
+
+extern int ceph_inode_holds_cap(struct inode *inode, int mask);
+
+extern int ceph_inode_set_size(struct inode *inode, loff_t size);
+extern void __ceph_do_pending_vmtruncate(struct inode *inode);
+extern void ceph_queue_vmtruncate(struct inode *inode);
+
+extern void ceph_queue_invalidate(struct inode *inode);
+extern void ceph_queue_writeback(struct inode *inode);
+
+extern int ceph_do_getattr(struct inode *inode, int mask);
+extern int ceph_permission(struct inode *inode, int mask);
+extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
+extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat);
+
+/* xattr.c */
+extern int ceph_setxattr(struct dentry *, const char *, const void *,
+ size_t, int);
+int __ceph_setxattr(struct dentry *, const char *, const void *, size_t, int);
+ssize_t __ceph_getxattr(struct inode *, const char *, void *, size_t);
+int __ceph_removexattr(struct dentry *, const char *);
+extern ssize_t ceph_getxattr(struct dentry *, const char *, void *, size_t);
+extern ssize_t ceph_listxattr(struct dentry *, char *, size_t);
+extern int ceph_removexattr(struct dentry *, const char *);
+extern void __ceph_build_xattrs_blob(struct ceph_inode_info *ci);
+extern void __ceph_destroy_xattrs(struct ceph_inode_info *ci);
+extern void __init ceph_xattr_init(void);
+extern void ceph_xattr_exit(void);
+
+/* acl.c */
+extern const struct xattr_handler *ceph_xattr_handlers[];
+
#ifdef CONFIG_CEPH_FS_POSIX_ACL

/* real ACL implementations live in acl.c */
struct posix_acl *ceph_get_acl(struct inode *, int);
int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type);
int ceph_init_acl(struct dentry *, struct inode *, struct inode *);

static inline void ceph_forget_all_cached_acls(struct inode *inode)
{
        forget_all_cached_acls(inode);
}

#else

/* no ACL support: inode ops use NULL get/set handlers */
#define ceph_get_acl NULL
#define ceph_set_acl NULL

static inline int ceph_init_acl(struct dentry *dentry, struct inode *inode,
                                struct inode *dir)
{
        return 0;
}

/*
 * NOTE(review): ceph_acl_chmod has a stub only in this !ACL branch; no
 * ACL-enabled declaration is visible above - confirm a real definition
 * exists in acl.c when CONFIG_CEPH_FS_POSIX_ACL is set.
 */
static inline int ceph_acl_chmod(struct dentry *dentry, struct inode *inode)
{
        return 0;
}

static inline void ceph_forget_all_cached_acls(struct inode *inode)
{
}

#endif
+
+/* caps.c */
+extern const char *ceph_cap_string(int c);
+extern void ceph_handle_caps(struct ceph_mds_session *session,
+ struct ceph_msg *msg);
+extern int ceph_add_cap(struct inode *inode,
+ struct ceph_mds_session *session, u64 cap_id,
+ int fmode, unsigned issued, unsigned wanted,
+ unsigned cap, unsigned seq, u64 realmino, int flags,
+ struct ceph_cap_reservation *caps_reservation);
+extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
+extern void ceph_put_cap(struct ceph_mds_client *mdsc,
+ struct ceph_cap *cap);
+extern int ceph_is_any_caps(struct inode *inode);
+
+extern void __queue_cap_release(struct ceph_mds_session *session, u64 ino,
+ u64 cap_id, u32 migrate_seq, u32 issue_seq);
+extern void ceph_queue_caps_release(struct inode *inode);
+extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc);
+extern int ceph_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync);
+extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session);
+extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
+ int mds);
+extern int ceph_get_cap_mds(struct inode *inode);
+extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
+extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
+extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
+ struct ceph_snap_context *snapc);
+extern void __ceph_flush_snaps(struct ceph_inode_info *ci,
+ struct ceph_mds_session **psession,
+ int again);
+extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
+ struct ceph_mds_session *session);
+extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
+extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
+
+extern int ceph_encode_inode_release(void **p, struct inode *inode,
+ int mds, int drop, int unless, int force);
+extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
+ int mds, int drop, int unless);
+
+extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
+ int *got, loff_t endoff);
+
+/* for counting open files by mode */
+static inline void __ceph_get_fmode(struct ceph_inode_info *ci, int mode)
+{
+ ci->i_nr_by_mode[mode]++;
+}
+extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode);
+
+/* addr.c */
+extern const struct address_space_operations ceph_aops;
+extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
+
+/* file.c */
+extern const struct file_operations ceph_file_fops;
+extern const struct address_space_operations ceph_aops;
+
+extern int ceph_open(struct inode *inode, struct file *file);
+extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
+ struct file *file, unsigned flags, umode_t mode,
+ int *opened);
+extern int ceph_release(struct inode *inode, struct file *filp);
+
+/* dir.c */
+extern const struct file_operations ceph_dir_fops;
+extern const struct inode_operations ceph_dir_iops;
+extern const struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops,
+ ceph_snapdir_dentry_ops;
+
+extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry);
+extern int ceph_handle_snapdir(struct ceph_mds_request *req,
+ struct dentry *dentry, int err);
+extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
+ struct dentry *dentry, int err);
+
+extern void ceph_dentry_lru_add(struct dentry *dn);
+extern void ceph_dentry_lru_touch(struct dentry *dn);
+extern void ceph_dentry_lru_del(struct dentry *dn);
+extern void ceph_invalidate_dentry_lease(struct dentry *dentry);
+extern unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn);
+extern struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry);
+
+/*
+ * our d_ops vary depending on whether the inode is live,
+ * snapshotted (read-only), or a virtual ".snap" directory.
+ */
+int ceph_init_dentry(struct dentry *dentry);
+
+
+/* ioctl.c */
+extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+
+/* export.c */
+extern const struct export_operations ceph_export_ops;
+
+/* locks.c */
+extern __init void ceph_flock_init(void);
+extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl);
+extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl);
+extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num);
+extern int ceph_encode_locks_to_buffer(struct inode *inode,
+ struct ceph_filelock *flocks,
+ int num_fcntl_locks,
+ int num_flock_locks);
+extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
+ struct ceph_pagelist *pagelist,
+ int num_fcntl_locks, int num_flock_locks);
+extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c);
+
+/* debugfs.c */
+extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);
+extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client);
+
+#endif /* _FS_CEPH_SUPER_H */
diff --git a/ceph/xattr.c b/ceph/xattr.c
new file mode 100644
index 0000000..c9c2b88
--- /dev/null
+++ b/ceph/xattr.c
@@ -0,0 +1,1128 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include "super.h"
+#include "mds_client.h"
+
+#include <linux/ceph/decode.h>
+
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/slab.h>
+
+#define XATTR_CEPH_PREFIX "ceph."
+#define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1)
+
+static int __remove_xattr(struct ceph_inode_info *ci,
+ struct ceph_inode_xattr *xattr);
+
+/*
+ * List of handlers for synthetic system.* attributes. Other
+ * attributes are handled directly.
+ */
+const struct xattr_handler *ceph_xattr_handlers[] = {
+#ifdef CONFIG_CEPH_FS_POSIX_ACL
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
+#endif
+ NULL,
+};
+
+/*
+ * Only accept xattr names in a namespace the VFS xattr core knows
+ * about: ceph.*, security.*, system.*, trusted.* or user.*.
+ */
+static bool ceph_is_valid_xattr(const char *name)
+{
+ return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
+ !strncmp(name, XATTR_SECURITY_PREFIX,
+ XATTR_SECURITY_PREFIX_LEN) ||
+ !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
+ !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
+ !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
+}
+
+/*
+ * These define virtual xattrs exposing the recursive directory
+ * statistics and layout metadata.
+ */
+struct ceph_vxattr {
+ char *name;
+ size_t name_size; /* strlen(name) + 1 (for '\0') */
+ size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
+ size_t size);
+ bool readonly, hidden;
+ bool (*exists_cb)(struct ceph_inode_info *ci);
+};
+
+/* layouts */
+
+/*
+ * An explicit file layout "exists" iff any byte of ci->i_layout is
+ * non-zero; an all-zero layout means the inode uses the default.
+ */
+static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
+{
+ size_t s;
+ char *p = (char *)&ci->i_layout;
+
+ for (s = 0; s < sizeof(ci->i_layout); s++, p++)
+ if (*p)
+ return true;
+ return false;
+}
+
+/*
+ * Format the full layout vxattr ("stripe_unit=... stripe_count=...
+ * object_size=... pool=...") into *val.  Follows getxattr semantics:
+ * when size == 0 only the required length is returned; when the
+ * buffer is too small, -ERANGE.  The pool is rendered by name when
+ * the osdmap knows it, otherwise by numeric id.
+ */
+static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ int ret;
+ struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+ struct ceph_osd_client *osdc = &fsc->client->osdc;
+ s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
+ const char *pool_name;
+ char buf[128];
+
+ dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
+ /* map_sem protects osdmap (and thus pool_name) while we copy */
+ down_read(&osdc->map_sem);
+ pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
+ if (pool_name) {
+ size_t len = strlen(pool_name);
+ ret = snprintf(buf, sizeof(buf),
+ "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=",
+ (unsigned long long)ceph_file_layout_su(ci->i_layout),
+ (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
+ (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
+ if (!size) {
+ /* length probe: prefix length + pool name length */
+ ret += len;
+ } else if (ret + len > size) {
+ ret = -ERANGE;
+ } else {
+ memcpy(val, buf, ret);
+ memcpy(val + ret, pool_name, len);
+ ret += len;
+ }
+ } else {
+ ret = snprintf(buf, sizeof(buf),
+ "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
+ (unsigned long long)ceph_file_layout_su(ci->i_layout),
+ (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
+ (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
+ (unsigned long long)pool);
+ if (size) {
+ if (ret <= size)
+ memcpy(val, buf, ret);
+ else
+ ret = -ERANGE;
+ }
+ }
+ up_read(&osdc->map_sem);
+ return ret;
+}
+
+/*
+ * Per-field layout vxattr callbacks.  Each returns the snprintf-style
+ * length (so a size of 0 acts as a length probe, per getxattr rules).
+ */
+static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
+ char *val, size_t size)
+{
+ return snprintf(val, size, "%lld",
+ (unsigned long long)ceph_file_layout_su(ci->i_layout));
+}
+
+static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
+ char *val, size_t size)
+{
+ return snprintf(val, size, "%lld",
+ (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout));
+}
+
+static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
+ char *val, size_t size)
+{
+ return snprintf(val, size, "%lld",
+ (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
+}
+
+/* pool is printed by name when the osdmap knows it, else by id */
+static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
+ char *val, size_t size)
+{
+ int ret;
+ struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+ struct ceph_osd_client *osdc = &fsc->client->osdc;
+ s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
+ const char *pool_name;
+
+ down_read(&osdc->map_sem);
+ pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
+ if (pool_name)
+ ret = snprintf(val, size, "%s", pool_name);
+ else
+ ret = snprintf(val, size, "%lld", (unsigned long long)pool);
+ up_read(&osdc->map_sem);
+ return ret;
+}
+
+/* directories */
+
+/*
+ * Directory statistics vxattrs.  i_files/i_subdirs are direct-child
+ * counts; the r-prefixed fields are recursive totals maintained by
+ * the MDS.  All return snprintf-style lengths.
+ */
+static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs);
+}
+
+static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ return snprintf(val, size, "%lld", ci->i_files);
+}
+
+static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ return snprintf(val, size, "%lld", ci->i_subdirs);
+}
+
+static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs);
+}
+
+static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ return snprintf(val, size, "%lld", ci->i_rfiles);
+}
+
+static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ return snprintf(val, size, "%lld", ci->i_rsubdirs);
+}
+
+static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ return snprintf(val, size, "%lld", ci->i_rbytes);
+}
+
+/*
+ * Recursive ctime rendered as "sec.nsec".  Nanoseconds must be
+ * zero-padded to nine digits, i.e. "%09ld".  The previous format
+ * string "%ld.09%ld" emitted a literal "09" followed by unpadded
+ * nanoseconds, producing ambiguous strings such as "12.091" for 1ns.
+ */
+static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ return snprintf(val, size, "%ld.%09ld", (long)ci->i_rctime.tv_sec,
+ (long)ci->i_rctime.tv_nsec);
+}
+
+
+#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
+#define CEPH_XATTR_NAME2(_type, _name, _name2) \
+ XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
+
+#define XATTR_NAME_CEPH(_type, _name) \
+ { \
+ .name = CEPH_XATTR_NAME(_type, _name), \
+ .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
+ .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
+ .readonly = true, \
+ .hidden = false, \
+ .exists_cb = NULL, \
+ }
+#define XATTR_LAYOUT_FIELD(_type, _name, _field) \
+ { \
+ .name = CEPH_XATTR_NAME2(_type, _name, _field), \
+ .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
+ .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \
+ .readonly = false, \
+ .hidden = true, \
+ .exists_cb = ceph_vxattrcb_layout_exists, \
+ }
+
+/*
+ * Directory vxattr table.  The bare "ceph.dir.layout" entry is hidden
+ * (listed only via its per-field children) and exists only when an
+ * explicit layout has been set.
+ */
+static struct ceph_vxattr ceph_dir_vxattrs[] = {
+ {
+ .name = "ceph.dir.layout",
+ .name_size = sizeof("ceph.dir.layout"),
+ .getxattr_cb = ceph_vxattrcb_layout,
+ .readonly = false,
+ .hidden = true,
+ .exists_cb = ceph_vxattrcb_layout_exists,
+ },
+ XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
+ XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
+ XATTR_LAYOUT_FIELD(dir, layout, object_size),
+ XATTR_LAYOUT_FIELD(dir, layout, pool),
+ XATTR_NAME_CEPH(dir, entries),
+ XATTR_NAME_CEPH(dir, files),
+ XATTR_NAME_CEPH(dir, subdirs),
+ XATTR_NAME_CEPH(dir, rentries),
+ XATTR_NAME_CEPH(dir, rfiles),
+ XATTR_NAME_CEPH(dir, rsubdirs),
+ XATTR_NAME_CEPH(dir, rbytes),
+ XATTR_NAME_CEPH(dir, rctime),
+ { .name = NULL, 0 } /* Required table terminator */
+};
+/* sum of non-hidden name sizes; computed once in ceph_xattr_init() */
+static size_t ceph_dir_vxattrs_name_size; /* total size of all names */
+
+/* files */
+
+/*
+ * Regular-file vxattr table: layout only, all entries hidden from
+ * listxattr and present only when an explicit layout is set.
+ */
+static struct ceph_vxattr ceph_file_vxattrs[] = {
+ {
+ .name = "ceph.file.layout",
+ .name_size = sizeof("ceph.file.layout"),
+ .getxattr_cb = ceph_vxattrcb_layout,
+ .readonly = false,
+ .hidden = true,
+ .exists_cb = ceph_vxattrcb_layout_exists,
+ },
+ XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
+ XATTR_LAYOUT_FIELD(file, layout, stripe_count),
+ XATTR_LAYOUT_FIELD(file, layout, object_size),
+ XATTR_LAYOUT_FIELD(file, layout, pool),
+ { .name = NULL, 0 } /* Required table terminator */
+};
+/* sum of non-hidden name sizes; computed once in ceph_xattr_init() */
+static size_t ceph_file_vxattrs_name_size; /* total size of all names */
+
+/* Select the vxattr table for an inode: dirs, regular files, or none. */
+static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
+{
+ if (S_ISDIR(inode->i_mode))
+ return ceph_dir_vxattrs;
+ else if (S_ISREG(inode->i_mode))
+ return ceph_file_vxattrs;
+ return NULL;
+}
+
+/*
+ * Return the precomputed aggregate name size for a known vxattr
+ * table.  BUG() on an unknown table pointer - callers may only pass
+ * what ceph_inode_vxattrs() returned.
+ */
+static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs)
+{
+ if (vxattrs == ceph_dir_vxattrs)
+ return ceph_dir_vxattrs_name_size;
+ if (vxattrs == ceph_file_vxattrs)
+ return ceph_file_vxattrs_name_size;
+ BUG();
+
+ return 0;
+}
+
+/*
+ * Compute the aggregate size (including terminating '\0') of all
+ * virtual extended attribute names in the given vxattr table.
+ */
+static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
+{
+ struct ceph_vxattr *vxattr;
+ size_t size = 0;
+
+ /* hidden entries are never listed, so they don't count */
+ for (vxattr = vxattrs; vxattr->name; vxattr++)
+ if (!vxattr->hidden)
+ size += vxattr->name_size;
+
+ return size;
+}
+
+/* Routines called at initialization and exit time */
+
+/* Precompute listxattr name-size totals for both vxattr tables. */
+void __init ceph_xattr_init(void)
+{
+ ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs);
+ ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs);
+}
+
+void ceph_xattr_exit(void)
+{
+ ceph_dir_vxattrs_name_size = 0;
+ ceph_file_vxattrs_name_size = 0;
+}
+
+/*
+ * Linear search of the inode's vxattr table for an exact name match;
+ * NULL when the inode has no table or the name is not virtual.
+ */
+static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
+ const char *name)
+{
+ struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
+
+ if (vxattr) {
+ while (vxattr->name) {
+ if (!strcmp(vxattr->name, name))
+ return vxattr;
+ vxattr++;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Insert or update one xattr in ci->i_xattrs.index (rb-tree keyed by
+ * name).  Caller holds i_ceph_lock.
+ *
+ * update_xattr == 0: populating from an MDS blob; name/val point into
+ *   the blob and are NOT owned by the tree (should_free_* end up 0).
+ * update_xattr > 0: a local setxattr; this function takes ownership
+ *   of kmalloc'ed name/val and frees them itself on error.
+ * update_xattr < 0: removal - drop any existing node and free name.
+ *
+ * *newxattr is a preallocated node; it is consumed on insert or
+ * freed (and NULLed) when an existing node is reused.
+ */
+static int __set_xattr(struct ceph_inode_info *ci,
+ const char *name, int name_len,
+ const char *val, int val_len,
+ int flags, int update_xattr,
+ struct ceph_inode_xattr **newxattr)
+{
+ struct rb_node **p;
+ struct rb_node *parent = NULL;
+ struct ceph_inode_xattr *xattr = NULL;
+ int c;
+ int new = 0;
+
+ /* descend the tree; on exact match break with xattr set,
+ * otherwise xattr is reset to NULL each iteration */
+ p = &ci->i_xattrs.index.rb_node;
+ while (*p) {
+ parent = *p;
+ xattr = rb_entry(parent, struct ceph_inode_xattr, node);
+ c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
+ if (c < 0)
+ p = &(*p)->rb_left;
+ else if (c > 0)
+ p = &(*p)->rb_right;
+ else {
+ if (name_len == xattr->name_len)
+ break;
+ else if (name_len < xattr->name_len)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+ xattr = NULL;
+ }
+
+ if (update_xattr) {
+ int err = 0;
+ /* honor XATTR_CREATE/XATTR_REPLACE semantics */
+ if (xattr && (flags & XATTR_CREATE))
+ err = -EEXIST;
+ else if (!xattr && (flags & XATTR_REPLACE))
+ err = -ENODATA;
+ if (err) {
+ kfree(name);
+ kfree(val);
+ return err;
+ }
+ if (update_xattr < 0) {
+ if (xattr)
+ __remove_xattr(ci, xattr);
+ kfree(name);
+ return 0;
+ }
+ }
+
+ if (!xattr) {
+ new = 1;
+ xattr = *newxattr;
+ xattr->name = name;
+ xattr->name_len = name_len;
+ xattr->should_free_name = update_xattr;
+
+ ci->i_xattrs.count++;
+ dout("__set_xattr count=%d\n", ci->i_xattrs.count);
+ } else {
+ /* reuse the existing node; the preallocated one is spare */
+ kfree(*newxattr);
+ *newxattr = NULL;
+ if (xattr->should_free_val)
+ kfree((void *)xattr->val);
+
+ if (update_xattr) {
+ /* keep the node's existing name allocation */
+ kfree((void *)name);
+ name = xattr->name;
+ }
+ ci->i_xattrs.names_size -= xattr->name_len;
+ ci->i_xattrs.vals_size -= xattr->val_len;
+ }
+ ci->i_xattrs.names_size += name_len;
+ ci->i_xattrs.vals_size += val_len;
+ if (val)
+ xattr->val = val;
+ else
+ xattr->val = "";
+
+ xattr->val_len = val_len;
+ xattr->dirty = update_xattr;
+ xattr->should_free_val = (val && update_xattr);
+
+ if (new) {
+ rb_link_node(&xattr->node, parent, p);
+ rb_insert_color(&xattr->node, &ci->i_xattrs.index);
+ dout("__set_xattr_val p=%p\n", p);
+ }
+
+ dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n",
+ ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val);
+
+ return 0;
+}
+
+/*
+ * Look up an xattr node by name.  Caller holds i_ceph_lock.  The
+ * stored names are not NUL-terminated, so compare with strncmp over
+ * the node's length and treat a longer query name as "greater".
+ */
+static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
+ const char *name)
+{
+ struct rb_node **p;
+ struct rb_node *parent = NULL;
+ struct ceph_inode_xattr *xattr = NULL;
+ int name_len = strlen(name);
+ int c;
+
+ p = &ci->i_xattrs.index.rb_node;
+ while (*p) {
+ parent = *p;
+ xattr = rb_entry(parent, struct ceph_inode_xattr, node);
+ c = strncmp(name, xattr->name, xattr->name_len);
+ if (c == 0 && name_len > xattr->name_len)
+ c = 1;
+ if (c < 0)
+ p = &(*p)->rb_left;
+ else if (c > 0)
+ p = &(*p)->rb_right;
+ else {
+ dout("__get_xattr %s: found %.*s\n", name,
+ xattr->val_len, xattr->val);
+ return xattr;
+ }
+ }
+
+ dout("__get_xattr %s: not found\n", name);
+
+ return NULL;
+}
+
+/*
+ * Free a detached xattr node, releasing name/val only when this node
+ * owns them (they may point into the shared MDS blob instead).
+ */
+static void __free_xattr(struct ceph_inode_xattr *xattr)
+{
+ BUG_ON(!xattr);
+
+ if (xattr->should_free_name)
+ kfree((void *)xattr->name);
+ if (xattr->should_free_val)
+ kfree((void *)xattr->val);
+
+ kfree(xattr);
+}
+
+/*
+ * Unlink an xattr node from the tree, update the aggregate size
+ * accounting and free it.  Caller holds i_ceph_lock.  Returns
+ * -ENODATA when passed NULL (i.e. the attribute was not found).
+ */
+static int __remove_xattr(struct ceph_inode_info *ci,
+ struct ceph_inode_xattr *xattr)
+{
+ if (!xattr)
+ return -ENODATA;
+
+ rb_erase(&xattr->node, &ci->i_xattrs.index);
+
+ if (xattr->should_free_name)
+ kfree((void *)xattr->name);
+ if (xattr->should_free_val)
+ kfree((void *)xattr->val);
+
+ ci->i_xattrs.names_size -= xattr->name_len;
+ ci->i_xattrs.vals_size -= xattr->val_len;
+ ci->i_xattrs.count--;
+ kfree(xattr);
+
+ return 0;
+}
+
+/*
+ * Remove an xattr by name: look it up and hand it to __remove_xattr,
+ * which returns -ENODATA when it is not present.  Caller holds
+ * i_ceph_lock.  (The former local 'struct rb_node **p' was assigned
+ * but never read, so it has been dropped.)
+ */
+static int __remove_xattr_by_name(struct ceph_inode_info *ci,
+ const char *name)
+{
+ struct ceph_inode_xattr *xattr;
+ int err;
+
+ xattr = __get_xattr(ci, name);
+ err = __remove_xattr(ci, xattr);
+ return err;
+}
+
+/*
+ * Copy all xattr names, each NUL-terminated, into dest in rb-tree
+ * (sorted) order.  Caller holds i_ceph_lock and has already verified
+ * dest is large enough (names_size + count bytes).  Returns the
+ * position just past the last name written.
+ */
+static char *__copy_xattr_names(struct ceph_inode_info *ci,
+ char *dest)
+{
+ struct rb_node *p;
+ struct ceph_inode_xattr *xattr = NULL;
+
+ p = rb_first(&ci->i_xattrs.index);
+ dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count);
+
+ while (p) {
+ xattr = rb_entry(p, struct ceph_inode_xattr, node);
+ memcpy(dest, xattr->name, xattr->name_len);
+ dest[xattr->name_len] = '\0';
+
+ dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name,
+ xattr->name_len, ci->i_xattrs.names_size);
+
+ dest += xattr->name_len + 1;
+ p = rb_next(p);
+ }
+
+ return dest;
+}
+
+/*
+ * Tear down the whole in-memory xattr index: free every node and
+ * reset all accounting to an empty state.  The next rb node is
+ * fetched before erasing the current one, so iteration stays valid.
+ */
+void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
+{
+ struct rb_node *p, *tmp;
+ struct ceph_inode_xattr *xattr = NULL;
+
+ p = rb_first(&ci->i_xattrs.index);
+
+ dout("__ceph_destroy_xattrs p=%p\n", p);
+
+ while (p) {
+ xattr = rb_entry(p, struct ceph_inode_xattr, node);
+ tmp = p;
+ p = rb_next(tmp);
+ dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p,
+ xattr->name_len, xattr->name);
+ rb_erase(tmp, &ci->i_xattrs.index);
+
+ __free_xattr(xattr);
+ }
+
+ ci->i_xattrs.names_size = 0;
+ ci->i_xattrs.vals_size = 0;
+ ci->i_xattrs.index_version = 0;
+ ci->i_xattrs.count = 0;
+ ci->i_xattrs.index = RB_ROOT;
+}
+
+/*
+ * (Re)build the xattr rb-tree from the encoded MDS blob.  Enters and
+ * exits with i_ceph_lock held, but drops it around the node
+ * allocations; if the blob version changed meanwhile, it retries.
+ * Tree nodes point directly into the blob (no per-entry copies).
+ *
+ * Changes vs. the original: the memset after kcalloc was redundant
+ * (kcalloc returns zeroed memory), and the element size now uses
+ * sizeof(*xattrs) instead of sizeof(struct ceph_xattr *), which named
+ * a struct tag that is declared nowhere and only happened to work
+ * because all object pointers have the same size.
+ */
+static int __build_xattrs(struct inode *inode)
+ __releases(ci->i_ceph_lock)
+ __acquires(ci->i_ceph_lock)
+{
+ u32 namelen;
+ u32 numattr = 0;
+ void *p, *end;
+ u32 len;
+ const char *name, *val;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int xattr_version;
+ struct ceph_inode_xattr **xattrs = NULL;
+ int err = 0;
+ int i;
+
+ dout("__build_xattrs() len=%d\n",
+ ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);
+
+ if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
+ return 0; /* already built */
+
+ __ceph_destroy_xattrs(ci);
+
+start:
+ /* updated internal xattr rb tree */
+ if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
+ p = ci->i_xattrs.blob->vec.iov_base;
+ end = p + ci->i_xattrs.blob->vec.iov_len;
+ ceph_decode_32_safe(&p, end, numattr, bad);
+ xattr_version = ci->i_xattrs.version;
+ spin_unlock(&ci->i_ceph_lock);
+
+ /* kcalloc zeroes the array, so kfree of unused slots is safe */
+ xattrs = kcalloc(numattr, sizeof(*xattrs), GFP_NOFS);
+ err = -ENOMEM;
+ if (!xattrs)
+ goto bad_lock;
+ for (i = 0; i < numattr; i++) {
+ xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
+ GFP_NOFS);
+ if (!xattrs[i])
+ goto bad_lock;
+ }
+
+ spin_lock(&ci->i_ceph_lock);
+ if (ci->i_xattrs.version != xattr_version) {
+ /* lost a race, retry */
+ for (i = 0; i < numattr; i++)
+ kfree(xattrs[i]);
+ kfree(xattrs);
+ xattrs = NULL;
+ goto start;
+ }
+ err = -EIO;
+ while (numattr--) {
+ ceph_decode_32_safe(&p, end, len, bad);
+ namelen = len;
+ name = p;
+ p += len;
+ ceph_decode_32_safe(&p, end, len, bad);
+ val = p;
+ p += len;
+
+ /* update_xattr == 0: names/vals stay owned by the blob */
+ err = __set_xattr(ci, name, namelen, val, len,
+ 0, 0, &xattrs[numattr]);
+
+ if (err < 0)
+ goto bad;
+ }
+ kfree(xattrs);
+ }
+ ci->i_xattrs.index_version = ci->i_xattrs.version;
+ ci->i_xattrs.dirty = false;
+
+ return err;
+bad_lock:
+ spin_lock(&ci->i_ceph_lock);
+bad:
+ if (xattrs) {
+ for (i = 0; i < numattr; i++)
+ kfree(xattrs[i]);
+ kfree(xattrs);
+ }
+ ci->i_xattrs.names_size = 0;
+ return err;
+}
+
+/*
+ * Size needed to encode the current xattr set (plus, optionally, one
+ * additional pending name/value pair) into a blob.
+ */
+static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
+ int val_size)
+{
+ /*
+ * 4 bytes for the length, and additional 4 bytes per each xattr name,
+ * 4 bytes per each value
+ */
+ int size = 4 + ci->i_xattrs.count*(4 + 4) +
+ ci->i_xattrs.names_size +
+ ci->i_xattrs.vals_size;
+ dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n",
+ ci->i_xattrs.count, ci->i_xattrs.names_size,
+ ci->i_xattrs.vals_size);
+
+ if (name_size)
+ size += 4 + 4 + name_size + val_size;
+
+ return size;
+}
+
+/*
+ * If there are dirty xattrs, reencode xattrs into the prealloc_blob
+ * and swap into place.
+ */
+void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
+{
+ struct rb_node *p;
+ struct ceph_inode_xattr *xattr = NULL;
+ void *dest;
+
+ dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
+ if (ci->i_xattrs.dirty) {
+ /* prealloc_blob was sized by the setxattr path; it must fit */
+ int need = __get_required_blob_size(ci, 0, 0);
+
+ BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);
+
+ p = rb_first(&ci->i_xattrs.index);
+ dest = ci->i_xattrs.prealloc_blob->vec.iov_base;
+
+ /* format: u32 count, then (u32 len, name, u32 len, val)* */
+ ceph_encode_32(&dest, ci->i_xattrs.count);
+ while (p) {
+ xattr = rb_entry(p, struct ceph_inode_xattr, node);
+
+ ceph_encode_32(&dest, xattr->name_len);
+ memcpy(dest, xattr->name, xattr->name_len);
+ dest += xattr->name_len;
+ ceph_encode_32(&dest, xattr->val_len);
+ memcpy(dest, xattr->val, xattr->val_len);
+ dest += xattr->val_len;
+
+ p = rb_next(p);
+ }
+
+ /* adjust buffer len; it may be larger than we need */
+ ci->i_xattrs.prealloc_blob->vec.iov_len =
+ dest - ci->i_xattrs.prealloc_blob->vec.iov_base;
+
+ /* swap the freshly encoded blob into place */
+ if (ci->i_xattrs.blob)
+ ceph_buffer_put(ci->i_xattrs.blob);
+ ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
+ ci->i_xattrs.prealloc_blob = NULL;
+ ci->i_xattrs.dirty = false;
+ ci->i_xattrs.version++;
+ }
+}
+
+/*
+ * getxattr core: serve virtual xattrs locally, otherwise consult the
+ * in-memory index (fetching from the MDS first unless we hold
+ * XATTR_SHARED caps and the index is current).  Standard getxattr
+ * semantics: size == 0 is a length probe, too-small buffer -> -ERANGE,
+ * missing attribute -> -ENODATA.
+ */
+ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
+ size_t size)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int err;
+ struct ceph_inode_xattr *xattr;
+ struct ceph_vxattr *vxattr = NULL;
+
+ if (!ceph_is_valid_xattr(name))
+ return -ENODATA;
+
+ /* let's see if a virtual xattr was requested */
+ vxattr = ceph_match_vxattr(inode, name);
+ if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
+ err = vxattr->getxattr_cb(ci, value, size);
+ return err;
+ }
+
+ spin_lock(&ci->i_ceph_lock);
+ dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
+ ci->i_xattrs.version, ci->i_xattrs.index_version);
+
+ if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
+ (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
+ goto get_xattr;
+ } else {
+ spin_unlock(&ci->i_ceph_lock);
+ /* get xattrs from mds (if we don't already have them) */
+ err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
+ if (err)
+ return err;
+ }
+
+ spin_lock(&ci->i_ceph_lock);
+
+ err = __build_xattrs(inode);
+ if (err < 0)
+ goto out;
+
+get_xattr:
+ err = -ENODATA; /* == ENOATTR */
+ xattr = __get_xattr(ci, name);
+ if (!xattr)
+ goto out;
+
+ err = -ERANGE;
+ if (size && size < xattr->val_len)
+ goto out;
+
+ err = xattr->val_len;
+ if (size == 0)
+ goto out;
+
+ memcpy(value, xattr->val, xattr->val_len);
+
+out:
+ spin_unlock(&ci->i_ceph_lock);
+ return err;
+}
+
+/*
+ * VFS getxattr entry point: route system.* (POSIX ACLs) through the
+ * generic handler table, everything else through the ceph path.
+ */
+ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
+ size_t size)
+{
+ if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+ return generic_getxattr(dentry, name, value, size);
+
+ return __ceph_getxattr(dentry->d_inode, name, value, size);
+}
+
+/*
+ * VFS listxattr: emit real xattr names (from the index, refreshing
+ * from the MDS if our caps/version are stale) followed by the
+ * non-hidden virtual ones that currently exist.  size == 0 probes
+ * the required length; a too-small buffer yields -ERANGE.
+ */
+ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
+{
+ struct inode *inode = dentry->d_inode;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
+ u32 vir_namelen = 0;
+ u32 namelen;
+ int err;
+ u32 len;
+ int i;
+
+ spin_lock(&ci->i_ceph_lock);
+ dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
+ ci->i_xattrs.version, ci->i_xattrs.index_version);
+
+ if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
+ (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
+ goto list_xattr;
+ } else {
+ spin_unlock(&ci->i_ceph_lock);
+ err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
+ if (err)
+ return err;
+ }
+
+ spin_lock(&ci->i_ceph_lock);
+
+ err = __build_xattrs(inode);
+ if (err < 0)
+ goto out;
+
+list_xattr:
+ /*
+ * Start with virtual dir xattr names (if any) (including
+ * terminating '\0' characters for each).
+ */
+ vir_namelen = ceph_vxattrs_name_size(vxattrs);
+
+ /* adding 1 byte per each variable due to the null termination */
+ namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
+ err = -ERANGE;
+ if (size && vir_namelen + namelen > size)
+ goto out;
+
+ err = namelen + vir_namelen;
+ if (size == 0)
+ goto out;
+
+ names = __copy_xattr_names(ci, names);
+
+ /* virtual xattr names, too */
+ err = namelen;
+ if (vxattrs) {
+ for (i = 0; vxattrs[i].name; i++) {
+ /* skip hidden entries and ones whose exists_cb says no */
+ if (!vxattrs[i].hidden &&
+ !(vxattrs[i].exists_cb &&
+ !vxattrs[i].exists_cb(ci))) {
+ len = sprintf(names, "%s", vxattrs[i].name);
+ names += len + 1;
+ err += len + 1;
+ }
+ }
+ }
+
+out:
+ spin_unlock(&ci->i_ceph_lock);
+ return err;
+}
+
+/*
+ * Synchronous setxattr: copy the value into pages and send a
+ * CEPH_MDS_OP_SETXATTR request to the auth MDS.  A NULL value turns
+ * the operation into a removal (CEPH_XATTR_REMOVE).
+ *
+ * Fix vs. the original: each kmap(pages[i]) is now balanced with a
+ * kunmap(pages[i]); the mapping was previously left in place, which
+ * leaks kmap slots on 32-bit highmem configurations.
+ */
+static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
+ const char *value, size_t size, int flags)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
+ struct inode *inode = dentry->d_inode;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_mds_request *req;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ int err;
+ int i, nr_pages;
+ struct page **pages = NULL;
+ void *kaddr;
+
+ /* copy value into some pages */
+ nr_pages = calc_pages_for(0, size);
+ if (nr_pages) {
+ pages = kmalloc(sizeof(pages[0])*nr_pages, GFP_NOFS);
+ if (!pages)
+ return -ENOMEM;
+ err = -ENOMEM;
+ for (i = 0; i < nr_pages; i++) {
+ pages[i] = __page_cache_alloc(GFP_NOFS);
+ if (!pages[i]) {
+ /* free only what was allocated so far */
+ nr_pages = i;
+ goto out;
+ }
+ kaddr = kmap(pages[i]);
+ memcpy(kaddr, value + i*PAGE_CACHE_SIZE,
+ min(PAGE_CACHE_SIZE, size-i*PAGE_CACHE_SIZE));
+ kunmap(pages[i]);
+ }
+ }
+
+ dout("setxattr value=%.*s\n", (int)size, value);
+
+ if (!value)
+ flags |= CEPH_XATTR_REMOVE;
+
+ /* do request */
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETXATTR,
+ USE_AUTH_MDS);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto out;
+ }
+ req->r_inode = inode;
+ ihold(inode);
+ req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
+ req->r_num_caps = 1;
+ req->r_args.setxattr.flags = cpu_to_le32(flags);
+ req->r_path2 = kstrdup(name, GFP_NOFS);
+
+ req->r_pages = pages;
+ req->r_num_pages = nr_pages;
+ req->r_data_len = size;
+
+ dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+ ceph_mdsc_put_request(req);
+ dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
+
+out:
+ if (pages) {
+ for (i = 0; i < nr_pages; i++)
+ __free_page(pages[i]);
+ kfree(pages);
+ }
+ return err;
+}
+
+/*
+ * setxattr core.  With XATTR_EXCL caps the update is applied to the
+ * local index (marking caps dirty); otherwise it is sent
+ * synchronously to the MDS.  Read-only vxattrs are rejected;
+ * unhandled ceph.* names are always passed through to the MDS.
+ * Copies of name/value and a spare index node are preallocated
+ * outside the spinlock; __set_xattr takes ownership of them.
+ */
+int __ceph_setxattr(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
+{
+ struct inode *inode = dentry->d_inode;
+ struct ceph_vxattr *vxattr;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int issued;
+ int err;
+ int dirty = 0;
+ int name_len = strlen(name);
+ int val_len = size;
+ char *newname = NULL;
+ char *newval = NULL;
+ struct ceph_inode_xattr *xattr = NULL;
+ int required_blob_size;
+
+ if (!ceph_is_valid_xattr(name))
+ return -EOPNOTSUPP;
+
+ vxattr = ceph_match_vxattr(inode, name);
+ if (vxattr && vxattr->readonly)
+ return -EOPNOTSUPP;
+
+ /* pass any unhandled ceph.* xattrs through to the MDS */
+ if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
+ goto do_sync_unlocked;
+
+ /* preallocate memory for xattr name, value, index node */
+ err = -ENOMEM;
+ newname = kmemdup(name, name_len + 1, GFP_NOFS);
+ if (!newname)
+ goto out;
+
+ if (val_len) {
+ newval = kmemdup(value, val_len, GFP_NOFS);
+ if (!newval)
+ goto out;
+ }
+
+ xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
+ if (!xattr)
+ goto out;
+
+ spin_lock(&ci->i_ceph_lock);
+retry:
+ issued = __ceph_caps_issued(ci, NULL);
+ dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
+ if (!(issued & CEPH_CAP_XATTR_EXCL))
+ goto do_sync;
+ __build_xattrs(inode);
+
+ required_blob_size = __get_required_blob_size(ci, name_len, val_len);
+
+ /* grow the prealloc blob outside the lock, then re-check caps */
+ if (!ci->i_xattrs.prealloc_blob ||
+ required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
+ struct ceph_buffer *blob;
+
+ spin_unlock(&ci->i_ceph_lock);
+ dout(" preaallocating new blob size=%d\n", required_blob_size);
+ blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
+ if (!blob)
+ goto out;
+ spin_lock(&ci->i_ceph_lock);
+ if (ci->i_xattrs.prealloc_blob)
+ ceph_buffer_put(ci->i_xattrs.prealloc_blob);
+ ci->i_xattrs.prealloc_blob = blob;
+ goto retry;
+ }
+
+ /* __set_xattr now owns newname/newval and frees them on error */
+ err = __set_xattr(ci, newname, name_len, newval, val_len,
+ flags, value ? 1 : -1, &xattr);
+
+ if (!err) {
+ dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
+ ci->i_xattrs.dirty = true;
+ inode->i_ctime = CURRENT_TIME;
+ }
+
+ spin_unlock(&ci->i_ceph_lock);
+ if (dirty)
+ __mark_inode_dirty(inode, dirty);
+ return err;
+
+do_sync:
+ spin_unlock(&ci->i_ceph_lock);
+do_sync_unlocked:
+ err = ceph_sync_setxattr(dentry, name, value, size, flags);
+out:
+ kfree(newname);
+ kfree(newval);
+ kfree(xattr);
+ return err;
+}
+
+/*
+ * VFS setxattr entry point: snapshots are read-only; system.* (POSIX
+ * ACLs) goes through the generic handler table, the rest to ceph.
+ */
+int ceph_setxattr(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
+{
+ if (ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
+ return -EROFS;
+
+ if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+ return generic_setxattr(dentry, name, value, size, flags);
+
+ return __ceph_setxattr(dentry, name, value, size, flags);
+}
+
+/*
+ * Send a synchronous CEPH_MDS_OP_RMXATTR request for this inode to
+ * the auth MDS, dropping our XATTR_SHARED cap so the change is seen.
+ */
+static int ceph_send_removexattr(struct dentry *dentry, const char *name)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct inode *inode = dentry->d_inode;
+ struct ceph_mds_request *req;
+ int err;
+
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RMXATTR,
+ USE_AUTH_MDS);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ req->r_inode = inode;
+ ihold(inode);
+ req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
+ req->r_num_caps = 1;
+ req->r_path2 = kstrdup(name, GFP_NOFS);
+
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+ ceph_mdsc_put_request(req);
+ return err;
+}
+
+/*
+ * removexattr core, mirroring __ceph_setxattr: with XATTR_EXCL caps
+ * remove locally (after ensuring the prealloc blob can hold the
+ * resulting set) and mark caps dirty; otherwise fall back to a
+ * synchronous MDS request.  Read-only vxattrs are rejected and
+ * unhandled ceph.* names go straight to the MDS.
+ */
+int __ceph_removexattr(struct dentry *dentry, const char *name)
+{
+ struct inode *inode = dentry->d_inode;
+ struct ceph_vxattr *vxattr;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int issued;
+ int err;
+ int required_blob_size;
+ int dirty;
+
+ if (!ceph_is_valid_xattr(name))
+ return -EOPNOTSUPP;
+
+ vxattr = ceph_match_vxattr(inode, name);
+ if (vxattr && vxattr->readonly)
+ return -EOPNOTSUPP;
+
+ /* pass any unhandled ceph.* xattrs through to the MDS */
+ if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
+ goto do_sync_unlocked;
+
+ err = -ENOMEM;
+ spin_lock(&ci->i_ceph_lock);
+retry:
+ issued = __ceph_caps_issued(ci, NULL);
+ dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
+
+ if (!(issued & CEPH_CAP_XATTR_EXCL))
+ goto do_sync;
+ __build_xattrs(inode);
+
+ required_blob_size = __get_required_blob_size(ci, 0, 0);
+
+ /* grow the prealloc blob outside the lock, then re-check caps */
+ if (!ci->i_xattrs.prealloc_blob ||
+ required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
+ struct ceph_buffer *blob;
+
+ spin_unlock(&ci->i_ceph_lock);
+ dout(" preaallocating new blob size=%d\n", required_blob_size);
+ blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
+ if (!blob)
+ goto out;
+ spin_lock(&ci->i_ceph_lock);
+ if (ci->i_xattrs.prealloc_blob)
+ ceph_buffer_put(ci->i_xattrs.prealloc_blob);
+ ci->i_xattrs.prealloc_blob = blob;
+ goto retry;
+ }
+
+ err = __remove_xattr_by_name(ceph_inode(inode), name);
+
+ dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
+ ci->i_xattrs.dirty = true;
+ inode->i_ctime = CURRENT_TIME;
+ spin_unlock(&ci->i_ceph_lock);
+ if (dirty)
+ __mark_inode_dirty(inode, dirty);
+ return err;
+do_sync:
+ spin_unlock(&ci->i_ceph_lock);
+do_sync_unlocked:
+ err = ceph_send_removexattr(dentry, name);
+out:
+ return err;
+}
+
+/*
+ * VFS removexattr entry point: snapshots are read-only; system.*
+ * (POSIX ACLs) goes through the generic handler table.
+ */
+int ceph_removexattr(struct dentry *dentry, const char *name)
+{
+ if (ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
+ return -EROFS;
+
+ if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+ return generic_removexattr(dentry, name);
+
+ return __ceph_removexattr(dentry, name);
+}
diff --git a/keys/ceph-type.h b/keys/ceph-type.h
new file mode 100644
index 0000000..f69c4ac
--- /dev/null
+++ b/keys/ceph-type.h
@@ -0,0 +1,8 @@
+#ifndef _KEYS_CEPH_TYPE_H
+#define _KEYS_CEPH_TYPE_H
+
+#include <linux/key.h>
+
+extern struct key_type key_type_ceph;
+
+#endif
diff --git a/libceph/Kconfig b/libceph/Kconfig
new file mode 100644
index 0000000..e50cc69
--- /dev/null
+++ b/libceph/Kconfig
@@ -0,0 +1,43 @@
+config CEPH_LIB
+ tristate "Ceph core library"
+ depends on INET
+ select LIBCRC32C
+ select CRYPTO_AES
+ select CRYPTO
+ select KEYS
+ default n
+ help
+ Choose Y or M here to include cephlib, which provides the
+ common functionality to both the Ceph filesystem and
+ to the rados block device (rbd).
+
+ More information at http://ceph.newdream.net/.
+
+ If unsure, say N.
+
+config CEPH_LIB_PRETTYDEBUG
+ bool "Include file:line in ceph debug output"
+ depends on CEPH_LIB
+ default n
+ help
+ If you say Y here, debug output will include a filename and
+ line to aid debugging. This increases kernel size and slows
+ execution slightly when debug call sites are enabled (e.g.,
+ via CONFIG_DYNAMIC_DEBUG).
+
+ If unsure, say N.
+
+config CEPH_LIB_USE_DNS_RESOLVER
+ bool "Use in-kernel support for DNS lookup"
+ depends on CEPH_LIB
+ select DNS_RESOLVER
+ default n
+ help
+ If you say Y here, hostnames (e.g. monitor addresses) will
+ be resolved using the CONFIG_DNS_RESOLVER facility.
+
+ For information on how to use CONFIG_DNS_RESOLVER consult
+ Documentation/networking/dns_resolver.txt
+
+ If unsure, say N.
+
diff --git a/libceph/Makefile b/libceph/Makefile
new file mode 100644
index 0000000..958d985
--- /dev/null
+++ b/libceph/Makefile
@@ -0,0 +1,15 @@
+#
+# Makefile for CEPH filesystem.
+#
+obj-$(CONFIG_CEPH_LIB) += libceph.o
+
+libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
+ mon_client.o \
+ osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
+ debugfs.o \
+ auth.o auth_none.o \
+ crypto.o armor.o \
+ auth_x.o \
+ ceph_fs.o ceph_strings.o ceph_hash.o \
+ pagevec.o snapshot.o
+
diff --git a/libceph/armor.c b/libceph/armor.c
new file mode 100644
index 0000000..1fc1ee1
--- /dev/null
+++ b/libceph/armor.c
@@ -0,0 +1,105 @@
+
+#include <linux/errno.h>
+
+int ceph_armor(char *dst, const char *src, const char *end);
+int ceph_unarmor(char *dst, const char *src, const char *end);
+
+/*
+ * base64 encode/decode.
+ */
+
+static const char *pem_key =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+/* Map a 6-bit value (0..63) to its base64 alphabet character. */
+static int encode_bits(int c)
+{
+ return pem_key[c];
+}
+
+/*
+ * Map a base64 character back to its 6-bit value; '=' padding decodes
+ * to 0 (any non-negative value works for the caller's error check),
+ * anything else yields -EINVAL.
+ */
+static int decode_bits(char c)
+{
+ if (c >= 'A' && c <= 'Z')
+ return c - 'A';
+ if (c >= 'a' && c <= 'z')
+ return c - 'a' + 26;
+ if (c >= '0' && c <= '9')
+ return c - '0' + 52;
+ if (c == '+')
+ return 62;
+ if (c == '/')
+ return 63;
+ if (c == '=')
+ return 0; /* just non-negative, please */
+ return -EINVAL;
+}
+
+/*
+ * Base64-encode [src, end) into dst, '='-padding the final group and
+ * inserting a newline every 64 output characters.  Returns the number
+ * of bytes written; the caller must size dst accordingly.
+ */
+int ceph_armor(char *dst, const char *src, const char *end)
+{
+ int olen = 0;
+ int line = 0;
+
+ while (src < end) {
+ unsigned char a, b, c;
+
+ /* consume up to 3 input bytes, emit 4 output chars */
+ a = *src++;
+ *dst++ = encode_bits(a >> 2);
+ if (src < end) {
+ b = *src++;
+ *dst++ = encode_bits(((a & 3) << 4) | (b >> 4));
+ if (src < end) {
+ c = *src++;
+ *dst++ = encode_bits(((b & 15) << 2) |
+ (c >> 6));
+ *dst++ = encode_bits(c & 63);
+ } else {
+ *dst++ = encode_bits((b & 15) << 2);
+ *dst++ = '=';
+ }
+ } else {
+ *dst++ = encode_bits(((a & 3) << 4));
+ *dst++ = '=';
+ *dst++ = '=';
+ }
+ olen += 4;
+ line += 4;
+ if (line == 64) {
+ line = 0;
+ *(dst++) = '\n';
+ olen++;
+ }
+ }
+ return olen;
+}
+
+/*
+ * Base64-decode [src, end) into dst, skipping newlines and honoring
+ * '=' padding.  Returns the number of decoded bytes, or -EINVAL on a
+ * truncated group or an invalid character.
+ */
+int ceph_unarmor(char *dst, const char *src, const char *end)
+{
+ int olen = 0;
+
+ while (src < end) {
+ int a, b, c, d;
+
+ if (src[0] == '\n') {
+ src++;
+ continue;
+ }
+ /* input must come in complete 4-character groups */
+ if (src + 4 > end)
+ return -EINVAL;
+ a = decode_bits(src[0]);
+ b = decode_bits(src[1]);
+ c = decode_bits(src[2]);
+ d = decode_bits(src[3]);
+ if (a < 0 || b < 0 || c < 0 || d < 0)
+ return -EINVAL;
+
+ *dst++ = (a << 2) | (b >> 4);
+ if (src[2] == '=')
+ return olen + 1;
+ *dst++ = ((b & 15) << 4) | (c >> 2);
+ if (src[3] == '=')
+ return olen + 2;
+ *dst++ = ((c & 3) << 6) | d;
+ olen += 3;
+ src += 4;
+ }
+ return olen;
+}
diff --git a/libceph/auth.c b/libceph/auth.c
new file mode 100644
index 0000000..6b923bc
--- /dev/null
+++ b/libceph/auth.c
@@ -0,0 +1,340 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+#include <linux/ceph/types.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/messenger.h>
+#include "auth_none.h"
+#include "auth_x.h"
+
+
+/*
+ * get protocol handler
+ */
+static u32 supported_protocols[] = {
+ CEPH_AUTH_NONE,
+ CEPH_AUTH_CEPHX
+};
+
+/* instantiate the handler for the negotiated protocol; -ENOENT if unknown */
+static int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol)
+{
+ switch (protocol) {
+ case CEPH_AUTH_NONE:
+ return ceph_auth_none_init(ac);
+ case CEPH_AUTH_CEPHX:
+ return ceph_x_init(ac);
+ default:
+ return -ENOENT;
+ }
+}
+
+/*
+ * setup, teardown.
+ */
+/*
+ * Allocate and initialize an auth client in the "negotiating" state.
+ * @name and @key are borrowed, not copied -- the caller must keep them
+ * valid for the client's lifetime.  Returns ERR_PTR(-ENOMEM) on failure.
+ */
+struct ceph_auth_client *ceph_auth_init(const char *name, const struct ceph_crypto_key *key)
+{
+ struct ceph_auth_client *ac;
+ int ret;
+
+ dout("auth_init name '%s'\n", name);
+
+ ret = -ENOMEM;
+ ac = kzalloc(sizeof(*ac), GFP_NOFS);
+ if (!ac)
+ goto out;
+
+ mutex_init(&ac->mutex);
+ ac->negotiating = true;
+ if (name)
+ ac->name = name;
+ else
+ ac->name = CEPH_AUTH_NAME_DEFAULT;
+ dout("auth_init name %s\n", ac->name);
+ ac->key = key;
+ return ac;
+
+out:
+ return ERR_PTR(ret);
+}
+
+/* tear down the protocol handler (if any) and free the client */
+void ceph_auth_destroy(struct ceph_auth_client *ac)
+{
+ dout("auth_destroy %p\n", ac);
+ if (ac->ops)
+ ac->ops->destroy(ac);
+ kfree(ac);
+}
+
+/*
+ * Reset occurs when reconnecting to the monitor.
+ */
+void ceph_auth_reset(struct ceph_auth_client *ac)
+{
+ mutex_lock(&ac->mutex);
+ dout("auth_reset %p\n", ac);
+ if (ac->ops && !ac->negotiating)
+ ac->ops->reset(ac);
+ ac->negotiating = true;
+ mutex_unlock(&ac->mutex);
+}
+
+/*
+ * Encode our entity name at *p as (type, len, name-bytes), advancing *p.
+ * Returns -ERANGE if it does not fit before @end.
+ */
+int ceph_entity_name_encode(const char *name, void **p, void *end)
+{
+ int len = strlen(name);
+
+ if (*p + 2*sizeof(u32) + len > end)
+ return -ERANGE;
+ ceph_encode_32(p, CEPH_ENTITY_TYPE_CLIENT);
+ ceph_encode_32(p, len);
+ ceph_encode_copy(p, name, len);
+ return 0;
+}
+
+/*
+ * Initiate protocol negotiation with monitor.  Include entity name
+ * and list supported protocols.
+ */
+int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len)
+{
+ struct ceph_mon_request_header *monhdr = buf;
+ void *p = monhdr + 1, *end = buf + len, *lenp;
+ int i, num;
+ int ret;
+
+ mutex_lock(&ac->mutex);
+ dout("auth_build_hello\n");
+ monhdr->have_version = 0;
+ monhdr->session_mon = cpu_to_le16(-1);
+ monhdr->session_mon_tid = 0;
+
+ ceph_encode_32(&p, 0); /* no protocol, yet */
+
+ /* remember where the payload length goes; back-filled below */
+ lenp = p;
+ p += sizeof(u32);
+
+ /* ceph_decode_need is used here purely as a space check on the
+ * output buffer before encoding */
+ ceph_decode_need(&p, end, 1 + sizeof(u32), bad);
+ ceph_encode_8(&p, 1);
+ num = ARRAY_SIZE(supported_protocols);
+ ceph_encode_32(&p, num);
+ ceph_decode_need(&p, end, num * sizeof(u32), bad);
+ for (i = 0; i < num; i++)
+ ceph_encode_32(&p, supported_protocols[i]);
+
+ ret = ceph_entity_name_encode(ac->name, &p, end);
+ if (ret < 0)
+ goto out;
+ ceph_decode_need(&p, end, sizeof(u64), bad);
+ ceph_encode_64(&p, ac->global_id);
+
+ ceph_encode_32(&lenp, p - lenp - sizeof(u32));
+ ret = p - buf;
+out:
+ mutex_unlock(&ac->mutex);
+ return ret;
+
+bad:
+ ret = -ERANGE;
+ goto out;
+}
+
+/*
+ * Build a protocol-specific auth request for the monitor.  Returns the
+ * total message length, or a negative error.  Caller holds ac->mutex.
+ */
+static int ceph_build_auth_request(struct ceph_auth_client *ac,
+ void *msg_buf, size_t msg_len)
+{
+ struct ceph_mon_request_header *monhdr = msg_buf;
+ void *p = monhdr + 1;
+ void *end = msg_buf + msg_len;
+ int ret;
+
+ monhdr->have_version = 0;
+ monhdr->session_mon = cpu_to_le16(-1);
+ monhdr->session_mon_tid = 0;
+
+ ceph_encode_32(&p, ac->protocol);
+
+ /* payload is built past a u32 length slot that is filled in below */
+ ret = ac->ops->build_request(ac, p + sizeof(u32), end);
+ if (ret < 0) {
+ pr_err("error %d building auth method %s request\n", ret,
+ ac->ops->name);
+ goto out;
+ }
+ dout(" built request %d bytes\n", ret);
+ ceph_encode_32(&p, ret);
+ ret = p + ret - msg_buf;
+out:
+ return ret;
+}
+
+/*
+ * Handle auth message from monitor.
+ */
+int ceph_handle_auth_reply(struct ceph_auth_client *ac,
+ void *buf, size_t len,
+ void *reply_buf, size_t reply_len)
+{
+ void *p = buf;
+ void *end = buf + len;
+ int protocol;
+ s32 result;
+ u64 global_id;
+ void *payload, *payload_end;
+ int payload_len;
+ char *result_msg;
+ int result_msg_len;
+ int ret = -EINVAL;
+
+ mutex_lock(&ac->mutex);
+ dout("handle_auth_reply %p %p\n", p, end);
+ ceph_decode_need(&p, end, sizeof(u32) * 3 + sizeof(u64), bad);
+ protocol = ceph_decode_32(&p);
+ result = ceph_decode_32(&p);
+ global_id = ceph_decode_64(&p);
+ payload_len = ceph_decode_32(&p);
+ payload = p;
+ p += payload_len;
+ ceph_decode_need(&p, end, sizeof(u32), bad);
+ result_msg_len = ceph_decode_32(&p);
+ result_msg = p;
+ p += result_msg_len;
+ if (p != end)
+ goto bad;
+
+ dout(" result %d '%.*s' gid %llu len %d\n", result, result_msg_len,
+ result_msg, global_id, payload_len);
+
+ payload_end = payload + payload_len;
+
+ if (global_id && ac->global_id != global_id) {
+ dout(" set global_id %lld -> %lld\n", ac->global_id, global_id);
+ ac->global_id = global_id;
+ }
+
+ if (ac->negotiating) {
+ /* server does not support our protocols? */
+ if (!protocol && result < 0) {
+ ret = result;
+ goto out;
+ }
+ /* set up (new) protocol handler? */
+ if (ac->protocol && ac->protocol != protocol) {
+ ac->ops->destroy(ac);
+ ac->protocol = 0;
+ ac->ops = NULL;
+ }
+ if (ac->protocol != protocol) {
+ ret = ceph_auth_init_protocol(ac, protocol);
+ if (ret) {
+ pr_err("error %d on auth protocol %d init\n",
+ ret, protocol);
+ goto out;
+ }
+ }
+
+ ac->negotiating = false;
+ }
+
+ /* -EAGAIN from the handler means "send another request" */
+ ret = ac->ops->handle_reply(ac, result, payload, payload_end);
+ if (ret == -EAGAIN) {
+ ret = ceph_build_auth_request(ac, reply_buf, reply_len);
+ } else if (ret) {
+ pr_err("auth method '%s' error %d\n", ac->ops->name, ret);
+ }
+
+out:
+ mutex_unlock(&ac->mutex);
+ return ret;
+
+bad:
+ pr_err("failed to decode auth msg\n");
+ ret = -EINVAL;
+ goto out;
+}
+
+/*
+ * Build the next outgoing auth message: the initial hello while no
+ * protocol is selected, otherwise a protocol request if the handler
+ * says (re)authentication is needed.  Returns bytes written or 0.
+ */
+int ceph_build_auth(struct ceph_auth_client *ac,
+ void *msg_buf, size_t msg_len)
+{
+ int ret = 0;
+
+ mutex_lock(&ac->mutex);
+ if (!ac->protocol)
+ ret = ceph_auth_build_hello(ac, msg_buf, msg_len);
+ else if (ac->ops->should_authenticate(ac))
+ ret = ceph_build_auth_request(ac, msg_buf, msg_len);
+ mutex_unlock(&ac->mutex);
+ return ret;
+}
+
+/* nonzero iff a protocol handler exists and reports us authenticated */
+int ceph_auth_is_authenticated(struct ceph_auth_client *ac)
+{
+ int ret = 0;
+
+ mutex_lock(&ac->mutex);
+ if (ac->ops)
+ ret = ac->ops->is_authenticated(ac);
+ mutex_unlock(&ac->mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_auth_is_authenticated);
+
+/* delegate authorizer creation to the protocol handler, if supported */
+int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
+ int peer_type,
+ struct ceph_auth_handshake *auth)
+{
+ int ret = 0;
+
+ mutex_lock(&ac->mutex);
+ if (ac->ops && ac->ops->create_authorizer)
+ ret = ac->ops->create_authorizer(ac, peer_type, auth);
+ mutex_unlock(&ac->mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_auth_create_authorizer);
+
+/* delegate authorizer destruction to the protocol handler, if supported */
+void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a)
+{
+ mutex_lock(&ac->mutex);
+ if (ac->ops && ac->ops->destroy_authorizer)
+ ac->ops->destroy_authorizer(ac, a);
+ mutex_unlock(&ac->mutex);
+}
+EXPORT_SYMBOL(ceph_auth_destroy_authorizer);
+
+/* refresh an existing authorizer for @peer_type, if the handler can */
+int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
+ int peer_type,
+ struct ceph_auth_handshake *a)
+{
+ int ret = 0;
+
+ mutex_lock(&ac->mutex);
+ if (ac->ops && ac->ops->update_authorizer)
+ ret = ac->ops->update_authorizer(ac, peer_type, a);
+ mutex_unlock(&ac->mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_auth_update_authorizer);
+
+/* verify the server's reply to an authorizer, if the handler can */
+int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a, size_t len)
+{
+ int ret = 0;
+
+ mutex_lock(&ac->mutex);
+ if (ac->ops && ac->ops->verify_authorizer_reply)
+ ret = ac->ops->verify_authorizer_reply(ac, a, len);
+ mutex_unlock(&ac->mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_auth_verify_authorizer_reply);
+
+/* mark @peer_type's credentials invalid so they get refreshed */
+void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, int peer_type)
+{
+ mutex_lock(&ac->mutex);
+ if (ac->ops && ac->ops->invalidate_authorizer)
+ ac->ops->invalidate_authorizer(ac, peer_type);
+ mutex_unlock(&ac->mutex);
+}
+EXPORT_SYMBOL(ceph_auth_invalidate_authorizer);
diff --git a/libceph/auth_none.c b/libceph/auth_none.c
new file mode 100644
index 0000000..8c93fa8
--- /dev/null
+++ b/libceph/auth_none.c
@@ -0,0 +1,137 @@
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+
+#include <linux/ceph/decode.h>
+#include <linux/ceph/auth.h>
+
+#include "auth_none.h"
+
+/* return to the pre-authentication state */
+static void reset(struct ceph_auth_client *ac)
+{
+ struct ceph_auth_none_info *xi = ac->private;
+
+ xi->starting = true;
+ xi->built_authorizer = false;
+}
+
+/* free the per-client "none" state */
+static void destroy(struct ceph_auth_client *ac)
+{
+ kfree(ac->private);
+ ac->private = NULL;
+}
+
+/* authenticated as soon as the first reply has been seen */
+static int is_authenticated(struct ceph_auth_client *ac)
+{
+ struct ceph_auth_none_info *xi = ac->private;
+
+ return !xi->starting;
+}
+
+/* only the very first exchange requires a request from us */
+static int should_authenticate(struct ceph_auth_client *ac)
+{
+ struct ceph_auth_none_info *xi = ac->private;
+
+ return xi->starting;
+}
+
+/* "none" has no request payload */
+static int build_request(struct ceph_auth_client *ac, void *buf, void *end)
+{
+ return 0;
+}
+
+/*
+ * the generic auth code decode the global_id, and we carry no actual
+ * authenticate state, so nothing happens here.
+ */
+static int handle_reply(struct ceph_auth_client *ac, int result,
+ void *buf, void *end)
+{
+ struct ceph_auth_none_info *xi = ac->private;
+
+ xi->starting = false;
+ return result;
+}
+
+/*
+ * build an 'authorizer' with our entity_name and global_id.  we can
+ * reuse a single static copy since it is identical for all services
+ * we connect to.
+ */
+static int ceph_auth_none_create_authorizer(
+ struct ceph_auth_client *ac, int peer_type,
+ struct ceph_auth_handshake *auth)
+{
+ struct ceph_auth_none_info *ai = ac->private;
+ struct ceph_none_authorizer *au = &ai->au;
+ void *p, *end;
+ int ret;
+
+ if (!ai->built_authorizer) {
+ p = au->buf;
+ end = p + sizeof(au->buf);
+ ceph_encode_8(&p, 1);
+ /* reserve 8 bytes at the end for the global_id below */
+ ret = ceph_entity_name_encode(ac->name, &p, end - 8);
+ if (ret < 0)
+ goto bad;
+ ceph_decode_need(&p, end, sizeof(u64), bad2);
+ ceph_encode_64(&p, ac->global_id);
+ au->buf_len = p - (void *)au->buf;
+ ai->built_authorizer = true;
+ dout("built authorizer len %d\n", au->buf_len);
+ }
+
+ auth->authorizer = (struct ceph_authorizer *) au;
+ auth->authorizer_buf = au->buf;
+ auth->authorizer_buf_len = au->buf_len;
+ auth->authorizer_reply_buf = au->reply_buf;
+ auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
+
+ return 0;
+
+bad2:
+ ret = -ERANGE;
+bad:
+ return ret;
+}
+
+/* the authorizer lives inside ceph_auth_none_info, so nothing to free */
+static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a)
+{
+ /* nothing to do */
+}
+
+static const struct ceph_auth_client_ops ceph_auth_none_ops = {
+ .name = "none",
+ .reset = reset,
+ .destroy = destroy,
+ .is_authenticated = is_authenticated,
+ .should_authenticate = should_authenticate,
+ .build_request = build_request,
+ .handle_reply = handle_reply,
+ .create_authorizer = ceph_auth_none_create_authorizer,
+ .destroy_authorizer = ceph_auth_none_destroy_authorizer,
+};
+
+/* allocate "none" state and attach it (and our ops) to the client */
+int ceph_auth_none_init(struct ceph_auth_client *ac)
+{
+ struct ceph_auth_none_info *xi;
+
+ dout("ceph_auth_none_init %p\n", ac);
+ xi = kzalloc(sizeof(*xi), GFP_NOFS);
+ if (!xi)
+ return -ENOMEM;
+
+ xi->starting = true;
+ xi->built_authorizer = false;
+
+ ac->protocol = CEPH_AUTH_NONE;
+ ac->private = xi;
+ ac->ops = &ceph_auth_none_ops;
+ return 0;
+}
+
+
diff --git a/libceph/auth_none.h b/libceph/auth_none.h
new file mode 100644
index 0000000..059a3ce
--- /dev/null
+++ b/libceph/auth_none.h
@@ -0,0 +1,29 @@
+#ifndef _FS_CEPH_AUTH_NONE_H
+#define _FS_CEPH_AUTH_NONE_H
+
+#include <linux/slab.h>
+#include <linux/ceph/auth.h>
+
+/*
+ * null security mode.
+ *
+ * we use a single static authorizer that simply encodes our entity name
+ * and global id.
+ */
+
+struct ceph_none_authorizer {
+ char buf[128]; /* encoded entity name + global_id */
+ int buf_len; /* valid bytes in buf */
+ char reply_buf[0]; /* zero length: "none" expects no reply payload */
+};
+
+struct ceph_auth_none_info {
+ bool starting; /* true until the first server reply */
+ bool built_authorizer; /* au has been filled in */
+ struct ceph_none_authorizer au; /* we only need one; it's static */
+};
+
+int ceph_auth_none_init(struct ceph_auth_client *ac);
+
+#endif
+
diff --git a/libceph/auth_x.c b/libceph/auth_x.c
new file mode 100644
index 0000000..96238ba
--- /dev/null
+++ b/libceph/auth_x.c
@@ -0,0 +1,711 @@
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+
+#include <linux/ceph/decode.h>
+#include <linux/ceph/auth.h>
+
+#include "crypto.h"
+#include "auth_x.h"
+#include "auth_x_protocol.h"
+
+#define TEMP_TICKET_BUF_LEN 256
+
+static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed);
+
+/* authenticated iff we hold valid tickets for every wanted service */
+static int ceph_x_is_authenticated(struct ceph_auth_client *ac)
+{
+ struct ceph_x_info *xi = ac->private;
+ int need;
+
+ ceph_x_validate_tickets(ac, &need);
+ dout("ceph_x_is_authenticated want=%d need=%d have=%d\n",
+ ac->want_keys, need, xi->have_keys);
+ return (ac->want_keys & xi->have_keys) == ac->want_keys;
+}
+
+/* a new request is needed whenever some wanted ticket is missing/stale */
+static int ceph_x_should_authenticate(struct ceph_auth_client *ac)
+{
+ struct ceph_x_info *xi = ac->private;
+ int need;
+
+ ceph_x_validate_tickets(ac, &need);
+ dout("ceph_x_should_authenticate want=%d need=%d have=%d\n",
+ ac->want_keys, need, xi->have_keys);
+ return need != 0;
+}
+
+/*
+ * Worst-case encrypted size for an @ilen-byte payload: encrypt header,
+ * payload, up to 16 bytes of cipher padding, plus the leading length u32.
+ */
+static int ceph_x_encrypt_buflen(int ilen)
+{
+ return sizeof(struct ceph_x_encrypt_header) + ilen + 16 +
+ sizeof(u32);
+}
+
+/*
+ * Encrypt header+ibuf with @secret into obuf as (u32 len, ciphertext).
+ * Returns total bytes written (len + 4) or a negative error.
+ */
+static int ceph_x_encrypt(struct ceph_crypto_key *secret,
+ void *ibuf, int ilen, void *obuf, size_t olen)
+{
+ struct ceph_x_encrypt_header head = {
+ .struct_v = 1,
+ .magic = cpu_to_le64(CEPHX_ENC_MAGIC)
+ };
+ size_t len = olen - sizeof(u32);
+ int ret;
+
+ ret = ceph_encrypt2(secret, obuf + sizeof(u32), &len,
+ &head, sizeof(head), ibuf, ilen);
+ if (ret)
+ return ret;
+ ceph_encode_32(&obuf, len);
+ return len + sizeof(u32);
+}
+
+/*
+ * Decrypt a (u32 len, ciphertext) blob at *p with @secret into obuf,
+ * verifying the embedded header magic.  Advances *p past the blob and
+ * returns the plaintext length, or a negative error (-EPERM on a magic
+ * mismatch, i.e. wrong key).
+ */
+static int ceph_x_decrypt(struct ceph_crypto_key *secret,
+ void **p, void *end, void *obuf, size_t olen)
+{
+ struct ceph_x_encrypt_header head;
+ size_t head_len = sizeof(head);
+ int len, ret;
+
+ len = ceph_decode_32(p);
+ if (*p + len > end)
+ return -EINVAL;
+
+ dout("ceph_x_decrypt len %d\n", len);
+ ret = ceph_decrypt2(secret, &head, &head_len, obuf, &olen,
+ *p, len);
+ if (ret)
+ return ret;
+ if (head.struct_v != 1 || le64_to_cpu(head.magic) != CEPHX_ENC_MAGIC)
+ return -EPERM;
+ *p += len;
+ return olen;
+}
+
+/*
+ * get existing (or insert new) ticket handler
+ */
+static struct ceph_x_ticket_handler *
+get_ticket_handler(struct ceph_auth_client *ac, int service)
+{
+ struct ceph_x_ticket_handler *th;
+ struct ceph_x_info *xi = ac->private;
+ struct rb_node *parent = NULL, **p = &xi->ticket_handlers.rb_node;
+
+ /* rbtree keyed by service id */
+ while (*p) {
+ parent = *p;
+ th = rb_entry(parent, struct ceph_x_ticket_handler, node);
+ if (service < th->service)
+ p = &(*p)->rb_left;
+ else if (service > th->service)
+ p = &(*p)->rb_right;
+ else
+ return th;
+ }
+
+ /* add it */
+ th = kzalloc(sizeof(*th), GFP_NOFS);
+ if (!th)
+ return ERR_PTR(-ENOMEM);
+ th->service = service;
+ rb_link_node(&th->node, parent, p);
+ rb_insert_color(&th->node, &xi->ticket_handlers);
+ return th;
+}
+
+/* unlink a ticket handler and release its key and blob */
+static void remove_ticket_handler(struct ceph_auth_client *ac,
+ struct ceph_x_ticket_handler *th)
+{
+ struct ceph_x_info *xi = ac->private;
+
+ dout("remove_ticket_handler %p %d\n", th, th->service);
+ rb_erase(&th->node, &xi->ticket_handlers);
+ ceph_crypto_key_destroy(&th->session_key);
+ if (th->ticket_blob)
+ ceph_buffer_put(th->ticket_blob);
+ kfree(th);
+}
+
+/*
+ * Parse a ticket reply from the server: for each ticket, decrypt the
+ * client part with @secret, extract the new session key and validity,
+ * decode the (possibly encrypted) service ticket blob, and install it
+ * in the matching ticket handler.
+ */
+static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
+ struct ceph_crypto_key *secret,
+ void *buf, void *end)
+{
+ struct ceph_x_info *xi = ac->private;
+ int num;
+ void *p = buf;
+ int ret;
+ char *dbuf;
+ char *ticket_buf;
+ u8 reply_struct_v;
+
+ dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
+ if (!dbuf)
+ return -ENOMEM;
+
+ ret = -ENOMEM;
+ ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
+ if (!ticket_buf)
+ goto out_dbuf;
+
+ ceph_decode_need(&p, end, 1 + sizeof(u32), bad);
+ reply_struct_v = ceph_decode_8(&p);
+ if (reply_struct_v != 1)
+ goto bad;
+ num = ceph_decode_32(&p);
+ dout("%d tickets\n", num);
+ while (num--) {
+ int type;
+ u8 tkt_struct_v, blob_struct_v;
+ struct ceph_x_ticket_handler *th;
+ void *dp, *dend;
+ int dlen;
+ char is_enc;
+ struct timespec validity;
+ struct ceph_crypto_key old_key;
+ void *tp, *tpend;
+ struct ceph_timespec new_validity;
+ struct ceph_crypto_key new_session_key;
+ struct ceph_buffer *new_ticket_blob;
+ unsigned long new_expires, new_renew_after;
+ u64 new_secret_id;
+
+ ceph_decode_need(&p, end, sizeof(u32) + 1, bad);
+
+ type = ceph_decode_32(&p);
+ dout(" ticket type %d %s\n", type, ceph_entity_type_name(type));
+
+ tkt_struct_v = ceph_decode_8(&p);
+ if (tkt_struct_v != 1)
+ goto bad;
+
+ th = get_ticket_handler(ac, type);
+ if (IS_ERR(th)) {
+ ret = PTR_ERR(th);
+ goto out;
+ }
+
+ /* blob for me */
+ dlen = ceph_x_decrypt(secret, &p, end, dbuf,
+ TEMP_TICKET_BUF_LEN);
+ if (dlen <= 0) {
+ ret = dlen;
+ goto out;
+ }
+ dout(" decrypted %d bytes\n", dlen);
+ dend = dbuf + dlen;
+ dp = dbuf;
+
+ tkt_struct_v = ceph_decode_8(&dp);
+ if (tkt_struct_v != 1)
+ goto bad;
+
+ /* keep the old session key: the new ticket blob may be
+ * encrypted with it */
+ memcpy(&old_key, &th->session_key, sizeof(old_key));
+ ret = ceph_crypto_key_decode(&new_session_key, &dp, dend);
+ if (ret)
+ goto out;
+
+ ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
+ ceph_decode_timespec(&validity, &new_validity);
+ new_expires = get_seconds() + validity.tv_sec;
+ /* renew once 3/4 of the validity window has elapsed */
+ new_renew_after = new_expires - (validity.tv_sec / 4);
+ dout(" expires=%lu renew_after=%lu\n", new_expires,
+ new_renew_after);
+
+ /* ticket blob for service */
+ ceph_decode_8_safe(&p, end, is_enc, bad);
+ tp = ticket_buf;
+ if (is_enc) {
+ /* encrypted */
+ dout(" encrypted ticket\n");
+ dlen = ceph_x_decrypt(&old_key, &p, end, ticket_buf,
+ TEMP_TICKET_BUF_LEN);
+ if (dlen < 0) {
+ ret = dlen;
+ goto out;
+ }
+ dlen = ceph_decode_32(&tp);
+ } else {
+ /* unencrypted */
+ ceph_decode_32_safe(&p, end, dlen, bad);
+ ceph_decode_need(&p, end, dlen, bad);
+ ceph_decode_copy(&p, ticket_buf, dlen);
+ }
+ tpend = tp + dlen;
+ dout(" ticket blob is %d bytes\n", dlen);
+ ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
+ blob_struct_v = ceph_decode_8(&tp);
+ new_secret_id = ceph_decode_64(&tp);
+ ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
+ if (ret)
+ goto out;
+
+ /* all is well, update our ticket */
+ ceph_crypto_key_destroy(&th->session_key);
+ if (th->ticket_blob)
+ ceph_buffer_put(th->ticket_blob);
+ th->session_key = new_session_key;
+ th->ticket_blob = new_ticket_blob;
+ th->validity = new_validity;
+ th->secret_id = new_secret_id;
+ th->expires = new_expires;
+ th->renew_after = new_renew_after;
+ dout(" got ticket service %d (%s) secret_id %lld len %d\n",
+ type, ceph_entity_type_name(type), th->secret_id,
+ (int)th->ticket_blob->vec.iov_len);
+ xi->have_keys |= th->service;
+ }
+
+ ret = 0;
+out:
+ kfree(ticket_buf);
+out_dbuf:
+ kfree(dbuf);
+ return ret;
+
+bad:
+ ret = -EINVAL;
+ goto out;
+}
+
+/*
+ * Build an authorizer for @th's service into @au: part A (global_id,
+ * service id, ticket blob) followed by part B (a random nonce encrypted
+ * with the ticket's session key).  Reuses au->buf when large enough.
+ */
+static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
+ struct ceph_x_ticket_handler *th,
+ struct ceph_x_authorizer *au)
+{
+ int maxlen;
+ struct ceph_x_authorize_a *msg_a;
+ struct ceph_x_authorize_b msg_b;
+ void *p, *end;
+ int ret;
+ int ticket_blob_len =
+ (th->ticket_blob ? th->ticket_blob->vec.iov_len : 0);
+
+ dout("build_authorizer for %s %p\n",
+ ceph_entity_type_name(th->service), au);
+
+ maxlen = sizeof(*msg_a) + sizeof(msg_b) +
+ ceph_x_encrypt_buflen(ticket_blob_len);
+ dout(" need len %d\n", maxlen);
+ if (au->buf && au->buf->alloc_len < maxlen) {
+ ceph_buffer_put(au->buf);
+ au->buf = NULL;
+ }
+ if (!au->buf) {
+ au->buf = ceph_buffer_new(maxlen, GFP_NOFS);
+ if (!au->buf)
+ return -ENOMEM;
+ }
+ au->service = th->service;
+ au->secret_id = th->secret_id;
+
+ msg_a = au->buf->vec.iov_base;
+ msg_a->struct_v = 1;
+ msg_a->global_id = cpu_to_le64(ac->global_id);
+ msg_a->service_id = cpu_to_le32(th->service);
+ msg_a->ticket_blob.struct_v = 1;
+ msg_a->ticket_blob.secret_id = cpu_to_le64(th->secret_id);
+ msg_a->ticket_blob.blob_len = cpu_to_le32(ticket_blob_len);
+ if (ticket_blob_len) {
+ memcpy(msg_a->ticket_blob.blob, th->ticket_blob->vec.iov_base,
+ th->ticket_blob->vec.iov_len);
+ }
+ dout(" th %p secret_id %lld %lld\n", th, th->secret_id,
+ le64_to_cpu(msg_a->ticket_blob.secret_id));
+
+ /* part B goes right after part A's flexible ticket blob */
+ p = msg_a + 1;
+ p += ticket_blob_len;
+ end = au->buf->vec.iov_base + au->buf->vec.iov_len;
+
+ get_random_bytes(&au->nonce, sizeof(au->nonce));
+ msg_b.struct_v = 1;
+ msg_b.nonce = cpu_to_le64(au->nonce);
+ ret = ceph_x_encrypt(&th->session_key, &msg_b, sizeof(msg_b),
+ p, end - p);
+ if (ret < 0)
+ goto out_buf;
+ p += ret;
+ au->buf->vec.iov_len = p - au->buf->vec.iov_base;
+ dout(" built authorizer nonce %llx len %d\n", au->nonce,
+ (int)au->buf->vec.iov_len);
+ BUG_ON(au->buf->vec.iov_len > maxlen);
+ return 0;
+
+out_buf:
+ ceph_buffer_put(au->buf);
+ au->buf = NULL;
+ return ret;
+}
+
+/*
+ * Encode @th's current ticket (secret_id + blob, empty blob if none)
+ * at *p.  ceph_decode_need here is only a space check on the output
+ * buffer.  Returns 0 or -ERANGE.
+ */
+static int ceph_x_encode_ticket(struct ceph_x_ticket_handler *th,
+ void **p, void *end)
+{
+ ceph_decode_need(p, end, 1 + sizeof(u64), bad);
+ ceph_encode_8(p, 1);
+ ceph_encode_64(p, th->secret_id);
+ if (th->ticket_blob) {
+ const char *buf = th->ticket_blob->vec.iov_base;
+ u32 len = th->ticket_blob->vec.iov_len;
+
+ ceph_encode_32_safe(p, end, len, bad);
+ ceph_encode_copy_safe(p, end, buf, len, bad);
+ } else {
+ ceph_encode_32_safe(p, end, 0, bad);
+ }
+
+ return 0;
+bad:
+ return -ERANGE;
+}
+
+/*
+ * Recompute which wanted service keys are missing, due for renewal, or
+ * expired; expired keys are dropped from have_keys.  *pneed gets the
+ * bitmask of services we must (re)request tickets for.
+ */
+static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
+{
+ int want = ac->want_keys;
+ struct ceph_x_info *xi = ac->private;
+ int service;
+
+ *pneed = ac->want_keys & ~(xi->have_keys);
+
+ /* services are single bits; walk each wanted one */
+ for (service = 1; service <= want; service <<= 1) {
+ struct ceph_x_ticket_handler *th;
+
+ if (!(ac->want_keys & service))
+ continue;
+
+ if (*pneed & service)
+ continue;
+
+ th = get_ticket_handler(ac, service);
+
+ if (IS_ERR(th)) {
+ *pneed |= service;
+ continue;
+ }
+
+ if (get_seconds() >= th->renew_after)
+ *pneed |= service;
+ if (get_seconds() >= th->expires)
+ xi->have_keys &= ~service;
+ }
+}
+
+
+/*
+ * Build the next cephx request into [buf, end): either the initial
+ * GET_AUTH_SESSION_KEY challenge response, or a GET_PRINCIPAL_SESSION_KEY
+ * request for the remaining needed service tickets.  Returns bytes
+ * written, 0 if nothing is needed, or a negative error.
+ */
+static int ceph_x_build_request(struct ceph_auth_client *ac,
+ void *buf, void *end)
+{
+ struct ceph_x_info *xi = ac->private;
+ int need;
+ struct ceph_x_request_header *head = buf;
+ int ret;
+ struct ceph_x_ticket_handler *th =
+ get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
+
+ if (IS_ERR(th))
+ return PTR_ERR(th);
+
+ ceph_x_validate_tickets(ac, &need);
+
+ dout("build_request want %x have %x need %x\n",
+ ac->want_keys, xi->have_keys, need);
+
+ if (need & CEPH_ENTITY_TYPE_AUTH) {
+ struct ceph_x_authenticate *auth = (void *)(head + 1);
+ void *p = auth + 1;
+ struct ceph_x_challenge_blob tmp;
+ char tmp_enc[40];
+ u64 *u;
+
+ if (p > end)
+ return -ERANGE;
+
+ dout(" get_auth_session_key\n");
+ head->op = cpu_to_le16(CEPHX_GET_AUTH_SESSION_KEY);
+
+ /* encrypt and hash */
+ get_random_bytes(&auth->client_challenge, sizeof(u64));
+ tmp.client_challenge = auth->client_challenge;
+ tmp.server_challenge = cpu_to_le64(xi->server_challenge);
+ ret = ceph_x_encrypt(&xi->secret, &tmp, sizeof(tmp),
+ tmp_enc, sizeof(tmp_enc));
+ if (ret < 0)
+ return ret;
+
+ /* proof-of-key: XOR-fold the ciphertext into a u64 */
+ auth->struct_v = 1;
+ auth->key = 0;
+ for (u = (u64 *)tmp_enc; u + 1 <= (u64 *)(tmp_enc + ret); u++)
+ auth->key ^= *(__le64 *)u;
+ dout(" server_challenge %llx client_challenge %llx key %llx\n",
+ xi->server_challenge, le64_to_cpu(auth->client_challenge),
+ le64_to_cpu(auth->key));
+
+ /* now encode the old ticket if exists */
+ ret = ceph_x_encode_ticket(th, &p, end);
+ if (ret < 0)
+ return ret;
+
+ return p - buf;
+ }
+
+ if (need) {
+ void *p = head + 1;
+ struct ceph_x_service_ticket_request *req;
+
+ if (p > end)
+ return -ERANGE;
+ head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY);
+
+ ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer);
+ if (ret)
+ return ret;
+ ceph_encode_copy(&p, xi->auth_authorizer.buf->vec.iov_base,
+ xi->auth_authorizer.buf->vec.iov_len);
+
+ req = p;
+ req->keys = cpu_to_le32(need);
+ p += sizeof(*req);
+ return p - buf;
+ }
+
+ return 0;
+}
+
+/*
+ * Handle a cephx reply: the first message is the server challenge
+ * (answered via -EAGAIN, which makes the caller build a request);
+ * later replies carry tickets.  Returns 0 once all wanted keys are
+ * held, -EAGAIN to continue the exchange, or a negative error.
+ */
+static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
+ void *buf, void *end)
+{
+ struct ceph_x_info *xi = ac->private;
+ struct ceph_x_reply_header *head = buf;
+ struct ceph_x_ticket_handler *th;
+ int len = end - buf;
+ int op;
+ int ret;
+
+ if (result)
+ return result; /* XXX hmm? */
+
+ if (xi->starting) {
+ /* it's a hello */
+ struct ceph_x_server_challenge *sc = buf;
+
+ if (len != sizeof(*sc))
+ return -EINVAL;
+ xi->server_challenge = le64_to_cpu(sc->server_challenge);
+ dout("handle_reply got server challenge %llx\n",
+ xi->server_challenge);
+ xi->starting = false;
+ xi->have_keys &= ~CEPH_ENTITY_TYPE_AUTH;
+ return -EAGAIN;
+ }
+
+ op = le16_to_cpu(head->op);
+ result = le32_to_cpu(head->result);
+ dout("handle_reply op %d result %d\n", op, result);
+ switch (op) {
+ case CEPHX_GET_AUTH_SESSION_KEY:
+ /* verify auth key */
+ ret = ceph_x_proc_ticket_reply(ac, &xi->secret,
+ buf + sizeof(*head), end);
+ break;
+
+ case CEPHX_GET_PRINCIPAL_SESSION_KEY:
+ th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
+ if (IS_ERR(th))
+ return PTR_ERR(th);
+ ret = ceph_x_proc_ticket_reply(ac, &th->session_key,
+ buf + sizeof(*head), end);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ if (ret)
+ return ret;
+ if (ac->want_keys == xi->have_keys)
+ return 0;
+ return -EAGAIN;
+}
+
+/* allocate and build a per-connection authorizer for @peer_type */
+static int ceph_x_create_authorizer(
+ struct ceph_auth_client *ac, int peer_type,
+ struct ceph_auth_handshake *auth)
+{
+ struct ceph_x_authorizer *au;
+ struct ceph_x_ticket_handler *th;
+ int ret;
+
+ th = get_ticket_handler(ac, peer_type);
+ if (IS_ERR(th))
+ return PTR_ERR(th);
+
+ au = kzalloc(sizeof(*au), GFP_NOFS);
+ if (!au)
+ return -ENOMEM;
+
+ ret = ceph_x_build_authorizer(ac, th, au);
+ if (ret) {
+ kfree(au);
+ return ret;
+ }
+
+ auth->authorizer = (struct ceph_authorizer *) au;
+ auth->authorizer_buf = au->buf->vec.iov_base;
+ auth->authorizer_buf_len = au->buf->vec.iov_len;
+ auth->authorizer_reply_buf = au->reply_buf;
+ auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
+
+ return 0;
+}
+
+/* rebuild an authorizer whose ticket is older than the current one */
+static int ceph_x_update_authorizer(
+ struct ceph_auth_client *ac, int peer_type,
+ struct ceph_auth_handshake *auth)
+{
+ struct ceph_x_authorizer *au;
+ struct ceph_x_ticket_handler *th;
+
+ th = get_ticket_handler(ac, peer_type);
+ if (IS_ERR(th))
+ return PTR_ERR(th);
+
+ au = (struct ceph_x_authorizer *)auth->authorizer;
+ if (au->secret_id < th->secret_id) {
+ dout("ceph_x_update_authorizer service %u secret %llu < %llu\n",
+ au->service, au->secret_id, th->secret_id);
+ return ceph_x_build_authorizer(ac, th, au);
+ }
+ return 0;
+}
+
+/*
+ * Decrypt the server's authorizer reply and check it echoes our
+ * nonce + 1; -EPERM on mismatch.
+ */
+static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a, size_t len)
+{
+ struct ceph_x_authorizer *au = (void *)a;
+ struct ceph_x_ticket_handler *th;
+ int ret = 0;
+ struct ceph_x_authorize_reply reply;
+ void *p = au->reply_buf;
+ void *end = p + sizeof(au->reply_buf);
+
+ th = get_ticket_handler(ac, au->service);
+ if (IS_ERR(th))
+ return PTR_ERR(th);
+ ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply));
+ if (ret < 0)
+ return ret;
+ if (ret != sizeof(reply))
+ return -EPERM;
+
+ if (au->nonce + 1 != le64_to_cpu(reply.nonce_plus_one))
+ ret = -EPERM;
+ else
+ ret = 0;
+ dout("verify_authorizer_reply nonce %llx got %llx ret %d\n",
+ au->nonce, le64_to_cpu(reply.nonce_plus_one), ret);
+ return ret;
+}
+
+/* free an authorizer created by ceph_x_create_authorizer */
+static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a)
+{
+ struct ceph_x_authorizer *au = (void *)a;
+
+ ceph_buffer_put(au->buf);
+ kfree(au);
+}
+
+
+/* forget the server challenge; the exchange restarts from scratch */
+static void ceph_x_reset(struct ceph_auth_client *ac)
+{
+ struct ceph_x_info *xi = ac->private;
+
+ dout("reset\n");
+ xi->starting = true;
+ xi->server_challenge = 0;
+}
+
+/* release all cephx state: secret, ticket handlers, cached authorizer */
+static void ceph_x_destroy(struct ceph_auth_client *ac)
+{
+ struct ceph_x_info *xi = ac->private;
+ struct rb_node *p;
+
+ dout("ceph_x_destroy %p\n", ac);
+ ceph_crypto_key_destroy(&xi->secret);
+
+ while ((p = rb_first(&xi->ticket_handlers)) != NULL) {
+ struct ceph_x_ticket_handler *th =
+ rb_entry(p, struct ceph_x_ticket_handler, node);
+ remove_ticket_handler(ac, th);
+ }
+
+ if (xi->auth_authorizer.buf)
+ ceph_buffer_put(xi->auth_authorizer.buf);
+
+ kfree(ac->private);
+ ac->private = NULL;
+}
+
+/* zero the ticket's validity so the next check treats it as stale */
+static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
+ int peer_type)
+{
+ struct ceph_x_ticket_handler *th;
+
+ th = get_ticket_handler(ac, peer_type);
+ if (!IS_ERR(th))
+ memset(&th->validity, 0, sizeof(th->validity));
+}
+
+
+static const struct ceph_auth_client_ops ceph_x_ops = {
+ .name = "x",
+ .is_authenticated = ceph_x_is_authenticated,
+ .should_authenticate = ceph_x_should_authenticate,
+ .build_request = ceph_x_build_request,
+ .handle_reply = ceph_x_handle_reply,
+ .create_authorizer = ceph_x_create_authorizer,
+ .update_authorizer = ceph_x_update_authorizer,
+ .verify_authorizer_reply = ceph_x_verify_authorizer_reply,
+ .destroy_authorizer = ceph_x_destroy_authorizer,
+ .invalidate_authorizer = ceph_x_invalidate_authorizer,
+ .reset = ceph_x_reset,
+ .destroy = ceph_x_destroy,
+};
+
+
+/*
+ * Allocate cephx state, clone the client's secret key into it, and
+ * attach it (and our ops) to the auth client.  Requires ac->key.
+ */
+int ceph_x_init(struct ceph_auth_client *ac)
+{
+ struct ceph_x_info *xi;
+ int ret;
+
+ dout("ceph_x_init %p\n", ac);
+ ret = -ENOMEM;
+ xi = kzalloc(sizeof(*xi), GFP_NOFS);
+ if (!xi)
+ goto out;
+
+ ret = -EINVAL;
+ if (!ac->key) {
+ pr_err("no secret set (for auth_x protocol)\n");
+ goto out_nomem;
+ }
+
+ ret = ceph_crypto_key_clone(&xi->secret, ac->key);
+ if (ret < 0) {
+ pr_err("cannot clone key: %d\n", ret);
+ goto out_nomem;
+ }
+
+ xi->starting = true;
+ xi->ticket_handlers = RB_ROOT;
+
+ ac->protocol = CEPH_AUTH_CEPHX;
+ ac->private = xi;
+ ac->ops = &ceph_x_ops;
+ return 0;
+
+out_nomem:
+ kfree(xi);
+out:
+ return ret;
+}
+
+
+
+
diff --git a/libceph/auth_x.h b/libceph/auth_x.h
new file mode 100644
index 0000000..65ee720
--- /dev/null
+++ b/libceph/auth_x.h
@@ -0,0 +1,51 @@
+#ifndef _FS_CEPH_AUTH_X_H
+#define _FS_CEPH_AUTH_X_H
+
+#include <linux/rbtree.h>
+
+#include <linux/ceph/auth.h>
+
+#include "crypto.h"
+#include "auth_x_protocol.h"
+
+/*
+ * Handle ticket for a single service.
+ */
+struct ceph_x_ticket_handler {
+ struct rb_node node;
+ unsigned int service;
+
+ struct ceph_crypto_key session_key;
+ struct ceph_timespec validity;
+
+ u64 secret_id;
+ struct ceph_buffer *ticket_blob;
+
+ unsigned long renew_after, expires;
+};
+
+
+struct ceph_x_authorizer {
+ struct ceph_buffer *buf;
+ unsigned int service;
+ u64 nonce;
+ u64 secret_id;
+ char reply_buf[128]; /* big enough for encrypted blob */
+};
+
+struct ceph_x_info {
+ struct ceph_crypto_key secret;
+
+ bool starting;
+ u64 server_challenge;
+
+ unsigned int have_keys;
+ struct rb_root ticket_handlers;
+
+ struct ceph_x_authorizer auth_authorizer;
+};
+
+int ceph_x_init(struct ceph_auth_client *ac);
+
+#endif
+
diff --git a/libceph/auth_x_protocol.h b/libceph/auth_x_protocol.h
new file mode 100644
index 0000000..671d305
--- /dev/null
+++ b/libceph/auth_x_protocol.h
@@ -0,0 +1,90 @@
+#ifndef __FS_CEPH_AUTH_X_PROTOCOL
+#define __FS_CEPH_AUTH_X_PROTOCOL
+
+#define CEPHX_GET_AUTH_SESSION_KEY 0x0100
+#define CEPHX_GET_PRINCIPAL_SESSION_KEY 0x0200
+#define CEPHX_GET_ROTATING_KEY 0x0400
+
+/* common bits */
+/* ticket identifier + opaque server-issued blob; trails a message */
+struct ceph_x_ticket_blob {
+ __u8 struct_v;
+ __le64 secret_id;
+ __le32 blob_len;
+ char blob[];
+} __attribute__ ((packed));
+
+
+/* common request/reply headers */
+struct ceph_x_request_header {
+ __le16 op;
+} __attribute__ ((packed));
+
+struct ceph_x_reply_header {
+ __le16 op;
+ __le32 result;
+} __attribute__ ((packed));
+
+
+/* authenticate handshake */
+
+/* initial hello (no reply header) */
+struct ceph_x_server_challenge {
+ __u8 struct_v;
+ __le64 server_challenge;
+} __attribute__ ((packed));
+
+/* client's answer: challenge plus XOR-folded proof of the shared key */
+struct ceph_x_authenticate {
+ __u8 struct_v;
+ __le64 client_challenge;
+ __le64 key;
+ /* ticket blob */
+} __attribute__ ((packed));
+
+/* bitmask of service tickets being requested */
+struct ceph_x_service_ticket_request {
+ __u8 struct_v;
+ __le32 keys;
+} __attribute__ ((packed));
+
+/* plaintext that gets encrypted to prove key possession */
+struct ceph_x_challenge_blob {
+ __le64 server_challenge;
+ __le64 client_challenge;
+} __attribute__ ((packed));
+
+
+
+/* authorize handshake */
+
+/*
+ * The authorizer consists of two pieces:
+ * a - service id, ticket blob
+ * b - encrypted with session key
+ */
+struct ceph_x_authorize_a {
+ __u8 struct_v;
+ __le64 global_id;
+ __le32 service_id;
+ struct ceph_x_ticket_blob ticket_blob;
+} __attribute__ ((packed));
+
+struct ceph_x_authorize_b {
+ __u8 struct_v;
+ __le64 nonce;
+} __attribute__ ((packed));
+
+/* server echoes our nonce incremented by one */
+struct ceph_x_authorize_reply {
+ __u8 struct_v;
+ __le64 nonce_plus_one;
+} __attribute__ ((packed));
+
+
+/*
+ * encyption bundle
+ */
+#define CEPHX_ENC_MAGIC 0xff009cad8826aa55ull
+
+/* prepended to every encrypted payload; magic detects wrong keys */
+struct ceph_x_encrypt_header {
+ __u8 struct_v;
+ __le64 magic;
+} __attribute__ ((packed));
+
+#endif
diff --git a/libceph/buffer.c b/libceph/buffer.c
new file mode 100644
index 0000000..621b5f6
--- /dev/null
+++ b/libceph/buffer.c
@@ -0,0 +1,58 @@
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <linux/ceph/buffer.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/libceph.h> /* for ceph_kv{malloc,free} */
+
+/*
+ * Allocate a refcounted ceph_buffer with a len-byte payload.  The payload
+ * comes from ceph_kvmalloc (kmalloc with vmalloc fallback), so it may or
+ * may not be physically contiguous.  Returns NULL on allocation failure.
+ */
+struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
+{
+ struct ceph_buffer *b;
+
+ b = kmalloc(sizeof(*b), gfp);
+ if (!b)
+ return NULL;
+
+ b->vec.iov_base = ceph_kvmalloc(len, gfp);
+ if (!b->vec.iov_base) {
+ kfree(b);
+ return NULL;
+ }
+
+ kref_init(&b->kref); /* caller holds the initial reference */
+ b->alloc_len = len;
+ b->vec.iov_len = len;
+ dout("buffer_new %p\n", b);
+ return b;
+}
+EXPORT_SYMBOL(ceph_buffer_new);
+
+/*
+ * kref release callback: free the payload (kvmalloc-aware) and the
+ * buffer itself.  Invoked when the last reference is dropped.
+ */
+void ceph_buffer_release(struct kref *kref)
+{
+ struct ceph_buffer *b = container_of(kref, struct ceph_buffer, kref);
+
+ dout("buffer_release %p\n", b);
+ ceph_kvfree(b->vec.iov_base);
+ kfree(b);
+}
+EXPORT_SYMBOL(ceph_buffer_release);
+
+/*
+ * Decode a length-prefixed (u32 length + data) buffer from *p, bounded
+ * by end, into a newly allocated ceph_buffer.  Advances *p past the
+ * consumed bytes.  Returns 0, -EINVAL on short input, -ENOMEM on
+ * allocation failure.
+ */
+int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end)
+{
+ size_t len;
+
+ ceph_decode_need(p, end, sizeof(u32), bad);
+ len = ceph_decode_32(p);
+ dout("decode_buffer len %d\n", (int)len);
+ /* verify the payload is fully present before allocating */
+ ceph_decode_need(p, end, len, bad);
+ *b = ceph_buffer_new(len, GFP_NOFS);
+ if (!*b)
+ return -ENOMEM;
+ ceph_decode_copy(p, (*b)->vec.iov_base, len);
+ return 0;
+bad:
+ return -EINVAL;
+}
diff --git a/libceph/ceph_common.c b/libceph/ceph_common.c
new file mode 100644
index 0000000..67d7721
--- /dev/null
+++ b/libceph/ceph_common.c
@@ -0,0 +1,664 @@
+
+#include <linux/ceph/ceph_debug.h>
+#include <linux/backing-dev.h>
+#include <linux/ctype.h>
+#include <linux/fs.h>
+#include <linux/inet.h>
+#include <linux/in6.h>
+#include <linux/key.h>
+#include <keys/ceph-type.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/parser.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/statfs.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>
+
+
+#include <linux/ceph/ceph_features.h>
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/debugfs.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/auth.h>
+#include "crypto.h"
+
+
+/*
+ * Module compatibility interface. For now it doesn't do anything,
+ * but its existence signals a certain level of functionality.
+ *
+ * The data buffer is used to pass information both to and from
+ * libceph. The return value indicates whether libceph determines
+ * it is compatible with the caller (from another kernel module),
+ * given the provided data.
+ *
+ * The data pointer can be null.
+ */
+bool libceph_compatible(void *data)
+{
+ return true;
+}
+EXPORT_SYMBOL(libceph_compatible);
+
+/*
+ * find filename portion of a path (/foo/bar/baz -> baz)
+ * @s need not be NUL-terminated; @len bounds the scan.  Returns a
+ * pointer into @s just past the last '/', or @s itself if none.
+ */
+const char *ceph_file_part(const char *s, int len)
+{
+ const char *e = s + len;
+
+ while (e != s && *(e-1) != '/')
+ e--;
+ return e;
+}
+EXPORT_SYMBOL(ceph_file_part);
+
+/*
+ * Map a CEPH_MSG_* message type to a human-readable name for debug and
+ * log output.  Returns "unknown" (never NULL) for unrecognized types.
+ */
+const char *ceph_msg_type_name(int type)
+{
+ switch (type) {
+ case CEPH_MSG_SHUTDOWN: return "shutdown";
+ case CEPH_MSG_PING: return "ping";
+ case CEPH_MSG_AUTH: return "auth";
+ case CEPH_MSG_AUTH_REPLY: return "auth_reply";
+ case CEPH_MSG_MON_MAP: return "mon_map";
+ case CEPH_MSG_MON_GET_MAP: return "mon_get_map";
+ case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe";
+ case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack";
+ case CEPH_MSG_STATFS: return "statfs";
+ case CEPH_MSG_STATFS_REPLY: return "statfs_reply";
+ case CEPH_MSG_MDS_MAP: return "mds_map";
+ case CEPH_MSG_CLIENT_SESSION: return "client_session";
+ case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
+ case CEPH_MSG_CLIENT_REQUEST: return "client_request";
+ case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward";
+ case CEPH_MSG_CLIENT_REPLY: return "client_reply";
+ case CEPH_MSG_CLIENT_CAPS: return "client_caps";
+ case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release";
+ case CEPH_MSG_CLIENT_SNAP: return "client_snap";
+ case CEPH_MSG_CLIENT_LEASE: return "client_lease";
+ case CEPH_MSG_OSD_MAP: return "osd_map";
+ case CEPH_MSG_OSD_OP: return "osd_op";
+ case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
+ case CEPH_MSG_WATCH_NOTIFY: return "watch_notify";
+ default: return "unknown";
+ }
+}
+EXPORT_SYMBOL(ceph_msg_type_name);
+
+/*
+ * Initially learn our fsid, or verify that a reported fsid matches the
+ * one already recorded on the client.
+ *
+ * Returns 0 if the fsid was recorded or matches, -1 on mismatch.
+ */
+int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
+{
+ if (client->have_fsid) {
+ if (ceph_fsid_compare(&client->fsid, fsid)) {
+ /* terminate the log line: without '\n' the next
+ * printk can be appended to this message */
+ pr_err("bad fsid, had %pU got %pU\n",
+ &client->fsid, fsid);
+ return -1;
+ }
+ } else {
+ memcpy(&client->fsid, fsid, sizeof(*fsid));
+ }
+ return 0;
+}
+EXPORT_SYMBOL(ceph_check_fsid);
+
+/*
+ * strcmp() that tolerates NULL operands: two NULLs compare equal, and a
+ * NULL sorts before any non-NULL string.
+ */
+static int strcmp_null(const char *s1, const char *s2)
+{
+ if (!s1 && !s2)
+ return 0;
+ if (s1 && !s2)
+ return -1;
+ if (!s1 && s2)
+ return 1;
+ return strcmp(s1, s2);
+}
+
+/*
+ * Compare a new option set against an existing client's options to
+ * decide whether the client can be shared.  Returns 0 on a match,
+ * non-zero otherwise.
+ */
+int ceph_compare_options(struct ceph_options *new_opt,
+ struct ceph_client *client)
+{
+ struct ceph_options *opt1 = new_opt;
+ struct ceph_options *opt2 = client->options;
+ int ofs = offsetof(struct ceph_options, mon_addr);
+ int i;
+ int ret;
+
+ /* bulk-compare the fixed scalar fields that precede mon_addr;
+ * pointer members after it need the field-by-field checks below */
+ ret = memcmp(opt1, opt2, ofs);
+ if (ret)
+ return ret;
+
+ ret = strcmp_null(opt1->name, opt2->name);
+ if (ret)
+ return ret;
+
+ if (opt1->key && !opt2->key)
+ return -1;
+ if (!opt1->key && opt2->key)
+ return 1;
+ if (opt1->key && opt2->key) {
+ if (opt1->key->type != opt2->key->type)
+ return -1;
+ if (opt1->key->created.tv_sec != opt2->key->created.tv_sec)
+ return -1;
+ if (opt1->key->created.tv_nsec != opt2->key->created.tv_nsec)
+ return -1;
+ if (opt1->key->len != opt2->key->len)
+ return -1;
+ if (opt1->key->key && !opt2->key->key)
+ return -1;
+ if (!opt1->key->key && opt2->key->key)
+ return 1;
+ if (opt1->key->key && opt2->key->key) {
+ ret = memcmp(opt1->key->key, opt2->key->key, opt1->key->len);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /* any matching mon ip implies a match */
+ for (i = 0; i < opt1->num_mon; i++) {
+ if (ceph_monmap_contains(client->monc.monmap,
+ &opt1->mon_addr[i]))
+ return 0;
+ }
+ return -1;
+}
+EXPORT_SYMBOL(ceph_compare_options);
+
+/*
+ * Allocate @size bytes, preferring kmalloc for modest sizes and falling
+ * back to vmalloc.  __GFP_NOWARN suppresses the allocation-failure splat
+ * since the vmalloc fallback usually succeeds.  Free with ceph_kvfree().
+ */
+void *ceph_kvmalloc(size_t size, gfp_t flags)
+{
+ if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
+ void *ptr = kmalloc(size, flags | __GFP_NOWARN);
+ if (ptr)
+ return ptr;
+ }
+
+ return __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL);
+}
+
+/* counterpart to ceph_kvmalloc: route to vfree or kfree as appropriate */
+void ceph_kvfree(const void *ptr)
+{
+ if (is_vmalloc_addr(ptr))
+ vfree(ptr);
+ else
+ kfree(ptr);
+}
+
+
+/*
+ * Parse a textual fsid (UUID, e.g. "01234567-89ab-...") into the 16-byte
+ * binary form.  Punctuation between hex pairs is skipped, so both dashed
+ * and undashed forms are accepted.  Returns 0 on success, -EINVAL if
+ * fewer than 16 bytes could be decoded.
+ */
+static int parse_fsid(const char *str, struct ceph_fsid *fsid)
+{
+ int i = 0;
+ char tmp[3];
+ int err = -EINVAL;
+ int d;
+
+ dout("parse_fsid '%s'\n", str);
+ tmp[2] = 0;
+ while (*str && i < 16) {
+ if (ispunct(*str)) {
+ str++;
+ continue;
+ }
+ /* need two hex digits for one output byte */
+ if (!isxdigit(str[0]) || !isxdigit(str[1]))
+ break;
+ tmp[0] = str[0];
+ tmp[1] = str[1];
+ if (sscanf(tmp, "%x", &d) < 1)
+ break;
+ fsid->fsid[i] = d & 0xff;
+ i++;
+ str += 2;
+ }
+
+ if (i == 16)
+ err = 0;
+ dout("parse_fsid ret %d got fsid %pU", err, fsid);
+ return err;
+}
+
+/*
+ * ceph options
+ *
+ * Token ids for the mount-option parser.  The Opt_last_int /
+ * Opt_last_string sentinels partition the ids: ids below Opt_last_int
+ * take an integer argument, ids between the sentinels take a string,
+ * and ids above Opt_last_string are bare flags.  Keep new entries in
+ * the matching section of both the enum and opt_tokens.
+ */
+enum {
+ Opt_osdtimeout,
+ Opt_osdkeepalivetimeout,
+ Opt_mount_timeout,
+ Opt_osd_idle_ttl,
+ Opt_last_int,
+ /* int args above */
+ Opt_fsid,
+ Opt_name,
+ Opt_secret,
+ Opt_key,
+ Opt_ip,
+ Opt_last_string,
+ /* string args above */
+ Opt_share,
+ Opt_noshare,
+ Opt_crc,
+ Opt_nocrc,
+};
+
+/* pattern table consumed by match_token() in ceph_parse_options() */
+static match_table_t opt_tokens = {
+ {Opt_osdtimeout, "osdtimeout=%d"},
+ {Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
+ {Opt_mount_timeout, "mount_timeout=%d"},
+ {Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
+ /* int args above */
+ {Opt_fsid, "fsid=%s"},
+ {Opt_name, "name=%s"},
+ {Opt_secret, "secret=%s"},
+ {Opt_key, "key=%s"},
+ {Opt_ip, "ip=%s"},
+ /* string args above */
+ {Opt_share, "share"},
+ {Opt_noshare, "noshare"},
+ {Opt_crc, "crc"},
+ {Opt_nocrc, "nocrc"},
+ {-1, NULL}
+};
+
+/*
+ * Free a ceph_options and everything it owns (entity name, secret key,
+ * monitor address array).  Safe on a partially populated struct, as
+ * produced by an error path in ceph_parse_options().
+ */
+void ceph_destroy_options(struct ceph_options *opt)
+{
+ dout("destroy_options %p\n", opt);
+ kfree(opt->name);
+ if (opt->key) {
+ /* zero the key material before freeing it */
+ ceph_crypto_key_destroy(opt->key);
+ kfree(opt->key);
+ }
+ kfree(opt->mon_addr);
+ kfree(opt);
+}
+EXPORT_SYMBOL(ceph_destroy_options);
+
+/*
+ * get secret from key store: look up a "ceph"-type key by @name in the
+ * kernel keyring and clone its payload into @dst.  Lookup failures are
+ * logged and collapsed to -EPERM (request_key errors don't map nicely
+ * to mount(2) errors); clone failures are returned as-is.
+ */
+static int get_secret(struct ceph_crypto_key *dst, const char *name) {
+ struct key *ukey;
+ int key_err;
+ int err = 0;
+ struct ceph_crypto_key *ckey;
+
+ ukey = request_key(&key_type_ceph, name, NULL);
+ /* NOTE(review): request_key() returns ERR_PTR, never NULL, so the
+ * !ukey test looks redundant — confirm before removing */
+ if (!ukey || IS_ERR(ukey)) {
+ /* request_key errors don't map nicely to mount(2)
+ errors; don't even try, but still printk */
+ key_err = PTR_ERR(ukey);
+ switch (key_err) {
+ case -ENOKEY:
+ pr_warning("ceph: Mount failed due to key not found: %s\n", name);
+ break;
+ case -EKEYEXPIRED:
+ pr_warning("ceph: Mount failed due to expired key: %s\n", name);
+ break;
+ case -EKEYREVOKED:
+ pr_warning("ceph: Mount failed due to revoked key: %s\n", name);
+ break;
+ default:
+ pr_warning("ceph: Mount failed due to unknown key error"
+ " %d: %s\n", key_err, name);
+ }
+ err = -EPERM;
+ goto out;
+ }
+
+ ckey = ukey->payload.data;
+ err = ceph_crypto_key_clone(dst, ckey);
+ if (err)
+ goto out_key;
+ /* pass through, err is 0 */
+
+out_key:
+ key_put(ukey);
+out:
+ return err;
+}
+
+/*
+ * Parse monitor addresses (from dev_name..dev_name_end) and the
+ * comma-separated option string into a freshly allocated ceph_options.
+ * Tokens not recognized here are offered to @parse_extra_token (e.g.
+ * fs-layer options).  Returns the options on success, or an ERR_PTR;
+ * on failure everything allocated here is released via
+ * ceph_destroy_options().
+ */
+struct ceph_options *
+ceph_parse_options(char *options, const char *dev_name,
+ const char *dev_name_end,
+ int (*parse_extra_token)(char *c, void *private),
+ void *private)
+{
+ struct ceph_options *opt;
+ const char *c;
+ int err = -ENOMEM;
+ substring_t argstr[MAX_OPT_ARGS];
+
+ /* only the initial network namespace is supported */
+ if (current->nsproxy->net_ns != &init_net)
+ return ERR_PTR(-EINVAL);
+
+ opt = kzalloc(sizeof(*opt), GFP_KERNEL);
+ if (!opt)
+ return ERR_PTR(-ENOMEM);
+ opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr),
+ GFP_KERNEL);
+ if (!opt->mon_addr)
+ goto out;
+
+ dout("parse_options %p options '%s' dev_name '%s'\n", opt, options,
+ dev_name);
+
+ /* start with defaults */
+ opt->flags = CEPH_OPT_DEFAULT;
+ opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
+ opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
+ opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */
+
+ /* get mon ip(s) */
+ /* ip1[:port1][,ip2[:port2]...] */
+ err = ceph_parse_ips(dev_name, dev_name_end, opt->mon_addr,
+ CEPH_MAX_MON, &opt->num_mon);
+ if (err < 0)
+ goto out;
+
+ /* parse mount options */
+ while ((c = strsep(&options, ",")) != NULL) {
+ int token, intval, ret;
+ if (!*c)
+ continue;
+ err = -EINVAL;
+ token = match_token((char *)c, opt_tokens, argstr);
+ if (token < 0 && parse_extra_token) {
+ /* extra? */
+ err = parse_extra_token((char *)c, private);
+ if (err < 0) {
+ pr_err("bad option at '%s'\n", c);
+ goto out;
+ }
+ continue;
+ }
+ if (token < Opt_last_int) {
+ ret = match_int(&argstr[0], &intval);
+ if (ret < 0) {
+ pr_err("bad mount option arg (not int) "
+ "at '%s'\n", c);
+ continue;
+ }
+ dout("got int token %d val %d\n", token, intval);
+ } else if (token > Opt_last_int && token < Opt_last_string) {
+ dout("got string token %d val %s\n", token,
+ argstr[0].from);
+ } else {
+ dout("got token %d\n", token);
+ }
+ switch (token) {
+ case Opt_ip:
+ err = ceph_parse_ips(argstr[0].from,
+ argstr[0].to,
+ &opt->my_addr,
+ 1, NULL);
+ if (err < 0)
+ goto out;
+ opt->flags |= CEPH_OPT_MYIP;
+ break;
+
+ case Opt_fsid:
+ err = parse_fsid(argstr[0].from, &opt->fsid);
+ if (err == 0)
+ opt->flags |= CEPH_OPT_FSID;
+ break;
+ case Opt_name:
+ opt->name = kstrndup(argstr[0].from,
+ argstr[0].to-argstr[0].from,
+ GFP_KERNEL);
+ /* a failed dup must not be silently treated as
+ * success (callers would see a NULL name) */
+ if (!opt->name) {
+ err = -ENOMEM;
+ goto out;
+ }
+ break;
+ case Opt_secret:
+ opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL);
+ if (!opt->key) {
+ err = -ENOMEM;
+ goto out;
+ }
+ err = ceph_crypto_key_unarmor(opt->key, argstr[0].from);
+ if (err < 0)
+ goto out;
+ break;
+ case Opt_key:
+ opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL);
+ if (!opt->key) {
+ err = -ENOMEM;
+ goto out;
+ }
+ err = get_secret(opt->key, argstr[0].from);
+ if (err < 0)
+ goto out;
+ break;
+
+ /* misc */
+ case Opt_osdtimeout:
+ pr_warning("ignoring deprecated osdtimeout option\n");
+ break;
+ case Opt_osdkeepalivetimeout:
+ opt->osd_keepalive_timeout = intval;
+ break;
+ case Opt_osd_idle_ttl:
+ opt->osd_idle_ttl = intval;
+ break;
+ case Opt_mount_timeout:
+ opt->mount_timeout = intval;
+ break;
+
+ case Opt_share:
+ opt->flags &= ~CEPH_OPT_NOSHARE;
+ break;
+ case Opt_noshare:
+ opt->flags |= CEPH_OPT_NOSHARE;
+ break;
+
+ case Opt_crc:
+ opt->flags &= ~CEPH_OPT_NOCRC;
+ break;
+ case Opt_nocrc:
+ opt->flags |= CEPH_OPT_NOCRC;
+ break;
+
+ default:
+ /* every token in opt_tokens must be handled above */
+ BUG_ON(token);
+ }
+ }
+
+ /* success */
+ return opt;
+
+out:
+ ceph_destroy_options(opt);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL(ceph_parse_options);
+
+/* return the global id assigned to this client by the auth subsystem */
+u64 ceph_client_id(struct ceph_client *client)
+{
+ return client->monc.auth->global_id;
+}
+EXPORT_SYMBOL(ceph_client_id);
+
+/*
+ * create a fresh client instance
+ *
+ * Takes ownership of @opt on success; on failure the caller keeps it.
+ * The given feature bits are OR'd with the library defaults.  Returns
+ * the client or an ERR_PTR.
+ */
+struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
+ u64 supported_features,
+ u64 required_features)
+{
+ struct ceph_client *client;
+ struct ceph_entity_addr *myaddr = NULL;
+ int err = -ENOMEM;
+
+ client = kzalloc(sizeof(*client), GFP_KERNEL);
+ if (client == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ client->private = private;
+ client->options = opt;
+
+ mutex_init(&client->mount_mutex);
+ init_waitqueue_head(&client->auth_wq);
+ client->auth_err = 0;
+
+ client->extra_mon_dispatch = NULL;
+ client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT |
+ supported_features;
+ client->required_features = CEPH_FEATURES_REQUIRED_DEFAULT |
+ required_features;
+
+ /* msgr: bind to an explicit local address only if "ip=" was given */
+ if (ceph_test_opt(client, MYIP))
+ myaddr = &client->options->my_addr;
+ ceph_messenger_init(&client->msgr, myaddr,
+ client->supported_features,
+ client->required_features,
+ ceph_test_opt(client, NOCRC));
+
+ /* subsystems */
+ err = ceph_monc_init(&client->monc, client);
+ if (err < 0)
+ goto fail;
+ err = ceph_osdc_init(&client->osdc, client);
+ if (err < 0)
+ goto fail_monc;
+
+ return client;
+
+fail_monc:
+ ceph_monc_stop(&client->monc);
+fail:
+ kfree(client);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL(ceph_create_client);
+
+/*
+ * Tear down a client: stop the messenger, OSD and monitor clients,
+ * remove debugfs entries, and free the options taken over at create
+ * time.  Counterpart to ceph_create_client().
+ */
+void ceph_destroy_client(struct ceph_client *client)
+{
+ dout("destroy_client %p\n", client);
+
+ /* tell the messenger workers to stop accepting new work */
+ atomic_set(&client->msgr.stopping, 1);
+
+ /* unmount */
+ ceph_osdc_stop(&client->osdc);
+
+ ceph_monc_stop(&client->monc);
+
+ ceph_debugfs_client_cleanup(client);
+
+ ceph_destroy_options(client->options);
+
+ kfree(client);
+ dout("destroy_client %p done\n", client);
+}
+EXPORT_SYMBOL(ceph_destroy_client);
+
+/*
+ * true if we have the mon map (and have thus joined the cluster)
+ * and a non-trivial osd map, i.e. both subsystems are usable.
+ */
+static int have_mon_and_osd_map(struct ceph_client *client)
+{
+ return client->monc.monmap && client->monc.monmap->epoch &&
+ client->osdc.osdmap && client->osdc.osdmap->epoch;
+}
+
+/*
+ * mount: join the ceph cluster, and open root directory.
+ *
+ * Opens the monitor session and waits (interruptibly) until both mon
+ * and osd maps arrive, auth fails, or @started + mount_timeout elapses.
+ * Caller must hold mount_mutex (see ceph_open_session()).  Returns 0,
+ * -EIO on timeout, the auth error, or -EINTR/-ERESTARTSYS.
+ */
+int __ceph_open_session(struct ceph_client *client, unsigned long started)
+{
+ int err;
+ unsigned long timeout = client->options->mount_timeout * HZ;
+
+ /* open session, and wait for mon and osd maps */
+ err = ceph_monc_open_session(&client->monc);
+ if (err < 0)
+ return err;
+
+ while (!have_mon_and_osd_map(client)) {
+ err = -EIO;
+ /* timeout == 0 means wait forever */
+ if (timeout && time_after_eq(jiffies, started + timeout))
+ return err;
+
+ /* wait */
+ dout("mount waiting for mon_map\n");
+ err = wait_event_interruptible_timeout(client->auth_wq,
+ have_mon_and_osd_map(client) || (client->auth_err < 0),
+ timeout);
+ if (err == -EINTR || err == -ERESTARTSYS)
+ return err;
+ if (client->auth_err < 0)
+ return client->auth_err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(__ceph_open_session);
+
+
+/*
+ * Serialized wrapper around __ceph_open_session(): takes mount_mutex so
+ * only one opener runs at a time, and records the start time used for
+ * the mount timeout.
+ */
+int ceph_open_session(struct ceph_client *client)
+{
+ int ret;
+ unsigned long started = jiffies; /* note the start time */
+
+ dout("open_session start\n");
+ mutex_lock(&client->mount_mutex);
+
+ ret = __ceph_open_session(client, started);
+
+ mutex_unlock(&client->mount_mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_open_session);
+
+
+/*
+ * Module init: bring up debugfs, crypto, the messenger and the OSD
+ * client in order, unwinding in reverse on any failure.
+ */
+static int __init init_ceph_lib(void)
+{
+ int ret = 0;
+
+ ret = ceph_debugfs_init();
+ if (ret < 0)
+ goto out;
+
+ ret = ceph_crypto_init();
+ if (ret < 0)
+ goto out_debugfs;
+
+ ret = ceph_msgr_init();
+ if (ret < 0)
+ goto out_crypto;
+
+ ret = ceph_osdc_setup();
+ if (ret < 0)
+ goto out_msgr;
+
+ pr_info("loaded (mon/osd proto %d/%d)\n",
+ CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL);
+
+ return 0;
+
+out_msgr:
+ ceph_msgr_exit();
+out_crypto:
+ ceph_crypto_shutdown();
+out_debugfs:
+ ceph_debugfs_cleanup();
+out:
+ return ret;
+}
+
+/* Module exit: tear down subsystems in the reverse of init order. */
+static void __exit exit_ceph_lib(void)
+{
+ dout("exit_ceph_lib\n");
+ ceph_osdc_cleanup();
+ ceph_msgr_exit();
+ ceph_crypto_shutdown();
+ ceph_debugfs_cleanup();
+}
+
+module_init(init_ceph_lib);
+module_exit(exit_ceph_lib);
+
+MODULE_AUTHOR("Sage Weil <sage at newdream.net>");
+MODULE_AUTHOR("Yehuda Sadeh <yehuda at hq.newdream.net>");
+MODULE_AUTHOR("Patience Warnick <patience at newdream.net>");
+/* NOTE(review): this is the shared core library (libceph), not the
+ * filesystem module — the description below looks copied from fs/ceph;
+ * confirm before changing */
+MODULE_DESCRIPTION("Ceph filesystem for Linux");
+MODULE_LICENSE("GPL");
diff --git a/libceph/ceph_fs.c b/libceph/ceph_fs.c
new file mode 100644
index 0000000..41466cc
--- /dev/null
+++ b/libceph/ceph_fs.c
@@ -0,0 +1,78 @@
+/*
+ * Some non-inline ceph helpers
+ */
+#include <linux/module.h>
+#include <linux/ceph/types.h>
+
+/*
+ * return true if @layout appears to be valid
+ *
+ * Checks: stripe unit and object size are non-zero multiples of
+ * CEPH_MIN_STRIPE_UNIT (64k), object size is a multiple of the stripe
+ * unit, and stripe count is non-zero.
+ */
+int ceph_file_layout_is_valid(const struct ceph_file_layout *layout)
+{
+ __u32 su = le32_to_cpu(layout->fl_stripe_unit);
+ __u32 sc = le32_to_cpu(layout->fl_stripe_count);
+ __u32 os = le32_to_cpu(layout->fl_object_size);
+
+ /* stripe unit, object size must be non-zero, 64k increment */
+ if (!su || (su & (CEPH_MIN_STRIPE_UNIT-1)))
+ return 0;
+ if (!os || (os & (CEPH_MIN_STRIPE_UNIT-1)))
+ return 0;
+ /* object size must be a multiple of stripe unit */
+ if (os < su || os % su)
+ return 0;
+ /* stripe count must be non-zero */
+ if (!sc)
+ return 0;
+ return 1;
+}
+
+
+/*
+ * Translate open(2) flags into a CEPH_FILE_MODE_* value.  The switch on
+ * O_ACCMODE is exhaustive (all four two-bit values are covered), so
+ * mode is always assigned.
+ */
+int ceph_flags_to_mode(int flags)
+{
+ int mode;
+
+#ifdef O_DIRECTORY /* fixme */
+ if ((flags & O_DIRECTORY) == O_DIRECTORY)
+ return CEPH_FILE_MODE_PIN;
+#endif
+
+ switch (flags & O_ACCMODE) {
+ case O_WRONLY:
+ mode = CEPH_FILE_MODE_WR;
+ break;
+ case O_RDONLY:
+ mode = CEPH_FILE_MODE_RD;
+ break;
+ case O_RDWR:
+ case O_ACCMODE: /* this is what the VFS does */
+ mode = CEPH_FILE_MODE_RDWR;
+ break;
+ }
+#ifdef O_LAZY
+ if (flags & O_LAZY)
+ mode |= CEPH_FILE_MODE_LAZY;
+#endif
+
+ return mode;
+}
+EXPORT_SYMBOL(ceph_flags_to_mode);
+
+/*
+ * Return the capability bits wanted for a CEPH_FILE_MODE_* bitmask.
+ * PIN is always included; RD/WR/LAZY each add their cap sets.
+ */
+int ceph_caps_for_mode(int mode)
+{
+ int caps = CEPH_CAP_PIN;
+
+ if (mode & CEPH_FILE_MODE_RD)
+ caps |= CEPH_CAP_FILE_SHARED |
+ CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE;
+ if (mode & CEPH_FILE_MODE_WR)
+ caps |= CEPH_CAP_FILE_EXCL |
+ CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER |
+ CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL |
+ CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL;
+ if (mode & CEPH_FILE_MODE_LAZY)
+ caps |= CEPH_CAP_FILE_LAZYIO;
+
+ return caps;
+}
+EXPORT_SYMBOL(ceph_caps_for_mode);
diff --git a/libceph/ceph_hash.c b/libceph/ceph_hash.c
new file mode 100644
index 0000000..67bb1f1
--- /dev/null
+++ b/libceph/ceph_hash.c
@@ -0,0 +1,121 @@
+
+#include <linux/ceph/types.h>
+#include <linux/module.h>
+
+/*
+ * Robert Jenkins' hash function.
+ * http://burtleburtle.net/bob/hash/evahash.html
+ * This is in the public domain.
+ */
+#define mix(a, b, c) \
+ do { \
+ a = a - b; a = a - c; a = a ^ (c >> 13); \
+ b = b - c; b = b - a; b = b ^ (a << 8); \
+ c = c - a; c = c - b; c = c ^ (b >> 13); \
+ a = a - b; a = a - c; a = a ^ (c >> 12); \
+ b = b - c; b = b - a; b = b ^ (a << 16); \
+ c = c - a; c = c - b; c = c ^ (b >> 5); \
+ a = a - b; a = a - c; a = a ^ (c >> 3); \
+ b = b - c; b = b - a; b = b ^ (a << 10); \
+ c = c - a; c = c - b; c = c ^ (b >> 15); \
+ } while (0)
+
+/*
+ * Hash @length bytes of @str into a 32-bit value.  Bytes are consumed
+ * 12 at a time through mix(); the tail is folded in by the fallthrough
+ * switch below.  Must produce stable results across architectures, as
+ * the value is used for object placement.
+ */
+unsigned int ceph_str_hash_rjenkins(const char *str, unsigned int length)
+{
+ const unsigned char *k = (const unsigned char *)str;
+ __u32 a, b, c; /* the internal state */
+ __u32 len; /* how many key bytes still need mixing */
+
+ /* Set up the internal state */
+ len = length;
+ a = 0x9e3779b9; /* the golden ratio; an arbitrary value */
+ b = a;
+ c = 0; /* variable initialization of internal state */
+
+ /* handle most of the key */
+ while (len >= 12) {
+ a = a + (k[0] + ((__u32)k[1] << 8) + ((__u32)k[2] << 16) +
+ ((__u32)k[3] << 24));
+ b = b + (k[4] + ((__u32)k[5] << 8) + ((__u32)k[6] << 16) +
+ ((__u32)k[7] << 24));
+ c = c + (k[8] + ((__u32)k[9] << 8) + ((__u32)k[10] << 16) +
+ ((__u32)k[11] << 24));
+ mix(a, b, c);
+ k = k + 12;
+ len = len - 12;
+ }
+
+ /* handle the last 11 bytes */
+ c = c + length;
+ switch (len) { /* all the case statements fall through */
+ case 11:
+ c = c + ((__u32)k[10] << 24);
+ case 10:
+ c = c + ((__u32)k[9] << 16);
+ case 9:
+ c = c + ((__u32)k[8] << 8);
+ /* the first byte of c is reserved for the length */
+ case 8:
+ b = b + ((__u32)k[7] << 24);
+ case 7:
+ b = b + ((__u32)k[6] << 16);
+ case 6:
+ b = b + ((__u32)k[5] << 8);
+ case 5:
+ b = b + k[4];
+ case 4:
+ a = a + ((__u32)k[3] << 24);
+ case 3:
+ a = a + ((__u32)k[2] << 16);
+ case 2:
+ a = a + ((__u32)k[1] << 8);
+ case 1:
+ a = a + k[0];
+ /* case 0: nothing left to add */
+ }
+ mix(a, b, c);
+
+ return c;
+}
+
+/*
+ * linux dcache hash
+ *
+ * Must match the historical dcache hash bit-for-bit; the result is part
+ * of the on-wire/placement protocol, so do not "modernize" it.
+ */
+unsigned int ceph_str_hash_linux(const char *str, unsigned int length)
+{
+ unsigned long hash = 0;
+ unsigned char c;
+
+ while (length--) {
+ c = *str++;
+ hash = (hash + (c << 4) + (c >> 4)) * 11;
+ }
+ return hash;
+}
+
+
+/*
+ * Dispatch on CEPH_STR_HASH_* type.  Returns (unsigned)-1 for an
+ * unknown type.
+ */
+unsigned int ceph_str_hash(int type, const char *s, unsigned int len)
+{
+ switch (type) {
+ case CEPH_STR_HASH_LINUX:
+ return ceph_str_hash_linux(s, len);
+ case CEPH_STR_HASH_RJENKINS:
+ return ceph_str_hash_rjenkins(s, len);
+ default:
+ return -1;
+ }
+}
+EXPORT_SYMBOL(ceph_str_hash);
+
+/* human-readable name of a CEPH_STR_HASH_* type, for debug output */
+const char *ceph_str_hash_name(int type)
+{
+ switch (type) {
+ case CEPH_STR_HASH_LINUX:
+ return "linux";
+ case CEPH_STR_HASH_RJENKINS:
+ return "rjenkins";
+ default:
+ return "unknown";
+ }
+}
+EXPORT_SYMBOL(ceph_str_hash_name);
diff --git a/libceph/ceph_strings.c b/libceph/ceph_strings.c
new file mode 100644
index 0000000..1348df9
--- /dev/null
+++ b/libceph/ceph_strings.c
@@ -0,0 +1,123 @@
+/*
+ * Ceph string constants
+ */
+#include <linux/module.h>
+#include <linux/ceph/types.h>
+
+/* map a CEPH_ENTITY_TYPE_* value to its name; never returns NULL */
+const char *ceph_entity_type_name(int type)
+{
+ switch (type) {
+ case CEPH_ENTITY_TYPE_MDS: return "mds";
+ case CEPH_ENTITY_TYPE_OSD: return "osd";
+ case CEPH_ENTITY_TYPE_MON: return "mon";
+ case CEPH_ENTITY_TYPE_CLIENT: return "client";
+ case CEPH_ENTITY_TYPE_AUTH: return "auth";
+ default: return "unknown";
+ }
+}
+
+/*
+ * Map a CEPH_OSD_OP_* opcode to its name for debug/log output.
+ * Returns "???" (never NULL) for unrecognized opcodes.
+ */
+const char *ceph_osd_op_name(int op)
+{
+ switch (op) {
+ case CEPH_OSD_OP_READ: return "read";
+ case CEPH_OSD_OP_STAT: return "stat";
+ case CEPH_OSD_OP_MAPEXT: return "mapext";
+ case CEPH_OSD_OP_SPARSE_READ: return "sparse-read";
+ case CEPH_OSD_OP_NOTIFY: return "notify";
+ case CEPH_OSD_OP_NOTIFY_ACK: return "notify-ack";
+ case CEPH_OSD_OP_ASSERT_VER: return "assert-version";
+
+ case CEPH_OSD_OP_MASKTRUNC: return "masktrunc";
+
+ case CEPH_OSD_OP_CREATE: return "create";
+ case CEPH_OSD_OP_WRITE: return "write";
+ case CEPH_OSD_OP_DELETE: return "delete";
+ case CEPH_OSD_OP_TRUNCATE: return "truncate";
+ case CEPH_OSD_OP_ZERO: return "zero";
+ case CEPH_OSD_OP_WRITEFULL: return "writefull";
+ case CEPH_OSD_OP_ROLLBACK: return "rollback";
+
+ case CEPH_OSD_OP_APPEND: return "append";
+ case CEPH_OSD_OP_STARTSYNC: return "startsync";
+ case CEPH_OSD_OP_SETTRUNC: return "settrunc";
+ case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc";
+
+ case CEPH_OSD_OP_TMAPUP: return "tmapup";
+ case CEPH_OSD_OP_TMAPGET: return "tmapget";
+ case CEPH_OSD_OP_TMAPPUT: return "tmapput";
+ case CEPH_OSD_OP_WATCH: return "watch";
+
+ case CEPH_OSD_OP_CLONERANGE: return "clonerange";
+ case CEPH_OSD_OP_ASSERT_SRC_VERSION: return "assert-src-version";
+ case CEPH_OSD_OP_SRC_CMPXATTR: return "src-cmpxattr";
+
+ case CEPH_OSD_OP_GETXATTR: return "getxattr";
+ case CEPH_OSD_OP_GETXATTRS: return "getxattrs";
+ case CEPH_OSD_OP_SETXATTR: return "setxattr";
+ case CEPH_OSD_OP_SETXATTRS: return "setxattrs";
+ case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs";
+ case CEPH_OSD_OP_RMXATTR: return "rmxattr";
+ case CEPH_OSD_OP_CMPXATTR: return "cmpxattr";
+
+ case CEPH_OSD_OP_PULL: return "pull";
+ case CEPH_OSD_OP_PUSH: return "push";
+ case CEPH_OSD_OP_BALANCEREADS: return "balance-reads";
+ case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads";
+ case CEPH_OSD_OP_SCRUB: return "scrub";
+ case CEPH_OSD_OP_SCRUB_RESERVE: return "scrub-reserve";
+ case CEPH_OSD_OP_SCRUB_UNRESERVE: return "scrub-unreserve";
+ case CEPH_OSD_OP_SCRUB_STOP: return "scrub-stop";
+ case CEPH_OSD_OP_SCRUB_MAP: return "scrub-map";
+
+ case CEPH_OSD_OP_WRLOCK: return "wrlock";
+ case CEPH_OSD_OP_WRUNLOCK: return "wrunlock";
+ case CEPH_OSD_OP_RDLOCK: return "rdlock";
+ case CEPH_OSD_OP_RDUNLOCK: return "rdunlock";
+ case CEPH_OSD_OP_UPLOCK: return "uplock";
+ case CEPH_OSD_OP_DNLOCK: return "dnlock";
+
+ case CEPH_OSD_OP_CALL: return "call";
+
+ case CEPH_OSD_OP_PGLS: return "pgls";
+ case CEPH_OSD_OP_PGLS_FILTER: return "pgls-filter";
+ case CEPH_OSD_OP_OMAPGETKEYS: return "omap-get-keys";
+ case CEPH_OSD_OP_OMAPGETVALS: return "omap-get-vals";
+ case CEPH_OSD_OP_OMAPGETHEADER: return "omap-get-header";
+ case CEPH_OSD_OP_OMAPGETVALSBYKEYS: return "omap-get-vals-by-keys";
+ case CEPH_OSD_OP_OMAPSETVALS: return "omap-set-vals";
+ case CEPH_OSD_OP_OMAPSETHEADER: return "omap-set-header";
+ case CEPH_OSD_OP_OMAPCLEAR: return "omap-clear";
+ case CEPH_OSD_OP_OMAPRMKEYS: return "omap-rm-keys";
+ }
+ return "???";
+}
+
+/* map a CEPH_OSD_* state bit to its name; "???" for unknown values */
+const char *ceph_osd_state_name(int s)
+{
+ switch (s) {
+ case CEPH_OSD_EXISTS:
+ return "exists";
+ case CEPH_OSD_UP:
+ return "up";
+ case CEPH_OSD_AUTOOUT:
+ return "autoout";
+ case CEPH_OSD_NEW:
+ return "new";
+ default:
+ return "???";
+ }
+}
+
+/* map a POOL_OP_* opcode to its name; "???" for unknown values */
+const char *ceph_pool_op_name(int op)
+{
+ switch (op) {
+ case POOL_OP_CREATE: return "create";
+ case POOL_OP_DELETE: return "delete";
+ case POOL_OP_AUID_CHANGE: return "auid change";
+ case POOL_OP_CREATE_SNAP: return "create snap";
+ case POOL_OP_DELETE_SNAP: return "delete snap";
+ case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap";
+ case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap";
+ }
+ return "???";
+}
diff --git a/libceph/crush/crush.c b/libceph/crush/crush.c
new file mode 100644
index 0000000..16bc199
--- /dev/null
+++ b/libceph/crush/crush.c
@@ -0,0 +1,129 @@
+
+#ifdef __KERNEL__
+# include <linux/slab.h>
+#else
+# include <stdlib.h>
+# include <assert.h>
+# define kfree(x) do { if (x) free(x); } while (0)
+# define BUG_ON(x) assert(!(x))
+#endif
+
+#include <linux/crush/crush.h>
+
+/* map a CRUSH_BUCKET_* algorithm id to its name; never returns NULL */
+const char *crush_bucket_alg_name(int alg)
+{
+ switch (alg) {
+ case CRUSH_BUCKET_UNIFORM: return "uniform";
+ case CRUSH_BUCKET_LIST: return "list";
+ case CRUSH_BUCKET_TREE: return "tree";
+ case CRUSH_BUCKET_STRAW: return "straw";
+ default: return "unknown";
+ }
+}
+
+/**
+ * crush_get_bucket_item_weight - Get weight of an item in given bucket
+ * @b: bucket pointer
+ * @p: item index in bucket
+ *
+ * Returns 0 for an out-of-range index or an unknown bucket algorithm.
+ * Each algorithm stores weights differently, hence the downcasts.
+ */
+int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
+{
+ if ((__u32)p >= b->size)
+ return 0;
+
+ switch (b->alg) {
+ case CRUSH_BUCKET_UNIFORM:
+ /* uniform buckets share one weight for every item */
+ return ((struct crush_bucket_uniform *)b)->item_weight;
+ case CRUSH_BUCKET_LIST:
+ return ((struct crush_bucket_list *)b)->item_weights[p];
+ case CRUSH_BUCKET_TREE:
+ return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
+ case CRUSH_BUCKET_STRAW:
+ return ((struct crush_bucket_straw *)b)->item_weights[p];
+ }
+ return 0;
+}
+
+/*
+ * Per-algorithm bucket destructors: free each algorithm's extra arrays,
+ * then the common header arrays (perm, items), then the bucket itself.
+ * kfree(NULL) is a no-op, so partially built buckets are handled.
+ */
+void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b)
+{
+ kfree(b->h.perm);
+ kfree(b->h.items);
+ kfree(b);
+}
+
+void crush_destroy_bucket_list(struct crush_bucket_list *b)
+{
+ kfree(b->item_weights);
+ kfree(b->sum_weights);
+ kfree(b->h.perm);
+ kfree(b->h.items);
+ kfree(b);
+}
+
+void crush_destroy_bucket_tree(struct crush_bucket_tree *b)
+{
+ kfree(b->h.perm);
+ kfree(b->h.items);
+ kfree(b->node_weights);
+ kfree(b);
+}
+
+void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
+{
+ kfree(b->straws);
+ kfree(b->item_weights);
+ kfree(b->h.perm);
+ kfree(b->h.items);
+ kfree(b);
+}
+
+/* dispatch to the per-algorithm destructor for @b */
+void crush_destroy_bucket(struct crush_bucket *b)
+{
+ switch (b->alg) {
+ case CRUSH_BUCKET_UNIFORM:
+ crush_destroy_bucket_uniform((struct crush_bucket_uniform *)b);
+ break;
+ case CRUSH_BUCKET_LIST:
+ crush_destroy_bucket_list((struct crush_bucket_list *)b);
+ break;
+ case CRUSH_BUCKET_TREE:
+ crush_destroy_bucket_tree((struct crush_bucket_tree *)b);
+ break;
+ case CRUSH_BUCKET_STRAW:
+ crush_destroy_bucket_straw((struct crush_bucket_straw *)b);
+ break;
+ }
+}
+
+/**
+ * crush_destroy - Destroy a crush_map
+ * @map: crush_map pointer
+ *
+ * Frees all buckets and rules, tolerating sparse (NULL) bucket slots.
+ */
+void crush_destroy(struct crush_map *map)
+{
+ /* buckets */
+ if (map->buckets) {
+ __s32 b;
+ for (b = 0; b < map->max_buckets; b++) {
+ if (map->buckets[b] == NULL)
+ continue;
+ crush_destroy_bucket(map->buckets[b]);
+ }
+ kfree(map->buckets);
+ }
+
+ /* rules */
+ if (map->rules) {
+ __u32 b;
+ for (b = 0; b < map->max_rules; b++)
+ crush_destroy_rule(map->rules[b]);
+ kfree(map->rules);
+ }
+
+ kfree(map);
+}
+
+/* rules are a single allocation (steps are a flexible tail) */
+void crush_destroy_rule(struct crush_rule *rule)
+{
+ kfree(rule);
+}
diff --git a/libceph/crush/hash.c b/libceph/crush/hash.c
new file mode 100644
index 0000000..5bb63e3
--- /dev/null
+++ b/libceph/crush/hash.c
@@ -0,0 +1,149 @@
+
+#include <linux/types.h>
+#include <linux/crush/hash.h>
+
+/*
+ * Robert Jenkins' function for mixing 32-bit values
+ * http://burtleburtle.net/bob/hash/evahash.html
+ * a, b = random bits, c = input and output
+ */
+#define crush_hashmix(a, b, c) do { \
+ a = a-b; a = a-c; a = a^(c>>13); \
+ b = b-c; b = b-a; b = b^(a<<8); \
+ c = c-a; c = c-b; c = c^(b>>13); \
+ a = a-b; a = a-c; a = a^(c>>12); \
+ b = b-c; b = b-a; b = b^(a<<16); \
+ c = c-a; c = c-b; c = c^(b>>5); \
+ a = a-b; a = a-c; a = a^(c>>3); \
+ b = b-c; b = b-a; b = b^(a<<10); \
+ c = c-a; c = c-b; c = c^(b>>15); \
+ } while (0)
+
+#define crush_hash_seed 1315423911
+
+/*
+ * Fixed-arity rjenkins1 variants.  The exact constants and mixing order
+ * are part of the CRUSH placement algorithm and must not change, or
+ * existing clusters would map objects differently.
+ */
+static __u32 crush_hash32_rjenkins1(__u32 a)
+{
+ __u32 hash = crush_hash_seed ^ a;
+ __u32 b = a;
+ __u32 x = 231232;
+ __u32 y = 1232;
+ crush_hashmix(b, x, hash);
+ crush_hashmix(y, a, hash);
+ return hash;
+}
+
+static __u32 crush_hash32_rjenkins1_2(__u32 a, __u32 b)
+{
+ __u32 hash = crush_hash_seed ^ a ^ b;
+ __u32 x = 231232;
+ __u32 y = 1232;
+ crush_hashmix(a, b, hash);
+ crush_hashmix(x, a, hash);
+ crush_hashmix(b, y, hash);
+ return hash;
+}
+
+static __u32 crush_hash32_rjenkins1_3(__u32 a, __u32 b, __u32 c)
+{
+ __u32 hash = crush_hash_seed ^ a ^ b ^ c;
+ __u32 x = 231232;
+ __u32 y = 1232;
+ crush_hashmix(a, b, hash);
+ crush_hashmix(c, x, hash);
+ crush_hashmix(y, a, hash);
+ crush_hashmix(b, x, hash);
+ crush_hashmix(y, c, hash);
+ return hash;
+}
+
+static __u32 crush_hash32_rjenkins1_4(__u32 a, __u32 b, __u32 c, __u32 d)
+{
+ __u32 hash = crush_hash_seed ^ a ^ b ^ c ^ d;
+ __u32 x = 231232;
+ __u32 y = 1232;
+ crush_hashmix(a, b, hash);
+ crush_hashmix(c, d, hash);
+ crush_hashmix(a, x, hash);
+ crush_hashmix(y, b, hash);
+ crush_hashmix(c, x, hash);
+ crush_hashmix(y, d, hash);
+ return hash;
+}
+
+static __u32 crush_hash32_rjenkins1_5(__u32 a, __u32 b, __u32 c, __u32 d,
+ __u32 e)
+{
+ __u32 hash = crush_hash_seed ^ a ^ b ^ c ^ d ^ e;
+ __u32 x = 231232;
+ __u32 y = 1232;
+ crush_hashmix(a, b, hash);
+ crush_hashmix(c, d, hash);
+ crush_hashmix(e, x, hash);
+ crush_hashmix(y, a, hash);
+ crush_hashmix(b, x, hash);
+ crush_hashmix(y, c, hash);
+ crush_hashmix(d, x, hash);
+ crush_hashmix(y, e, hash);
+ return hash;
+}
+
+
+/*
+ * Public dispatchers: select a hash implementation by CRUSH_HASH_*
+ * type.  Unknown types hash to 0 rather than failing.
+ */
+__u32 crush_hash32(int type, __u32 a)
+{
+ switch (type) {
+ case CRUSH_HASH_RJENKINS1:
+ return crush_hash32_rjenkins1(a);
+ default:
+ return 0;
+ }
+}
+
+__u32 crush_hash32_2(int type, __u32 a, __u32 b)
+{
+ switch (type) {
+ case CRUSH_HASH_RJENKINS1:
+ return crush_hash32_rjenkins1_2(a, b);
+ default:
+ return 0;
+ }
+}
+
+__u32 crush_hash32_3(int type, __u32 a, __u32 b, __u32 c)
+{
+ switch (type) {
+ case CRUSH_HASH_RJENKINS1:
+ return crush_hash32_rjenkins1_3(a, b, c);
+ default:
+ return 0;
+ }
+}
+
+__u32 crush_hash32_4(int type, __u32 a, __u32 b, __u32 c, __u32 d)
+{
+ switch (type) {
+ case CRUSH_HASH_RJENKINS1:
+ return crush_hash32_rjenkins1_4(a, b, c, d);
+ default:
+ return 0;
+ }
+}
+
+__u32 crush_hash32_5(int type, __u32 a, __u32 b, __u32 c, __u32 d, __u32 e)
+{
+ switch (type) {
+ case CRUSH_HASH_RJENKINS1:
+ return crush_hash32_rjenkins1_5(a, b, c, d, e);
+ default:
+ return 0;
+ }
+}
+
+/* human-readable name of a CRUSH_HASH_* type, for debug output */
+const char *crush_hash_name(int type)
+{
+ switch (type) {
+ case CRUSH_HASH_RJENKINS1:
+ return "rjenkins1";
+ default:
+ return "unknown";
+ }
+}
diff --git a/libceph/crush/mapper.c b/libceph/crush/mapper.c
new file mode 100644
index 0000000..a1ef53c
--- /dev/null
+++ b/libceph/crush/mapper.c
@@ -0,0 +1,819 @@
+
+#ifdef __KERNEL__
+# include <linux/string.h>
+# include <linux/slab.h>
+# include <linux/bug.h>
+# include <linux/kernel.h>
+# ifndef dprintk
+# define dprintk(args...)
+# endif
+#else
+# include <string.h>
+# include <stdio.h>
+# include <stdlib.h>
+# include <assert.h>
+# define BUG_ON(x) assert(!(x))
+# define dprintk(args...) /* printf(args) */
+# define kmalloc(x, f) malloc(x)
+# define kfree(x) free(x)
+#endif
+
+#include <linux/crush/crush.h>
+#include <linux/crush/hash.h>
+#include <linux/crush/mapper.h>
+
+/*
+ * Implement the core CRUSH mapping algorithm.
+ */
+
+/**
+ * crush_find_rule - find a crush_rule id for a given ruleset, type, and size.
+ * @map: the crush_map
+ * @ruleset: the storage ruleset id (user defined)
+ * @type: storage ruleset type (user defined)
+ * @size: output set size
+ *
+ * Returns the index of the first rule whose mask matches the ruleset,
+ * type, and whose [min_size, max_size] range contains @size, or -1 if
+ * no rule matches.
+ */
+int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size)
+{
+ __u32 i;
+
+ for (i = 0; i < map->max_rules; i++) {
+ if (map->rules[i] &&
+ map->rules[i]->mask.ruleset == ruleset &&
+ map->rules[i]->mask.type == type &&
+ map->rules[i]->mask.min_size <= size &&
+ map->rules[i]->mask.max_size >= size)
+ return i;
+ }
+ return -1;
+}
+
+
+/*
+ * bucket choose methods
+ *
+ * For each bucket algorithm, we have a "choose" method that, given a
+ * crush input @x and replica position (usually, position in output set) @r,
+ * will produce an item in the bucket.
+ */
+
+/*
+ * Choose based on a random permutation of the bucket.
+ *
+ * We used to use some prime number arithmetic to do this, but it
+ * wasn't very random, and had some other bad behaviors. Instead, we
+ * calculate an actual random permutation of the bucket members.
+ * Since this is expensive, we optimize for the r=0 case, which
+ * captures the vast majority of calls.
+ *
+ * The permutation state (perm, perm_n, perm_x) is cached in the bucket
+ * itself, keyed by input @x, so consecutive calls with the same x extend
+ * the same Fisher-Yates shuffle instead of recomputing it.
+ * NOTE(review): this mutates shared bucket state; presumably callers
+ * serialize access to the map -- confirm locking at the call sites.
+ */
+static int bucket_perm_choose(struct crush_bucket *bucket,
+ int x, int r)
+{
+ unsigned int pr = r % bucket->size;
+ unsigned int i, s;
+
+ /* start a new permutation if @x has changed */
+ if (bucket->perm_x != (__u32)x || bucket->perm_n == 0) {
+ dprintk("bucket %d new x=%d\n", bucket->id, x);
+ bucket->perm_x = x;
+
+ /* optimize common r=0 case */
+ if (pr == 0) {
+ s = crush_hash32_3(bucket->hash, x, bucket->id, 0) %
+ bucket->size;
+ bucket->perm[0] = s;
+ bucket->perm_n = 0xffff; /* magic value, see below */
+ goto out;
+ }
+
+ for (i = 0; i < bucket->size; i++)
+ bucket->perm[i] = i;
+ bucket->perm_n = 0;
+ } else if (bucket->perm_n == 0xffff) {
+ /* clean up after the r=0 case above */
+ for (i = 1; i < bucket->size; i++)
+ bucket->perm[i] = i;
+ bucket->perm[bucket->perm[0]] = 0;
+ bucket->perm_n = 1;
+ }
+
+ /* calculate permutation up to pr */
+ for (i = 0; i < bucket->perm_n; i++)
+ dprintk(" perm_choose have %d: %d\n", i, bucket->perm[i]);
+ while (bucket->perm_n <= pr) {
+ unsigned int p = bucket->perm_n;
+ /* no point in swapping the final entry */
+ if (p < bucket->size - 1) {
+ i = crush_hash32_3(bucket->hash, x, bucket->id, p) %
+ (bucket->size - p);
+ if (i) {
+ unsigned int t = bucket->perm[p + i];
+ bucket->perm[p + i] = bucket->perm[p];
+ bucket->perm[p] = t;
+ }
+ dprintk(" perm_choose swap %d with %d\n", p, p+i);
+ }
+ bucket->perm_n++;
+ }
+ for (i = 0; i < bucket->size; i++)
+ dprintk(" perm_choose %d: %d\n", i, bucket->perm[i]);
+
+ s = bucket->perm[pr];
+out:
+ dprintk(" perm_choose %d sz=%d x=%d r=%d (%d) s=%d\n", bucket->id,
+ bucket->size, x, r, pr, s);
+ return bucket->items[s];
+}
+
+/* uniform: all items weighted equally, so a permutation pick suffices */
+static int bucket_uniform_choose(struct crush_bucket_uniform *bucket,
+ int x, int r)
+{
+ return bucket_perm_choose(&bucket->h, x, r);
+}
+
+/* list */
+/*
+ * Walk the item list from most- to least-recently added.  For each item,
+ * draw a 16-bit fixed-point random value and keep the item if
+ * draw * sum_weights[i] >> 16 falls below that item's own weight, i.e.
+ * each item is taken with probability item_weight / cumulative_weight.
+ */
+static int bucket_list_choose(struct crush_bucket_list *bucket,
+ int x, int r)
+{
+ int i;
+
+ for (i = bucket->h.size-1; i >= 0; i--) {
+ __u64 w = crush_hash32_4(bucket->h.hash,x, bucket->h.items[i],
+ r, bucket->h.id);
+ w &= 0xffff;
+ dprintk("list_choose i=%d x=%d r=%d item %d weight %x "
+ "sw %x rand %llx",
+ i, x, r, bucket->h.items[i], bucket->item_weights[i],
+ bucket->sum_weights[i], w);
+ w *= bucket->sum_weights[i];
+ w = w >> 16;
+ /*dprintk(" scaled %llx\n", w);*/
+ if (w < bucket->item_weights[i])
+ return bucket->h.items[i];
+ }
+
+ /* should not happen if the precomputed sums are consistent */
+ dprintk("bad list sums for bucket %d\n", bucket->h.id);
+ return bucket->h.items[0];
+}
+
+
+/* (binary) tree */
+/*
+ * The tree bucket stores its nodes in a flat array using a 1-based
+ * numbering where a node's height equals the number of trailing zero
+ * bits in its index: odd indices are leaves (height 0), and the children
+ * of node x at height h are x +/- 2^(h-1).
+ */
+
+/* height of node @n = count of trailing zero bits in its index */
+static int height(int n)
+{
+ int h = 0;
+ while ((n & 1) == 0) {
+ h++;
+ n = n >> 1;
+ }
+ return h;
+}
+
+/* index of the left child of node @x */
+static int left(int x)
+{
+ int h = height(x);
+ return x - (1 << (h-1));
+}
+
+/* index of the right child of node @x */
+static int right(int x)
+{
+ int h = height(x);
+ return x + (1 << (h-1));
+}
+
+/* leaves have odd indices */
+static int terminal(int x)
+{
+ return x & 1;
+}
+
+/*
+ * Descend the weight tree from the root: at each internal node, draw a
+ * hashed point in [0, node_weight) and go left if it lands within the
+ * left subtree's weight, right otherwise.  The leaf index maps to an
+ * item via n >> 1.
+ */
+static int bucket_tree_choose(struct crush_bucket_tree *bucket,
+ int x, int r)
+{
+ int n;
+ __u32 w;
+ __u64 t;
+
+ /* start at root */
+ n = bucket->num_nodes >> 1;
+
+ while (!terminal(n)) {
+ int l;
+ /* pick point in [0, w) */
+ w = bucket->node_weights[n];
+ t = (__u64)crush_hash32_4(bucket->h.hash, x, n, r,
+ bucket->h.id) * (__u64)w;
+ t = t >> 32;
+
+ /* descend to the left or right? */
+ l = left(n);
+ if (t < bucket->node_weights[l])
+ n = l;
+ else
+ n = right(n);
+ }
+
+ return bucket->h.items[n >> 1];
+}
+
+
+/* straw */
+
+/*
+ * Each item draws a hashed 16-bit "straw" scaled by its precomputed
+ * straw length; the item with the longest scaled straw wins.  This gives
+ * weight-proportional selection that is stable under item add/remove.
+ */
+static int bucket_straw_choose(struct crush_bucket_straw *bucket,
+ int x, int r)
+{
+ __u32 i;
+ int high = 0;
+ __u64 high_draw = 0;
+ __u64 draw;
+
+ for (i = 0; i < bucket->h.size; i++) {
+ draw = crush_hash32_3(bucket->h.hash, x, bucket->h.items[i], r);
+ draw &= 0xffff;
+ draw *= bucket->straws[i];
+ if (i == 0 || draw > high_draw) {
+ high = i;
+ high_draw = draw;
+ }
+ }
+ return bucket->h.items[high];
+}
+
+/*
+ * Dispatch to the per-algorithm choose method for bucket @in.
+ * Falls back to the first item (with a debug message) for an
+ * unrecognized algorithm.  The bucket must be non-empty.
+ */
+static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
+{
+ dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
+ BUG_ON(in->size == 0);
+ switch (in->alg) {
+ case CRUSH_BUCKET_UNIFORM:
+ return bucket_uniform_choose((struct crush_bucket_uniform *)in,
+ x, r);
+ case CRUSH_BUCKET_LIST:
+ return bucket_list_choose((struct crush_bucket_list *)in,
+ x, r);
+ case CRUSH_BUCKET_TREE:
+ return bucket_tree_choose((struct crush_bucket_tree *)in,
+ x, r);
+ case CRUSH_BUCKET_STRAW:
+ return bucket_straw_choose((struct crush_bucket_straw *)in,
+ x, r);
+ default:
+ dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
+ return in->items[0];
+ }
+}
+
+/*
+ * true if device is marked "out" (failed, fully offloaded)
+ * of the cluster
+ *
+ * Weights are 16.16 fixed point: >= 0x10000 means fully in, 0 means
+ * fully out, and intermediate values reject a hashed fraction of
+ * inputs proportional to the offload.  Items beyond the weight vector
+ * are treated as out.
+ */
+static int is_out(const struct crush_map *map,
+ const __u32 *weight, int weight_max,
+ int item, int x)
+{
+ if (item >= weight_max)
+ return 1;
+ if (weight[item] >= 0x10000)
+ return 0;
+ if (weight[item] == 0)
+ return 1;
+ if ((crush_hash32_2(CRUSH_HASH_RJENKINS1, x, item) & 0xffff)
+ < weight[item])
+ return 0;
+ return 1;
+}
+
+/**
+ * crush_choose_firstn - choose numrep distinct items of given type
+ * @map: the crush_map
+ * @bucket: the bucket we are choose an item from
+ * @x: crush input value
+ * @numrep: the number of items to choose
+ * @type: the type of item to choose
+ * @out: pointer to output vector
+ * @outpos: our position in that vector
+ * @tries: number of attempts to make
+ * @recurse_tries: number of attempts to have recursive chooseleaf make
+ * @local_retries: localized retries
+ * @local_fallback_retries: localized fallback retries
+ * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
+ * @vary_r: pass r to recursive calls
+ * @out2: second output vector for leaf items (if @recurse_to_leaf)
+ * @parent_r: r value passed from the parent
+ *
+ * Returns the new output position, i.e. outpos plus the number of items
+ * actually placed in @out (and @out2 when recursing); may be less than
+ * @numrep if the retry budgets are exhausted.
+ */
+static int crush_choose_firstn(const struct crush_map *map,
+ struct crush_bucket *bucket,
+ const __u32 *weight, int weight_max,
+ int x, int numrep, int type,
+ int *out, int outpos,
+ unsigned int tries,
+ unsigned int recurse_tries,
+ unsigned int local_retries,
+ unsigned int local_fallback_retries,
+ int recurse_to_leaf,
+ unsigned int vary_r,
+ int *out2,
+ int parent_r)
+{
+ int rep;
+ unsigned int ftotal, flocal;
+ int retry_descent, retry_bucket, skip_rep;
+ struct crush_bucket *in = bucket;
+ int r;
+ int i;
+ int item = 0;
+ int itemtype;
+ int collide, reject;
+
+ dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
+ recurse_to_leaf ? "_LEAF" : "",
+ bucket->id, x, outpos, numrep,
+ tries, recurse_tries, local_retries, local_fallback_retries,
+ parent_r);
+
+ for (rep = outpos; rep < numrep; rep++) {
+ /* keep trying until we get a non-out, non-colliding item */
+ ftotal = 0;
+ skip_rep = 0;
+ do {
+ retry_descent = 0;
+ in = bucket; /* initial bucket */
+
+ /* choose through intervening buckets */
+ flocal = 0;
+ do {
+ collide = 0;
+ retry_bucket = 0;
+ r = rep + parent_r;
+ /* r' = r + f_total */
+ r += ftotal;
+
+ /* bucket choose */
+ if (in->size == 0) {
+ reject = 1;
+ goto reject;
+ }
+ if (local_fallback_retries > 0 &&
+ flocal >= (in->size>>1) &&
+ flocal > local_fallback_retries)
+ item = bucket_perm_choose(in, x, r);
+ else
+ item = crush_bucket_choose(in, x, r);
+ if (item >= map->max_devices) {
+ dprintk(" bad item %d\n", item);
+ skip_rep = 1;
+ break;
+ }
+
+ /* desired type? (negative ids are buckets, >= 0 are devices) */
+ if (item < 0)
+ itemtype = map->buckets[-1-item]->type;
+ else
+ itemtype = 0;
+ dprintk(" item %d type %d\n", item, itemtype);
+
+ /* keep going? */
+ if (itemtype != type) {
+ if (item >= 0 ||
+ (-1-item) >= map->max_buckets) {
+ dprintk(" bad item type %d\n", type);
+ skip_rep = 1;
+ break;
+ }
+ in = map->buckets[-1-item];
+ retry_bucket = 1;
+ continue;
+ }
+
+ /* collision? */
+ for (i = 0; i < outpos; i++) {
+ if (out[i] == item) {
+ collide = 1;
+ break;
+ }
+ }
+
+ reject = 0;
+ if (!collide && recurse_to_leaf) {
+ if (item < 0) {
+ int sub_r;
+ if (vary_r)
+ sub_r = r >> (vary_r-1);
+ else
+ sub_r = 0;
+ if (crush_choose_firstn(map,
+ map->buckets[-1-item],
+ weight, weight_max,
+ x, outpos+1, 0,
+ out2, outpos,
+ recurse_tries, 0,
+ local_retries,
+ local_fallback_retries,
+ 0,
+ vary_r,
+ NULL,
+ sub_r) <= outpos)
+ /* didn't get leaf */
+ reject = 1;
+ } else {
+ /* we already have a leaf! */
+ out2[outpos] = item;
+ }
+ }
+
+ if (!reject) {
+ /* out? */
+ if (itemtype == 0)
+ reject = is_out(map, weight,
+ weight_max,
+ item, x);
+ else
+ reject = 0;
+ }
+
+reject:
+ if (reject || collide) {
+ ftotal++;
+ flocal++;
+
+ if (collide && flocal <= local_retries)
+ /* retry locally a few times */
+ retry_bucket = 1;
+ else if (local_fallback_retries > 0 &&
+ flocal <= in->size + local_fallback_retries)
+ /* exhaustive bucket search */
+ retry_bucket = 1;
+ else if (ftotal < tries)
+ /* then retry descent */
+ retry_descent = 1;
+ else
+ /* else give up */
+ skip_rep = 1;
+ dprintk(" reject %d collide %d "
+ "ftotal %u flocal %u\n",
+ reject, collide, ftotal,
+ flocal);
+ }
+ } while (retry_bucket);
+ } while (retry_descent);
+
+ if (skip_rep) {
+ dprintk("skip rep\n");
+ continue;
+ }
+
+ dprintk("CHOOSE got %d\n", item);
+ out[outpos] = item;
+ outpos++;
+ }
+
+ dprintk("CHOOSE returns %d\n", outpos);
+ return outpos;
+}
+
+
+/**
+ * crush_choose_indep: alternative breadth-first positionally stable mapping
+ *
+ * Unlike crush_choose_firstn, failed slots keep their position: result
+ * slots [outpos, outpos+left) are filled independently, and a slot that
+ * cannot be satisfied within @tries attempts is left as CRUSH_ITEM_NONE
+ * rather than being compacted away.  Used for erasure-coded placement,
+ * where position in the output set is meaningful.
+ */
+static void crush_choose_indep(const struct crush_map *map,
+ struct crush_bucket *bucket,
+ const __u32 *weight, int weight_max,
+ int x, int left, int numrep, int type,
+ int *out, int outpos,
+ unsigned int tries,
+ unsigned int recurse_tries,
+ int recurse_to_leaf,
+ int *out2,
+ int parent_r)
+{
+ struct crush_bucket *in = bucket;
+ int endpos = outpos + left;
+ int rep;
+ unsigned int ftotal;
+ int r;
+ int i;
+ int item = 0;
+ int itemtype;
+ int collide;
+
+ dprintk("CHOOSE%s INDEP bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
+ bucket->id, x, outpos, numrep);
+
+ /* initially my result is undefined */
+ for (rep = outpos; rep < endpos; rep++) {
+ out[rep] = CRUSH_ITEM_UNDEF;
+ if (out2)
+ out2[rep] = CRUSH_ITEM_UNDEF;
+ }
+
+ for (ftotal = 0; left > 0 && ftotal < tries; ftotal++) {
+ for (rep = outpos; rep < endpos; rep++) {
+ if (out[rep] != CRUSH_ITEM_UNDEF)
+ continue;
+
+ in = bucket; /* initial bucket */
+
+ /* choose through intervening buckets */
+ for (;;) {
+ /* note: we base the choice on the position
+ * even in the nested call. that means that
+ * if the first layer chooses the same bucket
+ * in a different position, we will tend to
+ * choose a different item in that bucket.
+ * this will involve more devices in data
+ * movement and tend to distribute the load.
+ */
+ r = rep + parent_r;
+
+ /* be careful */
+ if (in->alg == CRUSH_BUCKET_UNIFORM &&
+ in->size % numrep == 0)
+ /* r'=r+(n+1)*f_total */
+ r += (numrep+1) * ftotal;
+ else
+ /* r' = r + n*f_total */
+ r += numrep * ftotal;
+
+ /* bucket choose */
+ if (in->size == 0) {
+ dprintk(" empty bucket\n");
+ break;
+ }
+
+ item = crush_bucket_choose(in, x, r);
+ if (item >= map->max_devices) {
+ dprintk(" bad item %d\n", item);
+ out[rep] = CRUSH_ITEM_NONE;
+ if (out2)
+ out2[rep] = CRUSH_ITEM_NONE;
+ left--;
+ break;
+ }
+
+ /* desired type? (negative ids are buckets, >= 0 are devices) */
+ if (item < 0)
+ itemtype = map->buckets[-1-item]->type;
+ else
+ itemtype = 0;
+ dprintk(" item %d type %d\n", item, itemtype);
+
+ /* keep going? */
+ if (itemtype != type) {
+ if (item >= 0 ||
+ (-1-item) >= map->max_buckets) {
+ dprintk(" bad item type %d\n", type);
+ out[rep] = CRUSH_ITEM_NONE;
+ if (out2)
+ out2[rep] =
+ CRUSH_ITEM_NONE;
+ left--;
+ break;
+ }
+ in = map->buckets[-1-item];
+ continue;
+ }
+
+ /* collision? */
+ collide = 0;
+ for (i = outpos; i < endpos; i++) {
+ if (out[i] == item) {
+ collide = 1;
+ break;
+ }
+ }
+ if (collide)
+ break;
+
+ if (recurse_to_leaf) {
+ if (item < 0) {
+ crush_choose_indep(map,
+ map->buckets[-1-item],
+ weight, weight_max,
+ x, 1, numrep, 0,
+ out2, rep,
+ recurse_tries, 0,
+ 0, NULL, r);
+ if (out2[rep] == CRUSH_ITEM_NONE) {
+ /* placed nothing; no leaf */
+ break;
+ }
+ } else {
+ /* we already have a leaf! */
+ out2[rep] = item;
+ }
+ }
+
+ /* out? */
+ if (itemtype == 0 &&
+ is_out(map, weight, weight_max, item, x))
+ break;
+
+ /* yay! */
+ out[rep] = item;
+ left--;
+ break;
+ }
+ }
+ }
+ /* any slot still undefined after all tries is marked NONE */
+ for (rep = outpos; rep < endpos; rep++) {
+ if (out[rep] == CRUSH_ITEM_UNDEF) {
+ out[rep] = CRUSH_ITEM_NONE;
+ }
+ if (out2 && out2[rep] == CRUSH_ITEM_UNDEF) {
+ out2[rep] = CRUSH_ITEM_NONE;
+ }
+ }
+}
+
+/**
+ * crush_do_rule - calculate a mapping with the given input and rule
+ * @map: the crush_map
+ * @ruleno: the rule id
+ * @x: hash input
+ * @result: pointer to result vector
+ * @result_max: maximum result size
+ * @weight: weight vector (for map leaves)
+ * @weight_max: size of weight vector
+ * @scratch: scratch vector for private use; must be >= 3 * result_max
+ *
+ * Interprets the rule's step program (take / set_* / choose* / emit)
+ * and returns the number of items written to @result, or 0 for a bad
+ * @ruleno.
+ */
+int crush_do_rule(const struct crush_map *map,
+ int ruleno, int x, int *result, int result_max,
+ const __u32 *weight, int weight_max,
+ int *scratch)
+{
+ int result_len;
+ int *a = scratch;
+ int *b = scratch + result_max;
+ int *c = scratch + result_max*2;
+ int recurse_to_leaf;
+ int *w;
+ int wsize = 0;
+ int *o;
+ int osize;
+ int *tmp;
+ struct crush_rule *rule;
+ __u32 step;
+ int i, j;
+ int numrep;
+ /*
+ * the original choose_total_tries value was off by one (it
+ * counted "retries" and not "tries"). add one.
+ */
+ int choose_tries = map->choose_total_tries + 1;
+ int choose_leaf_tries = 0;
+ /*
+ * the local tries values were counted as "retries", though,
+ * and need no adjustment
+ */
+ int choose_local_retries = map->choose_local_tries;
+ int choose_local_fallback_retries = map->choose_local_fallback_tries;
+
+ int vary_r = map->chooseleaf_vary_r;
+
+ if ((__u32)ruleno >= map->max_rules) {
+ dprintk(" bad ruleno %d\n", ruleno);
+ return 0;
+ }
+
+ rule = map->rules[ruleno];
+ result_len = 0;
+ w = a;
+ o = b;
+
+ for (step = 0; step < rule->len; step++) {
+ int firstn = 0;
+ struct crush_rule_step *curstep = &rule->steps[step];
+
+ switch (curstep->op) {
+ case CRUSH_RULE_TAKE:
+ /* seed the working vector with a single bucket/device id */
+ w[0] = curstep->arg1;
+ wsize = 1;
+ break;
+
+ case CRUSH_RULE_SET_CHOOSE_TRIES:
+ if (curstep->arg1 > 0)
+ choose_tries = curstep->arg1;
+ break;
+
+ case CRUSH_RULE_SET_CHOOSELEAF_TRIES:
+ if (curstep->arg1 > 0)
+ choose_leaf_tries = curstep->arg1;
+ break;
+
+ case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES:
+ if (curstep->arg1 >= 0)
+ choose_local_retries = curstep->arg1;
+ break;
+
+ case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES:
+ if (curstep->arg1 >= 0)
+ choose_local_fallback_retries = curstep->arg1;
+ break;
+
+ case CRUSH_RULE_SET_CHOOSELEAF_VARY_R:
+ if (curstep->arg1 >= 0)
+ vary_r = curstep->arg1;
+ break;
+
+ case CRUSH_RULE_CHOOSELEAF_FIRSTN:
+ case CRUSH_RULE_CHOOSE_FIRSTN:
+ firstn = 1;
+ /* fall through */
+ case CRUSH_RULE_CHOOSELEAF_INDEP:
+ case CRUSH_RULE_CHOOSE_INDEP:
+ if (wsize == 0)
+ break;
+
+ recurse_to_leaf =
+ curstep->op ==
+ CRUSH_RULE_CHOOSELEAF_FIRSTN ||
+ curstep->op ==
+ CRUSH_RULE_CHOOSELEAF_INDEP;
+
+ /* reset output */
+ osize = 0;
+
+ for (i = 0; i < wsize; i++) {
+ /*
+ * see CRUSH_N, CRUSH_N_MINUS macros.
+ * basically, numrep <= 0 means relative to
+ * the provided result_max
+ */
+ numrep = curstep->arg1;
+ if (numrep <= 0) {
+ numrep += result_max;
+ if (numrep <= 0)
+ continue;
+ }
+ j = 0;
+ if (firstn) {
+ int recurse_tries;
+ if (choose_leaf_tries)
+ recurse_tries =
+ choose_leaf_tries;
+ else if (map->chooseleaf_descend_once)
+ recurse_tries = 1;
+ else
+ recurse_tries = choose_tries;
+ osize += crush_choose_firstn(
+ map,
+ map->buckets[-1-w[i]],
+ weight, weight_max,
+ x, numrep,
+ curstep->arg2,
+ o+osize, j,
+ choose_tries,
+ recurse_tries,
+ choose_local_retries,
+ choose_local_fallback_retries,
+ recurse_to_leaf,
+ vary_r,
+ c+osize,
+ 0);
+ } else {
+ crush_choose_indep(
+ map,
+ map->buckets[-1-w[i]],
+ weight, weight_max,
+ x, numrep, numrep,
+ curstep->arg2,
+ o+osize, j,
+ choose_tries,
+ choose_leaf_tries ?
+ choose_leaf_tries : 1,
+ recurse_to_leaf,
+ c+osize,
+ 0);
+ osize += numrep;
+ }
+ }
+
+ if (recurse_to_leaf)
+ /* copy final _leaf_ values to output set */
+ memcpy(o, c, osize*sizeof(*o));
+
+ /* swap o and w arrays */
+ tmp = o;
+ o = w;
+ w = tmp;
+ wsize = osize;
+ break;
+
+
+ case CRUSH_RULE_EMIT:
+ /* append working vector to the caller's result and clear it */
+ for (i = 0; i < wsize && result_len < result_max; i++) {
+ result[result_len] = w[i];
+ result_len++;
+ }
+ wsize = 0;
+ break;
+
+ default:
+ dprintk(" unknown op %d at step %d\n",
+ curstep->op, step);
+ break;
+ }
+ }
+ return result_len;
+}
+
+
diff --git a/libceph/crypto.c b/libceph/crypto.c
new file mode 100644
index 0000000..6e7a236
--- /dev/null
+++ b/libceph/crypto.c
@@ -0,0 +1,487 @@
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/err.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <crypto/hash.h>
+#include <linux/key-type.h>
+
+#include <keys/ceph-type.h>
+#include <linux/ceph/decode.h>
+#include "crypto.h"
+
+/*
+ * Deep-copy @src into @dst, duplicating the key material.  Returns 0 or
+ * -ENOMEM.  On success the caller owns dst->key and must release it via
+ * ceph_crypto_key_destroy().
+ */
+int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
+ const struct ceph_crypto_key *src)
+{
+ memcpy(dst, src, sizeof(struct ceph_crypto_key));
+ dst->key = kmemdup(src->key, src->len, GFP_NOFS);
+ if (!dst->key)
+ return -ENOMEM;
+ return 0;
+}
+
+/*
+ * Serialize @key into the buffer at *p (advancing *p): u16 type,
+ * creation timestamp, u16 length, then the raw key bytes.  Returns
+ * -ERANGE if the encoding would run past @end.
+ */
+int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end)
+{
+ if (*p + sizeof(u16) + sizeof(key->created) +
+ sizeof(u16) + key->len > end)
+ return -ERANGE;
+ ceph_encode_16(p, key->type);
+ ceph_encode_copy(p, &key->created, sizeof(key->created));
+ ceph_encode_16(p, key->len);
+ ceph_encode_copy(p, key->key, key->len);
+ return 0;
+}
+
+/*
+ * Inverse of ceph_crypto_key_encode(): parse a key from *p (advancing
+ * *p) and allocate key->key for the key material.  Returns 0, -EINVAL
+ * on a truncated buffer, or -ENOMEM.  On success the caller owns
+ * key->key.
+ */
+int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end)
+{
+ ceph_decode_need(p, end, 2*sizeof(u16) + sizeof(key->created), bad);
+ key->type = ceph_decode_16(p);
+ ceph_decode_copy(p, &key->created, sizeof(key->created));
+ key->len = ceph_decode_16(p);
+ ceph_decode_need(p, end, key->len, bad);
+ key->key = kmalloc(key->len, GFP_NOFS);
+ if (!key->key)
+ return -ENOMEM;
+ ceph_decode_copy(p, key->key, key->len);
+ return 0;
+
+bad:
+ dout("failed to decode crypto key\n");
+ return -EINVAL;
+}
+
+/*
+ * Parse a base64-armored key string (as found in a keyring file):
+ * unarmor into a temporary buffer, then decode it into @key.
+ * Returns 0 or a negative errno from unarmoring/decoding.
+ */
+int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey)
+{
+ int inlen = strlen(inkey);
+ int blen = inlen * 3 / 4;
+ void *buf, *p;
+ int ret;
+
+ dout("crypto_key_unarmor %s\n", inkey);
+ buf = kmalloc(blen, GFP_NOFS);
+ if (!buf)
+ return -ENOMEM;
+ blen = ceph_unarmor(buf, inkey, inkey+inlen);
+ if (blen < 0) {
+ kfree(buf);
+ return blen;
+ }
+
+ p = buf;
+ ret = ceph_crypto_key_decode(key, &p, p + blen);
+ kfree(buf);
+ if (ret)
+ return ret;
+ dout("crypto_key_unarmor key %p type %d len %d\n", key,
+ key->type, key->len);
+ return 0;
+}
+
+
+
+#define AES_KEY_SIZE 16
+
+/* Allocate a synchronous AES-CBC transform from the kernel crypto API. */
+static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void)
+{
+ return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
+}
+
+/* fixed IV (CEPH_AES_IV) shared by every encrypt/decrypt operation */
+static const u8 *aes_iv = (u8 *)CEPH_AES_IV;
+
+/*
+ * AES-CBC encrypt @src into @dst, appending padding so the plaintext
+ * length is a multiple of 16; every pad byte holds the pad length.
+ * *dst_len is set to the padded ciphertext length; dst must have room
+ * for src_len rounded up to the next 16-byte boundary.
+ */
+static int ceph_aes_encrypt(const void *key, int key_len,
+ void *dst, size_t *dst_len,
+ const void *src, size_t src_len)
+{
+ struct scatterlist sg_in[2], sg_out[1];
+ struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
+ struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 };
+ int ret;
+ void *iv;
+ int ivsize;
+ size_t zero_padding = (0x10 - (src_len & 0x0f));
+ char pad[16];
+
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ memset(pad, zero_padding, zero_padding);
+
+ *dst_len = src_len + zero_padding;
+
+ crypto_blkcipher_setkey((void *)tfm, key, key_len);
+ sg_init_table(sg_in, 2);
+ sg_set_buf(&sg_in[0], src, src_len);
+ sg_set_buf(&sg_in[1], pad, zero_padding);
+ sg_init_table(sg_out, 1);
+ sg_set_buf(sg_out, dst, *dst_len);
+ iv = crypto_blkcipher_crt(tfm)->iv;
+ ivsize = crypto_blkcipher_ivsize(tfm);
+
+ memcpy(iv, aes_iv, ivsize);
+ /*
+ print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1,
+ key, key_len, 1);
+ print_hex_dump(KERN_ERR, "enc src: ", DUMP_PREFIX_NONE, 16, 1,
+ src, src_len, 1);
+ print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1,
+ pad, zero_padding, 1);
+ */
+ ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in,
+ src_len + zero_padding);
+ crypto_free_blkcipher(tfm);
+ if (ret < 0)
+ pr_err("ceph_aes_crypt failed %d\n", ret);
+ /*
+ print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1,
+ dst, *dst_len, 1);
+ */
+ /* NOTE(review): an encrypt failure is only logged; 0 is returned
+ * regardless -- confirm callers tolerate this upstream behavior */
+ return 0;
+}
+
+/*
+ * Like ceph_aes_encrypt(), but encrypts the concatenation of two source
+ * buffers (plus pad-length padding) into @dst without copying them
+ * together first, by chaining them in one scatterlist.
+ */
+static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
+ size_t *dst_len,
+ const void *src1, size_t src1_len,
+ const void *src2, size_t src2_len)
+{
+ struct scatterlist sg_in[3], sg_out[1];
+ struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
+ struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 };
+ int ret;
+ void *iv;
+ int ivsize;
+ size_t zero_padding = (0x10 - ((src1_len + src2_len) & 0x0f));
+ char pad[16];
+
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ memset(pad, zero_padding, zero_padding);
+
+ *dst_len = src1_len + src2_len + zero_padding;
+
+ crypto_blkcipher_setkey((void *)tfm, key, key_len);
+ sg_init_table(sg_in, 3);
+ sg_set_buf(&sg_in[0], src1, src1_len);
+ sg_set_buf(&sg_in[1], src2, src2_len);
+ sg_set_buf(&sg_in[2], pad, zero_padding);
+ sg_init_table(sg_out, 1);
+ sg_set_buf(sg_out, dst, *dst_len);
+ iv = crypto_blkcipher_crt(tfm)->iv;
+ ivsize = crypto_blkcipher_ivsize(tfm);
+
+ memcpy(iv, aes_iv, ivsize);
+ /*
+ print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1,
+ key, key_len, 1);
+ print_hex_dump(KERN_ERR, "enc src1: ", DUMP_PREFIX_NONE, 16, 1,
+ src1, src1_len, 1);
+ print_hex_dump(KERN_ERR, "enc src2: ", DUMP_PREFIX_NONE, 16, 1,
+ src2, src2_len, 1);
+ print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1,
+ pad, zero_padding, 1);
+ */
+ ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in,
+ src1_len + src2_len + zero_padding);
+ crypto_free_blkcipher(tfm);
+ if (ret < 0)
+ pr_err("ceph_aes_crypt2 failed %d\n", ret);
+ /*
+ print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1,
+ dst, *dst_len, 1);
+ */
+ /* NOTE(review): as in ceph_aes_encrypt(), failures are logged but
+ * 0 is still returned */
+ return 0;
+}
+
+/*
+ * AES-CBC decrypt @src into @dst and strip the padding (the last
+ * plaintext byte holds the pad length; overflow past *dst_len lands in
+ * the local pad[] buffer).  On success *dst_len is set to the unpadded
+ * plaintext length.  Returns 0, a crypto-layer error, or -EPERM for a
+ * malformed pad byte.
+ */
+static int ceph_aes_decrypt(const void *key, int key_len,
+ void *dst, size_t *dst_len,
+ const void *src, size_t src_len)
+{
+ struct scatterlist sg_in[1], sg_out[2];
+ struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
+ struct blkcipher_desc desc = { .tfm = tfm };
+ char pad[16];
+ void *iv;
+ int ivsize;
+ int ret;
+ int last_byte;
+
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ crypto_blkcipher_setkey((void *)tfm, key, key_len);
+ sg_init_table(sg_in, 1);
+ sg_init_table(sg_out, 2);
+ sg_set_buf(sg_in, src, src_len);
+ sg_set_buf(&sg_out[0], dst, *dst_len);
+ sg_set_buf(&sg_out[1], pad, sizeof(pad));
+
+ iv = crypto_blkcipher_crt(tfm)->iv;
+ ivsize = crypto_blkcipher_ivsize(tfm);
+
+ memcpy(iv, aes_iv, ivsize);
+
+ /*
+ print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1,
+ key, key_len, 1);
+ print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1,
+ src, src_len, 1);
+ */
+
+ ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len);
+ crypto_free_blkcipher(tfm);
+ if (ret < 0) {
+ pr_err("ceph_aes_decrypt failed %d\n", ret);
+ return ret;
+ }
+
+ /* locate the final plaintext byte: either in dst or in the pad
+ * spill buffer */
+ if (src_len <= *dst_len)
+ last_byte = ((char *)dst)[src_len - 1];
+ else
+ last_byte = pad[src_len - *dst_len - 1];
+ if (last_byte <= 16 && src_len >= last_byte) {
+ *dst_len = src_len - last_byte;
+ } else {
+ pr_err("ceph_aes_decrypt got bad padding %d on src len %d\n",
+ last_byte, (int)src_len);
+ return -EPERM; /* bad padding */
+ }
+ /*
+ print_hex_dump(KERN_ERR, "dec out: ", DUMP_PREFIX_NONE, 16, 1,
+ dst, *dst_len, 1);
+ */
+ return 0;
+}
+
+/*
+ * Like ceph_aes_decrypt(), but splits the plaintext across two output
+ * buffers: dst1 is filled first (up to *dst1_len), the remainder goes
+ * to dst2.  *dst1_len/*dst2_len are updated to the bytes actually
+ * written.  Returns 0, a crypto-layer error, or -EPERM for bad padding.
+ */
+static int ceph_aes_decrypt2(const void *key, int key_len,
+ void *dst1, size_t *dst1_len,
+ void *dst2, size_t *dst2_len,
+ const void *src, size_t src_len)
+{
+ struct scatterlist sg_in[1], sg_out[3];
+ struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
+ struct blkcipher_desc desc = { .tfm = tfm };
+ char pad[16];
+ void *iv;
+ int ivsize;
+ int ret;
+ int last_byte;
+
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ sg_init_table(sg_in, 1);
+ sg_set_buf(sg_in, src, src_len);
+ sg_init_table(sg_out, 3);
+ sg_set_buf(&sg_out[0], dst1, *dst1_len);
+ sg_set_buf(&sg_out[1], dst2, *dst2_len);
+ sg_set_buf(&sg_out[2], pad, sizeof(pad));
+
+ crypto_blkcipher_setkey((void *)tfm, key, key_len);
+ iv = crypto_blkcipher_crt(tfm)->iv;
+ ivsize = crypto_blkcipher_ivsize(tfm);
+
+ memcpy(iv, aes_iv, ivsize);
+
+ /*
+ print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1,
+ key, key_len, 1);
+ print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1,
+ src, src_len, 1);
+ */
+
+ ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len);
+ crypto_free_blkcipher(tfm);
+ if (ret < 0) {
+ pr_err("ceph_aes_decrypt failed %d\n", ret);
+ return ret;
+ }
+
+ /* the final plaintext byte (pad length) may land in dst1, dst2,
+ * or the pad spill buffer depending on the output sizes */
+ if (src_len <= *dst1_len)
+ last_byte = ((char *)dst1)[src_len - 1];
+ else if (src_len <= *dst1_len + *dst2_len)
+ last_byte = ((char *)dst2)[src_len - *dst1_len - 1];
+ else
+ last_byte = pad[src_len - *dst1_len - *dst2_len - 1];
+ if (last_byte <= 16 && src_len >= last_byte) {
+ src_len -= last_byte;
+ } else {
+ pr_err("ceph_aes_decrypt got bad padding %d on src len %d\n",
+ last_byte, (int)src_len);
+ return -EPERM; /* bad padding */
+ }
+
+ if (src_len < *dst1_len) {
+ *dst1_len = src_len;
+ *dst2_len = 0;
+ } else {
+ *dst2_len = src_len - *dst1_len;
+ }
+ /*
+ print_hex_dump(KERN_ERR, "dec out1: ", DUMP_PREFIX_NONE, 16, 1,
+ dst1, *dst1_len, 1);
+ print_hex_dump(KERN_ERR, "dec out2: ", DUMP_PREFIX_NONE, 16, 1,
+ dst2, *dst2_len, 1);
+ */
+
+ return 0;
+}
+
+
+/*
+ * Decrypt @src into @dst according to the secret's cipher type:
+ * CEPH_CRYPTO_NONE is a bounds-checked copy, CEPH_CRYPTO_AES delegates
+ * to ceph_aes_decrypt().  Returns 0 or a negative errno.
+ */
+int ceph_decrypt(struct ceph_crypto_key *secret, void *dst, size_t *dst_len,
+ const void *src, size_t src_len)
+{
+ switch (secret->type) {
+ case CEPH_CRYPTO_NONE:
+ if (*dst_len < src_len)
+ return -ERANGE;
+ memcpy(dst, src, src_len);
+ *dst_len = src_len;
+ return 0;
+
+ case CEPH_CRYPTO_AES:
+ return ceph_aes_decrypt(secret->key, secret->len, dst,
+ dst_len, src, src_len);
+
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * Two-output variant of ceph_decrypt(): plaintext fills @dst1 first,
+ * any remainder goes to @dst2.  For CEPH_CRYPTO_NONE this is a plain
+ * split copy; for CEPH_CRYPTO_AES it delegates to ceph_aes_decrypt2().
+ */
+int ceph_decrypt2(struct ceph_crypto_key *secret,
+ void *dst1, size_t *dst1_len,
+ void *dst2, size_t *dst2_len,
+ const void *src, size_t src_len)
+{
+ size_t t;
+
+ switch (secret->type) {
+ case CEPH_CRYPTO_NONE:
+ if (*dst1_len + *dst2_len < src_len)
+ return -ERANGE;
+ t = min(*dst1_len, src_len);
+ memcpy(dst1, src, t);
+ *dst1_len = t;
+ src += t;
+ src_len -= t;
+ if (src_len) {
+ t = min(*dst2_len, src_len);
+ memcpy(dst2, src, t);
+ *dst2_len = t;
+ }
+ return 0;
+
+ case CEPH_CRYPTO_AES:
+ return ceph_aes_decrypt2(secret->key, secret->len,
+ dst1, dst1_len, dst2, dst2_len,
+ src, src_len);
+
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * Encrypt @src into @dst according to the secret's cipher type:
+ * CEPH_CRYPTO_NONE is a bounds-checked copy, CEPH_CRYPTO_AES delegates
+ * to ceph_aes_encrypt().  Returns 0 or a negative errno.
+ */
+int ceph_encrypt(struct ceph_crypto_key *secret, void *dst, size_t *dst_len,
+ const void *src, size_t src_len)
+{
+ switch (secret->type) {
+ case CEPH_CRYPTO_NONE:
+ if (*dst_len < src_len)
+ return -ERANGE;
+ memcpy(dst, src, src_len);
+ *dst_len = src_len;
+ return 0;
+
+ case CEPH_CRYPTO_AES:
+ return ceph_aes_encrypt(secret->key, secret->len, dst,
+ dst_len, src, src_len);
+
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * Encrypt the concatenation of @src1 and @src2 into @dst.  For
+ * CEPH_CRYPTO_NONE the two buffers are simply copied back to back;
+ * for CEPH_CRYPTO_AES it delegates to ceph_aes_encrypt2().
+ */
+int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len,
+ const void *src1, size_t src1_len,
+ const void *src2, size_t src2_len)
+{
+ switch (secret->type) {
+ case CEPH_CRYPTO_NONE:
+ if (*dst_len < src1_len + src2_len)
+ return -ERANGE;
+ memcpy(dst, src1, src1_len);
+ memcpy(dst + src1_len, src2, src2_len);
+ *dst_len = src1_len + src2_len;
+ return 0;
+
+ case CEPH_CRYPTO_AES:
+ return ceph_aes_encrypt2(secret->key, secret->len, dst, dst_len,
+ src1, src1_len, src2, src2_len);
+
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * key_type .instantiate hook: parse the caller-supplied payload into a
+ * freshly allocated ceph_crypto_key and hang it off the kernel key.
+ * Returns 0, -EINVAL for a bad/oversized payload, or -ENOMEM.
+ */
+static int ceph_key_instantiate(struct key *key,
+ struct key_preparsed_payload *prep)
+{
+ struct ceph_crypto_key *ckey;
+ size_t datalen = prep->datalen;
+ int ret;
+ void *p;
+
+ ret = -EINVAL;
+ if (datalen <= 0 || datalen > 32767 || !prep->data)
+ goto err;
+
+ ret = key_payload_reserve(key, datalen);
+ if (ret < 0)
+ goto err;
+
+ ret = -ENOMEM;
+ ckey = kmalloc(sizeof(*ckey), GFP_KERNEL);
+ if (!ckey)
+ goto err;
+
+ /* TODO ceph_crypto_key_decode should really take const input */
+ p = (void *)prep->data;
+ ret = ceph_crypto_key_decode(ckey, &p, (char*)prep->data+datalen);
+ if (ret < 0)
+ goto err_ckey;
+
+ key->payload.data = ckey;
+ return 0;
+
+err_ckey:
+ kfree(ckey);
+err:
+ return ret;
+}
+
+/* key_type .match hook: exact description-string comparison */
+static int ceph_key_match(const struct key *key, const void *description)
+{
+ return strcmp(key->description, description) == 0;
+}
+
+/* key_type .destroy hook: free the decoded key material and wrapper */
+static void ceph_key_destroy(struct key *key) {
+ struct ceph_crypto_key *ckey = key->payload.data;
+
+ ceph_crypto_key_destroy(ckey);
+ kfree(ckey);
+}
+
+/* "ceph" key type, registered with the kernel keyring subsystem */
+struct key_type key_type_ceph = {
+ .name = "ceph",
+ .instantiate = ceph_key_instantiate,
+ .match = ceph_key_match,
+ .destroy = ceph_key_destroy,
+};
+
+/* Register the "ceph" key type; called once at module init. */
+int ceph_crypto_init(void) {
+ return register_key_type(&key_type_ceph);
+}
+
+/* Unregister the "ceph" key type; called at module exit. */
+void ceph_crypto_shutdown(void) {
+ unregister_key_type(&key_type_ceph);
+}
diff --git a/libceph/crypto.h b/libceph/crypto.h
new file mode 100644
index 0000000..d149822
--- /dev/null
+++ b/libceph/crypto.h
@@ -0,0 +1,51 @@
+#ifndef _FS_CEPH_CRYPTO_H
+#define _FS_CEPH_CRYPTO_H
+
+#include <linux/ceph/types.h>
+#include <linux/ceph/buffer.h>
+
+/*
+ * cryptographic secret
+ *
+ * key points to kmalloc'd key material of length len; release it with
+ * ceph_crypto_key_destroy().  type is a CEPH_CRYPTO_* cipher id.
+ */
+struct ceph_crypto_key {
+ int type;
+ struct ceph_timespec created;
+ int len;
+ void *key;
+};
+
+/* Free the key material (the struct itself is owned by the caller). */
+static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
+{
+ if (key)
+ kfree(key->key);
+}
+
+int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
+ const struct ceph_crypto_key *src);
+int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end);
+int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end);
+int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *in);
+
+/* crypto.c */
+int ceph_decrypt(struct ceph_crypto_key *secret,
+ void *dst, size_t *dst_len,
+ const void *src, size_t src_len);
+int ceph_encrypt(struct ceph_crypto_key *secret,
+ void *dst, size_t *dst_len,
+ const void *src, size_t src_len);
+int ceph_decrypt2(struct ceph_crypto_key *secret,
+ void *dst1, size_t *dst1_len,
+ void *dst2, size_t *dst2_len,
+ const void *src, size_t src_len);
+int ceph_encrypt2(struct ceph_crypto_key *secret,
+ void *dst, size_t *dst_len,
+ const void *src1, size_t src1_len,
+ const void *src2, size_t src2_len);
+int ceph_crypto_init(void);
+void ceph_crypto_shutdown(void);
+
+/* armor.c */
+int ceph_armor(char *dst, const char *src, const char *end);
+int ceph_unarmor(char *dst, const char *src, const char *end);
+
+#endif
diff --git a/libceph/debugfs.c b/libceph/debugfs.c
new file mode 100644
index 0000000..10421a4
--- /dev/null
+++ b/libceph/debugfs.c
@@ -0,0 +1,282 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/*
+ * Implement /sys/kernel/debug/ceph fun
+ *
+ * /sys/kernel/debug/ceph/client* - an instance of the ceph client
+ * .../osdmap - current osdmap
+ * .../monmap - current monmap
+ * .../osdc - active osd requests
+ * .../monc - mon client state
+ * .../dentry_lru - dump contents of dentry lru
+ * .../caps - expose cap (reservation) stats
+ * .../bdi - symlink to ../../bdi/something
+ */
+
+static struct dentry *ceph_debugfs_dir;
+
+/* seq_file show: dump the monmap epoch and each monitor's name/address. */
+static int monmap_show(struct seq_file *s, void *p)
+{
+ int i;
+ struct ceph_client *client = s->private;
+
+ /* No monmap received from the monitors yet -- print nothing. */
+ if (client->monc.monmap == NULL)
+ return 0;
+
+ seq_printf(s, "epoch %d\n", client->monc.monmap->epoch);
+ for (i = 0; i < client->monc.monmap->num_mon; i++) {
+ struct ceph_entity_inst *inst =
+ &client->monc.monmap->mon_inst[i];
+
+ seq_printf(s, "\t%s%lld\t%s\n",
+ ENTITY_NAME(inst->name),
+ ceph_pr_addr(&inst->addr.in_addr));
+ }
+ return 0;
+}
+
+/*
+ * seq_file show: dump the current osdmap -- epoch, flags, pools,
+ * per-osd state/weight/affinity, and the pg_temp/primary_temp
+ * remapping trees.
+ * NOTE(review): reads client->osdc.osdmap without an explicit lock
+ * here -- presumably safe for a debugfs snapshot; confirm.
+ */
+static int osdmap_show(struct seq_file *s, void *p)
+{
+ int i;
+ struct ceph_client *client = s->private;
+ struct ceph_osdmap *map = client->osdc.osdmap;
+ struct rb_node *n;
+
+ if (map == NULL)
+ return 0;
+
+ seq_printf(s, "epoch %d\n", map->epoch);
+ seq_printf(s, "flags%s%s\n",
+ (map->flags & CEPH_OSDMAP_NEARFULL) ? " NEARFULL" : "",
+ (map->flags & CEPH_OSDMAP_FULL) ? " FULL" : "");
+
+ /* one line per pool */
+ for (n = rb_first(&map->pg_pools); n; n = rb_next(n)) {
+ struct ceph_pg_pool_info *pool =
+ rb_entry(n, struct ceph_pg_pool_info, node);
+
+ seq_printf(s, "pool %lld pg_num %u (%d) read_tier %lld write_tier %lld\n",
+ pool->id, pool->pg_num, pool->pg_num_mask,
+ pool->read_tier, pool->write_tier);
+ }
+ /* one line per osd; weight/affinity are 16.16 fixed point -> percent */
+ for (i = 0; i < map->max_osd; i++) {
+ struct ceph_entity_addr *addr = &map->osd_addr[i];
+ int state = map->osd_state[i];
+ char sb[64];
+
+ seq_printf(s, "osd%d\t%s\t%3d%%\t(%s)\t%3d%%\n",
+ i, ceph_pr_addr(&addr->in_addr),
+ ((map->osd_weight[i]*100) >> 16),
+ ceph_osdmap_state_str(sb, sizeof(sb), state),
+ ((ceph_get_primary_affinity(map, i)*100) >> 16));
+ }
+ /* temporary PG -> osd-set mappings */
+ for (n = rb_first(&map->pg_temp); n; n = rb_next(n)) {
+ struct ceph_pg_mapping *pg =
+ rb_entry(n, struct ceph_pg_mapping, node);
+
+ seq_printf(s, "pg_temp %llu.%x [", pg->pgid.pool,
+ pg->pgid.seed);
+ for (i = 0; i < pg->pg_temp.len; i++)
+ seq_printf(s, "%s%d", (i == 0 ? "" : ","),
+ pg->pg_temp.osds[i]);
+ seq_printf(s, "]\n");
+ }
+ /* temporary PG -> primary-osd overrides */
+ for (n = rb_first(&map->primary_temp); n; n = rb_next(n)) {
+ struct ceph_pg_mapping *pg =
+ rb_entry(n, struct ceph_pg_mapping, node);
+
+ seq_printf(s, "primary_temp %llu.%x %d\n", pg->pgid.pool,
+ pg->pgid.seed, pg->primary_temp.osd);
+ }
+
+ return 0;
+}
+
+/*
+ * seq_file show: dump mon client state -- which maps we hold / want,
+ * and any in-flight generic (statfs etc.) requests.  Holds monc->mutex
+ * while walking the request tree.
+ */
+static int monc_show(struct seq_file *s, void *p)
+{
+ struct ceph_client *client = s->private;
+ struct ceph_mon_generic_request *req;
+ struct ceph_mon_client *monc = &client->monc;
+ struct rb_node *rp;
+
+ mutex_lock(&monc->mutex);
+
+ if (monc->have_mdsmap)
+ seq_printf(s, "have mdsmap %u\n", (unsigned int)monc->have_mdsmap);
+ if (monc->have_osdmap)
+ seq_printf(s, "have osdmap %u\n", (unsigned int)monc->have_osdmap);
+ if (monc->want_next_osdmap)
+ seq_printf(s, "want next osdmap\n");
+
+ for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) {
+ __u16 op;
+ req = rb_entry(rp, struct ceph_mon_generic_request, node);
+ op = le16_to_cpu(req->request->hdr.type);
+ if (op == CEPH_MSG_STATFS)
+ seq_printf(s, "%lld statfs\n", req->tid);
+ else
+ seq_printf(s, "%lld unknown\n", req->tid);
+ }
+
+ mutex_unlock(&monc->mutex);
+ return 0;
+}
+
+/*
+ * seq_file show: dump active osd requests -- tid, target osd, pgid,
+ * object name, reassert version (if any) and the ops in each request.
+ * Holds osdc->request_mutex while walking the request tree.
+ */
+static int osdc_show(struct seq_file *s, void *pp)
+{
+ struct ceph_client *client = s->private;
+ struct ceph_osd_client *osdc = &client->osdc;
+ struct rb_node *p;
+
+ mutex_lock(&osdc->request_mutex);
+ for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
+ struct ceph_osd_request *req;
+ unsigned int i;
+ int opcode;
+
+ req = rb_entry(p, struct ceph_osd_request, r_node);
+
+ /* -1 means the request is not currently mapped to an osd */
+ seq_printf(s, "%lld\tosd%d\t%lld.%x\t", req->r_tid,
+ req->r_osd ? req->r_osd->o_osd : -1,
+ req->r_pgid.pool, req->r_pgid.seed);
+
+ seq_printf(s, "%.*s", req->r_base_oid.name_len,
+ req->r_base_oid.name);
+
+ if (req->r_reassert_version.epoch)
+ seq_printf(s, "\t%u'%llu",
+ (unsigned int)le32_to_cpu(req->r_reassert_version.epoch),
+ le64_to_cpu(req->r_reassert_version.version));
+ else
+ seq_printf(s, "\t");
+
+ for (i = 0; i < req->r_num_ops; i++) {
+ opcode = req->r_ops[i].op;
+ seq_printf(s, "\t%s", ceph_osd_op_name(opcode));
+ }
+
+ seq_printf(s, "\n");
+ }
+ mutex_unlock(&osdc->request_mutex);
+ return 0;
+}
+
+CEPH_DEFINE_SHOW_FUNC(monmap_show)
+CEPH_DEFINE_SHOW_FUNC(osdmap_show)
+CEPH_DEFINE_SHOW_FUNC(monc_show)
+CEPH_DEFINE_SHOW_FUNC(osdc_show)
+
+/* Create the top-level /sys/kernel/debug/ceph directory. */
+int ceph_debugfs_init(void)
+{
+ ceph_debugfs_dir = debugfs_create_dir("ceph", NULL);
+ if (!ceph_debugfs_dir)
+ return -ENOMEM;
+ return 0;
+}
+
+/* Remove the top-level ceph debugfs directory. */
+void ceph_debugfs_cleanup(void)
+{
+ debugfs_remove(ceph_debugfs_dir);
+}
+
+/*
+ * Create the per-client debugfs directory ("<fsid>.client<gid>") and
+ * its monc/osdc/monmap/osdmap files.  On any failure everything
+ * created so far is torn down via ceph_debugfs_client_cleanup() and
+ * -ENOMEM is returned.
+ */
+int ceph_debugfs_client_init(struct ceph_client *client)
+{
+ int ret = -ENOMEM;
+ char name[80];
+
+ snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
+ client->monc.auth->global_id);
+
+ dout("ceph_debugfs_client_init %p %s\n", client, name);
+
+ /* must not be called twice for the same client */
+ BUG_ON(client->debugfs_dir);
+ client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
+ if (!client->debugfs_dir)
+ goto out;
+
+ client->monc.debugfs_file = debugfs_create_file("monc",
+ 0600,
+ client->debugfs_dir,
+ client,
+ &monc_show_fops);
+ if (!client->monc.debugfs_file)
+ goto out;
+
+ client->osdc.debugfs_file = debugfs_create_file("osdc",
+ 0600,
+ client->debugfs_dir,
+ client,
+ &osdc_show_fops);
+ if (!client->osdc.debugfs_file)
+ goto out;
+
+ client->debugfs_monmap = debugfs_create_file("monmap",
+ 0600,
+ client->debugfs_dir,
+ client,
+ &monmap_show_fops);
+ if (!client->debugfs_monmap)
+ goto out;
+
+ client->debugfs_osdmap = debugfs_create_file("osdmap",
+ 0600,
+ client->debugfs_dir,
+ client,
+ &osdmap_show_fops);
+ if (!client->debugfs_osdmap)
+ goto out;
+
+ return 0;
+
+out:
+ ceph_debugfs_client_cleanup(client);
+ return ret;
+}
+
+/*
+ * Remove the per-client debugfs files and directory.  debugfs_remove()
+ * ignores NULL/error dentries, so this is safe after a partial init.
+ */
+void ceph_debugfs_client_cleanup(struct ceph_client *client)
+{
+ dout("ceph_debugfs_client_cleanup %p\n", client);
+ debugfs_remove(client->debugfs_osdmap);
+ debugfs_remove(client->debugfs_monmap);
+ debugfs_remove(client->osdc.debugfs_file);
+ debugfs_remove(client->monc.debugfs_file);
+ debugfs_remove(client->debugfs_dir);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+/* No-op stubs used when the kernel is built without CONFIG_DEBUG_FS. */
+int ceph_debugfs_init(void)
+{
+ return 0;
+}
+
+void ceph_debugfs_cleanup(void)
+{
+}
+
+int ceph_debugfs_client_init(struct ceph_client *client)
+{
+ return 0;
+}
+
+void ceph_debugfs_client_cleanup(struct ceph_client *client)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
+
+EXPORT_SYMBOL(ceph_debugfs_init);
+EXPORT_SYMBOL(ceph_debugfs_cleanup);
diff --git a/libceph/messenger.c b/libceph/messenger.c
new file mode 100644
index 0000000..4f55f9c
--- /dev/null
+++ b/libceph/messenger.c
@@ -0,0 +1,3316 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/crc32c.h>
+#include <linux/ctype.h>
+#include <linux/highmem.h>
+#include <linux/inet.h>
+#include <linux/kthread.h>
+#include <linux/net.h>
+#include <linux/slab.h>
+#include <linux/socket.h>
+#include <linux/string.h>
+#ifdef CONFIG_BLOCK
+#include <linux/bio.h>
+#endif /* CONFIG_BLOCK */
+#include <linux/dns_resolver.h>
+#include <net/tcp.h>
+
+#include <linux/ceph/ceph_features.h>
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/pagelist.h>
+#include <linux/export.h>
+
+#define list_entry_next(pos, member) \
+ list_entry(pos->member.next, typeof(*pos), member)
+
+/*
+ * Ceph uses the messenger to exchange ceph_msg messages with other
+ * hosts in the system. The messenger provides ordered and reliable
+ * delivery. We tolerate TCP disconnects by reconnecting (with
+ * exponential backoff) in the case of a fault (disconnection, bad
+ * crc, protocol error). Acks allow sent messages to be discarded by
+ * the sender.
+ */
+
+/*
+ * We track the state of the socket on a given connection using
+ * values defined below. The transition to a new socket state is
+ * handled by a function which verifies we aren't coming from an
+ * unexpected state.
+ *
+ * --------
+ * | NEW* | transient initial state
+ * --------
+ * | con_sock_state_init()
+ * v
+ * ----------
+ * | CLOSED | initialized, but no socket (and no
+ * ---------- TCP connection)
+ * ^ \
+ * | \ con_sock_state_connecting()
+ * | ----------------------
+ * | \
+ * + con_sock_state_closed() \
+ * |+--------------------------- \
+ * | \ \ \
+ * | ----------- \ \
+ * | | CLOSING | socket event; \ \
+ * | ----------- await close \ \
+ * | ^ \ |
+ * | | \ |
+ * | + con_sock_state_closing() \ |
+ * | / \ | |
+ * | / --------------- | |
+ * | / \ v v
+ * | / --------------
+ * | / -----------------| CONNECTING | socket created, TCP
+ * | | / -------------- connect initiated
+ * | | | con_sock_state_connected()
+ * | | v
+ * -------------
+ * | CONNECTED | TCP connection established
+ * -------------
+ *
+ * State values for ceph_connection->sock_state; NEW is assumed to be 0.
+ */
+
+#define CON_SOCK_STATE_NEW 0 /* -> CLOSED */
+#define CON_SOCK_STATE_CLOSED 1 /* -> CONNECTING */
+#define CON_SOCK_STATE_CONNECTING 2 /* -> CONNECTED or -> CLOSING */
+#define CON_SOCK_STATE_CONNECTED 3 /* -> CLOSING or -> CLOSED */
+#define CON_SOCK_STATE_CLOSING 4 /* -> CLOSED */
+
+/*
+ * connection states
+ */
+#define CON_STATE_CLOSED 1 /* -> PREOPEN */
+#define CON_STATE_PREOPEN 2 /* -> CONNECTING, CLOSED */
+#define CON_STATE_CONNECTING 3 /* -> NEGOTIATING, CLOSED */
+#define CON_STATE_NEGOTIATING 4 /* -> OPEN, CLOSED */
+#define CON_STATE_OPEN 5 /* -> STANDBY, CLOSED */
+#define CON_STATE_STANDBY 6 /* -> PREOPEN, CLOSED */
+
+/*
+ * ceph_connection flag bits
+ */
+#define CON_FLAG_LOSSYTX 0 /* we can close channel or drop
+ * messages on errors */
+#define CON_FLAG_KEEPALIVE_PENDING 1 /* we need to send a keepalive */
+#define CON_FLAG_WRITE_PENDING 2 /* we have data ready to send */
+#define CON_FLAG_SOCK_CLOSED 3 /* socket state changed to closed */
+#define CON_FLAG_BACKOFF 4 /* need to retry queuing delayed work */
+
+/* Return true iff @con_flag is one of the defined CON_FLAG_* bits. */
+static bool con_flag_valid(unsigned long con_flag)
+{
+ switch (con_flag) {
+ case CON_FLAG_LOSSYTX:
+ case CON_FLAG_KEEPALIVE_PENDING:
+ case CON_FLAG_WRITE_PENDING:
+ case CON_FLAG_SOCK_CLOSED:
+ case CON_FLAG_BACKOFF:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* Atomically clear a CON_FLAG_* bit in con->flags (BUGs on bad flag). */
+static void con_flag_clear(struct ceph_connection *con, unsigned long con_flag)
+{
+ BUG_ON(!con_flag_valid(con_flag));
+
+ clear_bit(con_flag, &con->flags);
+}
+
+/* Atomically set a CON_FLAG_* bit in con->flags. */
+static void con_flag_set(struct ceph_connection *con, unsigned long con_flag)
+{
+ BUG_ON(!con_flag_valid(con_flag));
+
+ set_bit(con_flag, &con->flags);
+}
+
+/* Test a CON_FLAG_* bit. */
+static bool con_flag_test(struct ceph_connection *con, unsigned long con_flag)
+{
+ BUG_ON(!con_flag_valid(con_flag));
+
+ return test_bit(con_flag, &con->flags);
+}
+
+/* Atomically test-and-clear a CON_FLAG_* bit; returns the old value. */
+static bool con_flag_test_and_clear(struct ceph_connection *con,
+ unsigned long con_flag)
+{
+ BUG_ON(!con_flag_valid(con_flag));
+
+ return test_and_clear_bit(con_flag, &con->flags);
+}
+
+/* Atomically test-and-set a CON_FLAG_* bit; returns the old value. */
+static bool con_flag_test_and_set(struct ceph_connection *con,
+ unsigned long con_flag)
+{
+ BUG_ON(!con_flag_valid(con_flag));
+
+ return test_and_set_bit(con_flag, &con->flags);
+}
+
+/* Slab caches for frequently-allocated structures */
+
+static struct kmem_cache *ceph_msg_cache;
+static struct kmem_cache *ceph_msg_data_cache;
+
+/* static tag bytes (protocol control messages) */
+static char tag_msg = CEPH_MSGR_TAG_MSG;
+static char tag_ack = CEPH_MSGR_TAG_ACK;
+static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE;
+
+#ifdef CONFIG_LOCKDEP
+static struct lock_class_key socket_class;
+#endif
+
+/*
+ * When skipping (ignoring) a block of input we read it into a "skip
+ * buffer," which is this many bytes in size.
+ */
+#define SKIP_BUF_SIZE 1024
+
+static void queue_con(struct ceph_connection *con);
+static void con_work(struct work_struct *);
+static void con_fault(struct ceph_connection *con);
+
+/*
+ * Nicely render a sockaddr as a string. An array of formatted
+ * strings is used, to approximate reentrancy.
+ */
+#define ADDR_STR_COUNT_LOG 5 /* log2(# address strings in array) */
+#define ADDR_STR_COUNT (1 << ADDR_STR_COUNT_LOG)
+#define ADDR_STR_COUNT_MASK (ADDR_STR_COUNT - 1)
+#define MAX_ADDR_STR_LEN 64 /* 54 is enough */
+
+static char addr_str[ADDR_STR_COUNT][MAX_ADDR_STR_LEN];
+static atomic_t addr_str_seq = ATOMIC_INIT(0);
+
+static struct page *zero_page; /* used in certain error cases */
+
+/*
+ * Format a sockaddr (v4 or v6) as "addr:port" into one of a small ring
+ * of static buffers.  The ring (ADDR_STR_COUNT entries, rotated with an
+ * atomic counter) only approximates reentrancy: a slot can be reused
+ * while a caller still holds its pointer, so results are short-lived.
+ */
+const char *ceph_pr_addr(const struct sockaddr_storage *ss)
+{
+ int i;
+ char *s;
+ struct sockaddr_in *in4 = (struct sockaddr_in *) ss;
+ struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss;
+
+ i = atomic_inc_return(&addr_str_seq) & ADDR_STR_COUNT_MASK;
+ s = addr_str[i];
+
+ switch (ss->ss_family) {
+ case AF_INET:
+ snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%hu", &in4->sin_addr,
+ ntohs(in4->sin_port));
+ break;
+
+ case AF_INET6:
+ snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%hu", &in6->sin6_addr,
+ ntohs(in6->sin6_port));
+ break;
+
+ default:
+ snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %hu)",
+ ss->ss_family);
+ }
+
+ return s;
+}
+EXPORT_SYMBOL(ceph_pr_addr);
+
+/* Cache a wire-encoded copy of our own address in msgr->my_enc_addr. */
+static void encode_my_addr(struct ceph_messenger *msgr)
+{
+ memcpy(&msgr->my_enc_addr, &msgr->inst.addr, sizeof(msgr->my_enc_addr));
+ ceph_encode_addr(&msgr->my_enc_addr);
+}
+
+/*
+ * work queue for all reading and writing to/from the socket.
+ */
+static struct workqueue_struct *ceph_msgr_wq;
+
+/*
+ * Create the ceph_msg and ceph_msg_data slab caches.  On failure the
+ * partially-created cache is destroyed and -ENOMEM is returned.
+ */
+static int ceph_msgr_slab_init(void)
+{
+ BUG_ON(ceph_msg_cache);
+ ceph_msg_cache = kmem_cache_create("ceph_msg",
+ sizeof (struct ceph_msg),
+ __alignof__(struct ceph_msg), 0, NULL);
+
+ if (!ceph_msg_cache)
+ return -ENOMEM;
+
+ BUG_ON(ceph_msg_data_cache);
+ ceph_msg_data_cache = kmem_cache_create("ceph_msg_data",
+ sizeof (struct ceph_msg_data),
+ __alignof__(struct ceph_msg_data),
+ 0, NULL);
+ if (ceph_msg_data_cache)
+ return 0;
+
+ /* second cache failed -- roll back the first */
+ kmem_cache_destroy(ceph_msg_cache);
+ ceph_msg_cache = NULL;
+
+ return -ENOMEM;
+}
+
+/* Destroy both messenger slab caches; both must exist when called. */
+static void ceph_msgr_slab_exit(void)
+{
+ BUG_ON(!ceph_msg_data_cache);
+ kmem_cache_destroy(ceph_msg_data_cache);
+ ceph_msg_data_cache = NULL;
+
+ BUG_ON(!ceph_msg_cache);
+ kmem_cache_destroy(ceph_msg_cache);
+ ceph_msg_cache = NULL;
+}
+
+/*
+ * Common teardown for ceph_msgr_init() failure and ceph_msgr_exit():
+ * destroy the workqueue (if created), the slab caches, and drop our
+ * reference on the shared zero page.
+ */
+static void _ceph_msgr_exit(void)
+{
+ if (ceph_msgr_wq) {
+ destroy_workqueue(ceph_msgr_wq);
+ ceph_msgr_wq = NULL;
+ }
+
+ ceph_msgr_slab_exit();
+
+ BUG_ON(zero_page == NULL);
+ /*
+ * ceph_msgr_init() only takes a reference (page_cache_get) and
+ * never kmap()s the zero page, so the old kunmap(zero_page) here
+ * was unbalanced -- just drop the reference.
+ */
+ page_cache_release(zero_page);
+ zero_page = NULL;
+}
+
+/*
+ * Global messenger init: pin the shared zero page, create the slab
+ * caches and the ceph-msgr workqueue.  On failure everything is
+ * unwound via _ceph_msgr_exit() and -ENOMEM is returned.
+ */
+int ceph_msgr_init(void)
+{
+ BUG_ON(zero_page != NULL);
+ zero_page = ZERO_PAGE(0);
+ page_cache_get(zero_page);
+
+ if (ceph_msgr_slab_init())
+ return -ENOMEM;
+
+ ceph_msgr_wq = alloc_workqueue("ceph-msgr", 0, 0);
+ if (ceph_msgr_wq)
+ return 0;
+
+ pr_err("msgr_init failed to create workqueue\n");
+ _ceph_msgr_exit();
+
+ return -ENOMEM;
+}
+EXPORT_SYMBOL(ceph_msgr_init);
+
+/* Global messenger teardown; ceph_msgr_init() must have succeeded. */
+void ceph_msgr_exit(void)
+{
+ BUG_ON(ceph_msgr_wq == NULL);
+
+ _ceph_msgr_exit();
+}
+EXPORT_SYMBOL(ceph_msgr_exit);
+
+/* Wait for all queued connection work to finish. */
+void ceph_msgr_flush(void)
+{
+ flush_workqueue(ceph_msgr_wq);
+}
+EXPORT_SYMBOL(ceph_msgr_flush);
+
+/* Connection socket state transition functions */
+
+/*
+ * Each helper atomically moves con->sock_state to a new CON_SOCK_STATE_*
+ * value and WARNs (once, with a printk of the offender) if the previous
+ * state was not one of the legal predecessors in the state diagram above.
+ */
+
+/* NEW -> CLOSED */
+static void con_sock_state_init(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
+ if (WARN_ON(old_state != CON_SOCK_STATE_NEW))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CLOSED);
+}
+
+/* CLOSED -> CONNECTING */
+static void con_sock_state_connecting(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTING);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CONNECTING);
+}
+
+/* CONNECTING -> CONNECTED */
+static void con_sock_state_connected(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CONNECTED);
+}
+
+/* CONNECTING/CONNECTED/CLOSING -> CLOSING */
+static void con_sock_state_closing(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSING);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING &&
+ old_state != CON_SOCK_STATE_CONNECTED &&
+ old_state != CON_SOCK_STATE_CLOSING))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CLOSING);
+}
+
+/* any post-CLOSED state -> CLOSED (idempotent) */
+static void con_sock_state_closed(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED &&
+ old_state != CON_SOCK_STATE_CLOSING &&
+ old_state != CON_SOCK_STATE_CONNECTING &&
+ old_state != CON_SOCK_STATE_CLOSED))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CLOSED);
+}
+
+/*
+ * socket callback functions
+ */
+
+/* data available on socket, or listen socket received a connect */
+/*
+ * sk_data_ready callback: queue connection work unless the messenger
+ * is stopping or the peer has half-closed (TCP_CLOSE_WAIT is handled
+ * by the state-change callback instead).
+ */
+static void ceph_sock_data_ready(struct sock *sk, int count_unused)
+{
+ struct ceph_connection *con = sk->sk_user_data;
+ if (atomic_read(&con->msgr->stopping)) {
+ return;
+ }
+
+ if (sk->sk_state != TCP_CLOSE_WAIT) {
+ dout("%s on %p state = %lu, queueing work\n", __func__,
+ con, con->state);
+ queue_con(con);
+ }
+}
+
+/* socket has buffer space for writing */
+/* sk_write_space callback: requeue work if we have pending output. */
+static void ceph_sock_write_space(struct sock *sk)
+{
+ struct ceph_connection *con = sk->sk_user_data;
+
+ /* only queue to workqueue if there is data we want to write,
+ * and there is sufficient space in the socket buffer to accept
+ * more data. clear SOCK_NOSPACE so that ceph_sock_write_space()
+ * doesn't get called again until try_write() fills the socket
+ * buffer. See net/ipv4/tcp_input.c:tcp_check_space()
+ * and net/core/stream.c:sk_stream_write_space().
+ */
+ if (con_flag_test(con, CON_FLAG_WRITE_PENDING)) {
+ if (sk_stream_is_writeable(sk)) {
+ dout("%s %p queueing write work\n", __func__, con);
+ clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ queue_con(con);
+ }
+ } else {
+ dout("%s %p nothing to write\n", __func__, con);
+ }
+}
+
+/* socket's state has changed */
+/*
+ * sk_state_change callback: on close/close-wait mark the socket
+ * CLOSING and flag SOCK_CLOSED for the worker; on established mark
+ * CONNECTED.  Either way the connection work is (re)queued.
+ */
+static void ceph_sock_state_change(struct sock *sk)
+{
+ struct ceph_connection *con = sk->sk_user_data;
+
+ dout("%s %p state = %lu sk_state = %u\n", __func__,
+ con, con->state, sk->sk_state);
+
+ switch (sk->sk_state) {
+ case TCP_CLOSE:
+ dout("%s TCP_CLOSE\n", __func__);
+ /* fallthrough -- CLOSE and CLOSE_WAIT are handled the same */
+ case TCP_CLOSE_WAIT:
+ dout("%s TCP_CLOSE_WAIT\n", __func__);
+ con_sock_state_closing(con);
+ con_flag_set(con, CON_FLAG_SOCK_CLOSED);
+ queue_con(con);
+ break;
+ case TCP_ESTABLISHED:
+ dout("%s TCP_ESTABLISHED\n", __func__);
+ con_sock_state_connected(con);
+ queue_con(con);
+ break;
+ default: /* Everything else is uninteresting */
+ break;
+ }
+}
+
+/*
+ * set up socket callbacks
+ */
+/* Point the socket's callbacks at our handlers and stash @con in it. */
+static void set_sock_callbacks(struct socket *sock,
+ struct ceph_connection *con)
+{
+ struct sock *sk = sock->sk;
+ sk->sk_user_data = con;
+ sk->sk_data_ready = ceph_sock_data_ready;
+ sk->sk_write_space = ceph_sock_write_space;
+ sk->sk_state_change = ceph_sock_state_change;
+}
+
+
+/*
+ * socket helpers
+ */
+
+/*
+ * initiate connection to a remote socket.
+ */
+/*
+ * Create a kernel TCP socket and start a non-blocking connect to the
+ * peer address.  -EINPROGRESS is treated as success (completion is
+ * reported via ceph_sock_state_change); any other error releases the
+ * socket and is returned.
+ */
+static int ceph_tcp_connect(struct ceph_connection *con)
+{
+ struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
+ struct socket *sock;
+ int ret;
+
+ BUG_ON(con->sock);
+ ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM,
+ IPPROTO_TCP, &sock);
+ if (ret)
+ return ret;
+ /* GFP_NOFS: the messenger may run in fs writeback context */
+ sock->sk->sk_allocation = GFP_NOFS;
+
+#ifdef CONFIG_LOCKDEP
+ lockdep_set_class(&sock->sk->sk_lock, &socket_class);
+#endif
+
+ set_sock_callbacks(sock, con);
+
+ dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr));
+
+ con_sock_state_connecting(con);
+ ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
+ O_NONBLOCK);
+ if (ret == -EINPROGRESS) {
+ dout("connect %s EINPROGRESS sk_state = %u\n",
+ ceph_pr_addr(&con->peer_addr.in_addr),
+ sock->sk->sk_state);
+ } else if (ret < 0) {
+ pr_err("connect %s error %d\n",
+ ceph_pr_addr(&con->peer_addr.in_addr), ret);
+ sock_release(sock);
+ con->error_msg = "connect error";
+
+ return ret;
+ }
+ con->sock = sock;
+ return 0;
+}
+
+/*
+ * Non-blocking receive of up to @len bytes into @buf.  Returns bytes
+ * read, 0 when nothing is available (-EAGAIN is mapped to 0), or a
+ * negative error.
+ */
+static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
+{
+ struct kvec iov = {buf, len};
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
+ int r;
+
+ r = kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags);
+ if (r == -EAGAIN)
+ r = 0;
+ return r;
+}
+
+/*
+ * Receive directly into @page at @page_offset.  The span must fit
+ * within the page; the page is kmapped only for the duration of the
+ * read.  Return semantics are those of ceph_tcp_recvmsg().
+ */
+static int ceph_tcp_recvpage(struct socket *sock, struct page *page,
+ int page_offset, size_t length)
+{
+ void *kaddr;
+ int ret;
+
+ BUG_ON(page_offset + length > PAGE_SIZE);
+
+ kaddr = kmap(page);
+ BUG_ON(!kaddr);
+ ret = ceph_tcp_recvmsg(sock, kaddr + page_offset, length);
+ kunmap(page);
+
+ return ret;
+}
+
+/*
+ * write something. @more is true if caller will be sending more data
+ * shortly.
+ */
+/*
+ * write something. @more is true if caller will be sending more data
+ * shortly.  Returns bytes sent (0 for -EAGAIN) or a negative error.
+ */
+static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
+ size_t kvlen, size_t len, int more)
+{
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
+ int r;
+
+ if (more)
+ msg.msg_flags |= MSG_MORE;
+ else
+ msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */
+
+ r = kernel_sendmsg(sock, &msg, iov, kvlen, len);
+ if (r == -EAGAIN)
+ r = 0;
+ return r;
+}
+
+/*
+ * Zero-copy send of part of @page via kernel_sendpage().  Returns
+ * bytes sent (0 for -EAGAIN) or a negative error.
+ */
+static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
+ int offset, size_t size, bool more)
+{
+ int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR);
+ int ret;
+
+ ret = kernel_sendpage(sock, page, offset, size, flags);
+ if (ret == -EAGAIN)
+ ret = 0;
+
+ return ret;
+}
+
+
+/*
+ * Shutdown/close the socket for the given connection.
+ */
+/*
+ * Shutdown/close the socket for the given connection, clear the
+ * SOCK_CLOSED flag, and move sock_state to CLOSED.  Returns the
+ * shutdown() result (0 if there was no socket).
+ */
+static int con_close_socket(struct ceph_connection *con)
+{
+ int rc = 0;
+
+ dout("con_close_socket on %p sock %p\n", con, con->sock);
+ if (con->sock) {
+ rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
+ sock_release(con->sock);
+ con->sock = NULL;
+ }
+
+ /*
+ * Forcibly clear the SOCK_CLOSED flag. It gets set
+ * independent of the connection mutex, and we could have
+ * received a socket close event before we had the chance to
+ * shut the socket down.
+ */
+ con_flag_clear(con, CON_FLAG_SOCK_CLOSED);
+
+ con_sock_state_closed(con);
+ return rc;
+}
+
+/*
+ * Reset a connection. Discard all incoming and outgoing messages
+ * and clear *_seq state.
+ */
+/*
+ * Detach @msg from its list and its connection: drop the connection
+ * reference the message held, then drop the message reference.
+ */
+static void ceph_msg_remove(struct ceph_msg *msg)
+{
+ list_del_init(&msg->list_head);
+ BUG_ON(msg->con == NULL);
+ msg->con->ops->put(msg->con);
+ msg->con = NULL;
+
+ ceph_msg_put(msg);
+}
+/* Remove and drop every message on @head. */
+static void ceph_msg_remove_list(struct list_head *head)
+{
+ while (!list_empty(head)) {
+ struct ceph_msg *msg = list_first_entry(head, struct ceph_msg,
+ list_head);
+ ceph_msg_remove(msg);
+ }
+}
+
+/*
+ * Discard all queued/sent/in-flight messages on @con and zero its
+ * connect/in/out sequence state.  Caller holds con->mutex (all callers
+ * in this file do -- see ceph_con_close()).
+ */
+static void reset_connection(struct ceph_connection *con)
+{
+ /* reset connection, out_queue, msg_ and connect_seq */
+ /* discard existing out_queue and msg_seq */
+ dout("reset_connection %p\n", con);
+ ceph_msg_remove_list(&con->out_queue);
+ ceph_msg_remove_list(&con->out_sent);
+
+ if (con->in_msg) {
+ BUG_ON(con->in_msg->con != con);
+ con->in_msg->con = NULL;
+ ceph_msg_put(con->in_msg);
+ con->in_msg = NULL;
+ con->ops->put(con);
+ }
+
+ con->connect_seq = 0;
+ con->out_seq = 0;
+ if (con->out_msg) {
+ ceph_msg_put(con->out_msg);
+ con->out_msg = NULL;
+ }
+ con->in_seq = 0;
+ con->in_seq_acked = 0;
+}
+
+/*
+ * mark a peer down. drop any open connections.
+ */
+/*
+ * mark a peer down. drop any open connections.
+ * Moves the connection to CON_STATE_CLOSED, clears all flags, discards
+ * queued messages, cancels pending work and closes the socket.
+ */
+void ceph_con_close(struct ceph_connection *con)
+{
+ mutex_lock(&con->mutex);
+ dout("con_close %p peer %s\n", con,
+ ceph_pr_addr(&con->peer_addr.in_addr));
+ con->state = CON_STATE_CLOSED;
+
+ con_flag_clear(con, CON_FLAG_LOSSYTX); /* so we retry next connect */
+ con_flag_clear(con, CON_FLAG_KEEPALIVE_PENDING);
+ con_flag_clear(con, CON_FLAG_WRITE_PENDING);
+ con_flag_clear(con, CON_FLAG_BACKOFF);
+
+ reset_connection(con);
+ con->peer_global_seq = 0;
+ cancel_delayed_work(&con->work);
+ con_close_socket(con);
+ mutex_unlock(&con->mutex);
+}
+EXPORT_SYMBOL(ceph_con_close);
+
+/*
+ * Reopen a closed connection, with a new peer address.
+ */
+/*
+ * Reopen a closed connection, with a new peer address.
+ * Records the peer identity/address, resets the backoff delay, moves
+ * to PREOPEN and queues the connection work to start connecting.
+ */
+void ceph_con_open(struct ceph_connection *con,
+ __u8 entity_type, __u64 entity_num,
+ struct ceph_entity_addr *addr)
+{
+ mutex_lock(&con->mutex);
+ dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
+
+ WARN_ON(con->state != CON_STATE_CLOSED);
+ con->state = CON_STATE_PREOPEN;
+
+ con->peer_name.type = (__u8) entity_type;
+ con->peer_name.num = cpu_to_le64(entity_num);
+
+ memcpy(&con->peer_addr, addr, sizeof(*addr));
+ con->delay = 0; /* reset backoff memory */
+ mutex_unlock(&con->mutex);
+ queue_con(con);
+}
+EXPORT_SYMBOL(ceph_con_open);
+
+/*
+ * return true if this connection ever successfully opened
+ */
+/* return true if this connection ever successfully opened */
+bool ceph_con_opened(struct ceph_connection *con)
+{
+ return con->connect_seq > 0;
+}
+
+/*
+ * initialize a new connection.
+ */
+/*
+ * initialize a new connection.
+ * Zeroes the struct, wires up @private/@ops/@msgr, initializes the
+ * mutex, queues and work item, and starts in CON_STATE_CLOSED.
+ */
+void ceph_con_init(struct ceph_connection *con, void *private,
+ const struct ceph_connection_operations *ops,
+ struct ceph_messenger *msgr)
+{
+ dout("con_init %p\n", con);
+ memset(con, 0, sizeof(*con));
+ con->private = private;
+ con->ops = ops;
+ con->msgr = msgr;
+
+ con_sock_state_init(con);
+
+ mutex_init(&con->mutex);
+ INIT_LIST_HEAD(&con->out_queue);
+ INIT_LIST_HEAD(&con->out_sent);
+ INIT_DELAYED_WORK(&con->work, con_work);
+
+ con->state = CON_STATE_CLOSED;
+}
+EXPORT_SYMBOL(ceph_con_init);
+
+
+/*
+ * We maintain a global counter to order connection attempts. Get
+ * a unique seq greater than @gt.
+ */
+/*
+ * Return a fresh global connection sequence number strictly greater
+ * than @gt, bumping the shared counter under global_seq_lock.
+ */
+static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt)
+{
+ u32 ret;
+
+ spin_lock(&msgr->global_seq_lock);
+ if (msgr->global_seq < gt)
+ msgr->global_seq = gt;
+ ret = ++msgr->global_seq;
+ spin_unlock(&msgr->global_seq_lock);
+ return ret;
+}
+
+/* Reset the connection's outgoing kvec array to empty. */
+static void con_out_kvec_reset(struct ceph_connection *con)
+{
+ con->out_kvec_left = 0;
+ con->out_kvec_bytes = 0;
+ con->out_kvec_cur = &con->out_kvec[0];
+}
+
+/*
+ * Append a (data, size) entry to the outgoing kvec array; BUGs if the
+ * fixed-size array is already full.
+ */
+static void con_out_kvec_add(struct ceph_connection *con,
+ size_t size, void *data)
+{
+ int index;
+
+ index = con->out_kvec_left;
+ BUG_ON(index >= ARRAY_SIZE(con->out_kvec));
+
+ con->out_kvec[index].iov_len = size;
+ con->out_kvec[index].iov_base = data;
+ con->out_kvec_left++;
+ con->out_kvec_bytes += size;
+}
+
+#ifdef CONFIG_BLOCK
+
+/*
+ * For a bio data item, a piece is whatever remains of the next
+ * entry in the current bio iovec, or the first entry in the next
+ * bio in the list.
+ */
+/*
+ * Initialize a cursor over a bio data item: position at the start of
+ * the first bio, consuming at most @length (or the bio chain length,
+ * whichever is smaller).
+ */
+static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor,
+ size_t length)
+{
+ struct ceph_msg_data *data = cursor->data;
+ struct bio *bio;
+
+ BUG_ON(data->type != CEPH_MSG_DATA_BIO);
+
+ bio = data->bio;
+ BUG_ON(!bio);
+
+ cursor->resid = min(length, data->bio_length);
+ cursor->bio = bio;
+ cursor->bvec_iter = bio->bi_iter;
+ cursor->last_piece =
+ cursor->resid <= bio_iter_len(bio, cursor->bvec_iter);
+}
+
+/*
+ * Return the page holding the next piece of the bio data item, and
+ * its offset/length within that page.  Does not advance the cursor.
+ */
+static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor,
+ size_t *page_offset,
+ size_t *length)
+{
+ struct ceph_msg_data *data = cursor->data;
+ struct bio *bio;
+ struct bio_vec bio_vec;
+
+ BUG_ON(data->type != CEPH_MSG_DATA_BIO);
+
+ bio = cursor->bio;
+ BUG_ON(!bio);
+
+ bio_vec = bio_iter_iovec(bio, cursor->bvec_iter);
+
+ *page_offset = (size_t) bio_vec.bv_offset;
+ BUG_ON(*page_offset >= PAGE_SIZE);
+ if (cursor->last_piece) /* pagelist offset is always 0 */
+ *length = cursor->resid;
+ else
+ *length = (size_t) bio_vec.bv_len;
+ BUG_ON(*length > cursor->resid);
+ BUG_ON(*page_offset + *length > PAGE_SIZE);
+
+ return bio_vec.bv_page;
+}
+
+/*
+ * Consume @bytes from the bio cursor.  Returns false while more bytes
+ * remain in the current segment, true after moving to a new segment
+ * (possibly in the next bio of the chain).
+ */
+static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
+ size_t bytes)
+{
+ struct bio *bio;
+ struct bio_vec bio_vec;
+
+ BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO);
+
+ bio = cursor->bio;
+ BUG_ON(!bio);
+
+ bio_vec = bio_iter_iovec(bio, cursor->bvec_iter);
+
+ /* Advance the cursor offset */
+
+ BUG_ON(cursor->resid < bytes);
+ cursor->resid -= bytes;
+
+ bio_advance_iter(bio, &cursor->bvec_iter, bytes);
+
+ if (bytes < bio_vec.bv_len)
+ return false; /* more bytes to process in this segment */
+
+ /* Move on to the next segment, and possibly the next bio */
+
+ if (!cursor->bvec_iter.bi_size) {
+ bio = bio->bi_next;
+ cursor->bio = bio;
+ if (bio)
+ cursor->bvec_iter = bio->bi_iter;
+ else
+ memset(&cursor->bvec_iter, 0,
+ sizeof(cursor->bvec_iter));
+ }
+
+ if (!cursor->last_piece) {
+ BUG_ON(!cursor->resid);
+ BUG_ON(!bio);
+ /* A short read is OK, so use <= rather than == */
+ if (cursor->resid <= bio_iter_len(bio, cursor->bvec_iter))
+ cursor->last_piece = true;
+ }
+
+ return true;
+}
+#endif /* CONFIG_BLOCK */
+
+/*
+ * For a page array, a piece comes from the first page in the array
+ * that has not already been fully consumed.
+ */
+/*
+ * Initialize a cursor over a page-array data item.  The first page may
+ * start at a non-zero offset (data->alignment within the page).
+ */
+static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor,
+ size_t length)
+{
+ struct ceph_msg_data *data = cursor->data;
+ int page_count;
+
+ BUG_ON(data->type != CEPH_MSG_DATA_PAGES);
+
+ BUG_ON(!data->pages);
+ BUG_ON(!data->length);
+
+ cursor->resid = min(length, data->length);
+ page_count = calc_pages_for(data->alignment, (u64)data->length);
+ cursor->page_offset = data->alignment & ~PAGE_MASK;
+ cursor->page_index = 0;
+ /* page_count is stored in an unsigned short -- guard the narrowing */
+ BUG_ON(page_count > (int)USHRT_MAX);
+ cursor->page_count = (unsigned short)page_count;
+ BUG_ON(length > SIZE_MAX - cursor->page_offset);
+ cursor->last_piece = (size_t)cursor->page_offset + length <= PAGE_SIZE;
+}
+
+/*
+ * Return the current page of the page-array item plus the
+ * offset/length of the piece within it.  Does not advance the cursor.
+ */
+static struct page *
+ceph_msg_data_pages_next(struct ceph_msg_data_cursor *cursor,
+ size_t *page_offset, size_t *length)
+{
+ struct ceph_msg_data *data = cursor->data;
+
+ BUG_ON(data->type != CEPH_MSG_DATA_PAGES);
+
+ BUG_ON(cursor->page_index >= cursor->page_count);
+ BUG_ON(cursor->page_offset >= PAGE_SIZE);
+
+ *page_offset = cursor->page_offset;
+ if (cursor->last_piece)
+ *length = cursor->resid;
+ else
+ *length = PAGE_SIZE - *page_offset;
+
+ return data->pages[cursor->page_index];
+}
+
+/*
+ * Consume @bytes from the page-array cursor.  Returns true only when
+ * the current page is finished and there is more data, i.e. when the
+ * cursor moved to the next page.
+ */
+static bool ceph_msg_data_pages_advance(struct ceph_msg_data_cursor *cursor,
+ size_t bytes)
+{
+ BUG_ON(cursor->data->type != CEPH_MSG_DATA_PAGES);
+
+ BUG_ON(cursor->page_offset + bytes > PAGE_SIZE);
+
+ /* Advance the cursor page offset */
+
+ cursor->resid -= bytes;
+ cursor->page_offset = (cursor->page_offset + bytes) & ~PAGE_MASK;
+ if (!bytes || cursor->page_offset)
+ return false; /* more bytes to process in the current page */
+
+ if (!cursor->resid)
+ return false; /* no more data */
+
+ /* Move on to the next page; offset is already at 0 */
+
+ BUG_ON(cursor->page_index >= cursor->page_count);
+ cursor->page_index++;
+ cursor->last_piece = cursor->resid <= PAGE_SIZE;
+
+ return true;
+}
+
+/*
+ * For a pagelist, a piece is whatever remains to be consumed in the
+ * first page in the list, or the front of the next page.
+ */
+static void
+ceph_msg_data_pagelist_cursor_init(struct ceph_msg_data_cursor *cursor,
+ size_t length)
+{
+ struct ceph_msg_data *data = cursor->data;
+ struct ceph_pagelist *pagelist;
+ struct page *page;
+
+ BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
+
+ pagelist = data->pagelist;
+ BUG_ON(!pagelist);
+
+ /* NOTE: a zero length leaves the cursor fields untouched */
+ if (!length)
+ return; /* pagelist can be assigned but empty */
+
+ BUG_ON(list_empty(&pagelist->head));
+ page = list_first_entry(&pagelist->head, struct page, lru);
+
+ cursor->resid = min(length, pagelist->length);
+ cursor->page = page;
+ cursor->offset = 0;
+ cursor->last_piece = cursor->resid <= PAGE_SIZE;
+}
+
+/*
+ * Return the page holding the next piece of a PAGELIST data item,
+ * along with the offset into that page and the piece length.
+ */
+static struct page *
+ceph_msg_data_pagelist_next(struct ceph_msg_data_cursor *cursor,
+ size_t *page_offset, size_t *length)
+{
+ struct ceph_msg_data *data = cursor->data;
+ struct ceph_pagelist *pagelist;
+
+ BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
+
+ pagelist = data->pagelist;
+ BUG_ON(!pagelist);
+
+ BUG_ON(!cursor->page);
+ BUG_ON(cursor->offset + cursor->resid != pagelist->length);
+
+ /* offset of first page in pagelist is always 0 */
+ *page_offset = cursor->offset & ~PAGE_MASK;
+ if (cursor->last_piece)
+ *length = cursor->resid;
+ else
+ *length = PAGE_SIZE - *page_offset;
+
+ return cursor->page;
+}
+
+/*
+ * Consume "bytes" from a PAGELIST data item.  Returns true if this
+ * moved the cursor onto the next page in the list, false otherwise.
+ */
+static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
+ size_t bytes)
+{
+ struct ceph_msg_data *data = cursor->data;
+ struct ceph_pagelist *pagelist;
+
+ BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
+
+ pagelist = data->pagelist;
+ BUG_ON(!pagelist);
+
+ BUG_ON(cursor->offset + cursor->resid != pagelist->length);
+ BUG_ON((cursor->offset & ~PAGE_MASK) + bytes > PAGE_SIZE);
+
+ /* Advance the cursor offset */
+
+ cursor->resid -= bytes;
+ cursor->offset += bytes;
+ /* offset of first page in pagelist is always 0 */
+ if (!bytes || cursor->offset & ~PAGE_MASK)
+ return false; /* more bytes to process in the current page */
+
+ if (!cursor->resid)
+ return false; /* no more data */
+
+ /* Move on to the next page */
+
+ BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head));
+ cursor->page = list_entry_next(cursor->page, lru);
+ cursor->last_piece = cursor->resid <= PAGE_SIZE;
+
+ return true;
+}
+
+/*
+ * Message data is handled (sent or received) in pieces, where each
+ * piece resides on a single page.  The network layer might not
+ * consume an entire piece at once.  A data item's cursor keeps
+ * track of which piece is next to process and how much remains to
+ * be processed in that piece.  It also tracks whether the current
+ * piece is the last one in the data item.
+ */
+static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
+{
+ size_t length = cursor->total_resid;
+
+ switch (cursor->data->type) {
+ case CEPH_MSG_DATA_PAGELIST:
+ ceph_msg_data_pagelist_cursor_init(cursor, length);
+ break;
+ case CEPH_MSG_DATA_PAGES:
+ ceph_msg_data_pages_cursor_init(cursor, length);
+ break;
+#ifdef CONFIG_BLOCK
+ case CEPH_MSG_DATA_BIO:
+ ceph_msg_data_bio_cursor_init(cursor, length);
+ break;
+#endif /* CONFIG_BLOCK */
+ case CEPH_MSG_DATA_NONE:
+ default:
+ /* deliberately tolerated; caller's BUG_ONs catch misuse */
+ /* BUG(); */
+ break;
+ }
+ cursor->need_crc = true;
+}
+
+/*
+ * Initialize a message's cursor to cover "length" bytes of its
+ * data, starting with the first item on the msg's data list.
+ */
+static void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length)
+{
+ struct ceph_msg_data_cursor *cursor = &msg->cursor;
+ struct ceph_msg_data *data;
+
+ BUG_ON(!length);
+ BUG_ON(length > msg->data_length);
+ BUG_ON(list_empty(&msg->data));
+
+ cursor->data_head = &msg->data;
+ cursor->total_resid = length;
+ data = list_first_entry(&msg->data, struct ceph_msg_data, links);
+ cursor->data = data;
+
+ __ceph_msg_data_cursor_init(cursor);
+}
+
+/*
+ * Return the page containing the next piece to process for a given
+ * data item, and supply the page offset and length of that piece.
+ * Indicate whether this is the last piece in this data item.
+ */
+static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
+ size_t *page_offset, size_t *length,
+ bool *last_piece)
+{
+ struct page *page;
+
+ switch (cursor->data->type) {
+ case CEPH_MSG_DATA_PAGELIST:
+ page = ceph_msg_data_pagelist_next(cursor, page_offset, length);
+ break;
+ case CEPH_MSG_DATA_PAGES:
+ page = ceph_msg_data_pages_next(cursor, page_offset, length);
+ break;
+#ifdef CONFIG_BLOCK
+ case CEPH_MSG_DATA_BIO:
+ page = ceph_msg_data_bio_next(cursor, page_offset, length);
+ break;
+#endif /* CONFIG_BLOCK */
+ case CEPH_MSG_DATA_NONE:
+ default:
+ page = NULL;
+ break;
+ }
+ /* a NULL page or empty piece means the cursor was misused */
+ BUG_ON(!page);
+ BUG_ON(*page_offset + *length > PAGE_SIZE);
+ BUG_ON(!*length);
+ if (last_piece)
+ *last_piece = cursor->last_piece;
+
+ return page;
+}
+
+/*
+ * Returns true if the result moves the cursor on to the next piece
+ * of the data item.
+ */
+static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
+ size_t bytes)
+{
+ bool new_piece;
+
+ BUG_ON(bytes > cursor->resid);
+ switch (cursor->data->type) {
+ case CEPH_MSG_DATA_PAGELIST:
+ new_piece = ceph_msg_data_pagelist_advance(cursor, bytes);
+ break;
+ case CEPH_MSG_DATA_PAGES:
+ new_piece = ceph_msg_data_pages_advance(cursor, bytes);
+ break;
+#ifdef CONFIG_BLOCK
+ case CEPH_MSG_DATA_BIO:
+ new_piece = ceph_msg_data_bio_advance(cursor, bytes);
+ break;
+#endif /* CONFIG_BLOCK */
+ case CEPH_MSG_DATA_NONE:
+ default:
+ BUG();
+ break;
+ }
+ cursor->total_resid -= bytes;
+
+ /* current item exhausted but more message data remains:
+ * step to the next data item and re-init the cursor for it */
+ if (!cursor->resid && cursor->total_resid) {
+ WARN_ON(!cursor->last_piece);
+ BUG_ON(list_is_last(&cursor->data->links, cursor->data_head));
+ cursor->data = list_entry_next(cursor->data, links);
+ __ceph_msg_data_cursor_init(cursor);
+ new_piece = true;
+ }
+ cursor->need_crc = new_piece;
+
+ return new_piece;
+}
+
+/*
+ * Set up a message's data cursor before sending or receiving its
+ * data payload.  data_len must be non-zero.
+ */
+static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
+{
+ BUG_ON(!msg);
+ BUG_ON(!data_len);
+
+ /* Initialize data cursor */
+
+ ceph_msg_data_cursor_init(msg, (size_t)data_len);
+}
+
+/*
+ * Prepare footer for currently outgoing message, and finish things
+ * off.  Assumes out_kvec* are already valid.. we just add on to the end.
+ */
+static void prepare_write_message_footer(struct ceph_connection *con)
+{
+ struct ceph_msg *m = con->out_msg;
+ /* next free kvec slot; assumes the array has room for it */
+ int v = con->out_kvec_left;
+
+ m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
+
+ dout("prepare_write_message_footer %p\n", con);
+ con->out_kvec_is_msg = true;
+ con->out_kvec[v].iov_base = &m->footer;
+ con->out_kvec[v].iov_len = sizeof(m->footer);
+ con->out_kvec_bytes += sizeof(m->footer);
+ con->out_kvec_left++;
+ con->out_more = m->more_to_follow;
+ con->out_msg_done = true;
+}
+
+/*
+ * Prepare headers for the next outgoing message: queue tag, header,
+ * front and middle sections (plus an opportunistic ack), compute
+ * their crcs, and either queue the footer (no data payload) or set
+ * up the data cursor for a later write_partial_message_data().
+ */
+static void prepare_write_message(struct ceph_connection *con)
+{
+ struct ceph_msg *m;
+ u32 crc;
+
+ con_out_kvec_reset(con);
+ con->out_kvec_is_msg = true;
+ con->out_msg_done = false;
+
+ /* Sneak an ack in there first? If we can get it into the same
+ * TCP packet that's a good thing. */
+ if (con->in_seq > con->in_seq_acked) {
+ con->in_seq_acked = con->in_seq;
+ con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
+ con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
+ con_out_kvec_add(con, sizeof (con->out_temp_ack),
+ &con->out_temp_ack);
+ }
+
+ BUG_ON(list_empty(&con->out_queue));
+ m = list_first_entry(&con->out_queue, struct ceph_msg, list_head);
+ con->out_msg = m;
+ BUG_ON(m->con != con);
+
+ /* put message on sent list */
+ ceph_msg_get(m);
+ list_move_tail(&m->list_head, &con->out_sent);
+
+ /*
+ * only assign outgoing seq # if we haven't sent this message
+ * yet. if it is requeued, resend with its original seq.
+ */
+ if (m->needs_out_seq) {
+ m->hdr.seq = cpu_to_le64(++con->out_seq);
+ m->needs_out_seq = false;
+ }
+ WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len));
+
+ dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n",
+ m, con->out_seq, le16_to_cpu(m->hdr.type),
+ le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len),
+ m->data_length);
+ BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len);
+
+ /* tag + hdr + front + middle */
+ con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
+ con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
+ con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
+
+ if (m->middle)
+ con_out_kvec_add(con, m->middle->vec.iov_len,
+ m->middle->vec.iov_base);
+
+ /* fill in crc (except data pages), footer */
+ crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
+ con->out_msg->hdr.crc = cpu_to_le32(crc);
+ con->out_msg->footer.flags = 0;
+
+ crc = crc32c(0, m->front.iov_base, m->front.iov_len);
+ con->out_msg->footer.front_crc = cpu_to_le32(crc);
+ if (m->middle) {
+ crc = crc32c(0, m->middle->vec.iov_base,
+ m->middle->vec.iov_len);
+ con->out_msg->footer.middle_crc = cpu_to_le32(crc);
+ } else
+ con->out_msg->footer.middle_crc = 0;
+ dout("%s front_crc %u middle_crc %u\n", __func__,
+ le32_to_cpu(con->out_msg->footer.front_crc),
+ le32_to_cpu(con->out_msg->footer.middle_crc));
+
+ /* is there a data payload? */
+ con->out_msg->footer.data_crc = 0;
+ if (m->data_length) {
+ prepare_message_data(con->out_msg, m->data_length);
+ con->out_more = 1; /* data + footer will follow */
+ } else {
+ /* no, queue up footer too and be done */
+ prepare_write_message_footer(con);
+ }
+
+ con_flag_set(con, CON_FLAG_WRITE_PENDING);
+}
+
+/*
+ * Prepare an ack: queue the ack tag plus the (le64) acked seq and
+ * flag the connection as having pending output.
+ */
+static void prepare_write_ack(struct ceph_connection *con)
+{
+ dout("prepare_write_ack %p %llu -> %llu\n", con,
+ con->in_seq_acked, con->in_seq);
+ con->in_seq_acked = con->in_seq;
+
+ con_out_kvec_reset(con);
+
+ con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
+
+ con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
+ con_out_kvec_add(con, sizeof (con->out_temp_ack),
+ &con->out_temp_ack);
+
+ con->out_more = 1; /* more will follow.. eventually.. */
+ con_flag_set(con, CON_FLAG_WRITE_PENDING);
+}
+
+/*
+ * Prepare to share the seq during handshake (no tag byte here,
+ * unlike prepare_write_ack: the peer asked for a bare seq).
+ */
+static void prepare_write_seq(struct ceph_connection *con)
+{
+ dout("prepare_write_seq %p %llu -> %llu\n", con,
+ con->in_seq_acked, con->in_seq);
+ con->in_seq_acked = con->in_seq;
+
+ con_out_kvec_reset(con);
+
+ con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
+ con_out_kvec_add(con, sizeof (con->out_temp_ack),
+ &con->out_temp_ack);
+
+ con_flag_set(con, CON_FLAG_WRITE_PENDING);
+}
+
+/*
+ * Prepare to write keepalive byte.
+ */
+static void prepare_write_keepalive(struct ceph_connection *con)
+{
+ dout("prepare_write_keepalive %p\n", con);
+ con_out_kvec_reset(con);
+ con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
+ con_flag_set(con, CON_FLAG_WRITE_PENDING);
+}
+
+/*
+ * Connection negotiation.
+ */
+
+/*
+ * Obtain an authorizer from the upper layer for the connect
+ * handshake.  Returns NULL when no get_authorizer op exists, an
+ * ERR_PTR on failure, or the handshake (also recording its reply
+ * buffer on the con).  Drops con->mutex around the callback, so
+ * the connection state is re-checked afterwards.
+ */
+static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection *con,
+ int *auth_proto)
+{
+ struct ceph_auth_handshake *auth;
+
+ if (!con->ops->get_authorizer) {
+ con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
+ con->out_connect.authorizer_len = 0;
+ return NULL;
+ }
+
+ /* Can't hold the mutex while getting authorizer */
+ mutex_unlock(&con->mutex);
+ auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
+ mutex_lock(&con->mutex);
+
+ if (IS_ERR(auth))
+ return auth;
+ if (con->state != CON_STATE_NEGOTIATING)
+ return ERR_PTR(-EAGAIN);
+
+ con->auth_reply_buf = auth->authorizer_reply_buf;
+ con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
+ return auth;
+}
+
+/*
+ * We connected to a peer and are saying hello: queue the banner
+ * string and our encoded address.
+ */
+static void prepare_write_banner(struct ceph_connection *con)
+{
+ con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
+ con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr),
+ &con->msgr->my_enc_addr);
+
+ con->out_more = 0;
+ con_flag_set(con, CON_FLAG_WRITE_PENDING);
+}
+
+/*
+ * Fill in and queue the ceph_msg_connect structure (plus any
+ * authorizer payload) for the negotiation handshake.  Returns 0 or
+ * a negative error from authorizer setup.
+ */
+static int prepare_write_connect(struct ceph_connection *con)
+{
+ unsigned int global_seq = get_global_seq(con->msgr, 0);
+ int proto;
+ int auth_proto;
+ struct ceph_auth_handshake *auth;
+
+ switch (con->peer_name.type) {
+ case CEPH_ENTITY_TYPE_MON:
+ proto = CEPH_MONC_PROTOCOL;
+ break;
+ case CEPH_ENTITY_TYPE_OSD:
+ proto = CEPH_OSDC_PROTOCOL;
+ break;
+ case CEPH_ENTITY_TYPE_MDS:
+ proto = CEPH_MDSC_PROTOCOL;
+ break;
+ default:
+ BUG();
+ }
+
+ dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
+ con->connect_seq, global_seq, proto);
+
+ con->out_connect.features = cpu_to_le64(con->msgr->supported_features);
+ con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
+ con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
+ con->out_connect.global_seq = cpu_to_le32(global_seq);
+ con->out_connect.protocol_version = cpu_to_le32(proto);
+ con->out_connect.flags = 0;
+
+ auth_proto = CEPH_AUTH_UNKNOWN;
+ auth = get_connect_authorizer(con, &auth_proto);
+ if (IS_ERR(auth))
+ return PTR_ERR(auth);
+
+ con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
+ con->out_connect.authorizer_len = auth ?
+ cpu_to_le32(auth->authorizer_buf_len) : 0;
+
+ con_out_kvec_add(con, sizeof (con->out_connect),
+ &con->out_connect);
+ if (auth && auth->authorizer_buf_len)
+ con_out_kvec_add(con, auth->authorizer_buf_len,
+ auth->authorizer_buf);
+
+ con->out_more = 0;
+ con_flag_set(con, CON_FLAG_WRITE_PENDING);
+
+ return 0;
+}
+
+/*
+ * write as much of pending kvecs to the socket as we can.
+ *  1 -> done
+ *  0 -> socket full, but more to do
+ * <0 -> error
+ */
+static int write_partial_kvec(struct ceph_connection *con)
+{
+ int ret;
+
+ dout("write_partial_kvec %p %d left\n", con, con->out_kvec_bytes);
+ while (con->out_kvec_bytes > 0) {
+ ret = ceph_tcp_sendmsg(con->sock, con->out_kvec_cur,
+ con->out_kvec_left, con->out_kvec_bytes,
+ con->out_more);
+ if (ret <= 0)
+ goto out;
+ con->out_kvec_bytes -= ret;
+ if (con->out_kvec_bytes == 0)
+ break; /* done */
+
+ /* account for full iov entries consumed */
+ /* ret > 0 here, so the signed/unsigned compare is safe */
+ while (ret >= con->out_kvec_cur->iov_len) {
+ BUG_ON(!con->out_kvec_left);
+ ret -= con->out_kvec_cur->iov_len;
+ con->out_kvec_cur++;
+ con->out_kvec_left--;
+ }
+ /* and for a partially-consumed entry */
+ if (ret) {
+ con->out_kvec_cur->iov_len -= ret;
+ con->out_kvec_cur->iov_base += ret;
+ }
+ }
+ con->out_kvec_left = 0;
+ con->out_kvec_is_msg = false;
+ ret = 1;
+out:
+ dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con,
+ con->out_kvec_bytes, con->out_kvec_left, ret);
+ return ret; /* done! */
+}
+
+/*
+ * Fold "length" bytes of a page (starting at page_offset) into a
+ * running crc32c, mapping the page temporarily to do so.
+ */
+static u32 ceph_crc32c_page(u32 crc, struct page *page,
+ unsigned int page_offset,
+ unsigned int length)
+{
+ char *kaddr;
+
+ kaddr = kmap(page);
+ BUG_ON(kaddr == NULL);
+ crc = crc32c(crc, kaddr + page_offset, length);
+ kunmap(page);
+
+ return crc;
+}
+/*
+ * Write as much message data payload as we can.  If we finish, queue
+ * up the footer.
+ *  1 -> done, footer is now queued in out_kvec[].
+ *  0 -> socket full, but more to do
+ * <0 -> error
+ */
+static int write_partial_message_data(struct ceph_connection *con)
+{
+ struct ceph_msg *msg = con->out_msg;
+ struct ceph_msg_data_cursor *cursor = &msg->cursor;
+ bool do_datacrc = !con->msgr->nocrc;
+ u32 crc;
+
+ dout("%s %p msg %p\n", __func__, con, msg);
+
+ if (list_empty(&msg->data))
+ return -EINVAL;
+
+ /*
+ * Iterate through each page that contains data to be
+ * written, and send as much as possible for each.
+ *
+ * If we are calculating the data crc (the default), we will
+ * need to map the page. If we have no pages, they have
+ * been revoked, so use the zero page.
+ */
+ crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0;
+ while (cursor->resid) {
+ struct page *page;
+ size_t page_offset;
+ size_t length;
+ bool last_piece;
+ int ret;
+
+ page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
+ &last_piece);
+ ret = ceph_tcp_sendpage(con->sock, page, page_offset,
+ length, last_piece);
+ if (ret <= 0) {
+ /* stash the partial crc so we can resume later */
+ if (do_datacrc)
+ msg->footer.data_crc = cpu_to_le32(crc);
+
+ return ret;
+ }
+ if (do_datacrc && cursor->need_crc)
+ crc = ceph_crc32c_page(crc, page, page_offset, length);
+ /*
+ * Advance the cursor; it maintains need_crc for the
+ * next piece itself, so the return value (new piece or
+ * not) is not needed here.  The old "need_crc" local
+ * was a dead store and has been dropped.
+ */
+ ceph_msg_data_advance(&msg->cursor, (size_t)ret);
+ }
+
+ dout("%s %p msg %p done\n", __func__, con, msg);
+
+ /* prepare and queue up footer, too */
+ if (do_datacrc)
+ msg->footer.data_crc = cpu_to_le32(crc);
+ else
+ msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
+ con_out_kvec_reset(con);
+ prepare_write_message_footer(con);
+
+ return 1; /* must return > 0 to indicate success */
+}
+
+/*
+ * write some zeros: consume con->out_skip by sending from the
+ * shared zero_page, at most one page per iteration.
+ *  1 -> done, 0 -> socket full, <0 -> error
+ */
+static int write_partial_skip(struct ceph_connection *con)
+{
+ int ret;
+
+ while (con->out_skip > 0) {
+ size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE);
+
+ ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true);
+ if (ret <= 0)
+ goto out;
+ con->out_skip -= ret;
+ }
+ ret = 1;
+out:
+ return ret;
+}
+
+/*
+ * Prepare to read connection handshake, or an ack.
+ */
+static void prepare_read_banner(struct ceph_connection *con)
+{
+ dout("prepare_read_banner %p\n", con);
+ con->in_base_pos = 0;
+}
+
+/* Reset read position ahead of reading the connect reply. */
+static void prepare_read_connect(struct ceph_connection *con)
+{
+ dout("prepare_read_connect %p\n", con);
+ con->in_base_pos = 0;
+}
+
+/* Reset read position ahead of reading an ack. */
+static void prepare_read_ack(struct ceph_connection *con)
+{
+ dout("prepare_read_ack %p\n", con);
+ con->in_base_pos = 0;
+}
+
+/* Reset read position and expect a bare seq (handshake SEQ tag). */
+static void prepare_read_seq(struct ceph_connection *con)
+{
+ dout("prepare_read_seq %p\n", con);
+ con->in_base_pos = 0;
+ con->in_tag = CEPH_MSGR_TAG_SEQ;
+}
+
+/* Reset read position and go back to waiting for the next tag byte. */
+static void prepare_read_tag(struct ceph_connection *con)
+{
+ dout("prepare_read_tag %p\n", con);
+ con->in_base_pos = 0;
+ con->in_tag = CEPH_MSGR_TAG_READY;
+}
+
+/*
+ * Prepare to read a message: reset read position and the per-section
+ * crc accumulators.  in_msg must not already be set.
+ */
+static int prepare_read_message(struct ceph_connection *con)
+{
+ dout("prepare_read_message %p\n", con);
+ BUG_ON(con->in_msg != NULL);
+ con->in_base_pos = 0;
+ con->in_front_crc = con->in_middle_crc = con->in_data_crc = 0;
+ return 0;
+}
+
+
+/*
+ * Read into "object" until con->in_base_pos reaches "end".  "size"
+ * is the object's size, so (end - size) is its starting offset in
+ * the current read sequence.  Returns 1 when the object is complete,
+ * 0 if the socket ran dry, or a negative error.
+ */
+static int read_partial(struct ceph_connection *con,
+ int end, int size, void *object)
+{
+ while (con->in_base_pos < end) {
+ int left = end - con->in_base_pos;
+ int have = size - left;
+ int ret = ceph_tcp_recvmsg(con->sock, object + have, left);
+ if (ret <= 0)
+ return ret;
+ con->in_base_pos += ret;
+ }
+ return 1;
+}
+
+
+/*
+ * Read all or part of the connect-side handshake on a new connection
+ */
+static int read_partial_banner(struct ceph_connection *con)
+{
+ int size;
+ int end;
+ int ret;
+
+ dout("read_partial_banner %p at %d\n", con, con->in_base_pos);
+
+ /* peer's banner */
+ size = strlen(CEPH_BANNER);
+ end = size;
+ ret = read_partial(con, end, size, con->in_banner);
+ if (ret <= 0)
+ goto out;
+
+ size = sizeof (con->actual_peer_addr);
+ end += size;
+ ret = read_partial(con, end, size, &con->actual_peer_addr);
+ if (ret <= 0)
+ goto out;
+
+ size = sizeof (con->peer_addr_for_me);
+ end += size;
+ ret = read_partial(con, end, size, &con->peer_addr_for_me);
+ if (ret <= 0)
+ goto out;
+
+out:
+ return ret;
+}
+
+/*
+ * Read the connect reply, then the variable-length authorizer reply
+ * that follows it.  Returns read_partial() semantics (1/0/<0) or
+ * -EINVAL on an oversized authorizer reply.
+ */
+static int read_partial_connect(struct ceph_connection *con)
+{
+ int size;
+ int end;
+ int ret;
+
+ dout("read_partial_connect %p at %d\n", con, con->in_base_pos);
+
+ size = sizeof (con->in_reply);
+ end = size;
+ ret = read_partial(con, end, size, &con->in_reply);
+ if (ret <= 0)
+ goto out;
+
+ /*
+ * authorizer_len comes straight off the wire; bound it by the
+ * reply buffer we allocated before reading into it, or a
+ * malicious peer can overrun auth_reply_buf.  Compare as
+ * unsigned so values above INT_MAX are rejected as well.
+ */
+ size = le32_to_cpu(con->in_reply.authorizer_len);
+ if ((unsigned int)size > (unsigned int)con->auth_reply_buf_len) {
+ pr_err("authorizer reply too big: %u > %d\n",
+ (unsigned int)size, con->auth_reply_buf_len);
+ ret = -EINVAL;
+ goto out;
+ }
+ end += size;
+ ret = read_partial(con, end, size, con->auth_reply_buf);
+ if (ret <= 0)
+ goto out;
+
+ dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n",
+ con, (int)con->in_reply.tag,
+ le32_to_cpu(con->in_reply.connect_seq),
+ le32_to_cpu(con->in_reply.global_seq));
+out:
+ return ret;
+
+}
+
+/*
+ * Verify the hello banner looks okay.  Returns 0 on match, -1 (and
+ * sets con->error_msg) on a bad banner.
+ */
+static int verify_hello(struct ceph_connection *con)
+{
+ if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) {
+ pr_err("connect to %s got bad banner\n",
+ ceph_pr_addr(&con->peer_addr.in_addr));
+ con->error_msg = "protocol error, bad banner";
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * True if the address is the IPv4/IPv6 "any" address (all zeros);
+ * false for other (or unknown) address families.
+ */
+static bool addr_is_blank(struct sockaddr_storage *ss)
+{
+ switch (ss->ss_family) {
+ case AF_INET:
+ return ((struct sockaddr_in *)ss)->sin_addr.s_addr == 0;
+ case AF_INET6:
+ return
+ ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[0] == 0 &&
+ ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[1] == 0 &&
+ ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[2] == 0 &&
+ ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[3] == 0;
+ }
+ return false;
+}
+
+/* Return the port in host byte order, or 0 for unknown families. */
+static int addr_port(struct sockaddr_storage *ss)
+{
+ switch (ss->ss_family) {
+ case AF_INET:
+ return ntohs(((struct sockaddr_in *)ss)->sin_port);
+ case AF_INET6:
+ return ntohs(((struct sockaddr_in6 *)ss)->sin6_port);
+ }
+ return 0;
+}
+
+/* Set the port (host byte order); no-op for unknown families. */
+static void addr_set_port(struct sockaddr_storage *ss, int p)
+{
+ switch (ss->ss_family) {
+ case AF_INET:
+ ((struct sockaddr_in *)ss)->sin_port = htons(p);
+ break;
+ case AF_INET6:
+ ((struct sockaddr_in6 *)ss)->sin6_port = htons(p);
+ break;
+ }
+}
+
+/*
+ * Unlike other *_pton function semantics, zero indicates success.
+ * Tries IPv4 first, then IPv6; fills *ss and, via *ipend, reports
+ * where parsing stopped.  Returns -EINVAL if neither form parses.
+ */
+static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss,
+ char delim, const char **ipend)
+{
+ struct sockaddr_in *in4 = (struct sockaddr_in *) ss;
+ struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss;
+
+ memset(ss, 0, sizeof(*ss));
+
+ if (in4_pton(str, len, (u8 *)&in4->sin_addr.s_addr, delim, ipend)) {
+ ss->ss_family = AF_INET;
+ return 0;
+ }
+
+ if (in6_pton(str, len, (u8 *)&in6->sin6_addr.s6_addr, delim, ipend)) {
+ ss->ss_family = AF_INET6;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+/*
+ * Extract hostname string and resolve using kernel DNS facility.
+ * The stub below (no CONFIG_CEPH_LIB_USE_DNS_RESOLVER) simply
+ * fails with -EINVAL so callers fall through to their error path.
+ */
+#ifdef CONFIG_CEPH_LIB_USE_DNS_RESOLVER
+static int ceph_dns_resolve_name(const char *name, size_t namelen,
+ struct sockaddr_storage *ss, char delim, const char **ipend)
+{
+ const char *end, *delim_p;
+ char *colon_p, *ip_addr = NULL;
+ int ip_len, ret;
+
+ /*
+ * The end of the hostname occurs immediately preceding the delimiter or
+ * the port marker (':') where the delimiter takes precedence.
+ */
+ delim_p = memchr(name, delim, namelen);
+ colon_p = memchr(name, ':', namelen);
+
+ if (delim_p && colon_p)
+ end = delim_p < colon_p ? delim_p : colon_p;
+ else if (!delim_p && colon_p)
+ end = colon_p;
+ else {
+ end = delim_p;
+ if (!end) /* case: hostname:/ */
+ end = name + namelen;
+ }
+
+ if (end <= name)
+ return -EINVAL;
+
+ /* do dns_resolve upcall */
+ ip_len = dns_query(NULL, name, end - name, NULL, &ip_addr, NULL);
+ if (ip_len > 0)
+ ret = ceph_pton(ip_addr, ip_len, ss, -1, NULL);
+ else
+ ret = -ESRCH;
+
+ kfree(ip_addr);
+
+ *ipend = end;
+
+ pr_info("resolve '%.*s' (ret=%d): %s\n", (int)(end - name), name,
+ ret, ret ? "failed" : ceph_pr_addr(ss));
+
+ return ret;
+}
+#else
+static inline int ceph_dns_resolve_name(const char *name, size_t namelen,
+ struct sockaddr_storage *ss, char delim, const char **ipend)
+{
+ return -EINVAL;
+}
+#endif
+
+/*
+ * Parse a server name (IP or hostname).  If a valid IP address is not found
+ * then try to extract a hostname to resolve using userspace DNS upcall.
+ */
+static int ceph_parse_server_name(const char *name, size_t namelen,
+ struct sockaddr_storage *ss, char delim, const char **ipend)
+{
+ int ret;
+
+ ret = ceph_pton(name, namelen, ss, delim, ipend);
+ if (ret)
+ ret = ceph_dns_resolve_name(name, namelen, ss, delim, ipend);
+
+ return ret;
+}
+
+/*
+ * Parse an ip[:port] list into an addr array.  Use the default
+ * monitor port if a port isn't specified.  On success returns 0 and
+ * stores the number of addresses parsed via *count (if non-NULL);
+ * on failure returns a negative error.
+ */
+int ceph_parse_ips(const char *c, const char *end,
+ struct ceph_entity_addr *addr,
+ int max_count, int *count)
+{
+ int i, ret = -EINVAL;
+ const char *p = c;
+
+ dout("parse_ips on '%.*s'\n", (int)(end-c), c);
+ for (i = 0; i < max_count; i++) {
+ const char *ipend;
+ struct sockaddr_storage *ss = &addr[i].in_addr;
+ int port;
+ char delim = ',';
+
+ if (*p == '[') {
+ delim = ']';
+ p++;
+ }
+
+ ret = ceph_parse_server_name(p, end - p, ss, delim, &ipend);
+ if (ret)
+ goto bad;
+ ret = -EINVAL;
+
+ p = ipend;
+
+ if (delim == ']') {
+ if (*p != ']') {
+ dout("missing matching ']'\n");
+ goto bad;
+ }
+ p++;
+ }
+
+ /* port? */
+ if (p < end && *p == ':') {
+ port = 0;
+ p++;
+ while (p < end && *p >= '0' && *p <= '9') {
+ port = (port * 10) + (*p - '0');
+ p++;
+ /*
+ * Reject an out-of-range port as soon as
+ * it appears; checking only after the loop
+ * let "port" overflow int (undefined
+ * behavior) on a long digit string.
+ */
+ if (port > 65535)
+ goto bad;
+ }
+ if (port == 0)
+ port = CEPH_MON_PORT;
+ } else {
+ port = CEPH_MON_PORT;
+ }
+
+ addr_set_port(ss, port);
+
+ dout("parse_ips got %s\n", ceph_pr_addr(ss));
+
+ if (p == end)
+ break;
+ if (*p != ',')
+ goto bad;
+ p++;
+ }
+
+ if (p != end)
+ goto bad;
+
+ if (count)
+ *count = i + 1;
+ return 0;
+
+bad:
+ pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_parse_ips);
+
+/*
+ * Process the peer's banner exchange: verify the banner, decode the
+ * two addresses that follow, check we reached the intended peer, and
+ * learn our own address if we didn't know it yet.  Returns 0 or -1
+ * (with con->error_msg set).
+ */
+static int process_banner(struct ceph_connection *con)
+{
+ dout("process_banner on %p\n", con);
+
+ if (verify_hello(con) < 0)
+ return -1;
+
+ ceph_decode_addr(&con->actual_peer_addr);
+ ceph_decode_addr(&con->peer_addr_for_me);
+
+ /*
+ * Make sure the other end is who we wanted.  note that the other
+ * end may not yet know their ip address, so if it's 0.0.0.0, give
+ * them the benefit of the doubt.
+ */
+ if (memcmp(&con->peer_addr, &con->actual_peer_addr,
+ sizeof(con->peer_addr)) != 0 &&
+ !(addr_is_blank(&con->actual_peer_addr.in_addr) &&
+ con->actual_peer_addr.nonce == con->peer_addr.nonce)) {
+ pr_warning("wrong peer, want %s/%d, got %s/%d\n",
+ ceph_pr_addr(&con->peer_addr.in_addr),
+ (int)le32_to_cpu(con->peer_addr.nonce),
+ ceph_pr_addr(&con->actual_peer_addr.in_addr),
+ (int)le32_to_cpu(con->actual_peer_addr.nonce));
+ con->error_msg = "wrong peer at address";
+ return -1;
+ }
+
+ /*
+ * did we learn our address?
+ */
+ if (addr_is_blank(&con->msgr->inst.addr.in_addr)) {
+ int port = addr_port(&con->msgr->inst.addr.in_addr);
+
+ memcpy(&con->msgr->inst.addr.in_addr,
+ &con->peer_addr_for_me.in_addr,
+ sizeof(con->peer_addr_for_me.in_addr));
+ addr_set_port(&con->msgr->inst.addr.in_addr, port);
+ encode_my_addr(con->msgr);
+ dout("process_banner learned my addr is %s\n",
+ ceph_pr_addr(&con->msgr->inst.addr.in_addr));
+ }
+
+ return 0;
+}
+
+/*
+ * Act on the peer's reply to our connect attempt.  Depending on the
+ * reply tag this retries with new parameters (BADAUTHORIZER,
+ * RESETSESSION, RETRY_SESSION, RETRY_GLOBAL), transitions the
+ * connection to OPEN (SEQ/READY), or fails with con->error_msg set.
+ * Returns 0, -EAGAIN if state changed under us, or -1 on fatal error.
+ */
+static int process_connect(struct ceph_connection *con)
+{
+ u64 sup_feat = con->msgr->supported_features;
+ u64 req_feat = con->msgr->required_features;
+ u64 server_feat = ceph_sanitize_features(
+ le64_to_cpu(con->in_reply.features));
+ int ret;
+
+ dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
+
+ switch (con->in_reply.tag) {
+ case CEPH_MSGR_TAG_FEATURES:
+ pr_err("%s%lld %s feature set mismatch,"
+ " my %llx < server's %llx, missing %llx\n",
+ ENTITY_NAME(con->peer_name),
+ ceph_pr_addr(&con->peer_addr.in_addr),
+ sup_feat, server_feat, server_feat & ~sup_feat);
+ con->error_msg = "missing required protocol features";
+ reset_connection(con);
+ return -1;
+
+ case CEPH_MSGR_TAG_BADPROTOVER:
+ pr_err("%s%lld %s protocol version mismatch,"
+ " my %d != server's %d\n",
+ ENTITY_NAME(con->peer_name),
+ ceph_pr_addr(&con->peer_addr.in_addr),
+ le32_to_cpu(con->out_connect.protocol_version),
+ le32_to_cpu(con->in_reply.protocol_version));
+ con->error_msg = "protocol version mismatch";
+ reset_connection(con);
+ return -1;
+
+ case CEPH_MSGR_TAG_BADAUTHORIZER:
+ con->auth_retry++;
+ dout("process_connect %p got BADAUTHORIZER attempt %d\n", con,
+ con->auth_retry);
+ /* allow exactly one retry with a fresh authorizer */
+ if (con->auth_retry == 2) {
+ con->error_msg = "connect authorization failure";
+ return -1;
+ }
+ con_out_kvec_reset(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ return ret;
+ prepare_read_connect(con);
+ break;
+
+ case CEPH_MSGR_TAG_RESETSESSION:
+ /*
+ * If we connected with a large connect_seq but the peer
+ * has no record of a session with us (no connection, or
+ * connect_seq == 0), they will send RESETSESSION to indicate
+ * that they must have reset their session, and may have
+ * dropped messages.
+ */
+ dout("process_connect got RESET peer seq %u\n",
+ le32_to_cpu(con->in_reply.connect_seq));
+ pr_err("%s%lld %s connection reset\n",
+ ENTITY_NAME(con->peer_name),
+ ceph_pr_addr(&con->peer_addr.in_addr));
+ reset_connection(con);
+ con_out_kvec_reset(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ return ret;
+ prepare_read_connect(con);
+
+ /* Tell ceph about it. */
+ mutex_unlock(&con->mutex);
+ pr_info("reset on %s%lld\n", ENTITY_NAME(con->peer_name));
+ if (con->ops->peer_reset)
+ con->ops->peer_reset(con);
+ mutex_lock(&con->mutex);
+ if (con->state != CON_STATE_NEGOTIATING)
+ return -EAGAIN;
+ break;
+
+ case CEPH_MSGR_TAG_RETRY_SESSION:
+ /*
+ * If we sent a smaller connect_seq than the peer has, try
+ * again with a larger value.
+ */
+ dout("process_connect got RETRY_SESSION my seq %u, peer %u\n",
+ le32_to_cpu(con->out_connect.connect_seq),
+ le32_to_cpu(con->in_reply.connect_seq));
+ con->connect_seq = le32_to_cpu(con->in_reply.connect_seq);
+ con_out_kvec_reset(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ return ret;
+ prepare_read_connect(con);
+ break;
+
+ case CEPH_MSGR_TAG_RETRY_GLOBAL:
+ /*
+ * If we sent a smaller global_seq than the peer has, try
+ * again with a larger value.
+ */
+ dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n",
+ con->peer_global_seq,
+ le32_to_cpu(con->in_reply.global_seq));
+ get_global_seq(con->msgr,
+ le32_to_cpu(con->in_reply.global_seq));
+ con_out_kvec_reset(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ return ret;
+ prepare_read_connect(con);
+ break;
+
+ case CEPH_MSGR_TAG_SEQ:
+ case CEPH_MSGR_TAG_READY:
+ if (req_feat & ~server_feat) {
+ pr_err("%s%lld %s protocol feature mismatch,"
+ " my required %llx > server's %llx, need %llx\n",
+ ENTITY_NAME(con->peer_name),
+ ceph_pr_addr(&con->peer_addr.in_addr),
+ req_feat, server_feat, req_feat & ~server_feat);
+ con->error_msg = "missing required protocol features";
+ reset_connection(con);
+ return -1;
+ }
+
+ WARN_ON(con->state != CON_STATE_NEGOTIATING);
+ con->state = CON_STATE_OPEN;
+ con->auth_retry = 0; /* we authenticated; clear flag */
+ con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
+ con->connect_seq++;
+ con->peer_features = server_feat;
+ dout("process_connect got READY gseq %d cseq %d (%d)\n",
+ con->peer_global_seq,
+ le32_to_cpu(con->in_reply.connect_seq),
+ con->connect_seq);
+ WARN_ON(con->connect_seq !=
+ le32_to_cpu(con->in_reply.connect_seq));
+
+ if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY)
+ con_flag_set(con, CON_FLAG_LOSSYTX);
+
+ con->delay = 0; /* reset backoff memory */
+
+ if (con->in_reply.tag == CEPH_MSGR_TAG_SEQ) {
+ prepare_write_seq(con);
+ prepare_read_seq(con);
+ } else {
+ prepare_read_tag(con);
+ }
+ break;
+
+ case CEPH_MSGR_TAG_WAIT:
+ /*
+ * If there is a connection race (we are opening
+ * connections to each other), one of us may just have
+ * to WAIT. This shouldn't happen if we are the
+ * client.
+ */
+ pr_err("process_connect got WAIT as client\n");
+ con->error_msg = "protocol error, got WAIT as client";
+ return -1;
+
+ default:
+ pr_err("connect protocol error, will retry\n");
+ con->error_msg = "protocol error, garbage tag during connect";
+ return -1;
+ }
+ return 0;
+}
+
+
+/*
+ * read (part of) an ack
+ */
+static int read_partial_ack(struct ceph_connection *con)
+{
+ int size = sizeof (con->in_temp_ack);
+ int end = size;
+
+ return read_partial(con, end, size, &con->in_temp_ack);
+}
+
+/*
+ * We can finally discard anything that's been acked: drop every
+ * message on out_sent whose seq is <= the acked seq, then go back
+ * to waiting for the next tag.
+ */
+static void process_ack(struct ceph_connection *con)
+{
+ struct ceph_msg *m;
+ u64 ack = le64_to_cpu(con->in_temp_ack);
+ u64 seq;
+
+ while (!list_empty(&con->out_sent)) {
+ m = list_first_entry(&con->out_sent, struct ceph_msg,
+ list_head);
+ seq = le64_to_cpu(m->hdr.seq);
+ if (seq > ack)
+ break;
+ dout("got ack for seq %llu type %d at %p\n", seq,
+ le16_to_cpu(m->hdr.type), m);
+ m->ack_stamp = jiffies;
+ ceph_msg_remove(m);
+ }
+ prepare_read_tag(con);
+}
+
+
+/*
+ * Read (part of) a message section (front or middle) into the given
+ * kvec, tracking progress in section->iov_len.  When the section is
+ * complete its crc32c is stored via *crc.  Returns 1 when done, 0 if
+ * the socket ran dry, or a negative error.
+ */
+static int read_partial_message_section(struct ceph_connection *con,
+ struct kvec *section,
+ unsigned int sec_len, u32 *crc)
+{
+ int ret, left;
+
+ BUG_ON(!section);
+
+ while (section->iov_len < sec_len) {
+ BUG_ON(section->iov_base == NULL);
+ left = sec_len - section->iov_len;
+ ret = ceph_tcp_recvmsg(con->sock, (char *)section->iov_base +
+ section->iov_len, left);
+ if (ret <= 0)
+ return ret;
+ section->iov_len += ret;
+ }
+ if (section->iov_len == sec_len)
+ *crc = crc32c(0, section->iov_base, section->iov_len);
+
+ return 1;
+}
+
+/*
+ * Read (part of) an incoming message's data payload, a cursor piece
+ * at a time, accumulating the data crc in con->in_data_crc as we go.
+ * Returns 1 when the payload is complete, 0 if the socket ran dry,
+ * or a negative error.
+ */
+static int read_partial_msg_data(struct ceph_connection *con)
+{
+ struct ceph_msg *msg = con->in_msg;
+ struct ceph_msg_data_cursor *cursor = &msg->cursor;
+ const bool do_datacrc = !con->msgr->nocrc;
+ struct page *page;
+ size_t page_offset;
+ size_t length;
+ u32 crc = 0;
+ int ret;
+
+ BUG_ON(!msg);
+ if (list_empty(&msg->data))
+ return -EIO;
+
+ if (do_datacrc)
+ crc = con->in_data_crc;
+ while (cursor->resid) {
+ page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
+ NULL);
+ ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
+ if (ret <= 0) {
+ /* stash partial crc before bailing out */
+ if (do_datacrc)
+ con->in_data_crc = crc;
+
+ return ret;
+ }
+
+ if (do_datacrc)
+ crc = ceph_crc32c_page(crc, page, page_offset, ret);
+ (void) ceph_msg_data_advance(&msg->cursor, (size_t)ret);
+ }
+ if (do_datacrc)
+ con->in_data_crc = crc;
+
+ return 1; /* must return > 0 to indicate success */
+}
+
+/*
+ * read (part of) a message.
+ */
+static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip);
+
+/*
+ * Read (the rest of) an incoming message: header, front, middle,
+ * data payload and footer, verifying section crcs as they complete.
+ * Returns <= 0 while incomplete or on error, 1 when the whole
+ * message has been received.
+ */
+static int read_partial_message(struct ceph_connection *con)
+{
+	struct ceph_msg *m = con->in_msg;
+	int size;
+	int end;
+	int ret;
+	unsigned int front_len, middle_len, data_len;
+	bool do_datacrc = !con->msgr->nocrc;
+	u64 seq;
+	u32 crc;
+
+	dout("read_partial_message con %p msg %p\n", con, m);
+
+	/* header */
+	size = sizeof (con->in_hdr);
+	end = size;
+	ret = read_partial(con, end, size, &con->in_hdr);
+	if (ret <= 0)
+		return ret;
+
+	/* header crc covers all fields before the crc field itself */
+	crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc));
+	if (cpu_to_le32(crc) != con->in_hdr.crc) {
+		pr_err("read_partial_message bad hdr "
+		       " crc %u != expected %u\n",
+		       crc, con->in_hdr.crc);
+		return -EBADMSG;
+	}
+
+	/* sanity-check the advertised section lengths */
+	front_len = le32_to_cpu(con->in_hdr.front_len);
+	if (front_len > CEPH_MSG_MAX_FRONT_LEN)
+		return -EIO;
+	middle_len = le32_to_cpu(con->in_hdr.middle_len);
+	if (middle_len > CEPH_MSG_MAX_MIDDLE_LEN)
+		return -EIO;
+	data_len = le32_to_cpu(con->in_hdr.data_len);
+	if (data_len > CEPH_MSG_MAX_DATA_LEN)
+		return -EIO;
+
+	/* verify seq# */
+	seq = le64_to_cpu(con->in_hdr.seq);
+	if ((s64)seq - (s64)con->in_seq < 1) {
+		/*
+		 * Already-seen (resent) message: arrange for try_read
+		 * to discard its payload via a negative in_base_pos.
+		 */
+		pr_info("skipping %s%lld %s seq %lld expected %lld\n",
+			ENTITY_NAME(con->peer_name),
+			ceph_pr_addr(&con->peer_addr.in_addr),
+			seq, con->in_seq + 1);
+		con->in_base_pos = -front_len - middle_len - data_len -
+			sizeof(m->footer);
+		con->in_tag = CEPH_MSGR_TAG_READY;
+		return 0;
+	} else if ((s64)seq - (s64)con->in_seq > 1) {
+		pr_err("read_partial_message bad seq %lld expected %lld\n",
+		       seq, con->in_seq + 1);
+		con->error_msg = "bad message sequence # for incoming message";
+		return -EBADMSG;
+	}
+
+	/* allocate message? */
+	if (!con->in_msg) {
+		int skip = 0;
+
+		dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
+		     front_len, data_len);
+		ret = ceph_con_in_msg_alloc(con, &skip);
+		if (ret < 0)
+			return ret;
+
+		/* exactly one of in_msg / skip must be set */
+		BUG_ON(!con->in_msg ^ skip);
+		if (con->in_msg && data_len > con->in_msg->data_length) {
+			pr_warning("%s skipping long message (%u > %zd)\n",
+				__func__, data_len, con->in_msg->data_length);
+			ceph_msg_put(con->in_msg);
+			con->in_msg = NULL;
+			skip = 1;
+		}
+		if (skip) {
+			/* skip this message */
+			dout("alloc_msg said skip message\n");
+			con->in_base_pos = -front_len - middle_len - data_len -
+				sizeof(m->footer);
+			con->in_tag = CEPH_MSGR_TAG_READY;
+			con->in_seq++;
+			return 0;
+		}
+
+		BUG_ON(!con->in_msg);
+		BUG_ON(con->in_msg->con != con);
+		m = con->in_msg;
+		m->front.iov_len = 0; /* haven't read it yet */
+		if (m->middle)
+			m->middle->vec.iov_len = 0;
+
+		/* prepare for data payload, if any */
+
+		if (data_len)
+			prepare_message_data(con->in_msg, data_len);
+	}
+
+	/* front */
+	ret = read_partial_message_section(con, &m->front, front_len,
+					   &con->in_front_crc);
+	if (ret <= 0)
+		return ret;
+
+	/* middle */
+	if (m->middle) {
+		ret = read_partial_message_section(con, &m->middle->vec,
+						   middle_len,
+						   &con->in_middle_crc);
+		if (ret <= 0)
+			return ret;
+	}
+
+	/* (page) data */
+	if (data_len) {
+		ret = read_partial_msg_data(con);
+		if (ret <= 0)
+			return ret;
+	}
+
+	/* footer (end accumulates past the header read above) */
+	size = sizeof (m->footer);
+	end += size;
+	ret = read_partial(con, end, size, &m->footer);
+	if (ret <= 0)
+		return ret;
+
+	dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n",
+	     m, front_len, m->footer.front_crc, middle_len,
+	     m->footer.middle_crc, data_len, m->footer.data_crc);
+
+	/* crc ok? */
+	if (con->in_front_crc != le32_to_cpu(m->footer.front_crc)) {
+		pr_err("read_partial_message %p front crc %u != exp. %u\n",
+		       m, con->in_front_crc, m->footer.front_crc);
+		return -EBADMSG;
+	}
+	if (con->in_middle_crc != le32_to_cpu(m->footer.middle_crc)) {
+		pr_err("read_partial_message %p middle crc %u != exp %u\n",
+		       m, con->in_middle_crc, m->footer.middle_crc);
+		return -EBADMSG;
+	}
+	if (do_datacrc &&
+	    (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 &&
+	    con->in_data_crc != le32_to_cpu(m->footer.data_crc)) {
+		pr_err("read_partial_message %p data crc %u != exp. %u\n", m,
+		       con->in_data_crc, le32_to_cpu(m->footer.data_crc));
+		return -EBADMSG;
+	}
+
+	return 1; /* done! */
+}
+
+/*
+ * Process message.  This happens in the worker thread.  The callback should
+ * be careful not to do anything that waits on other incoming messages or it
+ * may deadlock.
+ */
+static void process_message(struct ceph_connection *con)
+{
+	struct ceph_msg *msg;
+
+	/* detach msg from con; drop the con ref held on in_msg's behalf */
+	BUG_ON(con->in_msg->con != con);
+	con->in_msg->con = NULL;
+	msg = con->in_msg;
+	con->in_msg = NULL;
+	con->ops->put(con);
+
+	/* if first message, set peer_name */
+	if (con->peer_name.type == 0)
+		con->peer_name = msg->hdr.src;
+
+	con->in_seq++;
+	/* drop the mutex while dispatching so the handler can use the con */
+	mutex_unlock(&con->mutex);
+
+	dout("===== %p %llu from %s%lld %d=%s len %d+%d (%u %u %u) =====\n",
+	     msg, le64_to_cpu(msg->hdr.seq),
+	     ENTITY_NAME(msg->hdr.src),
+	     le16_to_cpu(msg->hdr.type),
+	     ceph_msg_type_name(le16_to_cpu(msg->hdr.type)),
+	     le32_to_cpu(msg->hdr.front_len),
+	     le32_to_cpu(msg->hdr.data_len),
+	     con->in_front_crc, con->in_middle_crc, con->in_data_crc);
+	con->ops->dispatch(con, msg);
+
+	mutex_lock(&con->mutex);
+}
+
+
+/*
+ * Write something to the socket.  Called in a worker thread when the
+ * socket appears to be writeable and we have something ready to send.
+ * Called with con->mutex held (see con_work).
+ */
+static int try_write(struct ceph_connection *con)
+{
+	int ret = 1;
+
+	dout("try_write start %p state %lu\n", con, con->state);
+
+more:
+	dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
+
+	/* open the socket first? */
+	if (con->state == CON_STATE_PREOPEN) {
+		BUG_ON(con->sock);
+		con->state = CON_STATE_CONNECTING;
+
+		/* queue our banner for sending and expect one back */
+		con_out_kvec_reset(con);
+		prepare_write_banner(con);
+		prepare_read_banner(con);
+
+		BUG_ON(con->in_msg);
+		con->in_tag = CEPH_MSGR_TAG_READY;
+		dout("try_write initiating connect on %p new state %lu\n",
+		     con, con->state);
+		ret = ceph_tcp_connect(con);
+		if (ret < 0) {
+			con->error_msg = "connect error";
+			goto out;
+		}
+	}
+
+more_kvec:
+	/* kvec data queued? */
+	if (con->out_skip) {
+		ret = write_partial_skip(con);
+		if (ret <= 0)
+			goto out;
+	}
+	if (con->out_kvec_left) {
+		ret = write_partial_kvec(con);
+		if (ret <= 0)
+			goto out;
+	}
+
+	/* msg pages? */
+	if (con->out_msg) {
+		if (con->out_msg_done) {
+			ceph_msg_put(con->out_msg);
+			con->out_msg = NULL; /* we're done with this one */
+			goto do_next;
+		}
+
+		ret = write_partial_message_data(con);
+		if (ret == 1)
+			goto more_kvec;  /* we need to send the footer, too! */
+		if (ret == 0)
+			goto out;
+		if (ret < 0) {
+			dout("try_write write_partial_message_data err %d\n",
+			     ret);
+			goto out;
+		}
+	}
+
+do_next:
+	if (con->state == CON_STATE_OPEN) {
+		/* is anything else pending? */
+		if (!list_empty(&con->out_queue)) {
+			prepare_write_message(con);
+			goto more;
+		}
+		if (con->in_seq > con->in_seq_acked) {
+			prepare_write_ack(con);
+			goto more;
+		}
+		if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) {
+			prepare_write_keepalive(con);
+			goto more;
+		}
+	}
+
+	/* Nothing to do! */
+	con_flag_clear(con, CON_FLAG_WRITE_PENDING);
+	dout("try_write nothing else to write.\n");
+	ret = 0;
+out:
+	dout("try_write done on %p ret %d\n", con, ret);
+	return ret;
+}
+
+
+
+/*
+ * Read what we can from the socket.
+ * Called with con->mutex held (see con_work).
+ */
+static int try_read(struct ceph_connection *con)
+{
+	int ret = -1;
+
+more:
+	dout("try_read start on %p state %lu\n", con, con->state);
+	if (con->state != CON_STATE_CONNECTING &&
+	    con->state != CON_STATE_NEGOTIATING &&
+	    con->state != CON_STATE_OPEN)
+		return 0;
+
+	BUG_ON(!con->sock);
+
+	dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag,
+	     con->in_base_pos);
+
+	if (con->state == CON_STATE_CONNECTING) {
+		dout("try_read connecting\n");
+		ret = read_partial_banner(con);
+		if (ret <= 0)
+			goto out;
+		ret = process_banner(con);
+		if (ret < 0)
+			goto out;
+
+		con->state = CON_STATE_NEGOTIATING;
+
+		/*
+		 * Received banner is good, exchange connection info.
+		 * Do not reset out_kvec, as sending our banner raced
+		 * with receiving peer banner after connect completed.
+		 */
+		ret = prepare_write_connect(con);
+		if (ret < 0)
+			goto out;
+		prepare_read_connect(con);
+
+		/* Send connection info before awaiting response */
+		goto out;
+	}
+
+	if (con->state == CON_STATE_NEGOTIATING) {
+		dout("try_read negotiating\n");
+		ret = read_partial_connect(con);
+		if (ret <= 0)
+			goto out;
+		ret = process_connect(con);
+		if (ret < 0)
+			goto out;
+		goto more;
+	}
+
+	WARN_ON(con->state != CON_STATE_OPEN);
+
+	/* negative in_base_pos means "discard that many payload bytes" */
+	if (con->in_base_pos < 0) {
+		/*
+		 * skipping + discarding content.
+		 *
+		 * FIXME: there must be a better way to do this!
+		 */
+		static char buf[SKIP_BUF_SIZE];
+		int skip = min((int) sizeof (buf), -con->in_base_pos);
+
+		dout("skipping %d / %d bytes\n", skip, -con->in_base_pos);
+		ret = ceph_tcp_recvmsg(con->sock, buf, skip);
+		if (ret <= 0)
+			goto out;
+		con->in_base_pos += ret;
+		if (con->in_base_pos)
+			goto more;
+	}
+	if (con->in_tag == CEPH_MSGR_TAG_READY) {
+		/*
+		 * what's next?
+		 */
+		ret = ceph_tcp_recvmsg(con->sock, &con->in_tag, 1);
+		if (ret <= 0)
+			goto out;
+		dout("try_read got tag %d\n", (int)con->in_tag);
+		switch (con->in_tag) {
+		case CEPH_MSGR_TAG_MSG:
+			prepare_read_message(con);
+			break;
+		case CEPH_MSGR_TAG_ACK:
+			prepare_read_ack(con);
+			break;
+		case CEPH_MSGR_TAG_CLOSE:
+			con_close_socket(con);
+			con->state = CON_STATE_CLOSED;
+			goto out;
+		default:
+			goto bad_tag;
+		}
+	}
+	if (con->in_tag == CEPH_MSGR_TAG_MSG) {
+		ret = read_partial_message(con);
+		if (ret <= 0) {
+			switch (ret) {
+			case -EBADMSG:
+				con->error_msg = "bad crc";
+				ret = -EIO;
+				break;
+			case -EIO:
+				con->error_msg = "io error";
+				break;
+			}
+			goto out;
+		}
+		/* tag reset to READY means the message was skipped */
+		if (con->in_tag == CEPH_MSGR_TAG_READY)
+			goto more;
+		process_message(con);
+		if (con->state == CON_STATE_OPEN)
+			prepare_read_tag(con);
+		goto more;
+	}
+	if (con->in_tag == CEPH_MSGR_TAG_ACK ||
+	    con->in_tag == CEPH_MSGR_TAG_SEQ) {
+		/*
+		 * the final handshake seq exchange is semantically
+		 * equivalent to an ACK
+		 */
+		ret = read_partial_ack(con);
+		if (ret <= 0)
+			goto out;
+		process_ack(con);
+		goto more;
+	}
+
+out:
+	dout("try_read done on %p ret %d\n", con, ret);
+	return ret;
+
+bad_tag:
+	pr_err("try_read bad con->in_tag = %d\n", (int)con->in_tag);
+	con->error_msg = "protocol error, garbage tag";
+	ret = -1;
+	goto out;
+}
+
+
+/*
+ * Atomically queue work on a connection after the specified delay.
+ * Bump @con reference to avoid races with connection teardown.
+ * Returns 0 if work was queued, or an error code otherwise.
+ */
+static int queue_con_delay(struct ceph_connection *con, unsigned long delay)
+{
+	if (!con->ops->get(con)) {
+		dout("%s %p ref count 0\n", __func__, con);
+
+		return -ENOENT;
+	}
+
+	if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) {
+		/* already queued: drop the ref we just took */
+		dout("%s %p - already queued\n", __func__, con);
+		con->ops->put(con);
+
+		return -EBUSY;
+	}
+
+	dout("%s %p %lu\n", __func__, con, delay);
+
+	return 0;
+}
+
+/* Queue work on a connection immediately; queueing failure is ignored. */
+static void queue_con(struct ceph_connection *con)
+{
+	(void) queue_con_delay(con, 0);
+}
+
+/*
+ * Consume a pending SOCK_CLOSED event, if any, and record a
+ * state-specific error message on the connection.  Returns true if
+ * the socket had been closed out from under us.
+ */
+static bool con_sock_closed(struct ceph_connection *con)
+{
+	if (!con_flag_test_and_clear(con, CON_FLAG_SOCK_CLOSED))
+		return false;
+
+/* expands to one switch case per connection state */
+#define CASE(x)								\
+	case CON_STATE_ ## x:						\
+		con->error_msg = "socket closed (con state " #x ")";	\
+		break;
+
+	switch (con->state) {
+	CASE(CLOSED);
+	CASE(PREOPEN);
+	CASE(CONNECTING);
+	CASE(NEGOTIATING);
+	CASE(OPEN);
+	CASE(STANDBY);
+	default:
+		pr_warning("%s con %p unrecognized state %lu\n",
+			__func__, con, con->state);
+		con->error_msg = "unrecognized con state";
+		BUG();
+		break;
+	}
+#undef CASE
+
+	return true;
+}
+
+/*
+ * Consume a pending BACKOFF flag, if set, and queue delayed work for
+ * the connection.  Returns true if a backoff was pending.
+ */
+static bool con_backoff(struct ceph_connection *con)
+{
+	int ret;
+
+	if (!con_flag_test_and_clear(con, CON_FLAG_BACKOFF))
+		return false;
+
+	ret = queue_con_delay(con, round_jiffies_relative(con->delay));
+	if (ret) {
+		dout("%s: con %p FAILED to back off %lu\n", __func__,
+			con, con->delay);
+		BUG_ON(ret == -ENOENT);
+		/* work already queued; re-arm the flag to retry later */
+		con_flag_set(con, CON_FLAG_BACKOFF);
+	}
+
+	return true;
+}
+
+/* Finish fault handling; con->mutex must *not* be held here */
+
+static void con_fault_finish(struct ceph_connection *con)
+{
+	/*
+	 * in case we faulted due to authentication, invalidate our
+	 * current tickets so that we can get new ones.
+	 */
+	if (con->auth_retry && con->ops->invalidate_authorizer) {
+		dout("calling invalidate_authorizer()\n");
+		con->ops->invalidate_authorizer(con);
+	}
+
+	/* let the connection's owner react to the fault, if it cares */
+	if (con->ops->fault)
+		con->ops->fault(con);
+}
+
+/*
+ * Do some work on a connection.  Drop a connection ref when we're done.
+ *
+ * Workqueue entry point: handles pending socket-closed/backoff events,
+ * then reads and writes until there is nothing left (or a fault).
+ */
+static void con_work(struct work_struct *work)
+{
+	struct ceph_connection *con = container_of(work, struct ceph_connection,
+						   work.work);
+	bool fault;
+
+	mutex_lock(&con->mutex);
+	while (true) {
+		int ret;
+
+		if ((fault = con_sock_closed(con))) {
+			dout("%s: con %p SOCK_CLOSED\n", __func__, con);
+			break;
+		}
+		if (con_backoff(con)) {
+			dout("%s: con %p BACKOFF\n", __func__, con);
+			break;
+		}
+		if (con->state == CON_STATE_STANDBY) {
+			dout("%s: con %p STANDBY\n", __func__, con);
+			break;
+		}
+		if (con->state == CON_STATE_CLOSED) {
+			dout("%s: con %p CLOSED\n", __func__, con);
+			BUG_ON(con->sock);
+			break;
+		}
+		if (con->state == CON_STATE_PREOPEN) {
+			dout("%s: con %p PREOPEN\n", __func__, con);
+			BUG_ON(con->sock);
+		}
+
+		/* -EAGAIN means the state changed under us; start over */
+		ret = try_read(con);
+		if (ret < 0) {
+			if (ret == -EAGAIN)
+				continue;
+			con->error_msg = "socket error on read";
+			fault = true;
+			break;
+		}
+
+		ret = try_write(con);
+		if (ret < 0) {
+			if (ret == -EAGAIN)
+				continue;
+			con->error_msg = "socket error on write";
+			fault = true;
+		}
+
+		break;	/* If we make it to here, we're done */
+	}
+	if (fault)
+		con_fault(con);
+	mutex_unlock(&con->mutex);
+
+	/* fault callbacks run without the mutex (see con_fault_finish) */
+	if (fault)
+		con_fault_finish(con);
+
+	con->ops->put(con);
+}
+
+/*
+ * Generic error/fault handler.  A retry mechanism is used with
+ * exponential backoff.  Called with con->mutex held (see con_work).
+ */
+static void con_fault(struct ceph_connection *con)
+{
+	pr_warning("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
+	       ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
+	dout("fault %p state %lu to peer %s\n",
+	     con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
+
+	WARN_ON(con->state != CON_STATE_CONNECTING &&
+	       con->state != CON_STATE_NEGOTIATING &&
+	       con->state != CON_STATE_OPEN);
+
+	con_close_socket(con);
+
+	/* lossy channels are never retried */
+	if (con_flag_test(con, CON_FLAG_LOSSYTX)) {
+		dout("fault on LOSSYTX channel, marking CLOSED\n");
+		con->state = CON_STATE_CLOSED;
+		return;
+	}
+
+	/* drop any partially received message and its con ref */
+	if (con->in_msg) {
+		BUG_ON(con->in_msg->con != con);
+		con->in_msg->con = NULL;
+		ceph_msg_put(con->in_msg);
+		con->in_msg = NULL;
+		con->ops->put(con);
+	}
+
+	/* Requeue anything that hasn't been acked */
+	list_splice_init(&con->out_sent, &con->out_queue);
+
+	/* If there are no messages queued or keepalive pending, place
+	 * the connection in a STANDBY state */
+	if (list_empty(&con->out_queue) &&
+	    !con_flag_test(con, CON_FLAG_KEEPALIVE_PENDING)) {
+		dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
+		con_flag_clear(con, CON_FLAG_WRITE_PENDING);
+		con->state = CON_STATE_STANDBY;
+	} else {
+		/* retry after a delay. */
+		con->state = CON_STATE_PREOPEN;
+		if (con->delay == 0)
+			con->delay = BASE_DELAY_INTERVAL;
+		else if (con->delay < MAX_DELAY_INTERVAL)
+			con->delay *= 2;	/* exponential backoff */
+		con_flag_set(con, CON_FLAG_BACKOFF);
+		queue_con(con);
+	}
+}
+
+
+
+/*
+ * initialize a new messenger instance
+ *
+ * @myaddr is optional; when NULL the instance address is left as-is
+ * apart from the nonce chosen below.
+ */
+void ceph_messenger_init(struct ceph_messenger *msgr,
+			struct ceph_entity_addr *myaddr,
+			u64 supported_features,
+			u64 required_features,
+			bool nocrc)
+{
+	msgr->supported_features = supported_features;
+	msgr->required_features = required_features;
+
+	spin_lock_init(&msgr->global_seq_lock);
+
+	if (myaddr)
+		msgr->inst.addr = *myaddr;
+
+	/* select a random nonce */
+	msgr->inst.addr.type = 0;
+	get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
+	encode_my_addr(msgr);
+	msgr->nocrc = nocrc;
+
+	atomic_set(&msgr->stopping, 0);
+
+	dout("%s %p\n", __func__, msgr);
+}
+EXPORT_SYMBOL(ceph_messenger_init);
+/*
+ * Wake a connection out of STANDBY: move it to PREOPEN and bump
+ * connect_seq so the peer sees a reconnect.  Caller holds con->mutex.
+ */
+static void clear_standby(struct ceph_connection *con)
+{
+	/* come back from STANDBY? */
+	if (con->state == CON_STATE_STANDBY) {
+		dout("clear_standby %p and ++connect_seq\n", con);
+		con->state = CON_STATE_PREOPEN;
+		con->connect_seq++;
+		WARN_ON(con_flag_test(con, CON_FLAG_WRITE_PENDING));
+		WARN_ON(con_flag_test(con, CON_FLAG_KEEPALIVE_PENDING));
+	}
+}
+
+/*
+ * Queue up an outgoing message on the given connection.
+ *
+ * Consumes the caller's message reference: if the connection is
+ * closed the message is dropped (put) immediately.
+ */
+void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
+{
+	/* set src+dst */
+	msg->hdr.src = con->msgr->inst.name;
+	BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
+	msg->needs_out_seq = true;
+
+	mutex_lock(&con->mutex);
+
+	if (con->state == CON_STATE_CLOSED) {
+		dout("con_send %p closed, dropping %p\n", con, msg);
+		ceph_msg_put(msg);
+		mutex_unlock(&con->mutex);
+		return;
+	}
+
+	/* the queued message holds a reference on the connection */
+	BUG_ON(msg->con != NULL);
+	msg->con = con->ops->get(con);
+	BUG_ON(msg->con == NULL);
+
+	BUG_ON(!list_empty(&msg->list_head));
+	list_add_tail(&msg->list_head, &con->out_queue);
+	dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg,
+	     ENTITY_NAME(con->peer_name), le16_to_cpu(msg->hdr.type),
+	     ceph_msg_type_name(le16_to_cpu(msg->hdr.type)),
+	     le32_to_cpu(msg->hdr.front_len),
+	     le32_to_cpu(msg->hdr.middle_len),
+	     le32_to_cpu(msg->hdr.data_len));
+
+	clear_standby(con);
+	mutex_unlock(&con->mutex);
+
+	/* if there wasn't anything waiting to send before, queue
+	 * new work */
+	if (con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0)
+		queue_con(con);
+}
+EXPORT_SYMBOL(ceph_con_send);
+
+/*
+ * Revoke a message that was previously queued for send.
+ *
+ * Handles both the not-yet-sent case (still on out_queue/out_sent)
+ * and the partially-sent case (con->out_msg), skipping any kvec
+ * bytes already queued for the wire in the latter.
+ */
+void ceph_msg_revoke(struct ceph_msg *msg)
+{
+	struct ceph_connection *con = msg->con;
+
+	if (!con)
+		return;		/* Message not in our possession */
+
+	mutex_lock(&con->mutex);
+	if (!list_empty(&msg->list_head)) {
+		dout("%s %p msg %p - was on queue\n", __func__, con, msg);
+		list_del_init(&msg->list_head);
+		BUG_ON(msg->con == NULL);
+		msg->con->ops->put(msg->con);
+		msg->con = NULL;
+		msg->hdr.seq = 0;
+
+		ceph_msg_put(msg);
+	}
+	if (con->out_msg == msg) {
+		dout("%s %p msg %p - was sending\n", __func__, con, msg);
+		con->out_msg = NULL;
+		if (con->out_kvec_is_msg) {
+			/* discard whatever of this msg is still queued */
+			con->out_skip = con->out_kvec_bytes;
+			con->out_kvec_is_msg = false;
+		}
+		msg->hdr.seq = 0;
+
+		ceph_msg_put(msg);
+	}
+	mutex_unlock(&con->mutex);
+}
+
+/*
+ * Revoke a message that we may be reading data into.
+ *
+ * If @msg is the connection's current incoming message, detach it
+ * and arrange for the rest of its wire bytes to be discarded.
+ */
+void ceph_msg_revoke_incoming(struct ceph_msg *msg)
+{
+	struct ceph_connection *con;
+
+	BUG_ON(msg == NULL);
+	if (!msg->con) {
+		dout("%s msg %p null con\n", __func__, msg);
+
+		return;		/* Message not in our possession */
+	}
+
+	con = msg->con;
+	mutex_lock(&con->mutex);
+	if (con->in_msg == msg) {
+		unsigned int front_len = le32_to_cpu(con->in_hdr.front_len);
+		unsigned int middle_len = le32_to_cpu(con->in_hdr.middle_len);
+		unsigned int data_len = le32_to_cpu(con->in_hdr.data_len);
+
+		/* skip rest of message (negative in_base_pos; see try_read) */
+		dout("%s %p msg %p revoked\n", __func__, con, msg);
+		con->in_base_pos = con->in_base_pos -
+				sizeof(struct ceph_msg_header) -
+				front_len -
+				middle_len -
+				data_len -
+				sizeof(struct ceph_msg_footer);
+		ceph_msg_put(con->in_msg);
+		con->in_msg = NULL;
+		con->in_tag = CEPH_MSGR_TAG_READY;
+		con->in_seq++;
+	} else {
+		dout("%s %p in_msg %p msg %p no-op\n",
+		     __func__, con, con->in_msg, msg);
+	}
+	mutex_unlock(&con->mutex);
+}
+
+/*
+ * Queue a keepalive byte to ensure the tcp connection is alive.
+ */
+void ceph_con_keepalive(struct ceph_connection *con)
+{
+	dout("con_keepalive %p\n", con);
+	mutex_lock(&con->mutex);
+	clear_standby(con);
+	mutex_unlock(&con->mutex);
+	/* only queue work if neither flag was previously set */
+	if (con_flag_test_and_set(con, CON_FLAG_KEEPALIVE_PENDING) == 0 &&
+	    con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0)
+		queue_con(con);
+}
+EXPORT_SYMBOL(ceph_con_keepalive);
+
+/*
+ * Allocate and initialize a new msg data item of the given type.
+ * Returns NULL on invalid type or allocation failure.
+ */
+static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type)
+{
+	struct ceph_msg_data *data;
+
+	if (WARN_ON(!ceph_msg_data_type_valid(type)))
+		return NULL;
+
+	data = kmem_cache_zalloc(ceph_msg_data_cache, GFP_NOFS);
+	if (!data)
+		return NULL;	/* don't touch data->links on failure */
+
+	data->type = type;
+	INIT_LIST_HEAD(&data->links);
+
+	return data;
+}
+
+/*
+ * Free a msg data item.  A PAGELIST item owns its pagelist and frees
+ * it here; other data types are not owned by the item.
+ */
+static void ceph_msg_data_destroy(struct ceph_msg_data *data)
+{
+	if (!data)
+		return;
+
+	/* item must have been unlinked from its message already */
+	WARN_ON(!list_empty(&data->links));
+	if (data->type == CEPH_MSG_DATA_PAGELIST) {
+		ceph_pagelist_release(data->pagelist);
+		kfree(data->pagelist);
+	}
+	kmem_cache_free(ceph_msg_data_cache, data);
+}
+
+/*
+ * Attach a page array to @msg as an additional data item and grow
+ * msg->data_length accordingly.  @alignment is reduced to an offset
+ * within the first page.
+ */
+void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
+		size_t length, size_t alignment)
+{
+	struct ceph_msg_data *item;
+
+	BUG_ON(!pages);
+	BUG_ON(!length);
+
+	item = ceph_msg_data_create(CEPH_MSG_DATA_PAGES);
+	BUG_ON(!item);
+	item->alignment = alignment & ~PAGE_MASK;
+	item->length = length;
+	item->pages = pages;
+
+	msg->data_length += length;
+	list_add_tail(&item->links, &msg->data);
+}
+EXPORT_SYMBOL(ceph_msg_data_add_pages);
+
+/*
+ * Attach a pagelist to @msg as an additional data item.  The message
+ * takes ownership of the pagelist (freed in ceph_msg_data_destroy).
+ */
+void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
+				struct ceph_pagelist *pagelist)
+{
+	struct ceph_msg_data *data;
+
+	BUG_ON(!pagelist);
+	BUG_ON(!pagelist->length);
+
+	data = ceph_msg_data_create(CEPH_MSG_DATA_PAGELIST);
+	BUG_ON(!data);
+	data->pagelist = pagelist;
+
+	list_add_tail(&data->links, &msg->data);
+	msg->data_length += pagelist->length;
+}
+EXPORT_SYMBOL(ceph_msg_data_add_pagelist);
+
+#ifdef CONFIG_BLOCK
+/*
+ * Attach a bio chain carrying @length bytes to @msg as an additional
+ * data item.  The bio is not owned by the message.
+ */
+void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
+		size_t length)
+{
+	struct ceph_msg_data *data;
+
+	BUG_ON(!bio);
+
+	data = ceph_msg_data_create(CEPH_MSG_DATA_BIO);
+	BUG_ON(!data);
+	data->bio = bio;
+	data->bio_length = length;
+
+	list_add_tail(&data->links, &msg->data);
+	msg->data_length += length;
+}
+EXPORT_SYMBOL(ceph_msg_data_add_bio);
+#endif	/* CONFIG_BLOCK */
+
+/*
+ * construct a new message with given type, size
+ * the new msg has a ref count of 1.
+ *
+ * Returns NULL on allocation failure; when @can_fail is false the
+ * failure is additionally reported with a WARN.
+ */
+struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
+			      bool can_fail)
+{
+	struct ceph_msg *m;
+
+	m = kmem_cache_zalloc(ceph_msg_cache, flags);
+	if (m == NULL)
+		goto out;
+
+	m->hdr.type = cpu_to_le16(type);
+	m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
+	m->hdr.front_len = cpu_to_le32(front_len);
+
+	INIT_LIST_HEAD(&m->list_head);
+	kref_init(&m->kref);
+	INIT_LIST_HEAD(&m->data);
+
+	/* front */
+	if (front_len) {
+		m->front.iov_base = ceph_kvmalloc(front_len, flags);
+		if (m->front.iov_base == NULL) {
+			dout("ceph_msg_new can't allocate %d bytes\n",
+			     front_len);
+			goto out2;
+		}
+	} else {
+		m->front.iov_base = NULL;
+	}
+	m->front_alloc_len = m->front.iov_len = front_len;
+
+	dout("ceph_msg_new %p front %d\n", m, front_len);
+	return m;
+
+out2:
+	ceph_msg_put(m);
+out:
+	if (!can_fail) {
+		pr_err("msg_new can't create type %d front %d\n", type,
+		       front_len);
+		WARN_ON(1);
+	} else {
+		dout("msg_new can't create type %d front %d\n", type,
+		     front_len);
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(ceph_msg_new);
+
+/*
+ * Allocate "middle" portion of a message, if it is needed and wasn't
+ * allocated by alloc_msg.  This allows us to read a small fixed-size
+ * per-type header in the front and then gracefully fail (i.e.,
+ * propagate the error to the caller based on info in the front) when
+ * the middle is too large.
+ */
+static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg)
+{
+	int middle_len = le32_to_cpu(msg->hdr.middle_len);
+	int type = le16_to_cpu(msg->hdr.type);
+
+	dout("alloc_middle %p type %d %s middle_len %d\n", msg, type,
+	     ceph_msg_type_name(type), middle_len);
+	BUG_ON(!middle_len);
+	BUG_ON(msg->middle);
+
+	msg->middle = ceph_buffer_new(middle_len, GFP_NOFS);
+	return msg->middle ? 0 : -ENOMEM;
+}
+
+/*
+ * Allocate a message for receiving an incoming message on a
+ * connection, and save the result in con->in_msg.  Uses the
+ * connection's private alloc_msg op if available.
+ *
+ * Returns 0 on success, or a negative error code.
+ *
+ * On success, if we set *skip = 1:
+ *  - the next message should be skipped and ignored.
+ *  - con->in_msg == NULL
+ * or if we set *skip = 0:
+ *  - con->in_msg is non-null.
+ * On error (ENOMEM, EAGAIN, ...),
+ *  - con->in_msg == NULL
+ */
+static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
+{
+	struct ceph_msg_header *hdr = &con->in_hdr;
+	int middle_len = le32_to_cpu(hdr->middle_len);
+	struct ceph_msg *msg;
+	int ret = 0;
+
+	BUG_ON(con->in_msg != NULL);
+	BUG_ON(!con->ops->alloc_msg);
+
+	/* drop the mutex around the callback; it may block or call back */
+	mutex_unlock(&con->mutex);
+	msg = con->ops->alloc_msg(con, hdr, skip);
+	mutex_lock(&con->mutex);
+	/* state may have changed while the mutex was dropped */
+	if (con->state != CON_STATE_OPEN) {
+		if (msg)
+			ceph_msg_put(msg);
+		return -EAGAIN;
+	}
+	if (msg) {
+		BUG_ON(*skip);
+		con->in_msg = msg;
+		con->in_msg->con = con->ops->get(con);
+		BUG_ON(con->in_msg->con == NULL);
+	} else {
+		/*
+		 * Null message pointer means either we should skip
+		 * this message or we couldn't allocate memory.  The
+		 * former is not an error.
+		 */
+		if (*skip)
+			return 0;
+		con->error_msg = "error allocating memory for incoming message";
+
+		return -ENOMEM;
+	}
+	memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
+
+	if (middle_len && !con->in_msg->middle) {
+		ret = ceph_alloc_middle(con, con->in_msg);
+		if (ret < 0) {
+			ceph_msg_put(con->in_msg);
+			con->in_msg = NULL;
+		}
+	}
+
+	return ret;
+}
+
+
+/*
+ * Free a generically kmalloc'd message (one that did not come from a
+ * msgpool), including its front buffer.
+ */
+void ceph_msg_kfree(struct ceph_msg *msg)
+{
+	dout("msg_kfree %p\n", msg);
+	ceph_kvfree(msg->front.iov_base);
+	kmem_cache_free(ceph_msg_cache, msg);
+}
+
+/*
+ * Drop a msg ref.  Destroy as needed.
+ *
+ * kref release callback: releases the middle buffer and every
+ * attached data item, then returns the message to its pool or frees
+ * it outright.
+ */
+void ceph_msg_last_put(struct kref *kref)
+{
+	struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
+	LIST_HEAD(pending);
+	struct list_head *pos;
+	struct list_head *tmp;
+
+	dout("ceph_msg_put last one on %p\n", m);
+	WARN_ON(!list_empty(&m->list_head));
+
+	/* drop middle, data, if any */
+	if (m->middle) {
+		ceph_buffer_put(m->middle);
+		m->middle = NULL;
+	}
+
+	/* detach all data items, then destroy each one */
+	list_splice_init(&m->data, &pending);
+	list_for_each_safe(pos, tmp, &pending) {
+		struct ceph_msg_data *item;
+
+		item = list_entry(pos, struct ceph_msg_data, links);
+		list_del_init(pos);
+		ceph_msg_data_destroy(item);
+	}
+	m->data_length = 0;
+
+	if (m->pool)
+		ceph_msgpool_put(m->pool, m);
+	else
+		ceph_msg_kfree(m);
+}
+EXPORT_SYMBOL(ceph_msg_last_put);
+
+/* Hex-dump a message's header, front, middle and footer for debugging. */
+void ceph_msg_dump(struct ceph_msg *msg)
+{
+	pr_debug("msg_dump %p (front_alloc_len %d length %zd)\n", msg,
+		 msg->front_alloc_len, msg->data_length);
+	print_hex_dump(KERN_DEBUG, "header: ",
+		       DUMP_PREFIX_OFFSET, 16, 1,
+		       &msg->hdr, sizeof(msg->hdr), true);
+	print_hex_dump(KERN_DEBUG, " front: ",
+		       DUMP_PREFIX_OFFSET, 16, 1,
+		       msg->front.iov_base, msg->front.iov_len, true);
+	if (msg->middle)
+		print_hex_dump(KERN_DEBUG, "middle: ",
+			       DUMP_PREFIX_OFFSET, 16, 1,
+			       msg->middle->vec.iov_base,
+			       msg->middle->vec.iov_len, true);
+	print_hex_dump(KERN_DEBUG, "footer: ",
+		       DUMP_PREFIX_OFFSET, 16, 1,
+		       &msg->footer, sizeof(msg->footer), true);
+}
+EXPORT_SYMBOL(ceph_msg_dump);
diff --git a/libceph/mon_client.c b/libceph/mon_client.c
new file mode 100644
index 0000000..2ac9ef3
--- /dev/null
+++ b/libceph/mon_client.c
@@ -0,0 +1,1102 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/sched.h>
+
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/debugfs.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/auth.h>
+
+/*
+ * Interact with Ceph monitor cluster. Handle requests for new map
+ * versions, and periodically resend as needed. Also implement
+ * statfs() and umount().
+ *
+ * A small cluster of Ceph "monitors" are responsible for managing critical
+ * cluster configuration and state information. An odd number (e.g., 3, 5)
+ * of cmon daemons use a modified version of the Paxos part-time parliament
+ * algorithm to manage the MDS map (mds cluster membership), OSD map, and
+ * list of clients who have mounted the file system.
+ *
+ * We maintain an open, active session with a monitor at all times in order to
+ * receive timely MDSMap updates. We periodically send a keepalive byte on the
+ * TCP socket to ensure we detect a failure. If the connection does break, we
+ * randomly hunt for a new monitor. Once the connection is reestablished, we
+ * resend any outstanding requests.
+ */
+
+static const struct ceph_connection_operations mon_con_ops;
+
+static int __validate_auth(struct ceph_mon_client *monc);
+
+/*
+ * Decode a monmap blob (e.g., during mount).
+ *
+ * Returns a freshly kmalloc'd monmap on success (caller owns and must
+ * kfree it), or an ERR_PTR (-EINVAL on decode failure, -ENOMEM).
+ */
+struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
+{
+ struct ceph_monmap *m = NULL;
+ int i, err = -EINVAL;
+ struct ceph_fsid fsid;
+ u32 epoch, num_mon;
+ u16 version;
+ u32 len;
+
+ ceph_decode_32_safe(&p, end, len, bad);
+ ceph_decode_need(&p, end, len, bad);
+
+ dout("monmap_decode %p %p len %d\n", p, end, (int)(end-p));
+
+ ceph_decode_16_safe(&p, end, version, bad);
+
+ ceph_decode_need(&p, end, sizeof(fsid) + 2*sizeof(u32), bad);
+ ceph_decode_copy(&p, &fsid, sizeof(fsid));
+ epoch = ceph_decode_32(&p);
+
+ num_mon = ceph_decode_32(&p);
+ /*
+ * Bound-check num_mon *before* any size arithmetic: num_mon is
+ * peer-supplied and num_mon*sizeof(m->mon_inst[0]) can wrap a
+ * u32, which would defeat both the ceph_decode_need() bounds
+ * check and the kmalloc size computation below.
+ */
+ if (num_mon >= CEPH_MAX_MON)
+ goto bad;
+ ceph_decode_need(&p, end, num_mon*sizeof(m->mon_inst[0]), bad);
+
+ m = kmalloc(sizeof(*m) + sizeof(m->mon_inst[0])*num_mon, GFP_NOFS);
+ if (m == NULL)
+ return ERR_PTR(-ENOMEM);
+ m->fsid = fsid;
+ m->epoch = epoch;
+ m->num_mon = num_mon;
+ ceph_decode_copy(&p, m->mon_inst, num_mon*sizeof(m->mon_inst[0]));
+ /* addresses arrive in wire order; fix up endianness in place */
+ for (i = 0; i < num_mon; i++)
+ ceph_decode_addr(&m->mon_inst[i].addr);
+
+ dout("monmap_decode epoch %d, num_mon %d\n", m->epoch,
+ m->num_mon);
+ for (i = 0; i < m->num_mon; i++)
+ dout("monmap_decode mon%d is %s\n", i,
+ ceph_pr_addr(&m->mon_inst[i].addr.in_addr));
+ return m;
+
+bad:
+ dout("monmap_decode failed with %d\n", err);
+ kfree(m);
+ return ERR_PTR(err);
+}
+
+/*
+ * return true if *addr is included in the monmap.
+ */
+int ceph_monmap_contains(struct ceph_monmap *m, struct ceph_entity_addr *addr)
+{
+ int idx;
+
+ for (idx = 0; idx < m->num_mon; idx++) {
+ if (!memcmp(addr, &m->mon_inst[idx].addr, sizeof(*addr)))
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Send an auth request.
+ *
+ * The auth payload has already been built into m_auth's front buffer;
+ * len is its length. All callers in this file hold monc->mutex.
+ */
+static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len)
+{
+ monc->pending_auth = 1;
+ monc->m_auth->front.iov_len = len;
+ monc->m_auth->hdr.front_len = cpu_to_le32(len);
+ /* revoke any earlier queued copy so only the latest request goes out */
+ ceph_msg_revoke(monc->m_auth);
+ ceph_msg_get(monc->m_auth); /* keep our ref */
+ ceph_con_send(&monc->con, monc->m_auth);
+}
+
+/*
+ * Close monitor session, if any.
+ *
+ * Revokes all in-flight/incoming session messages, closes the
+ * connection, and resets auth state. Caller holds monc->mutex.
+ */
+static void __close_session(struct ceph_mon_client *monc)
+{
+ dout("__close_session closing mon%d\n", monc->cur_mon);
+ ceph_msg_revoke(monc->m_auth);
+ ceph_msg_revoke_incoming(monc->m_auth_reply);
+ ceph_msg_revoke(monc->m_subscribe);
+ ceph_msg_revoke_incoming(monc->m_subscribe_ack);
+ ceph_con_close(&monc->con);
+ /* cur_mon < 0 means "no session"; __open_session will pick a new mon */
+ monc->cur_mon = -1;
+ monc->pending_auth = 0;
+ ceph_auth_reset(monc->auth);
+}
+
+/*
+ * Open a session with a (new) monitor.
+ *
+ * Picks a random monitor if none is current, opens the connection and
+ * kicks off the authentication handshake. Caller holds monc->mutex.
+ */
+static int __open_session(struct ceph_mon_client *monc)
+{
+ u8 r;
+ int ret;
+
+ if (monc->cur_mon < 0) {
+ /*
+ * r must be unsigned: with a plain (possibly signed) char,
+ * r % num_mon could be negative and cur_mon would index
+ * mon_inst[] out of bounds below.
+ */
+ get_random_bytes(&r, 1);
+ monc->cur_mon = r % monc->monmap->num_mon;
+ dout("open_session num=%d r=%d -> mon%d\n",
+ monc->monmap->num_mon, r, monc->cur_mon);
+ monc->sub_sent = 0;
+ monc->sub_renew_after = jiffies; /* i.e., expired */
+ monc->want_next_osdmap = !!monc->want_next_osdmap;
+
+ dout("open_session mon%d opening\n", monc->cur_mon);
+ ceph_con_open(&monc->con,
+ CEPH_ENTITY_TYPE_MON, monc->cur_mon,
+ &monc->monmap->mon_inst[monc->cur_mon].addr);
+
+ /* initiate authentication handshake */
+ ret = ceph_auth_build_hello(monc->auth,
+ monc->m_auth->front.iov_base,
+ monc->m_auth->front_alloc_len);
+ __send_prepared_auth_request(monc, ret);
+ } else {
+ dout("open_session mon%d already open\n", monc->cur_mon);
+ }
+ return 0;
+}
+
+/* Return true once the current subscription lease is due for renewal. */
+static bool __sub_expired(struct ceph_mon_client *monc)
+{
+ return time_after_eq(jiffies, monc->sub_renew_after);
+}
+
+/*
+ * Reschedule delayed work timer.
+ *
+ * Use a short delay while hunting/renewing (no session, or the
+ * subscription lease expired), otherwise a longer keepalive interval.
+ */
+static void __schedule_delayed(struct ceph_mon_client *monc)
+{
+ unsigned int delay;
+
+ if (monc->cur_mon < 0 || __sub_expired(monc))
+ delay = 10 * HZ;
+ else
+ delay = 20 * HZ;
+ dout("__schedule_delayed after %u\n", delay);
+ schedule_delayed_work(&monc->delayed_work, delay);
+}
+
+/*
+ * Send subscribe request for mdsmap and/or osdmap.
+ *
+ * Encodes up to three subscribe items ("osdmap", "mdsmap", "monmap")
+ * into m_subscribe's front buffer and sends it. Only sends when the
+ * lease expired and nothing is outstanding, or when an osdmap was
+ * explicitly requested (want_next_osdmap == 1; it becomes 2 once the
+ * request is in flight). Caller holds monc->mutex.
+ */
+static void __send_subscribe(struct ceph_mon_client *monc)
+{
+ dout("__send_subscribe sub_sent=%u exp=%u want_osd=%d\n",
+ (unsigned int)monc->sub_sent, __sub_expired(monc),
+ monc->want_next_osdmap);
+ if ((__sub_expired(monc) && !monc->sub_sent) ||
+ monc->want_next_osdmap == 1) {
+ struct ceph_msg *msg = monc->m_subscribe;
+ struct ceph_mon_subscribe_item *i;
+ void *p, *end;
+ int num;
+
+ p = msg->front.iov_base;
+ end = p + msg->front_alloc_len;
+
+ /* item count: monmap always, plus osdmap/mdsmap as wanted */
+ num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap;
+ ceph_encode_32(&p, num);
+
+ if (monc->want_next_osdmap) {
+ dout("__send_subscribe to 'osdmap' %u\n",
+ (unsigned int)monc->have_osdmap);
+ ceph_encode_string(&p, end, "osdmap", 6);
+ i = p;
+ i->have = cpu_to_le64(monc->have_osdmap);
+ i->onetime = 1;
+ p += sizeof(*i);
+ monc->want_next_osdmap = 2; /* requested */
+ }
+ if (monc->want_mdsmap) {
+ dout("__send_subscribe to 'mdsmap' %u+\n",
+ (unsigned int)monc->have_mdsmap);
+ ceph_encode_string(&p, end, "mdsmap", 6);
+ i = p;
+ i->have = cpu_to_le64(monc->have_mdsmap);
+ i->onetime = 0;
+ p += sizeof(*i);
+ }
+ /* always keep a standing (non-onetime) monmap subscription */
+ ceph_encode_string(&p, end, "monmap", 6);
+ i = p;
+ i->have = 0;
+ i->onetime = 0;
+ p += sizeof(*i);
+
+ msg->front.iov_len = p - msg->front.iov_base;
+ msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+ ceph_msg_revoke(msg);
+ ceph_con_send(&monc->con, ceph_msg_get(msg));
+
+ monc->sub_sent = jiffies | 1; /* never 0 */
+ }
+}
+
+/*
+ * Handle a MON_SUBSCRIBE_ACK: the monitor grants our subscription for
+ * h->duration seconds. A first ack after hunting means the session is
+ * established.
+ */
+static void handle_subscribe_ack(struct ceph_mon_client *monc,
+ struct ceph_msg *msg)
+{
+ unsigned int seconds;
+ struct ceph_mon_subscribe_ack *h = msg->front.iov_base;
+
+ if (msg->front.iov_len < sizeof(*h))
+ goto bad;
+ seconds = le32_to_cpu(h->duration);
+
+ mutex_lock(&monc->mutex);
+ if (monc->hunting) {
+ pr_info("mon%d %s session established\n",
+ monc->cur_mon,
+ ceph_pr_addr(&monc->con.peer_addr.in_addr));
+ monc->hunting = false;
+ }
+ dout("handle_subscribe_ack after %d seconds\n", seconds);
+ /* renew halfway through the granted lease */
+ monc->sub_renew_after = monc->sub_sent + (seconds >> 1)*HZ - 1;
+ monc->sub_sent = 0;
+ mutex_unlock(&monc->mutex);
+ return;
+bad:
+ pr_err("got corrupt subscribe-ack msg\n");
+ ceph_msg_dump(msg);
+}
+
+/*
+ * Keep track of which maps we have
+ */
+int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 got)
+{
+ /* record the mdsmap epoch we now hold; used in the next subscribe */
+ mutex_lock(&monc->mutex);
+ monc->have_mdsmap = got;
+ mutex_unlock(&monc->mutex);
+ return 0;
+}
+EXPORT_SYMBOL(ceph_monc_got_mdsmap);
+
+/* Record the osdmap epoch we now hold and clear any pending osdmap want. */
+int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got)
+{
+ mutex_lock(&monc->mutex);
+ monc->have_osdmap = got;
+ monc->want_next_osdmap = 0;
+ mutex_unlock(&monc->mutex);
+ return 0;
+}
+
+/*
+ * Register interest in the next osdmap
+ */
+void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
+{
+ /* NOTE(review): have_osdmap is read here before taking the mutex --
+ * debug-only output, but racy in principle */
+ dout("request_next_osdmap have %u\n", monc->have_osdmap);
+ mutex_lock(&monc->mutex);
+ /* 0 -> 1 marks "wanted"; if not already in flight (< 2), send now */
+ if (!monc->want_next_osdmap)
+ monc->want_next_osdmap = 1;
+ if (monc->want_next_osdmap < 2)
+ __send_subscribe(monc);
+ mutex_unlock(&monc->mutex);
+}
+
+/*
+ * Open (or re-open) a session with a monitor and schedule the periodic
+ * keepalive/renewal work.
+ */
+int ceph_monc_open_session(struct ceph_mon_client *monc)
+{
+ mutex_lock(&monc->mutex);
+ __open_session(monc);
+ __schedule_delayed(monc);
+ mutex_unlock(&monc->mutex);
+ return 0;
+}
+EXPORT_SYMBOL(ceph_monc_open_session);
+
+/*
+ * We require the fsid and global_id in order to initialize our
+ * debugfs dir.
+ */
+static bool have_debugfs_info(struct ceph_mon_client *monc)
+{
+ dout("have_debugfs_info fsid %d globalid %lld\n",
+ (int)monc->client->have_fsid, monc->auth->global_id);
+ return monc->client->have_fsid && monc->auth->global_id > 0;
+}
+
+/*
+ * The monitor responds with a mount ack to indicate mount success. The
+ * included client ticket allows the client to talk to MDSs and OSDs.
+ *
+ * Decodes the incoming monmap, swaps it in for the old one, and (on
+ * first fsid) performs one-time debugfs setup outside the mutex.
+ */
+static void ceph_monc_handle_map(struct ceph_mon_client *monc,
+ struct ceph_msg *msg)
+{
+ struct ceph_client *client = monc->client;
+ struct ceph_monmap *monmap = NULL, *old = monc->monmap;
+ void *p, *end;
+ int had_debugfs_info, init_debugfs = 0;
+
+ mutex_lock(&monc->mutex);
+
+ had_debugfs_info = have_debugfs_info(monc);
+
+ dout("handle_monmap\n");
+ p = msg->front.iov_base;
+ end = p + msg->front.iov_len;
+
+ monmap = ceph_monmap_decode(p, end);
+ if (IS_ERR(monmap)) {
+ pr_err("problem decoding monmap, %d\n",
+ (int)PTR_ERR(monmap));
+ goto out;
+ }
+
+ /* fsid mismatch: discard the new map, keep the old one */
+ if (ceph_check_fsid(monc->client, &monmap->fsid) < 0) {
+ kfree(monmap);
+ goto out;
+ }
+
+ client->monc.monmap = monmap;
+ kfree(old);
+
+ if (!client->have_fsid) {
+ client->have_fsid = true;
+ if (!had_debugfs_info && have_debugfs_info(monc)) {
+ pr_info("client%lld fsid %pU\n",
+ ceph_client_id(monc->client),
+ &monc->client->fsid);
+ init_debugfs = 1;
+ }
+ mutex_unlock(&monc->mutex);
+
+ if (init_debugfs) {
+ /*
+ * do debugfs initialization without mutex to avoid
+ * creating a locking dependency
+ */
+ ceph_debugfs_client_init(monc->client);
+ }
+
+ goto out_unlocked;
+ }
+out:
+ mutex_unlock(&monc->mutex);
+out_unlocked:
+ wake_up_all(&client->auth_wq);
+}
+
+/*
+ * generic requests (e.g., statfs, poolop)
+ *
+ * Look up a pending generic request by transaction id in the rbtree.
+ * Caller holds monc->mutex. Returns NULL if not found.
+ */
+static struct ceph_mon_generic_request *__lookup_generic_req(
+ struct ceph_mon_client *monc, u64 tid)
+{
+ struct rb_node *node = monc->generic_request_tree.rb_node;
+
+ while (node) {
+ struct ceph_mon_generic_request *cur =
+ rb_entry(node, struct ceph_mon_generic_request, node);
+
+ if (tid < cur->tid)
+ node = node->rb_left;
+ else if (tid > cur->tid)
+ node = node->rb_right;
+ else
+ return cur;
+ }
+ return NULL;
+}
+
+/*
+ * Insert a new generic request into the tid-keyed rbtree.
+ * Caller holds monc->mutex; duplicate tids are a bug.
+ */
+static void __insert_generic_request(struct ceph_mon_client *monc,
+ struct ceph_mon_generic_request *new)
+{
+ struct rb_node **p = &monc->generic_request_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct ceph_mon_generic_request *req = NULL;
+
+ while (*p) {
+ parent = *p;
+ req = rb_entry(parent, struct ceph_mon_generic_request, node);
+ if (new->tid < req->tid)
+ p = &(*p)->rb_left;
+ else if (new->tid > req->tid)
+ p = &(*p)->rb_right;
+ else
+ BUG();
+ }
+
+ rb_link_node(&new->node, parent, p);
+ rb_insert_color(&new->node, &monc->generic_request_tree);
+}
+
+/*
+ * kref release callback for a generic request: drop the message refs
+ * and free the request itself.
+ */
+static void release_generic_request(struct kref *kref)
+{
+ struct ceph_mon_generic_request *req =
+ container_of(kref, struct ceph_mon_generic_request, kref);
+
+ if (req->reply)
+ ceph_msg_put(req->reply);
+ if (req->request)
+ ceph_msg_put(req->request);
+
+ kfree(req);
+}
+
+/* Drop a ref on a generic request, freeing it on the last put. */
+static void put_generic_request(struct ceph_mon_generic_request *req)
+{
+ kref_put(&req->kref, release_generic_request);
+}
+
+/* Take an additional ref on a generic request. */
+static void get_generic_request(struct ceph_mon_generic_request *req)
+{
+ kref_get(&req->kref);
+}
+
+/*
+ * alloc_msg helper for generic replies: return the preallocated reply
+ * buffer of the matching pending request, or set *skip if the tid is
+ * unknown (e.g., the request already timed out).
+ */
+static struct ceph_msg *get_generic_reply(struct ceph_connection *con,
+ struct ceph_msg_header *hdr,
+ int *skip)
+{
+ struct ceph_mon_client *monc = con->private;
+ struct ceph_mon_generic_request *req;
+ u64 tid = le64_to_cpu(hdr->tid);
+ struct ceph_msg *m;
+
+ mutex_lock(&monc->mutex);
+ req = __lookup_generic_req(monc, tid);
+ if (!req) {
+ dout("get_generic_reply %lld dne\n", tid);
+ *skip = 1;
+ m = NULL;
+ } else {
+ dout("get_generic_reply %lld got %p\n", tid, req->reply);
+ *skip = 0;
+ m = ceph_msg_get(req->reply);
+ /*
+ * we don't need to track the connection reading into
+ * this reply because we only have one open connection
+ * at a time, ever.
+ */
+ }
+ mutex_unlock(&monc->mutex);
+ return m;
+}
+
+/*
+ * Register a generic request, send it, and block until the reply
+ * handler completes it (or the wait is interrupted).
+ *
+ * NOTE(review): on -EINTR the request is unregistered while possibly
+ * still in flight; the con still holds its own msg refs, so this looks
+ * safe, but confirm the reply path tolerates a vanished tid.
+ */
+static int do_generic_request(struct ceph_mon_client *monc,
+ struct ceph_mon_generic_request *req)
+{
+ int err;
+
+ /* register request */
+ mutex_lock(&monc->mutex);
+ req->tid = ++monc->last_tid;
+ req->request->hdr.tid = cpu_to_le64(req->tid);
+ __insert_generic_request(monc, req);
+ monc->num_generic_requests++;
+ ceph_con_send(&monc->con, ceph_msg_get(req->request));
+ mutex_unlock(&monc->mutex);
+
+ err = wait_for_completion_interruptible(&req->completion);
+
+ mutex_lock(&monc->mutex);
+ rb_erase(&req->node, &monc->generic_request_tree);
+ monc->num_generic_requests--;
+ mutex_unlock(&monc->mutex);
+
+ /* on clean completion, report the result set by the reply handler */
+ if (!err)
+ err = req->result;
+ return err;
+}
+
+/*
+ * statfs
+ *
+ * Copy the statfs result into the waiter's buffer and complete the
+ * matching generic request.
+ */
+static void handle_statfs_reply(struct ceph_mon_client *monc,
+ struct ceph_msg *msg)
+{
+ struct ceph_mon_generic_request *req;
+ struct ceph_mon_statfs_reply *reply = msg->front.iov_base;
+ u64 tid = le64_to_cpu(msg->hdr.tid);
+
+ if (msg->front.iov_len != sizeof(*reply))
+ goto bad;
+ dout("handle_statfs_reply %p tid %llu\n", msg, tid);
+
+ mutex_lock(&monc->mutex);
+ req = __lookup_generic_req(monc, tid);
+ if (req) {
+ *(struct ceph_statfs *)req->buf = reply->st;
+ req->result = 0;
+ /* extra ref keeps req alive across the unlocked complete below */
+ get_generic_request(req);
+ }
+ mutex_unlock(&monc->mutex);
+ if (req) {
+ complete_all(&req->completion);
+ put_generic_request(req);
+ }
+ return;
+
+bad:
+ pr_err("corrupt generic reply, tid %llu\n", tid);
+ ceph_msg_dump(msg);
+}
+
+/*
+ * Do a synchronous statfs().
+ *
+ * Allocates a generic request with preallocated request/reply messages,
+ * fills in the statfs header, and blocks in do_generic_request().
+ * Returns 0 on success or a negative errno.
+ */
+int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
+{
+ struct ceph_mon_generic_request *req;
+ struct ceph_mon_statfs *h;
+ int err;
+
+ req = kzalloc(sizeof(*req), GFP_NOFS);
+ if (!req)
+ return -ENOMEM;
+
+ kref_init(&req->kref);
+ req->buf = buf;
+ req->buf_len = sizeof(*buf);
+ init_completion(&req->completion);
+
+ err = -ENOMEM;
+ req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), GFP_NOFS,
+ true);
+ if (!req->request)
+ goto out;
+ req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024, GFP_NOFS,
+ true);
+ if (!req->reply)
+ goto out;
+
+ /* fill out request */
+ h = req->request->front.iov_base;
+ h->monhdr.have_version = 0;
+ h->monhdr.session_mon = cpu_to_le16(-1);
+ h->monhdr.session_mon_tid = 0;
+ h->fsid = monc->monmap->fsid;
+
+ err = do_generic_request(monc, req);
+
+out:
+ /* drops the initial ref; frees req (and any msgs) on failure paths */
+ kref_put(&req->kref, release_generic_request);
+ return err;
+}
+EXPORT_SYMBOL(ceph_monc_do_statfs);
+
+/*
+ * pool ops
+ *
+ * Extract a length-prefixed payload from a poolop reply into dst.
+ * Returns 0 on success, -EINVAL if the sizes don't line up exactly.
+ */
+static int get_poolop_reply_buf(const char *src, size_t src_len,
+ char *dst, size_t dst_len)
+{
+ u32 encoded_len;
+
+ /* payload must be exactly a u32 length prefix plus dst_len bytes */
+ if (src_len != sizeof(u32) + dst_len)
+ return -EINVAL;
+
+ encoded_len = le32_to_cpu(*(u32 *)src);
+ if (encoded_len != dst_len)
+ return -EINVAL;
+
+ memcpy(dst, src + sizeof(u32), dst_len);
+ return 0;
+}
+
+/*
+ * Handle a poolop reply: copy any payload into the waiter's buffer,
+ * record the reply code, and complete the matching generic request.
+ */
+static void handle_poolop_reply(struct ceph_mon_client *monc,
+ struct ceph_msg *msg)
+{
+ struct ceph_mon_generic_request *req;
+ struct ceph_mon_poolop_reply *reply = msg->front.iov_base;
+ u64 tid = le64_to_cpu(msg->hdr.tid);
+
+ if (msg->front.iov_len < sizeof(*reply))
+ goto bad;
+ dout("handle_poolop_reply %p tid %llu\n", msg, tid);
+
+ mutex_lock(&monc->mutex);
+ req = __lookup_generic_req(monc, tid);
+ if (req) {
+ if (req->buf_len &&
+ get_poolop_reply_buf(msg->front.iov_base + sizeof(*reply),
+ msg->front.iov_len - sizeof(*reply),
+ req->buf, req->buf_len) < 0) {
+ mutex_unlock(&monc->mutex);
+ goto bad;
+ }
+ req->result = le32_to_cpu(reply->reply_code);
+ /* extra ref keeps req alive across the unlocked complete below */
+ get_generic_request(req);
+ }
+ mutex_unlock(&monc->mutex);
+ if (req) {
+ /* NOTE(review): statfs uses complete_all() here -- confirm the
+ * single-waiter assumption that makes complete() sufficient */
+ complete(&req->completion);
+ put_generic_request(req);
+ }
+ return;
+
+bad:
+ pr_err("corrupt generic reply, tid %llu\n", tid);
+ ceph_msg_dump(msg);
+}
+
+/*
+ * Do a synchronous pool op.
+ *
+ * Builds a v2 poolop request (op on pool/snapid, optional reply buffer
+ * of len bytes) and blocks in do_generic_request(). Returns the
+ * monitor's reply code or a negative errno.
+ */
+static int do_poolop(struct ceph_mon_client *monc, u32 op,
+ u32 pool, u64 snapid,
+ char *buf, int len)
+{
+ struct ceph_mon_generic_request *req;
+ struct ceph_mon_poolop *h;
+ int err;
+
+ req = kzalloc(sizeof(*req), GFP_NOFS);
+ if (!req)
+ return -ENOMEM;
+
+ kref_init(&req->kref);
+ req->buf = buf;
+ req->buf_len = len;
+ init_completion(&req->completion);
+
+ err = -ENOMEM;
+ req->request = ceph_msg_new(CEPH_MSG_POOLOP, sizeof(*h), GFP_NOFS,
+ true);
+ if (!req->request)
+ goto out;
+ req->reply = ceph_msg_new(CEPH_MSG_POOLOP_REPLY, 1024, GFP_NOFS,
+ true);
+ if (!req->reply)
+ goto out;
+
+ /* fill out request */
+ req->request->hdr.version = cpu_to_le16(2);
+ h = req->request->front.iov_base;
+ h->monhdr.have_version = 0;
+ h->monhdr.session_mon = cpu_to_le16(-1);
+ h->monhdr.session_mon_tid = 0;
+ h->fsid = monc->monmap->fsid;
+ h->pool = cpu_to_le32(pool);
+ h->op = cpu_to_le32(op);
+ h->auid = 0;
+ h->snapid = cpu_to_le64(snapid);
+ h->name_len = 0;
+
+ err = do_generic_request(monc, req);
+
+out:
+ /* drops the initial ref; frees req (and any msgs) on failure paths */
+ kref_put(&req->kref, release_generic_request);
+ return err;
+}
+
+/* Create an unmanaged snapshot in @pool; new snap id returned in *snapid. */
+int ceph_monc_create_snapid(struct ceph_mon_client *monc,
+ u32 pool, u64 *snapid)
+{
+ return do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP,
+ pool, 0, (char *)snapid, sizeof(*snapid));
+
+}
+EXPORT_SYMBOL(ceph_monc_create_snapid);
+
+/* Delete unmanaged snapshot @snapid from @pool. */
+int ceph_monc_delete_snapid(struct ceph_mon_client *monc,
+ u32 pool, u64 snapid)
+{
+ /*
+ * Bug fix: this previously issued POOL_OP_CREATE_UNMANAGED_SNAP,
+ * so a "delete" actually created yet another snapshot. Use the
+ * delete op.
+ */
+ return do_poolop(monc, POOL_OP_DELETE_UNMANAGED_SNAP,
+ pool, snapid, NULL, 0);
+}
+
+/*
+ * Resend pending generic requests.
+ *
+ * Called after (re)authentication; revokes any stale queued copies and
+ * re-queues every registered request. Caller holds monc->mutex.
+ */
+static void __resend_generic_request(struct ceph_mon_client *monc)
+{
+ struct ceph_mon_generic_request *req;
+ struct rb_node *p;
+
+ for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) {
+ req = rb_entry(p, struct ceph_mon_generic_request, node);
+ ceph_msg_revoke(req->request);
+ ceph_msg_revoke_incoming(req->reply);
+ ceph_con_send(&monc->con, ceph_msg_get(req->request));
+ }
+}
+
+/*
+ * Delayed work. If we haven't mounted yet, retry. Otherwise,
+ * renew/retry subscription as needed (in case it is timing out, or we
+ * got an ENOMEM). And keep the monitor connection alive.
+ */
+static void delayed_work(struct work_struct *work)
+{
+ struct ceph_mon_client *monc =
+ container_of(work, struct ceph_mon_client, delayed_work.work);
+
+ dout("monc delayed_work\n");
+ mutex_lock(&monc->mutex);
+ if (monc->hunting) {
+ /* still hunting: drop the failed session, try the next mon */
+ __close_session(monc);
+ __open_session(monc); /* continue hunting */
+ } else {
+ ceph_con_keepalive(&monc->con);
+
+ __validate_auth(monc);
+
+ /* only renew subscriptions once authenticated */
+ if (ceph_auth_is_authenticated(monc->auth))
+ __send_subscribe(monc);
+ }
+ __schedule_delayed(monc);
+ mutex_unlock(&monc->mutex);
+}
+
+/*
+ * On startup, we build a temporary monmap populated with the IPs
+ * provided by mount(2).
+ */
+static int build_initial_monmap(struct ceph_mon_client *monc)
+{
+ struct ceph_options *opt = monc->client->options;
+ struct ceph_entity_addr *mon_addr = opt->mon_addr;
+ int num_mon = opt->num_mon;
+ int i;
+
+ /* build initial monmap */
+ monc->monmap = kzalloc(sizeof(*monc->monmap) +
+ num_mon*sizeof(monc->monmap->mon_inst[0]),
+ GFP_KERNEL);
+ if (!monc->monmap)
+ return -ENOMEM;
+ for (i = 0; i < num_mon; i++) {
+ monc->monmap->mon_inst[i].addr = mon_addr[i];
+ /* nonce 0: a real nonce comes from the actual monmap later */
+ monc->monmap->mon_inst[i].addr.nonce = 0;
+ monc->monmap->mon_inst[i].name.type =
+ CEPH_ENTITY_TYPE_MON;
+ monc->monmap->mon_inst[i].name.num = cpu_to_le64(i);
+ }
+ monc->monmap->num_mon = num_mon;
+ return 0;
+}
+
+/*
+ * Initialize a mon_client: build the initial monmap, set up auth, and
+ * preallocate the fixed session messages. On failure, unwinds in
+ * reverse order via the goto chain. Returns 0 or a negative errno.
+ */
+int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
+{
+ int err = 0;
+
+ dout("init\n");
+ memset(monc, 0, sizeof(*monc));
+ monc->client = cl;
+ monc->monmap = NULL;
+ mutex_init(&monc->mutex);
+
+ err = build_initial_monmap(monc);
+ if (err)
+ goto out;
+
+ /* connection */
+ /* authentication */
+ monc->auth = ceph_auth_init(cl->options->name,
+ cl->options->key);
+ if (IS_ERR(monc->auth)) {
+ err = PTR_ERR(monc->auth);
+ goto out_monmap;
+ }
+ monc->auth->want_keys =
+ CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON |
+ CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MDS;
+
+ /* msgs: preallocated so the session never fails on ENOMEM mid-flight */
+ err = -ENOMEM;
+ monc->m_subscribe_ack = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE_ACK,
+ sizeof(struct ceph_mon_subscribe_ack),
+ GFP_NOFS, true);
+ if (!monc->m_subscribe_ack)
+ goto out_auth;
+
+ monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, GFP_NOFS,
+ true);
+ if (!monc->m_subscribe)
+ goto out_subscribe_ack;
+
+ monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, GFP_NOFS,
+ true);
+ if (!monc->m_auth_reply)
+ goto out_subscribe;
+
+ monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_NOFS, true);
+ monc->pending_auth = 0;
+ if (!monc->m_auth)
+ goto out_auth_reply;
+
+ ceph_con_init(&monc->con, monc, &mon_con_ops,
+ &monc->client->msgr);
+
+ /* cur_mon = -1: no session yet; start in hunting mode */
+ monc->cur_mon = -1;
+ monc->hunting = true;
+ monc->sub_renew_after = jiffies;
+ monc->sub_sent = 0;
+
+ INIT_DELAYED_WORK(&monc->delayed_work, delayed_work);
+ monc->generic_request_tree = RB_ROOT;
+ monc->num_generic_requests = 0;
+ monc->last_tid = 0;
+
+ monc->have_mdsmap = 0;
+ monc->have_osdmap = 0;
+ monc->want_next_osdmap = 1;
+ return 0;
+
+out_auth_reply:
+ ceph_msg_put(monc->m_auth_reply);
+out_subscribe:
+ ceph_msg_put(monc->m_subscribe);
+out_subscribe_ack:
+ ceph_msg_put(monc->m_subscribe_ack);
+out_auth:
+ ceph_auth_destroy(monc->auth);
+out_monmap:
+ kfree(monc->monmap);
+out:
+ return err;
+}
+EXPORT_SYMBOL(ceph_monc_init);
+
+/*
+ * Tear down a mon_client: stop the periodic work, close the session,
+ * drain the messenger, then release auth, messages, and the monmap.
+ */
+void ceph_monc_stop(struct ceph_mon_client *monc)
+{
+ dout("stop\n");
+ cancel_delayed_work_sync(&monc->delayed_work);
+
+ mutex_lock(&monc->mutex);
+ __close_session(monc);
+
+ mutex_unlock(&monc->mutex);
+
+ /*
+ * flush msgr queue before we destroy ourselves to ensure that:
+ * - any work that references our embedded con is finished.
+ * - any osd_client or other work that may reference an authorizer
+ * finishes before we shut down the auth subsystem.
+ */
+ ceph_msgr_flush();
+
+ ceph_auth_destroy(monc->auth);
+
+ ceph_msg_put(monc->m_auth);
+ ceph_msg_put(monc->m_auth_reply);
+ ceph_msg_put(monc->m_subscribe);
+ ceph_msg_put(monc->m_subscribe_ack);
+
+ kfree(monc->monmap);
+}
+EXPORT_SYMBOL(ceph_monc_stop);
+
+/*
+ * Handle an AUTH_REPLY: feed it to the auth layer, which either fails
+ * (wake mount waiters with the error), needs another round trip (send
+ * the prepared request), or completes -- in which case we adopt our
+ * global_id as our entity name and start the session proper.
+ */
+static void handle_auth_reply(struct ceph_mon_client *monc,
+ struct ceph_msg *msg)
+{
+ int ret;
+ int was_auth = 0;
+ int had_debugfs_info, init_debugfs = 0;
+
+ mutex_lock(&monc->mutex);
+ had_debugfs_info = have_debugfs_info(monc);
+ was_auth = ceph_auth_is_authenticated(monc->auth);
+ monc->pending_auth = 0;
+ ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base,
+ msg->front.iov_len,
+ monc->m_auth->front.iov_base,
+ monc->m_auth->front_alloc_len);
+ if (ret < 0) {
+ monc->client->auth_err = ret;
+ wake_up_all(&monc->client->auth_wq);
+ } else if (ret > 0) {
+ /* ret > 0: auth layer prepared a follow-up request of that length */
+ __send_prepared_auth_request(monc, ret);
+ } else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
+ dout("authenticated, starting session\n");
+
+ monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
+ monc->client->msgr.inst.name.num =
+ cpu_to_le64(monc->auth->global_id);
+
+ __send_subscribe(monc);
+ __resend_generic_request(monc);
+ }
+
+ if (!had_debugfs_info && have_debugfs_info(monc)) {
+ pr_info("client%lld fsid %pU\n",
+ ceph_client_id(monc->client),
+ &monc->client->fsid);
+ init_debugfs = 1;
+ }
+ mutex_unlock(&monc->mutex);
+
+ if (init_debugfs) {
+ /*
+ * do debugfs initialization without mutex to avoid
+ * creating a locking dependency
+ */
+ ceph_debugfs_client_init(monc->client);
+ }
+}
+
+/*
+ * If auth needs (re)validation and no request is already pending, build
+ * and send one. Caller holds monc->mutex. Returns 0 or a negative errno.
+ */
+static int __validate_auth(struct ceph_mon_client *monc)
+{
+ int ret;
+
+ if (monc->pending_auth)
+ return 0;
+
+ ret = ceph_build_auth(monc->auth, monc->m_auth->front.iov_base,
+ monc->m_auth->front_alloc_len);
+ if (ret <= 0)
+ return ret; /* either an error, or no need to authenticate */
+ __send_prepared_auth_request(monc, ret);
+ return 0;
+}
+
+/* Locked wrapper around __validate_auth() for external callers. */
+int ceph_monc_validate_auth(struct ceph_mon_client *monc)
+{
+ int ret;
+
+ mutex_lock(&monc->mutex);
+ ret = __validate_auth(monc);
+ mutex_unlock(&monc->mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_monc_validate_auth);
+
+/*
+ * handle incoming message
+ *
+ * Routes each message by type to its handler; unknown types are first
+ * offered to the client's chained dispatcher. Consumes the msg ref.
+ */
+static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
+{
+ struct ceph_mon_client *monc = con->private;
+ int type = le16_to_cpu(msg->hdr.type);
+
+ if (!monc)
+ return;
+
+ switch (type) {
+ case CEPH_MSG_AUTH_REPLY:
+ handle_auth_reply(monc, msg);
+ break;
+
+ case CEPH_MSG_MON_SUBSCRIBE_ACK:
+ handle_subscribe_ack(monc, msg);
+ break;
+
+ case CEPH_MSG_STATFS_REPLY:
+ handle_statfs_reply(monc, msg);
+ break;
+
+ case CEPH_MSG_POOLOP_REPLY:
+ handle_poolop_reply(monc, msg);
+ break;
+
+ case CEPH_MSG_MON_MAP:
+ ceph_monc_handle_map(monc, msg);
+ break;
+
+ case CEPH_MSG_OSD_MAP:
+ ceph_osdc_handle_map(&monc->client->osdc, msg);
+ break;
+
+ default:
+ /* can the chained handler handle it? */
+ if (monc->client->extra_mon_dispatch &&
+ monc->client->extra_mon_dispatch(monc->client, msg) == 0)
+ break;
+
+ pr_err("received unknown message type %d %s\n", type,
+ ceph_msg_type_name(type));
+ }
+ /* drop the ref taken for dispatch */
+ ceph_msg_put(msg);
+}
+
+/*
+ * Allocate memory for incoming message
+ *
+ * Session messages reuse their preallocated buffers; generic replies
+ * are matched to a pending request; map messages get fresh buffers.
+ * *skip = 1 tells the messenger to discard the incoming message.
+ */
+static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
+ struct ceph_msg_header *hdr,
+ int *skip)
+{
+ struct ceph_mon_client *monc = con->private;
+ int type = le16_to_cpu(hdr->type);
+ int front_len = le32_to_cpu(hdr->front_len);
+ struct ceph_msg *m = NULL;
+
+ *skip = 0;
+
+ switch (type) {
+ case CEPH_MSG_MON_SUBSCRIBE_ACK:
+ m = ceph_msg_get(monc->m_subscribe_ack);
+ break;
+ case CEPH_MSG_POOLOP_REPLY:
+ case CEPH_MSG_STATFS_REPLY:
+ return get_generic_reply(con, hdr, skip);
+ case CEPH_MSG_AUTH_REPLY:
+ m = ceph_msg_get(monc->m_auth_reply);
+ break;
+ case CEPH_MSG_MON_MAP:
+ case CEPH_MSG_MDS_MAP:
+ case CEPH_MSG_OSD_MAP:
+ m = ceph_msg_new(type, front_len, GFP_NOFS, false);
+ if (!m)
+ return NULL; /* ENOMEM--return skip == 0 */
+ break;
+ }
+
+ if (!m) {
+ pr_info("alloc_msg unknown type %d\n", type);
+ *skip = 1;
+ }
+ return m;
+}
+
+/*
+ * If the monitor connection resets, pick a new monitor and resubmit
+ * any pending requests.
+ */
+static void mon_fault(struct ceph_connection *con)
+{
+ struct ceph_mon_client *monc = con->private;
+
+ if (!monc)
+ return;
+
+ dout("mon_fault\n");
+ mutex_lock(&monc->mutex);
+ /* re-check under the mutex: con may have been torn down */
+ if (!con->private)
+ goto out;
+
+ if (!monc->hunting)
+ pr_info("mon%d %s session lost, "
+ "hunting for new mon\n", monc->cur_mon,
+ ceph_pr_addr(&monc->con.peer_addr.in_addr));
+
+ __close_session(monc);
+ if (!monc->hunting) {
+ /* start hunting */
+ monc->hunting = true;
+ __open_session(monc);
+ } else {
+ /* already hunting, let's wait a bit */
+ __schedule_delayed(monc);
+ }
+out:
+ mutex_unlock(&monc->mutex);
+}
+
+/*
+ * We can ignore refcounting on the connection struct, as all references
+ * will come from the messenger workqueue, which is drained prior to
+ * mon_client destruction.
+ */
+static struct ceph_connection *con_get(struct ceph_connection *con)
+{
+ return con;
+}
+
+/* no-op counterpart of con_get(); see comment above */
+static void con_put(struct ceph_connection *con)
+{
+}
+
+static const struct ceph_connection_operations mon_con_ops = {
+ .get = con_get,
+ .put = con_put,
+ .dispatch = dispatch,
+ .fault = mon_fault,
+ .alloc_msg = mon_alloc_msg,
+};
diff --git a/libceph/msgpool.c b/libceph/msgpool.c
new file mode 100644
index 0000000..ddec1c1
--- /dev/null
+++ b/libceph/msgpool.c
@@ -0,0 +1,83 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+
+#include <linux/ceph/msgpool.h>
+
+/*
+ * mempool allocation callback: create a message of the pool's fixed
+ * type/front size and tag it as pool-owned so puts recycle it.
+ */
+static void *msgpool_alloc(gfp_t gfp_mask, void *arg)
+{
+ struct ceph_msgpool *pool = arg;
+ struct ceph_msg *msg;
+
+ msg = ceph_msg_new(pool->type, pool->front_len, gfp_mask, true);
+ if (!msg) {
+ dout("msgpool_alloc %s failed\n", pool->name);
+ } else {
+ dout("msgpool_alloc %s %p\n", pool->name, msg);
+ msg->pool = pool;
+ }
+ return msg;
+}
+
+/*
+ * mempool free callback: detach the message from the pool so the final
+ * put frees it instead of recycling it back into the pool.
+ */
+static void msgpool_free(void *element, void *arg)
+{
+ struct ceph_msgpool *pool = arg;
+ struct ceph_msg *msg = element;
+
+ dout("msgpool_release %s %p\n", pool->name, msg);
+ msg->pool = NULL;
+ ceph_msg_put(msg);
+}
+
+/*
+ * Initialize a message pool of @size preallocated messages, each with a
+ * front buffer of @front_len bytes. Returns 0 or -ENOMEM.
+ */
+int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
+ int front_len, int size, bool blocking, const char *name)
+{
+ dout("msgpool %s init\n", name);
+ pool->type = type;
+ pool->front_len = front_len;
+ pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool);
+ if (!pool->pool)
+ return -ENOMEM;
+ pool->name = name;
+ return 0;
+}
+
+/* Destroy the pool; msgpool_free() releases each remaining message. */
+void ceph_msgpool_destroy(struct ceph_msgpool *pool)
+{
+ dout("msgpool %s destroy\n", pool->name);
+ mempool_destroy(pool->pool);
+}
+
+/*
+ * Get a message from the pool. Requests larger than the pool's front
+ * size are unexpected (WARN) and fall back to a one-off allocation that
+ * is not pool-owned.
+ */
+struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
+ int front_len)
+{
+ struct ceph_msg *msg;
+
+ if (front_len > pool->front_len) {
+ dout("msgpool_get %s need front %d, pool size is %d\n",
+ pool->name, front_len, pool->front_len);
+ WARN_ON(1);
+
+ /* try to alloc a fresh message */
+ return ceph_msg_new(pool->type, front_len, GFP_NOFS, false);
+ }
+
+ msg = mempool_alloc(pool->pool, GFP_NOFS);
+ dout("msgpool_get %s %p\n", pool->name, msg);
+ return msg;
+}
+
+/*
+ * Return a message to its pool, restoring the pool's fixed front length
+ * and resetting the refcount to a single reference.
+ */
+void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg)
+{
+ dout("msgpool_put %s %p\n", pool->name, msg);
+
+ /* reset msg front_len; user may have changed it */
+ msg->front.iov_len = pool->front_len;
+ msg->hdr.front_len = cpu_to_le32(pool->front_len);
+
+ kref_init(&msg->kref); /* retake single ref */
+ mempool_free(msg, pool->pool);
+}
diff --git a/libceph/osd_client.c b/libceph/osd_client.c
new file mode 100644
index 0000000..b4157dc
--- /dev/null
+++ b/libceph/osd_client.c
@@ -0,0 +1,2904 @@
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/highmem.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#ifdef CONFIG_BLOCK
+#include <linux/bio.h>
+#endif
+
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/osd_client.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/pagelist.h>
+
+#define OSD_OP_FRONT_LEN 4096
+#define OSD_OPREPLY_FRONT_LEN 512
+
+static struct kmem_cache *ceph_osd_request_cache;
+
+static const struct ceph_connection_operations osd_con_ops;
+
+static void __send_queued(struct ceph_osd_client *osdc);
+static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd);
+static void __register_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req);
+static void __unregister_linger_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req);
+static void __send_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req);
+
+/*
+ * Implement client access to distributed object storage cluster.
+ *
+ * All data objects are stored within a cluster/cloud of OSDs, or
+ * "object storage devices." (Note that Ceph OSDs have _nothing_ to
+ * do with the T10 OSD extensions to SCSI.) Ceph OSDs are simply
+ * remote daemons serving up and coordinating consistent and safe
+ * access to storage.
+ *
+ * Cluster membership and the mapping of data objects onto storage devices
+ * are described by the osd map.
+ *
+ * We keep track of pending OSD requests (read, write), resubmit
+ * requests to different OSDs when the cluster topology/data layout
+ * change, or retry the affected requests when the communications
+ * channel with an OSD is reset.
+ */
+
+/*
+ * calculate the mapping of a file extent onto an object, and fill out the
+ * request accordingly. shorten extent as necessary if it crosses an
+ * object boundary.
+ *
+ * fill osd op in request message.
+ *
+ * On success *objnum/*objoff/*objlen describe the object extent, and
+ * *plen is shortened in place if the extent crossed an object boundary.
+ */
+static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen,
+ u64 *objnum, u64 *objoff, u64 *objlen)
+{
+ u64 orig_len = *plen;
+ int r;
+
+ /* object extent? */
+ r = ceph_calc_file_object_mapping(layout, off, orig_len, objnum,
+ objoff, objlen);
+ if (r < 0)
+ return r;
+ /* clip the request length to what fits in this object */
+ if (*objlen < orig_len) {
+ *plen = *objlen;
+ dout(" skipping last %llu, final file extent %llu~%llu\n",
+ orig_len - *plen, off, *plen);
+ }
+
+ dout("calc_layout objnum=%llx %llu~%llu\n", *objnum, *objoff, *objlen);
+
+ return 0;
+}
+
+/* Reset an osd_data item to the empty (NONE) state. */
+static void ceph_osd_data_init(struct ceph_osd_data *osd_data)
+{
+ memset(osd_data, 0, sizeof (*osd_data));
+ osd_data->type = CEPH_OSD_DATA_TYPE_NONE;
+}
+
+/*
+ * Describe op data as a page vector.  own_pages transfers ownership:
+ * the pages are released when the request is destroyed.
+ */
+static void ceph_osd_data_pages_init(struct ceph_osd_data *osd_data,
+ struct page **pages, u64 length, u32 alignment,
+ bool pages_from_pool, bool own_pages)
+{
+ osd_data->type = CEPH_OSD_DATA_TYPE_PAGES;
+ osd_data->pages = pages;
+ osd_data->length = length;
+ osd_data->alignment = alignment;
+ osd_data->pages_from_pool = pages_from_pool;
+ osd_data->own_pages = own_pages;
+}
+
+/* Describe op data as a ceph_pagelist. */
+static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data,
+ struct ceph_pagelist *pagelist)
+{
+ osd_data->type = CEPH_OSD_DATA_TYPE_PAGELIST;
+ osd_data->pagelist = pagelist;
+}
+
+#ifdef CONFIG_BLOCK
+/* Describe op data as a bio chain of bio_length bytes. */
+static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data,
+ struct bio *bio, size_t bio_length)
+{
+ osd_data->type = CEPH_OSD_DATA_TYPE_BIO;
+ osd_data->bio = bio;
+ osd_data->bio_length = bio_length;
+}
+#endif /* CONFIG_BLOCK */
+
+/*
+ * Yield a pointer to data field @fld within member @typ of op @whch of
+ * request @oreq, BUGging if @whch is out of range.  Note @whch is
+ * evaluated more than once.
+ */
+#define osd_req_op_data(oreq, whch, typ, fld) \
+ ({ \
+ BUG_ON(whch >= (oreq)->r_num_ops); \
+ &(oreq)->r_ops[whch].typ.fld; \
+ })
+
+/* Return the raw incoming-data descriptor of op @which. */
+static struct ceph_osd_data *
+osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
+{
+ BUG_ON(which >= osd_req->r_num_ops);
+
+ return &osd_req->r_ops[which].raw_data_in;
+}
+
+/* Return the extent-op data descriptor of op @which. */
+struct ceph_osd_data *
+osd_req_op_extent_osd_data(struct ceph_osd_request *osd_req,
+ unsigned int which)
+{
+ return osd_req_op_data(osd_req, which, extent, osd_data);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data);
+
+/* Return the class-call response data descriptor of op @which. */
+struct ceph_osd_data *
+osd_req_op_cls_response_data(struct ceph_osd_request *osd_req,
+ unsigned int which)
+{
+ return osd_req_op_data(osd_req, which, cls, response_data);
+}
+EXPORT_SYMBOL(osd_req_op_cls_response_data); /* ??? */
+
+/* Attach a page vector as the raw incoming data of op @which. */
+void osd_req_op_raw_data_in_pages(struct ceph_osd_request *osd_req,
+ unsigned int which, struct page **pages,
+ u64 length, u32 alignment,
+ bool pages_from_pool, bool own_pages)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_raw_data_in(osd_req, which);
+ ceph_osd_data_pages_init(osd_data, pages, length, alignment,
+ pages_from_pool, own_pages);
+}
+EXPORT_SYMBOL(osd_req_op_raw_data_in_pages);
+
+/* Attach a page vector as the extent data of op @which. */
+void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *osd_req,
+ unsigned int which, struct page **pages,
+ u64 length, u32 alignment,
+ bool pages_from_pool, bool own_pages)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+ ceph_osd_data_pages_init(osd_data, pages, length, alignment,
+ pages_from_pool, own_pages);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data_pages);
+
+/* Attach a pagelist as the extent data of op @which. */
+void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *osd_req,
+ unsigned int which, struct ceph_pagelist *pagelist)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+ ceph_osd_data_pagelist_init(osd_data, pagelist);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist);
+
+#ifdef CONFIG_BLOCK
+/* Attach a bio chain as the extent data of op @which. */
+void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
+ unsigned int which, struct bio *bio, size_t bio_length)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+ ceph_osd_data_bio_init(osd_data, bio, bio_length);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio);
+#endif /* CONFIG_BLOCK */
+
+/* Attach a pagelist as the class/method info of a CALL op. */
+static void osd_req_op_cls_request_info_pagelist(
+ struct ceph_osd_request *osd_req,
+ unsigned int which, struct ceph_pagelist *pagelist)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, cls, request_info);
+ ceph_osd_data_pagelist_init(osd_data, pagelist);
+}
+
+/* Attach a pagelist as the outgoing request data of a CALL op. */
+void osd_req_op_cls_request_data_pagelist(
+ struct ceph_osd_request *osd_req,
+ unsigned int which, struct ceph_pagelist *pagelist)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, cls, request_data);
+ ceph_osd_data_pagelist_init(osd_data, pagelist);
+}
+EXPORT_SYMBOL(osd_req_op_cls_request_data_pagelist);
+
+/* Attach a page vector as the outgoing request data of a CALL op. */
+void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req,
+ unsigned int which, struct page **pages, u64 length,
+ u32 alignment, bool pages_from_pool, bool own_pages)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, cls, request_data);
+ ceph_osd_data_pages_init(osd_data, pages, length, alignment,
+ pages_from_pool, own_pages);
+}
+EXPORT_SYMBOL(osd_req_op_cls_request_data_pages);
+
+/* Attach a page vector to receive the response data of a CALL op. */
+void osd_req_op_cls_response_data_pages(struct ceph_osd_request *osd_req,
+ unsigned int which, struct page **pages, u64 length,
+ u32 alignment, bool pages_from_pool, bool own_pages)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, cls, response_data);
+ ceph_osd_data_pages_init(osd_data, pages, length, alignment,
+ pages_from_pool, own_pages);
+}
+EXPORT_SYMBOL(osd_req_op_cls_response_data_pages);
+
+/* Return the byte length of an osd_data item, 0 for NONE or unknown. */
+static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data)
+{
+ switch (osd_data->type) {
+ case CEPH_OSD_DATA_TYPE_NONE:
+ return 0;
+ case CEPH_OSD_DATA_TYPE_PAGES:
+ return osd_data->length;
+ case CEPH_OSD_DATA_TYPE_PAGELIST:
+ return (u64)osd_data->pagelist->length;
+#ifdef CONFIG_BLOCK
+ case CEPH_OSD_DATA_TYPE_BIO:
+ return (u64)osd_data->bio_length;
+#endif /* CONFIG_BLOCK */
+ default:
+ WARN(true, "unrecognized data type %d\n", (int)osd_data->type);
+ return 0;
+ }
+}
+
+/*
+ * Release an osd_data item's resources (only page vectors marked
+ * own_pages carry any) and reset it to the NONE state.
+ */
+static void ceph_osd_data_release(struct ceph_osd_data *osd_data)
+{
+ if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES && osd_data->own_pages) {
+ int num_pages;
+
+ num_pages = calc_pages_for((u64)osd_data->alignment,
+ (u64)osd_data->length);
+ ceph_release_page_vector(osd_data->pages, num_pages);
+ }
+ ceph_osd_data_init(osd_data);
+}
+
+/* Release all data items attached to op @which of @osd_req. */
+static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
+ unsigned int which)
+{
+ struct ceph_osd_req_op *op;
+
+ BUG_ON(which >= osd_req->r_num_ops);
+ op = &osd_req->r_ops[which];
+
+ switch (op->op) {
+ case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_WRITE:
+ ceph_osd_data_release(&op->extent.osd_data);
+ break;
+ case CEPH_OSD_OP_CALL:
+ ceph_osd_data_release(&op->cls.request_info);
+ ceph_osd_data_release(&op->cls.request_data);
+ ceph_osd_data_release(&op->cls.response_data);
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * requests
+ */
+
+/*
+ * kref release callback: free a request, its messages, per-op data
+ * items and snap context.  Requests from the mempool go back to it;
+ * others are returned to the slab cache.
+ */
+void ceph_osdc_release_request(struct kref *kref)
+{
+ struct ceph_osd_request *req;
+ unsigned int which;
+
+ req = container_of(kref, struct ceph_osd_request, r_kref);
+ if (req->r_request)
+ ceph_msg_put(req->r_request);
+ if (req->r_reply) {
+ ceph_msg_revoke_incoming(req->r_reply);
+ ceph_msg_put(req->r_reply);
+ }
+
+ for (which = 0; which < req->r_num_ops; which++)
+ osd_req_op_data_release(req, which);
+
+ ceph_put_snap_context(req->r_snapc);
+ if (req->r_mempool)
+ mempool_free(req, req->r_osdc->req_mempool);
+ else
+ kmem_cache_free(ceph_osd_request_cache, req);
+
+}
+EXPORT_SYMBOL(ceph_osdc_release_request);
+
+/*
+ * Allocate an osd request along with its reply message and an empty,
+ * zeroed request message sized for @num_ops ops plus addressing data.
+ * Returns NULL on allocation failure; the caller owns the initial ref.
+ */
+struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
+ struct ceph_snap_context *snapc,
+ unsigned int num_ops,
+ bool use_mempool,
+ gfp_t gfp_flags)
+{
+ struct ceph_osd_request *req;
+ struct ceph_msg *msg;
+ size_t msg_size;
+
+ BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX);
+ BUG_ON(num_ops > CEPH_OSD_MAX_OP);
+
+ /* worst-case front size of a MOSDOp message for num_ops ops */
+ msg_size = 4 + 4 + 8 + 8 + 4+8;
+ msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
+ msg_size += 1 + 8 + 4 + 4; /* pg_t */
+ msg_size += 4 + CEPH_MAX_OID_NAME_LEN; /* oid */
+ msg_size += 2 + num_ops*sizeof(struct ceph_osd_op);
+ msg_size += 8; /* snapid */
+ msg_size += 8; /* snap_seq */
+ msg_size += 8 * (snapc ? snapc->num_snaps : 0); /* snaps */
+ msg_size += 4;
+
+ if (use_mempool) {
+ req = mempool_alloc(osdc->req_mempool, gfp_flags);
+ memset(req, 0, sizeof(*req));
+ } else {
+ req = kmem_cache_zalloc(ceph_osd_request_cache, gfp_flags);
+ }
+ if (req == NULL)
+ return NULL;
+
+ req->r_osdc = osdc;
+ req->r_mempool = use_mempool;
+ req->r_num_ops = num_ops;
+
+ kref_init(&req->r_kref);
+ init_completion(&req->r_completion);
+ init_completion(&req->r_safe_completion);
+ RB_CLEAR_NODE(&req->r_node);
+ INIT_LIST_HEAD(&req->r_unsafe_item);
+ INIT_LIST_HEAD(&req->r_linger_item);
+ INIT_LIST_HEAD(&req->r_linger_osd);
+ INIT_LIST_HEAD(&req->r_req_lru_item);
+ INIT_LIST_HEAD(&req->r_osd_item);
+
+ /* -1 means "no pool chosen yet" */
+ req->r_base_oloc.pool = -1;
+ req->r_target_oloc.pool = -1;
+
+ /* create reply message */
+ if (use_mempool)
+ msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0);
+ else
+ msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY,
+ OSD_OPREPLY_FRONT_LEN, gfp_flags, true);
+ if (!msg) {
+ ceph_osdc_put_request(req);
+ return NULL;
+ }
+ req->r_reply = msg;
+
+ /* create request message; allow space for oid */
+ if (use_mempool)
+ msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
+ else
+ msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp_flags, true);
+ if (!msg) {
+ ceph_osdc_put_request(req);
+ return NULL;
+ }
+
+ memset(msg->front.iov_base, 0, msg->front.iov_len);
+
+ req->r_request = msg;
+
+ return req;
+}
+EXPORT_SYMBOL(ceph_osdc_alloc_request);
+
+/* Return true iff @opcode is a known OSD operation code. */
+static bool osd_req_opcode_valid(u16 opcode)
+{
+ switch (opcode) {
+ case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_STAT:
+ case CEPH_OSD_OP_MAPEXT:
+ case CEPH_OSD_OP_MASKTRUNC:
+ case CEPH_OSD_OP_SPARSE_READ:
+ case CEPH_OSD_OP_NOTIFY:
+ case CEPH_OSD_OP_NOTIFY_ACK:
+ case CEPH_OSD_OP_ASSERT_VER:
+ case CEPH_OSD_OP_WRITE:
+ case CEPH_OSD_OP_WRITEFULL:
+ case CEPH_OSD_OP_TRUNCATE:
+ case CEPH_OSD_OP_ZERO:
+ case CEPH_OSD_OP_DELETE:
+ case CEPH_OSD_OP_APPEND:
+ case CEPH_OSD_OP_STARTSYNC:
+ case CEPH_OSD_OP_SETTRUNC:
+ case CEPH_OSD_OP_TRIMTRUNC:
+ case CEPH_OSD_OP_TMAPUP:
+ case CEPH_OSD_OP_TMAPPUT:
+ case CEPH_OSD_OP_TMAPGET:
+ case CEPH_OSD_OP_CREATE:
+ case CEPH_OSD_OP_ROLLBACK:
+ case CEPH_OSD_OP_WATCH:
+ case CEPH_OSD_OP_OMAPGETKEYS:
+ case CEPH_OSD_OP_OMAPGETVALS:
+ case CEPH_OSD_OP_OMAPGETHEADER:
+ case CEPH_OSD_OP_OMAPGETVALSBYKEYS:
+ case CEPH_OSD_OP_OMAPSETVALS:
+ case CEPH_OSD_OP_OMAPSETHEADER:
+ case CEPH_OSD_OP_OMAPCLEAR:
+ case CEPH_OSD_OP_OMAPRMKEYS:
+ case CEPH_OSD_OP_OMAP_CMP:
+ case CEPH_OSD_OP_SETALLOCHINT:
+ case CEPH_OSD_OP_CLONERANGE:
+ case CEPH_OSD_OP_ASSERT_SRC_VERSION:
+ case CEPH_OSD_OP_SRC_CMPXATTR:
+ case CEPH_OSD_OP_GETXATTR:
+ case CEPH_OSD_OP_GETXATTRS:
+ case CEPH_OSD_OP_CMPXATTR:
+ case CEPH_OSD_OP_SETXATTR:
+ case CEPH_OSD_OP_SETXATTRS:
+ case CEPH_OSD_OP_RESETXATTRS:
+ case CEPH_OSD_OP_RMXATTR:
+ case CEPH_OSD_OP_PULL:
+ case CEPH_OSD_OP_PUSH:
+ case CEPH_OSD_OP_BALANCEREADS:
+ case CEPH_OSD_OP_UNBALANCEREADS:
+ case CEPH_OSD_OP_SCRUB:
+ case CEPH_OSD_OP_SCRUB_RESERVE:
+ case CEPH_OSD_OP_SCRUB_UNRESERVE:
+ case CEPH_OSD_OP_SCRUB_STOP:
+ case CEPH_OSD_OP_SCRUB_MAP:
+ case CEPH_OSD_OP_WRLOCK:
+ case CEPH_OSD_OP_WRUNLOCK:
+ case CEPH_OSD_OP_RDLOCK:
+ case CEPH_OSD_OP_RDUNLOCK:
+ case CEPH_OSD_OP_UPLOCK:
+ case CEPH_OSD_OP_DNLOCK:
+ case CEPH_OSD_OP_CALL:
+ case CEPH_OSD_OP_PGLS:
+ case CEPH_OSD_OP_PGLS_FILTER:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*
+ * This is an osd op init function for opcodes that have no data or
+ * other information associated with them. It also serves as a
+ * common init routine for all the other init functions, below.
+ *
+ * Zeroes op @which and sets its opcode; returns a pointer to it.
+ */
+static struct ceph_osd_req_op *
+_osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which,
+ u16 opcode)
+{
+ struct ceph_osd_req_op *op;
+
+ BUG_ON(which >= osd_req->r_num_ops);
+ BUG_ON(!osd_req_opcode_valid(opcode));
+
+ op = &osd_req->r_ops[which];
+ memset(op, 0, sizeof (*op));
+ op->op = opcode;
+
+ return op;
+}
+
+/* Public wrapper for data-less ops; discards the op pointer. */
+void osd_req_op_init(struct ceph_osd_request *osd_req,
+ unsigned int which, u16 opcode)
+{
+ (void)_osd_req_op_init(osd_req, which, opcode);
+}
+EXPORT_SYMBOL(osd_req_op_init);
+
+/*
+ * Initialize op @which as an extent op (read/write/delete/zero/
+ * truncate) covering offset~length.  Only writes carry payload.
+ */
+void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
+ unsigned int which, u16 opcode,
+ u64 offset, u64 length,
+ u64 truncate_size, u32 truncate_seq)
+{
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+ size_t payload_len = 0;
+
+ BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
+ opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
+ opcode != CEPH_OSD_OP_TRUNCATE);
+
+ op->extent.offset = offset;
+ op->extent.length = length;
+ op->extent.truncate_size = truncate_size;
+ op->extent.truncate_seq = truncate_seq;
+ if (opcode == CEPH_OSD_OP_WRITE)
+ payload_len += length;
+
+ op->payload_len = payload_len;
+}
+EXPORT_SYMBOL(osd_req_op_extent_init);
+
+/*
+ * Shrink (never grow) the length of an already-initialized extent op,
+ * adjusting the payload length by the same amount.
+ */
+void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
+ unsigned int which, u64 length)
+{
+ struct ceph_osd_req_op *op;
+ u64 previous;
+
+ BUG_ON(which >= osd_req->r_num_ops);
+ op = &osd_req->r_ops[which];
+ previous = op->extent.length;
+
+ if (length == previous)
+ return; /* Nothing to do */
+ BUG_ON(length > previous);
+
+ op->extent.length = length;
+ op->payload_len -= previous - length;
+}
+EXPORT_SYMBOL(osd_req_op_extent_update);
+
+/*
+ * Initialize op @which as a CALL of @class::@method, encoding both
+ * names into a freshly allocated pagelist attached as request_info.
+ * @class and @method must outlive the request (not copied).
+ *
+ * NOTE(review): the kmalloc result is only BUG_ON-checked and the
+ * ceph_pagelist_append() return values are ignored, so allocation
+ * failure here is fatal rather than reported to the caller.
+ */
+void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
+ u16 opcode, const char *class, const char *method)
+{
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+ struct ceph_pagelist *pagelist;
+ size_t payload_len = 0;
+ size_t size;
+
+ BUG_ON(opcode != CEPH_OSD_OP_CALL);
+
+ pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS);
+ BUG_ON(!pagelist);
+ ceph_pagelist_init(pagelist);
+
+ op->cls.class_name = class;
+ size = strlen(class);
+ BUG_ON(size > (size_t) U8_MAX);
+ op->cls.class_len = size;
+ ceph_pagelist_append(pagelist, class, size);
+ payload_len += size;
+
+ op->cls.method_name = method;
+ size = strlen(method);
+ BUG_ON(size > (size_t) U8_MAX);
+ op->cls.method_len = size;
+ ceph_pagelist_append(pagelist, method, size);
+ payload_len += size;
+
+ osd_req_op_cls_request_info_pagelist(osd_req, which, pagelist);
+
+ op->cls.argc = 0; /* currently unused */
+
+ op->payload_len = payload_len;
+}
+EXPORT_SYMBOL(osd_req_op_cls_init);
+
+/*
+ * Initialize op @which as a WATCH or NOTIFY_ACK op.  The flag byte is
+ * set only for WATCH ops when @flag is non-zero.
+ */
+void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
+ unsigned int which, u16 opcode,
+ u64 cookie, u64 version, int flag)
+{
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+
+ BUG_ON(opcode != CEPH_OSD_OP_NOTIFY_ACK && opcode != CEPH_OSD_OP_WATCH);
+
+ op->watch.cookie = cookie;
+ op->watch.ver = version;
+ if (opcode == CEPH_OSD_OP_WATCH && flag)
+ op->watch.flag = (u8)1;
+}
+EXPORT_SYMBOL(osd_req_op_watch_init);
+
+/* Initialize op @which as a SETALLOCHINT op carrying size hints. */
+void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
+ unsigned int which,
+ u64 expected_object_size,
+ u64 expected_write_size)
+{
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
+ CEPH_OSD_OP_SETALLOCHINT);
+
+ op->alloc_hint.expected_object_size = expected_object_size;
+ op->alloc_hint.expected_write_size = expected_write_size;
+
+ /*
+ * CEPH_OSD_OP_SETALLOCHINT op is advisory and therefore deemed
+ * not worth a feature bit. Set FAILOK per-op flag to make
+ * sure older osds don't trip over an unsupported opcode.
+ */
+ op->flags |= CEPH_OSD_OP_FLAG_FAILOK;
+}
+EXPORT_SYMBOL(osd_req_op_alloc_hint_init);
+
+/*
+ * Attach an osd_data item to @msg in whichever form it carries
+ * (pages, pagelist or bio); NONE items are silently skipped.
+ */
+static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
+ struct ceph_osd_data *osd_data)
+{
+ u64 length = ceph_osd_data_length(osd_data);
+
+ if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
+ BUG_ON(length > (u64) SIZE_MAX);
+ if (length)
+ ceph_msg_data_add_pages(msg, osd_data->pages,
+ length, osd_data->alignment);
+ } else if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGELIST) {
+ BUG_ON(!length);
+ ceph_msg_data_add_pagelist(msg, osd_data->pagelist);
+#ifdef CONFIG_BLOCK
+ } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) {
+ ceph_msg_data_add_bio(msg, osd_data->bio, length);
+#endif
+ } else {
+ BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE);
+ }
+}
+
+/*
+ * Encode op @which of @req into wire format in @dst, attaching its
+ * data to the outgoing request message (for writes/CALL input) or to
+ * the reply message (for reads/CALL output/STAT).
+ *
+ * Returns the number of outgoing request data bytes this op carries;
+ * 0 for data-less ops and for unrecognized/unsupported opcodes.
+ */
+static u64 osd_req_encode_op(struct ceph_osd_request *req,
+ struct ceph_osd_op *dst, unsigned int which)
+{
+ struct ceph_osd_req_op *src;
+ struct ceph_osd_data *osd_data;
+ u64 request_data_len = 0;
+ u64 data_length;
+
+ BUG_ON(which >= req->r_num_ops);
+ src = &req->r_ops[which];
+ if (WARN_ON(!osd_req_opcode_valid(src->op))) {
+ pr_err("unrecognized osd opcode %d\n", src->op);
+
+ return 0;
+ }
+
+ switch (src->op) {
+ case CEPH_OSD_OP_STAT:
+ osd_data = &src->raw_data_in;
+ ceph_osdc_msg_data_add(req->r_reply, osd_data);
+ break;
+ case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_WRITE:
+ case CEPH_OSD_OP_ZERO:
+ case CEPH_OSD_OP_DELETE:
+ case CEPH_OSD_OP_TRUNCATE:
+ if (src->op == CEPH_OSD_OP_WRITE)
+ request_data_len = src->extent.length;
+ dst->extent.offset = cpu_to_le64(src->extent.offset);
+ dst->extent.length = cpu_to_le64(src->extent.length);
+ dst->extent.truncate_size =
+ cpu_to_le64(src->extent.truncate_size);
+ dst->extent.truncate_seq =
+ cpu_to_le32(src->extent.truncate_seq);
+ osd_data = &src->extent.osd_data;
+ if (src->op == CEPH_OSD_OP_WRITE)
+ ceph_osdc_msg_data_add(req->r_request, osd_data);
+ else
+ ceph_osdc_msg_data_add(req->r_reply, osd_data);
+ break;
+ case CEPH_OSD_OP_CALL:
+ dst->cls.class_len = src->cls.class_len;
+ dst->cls.method_len = src->cls.method_len;
+ osd_data = &src->cls.request_info;
+ ceph_osdc_msg_data_add(req->r_request, osd_data);
+ BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGELIST);
+ request_data_len = osd_data->pagelist->length;
+
+ osd_data = &src->cls.request_data;
+ data_length = ceph_osd_data_length(osd_data);
+ if (data_length) {
+ BUG_ON(osd_data->type == CEPH_OSD_DATA_TYPE_NONE);
+ dst->cls.indata_len = cpu_to_le32(data_length);
+ ceph_osdc_msg_data_add(req->r_request, osd_data);
+ src->payload_len += data_length;
+ request_data_len += data_length;
+ }
+ osd_data = &src->cls.response_data;
+ ceph_osdc_msg_data_add(req->r_reply, osd_data);
+ break;
+ case CEPH_OSD_OP_STARTSYNC:
+ break;
+ case CEPH_OSD_OP_NOTIFY_ACK:
+ case CEPH_OSD_OP_WATCH:
+ dst->watch.cookie = cpu_to_le64(src->watch.cookie);
+ dst->watch.ver = cpu_to_le64(src->watch.ver);
+ dst->watch.flag = src->watch.flag;
+ break;
+ case CEPH_OSD_OP_SETALLOCHINT:
+ dst->alloc_hint.expected_object_size =
+ cpu_to_le64(src->alloc_hint.expected_object_size);
+ dst->alloc_hint.expected_write_size =
+ cpu_to_le64(src->alloc_hint.expected_write_size);
+ break;
+ default:
+ pr_err("unsupported osd opcode %s\n",
+ ceph_osd_op_name(src->op));
+ WARN_ON(1);
+
+ return 0;
+ }
+
+ dst->op = cpu_to_le16(src->op);
+ dst->flags = cpu_to_le32(src->flags);
+ dst->payload_len = cpu_to_le32(src->payload_len);
+
+ return request_data_len;
+}
+
+/*
+ * build new request AND message, calculate layout, and adjust file
+ * extent as needed.
+ *
+ * if the file was recently truncated, we include information about its
+ * old and new size so that the object can be updated appropriately. (we
+ * avoid synchronously deleting truncated objects because it's slow.)
+ *
+ * if @do_sync, include a 'startsync' command so that the osd will flush
+ * data quickly.
+ *
+ * Returns the request or an ERR_PTR on failure; *plen may be shortened
+ * if the extent crosses an object boundary.
+ */
+struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
+ struct ceph_file_layout *layout,
+ struct ceph_vino vino,
+ u64 off, u64 *plen, int num_ops,
+ int opcode, int flags,
+ struct ceph_snap_context *snapc,
+ u32 truncate_seq,
+ u64 truncate_size,
+ bool use_mempool)
+{
+ struct ceph_osd_request *req;
+ u64 objnum = 0;
+ u64 objoff = 0;
+ u64 objlen = 0;
+ u32 object_size;
+ u64 object_base;
+ int r;
+
+ BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
+ opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
+ opcode != CEPH_OSD_OP_TRUNCATE);
+
+ req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
+ GFP_NOFS);
+ if (!req)
+ return ERR_PTR(-ENOMEM);
+
+ req->r_flags = flags;
+
+ /* calculate max write size */
+ r = calc_layout(layout, off, plen, &objnum, &objoff, &objlen);
+ if (r < 0) {
+ ceph_osdc_put_request(req);
+ return ERR_PTR(r);
+ }
+
+ /* translate the file-relative truncate point into object space */
+ object_size = le32_to_cpu(layout->fl_object_size);
+ object_base = off - objoff;
+ if (!(truncate_seq == 1 && truncate_size == -1ULL)) {
+ if (truncate_size <= object_base) {
+ truncate_size = 0;
+ } else {
+ truncate_size -= object_base;
+ if (truncate_size > object_size)
+ truncate_size = object_size;
+ }
+ }
+
+ osd_req_op_extent_init(req, 0, opcode, objoff, objlen,
+ truncate_size, truncate_seq);
+
+ /*
+ * A second op in the ops array means the caller wants to
+ * also include a 'startsync' command so that the
+ * osd will flush data quickly.
+ */
+ if (num_ops > 1)
+ osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
+
+ req->r_base_oloc.pool = ceph_file_layout_pg_pool(*layout);
+
+ snprintf(req->r_base_oid.name, sizeof(req->r_base_oid.name),
+ "%llx.%08llx", vino.ino, objnum);
+ req->r_base_oid.name_len = strlen(req->r_base_oid.name);
+
+ return req;
+}
+EXPORT_SYMBOL(ceph_osdc_new_request);
+
+/*
+ * We keep osd requests in an rbtree, sorted by ->r_tid.
+ */
+
+/* Insert @new into the request tree; tids must be unique (BUG if not). */
+static void __insert_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *new)
+{
+ struct rb_node **p = &osdc->requests.rb_node;
+ struct rb_node *parent = NULL;
+ struct ceph_osd_request *req = NULL;
+
+ while (*p) {
+ parent = *p;
+ req = rb_entry(parent, struct ceph_osd_request, r_node);
+ if (new->r_tid < req->r_tid)
+ p = &(*p)->rb_left;
+ else if (new->r_tid > req->r_tid)
+ p = &(*p)->rb_right;
+ else
+ BUG();
+ }
+
+ rb_link_node(&new->r_node, parent, p);
+ rb_insert_color(&new->r_node, &osdc->requests);
+}
+
+/* Find the request with exactly @tid, or NULL. */
+static struct ceph_osd_request *__lookup_request(struct ceph_osd_client *osdc,
+ u64 tid)
+{
+ struct ceph_osd_request *req;
+ struct rb_node *n = osdc->requests.rb_node;
+
+ while (n) {
+ req = rb_entry(n, struct ceph_osd_request, r_node);
+ if (tid < req->r_tid)
+ n = n->rb_left;
+ else if (tid > req->r_tid)
+ n = n->rb_right;
+ else
+ return req;
+ }
+ return NULL;
+}
+
+/* Find the request with the smallest r_tid >= @tid, or NULL. */
+static struct ceph_osd_request *
+__lookup_request_ge(struct ceph_osd_client *osdc,
+ u64 tid)
+{
+ struct ceph_osd_request *req;
+ struct rb_node *n = osdc->requests.rb_node;
+
+ while (n) {
+ req = rb_entry(n, struct ceph_osd_request, r_node);
+ if (tid < req->r_tid) {
+ if (!n->rb_left)
+ return req;
+ n = n->rb_left;
+ } else if (tid > req->r_tid) {
+ n = n->rb_right;
+ } else {
+ return req;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Resubmit requests pending on the given osd.
+ *
+ * NOTE(review): callers appear to hold osdc->request_mutex (see
+ * osd_reset) -- confirm before adding new call sites.
+ */
+static void __kick_osd_requests(struct ceph_osd_client *osdc,
+ struct ceph_osd *osd)
+{
+ struct ceph_osd_request *req, *nreq;
+ LIST_HEAD(resend);
+ int err;
+
+ dout("__kick_osd_requests osd%d\n", osd->o_osd);
+ err = __reset_osd(osdc, osd);
+ if (err)
+ return;
+ /*
+ * Build up a list of requests to resend by traversing the
+ * osd's list of requests. Requests for a given object are
+ * sent in tid order, and that is also the order they're
+ * kept on this list. Therefore all requests that are in
+ * flight will be found first, followed by all requests that
+ * have not yet been sent. And to resend requests while
+ * preserving this order we will want to put any sent
+ * requests back on the front of the osd client's unsent
+ * list.
+ *
+ * So we build a separate ordered list of already-sent
+ * requests for the affected osd and splice it onto the
+ * front of the osd client's unsent list. Once we've seen a
+ * request that has not yet been sent we're done. Those
+ * requests are already sitting right where they belong.
+ */
+ list_for_each_entry(req, &osd->o_requests, r_osd_item) {
+ if (!req->r_sent)
+ break;
+ list_move_tail(&req->r_req_lru_item, &resend);
+ dout("requeueing %p tid %llu osd%d\n", req, req->r_tid,
+ osd->o_osd);
+ if (!req->r_linger)
+ req->r_flags |= CEPH_OSD_FLAG_RETRY;
+ }
+ list_splice(&resend, &osdc->req_unsent);
+
+ /*
+ * Linger requests are re-registered before sending, which
+ * sets up a new tid for each. We add them to the unsent
+ * list at the end to keep things in tid order.
+ */
+ list_for_each_entry_safe(req, nreq, &osd->o_linger_requests,
+ r_linger_osd) {
+ /*
+ * reregister request prior to unregistering linger so
+ * that r_osd is preserved.
+ */
+ BUG_ON(!list_empty(&req->r_req_lru_item));
+ __register_request(osdc, req);
+ list_add_tail(&req->r_req_lru_item, &osdc->req_unsent);
+ list_add_tail(&req->r_osd_item, &req->r_osd->o_requests);
+ __unregister_linger_request(osdc, req);
+ dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid,
+ osd->o_osd);
+ }
+}
+
+/*
+ * If the osd connection drops, we need to resubmit all requests.
+ *
+ * Connection reset callback: requeue everything pending on this osd
+ * and push the unsent queue, under map_sem/request_mutex.
+ */
+static void osd_reset(struct ceph_connection *con)
+{
+ struct ceph_osd *osd = con->private;
+ struct ceph_osd_client *osdc;
+
+ if (!osd)
+ return;
+ dout("osd_reset osd%d\n", osd->o_osd);
+ osdc = osd->o_osdc;
+ down_read(&osdc->map_sem);
+ mutex_lock(&osdc->request_mutex);
+ __kick_osd_requests(osdc, osd);
+ __send_queued(osdc);
+ mutex_unlock(&osdc->request_mutex);
+ up_read(&osdc->map_sem);
+}
+
+/*
+ * Track open sessions with osds.
+ */
+
+/*
+ * Allocate and initialize a session for osd number @onum with a single
+ * reference.  Returns NULL on allocation failure.
+ */
+static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
+{
+ struct ceph_osd *osd;
+
+ osd = kzalloc(sizeof(*osd), GFP_NOFS);
+ if (!osd)
+ return NULL;
+
+ atomic_set(&osd->o_ref, 1);
+ osd->o_osdc = osdc;
+ osd->o_osd = onum;
+ RB_CLEAR_NODE(&osd->o_node);
+ INIT_LIST_HEAD(&osd->o_requests);
+ INIT_LIST_HEAD(&osd->o_linger_requests);
+ INIT_LIST_HEAD(&osd->o_osd_lru);
+ osd->o_incarnation = 1;
+
+ ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr);
+
+ INIT_LIST_HEAD(&osd->o_keepalive_item);
+ return osd;
+}
+
+/*
+ * Take a reference on @osd unless its refcount already hit zero;
+ * returns the osd on success, NULL if it is already being destroyed.
+ */
+static struct ceph_osd *get_osd(struct ceph_osd *osd)
+{
+ if (atomic_inc_not_zero(&osd->o_ref)) {
+ dout("get_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref)-1,
+ atomic_read(&osd->o_ref));
+ return osd;
+ } else {
+ dout("get_osd %p FAIL\n", osd);
+ return NULL;
+ }
+}
+
+/*
+ * Drop a reference on @osd; on the final put, destroy its authorizer
+ * (if any) and free the osd itself.
+ *
+ * Fix: the original freed the osd only when an authorizer was present,
+ * leaking struct ceph_osd whenever no authorizer was attached (e.g.
+ * with auth_none).
+ */
+static void put_osd(struct ceph_osd *osd)
+{
+ dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref),
+ atomic_read(&osd->o_ref) - 1);
+ if (atomic_dec_and_test(&osd->o_ref)) {
+ struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;
+
+ if (osd->o_auth.authorizer)
+ ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer);
+ kfree(osd);
+ }
+}
+
+/*
+ * remove an osd from our map
+ */
+static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
+{
+ dout("__remove_osd %p\n", osd);
+ BUG_ON(!list_empty(&osd->o_requests));
+ rb_erase(&osd->o_node, &osdc->osds);
+ list_del_init(&osd->o_osd_lru);
+ ceph_con_close(&osd->o_con);
+ put_osd(osd);
+}
+
+/* Remove every osd session, e.g. on client teardown. */
+static void remove_all_osds(struct ceph_osd_client *osdc)
+{
+ dout("%s %p\n", __func__, osdc);
+ mutex_lock(&osdc->request_mutex);
+ while (!RB_EMPTY_ROOT(&osdc->osds)) {
+ struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds),
+ struct ceph_osd, o_node);
+ __remove_osd(osdc, osd);
+ }
+ mutex_unlock(&osdc->request_mutex);
+}
+
+/*
+ * Put an idle osd on the LRU with an expiry of osd_idle_ttl seconds,
+ * after which remove_old_osds() may close it.
+ */
+static void __move_osd_to_lru(struct ceph_osd_client *osdc,
+ struct ceph_osd *osd)
+{
+ dout("__move_osd_to_lru %p\n", osd);
+ BUG_ON(!list_empty(&osd->o_osd_lru));
+ list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
+ osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ;
+}
+
+/* Take an osd back off the LRU (no-op if it isn't on it). */
+static void __remove_osd_from_lru(struct ceph_osd *osd)
+{
+ dout("__remove_osd_from_lru %p\n", osd);
+ if (!list_empty(&osd->o_osd_lru))
+ list_del_init(&osd->o_osd_lru);
+}
+
+/*
+ * Close and remove every LRU osd whose idle TTL has expired.  The LRU
+ * is ordered by expiry, so stop at the first entry still in the
+ * future.
+ *
+ * Fix: the debug message printed the wrong function name
+ * ("__remove_old_osds"); use __func__ as remove_all_osds() does.
+ */
+static void remove_old_osds(struct ceph_osd_client *osdc)
+{
+ struct ceph_osd *osd, *nosd;
+
+ dout("%s %p\n", __func__, osdc);
+ mutex_lock(&osdc->request_mutex);
+ list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) {
+ if (time_before(jiffies, osd->lru_ttl))
+ break;
+ __remove_osd(osdc, osd);
+ }
+ mutex_unlock(&osdc->request_mutex);
+}
+
+/*
+ * reset osd connect
+ *
+ * Returns 0 after reopening the connection, -ENODEV if the idle osd
+ * was removed instead, or -EAGAIN when the messenger can retry the
+ * existing, never-opened connection.
+ */
+static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
+{
+ struct ceph_entity_addr *peer_addr;
+
+ dout("__reset_osd %p osd%d\n", osd, osd->o_osd);
+ if (list_empty(&osd->o_requests) &&
+ list_empty(&osd->o_linger_requests)) {
+ __remove_osd(osdc, osd);
+
+ return -ENODEV;
+ }
+
+ peer_addr = &osdc->osdmap->osd_addr[osd->o_osd];
+ if (!memcmp(peer_addr, &osd->o_con.peer_addr, sizeof (*peer_addr)) &&
+ !ceph_con_opened(&osd->o_con)) {
+ struct ceph_osd_request *req;
+
+ dout("osd addr hasn't changed and connection never opened, "
+ "letting msgr retry\n");
+ /* touch each r_stamp for handle_timeout()'s benefit */
+ list_for_each_entry(req, &osd->o_requests, r_osd_item)
+ req->r_stamp = jiffies;
+
+ return -EAGAIN;
+ }
+
+ ceph_con_close(&osd->o_con);
+ ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, peer_addr);
+ osd->o_incarnation++;
+
+ return 0;
+}
+
+/* Insert @new into the osd rbtree, keyed by osd number (BUG on dup). */
+static void __insert_osd(struct ceph_osd_client *osdc, struct ceph_osd *new)
+{
+ struct rb_node **p = &osdc->osds.rb_node;
+ struct rb_node *parent = NULL;
+ struct ceph_osd *osd = NULL;
+
+ dout("__insert_osd %p osd%d\n", new, new->o_osd);
+ while (*p) {
+ parent = *p;
+ osd = rb_entry(parent, struct ceph_osd, o_node);
+ if (new->o_osd < osd->o_osd)
+ p = &(*p)->rb_left;
+ else if (new->o_osd > osd->o_osd)
+ p = &(*p)->rb_right;
+ else
+ BUG();
+ }
+
+ rb_link_node(&new->o_node, parent, p);
+ rb_insert_color(&new->o_node, &osdc->osds);
+}
+
+/* Find the session for osd number @o, or NULL. */
+static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o)
+{
+ struct ceph_osd *osd;
+ struct rb_node *n = osdc->osds.rb_node;
+
+ while (n) {
+ osd = rb_entry(n, struct ceph_osd, o_node);
+ if (o < osd->o_osd)
+ n = n->rb_left;
+ else if (o > osd->o_osd)
+ n = n->rb_right;
+ else
+ return osd;
+ }
+ return NULL;
+}
+
+/* Arm the request timeout work after the keepalive interval. */
+static void __schedule_osd_timeout(struct ceph_osd_client *osdc)
+{
+ schedule_delayed_work(&osdc->timeout_work,
+ osdc->client->options->osd_keepalive_timeout * HZ);
+}
+
+/* Cancel pending request timeout work (non-blocking). */
+static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
+{
+ cancel_delayed_work(&osdc->timeout_work);
+}
+
+/*
+ * Register request, assign tid. If this is the first request, set up
+ * the timeout event.
+ */
+static void __register_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req)
+{
+ req->r_tid = ++osdc->last_tid;
+ req->r_request->hdr.tid = cpu_to_le64(req->r_tid);
+ dout("__register_request %p tid %lld\n", req, req->r_tid);
+ __insert_request(osdc, req);
+ ceph_osdc_get_request(req); /* tree holds a ref until unregister */
+ osdc->num_requests++;
+ if (osdc->num_requests == 1) {
+ dout(" first request, scheduling timeout\n");
+ __schedule_osd_timeout(osdc);
+ }
+}
+
+/*
+ * Remove @req from the request tree and drop the tree's ref on it.
+ * Revokes any in-flight message, detaches the request from its osd
+ * (moving a now-idle osd to the lru), and cancels the keepalive
+ * timeout when the last request goes away.
+ *
+ * called under osdc->request_mutex
+ */
+static void __unregister_request(struct ceph_osd_client *osdc,
+				 struct ceph_osd_request *req)
+{
+	if (RB_EMPTY_NODE(&req->r_node)) {
+		dout("__unregister_request %p tid %lld not registered\n",
+		     req, req->r_tid);
+		return;
+	}
+
+	dout("__unregister_request %p tid %lld\n", req, req->r_tid);
+	rb_erase(&req->r_node, &osdc->requests);
+	osdc->num_requests--;
+
+	if (req->r_osd) {
+		/* make sure the original request isn't in flight. */
+		ceph_msg_revoke(req->r_request);
+
+		list_del_init(&req->r_osd_item);
+		if (list_empty(&req->r_osd->o_requests) &&
+		    list_empty(&req->r_osd->o_linger_requests)) {
+			dout("moving osd to %p lru\n", req->r_osd);
+			__move_osd_to_lru(osdc, req->r_osd);
+		}
+		/* keep r_osd if a linger registration still references it */
+		if (list_empty(&req->r_linger_item))
+			req->r_osd = NULL;
+	}
+
+	list_del_init(&req->r_req_lru_item);
+	ceph_osdc_put_request(req);
+
+	if (osdc->num_requests == 0) {
+		dout(" no requests, canceling timeout\n");
+		__cancel_osd_timeout(osdc);
+	}
+}
+
+/*
+ * Cancel a previously queued request message: revoke it from the
+ * messenger and mark the request unsent so it will be transmitted
+ * again the next time it is (re)mapped.
+ */
+static void __cancel_request(struct ceph_osd_request *req)
+{
+	if (!req->r_sent || !req->r_osd)
+		return;
+
+	ceph_msg_revoke(req->r_request);
+	req->r_sent = 0;
+}
+
+/*
+ * Put @req on the linger list (taking a ref) so it is re-sent across
+ * connection resets and map changes, and link it to its current osd,
+ * if any.  Caller must hold osdc->request_mutex.
+ */
+static void __register_linger_request(struct ceph_osd_client *osdc,
+				      struct ceph_osd_request *req)
+{
+	dout("__register_linger_request %p\n", req);
+	ceph_osdc_get_request(req);
+	list_add_tail(&req->r_linger_item, &osdc->req_linger);
+	if (req->r_osd)
+		list_add_tail(&req->r_linger_osd,
+			      &req->r_osd->o_linger_requests);
+}
+
+/*
+ * Drop @req's linger registration (and the ref it held).  Mirrors
+ * __unregister_request(): an osd left with no requests moves to the
+ * lru, and r_osd is cleared unless a normal registration still needs
+ * it.  Caller must hold osdc->request_mutex.
+ */
+static void __unregister_linger_request(struct ceph_osd_client *osdc,
+					struct ceph_osd_request *req)
+{
+	dout("__unregister_linger_request %p\n", req);
+	list_del_init(&req->r_linger_item);
+	if (req->r_osd) {
+		list_del_init(&req->r_linger_osd);
+
+		if (list_empty(&req->r_osd->o_requests) &&
+		    list_empty(&req->r_osd->o_linger_requests)) {
+			dout("moving osd to %p lru\n", req->r_osd);
+			__move_osd_to_lru(osdc, req->r_osd);
+		}
+		/* keep r_osd while the request itself is still registered */
+		if (list_empty(&req->r_osd_item))
+			req->r_osd = NULL;
+	}
+	ceph_osdc_put_request(req);	/* drop the linger list's ref */
+}
+
+/*
+ * Stop @req from lingering: clear r_linger and remove its linger
+ * registration, if any.  Safe to call whether or not the request is
+ * currently registered as a linger request.
+ */
+void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
+					 struct ceph_osd_request *req)
+{
+	mutex_lock(&osdc->request_mutex);
+	if (req->r_linger) {
+		req->r_linger = 0;
+		__unregister_linger_request(osdc, req);
+	}
+	mutex_unlock(&osdc->request_mutex);
+}
+EXPORT_SYMBOL(ceph_osdc_unregister_linger_request);
+
+/*
+ * Mark @req as a lingering request so that, once acked, it stays
+ * registered and is re-sent across osd resets and map changes until
+ * explicitly unregistered.  Idempotent.
+ */
+void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
+				  struct ceph_osd_request *req)
+{
+	if (req->r_linger)
+		return;
+
+	dout("set_request_linger %p\n", req);
+	req->r_linger = 1;
+}
+EXPORT_SYMBOL(ceph_osdc_set_request_linger);
+
+/*
+ * Returns whether a request should be blocked from being sent
+ * based on the current osdmap and osd_client settings.
+ *
+ * Reads pause when the map has PAUSERD set; writes pause when the
+ * map has PAUSEWR or FULL set.
+ *
+ * Caller should hold map_sem for read.
+ */
+static bool __req_should_be_paused(struct ceph_osd_client *osdc,
+				   struct ceph_osd_request *req)
+{
+	bool pauserd = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD);
+	bool pausewr = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR) ||
+		       ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL);
+
+	if ((req->r_flags & CEPH_OSD_FLAG_READ) && pauserd)
+		return true;
+	if ((req->r_flags & CEPH_OSD_FLAG_WRITE) && pausewr)
+		return true;
+	return false;
+}
+
+/*
+ * Calculate mapping of a request to a PG.  Takes tiering into account.
+ *
+ * Lazily initializes the target oloc/oid from the base values, then,
+ * unless IGNORE_OVERLAY is set, redirects the target pool to the
+ * pool's read or write tier as appropriate.  Returns the result of
+ * ceph_oloc_oid_to_pg() with the (possibly retargeted) locator.
+ */
+static int __calc_request_pg(struct ceph_osdmap *osdmap,
+			     struct ceph_osd_request *req,
+			     struct ceph_pg *pg_out)
+{
+	bool need_check_tiering;
+
+	need_check_tiering = false;
+	if (req->r_target_oloc.pool == -1) {
+		req->r_target_oloc = req->r_base_oloc; /* struct */
+		need_check_tiering = true;
+	}
+	if (req->r_target_oid.name_len == 0) {
+		ceph_oid_copy(&req->r_target_oid, &req->r_base_oid);
+		need_check_tiering = true;
+	}
+
+	if (need_check_tiering &&
+	    (req->r_flags & CEPH_OSD_FLAG_IGNORE_OVERLAY) == 0) {
+		struct ceph_pg_pool_info *pi;
+
+		pi = ceph_pg_pool_by_id(osdmap, req->r_target_oloc.pool);
+		if (pi) {
+			/* reads go to the read tier, writes to the write tier */
+			if ((req->r_flags & CEPH_OSD_FLAG_READ) &&
+			    pi->read_tier >= 0)
+				req->r_target_oloc.pool = pi->read_tier;
+			if ((req->r_flags & CEPH_OSD_FLAG_WRITE) &&
+			    pi->write_tier >= 0)
+				req->r_target_oloc.pool = pi->write_tier;
+		}
+		/* !pi is caught in ceph_oloc_oid_to_pg() */
+	}
+
+	return ceph_oloc_oid_to_pg(osdmap, &req->r_target_oloc,
+				   &req->r_target_oid, pg_out);
+}
+
+/*
+ * Pick an osd (the first 'up' osd in the pg), allocate the osd struct
+ * (as needed), and set the request r_osd appropriately.  If there is
+ * no up osd, set r_osd to NULL.  Move the request to the appropriate
+ * list (unsent, homeless) or leave on in-flight lru.
+ *
+ * Return 0 if unchanged, 1 if changed, or negative on error.
+ *
+ * Caller should hold map_sem for read and request_mutex.
+ */
+static int __map_request(struct ceph_osd_client *osdc,
+			 struct ceph_osd_request *req, int force_resend)
+{
+	struct ceph_pg pgid;
+	int acting[CEPH_PG_MAX_SIZE];
+	int num, o;
+	int err;
+	bool was_paused;
+
+	dout("map_request %p tid %lld\n", req, req->r_tid);
+
+	err = __calc_request_pg(osdc->osdmap, req, &pgid);
+	if (err) {
+		list_move(&req->r_req_lru_item, &osdc->req_notarget);
+		return err;
+	}
+	req->r_pgid = pgid;
+
+	num = ceph_calc_pg_acting(osdc->osdmap, pgid, acting, &o);
+	if (num < 0)
+		num = 0;
+
+	/* a request that just became unpaused must be resent */
+	was_paused = req->r_paused;
+	req->r_paused = __req_should_be_paused(osdc, req);
+	if (was_paused && !req->r_paused)
+		force_resend = 1;
+
+	/* no change if target osd, incarnation and acting set all match,
+	 * if the request is homeless and stays so, or if it is paused */
+	if ((!force_resend &&
+	     req->r_osd && req->r_osd->o_osd == o &&
+	     req->r_sent >= req->r_osd->o_incarnation &&
+	     req->r_num_pg_osds == num &&
+	     memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) ||
+	    (req->r_osd == NULL && o == -1) ||
+	    req->r_paused)
+		return 0;  /* no change */
+
+	dout("map_request tid %llu pgid %lld.%x osd%d (was osd%d)\n",
+	     req->r_tid, pgid.pool, pgid.seed, o,
+	     req->r_osd ? req->r_osd->o_osd : -1);
+
+	/* record full pg acting set */
+	memcpy(req->r_pg_osds, acting, sizeof(acting[0]) * num);
+	req->r_num_pg_osds = num;
+
+	/* detach from the old osd before attaching to the new one */
+	if (req->r_osd) {
+		__cancel_request(req);
+		list_del_init(&req->r_osd_item);
+		req->r_osd = NULL;
+	}
+
+	req->r_osd = __lookup_osd(osdc, o);
+	if (!req->r_osd && o >= 0) {
+		err = -ENOMEM;
+		req->r_osd = create_osd(osdc, o);
+		if (!req->r_osd) {
+			list_move(&req->r_req_lru_item, &osdc->req_notarget);
+			goto out;
+		}
+
+		dout("map_request osd %p is osd%d\n", req->r_osd, o);
+		__insert_osd(osdc, req->r_osd);
+
+		ceph_con_open(&req->r_osd->o_con,
+			      CEPH_ENTITY_TYPE_OSD, o,
+			      &osdc->osdmap->osd_addr[o]);
+	}
+
+	if (req->r_osd) {
+		__remove_osd_from_lru(req->r_osd);
+		list_add_tail(&req->r_osd_item, &req->r_osd->o_requests);
+		list_move_tail(&req->r_req_lru_item, &osdc->req_unsent);
+	} else {
+		list_move_tail(&req->r_req_lru_item, &osdc->req_notarget);
+	}
+	err = 1;   /* osd or pg changed */
+
+out:
+	return err;
+}
+
+/*
+ * Fill in the mutable parts of the request message (epoch, flags,
+ * pool, pgid, attempt count, reassert version), move the request to
+ * the in-flight lru and hand it to the messenger.
+ *
+ * caller should hold map_sem (for read) and request_mutex
+ */
+static void __send_request(struct ceph_osd_client *osdc,
+			   struct ceph_osd_request *req)
+{
+	void *p;
+
+	dout("send_request %p tid %llu to osd%d flags %d pg %lld.%x\n",
+	     req, req->r_tid, req->r_osd->o_osd, req->r_flags,
+	     (unsigned long long)req->r_pgid.pool, req->r_pgid.seed);
+
+	/* fill in message content that changes each time we send it */
+	put_unaligned_le32(osdc->osdmap->epoch, req->r_request_osdmap_epoch);
+	put_unaligned_le32(req->r_flags, req->r_request_flags);
+	put_unaligned_le64(req->r_target_oloc.pool, req->r_request_pool);
+	p = req->r_request_pgid;
+	ceph_encode_64(&p, req->r_pgid.pool);
+	ceph_encode_32(&p, req->r_pgid.seed);
+	put_unaligned_le64(1, req->r_request_attempts);  /* FIXME */
+	memcpy(req->r_request_reassert_version, &req->r_reassert_version,
+	       sizeof(req->r_reassert_version));
+
+	req->r_stamp = jiffies;	/* for handle_timeout()'s keepalive check */
+	list_move_tail(&req->r_req_lru_item, &osdc->req_lru);
+
+	ceph_msg_get(req->r_request); /* send consumes a ref */
+
+	/* r_sent records the osd incarnation the message was sent to */
+	req->r_sent = req->r_osd->o_incarnation;
+
+	ceph_con_send(&req->r_osd->o_con, req->r_request);
+}
+
+/*
+ * Send any requests in the queue (req_unsent).
+ *
+ * The _safe iterator is required because __send_request() moves each
+ * request's r_req_lru_item onto the in-flight lru.
+ */
+static void __send_queued(struct ceph_osd_client *osdc)
+{
+	struct ceph_osd_request *req, *tmp;
+
+	dout("__send_queued\n");
+	list_for_each_entry_safe(req, tmp, &osdc->req_unsent, r_req_lru_item)
+		__send_request(osdc, req);
+}
+
+/*
+ * Register @req and try to map it to an osd.  With @nofail, a mapping
+ * failure leaves the request registered (to be retried on the next
+ * map update) and 0 is returned; otherwise the request is
+ * unregistered and the error returned.
+ *
+ * Caller should hold map_sem for read and request_mutex.
+ */
+static int __ceph_osdc_start_request(struct ceph_osd_client *osdc,
+				     struct ceph_osd_request *req,
+				     bool nofail)
+{
+	int rc;
+
+	__register_request(osdc, req);
+	req->r_sent = 0;
+	req->r_got_reply = 0;
+	rc = __map_request(osdc, req, 0);
+	if (rc < 0) {
+		if (nofail) {
+			dout("osdc_start_request failed map, "
+				" will retry %lld\n", req->r_tid);
+			rc = 0;
+		} else {
+			__unregister_request(osdc, req);
+		}
+		return rc;
+	}
+
+	if (req->r_osd == NULL) {
+		/* homeless request: wait for a newer osdmap */
+		dout("send_request %p no up osds in pg\n", req);
+		ceph_monc_request_next_osdmap(&osdc->client->monc);
+	} else {
+		__send_queued(osdc);
+	}
+
+	return 0;
+}
+
+/*
+ * Timeout callback, called every N seconds when 1 or more osd
+ * requests has been active for more than N seconds.  When this
+ * happens, we ping all OSDs with requests who have timed out to
+ * ensure any communications channel reset is detected.  Reset the
+ * request timeouts another N seconds in the future as we go.
+ * Reschedule the timeout event another N seconds in future (unless
+ * there are no open requests).
+ */
+static void handle_timeout(struct work_struct *work)
+{
+	struct ceph_osd_client *osdc =
+		container_of(work, struct ceph_osd_client, timeout_work.work);
+	struct ceph_osd_request *req;
+	struct ceph_osd *osd;
+	unsigned long keepalive =
+		osdc->client->options->osd_keepalive_timeout * HZ;
+	struct list_head slow_osds;
+	dout("timeout\n");
+	down_read(&osdc->map_sem);
+
+	ceph_monc_request_next_osdmap(&osdc->client->monc);
+
+	mutex_lock(&osdc->request_mutex);
+
+	/*
+	 * ping osds that are a bit slow.  this ensures that if there
+	 * is a break in the TCP connection we will notice, and reopen
+	 * a connection with that osd (from the fault callback).
+	 */
+	INIT_LIST_HEAD(&slow_osds);
+	list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) {
+		/* req_lru is ordered by r_stamp: stop at the first fresh one */
+		if (time_before(jiffies, req->r_stamp + keepalive))
+			break;
+
+		osd = req->r_osd;
+		BUG_ON(!osd);
+		dout(" tid %llu is slow, will send keepalive on osd%d\n",
+		     req->r_tid, osd->o_osd);
+		list_move_tail(&osd->o_keepalive_item, &slow_osds);
+	}
+	/* send keepalives outside the iteration over req_lru */
+	while (!list_empty(&slow_osds)) {
+		osd = list_entry(slow_osds.next, struct ceph_osd,
+				 o_keepalive_item);
+		list_del_init(&osd->o_keepalive_item);
+		ceph_con_keepalive(&osd->o_con);
+	}
+
+	__schedule_osd_timeout(osdc);
+	__send_queued(osdc);
+	mutex_unlock(&osdc->request_mutex);
+	up_read(&osdc->map_sem);
+}
+
+/*
+ * Periodic work: drop osd structs that have been idle for too long,
+ * then reschedule itself at a quarter of the idle ttl.
+ */
+static void handle_osds_timeout(struct work_struct *work)
+{
+	struct ceph_osd_client *osdc =
+		container_of(work, struct ceph_osd_client,
+			     osds_timeout_work.work);
+	unsigned long delay =
+		osdc->client->options->osd_idle_ttl * HZ >> 2;
+
+	dout("osds timeout\n");
+	down_read(&osdc->map_sem);
+	remove_old_osds(osdc);
+	up_read(&osdc->map_sem);
+
+	schedule_delayed_work(&osdc->osds_timeout_work,
+			      round_jiffies_relative(delay));
+}
+
+/*
+ * Decode a ceph_object_locator off the wire into @oloc.  Only the
+ * pool field is used; an encoding with a key, namespace or hash set
+ * is rejected with -EINVAL, as are unsupported struct versions.
+ *
+ * NOTE(review): the per-field reads after the length check are not
+ * individually bounded against struct_end; a malformed (too-short)
+ * inner encoding relies on the outer ceph_decode_need(len) bound —
+ * confirm against current upstream.
+ */
+static int ceph_oloc_decode(void **p, void *end,
+			    struct ceph_object_locator *oloc)
+{
+	u8 struct_v, struct_cv;
+	u32 len;
+	void *struct_end;
+	int ret = 0;
+
+	ceph_decode_need(p, end, 1 + 1 + 4, e_inval);
+	struct_v = ceph_decode_8(p);
+	struct_cv = ceph_decode_8(p);
+	if (struct_v < 3) {
+		pr_warn("got v %d < 3 cv %d of ceph_object_locator\n",
+			struct_v, struct_cv);
+		goto e_inval;
+	}
+	if (struct_cv > 6) {
+		pr_warn("got v %d cv %d > 6 of ceph_object_locator\n",
+			struct_v, struct_cv);
+		goto e_inval;
+	}
+	len = ceph_decode_32(p);
+	ceph_decode_need(p, end, len, e_inval);
+	struct_end = *p + len;
+
+	oloc->pool = ceph_decode_64(p);
+	*p += 4; /* skip preferred */
+
+	len = ceph_decode_32(p);
+	if (len > 0) {
+		pr_warn("ceph_object_locator::key is set\n");
+		goto e_inval;
+	}
+
+	if (struct_v >= 5) {
+		len = ceph_decode_32(p);
+		if (len > 0) {
+			pr_warn("ceph_object_locator::nspace is set\n");
+			goto e_inval;
+		}
+	}
+
+	if (struct_v >= 6) {
+		s64 hash = ceph_decode_64(p);
+		if (hash != -1) {
+			pr_warn("ceph_object_locator::hash is set\n");
+			goto e_inval;
+		}
+	}
+
+	/* skip the rest */
+	*p = struct_end;
+out:
+	return ret;
+
+e_inval:
+	ret = -EINVAL;
+	goto out;
+}
+
+/*
+ * Decode a ceph_request_redirect off the wire into @redir.  Only the
+ * embedded object locator is supported; a redirect with an object
+ * name set is rejected with -EINVAL.  osd_instructions are skipped.
+ */
+static int ceph_redirect_decode(void **p, void *end,
+				struct ceph_request_redirect *redir)
+{
+	u8 struct_v, struct_cv;
+	u32 len;
+	void *struct_end;
+	int ret;
+
+	ceph_decode_need(p, end, 1 + 1 + 4, e_inval);
+	struct_v = ceph_decode_8(p);
+	struct_cv = ceph_decode_8(p);
+	if (struct_cv > 1) {
+		pr_warn("got v %d cv %d > 1 of ceph_request_redirect\n",
+			struct_v, struct_cv);
+		goto e_inval;
+	}
+	len = ceph_decode_32(p);
+	ceph_decode_need(p, end, len, e_inval);
+	struct_end = *p + len;
+
+	ret = ceph_oloc_decode(p, end, &redir->oloc);
+	if (ret)
+		goto out;
+
+	len = ceph_decode_32(p);
+	if (len > 0) {
+		pr_warn("ceph_request_redirect::object_name is set\n");
+		goto e_inval;
+	}
+
+	len = ceph_decode_32(p);
+	*p += len; /* skip osd_instructions */
+
+	/* skip the rest */
+	*p = struct_end;
+out:
+	return ret;
+
+e_inval:
+	ret = -EINVAL;
+	goto out;
+}
+
+/* Wake anyone waiting for the "safe" (on-disk) ack, e.g. fsync. */
+static void complete_request(struct ceph_osd_request *req)
+{
+	complete_all(&req->r_safe_completion);  /* fsync waiter */
+}
+
+/*
+ * handle osd op reply.  either call the callback if it is specified,
+ * or do the completion to wake up the waiting thread.
+ *
+ * Decodes the reply, matches it to a registered request by tid,
+ * records per-op lengths/results, handles pool redirects (v6+),
+ * unregisters the request when appropriate and fires the unsafe/
+ * normal/safe completions exactly once each.
+ */
+static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
+			 struct ceph_connection *con)
+{
+	void *p, *end;
+	struct ceph_osd_request *req;
+	struct ceph_request_redirect redir;
+	u64 tid;
+	int object_len;
+	unsigned int numops;
+	int payload_len, flags;
+	s32 result;
+	s32 retry_attempt;
+	struct ceph_pg pg;
+	int err;
+	u32 reassert_epoch;
+	u64 reassert_version;
+	u32 osdmap_epoch;
+	int already_completed;
+	u32 bytes;
+	unsigned int i;
+
+	tid = le64_to_cpu(msg->hdr.tid);
+	dout("handle_reply %p tid %llu\n", msg, tid);
+
+	p = msg->front.iov_base;
+	end = p + msg->front.iov_len;
+
+	/* skip object name; the tid is enough to find the request */
+	ceph_decode_need(&p, end, 4, bad);
+	object_len = ceph_decode_32(&p);
+	ceph_decode_need(&p, end, object_len, bad);
+	p += object_len;
+
+	err = ceph_decode_pgid(&p, end, &pg);
+	if (err)
+		goto bad;
+
+	ceph_decode_need(&p, end, 8 + 4 + 4 + 8 + 4, bad);
+	flags = ceph_decode_64(&p);
+	result = ceph_decode_32(&p);
+	reassert_epoch = ceph_decode_32(&p);
+	reassert_version = ceph_decode_64(&p);
+	osdmap_epoch = ceph_decode_32(&p);
+
+	/* lookup */
+	down_read(&osdc->map_sem);
+	mutex_lock(&osdc->request_mutex);
+	req = __lookup_request(osdc, tid);
+	if (req == NULL) {
+		dout("handle_reply tid %llu dne\n", tid);
+		goto bad_mutex;
+	}
+	ceph_osdc_get_request(req);
+
+	dout("handle_reply %p tid %llu req %p result %d\n", msg, tid,
+	     req, result);
+
+	/* per-op payload lengths must be sane and match the request */
+	ceph_decode_need(&p, end, 4, bad_put);
+	numops = ceph_decode_32(&p);
+	if (numops > CEPH_OSD_MAX_OP)
+		goto bad_put;
+	if (numops != req->r_num_ops)
+		goto bad_put;
+	payload_len = 0;
+	ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad_put);
+	for (i = 0; i < numops; i++) {
+		struct ceph_osd_op *op = p;
+		int len;
+
+		len = le32_to_cpu(op->payload_len);
+		req->r_reply_op_len[i] = len;
+		dout(" op %d has %d bytes\n", i, len);
+		payload_len += len;
+		p += sizeof(*op);
+	}
+	bytes = le32_to_cpu(msg->hdr.data_len);
+	if (payload_len != bytes) {
+		/* use pr_warn with \n, consistent with the rest of the file */
+		pr_warn("sum of op payload lens %d != data_len %d\n",
+			payload_len, bytes);
+		goto bad_put;
+	}
+
+	ceph_decode_need(&p, end, 4 + numops * 4, bad_put);
+	retry_attempt = ceph_decode_32(&p);
+	for (i = 0; i < numops; i++)
+		req->r_reply_op_result[i] = ceph_decode_32(&p);
+
+	if (le16_to_cpu(msg->hdr.version) >= 6) {
+		p += 8 + 4; /* skip replay_version */
+		p += 8; /* skip user_version */
+
+		err = ceph_redirect_decode(&p, end, &redir);
+		if (err)
+			goto bad_put;
+	} else {
+		redir.oloc.pool = -1;
+	}
+
+	if (redir.oloc.pool != -1) {
+		/* osd asked us to retarget another pool: resubmit */
+		dout("redirect pool %lld\n", redir.oloc.pool);
+
+		__unregister_request(osdc, req);
+
+		req->r_target_oloc = redir.oloc; /* struct */
+
+		/*
+		 * Start redirect requests with nofail=true.  If
+		 * mapping fails, request will end up on the notarget
+		 * list, waiting for the new osdmap (which can take
+		 * a while), even though the original request mapped
+		 * successfully.  In the future we might want to follow
+		 * original request's nofail setting here.
+		 */
+		err = __ceph_osdc_start_request(osdc, req, true);
+		BUG_ON(err);
+
+		goto out_unlock;
+	}
+
+	already_completed = req->r_got_reply;
+	if (!req->r_got_reply) {
+		req->r_result = result;
+		dout("handle_reply result %d bytes %d\n", req->r_result,
+		     bytes);
+		if (req->r_result == 0)
+			req->r_result = bytes;
+
+		/* in case this is a write and we need to replay, */
+		req->r_reassert_version.epoch = cpu_to_le32(reassert_epoch);
+		req->r_reassert_version.version = cpu_to_le64(reassert_version);
+
+		req->r_got_reply = 1;
+	} else if ((flags & CEPH_OSD_FLAG_ONDISK) == 0) {
+		dout("handle_reply tid %llu dup ack\n", tid);
+		goto out_unlock;
+	}
+
+	dout("handle_reply tid %llu flags %d\n", tid, flags);
+
+	if (req->r_linger && (flags & CEPH_OSD_FLAG_ONDISK))
+		__register_linger_request(osdc, req);
+
+	/* either this is a read, or we got the safe response */
+	if (result < 0 ||
+	    (flags & CEPH_OSD_FLAG_ONDISK) ||
+	    ((flags & CEPH_OSD_FLAG_WRITE) == 0))
+		__unregister_request(osdc, req);
+
+	mutex_unlock(&osdc->request_mutex);
+	up_read(&osdc->map_sem);
+
+	/* callbacks run without the mutex/rwsem held */
+	if (!already_completed) {
+		if (req->r_unsafe_callback &&
+		    result >= 0 && !(flags & CEPH_OSD_FLAG_ONDISK))
+			req->r_unsafe_callback(req, true);
+		if (req->r_callback)
+			req->r_callback(req, msg);
+		else
+			complete_all(&req->r_completion);
+	}
+
+	if (flags & CEPH_OSD_FLAG_ONDISK) {
+		if (req->r_unsafe_callback && already_completed)
+			req->r_unsafe_callback(req, false);
+		complete_request(req);
+	}
+
+out:
+	dout("req=%p req->r_linger=%d\n", req, req->r_linger);
+	ceph_osdc_put_request(req);
+	return;
+out_unlock:
+	mutex_unlock(&osdc->request_mutex);
+	up_read(&osdc->map_sem);
+	goto out;
+
+bad_put:
+	req->r_result = -EIO;
+	__unregister_request(osdc, req);
+	if (req->r_callback)
+		req->r_callback(req, msg);
+	else
+		complete_all(&req->r_completion);
+	complete_request(req);
+	ceph_osdc_put_request(req);
+bad_mutex:
+	mutex_unlock(&osdc->request_mutex);
+	up_read(&osdc->map_sem);
+bad:
+	pr_err("corrupt osd_op_reply got %d %d\n",
+	       (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len));
+	ceph_msg_dump(msg);
+}
+
+/*
+ * Reset every osd that the current map no longer reports as up, or
+ * whose address in the map differs from the one we are connected to.
+ * rb_next() is fetched before resetting because __reset_osd() may
+ * remove the node from the tree.
+ */
+static void reset_changed_osds(struct ceph_osd_client *osdc)
+{
+	struct rb_node *p, *n;
+
+	for (p = rb_first(&osdc->osds); p; p = n) {
+		struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node);
+
+		n = rb_next(p);
+		if (!ceph_osd_is_up(osdc->osdmap, osd->o_osd) ||
+		    memcmp(&osd->o_con.peer_addr,
+			   ceph_osd_addr(osdc->osdmap,
+					 osd->o_osd),
+			   sizeof(struct ceph_entity_addr)) != 0)
+			__reset_osd(osdc, osd);
+	}
+}
+
+/*
+ * Requeue requests whose mapping to an OSD has changed.  If requests map to
+ * no osd, request a new map.
+ *
+ * Caller should hold map_sem for read.
+ */
+static void kick_requests(struct ceph_osd_client *osdc, bool force_resend,
+			  bool force_resend_writes)
+{
+	struct ceph_osd_request *req, *nreq;
+	struct rb_node *p;
+	int needmap = 0;
+	int err;
+	bool force_resend_req;
+
+	dout("kick_requests %s %s\n", force_resend ? " (force resend)" : "",
+		force_resend_writes ? " (force resend writes)" : "");
+	mutex_lock(&osdc->request_mutex);
+	for (p = rb_first(&osdc->requests); p; ) {
+		req = rb_entry(p, struct ceph_osd_request, r_node);
+		/* advance first: __unregister_request() may erase r_node */
+		p = rb_next(p);
+
+		/*
+		 * For linger requests that have not yet been
+		 * registered, move them to the linger list; they'll
+		 * be sent to the osd in the loop below.  Unregister
+		 * the request before re-registering it as a linger
+		 * request to ensure the __map_request() below
+		 * will decide it needs to be sent.
+		 */
+		if (req->r_linger && list_empty(&req->r_linger_item)) {
+			dout("%p tid %llu restart on osd%d\n",
+			     req, req->r_tid,
+			     req->r_osd ? req->r_osd->o_osd : -1);
+			ceph_osdc_get_request(req);
+			__unregister_request(osdc, req);
+			__register_linger_request(osdc, req);
+			ceph_osdc_put_request(req);
+			continue;
+		}
+
+		force_resend_req = force_resend ||
+			(force_resend_writes &&
+				req->r_flags & CEPH_OSD_FLAG_WRITE);
+		err = __map_request(osdc, req, force_resend_req);
+		if (err < 0)
+			continue; /* error */
+		if (req->r_osd == NULL) {
+			dout("%p tid %llu maps to no osd\n", req, req->r_tid);
+			needmap++; /* request a newer map */
+		} else if (err > 0) {
+			if (!req->r_linger) {
+				dout("%p tid %llu requeued on osd%d\n", req,
+				     req->r_tid,
+				     req->r_osd ? req->r_osd->o_osd : -1);
+				req->r_flags |= CEPH_OSD_FLAG_RETRY;
+			}
+		}
+	}
+
+	/* remap linger requests; re-registered ones get resent */
+	list_for_each_entry_safe(req, nreq, &osdc->req_linger,
+				 r_linger_item) {
+		dout("linger req=%p req->r_osd=%p\n", req, req->r_osd);
+
+		err = __map_request(osdc, req,
+				    force_resend || force_resend_writes);
+		dout("__map_request returned %d\n", err);
+		if (err == 0)
+			continue;  /* no change and no osd was specified */
+		if (err < 0)
+			continue;  /* hrm! */
+		if (req->r_osd == NULL) {
+			dout("tid %llu maps to no valid osd\n", req->r_tid);
+			needmap++;  /* request a newer map */
+			continue;
+		}
+
+		dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid,
+		     req->r_osd ? req->r_osd->o_osd : -1);
+		__register_request(osdc, req);
+		__unregister_linger_request(osdc, req);
+	}
+	reset_changed_osds(osdc);
+	mutex_unlock(&osdc->request_mutex);
+
+	if (needmap) {
+		dout("%d requests for down osds, need new map\n", needmap);
+		ceph_monc_request_next_osdmap(&osdc->client->monc);
+	}
+}
+
+
+/*
+ * Process updated osd map.
+ *
+ * The message contains any number of incremental and full maps, normally
+ * indicating some sort of topology change in the cluster.  Kick requests
+ * off to different OSDs as needed.
+ *
+ * Takes map_sem for write while installing maps, then downgrades to
+ * read before acking the map to the monitor and resending queued
+ * requests.
+ */
+void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
+{
+	void *p, *end, *next;
+	u32 nr_maps, maplen;
+	u32 epoch;
+	struct ceph_osdmap *newmap = NULL, *oldmap;
+	int err;
+	struct ceph_fsid fsid;
+	bool was_full;
+
+	dout("handle_map have %u\n", osdc->osdmap ? osdc->osdmap->epoch : 0);
+	p = msg->front.iov_base;
+	end = p + msg->front.iov_len;
+
+	/* verify fsid */
+	ceph_decode_need(&p, end, sizeof(fsid), bad);
+	ceph_decode_copy(&p, &fsid, sizeof(fsid));
+	if (ceph_check_fsid(osdc->client, &fsid) < 0)
+		return;
+
+	down_write(&osdc->map_sem);
+
+	/* track FULL across map transitions so writes get resent */
+	was_full = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL);
+
+	/* incremental maps */
+	ceph_decode_32_safe(&p, end, nr_maps, bad);
+	dout(" %d inc maps\n", nr_maps);
+	while (nr_maps > 0) {
+		ceph_decode_need(&p, end, 2*sizeof(u32), bad);
+		epoch = ceph_decode_32(&p);
+		maplen = ceph_decode_32(&p);
+		ceph_decode_need(&p, end, maplen, bad);
+		next = p + maplen;
+		/* only apply the increment that follows our epoch */
+		if (osdc->osdmap && osdc->osdmap->epoch+1 == epoch) {
+			dout("applying incremental map %u len %d\n",
+			     epoch, maplen);
+			newmap = osdmap_apply_incremental(&p, next,
+							  osdc->osdmap,
+							  &osdc->client->msgr);
+			if (IS_ERR(newmap)) {
+				err = PTR_ERR(newmap);
+				goto bad;
+			}
+			BUG_ON(!newmap);
+			if (newmap != osdc->osdmap) {
+				ceph_osdmap_destroy(osdc->osdmap);
+				osdc->osdmap = newmap;
+			}
+			was_full = was_full ||
+				ceph_osdmap_flag(osdc->osdmap,
+						 CEPH_OSDMAP_FULL);
+			kick_requests(osdc, 0, was_full);
+		} else {
+			dout("ignoring incremental map %u len %d\n",
+			     epoch, maplen);
+		}
+		p = next;
+		nr_maps--;
+	}
+	if (newmap)
+		goto done;
+
+	/* full maps */
+	ceph_decode_32_safe(&p, end, nr_maps, bad);
+	dout(" %d full maps\n", nr_maps);
+	while (nr_maps) {
+		ceph_decode_need(&p, end, 2*sizeof(u32), bad);
+		epoch = ceph_decode_32(&p);
+		maplen = ceph_decode_32(&p);
+		ceph_decode_need(&p, end, maplen, bad);
+		if (nr_maps > 1) {
+			dout("skipping non-latest full map %u len %d\n",
+			     epoch, maplen);
+		} else if (osdc->osdmap && osdc->osdmap->epoch >= epoch) {
+			dout("skipping full map %u len %d, "
+			     "older than our %u\n", epoch, maplen,
+			     osdc->osdmap->epoch);
+		} else {
+			int skipped_map = 0;
+
+			dout("taking full map %u len %d\n", epoch, maplen);
+			newmap = ceph_osdmap_decode(&p, p+maplen);
+			if (IS_ERR(newmap)) {
+				err = PTR_ERR(newmap);
+				goto bad;
+			}
+			BUG_ON(!newmap);
+			oldmap = osdc->osdmap;
+			osdc->osdmap = newmap;
+			if (oldmap) {
+				/* if epochs were skipped, force a resend */
+				if (oldmap->epoch + 1 < newmap->epoch)
+					skipped_map = 1;
+				ceph_osdmap_destroy(oldmap);
+			}
+			was_full = was_full ||
+				ceph_osdmap_flag(osdc->osdmap,
+						 CEPH_OSDMAP_FULL);
+			kick_requests(osdc, skipped_map, was_full);
+		}
+		p += maplen;
+		nr_maps--;
+	}
+
+	if (!osdc->osdmap)
+		goto bad;
+done:
+	downgrade_write(&osdc->map_sem);
+	ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
+
+	/*
+	 * subscribe to subsequent osdmap updates if full to ensure
+	 * we find out when we are no longer full and stop returning
+	 * ENOSPC.
+	 */
+	if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) ||
+		ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD) ||
+		ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR))
+		ceph_monc_request_next_osdmap(&osdc->client->monc);
+
+	mutex_lock(&osdc->request_mutex);
+	__send_queued(osdc);
+	mutex_unlock(&osdc->request_mutex);
+	up_read(&osdc->map_sem);	/* pairs with downgrade_write above */
+	wake_up_all(&osdc->client->auth_wq);
+	return;
+
+bad:
+	pr_err("osdc handle_map corrupt msg\n");
+	ceph_msg_dump(msg);
+	up_write(&osdc->map_sem);
+	return;
+}
+
+/*
+ * watch/notify callback event infrastructure
+ *
+ * These callbacks are used both for watch and notify operations.
+ */
+
+/* kref release: free the event once the last reference is dropped. */
+static void __release_event(struct kref *kref)
+{
+	struct ceph_osd_event *event =
+		container_of(kref, struct ceph_osd_event, kref);
+
+	dout("__release_event %p\n", event);
+	kfree(event);
+}
+
+/* Take a reference on @event; pairs with ceph_osdc_put_event(). */
+static void get_event(struct ceph_osd_event *event)
+{
+	kref_get(&event->kref);
+}
+
+/* Drop a reference on @event; frees it when the last ref goes away. */
+void ceph_osdc_put_event(struct ceph_osd_event *event)
+{
+	kref_put(&event->kref, __release_event);
+}
+EXPORT_SYMBOL(ceph_osdc_put_event);
+
+/*
+ * Insert @new into the watch/notify event tree, keyed by cookie.
+ * Caller must hold osdc->event_lock; duplicate cookies are a bug.
+ */
+static void __insert_event(struct ceph_osd_client *osdc,
+			   struct ceph_osd_event *new)
+{
+	struct rb_node **link = &osdc->event_tree.rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link != NULL) {
+		struct ceph_osd_event *cur;
+
+		parent = *link;
+		cur = rb_entry(parent, struct ceph_osd_event, node);
+		if (new->cookie < cur->cookie)
+			link = &parent->rb_left;
+		else if (new->cookie > cur->cookie)
+			link = &parent->rb_right;
+		else
+			BUG();	/* cookie already registered */
+	}
+
+	rb_link_node(&new->node, parent, link);
+	rb_insert_color(&new->node, &osdc->event_tree);
+}
+
+/*
+ * Look up the event registered under @cookie, or NULL if none.
+ * Caller must hold osdc->event_lock.
+ */
+static struct ceph_osd_event *__find_event(struct ceph_osd_client *osdc,
+					   u64 cookie)
+{
+	struct rb_node *node = osdc->event_tree.rb_node;
+
+	while (node != NULL) {
+		struct ceph_osd_event *event =
+			rb_entry(node, struct ceph_osd_event, node);
+
+		if (cookie < event->cookie)
+			node = node->rb_left;
+		else if (cookie > event->cookie)
+			node = node->rb_right;
+		else
+			return event;
+	}
+	return NULL;
+}
+
+/*
+ * Unlink @event from the event tree (if linked) and drop the tree's
+ * reference.  Caller must hold osdc->event_lock.
+ */
+static void __remove_event(struct ceph_osd_event *event)
+{
+	struct ceph_osd_client *osdc = event->osdc;
+
+	if (!RB_EMPTY_NODE(&event->node)) {
+		dout("__remove_event removed %p\n", event);
+		rb_erase(&event->node, &osdc->event_tree);
+		ceph_osdc_put_event(event);	/* drop the tree's ref */
+	} else {
+		dout("__remove_event didn't remove %p\n", event);
+	}
+}
+
+/*
+ * Allocate a watch/notify event, assign it a unique cookie and insert
+ * it into the event tree.  Returns 0 and sets *@pevent on success,
+ * -ENOMEM on allocation failure.  The new event carries two refs:
+ * one owned by the tree, one returned to the caller (dropped via
+ * ceph_osdc_cancel_event()/ceph_osdc_put_event()).
+ */
+int ceph_osdc_create_event(struct ceph_osd_client *osdc,
+			   void (*event_cb)(u64, u64, u8, void *),
+			   void *data, struct ceph_osd_event **pevent)
+{
+	struct ceph_osd_event *event;
+
+	/* GFP_NOIO: may be called on the writeback/IO path */
+	event = kmalloc(sizeof(*event), GFP_NOIO);
+	if (!event)
+		return -ENOMEM;
+
+	dout("create_event %p\n", event);
+	event->cb = event_cb;
+	event->one_shot = 0;
+	event->data = data;
+	event->osdc = osdc;
+	INIT_LIST_HEAD(&event->osd_node);
+	RB_CLEAR_NODE(&event->node);
+	kref_init(&event->kref);   /* one ref for us */
+	kref_get(&event->kref);    /* one ref for the caller */
+
+	spin_lock(&osdc->event_lock);
+	event->cookie = ++osdc->event_count;
+	__insert_event(osdc, event);
+	spin_unlock(&osdc->event_lock);
+
+	*pevent = event;
+	return 0;
+}
+EXPORT_SYMBOL(ceph_osdc_create_event);
+
+/*
+ * Remove @event from the event tree and drop both the tree's ref and
+ * the caller's ref taken in ceph_osdc_create_event().
+ */
+void ceph_osdc_cancel_event(struct ceph_osd_event *event)
+{
+	struct ceph_osd_client *osdc = event->osdc;
+
+	dout("cancel_event %p\n", event);
+	spin_lock(&osdc->event_lock);
+	__remove_event(event);
+	spin_unlock(&osdc->event_lock);
+	ceph_osdc_put_event(event); /* caller's */
+}
+EXPORT_SYMBOL(ceph_osdc_cancel_event);
+
+
+/*
+ * Workqueue callback: invoke the event's user callback out of the
+ * message-dispatch context, then drop the ref taken by
+ * handle_watch_notify() and free the work item.
+ */
+static void do_event_work(struct work_struct *work)
+{
+	struct ceph_osd_event_work *event_work =
+		container_of(work, struct ceph_osd_event_work, work);
+	struct ceph_osd_event *event = event_work->event;
+	u64 ver = event_work->ver;
+	u64 notify_id = event_work->notify_id;
+	u8 opcode = event_work->opcode;
+
+	dout("do_event_work completing %p\n", event);
+	event->cb(ver, notify_id, opcode, event->data);
+	dout("do_event_work completed %p\n", event);
+	ceph_osdc_put_event(event);
+	kfree(event_work);
+}
+
+
+/*
+ * Process osd watch notifications
+ *
+ * Decodes the notify message, looks up the registered event by cookie
+ * and queues its callback on the notify workqueue.  A ref is taken on
+ * the event for the work item and dropped in do_event_work() (or on
+ * the error path here).
+ */
+static void handle_watch_notify(struct ceph_osd_client *osdc,
+				struct ceph_msg *msg)
+{
+	void *p, *end;
+	u8 proto_ver;
+	u64 cookie, ver, notify_id;
+	u8 opcode;
+	struct ceph_osd_event *event;
+	struct ceph_osd_event_work *event_work;
+
+	p = msg->front.iov_base;
+	end = p + msg->front.iov_len;
+
+	ceph_decode_8_safe(&p, end, proto_ver, bad);
+	ceph_decode_8_safe(&p, end, opcode, bad);
+	ceph_decode_64_safe(&p, end, cookie, bad);
+	ceph_decode_64_safe(&p, end, ver, bad);
+	ceph_decode_64_safe(&p, end, notify_id, bad);
+
+	spin_lock(&osdc->event_lock);
+	event = __find_event(osdc, cookie);
+	if (event) {
+		BUG_ON(event->one_shot);
+		get_event(event);	/* ref for the queued work */
+	}
+	spin_unlock(&osdc->event_lock);
+	dout("handle_watch_notify cookie %lld ver %lld event %p\n",
+	     cookie, ver, event);
+	if (event) {
+		event_work = kmalloc(sizeof(*event_work), GFP_NOIO);
+		if (!event_work) {
+			dout("ERROR: could not allocate event_work\n");
+			goto done_err;
+		}
+		INIT_WORK(&event_work->work, do_event_work);
+		event_work->event = event;
+		event_work->ver = ver;
+		event_work->notify_id = notify_id;
+		event_work->opcode = opcode;
+		if (!queue_work(osdc->notify_wq, &event_work->work)) {
+			dout("WARNING: failed to queue notify event work\n");
+			goto done_err;
+		}
+	}
+
+	return;
+
+done_err:
+	ceph_osdc_put_event(event);	/* drop the work item's ref */
+	return;
+
+bad:
+	pr_err("osdc handle_watch_notify corrupt msg\n");
+	return;
+}
+
+/*
+ * build new request AND message
+ *
+ * Encodes the full MOSDOp front (epoch, flags, mtime, reassert
+ * version, oloc, pgid, oid, ops, snap context, attempt count),
+ * recording the offsets of the fields that __send_request() rewrites
+ * on every (re)send, and finalizes the message header lengths.
+ *
+ * @off is used only as the data_off alignment hint for writes.
+ */
+void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off,
+			     struct ceph_snap_context *snapc, u64 snap_id,
+			     struct timespec *mtime)
+{
+	struct ceph_msg *msg = req->r_request;
+	void *p;
+	size_t msg_size;
+	int flags = req->r_flags;
+	u64 data_len;
+	unsigned int i;
+
+	req->r_snapid = snap_id;
+	req->r_snapc = ceph_get_snap_context(snapc);
+
+	/* encode request */
+	msg->hdr.version = cpu_to_le16(4);
+
+	p = msg->front.iov_base;
+	ceph_encode_32(&p, 1);   /* client_inc is always 1 */
+	req->r_request_osdmap_epoch = p;
+	p += 4;
+	req->r_request_flags = p;
+	p += 4;
+	/* mtime only encoded for writes; p advances either way */
+	if (req->r_flags & CEPH_OSD_FLAG_WRITE)
+		ceph_encode_timespec(p, mtime);
+	p += sizeof(struct ceph_timespec);
+	req->r_request_reassert_version = p;
+	p += sizeof(struct ceph_eversion); /* will get filled in */
+
+	/* oloc */
+	ceph_encode_8(&p, 4);
+	ceph_encode_8(&p, 4);
+	ceph_encode_32(&p, 8 + 4 + 4);
+	req->r_request_pool = p;
+	p += 8;
+	ceph_encode_32(&p, -1);  /* preferred */
+	ceph_encode_32(&p, 0);   /* key len */
+
+	ceph_encode_8(&p, 1);
+	req->r_request_pgid = p;
+	p += 8 + 4;
+	ceph_encode_32(&p, -1);  /* preferred */
+
+	/* oid */
+	ceph_encode_32(&p, req->r_base_oid.name_len);
+	memcpy(p, req->r_base_oid.name, req->r_base_oid.name_len);
+	dout("oid '%.*s' len %d\n", req->r_base_oid.name_len,
+	     req->r_base_oid.name, req->r_base_oid.name_len);
+	p += req->r_base_oid.name_len;
+
+	/* ops--can imply data */
+	ceph_encode_16(&p, (u16)req->r_num_ops);
+	data_len = 0;
+	for (i = 0; i < req->r_num_ops; i++) {
+		data_len += osd_req_encode_op(req, p, i);
+		p += sizeof(struct ceph_osd_op);
+	}
+
+	/* snaps */
+	ceph_encode_64(&p, req->r_snapid);
+	ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0);
+	ceph_encode_32(&p, req->r_snapc ? req->r_snapc->num_snaps : 0);
+	if (req->r_snapc) {
+		/* NOTE(review): iterates @snapc while reading
+		 * req->r_snapc->snaps; r_snapc was just set from snapc
+		 * above, so they are the same context here */
+		for (i = 0; i < snapc->num_snaps; i++) {
+			ceph_encode_64(&p, req->r_snapc->snaps[i]);
+		}
+	}
+
+	req->r_request_attempts = p;
+	p += 4;
+
+	/* data */
+	if (flags & CEPH_OSD_FLAG_WRITE) {
+		u16 data_off;
+
+		/*
+		 * The header "data_off" is a hint to the receiver
+		 * allowing it to align received data into its
+		 * buffers such that there's no need to re-copy
+		 * it before writing it to disk (direct I/O).
+		 */
+		data_off = (u16) (off & 0xffff);
+		req->r_request->hdr.data_off = cpu_to_le16(data_off);
+	}
+	req->r_request->hdr.data_len = cpu_to_le32(data_len);
+
+	BUG_ON(p > msg->front.iov_base + msg->front.iov_len);
+	msg_size = p - msg->front.iov_base;
+	msg->front.iov_len = msg_size;
+	msg->hdr.front_len = cpu_to_le32(msg_size);
+
+	dout("build_request msg_size was %d\n", (int)msg_size);
+}
+EXPORT_SYMBOL(ceph_osdc_build_request);
+
/*
 * Register request, send initial attempt.
 *
 * Locked wrapper: takes map_sem (read) so the osdmap stays stable while
 * the request is mapped to an OSD, and request_mutex for the request
 * trees, then delegates to __ceph_osdc_start_request().  @nofail is
 * passed straight through (its exact semantics live in the helper,
 * which is defined elsewhere in this file).
 */
int ceph_osdc_start_request(struct ceph_osd_client *osdc,
			    struct ceph_osd_request *req,
			    bool nofail)
{
	int rc;

	down_read(&osdc->map_sem);
	mutex_lock(&osdc->request_mutex);

	rc = __ceph_osdc_start_request(osdc, req, nofail);

	mutex_unlock(&osdc->request_mutex);
	up_read(&osdc->map_sem);

	return rc;
}
EXPORT_SYMBOL(ceph_osdc_start_request);
+
/*
 * wait for a request to complete
 *
 * Returns the request's r_result on completion, or the negative value
 * from wait_for_completion_interruptible() if the wait was interrupted
 * (in which case the request is cancelled and unregistered first).
 */
int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
			   struct ceph_osd_request *req)
{
	int rc;

	rc = wait_for_completion_interruptible(&req->r_completion);
	if (rc < 0) {
		/* interrupted: tear the request down under request_mutex,
		 * then complete it so other waiters are released */
		mutex_lock(&osdc->request_mutex);
		__cancel_request(req);
		__unregister_request(osdc, req);
		mutex_unlock(&osdc->request_mutex);
		complete_request(req);
		dout("wait_request tid %llu canceled/timed out\n", req->r_tid);
		return rc;
	}

	dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result);
	return req->r_result;
}
EXPORT_SYMBOL(ceph_osdc_wait_request);
+
/*
 * sync - wait for all in-flight requests to flush. avoid starvation.
 *
 * Walks the request tree in tid order, waiting for each in-flight WRITE
 * to reach its "safe" (committed to disk) completion.  Only requests
 * with tid <= the last_tid sampled at entry are waited on, so requests
 * submitted after ceph_osdc_sync() starts cannot starve it.
 */
void ceph_osdc_sync(struct ceph_osd_client *osdc)
{
	struct ceph_osd_request *req;
	u64 last_tid, next_tid = 0;

	mutex_lock(&osdc->request_mutex);
	last_tid = osdc->last_tid;
	while (1) {
		req = __lookup_request_ge(osdc, next_tid);
		if (!req)
			break;
		if (req->r_tid > last_tid)
			break;

		next_tid = req->r_tid + 1;
		/* reads don't need to be flushed */
		if ((req->r_flags & CEPH_OSD_FLAG_WRITE) == 0)
			continue;

		/* hold a ref across the unlocked wait so the request
		 * can't be freed under us */
		ceph_osdc_get_request(req);
		mutex_unlock(&osdc->request_mutex);
		dout("sync waiting on tid %llu (last is %llu)\n",
		     req->r_tid, last_tid);
		wait_for_completion(&req->r_safe_completion);
		mutex_lock(&osdc->request_mutex);
		ceph_osdc_put_request(req);
	}
	mutex_unlock(&osdc->request_mutex);
	dout("sync done (thru tid %llu)\n", last_tid);
}
EXPORT_SYMBOL(ceph_osdc_sync);
+
+/*
+ * Call all pending notify callbacks - for use after a watch is
+ * unregistered, to make sure no more callbacks for it will be invoked
+ */
+extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc)
+{
+ flush_workqueue(osdc->notify_wq);
+}
+EXPORT_SYMBOL(ceph_osdc_flush_notifies);
+
+
/*
 * init, shutdown
 */

/*
 * Initialize an osd client: zero/initialize all bookkeeping (osdmap,
 * request and osd trees, lru/unsent/notarget/linger lists, event tree),
 * start the osd idle-timeout delayed work, and allocate the request
 * mempool, the op/op-reply message pools and the watch-notify
 * workqueue.  On failure, everything allocated so far is torn down via
 * the goto-cleanup chain.  Returns 0 or a negative errno.
 */
int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
{
	int err;

	dout("init\n");
	osdc->client = client;
	osdc->osdmap = NULL;
	init_rwsem(&osdc->map_sem);
	init_completion(&osdc->map_waiters);
	osdc->last_requested_map = 0;
	mutex_init(&osdc->request_mutex);
	osdc->last_tid = 0;
	osdc->osds = RB_ROOT;
	INIT_LIST_HEAD(&osdc->osd_lru);
	osdc->requests = RB_ROOT;
	INIT_LIST_HEAD(&osdc->req_lru);
	INIT_LIST_HEAD(&osdc->req_unsent);
	INIT_LIST_HEAD(&osdc->req_notarget);
	INIT_LIST_HEAD(&osdc->req_linger);
	osdc->num_requests = 0;
	INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout);
	INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
	spin_lock_init(&osdc->event_lock);
	osdc->event_tree = RB_ROOT;
	osdc->event_count = 0;

	/* periodically expire idle osd connections */
	schedule_delayed_work(&osdc->osds_timeout_work,
	   round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ));

	err = -ENOMEM;
	osdc->req_mempool = mempool_create_kmalloc_pool(10,
					sizeof(struct ceph_osd_request));
	if (!osdc->req_mempool)
		goto out;

	err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP,
				OSD_OP_FRONT_LEN, 10, true,
				"osd_op");
	if (err < 0)
		goto out_mempool;
	err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY,
				OSD_OPREPLY_FRONT_LEN, 10, true,
				"osd_op_reply");
	if (err < 0)
		goto out_msgpool;

	err = -ENOMEM;
	osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify");
	if (!osdc->notify_wq)
		goto out_msgpool_reply;

	return 0;

out_msgpool_reply:
	ceph_msgpool_destroy(&osdc->msgpool_op_reply);
out_msgpool:
	ceph_msgpool_destroy(&osdc->msgpool_op);
out_mempool:
	mempool_destroy(osdc->req_mempool);
out:
	return err;
}
+
/*
 * Tear down an osd client: drain and destroy the notify workqueue,
 * cancel the timeout works, free the osdmap, drop all osd sessions,
 * then free the request mempool and message pools (reverse of
 * ceph_osdc_init()).
 */
void ceph_osdc_stop(struct ceph_osd_client *osdc)
{
	/* flush before destroy so in-flight notify callbacks finish */
	flush_workqueue(osdc->notify_wq);
	destroy_workqueue(osdc->notify_wq);
	cancel_delayed_work_sync(&osdc->timeout_work);
	cancel_delayed_work_sync(&osdc->osds_timeout_work);
	if (osdc->osdmap) {
		ceph_osdmap_destroy(osdc->osdmap);
		osdc->osdmap = NULL;
	}
	remove_all_osds(osdc);
	mempool_destroy(osdc->req_mempool);
	ceph_msgpool_destroy(&osdc->msgpool_op);
	ceph_msgpool_destroy(&osdc->msgpool_op_reply);
}
+
/*
 * Read some contiguous pages. If we cross a stripe boundary, shorten
 * *plen. Return number of bytes read, or error.
 *
 * Synchronous: builds a single READ request, submits it and waits for
 * the reply.  *plen may be shortened by ceph_osdc_new_request() if the
 * extent crosses an object boundary.
 */
int ceph_osdc_readpages(struct ceph_osd_client *osdc,
			struct ceph_vino vino, struct ceph_file_layout *layout,
			u64 off, u64 *plen,
			u32 truncate_seq, u64 truncate_size,
			struct page **pages, int num_pages, int page_align)
{
	struct ceph_osd_request *req;
	int rc = 0;

	dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
	     vino.snap, off, *plen);
	req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1,
				    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
				    NULL, truncate_seq, truncate_size,
				    false);
	if (IS_ERR(req))
		return PTR_ERR(req);

	/* it may be a short read due to an object boundary */

	osd_req_op_extent_osd_data_pages(req, 0,
				pages, *plen, page_align, false, false);

	dout("readpages final extent is %llu~%llu (%llu bytes align %d)\n",
	     off, *plen, *plen, page_align);

	/* reads carry no snap context and no mtime */
	ceph_osdc_build_request(req, off, NULL, vino.snap, NULL);

	rc = ceph_osdc_start_request(osdc, req, false);
	if (!rc)
		rc = ceph_osdc_wait_request(osdc, req);

	ceph_osdc_put_request(req);
	dout("readpages result %d\n", rc);
	return rc;
}
EXPORT_SYMBOL(ceph_osdc_readpages);
+
/*
 * do a synchronous write on N pages
 *
 * Builds a single WRITE request for [off, off+len), submits it with
 * nofail=true and waits for the result.  Returns the number of bytes
 * written (len) on success, or a negative errno.
 */
int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
			 struct ceph_file_layout *layout,
			 struct ceph_snap_context *snapc,
			 u64 off, u64 len,
			 u32 truncate_seq, u64 truncate_size,
			 struct timespec *mtime,
			 struct page **pages, int num_pages)
{
	struct ceph_osd_request *req;
	int rc = 0;
	int page_align = off & ~PAGE_MASK;

	BUG_ON(vino.snap != CEPH_NOSNAP);	/* snapshots aren't writeable */
	req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1,
				    CEPH_OSD_OP_WRITE,
				    CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
				    snapc, truncate_seq, truncate_size,
				    true);
	if (IS_ERR(req))
		return PTR_ERR(req);

	/* it may be a short write due to an object boundary */
	osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align,
				false, false);
	dout("writepages %llu~%llu (%llu bytes)\n", off, len, len);

	ceph_osdc_build_request(req, off, snapc, CEPH_NOSNAP, mtime);

	rc = ceph_osdc_start_request(osdc, req, true);
	if (!rc)
		rc = ceph_osdc_wait_request(osdc, req);

	ceph_osdc_put_request(req);
	if (rc == 0)
		rc = len;
	dout("writepages result %d\n", rc);
	return rc;
}
EXPORT_SYMBOL(ceph_osdc_writepages);
+
+int ceph_osdc_setup(void)
+{
+ BUG_ON(ceph_osd_request_cache);
+ ceph_osd_request_cache = kmem_cache_create("ceph_osd_request",
+ sizeof (struct ceph_osd_request),
+ __alignof__(struct ceph_osd_request),
+ 0, NULL);
+
+ return ceph_osd_request_cache ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL(ceph_osdc_setup);
+
+void ceph_osdc_cleanup(void)
+{
+ BUG_ON(!ceph_osd_request_cache);
+ kmem_cache_destroy(ceph_osd_request_cache);
+ ceph_osd_request_cache = NULL;
+}
+EXPORT_SYMBOL(ceph_osdc_cleanup);
+
/*
 * handle incoming message
 *
 * Messenger dispatch hook for OSD connections: routes osdmap updates,
 * op replies and watch-notify messages to their handlers.  Consumes
 * (puts) the message in all cases.
 */
static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
{
	struct ceph_osd *osd = con->private;
	struct ceph_osd_client *osdc;
	int type = le16_to_cpu(msg->hdr.type);

	/* connection may already be detached from its osd */
	if (!osd)
		goto out;
	osdc = osd->o_osdc;

	switch (type) {
	case CEPH_MSG_OSD_MAP:
		ceph_osdc_handle_map(osdc, msg);
		break;
	case CEPH_MSG_OSD_OPREPLY:
		handle_reply(osdc, msg, con);
		break;
	case CEPH_MSG_WATCH_NOTIFY:
		handle_watch_notify(osdc, msg);
		break;

	default:
		pr_err("received unknown message type %d %s\n", type,
		       ceph_msg_type_name(type));
	}
out:
	ceph_msg_put(msg);
}
+
/*
 * lookup and return message for incoming reply. set up reply message
 * pages.
 *
 * Called from the messenger to pick the ceph_msg that an incoming
 * OSD_OPREPLY should be read into.  Finds the request by tid, revokes
 * the preallocated reply from any previous connection, grows the front
 * buffer if the incoming reply is larger than what was preallocated,
 * and sanity-checks that the request's data buffer can hold the reply
 * payload.  Sets *skip=1 (and returns NULL) if the message should be
 * discarded.
 */
static struct ceph_msg *get_reply(struct ceph_connection *con,
				  struct ceph_msg_header *hdr,
				  int *skip)
{
	struct ceph_osd *osd = con->private;
	struct ceph_osd_client *osdc = osd->o_osdc;
	struct ceph_msg *m;
	struct ceph_osd_request *req;
	int front_len = le32_to_cpu(hdr->front_len);
	int data_len = le32_to_cpu(hdr->data_len);
	u64 tid;

	tid = le64_to_cpu(hdr->tid);
	mutex_lock(&osdc->request_mutex);
	req = __lookup_request(osdc, tid);
	if (!req) {
		/* request was cancelled/completed; drop the reply */
		*skip = 1;
		m = NULL;
		dout("get_reply unknown tid %llu from osd%d\n", tid,
		     osd->o_osd);
		goto out;
	}

	if (req->r_reply->con)
		dout("%s revoking msg %p from old con %p\n", __func__,
		     req->r_reply, req->r_reply->con);
	ceph_msg_revoke_incoming(req->r_reply);

	if (front_len > req->r_reply->front_alloc_len) {
		/* preallocated front is too small; allocate a bigger one */
		pr_warning("get_reply front %d > preallocated %d (%u#%llu)\n",
			   front_len, req->r_reply->front_alloc_len,
			   (unsigned int)con->peer_name.type,
			   le64_to_cpu(con->peer_name.num));
		m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS,
				 false);
		if (!m)
			goto out;
		ceph_msg_put(req->r_reply);
		req->r_reply = m;
	}
	m = ceph_msg_get(req->r_reply);

	if (data_len > 0) {
		struct ceph_osd_data *osd_data;

		/*
		 * XXX This is assuming there is only one op containing
		 * XXX page data.  Probably OK for reads, but this
		 * XXX ought to be done more generally.
		 */
		osd_data = osd_req_op_extent_osd_data(req, 0);
		if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
			if (osd_data->pages &&
				unlikely(osd_data->length < data_len)) {

				/* reply payload would overflow our pages */
				pr_warning("tid %lld reply has %d bytes "
					"we had only %llu bytes ready\n",
					tid, data_len, osd_data->length);
				*skip = 1;
				ceph_msg_put(m);
				m = NULL;
				goto out;
			}
		}
	}
	*skip = 0;
	dout("get_reply tid %lld %p\n", tid, m);

out:
	mutex_unlock(&osdc->request_mutex);
	return m;

}
+
/*
 * Messenger alloc_msg hook: allocate a message to receive an incoming
 * frame into.  Osdmaps and watch-notify messages get a fresh buffer;
 * op replies are matched to their request via get_reply().  Unknown
 * types are skipped.
 */
static struct ceph_msg *alloc_msg(struct ceph_connection *con,
				  struct ceph_msg_header *hdr,
				  int *skip)
{
	struct ceph_osd *osd = con->private;
	int type = le16_to_cpu(hdr->type);
	int front = le32_to_cpu(hdr->front_len);

	*skip = 0;
	switch (type) {
	case CEPH_MSG_OSD_MAP:
	case CEPH_MSG_WATCH_NOTIFY:
		return ceph_msg_new(type, front, GFP_NOFS, false);
	case CEPH_MSG_OSD_OPREPLY:
		return get_reply(con, hdr, skip);
	default:
		pr_info("alloc_msg unexpected msg type %d from osd%d\n", type,
			osd->o_osd);
		*skip = 1;
		return NULL;
	}
}
+
+/*
+ * Wrappers to refcount containing ceph_osd struct
+ */
+static struct ceph_connection *get_osd_con(struct ceph_connection *con)
+{
+ struct ceph_osd *osd = con->private;
+ if (get_osd(osd))
+ return con;
+ return NULL;
+}
+
+static void put_osd_con(struct ceph_connection *con)
+{
+ struct ceph_osd *osd = con->private;
+ put_osd(osd);
+}
+
/*
 * authentication
 */
/*
 * Note: returned pointer is the address of a structure that's
 * managed separately.  Caller must *not* attempt to free it.
 *
 * Build or refresh the cephx authorizer for this OSD connection.  With
 * @force_new, any existing authorizer is destroyed and recreated (used
 * after an authentication failure).  Returns the per-osd handshake
 * struct, or an ERR_PTR.
 */
static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
					int *proto, int force_new)
{
	struct ceph_osd *o = con->private;
	struct ceph_osd_client *osdc = o->o_osdc;
	struct ceph_auth_client *ac = osdc->client->monc.auth;
	struct ceph_auth_handshake *auth = &o->o_auth;

	if (force_new && auth->authorizer) {
		ceph_auth_destroy_authorizer(ac, auth->authorizer);
		auth->authorizer = NULL;
	}
	if (!auth->authorizer) {
		int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
						      auth);
		if (ret)
			return ERR_PTR(ret);
	} else {
		/* reuse the existing authorizer, refreshing it if needed */
		int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
						auth);
		if (ret)
			return ERR_PTR(ret);
	}
	*proto = ac->protocol;

	return auth;
}
+
+
+static int verify_authorizer_reply(struct ceph_connection *con, int len)
+{
+ struct ceph_osd *o = con->private;
+ struct ceph_osd_client *osdc = o->o_osdc;
+ struct ceph_auth_client *ac = osdc->client->monc.auth;
+
+ return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer, len);
+}
+
+static int invalidate_authorizer(struct ceph_connection *con)
+{
+ struct ceph_osd *o = con->private;
+ struct ceph_osd_client *osdc = o->o_osdc;
+ struct ceph_auth_client *ac = osdc->client->monc.auth;
+
+ ceph_auth_invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD);
+ return ceph_monc_validate_auth(&osdc->client->monc);
+}
+
/*
 * Connection operations for OSD sessions: refcounting of the owning
 * ceph_osd, message dispatch/allocation, fault handling, and the cephx
 * authorizer hooks.
 */
static const struct ceph_connection_operations osd_con_ops = {
	.get = get_osd_con,
	.put = put_osd_con,
	.dispatch = dispatch,
	.get_authorizer = get_authorizer,
	.verify_authorizer_reply = verify_authorizer_reply,
	.invalidate_authorizer = invalidate_authorizer,
	.alloc_msg = alloc_msg,
	.fault = osd_reset,
};
diff --git a/libceph/osdmap.c b/libceph/osdmap.c
new file mode 100644
index 0000000..8b8a5a2
--- /dev/null
+++ b/libceph/osdmap.c
@@ -0,0 +1,1724 @@
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <asm/div64.h>
+
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/osdmap.h>
+#include <linux/ceph/decode.h>
+#include <linux/crush/hash.h>
+#include <linux/crush/mapper.h>
+
+char *ceph_osdmap_state_str(char *str, int len, int state)
+{
+ if (!len)
+ return str;
+
+ if ((state & CEPH_OSD_EXISTS) && (state & CEPH_OSD_UP))
+ snprintf(str, len, "exists, up");
+ else if (state & CEPH_OSD_EXISTS)
+ snprintf(str, len, "exists");
+ else if (state & CEPH_OSD_UP)
+ snprintf(str, len, "up");
+ else
+ snprintf(str, len, "doesn't exist");
+
+ return str;
+}
+
+/* maps */
+
/*
 * Number of significant bits in t (i.e. position of the highest set
 * bit); calc_bits_of(0) == 0.
 */
static int calc_bits_of(unsigned int t)
{
	int bits;

	for (bits = 0; t != 0; t >>= 1)
		bits++;

	return bits;
}
+
/*
 * the foo_mask is the smallest value 2^n-1 that is >= foo.
 *
 * These masks are used when reducing a pg hash onto pg_num/pgp_num.
 */
static void calc_pg_masks(struct ceph_pg_pool_info *pi)
{
	pi->pg_num_mask = (1 << calc_bits_of(pi->pg_num-1)) - 1;
	pi->pgp_num_mask = (1 << calc_bits_of(pi->pgp_num-1)) - 1;
}
+
/*
 * decode crush map
 */

/*
 * Uniform bucket: every item has the same weight; decode just that
 * single per-item weight.  Returns 0 or -EINVAL on short input.
 */
static int crush_decode_uniform_bucket(void **p, void *end,
				       struct crush_bucket_uniform *b)
{
	dout("crush_decode_uniform_bucket %p to %p\n", *p, end);
	ceph_decode_need(p, end, (1+b->h.size) * sizeof(u32), bad);
	b->item_weight = ceph_decode_32(p);
	return 0;
bad:
	return -EINVAL;
}
+
/*
 * List bucket: decode per-item weights and running sums.  On -ENOMEM
 * partially-allocated arrays are left attached to @b; the caller
 * (crush_decode()) presumably releases them via crush_destroy() --
 * NOTE(review): confirm crush_destroy() frees these arrays.
 */
static int crush_decode_list_bucket(void **p, void *end,
				    struct crush_bucket_list *b)
{
	int j;
	dout("crush_decode_list_bucket %p to %p\n", *p, end);
	b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
	if (b->item_weights == NULL)
		return -ENOMEM;
	b->sum_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
	if (b->sum_weights == NULL)
		return -ENOMEM;
	ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad);
	for (j = 0; j < b->h.size; j++) {
		b->item_weights[j] = ceph_decode_32(p);
		b->sum_weights[j] = ceph_decode_32(p);
	}
	return 0;
bad:
	return -EINVAL;
}
+
/*
 * Tree bucket: decode the node count followed by per-node weights.
 * Returns 0, -EINVAL on short input, or -ENOMEM.
 */
static int crush_decode_tree_bucket(void **p, void *end,
				    struct crush_bucket_tree *b)
{
	int j;
	dout("crush_decode_tree_bucket %p to %p\n", *p, end);
	ceph_decode_32_safe(p, end, b->num_nodes, bad);
	b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS);
	if (b->node_weights == NULL)
		return -ENOMEM;
	ceph_decode_need(p, end, b->num_nodes * sizeof(u32), bad);
	for (j = 0; j < b->num_nodes; j++)
		b->node_weights[j] = ceph_decode_32(p);
	return 0;
bad:
	return -EINVAL;
}
+
/*
 * Straw bucket: decode per-item weights and precomputed straw values.
 * Same error/cleanup contract as crush_decode_list_bucket().
 */
static int crush_decode_straw_bucket(void **p, void *end,
				     struct crush_bucket_straw *b)
{
	int j;
	dout("crush_decode_straw_bucket %p to %p\n", *p, end);
	b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
	if (b->item_weights == NULL)
		return -ENOMEM;
	b->straws = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
	if (b->straws == NULL)
		return -ENOMEM;
	ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad);
	for (j = 0; j < b->h.size; j++) {
		b->item_weights[j] = ceph_decode_32(p);
		b->straws[j] = ceph_decode_32(p);
	}
	return 0;
bad:
	return -EINVAL;
}
+
+static int skip_name_map(void **p, void *end)
+{
+ int len;
+ ceph_decode_32_safe(p, end, len ,bad);
+ while (len--) {
+ int strlen;
+ *p += sizeof(u32);
+ ceph_decode_32_safe(p, end, strlen, bad);
+ *p += strlen;
+}
+ return 0;
+bad:
+ return -EINVAL;
+}
+
/*
 * Decode an on-wire CRUSH map into a freshly allocated crush_map:
 * magic + limits, then the bucket array, the rule array, and finally
 * (optionally) the trailing name maps and tunables.  Tunable sections
 * are optional on the wire, so running out of input there is not an
 * error (goto done).  Returns the map or an ERR_PTR; on error all
 * partial allocations are released via crush_destroy().
 */
static struct crush_map *crush_decode(void *pbyval, void *end)
{
	struct crush_map *c;
	int err = -EINVAL;
	int i, j;
	void **p = &pbyval;
	void *start = pbyval;
	u32 magic;
	u32 num_name_maps;

	dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p));

	c = kzalloc(sizeof(*c), GFP_NOFS);
	if (c == NULL)
		return ERR_PTR(-ENOMEM);

	/* set tunables to default values */
	c->choose_local_tries = 2;
	c->choose_local_fallback_tries = 5;
	c->choose_total_tries = 19;
	c->chooseleaf_descend_once = 0;

	ceph_decode_need(p, end, 4*sizeof(u32), bad);
	magic = ceph_decode_32(p);
	if (magic != CRUSH_MAGIC) {
		pr_err("crush_decode magic %x != current %x\n",
		       (unsigned int)magic, (unsigned int)CRUSH_MAGIC);
		goto bad;
	}
	c->max_buckets = ceph_decode_32(p);
	c->max_rules = ceph_decode_32(p);
	c->max_devices = ceph_decode_32(p);

	c->buckets = kcalloc(c->max_buckets, sizeof(*c->buckets), GFP_NOFS);
	if (c->buckets == NULL)
		goto badmem;
	c->rules = kcalloc(c->max_rules, sizeof(*c->rules), GFP_NOFS);
	if (c->rules == NULL)
		goto badmem;

	/* buckets */
	for (i = 0; i < c->max_buckets; i++) {
		int size = 0;
		u32 alg;
		struct crush_bucket *b;

		ceph_decode_32_safe(p, end, alg, bad);
		/* alg 0 marks an unused bucket slot */
		if (alg == 0) {
			c->buckets[i] = NULL;
			continue;
		}
		dout("crush_decode bucket %d off %x %p to %p\n",
		     i, (int)(*p-start), *p, end);

		/* allocate the right concrete bucket type for this alg */
		switch (alg) {
		case CRUSH_BUCKET_UNIFORM:
			size = sizeof(struct crush_bucket_uniform);
			break;
		case CRUSH_BUCKET_LIST:
			size = sizeof(struct crush_bucket_list);
			break;
		case CRUSH_BUCKET_TREE:
			size = sizeof(struct crush_bucket_tree);
			break;
		case CRUSH_BUCKET_STRAW:
			size = sizeof(struct crush_bucket_straw);
			break;
		default:
			err = -EINVAL;
			goto bad;
		}
		BUG_ON(size == 0);
		b = c->buckets[i] = kzalloc(size, GFP_NOFS);
		if (b == NULL)
			goto badmem;

		/* common bucket header */
		ceph_decode_need(p, end, 4*sizeof(u32), bad);
		b->id = ceph_decode_32(p);
		b->type = ceph_decode_16(p);
		b->alg = ceph_decode_8(p);
		b->hash = ceph_decode_8(p);
		b->weight = ceph_decode_32(p);
		b->size = ceph_decode_32(p);

		dout("crush_decode bucket size %d off %x %p to %p\n",
		     b->size, (int)(*p-start), *p, end);

		b->items = kcalloc(b->size, sizeof(__s32), GFP_NOFS);
		if (b->items == NULL)
			goto badmem;
		b->perm = kcalloc(b->size, sizeof(u32), GFP_NOFS);
		if (b->perm == NULL)
			goto badmem;
		b->perm_n = 0;

		ceph_decode_need(p, end, b->size*sizeof(u32), bad);
		for (j = 0; j < b->size; j++)
			b->items[j] = ceph_decode_32(p);

		/* alg-specific payload */
		switch (b->alg) {
		case CRUSH_BUCKET_UNIFORM:
			err = crush_decode_uniform_bucket(p, end,
				  (struct crush_bucket_uniform *)b);
			if (err < 0)
				goto bad;
			break;
		case CRUSH_BUCKET_LIST:
			err = crush_decode_list_bucket(p, end,
			       (struct crush_bucket_list *)b);
			if (err < 0)
				goto bad;
			break;
		case CRUSH_BUCKET_TREE:
			err = crush_decode_tree_bucket(p, end,
				(struct crush_bucket_tree *)b);
			if (err < 0)
				goto bad;
			break;
		case CRUSH_BUCKET_STRAW:
			err = crush_decode_straw_bucket(p, end,
				(struct crush_bucket_straw *)b);
			if (err < 0)
				goto bad;
			break;
		}
	}

	/* rules */
	dout("rule vec is %p\n", c->rules);
	for (i = 0; i < c->max_rules; i++) {
		u32 yes;
		struct crush_rule *r;

		/* "yes" flag: is there a rule in this slot? */
		ceph_decode_32_safe(p, end, yes, bad);
		if (!yes) {
			dout("crush_decode NO rule %d off %x %p to %p\n",
			     i, (int)(*p-start), *p, end);
			c->rules[i] = NULL;
			continue;
		}

		dout("crush_decode rule %d off %x %p to %p\n",
		     i, (int)(*p-start), *p, end);

		/* len */
		ceph_decode_32_safe(p, end, yes, bad);
#if BITS_PER_LONG == 32
		/* guard the kmalloc size computation against overflow */
		err = -EINVAL;
		if (yes > (ULONG_MAX - sizeof(*r))
			  / sizeof(struct crush_rule_step))
			goto bad;
#endif
		r = c->rules[i] = kmalloc(sizeof(*r) +
					  yes*sizeof(struct crush_rule_step),
					  GFP_NOFS);
		if (r == NULL)
			goto badmem;
		dout(" rule %d is at %p\n", i, r);
		r->len = yes;
		ceph_decode_copy_safe(p, end, &r->mask, 4, bad); /* 4 u8's */
		ceph_decode_need(p, end, r->len*3*sizeof(u32), bad);
		for (j = 0; j < r->len; j++) {
			r->steps[j].op = ceph_decode_32(p);
			r->steps[j].arg1 = ceph_decode_32(p);
			r->steps[j].arg2 = ceph_decode_32(p);
		}
	}

	/* ignore trailing name maps. */
	for (num_name_maps = 0; num_name_maps < 3; num_name_maps++) {
		err = skip_name_map(p, end);
		if (err < 0)
			goto done;
	}

	/* tunables -- optional trailing sections; stop quietly at EOF */
	ceph_decode_need(p, end, 3*sizeof(u32), done);
	c->choose_local_tries = ceph_decode_32(p);
	c->choose_local_fallback_tries =  ceph_decode_32(p);
	c->choose_total_tries = ceph_decode_32(p);
	dout("crush decode tunable choose_local_tries = %d",
	     c->choose_local_tries);
	dout("crush decode tunable choose_local_fallback_tries = %d",
	     c->choose_local_fallback_tries);
	dout("crush decode tunable choose_total_tries = %d",
	     c->choose_total_tries);

	ceph_decode_need(p, end, sizeof(u32), done);
	c->chooseleaf_descend_once = ceph_decode_32(p);
	dout("crush decode tunable chooseleaf_descend_once = %d",
	     c->chooseleaf_descend_once);

done:
	dout("crush_decode success\n");
	return c;

badmem:
	err = -ENOMEM;
bad:
	dout("crush_decode fail %d\n", err);
	crush_destroy(c);
	return ERR_PTR(err);
}
+
+/*
+ * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
+ * to a set of osds) and primary_temp (explicit primary setting)
+ */
+static int pgid_cmp(struct ceph_pg l, struct ceph_pg r)
+{
+ if (l.pool < r.pool)
+ return -1;
+ if (l.pool > r.pool)
+ return 1;
+ if (l.seed < r.seed)
+ return -1;
+ if (l.seed > r.seed)
+ return 1;
+ return 0;
+}
+
/*
 * Insert @new into the pg mapping rbtree ordered by pgid_cmp().
 * Returns 0, or -EEXIST if a mapping for that pgid is already present.
 */
static int __insert_pg_mapping(struct ceph_pg_mapping *new,
			       struct rb_root *root)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct ceph_pg_mapping *pg = NULL;
	int c;

	dout("__insert_pg_mapping %llx %p\n", *(u64 *)&new->pgid, new);
	while (*p) {
		parent = *p;
		pg = rb_entry(parent, struct ceph_pg_mapping, node);
		c = pgid_cmp(new->pgid, pg->pgid);
		if (c < 0)
			p = &(*p)->rb_left;
		else if (c > 0)
			p = &(*p)->rb_right;
		else
			return -EEXIST;
	}

	rb_link_node(&new->node, parent, p);
	rb_insert_color(&new->node, root);
	return 0;
}
+
/*
 * Find the mapping for @pgid in the rbtree, or NULL if none.
 */
static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
						   struct ceph_pg pgid)
{
	struct rb_node *n = root->rb_node;
	struct ceph_pg_mapping *pg;
	int c;

	while (n) {
		pg = rb_entry(n, struct ceph_pg_mapping, node);
		c = pgid_cmp(pgid, pg->pgid);
		if (c < 0) {
			n = n->rb_left;
		} else if (c > 0) {
			n = n->rb_right;
		} else {
			dout("__lookup_pg_mapping %lld.%x got %p\n",
			     pgid.pool, pgid.seed, pg);
			return pg;
		}
	}
	return NULL;
}
+
/*
 * Remove and free the mapping for @pgid.  Returns 0, or -ENOENT if no
 * mapping exists for that pgid.
 */
static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid)
{
	struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid);

	if (pg) {
		dout("__remove_pg_mapping %lld.%x %p\n", pgid.pool, pgid.seed,
		     pg);
		rb_erase(&pg->node, root);
		kfree(pg);
		return 0;
	}
	dout("__remove_pg_mapping %lld.%x dne\n", pgid.pool, pgid.seed);
	return -ENOENT;
}
+
/*
 * rbtree of pg pool info
 */

/*
 * Insert @new into the pool-info tree keyed by pool id.  Returns 0 or
 * -EEXIST if that id is already present.
 */
static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct ceph_pg_pool_info *pi = NULL;

	while (*p) {
		parent = *p;
		pi = rb_entry(parent, struct ceph_pg_pool_info, node);
		if (new->id < pi->id)
			p = &(*p)->rb_left;
		else if (new->id > pi->id)
			p = &(*p)->rb_right;
		else
			return -EEXIST;
	}

	rb_link_node(&new->node, parent, p);
	rb_insert_color(&new->node, root);
	return 0;
}
+
/*
 * Find pool info by pool id, or NULL if not present.
 */
static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id)
{
	struct ceph_pg_pool_info *pi;
	struct rb_node *n = root->rb_node;

	while (n) {
		pi = rb_entry(n, struct ceph_pg_pool_info, node);
		if (id < pi->id)
			n = n->rb_left;
		else if (id > pi->id)
			n = n->rb_right;
		else
			return pi;
	}
	return NULL;
}
+
/* Public lookup of pool info by id; NULL if the pool doesn't exist. */
struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map, u64 id)
{
	return __lookup_pg_pool(&map->pg_pools, id);
}
+
/*
 * Return the name of pool @id, or NULL if the pool does not exist,
 * @id is CEPH_NOPOOL, or @id is out of the int range this code
 * supports.
 */
const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id)
{
	struct ceph_pg_pool_info *pi;

	if (id == CEPH_NOPOOL)
		return NULL;

	if (WARN_ON_ONCE(id > (u64) INT_MAX))
		return NULL;

	pi = __lookup_pg_pool(&map->pg_pools, (int) id);

	return pi ? pi->name : NULL;
}
EXPORT_SYMBOL(ceph_pg_pool_name_by_id);
+
/*
 * Linear scan of the pool tree for a pool with the given name.
 * Returns its id, or -ENOENT if no pool matches.
 */
int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
{
	struct rb_node *rbp;

	for (rbp = rb_first(&map->pg_pools); rbp; rbp = rb_next(rbp)) {
		struct ceph_pg_pool_info *pi =
			rb_entry(rbp, struct ceph_pg_pool_info, node);
		/* pools decoded before their name arrives have pi->name NULL */
		if (pi->name && strcmp(pi->name, name) == 0)
			return pi->id;
	}
	return -ENOENT;
}
EXPORT_SYMBOL(ceph_pg_poolid_by_name);
+
/* Unlink @pi from the pool tree and free it, including its name. */
static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
{
	rb_erase(&pi->node, root);
	kfree(pi->name);
	kfree(pi);
}
+
/*
 * Decode one ceph_pg_pool_info from its on-wire encoding.  Only the
 * fields the client cares about are kept; everything else is skipped
 * by advancing *p.  The whole pool record is length-prefixed, so on
 * success *p is set to pool_end regardless of how much was consumed.
 * Supports encoding versions 5..9 (ev), rejecting anything outside
 * that window.
 *
 * NOTE(review): the intermediate skips (*p += ...) inside the record
 * are not individually bounds-checked against pool_end; they rely on
 * the initial ceph_decode_need(len) plus a well-formed encoding --
 * confirm this is acceptable for the trusted-monitor input model.
 */
static int decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
{
	u8 ev, cv;
	unsigned len, num;
	void *pool_end;

	ceph_decode_need(p, end, 2 + 4, bad);
	ev = ceph_decode_8(p);  /* encoding version */
	cv = ceph_decode_8(p);  /* compat version */
	if (ev < 5) {
		pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv);
		return -EINVAL;
	}
	if (cv > 9) {
		pr_warning("got v %d cv %d > 9 of ceph_pg_pool\n", ev, cv);
		return -EINVAL;
	}
	len = ceph_decode_32(p);
	ceph_decode_need(p, end, len, bad);
	pool_end = *p + len;

	pi->type = ceph_decode_8(p);
	pi->size = ceph_decode_8(p);
	pi->crush_ruleset = ceph_decode_8(p);
	pi->object_hash = ceph_decode_8(p);

	pi->pg_num = ceph_decode_32(p);
	pi->pgp_num = ceph_decode_32(p);

	*p += 4 + 4;  /* skip lpg* */
	*p += 4;      /* skip last_change */
	*p += 8 + 4;  /* skip snap_seq, snap_epoch */

	/* skip snaps */
	num = ceph_decode_32(p);
	while (num--) {
		*p += 8;  /* snapid key */
		*p += 1 + 1; /* versions */
		len = ceph_decode_32(p);
		*p += len;
	}

	/* skip removed_snaps */
	num = ceph_decode_32(p);
	*p += num * (8 + 8);

	*p += 8;  /* skip auid */
	pi->flags = ceph_decode_64(p);
	*p += 4;  /* skip crash_replay_interval */

	if (ev >= 7)
		*p += 1;  /* skip min_size */

	if (ev >= 8)
		*p += 8 + 8;  /* skip quota_max_* */

	if (ev >= 9) {
		/* skip tiers */
		num = ceph_decode_32(p);
		*p += num * 8;

		*p += 8;  /* skip tier_of */
		*p += 1;  /* skip cache_mode */

		pi->read_tier = ceph_decode_64(p);
		pi->write_tier = ceph_decode_64(p);
	} else {
		/* no tiering info in older encodings */
		pi->read_tier = -1;
		pi->write_tier = -1;
	}

	/* ignore the rest */

	*p = pool_end;
	calc_pg_masks(pi);
	return 0;

bad:
	return -EINVAL;
}
+
/*
 * Decode the (pool id -> name) map and attach each name to its
 * already-decoded pool info.  Names for unknown pool ids are skipped
 * silently; an existing name is replaced.
 */
static int decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
{
	struct ceph_pg_pool_info *pi;
	u32 num, len;
	u64 pool;

	ceph_decode_32_safe(p, end, num, bad);
	dout(" %d pool names\n", num);
	while (num--) {
		ceph_decode_64_safe(p, end, pool, bad);
		ceph_decode_32_safe(p, end, len, bad);
		dout(" pool %llu len %d\n", pool, len);
		ceph_decode_need(p, end, len, bad);
		pi = __lookup_pg_pool(&map->pg_pools, pool);
		if (pi) {
			char *name = kstrndup(*p, len, GFP_NOFS);

			if (!name)
				return -ENOMEM;
			kfree(pi->name);
			pi->name = name;
			dout(" name is %s\n", pi->name);
		}
		/* advance past the name bytes whether or not we kept them */
		*p += len;
	}
	return 0;

bad:
	return -EINVAL;
}
+
/*
 * osd map
 */

/*
 * Free an osdmap and everything hanging off it: the crush map, the
 * pg_temp/primary_temp mapping trees, the pool-info tree, and the
 * per-osd state/weight/addr/affinity arrays.
 */
void ceph_osdmap_destroy(struct ceph_osdmap *map)
{
	dout("osdmap_destroy %p\n", map);
	if (map->crush)
		crush_destroy(map->crush);
	while (!RB_EMPTY_ROOT(&map->pg_temp)) {
		struct ceph_pg_mapping *pg =
			rb_entry(rb_first(&map->pg_temp),
				 struct ceph_pg_mapping, node);
		rb_erase(&pg->node, &map->pg_temp);
		kfree(pg);
	}
	while (!RB_EMPTY_ROOT(&map->primary_temp)) {
		struct ceph_pg_mapping *pg =
			rb_entry(rb_first(&map->primary_temp),
				 struct ceph_pg_mapping, node);
		rb_erase(&pg->node, &map->primary_temp);
		kfree(pg);
	}
	while (!RB_EMPTY_ROOT(&map->pg_pools)) {
		struct ceph_pg_pool_info *pi =
			rb_entry(rb_first(&map->pg_pools),
				 struct ceph_pg_pool_info, node);
		__remove_pg_pool(&map->pg_pools, pi);
	}
	kfree(map->osd_state);
	kfree(map->osd_weight);
	kfree(map->osd_addr);
	kfree(map->osd_primary_affinity);
	kfree(map);
}
+
+/*
+ * Adjust max_osd value, (re)allocate arrays.
+ *
+ * The new elements are properly initialized.
+ */
+static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
+{
+ u8 *state;
+ u32 *weight;
+ struct ceph_entity_addr *addr;
+ int i;
+
+ state = krealloc(map->osd_state, max*sizeof(*state), GFP_NOFS);
+ weight = krealloc(map->osd_weight, max*sizeof(*weight), GFP_NOFS);
+ addr = krealloc(map->osd_addr, max*sizeof(*addr), GFP_NOFS);
+ if (!state || !weight || !addr) {
+ kfree(state);
+ kfree(weight);
+ kfree(addr);
+
+ return -ENOMEM;
+ }
+
+ for (i = map->max_osd; i < max; i++) {
+ state[i] = 0;
+ weight[i] = CEPH_OSD_OUT;
+ memset(addr + i, 0, sizeof(*addr));
+ }
+
+ map->osd_state = state;
+ map->osd_weight = weight;
+ map->osd_addr = addr;
+
+ if (map->osd_primary_affinity) {
+ u32 *affinity;
+
+ affinity = krealloc(map->osd_primary_affinity,
+ max*sizeof(*affinity), GFP_NOFS);
+ if (!affinity)
+ return -ENOMEM;
+
+ for (i = map->max_osd; i < max; i++)
+ affinity[i] = CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
+
+ map->osd_primary_affinity = affinity;
+ }
+
+ map->max_osd = max;
+
+ return 0;
+}
+
+#define OSDMAP_WRAPPER_COMPAT_VER 7
+#define OSDMAP_CLIENT_DATA_COMPAT_VER 1
+
/*
 * Return 0 or error.  On success, *v is set to 0 for old (v6) osdmaps,
 * to struct_v of the client_data section for new (v7 and above)
 * osdmaps.
 *
 * New-style maps carry a wrapper header (struct_v/struct_compat/len)
 * followed by a client_data header; old-style maps encode a bare u16
 * version.  @prefix is only used in warning messages ("full" vs
 * "inc", per the callers).
 */
static int get_osdmap_client_data_v(void **p, void *end,
				    const char *prefix, u8 *v)
{
	u8 struct_v;

	ceph_decode_8_safe(p, end, struct_v, e_inval);
	if (struct_v >= 7) {
		u8 struct_compat;

		ceph_decode_8_safe(p, end, struct_compat, e_inval);
		if (struct_compat > OSDMAP_WRAPPER_COMPAT_VER) {
			pr_warning("got v %d cv %d > %d of %s ceph_osdmap\n",
				struct_v, struct_compat,
				OSDMAP_WRAPPER_COMPAT_VER, prefix);
			return -EINVAL;
		}
		*p += 4; /* ignore wrapper struct_len */

		ceph_decode_8_safe(p, end, struct_v, e_inval);
		ceph_decode_8_safe(p, end, struct_compat, e_inval);
		if (struct_compat > OSDMAP_CLIENT_DATA_COMPAT_VER) {
			pr_warning("got v %d cv %d > %d of %s ceph_osdmap client data\n",
				struct_v, struct_compat,
				OSDMAP_CLIENT_DATA_COMPAT_VER, prefix);
			return -EINVAL;
		}
		*p += 4; /* ignore client data struct_len */
	} else {
		u16 version;

		/* re-read the byte as the low half of a u16 version */
		*p -= 1;
		ceph_decode_16_safe(p, end, version, e_inval);
		if (version < 6) {
			pr_warning("got v %d < 6 of %s ceph_osdmap\n", version,
				prefix);
			return -EINVAL;
		}

		/* old osdmap enconding */
		struct_v = 0;
	}

	*v = struct_v;
	return 0;

e_inval:
	return -EINVAL;
}
+
+/*
+ * Decode a count-prefixed list of (u64 pool id, pool data) entries.
+ * For an incremental map an already-known pool is updated in place;
+ * otherwise a fresh entry is allocated, inserted and then filled in.
+ */
+static int __decode_pools(void **p, void *end, struct ceph_osdmap *map,
+ bool incremental)
+{
+ u32 n;
+
+ ceph_decode_32_safe(p, end, n, e_inval);
+ while (n--) {
+ struct ceph_pg_pool_info *pi;
+ u64 pool;
+ int ret;
+
+ ceph_decode_64_safe(p, end, pool, e_inval);
+
+ pi = __lookup_pg_pool(&map->pg_pools, pool);
+ if (!incremental || !pi) {
+ pi = kzalloc(sizeof(*pi), GFP_NOFS);
+ if (!pi)
+ return -ENOMEM;
+
+ pi->id = pool;
+
+ ret = __insert_pg_pool(&map->pg_pools, pi);
+ if (ret) {
+ kfree(pi);
+ return ret;
+ }
+ }
+
+ ret = decode_pool(p, end, pi);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+/* Decode the pool list of a full map. */
+static int decode_pools(void **p, void *end, struct ceph_osdmap *map)
+{
+ return __decode_pools(p, end, map, false);
+}
+
+/* Decode the new_pools section of an incremental map. */
+static int decode_new_pools(void **p, void *end, struct ceph_osdmap *map)
+{
+ return __decode_pools(p, end, map, true);
+}
+
+/*
+ * Decode pg_temp mappings. Each entry replaces the mapping for its
+ * pgid; in an incremental map an empty osd list (len == 0) just
+ * removes the existing mapping.
+ */
+static int __decode_pg_temp(void **p, void *end, struct ceph_osdmap *map,
+ bool incremental)
+{
+ u32 n;
+
+ ceph_decode_32_safe(p, end, n, e_inval);
+ while (n--) {
+ struct ceph_pg pgid;
+ u32 len, i;
+ int ret;
+
+ ret = ceph_decode_pgid(p, end, &pgid);
+ if (ret)
+ return ret;
+
+ ceph_decode_32_safe(p, end, len, e_inval);
+
+ /* a full map must not already contain this mapping */
+ ret = __remove_pg_mapping(&map->pg_temp, pgid);
+ BUG_ON(!incremental && ret != -ENOENT);
+
+ if (!incremental || len > 0) {
+ struct ceph_pg_mapping *pg;
+
+ ceph_decode_need(p, end, len*sizeof(u32), e_inval);
+
+ /* guard the kzalloc size computation against overflow */
+ if (len > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
+ return -EINVAL;
+
+ pg = kzalloc(sizeof(*pg) + len*sizeof(u32), GFP_NOFS);
+ if (!pg)
+ return -ENOMEM;
+
+ pg->pgid = pgid;
+ pg->pg_temp.len = len;
+ for (i = 0; i < len; i++)
+ pg->pg_temp.osds[i] = ceph_decode_32(p);
+
+ ret = __insert_pg_mapping(pg, &map->pg_temp);
+ if (ret) {
+ kfree(pg);
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+/* Decode the pg_temp section of a full map. */
+static int decode_pg_temp(void **p, void *end, struct ceph_osdmap *map)
+{
+ return __decode_pg_temp(p, end, map, false);
+}
+
+/* Decode the new_pg_temp section of an incremental map. */
+static int decode_new_pg_temp(void **p, void *end, struct ceph_osdmap *map)
+{
+ return __decode_pg_temp(p, end, map, true);
+}
+
+/*
+ * Decode primary_temp mappings. Each entry pins a primary osd for its
+ * pgid; in an incremental map an osd of (u32)-1 just removes the
+ * existing mapping.
+ */
+static int __decode_primary_temp(void **p, void *end, struct ceph_osdmap *map,
+ bool incremental)
+{
+ u32 n;
+
+ ceph_decode_32_safe(p, end, n, e_inval);
+ while (n--) {
+ struct ceph_pg pgid;
+ u32 osd;
+ int ret;
+
+ ret = ceph_decode_pgid(p, end, &pgid);
+ if (ret)
+ return ret;
+
+ ceph_decode_32_safe(p, end, osd, e_inval);
+
+ /* a full map must not already contain this mapping */
+ ret = __remove_pg_mapping(&map->primary_temp, pgid);
+ BUG_ON(!incremental && ret != -ENOENT);
+
+ if (!incremental || osd != (u32)-1) {
+ struct ceph_pg_mapping *pg;
+
+ pg = kzalloc(sizeof(*pg), GFP_NOFS);
+ if (!pg)
+ return -ENOMEM;
+
+ pg->pgid = pgid;
+ pg->primary_temp.osd = osd;
+
+ ret = __insert_pg_mapping(pg, &map->primary_temp);
+ if (ret) {
+ kfree(pg);
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+/* Decode the primary_temp section of a full map. */
+static int decode_primary_temp(void **p, void *end, struct ceph_osdmap *map)
+{
+ return __decode_primary_temp(p, end, map, false);
+}
+
+/* Decode the new_primary_temp section of an incremental map. */
+static int decode_new_primary_temp(void **p, void *end,
+ struct ceph_osdmap *map)
+{
+ return __decode_primary_temp(p, end, map, true);
+}
+
+/*
+ * Primary affinity of @osd; the default value if the affinity array
+ * has not been allocated yet (see set_primary_affinity()).
+ */
+u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd)
+{
+ BUG_ON(osd >= map->max_osd);
+
+ if (!map->osd_primary_affinity)
+ return CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
+
+ return map->osd_primary_affinity[osd];
+}
+
+/*
+ * Record @aff as the primary affinity of @osd, lazily allocating the
+ * per-osd affinity array (all entries defaulted) on first use.
+ */
+static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff)
+{
+ BUG_ON(osd >= map->max_osd);
+
+ if (!map->osd_primary_affinity) {
+ int i;
+
+ map->osd_primary_affinity = kmalloc(map->max_osd*sizeof(u32),
+ GFP_NOFS);
+ if (!map->osd_primary_affinity)
+ return -ENOMEM;
+
+ for (i = 0; i < map->max_osd; i++)
+ map->osd_primary_affinity[i] =
+ CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
+ }
+
+ map->osd_primary_affinity[osd] = aff;
+
+ return 0;
+}
+
+/*
+ * Decode the full-map osd_primary_affinity vector. A zero length
+ * drops the affinity array entirely; otherwise the length must match
+ * max_osd exactly.
+ */
+static int decode_primary_affinity(void **p, void *end,
+ struct ceph_osdmap *map)
+{
+ u32 len, i;
+
+ ceph_decode_32_safe(p, end, len, e_inval);
+ if (len == 0) {
+ kfree(map->osd_primary_affinity);
+ map->osd_primary_affinity = NULL;
+ return 0;
+ }
+ if (len != map->max_osd)
+ goto e_inval;
+
+ ceph_decode_need(p, end, map->max_osd*sizeof(u32), e_inval);
+
+ for (i = 0; i < map->max_osd; i++) {
+ int ret;
+
+ ret = set_primary_affinity(map, i, ceph_decode_32(p));
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+/* Decode incremental (osd, affinity) pairs, one entry per changed osd. */
+static int decode_new_primary_affinity(void **p, void *end,
+ struct ceph_osdmap *map)
+{
+ u32 n;
+
+ ceph_decode_32_safe(p, end, n, e_inval);
+ while (n--) {
+ u32 osd, aff;
+ int ret;
+
+ ceph_decode_32_safe(p, end, osd, e_inval);
+ ceph_decode_32_safe(p, end, aff, e_inval);
+
+ ret = set_primary_affinity(map, osd, aff);
+ if (ret)
+ return ret;
+
+ pr_info("osd%d primary-affinity 0x%x\n", osd, aff);
+ }
+
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+/*
+ * Decode a full map into @map.
+ *
+ * On failure @map may be partially filled in; the caller is expected
+ * to dispose of it (see ceph_osdmap_decode()).
+ */
+static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
+{
+ u8 struct_v;
+ u32 epoch = 0;
+ void *start = *p;
+ u32 max;
+ u32 len, i;
+ int err;
+
+ dout("%s %p to %p len %d\n", __func__, *p, end, (int)(end - *p));
+
+ err = get_osdmap_client_data_v(p, end, "full", &struct_v);
+ if (err)
+ goto bad;
+
+ /* fsid, epoch, created, modified */
+ ceph_decode_need(p, end, sizeof(map->fsid) + sizeof(u32) +
+ sizeof(map->created) + sizeof(map->modified), e_inval);
+ ceph_decode_copy(p, &map->fsid, sizeof(map->fsid));
+ epoch = map->epoch = ceph_decode_32(p); /* epoch kept for error report */
+ ceph_decode_copy(p, &map->created, sizeof(map->created));
+ ceph_decode_copy(p, &map->modified, sizeof(map->modified));
+
+ /* pools */
+ err = decode_pools(p, end, map);
+ if (err)
+ goto bad;
+
+ /* pool_name */
+ err = decode_pool_names(p, end, map);
+ if (err)
+ goto bad;
+
+ ceph_decode_32_safe(p, end, map->pool_max, e_inval);
+
+ ceph_decode_32_safe(p, end, map->flags, e_inval);
+
+ /* max_osd */
+ ceph_decode_32_safe(p, end, max, e_inval);
+
+ /* (re)alloc osd arrays */
+ err = osdmap_set_max_osd(map, max);
+ if (err)
+ goto bad;
+
+ /* osd_state, osd_weight, osd_addrs->client_addr */
+ ceph_decode_need(p, end, 3*sizeof(u32) +
+ map->max_osd*(1 + sizeof(*map->osd_weight) +
+ sizeof(*map->osd_addr)), e_inval);
+
+ /* each array is preceded by its length, which must equal max_osd */
+ if (ceph_decode_32(p) != map->max_osd)
+ goto e_inval;
+
+ ceph_decode_copy(p, map->osd_state, map->max_osd);
+
+ if (ceph_decode_32(p) != map->max_osd)
+ goto e_inval;
+
+ for (i = 0; i < map->max_osd; i++)
+ map->osd_weight[i] = ceph_decode_32(p);
+
+ if (ceph_decode_32(p) != map->max_osd)
+ goto e_inval;
+
+ ceph_decode_copy(p, map->osd_addr, map->max_osd*sizeof(*map->osd_addr));
+ for (i = 0; i < map->max_osd; i++)
+ ceph_decode_addr(&map->osd_addr[i]);
+
+ /* pg_temp */
+ err = decode_pg_temp(p, end, map);
+ if (err)
+ goto bad;
+
+ /* primary_temp */
+ if (struct_v >= 1) {
+ err = decode_primary_temp(p, end, map);
+ if (err)
+ goto bad;
+ }
+
+ /* primary_affinity */
+ if (struct_v >= 2) {
+ err = decode_primary_affinity(p, end, map);
+ if (err)
+ goto bad;
+ } else {
+ /* XXX can this happen? */
+ kfree(map->osd_primary_affinity);
+ map->osd_primary_affinity = NULL;
+ }
+
+ /* crush (length-prefixed; clamp the decode window to @end) */
+ ceph_decode_32_safe(p, end, len, e_inval);
+ map->crush = crush_decode(*p, min(*p + len, end));
+ if (IS_ERR(map->crush)) {
+ err = PTR_ERR(map->crush);
+ map->crush = NULL;
+ goto bad;
+ }
+ *p += len;
+
+ /* ignore the rest */
+ *p = end;
+
+ dout("full osdmap epoch %d max_osd %d\n", map->epoch, map->max_osd);
+ return 0;
+
+e_inval:
+ err = -EINVAL;
+bad:
+ pr_err("corrupt full osdmap (%d) epoch %d off %d (%p of %p-%p)\n",
+ err, epoch, (int)(*p - start), *p, start, end);
+ print_hex_dump(KERN_DEBUG, "osdmap: ",
+ DUMP_PREFIX_OFFSET, 16, 1,
+ start, end - start, true);
+ return err;
+}
+
+/*
+ * Allocate and decode a full map.
+ *
+ * Returns the new map or an ERR_PTR; on decode failure the partially
+ * built map is destroyed here.
+ */
+struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
+{
+ struct ceph_osdmap *map;
+ int ret;
+
+ map = kzalloc(sizeof(*map), GFP_NOFS);
+ if (!map)
+ return ERR_PTR(-ENOMEM);
+
+ map->pg_temp = RB_ROOT;
+ map->primary_temp = RB_ROOT;
+ mutex_init(&map->crush_scratch_mutex);
+
+ ret = osdmap_decode(p, end, map);
+ if (ret) {
+ ceph_osdmap_destroy(map);
+ return ERR_PTR(ret);
+ }
+
+ return map;
+}
+
+/*
+ * Decode and apply an incremental map update to @map.
+ *
+ * Returns the resulting map: normally @map updated in place, or, when
+ * the incremental carries an embedded full map, a freshly decoded map
+ * (in which case the caller still owns the old @map).
+ */
+struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
+ struct ceph_osdmap *map,
+ struct ceph_messenger *msgr)
+{
+ struct crush_map *newcrush = NULL;
+ struct ceph_fsid fsid;
+ u32 epoch = 0;
+ struct ceph_timespec modified;
+ s32 len;
+ u64 pool;
+ __s64 new_pool_max;
+ __s32 new_flags, max;
+ void *start = *p;
+ int err;
+ u8 struct_v;
+
+ dout("%s %p to %p len %d\n", __func__, *p, end, (int)(end - *p));
+
+ err = get_osdmap_client_data_v(p, end, "inc", &struct_v);
+ if (err)
+ goto bad;
+
+ /* fsid, epoch, modified, new_pool_max, new_flags */
+ ceph_decode_need(p, end, sizeof(fsid) + sizeof(u32) + sizeof(modified) +
+ sizeof(u64) + sizeof(u32), e_inval);
+ ceph_decode_copy(p, &fsid, sizeof(fsid));
+ epoch = ceph_decode_32(p);
+ /* incrementals must be applied strictly in order */
+ BUG_ON(epoch != map->epoch+1);
+ ceph_decode_copy(p, &modified, sizeof(modified));
+ new_pool_max = ceph_decode_64(p);
+ new_flags = ceph_decode_32(p);
+
+ /* full map? */
+ ceph_decode_32_safe(p, end, len, e_inval);
+ if (len > 0) {
+ dout("apply_incremental full map len %d, %p to %p\n",
+ len, *p, end);
+ return ceph_osdmap_decode(p, min(*p+len, end));
+ }
+
+ /* new crush? */
+ ceph_decode_32_safe(p, end, len, e_inval);
+ if (len > 0) {
+ newcrush = crush_decode(*p, min(*p+len, end));
+ if (IS_ERR(newcrush)) {
+ err = PTR_ERR(newcrush);
+ newcrush = NULL;
+ goto bad;
+ }
+ *p += len;
+ }
+
+ /* new flags? (negative values mean "unchanged") */
+ if (new_flags >= 0)
+ map->flags = new_flags;
+ if (new_pool_max >= 0)
+ map->pool_max = new_pool_max;
+
+ /* new max? */
+ ceph_decode_32_safe(p, end, max, e_inval);
+ if (max >= 0) {
+ err = osdmap_set_max_osd(map, max);
+ if (err)
+ goto bad;
+ }
+
+ map->epoch++;
+ map->modified = modified;
+ if (newcrush) {
+ if (map->crush)
+ crush_destroy(map->crush);
+ map->crush = newcrush;
+ newcrush = NULL; /* ownership transferred to map */
+ }
+
+ /* new_pools */
+ err = decode_new_pools(p, end, map);
+ if (err)
+ goto bad;
+
+ /* new_pool_names */
+ err = decode_pool_names(p, end, map);
+ if (err)
+ goto bad;
+
+ /* old_pool */
+ ceph_decode_32_safe(p, end, len, e_inval);
+ while (len--) {
+ struct ceph_pg_pool_info *pi;
+
+ ceph_decode_64_safe(p, end, pool, e_inval);
+ pi = __lookup_pg_pool(&map->pg_pools, pool);
+ if (pi)
+ __remove_pg_pool(&map->pg_pools, pi);
+ }
+
+ /* new_up */
+ ceph_decode_32_safe(p, end, len, e_inval);
+ while (len--) {
+ u32 osd;
+ struct ceph_entity_addr addr;
+ ceph_decode_32_safe(p, end, osd, e_inval);
+ ceph_decode_copy_safe(p, end, &addr, sizeof(addr), e_inval);
+ ceph_decode_addr(&addr);
+ pr_info("osd%d up\n", osd);
+ BUG_ON(osd >= map->max_osd);
+ map->osd_state[osd] |= CEPH_OSD_UP;
+ map->osd_addr[osd] = addr;
+ }
+
+ /* new_state (bits to be xor'ed into osd_state) */
+ ceph_decode_32_safe(p, end, len, e_inval);
+ while (len--) {
+ u32 osd;
+ u8 xorstate;
+ ceph_decode_32_safe(p, end, osd, e_inval);
+ xorstate = **(u8 **)p;
+ (*p)++; /* clean flag */
+ if (xorstate == 0)
+ xorstate = CEPH_OSD_UP;
+ if (xorstate & CEPH_OSD_UP)
+ pr_info("osd%d down\n", osd);
+ if (osd < map->max_osd)
+ map->osd_state[osd] ^= xorstate;
+ }
+
+ /* new_weight */
+ ceph_decode_32_safe(p, end, len, e_inval);
+ while (len--) {
+ u32 osd, off;
+ ceph_decode_need(p, end, sizeof(u32)*2, e_inval);
+ osd = ceph_decode_32(p);
+ off = ceph_decode_32(p);
+ pr_info("osd%d weight 0x%x %s\n", osd, off,
+ off == CEPH_OSD_IN ? "(in)" :
+ (off == CEPH_OSD_OUT ? "(out)" : ""));
+ if (osd < map->max_osd)
+ map->osd_weight[osd] = off;
+ }
+
+ /* new_pg_temp */
+ err = decode_new_pg_temp(p, end, map);
+ if (err)
+ goto bad;
+
+ /* new_primary_temp */
+ if (struct_v >= 1) {
+ err = decode_new_primary_temp(p, end, map);
+ if (err)
+ goto bad;
+ }
+
+ /* new_primary_affinity */
+ if (struct_v >= 2) {
+ err = decode_new_primary_affinity(p, end, map);
+ if (err)
+ goto bad;
+ }
+
+ /* ignore the rest */
+ *p = end;
+
+ dout("inc osdmap epoch %d max_osd %d\n", map->epoch, map->max_osd);
+ return map;
+
+e_inval:
+ err = -EINVAL;
+bad:
+ pr_err("corrupt inc osdmap (%d) epoch %d off %d (%p of %p-%p)\n",
+ err, epoch, (int)(*p - start), *p, start, end);
+ print_hex_dump(KERN_DEBUG, "osdmap: ",
+ DUMP_PREFIX_OFFSET, 16, 1,
+ start, end - start, true);
+ if (newcrush)
+ crush_destroy(newcrush);
+ return ERR_PTR(err);
+}
+
+
+
+
+/*
+ * calculate file layout from given offset, length.
+ * fill in correct oid, logical length, and object extent
+ * offset, length.
+ *
+ * for now, we write only a single su, until we can
+ * pass a stride back to the caller.
+ *
+ * Returns 0, or -EINVAL (with the outputs zeroed) for a layout with a
+ * zero stripe unit/count, su > object size, or a non-page-aligned su.
+ */
+int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
+ u64 off, u64 len,
+ u64 *ono,
+ u64 *oxoff, u64 *oxlen)
+{
+ u32 osize = le32_to_cpu(layout->fl_object_size);
+ u32 su = le32_to_cpu(layout->fl_stripe_unit);
+ u32 sc = le32_to_cpu(layout->fl_stripe_count);
+ u32 bl, stripeno, stripepos, objsetno;
+ u32 su_per_object;
+ u64 t, su_offset;
+
+ dout("mapping %llu~%llu osize %u fl_su %u\n", off, len,
+ osize, su);
+ if (su == 0 || sc == 0)
+ goto invalid;
+ su_per_object = osize / su;
+ if (su_per_object == 0)
+ goto invalid;
+ dout("osize %u / su %u = su_per_object %u\n", osize, su,
+ su_per_object);
+
+ if ((su & ~PAGE_MASK) != 0)
+ goto invalid;
+
+ /* bl = *off / su; */
+ t = off;
+ do_div(t, su);
+ /* NOTE(review): bl is u32; off/su beyond 2^32 would truncate here
+ * — confirm callers' offset ranges. */
+ bl = t;
+ dout("off %llu / su %u = bl %u\n", off, su, bl);
+
+ stripeno = bl / sc;
+ stripepos = bl % sc;
+ objsetno = stripeno / su_per_object;
+
+ *ono = objsetno * sc + stripepos;
+ dout("objset %u * sc %u = ono %u\n", objsetno, sc, (unsigned int)*ono);
+
+ /* *oxoff = *off % layout->fl_stripe_unit; # offset in su */
+ t = off;
+ su_offset = do_div(t, su);
+ *oxoff = su_offset + (stripeno % su_per_object) * su;
+
+ /*
+ * Calculate the length of the extent being written to the selected
+ * object. This is the minimum of the full length requested (len) or
+ * the remainder of the current stripe being written to.
+ */
+ *oxlen = min_t(u64, len, su - su_offset);
+
+ dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
+ return 0;
+
+invalid:
+ dout(" invalid layout\n");
+ *ono = 0;
+ *oxoff = 0;
+ *oxlen = 0;
+ return -EINVAL;
+}
+EXPORT_SYMBOL(ceph_calc_file_object_mapping);
+
+/*
+ * Calculate mapping of a (oloc, oid) pair to a PG. Should only be
+ * called with target's (oloc, oid), since tiering isn't taken into
+ * account.
+ *
+ * Returns -EIO if the pool id in @oloc is not present in the map.
+ */
+int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap,
+ struct ceph_object_locator *oloc,
+ struct ceph_object_id *oid,
+ struct ceph_pg *pg_out)
+{
+ struct ceph_pg_pool_info *pi;
+
+ pi = __lookup_pg_pool(&osdmap->pg_pools, oloc->pool);
+ if (!pi)
+ return -EIO;
+
+ /* seed is the pool's hash of the object name */
+ pg_out->pool = oloc->pool;
+ pg_out->seed = ceph_str_hash(pi->object_hash, oid->name,
+ oid->name_len);
+
+ dout("%s '%.*s' pgid %llu.%x\n", __func__, oid->name_len, oid->name,
+ pg_out->pool, pg_out->seed);
+ return 0;
+}
+EXPORT_SYMBOL(ceph_oloc_oid_to_pg);
+
+/*
+ * Run a crush rule under the map's scratch-buffer lock.
+ * @result must have room for at most CEPH_PG_MAX_SIZE entries.
+ */
+static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
+ int *result, int result_max,
+ const __u32 *weight, int weight_max)
+{
+ int r;
+
+ BUG_ON(result_max > CEPH_PG_MAX_SIZE);
+
+ /* crush_scratch_ary is shared; serialize its use */
+ mutex_lock(&map->crush_scratch_mutex);
+ r = crush_do_rule(map->crush, ruleno, x, result, result_max,
+ weight, weight_max, map->crush_scratch_ary);
+ mutex_unlock(&map->crush_scratch_mutex);
+
+ return r;
+}
+
+/*
+ * Calculate raw (crush) set for given pgid.
+ *
+ * Return raw set length, or error (-ENOENT when no crush rule matches
+ * the pool's ruleset/type/size).
+ */
+static int pg_to_raw_osds(struct ceph_osdmap *osdmap,
+ struct ceph_pg_pool_info *pool,
+ struct ceph_pg pgid, u32 pps, int *osds)
+{
+ int ruleno;
+ int len;
+
+ /* crush */
+ ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset,
+ pool->type, pool->size);
+ if (ruleno < 0) {
+ pr_err("no crush rule: pool %lld ruleset %d type %d size %d\n",
+ pgid.pool, pool->crush_ruleset, pool->type,
+ pool->size);
+ return -ENOENT;
+ }
+
+ len = do_crush(osdmap, ruleno, pps, osds,
+ min_t(int, pool->size, CEPH_PG_MAX_SIZE),
+ osdmap->osd_weight, osdmap->max_osd);
+ if (len < 0) {
+ pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n",
+ len, ruleno, pgid.pool, pool->crush_ruleset,
+ pool->type, pool->size);
+ return len;
+ }
+
+ return len;
+}
+
+/*
+ * Given raw set, calculate up set and up primary.
+ *
+ * Return up set length. *primary is set to up primary osd id, or -1
+ * if up set is empty.
+ */
+static int raw_to_up_osds(struct ceph_osdmap *osdmap,
+ struct ceph_pg_pool_info *pool,
+ int *osds, int len, int *primary)
+{
+ int up_primary = -1;
+ int i;
+
+ if (ceph_can_shift_osds(pool)) {
+ int removed = 0;
+
+ /* shiftable (replicated): compact down osds out of the set */
+ for (i = 0; i < len; i++) {
+ if (ceph_osd_is_down(osdmap, osds[i])) {
+ removed++;
+ continue;
+ }
+ if (removed)
+ osds[i - removed] = osds[i];
+ }
+
+ len -= removed;
+ if (len > 0)
+ up_primary = osds[0];
+ } else {
+ /* positional (e.g. erasure-coded): keep slots, mark holes */
+ for (i = len - 1; i >= 0; i--) {
+ if (ceph_osd_is_down(osdmap, osds[i]))
+ osds[i] = CRUSH_ITEM_NONE;
+ else
+ up_primary = osds[i];
+ }
+ }
+
+ *primary = up_primary;
+ return len;
+}
+
+/*
+ * Possibly reselect the primary based on per-osd primary-affinity
+ * values, rejecting an osd as primary with probability proportional to
+ * how far its affinity is below the maximum.
+ */
+static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps,
+ struct ceph_pg_pool_info *pool,
+ int *osds, int len, int *primary)
+{
+ int i;
+ int pos = -1;
+
+ /*
+ * Do we have any non-default primary_affinity values for these
+ * osds?
+ */
+ if (!osdmap->osd_primary_affinity)
+ return;
+
+ for (i = 0; i < len; i++) {
+ int osd = osds[i];
+
+ if (osd != CRUSH_ITEM_NONE &&
+ osdmap->osd_primary_affinity[osd] !=
+ CEPH_OSD_DEFAULT_PRIMARY_AFFINITY) {
+ break;
+ }
+ }
+ if (i == len)
+ return;
+
+ /*
+ * Pick the primary. Feed both the seed (for the pg) and the
+ * osd into the hash/rng so that a proportional fraction of an
+ * osd's pgs get rejected as primary.
+ */
+ for (i = 0; i < len; i++) {
+ int osd = osds[i];
+ u32 aff;
+
+ if (osd == CRUSH_ITEM_NONE)
+ continue;
+
+ aff = osdmap->osd_primary_affinity[osd];
+ if (aff < CEPH_OSD_MAX_PRIMARY_AFFINITY &&
+ (crush_hash32_2(CRUSH_HASH_RJENKINS1,
+ pps, osd) >> 16) >= aff) {
+ /*
+ * We chose not to use this primary. Note it
+ * anyway as a fallback in case we don't pick
+ * anyone else, but keep looking.
+ */
+ if (pos < 0)
+ pos = i;
+ } else {
+ pos = i;
+ break;
+ }
+ }
+ if (pos < 0)
+ return;
+
+ *primary = osds[pos];
+
+ if (ceph_can_shift_osds(pool) && pos > 0) {
+ /* move the new primary to the front */
+ for (i = pos; i > 0; i--)
+ osds[i] = osds[i - 1];
+ osds[0] = *primary;
+ }
+}
+
+/*
+ * Given up set, apply pg_temp and primary_temp mappings.
+ *
+ * Return acting set length. *primary is set to acting primary osd id,
+ * or -1 if acting set is empty.
+ */
+static int apply_temps(struct ceph_osdmap *osdmap,
+ struct ceph_pg_pool_info *pool, struct ceph_pg pgid,
+ int *osds, int len, int *primary)
+{
+ struct ceph_pg_mapping *pg;
+ int temp_len;
+ int temp_primary;
+ int i;
+
+ /* raw_pg -> pg (temp mappings are keyed by the stable pgid) */
+ pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num,
+ pool->pg_num_mask);
+
+ /* pg_temp? */
+ pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
+ if (pg) {
+ temp_len = 0;
+ temp_primary = -1;
+
+ /* replace the set: skip (or hole out) down osds */
+ for (i = 0; i < pg->pg_temp.len; i++) {
+ if (ceph_osd_is_down(osdmap, pg->pg_temp.osds[i])) {
+ if (ceph_can_shift_osds(pool))
+ continue;
+ else
+ osds[temp_len++] = CRUSH_ITEM_NONE;
+ } else {
+ osds[temp_len++] = pg->pg_temp.osds[i];
+ }
+ }
+
+ /* apply pg_temp's primary */
+ for (i = 0; i < temp_len; i++) {
+ if (osds[i] != CRUSH_ITEM_NONE) {
+ temp_primary = osds[i];
+ break;
+ }
+ }
+ } else {
+ temp_len = len;
+ temp_primary = *primary;
+ }
+
+ /* primary_temp? */
+ pg = __lookup_pg_mapping(&osdmap->primary_temp, pgid);
+ if (pg)
+ temp_primary = pg->primary_temp.osd;
+
+ *primary = temp_primary;
+ return temp_len;
+}
+
+/*
+ * Calculate acting set for given pgid.
+ *
+ * Return acting set length, or error. *primary is set to acting
+ * primary osd id, or -1 if acting set is empty or on error.
+ *
+ * @osds must have room for CEPH_PG_MAX_SIZE entries (see do_crush()).
+ */
+int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
+ int *osds, int *primary)
+{
+ struct ceph_pg_pool_info *pool;
+ u32 pps;
+ int len;
+
+ pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool);
+ if (!pool) {
+ *primary = -1;
+ return -ENOENT;
+ }
+
+ if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) {
+ /* hash pool id and seed so that pool PGs do not overlap */
+ pps = crush_hash32_2(CRUSH_HASH_RJENKINS1,
+ ceph_stable_mod(pgid.seed, pool->pgp_num,
+ pool->pgp_num_mask),
+ pgid.pool);
+ } else {
+ /*
+ * legacy behavior: add ps and pool together. this is
+ * not a great approach because the PGs from each pool
+ * will overlap on top of each other: 0.5 == 1.4 ==
+ * 2.3 == ...
+ */
+ pps = ceph_stable_mod(pgid.seed, pool->pgp_num,
+ pool->pgp_num_mask) +
+ (unsigned)pgid.pool;
+ }
+
+ /* raw crush set -> up set -> affinity-adjusted -> acting set */
+ len = pg_to_raw_osds(osdmap, pool, pgid, pps, osds);
+ if (len < 0) {
+ *primary = -1;
+ return len;
+ }
+
+ len = raw_to_up_osds(osdmap, pool, osds, len, primary);
+
+ apply_primary_affinity(osdmap, pps, pool, osds, len, primary);
+
+ len = apply_temps(osdmap, pool, pgid, osds, len, primary);
+
+ return len;
+}
+
+/*
+ * Return primary osd for given pgid, or -1 if none.
+ */
+int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
+{
+ int osds[CEPH_PG_MAX_SIZE];
+ int primary;
+
+ /* errors from ceph_calc_pg_acting() leave primary == -1 */
+ ceph_calc_pg_acting(osdmap, pgid, osds, &primary);
+
+ return primary;
+}
+EXPORT_SYMBOL(ceph_calc_pg_primary);
diff --git a/libceph/pagelist.c b/libceph/pagelist.c
new file mode 100644
index 0000000..92866be
--- /dev/null
+++ b/libceph/pagelist.c
@@ -0,0 +1,147 @@
+#include <linux/module.h>
+#include <linux/gfp.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/ceph/pagelist.h>
+
+/* Drop the kmap of the pagelist's last page, if one is mapped. */
+static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
+{
+ if (pl->mapped_tail) {
+ struct page *page = list_entry(pl->head.prev, struct page, lru);
+ kunmap(page);
+ pl->mapped_tail = NULL;
+ }
+}
+
+/* Free every page held by the pagelist, including its reserve. */
+int ceph_pagelist_release(struct ceph_pagelist *pl)
+{
+ ceph_pagelist_unmap_tail(pl);
+ while (!list_empty(&pl->head)) {
+ struct page *page = list_first_entry(&pl->head, struct page,
+ lru);
+ list_del(&page->lru);
+ __free_page(page);
+ }
+ ceph_pagelist_free_reserve(pl);
+ return 0;
+}
+EXPORT_SYMBOL(ceph_pagelist_release);
+
+/*
+ * Append one more page to the pagelist, taking it from the reserve
+ * when available, and kmap it as the new tail.
+ */
+static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
+{
+ struct page *page;
+
+ if (!pl->num_pages_free) {
+ page = __page_cache_alloc(GFP_NOFS);
+ } else {
+ page = list_first_entry(&pl->free_list, struct page, lru);
+ list_del(&page->lru);
+ --pl->num_pages_free;
+ }
+ if (!page)
+ return -ENOMEM;
+ pl->room += PAGE_SIZE;
+ ceph_pagelist_unmap_tail(pl);
+ list_add_tail(&page->lru, &pl->head);
+ pl->mapped_tail = kmap(page);
+ return 0;
+}
+
+/*
+ * Append @len bytes from @buf, filling the current tail page and
+ * growing the pagelist one page at a time as needed.
+ */
+int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len)
+{
+ while (pl->room < len) {
+ size_t bit = pl->room;
+ int ret;
+
+ /* fill the remainder of the tail page, then add a page */
+ memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK),
+ buf, bit);
+ pl->length += bit;
+ pl->room -= bit;
+ buf += bit;
+ len -= bit;
+ ret = ceph_pagelist_addpage(pl);
+ if (ret)
+ return ret;
+ }
+
+ memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len);
+ pl->length += len;
+ pl->room -= len;
+ return 0;
+}
+EXPORT_SYMBOL(ceph_pagelist_append);
+
+/* Allocate enough pages for a pagelist to append the given amount
+ * of data without allocating.
+ * Returns: 0 on success, -ENOMEM on error.
+ */
+int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space)
+{
+ if (space <= pl->room)
+ return 0;
+ space -= pl->room;
+ space = (space + PAGE_SIZE - 1) >> PAGE_SHIFT; /* conv to num pages */
+
+ while (space > pl->num_pages_free) {
+ struct page *page = __page_cache_alloc(GFP_NOFS);
+ if (!page)
+ return -ENOMEM;
+ list_add_tail(&page->lru, &pl->free_list);
+ ++pl->num_pages_free;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(ceph_pagelist_reserve);
+
+/* Free any pages that have been preallocated. */
+int ceph_pagelist_free_reserve(struct ceph_pagelist *pl)
+{
+ while (!list_empty(&pl->free_list)) {
+ struct page *page = list_first_entry(&pl->free_list,
+ struct page, lru);
+ list_del(&page->lru);
+ __free_page(page);
+ --pl->num_pages_free;
+ }
+ /* the counter must track the list exactly */
+ BUG_ON(pl->num_pages_free);
+ return 0;
+}
+EXPORT_SYMBOL(ceph_pagelist_free_reserve);
+
+/* Create a truncation point: snapshot the tail page and room left. */
+void ceph_pagelist_set_cursor(struct ceph_pagelist *pl,
+ struct ceph_pagelist_cursor *c)
+{
+ c->pl = pl;
+ c->page_lru = pl->head.prev;
+ c->room = pl->room;
+}
+EXPORT_SYMBOL(ceph_pagelist_set_cursor);
+
+/* Truncate a pagelist to the given point. Move extra pages to reserve.
+ * This won't sleep.
+ * Returns: 0 on success,
+ * -EINVAL if the pagelist doesn't match the trunc point pagelist
+ */
+int ceph_pagelist_truncate(struct ceph_pagelist *pl,
+ struct ceph_pagelist_cursor *c)
+{
+ struct page *page;
+
+ if (pl != c->pl)
+ return -EINVAL;
+ ceph_pagelist_unmap_tail(pl);
+ while (pl->head.prev != c->page_lru) {
+ page = list_entry(pl->head.prev, struct page, lru);
+ /* move from pagelist to reserve */
+ list_move_tail(&page->lru, &pl->free_list);
+ ++pl->num_pages_free;
+ }
+ pl->room = c->room;
+ if (!list_empty(&pl->head)) {
+ /* remap what is now the tail page */
+ page = list_entry(pl->head.prev, struct page, lru);
+ pl->mapped_tail = kmap(page);
+ }
+ return 0;
+}
+EXPORT_SYMBOL(ceph_pagelist_truncate);
diff --git a/libceph/pagevec.c b/libceph/pagevec.c
new file mode 100644
index 0000000..815a224
--- /dev/null
+++ b/libceph/pagevec.c
@@ -0,0 +1,231 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/writeback.h>
+
+#include <linux/ceph/libceph.h>
+
+/*
+ * build a vector of user pages
+ *
+ * Pins @num_pages of the caller's address space starting at @data;
+ * on failure all pages pinned so far are released.
+ */
+struct page **ceph_get_direct_page_vector(const void __user *data,
+ int num_pages, bool write_page)
+{
+ struct page **pages;
+ int got = 0;
+ int rc = 0;
+
+ pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
+ if (!pages)
+ return ERR_PTR(-ENOMEM);
+
+ down_read(&current->mm->mmap_sem);
+ while (got < num_pages) {
+ rc = get_user_pages(current, current->mm,
+ (unsigned long)data + ((unsigned long)got * PAGE_SIZE),
+ num_pages - got, write_page, 0, pages + got, NULL);
+ if (rc < 0)
+ break;
+ BUG_ON(rc == 0);
+ got += rc;
+ }
+ up_read(&current->mm->mmap_sem);
+ if (rc < 0)
+ goto fail;
+ return pages;
+
+fail:
+ ceph_put_page_vector(pages, got, false);
+ return ERR_PTR(rc);
+}
+EXPORT_SYMBOL(ceph_get_direct_page_vector);
+
+/*
+ * Unpin a page vector (optionally marking each page dirty first) and
+ * free the vector itself.
+ */
+void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
+{
+ int i;
+
+ for (i = 0; i < num_pages; i++) {
+ if (dirty)
+ set_page_dirty_lock(pages[i]);
+ put_page(pages[i]);
+ }
+ kfree(pages);
+}
+EXPORT_SYMBOL(ceph_put_page_vector);
+
+/* Free pages allocated by ceph_alloc_page_vector(), then the vector. */
+void ceph_release_page_vector(struct page **pages, int num_pages)
+{
+ int i;
+
+ for (i = 0; i < num_pages; i++)
+ __free_pages(pages[i], 0);
+ kfree(pages);
+}
+EXPORT_SYMBOL(ceph_release_page_vector);
+
+/*
+ * allocate a vector of new pages
+ *
+ * Returns the vector or ERR_PTR(-ENOMEM); nothing leaks on failure.
+ */
+struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
+{
+ struct page **pages;
+ int i;
+
+ pages = kmalloc(sizeof(*pages) * num_pages, flags);
+ if (!pages)
+ return ERR_PTR(-ENOMEM);
+ for (i = 0; i < num_pages; i++) {
+ pages[i] = __page_cache_alloc(flags);
+ if (pages[i] == NULL) {
+ /* release the i pages allocated so far */
+ ceph_release_page_vector(pages, i);
+ return ERR_PTR(-ENOMEM);
+ }
+ }
+ return pages;
+}
+EXPORT_SYMBOL(ceph_alloc_page_vector);
+
+/*
+ * copy user data into a page vector
+ *
+ * @off is the byte offset into the first page. Returns @len, or
+ * -EFAULT if a copy_from_user() makes no progress at all.
+ */
+int ceph_copy_user_to_page_vector(struct page **pages,
+ const void __user *data,
+ loff_t off, size_t len)
+{
+ int i = 0;
+ int po = off & ~PAGE_CACHE_MASK;
+ int left = len;
+ int l, bad;
+
+ while (left > 0) {
+ l = min_t(int, PAGE_CACHE_SIZE-po, left);
+ bad = copy_from_user(page_address(pages[i]) + po, data, l);
+ if (bad == l)
+ return -EFAULT;
+ /* advance only by what was actually copied */
+ data += l - bad;
+ left -= l - bad;
+ po += l - bad;
+ if (po == PAGE_CACHE_SIZE) {
+ po = 0;
+ i++;
+ }
+ }
+ return len;
+}
+EXPORT_SYMBOL(ceph_copy_user_to_page_vector);
+
+/*
+ * Copy kernel data into a page vector; @off is the byte offset into
+ * the first page.
+ */
+void ceph_copy_to_page_vector(struct page **pages,
+ const void *data,
+ loff_t off, size_t len)
+{
+ int i = 0;
+ size_t po = off & ~PAGE_CACHE_MASK;
+ size_t left = len;
+
+ while (left > 0) {
+ size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
+
+ memcpy(page_address(pages[i]) + po, data, l);
+ data += l;
+ left -= l;
+ po += l;
+ if (po == PAGE_CACHE_SIZE) {
+ po = 0;
+ i++;
+ }
+ }
+}
+EXPORT_SYMBOL(ceph_copy_to_page_vector);
+
+/*
+ * Copy data out of a page vector into a kernel buffer; @off is the
+ * byte offset into the first page.
+ */
+void ceph_copy_from_page_vector(struct page **pages,
+ void *data,
+ loff_t off, size_t len)
+{
+ int i = 0;
+ size_t po = off & ~PAGE_CACHE_MASK;
+ size_t left = len;
+
+ while (left > 0) {
+ size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
+
+ memcpy(data, page_address(pages[i]) + po, l);
+ data += l;
+ left -= l;
+ po += l;
+ if (po == PAGE_CACHE_SIZE) {
+ po = 0;
+ i++;
+ }
+ }
+}
+EXPORT_SYMBOL(ceph_copy_from_page_vector);
+
+/*
+ * copy data from a page vector into a user pointer
+ *
+ * @off is the byte offset into the first page. Returns @len, or
+ * -EFAULT if a copy_to_user() makes no progress at all.
+ */
+int ceph_copy_page_vector_to_user(struct page **pages,
+ void __user *data,
+ loff_t off, size_t len)
+{
+ int i = 0;
+ int po = off & ~PAGE_CACHE_MASK;
+ int left = len;
+ int l, bad;
+
+ while (left > 0) {
+ l = min_t(int, left, PAGE_CACHE_SIZE-po);
+ bad = copy_to_user(data, page_address(pages[i]) + po, l);
+ if (bad == l)
+ return -EFAULT;
+ data += l - bad;
+ left -= l - bad;
+ /* NOTE(review): on a partial fault (0 < bad < l) the page
+ * index still advances, skipping the uncopied tail of this
+ * page — confirm callers tolerate this. */
+ if (po) {
+ po += l - bad;
+ if (po == PAGE_CACHE_SIZE)
+ po = 0;
+ }
+ i++;
+ }
+ return len;
+}
+EXPORT_SYMBOL(ceph_copy_page_vector_to_user);
+
+/*
+ * Zero an extent within a page vector. Offset is relative to the
+ * start of the first page.
+ */
+void ceph_zero_page_vector_range(int off, int len, struct page **pages)
+{
+ int i = off >> PAGE_CACHE_SHIFT;
+
+ off &= ~PAGE_CACHE_MASK;
+
+ dout("zero_page_vector_page %u~%u\n", off, len);
+
+ /* leading partial page? */
+ if (off) {
+ int end = min((int)PAGE_CACHE_SIZE, off + len);
+ dout("zeroing %d %p head from %d\n", i, pages[i],
+ (int)off);
+ zero_user_segment(pages[i], off, end);
+ len -= (end - off);
+ i++;
+ }
+ /* whole pages */
+ while (len >= PAGE_CACHE_SIZE) {
+ dout("zeroing %d %p len=%d\n", i, pages[i], len);
+ zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
+ len -= PAGE_CACHE_SIZE;
+ i++;
+ }
+ /* trailing partial page? */
+ if (len) {
+ dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len);
+ zero_user_segment(pages[i], 0, len);
+ }
+}
+EXPORT_SYMBOL(ceph_zero_page_vector_range);
+
diff --git a/libceph/snapshot.c b/libceph/snapshot.c
new file mode 100644
index 0000000..154683f
--- /dev/null
+++ b/libceph/snapshot.c
@@ -0,0 +1,78 @@
+/*
+ * snapshot.c Ceph snapshot context utility routines (part of libceph)
+ *
+ * Copyright (C) 2013 Inktank Storage, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <stddef.h>
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/ceph/libceph.h>
+
+/*
+ * Ceph snapshot contexts are reference counted objects, and the
+ * returned structure holds a single reference. Acquire additional
+ * references with ceph_get_snap_context(), and release them with
+ * ceph_put_snap_context(). When the reference count reaches zero
+ * the entire structure is freed.
+ */
+
+/*
+ * Create a new ceph snapshot context large enough to hold the
+ * indicated number of snapshot ids (which can be 0). Caller has
+ * to fill in snapc->seq and snapc->snaps[0..snap_count-1].
+ *
+ * Returns a null pointer if an error occurs.
+ */
+struct ceph_snap_context *ceph_create_snap_context(u32 snap_count,
+ gfp_t gfp_flags)
+{
+ struct ceph_snap_context *snapc;
+ size_t size;
+
+ /*
+ * Allocate the header plus a trailing array of snap_count ids.
+ * NOTE(review): the multiplication is unchecked; on 32-bit a
+ * huge snap_count could wrap 'size' -- confirm callers bound it.
+ */
+ size = sizeof (struct ceph_snap_context);
+ size += snap_count * sizeof (snapc->snaps[0]);
+ snapc = kzalloc(size, gfp_flags);
+ if (!snapc)
+ return NULL;
+
+ /* kzalloc zeroed seq and snaps[]; caller fills them in */
+ atomic_set(&snapc->nref, 1);
+ snapc->num_snaps = snap_count;
+
+ return snapc;
+}
+EXPORT_SYMBOL(ceph_create_snap_context);
+
+struct ceph_snap_context *ceph_get_snap_context(struct ceph_snap_context *sc)
+{
+ /* Take an additional reference; passing NULL is a harmless no-op. */
+ if (!sc)
+ return NULL;
+ atomic_inc(&sc->nref);
+ return sc;
+}
+EXPORT_SYMBOL(ceph_get_snap_context);
+
+void ceph_put_snap_context(struct ceph_snap_context *sc)
+{
+ /* dropping a NULL context is a no-op */
+ if (!sc)
+ return;
+ /* free the whole structure when the last reference goes away */
+ if (atomic_dec_and_test(&sc->nref)) {
+ /*printk(" deleting snap_context %p\n", sc);*/
+ kfree(sc);
+ }
+}
+EXPORT_SYMBOL(ceph_put_snap_context);
diff --git a/linux/ceph/auth.h b/linux/ceph/auth.h
new file mode 100644
index 0000000..5f33868
--- /dev/null
+++ b/linux/ceph/auth.h
@@ -0,0 +1,116 @@
+#ifndef _FS_CEPH_AUTH_H
+#define _FS_CEPH_AUTH_H
+
+#include <linux/ceph/types.h>
+#include <linux/ceph/buffer.h>
+
+/*
+ * Abstract interface for communicating with the authentication module.
+ * There is some handshake that takes place between us and the monitor
+ * to acquire the necessary keys. These are used to generate an
+ * 'authorizer' that we use when connecting to a service (mds, osd).
+ */
+
+struct ceph_auth_client;
+struct ceph_authorizer;
+
+struct ceph_auth_handshake {
+ struct ceph_authorizer *authorizer;
+ void *authorizer_buf; /* authorizer data sent to the service */
+ size_t authorizer_buf_len;
+ void *authorizer_reply_buf; /* receives the service's reply */
+ size_t authorizer_reply_buf_len;
+};
+
+struct ceph_auth_client_ops {
+ const char *name; /* protocol name, for debugging */
+
+ /*
+ * true if we are authenticated and can connect to
+ * services.
+ */
+ int (*is_authenticated)(struct ceph_auth_client *ac);
+
+ /*
+ * true if we should (re)authenticate, e.g., when our tickets
+ * are getting old and crusty.
+ */
+ int (*should_authenticate)(struct ceph_auth_client *ac);
+
+ /*
+ * build requests and process replies during monitor
+ * handshake. if handle_reply returns -EAGAIN, we build
+ * another request.
+ */
+ int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end);
+ int (*handle_reply)(struct ceph_auth_client *ac, int result,
+ void *buf, void *end);
+
+ /*
+ * Create authorizer for connecting to a service, and verify
+ * the response to authenticate the service.
+ */
+ int (*create_authorizer)(struct ceph_auth_client *ac, int peer_type,
+ struct ceph_auth_handshake *auth);
+ /* ensure that an existing authorizer is up to date */
+ int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type,
+ struct ceph_auth_handshake *auth);
+ int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a, size_t len);
+ void (*destroy_authorizer)(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a);
+ /* drop cached authorizer state for peer_type (presumably to
+ * force renewal -- confirm against the protocol implementations) */
+ void (*invalidate_authorizer)(struct ceph_auth_client *ac,
+ int peer_type);
+
+ /* reset when we (re)connect to a monitor */
+ void (*reset)(struct ceph_auth_client *ac);
+
+ /* tear down all protocol-private state */
+ void (*destroy)(struct ceph_auth_client *ac);
+};
+
+struct ceph_auth_client {
+ u32 protocol; /* CEPH_AUTH_* */
+ void *private; /* for use by protocol implementation */
+ const struct ceph_auth_client_ops *ops; /* null iff protocol==0 */
+
+ bool negotiating; /* true if negotiating protocol */
+ const char *name; /* entity name */
+ u64 global_id; /* our unique id in system */
+ const struct ceph_crypto_key *key; /* our secret key */
+ unsigned want_keys; /* which services we want */
+
+ struct mutex mutex; /* NOTE(review): presumably guards the
+ fields above -- confirm in auth.c */
+};
+
+extern struct ceph_auth_client *ceph_auth_init(const char *name,
+ const struct ceph_crypto_key *key);
+extern void ceph_auth_destroy(struct ceph_auth_client *ac);
+
+extern void ceph_auth_reset(struct ceph_auth_client *ac);
+
+extern int ceph_auth_build_hello(struct ceph_auth_client *ac,
+ void *buf, size_t len);
+extern int ceph_handle_auth_reply(struct ceph_auth_client *ac,
+ void *buf, size_t len,
+ void *reply_buf, size_t reply_len);
+extern int ceph_entity_name_encode(const char *name, void **p, void *end);
+
+extern int ceph_build_auth(struct ceph_auth_client *ac,
+ void *msg_buf, size_t msg_len);
+
+extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac);
+extern int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
+ int peer_type,
+ struct ceph_auth_handshake *auth);
+extern void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a);
+extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
+ int peer_type,
+ struct ceph_auth_handshake *a);
+extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a,
+ size_t len);
+extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
+ int peer_type);
+
+#endif
diff --git a/linux/ceph/buffer.h b/linux/ceph/buffer.h
new file mode 100644
index 0000000..07ad423
--- /dev/null
+++ b/linux/ceph/buffer.h
@@ -0,0 +1,38 @@
+#ifndef __FS_CEPH_BUFFER_H
+#define __FS_CEPH_BUFFER_H
+
+#include <linux/kref.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/types.h>
+#include <linux/uio.h>
+
+/*
+ * a simple reference counted buffer.
+ *
+ * use kmalloc for small sizes (<= one page), vmalloc for larger
+ * sizes.
+ */
+struct ceph_buffer {
+ struct kref kref; /* reference count */
+ struct kvec vec; /* iov_base/iov_len: the buffered data */
+ size_t alloc_len; /* allocated size -- presumably may exceed
+ vec.iov_len; confirm in ceph_buffer_new */
+};
+
+extern struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp);
+extern void ceph_buffer_release(struct kref *kref);
+
+static inline struct ceph_buffer *ceph_buffer_get(struct ceph_buffer *b)
+{
+ /* take an extra reference; caller must not pass NULL */
+ kref_get(&b->kref);
+ return b;
+}
+
+static inline void ceph_buffer_put(struct ceph_buffer *b)
+{
+ /* drop a reference (b must be non-NULL); ceph_buffer_release
+ * runs on the final put */
+ kref_put(&b->kref, ceph_buffer_release);
+}
+
+extern int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end);
+
+#endif
diff --git a/linux/ceph/ceph_debug.h b/linux/ceph/ceph_debug.h
new file mode 100644
index 0000000..aa2e191
--- /dev/null
+++ b/linux/ceph/ceph_debug.h
@@ -0,0 +1,38 @@
+#ifndef _FS_CEPH_DEBUG_H
+#define _FS_CEPH_DEBUG_H
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#ifdef CONFIG_CEPH_LIB_PRETTYDEBUG
+
+/*
+ * wrap pr_debug to include a filename:lineno prefix on each line.
+ * this incurs some overhead (kernel size and execution time) due to
+ * the extra function call at each call site.
+ */
+
+# if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
+extern const char *ceph_file_part(const char *s, int len);
+# define dout(fmt, ...) \
+ pr_debug("%.*s %12.12s:%-4d : " fmt, \
+ 8 - (int)sizeof(KBUILD_MODNAME), " ", \
+ ceph_file_part(__FILE__, sizeof(__FILE__)), \
+ __LINE__, ##__VA_ARGS__)
+# else
+/* faux printk call just to see any compiler warnings. */
+# define dout(fmt, ...) do { \
+ if (0) \
+ printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
+ } while (0)
+# endif
+
+#else
+
+/*
+ * or, just wrap pr_debug
+ */
+# define dout(fmt, ...) pr_debug(" " fmt, ##__VA_ARGS__)
+
+#endif
+
+#endif
diff --git a/linux/ceph/ceph_features.h b/linux/ceph/ceph_features.h
new file mode 100644
index 0000000..d12659c
--- /dev/null
+++ b/linux/ceph/ceph_features.h
@@ -0,0 +1,104 @@
+#ifndef __CEPH_FEATURES
+#define __CEPH_FEATURES
+
+/*
+ * feature bits
+ */
+#define CEPH_FEATURE_UID (1ULL<<0)
+#define CEPH_FEATURE_NOSRCADDR (1ULL<<1)
+#define CEPH_FEATURE_MONCLOCKCHECK (1ULL<<2)
+#define CEPH_FEATURE_FLOCK (1ULL<<3)
+#define CEPH_FEATURE_SUBSCRIBE2 (1ULL<<4)
+#define CEPH_FEATURE_MONNAMES (1ULL<<5)
+#define CEPH_FEATURE_RECONNECT_SEQ (1ULL<<6)
+#define CEPH_FEATURE_DIRLAYOUTHASH (1ULL<<7)
+#define CEPH_FEATURE_OBJECTLOCATOR (1ULL<<8)
+#define CEPH_FEATURE_PGID64 (1ULL<<9)
+#define CEPH_FEATURE_INCSUBOSDMAP (1ULL<<10)
+#define CEPH_FEATURE_PGPOOL3 (1ULL<<11)
+#define CEPH_FEATURE_OSDREPLYMUX (1ULL<<12)
+#define CEPH_FEATURE_OSDENC (1ULL<<13)
+#define CEPH_FEATURE_OMAP (1ULL<<14)
+#define CEPH_FEATURE_MONENC (1ULL<<15)
+#define CEPH_FEATURE_QUERY_T (1ULL<<16)
+#define CEPH_FEATURE_INDEP_PG_MAP (1ULL<<17)
+#define CEPH_FEATURE_CRUSH_TUNABLES (1ULL<<18)
+#define CEPH_FEATURE_CHUNKY_SCRUB (1ULL<<19)
+#define CEPH_FEATURE_MON_NULLROUTE (1ULL<<20)
+#define CEPH_FEATURE_MON_GV (1ULL<<21)
+#define CEPH_FEATURE_BACKFILL_RESERVATION (1ULL<<22)
+#define CEPH_FEATURE_MSG_AUTH (1ULL<<23)
+#define CEPH_FEATURE_RECOVERY_RESERVATION (1ULL<<24)
+#define CEPH_FEATURE_CRUSH_TUNABLES2 (1ULL<<25)
+#define CEPH_FEATURE_CREATEPOOLID (1ULL<<26)
+#define CEPH_FEATURE_REPLY_CREATE_INODE (1ULL<<27)
+#define CEPH_FEATURE_OSD_HBMSGS (1ULL<<28)
+#define CEPH_FEATURE_MDSENC (1ULL<<29)
+#define CEPH_FEATURE_OSDHASHPSPOOL (1ULL<<30)
+#define CEPH_FEATURE_MON_SINGLE_PAXOS (1ULL<<31)
+#define CEPH_FEATURE_OSD_SNAPMAPPER (1ULL<<32)
+#define CEPH_FEATURE_MON_SCRUB (1ULL<<33)
+#define CEPH_FEATURE_OSD_PACKED_RECOVERY (1ULL<<34)
+#define CEPH_FEATURE_OSD_CACHEPOOL (1ULL<<35)
+#define CEPH_FEATURE_CRUSH_V2 (1ULL<<36) /* new indep; SET_* steps */
+#define CEPH_FEATURE_EXPORT_PEER (1ULL<<37)
+#define CEPH_FEATURE_OSD_ERASURE_CODES (1ULL<<38)
+#define CEPH_FEATURE_OSD_TMAP2OMAP (1ULL<<38) /* overlap with EC */
+/* The process supports new-style OSDMap encoding. Monitors also use
+ this bit to determine if peers support NAK messages. */
+#define CEPH_FEATURE_OSDMAP_ENC (1ULL<<39)
+#define CEPH_FEATURE_MDS_INLINE_DATA (1ULL<<40)
+#define CEPH_FEATURE_CRUSH_TUNABLES3 (1ULL<<41)
+#define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41) /* overlap w/ tunables3 */
+
+/*
+ * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
+ * vector to evaluate to 64 bit ~0. To cope, we designate 1ULL << 63
+ * to mean 33 bit ~0, and introduce a helper below to do the
+ * translation.
+ *
+ * This was introduced by ceph.git commit
+ * 9ea02b84104045c2ffd7e7f4e7af512953855ecd v0.58-657-g9ea02b8
+ * and fixed by ceph.git commit
+ * 4255b5c2fb54ae40c53284b3ab700fdfc7e61748 v0.65-263-g4255b5c
+ */
+#define CEPH_FEATURE_RESERVED (1ULL<<63)
+
+static inline u64 ceph_sanitize_features(u64 features)
+{
+ /*
+ * The reserved high bit marks the buggy pre-fix encoding; map it
+ * to "everything through OSD_SNAPMAPPER" (low 33 bits set).
+ */
+ return (features & CEPH_FEATURE_RESERVED) ?
+ 0x1ffffffffull : features;
+}
+
+/*
+ * Features supported.
+ */
+#define CEPH_FEATURES_SUPPORTED_DEFAULT \
+ (CEPH_FEATURE_NOSRCADDR | \
+ CEPH_FEATURE_RECONNECT_SEQ | \
+ CEPH_FEATURE_PGID64 | \
+ CEPH_FEATURE_PGPOOL3 | \
+ CEPH_FEATURE_OSDENC | \
+ CEPH_FEATURE_CRUSH_TUNABLES | \
+ CEPH_FEATURE_CRUSH_TUNABLES2 | \
+ CEPH_FEATURE_REPLY_CREATE_INODE | \
+ CEPH_FEATURE_OSDHASHPSPOOL | \
+ CEPH_FEATURE_OSD_CACHEPOOL | \
+ CEPH_FEATURE_CRUSH_V2 | \
+ CEPH_FEATURE_EXPORT_PEER | \
+ CEPH_FEATURE_OSDMAP_ENC | \
+ CEPH_FEATURE_CRUSH_TUNABLES3 | \
+ CEPH_FEATURE_OSD_PRIMARY_AFFINITY)
+
+#define CEPH_FEATURES_REQUIRED_DEFAULT \
+ (CEPH_FEATURE_NOSRCADDR | \
+ CEPH_FEATURE_RECONNECT_SEQ | \
+ CEPH_FEATURE_PGID64 | \
+ CEPH_FEATURE_PGPOOL3 | \
+ CEPH_FEATURE_OSDENC)
+
+#endif
diff --git a/linux/ceph/ceph_frag.h b/linux/ceph/ceph_frag.h
new file mode 100644
index 0000000..5babb8e
--- /dev/null
+++ b/linux/ceph/ceph_frag.h
@@ -0,0 +1,109 @@
+#ifndef FS_CEPH_FRAG_H
+#define FS_CEPH_FRAG_H
+
+/*
+ * "Frags" are a way to describe a subset of a 32-bit number space,
+ * using a mask and a value to match against that mask. Any given frag
+ * (subset of the number space) can be partitioned into 2^n sub-frags.
+ *
+ * Frags are encoded into a 32-bit word:
+ * 8 upper bits = "bits"
+ * 24 lower bits = "value"
+ * (We could go to 5+27 bits, but who cares.)
+ *
+ * We use the _most_ significant bits of the 24 bit value. This makes
+ * values logically sort.
+ *
+ * Unfortunately, because the "bits" field is still in the high bits, we
+ * can't sort encoded frags numerically. However, it does allow you
+ * to feed encoded frags as values into frag_contains_value.
+ */
+static inline __u32 ceph_frag_make(__u32 b, __u32 v)
+{
+ /* 'b' goes in the top 8 bits; keep only the top b of v's 24 bits */
+ return (b << 24) |
+ (v & (0xffffffu << (24-b)) & 0xffffffu);
+}
+/* number of significant bits this frag specifies (0..24) */
+static inline __u32 ceph_frag_bits(__u32 f)
+{
+ return f >> 24;
+}
+/* the 24-bit value field */
+static inline __u32 ceph_frag_value(__u32 f)
+{
+ return f & 0xffffffu;
+}
+/* mask covering the significant (top) bits of the value */
+static inline __u32 ceph_frag_mask(__u32 f)
+{
+ return (0xffffffu << (24-ceph_frag_bits(f))) & 0xffffffu;
+}
+/* shift that positions the mask: 24 - bits */
+static inline __u32 ceph_frag_mask_shift(__u32 f)
+{
+ return 24 - ceph_frag_bits(f);
+}
+
+/* does value v fall within frag f's subset of the number space? */
+static inline int ceph_frag_contains_value(__u32 f, __u32 v)
+{
+ return (v & ceph_frag_mask(f)) == ceph_frag_value(f);
+}
+static inline int ceph_frag_contains_frag(__u32 f, __u32 sub)
+{
+ /* is sub as specific as us, and contained by us? */
+ return ceph_frag_bits(sub) >= ceph_frag_bits(f) &&
+ (ceph_frag_value(sub) & ceph_frag_mask(f)) == ceph_frag_value(f);
+}
+
+static inline __u32 ceph_frag_parent(__u32 f)
+{
+ /* drop one bit of specificity; frag_make re-masks the value */
+ return ceph_frag_make(ceph_frag_bits(f) - 1,
+ ceph_frag_value(f) & (ceph_frag_mask(f) << 1));
+}
+static inline int ceph_frag_is_left_child(__u32 f)
+{
+ /* left child: the branch bit (0x1000000 >> bits) is clear */
+ return ceph_frag_bits(f) > 0 &&
+ (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 0;
+}
+static inline int ceph_frag_is_right_child(__u32 f)
+{
+ /*
+ * A right child has the branch bit (0x1000000 >> bits) set.
+ * The previous code compared the masked value to 1, which is
+ * never true for bits < 24 (the bit is not at position 0), so
+ * the predicate always returned false; test for non-zero, the
+ * exact complement of ceph_frag_is_left_child().
+ */
+ return ceph_frag_bits(f) > 0 &&
+ (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) != 0;
+}
+static inline __u32 ceph_frag_sibling(__u32 f)
+{
+ /* flip the branch bit to reach the other child of our parent */
+ return ceph_frag_make(ceph_frag_bits(f),
+ ceph_frag_value(f) ^ (0x1000000 >> ceph_frag_bits(f)));
+}
+/* left child keeps the same value with one more significant bit */
+static inline __u32 ceph_frag_left_child(__u32 f)
+{
+ return ceph_frag_make(ceph_frag_bits(f)+1, ceph_frag_value(f));
+}
+static inline __u32 ceph_frag_right_child(__u32 f)
+{
+ /* additionally set the newly-significant branch bit */
+ return ceph_frag_make(ceph_frag_bits(f)+1,
+ ceph_frag_value(f) | (0x1000000 >> (1+ceph_frag_bits(f))));
+}
+static inline __u32 ceph_frag_make_child(__u32 f, int by, int i)
+{
+ /* descend 'by' levels; i selects which of the 2^by children */
+ int newbits = ceph_frag_bits(f) + by;
+ return ceph_frag_make(newbits,
+ ceph_frag_value(f) | (i << (24 - newbits)));
+}
+static inline int ceph_frag_is_leftmost(__u32 f)
+{
+ return ceph_frag_value(f) == 0;
+}
+static inline int ceph_frag_is_rightmost(__u32 f)
+{
+ return ceph_frag_value(f) == ceph_frag_mask(f);
+}
+static inline __u32 ceph_frag_next(__u32 f)
+{
+ /*
+ * NOTE(review): no rightmost guard -- frag_make's masking makes
+ * this wrap to the leftmost frag when f is rightmost; callers
+ * presumably check ceph_frag_is_rightmost() first.
+ */
+ return ceph_frag_make(ceph_frag_bits(f),
+ ceph_frag_value(f) + (0x1000000 >> ceph_frag_bits(f)));
+}
+
+/*
+ * comparator to sort frags logically, as when traversing the
+ * number space in ascending order...
+ */
+int ceph_frag_compare(__u32 a, __u32 b);
+
+#endif
diff --git a/linux/ceph/ceph_fs.h b/linux/ceph/ceph_fs.h
new file mode 100644
index 0000000..5f6db18
--- /dev/null
+++ b/linux/ceph/ceph_fs.h
@@ -0,0 +1,789 @@
+/*
+ * ceph_fs.h - Ceph constants and data types to share between kernel and
+ * user space.
+ *
+ * Most types in this file are defined as little-endian, and are
+ * primarily intended to describe data structures that pass over the
+ * wire or that are stored on disk.
+ *
+ * LGPL2
+ */
+
+#ifndef CEPH_FS_H
+#define CEPH_FS_H
+
+#include <linux/ceph/msgr.h>
+#include <linux/ceph/rados.h>
+
+/*
+ * subprotocol versions. when specific messages types or high-level
+ * protocols change, bump the affected components. we keep rev
+ * internal cluster protocols separately from the public,
+ * client-facing protocol.
+ */
+#define CEPH_OSDC_PROTOCOL 24 /* server/client */
+#define CEPH_MDSC_PROTOCOL 32 /* server/client */
+#define CEPH_MONC_PROTOCOL 15 /* server/client */
+
+
+#define CEPH_INO_ROOT 1
+#define CEPH_INO_CEPH 2 /* hidden .ceph dir */
+#define CEPH_INO_DOTDOT 3 /* used by ceph fuse for parent (..) */
+
+/* arbitrary limit on max # of monitors (cluster of 3 is typical) */
+#define CEPH_MAX_MON 31
+
+/*
+ * ceph_file_layout - describe data layout for a file/inode
+ */
+struct ceph_file_layout {
+ /* file -> object mapping */
+ __le32 fl_stripe_unit; /* stripe unit, in bytes. must be multiple
+ of page size. */
+ __le32 fl_stripe_count; /* over this many objects */
+ __le32 fl_object_size; /* until objects are this big, then move to
+ new objects */
+ __le32 fl_cas_hash; /* UNUSED. 0 = none; 1 = sha256 */
+
+ /* pg -> disk layout */
+ __le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */
+
+ /* object -> pg layout */
+ __le32 fl_unused; /* unused; used to be preferred primary for pg (-1 for none) */
+ __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */
+} __attribute__ ((packed));
+
+#define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit))
+#define ceph_file_layout_stripe_count(l) \
+ ((__s32)le32_to_cpu((l).fl_stripe_count))
+#define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size))
+#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash))
+#define ceph_file_layout_object_su(l) \
+ ((__s32)le32_to_cpu((l).fl_object_stripe_unit))
+#define ceph_file_layout_pg_pool(l) \
+ ((__s32)le32_to_cpu((l).fl_pg_pool))
+
+/* bytes spanned by one full stripe: stripe unit * stripe count */
+static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l)
+{
+ return le32_to_cpu(l->fl_stripe_unit) *
+ le32_to_cpu(l->fl_stripe_count);
+}
+
+/* "period" == bytes before i start on a new set of objects */
+static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l)
+{
+ return le32_to_cpu(l->fl_object_size) *
+ le32_to_cpu(l->fl_stripe_count);
+}
+
+#define CEPH_MIN_STRIPE_UNIT 65536
+
+int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
+
+struct ceph_dir_layout {
+ __u8 dl_dir_hash; /* see ceph_hash.h for ids */
+ __u8 dl_unused1;
+ __u16 dl_unused2;
+ __u32 dl_unused3;
+} __attribute__ ((packed));
+
+/* crypto algorithms */
+#define CEPH_CRYPTO_NONE 0x0
+#define CEPH_CRYPTO_AES 0x1
+
+#define CEPH_AES_IV "cephsageyudagreg"
+
+/* security/authentication protocols */
+#define CEPH_AUTH_UNKNOWN 0x0
+#define CEPH_AUTH_NONE 0x1
+#define CEPH_AUTH_CEPHX 0x2
+
+#define CEPH_AUTH_UID_DEFAULT ((__u64) -1)
+
+
+/*********************************************
+ * message layer
+ */
+
+/*
+ * message types
+ */
+
+/* misc */
+#define CEPH_MSG_SHUTDOWN 1
+#define CEPH_MSG_PING 2
+
+/* client <-> monitor */
+#define CEPH_MSG_MON_MAP 4
+#define CEPH_MSG_MON_GET_MAP 5
+#define CEPH_MSG_STATFS 13
+#define CEPH_MSG_STATFS_REPLY 14
+#define CEPH_MSG_MON_SUBSCRIBE 15
+#define CEPH_MSG_MON_SUBSCRIBE_ACK 16
+#define CEPH_MSG_AUTH 17
+#define CEPH_MSG_AUTH_REPLY 18
+#define CEPH_MSG_MON_GET_VERSION 19
+#define CEPH_MSG_MON_GET_VERSION_REPLY 20
+
+/* client <-> mds */
+#define CEPH_MSG_MDS_MAP 21
+
+#define CEPH_MSG_CLIENT_SESSION 22
+#define CEPH_MSG_CLIENT_RECONNECT 23
+
+#define CEPH_MSG_CLIENT_REQUEST 24
+#define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
+#define CEPH_MSG_CLIENT_REPLY 26
+#define CEPH_MSG_CLIENT_CAPS 0x310
+#define CEPH_MSG_CLIENT_LEASE 0x311
+#define CEPH_MSG_CLIENT_SNAP 0x312
+#define CEPH_MSG_CLIENT_CAPRELEASE 0x313
+
+/* pool ops */
+#define CEPH_MSG_POOLOP_REPLY 48
+#define CEPH_MSG_POOLOP 49
+
+
+/* osd */
+#define CEPH_MSG_OSD_MAP 41
+#define CEPH_MSG_OSD_OP 42
+#define CEPH_MSG_OSD_OPREPLY 43
+#define CEPH_MSG_WATCH_NOTIFY 44
+
+
+/* watch-notify operations */
+enum {
+ WATCH_NOTIFY = 1, /* notifying watcher */
+ WATCH_NOTIFY_COMPLETE = 2, /* notifier notified when done */
+};
+
+
+/* pool operations */
+enum {
+ POOL_OP_CREATE = 0x01,
+ POOL_OP_DELETE = 0x02,
+ POOL_OP_AUID_CHANGE = 0x03,
+ POOL_OP_CREATE_SNAP = 0x11,
+ POOL_OP_DELETE_SNAP = 0x12,
+ POOL_OP_CREATE_UNMANAGED_SNAP = 0x21,
+ POOL_OP_DELETE_UNMANAGED_SNAP = 0x22,
+};
+
+struct ceph_mon_request_header {
+ __le64 have_version;
+ __le16 session_mon;
+ __le64 session_mon_tid;
+} __attribute__ ((packed));
+
+struct ceph_mon_statfs {
+ struct ceph_mon_request_header monhdr;
+ struct ceph_fsid fsid;
+} __attribute__ ((packed));
+
+struct ceph_statfs {
+ __le64 kb, kb_used, kb_avail;
+ __le64 num_objects;
+} __attribute__ ((packed));
+
+struct ceph_mon_statfs_reply {
+ struct ceph_fsid fsid;
+ __le64 version;
+ struct ceph_statfs st;
+} __attribute__ ((packed));
+
+const char *ceph_pool_op_name(int op);
+
+struct ceph_mon_poolop {
+ struct ceph_mon_request_header monhdr;
+ struct ceph_fsid fsid;
+ __le32 pool;
+ __le32 op;
+ __le64 auid;
+ __le64 snapid;
+ __le32 name_len;
+} __attribute__ ((packed));
+
+struct ceph_mon_poolop_reply {
+ struct ceph_mon_request_header monhdr;
+ struct ceph_fsid fsid;
+ __le32 reply_code;
+ __le32 epoch;
+ char has_data;
+ char data[0];
+} __attribute__ ((packed));
+
+struct ceph_mon_unmanaged_snap {
+ __le64 snapid;
+} __attribute__ ((packed));
+
+struct ceph_osd_getmap {
+ struct ceph_mon_request_header monhdr;
+ struct ceph_fsid fsid;
+ __le32 start;
+} __attribute__ ((packed));
+
+struct ceph_mds_getmap {
+ struct ceph_mon_request_header monhdr;
+ struct ceph_fsid fsid;
+} __attribute__ ((packed));
+
+struct ceph_client_mount {
+ struct ceph_mon_request_header monhdr;
+} __attribute__ ((packed));
+
+#define CEPH_SUBSCRIBE_ONETIME 1 /* i want only 1 update after have */
+
+struct ceph_mon_subscribe_item {
+ __le64 have_version; __le64 have;
+ __u8 onetime;
+} __attribute__ ((packed));
+
+struct ceph_mon_subscribe_ack {
+ __le32 duration; /* seconds */
+ struct ceph_fsid fsid;
+} __attribute__ ((packed));
+
+/*
+ * mdsmap flags
+ */
+#define CEPH_MDSMAP_DOWN (1<<0) /* cluster deliberately down */
+
+/*
+ * mds states
+ * > 0 -> in
+ * <= 0 -> out
+ */
+#define CEPH_MDS_STATE_DNE 0 /* down, does not exist. */
+#define CEPH_MDS_STATE_STOPPED -1 /* down, once existed, but no subtrees.
+ empty log. */
+#define CEPH_MDS_STATE_BOOT -4 /* up, boot announcement. */
+#define CEPH_MDS_STATE_STANDBY -5 /* up, idle. waiting for assignment. */
+#define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */
+#define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */
+#define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */
+#define CEPH_MDS_STATE_REPLAYONCE -9 /* up, replaying an active node's journal */
+
+#define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */
+#define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed
+ operations (import, rename, etc.) */
+#define CEPH_MDS_STATE_RECONNECT 10 /* up, reconnect to clients */
+#define CEPH_MDS_STATE_REJOIN 11 /* up, rejoining distributed cache */
+#define CEPH_MDS_STATE_CLIENTREPLAY 12 /* up, replaying client operations */
+#define CEPH_MDS_STATE_ACTIVE 13 /* up, active */
+#define CEPH_MDS_STATE_STOPPING 14 /* up, but exporting metadata */
+
+extern const char *ceph_mds_state_name(int s);
+
+
+/*
+ * metadata lock types.
+ * - these are bitmasks.. we can compose them
+ * - they also define the lock ordering by the MDS
+ * - a few of these are internal to the mds
+ */
+#define CEPH_LOCK_DVERSION 1
+#define CEPH_LOCK_DN 2
+#define CEPH_LOCK_ISNAP 16
+#define CEPH_LOCK_IVERSION 32 /* mds internal */
+#define CEPH_LOCK_IFILE 64
+#define CEPH_LOCK_IAUTH 128
+#define CEPH_LOCK_ILINK 256
+#define CEPH_LOCK_IDFT 512 /* dir frag tree */
+#define CEPH_LOCK_INEST 1024 /* mds internal */
+#define CEPH_LOCK_IXATTR 2048
+#define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */
+#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */
+#define CEPH_LOCK_IPOLICY 16384 /* policy lock on dirs. MDS internal */
+
+/* client_session ops */
+enum {
+ CEPH_SESSION_REQUEST_OPEN,
+ CEPH_SESSION_OPEN,
+ CEPH_SESSION_REQUEST_CLOSE,
+ CEPH_SESSION_CLOSE,
+ CEPH_SESSION_REQUEST_RENEWCAPS,
+ CEPH_SESSION_RENEWCAPS,
+ CEPH_SESSION_STALE,
+ CEPH_SESSION_RECALL_STATE,
+ CEPH_SESSION_FLUSHMSG,
+ CEPH_SESSION_FLUSHMSG_ACK,
+};
+
+extern const char *ceph_session_op_name(int op);
+
+struct ceph_mds_session_head {
+ __le32 op;
+ __le64 seq;
+ struct ceph_timespec stamp;
+ __le32 max_caps, max_leases;
+} __attribute__ ((packed));
+
+/* client_request */
+/*
+ * metadata ops.
+ * & 0x001000 -> write op
+ * & 0x010000 -> follow symlink (e.g. stat(), not lstat()).
+ * & 0x100000 -> use weird ino/path trace
+ */
+#define CEPH_MDS_OP_WRITE 0x001000
+enum {
+ CEPH_MDS_OP_LOOKUP = 0x00100,
+ CEPH_MDS_OP_GETATTR = 0x00101,
+ CEPH_MDS_OP_LOOKUPHASH = 0x00102,
+ CEPH_MDS_OP_LOOKUPPARENT = 0x00103,
+ CEPH_MDS_OP_LOOKUPINO = 0x00104,
+ CEPH_MDS_OP_LOOKUPNAME = 0x00105,
+
+ CEPH_MDS_OP_SETXATTR = 0x01105,
+ CEPH_MDS_OP_RMXATTR = 0x01106,
+ CEPH_MDS_OP_SETLAYOUT = 0x01107,
+ CEPH_MDS_OP_SETATTR = 0x01108,
+ CEPH_MDS_OP_SETFILELOCK= 0x01109,
+ CEPH_MDS_OP_GETFILELOCK= 0x00110,
+ CEPH_MDS_OP_SETDIRLAYOUT=0x0110a,
+
+ CEPH_MDS_OP_MKNOD = 0x01201,
+ CEPH_MDS_OP_LINK = 0x01202,
+ CEPH_MDS_OP_UNLINK = 0x01203,
+ CEPH_MDS_OP_RENAME = 0x01204,
+ CEPH_MDS_OP_MKDIR = 0x01220,
+ CEPH_MDS_OP_RMDIR = 0x01221,
+ CEPH_MDS_OP_SYMLINK = 0x01222,
+
+ CEPH_MDS_OP_CREATE = 0x01301,
+ CEPH_MDS_OP_OPEN = 0x00302,
+ CEPH_MDS_OP_READDIR = 0x00305,
+
+ CEPH_MDS_OP_LOOKUPSNAP = 0x00400,
+ CEPH_MDS_OP_MKSNAP = 0x01400,
+ CEPH_MDS_OP_RMSNAP = 0x01401,
+ CEPH_MDS_OP_LSSNAP = 0x00402,
+};
+
+extern const char *ceph_mds_op_name(int op);
+
+
+#define CEPH_SETATTR_MODE 1
+#define CEPH_SETATTR_UID 2
+#define CEPH_SETATTR_GID 4
+#define CEPH_SETATTR_MTIME 8
+#define CEPH_SETATTR_ATIME 16
+#define CEPH_SETATTR_SIZE 32
+#define CEPH_SETATTR_CTIME 64
+
+/*
+ * Ceph setxattr request flags.
+ */
+#define CEPH_XATTR_CREATE (1 << 0)
+#define CEPH_XATTR_REPLACE (1 << 1)
+#define CEPH_XATTR_REMOVE (1 << 31)
+
+union ceph_mds_request_args {
+ struct {
+ __le32 mask; /* CEPH_CAP_* */
+ } __attribute__ ((packed)) getattr;
+ struct {
+ __le32 mode;
+ __le32 uid;
+ __le32 gid;
+ struct ceph_timespec mtime;
+ struct ceph_timespec atime;
+ __le64 size, old_size; /* old_size needed by truncate */
+ __le32 mask; /* CEPH_SETATTR_* */
+ } __attribute__ ((packed)) setattr;
+ struct {
+ __le32 frag; /* which dir fragment */
+ __le32 max_entries; /* how many dentries to grab */
+ __le32 max_bytes;
+ } __attribute__ ((packed)) readdir;
+ struct {
+ __le32 mode;
+ __le32 rdev;
+ } __attribute__ ((packed)) mknod;
+ struct {
+ __le32 mode;
+ } __attribute__ ((packed)) mkdir;
+ struct {
+ __le32 flags;
+ __le32 mode;
+ __le32 stripe_unit; /* layout for newly created file */
+ __le32 stripe_count; /* ... */
+ __le32 object_size;
+ __le32 file_replication;
+ __le32 unused; /* used to be preferred osd */
+ } __attribute__ ((packed)) open;
+ struct {
+ __le32 flags;
+ } __attribute__ ((packed)) setxattr;
+ struct {
+ struct ceph_file_layout layout;
+ } __attribute__ ((packed)) setlayout;
+ struct {
+ __u8 rule; /* currently fcntl or flock */
+ __u8 type; /* shared, exclusive, remove*/
+ __le64 owner; /* owner of the lock */
+ __le64 pid; /* process id requesting the lock */
+ __le64 start; /* initial location to lock */
+ __le64 length; /* num bytes to lock from start */
+ __u8 wait; /* will caller wait for lock to become available? */
+ } __attribute__ ((packed)) filelock_change;
+} __attribute__ ((packed));
+
+#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */
+#define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */
+
+struct ceph_mds_request_head {
+ __le64 oldest_client_tid;
+ __le32 mdsmap_epoch; /* on client */
+ __le32 flags; /* CEPH_MDS_FLAG_* */
+ __u8 num_retry, num_fwd; /* count retry, fwd attempts */
+ __le16 num_releases; /* # include cap/lease release records */
+ __le32 op; /* mds op code */
+ __le32 caller_uid, caller_gid;
+ __le64 ino; /* use this ino for openc, mkdir, mknod,
+ etc. (if replaying) */
+ union ceph_mds_request_args args;
+} __attribute__ ((packed));
+
+/* cap/lease release record */
+struct ceph_mds_request_release {
+ __le64 ino, cap_id; /* ino and unique cap id */
+ __le32 caps, wanted; /* new issued, wanted */
+ __le32 seq, issue_seq, mseq;
+ __le32 dname_seq; /* if releasing a dentry lease, a */
+ __le32 dname_len; /* string follows. */
+} __attribute__ ((packed));
+
+/* client reply */
+struct ceph_mds_reply_head {
+ __le32 op;
+ __le32 result;
+ __le32 mdsmap_epoch;
+ __u8 safe; /* true if committed to disk */
+ __u8 is_dentry, is_target; /* true if dentry, target inode records
+ are included with reply */
+} __attribute__ ((packed));
+
+/* one for each node split */
+struct ceph_frag_tree_split {
+ __le32 frag; /* this frag splits... */
+ __le32 by; /* ...by this many bits */
+} __attribute__ ((packed));
+
+struct ceph_frag_tree_head {
+ __le32 nsplits; /* num ceph_frag_tree_split records */
+ struct ceph_frag_tree_split splits[];
+} __attribute__ ((packed));
+
+/* capability issue, for bundling with mds reply */
+struct ceph_mds_reply_cap {
+ __le32 caps, wanted; /* caps issued, wanted */
+ __le64 cap_id;
+ __le32 seq, mseq;
+ __le64 realm; /* snap realm */
+ __u8 flags; /* CEPH_CAP_FLAG_* */
+} __attribute__ ((packed));
+
+#define CEPH_CAP_FLAG_AUTH (1 << 0) /* cap is issued by auth mds */
+#define CEPH_CAP_FLAG_RELEASE (1 << 1) /* release the cap */
+
+/* inode record, for bundling with mds reply */
+struct ceph_mds_reply_inode {
+ __le64 ino;
+ __le64 snapid;
+ __le32 rdev;
+ __le64 version; /* inode version */
+ __le64 xattr_version; /* version for xattr blob */
+ struct ceph_mds_reply_cap cap; /* caps issued for this inode */
+ struct ceph_file_layout layout;
+ struct ceph_timespec ctime, mtime, atime;
+ __le32 time_warp_seq;
+ __le64 size, max_size, truncate_size;
+ __le32 truncate_seq;
+ __le32 mode, uid, gid;
+ __le32 nlink;
+ __le64 files, subdirs, rbytes, rfiles, rsubdirs; /* dir stats */
+ struct ceph_timespec rctime;
+ struct ceph_frag_tree_head fragtree; /* (must be at end of struct) */
+} __attribute__ ((packed));
+/* followed by frag array, symlink string, dir layout, xattr blob */
+
+/* reply_lease follows dname, and reply_inode */
+struct ceph_mds_reply_lease {
+ __le16 mask; /* lease type(s) */
+ __le32 duration_ms; /* lease duration */
+ __le32 seq;
+} __attribute__ ((packed));
+
+struct ceph_mds_reply_dirfrag {
+ __le32 frag; /* fragment */
+ __le32 auth; /* auth mds, if this is a delegation point */
+ __le32 ndist; /* number of mds' this is replicated on */
+ __le32 dist[];
+} __attribute__ ((packed));
+
+#define CEPH_LOCK_FCNTL 1
+#define CEPH_LOCK_FLOCK 2
+
+#define CEPH_LOCK_SHARED 1
+#define CEPH_LOCK_EXCL 2
+#define CEPH_LOCK_UNLOCK 4
+
+struct ceph_filelock {
+ __le64 start;/* file offset to start lock at */
+ __le64 length; /* num bytes to lock; 0 for all following start */
+ __le64 client; /* which client holds the lock */
+ __le64 owner; /* owner the lock */
+ __le64 pid; /* process id holding the lock on the client */
+ __u8 type; /* shared lock, exclusive lock, or unlock */
+} __attribute__ ((packed));
+
+
+/* file access modes */
+#define CEPH_FILE_MODE_PIN 0
+#define CEPH_FILE_MODE_RD 1
+#define CEPH_FILE_MODE_WR 2
+#define CEPH_FILE_MODE_RDWR 3 /* RD | WR */
+#define CEPH_FILE_MODE_LAZY 4 /* lazy io */
+#define CEPH_FILE_MODE_NUM 8 /* bc these are bit fields.. mostly */
+
+int ceph_flags_to_mode(int flags);
+
+
+/* capability bits */
+#define CEPH_CAP_PIN 1 /* no specific capabilities beyond the pin */
+
+/* generic cap bits */
+#define CEPH_CAP_GSHARED 1 /* client can reads */
+#define CEPH_CAP_GEXCL 2 /* client can read and update */
+#define CEPH_CAP_GCACHE 4 /* (file) client can cache reads */
+#define CEPH_CAP_GRD 8 /* (file) client can read */
+#define CEPH_CAP_GWR 16 /* (file) client can write */
+#define CEPH_CAP_GBUFFER 32 /* (file) client can buffer writes */
+#define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */
+#define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */
+
+#define CEPH_CAP_SIMPLE_BITS 2
+#define CEPH_CAP_FILE_BITS 8
+
+/* per-lock shift */
+#define CEPH_CAP_SAUTH 2
+#define CEPH_CAP_SLINK 4
+#define CEPH_CAP_SXATTR 6
+#define CEPH_CAP_SFILE 8
+#define CEPH_CAP_SFLOCK 20
+
+#define CEPH_CAP_BITS 22
+
+/* composed values */
+#define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH)
+#define CEPH_CAP_AUTH_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SAUTH)
+#define CEPH_CAP_LINK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SLINK)
+#define CEPH_CAP_LINK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SLINK)
+#define CEPH_CAP_XATTR_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SXATTR)
+#define CEPH_CAP_XATTR_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SXATTR)
+#define CEPH_CAP_FILE(x) (x << CEPH_CAP_SFILE)
+#define CEPH_CAP_FILE_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFILE)
+#define CEPH_CAP_FILE_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFILE)
+#define CEPH_CAP_FILE_CACHE (CEPH_CAP_GCACHE << CEPH_CAP_SFILE)
+#define CEPH_CAP_FILE_RD (CEPH_CAP_GRD << CEPH_CAP_SFILE)
+#define CEPH_CAP_FILE_WR (CEPH_CAP_GWR << CEPH_CAP_SFILE)
+#define CEPH_CAP_FILE_BUFFER (CEPH_CAP_GBUFFER << CEPH_CAP_SFILE)
+#define CEPH_CAP_FILE_WREXTEND (CEPH_CAP_GWREXTEND << CEPH_CAP_SFILE)
+#define CEPH_CAP_FILE_LAZYIO (CEPH_CAP_GLAZYIO << CEPH_CAP_SFILE)
+#define CEPH_CAP_FLOCK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFLOCK)
+#define CEPH_CAP_FLOCK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFLOCK)
+
+
+/* cap masks (for getattr) */
+#define CEPH_STAT_CAP_INODE CEPH_CAP_PIN
+#define CEPH_STAT_CAP_TYPE CEPH_CAP_PIN /* mode >> 12 */
+#define CEPH_STAT_CAP_SYMLINK CEPH_CAP_PIN
+#define CEPH_STAT_CAP_UID CEPH_CAP_AUTH_SHARED
+#define CEPH_STAT_CAP_GID CEPH_CAP_AUTH_SHARED
+#define CEPH_STAT_CAP_MODE CEPH_CAP_AUTH_SHARED
+#define CEPH_STAT_CAP_NLINK CEPH_CAP_LINK_SHARED
+#define CEPH_STAT_CAP_LAYOUT CEPH_CAP_FILE_SHARED
+#define CEPH_STAT_CAP_MTIME CEPH_CAP_FILE_SHARED
+#define CEPH_STAT_CAP_SIZE CEPH_CAP_FILE_SHARED
+#define CEPH_STAT_CAP_ATIME CEPH_CAP_FILE_SHARED /* fixme */
+#define CEPH_STAT_CAP_XATTR CEPH_CAP_XATTR_SHARED
+#define CEPH_STAT_CAP_INODE_ALL (CEPH_CAP_PIN | \
+ CEPH_CAP_AUTH_SHARED | \
+ CEPH_CAP_LINK_SHARED | \
+ CEPH_CAP_FILE_SHARED | \
+ CEPH_CAP_XATTR_SHARED)
+
+#define CEPH_CAP_ANY_SHARED (CEPH_CAP_AUTH_SHARED | \
+ CEPH_CAP_LINK_SHARED | \
+ CEPH_CAP_XATTR_SHARED | \
+ CEPH_CAP_FILE_SHARED)
+#define CEPH_CAP_ANY_RD (CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_RD | \
+ CEPH_CAP_FILE_CACHE)
+
+#define CEPH_CAP_ANY_EXCL (CEPH_CAP_AUTH_EXCL | \
+ CEPH_CAP_LINK_EXCL | \
+ CEPH_CAP_XATTR_EXCL | \
+ CEPH_CAP_FILE_EXCL)
+#define CEPH_CAP_ANY_FILE_WR (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | \
+ CEPH_CAP_FILE_EXCL)
+#define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR)
+#define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \
+ CEPH_CAP_ANY_FILE_WR | CEPH_CAP_FILE_LAZYIO | \
+ CEPH_CAP_PIN)
+
+#define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \
+ CEPH_LOCK_IXATTR)
+
+int ceph_caps_for_mode(int mode);
+
+enum {
+ CEPH_CAP_OP_GRANT, /* mds->client grant */
+ CEPH_CAP_OP_REVOKE, /* mds->client revoke */
+ CEPH_CAP_OP_TRUNC, /* mds->client trunc notify */
+ CEPH_CAP_OP_EXPORT, /* mds has exported the cap */
+ CEPH_CAP_OP_IMPORT, /* mds has imported the cap */
+ CEPH_CAP_OP_UPDATE, /* client->mds update */
+ CEPH_CAP_OP_DROP, /* client->mds drop cap bits */
+ CEPH_CAP_OP_FLUSH, /* client->mds cap writeback */
+ CEPH_CAP_OP_FLUSH_ACK, /* mds->client flushed */
+ CEPH_CAP_OP_FLUSHSNAP, /* client->mds flush snapped metadata */
+ CEPH_CAP_OP_FLUSHSNAP_ACK, /* mds->client flushed snapped metadata */
+ CEPH_CAP_OP_RELEASE, /* client->mds release (clean) cap */
+ CEPH_CAP_OP_RENEW, /* client->mds renewal request */
+};
+
+extern const char *ceph_cap_op_name(int op);
+
+/*
+ * caps message, used for capability callbacks, acks, requests, etc.
+ */
+struct ceph_mds_caps {
+ __le32 op; /* CEPH_CAP_OP_* */
+ __le64 ino, realm;
+ __le64 cap_id;
+ __le32 seq, issue_seq;
+ __le32 caps, wanted, dirty; /* latest issued/wanted/dirty */
+ __le32 migrate_seq;
+ __le64 snap_follows;
+ __le32 snap_trace_len;
+
+ /* authlock */
+ __le32 uid, gid, mode;
+
+ /* linklock */
+ __le32 nlink;
+
+ /* xattrlock */
+ __le32 xattr_len;
+ __le64 xattr_version;
+
+ /* filelock */
+ __le64 size, max_size, truncate_size;
+ __le32 truncate_seq;
+ struct ceph_timespec mtime, atime, ctime;
+ struct ceph_file_layout layout;
+ __le32 time_warp_seq;
+} __attribute__ ((packed));
+
+struct ceph_mds_cap_peer {
+ __le64 cap_id;
+ __le32 seq;
+ __le32 mseq;
+ __le32 mds;
+ __u8 flags;
+} __attribute__ ((packed));
+
+/* cap release msg head */
+struct ceph_mds_cap_release {
+ __le32 num; /* number of cap_items that follow */
+} __attribute__ ((packed));
+
+struct ceph_mds_cap_item {
+ __le64 ino;
+ __le64 cap_id;
+ __le32 migrate_seq, seq;
+} __attribute__ ((packed));
+
+#define CEPH_MDS_LEASE_REVOKE 1 /* mds -> client */
+#define CEPH_MDS_LEASE_RELEASE 2 /* client -> mds */
+#define CEPH_MDS_LEASE_RENEW 3 /* client <-> mds */
+#define CEPH_MDS_LEASE_REVOKE_ACK 4 /* client -> mds */
+
+extern const char *ceph_lease_op_name(int o);
+
+/* lease msg header */
+struct ceph_mds_lease {
+ __u8 action; /* CEPH_MDS_LEASE_* */
+ __le16 mask; /* which lease */
+ __le64 ino;
+ __le64 first, last; /* snap range */
+ __le32 seq;
+ __le32 duration_ms; /* duration of renewal */
+} __attribute__ ((packed));
+/* followed by a __le32+string for dname */
+
+/* client reconnect */
+struct ceph_mds_cap_reconnect {
+ __le64 cap_id;
+ __le32 wanted;
+ __le32 issued;
+ __le64 snaprealm;
+ __le64 pathbase; /* base ino for our path to this ino */
+ __le32 flock_len; /* size of flock state blob, if any */
+} __attribute__ ((packed));
+/* followed by flock blob */
+
+struct ceph_mds_cap_reconnect_v1 {
+ __le64 cap_id;
+ __le32 wanted;
+ __le32 issued;
+ __le64 size;
+ struct ceph_timespec mtime, atime;
+ __le64 snaprealm;
+ __le64 pathbase; /* base ino for our path to this ino */
+} __attribute__ ((packed));
+
+struct ceph_mds_snaprealm_reconnect {
+ __le64 ino; /* snap realm base */
+ __le64 seq; /* snap seq for this snap realm */
+ __le64 parent; /* parent realm */
+} __attribute__ ((packed));
+
+/*
+ * snaps
+ */
+enum {
+ CEPH_SNAP_OP_UPDATE, /* CREATE or DESTROY */
+ CEPH_SNAP_OP_CREATE,
+ CEPH_SNAP_OP_DESTROY,
+ CEPH_SNAP_OP_SPLIT,
+};
+
+extern const char *ceph_snap_op_name(int o);
+
+/* snap msg header */
+struct ceph_mds_snap_head {
+ __le32 op; /* CEPH_SNAP_OP_* */
+ __le64 split; /* ino to split off, if any */
+ __le32 num_split_inos; /* # inos belonging to new child realm */
+ __le32 num_split_realms; /* # child realms under new child realm */
+ __le32 trace_len; /* size of snap trace blob */
+} __attribute__ ((packed));
+/* followed by split ino list, then split realms, then the trace blob */
+
+/*
+ * encode info about a snaprealm, as viewed by a client
+ */
+struct ceph_mds_snap_realm {
+ __le64 ino; /* ino */
+ __le64 created; /* snap: when created */
+ __le64 parent; /* ino: parent realm */
+ __le64 parent_since; /* snap: same parent since */
+ __le64 seq; /* snap: version */
+ __le32 num_snaps;
+ __le32 num_prior_parent_snaps;
+} __attribute__ ((packed));
+/* followed by my snap list, then prior parent snap list */
+
+#endif
diff --git a/linux/ceph/ceph_hash.h b/linux/ceph/ceph_hash.h
new file mode 100644
index 0000000..d099c3f
--- /dev/null
+++ b/linux/ceph/ceph_hash.h
@@ -0,0 +1,13 @@
+#ifndef FS_CEPH_HASH_H
+#define FS_CEPH_HASH_H
+
+#define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */
+#define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */
+
+extern unsigned ceph_str_hash_linux(const char *s, unsigned len);
+extern unsigned ceph_str_hash_rjenkins(const char *s, unsigned len);
+
+extern unsigned ceph_str_hash(int type, const char *s, unsigned len);
+extern const char *ceph_str_hash_name(int type);
+
+#endif
diff --git a/linux/ceph/debugfs.h b/linux/ceph/debugfs.h
new file mode 100644
index 0000000..1df086d
--- /dev/null
+++ b/linux/ceph/debugfs.h
@@ -0,0 +1,33 @@
+#ifndef _FS_CEPH_DEBUGFS_H
+#define _FS_CEPH_DEBUGFS_H
+
+#include <linux/ceph/ceph_debug.h>
+#include <linux/ceph/types.h>
+
+#define CEPH_DEFINE_SHOW_FUNC(name) \
+static int name##_open(struct inode *inode, struct file *file) \
+{ \
+ struct seq_file *sf; \
+ int ret; \
+ \
+ ret = single_open(file, name, NULL); \
+ sf = file->private_data; \
+ sf->private = inode->i_private; \
+ return ret; \
+} \
+ \
+static const struct file_operations name##_fops = { \
+ .open = name##_open, \
+ .read = seq_read, \
+ .llseek = seq_lseek, \
+ .release = single_release, \
+};
+
+/* debugfs.c */
+extern int ceph_debugfs_init(void);
+extern void ceph_debugfs_cleanup(void);
+extern int ceph_debugfs_client_init(struct ceph_client *client);
+extern void ceph_debugfs_client_cleanup(struct ceph_client *client);
+
+#endif
+
diff --git a/linux/ceph/decode.h b/linux/ceph/decode.h
new file mode 100644
index 0000000..a6ef9cc
--- /dev/null
+++ b/linux/ceph/decode.h
@@ -0,0 +1,259 @@
+#ifndef __CEPH_DECODE_H
+#define __CEPH_DECODE_H
+
+#include <linux/err.h>
+#include <linux/bug.h>
+#include <linux/time.h>
+#include <asm/unaligned.h>
+
+#include <linux/ceph/types.h>
+
+/*
+ * in all cases,
+ * void **p pointer to position pointer
+ * void *end pointer to end of buffer (last byte + 1)
+ */
+
+static inline u64 ceph_decode_64(void **p)
+{
+ u64 v = get_unaligned_le64(*p);
+ *p += sizeof(u64);
+ return v;
+}
+static inline u32 ceph_decode_32(void **p)
+{
+ u32 v = get_unaligned_le32(*p);
+ *p += sizeof(u32);
+ return v;
+}
+static inline u16 ceph_decode_16(void **p)
+{
+ u16 v = get_unaligned_le16(*p);
+ *p += sizeof(u16);
+ return v;
+}
+static inline u8 ceph_decode_8(void **p)
+{
+ u8 v = *(u8 *)*p;
+ (*p)++;
+ return v;
+}
+static inline void ceph_decode_copy(void **p, void *pv, size_t n)
+{
+ memcpy(pv, *p, n);
+ *p += n;
+}
+
+/*
+ * bounds check input.
+ */
+static inline int ceph_has_room(void **p, void *end, size_t n)
+{
+ return end >= *p && n <= end - *p;
+}
+
+#define ceph_decode_need(p, end, n, bad) \
+ do { \
+ if (!likely(ceph_has_room(p, end, n))) \
+ goto bad; \
+ } while (0)
+
+#define ceph_decode_64_safe(p, end, v, bad) \
+ do { \
+ ceph_decode_need(p, end, sizeof(u64), bad); \
+ v = ceph_decode_64(p); \
+ } while (0)
+#define ceph_decode_32_safe(p, end, v, bad) \
+ do { \
+ ceph_decode_need(p, end, sizeof(u32), bad); \
+ v = ceph_decode_32(p); \
+ } while (0)
+#define ceph_decode_16_safe(p, end, v, bad) \
+ do { \
+ ceph_decode_need(p, end, sizeof(u16), bad); \
+ v = ceph_decode_16(p); \
+ } while (0)
+#define ceph_decode_8_safe(p, end, v, bad) \
+ do { \
+ ceph_decode_need(p, end, sizeof(u8), bad); \
+ v = ceph_decode_8(p); \
+ } while (0)
+
+#define ceph_decode_copy_safe(p, end, pv, n, bad) \
+ do { \
+ ceph_decode_need(p, end, n, bad); \
+ ceph_decode_copy(p, pv, n); \
+ } while (0)
+
+/*
+ * Allocate a buffer big enough to hold the wire-encoded string, and
+ * decode the string into it. The resulting string will always be
+ * terminated with '\0'. If successful, *p will be advanced
+ * past the decoded data. Also, if lenp is not a null pointer, the
+ * length (not including the terminating '\0') will be recorded in
+ * *lenp. Note that a zero-length string is a valid return value.
+ *
+ * Returns a pointer to the newly-allocated string buffer, or a
+ * pointer-coded errno if an error occurs. Neither *p nor *lenp
+ * will have been updated if an error is returned.
+ *
+ * There are two possible failures:
+ * - converting the string would require accessing memory at or
+ * beyond the "end" pointer provided (-ERANGE)
+ * - memory could not be allocated for the result (-ENOMEM)
+ */
+static inline char *ceph_extract_encoded_string(void **p, void *end,
+ size_t *lenp, gfp_t gfp)
+{
+ u32 len;
+ void *sp = *p;
+ char *buf;
+
+ ceph_decode_32_safe(&sp, end, len, bad);
+ if (!ceph_has_room(&sp, end, len))
+ goto bad;
+
+ buf = kmalloc(len + 1, gfp);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+ if (len)
+ memcpy(buf, sp, len);
+ buf[len] = '\0';
+
+ *p = (char *) *p + sizeof (u32) + len;
+ if (lenp)
+ *lenp = (size_t) len;
+
+ return buf;
+
+bad:
+ return ERR_PTR(-ERANGE);
+}
+
+/*
+ * struct ceph_timespec <-> struct timespec
+ */
+static inline void ceph_decode_timespec(struct timespec *ts,
+ const struct ceph_timespec *tv)
+{
+ ts->tv_sec = (__kernel_time_t)le32_to_cpu(tv->tv_sec);
+ ts->tv_nsec = (long)le32_to_cpu(tv->tv_nsec);
+}
+static inline void ceph_encode_timespec(struct ceph_timespec *tv,
+ const struct timespec *ts)
+{
+ tv->tv_sec = cpu_to_le32((u32)ts->tv_sec);
+ tv->tv_nsec = cpu_to_le32((u32)ts->tv_nsec);
+}
+
+/*
+ * sockaddr_storage <-> ceph_sockaddr
+ */
+static inline void ceph_encode_addr(struct ceph_entity_addr *a)
+{
+ __be16 ss_family = htons(a->in_addr.ss_family);
+ a->in_addr.ss_family = *(__u16 *)&ss_family;
+}
+static inline void ceph_decode_addr(struct ceph_entity_addr *a)
+{
+ __be16 ss_family = *(__be16 *)&a->in_addr.ss_family;
+ a->in_addr.ss_family = ntohs(ss_family);
+ WARN_ON(a->in_addr.ss_family == 512);
+}
+
+/*
+ * encoders
+ */
+static inline void ceph_encode_64(void **p, u64 v)
+{
+ put_unaligned_le64(v, (__le64 *)*p);
+ *p += sizeof(u64);
+}
+static inline void ceph_encode_32(void **p, u32 v)
+{
+ put_unaligned_le32(v, (__le32 *)*p);
+ *p += sizeof(u32);
+}
+static inline void ceph_encode_16(void **p, u16 v)
+{
+ put_unaligned_le16(v, (__le16 *)*p);
+ *p += sizeof(u16);
+}
+static inline void ceph_encode_8(void **p, u8 v)
+{
+ *(u8 *)*p = v;
+ (*p)++;
+}
+static inline void ceph_encode_copy(void **p, const void *s, int len)
+{
+ memcpy(*p, s, len);
+ *p += len;
+}
+
+/*
+ * filepath, string encoders
+ */
+static inline void ceph_encode_filepath(void **p, void *end,
+ u64 ino, const char *path)
+{
+ u32 len = path ? strlen(path) : 0;
+ BUG_ON(*p + 1 + sizeof(ino) + sizeof(len) + len > end);
+ ceph_encode_8(p, 1);
+ ceph_encode_64(p, ino);
+ ceph_encode_32(p, len);
+ if (len)
+ memcpy(*p, path, len);
+ *p += len;
+}
+
+static inline void ceph_encode_string(void **p, void *end,
+ const char *s, u32 len)
+{
+ BUG_ON(*p + sizeof(len) + len > end);
+ ceph_encode_32(p, len);
+ if (len)
+ memcpy(*p, s, len);
+ *p += len;
+}
+
+#define ceph_encode_need(p, end, n, bad) \
+ do { \
+ if (!likely(ceph_has_room(p, end, n))) \
+ goto bad; \
+ } while (0)
+
+#define ceph_encode_64_safe(p, end, v, bad) \
+ do { \
+ ceph_encode_need(p, end, sizeof(u64), bad); \
+ ceph_encode_64(p, v); \
+ } while (0)
+#define ceph_encode_32_safe(p, end, v, bad) \
+ do { \
+ ceph_encode_need(p, end, sizeof(u32), bad); \
+ ceph_encode_32(p, v); \
+ } while (0)
+#define ceph_encode_16_safe(p, end, v, bad) \
+ do { \
+ ceph_encode_need(p, end, sizeof(u16), bad); \
+ ceph_encode_16(p, v); \
+ } while (0)
+#define ceph_encode_8_safe(p, end, v, bad) \
+ do { \
+ ceph_encode_need(p, end, sizeof(u8), bad); \
+ ceph_encode_8(p, v); \
+ } while (0)
+
+#define ceph_encode_copy_safe(p, end, pv, n, bad) \
+ do { \
+ ceph_encode_need(p, end, n, bad); \
+ ceph_encode_copy(p, pv, n); \
+ } while (0)
+#define ceph_encode_string_safe(p, end, s, n, bad) \
+ do { \
+ ceph_encode_need(p, end, n, bad); \
+ ceph_encode_string(p, end, s, n); \
+ } while (0)
+
+
+#endif
diff --git a/linux/ceph/libceph.h b/linux/ceph/libceph.h
new file mode 100644
index 0000000..2f49aa4
--- /dev/null
+++ b/linux/ceph/libceph.h
@@ -0,0 +1,230 @@
+#ifndef _FS_CEPH_LIBCEPH_H
+#define _FS_CEPH_LIBCEPH_H
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <asm/unaligned.h>
+#include <linux/backing-dev.h>
+#include <linux/completion.h>
+#include <linux/exportfs.h>
+#include <linux/bug.h>
+#include <linux/fs.h>
+#include <linux/mempool.h>
+#include <linux/pagemap.h>
+#include <linux/wait.h>
+#include <linux/writeback.h>
+#include <linux/slab.h>
+
+#include <linux/ceph/types.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/msgpool.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/osd_client.h>
+#include <linux/ceph/ceph_fs.h>
+
+/*
+ * mount options
+ */
+#define CEPH_OPT_FSID (1<<0)
+#define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */
+#define CEPH_OPT_MYIP (1<<2) /* specified my ip */
+#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */
+
+#define CEPH_OPT_DEFAULT (0)
+
+#define ceph_set_opt(client, opt) \
+ (client)->options->flags |= CEPH_OPT_##opt;
+#define ceph_test_opt(client, opt) \
+ (!!((client)->options->flags & CEPH_OPT_##opt))
+
+struct ceph_options {
+ int flags;
+ struct ceph_fsid fsid;
+ struct ceph_entity_addr my_addr;
+ int mount_timeout;
+ int osd_idle_ttl;
+ int osd_keepalive_timeout;
+
+ /*
+ * any type that can't be simply compared or doesn't need
+ * to be compared should go beyond this point,
+ * ceph_compare_options() should be updated accordingly
+ */
+
+ struct ceph_entity_addr *mon_addr; /* should be the first
+ pointer type of args */
+ int num_mon;
+ char *name;
+ struct ceph_crypto_key *key;
+};
+
+/*
+ * defaults
+ */
+#define CEPH_MOUNT_TIMEOUT_DEFAULT 60
+#define CEPH_OSD_KEEPALIVE_DEFAULT 5
+#define CEPH_OSD_IDLE_TTL_DEFAULT 60
+
+#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
+#define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024)
+#define CEPH_MSG_MAX_DATA_LEN (16*1024*1024)
+
+#define CEPH_AUTH_NAME_DEFAULT "guest"
+
+/*
+ * Delay telling the MDS we no longer want caps, in case we reopen
+ * the file. Delay a minimum amount of time, even if we send a cap
+ * message for some other reason. Otherwise, take the opportunity to
+ * update the mds to avoid sending another message later.
+ */
+#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */
+#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */
+
+#define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4)
+
+/* mount state */
+enum {
+ CEPH_MOUNT_MOUNTING,
+ CEPH_MOUNT_MOUNTED,
+ CEPH_MOUNT_UNMOUNTING,
+ CEPH_MOUNT_UNMOUNTED,
+ CEPH_MOUNT_SHUTDOWN,
+};
+
+/*
+ * subtract jiffies
+ */
+static inline unsigned long time_sub(unsigned long a, unsigned long b)
+{
+ BUG_ON(time_after(b, a));
+ return (long)a - (long)b;
+}
+
+struct ceph_mds_client;
+
+/*
+ * per client state
+ *
+ * possibly shared by multiple mount points, if they are
+ * mounting the same ceph filesystem/cluster.
+ */
+struct ceph_client {
+ struct ceph_fsid fsid;
+ bool have_fsid;
+
+ void *private;
+
+ struct ceph_options *options;
+
+ struct mutex mount_mutex; /* serialize mount attempts */
+ wait_queue_head_t auth_wq;
+ int auth_err;
+
+ int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *);
+
+ u64 supported_features;
+ u64 required_features;
+
+ struct ceph_messenger msgr; /* messenger instance */
+ struct ceph_mon_client monc;
+ struct ceph_osd_client osdc;
+
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *debugfs_dir;
+ struct dentry *debugfs_monmap;
+ struct dentry *debugfs_osdmap;
+#endif
+};
+
+
+
+/*
+ * snapshots
+ */
+
+/*
+ * A "snap context" is the set of existing snapshots when we
+ * write data. It is used by the OSD to guide its COW behavior.
+ *
+ * The ceph_snap_context is refcounted, and attached to each dirty
+ * page, indicating which context the dirty data belonged when it was
+ * dirtied.
+ */
+struct ceph_snap_context {
+ atomic_t nref;
+ u64 seq;
+ u32 num_snaps;
+ u64 snaps[];
+};
+
+extern struct ceph_snap_context *ceph_create_snap_context(u32 snap_count,
+ gfp_t gfp_flags);
+extern struct ceph_snap_context *ceph_get_snap_context(
+ struct ceph_snap_context *sc);
+extern void ceph_put_snap_context(struct ceph_snap_context *sc);
+
+/*
+ * calculate the number of pages a given length and offset map onto,
+ * if we align the data.
+ */
+static inline int calc_pages_for(u64 off, u64 len)
+{
+ return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) -
+ (off >> PAGE_CACHE_SHIFT);
+}
+
+extern struct kmem_cache *ceph_inode_cachep;
+extern struct kmem_cache *ceph_cap_cachep;
+extern struct kmem_cache *ceph_dentry_cachep;
+extern struct kmem_cache *ceph_file_cachep;
+
+/* ceph_common.c */
+extern bool libceph_compatible(void *data);
+
+extern const char *ceph_msg_type_name(int type);
+extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
+extern void *ceph_kvmalloc(size_t size, gfp_t flags);
+extern void ceph_kvfree(const void *ptr);
+
+extern struct ceph_options *ceph_parse_options(char *options,
+ const char *dev_name, const char *dev_name_end,
+ int (*parse_extra_token)(char *c, void *private),
+ void *private);
+extern void ceph_destroy_options(struct ceph_options *opt);
+extern int ceph_compare_options(struct ceph_options *new_opt,
+ struct ceph_client *client);
+extern struct ceph_client *ceph_create_client(struct ceph_options *opt,
+ void *private,
+ u64 supported_features,
+ u64 required_features);
+extern u64 ceph_client_id(struct ceph_client *client);
+extern void ceph_destroy_client(struct ceph_client *client);
+extern int __ceph_open_session(struct ceph_client *client,
+ unsigned long started);
+extern int ceph_open_session(struct ceph_client *client);
+
+/* pagevec.c */
+extern void ceph_release_page_vector(struct page **pages, int num_pages);
+
+extern struct page **ceph_get_direct_page_vector(const void __user *data,
+ int num_pages,
+ bool write_page);
+extern void ceph_put_page_vector(struct page **pages, int num_pages,
+ bool dirty);
+extern void ceph_release_page_vector(struct page **pages, int num_pages);
+extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
+extern int ceph_copy_user_to_page_vector(struct page **pages,
+ const void __user *data,
+ loff_t off, size_t len);
+extern void ceph_copy_to_page_vector(struct page **pages,
+ const void *data,
+ loff_t off, size_t len);
+extern void ceph_copy_from_page_vector(struct page **pages,
+ void *data,
+ loff_t off, size_t len);
+extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data,
+ loff_t off, size_t len);
+extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
+
+
+#endif /* _FS_CEPH_LIBCEPH_H */
diff --git a/linux/ceph/mdsmap.h b/linux/ceph/mdsmap.h
new file mode 100644
index 0000000..87ed09f
--- /dev/null
+++ b/linux/ceph/mdsmap.h
@@ -0,0 +1,63 @@
+#ifndef _FS_CEPH_MDSMAP_H
+#define _FS_CEPH_MDSMAP_H
+
+#include <linux/bug.h>
+#include <linux/ceph/types.h>
+
+/*
+ * mds map - describe servers in the mds cluster.
+ *
+ * we limit fields to those the client actually cares about
+ */
+struct ceph_mds_info {
+ u64 global_id;
+ struct ceph_entity_addr addr;
+ s32 state;
+ int num_export_targets;
+ bool laggy;
+ u32 *export_targets;
+};
+
+struct ceph_mdsmap {
+ u32 m_epoch, m_client_epoch, m_last_failure;
+ u32 m_root;
+ u32 m_session_timeout; /* seconds */
+ u32 m_session_autoclose; /* seconds */
+ u64 m_max_file_size;
+ u32 m_max_mds; /* size of m_addr, m_state arrays */
+ struct ceph_mds_info *m_info;
+
+ /* which object pools file data can be stored in */
+ int m_num_data_pg_pools;
+ u64 *m_data_pg_pools;
+ u64 m_cas_pg_pool;
+};
+
+static inline struct ceph_entity_addr *
+ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w)
+{
+ if (w >= m->m_max_mds)
+ return NULL;
+ return &m->m_info[w].addr;
+}
+
+static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w)
+{
+ BUG_ON(w < 0);
+ if (w >= m->m_max_mds)
+ return CEPH_MDS_STATE_DNE;
+ return m->m_info[w].state;
+}
+
+static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
+{
+ if (w >= 0 && w < m->m_max_mds)
+ return m->m_info[w].laggy;
+ return false;
+}
+
+extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
+extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
+extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
+
+#endif
diff --git a/linux/ceph/messenger.h b/linux/ceph/messenger.h
new file mode 100644
index 0000000..d21f2db
--- /dev/null
+++ b/linux/ceph/messenger.h
@@ -0,0 +1,304 @@
+#ifndef __FS_CEPH_MESSENGER_H
+#define __FS_CEPH_MESSENGER_H
+
+#include <linux/blk_types.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/net.h>
+#include <linux/radix-tree.h>
+#include <linux/uio.h>
+#include <linux/workqueue.h>
+
+#include <linux/ceph/types.h>
+#include <linux/ceph/buffer.h>
+
+struct ceph_msg;
+struct ceph_connection;
+
+/*
+ * Ceph defines these callbacks for handling connection events.
+ */
+struct ceph_connection_operations {
+ struct ceph_connection *(*get)(struct ceph_connection *);
+ void (*put)(struct ceph_connection *);
+
+ /* handle an incoming message. */
+ void (*dispatch) (struct ceph_connection *con, struct ceph_msg *m);
+
+ /* authorize an outgoing connection */
+ struct ceph_auth_handshake *(*get_authorizer) (
+ struct ceph_connection *con,
+ int *proto, int force_new);
+ int (*verify_authorizer_reply) (struct ceph_connection *con, int len);
+ int (*invalidate_authorizer)(struct ceph_connection *con);
+
+ /* there was some error on the socket (disconnect, whatever) */
+ void (*fault) (struct ceph_connection *con);
+
+ /* a remote host has terminated a message exchange session, and messages
+ * we sent (or they tried to send us) may be lost. */
+ void (*peer_reset) (struct ceph_connection *con);
+
+ struct ceph_msg * (*alloc_msg) (struct ceph_connection *con,
+ struct ceph_msg_header *hdr,
+ int *skip);
+};
+
+/* use format string %s%d */
+#define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num)
+
+struct ceph_messenger {
+ struct ceph_entity_inst inst; /* my name+address */
+ struct ceph_entity_addr my_enc_addr;
+
+ atomic_t stopping;
+ bool nocrc;
+
+ /*
+ * the global_seq counts connections we (attempt to) initiate
+ * in order to disambiguate certain connect race conditions.
+ */
+ u32 global_seq;
+ spinlock_t global_seq_lock;
+
+ u64 supported_features;
+ u64 required_features;
+};
+
+enum ceph_msg_data_type {
+ CEPH_MSG_DATA_NONE, /* message contains no data payload */
+ CEPH_MSG_DATA_PAGES, /* data source/destination is a page array */
+ CEPH_MSG_DATA_PAGELIST, /* data source/destination is a pagelist */
+#ifdef CONFIG_BLOCK
+ CEPH_MSG_DATA_BIO, /* data source/destination is a bio list */
+#endif /* CONFIG_BLOCK */
+};
+
+static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type)
+{
+ switch (type) {
+ case CEPH_MSG_DATA_NONE:
+ case CEPH_MSG_DATA_PAGES:
+ case CEPH_MSG_DATA_PAGELIST:
+#ifdef CONFIG_BLOCK
+ case CEPH_MSG_DATA_BIO:
+#endif /* CONFIG_BLOCK */
+ return true;
+ default:
+ return false;
+ }
+}
+
+struct ceph_msg_data {
+ struct list_head links; /* ceph_msg->data */
+ enum ceph_msg_data_type type;
+ union {
+#ifdef CONFIG_BLOCK
+ struct {
+ struct bio *bio;
+ size_t bio_length;
+ };
+#endif /* CONFIG_BLOCK */
+ struct {
+ struct page **pages; /* NOT OWNER. */
+ size_t length; /* total # bytes */
+ unsigned int alignment; /* first page */
+ };
+ struct ceph_pagelist *pagelist;
+ };
+};
+
+struct ceph_msg_data_cursor {
+ size_t total_resid; /* across all data items */
+ struct list_head *data_head; /* = &ceph_msg->data */
+
+ struct ceph_msg_data *data; /* current data item */
+ size_t resid; /* bytes not yet consumed */
+ bool last_piece; /* current is last piece */
+ bool need_crc; /* crc update needed */
+ union {
+#ifdef CONFIG_BLOCK
+ struct { /* bio */
+ struct bio *bio; /* bio from list */
+ struct bvec_iter bvec_iter;
+ };
+#endif /* CONFIG_BLOCK */
+ struct { /* pages */
+ unsigned int page_offset; /* offset in page */
+ unsigned short page_index; /* index in array */
+ unsigned short page_count; /* pages in array */
+ };
+ struct { /* pagelist */
+ struct page *page; /* page from list */
+ size_t offset; /* bytes from list */
+ };
+ };
+};
+
+/*
+ * a single message. it contains a header (src, dest, message type, etc.),
+ * footer (crc values, mainly), a "front" message body, and possibly a
+ * data payload (stored in some number of pages).
+ */
+struct ceph_msg {
+ struct ceph_msg_header hdr; /* header */
+ struct ceph_msg_footer footer; /* footer */
+ struct kvec front; /* unaligned blobs of message */
+ struct ceph_buffer *middle;
+
+ size_t data_length;
+ struct list_head data;
+ struct ceph_msg_data_cursor cursor;
+
+ struct ceph_connection *con;
+ struct list_head list_head; /* links for connection lists */
+
+ struct kref kref;
+ bool more_to_follow;
+ bool needs_out_seq;
+ int front_alloc_len;
+ unsigned long ack_stamp; /* tx: when we were acked */
+
+ struct ceph_msgpool *pool;
+};
+
+/* ceph connection fault delay defaults, for exponential backoff */
+#define BASE_DELAY_INTERVAL (HZ/2)
+#define MAX_DELAY_INTERVAL (5 * 60 * HZ)
+
+/*
+ * A single connection with another host.
+ *
+ * We maintain a queue of outgoing messages, and some session state to
+ * ensure that we can preserve the lossless, ordered delivery of
+ * messages in the case of a TCP disconnect.
+ */
+struct ceph_connection {
+ void *private;
+
+ const struct ceph_connection_operations *ops;
+
+ struct ceph_messenger *msgr;
+
+ atomic_t sock_state;
+ struct socket *sock;
+ struct ceph_entity_addr peer_addr; /* peer address */
+ struct ceph_entity_addr peer_addr_for_me;
+
+ unsigned long flags;
+ unsigned long state;
+ const char *error_msg; /* error message, if any */
+
+ struct ceph_entity_name peer_name; /* peer name */
+
+ u64 peer_features;
+ u32 connect_seq; /* identify the most recent connection
+ attempt for this connection, client */
+ u32 peer_global_seq; /* peer's global seq for this connection */
+
+ int auth_retry; /* true if we need a newer authorizer */
+ void *auth_reply_buf; /* where to put the authorizer reply */
+ int auth_reply_buf_len;
+
+ struct mutex mutex;
+
+ /* out queue */
+ struct list_head out_queue;
+ struct list_head out_sent; /* sending or sent but unacked */
+ u64 out_seq; /* last message queued for send */
+
+ u64 in_seq, in_seq_acked; /* last message received, acked */
+
+ /* connection negotiation temps */
+ char in_banner[CEPH_BANNER_MAX_LEN];
+ struct ceph_msg_connect out_connect;
+ struct ceph_msg_connect_reply in_reply;
+ struct ceph_entity_addr actual_peer_addr;
+
+ /* message out temps */
+ struct ceph_msg *out_msg; /* sending message (== tail of
+ out_sent) */
+ bool out_msg_done;
+
+ struct kvec out_kvec[8], /* sending header/footer data */
+ *out_kvec_cur;
+ int out_kvec_left; /* kvec's left in out_kvec */
+ int out_skip; /* skip this many bytes */
+ int out_kvec_bytes; /* total bytes left */
+ bool out_kvec_is_msg; /* kvec refers to out_msg */
+ int out_more; /* there is more data after the kvecs */
+ __le64 out_temp_ack; /* for writing an ack */
+
+ /* message in temps */
+ struct ceph_msg_header in_hdr;
+ struct ceph_msg *in_msg;
+ u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */
+
+ char in_tag; /* protocol control byte */
+ int in_base_pos; /* bytes read */
+ __le64 in_temp_ack; /* for reading an ack */
+
+ struct delayed_work work; /* send|recv work */
+ unsigned long delay; /* current delay interval */
+};
+
+
+extern const char *ceph_pr_addr(const struct sockaddr_storage *ss);
+extern int ceph_parse_ips(const char *c, const char *end,
+ struct ceph_entity_addr *addr,
+ int max_count, int *count);
+
+
+extern int ceph_msgr_init(void);
+extern void ceph_msgr_exit(void);
+extern void ceph_msgr_flush(void);
+
+extern void ceph_messenger_init(struct ceph_messenger *msgr,
+ struct ceph_entity_addr *myaddr,
+ u64 supported_features,
+ u64 required_features,
+ bool nocrc);
+
+extern void ceph_con_init(struct ceph_connection *con, void *private,
+ const struct ceph_connection_operations *ops,
+ struct ceph_messenger *msgr);
+extern void ceph_con_open(struct ceph_connection *con,
+ __u8 entity_type, __u64 entity_num,
+ struct ceph_entity_addr *addr);
+extern bool ceph_con_opened(struct ceph_connection *con);
+extern void ceph_con_close(struct ceph_connection *con);
+extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
+
+extern void ceph_msg_revoke(struct ceph_msg *msg);
+extern void ceph_msg_revoke_incoming(struct ceph_msg *msg);
+
+extern void ceph_con_keepalive(struct ceph_connection *con);
+
+extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
+ size_t length, size_t alignment);
+extern void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
+ struct ceph_pagelist *pagelist);
+#ifdef CONFIG_BLOCK
+extern void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
+ size_t length);
+#endif /* CONFIG_BLOCK */
+
+extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
+ bool can_fail);
+extern void ceph_msg_kfree(struct ceph_msg *m);
+
+
+static inline struct ceph_msg *ceph_msg_get(struct ceph_msg *msg)
+{
+ kref_get(&msg->kref);
+ return msg;
+}
+extern void ceph_msg_last_put(struct kref *kref);
+static inline void ceph_msg_put(struct ceph_msg *msg)
+{
+ kref_put(&msg->kref, ceph_msg_last_put);
+}
+
+extern void ceph_msg_dump(struct ceph_msg *msg);
+
+#endif
diff --git a/linux/ceph/mon_client.h b/linux/ceph/mon_client.h
new file mode 100644
index 0000000..a486f39
--- /dev/null
+++ b/linux/ceph/mon_client.h
@@ -0,0 +1,121 @@
+#ifndef _FS_CEPH_MON_CLIENT_H
+#define _FS_CEPH_MON_CLIENT_H
+
+#include <linux/completion.h>
+#include <linux/kref.h>
+#include <linux/rbtree.h>
+
+#include <linux/ceph/messenger.h>
+
+struct ceph_client;
+struct ceph_mount_args;
+struct ceph_auth_client;
+
+/*
+ * The monitor map enumerates the set of all monitors.
+ */
+struct ceph_monmap {
+ struct ceph_fsid fsid;
+ u32 epoch;
+ u32 num_mon;
+ struct ceph_entity_inst mon_inst[0];
+};
+
+struct ceph_mon_client;
+struct ceph_mon_generic_request;
+
+
+/*
+ * Generic mechanism for resending monitor requests.
+ */
+typedef void (*ceph_monc_request_func_t)(struct ceph_mon_client *monc,
+ int newmon);
+
+/* a pending monitor request */
+struct ceph_mon_request {
+ struct ceph_mon_client *monc;
+ struct delayed_work delayed_work;
+ unsigned long delay;
+ ceph_monc_request_func_t do_request;
+};
+
+/*
+ * ceph_mon_generic_request is being used for the statfs and poolop requests
+ * which are being done a bit differently because we need to get data back
+ * to the caller
+ */
+struct ceph_mon_generic_request {
+ struct kref kref;
+ u64 tid;
+ struct rb_node node;
+ int result;
+ void *buf;
+ int buf_len;
+ struct completion completion;
+ struct ceph_msg *request; /* original request */
+ struct ceph_msg *reply; /* and reply */
+};
+
+struct ceph_mon_client {
+ struct ceph_client *client;
+ struct ceph_monmap *monmap;
+
+ struct mutex mutex;
+ struct delayed_work delayed_work;
+
+ struct ceph_auth_client *auth;
+ struct ceph_msg *m_auth, *m_auth_reply, *m_subscribe, *m_subscribe_ack;
+ int pending_auth;
+
+ bool hunting;
+ int cur_mon; /* last monitor i contacted */
+ unsigned long sub_sent, sub_renew_after;
+ struct ceph_connection con;
+
+ /* pending generic requests */
+ struct rb_root generic_request_tree;
+ int num_generic_requests;
+ u64 last_tid;
+
+ /* mds/osd map */
+ int want_mdsmap;
+ int want_next_osdmap; /* 1 = want, 2 = want+asked */
+ u32 have_osdmap, have_mdsmap;
+
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *debugfs_file;
+#endif
+};
+
+extern struct ceph_monmap *ceph_monmap_decode(void *p, void *end);
+extern int ceph_monmap_contains(struct ceph_monmap *m,
+ struct ceph_entity_addr *addr);
+
+extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl);
+extern void ceph_monc_stop(struct ceph_mon_client *monc);
+
+/*
+ * The model here is to indicate that we need a new map of at least
+ * epoch @want, and also call in when we receive a map. We will
+ * periodically rerequest the map from the monitor cluster until we
+ * get what we want.
+ */
+extern int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 have);
+extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have);
+
+extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc);
+
+extern int ceph_monc_do_statfs(struct ceph_mon_client *monc,
+ struct ceph_statfs *buf);
+
+extern int ceph_monc_open_session(struct ceph_mon_client *monc);
+
+extern int ceph_monc_validate_auth(struct ceph_mon_client *monc);
+
+extern int ceph_monc_create_snapid(struct ceph_mon_client *monc,
+ u32 pool, u64 *snapid);
+
+extern int ceph_monc_delete_snapid(struct ceph_mon_client *monc,
+ u32 pool, u64 snapid);
+
+#endif
diff --git a/linux/ceph/msgpool.h b/linux/ceph/msgpool.h
new file mode 100644
index 0000000..4b0d389
--- /dev/null
+++ b/linux/ceph/msgpool.h
@@ -0,0 +1,26 @@
+#ifndef _FS_CEPH_MSGPOOL
+#define _FS_CEPH_MSGPOOL
+
+#include <linux/mempool.h>
+#include <linux/ceph/messenger.h>
+
+/*
+ * we use memory pools for preallocating messages we may receive, to
+ * avoid unexpected OOM conditions.
+ */
+struct ceph_msgpool {
+ const char *name;
+ mempool_t *pool;
+ int type; /* preallocated message type */
+ int front_len; /* preallocated payload size */
+};
+
+extern int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
+ int front_len, int size, bool blocking,
+ const char *name);
+extern void ceph_msgpool_destroy(struct ceph_msgpool *pool);
+extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *,
+ int front_len);
+extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *);
+
+#endif
diff --git a/linux/ceph/msgr.h b/linux/ceph/msgr.h
new file mode 100644
index 0000000..3d94a73
--- /dev/null
+++ b/linux/ceph/msgr.h
@@ -0,0 +1,176 @@
+#ifndef CEPH_MSGR_H
+#define CEPH_MSGR_H
+
+/*
+ * Data types for message passing layer used by Ceph.
+ */
+
+#define CEPH_MON_PORT 6789 /* default monitor port */
+
+/*
+ * client-side processes will try to bind to ports in this
+ * range, simply for the benefit of tools like nmap or wireshark
+ * that would like to identify the protocol.
+ */
+#define CEPH_PORT_FIRST 6789
+#define CEPH_PORT_START 6800 /* non-monitors start here */
+#define CEPH_PORT_LAST 6900
+
+/*
+ * tcp connection banner. include a protocol version. and adjust
+ * whenever the wire protocol changes. try to keep this string length
+ * constant.
+ */
+#define CEPH_BANNER "ceph v027"
+#define CEPH_BANNER_MAX_LEN 30
+
+
+/*
+ * Rollover-safe type and comparator for 32-bit sequence numbers.
+ * Comparator returns -1, 0, or 1.
+ */
+typedef __u32 ceph_seq_t;
+
+static inline __s32 ceph_seq_cmp(__u32 a, __u32 b)
+{
+ return (__s32)a - (__s32)b;
+}
+
+
+/*
+ * entity_name -- logical name for a process participating in the
+ * network, e.g. 'mds0' or 'osd3'.
+ */
+struct ceph_entity_name {
+ __u8 type; /* CEPH_ENTITY_TYPE_* */
+ __le64 num;
+} __attribute__ ((packed));
+
+#define CEPH_ENTITY_TYPE_MON 0x01
+#define CEPH_ENTITY_TYPE_MDS 0x02
+#define CEPH_ENTITY_TYPE_OSD 0x04
+#define CEPH_ENTITY_TYPE_CLIENT 0x08
+#define CEPH_ENTITY_TYPE_AUTH 0x20
+
+#define CEPH_ENTITY_TYPE_ANY 0xFF
+
+extern const char *ceph_entity_type_name(int type);
+
+/*
+ * entity_addr -- network address
+ */
+struct ceph_entity_addr {
+ __le32 type;
+ __le32 nonce; /* unique id for process (e.g. pid) */
+ struct sockaddr_storage in_addr;
+} __attribute__ ((packed));
+
+struct ceph_entity_inst {
+ struct ceph_entity_name name;
+ struct ceph_entity_addr addr;
+} __attribute__ ((packed));
+
+
+/* used by message exchange protocol */
+#define CEPH_MSGR_TAG_READY 1 /* server->client: ready for messages */
+#define CEPH_MSGR_TAG_RESETSESSION 2 /* server->client: reset, try again */
+#define CEPH_MSGR_TAG_WAIT 3 /* server->client: wait for racing
+ incoming connection */
+#define CEPH_MSGR_TAG_RETRY_SESSION 4 /* server->client + cseq: try again
+ with higher cseq */
+#define CEPH_MSGR_TAG_RETRY_GLOBAL 5 /* server->client + gseq: try again
+ with higher gseq */
+#define CEPH_MSGR_TAG_CLOSE 6 /* closing pipe */
+#define CEPH_MSGR_TAG_MSG 7 /* message */
+#define CEPH_MSGR_TAG_ACK 8 /* message ack */
+#define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */
+#define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */
+#define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */
+#define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */
+#define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */
+
+
+/*
+ * connection negotiation
+ */
+struct ceph_msg_connect {
+ __le64 features; /* supported feature bits */
+ __le32 host_type; /* CEPH_ENTITY_TYPE_* */
+ __le32 global_seq; /* count connections initiated by this host */
+ __le32 connect_seq; /* count connections initiated in this session */
+ __le32 protocol_version;
+ __le32 authorizer_protocol;
+ __le32 authorizer_len;
+ __u8 flags; /* CEPH_MSG_CONNECT_* */
+} __attribute__ ((packed));
+
+struct ceph_msg_connect_reply {
+ __u8 tag;
+ __le64 features; /* feature bits for this session */
+ __le32 global_seq;
+ __le32 connect_seq;
+ __le32 protocol_version;
+ __le32 authorizer_len;
+ __u8 flags;
+} __attribute__ ((packed));
+
+#define CEPH_MSG_CONNECT_LOSSY 1 /* messages i send may be safely dropped */
+
+
+/*
+ * message header
+ */
+struct ceph_msg_header_old {
+ __le64 seq; /* message seq# for this session */
+ __le64 tid; /* transaction id */
+ __le16 type; /* message type */
+ __le16 priority; /* priority. higher value == higher priority */
+ __le16 version; /* version of message encoding */
+
+ __le32 front_len; /* bytes in main payload */
+ __le32 middle_len;/* bytes in middle payload */
+ __le32 data_len; /* bytes of data payload */
+ __le16 data_off; /* sender: include full offset;
+ receiver: mask against ~PAGE_MASK */
+
+ struct ceph_entity_inst src, orig_src;
+ __le32 reserved;
+ __le32 crc; /* header crc32c */
+} __attribute__ ((packed));
+
+struct ceph_msg_header {
+ __le64 seq; /* message seq# for this session */
+ __le64 tid; /* transaction id */
+ __le16 type; /* message type */
+ __le16 priority; /* priority. higher value == higher priority */
+ __le16 version; /* version of message encoding */
+
+ __le32 front_len; /* bytes in main payload */
+ __le32 middle_len;/* bytes in middle payload */
+ __le32 data_len; /* bytes of data payload */
+ __le16 data_off; /* sender: include full offset;
+ receiver: mask against ~PAGE_MASK */
+
+ struct ceph_entity_name src;
+ __le32 reserved;
+ __le32 crc; /* header crc32c */
+} __attribute__ ((packed));
+
+#define CEPH_MSG_PRIO_LOW 64
+#define CEPH_MSG_PRIO_DEFAULT 127
+#define CEPH_MSG_PRIO_HIGH 196
+#define CEPH_MSG_PRIO_HIGHEST 255
+
+/*
+ * follows data payload
+ */
+struct ceph_msg_footer {
+ __le32 front_crc, middle_crc, data_crc;
+ __u8 flags;
+} __attribute__ ((packed));
+
+#define CEPH_MSG_FOOTER_COMPLETE (1<<0) /* msg wasn't aborted */
+#define CEPH_MSG_FOOTER_NOCRC (1<<1) /* no data crc */
+
+
+#endif
diff --git a/linux/ceph/osd_client.h b/linux/ceph/osd_client.h
new file mode 100644
index 0000000..94ec696
--- /dev/null
+++ b/linux/ceph/osd_client.h
@@ -0,0 +1,374 @@
+#ifndef _FS_CEPH_OSD_CLIENT_H
+#define _FS_CEPH_OSD_CLIENT_H
+
+#include <linux/completion.h>
+#include <linux/kref.h>
+#include <linux/mempool.h>
+#include <linux/rbtree.h>
+
+#include <linux/ceph/types.h>
+#include <linux/ceph/osdmap.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/pagelist.h>
+
+struct ceph_msg;
+struct ceph_snap_context;
+struct ceph_osd_request;
+struct ceph_osd_client;
+struct ceph_authorizer;
+
+/*
+ * completion callback for async writepages
+ */
+typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *,
+ struct ceph_msg *);
+typedef void (*ceph_osdc_unsafe_callback_t)(struct ceph_osd_request *, bool);
+
+/* a given osd we're communicating with */
+struct ceph_osd {
+ atomic_t o_ref;
+ struct ceph_osd_client *o_osdc;
+ int o_osd;
+ int o_incarnation;
+ struct rb_node o_node;
+ struct ceph_connection o_con;
+ struct list_head o_requests;
+ struct list_head o_linger_requests;
+ struct list_head o_osd_lru;
+ struct ceph_auth_handshake o_auth;
+ unsigned long lru_ttl;
+ int o_marked_for_keepalive;
+ struct list_head o_keepalive_item;
+};
+
+
+#define CEPH_OSD_MAX_OP 3
+
+enum ceph_osd_data_type {
+ CEPH_OSD_DATA_TYPE_NONE = 0,
+ CEPH_OSD_DATA_TYPE_PAGES,
+ CEPH_OSD_DATA_TYPE_PAGELIST,
+#ifdef CONFIG_BLOCK
+ CEPH_OSD_DATA_TYPE_BIO,
+#endif /* CONFIG_BLOCK */
+};
+
+struct ceph_osd_data {
+ enum ceph_osd_data_type type;
+ union {
+ struct {
+ struct page **pages;
+ u64 length;
+ u32 alignment;
+ bool pages_from_pool;
+ bool own_pages;
+ };
+ struct ceph_pagelist *pagelist;
+#ifdef CONFIG_BLOCK
+ struct {
+ struct bio *bio; /* list of bios */
+ size_t bio_length; /* total in list */
+ };
+#endif /* CONFIG_BLOCK */
+ };
+};
+
+struct ceph_osd_req_op {
+ u16 op; /* CEPH_OSD_OP_* */
+ u32 flags; /* CEPH_OSD_OP_FLAG_* */
+ u32 payload_len;
+ union {
+ struct ceph_osd_data raw_data_in;
+ struct {
+ u64 offset, length;
+ u64 truncate_size;
+ u32 truncate_seq;
+ struct ceph_osd_data osd_data;
+ } extent;
+ struct {
+ const char *class_name;
+ const char *method_name;
+ struct ceph_osd_data request_info;
+ struct ceph_osd_data request_data;
+ struct ceph_osd_data response_data;
+ __u8 class_len;
+ __u8 method_len;
+ __u8 argc;
+ } cls;
+ struct {
+ u64 cookie;
+ u64 ver;
+ u32 prot_ver;
+ u32 timeout;
+ __u8 flag;
+ } watch;
+ struct {
+ u64 expected_object_size;
+ u64 expected_write_size;
+ } alloc_hint;
+ };
+};
+
+/* an in-flight request */
+struct ceph_osd_request {
+ u64 r_tid; /* unique for this client */
+ struct rb_node r_node;
+ struct list_head r_req_lru_item;
+ struct list_head r_osd_item;
+ struct list_head r_linger_item;
+ struct list_head r_linger_osd;
+ struct ceph_osd *r_osd;
+ struct ceph_pg r_pgid;
+ int r_pg_osds[CEPH_PG_MAX_SIZE];
+ int r_num_pg_osds;
+
+ struct ceph_msg *r_request, *r_reply;
+ int r_flags; /* any additional flags for the osd */
+ u32 r_sent; /* >0 if r_request is sending/sent */
+
+ /* request osd ops array */
+ unsigned int r_num_ops;
+ struct ceph_osd_req_op r_ops[CEPH_OSD_MAX_OP];
+
+ /* these are updated on each send */
+ __le32 *r_request_osdmap_epoch;
+ __le32 *r_request_flags;
+ __le64 *r_request_pool;
+ void *r_request_pgid;
+ __le32 *r_request_attempts;
+ bool r_paused;
+ struct ceph_eversion *r_request_reassert_version;
+
+ int r_result;
+ int r_reply_op_len[CEPH_OSD_MAX_OP];
+ s32 r_reply_op_result[CEPH_OSD_MAX_OP];
+ int r_got_reply;
+ int r_linger;
+
+ struct ceph_osd_client *r_osdc;
+ struct kref r_kref;
+ bool r_mempool;
+ struct completion r_completion, r_safe_completion;
+ ceph_osdc_callback_t r_callback;
+ ceph_osdc_unsafe_callback_t r_unsafe_callback;
+ struct ceph_eversion r_reassert_version;
+ struct list_head r_unsafe_item;
+
+ struct inode *r_inode; /* for use by callbacks */
+ void *r_priv; /* ditto */
+
+ struct ceph_object_locator r_base_oloc;
+ struct ceph_object_id r_base_oid;
+ struct ceph_object_locator r_target_oloc;
+ struct ceph_object_id r_target_oid;
+
+ u64 r_snapid;
+ unsigned long r_stamp; /* send OR check time */
+
+ struct ceph_snap_context *r_snapc; /* snap context for writes */
+};
+
+struct ceph_request_redirect {
+ struct ceph_object_locator oloc;
+};
+
+struct ceph_osd_event {
+ u64 cookie;
+ int one_shot;
+ struct ceph_osd_client *osdc;
+ void (*cb)(u64, u64, u8, void *);
+ void *data;
+ struct rb_node node;
+ struct list_head osd_node;
+ struct kref kref;
+};
+
+struct ceph_osd_event_work {
+ struct work_struct work;
+ struct ceph_osd_event *event;
+ u64 ver;
+ u64 notify_id;
+ u8 opcode;
+};
+
+struct ceph_osd_client {
+ struct ceph_client *client;
+
+ struct ceph_osdmap *osdmap; /* current map */
+ struct rw_semaphore map_sem;
+ struct completion map_waiters;
+ u64 last_requested_map;
+
+ struct mutex request_mutex;
+ struct rb_root osds; /* osds */
+ struct list_head osd_lru; /* idle osds */
+ u64 timeout_tid; /* tid of timeout triggering rq */
+ u64 last_tid; /* tid of last request */
+ struct rb_root requests; /* pending requests */
+ struct list_head req_lru; /* in-flight lru */
+ struct list_head req_unsent; /* unsent/need-resend queue */
+ struct list_head req_notarget; /* map to no osd */
+ struct list_head req_linger; /* lingering requests */
+ int num_requests;
+ struct delayed_work timeout_work;
+ struct delayed_work osds_timeout_work;
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *debugfs_file;
+#endif
+
+ mempool_t *req_mempool;
+
+ struct ceph_msgpool msgpool_op;
+ struct ceph_msgpool msgpool_op_reply;
+
+ spinlock_t event_lock;
+ struct rb_root event_tree;
+ u64 event_count;
+
+ struct workqueue_struct *notify_wq;
+};
+
+extern int ceph_osdc_setup(void);
+extern void ceph_osdc_cleanup(void);
+
+extern int ceph_osdc_init(struct ceph_osd_client *osdc,
+ struct ceph_client *client);
+extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
+
+extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
+ struct ceph_msg *msg);
+extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
+ struct ceph_msg *msg);
+
+extern void osd_req_op_init(struct ceph_osd_request *osd_req,
+ unsigned int which, u16 opcode);
+
+extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *,
+ unsigned int which,
+ struct page **pages, u64 length,
+ u32 alignment, bool pages_from_pool,
+ bool own_pages);
+
+extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
+ unsigned int which, u16 opcode,
+ u64 offset, u64 length,
+ u64 truncate_size, u32 truncate_seq);
+extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
+ unsigned int which, u64 length);
+
+extern struct ceph_osd_data *osd_req_op_extent_osd_data(
+ struct ceph_osd_request *osd_req,
+ unsigned int which);
+extern struct ceph_osd_data *osd_req_op_cls_response_data(
+ struct ceph_osd_request *osd_req,
+ unsigned int which);
+
+extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *,
+ unsigned int which,
+ struct page **pages, u64 length,
+ u32 alignment, bool pages_from_pool,
+ bool own_pages);
+extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *,
+ unsigned int which,
+ struct ceph_pagelist *pagelist);
+#ifdef CONFIG_BLOCK
+extern void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *,
+ unsigned int which,
+ struct bio *bio, size_t bio_length);
+#endif /* CONFIG_BLOCK */
+
+extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
+ unsigned int which,
+ struct ceph_pagelist *pagelist);
+extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
+ unsigned int which,
+ struct page **pages, u64 length,
+ u32 alignment, bool pages_from_pool,
+ bool own_pages);
+extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
+ unsigned int which,
+ struct page **pages, u64 length,
+ u32 alignment, bool pages_from_pool,
+ bool own_pages);
+
+extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req,
+ unsigned int which, u16 opcode,
+ const char *class, const char *method);
+extern void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
+ unsigned int which, u16 opcode,
+ u64 cookie, u64 version, int flag);
+extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
+ unsigned int which,
+ u64 expected_object_size,
+ u64 expected_write_size);
+
+extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
+ struct ceph_snap_context *snapc,
+ unsigned int num_ops,
+ bool use_mempool,
+ gfp_t gfp_flags);
+
+extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off,
+ struct ceph_snap_context *snapc,
+ u64 snap_id,
+ struct timespec *mtime);
+
+extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
+ struct ceph_file_layout *layout,
+ struct ceph_vino vino,
+ u64 offset, u64 *len,
+ int num_ops, int opcode, int flags,
+ struct ceph_snap_context *snapc,
+ u32 truncate_seq, u64 truncate_size,
+ bool use_mempool);
+
+extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req);
+extern void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req);
+
+static inline void ceph_osdc_get_request(struct ceph_osd_request *req)
+{
+ kref_get(&req->r_kref);
+}
+extern void ceph_osdc_release_request(struct kref *kref);
+static inline void ceph_osdc_put_request(struct ceph_osd_request *req)
+{
+ kref_put(&req->r_kref, ceph_osdc_release_request);
+}
+
+extern int ceph_osdc_start_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req,
+ bool nofail);
+extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req);
+extern void ceph_osdc_sync(struct ceph_osd_client *osdc);
+
+extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc);
+
+extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
+ struct ceph_vino vino,
+ struct ceph_file_layout *layout,
+ u64 off, u64 *plen,
+ u32 truncate_seq, u64 truncate_size,
+ struct page **pages, int nr_pages,
+ int page_align);
+
+extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
+ struct ceph_vino vino,
+ struct ceph_file_layout *layout,
+ struct ceph_snap_context *sc,
+ u64 off, u64 len,
+ u32 truncate_seq, u64 truncate_size,
+ struct timespec *mtime,
+ struct page **pages, int nr_pages);
+
+/* watch/notify events */
+extern int ceph_osdc_create_event(struct ceph_osd_client *osdc,
+ void (*event_cb)(u64, u64, u8, void *),
+ void *data, struct ceph_osd_event **pevent);
+extern void ceph_osdc_cancel_event(struct ceph_osd_event *event);
+extern void ceph_osdc_put_event(struct ceph_osd_event *event);
+#endif
+
diff --git a/linux/ceph/osdmap.h b/linux/ceph/osdmap.h
new file mode 100644
index 0000000..561ea89
--- /dev/null
+++ b/linux/ceph/osdmap.h
@@ -0,0 +1,225 @@
+#ifndef _FS_CEPH_OSDMAP_H
+#define _FS_CEPH_OSDMAP_H
+
+#include <linux/rbtree.h>
+#include <linux/ceph/types.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/ceph_fs.h>
+#include <linux/crush/crush.h>
+
+/*
+ * The osd map describes the current membership of the osd cluster and
+ * specifies the mapping of objects to placement groups and placement
+ * groups to (sets of) osds. That is, it completely specifies the
+ * (desired) distribution of all data objects in the system at some
+ * point in time.
+ *
+ * Each map version is identified by an epoch, which increases monotonically.
+ *
+ * The map can be updated either via an incremental map (diff) describing
+ * the change between two successive epochs, or as a fully encoded map.
+ */
+struct ceph_pg {
+ uint64_t pool;
+ uint32_t seed;
+};
+
+#define CEPH_POOL_FLAG_HASHPSPOOL 1
+
+struct ceph_pg_pool_info {
+ struct rb_node node;
+ s64 id;
+ u8 type;
+ u8 size;
+ u8 crush_ruleset;
+ u8 object_hash;
+ u32 pg_num, pgp_num;
+ int pg_num_mask, pgp_num_mask;
+ s64 read_tier;
+ s64 write_tier; /* wins for read+write ops */
+ u64 flags;
+ char *name;
+};
+
+static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool)
+{
+ switch (pool->type) {
+ case CEPH_POOL_TYPE_REP:
+ return true;
+ case CEPH_POOL_TYPE_EC:
+ return false;
+ default:
+ BUG_ON(1);
+ }
+}
+
+struct ceph_object_locator {
+ s64 pool;
+};
+
+/*
+ * Maximum supported by kernel client object name length
+ *
+ * (probably outdated: must be >= RBD_MAX_MD_NAME_LEN -- currently 100)
+ */
+#define CEPH_MAX_OID_NAME_LEN 100
+
+struct ceph_object_id {
+ char name[CEPH_MAX_OID_NAME_LEN];
+ int name_len;
+};
+
+struct ceph_pg_mapping {
+ struct rb_node node;
+ struct ceph_pg pgid;
+
+ union {
+ struct {
+ int len;
+ int osds[];
+ } pg_temp;
+ struct {
+ int osd;
+ } primary_temp;
+ };
+};
+
+struct ceph_osdmap {
+ struct ceph_fsid fsid;
+ u32 epoch;
+ u32 mkfs_epoch;
+ struct ceph_timespec created, modified;
+
+ u32 flags; /* CEPH_OSDMAP_* */
+
+ u32 max_osd; /* size of osd_state, _offload, _addr arrays */
+ u8 *osd_state; /* CEPH_OSD_* */
+ u32 *osd_weight; /* 0 = failed, 0x10000 = 100% normal */
+ struct ceph_entity_addr *osd_addr;
+
+ struct rb_root pg_temp;
+ struct rb_root primary_temp;
+
+ u32 *osd_primary_affinity;
+
+ struct rb_root pg_pools;
+ u32 pool_max;
+
+ /* the CRUSH map specifies the mapping of placement groups to
+ * the list of osds that store+replicate them. */
+ struct crush_map *crush;
+
+ struct mutex crush_scratch_mutex;
+ int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3];
+};
+
+static inline void ceph_oid_set_name(struct ceph_object_id *oid,
+ const char *name)
+{
+ int len;
+
+ len = strlen(name);
+ if (len > sizeof(oid->name)) {
+ WARN(1, "ceph_oid_set_name '%s' len %d vs %zu, truncating\n",
+ name, len, sizeof(oid->name));
+ len = sizeof(oid->name);
+ }
+
+ memcpy(oid->name, name, len);
+ oid->name_len = len;
+}
+
+static inline void ceph_oid_copy(struct ceph_object_id *dest,
+ struct ceph_object_id *src)
+{
+ BUG_ON(src->name_len > sizeof(dest->name));
+ memcpy(dest->name, src->name, src->name_len);
+ dest->name_len = src->name_len;
+}
+
+static inline int ceph_osd_exists(struct ceph_osdmap *map, int osd)
+{
+ return osd >= 0 && osd < map->max_osd &&
+ (map->osd_state[osd] & CEPH_OSD_EXISTS);
+}
+
+static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd)
+{
+ return ceph_osd_exists(map, osd) &&
+ (map->osd_state[osd] & CEPH_OSD_UP);
+}
+
+static inline int ceph_osd_is_down(struct ceph_osdmap *map, int osd)
+{
+ return !ceph_osd_is_up(map, osd);
+}
+
+static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag)
+{
+ return map && (map->flags & flag);
+}
+
+extern char *ceph_osdmap_state_str(char *str, int len, int state);
+extern u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd);
+
+static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map,
+ int osd)
+{
+ if (osd >= map->max_osd)
+ return NULL;
+ return &map->osd_addr[osd];
+}
+
+static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
+{
+ __u8 version;
+
+ if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) {
+ pr_warning("incomplete pg encoding");
+
+ return -EINVAL;
+ }
+ version = ceph_decode_8(p);
+ if (version > 1) {
+ pr_warning("do not understand pg encoding %d > 1",
+ (int)version);
+ return -EINVAL;
+ }
+
+ pgid->pool = ceph_decode_64(p);
+ pgid->seed = ceph_decode_32(p);
+ *p += 4; /* skip deprecated preferred value */
+
+ return 0;
+}
+
+extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end);
+extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
+ struct ceph_osdmap *map,
+ struct ceph_messenger *msgr);
+extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
+
+/* calculate mapping of a file extent to an object */
+extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
+ u64 off, u64 len,
+ u64 *bno, u64 *oxoff, u64 *oxlen);
+
+/* calculate mapping of object to a placement group */
+extern int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap,
+ struct ceph_object_locator *oloc,
+ struct ceph_object_id *oid,
+ struct ceph_pg *pg_out);
+
+extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap,
+ struct ceph_pg pgid,
+ int *osds, int *primary);
+extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
+ struct ceph_pg pgid);
+
+extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map,
+ u64 id);
+
+extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
+extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
+
+#endif
diff --git a/linux/ceph/pagelist.h b/linux/ceph/pagelist.h
new file mode 100644
index 0000000..9660d6b
--- /dev/null
+++ b/linux/ceph/pagelist.h
@@ -0,0 +1,75 @@
+#ifndef __FS_CEPH_PAGELIST_H
+#define __FS_CEPH_PAGELIST_H
+
+#include <linux/list.h>
+
+struct ceph_pagelist {
+ struct list_head head;
+ void *mapped_tail;
+ size_t length;
+ size_t room;
+ struct list_head free_list;
+ size_t num_pages_free;
+};
+
+struct ceph_pagelist_cursor {
+ struct ceph_pagelist *pl; /* pagelist, for error checking */
+ struct list_head *page_lru; /* page in list */
+ size_t room; /* room remaining to reset to */
+};
+
+static inline void ceph_pagelist_init(struct ceph_pagelist *pl)
+{
+ INIT_LIST_HEAD(&pl->head);
+ pl->mapped_tail = NULL;
+ pl->length = 0;
+ pl->room = 0;
+ INIT_LIST_HEAD(&pl->free_list);
+ pl->num_pages_free = 0;
+}
+
+extern int ceph_pagelist_release(struct ceph_pagelist *pl);
+
+extern int ceph_pagelist_append(struct ceph_pagelist *pl, const void *d, size_t l);
+
+extern int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space);
+
+extern int ceph_pagelist_free_reserve(struct ceph_pagelist *pl);
+
+extern void ceph_pagelist_set_cursor(struct ceph_pagelist *pl,
+ struct ceph_pagelist_cursor *c);
+
+extern int ceph_pagelist_truncate(struct ceph_pagelist *pl,
+ struct ceph_pagelist_cursor *c);
+
+static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v)
+{
+ __le64 ev = cpu_to_le64(v);
+ return ceph_pagelist_append(pl, &ev, sizeof(ev));
+}
+static inline int ceph_pagelist_encode_32(struct ceph_pagelist *pl, u32 v)
+{
+ __le32 ev = cpu_to_le32(v);
+ return ceph_pagelist_append(pl, &ev, sizeof(ev));
+}
+static inline int ceph_pagelist_encode_16(struct ceph_pagelist *pl, u16 v)
+{
+ __le16 ev = cpu_to_le16(v);
+ return ceph_pagelist_append(pl, &ev, sizeof(ev));
+}
+static inline int ceph_pagelist_encode_8(struct ceph_pagelist *pl, u8 v)
+{
+ return ceph_pagelist_append(pl, &v, 1);
+}
+static inline int ceph_pagelist_encode_string(struct ceph_pagelist *pl,
+ char *s, size_t len)
+{
+ int ret = ceph_pagelist_encode_32(pl, len);
+ if (ret)
+ return ret;
+ if (len)
+ return ceph_pagelist_append(pl, s, len);
+ return 0;
+}
+
+#endif
diff --git a/linux/ceph/rados.h b/linux/ceph/rados.h
new file mode 100644
index 0000000..f20e0d8
--- /dev/null
+++ b/linux/ceph/rados.h
@@ -0,0 +1,436 @@
+#ifndef CEPH_RADOS_H
+#define CEPH_RADOS_H
+
+/*
+ * Data types for the Ceph distributed object storage layer RADOS
+ * (Reliable Autonomic Distributed Object Store).
+ */
+
+#include <linux/ceph/msgr.h>
+
+/*
+ * fs id
+ */
+struct ceph_fsid {
+ unsigned char fsid[16];
+};
+
+static inline int ceph_fsid_compare(const struct ceph_fsid *a,
+ const struct ceph_fsid *b)
+{
+ return memcmp(a, b, sizeof(*a));
+}
+
+/*
+ * ino, object, etc.
+ */
+typedef __le64 ceph_snapid_t;
+#define CEPH_SNAPDIR ((__u64)(-1)) /* reserved for hidden .snap dir */
+#define CEPH_NOSNAP ((__u64)(-2)) /* "head", "live" revision */
+#define CEPH_MAXSNAP ((__u64)(-3)) /* largest valid snapid */
+
+struct ceph_timespec {
+ __le32 tv_sec;
+ __le32 tv_nsec;
+} __attribute__ ((packed));
+
+
+/*
+ * object layout - how objects are mapped into PGs
+ */
+#define CEPH_OBJECT_LAYOUT_HASH 1
+#define CEPH_OBJECT_LAYOUT_LINEAR 2
+#define CEPH_OBJECT_LAYOUT_HASHINO 3
+
+/*
+ * pg layout -- how PGs are mapped onto (sets of) OSDs
+ */
+#define CEPH_PG_LAYOUT_CRUSH 0
+#define CEPH_PG_LAYOUT_HASH 1
+#define CEPH_PG_LAYOUT_LINEAR 2
+#define CEPH_PG_LAYOUT_HYBRID 3
+
+#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */
+
+/*
+ * placement group.
+ * we encode this into one __le64.
+ */
+struct ceph_pg_v1 {
+ __le16 preferred; /* preferred primary osd */
+ __le16 ps; /* placement seed */
+ __le32 pool; /* object pool */
+} __attribute__ ((packed));
+
+/*
+ * pg_pool is a set of pgs storing a pool of objects
+ *
+ * pg_num -- base number of pseudorandomly placed pgs
+ *
+ * pgp_num -- effective number when calculating pg placement. this
+ * is used for pg_num increases. new pgs result in data being "split"
+ * into new pgs. for this to proceed smoothly, new pgs are initially
+ * colocated with their parents; that is, pgp_num doesn't increase
+ * until the new pgs have successfully split. only _then_ are the new
+ * pgs placed independently.
+ *
+ * lpg_num -- localized pg count (per device). replicas are randomly
+ * selected.
+ *
+ * lpgp_num -- as above.
+ */
+#define CEPH_NOPOOL ((__u64) (-1)) /* pool id not defined */
+
+#define CEPH_POOL_TYPE_REP 1
+#define CEPH_POOL_TYPE_RAID4 2 /* never implemented */
+#define CEPH_POOL_TYPE_EC 3
+
+/*
+ * stable_mod func is used to control the number of placement groups.
+ * similar to straight-up modulo, but produces a stable mapping as b
+ * increases over time. b is the number of bins, and bmask is the
+ * containing power of 2 minus 1.
+ *
+ * b <= bmask and bmask=(2**n)-1
+ * e.g., b=12 -> bmask=15, b=123 -> bmask=127
+ */
+static inline int ceph_stable_mod(int x, int b, int bmask)
+{
+ if ((x & bmask) < b)
+ return x & bmask;
+ else
+ return x & (bmask >> 1);
+}
+
+/*
+ * object layout - how a given object should be stored.
+ */
+struct ceph_object_layout {
+ struct ceph_pg_v1 ol_pgid; /* raw pg, with _full_ ps precision. */
+ __le32 ol_stripe_unit; /* for per-object parity, if any */
+} __attribute__ ((packed));
+
+/*
+ * compound epoch+version, used by storage layer to serialize mutations
+ */
+struct ceph_eversion {
+ __le32 epoch;
+ __le64 version;
+} __attribute__ ((packed));
+
+/*
+ * osd map bits
+ */
+
+/* status bits */
+#define CEPH_OSD_EXISTS (1<<0)
+#define CEPH_OSD_UP (1<<1)
+#define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */
+#define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */
+
+extern const char *ceph_osd_state_name(int s);
+
+/* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */
+#define CEPH_OSD_IN 0x10000
+#define CEPH_OSD_OUT 0
+
+/* osd primary-affinity. fixed point value: 0x10000 == baseline */
+#define CEPH_OSD_MAX_PRIMARY_AFFINITY 0x10000
+#define CEPH_OSD_DEFAULT_PRIMARY_AFFINITY 0x10000
+
+
+/*
+ * osd map flag bits
+ */
+#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */
+#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */
+#define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */
+#define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */
+#define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */
+#define CEPH_OSDMAP_NOUP (1<<5) /* block osd boot */
+#define CEPH_OSDMAP_NODOWN (1<<6) /* block osd mark-down/failure */
+#define CEPH_OSDMAP_NOOUT (1<<7) /* block osd auto mark-out */
+#define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */
+#define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */
+#define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */
+
+/*
+ * The error code to return when an OSD can't handle a write
+ * because it is too large.
+ */
+#define OSD_WRITETOOBIG EMSGSIZE
+
+/*
+ * osd ops
+ *
+ * WARNING: do not use these op codes directly. Use the helpers
+ * defined below instead. In certain cases, op code behavior was
+ * redefined, resulting in special-cases in the helpers.
+ */
+#define CEPH_OSD_OP_MODE 0xf000
+#define CEPH_OSD_OP_MODE_RD 0x1000
+#define CEPH_OSD_OP_MODE_WR 0x2000
+#define CEPH_OSD_OP_MODE_RMW 0x3000
+#define CEPH_OSD_OP_MODE_SUB 0x4000
+
+#define CEPH_OSD_OP_TYPE 0x0f00
+#define CEPH_OSD_OP_TYPE_LOCK 0x0100
+#define CEPH_OSD_OP_TYPE_DATA 0x0200
+#define CEPH_OSD_OP_TYPE_ATTR 0x0300
+#define CEPH_OSD_OP_TYPE_EXEC 0x0400
+#define CEPH_OSD_OP_TYPE_PG 0x0500
+#define CEPH_OSD_OP_TYPE_MULTI 0x0600 /* multiobject */
+
+enum {
+ /** data **/
+ /* read */
+ CEPH_OSD_OP_READ = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 1,
+ CEPH_OSD_OP_STAT = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 2,
+ CEPH_OSD_OP_MAPEXT = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 3,
+
+ /* fancy read */
+ CEPH_OSD_OP_MASKTRUNC = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 4,
+ CEPH_OSD_OP_SPARSE_READ = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 5,
+
+ CEPH_OSD_OP_NOTIFY = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 6,
+ CEPH_OSD_OP_NOTIFY_ACK = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 7,
+
+ /* versioning */
+ CEPH_OSD_OP_ASSERT_VER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 8,
+
+ /* write */
+ CEPH_OSD_OP_WRITE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 1,
+ CEPH_OSD_OP_WRITEFULL = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 2,
+ CEPH_OSD_OP_TRUNCATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 3,
+ CEPH_OSD_OP_ZERO = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 4,
+ CEPH_OSD_OP_DELETE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 5,
+
+ /* fancy write */
+ CEPH_OSD_OP_APPEND = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 6,
+ CEPH_OSD_OP_STARTSYNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 7,
+ CEPH_OSD_OP_SETTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 8,
+ CEPH_OSD_OP_TRIMTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 9,
+
+ CEPH_OSD_OP_TMAPUP = CEPH_OSD_OP_MODE_RMW | CEPH_OSD_OP_TYPE_DATA | 10,
+ CEPH_OSD_OP_TMAPPUT = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 11,
+ CEPH_OSD_OP_TMAPGET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 12,
+
+ CEPH_OSD_OP_CREATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13,
+ CEPH_OSD_OP_ROLLBACK= CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 14,
+
+ CEPH_OSD_OP_WATCH = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15,
+
+ /* omap */
+ CEPH_OSD_OP_OMAPGETKEYS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 17,
+ CEPH_OSD_OP_OMAPGETVALS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 18,
+ CEPH_OSD_OP_OMAPGETHEADER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 19,
+ CEPH_OSD_OP_OMAPGETVALSBYKEYS =
+ CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 20,
+ CEPH_OSD_OP_OMAPSETVALS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 21,
+ CEPH_OSD_OP_OMAPSETHEADER = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 22,
+ CEPH_OSD_OP_OMAPCLEAR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 23,
+ CEPH_OSD_OP_OMAPRMKEYS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24,
+ CEPH_OSD_OP_OMAP_CMP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25,
+
+ /* hints */
+ CEPH_OSD_OP_SETALLOCHINT = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 35,
+
+ /** multi **/
+ CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1,
+ CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2,
+ CEPH_OSD_OP_SRC_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 3,
+
+ /** attrs **/
+ /* read */
+ CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1,
+ CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2,
+ CEPH_OSD_OP_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 3,
+
+ /* write */
+ CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1,
+ CEPH_OSD_OP_SETXATTRS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 2,
+ CEPH_OSD_OP_RESETXATTRS = CEPH_OSD_OP_MODE_WR|CEPH_OSD_OP_TYPE_ATTR | 3,
+ CEPH_OSD_OP_RMXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 4,
+
+ /** subop **/
+ CEPH_OSD_OP_PULL = CEPH_OSD_OP_MODE_SUB | 1,
+ CEPH_OSD_OP_PUSH = CEPH_OSD_OP_MODE_SUB | 2,
+ CEPH_OSD_OP_BALANCEREADS = CEPH_OSD_OP_MODE_SUB | 3,
+ CEPH_OSD_OP_UNBALANCEREADS = CEPH_OSD_OP_MODE_SUB | 4,
+ CEPH_OSD_OP_SCRUB = CEPH_OSD_OP_MODE_SUB | 5,
+ CEPH_OSD_OP_SCRUB_RESERVE = CEPH_OSD_OP_MODE_SUB | 6,
+ CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7,
+ CEPH_OSD_OP_SCRUB_STOP = CEPH_OSD_OP_MODE_SUB | 8,
+ CEPH_OSD_OP_SCRUB_MAP = CEPH_OSD_OP_MODE_SUB | 9,
+
+ /** lock **/
+ CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1,
+ CEPH_OSD_OP_WRUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 2,
+ CEPH_OSD_OP_RDLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 3,
+ CEPH_OSD_OP_RDUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 4,
+ CEPH_OSD_OP_UPLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 5,
+ CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6,
+
+ /** exec **/
+ /* note: the RD bit here is wrong; see special-case below in helper */
+ CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1,
+
+ /** pg **/
+ CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1,
+ CEPH_OSD_OP_PGLS_FILTER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 2,
+};
+
+static inline int ceph_osd_op_type_lock(int op)
+{
+ return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_LOCK;
+}
+static inline int ceph_osd_op_type_data(int op)
+{
+ return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_DATA;
+}
+static inline int ceph_osd_op_type_attr(int op)
+{
+ return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_ATTR;
+}
+static inline int ceph_osd_op_type_exec(int op)
+{
+ return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_EXEC;
+}
+static inline int ceph_osd_op_type_pg(int op)
+{
+ return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG;
+}
+static inline int ceph_osd_op_type_multi(int op)
+{
+ return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_MULTI;
+}
+
+static inline int ceph_osd_op_mode_subop(int op)
+{
+ return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_SUB;
+}
+static inline int ceph_osd_op_mode_read(int op)
+{
+ return (op & CEPH_OSD_OP_MODE_RD) &&
+ op != CEPH_OSD_OP_CALL;
+}
+static inline int ceph_osd_op_mode_modify(int op)
+{
+ return op & CEPH_OSD_OP_MODE_WR;
+}
+
+/*
+ * note that the following tmap stuff is also defined in the ceph librados.h
+ * any modification here needs to be updated there
+ */
+#define CEPH_OSD_TMAP_HDR 'h'
+#define CEPH_OSD_TMAP_SET 's'
+#define CEPH_OSD_TMAP_CREATE 'c' /* create key */
+#define CEPH_OSD_TMAP_RM 'r'
+#define CEPH_OSD_TMAP_RMSLOPPY 'R'
+
+extern const char *ceph_osd_op_name(int op);
+
+/*
+ * osd op flags
+ *
+ * An op may be READ, WRITE, or READ|WRITE.
+ */
+enum {
+ CEPH_OSD_FLAG_ACK = 0x0001, /* want (or is) "ack" ack */
+ CEPH_OSD_FLAG_ONNVRAM = 0x0002, /* want (or is) "onnvram" ack */
+ CEPH_OSD_FLAG_ONDISK = 0x0004, /* want (or is) "ondisk" ack */
+ CEPH_OSD_FLAG_RETRY = 0x0008, /* resend attempt */
+ CEPH_OSD_FLAG_READ = 0x0010, /* op may read */
+ CEPH_OSD_FLAG_WRITE = 0x0020, /* op may write */
+ CEPH_OSD_FLAG_ORDERSNAP = 0x0040, /* EOLDSNAP if snapc is out of order */
+ CEPH_OSD_FLAG_PEERSTAT_OLD = 0x0080, /* DEPRECATED msg includes osd_peer_stat */
+ CEPH_OSD_FLAG_BALANCE_READS = 0x0100,
+ CEPH_OSD_FLAG_PARALLELEXEC = 0x0200, /* execute op in parallel */
+ CEPH_OSD_FLAG_PGOP = 0x0400, /* pg op, no object */
+ CEPH_OSD_FLAG_EXEC = 0x0800, /* op may exec */
+ CEPH_OSD_FLAG_EXEC_PUBLIC = 0x1000, /* DEPRECATED op may exec (public) */
+ CEPH_OSD_FLAG_LOCALIZE_READS = 0x2000, /* read from nearby replica, if any */
+ CEPH_OSD_FLAG_RWORDERED = 0x4000, /* order wrt concurrent reads */
+ CEPH_OSD_FLAG_IGNORE_CACHE = 0x8000, /* ignore cache logic */
+ CEPH_OSD_FLAG_SKIPRWLOCKS = 0x10000, /* skip rw locks */
+ CEPH_OSD_FLAG_IGNORE_OVERLAY = 0x20000, /* ignore pool overlay */
+ CEPH_OSD_FLAG_FLUSH = 0x40000, /* this is part of flush */
+};
+
+enum {
+ CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */
+ CEPH_OSD_OP_FLAG_FAILOK = 2, /* continue despite failure */
+};
+
+#define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/
+#define EBLACKLISTED ESHUTDOWN /* blacklisted */
+
+/* xattr comparison */
+enum {
+ CEPH_OSD_CMPXATTR_OP_NOP = 0,
+ CEPH_OSD_CMPXATTR_OP_EQ = 1,
+ CEPH_OSD_CMPXATTR_OP_NE = 2,
+ CEPH_OSD_CMPXATTR_OP_GT = 3,
+ CEPH_OSD_CMPXATTR_OP_GTE = 4,
+ CEPH_OSD_CMPXATTR_OP_LT = 5,
+ CEPH_OSD_CMPXATTR_OP_LTE = 6
+};
+
+enum {
+ CEPH_OSD_CMPXATTR_MODE_STRING = 1,
+ CEPH_OSD_CMPXATTR_MODE_U64 = 2
+};
+
+#define RADOS_NOTIFY_VER 1
+
+/*
+ * an individual object operation. each may be accompanied by some data
+ * payload
+ */
+struct ceph_osd_op {
+ __le16 op; /* CEPH_OSD_OP_* */
+ __le32 flags; /* CEPH_OSD_OP_FLAG_* */
+ union {
+ struct {
+ __le64 offset, length;
+ __le64 truncate_size;
+ __le32 truncate_seq;
+ } __attribute__ ((packed)) extent;
+ struct {
+ __le32 name_len;
+ __le32 value_len;
+ __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */
+ __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */
+ } __attribute__ ((packed)) xattr;
+ struct {
+ __u8 class_len;
+ __u8 method_len;
+ __u8 argc;
+ __le32 indata_len;
+ } __attribute__ ((packed)) cls;
+ struct {
+ __le64 cookie, count;
+ } __attribute__ ((packed)) pgls;
+ struct {
+ __le64 snapid;
+ } __attribute__ ((packed)) snap;
+ struct {
+ __le64 cookie;
+ __le64 ver;
+ __u8 flag; /* 0 = unwatch, 1 = watch */
+ } __attribute__ ((packed)) watch;
+ struct {
+ __le64 offset, length;
+ __le64 src_offset;
+ } __attribute__ ((packed)) clonerange;
+ struct {
+ __le64 expected_object_size;
+ __le64 expected_write_size;
+ } __attribute__ ((packed)) alloc_hint;
+ };
+ __le32 payload_len;
+} __attribute__ ((packed));
+
+
+#endif
diff --git a/linux/ceph/types.h b/linux/ceph/types.h
new file mode 100644
index 0000000..d3ff1cf
--- /dev/null
+++ b/linux/ceph/types.h
@@ -0,0 +1,29 @@
+#ifndef _FS_CEPH_TYPES_H
+#define _FS_CEPH_TYPES_H
+
+/* needed before including ceph_fs.h */
+#include <linux/in.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/string.h>
+
+#include <linux/ceph/ceph_fs.h>
+#include <linux/ceph/ceph_frag.h>
+#include <linux/ceph/ceph_hash.h>
+
+/*
+ * Identify inodes by both their ino AND snapshot id (a u64).
+ */
+struct ceph_vino {
+ u64 ino;
+ u64 snap;
+};
+
+
+/* context for the caps reservation mechanism */
+struct ceph_cap_reservation {
+ int count;
+};
+
+
+#endif
diff --git a/linux/crush/crush.h b/linux/crush/crush.h
new file mode 100644
index 0000000..4fad5f8
--- /dev/null
+++ b/linux/crush/crush.h
@@ -0,0 +1,201 @@
+#ifndef CEPH_CRUSH_CRUSH_H
+#define CEPH_CRUSH_CRUSH_H
+
+#include <linux/types.h>
+
+/*
+ * CRUSH is a pseudo-random data distribution algorithm that
+ * efficiently distributes input values (typically, data objects)
+ * across a heterogeneous, structured storage cluster.
+ *
+ * The algorithm was originally described in detail in this paper
+ * (although the algorithm has evolved somewhat since then):
+ *
+ * http://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf
+ *
+ * LGPL2
+ */
+
+
+#define CRUSH_MAGIC 0x00010000ul /* for detecting algorithm revisions */
+
+#define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */
+
+
+#define CRUSH_ITEM_UNDEF 0x7ffffffe /* undefined result (internal use only) */
+#define CRUSH_ITEM_NONE 0x7fffffff /* no result */
+
+/*
+ * CRUSH uses user-defined "rules" to describe how inputs should be
+ * mapped to devices. A rule consists of a sequence of steps to perform
+ * to generate the set of output devices.
+ */
+struct crush_rule_step {
+ __u32 op;
+ __s32 arg1;
+ __s32 arg2;
+};
+
+/* step op codes */
+enum {
+ CRUSH_RULE_NOOP = 0,
+ CRUSH_RULE_TAKE = 1, /* arg1 = value to start with */
+ CRUSH_RULE_CHOOSE_FIRSTN = 2, /* arg1 = num items to pick */
+ /* arg2 = type */
+ CRUSH_RULE_CHOOSE_INDEP = 3, /* same */
+ CRUSH_RULE_EMIT = 4, /* no args */
+ CRUSH_RULE_CHOOSELEAF_FIRSTN = 6,
+ CRUSH_RULE_CHOOSELEAF_INDEP = 7,
+
+ CRUSH_RULE_SET_CHOOSE_TRIES = 8, /* override choose_total_tries */
+ CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */
+ CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10,
+ CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11,
+ CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12
+};
+
+/*
+ * for specifying choose num (arg1) relative to the max parameter
+ * passed to do_rule
+ */
+#define CRUSH_CHOOSE_N 0
+#define CRUSH_CHOOSE_N_MINUS(x) (-(x))
+
+/*
+ * The rule mask is used to describe what the rule is intended for.
+ * Given a ruleset and size of output set, we search through the
+ * rule list for a matching rule_mask.
+ */
+struct crush_rule_mask {
+ __u8 ruleset;
+ __u8 type;
+ __u8 min_size;
+ __u8 max_size;
+};
+
+struct crush_rule {
+ __u32 len;
+ struct crush_rule_mask mask;
+ struct crush_rule_step steps[0];
+};
+
+#define crush_rule_size(len) (sizeof(struct crush_rule) + \
+ (len)*sizeof(struct crush_rule_step))
+
+
+
+/*
+ * A bucket is a named container of other items (either devices or
+ * other buckets). Items within a bucket are chosen using one of a
+ * few different algorithms. The table summarizes how the speed of
+ * each option measures up against mapping stability when items are
+ * added or removed.
+ *
+ * Bucket Alg Speed Additions Removals
+ * ------------------------------------------------
+ * uniform O(1) poor poor
+ * list O(n) optimal poor
+ * tree O(log n) good good
+ * straw O(n) optimal optimal
+ */
+enum {
+ CRUSH_BUCKET_UNIFORM = 1,
+ CRUSH_BUCKET_LIST = 2,
+ CRUSH_BUCKET_TREE = 3,
+ CRUSH_BUCKET_STRAW = 4
+};
+extern const char *crush_bucket_alg_name(int alg);
+
+struct crush_bucket {
+ __s32 id; /* this'll be negative */
+ __u16 type; /* non-zero; type=0 is reserved for devices */
+ __u8 alg; /* one of CRUSH_BUCKET_* */
+ __u8 hash; /* which hash function to use, CRUSH_HASH_* */
+ __u32 weight; /* 16-bit fixed point */
+ __u32 size; /* num items */
+ __s32 *items;
+
+ /*
+ * cached random permutation: used for uniform bucket and for
+ * the linear search fallback for the other bucket types.
+ */
+ __u32 perm_x; /* @x for which *perm is defined */
+ __u32 perm_n; /* num elements of *perm that are permuted/defined */
+ __u32 *perm;
+};
+
+struct crush_bucket_uniform {
+ struct crush_bucket h;
+ __u32 item_weight; /* 16-bit fixed point; all items equally weighted */
+};
+
+struct crush_bucket_list {
+ struct crush_bucket h;
+ __u32 *item_weights; /* 16-bit fixed point */
+ __u32 *sum_weights; /* 16-bit fixed point. element i is sum
+ of weights 0..i, inclusive */
+};
+
+struct crush_bucket_tree {
+ struct crush_bucket h; /* note: h.size is _tree_ size, not number of
+ actual items */
+ __u8 num_nodes;
+ __u32 *node_weights;
+};
+
+struct crush_bucket_straw {
+ struct crush_bucket h;
+ __u32 *item_weights; /* 16-bit fixed point */
+ __u32 *straws; /* 16-bit fixed point */
+};
+
+
+
+/*
+ * CRUSH map includes all buckets, rules, etc.
+ */
+struct crush_map {
+ struct crush_bucket **buckets;
+ struct crush_rule **rules;
+
+ __s32 max_buckets;
+ __u32 max_rules;
+ __s32 max_devices;
+
+ /* choose local retries before re-descent */
+ __u32 choose_local_tries;
+ /* choose local attempts using a fallback permutation before
+ * re-descent */
+ __u32 choose_local_fallback_tries;
+ /* choose attempts before giving up */
+ __u32 choose_total_tries;
+ /* attempt chooseleaf inner descent once for firstn mode; on
+ * reject retry outer descent. Note that this does *not*
+ * apply to a collision: in that case we will retry as we used
+ * to. */
+ __u32 chooseleaf_descend_once;
+
+ /* if non-zero, feed r into chooseleaf, bit-shifted right by (r-1)
+ * bits. a value of 1 is best for new clusters. for legacy clusters
+ * that want to limit reshuffling, a value of 3 or 4 will make the
+ * mappings line up a bit better with previous mappings. */
+ __u8 chooseleaf_vary_r;
+};
+
+
+/* crush.c */
+extern int crush_get_bucket_item_weight(const struct crush_bucket *b, int pos);
+extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
+extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
+extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
+extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
+extern void crush_destroy_bucket(struct crush_bucket *b);
+extern void crush_destroy_rule(struct crush_rule *r);
+extern void crush_destroy(struct crush_map *map);
+
+static inline int crush_calc_tree_node(int i)
+{
+ return ((i+1) << 1)-1;
+}
+
+#endif
diff --git a/linux/crush/hash.h b/linux/crush/hash.h
new file mode 100644
index 0000000..91e8842
--- /dev/null
+++ b/linux/crush/hash.h
@@ -0,0 +1,17 @@
+#ifndef CEPH_CRUSH_HASH_H
+#define CEPH_CRUSH_HASH_H
+
+#define CRUSH_HASH_RJENKINS1 0
+
+#define CRUSH_HASH_DEFAULT CRUSH_HASH_RJENKINS1
+
+extern const char *crush_hash_name(int type);
+
+extern __u32 crush_hash32(int type, __u32 a);
+extern __u32 crush_hash32_2(int type, __u32 a, __u32 b);
+extern __u32 crush_hash32_3(int type, __u32 a, __u32 b, __u32 c);
+extern __u32 crush_hash32_4(int type, __u32 a, __u32 b, __u32 c, __u32 d);
+extern __u32 crush_hash32_5(int type, __u32 a, __u32 b, __u32 c, __u32 d,
+ __u32 e);
+
+#endif
diff --git a/linux/crush/mapper.h b/linux/crush/mapper.h
new file mode 100644
index 0000000..eab3674
--- /dev/null
+++ b/linux/crush/mapper.h
@@ -0,0 +1,20 @@
+#ifndef CEPH_CRUSH_MAPPER_H
+#define CEPH_CRUSH_MAPPER_H
+
+/*
+ * CRUSH functions for finding rules and then mapping an input to an
+ * output set.
+ *
+ * LGPL2
+ */
+
+#include <linux/crush/crush.h>
+
+extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size);
+extern int crush_do_rule(const struct crush_map *map,
+ int ruleno,
+ int x, int *result, int result_max,
+ const __u32 *weights, int weight_max,
+ int *scratch);
+
+#endif
diff --git a/rbd/Kconfig b/rbd/Kconfig
new file mode 100644
index 0000000..014a1cf
--- /dev/null
+++ b/rbd/Kconfig
@@ -0,0 +1,560 @@
+#
+# Block device driver configuration
+#
+
+menuconfig BLK_DEV
+ bool "Block devices"
+ depends on BLOCK
+ default y
+ ---help---
+ Say Y here to get to see options for various different block device
+ drivers. This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and disabled;
+ only do this if you know what you are doing.
+
+if BLK_DEV
+
+config BLK_DEV_NULL_BLK
+ tristate "Null test block driver"
+
+config BLK_DEV_FD
+ tristate "Normal floppy disk support"
+ depends on ARCH_MAY_HAVE_PC_FDC
+ ---help---
+ If you want to use the floppy disk drive(s) of your PC under Linux,
+ say Y. Information about this driver, especially important for IBM
+ Thinkpad users, is contained in
+ <file:Documentation/blockdev/floppy.txt>.
+ That file also contains the location of the Floppy driver FAQ as
+ well as location of the fdutils package used to configure additional
+ parameters of the driver at run time.
+
+ To compile this driver as a module, choose M here: the
+ module will be called floppy.
+
+config AMIGA_FLOPPY
+ tristate "Amiga floppy support"
+ depends on AMIGA
+
+config ATARI_FLOPPY
+ tristate "Atari floppy support"
+ depends on ATARI
+
+config MAC_FLOPPY
+ tristate "Support for PowerMac floppy"
+ depends on PPC_PMAC && !PPC_PMAC64
+ help
+ If you have a SWIM-3 (Super Woz Integrated Machine 3; from Apple)
+ floppy controller, say Y here. Most commonly found in PowerMacs.
+
+config BLK_DEV_SWIM
+ tristate "Support for SWIM Macintosh floppy"
+ depends on M68K && MAC
+ help
+ You should select this option if you want floppy support
+ and you don't have a II, IIfx, Q900, Q950 or AV series.
+
+config AMIGA_Z2RAM
+ tristate "Amiga Zorro II ramdisk support"
+ depends on ZORRO
+ help
+ This enables support for using Chip RAM and Zorro II RAM as a
+ ramdisk or as a swap partition. Say Y if you want to include this
+ driver in the kernel.
+
+ To compile this driver as a module, choose M here: the
+ module will be called z2ram.
+
+config GDROM
+ tristate "SEGA Dreamcast GD-ROM drive"
+ depends on SH_DREAMCAST
+ help
+ A standard SEGA Dreamcast comes with a modified CD ROM drive called a
+ "GD-ROM" by SEGA to signify it is capable of reading special disks
+ with up to 1 GB of data. This drive will also read standard CD ROM
+ disks. Select this option to access any disks in your GD ROM drive.
+ Most users will want to say "Y" here.
+ You can also build this as a module which will be called gdrom.
+
+config PARIDE
+ tristate "Parallel port IDE device support"
+ depends on PARPORT_PC
+ ---help---
+ There are many external CD-ROM and disk devices that connect through
+ your computer's parallel port. Most of them are actually IDE devices
+ using a parallel port IDE adapter. This option enables the PARIDE
+ subsystem which contains drivers for many of these external drives.
+ Read <file:Documentation/blockdev/paride.txt> for more information.
+
+ If you have said Y to the "Parallel-port support" configuration
+ option, you may share a single port between your printer and other
+ parallel port devices. Answer Y to build PARIDE support into your
+ kernel, or M if you would like to build it as a loadable module. If
+ your parallel port support is in a loadable module, you must build
+ PARIDE as a module. If you built PARIDE support into your kernel,
+ you may still build the individual protocol modules and high-level
+ drivers as loadable modules. If you build this support as a module,
+ it will be called paride.
+
+ To use the PARIDE support, you must say Y or M here and also to at
+ least one high-level driver (e.g. "Parallel port IDE disks",
+ "Parallel port ATAPI CD-ROMs", "Parallel port ATAPI disks" etc.) and
+ to at least one protocol driver (e.g. "ATEN EH-100 protocol",
+ "MicroSolutions backpack protocol", "DataStor Commuter protocol"
+ etc.).
+
+source "drivers/block/paride/Kconfig"
+
+source "drivers/block/mtip32xx/Kconfig"
+
+source "drivers/block/zram/Kconfig"
+
+config BLK_CPQ_DA
+ tristate "Compaq SMART2 support"
+ depends on PCI && VIRT_TO_BUS && 0
+ help
+ This is the driver for Compaq Smart Array controllers. Everyone
+ using these boards should say Y here. See the file
+ <file:Documentation/blockdev/cpqarray.txt> for the current list of
+ boards supported by this driver, and for further information on the
+ use of this driver.
+
+config BLK_CPQ_CISS_DA
+ tristate "Compaq Smart Array 5xxx support"
+ depends on PCI
+ select CHECK_SIGNATURE
+ help
+ This is the driver for Compaq Smart Array 5xxx controllers.
+ Everyone using these boards should say Y here.
+ See <file:Documentation/blockdev/cciss.txt> for the current list of
+ boards supported by this driver, and for further information
+ on the use of this driver.
+
+config CISS_SCSI_TAPE
+ bool "SCSI tape drive support for Smart Array 5xxx"
+ depends on BLK_CPQ_CISS_DA && PROC_FS
+ depends on SCSI=y || SCSI=BLK_CPQ_CISS_DA
+ help
+ When enabled (Y), this option allows SCSI tape drives and SCSI medium
+ changers (tape robots) to be accessed via a Compaq 5xxx array
+ controller. (See <file:Documentation/blockdev/cciss.txt> for more details.)
+
+ "SCSI support" and "SCSI tape support" must also be enabled for this
+ option to work.
+
+ When this option is disabled (N), the SCSI portion of the driver
+ is not compiled.
+
+config BLK_DEV_DAC960
+ tristate "Mylex DAC960/DAC1100 PCI RAID Controller support"
+ depends on PCI
+ help
+ This driver adds support for the Mylex DAC960, AcceleRAID, and
+ eXtremeRAID PCI RAID controllers. See the file
+ <file:Documentation/blockdev/README.DAC960> for further information
+ about this driver.
+
+ To compile this driver as a module, choose M here: the
+ module will be called DAC960.
+
+config BLK_DEV_UMEM
+ tristate "Micro Memory MM5415 Battery Backed RAM support"
+ depends on PCI
+ ---help---
+ Saying Y here will include support for the MM5415 family of
+ battery backed (Non-volatile) RAM cards.
+ <http://www.umem.com/>
+
+ The cards appear as block devices that can be partitioned into
+ as many as 15 partitions.
+
+ To compile this driver as a module, choose M here: the
+ module will be called umem.
+
+ The umem driver has not yet been allocated a MAJOR number, so
+ one is chosen dynamically.
+
+config BLK_DEV_UBD
+ bool "Virtual block device"
+ depends on UML
+ ---help---
+ The User-Mode Linux port includes a driver called UBD which will let
+ you access arbitrary files on the host computer as block devices.
+ Unless you know that you do not need such virtual block devices say
+ Y here.
+
+config BLK_DEV_UBD_SYNC
+ bool "Always do synchronous disk IO for UBD"
+ depends on BLK_DEV_UBD
+ ---help---
+ Writes to the virtual block device are not immediately written to the
+ host's disk; this may cause problems if, for example, the User-Mode
+ Linux 'Virtual Machine' uses a journalling filesystem and the host
+ computer crashes.
+
+ Synchronous operation (i.e. always writing data to the host's disk
+ immediately) is configurable on a per-UBD basis by using a special
+ kernel command line option. Alternatively, you can say Y here to
+ turn on synchronous operation by default for all block devices.
+
+ If you're running a journalling file system (like reiserfs, for
+ example) in your virtual machine, you will want to say Y here. If
+ you care for the safety of the data in your virtual machine, Y is a
+ wise choice too. In all other cases (for example, if you're just
+ playing around with User-Mode Linux) you can choose N.
+
+config BLK_DEV_COW_COMMON
+ bool
+ default BLK_DEV_UBD
+
+config BLK_DEV_LOOP
+ tristate "Loopback device support"
+ ---help---
+ Saying Y here will allow you to use a regular file as a block
+ device; you can then create a file system on that block device and
+ mount it just as you would mount other block devices such as hard
+ drive partitions, CD-ROM drives or floppy drives. The loop devices
+ are block special device files with major number 7 and typically
+ called /dev/loop0, /dev/loop1 etc.
+
+ This is useful if you want to check an ISO 9660 file system before
+ burning the CD, or if you want to use floppy images without first
+ writing them to floppy. Furthermore, some Linux distributions avoid
+ the need for a dedicated Linux partition by keeping their complete
+ root file system inside a DOS FAT file using this loop device
+ driver.
+
+ To use the loop device, you need the losetup utility, found in the
+ util-linux package, see
+ <ftp://ftp.kernel.org/pub/linux/utils/util-linux/>.
+
+ The loop device driver can also be used to "hide" a file system in
+ a disk partition, floppy, or regular file, either using encryption
+ (scrambling the data) or steganography (hiding the data in the low
+ bits of, say, a sound file). This is also safe if the file resides
+ on a remote file server.
+
+ There are several ways of encrypting disks. Some of these require
+ kernel patches. The vanilla kernel offers the cryptoloop option
+ and a Device Mapper target (which is superior, as it supports all
+ file systems). If you want to use the cryptoloop, say Y to both
+ LOOP and CRYPTOLOOP, and make sure you have a recent (version 2.12
+ or later) version of util-linux. Additionally, be aware that
+ the cryptoloop is not safe for storing journaled filesystems.
+
+ Note that this loop device has nothing to do with the loopback
+ device used for network connections from the machine to itself.
+
+ To compile this driver as a module, choose M here: the
+ module will be called loop.
+
+ Most users will answer N here.
+
+config BLK_DEV_LOOP_MIN_COUNT
+ int "Number of loop devices to pre-create at init time"
+ depends on BLK_DEV_LOOP
+ default 8
+ help
+ Static number of loop devices to be unconditionally pre-created
+ at init time.
+
+ This default value can be overwritten on the kernel command
+ line or with module-parameter loop.max_loop.
+
+ The historic default is 8. If a late 2011 version of losetup(8)
+ is used, it can be set to 0, since needed loop devices can be
+ dynamically allocated with the /dev/loop-control interface.
+
+config BLK_DEV_CRYPTOLOOP
+ tristate "Cryptoloop Support"
+ select CRYPTO
+ select CRYPTO_CBC
+ depends on BLK_DEV_LOOP
+ ---help---
+ Say Y here if you want to be able to use the ciphers that are
+ provided by the CryptoAPI as loop transformation. This might be
+ used as hard disk encryption.
+
+ WARNING: This device is not safe for journaled file systems like
+ ext3 or Reiserfs. Please use the Device Mapper crypto module
+ instead, which can be configured to be on-disk compatible with the
+ cryptoloop device.
+
+source "drivers/block/drbd/Kconfig"
+
+config BLK_DEV_NBD
+ tristate "Network block device support"
+ depends on NET
+ ---help---
+ Saying Y here will allow your computer to be a client for network
+ block devices, i.e. it will be able to use block devices exported by
+ servers (mount file systems on them etc.). Communication between
+ client and server works over TCP/IP networking, but to the client
+ program this is hidden: it looks like a regular local file access to
+ a block device special file such as /dev/nd0.
+
+ Network block devices also allows you to run a block-device in
+ userland (making server and client physically the same computer,
+ communicating using the loopback network device).
+
+ Read <file:Documentation/blockdev/nbd.txt> for more information,
+ especially about where to find the server code, which runs in user
+ space and does not need special kernel support.
+
+ Note that this has nothing to do with the network file systems NFS
+ or Coda; you can say N here even if you intend to use NFS or Coda.
+
+ To compile this driver as a module, choose M here: the
+ module will be called nbd.
+
+ If unsure, say N.
+
+config BLK_DEV_NVME
+ tristate "NVM Express block device"
+ depends on PCI
+ ---help---
+ The NVM Express driver is for solid state drives directly
+ connected to the PCI or PCI Express bus. If you know you
+ don't have one of these, it is safe to answer N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called nvme.
+
+config BLK_DEV_SKD
+ tristate "STEC S1120 Block Driver"
+ depends on PCI
+ depends on 64BIT
+ ---help---
+ Saying Y or M here will enable support for the
+ STEC, Inc. S1120 PCIe SSD.
+
+	  Use device /dev/skd$N and /dev/skd$Np$M.
+
+config BLK_DEV_OSD
+ tristate "OSD object-as-blkdev support"
+ depends on SCSI_OSD_ULD
+ ---help---
+ Saying Y or M here will allow the exporting of a single SCSI
+ OSD (object-based storage) object as a Linux block device.
+
+ For example, if you create a 2G object on an OSD device,
+ you can then use this module to present that 2G object as
+ a Linux block device.
+
+ To compile this driver as a module, choose M here: the
+ module will be called osdblk.
+
+ If unsure, say N.
+
+config BLK_DEV_SX8
+ tristate "Promise SATA SX8 support"
+ depends on PCI
+ ---help---
+ Saying Y or M here will enable support for the
+ Promise SATA SX8 controllers.
+
+ Use devices /dev/sx8/$N and /dev/sx8/$Np$M.
+
+config BLK_DEV_RAM
+ tristate "RAM block device support"
+ ---help---
+ Saying Y here will allow you to use a portion of your RAM memory as
+ a block device, so that you can make file systems on it, read and
+ write to it and do all the other things that you can do with normal
+ block devices (such as hard drives). It is usually used to load and
+ store a copy of a minimal root file system off of a floppy into RAM
+ during the initial install of Linux.
+
+ Note that the kernel command line option "ramdisk=XX" is now obsolete.
+ For details, read <file:Documentation/blockdev/ramdisk.txt>.
+
+ To compile this driver as a module, choose M here: the
+ module will be called brd. An alias "rd" has been defined
+ for historical reasons.
+
+ Most normal users won't need the RAM disk functionality, and can
+ thus say N here.
+
+config BLK_DEV_RAM_COUNT
+ int "Default number of RAM disks"
+ default "16"
+ depends on BLK_DEV_RAM
+ help
+ The default value is 16 RAM disks. Change this if you know what you
+ are doing. If you boot from a filesystem that needs to be extracted
+ in memory, you will need at least one RAM disk (e.g. root on cramfs).
+
+config BLK_DEV_RAM_SIZE
+ int "Default RAM disk size (kbytes)"
+ depends on BLK_DEV_RAM
+ default "4096"
+ help
+ The default value is 4096 kilobytes. Only change this if you know
+ what you are doing.
+
+config BLK_DEV_XIP
+ bool "Support XIP filesystems on RAM block device"
+ depends on BLK_DEV_RAM
+ default n
+ help
+ Support XIP filesystems (such as ext2 with XIP support on) on
+ top of block ram device. This will slightly enlarge the kernel, and
+ will prevent RAM block device backing store memory from being
+ allocated from highmem (only a problem for highmem systems).
+
+config CDROM_PKTCDVD
+ tristate "Packet writing on CD/DVD media"
+ depends on !UML
+ help
+ If you have a CDROM/DVD drive that supports packet writing, say
+ Y to include support. It should work with any MMC/Mt Fuji
+ compliant ATAPI or SCSI drive, which is just about any newer
+ DVD/CD writer.
+
+ Currently only writing to CD-RW, DVD-RW, DVD+RW and DVDRAM discs
+ is possible.
+ DVD-RW disks must be in restricted overwrite mode.
+
+ See the file <file:Documentation/cdrom/packet-writing.txt>
+ for further information on the use of this driver.
+
+ To compile this driver as a module, choose M here: the
+ module will be called pktcdvd.
+
+config CDROM_PKTCDVD_BUFFERS
+ int "Free buffers for data gathering"
+ depends on CDROM_PKTCDVD
+ default "8"
+ help
+ This controls the maximum number of active concurrent packets. More
+ concurrent packets can increase write performance, but also require
+ more memory. Each concurrent packet will require approximately 64Kb
+ of non-swappable kernel memory, memory which will be allocated when
+ a disc is opened for writing.
+
+config CDROM_PKTCDVD_WCACHE
+ bool "Enable write caching"
+ depends on CDROM_PKTCDVD
+ help
+ If enabled, write caching will be set for the CD-R/W device. For now
+ this option is dangerous unless the CD-RW media is known good, as we
+ don't do deferred write error handling yet.
+
+config ATA_OVER_ETH
+ tristate "ATA over Ethernet support"
+ depends on NET
+ help
+	  This driver provides support for ATA over Ethernet block
+ devices like the Coraid EtherDrive (R) Storage Blade.
+
+config MG_DISK
+ tristate "mGine mflash, gflash support"
+ depends on ARM && GPIOLIB
+ help
+ mGine mFlash(gFlash) block device driver
+
+config MG_DISK_RES
+ int "Size of reserved area before MBR"
+ depends on MG_DISK
+ default 0
+ help
+ Define size of reserved area that usually used for boot. Unit is KB.
+ All of the block device operation will be taken this value as start
+ offset
+ Examples:
+ 1024 => 1 MB
+
+config SUNVDC
+ tristate "Sun Virtual Disk Client support"
+ depends on SUN_LDOMS
+ help
+ Support for virtual disk devices as a client under Sun
+ Logical Domains.
+
+source "drivers/s390/block/Kconfig"
+
+config XILINX_SYSACE
+ tristate "Xilinx SystemACE support"
+ depends on 4xx || MICROBLAZE
+ help
+ Include support for the Xilinx SystemACE CompactFlash interface
+
+config XEN_BLKDEV_FRONTEND
+ tristate "Xen virtual block device support"
+ depends on XEN
+ default y
+ select XEN_XENBUS_FRONTEND
+ help
+ This driver implements the front-end of the Xen virtual
+ block device driver. It communicates with a back-end driver
+ in another domain which drives the actual block device.
+
+config XEN_BLKDEV_BACKEND
+ tristate "Xen block-device backend driver"
+ depends on XEN_BACKEND
+ help
+ The block-device backend driver allows the kernel to export its
+ block devices to other guests via a high-performance shared-memory
+ interface.
+
+ The corresponding Linux frontend driver is enabled by the
+ CONFIG_XEN_BLKDEV_FRONTEND configuration option.
+
+	  The backend driver attaches itself to any block device specified
+	  in the XenBus configuration. There are no limits to what the block
+	  device can be, as long as it has a major and minor.
+
+ If you are compiling a kernel to run in a Xen block backend driver
+ domain (often this is domain 0) you should say Y here. To
+	  compile this driver as a module, choose M here: the module
+ will be called xen-blkback.
+
+
+config VIRTIO_BLK
+ tristate "Virtio block driver"
+ depends on VIRTIO
+ ---help---
+ This is the virtual block driver for virtio. It can be used with
+ lguest or QEMU based VMMs (like KVM or Xen). Say Y or M.
+
+config BLK_DEV_HD
+ bool "Very old hard disk (MFM/RLL/IDE) driver"
+ depends on HAVE_IDE
+ depends on !ARM || ARCH_RPC || BROKEN
+ help
+ This is a very old hard disk driver that lacks the enhanced
+ functionality of the newer ones.
+
+ It is required for systems with ancient MFM/RLL/ESDI drives.
+
+ If unsure, say N.
+
+config BLK_DEV_RBD
+ tristate "Rados block device (RBD)"
+ depends on INET && BLOCK
+ select CEPH_LIB
+ select LIBCRC32C
+ select CRYPTO_AES
+ select CRYPTO
+ default n
+ help
+	  Say Y here if you want to include the Rados block device, which stripes
+ a block device over objects stored in the Ceph distributed object
+ store.
+
+ More information at http://ceph.newdream.net/.
+
+ If unsure, say N.
+
+config BLK_DEV_RSXX
+ tristate "IBM Flash Adapter 900GB Full Height PCIe Device Driver"
+ depends on PCI
+ help
+ Device driver for IBM's high speed PCIe SSD
+ storage device: Flash Adapter 900GB Full Height.
+
+ To compile this driver as a module, choose M here: the
+ module will be called rsxx.
+
+endif # BLK_DEV
diff --git a/rbd/Makefile b/rbd/Makefile
new file mode 100644
index 0000000..02b688d
--- /dev/null
+++ b/rbd/Makefile
@@ -0,0 +1,49 @@
+#
+# Makefile for the kernel block device drivers.
+#
+# 12 June 2000, Christoph Hellwig <hch at infradead.org>
+# Rewritten to use lists instead of if-statements.
+#
+
+obj-$(CONFIG_MAC_FLOPPY) += swim3.o
+obj-$(CONFIG_BLK_DEV_SWIM) += swim_mod.o
+obj-$(CONFIG_BLK_DEV_FD) += floppy.o
+obj-$(CONFIG_AMIGA_FLOPPY) += amiflop.o
+obj-$(CONFIG_PS3_DISK) += ps3disk.o
+obj-$(CONFIG_PS3_VRAM) += ps3vram.o
+obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o
+obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o
+obj-$(CONFIG_BLK_DEV_RAM) += brd.o
+obj-$(CONFIG_BLK_DEV_LOOP) += loop.o
+obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o
+obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o
+obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o
+obj-$(CONFIG_XILINX_SYSACE) += xsysace.o
+obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o
+obj-$(CONFIG_MG_DISK) += mg_disk.o
+obj-$(CONFIG_SUNVDC) += sunvdc.o
+obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
+obj-$(CONFIG_BLK_DEV_SKD) += skd.o
+obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o
+
+obj-$(CONFIG_BLK_DEV_UMEM) += umem.o
+obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
+obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o
+obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
+
+obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
+obj-$(CONFIG_BLK_DEV_HD) += hd.o
+
+obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
+obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
+obj-$(CONFIG_BLK_DEV_DRBD) += drbd/
+obj-$(CONFIG_BLK_DEV_RBD) += rbd.o
+obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/
+
+obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
+obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o
+obj-$(CONFIG_ZRAM) += zram/
+
+nvme-y := nvme-core.o nvme-scsi.o
+skd-y := skd_main.o
+swim_mod-y := swim.o swim_asm.o
diff --git a/rbd/rbd.c b/rbd/rbd.c
new file mode 100644
index 0000000..4c95b50
--- /dev/null
+++ b/rbd/rbd.c
@@ -0,0 +1,5406 @@
+
+/*
+ rbd.c -- Export ceph rados objects as a Linux block device
+
+
+ based on drivers/block/osdblk.c:
+
+ Copyright 2009 Red Hat, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+
+ For usage instructions, please refer to:
+
+ Documentation/ABI/testing/sysfs-bus-rbd
+
+ */
+
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/osd_client.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/decode.h>
+#include <linux/parser.h>
+#include <linux/bsearch.h>
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+
+#include "rbd_types.h"
+
+#define RBD_DEBUG /* Activate rbd_assert() calls */
+
+/*
+ * The basic unit of block I/O is a sector. It is interpreted in a
+ * number of contexts in Linux (blk, bio, genhd), but the default is
+ * universally 512 bytes. These symbols are just slightly more
+ * meaningful than the bare numbers they represent.
+ */
+#define SECTOR_SHIFT 9
+#define SECTOR_SIZE (1ULL << SECTOR_SHIFT)
+
+/*
+ * Increment the given counter and return its updated value.
+ * If the counter is already 0 it will not be incremented.
+ * If the counter is already at its maximum value returns
+ * -EINVAL without updating it.
+ */
+static int atomic_inc_return_safe(atomic_t *v)
+{
+	unsigned int counter;
+
+	/* __atomic_add_unless() leaves a counter that is already 0
+	 * untouched ("sticky zero"), per the block comment above. */
+	counter = (unsigned int)__atomic_add_unless(v, 1, 0);
+	/* Unsigned compare rejects any value that would not fit in a
+	 * non-negative int (i.e. the counter overflowed INT_MAX). */
+	if (counter <= (unsigned int)INT_MAX)
+		return (int)counter;
+
+	/* Overflowed: undo the increment and report the failure. */
+	atomic_dec(v);
+
+	return -EINVAL;
+}
+
+/* Decrement the counter. Return the resulting value, or -EINVAL */
+static int atomic_dec_return_safe(atomic_t *v)
+{
+	int counter;
+
+	counter = atomic_dec_return(v);
+	if (counter >= 0)
+		return counter;
+
+	/* Went negative, so the counter was already 0: restore it and
+	 * report the underflow instead of returning a bogus value. */
+	atomic_inc(v);
+
+	return -EINVAL;
+}
+
+#define RBD_DRV_NAME "rbd"
+
+#define RBD_MINORS_PER_MAJOR 256
+#define RBD_SINGLE_MAJOR_PART_SHIFT 4
+
+#define RBD_SNAP_DEV_NAME_PREFIX "snap_"
+#define RBD_MAX_SNAP_NAME_LEN \
+ (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))
+
+#define RBD_MAX_SNAP_COUNT 510 /* allows max snapc to fit in 4KB */
+
+#define RBD_SNAP_HEAD_NAME "-"
+
+#define BAD_SNAP_INDEX U32_MAX /* invalid index into snap array */
+
+/* This allows a single page to hold an image name sent by OSD */
+#define RBD_IMAGE_NAME_LEN_MAX (PAGE_SIZE - sizeof (__le32) - 1)
+#define RBD_IMAGE_ID_LEN_MAX 64
+
+#define RBD_OBJ_PREFIX_LEN_MAX 64
+
+/* Feature bits */
+
+#define RBD_FEATURE_LAYERING (1<<0)
+#define RBD_FEATURE_STRIPINGV2 (1<<1)
+#define RBD_FEATURES_ALL \
+ (RBD_FEATURE_LAYERING | RBD_FEATURE_STRIPINGV2)
+
+/* Features supported by this (client software) implementation. */
+
+#define RBD_FEATURES_SUPPORTED (RBD_FEATURES_ALL)
+
+/*
+ * An RBD device name will be "rbd#", where the "rbd" comes from
+ * RBD_DRV_NAME above, and # is a unique integer identifier.
+ * MAX_INT_FORMAT_WIDTH is used in ensuring DEV_NAME_LEN is big
+ * enough to hold all possible device names.
+ */
+#define DEV_NAME_LEN 32
+#define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1)
+
+/*
+ * block device image metadata (in-memory version)
+ */
+struct rbd_image_header {
+ /* These six fields never change for a given rbd image */
+ char *object_prefix;
+ __u8 obj_order;
+ __u8 crypt_type;
+ __u8 comp_type;
+ u64 stripe_unit;
+ u64 stripe_count;
+ u64 features; /* Might be changeable someday? */
+
+ /* The remaining fields need to be updated occasionally */
+ u64 image_size;
+ struct ceph_snap_context *snapc;
+ char *snap_names; /* format 1 only */
+ u64 *snap_sizes; /* format 1 only */
+};
+
+/*
+ * An rbd image specification.
+ *
+ * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely
+ * identify an image. Each rbd_dev structure includes a pointer to
+ * an rbd_spec structure that encapsulates this identity.
+ *
+ * Each of the id's in an rbd_spec has an associated name. For a
+ * user-mapped image, the names are supplied and the id's associated
+ * with them are looked up. For a layered image, a parent image is
+ * defined by the tuple, and the names are looked up.
+ *
+ * An rbd_dev structure contains a parent_spec pointer which is
+ * non-null if the image it represents is a child in a layered
+ * image. This pointer will refer to the rbd_spec structure used
+ * by the parent rbd_dev for its own identity (i.e., the structure
+ * is shared between the parent and child).
+ *
+ * Since these structures are populated once, during the discovery
+ * phase of image construction, they are effectively immutable so
+ * we make no effort to synchronize access to them.
+ *
+ * Note that code herein does not assume the image name is known (it
+ * could be a null pointer).
+ */
+struct rbd_spec {
+ u64 pool_id;
+ const char *pool_name;
+
+ const char *image_id;
+ const char *image_name;
+
+ u64 snap_id;
+ const char *snap_name;
+
+ struct kref kref;
+};
+
+/*
+ * an instance of the client. multiple devices may share an rbd client.
+ */
+struct rbd_client {
+	struct ceph_client	*client;	/* libceph cluster handle */
+	struct kref		kref;		/* shared among mapped devices */
+	struct list_head	node;		/* entry in rbd_client_list */
+};
+
+struct rbd_img_request;
+typedef void (*rbd_img_callback_t)(struct rbd_img_request *);
+
+#define BAD_WHICH U32_MAX /* Good which or bad which, which? */
+
+struct rbd_obj_request;
+typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *);
+
+enum obj_request_type {
+ OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES
+};
+
+enum obj_req_flags {
+ OBJ_REQ_DONE, /* completion flag: not done = 0, done = 1 */
+ OBJ_REQ_IMG_DATA, /* object usage: standalone = 0, image = 1 */
+ OBJ_REQ_KNOWN, /* EXISTS flag valid: no = 0, yes = 1 */
+ OBJ_REQ_EXISTS, /* target exists: no = 0, yes = 1 */
+};
+
+struct rbd_obj_request {
+ const char *object_name;
+ u64 offset; /* object start byte */
+ u64 length; /* bytes from offset */
+ unsigned long flags;
+
+ /*
+ * An object request associated with an image will have its
+ * img_data flag set; a standalone object request will not.
+ *
+ * A standalone object request will have which == BAD_WHICH
+ * and a null obj_request pointer.
+ *
+ * An object request initiated in support of a layered image
+ * object (to check for its existence before a write) will
+ * have which == BAD_WHICH and a non-null obj_request pointer.
+ *
+ * Finally, an object request for rbd image data will have
+ * which != BAD_WHICH, and will have a non-null img_request
+ * pointer. The value of which will be in the range
+ * 0..(img_request->obj_request_count-1).
+ */
+ union {
+ struct rbd_obj_request *obj_request; /* STAT op */
+ struct {
+ struct rbd_img_request *img_request;
+ u64 img_offset;
+ /* links for img_request->obj_requests list */
+ struct list_head links;
+ };
+ };
+ u32 which; /* posn image request list */
+
+ enum obj_request_type type;
+ union {
+ struct bio *bio_list;
+ struct {
+ struct page **pages;
+ u32 page_count;
+ };
+ };
+ struct page **copyup_pages;
+ u32 copyup_page_count;
+
+ struct ceph_osd_request *osd_req;
+
+ u64 xferred; /* bytes transferred */
+ int result;
+
+ rbd_obj_callback_t callback;
+ struct completion completion;
+
+ struct kref kref;
+};
+
+enum img_req_flags {
+ IMG_REQ_WRITE, /* I/O direction: read = 0, write = 1 */
+ IMG_REQ_CHILD, /* initiator: block = 0, child image = 1 */
+ IMG_REQ_LAYERED, /* ENOENT handling: normal = 0, layered = 1 */
+};
+
+struct rbd_img_request {
+ struct rbd_device *rbd_dev;
+ u64 offset; /* starting image byte offset */
+ u64 length; /* byte count from offset */
+ unsigned long flags;
+ union {
+ u64 snap_id; /* for reads */
+ struct ceph_snap_context *snapc; /* for writes */
+ };
+ union {
+ struct request *rq; /* block request */
+ struct rbd_obj_request *obj_request; /* obj req initiator */
+ };
+ struct page **copyup_pages;
+ u32 copyup_page_count;
+ spinlock_t completion_lock;/* protects next_completion */
+ u32 next_completion;
+ rbd_img_callback_t callback;
+ u64 xferred;/* aggregate bytes transferred */
+ int result; /* first nonzero obj_request result */
+
+ u32 obj_request_count;
+ struct list_head obj_requests; /* rbd_obj_request structs */
+
+ struct kref kref;
+};
+
+#define for_each_obj_request(ireq, oreq) \
+ list_for_each_entry(oreq, &(ireq)->obj_requests, links)
+#define for_each_obj_request_from(ireq, oreq) \
+ list_for_each_entry_from(oreq, &(ireq)->obj_requests, links)
+#define for_each_obj_request_safe(ireq, oreq, n) \
+ list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links)
+
+struct rbd_mapping {
+	u64                     size;		/* size of the mapped image */
+	u64                     features;	/* features of the mapped image */
+	bool			read_only;	/* map is read-only (no writes) */
+};
+
+/*
+ * a single device
+ */
+struct rbd_device {
+ int dev_id; /* blkdev unique id */
+
+ int major; /* blkdev assigned major */
+ int minor;
+ struct gendisk *disk; /* blkdev's gendisk and rq */
+
+ u32 image_format; /* Either 1 or 2 */
+ struct rbd_client *rbd_client;
+
+ char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */
+
+ spinlock_t lock; /* queue, flags, open_count */
+
+ struct rbd_image_header header;
+ unsigned long flags; /* possibly lock protected */
+ struct rbd_spec *spec;
+
+ char *header_name;
+
+ struct ceph_file_layout layout;
+
+ struct ceph_osd_event *watch_event;
+ struct rbd_obj_request *watch_request;
+
+ struct rbd_spec *parent_spec;
+ u64 parent_overlap;
+ atomic_t parent_ref;
+ struct rbd_device *parent;
+
+ /* protects updating the header */
+ struct rw_semaphore header_rwsem;
+
+ struct rbd_mapping mapping;
+
+ struct list_head node;
+
+ /* sysfs related */
+ struct device dev;
+ unsigned long open_count; /* protected by lock */
+};
+
+/*
+ * Flag bits for rbd_dev->flags. If atomicity is required,
+ * rbd_dev->lock is used to protect access.
+ *
+ * Currently, only the "removing" flag (which is coupled with the
+ * "open_count" field) requires atomic access.
+ */
+enum rbd_dev_flags {
+ RBD_DEV_FLAG_EXISTS, /* mapped snapshot has not been deleted */
+ RBD_DEV_FLAG_REMOVING, /* this mapping is being removed */
+};
+
+static DEFINE_MUTEX(client_mutex); /* Serialize client creation */
+
+static LIST_HEAD(rbd_dev_list); /* devices */
+static DEFINE_SPINLOCK(rbd_dev_list_lock);
+
+static LIST_HEAD(rbd_client_list); /* clients */
+static DEFINE_SPINLOCK(rbd_client_list_lock);
+
+/* Slab caches for frequently-allocated structures */
+
+static struct kmem_cache *rbd_img_request_cache;
+static struct kmem_cache *rbd_obj_request_cache;
+static struct kmem_cache *rbd_segment_name_cache;
+
+static int rbd_major;
+static DEFINE_IDA(rbd_dev_id_ida);
+
+/*
+ * Default to false for now, as single-major requires >= 0.75 version of
+ * userspace rbd utility.
+ */
+static bool single_major = false;
+module_param(single_major, bool, S_IRUGO);
+MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: false)");
+
+static int rbd_img_request_submit(struct rbd_img_request *img_request);
+
+static void rbd_dev_device_release(struct device *dev);
+
+static ssize_t rbd_add(struct bus_type *bus, const char *buf,
+ size_t count);
+static ssize_t rbd_remove(struct bus_type *bus, const char *buf,
+ size_t count);
+static ssize_t rbd_add_single_major(struct bus_type *bus, const char *buf,
+ size_t count);
+static ssize_t rbd_remove_single_major(struct bus_type *bus, const char *buf,
+ size_t count);
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping);
+static void rbd_spec_put(struct rbd_spec *spec);
+
+/* Map a device id to the first minor of its partition range. */
+static int rbd_dev_id_to_minor(int dev_id)
+{
+	int first_minor = dev_id << RBD_SINGLE_MAJOR_PART_SHIFT;
+
+	return first_minor;
+}
+
+/* Inverse of rbd_dev_id_to_minor(): recover the device id from a minor. */
+static int minor_to_rbd_dev_id(int minor)
+{
+	int dev_id = minor >> RBD_SINGLE_MAJOR_PART_SHIFT;
+
+	return dev_id;
+}
+
+static BUS_ATTR(add, S_IWUSR, NULL, rbd_add);
+static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove);
+static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major);
+static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major);
+
+static struct attribute *rbd_bus_attrs[] = {
+ &bus_attr_add.attr,
+ &bus_attr_remove.attr,
+ &bus_attr_add_single_major.attr,
+ &bus_attr_remove_single_major.attr,
+ NULL,
+};
+
+/*
+ * Hide the *_single_major bus attributes unless the single_major
+ * module parameter was enabled; everything else keeps its mode.
+ */
+static umode_t rbd_bus_is_visible(struct kobject *kobj,
+				  struct attribute *attr, int index)
+{
+	bool single_major_attr;
+
+	single_major_attr = attr == &bus_attr_add_single_major.attr ||
+			    attr == &bus_attr_remove_single_major.attr;
+	if (single_major_attr && !single_major)
+		return 0;
+
+	return attr->mode;
+}
+
+static const struct attribute_group rbd_bus_group = {
+ .attrs = rbd_bus_attrs,
+ .is_visible = rbd_bus_is_visible,
+};
+__ATTRIBUTE_GROUPS(rbd_bus);
+
+static struct bus_type rbd_bus_type = {
+ .name = "rbd",
+ .bus_groups = rbd_bus_groups,
+};
+
+static void rbd_root_dev_release(struct device *dev)
+{
+	/* rbd_root_dev is statically allocated, so there is nothing to
+	 * free here; the empty callback only silences the driver core's
+	 * warning about devices without a release method. */
+}
+
+static struct device rbd_root_dev = {
+ .init_name = "rbd",
+ .release = rbd_root_dev_release,
+};
+
+/*
+ * Emit a warning prefixed with the most specific identity available
+ * for the device: disk name, then image name, then image id, then the
+ * raw rbd_dev pointer.  A NULL rbd_dev is allowed.
+ */
+static __printf(2, 3)
+void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+
+	va_start(args, fmt);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	if (!rbd_dev)
+		printk(KERN_WARNING "%s: %pV\n", RBD_DRV_NAME, &vaf);
+	else if (rbd_dev->disk)
+		printk(KERN_WARNING "%s: %s: %pV\n",
+			RBD_DRV_NAME, rbd_dev->disk->disk_name, &vaf);
+	else if (rbd_dev->spec && rbd_dev->spec->image_name)
+		printk(KERN_WARNING "%s: image %s: %pV\n",
+			RBD_DRV_NAME, rbd_dev->spec->image_name, &vaf);
+	else if (rbd_dev->spec && rbd_dev->spec->image_id)
+		printk(KERN_WARNING "%s: id %s: %pV\n",
+			RBD_DRV_NAME, rbd_dev->spec->image_id, &vaf);
+	else	/* punt */
+		printk(KERN_WARNING "%s: rbd_dev %p: %pV\n",
+			RBD_DRV_NAME, rbd_dev, &vaf);
+	va_end(args);
+}
+
+#ifdef RBD_DEBUG
+#define rbd_assert(expr) \
+ if (unlikely(!(expr))) { \
+ printk(KERN_ERR "\nAssertion failure in %s() " \
+ "at line %d:\n\n" \
+ "\trbd_assert(%s);\n\n", \
+ __func__, __LINE__, #expr); \
+ BUG(); \
+ }
+#else /* !RBD_DEBUG */
+# define rbd_assert(expr) ((void) 0)
+#endif /* !RBD_DEBUG */
+
+static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request);
+static void rbd_img_parent_read(struct rbd_obj_request *obj_request);
+static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
+
+static int rbd_dev_refresh(struct rbd_device *rbd_dev);
+static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev);
+static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev);
+static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
+ u64 snap_id);
+static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
+ u8 *order, u64 *snap_size);
+static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
+ u64 *snap_features);
+static u64 rbd_snap_id_by_name(struct rbd_device *rbd_dev, const char *name);
+
+static int rbd_open(struct block_device *bdev, fmode_t mode)
+{
+	struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
+	bool removing = false;
+
+	/* Refuse writable opens of a read-only mapping up front. */
+	if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only)
+		return -EROFS;
+
+	/* open_count and the REMOVING flag are checked/updated under the
+	 * same lock so that opens and device removal stay mutually
+	 * exclusive (see rbd_dev_flags comment above). */
+	spin_lock_irq(&rbd_dev->lock);
+	if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags))
+		removing = true;
+	else
+		rbd_dev->open_count++;
+	spin_unlock_irq(&rbd_dev->lock);
+	if (removing)
+		return -ENOENT;
+
+	/* Hold a device reference for the lifetime of this open;
+	 * dropped in rbd_release(). */
+	(void) get_device(&rbd_dev->dev);
+	set_device_ro(bdev, rbd_dev->mapping.read_only);
+
+	return 0;
+}
+
+static void rbd_release(struct gendisk *disk, fmode_t mode)
+{
+	struct rbd_device *rbd_dev = disk->private_data;
+	unsigned long open_count_before;
+
+	spin_lock_irq(&rbd_dev->lock);
+	open_count_before = rbd_dev->open_count--;
+	spin_unlock_irq(&rbd_dev->lock);
+	/* A release without a matching open would underflow open_count. */
+	rbd_assert(open_count_before > 0);
+
+	/* Drop the device reference taken in rbd_open(). */
+	put_device(&rbd_dev->dev);
+}
+
+static const struct block_device_operations rbd_bd_ops = {
+ .owner = THIS_MODULE,
+ .open = rbd_open,
+ .release = rbd_release,
+};
+
+/*
+ * Initialize an rbd client instance. Success or not, this function
+ * consumes ceph_opts. Caller holds client_mutex.
+ */
+static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts)
+{
+	struct rbd_client *rbdc;
+	int ret = -ENOMEM;
+
+	dout("%s:\n", __func__);
+	rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL);
+	if (!rbdc)
+		goto out_opt;
+
+	kref_init(&rbdc->kref);
+	INIT_LIST_HEAD(&rbdc->node);
+
+	rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0);
+	if (IS_ERR(rbdc->client)) {
+		/* Propagate the real error instead of masking it
+		 * behind the initial -ENOMEM. */
+		ret = PTR_ERR(rbdc->client);
+		goto out_rbdc;
+	}
+	ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */
+
+	ret = ceph_open_session(rbdc->client);
+	if (ret < 0)
+		goto out_client;
+
+	spin_lock(&rbd_client_list_lock);
+	list_add_tail(&rbdc->node, &rbd_client_list);
+	spin_unlock(&rbd_client_list_lock);
+
+	dout("%s: rbdc %p\n", __func__, rbdc);
+
+	return rbdc;
+out_client:
+	/* ceph_destroy_client() also frees the options it took over. */
+	ceph_destroy_client(rbdc->client);
+out_rbdc:
+	kfree(rbdc);
+out_opt:
+	/* Function consumes ceph_opts: destroy them unless the client
+	 * already took ownership (ceph_opts was NULLed above). */
+	if (ceph_opts)
+		ceph_destroy_options(ceph_opts);
+	dout("%s: error %d\n", __func__, ret);
+
+	return ERR_PTR(ret);
+}
+
+/* Take an additional reference on behalf of the caller; returns rbdc. */
+static struct rbd_client *__rbd_get_client(struct rbd_client *rbdc)
+{
+	kref_get(&rbdc->kref);
+	return rbdc;
+}
+
+/*
+ * Find a ceph client with specific addr and configuration. If
+ * found, bump its reference count.
+ */
+/*
+ * Look up an existing ceph client whose configuration matches
+ * ceph_opts; on a hit, bump its refcount and return it.  Returns NULL
+ * on a miss, or unconditionally when sharing is disabled (NOSHARE).
+ */
+static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts)
+{
+	struct rbd_client *rbdc;
+	struct rbd_client *found = NULL;
+
+	if (ceph_opts->flags & CEPH_OPT_NOSHARE)
+		return NULL;
+
+	spin_lock(&rbd_client_list_lock);
+	list_for_each_entry(rbdc, &rbd_client_list, node) {
+		if (ceph_compare_options(ceph_opts, rbdc->client))
+			continue;
+		/* Reference must be taken while the list lock pins rbdc. */
+		found = __rbd_get_client(rbdc);
+		break;
+	}
+	spin_unlock(&rbd_client_list_lock);
+
+	return found;
+}
+
+/*
+ * mount options
+ */
+enum {
+ Opt_last_int,
+ /* int args above */
+ Opt_last_string,
+ /* string args above */
+ Opt_read_only,
+ Opt_read_write,
+ /* Boolean args above */
+ Opt_last_bool,
+};
+
+static match_table_t rbd_opts_tokens = {
+ /* int args above */
+ /* string args above */
+ {Opt_read_only, "read_only"},
+ {Opt_read_only, "ro"}, /* Alternate spelling */
+ {Opt_read_write, "read_write"},
+ {Opt_read_write, "rw"}, /* Alternate spelling */
+ /* Boolean args above */
+ {-1, NULL}
+};
+
+struct rbd_options {
+ bool read_only;
+};
+
+#define RBD_READ_ONLY_DEFAULT false
+
+/*
+ * Parse a single rbd mount option token into *private (struct
+ * rbd_options).  Returns 0 on success, -EINVAL for an unknown token,
+ * or the match_int() error for a malformed integer argument.
+ */
+static int parse_rbd_opts_token(char *c, void *private)
+{
+	struct rbd_options *rbd_opts = private;
+	substring_t argstr[MAX_OPT_ARGS];
+	int token, intval, ret;
+
+	token = match_token(c, rbd_opts_tokens, argstr);
+	if (token < 0)
+		return -EINVAL;
+
+	/* The Opt_last_* markers in the token enum partition tokens by
+	 * argument type; the range tests below decode argstr accordingly. */
+	if (token < Opt_last_int) {
+		ret = match_int(&argstr[0], &intval);
+		if (ret < 0) {
+			pr_err("bad mount option arg (not int) "
+			       "at '%s'\n", c);
+			return ret;
+		}
+		dout("got int token %d val %d\n", token, intval);
+	} else if (token > Opt_last_int && token < Opt_last_string) {
+		dout("got string token %d val %s\n", token,
+		     argstr[0].from);
+	} else if (token > Opt_last_string && token < Opt_last_bool) {
+		dout("got Boolean token %d\n", token);
+	} else {
+		dout("got token %d\n", token);
+	}
+
+	switch (token) {
+	case Opt_read_only:
+		rbd_opts->read_only = true;
+		break;
+	case Opt_read_write:
+		rbd_opts->read_only = false;
+		break;
+	default:
+		/* rbd_opts_tokens and this switch must stay in sync. */
+		rbd_assert(false);
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Get a ceph client with specific addr and configuration, if one does
+ * not exist create it. Either way, ceph_opts is consumed by this
+ * function.
+ */
+/*
+ * Get a ceph client with specific addr and configuration, creating it
+ * if no shareable one exists.  Either way, ceph_opts is consumed by
+ * this function (freed here, or owned by the client).
+ */
+static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts)
+{
+	struct rbd_client *rbdc;
+
+	mutex_lock_nested(&client_mutex, SINGLE_DEPTH_NESTING);
+	rbdc = rbd_client_find(ceph_opts);
+	if (!rbdc) {
+		/* rbd_client_create() consumes ceph_opts on all paths. */
+		rbdc = rbd_client_create(ceph_opts);
+	} else {
+		/* Reusing an existing client: the options are ours to free. */
+		ceph_destroy_options(ceph_opts);
+	}
+	mutex_unlock(&client_mutex);
+
+	return rbdc;
+}
+
+/*
+ * Destroy ceph client (kref release callback).
+ *
+ * Takes rbd_client_list_lock itself to unlink the client, so the
+ * caller must NOT already hold that lock.  (The previous comment
+ * claimed the opposite, which would deadlock.)
+ */
+static void rbd_client_release(struct kref *kref)
+{
+	struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);
+
+	dout("%s: rbdc %p\n", __func__, rbdc);
+	spin_lock(&rbd_client_list_lock);
+	list_del(&rbdc->node);
+	spin_unlock(&rbd_client_list_lock);
+
+	ceph_destroy_client(rbdc->client);
+	kfree(rbdc);
+}
+
+/*
+ * Drop a reference to a ceph client node; the final reference
+ * triggers rbd_client_release().  NULL is tolerated as a no-op.
+ */
+static void rbd_put_client(struct rbd_client *rbdc)
+{
+	if (!rbdc)
+		return;
+
+	kref_put(&rbdc->kref, rbd_client_release);
+}
+
+/* Only image format 1 and format 2 exist */
+static bool rbd_image_format_valid(u32 image_format)
+{
+	switch (image_format) {
+	case 1:
+	case 2:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Sanity-check an on-disk format 1 image header: the magic header
+ * text must be present, the object order must lie within
+ * [SECTOR_SHIFT, 8*sizeof(int)-1], and the snapshot count and name
+ * length must produce a snapshot header that fits in a size_t.
+ * Returns false if any check fails.
+ */
+static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk)
+{
+ size_t size;
+ u32 snap_count;
+
+ /* The header has to start with the magic rbd header text */
+ if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT)))
+ return false;
+
+ /* The bio layer requires at least sector-sized I/O */
+
+ if (ondisk->options.order < SECTOR_SHIFT)
+ return false;
+
+ /* If we use u64 in a few spots we may be able to loosen this */
+
+ if (ondisk->options.order > 8 * sizeof (int) - 1)
+ return false;
+
+ /*
+ * The size of a snapshot header has to fit in a size_t, and
+ * that limits the number of snapshots.
+ */
+ snap_count = le32_to_cpu(ondisk->snap_count);
+ size = SIZE_MAX - sizeof (struct ceph_snap_context);
+ if (snap_count > size / sizeof (__le64))
+ return false;
+
+ /*
+ * Not only that, but the size of the entire the snapshot
+ * header must also be representable in a size_t.
+ */
+ size -= snap_count * sizeof (__le64);
+ if ((u64) size < le64_to_cpu(ondisk->snap_names_len))
+ return false;
+
+ return true;
+}
+
+/*
+ * Fill an rbd image header with information from the given format 1
+ * on-disk header.
+ *
+ * On first use (object_prefix not yet set) the immutable fields are
+ * filled in as well; on refresh only the snapshot context, names,
+ * sizes and image size are replaced.  Returns 0 on success, -ENOMEM
+ * on allocation failure, or -EIO if the snapshot name data is too
+ * large.  On failure everything allocated here is freed and the
+ * existing header is left untouched.
+ */
+static int rbd_header_from_disk(struct rbd_device *rbd_dev,
+ struct rbd_image_header_ondisk *ondisk)
+{
+ struct rbd_image_header *header = &rbd_dev->header;
+ bool first_time = header->object_prefix == NULL;
+ struct ceph_snap_context *snapc;
+ char *object_prefix = NULL;
+ char *snap_names = NULL;
+ u64 *snap_sizes = NULL;
+ u32 snap_count;
+ size_t size;
+ int ret = -ENOMEM;
+ u32 i;
+
+ /* Allocate this now to avoid having to handle failure below */
+
+ if (first_time) {
+ size_t len;
+
+ /* object_prefix on disk may not be NUL-terminated */
+ len = strnlen(ondisk->object_prefix,
+ sizeof (ondisk->object_prefix));
+ object_prefix = kmalloc(len + 1, GFP_KERNEL);
+ if (!object_prefix)
+ return -ENOMEM;
+ memcpy(object_prefix, ondisk->object_prefix, len);
+ object_prefix[len] = '\0';
+ }
+
+ /* Allocate the snapshot context and fill it in */
+
+ snap_count = le32_to_cpu(ondisk->snap_count);
+ snapc = ceph_create_snap_context(snap_count, GFP_KERNEL);
+ if (!snapc)
+ goto out_err;
+ snapc->seq = le64_to_cpu(ondisk->snap_seq);
+ if (snap_count) {
+ struct rbd_image_snap_ondisk *snaps;
+ u64 snap_names_len = le64_to_cpu(ondisk->snap_names_len);
+
+ /* We'll keep a copy of the snapshot names... */
+
+ if (snap_names_len > (u64)SIZE_MAX)
+ goto out_2big;
+ snap_names = kmalloc(snap_names_len, GFP_KERNEL);
+ if (!snap_names)
+ goto out_err;
+
+ /* ...as well as the array of their sizes. */
+
+ size = snap_count * sizeof (*header->snap_sizes);
+ snap_sizes = kmalloc(size, GFP_KERNEL);
+ if (!snap_sizes)
+ goto out_err;
+
+ /*
+ * Copy the names, and fill in each snapshot's id
+ * and size.
+ *
+ * Note that rbd_dev_v1_header_info() guarantees the
+ * ondisk buffer we're working with has
+ * snap_names_len bytes beyond the end of the
+ * snapshot id array, this memcpy() is safe.
+ */
+ memcpy(snap_names, &ondisk->snaps[snap_count], snap_names_len);
+ snaps = ondisk->snaps;
+ for (i = 0; i < snap_count; i++) {
+ snapc->snaps[i] = le64_to_cpu(snaps[i].id);
+ snap_sizes[i] = le64_to_cpu(snaps[i].image_size);
+ }
+ }
+
+ /* We won't fail any more, fill in the header */
+
+ if (first_time) {
+ header->object_prefix = object_prefix;
+ header->obj_order = ondisk->options.order;
+ header->crypt_type = ondisk->options.crypt_type;
+ header->comp_type = ondisk->options.comp_type;
+ /* The rest aren't used for format 1 images */
+ header->stripe_unit = 0;
+ header->stripe_count = 0;
+ header->features = 0;
+ } else {
+ /* Refresh: drop the previous snapshot data before replacing */
+ ceph_put_snap_context(header->snapc);
+ kfree(header->snap_names);
+ kfree(header->snap_sizes);
+ }
+
+ /* The remaining fields always get updated (when we refresh) */
+
+ header->image_size = le64_to_cpu(ondisk->image_size);
+ header->snapc = snapc;
+ header->snap_names = snap_names;
+ header->snap_sizes = snap_sizes;
+
+ /* Make sure mapping size is consistent with header info */
+
+ if (rbd_dev->spec->snap_id == CEPH_NOSNAP || first_time)
+ if (rbd_dev->mapping.size != header->image_size)
+ rbd_dev->mapping.size = header->image_size;
+
+ return 0;
+out_2big:
+ ret = -EIO;
+out_err:
+ kfree(snap_sizes);
+ kfree(snap_names);
+ ceph_put_snap_context(snapc);
+ kfree(object_prefix);
+
+ return ret;
+}
+
+/*
+ * Return a kstrdup()ed copy of the name of snapshot "which" (an
+ * index into the snapshot context), or NULL on allocation failure.
+ * Names are stored back to back, each NUL-terminated.
+ */
+static const char *_rbd_dev_v1_snap_name(struct rbd_device *rbd_dev, u32 which)
+{
+	const char *name = rbd_dev->header.snap_names;
+	u32 i;
+
+	rbd_assert(which < rbd_dev->header.snapc->num_snaps);
+
+	/* Step over the first "which" names to reach the one we want */
+	for (i = 0; i < which; i++)
+		name += strlen(name) + 1;
+
+	return kstrdup(name, GFP_KERNEL);
+}
+
+/*
+ * Snapshot id comparison function for use with qsort()/bsearch().
+ * Orders snapshot ids in *descending* order (largest id first).
+ */
+static int snapid_compare_reverse(const void *s1, const void *s2)
+{
+	u64 a = *(const u64 *)s1;
+	u64 b = *(const u64 *)s2;
+
+	if (a > b)
+		return -1;
+	return (a < b) ? 1 : 0;
+}
+
+/*
+ * Search a snapshot context to see if the given snapshot id is
+ * present.
+ *
+ * Returns the position of the snapshot id in the array if it's found,
+ * or BAD_SNAP_INDEX otherwise.
+ *
+ * Note: The snapshot array is kept sorted (by the osd) in
+ * reverse order, highest snapshot id first.
+ */
+static u32 rbd_dev_snap_index(struct rbd_device *rbd_dev, u64 snap_id)
+{
+ struct ceph_snap_context *snapc = rbd_dev->header.snapc;
+ u64 *found;
+
+ found = bsearch(&snap_id, &snapc->snaps, snapc->num_snaps,
+ sizeof (snap_id), snapid_compare_reverse);
+
+ return found ? (u32)(found - &snapc->snaps[0]) : BAD_SNAP_INDEX;
+}
+
+/*
+ * Look up the name of the format 1 snapshot with the given id.
+ * Returns an allocated copy of the name, ERR_PTR(-ENOENT) if the
+ * id is not in the snapshot context, or ERR_PTR(-ENOMEM).
+ */
+static const char *rbd_dev_v1_snap_name(struct rbd_device *rbd_dev,
+					u64 snap_id)
+{
+	const char *name;
+	u32 idx = rbd_dev_snap_index(rbd_dev, snap_id);
+
+	if (idx == BAD_SNAP_INDEX)
+		return ERR_PTR(-ENOENT);
+
+	name = _rbd_dev_v1_snap_name(rbd_dev, idx);
+	if (!name)
+		return ERR_PTR(-ENOMEM);
+
+	return name;
+}
+
+/*
+ * Format-agnostic snapshot name lookup.  CEPH_NOSNAP maps to the
+ * static HEAD name; otherwise dispatch on the image format.
+ */
+static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id)
+{
+	if (snap_id == CEPH_NOSNAP)
+		return RBD_SNAP_HEAD_NAME;
+
+	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+
+	return rbd_dev->image_format == 1 ?
+		rbd_dev_v1_snap_name(rbd_dev, snap_id) :
+		rbd_dev_v2_snap_name(rbd_dev, snap_id);
+}
+
+/*
+ * Look up the size of the given snapshot (or of the image HEAD for
+ * CEPH_NOSNAP) and store it in *snap_size.  Format 1 answers come
+ * from the cached header; format 2 queries the osd.  Returns 0 on
+ * success, -ENOENT if the snapshot id is unknown, or an error from
+ * the format 2 size request.
+ */
+static int rbd_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
+ u64 *snap_size)
+{
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+ if (snap_id == CEPH_NOSNAP) {
+ *snap_size = rbd_dev->header.image_size;
+ } else if (rbd_dev->image_format == 1) {
+ u32 which;
+
+ which = rbd_dev_snap_index(rbd_dev, snap_id);
+ if (which == BAD_SNAP_INDEX)
+ return -ENOENT;
+
+ *snap_size = rbd_dev->header.snap_sizes[which];
+ } else {
+ u64 size = 0;
+ int ret;
+
+ ret = _rbd_dev_v2_snap_size(rbd_dev, snap_id, NULL, &size);
+ if (ret)
+ return ret;
+
+ *snap_size = size;
+ }
+ return 0;
+}
+
+/*
+ * Look up the feature bits for the given snapshot (or the image HEAD
+ * for CEPH_NOSNAP) and store them in *snap_features.  Format 1
+ * images have no feature bits.  Returns 0 or an error from the
+ * format 2 feature request.
+ */
+static int rbd_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
+ u64 *snap_features)
+{
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+ if (snap_id == CEPH_NOSNAP) {
+ *snap_features = rbd_dev->header.features;
+ } else if (rbd_dev->image_format == 1) {
+ *snap_features = 0; /* No features for format 1 */
+ } else {
+ u64 features = 0;
+ int ret;
+
+ ret = _rbd_dev_v2_snap_features(rbd_dev, snap_id, &features);
+ if (ret)
+ return ret;
+
+ *snap_features = features;
+ }
+ return 0;
+}
+
+/*
+ * Initialize rbd_dev->mapping (size and features) for the mapped
+ * snapshot.  Returns 0, or an error from the size/feature lookups,
+ * in which case the mapping is left unmodified.
+ */
+static int rbd_dev_mapping_set(struct rbd_device *rbd_dev)
+{
+ u64 snap_id = rbd_dev->spec->snap_id;
+ u64 size = 0;
+ u64 features = 0;
+ int ret;
+
+ ret = rbd_snap_size(rbd_dev, snap_id, &size);
+ if (ret)
+ return ret;
+ ret = rbd_snap_features(rbd_dev, snap_id, &features);
+ if (ret)
+ return ret;
+
+ rbd_dev->mapping.size = size;
+ rbd_dev->mapping.features = features;
+
+ return 0;
+}
+
+/* Reset the mapping info set up by rbd_dev_mapping_set() */
+static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev)
+{
+	rbd_dev->mapping.features = 0;
+	rbd_dev->mapping.size = 0;
+}
+
+/*
+ * Return the object name for the segment containing image "offset",
+ * or NULL on allocation or formatting failure.  The returned buffer
+ * comes from rbd_segment_name_cache and must be released with
+ * rbd_segment_name_free().
+ */
+static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset)
+{
+	char *name;
+	u64 segment;
+	int ret;
+	char *name_format;
+
+	name = kmem_cache_alloc(rbd_segment_name_cache, GFP_NOIO);
+	if (!name)
+		return NULL;
+	segment = offset >> rbd_dev->header.obj_order;
+	/* Format 2 images use a wider (16 hex digit) segment suffix */
+	name_format = "%s.%012llx";
+	if (rbd_dev->image_format == 2)
+		name_format = "%s.%016llx";
+	ret = snprintf(name, CEPH_MAX_OID_NAME_LEN + 1, name_format,
+			rbd_dev->header.object_prefix, segment);
+	if (ret < 0 || ret > CEPH_MAX_OID_NAME_LEN) {
+		pr_err("error formatting segment name for #%llu (%d)\n",
+			segment, ret);
+		/*
+		 * The buffer was allocated from rbd_segment_name_cache,
+		 * so it must be returned with kmem_cache_free(), not
+		 * kfree() (which was a slab-allocator mismatch).
+		 */
+		kmem_cache_free(rbd_segment_name_cache, name);
+		name = NULL;
+	}
+
+	return name;
+}
+
+/* Release a segment name obtained from rbd_segment_name() */
+static void rbd_segment_name_free(const char *name)
+{
+ /* The explicit cast here is needed to drop the const qualifier */
+
+ kmem_cache_free(rbd_segment_name_cache, (void *)name);
+}
+
+/* Byte offset of "offset" within its segment (object) */
+static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset)
+{
+	/* Segments are a power of two; mask off the higher bits */
+	return offset & (((u64)1 << rbd_dev->header.obj_order) - 1);
+}
+
+/*
+ * Clamp "length" so that the byte range starting at "offset" does
+ * not extend beyond the end of the segment containing it.
+ */
+static u64 rbd_segment_length(struct rbd_device *rbd_dev,
+ u64 offset, u64 length)
+{
+ u64 segment_size = (u64) 1 << rbd_dev->header.obj_order;
+
+ offset &= segment_size - 1;
+
+ rbd_assert(length <= U64_MAX - offset);
+ if (offset + length > segment_size)
+ length = segment_size - offset;
+
+ return length;
+}
+
+/*
+ * returns the size of an object in the image
+ */
+static u64 rbd_obj_bytes(struct rbd_image_header *header)
+{
+	/*
+	 * Shift in 64 bits: rbd_dev_ondisk_valid() allows obj_order up
+	 * to 8*sizeof(int)-1 (i.e. 31), and "1 << 31" would overflow a
+	 * signed int (undefined behavior).  This also matches the
+	 * (u64)1 shifts in rbd_segment_offset()/rbd_segment_length().
+	 */
+	return (u64)1 << header->obj_order;
+}
+
+/*
+ * bio helpers
+ */
+
+/* Drop a reference on every bio linked through bi_next */
+static void bio_chain_put(struct bio *chain)
+{
+	while (chain) {
+		struct bio *next = chain->bi_next;
+
+		bio_put(chain);
+		chain = next;
+	}
+}
+
+/*
+ * zeros a bio chain, starting at specific offset
+ *
+ * Every byte from "start_ofs" (relative to the start of the chain's
+ * data) through the end of the chain is overwritten with zeroes,
+ * segment by segment, with the mappings done under local irq
+ * disable via bvec_kmap_irq().
+ */
+static void zero_bio_chain(struct bio *chain, int start_ofs)
+{
+ struct bio_vec bv;
+ struct bvec_iter iter;
+ unsigned long flags;
+ void *buf;
+ int pos = 0;
+
+ while (chain) {
+ bio_for_each_segment(bv, chain, iter) {
+ if (pos + bv.bv_len > start_ofs) {
+ /* Zero only the tail of a segment straddling start_ofs */
+ int remainder = max(start_ofs - pos, 0);
+ buf = bvec_kmap_irq(&bv, &flags);
+ memset(buf + remainder, 0,
+ bv.bv_len - remainder);
+ flush_dcache_page(bv.bv_page);
+ bvec_kunmap_irq(buf, &flags);
+ }
+ pos += bv.bv_len;
+ }
+
+ chain = chain->bi_next;
+ }
+}
+
+/*
+ * similar to zero_bio_chain(), zeros data defined by a page array,
+ * starting at the given byte offset from the start of the array and
+ * continuing up to the given end offset. The pages array is
+ * assumed to be big enough to hold all bytes up to the end.
+ */
+static void zero_pages(struct page **pages, u64 offset, u64 end)
+{
+ struct page **page = &pages[offset >> PAGE_SHIFT];
+
+ rbd_assert(end > offset);
+ rbd_assert(end - offset <= (u64)SIZE_MAX);
+ while (offset < end) {
+ size_t page_offset;
+ size_t length;
+ unsigned long flags;
+ void *kaddr;
+
+ /* Zero at most one page worth per iteration */
+ page_offset = offset & ~PAGE_MASK;
+ length = min_t(size_t, PAGE_SIZE - page_offset, end - offset);
+ local_irq_save(flags);
+ kaddr = kmap_atomic(*page);
+ memset(kaddr + page_offset, 0, length);
+ flush_dcache_page(*page);
+ kunmap_atomic(kaddr);
+ local_irq_restore(flags);
+
+ offset += length;
+ page++;
+ }
+}
+
+/*
+ * Clone a portion of a bio, starting at the given byte offset
+ * and continuing for the number of bytes indicated.  Returns the
+ * clone, or NULL on allocation failure.
+ */
+static struct bio *bio_clone_range(struct bio *bio_src,
+					unsigned int offset,
+					unsigned int len,
+					gfp_t gfpmask)
+{
+	struct bio *clone = bio_clone(bio_src, gfpmask);
+
+	if (!clone)
+		return NULL;	/* ENOMEM */
+
+	/* Trim the clone down to the requested byte range */
+	bio_advance(clone, offset);
+	clone->bi_iter.bi_size = len;
+
+	return clone;
+}
+
+/*
+ * Clone a portion of a bio chain, starting at the given byte offset
+ * into the first bio in the source chain and continuing for the
+ * number of bytes indicated. The result is another bio chain of
+ * exactly the given length, or a null pointer on error.
+ *
+ * The bio_src and offset parameters are both in-out. On entry they
+ * refer to the first source bio and the offset into that bio where
+ * the start of data to be cloned is located.
+ *
+ * On return, bio_src is updated to refer to the bio in the source
+ * chain that contains first un-cloned byte, and *offset will
+ * contain the offset of that byte within that bio.
+ */
+static struct bio *bio_chain_clone_range(struct bio **bio_src,
+ unsigned int *offset,
+ unsigned int len,
+ gfp_t gfpmask)
+{
+ struct bio *bi = *bio_src;
+ unsigned int off = *offset;
+ struct bio *chain = NULL;
+ struct bio **end;
+
+ /* Build up a chain of clone bios up to the limit */
+
+ if (!bi || off >= bi->bi_iter.bi_size || !len)
+ return NULL; /* Nothing to clone */
+
+ end = &chain;
+ while (len) {
+ unsigned int bi_size;
+ struct bio *bio;
+
+ if (!bi) {
+ rbd_warn(NULL, "bio_chain exhausted with %u left", len);
+ goto out_err; /* EINVAL; ran out of bio's */
+ }
+ /* Clone no more than the remainder of this source bio */
+ bi_size = min_t(unsigned int, bi->bi_iter.bi_size - off, len);
+ bio = bio_clone_range(bi, off, bi_size, gfpmask);
+ if (!bio)
+ goto out_err; /* ENOMEM */
+
+ /* Append the clone to the result chain */
+ *end = bio;
+ end = &bio->bi_next;
+
+ off += bi_size;
+ if (off == bi->bi_iter.bi_size) {
+ bi = bi->bi_next;
+ off = 0;
+ }
+ len -= bi_size;
+ }
+ *bio_src = bi;
+ *offset = off;
+
+ return chain;
+out_err:
+ bio_chain_put(chain);
+
+ return NULL;
+}
+
+/*
+ * The default/initial value for all object request flags is 0. For
+ * each flag, once its value is set to 1 it is never reset to 0
+ * again.
+ */
+static void obj_request_img_data_set(struct rbd_obj_request *obj_request)
+{
+ /* Setting the flag twice indicates a caller bug; warn about it */
+ if (test_and_set_bit(OBJ_REQ_IMG_DATA, &obj_request->flags)) {
+ struct rbd_device *rbd_dev;
+
+ rbd_dev = obj_request->img_request->rbd_dev;
+ rbd_warn(rbd_dev, "obj_request %p already marked img_data\n",
+ obj_request);
+ }
+}
+
+/* Is this object request part of an image request?  (Barrier pairs
+ * with the implicit barrier in test_and_set_bit() in the setter.) */
+static bool obj_request_img_data_test(struct rbd_obj_request *obj_request)
+{
+ smp_mb();
+ return test_bit(OBJ_REQ_IMG_DATA, &obj_request->flags) != 0;
+}
+
+/* Mark an object request complete; warns if already marked done */
+static void obj_request_done_set(struct rbd_obj_request *obj_request)
+{
+ if (test_and_set_bit(OBJ_REQ_DONE, &obj_request->flags)) {
+ struct rbd_device *rbd_dev = NULL;
+
+ if (obj_request_img_data_test(obj_request))
+ rbd_dev = obj_request->img_request->rbd_dev;
+ rbd_warn(rbd_dev, "obj_request %p already marked done\n",
+ obj_request);
+ }
+}
+
+/* Has this object request completed? */
+static bool obj_request_done_test(struct rbd_obj_request *obj_request)
+{
+ smp_mb();
+ return test_bit(OBJ_REQ_DONE, &obj_request->flags) != 0;
+}
+
+/*
+ * This sets the KNOWN flag after (possibly) setting the EXISTS
+ * flag. The latter is set based on the "exists" value provided.
+ *
+ * Note that for our purposes once an object exists it never goes
+ * away again. It's possible that the response from two existence
+ * checks are separated by the creation of the target object, and
+ * the first ("doesn't exist") response arrives *after* the second
+ * ("does exist"). In that case we ignore the second one.
+ */
+static void obj_request_existence_set(struct rbd_obj_request *obj_request,
+ bool exists)
+{
+ /* EXISTS must be visible before KNOWN; smp_mb() publishes both */
+ if (exists)
+ set_bit(OBJ_REQ_EXISTS, &obj_request->flags);
+ set_bit(OBJ_REQ_KNOWN, &obj_request->flags);
+ smp_mb();
+}
+
+/* Has an existence check for this object completed? */
+static bool obj_request_known_test(struct rbd_obj_request *obj_request)
+{
+ smp_mb();
+ return test_bit(OBJ_REQ_KNOWN, &obj_request->flags) != 0;
+}
+
+/* Is the target object known to exist?  Only meaningful once the
+ * KNOWN flag is set (see obj_request_existence_set()). */
+static bool obj_request_exists_test(struct rbd_obj_request *obj_request)
+{
+ smp_mb();
+ return test_bit(OBJ_REQ_EXISTS, &obj_request->flags) != 0;
+}
+
+/* Take an additional reference on an object request */
+static void rbd_obj_request_get(struct rbd_obj_request *obj_request)
+{
+ dout("%s: obj %p (was %d)\n", __func__, obj_request,
+ atomic_read(&obj_request->kref.refcount));
+ kref_get(&obj_request->kref);
+}
+
+static void rbd_obj_request_destroy(struct kref *kref);
+/* Drop a reference on an object request; last one destroys it */
+static void rbd_obj_request_put(struct rbd_obj_request *obj_request)
+{
+ rbd_assert(obj_request != NULL);
+ dout("%s: obj %p (was %d)\n", __func__, obj_request,
+ atomic_read(&obj_request->kref.refcount));
+ kref_put(&obj_request->kref, rbd_obj_request_destroy);
+}
+
+static bool img_request_child_test(struct rbd_img_request *img_request);
+static void rbd_parent_request_destroy(struct kref *kref);
+static void rbd_img_request_destroy(struct kref *kref);
+/*
+ * Drop a reference on an image request.  Child (parent-read)
+ * requests use a different destructor than top-level ones.
+ */
+static void rbd_img_request_put(struct rbd_img_request *img_request)
+{
+ rbd_assert(img_request != NULL);
+ dout("%s: img %p (was %d)\n", __func__, img_request,
+ atomic_read(&img_request->kref.refcount));
+ if (img_request_child_test(img_request))
+ kref_put(&img_request->kref, rbd_parent_request_destroy);
+ else
+ kref_put(&img_request->kref, rbd_img_request_destroy);
+}
+
+/*
+ * Append an object request to an image request's list, assigning it
+ * the next "which" slot.  The image request takes over the object
+ * request's original reference.
+ */
+static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request,
+ struct rbd_obj_request *obj_request)
+{
+ rbd_assert(obj_request->img_request == NULL);
+
+ /* Image request now owns object's original reference */
+ obj_request->img_request = img_request;
+ obj_request->which = img_request->obj_request_count;
+ rbd_assert(!obj_request_img_data_test(obj_request));
+ obj_request_img_data_set(obj_request);
+ rbd_assert(obj_request->which != BAD_WHICH);
+ img_request->obj_request_count++;
+ list_add_tail(&obj_request->links, &img_request->obj_requests);
+ dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request,
+ obj_request->which);
+}
+
+/*
+ * Unlink an object request from its image request and drop the
+ * reference the image request held.  Object requests must be
+ * removed in reverse order of addition (the "which" assert below
+ * enforces this).
+ */
+static inline void rbd_img_obj_request_del(struct rbd_img_request *img_request,
+ struct rbd_obj_request *obj_request)
+{
+ rbd_assert(obj_request->which != BAD_WHICH);
+
+ dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request,
+ obj_request->which);
+ list_del(&obj_request->links);
+ rbd_assert(img_request->obj_request_count > 0);
+ img_request->obj_request_count--;
+ rbd_assert(obj_request->which == img_request->obj_request_count);
+ obj_request->which = BAD_WHICH;
+ rbd_assert(obj_request_img_data_test(obj_request));
+ rbd_assert(obj_request->img_request == img_request);
+ obj_request->img_request = NULL;
+ obj_request->callback = NULL;
+ rbd_obj_request_put(obj_request);
+}
+
+/* Is "type" one of the three known object request data types? */
+static bool obj_request_type_valid(enum obj_request_type type)
+{
+	return type == OBJ_REQUEST_NODATA ||
+	       type == OBJ_REQUEST_BIO ||
+	       type == OBJ_REQUEST_PAGES;
+}
+
+/* Hand the object request's osd request to the osd client */
+static int rbd_obj_request_submit(struct ceph_osd_client *osdc,
+ struct rbd_obj_request *obj_request)
+{
+ dout("%s: osdc %p obj %p\n", __func__, osdc, obj_request);
+
+ return ceph_osdc_start_request(osdc, obj_request->osd_req, false);
+}
+
+/*
+ * Finish an image request: on success, total up the bytes
+ * transferred by its object requests, then invoke the completion
+ * callback if one is registered, otherwise drop the reference.
+ */
+static void rbd_img_request_complete(struct rbd_img_request *img_request)
+{
+
+ dout("%s: img %p\n", __func__, img_request);
+
+ /*
+ * If no error occurred, compute the aggregate transfer
+ * count for the image request. We could instead use
+ * atomic64_cmpxchg() to update it as each object request
+ * completes; not clear which way is better off hand.
+ */
+ if (!img_request->result) {
+ struct rbd_obj_request *obj_request;
+ u64 xferred = 0;
+
+ for_each_obj_request(img_request, obj_request)
+ xferred += obj_request->xferred;
+ img_request->xferred = xferred;
+ }
+
+ if (img_request->callback)
+ img_request->callback(img_request);
+ else
+ rbd_img_request_put(img_request);
+}
+
+/* Caller is responsible for rbd_obj_request_destroy(obj_request) */
+
+/* Block (interruptibly) until the object request completes */
+static int rbd_obj_request_wait(struct rbd_obj_request *obj_request)
+{
+ dout("%s: obj %p\n", __func__, obj_request);
+
+ return wait_for_completion_interruptible(&obj_request->completion);
+}
+
+/*
+ * The default/initial value for all image request flags is 0. Each
+ * is conditionally set to 1 at image request initialization time
+ * and currently never change thereafter.
+ */
+static void img_request_write_set(struct rbd_img_request *img_request)
+{
+ set_bit(IMG_REQ_WRITE, &img_request->flags);
+ smp_mb();
+}
+
+/* Is this image request a write? */
+static bool img_request_write_test(struct rbd_img_request *img_request)
+{
+ smp_mb();
+ return test_bit(IMG_REQ_WRITE, &img_request->flags) != 0;
+}
+
+/* Mark this image request as a child (parent-image read) request */
+static void img_request_child_set(struct rbd_img_request *img_request)
+{
+ set_bit(IMG_REQ_CHILD, &img_request->flags);
+ smp_mb();
+}
+
+/* Clear the child flag again (used when tearing a child down) */
+static void img_request_child_clear(struct rbd_img_request *img_request)
+{
+ clear_bit(IMG_REQ_CHILD, &img_request->flags);
+ smp_mb();
+}
+
+/* Is this image request a child (parent-image read) request? */
+static bool img_request_child_test(struct rbd_img_request *img_request)
+{
+ smp_mb();
+ return test_bit(IMG_REQ_CHILD, &img_request->flags) != 0;
+}
+
+/* Mark this image request as targeting a layered (cloned) image */
+static void img_request_layered_set(struct rbd_img_request *img_request)
+{
+ set_bit(IMG_REQ_LAYERED, &img_request->flags);
+ smp_mb();
+}
+
+/* Clear the layered flag (e.g. after the image is flattened) */
+static void img_request_layered_clear(struct rbd_img_request *img_request)
+{
+ clear_bit(IMG_REQ_LAYERED, &img_request->flags);
+ smp_mb();
+}
+
+/* Does this image request target a layered (cloned) image? */
+static bool img_request_layered_test(struct rbd_img_request *img_request)
+{
+ smp_mb();
+ return test_bit(IMG_REQ_LAYERED, &img_request->flags) != 0;
+}
+
+/*
+ * Completion handler for a read object request that is part of an
+ * image request: zero-fill holes (ENOENT) and short reads, report
+ * the full request length as transferred, and mark the request done.
+ */
+static void
+rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request)
+{
+ u64 xferred = obj_request->xferred;
+ u64 length = obj_request->length;
+
+ dout("%s: obj %p img %p result %d %llu/%llu\n", __func__,
+ obj_request, obj_request->img_request, obj_request->result,
+ xferred, length);
+ /*
+ * ENOENT means a hole in the image. We zero-fill the entire
+ * length of the request. A short read also implies zero-fill
+ * to the end of the request. An error requires the whole
+ * length of the request to be reported finished with an error
+ * to the block layer. In each case we update the xferred
+ * count to indicate the whole request was satisfied.
+ */
+ rbd_assert(obj_request->type != OBJ_REQUEST_NODATA);
+ if (obj_request->result == -ENOENT) {
+ if (obj_request->type == OBJ_REQUEST_BIO)
+ zero_bio_chain(obj_request->bio_list, 0);
+ else
+ zero_pages(obj_request->pages, 0, length);
+ obj_request->result = 0;
+ } else if (xferred < length && !obj_request->result) {
+ if (obj_request->type == OBJ_REQUEST_BIO)
+ zero_bio_chain(obj_request->bio_list, xferred);
+ else
+ zero_pages(obj_request->pages, xferred, length);
+ }
+ obj_request->xferred = length;
+ obj_request_done_set(obj_request);
+}
+
+/*
+ * Deliver completion of an object request: invoke its registered
+ * callback if any, otherwise wake anyone in rbd_obj_request_wait().
+ */
+static void rbd_obj_request_complete(struct rbd_obj_request *obj_request)
+{
+	dout("%s: obj %p cb %p\n", __func__, obj_request,
+		obj_request->callback);
+	if (!obj_request->callback)
+		complete_all(&obj_request->completion);
+	else
+		obj_request->callback(obj_request);
+}
+
+/* osd op callback that only needs to mark the request done */
+static void rbd_osd_trivial_callback(struct rbd_obj_request *obj_request)
+{
+ dout("%s: obj %p\n", __func__, obj_request);
+ obj_request_done_set(obj_request);
+}
+
+/*
+ * osd completion for a read op.  For a layered image, an ENOENT
+ * inside the parent overlap is retried against the parent image;
+ * other image reads go through the zero-fill read callback; bare
+ * (non-image) reads are simply marked done.
+ */
+static void rbd_osd_read_callback(struct rbd_obj_request *obj_request)
+{
+ struct rbd_img_request *img_request = NULL;
+ struct rbd_device *rbd_dev = NULL;
+ bool layered = false;
+
+ if (obj_request_img_data_test(obj_request)) {
+ img_request = obj_request->img_request;
+ layered = img_request && img_request_layered_test(img_request);
+ rbd_dev = img_request->rbd_dev;
+ }
+
+ dout("%s: obj %p img %p result %d %llu/%llu\n", __func__,
+ obj_request, img_request, obj_request->result,
+ obj_request->xferred, obj_request->length);
+ if (layered && obj_request->result == -ENOENT &&
+ obj_request->img_offset < rbd_dev->parent_overlap)
+ rbd_img_parent_read(obj_request);
+ else if (img_request)
+ rbd_img_obj_request_read_callback(obj_request);
+ else
+ obj_request_done_set(obj_request);
+}
+
+/* osd completion for a write op */
+static void rbd_osd_write_callback(struct rbd_obj_request *obj_request)
+{
+ dout("%s: obj %p result %d %llu\n", __func__, obj_request,
+ obj_request->result, obj_request->length);
+ /*
+ * There is no such thing as a successful short write. Set
+ * it to our originally-requested length.
+ */
+ obj_request->xferred = obj_request->length;
+ obj_request_done_set(obj_request);
+}
+
+/*
+ * For a simple stat call there's nothing to do. We'll do more if
+ * this is part of a write sequence for a layered image.
+ */
+static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request)
+{
+ dout("%s: obj %p\n", __func__, obj_request);
+ obj_request_done_set(obj_request);
+}
+
+/*
+ * Main osd request completion callback.  Records the result and
+ * transfer count from the first op, then dispatches to the
+ * per-opcode handler; if that handler marked the request done, the
+ * completion is propagated via rbd_obj_request_complete().
+ */
+static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
+ struct ceph_msg *msg)
+{
+ struct rbd_obj_request *obj_request = osd_req->r_priv;
+ u16 opcode;
+
+ dout("%s: osd_req %p msg %p\n", __func__, osd_req, msg);
+ rbd_assert(osd_req == obj_request->osd_req);
+ if (obj_request_img_data_test(obj_request)) {
+ rbd_assert(obj_request->img_request);
+ rbd_assert(obj_request->which != BAD_WHICH);
+ } else {
+ rbd_assert(obj_request->which == BAD_WHICH);
+ }
+
+ if (osd_req->r_result < 0)
+ obj_request->result = osd_req->r_result;
+
+ rbd_assert(osd_req->r_num_ops <= CEPH_OSD_MAX_OP);
+
+ /*
+ * We support a 64-bit length, but ultimately it has to be
+ * passed to blk_end_request(), which takes an unsigned int.
+ */
+ obj_request->xferred = osd_req->r_reply_op_len[0];
+ rbd_assert(obj_request->xferred < (u64)UINT_MAX);
+
+ opcode = osd_req->r_ops[0].op;
+ switch (opcode) {
+ case CEPH_OSD_OP_READ:
+ rbd_osd_read_callback(obj_request);
+ break;
+ case CEPH_OSD_OP_SETALLOCHINT:
+ /* Writes are hint+write pairs; handle via the write path */
+ rbd_assert(osd_req->r_ops[1].op == CEPH_OSD_OP_WRITE);
+ /* fall through */
+ case CEPH_OSD_OP_WRITE:
+ rbd_osd_write_callback(obj_request);
+ break;
+ case CEPH_OSD_OP_STAT:
+ rbd_osd_stat_callback(obj_request);
+ break;
+ case CEPH_OSD_OP_CALL:
+ case CEPH_OSD_OP_NOTIFY_ACK:
+ case CEPH_OSD_OP_WATCH:
+ rbd_osd_trivial_callback(obj_request);
+ break;
+ default:
+ rbd_warn(NULL, "%s: unsupported op %hu\n",
+ obj_request->object_name, (unsigned short) opcode);
+ break;
+ }
+
+ if (obj_request_done_test(obj_request))
+ rbd_obj_request_complete(obj_request);
+}
+
+/*
+ * Finalize a read osd request.  Reads from an image request use its
+ * mapped snapshot id; bare requests read the HEAD (CEPH_NOSNAP).
+ */
+static void rbd_osd_req_format_read(struct rbd_obj_request *obj_request)
+{
+ struct rbd_img_request *img_request = obj_request->img_request;
+ struct ceph_osd_request *osd_req = obj_request->osd_req;
+ u64 snap_id;
+
+ rbd_assert(osd_req != NULL);
+
+ snap_id = img_request ? img_request->snap_id : CEPH_NOSNAP;
+ ceph_osdc_build_request(osd_req, obj_request->offset,
+ NULL, snap_id, NULL);
+}
+
+/*
+ * Finalize a write osd request, attaching the image request's
+ * snapshot context (if any) and the current time as mtime.
+ */
+static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request)
+{
+ struct rbd_img_request *img_request = obj_request->img_request;
+ struct ceph_osd_request *osd_req = obj_request->osd_req;
+ struct ceph_snap_context *snapc;
+ struct timespec mtime = CURRENT_TIME;
+
+ rbd_assert(osd_req != NULL);
+
+ snapc = img_request ? img_request->snapc : NULL;
+ ceph_osdc_build_request(osd_req, obj_request->offset,
+ snapc, CEPH_NOSNAP, &mtime);
+}
+
+/*
+ * Create an osd request. A read request has one osd op (read).
+ * A write request has either one (watch) or two (hint+write) osd ops.
+ * (All rbd data writes are prefixed with an allocation hint op, but
+ * technically osd watch is a write request, hence this distinction.)
+ *
+ * Returns the allocated request with flags, callback, priv and
+ * target object filled in, or NULL on allocation failure.
+ */
+static struct ceph_osd_request *rbd_osd_req_create(
+ struct rbd_device *rbd_dev,
+ bool write_request,
+ unsigned int num_ops,
+ struct rbd_obj_request *obj_request)
+{
+ struct ceph_snap_context *snapc = NULL;
+ struct ceph_osd_client *osdc;
+ struct ceph_osd_request *osd_req;
+
+ if (obj_request_img_data_test(obj_request)) {
+ struct rbd_img_request *img_request = obj_request->img_request;
+
+ rbd_assert(write_request ==
+ img_request_write_test(img_request));
+ if (write_request)
+ snapc = img_request->snapc;
+ }
+
+ rbd_assert(num_ops == 1 || (write_request && num_ops == 2));
+
+ /* Allocate and initialize the request, for the num_ops ops */
+
+ osdc = &rbd_dev->rbd_client->client->osdc;
+ osd_req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false,
+ GFP_ATOMIC);
+ if (!osd_req)
+ return NULL; /* ENOMEM */
+
+ if (write_request)
+ osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
+ else
+ osd_req->r_flags = CEPH_OSD_FLAG_READ;
+
+ osd_req->r_callback = rbd_osd_req_callback;
+ osd_req->r_priv = obj_request;
+
+ osd_req->r_base_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout);
+ ceph_oid_set_name(&osd_req->r_base_oid, obj_request->object_name);
+
+ return osd_req;
+}
+
+/*
+ * Create a copyup osd request based on the information in the
+ * object request supplied. A copyup request has three osd ops,
+ * a copyup method call, a hint op, and a write op.
+ *
+ * Returns the allocated write request, or NULL on allocation
+ * failure.  Only valid for write object requests that belong to an
+ * image request (asserted below).
+ */
+static struct ceph_osd_request *
+rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request)
+{
+ struct rbd_img_request *img_request;
+ struct ceph_snap_context *snapc;
+ struct rbd_device *rbd_dev;
+ struct ceph_osd_client *osdc;
+ struct ceph_osd_request *osd_req;
+
+ rbd_assert(obj_request_img_data_test(obj_request));
+ img_request = obj_request->img_request;
+ rbd_assert(img_request);
+ rbd_assert(img_request_write_test(img_request));
+
+ /* Allocate and initialize the request, for the three ops */
+
+ snapc = img_request->snapc;
+ rbd_dev = img_request->rbd_dev;
+ osdc = &rbd_dev->rbd_client->client->osdc;
+ osd_req = ceph_osdc_alloc_request(osdc, snapc, 3, false, GFP_ATOMIC);
+ if (!osd_req)
+ return NULL; /* ENOMEM */
+
+ osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
+ osd_req->r_callback = rbd_osd_req_callback;
+ osd_req->r_priv = obj_request;
+
+ osd_req->r_base_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout);
+ ceph_oid_set_name(&osd_req->r_base_oid, obj_request->object_name);
+
+ return osd_req;
+}
+
+
+/* Release an osd request created by the rbd_osd_req_create*() helpers */
+static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req)
+{
+ ceph_osdc_put_request(osd_req);
+}
+
+/* object_name is assumed to be a non-null pointer and NUL-terminated */
+
+/*
+ * Allocate and initialize an object request for the named object
+ * covering [offset, offset+length).  A private copy of the name is
+ * taken (freed with kfree() in rbd_obj_request_destroy()).  Returns
+ * NULL on allocation failure.
+ */
+static struct rbd_obj_request *rbd_obj_request_create(const char *object_name,
+						u64 offset, u64 length,
+						enum obj_request_type type)
+{
+	struct rbd_obj_request *obj_request;
+	char *name;
+
+	rbd_assert(obj_request_type_valid(type));
+
+	/* kstrdup() replaces the open-coded strlen/kmalloc/memcpy */
+	name = kstrdup(object_name, GFP_KERNEL);
+	if (!name)
+		return NULL;
+
+	obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_KERNEL);
+	if (!obj_request) {
+		kfree(name);
+		return NULL;
+	}
+
+	obj_request->object_name = name;
+	obj_request->offset = offset;
+	obj_request->length = length;
+	obj_request->flags = 0;
+	obj_request->which = BAD_WHICH;
+	obj_request->type = type;
+	INIT_LIST_HEAD(&obj_request->links);
+	init_completion(&obj_request->completion);
+	kref_init(&obj_request->kref);
+
+	dout("%s: \"%s\" %llu/%llu %d -> obj %p\n", __func__, object_name,
+		offset, length, (int)type, obj_request);
+
+	return obj_request;
+}
+
+/*
+ * kref release callback for an object request: frees the osd
+ * request, any attached bio chain or page vector, the name copy,
+ * and the request itself.  The request must already be detached
+ * from any image request (asserted below).
+ */
+static void rbd_obj_request_destroy(struct kref *kref)
+{
+ struct rbd_obj_request *obj_request;
+
+ obj_request = container_of(kref, struct rbd_obj_request, kref);
+
+ dout("%s: obj %p\n", __func__, obj_request);
+
+ rbd_assert(obj_request->img_request == NULL);
+ rbd_assert(obj_request->which == BAD_WHICH);
+
+ if (obj_request->osd_req)
+ rbd_osd_req_destroy(obj_request->osd_req);
+
+ rbd_assert(obj_request_type_valid(obj_request->type));
+ switch (obj_request->type) {
+ case OBJ_REQUEST_NODATA:
+ break; /* Nothing to do */
+ case OBJ_REQUEST_BIO:
+ if (obj_request->bio_list)
+ bio_chain_put(obj_request->bio_list);
+ break;
+ case OBJ_REQUEST_PAGES:
+ if (obj_request->pages)
+ ceph_release_page_vector(obj_request->pages,
+ obj_request->page_count);
+ break;
+ }
+
+ kfree(obj_request->object_name);
+ obj_request->object_name = NULL;
+ kmem_cache_free(rbd_obj_request_cache, obj_request);
+}
+
+/* It's OK to call this for a device with no parent */
+
+static void rbd_spec_put(struct rbd_spec *spec);
+/* Detach a device from its parent image and drop the parent spec */
+static void rbd_dev_unparent(struct rbd_device *rbd_dev)
+{
+ rbd_dev_remove_parent(rbd_dev);
+ rbd_spec_put(rbd_dev->parent_spec);
+ rbd_dev->parent_spec = NULL;
+ rbd_dev->parent_overlap = 0;
+}
+
+/*
+ * Parent image reference counting is used to determine when an
+ * image's parent fields can be safely torn down--after there are no
+ * more in-flight requests to the parent image. When the last
+ * reference is dropped, cleaning them up is safe.
+ */
+static void rbd_dev_parent_put(struct rbd_device *rbd_dev)
+{
+ int counter;
+
+ if (!rbd_dev->parent_spec)
+ return;
+
+ /* atomic_dec_return_safe() returns a negative value on underflow */
+ counter = atomic_dec_return_safe(&rbd_dev->parent_ref);
+ if (counter > 0)
+ return;
+
+ /* Last reference; clean up parent data structures */
+
+ if (!counter)
+ rbd_dev_unparent(rbd_dev);
+ else
+ rbd_warn(rbd_dev, "parent reference underflow\n");
+}
+
+/*
+ * If an image has a non-zero parent overlap, get a reference to its
+ * parent.
+ *
+ * We must get the reference before checking for the overlap to
+ * coordinate properly with zeroing the parent overlap in
+ * rbd_dev_v2_parent_info() when an image gets flattened. We
+ * drop it again if there is no overlap.
+ *
+ * Returns true if the rbd device has a parent with a non-zero
+ * overlap and a reference for it was successfully taken, or
+ * false otherwise.
+ */
+static bool rbd_dev_parent_get(struct rbd_device *rbd_dev)
+{
+ int counter;
+
+ if (!rbd_dev->parent_spec)
+ return false;
+
+ /*
+ * NOTE(review): a non-positive return from
+ * atomic_inc_return_safe() is treated as "no reference taken";
+ * negative additionally means the counter saturated -- confirm
+ * against its definition.
+ */
+ counter = atomic_inc_return_safe(&rbd_dev->parent_ref);
+ if (counter > 0 && rbd_dev->parent_overlap)
+ return true;
+
+ /* Image was flattened, but parent is not yet torn down */
+
+ if (counter < 0)
+ rbd_warn(rbd_dev, "parent reference overflow\n");
+
+ return false;
+}
+
+/*
+ * Caller is responsible for filling in the list of object requests
+ * that comprises the image request, and the Linux request pointer
+ * (if there is one).
+ */
+/*
+ * Allocate and initialize an image request covering [offset, length)
+ * of the image.  For writes, a reference to the current snapshot
+ * context is taken (under header_rwsem); for reads, the mapped
+ * snapshot id is recorded.  Returns NULL on allocation failure.
+ */
+static struct rbd_img_request *rbd_img_request_create(
+ struct rbd_device *rbd_dev,
+ u64 offset, u64 length,
+ bool write_request)
+{
+ struct rbd_img_request *img_request;
+
+ /*
+ * NOTE(review): GFP_ATOMIC here, yet the write path below takes
+ * header_rwsem, which can sleep -- presumably callers are always
+ * in sleepable context and GFP_ATOMIC is just conservative;
+ * confirm.
+ */
+ img_request = kmem_cache_alloc(rbd_img_request_cache, GFP_ATOMIC);
+ if (!img_request)
+ return NULL;
+
+ if (write_request) {
+ /* Pin the snapshot context the write will be tagged with */
+ down_read(&rbd_dev->header_rwsem);
+ ceph_get_snap_context(rbd_dev->header.snapc);
+ up_read(&rbd_dev->header_rwsem);
+ }
+
+ img_request->rq = NULL;
+ img_request->rbd_dev = rbd_dev;
+ img_request->offset = offset;
+ img_request->length = length;
+ img_request->flags = 0;
+ if (write_request) {
+ img_request_write_set(img_request);
+ img_request->snapc = rbd_dev->header.snapc;
+ } else {
+ img_request->snap_id = rbd_dev->spec->snap_id;
+ }
+ /* Take a parent ref if the image is layered with a live overlap */
+ if (rbd_dev_parent_get(rbd_dev))
+ img_request_layered_set(img_request);
+ spin_lock_init(&img_request->completion_lock);
+ img_request->next_completion = 0;
+ img_request->callback = NULL;
+ img_request->result = 0;
+ img_request->obj_request_count = 0;
+ INIT_LIST_HEAD(&img_request->obj_requests);
+ kref_init(&img_request->kref);
+
+ dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev,
+ write_request ? "write" : "read", offset, length,
+ img_request);
+
+ return img_request;
+}
+
+/*
+ * kref release callback for an rbd_img_request.  Detaches and drops
+ * all constituent object requests, releases the layered-image parent
+ * reference and (for writes) the snapshot context, then frees the
+ * request.
+ */
+static void rbd_img_request_destroy(struct kref *kref)
+{
+ struct rbd_img_request *img_request;
+ struct rbd_obj_request *obj_request;
+ struct rbd_obj_request *next_obj_request;
+
+ img_request = container_of(kref, struct rbd_img_request, kref);
+
+ dout("%s: img %p\n", __func__, img_request);
+
+ /* _safe variant: each del unlinks the entry we're standing on */
+ for_each_obj_request_safe(img_request, obj_request, next_obj_request)
+ rbd_img_obj_request_del(img_request, obj_request);
+ rbd_assert(img_request->obj_request_count == 0);
+
+ if (img_request_layered_test(img_request)) {
+ img_request_layered_clear(img_request);
+ /* Undo the rbd_dev_parent_get() from creation time */
+ rbd_dev_parent_put(img_request->rbd_dev);
+ }
+
+ /* Writes pinned the snapshot context at creation time */
+ if (img_request_write_test(img_request))
+ ceph_put_snap_context(img_request->snapc);
+
+ kmem_cache_free(rbd_img_request_cache, img_request);
+}
+
+/*
+ * Create a child image request against the parent image, covering
+ * [img_offset, img_offset + length) of the parent, on behalf of the
+ * given object request.  The child holds a reference to the object
+ * request (dropped in rbd_parent_request_destroy()).  Returns NULL
+ * on allocation failure.
+ */
+static struct rbd_img_request *rbd_parent_request_create(
+ struct rbd_obj_request *obj_request,
+ u64 img_offset, u64 length)
+{
+ struct rbd_img_request *parent_request;
+ struct rbd_device *rbd_dev;
+
+ rbd_assert(obj_request->img_request);
+ rbd_dev = obj_request->img_request->rbd_dev;
+
+ /* Reads from the parent are always against rbd_dev->parent */
+ parent_request = rbd_img_request_create(rbd_dev->parent,
+ img_offset, length, false);
+ if (!parent_request)
+ return NULL;
+
+ img_request_child_set(parent_request);
+ rbd_obj_request_get(obj_request);
+ parent_request->obj_request = obj_request;
+
+ return parent_request;
+}
+
+/*
+ * kref release callback for a child (parent-read) image request.
+ * Drops the reference to the originating object request taken in
+ * rbd_parent_request_create(), then destroys the image request.
+ *
+ * NOTE(review): rbd_img_obj_parent_read_full()'s error path clears
+ * parent_request->obj_request before dropping the request, so
+ * orig_request may be NULL here -- presumably rbd_obj_request_put()
+ * tolerates NULL; confirm.
+ */
+static void rbd_parent_request_destroy(struct kref *kref)
+{
+ struct rbd_img_request *parent_request;
+ struct rbd_obj_request *orig_request;
+
+ parent_request = container_of(kref, struct rbd_img_request, kref);
+ orig_request = parent_request->obj_request;
+
+ parent_request->obj_request = NULL;
+ rbd_obj_request_put(orig_request);
+ img_request_child_clear(parent_request);
+
+ rbd_img_request_destroy(kref);
+}
+
+/*
+ * Account for the completion of one object request belonging to an
+ * image request: record the first error seen on the image request,
+ * and tell the block layer (or, for a child request, compute
+ * directly) whether more completions are still expected.
+ *
+ * Returns true if more object requests remain to complete.
+ */
+static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
+{
+ struct rbd_img_request *img_request;
+ unsigned int xferred;
+ int result;
+ bool more;
+
+ rbd_assert(obj_request_img_data_test(obj_request));
+ img_request = obj_request->img_request;
+
+ rbd_assert(obj_request->xferred <= (u64)UINT_MAX);
+ xferred = (unsigned int)obj_request->xferred;
+ result = obj_request->result;
+ if (result) {
+ struct rbd_device *rbd_dev = img_request->rbd_dev;
+
+ rbd_warn(rbd_dev, "%s %llx at %llx (%llx)\n",
+ img_request_write_test(img_request) ? "write" : "read",
+ obj_request->length, obj_request->img_offset,
+ obj_request->offset);
+ rbd_warn(rbd_dev, " result %d xferred %x\n",
+ result, xferred);
+ /* Only the first error is recorded on the image request */
+ if (!img_request->result)
+ img_request->result = result;
+ }
+
+ /* Image object requests don't own their page array */
+
+ if (obj_request->type == OBJ_REQUEST_PAGES) {
+ obj_request->pages = NULL;
+ obj_request->page_count = 0;
+ }
+
+ if (img_request_child_test(img_request)) {
+ /* No block request behind a child; count completions */
+ rbd_assert(img_request->obj_request != NULL);
+ more = obj_request->which < img_request->obj_request_count - 1;
+ } else {
+ /* blk_end_request() returns true while bytes remain */
+ rbd_assert(img_request->rq != NULL);
+ more = blk_end_request(img_request->rq, result, xferred);
+ }
+
+ return more;
+}
+
+/*
+ * Per-object-request completion callback for image requests.
+ *
+ * Object requests may complete out of order, but the block layer is
+ * notified strictly in order: under completion_lock, walk forward
+ * from next_completion, ending every request already marked done,
+ * and stop at the first one still in flight (it will resume the walk
+ * when its own callback fires).  When the last request has been
+ * ended, complete the image request as a whole.
+ */
+static void rbd_img_obj_callback(struct rbd_obj_request *obj_request)
+{
+ struct rbd_img_request *img_request;
+ u32 which = obj_request->which;
+ bool more = true;
+
+ rbd_assert(obj_request_img_data_test(obj_request));
+ img_request = obj_request->img_request;
+
+ dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
+ rbd_assert(img_request != NULL);
+ rbd_assert(img_request->obj_request_count > 0);
+ rbd_assert(which != BAD_WHICH);
+ rbd_assert(which < img_request->obj_request_count);
+
+ spin_lock_irq(&img_request->completion_lock);
+ /* Out-of-order completion; an earlier request will end us later */
+ if (which != img_request->next_completion)
+ goto out;
+
+ for_each_obj_request_from(img_request, obj_request) {
+ rbd_assert(more);
+ rbd_assert(which < img_request->obj_request_count);
+
+ if (!obj_request_done_test(obj_request))
+ break;
+ more = rbd_img_obj_end_request(obj_request);
+ which++;
+ }
+
+ /* "no more" exactly when every request has been ended */
+ rbd_assert(more ^ (which == img_request->obj_request_count));
+ img_request->next_completion = which;
+out:
+ spin_unlock_irq(&img_request->completion_lock);
+
+ if (!more)
+ rbd_img_request_complete(img_request);
+}
+
+/*
+ * Split up an image request into one or more object requests, each
+ * to a different object. The "type" parameter indicates whether
+ * "data_desc" is the pointer to the head of a list of bio
+ * structures, or the base of a page array. In either case this
+ * function assumes data_desc describes memory sufficient to hold
+ * all data described by the image request.
+ *
+ * Returns 0 on success.  On failure, all object requests created so
+ * far are unwound and -ENOMEM is returned.
+ */
+static int rbd_img_request_fill(struct rbd_img_request *img_request,
+ enum obj_request_type type,
+ void *data_desc)
+{
+ struct rbd_device *rbd_dev = img_request->rbd_dev;
+ struct rbd_obj_request *obj_request = NULL;
+ struct rbd_obj_request *next_obj_request;
+ bool write_request = img_request_write_test(img_request);
+ struct bio *bio_list = NULL;
+ unsigned int bio_offset = 0;
+ struct page **pages = NULL;
+ u64 img_offset;
+ u64 resid;
+ u16 opcode;
+
+ dout("%s: img %p type %d data_desc %p\n", __func__, img_request,
+ (int)type, data_desc);
+
+ opcode = write_request ? CEPH_OSD_OP_WRITE : CEPH_OSD_OP_READ;
+ img_offset = img_request->offset;
+ resid = img_request->length;
+ rbd_assert(resid > 0);
+
+ if (type == OBJ_REQUEST_BIO) {
+ bio_list = data_desc;
+ /* The bio chain must start where the image request starts */
+ rbd_assert(img_offset ==
+ bio_list->bi_iter.bi_sector << SECTOR_SHIFT);
+ } else {
+ rbd_assert(type == OBJ_REQUEST_PAGES);
+ pages = data_desc;
+ }
+
+ /* One object request per object the image range touches */
+ while (resid) {
+ struct ceph_osd_request *osd_req;
+ const char *object_name;
+ u64 offset;
+ u64 length;
+ unsigned int which = 0;
+
+ object_name = rbd_segment_name(rbd_dev, img_offset);
+ if (!object_name)
+ goto out_unwind;
+ offset = rbd_segment_offset(rbd_dev, img_offset);
+ length = rbd_segment_length(rbd_dev, img_offset, resid);
+ obj_request = rbd_obj_request_create(object_name,
+ offset, length, type);
+ /* object request has its own copy of the object name */
+ rbd_segment_name_free(object_name);
+ if (!obj_request)
+ goto out_unwind;
+
+ /*
+ * set obj_request->img_request before creating the
+ * osd_request so that it gets the right snapc
+ */
+ rbd_img_obj_request_add(img_request, obj_request);
+
+ if (type == OBJ_REQUEST_BIO) {
+ /* Clone just this object's slice of the bio chain */
+ unsigned int clone_size;
+
+ rbd_assert(length <= (u64)UINT_MAX);
+ clone_size = (unsigned int)length;
+ obj_request->bio_list =
+ bio_chain_clone_range(&bio_list,
+ &bio_offset,
+ clone_size,
+ GFP_ATOMIC);
+ if (!obj_request->bio_list)
+ goto out_unwind;
+ } else {
+ /*
+ * Page-array requests share the caller's array;
+ * advance past the pages this object consumes.
+ */
+ unsigned int page_count;
+
+ obj_request->pages = pages;
+ page_count = (u32)calc_pages_for(offset, length);
+ obj_request->page_count = page_count;
+ /* last page is shared with the next object's data */
+ if ((offset + length) & ~PAGE_MASK)
+ page_count--; /* more on last page */
+ pages += page_count;
+ }
+
+ /* Writes get 2 ops (alloc hint + extent), reads just 1 */
+ osd_req = rbd_osd_req_create(rbd_dev, write_request,
+ (write_request ? 2 : 1),
+ obj_request);
+ if (!osd_req)
+ goto out_unwind;
+ obj_request->osd_req = osd_req;
+ obj_request->callback = rbd_img_obj_callback;
+
+ if (write_request) {
+ osd_req_op_alloc_hint_init(osd_req, which,
+ rbd_obj_bytes(&rbd_dev->header),
+ rbd_obj_bytes(&rbd_dev->header));
+ which++;
+ }
+
+ osd_req_op_extent_init(osd_req, which, opcode, offset, length,
+ 0, 0);
+ if (type == OBJ_REQUEST_BIO)
+ osd_req_op_extent_osd_data_bio(osd_req, which,
+ obj_request->bio_list, length);
+ else
+ osd_req_op_extent_osd_data_pages(osd_req, which,
+ obj_request->pages, length,
+ offset & ~PAGE_MASK, false, false);
+
+ if (write_request)
+ rbd_osd_req_format_write(obj_request);
+ else
+ rbd_osd_req_format_read(obj_request);
+
+ obj_request->img_offset = img_offset;
+
+ img_offset += length;
+ resid -= length;
+ }
+
+ return 0;
+
+out_unwind:
+ /* Drop every object request added so far (del drops its ref) */
+ for_each_obj_request_safe(img_request, obj_request, next_obj_request)
+ rbd_img_obj_request_del(img_request, obj_request);
+
+ return -ENOMEM;
+}
+
+/*
+ * Completion callback for an object request that was turned into a
+ * copyup request.  Releases the copyup page vector (the parent data
+ * that was written to the target object), fixes up the transfer
+ * count, and hands off to the normal image-object callback.
+ */
+static void
+rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request)
+{
+ struct rbd_img_request *img_request;
+ struct rbd_device *rbd_dev;
+ struct page **pages;
+ u32 page_count;
+
+ rbd_assert(obj_request->type == OBJ_REQUEST_BIO);
+ rbd_assert(obj_request_img_data_test(obj_request));
+ img_request = obj_request->img_request;
+ rbd_assert(img_request);
+
+ rbd_dev = img_request->rbd_dev;
+ rbd_assert(rbd_dev);
+
+ /* The copyup pages were attached by the parent-read callback */
+ pages = obj_request->copyup_pages;
+ rbd_assert(pages != NULL);
+ obj_request->copyup_pages = NULL;
+ page_count = obj_request->copyup_page_count;
+ rbd_assert(page_count);
+ obj_request->copyup_page_count = 0;
+ ceph_release_page_vector(pages, page_count);
+
+ /*
+ * We want the transfer count to reflect the size of the
+ * original write request. There is no such thing as a
+ * successful short write, so if the request was successful
+ * we can just set it to the originally-requested length.
+ */
+ if (!obj_request->result)
+ obj_request->xferred = obj_request->length;
+
+ /* Finish up with the normal image object callback */
+
+ rbd_img_obj_callback(obj_request);
+}
+
+/*
+ * Completion callback for the child image request that read the full
+ * target-object range from the parent image.  On success, rebuild the
+ * original object request as a 3-op copyup (CALL "rbd.copyup" with
+ * the parent data + alloc hint + the original write) and resubmit it.
+ * If the image was flattened meanwhile (overlap now 0), just resubmit
+ * the original write.  On any error, record it on the original
+ * request and complete it.
+ *
+ * Fix: the previous version leaked the parent-read page vector when
+ * the parent read itself failed or when allocating the copyup osd
+ * request failed -- both paths reached out_err while "pages" was
+ * still locally owned and never released.  We now track ownership of
+ * "pages" explicitly (NULL once released or handed to orig_request)
+ * and free it on out_err if still held.
+ */
+static void
+rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
+{
+ struct rbd_obj_request *orig_request;
+ struct ceph_osd_request *osd_req;
+ struct ceph_osd_client *osdc;
+ struct rbd_device *rbd_dev;
+ struct page **pages;
+ u32 page_count;
+ int img_result;
+ u64 parent_length;
+ u64 offset;
+ u64 length;
+
+ rbd_assert(img_request_child_test(img_request));
+
+ /* First get what we need from the image request */
+
+ pages = img_request->copyup_pages;
+ rbd_assert(pages != NULL);
+ img_request->copyup_pages = NULL;
+ page_count = img_request->copyup_page_count;
+ rbd_assert(page_count);
+ img_request->copyup_page_count = 0;
+
+ orig_request = img_request->obj_request;
+ rbd_assert(orig_request != NULL);
+ rbd_assert(obj_request_type_valid(orig_request->type));
+ img_result = img_request->result;
+ parent_length = img_request->length;
+ rbd_assert(parent_length == img_request->xferred);
+ rbd_img_request_put(img_request);
+
+ rbd_assert(orig_request->img_request);
+ rbd_dev = orig_request->img_request->rbd_dev;
+ rbd_assert(rbd_dev);
+
+ /*
+ * If the overlap has become 0 (most likely because the
+ * image has been flattened) we need to free the pages
+ * and re-submit the original write request.
+ */
+ if (!rbd_dev->parent_overlap) {
+ struct ceph_osd_client *osdc;
+
+ ceph_release_page_vector(pages, page_count);
+ pages = NULL; /* don't release again on the error path */
+ osdc = &rbd_dev->rbd_client->client->osdc;
+ img_result = rbd_obj_request_submit(osdc, orig_request);
+ if (!img_result)
+ return;
+ }
+
+ /* Parent read failed; propagate its result */
+ if (img_result)
+ goto out_err;
+
+ /*
+ * The original osd request is of no use to us any more.
+ * We need a new one that can hold the three ops in a copyup
+ * request. Allocate the new copyup osd request for the
+ * original request, and release the old one.
+ */
+ img_result = -ENOMEM;
+ osd_req = rbd_osd_req_create_copyup(orig_request);
+ if (!osd_req)
+ goto out_err;
+ rbd_osd_req_destroy(orig_request->osd_req);
+ orig_request->osd_req = osd_req;
+ orig_request->copyup_pages = pages;
+ orig_request->copyup_page_count = page_count;
+
+ /* Initialize the copyup op */
+
+ osd_req_op_cls_init(osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup");
+ osd_req_op_cls_request_data_pages(osd_req, 0, pages, parent_length, 0,
+ false, false);
+
+ /*
+ * orig_request now owns the pages; they are released by
+ * rbd_img_obj_copyup_callback() when the request completes.
+ */
+ pages = NULL;
+
+ /* Then the hint op */
+
+ osd_req_op_alloc_hint_init(osd_req, 1, rbd_obj_bytes(&rbd_dev->header),
+ rbd_obj_bytes(&rbd_dev->header));
+
+ /* And the original write request op */
+
+ offset = orig_request->offset;
+ length = orig_request->length;
+ osd_req_op_extent_init(osd_req, 2, CEPH_OSD_OP_WRITE,
+ offset, length, 0, 0);
+ if (orig_request->type == OBJ_REQUEST_BIO)
+ osd_req_op_extent_osd_data_bio(osd_req, 2,
+ orig_request->bio_list, length);
+ else
+ osd_req_op_extent_osd_data_pages(osd_req, 2,
+ orig_request->pages, length,
+ offset & ~PAGE_MASK, false, false);
+
+ rbd_osd_req_format_write(orig_request);
+
+ /* All set, send it off. */
+
+ orig_request->callback = rbd_img_obj_copyup_callback;
+ osdc = &rbd_dev->rbd_client->client->osdc;
+ img_result = rbd_obj_request_submit(osdc, orig_request);
+ if (!img_result)
+ return;
+out_err:
+ /* Release the parent data if ownership was never handed off */
+ if (pages)
+ ceph_release_page_vector(pages, page_count);
+
+ /* Record the error code and complete the request */
+
+ orig_request->result = img_result;
+ orig_request->xferred = 0;
+ obj_request_done_set(orig_request);
+ rbd_obj_request_complete(orig_request);
+}
+
+/*
+ * Read from the parent image the range of data that covers the
+ * entire target of the given object request. This is used for
+ * satisfying a layered image write request when the target of an
+ * object request from the image request does not exist.
+ *
+ * A page array big enough to hold the returned data is allocated
+ * and supplied to rbd_img_request_fill() as the "data descriptor."
+ * When the read completes, this page array will be transferred to
+ * the original object request for the copyup operation.
+ *
+ * If an error occurs, record it as the result of the original
+ * object request and mark it done so it gets completed.
+ */
+static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
+{
+ struct rbd_img_request *img_request = NULL;
+ struct rbd_img_request *parent_request = NULL;
+ struct rbd_device *rbd_dev;
+ u64 img_offset;
+ u64 length;
+ struct page **pages = NULL;
+ u32 page_count;
+ int result;
+
+ rbd_assert(obj_request_img_data_test(obj_request));
+ rbd_assert(obj_request_type_valid(obj_request->type));
+
+ img_request = obj_request->img_request;
+ rbd_assert(img_request != NULL);
+ rbd_dev = img_request->rbd_dev;
+ rbd_assert(rbd_dev->parent != NULL);
+
+ /*
+ * Determine the byte range covered by the object in the
+ * child image to which the original request was to be sent.
+ */
+ img_offset = obj_request->img_offset - obj_request->offset;
+ length = (u64)1 << rbd_dev->header.obj_order;
+
+ /*
+ * There is no defined parent data beyond the parent
+ * overlap, so limit what we read at that boundary if
+ * necessary.
+ */
+ if (img_offset + length > rbd_dev->parent_overlap) {
+ rbd_assert(img_offset < rbd_dev->parent_overlap);
+ length = rbd_dev->parent_overlap - img_offset;
+ }
+
+ /*
+ * Allocate a page array big enough to receive the data read
+ * from the parent.
+ */
+ page_count = (u32)calc_pages_for(0, length);
+ pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
+ if (IS_ERR(pages)) {
+ result = PTR_ERR(pages);
+ pages = NULL;
+ goto out_err;
+ }
+
+ result = -ENOMEM;
+ parent_request = rbd_parent_request_create(obj_request,
+ img_offset, length);
+ if (!parent_request)
+ goto out_err;
+
+ result = rbd_img_request_fill(parent_request, OBJ_REQUEST_PAGES, pages);
+ if (result)
+ goto out_err;
+ /* Pages transfer to the original request when the read completes */
+ parent_request->copyup_pages = pages;
+ parent_request->copyup_page_count = page_count;
+
+ parent_request->callback = rbd_img_obj_parent_read_full_callback;
+ result = rbd_img_request_submit(parent_request);
+ if (!result)
+ return 0;
+
+ /*
+ * Submit failed: take back the pages and the obj_request
+ * reference so the put below doesn't double-handle them.
+ * NOTE(review): clearing parent_request->obj_request means
+ * rbd_parent_request_destroy() will put a NULL pointer --
+ * presumably rbd_obj_request_put() tolerates that; confirm.
+ */
+ parent_request->copyup_pages = NULL;
+ parent_request->copyup_page_count = 0;
+ parent_request->obj_request = NULL;
+ rbd_obj_request_put(obj_request);
+out_err:
+ if (pages)
+ ceph_release_page_vector(pages, page_count);
+ if (parent_request)
+ rbd_img_request_put(parent_request);
+ obj_request->result = result;
+ obj_request->xferred = 0;
+ obj_request_done_set(obj_request);
+
+ return result;
+}
+
+/*
+ * Completion callback for the STAT request issued by
+ * rbd_img_obj_exists_submit().  Records whether the target object
+ * exists on the original object request and resubmits it; if the
+ * image was flattened meanwhile, just resubmits the original write.
+ */
+static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request)
+{
+ struct rbd_obj_request *orig_request;
+ struct rbd_device *rbd_dev;
+ int result;
+
+ rbd_assert(!obj_request_img_data_test(obj_request));
+
+ /*
+ * All we need from the object request is the original
+ * request and the result of the STAT op. Grab those, then
+ * we're done with the request.
+ */
+ orig_request = obj_request->obj_request;
+ obj_request->obj_request = NULL;
+ /*
+ * NOTE(review): the stat request's reference to orig_request is
+ * dropped here but orig_request is still used below --
+ * presumably safe because its image request holds another
+ * reference; confirm.
+ */
+ rbd_obj_request_put(orig_request);
+ rbd_assert(orig_request);
+ rbd_assert(orig_request->img_request);
+
+ result = obj_request->result;
+ obj_request->result = 0;
+
+ dout("%s: obj %p for obj %p result %d %llu/%llu\n", __func__,
+ obj_request, orig_request, result,
+ obj_request->xferred, obj_request->length);
+ rbd_obj_request_put(obj_request);
+
+ /*
+ * If the overlap has become 0 (most likely because the
+ * image has been flattened) we need to free the pages
+ * and re-submit the original write request.
+ */
+ rbd_dev = orig_request->img_request->rbd_dev;
+ if (!rbd_dev->parent_overlap) {
+ struct ceph_osd_client *osdc;
+
+ osdc = &rbd_dev->rbd_client->client->osdc;
+ result = rbd_obj_request_submit(osdc, orig_request);
+ if (!result)
+ return;
+ }
+
+ /*
+ * Our only purpose here is to determine whether the object
+ * exists, and we don't want to treat the non-existence as
+ * an error. If something else comes back, transfer the
+ * error to the original request and complete it now.
+ */
+ if (!result) {
+ obj_request_existence_set(orig_request, true);
+ } else if (result == -ENOENT) {
+ obj_request_existence_set(orig_request, false);
+ } else if (result) {
+ orig_request->result = result;
+ goto out;
+ }
+
+ /*
+ * Resubmit the original request now that we have recorded
+ * whether the target object exists.
+ */
+ orig_request->result = rbd_img_obj_request_submit(orig_request);
+out:
+ if (orig_request->result)
+ rbd_obj_request_complete(orig_request);
+}
+
+/*
+ * Issue a STAT request against the target object of a layered write
+ * to find out whether it exists.  The result is handled by
+ * rbd_img_obj_exists_callback(), which records the existence state
+ * and resubmits the original object request.
+ *
+ * Returns 0 if the stat request was submitted, or a negative errno.
+ *
+ * Fix: the old single "out:" error path (a) dropped a reference on
+ * obj_request that had not been taken yet when rbd_obj_request_create()
+ * failed (refcount underflow), and (b) leaked stat_request -- and the
+ * page vector it owns -- on the later failures.  Errors are now
+ * unwound in stages.
+ */
+static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
+{
+ struct rbd_obj_request *stat_request;
+ struct rbd_device *rbd_dev;
+ struct ceph_osd_client *osdc;
+ struct page **pages = NULL;
+ u32 page_count;
+ size_t size;
+ int ret;
+
+ /*
+ * The response data for a STAT call consists of:
+ * le64 length;
+ * struct {
+ * le32 tv_sec;
+ * le32 tv_nsec;
+ * } mtime;
+ */
+ size = sizeof (__le64) + sizeof (__le32) + sizeof (__le32);
+ page_count = (u32)calc_pages_for(0, size);
+ pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ ret = -ENOMEM;
+ stat_request = rbd_obj_request_create(obj_request->object_name, 0, 0,
+ OBJ_REQUEST_PAGES);
+ if (!stat_request)
+ goto fail_pages;
+
+ rbd_obj_request_get(obj_request);
+ stat_request->obj_request = obj_request;
+ stat_request->pages = pages; /* stat_request now owns the pages */
+ stat_request->page_count = page_count;
+
+ rbd_assert(obj_request->img_request);
+ rbd_dev = obj_request->img_request->rbd_dev;
+ stat_request->osd_req = rbd_osd_req_create(rbd_dev, false, 1,
+ stat_request);
+ if (!stat_request->osd_req)
+ goto fail_stat_request;
+ stat_request->callback = rbd_img_obj_exists_callback;
+
+ osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT);
+ osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0,
+ false, false);
+ rbd_osd_req_format_read(stat_request);
+
+ osdc = &rbd_dev->rbd_client->client->osdc;
+ ret = rbd_obj_request_submit(osdc, stat_request);
+ if (ret)
+ goto fail_stat_request;
+
+ return 0;
+
+fail_stat_request:
+ /* Balance the get above, then free stat_request (and its pages) */
+ rbd_obj_request_put(obj_request);
+ rbd_obj_request_put(stat_request);
+ return ret;
+
+fail_pages:
+ ceph_release_page_vector(pages, page_count);
+ return ret;
+}
+
+/*
+ * Submit one object request belonging to an image request, choosing
+ * between a plain submission and the layered-write machinery
+ * (existence check, then parent read + copyup if needed).
+ *
+ * Note: "known" is assigned inside the short-circuited condition
+ * below.  Whenever the simple-submit branch is NOT taken, the first
+ * three operands were all false, so the fourth was evaluated and
+ * "known" is initialized before its later use.
+ *
+ * Fix: removed the inner "struct rbd_device *rbd_dev" that shadowed
+ * the outer variable and re-derived the identical pointer.
+ */
+static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request)
+{
+ struct rbd_img_request *img_request;
+ struct rbd_device *rbd_dev;
+ bool known;
+
+ rbd_assert(obj_request_img_data_test(obj_request));
+
+ img_request = obj_request->img_request;
+ rbd_assert(img_request);
+ rbd_dev = img_request->rbd_dev;
+
+ /*
+ * Only writes to layered images need special handling.
+ * Reads and non-layered writes are simple object requests.
+ * Layered writes that start beyond the end of the overlap
+ * with the parent have no parent data, so they too are
+ * simple object requests. Finally, if the target object is
+ * known to already exist, its parent data has already been
+ * copied, so a write to the object can also be handled as a
+ * simple object request.
+ */
+ if (!img_request_write_test(img_request) ||
+ !img_request_layered_test(img_request) ||
+ rbd_dev->parent_overlap <= obj_request->img_offset ||
+ ((known = obj_request_known_test(obj_request)) &&
+ obj_request_exists_test(obj_request))) {
+ struct ceph_osd_client *osdc;
+
+ osdc = &rbd_dev->rbd_client->client->osdc;
+ return rbd_obj_request_submit(osdc, obj_request);
+ }
+
+ /*
+ * It's a layered write. The target object might exist but
+ * we may not know that yet. If we know it doesn't exist,
+ * start by reading the data for the full target object from
+ * the parent so we can use it for a copyup to the target.
+ */
+ if (known)
+ return rbd_img_obj_parent_read_full(obj_request);
+
+ /* We don't know whether the target exists. Go find out. */
+
+ return rbd_img_obj_exists_submit(obj_request);
+}
+
+/*
+ * Submit every object request that makes up the given image request.
+ * Stops at the first submission failure and returns its error code;
+ * returns 0 when all were submitted.
+ */
+static int rbd_img_request_submit(struct rbd_img_request *img_request)
+{
+ struct rbd_obj_request *obj_request;
+ struct rbd_obj_request *next_obj_request;
+ int ret = 0;
+
+ dout("%s: img %p\n", __func__, img_request);
+ for_each_obj_request_safe(img_request, obj_request, next_obj_request) {
+ ret = rbd_img_obj_request_submit(obj_request);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * Completion callback for a child image request created by
+ * rbd_img_parent_read().  Transfers result and byte count from the
+ * parent read back onto the original object request, clamping the
+ * transfer count at the parent-overlap boundary so data beyond it
+ * gets zeroed, then completes the original request.
+ */
+static void rbd_img_parent_read_callback(struct rbd_img_request *img_request)
+{
+ struct rbd_obj_request *obj_request;
+ struct rbd_device *rbd_dev;
+ u64 obj_end;
+ u64 img_xferred;
+ int img_result;
+
+ rbd_assert(img_request_child_test(img_request));
+
+ /* First get what we need from the image request and release it */
+
+ obj_request = img_request->obj_request;
+ img_xferred = img_request->xferred;
+ img_result = img_request->result;
+ rbd_img_request_put(img_request);
+
+ /*
+ * If the overlap has become 0 (most likely because the
+ * image has been flattened) we need to re-submit the
+ * original request.
+ */
+ rbd_assert(obj_request);
+ rbd_assert(obj_request->img_request);
+ rbd_dev = obj_request->img_request->rbd_dev;
+ if (!rbd_dev->parent_overlap) {
+ struct ceph_osd_client *osdc;
+
+ osdc = &rbd_dev->rbd_client->client->osdc;
+ img_result = rbd_obj_request_submit(osdc, obj_request);
+ if (!img_result)
+ return;
+ }
+
+ obj_request->result = img_result;
+ if (obj_request->result)
+ goto out;
+
+ /*
+ * We need to zero anything beyond the parent overlap
+ * boundary. Since rbd_img_obj_request_read_callback()
+ * will zero anything beyond the end of a short read, an
+ * easy way to do this is to pretend the data from the
+ * parent came up short--ending at the overlap boundary.
+ */
+ rbd_assert(obj_request->img_offset < U64_MAX - obj_request->length);
+ obj_end = obj_request->img_offset + obj_request->length;
+ if (obj_end > rbd_dev->parent_overlap) {
+ u64 xferred = 0;
+
+ if (obj_request->img_offset < rbd_dev->parent_overlap)
+ xferred = rbd_dev->parent_overlap -
+ obj_request->img_offset;
+
+ obj_request->xferred = min(img_xferred, xferred);
+ } else {
+ obj_request->xferred = img_xferred;
+ }
+out:
+ rbd_img_obj_request_read_callback(obj_request);
+ rbd_obj_request_complete(obj_request);
+}
+
+/*
+ * Satisfy a read that found no data in the child image (result was
+ * -ENOENT) by reading the same range from the parent image.  Builds
+ * and submits a child image request whose completion is handled by
+ * rbd_img_parent_read_callback().  On failure the original request
+ * is completed with the error.
+ */
+static void rbd_img_parent_read(struct rbd_obj_request *obj_request)
+{
+ struct rbd_img_request *img_request;
+ int result;
+
+ rbd_assert(obj_request_img_data_test(obj_request));
+ rbd_assert(obj_request->img_request != NULL);
+ /* Only taken when the child object did not exist */
+ rbd_assert(obj_request->result == (s32) -ENOENT);
+ rbd_assert(obj_request_type_valid(obj_request->type));
+
+ /* rbd_read_finish(obj_request, obj_request->length); */
+ img_request = rbd_parent_request_create(obj_request,
+ obj_request->img_offset,
+ obj_request->length);
+ result = -ENOMEM;
+ if (!img_request)
+ goto out_err;
+
+ /* Reuse the original request's data buffer for the parent read */
+ if (obj_request->type == OBJ_REQUEST_BIO)
+ result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
+ obj_request->bio_list);
+ else
+ result = rbd_img_request_fill(img_request, OBJ_REQUEST_PAGES,
+ obj_request->pages);
+ if (result)
+ goto out_err;
+
+ img_request->callback = rbd_img_parent_read_callback;
+ result = rbd_img_request_submit(img_request);
+ if (result)
+ goto out_err;
+
+ return;
+out_err:
+ if (img_request)
+ rbd_img_request_put(img_request);
+ obj_request->result = result;
+ obj_request->xferred = 0;
+ obj_request_done_set(obj_request);
+}
+
+/*
+ * Synchronously acknowledge a watch notification on the image header
+ * object.  Builds a NOTIFY_ACK osd request, submits it, and waits for
+ * completion.  Returns 0 on success or a negative errno.
+ */
+static int rbd_obj_notify_ack_sync(struct rbd_device *rbd_dev, u64 notify_id)
+{
+ struct rbd_obj_request *obj_request;
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ int ret;
+
+ obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
+ OBJ_REQUEST_NODATA);
+ if (!obj_request)
+ return -ENOMEM;
+
+ ret = -ENOMEM;
+ obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, 1,
+ obj_request);
+ if (!obj_request->osd_req)
+ goto out;
+
+ osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_NOTIFY_ACK,
+ notify_id, 0, 0);
+ rbd_osd_req_format_read(obj_request);
+
+ ret = rbd_obj_request_submit(osdc, obj_request);
+ if (ret)
+ goto out;
+ ret = rbd_obj_request_wait(obj_request);
+out:
+ /* put frees the request and its osd_req on all paths */
+ rbd_obj_request_put(obj_request);
+
+ return ret;
+}
+
+/*
+ * Watch callback invoked when the image header object changes.
+ * Refreshes the device's view of the header and acknowledges the
+ * notification.
+ */
+static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
+{
+ struct rbd_device *rbd_dev = data;
+ int ret;
+
+ if (!rbd_dev)
+ return;
+
+ dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__,
+ rbd_dev->header_name, (unsigned long long)notify_id,
+ (unsigned int)opcode);
+
+ ret = rbd_dev_refresh(rbd_dev);
+ if (ret)
+ rbd_warn(rbd_dev, "header refresh error (%d)\n", ret);
+
+ rbd_obj_notify_ack_sync(rbd_dev, notify_id);
+}
+
+/*
+ * Request sync osd watch/unwatch. The value of "start" determines
+ * whether a watch request is being initiated or torn down.
+ */
+/*
+ * On start: create a watch event, register a lingering WATCH request
+ * on the header object, and stash it in rbd_dev->watch_request.
+ * On stop: unregister the lingering request, send the unwatch, and
+ * drop the stashed reference.  Returns 0 or a negative errno.
+ */
+static int __rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, bool start)
+{
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ struct rbd_obj_request *obj_request;
+ int ret;
+
+ /* start implies no existing watch; stop implies one exists */
+ rbd_assert(start ^ !!rbd_dev->watch_event);
+ rbd_assert(start ^ !!rbd_dev->watch_request);
+
+ if (start) {
+ ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev,
+ &rbd_dev->watch_event);
+ if (ret < 0)
+ return ret;
+ rbd_assert(rbd_dev->watch_event != NULL);
+ }
+
+ ret = -ENOMEM;
+ obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
+ OBJ_REQUEST_NODATA);
+ if (!obj_request)
+ goto out_cancel;
+
+ obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, 1,
+ obj_request);
+ if (!obj_request->osd_req)
+ goto out_cancel;
+
+ if (start)
+ ceph_osdc_set_request_linger(osdc, obj_request->osd_req);
+ else
+ ceph_osdc_unregister_linger_request(osdc,
+ rbd_dev->watch_request->osd_req);
+
+ /* final argument: 1 = watch, 0 = unwatch */
+ osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH,
+ rbd_dev->watch_event->cookie, 0, start ? 1 : 0);
+ rbd_osd_req_format_write(obj_request);
+
+ ret = rbd_obj_request_submit(osdc, obj_request);
+ if (ret)
+ goto out_cancel;
+ ret = rbd_obj_request_wait(obj_request);
+ if (ret)
+ goto out_cancel;
+ ret = obj_request->result;
+ if (ret)
+ goto out_cancel;
+
+ /*
+ * A watch request is set to linger, so the underlying osd
+ * request won't go away until we unregister it. We retain
+ * a pointer to the object request during that time (in
+ * rbd_dev->watch_request), so we'll keep a reference to
+ * it. We'll drop that reference (below) after we've
+ * unregistered it.
+ */
+ if (start) {
+ rbd_dev->watch_request = obj_request;
+
+ return 0;
+ }
+
+ /* We have successfully torn down the watch request */
+
+ rbd_obj_request_put(rbd_dev->watch_request);
+ rbd_dev->watch_request = NULL;
+out_cancel:
+ /*
+ * Cancel the event if we're tearing down, or on error.
+ * NOTE(review): on a teardown failure this cancels the event
+ * while rbd_dev->watch_request is left set, and the successful
+ * teardown path also falls through here -- the state left
+ * behind on teardown errors looks inconsistent; confirm
+ * against later upstream reworks of this function.
+ */
+ ceph_osdc_cancel_event(rbd_dev->watch_event);
+ rbd_dev->watch_event = NULL;
+ if (obj_request)
+ rbd_obj_request_put(obj_request);
+
+ return ret;
+}
+
+/* Establish a watch on the image header object (see helper above) */
+static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev)
+{
+ return __rbd_dev_header_watch_sync(rbd_dev, true);
+}
+
+/*
+ * Tear down the watch on the image header object, warning (but not
+ * failing -- callers have no recovery) if the teardown errors out.
+ */
+static void rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev)
+{
+ int ret = __rbd_dev_header_watch_sync(rbd_dev, false);
+
+ if (ret) {
+ rbd_warn(rbd_dev, "unable to tear down watch request: %d\n",
+ ret);
+ }
+}
+
+/*
+ * Synchronous osd object method call. Returns the number of bytes
+ * returned in the outbound buffer, or a negative error code.
+ */
+static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
+ const char *object_name,
+ const char *class_name,
+ const char *method_name,
+ const void *outbound,
+ size_t outbound_size,
+ void *inbound,
+ size_t inbound_size)
+{
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ struct rbd_obj_request *obj_request;
+ struct page **pages;
+ u32 page_count;
+ int ret;
+
+ /*
+ * Method calls are ultimately read operations. The result
+ * should placed into the inbound buffer provided. They
+ * also supply outbound data--parameters for the object
+ * method. Currently if this is present it will be a
+ * snapshot id.
+ */
+ page_count = (u32)calc_pages_for(0, inbound_size);
+ pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ ret = -ENOMEM;
+ obj_request = rbd_obj_request_create(object_name, 0, inbound_size,
+ OBJ_REQUEST_PAGES);
+ if (!obj_request)
+ goto out;
+
+ /* obj_request now owns the pages; its put below frees them */
+ obj_request->pages = pages;
+ obj_request->page_count = page_count;
+
+ obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, 1,
+ obj_request);
+ if (!obj_request->osd_req)
+ goto out;
+
+ osd_req_op_cls_init(obj_request->osd_req, 0, CEPH_OSD_OP_CALL,
+ class_name, method_name);
+ if (outbound_size) {
+ struct ceph_pagelist *pagelist;
+
+ pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS);
+ if (!pagelist)
+ goto out;
+
+ /*
+ * NOTE(review): ceph_pagelist_append() can fail but its
+ * return value is ignored here; also the pagelist is
+ * presumably owned and freed by the osd request after
+ * the call below -- confirm both against libceph.
+ */
+ ceph_pagelist_init(pagelist);
+ ceph_pagelist_append(pagelist, outbound, outbound_size);
+ osd_req_op_cls_request_data_pagelist(obj_request->osd_req, 0,
+ pagelist);
+ }
+ osd_req_op_cls_response_data_pages(obj_request->osd_req, 0,
+ obj_request->pages, inbound_size,
+ 0, false, false);
+ rbd_osd_req_format_read(obj_request);
+
+ ret = rbd_obj_request_submit(osdc, obj_request);
+ if (ret)
+ goto out;
+ ret = rbd_obj_request_wait(obj_request);
+ if (ret)
+ goto out;
+
+ ret = obj_request->result;
+ if (ret < 0)
+ goto out;
+
+ /* Copy the method's reply out of the page vector */
+ rbd_assert(obj_request->xferred < (u64)INT_MAX);
+ ret = (int)obj_request->xferred;
+ ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred);
+out:
+ if (obj_request)
+ rbd_obj_request_put(obj_request);
+ else
+ ceph_release_page_vector(pages, page_count);
+
+ return ret;
+}
+
+/*
+ * Block-layer request_fn: drain the request queue, converting each
+ * filesystem request into an rbd image request.
+ *
+ * Entered with q->queue_lock held; the lock is dropped around the
+ * per-request setup/submit work and re-acquired before touching the
+ * queue again (hence the sparse annotations below).
+ */
+static void rbd_request_fn(struct request_queue *q)
+ __releases(q->queue_lock) __acquires(q->queue_lock)
+{
+ struct rbd_device *rbd_dev = q->queuedata;
+ bool read_only = rbd_dev->mapping.read_only;
+ struct request *rq;
+ int result;
+
+ while ((rq = blk_fetch_request(q))) {
+ bool write_request = rq_data_dir(rq) == WRITE;
+ struct rbd_img_request *img_request;
+ u64 offset;
+ u64 length;
+
+ /* Ignore any non-FS requests that filter through. */
+
+ if (rq->cmd_type != REQ_TYPE_FS) {
+ dout("%s: non-fs request type %d\n", __func__,
+ (int) rq->cmd_type);
+ __blk_end_request_all(rq, 0);
+ continue;
+ }
+
+ /* Ignore/skip any zero-length requests */
+
+ offset = (u64) blk_rq_pos(rq) << SECTOR_SHIFT;
+ length = (u64) blk_rq_bytes(rq);
+
+ if (!length) {
+ dout("%s: zero-length request\n", __func__);
+ __blk_end_request_all(rq, 0);
+ continue;
+ }
+
+ spin_unlock_irq(q->queue_lock);
+
+ /* Disallow writes to a read-only device */
+
+ if (write_request) {
+ result = -EROFS;
+ if (read_only)
+ goto end_request;
+ /* Writes are only valid against the base image. */
+ rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP);
+ }
+
+ /*
+ * Quit early if the mapped snapshot no longer
+ * exists. It's still possible the snapshot will
+ * have disappeared by the time our request arrives
+ * at the osd, but there's no sense in sending it if
+ * we already know.
+ */
+ if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) {
+ dout("request for non-existent snapshot");
+ rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
+ result = -ENXIO;
+ goto end_request;
+ }
+
+ /* Guard against u64 wrap of offset + length. */
+ result = -EINVAL;
+ if (offset && length > U64_MAX - offset + 1) {
+ rbd_warn(rbd_dev, "bad request range (%llu~%llu)\n",
+ offset, length);
+ goto end_request; /* Shouldn't happen */
+ }
+
+ result = -EIO;
+ if (offset + length > rbd_dev->mapping.size) {
+ rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)\n",
+ offset, length, rbd_dev->mapping.size);
+ goto end_request;
+ }
+
+ result = -ENOMEM;
+ img_request = rbd_img_request_create(rbd_dev, offset, length,
+ write_request);
+ if (!img_request)
+ goto end_request;
+
+ img_request->rq = rq;
+
+ result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
+ rq->bio);
+ if (!result)
+ result = rbd_img_request_submit(img_request);
+ if (result)
+ rbd_img_request_put(img_request);
+end_request:
+ spin_lock_irq(q->queue_lock);
+ if (result < 0) {
+ rbd_warn(rbd_dev, "%s %llx at %llx result %d\n",
+ write_request ? "write" : "read",
+ length, offset, result);
+
+ /*
+ * Only failed requests complete here.
+ * NOTE(review): submitted ones appear to complete
+ * asynchronously via img_request->rq -- confirm
+ * against the img_request callback path.
+ */
+ __blk_end_request_all(rq, result);
+ }
+ }
+}
+
+/*
+ * a queue callback. Makes sure that we don't create a bio that spans across
+ * multiple osd objects. One exception would be with a single page bios,
+ * which we handle later at bio_chain_clone_range()
+ *
+ * Returns the number of bytes of @bvec that may be added to the bio
+ * described by @bmd (0 to reject, bvec->bv_len to accept fully).
+ */
+static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
+ struct bio_vec *bvec)
+{
+ struct rbd_device *rbd_dev = q->queuedata;
+ sector_t sector_offset;
+ sector_t sectors_per_obj;
+ sector_t obj_sector_offset;
+ int ret;
+
+ /*
+ * Find how far into its rbd object the partition-relative
+ * bio start sector is to offset relative to the enclosing
+ * device.
+ */
+ sector_offset = get_start_sect(bmd->bi_bdev) + bmd->bi_sector;
+ /* sectors_per_obj is a power of two, so masking below is valid. */
+ sectors_per_obj = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT);
+ obj_sector_offset = sector_offset & (sectors_per_obj - 1);
+
+ /*
+ * Compute the number of bytes from that offset to the end
+ * of the object. Account for what's already used by the bio.
+ */
+ ret = (int) (sectors_per_obj - obj_sector_offset) << SECTOR_SHIFT;
+ if (ret > bmd->bi_size)
+ ret -= bmd->bi_size;
+ else
+ ret = 0;
+
+ /*
+ * Don't send back more than was asked for. And if the bio
+ * was empty, let the whole thing through because: "Note
+ * that a block device *must* allow a single page to be
+ * added to an empty bio."
+ */
+ rbd_assert(bvec->bv_len <= PAGE_SIZE);
+ if (ret > (int) bvec->bv_len || !bmd->bi_size)
+ ret = (int) bvec->bv_len;
+
+ return ret;
+}
+
+/*
+ * Release the gendisk (and its request queue) for @rbd_dev.
+ * Safe to call when no disk was ever allocated.
+ */
+static void rbd_free_disk(struct rbd_device *rbd_dev)
+{
+ struct gendisk *disk = rbd_dev->disk;
+
+ if (!disk)
+ return;
+
+ rbd_dev->disk = NULL;
+ /* Only a disk that was add_disk()'d needs del_gendisk();
+ * the queue must be cleaned up after the disk is removed. */
+ if (disk->flags & GENHD_FL_UP) {
+ del_gendisk(disk);
+ if (disk->queue)
+ blk_cleanup_queue(disk->queue);
+ }
+ put_disk(disk);
+}
+
+/*
+ * Synchronously read @length bytes starting at @offset from the
+ * named osd object into @buf. Returns the number of bytes actually
+ * read, or a negative error code.
+ */
+static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
+ const char *object_name,
+ u64 offset, u64 length, void *buf)
+
+{
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ struct rbd_obj_request *obj_request;
+ struct page **pages = NULL;
+ u32 page_count;
+ size_t size;
+ int ret;
+
+ page_count = (u32) calc_pages_for(offset, length);
+ pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages); /* was: "ret = PTR_ERR(pages);" with no
+    * return -- ret was then clobbered by
+    * -ENOMEM and the ERR_PTR value used as
+    * a page vector.  Matches the early
+    * return in rbd_obj_method_sync(). */
+
+ /* -ENOMEM covers every allocation failure until submit. */
+ ret = -ENOMEM;
+ obj_request = rbd_obj_request_create(object_name, offset, length,
+ OBJ_REQUEST_PAGES);
+ if (!obj_request)
+ goto out;
+
+ /* Ownership of the page vector passes to obj_request here. */
+ obj_request->pages = pages;
+ obj_request->page_count = page_count;
+
+ obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, 1,
+ obj_request);
+ if (!obj_request->osd_req)
+ goto out;
+
+ osd_req_op_extent_init(obj_request->osd_req, 0, CEPH_OSD_OP_READ,
+ offset, length, 0, 0);
+ osd_req_op_extent_osd_data_pages(obj_request->osd_req, 0,
+ obj_request->pages,
+ obj_request->length,
+ obj_request->offset & ~PAGE_MASK,
+ false, false);
+ rbd_osd_req_format_read(obj_request);
+
+ ret = rbd_obj_request_submit(osdc, obj_request);
+ if (ret)
+ goto out;
+ ret = rbd_obj_request_wait(obj_request);
+ if (ret)
+ goto out;
+
+ /* obj_request->result holds the osd's status for the read. */
+ ret = obj_request->result;
+ if (ret < 0)
+ goto out;
+
+ rbd_assert(obj_request->xferred <= (u64) SIZE_MAX);
+ size = (size_t) obj_request->xferred;
+ ceph_copy_from_page_vector(pages, buf, 0, size);
+ rbd_assert(size <= (size_t)INT_MAX);
+ ret = (int)size;
+out:
+ /* The request frees the pages once it owns them; free them
+ * directly only if request creation itself failed. */
+ if (obj_request)
+ rbd_obj_request_put(obj_request);
+ else
+ ceph_release_page_vector(pages, page_count);
+
+ return ret;
+}
+
+/*
+ * Read the complete header for the given rbd device. On successful
+ * return, the rbd_dev->header field will contain up-to-date
+ * information about the image.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
+{
+ struct rbd_image_header_ondisk *ondisk = NULL;
+ u32 snap_count = 0;
+ u64 names_size = 0;
+ u32 want_count;
+ int ret;
+
+ /*
+ * The complete header will include an array of its 64-bit
+ * snapshot ids, followed by the names of those snapshots as
+ * a contiguous block of NUL-terminated strings. Note that
+ * the number of snapshots could change by the time we read
+ * it in, in which case we re-read it.
+ */
+ do {
+ size_t size;
+
+ /* Free the previous (too small) attempt; kfree(NULL) is ok. */
+ kfree(ondisk);
+
+ size = sizeof (*ondisk);
+ size += snap_count * sizeof (struct rbd_image_snap_ondisk);
+ size += names_size;
+ ondisk = kmalloc(size, GFP_KERNEL);
+ if (!ondisk)
+ return -ENOMEM;
+
+ ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name,
+ 0, size, ondisk);
+ if (ret < 0)
+ goto out;
+ if ((size_t)ret < size) {
+ ret = -ENXIO;
+ rbd_warn(rbd_dev, "short header read (want %zd got %d)",
+ size, ret);
+ goto out;
+ }
+ if (!rbd_dev_ondisk_valid(ondisk)) {
+ ret = -ENXIO;
+ rbd_warn(rbd_dev, "invalid header");
+ goto out;
+ }
+
+ /* Retry with the sizes the header just reported, until
+ * the snapshot count is stable across two reads. */
+ names_size = le64_to_cpu(ondisk->snap_names_len);
+ want_count = snap_count;
+ snap_count = le32_to_cpu(ondisk->snap_count);
+ } while (snap_count != want_count);
+
+ ret = rbd_header_from_disk(rbd_dev, ondisk);
+out:
+ kfree(ondisk);
+
+ return ret;
+}
+
+/*
+ * Clear the rbd device's EXISTS flag if the snapshot it's mapped to
+ * has disappeared from the (just updated) snapshot context.
+ */
+static void rbd_exists_validate(struct rbd_device *rbd_dev)
+{
+ u64 mapped_snap_id;
+
+ /* Nothing to do unless the device currently claims to exist. */
+ if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags))
+ return;
+
+ /* The base image (CEPH_NOSNAP) is never invalidated here. */
+ mapped_snap_id = rbd_dev->spec->snap_id;
+ if (mapped_snap_id != CEPH_NOSNAP &&
+     rbd_dev_snap_index(rbd_dev, mapped_snap_id) == BAD_SNAP_INDEX)
+ clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
+}
+
+/*
+ * Propagate a changed mapping size to the gendisk, unless the
+ * device is being removed (in which case the disk is gone).
+ */
+static void rbd_dev_update_size(struct rbd_device *rbd_dev)
+{
+ sector_t size;
+ bool removing;
+
+ /*
+ * Don't hold the lock while doing disk operations,
+ * or lock ordering will conflict with the bdev mutex via:
+ * rbd_add() -> blkdev_get() -> rbd_open()
+ */
+ spin_lock_irq(&rbd_dev->lock);
+ removing = test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags);
+ spin_unlock_irq(&rbd_dev->lock);
+ /*
+ * If the device is being removed, rbd_dev->disk has
+ * been destroyed, so don't try to update its size
+ */
+ if (!removing) {
+ /* NOTE(review): the cast binds before the division, so this
+ * assumes sector_t is 64-bit (large block device config) --
+ * confirm for 32-bit builds. */
+ size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE;
+ dout("setting size to %llu sectors", (unsigned long long)size);
+ set_capacity(rbd_dev->disk, size);
+ revalidate_disk(rbd_dev->disk);
+ }
+}
+
+/*
+ * Re-read the image header (v1 or v2 as appropriate), revalidate
+ * the EXISTS flag, and resize the disk if the mapping size changed.
+ * Returns the header-read result (0 on success, negative on error).
+ */
+static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+{
+ u64 mapping_size;
+ int ret;
+
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+ down_write(&rbd_dev->header_rwsem);
+ mapping_size = rbd_dev->mapping.size;
+ if (rbd_dev->image_format == 1)
+ ret = rbd_dev_v1_header_info(rbd_dev);
+ else
+ ret = rbd_dev_v2_header_info(rbd_dev);
+
+ /* If it's a mapped snapshot, validate its EXISTS flag */
+
+ rbd_exists_validate(rbd_dev);
+ up_write(&rbd_dev->header_rwsem);
+
+ /* NOTE(review): mapping.size is re-read here after the rwsem is
+ * released, so a concurrent refresh could race this comparison --
+ * confirm whether the window matters in practice. */
+ if (mapping_size != rbd_dev->mapping.size) {
+ rbd_dev_update_size(rbd_dev);
+ }
+
+ return ret;
+}
+
+/*
+ * Allocate and configure the gendisk and request queue for
+ * @rbd_dev. The disk is not added here (no add_disk() call).
+ * Returns 0 on success, -ENOMEM on any allocation failure.
+ */
+static int rbd_init_disk(struct rbd_device *rbd_dev)
+{
+ struct gendisk *disk;
+ struct request_queue *q;
+ u64 segment_size;
+
+ /* create gendisk info */
+ disk = alloc_disk(single_major ?
+ (1 << RBD_SINGLE_MAJOR_PART_SHIFT) :
+ RBD_MINORS_PER_MAJOR);
+ if (!disk)
+ return -ENOMEM;
+
+ snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
+ rbd_dev->dev_id);
+ disk->major = rbd_dev->major;
+ disk->first_minor = rbd_dev->minor;
+ if (single_major)
+ disk->flags |= GENHD_FL_EXT_DEVT;
+ disk->fops = &rbd_bd_ops;
+ disk->private_data = rbd_dev;
+
+ q = blk_init_queue(rbd_request_fn, &rbd_dev->lock);
+ if (!q)
+ goto out_disk;
+
+ /* We use the default size, but let's be explicit about it. */
+ blk_queue_physical_block_size(q, SECTOR_SIZE);
+
+ /* set io sizes to object size */
+ segment_size = rbd_obj_bytes(&rbd_dev->header);
+ blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
+ blk_queue_max_segment_size(q, segment_size);
+ blk_queue_io_min(q, segment_size);
+ blk_queue_io_opt(q, segment_size);
+
+ /* Keep bios from spanning osd object boundaries. */
+ blk_queue_merge_bvec(q, rbd_merge_bvec);
+ disk->queue = q;
+
+ q->queuedata = rbd_dev;
+
+ rbd_dev->disk = disk;
+
+ return 0;
+out_disk:
+ put_disk(disk);
+
+ /* Only queue allocation can reach here; report out-of-memory. */
+ return -ENOMEM;
+}
+
+/*
+ sysfs
+*/
+
+/* Map a sysfs struct device back to its containing rbd_device. */
+static struct rbd_device *dev_to_rbd_dev(struct device *dev)
+{
+ return container_of(dev, struct rbd_device, dev);
+}
+
+/* sysfs: mapped image (or snapshot) size in bytes. */
+static ssize_t rbd_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+ unsigned long long bytes = rbd_dev->mapping.size;
+
+ return sprintf(buf, "%llu\n", bytes);
+}
+
+/*
+ * Note this shows the features for whatever's mapped, which is not
+ * necessarily the base image.
+ */
+static ssize_t rbd_features_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+ unsigned long long features = rbd_dev->mapping.features;
+
+ return sprintf(buf, "0x%016llx\n", features);
+}
+
+/* sysfs: block device major number, or "(none)" if not assigned. */
+static ssize_t rbd_major_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+
+ return rbd_dev->major ? sprintf(buf, "%d\n", rbd_dev->major)
+       : sprintf(buf, "(none)\n");
+}
+
+/* sysfs: block device minor number. */
+static ssize_t rbd_minor_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int minor = dev_to_rbd_dev(dev)->minor;
+
+ return sprintf(buf, "%d\n", minor);
+}
+
+/* sysfs: the ceph client id ("clientNNN") used for this mapping. */
+static ssize_t rbd_client_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct ceph_client *client = dev_to_rbd_dev(dev)->rbd_client->client;
+
+ return sprintf(buf, "client%lld\n", ceph_client_id(client));
+}
+
+/* sysfs: name of the pool holding the image. */
+static ssize_t rbd_pool_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_spec *spec = dev_to_rbd_dev(dev)->spec;
+
+ return sprintf(buf, "%s\n", spec->pool_name);
+}
+
+/* sysfs: numeric id of the pool holding the image. */
+static ssize_t rbd_pool_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_spec *spec = dev_to_rbd_dev(dev)->spec;
+
+ return sprintf(buf, "%llu\n", (unsigned long long) spec->pool_id);
+}
+
+/* sysfs: image name, or "(unknown)" when only the id is known. */
+static ssize_t rbd_name_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ const char *image_name = dev_to_rbd_dev(dev)->spec->image_name;
+
+ if (!image_name)
+ return sprintf(buf, "(unknown)\n");
+
+ return sprintf(buf, "%s\n", image_name);
+}
+
+/* sysfs: the image's unique id string. */
+static ssize_t rbd_image_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_spec *spec = dev_to_rbd_dev(dev)->spec;
+
+ return sprintf(buf, "%s\n", spec->image_id);
+}
+
+/*
+ * Shows the name of the currently-mapped snapshot (or
+ * RBD_SNAP_HEAD_NAME for the base image).
+ */
+static ssize_t rbd_snap_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct rbd_spec *spec = dev_to_rbd_dev(dev)->spec;
+
+ return sprintf(buf, "%s\n", spec->snap_name);
+}
+
+/*
+ * For an rbd v2 image, shows the pool id, image id, and snapshot id
+ * for the parent image. If there is no parent, simply shows
+ * "(no parent image)".
+ *
+ * Returns the total number of bytes written to @buf.
+ */
+static ssize_t rbd_parent_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+ struct rbd_spec *spec = rbd_dev->parent_spec;
+ int count;
+ char *bufp = buf;
+
+ /* A NULL parent spec means this image has no parent. */
+ if (!spec)
+ return sprintf(buf, "(no parent image)\n");
+
+ /* Each section appends to buf; the <0 checks are defensive. */
+ count = sprintf(bufp, "pool_id %llu\npool_name %s\n",
+ (unsigned long long) spec->pool_id, spec->pool_name);
+ if (count < 0)
+ return count;
+ bufp += count;
+
+ count = sprintf(bufp, "image_id %s\nimage_name %s\n", spec->image_id,
+ spec->image_name ? spec->image_name : "(unknown)");
+ if (count < 0)
+ return count;
+ bufp += count;
+
+ count = sprintf(bufp, "snap_id %llu\nsnap_name %s\n",
+ (unsigned long long) spec->snap_id, spec->snap_name);
+ if (count < 0)
+ return count;
+ bufp += count;
+
+ count = sprintf(bufp, "overlap %llu\n", rbd_dev->parent_overlap);
+ if (count < 0)
+ return count;
+ bufp += count;
+
+ return (ssize_t) (bufp - buf);
+}
+
+/*
+ * sysfs store: any write to the "refresh" attribute forces a
+ * header refresh. Consumes the whole write on success.
+ */
+static ssize_t rbd_image_refresh(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t size)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+ int ret = rbd_dev_refresh(rbd_dev);
+
+ if (ret)
+ rbd_warn(rbd_dev, ": manual header refresh error (%d)\n", ret);
+
+ return ret < 0 ? ret : size;
+}
+
+/* sysfs attributes exposed under /sys/bus/rbd/devices/<id>/.
+ * All are read-only except "refresh", which is write-only. */
+static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
+static DEVICE_ATTR(features, S_IRUGO, rbd_features_show, NULL);
+static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
+static DEVICE_ATTR(minor, S_IRUGO, rbd_minor_show, NULL);
+static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
+static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
+static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
+static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
+static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL);
+static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
+static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
+static DEVICE_ATTR(parent, S_IRUGO, rbd_parent_show, NULL);
+
+static struct attribute *rbd_attrs[] = {
+ &dev_attr_size.attr,
+ &dev_attr_features.attr,
+ &dev_attr_major.attr,
+ &dev_attr_minor.attr,
+ &dev_attr_client_id.attr,
+ &dev_attr_pool.attr,
+ &dev_attr_pool_id.attr,
+ &dev_attr_name.attr,
+ &dev_attr_image_id.attr,
+ &dev_attr_current_snap.attr,
+ &dev_attr_parent.attr,
+ &dev_attr_refresh.attr,
+ NULL
+};
+
+static struct attribute_group rbd_attr_group = {
+ .attrs = rbd_attrs,
+};
+
+static const struct attribute_group *rbd_attr_groups[] = {
+ &rbd_attr_group,
+ NULL
+};
+
+/* Empty release: rbd_device lifetime is managed elsewhere, not by
+ * the embedded struct device's refcount. */
+static void rbd_sysfs_dev_release(struct device *dev)
+{
+}
+
+static struct device_type rbd_device_type = {
+ .name = "rbd",
+ .groups = rbd_attr_groups,
+ .release = rbd_sysfs_dev_release,
+};
+
+/* Take a reference on @spec and return it, for caller convenience. */
+static struct rbd_spec *rbd_spec_get(struct rbd_spec *spec)
+{
+ kref_get(&spec->kref);
+ return spec;
+}
+
+static void rbd_spec_free(struct kref *kref);
+/* Drop a reference on @spec; NULL is tolerated, like kfree(). */
+static void rbd_spec_put(struct rbd_spec *spec)
+{
+ if (!spec)
+ return;
+ kref_put(&spec->kref, rbd_spec_free);
+}
+
+/*
+ * Allocate a zeroed rbd_spec with one reference held.
+ * Returns NULL on allocation failure.
+ */
+static struct rbd_spec *rbd_spec_alloc(void)
+{
+ struct rbd_spec *spec = kzalloc(sizeof (*spec), GFP_KERNEL);
+
+ if (spec)
+ kref_init(&spec->kref);
+
+ return spec;
+}
+
+/* kref release callback: free a spec and all its owned strings.
+ * kfree(NULL) is a no-op, so unset names need no checks. */
+static void rbd_spec_free(struct kref *kref)
+{
+ struct rbd_spec *spec = container_of(kref, struct rbd_spec, kref);
+
+ kfree(spec->pool_name);
+ kfree(spec->image_id);
+ kfree(spec->image_name);
+ kfree(spec->snap_name);
+ kfree(spec);
+}
+
+/*
+ * Allocate and initialize an rbd_device. Takes over the caller's
+ * references to @rbdc and @spec (released by rbd_dev_destroy()).
+ * Returns NULL on allocation failure.
+ */
+static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
+ struct rbd_spec *spec)
+{
+ struct rbd_device *rbd_dev;
+
+ rbd_dev = kzalloc(sizeof (*rbd_dev), GFP_KERNEL);
+ if (!rbd_dev)
+ return NULL;
+
+ spin_lock_init(&rbd_dev->lock);
+ rbd_dev->flags = 0;
+ atomic_set(&rbd_dev->parent_ref, 0);
+ INIT_LIST_HEAD(&rbd_dev->node);
+ init_rwsem(&rbd_dev->header_rwsem);
+
+ rbd_dev->spec = spec;
+ rbd_dev->rbd_client = rbdc;
+
+ /* Initialize the layout used for all rbd requests */
+
+ rbd_dev->layout.fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
+ rbd_dev->layout.fl_stripe_count = cpu_to_le32(1);
+ rbd_dev->layout.fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
+ rbd_dev->layout.fl_pg_pool = cpu_to_le32((u32) spec->pool_id);
+
+ return rbd_dev;
+}
+
+/* Counterpart of rbd_dev_create(): drop the client and spec
+ * references it took over, then free the device. */
+static void rbd_dev_destroy(struct rbd_device *rbd_dev)
+{
+ rbd_put_client(rbd_dev->rbd_client);
+ rbd_spec_put(rbd_dev->spec);
+ kfree(rbd_dev);
+}
+
+/*
+ * Get the size and object order for an image snapshot, or if
+ * snap_id is CEPH_NOSNAP, gets this information for the base
+ * image.
+ *
+ * @order may be NULL if the caller only wants the size.
+ * Returns 0 on success, -ERANGE on a short reply, or another
+ * negative error code.
+ */
+static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
+ u8 *order, u64 *snap_size)
+{
+ __le64 snapid = cpu_to_le64(snap_id);
+ int ret;
+ /* Wire format of the "get_size" reply: order byte + le64 size. */
+ struct {
+ u8 order;
+ __le64 size;
+ } __attribute__ ((packed)) size_buf = { 0 };
+
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ "rbd", "get_size",
+ &snapid, sizeof (snapid),
+ &size_buf, sizeof (size_buf));
+ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+ if (ret < 0)
+ return ret;
+ if (ret < sizeof (size_buf))
+ return -ERANGE;
+
+ if (order) {
+ *order = size_buf.order;
+ dout(" order %u", (unsigned int)*order);
+ }
+ *snap_size = le64_to_cpu(size_buf.size);
+
+ dout(" snap_id 0x%016llx snap_size = %llu\n",
+ (unsigned long long)snap_id,
+ (unsigned long long)*snap_size);
+
+ return 0;
+}
+
+/* Fetch size and object order for the base image (CEPH_NOSNAP). */
+static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev)
+{
+ struct rbd_image_header *header = &rbd_dev->header;
+
+ return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
+ &header->obj_order, &header->image_size);
+}
+
+/*
+ * Fetch the image's object-name prefix from the header object and
+ * store it in rbd_dev->header.object_prefix (newly allocated).
+ * Returns 0 on success or a negative error code.
+ */
+static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
+{
+ void *reply_buf;
+ int ret;
+ void *p;
+
+ reply_buf = kzalloc(RBD_OBJ_PREFIX_LEN_MAX, GFP_KERNEL);
+ if (!reply_buf)
+ return -ENOMEM;
+
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ "rbd", "get_object_prefix", NULL, 0,
+ reply_buf, RBD_OBJ_PREFIX_LEN_MAX);
+ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+ if (ret < 0)
+ goto out;
+
+ /* ret (>= 0) is the number of reply bytes; decode the string. */
+ p = reply_buf;
+ rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p,
+ p + ret, NULL, GFP_NOIO);
+ /* Assume success; replaced below if extraction failed. */
+ ret = 0;
+
+ if (IS_ERR(rbd_dev->header.object_prefix)) {
+ ret = PTR_ERR(rbd_dev->header.object_prefix);
+ rbd_dev->header.object_prefix = NULL;
+ } else {
+ dout(" object_prefix = %s\n", rbd_dev->header.object_prefix);
+ }
+out:
+ kfree(reply_buf);
+
+ return ret;
+}
+
+/*
+ * Fetch the feature bits for a snapshot (or the base image when
+ * snap_id is CEPH_NOSNAP). Returns 0 on success, -ERANGE on a
+ * short reply, -ENXIO if the image requires incompatible features,
+ * or another negative error code.
+ */
+static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
+ u64 *snap_features)
+{
+ __le64 snapid = cpu_to_le64(snap_id);
+ /* Wire format of the "get_features" reply. */
+ struct {
+ __le64 features;
+ __le64 incompat;
+ } __attribute__ ((packed)) features_buf = { 0 };
+ u64 incompat;
+ int ret;
+
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ "rbd", "get_features",
+ &snapid, sizeof (snapid),
+ &features_buf, sizeof (features_buf));
+ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+ if (ret < 0)
+ return ret;
+ if (ret < sizeof (features_buf))
+ return -ERANGE;
+
+ /* Refuse the image if it needs features we don't implement. */
+ incompat = le64_to_cpu(features_buf.incompat);
+ if (incompat & ~RBD_FEATURES_SUPPORTED)
+ return -ENXIO;
+
+ *snap_features = le64_to_cpu(features_buf.features);
+
+ dout(" snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n",
+ (unsigned long long)snap_id,
+ (unsigned long long)*snap_features,
+ (unsigned long long)le64_to_cpu(features_buf.incompat));
+
+ return 0;
+}
+
+/* Fetch the feature bits for the base image (CEPH_NOSNAP). */
+static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
+{
+ u64 *features = &rbd_dev->header.features;
+
+ return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP, features);
+}
+
+/*
+ * Query the osd for this image's parent (pool id, image id, snap
+ * id, overlap). Records the parent spec on the first successful
+ * probe and keeps rbd_dev->parent_overlap up to date thereafter,
+ * handling the image being flattened or resized to zero overlap.
+ * Returns 0 on success (including "no parent") or a negative error.
+ */
+static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
+{
+ struct rbd_spec *parent_spec;
+ size_t size;
+ void *reply_buf = NULL;
+ __le64 snapid;
+ void *p;
+ void *end;
+ u64 pool_id;
+ char *image_id;
+ u64 snap_id;
+ u64 overlap;
+ int ret;
+
+ parent_spec = rbd_spec_alloc();
+ if (!parent_spec)
+ return -ENOMEM;
+
+ /* Worst-case encoded size of the "get_parent" reply. */
+ size = sizeof (__le64) + /* pool_id */
+ sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX + /* image_id */
+ sizeof (__le64) + /* snap_id */
+ sizeof (__le64); /* overlap */
+ reply_buf = kmalloc(size, GFP_KERNEL);
+ if (!reply_buf) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ snapid = cpu_to_le64(CEPH_NOSNAP);
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ "rbd", "get_parent",
+ &snapid, sizeof (snapid),
+ reply_buf, size);
+ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+ if (ret < 0)
+ goto out_err;
+
+ p = reply_buf;
+ end = reply_buf + ret;
+ /* -ERANGE if any of the _safe decodes below run off the end. */
+ ret = -ERANGE;
+ ceph_decode_64_safe(&p, end, pool_id, out_err);
+ if (pool_id == CEPH_NOPOOL) {
+ /*
+ * Either the parent never existed, or we have
+ * record of it but the image got flattened so it no
+ * longer has a parent. When the parent of a
+ * layered image disappears we immediately set the
+ * overlap to 0. The effect of this is that all new
+ * requests will be treated as if the image had no
+ * parent.
+ */
+ if (rbd_dev->parent_overlap) {
+ rbd_dev->parent_overlap = 0;
+ smp_mb();
+ rbd_dev_parent_put(rbd_dev);
+ pr_info("%s: clone image has been flattened\n",
+ rbd_dev->disk->disk_name);
+ }
+
+ goto out; /* No parent? No problem. */
+ }
+
+ /* The ceph file layout needs to fit pool id in 32 bits */
+
+ ret = -EIO;
+ if (pool_id > (u64)U32_MAX) {
+ rbd_warn(NULL, "parent pool id too large (%llu > %u)\n",
+ (unsigned long long)pool_id, U32_MAX);
+ goto out_err;
+ }
+
+ image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
+ if (IS_ERR(image_id)) {
+ ret = PTR_ERR(image_id);
+ goto out_err;
+ }
+ ceph_decode_64_safe(&p, end, snap_id, out_err);
+ ceph_decode_64_safe(&p, end, overlap, out_err);
+
+ /*
+ * The parent won't change (except when the clone is
+ * flattened, already handled that). So we only need to
+ * record the parent spec we have not already done so.
+ */
+ if (!rbd_dev->parent_spec) {
+ parent_spec->pool_id = pool_id;
+ parent_spec->image_id = image_id;
+ parent_spec->snap_id = snap_id;
+ rbd_dev->parent_spec = parent_spec;
+ parent_spec = NULL; /* rbd_dev now owns this */
+ }
+
+ /*
+ * We always update the parent overlap. If it's zero we
+ * treat it specially.
+ */
+ rbd_dev->parent_overlap = overlap;
+ smp_mb();
+ if (!overlap) {
+
+ /* A null parent_spec indicates it's the initial probe */
+
+ if (parent_spec) {
+ /*
+ * The overlap has become zero, so the clone
+ * must have been resized down to 0 at some
+ * point. Treat this the same as a flatten.
+ */
+ rbd_dev_parent_put(rbd_dev);
+ pr_info("%s: clone image now standalone\n",
+ rbd_dev->disk->disk_name);
+ } else {
+ /*
+ * For the initial probe, if we find the
+ * overlap is zero we just pretend there was
+ * no parent image.
+ */
+ rbd_warn(rbd_dev, "ignoring parent of "
+ "clone with overlap 0\n");
+ }
+ }
+out:
+ ret = 0;
+out_err:
+ /* parent_spec is NULL here if ownership moved to rbd_dev. */
+ kfree(reply_buf);
+ rbd_spec_put(parent_spec);
+
+ return ret;
+}
+
+/*
+ * Fetch the image's stripe unit/count and record them in the
+ * header. Fancy striping (STRIPINGV2) is not implemented, so any
+ * non-default values are rejected with -EINVAL.
+ */
+static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
+{
+ /* Wire format of the "get_stripe_unit_count" reply. */
+ struct {
+ __le64 stripe_unit;
+ __le64 stripe_count;
+ } __attribute__ ((packed)) striping_info_buf = { 0 };
+ size_t size = sizeof (striping_info_buf);
+ void *p;
+ u64 obj_size;
+ u64 stripe_unit;
+ u64 stripe_count;
+ int ret;
+
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ "rbd", "get_stripe_unit_count", NULL, 0,
+ (char *)&striping_info_buf, size);
+ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+ if (ret < 0)
+ return ret;
+ if (ret < size)
+ return -ERANGE;
+
+ /*
+ * We don't actually support the "fancy striping" feature
+ * (STRIPINGV2) yet, but if the striping sizes are the
+ * defaults the behavior is the same as before. So find
+ * out, and only fail if the image has non-default values.
+ */
+ ret = -EINVAL;
+ obj_size = (u64)1 << rbd_dev->header.obj_order;
+ p = &striping_info_buf;
+ stripe_unit = ceph_decode_64(&p);
+ if (stripe_unit != obj_size) {
+ rbd_warn(rbd_dev, "unsupported stripe unit "
+ "(got %llu want %llu)",
+ stripe_unit, obj_size);
+ return -EINVAL;
+ }
+ stripe_count = ceph_decode_64(&p);
+ if (stripe_count != 1) {
+ rbd_warn(rbd_dev, "unsupported stripe count "
+ "(got %llu want 1)", stripe_count);
+ return -EINVAL;
+ }
+ rbd_dev->header.stripe_unit = stripe_unit;
+ rbd_dev->header.stripe_count = stripe_count;
+
+ return 0;
+}
+
+/*
+ * Look up the image name for this device's image id via the rbd
+ * directory object. Returns a newly allocated name string (caller
+ * frees), or NULL on any failure -- callers treat the name as
+ * optional.
+ */
+static char *rbd_dev_image_name(struct rbd_device *rbd_dev)
+{
+ size_t image_id_size;
+ char *image_id;
+ void *p;
+ void *end;
+ size_t size;
+ void *reply_buf = NULL;
+ size_t len = 0;
+ char *image_name = NULL;
+ int ret;
+
+ rbd_assert(!rbd_dev->spec->image_name);
+
+ /* Build the ceph-encoded (length-prefixed) image id argument. */
+ len = strlen(rbd_dev->spec->image_id);
+ image_id_size = sizeof (__le32) + len;
+ image_id = kmalloc(image_id_size, GFP_KERNEL);
+ if (!image_id)
+ return NULL;
+
+ p = image_id;
+ end = image_id + image_id_size;
+ ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32)len);
+
+ size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX;
+ reply_buf = kmalloc(size, GFP_KERNEL);
+ if (!reply_buf)
+ goto out;
+
+ ret = rbd_obj_method_sync(rbd_dev, RBD_DIRECTORY,
+ "rbd", "dir_get_name",
+ image_id, image_id_size,
+ reply_buf, size);
+ if (ret < 0)
+ goto out;
+ p = reply_buf;
+ end = reply_buf + ret;
+
+ /* Any extraction failure is folded into the NULL return. */
+ image_name = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL);
+ if (IS_ERR(image_name))
+ image_name = NULL;
+ else
+ dout("%s: name is %s len is %zd\n", __func__, image_name, len);
+out:
+ kfree(reply_buf);
+ kfree(image_id);
+
+ return image_name;
+}
+
+/*
+ * Format 1: the snapshot ids and their NUL-terminated names are
+ * stored in parallel; walk them together looking for @name.
+ * Returns the matching id, or CEPH_NOSNAP if not found.
+ */
+static u64 rbd_v1_snap_id_by_name(struct rbd_device *rbd_dev, const char *name)
+{
+ struct ceph_snap_context *snapc = rbd_dev->header.snapc;
+ const char *cur_name = rbd_dev->header.snap_names;
+ u32 i;
+
+ for (i = 0; i < snapc->num_snaps; i++) {
+ if (!strcmp(name, cur_name))
+ return snapc->snaps[i];
+ /* Advance past this name's terminating NUL. */
+ cur_name += strlen(cur_name) + 1;
+ }
+
+ return CEPH_NOSNAP;
+}
+
+/*
+ * Format 2: snapshot names must be fetched per-id from the osd.
+ * Returns the id whose name matches @name, or CEPH_NOSNAP if not
+ * found or on error.
+ */
+static u64 rbd_v2_snap_id_by_name(struct rbd_device *rbd_dev, const char *name)
+{
+ struct ceph_snap_context *snapc = rbd_dev->header.snapc;
+ u32 which;
+ bool found = false;
+ u64 snap_id;
+
+ for (which = 0; !found && which < snapc->num_snaps; which++) {
+ const char *snap_name;
+
+ snap_id = snapc->snaps[which];
+ snap_name = rbd_dev_v2_snap_name(rbd_dev, snap_id);
+ if (IS_ERR(snap_name)) {
+ /* ignore no-longer existing snapshots */
+ if (PTR_ERR(snap_name) == -ENOENT)
+ continue;
+ else
+ break;
+ }
+ found = !strcmp(name, snap_name);
+ /* The fetched name is a fresh allocation; free each one. */
+ kfree(snap_name);
+ }
+ return found ? snap_id : CEPH_NOSNAP;
+}
+
+/*
+ * Assumes name is never RBD_SNAP_HEAD_NAME; returns CEPH_NOSNAP if
+ * no snapshot by that name is found, or if an error occurs.
+ */
+static u64 rbd_snap_id_by_name(struct rbd_device *rbd_dev, const char *name)
+{
+ /* Dispatch on image format: 1 = legacy, otherwise format 2. */
+ return rbd_dev->image_format == 1 ?
+ rbd_v1_snap_id_by_name(rbd_dev, name) :
+ rbd_v2_snap_id_by_name(rbd_dev, name);
+}
+
+/*
+ * When an rbd image has a parent image, it is identified by the
+ * pool, image, and snapshot ids (not names). This function fills
+ * in the names for those ids. (It's OK if we can't figure out the
+ * name for an image id, but the pool and snapshot ids should always
+ * exist and have names.) All names in an rbd spec are dynamically
+ * allocated.
+ *
+ * When an image being mapped (not a parent) is probed, we have the
+ * pool name and pool id, image name and image id, and the snapshot
+ * name. The only thing we're missing is the snapshot id.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int rbd_dev_spec_update(struct rbd_device *rbd_dev)
+{
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ struct rbd_spec *spec = rbd_dev->spec;
+ const char *pool_name;
+ const char *image_name;
+ const char *snap_name;
+ int ret;
+
+ /*
+ * An image being mapped will have the pool name (etc.), but
+ * we need to look up the snapshot id.
+ */
+ if (spec->pool_name) {
+ if (strcmp(spec->snap_name, RBD_SNAP_HEAD_NAME)) {
+ u64 snap_id;
+
+ snap_id = rbd_snap_id_by_name(rbd_dev, spec->snap_name);
+ if (snap_id == CEPH_NOSNAP)
+ return -ENOENT;
+ spec->snap_id = snap_id;
+ } else {
+ /* Mapping the base image, not a snapshot. */
+ spec->snap_id = CEPH_NOSNAP;
+ }
+
+ return 0;
+ }
+
+ /* Get the pool name; we have to make our own copy of this */
+
+ pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, spec->pool_id);
+ if (!pool_name) {
+ rbd_warn(rbd_dev, "no pool with id %llu", spec->pool_id);
+ return -EIO;
+ }
+ pool_name = kstrdup(pool_name, GFP_KERNEL);
+ if (!pool_name)
+ return -ENOMEM;
+
+ /* Fetch the image name; tolerate failure here */
+
+ image_name = rbd_dev_image_name(rbd_dev);
+ if (!image_name)
+ rbd_warn(rbd_dev, "unable to get image name");
+
+ /* Look up the snapshot name, and make a copy */
+
+ snap_name = rbd_snap_name(rbd_dev, spec->snap_id);
+ if (IS_ERR(snap_name)) {
+ ret = PTR_ERR(snap_name);
+ goto out_err;
+ }
+
+ /* The spec now owns all three allocated names. */
+ spec->pool_name = pool_name;
+ spec->image_name = image_name;
+ spec->snap_name = snap_name;
+
+ return 0;
+out_err:
+ kfree(image_name);
+ kfree(pool_name);
+
+ return ret;
+}
+
+/*
+ * Fetch the image's snapshot context (seq + snapshot id array)
+ * from the osd and install it as rbd_dev->header.snapc, replacing
+ * any previous context. Returns 0 on success or a negative error.
+ */
+static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
+{
+ size_t size;
+ int ret;
+ void *reply_buf;
+ void *p;
+ void *end;
+ u64 seq;
+ u32 snap_count;
+ struct ceph_snap_context *snapc;
+ u32 i;
+
+ /*
+ * We'll need room for the seq value (maximum snapshot id),
+ * snapshot count, and array of that many snapshot ids.
+ * For now we have a fixed upper limit on the number we're
+ * prepared to receive.
+ */
+ size = sizeof (__le64) + sizeof (__le32) +
+ RBD_MAX_SNAP_COUNT * sizeof (__le64);
+ reply_buf = kzalloc(size, GFP_KERNEL);
+ if (!reply_buf)
+ return -ENOMEM;
+
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ "rbd", "get_snapcontext", NULL, 0,
+ reply_buf, size);
+ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+ if (ret < 0)
+ goto out;
+
+ p = reply_buf;
+ end = reply_buf + ret;
+ /* -ERANGE if either _safe decode runs off the end. */
+ ret = -ERANGE;
+ ceph_decode_64_safe(&p, end, seq, out);
+ ceph_decode_32_safe(&p, end, snap_count, out);
+
+ /*
+ * Make sure the reported number of snapshot ids wouldn't go
+ * beyond the end of our buffer. But before checking that,
+ * make sure the computed size of the snapshot context we
+ * allocate is representable in a size_t.
+ */
+ if (snap_count > (SIZE_MAX - sizeof (struct ceph_snap_context))
+ / sizeof (u64)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!ceph_has_room(&p, end, snap_count * sizeof (__le64)))
+ goto out;
+ ret = 0;
+
+ snapc = ceph_create_snap_context(snap_count, GFP_KERNEL);
+ if (!snapc) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ snapc->seq = seq;
+ for (i = 0; i < snap_count; i++)
+ snapc->snaps[i] = ceph_decode_64(&p);
+
+ /* Swap in the new context; drop our ref on the old one. */
+ ceph_put_snap_context(rbd_dev->header.snapc);
+ rbd_dev->header.snapc = snapc;
+
+ dout(" snap context seq = %llu, snap_count = %u\n",
+ (unsigned long long)seq, (unsigned int)snap_count);
+out:
+ kfree(reply_buf);
+
+ return ret;
+}
+
+/*
+ * Look up the name of the snapshot with the given id by calling the
+ * "get_snapshot_name" object class method on the image header object.
+ * Returns a dynamically-allocated, NUL-terminated name that the caller
+ * must kfree(), or an ERR_PTR()-encoded errno on failure.
+ */
+static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
+ u64 snap_id)
+{
+ size_t size;
+ void *reply_buf;
+ __le64 snapid;
+ int ret;
+ void *p;
+ void *end;
+ char *snap_name;
+
+ /* Reply is a length-prefixed (encoded) string. */
+ size = sizeof (__le32) + RBD_MAX_SNAP_NAME_LEN;
+ reply_buf = kmalloc(size, GFP_KERNEL);
+ if (!reply_buf)
+ return ERR_PTR(-ENOMEM);
+
+ snapid = cpu_to_le64(snap_id);
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ "rbd", "get_snapshot_name",
+ &snapid, sizeof (snapid),
+ reply_buf, size);
+ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+ if (ret < 0) {
+ snap_name = ERR_PTR(ret);
+ goto out;
+ }
+
+ p = reply_buf;
+ end = reply_buf + ret;
+ /* An ERR_PTR() from the extract is returned to the caller as-is. */
+ snap_name = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
+ if (IS_ERR(snap_name))
+ goto out;
+
+ dout(" snap_id 0x%016llx snap_name = %s\n",
+ (unsigned long long)snap_id, snap_name);
+out:
+ kfree(reply_buf);
+
+ return snap_name;
+}
+
+/*
+ * Refresh the header information for a format 2 image: size, the
+ * one-time fields (on the first call only), parent info for layered
+ * images, the mapping size, and the snapshot context.  Returns 0 on
+ * success or a negative errno.
+ */
+static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev)
+{
+ /* A null object_prefix means the header has never been filled in. */
+ bool first_time = rbd_dev->header.object_prefix == NULL;
+ int ret;
+
+ ret = rbd_dev_v2_image_size(rbd_dev);
+ if (ret)
+ return ret;
+
+ if (first_time) {
+ ret = rbd_dev_v2_header_onetime(rbd_dev);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * If the image supports layering, get the parent info. We
+ * need to probe the first time regardless. Thereafter we
+ * only need to if there's a parent, to see if it has
+ * disappeared due to the mapped image getting flattened.
+ */
+ if (rbd_dev->header.features & RBD_FEATURE_LAYERING &&
+ (first_time || rbd_dev->parent_spec)) {
+ bool warn;
+
+ ret = rbd_dev_v2_parent_info(rbd_dev);
+ if (ret)
+ return ret;
+
+ /*
+ * Print a warning if this is the initial probe and
+ * the image has a parent. Don't print it if the
+ * image now being probed is itself a parent. We
+ * can tell at this point because we won't know its
+ * pool name yet (just its pool id).
+ */
+ warn = rbd_dev->parent_spec && rbd_dev->spec->pool_name;
+ if (first_time && warn)
+ rbd_warn(rbd_dev, "WARNING: kernel layering "
+ "is EXPERIMENTAL!");
+ }
+
+ /* Only a head (non-snapshot) mapping tracks the live image size. */
+ if (rbd_dev->spec->snap_id == CEPH_NOSNAP)
+ if (rbd_dev->mapping.size != rbd_dev->header.image_size)
+ rbd_dev->mapping.size = rbd_dev->header.image_size;
+
+ ret = rbd_dev_v2_snap_context(rbd_dev);
+ dout("rbd_dev_v2_snap_context returned %d\n", ret);
+
+ return ret;
+}
+
+/*
+ * Register the rbd device with the driver core on the rbd bus,
+ * parented under the rbd root device and named after its dev id.
+ * Returns the device_register() result.
+ */
+static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
+{
+ struct device *dev;
+ int ret;
+
+ dev = &rbd_dev->dev;
+ dev->bus = &rbd_bus_type;
+ dev->type = &rbd_device_type;
+ dev->parent = &rbd_root_dev;
+ dev->release = rbd_dev_device_release;
+ dev_set_name(dev, "%d", rbd_dev->dev_id);
+ ret = device_register(dev);
+
+ return ret;
+}
+
+/* Undo rbd_bus_add_dev(); the release callback does the teardown. */
+static void rbd_bus_del_dev(struct rbd_device *rbd_dev)
+{
+ device_unregister(&rbd_dev->dev);
+}
+
+/*
+ * Get a unique rbd identifier for the given new rbd_dev, and add
+ * the rbd_dev to the global list.
+ */
+static int rbd_dev_id_get(struct rbd_device *rbd_dev)
+{
+ int new_dev_id;
+
+ /*
+ * Upper bound keeps the id within the range that can be mapped
+ * back to a minor number (see minor_to_rbd_dev_id()/MINORBITS).
+ */
+ new_dev_id = ida_simple_get(&rbd_dev_id_ida,
+ 0, minor_to_rbd_dev_id(1 << MINORBITS),
+ GFP_KERNEL);
+ if (new_dev_id < 0)
+ return new_dev_id;
+
+ rbd_dev->dev_id = new_dev_id;
+
+ spin_lock(&rbd_dev_list_lock);
+ list_add_tail(&rbd_dev->node, &rbd_dev_list);
+ spin_unlock(&rbd_dev_list_lock);
+
+ dout("rbd_dev %p given dev id %d\n", rbd_dev, rbd_dev->dev_id);
+
+ return 0;
+}
+
+/*
+ * Remove an rbd_dev from the global list, and record that its
+ * identifier is no longer in use.
+ */
+static void rbd_dev_id_put(struct rbd_device *rbd_dev)
+{
+ /* Unlink from the global device list, then free the id. */
+ spin_lock(&rbd_dev_list_lock);
+ list_del_init(&rbd_dev->node);
+ spin_unlock(&rbd_dev_list_lock);
+
+ ida_simple_remove(&rbd_dev_id_ida, rbd_dev->dev_id);
+
+ dout("rbd_dev %p released dev id %d\n", rbd_dev, rbd_dev->dev_id);
+}
+
+/*
+ * Skips over white space at *buf, and updates *buf to point to the
+ * first found non-space character (if any). Returns the length of
+ * the token (string of non-white space characters) found. Note
+ * that *buf must be terminated with '\0'.
+ */
+static inline size_t next_token(const char **buf)
+{
+ /*
+ * These are the characters that produce nonzero for
+ * isspace() in the "C" and "POSIX" locales.
+ */
+ const char *spaces = " \f\n\r\t\v";
+
+ *buf += strspn(*buf, spaces); /* Find start of token */
+
+ /* *buf is left pointing at the token itself, not advanced past it. */
+ return strcspn(*buf, spaces); /* Return token length */
+}
+
+/*
+ * Finds the next token in *buf, and if the provided token buffer is
+ * big enough, copies the found token into it. The result, if
+ * copied, is guaranteed to be terminated with '\0'. Note that *buf
+ * must be terminated with '\0' on entry.
+ *
+ * Returns the length of the token found (not including the '\0').
+ * Return value will be 0 if no token is found, and it will be >=
+ * token_size if the token would not fit.
+ *
+ * The *buf pointer will be updated to point beyond the end of the
+ * found token. Note that this occurs even if the token buffer is
+ * too small to hold it.
+ */
+static inline size_t copy_token(const char **buf,
+ char *token,
+ size_t token_size)
+{
+ size_t len;
+
+ len = next_token(buf);
+ /* Copy only if the token (plus its NUL) fits in the caller's buffer. */
+ if (len < token_size) {
+ memcpy(token, *buf, len);
+ *(token + len) = '\0';
+ }
+ /* *buf advances past the token even when it didn't fit. */
+ *buf += len;
+
+ return len;
+}
+
+/*
+ * Finds the next token in *buf, dynamically allocates a buffer big
+ * enough to hold a copy of it, and copies the token into the new
+ * buffer. The copy is guaranteed to be terminated with '\0'. Note
+ * that a duplicate buffer is created even for a zero-length token.
+ *
+ * Returns a pointer to the newly-allocated duplicate, or a null
+ * pointer if memory for the duplicate was not available. If
+ * the lenp argument is a non-null pointer, the length of the token
+ * (not including the '\0') is returned in *lenp.
+ *
+ * If successful, the *buf pointer will be updated to point beyond
+ * the end of the found token.
+ *
+ * Note: uses GFP_KERNEL for allocation.
+ */
+static inline char *dup_token(const char **buf, size_t *lenp)
+{
+ char *dup;
+ size_t len;
+
+ len = next_token(buf);
+ /*
+ * kmemdup() copies len + 1 bytes; the byte after the token is
+ * either whitespace or the string's terminating NUL, so the read
+ * stays in bounds, and we overwrite that byte with '\0' below.
+ */
+ dup = kmemdup(*buf, len + 1, GFP_KERNEL);
+ if (!dup)
+ return NULL;
+ *(dup + len) = '\0';
+ *buf += len;
+
+ if (lenp)
+ *lenp = len;
+
+ return dup;
+}
+
+/*
+ * Parse the options provided for an "rbd add" (i.e., rbd image
+ * mapping) request. These arrive via a write to /sys/bus/rbd/add,
+ * and the data written is passed here via a NUL-terminated buffer.
+ * Returns 0 if successful or an error code otherwise.
+ *
+ * The information extracted from these options is recorded in
+ * the other parameters which return dynamically-allocated
+ * structures:
+ * ceph_opts
+ * The address of a pointer that will refer to a ceph options
+ * structure. Caller must release the returned pointer using
+ * ceph_destroy_options() when it is no longer needed.
+ * rbd_opts
+ * Address of an rbd options pointer. Fully initialized by
+ * this function; caller must release with kfree().
+ * spec
+ * Address of an rbd image specification pointer. Fully
+ * initialized by this function based on parsed options.
+ * Caller must release with rbd_spec_put().
+ *
+ * The options passed take this form:
+ * <mon_addrs> <options> <pool_name> <image_name> [<snap_id>]
+ * where:
+ * <mon_addrs>
+ * A comma-separated list of one or more monitor addresses.
+ * A monitor address is an ip address, optionally followed
+ * by a port number (separated by a colon).
+ * I.e.: ip1[:port1][,ip2[:port2]...]
+ * <options>
+ * A comma-separated list of ceph and/or rbd options.
+ * <pool_name>
+ * The name of the rados pool containing the rbd image.
+ * <image_name>
+ * The name of the image in that pool to map.
+ * <snap_id>
+ * An optional snapshot id. If provided, the mapping will
+ * present data from the image at the time that snapshot was
+ * created. The image head is used if no snapshot id is
+ * provided. Snapshot mappings are always read-only.
+ */
+static int rbd_add_parse_args(const char *buf,
+ struct ceph_options **ceph_opts,
+ struct rbd_options **opts,
+ struct rbd_spec **rbd_spec)
+{
+ size_t len;
+ char *options;
+ const char *mon_addrs;
+ char *snap_name;
+ size_t mon_addrs_size;
+ struct rbd_spec *spec = NULL;
+ struct rbd_options *rbd_opts = NULL;
+ struct ceph_options *copts;
+ int ret;
+
+ /* The first four tokens are required */
+
+ /*
+ * The monitor addresses are not copied; mon_addrs points into the
+ * caller's buffer and is handed to ceph_parse_options() below.
+ */
+ len = next_token(&buf);
+ if (!len) {
+ rbd_warn(NULL, "no monitor address(es) provided");
+ return -EINVAL;
+ }
+ mon_addrs = buf;
+ mon_addrs_size = len + 1;
+ buf += len;
+
+ /* Default error for the empty-token checks that follow. */
+ ret = -EINVAL;
+ options = dup_token(&buf, NULL);
+ if (!options)
+ return -ENOMEM;
+ if (!*options) {
+ rbd_warn(NULL, "no options provided");
+ goto out_err;
+ }
+
+ spec = rbd_spec_alloc();
+ if (!spec)
+ goto out_mem;
+
+ spec->pool_name = dup_token(&buf, NULL);
+ if (!spec->pool_name)
+ goto out_mem;
+ if (!*spec->pool_name) {
+ rbd_warn(NULL, "no pool name provided");
+ goto out_err;
+ }
+
+ spec->image_name = dup_token(&buf, NULL);
+ if (!spec->image_name)
+ goto out_mem;
+ if (!*spec->image_name) {
+ rbd_warn(NULL, "no image name provided");
+ goto out_err;
+ }
+
+ /*
+ * Snapshot name is optional; default is to use "-"
+ * (indicating the head/no snapshot).
+ */
+ len = next_token(&buf);
+ if (!len) {
+ buf = RBD_SNAP_HEAD_NAME; /* No snapshot supplied */
+ len = sizeof (RBD_SNAP_HEAD_NAME) - 1;
+ } else if (len > RBD_MAX_SNAP_NAME_LEN) {
+ ret = -ENAMETOOLONG;
+ goto out_err;
+ }
+ snap_name = kmemdup(buf, len + 1, GFP_KERNEL);
+ if (!snap_name)
+ goto out_mem;
+ *(snap_name + len) = '\0';
+ spec->snap_name = snap_name;
+
+ /* Initialize all rbd options to the defaults */
+
+ rbd_opts = kzalloc(sizeof (*rbd_opts), GFP_KERNEL);
+ if (!rbd_opts)
+ goto out_mem;
+
+ rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
+
+ /* rbd-specific option tokens are routed to parse_rbd_opts_token(). */
+ copts = ceph_parse_options(options, mon_addrs,
+ mon_addrs + mon_addrs_size - 1,
+ parse_rbd_opts_token, rbd_opts);
+ if (IS_ERR(copts)) {
+ ret = PTR_ERR(copts);
+ goto out_err;
+ }
+ kfree(options);
+
+ /* Success: ownership of all three results passes to the caller. */
+ *ceph_opts = copts;
+ *opts = rbd_opts;
+ *rbd_spec = spec;
+
+ return 0;
+out_mem:
+ ret = -ENOMEM;
+out_err:
+ kfree(rbd_opts);
+ rbd_spec_put(spec);
+ kfree(options);
+
+ return ret;
+}
+
+/*
+ * An rbd format 2 image has a unique identifier, distinct from the
+ * name given to it by the user. Internally, that identifier is
+ * what's used to specify the names of objects related to the image.
+ *
+ * A special "rbd id" object is used to map an rbd image name to its
+ * id. If that object doesn't exist, then there is no v2 rbd image
+ * with the supplied name.
+ *
+ * This function will record the given rbd_dev's image_id field if
+ * it can be determined, and in that case will return 0. If any
+ * errors occur a negative errno will be returned and the rbd_dev's
+ * image_id field will be unchanged (and should be NULL).
+ */
+static int rbd_dev_image_id(struct rbd_device *rbd_dev)
+{
+ int ret;
+ size_t size;
+ char *object_name;
+ void *response;
+ char *image_id;
+
+ /*
+ * When probing a parent image, the image id is already
+ * known (and the image name likely is not). There's no
+ * need to fetch the image id again in this case. We
+ * do still need to set the image format though.
+ */
+ if (rbd_dev->spec->image_id) {
+ /* An empty image id is the convention for a format 1 image. */
+ rbd_dev->image_format = *rbd_dev->spec->image_id ? 2 : 1;
+
+ return 0;
+ }
+
+ /*
+ * First, see if the format 2 image id file exists, and if
+ * so, get the image's persistent id from it.
+ */
+ size = sizeof (RBD_ID_PREFIX) + strlen(rbd_dev->spec->image_name);
+ object_name = kmalloc(size, GFP_NOIO);
+ if (!object_name)
+ return -ENOMEM;
+ sprintf(object_name, "%s%s", RBD_ID_PREFIX, rbd_dev->spec->image_name);
+ dout("rbd id object name is %s\n", object_name);
+
+ /* Response will be an encoded string, which includes a length */
+
+ size = sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX;
+ response = kzalloc(size, GFP_NOIO);
+ if (!response) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* If it doesn't exist we'll assume it's a format 1 image */
+
+ ret = rbd_obj_method_sync(rbd_dev, object_name,
+ "rbd", "get_id", NULL, 0,
+ response, RBD_IMAGE_ID_LEN_MAX);
+ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+ if (ret == -ENOENT) {
+ /* No id object: format 1 image, record an empty id. */
+ image_id = kstrdup("", GFP_KERNEL);
+ ret = image_id ? 0 : -ENOMEM;
+ if (!ret)
+ rbd_dev->image_format = 1;
+ } else if (ret > sizeof (__le32)) {
+ void *p = response;
+
+ image_id = ceph_extract_encoded_string(&p, p + ret,
+ NULL, GFP_NOIO);
+ ret = IS_ERR(image_id) ? PTR_ERR(image_id) : 0;
+ if (!ret)
+ rbd_dev->image_format = 2;
+ } else {
+ /* Reply too short to hold even the length prefix. */
+ ret = -EINVAL;
+ }
+
+ if (!ret) {
+ rbd_dev->spec->image_id = image_id;
+ dout("image_id is %s\n", image_id);
+ }
+out:
+ kfree(response);
+ kfree(object_name);
+
+ return ret;
+}
+
+/*
+ * Undo whatever state changes are made by v1 or v2 header info
+ * call.
+ */
+static void rbd_dev_unprobe(struct rbd_device *rbd_dev)
+{
+ struct rbd_image_header *header;
+
+ /* Drop parent reference unless it's already been done (or none) */
+
+ if (rbd_dev->parent_overlap)
+ rbd_dev_parent_put(rbd_dev);
+
+ /* Free dynamic fields from the header, then zero it out */
+
+ header = &rbd_dev->header;
+ ceph_put_snap_context(header->snapc);
+ kfree(header->snap_sizes);
+ kfree(header->snap_names);
+ kfree(header->object_prefix);
+ /* Zeroing also resets object_prefix, re-arming the "first time"
+ * detection in rbd_dev_v2_header_info(). */
+ memset(header, 0, sizeof (*header));
+}
+
+/*
+ * Fetch the format 2 header fields that are only read once: the
+ * object prefix, the feature bits, and (when the image uses fancy
+ * striping) the striping parameters.  On failure all one-time state
+ * set here is rolled back.
+ */
+static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev)
+{
+ int ret;
+
+ ret = rbd_dev_v2_object_prefix(rbd_dev);
+ if (ret)
+ goto out_err;
+
+ /*
+ * Get and check the features for the image. Currently the
+ * features are assumed to never change.
+ */
+ ret = rbd_dev_v2_features(rbd_dev);
+ if (ret)
+ goto out_err;
+
+ /* If the image supports fancy striping, get its parameters */
+
+ if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) {
+ ret = rbd_dev_v2_striping_info(rbd_dev);
+ if (ret < 0)
+ goto out_err;
+ }
+ /* No support for crypto and compression type format 2 images */
+
+ return 0;
+out_err:
+ rbd_dev->header.features = 0;
+ kfree(rbd_dev->header.object_prefix);
+ rbd_dev->header.object_prefix = NULL;
+
+ return ret;
+}
+
+/*
+ * If this image has a parent (layering), create and probe an rbd_dev
+ * for the parent image, sharing the client and parent spec.  A no-op
+ * for images without a parent spec.  Returns 0 or a negative errno.
+ */
+static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
+{
+ struct rbd_device *parent = NULL;
+ struct rbd_spec *parent_spec;
+ struct rbd_client *rbdc;
+ int ret;
+
+ if (!rbd_dev->parent_spec)
+ return 0;
+ /*
+ * We need to pass a reference to the client and the parent
+ * spec when creating the parent rbd_dev. Images related by
+ * parent/child relationships always share both.
+ */
+ parent_spec = rbd_spec_get(rbd_dev->parent_spec);
+ rbdc = __rbd_get_client(rbd_dev->rbd_client);
+
+ ret = -ENOMEM;
+ parent = rbd_dev_create(rbdc, parent_spec);
+ if (!parent)
+ goto out_err;
+
+ /* false: the parent is not the mapped image, so no header watch. */
+ ret = rbd_dev_image_probe(parent, false);
+ if (ret < 0)
+ goto out_err;
+ rbd_dev->parent = parent;
+ atomic_set(&rbd_dev->parent_ref, 1);
+
+ return 0;
+out_err:
+ if (parent) {
+ /* The references taken above are now owned by parent and
+ * dropped by rbd_dev_destroy().
+ * NOTE(review): freeing rbd_dev->header_name here without
+ * NULLing it looks suspicious -- confirm against the
+ * cleanup done in rbd_dev_image_probe(). */
+ rbd_dev_unparent(rbd_dev);
+ kfree(rbd_dev->header_name);
+ rbd_dev_destroy(parent);
+ } else {
+ rbd_put_client(rbdc);
+ rbd_spec_put(parent_spec);
+ }
+
+ return ret;
+}
+
+/*
+ * Make a probed image visible as a block device: allocate a dev id,
+ * a major/minor, the gendisk and mapping, register with sysfs, and
+ * finally announce the disk.  Returns 0 or a negative errno; on
+ * failure all steps taken so far are undone.
+ */
+static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
+{
+ int ret;
+
+ /* Get an id and fill in device name. */
+
+ ret = rbd_dev_id_get(rbd_dev);
+ if (ret)
+ return ret;
+
+ BUILD_BUG_ON(DEV_NAME_LEN
+ < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH);
+ sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id);
+
+ /* Record our major and minor device numbers. */
+
+ if (!single_major) {
+ ret = register_blkdev(0, rbd_dev->name);
+ if (ret < 0)
+ goto err_out_id;
+
+ rbd_dev->major = ret;
+ rbd_dev->minor = 0;
+ } else {
+ /* Shared major: derive the minor from the dev id. */
+ rbd_dev->major = rbd_major;
+ rbd_dev->minor = rbd_dev_id_to_minor(rbd_dev->dev_id);
+ }
+
+ /* Set up the blkdev mapping. */
+
+ ret = rbd_init_disk(rbd_dev);
+ if (ret)
+ goto err_out_blkdev;
+
+ ret = rbd_dev_mapping_set(rbd_dev);
+ if (ret)
+ goto err_out_disk;
+ set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
+
+ ret = rbd_bus_add_dev(rbd_dev);
+ if (ret)
+ goto err_out_mapping;
+
+ /* Everything's ready. Announce the disk to the world. */
+
+ set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
+ add_disk(rbd_dev->disk);
+
+ pr_info("%s: added with size 0x%llx\n", rbd_dev->disk->disk_name,
+ (unsigned long long) rbd_dev->mapping.size);
+
+ return ret;
+
+err_out_mapping:
+ rbd_dev_mapping_clear(rbd_dev);
+err_out_disk:
+ rbd_free_disk(rbd_dev);
+err_out_blkdev:
+ if (!single_major)
+ unregister_blkdev(rbd_dev->major, rbd_dev->name);
+err_out_id:
+ rbd_dev_id_put(rbd_dev);
+ /* NOTE(review): rbd_dev_mapping_clear() may run twice on this
+ * path (also at err_out_mapping above) -- confirm it is safe to
+ * call on an unset/cleared mapping. */
+ rbd_dev_mapping_clear(rbd_dev);
+
+ return ret;
+}
+
+static int rbd_dev_header_name(struct rbd_device *rbd_dev)
+{
+ struct rbd_spec *spec = rbd_dev->spec;
+ size_t size;
+
+ /* Record the header object name for this rbd image. */
+
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+
+ /*
+ * Format 1: "<image_name>.rbd"; format 2: "rbd_header.<image_id>".
+ * sizeof of the string constant accounts for the trailing NUL.
+ */
+ if (rbd_dev->image_format == 1)
+ size = strlen(spec->image_name) + sizeof (RBD_SUFFIX);
+ else
+ size = sizeof (RBD_HEADER_PREFIX) + strlen(spec->image_id);
+
+ rbd_dev->header_name = kmalloc(size, GFP_KERNEL);
+ if (!rbd_dev->header_name)
+ return -ENOMEM;
+
+ if (rbd_dev->image_format == 1)
+ sprintf(rbd_dev->header_name, "%s%s",
+ spec->image_name, RBD_SUFFIX);
+ else
+ sprintf(rbd_dev->header_name, "%s%s",
+ RBD_HEADER_PREFIX, spec->image_id);
+ return 0;
+}
+
+/*
+ * Tear down everything set up by rbd_dev_image_probe() and destroy
+ * the rbd_dev itself.
+ */
+static void rbd_dev_image_release(struct rbd_device *rbd_dev)
+{
+ rbd_dev_unprobe(rbd_dev);
+ kfree(rbd_dev->header_name);
+ rbd_dev->header_name = NULL;
+ rbd_dev->image_format = 0;
+ kfree(rbd_dev->spec->image_id);
+ rbd_dev->spec->image_id = NULL;
+
+ rbd_dev_destroy(rbd_dev);
+}
+
+/*
+ * Probe for the existence of the header object for the given rbd
+ * device. If this image is the one being mapped (i.e., not a
+ * parent), initiate a watch on its header object before using that
+ * object to get detailed information about the rbd image.
+ */
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
+{
+ int ret;
+
+ /*
+ * Get the id from the image id object. Unless there's an
+ * error, rbd_dev->spec->image_id will be filled in with
+ * a dynamically-allocated string, and rbd_dev->image_format
+ * will be set to either 1 or 2.
+ */
+ ret = rbd_dev_image_id(rbd_dev);
+ if (ret)
+ return ret;
+ rbd_assert(rbd_dev->spec->image_id);
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+
+ ret = rbd_dev_header_name(rbd_dev);
+ if (ret)
+ goto err_out_format;
+
+ /* Only the mapped (top-level) image watches its header object. */
+ if (mapping) {
+ ret = rbd_dev_header_watch_sync(rbd_dev);
+ if (ret)
+ goto out_header_name;
+ }
+
+ if (rbd_dev->image_format == 1)
+ ret = rbd_dev_v1_header_info(rbd_dev);
+ else
+ ret = rbd_dev_v2_header_info(rbd_dev);
+ if (ret)
+ goto err_out_watch;
+
+ ret = rbd_dev_spec_update(rbd_dev);
+ if (ret)
+ goto err_out_probe;
+
+ ret = rbd_dev_probe_parent(rbd_dev);
+ if (ret)
+ goto err_out_probe;
+
+ dout("discovered format %u image, header name is %s\n",
+ rbd_dev->image_format, rbd_dev->header_name);
+
+ return 0;
+/* Error labels unwind in reverse order of the setup steps above. */
+err_out_probe:
+ rbd_dev_unprobe(rbd_dev);
+err_out_watch:
+ if (mapping)
+ rbd_dev_header_unwatch_sync(rbd_dev);
+out_header_name:
+ kfree(rbd_dev->header_name);
+ rbd_dev->header_name = NULL;
+err_out_format:
+ rbd_dev->image_format = 0;
+ kfree(rbd_dev->spec->image_id);
+ rbd_dev->spec->image_id = NULL;
+
+ dout("probe failed, returning %d\n", ret);
+
+ return ret;
+}
+
+/*
+ * Handle a write to the sysfs "add" attribute: parse the mapping
+ * request in buf, probe the image, and set up the block device.
+ * Returns count on success or a negative errno.  A successful map
+ * keeps the module reference taken here until removal.
+ */
+static ssize_t do_rbd_add(struct bus_type *bus,
+ const char *buf,
+ size_t count)
+{
+ struct rbd_device *rbd_dev = NULL;
+ struct ceph_options *ceph_opts = NULL;
+ struct rbd_options *rbd_opts = NULL;
+ struct rbd_spec *spec = NULL;
+ struct rbd_client *rbdc;
+ struct ceph_osd_client *osdc;
+ bool read_only;
+ int rc = -ENOMEM;
+
+ if (!try_module_get(THIS_MODULE))
+ return -ENODEV;
+
+ /* parse add command */
+ rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec);
+ if (rc < 0)
+ goto err_out_module;
+ /* Only the read_only flag is needed from the rbd options. */
+ read_only = rbd_opts->read_only;
+ kfree(rbd_opts);
+ rbd_opts = NULL; /* done with this */
+
+ /* rbd_get_client() consumes ceph_opts (even on failure). */
+ rbdc = rbd_get_client(ceph_opts);
+ if (IS_ERR(rbdc)) {
+ rc = PTR_ERR(rbdc);
+ goto err_out_args;
+ }
+
+ /* pick the pool */
+ osdc = &rbdc->client->osdc;
+ rc = ceph_pg_poolid_by_name(osdc->osdmap, spec->pool_name);
+ if (rc < 0)
+ goto err_out_client;
+ spec->pool_id = (u64)rc;
+
+ /* The ceph file layout needs to fit pool id in 32 bits */
+
+ if (spec->pool_id > (u64)U32_MAX) {
+ rbd_warn(NULL, "pool id too large (%llu > %u)\n",
+ (unsigned long long)spec->pool_id, U32_MAX);
+ rc = -EIO;
+ goto err_out_client;
+ }
+
+ rbd_dev = rbd_dev_create(rbdc, spec);
+ if (!rbd_dev)
+ goto err_out_client;
+ rbdc = NULL; /* rbd_dev now owns this */
+ spec = NULL; /* rbd_dev now owns this */
+
+ rc = rbd_dev_image_probe(rbd_dev, true);
+ if (rc < 0)
+ goto err_out_rbd_dev;
+
+ /* If we are mapping a snapshot it must be marked read-only */
+
+ if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
+ read_only = true;
+ rbd_dev->mapping.read_only = read_only;
+
+ rc = rbd_dev_device_setup(rbd_dev);
+ if (rc) {
+ /*
+ * rbd_dev_header_unwatch_sync() can't be moved into
+ * rbd_dev_image_release() without refactoring, see
+ * commit 1f3ef78861ac.
+ */
+ rbd_dev_header_unwatch_sync(rbd_dev);
+ rbd_dev_image_release(rbd_dev);
+ goto err_out_module;
+ }
+
+ return count;
+
+err_out_rbd_dev:
+ rbd_dev_destroy(rbd_dev);
+err_out_client:
+ rbd_put_client(rbdc);
+err_out_args:
+ rbd_spec_put(spec);
+err_out_module:
+ module_put(THIS_MODULE);
+
+ dout("Error adding device %s\n", buf);
+
+ return (ssize_t)rc;
+}
+
+/* sysfs "add" handler used when each device gets its own major. */
+static ssize_t rbd_add(struct bus_type *bus,
+ const char *buf,
+ size_t count)
+{
+ if (single_major)
+ return -EINVAL;
+
+ return do_rbd_add(bus, buf, count);
+}
+
+/* sysfs "add_single_major" handler used in single-major mode. */
+static ssize_t rbd_add_single_major(struct bus_type *bus,
+ const char *buf,
+ size_t count)
+{
+ return do_rbd_add(bus, buf, count);
+}
+
+/*
+ * Driver-core release callback (see rbd_bus_add_dev()); undoes what
+ * rbd_dev_device_setup() did.
+ */
+static void rbd_dev_device_release(struct device *dev)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+
+ rbd_free_disk(rbd_dev);
+ clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
+ rbd_dev_mapping_clear(rbd_dev);
+ if (!single_major)
+ unregister_blkdev(rbd_dev->major, rbd_dev->name);
+ rbd_dev_id_put(rbd_dev);
+ /* NOTE(review): second rbd_dev_mapping_clear() call appears
+ * redundant with the one above -- confirm. */
+ rbd_dev_mapping_clear(rbd_dev);
+}
+
+/*
+ * Release the whole chain of parent images above rbd_dev.  Each pass
+ * walks to the deepest ancestor (the one with no grandparent) and
+ * releases it, so the chain is torn down leaf-most first.
+ */
+static void rbd_dev_remove_parent(struct rbd_device *rbd_dev)
+{
+ while (rbd_dev->parent) {
+ struct rbd_device *first = rbd_dev;
+ struct rbd_device *second = first->parent;
+ struct rbd_device *third;
+
+ /*
+ * Follow to the parent with no grandparent and
+ * remove it.
+ */
+ while (second && (third = second->parent)) {
+ first = second;
+ second = third;
+ }
+ rbd_assert(second);
+ rbd_dev_image_release(second);
+ /* Detach the released parent from its child. */
+ first->parent = NULL;
+ first->parent_overlap = 0;
+
+ rbd_assert(first->parent_spec);
+ rbd_spec_put(first->parent_spec);
+ first->parent_spec = NULL;
+ }
+}
+
+/*
+ * Handle a write to the sysfs "remove" attribute: buf holds the
+ * decimal dev id of the mapping to remove.  Fails with -EBUSY if the
+ * device is open, -ENOENT if no such device exists.  Returns count
+ * on success.
+ */
+static ssize_t do_rbd_remove(struct bus_type *bus,
+ const char *buf,
+ size_t count)
+{
+ struct rbd_device *rbd_dev = NULL;
+ struct list_head *tmp;
+ int dev_id;
+ unsigned long ul;
+ bool already = false;
+ int ret;
+
+ ret = kstrtoul(buf, 10, &ul);
+ if (ret)
+ return ret;
+
+ /* convert to int; abort if we lost anything in the conversion */
+ dev_id = (int)ul;
+ if (dev_id != ul)
+ return -EINVAL;
+
+ ret = -ENOENT;
+ spin_lock(&rbd_dev_list_lock);
+ list_for_each(tmp, &rbd_dev_list) {
+ rbd_dev = list_entry(tmp, struct rbd_device, node);
+ if (rbd_dev->dev_id == dev_id) {
+ ret = 0;
+ break;
+ }
+ }
+ if (!ret) {
+ /*
+ * Mark the device as being removed under its lock so a
+ * concurrent open either sees the flag or bumps open_count
+ * first; "already" means another removal won the race.
+ */
+ spin_lock_irq(&rbd_dev->lock);
+ if (rbd_dev->open_count)
+ ret = -EBUSY;
+ else
+ already = test_and_set_bit(RBD_DEV_FLAG_REMOVING,
+ &rbd_dev->flags);
+ spin_unlock_irq(&rbd_dev->lock);
+ }
+ spin_unlock(&rbd_dev_list_lock);
+ if (ret < 0 || already)
+ return ret;
+
+ rbd_dev_header_unwatch_sync(rbd_dev);
+ /*
+ * flush remaining watch callbacks - these must be complete
+ * before the osd_client is shutdown
+ */
+ dout("%s: flushing notifies", __func__);
+ ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
+
+ /*
+ * Don't free anything from rbd_dev->disk until after all
+ * notifies are completely processed. Otherwise
+ * rbd_bus_del_dev() will race with rbd_watch_cb(), resulting
+ * in a potential use after free of rbd_dev->disk or rbd_dev.
+ */
+ rbd_bus_del_dev(rbd_dev);
+ rbd_dev_image_release(rbd_dev);
+ /* Drop the module reference taken when the device was added. */
+ module_put(THIS_MODULE);
+
+ return count;
+}
+
+/* sysfs "remove" handler used when each device gets its own major. */
+static ssize_t rbd_remove(struct bus_type *bus,
+ const char *buf,
+ size_t count)
+{
+ if (single_major)
+ return -EINVAL;
+
+ return do_rbd_remove(bus, buf, count);
+}
+
+/* sysfs "remove_single_major" handler used in single-major mode. */
+static ssize_t rbd_remove_single_major(struct bus_type *bus,
+ const char *buf,
+ size_t count)
+{
+ return do_rbd_remove(bus, buf, count);
+}
+
+/*
+ * create control files in sysfs
+ * /sys/bus/rbd/...
+ */
+static int rbd_sysfs_init(void)
+{
+ int ret;
+
+ ret = device_register(&rbd_root_dev);
+ if (ret < 0)
+ return ret;
+
+ /* Roll back the root device if bus registration fails. */
+ ret = bus_register(&rbd_bus_type);
+ if (ret < 0)
+ device_unregister(&rbd_root_dev);
+
+ return ret;
+}
+
+/* Reverse of rbd_sysfs_init(), in the opposite order. */
+static void rbd_sysfs_cleanup(void)
+{
+ bus_unregister(&rbd_bus_type);
+ device_unregister(&rbd_root_dev);
+}
+
+/*
+ * Create the three slab caches used by the driver (image requests,
+ * object requests, segment names).  Returns 0 or -ENOMEM; on failure
+ * any caches already created are destroyed again.
+ */
+static int rbd_slab_init(void)
+{
+ rbd_assert(!rbd_img_request_cache);
+ rbd_img_request_cache = kmem_cache_create("rbd_img_request",
+ sizeof (struct rbd_img_request),
+ __alignof__(struct rbd_img_request),
+ 0, NULL);
+ if (!rbd_img_request_cache)
+ return -ENOMEM;
+
+ rbd_assert(!rbd_obj_request_cache);
+ rbd_obj_request_cache = kmem_cache_create("rbd_obj_request",
+ sizeof (struct rbd_obj_request),
+ __alignof__(struct rbd_obj_request),
+ 0, NULL);
+ if (!rbd_obj_request_cache)
+ goto out_err;
+
+ rbd_assert(!rbd_segment_name_cache);
+ rbd_segment_name_cache = kmem_cache_create("rbd_segment_name",
+ CEPH_MAX_OID_NAME_LEN + 1, 1, 0, NULL);
+ if (rbd_segment_name_cache)
+ return 0;
+out_err:
+ if (rbd_obj_request_cache) {
+ kmem_cache_destroy(rbd_obj_request_cache);
+ rbd_obj_request_cache = NULL;
+ }
+
+ kmem_cache_destroy(rbd_img_request_cache);
+ rbd_img_request_cache = NULL;
+
+ return -ENOMEM;
+}
+
+/* Destroy the slab caches created by rbd_slab_init(). */
+static void rbd_slab_exit(void)
+{
+ rbd_assert(rbd_segment_name_cache);
+ kmem_cache_destroy(rbd_segment_name_cache);
+ rbd_segment_name_cache = NULL;
+
+ rbd_assert(rbd_obj_request_cache);
+ kmem_cache_destroy(rbd_obj_request_cache);
+ rbd_obj_request_cache = NULL;
+
+ rbd_assert(rbd_img_request_cache);
+ kmem_cache_destroy(rbd_img_request_cache);
+ rbd_img_request_cache = NULL;
+}
+
+/*
+ * Module init: check libceph compatibility, create slab caches,
+ * reserve the shared block major in single_major mode, and register
+ * the sysfs bus/device.  Unwinds everything on failure.
+ */
+static int __init rbd_init(void)
+{
+ int rc;
+
+ if (!libceph_compatible(NULL)) {
+ rbd_warn(NULL, "libceph incompatibility (quitting)");
+ return -EINVAL;
+ }
+
+ rc = rbd_slab_init();
+ if (rc)
+ return rc;
+
+ if (single_major) {
+ /* One shared major for all devices; 0 = dynamic allocation. */
+ rbd_major = register_blkdev(0, RBD_DRV_NAME);
+ if (rbd_major < 0) {
+ rc = rbd_major;
+ goto err_out_slab;
+ }
+ }
+
+ rc = rbd_sysfs_init();
+ if (rc)
+ goto err_out_blkdev;
+
+ if (single_major)
+ pr_info("loaded (major %d)\n", rbd_major);
+ else
+ pr_info("loaded\n");
+
+ return 0;
+
+err_out_blkdev:
+ if (single_major)
+ unregister_blkdev(rbd_major, RBD_DRV_NAME);
+err_out_slab:
+ rbd_slab_exit();
+ return rc;
+}
+
+/* Module exit: undo rbd_init() in reverse order. */
+static void __exit rbd_exit(void)
+{
+ rbd_sysfs_cleanup();
+ if (single_major)
+ unregister_blkdev(rbd_major, RBD_DRV_NAME);
+ rbd_slab_exit();
+}
+
+module_init(rbd_init);
+module_exit(rbd_exit);
+
+MODULE_AUTHOR("Alex Elder <elder at inktank.com>");
+MODULE_AUTHOR("Sage Weil <sage at newdream.net>");
+MODULE_AUTHOR("Yehuda Sadeh <yehuda at hq.newdream.net>");
+/* following authorship retained from original osdblk.c */
+MODULE_AUTHOR("Jeff Garzik <jeff at garzik.org>");
+
+MODULE_DESCRIPTION("RADOS Block Device (RBD) driver");
+MODULE_LICENSE("GPL");
diff --git a/rbd/rbd_types.h b/rbd/rbd_types.h
new file mode 100644
index 0000000..49d77cb
--- /dev/null
+++ b/rbd/rbd_types.h
@@ -0,0 +1,81 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2010 Sage Weil <sage at newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_RBD_TYPES_H
+#define CEPH_RBD_TYPES_H
+
+#include <linux/types.h>
+
+/* For format version 2, rbd image 'foo' consists of objects
+ * rbd_id.foo - id of image
+ * rbd_header.<id> - image metadata
+ * rbd_data.<id>.0000000000000000
+ * rbd_data.<id>.0000000000000001
+ * ... - data
+ * Clients do not access header data directly in rbd format 2.
+ */
+
+#define RBD_HEADER_PREFIX "rbd_header."
+#define RBD_DATA_PREFIX "rbd_data."
+#define RBD_ID_PREFIX "rbd_id."
+
+/*
+ * For format version 1, rbd image 'foo' consists of objects
+ * foo.rbd - image metadata
+ * rb.<idhi>.<idlo>.00000000
+ * rb.<idhi>.<idlo>.00000001
+ * ... - data
+ * There is no notion of a persistent image id in rbd format 1.
+ */
+
+#define RBD_SUFFIX ".rbd"
+
+#define RBD_DIRECTORY "rbd_directory"
+#define RBD_INFO "rbd_info"
+
+#define RBD_DEFAULT_OBJ_ORDER 22 /* 4MB */
+#define RBD_MIN_OBJ_ORDER 16
+#define RBD_MAX_OBJ_ORDER 30
+
+#define RBD_COMP_NONE 0
+#define RBD_CRYPT_NONE 0
+
+#define RBD_HEADER_TEXT "<<< Rados Block Device Image >>>\n"
+#define RBD_HEADER_SIGNATURE "RBD"
+#define RBD_HEADER_VERSION "001.005"
+
+/* On-disk (format 1) record for a single snapshot: id and the image
+ * size at the time the snapshot was taken.  All fields little-endian. */
+struct rbd_image_snap_ondisk {
+ __le64 id;
+ __le64 image_size;
+} __attribute__((packed));
+
+/* On-disk layout of a format 1 image header object ("<name>.rbd").
+ * Fixed-size fields are followed by a variable-length snapshot array. */
+struct rbd_image_header_ondisk {
+ char text[40]; /* RBD_HEADER_TEXT banner */
+ char object_prefix[24];
+ char signature[4]; /* RBD_HEADER_SIGNATURE */
+ char version[8]; /* RBD_HEADER_VERSION */
+ struct {
+ __u8 order;
+ __u8 crypt_type;
+ __u8 comp_type;
+ __u8 unused;
+ } __attribute__((packed)) options;
+ __le64 image_size;
+ __le64 snap_seq;
+ __le32 snap_count;
+ __le32 reserved;
+ __le64 snap_names_len;
+ /* Old-style flexible array: snap_count entries follow the header. */
+ struct rbd_image_snap_ondisk snaps[0];
+} __attribute__((packed));
+
+
+#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ceph/ceph-dkms.git
More information about the Pkg-ceph-commits
mailing list