[Pkg-ceph-commits] [ceph-dkms] 02/02: Imported Upstream version 3.14+git20140429
Dmitry Smirnov
onlyjob at moszumanska.debian.org
Thu May 8 20:10:42 UTC 2014
This is an automated email from the git hooks/post-receive script.
onlyjob pushed a commit to branch upstream
in repository ceph-dkms.
commit 29507ad (upstream)
Author: Dmitry Smirnov <onlyjob at member.fsf.org>
Date: Thu May 8 13:36:04 2014
Imported Upstream version 3.14+git20140429
---
ChangeLog | 18347 +++++++++++++++++++++++++++++++++++++++++++
ceph/Kconfig | 40 +
ceph/Makefile | 13 +
ceph/acl.c | 200 +
ceph/addr.c | 1345 ++++
ceph/cache.c | 402 +
ceph/cache.h | 182 +
ceph/caps.c | 3313 ++++++++
ceph/ceph_frag.c | 22 +
ceph/debugfs.c | 277 +
ceph/dir.c | 1349 ++++
ceph/export.c | 250 +
ceph/file.c | 1294 +++
ceph/inode.c | 1927 +++++
ceph/ioctl.c | 296 +
ceph/ioctl.h | 100 +
ceph/locks.c | 338 +
ceph/mds_client.c | 3665 +++++++++
ceph/mds_client.h | 393 +
ceph/mdsmap.c | 189 +
ceph/snap.c | 932 +++
ceph/strings.c | 124 +
ceph/super.c | 1061 +++
ceph/super.h | 890 +++
ceph/xattr.c | 1128 +++
keys/ceph-type.h | 8 +
libceph/Kconfig | 43 +
libceph/Makefile | 15 +
libceph/armor.c | 105 +
libceph/auth.c | 340 +
libceph/auth_none.c | 137 +
libceph/auth_none.h | 29 +
libceph/auth_x.c | 711 ++
libceph/auth_x.h | 51 +
libceph/auth_x_protocol.h | 90 +
libceph/buffer.c | 58 +
libceph/ceph_common.c | 664 ++
libceph/ceph_fs.c | 78 +
libceph/ceph_hash.c | 121 +
libceph/ceph_strings.c | 123 +
libceph/crush/crush.c | 129 +
libceph/crush/hash.c | 149 +
libceph/crush/mapper.c | 819 ++
libceph/crypto.c | 487 ++
libceph/crypto.h | 51 +
libceph/debugfs.c | 282 +
libceph/messenger.c | 3316 ++++++++
libceph/mon_client.c | 1102 +++
libceph/msgpool.c | 83 +
libceph/osd_client.c | 2904 +++++++
libceph/osdmap.c | 1724 ++++
libceph/pagelist.c | 147 +
libceph/pagevec.c | 231 +
libceph/snapshot.c | 78 +
linux/ceph/auth.h | 116 +
linux/ceph/buffer.h | 38 +
linux/ceph/ceph_debug.h | 38 +
linux/ceph/ceph_features.h | 104 +
linux/ceph/ceph_frag.h | 109 +
linux/ceph/ceph_fs.h | 789 ++
linux/ceph/ceph_hash.h | 13 +
linux/ceph/debugfs.h | 33 +
linux/ceph/decode.h | 259 +
linux/ceph/libceph.h | 230 +
linux/ceph/mdsmap.h | 63 +
linux/ceph/messenger.h | 304 +
linux/ceph/mon_client.h | 121 +
linux/ceph/msgpool.h | 26 +
linux/ceph/msgr.h | 176 +
linux/ceph/osd_client.h | 374 +
linux/ceph/osdmap.h | 225 +
linux/ceph/pagelist.h | 75 +
linux/ceph/rados.h | 436 +
linux/ceph/types.h | 29 +
linux/crush/crush.h | 201 +
linux/crush/hash.h | 17 +
linux/crush/mapper.h | 20 +
rbd/Kconfig | 560 ++
rbd/Makefile | 49 +
rbd/rbd.c | 5406 +++++++++++++
rbd/rbd_types.h | 81 +
81 files changed, 62044 insertions(+)
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..5991140
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,18347 @@
+2014-04-27 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: reserve caps for file layout/lock MDS requests
+
+2014-04-17 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: avoid releasing caps that are being used
+
+2014-04-14 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: clear directory's completeness when creating file
+
+2014-04-10 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: fix non-default values check in apply_primary_affinity()
+
+2014-04-09 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: use fpos_cmp() to compare dentry positions
+
+2014-04-08 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: check directory's completeness before emitting directory entry
+
+2014-04-06 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: skip invalid dentry during dcache readdir
+
+2014-04-04 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: dump pool {read,write}_tier to debugfs
+
+2014-04-02 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: output primary affinity values on osdmap updates
+
+2014-04-01 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: flush cap release queue when trimming session caps
+
+2014-04-01 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: don't grab open file reference for aborted request
+
+2014-04-01 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: drop extra open file reference in ceph_atomic_open()
+
+2014-03-29 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: preallocate buffer for readdir reply
+
+2014-03-24 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: enable PRIMARY_AFFINITY feature bit
+
+2014-03-24 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: redo ceph_calc_pg_primary() in terms of ceph_calc_pg_acting()
+
+2014-03-24 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: add support for osd primary affinity
+
+2014-03-24 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: add support for primary_temp mappings
+
+2014-03-24 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: return primary from ceph_calc_pg_acting()
+
+2014-03-24 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: switch ceph_calc_pg_acting() to new helpers
+
+2014-03-24 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: introduce apply_temps() helper
+
+2014-03-24 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: introduce pg_to_raw_osds() and raw_to_up_osds() helpers
+
+2014-03-24 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: ceph_can_shift_osds(pool) and pool type defines
+
+2014-03-24 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: ceph_osd_{exists,is_up,is_down}(osd) definitions
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: enable OSDMAP_ENC feature bit
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: primary_affinity decode bits
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: primary_affinity infrastructure
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: primary_temp decode bits
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: primary_temp infrastructure
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: generalize ceph_pg_mapping
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: introduce get_osdmap_client_data_v()
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: introduce decode{,_new}_pg_temp() and switch to them
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: switch osdmap_set_max_osd() to krealloc()
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: introduce decode{,_new}_pools() and switch to them
+
+2014-03-21 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: rename __decode_pool{,_names}() to decode_pool{,_names}()
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: fix and clarify ceph_decode_need() sizes
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: nuke bogus encoding version check in osdmap_apply_incremental()
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: fixup error handling in osdmap_apply_incremental()
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: fix crush_decode() call site in osdmap_decode()
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: check length of osdmap osd arrays
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: safely decode max_osd value in osdmap_decode()
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: fixup error handling in osdmap_decode()
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: split osdmap allocation and decode steps
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: dump osdmap and enhance output on decode errors
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: dump pg_temp mappings to debugfs
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: do not prefix osd lines with \t in debugfs output
+
+2014-03-13 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: refer to osdmap directly in osdmap_show()
+
+2014-03-19 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * crush: support chooseleaf_vary_r tunable (tunables3) by default
+
+2014-03-19 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * crush: add SET_CHOOSELEAF_VARY_R step
+
+2014-03-19 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * crush: add chooseleaf_vary_r tunable
+
+2014-03-19 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * crush: allow crush rules to set (re)tries counts to 0
+
+2014-03-19 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * crush: fix off-by-one errors in total_tries refactor
+
+2014-03-24 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: don't include ceph.{file,dir}.layout vxattr in listxattr()
+
+2014-03-24 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: check buffer size in ceph_vxattrcb_layout()
+
+2014-03-24 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: fix null pointer dereference in discard_cap_releases()
+
+2014-03-23 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * libceph: fix oops in ceph_msg_data_{pages,pagelist}_advance()
+
+2014-03-21 Fabian Frederick <fabf at skynet.be>
+
+ * ceph: Remove get/set acl on symlinks
+
+2014-03-18 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: set mds_wanted when MDS reply changes a cap to auth cap
+
+2014-03-09 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: use fl->fl_file as owner identifier of flock and posix lock
+
+2014-03-04 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: forbid mandatory file lock
+
+2014-03-04 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: use fl->fl_type to decide flock operation
+
+2014-03-08 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: update i_max_size even if inode version does not change
+
+2014-03-08 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: make sure write caps are registered with auth MDS
+
+2014-03-01 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: print inode number for LOOKUPINO request
+
+2014-03-06 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: add get_name() NFS export callback
+
+2014-03-01 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: fix ceph_fh_to_parent()
+
+2014-03-01 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: add get_parent() NFS export callback
+
+2014-03-01 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: simplify ceph_fh_to_dentry()
+
+2013-12-26 Yunchuan Wen <yunchuanwen at ubuntukylin.com>
+
+ * ceph: fscache: Wait for completion of object initialization
+
+2013-12-26 Yunchuan Wen <yunchuanwen at ubuntukylin.com>
+
+ * ceph: fscache: Update object store limit after file writing
+
+2013-12-26 Yunchuan Wen <yunchuanwen at ubuntukylin.com>
+
+ * ceph: fscache: add an interface to synchronize object store limit
+
+2013-02-05 Sage Weil <sage at inktank.com>
+
+ * ceph: do not set r_old_dentry_dir on link()
+
+2013-02-05 Sage Weil <sage at inktank.com>
+
+ * ceph: do not assume r_old_dentry[_dir] always set together
+
+2013-02-05 Sage Weil <sage at inktank.com>
+
+ * ceph: do not chain inode updates to parent fsync
+
+2013-02-05 Sage Weil <sage at inktank.com>
+
+ * ceph: avoid useless ceph_get_dentry_parent_inode() in ceph_rename()
+
+2014-03-03 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: let MDS adjust readdir 'frag'
+
+2014-02-28 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: fix reset_readdir()
+
+2014-02-27 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: fix ceph_dir_llseek()
+
+2014-02-25 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * rbd: prefix rbd writes with CEPH_OSD_OP_SETALLOCHINT osd op
+
+2014-02-25 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * rbd: num_ops parameter for rbd_osd_req_create()
+
+2014-02-25 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: bump CEPH_OSD_MAX_OP to 3
+
+2014-02-25 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: add support for CEPH_OSD_OP_SETALLOCHINT osd op
+
+2014-02-25 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: encode CEPH_OSD_OP_FLAG_* op flags
+
+2014-03-04 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * rbd: fix error paths in rbd_img_request_fill()
+
+2014-03-04 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * rbd: remove out_partial label in rbd_img_request_fill()
+
+2014-01-31 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: a per-osdc crush scratch buffer
+
+2014-03-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.14
+
+2014-03-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
+
+2014-03-28 Randy Dunlap <rdunlap at infradead.org>
+
+ * MAINTAINERS: resume as Documentation maintainer
+
+2014-03-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
+
+2014-03-30 Eric Paris <eparis at redhat.com>
+
+ * AUDIT: Allow login in non-init namespaces
+
+2014-03-30 Theodore Ts'o <tytso at mit.edu>
+
+ * ext4: atomically set inode->i_flags in ext4_set_inode_flags()
+
+2014-03-20 Al Viro <viro at zeniv.linux.org.uk>
+
+ * switch mnt_hash to hlist
+
+2014-03-21 Al Viro <viro at zeniv.linux.org.uk>
+
+ * don't bother with propagate_mnt() unless the target is shared
+
+2014-03-20 Al Viro <viro at zeniv.linux.org.uk>
+
+ * keep shadowed vfsmounts together
+
+2014-02-28 Al Viro <viro at zeniv.linux.org.uk>
+
+ * resizable namespace.c hashes
+
+2014-03-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
+
+2014-03-06 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Input: mousedev - fix race when creating mixed device
+
+2014-03-29 Elias Vanderstuyft <elias.vds at gmail.com>
+
+ * Input: don't modify the id of ioctl-provided ff effect on upload failure
+
+2014-03-25 Alex Elder <elder at linaro.org>
+
+ * rbd: drop an unsafe assertion
+
+2014-03-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2014-03-28 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'vlan_offloads'
+
+2014-03-27 Vlad Yasevich <vyasevic at redhat.com>
+
+ * vlan: Warn the user if lowerdev has bad vlan features.
+
+2014-03-27 Vlad Yasevich <vyasevic at redhat.com>
+
+ * veth: Turn off vlan rx acceleration in vlan_features
+
+2014-03-27 Vlad Yasevich <vyasevic at redhat.com>
+
+ * ifb: Remove vlan acceleration from vlan_features
+
+2014-03-27 Vlad Yasevich <vyasevic at redhat.com>
+
+ * qlge: Do not propagate vlan tag offloads to vlans
+
+2014-03-27 Vlad Yasevich <vyasevic at redhat.com>
+
+ * bridge: Fix crash with vlan filtering and tcpdump
+
+2014-03-27 Vlad Yasevich <vyasevic at redhat.com>
+
+ * net: Account for all vlan headers in skb_mac_gso_segment
+
+2014-03-27 Veaceslav Falico <vfalico at redhat.com>
+
+ * MAINTAINERS: bonding: change email address
+
+2014-03-27 Jay Vosburgh <fubar at us.ibm.com>
+
+ * MAINTAINERS: bonding: change email address
+
+2014-03-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (patches from Andrew Morton)
+
+2014-03-28 Artem Fetishev <artem_fetishev at epam.com>
+
+ * x86: fix boot on uniprocessor systems
+
+2014-03-28 Sasha Levin <sasha.levin at oracle.com>
+
+ * ocfs2: check if cluster name exists before deref
+
+2014-03-27 Hannes Frederic Sowa <hannes at stressinduktion.org>
+
+ * ipv6: move DAD and addrconf_verify processing to workqueue
+
+2014-03-27 Eric Dumazet <edumazet at google.com>
+
+ * tcp: fix get_timewait4_sock() delay computation on 64bit
+
+2014-03-27 Flavio Leitner <fbl at redhat.com>
+
+ * openvswitch: fix a possible deadlock and lockdep warning
+
+2014-03-27 Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>
+
+ * bridge: Fix handling stacked vlan tags
+
+2014-03-27 Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>
+
+ * bridge: Fix inability to retrieve vlan tags when tx offload is disabled
+
+2014-03-27 Michael S. Tsirkin <mst at redhat.com>
+
+ * vhost: validate vhost_get_vq_desc return value
+
+2014-03-27 Michael S. Tsirkin <mst at redhat.com>
+
+ * vhost: fix total length when packets are too short
+
+2014-03-28 Sasha Levin <sasha.levin at oracle.com>
+
+ * random32: avoid attempt to late reseed if in the middle of seeding
+
+2014-03-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
+
+2014-03-27 Sasha Levin <sasha.levin at oracle.com>
+
+ * random32: assign to network folks in MAINTAINERS
+
+2014-03-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2014-03-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'i2c/for-current' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux
+
+2014-03-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'stable/for-linus-3.14-rc8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
+
+2014-03-28 Hans de Goede <hdegoede at redhat.com>
+
+ * Input: synaptics - add manual min/max quirk for ThinkPad X240
+
+2014-03-28 Benjamin Tissoires <benjamin.tissoires at redhat.com>
+
+ * Input: synaptics - add manual min/max quirk
+
+2014-03-27 John Stultz <john.stultz at linaro.org>
+
+ * time: Revert to calling clock_was_set_delayed() while in irq context
+
+2014-03-26 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915: Undo gtt scratch pte unmapping again
+
+2014-03-27 Dave Airlie <airlied at redhat.com>
+
+ * drm/radeon: fix runtime suspend breaking secondary GPUs
+
+2014-03-27 Wei Yang <weiyang at linux.vnet.ibm.com>
+
+ * net/mlx4_core: pass pci_device_id.driver_data to __mlx4_init_one during reset
+
+2014-03-26 Zoltan Kiss <zoltan.kiss at citrix.com>
+
+ * core, nfqueue, openvswitch: Orphan frags in skb_zerocopy and handle errors
+
+2014-03-26 Vlad Yasevich <vyasevic at redhat.com>
+
+ * vlan: Set hard_header_len according to available acceleration
+
+2014-03-26 Oliver Neukum <oneukum at suse.de>
+
+ * usbnet: include wait queue head in device structure
+
+2014-03-26 Jason Wang <jasowang at redhat.com>
+
+ * virtio-net: correct error handling of virtqueue_kick()
+
+2014-03-26 Jan Kara <jack at suse.cz>
+
+ * vfs: Allocate anon_inode_inode in anon_inode_init()
+
+2014-03-26 Dave Airlie <airlied at redhat.com>
+
+ * drm/nouveau: fail runtime pm properly.
+
+2014-03-25 Dave Airlie <airlied at redhat.com>
+
+ * drm/udl: take reference to device struct for dma-bufs
+
+2014-03-25 Eric Dumazet <edumazet at google.com>
+
+ * net: unix: non blocking recvmsg() should not return -EINTR
+
+2014-03-26 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'mvneta'
+
+2014-03-26 Thomas Petazzoni <thomas.petazzoni at free-electrons.com>
+
+ * net: mvneta: use devm_ioremap_resource() instead of of_iomap()
+
+2014-03-26 Thomas Petazzoni <thomas.petazzoni at free-electrons.com>
+
+ * net: mvneta: fix usage as a module on RGMII configurations
+
+2014-03-26 Thomas Petazzoni <thomas.petazzoni at free-electrons.com>
+
+ * net: mvneta: rename MVNETA_GMAC2_PSC_ENABLE to MVNETA_GMAC2_PCS_ENABLE
+
+2014-03-26 Hans de Goede <hdegoede at redhat.com>
+
+ * Input: cypress_ps2 - don't report as button pads
+
+2014-03-24 Vlad Yasevich <vyasevic at redhat.com>
+
+ * tg3: Do not include vlan acceleration features in vlan_features
+
+2014-03-23 Pravin B Shelar <pshelar at nicira.com>
+
+ * ip_tunnel: Fix dst ref-count.
+
+2014-03-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'trace-fixes-v3.14-rc7-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
+
+2014-03-25 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * tracing: Fix traceon trigger condition to actually turn tracing on
+
+2014-03-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * fs: remove now stale label in anon_inode_init()
+
+2014-03-25 Jan Kara <jack at suse.cz>
+
+ * fs: Avoid userspace mounting anon_inodefs filesystem
+
+2014-03-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'nfsd-next' of git://linux-nfs.org/~bfields/linux
+
+2014-03-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
+
+2014-03-25 David Vrabel <david.vrabel at citrix.com>
+
+ * Revert "xen: properly account for _PAGE_NUMA during xen pte translations"
+
+2014-03-15 Wei Liu <wei.liu2 at citrix.com>
+
+ * xen/balloon: flush persistent kmaps in correct position
+
+2014-03-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.14-rc8
+
+2014-03-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'parisc-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
+
+2014-03-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc
+
+2014-03-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2014-03-24 Erik Hugne <erik.hugne at ericsson.com>
+
+ * tipc: fix spinlock recursion bug for failed subscriptions
+
+2014-03-24 David Stevens <dlstevens at us.ibm.com>
+
+ * vxlan: fix nonfunctional neigh_reduce()
+
+2014-03-24 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'davinci_emac'
+
+2014-03-24 Christian Riesch <christian.riesch at omicron.at>
+
+ * net: davinci_emac: Fix rollback of emac_dev_open()
+
+2014-03-24 Christian Riesch <christian.riesch at omicron.at>
+
+ * net: davinci_emac: Replace devm_request_irq with request_irq
+
+2014-03-21 Li RongQing <roy.qing.li at gmail.com>
+
+ * netpoll: fix the skb check in pkt_is_ns
+
+2014-03-24 David S. Miller <davem at davemloft.net>
+
+ * sparc64: Make sure %pil interrupts are enabled during hypervisor yield.
+
+2014-03-18 Scott Wood <scottwood at freescale.com>
+
+ * i2c: cpm: Fix build by adding of_address.h and of_irq.h
+
+2014-03-21 Nishanth Menon <nm at ti.com>
+
+ * net: micrel : ks8851-ml: add vdd-supply support
+
+2014-02-21 Will Deacon <will.deacon at arm.com>
+
+ * parisc: locks: remove redundant arch_*_relax operations
+
+2014-03-23 Helge Deller <deller at gmx.de>
+
+ * parisc: wire up sys_utimes
+
+2014-03-01 John David Anglin <dave.anglin at bell.net>
+
+ * parisc: Remove unused CONFIG_PARISC_TMPALIAS code
+
+2014-03-23 Helge Deller <deller at gmx.de>
+
+ * partly revert commit 8a10bc9: parisc/sti_console: prefer Linux fonts over built-in ROM fonts
+
+2014-03-20 Al Viro <viro at zeniv.linux.org.uk>
+
+ * rcuwalk: recheck mount_lock after mountpoint crossing attempts
+
+2014-03-23 Al Viro <viro at zeniv.linux.org.uk>
+
+ * make prepend_name() work correctly when called with negative *buflen
+
+2014-03-16 Eric Biggers <ebiggers3 at gmail.com>
+
+ * vfs: Don't let __fdget_pos() get FMODE_PATH files
+
+2014-03-16 Eric Biggers <ebiggers3 at gmail.com>
+
+ * vfs: atomic f_pos access in llseek()
+
+2014-03-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2014-03-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2014-03-20 Dave Jones <davej at redhat.com>
+
+ * block: free q->flush_rq in blk_init_allocated_queue error paths
+
+2014-03-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * futex: revert back to the explicit waiter counting code
+
+2014-03-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'trace-fixes-v3.14-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
+
+2014-03-20 Hugh Dickins <hughd at google.com>
+
+ * mm: fix swapops.h:131 bug if remap_file_pages raced migration
+
+2014-03-20 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jesse/openvswitch
+
+2014-03-19 Nicolas Dichtel <nicolas.dichtel at 6wind.com>
+
+ * ip6mr: fix mfc notification flags
+
+2014-03-19 Nicolas Dichtel <nicolas.dichtel at 6wind.com>
+
+ * ipmr: fix mfc notification flags
+
+2014-03-19 Nicolas Dichtel <nicolas.dichtel at 6wind.com>
+
+ * rtnetlink: fix fdb notification flags
+
+2014-03-19 Eric Dumazet <edumazet at google.com>
+
+ * tcp: syncookies: do not use getnstimeofday()
+
+2014-03-19 stephen hemminger <shemming at brocade.com>
+
+ * netlink: fix setsockopt in mmap examples in documentation
+
+2014-03-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'upstream' of git://git.linux-mips.org/pub/scm/ralf/upstream-linus
+
+2014-03-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2014-03-20 Ben Pfaff <blp at nicira.com>
+
+ * openvswitch: Correctly report flow used times for first 5 minutes after boot.
+
+2014-02-13 Vaibhav Nagarnaik <vnagarnaik at google.com>
+
+ * tracing: Fix array size mismatch in format string
+
+2013-11-27 Jim Quinlan <jim2101024 at gmail.com>
+
+ * MIPS: Make local_irq_disable macro safe for non-Mipsr2
+
+2014-03-20 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'exynos-drm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/daeinki/drm-exynos into drm-fixes
+
+2014-03-17 Daniel Kurtz <djkurtz at chromium.org>
+
+ * drm/exynos: Fix (more) freeing issues in exynos_drm_drv.c
+
+2014-03-18 Hugh Dickins <hughd at google.com>
+
+ * mm: fix bad rss-counter if remap_file_pages raced migration
+
+2014-03-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pci-v3.14-fixes-3' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
+
+2014-03-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
+
+2014-03-19 Andreas Herrmann <andreas.herrmann at caviumnetworks.com>
+
+ * MIPS: Octeon: Fix warning in of_device_alloc on cn3xxx
+
+2014-03-18 Viller Hsiao <villerhsiao at gmail.com>
+
+ * MIPS: ftrace: Tweak safe_load()/safe_store() macros
+
+2014-03-20 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm-intel-fixes-2014-03-19' of git://anongit.freedesktop.org/drm-intel into drm-fixes
+
+2014-03-18 Roger Luethi <rl at hellgate.ch>
+
+ * via-rhine: Disable device in error path
+
+2014-03-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-19 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ALSA: compress: Pass through return value of open ops callback
+
+2014-02-13 Rafał Miłecki <zajec5 at gmail.com>
+
+ * MIPS: BCM47XX: Check all (32) GPIOs when looking for a pin
+
+2014-03-18 Chris Wilson <chris at chris-wilson.co.uk>
+
+ * drm/i915: Disable stolen memory when DMAR is active
+
+2014-03-17 Jani Nikula <jani.nikula at intel.com>
+
+ * Revert "drm/i915: don't touch the VDD when disabling the panel"
+
+2014-03-18 Li Zefan <lizefan at huawei.com>
+
+ * cgroup: fix a failure path in create_css()
+
+2014-03-18 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Revert "[PATCH] Insert GART region into resource map"
+
+2014-03-18 Peter Senna Tschudin <peter.senna at gmail.com>
+
+ * ATHEROS-ATL1E: Convert iounmap to pci_iounmap
+
+2014-03-18 David Stevens <dlstevens at us.ibm.com>
+
+ * vxlan: fix potential NULL dereference in arp_reduce()
+
+2014-03-18 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'cnic-net'
+
+2014-03-17 Michael Chan <mchan at broadcom.com>
+
+ * cnic: Update version to 2.5.20 and copyright year.
+
+2014-03-17 Michael Chan <mchan at broadcom.com>
+
+ * cnic,bnx2i,bnx2fc: Fix inconsistent use of page size
+
+2014-03-17 Michael Chan <mchan at broadcom.com>
+
+ * cnic: Use proper ulp_ops for per device operations.
+
+2014-03-17 Bjørn Mork <bjorn at mork.no>
+
+ * net: cdc_ncm: fix control message ordering
+
+2014-03-17 lucien <lucien.xin at gmail.com>
+
+ * ipv6: ip6_append_data_mtu do not handle the mtu of the second fragment properly
+
+2014-03-16 Paul Bolle <pebolle at tiscali.nl>
+
+ * isdn/capi: Make Middleware depend on CAPI2.0
+
+2014-03-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
+
+2014-03-18 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec
+
+2014-03-18 Clemens Ladisch <clemens at ladisch.de>
+
+ * ALSA: oxygen: Xonar DG(X): fix Stereo Upmixing regression
+
+2014-03-18 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2014-01-21 Alex Smith <alex.smith at imgtec.com>
+
+ * MIPS: Fix possible build error with transparent hugepages enabled
+
+2014-01-30 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm: Fix use-after-free in the shadow-attache exit code
+
+2014-03-18 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm-intel-fixes-2014-03-17' of git://anongit.freedesktop.org/drm-intel into drm-fixes
+
+2014-03-16 Benedikt Spranger <b.spranger at linutronix.de>
+
+ * net: cpsw: do not register cpts twice
+
+2014-03-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.dk/linux-block
+
+2014-03-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'upstream' of git://git.linux-mips.org/pub/scm/ralf/upstream-linus
+
+2014-03-16 Peter Senna Tschudin <peter.senna at gmail.com>
+
+ * ATHEROS-ALX: Use dma_set_mask_and_coherent and fix a bug
+
+2014-03-07 Doug Wilson <doug.lkml at gmail.com>
+
+ * sparc64:tsb.c:use array size macro rather than number
+
+2014-03-14 Dave Kleikamp <dave.kleikamp at oracle.com>
+
+ * sparc64: don't treat 64-bit syscall return codes as 32-bit
+
+2014-02-14 Paul Burton <paul.burton at imgtec.com>
+
+ * MIPS: mark O32+FP64 experimental for now
+
+2014-03-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
+
+2014-02-22 Viller Hsiao <villerhsiao at gmail.com>
+
+ * MIPS: ftrace: Fix icache flush range error
+
+2014-03-17 Lars Persson <lars.persson at axis.com>
+
+ * MIPS: Fix syscall tracing interface
+
+2014-01-22 Markos Chandras <markos.chandras at imgtec.com>
+
+ * MIPS: asm: syscall: Fix copying system call arguments
+
+2014-03-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.14-rc7
+
+2014-03-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-10 Michael Kerrisk <mtk.manpages at gmail.com>
+
+ * ipc: Fix 2 bugs in msgrcv() MSG_COPY implementation
+
+2014-03-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
+
+2014-03-14 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'for-davem' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless
+
+2014-03-14 Sebastian Hesselbarth <sebastian.hesselbarth at gmail.com>
+
+ * net: phy: fix uninitalized ethtool_wolinfo in phy_suspend
+
+2014-03-13 Joe Perches <joe at perches.com>
+
+ * MAINTAINERS: Add linux.nics at intel.com to INTEL ETHERNET DRIVERS
+
+2014-03-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.14-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2014-03-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dm-3.14-fixes-4' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
+
+2014-02-10 Colin Ian King <colin.king at canonical.com>
+
+ * MIPS: Octeon: Fix fall through on bar type OCTEON_DMA_BAR_TYPE_SMALL
+
+2014-03-14 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless into for-davem
+
+2014-02-07 Huacai Chen <chenhc at lemote.com>
+
+ * MIPS: FPU: Fix conflict of register usage
+
+2014-02-09 Paul Bolle <pebolle at tiscali.nl>
+
+ * MIPS: Replace CONFIG_MIPS64 and CONFIG_MIPS32_R2
+
+2014-03-12 Patrick Palka <patrick at parcs.ath.cx>
+
+ * perf bench: Fix NULL pointer dereference in "perf bench all"
+
+2014-03-13 Simon Wood <simon at mungewell.org>
+
+ * HID: hid-lg4ff: Support new version of G27
+
+2014-03-13 Arnaldo Carvalho de Melo <acme at redhat.com>
+
+ * perf bench numa: Make no args mean 'run all tests'
+
+2014-03-13 Daniel J Blueman <daniel at numascale.com>
+
+ * x86/amd/numa: Fix northbridge quirk to assign correct NUMA node
+
+2014-03-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2014-03-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'i2c/for-current' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux
+
+2014-03-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2014-02-09 Richard Weinberger <richard at nod.at>
+
+ * i2c: Remove usage of orphaned symbol OF_I2C
+
+2014-03-13 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'pnp', 'acpi-init', 'acpi-sleep' and 'pm-cpufreq'
+
+2014-03-13 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / sleep: Add extra checks for HW Reduced ACPI mode sleep states
+
+2014-03-12 Heiner Kallweit <heiner.kallweit at web.de>
+
+ * ipv6: Avoid unnecessary temporary addresses being generated
+
+2014-03-12 Stefan Wahren <stefan.wahren at i2se.com>
+
+ * eth: fec: Fix lost promiscuous mode after reconnecting cable
+
+2014-03-12 dingtianhong <dingtianhong at huawei.com>
+
+ * bonding: set correct vlan id for alb xmit path
+
+2014-03-12 Alexander Aring <alex.aring at gmail.com>
+
+ * at86rf230: fix lockdep splats
+
+2014-03-12 Stanislaw Gruszka <sgruszka at redhat.com>
+
+ * Revert "rt2x00: rt2800lib: Update BBP register initialization for RT53xx"
+
+2014-03-12 Helmut Schaa <helmut.schaa at googlemail.com>
+
+ * ath9k: Fix sequence number assignment for non-data frames
+
+2014-03-13 Or Gerlitz <ogerlitz at mellanox.com>
+
+ * net/mlx4_en: Deregister multicast vxlan steering rules when going down
+
+2014-03-13 Arnd Bergmann <arnd at arndb.de>
+
+ * vmxnet3: fix building without CONFIG_PCI_MSI
+
+2014-03-13 Daniel Borkmann <dborkman at redhat.com>
+
+ * MAINTAINERS: add networking selftests to NETWORKING
+
+2014-03-13 Paul Mackerras <paulus at samba.org>
+
+ * KVM: PPC: Book3S HV: Fix register usage when loading/saving VRSAVE
+
+2014-03-13 Paul Mackerras <paulus at samba.org>
+
+ * KVM: PPC: Book3S HV: Remove bogus duplicate code
+
+2014-03-13 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'ttm-fixes-3.14-2014-03-12' of git://people.freedesktop.org/~thomash/linux into drm-fixes
+
+2014-03-13 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-fixes-3.14' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
+
+2014-03-13 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'vmwgfx-fixes-3.14-2014-03-13' of git://people.freedesktop.org/~thomash/linux into drm-fixes
+
+2014-03-12 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Fix a surface reference corner-case in legacy emulation mode
+
+2014-03-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pci-v3.14-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
+
+2014-03-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
+
+2014-03-13 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / init: Invoke early ACPI initialization later
+
+2014-03-12 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * cpufreq: Skip current frequency initialization for ->setpolicy drivers
+
+2014-03-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.14-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2014-03-11 Matthew Leach <matthew.leach at arm.com>
+
+ * net: socket: error on a negative msg_namelen
+
+2014-03-12 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/cik: properly set compute ring status on disable
+
+2014-03-11 Tobias Klauser <tklauser at distanz.ch>
+
+ * MAINTAINERS: Add tools/net to NETWORKING [GENERAL]
+
+2014-03-11 Geert Uytterhoeven <geert+renesas at linux-m68k.org>
+
+ * packet: doc: Spelling s/than/that/
+
+2014-03-12 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'mlx4'
+
+2014-03-12 Or Gerlitz <ogerlitz at mellanox.com>
+
+ * net/mlx4_core: Load the IB driver when the device supports IBoE
+
+2014-03-12 Or Gerlitz <ogerlitz at mellanox.com>
+
+ * net/mlx4_en: Handle vxlan steering rules for mac address changes
+
+2014-03-12 Or Gerlitz <ogerlitz at mellanox.com>
+
+ * net/mlx4_core: Fix wrong dump of the vxlan offloads device capability
+
+2014-03-12 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/cik: stop the sdma engines in the enable() function
+
+2014-03-12 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/cik: properly set sdma ring status on disable
+
+2014-03-11 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: fix runpm disabling on non-PX harder
+
+2014-03-11 Wei Liu <wei.liu2 at citrix.com>
+
+ * xen-netback: use skb_is_gso in xenvif_start_xmit
+
+2014-03-12 Rob Clark <rclark at redhat.com>
+
+ * drm/ttm: don't oops if no invalidate_caches()
+
+2014-03-12 Heinz Mauelshagen <heinzm at redhat.com>
+
+ * dm cache: fix access beyond end of origin device
+
+2014-03-12 Heinz Mauelshagen <heinzm at redhat.com>
+
+ * dm cache: fix truncation bug when copying a block to/from >2TB fast device
+
+2014-03-11 Radim Krčmář <rkrcmar at redhat.com>
+
+ * KVM: SVM: fix cr8 intercept window
+
+2014-03-11 Bjorn Helgaas <bhelgaas at google.com>
+
+ * PCI: Don't check resource_size() in pci_bus_alloc_resource()
+
+2014-03-11 Bjorn Helgaas <bhelgaas at google.com>
+
+ * PCI: Enable INTx in pci_reenable_device() only when MSI/MSI-X not enabled
+
+2014-03-07 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915: Don't enable display error interrupts from the start
+
+2014-03-11 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Fix scanline counter fixup on BDW
+
+2014-03-11 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Add a workaround for HSW scanline counter weirdness
+
+2014-03-12 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/ttm: Work around performance regression with VM_PFNMAP
+
+2014-02-27 Ales Novak <alnovak at suse.cz>
+
+ * [SCSI] storvsc: NULL pointer dereference fix
+
+2014-03-11 hayeswang <hayeswang at realtek.com>
+
+ * r8169: fix the incorrect tx descriptor version
+
+2014-03-11 Markos Chandras <markos.chandras at imgtec.com>
+
+ * tools/net/Makefile: Define PACKAGE to fix build problems
+
+2014-03-10 Alexei Starovoitov <ast at plumgrid.com>
+
+ * x86: bpf_jit: support negative offsets
+
+2014-03-10 Linus Lüssing <linus.luessing at web.de>
+
+ * bridge: multicast: enable snooping on general queries only
+
+2014-03-10 Linus Lüssing <linus.luessing at web.de>
+
+ * bridge: multicast: add sanity check for general query destination
+
+2014-03-06 Deng-Cheng Zhu <dengcheng.zhu at imgtec.com>
+
+ * MIPS: math-emu: Fix prefx detection and COP1X function field definition
+
+2014-03-10 Eric Dumazet <eric.dumazet at gmail.com>
+
+ * tcp: tcp_release_cb() should release socket ownership
+
+2014-03-11 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'skb_frags'
+
+2014-03-10 Michael S. Tsirkin <mst at redhat.com>
+
+ * skbuff: skb_segment: orphan frags before copying
+
+2014-03-11 Zhang Rui <rui.zhang at intel.com>
+
+ * PNP / ACPI: proper handling of ACPI IO/Memory resource parsing failures
+
+2014-03-11 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'stmmac'
+
+2014-03-10 Boris BREZILLON <b.brezillon.dev at gmail.com>
+
+ * ARM: at91: fix network interface ordering for sama5d36
+
+2014-03-11 Shawn Guo <shawn.guo at linaro.org>
+
+ * MAINTAINERS: update IMX kernel git tree
+
+2014-02-02 Suresh Siddha <sbsiddha at gmail.com>
+
+ * x86, fpu: Check tsk_used_math() in kernel_fpu_end() for eager FPU
+
+2014-03-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-next' of git://git.samba.org/sfrench/cifs-2.6
+
+2014-03-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
+
+2014-03-10 Dave Jones <davej at redhat.com>
+
+ * x86: Remove CONFIG_X86_OOSTORE
+
+2014-03-06 Dave Jones <davej at redhat.com>
+
+ * perf/x86: Fix leak in uncore_type_init failure paths
+
+2014-03-06 Fernando Luis Vazquez Cao <fernando at oss.ntt.co.jp>
+
+ * sched/clock: Prevent tracing recursion in sched_clock_cpu()
+
+2014-02-28 Peter Zijlstra <peterz at infradead.org>
+
+ * stop_machine: Fix^2 race between stop_two_cpus() and stop_cpus()
+
+2014-03-03 Juri Lelli <juri.lelli at gmail.com>
+
+ * sched/deadline: Deny unprivileged users to set/change SCHED_DEADLINE policy
+
+2014-03-11 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2014-03-11 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.14-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2014-03-10 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (patches from Andrew Morton)
+
+2014-03-10 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * cris: convert ffs from an object-like macro to a function-like macro
+
+2014-03-10 Sergei Antonov <saproj at gmail.com>
+
+ * hfsplus: add HFSX subfolder count support
+
+2014-03-10 Colin Ian King <colin.king at canonical.com>
+
+ * tools/testing/selftests/ipc/msgque.c: handle msgget failure return correctly
+
+2014-03-10 Michael Opdenacker <michael.opdenacker at free-electrons.com>
+
+ * MAINTAINERS: blackfin: add git repository
+
+2014-03-10 Andrew Morton <akpm at linux-foundation.org>
+
+ * revert "kallsyms: fix absolute addresses for kASLR"
+
+2014-03-10 Ben Hutchings <ben at decadent.org.uk>
+
+ * mm/Kconfig: fix URL for zsmalloc benchmark
+
+2014-03-10 Artem Fetishev <artem_fetishev at epam.com>
+
+ * fs/proc/base.c: fix GPF in /proc/$PID/map_files
+
+2014-03-10 Laura Abbott <lauraa at codeaurora.org>
+
+ * mm/compaction: break out of loop on !PageBuddy in isolate_freepages_block
+
+2014-03-10 Johannes Weiner <hannes at cmpxchg.org>
+
+ * mm: fix GFP_THISNODE callers and clarify
+
+2014-03-10 Jens Axboe <axboe at fb.com>
+
+ * mtip32xx: fix bad use of smp_processor_id()
+
+2014-03-10 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
+
+2014-03-10 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata
+
+2014-03-04 Al Viro <viro at zeniv.linux.org.uk>
+
+ * get rid of fget_light()
+
+2014-03-03 Al Viro <viro at zeniv.linux.org.uk>
+
+ * sockfd_lookup_light(): switch to fdget^W^Waway from fget_light
+
+2014-03-03 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * vfs: atomic f_pos accesses as per POSIX
+
+2014-02-10 Al Viro <viro at zeniv.linux.org.uk>
+
+ * ocfs2 syncs the wrong range...
+
+2014-03-10 Tejun Heo <tj at kernel.org>
+
+ * libata: use wider match for blacklisting Crucial M500
+
+2014-02-25 Don Zickus <dzickus at redhat.com>
+
+ * perf machine: Use map as success in ip__resolve_ams
+
+2014-03-02 Jiri Olsa <jolsa at redhat.com>
+
+ * perf symbols: Fix crash in elf_section_by_name
+
+2014-02-06 Ben Hutchings <ben at decadent.org.uk>
+
+ * perf trace: Decode architecture-specific signal numbers
+
+2014-03-10 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/88pm860x', 'asoc/fix/omap' and 'asoc/fix/si476x' into asoc-linus
+
+2014-03-10 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/pcm' into asoc-linus
+
+2014-03-04 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: 88pm860x: Fix IO setup
+
+2014-03-04 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: si476x: Fix IO setup
+
+2014-02-26 Giridhar Malavali <giridhar.malavali at qlogic.com>
+
+ * [SCSI] qla2xxx: Poll during initialization for ISP25xx and ISP83xx
+
+2014-02-06 Lukasz Dorau <lukasz.dorau at intel.com>
+
+ * [SCSI] isci: correct erroneous for_each_isci_host macro
+
+2014-02-06 Dan Williams <dan.j.williams at intel.com>
+
+ * [SCSI] isci: fix reset timeout handling
+
+2013-12-19 Mike Christie <michaelc at cs.wisc.edu>
+
+ * [SCSI] be2iscsi: fix bad if expression
+
+2014-02-26 Chad Dupuis <chad.dupuis at qlogic.com>
+
+ * [SCSI] qla2xxx: Fix multiqueue MSI-X registration.
+
+2014-03-07 Nikolay Aleksandrov <nikolay at redhat.com>
+
+ * selinux: add gfp argument to security_xfrm_policy_alloc and fix callers
+
+2014-03-07 Nikolay Aleksandrov <nikolay at redhat.com>
+
+ * net: af_key: fix sleeping under rcu
+
+2014-03-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.14-rc6
+
+2014-03-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2014-03-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'nfs-for-3.14-5' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
+
+2014-03-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'usb-3.14-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
+
+2014-03-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'staging-3.14-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
+
+2014-03-09 David Howells <dhowells at redhat.com>
+
+ * KEYS: Make the keyring cycle detector ignore other keyrings of the same name
+
+2014-03-09 Michael Chan <mchan at broadcom.com>
+
+ * bnx2: Fix shutdown sequence
+
+2014-03-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux
+
+2014-03-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'spi-v3.14-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi
+
+2014-03-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending
+
+2014-03-09 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Revert "ACPI / sleep: pm_power_off needs more sanity checks to be installed"
+
+2014-03-08 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'omap-for-v3.14/fixes-dt-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes
+
+2014-03-08 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'bcm-for-3.14-pinctrl-reduced-rename' of git://github.com/broadcom/bcm11351 into fixes
+
+2014-03-08 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'sunxi-fixes-for-3.14' of https://github.com/mripard/linux into fixes
+
+2014-03-08 Mike Snitzer <snitzer at redhat.com>
+
+ * block: change flush sequence list addition back to front add
+
+2014-03-08 Mike Snitzer <snitzer at redhat.com>
+
+ * block: fix q->flush_rq NULL pointer crash on dm-mpath flush
+
+2014-03-08 Eric W. Biederman <ebiederm at xmission.com>
+
+ * audit: Update kdoc for audit_send_reply and audit_list_rules_send
+
+2014-03-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
+
+2014-03-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata
+
+2014-03-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
+
+2014-03-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'firewire-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394
+
+2014-03-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'clk-fixes-for-linus' of git://git.linaro.org/people/mike.turquette/linux
+
+2014-03-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.14-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2014-03-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * x86: fix compile error due to X86_TRAP_NMI use in asm files
+
+2014-03-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-arm
+
+2014-03-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
+
+2014-03-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'trace-fixes-v3.14-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
+
+2014-03-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'irq-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-07 Ditang Chen <chendt.fnst at cn.fujitsu.com>
+
+ * SUNRPC: Fix oops when trace sunrpc_task events in nfs client
+
+2014-03-08 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-cpufreq'
+
+2014-03-08 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'acpi-resources', 'acpi-ec' and 'acpi-sleep'
+
+2014-03-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dm-3.14-fixes-3' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
+
+2014-03-07 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86: Ignore NMIs that come in during early boot
+
+2014-02-26 Mark Rutland <mark.rutland at arm.com>
+
+ * ARM: 7992/1: boot: compressed: ignore bswapsdi2.S
+
+2014-02-25 Linus Walleij <linus.walleij at linaro.org>
+
+ * ARM: 7991/1: sa1100: fix compile problem on Collie
+
+2014-02-26 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * ARM: fix noMMU kallsyms symbol filtering
+
+2014-03-07 Mathias Nyman <mathias.nyman at linux.intel.com>
+
+ * Revert "USBNET: ax88179_178a: enable tso if usb host supports sg dma"
+
+2014-03-07 Mathias Nyman <mathias.nyman at linux.intel.com>
+
+ * Revert "xhci 1.0: Limit arbitrarily-aligned scatter gather."
+
+2014-03-04 Julius Werner <jwerner at chromium.org>
+
+ * usb: Make DELAY_INIT quirk wait 100ms between Get Configuration requests
+
+2014-03-04 Julius Werner <jwerner at chromium.org>
+
+ * usb: Add device quirk for Logitech HD Pro Webcams C920 and C930e
+
+2014-03-07 Michele Baldessari <michele at acksyn.org>
+
+ * libata: add ATA_HORKAGE_BROKEN_FPDMA_AA quirk for Seagate Momentus SpinPoint M8 (2BA30001)
+
+2014-03-07 Joe Thornber <ejt at redhat.com>
+
+ * dm space map metadata: fix refcount decrement below 0 which caused corruption
+
+2014-03-07 Tejun Heo <tj at kernel.org>
+
+ * firewire: don't use PREPARE_DELAYED_WORK
+
+2014-03-07 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix loud click noise with IdeaPad 410Y
+
+2014-03-05 Sagi Grimberg <sagig at mellanox.com>
+
+ * Target/sbc: Fix sbc_copy_prot for offset scatters
+
+2014-03-07 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'spi/fix/ath79', 'spi/fix/atmel', 'spi/fix/coldfire', 'spi/fix/fsl-dspi', 'spi/fix/imx' and 'spi/fix/topcliff-pch' into spi-linus
+
+2014-03-04 Anton Blanchard <anton at samba.org>
+
+ * powerpc: Align p_dyn, p_rela and p_st symbols
+
+2014-03-03 Michael Neuling <mikey at neuling.org>
+
+ * powerpc/tm: Fix crash when forking inside a transaction
+
+2014-03-07 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-fixes-3.14' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
+
+2014-03-06 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/atom: select the proper number of lanes in transmitter setup
+
+2014-03-03 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * MAINTAINERS: add maintainer entry for TDA998x driver
+
+2014-03-06 Gerd Hoffmann <kraxel at redhat.com>
+
+ * drm: fix bochs kconfig dependencies
+
+2014-03-07 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-armada-fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-cubox into drm-fixes
+
+2014-03-07 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-fixes-3.14' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
+
+2014-03-06 Sabrina Dubroca <sd at queasysnail.net>
+
+ * ipv6: don't set DST_NOCOUNT for remotely added routes
+
+2014-03-06 Amir Vadai <amirv at mellanox.com>
+
+ * net/mlx4_core: mlx4_init_slave() shouldn't access comm channel before PF is ready
+
+2014-03-06 Amir Vadai <amirv at mellanox.com>
+
+ * net/mlx4_core: Fix memory access error in mlx4_QUERY_DEV_CAP_wrapper()
+
+2014-03-06 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/dpm: fix typo in EVERGREEN_SMC_FIRMWARE_HEADER_softRegisters
+
+2014-03-04 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/cik: fix typo in documentation
+
+2014-03-04 Paul Bolle <pebolle at tiscali.nl>
+
+ * drm/radeon: silence GCC warning on 32 bit
+
+2014-02-25 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: resume old pm late
+
+2014-02-28 Lauri Kasanen <cand at gmx.com>
+
+ * drm/radeon: TTM must be init with cpu-visible VRAM, v2
+
+2014-03-04 David Miller <davem at davemloft.net>
+
+ * sparc: serial: Clean up the locking for -rt
+
+2014-03-06 Stefan Richter <stefanr at s5r6.in-berlin.de>
+
+ * firewire: ohci: fix probe failure with Agere/LSI controllers
+
+2014-02-28 Deng-Cheng Zhu <dengcheng.zhu at imgtec.com>
+
+ * MIPS: APRP: Choose the correct VPE loader by fixing the linking
+
+2014-02-28 Deng-Cheng Zhu <dengcheng.zhu at imgtec.com>
+
+ * MIPS: APRP: Unregister rtlx interrupt hook at module exit
+
+2014-03-06 Mike Snitzer <snitzer at redhat.com>
+
+ * dm thin: fix Documentation for held metadata root feature
+
+2014-03-05 Peter Zijlstra <peterz at infradead.org>
+
+ * x86, trace: Further robustify CR2 handling vs tracing
+
+2014-03-01 Kieran Clancy <clancy.kieran at gmail.com>
+
+ * ACPI / EC: Clear stale EC events on Samsung systems
+
+2014-03-04 Viresh Kumar <viresh.kumar at linaro.org>
+
+ * cpufreq: Initialize governor for a new policy under policy->rwsem
+
+2014-03-04 Viresh Kumar <viresh.kumar at linaro.org>
+
+ * cpufreq: Initialize policy before making it available for others to use
+
+2014-03-04 Aaron Plattner <aplattner at nvidia.com>
+
+ * cpufreq: use cpufreq_cpu_get() to avoid cpufreq_get() race conditions
+
+2014-03-04 Ben Widawsky <benjamin.widawsky at intel.com>
+
+ * drm/i915: Fix PSR programming
+
+2014-03-05 Stefan Agner <stefan at agner.ch>
+
+ * clocksource: vf_pit_timer: use complement for sched_clock reading
+
+2014-03-06 Marc Zyngier <marc.zyngier at arm.com>
+
+ * ARM: KVM: fix non-VGIC compilation
+
+2014-02-28 Benoit Cousson <bcousson at baylibre.com>
+
+ * clk: shmobile: rcar-gen2: Use kick bit to allow Z clock frequency change
+
+2014-03-03 Joe Thornber <ejt at redhat.com>
+
+ * dm thin: fix noflush suspend IO queueing
+
+2014-03-03 Joe Thornber <ejt at redhat.com>
+
+ * dm thin: fix deadlock in __requeue_bio_list
+
+2014-03-03 Joe Thornber <ejt at redhat.com>
+
+ * dm thin: fix out of data space handling
+
+2014-02-14 Mike Snitzer <snitzer at redhat.com>
+
+ * dm thin: ensure user takes action to validate data and metadata consistency
+
+2014-03-04 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFSv4: Fail the truncate() if the lock/open stateid is invalid
+
+2014-03-04 Andy Adamson <andros at netapp.com>
+
+ * NFSv4.1 Fail data server I/O if stateid represents a lost lock
+
+2014-03-04 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFSv4: Fix the return value of nfs4_select_rw_stateid
+
+2014-03-05 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFSv4: nfs4_stateid_is_current should return 'true' for an invalid stateid
+
+2014-03-05 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: usb-audio: Add quirk for Logitech Webcam C500
+
+2014-03-05 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Use analog beep for Thinkpads with AD1984 codecs
+
+2014-03-05 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Add missing loopback merge path for AD1884/1984 codecs
+
+2014-03-05 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm-intel-fixes-2014-03-04' of ssh://git.freedesktop.org/git/drm-intel into drm-fixes
+
+2014-03-05 Wenyou Yang <wenyou.yang at atmel.com>
+
+ * spi: atmel: add missing spi_master_{resume,suspend} calls to PM callbacks
+
+2014-02-14 Axel Lin <axel.lin at ingics.com>
+
+ * spi: coldfire-qspi: Fix getting correct address for *mcfqspi
+
+2014-02-14 Axel Lin <axel.lin at ingics.com>
+
+ * spi: fsl-dspi: Fix getting correct address for master
+
+2014-03-02 Patrick Lai <plai at codeaurora.org>
+
+ * ASoC: pcm: free path list before exiting from error conditions
+
+2014-03-02 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iser-target: Fix command leak for tx_desc->comp_llnode_batch
+
+2014-02-27 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iser-target: Ignore completions for FRWRs in isert_cq_tx_work
+
+2014-02-27 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iser-target: Fix post_send_buf_count for RDMA READ/WRITE
+
+2014-02-03 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi/iser-target: Fix isert_conn->state hung shutdown issues
+
+2014-02-03 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi/iser-target: Use list_del_init for ->i_conn_node
+
+2014-02-26 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi-target: Fix iscsit_get_tpg_from_np tpg_state bug
+
+2014-03-03 Salva Peiró <speiro at ai2.upv.es>
+
+ * staging/cxt1e1/linux.c: Correct arbitrary memory write in c4_ioctl()
+
+2014-02-28 Jiri Olsa <jolsa at redhat.com>
+
+ * x86, trace: Fix CR2 corruption when tracing page faults
+
+2014-03-04 H. Peter Anvin <hpa at linux.intel.com>
+
+ * Merge tag 'efi-urgent' into x86/urgent
+
+2014-03-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2014-03-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'regulator-v3.14-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
+
+2014-03-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (patches from Andrew Morton)
+
+2014-02-14 Mike Snitzer <snitzer at redhat.com>
+
+ * dm thin: synchronize the pool mode during suspend
+
+2014-03-03 Johannes Weiner <hannes at cmpxchg.org>
+
+ * mm: page_alloc: exempt GFP_THISNODE allocations from zone fairness
+
+2014-03-03 Liu Ping Fan <pingfank at linux.vnet.ibm.com>
+
+ * mm: numa: bugfix for LAST_CPUPID_NOT_IN_PAGE_FLAGS
+
+2014-03-03 Joe Perches <joe at perches.com>
+
+ * MAINTAINERS: add and correct types of some "T:" entries
+
+2014-03-03 Joe Perches <joe at perches.com>
+
+ * MAINTAINERS: use tab for separator
+
+2014-03-03 Alexandre Bounine <alexandre.bounine at idt.com>
+
+ * rapidio/tsi721: fix tasklet termination in dma channel release
+
+2014-03-03 Vyacheslav Dubeyko <slava at dubeyko.com>
+
+ * hfsplus: fix remount issue
+
+2014-03-03 Minchan Kim <minchan at kernel.org>
+
+ * zram: avoid null access when fail to alloc meta
+
+2014-03-03 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * sh: prefix sh-specific "CCR" and "CCR2" by "SH_"
+
+2014-03-03 Jan Kara <jack at suse.cz>
+
+ * ocfs2: fix quota file corruption
+
+2014-03-03 Vikas Sajjan <vikas.sajjan at linaro.org>
+
+ * drivers/rtc/rtc-s3c.c: fix incorrect way of save/restore of S3C2410_TICNT for TYPE_S3C64XX
+
+2014-03-03 Andy Honig <ahonig at google.com>
+
+ * kallsyms: fix absolute addresses for kASLR
+
+2014-03-03 Daniel M. Weeks <dan at danweeks.net>
+
+ * scripts/gen_initramfs_list.sh: fix flags for initramfs LZ4 compression
+
+2014-03-03 Vlastimil Babka <vbabka at suse.cz>
+
+ * mm: include VM_MIXEDMAP flag in the VM_SPECIAL list to avoid m(un)locking
+
+2014-03-03 Filipe Brandenburger <filbranden at google.com>
+
+ * memcg: reparent charges of children before processing parent
+
+2014-03-03 Hugh Dickins <hughd at google.com>
+
+ * memcg: fix endless loop in __mem_cgroup_iter_next()
+
+2014-03-03 Hugh Dickins <hughd at google.com>
+
+ * lib/radix-tree.c: swapoff tmpfs radix_tree: remember to rcu_read_unlock
+
+2014-03-03 Dan Williams <dan.j.williams at intel.com>
+
+ * dma debug: account for cachelines and read-only mappings in overlap tracking
+
+2014-03-03 David Rientjes <rientjes at google.com>
+
+ * mm: close PageTail race
+
+2014-03-03 Borislav Petkov <bp at suse.de>
+
+ * MAINTAINERS: EDAC: add Mauro and Borislav as interim patch collectors
+
+2014-03-04 Hui Wang <hui.wang at canonical.com>
+
+ * ALSA: hda - add automute fix for another dell AIO model
+
+2014-03-04 Aaro Koskinen <aaro.koskinen at iki.fi>
+
+ * ASoC: n810: fix init with DT boot
+
+2014-02-26 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * tracing: Do not add event files for modules that fail tracepoints
+
+2014-03-03 Mikulas Patocka <mpatocka at redhat.com>
+
+ * dm snapshot: fix metadata corruption
+
+2014-03-03 Marios Andreopoulos <opensource at andmarios.com>
+
+ * libata: disable queued TRIM for Crucial M500 mSATA SSDs
+
+2014-03-03 Mike Snitzer <snitzer at redhat.com>
+
+ * dm: fix Kconfig indentation
+
+2014-03-03 Vlad Yasevich <vyasevic at redhat.com>
+
+ * macvlan: Add support for 'always_on' offload features
+
+2014-03-03 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'for-davem' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless
+
+2014-03-03 Daniel Borkmann <dborkman at redhat.com>
+
+ * net: sctp: fix sctp_sf_do_5_1D_ce to verify if we/peer is AUTH capable
+
+2014-03-03 David S. Miller <davem at davemloft.net>
+
+ * Merge tag 'linux-can-fixes-for-3.14-20140303' of git://gitorious.org/linux-can/linux-can
+
+2014-03-03 Xin Long <lucien.xin at gmail.com>
+
+ * ip_tunnel:multicast process cause panic due to skb->_skb_refdst NULL pointer
+
+2014-03-03 Schuyler Patton <spatton at ti.com>
+
+ * net: cpsw: fix cpdma rx descriptor leak on down interface
+
+2014-03-03 Vasundhara Volam <vasundhara.volam at emulex.com>
+
+ * be2net: isolate TX workarounds not applicable to Skyhawk-R
+
+2014-03-03 Vasundhara Volam <vasundhara.volam at emulex.com>
+
+ * be2net: Fix skb double free in be_xmit_wrokarounds() failure path
+
+2014-03-03 Somnath kotur <somnath.kotur at emulex.com>
+
+ * be2net: clear promiscuous bits in adapter->flags while disabling promiscuous mode
+
+2014-03-03 Somnath Kotur <somnath.kotur at emulex.com>
+
+ * be2net: Fix to reset transparent vlan tagging
+
+2014-03-01 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * qlcnic: dcb: a couple off by one bugs
+
+2014-02-28 Yuchung Cheng <ycheng at google.com>
+
+ * tcp: fix bogus RTT on special retransmission
+
+2014-03-01 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * hsr: off by one sanity check in hsr_register_frame_in()
+
+2014-03-03 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless into for-davem
+
+2014-03-03 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-03 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-03 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'clk-fixes-for-linus' of git://git.linaro.org/people/mike.turquette/linux
+
+2014-03-03 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * DRM: armada: fix use of kfifo_put()
+
+2014-03-03 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Reject >165MHz modes w/ DVI monitors
+
+2014-02-27 Paulo Zanoni <paulo.r.zanoni at intel.com>
+
+ * drm/i915: fix assert_cursor on BDW
+
+2014-02-11 Imre Deak <imre.deak at intel.com>
+
+ * drm/i915: vlv: reserve GT power context early
+
+2014-01-24 Zhang Rui <rui.zhang at intel.com>
+
+ * Thermal: thermal zone governor fix
+
+2014-02-17 Ni Wade <wni at nvidia.com>
+
+ * Thermal: Allow first update of cooling device state
+
+2014-01-27 Richard Weinberger <richard at nod.at>
+
+ * thermal,rcar_thermal: Add dependency on HAS_IOMEM
+
+2014-03-02 Jean Delvare <jdelvare at suse.de>
+
+ * x86_pkg_temp_thermal: Fix the thermal zone type
+
+2014-03-02 Jean Delvare <jdelvare at suse.de>
+
+ * x86_pkg_temp_thermal: Do not expose as a hwmon device
+
+2014-03-03 Zhang Rui <rui.zhang at intel.com>
+
+ * Thermal: update INT3404 thermal driver help text
+
+2014-03-01 Oliver Hartkopp <socketcan at hartkopp.net>
+
+ * can: remove CAN FD compatibility for CAN 2.0 sockets
+
+2014-02-28 Marc Kleine-Budde <mkl at pengutronix.de>
+
+ * can: flexcan: factor out soft reset into seperate funtion
+
+2014-02-28 Marc Kleine-Budde <mkl at pengutronix.de>
+
+ * can: flexcan: flexcan_remove(): add missing netif_napi_del()
+
+2014-02-28 Marc Kleine-Budde <mkl at pengutronix.de>
+
+ * can: flexcan: fix transition from and to freeze mode in chip_{,un}freeze
+
+2014-02-28 Marc Kleine-Budde <mkl at pengutronix.de>
+
+ * can: flexcan: factor out transceiver {en,dis}able into seperate functions
+
+2014-02-28 Marc Kleine-Budde <mkl at pengutronix.de>
+
+ * can: flexcan: fix transition from and to low power mode in chip_{en,dis}able
+
+2014-02-28 Marc Kleine-Budde <mkl at pengutronix.de>
+
+ * can: flexcan: flexcan_open(): fix error path if flexcan_chip_start() fails
+
+2014-02-19 Marc Kleine-Budde <mkl at pengutronix.de>
+
+ * can: flexcan: fix shutdown: first disable chip, then all interrupts
+
+2014-02-14 Imre Deak <imre.deak at intel.com>
+
+ * drm/i915: fix pch pci device enumeration
+
+2014-01-13 Akash Goel <akash.goel at intel.com>
+
+ * drm/i915: Resolving the memory region conflict for Stolen area
+
+2014-02-25 Jani Nikula <jani.nikula at intel.com>
+
+ * drm/i915: use backlight legacy combination mode also for i915gm/i945gm
+
+2014-03-03 Marius Knaust <marius.knaust at gmail.com>
+
+ * ALSA: hda - Added inverted digital-mic handling for Acer TravelMate 8371
+
+2014-03-02 Gabor Juhos <juhosg at openwrt.org>
+
+ * spi: spi-ath79: fix initial GPIO CS line setup
+
+2014-02-27 Dave Airlie <airlied at redhat.com>
+
+ * MAINTAINERS: update AGP tree to point at drm tree
+
+2014-03-02 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFS: Fix a delegation callback race
+
+2014-03-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.14-rc5
+
+2014-02-28 Gerry Demaret <gerry at tigron.be>
+
+ * USB AX88179/178A: Support D-Link DUB-1312
+
+2014-03-02 Hauke Mehrtens <hauke at hauke-m.de>
+
+ * b44: always set duplex mode why phy changes
+
+2014-03-02 Hauke Mehrtens <hauke at hauke-m.de>
+
+ * b44: add calls to phy_{start,stop}
+
+2014-03-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2014-03-02 Li, Aubrey <aubrey.li at linux.intel.com>
+
+ * ACPI / sleep: pm_power_off needs more sanity checks to be installed
+
+2014-03-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'usb-3.14-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
+
+2014-03-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'driver-core-3.14-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core
+
+2014-03-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'staging-3.14-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
+
+2014-03-03 Dave Airlie <airlied at gmail.com>
+
+ * Merge branch 'drm-fixes-3.14' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
+
+2014-03-02 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge iio fixes into staging-linus
+
+2014-03-01 Marek Belisko <marek at goldelico.com>
+
+ * ARM: dts: omap3-gta04: Add ti,omap36xx to compatible property to avoid problems with booting
+
+2014-03-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-02 Dave Airlie <airlied at gmail.com>
+
+ * Merge tag 'vmwgfx-fixes-3.14-2014-03-02' of git://people.freedesktop.org/~thomash/linux into drm-fixes
+
+2014-03-01 Alexey Khoroshilov <khoroshilov at ispras.ru>
+
+ * drm/vmwgfx: avoid null pointer dereference at failure paths
+
+2014-02-28 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Make sure backing mobs are cleared when allocated. Update driver date.
+
+2014-02-28 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Remove some unused surface formats
+
+2014-03-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-03-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending
+
+2014-03-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://git.infradead.org/users/vkoul/slave-dma
+
+2014-03-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.14-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2014-02-27 Zhang Rui <rui.zhang at intel.com>
+
+ * ACPI / resources: ignore invalid ACPI device resources
+
+2014-02-26 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFSv4: Fix another nfs4_sequence corruptor
+
+2014-03-01 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2014-02-28 Eric W. Biederman <ebiederm at xmission.com>
+
+ * audit: Send replies in the proper network namespace.
+
+2014-02-28 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'fixes-for-3.14d' of git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio into staging-linus
+
+2014-02-28 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * MAINTAINERS: add maintainer entry for Armada DRM driver
+
+2014-02-28 Ivan Vecera <ivecera at redhat.com>
+
+ * bna: fix vlan tag stripping and implement its toggling
+
+2014-02-28 Michael Chan <mchan at broadcom.com>
+
+ * tg3: Don't check undefined error bits in RXBD
+
+2014-02-20 Stephen Warren <swarren at nvidia.com>
+
+ * ARM: tegra: add LED options back into tegra_defconfig
+
+2014-02-22 Jeff Layton <jlayton at redhat.com>
+
+ * cifs: mask off top byte in get_rfc1002_length()
+
+2014-02-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dm-3.14-fixes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
+
+2014-02-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.14-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2014-02-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'edac_fixes_for_3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp
+
+2014-02-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
+
+2014-02-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
+
+2014-02-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
+
+2014-02-22 Javier Martinez Canillas <javier.martinez at collabora.co.uk>
+
+ * ARM: dts: omap3-igep: fix boot fail due wrong compatible match
+
+2014-02-26 Bing Zhao <bzhao at marvell.com>
+
+ * mwifiex: do not advertise usb autosuspend support
+
+2014-02-28 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'for-john' of git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211
+
+2014-02-28 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi-fixes
+
+2014-02-28 Heinz Mauelshagen <heinzm at redhat.com>
+
+ * dm cache mq: fix memory allocation failure for large cache devices
+
+2014-02-28 Catalin Marinas <catalin.marinas at arm.com>
+
+ * arm64: Fix !CONFIG_SMP kernel build
+
+2014-02-25 Steve Capper <steve.capper at linaro.org>
+
+ * arm64: mm: Add double logical invert to pte accessors
+
+2014-02-28 Arnd Bergmann <arnd at arndb.de>
+
+ * Merge tag 'omap-for-v3.14/fixes-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes
+
+2014-02-27 Heinz Mauelshagen <heinzm at redhat.com>
+
+ * dm cache: fix truncation bug when mapping I/O to >2TB fast device
+
+2014-02-26 Jiri Olsa <jolsa at redhat.com>
+
+ * perf tools: Fix strict alias issue for find_first_bit
+
+2014-02-03 Eric W. Biederman <ebiederm at xmission.com>
+
+ * audit: Use struct net not pid_t to remember the network namespce to reply in
+
+2014-02-18 Stefan Richter <stefanr at s5r6.in-berlin.de>
+
+ * firewire: net: fix use after free
+
+2014-02-28 Benjamin Herrenschmidt <benh at kernel.crashing.org>
+
+ * powerpc/powernv: Fix indirect XSCOM unmangling
+
+2014-02-28 Benjamin Herrenschmidt <benh at kernel.crashing.org>
+
+ * powerpc/powernv: Fix opal_xscom_{read,write} prototype
+
+2014-02-25 Gavin Shan <shangw at linux.vnet.ibm.com>
+
+ * powerpc/powernv: Refactor PHB diag-data dump
+
+2014-02-25 Gavin Shan <shangw at linux.vnet.ibm.com>
+
+ * powerpc/powernv: Dump PHB diag-data immediately
+
+2014-02-26 Paul Mackerras <paulus at samba.org>
+
+ * powerpc: Increase stack redzone for 64-bit userspace to 512 bytes
+
+2014-02-26 Liu Ping Fan <kernelfans at gmail.com>
+
+ * powerpc/ftrace: bugfix for test_24bit_addr
+
+2014-02-24 Laurent Dufour <ldufour at linux.vnet.ibm.com>
+
+ * powerpc/crashdump : Fix page frame number check in copy_oldmem_page
+
+2014-02-20 Tony Breeds <tony at bakeyournoodle.com>
+
+ * powerpc/le: Ensure that the 'stop-self' RTAS token is handled correctly
+
+2014-02-27 Philippe De Muyter <phdm at macqel.be>
+
+ * spi: spi-imx: spi_imx_remove: do not disable disabled clocks
+
+2014-02-27 Tony Lindgren <tony at atomide.com>
+
+ * ARM: OMAP3: Fix pinctrl interrupts for core2
+
+2014-02-27 Hans Schillstrom <hans at schillstrom.com>
+
+ * ipv6: ipv6_find_hdr restore prev functionality
+
+2014-02-27 Duan Jiong <duanj.fnst at cn.fujitsu.com>
+
+ * neigh: recompute reachabletime before returning from neigh_periodic_work()
+
+2014-02-28 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'pm-cpufreq', 'pm-hibernate' and 'acpi-processor'
+
+2014-02-27 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'for-davem' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless
+
+2014-02-27 Yuval Mintz <yuvalmin at broadcom.com>
+
+ * bnx2x: Add missing bit in default Tx switching
+
+2014-02-27 Paolo Bonzini <pbonzini at redhat.com>
+
+ * kvm, vmx: Really fix lazy FPU on nested guest
+
+2014-01-11 Andi Kleen <ak at linux.intel.com>
+
+ * perf tools: fix BFD detection on opensuse
+
+2014-02-27 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec
+
+2014-02-27 Lorenzo Colitti <lorenzo at google.com>
+
+ * net: ipv6: ping: Use socket mark in routing lookup
+
+2014-02-27 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless into for-davem
+
+2014-02-27 Johannes Berg <johannes.berg at intel.com>
+
+ * mac80211: fix association to 20/40 MHz VHT networks
+
+2014-02-18 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: enable speaker allocation setup on dce3.2
+
+2014-02-18 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: change audio enable logic
+
+2014-02-18 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: fix audio disable on dce6+
+
+2014-02-26 Jerome Glisse <jglisse at redhat.com>
+
+ * drm/radeon: free uvd ring on unload
+
+2014-02-25 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: disable pll sharing for DP on DCE4.1
+
+2014-02-20 Christian König <christian.koenig at amd.com>
+
+ * drm/radeon: fix missing bo reservation
+
+2014-02-20 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: print the supported atpx function mask
+
+2014-02-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'metag-fixes-v3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/metag
+
+2014-02-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pwm/for-3.14-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/thierry.reding/linux-pwm
+
+2014-02-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
+
+2014-02-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'upstream-3.14-rc5' of git://git.infradead.org/linux-ubifs
+
+2014-02-27 Andrew Honig <ahonig at google.com>
+
+ * kvm: x86: fix emulator buffer overflow (CVE-2014-0049)
+
+2014-02-26 Marc Zyngier <marc.zyngier at arm.com>
+
+ * arm/arm64: KVM: detect CPU reset on CPU_PM_EXIT
+
+2014-02-26 Hiroaki SHIMODA <shimoda.hiroaki at gmail.com>
+
+ * sch_tbf: Fix potential memory leak in tbf_change().
+
+2014-02-12 Mike Snitzer <snitzer at redhat.com>
+
+ * dm thin: allow metadata space larger than supported to go unused
+
+2014-02-27 Li Zefan <lizefan at huawei.com>
+
+ * cpuset: fix a race condition in __cpuset_node_allowed_softwall()
+
+2014-02-27 Li Zefan <lizefan at huawei.com>
+
+ * cpuset: fix a locking issue in cpuset_migrate_mm()
+
+2014-02-27 Rashika Kheria <rashika.kheria at gmail.com>
+
+ * genirq: Include missing header file in irqdomain.c
+
+2014-02-27 Ingo Molnar <mingo at kernel.org>
+
+ * Merge branch 'liblockdep-fixes' of https://github.com/sashalevin/liblockdep into core/urgent
+
+2014-02-27 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2014-02-27 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.14-rc4-2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2014-02-24 Peter Zijlstra <peterz at infradead.org>
+
+ * perf: Fix hotplug splat
+
+2014-02-21 Peter Zijlstra <peterz at infradead.org>
+
+ * perf/x86: Fix event scheduling
+
+2014-02-21 Juri Lelli <juri.lelli at gmail.com>
+
+ * sched/deadline: Prevent rt_time growth to infinity
+
+2014-02-24 Juri Lelli <juri.lelli at gmail.com>
+
+ * sched/deadline: Switch CPU's presence test order
+
+2014-02-25 Kirill Tkhai <ktkhai at parallels.com>
+
+ * sched/deadline: Cleanup RT leftovers from {inc/dec}_dl_migration
+
+2014-02-18 George McCollister <george.mccollister at gmail.com>
+
+ * sched: Fix double normalization of vruntime
+
+2014-02-27 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/wm8958' into asoc-linus
+
+2014-02-27 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/da732x' and 'asoc/fix/sta32x' into asoc-linus
+
+2014-02-27 Mark Brown <broonie at linaro.org>
+
+ * Merge tag 'asoc-v3.14-rc4' into asoc-linus
+
+2014-02-27 Mark Brown <broonie at linaro.org>
+
+ * Merge tag 'asoc-v3.14-rc3' into asoc-linus
+
+2014-02-25 Johannes Berg <johannes.berg at intel.com>
+
+ * iwlwifi: fix TX status for aggregated packets
+
+2014-02-27 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: sta32x: Fix wrong enum for limiter2 release rate
+
+2014-02-16 Max Stepanov <Max.Stepanov at intel.com>
+
+ * iwlwifi: mvm: change of listen interval from 70 to 10
+
+2014-02-27 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2014-02-18 Alex Deucher <alexdeucher at gmail.com>
+
+ * MAINTAINERS: update drm git tree entry
+
+2014-02-18 Alex Deucher <alexdeucher at gmail.com>
+
+ * MAINTAINERS: add entry for drm radeon driver
+
+2014-02-26 Alexander Stein <alexander.stein at systec-electronic.com>
+
+ * spi-topcliff-pch: Fix probing when DMA mode is used
+
+2014-02-26 Jiri Bohac <jbohac at suse.cz>
+
+ * bonding: disallow enslaving a bond to itself
+
+2014-02-08 Wang Nan <wangnan0 at huawei.com>
+
+ * tools/liblockdep: Use realpath for srctree and objtree
+
+2014-02-05 Sasha Levin <sasha.levin at oracle.com>
+
+ * tools/liblockdep: Add a stub for new rcu_is_watching
+
+2014-02-05 Sasha Levin <sasha.levin at oracle.com>
+
+ * tools/liblockdep: Mark runtests.sh as executable
+
+2014-01-31 Ira W. Snyder <iws at ovro.caltech.edu>
+
+ * tools/liblockdep: Add include directory to allow tests to compile
+
+2014-01-31 Ira W. Snyder <iws at ovro.caltech.edu>
+
+ * tools/liblockdep: Fix include of asm/hash.h
+
+2014-01-31 Ira W. Snyder <iws at ovro.caltech.edu>
+
+ * tools/liblockdep: Fix initialization code path
+
+2014-02-11 Masanari Iida <standby24x7 at gmail.com>
+
+ * clk:at91: Fix memory leak in of_at91_clk_master_setup()
+
+2014-02-19 Stanislaw Gruszka <sgruszka at redhat.com>
+
+ * usb: ehci: fix deadlock when threadirqs option is used
+
+2014-02-21 Joerg Dorchain <joerg at dorchain.net>
+
+ * USB: ftdi_sio: add Cressi Leonardo PID
+
+2014-02-26 Lan Tianyu <tianyu.lan at intel.com>
+
+ * ACPI / processor: Rework processor throttling with work_on_cpu()
+
+2014-02-26 Nikolay Aleksandrov <nikolay at redhat.com>
+
+ * bonding: fix a div error caused by the slave release path
+
+2014-02-26 Freddy Xin <freddy at asix.com.tw>
+
+ * AX88179_178A: Add VID:DID for Lenovo OneLinkDock Gigabit LAN
+
+2014-02-26 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'bonding_rtnl'
+
+2014-02-26 dingtianhong <dingtianhong at huawei.com>
+
+ * bonding: Fix RTNL: assertion failed at net/core/rtnetlink.c for ab arp monitor
+
+2014-02-26 dingtianhong <dingtianhong at huawei.com>
+
+ * bonding: Fix RTNL: assertion failed at net/core/rtnetlink.c for 802.3ad mode
+
+2014-02-25 Joe Perches <joe at perches.com>
+
+ * MAINTAINERS: Intel nic drivers
+
+2014-02-25 Edward Cree <ecree at solarflare.com>
+
+ * sfc: check for NULL efx->ptp_data in efx_ptp_event
+
+2014-02-25 Eric Dumazet <edumazet at google.com>
+
+ * net: tcp: use NET_INC_STATS()
+
+2014-01-21 Linus Walleij <linus.walleij at linaro.org>
+
+ * clk: nomadik: fix multiplatform problem
+
+2014-02-24 Marcelo Tosatti <mtosatti at redhat.com>
+
+ * KVM: MMU: drop read-only large sptes when creating lower level sptes
+
+2014-01-23 Christian Engelmayer <cengelma at gmx.at>
+
+ * pwm: lp3943: Fix potential memory leak during request
+
+2014-02-26 Hannes Reinecke <hare at suse.de>
+
+ * dm mpath: fix stalls when handling invalid ioctls
+
+2014-02-24 Mark Brown <broonie at linaro.org>
+
+ * ASoC: da732x: Mark DC offset control registers volatile
+
+2014-02-26 Fernando Luis Vázquez Cao <fernando_b1 at lab.ntt.co.jp>
+
+ * HID: hidraw: fix warning destroying hidraw device files after parent
+
+2014-02-26 Kailang Yang <kailang at realtek.com>
+
+ * ALSA: hda/realtek - Add more entry for enable HP mute led
+
+2014-02-19 Steffen Klassert <steffen.klassert at secunet.com>
+
+ * xfrm: Fix unlink race when policies are deleted.
+
+2014-02-09 Kees Cook <keescook at chromium.org>
+
+ * x86, kaslr: add missed "static" declarations
+
+2014-01-23 Eugene Surovegin <surovegin at google.com>
+
+ * x86, kaslr: export offset in VMCOREINFO ELF notes
+
+2014-02-18 Sebastian Capella <sebastian.capella at linaro.org>
+
+ * PM / hibernate: Fix restore hang in freeze_processes()
+
+2014-02-25 Dirk Brandewie <dirk.j.brandewie at intel.com>
+
+ * intel_pstate: Change busy calculation to use fixed point math.
+
+2014-02-25 Cristian Bercaru <cristian.bercaru at freescale.com>
+
+ * phy: unmask link partner capabilities
+
+2014-02-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (patches from Andrew Morton)
+
+2014-02-25 Tobias Klauser <tklauser at distanz.ch>
+
+ * MAINTAINERS: change mailing list address for Altera UART drivers
+
+2014-02-25 Jan Beulich <JBeulich at suse.com>
+
+ * Makefile: fix build with make 3.80 again
+
+2014-02-25 Joe Perches <joe at perches.com>
+
+ * MAINTAINERS: update L: misuses
+
+2014-02-25 Fathi Boudra <fathi.boudra at linaro.org>
+
+ * Makefile: fix extra parenthesis typo when CC_STACKPROTECTOR_REGULAR is enabled
+
+2014-02-25 Davidlohr Bueso <davidlohr at hp.com>
+
+ * ipc,mqueue: remove limits for the amount of system-wide queues
+
+2014-02-25 Michal Hocko <mhocko at suse.cz>
+
+ * memcg: change oom_info_lock to mutex
+
+2014-02-25 Kirill A. Shutemov <kirill.shutemov at linux.intel.com>
+
+ * mm, thp: fix infinite loop on memcg OOM
+
+2014-02-25 Joe Perches <joe at perches.com>
+
+ * drivers/fmc/fmc-write-eeprom.c: fix decimal permissions
+
+2014-02-25 Joe Perches <joe at perches.com>
+
+ * drivers/iommu/omap-iommu-debug.c: fix decimal permissions
+
+2014-02-25 Kirill A. Shutemov <kirill.shutemov at linux.intel.com>
+
+ * mm, hwpoison: release page on PageHWPoison() in __do_fault()
+
+2014-02-25 James Hogan <james.hogan at imgtec.com>
+
+ * irq-metag*: stop set_affinity vectoring to offline cpus
+
+2014-02-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dmaengine-fixes-3.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/dmaengine
+
+2014-02-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus-20140225' of git://git.infradead.org/linux-mtd
+
+2014-02-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/geert/linux-m68k
+
+2014-02-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'xtensa-next-20140224' of git://github.com/czankel/xtensa-linux
+
+2014-02-24 Felix Fietkau <nbd at openwrt.org>
+
+ * ath9k: fix invalid descriptor discarding
+
+2014-02-24 Felix Fietkau <nbd at openwrt.org>
+
+ * ath9k: reduce baseband hang detection false positive rate
+
+2014-02-19 Dan Williams <dan.j.williams at intel.com>
+
+ * ioat: fix tasklet tear down
+
+2014-02-25 Li Zefan <lizefan at huawei.com>
+
+ * sysfs: fix namespace refcnt leak
+
+2014-02-14 Janusz Dziedzic <janusz.dziedzic at tieto.com>
+
+ * cfg80211: regulatory: reset regdomain in case of error
+
+2014-02-21 Jan Kara <jack at suse.cz>
+
+ * fsnotify: Allocate overflow events with proper type
+
+2014-02-21 Jan Kara <jack at suse.cz>
+
+ * fanotify: Handle overflow in case of permission events
+
+2014-02-21 Jan Kara <jack at suse.cz>
+
+ * fsnotify: Fix detection whether overflow event is queued
+
+2014-02-25 Jean Delvare <jdelvare at suse.de>
+
+ * i7300_edac: Fix device reference count
+
+2014-02-24 Jean Delvare <jdelvare at suse.de>
+
+ * i7core_edac: Fix PCI device reference count
+
+2014-02-24 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Add a fixup for HP Folio 13 mute LED
+
+2014-02-24 Mike Turquette <mturquette at linaro.org>
+
+ * Merge branch 'clocks/fixes/drivers' of git://linuxtv.org/pinchartl/fbdev into clk-fixes
+
+2014-01-07 Sylwester Nawrocki <s.nawrocki at samsung.com>
+
+ * clk: Correct handling of NULL clk in __clk_{get, put}
+
+2014-02-22 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: wm8958-dsp: Fix firmware block loading
+
+2014-01-23 Sherman Yin <syin at broadcom.com>
+
+ * pinctrl: Rename Broadcom Capri pinctrl binding
+
+2014-02-24 Christian Daudt <bcm at fixthebug.org>
+
+ * pinctrl: refer to updated dt binding string.
+
+2014-01-23 Sherman Yin <syin at broadcom.com>
+
+ * Update dtsi with new pinctrl compatible string
+
+2014-02-20 Markus Pargmann <mpa at pengutronix.de>
+
+ * regulator: core: Replace direct ops->disable usage
+
+2014-02-20 Markus Pargmann <mpa at pengutronix.de>
+
+ * regulator: core: Replace direct ops->enable usage
+
+2014-02-24 Manu Gupta <manugupt1 at gmail.com>
+
+ * staging: r8188eu: Add new device ID
+
+2014-02-24 Venkatesh Srinivas <venkateshs at google.com>
+
+ * vhost/scsi: Check LUN structure byte 0 is set to 1, per spec
+
+2014-02-24 Mike Turquette <mturquette at linaro.org>
+
+ * Merge branch 'clk-tegra-more-fixes-3.14' of git://nv-tegra.nvidia.com/user/pdeschrijver/linux into clk-fixes
+
+2014-02-24 Juergen Beisert <jbe at pengutronix.de>
+
+ * staging:iio:adc:MXS:LRADC: fix touchscreen statemachine
+
+2014-02-24 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge 3.14-rc4 into char-misc-linus
+
+2014-02-24 Dr. Greg Wettstein <greg at enjellic.com>
+
+ * qla2xxx: Fix kernel panic on selective retransmission request
+
+2014-02-22 Felix Fietkau <nbd at openwrt.org>
+
+ * ath9k: fix ps-poll responses under a-mpdu sessions
+
+2014-02-21 Bing Zhao <bzhao at marvell.com>
+
+ * mwifiex: rename usb driver name registerring to usb core
+
+2014-02-24 John W. Linville <linville at tuxdriver.com>
+
+ * Merge tag 'nfc-fixes-3.14-1' of git://git.kernel.org/pub/scm/linux/kernel/git/sameo/nfc-fixes
+
+2014-02-24 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi-fixes
+
+2014-02-19 Mike Snitzer <snitzer at redhat.com>
+
+ * dm thin: fix the error path for the thin device constructor
+
+2014-02-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security
+
+2014-02-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
+
+2014-02-20 Namhyung Kim <namhyung at kernel.org>
+
+ * perf symbols: Destroy unused symsrcs
+
+2014-02-20 Namhyung Kim <namhyung at kernel.org>
+
+ * perf annotate: Check availability of annotate when processing samples
+
+2014-02-19 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * clk: shmobile: Fix typo in MSTP clock DT bindings
+
+2014-01-07 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * clk: shmobile: rcar-gen2: Fix qspi divisor
+
+2014-01-07 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * clk: shmobile: rcar-gen2: Fix clock parent for all non-PLL clocks
+
+2014-02-03 James Hogan <james.hogan at imgtec.com>
+
+ * asm-generic: add sched_setattr/sched_getattr syscalls
+
+2014-02-21 Johannes Berg <johannes.berg at intel.com>
+
+ * mac80211: don't validate unchanged AP bandwidth while tracking
+
+2014-02-24 Chris Zankel <chris at zankel.net>
+
+ * Merge tag 'xtensa-for-next-20140221-1' into for_next
+
+2014-02-24 James Morris <james.l.morris at oracle.com>
+
+ * Merge branch 'stable-3.14' of git://git.infradead.org/users/pcmoore/selinux into for-linus
+
+2014-02-14 Jeff Layton <jlayton at redhat.com>
+
+ * cifs: sanity check length of data to send before sending
+
+2014-02-14 Pavel Shilovsky <piastry at etersoft.ru>
+
+ * CIFS: Fix wrong pos argument of cifs_find_lock_conflict
+
+2014-02-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.14-rc4
+
+2014-02-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2014-02-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'regulator-v3.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
+
+2014-02-23 Sagi Grimberg <sagig at mellanox.com>
+
+ * Target/sbc: Don't use sg as iterator in sbc_verify_read
+
+2014-02-23 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Add DIF sense codes in transport_generic_request_failure
+
+2014-02-23 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target/sbc: Fix sbc_dif_copy_prot addr offset bug
+
+2014-02-17 Pekon Gupta <pekon at ti.com>
+
+ * mtd: nand: omap: fix ecclayout->oobfree->length
+
+2014-02-17 Pekon Gupta <pekon at ti.com>
+
+ * mtd: nand: omap: fix ecclayout->oobfree->offset
+
+2014-02-17 Pekon Gupta <pekon at ti.com>
+
+ * mtd: nand: omap: fix ecclayout to be in sync with u-boot NAND driver
+
+2014-02-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'irq-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-06 Amitkumar Karwar <akarwar at marvell.com>
+
+ * NFC: NCI: Fix NULL pointer dereference
+
+2014-02-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'usb-3.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
+
+2014-02-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'tty-3.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
+
+2014-02-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'staging-3.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
+
+2014-02-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'char-misc-3.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
+
+2014-02-14 Matt Porter <mporter at linaro.org>
+
+ * MAINTAINERS: add additional ARM BCM281xx/BCM11xxx maintainer
+
+2014-02-23 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'regulator/fix/da9063', 'regulator/fix/max14577' and 'regulator/fix/s5m8767' into regulator-linus
+
+2014-02-23 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/fix/core' into regulator-linus
+
+2014-02-23 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/sta32x', 'asoc/fix/wm8400', 'asoc/fix/wm8770', 'asoc/fix/wm8900' and 'asoc/fix/wm8994' into asoc-linus
+
+2014-02-23 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/ad1980' and 'asoc/fix/isabelle' into asoc-linus
+
+2014-02-23 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/dapm' into asoc-linus
+
+2014-02-23 Mark Brown <broonie at linaro.org>
+
+ * Merge tag 'asoc-v3.14-rc3' into asoc-linus
+
+2014-02-22 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: sta32x: Fix cache sync
+
+2014-02-22 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "tty: Set correct tty name in 'active' sysfs attribute"
+
+2014-02-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging
+
+2014-02-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'xfs-fixes-for-3.14-rc4' of git://oss.sgi.com/xfs/xfs
+
+2014-02-22 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2014-02-21 Krzysztof Kozlowski <k.kozlowski at samsung.com>
+
+ * regulator: max14577: Fix invalid return value on DT parse success
+
+2014-02-21 Jan Kara <jack at suse.cz>
+
+ * Revert "writeback: do not sync data dirtied after sync start"
+
+2014-02-17 Santosh Shilimkar <santosh.shilimkar at ti.com>
+
+ * ARM: OMAP: Kill warning in CPUIDLE code with !CONFIG_SMP
+
+2014-02-17 Sebastian Reichel <sre at debian.org>
+
+ * ARM: OMAP2+: Add support for thumb mode on DT booted N900
+
+2014-02-21 Tony Lindgren <tony at atomide.com>
+
+ * Merge tag 'for-v3.14-rc/omap-fixes-a' of git://git.kernel.org/pub/scm/linux/kernel/git/pjw/omap-pending into omap-for-v3.14/fixes
+
+2014-02-21 Thomas Gleixner <tglx at linutronix.de>
+
+ * Merge tag 'irqchip-mvebu-fixes-3.14' of git://git.infradead.org/linux-mvebu into irq/urgent
+
+2014-02-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dt-for-linus' of git://git.secretlab.ca/git/linux
+
+2014-02-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://www.linux-watchdog.org/linux-watchdog
+
+2014-02-07 Andrew Lunn <andrew at lunn.ch>
+
+ * irqchip: orion: Fix getting generic chip pointer.
+
+2014-02-19 Stephane Eranian <eranian at google.com>
+
+ * perf/x86/uncore: Fix IVT/SNB-EP uncore CBOX NID filter table
+
+2014-02-03 Peter Zijlstra <peterz at infradead.org>
+
+ * perf/x86: Correctly use FEATURE_PDCM
+
+2014-02-14 Markus Metzger <markus.t.metzger at intel.com>
+
+ * perf, nmi: Fix unknown NMI warning
+
+2014-02-19 Matthieu CASTET <matthieu.castet at parrot.com>
+
+ * usb: chipidea: need to mask when writting endptflush and endptprime
+
+2014-02-17 Arve Hjønnevåg <arve at android.com>
+
+ * staging: binder: Fix death notifications
+
+2014-02-18 Kirill Tkhai <tkhai at yandex.ru>
+
+ * sched/deadline: Remove useless dl_nr_total
+
+2014-02-17 Boris Ostrovsky <boris.ostrovsky at oracle.com>
+
+ * sched/deadline: Test for CPU's presence explicitly
+
+2014-02-14 Peter Zijlstra <peterz at infradead.org>
+
+ * sched: Add 'flags' argument to sched_{set,get}attr() syscalls
+
+2014-02-16 Vegard Nossum <vegard.nossum at oracle.com>
+
+ * sched: Fix information leak in sys_sched_getattr()
+
+2014-02-18 Rik van Riel <riel at redhat.com>
+
+ * sched,numa: add cond_resched to task_numa_work
+
+2014-02-11 Juri Lelli <juri.lelli at gmail.com>
+
+ * sched/core: Make dl_b->lock IRQ safe
+
+2014-02-11 Juri Lelli <juri.lelli at gmail.com>
+
+ * sched/core: Fix sched_rt_global_validate
+
+2014-02-19 Steven Rostedt <rostedt at goodmis.org>
+
+ * sched/deadline: Fix overflow to handle period==0 and deadline!=0
+
+2014-02-20 Juri Lelli <juri.lelli at gmail.com>
+
+ * sched/deadline: Fix bad accounting of nr_running
+
+2014-01-27 Stanislav Kholmanskikh <stanislav.kholmanskikh at oracle.com>
+
+ * watchdog: w83697hf_wdt: return ENODEV if no device was found
+
+2014-02-18 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: wire up sched_setattr and sched_getattr syscalls
+
+2014-02-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc
+
+2014-02-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2014-02-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'iommu-fixes-v3.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
+
+2014-02-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2014-01-29 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: xtfpga: set ethoc clock frequency
+
+2014-01-29 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: xtfpga: use common clock framework
+
+2014-01-29 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: support common clock framework
+
+2014-02-09 Paul Bolle <pebolle at tiscali.nl>
+
+ * xtensa: no need to select USE_GENERIC_SMP_HELPERS
+
+2014-02-07 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: fsf: drop nonexistent GPIO32 support
+
+2014-01-31 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: don't pass high memory to bootmem allocator
+
+2014-02-18 Thomas Petazzoni <thomas.petazzoni at free-electrons.com>
+
+ * ARM: 7980/1: kernel: improve error message when LPAE config doesn't match CPU
+
+2014-02-21 Kailang Yang <kailang at realtek.com>
+
+ * ALSA: hda/realtek - Add more entry for enable HP mute led
+
+2014-02-19 Peter Oberparleiter <oberpar at linux.vnet.ibm.com>
+
+ * s390/cio: Fix missing subchannels after CHPID configure on
+
+2014-02-18 Gerald Schaefer <gerald.schaefer at de.ibm.com>
+
+ * s390/pci/dma: use correct segment boundary size
+
+2014-02-10 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * s390/compat: fix sys_sched_getattr compat wrapper
+
+2014-02-21 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-cpufreq'
+
+2014-02-21 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'acpi-pm' and 'acpi-video'
+
+2014-02-21 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'acpi-cleanup', 'acpi-dock', 'acpi-pci' and 'acpi-dsm'
+
+2014-02-12 Dirk Brandewie <dirk.j.brandewie at intel.com>
+
+ * intel_pstate: Add support for Baytrail turbo P states
+
+2014-02-12 Dirk Brandewie <dirk.j.brandewie at intel.com>
+
+ * intel_pstate: Use LFM bus ratio as min ratio/P state
+
+2014-02-20 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'iio-fixes-for-3.14c' of git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio into char-misc-linus
+
+2014-02-20 Shuah Khan <shuah.kh at samsung.com>
+
+ * regulator: core: Change dummy supplies error message to a warning
+
+2014-02-20 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'fixes-for-v3.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb into usb-linus
+
+2014-01-30 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * tcm_qla2xxx: Fix NAA formatted name for NPIV WWPNs
+
+2014-02-19 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * tcm_qla2xxx: Perform configfs depend/undepend for base_tpg
+
+2014-02-19 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * tcm_qla2xxx: Add NPIV specific enable/disable attribute logic
+
+2014-02-19 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * qla2xxx: Check + fail when npiv_vports_inuse exists in shutdown
+
+2014-02-19 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * qla2xxx: Fix qlt_lport_register base_vha callback race
+
+2014-02-20 Jan Kara <jack at suse.cz>
+
+ * quota: Fix race between dqput() and dquot_scan_active()
+
+2014-02-18 Jan Kara <jack at suse.cz>
+
+ * udf: Fix data corruption on file type conversion
+
+2014-02-14 Sujith Manoharan <c_manoha at qca.qualcomm.com>
+
+ * ath9k: Fix ETSI compliance for AR9462 2.0
+
+2014-02-20 Arend van Spriel <arend at broadcom.com>
+
+ * brcmfmac: fix txglomming scatter-gather packet transfers
+
+2014-02-19 Stanislaw Gruszka <sgruszka at redhat.com>
+
+ * ath9k: protect tid->sched check
+
+2014-02-18 Amitkumar Karwar <akarwar at marvell.com>
+
+ * mwifiex: fix cmd and Tx data timeout issue for PCIe cards
+
+2014-02-18 Amitkumar Karwar <akarwar at marvell.com>
+
+ * mwifiex: add NULL check for PCIe Rx skb
+
+2014-02-18 Avinash Patil <patila at marvell.com>
+
+ * mwifiex: clean pcie ring only when device is present
+
+2014-02-17 James Cameron <quozl at laptop.org>
+
+ * libertas: fix scan result loss if SSID IE len 0
+
+2014-02-14 Kirill Tkhai <ktkhai at parallels.com>
+
+ * hostap: Do not free priv until timer handler has actually stopped using it
+
+2014-02-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pci-v3.14-fixes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
+
+2014-02-20 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'for-john' of git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211
+
+2014-02-20 Jiang Liu <jiang.liu at linux.intel.com>
+
+ * ACPI / nouveau: fix probing regression related to _DSM
+
+2014-02-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata
+
+2014-02-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
+
+2014-02-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
+
+2014-02-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes-for-v3.14' of git://git.linaro.org/people/mszyprowski/linux-dma-mapping
+
+2014-02-16 Brian Campbell <brian.campbell at editshare.com>
+
+ * user_namespace.c: Remove duplicated word in comment
+
+2014-02-20 David Howells <dhowells at redhat.com>
+
+ * Sparc: sparc_cpu_model isn't in asm/system.h any more [ver #2]
+
+2014-02-18 Emmanuel Grumbach <emmanuel.grumbach at intel.com>
+
+ * iwlwifi: dvm: clear IWL_STA_UCODE_INPROGRESS when assoc fails
+
+2014-02-20 Peter De Schrijver <pdeschrijver at nvidia.com>
+
+ * clk: tegra124: remove gr2d and gr3d clocks
+
+2014-02-20 Eric Paris <eparis at redhat.com>
+
+ * SELinux: bigendian problems with filename trans rules
+
+2014-02-05 Daniel Mack <zonque at gmail.com>
+
+ * usb: musb: correct use of schedule_delayed_work()
+
+2014-02-18 Josh Cartwright <joshc at codeaurora.org>
+
+ * usb: phy: msm: fix compilation errors when !CONFIG_PM_SLEEP
+
+2014-01-20 Andrzej Pietrasiewicz <andrzej.p at samsung.com>
+
+ * usb: gadget: fix NULL pointer dereference
+
+2014-01-03 Peter Chen <peter.chen at freescale.com>
+
+ * usb: gadget: printer: using gadget_is_otg to check otg support at runtime
+
+2014-02-19 Steffen Klassert <steffen.klassert at secunet.com>
+
+ * xfrm: Clone states properly on migration
+
+2014-02-19 Steffen Klassert <steffen.klassert at secunet.com>
+
+ * xfrm: Take xfrm_state_lock in xfrm_migrate_state_find
+
+2014-02-19 Steffen Klassert <steffen.klassert at secunet.com>
+
+ * xfrm: Fix NULL pointer dereference on sub policy usage
+
+2014-02-19 Steffen Klassert <steffen.klassert at secunet.com>
+
+ * ip6_vti: Fix build when NET_IP_TUNNEL is not set.
+
+2014-02-19 Frank Praznik <frank.praznik at oh.rr.com>
+
+ * HID: sony: Fix work queue issues.
+
+2014-02-20 Joerg Roedel <joro at 8bytes.org>
+
+ * arm/smmu: Use irqsafe spinlock for domain lock
+
+2014-02-18 Grant Likely <grant.likely at linaro.org>
+
+ * of: Add self test for of_match_node()
+
+2014-02-18 Grant Likely <grant.likely at linaro.org>
+
+ * of: Move testcase FDT data into drivers/of
+
+2014-02-19 Kevin Hao <haokexin at gmail.com>
+
+ * of: reimplement the matching method for __of_match_node()
+
+2014-02-20 Johannes Berg <johannes.berg at intel.com>
+
+ * mac80211: fix station wakeup powersave race
+
+2014-02-18 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: dapm: Add locking to snd_soc_dapm_xxxx_pin functions
+
+2014-02-18 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * Input - arizona-haptics: Fix double lock of dapm_mutex
+
+2014-02-17 Johannes Berg <johannes.berg at intel.com>
+
+ * mac80211: insert stations before adding to driver
+
+2014-02-20 Emmanuel Grumbach <emmanuel.grumbach at intel.com>
+
+ * mac80211: fix AP powersave TX vs. wakeup race
+
+2014-02-19 Peter De Schrijver <pdeschrijver at nvidia.com>
+
+ * clk: tegra: Fix vic03 mux index
+
+2014-02-20 Hui Wang <hui.wang at canonical.com>
+
+ * ALSA: hda - Enable front audio jacks on one HP desktop model
+
+2014-02-20 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2014-02-20 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm/for-3.14-rc3' of git://anongit.freedesktop.org/tegra/linux into drm-fixes
+
+2014-02-18 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: wm8400: Fix the wrong number of enum items
+
+2014-02-18 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: isabelle: Fix the wrong number of items in enum ctls
+
+2014-02-18 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: ad1980: Fix wrong number of items for capture source
+
+2014-02-18 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: wm8994: Fix the wrong number of enum items
+
+2014-02-18 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: wm8900: Fix the wrong number of enum items
+
+2014-02-18 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: wm8770: Fix wrong number of enum items
+
+2014-02-13 Paul Gortmaker <paul.gortmaker at windriver.com>
+
+ * sparc32: make copy_to/from_user_page() usable from modular code
+
+2014-02-13 Paul Gortmaker <paul.gortmaker at windriver.com>
+
+ * sparc32: fix build failure for arch_jump_label_transform
+
+2014-02-18 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Revert "ACPI: Blacklist Win8 OSI for some HP laptop 2013 models"
+
+2014-02-18 Aaron Lu <aaron.lu at intel.com>
+
+ * ACPI / video: Add systems that should favour native backlight interface
+
+2014-02-13 Hans de Goede <hdegoede at redhat.com>
+
+ * ACPI / video: Filter the _BCL table for duplicate brightness values
+
+2014-02-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'nfs-for-3.14-4' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
+
+2014-02-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'mfd-fixes-3.14-1' of git://git.linaro.org/people/lee.jones/mfd
+
+2014-01-30 Tomi Valkeinen <tomi.valkeinen at ti.com>
+
+ * ARM: OMAP2+: clock: fix clkoutx2 with CLK_SET_RATE_PARENT
+
+2014-02-05 Illia Smyrnov <illia.smyrnov at globallogic.com>
+
+ * ARM: OMAP4: hwmod: Fix SOFTRESET logic for OMAP4
+
+2014-01-10 Suman Anna <s-anna at ti.com>
+
+ * ARM: DRA7: hwmod data: correct the sysc data for spinlock
+
+2014-02-16 Vaibhav Bedia <vaibhav.bedia at gmail.com>
+
+ * ARM: OMAP5: PRM: Fix reboot handling
+
+2014-02-18 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: sta32x: Fix array access overflow
+
+2014-02-19 Mika Westerberg <mika.westerberg at linux.intel.com>
+
+ * x86: tsc: Add missing Baytrail frequency to the table
+
+2014-02-19 Thomas Gleixner <tglx at linutronix.de>
+
+ * x86, tsc: Fallback to normal calibration if fast MSR calibration fails
+
+2014-02-17 Stephen Boyd <sboyd at codeaurora.org>
+
+ * sched_clock: Prevent callers from seeing half-updated data
+
+2014-02-18 Andy Adamson <andros at netapp.com>
+
+ * NFS fix error return in nfs4_select_rw_stateid
+
+2014-01-26 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * mfd: sec-core: sec_pmic_{suspend,resume}() should depend on CONFIG_PM_SLEEP
+
+2014-01-26 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * mfd: max14577: max14577_{suspend,resume}() should depend on CONFIG_PM_SLEEP
+
+2014-02-03 Lee Jones <lee.jones at linaro.org>
+
+ * mfd: tps65217: Naturalise cross-architecture discrepancies
+
+2014-01-23 Lee Jones <lee.jones at linaro.org>
+
+ * mfd: wm8994-core: Naturalise cross-architecture discrepancies
+
+2014-02-03 Lee Jones <lee.jones at linaro.org>
+
+ * mfd: max8998: Naturalise cross-architecture discrepancies
+
+2014-01-23 Lee Jones <lee.jones at linaro.org>
+
+ * mfd: max8997: Naturalise cross-architecture discrepancies
+
+2014-02-19 Alexander Stein <alexander.stein at systec-electronic.com>
+
+ * spi/topcliff-pch: Fix DMA channel
+
+2014-02-12 Inbal Hacohen <Inbal.Hacohen at intel.com>
+
+ * cfg80211: bugfix in regulatory user hint process
+
+2014-02-19 Hsin-Yu Chao <hychao at chromium.org>
+
+ * ALSA: hda/ca0132 - Fix recording from mode id 0x8
+
+2014-02-19 Hsin-Yu Chao <hychao at chromium.org>
+
+ * ALSA: hda/ca0132 - setup/cleanup streams
+
+2014-02-18 Mike Turquette <mturquette at linaro.org>
+
+ * Merge branch 'for_3.14-rcx/clk-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone into clk-fixes
+
+2014-02-18 Mike Turquette <mturquette at linaro.org>
+
+ * Merge tag 'mvebu-clk-fixes-3.14' of git://git.infradead.org/linux-mvebu into clk-fixes
+
+2014-01-07 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * clk: shmobile: rcar-gen2: Fix qspi divisor
+
+2014-01-07 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * clk: shmobile: rcar-gen2: Fix clock parent all non-PLL clocks
+
+2014-02-19 Eric Sandeen <sandeen at redhat.com>
+
+ * xfs: limit superblock corruption errors to actual corruption
+
+2014-02-19 Eric Sandeen <sandeen at redhat.com>
+
+ * xfs: skip verification on initial "guess" superblock read
+
+2014-02-19 Ben Myers <bpm at sgi.com>
+
+ * MAINTAINERS: SGI no longer maintaining XFS
+
+2014-02-19 Eric Sandeen <sandeen at redhat.com>
+
+ * xfs: xfs_sb_read_verify() doesn't flag bad crcs on primary sb
+
+2014-02-18 Stephen Warren <swarren at nvidia.com>
+
+ * ARM: tegra: only run PL310 init on systems with one
+
+2014-02-18 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: dapm: Correct regulator bypass error messages
+
+2014-02-19 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/wm8993' into asoc-linus
+
+2014-02-19 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/blackfin', 'asoc/fix/da9055', 'asoc/fix/davinci', 'asoc/fix/fsl', 'asoc/fix/fsl-esai', 'asoc/fix/max98090', 'asoc/fix/rt5640', 'asoc/fix/samsung' and 'asoc/fix/txx9aclc-ac97' into asoc-linus
+
+2014-02-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2014-02-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
+
+2014-02-18 Thierry Reding <treding at nvidia.com>
+
+ * ARM: tegra: Add head numbers to display controllers
+
+2014-02-18 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'mvebu-dt-fixes-3.14' of git://git.infradead.org/linux-mvebu into fixes
+
+2014-02-17 Srivatsa S. Bhat <srivatsa.bhat at linux.vnet.ibm.com>
+
+ * cpufreq: powernow-k8: Initialize per-cpu data-structures properly
+
+2014-02-17 viresh kumar <viresh.kumar at linaro.org>
+
+ * cpufreq: remove sysfs link when a cpu != policy->cpu, is removed
+
+2014-02-18 Shawn Guo <shawn.guo at linaro.org>
+
+ * ARM: imx6: build pm-imx6q.c independently of CONFIG_PM
+
+2014-02-18 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'omap-for-v3.14/fixes-against-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes
+
+2014-02-13 Stephen Warren <swarren at nvidia.com>
+
+ * ARM: tegra: fix RTC0 alias for Cardhu
+
+2014-02-15 Guenter Roeck <linux at roeck-us.net>
+
+ * hwmon: (max1668) Fix writing the minimum temperature
+
+2014-02-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2014-02-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-arm
+
+2014-02-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'jfs-3.14-rc4' of git://github.com/kleikamp/linux-shaggy
+
+2014-02-18 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'pwm_pxa_for_v3.14' of https://git.kernel.org/pub/scm/linux/kernel/git/hzhuang1/linux into fixes
+
+2014-02-13 Tejun Heo <tj at kernel.org>
+
+ * cgroup: update cgroup_enable_task_cg_lists() to grab siglock
+
+2014-02-18 Florian Fainelli <f.fainelli at gmail.com>
+
+ * MAINTAINERS: add entry for the PHY library
+
+2014-02-18 Ben Dooks <ben.dooks at codethink.co.uk>
+
+ * of_mdio: fix phy interrupt passing
+
+2014-02-18 Thomas Petazzoni <thomas.petazzoni at free-electrons.com>
+
+ * net: ethernet: update dependency and help text of mvneta
+
+2014-02-18 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * NET: fec: only enable napi if we are successful
+
+2014-02-18 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * af_packet: remove a stray tab in packet_set_ring()
+
+2014-02-18 Kevin Hao <haokexin at gmail.com>
+
+ * Revert "of: search the best compatible match first in __of_match_node()"
+
+2014-02-19 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'ttm-fixes-3.14-2014-02-18' of git://people.freedesktop.org/~thomash/linux into drm-fixes
+
+2014-02-19 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'vmwgfx-fixes-3.14-2014-02-18' of git://people.freedesktop.org/~thomash/linux into drm-fixes
+
+2014-02-19 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-fixes-3.14' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
+
+2014-02-18 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'for-davem' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless
+
+2014-02-15 Lai Jiangshan <laijs at cn.fujitsu.com>
+
+ * workqueue: ensure @task is valid across kthread_stop()
+
+2014-02-17 Daniel Borkmann <dborkman at redhat.com>
+
+ * net: sctp: fix sctp_connectx abi for ia32 emulation/compat mode
+
+2014-02-18 David S. Miller <davem at davemloft.net>
+
+ * Merge tag 'batman-adv-fix-for-davem' of git://git.open-mesh.org/linux-merge
+
+2014-02-17 Kishon Vijay Abraham I <kishon at ti.com>
+
+ * phy: let phy_provider_register be the last step in registering PHY
+
+2014-02-17 Hans de Goede <hdegoede at redhat.com>
+
+ * phy-core: Don't allow building phy-core as a module
+
+2014-02-17 Hans de Goede <hdegoede at redhat.com>
+
+ * phy-core: Don't propagate -ENOSUPP from phy_pm_runtime_get_sync to caller
+
+2014-02-17 Hans de Goede <hdegoede at redhat.com>
+
+ * phy-core: phy_get: Leave error logging to the caller
+
+2014-02-17 Richard Weinberger <richard at nod.at>
+
+ * phy,phy-bcm-kona-usb2.c: Add dependency on HAS_IOMEM
+
+2014-02-14 Daniel Mack <zonque at gmail.com>
+
+ * usb: musb: correct use of schedule_delayed_work()
+
+2014-02-14 Daniel Mack <zonque at gmail.com>
+
+ * usb: musb: do not sleep in atomic context
+
+2014-02-12 Aleksander Morgado <aleksander at aleksander.es>
+
+ * USB: serial: option: blacklist interface 4 for Cinterion PHS8 and PXS8
+
+2014-02-13 Alan Stern <stern at rowland.harvard.edu>
+
+ * USB: EHCI: add delay during suspend to prevent erroneous wakeups
+
+2014-02-14 David A. Long <dave.long at linaro.org>
+
+ * ARM: 7964/1: Detect section mismatches in thumb relocations
+
+2014-02-18 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/ni: fix typo in dpm sq ramping setup
+
+2014-02-18 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/si: fix typo in dpm sq ramping setup
+
+2014-02-18 Christian König <christian.koenig at amd.com>
+
+ * drm/radeon: fix CP semaphores on CIK
+
+2014-02-17 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * drm/radeon: delete a stray tab
+
+2014-02-17 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: fix display tiling setup on SI
+
+2014-02-17 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/dpm: reduce r7xx vblank mclk threshold to 200
+
+2014-02-12 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: fill in DRM_CAPs for cursor size
+
+2014-02-12 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm: add DRM_CAPs for cursor size
+
+2014-02-03 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: unify bpc handling
+
+2014-02-12 Chao Bi <chao.bi at intel.com>
+
+ * mei: set client's read_cb to NULL when flow control fails
+
+2014-02-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
+
+2014-02-19 J. R. Okajima <hooanon05g at gmail.com>
+
+ * nfsd: fix lost nfserrno() call in nfsd_setattr()
+
+2014-01-14 Florian Fainelli <florian at openwrt.org>
+
+ * usb: gadget: bcm63xx_udc: fix build failure on DMA channel code
+
+2014-01-15 Daniel Mack <zonque at gmail.com>
+
+ * usb: musb: do not sleep in atomic context
+
+2014-02-03 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * usb: gadget: s3c2410_udc: Fix build error
+
+2014-02-04 Roger Quadros <rogerq at ti.com>
+
+ * usb: musb: core: Fix remote-wakeup resume
+
+2014-02-04 Ajay Kumar Gupta <ajay.gupta at ti.com>
+
+ * usb: musb: host: Fix SuperSpeed hub enumeration
+
+2014-02-17 Jason Cooper <jason at lakedaemon.net>
+
+ * ARM: dove: dt: revert PMU interrupt controller node
+
+2014-02-18 Mikulas Patocka <mpatocka at redhat.com>
+
+ * dm raid1: fix immutable biovec related BUG when retrying read bio
+
+2014-02-06 Felipe Balbi <balbi at ti.com>
+
+ * usb: musb: fix obex in g_nokia.ko causing kernel panic
+
+2014-02-18 Levente Kurusa <levex at linux.com>
+
+ * ahci: disable NCQ on Samsung pci-e SSDs on macbooks
+
+2014-02-10 Tomasz Nowicki <tomasz.nowicki at linaro.org>
+
+ * ACPI / PCI: Fix memory leak in acpi_pci_irq_enable()
+
+2014-02-12 Masanari Iida <standby24x7 at gmail.com>
+
+ * drm/ttm: Fix memory leak in ttm_agp_backend.c
+
+2014-02-09 Alexandre Courbot <acourbot at nvidia.com>
+
+ * drm/ttm: declare 'struct device' in ttm_page_alloc.h
+
+2014-02-16 Markus Pargmann <mpa at pengutronix.de>
+
+ * dma: sdma: Add imx25 compatible
+
+2014-02-18 Joerg Roedel <joro at 8bytes.org>
+
+ * Merge branch 'for-joerg/arm-smmu/fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into iommu/fixes
+
+2014-02-14 Denis CIOCCA <denis.ciocca at st.com>
+
+ * iio:gyro: bug on L3GD20H gyroscope support
+
+2014-02-14 Beomho Seo <beomho.seo at samsung.com>
+
+ * iio: cm32181: Change cm32181 ambient light sensor driver
+
+2014-02-14 Beomho Seo <beomho.seo at samsung.com>
+
+ * iio: cm36651: Fix read/write integration time function.
+
+2014-02-17 Jan Kara <jack at suse.cz>
+
+ * inotify: Fix reporting of cookies for inotify events
+
+2014-02-18 Hui Wang <hui.wang at canonical.com>
+
+ * ALSA: hda - add headset mic detect quirks for two Dell laptops
+
+2014-02-18 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-nouveau-next' of git://anongit.freedesktop.org/git/nouveau/linux-2.6 into drm-fixes
+
+2014-02-18 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm-intel-fixes-2014-02-14' of ssh://git.freedesktop.org/git/drm-intel into drm-fixes
+
+2014-02-18 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'tda998x-fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-cubox into drm-fixes
+
+2014-02-17 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * jbd2: fix use after free in jbd2_journal_start_reserved()
+
+2014-02-15 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nouveau: fix TTM_PL_TT memtype on pre-nv50
+
+2014-02-13 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nv50/disp: use correct register to determine DP display bpp
+
+2014-02-12 Emil Velikov <emil.l.velikov at gmail.com>
+
+ * drm/nouveau/fb: use correct ram oclass for nv1a hardware
+
+2014-02-08 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nv50/gr: add missing nv_error parameter priv
+
+2014-02-07 Alexandre Courbot <acourbot at nvidia.com>
+
+ * drm/nouveau: fix ENG_RUNLIST register address
+
+2014-02-05 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nv4c/bios: disallow retrieving from prom on nv4x igp's
+
+2014-02-05 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nv4c/vga: decode register is in a different place on nv4x igp's
+
+2014-02-05 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nv4c/mc: nv4x igp's have a different msi rearm register
+
+2014-01-29 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nouveau: set irq_enabled manually
+
+2014-02-12 Vinayak Kale <vkale at apm.com>
+
+ * ARM: 7957/1: add DSB after icache flush in __flush_icache_all()
+
+2014-02-11 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * Fix uses of dma_max_pfn() when converting to a limiting address
+
+2014-02-17 Duan Jiong <duanj.fnst at cn.fujitsu.com>
+
+ * ipv4: fix counter in_slow_tot
+
+2014-02-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
+
+2014-02-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.samba.org/sfrench/cifs-2.6
+
+2014-02-17 David Howells <dhowells at redhat.com>
+
+ * FS-Cache: Handle removal of unadded object to the fscache_object_list rb tree
+
+2014-02-17 Dave Jones <davej at redhat.com>
+
+ * reiserfs: fix utterly brain-damaged indentation.
+
+2014-02-17 Tommie Gannert <tommie at gannert.se>
+
+ * irtty-sir.c: Do not set_termios() on irtty_close()
+
+2014-02-17 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless into for-davem
+
+2014-02-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dma-buf-for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/sumits/dma-buf
+
+2014-02-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/egtvedt/linux-avr32
+
+2014-02-13 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: fix __dcache_readdir()
+
+2014-02-16 Sage Weil <sage at inktank.com>
+
+ * ceph: add acl, noacl options for cephfs mount
+
+2014-02-16 Guangliang Zhao <lucienchao at gmail.com>
+
+ * ceph: make ceph_forget_all_cached_acls() static inline
+
+2014-02-11 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: add missing init_acl() for mkdir() and atomic_open()
+
+2014-02-11 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: fix ceph_set_acl()
+
+2014-02-11 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: fix ceph_removexattr()
+
+2014-02-11 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: remove xattr when null value is given to setxattr()
+
+2014-02-11 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: properly handle XATTR_CREATE and XATTR_REPLACE
+
+2014-02-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
+
+2014-02-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * printk: fix syslog() overflowing user buffer
+
+2013-12-19 David Herrmann <dh.herrmann at gmail.com>
+
+ * HID: hyperv: make sure input buffer is big enough
+
+2013-12-19 David Herrmann <dh.herrmann at gmail.com>
+
+ * HID: Bluetooth: hidp: make sure input buffers are big enough
+
+2014-02-14 Jiri Bohac <jiri at boha.cz>
+
+ * bonding: 802.3ad: make aggregator_identifier bond-private
+
+2014-02-13 Emil Goode <emilgoode at gmail.com>
+
+ * usbnet: remove generic hard_header_len check
+
+2014-02-17 Anthony Olech <anthony.olech.opensource at diasemi.com>
+
+ * Input: da9052_onkey - use correct register bit for key status
+
+2014-02-16 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFSv4: Use the correct net namespace in nfs4_update_server
+
+2014-02-17 Nicolas Dichtel <nicolas.dichtel at 6wind.com>
+
+ * gre: add link local route when local addr is any
+
+2014-02-15 Antonio Quartulli <antonio at meshcoding.com>
+
+ * batman-adv: fix potential kernel paging error for unicast transmissions
+
+2014-02-15 Antonio Quartulli <antonio at meshcoding.com>
+
+ * batman-adv: avoid double free when orig_node initialization fails
+
+2014-02-11 Antonio Quartulli <antonio at open-mesh.com>
+
+ * batman-adv: free skb on TVLV parsing success
+
+2014-02-11 Antonio Quartulli <antonio at open-mesh.com>
+
+ * batman-adv: fix TT CRC computation by ensuring byte order
+
+2014-02-08 Simon Wunderlich <sw at simonwunderlich.de>
+
+ * batman-adv: fix potential orig_node reference leak
+
+2014-01-29 Antonio Quartulli <antonio at open-mesh.com>
+
+ * batman-adv: avoid potential race condition when adding a new neighbour
+
+2014-01-30 Antonio Quartulli <antonio at meshcoding.com>
+
+ * batman-adv: properly check pskb_may_pull return value
+
+2014-01-28 Antonio Quartulli <antonio at meshcoding.com>
+
+ * batman-adv: release vlan object after checking the CRC
+
+2014-01-27 Antonio Quartulli <antonio at meshcoding.com>
+
+ * batman-adv: fix TT-TVLV parsing on OGM reception
+
+2014-02-12 Mikulas Patocka <mpatocka at redhat.com>
+
+ * dm io: fix I/O to multiple destinations
+
+2014-02-06 Mike Snitzer <snitzer at redhat.com>
+
+ * dm thin: avoid metadata commit if a pool's thin devices haven't changed
+
+2014-01-31 Mike Snitzer <snitzer at redhat.com>
+
+ * dm cache: do not add migration to completed list before unhooking bio
+
+2014-01-31 Mike Snitzer <snitzer at redhat.com>
+
+ * dm cache: move hook_info into common portion of per_bio_data structure
+
+2013-12-26 Andrew Bresticker <abrestic at chromium.org>
+
+ * clk: tegra: use max divider if divider overflows
+
+2013-12-26 Andrew Bresticker <abrestic at chromium.org>
+
+ * clk: tegra: cclk_lp has a pllx/2 divider
+
+2013-12-26 Andrew Bresticker <abrestic at chromium.org>
+
+ * clk: tegra: fix sdmmc clks on Tegra1x4
+
+2013-12-26 Mark Zhang <markz at nvidia.com>
+
+ * clk: tegra: fix host1x clock on Tegra124
+
+2013-12-26 David Ung <davidu at nvidia.com>
+
+ * clk: tegra: PLLD2 fixes for hdmi
+
+2013-12-26 Rhyland Klein <rklein at nvidia.com>
+
+ * clk: tegra: Fix PLLD mnp table
+
+2013-12-26 Gabe Black <gabeblack at chromium.org>
+
+ * clk: tegra: Fix PLLP rate table
+
+2013-12-02 Thierry Reding <thierry.reding at gmail.com>
+
+ * clk: tegra: Correct clock number for UARTE
+
+2013-12-19 Peter De Schrijver <pdeschrijver at nvidia.com>
+
+ * clk: tegra: Add missing Tegra20 fuse clks
+
+2014-02-03 Archana Patni <archana.patni at linux.intel.com>
+
+ * HID: hid-sensor-hub: quirk for STM Sensor hub
+
+2014-02-16 Chen Gang <gang.chen.5i5j at gmail.com>
+
+ * avr32: add generic vga.h to Kbuild
+
+2014-02-16 Chen Gang <gang.chen.5i5j at gmail.com>
+
+ * avr32: add generic ioremap_wc() definition in io.h
+
+2014-02-01 Chen Gang <gang.chen.5i5j at gmail.com>
+
+ * avr32: Makefile: add '-D__linux__' flag for gcc-4.4.7 use
+
+2014-01-10 Paul Gortmaker <paul.gortmaker at windriver.com>
+
+ * avr32: fix missing module.h causing build failure in mimc200/fram.c
+
+2014-02-14 Olof Johansson <olof at lixom.net>
+
+ * ARM64: unwind: Fix PC calculation
+
+2014-02-16 Clemens Ladisch <clemens at ladisch.de>
+
+ * ALSA: usb-audio: work around KEF X300A firmware bug
+
+2014-02-13 Linus Walleij <linus.walleij at linaro.org>
+
+ * dma: ste_dma40: don't dereference free:d descriptor
+
+2014-02-16 Daniel Borkmann <dborkman at redhat.com>
+
+ * packet: check for ndo_select_queue during queue selection
+
+2014-02-16 Daniel Borkmann <dborkman at redhat.com>
+
+ * netdevice: move netdev_cap_txqueue for shared usage to header
+
+2014-02-16 Daniel Borkmann <dborkman at redhat.com>
+
+ * netdevice: add queue selection fallback handler for ndo_select_queue
+
+2014-02-14 Ingo Molnar <mingo at elte.hu>
+
+ * drivers/net: tulip_remove_one needs to call pci_disable_device()
+
+2014-02-14 Matija Glavinic Pecotic <matija.glavinic-pecotic.ext at nsn.com>
+
+ * net: sctp: Fix a_rwnd/rwnd management to reflect real state of the receiver's buffer
+
+2014-02-14 Duan Jiong <duanj.fnst at cn.fujitsu.com>
+
+ * ipv4: distinguish EHOSTUNREACH from the ENETUNREACH
+
+2014-02-12 Haiyang Zhang <haiyangz at microsoft.com>
+
+ * hyperv: Fix the carrier status setting
+
+2014-02-13 Gerrit Renker <gerrit at erg.abdn.ac.uk>
+
+ * dccp: re-enable debug macro
+
+2014-02-16 Theodore Ts'o <tytso at mit.edu>
+
+ * ext4: don't leave i_crtime.tv_sec uninitialized
+
+2014-02-12 Gavin Shan <shangw at linux.vnet.ibm.com>
+
+ * powerpc/eeh: Disable EEH on reboot
+
+2014-02-12 Gavin Shan <shangw at linux.vnet.ibm.com>
+
+ * powerpc/eeh: Cleanup on eeh_subsystem_enabled
+
+2014-02-12 Gavin Shan <shangw at linux.vnet.ibm.com>
+
+ * powerpc/powernv: Rework EEH reset
+
+2014-02-12 Anton Blanchard <anton at au1.ibm.com>
+
+ * powerpc: Use unstripped VDSO image for more accurate profiling data
+
+2014-02-12 Anton Blanchard <anton at au1.ibm.com>
+
+ * powerpc: Link VDSOs at 0x0
+
+2014-02-12 Aneesh Kumar K.V <aneesh.kumar at linux.vnet.ibm.com>
+
+ * mm: Use ptep/pmdp_set_numa() for updating _PAGE_NUMA bit
+
+2014-02-12 Aneesh Kumar K.V <aneesh.kumar at linux.vnet.ibm.com>
+
+ * mm: Dirty accountable change only apply to non prot numa case
+
+2014-02-12 Aneesh Kumar K.V <aneesh.kumar at linux.vnet.ibm.com>
+
+ * powerpc/mm: Add new "set" flag argument to pte/pmd update function
+
+2014-01-17 Kleber Sacilotto de Souza <klebers at linux.vnet.ibm.com>
+
+ * powerpc/pseries: Add Gen3 definitions for PCIE link speed
+
+2014-01-17 Kleber Sacilotto de Souza <klebers at linux.vnet.ibm.com>
+
+ * powerpc/pseries: Fix regression on PCI link speed
+
+2014-01-17 Kevin Hao <haokexin at gmail.com>
+
+ * powerpc: Set the correct ksp_limit on ppc32 when switching to irq stack
+
+2014-02-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.14-rc3
+
+2014-02-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
+
+2014-02-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dt-fixes-for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux
+
+2014-02-16 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * SUNRPC: Fix a pipe_version reference leak
+
+2014-02-16 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * SUNRPC: Ensure that gss_auth isn't freed before its upcall messages
+
+2014-02-16 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * ata: sata_mv: Cleanup only the initialized ports
+
+2014-02-15 Theodore Ts'o <tytso at mit.edu>
+
+ * ext4: fix online resize with a non-standard blocks per group setting
+
+2014-02-15 Theodore Ts'o <tytso at mit.edu>
+
+ * ext4: fix online resize with very large inode tables
+
+2014-02-15 Jarno Rajahalme <jrajahalme at nicira.com>
+
+ * openvswitch: Fix race.
+
+2014-02-15 Jarno Rajahalme <jrajahalme at nicira.com>
+
+ * openvswitch: Read tcp flags only then the tranport header is present.
+
+2014-02-14 Jiri Pirko <jiri at resnulli.us>
+
+ * ovs: fix dp check in ovs_dp_reset_user_features
+
+2014-02-13 Stephen Warren <swarren at nvidia.com>
+
+ * ASoC: max98090: make REVISION_ID readable
+
+2014-02-14 Kevin Hao <haokexin at gmail.com>
+
+ * of: search the best compatible match first in __of_match_node()
+
+2014-02-15 Alexander Shiyan <shc_work at mail.ru>
+
+ * ASoC: txx9aclc_ac97: Fix kernel crash on probe
+
+2014-02-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending
+
+2014-02-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'i2c/for-current' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux
+
+2014-02-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branches 'irq-urgent-for-linus' and 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'trace-fixes-v3.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
+
+2014-02-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2014-02-12 Jean-Francois Dagenais <jeff.dagenais at gmail.com>
+
+ * Input: adp5588-keys - get value from data out when dir is out
+
+2014-02-15 Wolfram Sang <wsa at the-dreams.de>
+
+ * Documentation: i2c: mention ACPI method for instantiating devices
+
+2014-02-10 Wolfram Sang <wsa at the-dreams.de>
+
+ * Documentation: i2c: describe devicetree method for instantiating devices
+
+2014-02-15 Filipe David Borba Manana <fdmanana at gmail.com>
+
+ * Btrfs: use right clone root offset for compressed extents
+
+2014-01-15 Anand Jain <Anand.Jain at oracle.com>
+
+ * btrfs: fix null pointer deference at btrfs_sysfs_add_one+0x105
+
+2014-02-13 Wolfram Sang <wsa at the-dreams.de>
+
+ * i2c: mv64xxx: refactor message start to ensure proper initialization
+
+2014-02-15 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / dock: Make 'docked' sysfs attribute work as documented
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'usb-3.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'tty-3.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'staging-3.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'driver-core-3.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'char-misc-3.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
+
+2014-02-14 Pavel Shilovsky <piastry at etersoft.ru>
+
+ * CIFS: Fix too big maxBuf size for SMB3 mounts
+
+2014-02-14 Jeff Layton <jlayton at redhat.com>
+
+ * cifs: ensure that uncached writes handle unmapped areas correctly
+
+2014-02-14 Josef Bacik <jbacik at fb.com>
+
+ * Btrfs: unset DCACHE_DISCONNECTED when mounting default subvol
+
+2014-02-13 Mitch Harder <mitch.harder at sabayonlinux.org>
+
+ * Btrfs: fix max_inline mount option
+
+2014-02-08 Liu Bo <bo.li.liu at oracle.com>
+
+ * Btrfs: fix a lockdep warning when cleaning up aborted transaction
+
+2014-02-14 Chris Mason <clm at fb.com>
+
+ * Revert "btrfs: add ioctl to export size of global metadata reservation"
+
+2014-02-14 Alexander Gordeev <agordeev at redhat.com>
+
+ * ahci: Fix broken fallback to single MSI mode
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14' of git://linux-nfs.org/~bfields/linux
+
+2014-02-14 Bjorn Helgaas <bhelgaas at google.com>
+
+ * PCI: Enable INTx if BIOS left them disabled
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'md/3.14-fixes' of git://neil.brown.name/md
+
+2014-02-12 Brian Norris <computersforpeace at gmail.com>
+
+ * mtd: nand: fix off-by-one read retry mode counting
+
+2014-02-14 Kevin Hao <haokexin at gmail.com>
+
+ * Revert "OF: base: match each node compatible against all given matches first"
+
+2014-02-14 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "misc: eeprom: sunxi: Add new compatibles"
+
+2014-02-14 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "ARM: sunxi: dt: Convert to the new SID compatibles"
+
+2014-02-14 H. Peter Anvin <hpa at linux.intel.com>
+
+ * Merge remote-tracking branch 'efi/urgent' into x86/urgent
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'edac_for_3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fbdev-fixes-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tomba/linux
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.dk/linux-block
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging
+
+2014-02-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-v3.14-fixes' of git://git.infradead.org/battery-2.6
+
+2014-02-14 Roland Dreier <roland at purestorage.com>
+
+ * Merge branches 'cma', 'cxgb4', 'iser', 'misc', 'mlx4', 'mlx5', 'nes', 'ocrdma', 'qib' and 'usnic' into for-next
+
+2014-02-04 Devesh Sharma <devesh.sharma at emulex.com>
+
+ * RDMA/ocrdma: Fix load time panic during GID table init
+
+2014-02-10 Devesh Sharma <devesh.sharma at emulex.com>
+
+ * RDMA/ocrdma: Fix traffic class shift
+
+2014-01-29 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * IB/iser: Fix use after free in iser_snd_completion()
+
+2014-02-04 Roi Dayan <roid at mellanox.com>
+
+ * IB/iser: Avoid dereferencing iscsi_iser conn object when not bound to iser connection
+
+2014-01-23 Upinder Malhi <umalhi at cisco.com>
+
+ * IB/usnic: Fix smatch endianness error
+
+2014-02-13 Florian Vaussard <florian.vaussard at epfl.ch>
+
+ * Documentation: dt: OMAP: Update Overo/Tobi
+
+2014-02-14 Li Zhong <zhong at linux.vnet.ibm.com>
+
+ * workqueue: add args to workqueue lockdep name
+
+2014-02-02 Christoffer Dall <christoffer.dall at linaro.org>
+
+ * arm64: KVM: Add VGIC device control for arm64
+
+2014-02-13 Matt Fleming <matt.fleming at intel.com>
+
+ * x86/efi: Check status field to validate BGRT header
+
+2014-02-12 Borislav Petkov <bp at suse.de>
+
+ * EDAC: Correct workqueue setup path
+
+2014-02-03 Borislav Petkov <bp at suse.de>
+
+ * EDAC: Poll timeout cannot be zero, p2
+
+2014-02-14 Borislav Petkov <bp at suse.de>
+
+ * x86/efi: Fix 32-bit fallout
+
+2014-01-22 Denis Carikli <denis at eukrea.com>
+
+ * video: Kconfig: Allow more broad selection of the imxfb framebuffer driver.
+
+2014-02-12 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * video: exynos: Fix S6E8AX0 LCD driver build error
+
+2014-02-14 Eli Cohen <eli at mellanox.com>
+
+ * IB/mlx5: Remove dependency on X86
+
+2014-02-13 Roland Dreier <roland at purestorage.com>
+
+ * mlx5: Add include of <linux/slab.h> because of kzalloc()/kfree() use
+
+2014-02-13 Doug Anderson <dianders at chromium.org>
+
+ * hwmon: (ntc_thermistor) Avoid math overflow
+
+2014-02-13 Florian Vaussard <florian.vaussard at epfl.ch>
+
+ * ARM: dts: Add support for both OMAP35xx and OMAP36xx Overo/Tobi
+
+2014-02-13 Florian Vaussard <florian.vaussard at epfl.ch>
+
+ * ARM: dts: omap3-tobi: Use the correct vendor prefix
+
+2014-02-13 Florian Vaussard <florian.vaussard at epfl.ch>
+
+ * ARM: dts: omap3-tobi: Fix boot with OMAP36xx-based Overo
+
+2014-02-12 Paul Bolle <pebolle at tiscali.nl>
+
+ * ARM: OMAP2+: Remove legacy macros for zoom platforms
+
+2014-02-09 Paul Bolle <pebolle at tiscali.nl>
+
+ * ARM: OMAP2+: Remove MACH_NOKIA_N800
+
+2014-02-09 Aaro Koskinen <aaro.koskinen at iki.fi>
+
+ * ARM: dts: N900: add missing compatible property
+
+2014-02-09 Aaro Koskinen <aaro.koskinen at iki.fi>
+
+ * ARM: dts: N9/N950: fix boot hang with 3.14-rc1
+
+2014-02-08 Aaro Koskinen <aaro.koskinen at iki.fi>
+
+ * ARM: OMAP1: nokia770: enable tahvo-usb
+
+2014-01-28 Pekon Gupta <pekon at ti.com>
+
+ * ARM: OMAP2+: gpmc: fix: DT ONENAND child nodes not probed when MTD_ONENAND is built as module
+
+2014-01-28 Pekon Gupta <pekon at ti.com>
+
+ * ARM: OMAP2+: gpmc: fix: DT NAND child nodes not probed when MTD_NAND is built as module
+
+2014-01-25 Marek Belisko <marek at goldelico.com>
+
+ * ARM: dts: omap3-gta04: Fix mmc1 properties.
+
+2014-01-25 NeilBrown <neilb at suse.de>
+
+ * ARM: dts: omap3-gta04: Fix 'aux' gpio key flags.
+
+2014-01-15 Nishanth Menon <nm at ti.com>
+
+ * ARM: OMAP2+: add missing ARCH_HAS_OPP
+
+2014-02-12 Mike Marciniszyn <mike.marciniszyn at intel.com>
+
+ * IB/qib: Add missing serdes init sequence
+
+2014-02-06 Kumar Sanghvi <kumaras at chelsio.com>
+
+ * RDMA/cxgb4: Add missing neigh_release in LE-Workaround path
+
+2014-02-09 Moni Shoua <monis at mellanox.co.il>
+
+ * IB: Report using RoCE IP based gids in port caps
+
+2013-12-23 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ARM: dts: am335x-evmsk: Fix mmc1 support
+
+2013-12-23 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ARM: DTS: am335x-evmsk: Correct audio clock frequency
+
+2013-12-21 Marek Belisko <marek at goldelico.com>
+
+ * ARM: dts: omap3-gta04: Add EOC irq gpio line handling.
+
+2014-02-05 Moni Shoua <monis at mellanox.co.il>
+
+ * IB/mlx4: Build the port IBoE GID table properly under bonding
+
+2014-02-05 Moni Shoua <monis at mellanox.co.il>
+
+ * IB/mlx4: Do IBoE GID table resets per-port
+
+2014-02-05 Moni Shoua <monis at mellanox.co.il>
+
+ * IB/mlx4: Do IBoE locking earlier when initializing the GID table
+
+2014-02-13 Dave Kleikamp <dave.kleikamp at oracle.com>
+
+ * jfs: set i_ctime when setting ACL
+
+2014-02-11 Thomas Gleixner <tglx at linutronix.de>
+
+ * tick: Clear broadcast pending bit when switching to oneshot
+
+2014-02-10 Arnaldo Carvalho de Melo <acme at redhat.com>
+
+ * perf trace: Fix ioctl 'request' beautifier build problems on !(i386 || x86_64) arches
+
+2014-02-12 Russell King - ARM Linux <linux at arm.linux.org.uk>
+
+ * hostap: fix "hostap: proc: Use remove_proc_subtree()"
+
+2014-02-10 Stanislaw Gruszka <stf_xl at wp.pl>
+
+ * rtl8187: fix regression on MIPS without coherent DMA
+
+2014-02-07 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * ath5k: shifting the wrong variable for AR5K_AR5210
+
+2014-02-01 Olivier Langlois <olivier at trillion01.com>
+
+ * rtlwifi: Fix incorrect return from rtl_ps_enable_nic()
+
+2014-02-01 Olivier Langlois <olivier at trillion01.com>
+
+ * rtlwifi: rtl8192ce: Fix too long disable of IRQs
+
+2014-02-13 John W. Linville <linville at tuxdriver.com>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi-fixes
+
+2014-02-07 NeilBrown <neilb at suse.de>
+
+ * lockd: send correct lock when granting a delayed lock.
+
+2014-02-12 Dave Jones <davej at redhat.com>
+
+ * drm/i2c: tda998x: Fix memory leak in tda998x_encoder_init error path.
+
+2014-02-06 Petr Písař <petr.pisar at atlas.cz>
+
+ * vt: Fix secure clear screen
+
+2014-02-13 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * regulator: s5m8767: Add missing of_node_put
+
+2014-02-13 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * regulator: s5m8767: Use of_get_child_by_name
+
+2014-02-11 Joe Schultz <jschultz at xes-inc.com>
+
+ * serial: 8250: Support XR17V35x fraction divisor
+
+2014-02-11 Peter Hurley <peter at hurleysoftware.com>
+
+ * n_tty: Fix stale echo output
+
+2014-01-27 Qipan Li <Qipan.Li at csr.com>
+
+ * serial: sirf: fix kernel panic caused by unpaired spinlock
+
+2014-02-11 Dmitry Eremin-Solenikov <dbaryshkov at gmail.com>
+
+ * serial: 8250_pci: unbreak last serial ports on NetMos 9865 cards
+
+2014-02-11 Peter Hurley <peter at hurleysoftware.com>
+
+ * n_tty: Fix poll() when TIME_CHAR and MIN_CHAR == 0
+
+2014-02-13 Michael Grzeschik <m.grzeschik at pengutronix.de>
+
+ * serial: omap: fix rs485 probe on defered pinctrl
+
+2014-01-16 Mika Westerberg <mika.westerberg at linux.intel.com>
+
+ * serial: 8250_dw: fix compilation warning when !CONFIG_PM_SLEEP
+
+2014-01-24 Markus Pargmann <mpa at pengutronix.de>
+
+ * serial: omap-serial: Move info message to probe function
+
+2014-02-13 Alexander Gordeev <agordeev at redhat.com>
+
+ * PCI/MSI: Add pci_enable_msi_exact() and pci_enable_msix_exact()
+
+2014-02-13 Alexander Gordeev <agordeev at redhat.com>
+
+ * PCI/MSI: Fix cut-and-paste errors in documentation
+
+2014-02-13 Alexander Gordeev <agordeev at redhat.com>
+
+ * PCI/MSI: Add pci_enable_msi() documentation back
+
+2014-02-13 Masanari Iida <standby24x7 at gmail.com>
+
+ * PCI/MSI: Fix pci_msix_vec_count() htmldocs failure
+
+2014-02-13 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * PCI/MSI: Fix leak of msi_attrs
+
+2014-02-13 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * PCI/MSI: Check kmalloc() return value, fix leak of name
+
+2014-02-13 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86, smap: smap_violation() is bogus if CONFIG_X86_SMAP is off
+
+2014-02-13 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86, smap: Don't enable SMAP if CONFIG_X86_SMAP is disabled
+
+2014-02-11 Jani Nikula <jani.nikula at intel.com>
+
+ * drm/i915/dp: add native aux defer retry limit
+
+2014-02-11 Jani Nikula <jani.nikula at intel.com>
+
+ * drm/i915/dp: increase native aux defer retry timeout
+
+2014-02-10 Luis G.F <luisgf at luisgf.es>
+
+ * ACPI / SBS: Fix incorrect sscanf() string
+
+2014-02-12 Shuah Khan <shuah.kh at samsung.com>
+
+ * ACPI / thermal: fix thermal driver compile error when CONFIG_PM_SLEEP is undefined
+
+2014-02-12 Shuah Khan <shuah.kh at samsung.com>
+
+ * ACPI / SBS: fix SBS driver compile error when CONFIG_PM_SLEEP is undefined
+
+2014-02-12 Shuah Khan <shuah.kh at samsung.com>
+
+ * ACPI / fan: fix fan driver compile error when CONFIG_PM_SLEEP is undefined
+
+2014-02-12 Shuah Khan <shuah.kh at samsung.com>
+
+ * ACPI / button: fix button driver compile error when CONFIG_PM_SLEEP is undefined
+
+2014-02-12 Shuah Khan <shuah.kh at samsung.com>
+
+ * ACPI / battery: fix battery driver compile error when CONFIG_PM_SLEEP is undefined
+
+2014-02-12 Shuah Khan <shuah.kh at samsung.com>
+
+ * ACPI / AC: fix AC driver compile error when CONFIG_PM_SLEEP is undefined
+
+2014-02-12 Emmanuel Grumbach <emmanuel.grumbach at intel.com>
+
+ * iwlwifi: disable TX AMPDU by default for iwldvm
+
+2014-02-12 Steven Noonan <steven at uplinklabs.net>
+
+ * compiler/gcc4: Make quirk for asm_volatile_goto() unconditional
+
+2014-02-03 Sumit Semwal <sumit.semwal at linaro.org>
+
+ * dma-buf: update debugfs output
+
+2014-02-06 Oleg Nesterov <oleg at redhat.com>
+
+ * md/raid5: Fix CPU hotplug callback registration
+
+2014-02-13 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-cpufreq'
+
+2014-02-11 Sudeep Holla <sudeep.holla at arm.com>
+
+ * MAINTAINERS / cpufreq: update Sudeep's email address
+
+2014-02-12 Dirk Brandewie <dirk.j.brandewie at intel.com>
+
+ * intel_pstate: Remove energy reporting from pstate_sample tracepoint
+
+2014-02-03 Olof Johansson <olof at lixom.net>
+
+ * dma: mv_xor: Silence a bunch of LPAE-related warnings
+
+2014-02-12 Tejun Heo <tj at kernel.org>
+
+ * Revert "cgroup: use an ordered workqueue for cgroup destruction"
+
+2014-02-12 Sagi Grimberg <sagig at mellanox.com>
+
+ * Target/sbc: Fix protection copy routine
+
+2014-02-05 Jingoo Han <jg1.han at samsung.com>
+
+ * IB/srpt: replace strict_strtoul() with kstrtoul()
+
+2014-02-03 Roland Dreier <roland at purestorage.com>
+
+ * target: Simplify command completion by removing CMD_T_FAILED flag
+
+2014-02-03 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iser-target: Fix leak on failure in isert_conn_create_fastreg_pool
+
+2014-02-03 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi-target: Fix SNACK Type 1 + BegRun=0 handling
+
+2014-02-03 Roland Dreier <roland at purestorage.com>
+
+ * target: Fix missing length check in spc_emulate_evpd_83()
+
+2014-01-31 Roland Dreier <roland at purestorage.com>
+
+ * qla2xxx: Remove last vestiges of qla_tgt_cmd.cmd_list
+
+2014-01-30 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Fix 32-bit + CONFIG_LBDAF=n link error w/ sector_div
+
+2014-01-30 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Fix free-after-use regression in PR unregister
+
+2014-02-05 Andrew Lunn <andrew at lunn.ch>
+
+ * PCI: mvebu: Use Device ID and revision from underlying endpoint
+
+2014-02-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'stable/for-linus-3.14-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
+
+2014-02-12 Dylan Reid <dgreid at chromium.org>
+
+ * ASoC: max98090: sync regcache on entering STANDBY
+
+2013-12-29 Julia Lawall <Julia.Lawall at lip6.fr>
+
+ * RDMA/amso1100: Fix error return code
+
+2013-12-29 Julia Lawall <Julia.Lawall at lip6.fr>
+
+ * RDMA/nes: Fix error return code
+
+2014-02-12 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Fix command defines and checks
+
+2014-02-12 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Fix possible integer overflow
+
+2014-02-12 Theodore Ts'o <tytso at mit.edu>
+
+ * ext4: don't try to modify s_flags if the the file system is read-only
+
+2014-02-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'regulator-v3.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
+
+2014-02-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'gpio-v3.14-2' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio
+
+2014-02-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'spi-v3.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi
+
+2014-02-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2014-02-12 Zheng Liu <wenqing.lz at taobao.com>
+
+ * ext4: fix error paths in swap_inode_boot_loader()
+
+2014-02-12 Martin Kepplinger <martink at posteo.de>
+
+ * ALSA: Revert "ALSA: hda/realtek - Avoid invalid COEFs for ALC271X"
+
+2014-02-12 Jens Axboe <axboe at fb.com>
+
+ * block: add cond_resched() to potentially long running ioctl discard loop
+
+2014-02-12 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: blackfin: Fix machine driver Kconfig dependencies
+
+2014-02-12 Eric Whitney <enwlinux at gmail.com>
+
+ * ext4: fix xfstest generic/299 block validity failures
+
+2014-02-12 Steve Twiss <stwiss.opensource at diasemi.com>
+
+ * regulator: da9063: Bug fix when setting max voltage on LDOs 5-11
+
+2014-02-12 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'acpi-dock', 'acpi-scan' and 'acpi-pci-hotplug'
+
+2014-02-12 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / container: Fix error code path in container_device_attach()
+
+2014-02-12 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Remove stray const
+
+2014-02-12 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'regulator/fix/da9055' and 'regulator/fix/max14577' into regulator-linus
+
+2014-02-11 Mika Westerberg <mika.westerberg at linux.intel.com>
+
+ * ACPI / hotplug / PCI: Relax the checking of _STA return values
+
+2014-02-11 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * drm/vmwgfx: unlock on error path in vmw_execbuf_process()
+
+2014-02-12 Charmaine Lee <charmainel at vmware.com>
+
+ * drm/vmwgfx: Get maximum mob size from register SVGA_REG_MOB_MAX_SIZE
+
+2014-02-06 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Fix a couple of sparse warnings and errors
+
+2014-02-11 Felix Fietkau <nbd at openwrt.org>
+
+ * mac80211: send control port protocol frames to the VO queue
+
+2014-02-05 Ingo Tuchscherer <ingo.tuchscherer at linux.vnet.ibm.com>
+
+ * s390/zcrypt: additional check to avoid overflow in msg-type 6 requests
+
+2014-02-11 Dmitry Osipenko <digetx at gmail.com>
+
+ * drm/tegra: Add guard to avoid double disable/enable of RGB outputs
+
+2014-01-07 Erik Faye-Lund <kusmabite at gmail.com>
+
+ * gpu: host1x: do not check previously handled gathers
+
+2014-02-09 Paul Bolle <pebolle at tiscali.nl>
+
+ * drm/tegra: fix typo 'CONFIG_TEGRA_DRM_FBDEV'
+
+2014-02-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
+
+2014-02-11 Roger Pau Monne <roger.pau at citrix.com>
+
+ * xen-blkback: init persistent_purge_work work_struct
+
+2014-02-11 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * ftrace/x86: Use breakpoints for converting function graph caller
+
+2014-02-11 Randy Dunlap <rdunlap at infradead.org>
+
+ * staging/rtl8821ae: fix build, depends on MAC80211
+
+2014-02-12 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'tda998x-fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-cubox into drm-next
+
+2014-02-09 Raymond Wanyoike <raymond.wanyoike at gmail.com>
+
+ * usb: option: blacklist ZTE MF667 net interface
+
+2014-02-11 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'for-usb-linus-2014-02-11' of git://git.kernel.org/pub/scm/linux/kernel/git/sarah/xhci into usb-linus
+
+2014-02-11 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Prevent MI_DISPLAY_FLIP straddling two cachelines on IVB
+
+2014-02-11 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Add intel_ring_cachline_align()
+
+2014-02-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dt-fixes-for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux
+
+2014-02-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'microblaze-3.14-rc3' of git://git.monstr.eu/linux-2.6-microblaze
+
+2014-02-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
+
+2014-02-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2014-02-11 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * SUNRPC: Fix potential memory scribble in xprt_free_bc_request()
+
+2014-02-11 J. Bruce Fields <bfields at redhat.com>
+
+ * nfsd4: fix acl buffer overrun
+
+2014-02-11 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * ring-buffer: Fix first commit on sub-buffer having non-zero delta
+
+2014-02-11 Linus Walleij <linus.walleij at linaro.org>
+
+ * ARM: ux500: disable msp2 device tree node
+
+2014-02-11 Christoph Hellwig <hch at infradead.org>
+
+ * blk-mq: pair blk_mq_start_request / blk_mq_requeue_request
+
+2014-02-11 Christoph Hellwig <hch at infradead.org>
+
+ * blk-mq: dont assume rq->errors is set when returning an error from ->queue_rq
+
+2014-02-10 Kent Overstreet <kmo at daterainc.com>
+
+ * block: Fix cloning of discard/write same bios
+
+2014-02-11 Li Zefan <lizefan at huawei.com>
+
+ * cgroup: protect modifications to cgroup_idr with cgroup_mutex
+
+2014-02-08 Paul Bolle <pebolle at tiscali.nl>
+
+ * ia64/xen: Remove Xen support for ia64 even more
+
+2014-02-10 David Vrabel <david.vrabel at citrix.com>
+
+ * xen: install xen/gntdev.h and xen/gntalloc.h
+
+2014-02-05 David Vrabel <david.vrabel at citrix.com>
+
+ * xen/events: bind all new interdomain events to VCPU0
+
+2014-02-11 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * SUNRPC: Fix races in xs_nospace()
+
+2014-01-28 Tomi Valkeinen <tomi.valkeinen at ti.com>
+
+ * OMAPDSS: fix fck field types
+
+2014-01-27 Tomi Valkeinen <tomi.valkeinen at ti.com>
+
+ * OMAPDSS: DISPC: decimation rounding fix
+
+2014-02-11 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'spi/fix/doc', 'spi/fix/nuc900' and 'spi/fix/rspi' into spi-linus
+
+2014-02-11 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'spi/fix/core' into spi-linus
+
+2014-02-06 Eytan Lifshitz <eytan.lifshitz at intel.com>
+
+ * mac80211: fix memory leak
+
+2014-02-11 Arik Nemtsov <arik at wizery.com>
+
+ * mac80211: fix sched_scan restart on recovery
+
+2014-02-07 Mika Kuoppala <mika.kuoppala at linux.intel.com>
+
+ * drm/i915: Pair va_copy with va_end in i915_error_vprintf
+
+2014-02-07 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915: Fix intel_pipe_to_cpu_transcoder for UMS
+
+2014-02-10 Paul Gortmaker <paul.gortmaker at windriver.com>
+
+ * genirq: Add missing irq_to_desc export for CONFIG_SPARSE_IRQ=n
+
+2014-01-24 Marek Szyprowski <m.szyprowski at samsung.com>
+
+ * x86: dma-mapping: fix GFP_ATOMIC macro usage
+
+2014-01-16 Marek Szyprowski <m.szyprowski at samsung.com>
+
+ * ARM: dma-mapping: fix GFP_ATOMIC macro usage
+
+2014-02-11 Benjamin Herrenschmidt <benh at kernel.crashing.org>
+
+ * powerpc/powernv: Add iommu DMA bypass support for IODA2
+
+2013-12-20 Thierry Reding <thierry.reding at gmail.com>
+
+ * ARM: pxa: Add dummy backlight power supply on Mitac Mio A701
+
+2014-02-10 Eric Dumazet <edumazet at google.com>
+
+ * 6lowpan: fix lockdep splats
+
+2014-02-10 John Greene <jogreene at redhat.com>
+
+ * alx: add missing stats_lock spinlock init
+
+2014-02-08 Richard Yao <ryao at gentoo.org>
+
+ * 9p/trans_virtio.c: Fix broken zero-copy on vmalloc() buffers
+
+2014-02-10 dingtianhong <dingtianhong at huawei.com>
+
+ * bonding: remove unwanted bond lock for enslave processing
+
+2014-02-10 Liu Junliang <liujunliang_ljl at 163.com>
+
+ * USB2NET : SR9800 : One chip USB2.0 USB2NET SR9800 Device Driver Support
+
+2014-01-22 Anton Blanchard <anton at samba.org>
+
+ * powerpc: Fix endian issues in kexec and crash dump code
+
+2014-01-29 Kevin Hao <haokexin at gmail.com>
+
+ * powerpc/ppc32: Fix the bug in the init of non-base exception stack for UP
+
+2013-12-23 Michael Ellerman <mpe at ellerman.id.au>
+
+ * powerpc/xmon: Don't signal we've entered until we're finished printing
+
+2013-12-23 Michael Ellerman <mpe at ellerman.id.au>
+
+ * powerpc/xmon: Fix timeout loop in get_output_lock()
+
+2013-12-23 Michael Ellerman <michael at ellerman.id.au>
+
+ * powerpc/xmon: Don't loop forever in get_output_lock()
+
+2013-12-18 Anshuman Khandual <khandual at linux.vnet.ibm.com>
+
+ * powerpc/perf: Configure BHRB filter before enabling PMU interrupts
+
+2014-01-29 Nathan Fontenot <nfont at linux.vnet.ibm.com>
+
+ * crypto/nx/nx-842: Fix handling of vmalloc addresses
+
+2013-12-10 Michael Ellerman <mpe at ellerman.id.au>
+
+ * powerpc/pseries: Select ARCH_RANDOM on pseries
+
+2014-01-24 Michael Ellerman <mpe at ellerman.id.au>
+
+ * powerpc/perf: Add Power8 cache & TLB events
+
+2014-01-30 Laurent Dufour <ldufour at linux.vnet.ibm.com>
+
+ * powerpc/relocate fix relocate processing in LE mode
+
+2014-01-31 Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>
+
+ * powerpc: Fix kdump hang issue on p8 with relocation on exception enabled.
+
+2014-01-31 Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>
+
+ * powerpc/pseries: Disable relocation on exception while going down during crash.
+
+2014-02-05 Thadeu Lima de Souza Cascardo <cascardo at linux.vnet.ibm.com>
+
+ * powerpc/eeh: Drop taken reference to driver on eeh_rmv_device
+
+2014-02-07 Paul Gortmaker <paul.gortmaker at windriver.com>
+
+ * powerpc: Fix build failure in sysdev/mpic.c for MPIC_WEIRD=y
+
+2014-02-10 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (patches from Andrew Morton)
+
+2014-02-10 Xue jiufei <xuejiufei at huawei.com>
+
+ * ocfs2: check existence of old dentry in ocfs2_link()
+
+2014-02-10 Junxiao Bi <junxiao.bi at oracle.com>
+
+ * ocfs2: update inode size after zeroing the hole
+
+2014-02-10 Younger Liu <younger.liu at huawei.com>
+
+ * ocfs2: fix issue that ocfs2_setattr() does not deal with new_i_size==i_size
+
+2014-02-10 Naoya Horiguchi <n-horiguchi at ah.jp.nec.com>
+
+ * mm/memory-failure.c: move refcount only in !MF_COUNT_INCREASED
+
+2014-02-10 Paul Gortmaker <paul.gortmaker at windriver.com>
+
+ * smp.h: fix x86+cpu.c sparse warnings about arch nonboot CPU calls
+
+2014-02-10 Rafael Aquini <aquini at redhat.com>
+
+ * mm: fix page leak at nfs_symlink()
+
+2014-02-10 Steven Rostedt <rostedt at goodmis.org>
+
+ * slub: do not assert not having lock in removing freed partial
+
+2014-02-10 Borislav Petkov <bp at suse.de>
+
+ * gitignore: add all.config
+
+2014-02-10 Younger Liu <younger.liucn at gmail.com>
+
+ * ocfs2: fix ocfs2_sync_file() if filesystem is readonly
+
+2014-02-10 Prarit Bhargava <prarit at redhat.com>
+
+ * drivers/edac/edac_mc_sysfs.c: poll timeout cannot be zero
+
+2014-02-10 Eric W. Biederman <ebiederm at xmission.com>
+
+ * fs/file.c:fdtable: avoid triggering OOMs from alloc_fdmem
+
+2014-02-10 Mel Gorman <mgorman at suse.de>
+
+ * xen: properly account for _PAGE_NUMA during xen pte translations
+
+2014-02-10 David Rientjes <rientjes at google.com>
+
+ * mm/slub.c: list_lock may not be held in some circumstances
+
+2014-02-09 John Ogness <john.ogness at linutronix.de>
+
+ * tcp: tsq: fix nonagle handling
+
+2014-02-10 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'bridge'
+
+2014-02-07 Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>
+
+ * bridge: Prevent possible race condition in br_fdb_change_mac_address
+
+2014-02-07 Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>
+
+ * bridge: Properly check if local fdb entry can be deleted when deleting vlan
+
+2014-02-07 Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>
+
+ * bridge: Properly check if local fdb entry can be deleted in br_fdb_delete_by_port
+
+2014-02-07 Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>
+
+ * bridge: Properly check if local fdb entry can be deleted in br_fdb_change_mac_address
+
+2014-02-07 Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>
+
+ * bridge: Fix the way to check if a local fdb entry can be deleted
+
+2014-02-07 Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>
+
+ * bridge: Change local fdb entries whenever mac address of bridge device changes
+
+2014-02-07 Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>
+
+ * bridge: Fix the way to find old local fdb entries in br_fdb_change_mac_address
+
+2014-02-10 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * SUNRPC: Don't create a gss auth cache unless rpc.gssd is running
+
+2014-02-10 Ivan Khoronzhuk <ivan.khoronzhuk at ti.com>
+
+ * ARM: keystone: dts: fix clkvcp3 control register address
+
+2014-01-28 Ivan Khoronzhuk <ivan.khoronzhuk at ti.com>
+
+ * clk: keystone: gate: fix clk_init_data initialization
+
+2014-02-10 Steve French <smfrench at gmail.com>
+
+ * [CIFS] Fix cifsacl mounts over smb2 to not call cifs
+
+2014-02-10 Jens Axboe <axboe at fb.com>
+
+ * Merge branch 'stable/for-jens-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip into for-linus
+
+2014-02-03 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * m68k: Wire up sched_setattr and sched_getattr
+
+2014-02-03 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * m68k: Switch to asm-generic/barrier.h
+
+2014-02-03 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * m68k: Sort arch/m68k/include/asm/Kbuild
+
+2014-01-31 Michal Simek <michal.simek at xilinx.com>
+
+ * ARM: zynq: Reserve not DMAable space in front of the kernel
+
+2014-02-10 Kevin Hilman <khilman at linaro.org>
+
+ * Merge tag 'at91-fixes' of git://github.com/at91linux/linux-at91 into fixes
+
+2014-02-06 Nishanth Menon <nm at ti.com>
+
+ * ARM: multi_v7_defconfig: Select CONFIG_SOC_DRA7XX
+
+2014-01-29 Philipp Zabel <p.zabel at pengutronix.de>
+
+ * ARM: imx6: Initialize low-power mode early again
+
+2014-02-10 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-next' of git://git.samba.org/sfrench/cifs-2.6
+
+2014-02-04 Linus Walleij <linus.walleij at linaro.org>
+
+ * ARM: pxa: fix various compilation problems
+
+2014-02-04 Linus Walleij <linus.walleij at linaro.org>
+
+ * ARM: pxa: fix compilation problem on AM300EPD board
+
+2014-02-10 Kevin Hilman <khilman at linaro.org>
+
+ * Merge tag 'mvebu-phy_ata-fixes-3.14' of git://git.infradead.org/linux-mvebu into fixes
+
+2014-02-04 Will Deacon <will.deacon at arm.com>
+
+ * iommu/arm-smmu: fix compilation issue when !CONFIG_ARM_AMBA
+
+2014-02-06 Will Deacon <will.deacon at arm.com>
+
+ * iommu/arm-smmu: set CBARn.BPSHCFG to NSH for s1-s2-bypass contexts
+
+2014-02-05 Will Deacon <will.deacon at arm.com>
+
+ * iommu/arm-smmu: fix table flushing during initial allocations
+
+2014-02-04 Will Deacon <will.deacon at arm.com>
+
+ * iommu/arm-smmu: really fix page table locking
+
+2014-01-03 Yifan Zhang <zhangyf at marvell.com>
+
+ * iommu/arm-smmu: fix pud/pmd entry fill sequence
+
+2014-02-06 Ben Hutchings <ben at decadent.org.uk>
+
+ * perf trace: Add fallback definition of EFD_SEMAPHORE
+
+2013-12-30 Vince Weaver <vincent.weaver at maine.edu>
+
+ * perf list: Fix checking for supported events on older kernels
+
+2014-02-04 Jiri Olsa <jolsa at redhat.com>
+
+ * perf tools: Handle PERF_RECORD_HEADER_EVENT_TYPE properly
+
+2014-02-05 Masami Hiramatsu <masami.hiramatsu.pt at hitachi.com>
+
+ * perf probe: Do not add offset twice to uprobe address
+
+2014-02-06 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFS: Do not set NFS_INO_INVALID_LABEL unless server supports labeled NFS
+
+2014-02-06 Adam Thomson <Adam.Thomson.Opensource at diasemi.com>
+
+ * ASoC: da9055: Fix device registration of PMIC and CODEC devices
+
+2014-02-10 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: fsl-esai: fix ESAI TDM slot setting
+
+2014-02-08 Shawn Guo <shawn.guo at linaro.org>
+
+ * ASoC: fsl: fix pm support of machine drivers
+
+2014-02-10 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / dock: Use acpi_device_enumerated() to check if dock is present
+
+2014-02-07 Will Deacon <will.deacon at arm.com>
+
+ * ARM: 7955/1: spinlock: ensure we have a compiler barrier before sev
+
+2014-02-07 Will Deacon <will.deacon at arm.com>
+
+ * ARM: 7953/1: mm: ensure TLB invalidation is complete before enabling MMU
+
+2014-02-06 Santosh Shilimkar <santosh.shilimkar at ti.com>
+
+ * ARM: 7952/1: mm: Fix the memblock allocation for LPAE machines
+
+2014-02-02 Christoffer Dall <christoffer.dall at linaro.org>
+
+ * ARM: 7950/1: mm: Fix stage-2 device memory attributes
+
+2014-02-10 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix undefined symbol due to builtin/module mixup
+
+2014-02-08 Edgar E. Iglesias <edgar.iglesias at gmail.com>
+
+ * microblaze: Fix a typo when disabling stack protection
+
+2014-01-30 Michal Simek <michal.simek at xilinx.com>
+
+ * microblaze: Define readq and writeq IO helper function
+
+2014-01-30 Michal Simek <michal.simek at xilinx.com>
+
+ * microblaze: Fix missing HZ macro
+
+2014-02-09 Jesper Juhl <jj at chaosbits.net>
+
+ * tcp: correct code comment stating 3 min timeout for FIN_WAIT2, we only do 1 min
+
+2014-02-09 Christian Engelmayer <cengelma at gmx.at>
+
+ * net: vxge: Remove unused device pointer
+
+2014-02-09 Raymond Wanyoike <raymond.wanyoike at gmail.com>
+
+ * net: qmi_wwan: add ZTE MF667
+
+2014-02-08 Christian Engelmayer <cengelma at gmx.at>
+
+ * 3c59x: Remove unused pointer in vortex_eisa_cleanup()
+
+2014-02-07 Maciej Żenczykowski <maze at google.com>
+
+ * net: fix 'ip rule' iif/oif device rename
+
+2014-02-08 Christian Engelmayer <cengelma at gmx.at>
+
+ * wan: dlci: Remove unused netdev_priv pointer
+
+2014-02-07 Christian Engelmayer <cengelma at gmx.at>
+
+ * 6lowpan: Remove unused pointer in lowpan_header_create()
+
+2014-02-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.14-rc2
+
+2014-02-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security
+
+2014-02-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
+
+2014-02-10 James Morris <james.l.morris at oracle.com>
+
+ * Merge branch 'stable-3.14' of git://git.infradead.org/users/pcmoore/selinux into for-linus
+
+2014-02-10 Dave Chinner <dchinner at redhat.com>
+
+ * xfs: ensure correct log item buffer alignment
+
+2014-02-10 Christoph Hellwig <hch at infradead.org>
+
+ * xfs: ensure correct timestamp updates from truncate
+
+2014-02-02 Al Viro <viro at zeniv.linux.org.uk>
+
+ * fix a kmap leak in virtio_console
+
+2014-02-09 Al Viro <viro at zeniv.linux.org.uk>
+
+ * fix O_SYNC|O_APPEND syncing the wrong range on write()
+
+2014-02-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
+
+2014-02-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-02 Stephen Boyd <sboyd at codeaurora.org>
+
+ * genirq: Add devm_request_any_context_irq()
+
+2014-02-04 Steven Rostedt <rostedt at goodmis.org>
+
+ * x86: Use preempt_disable_notrace() in cycles_2_ns()
+
+2014-02-05 Peter Zijlstra <peterz at infradead.org>
+
+ * perf/x86: Fix Userspace RDPMC switch
+
+2014-02-05 Peter Zijlstra <peterz at infradead.org>
+
+ * perf/x86/intel/p6: Add userspace RDPMC quirk for PPro
+
+2014-02-08 Filipe David Borba Manana <fdmanana at gmail.com>
+
+ * Btrfs: fix data corruption when reading/updating compressed extents
+
+2014-02-07 Josef Bacik <jbacik at fb.com>
+
+ * Btrfs: don't loop forever if we can't run because of the tree mod log
+
+2014-02-07 David Sterba <dsterba at suse.cz>
+
+ * btrfs: reserve no transaction units in btrfs_ioctl_set_features
+
+2014-02-07 Jeff Mahoney <jeffm at suse.com>
+
+ * btrfs: commit transaction after setting label and features
+
+2014-02-05 Josef Bacik <jbacik at fb.com>
+
+ * Btrfs: fix assert screwup for the pending move stuff
+
+2014-02-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pinctrl-v3.14-2' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl
+
+2014-02-08 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'iio-fixes-for-3.14b' of git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio into staging-linus
+
+2014-02-08 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * ARM: dts: fix spdif pinmux configuration
+
+2014-02-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'irq-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'jfs-3.14-rc2' of git://github.com/kleikamp/linux-shaggy
+
+2014-02-07 Dave Kleikamp <dave.kleikamp at oracle.com>
+
+ * jfs: fix generic posix ACL regression
+
+2014-02-08 Tejun Heo <tj at kernel.org>
+
+ * cgroup: fix locking in cgroup_cfts_commit()
+
+2014-02-08 Tejun Heo <tj at kernel.org>
+
+ * cgroup: fix error return from cgroup_create()
+
+2014-02-08 Tejun Heo <tj at kernel.org>
+
+ * cgroup: fix error return value in cgroup_mount()
+
+2014-01-27 Guenter Roeck <linux at roeck-us.net>
+
+ * iio: max1363: Use devm_regulator_get_optional for optional regulator
+
+2014-01-10 Peter Meerwald <pmeerw at pmeerw.net>
+
+ * iio:accel:bma180: Use modifier instead of index in channel specification
+
+2014-01-24 Marcus Folkesson <marcus.folkesson at gmail.com>
+
+ * iio: adis16400: Set timestamp as the last element in chan_spec
+
+2014-02-04 Beomho Seo <beomho.seo at samsung.com>
+
+ * iio: ak8975: Fix calculation formula for convert micro tesla to gauss unit
+
+2014-02-08 Hartmut Knaack <knaack.h at gmx.de>
+
+ * staging:iio:ad799x fix typo in ad799x_events[]
+
+2014-01-13 Alexandre Belloni <alexandre.belloni at free-electrons.com>
+
+ * iio: mxs-lradc: remove useless scale_available files
+
+2014-01-13 Alexandre Belloni <alexandre.belloni at free-electrons.com>
+
+ * iio: mxs-lradc: fix buffer overflow
+
+2014-01-10 Peter Meerwald <pmeerw at pmeerw.net>
+
+ * iio:magnetometer:mag3110: Fix output of decimal digits in show_int_plus_micros()
+
+2014-01-10 Peter Meerwald <pmeerw at pmeerw.net>
+
+ * iio:magnetometer:mag3110: Report busy in _read_raw() / write_raw() when buffer is enabled
+
+2014-01-31 Richard Weinberger <richard at nod.at>
+
+ * watchdog: dw_wdt: Add dependency on HAS_IOMEM
+
+2014-02-07 Steve French <smfrench at gmail.com>
+
+ * [CIFS] clean up page array when uncached write send fails
+
+2014-02-07 Jeff Layton <jlayton at redhat.com>
+
+ * cifs: use a flexarray in cifs_writedata
+
+2014-02-07 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * drivers/base: fix devres handling for master device
+
+2014-02-03 Sudeep Dutt <sudeep.dutt at intel.com>
+
+ * misc: mic: fix possible signed underflow (undefined behavior) in userspace API
+
+2014-02-02 Maxime Ripard <maxime.ripard at free-electrons.com>
+
+ * ARM: sunxi: dt: Convert to the new SID compatibles
+
+2014-02-02 Maxime Ripard <maxime.ripard at free-electrons.com>
+
+ * misc: eeprom: sunxi: Add new compatibles
+
+2014-01-25 Christian Engelmayer <cengelma at gmx.at>
+
+ * misc: genwqe: Fix potential memory leak when pinning memory
+
+2014-01-28 Fu Wei <wefu at redhat.com>
+
+ * Documentation:Update Documentation/zh_CN/arm64/memory.txt
+
+2014-01-28 Fu Wei <wefu at redhat.com>
+
+ * Documentation:Update Documentation/zh_CN/arm64/booting.txt
+
+2014-01-28 Fu Wei <wefu at redhat.com>
+
+ * Documentation:Chinese translation of Documentation/arm64/tagged-pointers.txt
+
+2014-01-31 Sarah Sharp <sarah.a.sharp at linux.intel.com>
+
+ * Revert "usb: xhci: Link TRB must not occur within a USB payload burst"
+
+2014-01-31 Sarah Sharp <sarah.a.sharp at linux.intel.com>
+
+ * Revert "xhci: Avoid infinite loop when sg urb requires too many trbs"
+
+2014-01-31 Sarah Sharp <sarah.a.sharp at linux.intel.com>
+
+ * Revert "xhci: Set scatter-gather limit to avoid failed block writes."
+
+2014-01-31 Sarah Sharp <sarah.a.sharp at linux.intel.com>
+
+ * xhci 1.0: Limit arbitrarily-aligned scatter gather.
+
+2014-02-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'driver-core-3.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core
+
+2014-02-02 Maxime Ripard <maxime.ripard at free-electrons.com>
+
+ * ARM: sunxi: dt: Change the touchscreen compatibles
+
+2014-02-01 Maxime Ripard <maxime.ripard at free-electrons.com>
+
+ * ARM: sun7i: dt: Fix interrupt trigger types
+
+2014-02-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
+
+2014-02-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
+
+2014-02-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'upstream' of git://git.linux-mips.org/pub/scm/ralf/upstream-linus
+
+2014-02-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'v4l_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
+
+2014-02-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging
+
+2014-02-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2014-02-05 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: do not dereference a NULL bio pointer
+
+2014-02-07 H. Peter Anvin <hpa at linux.intel.com>
+
+ * Merge tag 'efi-urgent' into x86/urgent
+
+2014-02-07 Jan Moskyto Matejka <mq at suse.cz>
+
+ * Modpost: fixed USB alias generation for ranges including 0x9 and 0xA
+
+2014-02-05 Maurizio Lombardi <mlombard at redhat.com>
+
+ * wlags49_h2: Fix overflow in wireless_set_essid()
+
+2014-02-05 Alan Cox <alan at linux.intel.com>
+
+ * xlr_net: Fix missing trivial allocation check
+
+2014-02-04 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * staging: r8188eu: overflow in rtw_p2p_get_go_device_address()
+
+2014-02-04 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * staging: r8188eu: array overflow in rtw_mp_ioctl_hdl()
+
+2014-02-02 Larry Finger <Larry.Finger at lwfinger.net>
+
+ * staging: r8188eu: Fix typo in USB_DEVICE list
+
+2014-01-28 Heinrich Schuchardt <xypron.glpk at gmx.de>
+
+ * usbip/userspace/libsrc/names.c: memory leak
+
+2014-01-22 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * gpu: ion: dereferencing an ERR_PTR
+
+2014-01-21 Ian Abbott <abbotti at mev.co.uk>
+
+ * staging: comedi: usbduxsigma: fix unaligned dereferences
+
+2014-01-21 Ian Abbott <abbotti at mev.co.uk>
+
+ * staging: comedi: fix too early cleanup in comedi_auto_config()
+
+2014-01-20 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * staging: android: ion: dummy: fix an error code
+
+2014-02-03 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: take map_sem for read in handle_reply()
+
+2014-01-31 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: factor out logic from ceph_osdc_start_request()
+
+2014-01-24 Cédric Dufour - Idiap Research Institute <cedric.dufour at idiap.ch>
+
+ * staging: lustre: fix quotactl permission denied (LU-4530)
+
+2014-02-04 Prakash Kamliya <pkamliya at codeaurora.org>
+
+ * staging: android: sync: Signal pt before sync_timeline object gets destroyed
+
+2014-02-05 H Hartley Sweeten <hsweeten at visionengravers.com>
+
+ * staging: comedi: adv_pci1710: fix analog output readback value
+
+2014-02-06 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * staging: r8188eu: memory corruption handling long ssids
+
+2014-02-07 Mark Rutland <mark.rutland at arm.com>
+
+ * arm64: defconfig: Expand default enabled features
+
+2014-02-02 Steve French <smfrench at gmail.com>
+
+ * retrieving CIFS ACLs when mounted with SMB2 fails dropping session
+
+2014-02-01 Steve French <smfrench at gmail.com>
+
+ * Add protocol specific operation for CIFS xattrs
+
+2014-02-04 Will Deacon <will.deacon at arm.com>
+
+ * arm64: asm: remove redundant "cc" clobbers
+
+2014-02-04 Will Deacon <will.deacon at arm.com>
+
+ * arm64: atomics: fix use of acquire + release for full barrier semantics
+
+2014-02-07 Hannes Reinecke <hare at suse.de>
+
+ * tty: Set correct tty name in 'active' sysfs attribute
+
+2014-01-07 Lars Poeschel <poeschel at lemonage.de>
+
+ * tty: n_gsm: Fix for modems with brk in modem status control
+
+2014-01-15 Paul Gortmaker <paul.gortmaker at windriver.com>
+
+ * drivers/tty/hvc: don't use module_init in non-modular hyp. console code
+
+2014-02-04 Paul Bolle <pebolle at tiscali.nl>
+
+ * raw: set range for MAX_RAW_DEVS
+
+2014-02-04 Paul Bolle <pebolle at tiscali.nl>
+
+ * raw: test against runtime value of max_raw_minors
+
+2014-02-06 Adam Thomson <Adam.Thomson.Opensource at diasemi.com>
+
+ * regulator: da9055: Remove use of regmap_irq_get_virq()
+
+2014-01-16 K. Y. Srinivasan <kys at microsoft.com>
+
+ * Drivers: hv: vmbus: Don't timeout during the initial connection with host
+
+2014-01-15 K. Y. Srinivasan <kys at microsoft.com>
+
+ * Drivers: hv: vmbus: Specify the target CPU that should receive notification
+
+2014-02-04 Nicolas Ferre <nicolas.ferre at atmel.com>
+
+ * ARM: at91: add Atmel's SAMA5D3 Xplained board
+
+2013-12-17 Boris BREZILLON <b.brezillon at overkiz.com>
+
+ * spi/atmel: document clock properties
+
+2013-12-17 Boris BREZILLON <b.brezillon at overkiz.com>
+
+ * mmc: atmel-mci: document clock properties
+
+2014-01-14 Bo Shen <voice.shen at atmel.com>
+
+ * ARM: at91: enable USB host on at91sam9n12ek board
+
+2014-01-16 Boris BREZILLON <b.brezillon at overkiz.com>
+
+ * ARM: at91/dt: fix sama5d3 ohci hclk clock reference
+
+2014-01-15 Jean-Jacques Hiblot <jjhiblot at traphandler.com>
+
+ * ARM: at91/dt: sam9263: fix compatibility string for the I2C
+
+2014-02-07 Martyn Welch <martyn.welch at ge.com>
+
+ * VME: Correct read/write alignment algorithm
+
+2014-02-07 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915: Disable dp aux irq on g4x
+
+2014-02-06 Hugh Dickins <hughd at google.com>
+
+ * cgroup: use an ordered workqueue for cgroup destruction
+
+2014-02-07 Jarkko Nikula <jarkko.nikula at linux.intel.com>
+
+ * ASoC: rt5640: Add ACPI ID for Intel Baytrail
+
+2014-02-07 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix mic capture on Sony VAIO Pro 11
+
+2014-02-07 David Henningsson <david.henningsson at canonical.com>
+
+ * ALSA: hda - Add a headset quirk for Dell XPS 13
+
+2014-01-30 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix inconsistent Mic mute LED
+
+2014-02-06 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix leftover ifdef checks after modularization
+
+2014-02-06 Eli Cohen <eli at dev.mellanox.co.il>
+
+ * IB/mlx5: Don't set "block multicast loopback" capability
+
+2014-02-06 Adam Thomson <Adam.Thomson.Opensource at diasemi.com>
+
+ * hwmon: (da9055) Remove use of regmap_irq_get_virq()
+
+2014-02-06 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'acpi-cleanup' and 'acpi-video'
+
+2014-02-06 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-cpufreq'
+
+2014-02-06 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'acpi-pci-hotplug' and 'acpi-hotplug'
+
+2014-02-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (patches from Andrew Morton)
+
+2014-02-06 KOSAKI Motohiro <kosaki.motohiro at jp.fujitsu.com>
+
+ * mm: __set_page_dirty uses spin_lock_irqsave instead of spin_lock_irq
+
+2014-02-06 Tang Chen <tangchen at cn.fujitsu.com>
+
+ * arch/x86/mm/numa.c: fix array index overflow when synchronizing nid to memblock.reserved.
+
+2014-02-06 Tang Chen <tangchen at cn.fujitsu.com>
+
+ * arch/x86/mm/numa.c: initialize numa_kernel_nodes in numa_clear_kernel_node_hotplug()
+
+2014-02-06 KOSAKI Motohiro <kosaki.motohiro at jp.fujitsu.com>
+
+ * mm: __set_page_dirty_nobuffers() uses spin_lock_irqsave() instead of spin_lock_irq()
+
+2014-02-06 Weijie Yang <weijie.yang at samsung.com>
+
+ * mm/swap: fix race on swap_info reuse between swapoff and swapon
+
+2014-02-06 Shaohua Li <shli at kernel.org>
+
+ * swap: add a simple detector for inappropriate swapin readahead
+
+2014-02-06 Zongxun Wang <wangzongxun at huawei.com>
+
+ * ocfs2: free allocated clusters if error occurs after ocfs2_claim_clusters
+
+2014-02-06 Randy Dunlap <rdunlap at infradead.org>
+
+ * Documentation/kernel-parameters.txt: fix memmap= language
+
+2014-02-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2014-02-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2014-02-05 andrea.merello <andrea.merello at gmail.com>
+
+ * rtl8180: Add error check for pci_map_single return value in TX path
+
+2014-02-05 andrea.merello <andrea.merello at gmail.com>
+
+ * rtl8180: Add error check for pci_map_single return value in RX path
+
+2014-02-06 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'for-john' of git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211
+
+2014-02-03 Borislav Petkov <bp at suse.de>
+
+ * x86, microcode, AMD: Unify valid container checks
+
+2014-01-25 Sebastian Hesselbarth <sebastian.hesselbarth at gmail.com>
+
+ * clk: mvebu: kirkwood: maintain clock init order
+
+2014-01-25 Sebastian Hesselbarth <sebastian.hesselbarth at gmail.com>
+
+ * clk: mvebu: dove: maintain clock init order
+
+2014-01-25 Sebastian Hesselbarth <sebastian.hesselbarth at gmail.com>
+
+ * clk: mvebu: armada-xp: maintain clock init order
+
+2014-01-25 Sebastian Hesselbarth <sebastian.hesselbarth at gmail.com>
+
+ * clk: mvebu: armada-370: maintain clock init order
+
+2014-01-24 Sebastian Hesselbarth <sebastian.hesselbarth at gmail.com>
+
+ * irqchip: orion: clear stale interrupts in irq_startup
+
+2014-01-23 Sebastian Hesselbarth <sebastian.hesselbarth at gmail.com>
+
+ * irqchip: orion: use handle_edge_irq on bridge irqs
+
+2014-01-23 Sebastian Hesselbarth <sebastian.hesselbarth at gmail.com>
+
+ * irqchip: orion: clear bridge cause register on init
+
+2014-02-06 Peter Oberparleiter <oberpar at linux.vnet.ibm.com>
+
+ * x86, hweight: Fix BUG when booting with CONFIG_GCOV_PROFILE_ALL=y
+
+2014-02-04 Tim Kryger <tim.kryger at linaro.org>
+
+ * clocksource: Kona: Print warning rather than panic
+
+2014-01-24 Mikulas Patocka <mpatocka at redhat.com>
+
+ * time: Fix overflow when HZ is smaller than 60
+
+2014-02-06 Huei-Horng Yo <hiroshi at ghostsinthelab.org>
+
+ * HID: apple: add Apple wireless keyboard 2011 JIS model support
+
+2014-02-05 Laxman Dewangan <ldewangan at nvidia.com>
+
+ * pinctrl: tegra: return correct error type
+
+2014-02-05 Florian Vaussard <florian.vaussard at epfl.ch>
+
+ * pinctrl: do not init debugfs entries for unimplemented functionalities
+
+2014-02-05 Aaro Koskinen <aaro.koskinen at iki.fi>
+
+ * MIPS: fpu.h: Fix build when CONFIG_BUG is not set
+
+2014-02-06 Will Deacon <will.deacon at arm.com>
+
+ * arm64: barriers: allow dsb macro to take option parameter
+
+2014-02-05 Sebastian Ott <sebott at linux.vnet.ibm.com>
+
+ * s390/cio: improve cio_commit_config
+
+2014-02-04 Lars-Peter Clausen <lars at metafoo.de>
+
+ * gpio: consumer.h: Move forward declarations outside #ifdef
+
+2014-01-29 Johannes Berg <johannes.berg at intel.com>
+
+ * mac80211: fix virtual monitor interface iteration
+
+2014-02-01 Johannes Berg <johannes.berg at intel.com>
+
+ * mac80211: fix fragmentation code, particularly for encryption
+
+2014-01-30 Sujith Manoharan <c_manoha at qca.qualcomm.com>
+
+ * mac80211: Fix IBSS disconnect
+
+2014-01-27 Emmanuel Grumbach <emmanuel.grumbach at intel.com>
+
+ * mac80211: release the channel in error path in start_ap
+
+2014-01-22 Johannes Berg <johannes.berg at intel.com>
+
+ * cfg80211: send scan results from work queue
+
+2014-01-22 Johannes Berg <johannes.berg at intel.com>
+
+ * cfg80211: fix scan done race
+
+2014-01-30 Dave Airlie <airlied at redhat.com>
+
+ * drm/radeon: allow geom rings to be setup on r600/r700 (v2)
+
+2014-02-06 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'vmwgfx-fixes-3.14-2014-02-05' of git://people.freedesktop.org/~thomash/linux into drm-next
+
+2014-02-06 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'ttm-fixes-3.14-2014-02-05' of git://people.freedesktop.org/~thomash/linux into drm-next
+
+2014-02-05 Dave Airlie <airlied at redhat.com>
+
+ * drm/mgag200,ast,cirrus: fix regression with drm_can_sleep conversion
+
+2014-02-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-02-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'stable/for-linus-3.14-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
+
+2014-02-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'please-pull-ia64-syscalls' of git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux
+
+2014-02-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.infradead.org/users/willy/linux-nvme
+
+2014-02-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'regulator-v3.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
+
+2014-02-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
+
+2014-01-14 Matt Fleming <matt.fleming at intel.com>
+
+ * x86/efi: Allow mapping BGRT on x86-32
+
+2014-02-05 Ingo Molnar <mingo at kernel.org>
+
+ * x86: Disable CONFIG_X86_DECODER_SELFTEST in allmod/allyesconfigs
+
+2014-02-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * execve: use 'struct filename *' for executable name passing
+
+2014-01-29 Tejun Heo <tj at kernel.org>
+
+ * kernfs: make kernfs_deactivate() honor KERNFS_LOCKDEP flag
+
+2014-01-28 Christian Engelmayer <cengelma at gmx.at>
+
+ * usb: core: Fix potential memory leak adding dyn USBdevice IDs
+
+2014-02-02 Ulrich Hahn <uhahn at eanco.de>
+
+ * USB: ftdi_sio: add Tagsys RFID Reader IDs
+
+2014-02-04 Bjørn Mork <bjorn at mork.no>
+
+ * usb: qcserial: add Netgear Aircard 340U
+
+2014-01-30 Stephen Smalley <sds at tycho.nsa.gov>
+
+ * SELinux: Fix kernel BUG on empty security contexts.
+
+2014-01-28 Paul Moore <pmoore at redhat.com>
+
+ * selinux: add SOCK_DIAG_BY_FAMILY to the list of netlink message types
+
+2014-02-05 Krzysztof Kozlowski <k.kozlowski at samsung.com>
+
+ * regulator: max14577: Add missing of_node_put
+
+2014-02-04 Geert Uytterhoeven <geert+renesas at linux-m68k.org>
+
+ * DT: Add vendor prefix for Spansion Inc.
+
+2014-02-03 Geert Uytterhoeven <geert+renesas at linux-m68k.org>
+
+ * of/device: Nullify match table in of_match_device() for CONFIG_OF=n
+
+2014-01-30 Heiko Stuebner <heiko.stuebner at bqreaders.com>
+
+ * dt-bindings: add vendor-prefix for neonode
+
+2014-02-03 Kleber Sacilotto de Souza <klebers at linux.vnet.ibm.com>
+
+ * of: fix PCI bus match for PCIe slots
+
+2014-02-03 Rob Herring <robh at kernel.org>
+
+ * of: restructure for_each macros to fix compile warnings
+
+2014-01-28 Paul Moore <pmoore at redhat.com>
+
+ * Merge tag 'v3.13' into stable-3.14
+
+2014-02-05 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/ttm: Don't clear page metadata of imported sg pages
+
+2014-02-04 Colin Cross <ccross at android.com>
+
+ * security: select correct default LSM_MMAP_MIN_ADDR on arm on arm64
+
+2014-02-05 Catalin Marinas <catalin.marinas at arm.com>
+
+ * arm64: compat: Wire up new AArch32 syscalls
+
+2014-02-03 Nathan Lynch <nathan_lynch at mentor.com>
+
+ * arm64: vdso: update wtm fields for CLOCK_MONOTONIC_COARSE
+
+2014-02-05 Nathan Lynch <nathan_lynch at mentor.com>
+
+ * arm64: vdso: fix coarse clock handling
+
+2014-02-04 Toshi Kani <toshi.kani at hp.com>
+
+ * ACPI / hotplug: Fix panic on eject to ejected device
+
+2014-02-05 Mark Rutland <mark.rutland at arm.com>
+
+ * arm64: simplify pgd_alloc
+
+2014-02-05 Mark Rutland <mark.rutland at arm.com>
+
+ * arm64: fix typo: s/SERRROR/SERROR/
+
+2014-02-04 Catalin Marinas <catalin.marinas at arm.com>
+
+ * arm64: Invalidate the TLB when replacing pmd entries during boot
+
+2014-02-04 Laura Abbott <lauraa at codeaurora.org>
+
+ * arm64: Align CMA sizes to PAGE_SIZE
+
+2014-02-05 Vinayak Kale <vkale at apm.com>
+
+ * arm64: add DSB after icache flush in __flush_icache_all()
+
+2014-02-04 Axel Lin <axel.lin at ingics.com>
+
+ * gpio: tb10x: GPIO_TB10X needs to select GENERIC_IRQ_CHIP
+
+2014-02-04 Axel Lin <axel.lin at ingics.com>
+
+ * gpio: clps711x: Add module alias to support module auto loading
+
+2014-02-03 Martin Schwidefsky <schwidefsky at de.ibm.com>
+
+ * s390: fix kernel crash due to linkage stack instructions
+
+2014-01-30 Nitin A Kamble <nitin.a.kamble at intel.com>
+
+ * genirq: Generic irq chip requires IRQ_DOMAIN
+
+2014-01-24 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/ttm: Fix TTM object open regression
+
+2014-01-30 Dave Jones <davej at redhat.com>
+
+ * vmwgfx: Fix unitialized stack read in vmw_setup_otable_base
+
+2014-02-05 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Improve loopback path lookups for AD1983
+
+2014-02-05 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Reemit context bindings when necessary v2
+
+2014-01-31 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Detect old user-space drivers and set up legacy emulation v2
+
+2014-01-31 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Emulate legacy shaders on guest-backed devices v2
+
+2014-01-30 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Fix legacy surface reference size copyback
+
+2014-01-30 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Fix SET_SHADER_CONST emulation on guest-backed devices
+
+2014-01-30 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Fix regression caused by "drm/ttm: make ttm reservation calls behave like reservation calls"
+
+2014-01-30 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Don't commit staged bindings if execbuf fails
+
+2014-02-03 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix missing VREF setup for Mac Pro 1,1
+
+2014-02-02 Andy Zhou <azhou at nicira.com>
+
+ * openvswitch: Suppress error messages on megaflow updates
+
+2014-02-05 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Add missing mixer widget for AD1983
+
+2014-01-31 Pravin B Shelar <pshelar at nicira.com>
+
+ * openvswitch: Fix ovs_flow_free() ovs-lock assert.
+
+2014-01-23 Daniele Di Proietto <daniele.di.proietto at gmail.com>
+
+ * openvswitch: Fix ovs_dp_cmd_msg_size()
+
+2014-01-21 Andy Zhou <azhou at nicira.com>
+
+ * openvswitch: Fix kernel panic on ovs_flow_free
+
+2014-01-14 Thomas Graf <tgraf at suug.ch>
+
+ * openvswitch: Pad OVS_PACKET_ATTR_PACKET if linear copy was performed
+
+2014-02-03 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda/realtek - Avoid invalid COEFs for ALC271X
+
+2014-02-04 Andrew Lunn <andrew at lunn.ch>
+
+ * ata: sata_mv: Fix probe failures with optional phys
+
+2014-02-04 Andrew Lunn <andrew at lunn.ch>
+
+ * drivers: phy: Add support for optional phys
+
+2014-02-04 Andrew Lunn <andrew at lunn.ch>
+
+ * drivers: phy: Make NULL a valid phy reference
+
+2014-02-04 David Vrabel <david.vrabel at citrix.com>
+
+ * xen-netfront: handle backend CLOSED without CLOSING
+
+2014-02-04 Dmitry Kravkov <dmitry at broadcom.com>
+
+ * bnx2x: fix L2-GRE TCP issues
+
+2014-02-04 Bjørn Mork <bjorn at mork.no>
+
+ * net: qmi_wwan: add Netgear Aircard 340U
+
+2014-02-05 Dave Airlie <airlied at redhat.com>
+
+ * drm/mgag200: fix typo causing bw limits to be ignored on some chips
+
+2014-02-04 Fernando Luis Vazquez Cao <fernando_b1 at lab.ntt.co.jp>
+
+ * rtnetlink: fix oops in rtnl_link_get_slave_info_data_size
+
+2014-02-04 Stefan Sørensen <stefan.sorensen at spectralink.com>
+
+ * ptp: Allow selecting trigger/event index in testptp
+
+2014-02-04 Max Filippov <jcmvbkbc at gmail.com>
+
+ * net: ethoc: set up MII management bus clock
+
+2014-02-04 Max Filippov <jcmvbkbc at gmail.com>
+
+ * net: ethoc: don't advertise gigabit speed on attached PHY
+
+2014-02-03 Florian Fainelli <f.fainelli at gmail.com>
+
+ * net: phy: ensure Gigabit features are masked off if requested
+
+2014-02-03 Stefan Sørensen <stefan.sorensen at spectralink.com>
+
+ * net:phy:dp83640: Initialize PTP clocks at device init.
+
+2014-02-03 Stefan Sørensen <stefan.sorensen at spectralink.com>
+
+ * net:phy:dp83640: Do not hardcode timestamping event edge
+
+2014-01-12 Willy Tarreau <w at 1wt.eu>
+
+ * ARM: mvebu: dt: add missing alias 'eth3' on Armada XP mv78260
+
+2014-02-05 NeilBrown <neilb at suse.de>
+
+ * md/raid1: restore ability for check and repair to fix read errors.
+
+2014-01-21 Luis G.F <luisgf at luisgf.es>
+
+ * ACPI / battery: Fix incorrect sscanf() string in acpi_battery_init_alarm()
+
+2014-01-30 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * ACPI / proc: remove unneeded NULL check
+
+2014-01-30 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * ACPI / utils: remove a pointless NULL check
+
+2014-02-03 Mika Westerberg <mika.westerberg at linux.intel.com>
+
+ * ACPI / video: Add HP EliteBook Revolve 810 to the blacklist
+
+2014-02-04 Stanislaw Gruszka <sgruszka at redhat.com>
+
+ * pinctrl: protect pinctrl_list add
+
+2014-01-30 Alan Stern <stern at rowland.harvard.edu>
+
+ * usb-storage: enable multi-LUN scanning when needed
+
+2014-01-24 Kristóf Ralovich <kristof.ralovich at gmail.com>
+
+ * USB: simple: add Dynastream ANT USB-m Stick device support
+
+2014-01-21 Alan Stern <stern at rowland.harvard.edu>
+
+ * usb-storage: add unusual-devs entry for BlackBerry 9000
+
+2014-01-30 Alan Stern <stern at rowland.harvard.edu>
+
+ * usb-storage: restrict bcdDevice range for Super Top in Cypress ATACB
+
+2014-01-28 Josh Boyer <jwboyer at fedoraproject.org>
+
+ * usb: phy: move some error messages to debug
+
+2014-01-14 Bjørn Mork <bjorn at mork.no>
+
+ * usb: ftdi_sio: add Mindstorms EV3 console adapter
+
+2014-02-04 Paul Zimmerman <Paul.Zimmerman at synopsys.com>
+
+ * usb: dwc2: fix memory corruption in dwc2 driver
+
+2014-02-04 Paul Zimmerman <Paul.Zimmerman at synopsys.com>
+
+ * usb: dwc2: fix role switch breakage
+
+2014-02-04 Andre Heider <a.heider at gmail.com>
+
+ * usb: dwc2: bail out early when booting with "nousb"
+
+2014-02-04 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'for-usb-linus-2014-02-04' of git://git.kernel.org/pub/scm/linux/kernel/git/sarah/xhci into usb-linus
+
+2014-02-03 Dirk Brandewie <dirk.j.brandewie at intel.com>
+
+ * intel_pstate: Take core C0 time into account for core busy calculation
+
+2014-01-20 Axel Lin <axel.lin at ingics.com>
+
+ * spi: nuc900: Set SPI_LSB_FIRST for master->mode_bits if hw->pdata->lsb is true
+
+2014-02-04 Sujith Manoharan <c_manoha at qca.qualcomm.com>
+
+ * ath9k: Fix TX power calculation
+
+2014-02-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
+
+2014-01-31 Jani Nikula <jani.nikula at intel.com>
+
+ * drm/i915: demote opregion excessive timeout WARN_ONCE to DRM_INFO_ONCE
+
+2014-01-31 Jani Nikula <jani.nikula at intel.com>
+
+ * drm: add DRM_INFO_ONCE() to print a one-time DRM_INFO() message
+
+2014-02-04 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * MAINTAINERS: Update drm/i915 git repo
+
+2014-01-21 Geert Uytterhoeven <geert+renesas at linux-m68k.org>
+
+ * spi: rspi: Document support for Renesas QSPI in Kconfig
+
+2014-02-04 Will Deacon <will.deacon at arm.com>
+
+ * arm64: vdso: prevent ld from aligning PT_LOAD segments to 64k
+
+2014-01-30 Michael Holzheu <holzheu at linux.vnet.ibm.com>
+
+ * s390/dump: Fix dump memory detection
+
+2014-02-04 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'regulator/fix/ab3100' and 'regulator/fix/s2mps11' into regulator-linus
+
+2014-02-04 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/fix/core' into regulator-linus
+
+2014-02-04 James Hogan <james.hogan at imgtec.com>
+
+ * MIPS: Wire up sched_setattr/sched_getattr syscalls
+
+2014-01-29 Manuel Lauss <manuel.lauss at gmail.com>
+
+ * MIPS: Alchemy: Fix DB1100 GPIO registration
+
+2014-01-29 Martin Bugge <marbugge at cisco.com>
+
+ * [media] adv7842: Composite free-run platfrom-data fix
+
+2014-01-23 Martin Bugge <marbugge at cisco.com>
+
+ * [media] v4l2-dv-timings: fix GTF calculation
+
+2014-01-17 Masanari Iida <standby24x7 at gmail.com>
+
+ * [media] hdpvr: Fix memory leak in debug
+
+2014-01-16 Antti Palosaari <crope at iki.fi>
+
+ * [media] af9035: add ID [2040:f900] Hauppauge WinTV-MiniStick 2
+
+2014-01-30 Dave Jones <davej at fedoraproject.org>
+
+ * [media] mxl111sf: Fix compile when CONFIG_DVB_USB_MXL111SF is unset
+
+2014-01-30 Dave Jones <davej at fedoraproject.org>
+
+ * [media] mxl111sf: Fix unintentional garbage stack read
+
+2014-01-30 Andi Shyti <andi at etezian.org>
+
+ * [media] cx24117: use a valid dev pointer for dev_err printout
+
+2014-01-30 Andi Shyti <andi at etezian.org>
+
+ * [media] cx24117: remove dead code in always 'false' if statement
+
+2014-01-29 Michael Krufky <mkrufky at linuxtv.org>
+
+ * [media] update Michael Krufky's email address
+
+2014-01-08 Ricardo Ribalda <ricardo.ribalda at gmail.com>
+
+ * [media] vb2: Check if there are buffers before streamon
+
+2014-01-03 Hans Verkuil <hverkuil at xs4all.nl>
+
+ * [media] Revert "[media] videobuf_vm_{open,close} race fixes"
+
+2013-12-20 Alexey Khoroshilov <khoroshilov at ispras.ru>
+
+ * [media] go7007-loader: fix usb_dev leak
+
+2013-12-19 Levente Kurusa <levex at linux.com>
+
+ * [media] media: bt8xx: add missing put_device call
+
+2014-01-29 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * UBI: fix some use after free bugs
+
+2014-02-04 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix silent output on Toshiba Satellite L40
+
+2014-02-03 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / hotplug / PCI: Fix bridge removal race vs dock events
+
+2014-02-03 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / hotplug / PCI: Fix bridge removal race in handle_hotplug_event()
+
+2014-02-03 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / hotplug / PCI: Scan root bus under the PCI rescan-remove lock
+
+2014-02-03 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / hotplug / PCI: Move PCI rescan-remove locking to hotplug_event()
+
+2014-02-03 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / hotplug / PCI: Remove entries from bus->devices in reverse order
+
+2014-01-29 Mukesh Rathor <mukesh.rathor at oracle.com>
+
+ * xen/pvh: set CR4 flags for APs
+
+2014-02-03 Tejun Heo <tj at kernel.org>
+
+ * nfs: include xattr.h from fs/nfs/nfs3proc.c
+
+2014-01-28 Li Zefan <lizefan at huawei.com>
+
+ * cpuset: update MAINTAINERS entry
+
+2014-01-28 Tejun Heo <tj at kernel.org>
+
+ * arm, pm, vmpressure: add missing slab.h includes
+
+2014-01-31 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: fix error handling in ceph_osdc_init()
+
+2014-02-01 Filipe David Borba Manana <fdmanana at gmail.com>
+
+ * Btrfs: use late_initcall instead of module_init
+
+2014-01-29 Filipe David Borba Manana <fdmanana at gmail.com>
+
+ * Btrfs: use btrfs_crc32c everywhere instead of libcrc32c
+
+2014-01-29 Josef Bacik <jbacik at fb.com>
+
+ * Btrfs: disable snapshot aware defrag for now
+
+2014-02-03 Tejun Heo <tj at kernel.org>
+
+ * sata_sil: apply MOD15WRITE quirk to TOSHIBA MK2561GSYN
+
+2014-01-25 Marek Belisko <marek at goldelico.com>
+
+ * of: add vendor prefix for Honeywell
+
+2013-12-18 Kumar Gala <galak at codeaurora.org>
+
+ * of: Update qcom vendor prefix description
+
+2014-01-29 Emilio López <emilio at elopez.com.ar>
+
+ * of: add vendor prefix for Allwinner Technology
+
+2014-02-03 Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+
+ * Revert "xen/grant-table: Avoid m2p_override during mapping"
+
+2014-02-01 Benjamin Tissoires <benjamin.tissoires at redhat.com>
+
+ * HID: fix buffer allocations
+
+2014-02-03 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: usb-audio: Add missing kconfig dependecy
+
+2014-01-27 Qipan Li <Qipan.Li at csr.com>
+
+ * pinctrl: sirf: correct the pin index of ac97_pins group
+
+2014-01-22 Chris Ruehl <chris.ruehl at gtsys.com.hk>
+
+ * pinctrl: imx27: fix offset calculation in imx_read_2bit
+
+2014-01-23 Tony Prisk <linux at prisktech.co.nz>
+
+ * pinctrl: vt8500: Change devicetree data parsing
+
+2014-01-22 Chris Ruehl <chris.ruehl at gtsys.com.hk>
+
+ * pinctrl: imx27: fix wrong offset to ICONFB
+
+2014-01-21 Nicolas Ferre <nicolas.ferre at atmel.com>
+
+ * pinctrl: at91: use locked variant of irq_set_handler
+
+2014-01-30 Guenter Roeck <linux at roeck-us.net>
+
+ * hwmon: (pmbus) Support per-page exponent in linear mode
+
+2014-02-01 Rob Herring <robh at kernel.org>
+
+ * ARM: fix HAVE_ARM_TWD selection for OMAP and shmobile
+
+2014-02-01 Rob Herring <robh at kernel.org>
+
+ * ARM: moxart: move DMA_OF selection to driver
+
+2014-02-01 Rob Herring <robh at kernel.org>
+
+ * ARM: hisi: fix kconfig warning on HAVE_ARM_TWD
+
+2014-02-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linus 3.14-rc1
+
+2014-02-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'parisc-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
+
+2014-01-29 Mikulas Patocka <mikulas at artax.karlin.mff.cuni.cz>
+
+ * hpfs: optimize quad buffer loading
+
+2014-01-29 Mikulas Patocka <mikulas at artax.karlin.mff.cuni.cz>
+
+ * hpfs: remember free space
+
+2014-01-31 Helge Deller <deller at gmx.de>
+
+ * parisc: add flexible mmap memory layout support
+
+2014-01-16 Guy Martin <gmsoft at tuxicoman.be>
+
+ * parisc: Make EWOULDBLOCK be equal to EAGAIN on parisc
+
+2014-01-31 Helge Deller <deller at gmx.de>
+
+ * parisc: convert uapi/asm/stat.h to use native types only
+
+2014-01-31 Helge Deller <deller at gmx.de>
+
+ * parisc: wire up sched_setattr and sched_getattr
+
+2014-01-31 Helge Deller <deller at gmx.de>
+
+ * parisc: fix cache-flushing
+
+2014-01-31 Helge Deller <deller at gmx.de>
+
+ * parisc/sti_console: prefer Linux fonts over built-in ROM fonts
+
+2014-02-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jdelvare/staging
+
+2014-02-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'slab/next' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux
+
+2014-02-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux
+
+2014-02-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2014-01-31 Keith Busch <keith.busch at intel.com>
+
+ * NVMe: Namespace use after free on surprise removal
+
+2014-01-25 Jean-Francois Moine <moinejf at free.fr>
+
+ * drm/i2c: tda998x: fix the ENABLE_SPACE register
+
+2014-02-02 Jean Delvare <khali at linux-fr.org>
+
+ * hwmon: Fix SENSORS_TMP102 dependencies to eliminate build errors
+
+2014-02-02 Jean Delvare <khali at linux-fr.org>
+
+ * hwmon: Fix SENSORS_LM75 dependencies to eliminate build errors
+
+2014-01-25 Jean-Francois Moine <moinejf at free.fr>
+
+ * drm/i2c: tda998x: set the PLL division factor in range 0..3
+
+2014-01-25 Jean-Francois Moine <moinejf at free.fr>
+
+ * drm/i2c: tda998x: force the page register at startup time
+
+2014-01-25 Jean-Francois Moine <moinejf at free.fr>
+
+ * drm/i2c: tda998x: free the CEC device on encoder_destroy
+
+2014-01-25 Jean-Francois Moine <moinejf at free.fr>
+
+ * drm/i2c: tda998x: check the CEC device creation
+
+2014-01-25 Jean-Francois Moine <moinejf at free.fr>
+
+ * drm/i2c: tda998x: fix bad value in the AIF
+
+2013-12-05 Emmanuel Grumbach <emmanuel.grumbach at intel.com>
+
+ * iwlwifi: mvm: don't allow A band if SKU forbids it
+
+2014-01-28 Emmanuel Grumbach <emmanuel.grumbach at intel.com>
+
+ * iwlwifi: mvm: BT Coex - disable BT when TXing probe request in scan
+
+2014-02-02 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2014-02-01 Petr Tesarik <ptesarik at suse.cz>
+
+ * x86: Fix the initialization of physnode_map
+
+2014-01-23 Andy Shevchenko <andriy.shevchenko at linux.intel.com>
+
+ * tools/power turbostat: introduce -s to dump counters
+
+2014-01-23 Andy Shevchenko <andriy.shevchenko at linux.intel.com>
+
+ * tools/power turbostat: remove unused command line option
+
+2014-02-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'misc' of git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild
+
+2014-01-28 Pali Rohár <pali.rohar at gmail.com>
+
+ * afs: proc cells and rootcell are writeable
+
+2014-01-31 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * tile: remove compat_sys_lookup_dcookie declaration to fix compile error
+
+2014-02-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.samba.org/sfrench/cifs-2.6
+
+2014-02-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
+
+2014-02-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'staging-3.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
+
+2014-02-01 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Revert "PCI: Remove from bus_list and release resources in pci_release_dev()"
+
+2014-01-30 Krzysztof Kozlowski <k.kozlowski at samsung.com>
+
+ * power: max17040: Fix NULL pointer dereference when there is no platform_data
+
+2014-01-31 Olof Johansson <olof at lixom.net>
+
+ * ARM: multi_v7_defconfig: remove redundant entries and re-enable TI_EDMA
+
+2014-01-31 Olof Johansson <olof at lixom.net>
+
+ * ARM: multi_v7_defconfig: add mvebu drivers
+
+2013-12-05 Tim Kryger <tim.kryger at linaro.org>
+
+ * clocksource: kona: Add basic use of external clock
+
+2014-01-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'nfs-for-3.14-2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
+
+2014-01-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-fix-3.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2014-01-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending
+
+2014-01-27 Lorenzo Pieralisi <lorenzo.pieralisi at arm.com>
+
+ * drivers: bus: fix CCI driver kcalloc call parameters swap
+
+2014-01-07 Tim Kryger <tim.kryger at linaro.org>
+
+ * ARM: dts: bcm28155-ap: Fix Card Detection GPIO
+
+2014-01-31 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'renesas-dt-fixes2-for-v3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas into fixes
+
+2014-01-11 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * ARM: multi_v7_defconfig: Select CONFIG_AT803X_PHY
+
+2014-01-09 Grygorii Strashko <grygorii.strashko at ti.com>
+
+ * ARM: keystone: config: fix build warning when CONFIG_DMADEVICES is not set
+
+2014-01-03 Barry Song <Baohua.Song at csr.com>
+
+ * MAINTAINERS: ARM: SiRF: use regex patterns to involve all SiRF drivers
+
+2014-01-31 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'mvebu-fixes-3.13-2' of git://git.infradead.org/linux-mvebu into fixes
+
+2013-12-02 Soren Brinkmann <soren.brinkmann at xilinx.com>
+
+ * ARM: dts: zynq: Add SDHCI nodes
+
+2014-01-31 Rob Herring <robh at kernel.org>
+
+ * ARM: hisi: don't select SMP
+
+2014-01-24 Stephen Warren <swarren at nvidia.com>
+
+ * ARM: tegra: rebuild tegra_defconfig to add DEBUG_FS
+
+2013-12-20 Stephen Warren <swarren at nvidia.com>
+
+ * ARM: multi_v7: copy most options from tegra_defconfig
+
+2014-01-29 Linus Walleij <linus.walleij at linaro.org>
+
+ * ARM: iop32x: fix power off handling for the EM7210 board
+
+2014-01-24 Linus Walleij <linus.walleij at linaro.org>
+
+ * ARM: integrator: restore static map on the CP
+
+2014-01-31 Oleg Drokin <green at linuxhacker.ru>
+
+ * Fix mountpoint reference leakage in linkat
+
+2014-01-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
+
+2014-01-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mattst88/alpha
+
+2014-01-29 Adrian Hunter <adrian.hunter at intel.com>
+
+ * perf buildid-cache: Check relocation when checking for existing kcore
+
+2014-01-29 Adrian Hunter <adrian.hunter at intel.com>
+
+ * perf tools: Adjust kallsyms for relocated kernel
+
+2014-01-29 Adrian Hunter <adrian.hunter at intel.com>
+
+ * perf tests: No need to set up ref_reloc_sym
+
+2014-01-29 Adrian Hunter <adrian.hunter at intel.com>
+
+ * perf symbols: Prevent the use of kcore if the kernel has moved
+
+2014-01-29 Adrian Hunter <adrian.hunter at intel.com>
+
+ * perf record: Get ref_reloc_sym from kernel map
+
+2014-01-29 Adrian Hunter <adrian.hunter at intel.com>
+
+ * perf machine: Set up ref_reloc_sym in machine__create_kernel_maps()
+
+2014-01-29 Adrian Hunter <adrian.hunter at intel.com>
+
+ * perf machine: Add machine__get_kallsyms_filename()
+
+2014-01-29 Adrian Hunter <adrian.hunter at intel.com>
+
+ * perf tools: Add kallsyms__get_function_start()
+
+2014-01-29 Adrian Hunter <adrian.hunter at intel.com>
+
+ * perf symbols: Fix symbol annotation for relocated kernel
+
+2014-01-27 Francesco Fusco <ffusco at redhat.com>
+
+ * perf tools: Fix include for non x86 architectures
+
+2014-01-29 Christoph Hellwig <hch at infradead.org>
+
+ * hfsplus: use xattr handlers for removexattr
+
+2014-01-30 Stephan Springl <springl-kernel at bfw-online.de>
+
+ * Typo in compat_sys_lseek() declaration
+
+2014-01-30 Andrew Ruder <andrew.ruder at elecsyscorp.com>
+
+ * fs/super.c: sync ro remount after blocking writers
+
+2014-01-27 Jeff Layton <jlayton at redhat.com>
+
+ * vfs: unexport the getname() symbol
+
+2014-01-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'v4l_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
+
+2014-01-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.14-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2014-01-22 Mikulas Patocka <mpatocka at redhat.com>
+
+ * alpha: fix broken network checksum
+
+2013-12-20 蔡正龙 <zhenglong.cai at cs2c.com.cn>
+
+ * alpha: Enable system-call auditing support.
+
+2014-01-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-30 Stephen Warren <swarren at nvidia.com>
+
+ * ALSA: hda/hdmi - allow PIN_OUT to be dynamically enabled
+
+2014-01-30 Krzysztof Kozlowski <k.kozlowski at samsung.com>
+
+ * regulator: s2mps11: Fix NULL pointer of_node value when using platform data
+
+2014-01-30 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ASoC: davinci-evm: Add pm callbacks to platform driver
+
+2014-01-30 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ASoC: davinci-mcasp: Consolidate pm_runtime_get/put() use in the driver
+
+2014-01-30 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ASoC: davinci-mcasp: Configure xxTDM, xxFMT and xxFMCT registers synchronously
+
+2014-01-30 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ASoC: davinci-mcasp: Harmonize the sub hw_params function names
+
+2014-01-31 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: Fix check for regular file in couldbe_mf_symlink()
+
+2014-01-27 Dave Jones <davej at fedoraproject.org>
+
+ * xen/pvh: Fix misplaced kfree from xlated_setup_gnttab_pages
+
+2014-01-22 Bob Liu <lliubbo at gmail.com>
+
+ * drivers: xen: deaggressive selfballoon driver
+
+2014-01-23 Zoltan Kiss <zoltan.kiss at citrix.com>
+
+ * xen/grant-table: Avoid m2p_override during mapping
+
+2014-01-27 Malahal Naineni <malahal at us.ibm.com>
+
+ * nfs: initialize the ACL support bits to zero.
+
+2014-01-30 Denis V. Lunev <den at openvz.org>
+
+ * ata: enable quirk from jmicron JMB350 for JMB394
+
+2014-01-28 Masanari Iida <standby24x7 at gmail.com>
+
+ * mm: Fix warning on make htmldocs caused by slab.c
+
+2014-01-24 Dave Hansen <dave.hansen at linux.intel.com>
+
+ * mm: slub: work around unneeded lockdep warning
+
+2014-01-28 Dave Hansen <dave.hansen at linux.intel.com>
+
+ * mm: sl[uo]b: fix misleading comments
+
+2014-01-15 Steve Capper <Steve.Capper at arm.com>
+
+ * arm64: mm: Introduce PTE_WRITE
+
+2014-01-15 Steve Capper <Steve.Capper at arm.com>
+
+ * arm64: mm: Remove PTE_BIT_FUNC macro
+
+2014-01-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
+
+2014-01-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'upstream-3.14-rc1' of git://git.infradead.org/linux-ubifs
+
+2014-01-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
+
+2014-01-28 Prarit Bhargava <prarit at redhat.com>
+
+ * x86, cpu hotplug: Fix stack frame warning in check_irq_vectors_for_cpu_disable()
+
+2014-01-30 Sarah Sharp <sarah.a.sharp at linux.intel.com>
+
+ * Revert "xhci: replace xhci_read_64() with readq()"
+
+2014-01-20 Emmanuel Grumbach <emmanuel.grumbach at intel.com>
+
+ * iwlwifi: mvm: don't leak a station when we drain
+
+2013-12-30 David Spinadel <david.spinadel at intel.com>
+
+ * iwlwifi: mvm: notify match found without filtering
+
+2014-01-23 Oren Givon <oren.givon at intel.com>
+
+ * iwlwifi: add more 7265 HW IDs
+
+2014-01-23 Emmanuel Grumbach <emmanuel.grumbach at intel.com>
+
+ * iwlwifi: mvm: print the version of the firmware when it asserts
+
+2014-01-16 Johannes Berg <johannes.berg at intel.com>
+
+ * iwlwifi: mvm: disable scheduled scan
+
+2014-01-20 Johannes Berg <johannes.berg at intel.com>
+
+ * iwlwifi: mvm: make local pointer non-static
+
+2014-01-30 Imre Deak <imre.deak at intel.com>
+
+ * drm/i915: vlv: fix DP PHY lockup due to invalid PP sequencer setup
+
+2014-01-29 Nicolas Pitre <nicolas.pitre at linaro.org>
+
+ * arm64: FIQs are unused
+
+2014-01-22 Harald Freudenberger <freude at linux.vnet.ibm.com>
+
+ * crypto: s390 - fix des and des3_ede ctr concurrency issue
+
+2014-01-22 Harald Freudenberger <freude at linux.vnet.ibm.com>
+
+ * crypto: s390 - fix des and des3_ede cbc concurrency issue
+
+2014-01-16 Harald Freudenberger <freude at linux.vnet.ibm.com>
+
+ * crypto: s390 - fix concurrency issue in aes-ctr mode
+
+2014-01-30 Julien Grall <julien.grall at linaro.org>
+
+ * xen/gnttab: Use phys_addr_t to describe the grant frame base address
+
+2014-01-20 Ian Campbell <ian.campbell at citrix.com>
+
+ * xen: swiotlb: handle sizeof(dma_addr_t) != sizeof(phys_addr_t)
+
+2014-01-28 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Fix percpu_ref_put race in transport_lun_remove_cmd
+
+2014-01-24 Andy Grover <agrover at redhat.com>
+
+ * target/iscsi: Fix network portal creation race
+
+2014-01-30 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * ASoC: samsung: Fix trivial typo
+
+2014-01-30 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * ASoC: samsung: Remove invalid dependencies
+
+2014-01-30 Hui Wang <hui.wang at canonical.com>
+
+ * ALSA: hda - add headset mic detect quirks for another Dell laptop
+
+2014-01-30 Takashi Iwai <tiwai at suse.de>
+
+ * Merge branch 'xonar-dg' of git://git.alsa-project.org/alsa-kprivate into for-next
+
+2014-01-29 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * KVM: return an error code in kvm_vm_ioctl_register_coalesced_mmio()
+
+2014-01-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
+
+2014-01-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.infradead.org/users/vkoul/slave-dma
+
+2014-01-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/olof/chrome-platform
+
+2014-01-29 Sarah Sharp <sarah.a.sharp at linux.intel.com>
+
+ * Revert "xhci: replace xhci_write_64() with writeq()"
+
+2014-01-30 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-nouveau-next' of git://anongit.freedesktop.org/git/nouveau/linux-2.6 into drm-next
+
+2014-01-30 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-next-3.14' of git://people.freedesktop.org/~agd5f/linux into drm-next
+
+2014-01-29 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * fs/compat: fix parameter handling for compat readv/writev syscalls
+
+2014-01-29 Andrew Morton <akpm at linux-foundation.org>
+
+ * mm/mempolicy.c: convert to pr_foo()
+
+2014-01-29 Mel Gorman <mgorman at suse.de>
+
+ * mm: numa: initialise numa balancing after jump label initialisation
+
+2014-01-29 Johannes Weiner <hannes at cmpxchg.org>
+
+ * mm/page-writeback.c: do not count anon pages as dirtyable memory
+
+2014-01-29 Johannes Weiner <hannes at cmpxchg.org>
+
+ * mm/page-writeback.c: fix dirty_balance_reserve subtraction from dirtyable memory
+
+2014-01-29 Aaron Tomlin <atomlin at redhat.com>
+
+ * mm: document improved handling of swappiness==0
+
+2014-01-29 Lad, Prabhakar <prabhakar.csengg at gmail.com>
+
+ * lib/genalloc.c: add check gen_pool_dma_alloc() if dma pointer is not NULL
+
+2014-01-23 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nouveau: resume display if any later suspend bits fail
+
+2014-01-29 Maarten Lankhorst <maarten.lankhorst at canonical.com>
+
+ * drm/nouveau: fix lock unbalance in nouveau_crtc_page_flip
+
+2013-11-14 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau: implement hooks for needed for drm vblank timestamping support
+
+2013-11-14 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau/disp: add a method to fetch info needed by drm vblank timestamping
+
+2014-01-24 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nv50: fill in crtc mode struct members from crtc_mode_fixup
+
+2014-01-28 Chris Zankel <chris at zankel.net>
+
+ * xtensa: fix fast_syscall_spill_registers
+
+2014-01-28 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/dce8: workaround for atom BlankCrtc table
+
+2014-01-27 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/DCE4+: clear bios scratch dpms bit (v2)
+
+2014-01-27 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: set si_notify_smc_display_change properly
+
+2014-01-27 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: fix DAC interrupt handling on DCE5+
+
+2014-01-27 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: clean up active vram sizing
+
+2014-01-27 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: skip async dma init on r6xx
+
+2014-01-24 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/runpm: don't runtime suspend non-PX cards
+
+2014-01-24 Roman Volkov <v1ron at mail.ru>
+
+ * ALSA: oxygen: Xonar DG(X): cleanup and minor changes
+
+2014-01-24 Roman Volkov <v1ron at mail.ru>
+
+ * ALSA: oxygen: Xonar DG(X): modify high-pass filter control
+
+2014-01-24 Roman Volkov <v1ron at mail.ru>
+
+ * ALSA: oxygen: Xonar DG(X): modify input select functions
+
+2014-01-24 Roman Volkov <v1ron at mail.ru>
+
+ * ALSA: oxygen: Xonar DG(X): modify capture volume functions
+
+2014-01-24 Roman Volkov <v1ron at mail.ru>
+
+ * ALSA: oxygen: Xonar DG(X): use headphone volume control
+
+2014-01-24 Peter Zijlstra <peterz at infradead.org>
+
+ * perf tools: Fix AAAAARGH64 memory barriers
+
+2014-01-22 Andrew Lunn <andrew at lunn.ch>
+
+ * ATA: SATA_MV: Add missing Kconfig select statememnt
+
+2014-01-25 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * ata: pata_imx: Check the return value from clk_prepare_enable()
+
+2014-01-29 Paolo Bonzini <pbonzini at redhat.com>
+
+ * Merge branch 'kvm-ppc-next' of git://github.com/agraf/linux-2.6 into kvm-queue
+
+2014-01-29 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFSv4.1: Cleanup
+
+2014-01-29 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFSv4.1: Clean up nfs41_sequence_done
+
+2014-01-29 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFSv4: Fix a slot leak in nfs40_sequence_done
+
+2014-01-27 Paolo Bonzini <pbonzini at redhat.com>
+
+ * x86, kvm: correctly access the KVM_CPUID_FEATURES leaf at 0x40000101
+
+2014-01-27 Paolo Bonzini <pbonzini at redhat.com>
+
+ * x86, kvm: cache the base of the KVM cpuid leaves
+
+2014-01-29 Paolo Bonzini <pbonzini at redhat.com>
+
+ * kvm: x86: move KVM_CAP_HYPERV_TIME outside #ifdef
+
+2014-01-29 Andy Adamson <andros at netapp.com>
+
+ * NFSv4.1 free slot before resending I/O to MDS
+
+2014-01-29 Matthew Wilcox <matthew.r.wilcox at intel.com>
+
+ * NVMe: Correct uses of INIT_WORK
+
+2014-01-29 David Henningsson <david.henningsson at canonical.com>
+
+ * ALSA: hda - Add parameter for dumping processing coefficients
+
+2014-01-29 Chris Mason <clm at fb.com>
+
+ * Btrfs: fix spin_unlock in check_ref_cleanup
+
+2014-01-09 Chris Mason <clm at fb.com>
+
+ * Btrfs: setup inode location during btrfs_init_inode_locked
+
+2014-01-03 Chris Mason <clm at fb.com>
+
+ * Btrfs: don't use ram_bytes for uncompressed inline items
+
+2014-01-11 Filipe David Borba Manana <fdmanana at gmail.com>
+
+ * Btrfs: fix btrfs_search_slot_for_read backwards iteration
+
+2014-01-29 Wang Shilong <wangsl.fnst at cn.fujitsu.com>
+
+ * Btrfs: do not export ulist functions
+
+2014-01-29 Wang Shilong <wangsl.fnst at cn.fujitsu.com>
+
+ * Btrfs: rework ulist with list+rb_tree
+
+2014-01-28 Wang Shilong <wangsl.fnst at cn.fujitsu.com>
+
+ * Btrfs: fix memory leaks on walking backrefs failure
+
+2014-01-28 Filipe David Borba Manana <fdmanana at gmail.com>
+
+ * Btrfs: fix send file hole detection leading to data corruption
+
+2014-01-26 Wang Shilong <wangsl.fnst at cn.fujitsu.com>
+
+ * Btrfs: add a reschedule point in btrfs_find_all_roots()
+
+2014-01-24 Filipe David Borba Manana <fdmanana at gmail.com>
+
+ * Btrfs: make send's file extent item search more efficient
+
+2014-01-23 Wang Shilong <wangsl.fnst at cn.fujitsu.com>
+
+ * Btrfs: fix to catch all errors when resolving indirect ref
+
+2014-01-23 Wang Shilong <wangsl.fnst at cn.fujitsu.com>
+
+ * Btrfs: fix protection between walking backrefs and root deletion
+
+2014-01-23 Gui Hecheng <guihc.fnst at cn.fujitsu.com>
+
+ * btrfs: fix warning while merging two adjacent extents
+
+2014-01-22 Filipe David Borba Manana <fdmanana at gmail.com>
+
+ * Btrfs: fix infinite path build loops in incremental send
+
+2014-01-28 Arnd Bergmann <arnd at arndb.de>
+
+ * dmaengine: mmp_pdma: fix mismerge
+
+2014-01-29 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'pm-cpufreq' and 'pm-devfreq'
+
+2014-01-29 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'acpi-processor', 'acpi-hotplug', 'acpi-init', 'acpi-pm' and 'acpica'
+
+2014-01-27 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / scan: Clear match_driver flag in acpi_bus_trim()
+
+2014-01-28 Gerald Schaefer <gerald.schaefer at de.ibm.com>
+
+ * s390/appldata: restore missing init_virt_timer()
+
+2014-01-28 Ursula Braun <ursula.braun at de.ibm.com>
+
+ * s390/qdio: correct program-controlled interruption checking
+
+2014-01-26 Jose Alonso <joalonsof at gmail.com>
+
+ * s390/qdio: for_each macro correctness
+
+2014-01-29 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2014-01-28 Chris Zankel <chris at zankel.net>
+
+ * xtensa: fix fast_syscall_spill_registers
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus-20140127' of git://git.infradead.org/linux-mtd
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/cooloney/linux-leds
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'tty-3.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'clk-for-linus-3.14-part2' of git://git.linaro.org/people/mike.turquette/linux
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'xfs-for-linus-v3.14-rc1-2' of git://oss.sgi.com/xfs/xfs
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * ceph: Fix up after semantic merge conflict
+
+2014-01-29 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm/for-3.14-rc1-20140123' of git://anongit.freedesktop.org/tegra/linux into drm-next
+
+2014-01-24 Dave Airlie <airlied at redhat.com>
+
+ * drm: ast,cirrus,mgag200: use drm_can_sleep
+
+2014-01-29 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm-intel-fixes-2014-01-28' of git://people.freedesktop.org/~danvet/drm-intel into drm-next
+
+2014-01-29 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-armada-fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-cubox into drm-next
+
+2014-01-29 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'omapdrm-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tomba/linux into drm-next
+
+2014-01-29 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'gma500-next' of git://github.com/patjak/drm-gma500 into drm-next
+
+2014-01-27 Mark Brown <broonie at linaro.org>
+
+ * ACPI / init: Flag use of ACPI and ACPI idioms for power supplies to regulator API
+
+2014-01-27 Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+
+ * acpi-cpufreq: De-register CPU notifier and free struct msr on error.
+
+2014-01-22 Anand Jain <Anand.Jain at oracle.com>
+
+ * btrfs: undo sysfs when open_ctree() fails
+
+2014-01-23 Benjamin Tissoires <benjamin.tissoires at redhat.com>
+
+ * HID: multitouch: add FocalTech FTxxxx support
+
+2014-01-28 Jeff Layton <jlayton at redhat.com>
+
+ * nfs: add memory barriers around NFS_INO_INVALID_DATA and NFS_INO_INVALIDATING
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.open-osd.org/linux-open-osd
+
+2014-01-28 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * ceph: cast PAGE_SIZE to size_t in ceph_sync_write()
+
+2014-01-28 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * ceph: fix dout() compile warnings in ceph_filemap_fault()
+
+2014-01-28 Tony Luck <tony.luck at intel.com>
+
+ * [IA64] Wire up new sched_setattr and sched_getattr syscalls
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'microblaze-3.14-rc1' of git://git.monstr.eu/linux-2.6-microblaze
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'cris-correction-for-3.14' of git://jni.nu/cris
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'nfs-for-3.14-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
+
+2014-01-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
+
+2014-01-28 Trond Myklebust <trond.myklebust at primarydata.com>
+
+ * NFS: Fix races in nfs_revalidate_mapping
+
+2014-01-28 Reyad Attiyat <reyad.attiyat at gmail.com>
+
+ * HID: microsoft: Add ID's for Surface Type/Touch Cover 2
+
+2014-01-27 Yufeng Shen <miletus at chromium.org>
+
+ * HID: usbhid: quirk for CY-TM75 75 inch Touch Overlay
+
+2014-01-28 Jesper Nilsson <jespern at axis.com>
+
+ * CRISv10: Readd missing header
+
+2014-01-27 Peter Zijlstra <peterz at infradead.org>
+
+ * sched: Make sched_class::get_rr_interval() optional
+
+2014-01-08 Patrik Jakobsson <patrik.r.jakobsson at gmail.com>
+
+ * drm/gma500: Lock struct_mutex around cursor updates
+
+2014-01-27 Mark Brown <broonie at linaro.org>
+
+ * regulator: core: Correct default return value for full constraints
+
+2014-01-13 Akash Goel <akash.goel at intel.com>
+
+ * drm/i915: Fix the offset issue for the stolen GEM objects
+
+2013-12-18 Huang Shijie <shijie8 at gmail.com>
+
+ * mtd: gpmi: add sanity check when mapping DMA for read_buf/write_buf
+
+2013-12-18 Huang Shijie <shijie8 at gmail.com>
+
+ * mtd: gpmi: allocate a proper buffer for non ECC read/write
+
+2014-01-21 Geert Uytterhoeven <geert+renesas at linux-m68k.org>
+
+ * mtd: m25p80: Set rx_nbits for Quad SPI transfers
+
+2014-01-21 Geert Uytterhoeven <geert+renesas at linux-m68k.org>
+
+ * mtd: m25p80: Enable Quad SPI read transfers for s25fl512s
+
+2014-01-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (incoming from Andrew)
+
+2014-01-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
+
+2014-01-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
+
+2014-01-27 Ard Biesheuvel <ard.biesheuvel at linaro.org>
+
+ * firmware/google: drop 'select EFI' to avoid recursive dependency
+
+2014-01-27 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * compat: fix sys_fanotify_mark
+
+2014-01-27 Joe Perches <joe at perches.com>
+
+ * checkpatch.pl: check for function declarations without arguments
+
+2014-01-27 Wanpeng Li <liwanp at linux.vnet.ibm.com>
+
+ * mm/migrate.c: fix setting of cpupid on page migration twice against normal page
+
+2014-01-15 Michael Ellerman <mpe at ellerman.id.au>
+
+ * powerpc: Implement arch_spin_is_locked() using arch_spin_value_unlocked()
+
+2014-01-15 Michael Ellerman <mpe at ellerman.id.au>
+
+ * powerpc: Add support for the optimised lockref implementation
+
+2014-01-02 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * leds: s3c24xx: Remove hardware.h inclusion
+
+2013-12-28 ZHAO Gang <gamerh2o at gmail.com>
+
+ * leds: replace list_for_each with list_for_each_entry
+
+2014-01-02 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * leds: kirkwood: Cleanup in header files
+
+2013-12-11 Olof Johansson <olof at lixom.net>
+
+ * leds: pwm: Remove a warning on non-DT platforms
+
+2013-12-11 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * leds: leds-pwm: fix duty time overflow.
+
+2013-12-06 Alexander Shiyan <shc_work at mail.ru>
+
+ * leds: leds-mc13783: Remove unneeded mc13xxx_{un}lock
+
+2013-12-06 Alexander Shiyan <shc_work at mail.ru>
+
+ * leds: leds-mc13783: Remove duplicate field in platform data
+
+2013-12-08 Chen Gang <gang.chen.5i5j at gmail.com>
+
+ * drivers: leds: leds-tca6507: check CONFIG_GPIOLIB whether defined for 'gpio_base'
+
+2013-11-20 Milo Kim <milo.kim at ti.com>
+
+ * leds: lp5523: Support LED MUX configuration on running a pattern
+
+2013-11-20 Milo Kim <milo.kim at ti.com>
+
+ * leds: lp5521/5523: Fix multiple engine usage bug
+
+2013-11-12 NeilBrown <neilb at suse.de>
+
+ * LEDS: tca6507 - fix up some comments.
+
+2013-10-31 NeilBrown <neilb at suse.de>
+
+ * LEDS: tca6507: add device-tree support for GPIO configuration.
+
+2014-01-27 Matthew Wilcox <matthew.r.wilcox at intel.com>
+
+ * NVMe: Include device and queue numbers in interrupt name
+
+2014-01-27 Keith Busch <keith.busch at intel.com>
+
+ * NVMe: Add a pci_driver shutdown method
+
+2013-12-16 Keith Busch <keith.busch at intel.com>
+
+ * NVMe: Disable admin queue on init failure
+
+2014-01-27 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * DRM: armada: fix missing DRM_KMS_FB_HELPER select
+
+2013-12-03 Marek Szyprowski <m.szyprowski at samsung.com>
+
+ * [media] media: v4l2-dev: fix video device index assignment
+
+2014-01-24 Lv Zheng <lv.zheng at intel.com>
+
+ * ACPICA: Remove bool usage from ACPICA.
+
+2014-01-22 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * PM / devfreq: Disable Exynos4 driver build on multiplatform
+
+2014-01-27 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / PM: Use ACPI_COMPANION() to get ACPI companions of devices
+
+2014-01-27 Jiang Liu <jiang.liu at linux.intel.com>
+
+ * ACPI / scan: reduce log level of "ACPI: \_PR_.CPU4: failed to get CPU APIC ID"
+
+2014-01-27 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: support CEPH_FEATURE_OSD_CACHEPOOL feature
+
+2014-01-27 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: follow redirect replies from osds
+
+2014-01-27 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: rename ceph_osd_request::r_{oloc,oid} to r_base_{oloc,oid}
+
+2014-01-27 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: follow {read,write}_tier fields on osd request submission
+
+2014-01-27 Ilya Dryomov <ilya.dryomov at inktank.com>
+
+ * libceph: add ceph_pg_pool_by_id()
+
+2014-01-27 Mike Turquette <mturquette at linaro.org>
+
+ * clk: sort Makefile
+
+2014-01-27 Jeff Layton <jlayton at redhat.com>
+
+ * sunrpc: turn warn_gssd() log message into a dprintk()
+
+2014-01-27 Jeff Layton <jlayton at redhat.com>
+
+ * NFS: fix the handling of NFS_INO_INVALID_DATA flag in nfs_revalidate_mapping
+
+2014-01-24 Emilio López <emilio at elopez.com.ar>
+
+ * clk: sunxi: fix overflow when setting up divided factors
+
+2014-01-17 Stephen Boyd <sboyd at codeaurora.org>
+
+ * clk: Export more clk-provider functions
+
+2014-01-17 Stephen Boyd <sboyd at codeaurora.org>
+
+ * dt-bindings: qcom: Fix warning with duplicate dt define
+
+2014-01-25 Sebastian Hesselbarth <sebastian.hesselbarth at gmail.com>
+
+ * clk: si5351: remove variant from platform_data
+
+2014-01-25 Baruch Siach <baruch at tkos.co.il>
+
+ * spi: correct the transfer_one_message documentation wording
+
+2014-01-27 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/arizona', 'asoc/fix/fsl', 'asoc/fix/omap', 'asoc/fix/samsung', 'asoc/fix/simple', 'asoc/fix/tlv320aic32x4' and 'asoc/fix/wm5100' into asoc-linus
+
+2014-01-27 Markus Pargmann <mpa at pengutronix.de>
+
+ * ASoC: tlv320aic32x4: Fix MICPGA input configuration
+
+2014-01-27 Markus Pargmann <mpa at pengutronix.de>
+
+ * ASoC: tlv320aic32x4: Fix mono playback
+
+2014-01-27 Larry Finger <Larry.Finger at lwfinger.net>
+
+ * staging: r8821ae: Enable build by reverting BROKEN marking
+
+2014-01-26 Larry Finger <Larry.Finger at lwfinger.net>
+
+ * staging: r8821ae: Fix build problems
+
+2014-01-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'trace-fixes-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
+
+2014-01-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'stable/for-linus-3.14-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb
+
+2014-01-27 Chris Wilson <chris at chris-wilson.co.uk>
+
+ * drm/i915: Decouple GPU error reporting from ring initialisation
+
+2014-01-27 Jingoo Han <jg1.han at samsung.com>
+
+ * arm64: mm: fix the function name in comment of cpu_do_switch_mm
+
+2014-01-08 Paul Mackerras <paulus at samba.org>
+
+ * KVM: PPC: Book3S PR: Cope with doorbell interrupts
+
+2014-01-08 Michael Neuling <mikey at neuling.org>
+
+ * KVM: PPC: Book3S HV: Add software abort codes for transactional memory
+
+2014-01-08 Michael Neuling <mikey at neuling.org>
+
+ * KVM: PPC: Book3S HV: Add new state for transactional memory
+
+2014-01-08 Michael Neuling <mikey at neuling.org>
+
+ * powerpc/Kconfig: Make TM select VSX and VMX
+
+2014-01-08 Anton Blanchard <anton at samba.org>
+
+ * KVM: PPC: Book3S HV: Basic little-endian guest support
+
+2014-01-08 Paul Mackerras <paulus at samba.org>
+
+ * KVM: PPC: Book3S HV: Add support for DABRX register on POWER7
+
+2014-01-08 Paul Mackerras <paulus at samba.org>
+
+ * KVM: PPC: Book3S HV: Prepare for host using hypervisor doorbells
+
+2014-01-08 Paul Mackerras <paulus at samba.org>
+
+ * KVM: PPC: Book3S HV: Handle new LPCR bits on POWER8
+
+2014-01-22 Avi Kivity <avi at cloudius-systems.com>
+
+ * perf tools: Demangle kernel and kernel module symbols too
+
+2014-01-27 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge branch 'master' into staging-next
+
+2014-01-24 Stephen Rothwell <sfr at canb.auug.org.au>
+
+ * Staging: rtl8812ae: disable due to build errors
+
+2014-01-24 Jan Kiszka <jan.kiszka at siemens.com>
+
+ * KVM: x86: Validate guest writes to MSR_IA32_APICBASE
+
+2014-01-24 Pankaj Dubey <pankaj.dubey at samsung.com>
+
+ * arm64: fix build error if DMA_CMA is enabled
+
+2013-11-22 Michal Simek <michal.simek at xilinx.com>
+
+ * microblaze: Add missing v8.50.a version
+
+2013-11-19 Michal Simek <michal.simek at xilinx.com>
+
+ * microblaze: Fix missing bracket in printk
+
+2013-11-19 Michal Simek <michal.simek at xilinx.com>
+
+ * microblaze: Fix compilation error for BS=0
+
+2013-08-23 Michal Simek <michal.simek at xilinx.com>
+
+ * microblaze: Disable stack protection from bootloader
+
+2013-11-20 Michal Simek <michal.simek at xilinx.com>
+
+ * microblaze: Define read/write{b,w,l}_relaxed MMIO
+
+2014-01-26 Steve French <smfrench at gmail.com>
+
+ * [CIFS] Fix SMB2 mounts so they don't try to set or get xattrs via cifs
+
+2014-01-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml
+
+2014-01-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'mmc-updates-for-3.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/cjb/mmc
+
+2014-01-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-3.14-merge-window' of git://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs
+
+2013-11-14 James Hogan <james.hogan at imgtec.com>
+
+ * um: hostfs: make functions static
+
+2014-01-15 Richard Weinberger <richard at nod.at>
+
+ * um: Include generic barrier.h
+
+2013-09-13 Richard Weinberger <richard at nod.at>
+
+ * um: Removed unused attributes from thread_struct
+
+2014-01-25 Baruch Siach <baruch at tkos.co.il>
+
+ * perf/doc: Remove mention of non-existent set_perf_event_pending() from design.txt
+
+2014-01-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'ipmi' (ipmi patches from Corey Minyard)
+
+2014-01-24 Corey Minyard <cminyard at mvista.com>
+
+ * ipmi: Cleanup error return
+
+2014-01-24 Xie XiuQi <xiexiuqi at huawei.com>
+
+ * ipmi: fix timeout calculation when bmc is disconnected
+
+2014-01-24 Xie XiuQi <xiexiuqi at huawei.com>
+
+ * ipmi: use USEC_PER_SEC instead of 1000000 for more meaningful
+
+2014-01-24 Michael Opdenacker <michael.opdenacker at free-electrons.com>
+
+ * ipmi: remove deprecated IRQF_DISABLED
+
+2014-01-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'spi-v3.14-2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi
+
+2014-01-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'regulator-v3.14-2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
+
+2014-01-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'regmap-v3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regmap
+
+2013-10-30 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: save current register frame in fast_syscall_spill_registers_fixup
+
+2014-01-22 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: introduce spill_registers_kernel macro
+
+2014-01-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
+
+2014-01-25 Stanislaw Gruszka <sgruszka at redhat.com>
+
+ * i915: remove pm_qos request on error
+
+2014-01-11 Sebastian Reichel <sre at debian.org>
+
+ * dt: binding documentation for bq2415x charger
+
+2014-01-24 Adrien Vergé <adrienverge at gmail.com>
+
+ * ALSA: hda - Fix silent output on MacBook Air 1,1
+
+2014-01-21 David Cohen <david.a.cohen at linux.intel.com>
+
+ * x86/intel/mid: Fix X86_INTEL_MID dependencies
+
+2014-01-25 Ingo Molnar <mingo at kernel.org>
+
+ * Merge branch 'linus' into x86/urgent
+
+2014-01-21 Toshi Kani <toshi.kani at hp.com>
+
+ * arch/x86/mm/srat: Skip NUMA_NO_NODE while parsing SLIT
+
+2014-01-21 Mel Gorman <mgorman at suse.de>
+
+ * mm, x86: Revisit tlb_flushall_shift tuning for page flushes except on IvyBridge
+
+2014-01-21 Mel Gorman <mgorman at suse.de>
+
+ * x86: mm: change tlb_flushall_shift for IvyBridge
+
+2014-01-21 Mel Gorman <mgorman at suse.de>
+
+ * x86/mm: Eliminate redundant page table walk during TLB range flushing
+
+2014-01-21 Mel Gorman <mgorman at suse.de>
+
+ * x86/mm: Clean up inconsistencies when flushing TLB ranges
+
+2014-01-21 Mel Gorman <mgorman at suse.de>
+
+ * mm, x86: Account for TLB flushes only when debugging
+
+2014-01-21 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * x86/AMD/NB: Fix amd_set_subcaches() parameter type
+
+2014-01-23 Aravind Gopalakrishnan <Aravind.Gopalakrishnan at amd.com>
+
+ * x86/quirks: Add workaround for AMD F16h Erratum792
+
+2014-01-25 Ingo Molnar <mingo at kernel.org>
+
+ * Merge branch 'timers/core' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into timers/urgent
+
+2014-01-25 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2014-01-23 Sagi Grimberg <sagig at mellanox.com>
+
+ * target: Report bad sector in sense data for DIF errors
+
+2014-01-20 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi-target: Convert gfp_t parameter to task state bitmask
+
+2014-01-20 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi-target: Fix connection reset hang with percpu_ida_alloc
+
+2014-01-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2014-01-21 Eric Sandeen <sandeen at sandeen.net>
+
+ * xfs: allow logical-sector sized O_DIRECT
+
+2014-01-21 Eric Sandeen <sandeen at sandeen.net>
+
+ * xfs: rename xfs_buftarg structure members
+
+2014-01-21 Eric Sandeen <sandeen at sandeen.net>
+
+ * xfs: clean up xfs_buftarg
+
+2014-01-24 Eric Van Hensbergen <ericvh at gmail.com>
+
+ * 9p: update documentation
+
+2014-01-24 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * Revert "drm/i915: Mask reserved bits in display/sprite address registers"
+
+2014-01-23 Vadim Rozenfeld <vrozenfe at redhat.com>
+
+ * KVM: x86: mark hyper-v vapic assist page as dirty
+
+2014-01-24 Lorenzo Pieralisi <Lorenzo.Pieralisi at arm.com>
+
+ * arm64: kernel: fix per-cpu offset restore on resume
+
+2014-01-24 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * ASoC: samsung: Remove dma.h inclusion
+
+2014-01-24 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * ASoC: samsung: Add NULL check in i2s.c
+
+2014-01-24 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * ASoC: Samsung: Fix build error due to missing dependency
+
+2014-01-24 Martin Schwidefsky <schwidefsky at de.ibm.com>
+
+ * s390/hypfs: add interface for diagnose 0x304
+
+2014-01-10 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: simple-card: fix simple card widgets routing property name usage
+
+2014-01-19 Kent Overstreet <kmo at daterainc.com>
+
+ * percpu_ida: Make percpu_ida_alloc + callers accept task state bitmask
+
+2014-01-23 Linus Walleij <linus.walleij at linaro.org>
+
+ * regulator: ab3100: cast fix
+
+2014-01-23 Masami Hiramatsu <masami.hiramatsu.pt at hitachi.com>
+
+ * perf symbols: Load map before using map->map_ip()
+
+2014-01-22 Josh Boyer <jwboyer at fedoraproject.org>
+
+ * perf tools: Fix traceevent plugin path definitions
+
+2014-01-23 Vadim Rozenfeld <vrozenfe at redhat.com>
+
+ * KVM: x86: mark hyper-v hypercall page as dirty
+
+2014-01-23 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Staging: rtl8821ae: add TODO file
+
+2014-01-23 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Staging: rtl8821ae: removed unused functions and variables
+
+2014-01-23 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Staging: rtl8821ae: rc.c: fix up function prototypes
+
+2014-01-21 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Staging: rtl8812ae: Add Realtek 8821 PCI WIFI driver
+
+2014-01-23 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * tracing: Check if tracing is enabled in trace_puts()
+
+2014-01-14 Boaz Harrosh <bharrosh at panasas.com>
+
+ * exofs: Print less in r4w
+
+2014-01-14 Boaz Harrosh <bharrosh at panasas.com>
+
+ * exofs: Allow corrupted directory entry to be empty file
+
+2014-01-13 Boaz Harrosh <bharrosh at panasas.com>
+
+ * exofs: Allow O_DIRECT open
+
+2014-01-23 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2014-01-13 Boaz Harrosh <bharrosh at panasas.com>
+
+ * ore: Don't crash on NULL bio in _clear_bio
+
+2014-01-23 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Merge branch 'next' into for-linus
+
+2014-01-21 Adrian Hunter <adrian.hunter at intel.com>
+
+ * mmc: sdhci-pci: Fix possibility of chip->fixes being null
+
+2014-01-09 Thierry Reding <treding at nvidia.com>
+
+ * drm/tegra: Obtain head number from DT
+
+2014-01-23 Mark Brown <broonie at linaro.org>
+
+ * Merge commit 'spi/topic/sc18is602' into spi-linus
+
+2014-01-23 Mark Brown <broonie at linaro.org>
+
+ * Merge commit 'spi/fix/rcar' into spi-linus
+
+2014-01-23 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'spi/topic/pxa2xx', 'spi/topic/qspi', 'spi/topic/s3c24xx', 'spi/topic/s3c64xx', 'spi/topic/sh', 'spi/topic/tegra114', 'spi/topic/tegra20-sflash', 'spi/topic/tegra20-slink', 'spi/topic/txx9' and 'spi/topic/xcomm' into spi-linus
+
+2014-01-23 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * ASoC: samsung: Fix Kconfig dependency
+
+2014-01-23 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * ASoC: wm5100: Export wm5100_detect
+
+2014-01-23 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'regulator/topic/s2mps11', 'regulator/topic/s5m8767', 'regulator/topic/stw481x-vmmc', 'regulator/topic/tps51632', 'regulator/topic/tps62360', 'regulator/topic/tps65910', 'regulator/topic/twl' and 'regulator/topic/wm831x' into regulator-linus
+
+2014-01-23 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'regulator/topic/db8500', 'regulator/topic/gpio', 'regulator/topic/lp3971', 'regulator/topic/lp3972', 'regulator/topic/max14577', 'regulator/topic/max77693', 'regulator/topic/mc13892', 'regulator/topic/pcf50633' and 'regulator/topic/pfuze100' into regulator-linus
+
+2014-01-23 Todd Previte <tprevite at gmail.com>
+
+ * drm/i915: VLV2 - Fix hotplug detect bits
+
+2014-01-23 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: Refactor slot assignment code
+
+2014-01-23 Paolo Bonzini <pbonzini at redhat.com>
+
+ * Merge tag 'kvm-s390-20140117' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into kvm-queue
+
+2014-01-23 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * ALSA: bits vs bytes bug in snd_card_create()
+
+2013-11-21 Boaz Harrosh <bharrosh at panasas.com>
+
+ * ore: Fix wrong math in allocation of per device BIO
+
+2014-01-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'modules-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux
+
+2014-01-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
+
+2014-01-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dm-3.14-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
+
+2014-01-23 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-nouveau-next' of git://anongit.freedesktop.org/git/nouveau/linux-2.6 into drm-next
+
+2014-01-23 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau: call drm_vblank_cleanup() earlier
+
+2014-01-22 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau: create base display from common code
+
+2014-01-17 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nv50/gr: print mpc trap name when it's not an mp trap
+
+2014-01-17 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nv50/gr: update list of mp errors, make it a bitfield
+
+2014-01-16 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nv50/gr: add more trap names to print on error
+
+2014-01-19 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nouveau/devinit: lock/unlock crtc regs for all devices, not just pre-nv50
+
+2014-01-14 Maarten Lankhorst <maarten.lankhorst at canonical.com>
+
+ * drm/nouveau: hold mutex while syncing to kernel channel
+
+2014-01-14 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nv50-/devinit: prevent use of engines marked as disabled by hw/vbios
+
+2014-01-09 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nouveau/device: provide a way for devinit to mark engines as disabled
+
+2014-01-14 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau/devinit: tidy up the subdev class definition
+
+2013-12-23 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau/bar: tidy up the subdev and object class definitions
+
+2013-12-23 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau/instmem: tidy up the object class definition
+
+2013-12-23 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau/instmem: tidy up the subdev class definition
+
+2014-01-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'scsi-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
+
+2014-01-10 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * mtd: s3c2410: Merge plat/regs-nand.h into s3c2410.c
+
+2014-01-22 Boaz Harrosh <bharrosh at panasas.com>
+
+ * pnfs: Proper delay for NFS4ERR_RECALLCONFLICT in layout_get_done
+
+2014-01-21 Takashi Iwai <tiwai at suse.de>
+
+ * drm/cirrus: correct register values for 16bpp
+
+2014-01-21 Jeff Mahoney <jeffm at suse.com>
+
+ * drm/nouveau: make vga_switcheroo code depend on VGA_SWITCHEROO
+
+2014-01-21 Dave Airlie <airlied at redhat.com>
+
+ * drm/mgag200: on cards with < 2MB VRAM default to 16-bit
+
+2014-01-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pci-v3.14-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
+
+2014-01-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'trace-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
+
+2014-01-22 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge branch 'fixes' into tty-next
+
+2014-01-22 Thomas Gleixner <tglx at linutronix.de>
+
+ * Merge tag 'mvebu-irqchip-fixes-3.13' of git://git.infradead.org/linux-mvebu into irq/core
+
+2013-12-05 Daniel Tang <dt.tangr at gmail.com>
+
+ * irqchip: Add support for TI-NSPIRE irqchip
+
+2013-12-04 Magnus Damm <damm at opensource.se>
+
+ * irqchip: renesas-irqc: Enable mask on suspend
+
+2013-12-04 Magnus Damm <damm at opensource.se>
+
+ * irqchip: renesas-irqc: Use lazy disable
+
+2014-01-22 James Bottomley <JBottomley at Parallels.com>
+
+ * Merge branch 'misc' into for-linus
+
+2014-01-22 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * ASoC: samsung: smdk_wm8994: Fix build error
+
+2014-01-22 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * ASoC: Samsung: s3c-i2s-v2: Fix build error
+
+2014-01-22 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * ASoC: samsung: Fix build regressions due to gpio re-org
+
+2014-01-22 Jiri Kosina <jkosina at suse.cz>
+
+ * Merge branches 'for-3.13/upstream-fixes', 'for-3.14/i2c-hid', 'for-3.14/sensor-hub', 'for-3.14/sony' and 'for-3.14/upstream' into for-linus
+
+2014-01-22 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * s390: wire up sys_sched_setattr/sys_sched_getattr
+
+2014-01-21 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * s390/uapi: fix struct statfs64 definition
+
+2014-01-20 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * s390/uaccess: remove dead extern declarations, make functions static
+
+2014-01-21 Kenneth Graunke <kenneth at whitecape.org>
+
+ * drm/i915: Allow reading the TIMESTAMP register on Gen8.
+
+2014-01-20 Chris Wilson <chris at chris-wilson.co.uk>
+
+ * drm/i915: Repeat evictions whilst pageflip completions are outstanding
+
+2014-01-20 Chris Wilson <chris at chris-wilson.co.uk>
+
+ * drm/i915: Wait for completion of pending flips when starved of fences
+
+2014-01-17 Imre Deak <imre.deak at intel.com>
+
+ * drm/i915: don't disable DP port after a failed link training
+
+2014-01-16 Imre Deak <imre.deak at intel.com>
+
+ * drm/i915: don't disable the DP port if the link is lost
+
+2014-01-16 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Eliminate lots of WARNs when there's no backlight present
+
+2014-01-15 Dongmao Zhang <dmzhang at suse.com>
+
+ * dm log userspace: allow mark requests to piggyback on flush requests
+
+2014-01-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (incoming from Andrew)
+
+2014-01-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata
+
+2014-01-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
+
+2014-01-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
+
+2014-01-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
+
+2014-01-21 Joonsoo Kim <iamjoonsoo.kim at lge.com>
+
+ * mm/migrate: remove unused function, fail_migrate_page()
+
+2014-01-21 Joonsoo Kim <iamjoonsoo.kim at lge.com>
+
+ * mm/migrate: remove putback_lru_pages, fix comment on putback_movable_pages
+
+2014-01-21 Joonsoo Kim <iamjoonsoo.kim at lge.com>
+
+ * mm/migrate: correct failure handling if !hugepage_migration_support()
+
+2014-01-21 Naoya Horiguchi <n-horiguchi at ah.jp.nec.com>
+
+ * mm/migrate: add comment about permanent failure path
+
+2014-01-22 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-vbl-timestamp' of git://gitorious.org/vsyrjala/linux into drm-next
+
+2014-01-22 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'topic/core-stuff' of git://people.freedesktop.org/~danvet/drm-intel into drm-next
+
+2014-01-22 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'vmwgfx-next' of git://people.freedesktop.org/~thomash/linux into drm-next
+
+2014-01-20 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * ASoC: fsl_ssi: Do not print 'baud clock' error message all the time
+
+2014-01-20 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * ASoC: fsl_ssi: We do support master mode now
+
+2014-01-21 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: wm5110: Extend SYSCLK patch file for rev D
+
+2014-01-21 Joe Thornber <ejt at redhat.com>
+
+ * dm space map metadata: fix bug in resizing of thin metadata
+
+2014-01-16 Namhyung Kim <namhyung at kernel.org>
+
+ * perf symbols: Fix JIT symbol resolution on heap
+
+2014-01-20 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Fix recently introduced sparse / smatch warnings and errors
+
+2014-01-20 Mark Brown <broonie at linaro.org>
+
+ * ASoC: omap: Make RX51 depend on GPIOLIB due to jack usage
+
+2014-01-02 Laura Abbott <lauraa at codeaurora.org>
+
+ * percpu: use VMALLOC_TOTAL instead of VMALLOC_END - VMALLOC_START
+
+2014-01-20 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/gem: Always initialize the gem object in object_init
+
+2014-01-20 Frank Praznik <frank.praznik at oh.rr.com>
+
+ * HID: sony: Use colors for the Dualshock 4 LED names
+
+2014-01-20 Frank Praznik <frank.praznik at oh.rr.com>
+
+ * HID: sony: Add annotated HID descriptor for the Dualshock 4
+
+2014-01-20 Ping Cheng <pinglinux at gmail.com>
+
+ * Input: wacom - add support for DTU-1031
+
+2014-01-16 Ping Cheng <pinglinux at gmail.com>
+
+ * Input: wacom - fix wacom->shared guards for dual input devices
+
+2014-01-16 Lothar Waßmann <LW at KARO-electronics.de>
+
+ * Input: edt_ft5x06 - use devm_* functions where appropriate
+
+2014-01-20 Arnaud Ebalard <arno at natisbad.org>
+
+ * ARM: mvebu: fix compilation warning on Armada 370 (i.e. non-SMP)
+
+2014-01-20 Ben Dooks <ben.dooks at codethink.co.uk>
+
 + * ARM: shmobile: r8a7790.dtsi: fix i2c[0-3] clock reference
+
+2014-01-21 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-next-3.14' of git://people.freedesktop.org/~agd5f/linux into drm-next
+
+2014-01-20 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: add UVD support for OLAND
+
+2014-01-17 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: fix minor typos in si_dpm.c
+
+2014-01-16 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: set the full cache bit for fences on r7xx+
+
+2014-01-16 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: fix surface sync in fence on cayman (v2)
+
+2014-01-07 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/dpm: disable mclk switching on desktop RV770
+
+2014-01-16 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: fix endian handling in radeon_atom_init_mc_reg_table
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-x32-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86/mpx' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-kaslr-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Gregory CLEMENT <gregory.clement at free-electrons.com>
+
+ * ARM: mvebu: Fix kernel hang in mvebu_soc_id_init() when of_iomap failed
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-platform-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-microcode-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-intel-mid-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-efi-kexec-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-cpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-cleanups-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-build-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-11 Brian Norris <computersforpeace at gmail.com>
+
+ * mtd: mtdram: add missing 'const'
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-apic-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-12-04 Brian Norris <computersforpeace at gmail.com>
+
+ * mtd: m25p80: assign default read command
+
+2014-01-07 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * mtd: nuc900_nand: remove redundant return value check of platform_get_resource()
+
+2014-01-07 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * mtd: plat_nand: remove redundant return value check of platform_get_resource()
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-03 Huang Shijie <b32955 at freescale.com>
+
+ * mtd: nand: add Intel manufacturer ID
+
+2013-12-26 Huang Shijie <b32955 at freescale.com>
+
+ * mtd: nand: add SanDisk manufacturer ID
+
+2013-12-25 Huang Shijie <b32955 at freescale.com>
+
+ * mtd: nand: add support for Samsung K9LCG08U0B
+
+2014-01-13 Rodolfo Giometti <giometti at linux.it>
+
+ * mtd: nand: pxa3xx: Add support for 2048 bytes page size devices
+
+2014-01-17 Stephane Eranian <eranian at google.com>
+
+ * perf stat: Fix memory corruption of xyarray when cpumask is used
+
+2014-01-20 Stephane Eranian <eranian at google.com>
+
+ * perf evsel: Remove duplicate member zeroing after free
+
+2014-01-20 Alan Cox <alan at linux.intel.com>
+
+ * perf tools: Ensure sscanf does not overrun the "mem" field
+
+2014-01-17 Stephane Eranian <eranian at google.com>
+
+ * perf stat: fix NULL pointer reference bug with event unit
+
+2014-01-13 Baruch Siach <baruch at tkos.co.il>
+
+ * perf tools: Add support for the xtensa architecture
+
+2014-01-20 Stanislav Fomichev <stfomichev at yandex-team.ru>
+
+ * perf session: Free cpu_map in perf_session__cpu_bitmap
+
+2014-01-20 Stanislav Fomichev <stfomichev at yandex-team.ru>
+
+ * perf timechart: Fix wrong SVG height
+
+2014-01-20 Ingo Molnar <mingo at kernel.org>
+
+ * x86/intel/mpx: Remove unused LWP structure
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-20 Alan <gnomes at lxorguk.ukuu.org.uk>
+
+ * x86, doc, kconfig: Fix dud URL for Microcode data
+
+2013-12-19 Vandana Kannan <vandana.kannan at intel.com>
+
+ * drm/edid: Populate picture aspect ratio for CEA modes
+
+2013-11-29 Thomas Wood <thomas.wood at intel.com>
+
+ * drm/edid: parse the list of additional 3D modes
+
+2013-11-29 Thomas Wood <thomas.wood at intel.com>
+
+ * drm/edid: split VIC display mode lookup into a separate function
+
+2013-11-28 Damien Lespiau <damien.lespiau at intel.com>
+
+ * drm: Make the connector mode_valid() vfunc return a drm_mode_status enum
+
+2014-01-16 Pavel Shilovsky <piastry at etersoft.ru>
+
+ * CIFS: Cleanup cifs open codepath
+
+2014-01-16 Pavel Shilovsky <piastry at etersoft.ru>
+
+ * CIFS: Remove extra indentation in cifs_sfu_type
+
+2014-01-16 Pavel Shilovsky <piastry at etersoft.ru>
+
+ * CIFS: Cleanup cifs_mknod
+
+2014-01-16 Pavel Shilovsky <piastry at etersoft.ru>
+
+ * CIFS: Cleanup CIFSSMBOpen
+
+2014-01-13 Adrian Hunter <adrian.hunter at intel.com>
+
+ * mmc: sdhci-pci: Fix BYT sd card getting stuck in runtime suspend
+
+2013-11-14 Adrian Hunter <adrian.hunter at intel.com>
+
+ * mmc: sdhci: Allow for long command timeouts
+
+2014-01-20 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.14-3' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2013-12-26 Andrew Lunn <andrew at lunn.ch>
+
+ * SATA: MV: Add support for the optional PHYs
+
+2014-01-17 Frank Praznik <frank.praznik at oh.rr.com>
+
+ * HID: sony: Cache the output report for the Dualshock 4
+
+2014-01-20 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/compress' into asoc-next
+
+2014-01-20 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/dma' into asoc-next
+
+2014-01-20 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/dapm' into asoc-next
+
+2013-10-29 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Add a kludge for DSL incrementing too late and ISR not working
+
+2013-10-28 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/radeon: Move the early vblank IRQ fixup to radeon_get_crtc_scanoutpos()
+
+2013-10-28 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm: Pass 'flags' from the caller to .get_scanout_position()
+
+2013-10-28 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm: Fix vblank timestamping constants for interlaced modes
+
+2014-01-20 Takashi Iwai <tiwai at suse.de>
+
+ * Merge branch 'for-next' into for-linus
+
+2013-10-28 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Fix scanoutpos calculations for interlaced modes
+
+2013-10-26 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm: Change {pixel,line,frame}dur_ns from s64 to int
+
+2013-10-27 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm: Use crtc_clock in drm_calc_timestamping_constants()
+
+2013-10-27 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/radeon: Populate crtc_clock in radeon_atom_get_tv_timings()
+
+2013-10-26 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm: Simplify the math in drm_calc_timestamping_constants()
+
+2013-12-02 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: Add support for follow_link on dfs shares under posix extensions
+
+2013-11-27 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: move unix extension call to cifs_query_symlink()
+
+2013-11-25 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: Re-order M-F Symlink code
+
+2013-11-25 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: Add create MFSymlinks to protocol ops struct
+
+2013-11-25 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: use protocol specific call for query_mf_symlink()
+
+2013-11-25 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: Rename MF symlink function names
+
+2013-11-25 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: Rename and cleanup open_query_close_cifs_symlink()
+
+2014-01-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.13
+
+2014-01-19 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nouveau/mxm: fix null deref on load
+
+2014-01-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'acpi-3.13-fixup' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2014-01-20 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-intel-next' of git://people.freedesktop.org/~danvet/drm-intel into drm-next
+
+2014-01-20 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'vmwgfx-next-2014-01-17' of git://people.freedesktop.org/~thomash/linux into drm-next
+
+2014-01-17 Al Viro <viro at ZenIV.linux.org.uk>
+
+ * tracing: Fix buggered tee(2) on tracing_pipe
+
+2014-01-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-19 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: export ccount_freq
+
+2014-01-16 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: fix warning '"CONFIG_OF" is not defined'
+
+2014-01-16 Stephen Boyd <sboyd at codeaurora.org>
+
+ * clocksource: Timer-sun5i: Switch to sched_clock_register()
+
+2014-01-19 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
+
+2014-01-19 Hans de Goede <hdegoede at redhat.com>
+
+ * sata-highbank: Remove unnecessary ahci_platform.h include
+
+2014-01-10 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi-target: Pre-allocate more tags to avoid ack starvation
+
+2013-12-16 Dirk Brandewie <dirk.j.brandewie at intel.com>
+
+ * turbostat: Add option to report joules consumed per sample
+
+2013-12-03 Len Brown <len.brown at intel.com>
+
+ * turbostat: run on HSX
+
+2013-08-20 Josh Triplett <josh at joshtriplett.org>
+
+ * turbostat: Add a .gitignore to ignore the compiled turbostat binary
+
+2013-08-20 Josh Triplett <josh at joshtriplett.org>
+
+ * turbostat: Clean up error handling; disambiguate error messages; use err and errx
+
+2013-08-20 Josh Triplett <josh at joshtriplett.org>
+
+ * turbostat: Factor out common function to open file and exit on failure
+
+2013-08-20 Josh Triplett <josh at joshtriplett.org>
+
+ * turbostat: Add a helper to parse a single int out of a file
+
+2013-08-20 Josh Triplett <josh at joshtriplett.org>
+
+ * turbostat: Check return value of fscanf
+
+2013-08-20 Josh Triplett <josh at joshtriplett.org>
+
+ * turbostat: Use GCC's CPUID functions to support PIC
+
+2013-08-20 Josh Triplett <josh at joshtriplett.org>
+
+ * turbostat: Don't attempt to printf an off_t with %zx
+
+2013-08-20 Josh Triplett <josh at joshtriplett.org>
+
+ * turbostat: Don't put unprocessed uapi headers in the include path
+
+2014-01-17 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * ARM: kirkwood: kirkwood_pm_init() should return void
+
+2014-01-18 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Merge branch 'eisa' into next
+
+2014-01-18 SeongJae Park <sj38.park at gmail.com>
+
+ * cgroup: trivial style updates
+
+2014-01-17 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Revert "ACPI: Add BayTrail SoC GPIO and LPSS ACPI IDs"
+
+2014-01-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2013-11-26 Bing Zhao <bzhao at marvell.com>
+
+ * mmc: sdio: add a quirk for broken SDIO_CCCR_INTx polling
+
+2013-12-23 Aisheng Dong <b29396 at freescale.com>
+
+ * mmc: sdhci: fix lockdep error in tuning routine
+
+2014-01-17 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * s390/bpf,jit: fix 32 bit divisions, use unsigned divide instructions
+
+2014-01-16 Eric Dumazet <edumazet at google.com>
+
+ * parisc: fix SO_MAX_PACING_RATE typo
+
+2014-01-16 Hannes Frederic Sowa <hannes at stressinduktion.org>
+
+ * ipv6: simplify detection of first operational link-local address on interface
+
+2014-01-16 Christoph Paasch <christoph.paasch at uclouvain.be>
+
+ * tcp: metrics: Avoid duplicate entries with the same destination-IP
+
+2014-01-16 Gerald Schaefer <gerald.schaefer at de.ibm.com>
+
+ * net: rds: fix per-cpu helper usage
+
+2014-01-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
+
+2014-01-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
+
+2014-01-17 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * clk: samsung: Remove unneeded semicolon
+
+2014-01-13 Zhangfei Gao <zhangfei.gao at linaro.org>
+
+ * mmc: dw_mmc: k3: remove clk_table
+
+2014-01-17 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Revert "EISA: Initialize device before its resources"
+
+2014-01-17 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Revert "EISA: Log device resources in dmesg"
+
+2014-01-17 Stephen Boyd <sboyd at codeaurora.org>
+
+ * clk: qcom: Fix modular build
+
+2013-08-02 Tero Kristo <t-kristo at ti.com>
+
+ * ARM: OMAP3: use DT clock init if DT data is available
+
+2013-07-19 Tero Kristo <t-kristo at ti.com>
+
+ * ARM: AM33xx: remove old clock data and link in new clock init code
+
+2013-11-21 Tero Kristo <t-kristo at ti.com>
+
+ * ARM: AM43xx: Enable clock init
+
+2014-01-12 Qiaowei Ren <qiaowei.ren at intel.com>
+
+ * x86, mpx: Add MPX related opcodes to the x86 opcode map
+
+2014-01-15 Arun Shamanna Lakshmi <aruns at nvidia.com>
+
+ * ASoC: dapm: Fix double prefix addition
+
+2014-01-17 Liam Girdwood <liam.r.girdwood at linux.intel.com>
+
 + * ASoC: compress: Add support for DPCM into compressed audio
+
+2014-01-17 Liam Girdwood <liam.r.girdwood at linux.intel.com>
+
+ * ASoC: DPCM: make some DPCM API calls non static for compressed usage
+
+2014-01-17 Axel Lin <axel.lin at ingics.com>
+
+ * spi: sc18is602: Convert to use bits_per_word_mask
+
+2014-01-14 Frederic Weisbecker <fweisbec at gmail.com>
+
+ * perf tools: Remove unnecessary callchain cursor state restore on unmatch
+
+2014-01-14 Frederic Weisbecker <fweisbec at gmail.com>
+
+ * perf callchain: Spare double comparison of callchain first entry
+
+2013-06-04 Cornelia Huck <cornelia.huck at de.ibm.com>
+
+ * KVM: s390: virtio-ccw: Handle command rejects.
+
+2014-01-16 Frank Praznik <frank.praznik at oh.rr.com>
+
+ * HID: sony: Map gyroscopes and accelerometers to axes
+
+2014-01-16 Frank Praznik <frank.praznik at oh.rr.com>
+
+ * HID: sony: Fix spacing in the device definitions.
+
+2014-01-16 Frank Praznik <frank.praznik at oh.rr.com>
+
+ * HID: sony: Use standard output reports instead of raw reports to send data to the Dualshock 4.
+
+2014-01-16 Frank Praznik <frank.praznik at oh.rr.com>
+
+ * HID: sony: Use separate identifiers for USB and Bluetooth connected Dualshock 4 controllers.
+
+2014-01-17 Jakob Bornecrantz <jakob at vmware.com>
+
+ * drm/vmwgfx: Invalidate surface on non-readback unbind
+
+2014-01-16 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Silence the device command verifier
+
+2014-01-15 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Implement 64-bit Otable- and MOB binding v2
+
+2014-01-15 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Fix surface framebuffer check for guest-backed surfaces
+
+2014-01-16 David S. Miller <davem at davemloft.net>
+
+ * Merge tag 'batman-adv-fix-for-davem' of git://git.open-mesh.org/linux-merge
+
+2013-12-20 Lukasz Majewski <l.majewski at samsung.com>
+
+ * thermal: exynos: boost: Automatic enable/disable of BOOST feature (at Exynos4412)
+
+2014-01-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
+
+2014-01-16 Hugh Dickins <hughd at google.com>
+
+ * percpu_counter: unbreak __percpu_counter_add()
+
+2014-01-16 Fengguang Wu <fengguang.wu at intel.com>
+
+ * x86, intel_mid: Replace memcpy with struct assignment
+
+2014-01-16 Mika Westerberg <mika.westerberg at linux.intel.com>
+
+ * e1000e: Fix compilation warning when !CONFIG_PM_SLEEP
+
+2014-01-16 David Cohen <david.a.cohen at linux.intel.com>
+
+ * x86, intel-mid: Return proper error code from get_gpio_by_name()
+
+2014-01-16 David Cohen <david.a.cohen at linux.intel.com>
+
+ * x86, intel-mid: Check get_gpio_by_name() error code on platform code
+
+2014-01-16 David Cohen <david.a.cohen at linux.intel.com>
+
+ * x86, intel-mid: sfi_handle_*_dev() should check for pdata error code
+
+2014-01-14 Kharlamov Alexey <derlafff at yandex.ru>
+
+ * HID: hid-holtek-mouse: add new a070 mouse
+
+2014-01-14 Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
+
+ * HID: hid-sensor-hub: Fix buggy report descriptors
+
+2014-01-08 Benjamin Tissoires <benjamin.tissoires at redhat.com>
+
+ * HID: logitech-dj: Fix USB 3.0 issue
+
+2014-01-11 Frank Praznik <frank.praznik at oh.rr.com>
+
+ * HID: sony: Rename worker function
+
+2014-01-16 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * Merge commit origin/master into drm-intel-next
+
+2014-01-16 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86, tsc, apic: Unbreak static (MSR) calibration when CONFIG_X86_LOCAL_APIC=n
+
+2014-01-14 Frederic Weisbecker <fweisbec at gmail.com>
+
+ * perf tools: Do proper comm override error handling
+
+2014-01-16 Masami Hiramatsu <masami.hiramatsu.pt at hitachi.com>
+
+ * perf symbols: Export elf_section_by_name and reuse
+
+2014-01-16 Masami Hiramatsu <masami.hiramatsu.pt at hitachi.com>
+
+ * perf probe: Release all dynamically allocated parameters
+
+2014-01-16 Masami Hiramatsu <masami.hiramatsu.pt at hitachi.com>
+
+ * perf probe: Release allocated probe_trace_event if failed
+
+2014-01-16 Namhyung Kim <namhyung at kernel.org>
+
+ * perf tools: Add 'build-test' make target
+
+2014-01-16 Namhyung Kim <namhyung at kernel.org>
+
+ * tools lib traceevent: Unregister handler when xen plugin is unloaded
+
+2014-01-16 Namhyung Kim <namhyung at kernel.org>
+
+ * tools lib traceevent: Unregister handler when scsi plugin is unloaded
+
+2014-01-15 Mike Snitzer <snitzer at redhat.com>
+
+ * dm cache: add policy name to status output
+
+2014-01-16 Catalin Marinas <catalin.marinas at arm.com>
+
+ * Revert "arm64: Fix memory shareability attribute for ioremap_wc/cache"
+
+2014-01-16 Peter Zijlstra <peterz at infradead.org>
+
+ * sched: Fix __sched_setscheduler() nice test
+
+2014-01-16 Sebastian Hesselbarth <sebastian.hesselbarth at gmail.com>
+
+ * ARM: orion: provide C-style interrupt handler for MULTI_IRQ_HANDLER
+
+2014-01-16 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.13-rc8-2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2014-01-16 Tejun Heo <tj at kernel.org>
+
+ * libata: disable LPM for some WD SATA-I devices
+
+2014-01-16 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: core: Fix possible NULL pointer dereference of pcm->config
+
+2014-01-16 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/adau1701' and 'asoc/fix/tlv320aic32x4' into asoc-linus
+
+2014-01-16 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.14-2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-next
+
+2014-01-16 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/topic/adsp', 'asoc/topic/atmel', 'asoc/topic/bcm2835', 'asoc/topic/docs', 'asoc/topic/fsl', 'asoc/topic/generic', 'asoc/topic/kirkwood', 'asoc/topic/mc13783', 'asoc/topic/mxs', 'asoc/topic/nuc900', 'asoc/topic/sai', 'asoc/topic/sh', 'asoc/topic/ssm2602', 'asoc/topic/tlv320aic3x', 'asoc/topic/twl4030', 'asoc/topic/ux500', 'asoc/topic/width' and 'asoc/topic/x86' into for-tiwai
+
+2014-01-16 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/arizona' into for-tiwai
+
+2014-01-16 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/pcm' into for-tiwai
+
+2014-01-16 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/dma' into for-tiwai
+
+2014-01-16 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/dapm' into for-tiwai
+
+2014-01-16 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/core' into for-tiwai
+
+2014-01-16 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/adau1701' and 'asoc/fix/tlv320aic32x4' into for-tiwai
+
+2014-01-16 Hui Wang <hui.wang at canonical.com>
+
+ * ALSA: hda - add headset mic detect quirks for some Dell machines
+
+2014-01-16 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
+
+2014-01-16 Ingo Molnar <mingo at kernel.org>
+
+ * Merge branch 'perf/urgent' into perf/core
+
+2014-01-15 Robert Richter <rric at kernel.org>
+
+ * perf/x86/amd/ibs: Fix waking up from S3 for AMD family 10h
+
+2014-01-10 Peter Zijlstra <peterz at infradead.org>
+
+ * x86, mm, perf: Allow recursive faults from interrupts
+
+2013-10-21 Bin Gao <bin.gao at intel.com>
+
+ * x86, tsc: Add static (MSR) TSC calibration on Intel Atom SoCs
+
+2014-01-13 Prarit Bhargava <prarit at redhat.com>
+
+ * x86: Add check for number of available vectors before CPU down
+
+2014-01-16 Dave Airlie <airlied at redhat.com>
+
+ * drm/mgag200: fix oops in cursor code.
+
+2014-01-15 Mike Snitzer <snitzer at redhat.com>
+
+ * dm thin: fix pool feature parsing
+
+2014-01-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branches 'sched-urgent-for-linus' and 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging
+
+2014-01-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-arm
+
+2014-01-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'writeback-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/linux
+
+2014-01-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'i2c/for-current' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux
+
+2014-01-15 Eric Dumazet <edumazet at google.com>
+
+ * bpf: do not use reciprocal divide
+
+2014-01-15 Ivan Vecera <ivecera at redhat.com>
+
+ * be2net: add dma_mapping_error() check for dma_map_page()
+
+2014-01-15 Yuval Mintz <yuvalmin at broadcom.com>
+
+ * bnx2x: Don't release PCI bars on shutdown
+
+2014-01-14 Richard Weinberger <richard at nod.at>
+
+ * net,via-rhine: Fix tx_timeout handling
+
+2014-01-15 Markus Pargmann <mpa at pengutronix.de>
+
+ * ASoC: tlv320aic32x4: Fix regmap range_min
+
+2014-01-15 Kevin Hilman <khilman at linaro.org>
+
+ * sched/nohz: Fix overflow error in scheduler_tick_max_deferment()
+
+2014-01-15 Mark Brown <broonie at linaro.org>
+
+ * ASoC: core: Return -ENOTSUPP from set_sysclk() if no operation provided
+
+2014-01-15 Marek Lindner <mareklindner at neomailbox.ch>
+
+ * batman-adv: fix batman-adv header overhead calculation
+
+2013-12-16 David Cohen <david.a.cohen at linux.intel.com>
+
+ * x86, intel-mid: Remove deprecated X86_MDFLD and X86_WANT_INTEL_MID configs
+
+2013-12-16 David Cohen <david.a.cohen at linux.intel.com>
+
+ * x86, intel-mid: Add Merrifield platform support
+
+2013-12-16 Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy at linux.intel.com>
+
+ * x86, intel-mid: Add Clovertrail platform support
+
+2013-12-16 David Cohen <david.a.cohen at linux.intel.com>
+
+ * x86, intel-mid: Move Medfield code out of intel-mid.c core file
+
+2014-01-15 H. Peter Anvin <hpa at zytor.com>
+
+ * x86, apic: Make disabled_cpu_apicid static read_mostly, fix typos
+
+2014-01-15 Mark Rutland <mark.rutland at arm.com>
+
+ * tools lib traceevent: fix pointer-integer size mismatch
+
+2014-01-14 Namhyung Kim <namhyung at kernel.org>
+
+ * perf hists: Convert hist entry functions to use struct he_stat
+
+2014-01-14 Namhyung Kim <namhyung at kernel.org>
+
+ * perf tools: Factor out sample__resolve_callchain()
+
+2014-01-14 Namhyung Kim <namhyung at kernel.org>
+
+ * perf tools: Remove symbol_conf.use_callchain check
+
+2014-01-15 Mark Rutland <mark.rutland at arm.com>
+
+ * perf: tools: Fix cross building
+
+2014-01-15 Namhyung Kim <namhyung at kernel.org>
+
+ * tools lib traceevent: Make plugin unload function receive pevent
+
+2014-01-15 Namhyung Kim <namhyung at kernel.org>
+
+ * tools lib traceevent: Get rid of die() finally!!
+
+2014-01-15 Namhyung Kim <namhyung at kernel.org>
+
+ * tools lib traceevent: Get rid of malloc_or_die() in trace_seq_init()
+
+2014-01-15 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Merge branch 'pci/reset' into next
+
+2014-01-15 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Merge branch 'pci/locking' into next
+
+2014-01-15 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Merge branch 'pci/misc' into next
+
+2014-01-14 Alex Williamson <alex.williamson at redhat.com>
+
+ * vfio-pci: Use pci "try" reset interface
+
+2014-01-14 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * PCI: Check parent kobject in pci_destroy_dev()
+
+2014-01-10 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * xen/pcifront: Use global PCI rescan-remove locking
+
+2014-01-15 Rafael J. Wysocki <rjw at rjwysocki.net>
+
+ * powerpc/eeh: Use global PCI rescan-remove locking
+
+2014-01-15 HATAYAMA Daisuke <d.hatayama at jp.fujitsu.com>
+
+ * x86, apic, kexec: Add disable_cpu_apicid kernel parameter
+
+2014-01-14 Mauro Carvalho Chehab <m.chehab at samsung.com>
+
+ * [media] rc-core: reuse device numbers
+
+2014-01-14 Mauro Carvalho Chehab <m.chehab at samsung.com>
+
+ * [media] em28xx-cards: properly initialize the device bitmap
+
+2014-01-14 Monam Agarwal <monamagarwal123 at gmail.com>
+
+ * [media] Staging: media: Fix line length exceeding 80 characters in as102_drv.c
+
+2014-01-15 Andrew Jones <drjones at redhat.com>
+
+ * kvm: x86: fix apic_base enable check
+
+2014-01-15 Borislav Petkov <bp at suse.de>
+
+ * x86, cpu, amd: Fix a shadowed variable situation
+
+2014-01-14 Arun Shamanna Lakshmi <aruns at nvidia.com>
+
+ * ASoC: dapm: Change prototype of soc_widget_read
+
+2014-01-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (incoming from Andrew)
+
+2014-01-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'md/3.13-fixes' of git://neil.brown.name/md
+
+2014-01-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2014-01-14 Ming Lei <tom.leiming at gmail.com>
+
+ * lib/percpu_counter.c: fix __percpu_counter_add()
+
+2014-01-14 Qais Yousef <qais.yousef at imgtec.com>
+
+ * crash_dump: fix compilation error (on MIPS at least)
+
+2014-01-14 Mikulas Patocka <mpatocka at redhat.com>
+
+ * mm: fix crash when using XFS on loopback
+
+2014-01-14 Aaro Koskinen <aaro.koskinen at iki.fi>
+
+ * MIPS: fix blast_icache32 on loongson2
+
+2014-01-14 Huacai Chen <chenhc at lemote.com>
+
+ * MIPS: fix case mismatch in local_r4k_flush_icache_range()
+
+2014-01-14 Andreas Rohner <andreas.rohner at gmx.net>
+
+ * nilfs2: fix segctor bug that causes file system corruption
+
+2014-01-15 Ingo Molnar <mingo at kernel.org>
+
+ * Merge branch 'clockevents/3.13-fixes' of git://git.linaro.org/people/daniel.lezcano/linux into timers/urgent
+
+2014-01-15 Vasant Hegde <hegdevasant at linux.vnet.ibm.com>
+
+ * powerpc/powernv: Call OPAL sync before kexec'ing
+
+2014-01-15 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-nouveau-next' of git://git.freedesktop.org/git/nouveau/linux-2.6 into drm-fixes
+
+2014-01-13 Aneesh Kumar K.V <aneesh.kumar at linux.vnet.ibm.com>
+
+ * powerpc/thp: Fix crash on mremap
+
+2014-01-14 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau: fix null ptr dereferences on some boards
+
+2014-01-13 Mikulas Patocka <mpatocka at redhat.com>
+
+ * dm sysfs: fix a module unload race
+
+2014-01-13 Mikulas Patocka <mpatocka at redhat.com>
+
+ * dm snapshot: use dm-bufio prefetch
+
+2014-01-13 Mikulas Patocka <mpatocka at redhat.com>
+
+ * dm snapshot: use dm-bufio
+
+2014-01-14 Jitendra Kalsaria <jitendra.kalsaria at qlogic.com>
+
+ * qlge: Fix vlan netdev features.
+
+2014-01-13 Hannes Frederic Sowa <hannes at stressinduktion.org>
+
+ * net: avoid reference counter overflows on fib_rules in multicast forwarding
+
+2014-01-12 Peter Korsgaard <peter at korsgaard.com>
+
+ * dm9601: add USB IDs for new dm96xx variants
+
+2014-01-12 Michael S. Tsirkin <mst at redhat.com>
+
+ * MAINTAINERS: add virtio-dev ML for virtio
+
+2014-01-15 Borislav Petkov <bp at suse.de>
+
+ * x86, cpu, amd: Add workaround for family 16h, erratum 793
+
+2014-01-14 Bjorn Helgaas <bhelgaas at google.com>
+
+ * PCI: Fix pci_check_and_unmask_intx() comment typos
+
+2014-01-11 Christian Engelmayer <cengelma at gmx.at>
+
+ * ieee802154: Fix memory leak in ieee802154_add_iface()
+
+2013-12-16 Alex Williamson <alex.williamson at redhat.com>
+
+ * PCI: Add pci_try_reset_function(), pci_try_reset_slot(), pci_try_reset_bus()
+
+2014-01-11 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: samsung: Remove SND_DMAENGINE_PCM_FLAG_NO_RESIDUE flag
+
+2014-01-11 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: axi-{spdif,i2s}: Remove SND_DMAENGINE_PCM_FLAG_NO_RESIDUE flag
+
+2014-01-11 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: generic-dmaengine-pcm: Check DMA residue granularity
+
+2014-01-11 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: generic-dmaengine-pcm: Check NO_RESIDUE flag at runtime
+
+2014-01-11 Lars-Peter Clausen <lars at metafoo.de>
+
+ * dma: pl330: Set residue_granularity
+
+2014-01-11 Lars-Peter Clausen <lars at metafoo.de>
+
+ * dma: Indicate residue granularity in dma_slave_caps
+
+2014-01-14 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: simple-card: fix one bug to writing to the platform data
+
+2014-01-11 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: pcm: Use snd_pcm_rate_mask_intersect() helper
+
+2014-01-11 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ALSA: Add helper function for intersecting two rate masks
+
+2014-01-11 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: s6000: Don't mix SNDRV_PCM_RATE_CONTINUOUS with specific rates
+
+2014-01-11 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: fsl: Don't mix SNDRV_PCM_RATE_CONTINUOUS with specific rates
+
+2014-01-10 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: implement ndelay
+
+2014-01-10 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: clean up udelay
+
+2013-11-28 Baruch Siach <baruch at tkos.co.il>
+
+ * xtensa: enable HAVE_PERF_EVENTS
+
+2013-12-29 Baruch Siach <baruch at tkos.co.il>
+
+ * xtensa: remap io area defined in device tree
+
+2013-12-23 Baruch Siach <baruch at tkos.co.il>
+
+ * xtensa: support default device tree buses
+
+2013-12-23 Baruch Siach <baruch at tkos.co.il>
+
+ * xtensa: initialize device tree clock sources
+
+2013-12-25 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: xtfpga: fix definitions of platform devices
+
+2013-12-01 Baruch Siach <baruch at tkos.co.il>
+
+ * xtensa: standardize devicetree cpu compatible strings
+
+2013-11-17 Baruch Siach <baruch at tkos.co.il>
+
+ * xtensa: avoid duplicate of IO range definitions
+
+2013-11-11 Baruch Siach <baruch at tkos.co.il>
+
+ * xtensa: fix ATOMCTL register documentation
+
+2013-12-12 Kirill Tkhai <tkhai at yandex.ru>
+
+ * xtensa: Enable irqs after cpu is set online
+
+2013-11-10 Max Filippov <jcmvbkbc at gmail.com>
+
+ * xtensa: ISS: raise network polling rate to 10 times/sec
+
+2013-12-10 Kees Cook <keescook at chromium.org>
+
+ * x86, kaslr: Clarify RANDOMIZE_BASE_MAX_OFFSET
+
+2013-12-07 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * x86, kaslr: Remove unused including <linux/version.h>
+
+2014-01-13 Mikulas Patocka <mpatocka at redhat.com>
+
+ * dm snapshot: prepare for switch to using dm-bufio
+
+2014-01-14 Jean Delvare <khali at linux-fr.org>
+
+ * hwmon: (coretemp) Fix truncated name of alarm attributes
+
+2014-01-14 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
+
+2014-01-13 Stephen Warren <swarren at nvidia.com>
+
+ * i2c: Re-instate body of i2c_parent_is_i2c_adapter()
+
+2014-01-14 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * tracing: Have trace buffer point back to trace_array
+
+2014-01-02 Arnd Bergmann <arnd at arndb.de>
+
+ * sound: oss: remove last sleep_on users
+
+2014-01-02 Arnd Bergmann <arnd at arndb.de>
+
+ * sound: oss: dmasound: kill SLEEP() macro to avoid race
+
+2014-01-02 Arnd Bergmann <arnd at arndb.de>
+
+ * sound: oss: midibuf: fix sleep_on races
+
+2014-01-09 Stephen Warren <swarren at nvidia.com>
+
+ * drm/panel: update EDID BLOB in panel_simple_get_modes()
+
+2014-01-02 Arnd Bergmann <arnd at arndb.de>
+
+ * sound: oss: vwsnd: avoid interruptible_sleep_on
+
+2014-01-02 Arnd Bergmann <arnd at arndb.de>
+
+ * sound: oss: msnd_pinnacle: avoid interruptible_sleep_on_timeout
+
+2014-01-14 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix endless vmaster hook call in thinkpad_helper.c
+
+2014-01-13 Thierry Reding <treding at nvidia.com>
+
+ * gpu: host1x: Remove unnecessary include
+
+2014-01-13 Thierry Reding <treding at nvidia.com>
+
+ * drm/tegra: Use proper data type
+
+2014-01-13 Thierry Reding <treding at nvidia.com>
+
+ * drm/tegra: Clarify how panel modes override others
+
+2014-01-10 Thierry Reding <treding at nvidia.com>
+
+ * drm/tegra: Fix possible CRTC mask for RGB outputs
+
+2014-01-12 Ingo Molnar <mingo at kernel.org>
+
+ * Merge branch 'clockevents/3.14' of git://git.linaro.org/people/daniel.lezcano/linux into timers/core
+
+2014-01-14 Richard Weinberger <richard at nod.at>
+
+ * x86/apic: Read Error Status Register correctly
+
+2014-01-14 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'amd_ucode_for_3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp into x86/microcode
+
+2014-01-10 Bjørn Mork <bjorn at mork.no>
+
+ * net: usbnet: fix SG initialisation
+
+2014-01-10 Neal Cardwell <ncardwell at google.com>
+
+ * inet_diag: fix inet_diag_dump_icsk() to use correct state for timewait sockets
+
+2013-12-12 NeilBrown <neilb at suse.de>
+
+ * md: fix problem when adding device to read-only array with bitmap.
+
+2014-01-06 NeilBrown <neilb at suse.de>
+
+ * md/raid10: fix bug when raid10 recovery fails to recover a block.
+
+2014-01-14 NeilBrown <neilb at suse.de>
+
+ * md/raid5: fix a recently broken BUG_ON().
+
+2014-01-14 NeilBrown <neilb at suse.de>
+
+ * md/raid1: fix request counting bug in new 'barrier' code.
+
+2014-01-14 NeilBrown <neilb at suse.de>
+
+ * md/raid10: fix two bugs in handling of known-bad-blocks.
+
+2014-01-06 NeilBrown <neilb at suse.de>
+
+ * md/raid5: Fix possible confusion when multiple write errors occur.
+
+2013-12-20 Randy Dunlap <rdunlap at infradead.org>
+
+ * gpu: fix qxl missing crc32_le
+
+2014-01-06 Rashika <rashika.kheria at gmail.com>
+
+ * drivers: gpu: Include appropriate header file in r128_ioc32.c
+
+2014-01-06 Rashika <rashika.kheria at gmail.com>
+
+ * drivers: gpu: Mark function as static in via_drv.c
+
+2014-01-14 Dave Airlie <airlied at redhat.com>
+
+ * Revert "drm: copy mode type in drm_mode_connector_list_update()"
+
+2014-01-14 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm-intel-fixes-2014-01-13' of git://people.freedesktop.org/~danvet/drm-intel into drm-fixes
+
+2013-12-31 Gregory CLEMENT <gregory.clement at free-electrons.com>
+
+ * i2c: mv64xxx: Document the newly introduced Armada XP A0 compatible
+
+2013-12-31 Gregory CLEMENT <gregory.clement at free-electrons.com>
+
+ * i2c: mv64xxx: Fix bus hang on A0 version of the Armada XP SoCs
+
+2014-01-13 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Merge branch 'pci/ifndefs' into next
+
+2014-01-13 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * lustre: delete linux/lustre_debug.h
+
+2014-01-13 Bjorn Helgaas <bhelgaas at google.com>
+
+ * PCI: Cleanup pci.h whitespace
+
+2014-01-13 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * staging: lustre: remove some unused debug macros
+
+2014-01-10 Marek Roszko <mark.roszko at gmail.com>
+
+ * tty/serial: at91: disable uart timer at start of shutdown
+
+2014-01-13 Paul Zimmerman <Paul.Zimmerman at synopsys.com>
+
+ * usb: dwc2: move device tree bindings doc to correct place
+
+2014-01-13 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Merge branch 'pci/dead-code' into next
+
+2014-01-11 Yann Droneaud <ydroneaud at opteya.com>
+
+ * perf tools: Remove unused test-volatile-register-var.c
+
+2014-01-04 Tetsuo Handa <penguin-kernel at I-love.SAKURA.ne.jp>
+
+ * slub: Fix possible format string bug.
+
+2014-01-10 Peter Zijlstra <peterz at infradead.org>
+
+ * slub: use lockdep_assert_held
+
+2014-01-09 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * cxgb4: silence shift wrapping static checker warning
+
+2013-12-01 Borislav Petkov <bp at suse.de>
+
+ * x86, microcode: Move to a proper location
+
+2013-11-29 Borislav Petkov <bp at suse.de>
+
+ * x86, microcode, AMD: Fix early ucode loading
+
+2013-12-04 Borislav Petkov <bp at suse.de>
+
+ * x86, microcode: Share native MSR accessing variants
+
+2013-12-04 Borislav Petkov <bp at suse.de>
+
+ * x86, ramdisk: Export relocated ramdisk VA
+
+2014-01-13 Arnaldo Carvalho de Melo <acme at redhat.com>
+
+ * perf probe: Fix build when DWARF support libraries not present
+
+2014-01-13 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * ftrace: Fix synchronization location disabling and freeing ftrace_ops
+
+2014-01-13 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: simple-card: use snd_soc_card_set/get_drvdata
+
+2014-01-13 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * ftrace: Have function graph only trace based on global_ops filters
+
+2014-01-12 Hugh Dickins <hughd at google.com>
+
+ * cgroup: remove stray references to css_id
+
+2013-12-30 Ramkumar Ramachandra <artagnon at gmail.com>
+
+ * perf diff: Color the Weighted Diff column
+
+2013-12-30 Ramkumar Ramachandra <artagnon at gmail.com>
+
+ * perf diff: Color the Ratio column
+
+2013-12-30 Ramkumar Ramachandra <artagnon at gmail.com>
+
+ * perf diff: Color the Delta column
+
+2014-01-13 Prarit Bhargava <prarit at redhat.com>
+
+ * x86/irq: Fix kbuild warning in smp_irq_move_cleanup_interrupt()
+
+2013-12-30 Ramkumar Ramachandra <artagnon at gmail.com>
+
+ * perf tools: Generalize percent_color_snprintf()
+
+2014-01-11 Markus Pargmann <mpa at pengutronix.de>
+
+ * ASoC: tlv320aic3x: Add tlv320aic32x4 as compatible
+
+2014-01-11 Markus Pargmann <mpa at pengutronix.de>
+
+ * ASoC: codec: tlv320aic32x4: Fix regmap range config
+
+2014-01-08 Mark Brown <broonie at linaro.org>
+
+ * ASoC: max9850: Use params_width() rather than memory format
+
+2014-01-08 Mark Brown <broonie at linaro.org>
+
+ * ASoC: max98095: Use params_width() rather than memory format
+
+2014-01-08 Mark Brown <broonie at linaro.org>
+
+ * ASoC: max98090: Use params_width() rather than memory format
+
+2014-01-08 Mark Brown <broonie at linaro.org>
+
+ * ASoC: max98088: Use params_width() rather than memory format
+
+2014-01-08 Mark Brown <broonie at linaro.org>
+
+ * ASoC: isabelle: Use params_width() rather than memory format
+
+2014-01-08 Mark Brown <broonie at linaro.org>
+
+ * ASoC: da9055: Use params_width() rather than memory format
+
+2014-01-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
+
+2014-01-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2014-01-13 Benjamin Herrenschmidt <benh at kernel.crashing.org>
+
+ * powerpc: Check return value of instance-to-package OF call
+
+2014-01-12 K. Y. Srinivasan <kys at microsoft.com>
+
+ * Input: hyperv-keyboard - pass through 0xE1 prefix
+
+2014-01-12 Geert Uytterhoeven <geert+renesas at linux-m68k.org>
+
+ * Input: logips2pp - fix spelling s/reciver/receiver/
+
+2014-01-12 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'ras_for_3.14_p2' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras into x86/ras
+
+2014-01-12 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'v3.13-rc8' into x86/ras, to pick up fixes.
+
+2014-01-12 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
+
+2014-01-12 Richard Weinberger <richard at nod.at>
+
+ * um, x86: Fix vDSO build
+
+2013-12-23 Borislav Petkov <bp at suse.de>
+
+ * x86, mce: Fix mce_start_timer semantics
+
+2014-01-10 Taras Kondratiuk <taras.kondratiuk at linaro.org>
+
+ * ARM: 7938/1: OMAP4/highbank: Flush L2 cache before disabling
+
+2014-01-05 Prarit Bhargava <prarit at redhat.com>
+
+ * x86/irq: Fix do_IRQ() interrupt warning for cpu hotplug retriggered irqs
+
+2014-01-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.13-rc8
+
+2014-01-09 Steven Rostedt <rostedt at goodmis.org>
+
+ * SELinux: Fix possible NULL pointer dereference in selinux_inode_permission()
+
+2014-01-12 Hugh Dickins <hughd at google.com>
+
+ * thp: fix copy_page_rep GPF by testing is_huge_zero_pmd once only
+
+2013-12-26 Ming Lei <tom.leiming at gmail.com>
+
+ * block: null_blk: fix queue leak inside removing device
+
+2014-01-05 Yann Droneaud <ydroneaud at opteya.com>
+
+ * perf: Introduce a flag to enable close-on-exec in perf_event_open()
+
+2014-01-08 Stephane Eranian <eranian at google.com>
+
+ * perf/x86/intel: Add Intel RAPL PP1 energy counter support
+
+2014-01-08 Stephane Eranian <eranian at google.com>
+
+ * perf/x86: Fix active_entry initialization
+
+2014-01-02 John Stultz <john.stultz at linaro.org>
+
+ * sched_clock: Disable seqlock lockdep usage in sched_clock()
+
+2014-01-02 John Stultz <john.stultz at linaro.org>
+
+ * seqlock: Use raw_ prefix instead of _no_lockdep
+
+2014-01-06 Rik van Riel <riel at redhat.com>
+
+ * sched: Calculate effective load even if local weight is 0
+
+2014-01-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * x86, fpu, amd: Clear exceptions in AMD FXSAVE workaround
+
+2014-01-11 jon ernst <jonernst07 at gmail.com>
+
+ * ext4: delete "set but not used" variables
+
+2014-01-10 Taras Kondratiuk <taras.kondratiuk at linaro.org>
+
+ * ARM: 7939/1: traps: fix opcode endianness when read from user memory
+
+2014-01-10 Stephen Boyd <sboyd at codeaurora.org>
+
+ * ARM: 7937/1: perf_event: Silence sparse warning
+
+2014-01-08 Sudeep Holla <sudeep.holla at arm.com>
+
+ * ARM: 7934/1: DT/kernel: fix arch_match_cpu_phys_id to avoid erroneous match
+
+2014-01-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2014-01-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'xfs-for-linus-v3.13-rc8' of git://oss.sgi.com/xfs/xfs
+
+2014-01-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'leds-fixes-for-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/cooloney/linux-leds
+
+2014-01-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.13-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2014-01-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'mfd-fixes-3.13-2' of git://git.kernel.org/pub/scm/linux/kernel/git/sameo/mfd-fixes
+
+2013-12-02 Milo Kim <milo.kim at ti.com>
+
+ * leds: lp5521/5523: Remove duplicate mutex
+
+2014-01-10 Jesse Barnes <jbarnes at virtuousgeek.org>
+
+ * drm/i915/bdw: make sure south port interrupts are enabled properly v2
+
+2014-01-10 Chris Wilson <chris at chris-wilson.co.uk>
+
+ * drm/i915: Include more information in disabled hotplug interrupt warning
+
+2014-01-10 Chris Wilson <chris at chris-wilson.co.uk>
+
+ * drm/i915: Only complain about a rogue hotplug IRQ after disabling
+
+2014-01-10 Chris Wilson <chris at chris-wilson.co.uk>
+
+ * drm/i915: Only WARN about a stuck hotplug irq ONCE
+
+2013-12-18 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915: s/hotplugt_status_gen4/hotplug_status_g4x/
+
+2014-01-07 Chuansheng Liu <chuansheng.liu at intel.com>
+
+ * xfs: Calling destroy_work_on_stack() to pair with INIT_WORK_ONSTACK()
+
+2014-01-01 Jie Liu <jeff.liu at oracle.com>
+
+ * xfs: fix off-by-one error in xfs_attr3_rmt_verify
+
+2014-01-09 Shahed Shaikh <shahed.shaikh at qlogic.com>
+
+ * qlcnic: Fix ethtool statistics length calculation
+
+2014-01-09 Manish Chopra <manish.chopra at qlogic.com>
+
+ * qlcnic: Fix bug in TX statistics
+
+2014-01-10 Jason Wang <jasowang at redhat.com>
+
+ * net: core: explicitly select a txq before doing l2 forwarding
+
+2014-01-10 Jason Wang <jasowang at redhat.com>
+
+	* macvlan: forbid L2 forwarding offload for macvtap
+
+2014-01-10 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'for-davem' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless
+
+2014-01-09 Michal Schmidt <mschmidt at redhat.com>
+
+ * bnx2x: fix DMA unmapping of TSO split BDs
+
+2014-01-09 Alex Williamson <alex.williamson at redhat.com>
+
+ * PCI: Never treat a VF as a multifunction device
+
+2014-01-10 Dominique Martinet <dominique.martinet at cea.fr>
+
+ * 9P: introduction of a new cache=mmap model.
+
+2014-01-10 Nicolin Chen <Guangyu.Chen at freescale.com>
+
+ * ASoC: fsl_esai: Add ESAI CPU DAI driver
+
+2014-01-09 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: wm5110: Add controls for headphone short circuit protection
+
+2014-01-09 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * mfd: wm5110: Add registers for headphone short circuit control
+
+2014-01-10 Mark Brown <broonie at linaro.org>
+
+ * Merge tag 'v3.13-rc3' into asoc-arizona
+
+2014-01-10 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'clk-fixes-for-linus' of git://git.linaro.org/people/mike.turquette/linux
+
+2014-01-10 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2014-01-07 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Don't grab crtc mutexes in intel_modeset_gem_init()
+
+2014-01-08 Hannes Frederic Sowa <hannes at stressinduktion.org>
+
+ * ipv6: add link-local, sit and loopback address with INFINITY_LIFE_TIME
+
+2013-11-08 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * ftrace: Synchronize setting function_trace_op with ftrace_trace_function
+
+2014-01-07 Yuval Mintz <yuvalmin at broadcom.com>
+
+ * bnx2x: prevent WARN during driver unload
+
+2014-01-10 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-cpuidle'
+
+2014-01-09 Jiang Liu <jiang.liu at linux.intel.com>
+
+ * intel_idle: close avn_cstates array with correct marker
+
+2014-01-09 Ben Myers <bpm at sgi.com>
+
+ * Merge branch 'xfs-extent-list-locking-fixes' into for-next
+
+2014-01-09 Ben Myers <bpm at sgi.com>
+
+ * Merge branch 'xfs-misc' into for-next
+
+2014-01-07 Chuansheng Liu <chuansheng.liu at intel.com>
+
+ * xfs: Calling destroy_work_on_stack() to pair with INIT_WORK_ONSTACK()
+
+2013-11-13 Steven Rostedt <rostedt at goodmis.org>
+
+ * ftrace/x86: Load ftrace_ops in parameter not the variable holding it
+
+2014-01-09 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: simple-card: fix the cinfo error check
+
+2014-01-09 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: simple-card: fix a bug where cinfo will be NULL before using it
+
+2014-01-09 Nicolin Chen <Guangyu.Chen at freescale.com>
+
+ * ASoC: fsl_ssi: Set default slot number for common cases
+
+2014-01-09 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * ASoC: fsl-ssi: Add missing clk_disable_unprepare() on error in fsl_ssi_probe()
+
+2014-01-01 Qi Wang 王起 (qiwang) <qiwang at micron.com>
+
+ * UBI: avoid program operation on NOR flash after erasure interrupted
+
+2014-01-09 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless into for-davem
+
+2014-01-09 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regmap/topic/ack' into regmap-next
+
+2014-01-06 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: ux500: Don't set unused struct snd_pcm_hardware fields
+
+2014-01-06 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: sh: Don't set unused struct snd_pcm_hardware fields
+
+2014-01-06 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: nuc900: Don't set unused struct snd_pcm_hardware fields
+
+2014-01-06 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: kirkwood: Don't set unused struct snd_pcm_hardware fields
+
+2014-01-06 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: intel: Don't set unused struct snd_pcm_hardware fields
+
+2014-01-09 Markus Pargmann <mpa at pengutronix.de>
+
+ * ASoC: fsl-ssi: Fix stats compile warning
+
+2014-01-09 Markus Pargmann <mpa at pengutronix.de>
+
+ * ASoC: fsl-ssi doc: Add list of supported compatibles
+
+2014-01-09 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: wm_adsp: Mark wm_adsp2_boot_work as static
+
+2013-10-15 Archit Taneja <archit at ti.com>
+
+ * drm/omap: Enable DT support for DMM
+
+2014-01-02 Archit Taneja <archit at ti.com>
+
+ * drm/omap: fix: change dev_unload order
+
+2014-01-09 Jiang Liu <jiang.liu at linux.intel.com>
+
+ * Revert "intel_idle: mark states tables with __initdata tag"
+
+2014-01-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'parisc-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
+
+2014-01-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata
+
+2014-01-08 David E. Box <david.e.box at linux.intel.com>
+
+ * arch: x86: New MailBox support driver for Intel SOC's
+
+2014-01-05 John David Anglin <dave.anglin at bell.net>
+
+ * parisc: Ensure full cache coherency for kmap/kunmap
+
+2014-01-08 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: bcm: Remove obsoleted Kconfig dependency
+
+2014-01-08 John W. Linville <linville at tuxdriver.com>
+
+ * Merge tag 'nfc-fixes-3.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/sameo/nfc-fixes
+
+2014-01-08 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: wm_adsp: Start DSP booting earlier in the DAPM process
+
+2014-01-08 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+	* ASoC: wm_adsp: Factor out ADSP2 boot procedure
+
+2013-12-20 Markus Pargmann <mpa at pengutronix.de>
+
+ * ASoC: fsl-ssi: Drop ac97 specific trigger function
+
+2013-12-20 Markus Pargmann <mpa at pengutronix.de>
+
+	* ASoC: fsl-ssi: Move RX/TX configuration to separate functions
+
+2014-01-08 Jingoo Han <jg1.han at samsung.com>
+
+ * regulator: twl: Fix checkpatch issue
+
+2013-12-16 James Hogan <james.hogan at imgtec.com>
+
+ * clk: clk-divider: fix divisor > 255 bug
+
+2014-01-08 Paulo Zanoni <paulo.r.zanoni at intel.com>
+
+ * drm/i915: fix DDI PLLs HW state readout code
+
+2014-01-07 Andreas Pretzsch <apr at cn-eng.de>
+
+ * ASoC: ssm2602: add 16kHz sampling rate support
+
+2014-01-03 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ASoC: twl4030: Pass the twl4030_priv directly to twl4030_can_write_to_chip()
+
+2014-01-03 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ASoC: twl4030: Move the ctl cache update local to twl4030_write() function
+
+2014-01-03 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ASoC: twl4030: Parameter alignment fixes (for code consistency)
+
+2014-01-03 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ASoC: twl4030: Remove local reg cache
+
+2014-01-03 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ASoC: twl4030: Introduce local ctl register cache
+
+2014-01-03 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * ASoC: twl4030: Remove reset registers functionality
+
+2014-01-08 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: fsl-sai: Clean up the code
+
+2014-01-08 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * ASoC: ux500: Fix sparse non static symbol warning
+
+2014-01-08 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: adau1701: Fix ADAU1701_SEROCTL_WORD_LEN_16 constant
+
+2014-01-08 Liam Girdwood <liam.r.girdwood at linux.intel.com>
+
+ * ASoC: sapm: Automatically connect DAI link widgets in DAPM graph.
+
+2014-01-08 Liam Girdwood <liam.r.girdwood at linux.intel.com>
+
+ * ASoC: utils: Add internal call to determine if DAI is dummy.
+
+2014-01-08 Dave Airlie <airlied at gmail.com>
+
+ * Merge branch 'drm-nouveau-next' of git://anongit.freedesktop.org/nouveau/linux-2.6 into drm-fixes
+
+2014-01-08 Dave Airlie <airlied at gmail.com>
+
+ * Merge tag 'drm-intel-fixes-2014-01-08' of git://people.freedesktop.org/~danvet/drm-intel into drm-fixes
+
+2013-12-29 Christian Engelmayer <cengelma at gmx.at>
+
+ * drm/nouveau/nouveau: fix memory leak in nouveau_crtc_page_flip()
+
+2014-01-07 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nouveau/bios: fix offset calculation for BMPv1 bioses
+
+2014-01-07 Stephen Warren <swarren at nvidia.com>
+
+ * serial: 8250: enable UART_BUG_NOMSR for Tegra
+
+2014-01-07 Mark Deneen <mdeneen at gmail.com>
+
+ * tty/serial: at91: reset rx_ring when port is shutdown
+
+2014-01-07 Marek Roszko <mark.roszko at gmail.com>
+
+ * tty/serial: at91: fix race condition in atmel_serial_remove
+
+2014-01-07 Marek Roszko <mark.roszko at gmail.com>
+
+ * tty/serial: at91: Handle shutdown more safely
+
+2014-01-03 Qipan Li <Qipan.Li at csr.com>
+
+ * serial: sirf: correct condition for fetching dma buffer into tty
+
+2014-01-03 Qipan Li <Qipan.Li at csr.com>
+
+ * serial: sirf: provide pm entries of uart_ops
+
+2014-01-03 Qipan Li <Qipan.Li at csr.com>
+
+ * serial: sirf: use PM macro initialize PM functions
+
+2013-12-31 Alexander Shiyan <shc_work at mail.ru>
+
+ * serial: clps711x: Enable driver compilation with COMPILE_TEST
+
+2013-12-31 Alexander Shiyan <shc_work at mail.ru>
+
+ * serial: clps711x: Add support for N_IRDA line discipline
+
+2014-01-07 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
+
+2014-01-07 Erik Hugne <erik.hugne at ericsson.com>
+
+ * tipc: correctly unlink packets from deferred packet queue
+
+2014-01-07 Li RongQing <roy.qing.li at gmail.com>
+
+ * ipv6: pcpu_tstats.syncp should be initialised in ip6_vti.c
+
+2014-01-07 Theodore Ts'o <tytso at mit.edu>
+
+ * ext4: don't pass freed handle to ext4_walk_page_buffers
+
+2014-01-07 Liam Girdwood <liam.r.girdwood at linux.intel.com>
+
+ * ASoC: docs: Update the Overview document
+
+2014-01-07 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * Revert "ARM: 7908/1: mm: Fix the arm_dma_limit calculation"
+
+2014-01-07 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: simple-card: keep the property's name the same pattern
+
+2014-01-07 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: simple-card: fix the DAPM routes map parsing
+
+2013-12-19 Lee Jones <lee.jones at linaro.org>
+
+ * ASoC: ux500: Dramatically reduce the size of the DAI driver data struct
+
+2013-12-19 Lee Jones <lee.jones at linaro.org>
+
+ * ASoC: ux500_pcm: Differentiate between pdata and DT initialisation
+
+2013-12-19 Lee Jones <lee.jones at linaro.org>
+
+ * ASoC: ux500_pcm: Take out pointless dev_dbg() call
+
+2013-12-19 Lee Jones <lee.jones at linaro.org>
+
+ * ASoC: ux500: Store DMA data in the DAI differently in the pdata and DT case
+
+2014-01-06 Tetsuo Handa <penguin-kernel at I-love.SAKURA.ne.jp>
+
+ * SELinux: Fix memory leak upon loading policy
+
+2014-01-06 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: mxs: Remove SND_DMAENGINE_PCM_FLAG_NO_RESIDUE flag
+
+2014-01-06 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: mxs: Don't set unused struct snd_pcm_hardware fields
+
+2014-01-07 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * MAINTAINERS: Updates for drm/i915
+
+2014-01-06 Paul Gortmaker <paul.gortmaker at windriver.com>
+
+ * Input: delete non-required instances of include <linux/init.h>
+
+2014-01-04 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Input: twl4030-keypad - convert to using managed resources
+
+2014-01-04 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Input: twl6040-vibra - remove unneeded check for CONFIG_OF
+
+2014-01-06 Paul Gortmaker <paul.gortmaker at windriver.com>
+
+ * x86: Delete non-required instances of include <linux/init.h>
+
+2014-01-06 Bob Gleitsmann <rjgleits at bellsouth.net>
+
+ * drm/nouveau: return offset of allocated notifier
+
+2014-01-05 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nouveau/bios: make jump conditional
+
+2014-01-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
+
+2014-01-07 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2014-01-06 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'acpi-battery' and 'pm-cpufreq'
+
+2014-01-06 Curt Brune <curt at cumulusnetworks.com>
+
+ * bridge: use spin_lock_bh() in br_multicast_set_hash_max
+
+2014-01-06 Hannes Frederic Sowa <hannes at stressinduktion.org>
+
+ * ipv6: don't install anycast address for /128 addresses on routers
+
+2014-01-06 Lan Tianyu <tianyu.lan at intel.com>
+
+ * ACPI / Battery: Add a _BIX quirk for NEC LZ750/LS
+
+2014-01-06 Dirk Brandewie <dirk.j.brandewie at intel.com>
+
+ * intel_pstate: Add X86_FEATURE_APERFMPERF to cpu match parameters.
+
+2014-01-01 Jie Liu <jeff.liu at oracle.com>
+
+ * xfs: fix off-by-one error in xfs_attr3_rmt_verify
+
+2014-01-06 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'for-john' of git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi-fixes
+
+2014-01-06 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'for-john' of git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211
+
+2014-01-06 Eric Whitney <enwlinux at gmail.com>
+
+ * ext4: fix bigalloc regression
+
+2013-12-20 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: atmel: Don't set unused struct snd_pcm_hardware fields
+
+2014-01-06 Jiri Kosina <jkosina at suse.cz>
+
+ * HID: hidraw: make comment more accurate and nicer
+
+2014-01-05 Alexander Shiyan <shc_work at mail.ru>
+
+ * ASoC: mc13783: trivial: Cleanup module
+
+2014-01-05 Alexander Shiyan <shc_work at mail.ru>
+
+ * ASoC: mc13783: Drop fixed ADC & DAC ports usage
+
+2014-01-05 Alexander Shiyan <shc_work at mail.ru>
+
+ * ASoC: mc13783: Use core error messages if registration fails
+
+2014-01-05 Alexander Shiyan <shc_work at mail.ru>
+
+ * ASoC: mc13783: Use module_platform_driver_probe()
+
+2014-01-04 Jesper Dangaard Brouer <brouer at redhat.com>
+
+ * netfilter: only warn once on wrong seqadj usage
+
+2013-12-31 Daniel Borkmann <dborkman at redhat.com>
+
+ * netfilter: nf_nat: fix access to uninitialized buffer in IRC NAT helper
+
+2014-01-03 David Howells <dhowells at redhat.com>
+
+ * regulator: tps62360: Fix up a pointer-integer size mismatch warning
+
+2013-12-28 Alexander van Heukelum <heukelum at fastmail.fm>
+
+ * Revert "drm/i915: assume all GM45 Acer laptops use inverted backlight PWM"
+
+2014-01-05 Mike Turquette <mturquette at linaro.org>
+
+ * Merge tag 'samsung-clk-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tfiga/samsung-clk into clk-fixes
+
+2014-01-05 Josh Boyer <jwboyer at redhat.com>
+
+ * xen-netback: Include header for vmalloc
+
+2014-01-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-arm
+
+2014-01-05 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'acpi-ac' and 'acpi-tpm'
+
+2013-12-19 Jiang Liu <jiang.liu at linux.intel.com>
+
+ * ACPI / TPM: fix memory leak when walking ACPI namespace
+
+2014-01-04 Alexander Mezin <mezin.alexander at gmail.com>
+
+ * ACPI / AC: change notification handler type to ACPI_ALL_NOTIFY
+
+2014-01-03 Rob Herring <rob.herring at calxeda.com>
+
+ * ARM: 7933/1: rename ioremap_cached to ioremap_cache
+
+2014-01-03 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * ARM: fix "bad mode in ... handler" message for undefined instructions
+
+2014-01-02 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * CRYPTO: Fix more AES build errors
+
+2014-01-05 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'v3.13-rc7' into x86/efi-kexec to resolve conflicts
+
+2014-01-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc
+
+2014-01-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.13-rc7
+
+2014-01-04 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86, boot: Move intcall() to the .inittext section
+
+2014-01-03 David Woodhouse <dwmw2 at infradead.org>
+
+ * x86, boot: Use .code16 instead of .code16gcc
+
+2014-01-03 Steven Rostedt <rostedt at goodmis.org>
+
+ * x86, sparse: Do not force removal of __user when calling copy_to/from_user_nocheck()
+
+2014-01-04 Sebastian Reichel <sre at debian.org>
+
+ * Input: twl4030-keypad - add device tree support
+
+2014-01-04 Libo Chen <clbchenlibo.chen at huawei.com>
+
+ * Input: twl6040-vibra - add missing of_node_put
+
+2014-01-03 Libo Chen <clbchenlibo.chen at huawei.com>
+
+ * Input: twl4030-vibra - add missing of_node_put
+
+2013-11-14 Arron Wang <arron.wang at intel.com>
+
+ * NFC: Fix target mode p2p link establishment
+
+2013-12-23 Kirill A. Shutemov <kirill.shutemov at linux.intel.com>
+
+ * x86, cpu: Detect more TLB configuration
+
+2014-01-03 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-v3.13-fixes' of git://git.infradead.org/battery-2.6
+
+2014-01-03 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.13-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2013-12-05 Ben Hutchings <ben at decadent.org.uk>
+
+ * deb-pkg: Fix building for MIPS big-endian or ARM OABI
+
+2013-12-05 Ben Hutchings <ben at decadent.org.uk>
+
+ * deb-pkg: Fix cross-building linux-headers package
+
+2014-01-03 Dave Young <dyoung at redhat.com>
+
+ * x86/efi: parse_efi_setup() build fix
+
+2014-01-03 Dave Young <dyoung at redhat.com>
+
+ * x86: ksysfs.c build fix
+
+2013-12-02 Nishanth Menon <nm at ti.com>
+
+ * scripts: Coccinelle script for pm_runtime_* return checks with IS_ERR_VALUE
+
+2014-01-01 Mark Salter <msalter at redhat.com>
+
+ * Input: i8042 - cleanup SERIO_I8042 dependencies
+
+2014-01-01 Mark Salter <msalter at redhat.com>
+
+ * Input: i8042 - select ARCH_MIGHT_HAVE_PC_SERIO on x86
+
+2014-01-01 Mark Salter <msalter at redhat.com>
+
+ * Input: i8042 - select ARCH_MIGHT_HAVE_PC_SERIO on unicore32
+
+2014-01-01 Mark Salter <msalter at redhat.com>
+
+ * Input: i8042 - select ARCH_MIGHT_HAVE_PC_SERIO on sparc
+
+2014-01-01 Mark Salter <msalter at redhat.com>
+
+ * Input: i8042 - select ARCH_MIGHT_HAVE_PC_SERIO for SH_CAYMAN
+
+2014-01-01 Mark Salter <msalter at redhat.com>
+
+ * Input: i8042 - select ARCH_MIGHT_HAVE_PC_SERIO on powerpc
+
+2014-01-01 Mark Salter <msalter at redhat.com>
+
+ * Input: i8042 - select ARCH_MIGHT_HAVE_PC_SERIO on mips
+
+2014-01-01 Mark Salter <msalter at redhat.com>
+
+ * Input: i8042 - select ARCH_MIGHT_HAVE_PC_SERIO on IA64
+
+2014-01-01 Mark Salter <msalter at redhat.com>
+
+ * Input: i8042 - select ARCH_MIGHT_HAVE_PC_SERIO on ARM/Footbridge
+
+2014-01-01 Mark Salter <msalter at redhat.com>
+
+ * Input: i8042 - select ARCH_MIGHT_HAVE_PC_SERIO on alpha
+
+2014-01-02 Petr Sebor <petr at scssoft.com>
+
+ * Input: xpad - add new USB IDs for Logitech F310 and F710
+
+2014-01-02 Thomaz de Oliveira dos Reis <thor27 at gmail.com>
+
+ * Input: xpad - change D-PAD mapping on Razer devices
+
+2014-01-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/virt/kvm/kvm
+
+2014-01-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (incoming from Andrew)
+
+2014-01-02 Jason Baron <jbaron at akamai.com>
+
+ * epoll: do not take the nested ep->mtx on EPOLL_CTL_DEL
+
+2014-01-02 Nobuhiro Iwamatsu <nobuhiro.iwamatsu.yj at renesas.com>
+
+ * sh: add EXPORT_SYMBOL(min_low_pfn) and EXPORT_SYMBOL(max_low_pfn) to sh_ksyms_32.c
+
+2014-01-02 Jiang Liu <jiang.liu at linux.intel.com>
+
+ * drivers/dma/ioat/dma.c: check DMA mapping error in ioat_dma_self_test()
+
+2014-01-02 Naoya Horiguchi <n-horiguchi at ah.jp.nec.com>
+
+ * mm/memory-failure.c: transfer page count from head page to tail page after split thp
+
+2014-01-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'gfs2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-fixes
+
+2014-01-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
+
+2014-01-02 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'renesas-fixes3-for-v3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas into fixes
+
+2013-12-28 Jan Kiszka <jan.kiszka at web.de>
+
+ * KVM: nVMX: Unconditionally uninit the MMU on nested vmexit
+
+2014-01-02 Michal Marek <mmarek at suse.cz>
+
+ * Merge commit v3.13-rc1 into kbuild/misc
+
+2014-01-02 Tetsuo Handa <penguin-kernel at I-love.SAKURA.ne.jp>
+
+ * GFS2: Fix unsafe dereference in dump_holder()
+
+2014-01-01 Alan <gnomes at lxorguk.ukuu.org.uk>
+
+ * sata_sis: missing PM support
+
+2014-01-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2014-01-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
+
+2014-01-01 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-fixes-3.13' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
+
+2013-12-31 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'pm-cpufreq' and 'pm-cpuidle'
+
+2013-12-31 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'cpuidle/3.13-fixes' of git://git.linaro.org/people/daniel.lezcano/linux into pm-cpuidle
+
+2013-12-31 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'acpi-pci-pm' and 'acpi-pci-hotplug'
+
+2013-12-26 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Input: allocate absinfo data when setting ABS capability
+
+2013-12-29 Doug Anderson <dianders at chromium.org>
+
+ * Input: cros_ec_keyb - fix problems with backslash
+
+2013-12-19 Oren Givon <oren.givon at intel.com>
+
+ * iwlwifi: add new devices for 7265 series
+
+2013-12-30 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * regulator: wm831x-dcdc: Remove unneeded 'err' label
+
+2013-12-31 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug
+
+2013-12-31 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * intel_pstate: Fail initialization if P-state information is missing
+
+2013-11-27 Bo Shen <voice.shen at atmel.com>
+
+ * ASoC: atmel: sam9x5_wm8731: remove platform_set_drvdata
+
+2013-12-23 Simon Guinot <sguinot at lacie.com>
+
+ * ahci: add PCI ID for Marvell 88SE9170 SATA controller
+
+2013-12-31 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: fsl_sai: fix the endianess for SAI fifo data.
+
+2013-12-31 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: fsl_sai: Fix one bug for hardware limitation.
+
+2013-12-29 Jan Kiszka <jan.kiszka at siemens.com>
+
+ * KVM: x86: Fix APIC map calculation after re-enabling
+
+2013-12-17 Krzysztof Kozlowski <k.kozlowski at samsung.com>
+
+ * clk: exynos: File scope reg_save array should depend on PM_SLEEP
+
+2013-12-11 Abhilash Kesavan <a.kesavan at samsung.com>
+
+ * clk: samsung: exynos5250: Add CLK_IGNORE_UNUSED flag for the sysreg clock
+
+2013-12-12 Abhilash Kesavan <a.kesavan at samsung.com>
+
+ * ARM: dts: exynos5250: Fix MDMA0 clock number
+
+2013-12-12 Abhilash Kesavan <a.kesavan at samsung.com>
+
+ * clk: samsung: exynos5250: Add MDMA0 clocks
+
+2013-12-12 Abhilash Kesavan <a.kesavan at samsung.com>
+
+ * clk: samsung: exynos5250: Fix ACP gate register offset
+
+2013-12-25 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: fsl_sai: Add disable operation for the corresponding data channel.
+
+2013-12-25 Xiubo Li <Li.Xiubo at freescale.com>
+
+ * ASoC: fsl_sai: Move the global registers setting to _dai_probe()
+
+2013-12-26 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * regulator: s2mps11: Clean up redundant code
+
+2013-12-29 Axel Lin <axel.lin at ingics.com>
+
+ * regulator: tps65910: Simplify setting enable_mask for regulators
+
+2013-12-13 Andre Przywara <andre.przywara at linaro.org>
+
+ * ARM/cpuidle: remove __init tag from Calxeda cpuidle probe function
+
+2013-11-26 Soren Brinkmann <soren.brinkmann at xilinx.com>
+
+ * clocksource: cadence_ttc: Fix mutex taken inside interrupt context
+
+2013-12-18 Sebastian Ott <sebott at linux.vnet.ibm.com>
+
+ * s390/pci: obtain function handle in hotplug notifier
+
+2013-12-30 Benjamin Herrenschmidt <benh at kernel.crashing.org>
+
+ * Merge remote-tracking branch 'agust/merge' into merge
+
+2013-12-28 Olof Johansson <olof at lixom.net>
+
+ * powerpc: Fix alignment of secondary cpu spin vars
+
+2013-12-23 Anton Blanchard <anton at samba.org>
+
+ * powerpc: Align p_end
+
+2013-12-20 Brian W Hart <hartb at linux.vnet.ibm.com>
+
+ * powernv/eeh: Add buffer for P7IOC hub error data
+
+2013-12-19 Brian W Hart <hartb at linux.vnet.ibm.com>
+
+ * powernv/eeh: Fix possible buffer overrun in ioda_eeh_phb_diag()
+
+2013-12-18 Paul E. McKenney <paulmck at linux.vnet.ibm.com>
+
+ * powerpc: Make 64-bit non-VMX __copy_tofrom_user bi-endian
+
+2013-12-16 Rajesh B Prathipati <rprathip at linux.vnet.ibm.com>
+
+ * powerpc: Make unaligned accesses endian-safe for powerpc
+
+2013-12-16 Michael Neuling <mikey at neuling.org>
+
+ * powerpc: Fix bad stack check in exception entry
+
+2013-12-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.13-rc6
+
+2013-12-29 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * PCI / ACPI: Install wakeup notify handlers for all PCI devs with ACPI
+
+2013-12-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2013-12-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-12-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.13-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2013-12-24 Viresh Kumar <viresh.kumar at linaro.org>
+
+ * cpufreq: preserve user_policy across suspend/resume
+
+2013-12-27 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * cpufreq: Clean up after a failing light-weight initialization
+
+2013-12-21 Matt Fleming <matt.fleming at intel.com>
+
+ * x86/efi: Delete superfluous global variables
+
+2013-12-20 Dave Young <dyoung at redhat.com>
+
+ * x86: Reserve setup_data ranges late after parsing memmap cmdline
+
+2013-12-20 Dave Young <dyoung at redhat.com>
+
+ * x86: Export x86 boot_params to sysfs
+
+2013-12-20 Dave Young <dyoung at redhat.com>
+
+ * x86: Add xloadflags bit for EFI runtime support on kexec
+
+2013-12-20 Dave Young <dyoung at redhat.com>
+
+ * x86/efi: Pass necessary EFI data for kexec via setup_data
+
+2013-12-21 Laura Abbott <lauraa at codeaurora.org>
+
+ * ARM: 7931/1: Correct virt_addr_valid
+
+2013-12-16 Steven Capper <steve.capper at linaro.org>
+
+ * ARM: 7923/1: mm: fix dcache flush logic for compound high pages
+
+2013-12-29 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * ARM: fix footbridge clockevent device
+
+2013-09-10 Li Zefan <lizefan at huawei.com>
+
+ * slub: Fix calculation of cpu slabs
+
+2013-12-28 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'omap-for-v3.13/intc-ldp-fix' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes
+
+2013-12-28 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'renesas-fixes2-for-v3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas into fixes
+
+2013-12-11 Linus Walleij <linus.walleij at linaro.org>
+
+ * ARM: pxa: fix USB gadget driver compilation regression
+
+2013-12-27 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * Input: keypad-omap - cleanup header file
+
+2013-12-27 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * Input: keypad-ep93xx - cleanup header file
+
+2013-12-15 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Input: pmic8xxx-pwrkey - switch to using managed resources
+
+2013-12-17 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * Input: pmic8xxx-pwrkey - pass correct device identity to free_irq()
+
+2013-12-27 H. Peter Anvin <hpa at zytor.com>
+
+ * x86: Slightly tweak the access_ok() C variant for better code
+
+2013-12-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * x86: Replace assembly access_ok() with a C variant
+
+2013-12-27 Arnaldo Carvalho de Melo <acme at redhat.com>
+
+ * perf tools: Use zfree to help detect use after free bugs
+
+2013-12-26 Arnaldo Carvalho de Melo <acme at redhat.com>
+
+ * perf tools: Introduce zfree
+
+2013-12-27 Tony Lindgren <tony at atomide.com>
+
+ * Merge tag 'for-v3.13-rc/hwmod-fixes-b' of git://git.kernel.org/pub/scm/linux/kernel/git/pjw/omap-pending into debug-ll-and-ldp-backlight-fix
+
+2013-12-27 Tony Lindgren <tony at atomide.com>
+
+ * ARM: OMAP2+: Fix LCD panel backlight regression for LDP legacy booting
+
+2013-12-26 Yunkang Tang <tommywill2011 at gmail.com>
+
+ * Input: ALPS - add support for "Dolphin" devices
+
+2013-12-27 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'powercap' and 'acpi-lpss' with new device IDs
+
+2013-12-27 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'pm-cpufreq' and 'pm-sleep' containing PM fixes
+
+2013-12-26 Arnaldo Carvalho de Melo <acme at redhat.com>
+
+ * perf tools: No need to test against NULL before calling free()
+
+2013-12-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
+
+2013-12-26 Namhyung Kim <namhyung.kim at lge.com>
+
+ * perf ui/tui: Implement header window
+
+2013-12-26 Namhyung Kim <namhyung.kim at lge.com>
+
+ * perf ui/tui: Split help message for perf top and report
+
+2013-12-16 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: mackerel: Fix coherent DMA mask
+
+2013-12-16 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: kzm9g: Fix coherent DMA mask
+
+2013-12-16 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: armadillo: Fix coherent DMA mask
+
+2013-12-26 Simon Horman <horms+renesas at verge.net.au>
+
+ * Revert "ARM: shmobile: r8a7791: Add SSI clocks in device tree"
+
+2013-12-26 Simon Horman <horms+renesas at verge.net.au>
+
+ * Revert "ARM: shmobile: r8a7790: Add SSI clocks in device tree"
+
+2013-12-23 Suman Anna <s-anna at ti.com>
+
+ * ARM: OMAP2+: hwmod_data: fix missing OMAP_INTC_START in irq data
+
+2013-12-12 Rajendra Nayak <rnayak at ti.com>
+
+ * ARM: DRA7: hwmod: Fix boot crash with DEBUG_LL
+
+2013-12-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.dk/linux-block
+
+2013-12-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
+
+2013-12-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
+
+2013-12-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata
+
+2013-12-19 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: r8a7791: Add SSI clocks in device tree
+
+2013-12-19 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: r8a7790: Add SSI clocks in device tree
+
+2013-12-19 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: r8a7791: Add QSPI module clock in device tree
+
+2013-12-19 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: r8a7790: Add QSPI module clock in device tree
+
+2013-12-19 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: r8a7791: Add MSIOF clocks in device tree
+
+2013-12-19 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: r8a7790: Add MSIOF clocks in device tree
+
+2013-12-19 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: Remove Koelsch reference DTS
+
+2013-12-23 Geert Uytterhoeven <geert+renesas at linux-m68k.org>
+
+ * spi: rspi: Fix typo when clearing SPSR_OVRF
+
+2013-12-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.13-rc5
+
+2013-12-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2013-12-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'firewire-fix' of git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394
+
+2013-12-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending
+
+2013-12-11 Jacob Pan <jacob.jun.pan at linux.intel.com>
+
+ * powercap / RAPL: add support for ValleyView Soc
+
+2013-12-19 Masami Ichikawa <masami256 at gmail.com>
+
+ * PM / sleep: Fix memory leak in pm_vt_switch_unregister().
+
+2013-12-19 Jason Baron <jbaron at akamai.com>
+
+ * cpufreq: Use CONFIG_CPU_FREQ_DEFAULT_* to set initial policy for setpolicy drivers
+
+2013-12-20 Viresh Kumar <viresh.kumar at linaro.org>
+
+ * cpufreq: remove sysfs files for CPUs which failed to come back after resume
+
+2013-12-21 Matias Bjørling <m at bjorling.me>
+
+ * null_blk: support submit_queues on use_per_node_hctx
+
+2013-12-21 Matias Bjørling <m at bjorling.me>
+
+ * null_blk: set use_per_node_hctx param to false
+
+2013-12-21 Matias Bjørling <m at bjorling.me>
+
+ * null_blk: corrections to documentation
+
+2013-12-20 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * regulator: tps51632-regulator: Fix spelling
+
+2013-11-25 Chen, Gong <gong.chen at linux.intel.com>
+
+ * ACPI, APEI, GHES: Cleanup ghes memory error handling
+
+2013-12-18 Chen, Gong <gong.chen at linux.intel.com>
+
+ * ACPI, APEI: Cleanup alignment-aware accesses
+
+2013-11-25 Chen, Gong <gong.chen at linux.intel.com>
+
+ * ACPI, APEI, GHES: Do not report only correctable errors with SCI
+
+2013-12-16 H.J. Lu <hjl.tools at gmail.com>
+
+ * x86, x32: Use __kernel_long_t for __statfs_word
+
+2013-12-16 H.J. Lu <hjl.tools at gmail.com>
+
+ * x86, x32: Use __kernel_long_t/__kernel_ulong_t in x86-64 stat.h
+
+2013-12-19 Benjamin Tissoires <benjamin.tissoires at redhat.com>
+
+ * HID: input: fix input sysfs path for hid devices
+
+2013-12-20 Matteo Facchinetti <matteo.facchinetti at sirius-es.it>
+
+ * powerpc/512x: dts: disable MPC5125 usb module
+
+2013-12-20 Kevin Hilman <khilman at linaro.org>
+
+ * Merge tag 'renesas-fixes-for-v3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas into fixes
+
+2013-12-20 Kevin Hilman <khilman at linaro.org>
+
+ * Merge tag 'omap-for-v3.13/display-fix' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes
+
+2013-12-20 Theodore Ts'o <tytso at mit.edu>
+
+ * ext4: add explicit casts when masking cluster sizes
+
+2013-12-19 Steven Whitehouse <swhiteho at redhat.com>
+
+ * GFS2: Wait for async DIO in glock state changes
+
+2013-12-18 Steven Whitehouse <swhiteho at redhat.com>
+
+ * GFS2: Fix incorrect invalidation for DIO/buffered I/O
+
+2013-12-04 Matt Gates <matthew.gates at hp.com>
+
+ * [SCSI] hpsa: allow SCSI mid layer to handle unit attention
+
+2013-12-04 Stephen M. Cameron <scameron at beardog.cce.hp.com>
+
+ * [SCSI] hpsa: do not require board "not ready" status after hard reset
+
+2013-12-04 Stephen M. Cameron <scameron at beardog.cce.hp.com>
+
+ * [SCSI] hpsa: enable unit attention reporting
+
+2013-12-04 Stephen M. Cameron <scameron at beardog.cce.hp.com>
+
+ * [SCSI] hpsa: rename scsi prefetch field
+
+2013-12-04 Stephen M. Cameron <scameron at beardog.cce.hp.com>
+
+ * [SCSI] hpsa: use workqueue instead of kernel thread for lockup detection
+
+2013-12-04 wenxiong at linux.vnet.ibm.com <wenxiong at linux.vnet.ibm.com>
+
+ * [SCSI] ipr: increase dump size in ipr driver
+
+2013-12-19 Kevin Hilman <khilman at linaro.org>
+
+ * Merge tag 'keystone/maintainer-file' of git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone into fixes
+
+2013-12-16 Jan Beulich <JBeulich at suse.com>
+
+ * x86/efi: Don't select EFI from certain special ACPI drivers
+
+2013-12-18 Len Brown <len.brown at intel.com>
+
+ * x86 idle: Repair large-server 50-watt idle-power regression
+
+2013-12-18 Tejun Heo <tj at kernel.org>
+
+ * libata, freezer: avoid block device removal while system is frozen
+
+2013-12-17 Will Deacon <will.deacon at arm.com>
+
+ * arm64: ptrace: avoid using HW_BREAKPOINT_EMPTY for disabled events
+
+2013-12-06 Lans Zhang <jia.zhang at windriver.com>
+
+ * x86/mm/numa: Fix 32-bit kernel NUMA boot
+
+2013-12-19 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'v3.13-rc4' into x86/mm
+
+2013-12-19 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.13-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2013-12-19 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/adsp', 'asoc/fix/arizona', 'asoc/fix/atmel', 'asoc/fix/fsl', 'asoc/fix/kirkwood', 'asoc/fix/tegra', 'asoc/fix/wm8904' and 'asoc/fix/wm8962' into asoc-linus
+
+2013-12-19 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/dma' into asoc-linus
+
+2013-12-19 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/core' into asoc-linus
+
+2013-12-19 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: wm5110: Add support for ASRC RATE 1
+
+2013-12-19 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: wm5110: Add FSH for ISRCs
+
+2013-12-16 Ben Dooks <ben.dooks at codethink.co.uk>
+
+ * ARM: shmobile: r8a7790: fix shdi resource sizes
+
+2013-12-16 Kuninori Morimoto <kuninori.morimoto.gx at renesas.com>
+
+ * ARM: shmobile: bockw: fixup DMA mask
+
+2013-12-11 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * ARM: shmobile: armadillo: Add PWM backlight power supply
+
+2013-12-18 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86, realmode: Pointer walk cleanups, pull out invariant use of __pa()
+
+2013-12-06 Christoph Hellwig <hch at infradead.org>
+
+ * xfs: assert that we hold the ilock for extent map access
+
+2013-12-06 Christoph Hellwig <hch at infradead.org>
+
+ * xfs: use xfs_ilock_attr_map_shared in xfs_attr_list_int
+
+2013-12-06 Christoph Hellwig <hch at infradead.org>
+
+ * xfs: use xfs_ilock_attr_map_shared in xfs_attr_get
+
+2013-12-06 Christoph Hellwig <hch at infradead.org>
+
+ * xfs: use xfs_ilock_data_map_shared in xfs_qm_dqiterate
+
+2013-12-06 Christoph Hellwig <hch at infradead.org>
+
+ * xfs: use xfs_ilock_data_map_shared in xfs_qm_dqtobp
+
+2013-12-10 Gerhard Sittig <gsi at denx.de>
+
+ * powerpc/512x: dts: remove misplaced IRQ spec from 'soc' node (5125)
+
+2013-12-18 John W. Linville <linville at tuxdriver.com>
+
+ * Merge branch 'for-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth
+
+2013-12-17 Krzysztof Kozlowski <k.kozlowski at samsung.com>
+
+ * mfd: sec: Remove sec_reg* regmap helpers
+
+2013-12-18 Antonio Ospite <ospite at studenti.unina.it>
+
+ * Input: fix typos in Documentation/input/gamepad.txt
+
+2013-12-17 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * Input: zforce - fix error return code in zforce_start()
+
+2013-12-16 Hans de Goede <hdegoede at redhat.com>
+
+ * Input: elantech - improve clickpad detection
+
+2013-12-18 Ben Myers <bpm at sgi.com>
+
+ * Merge branch 'xfs-for-linus-v3.13-rc5' into for-next
+
+2013-12-18 Martin Schwidefsky <schwidefsky at de.ibm.com>
+
+ * s390/3270: fix allocation of tty3270_screen structure
+
+2013-12-04 Vijaya Mohan Guvva <vmohan at brocade.com>
+
+ * [SCSI] bfa: Chinook quad port 16G FC HBA claim issue
+
+2013-12-18 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
+
+2013-12-18 Hui Wang <hui.wang at canonical.com>
+
+ * ALSA: hda - Add Dell headset detection quirk for one more laptop model
+
+2013-12-18 Bo Shen <voice.shen at atmel.com>
+
+ * ASoC: wm8904: fix DSP mode B configuration
+
+2013-12-18 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: wm_adsp: Add small delay while polling DSP RAM start
+
+2013-12-18 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: wm_adsp: Remove duplicate info message for DSP RAM ready
+
+2013-12-18 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'please-pull-einj' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras into x86/ras
+
+2013-11-22 Randy Dunlap <rdunlap at infradead.org>
+
+ * slab.h: remove duplicate kmalloc declaration and fix kernel-doc warnings
+
+2013-12-18 Jan Kara <jack at suse.cz>
+
+ * ext4: fix deadlock when writing in ENOSPC conditions
+
+2013-12-16 Tomi Valkeinen <tomi.valkeinen at ti.com>
+
+ * Revert "ARM: OMAP2+: Remove legacy mux code for display.c"
+
+2013-11-06 Tony Luck <tony.luck at intel.com>
+
+ * ACPI, APEI, EINJ: Changes to the ACPI/APEI/EINJ debugfs interface
+
+2013-12-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 's2mps11-build' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
+
+2013-12-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-12-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-12-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-12-17 Namhyung Kim <namhyung at kernel.org>
+
+ * tools lib traceevent: Get rid of die() in some string conversion functions
+
+2013-12-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'gpio-v3.13-4' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio
+
+2013-12-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
+
+2013-12-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
+
+2013-10-11 Josh Boyer <jwboyer at redhat.com>
+
+ * cpupower: Fix segfault due to incorrect getopt_long arugments
+
+2013-12-16 Sujith Manoharan <c_manoha at qca.qualcomm.com>
+
+ * ath9k: Fix interrupt handling for the AR9002 family
+
+2013-12-11 Larry Finger <Larry.Finger at lwfinger.net>
+
+ * rtlwifi: pci: Fix oops on driver unload
+
+2013-11-28 Mathy Vanhoef <vanhoefm at gmail.com>
+
+ * ath9k_htc: properly set MAC address and BSSID mask
+
+2013-12-17 JongHo Kim <furmuwon at gmail.com>
+
+ * ALSA: Add SNDRV_PCM_STATE_PAUSED case in wait_for_avail function
+
+2013-12-12 Dave Chinner <dchinner at redhat.com>
+
+ * xfs: abort metadata writeback on permanent errors
+
+2013-12-12 Dave Chinner <dchinner at redhat.com>
+
+ * xfs: swalloc doesn't align allocations properly
+
+2013-12-17 Christoph Hellwig <hch at infradead.org>
+
+ * xfs: remove xfsbdstrat error
+
+2013-11-22 Dave Chinner <dchinner at redhat.com>
+
+ * xfs: align initial file allocations correctly
+
+2013-12-08 Namjae Jeon <namjae.jeon at samsung.com>
+
+ * MAINTAINERS: fix incorrect mail address of XFS maintainer
+
+2013-11-26 Jie Liu <jeff.liu at oracle.com>
+
+ * xfs: fix infinite loop by detaching the group/project hints from user dquot
+
+2013-11-26 Jie Liu <jeff.liu at oracle.com>
+
+ * xfs: fix assertion failure at xfs_setattr_nonsize
+
+2013-11-26 Jie Liu <jeff.liu at oracle.com>
+
+ * xfs: fix false assertion at xfs_qm_vop_create_dqattach
+
+2013-10-05 Mark Tinguely <tinguely at sgi.com>
+
+ * xfs: fix memory leak in xfs_dir2_node_removename
+
+2013-11-27 Will Deacon <will.deacon at arm.com>
+
+ * Revert "ARM: 7556/1: perf: fix updated event period in response to PERF_EVENT_IOC_PERIOD"
+
+2013-11-27 Jean-Francois Moine <moinejf at free.fr>
+
+ * ASoC: kirkwood: Fix the CPU DAI rates
+
+2013-12-17 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: wm5110: Correct HPOUT3 DAPM route typo
+
+2013-12-17 Li Zefan <lizefan at huawei.com>
+
+ * cgroup: don't recycle cgroup id until all csses' have been destroyed
+
+2013-12-17 Antonio Ospite <ospite at studenti.unina.it>
+
+ * HID: debug: add labels for some new buttons
+
+2013-12-16 Marc Carino <marc.ceeeee at gmail.com>
+
+ * libata: implement ATA_HORKAGE_NO_NCQ_TRIM and apply it to Micro M500 SSDs
+
+2013-12-17 Marcel Holtmann <marcel at holtmann.org>
+
+ * Bluetooth: Fix HCI User Channel permission check in hci_sock_sendmsg
+
+2013-11-23 Santosh Shilimkar <santosh.shilimkar at ti.com>
+
+ * MAINTAINERS: Add keystone clock drivers
+
+2013-12-14 Oleg Nesterov <oleg at redhat.com>
+
+ * selinux: selinux_setprocattr()->ptrace_parent() needs rcu_read_lock()
+
+2013-12-16 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * SELinux: remove duplicated include from hooks.c
+
+2013-12-16 Felix Fietkau <nbd at openwrt.org>
+
+ * mac80211: move "bufferable MMPDU" check to fix AP mode scan
+
+2013-12-16 Javier Lopez <jlopex at cozybit.com>
+
+ * mac80211_hwsim: Fix NULL pointer dereference
+
+2013-12-13 Nenghua Cao <nhcao at marvell.com>
+
+ * ASoC: dapm: update DPCM runtime when mixer/mux changes
+
+2013-12-14 Qiaowei Ren <qiaowei.ren at intel.com>
+
+ * x86: replace futex_atomic_cmpxchg_inatomic() with user_atomic_cmpxchg_inatomic
+
+2013-12-14 Qiaowei Ren <qiaowei.ren at intel.com>
+
+ * x86: add user_atomic_cmpxchg_inatomic at uaccess.h
+
+2013-12-16 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'ras_for_3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp into x86/ras
+
+2013-12-13 Jiri Kosina <jkosina at suse.cz>
+
+ * HID: remove SIS entries from hid_have_special_driver[]
+
+2013-12-11 Krzysztof Kozlowski <k.kozlowski at samsung.com>
+
+ * mfd: s2mps11: Fix build after regmap field rename in sec-core.c
+
+2013-12-16 Johannes Berg <johannes.berg at intel.com>
+
+ * radiotap: fix bitmap-end-finding buffer overrun
+
+2013-12-15 Rafał Miłecki <zajec5 at gmail.com>
+
+ * Input: define KEY_WWAN for Wireless WAN
+
+2013-11-23 Aleksej Makarov <aleksej.makarov at sonymobile.com>
+
+ * Input: don't call input_dev_release_keys() in resume
+
+2013-12-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.13-rc4
+
+2013-12-10 Matias Bjorling <m at bjorling.me>
+
+ * null_blk: mem garbage on NUMA systems during init
+
+2013-12-13 Sergey Senozhatsky <sergey.senozhatsky at gmail.com>
+
+ * radeon_pm: fix oops in hwmon_attributes_visible() and radeon_hwmon_show_temp_thresh()
+
+2013-12-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2013-12-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-12-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pci-v3.13-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
+
+2013-12-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security
+
+2013-12-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Revert "selinux: consider filesystem subtype in policies"
+
+2013-12-15 Jiri Kosina <jkosina at suse.cz>
+
+ * HID: microsoft: no fallthrough in MS ergonomy 0xff05 usage
+
+2013-12-15 Stefan Richter <stefanr at s5r6.in-berlin.de>
+
+ * firewire: sbp2: bring back WRITE SAME support
+
+2013-12-14 Heiko Stübner <heiko at sntech.de>
+
+ * Input: zforce - fix possible driver hang during suspend
+
+2013-12-14 Carolyn Wyborny <carolyn.wyborny at intel.com>
+
+ * igb: Fix for issue where values could be too high for udelay function.
+
+2013-12-14 Jesse Brandeburg <jesse.brandeburg at intel.com>
+
+ * i40e: fix null dereference
+
+2013-12-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'edac_fixes_for_3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp
+
+2013-12-13 Tomasz Figa <tomasz.figa at gmail.com>
+
+ * ARM: s3c64xx: dt: Fix boot failure due to double clock initialization
+
+2013-12-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-arm
+
+2013-12-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'arc-fixes-for-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc
+
+2013-12-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dm-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
+
+2013-12-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
+
+2013-12-10 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * ARM: fix asm/memory.h build error
+
+2013-12-14 Jan Kara <jack at suse.cz>
+
+ * writeback: Fix data corruption on NFS
+
+2013-12-13 Ben Myers <bpm at sgi.com>
+
+ * Merge branch 'xfs-factor-icluster-macros' into for-next
+
+2013-12-13 Paul Moore <pmoore at redhat.com>
+
+ * selinux: revert 102aefdda4d8275ce7d7100bc16c88c74272b260
+
+2013-12-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'regulator-v3.13-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
+
+2013-12-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'regmap-v3.13-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regmap
+
+2013-12-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'i2c/for-current' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux
+
+2013-12-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus-20131212' of git://git.infradead.org/linux-mtd
+
+2013-12-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://git.infradead.org/users/vkoul/slave-dma
+
+2013-12-13 Joe Thornber <ejt at redhat.com>
+
+ * dm array: fix a reference counting bug in shadow_ablock
+
+2013-12-13 Joe Thornber <ejt at redhat.com>
+
+ * dm space map: disallow decrementing a reference count below zero
+
+2013-12-13 Levente Kurusa <levex at linux.com>
+
+ * EISA: Call put_device() if device_register() fails
+
+2013-11-13 Li Wang <liwang at ubuntukylin.com>
+
+ * ceph: Avoid data inconsistency due to d-cache aliasing in readpage()
+
+2013-12-05 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: initialize inode before instantiating dentry
+
+2013-11-25 Lior Amsalem <alior at marvell.com>
+
+ * irqchip: armada-370-xp: fix MSI race condition
+
+2013-11-25 Lior Amsalem <alior at marvell.com>
+
+ * irqchip: armada-370-xp: fix IPI race condition
+
+2013-12-13 Emanuel Krenz <emanuelkrenz at web.de>
+
+ * HID: add support for SiS multitouch panel in the touch monitor LG 23ET83V
+
+2013-12-13 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.13-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2013-12-13 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/topic/constraints' into regulator-linus
+
+2013-12-13 Hui Wang <hui.wang at canonical.com>
+
+ * ALSA: hda - Add Dell headset detection quirk for three laptop models
+
+2013-12-13 Benjamin Herrenschmidt <benh at kernel.crashing.org>
+
+ * powerpc/powernv: Fix OPAL LPC access in Little Endian
+
+2013-12-13 Anton Blanchard <anton at samba.org>
+
+ * powerpc/powernv: Fix endian issue in opal_xscom_read
+
+2013-12-13 Jie Liu <jeff.liu at oracle.com>
+
+ * xfs: use xfs_icluster_size_fsb in xfs_imap
+
+2013-12-13 Jie Liu <jeff.liu at oracle.com>
+
+ * xfs: use xfs_icluster_size_fsb in xfs_ifree_cluster
+
+2013-12-12 Anton Blanchard <anton at samba.org>
+
+ * powerpc: Fix endian issues in crash dump code
+
+2013-12-12 Anton Blanchard <anton at samba.org>
+
+ * powerpc/pseries: Fix endian issues in MSI code
+
+2013-12-12 Anton Blanchard <anton at samba.org>
+
+ * powerpc/pseries: Fix PCIE link speed endian issue
+
+2013-12-12 Anton Blanchard <anton at samba.org>
+
+ * powerpc/pseries: Fix endian issues in nvram code
+
+2013-12-12 Anton Blanchard <anton at samba.org>
+
+ * powerpc/pseries: Fix endian issues in /proc/ppc64/lparcfg
+
+2013-12-12 Anton Blanchard <anton at samba.org>
+
+ * powerpc: Fix topology core_id endian issue on LE builds
+
+2013-12-13 James Morris <james.l.morris at oracle.com>
+
+ * Merge branch 'master' of git://git.infradead.org/users/pcmoore/selinux_fixes into for-linus
+
+2013-12-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (fixes from Andrew)
+
+2013-12-13 Christoph Hellwig <hch at infradead.org>
+
+ * xfs: remove the quotaoff log format from the quotaoff log item
+
+2013-12-13 Christoph Hellwig <hch at infradead.org>
+
+ * xfs: remove the dquot log format from the dquot log item
+
+2013-12-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
+
+2013-12-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2013-12-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
+
+2013-12-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.13' of git://linux-nfs.org/~bfields/linux
+
+2013-12-10 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * mtd: nand: pxa3xx: Use info->use_dma to release DMA resources
+
+2013-12-09 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * Partially revert "mtd: nand: pxa3xx: Introduce 'marvell,armada370-nand' compatible string"
+
+2013-12-10 Paul Moore <pmoore at redhat.com>
+
+ * selinux: process labeled IPsec TCP SYN-ACK packets properly in selinux_ip_postroute()
+
+2013-12-10 Paul Moore <pmoore at redhat.com>
+
+ * selinux: look for IPsec labels on both inbound and outbound packets
+
+2013-12-04 Paul Moore <pmoore at redhat.com>
+
+ * selinux: handle TCP SYN-ACK packets correctly in selinux_ip_postroute()
+
+2013-12-04 Paul Moore <pmoore at redhat.com>
+
+ * selinux: handle TCP SYN-ACK packets correctly in selinux_ip_output()
+
+2013-12-04 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * i2c: imx: Check the return value from clk_prepare_enable()
+
+2013-12-12 Gleb Natapov <gleb at redhat.com>
+
+ * KVM: x86: fix guest-initiated crash with x2apic (CVE-2013-6376)
+
+2013-11-20 Andy Honig <ahonig at google.com>
+
+ * KVM: x86: Convert vapic synchronization to _cached functions (CVE-2013-6368)
+
+2013-11-19 Andy Honig <ahonig at google.com>
+
+ * KVM: x86: Fix potential divide by 0 in lapic (CVE-2013-6367)
+
+2013-11-18 Andy Honig <ahonig at google.com>
+
+ * KVM: Improve create VCPU parameter (CVE-2013-4587)
+
+2013-12-09 Elie De Brauwer <eliedebrauwer at gmail.com>
+
+ * i2c: mux: Inherit retry count and timeout from parent for muxed bus
+
+2013-12-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.13-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2013-12-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
+
+2013-12-12 Paul Durrant <Paul.Durrant at citrix.com>
+
+ * xen-netback: fix gso_prefix check
+
+2013-12-12 Sebastian Siewior <bigeasy at linutronix.de>
+
+ * net: make neigh_priv_len in struct net_device 16bit instead of 8bit
+
+2013-11-29 Valentine Barshak <valentine.barshak at cogentembedded.com>
+
+ * gpio: rcar: Fix level interrupt handling
+
+2013-12-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'v4l_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
+
+2013-12-10 Stephen Boyd <sboyd at codeaurora.org>
+
+ * gpio: msm: Fix irq mask/unmask by writing bits instead of numbers
+
+2013-12-11 Mugunthan V N <mugunthanvnm at ti.com>
+
+ * drivers: net: cpsw: fix for cpsw crash when build as modules
+
+2013-12-11 Paul Durrant <Paul.Durrant at citrix.com>
+
+ * xen-netback: napi: don't prematurely request a tx event
+
+2013-12-11 Paul Durrant <Paul.Durrant at citrix.com>
+
+ * xen-netback: napi: fix abuse of budget
+
+2013-12-12 David Henningsson <david.henningsson at canonical.com>
+
+ * ALSA: hda - Add enable_msi=0 workaround for four HP machines
+
+2013-01-10 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * Btrfs: fix access_ok() check in btrfs_ioctl_send()
+
+2013-12-11 Wang Shilong <wangsl.fnst at cn.fujitsu.com>
+
+ * Btrfs: make sure we cleanup all reloc roots if error happens
+
+2013-12-10 Wang Shilong <wangsl.fnst at cn.fujitsu.com>
+
+ * Btrfs: skip building backref tree for uuid and quota tree when doing balance relocation
+
+2013-12-11 Wang Shilong <wangsl.fnst at cn.fujitsu.com>
+
+ * Btrfs: fix an oops when doing balance relocation
+
+2013-12-12 Ingo Molnar <mingo at kernel.org>
+
+ * x86/traps: Clean up error exception handler definitions
+
+2013-12-12 Yang Yingliang <yangyingliang at huawei.com>
+
+ * sch_tbf: use do_div() for 64-bit divide
+
+2013-12-11 Eric Dumazet <edumazet at google.com>
+
+ * udp: ipv4: must add synchronization in udp_sk_rx_dst_set()
+
+2013-12-11 Philippe De Muyter <phdm at macqel.be>
+
+ * net:fec: remove duplicate lines in comment about errata ERR006358
+
+2013-12-10 Maxime Ripard <maxime.ripard at free-electrons.com>
+
+ * ARM: sun6i: dt: Fix interrupt trigger types
+
+2013-12-10 Maxime Ripard <maxime.ripard at free-electrons.com>
+
+ * ARM: sun7i: dt: Fix interrupt trigger types
+
+2013-12-10 Shawn Guo <shawn.guo at linaro.org>
+
+ * MAINTAINERS: merge IMX6 entry into IMX
+
+2013-12-06 Stephen Warren <swarren at nvidia.com>
+
+ * ARM: tegra: add missing break to fuse initialization code
+
+2013-12-10 Sergei Ianovich <ynvich at gmail.com>
+
+ * ARM: pxa: prevent PXA270 occasional reboot freezes
+
+2013-12-12 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/atmel', 'asoc/fix/fsl', 'asoc/fix/tegra' and 'asoc/fix/wm8962' into asoc-linus
+
+2013-12-12 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/dma' into asoc-linus
+
+2013-12-12 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/core' into asoc-linus
+
+2013-12-11 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'regulator/fix/as3722' and 'regulator/fix/pfuze100' into regulator-linus
+
+2013-12-11 David S. Miller <davem at davemloft.net>
+
+ * Revert "8390 : Replace ei_debug with msg_enable/NETIF_MSG_* feature"
+
+2013-12-11 Chad Hanson <chanson at trustedcs.com>
+
+ * selinux: fix broken peer recv check
+
+2013-11-22 Dave Chinner <dchinner at redhat.com>
+
+ * xfs: align initial file allocations correctly
+
+2013-12-10 Ben Myers <bpm at sgi.com>
+
+ * xfs: fix calculation of freed inode cluster blocks
+
+2013-12-06 Chen, Gong <gong.chen at linux.intel.com>
+
+ * ACPI, eMCA: Combine eMCA/EDAC event reporting priority
+
+2013-12-11 Bjorn Helgaas <bhelgaas at google.com>
+
+ * MAINTAINERS: Add DesignWare, i.MX6, Armada, R-Car PCI host maintainers
+
+2013-12-06 Chen, Gong <gong.chen at linux.intel.com>
+
+ * EDAC, sb_edac: Modify H/W event reporting policy
+
+2013-12-06 Chen, Gong <gong.chen at linux.intel.com>
+
+ * EDAC: Add an edac_report parameter to EDAC
+
+2013-11-18 Peter Zijlstra <peterz at infradead.org>
+
+ * sched/fair: Rework sched_fair time accounting
+
+2013-11-18 Peter Zijlstra <peterz at infradead.org>
+
+ * math64: Add mul_u64_u32_shr()
+
+2013-11-28 Peter Zijlstra <peterz at infradead.org>
+
+ * sched: Remove PREEMPT_NEED_RESCHED from generic code
+
+2013-12-11 Peter Zijlstra <peterz at infradead.org>
+
+ * sched: Initialize power_orig for overlapping groups
+
+2013-12-10 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Add static DAC/pin mapping for AD1986A codec
+
+2013-12-11 Hui Wang <hui.wang at canonical.com>
+
+ * ALSA: hda - One more Dell headset detection quirk
+
+2013-12-02 Jeff Layton <jlayton at redhat.com>
+
+ * nfsd: when reusing an existing repcache entry, unhash it first
+
+2013-12-05 Mikulas Patocka <mpatocka at redhat.com>
+
+ * dm stats: initialize read-only module parameter
+
+2013-11-29 Matthew Garrett <matthew.garrett at nebula.com>
+
+ * x86, efi: Don't use (U)EFI time services on 32 bit
+
+2013-12-10 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86, build, icc: Remove uninitialized_var() from compiler-intel.h
+
+2013-12-09 Paul Moore <pmoore at redhat.com>
+
+ * selinux: process labeled IPsec TCP SYN-ACK packets properly in selinux_ip_postroute()
+
+2013-12-10 Ingo Molnar <mingo at kernel.org>
+
+ * Merge branch 'clockevents/fixes' of git://git.linaro.org/people/daniel.lezcano/linux into timers/urgent
+
+2013-12-10 Dinh Nguyen <dinguyen at altera.com>
+
+ * clocksource: dw_apb_timer_of: Fix support for dts binding "snps,dw-apb-timer"
+
+2013-12-10 Dinh Nguyen <dinguyen at altera.com>
+
+ * clocksource: dw_apb_timer_of: Fix read_sched_clock
+
+2013-12-02 Marc Zyngier <marc.zyngier at arm.com>
+
+ * clocksource: sunxi: Stop timer from ticking before enabling interrupts
+
+2013-10-19 Thierry Reding <thierry.reding at gmail.com>
+
+ * clocksource: clksrc-of: Do not drop unheld reference on device node
+
+2013-11-26 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * clocksource: armada-370-xp: Register sched_clock after the counter reset
+
+2013-11-20 Axel Lin <axel.lin at ingics.com>
+
+ * clocksource: time-efm32: Select CLKSRC_MMIO
+
+2013-12-05 Roger Quadros <rogerq at ti.com>
+
+ * gpio: twl4030: Fix regression for twl gpio LED output
+
+2013-11-26 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * sh-pfc: Fix PINMUX_GPIO macro
+
+2013-12-10 Linus Walleij <linus.walleij at linaro.org>
+
+ * MAINTAINERS: update GPIO maintainers entry
+
+2013-12-02 Thomas Gleixner <tglx at linutronix.de>
+
+ * mfd: rtsx_pcr: Disable interrupts before cancelling delayed works
+
+2013-12-03 cpw <cpw at sgi.com>
+
+ * x86/UV: Fix NULL pointer dereference in uv_flush_tlb_others() if the 'nobau' boot option is used
+
+2013-12-09 Michael Hennerich <michael.hennerich at analog.com>
+
+ * Input: adxl34x - Fix bug in definition of ADXL346_2D_ORIENT
+
+2013-12-07 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Input: serio - fix sysfs layout
+
+2013-12-09 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Merge tag 'v3.13-rc3' into for-linus
+
+2013-12-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
+
+2013-12-09 Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>
+
+ * powerpc: Fix up the kdump base cap to 128M
+
+2013-12-09 Thadeu Lima de Souza Cascardo <cascardo at linux.vnet.ibm.com>
+
+ * powernv: Fix VFIO support with PHB3
+
+2013-12-09 Anatolij Gustschin <agust at denx.de>
+
+ * powerpc/52xx: Re-enable bestcomm driver in defconfigs
+
+2013-12-09 Olof Johansson <olof at lixom.net>
+
+ * powerpc/pasemi: Turn on devtmpfs in defconfig
+
+2013-12-04 Cedric Le Goater <clg at fr.ibm.com>
+
+ * offb: Add palette hack for little endian
+
+2013-12-04 Cedric Le Goater <clg at fr.ibm.com>
+
+ * offb: Little endian fixes
+
+2013-12-07 Hong H. Pham <hong.pham at windriver.com>
+
+ * powerpc: Fix PTE page address mismatch in pgtable ctor/dtor
+
+2013-12-06 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * powerpc/44x: Fix ocm_block allocation
+
+2013-12-09 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86, build: Pass in additional -mno-mmx, -mno-sse options
+
+2013-12-09 Jon Medhurst <tixy at linaro.org>
+
+ * ARM: 7917/1: cacheflush: correctly limit range of memory region being flushed
+
+2013-12-05 Konstantin Khlebnikov <k.khlebnikov at samsung.com>
+
+ * ARM: 7913/1: fix framepointer check in unwind_frame
+
+2013-12-05 Konstantin Khlebnikov <k.khlebnikov at samsung.com>
+
+ * ARM: 7912/1: check stack pointer in get_wchan
+
+2013-12-02 Santosh Shilimkar <santosh.shilimkar at ti.com>
+
+ * ARM: 7909/1: mm: Call setup_dma_zone() post early_paging_init()
+
+2013-12-09 Paul Moore <pmoore at redhat.com>
+
+ * selinux: look for IPsec labels on both inbound and outbound packets
+
+2013-12-03 Stephen Warren <swarren at nvidia.com>
+
+ * ASoC: don't leak on error in snd_dmaengine_pcm_register
+
+2013-12-05 Jan Weitzel <j.weitzel at phytec.de>
+
+ * ASoC: tlv320aic3x: no mono controls 3007 model
+
+2013-12-06 Nicolin Chen <b42378 at freescale.com>
+
+ * ASoC: fsl: imx-wm8962: Don't update bias_level in machine driver
+
+2013-12-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2013-12-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'mfd-fixes-3.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/sameo/mfd-fixes
+
+2013-12-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm-3.13-rc3-fixup' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2013-12-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
+
+2013-12-09 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
+
+2013-12-06 Jingoo Han <jg1.han at samsung.com>
+
+ * regulator: stw481x-vmmc: use devm_regulator_register()
+
+2013-11-05 Tim Harvey <tharvey at gateworks.com>
+
+ * regulator: pfuze100: allow misprogrammed ID
+
+2013-12-09 Axel Lin <axel.lin at ingics.com>
+
+ * regulator: pfuze100: Fix address of FABID
+
+2013-12-06 Stephen Warren <swarren at nvidia.com>
+
+ * ASoC: tegra: fix uninitialized variables in set_fmt
+
+2013-12-05 Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
+
+ * HID: hid-sensor-hub: fix duplicate sysfs entry error
+
+2013-12-05 Tom Lendacky <thomas.lendacky at amd.com>
+
+ * crypto: scatterwalk - Use sg_chain_ptr on chain entries
+
+2013-12-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'tty-3.13-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
+
+2013-12-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'staging-3.13-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
+
+2013-12-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'char-misc-3.13-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
+
+2013-12-08 Dmitry Monakhov <dmonakhov at openvz.org>
+
+ * jbd2: rename obsoleted msg JBD->JBD2
+
+2013-12-08 Jan Kara <jack at suse.cz>
+
+ * jbd2: revise KERN_EMERG error messages
+
+2013-12-02 Soren Brinkmann <soren.brinkmann at xilinx.com>
+
+ * tty: xuartps: Properly guard sysrq specific code
+
+2013-10-31 Jonathan Woithe <jwoithe at atrad.com.au>
+
+ * serial: 8250: Fix initialisation of Quatech cards with the AMCC PCI chip
+
+2013-11-08 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * serial: icom: dereference after free in load_code()
+
+2013-11-12 Mika Westerberg <mika.westerberg at linux.intel.com>
+
+ * serial: 8250_dw: add new ACPI IDs
+
+2013-12-04 Sebastian Andrzej Siewior <bigeasy at linutronix.de>
+
+ * tty: serial: pch: don't crash if DMA enabled but not loaded
+
+2013-12-05 Heiko Stübner <heiko at sntech.de>
+
+ * serial: samsung: move clock deactivation below uart registration
+
+2013-12-08 Rui Wang <ruiv.wang at gmail.com>
+
+ * PCI, AER: Fix severity usage in aer trace event
+
+2013-12-08 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Revert "cpufreq: fix garbage kobjects on errors during suspend/resume"
+
+2013-12-08 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Revert "cpufreq: suspend governors on system suspend/hibernate"
+
+2013-11-27 Khalid Aziz <khalid.aziz at oracle.com>
+
+ * PCI: Disable Bus Master only on kexec reboot
+
+2013-12-05 Qiaowei Ren <qiaowei.ren at intel.com>
+
+ * x86, xsave: Support eager-only xsave features, add MPX support
+
+2013-12-06 Tejun Heo <tj at kernel.org>
+
+ * cgroup: fix cgroup_create() error handling path
+
+2013-12-07 Qiaowei Ren <qiaowei.ren at intel.com>
+
+ * x86, cpufeature: Define the Intel MPX feature flag
+
+2013-12-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.13-rc3
+
+2013-12-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'trace-fixes-3.13-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
+
+2013-12-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kvack.org/~bcrl/aio-next
+
+2013-12-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
+
+2013-12-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security
+
+2013-12-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dt-fixes-for-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux
+
+2013-12-04 Gu Zheng <guz.fnst at cn.fujitsu.com>
+
+ * aio: clean up aio ring in the fail path
+
+2013-12-07 James Morris <james.l.morris at oracle.com>
+
+ * Merge branch 'free-memory' of git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity into for-linus
+
+2013-12-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm-3.13-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2013-12-06 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'pm-epoll', 'pnp' and 'powercap'
+
+2013-12-06 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branches 'pm-cpuidle' and 'pm-cpufreq'
+
+2013-12-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'stable' of git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile
+
+2013-12-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.dk/linux-block
+
+2013-12-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'nfs-for-3.13-3' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
+
+2013-12-05 Tony Lu <zlu at tilera.com>
+
+ * ftrace: default to tilegx if ARCH=tile is specified
+
+2013-12-01 Yunkang Tang <tommywill2011 at gmail.com>
+
+ * Input: ALPS - add support for DualPoint device on Dell XT2 model
+
+2013-12-05 Matt Walker <matt.g.d.walker at gmail.com>
+
+ * Input: elantech - add support for newer (August 2013) devices
+
+2013-12-05 Ping Cheng <pinglinux at gmail.com>
+
+ * Input: add SW_MUTE_DEVICE switch definition
+
+2013-12-03 Steven Rostedt <rostedt at goodmis.org>
+
+ * tracing: Only run synchronize_sched() at instance deletion time
+
+2013-12-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
+
+2013-12-04 Tejun Heo <tj at kernel.org>
+
+ * percpu: fix spurious sparse warnings from DEFINE_PER_CPU()
+
+2013-12-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fbdev-fixes-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tomba/linux
+
+2013-12-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.13-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2013-12-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pinctrl-v3.13-2' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl
+
+2013-12-05 Ming Lei <tom.leiming at gmail.com>
+
+ * blk-mq: fix use-after-free of request
+
+2013-12-04 Nicolin Chen <b42378 at freescale.com>
+
+ * ASoC: wm8962: Enable SYSCLK provisionally before fetching generated DSPCLK_DIV
+
+2013-12-05 Maria Dimakopoulou <maria.n.dimakopoulou at gmail.com>
+
+ * perf/x86: Fix constraint table end marker bug
+
+2013-12-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-12-04 Fenghua Yu <fenghua.yu at intel.com>
+
+ * x86/apic, doc: Justification for disabling IO APIC before Local APIC
+
+2013-12-05 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-fixes-3.13' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
+
+2013-12-04 Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
+
+ * PowerCap: Fix mode for energy counter
+
+2013-12-05 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * PNP: fix restoring devices after hibernation
+
+2013-11-28 Thomas Wood <thomas.wood at intel.com>
+
+ * drm: fix the addition of the side-by-side (half) flag for extra 3D modes
+
+2013-11-29 Thomas Wood <thomas.wood at intel.com>
+
+ * drm/edid: fix length check when adding extra 3D modes
+
+2013-12-03 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/atom: fix bus probes when hw_i2c is set (v2)
+
+2013-12-03 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: fix null pointer dereference in dce6+ audio code
+
+2013-12-03 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon: fixup bad vram size on SI
+
+2013-12-05 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm-intel-fixes-2013-12-02' of git://people.freedesktop.org/~danvet/drm-intel into drm-fixes
+
+2013-12-05 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'exynos-drm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/daeinki/drm-exynos into drm-fixes
+
+2013-12-05 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm/for-3.13-rc3' of git://anongit.freedesktop.org/tegra/linux into drm-fixes
+
+2013-12-04 Rob Clark <robdclark at gmail.com>
+
+ * udl: fix issue with imported prime buffers
+
+2013-12-04 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86, bitops: Correct the assembly constraints to testing bitops
+
+2013-12-04 Geyslan G. Bem <geyslan at gmail.com>
+
+ * selinux: fix possible memory leak
+
+2013-12-03 Paul Moore <pmoore at redhat.com>
+
+ * selinux: pull address family directly from the request_sock struct
+
+2013-12-03 Paul Moore <pmoore at redhat.com>
+
+ * selinux: ensure that the cached NetLabel secattr matches the desired SID
+
+2013-12-03 Paul Moore <pmoore at redhat.com>
+
+ * selinux: handle TCP SYN-ACK packets correctly in selinux_ip_postroute()
+
+2013-12-03 Paul Moore <pmoore at redhat.com>
+
+ * selinux: handle TCP SYN-ACK packets correctly in selinux_ip_output()
+
+2013-12-02 Helge Deller <deller at gmx.de>
+
+ * nfs: fix do_div() warning by instead using sector_div()
+
+2013-12-04 Trond Myklebust <Trond.Myklebust at netapp.com>
+
+ * MAINTAINERS: Update contact information for Trond Myklebust
+
+2013-12-04 Trond Myklebust <Trond.Myklebust at netapp.com>
+
+ * NFSv4.1: Prevent a 3-way deadlock between layoutreturn, open and state recovery
+
+2013-12-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'gpio-v3.13-3' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio
+
+2013-12-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2013-12-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'parisc-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
+
+2013-12-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'squashfs-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/pkl/squashfs-next
+
+2013-12-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
+
+2013-12-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-28 Mark Brown <broonie at linaro.org>
+
+ * ASoC: adsp: Use async writes where possible
+
+2013-11-28 Marco Piazza <mpiazza at gmail.com>
+
+ * Bluetooth: Add support for Toshiba Bluetooth device [0930:0220]
+
+2013-12-04 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix silent output on MacBook Air 2,1
+
+2013-12-04 Jingoo Han <jg1.han at samsung.com>
+
+ * spi: sc18is602: Use devm_spi_register_master()
+
+2013-12-04 Bo Shen <voice.shen at atmel.com>
+
+ * ASoC: sam9x5_wm8731: change to work in DSP A mode
+
+2013-12-04 Bo Shen <voice.shen at atmel.com>
+
+ * ASoC: atmel_ssc_dai: add dai trigger ops
+
+2013-12-04 Nicolin Chen <b42378 at freescale.com>
+
+ * ASoC: soc-pcm: Use valid condition for snd_soc_dai_digital_mute() in hw_free()
+
+2013-12-04 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.13-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2013-12-02 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * video: vt8500: fix error handling in probe()
+
+2013-10-22 Johan Hovold <jhovold at gmail.com>
+
+ * atmel_lcdfb: fix module autoload
+
+2013-12-04 David Henningsson <david.henningsson at canonical.com>
+
+ * ALSA: hda - Fix missing ELD info when using jackpoll_ms parameter
+
+2013-12-04 Kailang Yang <kailang at realtek.com>
+
+ * ALSA: hda/realtek - remove hp_automute_hook from alc283_fixup_chromebook
+
+2013-12-03 Dan Williams <dan.j.williams at intel.com>
+
+ * dma: fix build breakage in s3c24xx-dma
+
+2013-12-03 Chris Mason <clm at fb.com>
+
+ * Btrfs: update the MAINTAINERS file
+
+2013-11-20 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86-64, build: Always pass in -mno-sse
+
+2013-12-03 Helge Deller <deller at gmx.de>
+
+ * parisc: update 64bit defconfigs and use SIL680 instead of SIIMAGE driver
+
+2013-12-03 Dinh Nguyen <dinguyen at altera.com>
+
+ * arm: dts: socfpga: Change some clocks of gate-clk type to perip-clk
+
+2013-07-17 Dinh Nguyen <dinguyen at altera.com>
+
+ * arm: socfpga: Enable ARM_TWD for socfpga
+
+2013-12-03 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'iio-fixes-for-3.13b' of git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio into staging-linus
+
+2013-12-03 Jeff Moyer <jmoyer at redhat.com>
+
+ * blk-mq: fix dereference of rq->mq_ctx if allocation fails
+
+2013-12-03 Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+
+ * cpuidle: Check for dev before deregistering it.
+
+2013-12-03 Olof Johansson <olof at lixom.net>
+
+ * ARM: multi_v7_defconfig: enable SDHCI_BCM_KONA and MMC_BLOCK_MINORS=16
+
+2013-12-02 Olof Johansson <olof at lixom.net>
+
+ * ARM: sunxi_defconfig: enable NFS, TMPFS, PRINTK_TIME and nfsroot support
+
+2013-12-02 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'omap-for-v3.13/more-dt-regressions' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes
+
+2013-12-02 Olof Johansson <olof at lixom.net>
+
+ * ARM: multi_v7_defconfig: enable network for BeagleBone Black
+
+2013-12-03 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'asoc/fix/arizona', 'asoc/fix/atmel', 'asoc/fix/fsl', 'asoc/fix/kirkwood', 'asoc/fix/omap', 'asoc/fix/rcar', 'asoc/fix/wm8731' and 'asoc/fix/wm8990' into asoc-linus
+
+2013-12-03 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/dapm' into asoc-linus
+
+2013-12-03 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/core' into asoc-linus
+
+2013-12-03 Bo Shen <voice.shen at atmel.com>
+
+ * ASoC: wm8731: fix dsp mode configuration
+
+2013-12-02 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: ssm2602: Use core for applying symmetry constraints
+
+2013-12-03 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/symmetry' into asoc-ssm2602
+
+2013-11-21 Vijaya Mohan Guvva <vmohan at brocade.com>
+
+ * [SCSI] bfa: Fix crash when symb name set for offline vport
+
+2013-11-15 Amit Pundir <amit.pundir at linaro.org>
+
+ * epoll: drop EPOLLWAKEUP if PM_SLEEP is disabled
+
+2013-12-03 Bjørn Mork <bjorn at mork.no>
+
+ * cpufreq: fix garbage kobjects on errors during suspend/resume
+
+2013-12-03 Olivier Gay <ogay at logitech.com>
+
+ * HID: logitech-dj: add HIDRAW dependency in Kconfig
+
+2013-11-29 Heikki Krogerus <heikki.krogerus at linux.intel.com>
+
+ * gpiolib: change a warning to debug message when failing to get gpio
+
+2013-11-22 Liu Gang <Gang.Liu at freescale.com>
+
+ * powerpc/gpio: Fix the wrong GPIO input data on MPC8572/MPC8536
+
+2013-11-23 Alexandre Courbot <acourbot at nvidia.com>
+
+ * gpiolib: use platform GPIO mappings as fallback
+
+2013-11-23 Alexandre Courbot <acourbot at nvidia.com>
+
+ * Documentation: gpiolib: add 00-INDEX file
+
+2013-11-23 Alexandre Courbot <acourbot at nvidia.com>
+
+ * gpiolib: fix lookup of platform-mapped GPIOs
+
+2013-11-25 Alexandre Courbot <acourbot at nvidia.com>
+
+ * gpiolib: add missing declarations
+
+2013-11-28 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * sh-pfc: sh7372: Fix pin bias setup
+
+2013-11-28 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * sh-pfc: r8a7740: Fix pin bias setup
+
+2013-11-08 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * drm/tegra: return -EFAULT if copy_from_user() fails
+
+2013-12-02 Kailang Yang <kailang at realtek.com>
+
+ * ALSA: hda/realtek - Independent of model for HP
+
+2013-12-02 David Henningsson <david.henningsson at canonical.com>
+
+ * ALSA: hda - Fix headset mic input after muted internal mic (Dell/Realtek)
+
+2013-11-21 Gerhard Sittig <gsi at denx.de>
+
+ * dt: binding: reword PowerPC 8xxx GPIO documentation
+
+2013-11-25 Stephen Warren <swarren at nvidia.com>
+
+ * ARM: tegra: delete nvidia,tegra20-spi.txt binding
+
+2013-10-23 Chanwoo Choi <cw00.choi at samsung.com>
+
+ * hwmon: ntc_thermistor: Fix typo (pullup-uV -> pullup-uv)
+
+2013-11-13 Wei Ni <wni at nvidia.com>
+
+ * of: add vendor prefix for GMT
+
+2013-11-18 Laurent Pinchart <laurent.pinchart at ideasonboard.com>
+
+ * clk: exynos: Fix typos in DT bindings documentation
+
+2013-11-18 Thierry Reding <thierry.reding at gmail.com>
+
+ * of: Add vendor prefix for LG Corporation
+
+2013-11-18 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * Documentation: net: fsl-fec.txt: Add phy-supply entry
+
+2013-11-08 Sricharan R <r.sricharan at ti.com>
+
+ * ARM: dts: doc: Document missing binding for omap5-mpu
+
+2013-11-07 Rob Herring <rob.herring at calxeda.com>
+
+ * dt-bindings: add ARMv8 PMU binding
+
+2013-10-08 Stephen Warren <swarren at nvidia.com>
+
+ * MAINTAINERS: remove swarren from DT bindings
+
+2013-08-08 Kumar Gala <galak at codeaurora.org>
+
+ * MAINTAINERS: Add Kumar to Device Tree Binding maintainers group
+
+2013-12-02 Roberto Sassu <roberto.sassu at polito.it>
+
+ * ima: properly free ima_template_entry structures
+
+2013-12-02 Christoph Paasch <christoph.paasch at uclouvain.be>
+
+ * ima: Do not free 'entry' before it is initialized
+
+2013-12-02 Dave Airlie <airlied at redhat.com>
+
+ * drm/radeon: fix VGT_GS_INSTANCE_CNT register
+
+2013-12-02 Alexandre Demers <alexandre.f.demers at gmail.com>
+
+ * drm/radeon: Fix a typo in Cayman and Evergreen registers
+
+2013-11-26 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/dpm: simplify state adjust logic for NI
+
+2013-12-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'leds-fixes-for-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/cooloney/linux-leds
+
+2013-11-28 Peter Ujfalusi <peter.ujfalusi at ti.com>
+
+ * leds: pwm: Fix for deferred probe in DT booted mode
+
+2013-12-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * uio: we cannot mmap unaligned page contents
+
+2013-12-02 Florian Vaussard <florian.vaussard at epfl.ch>
+
+ * ARM: dts: Fix the name of supplies for smsc911x shared by OMAP
+
+2013-11-15 James Bottomley <JBottomley at Parallels.com>
+
+ * [SCSI] enclosure: fix WARN_ON in dual path device removing
+
+2013-11-11 Nikith Ganigarakoppal <Nikith.Ganigarakoppal at pmcs.com>
+
+ * [SCSI] pm80xx: Tasklets synchronization fix.
+
+2013-10-30 Nikith Ganigarakoppal <Nikith.Ganigarakoppal at pmcs.com>
+
+ * [SCSI] pm80xx: Resetting the phy state.
+
+2013-10-30 Nikith Ganigarakoppal <Nikith.Ganigarakoppal at pmcs.com>
+
+ * [SCSI] pm80xx: Fix for direct attached device.
+
+2013-11-13 Nikith Ganigarakoppal <Nikith.Ganigarakoppal at pmcs.com>
+
+ * [SCSI] pm80xx: Module author addition
+
+2013-12-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'irq-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-12-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-12-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-12-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'stable/for-linus-3.13-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
+
+2013-12-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'spi-v3.13-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi
+
+2013-12-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2013-11-28 Vince Hsu <vinceh at nvidia.com>
+
+ * regulator: as3722: set the correct current limit
+
+2013-12-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * vfs: fix subtle use-after-free of pipe_inode_info
+
+2013-12-02 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Use always amps for auto-mute on AD1986A codec
+
+2013-12-02 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda/analog - Handle inverted EAPD properly in vmaster hook
+
+2013-12-02 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Another fixup for ASUS laptop with ALC660 codec
+
+2013-11-30 Ben Hutchings <ben at decadent.org.uk>
+
+ * HID: kye: Fix missing break in kye_report_fixup()
+
+2013-12-02 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: atmel: Fix possible array overflow
+
+2013-10-01 Inki Dae <inki.dae at samsung.com>
+
+ * drm/exynos: release unhandled page flip events at postclose.
+
+2013-09-19 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * drm/exynos: Fix trivial typo in exynos_drm_fimd.c
+
+2013-12-02 Shawn Guo <shawn.guo at linaro.org>
+
+ * ASoC: core: fix devres parameter in devm_snd_soc_register_card()
+
+2013-11-28 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: mxs: Use devm_snd_dmaengine_pcm_register()
+
+2013-11-28 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: bcm2835-i2s: Use devm_snd_dmaengine_pcm_register()
+
+2013-12-02 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/dma' into asoc-bcm2835
+
+2013-11-30 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: Set SNDRV_PCM_INFO_JOINT_DUPLEX for PCMs with symmetry constraints
+
+2013-11-30 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: generic-dmaengine-pcm: Set BATCH flag when residue reporting is not supported
+
+2013-11-30 Jarkko Nikula <jarkko.nikula at bitmer.com>
+
+ * ASoC: omap: n810: Convert to clk_prepare_enable/clk_disable_unprepare
+
+2013-11-09 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * ASoC: fsl: set correct platform drvdata in pcm030_fabric_probe()
+
+2013-11-08 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * ASoC: fsl: imx-pcm-fiq: Remove unused 'runtime' variable
+
+2013-11-05 Oskar Schirmer <oskar at scara.com>
+
+ * ASoC: fsl: imx-pcm-fiq: remove bogus period delta calculation
+
+2013-12-01 Axel Lin <axel.lin at ingics.com>
+
+ * pinctrl: abx500: Fix header file include guard
+
+2013-11-26 Christian Engelmayer <christian.engelmayer at frequentis.com>
+
+ * Input: usbtouchscreen - separate report and transmit buffer size handling
+
+2013-11-26 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Input: sur40 - suppress false uninitialized variable warning
+
+2013-12-01 Eugenia Emantayev <eugenia at mellanox.com>
+
+ * net/mlx4_en: Remove selftest TX queues empty condition
+
+2013-12-01 fan.du <fan.du at windriver.com>
+
+ * {pktgen, xfrm} Update IPv4 header total len and checksum after tranformation
+
+2013-11-28 Michael S. Tsirkin <mst at redhat.com>
+
+ * virtio_net: make all RX paths handle erors consistently
+
+2013-11-28 Michael S. Tsirkin <mst at redhat.com>
+
+ * virtio_net: fix error handling for mergeable buffers
+
+2013-12-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml
+
+2013-12-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://ftp.arm.linux.org.uk/~rmk/linux-arm
+
+2013-11-24 Austin Boyle <boyle.austin at gmail.com>
+
+ * max17042_battery: Fix build errors caused by missing REGMAP_I2C config
+
+2013-11-22 Shuah Khan <shuah.kh at samsung.com>
+
+ * power_supply: Fix Oops from NULL pointer dereference from wakeup_source_activate
+
+2013-11-29 Richard Weinberger <richard at nod.at>
+
+ * um: Build always with -mcmodel=large on 64bit
+
+2013-11-21 Richard Weinberger <richard at nod.at>
+
+ * um: Rename print_stack_trace to do_stack_trace
+
+2013-11-01 Stephen M. Cameron <scameron at beardog.cce.hp.com>
+
+ * [SCSI] hpsa: return 0 from driver probe function on success, not 1
+
+2013-11-30 Fabio Estevam <festevam at gmail.com>
+
+ * ARM: 7907/1: lib: delay-loop: Add align directive to fix BogoMIPS calculation
+
+2013-11-25 Dave Martin <dave.martin at linaro.org>
+
+ * ARM: 7897/1: kexec: Use the right ISA for relocate_new_kernel
+
+2013-11-21 Victor Kamensky <victor.kamensky at linaro.org>
+
+ * ARM: 7895/1: signal: fix armv7-m build issue in sigreturn_codes.S
+
+2013-11-29 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * ARM: footbridge: fix EBSA285 LEDs
+
+2013-09-23 Stephen M. Cameron <scameron at beardog.cce.hp.com>
+
+ * [SCSI] hpsa: do not discard scsi status on aborted commands
+
+2013-11-30 Helge Deller <deller at gmx.de>
+
+ * parisc: remove CONFIG_MLONGCALLS=y from defconfigs
+
+2013-11-29 Thomas Huth <thuth at linux.vnet.ibm.com>
+
+ * virtio_net: Fixed a trivial typo (fitler --> filter)
+
+2013-11-30 Helge Deller <deller at gmx.de>
+
+ * parisc: fix kernel memory layout in vmlinux.ld.S
+
+2013-11-30 Helge Deller <deller at gmx.de>
+
+ * parisc: use kernel_text_address() in unwind functions
+
+2013-10-31 Chen Gang <gang.chen at asianux.com>
+
+ * parisc: remove empty SERIAL_PORT_DFNS in serial.h
+
+2013-11-29 stephen hemminger <stephen at networkplumber.org>
+
+ * netem: fix gemodel loss generator
+
+2013-11-29 stephen hemminger <stephen at networkplumber.org>
+
+ * netem: fix loss 4 state model
+
+2013-11-29 stephen hemminger <stephen at networkplumber.org>
+
+ * netem: missing break in ge loss generator
+
+2013-11-29 Arvid Brodin <arvid.brodin at alten.se>
+
+ * net/hsr: Support iproute print_opt ('ip -details ...')
+
+2013-11-28 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * ARM: footbridge: fix VGA initialisation
+
+2013-11-28 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * ARM: fix booting low-vectors machines
+
+2013-11-25 Paul Drews <paul.drews at intel.com>
+
+ * ACPI: Add BayTrail SoC GPIO and LPSS ACPI IDs
+
+2013-11-21 Aristeu Rozanski <aris at redhat.com>
+
+ * sb_edac: Shut up compiler warning when EDAC_DEBUG is enabled
+
+2013-11-29 Levente Kurusa <levex at linux.com>
+
+ * x86, mce: Call put_device on device_register failure
+
+2013-11-27 Roberto Sassu <roberto.sassu at polito.it>
+
+ * ima: store address of template_fmt_copy in a pointer before calling strsep
+
+2013-11-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.13-rc2
+
+2013-11-29 Peter Hurley <peter at hurleysoftware.com>
+
+ * n_tty: Fix missing newline echo
+
+2013-11-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'arm64-stable' of git://git.kernel.org/pub/scm/linux/kernel/git/cmarinas/linux-aarch64
+
+2013-11-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
+
+2013-11-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'i2c/for-current' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux
+
+2013-11-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
+
+2013-11-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata
+
+2013-11-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
+
+2013-11-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.13-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2013-11-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
+
+2013-11-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2013-11-27 Catalin Marinas <catalin.marinas at arm.com>
+
+ * arm64: Move PTE_PROT_NONE higher up
+
+2013-11-29 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: core: Use consistent byte ordering in snd_soc_bytes_get
+
+2013-11-29 Catalin Marinas <catalin.marinas at arm.com>
+
+ * arm64: Use Normal NonCacheable memory for writecombine
+
+2013-11-29 Chris Wilson <chris at chris-wilson.co.uk>
+
+ * drm/i915: Pin pages whilst allocating for dma-buf vmap()
+
+2013-11-29 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: MI_PREDICATE_RESULT_2 is HSW only
+
+2013-11-29 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Make the DERRMR SRM target global GTT
+
+2013-11-29 Thomas Gleixner <tglx at linutronix.de>
+
+ * nohz: Fix another inconsistency between CONFIG_NO_HZ=n and nohz=off
+
+2013-11-29 Madper Xie <cxie at redhat.com>
+
+ * efi-pstore: Make efi-pstore return a unique id
+
+2013-11-29 Al Viro <viro at zeniv.linux.org.uk>
+
+ * fix bogus path_put() of nd->root after some unlazy_walk() failures
+
+2013-10-23 Martin K. Petersen <martin.petersen at oracle.com>
+
+ * [SCSI] Disable WRITE SAME for RAID and virtual host adapter drivers
+
+2013-11-28 Dave Airlie <airlied at redhat.com>
+
+ * drm/qxl: fix memory leak in release list handling
+
+2013-11-11 Matt Fleming <matt.fleming at intel.com>
+
+ * x86/efi: Fix earlyprintk off-by-one bug
+
+2013-10-30 Seiji Aguchi <seiji.aguchi at hds.com>
+
+ * efivars, efi-pstore: Hold off deletion of sysfs entry until the scan is completed
+
+2013-11-28 Matthew Leach <Matthew.Leach at arm.com>
+
+ * arm64: debug: make aarch32 bkpt checking endian clean
+
+2013-11-28 Matthew Leach <Matthew.Leach at arm.com>
+
+ * arm64: ptrace: fix compat registes get/set to be endian clean
+
+2013-11-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'gpio-v3.13-2' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio
+
+2013-11-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'md/3.13-fixes' of git://neil.brown.name/md
+
+2013-11-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-next' of git://git.samba.org/sfrench/cifs-2.6
+
+2013-11-28 Helge Deller <deller at gmx.de>
+
+ * kernel/extable: fix address-checks for core_kernel and init areas
+
+2013-11-28 Mark Rutland <mark.rutland at arm.com>
+
+ * irqchip: Gic: fix boot for chained gics
+
+2013-11-28 Horia Geanta <horia.geanta at freescale.com>
+
+ * crypto: testmgr - fix sglen in test_aead for case 'dst != src'
+
+2013-11-28 Horia Geanta <horia.geanta at freescale.com>
+
+ * crypto: talitos - fix aead sglen for case 'dst != src'
+
+2013-11-28 Horia Geanta <horia.geanta at freescale.com>
+
+ * crypto: caam - fix aead sglen for case 'dst != src'
+
+2013-11-28 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Add LFE chmap to ASUS ET2700
+
+2013-11-28 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Initialize missing bass speaker pin for ASUS AIO ET2700
+
+2013-11-27 Viresh Kumar <viresh.kumar at linaro.org>
+
+ * cpufreq: suspend governors on system suspend/hibernate
+
+2013-11-28 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: Add resource managed snd_dmaengine_pcm_register()
+
+2013-11-26 Bockholdt Arne <a.bockholdt at precitec-optronik.de>
+
+ * intel_idle: Fixed C6 state on Avoton/Rangeley processors
+
+2013-11-27 Toshi Kani <toshi.kani at hp.com>
+
+ * ACPI / PCI / hotplug: Avoid warning when _ADR not present
+
+2013-11-28 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'spi/fix/bcm2835', 'spi/fix/bcm63xx', 'spi/fix/mpc512x-psc', 'spi/fix/mxs', 'spi/fix/pxa2xx', 'spi/fix/qspi', 'spi/fix/rspi' and 'spi/fix/txx9' into spi-linus
+
+2013-11-28 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'spi/fix/core' into spi-linus
+
+2013-11-29 Chew, Chiau Ee <chiau.ee.chew at intel.com>
+
+ * spi/pxa2xx: Restore private register bits.
+
+2013-11-27 Oleksij Rempel <linux at rempel-privat.de>
+
+ * ALSA: hda - limit mic boost on Asus UX31[A,E]
+
+2013-11-28 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Check leaf nodes to find aamix amps
+
+2013-11-28 Vineet Gupta <vgupta at synopsys.com>
+
+ * ARC: [perf] Fix a few thinkos
+
+2013-11-25 Florian Meier <florian.meier at koalo.de>
+
+ * i2c: bcm2835: Linking platform nodes to adapter nodes
+
+2013-11-21 Magnus Damm <damm at opensource.se>
+
+ * ARM: shmobile: r8a7790: Fix GPIO resources in DTS
+
+2013-11-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2013-11-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'tty-3.13-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
+
+2013-11-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'staging-3.13-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
+
+2013-11-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'driver-core-3.13-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core
+
+2013-11-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
+
+2013-11-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.13-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2013-11-27 Ilia Mirkin <imirkin at alum.mit.edu>
+
+ * drm/nouveau/hwmon: fix compilation without CONFIG_HWMON
+
+2013-11-26 David Herrmann <dh.herrmann at gmail.com>
+
+ * drm/sysfs: fix OOM verification
+
+2013-11-27 Jens Axboe <axboe at kernel.dk>
+
+ * Merge branch 'stable/for-jens-3.13-take-two' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip into for-linus
+
+2013-11-28 NeilBrown <neilb at suse.de>
+
+ * md/raid5: fix newly-broken locking in get_active_stripe.
+
+2013-11-28 NeilBrown <neilb at suse.de>
+
+ * md: test mddev->flags more safely in md_check_recovery.
+
+2013-11-25 NeilBrown <neilb at suse.de>
+
+ * md/raid5: fix new memory-reference bug in alloc_thread_groups.
+
+2013-11-27 Tejun Heo <tj at kernel.org>
+
+ * cgroup: fix cgroup_subsys_state leak for seq_files
+
+2013-11-25 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ATA: Fix port removal ordering
+
+2013-11-26 Peter Zijlstra <peterz at infradead.org>
+
+ * cpuset: Fix memory allocator deadlock
+
+2013-11-27 Victor Kamensky <victor.kamensky at linaro.org>
+
+ * i2c: omap: raw read and write endian fix
+
+2013-11-21 Fabio Estevam <festevam at gmail.com>
+
+ * ASoC: ssm2602: Use IS_ENABLED() macro
+
+2013-11-27 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "sysfs: handle duplicate removal attempts in sysfs_remove_group()"
+
+2013-11-27 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Staging: tidspbridge: disable driver
+
+2013-11-25 Jean-Francois Moine <moinejf at free.fr>
+
+ * ASoC: kirkwood: Fix erroneous double output while playing
+
+2013-11-27 Mark Brown <broonie at linaro.org>
+
+ * regulator: core: Check for DT every time we check full constraints
+
+2013-11-26 Jean-Francois Moine <moinejf at free.fr>
+
+ * ASoC: kirkwood: Fix invalid S/PDIF format
+
+2013-11-27 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: pcm: Always honor DAI min and max sample rate constraints
+
+2013-11-27 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: pcm: Fix rate_max calculation
+
+2013-11-27 Mark Brown <broonie at linaro.org>
+
+ * regulator: core: Replace checks of have_full_constraints with a function
+
+2013-11-27 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * ASoC: wm5110: Remove output OSR and PGA volume controls
+
+2013-11-27 Bo Shen <voice.shen at atmel.com>
+
+ * ASoC: atmel: sam9x5_wm8731: fix oops when unload module
+
+2013-11-21 Markus Mayer <markus.mayer at linaro.org>
+
+ * gpio: bcm281xx: Fix return value of bcm_kona_gpio_get()
+
+2013-11-26 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix hp-mic mode without VREF bits
+
+2013-11-26 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Create Headhpone Mic Jack Mode when really needed
+
+2013-11-27 Linus Walleij <linus.walleij at linaro.org>
+
+ * gpio: pl061: move irqdomain initialization
+
+2013-11-26 Thomas Pugliese <thomas.pugliese at gmail.com>
+
+ * ALSA: usb: use multiple packets per urb for Wireless USB inbound audio
+
+2013-11-20 Nicolas Dichtel <nicolas.dichtel at 6wind.com>
+
+ * sched/doc: Fix generation of device-drivers
+
+2013-11-21 Thomas Gleixner <tglx at linutronix.de>
+
+ * sched: Expose preempt_schedule_irq()
+
+2013-11-26 David Herrmann <dh.herrmann at gmail.com>
+
+ * HID: uhid: fix leak for 64/32 UHID_CREATE
+
+2013-11-27 Samuel Ortiz <sameo at linux.intel.com>
+
+ * Merge tag 'mfd-lee-3.13-fixes-1' of git://git.linaro.org/people/ljones/mfd
+
+2013-11-27 James Ralston <james.d.ralston at intel.com>
+
+ * mfd: lpc_ich: Fix Wildcat Point info name field
+
+2013-11-17 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * mfd: ti-ssp: Fix build
+
+2013-11-27 Hui Wang <jason77.wang at gmail.com>
+
+ * ALSA: hda - Enable mute/mic-mute LEDs for more Thinkpads with Conexant codec
+
+2013-10-22 Dan Williams <dan.j.williams at intel.com>
+
+ * [SCSI] libsas: fix usage of ata_tf_to_fis
+
+2013-11-14 Eric W. Biederman <ebiederm at xmission.com>
+
+ * vfs: Fix a regression in mounting proc
+
+2013-11-14 Eric W. Biederman <ebiederm at xmission.com>
+
+ * fork: Allow CLONE_PARENT after setns(CLONE_NEWPID)
+
+2013-11-08 Eric W. Biederman <ebiederm at xmission.com>
+
+ * vfs: In d_path don't call d_dname on a mount point
+
+2013-11-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'trace-fixes-v3.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
+
+2013-11-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus-bugs' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
+
+2013-11-26 Pali Rohár <pali.rohar at gmail.com>
+
+ * Input: add key code for ambient light sensor button
+
+2013-11-26 Paul Moore <pmoore at redhat.com>
+
+ * Merge tag 'v3.12'
+
+2013-11-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2013-11-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security
+
+2013-11-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'ntb-3.13' of git://github.com/jonmason/ntb
+
+2013-11-26 Jason Gunthorpe <jgunthorpe at obsidianresearch.com>
+
+ * PCI: mvebu: Return 'unsupported' for Interrupt Line and Interrupt Pin
+
+2013-11-20 Matt Wilson <msw at amazon.com>
+
+ * xen/gnttab: leave lazy MMU mode in the case of a m2p override failure
+
+2013-11-26 Thomas Gleixner <tglx at linutronix.de>
+
+ * Merge branch 'clockevents/fixes' of git://git.linaro.org/people/dlezcano/linux into timers/urgent
+
+2013-11-26 Takashi Iwai <tiwai at suse.de>
+
+ * Merge branch 'fix/firewire' into for-linus
+
+2013-11-20 Andy Adamson <andros at netapp.com>
+
+ * SUNRPC: do not fail gss proc NULL calls with EACCES
+
+2013-11-09 Felipe Pena <felipensp at gmail.com>
+
+ * block: xen-blkfront: Fix possible NULL ptr dereference
+
+2013-11-14 Tim Gardner <tim.gardner at canonical.com>
+
+ * xen-blkfront: Silence pfn maybe-uninitialized warning
+
+2013-11-25 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * ftrace: Fix function graph with loading of modules
+
+2013-11-26 Steven Rostedt (Red Hat) <rostedt at goodmis.org>
+
+ * tracing: Allow events to have NULL strings
+
+2013-11-06 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * fbdev: sh_mobile_meram: Fix defined but not used compiler warnings
+
+2013-11-19 Sasha Levin <sasha.levin at oracle.com>
+
+ * video: kyro: fix incorrect sizes when copying to userspace
+
+2013-11-26 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branches 'regmap/fix/doc' and 'regmap/fix/mmio' into regmap-linus
+
+2013-11-26 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regmap/fix/core' into regmap-linus
+
+2013-11-25 Stephen Warren <swarren at nvidia.com>
+
+ * regmap: use IS_ERR() to check clk_get() results
+
+2013-11-25 Tim Kryger <tim.kryger at linaro.org>
+
+ * i2c: i2c-bcm-kona: Fix module build
+
+2013-11-24 Martin Vogt <mvogt1 at gmail.com>
+
+ * i2c: i2c-diolan-u2c: different usb endpoints for DLN-2-U2C
+
+2013-11-21 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * i2c: bcm-kona: remove duplicated include
+
+2013-11-26 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Drop bus->avoid_link_reset flag
+
+2013-11-26 Kailang Yang <kailang at realtek.com>
+
+ * ALSA: hda/realtek - Set pcbeep amp for ALC668
+
+2013-11-26 Kailang Yang <kailang at realtek.com>
+
+ * ALSA: hda/realtek - Add support of ALC231 codec
+
+2013-11-20 Taras Kondratiuk <taras.kondratiuk at linaro.org>
+
+ * i2c: davinci: raw read and write endian fix
+
+2013-11-11 Aaro Koskinen <aaro.koskinen at iki.fi>
+
+ * ARM: OMAPFB: panel-sony-acx565akm: fix bad unlock balance
+
+2013-11-18 Michal Marek <mmarek at suse.cz>
+
+ * mfd: Make MFD_AS3722 depend on I2C=y
+
+2013-11-25 Stephen Warren <swarren at wwwdotorg.org>
+
+ * ARM: bcm2835: add missing #xxx-cells to I2C nodes
+
+2013-11-26 Mattia Dongili <malattia at linux.it>
+
+ * sony-laptop: do not scribble keyboard backlight registers on resume
+
+2013-11-25 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "n_gsm: race between ld close and gsmtty open"
+
+2013-11-23 Andrew Liu <andrew.liu200917 at gmail.com>
+
+ * Input: keyboard - "keycode & KEY_MAX" changes some keycode values
+
+2013-11-26 James Morris <james.l.morris at oracle.com>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity into for-linus
+
+2013-11-25 Kevin Hilman <khilman at linaro.org>
+
+ * Merge tag 'imx-fixes-3.13-2' of git://git.linaro.org/people/shawnguo/linux-2.6 into fixes
+
+2013-10-23 Doug Anderson <dianders at chromium.org>
+
+ * ARM: dts: Add max77686 RTC interrupt to cros5250-common
+
+2013-11-13 Mika Westerberg <mika.westerberg at linux.intel.com>
+
+ * HID: i2c-hid: disable interrupt on suspend
+
+2013-11-25 Dave Martin <Dave.Martin at arm.com>
+
+ * ARM: vexpress/TC2: Implement MCPM power_down_finish()
+
+2013-11-11 Michal Marek <mmarek at suse.cz>
+
+ * PCI: Omit PCI ID macro strings to shorten quirk names
+
+2013-11-24 Geyslan G. Bem <geyslan at gmail.com>
+
+ * selinux: fix possible memory leak
+
+2013-11-24 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * PCI: Move device_del() from pci_stop_dev() to pci_destroy_dev()
+
+2013-11-18 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Revert "workqueue: allow work_on_cpu() to be called recursively"
+
+2013-11-25 Thierry Reding <thierry.reding at gmail.com>
+
+ * ARM: tegra: Provide dummy powergate implementation
+
+2013-11-19 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'omap-for-v3.13/more-fixes-for-merge-window-take2' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes
+
+2013-11-12 Olof Johansson <olof at lixom.net>
+
+ * ARM: omap: fix warning with LPAE build
+
+2013-11-18 Alexander Duyck <alexander.h.duyck at intel.com>
+
+ * PCI: Avoid unnecessary CPU switch when calling driver .probe() method
+
+2013-11-25 Laxman Dewangan <ldewangan at nvidia.com>
+
+ * irq: Enable all irqs unconditionally in irq_resume
+
+2013-11-19 Luciano Coelho <luciano.coelho at intel.com>
+
+ * iwlwifi: mvm: use a cast to calculate the last seqno from the next one
+
+2013-11-19 Luciano Coelho <luciano.coelho at intel.com>
+
+ * iwlwifi: mvm: set seqno also when no keys are set
+
+2013-11-05 Alexander Bondar <alexander.bondar at intel.com>
+
+ * iwlwifi: pcie: stop sending commands to dead firmware
+
+2013-11-22 Olav Haugan <ohaugan at codeaurora.org>
+
+ * staging: zsmalloc: Ensure handle is never 0 on success
+
+2013-11-21 Peng Tao <bergwolf at gmail.com>
+
+ * staging/lustre/ptlrpc: fix ptlrpc_stop_pinger logic
+
+2013-11-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'regulator-v3.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
+
+2013-11-25 Roberto Sassu <roberto.sassu at polito.it>
+
+ * ima: make a copy of template_fmt in template_desc_init_fields()
+
+2013-11-07 Peter Hurley <peter at hurleysoftware.com>
+
+ * n_tty: Protect minimum_to_wake reset for concurrent readers
+
+2013-11-22 Florian Meier <florian.meier at koalo.de>
+
+ * ASoC: Add support for BCM2835
+
+2013-11-19 Peter Hurley <peter at hurleysoftware.com>
+
+ * tty: Reset hupped state on open
+
+2013-11-22 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * TTY: amiserial, add missing platform check
+
+2013-11-21 Catalin Marinas <catalin.marinas at arm.com>
+
+ * arm64: Unmask asynchronous aborts when in kernel mode
+
+2013-11-14 Catalin Marinas <catalin.marinas at arm.com>
+
+ * arm64: dts: Reserve the memory used for secondary CPU release address
+
+2013-11-12 Marc Zyngier <Marc.Zyngier at arm.com>
+
+ * arm64: let the core code deal with preempt_count
+
+2013-11-24 Steve French <smfrench at gmail.com>
+
+ * [CIFS] Do not use btrfs refcopy ioctl for SMB2 copy offload
+
+2013-11-22 Mark Brown <broonie at linaro.org>
+
+ * ASoC: wm8990: Mark the register map as dirty when powering down
+
+2013-11-08 Roberto Sassu <roberto.sassu at polito.it>
+
+ * ima: do not send field length to userspace for digest of ima template
+
+2013-11-08 Roberto Sassu <roberto.sassu at polito.it>
+
+ * ima: do not include field length in template digest calc for ima template
+
+2013-11-22 Martin Schwidefsky <schwidefsky at de.ibm.com>
+
+ * s390/mm: handle asce-type exceptions as normal page fault
+
+2013-11-22 Martin Schwidefsky <schwidefsky at de.ibm.com>
+
+ * s390,time: revert direct ktime path for s390 clockevent device
+
+2013-11-22 Martin Schwidefsky <schwidefsky at de.ibm.com>
+
+ * s390/time,vdso: convert to the new update_vsyscall interface
+
+2013-11-21 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * s390/uaccess: add missing page table walk range check
+
+2013-11-14 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * pinctrl: rockchip: missing unlock on error in rockchip_set_pull()
+
+2013-11-10 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * pinctrl: abx500: fix some more bitwise AND tests
+
+2013-11-08 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * pinctrl: rockchip: testing the wrong variable
+
+2013-11-19 Axel Lin <axel.lin at ingics.com>
+
+ * gpio: ucb1400: Add MODULE_ALIAS
+
+2013-11-19 Alexandre Courbot <acourbot at nvidia.com>
+
+ * gpiolib: fix of_find_gpio() when OF not defined
+
+2013-11-13 Michal Nazarewicz <mina86 at mina86.com>
+
+ * gpio: fix memory leak in error path
+
+2013-11-07 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * gpio: rcar: NULL dereference on error in probe()
+
+2013-11-07 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * gpio: msm: make msm_gpio.summary_irq signed for error handling
+
+2013-11-07 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * gpio: mvebu: make mvchip->irqbase signed for error handling
+
+2013-11-16 Alexandre Courbot <acourbot at nvidia.com>
+
+ * gpiolib: use dedicated flags for GPIO properties
+
+2013-11-21 Vineet Gupta <vgupta at synopsys.com>
+
+ * ARC: Add guard macro to uapi/asm/unistd.h
+
+2013-11-15 Vineet Gupta <vgupta at synopsys.com>
+
+ * ARC: extable: Enable sorting at build time
+
+2013-11-24 Kent Overstreet <kmo at daterainc.com>
+
+ * block: submit_bio_wait() conversions
+
+2013-11-22 Randy Dunlap <rdunlap at infradead.org>
+
+ * slab.h: remove duplicate kmalloc declaration and fix kernel-doc warnings
+
+2013-11-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
+
+2013-11-19 Sourav Poddar <sourav.poddar at ti.com>
+
+ * spi/qspi: Fix qspi remove path.
+
+2013-11-19 Sourav Poddar <sourav.poddar at ti.com>
+
+ * spi/qspi: cleanup pm_runtime error check.
+
+2013-11-11 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * spi/qspi: set correct platform drvdata in ti_qspi_probe()
+
+2013-11-12 Mika Westerberg <mika.westerberg at linux.intel.com>
+
+ * spi/pxa2xx: add new ACPI IDs
+
+2013-11-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/fix/pfuze100' into regulator-linus
+
+2013-11-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/fix/gpio' into regulator-linus
+
+2013-11-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/fix/fixed' into regulator-linus
+
+2013-11-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/fix/arizona' into regulator-linus
+
+2013-11-22 Stephen Warren <swarren at nvidia.com>
+
+ * ASoC: dapm: Use SND_SOC_DAPM_INIT_REG_VAL in SND_SOC_DAPM_MUX
+
+2013-11-21 Jarkko Nikula <jarkko.nikula at linux.intel.com>
+
+ * ASoC: Rename mid-x86 directory to intel
+
+2013-11-07 Kuninori Morimoto <kuninori.morimoto.gx at renesas.com>
+
+ * ASoC: rcar: select REGMAP
+
+2013-11-20 Nicolin Chen <b42378 at freescale.com>
+
+ * ASoC: soc-pcm: move DAIs parameters cleaning into hw_free()
+
+2013-11-13 Nicolin Chen <b42378 at freescale.com>
+
+ * ASoC: soc-pcm: add symmetry for channels and sample bits
+
+2013-11-09 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * irqchip: renesas-intc-irqpin: Fix register bitfield shift calculation
+
+2013-11-06 Simon Horman <horms+renesas at verge.net.au>
+
+ * ARM: shmobile: lager: phy fixup needs CONFIG_PHYLIB
+
+2013-11-24 Phillip Lougher <phillip at squashfs.org.uk>
+
+ * Squashfs: fix failure to unlock pages on decompress error
+
+2013-11-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Revert "KEYS: verify a certificate is signed by a 'trusted' key"
+
+2013-11-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Revert "ima: define '_ima' as a builtin 'trusted' keyring"
+
+2013-11-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
+
+2013-11-12 Eric Van Hensbergen <ericvh at gmail.com>
+
+ * net/9p: remove virtio default hack and set appropriate bits instead
+
+2013-10-21 Geyslan G. Bem <geyslan at gmail.com>
+
+ * 9p: remove useless 'name' variable and assignment
+
+2013-10-21 Geyslan G. Bem <geyslan at gmail.com>
+
+ * 9p: fix return value in case in v9fs_fid_xattr_set()
+
+2013-11-09 Li Wang <liwang at ubuntukylin.com>
+
+ * ceph: allocate non-zero page to fscache in readpage()
+
+2013-10-31 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: wake up 'safe' waiters when unregistering request
+
+2013-09-26 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: cleanup aborted requests when re-sending requests.
+
+2013-09-22 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: handle race between cap reconnect and cap release
+
+2013-09-22 Yan, Zheng <zheng.z.yan at intel.com>
+
+ * ceph: set caps count after composing cap reconnect message
+
+2013-11-17 Tejun Heo <tj at kernel.org>
+
+ * sysfs: use a separate locking class for open files depending on mmap
+
+2013-11-17 Samir Benmendil <samir.benmendil at gmail.com>
+
+ * ahci: add Marvell 9230 to the AHCI PCI device list
+
+2013-11-20 Yijing Wang <wangyijing at huawei.com>
+
+ * ata: fix acpi_bus_get_device() return value check
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for_linus' of git://cavan.codon.org.uk/platform-drivers-x86
+
+2013-09-10 Li Bin <huawei.libin at huawei.com>
+
+ * workqueue: fix pool ID allocation leakage and remove BUILD_BUG_ON() in init_workqueues
+
+2013-09-09 Li Bin <huawei.libin at huawei.com>
+
+ * workqueue: fix comment typo for __queue_work()
+
+2013-09-05 Tejun Heo <tj at kernel.org>
+
+ * workqueue: fix ordered workqueues in NUMA setups
+
+2013-11-14 Oleg Nesterov <oleg at redhat.com>
+
+ * workqueue: swap set_cpus_allowed_ptr() and PF_NO_SETAFFINITY
+
+2013-11-21 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * pata_arasan_cf: add missing clk_disable_unprepare() on error path
+
+2013-11-22 Alistair Popple <alistair at popple.id.au>
+
+ * ahci: add support for IBM Akebono platform device
+
+2013-11-22 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Merge branch 'next' into for-linus
+
+2013-11-22 Tejun Heo <tj at kernel.org>
+
+ * cgroup: use a dedicated workqueue for cgroup destruction
+
+2013-11-22 Martin Schwidefsky <schwidefsky at de.ibm.com>
+
+ * time: Fix 1ns/tick drift w/ GENERIC_TIME_VSYSCALL_OLD
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.13-rc1
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'ecryptfs-3.13-rc1-quiet-checkers' of git://git.kernel.org/pub/scm/linux/kernel/git/tyhicks/ecryptfs
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-fix2-3.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pci-v3.13-fixes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://git.linaro.org/people/rmk/linux-arm
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'next' of git://git.kernel.org/pub/scm/virt/kvm/kvm
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kvack.org/~bcrl/aio-next
+
+2013-11-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.13' of git://linux-nfs.org/~bfields/linux
+
+2013-11-21 Takashi Sakamoto <o-takashi at sakamocchi.jp>
+
+ * ALSA: firewire-lib: fix wrong value for FDF field as an empty packet
+
+2013-11-22 David Henningsson <david.henningsson at canonical.com>
+
+ * ALSA: hda - Set current_headset_type to ALC_HEADSET_TYPE_ENUM (janitorial)
+
+2013-11-22 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Provide missing pin configs for VAIO with ALC260
+
+2013-11-21 Stephen Warren <swarren at nvidia.com>
+
+ * spi: core: invert success test in devm_spi_register_master
+
+2013-11-21 Herbert Xu <herbert at gondor.apana.org.au>
+
+ * gso: handle new frag_list of frags GRO packets
+
+2013-11-21 Johannes Berg <johannes.berg at intel.com>
+
+ * genetlink: fix genl_set_err() group ID
+
+2013-11-21 Johannes Berg <johannes.berg at intel.com>
+
+ * genetlink: fix genlmsg_multicast() bug
+
+2013-11-21 Daniel Borkmann <dborkman at redhat.com>
+
+ * packet: fix use after free race in send path when dev is released
+
+2013-11-20 David Sterba <dsterba at suse.cz>
+
+ * Documentation: filesystems: update btrfs tools section
+
+2013-11-20 David Sterba <dsterba at suse.cz>
+
+ * Documentation: filesystems: add new btrfs mount options
+
+2013-11-21 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.13-5' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2013-11-20 Courtney Cavin <courtney.cavin at sonymobile.com>
+
+ * regmap: make sure we unlock on failure in regmap_bulk_write
+
+2013-11-21 David Henningsson <david.henningsson at canonical.com>
+
+ * ALSA: hda - Add headset quirk for Dell Inspiron 3135
+
+2013-11-21 David Herrmann <dh.herrmann at gmail.com>
+
+ * drm/sysfs: fix hotplug regression since lifetime changes
+
+2013-11-20 Benjamin Tissoires <benjamin.tissoires at redhat.com>
+
+ * HID: kye: fix unresponsive keyboard
+
+2013-11-20 Benjamin Tissoires <benjamin.tissoires at redhat.com>
+
+ * HID: kye: Add report fixup for Genius Manticore Keyboard
+
+2013-11-18 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * KVM: kvm_clear_guest_page(): fix empty_zero_page usage
+
+2013-11-21 KaiChung Cheng <kenny_cheng at wistron.com>
+
+	* HID: multitouch: add PID VID to support 1 new Wistron optical touch device
+
+2013-11-07 Benjamin Tissoires <benjamin.tissoires at redhat.com>
+
+ * HID: appleir: force input to be set
+
+2013-11-21 Inki Dae <inki.dae at samsung.com>
+
+ * drm/exynos: g2d: fix memory leak to userptr
+
+2013-11-21 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'ttm-fixes-3.13' of git://people.freedesktop.org/~thomash/linux into drm-fixes
+
+2013-11-21 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'vmwgfx-fixes-3.13' of git://people.freedesktop.org/~thomash/linux into drm-fixes
+
+2013-11-21 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm-intel-fixes-2013-11-20' of git://people.freedesktop.org/~danvet/drm-intel into drm-fixes
+
+2013-11-21 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-next-3.13' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
+
+2013-11-21 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix the headphone jack detection on Sony VAIO TX
+
+2013-11-21 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix missing bass speaker on ASUS N550
+
+2013-11-20 Eric Seppanen <eric at purestorage.com>
+
+ * iscsi-target: chap auth shouldn't match username with trailing garbage
+
+2013-11-20 Eric Seppanen <eric at purestorage.com>
+
+ * iscsi-target: fix extract_param to handle buffer length corner case
+
+2013-11-20 Haiyang Zhang <haiyangz at microsoft.com>
+
+ * MAINTAINERS - add keyboard driver to Hyper-V file list
+
+2013-11-19 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Input: atmel-wm97xx - fix compile error
+
+2013-11-19 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * Input: hp_sdc_rtc - unlock on error in hp_sdc_rtc_read_i8042timer()
+
+2013-11-19 Xie XiuQi <xiexiuqi at huawei.com>
+
+ * Input: cyttsp4 - remove unnecessary work pending test
+
+2013-11-20 David Sterba <dsterba at suse.cz>
+
+ * btrfs: update kconfig help text
+
+2013-11-18 Akinobu Mita <akinobu.mita at gmail.com>
+
+ * btrfs: fix bio_size_ok() for max_sectors > 0xffff
+
+2013-11-14 Steven Rostedt <rostedt at goodmis.org>
+
+ * btrfs: Use trace condition for get_extent tracepoint
+
+2013-11-14 Anand Jain <Anand.Jain at oracle.com>
+
+ * btrfs: fix typo in the log message
+
+2013-11-14 Miao Xie <miaox at cn.fujitsu.com>
+
+ * Btrfs: fix list delete warning when removing ordered root from the list
+
+2013-11-13 Stefan Behrens <sbehrens at giantdisaster.de>
+
+ * Btrfs: print bytenr instead of page pointer in check-int
+
+2013-11-12 Wang Shilong <wangsl.fnst at cn.fujitsu.com>
+
+ * Btrfs: remove dead codes from ctree.h
+
+2013-11-06 Filipe David Borba Manana <fdmanana at gmail.com>
+
+ * Btrfs: don't wait for ordered data outside desired range
+
+2013-11-06 Liu Bo <bo.li.liu at oracle.com>
+
+ * Btrfs: fix lockdep error in async commit
+
+2013-10-03 Prarit Bhargava <prarit at redhat.com>
+
+ * x86, wmi fix modalias_show return values
+
+2013-11-15 Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy at linux.intel.com>
+
+ * ipc: Added support for IPC interrupt mode
+
+2013-11-14 Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy at linux.intel.com>
+
+ * ipc: Handle error conditions in ipc command
+
+2013-11-14 Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy at linux.intel.com>
+
+ * ipc: Enabled ipc support for additional intel platforms
+
+2013-11-14 Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy at linux.intel.com>
+
+ * ipc: Added platform data structure
+
+2013-10-24 Takashi Iwai <tiwai at suse.de>
+
+ * thinkpad_acpi: Fix build error when CONFIG_SND_MAX_CARDS > 32
+
+2013-11-07 Olof Johansson <olof at lixom.net>
+
+ * platform: add chrome platform directory
+
+2013-10-29 Alex Hung <alex.hung at canonical.com>
+
+ * hp-wmi: detect "2009 BIOS or later" flag by WMI 0x0d for wireless cmd
+
+2013-11-12 Alex Hung <alex.hung at canonical.com>
+
+ * dell-wmi: Add KEY_MICMUTE to bios_to_linux_keycode
+
+2013-11-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
+
+2013-11-20 Stephen Boyd <sboyd at codeaurora.org>
+
+ * clocksource: arm_arch_timer: Hide eventstream Kconfig on non-ARM
+
+2013-11-19 Will Deacon <will.deacon at arm.com>
+
+ * ARM: 7894/1: kconfig: select GENERIC_CLOCKEVENTS if HAVE_ARM_ARCH_TIMER
+
+2013-11-19 Will Deacon <will.deacon at arm.com>
+
+ * ARM: 7893/1: bitops: only emit .arch_extension mp if CONFIG_SMP
+
+2013-11-18 Yinghai Lu <yinghai at kernel.org>
+
+ * PCI: Remove duplicate pci_disable_device() from pcie_portdrv_remove()
+
+2013-11-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mattst88/alpha
+
+2013-11-18 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * cpufreq: exynos: Remove unwanted EXPORT_SYMBOL
+
+2013-11-20 Hong Zhiguo <zhiguohong at tencent.com>
+
+ * Update of blkg_stat and blkg_rwstat may happen in bh context. While u64_stats_fetch_retry is only preempt_disable on 32bit UP system. This is not enough to avoid preemption by bh and may read strange 64 bit value.
+
+2013-11-20 Stephen Warren <swarren at nvidia.com>
+
+ * cpufreq: tegra: don't error target() when suspended
+
+2013-11-21 Ulrich Weigand <Ulrich.Weigand at de.ibm.com>
+
+ * powerpc: Wrong DWARF CFI in the kernel vdso for little-endian / ELFv2
+
+2013-11-20 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86-64, copy_user: Use leal to produce 32-bit results
+
+2013-11-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-2-3.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2013-11-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'next' of git://git.infradead.org/users/vkoul/slave-dma
+
+2013-11-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.dk/linux-block
+
+2013-11-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'md/3.13' of git://neil.brown.name/md
+
+2013-11-20 Trond Myklebust <Trond.Myklebust at netapp.com>
+
+ * NFSv4: close needs to handle NFS4ERR_ADMIN_REVOKED
+
+2013-11-19 Trond Myklebust <Trond.Myklebust at netapp.com>
+
+ * NFSv4: Update list of irrecoverable errors on DELEGRETURN
+
+2013-11-15 Andy Adamson <andros at netapp.com>
+
+ * NFSv4 wait on recovery for async session errors
+
+2013-11-20 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi-target: Expose default_erl as TPG attribute
+
+2013-11-19 Hannes Reinecke <hare at suse.de>
+
+ * target_core_configfs: split up ALUA supported states
+
+2013-11-19 Hannes Reinecke <hare at suse.de>
+
+ * target_core_alua: Make supported states configurable
+
+2013-11-19 Hannes Reinecke <hare at suse.de>
+
+ * target_core_alua: Store supported ALUA states
+
+2013-04-18 Jon Mason <jon.mason at intel.com>
+
+ * NTB: Disable interrupts and poll under high load
+
+2013-10-03 Jon Mason <jon.mason at intel.com>
+
+ * NTB: Enable Snoop on Primary Side
+
+2013-11-01 Jon Mason <jon.mason at intel.com>
+
+ * NTB: Document HW errata
+
+2013-10-21 Michael Opdenacker <michael.opdenacker at free-electrons.com>
+
+ * NTB: remove duplicate defines
+
+2013-11-19 Jon Mason <jon.mason at intel.com>
+
+ * NTB: correct dmaengine_get/put usage
+
+2013-09-09 Jon Mason <jon.mason at intel.com>
+
+ * NTB: Fix ntb_transport link down race
+
+2013-10-02 Alexander Gordeev <agordeev at redhat.com>
+
+ * ntb: Fix missed call to pci_enable_msix()
+
+2013-09-13 Jon Mason <jon.mason at intel.com>
+
+ * NTB: Fix NTB-RP Link Up
+
+2013-09-06 Jon Mason <jon.mason at intel.com>
+
+ * NTB: Xeon Doorbell errata workaround
+
+2013-11-20 Yijing Wang <wangyijing at huawei.com>
+
+ * hwmon: (acpi_power_meter) Fix acpi_bus_get_device() return value check
+
+2013-11-20 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix unbalanced runtime PM notification at resume
+
+2013-11-20 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/wm8962' into asoc-linus
+
+2013-11-20 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/rcar' into asoc-linus
+
+2013-11-20 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/fsl' into asoc-linus
+
+2013-11-20 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/dma' into asoc-linus
+
+2013-11-20 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/cs42l52' into asoc-linus
+
+2013-11-20 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/blackfin' into asoc-linus
+
+2013-11-20 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915: Fix gen3 self-refresh watermarks
+
+2013-11-08 Florian Echtler <floe at butterbrot.org>
+
+ * Input: add sur40 driver for Samsung SUR40 (aka MS Surface 2.0/Pixelsense)
+
+2013-11-20 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2013-11-20 David Henningsson <david.henningsson at canonical.com>
+
+ * ALSA: hda - A casual Dell Headset quirk
+
+2013-11-14 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/ttm: Remove set_need_resched from the ttm fault handler
+
+2013-11-17 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/ttm: Don't move non-existing data
+
+2013-11-19 Sasha Levin <sasha.levin at oracle.com>
+
+ * kvm: mmu: delay mmu audit activation
+
+2013-11-19 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * s390/mm: optimize copy_page
+
+2013-11-19 Stefan Weinhuber <wein at de.ibm.com>
+
+ * s390/dasd: validate request size before building CCW/TCW request
+
+2013-11-19 Hendrik Brueckner <brueckner at linux.vnet.ibm.com>
+
+ * s390/signal: always restore saved runtime instrumentation psw bit
+
+2013-11-19 Steve French <smfrench at gmail.com>
+
+ * Check SMB3 dialects against downgrade attacks
+
+2013-11-10 Phillip Lougher <phillip at squashfs.org.uk>
+
+ * Squashfs: Check stream is not NULL in decompressor_multi.c
+
+2013-11-13 Phillip Lougher <phillip at squashfs.org.uk>
+
+ * Squashfs: Directly decompress into the page cache for file data
+
+2013-10-31 Phillip Lougher <phillip at squashfs.org.uk>
+
+ * Squashfs: Restructure squashfs_readpage()
+
+2013-11-19 Jens Axboe <axboe at kernel.dk>
+
+ * blk-mq: add blktrace insert event trace
+
+2013-11-19 Shaohua Li <shli at fusionio.com>
+
+ * virtio-blk: virtqueue_kick() must be ordered with other virtqueue operations
+
+2013-10-31 Mahesh Rajashekhara <Mahesh.Rajashekhara at pmcs.com>
+
+ * aacraid: prevent invalid pointer dereference
+
+2013-11-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2013-11-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc
+
+2013-11-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'please-pull-fixia64' of git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux
+
+2013-11-19 Sasha Levin <sasha.levin at oracle.com>
+
+ * aio: nullify aio->ring_pages after freeing it
+
+2013-11-19 Sasha Levin <sasha.levin at oracle.com>
+
+ * aio: prevent double free in ioctx_alloc
+
+2013-11-14 Tim Gardner <tim.gardner at canonical.com>
+
+ * SELinux: security_load_policy: Silence frame-larger-than warning
+
+2013-11-19 Richard Haines <richard_c_haines at btinternet.com>
+
+ * SELinux: Update policy version to support constraints info
+
+2013-11-18 Kirill A. Shutemov <kirill.shutemov at linux.intel.com>
+
+ * kernel/bounds: avoid circular dependencies in generated headers
+
+2013-11-19 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'genetlink_mcast'
+
+2013-11-19 Johannes Berg <johannes.berg at intel.com>
+
+ * genetlink: make multicast groups const, prevent abuse
+
+2013-11-19 Johannes Berg <johannes.berg at intel.com>
+
+ * genetlink: pass family to functions using groups
+
+2013-11-19 Johannes Berg <johannes.berg at intel.com>
+
+ * genetlink: add and use genl_set_err()
+
+2013-11-19 Johannes Berg <johannes.berg at intel.com>
+
+ * genetlink: remove family pointer from genl_multicast_group
+
+2013-11-19 Johannes Berg <johannes.berg at intel.com>
+
+ * genetlink: remove genl_unregister_mc_group()
+
+2013-11-19 Johannes Berg <johannes.berg at intel.com>
+
+ * hsr: don't call genl_unregister_mc_group()
+
+2013-11-19 Johannes Berg <johannes.berg at intel.com>
+
+ * quota/genetlink: use proper genetlink multicast APIs
+
+2013-11-18 Trond Myklebust <Trond.Myklebust at netapp.com>
+
+ * NFS: Fix a warning in nfs_setsecurity
+
+2013-11-13 Anna Schumaker <bjschuma at netapp.com>
+
+ * NFS: Enabling v4.2 should not recompile nfsd and lockd
+
+2013-11-19 Andrey Vagin <avagin at openvz.org>
+
+ * tcp: don't update snd_nxt, when a socket is switched from repair mode
+
+2013-11-19 Samuel Li <samuel.li at amd.com>
+
+ * drm/radeon: hook up backlight functions for CI and KV family.
+
+2013-11-19 Ying Xue <ying.xue at windriver.com>
+
+ * atm: idt77252: fix dev refcnt leak
+
+2013-11-19 fan.du <fan.du at windriver.com>
+
+ * xfrm: Release dst if this dst is improper for vti tunnel
+
+2013-11-19 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'acpi-hotplug'
+
+2013-11-19 Mika Westerberg <mika.westerberg at linux.intel.com>
+
+ * PCI / hotplug / ACPI: Drop unused acpiphp_debug declaration
+
+2013-11-19 Johannes Berg <johannes.berg at intel.com>
+
+ * netlink: fix documentation typo in netlink_set_err()
+
+2013-11-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'arc-v3.13-rc1-part2' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc
+
+2013-11-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
+
+2013-11-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml
+
+2013-11-19 Arnaldo Carvalho de Melo <acme at redhat.com>
+
+ * tools lib traceevent: Fix conversion of pointer to integer of different size
+
+2013-11-17 Kuninori Morimoto <kuninori.morimoto.gx at renesas.com>
+
+ * ASoC: rcar: fixup dma_async_issue_pending() timing
+
+2013-11-08 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * ASoC: rcar: off by one in rsnd_scu_set_route()
+
+2013-11-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'irq-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-15 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * genirq: Correct fuzzy and fragile IRQ_RETVAL() definition
+
+2013-11-19 Jens Axboe <axboe at kernel.dk>
+
+ * blk-mq: ensure that we set REQ_IO_STAT so diskstats work
+
+2013-11-17 Shigeru Yoshida <shigeru.yoshida at gmail.com>
+
+ * sched: Fix a trivial typo in comments
+
+2013-11-19 Alex Shi <alex.shi at linaro.org>
+
+ * sched: Remove unused variable in 'struct sched_domain'
+
+2013-11-19 Peter Zijlstra <peterz at infradead.org>
+
+ * sched: Avoid NULL dereference on sd_busy
+
+2013-11-12 Srikar Dronamraju <srikar at linux.vnet.ibm.com>
+
+ * sched: Check sched_domain before computing group power
+
+2013-11-18 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915: Replicate BIOS eDP bpp clamping hack for hsw
+
+2013-11-15 Vince Weaver <vincent.weaver at maine.edu>
+
+ * perf/trace: Properly use u64 to hold event_id
+
+2013-09-13 Peter Zijlstra <peterz at infradead.org>
+
+ * perf: Remove fragile swevent hlist optimization
+
+2013-11-14 Peter Zijlstra <peterz at infradead.org>
+
+ * ftrace, perf: Avoid infinite event generation loop
+
+2013-11-18 Steven Rostedt <rostedt at goodmis.org>
+
+ * tools lib traceevent: Fix use of multiple options in processing field
+
+2013-11-18 Namhyung Kim <namhyung.kim at lge.com>
+
+ * perf header: Fix possible memory leaks in process_group_desc()
+
+2013-11-18 Namhyung Kim <namhyung.kim at lge.com>
+
+ * perf header: Fix bogus group name
+
+2013-11-16 Frederic Weisbecker <fweisbec at gmail.com>
+
+ * perf tools: Tag thread comm as overriden
+
+2013-11-19 Kirill A. Shutemov <kirill.shutemov at linux.intel.com>
+
+ * x86/mm: Implement ASLR for hugetlb mappings
+
+2013-11-15 Cyrill Gorcunov <gorcunov at gmail.com>
+
+ * x86/mm: Unify pte_to_pgoff() and pgoff_to_pte() helpers
+
+2013-11-18 Chris Wilson <chris at chris-wilson.co.uk>
+
+ * drm/i915: Do not enable package C8 on unsupported hardware
+
+2013-11-19 David Henningsson <david.henningsson at canonical.com>
+
+ * ALSA: hda - Also enable mute/micmute LED control for "Lenovo dock" fixup
+
+2013-11-14 majianpeng <majianpeng at gmail.com>
+
+ * md/raid5: Use conf->device_lock protect changing of multi-thread resources.
+
+2013-11-14 majianpeng <majianpeng at gmail.com>
+
+ * md/raid5: Before freeing old multi-thread worker, it should flush them.
+
+2013-11-14 majianpeng <majianpeng at gmail.com>
+
+ * md/raid5: For stripe with R5_ReadNoMerge, we replace REQ_FLUSH with REQ_NOMERGE.
+
+2013-11-14 Aurelien Jarno <aurelien at aurel32.net>
+
+ * UAPI: include <asm/byteorder.h> in linux/raid/md_p.h
+
+2013-11-15 majianpeng <majianpeng at gmail.com>
+
+ * raid1: Rewrite the implementation of iobarrier.
+
+2013-11-19 Al Viro <viro at ZenIV.linux.org.uk>
+
+ * seq_file: always clear m->count when we free m->buf
+
+2013-11-18 Olof Johansson <olof at lixom.net>
+
+ * ARM: 7892/1: Fix warning for V7M builds
+
+2013-11-18 Tony Lindgren <tony at atomide.com>
+
+ * ARM: OMAP2+: Remove legacy omap4_twl6030_hsmmc_init
+
+2013-11-18 Tony Lindgren <tony at atomide.com>
+
+ * ARM: OMAP2+: Remove legacy mux code for display.c
+
+2013-11-19 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-sleep'
+
+2013-11-19 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-cpufreq'
+
+2013-11-19 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-runtime'
+
+2013-11-19 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-tools'
+
+2013-11-19 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-cpuidle'
+
+2013-11-19 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'acpi-video'
+
+2013-11-19 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'acpi-ec'
+
+2013-11-18 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / scan: Set flags.match_driver in acpi_bus_scan_fixed()
+
+2013-11-14 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / PCI root: Clear driver_data before failing enumeration
+
+2013-11-14 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / hotplug: Fix PCI host bridge hot removal
+
+2013-11-14 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / hotplug: Fix acpi_bus_get_device() return value check
+
+2013-11-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://www.linux-watchdog.org/linux-watchdog
+
+2013-11-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'i2c/for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux
+
+2013-11-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
+
+2013-11-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-v3.13' of git://git.infradead.org/battery-2.6
+
+2013-11-18 Tony Lindgren <tony at atomide.com>
+
+ * ARM: OMAP2+: Fix undefined reference to set_cntfreq
+
+2013-11-18 Tony Lindgren <tony at atomide.com>
+
+ * gpio: twl4030: Fix passing of pdata in the device tree case
+
+2013-11-18 Steve French <smfrench at gmail.com>
+
+ * Removed duplicated (and unneeded) goto
+
+2013-11-16 Steve French <smfrench at gmail.com>
+
+ * CIFS: Fix SMB2/SMB3 Copy offload support (refcopy) for large files
+
+2013-11-18 Tony Lindgren <tony at atomide.com>
+
+ * gpio: twl4030: Fix regression for twl gpio output
+
+2013-11-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'topic/kbuild-fixes-for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
+
+2013-11-18 Stephen Rothwell <sfr at canb.auug.org.au>
+
+ * sparc64: merge fix
+
+2013-11-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'v4l_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
+
+2013-11-18 Kirill A. Shutemov <kirill.shutemov at linux.intel.com>
+
+ * sparc64: fix build regression
+
+2013-11-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'linux_next' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac
+
+2013-11-13 Guenter Roeck <linux at roeck-us.net>
+
+ * hwmon: (nct6775) NCT6791 supports weight control only for CPUFAN
+
+2013-11-13 Guenter Roeck <linux at roeck-us.net>
+
+ * hwmon: (nct6775) Monitor additional temperature registers
+
+2013-11-09 Arnaud Ebalard <arno at natisbad.org>
+
+ * hwmon: (lm75) Add support for GMT G751 chip
+
+2013-11-18 Ajit Khaparde <ajit.khaparde at emulex.com>
+
+ * be2net: Delete secondary unicast MAC addresses during be_close
+
+2013-11-18 Ajit Khaparde <ajit.khaparde at emulex.com>
+
+ * be2net: Fix unconditional enabling of Rx interface options
+
+2013-11-18 Zhi Yong Wu <wuzhy at linux.vnet.ibm.com>
+
+ * net, virtio_net: replace the magic value
+
+2013-08-17 Guenter Roeck <linux at roeck-us.net>
+
+ * watchdog: w83627hf: Use helper functions to access superio registers
+
+2013-08-17 Guenter Roeck <linux at roeck-us.net>
+
+ * watchdog: w83627hf: Enable watchdog device only if not already enabled
+
+2013-08-17 Guenter Roeck <linux at roeck-us.net>
+
+ * watchdog: w83627hf: Enable watchdog only once
+
+2013-10-28 Guenter Roeck <linux at roeck-us.net>
+
+ * watchdog: w83627hf: Convert to watchdog infrastructure
+
+2013-11-18 Akinobu Mita <akinobu.mita at gmail.com>
+
+ * bio: fix argument of __bio_add_page() for max_sectors > 0xffff
+
+2013-11-18 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * i2c: bcm-kona: fix error return code in bcm_kona_i2c_probe()
+
+2013-11-17 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * xen/arm: p2m_init and p2m_lock should be static
+
+2013-11-18 Josh Boyer <jwboyer at redhat.com>
+
+ * arm/xen: Export phys_to_mach to fix Xen module link errors
+
+2013-11-18 Michel Dänzer <michel.daenzer at amd.com>
+
+ * drm/radeon/cik: Add macrotile mode array query
+
+2013-11-15 Ben Hutchings <ben at decadent.org.uk>
+
+ * deb-pkg: Inhibit initramfs builders if CONFIG_BLK_DEV_INITRD is not set
+
+2013-11-08 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Make vmwgfx dma buffers prime aware
+
+2013-11-08 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Make surfaces prime-aware
+
+2013-11-13 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/vmwgfx: Hook up the prime ioctls
+
+2013-11-15 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * spi: spi-mxs: fix reference leak to master in mxs_spi_remove()
+
+2013-11-10 Kuninori Morimoto <kuninori.morimoto.gx at renesas.com>
+
+ * ASoC: rcar: fixup mod access before checking
+
+2013-11-13 Thomas Hellstrom <thellstrom at vmware.com>
+
+ * drm/ttm: Add a minimal prime implementation for ttm base objects
+
+2013-11-16 Shawn Guo <shawn.guo at linaro.org>
+
+ * ARM: dts: imx6qdl: disable spdif "rxtx5" clock option
+
+2013-11-14 H. Peter Anvin <hpa at zytor.com>
+
+ * Revert "init/Kconfig: add option to disable kernel compression"
+
+2013-11-16 Victor Kamensky <victor.kamensky at linaro.org>
+
+ * watchdog: omap_wdt: raw read and write endian fix
+
+2013-11-11 Uwe Kleine-König <u.kleine-koenig at pengutronix.de>
+
+ * watchdog: sirf: don't depend on dummy value of CLOCK_TICK_RATE
+
+2013-11-17 Andreas Werner <wernerandy at gmx.de>
+
+ * i2c: i2c-eg20t: do not print error message in syslog if no ACK received
+
+2013-11-17 Roland Dreier <roland at purestorage.com>
+
+ * Merge branches 'cma', 'cxgb4', 'flowsteer', 'ipoib', 'misc', 'mlx4', 'mlx5', 'nes', 'ocrdma', 'qib' and 'srp' into for-next
+
+2013-11-06 Matan Barak <matanb at mellanox.com>
+
+ * IB/core: Re-enable create_flow/destroy_flow uverbs
+
+2013-11-06 Yann Droneaud <ydroneaud at opteya.com>
+
+ * IB/core: extended command: an improved infrastructure for uverbs commands
+
+2013-11-06 Yann Droneaud <ydroneaud at opteya.com>
+
+ * IB/core: Remove ib_uverbs_flow_spec structure from userspace
+
+2013-11-12 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * um: Remove unused declarations from <as-layout.h>
+
+2013-10-27 Michael Opdenacker <michael.opdenacker at free-electrons.com>
+
+ * um: remove unused STDIO_CONSOLE Kconfig param
+
+2013-09-27 Ramkumar Ramachandra <artagnon at gmail.com>
+
+ * um/vdso: add .gitignore for a couple of targets
+
+2013-11-16 Fenghua Yu <fenghua.yu at intel.com>
+
+ * x86-64, copy_user: Remove zero byte check before copy user buffer.
+
+2013-11-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.samba.org/sfrench/cifs-2.6
+
+2013-11-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'nfs-for-3.13-2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
+
+2013-11-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-fix-3.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2013-11-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2013-10-22 Joe Perches <joe at perches.com>
+
+ * IB/ucma: Convert use of typedef ctl_table to struct ctl_table
+
+2013-11-15 Zhao Hongjiang <zhaohongjiang at huawei.com>
+
+ * IB/cm: Convert to using idr_alloc_cyclic()
+
+2013-11-16 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.13-4' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2013-11-16 Vinod Koul <vinod.koul at intel.com>
+
+ * Merge commit 'dmaengine-3.13-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/dmaengine
+
+2013-11-15 Steve French <smfrench at gmail.com>
+
+ * [CIFS] Warn if SMB3 encryption required by server
+
+2013-11-15 Steve French <smfrench at gmail.com>
+
+ * setfacl removes part of ACL when setting POSIX ACLs to Samba
+
+2013-11-15 lan,Tianyu <tianyu.lan at intel.com>
+
+ * cpufreq: governor: Remove fossil comment in the cpufreq_governor_dbs()
+
+2013-11-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
+
+2013-11-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial
+
+2013-11-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
+
+2013-11-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'mfd-3.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/sameo/mfd-next
+
+2013-11-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jdelvare/staging
+
+2013-11-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.dk/linux-block
+
+2013-11-16 Jiri Kosina <jkosina at suse.cz>
+
+ * Merge branches 'for-3.12/upstream-fixes', 'for-3.13/holtek', 'for-3.13/i2c-hid', 'for-3.13/logitech', 'for-3.13/multitouch', 'for-3.13/roccat', 'for-3.13/upstream' and 'for-3.13/wiimote' into for-linus
+
+2013-11-14 Tim Kryger <tim.kryger at linaro.org>
+
+ * i2c: bcm-kona: Introduce Broadcom I2C Driver
+
+2013-10-31 Eli Cohen <eli at dev.mellanox.co.il>
+
+ * IB/mlx5: Fix page shift in create CQ for userspace
+
+2013-10-31 Eli Cohen <eli at dev.mellanox.co.il>
+
+ * IB/mlx4: Fix device max capabilities check
+
+2013-10-31 Eli Cohen <eli at dev.mellanox.co.il>
+
+ * IB/mlx5: Fix list_del of empty list
+
+2013-11-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
+
+2013-11-14 Tony Lindgren <tony at atomide.com>
+
+ * i2c: cbus-gpio: Fix device tree binding
+
+2013-11-15 Aaron Lu <aaron.lu at intel.com>
+
+ * ACPI / video: clean up DMI table for initial black screen problem
+
+2013-11-15 Steve French <smfrench at gmail.com>
+
+ * [CIFS] Set copychunk defaults
+
+2013-11-10 Larry Finger <Larry.Finger at lwfinger.net>
+
+ * rtlwifi: rtl8192cu: Fix more pointer arithmetic errors
+
+2013-11-13 Christoph Hellwig <hch at infradead.org>
+
+ * nfs: fix pnfs Kconfig defaults
+
+2013-11-14 NeilBrown <neilb at suse.de>
+
+ * NFS: correctly report misuse of "migration" mount option.
+
+2013-10-31 Eli Cohen <eli at dev.mellanox.co.il>
+
+ * IB/core: Enforce MR access rights rules on kernel consumers
+
+2013-10-31 Eli Cohen <eli at dev.mellanox.co.il>
+
+ * IB/mlx4: Fix endless loop in resize CQ
+
+2013-11-15 Charles Keepax <ckeepax at opensource.wolfsonmicro.com>
+
+ * regulator: arizona-micsupp: Correct wm5110 voltage selection
+
+2013-11-15 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * swiotlb-xen: add missing xen_dma_map_page call
+
+2013-11-15 Sebastian Ott <sebott at linux.vnet.ibm.com>
+
+ * s390/pci: implement hotplug notifications
+
+2013-11-14 Sebastian Ott <sebott at linux.vnet.ibm.com>
+
+ * s390/scm_block: do not hide eadm subchannel dependency
+
+2013-11-13 Michael Holzheu <holzheu at linux.vnet.ibm.com>
+
+ * s390/sclp: Consolidate early sclp init calls to sclp_early_detect()
+
+2013-11-13 Michael Holzheu <holzheu at linux.vnet.ibm.com>
+
+ * s390/sclp: Move early code from sclp_cmd.c to sclp_early.c
+
+2013-11-13 David Herrmann <dh.herrmann at gmail.com>
+
+ * drm: check for !kdev in drm_unplug_minor()
+
+2013-11-15 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * spi: bcm63xx: fix reference leak to master in bcm63xx_spi_remove()
+
+2013-11-15 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * spi: txx9: fix reference leak to master in txx9spi_remove()
+
+2013-11-15 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * spi: mpc512x: fix reference leak to master in mpc512x_psc_spi_do_remove()
+
+2013-11-15 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * spi: rspi: use platform drvdata correctly in rspi_remove()
+
+2013-11-15 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * spi: bcm2835: fix reference leak to master in bcm2835_spi_remove()
+
+2013-11-14 Brian Austin <brian.austin at cirrus.com>
+
+ * ASoC: cs42l52: Correct MIC CTL mask
+
+2013-11-15 Wei Ni <wni at nvidia.com>
+
+ * Documentation: dt: hwmon: Add OF document for LM90
+
+2013-11-15 Wei Ni <wni at nvidia.com>
+
+ * hwmon: (lm90) Add power control
+
+2013-11-14 David Rientjes <rientjes at google.com>
+
+ * x86: Export 'boot_cpu_physical_apicid' to modules
+
+2013-11-14 Joe Perches <joe at perches.com>
+
+ * MAINTAINERS: Update file patterns in the lockdep and scheduler entries
+
+2013-11-13 Mischa Jonker <mjonker at synopsys.com>
+
+ * ARC: [plat-arcfpga] Add defconfig without initramfs location
+
+2013-11-13 Mischa Jonker <mjonker at synopsys.com>
+
+ * ARC: perf: ARC 700 PMU doesn't support sampling events
+
+2013-11-15 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-nouveau-next' of git://anongit.freedesktop.org/git/nouveau/linux-2.6 into drm-next
+
+2013-11-14 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Merge branch 'next' into for-linus
+
+2013-11-14 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Revert "Input: ALPS - add support for model found on Dell XT2"
+
+2013-11-14 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * PM / Hibernate: Do not crash kernel in free_basic_memory_bitmaps()
+
+2013-11-13 Tyler Hicks <tyhicks at canonical.com>
+
+ * eCryptfs: file->private_data is always valid
+
+2013-10-30 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * dma: mv_xor: Fix mis-usage of mmio 'base' and 'high_base' registers
+
+2013-10-30 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * dma: mv_xor: Remove unneeded NULL address check
+
+2013-11-13 Dan Williams <dan.j.williams at intel.com>
+
+ * ioat: fix ioat3_irq_reinit
+
+2013-11-13 Dan Williams <dan.j.williams at intel.com>
+
+ * ioat: kill msix_single_vector support
+
+2013-11-13 Dan Williams <dan.j.williams at intel.com>
+
+ * raid6test: add new corner case for ioatdma driver
+
+2013-11-13 Dan Williams <dan.j.williams at intel.com>
+
+ * ioatdma: clean up sed pool kmem_cache
+
+2013-11-14 Bjorn Helgaas <bhelgaas at google.com>
+
+ * PCI: Fix whitespace, capitalization, and spelling errors
+
+2013-11-11 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * i2c: wmt: add missing clk_disable_unprepare() on error
+
+2013-11-14 Nicolin Chen <b42378 at freescale.com>
+
+ * ASoC: wm8962: Turn on regcache_cache_only before disabling regulator
+
+2013-11-01 Jens Axboe <axboe at kernel.dk>
+
+ * virtio_blk: blk-mq support
+
+2013-11-12 Oskar Schirmer <oskar at scara.com>
+
+ * ASoC: fsl: imx-pcm-fiq: omit fiq counter to avoid harm in unbalanced situations
+
+2013-11-05 Tim Harvey <tharvey at gateworks.com>
+
+ * regulator: pfuze100: allow misprogrammed ID
+
+2013-11-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
+
+2013-11-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'xfs-for-linus-v3.13-rc1' of git://oss.sgi.com/xfs/xfs
+
+2013-11-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-trace-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-14 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fbdev-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tomba/linux
+
+2013-11-12 Maarten Lankhorst <maarten.lankhorst at canonical.com>
+
+ * drm/nouveau: do not map evicted vram buffers in nouveau_bo_vma_add
+
+2013-11-13 Jeff Layton <jlayton at redhat.com>
+
+ * nfs: don't retry detect_trunking with RPC_AUTH_UNIX more than once
+
+2013-11-14 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-next-3.13' of git://people.freedesktop.org/~agd5f/linux into drm-next
+
+2013-11-13 viresh kumar <viresh.kumar at linaro.org>
+
+ * cpufreq: OMAP: Fix compilation error 'r & ret undeclared'
+
+2013-11-13 Ulf Hansson <ulf.hansson at linaro.org>
+
+ * PM / Runtime: Fix error path for prepare
+
+2013-09-17 Thomas Gleixner <tglx at linutronix.de>
+
+ * preempt: Make PREEMPT_ACTIVE generic
+
+2013-09-17 Thomas Gleixner <tglx at linutronix.de>
+
+ * sparc: Use preempt_schedule_irq
+
+2013-09-17 Thomas Gleixner <tglx at linutronix.de>
+
+ * ia64: Use preempt_schedule_irq
+
+2013-09-17 Thomas Gleixner <tglx at linutronix.de>
+
+ * m32r: Use preempt_schedule_irq
+
+2013-09-17 Thomas Gleixner <tglx at linutronix.de>
+
+ * hardirq: Make hardirq bits generic
+
+2013-11-11 Thomas Gleixner <tglx at linutronix.de>
+
+ * m68k: Simplify low level interrupt handling code
+
+2013-11-06 Thomas Gleixner <tglx at linutronix.de>
+
+ * genirq: Prevent spurious detection for unconditionally polled interrupts
+
+2013-11-13 Guennadi Liakhovetski <g.liakhovetski at gmx.de>
+
+ * regulator: fixed: fix regulator_list_voltage() for regression
+
+2013-11-08 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * dma: pl330: silence a compile warning
+
+2013-11-08 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * dma: pl330: off by one in pl330_probe()
+
+2013-11-12 Peter Zijlstra <peterz at infradead.org>
+
+ * block: Use u64_stats_init() to initialize seqcounts
+
+2013-11-09 Fengguang Wu <fengguang.wu at intel.com>
+
+ * locking/lockdep: Mark __lockdep_count_forward_deps() as static
+
+2013-11-10 Michal Nazarewicz <mina86 at mina86.com>
+
+ * sched/fair: Avoid integer overflow
+
+2013-11-11 Peter Zijlstra <peterz at infradead.org>
+
+ * sched: Optimize task_sched_runtime()
+
+2013-11-06 Peter Zijlstra <peterz at infradead.org>
+
+ * sched/numa: Cure update_numa_stats() vs. hotplug
+
+2013-10-29 Markus Pargmann <mpa at pengutronix.de>
+
+ * dma: mxs-dma: Use semaphores for cyclic DMA
+
+2013-10-29 Markus Pargmann <mpa at pengutronix.de>
+
+ * dma: mxs-dma: Update state after channel reset
+
+2013-10-29 Markus Pargmann <mpa at pengutronix.de>
+
+ * dma: mxs-dma: Fix channel reset hardware bug
+
+2013-10-29 Markus Pargmann <mpa at pengutronix.de>
+
+ * dma: mxs-dma: Report correct residue for cyclic DMA
+
+2013-11-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
+
+2013-11-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (patches from Andrew Morton)
+
+2013-11-07 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * pm2301-charger: Remove unneeded NULL checks
+
+2013-10-31 NeilBrown <neilb at suse.de>
+
+ * twl4030_charger: Add devicetree support
+
+2013-11-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
+
+2013-11-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'dlm-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm
+
+2013-11-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'upstream-3.13-rc1' of git://git.infradead.org/linux-ubi
+
+2013-11-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'upstream-3.13-rc1' of git://git.infradead.org/linux-ubifs
+
+2013-11-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse
+
+2013-11-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
+
+2013-11-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-f2fs-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
+
+2013-11-12 Mathias Krause <minipli at googlemail.com>
+
+ * ipc, msg: fix message length check for negative values
+
+2013-11-12 Xie XiuQi <xiexiuqi at huawei.com>
+
+ * ipc/util.c: remove unnecessary work pending test
+
+2013-11-12 Ilija Hadzic <ihadzic at research.bell-labs.com>
+
+ * devpts: plug the memory leak in kill_sb
+
+2013-11-12 P J P <ppandit at redhat.com>
+
+ * ./Makefile: export initial ramdisk compression config option
+
+2013-11-12 Christian Ruppert <christian.ruppert at abilis.com>
+
+ * init/Kconfig: add option to disable kernel compression
+
+2013-11-12 Michal Nazarewicz <mina86 at mina86.com>
+
+ * drivers: w1: make w1_slave::flags long to avoid memory corruption
+
+2013-11-12 Jingoo Han <jg1.han at samsung.com>
+
+ * drivers/w1/masters/ds1wm.c: use dev_get_platdata()
+
+2013-11-12 Trond Myklebust <Trond.Myklebust at netapp.com>
+
+ * SUNRPC: Avoid deep recursion in rpc_release_client
+
+2013-11-08 Ulf Hansson <ulf.hansson at linaro.org>
+
+ * PM / Runtime: Update documentation around probe|remove|suspend
+
+2013-11-08 Xiaoguang Chen <chenxg at marvell.com>
+
+ * cpufreq: conservative: set requested_freq to policy max when it is over policy max
+
+2013-11-09 Len Brown <len.brown at intel.com>
+
+ * tools / power turbostat: Support Silvermont
+
+2013-11-09 Len Brown <len.brown at intel.com>
+
+ * intel_idle: Support Intel Atom Processor C2000 Product Family
+
+2013-11-12 Thomas Renninger <trenn at suse.de>
+
+ * x86/microcode/amd: Tone down printk(), don't treat a missing firmware file as an error
+
+2013-11-12 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2013-11-11 Steven Rostedt <rostedt at goodmis.org>
+
+ * tools lib traceevent: Add direct access to dynamic arrays
+
+2013-10-25 Jiri Slaby <jslaby at suse.cz>
+
+ * x86/dumpstack: Fix printk_address for direct addresses
+
+2013-11-12 Arnaldo Carvalho de Melo <acme at redhat.com>
+
+ * perf target: Shorten perf_target__ to target__
+
+2013-11-12 Adrian Hunter <adrian.hunter at intel.com>
+
+ * perf tests: Handle throttle events in 'object code reading' test
+
+2013-11-12 David Ahern <dsahern at gmail.com>
+
+ * perf evlist: Refactor mmap_pages parsing
+
+2013-11-12 Tristan Rice <rice at outerearth.net>
+
+ * HID: enable Mayflash USB Gamecube Adapter
+
+2013-11-12 Kees Cook <keescook at chromium.org>
+
+ * x86, kaslr: Use char array to gain sizeof sanity
+
+2013-11-11 H. Peter Anvin <hpa at zytor.com>
+
+ * x86, kaslr: Add a circular multiply for better bit diffusion
+
+2013-11-11 Kees Cook <keescook at chromium.org>
+
+ * x86, kaslr: Mix entropy sources together as needed
+
+2013-11-07 Mischa Jonker <mjonker at synopsys.com>
+
+ * ARC: Add documentation on DT binding for ARC700 PMU
+
+2013-11-07 Mischa Jonker <mjonker at synopsys.com>
+
+ * ARC: Add perf support for ARC700 cores
+
+2013-11-11 Andreas Dilger <adilger at dilger.ca>
+
+ * ext4: add prototypes for macro-generated functions
+
+2013-11-11 Andreas Dilger <andreas.dilger at intel.com>
+
+ * ext4: return non-zero st_blocks for inline data
+
+2013-11-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-uv-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-uaccess-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-reboot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-12 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-platform-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-11 Gerhard Sittig <gsi at denx.de>
+
+ * regmap: trivial comment fix (copy'n'paste error)
+
+2013-11-11 Samuel Ortiz <sameo at linux.intel.com>
+
+ * Merge tag 'mfd-lee-3.13-3' of git://git.linaro.org/people/ljones/mfd
+
+2013-11-11 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'prandom'
+
+2013-11-11 Daniel Borkmann <dborkman at redhat.com>
+
+ * random32: add test cases for taus113 implementation
+
+2013-11-11 Daniel Borkmann <dborkman at redhat.com>
+
+ * random32: upgrade taus88 generator to taus113 from errata paper
+
+2013-11-11 Daniel Borkmann <dborkman at redhat.com>
+
+ * random32: move rnd_state to linux/random.h
+
+2013-11-11 Hannes Frederic Sowa <hannes at stressinduktion.org>
+
+ * random32: add prandom_reseed_late() and call when nonblocking pool becomes initialized
+
+2013-11-11 Hannes Frederic Sowa <hannes at stressinduktion.org>
+
+ * random32: add periodic reseeding
+
+2013-11-11 Daniel Borkmann <dborkman at redhat.com>
+
+ * random32: fix off-by-one in seeding requirement
+
+2013-11-11 Jonas Jensen <jonas.jensen at gmail.com>
+
+ * PHY: Add RTL8201CP phy_driver to realtek
+
+2013-11-11 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * xtsonic: add missing platform_set_drvdata() in xtsonic_probe()
+
+2013-11-11 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * macmace: add missing platform_set_drvdata() in mace_probe()
+
+2013-11-11 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * ethernet/arc/arc_emac: add missing platform_set_drvdata() in arc_emac_probe()
+
+2013-11-11 Ingo Molnar <mingo at kernel.org>
+
+ * Revert "x86/UV: Add uvtrace support"
+
+2013-11-10 Michal Nazarewicz <mina86 at mina86.com>
+
+ * RDMA/cma: Remove unused argument and minor dead code
+
+2013-11-01 Sean Hefty <sean.hefty at intel.com>
+
+ * RDMA/ucma: Discard events for IDs not yet claimed by user space
+
+2013-11-11 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * xen/arm: pfn_to_mfn and mfn_to_pfn return the argument if nothing is in the p2m
+
+2013-11-11 H. Peter Anvin <hpa at zytor.com>
+
+ * x86, trace: Change user|kernel_page_fault to page_fault_user|kernel
+
+2013-11-05 Alexander Shiyan <shc_work at mail.ru>
+
+ * ARM: dts: i.MX51: Fix OTG PHY clock
+
+2013-10-31 Shawn Guo <shawn.guo at linaro.org>
+
+ * ARM: imx: set up pllv3 POWER and BYPASS sequentially
+
+2013-10-30 Shawn Guo <shawn.guo at linaro.org>
+
+ * ARM: imx: pllv3 needs relock in .set_rate() call
+
+2013-10-30 Shawn Guo <shawn.guo at linaro.org>
+
+ * ARM: imx: add sleep for pllv3 relock
+
+2013-10-31 Lothar Waßmann <LW at KARO-electronics.de>
+
+ * ARM: imx6q: add missing sentinel to divider table
+
+2013-10-31 Shawn Guo <shawn.guo at linaro.org>
+
+ * ARM: imx: v7_cpu_resume() is needed by imx6sl build
+
+2013-10-31 Shawn Guo <shawn.guo at linaro.org>
+
+ * ARM: imx: improve mxc_restart() on the SRC bit writes
+
+2013-10-28 Shawn Guo <shawn.guo at linaro.org>
+
+ * ARM: imx: remove imx_src_prepare_restart() call
+
+2013-11-08 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * Documentation: mfd: Update s2mps11.txt
+
+2013-11-07 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * mfd: pm8921: Potential NULL dereference in pm8921_remove()
+
+2013-11-09 Sven Eckelmann <sven at narfation.org>
+
+ * HID: sony: Add force feedback support for Dualshock3 USB
+
+2013-10-21 Forest Bond <forest.bond at rapidrollout.com>
+
+ * Input: usbtouchscreen: ignore eGalax/D-Wav/EETI HIDs
+
+2013-10-21 Forest Bond <forest.bond at rapidrollout.com>
+
+ * HID: don't ignore eGalax/D-Wav/EETI HIDs
+
+2013-11-10 Felipe Balbi <balbi at ti.com>
+
+ * arm: dts: am335x sk: add touchscreen support
+
+2013-11-10 Felipe Balbi <balbi at ti.com>
+
+ * Input: ti_am335x_tsc - fix spelling mistake in TSC/ADC DT binding
+
+2013-11-10 Duan Jiong <duanj.fnst at cn.fujitsu.com>
+
+ * Input: cyttsp4 - replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
+
+2013-11-10 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * Input: mma8450 - add missing i2c_set_clientdata() in mma8450_probe()
+
+2013-11-10 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * Input: mpu3050 - add missing i2c_set_clientdata() in mpu3050_probe()
+
+2013-11-10 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * Input: tnetv107x-keypad - make irqs signed for error handling
+
+2013-11-08 Hannes Frederic Sowa <hannes at stressinduktion.org>
+
+ * ipv6: protect for_each_sk_fl_rcu in mem_check with rcu_read_lock_bh
+
+2013-11-11 David S. Miller <davem at davemloft.net>
+
+ * vlan: Implement vlan_dev_get_egress_qos_mask as an inline.
+
+2013-11-09 Jacob Keller <jacob.e.keller at intel.com>
+
+ * ixgbe: add warning when max_vfs is out of range.
+
+2013-10-31 Christian Ruppert <christian.ruppert at abilis.com>
+
+ * ARC: [TB10x] Updates for GPIO and pinctrl
+
+2013-11-09 Al Viro <viro at zeniv.linux.org.uk>
+
+ * ecryptfs: ->f_op is never NULL
+
+2013-11-09 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * regulator: gpio-regulator: Don't oops on missing regulator-type property
+
+2013-09-17 Dave Jones <davej at redhat.com>
+
+ * RDMA/nes: Remove self-assignment from nes_query_qp()
+
+2011-09-20 J. Bruce Fields <bfields at redhat.com>
+
+ * locks: break delegations on any attribute modification
+
+2011-09-20 J. Bruce Fields <bfields at redhat.com>
+
+ * locks: break delegations on link
+
+2011-09-20 J. Bruce Fields <bfields at redhat.com>
+
+ * locks: break delegations on rename
+
+2012-08-28 J. Bruce Fields <bfields at redhat.com>
+
+ * locks: helper functions for delegation breaking
+
+2011-09-20 J. Bruce Fields <bfields at redhat.com>
+
+ * locks: break delegations on unlink
+
+2013-10-10 Bart Van Assche <bvanassche at acm.org>
+
+ * IB/srp: Report receive errors correctly
+
+2013-10-10 Bart Van Assche <bvanassche at acm.org>
+
+ * IB/srp: Avoid offlining operational SCSI devices
+
+2013-10-10 Vu Pham <vuhuong at mellanox.com>
+
+ * IB/srp: Remove target from list before freeing Scsi_Host structure
+
+2013-10-25 Mike Marciniszyn <mike.marciniszyn at intel.com>
+
+ * IB/qib: Fix txselect regression
+
+2013-10-24 Mike Marciniszyn <mike.marciniszyn at intel.com>
+
+ * IB/qib: Fix checkpatch __packed warnings
+
+2013-10-04 Jan Kara <jack at suse.cz>
+
+ * IB/qib: Convert qib_user_sdma_pin_pages() to use get_user_pages_fast()
+
+2013-10-28 Naresh Gottumukkala <bgottumukkala at emulex.com>
+
+ * RDMA/ocrdma: Remove redundant check in ocrdma_build_fr()
+
+2013-09-06 Naresh Gottumukkala <bgottumukkala at emulex.com>
+
+ * RDMA/ocrdma: Fix a crash in rmmod
+
+2013-09-06 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * RDMA/ocrdma: Silence an integer underflow warning
+
+2013-08-21 Michal Schmidt <mschmidt at redhat.com>
+
+ * IPoIB: lower NAPI weight
+
+2013-10-16 Erez Shitrit <erezsh at mellanox.com>
+
+ * IPoIB: Start multicast join process only on active ports
+
+2013-10-16 Erez Shitrit <erezsh at mellanox.com>
+
+ * IPoIB: Add path query flushing in ipoib_ib_dev_cleanup
+
+2013-10-27 Ben Hutchings <ben at decadent.org.uk>
+
+ * IB/cxgb4: Fix formatting of physical address
+
+2013-09-24 Doug Ledford <dledford at redhat.com>
+
+ * IB/cma: Check for GID on listening device first
+
+2013-11-06 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * arm,arm64/include/asm/io.h: define struct bio_vec
+
+2013-11-08 Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+
+ * Merge remote-tracking branch 'stefano/swiotlb-xen-9.1' into stable/for-linus-3.13
+
+2013-11-08 Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+
+ * Merge tag 'v3.12-rc5' into stable/for-linus-3.13
+
+2013-11-04 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * swiotlb-xen: missing include dma-direction.h
+
+2013-11-04 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * pci-swiotlb-xen: call pci_request_acs only ifdef CONFIG_PCI
+
+2013-10-30 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * arm: make SWIOTLB available
+
+2013-11-08 Paul Moore <pmoore at redhat.com>
+
+ * Merge tag 'v3.12'
+
+2013-10-18 Ben Harris <bjh21 at cam.ac.uk>
+
+ * floppy: Correct documentation of driver options when used as a module.
+
+2013-11-06 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * pktcdvd: debugfs functions return NULL on error
+
+2013-10-29 Roger Pau Monne <roger.pau at citrix.com>
+
+ * xen-blkfront: restore the non-persistent data path
+
+2013-11-05 Bartlomiej Zolnierkiewicz <b.zolnierkie at samsung.com>
+
+ * skd: fix formatting in skd_s1120.h
+
+2013-11-05 Bartlomiej Zolnierkiewicz <b.zolnierkie at samsung.com>
+
+ * skd: reorder construct/destruct code
+
+2013-11-05 Bartlomiej Zolnierkiewicz <b.zolnierkie at samsung.com>
+
+ * skd: cleanup skd_do_inq_page_da()
+
+2013-11-05 Bartlomiej Zolnierkiewicz <b.zolnierkie at samsung.com>
+
+ * skd: remove SKD_OMIT_FROM_SRC_DIST ifdefs
+
+2013-11-05 Bartlomiej Zolnierkiewicz <b.zolnierkie at samsung.com>
+
+ * skd: remove redundant skdev->pdev assignment from skd_pci_probe()
+
+2013-11-05 Bartlomiej Zolnierkiewicz <b.zolnierkie at samsung.com>
+
+ * skd: use <asm/unaligned.h>
+
+2013-11-08 Stefan Achatz <erazor_de at users.sourceforge.net>
+
+ * HID: roccat: add missing special driver declarations
+
+2013-11-06 Simon Wood <simon at mungewell.org>
+
+ * HID:hid-lg4ff: Correct Auto-center strength for wheels other than MOMO and MOMO2
+
+2013-11-06 Simon Wood <simon at mungewell.org>
+
+ * HID:hid-lg4ff: Initialize device properties before we touch autocentering.
+
+2013-11-06 Simon Wood <simon at mungewell.org>
+
+ * HID:hid-lg4ff: ensure ConstantForce is disabled when set to 0
+
+2013-11-06 Simon Wood <simon at mungewell.org>
+
+ * HID:hid-lg4ff: Switch autocentering off when strength is set to zero.
+
+2013-11-06 Simon Wood <simon at mungewell.org>
+
+ * HID:hid-lg4ff: Scale autocentering force properly on Logitech wheel
+
+2013-11-01 Mauro Carvalho Chehab <m.chehab at samsung.com>
+
+ * [media] platform drivers: Fix build on frv arch
+
+2013-11-02 Mauro Carvalho Chehab <m.chehab at samsung.com>
+
+ * [media] lirc_zilog: Don't use dynamic static allocation
+
+2013-11-07 Michael Opdenacker <michael.opdenacker at free-electrons.com>
+
+ * scripts/tags.sh: remove obsolete __devinit[const|data]
+
+2013-11-08 Theodore Ts'o <tytso at mit.edu>
+
+ * ext4: use prandom_u32() instead of get_random_bytes()
+
+2013-11-07 Eric Sandeen <sandeen at redhat.com>
+
+ * ext4: remove unreachable code after ext4_can_extents_be_merged()
+
+2013-11-07 Xiaoguang Chen <chenxg at marvell.com>
+
+ * cpufreq: conservative: fix requested_freq reduction issue
+
+2013-11-07 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / hotplug: Consolidate deferred execution of ACPI hotplug routines
+
+2013-11-07 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * PM / runtime: Use pm_runtime_put_sync() in __device_release_driver()
+
+2013-11-06 Lee Jones <lee.jones at linaro.org>
+
+ * ASoC: generic-dmaengine-pcm: Clear slave_config memory
+
+2013-10-31 Adrian Huang <adrianhuang0701 at gmail.com>
+
+ * intel_pstate: skip the driver if ACPI has power mgmt option
+
+2013-11-07 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / hotplug: Do not execute "insert in progress" _OST
+
+2013-11-04 Gu Zheng <guz.fnst at cn.fujitsu.com>
+
+ * xfs: simplify kmem_{zone_}zalloc
+
+2013-10-17 Randy Dunlap <rdunlap at infradead.org>
+
+ * scripts/kernel-doc: make unknown function prototype a Warning instead of an Error
+
+2013-11-01 Dave Chinner <dchinner at redhat.com>
+
+ * xfs: add tracepoints to AGF/AGI read operations
+
+2013-11-01 Dave Chinner <dchinner at redhat.com>
+
+ * xfs: trace AIL manipulations
+
+2013-11-05 Herbert Xu <herbert at gondor.apana.org.au>
+
+ * crypto: s390 - Fix aes-cbc IV corruption
+
+2013-11-05 Jan Kara <jack at suse.cz>
+
+ * ext2: Fix fs corruption in ext2_get_xip_mem()
+
+2013-10-02 Maxim Patlasov <MPatlasov at parallels.com>
+
+ * fuse: writepages: protect secondary requests from fuse file release
+
+2013-10-02 Maxim Patlasov <MPatlasov at parallels.com>
+
+ * fuse: writepages: update bdi writeout when deleting secondary request
+
+2013-11-04 Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
+
+ * PowerCap: Fix build error with option -Werror=format-security
+
+2013-11-04 Benjamin LaHaise <bcrl at kvack.org>
+
+ * Merge branch 'aio-fix' of http://evilpiepirate.org/git/linux-bcache
+
+2013-11-04 James Ralston <james.d.ralston at intel.com>
+
+ * mfd: lpc_ich: Add Device IDs for Intel Wildcat Point-LP PCH
+
+2013-11-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
+
+2013-11-03 Stefan Achatz <erazor_de at users.sourceforge.net>
+
+ * HID: roccat: fix Coverity CID 141438
+
+2013-10-10 Chanwoo Choi <cw00.choi at samsung.com>
+
+ * mfd: max77693: Fix up bug of wrong interrupt number
+
+2013-11-03 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/wm8996' into asoc-next
+
+2013-11-03 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/wm8962' into asoc-next
+
+2013-11-03 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/wm0010' into asoc-next
+
+2013-11-03 Jack Morgenstein <jackm at dev.mellanox.co.il>
+
+ * net/mlx4_core: Fix call to __mlx4_unregister_mac
+
+2013-11-04 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'fixes-for-3.12' of git://gitorious.org/linux-can/linux-can
+
+2013-10-31 Daniel Borkmann <dborkman at redhat.com>
+
+ * net: sctp: do not trigger BUG_ON in sctp_cmd_delete_tcb
+
+2013-11-03 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.12
+
+2013-11-03 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'upstream' of git://git.linux-mips.org/pub/scm/ralf/upstream-linus
+
+2013-11-03 Mathias Krause <minipli at googlemail.com>
+
+ * ipc, msg: forbid negative values for "msg{max,mnb,mni}"
+
+2013-11-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux
+
+2013-11-02 Vineet Gupta <Vineet.Gupta1 at synopsys.com>
+
+ * ARC: Incorrect mm reference used in vmalloc fault handler
+
+2013-11-01 Jason Wang <jasowang at redhat.com>
+
+ * net: flow_dissector: fail on evil iph->ihl
+
+2013-11-02 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec
+
+2013-11-02 Ming Lei <tom.leiming at gmail.com>
+
+ * scripts/kallsyms: filter symbols not in kernel address space
+
+2013-11-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-11-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'usb-3.12-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
+
+2013-11-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2013-11-01 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'clk-fixes-for-linus' of git://git.linaro.org/people/mturquette/linux
+
+2013-11-01 Greg Thelen <gthelen at google.com>
+
+ * memcg: remove incorrect underflow check
+
+2013-11-01 Richard Fitzgerald <rf at opensource.wolfsonmicro.com>
+
+ * ASoC: wm8962: Add EQ coefficient support
+
+2013-11-01 Алексей Крамаренко <alexeyk13 at yandex.ru>
+
+ * USB: serial: ftdi_sio: add id for Z3X Box device
+
+2013-10-30 Greg KH <gregkh at linuxfoundation.org>
+
+ * USB: Maintainers change for usb serial drivers
+
+2013-11-01 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "USB: pl2303: restrict the divisor based baud rate encoding method to the "HX" chip type"
+
+2013-11-01 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "usb: pl2303: fix+improve the divsor based baud rate encoding method"
+
+2013-11-01 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "usb: pl2303: do not round to the next nearest standard baud rate for the divisor based baud rate encoding method"
+
+2013-11-01 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "usb: pl2303: remove 500000 baud from the list of standard baud rates"
+
+2013-11-01 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "usb: pl2303: move the two baud rate encoding methods to separate functions"
+
+2013-11-01 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "usb: pl2303: increase the allowed baud rate range for the divisor based encoding method"
+
+2013-10-30 Steffen Klassert <steffen.klassert at secunet.com>
+
+ * xfrm: Fix null pointer dereference when decoding sessions
+
+2013-10-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (fixes from Andrew Morton)
+
+2013-10-31 Ming Lei <ming.lei at canonical.com>
+
+ * lib/scatterlist.c: don't flush_kernel_dcache_page on slab page
+
+2013-10-31 Johannes Weiner <hannes at cmpxchg.org>
+
+ * mm: memcg: fix test for child groups
+
+2013-10-31 Johannes Weiner <hannes at cmpxchg.org>
+
+ * mm: memcg: lockdep annotation for memcg OOM lock
+
+2013-10-31 Johannes Weiner <hannes at cmpxchg.org>
+
+ * mm: memcg: use proper memcg in limit bypass
+
+2013-10-31 Stratos Karafotis <stratosk at semaphore.gr>
+
+ * cpufreq: ondemand: Remove redundant return statement
+
+2013-10-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * vfs: decrapify dput(), fix cache behavior under normal load
+
+2013-10-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * i915: fix compiler warning
+
+2013-10-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-10-27 Olivier Sobrie <olivier at sobrie.be>
+
+ * can: kvaser_usb: fix usb endpoints detection
+
+2013-10-28 Markus Pargmann <mpa at pengutronix.de>
+
+ * can: c_can: Fix RX message handling, handle lost message before EOB
+
+2013-10-31 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
+
+2013-10-31 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * ALSA: fix oops in snd_pcm_info() caused by ASoC DPCM
+
+2013-10-31 Heiko Stübner <heiko at sntech.de>
+
+ * Input: add driver for Neonode zForce based touchscreens
+
+2013-10-31 Laurent Pinchart <laurent.pinchart+renesas at ideasonboard.com>
+
+ * Input: sh_keysc - enable the driver on all ARM platforms
+
+2013-10-31 Kang Hu <hukangustc at gmail.com>
+
+ * Input: remove a redundant max() call
+
+2013-10-31 Tom Gundersen <teg at jklm.no>
+
+ * Input: mousedev - allow disabling even without CONFIG_EXPERT
+
+2013-10-31 Tom Gundersen <teg at jklm.no>
+
+ * Input: allow deselecting serio drivers even without CONFIG_EXPERT
+
+2013-10-31 Tom Gundersen <teg at jklm.no>
+
+ * Input: i8042 - add PNP modaliases
+
+2013-10-31 Daniel Stone <daniel at fooishbar.org>
+
+ * Input: evdev - fall back to vmalloc for client event buffer
+
+2013-10-16 Joseph Salisbury <joseph.salisbury at canonical.com>
+
+ * Input: cypress_ps2 - do not consider data bad if palm is detected
+
+2013-10-31 Masanari Iida <standby24x7 at gmail.com>
+
+ * doc: usb: Fix typo in Documentation/usb/gadget_configs.txt
+
+2013-10-31 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * MIPS: ralink: fix return value check in rt_timer_probe()
+
+2013-10-18 Srinivas Kandagatla <srinivas.kandagatla at st.com>
+
+ * [media] media: st-rc: Add ST remote control driver
+
+2013-10-31 Yunkang Tang <tommywill2011 at gmail.com>
+
+ * Input: ALPS - add support for model found on Dell XT2
+
+2013-08-14 Viresh Kumar <viresh.kumar at linaro.org>
+
+ * cpufreq: move freq change notifications to cpufreq core
+
+2013-10-29 Viresh Kumar <viresh.kumar at linaro.org>
+
+ * cpufreq: distinguish drivers that do asynchronous notifications
+
+2013-10-30 Dirk Brandewie <dirk.j.brandewie at intel.com>
+
+ * cpufreq/intel_pstate: Add static declarations to internal functions
+
+2013-10-30 Nicolas Pitre <nicolas.pitre at linaro.org>
+
+ * cpufreq: arm_big_little: reconfigure switcher behavior at run time
+
+2013-10-21 Jingoo Han <jg1.han at samsung.com>
+
+ * ARM: EXYNOS: Remove incorrect __init annotation from cpuidle driver
+
+2013-10-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (fixes from Andrew Morton)
+
+2013-10-30 Greg Thelen <gthelen at google.com>
+
+ * memcg: use __this_cpu_sub() to dec stats to avoid incorrect subtrahend casting
+
+2013-10-30 Greg Thelen <gthelen at google.com>
+
+ * percpu: fix this_cpu_sub() subtrahend casting for unsigneds
+
+2013-10-30 Chen LinX <linx.z.chen at intel.com>
+
+ * mm/pagewalk.c: fix walk_page_range() access of wrong PTEs
+
+2013-10-30 Masanari Iida <standby24x7 at gmail.com>
+
+ * doc:net: Fix typo in Documentation/networking
+
+2013-10-30 Russell King <rmk+kernel at arm.linux.org.uk>
+
+ * mm: list_lru: fix almost infinite loop causing effective livelock
+
+2013-10-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'tty-3.12-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
+
+2013-10-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2013-10-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'sound-3.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
+
+2013-10-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
+
+2013-10-29 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * Staging: sb105x: info leak in mp_get_count()
+
+2013-10-29 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * Staging: bcm: info leak in ioctl
+
+2013-10-29 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * staging: wlags49_h2: buffer overflow setting station name
+
+2013-10-29 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * aacraid: missing capable() check in compat ioctl
+
+2013-10-30 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-fix-v3.12-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2013-10-30 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/fix/wm8994' into asoc-linus
+
+2013-10-30 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: wm8996: Fix negative array index read
+
+2013-10-30 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: wm_hubs: Add missing break in hp_supply_event()
+
+2013-10-30 Markos Chandras <markos.chandras at imgtec.com>
+
+ * MIPS: malta: Fix GIC interrupt offsets
+
+2013-10-26 Jan Matějka <yac at blesmrt.net>
+
+ * HID: multitouch: add manufacturer to Kconfig help text
+
+2013-10-19 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * HID: logitech-dj: small cleanup in rdcat()
+
+2013-10-21 Bibek Basu <bbasu at nvidia.com>
+
+ * HID: i2c-hid: Stop querying for init reports
+
+2013-10-28 Stefan Achatz <erazor_de at users.sourceforge.net>
+
+ * HID: roccat: add support for Ryos MK keyboards
+
+2013-10-28 Stefan Achatz <erazor_de at users.sourceforge.net>
+
+ * HID: roccat: generalize some common code
+
+2013-10-28 Stefan Achatz <erazor_de at users.sourceforge.net>
+
+ * HID: roccat: add new device return value
+
+2013-10-28 David Herrmann <dh.herrmann at gmail.com>
+
+ * HID: wiimote: add pro-controller analog stick calibration
+
+2013-10-28 David Herrmann <dh.herrmann at gmail.com>
+
+ * HID: wiimote: fix inverted pro-controller axes
+
+2013-10-30 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Add a fixup for ASUS N76VZ
+
+2013-10-30 Paolo Bonzini <pbonzini at redhat.com>
+
+ * KVM: use a more sensible error number when debugfs directory creation fails
+
+2013-10-29 Tim Gardner <tim.gardner at canonical.com>
+
+ * KVM: Fix modprobe failure for kvm_intel/kvm_amd
+
+2013-10-28 David Herrmann <dh.herrmann at gmail.com>
+
+ * drm: allow DRM_IOCTL_VERSION on render-nodes
+
+2013-10-29 Joel Fernandes <joelf at ti.com>
+
+ * crypto: omap-aes - Fix CTR mode counter length
+
+2013-10-26 Joni Lapilainen <joni.lapilainen at gmail.com>
+
+ * crypto: omap-sham - Add missing modalias
+
+2013-10-25 Mathias Krause <mathias.krause at secunet.com>
+
+ * padata: make the sequence counter an atomic_t
+
+2013-10-29 Nathan Hintz <nlhintz at hotmail.com>
+
+ * bgmac: don't update slot on skb alloc/dma mapping error
+
+2013-10-30 Alistair Popple <alistair at popple.id.au>
+
+ * ibm emac: Fix locking for enable/disable eob irq
+
+2013-10-30 Alistair Popple <alistair at popple.id.au>
+
+ * ibm emac: Don't call napi_complete if napi_reschedule failed
+
+2013-10-29 Jason Wang <jasowang at redhat.com>
+
+ * virtio-net: correctly handle cpu hotplug notifier during resuming
+
+2013-10-30 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm-intel-fixes-2013-10-29' of git://people.freedesktop.org/~danvet/drm-intel into drm-fixes
+
+2013-10-28 Vlad Yasevich <vyasevic at redhat.com>
+
+ * bridge: pass correct vlan id to multicast code
+
+2013-10-29 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jesse/openvswitch
+
+2013-10-28 Michael Drüing <michael at drueing.de>
+
+ * net: x25: Fix dead URLs in Kconfig
+
+2013-10-29 David S. Miller <davem at davemloft.net>
+
+ * Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
+
+2013-10-08 Deng-Cheng Zhu <dengcheng.zhu at imgtec.com>
+
+ * MIPS: Perf: Fix 74K cache map
+
+2013-10-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Fix a few incorrectly checked [io_]remap_pfn_range() calls
+
+2013-10-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-10-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Kconfig: make KOBJECT_RELEASE debugging require timer debugging
+
+2013-10-29 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915: Fix the PPT fdi lane bifurcate state handling on ivb
+
+2013-10-28 Holger Eitzenberger <holger at eitzenberger.org>
+
+ * netfilter: xt_NFQUEUE: fix --queue-bypass regression
+
+2013-10-17 Peter Zijlstra <peterz at infradead.org>
+
+ * perf/x86: Fix NMI measurements
+
+2013-10-28 Peter Zijlstra <peterz at infradead.org>
+
+ * perf: Fix perf ring buffer memory ordering
+
+2013-10-07 Mel Gorman <mgorman at suse.de>
+
+ * mm: Account for a THP NUMA hinting update as one PTE update
+
+2013-10-29 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2013-10-28 Wei Liu <wei.liu2 at citrix.com>
+
+ * xen-netback: use jiffies_64 value to calculate credit timeout
+
+2013-10-27 Ben Hutchings <ben at decadent.org.uk>
+
+ * cxgb3: Fix length calculation in write_ofld_wr() on 32-bit architectures
+
+2013-10-27 Anton Vorontsov <anton at enomsg.org>
+
+ * power_supply: Fix documentation for TEMP_*ALERT* properties
+
+2013-10-29 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * swiotlb-xen: fix error code returned by xen_swiotlb_map_sg_attrs
+
+2013-10-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'xtensa-next-20131015' of git://github.com/czankel/xtensa-linux
+
+2013-10-28 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
+
+2013-10-28 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regmap/topic/spmi' into regmap-next
+
+2013-10-28 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regmap/topic/range' into regmap-next
+
+2013-10-28 Josh Cartwright <joshc at codeaurora.org>
+
+ * regmap: add SPMI support
+
+2013-10-24 Zhouyi Zhou <zhouzhouyi at gmail.com>
+
+ * perf tools: Fixup mmap event consumption
+
+2013-10-26 Jiri Olsa <jolsa at redhat.com>
+
+ * perf top: Split -G and --call-graph
+
+2013-10-26 Jiri Olsa <jolsa at redhat.com>
+
+ * perf record: Split -g and --call-graph
+
+2013-10-25 Jiri Olsa <jolsa at redhat.com>
+
+ * perf hists: Add color overhead for stdio output buffer
+
+2013-10-27 Rob Pearce <rob at flitspace.org.uk>
+
+ * drm/i915: No LVDS hardware on Intel D410PT and D425KT
+
+2013-10-21 Jani Nikula <jani.nikula at intel.com>
+
+ * drm/i915/dp: workaround BIOS eDP bpp clamping issue
+
+2013-09-24 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Add HSW CRT output readout support
+
+2013-10-28 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: dapm: Return -ENOMEM in snd_soc_dapm_new_dai_widgets()
+
+2013-10-28 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: dapm: Fix source list debugfs outputs
+
+2013-10-28 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2013-10-28 Arnaldo Carvalho de Melo <acme at redhat.com>
+
+ * perf tools: Fix up /proc/PID/maps parsing
+
+2013-10-25 Steffen Klassert <steffen.klassert at secunet.com>
+
+ * xfrm: Increase the garbage collector threshold
+
+2013-10-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.12-rc7
+
+2013-10-26 Henrik Austad <haustad at cisco.com>
+
+ * doc: add missing files to timers/00-INDEX
+
+2013-10-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'parisc-3.12' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
+
+2013-10-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-10-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-10-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-10-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending
+
+2013-10-27 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://git.infradead.org/users/vkoul/slave-dma
+
+2013-10-26 Helge Deller <deller at gmx.de>
+
+ * parisc: Do not crash 64bit SMP kernels on machines with >= 4GB RAM
+
+2013-08-21 Mats Kärrman <Mats.Karrman at tritech.se>
+
+ * UBIFS: correct data corruption range
+
+2013-06-07 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * UBIFS: fix return code
+
+2013-10-26 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.12-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2013-10-26 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix silent headphone on Thinkpads with AD1984A codec
+
+2013-10-25 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Add missing initial vmaster hook at build_controls callback
+
+2013-10-23 Thierry Reding <thierry.reding at gmail.com>
+
+ * PowerCap: Convert class code to use dev_groups
+
+2013-10-25 Takashi Iwai <tiwai at suse.de>
+
+ * ASoC: dmaengine: Use SNDRV_PCM_STREAM_LAST for array size
+
+2013-10-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus-20131025' of git://git.infradead.org/linux-mtd
+
+2013-10-25 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * vhost/scsi: Fix incorrect usage of get_user_pages_fast write parameter
+
+2013-10-25 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * target/pscsi: fix return value check
+
+2013-10-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
+
+2013-10-25 David Woodhouse <David.Woodhouse at intel.com>
+
+ * mtd: gpmi: fix ECC regression
+
+2013-10-25 Gu Zheng <guz.fnst at cn.fujitsu.com>
+
+ * seq_file: always update file->f_pos in seq_lseek()
+
+2013-10-25 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * acpi-cpufreq: Fail initialization if driver cannot be registered
+
+2013-10-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2013-10-24 Lan Tianyu <tianyu.lan at intel.com>
+
+ * x86/reboot: Correct pr_info() log message in the set_bios/pci/kbd_reboot()
+
+2013-10-25 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * swiotlb-xen: static inline xen_phys_to_bus, xen_bus_to_phys, xen_virt_to_bus and range_straddles_page_boundary
+
+2013-10-25 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * grant-table: call set_phys_to_machine after mapping grant refs
+
+2013-10-25 Stefano Stabellini <stefano.stabellini at eu.citrix.com>
+
+ * arm,arm64: do not always merge biovec if we are running on Xen
+
+2013-10-25 James Bottomley <JBottomley at Parallels.com>
+
+ * [SCSI] Revert "sg: use rwsem to solve race during exclusive open"
+
+2013-10-21 Anders F. U. Kiær <ablacksheep at gmail.com>
+
+ * HID: add support for LEETGION Hellion Gaming Mouse
+
+2013-10-25 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'spi/topic/wr' into spi-next
+
+2013-10-25 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'spi/topic/txx9' into spi-next
+
+2013-10-25 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'spi/topic/topcliff' into spi-next
+
+2013-10-25 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'spi/topic/tegra114' into spi-next
+
+2013-10-25 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'spi/topic/tegra-slink' into spi-next
+
+2013-10-25 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'spi/topic/tegra' into spi-next
+
+2013-10-25 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'spi/topic/s3c64xx' into spi-next
+
+2013-10-25 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'spi/topic/s3c24xx' into spi-next
+
+2013-10-21 Forest Bond <forest.bond at rapidrollout.com>
+
+ * HID: hid-multitouch: add support for SiS panels
+
+2013-10-25 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'ecryptfs-3.12-rc7-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tyhicks/ecryptfs
+
+2013-10-24 Russ Dill <Russ.Dill at ti.com>
+
+ * PM / hibernate: Move software_resume to late_initcall_sync
+
+2013-10-19 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * mtd: nand: pxa3xx: Fix registered MTD name
+
+2013-10-24 Colin Ian King <colin.king at canonical.com>
+
+ * eCryptfs: fix 32 bit corruption issue
+
+2013-10-24 Vinod Koul <vinod.koul at intel.com>
+
+ * dmaengine: edma: fix another memory leak
+
+2013-10-24 Valentin Ilie <valentin.ilie at gmail.com>
+
+ * dma: edma: Fix memory leak
+
+2013-10-24 Joseph Schuchart <joseph.schuchart at tu-dresden.de>
+
+ * perf script python: Fix mem leak due to missing Py_DECREFs on dict entries
+
+2013-10-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/wm8962' into asoc-next
+
+2013-10-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/wm8400' into asoc-next
+
+2013-10-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/twl6040' into asoc-next
+
+2013-10-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'asoc/topic/twl4030' into asoc-next
+
+2013-10-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/topic/tps65910' into regulator-next
+
+2013-10-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/topic/tps6586x' into regulator-next
+
+2013-10-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/topic/tps65090' into regulator-next
+
+2013-10-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/topic/tps65023' into regulator-next
+
+2013-10-24 Mark Brown <broonie at linaro.org>
+
+ * Merge remote-tracking branch 'regulator/topic/tps6105x' into regulator-next
+
+2013-10-24 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Fail XCOPY for non matching source + destination block_size
+
+2013-10-24 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Generate failure for XCOPY I/O with non-zero scsi_status
+
+2013-10-24 Takashi Iwai <tiwai at suse.de>
+
+ * ALSA: hda - Fix unbalanced runtime PM refcount after S3/S4
+
+2013-10-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'md/3.12-fixes' of git://neil.brown.name/md
+
+2013-10-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
+
+2013-10-19 Shaohua Li <shli at kernel.org>
+
+ * raid5: avoid finding "discard" stripe
+
+2013-10-22 Daniel Borkmann <dborkman at redhat.com>
+
+ * net: sctp: fix ASCONF to allow non SCTP_ADDR_SRC addresses in ipv6
+
+2013-10-22 Dave Jiang <dave.jiang at intel.com>
+
+ * MAINTAINERS: add to ioatdma maintainer list
+
+2013-10-09 Mike Pagano <mpagano at gentoo.org>
+
+ * show_delta: Update script to support python versions 2.5 through 3.3
+
+2013-08-15 Wolfram Sang <wsa at the-dreams.de>
+
+ * scripts/coccinelle/api: remove devm_request_and_ioremap.cocci
+
+2013-10-23 Kirill Tkhai <tkhai at yandex.ru>
+
+ * scripts/tags.sh: Increase identifier list
+
+2013-09-24 Thomas Gleixner <tglx at linutronix.de>
+
+ * clockevents: Sanitize ticks to nsec conversion
+
+2013-10-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
+
+2013-10-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-rc' of git://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux
+
+2013-09-16 Randy Dunlap <rdunlap at infradead.org>
+
+ * platform/x86: fix asus-wmi build error
+
+2013-10-22 Kent Overstreet <kmo at daterainc.com>
+
+ * bcache: Fixed incorrect order of arguments to bio_alloc_bioset()
+
+2013-10-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'v4l_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
+
+2013-10-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
+
+2013-10-22 Jason Gerecke <killertofu at gmail.com>
+
+ * Input: wacom - add support for ISDv4 0x10E sensor
+
+2013-10-15 Jason Gerecke <killertofu at gmail.com>
+
+ * Input: wacom - add support for ISDv4 0x10F sensor
+
+2013-08-16 wang.bo116 at zte.com.cn <wang.bo116 at zte.com.cn>
+
+ * UBIFS: remove unnecessary code in ubifs_garbage_collect
+
+2013-10-21 Krzysztof Kozlowski <k.kozlowski at samsung.com>
+
+ * spi/s3c64xx: Fix doubled clock disable on suspend
+
+2013-10-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
+
+2013-10-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.12-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata
+
+2013-10-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.12-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
+
+2013-10-17 Martin Schwidefsky <schwidefsky at de.ibm.com>
+
+ * s390/time: correct use of store clock fast
+
+2013-10-21 Dirk Brandewie <dirk.j.brandewie at intel.com>
+
+ * intel_pstate: Correct calculation of min pstate value
+
+2013-10-21 Brennan Shacklett <brennan at genyes.org>
+
+ * intel_pstate: Improve accuracy by not truncating until final result
+
+2013-10-18 David Herrmann <dh.herrmann at gmail.com>
+
+ * HID: wiimote: add LEGO-wiimote VID
+
+2013-10-21 Zhang Rui <rui.zhang at intel.com>
+
+ * Merge branch 'x86_pkg_temp' of .git into for-rc
+
+2013-10-21 Zhang Rui <rui.zhang at intel.com>
+
+ * Revert "drivers: thermal: parent virtual hwmon with thermal zone"
+
+2013-10-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'parisc-3.12' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
+
+2013-10-20 Al Viro <viro at zeniv.linux.org.uk>
+
+ * nfsd regression since delayed fput()
+
+2013-10-20 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2013-10-19 Mark Brown <broonie at linaro.org>
+
+ * ALSA: Add MAINTAINERS entry for dmaengine helpers
+
+2013-10-19 Takashi Iwai <tiwai at suse.de>
+
+ * Merge tag 'asoc-v3.12-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
+
+2013-10-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.12-rc6
+
+2013-10-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
+
+2013-10-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.12-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2013-10-17 Tetsuo Handa <penguin-kernel at I-love.SAKURA.ne.jp>
+
+ * mutex: Avoid gcc version dependent __builtin_constant_p() usage
+
+2013-10-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-10-18 Josef Bacik <jbacik at fusionio.com>
+
+ * Btrfs: release path before starting transaction in can_nocow_extent
+
+2013-10-14 Heiko Stuebner <heiko at sntech.de>
+
+ * MAINTAINERS: Add maintainers entry for Rockchip SoCs
+
+2013-09-13 Stephen Warren <swarren at nvidia.com>
+
+ * MAINTAINERS: Tegra updates, and driver ownership
+
+2013-10-13 Nikolai Kondrashov <spbnick at gmail.com>
+
+ * HID: Fix unit exponent parsing again
+
+2013-10-18 Xie XiuQi <xiexiuqi at huawei.com>
+
+ * timekeeping: Fix some trivial typos in comments
+
+2013-10-18 Xie XiuQi <xiexiuqi at huawei.com>
+
+ * mm: Fix some trivial typos in comments
+
+2013-10-18 Xie XiuQi <xiexiuqi at huawei.com>
+
+ * irq: Fix some trivial typos in comments
+
+2013-10-18 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'acpi-fixes'
+
+2013-10-18 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-fixes'
+
+2013-10-17 Jacob Pan <jacob.jun.pan at linux.intel.com>
+
+ * PowerCap: Introduce Intel RAPL power capping driver
+
+2013-10-17 Mark Brown <broonie at linaro.org>
+
+ * ASoC: wm8962: Move register initialisation to I2C probe()
+
+2013-10-17 Mark Brown <broonie at linaro.org>
+
+ * ASoC: wm8962: Move interrupt initalisation to probe()
+
+2013-10-17 Michal Kubecek <mkubecek at suse.cz>
+
+ * xfrm: prevent ipcomp scratch buffer race condition
+
+2013-10-15 Kees Cook <keescook at chromium.org>
+
+ * x86/relocs: Add percpu fixup for GNU ld 2.23
+
+2013-10-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.samba.org/sfrench/cifs-2.6
+
+2013-10-17 Nicolas Ferre <nicolas.ferre at atmel.com>
+
+ * tty/serial: at91: fix uart/usart selection for older products
+
+2013-10-17 Stephane Eranian <eranian at google.com>
+
+ * perf: Disable PERF_RECORD_MMAP2 support
+
+2013-10-14 Arnaldo Carvalho de Melo <acme at redhat.com>
+
+ * perf scripting perl: Fix build error on Fedora 12
+
+2013-10-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'driver-core-3.12-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core
+
+2013-10-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'usb-3.12-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
+
+2013-10-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'tty-3.12-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
+
+2013-10-16 Guenter Roeck <linux at roeck-us.net>
+
+ * usb: usb_phy_gen: refine conditional declaration of usb_nop_xceiv_register
+
+2013-10-17 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI / PM: Drop two functions that are not used any more
+
+2013-10-05 Mark Brown <broonie at linaro.org>
+
+ * spi/tegra20-slink: Move first transfer preparation to prepare_message
+
+2013-10-05 Mark Brown <broonie at linaro.org>
+
+ * spi/tegra20-slink: Crude refactoring to use core message parsing
+
+2013-10-11 Yasuaki Ishimatsu <isimatu.yasuaki at jp.fujitsu.com>
+
+ * driver core: Release device_hotplug_lock when store_mem_state returns EINVAL
+
+2013-10-11 Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
+
+ * bitops: Introduce BIT_ULL
+
+2013-10-11 Jacob Pan <jacob.jun.pan at linux.intel.com>
+
+ * x86 / msr: add 64bit _on_cpu access functions
+
+2013-10-11 Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
+
+ * PowerCap: Add to drivers Kconfig and Makefile
+
+2013-10-11 Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
+
+ * PowerCap: Add class driver
+
+2013-10-11 Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
+
+ * PowerCap: Documentation
+
+2013-10-11 Geyslan G. Bem <geyslan at gmail.com>
+
+ * ecryptfs: Fix memory leakage in keystore.c
+
+2013-10-15 Alexei Starovoitov <ast at plumgrid.com>
+
+ * openvswitch: fix vport-netdev unregister
+
+2013-10-14 Roel Kluin <roel.kluin at gmail.com>
+
+ * serial: vt8500: add missing braces
+
+2013-09-19 Thomas Meyer <thomas at m3y3r.de>
+
+ * xtensa: Cocci spatch "noderef"
+
+2013-10-16 Bart Van Assche <bvanassche at acm.org>
+
+ * dlm: Avoid that dlm_release_lockspace() incorrectly returns -EBUSY
+
+2013-10-15 Bastien Nocera <hadess at hadess.net>
+
+ * Input: wacom - export battery scope
+
+2013-10-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'devicetree-for-linus' of git://git.secretlab.ca/git/linux
+
+2013-10-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes-for-v3.12' of git://git.linaro.org/people/mszyprowski/linux-dma-mapping
+
+2013-10-15 Ulf Hansson <ulf.hansson at linaro.org>
+
+ * PM / Runtime: Respect autosuspend when idle triggers suspend
+
+2013-10-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/virt/kvm/kvm
+
+2013-10-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'stable/for-linus-3.12-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
+
+2013-10-15 Baruch Siach <baruch at tkos.co.il>
+
+ * xtensa: don't use alternate signal stack on threads
+
+2013-10-11 Masami Hiramatsu <masami.hiramatsu.pt at hitachi.com>
+
+ * perf probe: Fix to initialize fname always before use it
+
+2013-10-10 Miklos Szeredi <mszeredi at suse.cz>
+
+ * ext[34]: fix double put in tmpfile
+
+2013-09-15 Eduardo Valentin <eduardo.valentin at ti.com>
+
+ * drivers: thermal: allow ti-soc-thermal run without pcb zone
+
+2013-10-09 Lukasz Majewski <l.majewski at samsung.com>
+
+ * thermal: exynos: Provide initial setting for TMU's test MUX address at Exynos4412
+
+2013-10-14 Lukasz Dorau <lukasz.dorau at intel.com>
+
+ * libahci: fix turning on LEDs in ahci_start_port()
+
+2013-10-14 Jingoo Han <jg1.han at samsung.com>
+
+ * regulator: tps65910: Fix checkpatch issue
+
+2013-10-14 Jingoo Han <jg1.han at samsung.com>
+
+ * regulator: tps65023: Fix checkpatch issue
+
+2013-10-14 Jingoo Han <jg1.han at samsung.com>
+
+ * spi: txx9: Fix checkpatch issue
+
+2013-10-14 Jingoo Han <jg1.han at samsung.com>
+
+ * spi: tegra20-slink: Fix checkpatch issue
+
+2013-10-14 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * s390/vmlogrdr: fix array access in vmlogrdr_open()
+
+2013-10-14 Heiko Carstens <heiko.carstens at de.ibm.com>
+
+ * s390/compat,signal: fix return value of copy_siginfo_(to|from)_user32()
+
+2013-10-09 Stefan Haberland <stefan.haberland at de.ibm.com>
+
+ * s390/dasd: check for availability of prefix command during format
+
+2013-10-07 Martin Schwidefsky <schwidefsky at de.ibm.com>
+
+ * s390/mm,kvm: fix software dirty bits vs. kvm for old machines
+
+2013-10-09 Raghavendra K T <raghavendra.kt at linux.vnet.ibm.com>
+
+ * KVM: Enable pvspinlock after jump_label_init() to avoid VM hang
+
+2013-10-11 Marek Szyprowski <m.szyprowski at samsung.com>
+
+ * Revert "drivers: of: add initialization code for dma reserved memory"
+
+2013-10-11 Marek Szyprowski <m.szyprowski at samsung.com>
+
+ * Revert "ARM: init: add support for reserved memory defined by device tree"
+
+2013-10-14 Russ Anderson <rja at sgi.com>
+
+ * x86: Update UV3 hub revision ID
+
+2013-10-14 Jason Cooper <jason at lakedaemon.net>
+
+ * MAINTAINERS: ARM: mvebu: add Sebastian Hesselbarth
+
+2013-10-13 Tim Gardner <tim.gardner at canonical.com>
+
+ * cifs: ntstatus_to_dos_map[] is not terminated
+
+2013-10-14 Grant Likely <grant.likely at linaro.org>
+
+ * Revert "of: Feed entire flattened device tree into the random pool"
+
+2013-09-20 Simon Farnsworth <simon.farnsworth at onelan.co.uk>
+
+ * [media] saa7134: Fix crash when device is closed before streamoff
+
+2013-10-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.12-rc5
+
+2013-10-12 Anjana V Kumar <anjanavk12 at gmail.com>
+
+ * cgroup: fix to break the while loop in cgroup_attach_task() correctly
+
+2013-10-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://www.linux-watchdog.org/linux-watchdog
+
+2013-10-05 Maxime Ripard <maxime.ripard at free-electrons.com>
+
+ * watchdog: sunxi: Fix section mismatch
+
+2013-09-23 Jingoo Han <jg1.han at samsung.com>
+
+ * watchdog: kempld_wdt: Fix bit mask definition
+
+2013-08-23 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * watchdog: ts72xx_wdt: locking bug in ioctl
+
+2013-10-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2013-09-18 Yuvaraj Kumar C D <yuvaraj.cd at gmail.com>
+
+ * ARM: exynos: dts: Update 5250 arch timer node with clock frequency
+
+2013-10-13 Olof Johansson <olof at lixom.net>
+
+ * Merge tag 'fixes-against-v3.12-rc3-take2' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes
+
+2013-10-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'parisc-3.12' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
+
+2013-10-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://git.infradead.org/users/vkoul/slave-dma
+
+2013-10-09 Helge Deller <deller at gmx.de>
+
+ * parisc: let probe_kernel_read() capture access to page zero
+
+2013-10-05 John David Anglin <dave.anglin at bell.net>
+
+ * parisc: optimize variable initialization in do_page_fault
+
+2013-10-13 H. Peter Anvin <hpa at linux.intel.com>
+
+ * x86, boot: Rename get_flags() and check_flags() to *_cpuflags()
+
+2013-10-10 Kees Cook <keescook at chromium.org>
+
+ * x86, kaslr: Raise the maximum virtual address to -1 GiB on x86_64
+
+2013-10-10 Kees Cook <keescook at chromium.org>
+
+ * x86, kaslr: Report kernel offset on panic
+
+2013-10-10 Kees Cook <keescook at chromium.org>
+
+ * x86, kaslr: Select random position from e820 maps
+
+2013-10-10 Kees Cook <keescook at chromium.org>
+
+ * x86, kaslr: Provide randomness functions
+
+2013-10-10 Kees Cook <keescook at chromium.org>
+
+ * x86, kaslr: Return location from decompress_kernel
+
+2013-10-10 Kees Cook <keescook at chromium.org>
+
+ * x86, boot: Move CPU flags out of cpucheck
+
+2013-10-10 Michael Davidson <md at google.com>
+
+ * x86, relocs: Add more per-cpu gold special cases
+
+2013-09-30 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * vfs: allow O_PATH file descriptors for fstatfs()
+
+2013-10-11 Will Deacon <will.deacon at arm.com>
+
+ * net: smc91x: dont't use SMC_outw for fixing up halfword-aligned data
+
+2013-10-11 Salva Peiró <speiro at ai2.upv.es>
+
+ * farsync: fix info leak in ioctl
+
+2013-10-10 Oussama Ghorbel <ou.ghorbel at gmail.com>
+
+ * ipv6: Initialize ip6_tnl.hlen in gre tunnel even if no route is found
+
+2013-10-06 stephen hemminger <stephen at networkplumber.org>
+
+ * netem: free skb's in tree on reset
+
+2013-10-06 stephen hemminger <stephen at networkplumber.org>
+
+ * netem: update backlog after drop
+
+2013-10-10 Eric Dumazet <edumazet at google.com>
+
+ * l2tp: must disable bh before calling l2tp_xmit_skb()
+
+2013-10-10 Simon Horman <horms+renesas at verge.net.au>
+
+ * net: sh_eth: Correct fix for RX packet errors on R8A7740
+
+2013-10-10 Kent Overstreet <kmo at daterainc.com>
+
+ * aio: Fix a trinity splat
+
+2013-10-07 Geyslan G. Bem <geyslan at gmail.com>
+
+ * dma: edma.c: remove edma_desc leakage
+
+2013-09-25 Miao Xie <miaox at cn.fujitsu.com>
+
+ * Btrfs: fix oops caused by the space balance and dead roots
+
+2013-09-25 Miao Xie <miaox at cn.fujitsu.com>
+
+ * Btrfs: insert orphan roots into fs radix tree
+
+2013-10-10 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for-linus-20131008' of git://git.infradead.org/linux-mtd
+
+2013-10-08 Laxman Dewangan <ldewangan at nvidia.com>
+
+ * regulator: tps65910: get regulators node from parent node only
+
+2013-10-08 Laxman Dewangan <ldewangan at nvidia.com>
+
+ * regulator: tps6586x: get regulators node from parent node only
+
+2013-10-08 Laxman Dewangan <ldewangan at nvidia.com>
+
+ * regulator: tps65090: get regulators node from parent node only
+
+2013-10-02 AceLan Kao <acelan.kao at canonical.com>
+
+ * HID: usbhid: quirk for SiS Touchscreen
+
+2013-09-23 Pali Rohár <pali.rohar at gmail.com>
+
+ * ARM: OMAP2: RX-51: Add missing max_current to rx51_lp5523_led_config
+
+2013-10-08 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-07-23 Jonathan Austin <jonathan.austin at arm.com>
+
+ * clk: fixup argument order when setting VCO parameters
+
+2013-10-08 Ingo Molnar <mingo at kernel.org>
+
+ * Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
+
+2013-10-07 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "serial: i.MX: evaluate linux,stdout-path property"
+
+2013-09-17 Dinh Nguyen <dinguyen at altera.com>
+
+ * clk: socfpga: Fix incorrect sdmmc clock name
+
+2013-10-08 Benjamin Herrenschmidt <benh at kernel.crashing.org>
+
+ * powerpc/irq: Don't switch to irq stack from softirq stack
+
+2009-08-07 Gwendal Grignou <gwendal at google.com>
+
+ * libata: make ata_eh_qc_retry() bump scmd->allowed on bogus failures
+
+2013-10-02 Luosong <android at generaltouch.com>
+
+ * HID: multitouch: Fix GeneralTouch products and add more PIDs
+
+2013-09-27 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: Allow LANMAN auth method for servers supporting unencapsulated authentication methods
+
+2013-10-06 Jan Klos <honza.klos at gmail.com>
+
+ * cifs: Fix inability to write files >2GB to SMB2/3 shares
+
+2013-10-06 Lars-Peter Clausen <lars at metafoo.de>
+
+ * ASoC: twl6040: Use virtual DAPM mixer controls
+
+2013-10-03 Simon Guinot <simon.guinot at sequanux.org>
+
+ * clk: armada-370: fix tclk frequencies
+
+2013-10-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.12-rc4
+
+2013-10-05 Eric W. Biederman <ebiederm at xmission.com>
+
+ * net: Update the sysctl permissions handler to test effective uid/gid
+
+2013-10-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending
+
+2013-10-06 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'fixes' of git://git.infradead.org/users/vkoul/slave-dma
+
+2013-09-19 Tim Gardner <tim.gardner at canonical.com>
+
+ * Input: cm109 - convert high volume dev_err() to dev_err_ratelimited()
+
+2013-10-06 David Herrmann <dh.herrmann at gmail.com>
+
+ * Input: move name/timer init to input_alloc_dev()
+
+2013-10-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
+
+2013-10-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'gpio-v3.12-2' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio
+
+2013-10-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'usb-3.12-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
+
+2013-10-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'tty-3.12-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
+
+2013-10-05 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'staging-3.12-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
+
+2013-09-19 Darrick J. Wong <darrick.wong at oracle.com>
+
+ * btrfs: Fix crash due to not allocating integrity data for a bioset
+
+2013-10-05 Chris Mason <chris.mason at fusionio.com>
+
+ * Merge branch 'for-linus' into for-linus-3.12
+
+2013-10-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.samba.org/sfrench/cifs-2.6
+
+2013-10-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pci-v3.12-fixes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
+
+2013-10-04 Bjorn Helgaas <bhelgaas at google.com>
+
+ * Revert "x86/PCI: MMCONFIG: Check earlier for MMCONFIG region at address zero"
+
+2013-10-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.12-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2013-10-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'xfs-for-linus-v3.12-rc4' of git://oss.sgi.com/xfs/xfs
+
+2013-10-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * selinux: remove 'flags' parameter from avc_audit()
+
+2013-10-04 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * selinux: avc_has_perm_flags has no more users
+
+2013-10-02 Ilya Dryomov <idryomov at gmail.com>
+
+ * Btrfs: fix a use-after-free bug in btrfs_dev_replace_finishing
+
+2013-10-02 Ilya Dryomov <idryomov at gmail.com>
+
+ * Btrfs: eliminate races in worker stopping code
+
+2013-10-01 Liu Bo <bo.li.liu at oracle.com>
+
+ * Btrfs: fix crash of compressed writes
+
+2013-09-30 Josef Bacik <jbacik at fusionio.com>
+
+ * Btrfs: fix transid verify errors when recovering log tree
+
+2013-10-01 Thierry Reding <thierry.reding at gmail.com>
+
+ * xfs: Use kmem_free() instead of free()
+
+2013-09-27 tinguely at sgi.com <tinguely at sgi.com>
+
+ * xfs: fix memory leak in xlog_recover_add_to_trans
+
+2013-09-30 Namhyung Kim <namhyung.kim at lge.com>
+
+ * perf session: Fix infinite loop on invalid perf.data file
+
+2013-09-17 Michael Grzeschik <m.grzeschik at pengutronix.de>
+
+ * dmaengine: imx-dma: fix callback path in tasklet
+
+2013-09-17 Michael Grzeschik <m.grzeschik at pengutronix.de>
+
+ * dmaengine: imx-dma: fix lockdep issue between irqhandler and tasklet
+
+2013-09-17 Michael Grzeschik <m.grzeschik at pengutronix.de>
+
+ * dmaengine: imx-dma: fix slow path issue in prep_dma_cyclic
+
+2013-10-03 Peter Zijlstra <peterz at infradead.org>
+
+ * perf/x86: Clean up cap_user_time* setting
+
+2013-10-01 David Vrabel <david.vrabel at citrix.com>
+
+ * xen/hvc: allow xenboot console to be used again
+
+2013-10-01 David Cohen <david.a.cohen at linux.intel.com>
+
+ * usb: chipidea: add Intel Clovertrail pci id
+
+2013-10-02 Ian Abbott <abbotti at mev.co.uk>
+
+ * staging: comedi: ni_65xx: (bug fix) confine insn_bits to one subdevice
+
+2013-10-03 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi-target; Allow an extra tag_num / 2 number of percpu_ida tags
+
+2013-10-03 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi-target: Perform release of acknowledged tags from RX context
+
+2013-10-03 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * iscsi-target: Only perform wait_for_tasks when performing shutdown
+
+2013-10-02 Mike Travis <travis at sgi.com>
+
+ * x86/UV: Add call to KGDB/KDB from NMI handler
+
+2013-10-02 Mike Travis <travis at sgi.com>
+
+ * kdb: Add support for external NMI handler to call KGDB/KDB
+
+2013-09-28 Richard Weinberger <richard at nod.at>
+
+ * UBI: Add some asserts to ubi_attach_fastmap()
+
+2013-09-28 Richard Weinberger <richard at nod.at>
+
+ * UBI: Fix memory leak in ubi_attach_fastmap() error path
+
+2013-09-28 Richard Genoud <richard.genoud at gmail.com>
+
+ * UBI: simplify image sequence test
+
+2013-09-28 Richard Genoud <richard.genoud at gmail.com>
+
+ * UBI: fastmap: fix backward compatibility with image_seq
+
+2013-09-27 Lars-Peter Clausen <lars at metafoo.de>
+
+ * staging:iio:ade7753/ade7754/ade7759: Use spi_w8r16be() instead of spi_w8r16()
+
+2013-09-27 Lars-Peter Clausen <lars at metafoo.de>
+
+ * hwmon: (adt7310) Use spi_w8r16be() instead spi_w8r16()
+
+2013-09-27 Lars-Peter Clausen <lars at metafoo.de>
+
+ * spi: Add a spi_w8r16be() helper
+
+2013-10-01 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Fail on non zero scsi_status in compare_and_write_callback
+
+2013-10-01 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Fix recursive COMPARE_AND_WRITE callback failure
+
+2013-10-01 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Reset data_length for COMPARE_AND_WRITE to NoLB * block_size
+
+2013-09-30 Jack Wang <jinpu.wang at profitbricks.com>
+
+ * ib_srpt: always set response for task management
+
+2013-10-02 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-fixes'
+
+2013-10-02 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'acpi-fixes'
+
+2013-10-02 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-cpufreq-fixes' into pm-fixes
+
+2013-09-27 Andreas Herrmann <andreas.herrmann at calxeda.com>
+
+ * ARM: dma-mapping: Always pass proper prot flags to iommu_map()
+
+2013-09-13 Linus Walleij <linus.walleij at linaro.org>
+
+ * clk: nomadik: set all timers to use 2.4 MHz TIMCLK
+
+2013-09-23 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Fall back to vzalloc upon ->sess_cmd_map kzalloc failure
+
+2013-09-23 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * vhost/scsi: Use GFP_ATOMIC with percpu_ida_alloc for obtaining tag
+
+2013-09-18 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * ib_srpt: Destroy cm_id before destroying QP.
+
+2013-09-18 Nicholas Bellinger <nab at linux-iscsi.org>
+
+ * target: Fix xop->dbl assignment in target_xcopy_parse_segdesc_02
+
+2013-10-01 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'fixes-for-v3.12-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb into usb-linus
+
+2013-10-01 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * ACPI: Use EXPORT_SYMBOL() for acpi_bus_get_device()
+
+2013-10-01 Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
+
+ * intel_pstate: fix no_turbo
+
+2013-09-24 Robert Baldyga <r.baldyga at samsung.com>
+
+ * usb: gadget: s3c-hsotg: fix can_write limit for non-periodic endpoints
+
+2013-09-27 Robert Baldyga <r.baldyga at samsung.com>
+
+ * usb: gadget: f_fs: fix error handling
+
+2013-10-01 Sebastian Andrzej Siewior <bigeasy at linutronix.de>
+
+ * usb: musb: dsps: do not bind to "musb-hdrc"
+
+2013-09-25 Javier Martinez Canillas <javier.martinez at collabora.co.uk>
+
+ * gpio/omap: auto-setup a GPIO when used as an IRQ
+
+2013-09-25 Javier Martinez Canillas <javier.martinez at collabora.co.uk>
+
+ * gpio/omap: maintain GPIO and IRQ usage separately
+
+2013-09-30 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'iio-fixes-for-3.12b2' of git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio into staging-linus
+
+2013-09-27 Peter Hurley <peter at hurleysoftware.com>
+
+ * tty: Fix pty master read() after slave closes
+
+2013-09-28 Michal Malý <madcatxster at prifuk.cz>
+
+ * USB: serial: option: Ignore card reader interface on Huawei E1750
+
+2013-09-24 Denis CIOCCA <denis.ciocca at st.com>
+
+ * iio:magnetometer: Bugfix magnetometer default output registers
+
+2013-09-21 Lars-Peter Clausen <lars at metafoo.de>
+
+ * iio: Remove debugfs entries in iio_device_unregister()
+
+2013-09-30 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * PM / hibernate: Fix user space driven resume regression
+
+2013-09-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.12-rc3
+
+2013-09-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'usb-3.12-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
+
+2013-09-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'tty-3.12-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
+
+2013-09-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'staging-3.12-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
+
+2013-09-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'driver-core-3.12-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core
+
+2013-09-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'char-misc-3.12-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
+
+2013-09-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-09-29 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2013-09-29 Ingo Molnar <mingo at kernel.org>
+
+ * Revert "perf symbols: Demangle cloned functions"
+
+2013-09-24 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * iio: amplifiers: ad8366: Remove regulator_put
+
+2013-09-26 Rhyland Klein <rklein at nvidia.com>
+
+ * spi/tegra114: Correct support for cs_change
+
+2013-09-17 Elie De Brauwer <eliedebrauwer at gmail.com>
+
+ * mtd: m25p80: Fix 4 byte addressing mode for Micron devices.
+
+2013-09-16 Brian Norris <computersforpeace at gmail.com>
+
+ * mtd: nand: fix memory leak in ONFI extended parameter page
+
+2013-09-27 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * ASoC: wm8993: drop regulator_bulk_free of devm_ allocated data
+
+2013-09-26 Larry Finger <Larry.Finger at lwfinger.net>
+
+ * staging: r8188eu: Add new device ID
+
+2013-09-26 David Cohen <david.a.cohen at linux.intel.com>
+
+ * usb: dwc3: add support for Merrifield
+
+2013-09-02 Shengzhou Liu <Shengzhou.Liu at freescale.com>
+
+ * USB: fsl/ehci: fix failure of checking PHY_CLK_VALID during reinitialization
+
+2013-09-20 Al Viro <viro at ZenIV.linux.org.uk>
+
+ * USB: Fix breakage in ffs_fs_mount()
+
+2013-09-24 Benson Leung <bleung at chromium.org>
+
+ * driver core : Fix use after free of dev->parent in device_shutdown
+
+2013-09-23 Eric W. Biederman <ebiederm at xmission.com>
+
+ * sysfs: Allow mounting without CONFIG_NET
+
+2013-09-04 K. Y. Srinivasan <kys at microsoft.com>
+
+ * Drivers: hv: vmbus: Terminate vmbus version negotiation on timeout
+
+2013-09-06 K. Y. Srinivasan <kys at microsoft.com>
+
+ * Drivers: hv: util: Correctly support ws2008R2 and earlier
+
+2013-09-26 Paul Moore <pmoore at redhat.com>
+
+ * selinux: correct locking in selinux_netlbl_socket_connect)
+
+2013-09-26 Duan Jiong <duanj.fnst at cn.fujitsu.com>
+
+ * selinux: Use kmemdup instead of kmalloc + memcpy
+
+2013-08-31 Gabor Juhos <juhosg at openwrt.org>
+
+ * tty: ar933x_uart: move devicetree binding documentation
+
+2013-09-16 Ramneek Mehresh <ramneek.mehresh at freescale.com>
+
+ * fsl/usb: Resolve PHY_CLK_VLD instability issue for ULPI phy
+
+2013-09-25 Peter Hurley <peter at hurleysoftware.com>
+
+ * tty: Fix SIGTTOU not sent with tcflush()
+
+2013-09-24 Kurt Garloff <kurt at garloff.de>
+
+ * usb/core/devio.c: Don't reject control message to endpoint with wrong direction bit
+
+2013-09-17 Geert Uytterhoeven <geert at linux-m68k.org>
+
+ * usb: chipidea: USB_CHIPIDEA should depend on HAS_DMA
+
+2013-09-25 Steve French <smfrench at gmail.com>
+
+ * [CIFS] update cifs.ko version
+
+2013-09-25 Steve French <smfrench at gmail.com>
+
+ * [CIFS] Remove ext2 flags that have been moved to fs.h
+
+2013-09-17 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * staging: imx-drm: Fix probe failure
+
+2013-09-23 Malcolm Priestley <tvboxspy at gmail.com>
+
+ * staging: vt6656: [BUG] iwctl_siwencodeext return if device not open
+
+2013-09-22 Malcolm Priestley <tvboxspy at gmail.com>
+
+ * staging: vt6656: [BUG] main_usb.c oops on device_close move flag earlier.
+
+2013-09-17 Bin Liu <b-liu at ti.com>
+
+ * usb: musb: gadget: fix otg active status flag
+
+2013-09-24 Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
+
+ * Thermal: x86_pkg_temp: change spin lock
+
+2013-09-24 Paul E. McKenney <paulmck at linux.vnet.ibm.com>
+
+ * mm: Place preemption point in do_mlockall() loop
+
+2013-09-24 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'akpm' (patches from Andrew Morton)
+
+2013-09-24 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * spi: spi-topcliff-pch: fix a pci_iomap() check
+
+2013-09-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.12-rc2
+
+2013-09-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'staging-3.12-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
+
+2013-09-23 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'usb-3.12-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
+
+2013-09-23 Johan Hovold <jhovold at gmail.com>
+
+ * usb: phy: gpio-vbus: fix deferred probe from __init
+
+2013-09-23 Johan Hovold <jhovold at gmail.com>
+
+ * usb: gadget: pxa25x_udc: fix deferred probe from __init
+
+2013-09-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
+
+2013-09-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-3.12/core' of git://git.kernel.dk/linux-block
+
+2013-09-22 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
+
+2013-09-22 Anatol Pomozov <anatol.pomozov at gmail.com>
+
+ * cfq: explicitly use 64bit divide operation for 64bit arguments
+
+2013-09-21 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'iio-fixes-for-3.12a' of git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio into staging-linus
+
+2013-09-21 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'nfs-for-3.12-3' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
+
+2013-09-21 Jun'ichi Nomura <j-nomura at ce.jp.nec.com>
+
+ * block: Add nr_bios to block_rq_remap tracepoint
+
+2013-09-20 Josef Bacik <jbacik at fusionio.com>
+
+ * Btrfs: create the uuid tree on remount rw
+
+2013-09-21 Jim McDonough <jmcd at samba.org>
+
+ * [CIFS] Provide sane values for nlink
+
+2013-09-17 Mark Fasheh <mfasheh at suse.de>
+
+ * btrfs: change extent-same to copy entire argument struct
+
+2013-09-16 Guangyu Sun <guangyu.sun at oracle.com>
+
+ * Btrfs: dir_inode_operations should use btrfs_update_time also
+
+2013-09-13 Frank Holton <fholton at gmail.com>
+
+ * btrfs: Add btrfs: prefix to kernel log output
+
+2013-09-13 David Sterba <dsterba at suse.cz>
+
+ * btrfs: refuse to remount read-write after abort
+
+2013-09-13 chandan <chandan at linux.vnet.ibm.com>
+
+ * Btrfs: btrfs_ioctl_default_subvol: Revert back to toplevel subvolume when arg is 0
+
+2013-09-11 Filipe David Borba Manana <fdmanana at gmail.com>
+
+ * Btrfs: don't leak transaction in btrfs_sync_file()
+
+2013-09-11 Stefan Behrens <sbehrens at giantdisaster.de>
+
+ * Btrfs: add the missing mutex unlock in write_all_supers()
+
+2013-09-18 Lars-Peter Clausen <lars at metafoo.de>
+
+ * iio:buffer_cb: Add missing iio_buffer_init()
+
+2013-09-18 Lars-Peter Clausen <lars at metafoo.de>
+
+ * iio: Prevent race between IIO chardev opening and IIO device free
+
+2013-09-18 Lars-Peter Clausen <lars at metafoo.de>
+
+ * iio: fix: Keep a reference to the IIO device for open file descriptors
+
+2013-09-18 Lars-Peter Clausen <lars at metafoo.de>
+
+ * iio: Stop sampling when the device is removed
+
+2013-09-18 Peter Meerwald <pmeerw at pmeerw.net>
+
+ * iio: Fix crash when scan_bytes is computed with active_scan_mask == NULL
+
+2013-09-18 Peter Meerwald <pmeerw at pmeerw.net>
+
+ * iio: Fix mcp4725 dev-to-indio_dev conversion in suspend/resume
+
+2013-09-18 Peter Meerwald <pmeerw at pmeerw.net>
+
+ * iio: Fix bma180 dev-to-indio_dev conversion in suspend/resume
+
+2013-09-18 Peter Meerwald <pmeerw at pmeerw.net>
+
+ * iio: Fix tmp006 dev-to-indio_dev conversion in suspend/resume
+
+2013-09-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'pm+acpi-3.12-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+
+2013-09-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
+
+2013-09-20 David Howells <dhowells at redhat.com>
+
+ * CacheFiles: Don't try to dump the index key if the cookie has been cleared
+
+2013-09-20 Josh Boyer <jwboyer at redhat.com>
+
+ * CacheFiles: Fix memory leak in cachefiles_check_auxdata error paths
+
+2013-09-19 Will Deacon <will.deacon at arm.com>
+
+ * lockref: use cmpxchg64 explicitly for lockless updates
+
+2013-09-20 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'pm-cpufreq'
+
+2013-09-20 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * Merge branch 'acpi-pci'
+
+2013-09-20 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'arm64-stable' of git://git.kernel.org/pub/scm/linux/kernel/git/cmarinas/linux-aarch64
+
+2013-09-18 Steve Capper <Steve.Capper at arm.com>
+
+ * arm64: Widen hwcap to be 64 bit
+
+2013-09-19 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
+
+2013-09-20 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'msm-fixes-3.12' of git://people.freedesktop.org/~robclark/linux into drm-fixes
+
+2013-09-20 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'exynos-drm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/daeinki/drm-exynos into drm-fixes
+
+2013-09-20 Dave Airlie <airlied at redhat.com>
+
+ * Merge tag 'drm-intel-fixes-2013-09-19' of git://people.freedesktop.org/~danvet/drm-intel into drm-fixes
+
+2013-09-18 Yinghai Lu <yinghai at kernel.org>
+
+ * cpufreq: return EEXIST instead of EBUSY for second registering
+
+2013-09-20 Dave Airlie <airlied at redhat.com>
+
+ * Revert "drm: mark context support as a legacy subsystem"
+
+2013-09-14 Rafael J. Wysocki <rafael.j.wysocki at intel.com>
+
+ * PCI / ACPI / PM: Clear pme_poll for devices in D3cold on wakeup
+
+2013-09-13 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/fb-helper: don't sleep for screen unblank when an oops is in progress
+
+2013-09-10 Sudeep KarkadaNagesha <sudeep.karkadanagesha at arm.com>
+
+ * ARM: shmobile: change dev_id to cpu0 while registering cpu clock
+
+2013-09-10 Sudeep KarkadaNagesha <sudeep.karkadanagesha at arm.com>
+
+ * ARM: i.MX: change dev_id to cpu0 while registering cpu clock
+
+2013-09-10 Sudeep KarkadaNagesha <sudeep.karkadanagesha at arm.com>
+
+ * cpufreq: imx6q-cpufreq: assign cpu_dev correctly to cpu0 device
+
+2013-09-10 Sudeep KarkadaNagesha <sudeep.karkadanagesha at arm.com>
+
+ * cpufreq: cpufreq-cpu0: assign cpu_dev correctly to cpu0 device
+
+2013-09-13 Prarit Bhargava <prarit at redhat.com>
+
+ * drm, ttm Fix uninitialized warning
+
+2013-09-17 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/ttm: fix the tt_populated check in ttm_tt_destroy()
+
+2013-09-19 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-nouveau-next' of git://anongit.freedesktop.org/git/nouveau/linux-2.6 into drm-fixes
+
+2013-09-16 Andrey Moiseev <o2g.org.ru at gmail.com>
+
+ * Input: i8042 - i8042_flush fix for a full 8042 buffer
+
+2013-09-18 Lukasz Czerwinski <l.czerwinski at samsung.com>
+
+ * iio: iio_device_add_event_sysfs() bugfix
+
+2013-09-11 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * staging: iio: ade7854-spi: Fix return value
+
+2013-09-03 Peter Meerwald <pmeerw at pmeerw.net>
+
+ * staging:iio:hmc5843: Fix measurement conversion
+
+2013-09-18 Paul Moore <pmoore at redhat.com>
+
+ * selinux: add Paul Moore as a SELinux maintainer
+
+2013-09-18 Paul Moore <pmoore at redhat.com>
+
+ * Merge git://git.infradead.org/users/eparis/selinux
+
+2013-09-16 Jeff Layton <jlayton at redhat.com>
+
+ * cifs: stop trying to use virtual circuits
+
+2013-09-18 J. Bruce Fields <bfields at redhat.com>
+
+ * RPCSEC_GSS: fix crash on destroying gss auth
+
+2013-09-04 David Howells <dhowells at redhat.com>
+
+ * CIFS: FS-Cache: Uncache unread pages in cifs_readpages() before freeing them
+
+2013-09-18 Mike Dunn <mikedunn at newsguy.com>
+
+ * Input: pxa27x_keypad - fix NULL pointer dereference
+
+2013-09-18 Mike Christie <michaelc at cs.wisc.edu>
+
+ * If the queue is dying then we only call the rq->end_io callout. This leaves bios setup on the request, because the caller assumes when the blk_execute_rq_nowait/blk_execute_rq call has completed that the rq->bios have been cleaned up.
+
+2013-09-17 Ville Syrjälä <ville.syrjala at linux.intel.com>
+
+ * drm/i915: Don't enable the cursor on a disable pipe
+
+2013-09-17 Jani Nikula <jani.nikula at intel.com>
+
+ * drm/i915: do not update cursor in crtc mode set
+
+2013-09-17 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
+
+2013-09-15 Oleg Nesterov <oleg at redhat.com>
+
+ * tty: disassociate_ctty() sends the extra SIGCONT
+
+2013-09-17 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Merge tag 'fixes-for-v3.12-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb into usb-linus
+
+2013-09-17 Michael S. Tsirkin <mst at redhat.com>
+
+ * vhost-scsi: whitespace tweak
+
+2013-09-17 Michael S. Tsirkin <mst at redhat.com>
+
+ * vhost/scsi: use vmalloc for order-10 allocation
+
+2013-05-29 Bjorn Helgaas <bhelgaas at google.com>
+
+ * bio-integrity: Fix use of bs->bio_integrity_pool after free
+
+2013-09-17 Peter Hurley <peter at hurleysoftware.com>
+
+ * n_tty: Fix EOF push index when termios changes
+
+2013-09-10 Johan Hovold <jhovold at gmail.com>
+
+ * serial: pch_uart: remove unnecessary tty_port_tty_get
+
+2013-09-10 Johan Hovold <jhovold at gmail.com>
+
+ * serial: pch_uart: fix tty-kref leak in dma-rx path
+
+2013-09-14 Frank Schäfer <fschaefer.oss at googlemail.com>
+
+ * USB: pl2303: distinguish between original and cloned HX chips
+
+2013-09-03 Dave Jones <davej at redhat.com>
+
+ * USB: Faraday fotg210: fix email addresses
+
+2013-09-03 Dave Jones <davej at redhat.com>
+
+ * USB: fix typo in usb serial simple driver Kconfig
+
+2013-09-12 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "USB: EHCI: support running URB giveback in tasklet context"
+
+2013-09-12 Chanho Park <chanho61.park at samsung.com>
+
+ * usb: s3c-hsotg: do not disconnect gadget when receiving ErlySusp intr
+
+2013-09-12 Marek Szyprowski <m.szyprowski at samsung.com>
+
+ * usb: s3c-hsotg: fix unregistration function
+
+2013-09-16 Peter Oh <poh at broadcom.com>
+
+ * usb: gadget: f_mass_storage: reset endpoint driver data when disabled
+
+2013-09-16 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * usb: host: fsl-mph-dr-of: Staticize local symbols
+
+2013-09-16 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * usb: gadget: f_eem: Staticize eem_alloc
+
+2013-09-16 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * usb: gadget: f_ecm: Staticize ecm_alloc
+
+2013-09-16 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * usb: phy: omap-usb3: Fix return value
+
+2013-09-11 David Cohen <david.a.cohen at linux.intel.com>
+
+ * usb: dwc3: gadget: avoid memory leak when failing to allocate all eps
+
+2013-09-10 Heikki Krogerus <heikki.krogerus at linux.intel.com>
+
+ * usb: dwc3: remove extcon dependency
+
+2013-09-02 Chen Gang <gang.chen at asianux.com>
+
+ * usb: gadget: add '__ref' for rndis_config_register() and cdc_config_register()
+
+2013-09-13 Dan Carpenter <dan.carpenter at oracle.com>
+
+ * staging: line6: add bounds check in snd_toneport_source_put()
+
+2013-09-01 Ben Hutchings <ben at decadent.org.uk>
+
+ * Staging: comedi: Fix dependencies for drivers misclassified as PCI
+
+2013-09-04 Larry Finger <Larry.Finger at lwfinger.net>
+
+ * staging: r8188eu: Adjust RX gain
+
+2013-08-31 Larry Finger <Larry.Finger at lwfinger.net>
+
+ * staging: r8188eu: Fix smatch warning in core/rtw_ieee80211.
+
+2013-08-31 Larry Finger <Larry.Finger at lwfinger.net>
+
+ * staging: r8188eu: Fix smatch error in core/rtw_mlme_ext.c
+
+2013-08-31 Larry Finger <Larry.Finger at lwfinger.net>
+
+ * staging: r8188eu: Fix Smatch off-by-one warning in hal/rtl8188e_hal_init.c
+
+2013-09-08 Guenter Roeck <linux at roeck-us.net>
+
+ * staging: Disable lustre file system for MIPS, SH, and XTENSA
+
+2013-09-12 Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+
+ * Revert "staging: zram: Add auto loading of module if user opens /dev/zram."
+
+2013-09-05 Aaro Koskinen <aaro.koskinen at iki.fi>
+
+ * staging: octeon-ethernet: rgmii: enable interrupts that we can handle
+
+2013-09-05 Aaro Koskinen <aaro.koskinen at iki.fi>
+
+ * staging: octeon-ethernet: remove skb alloc failure warnings
+
+2013-09-05 Aaro Koskinen <aaro.koskinen at iki.fi>
+
+ * staging: octeon-ethernet: make dropped packets to consume NAPI budget
+
+2013-09-17 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau/ttm: prevent double-free in nouveau_sgdma_create_ttm() failure path
+
+2013-09-17 Ben Skeggs <bskeggs at redhat.com>
+
+ * drm/nouveau/bios/init: fix thinko in INIT_CONFIGURE_MEM
+
+2013-06-07 Qin Chuanyu <qinchuanyu at huawei.com>
+
+ * vhost: wake up worker outside spin_lock
+
+2013-09-04 Josh Boyer <jwboyer at redhat.com>
+
+ * dma/Kconfig: Make TI_EDMA select TI_PRIV_EDMA
+
+2013-09-04 Josh Boyer <jwboyer at redhat.com>
+
+ * edma: Update author email address
+
+2013-08-31 Mark Brown <broonie at linaro.org>
+
+ * ASoC: wm8400: Use regmap for I/O
+
+2013-08-31 Mark Brown <broonie at linaro.org>
+
+ * ASoC: wm8400: Use supplies to manage input power
+
+2013-08-30 Axel Lin <axel.lin at ingics.com>
+
+ * spi: tegra: Use DIV_ROUND_UP instead of open coded
+
+2013-09-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.12-rc1
+
+2013-09-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'timers/core' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
+
+2013-09-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for-next' of git://git.samba.org/sfrench/cifs-2.6
+
+2013-09-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'upstream-3.12-rc1' of git://git.infradead.org/linux-ubi
+
+2013-09-16 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'upstream-3.12-rc1' of git://git.infradead.org/linux-ubifs
+
+2013-09-11 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * drm/exynos: fix return value check in lowlevel_buffer_allocate()
+
+2013-09-05 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * drm/exynos: Fix address space warnings in exynos_drm_fbdev.c
+
+2013-09-05 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * drm/exynos: Fix address space warning in exynos_drm_buf.c
+
+2013-09-05 Sachin Kamat <sachin.kamat at linaro.org>
+
+ * drm/exynos: Remove redundant OF dependency
+
+2013-09-14 Rob Clark <robdclark at gmail.com>
+
+ * drm/msm: drop unnecessary set_need_resched()
+
+2013-09-16 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-fixes-3.12' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
+
+2013-09-15 Christian König <christian.koenig at amd.com>
+
+ * drm/radeon: avoid UVD corruptions on AGP cards
+
+2013-09-12 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/udl: rip out set_need_resched
+
+2013-09-16 Dave Airlie <airlied at redhat.com>
+
+ * Merge branch 'drm-fixes-3.12' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
+
+2013-09-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'upstream' of git://git.linux-mips.org/pub/scm/ralf/upstream-linus
+
+2013-09-15 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge branch 'for_linus' of git://cavan.codon.org.uk/platform-drivers-x86
+
+2013-09-10 Björn Jacke <bj at sernet.de>
+
+ * cifs: update cifs.txt and remove some outdated infos
+
+2013-09-13 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: Avoid calling unlock_page() twice in cifs_readpage() when using fscache
+
+2013-09-13 Sachin Prabhu <sprabhu at redhat.com>
+
+ * cifs: Do not take a reference to the page in cifs_readpage_worker()
+
+2013-09-13 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging
+
+2013-09-11 Benjamin Tissoires <benjamin.tissoires at redhat.com>
+
+ * HID: lenovo-tpkbd: fix leak if tpkbd_probe_tp fails
+
+2013-09-11 Markos Chandras <markos.chandras at imgtec.com>
+
+ * MIPS: kernel: vpe: Make vpe_attrs an array of pointers.
+
+2013-09-12 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915: kill set_need_resched
+
+2013-09-11 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * drm/msm: fix potential NULL pointer dereference
+
+2013-09-12 Dave Airlie <airlied at redhat.com>
+
+ * drm/ast: fix the ast open key function
+
+2013-09-11 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915/dvo: set crtc timings again for panel fixed modes
+
+2013-09-11 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915/sdvo: Robustify the dtd<->drm_mode conversions
+
+2013-09-11 Rob Clark <robdclark at gmail.com>
+
+ * drm/msm: workaround for missing irq
+
+2013-09-11 Rob Clark <robdclark at gmail.com>
+
+ * drm/msm: return -EBUSY if bo still active
+
+2013-09-11 Wei Yongjun <yongjun_wei at trendmicro.com.cn>
+
+ * drm/msm: fix return value check in ERR_PTR()
+
+2013-08-28 Tejun Heo <tj at kernel.org>
+
+ * blkcg: relocate root_blkg setting and clearing
+
+2013-08-29 Joe Perches <joe at perches.com>
+
+ * block: Convert kmalloc_node(...GFP_ZERO...) to kzalloc_node(...)
+
+2013-09-11 Jianpeng Ma <majianpeng at gmail.com>
+
+ * block: trace all devices plug operation
+
+2013-09-09 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/dpm: add bapm callback for kb/kv
+
+2013-09-09 Alex Deucher <alexander.deucher at amd.com>
+
+ * drm/radeon/dpm: add bapm callback for trinity
+
+2013-08-16 Andi Kleen <ak at linux.intel.com>
+
+ * x86: Add 1/2/4/8 byte optimization to 64bit __copy_{from,to}_user_inatomic
+
+2013-09-06 Rob Clark <robdclark at gmail.com>
+
+ * drm/msm: fix cmdstream size check
+
+2013-09-03 Rob Clark <robdclark at gmail.com>
+
+ * drm/msm: hangcheck harder
+
+2013-09-01 Rob Clark <robdclark at gmail.com>
+
+ * drm/msm: handle read vs write fences
+
+2013-09-10 Daniel Vetter <daniel.vetter at ffwll.ch>
+
+ * drm/i915/sdvo: Fully translate sync flags in the dtd->mode conversion
+
+2013-09-10 Takashi Iwai <tiwai at suse.de>
+
+ * drm/i915: Use proper print format for debug prints
+
+2013-09-09 Dave Airlie <airlied at gmail.com>
+
+ * drm/nouveau: fix oops on runtime suspend/resume
+
+2013-09-07 David Herrmann <dh.herrmann at gmail.com>
+
+ * Input: evdev - add EVIOCREVOKE ioctl
+
+2013-09-06 Dmitry Torokhov <dmitry.torokhov at gmail.com>
+
+ * Merge branch 'next' into for-linus
+
+2013-09-05 Dave Airlie <airlied at gmail.com>
+
+ * Merge branch 'exynos-drm-next' of git://git.kernel.org/pub/scm/linux/kernel/git/daeinki/drm-exynos into drm-next
+
+2013-09-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.11
+
+2013-08-20 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * clocksource: armada-370-xp: Add detailed clock requirements in devicetree binding
+
+2013-08-20 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * clocksource: armada-370-xp: Get reference fixed-clock by name
+
+2013-08-20 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * clocksource: armada-370-xp: Replace WARN_ON with BUG_ON
+
+2013-08-13 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * clocksource: armada-370-xp: Fix device-tree binding
+
+2013-08-13 Ezequiel Garcia <ezequiel.garcia at free-electrons.com>
+
+ * clocksource: armada-370-xp: Introduce new compatibles
+
+2013-09-02 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Merge tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
+
+2013-08-29 David Herrmann <dh.herrmann at gmail.com>
+
+ * Input: add SYN_MAX and SYN_CNT constants
+
+2013-08-29 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * Input: max11801_ts - convert to devm
+
+2013-08-29 Fabio Estevam <fabio.estevam at freescale.com>
+
+ * Input: egalax-ts - fix typo and improve text
+
+2013-08-29 Mischa Jonker <Mischa.Jonker at synopsys.com>
+
+ * Input: i8042 - disable the driver on ARC platforms
+
+2013-08-28 Eric Paris <eparis at redhat.com>
+
+ * Revert "SELinux: do not handle seclabel as a special flag"
+
+2013-04-16 Anand Avati <avati at redhat.com>
+
+ * selinux: consider filesystem subtype in policies
+
+2013-08-26 Mag <magissia at magissia.com>
+
+ * Input: xpad - add signature for Razer Onza Classic Edition
+
+2013-08-15 Matteo Delfino <kendatsuba at gmail.com>
+
+ * Input: elantech - fix packet check for v3 and v4 hardware
+
+2013-08-19 Richard Weinberger <richard at nod.at>
+
+ * UBI: Fix invalidate_fastmap()
+
+2013-08-19 Richard Weinberger <richard at nod.at>
+
+ * UBI: Fix PEB leak in wear_leveling_worker()
+
+2013-08-18 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.11-rc6
+
+2013-08-14 Mats Kärrman <Mats.Karrman at tritech.se>
+
+ * UBIFS: remove invalid warn msg with tst_recovery enabled
+
+2013-08-11 Linus Torvalds <torvalds at linux-foundation.org>
+
+ * Linux 3.11-rc5
diff --git a/ceph/Kconfig b/ceph/Kconfig
new file mode 100644
index 0000000..264e9bf
--- /dev/null
+++ b/ceph/Kconfig
@@ -0,0 +1,40 @@
+config CEPH_FS
+ tristate "Ceph distributed file system"
+ depends on INET
+ select CEPH_LIB
+ select LIBCRC32C
+ select CRYPTO_AES
+ select CRYPTO
+ default n
+ help
+ Choose Y or M here to include support for mounting the
+ experimental Ceph distributed file system. Ceph is an extremely
+ scalable file system designed to provide high performance,
+ reliable access to petabytes of storage.
+
+ More information at http://ceph.newdream.net/.
+
+ If unsure, say N.
+
+if CEPH_FS
+config CEPH_FSCACHE
+ bool "Enable Ceph client caching support"
+ depends on CEPH_FS=m && FSCACHE || CEPH_FS=y && FSCACHE=y
+ help
+ Choose Y here to enable persistent, read-only local
+ caching support for Ceph clients using FS-Cache
+
+endif
+
+config CEPH_FS_POSIX_ACL
+ bool "Ceph POSIX Access Control Lists"
+ depends on CEPH_FS
+ select FS_POSIX_ACL
+ help
+ POSIX Access Control Lists (ACLs) support permissions for users and
+ groups beyond the owner/group/world scheme.
+
+ To learn more about Access Control Lists, visit the POSIX ACLs for
+ Linux website <http://acl.bestbits.at/>.
+
+ If you don't know what Access Control Lists are, say N
diff --git a/ceph/Makefile b/ceph/Makefile
new file mode 100644
index 0000000..85a4230
--- /dev/null
+++ b/ceph/Makefile
@@ -0,0 +1,13 @@
+#
+# Makefile for CEPH filesystem.
+#
+
+obj-$(CONFIG_CEPH_FS) += ceph.o
+
+ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
+ export.o caps.o snap.o xattr.o \
+ mds_client.o mdsmap.o strings.o ceph_frag.o \
+ debugfs.o
+
+ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
+ceph-$(CONFIG_CEPH_FS_POSIX_ACL) += acl.o
diff --git a/ceph/acl.c b/ceph/acl.c
new file mode 100644
index 0000000..21887d6
--- /dev/null
+++ b/ceph/acl.c
@@ -0,0 +1,200 @@
+/*
+ * linux/fs/ceph/acl.c
+ *
+ * Copyright (C) 2013 Guangliang Zhao, <lucienchao at gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/ceph/ceph_debug.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/posix_acl.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include "super.h"
+
+static inline void ceph_set_cached_acl(struct inode *inode,
+ int type, struct posix_acl *acl)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ spin_lock(&ci->i_ceph_lock);
+ if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0))
+ set_cached_acl(inode, type, acl);
+ spin_unlock(&ci->i_ceph_lock);
+}
+
+static inline struct posix_acl *ceph_get_cached_acl(struct inode *inode,
+ int type)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct posix_acl *acl = ACL_NOT_CACHED;
+
+ spin_lock(&ci->i_ceph_lock);
+ if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0))
+ acl = get_cached_acl(inode, type);
+ spin_unlock(&ci->i_ceph_lock);
+
+ return acl;
+}
+
+struct posix_acl *ceph_get_acl(struct inode *inode, int type)
+{
+ int size;
+ const char *name;
+ char *value = NULL;
+ struct posix_acl *acl;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ name = POSIX_ACL_XATTR_ACCESS;
+ break;
+ case ACL_TYPE_DEFAULT:
+ name = POSIX_ACL_XATTR_DEFAULT;
+ break;
+ default:
+ BUG();
+ }
+
+ size = __ceph_getxattr(inode, name, "", 0);
+ if (size > 0) {
+ value = kzalloc(size, GFP_NOFS);
+ if (!value)
+ return ERR_PTR(-ENOMEM);
+ size = __ceph_getxattr(inode, name, value, size);
+ }
+
+ if (size > 0)
+ acl = posix_acl_from_xattr(&init_user_ns, value, size);
+ else if (size == -ERANGE || size == -ENODATA || size == 0)
+ acl = NULL;
+ else
+ acl = ERR_PTR(-EIO);
+
+ kfree(value);
+
+ if (!IS_ERR(acl))
+ ceph_set_cached_acl(inode, type, acl);
+
+ return acl;
+}
+
+int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+ int ret = 0, size = 0;
+ const char *name = NULL;
+ char *value = NULL;
+ struct iattr newattrs;
+ umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;
+ struct dentry *dentry;
+
+ if (acl) {
+ ret = posix_acl_valid(acl);
+ if (ret < 0)
+ goto out;
+ }
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ name = POSIX_ACL_XATTR_ACCESS;
+ if (acl) {
+ ret = posix_acl_equiv_mode(acl, &new_mode);
+ if (ret < 0)
+ goto out;
+ if (ret == 0)
+ acl = NULL;
+ }
+ break;
+ case ACL_TYPE_DEFAULT:
+ if (!S_ISDIR(inode->i_mode)) {
+ ret = acl ? -EINVAL : 0;
+ goto out;
+ }
+ name = POSIX_ACL_XATTR_DEFAULT;
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (acl) {
+ size = posix_acl_xattr_size(acl->a_count);
+ value = kmalloc(size, GFP_NOFS);
+ if (!value) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
+ if (ret < 0)
+ goto out_free;
+ }
+
+ dentry = d_find_alias(inode);
+ if (new_mode != old_mode) {
+ newattrs.ia_mode = new_mode;
+ newattrs.ia_valid = ATTR_MODE;
+ ret = ceph_setattr(dentry, &newattrs);
+ if (ret)
+ goto out_dput;
+ }
+
+ ret = __ceph_setxattr(dentry, name, value, size, 0);
+ if (ret) {
+ if (new_mode != old_mode) {
+ newattrs.ia_mode = old_mode;
+ newattrs.ia_valid = ATTR_MODE;
+ ceph_setattr(dentry, &newattrs);
+ }
+ goto out_dput;
+ }
+
+ ceph_set_cached_acl(inode, type, acl);
+
+out_dput:
+ dput(dentry);
+out_free:
+ kfree(value);
+out:
+ return ret;
+}
+
+int ceph_init_acl(struct dentry *dentry, struct inode *inode, struct inode *dir)
+{
+ struct posix_acl *default_acl, *acl;
+ int error;
+
+ error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
+ if (error)
+ return error;
+
+ if (!default_acl && !acl)
+ cache_no_acl(inode);
+
+ if (default_acl) {
+ error = ceph_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
+ posix_acl_release(default_acl);
+ }
+ if (acl) {
+ if (!error)
+ error = ceph_set_acl(inode, acl, ACL_TYPE_ACCESS);
+ posix_acl_release(acl);
+ }
+ return error;
+}
diff --git a/ceph/addr.c b/ceph/addr.c
new file mode 100644
index 0000000..b53278c
--- /dev/null
+++ b/ceph/addr.c
@@ -0,0 +1,1345 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/backing-dev.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/writeback.h> /* generic_writepages */
+#include <linux/slab.h>
+#include <linux/pagevec.h>
+#include <linux/task_io_accounting_ops.h>
+
+#include "super.h"
+#include "mds_client.h"
+#include "cache.h"
+#include <linux/ceph/osd_client.h>
+
+/*
+ * Ceph address space ops.
+ *
+ * There are a few funny things going on here.
+ *
+ * The page->private field is used to reference a struct
+ * ceph_snap_context for _every_ dirty page. This indicates which
+ * snapshot the page was logically dirtied in, and thus which snap
+ * context needs to be associated with the osd write during writeback.
+ *
+ * Similarly, struct ceph_inode_info maintains a set of counters to
+ * count dirty pages on the inode. In the absence of snapshots,
+ * i_wrbuffer_ref == i_wrbuffer_ref_head == the dirty page count.
+ *
+ * When a snapshot is taken (that is, when the client receives
+ * notification that a snapshot was taken), each inode with caps and
+ * with dirty pages (dirty pages implies there is a cap) gets a new
+ * ceph_cap_snap in the i_cap_snaps list (which is sorted in ascending
+ * order, new snaps go to the tail). The i_wrbuffer_ref_head count is
+ * moved to capsnap->dirty. (Unless a sync write is currently in
+ * progress. In that case, the capsnap is said to be "pending", new
+ * writes cannot start, and the capsnap isn't "finalized" until the
+ * write completes (or fails) and a final size/mtime for the inode for
+ * that snap can be settled upon.) i_wrbuffer_ref_head is reset to 0.
+ *
+ * On writeback, we must submit writes to the osd IN SNAP ORDER. So,
+ * we look for the first capsnap in i_cap_snaps and write out pages in
+ * that snap context _only_. Then we move on to the next capsnap,
+ * eventually reaching the "live" or "head" context (i.e., pages that
+ * are not yet snapped) and are writing the most recently dirtied
+ * pages.
+ *
+ * Invalidate and so forth must take care to ensure the dirty page
+ * accounting is preserved.
+ */
+
+#define CONGESTION_ON_THRESH(congestion_kb) (congestion_kb >> (PAGE_SHIFT-10))
+#define CONGESTION_OFF_THRESH(congestion_kb) \
+ (CONGESTION_ON_THRESH(congestion_kb) - \
+ (CONGESTION_ON_THRESH(congestion_kb) >> 2))
+
+static inline struct ceph_snap_context *page_snap_context(struct page *page)
+{
+ if (PagePrivate(page))
+ return (void *)page->private;
+ return NULL;
+}
+
+/*
+ * Dirty a page. Optimistically adjust accounting, on the assumption
+ * that we won't race with invalidate. If we do, readjust.
+ */
+static int ceph_set_page_dirty(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ struct inode *inode;
+ struct ceph_inode_info *ci;
+ struct ceph_snap_context *snapc;
+ int ret;
+
+ if (unlikely(!mapping))
+ return !TestSetPageDirty(page);
+
+ if (PageDirty(page)) {
+ dout("%p set_page_dirty %p idx %lu -- already dirty\n",
+ mapping->host, page, page->index);
+ BUG_ON(!PagePrivate(page));
+ return 0;
+ }
+
+ inode = mapping->host;
+ ci = ceph_inode(inode);
+
+ /*
+ * Note that we're grabbing a snapc ref here without holding
+ * any locks!
+ */
+ snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);
+
+ /* dirty the head */
+ spin_lock(&ci->i_ceph_lock);
+ if (ci->i_head_snapc == NULL)
+ ci->i_head_snapc = ceph_get_snap_context(snapc);
+ ++ci->i_wrbuffer_ref_head;
+ if (ci->i_wrbuffer_ref == 0)
+ ihold(inode);
+ ++ci->i_wrbuffer_ref;
+ dout("%p set_page_dirty %p idx %lu head %d/%d -> %d/%d "
+ "snapc %p seq %lld (%d snaps)\n",
+ mapping->host, page, page->index,
+ ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1,
+ ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
+ snapc, snapc->seq, snapc->num_snaps);
+ spin_unlock(&ci->i_ceph_lock);
+
+ /*
+ * Reference snap context in page->private. Also set
+ * PagePrivate so that we get invalidatepage callback.
+ */
+ BUG_ON(PagePrivate(page));
+ page->private = (unsigned long)snapc;
+ SetPagePrivate(page);
+
+ ret = __set_page_dirty_nobuffers(page);
+ WARN_ON(!PageLocked(page));
+ WARN_ON(!page->mapping);
+
+ return ret;
+}
+
+/*
+ * If we are truncating the full page (i.e. offset == 0), adjust the
+ * dirty page counters appropriately. Only called if there is private
+ * data on the page.
+ */
+static void ceph_invalidatepage(struct page *page, unsigned int offset,
+ unsigned int length)
+{
+ struct inode *inode;
+ struct ceph_inode_info *ci;
+ struct ceph_snap_context *snapc = page_snap_context(page);
+
+ inode = page->mapping->host;
+ ci = ceph_inode(inode);
+
+ if (offset != 0 || length != PAGE_CACHE_SIZE) {
+ dout("%p invalidatepage %p idx %lu partial dirty page %u~%u\n",
+ inode, page, page->index, offset, length);
+ return;
+ }
+
+ ceph_invalidate_fscache_page(inode, page);
+
+ if (!PagePrivate(page))
+ return;
+
+ /*
+ * We can get non-dirty pages here due to races between
+ * set_page_dirty and truncate_complete_page; just spit out a
+ * warning, in case we end up with accounting problems later.
+ */
+ if (!PageDirty(page))
+ pr_err("%p invalidatepage %p page not dirty\n", inode, page);
+
+ ClearPageChecked(page);
+
+ dout("%p invalidatepage %p idx %lu full dirty page\n",
+ inode, page, page->index);
+
+ ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
+ ceph_put_snap_context(snapc);
+ page->private = 0;
+ ClearPagePrivate(page);
+}
+
+static int ceph_releasepage(struct page *page, gfp_t g)
+{
+ struct inode *inode = page->mapping ? page->mapping->host : NULL;
+ dout("%p releasepage %p idx %lu\n", inode, page, page->index);
+ WARN_ON(PageDirty(page));
+
+ /* Can we release the page from the cache? */
+ if (!ceph_release_fscache_page(page, g))
+ return 0;
+
+ return !PagePrivate(page);
+}
+
+/*
+ * read a single page, without unlocking it.
+ */
+static int readpage_nounlock(struct file *filp, struct page *page)
+{
+ struct inode *inode = file_inode(filp);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_osd_client *osdc =
+ &ceph_inode_to_client(inode)->client->osdc;
+ int err = 0;
+ u64 len = PAGE_CACHE_SIZE;
+
+ err = ceph_readpage_from_fscache(inode, page);
+
+ if (err == 0)
+ goto out;
+
+ dout("readpage inode %p file %p page %p index %lu\n",
+ inode, filp, page, page->index);
+ err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
+ (u64) page_offset(page), &len,
+ ci->i_truncate_seq, ci->i_truncate_size,
+ &page, 1, 0);
+ if (err == -ENOENT)
+ err = 0;
+ if (err < 0) {
+ SetPageError(page);
+ ceph_fscache_readpage_cancel(inode, page);
+ goto out;
+ } else {
+ if (err < PAGE_CACHE_SIZE) {
+ /* zero fill remainder of page */
+ zero_user_segment(page, err, PAGE_CACHE_SIZE);
+ } else {
+ flush_dcache_page(page);
+ }
+ }
+ SetPageUptodate(page);
+
+ if (err >= 0)
+ ceph_readpage_to_fscache(inode, page);
+
+out:
+ return err < 0 ? err : 0;
+}
+
+static int ceph_readpage(struct file *filp, struct page *page)
+{
+ int r = readpage_nounlock(filp, page);
+ unlock_page(page);
+ return r;
+}
+
+/*
+ * Finish an async read(ahead) op: for every page in the request, mark
+ * it uptodate (zero-filling any tail the OSDs did not return) on
+ * success, then unlock and release it.  On error pages are simply
+ * unlocked, leaving them !PageUptodate.
+ */
+static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
+{
+	struct inode *inode = req->r_inode;
+	struct ceph_osd_data *osd_data;
+	int rc = req->r_result;
+	int bytes = le32_to_cpu(msg->hdr.data_len);
+	int num_pages;
+	int i;
+
+	dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
+
+	/* unlock all pages, zeroing any data we didn't read */
+	osd_data = osd_req_op_extent_osd_data(req, 0);
+	BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
+	num_pages = calc_pages_for((u64)osd_data->alignment,
+				   (u64)osd_data->length);
+	for (i = 0; i < num_pages; i++) {
+		struct page *page = osd_data->pages[i];
+
+		/* on error, just unlock; readers see !PageUptodate */
+		if (rc < 0)
+			goto unlock;
+		if (bytes < (int)PAGE_CACHE_SIZE) {
+			/* zero (remainder of) page */
+			int s = bytes < 0 ? 0 : bytes;
+			zero_user_segment(page, s, PAGE_CACHE_SIZE);
+		}
+		dout("finish_read %p uptodate %p idx %lu\n", inode, page,
+		     page->index);
+		flush_dcache_page(page);
+		SetPageUptodate(page);
+		ceph_readpage_to_fscache(inode, page);
+unlock:
+		unlock_page(page);
+		page_cache_release(page);
+		bytes -= PAGE_CACHE_SIZE;	/* may go negative past the data */
+	}
+	kfree(osd_data->pages);
+}
+
+/* drop the page lock on every page in the vector */
+static void ceph_unlock_page_vector(struct page **pages, int num_pages)
+{
+	int idx = 0;
+
+	while (idx < num_pages) {
+		unlock_page(pages[idx]);
+		idx++;
+	}
+}
+
+/*
+ * start an async read(ahead) operation.  return nr_pages we submitted
+ * a read for on success, or negative error code.
+ *
+ * Consumes (list_del's) the pages it submits from @page_list; pages it
+ * does not take stay on the list for the caller.
+ */
+static int start_read(struct inode *inode, struct list_head *page_list, int max)
+{
+	struct ceph_osd_client *osdc =
+		&ceph_inode_to_client(inode)->client->osdc;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct page *page = list_entry(page_list->prev, struct page, lru);
+	struct ceph_vino vino;
+	struct ceph_osd_request *req;
+	u64 off;
+	u64 len;
+	int i;
+	struct page **pages;
+	pgoff_t next_index;
+	int nr_pages = 0;
+	int ret;
+
+	/* the lowest-index page sits at the tail of the readahead list */
+	off = (u64) page_offset(page);
+
+	/* count pages: take only a consecutive run of indices, capped at max */
+	next_index = page->index;
+	list_for_each_entry_reverse(page, page_list, lru) {
+		if (page->index != next_index)
+			break;
+		nr_pages++;
+		next_index++;
+		if (max && nr_pages == max)
+			break;
+	}
+	len = nr_pages << PAGE_CACHE_SHIFT;
+	dout("start_read %p nr_pages %d is %lld~%lld\n", inode, nr_pages,
+	     off, len);
+	vino = ceph_vino(inode);
+	req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len,
+				    1, CEPH_OSD_OP_READ,
+				    CEPH_OSD_FLAG_READ, NULL,
+				    ci->i_truncate_seq, ci->i_truncate_size,
+				    false);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	/* build page vector */
+	/* recompute from len: ceph_osdc_new_request may have reduced it */
+	nr_pages = calc_pages_for(0, len);
+	pages = kmalloc(sizeof(*pages) * nr_pages, GFP_NOFS);
+	ret = -ENOMEM;
+	if (!pages)
+		goto out;
+	for (i = 0; i < nr_pages; ++i) {
+		page = list_entry(page_list->prev, struct page, lru);
+		BUG_ON(PageLocked(page));
+		list_del(&page->lru);
+
+		dout("start_read %p adding %p idx %lu\n", inode, page,
+		     page->index);
+		/* add_to_page_cache_lru locks the page on success */
+		if (add_to_page_cache_lru(page, &inode->i_data, page->index,
+					  GFP_NOFS)) {
+			ceph_fscache_uncache_page(inode, page);
+			page_cache_release(page);
+			dout("start_read %p add_to_page_cache failed %p\n",
+			     inode, page);
+			nr_pages = i;	/* unwind only the pages added so far */
+			goto out_pages;
+		}
+		pages[i] = page;
+	}
+	osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false);
+	req->r_callback = finish_read;
+	req->r_inode = inode;
+
+	ceph_osdc_build_request(req, off, NULL, vino.snap, NULL);
+
+	dout("start_read %p starting %p %lld~%lld\n", inode, req, off, len);
+	ret = ceph_osdc_start_request(osdc, req, false);
+	if (ret < 0)
+		goto out_pages;
+	ceph_osdc_put_request(req);
+	return nr_pages;
+
+out_pages:
+	ceph_unlock_page_vector(pages, nr_pages);
+	ceph_release_pages(pages, nr_pages);
+out:
+	ceph_osdc_put_request(req);
+	return ret;
+}
+
+
+/*
+ * Read multiple pages.  Leave pages we don't read + unlock in page_list;
+ * the caller (VM) cleans them up.
+ */
+static int ceph_readpages(struct file *file, struct address_space *mapping,
+			  struct list_head *page_list, unsigned nr_pages)
+{
+	struct inode *inode = file_inode(file);
+	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	int rc = 0;
+	int max = 0;
+
+	/* let fscache try to satisfy the whole list first */
+	rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
+					 &nr_pages);
+
+	if (rc == 0)
+		goto out;
+
+	/*
+	 * Cap each async read at the rsize mount option, rounded up to
+	 * whole pages.  Use PAGE_CACHE_SHIFT to match the
+	 * PAGE_CACHE_SIZE rounding above (the original mixed in
+	 * PAGE_SHIFT, which is only coincidentally the same value).
+	 */
+	if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
+		max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
+			>> PAGE_CACHE_SHIFT;
+
+	dout("readpages %p file %p nr_pages %d max %d\n", inode,
+	     file, nr_pages,
+	     max);
+	/* submit one async read per consecutive run of pages */
+	while (!list_empty(page_list)) {
+		rc = start_read(inode, page_list, max);
+		if (rc < 0)
+			goto out;
+		BUG_ON(rc == 0);
+	}
+out:
+	ceph_fscache_readpages_cancel(inode, page_list);
+
+	dout("readpages %p file %p ret %d\n", inode, file, rc);
+	return rc;
+}
+
+/*
+ * Get ref for the oldest snapc for an inode with dirty data... that is, the
+ * only snap context we are allowed to write back.
+ *
+ * Returns a referenced snap context (caller must ceph_put_snap_context())
+ * or NULL if nothing is dirty.  If @snap_size is non-NULL and the oldest
+ * dirty data belongs to a cap snapshot, *snap_size is set to the file
+ * size captured by that snapshot; otherwise it is left untouched.
+ */
+static struct ceph_snap_context *get_oldest_context(struct inode *inode,
+						    u64 *snap_size)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_snap_context *snapc = NULL;
+	struct ceph_cap_snap *capsnap = NULL;
+
+	spin_lock(&ci->i_ceph_lock);
+	/* take the first cap snap that still has dirty pages */
+	list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
+		dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
+		     capsnap->context, capsnap->dirty_pages);
+		if (capsnap->dirty_pages) {
+			snapc = ceph_get_snap_context(capsnap->context);
+			if (snap_size)
+				*snap_size = capsnap->size;
+			break;
+		}
+	}
+	if (!snapc && ci->i_wrbuffer_ref_head) {
+		/* no dirty cap snap; fall back to the live head context */
+		snapc = ceph_get_snap_context(ci->i_head_snapc);
+		dout(" head snapc %p has %d dirty pages\n",
+		     snapc, ci->i_wrbuffer_ref_head);
+	}
+	spin_unlock(&ci->i_ceph_lock);
+	return snapc;
+}
+
+/*
+ * Write a single page, but leave the page locked.
+ *
+ * If we get a write error, set the page error bit, but still adjust the
+ * dirty page accounting (i.e., page is no longer dirty).
+ *
+ * The write is synchronous (ceph_osdc_writepages blocks until the OSD
+ * replies).  Only the oldest dirty snap context may be written; a page
+ * dirtied under a newer context is left untouched (noop).
+ */
+static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
+{
+	struct inode *inode;
+	struct ceph_inode_info *ci;
+	struct ceph_fs_client *fsc;
+	struct ceph_osd_client *osdc;
+	struct ceph_snap_context *snapc, *oldest;
+	loff_t page_off = page_offset(page);
+	long writeback_stat;
+	u64 truncate_size, snap_size = 0;
+	u32 truncate_seq;
+	int err = 0, len = PAGE_CACHE_SIZE;
+
+	dout("writepage %p idx %lu\n", page, page->index);
+
+	if (!page->mapping || !page->mapping->host) {
+		dout("writepage %p - no mapping\n", page);
+		return -EFAULT;
+	}
+	inode = page->mapping->host;
+	ci = ceph_inode(inode);
+	fsc = ceph_inode_to_client(inode);
+	osdc = &fsc->client->osdc;
+
+	/* verify this is a writeable snap context */
+	snapc = page_snap_context(page);
+	if (snapc == NULL) {
+		dout("writepage %p page %p not dirty?\n", inode, page);
+		goto out;
+	}
+	oldest = get_oldest_context(inode, &snap_size);
+	if (snapc->seq > oldest->seq) {
+		dout("writepage %p page %p snapc %p not writeable - noop\n",
+		     inode, page, snapc);
+		/* we should only noop if called by kswapd */
+		WARN_ON((current->flags & PF_MEMALLOC) == 0);
+		ceph_put_snap_context(oldest);
+		goto out;
+	}
+	ceph_put_snap_context(oldest);
+
+	/* sample truncate state under the inode spinlock */
+	spin_lock(&ci->i_ceph_lock);
+	truncate_seq = ci->i_truncate_seq;
+	truncate_size = ci->i_truncate_size;
+	if (!snap_size)
+		snap_size = i_size_read(inode);
+	spin_unlock(&ci->i_ceph_lock);
+
+	/* is this a partial page at end of file? */
+	if (page_off >= snap_size) {
+		dout("%p page eof %llu\n", page, snap_size);
+		goto out;
+	}
+	if (snap_size < page_off + len)
+		len = snap_size - page_off;
+
+	dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
+	     inode, page, page->index, page_off, len, snapc);
+
+	writeback_stat = atomic_long_inc_return(&fsc->writeback_count);
+	if (writeback_stat >
+	    CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
+		set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
+
+	ceph_readpage_to_fscache(inode, page);
+
+	set_page_writeback(page);
+	/* synchronous OSD write of [page_off, page_off + len) */
+	err = ceph_osdc_writepages(osdc, ceph_vino(inode),
+				   &ci->i_layout, snapc,
+				   page_off, len,
+				   truncate_seq, truncate_size,
+				   &inode->i_mtime, &page, 1);
+	if (err < 0) {
+		dout("writepage setting page/mapping error %d %p\n", err, page);
+		SetPageError(page);
+		mapping_set_error(&inode->i_data, err);
+		if (wbc)
+			wbc->pages_skipped++;
+	} else {
+		dout("writepage cleaned page %p\n", page);
+		err = 0;  /* vfs expects us to return 0 */
+	}
+	/* drop the page's snapc reference and dirty-page accounting */
+	page->private = 0;
+	ClearPagePrivate(page);
+	end_page_writeback(page);
+	ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
+	ceph_put_snap_context(snapc);  /* page's reference */
+out:
+	return err;
+}
+
+static int ceph_writepage(struct page *page, struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	int ret;
+
+	BUG_ON(!inode);
+	/* pin the inode while the synchronous write is in flight */
+	ihold(inode);
+	ret = writepage_nounlock(page, wbc);
+	unlock_page(page);
+	iput(inode);
+	return ret;
+}
+
+
+/*
+ * lame release_pages helper.  release_pages() isn't exported to
+ * modules, so batch the puts through a pagevec instead.
+ */
+static void ceph_release_pages(struct page **pages, int num)
+{
+	struct pagevec pvec;
+	int idx;
+
+	pagevec_init(&pvec, 0);
+	for (idx = 0; idx < num; idx++)
+		if (!pagevec_add(&pvec, pages[idx]))
+			pagevec_release(&pvec);
+	pagevec_release(&pvec);	/* flush any partial batch */
+}
+
+/*
+ * async writeback completion handler.
+ *
+ * If we get an error, set the mapping error bit, but not the individual
+ * page error bits.
+ */
+static void writepages_finish(struct ceph_osd_request *req,
+			      struct ceph_msg *msg)
+{
+	struct inode *inode = req->r_inode;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_osd_data *osd_data;
+	unsigned wrote;
+	struct page *page;
+	int num_pages;
+	int i;
+	struct ceph_snap_context *snapc = req->r_snapc;
+	struct address_space *mapping = inode->i_mapping;
+	int rc = req->r_result;
+	u64 bytes = req->r_ops[0].extent.length;
+	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	long writeback_stat;
+	unsigned issued = ceph_caps_issued(ci);
+
+	osd_data = osd_req_op_extent_osd_data(req, 0);
+	BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
+	num_pages = calc_pages_for((u64)osd_data->alignment,
+				   (u64)osd_data->length);
+	if (rc >= 0) {
+		/*
+		 * Assume we wrote the pages we originally sent.  The
+		 * osd might reply with fewer pages if our writeback
+		 * raced with a truncation and was adjusted at the osd,
+		 * so don't believe the reply.
+		 */
+		wrote = num_pages;
+	} else {
+		wrote = 0;
+		mapping_set_error(mapping, rc);
+	}
+	dout("writepages_finish %p rc %d bytes %llu wrote %d (pages)\n",
+	     inode, rc, bytes, wrote);
+
+	/* clean all pages */
+	for (i = 0; i < num_pages; i++) {
+		page = osd_data->pages[i];
+		BUG_ON(!page);
+		WARN_ON(!PageUptodate(page));
+
+		writeback_stat =
+			atomic_long_dec_return(&fsc->writeback_count);
+		if (writeback_stat <
+		    CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb))
+			clear_bdi_congested(&fsc->backing_dev_info,
+					    BLK_RW_ASYNC);
+
+		/* drop the snapc ref held via page->private */
+		ceph_put_snap_context(page_snap_context(page));
+		page->private = 0;
+		ClearPagePrivate(page);
+		dout("unlocking %d %p\n", i, page);
+		end_page_writeback(page);
+
+		/*
+		 * We lost the cache cap, need to truncate the page before
+		 * it is unlocked, otherwise we'd truncate it later in the
+		 * page truncation thread, possibly losing some data that
+		 * raced its way in
+		 */
+		if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0)
+			generic_error_remove_page(inode->i_mapping, page);
+
+		unlock_page(page);
+	}
+	dout("%p wrote+cleaned %d pages\n", inode, wrote);
+	ceph_put_wrbuffer_cap_refs(ci, num_pages, snapc);
+
+	ceph_release_pages(osd_data->pages, num_pages);
+	/* the page array came from kmalloc or the wb mempool; free to match */
+	if (osd_data->pages_from_pool)
+		mempool_free(osd_data->pages,
+			     ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool);
+	else
+		kfree(osd_data->pages);
+	ceph_osdc_put_request(req);
+}
+
+/*
+ * initiate async writeback
+ *
+ * Walk the dirty pages of @mapping (honoring wbc's range and sync mode),
+ * batch runs of consecutive dirty pages belonging to the oldest
+ * writeable snap context into OSD write requests, and submit them
+ * asynchronously; writepages_finish() completes the accounting.
+ */
+static int ceph_writepages_start(struct address_space *mapping,
+				 struct writeback_control *wbc)
+{
+	struct inode *inode = mapping->host;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	struct ceph_vino vino = ceph_vino(inode);
+	pgoff_t index, start, end;
+	int range_whole = 0;
+	int should_loop = 1;
+	pgoff_t max_pages = 0, max_pages_ever = 0;
+	struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
+	struct pagevec pvec;
+	int done = 0;
+	int rc = 0;
+	unsigned wsize = 1 << inode->i_blkbits;
+	struct ceph_osd_request *req = NULL;
+	int do_sync = 0;	/* FIX: was uninitialized, but read below
+				 * (num_ops, dout) even when no sync write
+				 * is needed -- undefined behavior */
+	u64 truncate_size, snap_size;
+	u32 truncate_seq;
+
+	/*
+	 * Include a 'sync' in the OSD request if this is a data
+	 * integrity write (e.g., O_SYNC write or fsync()), or if our
+	 * cap is being revoked.
+	 */
+	if ((wbc->sync_mode == WB_SYNC_ALL) ||
+	    ceph_caps_revoking(ci, CEPH_CAP_FILE_BUFFER))
+		do_sync = 1;
+	dout("writepages_start %p dosync=%d (mode=%s)\n",
+	     inode, do_sync,
+	     wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
+	     (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
+
+	if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) {
+		pr_warning("writepage_start %p on forced umount\n", inode);
+		return -EIO; /* we're in a forced umount, don't write! */
+	}
+	if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize)
+		wsize = fsc->mount_options->wsize;
+	if (wsize < PAGE_CACHE_SIZE)
+		wsize = PAGE_CACHE_SIZE;
+	max_pages_ever = wsize >> PAGE_CACHE_SHIFT;
+
+	pagevec_init(&pvec, 0);
+
+	/* where to start/end? */
+	if (wbc->range_cyclic) {
+		start = mapping->writeback_index; /* Start from prev offset */
+		end = -1;
+		dout(" cyclic, start at %lu\n", start);
+	} else {
+		start = wbc->range_start >> PAGE_CACHE_SHIFT;
+		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+			range_whole = 1;
+		should_loop = 0;
+		dout(" not cyclic, %lu to %lu\n", start, end);
+	}
+	index = start;
+
+retry:
+	/* find oldest snap context with dirty data */
+	ceph_put_snap_context(snapc);
+	snap_size = 0;
+	snapc = get_oldest_context(inode, &snap_size);
+	if (!snapc) {
+		/* hmm, why does writepages get called when there
+		   is no dirty data? */
+		dout(" no snap context with dirty data?\n");
+		goto out;
+	}
+	if (snap_size == 0)
+		snap_size = i_size_read(inode);
+	dout(" oldest snapc is %p seq %lld (%d snaps)\n",
+	     snapc, snapc->seq, snapc->num_snaps);
+
+	spin_lock(&ci->i_ceph_lock);
+	truncate_seq = ci->i_truncate_seq;
+	truncate_size = ci->i_truncate_size;
+	if (!snap_size)
+		snap_size = i_size_read(inode);	/* redundant: defaulted above */
+	spin_unlock(&ci->i_ceph_lock);
+
+	if (last_snapc && snapc != last_snapc) {
+		/* if we switched to a newer snapc, restart our scan at the
+		 * start of the original file range. */
+		dout(" snapc differs from last pass, restarting at %lu\n",
+		     index);
+		index = start;
+	}
+	last_snapc = snapc;
+
+	while (!done && index <= end) {
+		int num_ops = do_sync ? 2 : 1;
+		unsigned i;
+		int first;
+		pgoff_t next;
+		int pvec_pages, locked_pages;
+		struct page **pages = NULL;
+		mempool_t *pool = NULL;	/* Becomes non-null if mempool used */
+		struct page *page;
+		int want;
+		u64 offset, len;
+		long writeback_stat;
+
+		next = 0;
+		locked_pages = 0;
+		max_pages = max_pages_ever;
+
+get_more_pages:
+		first = -1;
+		want = min(end - index,
+			   min((pgoff_t)PAGEVEC_SIZE,
+			       max_pages - (pgoff_t)locked_pages) - 1)
+			+ 1;
+		pvec_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+						PAGECACHE_TAG_DIRTY,
+						want);
+		dout("pagevec_lookup_tag got %d\n", pvec_pages);
+		if (!pvec_pages && !locked_pages)
+			break;
+		for (i = 0; i < pvec_pages && locked_pages < max_pages; i++) {
+			page = pvec.pages[i];
+			dout("? %p idx %lu\n", page, page->index);
+			if (locked_pages == 0)
+				lock_page(page);  /* first page */
+			else if (!trylock_page(page))
+				break;
+
+			/* only dirty pages, or our accounting breaks */
+			if (unlikely(!PageDirty(page)) ||
+			    unlikely(page->mapping != mapping)) {
+				dout("!dirty or !mapping %p\n", page);
+				unlock_page(page);
+				break;
+			}
+			if (!wbc->range_cyclic && page->index > end) {
+				dout("end of range %p\n", page);
+				done = 1;
+				unlock_page(page);
+				break;
+			}
+			if (next && (page->index != next)) {
+				dout("not consecutive %p\n", page);
+				unlock_page(page);
+				break;
+			}
+			if (wbc->sync_mode != WB_SYNC_NONE) {
+				dout("waiting on writeback %p\n", page);
+				wait_on_page_writeback(page);
+			}
+			if (page_offset(page) >= snap_size) {
+				dout("%p page eof %llu\n", page, snap_size);
+				done = 1;
+				unlock_page(page);
+				break;
+			}
+			if (PageWriteback(page)) {
+				dout("%p under writeback\n", page);
+				unlock_page(page);
+				break;
+			}
+
+			/* only if matching snap context */
+			pgsnapc = page_snap_context(page);
+			if (pgsnapc->seq > snapc->seq) {
+				dout("page snapc %p %lld > oldest %p %lld\n",
+				     pgsnapc, pgsnapc->seq, snapc, snapc->seq);
+				unlock_page(page);
+				if (!locked_pages)
+					continue; /* keep looking for snap */
+				break;
+			}
+
+			if (!clear_page_dirty_for_io(page)) {
+				dout("%p !clear_page_dirty_for_io\n", page);
+				unlock_page(page);
+				break;
+			}
+
+			/*
+			 * We have something to write.  If this is
+			 * the first locked page this time through,
+			 * allocate an osd request and a page array
+			 * that it will use.
+			 */
+			if (locked_pages == 0) {
+				BUG_ON(pages);
+				/* prepare async write request */
+				offset = (u64)page_offset(page);
+				len = wsize;
+				req = ceph_osdc_new_request(&fsc->client->osdc,
+							&ci->i_layout, vino,
+							offset, &len, num_ops,
+							CEPH_OSD_OP_WRITE,
+							CEPH_OSD_FLAG_WRITE |
+							CEPH_OSD_FLAG_ONDISK,
+							snapc, truncate_seq,
+							truncate_size, true);
+				if (IS_ERR(req)) {
+					rc = PTR_ERR(req);
+					unlock_page(page);
+					break;
+				}
+
+				req->r_callback = writepages_finish;
+				req->r_inode = inode;
+
+				max_pages = calc_pages_for(0, (u64)len);
+				pages = kmalloc(max_pages * sizeof (*pages),
+						GFP_NOFS);
+				if (!pages) {
+					/* fall back to the emergency mempool */
+					pool = fsc->wb_pagevec_pool;
+					pages = mempool_alloc(pool, GFP_NOFS);
+					BUG_ON(!pages);
+				}
+			}
+
+			/* note position of first page in pvec */
+			if (first < 0)
+				first = i;
+			dout("%p will write page %p idx %lu\n",
+			     inode, page, page->index);
+
+			writeback_stat =
+			       atomic_long_inc_return(&fsc->writeback_count);
+			if (writeback_stat > CONGESTION_ON_THRESH(
+				    fsc->mount_options->congestion_kb)) {
+				set_bdi_congested(&fsc->backing_dev_info,
+						  BLK_RW_ASYNC);
+			}
+
+			set_page_writeback(page);
+			pages[locked_pages] = page;
+			locked_pages++;
+			next = page->index + 1;
+		}
+
+		/* did we get anything? */
+		if (!locked_pages)
+			goto release_pvec_pages;
+		if (i) {
+			int j;
+			BUG_ON(!locked_pages || first < 0);
+
+			if (pvec_pages && i == pvec_pages &&
+			    locked_pages < max_pages) {
+				dout("reached end pvec, trying for more\n");
+				pagevec_reinit(&pvec);
+				goto get_more_pages;
+			}
+
+			/* shift unused pages over in the pvec...  we
+			 * will need to release them below. */
+			for (j = i; j < pvec_pages; j++) {
+				dout(" pvec leftover page %p\n",
+				     pvec.pages[j]);
+				pvec.pages[j-i+first] = pvec.pages[j];
+			}
+			pvec.nr -= i-first;
+		}
+
+		/* Format the osd request message and submit the write */
+
+		offset = page_offset(pages[0]);
+		len = min(snap_size - offset,
+			  (u64)locked_pages << PAGE_CACHE_SHIFT);
+		dout("writepages got %d pages at %llu~%llu\n",
+		     locked_pages, offset, len);
+
+		osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
+						 !!pool, false);
+
+		pages = NULL;	/* request message now owns the pages array */
+		pool = NULL;
+
+		/* Update the write op length in case we changed it */
+
+		osd_req_op_extent_update(req, 0, len);
+
+		vino = ceph_vino(inode);
+		ceph_osdc_build_request(req, offset, snapc, vino.snap,
+					&inode->i_mtime);
+
+		rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
+		BUG_ON(rc);
+		req = NULL;
+
+		/* continue? */
+		index = next;
+		wbc->nr_to_write -= locked_pages;
+		if (wbc->nr_to_write <= 0)
+			done = 1;
+
+release_pvec_pages:
+		dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr,
+		     pvec.nr ? pvec.pages[0] : NULL);
+		pagevec_release(&pvec);
+
+		if (locked_pages && !done)
+			goto retry;
+	}
+
+	if (should_loop && !done) {
+		/* more to do; loop back to beginning of file */
+		dout("writepages looping back to beginning of file\n");
+		should_loop = 0;
+		index = 0;
+		goto retry;
+	}
+
+	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+		mapping->writeback_index = index;
+
+out:
+	if (req)
+		ceph_osdc_put_request(req);
+	ceph_put_snap_context(snapc);
+	dout("writepages done, rc = %d\n", rc);
+	return rc;
+}
+
+
+
+/*
+ * See if a given @snapc is either writeable, or already written.
+ */
+static int context_is_writeable_or_written(struct inode *inode,
+					   struct ceph_snap_context *snapc)
+{
+	struct ceph_snap_context *oldest;
+	int writeable;
+
+	/* no oldest context at all means there is no dirty data left */
+	oldest = get_oldest_context(inode, NULL);
+	writeable = (oldest == NULL) || (snapc->seq <= oldest->seq);
+	ceph_put_snap_context(oldest);
+	return writeable;
+}
+
+/*
+ * We are only allowed to write into/dirty the page if the page is
+ * clean, or already dirty within the same snap context.
+ *
+ * called with page locked.
+ * return success with page locked AND mdsc->snap_rwsem held for read
+ * (the caller drops it: see ceph_write_end / ceph_page_mkwrite),
+ * or any failure (incl -EAGAIN) with page unlocked and snap_rwsem
+ * released.
+ */
+static int ceph_update_writeable_page(struct file *file,
+			    loff_t pos, unsigned len,
+			    struct page *page)
+{
+	struct inode *inode = file_inode(file);
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+	loff_t page_off = pos & PAGE_CACHE_MASK;
+	int pos_in_page = pos & ~PAGE_CACHE_MASK;
+	int end_in_page = pos_in_page + len;
+	loff_t i_size;
+	int r;
+	struct ceph_snap_context *snapc, *oldest;
+
+retry_locked:
+	/* writepages currently holds page lock, but if we change that later, */
+	wait_on_page_writeback(page);
+
+	/* check snap context */
+	BUG_ON(!ci->i_snap_realm);
+	down_read(&mdsc->snap_rwsem);
+	BUG_ON(!ci->i_snap_realm->cached_context);
+	snapc = page_snap_context(page);
+	if (snapc && snapc != ci->i_head_snapc) {
+		/*
+		 * this page is already dirty in another (older) snap
+		 * context!  is it writeable now?
+		 */
+		oldest = get_oldest_context(inode, NULL);
+		up_read(&mdsc->snap_rwsem);
+
+		if (snapc->seq > oldest->seq) {
+			ceph_put_snap_context(oldest);
+			dout(" page %p snapc %p not current or oldest\n",
+			     page, snapc);
+			/*
+			 * queue for writeback, and wait for snapc to
+			 * be writeable or written
+			 */
+			snapc = ceph_get_snap_context(snapc);
+			unlock_page(page);
+			ceph_queue_writeback(inode);
+			r = wait_event_interruptible(ci->i_cap_wq,
+			       context_is_writeable_or_written(inode, snapc));
+			ceph_put_snap_context(snapc);
+			if (r == -ERESTARTSYS)
+				return r;
+			/* tell the caller to start over from the top */
+			return -EAGAIN;
+		}
+		ceph_put_snap_context(oldest);
+
+		/* yay, writeable, do it now (without dropping page lock) */
+		dout(" page %p snapc %p not current, but oldest\n",
+		     page, snapc);
+		if (!clear_page_dirty_for_io(page))
+			goto retry_locked;
+		r = writepage_nounlock(page, NULL);
+		if (r < 0)
+			goto fail_nosnap;
+		goto retry_locked;
+	}
+
+	if (PageUptodate(page)) {
+		dout(" page %p already uptodate\n", page);
+		return 0;
+	}
+
+	/* full page? */
+	if (pos_in_page == 0 && len == PAGE_CACHE_SIZE)
+		return 0;
+
+	/* past end of file? */
+	i_size = inode->i_size;   /* caller holds i_mutex */
+
+	if (i_size + len > inode->i_sb->s_maxbytes) {
+		/* file is too big */
+		r = -EINVAL;
+		goto fail;
+	}
+
+	/* a partial write wholly past EOF needs no read: zero around it */
+	if (page_off >= i_size ||
+	    (pos_in_page == 0 && (pos+len) >= i_size &&
+	     end_in_page - pos_in_page != PAGE_CACHE_SIZE)) {
+		dout(" zeroing %p 0 - %d and %d - %d\n",
+		     page, pos_in_page, end_in_page, (int)PAGE_CACHE_SIZE);
+		zero_user_segments(page,
+				   0, pos_in_page,
+				   end_in_page, PAGE_CACHE_SIZE);
+		return 0;
+	}
+
+	/* we need to read it. */
+	up_read(&mdsc->snap_rwsem);
+	r = readpage_nounlock(file, page);
+	if (r < 0)
+		goto fail_nosnap;
+	goto retry_locked;
+
+fail:
+	up_read(&mdsc->snap_rwsem);
+fail_nosnap:
+	unlock_page(page);
+	return r;
+}
+
+/*
+ * We are only allowed to write into/dirty the page if the page is
+ * clean, or already dirty within the same snap context.
+ *
+ * On success *pagep holds the locked page (and snap_rwsem is held, see
+ * ceph_update_writeable_page()); on failure a negative errno is
+ * returned and the page reference has been dropped.
+ */
+static int ceph_write_begin(struct file *file, struct address_space *mapping,
+			    loff_t pos, unsigned len, unsigned flags,
+			    struct page **pagep, void **fsdata)
+{
+	struct inode *inode = file_inode(file);
+	struct page *page;
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	int r;
+
+	do {
+		/* get a page */
+		page = grab_cache_page_write_begin(mapping, index, 0);
+		if (!page)
+			return -ENOMEM;
+		*pagep = page;
+
+		dout("write_begin file %p inode %p page %p %d~%d\n", file,
+		     inode, page, (int)pos, (int)len);
+
+		r = ceph_update_writeable_page(file, pos, len, page);
+		/*
+		 * On any failure (including -EAGAIN, which loops and
+		 * grabs the page again) the page was unlocked by
+		 * ceph_update_writeable_page(); drop the reference
+		 * grab_cache_page_write_begin() took, which the
+		 * original code leaked.
+		 */
+		if (r < 0)
+			page_cache_release(page);
+	} while (r == -EAGAIN);
+
+	return r;
+}
+
+/*
+ * we don't do anything in here that simple_write_end doesn't do
+ * except adjust dirty page accounting and drop read lock on
+ * mdsc->snap_rwsem.
+ */
+static int ceph_write_end(struct file *file, struct address_space *mapping,
+			  loff_t pos, unsigned len, unsigned copied,
+			  struct page *page, void *fsdata)
+{
+	struct inode *inode = file_inode(file);
+	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
+	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+	int check_cap = 0;
+
+	dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
+	     inode, page, (int)pos, (int)copied, (int)len);
+
+	/* zero the stale part of the page if we did a short copy */
+	/*
+	 * NOTE(review): the upper bound here is 'len', not 'from + len';
+	 * when 'from' > 0 this zeroes less than the dirtied span (and the
+	 * bounds can even invert).  Looks wrong -- confirm against the
+	 * zero_user_segment() contract before relying on this path.
+	 */
+	if (copied < len)
+		zero_user_segment(page, from+copied, len);
+
+	/* did file size increase? */
+	/* (no need for i_size_read(); we caller holds i_mutex */
+	if (pos+copied > inode->i_size)
+		check_cap = ceph_inode_set_size(inode, pos+copied);
+
+	if (!PageUptodate(page))
+		SetPageUptodate(page);
+
+	set_page_dirty(page);
+
+	unlock_page(page);
+	/* pairs with the down_read() in ceph_update_writeable_page() */
+	up_read(&mdsc->snap_rwsem);
+	page_cache_release(page);
+
+	if (check_cap)
+		ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL);
+
+	return copied;
+}
+
+/*
+ * we set .direct_IO to indicate direct io is supported, but since we
+ * intercept O_DIRECT reads and writes early, this function should
+ * never get called.
+ */
+static ssize_t ceph_direct_io(int rw, struct kiocb *iocb,
+			      const struct iovec *iov,
+			      loff_t pos, unsigned long nr_segs)
+{
+	/* reaching here means the early O_DIRECT interception was bypassed */
+	WARN_ON(1);
+	return -EINVAL;
+}
+
+/* address_space operations wiring the VFS page cache paths defined above */
+const struct address_space_operations ceph_aops = {
+	.readpage = ceph_readpage,
+	.readpages = ceph_readpages,
+	.writepage = ceph_writepage,
+	.writepages = ceph_writepages_start,
+	.write_begin = ceph_write_begin,
+	.write_end = ceph_write_end,
+	.set_page_dirty = ceph_set_page_dirty,
+	.invalidatepage = ceph_invalidatepage,
+	.releasepage = ceph_releasepage,
+	.direct_IO = ceph_direct_io,
+};
+
+
+/*
+ * vm ops
+ */
+/*
+ * Read fault handler: take a FILE_RD cap reference, let the generic
+ * filemap_fault() do the actual page-in, then drop the cap refs.
+ */
+static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct inode *inode = file_inode(vma->vm_file);
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_file_info *fi = vma->vm_file->private_data;
+	loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT;
+	int want, got, ret;
+
+	dout("filemap_fault %p %llx.%llx %llu~%zd trying to get caps\n",
+	     inode, ceph_vinop(inode), off, (size_t)PAGE_CACHE_SIZE);
+	/* lazy-io opens are satisfied by LAZYIO as well as CACHE caps */
+	if (fi->fmode & CEPH_FILE_MODE_LAZY)
+		want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
+	else
+		want = CEPH_CAP_FILE_CACHE;
+	/* loop until we hold a read cap; only -ERESTARTSYS is retried */
+	while (1) {
+		got = 0;
+		ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
+		if (ret == 0)
+			break;
+		if (ret != -ERESTARTSYS) {
+			WARN_ON(1);
+			return VM_FAULT_SIGBUS;
+		}
+	}
+	dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
+	     inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got));
+
+	ret = filemap_fault(vma, vmf);
+
+	dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
+	     inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret);
+	ceph_put_cap_refs(ci, got);
+
+	return ret;
+}
+
+/*
+ * Reuse write_begin here for simplicity.
+ *
+ * Write fault handler: take a FILE_WR cap reference, make the page
+ * writeable via ceph_update_writeable_page(), and mark the cap dirty.
+ */
+static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct inode *inode = file_inode(vma->vm_file);
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_file_info *fi = vma->vm_file->private_data;
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+	struct page *page = vmf->page;
+	loff_t off = page_offset(page);
+	loff_t size = i_size_read(inode);
+	size_t len;
+	int want, got, ret;
+
+	/* only the part of a tail page below i_size is writeable */
+	if (off + PAGE_CACHE_SIZE <= size)
+		len = PAGE_CACHE_SIZE;
+	else
+		len = size & ~PAGE_CACHE_MASK;
+
+	dout("page_mkwrite %p %llx.%llx %llu~%zd getting caps i_size %llu\n",
+	     inode, ceph_vinop(inode), off, len, size);
+	if (fi->fmode & CEPH_FILE_MODE_LAZY)
+		want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
+	else
+		want = CEPH_CAP_FILE_BUFFER;
+	/* loop until we hold a write cap; only -ERESTARTSYS is retried */
+	while (1) {
+		got = 0;
+		ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, off + len);
+		if (ret == 0)
+			break;
+		if (ret != -ERESTARTSYS) {
+			WARN_ON(1);
+			return VM_FAULT_SIGBUS;
+		}
+	}
+	dout("page_mkwrite %p %llu~%zd got cap refs on %s\n",
+	     inode, off, len, ceph_cap_string(got));
+
+	/* Update time before taking page lock */
+	file_update_time(vma->vm_file);
+
+	lock_page(page);
+
+	if ((off > size) ||
+	    (page->mapping != inode->i_mapping)) {
+		unlock_page(page);
+		ret = VM_FAULT_NOPAGE;
+		goto out;
+	}
+
+	ret = ceph_update_writeable_page(vma->vm_file, off, len, page);
+	if (ret == 0) {
+		int dirty;
+
+		/* success.  we'll keep the page locked. */
+		set_page_dirty(page);
+		/* ceph_update_writeable_page() returned with snap_rwsem held */
+		up_read(&mdsc->snap_rwsem);
+		ret = VM_FAULT_LOCKED;
+
+		/* tell the MDS about the dirtied FILE_WR data */
+		spin_lock(&ci->i_ceph_lock);
+		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
+		spin_unlock(&ci->i_ceph_lock);
+		if (dirty)
+			__mark_inode_dirty(inode, dirty);
+	} else {
+		/*
+		 * ceph_update_writeable_page() returns with the page
+		 * already unlocked on any failure; do NOT unlock it
+		 * again here (the original code did, a double unlock).
+		 */
+		if (ret == -ENOMEM)
+			ret = VM_FAULT_OOM;
+		else
+			ret = VM_FAULT_SIGBUS;
+	}
+out:
+	dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n",
+	     inode, off, len, ceph_cap_string(got), ret);
+	ceph_put_cap_refs(ci, got);
+
+	return ret;
+}
+
+/* vm operations for ceph mmap()ed files */
+static struct vm_operations_struct ceph_vmops = {
+	.fault		= ceph_filemap_fault,
+	.page_mkwrite	= ceph_page_mkwrite,
+	.remap_pages	= generic_file_remap_pages,
+};
+
+int ceph_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct address_space *mapping = file->f_mapping;
+
+	/* mmap needs a working ->readpage to fault pages in */
+	if (mapping->a_ops->readpage == NULL)
+		return -ENOEXEC;
+
+	file_accessed(file);
+	vma->vm_ops = &ceph_vmops;
+	return 0;
+}
diff --git a/ceph/cache.c b/ceph/cache.c
new file mode 100644
index 0000000..834f9f3
--- /dev/null
+++ b/ceph/cache.c
@@ -0,0 +1,402 @@
+/*
+ * Ceph cache definitions.
+ *
+ * Copyright (C) 2013 by Adfin Solutions, Inc. All Rights Reserved.
+ * Written by Milosz Tanski (milosz at adfin.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include "super.h"
+#include "cache.h"
+
+/*
+ * Auxiliary data stored alongside each inode's fscache cookie; the
+ * check_aux callback compares it against the live inode to detect a
+ * stale cached object (mtime/size mismatch).
+ */
+struct ceph_aux_inode {
+ struct timespec mtime;
+ loff_t size;
+};
+
+/* Top-level fscache netfs definition for the ceph filesystem. */
+struct fscache_netfs ceph_cache_netfs = {
+ .name = "ceph",
+ .version = 0,
+};
+
+/*
+ * Index key for the per-filesystem fscache object: the cluster fsid.
+ * Returns the key length, or 0 if the caller's buffer is too small.
+ */
+static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t maxbuf)
+{
+ const struct ceph_fs_client* fsc = cookie_netfs_data;
+ const uint16_t keylen = sizeof(fsc->client->fsid);
+
+ if (keylen > maxbuf)
+ return 0;
+
+ memcpy(buffer, &fsc->client->fsid, keylen);
+ return keylen;
+}
+
+/* Cookie definition for the per-filesystem (fsid) index object. */
+static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
+ .name = "CEPH.fsid",
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
+ .get_key = ceph_fscache_session_get_key,
+};
+
+/* Register the ceph netfs with the fscache core (module init). */
+int ceph_fscache_register(void)
+{
+ return fscache_register_netfs(&ceph_cache_netfs);
+}
+
+/* Undo ceph_fscache_register() (module exit). */
+void ceph_fscache_unregister(void)
+{
+ fscache_unregister_netfs(&ceph_cache_netfs);
+}
+
+/*
+ * Acquire the per-filesystem fscache cookie and create the workqueue
+ * used to revalidate cached inodes. Failure to get a cookie is not
+ * fatal: caching is simply disabled for this mount (returns 0).
+ * Fix: "resgister" typo and missing newline in the pr_err format.
+ */
+int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
+{
+ fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
+ &ceph_fscache_fsid_object_def,
+ fsc, true);
+
+ if (fsc->fscache == NULL) {
+ pr_err("Unable to register fsid: %p fscache cookie\n", fsc);
+ return 0;
+ }
+
+ fsc->revalidate_wq = alloc_workqueue("ceph-revalidate", 0, 1);
+ if (fsc->revalidate_wq == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/*
+ * Index key for a cached inode: the ceph virtual inode number
+ * (inode id + snapshot id). Returns 0 if the buffer is too small.
+ */
+static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t maxbuf)
+{
+ const struct ceph_inode_info* ci = cookie_netfs_data;
+ const uint16_t keylen = sizeof(ci->i_vino);
+
+ if (keylen > maxbuf)
+ return 0;
+
+ memcpy(buffer, &ci->i_vino, keylen);
+ return keylen;
+}
+
+/*
+ * Emit the auxiliary data (mtime + size) for the inode cookie.
+ * Fix: honour @bufmax -- the original copied sizeof(aux) bytes
+ * unconditionally, which could overrun a smaller buffer supplied by
+ * the fscache core. Returning 0 means "no aux data".
+ */
+static uint16_t ceph_fscache_inode_get_aux(const void *cookie_netfs_data,
+ void *buffer, uint16_t bufmax)
+{
+ struct ceph_aux_inode aux;
+ const struct ceph_inode_info* ci = cookie_netfs_data;
+ const struct inode* inode = &ci->vfs_inode;
+
+ if (bufmax < sizeof(aux))
+ return 0;
+
+ memset(&aux, 0, sizeof(aux));
+ aux.mtime = inode->i_mtime;
+ aux.size = inode->i_size;
+
+ memcpy(buffer, &aux, sizeof(aux));
+
+ return sizeof(aux);
+}
+
+/* Report the cached object's size (current i_size) to fscache. */
+static void ceph_fscache_inode_get_attr(const void *cookie_netfs_data,
+ uint64_t *size)
+{
+ const struct ceph_inode_info* ci = cookie_netfs_data;
+
+ *size = ci->vfs_inode.i_size;
+}
+
+/*
+ * Compare the aux data recorded in the cache against the inode's
+ * current mtime and size; any mismatch marks the object obsolete.
+ */
+static enum fscache_checkaux ceph_fscache_inode_check_aux(
+ void *cookie_netfs_data, const void *data, uint16_t dlen)
+{
+ struct ceph_aux_inode aux;
+ struct ceph_inode_info* ci = cookie_netfs_data;
+ struct inode* inode = &ci->vfs_inode;
+
+ if (dlen != sizeof(aux))
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ /* memset() zeroes struct padding so the whole-struct memcmp below
+ * is reliable against the aux blob built the same way in get_aux */
+ memset(&aux, 0, sizeof(aux));
+ aux.mtime = inode->i_mtime;
+ aux.size = inode->i_size;
+
+ if (memcmp(data, &aux, sizeof(aux)) != 0)
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ dout("ceph inode 0x%p cached okay", ci);
+ return FSCACHE_CHECKAUX_OKAY;
+}
+
+/*
+ * fscache is withdrawing caching for this inode: clear PG_fscache on
+ * every page of the mapping, one pagevec batch at a time.
+ */
+static void ceph_fscache_inode_now_uncached(void* cookie_netfs_data)
+{
+ struct ceph_inode_info* ci = cookie_netfs_data;
+ struct pagevec pvec;
+ pgoff_t first;
+ int loop, nr_pages;
+
+ pagevec_init(&pvec, 0);
+ first = 0;
+
+ dout("ceph inode 0x%p now uncached", ci);
+
+ while (1) {
+ nr_pages = pagevec_lookup(&pvec, ci->vfs_inode.i_mapping, first,
+ PAGEVEC_SIZE - pagevec_count(&pvec));
+
+ if (!nr_pages)
+ break;
+
+ for (loop = 0; loop < nr_pages; loop++)
+ ClearPageFsCache(pvec.pages[loop]);
+
+ /* resume the lookup just past the last page seen */
+ first = pvec.pages[nr_pages - 1]->index + 1;
+
+ pvec.nr = nr_pages;
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+}
+
+/* Cookie definition for per-inode data objects. */
+static const struct fscache_cookie_def ceph_fscache_inode_object_def = {
+ .name = "CEPH.inode",
+ .type = FSCACHE_COOKIE_TYPE_DATAFILE,
+ .get_key = ceph_fscache_inode_get_key,
+ .get_attr = ceph_fscache_inode_get_attr,
+ .get_aux = ceph_fscache_inode_get_aux,
+ .check_aux = ceph_fscache_inode_check_aux,
+ .now_uncached = ceph_fscache_inode_now_uncached,
+};
+
+/*
+ * Acquire an fscache cookie for @ci. Only regular files are cached.
+ * Fix: use S_ISREG() -- the original tested (i_mode & S_IFREG) != 0,
+ * which mis-classifies modes whose S_IFMT field merely contains the
+ * S_IFREG bits (e.g. S_IFSOCK = S_IFREG|S_IFCHR would pass).
+ */
+void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
+ struct ceph_inode_info* ci)
+{
+ struct inode* inode = &ci->vfs_inode;
+
+ /* No caching for filesystem */
+ if (fsc->fscache == NULL)
+ return;
+
+ /* Only cache for regular files that are read only */
+ if (!S_ISREG(inode->i_mode))
+ return;
+
+ /* Avoid multiple racing open requests */
+ mutex_lock(&inode->i_mutex);
+
+ if (ci->fscache)
+ goto done;
+
+ ci->fscache = fscache_acquire_cookie(fsc->fscache,
+ &ceph_fscache_inode_object_def,
+ ci, true);
+ fscache_check_consistency(ci->fscache);
+done:
+ mutex_unlock(&inode->i_mutex);
+}
+
+/*
+ * Drop the inode's fscache cookie: uncache every page it covers and
+ * relinquish the cookie without retiring the on-disk object.
+ */
+void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
+{
+ struct fscache_cookie* cookie = ci->fscache;
+
+ if (cookie == NULL)
+ return;
+
+ ci->fscache = NULL;
+
+ fscache_uncache_all_inode_pages(cookie, &ci->vfs_inode);
+ fscache_relinquish_cookie(cookie, 0);
+}
+
+/* Read-completion callback: mark the page uptodate on success.
+ * The page lock is left to the caller (readpage_nounlock path). */
+static void ceph_vfs_readpage_complete(struct page *page, void *data, int error)
+{
+ if (error == 0)
+ SetPageUptodate(page);
+}
+
+/* As above, but also drops the page lock (readpages path). */
+static void ceph_vfs_readpage_complete_unlock(struct page *page, void *data, int error)
+{
+ if (error == 0)
+ SetPageUptodate(page);
+
+ unlock_page(page);
+}
+
+/* The cache is usable only while FILE_CACHE is issued and our fscache
+ * generation matches the inode's read-cache generation. */
+static inline int cache_valid(struct ceph_inode_info *ci)
+{
+ if (!(ceph_caps_issued(ci) & CEPH_CAP_FILE_CACHE))
+ return 0;
+ return ci->i_fscache_gen == ci->i_rdcache_gen;
+}
+
+
+/* Attempt to read from the fscache.
+ *
+ * This function is called from the readpage_nounlock context. DO NOT attempt to
+ * unlock the page here (or in the callback).
+ */
+int ceph_readpage_from_fscache(struct inode *inode, struct page *page)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int ret;
+
+ if (!cache_valid(ci))
+ return -ENOBUFS;
+
+ ret = fscache_read_or_alloc_page(ci->fscache, page,
+ ceph_vfs_readpage_complete, NULL,
+ GFP_KERNEL);
+
+ if (ret == 0) {
+ /* Page found; read submitted asynchronously */
+ dout("page read submitted\n");
+ return 0;
+ }
+ if (ret == -ENOBUFS || ret == -ENODATA) {
+ /* Page was not found, or cannot be cached */
+ dout("page/inode not in cache\n");
+ return ret;
+ }
+ dout("%s: unknown error ret = %i\n", __func__, ret);
+ return ret;
+}
+
+/* Batched variant of ceph_readpage_from_fscache() for ->readpages.
+ * On return, *nr_pages counts the pages fscache did NOT handle. */
+int ceph_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int ret;
+
+ if (!cache_valid(ci))
+ return -ENOBUFS;
+
+ ret = fscache_read_or_alloc_pages(ci->fscache, mapping, pages, nr_pages,
+ ceph_vfs_readpage_complete_unlock,
+ NULL, mapping_gfp_mask(mapping));
+
+ if (ret == 0) {
+ /* Every page found; reads submitted */
+ dout("all-page read submitted\n");
+ return 0;
+ }
+ if (ret == -ENOBUFS || ret == -ENODATA) {
+ /* Some pages missing, or cannot be cached */
+ dout("page/inode not in cache\n");
+ return ret;
+ }
+ dout("%s: unknown error ret = %i\n", __func__, ret);
+ return ret;
+}
+
+/* Push a freshly-read page into fscache; on a failed store, drop the
+ * page's cache reservation so PG_fscache does not leak. */
+void ceph_readpage_to_fscache(struct inode *inode, struct page *page)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ if (!PageFsCache(page))
+ return;
+
+ if (!cache_valid(ci))
+ return;
+
+ if (fscache_write_page(ci->fscache, page, GFP_KERNEL) != 0)
+ fscache_uncache_page(ci->fscache, page);
+}
+
+/*
+ * Called when a page is being invalidated: wait for any in-flight
+ * fscache write to it, then remove it from the cache.
+ */
+void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ if (!PageFsCache(page))
+ return;
+
+ fscache_wait_on_page_write(ci->fscache, page);
+ fscache_uncache_page(ci->fscache, page);
+}
+
+/*
+ * Tear down per-fs caching state: flush/destroy the revalidation
+ * workqueue first (so no work runs afterwards), then drop the cookie.
+ * fscache_relinquish_cookie() tolerates a NULL cookie.
+ */
+void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
+{
+ if (fsc->revalidate_wq)
+ destroy_workqueue(fsc->revalidate_wq);
+
+ fscache_relinquish_cookie(fsc->fscache, 0);
+ fsc->fscache = NULL;
+}
+
+/*
+ * Deferred cache revalidation: if FILE_CACHE is still issued, ask
+ * fscache whether the backing object is consistent and invalidate it
+ * if not, then record the generation we validated against.
+ * Releases the inode reference taken by ceph_queue_revalidate().
+ */
+static void ceph_revalidate_work(struct work_struct *work)
+{
+ int issued;
+ u32 orig_gen;
+ struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
+ i_revalidate_work);
+ struct inode *inode = &ci->vfs_inode;
+
+ /* snapshot caps and generation under the inode spinlock */
+ spin_lock(&ci->i_ceph_lock);
+ issued = __ceph_caps_issued(ci, NULL);
+ orig_gen = ci->i_rdcache_gen;
+ spin_unlock(&ci->i_ceph_lock);
+
+ if (!(issued & CEPH_CAP_FILE_CACHE)) {
+ dout("revalidate_work lost cache before validation %p\n",
+ inode);
+ goto out;
+ }
+
+ if (!fscache_check_consistency(ci->fscache))
+ fscache_invalidate(ci->fscache);
+
+ spin_lock(&ci->i_ceph_lock);
+ /* Update the new valid generation (backwards sanity check too) */
+ if (orig_gen > ci->i_fscache_gen) {
+ ci->i_fscache_gen = orig_gen;
+ }
+ spin_unlock(&ci->i_ceph_lock);
+
+out:
+ iput(&ci->vfs_inode);
+}
+
+/*
+ * Queue asynchronous cache revalidation for @inode. Takes an inode
+ * reference that ceph_revalidate_work() releases; drops it here if
+ * the work was already queued.
+ * Fixes: stray ')' inside the failure dout() format string, and the
+ * workqueue is looked up via the already-computed fsc pointer instead
+ * of re-deriving it from the superblock.
+ */
+void ceph_queue_revalidate(struct inode *inode)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ if (fsc->revalidate_wq == NULL || ci->fscache == NULL)
+ return;
+
+ ihold(inode);
+
+ if (queue_work(fsc->revalidate_wq, &ci->i_revalidate_work)) {
+ dout("ceph_queue_revalidate %p\n", inode);
+ } else {
+ dout("ceph_queue_revalidate %p failed\n", inode);
+ iput(inode);
+ }
+}
+
+/* Initialise the fscache state embedded in a newly-created ceph inode. */
+void ceph_fscache_inode_init(struct ceph_inode_info *ci)
+{
+ ci->fscache = NULL;
+ /* The first load is verified cookie open time */
+ ci->i_fscache_gen = 1;
+ INIT_WORK(&ci->i_revalidate_work, ceph_revalidate_work);
+}
diff --git a/ceph/cache.h b/ceph/cache.h
new file mode 100644
index 0000000..5ac591b
--- /dev/null
+++ b/ceph/cache.h
@@ -0,0 +1,182 @@
+/*
+ * Ceph cache definitions.
+ *
+ * Copyright (C) 2013 by Adfin Solutions, Inc. All Rights Reserved.
+ * Written by Milosz Tanski (milosz at adfin.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#ifndef _CEPH_CACHE_H
+#define _CEPH_CACHE_H
+
+#ifdef CONFIG_CEPH_FSCACHE
+
+extern struct fscache_netfs ceph_cache_netfs;
+
+int ceph_fscache_register(void);
+void ceph_fscache_unregister(void);
+
+int ceph_fscache_register_fs(struct ceph_fs_client* fsc);
+void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc);
+
+void ceph_fscache_inode_init(struct ceph_inode_info *ci);
+void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
+ struct ceph_inode_info* ci);
+void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci);
+
+int ceph_readpage_from_fscache(struct inode *inode, struct page *page);
+int ceph_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages);
+void ceph_readpage_to_fscache(struct inode *inode, struct page *page);
+void ceph_invalidate_fscache_page(struct inode* inode, struct page *page);
+void ceph_queue_revalidate(struct inode *inode);
+
+/* Tell fscache the backing object's attributes (i_size) changed. */
+static inline void ceph_fscache_update_objectsize(struct inode *inode)
+{
+ fscache_attr_changed(ceph_inode(inode)->fscache);
+}
+
+/* Invalidate all data cached for @inode. */
+static inline void ceph_fscache_invalidate(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ fscache_invalidate(ci->fscache);
+}
+
+/*
+ * Release the page's fscache reservation.
+ * Fix: drop the 'return' of a void expression from a void function --
+ * a C constraint violation (C99 6.8.6.4), accepted only as a GNU
+ * extension.
+ */
+static inline void ceph_fscache_uncache_page(struct inode *inode,
+ struct page *page)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ fscache_uncache_page(ci->fscache, page);
+}
+
+/* Ask fscache whether @page may be released (->releasepage helper). */
+static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp)
+{
+ struct ceph_inode_info *ci = ceph_inode(page->mapping->host);
+
+ return fscache_maybe_release_page(ci->fscache, page, gfp);
+}
+
+/* A single-page read was aborted: drop the page's cache mark. */
+static inline void ceph_fscache_readpage_cancel(struct inode *inode,
+ struct page *page)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ if (fscache_cookie_valid(ci->fscache) && PageFsCache(page))
+ __fscache_uncache_page(ci->fscache, page);
+}
+
+/*
+ * Cancel caching of a batch of pages that will not be read after all.
+ * Fix: drop the 'return' of a void expression from a void function
+ * (C constraint violation, C99 6.8.6.4).
+ */
+static inline void ceph_fscache_readpages_cancel(struct inode *inode,
+ struct list_head *pages)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ fscache_readpages_cancel(ci->fscache, pages);
+}
+
+#else
+
+/*
+ * CONFIG_CEPH_FSCACHE disabled: no-op stubs so callers need no #ifdefs.
+ */
+static inline int ceph_fscache_register(void)
+{
+ return 0;
+}
+
+static inline void ceph_fscache_unregister(void)
+{
+}
+
+static inline int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
+{
+ return 0;
+}
+
+static inline void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
+{
+}
+
+static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci)
+{
+}
+
+static inline void ceph_fscache_register_inode_cookie(struct ceph_fs_client* parent_fsc,
+ struct ceph_inode_info* ci)
+{
+}
+
+/* I/O and invalidation stubs: without fscache, reads fall back to the
+ * normal page-cache path (-ENOBUFS) and everything else is a no-op. */
+static inline void ceph_fscache_uncache_page(struct inode *inode,
+ struct page *pages)
+{
+}
+
+static inline int ceph_readpage_from_fscache(struct inode* inode,
+ struct page *page)
+{
+ return -ENOBUFS;
+}
+
+static inline int ceph_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ return -ENOBUFS;
+}
+
+static inline void ceph_readpage_to_fscache(struct inode *inode,
+ struct page *page)
+{
+}
+
+static inline void ceph_fscache_update_objectsize(struct inode *inode)
+{
+}
+
+static inline void ceph_fscache_invalidate(struct inode *inode)
+{
+}
+
+static inline void ceph_invalidate_fscache_page(struct inode *inode,
+ struct page *page)
+{
+}
+
+static inline void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
+{
+}
+
+/* no cache: the page is always releasable */
+static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp)
+{
+ return 1;
+}
+
+static inline void ceph_fscache_readpage_cancel(struct inode *inode,
+ struct page *page)
+{
+}
+
+static inline void ceph_fscache_readpages_cancel(struct inode *inode,
+ struct list_head *pages)
+{
+}
+
+static inline void ceph_queue_revalidate(struct inode *inode)
+{
+}
+
+#endif
+
+#endif
diff --git a/ceph/caps.c b/ceph/caps.c
new file mode 100644
index 0000000..c561b62
--- /dev/null
+++ b/ceph/caps.c
@@ -0,0 +1,3313 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/writeback.h>
+
+#include "super.h"
+#include "mds_client.h"
+#include "cache.h"
+#include <linux/ceph/decode.h>
+#include <linux/ceph/messenger.h>
+
+/*
+ * Capability management
+ *
+ * The Ceph metadata servers control client access to inode metadata
+ * and file data by issuing capabilities, granting clients permission
+ * to read and/or write both inode field and file data to OSDs
+ * (storage nodes). Each capability consists of a set of bits
+ * indicating which operations are allowed.
+ *
+ * If the client holds a *_SHARED cap, the client has a coherent value
+ * that can be safely read from the cached inode.
+ *
+ * In the case of a *_EXCL (exclusive) or FILE_WR capabilities, the
+ * client is allowed to change inode attributes (e.g., file size,
+ * mtime), note its dirty state in the ceph_cap, and asynchronously
+ * flush that metadata change to the MDS.
+ *
+ * In the event of a conflicting operation (perhaps by another
+ * client), the MDS will revoke the conflicting client capabilities.
+ *
+ * In order for a client to cache an inode, it must hold a capability
+ * with at least one MDS server. When inodes are released, release
+ * notifications are batched and periodically sent en masse to the MDS
+ * cluster to release server state.
+ */
+
+
+/*
+ * Generate readable cap strings for debugging output.
+ */
+#define MAX_CAP_STR 20
+static char cap_str[MAX_CAP_STR][40]; /* small ring of static buffers */
+static DEFINE_SPINLOCK(cap_str_lock); /* protects last_cap_str only */
+static int last_cap_str; /* next ring slot to hand out */
+
+/* Append one-letter codes for the generic cap bits set in @c to @s;
+ * returns the advanced write pointer (no NUL terminator is written). */
+static char *gcap_string(char *s, int c)
+{
+ static const int bits[] = {
+ CEPH_CAP_GSHARED, CEPH_CAP_GEXCL, CEPH_CAP_GCACHE,
+ CEPH_CAP_GRD, CEPH_CAP_GWR, CEPH_CAP_GBUFFER,
+ CEPH_CAP_GLAZYIO
+ };
+ static const char chars[] = "sxcrwbl";
+ int i;
+
+ for (i = 0; i < 7; i++)
+ if (c & bits[i])
+ *s++ = chars[i];
+ return s;
+}
+
+/*
+ * Render @caps as a human-readable string in one of MAX_CAP_STR
+ * rotating static buffers. The returned pointer is only valid until
+ * the ring wraps; concurrent callers may clobber old results -- this
+ * is accepted for debug output.
+ */
+const char *ceph_cap_string(int caps)
+{
+ int i;
+ char *s;
+ int c;
+
+ /* claim the next ring slot */
+ spin_lock(&cap_str_lock);
+ i = last_cap_str++;
+ if (last_cap_str == MAX_CAP_STR)
+ last_cap_str = 0;
+ spin_unlock(&cap_str_lock);
+
+ s = cap_str[i];
+
+ if (caps & CEPH_CAP_PIN)
+ *s++ = 'p';
+
+ c = (caps >> CEPH_CAP_SAUTH) & 3;
+ if (c) {
+ *s++ = 'A';
+ s = gcap_string(s, c);
+ }
+
+ c = (caps >> CEPH_CAP_SLINK) & 3;
+ if (c) {
+ *s++ = 'L';
+ s = gcap_string(s, c);
+ }
+
+ c = (caps >> CEPH_CAP_SXATTR) & 3;
+ if (c) {
+ *s++ = 'X';
+ s = gcap_string(s, c);
+ }
+
+ c = caps >> CEPH_CAP_SFILE;
+ if (c) {
+ *s++ = 'F';
+ s = gcap_string(s, c);
+ }
+
+ /* no bits at all: print '-' */
+ if (s == cap_str[i])
+ *s++ = '-';
+ *s = 0;
+ return cap_str[i];
+}
+
+/* Initialise the mds client's preallocated-cap pool bookkeeping. */
+void ceph_caps_init(struct ceph_mds_client *mdsc)
+{
+ INIT_LIST_HEAD(&mdsc->caps_list);
+ spin_lock_init(&mdsc->caps_list_lock);
+}
+
+/* Free every cap left in the preallocated pool and zero all counters. */
+void ceph_caps_finalize(struct ceph_mds_client *mdsc)
+{
+ struct ceph_cap *cap, *tmp;
+
+ spin_lock(&mdsc->caps_list_lock);
+ list_for_each_entry_safe(cap, tmp, &mdsc->caps_list, caps_item) {
+ list_del(&cap->caps_item);
+ kmem_cache_free(ceph_cap_cachep, cap);
+ }
+ mdsc->caps_total_count = 0;
+ mdsc->caps_avail_count = 0;
+ mdsc->caps_use_count = 0;
+ mdsc->caps_reserve_count = 0;
+ mdsc->caps_min_count = 0;
+ spin_unlock(&mdsc->caps_list_lock);
+}
+
+/* Adjust the floor of preallocated caps kept around; @delta may be
+ * negative, but the floor must never go below zero. */
+void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta)
+{
+ spin_lock(&mdsc->caps_list_lock);
+ mdsc->caps_min_count += delta;
+ BUG_ON(mdsc->caps_min_count < 0);
+ spin_unlock(&mdsc->caps_list_lock);
+}
+
+/*
+ * Reserve @need cap structs for @ctx: take what is available from the
+ * preallocated pool, then allocate the remainder.
+ * NOTE(review): ctx->count is set to @need even if allocation fell
+ * short (only a warning is printed) -- presumably callers tolerate
+ * this or the subsequent get_cap() BUGs; confirm against callers.
+ */
+void ceph_reserve_caps(struct ceph_mds_client *mdsc,
+ struct ceph_cap_reservation *ctx, int need)
+{
+ int i;
+ struct ceph_cap *cap;
+ int have;
+ int alloc = 0;
+ LIST_HEAD(newcaps);
+
+ dout("reserve caps ctx=%p need=%d\n", ctx, need);
+
+ /* first reserve any caps that are already allocated */
+ spin_lock(&mdsc->caps_list_lock);
+ if (mdsc->caps_avail_count >= need)
+ have = need;
+ else
+ have = mdsc->caps_avail_count;
+ mdsc->caps_avail_count -= have;
+ mdsc->caps_reserve_count += have;
+ BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+ mdsc->caps_reserve_count +
+ mdsc->caps_avail_count);
+ spin_unlock(&mdsc->caps_list_lock);
+
+ /* allocate the shortfall outside the lock */
+ for (i = have; i < need; i++) {
+ cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
+ if (!cap)
+ break;
+ list_add(&cap->caps_item, &newcaps);
+ alloc++;
+ }
+ /* we didn't manage to reserve as much as we needed */
+ if (have + alloc != need)
+ pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n",
+ ctx, need, have + alloc);
+
+ spin_lock(&mdsc->caps_list_lock);
+ mdsc->caps_total_count += alloc;
+ mdsc->caps_reserve_count += alloc;
+ list_splice(&newcaps, &mdsc->caps_list);
+
+ BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+ mdsc->caps_reserve_count +
+ mdsc->caps_avail_count);
+ spin_unlock(&mdsc->caps_list_lock);
+
+ ctx->count = need;
+ dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n",
+ ctx, mdsc->caps_total_count, mdsc->caps_use_count,
+ mdsc->caps_reserve_count, mdsc->caps_avail_count);
+}
+
+/* Return any caps still reserved under @ctx to the available pool.
+ * Always returns 0. */
+int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
+ struct ceph_cap_reservation *ctx)
+{
+ dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count);
+ if (ctx->count) {
+ spin_lock(&mdsc->caps_list_lock);
+ BUG_ON(mdsc->caps_reserve_count < ctx->count);
+ mdsc->caps_reserve_count -= ctx->count;
+ mdsc->caps_avail_count += ctx->count;
+ ctx->count = 0;
+ dout("unreserve caps %d = %d used + %d resv + %d avail\n",
+ mdsc->caps_total_count, mdsc->caps_use_count,
+ mdsc->caps_reserve_count, mdsc->caps_avail_count);
+ BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+ mdsc->caps_reserve_count +
+ mdsc->caps_avail_count);
+ spin_unlock(&mdsc->caps_list_lock);
+ }
+ return 0;
+}
+
+/*
+ * Take one cap struct: from @ctx's reservation if given, otherwise by
+ * direct allocation (may return NULL). Pool accounting is updated
+ * under caps_list_lock either way.
+ */
+static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc,
+ struct ceph_cap_reservation *ctx)
+{
+ struct ceph_cap *cap = NULL;
+
+ /* temporary, until we do something about cap import/export */
+ if (!ctx) {
+ cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
+ if (cap) {
+ spin_lock(&mdsc->caps_list_lock);
+ mdsc->caps_use_count++;
+ mdsc->caps_total_count++;
+ spin_unlock(&mdsc->caps_list_lock);
+ }
+ return cap;
+ }
+
+ spin_lock(&mdsc->caps_list_lock);
+ dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n",
+ ctx, ctx->count, mdsc->caps_total_count, mdsc->caps_use_count,
+ mdsc->caps_reserve_count, mdsc->caps_avail_count);
+ BUG_ON(!ctx->count);
+ BUG_ON(ctx->count > mdsc->caps_reserve_count);
+ BUG_ON(list_empty(&mdsc->caps_list));
+
+ ctx->count--;
+ mdsc->caps_reserve_count--;
+ mdsc->caps_use_count++;
+
+ cap = list_first_entry(&mdsc->caps_list, struct ceph_cap, caps_item);
+ list_del(&cap->caps_item);
+
+ BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+ mdsc->caps_reserve_count + mdsc->caps_avail_count);
+ spin_unlock(&mdsc->caps_list_lock);
+ return cap;
+}
+
+/*
+ * Release a cap struct back to the pool, or free it outright if we
+ * already hold enough spares (reserved + min floor).
+ */
+void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap)
+{
+ spin_lock(&mdsc->caps_list_lock);
+ dout("put_cap %p %d = %d used + %d resv + %d avail\n",
+ cap, mdsc->caps_total_count, mdsc->caps_use_count,
+ mdsc->caps_reserve_count, mdsc->caps_avail_count);
+ mdsc->caps_use_count--;
+ /*
+ * Keep some preallocated caps around (ceph_min_count), to
+ * avoid lots of free/alloc churn.
+ */
+ if (mdsc->caps_avail_count >= mdsc->caps_reserve_count +
+ mdsc->caps_min_count) {
+ mdsc->caps_total_count--;
+ kmem_cache_free(ceph_cap_cachep, cap);
+ } else {
+ mdsc->caps_avail_count++;
+ list_add(&cap->caps_item, &mdsc->caps_list);
+ }
+
+ BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+ mdsc->caps_reserve_count + mdsc->caps_avail_count);
+ spin_unlock(&mdsc->caps_list_lock);
+}
+
+/*
+ * Report cap pool counters (any out-pointer may be NULL).
+ * NOTE(review): counters are read without caps_list_lock, so the
+ * values are an unsynchronised snapshot -- fine for debugfs output.
+ */
+void ceph_reservation_status(struct ceph_fs_client *fsc,
+ int *total, int *avail, int *used, int *reserved,
+ int *min)
+{
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+
+ if (total)
+ *total = mdsc->caps_total_count;
+ if (avail)
+ *avail = mdsc->caps_avail_count;
+ if (used)
+ *used = mdsc->caps_use_count;
+ if (reserved)
+ *reserved = mdsc->caps_reserve_count;
+ if (min)
+ *min = mdsc->caps_min_count;
+}
+
+/*
+ * Find ceph_cap for given mds, if any.
+ *
+ * Called with i_ceph_lock held.
+ */
+static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
+{
+ struct rb_node *node = ci->i_caps.rb_node;
+
+ while (node) {
+ struct ceph_cap *cap = rb_entry(node, struct ceph_cap,
+ ci_node);
+
+ if (mds < cap->mds)
+ node = node->rb_left;
+ else if (mds > cap->mds)
+ node = node->rb_right;
+ else
+ return cap;
+ }
+ return NULL;
+}
+
+/* Locked wrapper around __get_cap_for_mds(). */
+struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds)
+{
+ struct ceph_cap *cap;
+
+ spin_lock(&ci->i_ceph_lock);
+ cap = __get_cap_for_mds(ci, mds);
+ spin_unlock(&ci->i_ceph_lock);
+ return cap;
+}
+
+/*
+ * Return id of any MDS with a cap, preferably FILE_WR|BUFFER|EXCL, else -1.
+ */
+static int __ceph_get_cap_mds(struct ceph_inode_info *ci)
+{
+ int mds = -1;
+ struct rb_node *p;
+
+ /* prefer mds with WR|BUFFER|EXCL caps */
+ for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
+ struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
+
+ mds = cap->mds;
+ if (cap->issued & (CEPH_CAP_FILE_WR |
+ CEPH_CAP_FILE_BUFFER |
+ CEPH_CAP_FILE_EXCL))
+ break;
+ }
+ return mds;
+}
+
+/* Locked wrapper around __ceph_get_cap_mds(). */
+int ceph_get_cap_mds(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int mds;
+
+ spin_lock(&ci->i_ceph_lock);
+ mds = __ceph_get_cap_mds(ci);
+ spin_unlock(&ci->i_ceph_lock);
+ return mds;
+}
+
+/*
+ * Called under i_ceph_lock.
+ */
+static void __insert_cap_node(struct ceph_inode_info *ci,
+ struct ceph_cap *new)
+{
+ struct rb_node **p = &ci->i_caps.rb_node;
+ struct rb_node *parent = NULL;
+ struct ceph_cap *cap = NULL;
+
+ /* standard rbtree insert, keyed by mds id; the caller guarantees
+ * no cap for this mds exists yet (BUG otherwise) */
+ while (*p) {
+ parent = *p;
+ cap = rb_entry(parent, struct ceph_cap, ci_node);
+ if (new->mds < cap->mds)
+ p = &(*p)->rb_left;
+ else if (new->mds > cap->mds)
+ p = &(*p)->rb_right;
+ else
+ BUG();
+ }
+
+ rb_link_node(&new->ci_node, parent, p);
+ rb_insert_color(&new->ci_node, &ci->i_caps);
+}
+
+/*
+ * (re)set cap hold timeouts, which control the delayed release
+ * of unused caps back to the MDS. Should be called on cap use.
+ */
+static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
+ struct ceph_inode_info *ci)
+{
+ struct ceph_mount_options *ma = mdsc->fsc->mount_options;
+
+ /* mount-configurable min/max delays, rounded to whole jiffies */
+ ci->i_hold_caps_min = round_jiffies(jiffies +
+ ma->caps_wanted_delay_min * HZ);
+ ci->i_hold_caps_max = round_jiffies(jiffies +
+ ma->caps_wanted_delay_max * HZ);
+ dout("__cap_set_timeouts %p min %lu max %lu\n", &ci->vfs_inode,
+ ci->i_hold_caps_min - jiffies, ci->i_hold_caps_max - jiffies);
+}
+
+/*
+ * (Re)queue cap at the end of the delayed cap release list.
+ *
+ * If I_FLUSH is set, leave the inode at the front of the list.
+ *
+ * Caller holds i_ceph_lock
+ * -> we take mdsc->cap_delay_lock
+ */
+static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
+ struct ceph_inode_info *ci)
+{
+ __cap_set_timeouts(mdsc, ci);
+ dout("__cap_delay_requeue %p flags %d at %lu\n", &ci->vfs_inode,
+ ci->i_ceph_flags, ci->i_hold_caps_max);
+ if (!mdsc->stopping) {
+ spin_lock(&mdsc->cap_delay_lock);
+ if (!list_empty(&ci->i_cap_delay_list)) {
+ /* flush-marked inodes keep their (front) position */
+ if (ci->i_ceph_flags & CEPH_I_FLUSH)
+ goto no_change;
+ list_del_init(&ci->i_cap_delay_list);
+ }
+ list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
+no_change:
+ spin_unlock(&mdsc->cap_delay_lock);
+ }
+}
+
+/*
+ * Queue an inode for immediate writeback. Mark inode with I_FLUSH,
+ * indicating we should send a cap message to flush dirty metadata
+ * asap, and move to the front of the delayed cap list.
+ */
+static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc,
+ struct ceph_inode_info *ci)
+{
+ dout("__cap_delay_requeue_front %p\n", &ci->vfs_inode);
+ spin_lock(&mdsc->cap_delay_lock);
+ ci->i_ceph_flags |= CEPH_I_FLUSH;
+ if (!list_empty(&ci->i_cap_delay_list))
+ list_del_init(&ci->i_cap_delay_list);
+ /* head of the list == processed first */
+ list_add(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
+ spin_unlock(&mdsc->cap_delay_lock);
+}
+
+/*
+ * Cancel delayed work on cap.
+ *
+ * Caller must hold i_ceph_lock.
+ */
+static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
+ struct ceph_inode_info *ci)
+{
+ dout("__cap_delay_cancel %p\n", &ci->vfs_inode);
+ /* unlocked emptiness check is an optimisation; the locked
+ * list_del_init below is what actually dequeues */
+ if (list_empty(&ci->i_cap_delay_list))
+ return;
+ spin_lock(&mdsc->cap_delay_lock);
+ list_del_init(&ci->i_cap_delay_list);
+ spin_unlock(&mdsc->cap_delay_lock);
+}
+
+/*
+ * Common issue checks for add_cap, handle_cap_grant.
+ */
+static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
+ unsigned issued)
+{
+ unsigned had = __ceph_caps_issued(ci, NULL);
+
+ /*
+ * Each time we receive FILE_CACHE anew, we increment
+ * i_rdcache_gen.
+ */
+ if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
+ (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) {
+ ci->i_rdcache_gen++;
+ }
+
+ /*
+ * if we are newly issued FILE_SHARED, mark dir not complete; we
+ * don't know what happened to this directory while we didn't
+ * have the cap.
+ */
+ if ((issued & CEPH_CAP_FILE_SHARED) &&
+ (had & CEPH_CAP_FILE_SHARED) == 0) {
+ ci->i_shared_gen++;
+ if (S_ISDIR(ci->vfs_inode.i_mode)) {
+ dout(" marking %p NOT complete\n", &ci->vfs_inode);
+ __ceph_dir_clear_complete(ci);
+ }
+ }
+}
+
+/*
+ * Add a capability under the given MDS session.
+ *
+ * Caller should hold session snap_rwsem (read) and s_mutex.
+ *
+ * @fmode is the open file mode, if we are opening a file, otherwise
+ * it is < 0. (This is so we can atomically add the cap and add an
+ * open file reference to it.)
+ */
+int ceph_add_cap(struct inode *inode,
+ struct ceph_mds_session *session, u64 cap_id,
+ int fmode, unsigned issued, unsigned wanted,
+ unsigned seq, unsigned mseq, u64 realmino, int flags,
+ struct ceph_cap_reservation *caps_reservation)
+{
+ struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_cap *new_cap = NULL;
+ struct ceph_cap *cap;
+ int mds = session->s_mds;
+ int actual_wanted;
+
+ dout("add_cap %p mds%d cap %llx %s seq %d\n", inode,
+ session->s_mds, cap_id, ceph_cap_string(issued), seq);
+
+ /*
+ * If we are opening the file, include file mode wanted bits
+ * in wanted.
+ */
+ if (fmode >= 0)
+ wanted |= ceph_caps_for_mode(fmode);
+
+retry:
+ spin_lock(&ci->i_ceph_lock);
+ cap = __get_cap_for_mds(ci, mds);
+ if (!cap) {
+ /* no cap from this mds yet: allocate one outside the lock
+ * and retry, since get_cap() may sleep */
+ if (new_cap) {
+ cap = new_cap;
+ new_cap = NULL;
+ } else {
+ spin_unlock(&ci->i_ceph_lock);
+ new_cap = get_cap(mdsc, caps_reservation);
+ if (new_cap == NULL)
+ return -ENOMEM;
+ goto retry;
+ }
+
+ cap->issued = 0;
+ cap->implemented = 0;
+ cap->mds = mds;
+ cap->mds_wanted = 0;
+ cap->mseq = 0;
+
+ cap->ci = ci;
+ __insert_cap_node(ci, cap);
+
+ /* add to session cap list */
+ cap->session = session;
+ spin_lock(&session->s_cap_lock);
+ list_add_tail(&cap->session_caps, &session->s_caps);
+ session->s_nr_caps++;
+ spin_unlock(&session->s_cap_lock);
+ } else {
+ if (new_cap)
+ ceph_put_cap(mdsc, new_cap);
+
+ /*
+ * auth mds of the inode changed. we received the cap export
+ * message, but still haven't received the cap import message.
+ * handle_cap_export() updated the new auth MDS' cap.
+ *
+ * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing
+ * a message that was send before the cap import message. So
+ * don't remove caps.
+ */
+ if (ceph_seq_cmp(seq, cap->seq) <= 0) {
+ WARN_ON(cap != ci->i_auth_cap);
+ WARN_ON(cap->cap_id != cap_id);
+ seq = cap->seq;
+ mseq = cap->mseq;
+ issued |= cap->issued;
+ flags |= CEPH_CAP_FLAG_AUTH;
+ }
+ }
+
+ if (!ci->i_snap_realm) {
+ /*
+ * add this inode to the appropriate snap realm
+ */
+ struct ceph_snap_realm *realm = ceph_lookup_snap_realm(mdsc,
+ realmino);
+ if (realm) {
+ ceph_get_snap_realm(mdsc, realm);
+ spin_lock(&realm->inodes_with_caps_lock);
+ ci->i_snap_realm = realm;
+ list_add(&ci->i_snap_realm_item,
+ &realm->inodes_with_caps);
+ spin_unlock(&realm->inodes_with_caps_lock);
+ } else {
+ pr_err("ceph_add_cap: couldn't find snap realm %llx\n",
+ realmino);
+ WARN_ON(!realm);
+ }
+ }
+
+ __check_cap_issue(ci, cap, issued);
+
+ /*
+ * If we are issued caps we don't want, or the mds' wanted
+ * value appears to be off, queue a check so we'll release
+ * later and/or update the mds wanted value.
+ */
+ actual_wanted = __ceph_caps_wanted(ci);
+ if ((wanted & ~actual_wanted) ||
+ (issued & ~actual_wanted & CEPH_CAP_ANY_WR)) {
+ dout(" issued %s, mds wanted %s, actual %s, queueing\n",
+ ceph_cap_string(issued), ceph_cap_string(wanted),
+ ceph_cap_string(actual_wanted));
+ __cap_delay_requeue(mdsc, ci);
+ }
+
+ if (flags & CEPH_CAP_FLAG_AUTH) {
+ /* adopt this cap as the auth cap if it is newer (by mseq)
+ * than the one we currently track */
+ if (ci->i_auth_cap == NULL ||
+ ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) {
+ ci->i_auth_cap = cap;
+ cap->mds_wanted = wanted;
+ }
+ ci->i_cap_exporting_issued = 0;
+ } else {
+ WARN_ON(ci->i_auth_cap == cap);
+ }
+
+ dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n",
+ inode, ceph_vinop(inode), cap, ceph_cap_string(issued),
+ ceph_cap_string(issued|cap->issued), seq, mds);
+ cap->cap_id = cap_id;
+ cap->issued = issued;
+ cap->implemented |= issued;
+ /* older mseq: merge wanted bits; newer: replace them */
+ if (ceph_seq_cmp(mseq, cap->mseq) > 0)
+ cap->mds_wanted = wanted;
+ else
+ cap->mds_wanted |= wanted;
+ cap->seq = seq;
+ cap->issue_seq = seq;
+ cap->mseq = mseq;
+ cap->cap_gen = session->s_cap_gen;
+
+ if (fmode >= 0)
+ __ceph_get_fmode(ci, fmode);
+ spin_unlock(&ci->i_ceph_lock);
+ wake_up_all(&ci->i_cap_wq);
+ return 0;
+}
+
+/*
+ * Return true if cap has not timed out and belongs to the current
+ * generation of the MDS session (i.e. has not gone 'stale' due to
+ * us losing touch with the mds).
+ */
+static int __cap_is_valid(struct ceph_cap *cap)
+{
+ unsigned long ttl;
+ u32 gen;
+
+ /* snapshot the session's generation and ttl under s_gen_ttl_lock */
+ spin_lock(&cap->session->s_gen_ttl_lock);
+ gen = cap->session->s_cap_gen;
+ ttl = cap->session->s_cap_ttl;
+ spin_unlock(&cap->session->s_gen_ttl_lock);
+
+ /* stale if issued in an earlier session generation, or past the ttl */
+ if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) {
+ dout("__cap_is_valid %p cap %p issued %s "
+ "but STALE (gen %u vs %u)\n", &cap->ci->vfs_inode,
+ cap, ceph_cap_string(cap->issued), cap->cap_gen, gen);
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Return set of valid cap bits issued to us. Note that caps time
+ * out, and may be invalidated in bulk if the client session times out
+ * and session->s_cap_gen is bumped.
+ *
+ * If @implemented is non-NULL, it is also filled in with the union of
+ * the implemented bits of all valid caps.
+ */
+int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
+{
+ /* start from snap caps and any caps mid-export to another MDS */
+ int have = ci->i_snap_caps | ci->i_cap_exporting_issued;
+ struct ceph_cap *cap;
+ struct rb_node *p;
+
+ if (implemented)
+ *implemented = 0;
+ for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
+ cap = rb_entry(p, struct ceph_cap, ci_node);
+ if (!__cap_is_valid(cap))
+ continue;
+ dout("__ceph_caps_issued %p cap %p issued %s\n",
+ &ci->vfs_inode, cap, ceph_cap_string(cap->issued));
+ have |= cap->issued;
+ if (implemented)
+ *implemented |= cap->implemented;
+ }
+ /*
+ * exclude caps issued by non-auth MDS, but that are being revoked
+ * by the auth MDS. The non-auth MDS should be revoking/exporting
+ * these caps, but the message is delayed.
+ */
+ if (ci->i_auth_cap) {
+ cap = ci->i_auth_cap;
+ have &= ~cap->implemented | cap->issued;
+ }
+ return have;
+}
+
+/*
+ * Get cap bits issued by caps other than @ocap
+ *
+ * Used to decide what we would still hold if @ocap went away.
+ */
+int __ceph_caps_issued_other(struct ceph_inode_info *ci, struct ceph_cap *ocap)
+{
+ int have = ci->i_snap_caps;
+ struct ceph_cap *cap;
+ struct rb_node *p;
+
+ for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
+ cap = rb_entry(p, struct ceph_cap, ci_node);
+ if (cap == ocap)
+ continue;
+ if (!__cap_is_valid(cap))
+ continue;
+ have |= cap->issued;
+ }
+ return have;
+}
+
+/*
+ * Move a cap to the end of the LRU (oldest caps at list head, newest
+ * at list tail).
+ *
+ * The move is skipped while iterate_session_caps is walking the
+ * session cap list (s_cap_iterator set), to avoid perturbing the
+ * iteration; see the s_cap_lock protocol in mds_client.c.
+ */
+static void __touch_cap(struct ceph_cap *cap)
+{
+ struct ceph_mds_session *s = cap->session;
+
+ spin_lock(&s->s_cap_lock);
+ if (s->s_cap_iterator == NULL) {
+ dout("__touch_cap %p cap %p mds%d\n", &cap->ci->vfs_inode, cap,
+ s->s_mds);
+ list_move_tail(&cap->session_caps, &s->s_caps);
+ } else {
+ dout("__touch_cap %p cap %p mds%d NOP, iterating over caps\n",
+ &cap->ci->vfs_inode, cap, s->s_mds);
+ }
+ spin_unlock(&s->s_cap_lock);
+}
+
+/*
+ * Check if we hold the given mask. If so, move the cap(s) to the
+ * front of their respective LRUs. (This is the preferred way for
+ * callers to check for caps they want.)
+ *
+ * Returns 1 if @mask is fully covered either by snap caps, a single
+ * cap, or a combination of caps from several MDSs; 0 otherwise.
+ * If @touch is set, the contributing cap(s) are LRU-touched.
+ */
+int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
+{
+ struct ceph_cap *cap;
+ struct rb_node *p;
+ int have = ci->i_snap_caps;
+
+ /* snap caps alone may already satisfy the mask */
+ if ((have & mask) == mask) {
+ dout("__ceph_caps_issued_mask %p snap issued %s"
+ " (mask %s)\n", &ci->vfs_inode,
+ ceph_cap_string(have),
+ ceph_cap_string(mask));
+ return 1;
+ }
+
+ for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
+ cap = rb_entry(p, struct ceph_cap, ci_node);
+ if (!__cap_is_valid(cap))
+ continue;
+ /* does this single cap satisfy the mask? */
+ if ((cap->issued & mask) == mask) {
+ dout("__ceph_caps_issued_mask %p cap %p issued %s"
+ " (mask %s)\n", &ci->vfs_inode, cap,
+ ceph_cap_string(cap->issued),
+ ceph_cap_string(mask));
+ if (touch)
+ __touch_cap(cap);
+ return 1;
+ }
+
+ /* does a combination of caps satisfy mask? */
+ have |= cap->issued;
+ if ((have & mask) == mask) {
+ dout("__ceph_caps_issued_mask %p combo issued %s"
+ " (mask %s)\n", &ci->vfs_inode,
+ ceph_cap_string(cap->issued),
+ ceph_cap_string(mask));
+ if (touch) {
+ struct rb_node *q;
+
+ /* touch this + preceding caps */
+ __touch_cap(cap);
+ for (q = rb_first(&ci->i_caps); q != p;
+ q = rb_next(q)) {
+ cap = rb_entry(q, struct ceph_cap,
+ ci_node);
+ if (!__cap_is_valid(cap))
+ continue;
+ __touch_cap(cap);
+ }
+ }
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Return true if mask caps are currently being revoked by an MDS,
+ * considering every cap except @ocap (pass NULL to consider them all).
+ * A cap is "revoking" a bit when it is implemented but no longer issued.
+ */
+int __ceph_caps_revoking_other(struct ceph_inode_info *ci,
+ struct ceph_cap *ocap, int mask)
+{
+ struct ceph_cap *cap;
+ struct rb_node *p;
+
+ for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
+ cap = rb_entry(p, struct ceph_cap, ci_node);
+ if (cap != ocap &&
+ (cap->implemented & ~cap->issued & mask))
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Locked wrapper: return true if any of @mask caps are being revoked
+ * on @ci by any MDS. Takes and releases i_ceph_lock.
+ */
+int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
+{
+ struct inode *inode = &ci->vfs_inode;
+ int ret;
+
+ spin_lock(&ci->i_ceph_lock);
+ ret = __ceph_caps_revoking_other(ci, NULL, mask);
+ spin_unlock(&ci->i_ceph_lock);
+ dout("ceph_caps_revoking %p %s = %d\n", inode,
+ ceph_cap_string(mask), ret);
+ return ret;
+}
+
+/*
+ * Return the set of caps currently in active use, derived from the
+ * inode's reference counters (pin, read, readcache, write, buffer).
+ * Cached pages alone keep FILE_CACHE "used". Caller holds i_ceph_lock.
+ */
+int __ceph_caps_used(struct ceph_inode_info *ci)
+{
+ int used = 0;
+ if (ci->i_pin_ref)
+ used |= CEPH_CAP_PIN;
+ if (ci->i_rd_ref)
+ used |= CEPH_CAP_FILE_RD;
+ if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages)
+ used |= CEPH_CAP_FILE_CACHE;
+ if (ci->i_wr_ref)
+ used |= CEPH_CAP_FILE_WR;
+ if (ci->i_wb_ref || ci->i_wrbuffer_ref)
+ used |= CEPH_CAP_FILE_BUFFER;
+ return used;
+}
+
+/*
+ * wanted, by virtue of open file modes
+ *
+ * Union of the cap bits implied by each file mode that currently has
+ * at least one open count in i_nr_by_mode[].
+ */
+int __ceph_caps_file_wanted(struct ceph_inode_info *ci)
+{
+ int want = 0;
+ int mode;
+ for (mode = 0; mode < CEPH_FILE_MODE_NUM; mode++)
+ if (ci->i_nr_by_mode[mode])
+ want |= ceph_caps_for_mode(mode);
+ return want;
+}
+
+/*
+ * Return caps we have registered with the MDS(s) as 'wanted'.
+ *
+ * File write caps wanted via a non-auth MDS are masked out: only the
+ * auth MDS's wanted set counts for those.
+ */
+int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
+{
+ struct ceph_cap *cap;
+ struct rb_node *p;
+ int mds_wanted = 0;
+
+ for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
+ cap = rb_entry(p, struct ceph_cap, ci_node);
+ if (!__cap_is_valid(cap))
+ continue;
+ if (cap == ci->i_auth_cap)
+ mds_wanted |= cap->mds_wanted;
+ else
+ mds_wanted |= (cap->mds_wanted & ~CEPH_CAP_ANY_FILE_WR);
+ }
+ return mds_wanted;
+}
+
+/*
+ * called under i_ceph_lock
+ *
+ * True if the inode holds any cap at all, including caps that are
+ * mid-export to another MDS (i_cap_exporting_issued).
+ */
+static int __ceph_is_any_caps(struct ceph_inode_info *ci)
+{
+ return !RB_EMPTY_ROOT(&ci->i_caps) || ci->i_cap_exporting_issued;
+}
+
+/*
+ * Locked wrapper for __ceph_is_any_caps(): takes i_ceph_lock.
+ */
+int ceph_is_any_caps(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int ret;
+
+ spin_lock(&ci->i_ceph_lock);
+ ret = __ceph_is_any_caps(ci);
+ spin_unlock(&ci->i_ceph_lock);
+
+ return ret;
+}
+
+/*
+ * Remove a cap. Take steps to deal with a racing iterate_session_caps.
+ *
+ * caller should hold i_ceph_lock.
+ * caller will not hold session s_mutex if called from destroy_inode.
+ *
+ * If @queue_release is true, a cap release message is queued for the
+ * MDS (unless the session is reconnecting and the cap belongs to an
+ * older generation, in which case the reconnect supersedes it).
+ */
+void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
+{
+ struct ceph_mds_session *session = cap->session;
+ struct ceph_inode_info *ci = cap->ci;
+ struct ceph_mds_client *mdsc =
+ ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
+ int removed = 0;
+
+ dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
+
+ /* remove from session list */
+ spin_lock(&session->s_cap_lock);
+ /*
+ * s_cap_reconnect is protected by s_cap_lock. no one changes
+ * s_cap_gen while session is in the reconnect state.
+ */
+ if (queue_release &&
+ (!session->s_cap_reconnect ||
+ cap->cap_gen == session->s_cap_gen))
+ __queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
+ cap->mseq, cap->issue_seq);
+
+ if (session->s_cap_iterator == cap) {
+ /* not yet, we are iterating over this very cap */
+ dout("__ceph_remove_cap delaying %p removal from session %p\n",
+ cap, cap->session);
+ } else {
+ list_del_init(&cap->session_caps);
+ session->s_nr_caps--;
+ cap->session = NULL;
+ removed = 1;
+ }
+ /* protect backpointer with s_cap_lock: see iterate_session_caps */
+ cap->ci = NULL;
+ spin_unlock(&session->s_cap_lock);
+
+ /* remove from inode list */
+ rb_erase(&cap->ci_node, &ci->i_caps);
+ if (ci->i_auth_cap == cap)
+ ci->i_auth_cap = NULL;
+
+ /* if the iterator holds the cap, it will do the final put later */
+ if (removed)
+ ceph_put_cap(mdsc, cap);
+
+ /* last cap gone: detach the inode from its snap realm */
+ if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
+ struct ceph_snap_realm *realm = ci->i_snap_realm;
+ spin_lock(&realm->inodes_with_caps_lock);
+ list_del_init(&ci->i_snap_realm_item);
+ ci->i_snap_realm_counter++;
+ ci->i_snap_realm = NULL;
+ spin_unlock(&realm->inodes_with_caps_lock);
+ ceph_put_snap_realm(mdsc, realm);
+ }
+ if (!__ceph_is_any_real_caps(ci))
+ __cap_delay_cancel(mdsc, ci);
+}
+
+/*
+ * Build and send a cap message to the given MDS.
+ *
+ * Caller should be holding s_mutex.
+ *
+ * Encodes the full cap state (issued/wanted/dirty bits, seq numbers,
+ * size, times, ownership) into a CEPH_MSG_CLIENT_CAPS message; the
+ * optional xattr blob travels as the message 'middle'. Returns 0 or
+ * -ENOMEM if the message could not be allocated.
+ */
+static int send_cap_msg(struct ceph_mds_session *session,
+ u64 ino, u64 cid, int op,
+ int caps, int wanted, int dirty,
+ u32 seq, u64 flush_tid, u32 issue_seq, u32 mseq,
+ u64 size, u64 max_size,
+ struct timespec *mtime, struct timespec *atime,
+ u64 time_warp_seq,
+ kuid_t uid, kgid_t gid, umode_t mode,
+ u64 xattr_version,
+ struct ceph_buffer *xattrs_buf,
+ u64 follows)
+{
+ struct ceph_mds_caps *fc;
+ struct ceph_msg *msg;
+
+ dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s"
+ " seq %u/%u mseq %u follows %lld size %llu/%llu"
+ " xattr_ver %llu xattr_len %d\n", ceph_cap_op_name(op),
+ cid, ino, ceph_cap_string(caps), ceph_cap_string(wanted),
+ ceph_cap_string(dirty),
+ seq, issue_seq, mseq, follows, size, max_size,
+ xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0);
+
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS, false);
+ if (!msg)
+ return -ENOMEM;
+
+ /* the flush tid rides in the message header */
+ msg->hdr.tid = cpu_to_le64(flush_tid);
+
+ fc = msg->front.iov_base;
+ memset(fc, 0, sizeof(*fc));
+
+ fc->cap_id = cpu_to_le64(cid);
+ fc->op = cpu_to_le32(op);
+ fc->seq = cpu_to_le32(seq);
+ fc->issue_seq = cpu_to_le32(issue_seq);
+ fc->migrate_seq = cpu_to_le32(mseq);
+ fc->caps = cpu_to_le32(caps);
+ fc->wanted = cpu_to_le32(wanted);
+ fc->dirty = cpu_to_le32(dirty);
+ fc->ino = cpu_to_le64(ino);
+ fc->snap_follows = cpu_to_le64(follows);
+
+ fc->size = cpu_to_le64(size);
+ fc->max_size = cpu_to_le64(max_size);
+ if (mtime)
+ ceph_encode_timespec(&fc->mtime, mtime);
+ if (atime)
+ ceph_encode_timespec(&fc->atime, atime);
+ fc->time_warp_seq = cpu_to_le32(time_warp_seq);
+
+ /* wire format carries raw (init_user_ns) uid/gid values */
+ fc->uid = cpu_to_le32(from_kuid(&init_user_ns, uid));
+ fc->gid = cpu_to_le32(from_kgid(&init_user_ns, gid));
+ fc->mode = cpu_to_le32(mode);
+
+ fc->xattr_version = cpu_to_le64(xattr_version);
+ if (xattrs_buf) {
+ msg->middle = ceph_buffer_get(xattrs_buf);
+ fc->xattr_len = cpu_to_le32(xattrs_buf->vec.iov_len);
+ msg->hdr.middle_len = cpu_to_le32(xattrs_buf->vec.iov_len);
+ }
+
+ ceph_con_send(&session->s_con, msg);
+ return 0;
+}
+
+/*
+ * Append one cap release record to the session's current (partially
+ * filled) release message; when the message reaches
+ * CEPH_CAPS_PER_RELEASE entries it is moved to the 'done' list for
+ * sending. Caller holds s_cap_lock (see __ceph_remove_cap).
+ */
+void __queue_cap_release(struct ceph_mds_session *session,
+ u64 ino, u64 cap_id, u32 migrate_seq,
+ u32 issue_seq)
+{
+ struct ceph_msg *msg;
+ struct ceph_mds_cap_release *head;
+ struct ceph_mds_cap_item *item;
+
+ BUG_ON(!session->s_num_cap_releases);
+ msg = list_first_entry(&session->s_cap_releases,
+ struct ceph_msg, list_head);
+
+ dout(" adding %llx release to mds%d msg %p (%d left)\n",
+ ino, session->s_mds, msg, session->s_num_cap_releases);
+
+ BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
+ head = msg->front.iov_base;
+ le32_add_cpu(&head->num, 1);
+ item = msg->front.iov_base + msg->front.iov_len;
+ item->ino = cpu_to_le64(ino);
+ item->cap_id = cpu_to_le64(cap_id);
+ item->migrate_seq = cpu_to_le32(migrate_seq);
+ item->seq = cpu_to_le32(issue_seq);
+
+ session->s_num_cap_releases--;
+
+ msg->front.iov_len += sizeof(*item);
+ if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
+ dout(" release msg %p full\n", msg);
+ list_move_tail(&msg->list_head, &session->s_cap_releases_done);
+ } else {
+ dout(" release msg %p at %d/%d (%d)\n", msg,
+ (int)le32_to_cpu(head->num),
+ (int)CEPH_CAPS_PER_RELEASE,
+ (int)msg->front.iov_len);
+ }
+}
+
+/*
+ * Queue cap releases when an inode is dropped from our cache. Since
+ * inode is about to be destroyed, there is no need for i_ceph_lock.
+ *
+ * Advance the RB iterator before removing, since __ceph_remove_cap
+ * erases the current node from i_caps.
+ */
+void ceph_queue_caps_release(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct rb_node *p;
+
+ p = rb_first(&ci->i_caps);
+ while (p) {
+ struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
+ p = rb_next(p);
+ __ceph_remove_cap(cap, true);
+ }
+}
+
+/*
+ * Send a cap msg on the given inode. Update our caps state, then
+ * drop i_ceph_lock and send the message.
+ *
+ * Make note of max_size reported/requested from mds, revoked caps
+ * that have now been implemented.
+ *
+ * Make half-hearted attempt to invalidate page cache if we are
+ * dropping RDCACHE. Note that this will leave behind locked pages
+ * that we'll then need to deal with elsewhere.
+ *
+ * Return non-zero if delayed release, or we experienced an error
+ * such that the caller should requeue + retry later.
+ *
+ * called with i_ceph_lock, then drops it.
+ * caller should hold snap_rwsem (read), s_mutex.
+ */
+static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
+ int op, int used, int want, int retain, int flushing,
+ unsigned *pflush_tid)
+ __releases(cap->ci->i_ceph_lock)
+{
+ struct ceph_inode_info *ci = cap->ci;
+ struct inode *inode = &ci->vfs_inode;
+ u64 cap_id = cap->cap_id;
+ int held, revoking, dropping, keep;
+ u64 seq, issue_seq, mseq, time_warp_seq, follows;
+ u64 size, max_size;
+ struct timespec mtime, atime;
+ int wake = 0;
+ umode_t mode;
+ kuid_t uid;
+ kgid_t gid;
+ struct ceph_mds_session *session;
+ u64 xattr_version = 0;
+ struct ceph_buffer *xattr_blob = NULL;
+ int delayed = 0;
+ u64 flush_tid = 0;
+ int i;
+ int ret;
+
+ held = cap->issued | cap->implemented;
+ revoking = cap->implemented & ~cap->issued;
+ /* never retain a bit currently being revoked */
+ retain &= ~revoking;
+ dropping = cap->issued & ~retain;
+
+ dout("__send_cap %p cap %p session %p %s -> %s (revoking %s)\n",
+ inode, cap, cap->session,
+ ceph_cap_string(held), ceph_cap_string(held & retain),
+ ceph_cap_string(revoking));
+ BUG_ON((retain & CEPH_CAP_PIN) == 0);
+
+ session = cap->session;
+
+ /* don't release wanted unless we've waited a bit. */
+ if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0 &&
+ time_before(jiffies, ci->i_hold_caps_min)) {
+ dout(" delaying issued %s -> %s, wanted %s -> %s on send\n",
+ ceph_cap_string(cap->issued),
+ ceph_cap_string(cap->issued & retain),
+ ceph_cap_string(cap->mds_wanted),
+ ceph_cap_string(want));
+ want |= cap->mds_wanted;
+ retain |= cap->issued;
+ delayed = 1;
+ }
+ ci->i_ceph_flags &= ~(CEPH_I_NODELAY | CEPH_I_FLUSH);
+
+ cap->issued &= retain; /* drop bits we don't want */
+ if (cap->implemented & ~cap->issued) {
+ /*
+ * Wake up any waiters on wanted -> needed transition.
+ * This is due to the weird transition from buffered
+ * to sync IO... we need to flush dirty pages _before_
+ * allowing sync writes to avoid reordering.
+ */
+ wake = 1;
+ }
+ cap->implemented &= cap->issued | used;
+ cap->mds_wanted = want;
+
+ if (flushing) {
+ /*
+ * assign a tid for flush operations so we can avoid
+ * flush1 -> dirty1 -> flush2 -> flushack1 -> mark
+ * clean type races. track latest tid for every bit
+ * so we can handle flush AxFw, flush Fw, and have the
+ * first ack clean Ax.
+ */
+ flush_tid = ++ci->i_cap_flush_last_tid;
+ if (pflush_tid)
+ *pflush_tid = flush_tid;
+ dout(" cap_flush_tid %d\n", (int)flush_tid);
+ for (i = 0; i < CEPH_CAP_BITS; i++)
+ if (flushing & (1 << i))
+ ci->i_cap_flush_tid[i] = flush_tid;
+
+ follows = ci->i_head_snapc->seq;
+ } else {
+ follows = 0;
+ }
+
+ /* snapshot all state to send while we still hold i_ceph_lock */
+ keep = cap->implemented;
+ seq = cap->seq;
+ issue_seq = cap->issue_seq;
+ mseq = cap->mseq;
+ size = inode->i_size;
+ ci->i_reported_size = size;
+ max_size = ci->i_wanted_max_size;
+ ci->i_requested_max_size = max_size;
+ mtime = inode->i_mtime;
+ atime = inode->i_atime;
+ time_warp_seq = ci->i_time_warp_seq;
+ uid = inode->i_uid;
+ gid = inode->i_gid;
+ mode = inode->i_mode;
+
+ if (flushing & CEPH_CAP_XATTR_EXCL) {
+ __ceph_build_xattrs_blob(ci);
+ xattr_blob = ci->i_xattrs.blob;
+ xattr_version = ci->i_xattrs.version;
+ }
+
+ spin_unlock(&ci->i_ceph_lock);
+
+ ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
+ op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
+ size, max_size, &mtime, &atime, time_warp_seq,
+ uid, gid, mode, xattr_version, xattr_blob,
+ follows);
+ if (ret < 0) {
+ dout("error sending cap msg, must requeue %p\n", inode);
+ delayed = 1;
+ }
+
+ if (wake)
+ wake_up_all(&ci->i_cap_wq);
+
+ return delayed;
+}
+
+/*
+ * When a snapshot is taken, clients accumulate dirty metadata on
+ * inodes with capabilities in ceph_cap_snaps to describe the file
+ * state at the time the snapshot was taken. This must be flushed
+ * asynchronously back to the MDS once sync writes complete and dirty
+ * data is written out.
+ *
+ * Unless @again is true, skip cap_snaps that were already sent to
+ * the MDS (i.e., during this session).
+ *
+ * Called under i_ceph_lock. Takes s_mutex as needed.
+ * If @psession is non-NULL, *psession may carry an already-locked
+ * session in and the session we end up holding out.
+ */
+void __ceph_flush_snaps(struct ceph_inode_info *ci,
+ struct ceph_mds_session **psession,
+ int again)
+ __releases(ci->i_ceph_lock)
+ __acquires(ci->i_ceph_lock)
+{
+ struct inode *inode = &ci->vfs_inode;
+ int mds;
+ struct ceph_cap_snap *capsnap;
+ u32 mseq;
+ struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+ struct ceph_mds_session *session = NULL; /* if session != NULL, we hold
+ session->s_mutex */
+ u64 next_follows = 0; /* keep track of how far we've gotten through the
+ i_cap_snaps list, and skip these entries next time
+ around to avoid an infinite loop */
+
+ if (psession)
+ session = *psession;
+
+ dout("__flush_snaps %p\n", inode);
+retry:
+ list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
+ /* avoid an infinite loop after retry */
+ if (capsnap->follows < next_follows)
+ continue;
+ /*
+ * we need to wait for sync writes to complete and for dirty
+ * pages to be written out.
+ */
+ if (capsnap->dirty_pages || capsnap->writing)
+ break;
+
+ /*
+ * if cap writeback already occurred, we should have dropped
+ * the capsnap in ceph_put_wrbuffer_cap_refs.
+ */
+ BUG_ON(capsnap->dirty == 0);
+
+ /* pick mds, take s_mutex */
+ if (ci->i_auth_cap == NULL) {
+ dout("no auth cap (migrating?), doing nothing\n");
+ goto out;
+ }
+
+ /* only flush each capsnap once */
+ if (!again && !list_empty(&capsnap->flushing_item)) {
+ dout("already flushed %p, skipping\n", capsnap);
+ continue;
+ }
+
+ mds = ci->i_auth_cap->session->s_mds;
+ mseq = ci->i_auth_cap->mseq;
+
+ /* auth mds changed since we took this session? drop it */
+ if (session && session->s_mds != mds) {
+ dout("oops, wrong session %p mutex\n", session);
+ mutex_unlock(&session->s_mutex);
+ ceph_put_mds_session(session);
+ session = NULL;
+ }
+ if (!session) {
+ /*
+ * lock inversion: s_mutex must be taken before
+ * i_ceph_lock, so drop i_ceph_lock, lock the
+ * session, then retry the whole scan.
+ */
+ spin_unlock(&ci->i_ceph_lock);
+ mutex_lock(&mdsc->mutex);
+ session = __ceph_lookup_mds_session(mdsc, mds);
+ mutex_unlock(&mdsc->mutex);
+ if (session) {
+ dout("inverting session/ino locks on %p\n",
+ session);
+ mutex_lock(&session->s_mutex);
+ }
+ /*
+ * if session == NULL, we raced against a cap
+ * deletion or migration. retry, and we'll
+ * get a better @mds value next time.
+ */
+ spin_lock(&ci->i_ceph_lock);
+ goto retry;
+ }
+
+ capsnap->flush_tid = ++ci->i_cap_flush_last_tid;
+ atomic_inc(&capsnap->nref);
+ if (!list_empty(&capsnap->flushing_item))
+ list_del_init(&capsnap->flushing_item);
+ list_add_tail(&capsnap->flushing_item,
+ &session->s_cap_snaps_flushing);
+ spin_unlock(&ci->i_ceph_lock);
+
+ dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
+ inode, capsnap, capsnap->follows, capsnap->flush_tid);
+ send_cap_msg(session, ceph_vino(inode).ino, 0,
+ CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0,
+ capsnap->dirty, 0, capsnap->flush_tid, 0, mseq,
+ capsnap->size, 0,
+ &capsnap->mtime, &capsnap->atime,
+ capsnap->time_warp_seq,
+ capsnap->uid, capsnap->gid, capsnap->mode,
+ capsnap->xattr_version, capsnap->xattr_blob,
+ capsnap->follows);
+
+ next_follows = capsnap->follows + 1;
+ ceph_put_cap_snap(capsnap);
+
+ /* list may have changed while unlocked; rescan */
+ spin_lock(&ci->i_ceph_lock);
+ goto retry;
+ }
+
+ /* we flushed them all; remove this inode from the queue */
+ spin_lock(&mdsc->snap_flush_lock);
+ list_del_init(&ci->i_snap_flush_item);
+ spin_unlock(&mdsc->snap_flush_lock);
+
+out:
+ if (psession)
+ *psession = session;
+ else if (session) {
+ mutex_unlock(&session->s_mutex);
+ ceph_put_mds_session(session);
+ }
+}
+
+/*
+ * Locked convenience wrapper: flush cap snaps for @ci, taking and
+ * releasing i_ceph_lock (no caller-provided session, no re-flush).
+ */
+static void ceph_flush_snaps(struct ceph_inode_info *ci)
+{
+ spin_lock(&ci->i_ceph_lock);
+ __ceph_flush_snaps(ci, NULL, 0);
+ spin_unlock(&ci->i_ceph_lock);
+}
+
+/*
+ * Mark caps dirty. If inode is newly dirty, return the dirty flags.
+ * Caller is then responsible for calling __mark_inode_dirty with the
+ * returned flags value.
+ *
+ * Called under i_ceph_lock. On the clean->dirty transition the inode
+ * is pinned (ihold) until its flush completes, pins the head snap
+ * context, and is added to mdsc->cap_dirty.
+ */
+int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
+{
+ struct ceph_mds_client *mdsc =
+ ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
+ struct inode *inode = &ci->vfs_inode;
+ int was = ci->i_dirty_caps;
+ int dirty = 0;
+
+ dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode,
+ ceph_cap_string(mask), ceph_cap_string(was),
+ ceph_cap_string(was | mask));
+ ci->i_dirty_caps |= mask;
+ if (was == 0) {
+ if (!ci->i_head_snapc)
+ ci->i_head_snapc = ceph_get_snap_context(
+ ci->i_snap_realm->cached_context);
+ dout(" inode %p now dirty snapc %p auth cap %p\n",
+ &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap);
+ WARN_ON(!ci->i_auth_cap);
+ BUG_ON(!list_empty(&ci->i_dirty_item));
+ spin_lock(&mdsc->cap_dirty_lock);
+ list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
+ spin_unlock(&mdsc->cap_dirty_lock);
+ if (ci->i_flushing_caps == 0) {
+ /* pin inode until flush completes */
+ ihold(inode);
+ dirty |= I_DIRTY_SYNC;
+ }
+ }
+ BUG_ON(list_empty(&ci->i_dirty_item));
+ if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
+ (mask & CEPH_CAP_FILE_BUFFER))
+ dirty |= I_DIRTY_DATASYNC;
+ __cap_delay_requeue(mdsc, ci);
+ return dirty;
+}
+
+/*
+ * Add dirty inode to the flushing list. Assigned a seq number so we
+ * can wait for caps to flush without starving.
+ *
+ * Called under i_ceph_lock.
+ *
+ * Moves the inode from the mdsc dirty list to the session's flushing
+ * list and returns the cap bits now being flushed.
+ */
+static int __mark_caps_flushing(struct inode *inode,
+ struct ceph_mds_session *session)
+{
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int flushing;
+
+ BUG_ON(ci->i_dirty_caps == 0);
+ BUG_ON(list_empty(&ci->i_dirty_item));
+
+ /* dirty bits become flushing bits */
+ flushing = ci->i_dirty_caps;
+ dout("__mark_caps_flushing flushing %s, flushing_caps %s -> %s\n",
+ ceph_cap_string(flushing),
+ ceph_cap_string(ci->i_flushing_caps),
+ ceph_cap_string(ci->i_flushing_caps | flushing));
+ ci->i_flushing_caps |= flushing;
+ ci->i_dirty_caps = 0;
+ dout(" inode %p now !dirty\n", inode);
+
+ spin_lock(&mdsc->cap_dirty_lock);
+ list_del_init(&ci->i_dirty_item);
+
+ ci->i_cap_flush_seq = ++mdsc->cap_flush_seq;
+ if (list_empty(&ci->i_flushing_item)) {
+ list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing);
+ mdsc->num_cap_flushing++;
+ dout(" inode %p now flushing seq %lld\n", inode,
+ ci->i_cap_flush_seq);
+ } else {
+ list_move_tail(&ci->i_flushing_item, &session->s_cap_flushing);
+ dout(" inode %p now flushing (more) seq %lld\n", inode,
+ ci->i_cap_flush_seq);
+ }
+ spin_unlock(&mdsc->cap_dirty_lock);
+
+ return flushing;
+}
+
+/*
+ * try to invalidate mapping pages without blocking.
+ *
+ * Called (and returns) with i_ceph_lock held; the lock is dropped
+ * around the actual invalidation. Returns 0 on success, -1 if some
+ * pages could not be dropped or new reads raced in (rdcache_gen
+ * changed while unlocked).
+ */
+static int try_nonblocking_invalidate(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ u32 invalidating_gen = ci->i_rdcache_gen;
+
+ spin_unlock(&ci->i_ceph_lock);
+ invalidate_mapping_pages(&inode->i_data, 0, -1);
+ spin_lock(&ci->i_ceph_lock);
+
+ if (inode->i_data.nrpages == 0 &&
+ invalidating_gen == ci->i_rdcache_gen) {
+ /* success. */
+ dout("try_nonblocking_invalidate %p success\n", inode);
+ /* save any racing async invalidate some trouble */
+ ci->i_rdcache_revoking = ci->i_rdcache_gen - 1;
+ return 0;
+ }
+ dout("try_nonblocking_invalidate %p failed\n", inode);
+ return -1;
+}
+
+/*
+ * Swiss army knife function to examine currently used and wanted
+ * versus held caps. Release, flush, ack revoked caps to mds as
+ * appropriate.
+ *
+ * CHECK_CAPS_NODELAY - caller is delayed work and we should not delay
+ * cap release further.
+ * CHECK_CAPS_AUTHONLY - we should only check the auth cap
+ * CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
+ * further delay.
+ *
+ * @session, if non-NULL, is a session whose s_mutex the caller already
+ * holds; it is unlocked before return.
+ */
+void ceph_check_caps(struct ceph_inode_info *ci, int flags,
+ struct ceph_mds_session *session)
+{
+ struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct inode *inode = &ci->vfs_inode;
+ struct ceph_cap *cap;
+ int file_wanted, used, cap_used;
+ int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */
+ int issued, implemented, want, retain, revoking, flushing = 0;
+ int mds = -1; /* keep track of how far we've gone through i_caps list
+ to avoid an infinite loop on retry */
+ struct rb_node *p;
+ int tried_invalidate = 0;
+ int delayed = 0, sent = 0, force_requeue = 0, num;
+ int queue_invalidate = 0;
+ int is_delayed = flags & CHECK_CAPS_NODELAY;
+
+ /* if we are unmounting, flush any unused caps immediately. */
+ if (mdsc->stopping)
+ is_delayed = 1;
+
+ spin_lock(&ci->i_ceph_lock);
+
+ if (ci->i_ceph_flags & CEPH_I_FLUSH)
+ flags |= CHECK_CAPS_FLUSH;
+
+ /* flush snaps first time around only */
+ if (!list_empty(&ci->i_cap_snaps))
+ __ceph_flush_snaps(ci, &session, 0);
+ goto retry_locked;
+retry:
+ spin_lock(&ci->i_ceph_lock);
+retry_locked:
+ file_wanted = __ceph_caps_file_wanted(ci);
+ used = __ceph_caps_used(ci);
+ want = file_wanted | used;
+ issued = __ceph_caps_issued(ci, &implemented);
+ revoking = implemented & ~issued;
+
+ retain = want | CEPH_CAP_PIN;
+ if (!mdsc->stopping && inode->i_nlink > 0) {
+ if (want) {
+ retain |= CEPH_CAP_ANY; /* be greedy */
+ } else {
+ retain |= CEPH_CAP_ANY_SHARED;
+ /*
+ * keep RD only if we didn't have the file open RW,
+ * because then the mds would revoke it anyway to
+ * journal max_size=0.
+ */
+ if (ci->i_max_size == 0)
+ retain |= CEPH_CAP_ANY_RD;
+ }
+ }
+
+ dout("check_caps %p file_want %s used %s dirty %s flushing %s"
+ " issued %s revoking %s retain %s %s%s%s\n", inode,
+ ceph_cap_string(file_wanted),
+ ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
+ ceph_cap_string(ci->i_flushing_caps),
+ ceph_cap_string(issued), ceph_cap_string(revoking),
+ ceph_cap_string(retain),
+ (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "",
+ (flags & CHECK_CAPS_NODELAY) ? " NODELAY" : "",
+ (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "");
+
+ /*
+ * If we no longer need to hold onto old our caps, and we may
+ * have cached pages, but don't want them, then try to invalidate.
+ * If we fail, it's because pages are locked.... try again later.
+ */
+ if ((!is_delayed || mdsc->stopping) &&
+ ci->i_wrbuffer_ref == 0 && /* no dirty pages... */
+ inode->i_data.nrpages && /* have cached pages */
+ (file_wanted == 0 || /* no open files */
+ (revoking & (CEPH_CAP_FILE_CACHE|
+ CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */
+ !tried_invalidate) {
+ dout("check_caps trying to invalidate on %p\n", inode);
+ if (try_nonblocking_invalidate(inode) < 0) {
+ if (revoking & (CEPH_CAP_FILE_CACHE|
+ CEPH_CAP_FILE_LAZYIO)) {
+ dout("check_caps queuing invalidate\n");
+ queue_invalidate = 1;
+ ci->i_rdcache_revoking = ci->i_rdcache_gen;
+ } else {
+ dout("check_caps failed to invalidate pages\n");
+ /* we failed to invalidate pages. check these
+ caps again later. */
+ force_requeue = 1;
+ __cap_set_timeouts(mdsc, ci);
+ }
+ }
+ tried_invalidate = 1;
+ goto retry_locked;
+ }
+
+ num = 0;
+ for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
+ cap = rb_entry(p, struct ceph_cap, ci_node);
+ num++;
+
+ /* avoid looping forever */
+ if (mds >= cap->mds ||
+ ((flags & CHECK_CAPS_AUTHONLY) && cap != ci->i_auth_cap))
+ continue;
+
+ /* NOTE: no side-effects allowed, until we take s_mutex */
+
+ /* non-auth caps don't count use of bits the auth cap holds */
+ cap_used = used;
+ if (ci->i_auth_cap && cap != ci->i_auth_cap)
+ cap_used &= ~ci->i_auth_cap->issued;
+
+ revoking = cap->implemented & ~cap->issued;
+ dout(" mds%d cap %p used %s issued %s implemented %s revoking %s\n",
+ cap->mds, cap, ceph_cap_string(cap->issued),
+ ceph_cap_string(cap_used),
+ ceph_cap_string(cap->implemented),
+ ceph_cap_string(revoking));
+
+ if (cap == ci->i_auth_cap &&
+ (cap->issued & CEPH_CAP_FILE_WR)) {
+ /* request larger max_size from MDS? */
+ if (ci->i_wanted_max_size > ci->i_max_size &&
+ ci->i_wanted_max_size > ci->i_requested_max_size) {
+ dout("requesting new max_size\n");
+ goto ack;
+ }
+
+ /* approaching file_max? */
+ if ((inode->i_size << 1) >= ci->i_max_size &&
+ (ci->i_reported_size << 1) < ci->i_max_size) {
+ dout("i_size approaching max_size\n");
+ goto ack;
+ }
+ }
+ /* flush anything dirty? */
+ if (cap == ci->i_auth_cap && (flags & CHECK_CAPS_FLUSH) &&
+ ci->i_dirty_caps) {
+ dout("flushing dirty caps\n");
+ goto ack;
+ }
+
+ /* completed revocation? going down and there are no caps? */
+ if (revoking && (revoking & cap_used) == 0) {
+ dout("completed revocation of %s\n",
+ ceph_cap_string(cap->implemented & ~cap->issued));
+ goto ack;
+ }
+
+ /* want more caps from mds? */
+ if (want & ~(cap->mds_wanted | cap->issued))
+ goto ack;
+
+ /* things we might delay */
+ if ((cap->issued & ~retain) == 0 &&
+ cap->mds_wanted == want)
+ continue; /* nope, all good */
+
+ if (is_delayed)
+ goto ack;
+
+ /* delay? */
+ if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0 &&
+ time_before(jiffies, ci->i_hold_caps_max)) {
+ dout(" delaying issued %s -> %s, wanted %s -> %s\n",
+ ceph_cap_string(cap->issued),
+ ceph_cap_string(cap->issued & retain),
+ ceph_cap_string(cap->mds_wanted),
+ ceph_cap_string(want));
+ delayed++;
+ continue;
+ }
+
+ack:
+ if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
+ dout(" skipping %p I_NOFLUSH set\n", inode);
+ continue;
+ }
+
+ if (session && session != cap->session) {
+ dout("oops, wrong session %p mutex\n", session);
+ mutex_unlock(&session->s_mutex);
+ session = NULL;
+ }
+ if (!session) {
+ session = cap->session;
+ if (mutex_trylock(&session->s_mutex) == 0) {
+ /*
+ * lock inversion: s_mutex ranks above
+ * i_ceph_lock, so drop our locks, take
+ * s_mutex, and rescan from the top.
+ */
+ dout("inverting session/ino locks on %p\n",
+ session);
+ spin_unlock(&ci->i_ceph_lock);
+ if (took_snap_rwsem) {
+ up_read(&mdsc->snap_rwsem);
+ took_snap_rwsem = 0;
+ }
+ mutex_lock(&session->s_mutex);
+ goto retry;
+ }
+ }
+ /* take snap_rwsem after session mutex */
+ if (!took_snap_rwsem) {
+ if (down_read_trylock(&mdsc->snap_rwsem) == 0) {
+ dout("inverting snap/in locks on %p\n",
+ inode);
+ spin_unlock(&ci->i_ceph_lock);
+ down_read(&mdsc->snap_rwsem);
+ took_snap_rwsem = 1;
+ goto retry;
+ }
+ took_snap_rwsem = 1;
+ }
+
+ if (cap == ci->i_auth_cap && ci->i_dirty_caps)
+ flushing = __mark_caps_flushing(inode, session);
+ else
+ flushing = 0;
+
+ mds = cap->mds; /* remember mds, so we don't repeat */
+ sent++;
+
+ /* __send_cap drops i_ceph_lock */
+ delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used,
+ want, retain, flushing, NULL);
+ goto retry; /* retake i_ceph_lock and restart our cap scan. */
+ }
+
+ /*
+ * Reschedule delayed caps release if we delayed anything,
+ * otherwise cancel.
+ */
+ if (delayed && is_delayed)
+ force_requeue = 1; /* __send_cap delayed release; requeue */
+ if (!delayed && !is_delayed)
+ __cap_delay_cancel(mdsc, ci);
+ else if (!is_delayed || force_requeue)
+ __cap_delay_requeue(mdsc, ci);
+
+ spin_unlock(&ci->i_ceph_lock);
+
+ if (queue_invalidate)
+ ceph_queue_invalidate(inode);
+
+ if (session)
+ mutex_unlock(&session->s_mutex);
+ if (took_snap_rwsem)
+ up_read(&mdsc->snap_rwsem);
+}
+
+/*
+ * Try to flush dirty caps back to the auth mds.
+ *
+ * Returns the set of caps now flushing (0 if nothing was dirty or
+ * flushing was skipped); if @flush_tid is non-NULL it receives the
+ * flush tid assigned by __send_cap for the caller to wait on.
+ */
+static int try_flush_caps(struct inode *inode, unsigned *flush_tid)
+{
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int flushing = 0;
+ struct ceph_mds_session *session = NULL;
+
+retry:
+ spin_lock(&ci->i_ceph_lock);
+ if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
+ dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode);
+ goto out;
+ }
+ if (ci->i_dirty_caps && ci->i_auth_cap) {
+ struct ceph_cap *cap = ci->i_auth_cap;
+ int used = __ceph_caps_used(ci);
+ int want = __ceph_caps_wanted(ci);
+ int delayed;
+
+ /* take the auth session's s_mutex before i_ceph_lock */
+ if (!session || session != cap->session) {
+ spin_unlock(&ci->i_ceph_lock);
+ if (session)
+ mutex_unlock(&session->s_mutex);
+ session = cap->session;
+ mutex_lock(&session->s_mutex);
+ goto retry;
+ }
+ if (cap->session->s_state < CEPH_MDS_SESSION_OPEN)
+ goto out;
+
+ flushing = __mark_caps_flushing(inode, session);
+
+ /* __send_cap drops i_ceph_lock */
+ delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want,
+ cap->issued | cap->implemented, flushing,
+ flush_tid);
+ if (!delayed)
+ goto out_unlocked;
+
+ /* send was delayed; come back to this inode later */
+ spin_lock(&ci->i_ceph_lock);
+ __cap_delay_requeue(mdsc, ci);
+ }
+out:
+ spin_unlock(&ci->i_ceph_lock);
+out_unlocked:
+ if (session)
+ mutex_unlock(&session->s_mutex);
+ return flushing;
+}
+
+/*
+ * Return true if we've flushed caps through the given flush_tid.
+ *
+ * A cap bit is still in flight if it is marked flushing and its
+ * recorded flush tid is at or before @tid.
+ */
+static int caps_are_flushed(struct inode *inode, unsigned tid)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int i, ret = 1;
+
+ spin_lock(&ci->i_ceph_lock);
+ for (i = 0; i < CEPH_CAP_BITS; i++) {
+ if (!(ci->i_flushing_caps & (1 << i)))
+ continue;
+ if (ci->i_cap_flush_tid[i] <= tid) {
+ /* still flushing this bit */
+ ret = 0;
+ break;
+ }
+ }
+ spin_unlock(&ci->i_ceph_lock);
+ return ret;
+}
+
+/*
+ * Wait on any unsafe replies for the given inode. First wait on the
+ * newest request, and make that the upper bound. Then, if there are
+ * more requests, keep waiting on the oldest as long as it is still older
+ * than the original request.
+ *
+ * We take a ref on each request and drop i_unsafe_lock around the
+ * completion wait, so the list may change under us between iterations.
+ */
+static void sync_write_wait(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct list_head *head = &ci->i_unsafe_writes;
+ struct ceph_osd_request *req;
+ u64 last_tid;
+
+ spin_lock(&ci->i_unsafe_lock);
+ if (list_empty(head))
+ goto out;
+
+ /* set upper bound as _last_ entry in chain */
+ req = list_entry(head->prev, struct ceph_osd_request,
+ r_unsafe_item);
+ last_tid = req->r_tid;
+
+ do {
+ /* hold a ref so req stays valid while we sleep unlocked */
+ ceph_osdc_get_request(req);
+ spin_unlock(&ci->i_unsafe_lock);
+ dout("sync_write_wait on tid %llu (until %llu)\n",
+ req->r_tid, last_tid);
+ wait_for_completion(&req->r_safe_completion);
+ spin_lock(&ci->i_unsafe_lock);
+ ceph_osdc_put_request(req);
+
+ /*
+ * from here on look at first entry in chain, since we
+ * only want to wait for anything older than last_tid
+ */
+ if (list_empty(head))
+ break;
+ req = list_entry(head->next, struct ceph_osd_request,
+ r_unsafe_item);
+ } while (req->r_tid < last_tid);
+out:
+ spin_unlock(&ci->i_unsafe_lock);
+}
+
+/*
+ * fsync/fdatasync: wait out unsafe OSD writes, flush dirty pages, then
+ * flush dirty caps to the MDS and (for full fsync) wait for the flush
+ * ack.  For datasync we skip the cap wait because the MDS can recover
+ * size/mtime on its own.
+ */
+int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+ struct inode *inode = file->f_mapping->host;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ unsigned flush_tid;
+ int ret;
+ int dirty;
+
+ dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
+ sync_write_wait(inode);
+
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret < 0)
+ return ret;
+ mutex_lock(&inode->i_mutex);
+
+ dirty = try_flush_caps(inode, &flush_tid);
+ dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
+
+ /*
+ * only wait on non-file metadata writeback (the mds
+ * can recover size and mtime, so we don't need to
+ * wait for that)
+ */
+ if (!datasync && (dirty & ~CEPH_CAP_ANY_FILE_WR)) {
+ dout("fsync waiting for flush_tid %u\n", flush_tid);
+ ret = wait_event_interruptible(ci->i_cap_wq,
+ caps_are_flushed(inode, flush_tid));
+ }
+
+ dout("fsync %p%s done\n", inode, datasync ? " datasync" : "");
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+}
+
+/*
+ * Flush any dirty caps back to the mds. If we aren't asked to wait,
+ * queue inode for flush but don't do so immediately, because we can
+ * get by with fewer MDS messages if we wait for data writeback to
+ * complete first.
+ *
+ * Returns 0, or -ERESTARTSYS if the synchronous wait is interrupted.
+ */
+int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ unsigned flush_tid;
+ int err = 0;
+ int dirty;
+ int wait = wbc->sync_mode == WB_SYNC_ALL;
+
+ dout("write_inode %p wait=%d\n", inode, wait);
+ if (wait) {
+ dirty = try_flush_caps(inode, &flush_tid);
+ if (dirty)
+ err = wait_event_interruptible(ci->i_cap_wq,
+ caps_are_flushed(inode, flush_tid));
+ } else {
+ struct ceph_mds_client *mdsc =
+ ceph_sb_to_client(inode->i_sb)->mdsc;
+
+ /* requeue at the front so the flush happens soon-ish */
+ spin_lock(&ci->i_ceph_lock);
+ if (__ceph_caps_dirty(ci))
+ __cap_delay_requeue_front(mdsc, ci);
+ spin_unlock(&ci->i_ceph_lock);
+ }
+ return err;
+}
+
+/*
+ * After a recovering MDS goes active, we need to resend any caps
+ * we were flushing.
+ *
+ * Caller holds session->s_mutex.
+ */
+static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ struct ceph_cap_snap *capsnap;
+
+ dout("kick_flushing_capsnaps mds%d\n", session->s_mds);
+ list_for_each_entry(capsnap, &session->s_cap_snaps_flushing,
+ flushing_item) {
+ struct ceph_inode_info *ci = capsnap->ci;
+ struct inode *inode = &ci->vfs_inode;
+ struct ceph_cap *cap;
+
+ spin_lock(&ci->i_ceph_lock);
+ cap = ci->i_auth_cap;
+ if (cap && cap->session == session) {
+ dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
+ cap, capsnap);
+ /* session passed by reference: flush may switch it */
+ __ceph_flush_snaps(ci, &session, 1);
+ } else {
+ /* auth cap migrated away; nothing we can resend here */
+ pr_err("%p auth cap %p not mds%d ???\n", inode,
+ cap, session->s_mds);
+ }
+ spin_unlock(&ci->i_ceph_lock);
+ }
+}
+
+/*
+ * Resend in-flight cap flushes (and capsnap flushes) for all inodes
+ * on @session's flushing list, e.g. after the MDS recovered.
+ *
+ * Caller holds session->s_mutex.
+ */
+void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ struct ceph_inode_info *ci;
+
+ kick_flushing_capsnaps(mdsc, session);
+
+ dout("kick_flushing_caps mds%d\n", session->s_mds);
+ list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
+ struct inode *inode = &ci->vfs_inode;
+ struct ceph_cap *cap;
+ int delayed = 0;
+
+ spin_lock(&ci->i_ceph_lock);
+ cap = ci->i_auth_cap;
+ if (cap && cap->session == session) {
+ dout("kick_flushing_caps %p cap %p %s\n", inode,
+ cap, ceph_cap_string(ci->i_flushing_caps));
+ /* __send_cap drops i_ceph_lock */
+ delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
+ __ceph_caps_used(ci),
+ __ceph_caps_wanted(ci),
+ cap->issued | cap->implemented,
+ ci->i_flushing_caps, NULL);
+ if (delayed) {
+ /* retake lock to requeue the delayed flush */
+ spin_lock(&ci->i_ceph_lock);
+ __cap_delay_requeue(mdsc, ci);
+ spin_unlock(&ci->i_ceph_lock);
+ }
+ } else {
+ pr_err("%p auth cap %p not mds%d ???\n", inode,
+ cap, session->s_mds);
+ spin_unlock(&ci->i_ceph_lock);
+ }
+ }
+}
+
+/*
+ * Resend any pending cap/snap flushes for a single inode, e.g. after
+ * its caps were imported to this session.
+ *
+ * Caller holds session->s_mutex; i_ceph_lock is taken and released
+ * here (directly or via __send_cap, which drops it).
+ */
+static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session,
+ struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_cap *cap;
+ int delayed = 0;
+
+ spin_lock(&ci->i_ceph_lock);
+ cap = ci->i_auth_cap;
+ dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
+ ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
+
+ __ceph_flush_snaps(ci, &session, 1);
+
+ if (ci->i_flushing_caps) {
+ /* move to the (new) auth session's flushing list */
+ spin_lock(&mdsc->cap_dirty_lock);
+ list_move_tail(&ci->i_flushing_item,
+ &cap->session->s_cap_flushing);
+ spin_unlock(&mdsc->cap_dirty_lock);
+
+ /* __send_cap drops i_ceph_lock */
+ delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
+ __ceph_caps_used(ci),
+ __ceph_caps_wanted(ci),
+ cap->issued | cap->implemented,
+ ci->i_flushing_caps, NULL);
+ if (delayed) {
+ spin_lock(&ci->i_ceph_lock);
+ __cap_delay_requeue(mdsc, ci);
+ spin_unlock(&ci->i_ceph_lock);
+ }
+ } else {
+ spin_unlock(&ci->i_ceph_lock);
+ }
+}
+
+
+/*
+ * Take references to capabilities we hold, so that we don't release
+ * them to the MDS prematurely.
+ *
+ * Protected by i_ceph_lock.
+ */
+static void __take_cap_refs(struct ceph_inode_info *ci, int got)
+{
+ struct inode *inode = &ci->vfs_inode;
+
+ if (got & CEPH_CAP_PIN)
+ ci->i_pin_ref++;
+ if (got & CEPH_CAP_FILE_RD)
+ ci->i_rd_ref++;
+ if (got & CEPH_CAP_FILE_CACHE)
+ ci->i_rdcache_ref++;
+ if (got & CEPH_CAP_FILE_BUFFER) {
+ /* the first buffered-write ref pins the inode */
+ if (ci->i_wb_ref++ == 0)
+ ihold(inode);
+ dout("__take_cap_refs %p wb %d -> %d (?)\n",
+ inode, ci->i_wb_ref-1, ci->i_wb_ref);
+ }
+ if (got & CEPH_CAP_FILE_WR)
+ ci->i_wr_ref++;
+}
+
+/*
+ * Try to grab cap references. Specify those refs we @want, and the
+ * minimal set we @need. Also include the larger offset we are writing
+ * to (when applicable), and check against max_size here as well.
+ * Note that caller is responsible for ensuring max_size increases are
+ * requested from the MDS.
+ *
+ * Returns nonzero when the wait is over: either we took the refs
+ * (*got set), or the caller must act on *err (e.g. -EBADF) or
+ * *check_max (re-request a larger max_size).  Returns 0 to keep
+ * waiting (used as a wait_event condition by ceph_get_caps).
+ */
+static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
+ int *got, loff_t endoff, int *check_max, int *err)
+{
+ struct inode *inode = &ci->vfs_inode;
+ int ret = 0;
+ int have, implemented;
+ int file_wanted;
+
+ dout("get_cap_refs %p need %s want %s\n", inode,
+ ceph_cap_string(need), ceph_cap_string(want));
+ spin_lock(&ci->i_ceph_lock);
+
+ /* make sure file is actually open */
+ file_wanted = __ceph_caps_file_wanted(ci);
+ if ((file_wanted & need) == 0) {
+ dout("try_get_cap_refs need %s file_wanted %s, EBADF\n",
+ ceph_cap_string(need), ceph_cap_string(file_wanted));
+ *err = -EBADF;
+ ret = 1;
+ goto out;
+ }
+
+ /* finish pending truncate */
+ while (ci->i_truncate_pending) {
+ /* must drop the spinlock; the truncate may sleep */
+ spin_unlock(&ci->i_ceph_lock);
+ __ceph_do_pending_vmtruncate(inode);
+ spin_lock(&ci->i_ceph_lock);
+ }
+
+ have = __ceph_caps_issued(ci, &implemented);
+
+ if (have & need & CEPH_CAP_FILE_WR) {
+ if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) {
+ dout("get_cap_refs %p endoff %llu > maxsize %llu\n",
+ inode, endoff, ci->i_max_size);
+ if (endoff > ci->i_requested_max_size) {
+ *check_max = 1;
+ ret = 1;
+ }
+ goto out;
+ }
+ /*
+ * If a sync write is in progress, we must wait, so that we
+ * can get a final snapshot value for size+mtime.
+ */
+ if (__ceph_have_pending_cap_snap(ci)) {
+ dout("get_cap_refs %p cap_snap_pending\n", inode);
+ goto out;
+ }
+ }
+
+ if ((have & need) == need) {
+ /*
+ * Look at (implemented & ~have & not) so that we keep waiting
+ * on transition from wanted -> needed caps. This is needed
+ * for WRBUFFER|WR -> WR to avoid a new WR sync write from
+ * going before a prior buffered writeback happens.
+ */
+ int not = want & ~(have & need);
+ int revoking = implemented & ~have;
+ dout("get_cap_refs %p have %s but not %s (revoking %s)\n",
+ inode, ceph_cap_string(have), ceph_cap_string(not),
+ ceph_cap_string(revoking));
+ if ((revoking & not) == 0) {
+ *got = need | (have & want);
+ __take_cap_refs(ci, *got);
+ ret = 1;
+ }
+ } else {
+ dout("get_cap_refs %p have %s needed %s\n", inode,
+ ceph_cap_string(have), ceph_cap_string(need));
+ }
+out:
+ spin_unlock(&ci->i_ceph_lock);
+ dout("get_cap_refs %p ret %d got %s\n", inode,
+ ret, ceph_cap_string(*got));
+ return ret;
+}
+
+/*
+ * Check the offset we are writing up to against our current
+ * max_size. If necessary, tell the MDS we want to write to
+ * a larger offset.
+ */
+static void check_max_size(struct inode *inode, loff_t endoff)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int check = 0;
+
+ /* do we need to explicitly request a larger max_size? */
+ spin_lock(&ci->i_ceph_lock);
+ if (endoff >= ci->i_max_size && endoff > ci->i_wanted_max_size) {
+ dout("write %p at large endoff %llu, req max_size\n",
+ inode, endoff);
+ ci->i_wanted_max_size = endoff;
+ }
+ /* duplicate ceph_check_caps()'s logic */
+ if (ci->i_auth_cap &&
+ (ci->i_auth_cap->issued & CEPH_CAP_FILE_WR) &&
+ ci->i_wanted_max_size > ci->i_max_size &&
+ ci->i_wanted_max_size > ci->i_requested_max_size)
+ check = 1;
+ spin_unlock(&ci->i_ceph_lock);
+ /* send the request outside the spinlock */
+ if (check)
+ ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
+}
+
+/*
+ * Wait for caps, and take cap references. If we can't get a WR cap
+ * due to a small max_size, make sure we check_max_size (and possibly
+ * ask the mds) so we don't get hung up indefinitely.
+ */
+int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, int *got,
+ loff_t endoff)
+{
+ int ret;
+
+ for (;;) {
+ int check_max = 0;
+ int err = 0;
+
+ if (endoff > 0)
+ check_max_size(&ci->vfs_inode, endoff);
+ ret = wait_event_interruptible(ci->i_cap_wq,
+ try_get_cap_refs(ci, need, want,
+ got, endoff,
+ &check_max, &err));
+ if (err)
+ ret = err;
+ /* retry only if we need a larger max_size from the mds */
+ if (!check_max)
+ break;
+ }
+ return ret;
+}
+
+/*
+ * Take cap refs. Caller must already know we hold at least one ref
+ * on the caps in question or we don't know this is safe.
+ */
+void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps)
+{
+ spin_lock(&ci->i_ceph_lock);
+ __take_cap_refs(ci, caps);
+ spin_unlock(&ci->i_ceph_lock);
+}
+
+/*
+ * Release cap refs.
+ *
+ * If we released the last ref on any given cap, call ceph_check_caps
+ * to release (or schedule a release).
+ *
+ * If we are releasing a WR cap (from a sync write), finalize any affected
+ * cap_snap, and wake up any waiters.
+ *
+ * "put" below means we must drop the inode reference that the first
+ * FILE_BUFFER ref took in __take_cap_refs().
+ */
+void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
+{
+ struct inode *inode = &ci->vfs_inode;
+ int last = 0, put = 0, flushsnaps = 0, wake = 0;
+ struct ceph_cap_snap *capsnap;
+
+ spin_lock(&ci->i_ceph_lock);
+ if (had & CEPH_CAP_PIN)
+ --ci->i_pin_ref;
+ if (had & CEPH_CAP_FILE_RD)
+ if (--ci->i_rd_ref == 0)
+ last++;
+ if (had & CEPH_CAP_FILE_CACHE)
+ if (--ci->i_rdcache_ref == 0)
+ last++;
+ if (had & CEPH_CAP_FILE_BUFFER) {
+ if (--ci->i_wb_ref == 0) {
+ last++;
+ put++;
+ }
+ dout("put_cap_refs %p wb %d -> %d (?)\n",
+ inode, ci->i_wb_ref+1, ci->i_wb_ref);
+ }
+ if (had & CEPH_CAP_FILE_WR)
+ if (--ci->i_wr_ref == 0) {
+ last++;
+ /* last writer: finalize any cap_snap awaiting us */
+ if (!list_empty(&ci->i_cap_snaps)) {
+ capsnap = list_first_entry(&ci->i_cap_snaps,
+ struct ceph_cap_snap,
+ ci_item);
+ if (capsnap->writing) {
+ capsnap->writing = 0;
+ flushsnaps =
+ __ceph_finish_cap_snap(ci,
+ capsnap);
+ wake = 1;
+ }
+ }
+ }
+ spin_unlock(&ci->i_ceph_lock);
+
+ dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
+ last ? " last" : "", put ? " put" : "");
+
+ if (last && !flushsnaps)
+ ceph_check_caps(ci, 0, NULL);
+ else if (flushsnaps)
+ ceph_flush_snaps(ci);
+ if (wake)
+ wake_up_all(&ci->i_cap_wq);
+ if (put)
+ iput(inode);
+}
+
+/*
+ * Release @nr WRBUFFER refs on dirty pages for the given @snapc snap
+ * context. Adjust per-snap dirty page accounting as appropriate.
+ * Once all dirty data for a cap_snap is flushed, flush snapped file
+ * metadata back to the MDS. If we dropped the last ref, call
+ * ceph_check_caps.
+ */
+void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
+ struct ceph_snap_context *snapc)
+{
+ struct inode *inode = &ci->vfs_inode;
+ int last = 0;
+ int complete_capsnap = 0;
+ int drop_capsnap = 0;
+ int found = 0;
+ struct ceph_cap_snap *capsnap = NULL;
+
+ spin_lock(&ci->i_ceph_lock);
+ ci->i_wrbuffer_ref -= nr;
+ last = !ci->i_wrbuffer_ref;
+
+ if (ci->i_head_snapc == snapc) {
+ /* refs belong to the live (head) snap context */
+ ci->i_wrbuffer_ref_head -= nr;
+ if (ci->i_wrbuffer_ref_head == 0 &&
+ ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) {
+ BUG_ON(!ci->i_head_snapc);
+ ceph_put_snap_context(ci->i_head_snapc);
+ ci->i_head_snapc = NULL;
+ }
+ dout("put_wrbuffer_cap_refs on %p head %d/%d -> %d/%d %s\n",
+ inode,
+ ci->i_wrbuffer_ref+nr, ci->i_wrbuffer_ref_head+nr,
+ ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
+ last ? " LAST" : "");
+ } else {
+ /* refs belong to an older cap_snap's context */
+ list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
+ if (capsnap->context == snapc) {
+ found = 1;
+ break;
+ }
+ }
+ BUG_ON(!found);
+ capsnap->dirty_pages -= nr;
+ if (capsnap->dirty_pages == 0) {
+ complete_capsnap = 1;
+ if (capsnap->dirty == 0)
+ /* cap writeback completed before we created
+ * the cap_snap; no FLUSHSNAP is needed */
+ drop_capsnap = 1;
+ }
+ dout("put_wrbuffer_cap_refs on %p cap_snap %p "
+ " snap %lld %d/%d -> %d/%d %s%s%s\n",
+ inode, capsnap, capsnap->context->seq,
+ ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr,
+ ci->i_wrbuffer_ref, capsnap->dirty_pages,
+ last ? " (wrbuffer last)" : "",
+ complete_capsnap ? " (complete capsnap)" : "",
+ drop_capsnap ? " (drop capsnap)" : "");
+ if (drop_capsnap) {
+ ceph_put_snap_context(capsnap->context);
+ list_del(&capsnap->ci_item);
+ list_del(&capsnap->flushing_item);
+ ceph_put_cap_snap(capsnap);
+ }
+ }
+
+ spin_unlock(&ci->i_ceph_lock);
+
+ if (last) {
+ ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
+ iput(inode);
+ } else if (complete_capsnap) {
+ ceph_flush_snaps(ci);
+ wake_up_all(&ci->i_cap_wq);
+ }
+ if (drop_capsnap)
+ iput(inode);
+}
+
+/*
+ * Invalidate unlinked inode's aliases, so we can drop the inode ASAP.
+ */
+static void invalidate_aliases(struct inode *inode)
+{
+ struct dentry *dn, *prev = NULL;
+
+ dout("invalidate_aliases inode %p\n", inode);
+ d_prune_aliases(inode);
+ /*
+ * For non-directory inode, d_find_alias() only returns
+ * hashed dentry. After calling d_invalidate(), the
+ * dentry becomes unhashed.
+ *
+ * For directory inode, d_find_alias() can return
+ * unhashed dentry. But directory inode should have
+ * one alias at most.
+ */
+ while ((dn = d_find_alias(inode))) {
+ /* same dentry twice: d_invalidate had no effect; stop */
+ if (dn == prev) {
+ dput(dn);
+ break;
+ }
+ d_invalidate(dn);
+ if (prev)
+ dput(prev);
+ /* hold the ref so the loop-detection compare stays valid */
+ prev = dn;
+ }
+ if (prev)
+ dput(prev);
+}
+
+/*
+ * Handle a cap GRANT message from the MDS. (Note that a GRANT may
+ * actually be a revocation if it specifies a smaller cap set.)
+ *
+ * caller holds s_mutex and i_ceph_lock, we drop both.
+ *
+ * The local check_caps disposition (the function itself returns void):
+ * 0 - ok
+ * 1 - check_caps on auth cap only (writeback)
+ * 2 - check_caps (ack revoke)
+ */
+static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
+ struct ceph_mds_session *session,
+ struct ceph_cap *cap,
+ struct ceph_buffer *xattr_buf)
+ __releases(ci->i_ceph_lock)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int mds = session->s_mds;
+ int seq = le32_to_cpu(grant->seq);
+ int newcaps = le32_to_cpu(grant->caps);
+ int issued, implemented, used, wanted, dirty;
+ u64 size = le64_to_cpu(grant->size);
+ u64 max_size = le64_to_cpu(grant->max_size);
+ struct timespec mtime, atime, ctime;
+ int check_caps = 0;
+ int wake = 0;
+ int writeback = 0;
+ int queue_invalidate = 0;
+ int deleted_inode = 0;
+ int queue_revalidate = 0;
+
+ dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
+ inode, cap, mds, seq, ceph_cap_string(newcaps));
+ dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
+ inode->i_size);
+
+
+ /*
+ * auth mds of the inode changed. we received the cap export message,
+ * but still haven't received the cap import message. handle_cap_export
+ * updated the new auth MDS' cap.
+ *
+ * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing a message
+ * that was sent before the cap import message. So don't remove caps.
+ */
+ if (ceph_seq_cmp(seq, cap->seq) <= 0) {
+ WARN_ON(cap != ci->i_auth_cap);
+ WARN_ON(cap->cap_id != le64_to_cpu(grant->cap_id));
+ seq = cap->seq;
+ newcaps |= cap->issued;
+ }
+
+ /*
+ * If CACHE is being revoked, and we have no dirty buffers,
+ * try to invalidate (once). (If there are dirty buffers, we
+ * will invalidate _after_ writeback.)
+ */
+ if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) &&
+ (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
+ !ci->i_wrbuffer_ref) {
+ if (try_nonblocking_invalidate(inode)) {
+ /* there were locked pages.. invalidate later
+ in a separate thread. */
+ if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
+ queue_invalidate = 1;
+ ci->i_rdcache_revoking = ci->i_rdcache_gen;
+ }
+ }
+
+ ceph_fscache_invalidate(inode);
+ }
+
+ /* side effects now are allowed */
+
+ issued = __ceph_caps_issued(ci, &implemented);
+ issued |= implemented | __ceph_caps_dirty(ci);
+
+ cap->cap_gen = session->s_cap_gen;
+ cap->seq = seq;
+
+ __check_cap_issue(ci, cap, newcaps);
+
+ /* trust MDS metadata only for fields we don't hold EXCL on */
+ if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
+ inode->i_mode = le32_to_cpu(grant->mode);
+ inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid));
+ inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid));
+ dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
+ from_kuid(&init_user_ns, inode->i_uid),
+ from_kgid(&init_user_ns, inode->i_gid));
+ }
+
+ if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
+ set_nlink(inode, le32_to_cpu(grant->nlink));
+ if (inode->i_nlink == 0 &&
+ (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
+ deleted_inode = 1;
+ }
+
+ if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) {
+ int len = le32_to_cpu(grant->xattr_len);
+ u64 version = le64_to_cpu(grant->xattr_version);
+
+ if (version > ci->i_xattrs.version) {
+ dout(" got new xattrs v%llu on %p len %d\n",
+ version, inode, len);
+ if (ci->i_xattrs.blob)
+ ceph_buffer_put(ci->i_xattrs.blob);
+ ci->i_xattrs.blob = ceph_buffer_get(xattr_buf);
+ ci->i_xattrs.version = version;
+ ceph_forget_all_cached_acls(inode);
+ }
+ }
+
+ /* Do we need to revalidate our fscache cookie. Don't bother on the
+ * first cache cap as we already validate at cookie creation time. */
+ if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1)
+ queue_revalidate = 1;
+
+ /* size/ctime/mtime/atime? */
+ ceph_fill_file_size(inode, issued,
+ le32_to_cpu(grant->truncate_seq),
+ le64_to_cpu(grant->truncate_size), size);
+ ceph_decode_timespec(&mtime, &grant->mtime);
+ ceph_decode_timespec(&atime, &grant->atime);
+ ceph_decode_timespec(&ctime, &grant->ctime);
+ ceph_fill_file_time(inode, issued,
+ le32_to_cpu(grant->time_warp_seq), &ctime, &mtime,
+ &atime);
+
+
+ /* file layout may have changed */
+ ci->i_layout = grant->layout;
+
+ /* max size increase? */
+ if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
+ dout("max_size %lld -> %llu\n", ci->i_max_size, max_size);
+ ci->i_max_size = max_size;
+ if (max_size >= ci->i_wanted_max_size) {
+ ci->i_wanted_max_size = 0; /* reset */
+ ci->i_requested_max_size = 0;
+ }
+ wake = 1;
+ }
+
+ /* check cap bits */
+ wanted = __ceph_caps_wanted(ci);
+ used = __ceph_caps_used(ci);
+ dirty = __ceph_caps_dirty(ci);
+ dout(" my wanted = %s, used = %s, dirty %s\n",
+ ceph_cap_string(wanted),
+ ceph_cap_string(used),
+ ceph_cap_string(dirty));
+ if (wanted != le32_to_cpu(grant->wanted)) {
+ dout("mds wanted %s -> %s\n",
+ ceph_cap_string(le32_to_cpu(grant->wanted)),
+ ceph_cap_string(wanted));
+ /* imported cap may not have correct mds_wanted */
+ if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT)
+ check_caps = 1;
+ }
+
+ /* revocation, grant, or no-op? */
+ if (cap->issued & ~newcaps) {
+ int revoking = cap->issued & ~newcaps;
+
+ dout("revocation: %s -> %s (revoking %s)\n",
+ ceph_cap_string(cap->issued),
+ ceph_cap_string(newcaps),
+ ceph_cap_string(revoking));
+ if (revoking & used & CEPH_CAP_FILE_BUFFER)
+ writeback = 1; /* initiate writeback; will delay ack */
+ else if (revoking == CEPH_CAP_FILE_CACHE &&
+ (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
+ queue_invalidate)
+ ; /* do nothing yet, invalidation will be queued */
+ else if (cap == ci->i_auth_cap)
+ check_caps = 1; /* check auth cap only */
+ else
+ check_caps = 2; /* check all caps */
+ cap->issued = newcaps;
+ cap->implemented |= newcaps;
+ } else if (cap->issued == newcaps) {
+ dout("caps unchanged: %s -> %s\n",
+ ceph_cap_string(cap->issued), ceph_cap_string(newcaps));
+ } else {
+ dout("grant: %s -> %s\n", ceph_cap_string(cap->issued),
+ ceph_cap_string(newcaps));
+ /* non-auth MDS is revoking the newly grant caps ? */
+ if (cap == ci->i_auth_cap &&
+ __ceph_caps_revoking_other(ci, cap, newcaps))
+ check_caps = 2;
+
+ cap->issued = newcaps;
+ cap->implemented |= newcaps; /* add bits only, to
+ * avoid stepping on a
+ * pending revocation */
+ wake = 1;
+ }
+ BUG_ON(cap->issued & ~cap->implemented);
+
+ spin_unlock(&ci->i_ceph_lock);
+
+ /* deferred side effects that must run without i_ceph_lock */
+ if (writeback)
+ /*
+ * queue inode for writeback: we can't actually call
+ * filemap_write_and_wait, etc. from message handler
+ * context.
+ */
+ ceph_queue_writeback(inode);
+ if (queue_invalidate)
+ ceph_queue_invalidate(inode);
+ if (deleted_inode)
+ invalidate_aliases(inode);
+ if (queue_revalidate)
+ ceph_queue_revalidate(inode);
+ if (wake)
+ wake_up_all(&ci->i_cap_wq);
+
+ /* s_mutex is released here, directly or inside ceph_check_caps */
+ if (check_caps == 1)
+ ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
+ session);
+ else if (check_caps == 2)
+ ceph_check_caps(ci, CHECK_CAPS_NODELAY, session);
+ else
+ mutex_unlock(&session->s_mutex);
+}
+
+/*
+ * Handle FLUSH_ACK from MDS, indicating that metadata we sent to the
+ * MDS has been safely committed.
+ *
+ * Caller holds i_ceph_lock; we drop it.  "drop" means the inode became
+ * clean and we must release the ref held while it was dirty.
+ */
+static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
+ struct ceph_mds_caps *m,
+ struct ceph_mds_session *session,
+ struct ceph_cap *cap)
+ __releases(ci->i_ceph_lock)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+ unsigned seq = le32_to_cpu(m->seq);
+ int dirty = le32_to_cpu(m->dirty);
+ int cleaned = 0;
+ int drop = 0;
+ int i;
+
+ /* only bits whose recorded flush tid matches this ack are clean */
+ for (i = 0; i < CEPH_CAP_BITS; i++)
+ if ((dirty & (1 << i)) &&
+ flush_tid == ci->i_cap_flush_tid[i])
+ cleaned |= 1 << i;
+
+ dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s,"
+ " flushing %s -> %s\n",
+ inode, session->s_mds, seq, ceph_cap_string(dirty),
+ ceph_cap_string(cleaned), ceph_cap_string(ci->i_flushing_caps),
+ ceph_cap_string(ci->i_flushing_caps & ~cleaned));
+
+ if (ci->i_flushing_caps == (ci->i_flushing_caps & ~cleaned))
+ goto out;
+
+ ci->i_flushing_caps &= ~cleaned;
+
+ spin_lock(&mdsc->cap_dirty_lock);
+ if (ci->i_flushing_caps == 0) {
+ list_del_init(&ci->i_flushing_item);
+ if (!list_empty(&session->s_cap_flushing))
+ dout(" mds%d still flushing cap on %p\n",
+ session->s_mds,
+ &list_entry(session->s_cap_flushing.next,
+ struct ceph_inode_info,
+ i_flushing_item)->vfs_inode);
+ mdsc->num_cap_flushing--;
+ wake_up_all(&mdsc->cap_flushing_wq);
+ dout(" inode %p now !flushing\n", inode);
+
+ if (ci->i_dirty_caps == 0) {
+ dout(" inode %p now clean\n", inode);
+ BUG_ON(!list_empty(&ci->i_dirty_item));
+ drop = 1;
+ if (ci->i_wrbuffer_ref_head == 0) {
+ BUG_ON(!ci->i_head_snapc);
+ ceph_put_snap_context(ci->i_head_snapc);
+ ci->i_head_snapc = NULL;
+ }
+ } else {
+ BUG_ON(list_empty(&ci->i_dirty_item));
+ }
+ }
+ spin_unlock(&mdsc->cap_dirty_lock);
+ wake_up_all(&ci->i_cap_wq);
+
+out:
+ spin_unlock(&ci->i_ceph_lock);
+ if (drop)
+ iput(inode);
+}
+
+/*
+ * Handle FLUSHSNAP_ACK. MDS has flushed snap data to disk and we can
+ * throw away our cap_snap.
+ *
+ * Caller hold s_mutex.
+ */
+static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
+ struct ceph_mds_caps *m,
+ struct ceph_mds_session *session)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ u64 follows = le64_to_cpu(m->snap_follows);
+ struct ceph_cap_snap *capsnap;
+ int drop = 0;
+
+ dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n",
+ inode, ci, session->s_mds, follows);
+
+ spin_lock(&ci->i_ceph_lock);
+ list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
+ if (capsnap->follows == follows) {
+ /* stale ack for a resent flush: ignore it */
+ if (capsnap->flush_tid != flush_tid) {
+ dout(" cap_snap %p follows %lld tid %lld !="
+ " %lld\n", capsnap, follows,
+ flush_tid, capsnap->flush_tid);
+ break;
+ }
+ WARN_ON(capsnap->dirty_pages || capsnap->writing);
+ dout(" removing %p cap_snap %p follows %lld\n",
+ inode, capsnap, follows);
+ ceph_put_snap_context(capsnap->context);
+ list_del(&capsnap->ci_item);
+ list_del(&capsnap->flushing_item);
+ ceph_put_cap_snap(capsnap);
+ /* drop the inode ref the cap_snap held */
+ drop = 1;
+ break;
+ } else {
+ dout(" skipping cap_snap %p follows %lld\n",
+ capsnap, capsnap->follows);
+ }
+ }
+ spin_unlock(&ci->i_ceph_lock);
+ if (drop)
+ iput(inode);
+}
+
+/*
+ * Handle TRUNC from MDS, indicating file truncation.
+ *
+ * caller hold s_mutex, and i_ceph_lock which we drop.
+ */
+static void handle_cap_trunc(struct inode *inode,
+ struct ceph_mds_caps *trunc,
+ struct ceph_mds_session *session)
+ __releases(ci->i_ceph_lock)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int mds = session->s_mds;
+ int seq = le32_to_cpu(trunc->seq);
+ u32 truncate_seq = le32_to_cpu(trunc->truncate_seq);
+ u64 truncate_size = le64_to_cpu(trunc->truncate_size);
+ u64 size = le64_to_cpu(trunc->size);
+ int implemented = 0;
+ int dirty = __ceph_caps_dirty(ci);
+ int issued = __ceph_caps_issued(ceph_inode(inode), &implemented);
+ int queue_trunc = 0;
+
+ issued |= implemented | dirty;
+
+ dout("handle_cap_trunc inode %p mds%d seq %d to %lld seq %d\n",
+ inode, mds, seq, truncate_size, truncate_seq);
+ /* nonzero if the local truncation must run in a worker thread */
+ queue_trunc = ceph_fill_file_size(inode, issued,
+ truncate_seq, truncate_size, size);
+ spin_unlock(&ci->i_ceph_lock);
+
+ if (queue_trunc) {
+ ceph_queue_vmtruncate(inode);
+ ceph_fscache_invalidate(inode);
+ }
+}
+
+/*
+ * Handle EXPORT from MDS. Cap is being migrated _from_ this mds to a
+ * different one. If we are the most recent migration we've seen (as
+ * indicated by mseq), make note of the migrating cap bits for the
+ * duration (until we see the corresponding IMPORT).
+ *
+ * caller holds s_mutex
+ *
+ * We may have to drop all locks to open the target session and/or add
+ * a placeholder cap for it, then start over; lock ordering between the
+ * two sessions is by mds rank to avoid ABBA deadlock.
+ */
+static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
+ struct ceph_mds_cap_peer *ph,
+ struct ceph_mds_session *session)
+{
+ struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+ struct ceph_mds_session *tsession = NULL;
+ struct ceph_cap *cap, *tcap;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ u64 t_cap_id;
+ unsigned mseq = le32_to_cpu(ex->migrate_seq);
+ unsigned t_seq, t_mseq;
+ int target, issued;
+ int mds = session->s_mds;
+
+ if (ph) {
+ t_cap_id = le64_to_cpu(ph->cap_id);
+ t_seq = le32_to_cpu(ph->seq);
+ t_mseq = le32_to_cpu(ph->mseq);
+ target = le32_to_cpu(ph->mds);
+ } else {
+ /* no peer info: the cap is simply being dropped */
+ t_cap_id = t_seq = t_mseq = 0;
+ target = -1;
+ }
+
+ dout("handle_cap_export inode %p ci %p mds%d mseq %d target %d\n",
+ inode, ci, mds, mseq, target);
+retry:
+ spin_lock(&ci->i_ceph_lock);
+ cap = __get_cap_for_mds(ci, mds);
+ if (!cap)
+ goto out_unlock;
+
+ if (target < 0) {
+ __ceph_remove_cap(cap, false);
+ goto out_unlock;
+ }
+
+ /*
+ * now we know we haven't received the cap import message yet
+ * because the exported cap still exist.
+ */
+
+ issued = cap->issued;
+ WARN_ON(issued != cap->implemented);
+
+ tcap = __get_cap_for_mds(ci, target);
+ if (tcap) {
+ /* already have caps from the target */
+ if (tcap->cap_id != t_cap_id ||
+ ceph_seq_cmp(tcap->seq, t_seq) < 0) {
+ dout(" updating import cap %p mds%d\n", tcap, target);
+ tcap->cap_id = t_cap_id;
+ tcap->seq = t_seq - 1;
+ tcap->issue_seq = t_seq - 1;
+ tcap->mseq = t_mseq;
+ tcap->issued |= issued;
+ tcap->implemented |= issued;
+ if (cap == ci->i_auth_cap)
+ ci->i_auth_cap = tcap;
+ if (ci->i_flushing_caps && ci->i_auth_cap == tcap) {
+ spin_lock(&mdsc->cap_dirty_lock);
+ list_move_tail(&ci->i_flushing_item,
+ &tcap->session->s_cap_flushing);
+ spin_unlock(&mdsc->cap_dirty_lock);
+ }
+ }
+ __ceph_remove_cap(cap, false);
+ goto out_unlock;
+ }
+
+ if (tsession) {
+ /* second pass: target session opened below; add the cap */
+ int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
+ spin_unlock(&ci->i_ceph_lock);
+ /* add placeholder for the export tagert */
+ ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0,
+ t_seq - 1, t_mseq, (u64)-1, flag, NULL);
+ goto retry;
+ }
+
+ spin_unlock(&ci->i_ceph_lock);
+ mutex_unlock(&session->s_mutex);
+
+ /* open target session */
+ tsession = ceph_mdsc_open_export_target_session(mdsc, target);
+ if (!IS_ERR(tsession)) {
+ /* always lock the lower-ranked session's mutex first */
+ if (mds > target) {
+ mutex_lock(&session->s_mutex);
+ mutex_lock_nested(&tsession->s_mutex,
+ SINGLE_DEPTH_NESTING);
+ } else {
+ mutex_lock(&tsession->s_mutex);
+ mutex_lock_nested(&session->s_mutex,
+ SINGLE_DEPTH_NESTING);
+ }
+ ceph_add_cap_releases(mdsc, tsession);
+ } else {
+ WARN_ON(1);
+ tsession = NULL;
+ target = -1;
+ }
+ goto retry;
+
+out_unlock:
+ spin_unlock(&ci->i_ceph_lock);
+ mutex_unlock(&session->s_mutex);
+ if (tsession) {
+ mutex_unlock(&tsession->s_mutex);
+ ceph_put_mds_session(tsession);
+ }
+}
+
+/*
+ * Handle cap IMPORT. If there are temp bits from an older EXPORT,
+ * clean them up.
+ *
+ * caller holds s_mutex.
+ */
+static void handle_cap_import(struct ceph_mds_client *mdsc,
+ struct inode *inode, struct ceph_mds_caps *im,
+ struct ceph_mds_cap_peer *ph,
+ struct ceph_mds_session *session,
+ void *snaptrace, int snaptrace_len)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_cap *cap;
+ int mds = session->s_mds;
+ unsigned issued = le32_to_cpu(im->caps);
+ unsigned wanted = le32_to_cpu(im->wanted);
+ unsigned seq = le32_to_cpu(im->seq);
+ unsigned mseq = le32_to_cpu(im->migrate_seq);
+ u64 realmino = le64_to_cpu(im->realm);
+ u64 cap_id = le64_to_cpu(im->cap_id);
+ u64 p_cap_id;
+ int peer;
+
+ if (ph) {
+ p_cap_id = le64_to_cpu(ph->cap_id);
+ peer = le32_to_cpu(ph->mds);
+ } else {
+ p_cap_id = 0;
+ peer = -1;
+ }
+
+ dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n",
+ inode, ci, mds, mseq, peer);
+
+ spin_lock(&ci->i_ceph_lock);
+ cap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL;
+ if (cap && cap->cap_id == p_cap_id) {
+ /* remove the stale cap left over from the EXPORT */
+ dout(" remove export cap %p mds%d flags %d\n",
+ cap, peer, ph->flags);
+ if ((ph->flags & CEPH_CAP_FLAG_AUTH) &&
+ (cap->seq != le32_to_cpu(ph->seq) ||
+ cap->mseq != le32_to_cpu(ph->mseq))) {
+ pr_err("handle_cap_import: mismatched seq/mseq: "
+ "ino (%llx.%llx) mds%d seq %d mseq %d "
+ "importer mds%d has peer seq %d mseq %d\n",
+ ceph_vinop(inode), peer, cap->seq,
+ cap->mseq, mds, le32_to_cpu(ph->seq),
+ le32_to_cpu(ph->mseq));
+ }
+ ci->i_cap_exporting_issued = cap->issued;
+ __ceph_remove_cap(cap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
+ }
+
+ /* make sure we re-request max_size, if necessary */
+ ci->i_wanted_max_size = 0;
+ ci->i_requested_max_size = 0;
+ spin_unlock(&ci->i_ceph_lock);
+
+ /* snap trace update needs write; downgrade to read for add/kick */
+ down_write(&mdsc->snap_rwsem);
+ ceph_update_snap_trace(mdsc, snaptrace, snaptrace+snaptrace_len,
+ false);
+ downgrade_write(&mdsc->snap_rwsem);
+ ceph_add_cap(inode, session, cap_id, -1,
+ issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH,
+ NULL /* no caps context */);
+ kick_flushing_inode_caps(mdsc, session, inode);
+ up_read(&mdsc->snap_rwsem);
+
+}
+
/*
 * Handle a caps message from the MDS.
 *
 * Identify the appropriate session, inode, and call the right handler
 * based on the cap op.
 *
 * Decodes the message front (header, optional snap trace, optional
 * flock section for v2+, optional migration peer record for v3+
 * IMPORT/EXPORT), then dispatches.  Locking is asymmetric on exit:
 * paths reaching "done" still hold session->s_mutex, while paths
 * jumping to "done_unlocked" expect the handler to have released it
 * (visible for handle_cap_export; NOTE(review): assumed also true for
 * handle_cap_grant -- confirm against its definition).
 */
void ceph_handle_caps(struct ceph_mds_session *session,
		      struct ceph_msg *msg)
{
	struct ceph_mds_client *mdsc = session->s_mdsc;
	struct super_block *sb = mdsc->fsc->sb;
	struct inode *inode;
	struct ceph_inode_info *ci;
	struct ceph_cap *cap;
	struct ceph_mds_caps *h;
	struct ceph_mds_cap_peer *peer = NULL;
	int mds = session->s_mds;
	int op;
	u32 seq, mseq;
	struct ceph_vino vino;
	u64 cap_id;
	u64 size, max_size;
	u64 tid;
	void *snaptrace;
	size_t snaptrace_len;
	void *flock;
	void *end;
	u32 flock_len;

	dout("handle_caps from mds%d\n", mds);

	/* decode */
	end = msg->front.iov_base + msg->front.iov_len;
	tid = le64_to_cpu(msg->hdr.tid);
	if (msg->front.iov_len < sizeof(*h))
		goto bad;
	h = msg->front.iov_base;
	op = le32_to_cpu(h->op);
	vino.ino = le64_to_cpu(h->ino);
	vino.snap = CEPH_NOSNAP;
	cap_id = le64_to_cpu(h->cap_id);
	seq = le32_to_cpu(h->seq);
	mseq = le32_to_cpu(h->migrate_seq);
	size = le64_to_cpu(h->size);
	max_size = le64_to_cpu(h->max_size);

	/* snap trace immediately follows the fixed header */
	snaptrace = h + 1;
	snaptrace_len = le32_to_cpu(h->snap_trace_len);

	/* v2+ messages append a 32-bit-counted flock section */
	if (le16_to_cpu(msg->hdr.version) >= 2) {
		void *p = snaptrace + snaptrace_len;
		ceph_decode_32_safe(&p, end, flock_len, bad);
		if (p + flock_len > end)
			goto bad;
		flock = p;
	} else {
		flock = NULL;
		flock_len = 0;
	}

	/* v3+ IMPORT/EXPORT messages carry a cap migration peer record */
	if (le16_to_cpu(msg->hdr.version) >= 3) {
		if (op == CEPH_CAP_OP_IMPORT) {
			void *p = flock + flock_len;
			if (p + sizeof(*peer) > end)
				goto bad;
			peer = p;
		} else if (op == CEPH_CAP_OP_EXPORT) {
			/* recorded in unused fields */
			peer = (void *)&h->size;
		}
	}

	mutex_lock(&session->s_mutex);
	session->s_seq++;
	dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
	     (unsigned)seq);

	if (op == CEPH_CAP_OP_IMPORT)
		ceph_add_cap_releases(mdsc, session);

	/* lookup ino */
	inode = ceph_find_inode(sb, vino);
	ci = ceph_inode(inode);	/* pointer math only; not used unless inode != NULL */
	dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino,
	     vino.snap, inode);
	if (!inode) {
		dout(" i don't have ino %llx\n", vino.ino);

		/* queue a release so the mds stops resending the import */
		if (op == CEPH_CAP_OP_IMPORT) {
			spin_lock(&session->s_cap_lock);
			__queue_cap_release(session, vino.ino, cap_id,
					    mseq, seq);
			spin_unlock(&session->s_cap_lock);
		}
		goto flush_cap_releases;
	}

	/* these will work even if we don't have a cap yet */
	switch (op) {
	case CEPH_CAP_OP_FLUSHSNAP_ACK:
		handle_cap_flushsnap_ack(inode, tid, h, session);
		goto done;

	case CEPH_CAP_OP_EXPORT:
		/* handle_cap_export drops s_mutex on its own */
		handle_cap_export(inode, h, peer, session);
		goto done_unlocked;

	case CEPH_CAP_OP_IMPORT:
		handle_cap_import(mdsc, inode, h, peer, session,
				  snaptrace, snaptrace_len);
		/* no goto: IMPORT falls through to the grant handling below */
	}

	/* the rest require a cap */
	spin_lock(&ci->i_ceph_lock);
	cap = __get_cap_for_mds(ceph_inode(inode), mds);
	if (!cap) {
		dout(" no cap on %p ino %llx.%llx from mds%d\n",
		     inode, ceph_ino(inode), ceph_snap(inode), mds);
		spin_unlock(&ci->i_ceph_lock);
		goto flush_cap_releases;
	}

	/* note that each of these drops i_ceph_lock for us */
	switch (op) {
	case CEPH_CAP_OP_REVOKE:
	case CEPH_CAP_OP_GRANT:
	case CEPH_CAP_OP_IMPORT:
		handle_cap_grant(inode, h, session, cap, msg->middle);
		goto done_unlocked;

	case CEPH_CAP_OP_FLUSH_ACK:
		handle_cap_flush_ack(inode, tid, h, session, cap);
		break;

	case CEPH_CAP_OP_TRUNC:
		handle_cap_trunc(inode, h, session);
		break;

	default:
		spin_unlock(&ci->i_ceph_lock);
		pr_err("ceph_handle_caps: unknown cap op %d %s\n", op,
		       ceph_cap_op_name(op));
	}

	goto done;

flush_cap_releases:
	/*
	 * send any full release message to try to move things
	 * along for the mds (who clearly thinks we still have this
	 * cap).
	 */
	ceph_add_cap_releases(mdsc, session);
	ceph_send_cap_releases(mdsc, session);

done:
	mutex_unlock(&session->s_mutex);
done_unlocked:
	if (inode)
		iput(inode);
	return;

bad:
	/* only reachable before s_mutex is taken */
	pr_err("ceph_handle_caps: corrupt message\n");
	ceph_msg_dump(msg);
	return;
}
+
/*
 * Delayed work handler to process end of delayed cap release LRU list.
 *
 * Walk mdsc->cap_delay_list from the front, running ceph_check_caps()
 * on each inode whose hold time has expired (or that is flagged
 * CEPH_I_FLUSH).  cap_delay_lock is dropped around each
 * ceph_check_caps() call; both break paths exit the loop with the
 * lock still held, so the spin_unlock() after the loop is the
 * matching release.
 */
void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
{
	struct ceph_inode_info *ci;
	int flags = CHECK_CAPS_NODELAY;

	dout("check_delayed_caps\n");
	while (1) {
		spin_lock(&mdsc->cap_delay_lock);
		if (list_empty(&mdsc->cap_delay_list))
			break;
		ci = list_first_entry(&mdsc->cap_delay_list,
				      struct ceph_inode_info,
				      i_cap_delay_list);
		/* list is in hold-time order; stop at the first not-yet-due entry */
		if ((ci->i_ceph_flags & CEPH_I_FLUSH) == 0 &&
		    time_before(jiffies, ci->i_hold_caps_max))
			break;
		list_del_init(&ci->i_cap_delay_list);
		spin_unlock(&mdsc->cap_delay_lock);
		dout("check_delayed_caps on %p\n", &ci->vfs_inode);
		ceph_check_caps(ci, flags, NULL);
	}
	spin_unlock(&mdsc->cap_delay_lock);
}
+
/*
 * Flush all dirty caps to the mds
 *
 * Repeatedly take the first entry of mdsc->cap_dirty; an ihold()
 * keeps the inode alive while cap_dirty_lock is dropped for the
 * ceph_check_caps() call.
 * NOTE(review): termination relies on ceph_check_caps(..FLUSH..)
 * removing ci from cap_dirty before we loop again -- confirm.
 */
void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
{
	struct ceph_inode_info *ci;
	struct inode *inode;

	dout("flush_dirty_caps\n");
	spin_lock(&mdsc->cap_dirty_lock);
	while (!list_empty(&mdsc->cap_dirty)) {
		ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info,
				      i_dirty_item);
		inode = &ci->vfs_inode;
		ihold(inode);
		dout("flush_dirty_caps %p\n", inode);
		spin_unlock(&mdsc->cap_dirty_lock);
		ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, NULL);
		iput(inode);
		spin_lock(&mdsc->cap_dirty_lock);
	}
	spin_unlock(&mdsc->cap_dirty_lock);
	dout("flush_dirty_caps done\n");
}
+
+/*
+ * Drop open file reference. If we were the last open file,
+ * we may need to release capabilities to the MDS (or schedule
+ * their delayed release).
+ */
+void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
+{
+ struct inode *inode = &ci->vfs_inode;
+ int last = 0;
+
+ spin_lock(&ci->i_ceph_lock);
+ dout("put_fmode %p fmode %d %d -> %d\n", inode, fmode,
+ ci->i_nr_by_mode[fmode], ci->i_nr_by_mode[fmode]-1);
+ BUG_ON(ci->i_nr_by_mode[fmode] == 0);
+ if (--ci->i_nr_by_mode[fmode] == 0)
+ last++;
+ spin_unlock(&ci->i_ceph_lock);
+
+ if (last && ci->i_vino.snap == CEPH_NOSNAP)
+ ceph_check_caps(ci, 0, NULL);
+}
+
+/*
+ * Helpers for embedding cap and dentry lease releases into mds
+ * requests.
+ *
+ * @force is used by dentry_release (below) to force inclusion of a
+ * record for the directory inode, even when there aren't any caps to
+ * drop.
+ */
+int ceph_encode_inode_release(void **p, struct inode *inode,
+ int mds, int drop, int unless, int force)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_cap *cap;
+ struct ceph_mds_request_release *rel = *p;
+ int used, dirty;
+ int ret = 0;
+
+ spin_lock(&ci->i_ceph_lock);
+ used = __ceph_caps_used(ci);
+ dirty = __ceph_caps_dirty(ci);
+
+ dout("encode_inode_release %p mds%d used|dirty %s drop %s unless %s\n",
+ inode, mds, ceph_cap_string(used|dirty), ceph_cap_string(drop),
+ ceph_cap_string(unless));
+
+ /* only drop unused, clean caps */
+ drop &= ~(used | dirty);
+
+ cap = __get_cap_for_mds(ci, mds);
+ if (cap && __cap_is_valid(cap)) {
+ if (force ||
+ ((cap->issued & drop) &&
+ (cap->issued & unless) == 0)) {
+ if ((cap->issued & drop) &&
+ (cap->issued & unless) == 0) {
+ int wanted = __ceph_caps_wanted(ci);
+ if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0)
+ wanted |= cap->mds_wanted;
+ dout("encode_inode_release %p cap %p "
+ "%s -> %s, wanted %s -> %s\n", inode, cap,
+ ceph_cap_string(cap->issued),
+ ceph_cap_string(cap->issued & ~drop),
+ ceph_cap_string(cap->mds_wanted),
+ ceph_cap_string(wanted));
+
+ cap->issued &= ~drop;
+ cap->implemented &= ~drop;
+ cap->mds_wanted = wanted;
+ } else {
+ dout("encode_inode_release %p cap %p %s"
+ " (force)\n", inode, cap,
+ ceph_cap_string(cap->issued));
+ }
+
+ rel->ino = cpu_to_le64(ceph_ino(inode));
+ rel->cap_id = cpu_to_le64(cap->cap_id);
+ rel->seq = cpu_to_le32(cap->seq);
+ rel->issue_seq = cpu_to_le32(cap->issue_seq),
+ rel->mseq = cpu_to_le32(cap->mseq);
+ rel->caps = cpu_to_le32(cap->implemented);
+ rel->wanted = cpu_to_le32(cap->mds_wanted);
+ rel->dname_len = 0;
+ rel->dname_seq = 0;
+ *p += sizeof(*rel);
+ ret = 1;
+ } else {
+ dout("encode_inode_release %p cap %p %s\n",
+ inode, cap, ceph_cap_string(cap->issued));
+ }
+ }
+ spin_unlock(&ci->i_ceph_lock);
+ return ret;
+}
+
/*
 * Encode a dentry lease release (plus the parent-directory inode
 * release) into an mds request at *p.  Returns the result of
 * ceph_encode_inode_release() on the parent dir.
 */
int ceph_encode_dentry_release(void **p, struct dentry *dentry,
			       int mds, int drop, int unless)
{
	struct inode *dir = dentry->d_parent->d_inode;
	struct ceph_mds_request_release *rel = *p;
	struct ceph_dentry_info *di = ceph_dentry(dentry);
	int force = 0;
	int ret;

	/*
	 * force a record for the directory caps if we have a dentry lease.
	 * this is racy (can't take i_ceph_lock and d_lock together), but it
	 * doesn't have to be perfect; the mds will revoke anything we don't
	 * release.
	 */
	spin_lock(&dentry->d_lock);
	if (di->lease_session && di->lease_session->s_mds == mds)
		force = 1;
	spin_unlock(&dentry->d_lock);

	ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force);

	/* re-check the lease under d_lock; it may have changed since above */
	spin_lock(&dentry->d_lock);
	if (ret && di->lease_session && di->lease_session->s_mds == mds) {
		dout("encode_dentry_release %p mds%d seq %d\n",
		     dentry, mds, (int)di->lease_seq);
		rel->dname_len = cpu_to_le32(dentry->d_name.len);
		memcpy(*p, dentry->d_name.name, dentry->d_name.len);
		*p += dentry->d_name.len;
		rel->dname_seq = cpu_to_le32(di->lease_seq);
		__ceph_mdsc_drop_dentry_lease(dentry);
	}
	spin_unlock(&dentry->d_lock);
	return ret;
}
diff --git a/ceph/ceph_frag.c b/ceph/ceph_frag.c
new file mode 100644
index 0000000..bdce8b1
--- /dev/null
+++ b/ceph/ceph_frag.c
@@ -0,0 +1,22 @@
+/*
+ * Ceph 'frag' type
+ */
+#include <linux/module.h>
+#include <linux/ceph/types.h>
+
+int ceph_frag_compare(__u32 a, __u32 b)
+{
+ unsigned va = ceph_frag_value(a);
+ unsigned vb = ceph_frag_value(b);
+ if (va < vb)
+ return -1;
+ if (va > vb)
+ return 1;
+ va = ceph_frag_bits(a);
+ vb = ceph_frag_bits(b);
+ if (va < vb)
+ return -1;
+ if (va > vb)
+ return 1;
+ return 0;
+}
diff --git a/ceph/debugfs.c b/ceph/debugfs.c
new file mode 100644
index 0000000..16b54aa
--- /dev/null
+++ b/ceph/debugfs.c
@@ -0,0 +1,277 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/debugfs.h>
+
+#include "super.h"
+
+#ifdef CONFIG_DEBUG_FS
+
+#include "mds_client.h"
+
+static int mdsmap_show(struct seq_file *s, void *p)
+{
+ int i;
+ struct ceph_fs_client *fsc = s->private;
+
+ if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL)
+ return 0;
+ seq_printf(s, "epoch %d\n", fsc->mdsc->mdsmap->m_epoch);
+ seq_printf(s, "root %d\n", fsc->mdsc->mdsmap->m_root);
+ seq_printf(s, "session_timeout %d\n",
+ fsc->mdsc->mdsmap->m_session_timeout);
+ seq_printf(s, "session_autoclose %d\n",
+ fsc->mdsc->mdsmap->m_session_autoclose);
+ for (i = 0; i < fsc->mdsc->mdsmap->m_max_mds; i++) {
+ struct ceph_entity_addr *addr =
+ &fsc->mdsc->mdsmap->m_info[i].addr;
+ int state = fsc->mdsc->mdsmap->m_info[i].state;
+
+ seq_printf(s, "\tmds%d\t%s\t(%s)\n", i,
+ ceph_pr_addr(&addr->in_addr),
+ ceph_mds_state_name(state));
+ }
+ return 0;
+}
+
/*
 * mdsc debugfs
 *
 * Dump every in-flight MDS request (tid, target mds, op, and the
 * inode/dentry/path it refers to) to the "mdsc" debugfs file.
 */
static int mdsc_show(struct seq_file *s, void *p)
{
	struct ceph_fs_client *fsc = s->private;
	struct ceph_mds_client *mdsc = fsc->mdsc;
	struct ceph_mds_request *req;
	struct rb_node *rp;
	int pathlen;
	u64 pathbase;
	char *path;

	mutex_lock(&mdsc->mutex);
	for (rp = rb_first(&mdsc->request_tree); rp; rp = rb_next(rp)) {
		req = rb_entry(rp, struct ceph_mds_request, r_node);

		/* tid and destination mds (if the request has been sent) */
		if (req->r_request && req->r_session)
			seq_printf(s, "%lld\tmds%d\t", req->r_tid,
				   req->r_session->s_mds);
		else if (!req->r_request)
			seq_printf(s, "%lld\t(no request)\t", req->r_tid);
		else
			seq_printf(s, "%lld\t(no session)\t", req->r_tid);

		seq_printf(s, "%s", ceph_mds_op_name(req->r_op));

		if (req->r_got_unsafe)
			seq_printf(s, "\t(unsafe)");
		else
			seq_printf(s, "\t");

		/* primary target: inode, dentry (with built path), or raw path */
		if (req->r_inode) {
			seq_printf(s, " #%llx", ceph_ino(req->r_inode));
		} else if (req->r_dentry) {
			path = ceph_mdsc_build_path(req->r_dentry, &pathlen,
						    &pathbase, 0);
			if (IS_ERR(path))
				path = NULL;	/* print name without path */
			spin_lock(&req->r_dentry->d_lock);
			seq_printf(s, " #%llx/%.*s (%s)",
				   ceph_ino(req->r_dentry->d_parent->d_inode),
				   req->r_dentry->d_name.len,
				   req->r_dentry->d_name.name,
				   path ? path : "");
			spin_unlock(&req->r_dentry->d_lock);
			kfree(path);
		} else if (req->r_path1) {
			seq_printf(s, " #%llx/%s", req->r_ino1.ino,
				   req->r_path1);
		} else {
			seq_printf(s, " #%llx", req->r_ino1.ino);
		}

		/* secondary target (rename/link source etc.), if any */
		if (req->r_old_dentry) {
			path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen,
						    &pathbase, 0);
			if (IS_ERR(path))
				path = NULL;
			spin_lock(&req->r_old_dentry->d_lock);
			seq_printf(s, " #%llx/%.*s (%s)",
				   req->r_old_dentry_dir ?
				   ceph_ino(req->r_old_dentry_dir) : 0,
				   req->r_old_dentry->d_name.len,
				   req->r_old_dentry->d_name.name,
				   path ? path : "");
			spin_unlock(&req->r_old_dentry->d_lock);
			kfree(path);
		} else if (req->r_path2) {
			if (req->r_ino2.ino)
				seq_printf(s, " #%llx/%s", req->r_ino2.ino,
					   req->r_path2);
			else
				seq_printf(s, " %s", req->r_path2);
		}

		seq_printf(s, "\n");
	}
	mutex_unlock(&mdsc->mutex);

	return 0;
}
+
+static int caps_show(struct seq_file *s, void *p)
+{
+ struct ceph_fs_client *fsc = s->private;
+ int total, avail, used, reserved, min;
+
+ ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min);
+ seq_printf(s, "total\t\t%d\n"
+ "avail\t\t%d\n"
+ "used\t\t%d\n"
+ "reserved\t%d\n"
+ "min\t%d\n",
+ total, avail, used, reserved, min);
+ return 0;
+}
+
+static int dentry_lru_show(struct seq_file *s, void *ptr)
+{
+ struct ceph_fs_client *fsc = s->private;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_dentry_info *di;
+
+ spin_lock(&mdsc->dentry_lru_lock);
+ list_for_each_entry(di, &mdsc->dentry_lru, lru) {
+ struct dentry *dentry = di->dentry;
+ seq_printf(s, "%p %p\t%.*s\n",
+ di, dentry, dentry->d_name.len, dentry->d_name.name);
+ }
+ spin_unlock(&mdsc->dentry_lru_lock);
+
+ return 0;
+}
+
/*
 * Generate the <name>_fops file_operations for each seq_file show
 * function above; the macro supplies the open/read/release
 * boilerplate.
 */
CEPH_DEFINE_SHOW_FUNC(mdsmap_show)
CEPH_DEFINE_SHOW_FUNC(mdsc_show)
CEPH_DEFINE_SHOW_FUNC(caps_show)
CEPH_DEFINE_SHOW_FUNC(dentry_lru_show)
+
+
+/*
+ * debugfs
+ */
+static int congestion_kb_set(void *data, u64 val)
+{
+ struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
+
+ fsc->mount_options->congestion_kb = (int)val;
+ return 0;
+}
+
+static int congestion_kb_get(void *data, u64 *val)
+{
+ struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
+
+ *val = (u64)fsc->mount_options->congestion_kb;
+ return 0;
+}
+
/* simple numeric attribute file backed by the get/set pair above */
DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get,
			congestion_kb_set, "%llu\n");
+
+
/*
 * Remove all per-superblock debugfs entries.  Also used as the error
 * path of ceph_fs_debugfs_init() with only some entries created, so
 * it relies on debugfs_remove() tolerating unset (NULL) dentries.
 */
void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
{
	dout("ceph_fs_debugfs_cleanup\n");
	debugfs_remove(fsc->debugfs_bdi);
	debugfs_remove(fsc->debugfs_congestion_kb);
	debugfs_remove(fsc->debugfs_mdsmap);
	debugfs_remove(fsc->debugfs_caps);
	debugfs_remove(fsc->debugfs_mdsc);
	debugfs_remove(fsc->debugfs_dentry_lru);
}
+
/*
 * Create the per-superblock debugfs entries under the client's
 * debugfs directory: writeback_congestion_kb, a "bdi" symlink, and
 * the mdsmap/mdsc/caps/dentry_lru dump files.
 *
 * Returns 0 on success or -ENOMEM on any creation failure; on
 * failure, already-created entries are torn down via
 * ceph_fs_debugfs_cleanup().
 */
int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
{
	char name[100];
	int err = -ENOMEM;

	dout("ceph_fs_debugfs_init\n");
	BUG_ON(!fsc->client->debugfs_dir);
	fsc->debugfs_congestion_kb =
		debugfs_create_file("writeback_congestion_kb",
				    0600,
				    fsc->client->debugfs_dir,
				    fsc,
				    &congestion_kb_fops);
	if (!fsc->debugfs_congestion_kb)
		goto out;

	/* relative symlink from our debugfs dir to the bdi's dir */
	snprintf(name, sizeof(name), "../../bdi/%s",
		 dev_name(fsc->backing_dev_info.dev));
	fsc->debugfs_bdi =
		debugfs_create_symlink("bdi",
				       fsc->client->debugfs_dir,
				       name);
	if (!fsc->debugfs_bdi)
		goto out;

	fsc->debugfs_mdsmap = debugfs_create_file("mdsmap",
					0600,
					fsc->client->debugfs_dir,
					fsc,
					&mdsmap_show_fops);
	if (!fsc->debugfs_mdsmap)
		goto out;

	fsc->debugfs_mdsc = debugfs_create_file("mdsc",
						0600,
						fsc->client->debugfs_dir,
						fsc,
						&mdsc_show_fops);
	if (!fsc->debugfs_mdsc)
		goto out;

	/* caps is read-only; the others are 0600 */
	fsc->debugfs_caps = debugfs_create_file("caps",
						   0400,
						   fsc->client->debugfs_dir,
						   fsc,
						   &caps_show_fops);
	if (!fsc->debugfs_caps)
		goto out;

	fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru",
					0600,
					fsc->client->debugfs_dir,
					fsc,
					&dentry_lru_show_fops);
	if (!fsc->debugfs_dentry_lru)
		goto out;

	return 0;

out:
	ceph_fs_debugfs_cleanup(fsc);
	return err;
}
+
+
+#else /* CONFIG_DEBUG_FS */
+
/* CONFIG_DEBUG_FS disabled: debugfs setup is a no-op */
int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
{
	return 0;
}
+
/* CONFIG_DEBUG_FS disabled: debugfs teardown is a no-op */
void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
{
}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/ceph/dir.c b/ceph/dir.c
new file mode 100644
index 0000000..c29d6ae
--- /dev/null
+++ b/ceph/dir.c
@@ -0,0 +1,1349 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/spinlock.h>
+#include <linux/fs_struct.h>
+#include <linux/namei.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+#include "super.h"
+#include "mds_client.h"
+
+/*
+ * Directory operations: readdir, lookup, create, link, unlink,
+ * rename, etc.
+ */
+
+/*
+ * Ceph MDS operations are specified in terms of a base ino and
+ * relative path. Thus, the client can specify an operation on a
+ * specific inode (e.g., a getattr due to fstat(2)), or as a path
+ * relative to, say, the root directory.
+ *
+ * Normally, we limit ourselves to strict inode ops (no path component)
+ * or dentry operations (a single path component relative to an ino). The
+ * exception to this is open_root_dentry(), which will open the mount
+ * point by name.
+ */
+
+const struct inode_operations ceph_dir_iops;
+const struct file_operations ceph_dir_fops;
+const struct dentry_operations ceph_dentry_ops;
+
/*
 * Initialize ceph dentry state.
 *
 * Allocate and attach a ceph_dentry_info to @dentry->d_fsdata and
 * select the dentry_operations based on the parent's snap context.
 * Returns 0 on success (including when another thread won the race
 * and d_fsdata is already set) or -ENOMEM on allocation failure.
 */
int ceph_init_dentry(struct dentry *dentry)
{
	struct ceph_dentry_info *di;

	/* fast path: already initialized (unlocked check; re-checked below) */
	if (dentry->d_fsdata)
		return 0;

	di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
	if (!di)
		return -ENOMEM;          /* oh well */

	spin_lock(&dentry->d_lock);
	if (dentry->d_fsdata) {
		/* lost a race */
		kmem_cache_free(ceph_dentry_cachep, di);
		goto out_unlock;
	}

	/* live, snapdir, and snapped dentries get different d_ops */
	if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
		d_set_d_op(dentry, &ceph_dentry_ops);
	else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
		d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
	else
		d_set_d_op(dentry, &ceph_snap_dentry_ops);

	di->dentry = dentry;
	di->lease_session = NULL;
	dentry->d_time = jiffies;
	/* avoid reordering d_fsdata setup so that the check above is safe */
	smp_mb();
	dentry->d_fsdata = di;
	ceph_dentry_lru_add(dentry);
out_unlock:
	spin_unlock(&dentry->d_lock);
	return 0;
}
+
+struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry)
+{
+ struct inode *inode = NULL;
+
+ if (!dentry)
+ return NULL;
+
+ spin_lock(&dentry->d_lock);
+ if (!IS_ROOT(dentry)) {
+ inode = dentry->d_parent->d_inode;
+ ihold(inode);
+ }
+ spin_unlock(&dentry->d_lock);
+ return inode;
+}
+
+
+/*
+ * for readdir, we encode the directory frag and offset within that
+ * frag into f_pos.
+ */
+static unsigned fpos_frag(loff_t p)
+{
+ return p >> 32;
+}
+static unsigned fpos_off(loff_t p)
+{
+ return p & 0xffffffff;
+}
+
+static int fpos_cmp(loff_t l, loff_t r)
+{
+ int v = ceph_frag_compare(fpos_frag(l), fpos_frag(r));
+ if (v)
+ return v;
+ return (int)(fpos_off(l) - fpos_off(r));
+}
+
/*
 * When possible, we try to satisfy a readdir by peeking at the
 * dcache.  We make this work by carefully ordering dentries on
 * d_u.d_child when we initially get results back from the MDS, and
 * falling back to a "normal" sync readdir if any dentries in the dir
 * are dropped.
 *
 * Complete dir indicates that we have all dentries in the dir.  It is
 * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
 * the MDS if/when the directory is modified).
 *
 * Walks the parent's d_subdirs list backwards (prev direction),
 * emitting each dentry whose lease generation matches @shared_gen.
 * Returns 0 normally, -EAGAIN when the dir lost "complete" status
 * mid-walk and the caller must fall back to an MDS readdir.
 */
static int __dcache_readdir(struct file *file, struct dir_context *ctx,
			    u32 shared_gen)
{
	struct ceph_file_info *fi = file->private_data;
	struct dentry *parent = file->f_dentry;
	struct inode *dir = parent->d_inode;
	struct list_head *p;
	struct dentry *dentry, *last;
	struct ceph_dentry_info *di;
	int err = 0;

	/* claim ref on last dentry we returned */
	last = fi->dentry;
	fi->dentry = NULL;

	dout("__dcache_readdir %p v%u at %llu (last %p)\n",
	     dir, shared_gen, ctx->pos, last);

	spin_lock(&parent->d_lock);

	/* start at beginning? */
	if (ctx->pos == 2 || last == NULL ||
	    fpos_cmp(ctx->pos, ceph_dentry(last)->offset) < 0) {
		if (list_empty(&parent->d_subdirs))
			goto out_unlock;
		p = parent->d_subdirs.prev;
		dout(" initial p %p/%p\n", p->prev, p->next);
	} else {
		/* resume just past the last dentry we emitted */
		p = last->d_u.d_child.prev;
	}

more:
	dentry = list_entry(p, struct dentry, d_u.d_child);
	di = ceph_dentry(dentry);
	/* scan backwards for the next emit-worthy dentry at or past ctx->pos */
	while (1) {
		dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next,
		     d_unhashed(dentry) ? "!hashed" : "hashed",
		     parent->d_subdirs.prev, parent->d_subdirs.next);
		if (p == &parent->d_subdirs) {
			/* wrapped back to the list head: done */
			fi->flags |= CEPH_F_ATEND;
			goto out_unlock;
		}
		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
		if (di->lease_shared_gen == shared_gen &&
		    !d_unhashed(dentry) && dentry->d_inode &&
		    ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
		    ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
		    fpos_cmp(ctx->pos, di->offset) <= 0)
			break;	/* found one; keep its d_lock held */
		dout(" skipping %p %.*s at %llu (%llu)%s%s\n", dentry,
		     dentry->d_name.len, dentry->d_name.name, di->offset,
		     ctx->pos, d_unhashed(dentry) ? " unhashed" : "",
		     !dentry->d_inode ? " null" : "");
		spin_unlock(&dentry->d_lock);
		p = p->prev;
		dentry = list_entry(p, struct dentry, d_u.d_child);
		di = ceph_dentry(dentry);
	}

	dget_dlock(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&parent->d_lock);

	/* make sure a dentry wasn't dropped while we didn't have parent lock */
	if (!ceph_dir_is_complete(dir)) {
		dout(" lost dir complete on %p; falling back to mds\n", dir);
		dput(dentry);
		err = -EAGAIN;
		goto out;
	}

	dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, ctx->pos,
	     dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
	if (!dir_emit(ctx, dentry->d_name.name,
		      dentry->d_name.len,
		      ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino),
		      dentry->d_inode->i_mode >> 12)) {
		/* user buffer full: stop here, resumable from 'last' */
		if (last) {
			/* remember our position */
			fi->dentry = last;
			fi->next_offset = fpos_off(di->offset);
		}
		dput(dentry);
		return 0;
	}

	ctx->pos = di->offset + 1;

	if (last)
		dput(last);
	last = dentry;

	spin_lock(&parent->d_lock);
	p = p->prev;	/* advance to next dentry */
	goto more;

out_unlock:
	spin_unlock(&parent->d_lock);
out:
	if (last)
		dput(last);
	return err;
}
+
+/*
+ * make note of the last dentry we read, so we can
+ * continue at the same lexicographical point,
+ * regardless of what dir changes take place on the
+ * server.
+ */
+static int note_last_dentry(struct ceph_file_info *fi, const char *name,
+ int len)
+{
+ kfree(fi->last_name);
+ fi->last_name = kmalloc(len+1, GFP_NOFS);
+ if (!fi->last_name)
+ return -ENOMEM;
+ memcpy(fi->last_name, name, len);
+ fi->last_name[len] = 0;
+ dout("note_last_dentry '%s'\n", fi->last_name);
+ return 0;
+}
+
/*
 * Main readdir entry point.
 *
 * Emits "." and ".." synthetically, then tries the dcache fast path
 * (__dcache_readdir) when the dir is known complete; otherwise pages
 * directory fragments from the MDS one request at a time, caching the
 * last reply in fi->last_readdir and resuming via fi->last_name.
 * f_pos encodes (frag, offset) -- see fpos_frag()/fpos_off().
 */
static int ceph_readdir(struct file *file, struct dir_context *ctx)
{
	struct ceph_file_info *fi = file->private_data;
	struct inode *inode = file_inode(file);
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
	struct ceph_mds_client *mdsc = fsc->mdsc;
	unsigned frag = fpos_frag(ctx->pos);
	int off = fpos_off(ctx->pos);
	int err;
	u32 ftype;
	struct ceph_mds_reply_info_parsed *rinfo;

	dout("readdir %p file %p frag %u off %u\n", inode, file, frag, off);
	if (fi->flags & CEPH_F_ATEND)
		return 0;

	/* always start with . and .. */
	if (ctx->pos == 0) {
		/* note dir version at start of readdir so we can tell
		 * if any dentries get dropped */
		fi->dir_release_count = atomic_read(&ci->i_release_count);

		dout("readdir off 0 -> '.'\n");
		if (!dir_emit(ctx, ".", 1,
			    ceph_translate_ino(inode->i_sb, inode->i_ino),
			    inode->i_mode >> 12))
			return 0;
		ctx->pos = 1;
		off = 1;
	}
	if (ctx->pos == 1) {
		ino_t ino = parent_ino(file->f_dentry);
		dout("readdir off 1 -> '..'\n");
		if (!dir_emit(ctx, "..", 2,
			    ceph_translate_ino(inode->i_sb, ino),
			    inode->i_mode >> 12))
			return 0;
		ctx->pos = 2;
		off = 2;
	}

	/* can we use the dcache? */
	spin_lock(&ci->i_ceph_lock);
	if ((ctx->pos == 2 || fi->dentry) &&
	    !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
	    ceph_snap(inode) != CEPH_SNAPDIR &&
	    __ceph_dir_is_complete(ci) &&
	    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
		u32 shared_gen = ci->i_shared_gen;
		spin_unlock(&ci->i_ceph_lock);
		err = __dcache_readdir(file, ctx, shared_gen);
		if (err != -EAGAIN)
			return err;
		/* -EAGAIN: fall back to mds readdir from current pos */
		frag = fpos_frag(ctx->pos);
		off = fpos_off(ctx->pos);
	} else {
		spin_unlock(&ci->i_ceph_lock);
	}
	if (fi->dentry) {
		/* stale dcache-resume point; record its name for the mds */
		err = note_last_dentry(fi, fi->dentry->d_name.name,
				       fi->dentry->d_name.len);
		if (err)
			return err;
		dput(fi->dentry);
		fi->dentry = NULL;
	}

	/* proceed with a normal readdir */

more:
	/* do we have the correct frag content buffered? */
	if (fi->frag != frag || fi->last_readdir == NULL) {
		struct ceph_mds_request *req;
		int op = ceph_snap(inode) == CEPH_SNAPDIR ?
			CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;

		/* discard old result, if any */
		if (fi->last_readdir) {
			ceph_mdsc_put_request(fi->last_readdir);
			fi->last_readdir = NULL;
		}

		dout("readdir fetching %llx.%llx frag %x offset '%s'\n",
		     ceph_vinop(inode), frag, fi->last_name);
		req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
		if (IS_ERR(req))
			return PTR_ERR(req);
		err = ceph_alloc_readdir_reply_buffer(req, inode);
		if (err) {
			ceph_mdsc_put_request(req);
			return err;
		}
		req->r_inode = inode;
		ihold(inode);
		req->r_dentry = dget(file->f_dentry);
		/* hints to request -> mds selection code */
		req->r_direct_mode = USE_AUTH_MDS;
		req->r_direct_hash = ceph_frag_value(frag);
		req->r_direct_is_hash = true;
		req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
		req->r_readdir_offset = fi->next_offset;
		req->r_args.readdir.frag = cpu_to_le32(frag);
		err = ceph_mdsc_do_request(mdsc, NULL, req);
		if (err < 0) {
			ceph_mdsc_put_request(req);
			return err;
		}
		dout("readdir got and parsed readdir result=%d"
		     " on frag %x, end=%d, complete=%d\n", err, frag,
		     (int)req->r_reply_info.dir_end,
		     (int)req->r_reply_info.dir_complete);

		if (!req->r_did_prepopulate) {
			dout("readdir !did_prepopulate");
			/* preclude from marking dir complete */
			fi->dir_release_count--;
		}

		/* note next offset and last dentry name */
		rinfo = &req->r_reply_info;
		if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
			/* mds returned a different (split/merged) frag */
			frag = le32_to_cpu(rinfo->dir_dir->frag);
			if (ceph_frag_is_leftmost(frag))
				fi->next_offset = 2;	/* skip . and .. */
			else
				fi->next_offset = 0;
			off = fi->next_offset;
		}
		fi->frag = frag;
		fi->offset = fi->next_offset;
		fi->last_readdir = req;

		if (req->r_reply_info.dir_end) {
			kfree(fi->last_name);
			fi->last_name = NULL;
			if (ceph_frag_is_rightmost(frag))
				fi->next_offset = 2;
			else
				fi->next_offset = 0;
		} else {
			/* remember where to resume within this frag */
			err = note_last_dentry(fi,
				       rinfo->dir_dname[rinfo->dir_nr-1],
				       rinfo->dir_dname_len[rinfo->dir_nr-1]);
			if (err)
				return err;
			fi->next_offset += rinfo->dir_nr;
		}
	}

	rinfo = &fi->last_readdir->r_reply_info;
	dout("readdir frag %x num %d off %d chunkoff %d\n", frag,
	     rinfo->dir_nr, off, fi->offset);

	/* emit entries from the cached reply chunk */
	ctx->pos = ceph_make_fpos(frag, off);
	while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) {
		struct ceph_mds_reply_inode *in =
			rinfo->dir_in[off - fi->offset].in;
		struct ceph_vino vino;
		ino_t ino;

		dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n",
		     off, off - fi->offset, rinfo->dir_nr, ctx->pos,
		     rinfo->dir_dname_len[off - fi->offset],
		     rinfo->dir_dname[off - fi->offset], in);
		BUG_ON(!in);
		ftype = le32_to_cpu(in->mode) >> 12;
		vino.ino = le64_to_cpu(in->ino);
		vino.snap = le64_to_cpu(in->snapid);
		ino = ceph_vino_to_ino(vino);
		if (!dir_emit(ctx,
			    rinfo->dir_dname[off - fi->offset],
			    rinfo->dir_dname_len[off - fi->offset],
			    ceph_translate_ino(inode->i_sb, ino), ftype)) {
			dout("filldir stopping us...\n");
			return 0;
		}
		off++;
		ctx->pos++;
	}

	/* last_name set => this frag has more entries; fetch next chunk */
	if (fi->last_name) {
		ceph_mdsc_put_request(fi->last_readdir);
		fi->last_readdir = NULL;
		goto more;
	}

	/* more frags? */
	if (!ceph_frag_is_rightmost(frag)) {
		frag = ceph_frag_next(frag);
		off = 0;
		ctx->pos = ceph_make_fpos(frag, off);
		dout("readdir next frag is %x\n", frag);
		goto more;
	}
	fi->flags |= CEPH_F_ATEND;

	/*
	 * if dir_release_count still matches the dir, no dentries
	 * were released during the whole readdir, and we should have
	 * the complete dir contents in our cache.
	 */
	spin_lock(&ci->i_ceph_lock);
	if (atomic_read(&ci->i_release_count) == fi->dir_release_count) {
		dout(" marking %p complete\n", inode);
		__ceph_dir_set_complete(ci, fi->dir_release_count);
	}
	spin_unlock(&ci->i_ceph_lock);

	dout("readdir %p file %p done.\n", inode, file);
	return 0;
}
+
+static void reset_readdir(struct ceph_file_info *fi, unsigned frag)
+{
+ if (fi->last_readdir) {
+ ceph_mdsc_put_request(fi->last_readdir);
+ fi->last_readdir = NULL;
+ }
+ kfree(fi->last_name);
+ fi->last_name = NULL;
+ if (ceph_frag_is_leftmost(frag))
+ fi->next_offset = 2; /* compensate for . and .. */
+ else
+ fi->next_offset = 0;
+ if (fi->dentry) {
+ dput(fi->dentry);
+ fi->dentry = NULL;
+ }
+ fi->flags &= ~CEPH_F_ATEND;
+}
+
/*
 * llseek for directories: positions are (frag, offset) pairs, so a
 * seek that crosses a frag boundary or moves before the buffered
 * chunk must drop the cached readdir state.
 */
static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
{
	struct ceph_file_info *fi = file->private_data;
	struct inode *inode = file->f_mapping->host;
	loff_t old_offset = ceph_make_fpos(fi->frag, fi->next_offset);
	loff_t retval;

	mutex_lock(&inode->i_mutex);
	retval = -EINVAL;
	switch (whence) {
	case SEEK_END:
		offset += inode->i_size + 2;	/* FIXME */
		break;
	case SEEK_CUR:
		offset += file->f_pos;
		/* fall through */
	case SEEK_SET:
		break;
	default:
		goto out;
	}

	if (offset >= 0) {
		if (offset != file->f_pos) {
			file->f_pos = offset;
			file->f_version = 0;
			fi->flags &= ~CEPH_F_ATEND;
		}
		retval = offset;

		/*
		 * discard buffered readdir content on seekdir(0), or
		 * seek to new frag, or seek prior to current chunk.
		 */
		if (offset == 0 ||
		    fpos_frag(offset) != fi->frag ||
		    fpos_off(offset) < fi->offset) {
			dout("dir_llseek dropping %p content\n", file);
			reset_readdir(fi, fpos_frag(offset));
		}

		/* bump dir_release_count if we did a forward seek */
		if (fpos_cmp(offset, old_offset) > 0)
			fi->dir_release_count--;
	}
out:
	mutex_unlock(&inode->i_mutex);
	return retval;
}
+
+/*
+ * Handle lookups for the hidden .snap directory.
+ *
+ * If the MDS returned -ENOENT for a name that matches the configured
+ * snapdir name in an unsnapped parent, splice in the synthetic
+ * snapdir inode and clear the error. Returns the (possibly cleared)
+ * error code.
+ */
+int ceph_handle_snapdir(struct ceph_mds_request *req,
+ struct dentry *dentry, int err)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
+ struct inode *parent = dentry->d_parent->d_inode; /* we hold i_mutex */
+
+ /* .snap dir? */
+ if (err == -ENOENT &&
+ ceph_snap(parent) == CEPH_NOSNAP &&
+ strcmp(dentry->d_name.name,
+ fsc->mount_options->snapdir_name) == 0) {
+ struct inode *inode = ceph_get_snapdir(parent);
+ dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n",
+ dentry, dentry->d_name.len, dentry->d_name.name, inode);
+ BUG_ON(!d_unhashed(dentry));
+ d_add(dentry, inode);
+ err = 0;
+ }
+ return err;
+}
+
+/*
+ * Figure out final result of a lookup/open request.
+ *
+ * Mainly, make sure we return the final req->r_dentry (if it already
+ * existed) in place of the original VFS-provided dentry when they
+ * differ.
+ *
+ * Gracefully handle the case where the MDS replies with -ENOENT and
+ * no trace (which it may do, at its discretion, e.g., if it doesn't
+ * care to issue a lease on the negative dentry).
+ *
+ * Returns NULL when the caller's dentry was used as-is, a new dentry
+ * reference when the lookup was spliced, or ERR_PTR on error.
+ */
+struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
+ struct dentry *dentry, int err)
+{
+ if (err == -ENOENT) {
+ /* no trace? */
+ err = 0;
+ if (!req->r_reply_info.head->is_dentry) {
+ dout("ENOENT and no trace, dentry %p inode %p\n",
+ dentry, dentry->d_inode);
+ if (dentry->d_inode) {
+ /* stale positive dentry: unhash it */
+ d_drop(dentry);
+ err = -ENOENT;
+ } else {
+ /* cache the negative result */
+ d_add(dentry, NULL);
+ }
+ }
+ }
+ if (err)
+ dentry = ERR_PTR(err);
+ else if (dentry != req->r_dentry)
+ dentry = dget(req->r_dentry); /* we got spliced */
+ else
+ dentry = NULL;
+ return dentry;
+}
+
+/*
+ * Is this the special ".ceph" entry in the filesystem root? Note
+ * the strncmp(..., 5): any root name *beginning* with ".ceph"
+ * matches, not only the exact name.
+ */
+static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry)
+{
+ return ceph_ino(inode) == CEPH_INO_ROOT &&
+ strncmp(dentry->d_name.name, ".ceph", 5) == 0;
+}
+
+/*
+ * Look up a single dir entry. If there is a lookup intent, inform
+ * the MDS so that it gets our 'caps wanted' value in a single op.
+ *
+ * Returns NULL when the (negative) result was concluded locally,
+ * the spliced dentry, or ERR_PTR on failure.
+ */
+static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
+ unsigned int flags)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_mds_request *req;
+ int op;
+ int err;
+
+ dout("lookup %p dentry %p '%.*s'\n",
+ dir, dentry, dentry->d_name.len, dentry->d_name.name);
+
+ if (dentry->d_name.len > NAME_MAX)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ err = ceph_init_dentry(dentry);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ /* can we conclude ENOENT locally? */
+ if (dentry->d_inode == NULL) {
+ struct ceph_inode_info *ci = ceph_inode(dir);
+ struct ceph_dentry_info *di = ceph_dentry(dentry);
+
+ spin_lock(&ci->i_ceph_lock);
+ dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
+ if (strncmp(dentry->d_name.name,
+ fsc->mount_options->snapdir_name,
+ dentry->d_name.len) &&
+ !is_root_ceph_dentry(dir, dentry) &&
+ __ceph_dir_is_complete(ci) &&
+ (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
+ spin_unlock(&ci->i_ceph_lock);
+ dout(" dir %p complete, -ENOENT\n", dir);
+ /* whole dir is cached and the name is absent */
+ d_add(dentry, NULL);
+ di->lease_shared_gen = ci->i_shared_gen;
+ return NULL;
+ }
+ spin_unlock(&ci->i_ceph_lock);
+ }
+
+ /* otherwise, ask the MDS */
+ op = ceph_snap(dir) == CEPH_SNAPDIR ?
+ CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
+ req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
+ if (IS_ERR(req))
+ return ERR_CAST(req);
+ req->r_dentry = dget(dentry);
+ req->r_num_caps = 2;
+ /* we only need inode linkage */
+ req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
+ req->r_locked_dir = dir;
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+ err = ceph_handle_snapdir(req, dentry, err);
+ dentry = ceph_finish_lookup(req, dentry, err);
+ ceph_mdsc_put_request(req); /* will dput(dentry) */
+ dout("lookup result=%p\n", dentry);
+ return dentry;
+}
+
+/*
+ * If we do a create but get no trace back from the MDS, follow up with
+ * a lookup (the VFS expects us to link up the provided dentry).
+ *
+ * Returns 0 on success, negative errno from the lookup otherwise.
+ */
+int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
+{
+ struct dentry *result = ceph_lookup(dir, dentry, 0);
+
+ if (result && !IS_ERR(result)) {
+ /*
+ * We created the item, then did a lookup, and found
+ * it was already linked to another inode we already
+ * had in our cache (and thus got spliced). Link our
+ * dentry to that inode, but don't hash it, just in
+ * case the VFS wants to dereference it.
+ */
+ BUG_ON(!result->d_inode);
+ d_instantiate(dentry, result->d_inode);
+ return 0;
+ }
+ /* NULL result means the dentry was linked up in place: success */
+ return PTR_ERR(result);
+}
+
+/*
+ * mknod: send a MKNOD request to the auth MDS (also the workhorse
+ * for ceph_create, with rdev == 0). Initializes ACLs on success;
+ * on failure the dentry is dropped. Snapshots are read-only.
+ */
+static int ceph_mknod(struct inode *dir, struct dentry *dentry,
+ umode_t mode, dev_t rdev)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_mds_request *req;
+ int err;
+
+ if (ceph_snap(dir) != CEPH_NOSNAP)
+ return -EROFS;
+
+ dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n",
+ dir, dentry, mode, rdev);
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS);
+ if (IS_ERR(req)) {
+ d_drop(dentry);
+ return PTR_ERR(req);
+ }
+ req->r_dentry = dget(dentry);
+ req->r_num_caps = 2;
+ req->r_locked_dir = dir;
+ req->r_args.mknod.mode = cpu_to_le32(mode);
+ req->r_args.mknod.rdev = cpu_to_le32(rdev);
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
+ err = ceph_mdsc_do_request(mdsc, dir, req);
+ if (!err && !req->r_reply_info.head->is_dentry)
+ err = ceph_handle_notrace_create(dir, dentry);
+ ceph_mdsc_put_request(req);
+
+ if (!err)
+ ceph_init_acl(dentry, dentry->d_inode, dir);
+ else
+ d_drop(dentry);
+ return err;
+}
+
+/*
+ * create(2): implemented as a mknod with rdev == 0. The excl flag
+ * is unused here.
+ */
+static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+ bool excl)
+{
+ return ceph_mknod(dir, dentry, mode, 0);
+}
+
+/*
+ * symlink: send a SYMLINK request to the auth MDS with the link
+ * target in r_path2. Snapshots are read-only.
+ */
+static int ceph_symlink(struct inode *dir, struct dentry *dentry,
+ const char *dest)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_mds_request *req;
+ int err;
+
+ if (ceph_snap(dir) != CEPH_NOSNAP)
+ return -EROFS;
+
+ dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest);
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS);
+ if (IS_ERR(req)) {
+ d_drop(dentry);
+ return PTR_ERR(req);
+ }
+ req->r_path2 = kstrdup(dest, GFP_NOFS); /* NOTE(review): allocation failure not checked */
+ req->r_dentry = dget(dentry);
+ req->r_num_caps = 2;
+ req->r_locked_dir = dir;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
+ err = ceph_mdsc_do_request(mdsc, dir, req);
+ if (!err && !req->r_reply_info.head->is_dentry)
+ err = ceph_handle_notrace_create(dir, dentry);
+ ceph_mdsc_put_request(req);
+ if (!err)
+ ceph_init_acl(dentry, dentry->d_inode, dir);
+ else
+ d_drop(dentry);
+ return err;
+}
+
+/*
+ * mkdir: create a directory, or — when the parent is the virtual
+ * .snap dir — create a snapshot (MKSNAP). Any other snapped parent
+ * is read-only.
+ */
+static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_mds_request *req;
+ int err = -EROFS;
+ int op;
+
+ if (ceph_snap(dir) == CEPH_SNAPDIR) {
+ /* mkdir .snap/foo is a MKSNAP */
+ op = CEPH_MDS_OP_MKSNAP;
+ dout("mksnap dir %p snap '%.*s' dn %p\n", dir,
+ dentry->d_name.len, dentry->d_name.name, dentry);
+ } else if (ceph_snap(dir) == CEPH_NOSNAP) {
+ dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode);
+ op = CEPH_MDS_OP_MKDIR;
+ } else {
+ goto out;
+ }
+ req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto out;
+ }
+
+ req->r_dentry = dget(dentry);
+ req->r_num_caps = 2;
+ req->r_locked_dir = dir;
+ req->r_args.mkdir.mode = cpu_to_le32(mode);
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
+ err = ceph_mdsc_do_request(mdsc, dir, req);
+ if (!err && !req->r_reply_info.head->is_dentry)
+ err = ceph_handle_notrace_create(dir, dentry);
+ ceph_mdsc_put_request(req);
+out:
+ if (!err)
+ ceph_init_acl(dentry, dentry->d_inode, dir);
+ else
+ d_drop(dentry);
+ return err;
+}
+
+/*
+ * link: create a hard link via the auth MDS. If the reply carries
+ * no dentry trace, instantiate the new dentry locally with an extra
+ * inode reference. Snapshots are read-only.
+ */
+static int ceph_link(struct dentry *old_dentry, struct inode *dir,
+ struct dentry *dentry)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_mds_request *req;
+ int err;
+
+ if (ceph_snap(dir) != CEPH_NOSNAP)
+ return -EROFS;
+
+ dout("link in dir %p old_dentry %p dentry %p\n", dir,
+ old_dentry, dentry);
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS);
+ if (IS_ERR(req)) {
+ d_drop(dentry);
+ return PTR_ERR(req);
+ }
+ req->r_dentry = dget(dentry);
+ req->r_num_caps = 2;
+ req->r_old_dentry = dget(old_dentry);
+ req->r_locked_dir = dir;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
+ /* release LINK_SHARED on source inode (mds will lock it) */
+ req->r_old_inode_drop = CEPH_CAP_LINK_SHARED;
+ err = ceph_mdsc_do_request(mdsc, dir, req);
+ if (err) {
+ d_drop(dentry);
+ } else if (!req->r_reply_info.head->is_dentry) {
+ /* no trace: link up the dentry ourselves */
+ ihold(old_dentry->d_inode);
+ d_instantiate(dentry, old_dentry->d_inode);
+ }
+ ceph_mdsc_put_request(req);
+ return err;
+}
+
+/*
+ * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps. If it
+ * looks like the link count will hit 0, drop any other caps (other
+ * than PIN) we don't specifically want (due to the file still being
+ * open).
+ *
+ * Returns the cap mask to release with the unlink request.
+ */
+static int drop_caps_for_unlink(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
+
+ spin_lock(&ci->i_ceph_lock);
+ if (inode->i_nlink == 1) {
+ /* last link: release everything we don't want or pin */
+ drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
+ ci->i_ceph_flags |= CEPH_I_NODELAY;
+ }
+ spin_unlock(&ci->i_ceph_lock);
+ return drop;
+}
+
+/*
+ * rmdir and unlink differ only in the metadata op code. Removing a
+ * name inside the virtual .snap dir deletes the snapshot (RMSNAP).
+ */
+static int ceph_unlink(struct inode *dir, struct dentry *dentry)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct inode *inode = dentry->d_inode;
+ struct ceph_mds_request *req;
+ int err = -EROFS;
+ int op;
+
+ if (ceph_snap(dir) == CEPH_SNAPDIR) {
+ /* rmdir .snap/foo is RMSNAP */
+ dout("rmsnap dir %p '%.*s' dn %p\n", dir, dentry->d_name.len,
+ dentry->d_name.name, dentry);
+ op = CEPH_MDS_OP_RMSNAP;
+ } else if (ceph_snap(dir) == CEPH_NOSNAP) {
+ dout("unlink/rmdir dir %p dn %p inode %p\n",
+ dir, dentry, inode);
+ op = S_ISDIR(dentry->d_inode->i_mode) ?
+ CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK;
+ } else
+ goto out;
+ req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto out;
+ }
+ req->r_dentry = dget(dentry);
+ req->r_num_caps = 2;
+ req->r_locked_dir = dir;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
+ req->r_inode_drop = drop_caps_for_unlink(inode);
+ err = ceph_mdsc_do_request(mdsc, dir, req);
+ if (!err && !req->r_reply_info.head->is_dentry)
+ d_delete(dentry);
+ ceph_mdsc_put_request(req);
+out:
+ return err;
+}
+
+/*
+ * rename: send a RENAME request to the auth MDS. Cross-snap renames
+ * are -EXDEV, and snapped directories are read-only. If the reply
+ * carries no trace, perform the d_move and cache invalidation here.
+ */
+static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_mds_request *req;
+ int err;
+
+ if (ceph_snap(old_dir) != ceph_snap(new_dir))
+ return -EXDEV;
+ if (ceph_snap(old_dir) != CEPH_NOSNAP ||
+ ceph_snap(new_dir) != CEPH_NOSNAP)
+ return -EROFS;
+ dout("rename dir %p dentry %p to dir %p dentry %p\n",
+ old_dir, old_dentry, new_dir, new_dentry);
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ ihold(old_dir); /* reference for r_old_dentry_dir; presumably dropped with the request — TODO confirm */
+ req->r_dentry = dget(new_dentry);
+ req->r_num_caps = 2;
+ req->r_old_dentry = dget(old_dentry);
+ req->r_old_dentry_dir = old_dir;
+ req->r_locked_dir = new_dir;
+ req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
+ /* release LINK_RDCACHE on source inode (mds will lock it) */
+ req->r_old_inode_drop = CEPH_CAP_LINK_SHARED;
+ if (new_dentry->d_inode)
+ req->r_inode_drop = drop_caps_for_unlink(new_dentry->d_inode);
+ err = ceph_mdsc_do_request(mdsc, old_dir, req);
+ if (!err && !req->r_reply_info.head->is_dentry) {
+ /*
+ * Normally d_move() is done by fill_trace (called by
+ * do_request, above). If there is no trace, we need
+ * to do it here.
+ */
+
+ d_move(old_dentry, new_dentry);
+
+ /* ensure target dentry is invalidated, despite
+ rehashing bug in vfs_rename_dir */
+ ceph_invalidate_dentry_lease(new_dentry);
+
+ /* d_move screws up sibling dentries' offsets */
+ ceph_dir_clear_complete(old_dir);
+ ceph_dir_clear_complete(new_dir);
+
+ }
+ ceph_mdsc_put_request(req);
+ return err;
+}
+
+/*
+ * Ensure a dentry lease will no longer revalidate: expire the
+ * time-based lease (d_time = now) and zero the shared gen so the
+ * dir-wide lease check fails too.
+ */
+void ceph_invalidate_dentry_lease(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ dentry->d_time = jiffies;
+ ceph_dentry(dentry)->lease_shared_gen = 0;
+ spin_unlock(&dentry->d_lock);
+}
+
+/*
+ * Check if dentry lease is valid. If not, delete the lease. Try to
+ * renew if the lease is more than half up.
+ *
+ * Returns 1 if the lease is valid, 0 otherwise. The renewal message,
+ * if any, is sent after d_lock is dropped.
+ */
+static int dentry_lease_is_valid(struct dentry *dentry)
+{
+ struct ceph_dentry_info *di;
+ struct ceph_mds_session *s;
+ int valid = 0;
+ u32 gen;
+ unsigned long ttl;
+ struct ceph_mds_session *session = NULL;
+ struct inode *dir = NULL;
+ u32 seq = 0;
+
+ spin_lock(&dentry->d_lock);
+ di = ceph_dentry(dentry);
+ if (di->lease_session) {
+ s = di->lease_session;
+ /* snapshot the session's cap gen/ttl under its lock */
+ spin_lock(&s->s_gen_ttl_lock);
+ gen = s->s_cap_gen;
+ ttl = s->s_cap_ttl;
+ spin_unlock(&s->s_gen_ttl_lock);
+
+ if (di->lease_gen == gen &&
+ time_before(jiffies, dentry->d_time) &&
+ time_before(jiffies, ttl)) {
+ valid = 1;
+ if (di->lease_renew_after &&
+ time_after(jiffies, di->lease_renew_after)) {
+ /* we should renew */
+ dir = dentry->d_parent->d_inode;
+ session = ceph_get_mds_session(s);
+ seq = di->lease_seq;
+ di->lease_renew_after = 0;
+ di->lease_renew_from = jiffies;
+ }
+ }
+ }
+ spin_unlock(&dentry->d_lock);
+
+ if (session) {
+ /* send the renewal outside of d_lock */
+ ceph_mdsc_lease_send_msg(session, dir, dentry,
+ CEPH_MDS_LEASE_RENEW, seq);
+ ceph_put_mds_session(session);
+ }
+ dout("dentry_lease_is_valid - dentry %p = %d\n", dentry, valid);
+ return valid;
+}
+
+/*
+ * Check if directory-wide content lease/cap is valid: the dentry's
+ * cached shared gen must match the dir's and we must hold (or be
+ * able to touch) FILE_SHARED caps on the dir. Returns 1 if valid.
+ */
+static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
+{
+ struct ceph_inode_info *ci = ceph_inode(dir);
+ struct ceph_dentry_info *di = ceph_dentry(dentry);
+ int valid = 0;
+
+ spin_lock(&ci->i_ceph_lock);
+ if (ci->i_shared_gen == di->lease_shared_gen)
+ valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
+ spin_unlock(&ci->i_ceph_lock);
+ dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n",
+ dir, (unsigned)ci->i_shared_gen, dentry,
+ (unsigned)di->lease_shared_gen, valid);
+ return valid;
+}
+
+/*
+ * Check if cached dentry can be trusted. Returns 1 (valid),
+ * 0 (invalid, dentry dropped), or -ECHILD in RCU-walk mode.
+ */
+static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ int valid = 0;
+ struct inode *dir;
+
+ /* we take references and may sleep; bail out of RCU walk */
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
+ dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
+ ceph_dentry(dentry)->offset);
+
+ dir = ceph_get_dentry_parent_inode(dentry);
+
+ /* always trust cached snapped dentries, snapdir dentry */
+ if (ceph_snap(dir) != CEPH_NOSNAP) {
+ dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry,
+ dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
+ valid = 1;
+ } else if (dentry->d_inode &&
+ ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) {
+ valid = 1;
+ } else if (dentry_lease_is_valid(dentry) ||
+ dir_lease_is_valid(dir, dentry)) {
+ /* a positive dentry also needs caps on the inode */
+ if (dentry->d_inode)
+ valid = ceph_is_any_caps(dentry->d_inode);
+ else
+ valid = 1;
+ }
+
+ dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid");
+ if (valid) {
+ ceph_dentry_lru_touch(dentry);
+ } else {
+ /* dir contents are no longer fully known */
+ ceph_dir_clear_complete(dir);
+ d_drop(dentry);
+ }
+ iput(dir); /* drop ref from ceph_get_dentry_parent_inode */
+ return valid;
+}
+
+/*
+ * Release our ceph_dentry_info: remove from the private LRU, drop
+ * the lease session reference, and free the per-dentry struct.
+ */
+static void ceph_d_release(struct dentry *dentry)
+{
+ struct ceph_dentry_info *di = ceph_dentry(dentry);
+
+ dout("d_release %p\n", dentry);
+ ceph_dentry_lru_del(dentry);
+ if (di->lease_session)
+ ceph_put_mds_session(di->lease_session);
+ kmem_cache_free(ceph_dentry_cachep, di);
+ dentry->d_fsdata = NULL;
+}
+
+/* Snapped dentries are always trusted (no revalidation for now). */
+static int ceph_snapdir_d_revalidate(struct dentry *dentry,
+ unsigned int flags)
+{
+ /*
+ * Eventually, we'll want to revalidate snapped metadata
+ * too... probably...
+ */
+ return 1;
+}
+
+/*
+ * When the VFS prunes a dentry from the cache, we need to clear the
+ * complete flag on the parent directory.
+ *
+ * Called under dentry->d_lock.
+ */
+static void ceph_d_prune(struct dentry *dentry)
+{
+ dout("ceph_d_prune %p\n", dentry);
+
+ /* do we have a valid parent? */
+ if (IS_ROOT(dentry))
+ return;
+
+ /* if we are not hashed, we don't affect dir's completeness */
+ if (d_unhashed(dentry))
+ return;
+
+ /*
+ * we hold d_lock, so d_parent is stable, and d_fsdata is never
+ * cleared until d_release
+ */
+ ceph_dir_clear_complete(dentry->d_parent->d_inode);
+}
+
+/*
+ * read() on a dir. This weird interface hack only works if mounted
+ * with '-o dirstat': formats the recursive directory statistics into
+ * a per-file text buffer on first read, then serves it like a
+ * regular file. The buffer presumably lives until file release —
+ * freed elsewhere; verify against ceph_release.
+ */
+static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
+ loff_t *ppos)
+{
+ struct ceph_file_info *cf = file->private_data;
+ struct inode *inode = file_inode(file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int left;
+ const int bufsize = 1024;
+
+ if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
+ return -EISDIR;
+
+ /* format the stats once, on first read */
+ if (!cf->dir_info) {
+ cf->dir_info = kmalloc(bufsize, GFP_NOFS);
+ if (!cf->dir_info)
+ return -ENOMEM;
+ cf->dir_info_len =
+ snprintf(cf->dir_info, bufsize,
+ "entries: %20lld\n"
+ " files: %20lld\n"
+ " subdirs: %20lld\n"
+ "rentries: %20lld\n"
+ " rfiles: %20lld\n"
+ " rsubdirs: %20lld\n"
+ "rbytes: %20lld\n"
+ "rctime: %10ld.%09ld\n",
+ ci->i_files + ci->i_subdirs,
+ ci->i_files,
+ ci->i_subdirs,
+ ci->i_rfiles + ci->i_rsubdirs,
+ ci->i_rfiles,
+ ci->i_rsubdirs,
+ ci->i_rbytes,
+ (long)ci->i_rctime.tv_sec,
+ (long)ci->i_rctime.tv_nsec);
+ }
+
+ if (*ppos >= cf->dir_info_len)
+ return 0;
+ size = min_t(unsigned, size, cf->dir_info_len-*ppos);
+ left = copy_to_user(buf, cf->dir_info + *ppos, size);
+ if (left == size)
+ return -EFAULT; /* nothing copied at all */
+ *ppos += (size - left);
+ return size - left;
+}
+
+/*
+ * an fsync() on a dir will wait for any uncommitted directory
+ * operations to commit: flush the mapping, then walk the inode's
+ * unsafe-dirop list and wait for each request's safe completion, up
+ * to the newest tid present when we started.
+ */
+static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
+{
+ struct inode *inode = file_inode(file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct list_head *head = &ci->i_unsafe_dirops;
+ struct ceph_mds_request *req;
+ u64 last_tid;
+ int ret = 0;
+
+ dout("dir_fsync %p\n", inode);
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+ mutex_lock(&inode->i_mutex);
+
+ spin_lock(&ci->i_unsafe_lock);
+ if (list_empty(head))
+ goto out;
+
+ /* remember the newest outstanding tid; don't wait past it */
+ req = list_entry(head->prev,
+ struct ceph_mds_request, r_unsafe_dir_item);
+ last_tid = req->r_tid;
+
+ do {
+ /* hold a ref so the request survives while we wait unlocked */
+ ceph_mdsc_get_request(req);
+ spin_unlock(&ci->i_unsafe_lock);
+
+ dout("dir_fsync %p wait on tid %llu (until %llu)\n",
+ inode, req->r_tid, last_tid);
+ if (req->r_timeout) {
+ ret = wait_for_completion_timeout(
+ &req->r_safe_completion, req->r_timeout);
+ if (ret > 0)
+ ret = 0;
+ else if (ret == 0)
+ ret = -EIO; /* timed out */
+ } else {
+ wait_for_completion(&req->r_safe_completion);
+ }
+ ceph_mdsc_put_request(req);
+
+ spin_lock(&ci->i_unsafe_lock);
+ if (ret || list_empty(head))
+ break;
+ req = list_entry(head->next,
+ struct ceph_mds_request, r_unsafe_dir_item);
+ } while (req->r_tid < last_tid);
+out:
+ spin_unlock(&ci->i_unsafe_lock);
+ mutex_unlock(&inode->i_mutex);
+
+ return ret;
+}
+
+/*
+ * We maintain a private dentry LRU.
+ *
+ * FIXME: this needs to be changed to a per-mds lru to be useful.
+ */
+/* Add a dentry to the tail (most-recently-used end) of the LRU. */
+void ceph_dentry_lru_add(struct dentry *dn)
+{
+ struct ceph_dentry_info *di = ceph_dentry(dn);
+ struct ceph_mds_client *mdsc;
+
+ dout("dentry_lru_add %p %p '%.*s'\n", di, dn,
+ dn->d_name.len, dn->d_name.name);
+ mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
+ spin_lock(&mdsc->dentry_lru_lock);
+ list_add_tail(&di->lru, &mdsc->dentry_lru);
+ mdsc->num_dentry++;
+ spin_unlock(&mdsc->dentry_lru_lock);
+}
+
+/* Move a dentry to the most-recently-used end of the LRU. */
+void ceph_dentry_lru_touch(struct dentry *dn)
+{
+ struct ceph_dentry_info *di = ceph_dentry(dn);
+ struct ceph_mds_client *mdsc;
+
+ dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn,
+ dn->d_name.len, dn->d_name.name, di->offset);
+ mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
+ spin_lock(&mdsc->dentry_lru_lock);
+ list_move_tail(&di->lru, &mdsc->dentry_lru);
+ spin_unlock(&mdsc->dentry_lru_lock);
+}
+
+/* Remove a dentry from the LRU and decrement the count. */
+void ceph_dentry_lru_del(struct dentry *dn)
+{
+ struct ceph_dentry_info *di = ceph_dentry(dn);
+ struct ceph_mds_client *mdsc;
+
+ dout("dentry_lru_del %p %p '%.*s'\n", di, dn,
+ dn->d_name.len, dn->d_name.name);
+ mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
+ spin_lock(&mdsc->dentry_lru_lock);
+ list_del_init(&di->lru);
+ mdsc->num_dentry--;
+ spin_unlock(&mdsc->dentry_lru_lock);
+}
+
+/*
+ * Return name hash for a given dentry. This is dependent on
+ * the parent directory's hash function.
+ */
+unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
+{
+ struct ceph_inode_info *dci = ceph_inode(dir);
+
+ switch (dci->i_dir_layout.dl_dir_hash) {
+ case 0: /* for backward compat */
+ case CEPH_STR_HASH_LINUX:
+ /* kernel already hashed the name for us */
+ return dn->d_name.hash;
+
+ default:
+ return ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
+ dn->d_name.name, dn->d_name.len);
+ }
+}
+
+/* file operations for directories */
+const struct file_operations ceph_dir_fops = {
+ .read = ceph_read_dir,
+ .iterate = ceph_readdir,
+ .llseek = ceph_dir_llseek,
+ .open = ceph_open,
+ .release = ceph_release,
+ .unlocked_ioctl = ceph_ioctl,
+ .fsync = ceph_dir_fsync,
+};
+
+/* inode operations for directories (note: rmdir shares ceph_unlink) */
+const struct inode_operations ceph_dir_iops = {
+ .lookup = ceph_lookup,
+ .permission = ceph_permission,
+ .getattr = ceph_getattr,
+ .setattr = ceph_setattr,
+ .setxattr = ceph_setxattr,
+ .getxattr = ceph_getxattr,
+ .listxattr = ceph_listxattr,
+ .removexattr = ceph_removexattr,
+ .get_acl = ceph_get_acl,
+ .set_acl = ceph_set_acl,
+ .mknod = ceph_mknod,
+ .symlink = ceph_symlink,
+ .mkdir = ceph_mkdir,
+ .link = ceph_link,
+ .unlink = ceph_unlink,
+ .rmdir = ceph_unlink,
+ .rename = ceph_rename,
+ .create = ceph_create,
+ .atomic_open = ceph_atomic_open,
+};
+
+/* dentry operations for regular (non-snapped) dentries */
+const struct dentry_operations ceph_dentry_ops = {
+ .d_revalidate = ceph_d_revalidate,
+ .d_release = ceph_d_release,
+ .d_prune = ceph_d_prune,
+};
+
+/* dentry operations for the virtual .snap directory */
+const struct dentry_operations ceph_snapdir_dentry_ops = {
+ .d_revalidate = ceph_snapdir_d_revalidate,
+ .d_release = ceph_d_release,
+};
+
+/* dentry operations for snapped dentries (no revalidation) */
+const struct dentry_operations ceph_snap_dentry_ops = {
+ .d_release = ceph_d_release,
+ .d_prune = ceph_d_prune,
+};
diff --git a/ceph/export.c b/ceph/export.c
new file mode 100644
index 0000000..00d6af6
--- /dev/null
+++ b/ceph/export.c
@@ -0,0 +1,250 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/exportfs.h>
+#include <linux/slab.h>
+#include <asm/unaligned.h>
+
+#include "super.h"
+#include "mds_client.h"
+
+/*
+ * Basic fh: just the inode number (FILEID_INO32_GEN).
+ */
+struct ceph_nfs_fh {
+ u64 ino;
+} __attribute__ ((packed));
+
+/*
+ * Larger fh that includes parent ino (FILEID_INO32_GEN_PARENT).
+ */
+struct ceph_nfs_confh {
+ u64 ino, parent_ino;
+} __attribute__ ((packed));
+
+/*
+ * Encode an NFS file handle for @inode into @rawfh. Handle lengths
+ * (*max_len) are counted in 4-byte words, per the exportfs
+ * convention. Returns the FILEID_* type, or FILEID_INVALID when the
+ * caller's buffer is too small (with *max_len set to what we need).
+ */
+static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
+ struct inode *parent_inode)
+{
+ int type;
+ struct ceph_nfs_fh *fh = (void *)rawfh;
+ struct ceph_nfs_confh *cfh = (void *)rawfh;
+ int connected_handle_length = sizeof(*cfh)/4;
+ int handle_length = sizeof(*fh)/4;
+
+ /* don't re-export snaps */
+ if (ceph_snap(inode) != CEPH_NOSNAP)
+ return -EINVAL;
+
+ if (parent_inode && (*max_len < connected_handle_length)) {
+ *max_len = connected_handle_length;
+ return FILEID_INVALID;
+ } else if (*max_len < handle_length) {
+ *max_len = handle_length;
+ return FILEID_INVALID;
+ }
+
+ if (parent_inode) {
+ dout("encode_fh %llx with parent %llx\n",
+ ceph_ino(inode), ceph_ino(parent_inode));
+ cfh->ino = ceph_ino(inode);
+ cfh->parent_ino = ceph_ino(parent_inode);
+ *max_len = connected_handle_length;
+ type = FILEID_INO32_GEN_PARENT;
+ } else {
+ dout("encode_fh %llx\n", ceph_ino(inode));
+ fh->ino = ceph_ino(inode);
+ *max_len = handle_length;
+ type = FILEID_INO32_GEN;
+ }
+ return type;
+}
+
+/*
+ * Resolve an inode number to a dentry: try the local inode cache
+ * first, otherwise ask an MDS with LOOKUPINO. Returns a referenced
+ * dentry or ERR_PTR (-ESTALE if the MDS doesn't know the ino).
+ */
+static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
+{
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
+ struct inode *inode;
+ struct dentry *dentry;
+ struct ceph_vino vino;
+ int err;
+
+ vino.ino = ino;
+ vino.snap = CEPH_NOSNAP;
+ inode = ceph_find_inode(sb, vino);
+ if (!inode) {
+ struct ceph_mds_request *req;
+
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
+ USE_ANY_MDS);
+ if (IS_ERR(req))
+ return ERR_CAST(req);
+
+ req->r_ino1 = vino;
+ req->r_num_caps = 1;
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+ inode = req->r_target_inode;
+ if (inode)
+ ihold(inode); /* take our own ref before the request drops its */
+ ceph_mdsc_put_request(req);
+ if (!inode)
+ return ERR_PTR(-ESTALE);
+ }
+
+ dentry = d_obtain_alias(inode);
+ if (IS_ERR(dentry)) {
+ iput(inode);
+ return dentry;
+ }
+ err = ceph_init_dentry(dentry);
+ if (err < 0) {
+ dput(dentry);
+ return ERR_PTR(err);
+ }
+ dout("__fh_to_dentry %llx %p dentry %p\n", ino, inode, dentry);
+ return dentry;
+}
+
+/*
+ * convert regular fh to dentry
+ *
+ * Validates the fh type/length (length in 4-byte words) before
+ * decoding; returns NULL for handles we can't interpret.
+ */
+static struct dentry *ceph_fh_to_dentry(struct super_block *sb,
+ struct fid *fid,
+ int fh_len, int fh_type)
+{
+ struct ceph_nfs_fh *fh = (void *)fid->raw;
+
+ if (fh_type != FILEID_INO32_GEN &&
+ fh_type != FILEID_INO32_GEN_PARENT)
+ return NULL;
+ if (fh_len < sizeof(*fh) / 4)
+ return NULL;
+
+ dout("fh_to_dentry %llx\n", fh->ino);
+ return __fh_to_dentry(sb, fh->ino);
+}
+
+/*
+ * Ask the MDS for the parent of @child (or, when @child is NULL, of
+ * the inode numbered @ino) via LOOKUPPARENT. Returns a referenced
+ * dentry for the parent or ERR_PTR (-ENOENT if unknown).
+ */
+static struct dentry *__get_parent(struct super_block *sb,
+ struct dentry *child, u64 ino)
+{
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
+ struct ceph_mds_request *req;
+ struct inode *inode;
+ struct dentry *dentry;
+ int err;
+
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT,
+ USE_ANY_MDS);
+ if (IS_ERR(req))
+ return ERR_CAST(req);
+
+ if (child) {
+ req->r_inode = child->d_inode;
+ ihold(child->d_inode);
+ } else {
+ req->r_ino1 = (struct ceph_vino) {
+ .ino = ino,
+ .snap = CEPH_NOSNAP,
+ };
+ }
+ req->r_num_caps = 1;
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+ inode = req->r_target_inode;
+ if (inode)
+ ihold(inode); /* take our own ref before the request drops its */
+ ceph_mdsc_put_request(req);
+ if (!inode)
+ return ERR_PTR(-ENOENT);
+
+ dentry = d_obtain_alias(inode);
+ if (IS_ERR(dentry)) {
+ iput(inode);
+ return dentry;
+ }
+ err = ceph_init_dentry(dentry);
+ if (err < 0) {
+ dput(dentry);
+ return ERR_PTR(err);
+ }
+ dout("__get_parent ino %llx parent %p ino %llx.%llx\n",
+ child ? ceph_ino(child->d_inode) : ino,
+ dentry, ceph_vinop(inode));
+ return dentry;
+}
+
+/*
+ * exportfs get_parent: look up the parent of a connected child
+ * dentry. Snapshots are never re-exported.
+ */
+struct dentry *ceph_get_parent(struct dentry *child)
+{
+ /* don't re-export snaps */
+ if (ceph_snap(child->d_inode) != CEPH_NOSNAP)
+ return ERR_PTR(-EINVAL);
+
+ dout("get_parent %p ino %llx.%llx\n",
+ child, ceph_vinop(child->d_inode));
+ return __get_parent(child->d_sb, child, 0);
+}
+
+/*
+ * convert regular fh to parent
+ *
+ * Prefer asking the MDS for the child's parent; if the child ino is
+ * unknown (-ENOENT), fall back to resolving the parent ino stored
+ * in the connectable handle directly.
+ */
+static struct dentry *ceph_fh_to_parent(struct super_block *sb,
+ struct fid *fid,
+ int fh_len, int fh_type)
+{
+ struct ceph_nfs_confh *cfh = (void *)fid->raw;
+ struct dentry *dentry;
+
+ if (fh_type != FILEID_INO32_GEN_PARENT)
+ return NULL;
+ if (fh_len < sizeof(*cfh) / 4)
+ return NULL;
+
+ dout("fh_to_parent %llx\n", cfh->parent_ino);
+ dentry = __get_parent(sb, NULL, cfh->ino);
+ if (IS_ERR(dentry) && PTR_ERR(dentry) == -ENOENT)
+ dentry = __fh_to_dentry(sb, cfh->parent_ino);
+ return dentry;
+}
+
+/*
+ * exportfs get_name: ask the MDS (LOOKUPNAME) for @child's name
+ * within @parent and copy it, NUL-terminated, into @name. Assumes
+ * @name is large enough for the reply (exportfs supplies a
+ * NAME_MAX-sized buffer — TODO confirm). Takes the parent's i_mutex
+ * for the duration of the request.
+ */
+static int ceph_get_name(struct dentry *parent, char *name,
+ struct dentry *child)
+{
+ struct ceph_mds_client *mdsc;
+ struct ceph_mds_request *req;
+ int err;
+
+ mdsc = ceph_inode_to_client(child->d_inode)->mdsc;
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
+ USE_ANY_MDS);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ mutex_lock(&parent->d_inode->i_mutex);
+
+ req->r_inode = child->d_inode;
+ ihold(child->d_inode);
+ req->r_ino2 = ceph_vino(parent->d_inode);
+ req->r_locked_dir = parent->d_inode;
+ req->r_num_caps = 2;
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+
+ mutex_unlock(&parent->d_inode->i_mutex);
+
+ if (!err) {
+ struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
+ memcpy(name, rinfo->dname, rinfo->dname_len);
+ name[rinfo->dname_len] = 0;
+ dout("get_name %p ino %llx.%llx name %s\n",
+ child, ceph_vinop(child->d_inode), name);
+ } else {
+ dout("get_name %p ino %llx.%llx err %d\n",
+ child, ceph_vinop(child->d_inode), err);
+ }
+
+ ceph_mdsc_put_request(req);
+ return err;
+}
+
+/* NFS export operations for ceph */
+const struct export_operations ceph_export_ops = {
+ .encode_fh = ceph_encode_fh,
+ .fh_to_dentry = ceph_fh_to_dentry,
+ .fh_to_parent = ceph_fh_to_parent,
+ .get_parent = ceph_get_parent,
+ .get_name = ceph_get_name,
+};
diff --git a/ceph/file.c b/ceph/file.c
new file mode 100644
index 0000000..66075a4
--- /dev/null
+++ b/ceph/file.c
@@ -0,0 +1,1294 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/writeback.h>
+#include <linux/aio.h>
+#include <linux/falloc.h>
+
+#include "super.h"
+#include "mds_client.h"
+#include "cache.h"
+
+/*
+ * Ceph file operations
+ *
+ * Implement basic open/close functionality, and implement
+ * read/write.
+ *
+ * We implement three modes of file I/O:
+ * - buffered uses the generic_file_aio_{read,write} helpers
+ *
+ * - synchronous is used when there is multi-client read/write
+ * sharing, avoids the page cache, and synchronously waits for an
+ * ack from the OSD.
+ *
+ * - direct io takes the variant of the sync path that references
+ * user pages directly.
+ *
+ * fsync() flushes and waits on dirty pages, but just queues metadata
+ * for writeback: since the MDS can recover size and mtime there is no
+ * need to wait for MDS acknowledgement.
+ */
+
+
+/*
+ * Prepare an open request. Preallocate ceph_cap to avoid an
+ * inopportune ENOMEM later.
+ */
+static struct ceph_mds_request *
+prepare_open_request(struct super_block *sb, int flags, int create_mode)
+{
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
+ struct ceph_mds_request *req;
+ int op;
+ int want_auth;
+
+ /* creates go through a distinct MDS op */
+ op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN;
+ /* anything that may modify the file must go to the auth MDS */
+ want_auth = (flags & (O_WRONLY|O_RDWR|O_CREAT|O_TRUNC)) ?
+ USE_AUTH_MDS : USE_ANY_MDS;
+
+ req = ceph_mdsc_create_request(mdsc, op, want_auth);
+ if (!IS_ERR(req)) {
+ req->r_fmode = ceph_flags_to_mode(flags);
+ req->r_args.open.flags = cpu_to_le32(flags);
+ req->r_args.open.mode = cpu_to_le32(create_mode);
+ }
+ return req;
+}
+
+/*
+ * initialize private struct file data.
+ * if we fail, clean up by dropping fmode reference on the ceph_inode
+ */
+static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
+{
+ struct ceph_file_info *cf;
+ int ret = 0;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+
+ switch (inode->i_mode & S_IFMT) {
+ case S_IFREG:
+ /* First file open request creates the cookie, we want to keep
+ * this cookie around for the filetime of the inode as not to
+ * have to worry about fscache register / revoke / operation
+ * races.
+ *
+ * Also, if we know the operation is going to invalidate data
+ * (non readonly) just nuke the cache right away.
+ */
+ ceph_fscache_register_inode_cookie(mdsc->fsc, ci);
+ if ((fmode & CEPH_FILE_MODE_WR))
+ ceph_fscache_invalidate(inode);
+ /* fallthrough: regular files and directories share the
+ * ceph_file_info allocation below. (NB: the dout below says
+ * "(regular)" for directories too.) */
+ case S_IFDIR:
+ dout("init_file %p %p 0%o (regular)\n", inode, file,
+ inode->i_mode);
+ cf = kmem_cache_alloc(ceph_file_cachep, GFP_NOFS | __GFP_ZERO);
+ if (cf == NULL) {
+ ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+ return -ENOMEM;
+ }
+ cf->fmode = fmode;
+ /* readdir position 2 skips the "." and ".." entries */
+ cf->next_offset = 2;
+ file->private_data = cf;
+ BUG_ON(inode->i_fop->release != ceph_release);
+ break;
+
+ case S_IFLNK:
+ dout("init_file %p %p 0%o (symlink)\n", inode, file,
+ inode->i_mode);
+ ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+ break;
+
+ default:
+ dout("init_file %p %p 0%o (special)\n", inode, file,
+ inode->i_mode);
+ /*
+ * we need to drop the open ref now, since we don't
+ * have .release set to ceph_release.
+ */
+ ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+ BUG_ON(inode->i_fop->release == ceph_release);
+
+ /* call the proper open fop */
+ ret = inode->i_fop->open(inode, file);
+ }
+ return ret;
+}
+
+/*
+ * If we already have the requisite capabilities, we can satisfy
+ * the open request locally (no need to request new caps from the
+ * MDS). We do, however, need to inform the MDS (asynchronously)
+ * if our wanted caps set expands.
+ */
+int ceph_open(struct inode *inode, struct file *file)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_mds_request *req;
+ struct ceph_file_info *cf = file->private_data;
+ struct inode *parent_inode = NULL;
+ int err;
+ int flags, fmode, wanted;
+
+ /* atomic_open may have already initialized this struct file */
+ if (cf) {
+ dout("open file %p is already opened\n", file);
+ return 0;
+ }
+
+ /* filter out O_CREAT|O_EXCL; vfs did that already. yuck. */
+ flags = file->f_flags & ~(O_CREAT|O_EXCL);
+ if (S_ISDIR(inode->i_mode))
+ flags = O_DIRECTORY; /* mds likes to know */
+
+ dout("open inode %p ino %llx.%llx file %p flags %d (%d)\n", inode,
+ ceph_vinop(inode), file, flags, file->f_flags);
+ fmode = ceph_flags_to_mode(flags);
+ wanted = ceph_caps_for_mode(fmode);
+
+ /* snapped files are read-only */
+ if (ceph_snap(inode) != CEPH_NOSNAP && (file->f_mode & FMODE_WRITE))
+ return -EROFS;
+
+ /* trivially open snapdir */
+ if (ceph_snap(inode) == CEPH_SNAPDIR) {
+ spin_lock(&ci->i_ceph_lock);
+ __ceph_get_fmode(ci, fmode);
+ spin_unlock(&ci->i_ceph_lock);
+ return ceph_init_file(inode, file, fmode);
+ }
+
+ /*
+ * No need to block if we have caps on the auth MDS (for
+ * write) or any MDS (for read). Update wanted set
+ * asynchronously.
+ */
+ spin_lock(&ci->i_ceph_lock);
+ if (__ceph_is_any_real_caps(ci) &&
+ (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
+ int mds_wanted = __ceph_caps_mds_wanted(ci);
+ int issued = __ceph_caps_issued(ci, NULL);
+
+ dout("open %p fmode %d want %s issued %s using existing\n",
+ inode, fmode, ceph_cap_string(wanted),
+ ceph_cap_string(issued));
+ __ceph_get_fmode(ci, fmode);
+ spin_unlock(&ci->i_ceph_lock);
+
+ /* adjust wanted? */
+ if ((issued & wanted) != wanted &&
+ (mds_wanted & wanted) != wanted &&
+ ceph_snap(inode) != CEPH_SNAPDIR)
+ ceph_check_caps(ci, 0, NULL);
+
+ return ceph_init_file(inode, file, fmode);
+ } else if (ceph_snap(inode) != CEPH_NOSNAP &&
+ (ci->i_snap_caps & wanted) == wanted) {
+ /* snapped inode whose cached snap caps already cover us */
+ __ceph_get_fmode(ci, fmode);
+ spin_unlock(&ci->i_ceph_lock);
+ return ceph_init_file(inode, file, fmode);
+ }
+
+ spin_unlock(&ci->i_ceph_lock);
+
+ /* slow path: ask the MDS to open (and grant caps) */
+ dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
+ req = prepare_open_request(inode->i_sb, flags, 0);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto out;
+ }
+ /* the request owns this inode ref until it is put */
+ req->r_inode = inode;
+ ihold(inode);
+
+ req->r_num_caps = 1;
+ if (flags & O_CREAT)
+ parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
+ err = ceph_mdsc_do_request(mdsc, parent_inode, req);
+ iput(parent_inode);
+ if (!err)
+ err = ceph_init_file(inode, file, req->r_fmode);
+ ceph_mdsc_put_request(req);
+ dout("open result=%d on %llx.%llx\n", err, ceph_vinop(inode));
+out:
+ return err;
+}
+
+
+/*
+ * Do a lookup + open with a single request. If we get a non-existent
+ * file or symlink, return 1 so the VFS can retry.
+ */
+int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
+ struct file *file, unsigned flags, umode_t mode,
+ int *opened)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_mds_request *req;
+ struct dentry *dn;
+ int err;
+
+ dout("atomic_open %p dentry %p '%.*s' %s flags %d mode 0%o\n",
+ dir, dentry, dentry->d_name.len, dentry->d_name.name,
+ d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode);
+
+ if (dentry->d_name.len > NAME_MAX)
+ return -ENAMETOOLONG;
+
+ err = ceph_init_dentry(dentry);
+ if (err < 0)
+ return err;
+
+ /* do the open */
+ req = prepare_open_request(dir->i_sb, flags, mode);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ req->r_dentry = dget(dentry);
+ req->r_num_caps = 2;
+ if (flags & O_CREAT) {
+ /* ask the MDS to invalidate others' dentry lease on create */
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
+ }
+ req->r_locked_dir = dir; /* caller holds dir->i_mutex */
+ err = ceph_mdsc_do_request(mdsc,
+ (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
+ req);
+ if (err)
+ goto out_err;
+
+ /* ENOENT inside .snap maps to a synthetic snapdir lookup */
+ err = ceph_handle_snapdir(req, dentry, err);
+ if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
+ err = ceph_handle_notrace_create(dir, dentry);
+
+ if (d_unhashed(dentry)) {
+ /* splice the reply inode into the dcache */
+ dn = ceph_finish_lookup(req, dentry, err);
+ if (IS_ERR(dn))
+ err = PTR_ERR(dn);
+ } else {
+ /* we were given a hashed negative dentry */
+ dn = NULL;
+ }
+ if (err)
+ goto out_err;
+ if (dn || dentry->d_inode == NULL || S_ISLNK(dentry->d_inode->i_mode)) {
+ /* make vfs retry on splice, ENOENT, or symlink */
+ dout("atomic_open finish_no_open on dn %p\n", dn);
+ err = finish_no_open(file, dn);
+ } else {
+ dout("atomic_open finish_open on dn %p\n", dn);
+ if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
+ ceph_init_acl(dentry, dentry->d_inode, dir);
+ *opened |= FILE_CREATED;
+ }
+ err = finish_open(file, dentry, ceph_open, opened);
+ }
+out_err:
+ /* drop the open ref granted by the MDS if we are bailing out */
+ if (!req->r_err && req->r_target_inode)
+ ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode);
+ ceph_mdsc_put_request(req);
+ dout("atomic_open result=%d\n", err);
+ return err;
+}
+
+int ceph_release(struct inode *inode, struct file *file)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_file_info *fi = file->private_data;
+
+ dout("release inode %p file %p\n", inode, file);
+
+ /* drop the open reference taken at open time */
+ ceph_put_fmode(ci, fi->fmode);
+
+ /* tear down cached readdir state hanging off the file */
+ if (fi->last_readdir)
+ ceph_mdsc_put_request(fi->last_readdir);
+ kfree(fi->dir_info);
+ kfree(fi->last_name);
+ dput(fi->dentry);
+
+ kmem_cache_free(ceph_file_cachep, fi);
+
+ /* wake up anyone waiting for caps on this inode */
+ wake_up_all(&ci->i_cap_wq);
+ return 0;
+}
+
+/*
+ * Read a range of bytes striped over one or more objects. Iterate over
+ * objects we stripe over. (That's not atomic, but good enough for now.)
+ *
+ * If we get a short result from the OSD, check against i_size; we need to
+ * only return a short read to the caller if we hit EOF.
+ */
+static int striped_read(struct inode *inode,
+ u64 off, u64 len,
+ struct page **pages, int num_pages,
+ int *checkeof, bool o_direct,
+ unsigned long buf_align)
+{
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ u64 pos, this_len, left;
+ int io_align, page_align;
+ int pages_left;
+ int read; /* bytes accumulated so far across sub-reads */
+ struct page **page_pos;
+ int ret;
+ bool hit_stripe, was_short;
+
+ /*
+ * we may need to do multiple reads. not atomic, unfortunately.
+ */
+ pos = off;
+ left = len;
+ page_pos = pages;
+ pages_left = num_pages;
+ read = 0;
+ /* offset of the start of the read within its first page */
+ io_align = off & ~PAGE_MASK;
+
+more:
+ /* for O_DIRECT, alignment follows the user buffer, not the file */
+ if (o_direct)
+ page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
+ else
+ page_align = pos & ~PAGE_MASK;
+ this_len = left;
+ /* readpages clamps this_len to the current object/stripe extent */
+ ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
+ &ci->i_layout, pos, &this_len,
+ ci->i_truncate_seq,
+ ci->i_truncate_size,
+ page_pos, pages_left, page_align);
+ if (ret == -ENOENT)
+ ret = 0; /* sparse object: treat as zeroes */
+ hit_stripe = this_len < left;
+ was_short = ret >= 0 && ret < this_len;
+ dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read,
+ ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
+
+ if (ret >= 0) {
+ int didpages;
+ /* short read inside i_size means a hole: zero-fill the gap */
+ if (was_short && (pos + ret < inode->i_size)) {
+ u64 tmp = min(this_len - ret,
+ inode->i_size - pos - ret);
+ dout(" zero gap %llu to %llu\n",
+ pos + ret, pos + ret + tmp);
+ ceph_zero_page_vector_range(page_align + read + ret,
+ tmp, pages);
+ ret += tmp;
+ }
+
+ /* advance page cursor past fully-consumed pages */
+ didpages = (page_align + ret) >> PAGE_CACHE_SHIFT;
+ pos += ret;
+ read = pos - off;
+ left -= ret;
+ page_pos += didpages;
+ pages_left -= didpages;
+
+ /* hit stripe and need continue*/
+ if (left && hit_stripe && pos < inode->i_size)
+ goto more;
+ }
+
+ if (read > 0) {
+ ret = read;
+ /* did we bounce off eof? */
+ if (pos + left > inode->i_size)
+ *checkeof = 1;
+ }
+
+ dout("striped_read returns %d\n", ret);
+ return ret;
+}
+
+/*
+ * Completely synchronous read and write methods. Direct from __user
+ * buffer to osd, or directly to user pages (if O_DIRECT).
+ *
+ * If the read spans object boundary, just do multiple reads.
+ */
+static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
+ int *checkeof)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ struct page **pages;
+ u64 off = iocb->ki_pos;
+ int num_pages, ret;
+ size_t len = i->count;
+
+ dout("sync_read on file %p %llu~%u %s\n", file, off,
+ (unsigned)len,
+ (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
+ /*
+ * flush any page cache pages in this range. this
+ * will make concurrent normal and sync io slow,
+ * but it will at least behave sensibly when they are
+ * in sequence.
+ */
+ ret = filemap_write_and_wait_range(inode->i_mapping, off,
+ off + len);
+ if (ret < 0)
+ return ret;
+
+ if (file->f_flags & O_DIRECT) {
+ /* read segment by segment straight into user pages */
+ while (iov_iter_count(i)) {
+ void __user *data = i->iov[0].iov_base + i->iov_offset;
+ size_t len = i->iov[0].iov_len - i->iov_offset;
+
+ num_pages = calc_pages_for((unsigned long)data, len);
+ pages = ceph_get_direct_page_vector(data,
+ num_pages, true);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ ret = striped_read(inode, off, len,
+ pages, num_pages, checkeof,
+ 1, (unsigned long)data & ~PAGE_MASK);
+ ceph_put_page_vector(pages, num_pages, true);
+
+ if (ret <= 0)
+ break;
+ off += ret;
+ iov_iter_advance(i, ret);
+ if (ret < len)
+ break; /* short read: stop here */
+ }
+ } else {
+ /* read into kernel pages, then copy out to the iovec */
+ num_pages = calc_pages_for(off, len);
+ pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+ ret = striped_read(inode, off, len, pages,
+ num_pages, checkeof, 0, 0);
+ if (ret > 0) {
+ int l, k = 0;
+ size_t left = len = ret; /* NB: len now = bytes read */
+
+ while (left) {
+ void __user *data = i->iov[0].iov_base
+ + i->iov_offset;
+ l = min(i->iov[0].iov_len - i->iov_offset,
+ left);
+
+ ret = ceph_copy_page_vector_to_user(&pages[k],
+ data, off,
+ l);
+ if (ret > 0) {
+ iov_iter_advance(i, ret);
+ left -= ret;
+ off += ret;
+ /* recompute which page we're on */
+ k = calc_pages_for(iocb->ki_pos,
+ len - left + 1) - 1;
+ BUG_ON(k >= num_pages && left);
+ } else
+ break;
+ }
+ }
+ ceph_release_page_vector(pages, num_pages);
+ }
+
+ /* report progress (not the last sub-ret) and advance the file pos */
+ if (off > iocb->ki_pos) {
+ ret = off - iocb->ki_pos;
+ iocb->ki_pos = off;
+ }
+
+ dout("sync_read result %d\n", ret);
+ return ret;
+}
+
+/*
+ * Write commit request unsafe callback, called to tell us when a
+ * request is unsafe (that is, in flight--has been handed to the
+ * messenger to send to its target osd). It is called again when
+ * we've received a response message indicating the request is
+ * "safe" (its CEPH_OSD_FLAG_ONDISK flag is set), or when a request
+ * is completed early (and unsuccessfully) due to a timeout or
+ * interrupt.
+ *
+ * This is used if we requested both an ACK and ONDISK commit reply
+ * from the OSD.
+ */
+static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
+{
+ struct ceph_inode_info *ci = ceph_inode(req->r_inode);
+
+ dout("%s %p tid %llu %ssafe\n", __func__, req, req->r_tid,
+ unsafe ? "un" : "");
+
+ if (!unsafe) {
+ /* committed (or aborted): unhook and release the Fw ref */
+ spin_lock(&ci->i_unsafe_lock);
+ list_del_init(&req->r_unsafe_item);
+ spin_unlock(&ci->i_unsafe_lock);
+ ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR);
+ return;
+ }
+
+ /* in flight: pin Fw and track the request on the inode */
+ ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
+ spin_lock(&ci->i_unsafe_lock);
+ list_add_tail(&req->r_unsafe_item, &ci->i_unsafe_writes);
+ spin_unlock(&ci->i_unsafe_lock);
+}
+
+
+/*
+ * Synchronous write, straight from __user pointer or user pages.
+ *
+ * If write spans object boundary, just do multiple writes. (For a
+ * correct atomic write, we should e.g. take write locks on all
+ * objects, rollback on failure, etc.)
+ */
+static ssize_t
+ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, size_t count)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_snap_context *snapc;
+ struct ceph_vino vino;
+ struct ceph_osd_request *req;
+ struct page **pages;
+ int num_pages;
+ int written = 0;
+ int flags;
+ int check_caps = 0;
+ int page_align;
+ int ret;
+ struct timespec mtime = CURRENT_TIME;
+ loff_t pos = iocb->ki_pos;
+ struct iov_iter i;
+
+ /* snapshots are immutable */
+ if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
+ return -EROFS;
+
+ dout("sync_direct_write on file %p %lld~%u\n", file, pos,
+ (unsigned)count);
+
+ /* flush dirty pagecache first so the OSD write can't be overtaken */
+ ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + count);
+ if (ret < 0)
+ return ret;
+
+ ret = invalidate_inode_pages2_range(inode->i_mapping,
+ pos >> PAGE_CACHE_SHIFT,
+ (pos + count) >> PAGE_CACHE_SHIFT);
+ if (ret < 0)
+ dout("invalidate_inode_pages2_range returned %d\n", ret);
+
+ flags = CEPH_OSD_FLAG_ORDERSNAP |
+ CEPH_OSD_FLAG_ONDISK |
+ CEPH_OSD_FLAG_WRITE;
+
+ iov_iter_init(&i, iov, nr_segs, count, 0);
+
+ while (iov_iter_count(&i) > 0) {
+ void __user *data = i.iov->iov_base + i.iov_offset;
+ u64 len = i.iov->iov_len - i.iov_offset;
+
+ page_align = (unsigned long)data & ~PAGE_MASK;
+
+ snapc = ci->i_snap_realm->cached_context;
+ vino = ceph_vino(inode);
+ req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
+ vino, pos, &len,
+ 2,/*include a 'startsync' command*/
+ CEPH_OSD_OP_WRITE, flags, snapc,
+ ci->i_truncate_seq,
+ ci->i_truncate_size,
+ false);
+ if (IS_ERR(req)) {
+ /*
+ * Do NOT "goto out" here: the out label calls
+ * ceph_osdc_put_request() and req is an ERR_PTR,
+ * so that would dereference an invalid pointer.
+ */
+ ret = PTR_ERR(req);
+ break;
+ }
+
+ num_pages = calc_pages_for(page_align, len);
+ pages = ceph_get_direct_page_vector(data, num_pages, false);
+ if (IS_ERR(pages)) {
+ ret = PTR_ERR(pages);
+ goto out;
+ }
+
+ /*
+ * throw out any page cache pages in this range. this
+ * may block.
+ */
+ truncate_inode_pages_range(inode->i_mapping, pos,
+ (pos+len) | (PAGE_CACHE_SIZE-1));
+ osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align,
+ false, false);
+
+ /* BUG_ON(vino.snap != CEPH_NOSNAP); */
+ ceph_osdc_build_request(req, pos, snapc, vino.snap, &mtime);
+
+ ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+ if (!ret)
+ ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+
+ ceph_put_page_vector(pages, num_pages, false);
+
+out:
+ ceph_osdc_put_request(req);
+ if (ret == 0) {
+ /* segment written; advance and maybe grow i_size */
+ pos += len;
+ written += len;
+ iov_iter_advance(&i, (size_t)len);
+
+ if (pos > i_size_read(inode)) {
+ check_caps = ceph_inode_set_size(inode, pos);
+ if (check_caps)
+ ceph_check_caps(ceph_inode(inode),
+ CHECK_CAPS_AUTHONLY,
+ NULL);
+ }
+ } else
+ break;
+ }
+
+ /* report partial progress unless the caller must retry (-EOLDSNAPC) */
+ if (ret != -EOLDSNAPC && written > 0) {
+ iocb->ki_pos = pos;
+ ret = written;
+ }
+ return ret;
+}
+
+
+/*
+ * Synchronous write, straight from __user pointer or user pages.
+ *
+ * If write spans object boundary, just do multiple writes. (For a
+ * correct atomic write, we should e.g. take write locks on all
+ * objects, rollback on failure, etc.)
+ */
+static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, size_t count)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_snap_context *snapc;
+ struct ceph_vino vino;
+ struct ceph_osd_request *req;
+ struct page **pages;
+ u64 len;
+ int num_pages;
+ int written = 0;
+ int flags;
+ int check_caps = 0;
+ int ret;
+ struct timespec mtime = CURRENT_TIME;
+ loff_t pos = iocb->ki_pos;
+ struct iov_iter i;
+
+ /* snapshots are immutable */
+ if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
+ return -EROFS;
+
+ dout("sync_write on file %p %lld~%u\n", file, pos, (unsigned)count);
+
+ /* flush dirty pagecache first so the OSD write can't be overtaken */
+ ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + count);
+ if (ret < 0)
+ return ret;
+
+ ret = invalidate_inode_pages2_range(inode->i_mapping,
+ pos >> PAGE_CACHE_SHIFT,
+ (pos + count) >> PAGE_CACHE_SHIFT);
+ if (ret < 0)
+ dout("invalidate_inode_pages2_range returned %d\n", ret);
+
+ /* request both an ACK and an ONDISK commit reply */
+ flags = CEPH_OSD_FLAG_ORDERSNAP |
+ CEPH_OSD_FLAG_ONDISK |
+ CEPH_OSD_FLAG_WRITE |
+ CEPH_OSD_FLAG_ACK;
+
+ iov_iter_init(&i, iov, nr_segs, count, 0);
+
+ while ((len = iov_iter_count(&i)) > 0) {
+ size_t left;
+ int n;
+
+ snapc = ci->i_snap_realm->cached_context;
+ vino = ceph_vino(inode);
+ req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
+ vino, pos, &len, 1,
+ CEPH_OSD_OP_WRITE, flags, snapc,
+ ci->i_truncate_seq,
+ ci->i_truncate_size,
+ false);
+ if (IS_ERR(req)) {
+ /*
+ * Do NOT "goto out" here: the out label calls
+ * ceph_osdc_put_request() and req is an ERR_PTR,
+ * so that would dereference an invalid pointer.
+ */
+ ret = PTR_ERR(req);
+ break;
+ }
+
+ /*
+ * write from beginning of first page,
+ * regardless of io alignment
+ */
+ num_pages = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+ pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
+ if (IS_ERR(pages)) {
+ ret = PTR_ERR(pages);
+ goto out;
+ }
+
+ /* copy the user data into the bounce pages */
+ left = len;
+ for (n = 0; n < num_pages; n++) {
+ size_t plen = min_t(size_t, left, PAGE_SIZE);
+ ret = iov_iter_copy_from_user(pages[n], &i, 0, plen);
+ if (ret != plen) {
+ ret = -EFAULT;
+ break;
+ }
+ left -= ret;
+ iov_iter_advance(&i, ret);
+ }
+
+ if (ret < 0) {
+ ceph_release_page_vector(pages, num_pages);
+ goto out;
+ }
+
+ /* get a second commit callback */
+ req->r_unsafe_callback = ceph_sync_write_unsafe;
+ req->r_inode = inode;
+
+ osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
+ false, true);
+
+ /* BUG_ON(vino.snap != CEPH_NOSNAP); */
+ ceph_osdc_build_request(req, pos, snapc, vino.snap, &mtime);
+
+ ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+ if (!ret)
+ ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+
+out:
+ ceph_osdc_put_request(req);
+ if (ret == 0) {
+ /* segment written; advance and maybe grow i_size */
+ pos += len;
+ written += len;
+
+ if (pos > i_size_read(inode)) {
+ check_caps = ceph_inode_set_size(inode, pos);
+ if (check_caps)
+ ceph_check_caps(ceph_inode(inode),
+ CHECK_CAPS_AUTHONLY,
+ NULL);
+ }
+ } else
+ break;
+ }
+
+ /* report partial progress unless the caller must retry (-EOLDSNAPC) */
+ if (ret != -EOLDSNAPC && written > 0) {
+ ret = written;
+ iocb->ki_pos = pos;
+ }
+ return ret;
+}
+
+/*
+ * Wrap generic_file_aio_read with checks for cap bits on the inode.
+ * Atomically grab references, so that those bits are not released
+ * back to the MDS mid-read.
+ *
+ * Hmm, the sync read case isn't actually async... should it be?
+ */
+static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct file *filp = iocb->ki_filp;
+ struct ceph_file_info *fi = filp->private_data;
+ size_t len = iocb->ki_nbytes;
+ struct inode *inode = file_inode(filp);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ ssize_t ret;
+ int want, got = 0;
+ int checkeof = 0, read = 0; /* read = bytes from earlier passes */
+
+again:
+ dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
+ inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, inode);
+
+ if (fi->fmode & CEPH_FILE_MODE_LAZY)
+ want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
+ else
+ want = CEPH_CAP_FILE_CACHE;
+ ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
+ if (ret < 0)
+ return ret;
+
+ /* no cache cap, O_DIRECT, or explicit sync: bypass the page cache */
+ if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
+ (iocb->ki_filp->f_flags & O_DIRECT) ||
+ (fi->flags & CEPH_F_SYNC)) {
+ struct iov_iter i;
+
+ dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n",
+ inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
+ ceph_cap_string(got));
+
+ if (!read) {
+ /* validate the iovec on the first pass only */
+ ret = generic_segment_checks(iov, &nr_segs,
+ &len, VERIFY_WRITE);
+ if (ret)
+ goto out;
+ }
+
+ iov_iter_init(&i, iov, nr_segs, len, read);
+
+ /* hmm, this isn't really async... */
+ ret = ceph_sync_read(iocb, &i, &checkeof);
+ } else {
+ /*
+ * We can't modify the content of iov,
+ * so we only read from beginning.
+ */
+ if (read) {
+ iocb->ki_pos = pos;
+ len = iocb->ki_nbytes;
+ read = 0;
+ }
+ dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
+ inode, ceph_vinop(inode), pos, (unsigned)len,
+ ceph_cap_string(got));
+
+ ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
+ }
+out:
+ dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
+ inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
+ ceph_put_cap_refs(ci, got);
+
+ /* a short sync read may have hit a hole: refresh size and retry */
+ if (checkeof && ret >= 0) {
+ int statret = ceph_do_getattr(inode,
+ CEPH_STAT_CAP_SIZE);
+
+ /* hit EOF or hole? */
+ if (statret == 0 && iocb->ki_pos < inode->i_size &&
+ ret < len) {
+ dout("sync_read hit hole, ppos %lld < size %lld"
+ ", reading more\n", iocb->ki_pos,
+ inode->i_size);
+
+ read += ret;
+ len -= ret;
+ checkeof = 0;
+ goto again;
+ }
+ }
+
+ /* fold in the bytes collected by earlier passes */
+ if (ret >= 0)
+ ret += read;
+
+ return ret;
+}
+
+/*
+ * Take cap references to avoid releasing caps to MDS mid-write.
+ *
+ * If we are synchronous, and write with an old snap context, the OSD
+ * may return EOLDSNAPC. In that case, retry the write.. _after_
+ * dropping our cap refs and allowing the pending snap to logically
+ * complete _before_ this write occurs.
+ *
+ * If we are near ENOSPC, write synchronously.
+ */
+static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct file *file = iocb->ki_filp;
+ struct ceph_file_info *fi = file->private_data;
+ struct inode *inode = file_inode(file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_osd_client *osdc =
+ &ceph_sb_to_client(inode->i_sb)->client->osdc;
+ ssize_t count, written = 0;
+ int err, want, got;
+
+ if (ceph_snap(inode) != CEPH_NOSNAP)
+ return -EROFS;
+
+ mutex_lock(&inode->i_mutex);
+
+ err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
+ if (err)
+ goto out;
+
+ /* We can write back this queue in page reclaim */
+ current->backing_dev_info = file->f_mapping->backing_dev_info;
+
+ /* applies O_APPEND, rlimits, s_maxbytes; may adjust pos/count */
+ err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
+ if (err)
+ goto out;
+
+ if (count == 0)
+ goto out;
+
+ err = file_remove_suid(file);
+ if (err)
+ goto out;
+
+ err = file_update_time(file);
+ if (err)
+ goto out;
+
+retry_snap:
+ /* refuse writes when the cluster reports itself full */
+ if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) {
+ err = -ENOSPC;
+ goto out;
+ }
+
+ dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
+ inode, ceph_vinop(inode), pos, count, inode->i_size);
+ if (fi->fmode & CEPH_FILE_MODE_LAZY)
+ want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
+ else
+ want = CEPH_CAP_FILE_BUFFER;
+ got = 0;
+ err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos + count);
+ if (err < 0)
+ goto out;
+
+ dout("aio_write %p %llx.%llx %llu~%zd got cap refs on %s\n",
+ inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));
+
+ /* no buffer cap, O_DIRECT, or explicit sync: write synchronously */
+ if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
+ (file->f_flags & O_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
+ mutex_unlock(&inode->i_mutex);
+ if (file->f_flags & O_DIRECT)
+ written = ceph_sync_direct_write(iocb, iov,
+ nr_segs, count);
+ else
+ written = ceph_sync_write(iocb, iov, nr_segs, count);
+ /* stale snap context: let the pending snap settle, retry */
+ if (written == -EOLDSNAPC) {
+ dout("aio_write %p %llx.%llx %llu~%u"
+ "got EOLDSNAPC, retrying\n",
+ inode, ceph_vinop(inode),
+ pos, (unsigned)iov->iov_len);
+ mutex_lock(&inode->i_mutex);
+ goto retry_snap;
+ }
+ } else {
+ loff_t old_size = inode->i_size;
+ /*
+ * No need to acquire the i_truncate_mutex. Because
+ * the MDS revokes Fwb caps before sending truncate
+ * message to us. We can't get Fwb cap while there
+ * are pending vmtruncate. So write and vmtruncate
+ * can not run at the same time
+ */
+ written = generic_file_buffered_write(iocb, iov, nr_segs,
+ pos, &iocb->ki_pos,
+ count, 0);
+ if (inode->i_size > old_size)
+ ceph_fscache_update_objectsize(inode);
+ mutex_unlock(&inode->i_mutex);
+ }
+
+ if (written >= 0) {
+ /* mark Fw dirty so the caps layer flushes size/mtime to MDS */
+ int dirty;
+ spin_lock(&ci->i_ceph_lock);
+ dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
+ spin_unlock(&ci->i_ceph_lock);
+ if (dirty)
+ __mark_inode_dirty(inode, dirty);
+ }
+
+ dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n",
+ inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
+ ceph_cap_string(got));
+ ceph_put_cap_refs(ci, got);
+
+ /* O_SYNC / near-full cluster: flush what we just wrote */
+ if (written >= 0 &&
+ ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) ||
+ ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
+ err = vfs_fsync_range(file, pos, pos + written - 1, 1);
+ if (err < 0)
+ written = err;
+ }
+
+ goto out_unlocked;
+
+out:
+ mutex_unlock(&inode->i_mutex);
+out_unlocked:
+ current->backing_dev_info = NULL;
+ return written ? written : err;
+}
+
+/*
+ * llseek. be sure to verify file size on SEEK_END.
+ */
+static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
+{
+ struct inode *inode = file->f_mapping->host;
+ int ret;
+
+ mutex_lock(&inode->i_mutex);
+
+ /* these modes depend on i_size; refresh it from the MDS first */
+ if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
+ ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
+ if (ret < 0) {
+ offset = ret;
+ goto out;
+ }
+ }
+
+ switch (whence) {
+ case SEEK_END:
+ offset += inode->i_size;
+ break;
+ case SEEK_CUR:
+ /*
+ * Here we special-case the lseek(fd, 0, SEEK_CUR)
+ * position-querying operation. Avoid rewriting the "same"
+ * f_pos value back to the file because a concurrent read(),
+ * write() or lseek() might have altered it
+ */
+ if (offset == 0) {
+ offset = file->f_pos;
+ goto out;
+ }
+ offset += file->f_pos;
+ break;
+ case SEEK_DATA:
+ /* no hole tracking: the whole file is treated as data */
+ if (offset >= inode->i_size) {
+ ret = -ENXIO;
+ goto out;
+ }
+ break;
+ case SEEK_HOLE:
+ /* ... and the only "hole" is the one at EOF */
+ if (offset >= inode->i_size) {
+ ret = -ENXIO;
+ goto out;
+ }
+ offset = inode->i_size;
+ break;
+ }
+
+ /* clamps to [0, s_maxbytes] and stores the new f_pos */
+ offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
+
+out:
+ mutex_unlock(&inode->i_mutex);
+ return offset;
+}
+
+/*
+ * Zero @size bytes starting at @offset within the (single) page that
+ * contains @offset, but only if that page is already in the page cache.
+ */
+static inline void ceph_zero_partial_page(
+ struct inode *inode, loff_t offset, unsigned size)
+{
+ pgoff_t index = offset >> PAGE_CACHE_SHIFT;
+ struct page *page = find_lock_page(inode->i_mapping, index);
+
+ if (!page)
+ return;
+
+ wait_on_page_writeback(page);
+ zero_user(page, offset & (PAGE_CACHE_SIZE - 1), size);
+ unlock_page(page);
+ page_cache_release(page);
+}
+
+/*
+ * Zero the page cache over [offset, offset+length): partially zero the
+ * unaligned head and tail pages, and drop the whole pages in between.
+ */
+static void ceph_zero_pagecache_range(struct inode *inode, loff_t offset,
+ loff_t length)
+{
+ loff_t head_end = round_up(offset, PAGE_CACHE_SIZE);
+ loff_t chunk;
+
+ /* unaligned head: zero up to the next page boundary */
+ if (offset < head_end) {
+ chunk = min(length, head_end - offset);
+ ceph_zero_partial_page(inode, offset, chunk);
+ offset += chunk;
+ length -= chunk;
+ }
+
+ /* whole pages in the middle: just toss them */
+ if (length >= PAGE_CACHE_SIZE) {
+ chunk = round_down(length, PAGE_CACHE_SIZE);
+ truncate_pagecache_range(inode, offset, offset + chunk - 1);
+ offset += chunk;
+ length -= chunk;
+ }
+
+ /* unaligned tail */
+ if (length)
+ ceph_zero_partial_page(inode, offset, length);
+}
+
+/*
+ * Zero a range within a single RADOS object. A NULL @length means
+ * "the rest of the object": the object is then deleted or truncated
+ * instead of zeroed. -ENOENT from the OSD (object absent, i.e.
+ * already a hole) is treated as success.
+ */
+static int ceph_zero_partial_object(struct inode *inode,
+ loff_t offset, loff_t *length)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_osd_request *req;
+ int ret = 0;
+ loff_t zero = 0;
+ int op;
+
+ if (!length) {
+ /* whole object: presumably offset!=0 selects DELETE of the
+ * object vs TRUNCATE at its start — verify against the
+ * layout mapping done in ceph_osdc_new_request() */
+ op = offset ? CEPH_OSD_OP_DELETE : CEPH_OSD_OP_TRUNCATE;
+ length = &zero;
+ } else {
+ op = CEPH_OSD_OP_ZERO;
+ }
+
+ req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
+ ceph_vino(inode),
+ offset, length,
+ 1, op,
+ CEPH_OSD_FLAG_WRITE |
+ CEPH_OSD_FLAG_ONDISK,
+ NULL, 0, 0, false);
+ if (IS_ERR(req)) {
+ ret = PTR_ERR(req);
+ goto out;
+ }
+
+ ceph_osdc_build_request(req, offset, NULL, ceph_vino(inode).snap,
+ &inode->i_mtime);
+
+ ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+ if (!ret) {
+ ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+ if (ret == -ENOENT)
+ ret = 0;
+ }
+ ceph_osdc_put_request(req);
+
+out:
+ return ret;
+}
+
+/*
+ * Zero [offset, offset+length) object by object: partially zero the
+ * leading objects up to the next period (stripe set) boundary, delete
+ * or truncate whole stripe sets in the middle, then zero the tail.
+ */
+static int ceph_zero_objects(struct inode *inode, loff_t offset, loff_t length)
+{
+ int ret = 0;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ s32 stripe_unit = ceph_file_layout_su(ci->i_layout);
+ s32 stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
+ s32 object_size = ceph_file_layout_object_size(ci->i_layout);
+ /* widen before multiplying: s32 * s32 would overflow in int
+ * arithmetic for large object_size * stripe_count products */
+ u64 object_set_size = (u64)object_size * stripe_count;
+ u64 nearly, t;
+
+ /* round offset up to next period boundary */
+ nearly = offset + object_set_size - 1;
+ t = nearly;
+ nearly -= do_div(t, object_set_size);
+
+ /* head: partial objects up to the period boundary */
+ while (length && offset < nearly) {
+ loff_t size = length;
+ ret = ceph_zero_partial_object(inode, offset, &size);
+ if (ret < 0)
+ return ret;
+ offset += size;
+ length -= size;
+ }
+ /* middle: whole stripe sets, one object per stripe */
+ while (length >= object_set_size) {
+ int i;
+ loff_t pos = offset;
+ for (i = 0; i < stripe_count; ++i) {
+ /* NULL length: drop/truncate the whole object */
+ ret = ceph_zero_partial_object(inode, pos, NULL);
+ if (ret < 0)
+ return ret;
+ pos += stripe_unit;
+ }
+ offset += object_set_size;
+ length -= object_set_size;
+ }
+ /* tail: remaining partial objects */
+ while (length) {
+ loff_t size = length;
+ ret = ceph_zero_partial_object(inode, offset, &size);
+ if (ret < 0)
+ return ret;
+ offset += size;
+ length -= size;
+ }
+ return ret;
+}
+
+/*
+ * fallocate: supports plain preallocation (size extension only — no
+ * actual space reservation), FALLOC_FL_KEEP_SIZE, and
+ * FALLOC_FL_PUNCH_HOLE. Any other mode bit is rejected.
+ */
+static long ceph_fallocate(struct file *file, int mode,
+ loff_t offset, loff_t length)
+{
+ struct ceph_file_info *fi = file->private_data;
+ struct inode *inode = file_inode(file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_osd_client *osdc =
+ &ceph_inode_to_client(inode)->client->osdc;
+ int want, got = 0;
+ int dirty;
+ int ret = 0;
+ loff_t endoff = 0;
+ loff_t size;
+
+ /*
+ * Reject unsupported modes explicitly; otherwise e.g.
+ * FALLOC_FL_ZERO_RANGE would silently be treated as a plain
+ * allocate. The VFS contract expects -EOPNOTSUPP here.
+ */
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ return -EOPNOTSUPP;
+
+ if (!S_ISREG(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ if (IS_SWAPFILE(inode))
+ return -ETXTBSY;
+
+ mutex_lock(&inode->i_mutex);
+
+ if (ceph_snap(inode) != CEPH_NOSNAP) {
+ ret = -EROFS;
+ goto unlock;
+ }
+
+ /* allocation on a full cluster fails; punching holes frees space */
+ if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) &&
+ !(mode & FALLOC_FL_PUNCH_HOLE)) {
+ ret = -ENOSPC;
+ goto unlock;
+ }
+
+ size = i_size_read(inode);
+ if (!(mode & FALLOC_FL_KEEP_SIZE))
+ endoff = offset + length;
+
+ if (fi->fmode & CEPH_FILE_MODE_LAZY)
+ want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
+ else
+ want = CEPH_CAP_FILE_BUFFER;
+
+ ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
+ if (ret < 0)
+ goto unlock;
+
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
+ if (offset < size)
+ ceph_zero_pagecache_range(inode, offset, length);
+ ret = ceph_zero_objects(inode, offset, length);
+ } else if (endoff > size) {
+ /* extending: invalidate stale pages past old EOF, grow size */
+ truncate_pagecache_range(inode, size, -1);
+ if (ceph_inode_set_size(inode, endoff))
+ ceph_check_caps(ceph_inode(inode),
+ CHECK_CAPS_AUTHONLY, NULL);
+ }
+
+ if (!ret) {
+ /* mark Fw dirty so size/mtime get flushed to the MDS */
+ spin_lock(&ci->i_ceph_lock);
+ dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
+ spin_unlock(&ci->i_ceph_lock);
+ if (dirty)
+ __mark_inode_dirty(inode, dirty);
+ }
+
+ ceph_put_cap_refs(ci, got);
+unlock:
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+}
+
+/*
+ * file operations for regular cephfs files; sync read/write are
+ * implemented via the generic do_sync_* wrappers over the aio entry
+ * points above.
+ */
+const struct file_operations ceph_file_fops = {
+ .open = ceph_open,
+ .release = ceph_release,
+ .llseek = ceph_llseek,
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .aio_read = ceph_aio_read,
+ .aio_write = ceph_aio_write,
+ .mmap = ceph_mmap,
+ .fsync = ceph_fsync,
+ .lock = ceph_lock,
+ .flock = ceph_flock,
+ .splice_read = generic_file_splice_read,
+ .splice_write = generic_file_splice_write,
+ .unlocked_ioctl = ceph_ioctl,
+ .compat_ioctl = ceph_ioctl,
+ .fallocate = ceph_fallocate,
+};
+
diff --git a/ceph/inode.c b/ceph/inode.c
new file mode 100644
index 0000000..233c6f9
--- /dev/null
+++ b/ceph/inode.c
@@ -0,0 +1,1927 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/namei.h>
+#include <linux/writeback.h>
+#include <linux/vmalloc.h>
+#include <linux/posix_acl.h>
+
+#include "super.h"
+#include "mds_client.h"
+#include "cache.h"
+#include <linux/ceph/decode.h>
+
+/*
+ * Ceph inode operations
+ *
+ * Implement basic inode helpers (get, alloc) and inode ops (getattr,
+ * setattr, etc.), xattr helpers, and helpers for assimilating
+ * metadata returned by the MDS into our cache.
+ *
+ * Also define helpers for doing asynchronous writeback, invalidation,
+ * and truncation for the benefit of those who can't afford to block
+ * (typically because they are in the message handler path).
+ */
+
+static const struct inode_operations ceph_symlink_iops;
+
+static void ceph_invalidate_work(struct work_struct *work);
+static void ceph_writeback_work(struct work_struct *work);
+static void ceph_vmtruncate_work(struct work_struct *work);
+
+/*
+ * find or create an inode, given the ceph ino number
+ */
+static int ceph_set_ino_cb(struct inode *inode, void *data)
+{
+ ceph_inode(inode)->i_vino = *(struct ceph_vino *)data;
+ inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data);
+ return 0;
+}
+
/*
 * Fetch (or create) the in-core inode for a given ceph vino.
 *
 * iget5_locked() hashes on the squashed ino_t value; ceph_ino_compare
 * presumably matches on the full vino so distinct snapshots of the
 * same ino map to distinct inodes — confirm against its definition.
 *
 * Returns a referenced inode, or ERR_PTR(-ENOMEM).
 */
struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino)
{
	struct inode *inode;
	ino_t t = ceph_vino_to_ino(vino);

	inode = iget5_locked(sb, t, ceph_ino_compare, ceph_set_ino_cb, &vino);
	if (inode == NULL)
		return ERR_PTR(-ENOMEM);
	if (inode->i_state & I_NEW) {
		dout("get_inode created new inode %p %llx.%llx ino %llx\n",
		     inode, ceph_vinop(inode), (u64)inode->i_ino);
		/* make the new inode visible to concurrent lookups */
		unlock_new_inode(inode);
	}

	dout("get_inode on %lu=%llx.%llx got %p\n", inode->i_ino, vino.ino,
	     vino.snap, inode);
	return inode;
}
+
+/*
+ * get/construct snapdir inode for a given directory
+ */
+struct inode *ceph_get_snapdir(struct inode *parent)
+{
+ struct ceph_vino vino = {
+ .ino = ceph_ino(parent),
+ .snap = CEPH_SNAPDIR,
+ };
+ struct inode *inode = ceph_get_inode(parent->i_sb, vino);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ BUG_ON(!S_ISDIR(parent->i_mode));
+ if (IS_ERR(inode))
+ return inode;
+ inode->i_mode = parent->i_mode;
+ inode->i_uid = parent->i_uid;
+ inode->i_gid = parent->i_gid;
+ inode->i_op = &ceph_dir_iops;
+ inode->i_fop = &ceph_dir_fops;
+ ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */
+ ci->i_rbytes = 0;
+ return inode;
+}
+
/*
 * Inode operations for regular files; fill_inode() also installs this
 * table on special files (fifo/blk/chr/sock).  Attribute and xattr
 * handling plus POSIX ACL get/set route to ceph implementations.
 */
const struct inode_operations ceph_file_iops = {
	.permission = ceph_permission,
	.setattr = ceph_setattr,
	.getattr = ceph_getattr,
	.setxattr = ceph_setxattr,
	.getxattr = ceph_getxattr,
	.listxattr = ceph_listxattr,
	.removexattr = ceph_removexattr,
	.get_acl = ceph_get_acl,
	.set_acl = ceph_set_acl,
};
+
+
+/*
+ * We use a 'frag tree' to keep track of the MDS's directory fragments
+ * for a given inode (usually there is just a single fragment). We
+ * need to know when a child frag is delegated to a new MDS, or when
+ * it is flagged as replicated, so we can direct our requests
+ * accordingly.
+ */
+
+/*
+ * find/create a frag in the tree
+ */
+static struct ceph_inode_frag *__get_or_create_frag(struct ceph_inode_info *ci,
+ u32 f)
+{
+ struct rb_node **p;
+ struct rb_node *parent = NULL;
+ struct ceph_inode_frag *frag;
+ int c;
+
+ p = &ci->i_fragtree.rb_node;
+ while (*p) {
+ parent = *p;
+ frag = rb_entry(parent, struct ceph_inode_frag, node);
+ c = ceph_frag_compare(f, frag->frag);
+ if (c < 0)
+ p = &(*p)->rb_left;
+ else if (c > 0)
+ p = &(*p)->rb_right;
+ else
+ return frag;
+ }
+
+ frag = kmalloc(sizeof(*frag), GFP_NOFS);
+ if (!frag) {
+ pr_err("__get_or_create_frag ENOMEM on %p %llx.%llx "
+ "frag %x\n", &ci->vfs_inode,
+ ceph_vinop(&ci->vfs_inode), f);
+ return ERR_PTR(-ENOMEM);
+ }
+ frag->frag = f;
+ frag->split_by = 0;
+ frag->mds = -1;
+ frag->ndist = 0;
+
+ rb_link_node(&frag->node, parent, p);
+ rb_insert_color(&frag->node, &ci->i_fragtree);
+
+ dout("get_or_create_frag added %llx.%llx frag %x\n",
+ ceph_vinop(&ci->vfs_inode), f);
+ return frag;
+}
+
+/*
+ * find a specific frag @f
+ */
+struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, u32 f)
+{
+ struct rb_node *n = ci->i_fragtree.rb_node;
+
+ while (n) {
+ struct ceph_inode_frag *frag =
+ rb_entry(n, struct ceph_inode_frag, node);
+ int c = ceph_frag_compare(f, frag->frag);
+ if (c < 0)
+ n = n->rb_left;
+ else if (c > 0)
+ n = n->rb_right;
+ else
+ return frag;
+ }
+ return NULL;
+}
+
+/*
+ * Choose frag containing the given value @v. If @pfrag is
+ * specified, copy the frag delegation info to the caller if
+ * it is present.
+ */
/*
 * Walk the frag tree from the root to the leaf that contains value @v.
 *
 * @pfrag: if non-NULL and a frag node with delegation info is found,
 *         it is copied out here.
 * @found: if non-NULL, set to 1 when such a node was found, else 0.
 *
 * Returns the chosen (leaf) frag id.
 */
u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
		     struct ceph_inode_frag *pfrag,
		     int *found)
{
	u32 t = ceph_frag_make(0, 0);	/* start at the root frag */
	struct ceph_inode_frag *frag;
	unsigned nway, i;
	u32 n;

	if (found)
		*found = 0;

	mutex_lock(&ci->i_fragtree_mutex);
	while (1) {
		WARN_ON(!ceph_frag_contains_value(t, v));
		frag = __ceph_find_frag(ci, t);
		if (!frag)
			break; /* t is a leaf */
		if (frag->split_by == 0) {
			/* unsplit node: this is our answer */
			if (pfrag)
				memcpy(pfrag, frag, sizeof(*pfrag));
			if (found)
				*found = 1;
			break;
		}

		/* choose child */
		nway = 1 << frag->split_by;
		dout("choose_frag(%x) %x splits by %d (%d ways)\n", v, t,
		     frag->split_by, nway);
		for (i = 0; i < nway; i++) {
			n = ceph_frag_make_child(t, frag->split_by, i);
			if (ceph_frag_contains_value(n, v)) {
				t = n;
				break;
			}
		}
		/* exactly one child must contain v */
		BUG_ON(i == nway);
	}
	dout("choose_frag(%x) = %x\n", v, t);

	mutex_unlock(&ci->i_fragtree_mutex);
	return t;
}
+
+/*
+ * Process dirfrag (delegation) info from the mds. Include leaf
+ * fragment in tree ONLY if ndist > 0. Otherwise, only
+ * branches/splits are included in i_fragtree)
+ */
/*
 * Process dirfrag (delegation) info from the mds.  Include leaf
 * fragment in tree ONLY if ndist > 0.  Otherwise, only
 * branches/splits are included in i_fragtree.
 *
 * Returns 0, or -ENOMEM if the frag node could not be allocated
 * (treated as non-fatal by this function's design; see comment below).
 */
static int ceph_fill_dirfrag(struct inode *inode,
			     struct ceph_mds_reply_dirfrag *dirinfo)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_inode_frag *frag;
	u32 id = le32_to_cpu(dirinfo->frag);
	int mds = le32_to_cpu(dirinfo->auth);
	int ndist = le32_to_cpu(dirinfo->ndist);
	int i;
	int err = 0;

	mutex_lock(&ci->i_fragtree_mutex);
	if (ndist == 0) {
		/* no delegation info needed. */
		frag = __ceph_find_frag(ci, id);
		if (!frag)
			goto out;
		if (frag->split_by == 0) {
			/* tree leaf, remove */
			dout("fill_dirfrag removed %llx.%llx frag %x"
			     " (no ref)\n", ceph_vinop(inode), id);
			rb_erase(&frag->node, &ci->i_fragtree);
			kfree(frag);
		} else {
			/* tree branch, keep and clear */
			dout("fill_dirfrag cleared %llx.%llx frag %x"
			     " referral\n", ceph_vinop(inode), id);
			frag->mds = -1;
			frag->ndist = 0;
		}
		goto out;
	}


	/* find/add this frag to store mds delegation info */
	frag = __get_or_create_frag(ci, id);
	if (IS_ERR(frag)) {
		/* this is not the end of the world; we can continue
		   with bad/inaccurate delegation info */
		pr_err("fill_dirfrag ENOMEM on mds ref %llx.%llx fg %x\n",
		       ceph_vinop(inode), le32_to_cpu(dirinfo->frag));
		err = -ENOMEM;
		goto out;
	}

	/* record the authoritative mds and replica list (capped) */
	frag->mds = mds;
	frag->ndist = min_t(u32, ndist, CEPH_MAX_DIRFRAG_REP);
	for (i = 0; i < frag->ndist; i++)
		frag->dist[i] = le32_to_cpu(dirinfo->dist[i]);
	dout("fill_dirfrag %llx.%llx frag %x ndist=%d\n",
	     ceph_vinop(inode), frag->frag, frag->ndist);

out:
	mutex_unlock(&ci->i_fragtree_mutex);
	return err;
}
+
+
+/*
+ * initialize a newly allocated inode.
+ */
/*
 * Allocate and initialize a new ceph inode from the inode slab cache.
 * Every field of the ceph-private part is initialized explicitly here;
 * the VFS part of the inode is set up later by the caller/VFS.
 *
 * Returns the embedded struct inode, or NULL on allocation failure.
 */
struct inode *ceph_alloc_inode(struct super_block *sb)
{
	struct ceph_inode_info *ci;
	int i;

	ci = kmem_cache_alloc(ceph_inode_cachep, GFP_NOFS);
	if (!ci)
		return NULL;

	dout("alloc_inode %p\n", &ci->vfs_inode);

	spin_lock_init(&ci->i_ceph_lock);

	/* versioning / completion bookkeeping */
	ci->i_version = 0;
	ci->i_time_warp_seq = 0;
	ci->i_ceph_flags = 0;
	atomic_set(&ci->i_release_count, 1);
	atomic_set(&ci->i_complete_count, 0);
	ci->i_symlink = NULL;

	memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));

	/* directory fragment tree */
	ci->i_fragtree = RB_ROOT;
	mutex_init(&ci->i_fragtree_mutex);

	/* xattr cache */
	ci->i_xattrs.blob = NULL;
	ci->i_xattrs.prealloc_blob = NULL;
	ci->i_xattrs.dirty = false;
	ci->i_xattrs.index = RB_ROOT;
	ci->i_xattrs.count = 0;
	ci->i_xattrs.names_size = 0;
	ci->i_xattrs.vals_size = 0;
	ci->i_xattrs.version = 0;
	ci->i_xattrs.index_version = 0;

	/* capability state */
	ci->i_caps = RB_ROOT;
	ci->i_auth_cap = NULL;
	ci->i_dirty_caps = 0;
	ci->i_flushing_caps = 0;
	INIT_LIST_HEAD(&ci->i_dirty_item);
	INIT_LIST_HEAD(&ci->i_flushing_item);
	ci->i_cap_flush_seq = 0;
	ci->i_cap_flush_last_tid = 0;
	memset(&ci->i_cap_flush_tid, 0, sizeof(ci->i_cap_flush_tid));
	init_waitqueue_head(&ci->i_cap_wq);
	ci->i_hold_caps_min = 0;
	ci->i_hold_caps_max = 0;
	INIT_LIST_HEAD(&ci->i_cap_delay_list);
	INIT_LIST_HEAD(&ci->i_cap_snaps);
	ci->i_head_snapc = NULL;
	ci->i_snap_caps = 0;
	ci->i_cap_exporting_issued = 0;

	for (i = 0; i < CEPH_FILE_MODE_NUM; i++)
		ci->i_nr_by_mode[i] = 0;

	/* truncation state */
	mutex_init(&ci->i_truncate_mutex);
	ci->i_truncate_seq = 0;
	ci->i_truncate_size = 0;
	ci->i_truncate_pending = 0;

	/* size/max-size negotiation with the MDS */
	ci->i_max_size = 0;
	ci->i_reported_size = 0;
	ci->i_wanted_max_size = 0;
	ci->i_requested_max_size = 0;

	/* reference counts by usage type */
	ci->i_pin_ref = 0;
	ci->i_rd_ref = 0;
	ci->i_rdcache_ref = 0;
	ci->i_wr_ref = 0;
	ci->i_wb_ref = 0;
	ci->i_wrbuffer_ref = 0;
	ci->i_wrbuffer_ref_head = 0;
	ci->i_shared_gen = 0;
	ci->i_rdcache_gen = 0;
	ci->i_rdcache_revoking = 0;

	INIT_LIST_HEAD(&ci->i_unsafe_writes);
	INIT_LIST_HEAD(&ci->i_unsafe_dirops);
	spin_lock_init(&ci->i_unsafe_lock);

	/* snapshot context */
	ci->i_snap_realm = NULL;
	INIT_LIST_HEAD(&ci->i_snap_realm_item);
	INIT_LIST_HEAD(&ci->i_snap_flush_item);

	/* deferred work (see the work handlers in this file) */
	INIT_WORK(&ci->i_wb_work, ceph_writeback_work);
	INIT_WORK(&ci->i_pg_inv_work, ceph_invalidate_work);

	INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);

	ceph_fscache_inode_init(ci);

	return &ci->vfs_inode;
}
+
+static void ceph_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ kmem_cache_free(ceph_inode_cachep, ci);
+}
+
/*
 * Tear down a ceph inode: release caps, drop any residual snap realm
 * reference, free the symlink target, frag tree and xattr buffers,
 * then hand the memory back via an RCU-deferred free.
 */
void ceph_destroy_inode(struct inode *inode)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_inode_frag *frag;
	struct rb_node *n;

	dout("destroy_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode));

	ceph_fscache_unregister_inode_cookie(ci);

	ceph_queue_caps_release(inode);

	/*
	 * we may still have a snap_realm reference if there are stray
	 * caps in i_cap_exporting_issued or i_snap_caps.
	 */
	if (ci->i_snap_realm) {
		struct ceph_mds_client *mdsc =
			ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
		struct ceph_snap_realm *realm = ci->i_snap_realm;

		dout(" dropping residual ref to snap realm %p\n", realm);
		spin_lock(&realm->inodes_with_caps_lock);
		list_del_init(&ci->i_snap_realm_item);
		spin_unlock(&realm->inodes_with_caps_lock);
		ceph_put_snap_realm(mdsc, realm);
	}

	kfree(ci->i_symlink);
	/* drain and free the whole frag rbtree */
	while ((n = rb_first(&ci->i_fragtree)) != NULL) {
		frag = rb_entry(n, struct ceph_inode_frag, node);
		rb_erase(n, &ci->i_fragtree);
		kfree(frag);
	}

	__ceph_destroy_xattrs(ci);
	if (ci->i_xattrs.blob)
		ceph_buffer_put(ci->i_xattrs.blob);
	if (ci->i_xattrs.prealloc_blob)
		ceph_buffer_put(ci->i_xattrs.prealloc_blob);

	/* defer the actual free until after the RCU grace period */
	call_rcu(&inode->i_rcu, ceph_i_callback);
}
+
/*
 * VFS ->drop_inode: returning 1 tells the VFS to evict the inode as
 * soon as its last reference is dropped, rather than caching it.
 */
int ceph_drop_inode(struct inode *inode)
{
	/*
	 * Positive dentry and corresponding inode are always accompanied
	 * in MDS reply. So no need to keep inode in the cache after
	 * dropping all its aliases.
	 */
	return 1;
}
+
+/*
+ * Helpers to fill in size, ctime, mtime, and atime. We have to be
+ * careful because either the client or MDS may have more up to date
+ * info, depending on which capabilities are held, and whether
+ * time_warp_seq or truncate_seq have increased. (Ordinarily, mtime
+ * and size are monotonically increasing, except when utimes() or
+ * truncate() increments the corresponding _seq values.)
+ */
/*
 * Update i_size/truncate state from MDS-supplied values, respecting
 * truncate_seq ordering (see the comment block above for the
 * monotonicity rules).
 *
 * Returns 1 if a vmtruncate needs to be queued by the caller, else 0.
 * Caller holds i_ceph_lock (NOTE(review): inferred from callers in
 * this file — confirm).
 */
int ceph_fill_file_size(struct inode *inode, int issued,
			u32 truncate_seq, u64 truncate_size, u64 size)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	int queue_trunc = 0;

	/* take the new size if the MDS's view is newer (or grew) */
	if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 ||
	    (truncate_seq == ci->i_truncate_seq && size > inode->i_size)) {
		dout("size %lld -> %llu\n", inode->i_size, size);
		inode->i_size = size;
		inode->i_blocks = (size + (1<<9) - 1) >> 9;
		ci->i_reported_size = size;
		if (truncate_seq != ci->i_truncate_seq) {
			dout("truncate_seq %u -> %u\n",
			     ci->i_truncate_seq, truncate_seq);
			ci->i_truncate_seq = truncate_seq;

			/* the MDS should have revoked these caps */
			WARN_ON_ONCE(issued & (CEPH_CAP_FILE_EXCL |
					       CEPH_CAP_FILE_RD |
					       CEPH_CAP_FILE_WR |
					       CEPH_CAP_FILE_LAZYIO));
			/*
			 * If we hold relevant caps, or in the case where we're
			 * not the only client referencing this file and we
			 * don't hold those caps, then we need to check whether
			 * the file is either opened or mmaped
			 */
			if ((issued & (CEPH_CAP_FILE_CACHE|
				       CEPH_CAP_FILE_BUFFER)) ||
			    mapping_mapped(inode->i_mapping) ||
			    __ceph_caps_file_wanted(ci)) {
				ci->i_truncate_pending++;
				queue_trunc = 1;
			}
		}
	}
	/* truncate_size only advances with (or at) the current seq */
	if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) >= 0 &&
	    ci->i_truncate_size != truncate_size) {
		dout("truncate_size %lld -> %llu\n", ci->i_truncate_size,
		     truncate_size);
		ci->i_truncate_size = truncate_size;
	}

	if (queue_trunc)
		ceph_fscache_invalidate(inode);

	return queue_trunc;
}
+
/*
 * Merge MDS-supplied c/m/atime into the inode, arbitrating between
 * local and MDS values via time_warp_seq and the caps we hold:
 *  - with write/excl caps, only move times forward (or take the MDS
 *    values wholesale on a time warp);
 *  - without such caps, the MDS values are authoritative.
 */
void ceph_fill_file_time(struct inode *inode, int issued,
			 u64 time_warp_seq, struct timespec *ctime,
			 struct timespec *mtime, struct timespec *atime)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	int warn = 0;

	if (issued & (CEPH_CAP_FILE_EXCL|
		      CEPH_CAP_FILE_WR|
		      CEPH_CAP_FILE_BUFFER|
		      CEPH_CAP_AUTH_EXCL|
		      CEPH_CAP_XATTR_EXCL)) {
		/* ctime only ever moves forward here */
		if (timespec_compare(ctime, &inode->i_ctime) > 0) {
			dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n",
			     inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
			     ctime->tv_sec, ctime->tv_nsec);
			inode->i_ctime = *ctime;
		}
		if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) {
			/* the MDS did a utimes() */
			dout("mtime %ld.%09ld -> %ld.%09ld "
			     "tw %d -> %d\n",
			     inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
			     mtime->tv_sec, mtime->tv_nsec,
			     ci->i_time_warp_seq, (int)time_warp_seq);

			inode->i_mtime = *mtime;
			inode->i_atime = *atime;
			ci->i_time_warp_seq = time_warp_seq;
		} else if (time_warp_seq == ci->i_time_warp_seq) {
			/* nobody did utimes(); take the max */
			if (timespec_compare(mtime, &inode->i_mtime) > 0) {
				dout("mtime %ld.%09ld -> %ld.%09ld inc\n",
				     inode->i_mtime.tv_sec,
				     inode->i_mtime.tv_nsec,
				     mtime->tv_sec, mtime->tv_nsec);
				inode->i_mtime = *mtime;
			}
			if (timespec_compare(atime, &inode->i_atime) > 0) {
				dout("atime %ld.%09ld -> %ld.%09ld inc\n",
				     inode->i_atime.tv_sec,
				     inode->i_atime.tv_nsec,
				     atime->tv_sec, atime->tv_nsec);
				inode->i_atime = *atime;
			}
		} else if (issued & CEPH_CAP_FILE_EXCL) {
			/* we did a utimes(); ignore mds values */
		} else {
			warn = 1;
		}
	} else {
		/* we have no write|excl caps; whatever the MDS says is true */
		if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) {
			inode->i_ctime = *ctime;
			inode->i_mtime = *mtime;
			inode->i_atime = *atime;
			ci->i_time_warp_seq = time_warp_seq;
		} else {
			warn = 1;
		}
	}
	if (warn) /* time_warp_seq shouldn't go backwards */
		dout("%p mds time_warp_seq %llu < %u\n",
		     inode, time_warp_seq, ci->i_time_warp_seq);
}
+
+/*
+ * Populate an inode based on info from mds. May be called on new or
+ * existing inodes.
+ */
/*
 * Populate an inode based on info from mds.  May be called on new or
 * existing inodes.
 *
 * @iinfo: parsed per-inode info from the MDS reply
 * @dirinfo: optional dirfrag delegation info (may be NULL)
 * @session: MDS session the reply arrived on
 * @ttl_from: request start time (NOTE(review): unused in this body —
 *            presumably kept for symmetry with lease handling; confirm)
 * @cap_fmode: file mode to take a cap/fmode reference for, or -1
 * @caps_reservation: preallocated cap reservation for ceph_add_cap()
 *
 * Returns 0 on success or a negative errno.
 */
static int fill_inode(struct inode *inode,
		      struct ceph_mds_reply_info_in *iinfo,
		      struct ceph_mds_reply_dirfrag *dirinfo,
		      struct ceph_mds_session *session,
		      unsigned long ttl_from, int cap_fmode,
		      struct ceph_cap_reservation *caps_reservation)
{
	struct ceph_mds_reply_inode *info = iinfo->in;
	struct ceph_inode_info *ci = ceph_inode(inode);
	int i;
	int issued = 0, implemented;
	struct timespec mtime, atime, ctime;
	u32 nsplits;
	struct ceph_inode_frag *frag;
	struct rb_node *rb_node;
	struct ceph_buffer *xattr_blob = NULL;
	int err = 0;
	int queue_trunc = 0;

	dout("fill_inode %p ino %llx.%llx v %llu had %llu\n",
	     inode, ceph_vinop(inode), le64_to_cpu(info->version),
	     ci->i_version);

	/*
	 * prealloc xattr data, if it looks like we'll need it.  only
	 * if len > 4 (meaning there are actually xattrs; the first 4
	 * bytes are the xattr count).
	 */
	if (iinfo->xattr_len > 4) {
		xattr_blob = ceph_buffer_new(iinfo->xattr_len, GFP_NOFS);
		if (!xattr_blob)
			pr_err("fill_inode ENOMEM xattr blob %d bytes\n",
			       iinfo->xattr_len);
	}

	spin_lock(&ci->i_ceph_lock);

	/*
	 * provided version will be odd if inode value is projected,
	 * even if stable.  skip the update if we have newer stable
	 * info (ours>=theirs, e.g. due to racing mds replies), unless
	 * we are getting projected (unstable) info (in which case the
	 * version is odd, and we want ours>theirs).
	 *   us   them
	 *   2    2     skip
	 *   3    2     skip
	 *   3    3     update
	 */
	if (le64_to_cpu(info->version) > 0 &&
	    (ci->i_version & ~1) >= le64_to_cpu(info->version))
		goto no_change;

	issued = __ceph_caps_issued(ci, &implemented);
	issued |= implemented | __ceph_caps_dirty(ci);

	/* update inode */
	ci->i_version = le64_to_cpu(info->version);
	inode->i_version++;
	inode->i_rdev = le32_to_cpu(info->rdev);

	/* only trust MDS ownership info if we don't hold AUTH_EXCL */
	if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
		inode->i_mode = le32_to_cpu(info->mode);
		inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid));
		inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid));
		dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
		     from_kuid(&init_user_ns, inode->i_uid),
		     from_kgid(&init_user_ns, inode->i_gid));
	}

	if ((issued & CEPH_CAP_LINK_EXCL) == 0)
		set_nlink(inode, le32_to_cpu(info->nlink));

	/* be careful with mtime, atime, size */
	ceph_decode_timespec(&atime, &info->atime);
	ceph_decode_timespec(&mtime, &info->mtime);
	ceph_decode_timespec(&ctime, &info->ctime);
	queue_trunc = ceph_fill_file_size(inode, issued,
					  le32_to_cpu(info->truncate_seq),
					  le64_to_cpu(info->truncate_size),
					  le64_to_cpu(info->size));
	ceph_fill_file_time(inode, issued,
			    le32_to_cpu(info->time_warp_seq),
			    &ctime, &mtime, &atime);

	ci->i_layout = info->layout;
	inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;

	/* xattrs */
	/* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */
	if ((issued & CEPH_CAP_XATTR_EXCL) == 0 &&
	    le64_to_cpu(info->xattr_version) > ci->i_xattrs.version) {
		if (ci->i_xattrs.blob)
			ceph_buffer_put(ci->i_xattrs.blob);
		ci->i_xattrs.blob = xattr_blob;
		if (xattr_blob)
			memcpy(ci->i_xattrs.blob->vec.iov_base,
			       iinfo->xattr_data, iinfo->xattr_len);
		ci->i_xattrs.version = le64_to_cpu(info->xattr_version);
		ceph_forget_all_cached_acls(inode);
		xattr_blob = NULL;	/* ownership transferred above */
	}

	inode->i_mapping->a_ops = &ceph_aops;
	inode->i_mapping->backing_dev_info =
		&ceph_sb_to_client(inode->i_sb)->backing_dev_info;

	/* install type-specific ops */
	switch (inode->i_mode & S_IFMT) {
	case S_IFIFO:
	case S_IFBLK:
	case S_IFCHR:
	case S_IFSOCK:
		init_special_inode(inode, inode->i_mode, inode->i_rdev);
		inode->i_op = &ceph_file_iops;
		break;
	case S_IFREG:
		inode->i_op = &ceph_file_iops;
		inode->i_fop = &ceph_file_fops;
		break;
	case S_IFLNK:
		inode->i_op = &ceph_symlink_iops;
		if (!ci->i_symlink) {
			u32 symlen = iinfo->symlink_len;
			char *sym;

			/* drop the lock around the allocation */
			spin_unlock(&ci->i_ceph_lock);

			err = -EINVAL;
			if (WARN_ON(symlen != inode->i_size))
				goto out;

			err = -ENOMEM;
			sym = kstrndup(iinfo->symlink, symlen, GFP_NOFS);
			if (!sym)
				goto out;

			spin_lock(&ci->i_ceph_lock);
			if (!ci->i_symlink)
				ci->i_symlink = sym;
			else
				kfree(sym); /* lost a race */
		}
		break;
	case S_IFDIR:
		inode->i_op = &ceph_dir_iops;
		inode->i_fop = &ceph_dir_fops;

		ci->i_dir_layout = iinfo->dir_layout;

		/* recursive stats from the MDS */
		ci->i_files = le64_to_cpu(info->files);
		ci->i_subdirs = le64_to_cpu(info->subdirs);
		ci->i_rbytes = le64_to_cpu(info->rbytes);
		ci->i_rfiles = le64_to_cpu(info->rfiles);
		ci->i_rsubdirs = le64_to_cpu(info->rsubdirs);
		ceph_decode_timespec(&ci->i_rctime, &info->rctime);
		break;
	default:
		pr_err("fill_inode %llx.%llx BAD mode 0%o\n",
		       ceph_vinop(inode), inode->i_mode);
	}

	/* set dir completion flag? */
	if (S_ISDIR(inode->i_mode) &&
	    ci->i_files == 0 && ci->i_subdirs == 0 &&
	    ceph_snap(inode) == CEPH_NOSNAP &&
	    (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
	    (issued & CEPH_CAP_FILE_EXCL) == 0 &&
	    !__ceph_dir_is_complete(ci)) {
		dout(" marking %p complete (empty)\n", inode);
		__ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
	}
no_change:
	/* only update max_size on auth cap */
	if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
	    ci->i_max_size != le64_to_cpu(info->max_size)) {
		dout("max_size %lld -> %llu\n", ci->i_max_size,
		     le64_to_cpu(info->max_size));
		ci->i_max_size = le64_to_cpu(info->max_size);
	}

	spin_unlock(&ci->i_ceph_lock);

	/* queue truncate if we saw i_size decrease */
	if (queue_trunc)
		ceph_queue_vmtruncate(inode);

	/* populate frag tree */
	/* FIXME: move me up, if/when version reflects fragtree changes */
	nsplits = le32_to_cpu(info->fragtree.nsplits);
	mutex_lock(&ci->i_fragtree_mutex);
	rb_node = rb_first(&ci->i_fragtree);
	/*
	 * Merge the sorted split list against the sorted rbtree:
	 * drop stale local frags, update or insert the reported ones.
	 */
	for (i = 0; i < nsplits; i++) {
		u32 id = le32_to_cpu(info->fragtree.splits[i].frag);
		frag = NULL;
		while (rb_node) {
			frag = rb_entry(rb_node, struct ceph_inode_frag, node);
			if (ceph_frag_compare(frag->frag, id) >= 0) {
				if (frag->frag != id)
					frag = NULL;
				else
					rb_node = rb_next(rb_node);
				break;
			}
			rb_node = rb_next(rb_node);
			/* local frag not in the reply: discard it */
			rb_erase(&frag->node, &ci->i_fragtree);
			kfree(frag);
			frag = NULL;
		}
		if (!frag) {
			frag = __get_or_create_frag(ci, id);
			if (IS_ERR(frag))
				continue;
		}
		frag->split_by = le32_to_cpu(info->fragtree.splits[i].by);
		dout(" frag %x split by %d\n", frag->frag, frag->split_by);
	}
	/* anything left in the tree is stale */
	while (rb_node) {
		frag = rb_entry(rb_node, struct ceph_inode_frag, node);
		rb_node = rb_next(rb_node);
		rb_erase(&frag->node, &ci->i_fragtree);
		kfree(frag);
	}
	mutex_unlock(&ci->i_fragtree_mutex);

	/* were we issued a capability? */
	if (info->cap.caps) {
		if (ceph_snap(inode) == CEPH_NOSNAP) {
			ceph_add_cap(inode, session,
				     le64_to_cpu(info->cap.cap_id),
				     cap_fmode,
				     le32_to_cpu(info->cap.caps),
				     le32_to_cpu(info->cap.wanted),
				     le32_to_cpu(info->cap.seq),
				     le32_to_cpu(info->cap.mseq),
				     le64_to_cpu(info->cap.realm),
				     info->cap.flags,
				     caps_reservation);
		} else {
			/* snapshot inodes track caps separately */
			spin_lock(&ci->i_ceph_lock);
			dout(" %p got snap_caps %s\n", inode,
			     ceph_cap_string(le32_to_cpu(info->cap.caps)));
			ci->i_snap_caps |= le32_to_cpu(info->cap.caps);
			if (cap_fmode >= 0)
				__ceph_get_fmode(ci, cap_fmode);
			spin_unlock(&ci->i_ceph_lock);
		}
	} else if (cap_fmode >= 0) {
		pr_warning("mds issued no caps on %llx.%llx\n",
			   ceph_vinop(inode));
		__ceph_get_fmode(ci, cap_fmode);
	}

	/* update delegation info? */
	if (dirinfo)
		ceph_fill_dirfrag(inode, dirinfo);

	err = 0;

out:
	if (xattr_blob)
		ceph_buffer_put(xattr_blob);
	return err;
}
+
+/*
+ * caller should hold session s_mutex.
+ */
/*
 * caller should hold session s_mutex.
 *
 * Record/refresh the MDS dentry lease on @dentry.  The lease TTLs
 * are computed from @from_time (when the request was started) plus
 * the MDS-granted duration; half_ttl is when we should start renewing.
 */
static void update_dentry_lease(struct dentry *dentry,
				struct ceph_mds_reply_lease *lease,
				struct ceph_mds_session *session,
				unsigned long from_time)
{
	struct ceph_dentry_info *di = ceph_dentry(dentry);
	long unsigned duration = le32_to_cpu(lease->duration_ms);
	long unsigned ttl = from_time + (duration * HZ) / 1000;
	long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000;
	struct inode *dir;

	/* only track leases on regular dentries */
	if (dentry->d_op != &ceph_dentry_ops)
		return;

	spin_lock(&dentry->d_lock);
	dout("update_dentry_lease %p duration %lu ms ttl %lu\n",
	     dentry, duration, ttl);

	/* make lease_rdcache_gen match directory */
	dir = dentry->d_parent->d_inode;
	di->lease_shared_gen = ceph_inode(dir)->i_shared_gen;

	if (duration == 0)
		goto out_unlock;

	if (di->lease_gen == session->s_cap_gen &&
	    time_before(ttl, dentry->d_time))
		goto out_unlock;  /* we already have a newer lease. */

	/* don't steal a lease held via a different session */
	if (di->lease_session && di->lease_session != session)
		goto out_unlock;

	ceph_dentry_lru_touch(dentry);

	if (!di->lease_session)
		di->lease_session = ceph_get_mds_session(session);
	di->lease_gen = session->s_cap_gen;
	di->lease_seq = le32_to_cpu(lease->seq);
	di->lease_renew_after = half_ttl;
	di->lease_renew_from = 0;
	dentry->d_time = ttl;
out_unlock:
	spin_unlock(&dentry->d_lock);
	return;
}
+
+/*
+ * splice a dentry to an inode.
+ * caller must hold directory i_mutex for this to be safe.
+ *
+ * we will only rehash the resulting dentry if @prehash is
+ * true; @prehash will be set to false (for the benefit of
+ * the caller) if we fail.
+ */
/*
 * splice a dentry to an inode.
 * caller must hold directory i_mutex for this to be safe.
 *
 * we will only rehash the resulting dentry if @prehash is
 * true; @prehash will be set to false (for the benefit of
 * the caller) if we fail.
 *
 * Returns the (possibly different) dentry now bound to @in, or an
 * ERR_PTR on failure.  On success the caller's reference moves to the
 * returned dentry.
 */
static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
				    bool *prehash)
{
	struct dentry *realdn;

	BUG_ON(dn->d_inode);

	/* dn must be unhashed */
	if (!d_unhashed(dn))
		d_drop(dn);
	realdn = d_materialise_unique(dn, in);
	if (IS_ERR(realdn)) {
		pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
		       PTR_ERR(realdn), dn, in, ceph_vinop(in));
		if (prehash)
			*prehash = false; /* don't rehash on error */
		dn = realdn; /* note realdn contains the error */
		goto out;
	} else if (realdn) {
		/* an existing alias for @in was found and used instead */
		dout("dn %p (%d) spliced with %p (%d) "
		     "inode %p ino %llx.%llx\n",
		     dn, d_count(dn),
		     realdn, d_count(realdn),
		     realdn->d_inode, ceph_vinop(realdn->d_inode));
		dput(dn);
		dn = realdn;
	} else {
		BUG_ON(!ceph_dentry(dn));
		dout("dn %p attached to %p ino %llx.%llx\n",
		     dn, dn->d_inode, ceph_vinop(dn->d_inode));
	}
	if ((!prehash || *prehash) && d_unhashed(dn))
		d_rehash(dn);
out:
	return dn;
}
+
+/*
+ * Incorporate results into the local cache. This is either just
+ * one inode, or a directory, dentry, and possibly linked-to inode (e.g.,
+ * after a lookup).
+ *
+ * A reply may contain
+ * a directory inode along with a dentry.
+ * and/or a target inode
+ *
+ * Called with snap_rwsem (read).
+ */
/*
 * Incorporate an MDS reply trace into the local cache: fill the
 * directory inode, resolve/splice the dentry, fill the target inode,
 * and refresh dentry leases.  See the comment block above for the
 * reply shapes handled.
 *
 * Returns 0 on success or a negative errno.
 */
int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
		    struct ceph_mds_session *session)
{
	struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
	struct inode *in = NULL;
	struct ceph_vino vino;
	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
	int err = 0;

	dout("fill_trace %p is_dentry %d is_target %d\n", req,
	     rinfo->head->is_dentry, rinfo->head->is_target);

#if 0
	/*
	 * Debugging hook:
	 *
	 * If we resend completed ops to a recovering mds, we get no
	 * trace.  Since that is very rare, pretend this is the case
	 * to ensure the 'no trace' handlers in the callers behave.
	 *
	 * Fill in inodes unconditionally to avoid breaking cap
	 * invariants.
	 */
	if (rinfo->head->op & CEPH_MDS_OP_WRITE) {
		pr_info("fill_trace faking empty trace on %lld %s\n",
			req->r_tid, ceph_mds_op_name(rinfo->head->op));
		if (rinfo->head->is_dentry) {
			rinfo->head->is_dentry = 0;
			err = fill_inode(req->r_locked_dir,
					 &rinfo->diri, rinfo->dirfrag,
					 session, req->r_request_started, -1);
		}
		if (rinfo->head->is_target) {
			rinfo->head->is_target = 0;
			ininfo = rinfo->targeti.in;
			vino.ino = le64_to_cpu(ininfo->ino);
			vino.snap = le64_to_cpu(ininfo->snapid);
			in = ceph_get_inode(sb, vino);
			err = fill_inode(in, &rinfo->targeti, NULL,
					 session, req->r_request_started,
					 req->r_fmode);
			iput(in);
		}
	}
#endif

	if (!rinfo->head->is_target && !rinfo->head->is_dentry) {
		dout("fill_trace reply is empty!\n");
		if (rinfo->head->result == 0 && req->r_locked_dir)
			ceph_invalidate_dir_request(req);
		return 0;
	}

	if (rinfo->head->is_dentry) {
		struct inode *dir = req->r_locked_dir;

		if (dir) {
			err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag,
					 session, req->r_request_started, -1,
					 &req->r_caps_reservation);
			if (err < 0)
				goto done;
		} else {
			WARN_ON_ONCE(1);
		}

		/* LOOKUPNAME: the MDS told us the name; build the dentry */
		if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME) {
			struct qstr dname;
			struct dentry *dn, *parent;

			BUG_ON(!rinfo->head->is_target);
			BUG_ON(req->r_dentry);

			parent = d_find_any_alias(dir);
			BUG_ON(!parent);

			dname.name = rinfo->dname;
			dname.len = rinfo->dname_len;
			dname.hash = full_name_hash(dname.name, dname.len);
			vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
			vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
retry_lookup:
			dn = d_lookup(parent, &dname);
			dout("d_lookup on parent=%p name=%.*s got %p\n",
			     parent, dname.len, dname.name, dn);

			if (!dn) {
				dn = d_alloc(parent, &dname);
				dout("d_alloc %p '%.*s' = %p\n", parent,
				     dname.len, dname.name, dn);
				if (dn == NULL) {
					dput(parent);
					err = -ENOMEM;
					goto done;
				}
				err = ceph_init_dentry(dn);
				if (err < 0) {
					dput(dn);
					dput(parent);
					goto done;
				}
			} else if (dn->d_inode &&
				   (ceph_ino(dn->d_inode) != vino.ino ||
				    ceph_snap(dn->d_inode) != vino.snap)) {
				/* stale dentry: drop it and retry */
				dout(" dn %p points to wrong inode %p\n",
				     dn, dn->d_inode);
				d_delete(dn);
				dput(dn);
				goto retry_lookup;
			}

			req->r_dentry = dn;
			dput(parent);
		}
	}

	if (rinfo->head->is_target) {
		vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
		vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);

		in = ceph_get_inode(sb, vino);
		if (IS_ERR(in)) {
			err = PTR_ERR(in);
			goto done;
		}
		req->r_target_inode = in;

		/* take fmode/cap refs only for successful, live requests */
		err = fill_inode(in, &rinfo->targeti, NULL,
				session, req->r_request_started,
				(!req->r_aborted && rinfo->head->result == 0) ?
				req->r_fmode : -1,
				&req->r_caps_reservation);
		if (err < 0) {
			pr_err("fill_inode badness %p %llx.%llx\n",
				in, ceph_vinop(in));
			goto done;
		}
	}

	/*
	 * ignore null lease/binding on snapdir ENOENT, or else we
	 * will have trouble splicing in the virtual snapdir later
	 */
	if (rinfo->head->is_dentry && !req->r_aborted &&
	    req->r_locked_dir &&
	    (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
					       fsc->mount_options->snapdir_name,
					       req->r_dentry->d_name.len))) {
		/*
		 * lookup link rename   : null -> possibly existing inode
		 * mknod symlink mkdir  : null -> new inode
		 * unlink               : linked -> null
		 */
		struct inode *dir = req->r_locked_dir;
		struct dentry *dn = req->r_dentry;
		bool have_dir_cap, have_lease;

		BUG_ON(!dn);
		BUG_ON(!dir);
		BUG_ON(dn->d_parent->d_inode != dir);
		BUG_ON(ceph_ino(dir) !=
		       le64_to_cpu(rinfo->diri.in->ino));
		BUG_ON(ceph_snap(dir) !=
		       le64_to_cpu(rinfo->diri.in->snapid));

		/* do we have a lease on the whole dir? */
		have_dir_cap =
			(le32_to_cpu(rinfo->diri.in->cap.caps) &
			 CEPH_CAP_FILE_SHARED);

		/* do we have a dn lease? */
		have_lease = have_dir_cap ||
			le32_to_cpu(rinfo->dlease->duration_ms);
		if (!have_lease)
			dout("fill_trace  no dentry lease or dir cap\n");

		/* rename? */
		if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) {
			struct inode *olddir = req->r_old_dentry_dir;
			BUG_ON(!olddir);

			dout(" src %p '%.*s' dst %p '%.*s'\n",
			     req->r_old_dentry,
			     req->r_old_dentry->d_name.len,
			     req->r_old_dentry->d_name.name,
			     dn, dn->d_name.len, dn->d_name.name);
			dout("fill_trace doing d_move %p -> %p\n",
			     req->r_old_dentry, dn);

			d_move(req->r_old_dentry, dn);
			dout(" src %p '%.*s' dst %p '%.*s'\n",
			     req->r_old_dentry,
			     req->r_old_dentry->d_name.len,
			     req->r_old_dentry->d_name.name,
			     dn, dn->d_name.len, dn->d_name.name);

			/* ensure target dentry is invalidated, despite
			   rehashing bug in vfs_rename_dir */
			ceph_invalidate_dentry_lease(dn);

			/* d_move screws up sibling dentries' offsets */
			ceph_dir_clear_complete(dir);
			ceph_dir_clear_complete(olddir);

			dout("dn %p gets new offset %lld\n", req->r_old_dentry,
			     ceph_dentry(req->r_old_dentry)->offset);

			dn = req->r_old_dentry;  /* use old_dentry */
		}

		/* null dentry? */
		if (!rinfo->head->is_target) {
			dout("fill_trace null dentry\n");
			if (dn->d_inode) {
				dout("d_delete %p\n", dn);
				d_delete(dn);
			} else {
				dout("d_instantiate %p NULL\n", dn);
				d_instantiate(dn, NULL);
				if (have_lease && d_unhashed(dn))
					d_rehash(dn);
				update_dentry_lease(dn, rinfo->dlease,
						    session,
						    req->r_request_started);
			}
			goto done;
		}

		/* attach proper inode */
		if (!dn->d_inode) {
			ceph_dir_clear_complete(dir);
			ihold(in);
			dn = splice_dentry(dn, in, &have_lease);
			if (IS_ERR(dn)) {
				err = PTR_ERR(dn);
				goto done;
			}
			req->r_dentry = dn;  /* may have spliced */
		} else if (dn->d_inode && dn->d_inode != in) {
			/* stale binding; don't lease it */
			dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
			     dn, dn->d_inode, ceph_vinop(dn->d_inode),
			     ceph_vinop(in));
			have_lease = false;
		}

		if (have_lease)
			update_dentry_lease(dn, rinfo->dlease, session,
					    req->r_request_started);
		dout(" final dn %p\n", dn);
	} else if (!req->r_aborted &&
		   (req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
		    req->r_op == CEPH_MDS_OP_MKSNAP)) {
		struct dentry *dn = req->r_dentry;
		struct inode *dir = req->r_locked_dir;

		/* fill out a snapdir LOOKUPSNAP dentry */
		BUG_ON(!dn);
		BUG_ON(!dir);
		BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
		dout(" linking snapped dir %p to dn %p\n", in, dn);
		ceph_dir_clear_complete(dir);
		ihold(in);
		dn = splice_dentry(dn, in, NULL);
		if (IS_ERR(dn)) {
			err = PTR_ERR(dn);
			goto done;
		}
		req->r_dentry = dn;  /* may have spliced */
	}
done:
	dout("fill_trace done err=%d\n", err);
	return err;
}
+
+/*
+ * Prepopulate the inode cache with the inodes from a readdir reply,
+ * without touching the dcache.  Used when the request was aborted and
+ * the dentry results can no longer be trusted, but we still want to
+ * absorb the caps/metadata the MDS sent us.
+ *
+ * Returns 0, or the last error encountered (bad entries are skipped).
+ */
+static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
+					   struct ceph_mds_session *session)
+{
+	struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
+	int i, err = 0;
+
+	for (i = 0; i < rinfo->dir_nr; i++) {
+		struct ceph_vino vino;
+		struct inode *in;
+		int rc;
+
+		vino.ino = le64_to_cpu(rinfo->dir_in[i].in->ino);
+		vino.snap = le64_to_cpu(rinfo->dir_in[i].in->snapid);
+
+		in = ceph_get_inode(req->r_dentry->d_sb, vino);
+		if (IS_ERR(in)) {
+			err = PTR_ERR(in);
+			dout("new_inode badness got %d\n", err);
+			continue;
+		}
+		rc = fill_inode(in, &rinfo->dir_in[i], NULL, session,
+				req->r_request_started, -1,
+				&req->r_caps_reservation);
+		if (rc < 0) {
+			pr_err("fill_inode badness on %p got %d\n", in, rc);
+			err = rc;
+		}
+		/* drop the reference taken by ceph_get_inode() */
+		iput(in);
+	}
+
+	return err;
+}
+
+/*
+ * Prepopulate the dcache and icache with readdir results: create or
+ * verify a dentry for each entry, instantiate its inode, and record
+ * dentry leases and readdir offsets.
+ *
+ * Returns 0 or a negative errno; on success req->r_did_prepopulate
+ * is set.
+ */
+int ceph_readdir_prepopulate(struct ceph_mds_request *req,
+			     struct ceph_mds_session *session)
+{
+	struct dentry *parent = req->r_dentry;
+	struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
+	struct qstr dname;
+	struct dentry *dn;
+	struct inode *in;
+	int err = 0, ret, i;
+	struct inode *snapdir = NULL;
+	struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
+	struct ceph_dentry_info *di;
+	u64 r_readdir_offset = req->r_readdir_offset;
+	u32 frag = le32_to_cpu(rhead->args.readdir.frag);
+
+	/* the MDS may have re-split the dirfrag; restart offsets if so */
+	if (rinfo->dir_dir &&
+	    le32_to_cpu(rinfo->dir_dir->frag) != frag) {
+		dout("readdir_prepopulate got new frag %x -> %x\n",
+		     frag, le32_to_cpu(rinfo->dir_dir->frag));
+		frag = le32_to_cpu(rinfo->dir_dir->frag);
+		if (ceph_frag_is_leftmost(frag))
+			r_readdir_offset = 2;	/* skip "." and ".." */
+		else
+			r_readdir_offset = 0;
+	}
+
+	/* aborted request: dentries untrustworthy, update inodes only */
+	if (req->r_aborted)
+		return readdir_prepopulate_inodes_only(req, session);
+
+	if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) {
+		/* entries live under the virtual .snap dir, not parent */
+		snapdir = ceph_get_snapdir(parent->d_inode);
+		parent = d_find_alias(snapdir);
+		dout("readdir_prepopulate %d items under SNAPDIR dn %p\n",
+		     rinfo->dir_nr, parent);
+	} else {
+		dout("readdir_prepopulate %d items under dn %p\n",
+		     rinfo->dir_nr, parent);
+		if (rinfo->dir_dir)
+			ceph_fill_dirfrag(parent->d_inode, rinfo->dir_dir);
+	}
+
+	/* FIXME: release caps/leases if error occurs */
+	for (i = 0; i < rinfo->dir_nr; i++) {
+		struct ceph_vino vino;
+
+		dname.name = rinfo->dir_dname[i];
+		dname.len = rinfo->dir_dname_len[i];
+		dname.hash = full_name_hash(dname.name, dname.len);
+
+		vino.ino = le64_to_cpu(rinfo->dir_in[i].in->ino);
+		vino.snap = le64_to_cpu(rinfo->dir_in[i].in->snapid);
+
+retry_lookup:
+		dn = d_lookup(parent, &dname);
+		dout("d_lookup on parent=%p name=%.*s got %p\n",
+		     parent, dname.len, dname.name, dn);
+
+		if (!dn) {
+			/* no dentry yet: allocate a negative one */
+			dn = d_alloc(parent, &dname);
+			dout("d_alloc %p '%.*s' = %p\n", parent,
+			     dname.len, dname.name, dn);
+			if (dn == NULL) {
+				dout("d_alloc badness\n");
+				err = -ENOMEM;
+				goto out;
+			}
+			ret = ceph_init_dentry(dn);
+			if (ret < 0) {
+				dput(dn);
+				err = ret;
+				goto out;
+			}
+		} else if (dn->d_inode &&
+			   (ceph_ino(dn->d_inode) != vino.ino ||
+			    ceph_snap(dn->d_inode) != vino.snap)) {
+			/* stale dentry: points at a different inode */
+			dout(" dn %p points to wrong inode %p\n",
+			     dn, dn->d_inode);
+			d_delete(dn);
+			dput(dn);
+			goto retry_lookup;
+		} else {
+			/* reorder parent's d_subdirs */
+			spin_lock(&parent->d_lock);
+			spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
+			list_move(&dn->d_u.d_child, &parent->d_subdirs);
+			spin_unlock(&dn->d_lock);
+			spin_unlock(&parent->d_lock);
+		}
+
+		/* inode */
+		if (dn->d_inode) {
+			in = dn->d_inode;
+		} else {
+			in = ceph_get_inode(parent->d_sb, vino);
+			if (IS_ERR(in)) {
+				dout("new_inode badness\n");
+				d_drop(dn);
+				dput(dn);
+				err = PTR_ERR(in);
+				goto out;
+			}
+		}
+
+		if (fill_inode(in, &rinfo->dir_in[i], NULL, session,
+			       req->r_request_started, -1,
+			       &req->r_caps_reservation) < 0) {
+			pr_err("fill_inode badness on %p\n", in);
+			if (!dn->d_inode)
+				iput(in);	/* drop ref from ceph_get_inode */
+			d_drop(dn);
+			goto next_item;
+		}
+
+		if (!dn->d_inode) {
+			/* attach inode; may return a different (aliased) dentry */
+			dn = splice_dentry(dn, in, NULL);
+			if (IS_ERR(dn)) {
+				err = PTR_ERR(dn);
+				dn = NULL;
+				goto next_item;
+			}
+		}
+
+		di = dn->d_fsdata;
+		di->offset = ceph_make_fpos(frag, i + r_readdir_offset);
+
+		update_dentry_lease(dn, rinfo->dir_dlease[i],
+				    req->r_session,
+				    req->r_request_started);
+next_item:
+		if (dn)
+			dput(dn);
+	}
+	if (err == 0)
+		req->r_did_prepopulate = true;
+
+out:
+	if (snapdir) {
+		iput(snapdir);
+		dput(parent);	/* parent was switched to the snapdir alias */
+	}
+	dout("readdir_prepopulate done\n");
+	return err;
+}
+
+/*
+ * Set the local i_size/i_blocks.
+ *
+ * Returns 1 if the caller should report the new size to the MDS (we
+ * are approaching max_size and have not reported recently), else 0.
+ */
+int ceph_inode_set_size(struct inode *inode, loff_t size)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	int ret = 0;
+
+	spin_lock(&ci->i_ceph_lock);
+	dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
+	inode->i_size = size;
+	/* round up to 512-byte blocks */
+	inode->i_blocks = (size + (1 << 9) - 1) >> 9;
+
+	/* tell the MDS if we are approaching max_size */
+	if ((size << 1) >= ci->i_max_size &&
+	    (ci->i_reported_size << 1) < ci->i_max_size)
+		ret = 1;
+
+	spin_unlock(&ci->i_ceph_lock);
+	return ret;
+}
+
+/*
+ * Kick off inode writeback on the client's writeback workqueue.  The
+ * inode reference taken here is dropped by ceph_writeback_work(), or
+ * immediately if the work item was already pending and could not be
+ * queued again.  (Writeback can't run in message handler context.)
+ */
+void ceph_queue_writeback(struct inode *inode)
+{
+	ihold(inode);
+	if (!queue_work(ceph_inode_to_client(inode)->wb_wq,
+			&ceph_inode(inode)->i_wb_work)) {
+		dout("ceph_queue_writeback %p failed\n", inode);
+		iput(inode);
+		return;
+	}
+	dout("ceph_queue_writeback %p\n", inode);
+}
+
+/* worker: flush dirty pages, then drop the ref taken when queued */
+static void ceph_writeback_work(struct work_struct *work)
+{
+	struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
+						  i_wb_work);
+	struct inode *inode = &ci->vfs_inode;
+
+	dout("writeback %p\n", inode);
+	filemap_fdatawrite(&inode->i_data);
+	iput(inode);	/* matches ihold() in ceph_queue_writeback() */
+}
+
+/*
+ * Queue an async page-cache invalidation for this inode.  Holds an
+ * inode reference that ceph_invalidate_work() releases; if the work
+ * item was already pending we drop the reference right away.
+ */
+void ceph_queue_invalidate(struct inode *inode)
+{
+	ihold(inode);
+	if (!queue_work(ceph_inode_to_client(inode)->pg_inv_wq,
+			&ceph_inode(inode)->i_pg_inv_work)) {
+		dout("ceph_queue_invalidate %p failed\n", inode);
+		iput(inode);
+		return;
+	}
+	dout("ceph_queue_invalidate %p\n", inode);
+}
+
+/*
+ * Invalidate inode pages in a worker thread. (This can't be done
+ * in the message handler context.)
+ */
+static void ceph_invalidate_work(struct work_struct *work)
+{
+	struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
+						  i_pg_inv_work);
+	struct inode *inode = &ci->vfs_inode;
+	u32 orig_gen;
+	int check = 0;	/* re-check caps before returning? */
+
+	mutex_lock(&ci->i_truncate_mutex);
+	spin_lock(&ci->i_ceph_lock);
+	dout("invalidate_pages %p gen %d revoking %d\n", inode,
+	     ci->i_rdcache_gen, ci->i_rdcache_revoking);
+	if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
+		/* a cap grant raced us; this revocation is obsolete */
+		if (__ceph_caps_revoking_other(ci, NULL, CEPH_CAP_FILE_CACHE))
+			check = 1;
+		spin_unlock(&ci->i_ceph_lock);
+		mutex_unlock(&ci->i_truncate_mutex);
+		goto out;
+	}
+	orig_gen = ci->i_rdcache_gen;
+	spin_unlock(&ci->i_ceph_lock);
+
+	truncate_inode_pages(inode->i_mapping, 0);
+
+	spin_lock(&ci->i_ceph_lock);
+	if (orig_gen == ci->i_rdcache_gen &&
+	    orig_gen == ci->i_rdcache_revoking) {
+		dout("invalidate_pages %p gen %d successful\n", inode,
+		     ci->i_rdcache_gen);
+		ci->i_rdcache_revoking--;
+		check = 1;
+	} else {
+		/* generation moved while we truncated; don't ack revoke */
+		dout("invalidate_pages %p gen %d raced, now %d revoking %d\n",
+		     inode, orig_gen, ci->i_rdcache_gen,
+		     ci->i_rdcache_revoking);
+		if (__ceph_caps_revoking_other(ci, NULL, CEPH_CAP_FILE_CACHE))
+			check = 1;
+	}
+	spin_unlock(&ci->i_ceph_lock);
+	mutex_unlock(&ci->i_truncate_mutex);
+out:
+	if (check)
+		ceph_check_caps(ci, 0, NULL);
+	iput(inode);	/* matches ihold() in ceph_queue_invalidate() */
+}
+
+
+/*
+ * called by trunc_wq;
+ *
+ * We also truncate in a separate thread as well.
+ */
+static void ceph_vmtruncate_work(struct work_struct *work)
+{
+	struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
+						  i_vmtruncate_work);
+	struct inode *inode = &ci->vfs_inode;
+
+	dout("vmtruncate_work %p\n", inode);
+	__ceph_do_pending_vmtruncate(inode);
+	iput(inode);	/* matches ihold() in ceph_queue_vmtruncate() */
+}
+
+/*
+ * Queue an async vmtruncate.  If we fail to queue work, we will handle
+ * the truncation the next time we call __ceph_do_pending_vmtruncate.
+ * The inode reference taken here is released by ceph_vmtruncate_work()
+ * (or immediately if the work item was already pending).
+ */
+void ceph_queue_vmtruncate(struct inode *inode)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+
+	ihold(inode);
+
+	if (!queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
+			&ci->i_vmtruncate_work)) {
+		dout("ceph_queue_vmtruncate %p failed, pending=%d\n",
+		     inode, ci->i_truncate_pending);
+		iput(inode);
+		return;
+	}
+	dout("ceph_queue_vmtruncate %p\n", inode);
+}
+
+/*
+ * Make sure any pending truncation is applied before doing anything
+ * that may depend on it.
+ *
+ * Takes i_truncate_mutex for the duration and retries until the
+ * truncation we performed still matches the latest i_truncate_size
+ * (a new MDS-driven truncate may race with us).
+ */
+void __ceph_do_pending_vmtruncate(struct inode *inode)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	u64 to;
+	int wrbuffer_refs, finish = 0;
+
+	mutex_lock(&ci->i_truncate_mutex);
+retry:
+	spin_lock(&ci->i_ceph_lock);
+	if (ci->i_truncate_pending == 0) {
+		dout("__do_pending_vmtruncate %p none pending\n", inode);
+		spin_unlock(&ci->i_ceph_lock);
+		mutex_unlock(&ci->i_truncate_mutex);
+		return;
+	}
+
+	/*
+	 * make sure any dirty snapped pages are flushed before we
+	 * possibly truncate them.. so write AND block!
+	 */
+	if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
+		dout("__do_pending_vmtruncate %p flushing snaps first\n",
+		     inode);
+		spin_unlock(&ci->i_ceph_lock);
+		filemap_write_and_wait_range(&inode->i_data, 0,
+					     inode->i_sb->s_maxbytes);
+		goto retry;
+	}
+
+	/* there should be no reader or writer */
+	WARN_ON_ONCE(ci->i_rd_ref || ci->i_wr_ref);
+
+	to = ci->i_truncate_size;
+	wrbuffer_refs = ci->i_wrbuffer_ref;
+	dout("__do_pending_vmtruncate %p (%d) to %lld\n", inode,
+	     ci->i_truncate_pending, to);
+	spin_unlock(&ci->i_ceph_lock);
+
+	truncate_inode_pages(inode->i_mapping, to);
+
+	spin_lock(&ci->i_ceph_lock);
+	if (to == ci->i_truncate_size) {
+		/* no new truncate raced in; we are done */
+		ci->i_truncate_pending = 0;
+		finish = 1;
+	}
+	spin_unlock(&ci->i_ceph_lock);
+	if (!finish)
+		goto retry;
+
+	mutex_unlock(&ci->i_truncate_mutex);
+
+	if (wrbuffer_refs == 0)
+		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
+
+	wake_up_all(&ci->i_cap_wq);
+}
+
+/*
+ * symlinks
+ */
+/* ->follow_link: hand the VFS our cached symlink target (i_symlink) */
+static void *ceph_sym_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	struct ceph_inode_info *ci = ceph_inode(dentry->d_inode);
+	nd_set_link(nd, ci->i_symlink);
+	return NULL;	/* no cookie needed by put_link */
+}
+
+/* inode operations for symlink inodes */
+static const struct inode_operations ceph_symlink_iops = {
+	.readlink = generic_readlink,
+	.follow_link = ceph_sym_follow_link,
+	.setattr = ceph_setattr,
+	.getattr = ceph_getattr,
+	.setxattr = ceph_setxattr,
+	.getxattr = ceph_getxattr,
+	.listxattr = ceph_listxattr,
+	.removexattr = ceph_removexattr,
+};
+
+/*
+ * setattr: apply attribute changes.
+ *
+ * For each requested attribute: if we hold the relevant exclusive cap
+ * we apply the change locally and mark that cap dirty; otherwise we
+ * add the change to a SETATTR request for the MDS and note which
+ * shared caps to release along with it.
+ *
+ * Returns 0 or a negative errno.
+ */
+int ceph_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	const unsigned int ia_valid = attr->ia_valid;
+	struct ceph_mds_request *req;
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
+	int issued;
+	int release = 0, dirtied = 0;
+	int mask = 0;	/* CEPH_SETATTR_* bits sent to the MDS */
+	int err = 0;
+	int inode_dirty_flags = 0;
+
+	/* snapshots are read-only */
+	if (ceph_snap(inode) != CEPH_NOSNAP)
+		return -EROFS;
+
+	err = inode_change_ok(inode, attr);
+	if (err != 0)
+		return err;
+
+	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETATTR,
+				       USE_AUTH_MDS);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	spin_lock(&ci->i_ceph_lock);
+	issued = __ceph_caps_issued(ci, NULL);
+	dout("setattr %p issued %s\n", inode, ceph_cap_string(issued));
+
+	if (ia_valid & ATTR_UID) {
+		dout("setattr %p uid %d -> %d\n", inode,
+		     from_kuid(&init_user_ns, inode->i_uid),
+		     from_kuid(&init_user_ns, attr->ia_uid));
+		if (issued & CEPH_CAP_AUTH_EXCL) {
+			inode->i_uid = attr->ia_uid;
+			dirtied |= CEPH_CAP_AUTH_EXCL;
+		} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
+			   !uid_eq(attr->ia_uid, inode->i_uid)) {
+			req->r_args.setattr.uid = cpu_to_le32(
+				from_kuid(&init_user_ns, attr->ia_uid));
+			mask |= CEPH_SETATTR_UID;
+			release |= CEPH_CAP_AUTH_SHARED;
+		}
+	}
+	if (ia_valid & ATTR_GID) {
+		dout("setattr %p gid %d -> %d\n", inode,
+		     from_kgid(&init_user_ns, inode->i_gid),
+		     from_kgid(&init_user_ns, attr->ia_gid));
+		if (issued & CEPH_CAP_AUTH_EXCL) {
+			inode->i_gid = attr->ia_gid;
+			dirtied |= CEPH_CAP_AUTH_EXCL;
+		} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
+			   !gid_eq(attr->ia_gid, inode->i_gid)) {
+			req->r_args.setattr.gid = cpu_to_le32(
+				from_kgid(&init_user_ns, attr->ia_gid));
+			mask |= CEPH_SETATTR_GID;
+			release |= CEPH_CAP_AUTH_SHARED;
+		}
+	}
+	if (ia_valid & ATTR_MODE) {
+		dout("setattr %p mode 0%o -> 0%o\n", inode, inode->i_mode,
+		     attr->ia_mode);
+		if (issued & CEPH_CAP_AUTH_EXCL) {
+			inode->i_mode = attr->ia_mode;
+			dirtied |= CEPH_CAP_AUTH_EXCL;
+		} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
+			   attr->ia_mode != inode->i_mode) {
+			inode->i_mode = attr->ia_mode;
+			req->r_args.setattr.mode = cpu_to_le32(attr->ia_mode);
+			mask |= CEPH_SETATTR_MODE;
+			release |= CEPH_CAP_AUTH_SHARED;
+		}
+	}
+
+	if (ia_valid & ATTR_ATIME) {
+		dout("setattr %p atime %ld.%ld -> %ld.%ld\n", inode,
+		     inode->i_atime.tv_sec, inode->i_atime.tv_nsec,
+		     attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
+		if (issued & CEPH_CAP_FILE_EXCL) {
+			/* bump time_warp_seq: we warped the timestamp */
+			ci->i_time_warp_seq++;
+			inode->i_atime = attr->ia_atime;
+			dirtied |= CEPH_CAP_FILE_EXCL;
+		} else if ((issued & CEPH_CAP_FILE_WR) &&
+			   timespec_compare(&inode->i_atime,
+					    &attr->ia_atime) < 0) {
+			/* with the WR cap we may move atime forward locally */
+			inode->i_atime = attr->ia_atime;
+			dirtied |= CEPH_CAP_FILE_WR;
+		} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
+			   !timespec_equal(&inode->i_atime, &attr->ia_atime)) {
+			ceph_encode_timespec(&req->r_args.setattr.atime,
+					     &attr->ia_atime);
+			mask |= CEPH_SETATTR_ATIME;
+			release |= CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_RD |
+				CEPH_CAP_FILE_WR;
+		}
+	}
+	if (ia_valid & ATTR_MTIME) {
+		dout("setattr %p mtime %ld.%ld -> %ld.%ld\n", inode,
+		     inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
+		     attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
+		if (issued & CEPH_CAP_FILE_EXCL) {
+			ci->i_time_warp_seq++;
+			inode->i_mtime = attr->ia_mtime;
+			dirtied |= CEPH_CAP_FILE_EXCL;
+		} else if ((issued & CEPH_CAP_FILE_WR) &&
+			   timespec_compare(&inode->i_mtime,
+					    &attr->ia_mtime) < 0) {
+			inode->i_mtime = attr->ia_mtime;
+			dirtied |= CEPH_CAP_FILE_WR;
+		} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
+			   !timespec_equal(&inode->i_mtime, &attr->ia_mtime)) {
+			ceph_encode_timespec(&req->r_args.setattr.mtime,
+					     &attr->ia_mtime);
+			mask |= CEPH_SETATTR_MTIME;
+			release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD |
+				CEPH_CAP_FILE_WR;
+		}
+	}
+	if (ia_valid & ATTR_SIZE) {
+		dout("setattr %p size %lld -> %lld\n", inode,
+		     inode->i_size, attr->ia_size);
+		if (attr->ia_size > inode->i_sb->s_maxbytes) {
+			err = -EINVAL;
+			goto out;
+		}
+		/* only size *extension* can be done locally (with EXCL) */
+		if ((issued & CEPH_CAP_FILE_EXCL) &&
+		    attr->ia_size > inode->i_size) {
+			inode->i_size = attr->ia_size;
+			inode->i_blocks =
+				(attr->ia_size + (1 << 9) - 1) >> 9;
+			inode->i_ctime = attr->ia_ctime;
+			ci->i_reported_size = attr->ia_size;
+			dirtied |= CEPH_CAP_FILE_EXCL;
+		} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
+			   attr->ia_size != inode->i_size) {
+			req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
+			req->r_args.setattr.old_size =
+				cpu_to_le64(inode->i_size);
+			mask |= CEPH_SETATTR_SIZE;
+			release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD |
+				CEPH_CAP_FILE_WR;
+		}
+	}
+
+	/* these do nothing */
+	if (ia_valid & ATTR_CTIME) {
+		bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME|
+					 ATTR_MODE|ATTR_UID|ATTR_GID)) == 0;
+		dout("setattr %p ctime %ld.%ld -> %ld.%ld (%s)\n", inode,
+		     inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+		     attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
+		     only ? "ctime only" : "ignored");
+		inode->i_ctime = attr->ia_ctime;
+		if (only) {
+			/*
+			 * if kernel wants to dirty ctime but nothing else,
+			 * we need to choose a cap to dirty under, or do
+			 * a almost-no-op setattr
+			 */
+			if (issued & CEPH_CAP_AUTH_EXCL)
+				dirtied |= CEPH_CAP_AUTH_EXCL;
+			else if (issued & CEPH_CAP_FILE_EXCL)
+				dirtied |= CEPH_CAP_FILE_EXCL;
+			else if (issued & CEPH_CAP_XATTR_EXCL)
+				dirtied |= CEPH_CAP_XATTR_EXCL;
+			else
+				mask |= CEPH_SETATTR_CTIME;
+		}
+	}
+	if (ia_valid & ATTR_FILE)
+		dout("setattr %p ATTR_FILE ... hrm!\n", inode);
+
+	if (dirtied) {
+		inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied);
+		inode->i_ctime = CURRENT_TIME;
+	}
+
+	/* never release caps we don't actually hold */
+	release &= issued;
+	spin_unlock(&ci->i_ceph_lock);
+
+	if (inode_dirty_flags)
+		__mark_inode_dirty(inode, inode_dirty_flags);
+
+	if (ia_valid & ATTR_MODE) {
+		err = posix_acl_chmod(inode, attr->ia_mode);
+		if (err)
+			goto out_put;
+	}
+
+	if (mask) {
+		/* something couldn't be done locally; ask the MDS */
+		req->r_inode = inode;
+		ihold(inode);
+		req->r_inode_drop = release;
+		req->r_args.setattr.mask = cpu_to_le32(mask);
+		req->r_num_caps = 1;
+		err = ceph_mdsc_do_request(mdsc, NULL, req);
+	}
+	dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
+	     ceph_cap_string(dirtied), mask);
+
+	ceph_mdsc_put_request(req);
+	if (mask & CEPH_SETATTR_SIZE)
+		__ceph_do_pending_vmtruncate(inode);
+	return err;
+out:
+	spin_unlock(&ci->i_ceph_lock);
+out_put:
+	ceph_mdsc_put_request(req);
+	return err;
+}
+
+/*
+ * Verify that we have a lease on the given mask.  If not,
+ * do a getattr against an mds.
+ *
+ * Returns 0 if the caps in 'mask' are already valid (or the inode is
+ * the virtual snapdir), otherwise the result of the GETATTR request.
+ */
+int ceph_do_getattr(struct inode *inode, int mask)
+{
+	struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_mds_request *req;
+	int err;
+
+	if (ceph_snap(inode) == CEPH_SNAPDIR) {
+		/* the .snap dir is synthetic; nothing to fetch */
+		dout("do_getattr inode %p SNAPDIR\n", inode);
+		return 0;
+	}
+
+	dout("do_getattr inode %p mask %s mode 0%o\n",
+	     inode, ceph_cap_string(mask), inode->i_mode);
+	if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
+		return 0;
+
+	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+	req->r_inode = inode;
+	ihold(inode);
+	req->r_num_caps = 1;
+	req->r_args.getattr.mask = cpu_to_le32(mask);
+	err = ceph_mdsc_do_request(mdsc, NULL, req);
+	ceph_mdsc_put_request(req);
+	dout("do_getattr result=%d\n", err);
+	return err;
+}
+
+
+/*
+ * Check inode permissions.  We need fresh AUTH_SHARED metadata
+ * (mode/uid/gid) before deferring to the generic handler.
+ */
+int ceph_permission(struct inode *inode, int mask)
+{
+	int err;
+
+	/* refreshing caps may block on an MDS round trip */
+	if (mask & MAY_NOT_BLOCK)
+		return -ECHILD;
+
+	err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED);
+	if (err)
+		return err;
+
+	return generic_permission(inode, mask);
+}
+
+/*
+ * Get all attributes. Hopefully someday we'll have a statlite()
+ * and can limit the fields we require to be accurate.
+ */
+int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		 struct kstat *stat)
+{
+	struct inode *inode = dentry->d_inode;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	int err;
+
+	err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL);
+	if (!err) {
+		generic_fillattr(inode, stat);
+		stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino);
+		/* expose the snapshot id as the device for snapped inodes */
+		if (ceph_snap(inode) != CEPH_NOSNAP)
+			stat->dev = ceph_snap(inode);
+		else
+			stat->dev = 0;
+		if (S_ISDIR(inode->i_mode)) {
+			/* dir "size": recursive bytes, or entry count */
+			if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb),
+						RBYTES))
+				stat->size = ci->i_rbytes;
+			else
+				stat->size = ci->i_files + ci->i_subdirs;
+			stat->blocks = 0;
+			stat->blksize = 65536;
+		}
+	}
+	return err;
+}
diff --git a/ceph/ioctl.c b/ceph/ioctl.c
new file mode 100644
index 0000000..2042fd1
--- /dev/null
+++ b/ceph/ioctl.c
@@ -0,0 +1,296 @@
+#include <linux/in.h>
+
+#include "super.h"
+#include "mds_client.h"
+#include <linux/ceph/ceph_debug.h>
+
+#include "ioctl.h"
+
+
+/*
+ * ioctls
+ */
+
+/*
+ * get and set the file layout
+ */
+static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
+{
+	struct ceph_inode_info *ci = ceph_inode(file_inode(file));
+	struct ceph_ioctl_layout l;
+	int err;
+
+	/* refresh the cached layout before reporting it */
+	err = ceph_do_getattr(file_inode(file), CEPH_STAT_CAP_LAYOUT);
+	if (err)
+		return err;
+
+	l.stripe_unit = ceph_file_layout_su(ci->i_layout);
+	l.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
+	l.object_size = ceph_file_layout_object_size(ci->i_layout);
+	l.data_pool = le32_to_cpu(ci->i_layout.fl_pg_pool);
+	l.preferred_osd = (s32)-1;	/* obsolete field; always -1 */
+	if (copy_to_user(arg, &l, sizeof(l)))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * Sanity-check a layout: striping parameters must be page-aligned and
+ * consistent, and the data pool must exist in the current mdsmap.
+ * Returns 0 if valid, -EINVAL otherwise.
+ */
+static long __validate_layout(struct ceph_mds_client *mdsc,
+			      struct ceph_ioctl_layout *l)
+{
+	int i, err;
+
+	/* object size and stripe unit must be page multiples, and the
+	 * stripe unit (when set) must evenly divide the object size */
+	if ((l->object_size & ~PAGE_MASK) ||
+	    (l->stripe_unit & ~PAGE_MASK) ||
+	    (l->stripe_unit != 0 &&
+	     ((unsigned)l->object_size % (unsigned)l->stripe_unit)))
+		return -EINVAL;
+
+	/* make sure it's a valid data pool */
+	err = -EINVAL;
+	mutex_lock(&mdsc->mutex);
+	for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++) {
+		if (mdsc->mdsmap->m_data_pg_pools[i] == l->data_pool) {
+			err = 0;
+			break;
+		}
+	}
+	mutex_unlock(&mdsc->mutex);
+
+	return err;
+}
+
+/* CEPH_IOC_SET_LAYOUT: change the layout of a (newly created) file */
+static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
+{
+	struct inode *inode = file_inode(file);
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+	struct ceph_mds_request *req;
+	struct ceph_ioctl_layout l;
+	struct ceph_inode_info *ci = ceph_inode(file_inode(file));
+	struct ceph_ioctl_layout nl;
+	int err;
+
+	if (copy_from_user(&l, arg, sizeof(l)))
+		return -EFAULT;
+
+	/* validate changed params against current layout */
+	err = ceph_do_getattr(file_inode(file), CEPH_STAT_CAP_LAYOUT);
+	if (err)
+		return err;
+
+	/* merge: zero in a user-supplied field means "keep current value" */
+	memset(&nl, 0, sizeof(nl));
+	if (l.stripe_count)
+		nl.stripe_count = l.stripe_count;
+	else
+		nl.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
+	if (l.stripe_unit)
+		nl.stripe_unit = l.stripe_unit;
+	else
+		nl.stripe_unit = ceph_file_layout_su(ci->i_layout);
+	if (l.object_size)
+		nl.object_size = l.object_size;
+	else
+		nl.object_size = ceph_file_layout_object_size(ci->i_layout);
+	if (l.data_pool)
+		nl.data_pool = l.data_pool;
+	else
+		nl.data_pool = ceph_file_layout_pg_pool(ci->i_layout);
+
+	/* this is obsolete, and always -1 (all ones, so byte order is moot) */
+	nl.preferred_osd = le64_to_cpu(-1);
+
+	err = __validate_layout(mdsc, &nl);
+	if (err)
+		return err;
+
+	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETLAYOUT,
+				       USE_AUTH_MDS);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+	req->r_inode = inode;
+	ihold(inode);
+	req->r_num_caps = 1;
+
+	req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL;
+
+	/*
+	 * NOTE(review): the request carries the raw user values (0 meaning
+	 * "leave unchanged"), while only the merged result was validated
+	 * above -- presumably the MDS applies the same merge; verify.
+	 */
+	req->r_args.setlayout.layout.fl_stripe_unit =
+		cpu_to_le32(l.stripe_unit);
+	req->r_args.setlayout.layout.fl_stripe_count =
+		cpu_to_le32(l.stripe_count);
+	req->r_args.setlayout.layout.fl_object_size =
+		cpu_to_le32(l.object_size);
+	req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool);
+
+	err = ceph_mdsc_do_request(mdsc, NULL, req);
+	ceph_mdsc_put_request(req);
+	return err;
+}
+
+/*
+ * Set a layout policy on a directory inode.  All items in the tree
+ * rooted at this inode will inherit this layout on creation
+ * (it doesn't apply retroactively), unless a subdirectory has its
+ * own layout policy.
+ */
+static long ceph_ioctl_set_layout_policy(struct file *file, void __user *arg)
+{
+	struct inode *inode = file_inode(file);
+	struct ceph_mds_request *req;
+	struct ceph_ioctl_layout l;
+	int err;
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+
+	/* copy and validate */
+	if (copy_from_user(&l, arg, sizeof(l)))
+		return -EFAULT;
+
+	err = __validate_layout(mdsc, &l);
+	if (err)
+		return err;
+
+	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETDIRLAYOUT,
+				       USE_AUTH_MDS);
+
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+	req->r_inode = inode;
+	ihold(inode);
+	req->r_num_caps = 1;
+
+	req->r_args.setlayout.layout.fl_stripe_unit =
+			cpu_to_le32(l.stripe_unit);
+	req->r_args.setlayout.layout.fl_stripe_count =
+			cpu_to_le32(l.stripe_count);
+	req->r_args.setlayout.layout.fl_object_size =
+			cpu_to_le32(l.object_size);
+	req->r_args.setlayout.layout.fl_pg_pool =
+			cpu_to_le32(l.data_pool);
+
+	err = ceph_mdsc_do_request(mdsc, inode, req);
+	ceph_mdsc_put_request(req);
+	return err;
+}
+
+/*
+ * Return object name, size/offset information, and location (OSD
+ * number, network address) for a given file offset.
+ */
+static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
+{
+	struct ceph_ioctl_dataloc dl;
+	struct inode *inode = file_inode(file);
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_osd_client *osdc =
+		&ceph_sb_to_client(inode->i_sb)->client->osdc;
+	struct ceph_object_locator oloc;
+	struct ceph_object_id oid;
+	u64 len = 1, olen;
+	u64 tmp;
+	struct ceph_pg pgid;
+	int r;
+
+	/* copy and validate */
+	if (copy_from_user(&dl, arg, sizeof(dl)))
+		return -EFAULT;
+
+	/* hold the osdmap steady while we map and look up placement */
+	down_read(&osdc->map_sem);
+	r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
+					  &dl.object_no, &dl.object_offset,
+					  &olen);
+	if (r < 0) {
+		up_read(&osdc->map_sem);
+		return -EIO;
+	}
+	/* round file_offset down to the start of the containing object */
+	dl.file_offset -= dl.object_offset;
+	dl.object_size = ceph_file_layout_object_size(ci->i_layout);
+	dl.block_size = ceph_file_layout_su(ci->i_layout);
+
+	/* block_offset = object_offset % block_size */
+	tmp = dl.object_offset;
+	dl.block_offset = do_div(tmp, dl.block_size);
+
+	snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx",
+		 ceph_ino(inode), dl.object_no);
+
+	oloc.pool = ceph_file_layout_pg_pool(ci->i_layout);
+	ceph_oid_set_name(&oid, dl.object_name);
+
+	r = ceph_oloc_oid_to_pg(osdc->osdmap, &oloc, &oid, &pgid);
+	if (r < 0) {
+		up_read(&osdc->map_sem);
+		return r;
+	}
+
+	/* primary OSD for the pg, or < 0 if none is currently mapped */
+	dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
+	if (dl.osd >= 0) {
+		struct ceph_entity_addr *a =
+			ceph_osd_addr(osdc->osdmap, dl.osd);
+		if (a)
+			memcpy(&dl.osd_addr, &a->in_addr, sizeof(dl.osd_addr));
+	} else {
+		memset(&dl.osd_addr, 0, sizeof(dl.osd_addr));
+	}
+	up_read(&osdc->map_sem);
+
+	/* send result back to user */
+	if (copy_to_user(arg, &dl, sizeof(dl)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * CEPH_IOC_LAZYIO: mark this fd LAZY, relaxing consistency so buffered
+ * IO remains allowed even with multiple writers.  Moves the open-mode
+ * accounting into the LAZY bucket and re-evaluates wanted caps.
+ */
+static long ceph_ioctl_lazyio(struct file *file)
+{
+	struct ceph_file_info *fi = file->private_data;
+	struct inode *inode = file_inode(file);
+	struct ceph_inode_info *ci = ceph_inode(inode);
+
+	if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
+		spin_lock(&ci->i_ceph_lock);
+		ci->i_nr_by_mode[fi->fmode]--;
+		fi->fmode |= CEPH_FILE_MODE_LAZY;
+		ci->i_nr_by_mode[fi->fmode]++;
+		spin_unlock(&ci->i_ceph_lock);
+		dout("ioctl_lazyio: file %p marked lazy\n", file);
+
+		/* re-evaluate which caps we want for the new mode */
+		ceph_check_caps(ci, 0, NULL);
+	} else {
+		dout("ioctl_lazyio: file %p already lazy\n", file);
+	}
+	return 0;
+}
+
+/* CEPH_IOC_SYNCIO: force the sync (page-cache-bypassing) IO path */
+static long ceph_ioctl_syncio(struct file *file)
+{
+	struct ceph_file_info *fi = file->private_data;
+
+	fi->flags |= CEPH_F_SYNC;
+	return 0;
+}
+
+/* unlocked_ioctl entry point: dispatch CEPH_IOC_* commands */
+long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg);
+	switch (cmd) {
+	case CEPH_IOC_GET_LAYOUT:
+		return ceph_ioctl_get_layout(file, (void __user *)arg);
+
+	case CEPH_IOC_SET_LAYOUT:
+		return ceph_ioctl_set_layout(file, (void __user *)arg);
+
+	case CEPH_IOC_SET_LAYOUT_POLICY:
+		return ceph_ioctl_set_layout_policy(file, (void __user *)arg);
+
+	case CEPH_IOC_GET_DATALOC:
+		return ceph_ioctl_get_dataloc(file, (void __user *)arg);
+
+	case CEPH_IOC_LAZYIO:
+		return ceph_ioctl_lazyio(file);
+
+	case CEPH_IOC_SYNCIO:
+		return ceph_ioctl_syncio(file);
+	}
+
+	/* unknown command */
+	return -ENOTTY;
+}
diff --git a/ceph/ioctl.h b/ceph/ioctl.h
new file mode 100644
index 0000000..c77028a
--- /dev/null
+++ b/ceph/ioctl.h
@@ -0,0 +1,100 @@
+#ifndef FS_CEPH_IOCTL_H
+#define FS_CEPH_IOCTL_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#define CEPH_IOCTL_MAGIC 0x97
+
+/*
+ * CEPH_IOC_GET_LAYOUT - get file layout or dir layout policy
+ * CEPH_IOC_SET_LAYOUT - set file layout
+ * CEPH_IOC_SET_LAYOUT_POLICY - set dir layout policy
+ *
+ * The file layout specifies how file data is striped over objects in
+ * the distributed object store, which object pool they belong to (if
+ * it differs from the default), and an optional 'preferred osd' to
+ * store them on.
+ *
+ * Files get a new layout based on the policy set on the containing
+ * directory or one of its ancestors. The GET_LAYOUT ioctl will let
+ * you examine the layout for a file or the policy on a directory.
+ *
+ * SET_LAYOUT will let you set a layout on a newly created file. This
+ * only works immediately after the file is created and before any
+ * data is written to it.
+ *
+ * SET_LAYOUT_POLICY will let you set a layout policy (default layout)
+ * on a directory that will apply to any new files created in that
+ * directory (or any child directory that doesn't specify a layout of
+ * its own).
+ */
+
+/* use u64 to align sanely on all archs */
+struct ceph_ioctl_layout {
+	__u64 stripe_unit, stripe_count, object_size;	/* striping, bytes */
+	__u64 data_pool;	/* RADOS pool id */
+
+	/* obsolete. new values ignored, always return -1 */
+	__s64 preferred_osd;
+};
+
+#define CEPH_IOC_GET_LAYOUT _IOR(CEPH_IOCTL_MAGIC, 1, \
+ struct ceph_ioctl_layout)
+#define CEPH_IOC_SET_LAYOUT _IOW(CEPH_IOCTL_MAGIC, 2, \
+ struct ceph_ioctl_layout)
+#define CEPH_IOC_SET_LAYOUT_POLICY _IOW(CEPH_IOCTL_MAGIC, 5, \
+ struct ceph_ioctl_layout)
+
+/*
+ * CEPH_IOC_GET_DATALOC - get location of file data in the cluster
+ *
+ * Extract identity, address of the OSD and object storing a given
+ * file offset.
+ */
+/* only file_offset is an input; all other fields are ioctl outputs */
+struct ceph_ioctl_dataloc {
+	__u64 file_offset;           /* in+out: file offset */
+	__u64 object_offset;         /* out: offset in object */
+	__u64 object_no;             /* out: object # */
+	__u64 object_size;           /* out: object size */
+	char object_name[64];        /* out: object name */
+	__u64 block_offset;          /* out: offset in block */
+	__u64 block_size;            /* out: block length */
+	__s64 osd;                   /* out: osd # */
+	struct sockaddr_storage osd_addr; /* out: osd address */
+};
+
+#define CEPH_IOC_GET_DATALOC _IOWR(CEPH_IOCTL_MAGIC, 3, \
+ struct ceph_ioctl_dataloc)
+
+/*
+ * CEPH_IOC_LAZYIO - relax consistency
+ *
+ * Normally Ceph switches to synchronous IO when multiple clients have
+ * the file open (and one or more of them for write). Reads and writes bypass the
+ * page cache and go directly to the OSD. Setting this flag on a file
+ * descriptor will allow buffered IO for this file in cases where the
+ * application knows it won't interfere with other nodes (or doesn't
+ * care).
+ */
+#define CEPH_IOC_LAZYIO _IO(CEPH_IOCTL_MAGIC, 4)
+
+/*
+ * CEPH_IOC_SYNCIO - force synchronous IO
+ *
+ * This ioctl sets a file flag that forces the synchronous IO that
+ * bypasses the page cache, even if it is not necessary. This is
+ * essentially the opposite behavior of IOC_LAZYIO. This forces the
+ * same read/write path as a file opened by multiple clients when one
+ * or more of those clients is opened for write.
+ *
+ * Note that this type of sync IO takes a different path than a file
+ * opened with O_SYNC/D_SYNC (writes hit the page cache and are
+ * immediately flushed on page boundaries). It is very similar to
+ * O_DIRECT (writes bypass the page cache) except that O_DIRECT writes
+ * are not copied (user page must remain stable) and O_DIRECT writes
+ * have alignment restrictions (on the buffer and file offset).
+ */
+#define CEPH_IOC_SYNCIO _IO(CEPH_IOCTL_MAGIC, 5)
+
+#endif
diff --git a/ceph/locks.c b/ceph/locks.c
new file mode 100644
index 0000000..1913988
--- /dev/null
+++ b/ceph/locks.c
@@ -0,0 +1,338 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/random.h>
+
+#include "super.h"
+#include "mds_client.h"
+#include <linux/ceph/pagelist.h>
+
+static u64 lock_secret;
+
+static inline u64 secure_addr(void *addr)
+{
+ /*
+ * Obfuscate the kernel pointer with the per-boot secret and force
+ * the most significant bit on, so that the MDS knows 'owner' alone
+ * is sufficient to identify the lock owner (old clients sent both
+ * 'owner' and 'pid').
+ */
+ return (lock_secret ^ (u64)(unsigned long)addr) | (1ULL << 63);
+}
+
+/* Seed the per-boot secret used by secure_addr() to obfuscate lock owners. */
+void __init ceph_flock_init(void)
+{
+ get_random_bytes(&lock_secret, sizeof(lock_secret));
+}
+
+/**
+ * Implement fcntl and flock locking functions.
+ */
+static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
+ int cmd, u8 wait, struct file_lock *fl)
+{
+ struct inode *inode = file_inode(file);
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+ struct ceph_mds_request *req;
+ int err;
+ u64 length = 0;
+ u64 owner;
+
+ req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ req->r_inode = inode;
+ ihold(inode);
+ req->r_num_caps = 1;
+
+ /* mds requires start and length rather than start and end */
+ if (LLONG_MAX == fl->fl_end)
+ length = 0;
+ else
+ length = fl->fl_end - fl->fl_start + 1;
+
+ /* owner must be stable for the life of the lock: fl_owner for
+ * POSIX locks, the struct file for flock locks */
+ if (lock_type == CEPH_LOCK_FCNTL)
+ owner = secure_addr(fl->fl_owner);
+ else
+ owner = secure_addr(fl->fl_file);
+
+ dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, "
+ "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type,
+ (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length,
+ wait, fl->fl_type);
+
+ req->r_args.filelock_change.rule = lock_type;
+ req->r_args.filelock_change.type = cmd;
+ req->r_args.filelock_change.owner = cpu_to_le64(owner);
+ req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
+ req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
+ req->r_args.filelock_change.length = cpu_to_le64(length);
+ req->r_args.filelock_change.wait = wait;
+
+ err = ceph_mdsc_do_request(mdsc, inode, req);
+
+ /*
+ * Only decode the GETFILELOCK reply when the request succeeded:
+ * on error r_reply_info.filelock_reply may never have been set
+ * up, and dereferencing it would be a NULL pointer dereference.
+ */
+ if (operation == CEPH_MDS_OP_GETFILELOCK && !err) {
+ fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
+ if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
+ fl->fl_type = F_RDLCK;
+ else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
+ fl->fl_type = F_WRLCK;
+ else
+ fl->fl_type = F_UNLCK;
+
+ fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
+ length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
+ le64_to_cpu(req->r_reply_info.filelock_reply->length);
+ if (length >= 1)
+ fl->fl_end = length - 1;
+ else
+ fl->fl_end = 0;
+
+ }
+ ceph_mdsc_put_request(req);
+ dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
+ "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type,
+ (int)operation, (u64)fl->fl_pid, fl->fl_start,
+ length, wait, fl->fl_type, err);
+ return err;
+}
+
+/**
+ * Attempt to set an fcntl lock.
+ * For now, this just goes away to the server. Later it may be more awesome.
+ */
+int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
+{
+ u8 lock_cmd;
+ int err;
+ u8 wait = 0;
+ u16 op = CEPH_MDS_OP_SETFILELOCK;
+
+ if (!(fl->fl_flags & FL_POSIX))
+ return -ENOLCK;
+ /* No mandatory locks */
+ if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
+ return -ENOLCK;
+
+ dout("ceph_lock, fl_owner: %p", fl->fl_owner);
+
+ /* set wait bit as appropriate, then make command as Ceph expects it*/
+ if (IS_GETLK(cmd))
+ op = CEPH_MDS_OP_GETFILELOCK;
+ else if (IS_SETLKW(cmd))
+ wait = 1;
+
+ /* translate the VFS lock type to the Ceph wire encoding */
+ if (F_RDLCK == fl->fl_type)
+ lock_cmd = CEPH_LOCK_SHARED;
+ else if (F_WRLCK == fl->fl_type)
+ lock_cmd = CEPH_LOCK_EXCL;
+ else
+ lock_cmd = CEPH_LOCK_UNLOCK;
+
+ /* take (or query) the lock on the MDS first ... */
+ err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
+ if (!err) {
+ if (op != CEPH_MDS_OP_GETFILELOCK) {
+ /* ... then mirror it into the local VFS lock table */
+ dout("mds locked, locking locally");
+ err = posix_lock_file(file, fl, NULL);
+ if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
+ /* undo! This should only happen if
+ * the kernel detects local
+ * deadlock. */
+ ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
+ CEPH_LOCK_UNLOCK, 0, fl);
+ dout("got %d on posix_lock_file, undid lock",
+ err);
+ }
+ }
+
+ } else if (err == -ERESTARTSYS) {
+ /* interrupted while waiting: drop the server-side lock so
+ * we don't leave it held with no local record */
+ dout("undoing lock\n");
+ ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
+ CEPH_LOCK_UNLOCK, 0, fl);
+ }
+ return err;
+}
+
+/*
+ * Attempt to set a flock (BSD-style) lock: lock on the MDS first, then
+ * mirror it locally with flock_lock_file_wait(), undoing the MDS lock
+ * if the local step fails or the wait is interrupted.
+ */
+int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
+{
+ u8 lock_cmd;
+ int err;
+ u8 wait = 0;
+
+ if (!(fl->fl_flags & FL_FLOCK))
+ return -ENOLCK;
+ /* No mandatory locks */
+ if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
+ return -ENOLCK;
+
+ dout("ceph_flock, fl_file: %p", fl->fl_file);
+
+ if (IS_SETLKW(cmd))
+ wait = 1;
+
+ /* translate the VFS lock type to the Ceph wire encoding */
+ if (F_RDLCK == fl->fl_type)
+ lock_cmd = CEPH_LOCK_SHARED;
+ else if (F_WRLCK == fl->fl_type)
+ lock_cmd = CEPH_LOCK_EXCL;
+ else
+ lock_cmd = CEPH_LOCK_UNLOCK;
+
+ err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
+ file, lock_cmd, wait, fl);
+ if (!err) {
+ err = flock_lock_file_wait(file, fl);
+ if (err) {
+ /* local step failed: release the MDS-side lock */
+ ceph_lock_message(CEPH_LOCK_FLOCK,
+ CEPH_MDS_OP_SETFILELOCK,
+ file, CEPH_LOCK_UNLOCK, 0, fl);
+ dout("got %d on flock_lock_file_wait, undid lock", err);
+ }
+ } else if (err == -ERESTARTSYS) {
+ dout("undoing lock\n");
+ ceph_lock_message(CEPH_LOCK_FLOCK,
+ CEPH_MDS_OP_SETFILELOCK,
+ file, CEPH_LOCK_UNLOCK, 0, fl);
+ }
+ return err;
+}
+
+/**
+ * Must be called with inode->i_lock already held. Fills in the passed
+ * counter variables, so you can prepare pagelist metadata before calling
+ * ceph_encode_locks.
+ */
+void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
+{
+ struct file_lock *fl;
+
+ *fcntl_count = 0;
+ *flock_count = 0;
+
+ /* Walk the inode's lock list once, classifying each entry. */
+ for (fl = inode->i_flock; fl; fl = fl->fl_next) {
+ if (fl->fl_flags & FL_POSIX)
+ (*fcntl_count)++;
+ else if (fl->fl_flags & FL_FLOCK)
+ (*flock_count)++;
+ }
+ dout("counted %d flock locks and %d fcntl locks",
+ *flock_count, *fcntl_count);
+}
+
+/**
+ * Encode the flock and fcntl locks for the given inode into the ceph_filelock
+ * array. Must be called with inode->i_lock already held.
+ * If we encounter more of a specific lock type than expected, return -ENOSPC.
+ */
+int ceph_encode_locks_to_buffer(struct inode *inode,
+ struct ceph_filelock *flocks,
+ int num_fcntl_locks, int num_flock_locks)
+{
+ struct file_lock *lock;
+ int err = 0;
+ int seen_fcntl = 0;
+ int seen_flock = 0;
+ int l = 0; /* next free slot in flocks[] */
+
+ dout("encoding %d flock and %d fcntl locks", num_flock_locks,
+ num_fcntl_locks);
+
+ /* two passes so all fcntl locks land before all flock locks,
+ * matching the layout ceph_locks_to_pagelist() expects */
+ for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
+ if (lock->fl_flags & FL_POSIX) {
+ ++seen_fcntl;
+ if (seen_fcntl > num_fcntl_locks) {
+ err = -ENOSPC;
+ goto fail;
+ }
+ err = lock_to_ceph_filelock(lock, &flocks[l]);
+ if (err)
+ goto fail;
+ ++l;
+ }
+ }
+ for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
+ if (lock->fl_flags & FL_FLOCK) {
+ ++seen_flock;
+ if (seen_flock > num_flock_locks) {
+ err = -ENOSPC;
+ goto fail;
+ }
+ err = lock_to_ceph_filelock(lock, &flocks[l]);
+ if (err)
+ goto fail;
+ ++l;
+ }
+ }
+fail:
+ return err;
+}
+
+/**
+ * Copy the encoded flock and fcntl locks into the pagelist.
+ * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
+ * sequential flock locks.
+ * Returns zero on success.
+ */
+int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
+ struct ceph_pagelist *pagelist,
+ int num_fcntl_locks, int num_flock_locks)
+{
+ __le32 nlocks;
+ int err;
+
+ /* fcntl lock count, then the fcntl lock records ... */
+ nlocks = cpu_to_le32(num_fcntl_locks);
+ err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
+ if (err)
+ return err;
+
+ err = ceph_pagelist_append(pagelist, flocks,
+ num_fcntl_locks * sizeof(*flocks));
+ if (err)
+ return err;
+
+ /* ... then the flock count and the flock records */
+ nlocks = cpu_to_le32(num_flock_locks);
+ err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
+ if (err)
+ return err;
+
+ return ceph_pagelist_append(pagelist,
+ &flocks[num_fcntl_locks],
+ num_flock_locks * sizeof(*flocks));
+}
+
+/*
+ * Given a pointer to a lock, convert it to a ceph filelock
+ */
+int lock_to_ceph_filelock(struct file_lock *lock,
+ struct ceph_filelock *cephlock)
+{
+ int err = 0;
+ cephlock->start = cpu_to_le64(lock->fl_start);
+ cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
+ /* client id is filled in by the MDS, not by us */
+ cephlock->client = cpu_to_le64(0);
+ cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
+ /* same owner derivation as ceph_lock_message() so the MDS can
+ * match records from both paths */
+ if (lock->fl_flags & FL_POSIX)
+ cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
+ else
+ cephlock->owner = cpu_to_le64(secure_addr(lock->fl_file));
+
+ switch (lock->fl_type) {
+ case F_RDLCK:
+ cephlock->type = CEPH_LOCK_SHARED;
+ break;
+ case F_WRLCK:
+ cephlock->type = CEPH_LOCK_EXCL;
+ break;
+ case F_UNLCK:
+ cephlock->type = CEPH_LOCK_UNLOCK;
+ break;
+ default:
+ dout("Have unknown lock type %d", lock->fl_type);
+ err = -EINVAL;
+ }
+
+ return err;
+}
diff --git a/ceph/mds_client.c b/ceph/mds_client.c
new file mode 100644
index 0000000..2b4d093
--- /dev/null
+++ b/ceph/mds_client.c
@@ -0,0 +1,3665 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/fs.h>
+#include <linux/wait.h>
+#include <linux/slab.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "super.h"
+#include "mds_client.h"
+
+#include <linux/ceph/ceph_features.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/pagelist.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/debugfs.h>
+
+/*
+ * A cluster of MDS (metadata server) daemons is responsible for
+ * managing the file system namespace (the directory hierarchy and
+ * inodes) and for coordinating shared access to storage. Metadata is
+ * partitioned hierarchically across a number of servers, and that
+ * partition varies over time as the cluster adjusts the distribution
+ * in order to balance load.
+ *
+ * The MDS client is primarily responsible for managing synchronous
+ * metadata requests for operations like open, unlink, and so forth.
+ * If there is a MDS failure, we find out about it when we (possibly
+ * request and) receive a new MDS map, and can resubmit affected
+ * requests.
+ *
+ * For the most part, though, we take advantage of a lossless
+ * communications channel to the MDS, and do not need to worry about
+ * timing out or resubmitting requests.
+ *
+ * We maintain a stateful "session" with each MDS we interact with.
+ * Within each session, we send periodic heartbeat messages to ensure
+ * any capabilities or leases we have been issued remain valid. If
+ * the session times out and goes stale, our leases and capabilities
+ * are no longer valid.
+ */
+
+/* State carried through the MDS reconnect encoding path. */
+struct ceph_reconnect_state {
+ int nr_caps; /* presumably number of caps encoded so far -- verify at use sites */
+ struct ceph_pagelist *pagelist; /* destination for the reconnect payload */
+ bool flock; /* NOTE(review): looks like "peer understands file-lock records" -- confirm */
+};
+
+static void __wake_requests(struct ceph_mds_client *mdsc,
+ struct list_head *head);
+
+static const struct ceph_connection_operations mds_con_ops;
+
+
+/*
+ * mds reply parsing
+ */
+
+/*
+ * parse individual inode info
+ */
+static int parse_reply_info_in(void **p, void *end,
+ struct ceph_mds_reply_info_in *info,
+ u64 features)
+{
+ int err = -EIO;
+
+ /* the inode record plus its variable-length fragtree splits */
+ info->in = *p;
+ *p += sizeof(struct ceph_mds_reply_inode) +
+ sizeof(*info->in->fragtree.splits) *
+ le32_to_cpu(info->in->fragtree.nsplits);
+
+ ceph_decode_32_safe(p, end, info->symlink_len, bad);
+ ceph_decode_need(p, end, info->symlink_len, bad);
+ info->symlink = *p;
+ *p += info->symlink_len;
+
+ /* dir layout is only present when the peer advertises the feature */
+ if (features & CEPH_FEATURE_DIRLAYOUTHASH)
+ ceph_decode_copy_safe(p, end, &info->dir_layout,
+ sizeof(info->dir_layout), bad);
+ else
+ memset(&info->dir_layout, 0, sizeof(info->dir_layout));
+
+ ceph_decode_32_safe(p, end, info->xattr_len, bad);
+ ceph_decode_need(p, end, info->xattr_len, bad);
+ info->xattr_data = *p;
+ *p += info->xattr_len;
+ return 0;
+bad:
+ return err;
+}
+
+/*
+ * parse a normal reply, which may contain a (dir+)dentry and/or a
+ * target inode.
+ */
+static int parse_reply_info_trace(void **p, void *end,
+ struct ceph_mds_reply_info_parsed *info,
+ u64 features)
+{
+ int err;
+
+ /* optional (dir inode, dirfrag, dentry name, lease) section */
+ if (info->head->is_dentry) {
+ err = parse_reply_info_in(p, end, &info->diri, features);
+ if (err < 0)
+ goto out_bad;
+
+ if (unlikely(*p + sizeof(*info->dirfrag) > end))
+ goto bad;
+ info->dirfrag = *p;
+ *p += sizeof(*info->dirfrag) +
+ sizeof(u32)*le32_to_cpu(info->dirfrag->ndist);
+ if (unlikely(*p > end))
+ goto bad;
+
+ ceph_decode_32_safe(p, end, info->dname_len, bad);
+ ceph_decode_need(p, end, info->dname_len, bad);
+ info->dname = *p;
+ *p += info->dname_len;
+ info->dlease = *p;
+ *p += sizeof(*info->dlease);
+ }
+
+ /* optional target inode section */
+ if (info->head->is_target) {
+ err = parse_reply_info_in(p, end, &info->targeti, features);
+ if (err < 0)
+ goto out_bad;
+ }
+
+ /* the trace must be consumed exactly; trailing bytes are an error */
+ if (unlikely(*p != end))
+ goto bad;
+ return 0;
+
+bad:
+ err = -EIO;
+out_bad:
+ pr_err("problem parsing mds trace %d\n", err);
+ return err;
+}
+
+/*
+ * parse readdir results
+ */
+static int parse_reply_info_dir(void **p, void *end,
+ struct ceph_mds_reply_info_parsed *info,
+ u64 features)
+{
+ u32 num, i = 0;
+ int err;
+
+ /* dirfrag header plus its variable-length replica list */
+ info->dir_dir = *p;
+ if (*p + sizeof(*info->dir_dir) > end)
+ goto bad;
+ *p += sizeof(*info->dir_dir) +
+ sizeof(u32)*le32_to_cpu(info->dir_dir->ndist);
+ if (*p > end)
+ goto bad;
+
+ /* entry count (u32) plus the dir_end and dir_complete flag bytes */
+ ceph_decode_need(p, end, sizeof(num) + 2, bad);
+ num = ceph_decode_32(p);
+ info->dir_end = ceph_decode_8(p);
+ info->dir_complete = ceph_decode_8(p);
+ if (num == 0)
+ goto done;
+
+ /* carve the preallocated dir_in buffer into parallel arrays:
+ * [inode infos][dname ptrs][dname lens][lease ptrs] */
+ BUG_ON(!info->dir_in);
+ info->dir_dname = (void *)(info->dir_in + num);
+ info->dir_dname_len = (void *)(info->dir_dname + num);
+ info->dir_dlease = (void *)(info->dir_dname_len + num);
+ if ((unsigned long)(info->dir_dlease + num) >
+ (unsigned long)info->dir_in + info->dir_buf_size) {
+ pr_err("dir contents are larger than expected\n");
+ WARN_ON(1);
+ goto bad;
+ }
+
+ info->dir_nr = num;
+ while (num) {
+ /* dentry */
+ ceph_decode_need(p, end, sizeof(u32)*2, bad);
+ info->dir_dname_len[i] = ceph_decode_32(p);
+ ceph_decode_need(p, end, info->dir_dname_len[i], bad);
+ info->dir_dname[i] = *p;
+ *p += info->dir_dname_len[i];
+ dout("parsed dir dname '%.*s'\n", info->dir_dname_len[i],
+ info->dir_dname[i]);
+ info->dir_dlease[i] = *p;
+ *p += sizeof(struct ceph_mds_reply_lease);
+
+ /* inode */
+ err = parse_reply_info_in(p, end, &info->dir_in[i], features);
+ if (err < 0)
+ goto out_bad;
+ i++;
+ num--;
+ }
+
+done:
+ if (*p != end)
+ goto bad;
+ return 0;
+
+bad:
+ err = -EIO;
+out_bad:
+ pr_err("problem parsing dir contents %d\n", err);
+ return err;
+}
+
+/*
+ * parse fcntl F_GETLK results
+ */
+static int parse_reply_info_filelock(void **p, void *end,
+ struct ceph_mds_reply_info_parsed *info,
+ u64 features)
+{
+ /* The payload must be exactly one filelock reply record. */
+ if (*p + sizeof(*info->filelock_reply) > end)
+ return -EIO;
+
+ info->filelock_reply = *p;
+ *p += sizeof(*info->filelock_reply);
+
+ if (unlikely(*p != end))
+ return -EIO;
+
+ return 0;
+}
+
+/*
+ * parse create results
+ */
+static int parse_reply_info_create(void **p, void *end,
+ struct ceph_mds_reply_info_parsed *info,
+ u64 features)
+{
+ /* an empty payload is valid: it just means no created ino */
+ if (features & CEPH_FEATURE_REPLY_CREATE_INODE) {
+ if (*p == end) {
+ info->has_create_ino = false;
+ } else {
+ info->has_create_ino = true;
+ info->ino = ceph_decode_64(p);
+ }
+ }
+
+ /* without the feature, any trailing bytes are a protocol error */
+ if (unlikely(*p != end))
+ goto bad;
+ return 0;
+
+bad:
+ return -EIO;
+}
+
+/*
+ * parse extra results
+ */
+static int parse_reply_info_extra(void **p, void *end,
+ struct ceph_mds_reply_info_parsed *info,
+ u64 features)
+{
+ /* Dispatch on the operation recorded in the reply head. */
+ switch (info->head->op) {
+ case CEPH_MDS_OP_GETFILELOCK:
+ return parse_reply_info_filelock(p, end, info, features);
+ case CEPH_MDS_OP_READDIR:
+ case CEPH_MDS_OP_LSSNAP:
+ return parse_reply_info_dir(p, end, info, features);
+ case CEPH_MDS_OP_CREATE:
+ return parse_reply_info_create(p, end, info, features);
+ default:
+ return -EIO;
+ }
+}
+
+/*
+ * parse entire mds reply
+ */
+static int parse_reply_info(struct ceph_msg *msg,
+ struct ceph_mds_reply_info_parsed *info,
+ u64 features)
+{
+ void *p, *end;
+ u32 len;
+ int err;
+
+ /* reply layout: head, then three length-prefixed sections */
+ info->head = msg->front.iov_base;
+ p = msg->front.iov_base + sizeof(struct ceph_mds_reply_head);
+ end = p + msg->front.iov_len - sizeof(struct ceph_mds_reply_head);
+
+ /* trace */
+ ceph_decode_32_safe(&p, end, len, bad);
+ if (len > 0) {
+ ceph_decode_need(&p, end, len, bad);
+ err = parse_reply_info_trace(&p, p+len, info, features);
+ if (err < 0)
+ goto out_bad;
+ }
+
+ /* extra */
+ ceph_decode_32_safe(&p, end, len, bad);
+ if (len > 0) {
+ ceph_decode_need(&p, end, len, bad);
+ err = parse_reply_info_extra(&p, p+len, info, features);
+ if (err < 0)
+ goto out_bad;
+ }
+
+ /* snap blob */
+ ceph_decode_32_safe(&p, end, len, bad);
+ info->snapblob_len = len;
+ info->snapblob = p;
+ p += len;
+
+ /* the message must be consumed exactly */
+ if (p != end)
+ goto bad;
+ return 0;
+
+bad:
+ err = -EIO;
+out_bad:
+ pr_err("mds parse_reply err %d\n", err);
+ return err;
+}
+
+static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
+{
+ /* dir_in, when set, is the base of the page allocation that backs
+ * the parsed readdir arrays (see parse_reply_info_dir). */
+ if (info->dir_in)
+ free_pages((unsigned long)info->dir_in,
+ get_order(info->dir_buf_size));
+}
+
+
+/*
+ * sessions
+ */
+/* Human-readable name for a CEPH_MDS_SESSION_* state, for debug output. */
+static const char *session_state_name(int s)
+{
+ switch (s) {
+ case CEPH_MDS_SESSION_NEW: return "new";
+ case CEPH_MDS_SESSION_OPENING: return "opening";
+ case CEPH_MDS_SESSION_OPEN: return "open";
+ case CEPH_MDS_SESSION_HUNG: return "hung";
+ case CEPH_MDS_SESSION_CLOSING: return "closing";
+ case CEPH_MDS_SESSION_RESTARTING: return "restarting";
+ case CEPH_MDS_SESSION_RECONNECTING: return "reconnecting";
+ default: return "???";
+ }
+}
+
+/*
+ * Take a reference on a session, but only if it is still live
+ * (s_ref != 0). Returns the session on success, NULL if the last
+ * reference was already dropped.
+ */
+static struct ceph_mds_session *get_session(struct ceph_mds_session *s)
+{
+ if (atomic_inc_not_zero(&s->s_ref)) {
+ dout("mdsc get_session %p %d -> %d\n", s,
+ atomic_read(&s->s_ref)-1, atomic_read(&s->s_ref));
+ return s;
+ } else {
+ dout("mdsc get_session %p 0 -- FAIL", s);
+ return NULL;
+ }
+}
+
+/* Drop a session reference; on the last put, destroy the authorizer
+ * and free the session. */
+void ceph_put_mds_session(struct ceph_mds_session *s)
+{
+ dout("mdsc put_session %p %d -> %d\n", s,
+ atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1);
+ if (atomic_dec_and_test(&s->s_ref)) {
+ if (s->s_auth.authorizer)
+ ceph_auth_destroy_authorizer(
+ s->s_mdsc->fsc->client->monc.auth,
+ s->s_auth.authorizer);
+ kfree(s);
+ }
+}
+
+/*
+ * called under mdsc->mutex
+ */
+struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc,
+ int mds)
+{
+ struct ceph_mds_session *session;
+
+ /* sessions[] is indexed directly by mds rank */
+ if (mds >= mdsc->max_sessions || mdsc->sessions[mds] == NULL)
+ return NULL;
+ session = mdsc->sessions[mds];
+ dout("lookup_mds_session %p %d\n", session,
+ atomic_read(&session->s_ref));
+ get_session(session);
+ return session;
+}
+
+static bool __have_session(struct ceph_mds_client *mdsc, int mds)
+{
+ /* True iff a session struct is registered for this mds rank. */
+ return mds < mdsc->max_sessions && mdsc->sessions[mds] != NULL;
+}
+
+static int __verify_registered_session(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *s)
+{
+ /* 0 if s is still the registered session for its rank, else -ENOENT. */
+ if (s->s_mds < mdsc->max_sessions && mdsc->sessions[s->s_mds] == s)
+ return 0;
+ return -ENOENT;
+}
+
+/*
+ * create+register a new session for given mds.
+ * called under mdsc->mutex.
+ */
+static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
+ int mds)
+{
+ struct ceph_mds_session *s;
+
+ if (mds >= mdsc->mdsmap->m_max_mds)
+ return ERR_PTR(-EINVAL);
+
+ s = kzalloc(sizeof(*s), GFP_NOFS);
+ if (!s)
+ return ERR_PTR(-ENOMEM);
+ s->s_mdsc = mdsc;
+ s->s_mds = mds;
+ s->s_state = CEPH_MDS_SESSION_NEW;
+ s->s_ttl = 0;
+ s->s_seq = 0;
+ mutex_init(&s->s_mutex);
+
+ ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr);
+
+ spin_lock_init(&s->s_gen_ttl_lock);
+ s->s_cap_gen = 0;
+ /* already-expired ttl so nothing is trusted until first renew */
+ s->s_cap_ttl = jiffies - 1;
+
+ spin_lock_init(&s->s_cap_lock);
+ s->s_renew_requested = 0;
+ s->s_renew_seq = 0;
+ INIT_LIST_HEAD(&s->s_caps);
+ s->s_nr_caps = 0;
+ s->s_trim_caps = 0;
+ atomic_set(&s->s_ref, 1);
+ INIT_LIST_HEAD(&s->s_waiting);
+ INIT_LIST_HEAD(&s->s_unsafe);
+ s->s_num_cap_releases = 0;
+ s->s_cap_reconnect = 0;
+ s->s_cap_iterator = NULL;
+ INIT_LIST_HEAD(&s->s_cap_releases);
+ INIT_LIST_HEAD(&s->s_cap_releases_done);
+ INIT_LIST_HEAD(&s->s_cap_flushing);
+ INIT_LIST_HEAD(&s->s_cap_snaps_flushing);
+
+ dout("register_session mds%d\n", mds);
+ if (mds >= mdsc->max_sessions) {
+ /* grow sessions[] to the next power of two above mds */
+ int newmax = 1 << get_count_order(mds+1);
+ struct ceph_mds_session **sa;
+
+ dout("register_session realloc to %d\n", newmax);
+ sa = kcalloc(newmax, sizeof(void *), GFP_NOFS);
+ if (sa == NULL)
+ goto fail_realloc;
+ if (mdsc->sessions) {
+ memcpy(sa, mdsc->sessions,
+ mdsc->max_sessions * sizeof(void *));
+ kfree(mdsc->sessions);
+ }
+ mdsc->sessions = sa;
+ mdsc->max_sessions = newmax;
+ }
+ mdsc->sessions[mds] = s;
+ atomic_inc(&s->s_ref); /* one ref to sessions[], one to caller */
+
+ ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds,
+ ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
+
+ return s;
+
+fail_realloc:
+ kfree(s);
+ return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * called under mdsc->mutex
+ */
+static void __unregister_session(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *s)
+{
+ dout("__unregister_session mds%d %p\n", s->s_mds, s);
+ BUG_ON(mdsc->sessions[s->s_mds] != s);
+ mdsc->sessions[s->s_mds] = NULL;
+ ceph_con_close(&s->s_con);
+ /* drop the sessions[] reference taken in register_session() */
+ ceph_put_mds_session(s);
+}
+
+/*
+ * drop session refs in request.
+ *
+ * should be last request ref, or hold mdsc->mutex
+ */
+static void put_request_session(struct ceph_mds_request *req)
+{
+ struct ceph_mds_session *s = req->r_session;
+
+ /* Drop the request's session reference, if any, and clear it. */
+ if (s) {
+ req->r_session = NULL;
+ ceph_put_mds_session(s);
+ }
+}
+
+/* kref release callback: drop every reference and pin the request holds. */
+void ceph_mdsc_release_request(struct kref *kref)
+{
+ struct ceph_mds_request *req = container_of(kref,
+ struct ceph_mds_request,
+ r_kref);
+ destroy_reply_info(&req->r_reply_info);
+ if (req->r_request)
+ ceph_msg_put(req->r_request);
+ if (req->r_reply)
+ ceph_msg_put(req->r_reply);
+ if (req->r_inode) {
+ ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
+ iput(req->r_inode);
+ }
+ if (req->r_locked_dir)
+ ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
+ if (req->r_target_inode)
+ iput(req->r_target_inode);
+ if (req->r_dentry)
+ dput(req->r_dentry);
+ if (req->r_old_dentry)
+ dput(req->r_old_dentry);
+ if (req->r_old_dentry_dir) {
+ /*
+ * track (and drop pins for) r_old_dentry_dir
+ * separately, since r_old_dentry's d_parent may have
+ * changed between the dir mutex being dropped and
+ * this request being freed.
+ */
+ ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
+ CEPH_CAP_PIN);
+ iput(req->r_old_dentry_dir);
+ }
+ kfree(req->r_path1);
+ kfree(req->r_path2);
+ put_request_session(req);
+ ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation);
+ kfree(req);
+}
+
+/*
+ * lookup request, bump ref if found.
+ *
+ * called under mdsc->mutex.
+ */
+static struct ceph_mds_request *__lookup_request(struct ceph_mds_client *mdsc,
+ u64 tid)
+{
+ struct rb_node *n = mdsc->request_tree.rb_node;
+
+ /* Standard rbtree search keyed on r_tid; take a ref on a hit. */
+ while (n) {
+ struct ceph_mds_request *req =
+ rb_entry(n, struct ceph_mds_request, r_node);
+
+ if (tid < req->r_tid) {
+ n = n->rb_left;
+ } else if (tid > req->r_tid) {
+ n = n->rb_right;
+ } else {
+ ceph_mdsc_get_request(req);
+ return req;
+ }
+ }
+ return NULL;
+}
+
+/* Insert a request into the tid-ordered rbtree; tids are unique, so a
+ * duplicate is a bug. Caller holds mdsc->mutex. */
+static void __insert_request(struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *new)
+{
+ struct rb_node **p = &mdsc->request_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct ceph_mds_request *req = NULL;
+
+ while (*p) {
+ parent = *p;
+ req = rb_entry(parent, struct ceph_mds_request, r_node);
+ if (new->r_tid < req->r_tid)
+ p = &(*p)->rb_left;
+ else if (new->r_tid > req->r_tid)
+ p = &(*p)->rb_right;
+ else
+ BUG();
+ }
+
+ rb_link_node(&new->r_node, parent, p);
+ rb_insert_color(&new->r_node, &mdsc->request_tree);
+}
+
+/*
+ * Register an in-flight request, and assign a tid. Link to the
+ * directory we are modifying (if any).
+ *
+ * Called under mdsc->mutex.
+ */
+static void __register_request(struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req,
+ struct inode *dir)
+{
+ req->r_tid = ++mdsc->last_tid;
+ if (req->r_num_caps)
+ ceph_reserve_caps(mdsc, &req->r_caps_reservation,
+ req->r_num_caps);
+ dout("__register_request %p tid %lld\n", req, req->r_tid);
+ /* tree reference, dropped in __unregister_request() */
+ ceph_mdsc_get_request(req);
+ __insert_request(mdsc, req);
+
+ req->r_uid = current_fsuid();
+ req->r_gid = current_fsgid();
+
+ if (dir) {
+ struct ceph_inode_info *ci = ceph_inode(dir);
+
+ /* pin the dir and track this as an unsafe dir operation */
+ ihold(dir);
+ spin_lock(&ci->i_unsafe_lock);
+ req->r_unsafe_dir = dir;
+ list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops);
+ spin_unlock(&ci->i_unsafe_lock);
+ }
+}
+
+/* Remove a request from the tree and drop its unsafe-dir tracking.
+ * Called under mdsc->mutex; drops the tree's request reference. */
+static void __unregister_request(struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req)
+{
+ dout("__unregister_request %p tid %lld\n", req, req->r_tid);
+ rb_erase(&req->r_node, &mdsc->request_tree);
+ RB_CLEAR_NODE(&req->r_node);
+
+ if (req->r_unsafe_dir) {
+ struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir);
+
+ spin_lock(&ci->i_unsafe_lock);
+ list_del_init(&req->r_unsafe_dir_item);
+ spin_unlock(&ci->i_unsafe_lock);
+
+ iput(req->r_unsafe_dir);
+ req->r_unsafe_dir = NULL;
+ }
+
+ complete_all(&req->r_safe_completion);
+
+ ceph_mdsc_put_request(req);
+}
+
+/*
+ * Choose mds to send request to next. If there is a hint set in the
+ * request (e.g., due to a prior forward hint from the mds), use that.
+ * Otherwise, consult frag tree and/or caps to identify the
+ * appropriate mds. If all else fails, choose randomly.
+ *
+ * Called under mdsc->mutex.
+ */
+/* Walk up from a snapped dentry to the nearest non-snap ancestor. */
+static struct dentry *get_nonsnap_parent(struct dentry *dentry)
+{
+ /*
+ * we don't need to worry about protecting the d_parent access
+ * here because we never rename inside the snapped namespace
+ * except to resplice to another snapdir, and either the old or new
+ * result is a valid result.
+ */
+ while (!IS_ROOT(dentry) && ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
+ dentry = dentry->d_parent;
+ return dentry;
+}
+
+/*
+ * Pick the mds to send a request to: honor a resend hint if usable,
+ * otherwise derive an inode (possibly a parent dir + name hash) and
+ * consult its frag tree and caps; fall back to a random mds.
+ * Called under mdsc->mutex.
+ */
+static int __choose_mds(struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req)
+{
+ struct inode *inode;
+ struct ceph_inode_info *ci;
+ struct ceph_cap *cap;
+ int mode = req->r_direct_mode;
+ int mds = -1;
+ u32 hash = req->r_direct_hash;
+ bool is_hash = req->r_direct_is_hash;
+
+ /*
+ * is there a specific mds we should try? ignore hint if we have
+ * no session and the mds is not up (active or recovering).
+ */
+ if (req->r_resend_mds >= 0 &&
+ (__have_session(mdsc, req->r_resend_mds) ||
+ ceph_mdsmap_get_state(mdsc->mdsmap, req->r_resend_mds) > 0)) {
+ dout("choose_mds using resend_mds mds%d\n",
+ req->r_resend_mds);
+ return req->r_resend_mds;
+ }
+
+ if (mode == USE_RANDOM_MDS)
+ goto random;
+
+ /* pick the inode that determines placement */
+ inode = NULL;
+ if (req->r_inode) {
+ inode = req->r_inode;
+ } else if (req->r_dentry) {
+ /* ignore race with rename; old or new d_parent is okay */
+ struct dentry *parent = req->r_dentry->d_parent;
+ struct inode *dir = parent->d_inode;
+
+ if (dir->i_sb != mdsc->fsc->sb) {
+ /* not this fs! */
+ inode = req->r_dentry->d_inode;
+ } else if (ceph_snap(dir) != CEPH_NOSNAP) {
+ /* direct snapped/virtual snapdir requests
+ * based on parent dir inode */
+ struct dentry *dn = get_nonsnap_parent(parent);
+ inode = dn->d_inode;
+ dout("__choose_mds using nonsnap parent %p\n", inode);
+ } else {
+ /* dentry target */
+ inode = req->r_dentry->d_inode;
+ if (!inode || mode == USE_AUTH_MDS) {
+ /* dir + name */
+ inode = dir;
+ hash = ceph_dentry_hash(dir, req->r_dentry);
+ is_hash = true;
+ }
+ }
+ }
+
+ dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash,
+ (int)hash, mode);
+ if (!inode)
+ goto random;
+ ci = ceph_inode(inode);
+
+ /* for hashed dir lookups, consult the inode's frag tree */
+ if (is_hash && S_ISDIR(inode->i_mode)) {
+ struct ceph_inode_frag frag;
+ int found;
+
+ ceph_choose_frag(ci, hash, &frag, &found);
+ if (found) {
+ if (mode == USE_ANY_MDS && frag.ndist > 0) {
+ u8 r;
+
+ /* choose a random replica */
+ get_random_bytes(&r, 1);
+ r %= frag.ndist;
+ mds = frag.dist[r];
+ dout("choose_mds %p %llx.%llx "
+ "frag %u mds%d (%d/%d)\n",
+ inode, ceph_vinop(inode),
+ frag.frag, mds,
+ (int)r, frag.ndist);
+ if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
+ CEPH_MDS_STATE_ACTIVE)
+ return mds;
+ }
+
+ /* since this file/dir wasn't known to be
+ * replicated, then we want to look for the
+ * authoritative mds. */
+ mode = USE_AUTH_MDS;
+ if (frag.mds >= 0) {
+ /* choose auth mds */
+ mds = frag.mds;
+ dout("choose_mds %p %llx.%llx "
+ "frag %u mds%d (auth)\n",
+ inode, ceph_vinop(inode), frag.frag, mds);
+ if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
+ CEPH_MDS_STATE_ACTIVE)
+ return mds;
+ }
+ }
+ }
+
+ /* otherwise follow whichever mds issued us a cap (auth if possible) */
+ spin_lock(&ci->i_ceph_lock);
+ cap = NULL;
+ if (mode == USE_AUTH_MDS)
+ cap = ci->i_auth_cap;
+ if (!cap && !RB_EMPTY_ROOT(&ci->i_caps))
+ cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
+ if (!cap) {
+ spin_unlock(&ci->i_ceph_lock);
+ goto random;
+ }
+ mds = cap->session->s_mds;
+ dout("choose_mds %p %llx.%llx mds%d (%scap %p)\n",
+ inode, ceph_vinop(inode), mds,
+ cap == ci->i_auth_cap ? "auth " : "", cap);
+ spin_unlock(&ci->i_ceph_lock);
+ return mds;
+
+random:
+ mds = ceph_mdsmap_get_random_mds(mdsc->mdsmap);
+ dout("choose_mds chose random mds%d\n", mds);
+ return mds;
+}
+
+
+/*
+ * session messages
+ */
+/* Allocate a CLIENT_SESSION message containing just a session head
+ * with the given op and seq. Returns NULL on allocation failure. */
+static struct ceph_msg *create_session_msg(u32 op, u64 seq)
+{
+ struct ceph_msg *msg;
+ struct ceph_mds_session_head *h;
+
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS,
+ false);
+ if (!msg) {
+ pr_err("create_session_msg ENOMEM creating msg\n");
+ return NULL;
+ }
+ h = msg->front.iov_base;
+ h->op = cpu_to_le32(op);
+ h->seq = cpu_to_le64(seq);
+ return msg;
+}
+
+/*
+ * send session open request.
+ *
+ * called under mdsc->mutex
+ */
+static int __open_session(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ struct ceph_msg *msg;
+ int mstate;
+ int mds = session->s_mds;
+
+ /* wait for mds to go active? */
+ mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds);
+ dout("open_session to mds%d (%s)\n", mds,
+ ceph_mds_state_name(mstate));
+ session->s_state = CEPH_MDS_SESSION_OPENING;
+ /* timestamp used by the renew/timeout logic */
+ session->s_renew_requested = jiffies;
+
+ /* send connect message */
+ msg = create_session_msg(CEPH_SESSION_REQUEST_OPEN, session->s_seq);
+ if (!msg)
+ return -ENOMEM;
+ ceph_con_send(&session->s_con, msg);
+ return 0;
+}
+
+/*
+ * open sessions for any export targets for the given mds
+ *
+ * called under mdsc->mutex
+ */
+static struct ceph_mds_session *
+__open_export_target_session(struct ceph_mds_client *mdsc, int target)
+{
+ struct ceph_mds_session *session;
+
+ /* reuse an existing session for this rank, or register a new one */
+ session = __ceph_lookup_mds_session(mdsc, target);
+ if (!session) {
+ session = register_session(mdsc, target);
+ if (IS_ERR(session))
+ return session;
+ }
+ /* only kick off an open if the session isn't already live */
+ if (session->s_state == CEPH_MDS_SESSION_NEW ||
+ session->s_state == CEPH_MDS_SESSION_CLOSING)
+ __open_session(mdsc, session);
+
+ return session;
+}
+
+struct ceph_mds_session *
+ceph_mdsc_open_export_target_session(struct ceph_mds_client *mdsc, int target)
+{
+ struct ceph_mds_session *s;
+
+ dout("open_export_target_session to mds%d\n", target);
+
+ /* Take mdsc->mutex around the locked helper. */
+ mutex_lock(&mdsc->mutex);
+ s = __open_export_target_session(mdsc, target);
+ mutex_unlock(&mdsc->mutex);
+
+ return s;
+}
+
+static void __open_export_target_sessions(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ struct ceph_mds_info *mi;
+ struct ceph_mds_session *ts;
+ int i, mds = session->s_mds;
+
+ if (mds >= mdsc->mdsmap->m_max_mds)
+ return;
+
+ mi = &mdsc->mdsmap->m_info[mds];
+ dout("open_export_target_sessions for mds%d (%d targets)\n",
+ session->s_mds, mi->num_export_targets);
+
+ /* open a session to each export target; we only needed the open
+ * to be initiated, so drop the reference right away */
+ for (i = 0; i < mi->num_export_targets; i++) {
+ ts = __open_export_target_session(mdsc, mi->export_targets[i]);
+ if (!IS_ERR(ts))
+ ceph_put_mds_session(ts);
+ }
+}
+
+/* Locked wrapper around __open_export_target_sessions(). */
+void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ mutex_lock(&mdsc->mutex);
+ __open_export_target_sessions(mdsc, session);
+ mutex_unlock(&mdsc->mutex);
+}
+
+/*
+ * session caps
+ */
+
+/*
+ * Free preallocated cap messages assigned to this session
+ */
+static void cleanup_cap_releases(struct ceph_mds_session *session)
+{
+ struct ceph_msg *msg;
+
+ /* drain both the pending and the ready-to-send release queues */
+ spin_lock(&session->s_cap_lock);
+ while (!list_empty(&session->s_cap_releases)) {
+ msg = list_first_entry(&session->s_cap_releases,
+ struct ceph_msg, list_head);
+ list_del_init(&msg->list_head);
+ ceph_msg_put(msg);
+ }
+ while (!list_empty(&session->s_cap_releases_done)) {
+ msg = list_first_entry(&session->s_cap_releases_done,
+ struct ceph_msg, list_head);
+ list_del_init(&msg->list_head);
+ ceph_msg_put(msg);
+ }
+ spin_unlock(&session->s_cap_lock);
+}
+
+/*
+ * Helper to safely iterate over all caps associated with a session, with
+ * special care taken to handle a racing __ceph_remove_cap().
+ *
+ * Caller must hold session s_mutex.
+ */
+static int iterate_session_caps(struct ceph_mds_session *session,
+				int (*cb)(struct inode *, struct ceph_cap *,
+					  void *), void *arg)
+{
+	struct list_head *p;
+	struct ceph_cap *cap;
+	struct inode *inode, *last_inode = NULL;
+	struct ceph_cap *old_cap = NULL;
+	int ret;
+
+	dout("iterate_session_caps %p mds%d\n", session, session->s_mds);
+	spin_lock(&session->s_cap_lock);
+	p = session->s_caps.next;
+	while (p != &session->s_caps) {
+		cap = list_entry(p, struct ceph_cap, session_caps);
+		/* skip caps whose inode is going away (igrab failed) */
+		inode = igrab(&cap->ci->vfs_inode);
+		if (!inode) {
+			p = p->next;
+			continue;
+		}
+		/*
+		 * Publish this cap as the iteration cursor so a racing
+		 * __ceph_remove_cap() leaves it on the list for us (we
+		 * detect that below via cap->ci == NULL), then drop the
+		 * lock to call the callback.
+		 */
+		session->s_cap_iterator = cap;
+		spin_unlock(&session->s_cap_lock);
+
+		/* drop the previous round's references without locks held */
+		if (last_inode) {
+			iput(last_inode);
+			last_inode = NULL;
+		}
+		if (old_cap) {
+			ceph_put_cap(session->s_mdsc, old_cap);
+			old_cap = NULL;
+		}
+
+		ret = cb(inode, cap, arg);
+		last_inode = inode;
+
+		spin_lock(&session->s_cap_lock);
+		p = p->next;
+		if (cap->ci == NULL) {
+			/* raced with __ceph_remove_cap(): finish removal */
+			dout("iterate_session_caps finishing cap %p removal\n",
+			     cap);
+			BUG_ON(cap->session != session);
+			list_del_init(&cap->session_caps);
+			session->s_nr_caps--;
+			cap->session = NULL;
+			old_cap = cap; /* put_cap it w/o locks held */
+		}
+		if (ret < 0)
+			goto out;
+	}
+	ret = 0;
+out:
+	session->s_cap_iterator = NULL;
+	spin_unlock(&session->s_cap_lock);
+
+	/* release references left over from the final iteration */
+	if (last_inode)
+		iput(last_inode);
+	if (old_cap)
+		ceph_put_cap(session->s_mdsc, old_cap);
+
+	return ret;
+}
+
+/*
+ * iterate_session_caps() callback: detach @cap from @inode and, if that
+ * leaves the inode with no real caps, discard its dirty/flushing state.
+ * 'drop' counts the discarded states; the trailing loop does one iput()
+ * per count (NOTE(review): presumably each discarded state held an inode
+ * reference when it was queued -- verify against the queueing code).
+ */
+static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
+				  void *arg)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	int drop = 0;
+
+	dout("removing cap %p, ci is %p, inode is %p\n",
+	     cap, ci, &ci->vfs_inode);
+	spin_lock(&ci->i_ceph_lock);
+	__ceph_remove_cap(cap, false);
+	if (!__ceph_is_any_real_caps(ci)) {
+		struct ceph_mds_client *mdsc =
+			ceph_sb_to_client(inode->i_sb)->mdsc;
+
+		/* cap_dirty_lock nests inside i_ceph_lock here */
+		spin_lock(&mdsc->cap_dirty_lock);
+		if (!list_empty(&ci->i_dirty_item)) {
+			pr_info(" dropping dirty %s state for %p %lld\n",
+				ceph_cap_string(ci->i_dirty_caps),
+				inode, ceph_ino(inode));
+			ci->i_dirty_caps = 0;
+			list_del_init(&ci->i_dirty_item);
+			drop = 1;
+		}
+		if (!list_empty(&ci->i_flushing_item)) {
+			pr_info(" dropping dirty+flushing %s state for %p %lld\n",
+				ceph_cap_string(ci->i_flushing_caps),
+				inode, ceph_ino(inode));
+			ci->i_flushing_caps = 0;
+			list_del_init(&ci->i_flushing_item);
+			mdsc->num_cap_flushing--;
+			drop = 1;
+		}
+		if (drop && ci->i_wrbuffer_ref) {
+			pr_info(" dropping dirty data for %p %lld\n",
+				inode, ceph_ino(inode));
+			ci->i_wrbuffer_ref = 0;
+			ci->i_wrbuffer_ref_head = 0;
+			drop++;
+		}
+		spin_unlock(&mdsc->cap_dirty_lock);
+	}
+	spin_unlock(&ci->i_ceph_lock);
+	/* one iput per discarded state; see header comment */
+	while (drop--)
+		iput(inode);
+	return 0;
+}
+
+/*
+ * Remove every cap attached to @session, then wait for caps held by
+ * inodes that are mid-deletion to disappear before returning.
+ *
+ * caller must hold session s_mutex
+ */
+static void remove_session_caps(struct ceph_mds_session *session)
+{
+	dout("remove_session_caps on %p\n", session);
+	iterate_session_caps(session, remove_session_caps_cb, NULL);
+
+	spin_lock(&session->s_cap_lock);
+	if (session->s_nr_caps > 0) {
+		struct super_block *sb = session->s_mdsc->fsc->sb;
+		struct inode *inode;
+		struct ceph_cap *cap, *prev = NULL;
+		struct ceph_vino vino;
+		/*
+		 * iterate_session_caps() skips inodes that are being
+		 * deleted, we need to wait until deletions are complete.
+		 * __wait_on_freeing_inode() is designed for the job,
+		 * but it is not exported, so use lookup inode function
+		 * to access it.
+		 */
+		while (!list_empty(&session->s_caps)) {
+			cap = list_entry(session->s_caps.next,
+					 struct ceph_cap, session_caps);
+			/* same cap still at the head: no progress, stop */
+			if (cap == prev)
+				break;
+			prev = cap;
+			vino = cap->ci->i_vino;
+			/* drop the lock: ceph_find_inode may sleep */
+			spin_unlock(&session->s_cap_lock);
+
+			inode = ceph_find_inode(sb, vino);
+			iput(inode);
+
+			spin_lock(&session->s_cap_lock);
+		}
+	}
+	spin_unlock(&session->s_cap_lock);
+
+	BUG_ON(session->s_nr_caps > 0);
+	BUG_ON(!list_empty(&session->s_cap_flushing));
+	cleanup_cap_releases(session);
+}
+
+/*
+ * iterate_session_caps() callback: wake anyone waiting on this inode's
+ * caps.  A non-NULL @arg marks a reconnect, in which case the inode's
+ * wanted/requested max_size are also reset.
+ *
+ * caller must hold s_mutex.
+ */
+static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
+			      void *arg)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	int reconnect = arg != NULL;
+
+	wake_up_all(&ci->i_cap_wq);
+	if (!reconnect)
+		return 0;
+
+	spin_lock(&ci->i_ceph_lock);
+	ci->i_wanted_max_size = 0;
+	ci->i_requested_max_size = 0;
+	spin_unlock(&ci->i_ceph_lock);
+	return 0;
+}
+
+/*
+ * Wake cap waiters on every inode with caps on this session; when
+ * @reconnect is non-zero the callback also resets each inode's
+ * wanted/requested max_size (see wake_up_session_cb).
+ */
+static void wake_up_session_caps(struct ceph_mds_session *session,
+				 int reconnect)
+{
+	dout("wake_up_session_caps %p mds%d\n", session, session->s_mds);
+	iterate_session_caps(session, wake_up_session_cb,
+			     (void *)(unsigned long)reconnect);
+}
+
+/*
+ * Send periodic message to MDS renewing all currently held caps. The
+ * ack will reset the expiration for all caps from this session.
+ *
+ * caller holds s_mutex
+ */
+static int send_renew_caps(struct ceph_mds_client *mdsc,
+			   struct ceph_mds_session *session)
+{
+	struct ceph_msg *msg;
+	int state;
+
+	/*
+	 * Report stale caps when the ttl has expired and the ttl was
+	 * set at-or-after our previous renew request (i.e. no renewal
+	 * appears to still be in flight).
+	 */
+	if (time_after_eq(jiffies, session->s_cap_ttl) &&
+	    time_after_eq(session->s_cap_ttl, session->s_renew_requested))
+		pr_info("mds%d caps stale\n", session->s_mds);
+	/* record the request time even if we bail out below */
+	session->s_renew_requested = jiffies;
+
+	/* do not try to renew caps until a recovering mds has reconnected
+	 * with its clients. */
+	state = ceph_mdsmap_get_state(mdsc->mdsmap, session->s_mds);
+	if (state < CEPH_MDS_STATE_RECONNECT) {
+		dout("send_renew_caps ignoring mds%d (%s)\n",
+		     session->s_mds, ceph_mds_state_name(state));
+		return 0;
+	}
+
+	dout("send_renew_caps to mds%d (%s)\n", session->s_mds,
+	     ceph_mds_state_name(state));
+	msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS,
+				 ++session->s_renew_seq);
+	if (!msg)
+		return -ENOMEM;
+	ceph_con_send(&session->s_con, msg);
+	return 0;
+}
+
+/*
+ * Acknowledge a CEPH_SESSION_FLUSHMSG from the MDS by echoing back the
+ * given sequence number.
+ *
+ * Returns 0 on success, -ENOMEM if the ack message cannot be allocated.
+ */
+static int send_flushmsg_ack(struct ceph_mds_client *mdsc,
+			     struct ceph_mds_session *session, u64 seq)
+{
+	struct ceph_msg *msg;
+
+	/* fixed debug format: was "(%s)s seq" with a stray 's' */
+	dout("send_flushmsg_ack to mds%d (%s) seq %lld\n",
+	     session->s_mds, session_state_name(session->s_state), seq);
+	msg = create_session_msg(CEPH_SESSION_FLUSHMSG_ACK, seq);
+	if (!msg)
+		return -ENOMEM;
+	ceph_con_send(&session->s_con, msg);
+	return 0;
+}
+
+
+/*
+ * Note new cap ttl, and any transition from stale -> not stale (fresh?).
+ *
+ * Called under session->s_mutex
+ */
+static void renewed_caps(struct ceph_mds_client *mdsc,
+			 struct ceph_mds_session *session, int is_renew)
+{
+	int was_stale;
+	int wake = 0;
+
+	spin_lock(&session->s_cap_lock);
+	/* only a renewal ack can transition us out of "stale" */
+	was_stale = is_renew && time_after_eq(jiffies, session->s_cap_ttl);
+
+	/* new ttl is measured from when we sent the renew request */
+	session->s_cap_ttl = session->s_renew_requested +
+		mdsc->mdsmap->m_session_timeout*HZ;
+
+	if (was_stale) {
+		if (time_before(jiffies, session->s_cap_ttl)) {
+			pr_info("mds%d caps renewed\n", session->s_mds);
+			/* wake waiters outside the spinlock, below */
+			wake = 1;
+		} else {
+			pr_info("mds%d caps still stale\n", session->s_mds);
+		}
+	}
+	dout("renewed_caps mds%d ttl now %lu, was %s, now %s\n",
+	     session->s_mds, session->s_cap_ttl, was_stale ? "stale" : "fresh",
+	     time_before(jiffies, session->s_cap_ttl) ? "stale" : "fresh");
+	spin_unlock(&session->s_cap_lock);
+
+	if (wake)
+		wake_up_session_caps(session, 0);
+}
+
+/*
+ * Ask the MDS to close this session.
+ */
+static int request_close_session(struct ceph_mds_client *mdsc,
+				 struct ceph_mds_session *session)
+{
+	struct ceph_msg *close_msg;
+
+	dout("request_close_session mds%d state %s seq %lld\n",
+	     session->s_mds, session_state_name(session->s_state),
+	     session->s_seq);
+	close_msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE,
+				       session->s_seq);
+	if (!close_msg)
+		return -ENOMEM;
+	ceph_con_send(&session->s_con, close_msg);
+	return 0;
+}
+
+/*
+ * Called with s_mutex held.
+ *
+ * Move the session to CLOSING and send the close request.  A no-op
+ * (returns 0) if the session is already CLOSING or further along.
+ */
+static int __close_session(struct ceph_mds_client *mdsc,
+			 struct ceph_mds_session *session)
+{
+	if (session->s_state >= CEPH_MDS_SESSION_CLOSING)
+		return 0;
+	session->s_state = CEPH_MDS_SESSION_CLOSING;
+	return request_close_session(mdsc, session);
+}
+
+/*
+ * Trim old(er) caps.
+ *
+ * Because we can't cache an inode without one or more caps, we do
+ * this indirectly: if a cap is unused, we prune its aliases, at which
+ * point the inode will hopefully get dropped to.
+ *
+ * Yes, this is a bit sloppy. Our only real goal here is to respond to
+ * memory pressure from the MDS, though, so it needn't be perfect.
+ */
+static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
+{
+	struct ceph_mds_session *session = arg;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	int used, wanted, oissued, mine;
+
+	/* trim quota exhausted: returning -1 stops the iteration */
+	if (session->s_trim_caps <= 0)
+		return -1;
+
+	spin_lock(&ci->i_ceph_lock);
+	mine = cap->issued | cap->implemented;
+	used = __ceph_caps_used(ci);
+	wanted = __ceph_caps_file_wanted(ci);
+	oissued = __ceph_caps_issued_other(ci, cap);
+
+	dout("trim_caps_cb %p cap %p mine %s oissued %s used %s wanted %s\n",
+	     inode, cap, ceph_cap_string(mine), ceph_cap_string(oissued),
+	     ceph_cap_string(used), ceph_cap_string(wanted));
+	if (cap == ci->i_auth_cap) {
+		/* never trim the auth cap while there is dirty/flushing
+		 * state or any write use/want on the inode */
+		if (ci->i_dirty_caps | ci->i_flushing_caps)
+			goto out;
+		if ((used | wanted) & CEPH_CAP_ANY_WR)
+			goto out;
+	}
+	if ((used | wanted) & ~oissued & mine)
+		goto out;   /* we need these caps */
+
+	session->s_trim_caps--;
+	if (oissued) {
+		/* we aren't the only cap.. just remove us */
+		__ceph_remove_cap(cap, true);
+	} else {
+		/* try to drop referring dentries */
+		spin_unlock(&ci->i_ceph_lock);
+		d_prune_aliases(inode);
+		dout("trim_caps_cb %p cap %p pruned, count now %d\n",
+		     inode, cap, atomic_read(&inode->i_count));
+		return 0;
+	}
+
+out:
+	spin_unlock(&ci->i_ceph_lock);
+	return 0;
+}
+
+/*
+ * Trim session cap count down to some max number.
+ */
+static int trim_caps(struct ceph_mds_client *mdsc,
+		     struct ceph_mds_session *session,
+		     int max_caps)
+{
+	int excess = session->s_nr_caps - max_caps;
+
+	dout("trim_caps mds%d start: %d / %d, trim %d\n",
+	     session->s_mds, session->s_nr_caps, max_caps, excess);
+	if (excess > 0) {
+		session->s_trim_caps = excess;
+		iterate_session_caps(session, trim_caps_cb, session);
+		dout("trim_caps mds%d done: %d / %d, trimmed %d\n",
+		     session->s_mds, session->s_nr_caps, max_caps,
+		     excess - session->s_trim_caps);
+		session->s_trim_caps = 0;
+	}
+
+	/* queue and send release messages for anything we trimmed */
+	ceph_add_cap_releases(mdsc, session);
+	ceph_send_cap_releases(mdsc, session);
+	return 0;
+}
+
+/*
+ * Allocate cap_release messages.  If there is a partially full message
+ * in the queue, try to allocate enough to cover it's remainder, so that
+ * we can send it immediately.
+ *
+ * Called under s_mutex.
+ */
+int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
+			  struct ceph_mds_session *session)
+{
+	struct ceph_msg *msg, *partial = NULL;
+	struct ceph_mds_cap_release *head;
+	int err = -ENOMEM;
+	int extra = mdsc->fsc->mount_options->cap_release_safety;
+	int num;
+
+	dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds,
+	     extra);
+
+	spin_lock(&session->s_cap_lock);
+
+	/* head of s_cap_releases with a nonzero count is a partial msg */
+	if (!list_empty(&session->s_cap_releases)) {
+		msg = list_first_entry(&session->s_cap_releases,
+				       struct ceph_msg,
+				       list_head);
+		head = msg->front.iov_base;
+		num = le32_to_cpu(head->num);
+		if (num) {
+			dout(" partial %p with (%d/%d)\n", msg, num,
+			     (int)CEPH_CAPS_PER_RELEASE);
+			/* allocate enough to fill and flush the partial */
+			extra += CEPH_CAPS_PER_RELEASE - num;
+			partial = msg;
+		}
+	}
+	/*
+	 * The lock is dropped around each GFP_NOFS allocation; the loop
+	 * condition is re-evaluated after it is retaken.
+	 */
+	while (session->s_num_cap_releases < session->s_nr_caps + extra) {
+		spin_unlock(&session->s_cap_lock);
+		msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE,
+				   GFP_NOFS, false);
+		if (!msg)
+			goto out_unlocked;
+		dout("add_cap_releases %p msg %p now %d\n", session, msg,
+		     (int)msg->front.iov_len);
+		head = msg->front.iov_base;
+		head->num = cpu_to_le32(0);
+		msg->front.iov_len = sizeof(*head);
+		spin_lock(&session->s_cap_lock);
+		list_add(&msg->list_head, &session->s_cap_releases);
+		session->s_num_cap_releases += CEPH_CAPS_PER_RELEASE;
+	}
+
+	/* move the partial message to the done queue for sending */
+	if (partial) {
+		head = partial->front.iov_base;
+		num = le32_to_cpu(head->num);
+		dout(" queueing partial %p with %d/%d\n", partial, num,
+		     (int)CEPH_CAPS_PER_RELEASE);
+		list_move_tail(&partial->list_head,
+			       &session->s_cap_releases_done);
+		session->s_num_cap_releases -= CEPH_CAPS_PER_RELEASE - num;
+	}
+	err = 0;
+	spin_unlock(&session->s_cap_lock);
+out_unlocked:
+	return err;
+}
+
+/*
+ * flush all dirty inode data to disk.
+ *
+ * returns true if we've flushed through want_flush_seq
+ */
+static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
+{
+	int mds, ret = 1;
+
+	dout("check_cap_flush want %lld\n", want_flush_seq);
+	mutex_lock(&mdsc->mutex);
+	for (mds = 0; ret && mds < mdsc->max_sessions; mds++) {
+		struct ceph_mds_session *session = mdsc->sessions[mds];
+
+		if (!session)
+			continue;
+		/* take a session ref so we can drop mdsc->mutex before
+		 * acquiring s_mutex (lock ordering) */
+		get_session(session);
+		mutex_unlock(&mdsc->mutex);
+
+		mutex_lock(&session->s_mutex);
+		if (!list_empty(&session->s_cap_flushing)) {
+			/* only the first entry on s_cap_flushing is checked */
+			struct ceph_inode_info *ci =
+				list_entry(session->s_cap_flushing.next,
+					   struct ceph_inode_info,
+					   i_flushing_item);
+			struct inode *inode = &ci->vfs_inode;
+
+			spin_lock(&ci->i_ceph_lock);
+			if (ci->i_cap_flush_seq <= want_flush_seq) {
+				dout("check_cap_flush still flushing %p "
+				     "seq %lld <= %lld to mds%d\n", inode,
+				     ci->i_cap_flush_seq, want_flush_seq,
+				     session->s_mds);
+				ret = 0;
+			}
+			spin_unlock(&ci->i_ceph_lock);
+		}
+		mutex_unlock(&session->s_mutex);
+		ceph_put_mds_session(session);
+
+		/* mdsc->mutex was dropped; bail now or retake for next mds */
+		if (!ret)
+			return ret;
+		mutex_lock(&mdsc->mutex);
+	}
+
+	mutex_unlock(&mdsc->mutex);
+	dout("check_cap_flush ok, flushed thru %lld\n", want_flush_seq);
+	return ret;
+}
+
+/*
+ * Send every completed cap release message queued on
+ * s_cap_releases_done to the MDS.
+ *
+ * called under s_mutex
+ */
+void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
+			    struct ceph_mds_session *session)
+{
+	struct ceph_msg *msg;
+
+	dout("send_cap_releases mds%d\n", session->s_mds);
+	spin_lock(&session->s_cap_lock);
+	while (!list_empty(&session->s_cap_releases_done)) {
+		msg = list_first_entry(&session->s_cap_releases_done,
+				 struct ceph_msg, list_head);
+		list_del_init(&msg->list_head);
+		/* detach first, then drop the lock to send */
+		spin_unlock(&session->s_cap_lock);
+		msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+		dout("send_cap_releases mds%d %p\n", session->s_mds, msg);
+		ceph_con_send(&session->s_con, msg);
+		spin_lock(&session->s_cap_lock);
+	}
+	spin_unlock(&session->s_cap_lock);
+}
+
+/*
+ * Reset the session's cap release state: zero out the in-progress
+ * message at the head of s_cap_releases, and move every completed
+ * message from s_cap_releases_done back onto s_cap_releases, crediting
+ * their cap slots back to s_num_cap_releases.
+ *
+ * NOTE(review): the lists are walked without s_cap_lock here --
+ * presumably callers serialize via s_mutex; confirm before reusing
+ * this pattern elsewhere.
+ */
+static void discard_cap_releases(struct ceph_mds_client *mdsc,
+				 struct ceph_mds_session *session)
+{
+	struct ceph_msg *msg;
+	struct ceph_mds_cap_release *head;
+	unsigned num;
+
+	dout("discard_cap_releases mds%d\n", session->s_mds);
+
+	if (!list_empty(&session->s_cap_releases)) {
+		/* zero out the in-progress message */
+		msg = list_first_entry(&session->s_cap_releases,
+				       struct ceph_msg, list_head);
+		head = msg->front.iov_base;
+		num = le32_to_cpu(head->num);
+		dout("discard_cap_releases mds%d %p %u\n",
+		     session->s_mds, msg, num);
+		head->num = cpu_to_le32(0);
+		msg->front.iov_len = sizeof(*head);
+		session->s_num_cap_releases += num;
+	}
+
+	/* requeue completed messages */
+	while (!list_empty(&session->s_cap_releases_done)) {
+		msg = list_first_entry(&session->s_cap_releases_done,
+				 struct ceph_msg, list_head);
+		list_del_init(&msg->list_head);
+
+		head = msg->front.iov_base;
+		num = le32_to_cpu(head->num);
+		dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg,
+		     num);
+		session->s_num_cap_releases += num;
+		head->num = cpu_to_le32(0);
+		msg->front.iov_len = sizeof(*head);
+		list_add(&msg->list_head, &session->s_cap_releases);
+	}
+}
+
+/*
+ * requests
+ */
+
+/*
+ * Size and allocate the buffer that will hold a parsed readdir reply
+ * for @dir, based on the directory's current file + subdir count
+ * (clamped to [1, max_readdir]).  Falls back to smaller allocations
+ * under memory pressure.  On success, sets rinfo->dir_in/dir_buf_size,
+ * req->r_num_caps and the readdir max_entries/max_bytes request args.
+ * Returns 0 or -ENOMEM.
+ */
+int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
+				    struct inode *dir)
+{
+	struct ceph_inode_info *ci = ceph_inode(dir);
+	struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
+	struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options;
+	/* per-entry footprint in the parsed reply arrays */
+	size_t size = sizeof(*rinfo->dir_in) + sizeof(*rinfo->dir_dname_len) +
+		      sizeof(*rinfo->dir_dname) + sizeof(*rinfo->dir_dlease);
+	int order, num_entries;
+
+	spin_lock(&ci->i_ceph_lock);
+	num_entries = ci->i_files + ci->i_subdirs;
+	spin_unlock(&ci->i_ceph_lock);
+	num_entries = max(num_entries, 1);
+	num_entries = min(num_entries, opt->max_readdir);
+
+	/* try progressively smaller orders until an allocation succeeds */
+	order = get_order(size * num_entries);
+	while (order >= 0) {
+		rinfo->dir_in = (void*)__get_free_pages(GFP_NOFS | __GFP_NOWARN,
+							order);
+		if (rinfo->dir_in)
+			break;
+		order--;
+	}
+	if (!rinfo->dir_in)
+		return -ENOMEM;
+
+	/* recompute how many entries actually fit in what we got */
+	num_entries = (PAGE_SIZE << order) / size;
+	num_entries = min(num_entries, opt->max_readdir);
+
+	rinfo->dir_buf_size = PAGE_SIZE << order;
+	req->r_num_caps = num_entries + 1;
+	req->r_args.readdir.max_entries = cpu_to_le32(num_entries);
+	req->r_args.readdir.max_bytes = cpu_to_le32(opt->max_readdir_bytes);
+	return 0;
+}
+
+/*
+ * Allocate and initialize a new mds request for operation @op using
+ * direct mode @mode.  Returns the request or ERR_PTR(-ENOMEM).
+ */
+struct ceph_mds_request *
+ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
+{
+	struct ceph_mds_request *req;
+
+	req = kzalloc(sizeof(*req), GFP_NOFS);
+	if (!req)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&req->r_kref);
+	mutex_init(&req->r_fill_mutex);
+	init_completion(&req->r_completion);
+	init_completion(&req->r_safe_completion);
+	INIT_LIST_HEAD(&req->r_unsafe_dir_item);
+	INIT_LIST_HEAD(&req->r_wait);
+	INIT_LIST_HEAD(&req->r_unsafe_item);
+
+	req->r_mdsc = mdsc;
+	req->r_op = op;
+	req->r_direct_mode = mode;
+	req->r_started = jiffies;
+	req->r_resend_mds = -1;	/* no mds hint yet */
+	req->r_fmode = -1;
+
+	return req;
+}
+
+/*
+ * return oldest (lowest) request, tid in request tree, 0 if none.
+ *
+ * called under mdsc->mutex.
+ */
+static struct ceph_mds_request *__get_oldest_req(struct ceph_mds_client *mdsc)
+{
+	struct rb_node *first = rb_first(&mdsc->request_tree);
+
+	if (!first)
+		return NULL;
+	return rb_entry(first, struct ceph_mds_request, r_node);
+}
+
+/* tid of the oldest in-flight request, or 0 when the tree is empty */
+static u64 __get_oldest_tid(struct ceph_mds_client *mdsc)
+{
+	struct ceph_mds_request *oldest = __get_oldest_req(mdsc);
+
+	return oldest ? oldest->r_tid : 0;
+}
+
+/*
+ * Build a dentry's path. Allocate on heap; caller must kfree. Based
+ * on build_path_from_dentry in fs/cifs/dir.c.
+ *
+ * If @stop_on_nosnap, generate path relative to the first non-snapped
+ * inode.
+ *
+ * Encode hidden .snap dirs as a double /, i.e.
+ * foo/.snap/bar -> foo//bar
+ */
+char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
+			   int stop_on_nosnap)
+{
+	struct dentry *temp;
+	char *path;
+	int len, pos;
+	unsigned seq;
+
+	if (dentry == NULL)
+		return ERR_PTR(-EINVAL);
+
+retry:
+	len = 0;
+	/* sample rename_lock so a concurrent rename is detected below */
+	seq = read_seqbegin(&rename_lock);
+	rcu_read_lock();
+	/* pass 1: measure the path length, walking towards the root */
+	for (temp = dentry; !IS_ROOT(temp);) {
+		struct inode *inode = temp->d_inode;
+		if (inode && ceph_snap(inode) == CEPH_SNAPDIR)
+			len++; /* slash only */
+		else if (stop_on_nosnap && inode &&
+			 ceph_snap(inode) == CEPH_NOSNAP)
+			break;
+		else
+			len += 1 + temp->d_name.len;
+		temp = temp->d_parent;
+	}
+	rcu_read_unlock();
+	if (len)
+		len--; /* no leading '/' */
+
+	path = kmalloc(len+1, GFP_NOFS);
+	if (path == NULL)
+		return ERR_PTR(-ENOMEM);
+	pos = len;
+	path[pos] = 0; /* trailing null */
+	rcu_read_lock();
+	/* pass 2: fill the buffer backwards from the terminator */
+	for (temp = dentry; !IS_ROOT(temp) && pos != 0; ) {
+		struct inode *inode;
+
+		spin_lock(&temp->d_lock);
+		inode = temp->d_inode;
+		if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
+			dout("build_path path+%d: %p SNAPDIR\n",
+			     pos, temp);
+		} else if (stop_on_nosnap && inode &&
+			   ceph_snap(inode) == CEPH_NOSNAP) {
+			spin_unlock(&temp->d_lock);
+			break;
+		} else {
+			pos -= temp->d_name.len;
+			if (pos < 0) {
+				/* a name grew since pass 1; fall through
+				 * to the retry check below */
+				spin_unlock(&temp->d_lock);
+				break;
+			}
+			strncpy(path + pos, temp->d_name.name,
+				temp->d_name.len);
+		}
+		spin_unlock(&temp->d_lock);
+		if (pos)
+			path[--pos] = '/';
+		temp = temp->d_parent;
+	}
+	rcu_read_unlock();
+	if (pos != 0 || read_seqretry(&rename_lock, seq)) {
+		pr_err("build_path did not end path lookup where "
+		       "expected, namelen is %d, pos is %d\n", len, pos);
+		/* presumably this is only possible if racing with a
+		   rename of one of the parent directories (we can not
+		   lock the dentries above us to prevent this, but
+		   retrying should be harmless) */
+		kfree(path);
+		goto retry;
+	}
+
+	*base = ceph_ino(temp->d_inode);
+	*plen = len;
+	dout("build_path on %p %d built %llx '%.*s'\n",
+	     dentry, d_count(dentry), *base, len, path);
+	return path;
+}
+
+/*
+ * Express @dentry either as (parent ino, name) when the parent is not
+ * snapped, or as a full path from ceph_mdsc_build_path().  *pfreepath
+ * is set when the caller owns *ppath and must kfree it.
+ */
+static int build_dentry_path(struct dentry *dentry,
+			     const char **ppath, int *ppathlen, u64 *pino,
+			     int *pfreepath)
+{
+	char *built;
+
+	if (ceph_snap(dentry->d_parent->d_inode) != CEPH_NOSNAP) {
+		built = ceph_mdsc_build_path(dentry, ppathlen, pino, 1);
+		if (IS_ERR(built))
+			return PTR_ERR(built);
+		*ppath = built;
+		*pfreepath = 1;
+		return 0;
+	}
+
+	/* fast path: live parent, send its ino plus the dentry name */
+	*pino = ceph_ino(dentry->d_parent->d_inode);
+	*ppath = dentry->d_name.name;
+	*ppathlen = dentry->d_name.len;
+	return 0;
+}
+
+/*
+ * Express @inode either as a bare ino (not snapped) or as a path built
+ * from one of its dentry aliases.  *pfreepath is set when the caller
+ * owns *ppath and must kfree it.
+ */
+static int build_inode_path(struct inode *inode,
+			    const char **ppath, int *ppathlen, u64 *pino,
+			    int *pfreepath)
+{
+	struct dentry *alias;
+	char *built;
+
+	if (ceph_snap(inode) == CEPH_NOSNAP) {
+		*pino = ceph_ino(inode);
+		*ppathlen = 0;
+		return 0;
+	}
+
+	alias = d_find_alias(inode);
+	built = ceph_mdsc_build_path(alias, ppathlen, pino, 1);
+	dput(alias);
+	if (IS_ERR(built))
+		return PTR_ERR(built);
+	*ppath = built;
+	*pfreepath = 1;
+	return 0;
+}
+
+/*
+ * request arguments may be specified via an inode *, a dentry *, or
+ * an explicit ino+path.
+ */
+static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
+				 const char *rpath, u64 rino,
+				 const char **ppath, int *pathlen,
+				 u64 *ino, int *freepath)
+{
+	int err;
+
+	if (rinode) {
+		err = build_inode_path(rinode, ppath, pathlen, ino, freepath);
+		dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode),
+		     ceph_snap(rinode));
+		return err;
+	}
+	if (rdentry) {
+		err = build_dentry_path(rdentry, ppath, pathlen, ino,
+					freepath);
+		dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
+		     *ppath);
+		return err;
+	}
+	if (rpath || rino) {
+		*ino = rino;
+		*ppath = rpath;
+		*pathlen = rpath ? strlen(rpath) : 0;
+		dout(" path %.*s\n", *pathlen, rpath);
+	}
+	return 0;
+}
+
+/*
+ * Build the CEPH_MSG_CLIENT_REQUEST message for @req: head, the two
+ * encoded filepaths, then any cap/dentry releases, then optional
+ * outbound data pages.  Returns the message or an ERR_PTR.
+ *
+ * called under mdsc->mutex
+ */
+static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
+					       struct ceph_mds_request *req,
+					       int mds)
+{
+	struct ceph_msg *msg;
+	struct ceph_mds_request_head *head;
+	const char *path1 = NULL;
+	const char *path2 = NULL;
+	u64 ino1 = 0, ino2 = 0;
+	int pathlen1 = 0, pathlen2 = 0;
+	int freepath1 = 0, freepath2 = 0;
+	int len;
+	u16 releases;
+	void *p, *end;
+	int ret;
+
+	/* path1: the primary target (inode or dentry) */
+	ret = set_request_path_attr(req->r_inode, req->r_dentry,
+			      req->r_path1, req->r_ino1.ino,
+			      &path1, &pathlen1, &ino1, &freepath1);
+	if (ret < 0) {
+		msg = ERR_PTR(ret);
+		goto out;
+	}
+
+	/* path2: the secondary target (e.g. rename old dentry) */
+	ret = set_request_path_attr(NULL, req->r_old_dentry,
+			      req->r_path2, req->r_ino2.ino,
+			      &path2, &pathlen2, &ino2, &freepath2);
+	if (ret < 0) {
+		msg = ERR_PTR(ret);
+		goto out_free1;
+	}
+
+	/* head + two filepaths (each: u8 + u32 len + u64 ino) */
+	len = sizeof(*head) +
+		pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64));
+
+	/* calculate (max) length for cap releases */
+	len += sizeof(struct ceph_mds_request_release) *
+		(!!req->r_inode_drop + !!req->r_dentry_drop +
+		 !!req->r_old_inode_drop + !!req->r_old_dentry_drop);
+	if (req->r_dentry_drop)
+		len += req->r_dentry->d_name.len;
+	if (req->r_old_dentry_drop)
+		len += req->r_old_dentry->d_name.len;
+
+	msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, GFP_NOFS, false);
+	if (!msg) {
+		msg = ERR_PTR(-ENOMEM);
+		goto out_free2;
+	}
+
+	msg->hdr.tid = cpu_to_le64(req->r_tid);
+
+	head = msg->front.iov_base;
+	p = msg->front.iov_base + sizeof(*head);
+	end = msg->front.iov_base + msg->front.iov_len;
+
+	head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
+	head->op = cpu_to_le32(req->r_op);
+	head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, req->r_uid));
+	head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, req->r_gid));
+	head->args = req->r_args;
+
+	ceph_encode_filepath(&p, end, ino1, path1);
+	ceph_encode_filepath(&p, end, ino2, path2);
+
+	/* make note of release offset, in case we need to replay */
+	req->r_request_release_offset = p - msg->front.iov_base;
+
+	/* cap releases */
+	releases = 0;
+	if (req->r_inode_drop)
+		releases += ceph_encode_inode_release(&p,
+		      req->r_inode ? req->r_inode : req->r_dentry->d_inode,
+		      mds, req->r_inode_drop, req->r_inode_unless, 0);
+	if (req->r_dentry_drop)
+		releases += ceph_encode_dentry_release(&p, req->r_dentry,
+		       mds, req->r_dentry_drop, req->r_dentry_unless);
+	if (req->r_old_dentry_drop)
+		releases += ceph_encode_dentry_release(&p, req->r_old_dentry,
+		       mds, req->r_old_dentry_drop, req->r_old_dentry_unless);
+	if (req->r_old_inode_drop)
+		releases += ceph_encode_inode_release(&p,
+		      req->r_old_dentry->d_inode,
+		      mds, req->r_old_inode_drop, req->r_old_inode_unless, 0);
+	head->num_releases = cpu_to_le16(releases);
+
+	BUG_ON(p > end);
+	/* trim front to what was actually encoded */
+	msg->front.iov_len = p - msg->front.iov_base;
+	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+
+	if (req->r_data_len) {
+		/* outbound data set only by ceph_sync_setxattr() */
+		BUG_ON(!req->r_pages);
+		ceph_msg_data_add_pages(msg, req->r_pages, req->r_data_len, 0);
+	}
+
+	msg->hdr.data_len = cpu_to_le32(req->r_data_len);
+	msg->hdr.data_off = cpu_to_le16(0);
+
+out_free2:
+	if (freepath2)
+		kfree((char *)path2);
+out_free1:
+	if (freepath1)
+		kfree((char *)path1);
+out:
+	return msg;
+}
+
+/*
+ * called under mdsc->mutex if error, under no mutex if
+ * success.
+ */
+static void complete_request(struct ceph_mds_client *mdsc,
+			     struct ceph_mds_request *req)
+{
+	/* prefer the caller-supplied callback; otherwise wake waiters */
+	if (!req->r_callback)
+		complete_all(&req->r_completion);
+	else
+		req->r_callback(mdsc, req);
+}
+
+/*
+ * Prepare req->r_request for (re)transmission to @mds: for a replay of
+ * an unsafely-acked request, reuse the original message; otherwise
+ * build a fresh one.  Returns 0 or a negative error (after completing
+ * the request).
+ *
+ * called under mdsc->mutex
+ */
+static int __prepare_send_request(struct ceph_mds_client *mdsc,
+				  struct ceph_mds_request *req,
+				  int mds)
+{
+	struct ceph_mds_request_head *rhead;
+	struct ceph_msg *msg;
+	int flags = 0;
+
+	req->r_attempts++;
+	if (req->r_inode) {
+		/* remember the cap mseq this attempt was sent under */
+		struct ceph_cap *cap =
+			ceph_get_cap_for_mds(ceph_inode(req->r_inode), mds);
+
+		if (cap)
+			req->r_sent_on_mseq = cap->mseq;
+		else
+			req->r_sent_on_mseq = -1;
+	}
+	dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req,
+	     req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
+
+	if (req->r_got_unsafe) {
+		/*
+		 * Replay.  Do not regenerate message (and rebuild
+		 * paths, etc.); just use the original message.
+		 * Rebuilding paths will break for renames because
+		 * d_move mangles the src name.
+		 */
+		msg = req->r_request;
+		rhead = msg->front.iov_base;
+
+		flags = le32_to_cpu(rhead->flags);
+		flags |= CEPH_MDS_FLAG_REPLAY;
+		rhead->flags = cpu_to_le32(flags);
+
+		if (req->r_target_inode)
+			rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
+
+		rhead->num_retry = req->r_attempts - 1;
+
+		/* remove cap/dentry releases from message */
+		rhead->num_releases = 0;
+		msg->hdr.front_len = cpu_to_le32(req->r_request_release_offset);
+		msg->front.iov_len = req->r_request_release_offset;
+		return 0;
+	}
+
+	/* not a replay: drop any stale message and build a new one */
+	if (req->r_request) {
+		ceph_msg_put(req->r_request);
+		req->r_request = NULL;
+	}
+	msg = create_request_message(mdsc, req, mds);
+	if (IS_ERR(msg)) {
+		req->r_err = PTR_ERR(msg);
+		complete_request(mdsc, req);
+		return PTR_ERR(msg);
+	}
+	req->r_request = msg;
+
+	rhead = msg->front.iov_base;
+	rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
+	if (req->r_got_unsafe)
+		flags |= CEPH_MDS_FLAG_REPLAY;
+	if (req->r_locked_dir)
+		flags |= CEPH_MDS_FLAG_WANT_DENTRY;
+	rhead->flags = cpu_to_le32(flags);
+	rhead->num_fwd = req->r_num_fwd;
+	rhead->num_retry = req->r_attempts - 1;
+	rhead->ino = 0;
+
+	dout(" r_locked_dir = %p\n", req->r_locked_dir);
+	return 0;
+}
+
+/*
+ * send request, or put it on the appropriate wait list.
+ */
+static int __do_request(struct ceph_mds_client *mdsc,
+			struct ceph_mds_request *req)
+{
+	struct ceph_mds_session *session = NULL;
+	int mds = -1;
+	int err = -EAGAIN;
+
+	/* already finished (or aborted)?  nothing to send */
+	if (req->r_err || req->r_got_result) {
+		if (req->r_aborted)
+			__unregister_request(mdsc, req);
+		goto out;
+	}
+
+	if (req->r_timeout &&
+	    time_after_eq(jiffies, req->r_started + req->r_timeout)) {
+		dout("do_request timed out\n");
+		err = -EIO;
+		goto finish;
+	}
+
+	/* drop any session ref from a previous attempt */
+	put_request_session(req);
+
+	mds = __choose_mds(mdsc, req);
+	if (mds < 0 ||
+	    ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) {
+		/* parked on waiting_for_map until a new mdsmap arrives */
+		dout("do_request no mds or not active, waiting for map\n");
+		list_add(&req->r_wait, &mdsc->waiting_for_map);
+		goto out;
+	}
+
+	/* get, open session */
+	session = __ceph_lookup_mds_session(mdsc, mds);
+	if (!session) {
+		session = register_session(mdsc, mds);
+		if (IS_ERR(session)) {
+			err = PTR_ERR(session);
+			goto finish;
+		}
+	}
+	req->r_session = get_session(session);
+
+	dout("do_request mds%d session %p state %s\n", mds, session,
+	     session_state_name(session->s_state));
+	if (session->s_state != CEPH_MDS_SESSION_OPEN &&
+	    session->s_state != CEPH_MDS_SESSION_HUNG) {
+		/* session not usable yet: kick an open and park the
+		 * request on the session's wait list */
+		if (session->s_state == CEPH_MDS_SESSION_NEW ||
+		    session->s_state == CEPH_MDS_SESSION_CLOSING)
+			__open_session(mdsc, session);
+		list_add(&req->r_wait, &session->s_waiting);
+		goto out_session;
+	}
+
+	/* send request */
+	req->r_resend_mds = -1;   /* forget any previous mds hint */
+
+	if (req->r_request_started == 0)   /* note request start time */
+		req->r_request_started = jiffies;
+
+	err = __prepare_send_request(mdsc, req, mds);
+	if (!err) {
+		/* extra ref: the con owns one while the msg is in flight */
+		ceph_msg_get(req->r_request);
+		ceph_con_send(&session->s_con, req->r_request);
+	}
+
+out_session:
+	ceph_put_mds_session(session);
+out:
+	return err;
+
+finish:
+	req->r_err = err;
+	complete_request(mdsc, req);
+	goto out;
+}
+
+/*
+ * called under mdsc->mutex
+ */
+static void __wake_requests(struct ceph_mds_client *mdsc,
+			    struct list_head *head)
+{
+	LIST_HEAD(tmp_list);
+
+	/* detach the whole list first; __do_request may requeue entries */
+	list_splice_init(head, &tmp_list);
+
+	while (!list_empty(&tmp_list)) {
+		struct ceph_mds_request *req =
+			list_first_entry(&tmp_list,
+					 struct ceph_mds_request, r_wait);
+
+		list_del_init(&req->r_wait);
+		dout(" wake request %p tid %llu\n", req, req->r_tid);
+		__do_request(mdsc, req);
+	}
+}
+
+/*
+ * Wake up threads with requests pending for @mds, so that they can
+ * resubmit their requests to a possibly different mds.
+ */
+static void kick_requests(struct ceph_mds_client *mdsc, int mds)
+{
+	struct rb_node *node;
+
+	dout("kick_requests mds%d\n", mds);
+	for (node = rb_first(&mdsc->request_tree); node;
+	     node = rb_next(node)) {
+		struct ceph_mds_request *req =
+			rb_entry(node, struct ceph_mds_request, r_node);
+
+		/* skip requests that already got an unsafe reply */
+		if (req->r_got_unsafe)
+			continue;
+		if (req->r_session && req->r_session->s_mds == mds) {
+			dout(" kicking tid %llu\n", req->r_tid);
+			__do_request(mdsc, req);
+		}
+	}
+}
+
+/* register a request and try to send it; does not wait for completion */
+void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
+			      struct ceph_mds_request *req)
+{
+	dout("submit_request on %p\n", req);
+
+	mutex_lock(&mdsc->mutex);
+	__register_request(mdsc, req, NULL);
+	__do_request(mdsc, req);
+	mutex_unlock(&mdsc->mutex);
+}
+
+/*
+ * Synchrously perform an mds request.  Take care of all of the
+ * session setup, forwarding, retry details.
+ *
+ * Returns the MDS reply result, or a negative error on timeout/abort.
+ */
+int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
+			 struct inode *dir,
+			 struct ceph_mds_request *req)
+{
+	int err;
+
+	dout("do_request on %p\n", req);
+
+	/* take CAP_PIN refs for r_inode, r_locked_dir, r_old_dentry */
+	if (req->r_inode)
+		ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
+	if (req->r_locked_dir)
+		ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
+	if (req->r_old_dentry_dir)
+		ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
+				  CEPH_CAP_PIN);
+
+	/* issue */
+	mutex_lock(&mdsc->mutex);
+	__register_request(mdsc, req, dir);
+	__do_request(mdsc, req);
+
+	/* __do_request failed before anything was sent */
+	if (req->r_err) {
+		err = req->r_err;
+		__unregister_request(mdsc, req);
+		dout("do_request early error %d\n", err);
+		goto out;
+	}
+
+	/* wait */
+	mutex_unlock(&mdsc->mutex);
+	dout("do_request waiting\n");
+	if (req->r_timeout) {
+		err = (long)wait_for_completion_killable_timeout(
+			&req->r_completion, req->r_timeout);
+		if (err == 0)
+			err = -EIO;   /* timed out */
+	} else {
+		err = wait_for_completion_killable(&req->r_completion);
+	}
+	dout("do_request waited, got %d\n", err);
+	mutex_lock(&mdsc->mutex);
+
+	/* only abort if we didn't race with a real reply */
+	if (req->r_got_result) {
+		err = le32_to_cpu(req->r_reply_info.head->result);
+	} else if (err < 0) {
+		/* interrupted (or timed out): abort the request */
+		dout("aborted request %lld with %d\n", req->r_tid, err);
+
+		/*
+		 * ensure we aren't running concurrently with
+		 * ceph_fill_trace or ceph_readdir_prepopulate, which
+		 * rely on locks (dir mutex) held by our caller.
+		 */
+		mutex_lock(&req->r_fill_mutex);
+		req->r_err = err;
+		req->r_aborted = true;
+		mutex_unlock(&req->r_fill_mutex);
+
+		if (req->r_locked_dir &&
+		    (req->r_op & CEPH_MDS_OP_WRITE))
+			ceph_invalidate_dir_request(req);
+	} else {
+		err = req->r_err;
+	}
+
+out:
+	mutex_unlock(&mdsc->mutex);
+	dout("do_request %p done, result %d\n", req, err);
+	return err;
+}
+
+/*
+ * Invalidate dir's completeness, dentry lease state on an aborted MDS
+ * namespace request.
+ */
+void ceph_invalidate_dir_request(struct ceph_mds_request *req)
+{
+ struct inode *inode = req->r_locked_dir;
+
+ dout("invalidate_dir_request %p (complete, lease(s))\n", inode);
+
+ /* the dir contents may no longer match the MDS; drop the
+ * "complete" flag and any dentry leases attached to the request */
+ ceph_dir_clear_complete(inode);
+ if (req->r_dentry)
+ ceph_invalidate_dentry_lease(req->r_dentry);
+ if (req->r_old_dentry)
+ ceph_invalidate_dentry_lease(req->r_old_dentry);
+}
+
+/*
+ * Handle mds reply.
+ *
+ * We take the session mutex and parse and process the reply immediately.
+ * This preserves the logical ordering of replies, capabilities, etc., sent
+ * by the MDS as they are applied to our local cache.
+ *
+ * A request may receive two replies: first an "unsafe" one (applied but
+ * not yet durable on the MDS), then a "safe" one once it is committed.
+ */
+static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
+{
+ struct ceph_mds_client *mdsc = session->s_mdsc;
+ struct ceph_mds_request *req;
+ struct ceph_mds_reply_head *head = msg->front.iov_base;
+ struct ceph_mds_reply_info_parsed *rinfo; /* parsed reply info */
+ u64 tid;
+ int err, result;
+ int mds = session->s_mds;
+
+ if (msg->front.iov_len < sizeof(*head)) {
+ pr_err("mdsc_handle_reply got corrupt (short) reply\n");
+ ceph_msg_dump(msg);
+ return;
+ }
+
+ /* get request, session */
+ tid = le64_to_cpu(msg->hdr.tid);
+ mutex_lock(&mdsc->mutex);
+ req = __lookup_request(mdsc, tid);
+ if (!req) {
+ dout("handle_reply on unknown tid %llu\n", tid);
+ mutex_unlock(&mdsc->mutex);
+ return;
+ }
+ dout("handle_reply %p\n", req);
+
+ /* correct session? */
+ if (req->r_session != session) {
+ pr_err("mdsc_handle_reply got %llu on session mds%d"
+ " not mds%d\n", tid, session->s_mds,
+ req->r_session ? req->r_session->s_mds : -1);
+ mutex_unlock(&mdsc->mutex);
+ goto out;
+ }
+
+ /* dup? */
+ if ((req->r_got_unsafe && !head->safe) ||
+ (req->r_got_safe && head->safe)) {
+ pr_warning("got a dup %s reply on %llu from mds%d\n",
+ head->safe ? "safe" : "unsafe", tid, mds);
+ mutex_unlock(&mdsc->mutex);
+ goto out;
+ }
+ if (req->r_got_safe && !head->safe) {
+ pr_warning("got unsafe after safe on %llu from mds%d\n",
+ tid, mds);
+ mutex_unlock(&mdsc->mutex);
+ goto out;
+ }
+
+ result = le32_to_cpu(head->result);
+
+ /*
+ * Handle an ESTALE
+ * if we're not talking to the authority, send to them
+ * if the authority has changed while we weren't looking,
+ * send to new authority
+ * Otherwise we just have to return an ESTALE
+ */
+ if (result == -ESTALE) {
+ dout("got ESTALE on request %llu", req->r_tid);
+ if (req->r_direct_mode != USE_AUTH_MDS) {
+ dout("not using auth, setting for that now");
+ req->r_direct_mode = USE_AUTH_MDS;
+ __do_request(mdsc, req);
+ mutex_unlock(&mdsc->mutex);
+ goto out;
+ } else {
+ int mds = __choose_mds(mdsc, req);
+ if (mds >= 0 && mds != req->r_session->s_mds) {
+ dout("but auth changed, so resending");
+ __do_request(mdsc, req);
+ mutex_unlock(&mdsc->mutex);
+ goto out;
+ }
+ }
+ dout("have to return ESTALE on request %llu", req->r_tid);
+ }
+
+
+ if (head->safe) {
+ req->r_got_safe = true;
+ __unregister_request(mdsc, req);
+
+ if (req->r_got_unsafe) {
+ /*
+ * We already handled the unsafe response, now do the
+ * cleanup. No need to examine the response; the MDS
+ * doesn't include any result info in the safe
+ * response. And even if it did, there is nothing
+ * useful we could do with a revised return value.
+ */
+ dout("got safe reply %llu, mds%d\n", tid, mds);
+ list_del_init(&req->r_unsafe_item);
+
+ /* last unsafe request during umount? */
+ if (mdsc->stopping && !__get_oldest_req(mdsc))
+ complete_all(&mdsc->safe_umount_waiters);
+ mutex_unlock(&mdsc->mutex);
+ goto out;
+ }
+ } else {
+ /* first (unsafe) reply: track it on the session so it can be
+ * replayed if the MDS restarts before the safe reply arrives */
+ req->r_got_unsafe = true;
+ list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe);
+ }
+
+ dout("handle_reply tid %lld result %d\n", tid, result);
+ rinfo = &req->r_reply_info;
+ err = parse_reply_info(msg, rinfo, session->s_con.peer_features);
+ mutex_unlock(&mdsc->mutex);
+
+ /* mdsc->mutex is dropped; only the session mutex is held below */
+ mutex_lock(&session->s_mutex);
+ if (err < 0) {
+ pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
+ ceph_msg_dump(msg);
+ goto out_err;
+ }
+
+ /* snap trace */
+ if (rinfo->snapblob_len) {
+ down_write(&mdsc->snap_rwsem);
+ ceph_update_snap_trace(mdsc, rinfo->snapblob,
+ rinfo->snapblob + rinfo->snapblob_len,
+ le32_to_cpu(head->op) == CEPH_MDS_OP_RMSNAP);
+ downgrade_write(&mdsc->snap_rwsem);
+ } else {
+ down_read(&mdsc->snap_rwsem);
+ }
+
+ /* insert trace into our cache */
+ mutex_lock(&req->r_fill_mutex);
+ err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
+ if (err == 0) {
+ if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
+ req->r_op == CEPH_MDS_OP_LSSNAP))
+ ceph_readdir_prepopulate(req, req->r_session);
+ ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
+ }
+ mutex_unlock(&req->r_fill_mutex);
+
+ up_read(&mdsc->snap_rwsem);
+out_err:
+ mutex_lock(&mdsc->mutex);
+ if (!req->r_aborted) {
+ if (err) {
+ req->r_err = err;
+ } else {
+ req->r_reply = msg;
+ ceph_msg_get(msg);
+ req->r_got_result = true;
+ }
+ } else {
+ dout("reply arrived after request %lld was aborted\n", tid);
+ }
+ mutex_unlock(&mdsc->mutex);
+
+ ceph_add_cap_releases(mdsc, req->r_session);
+ mutex_unlock(&session->s_mutex);
+
+ /* kick calling process */
+ complete_request(mdsc, req);
+out:
+ ceph_mdsc_put_request(req);
+ return;
+}
+
+
+
+/*
+ * handle mds notification that our request has been forwarded.
+ */
+static void handle_forward(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session,
+ struct ceph_msg *msg)
+{
+ struct ceph_mds_request *req;
+ u64 tid = le64_to_cpu(msg->hdr.tid);
+ u32 next_mds;
+ u32 fwd_seq;
+ int err = -EINVAL;
+ void *p = msg->front.iov_base;
+ void *end = p + msg->front.iov_len;
+
+ /* payload: target mds rank followed by a forward sequence number */
+ ceph_decode_need(&p, end, 2*sizeof(u32), bad);
+ next_mds = ceph_decode_32(&p);
+ fwd_seq = ceph_decode_32(&p);
+
+ mutex_lock(&mdsc->mutex);
+ req = __lookup_request(mdsc, tid);
+ if (!req) {
+ dout("forward tid %llu to mds%d - req dne\n", tid, next_mds);
+ goto out; /* dup reply? */
+ }
+
+ if (req->r_aborted) {
+ dout("forward tid %llu aborted, unregistering\n", tid);
+ __unregister_request(mdsc, req);
+ } else if (fwd_seq <= req->r_num_fwd) {
+ /* stale forward notification; we already acted on a newer one */
+ dout("forward tid %llu to mds%d - old seq %d <= %d\n",
+ tid, next_mds, req->r_num_fwd, fwd_seq);
+ } else {
+ /* resend. forward race not possible; mds would drop */
+ dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
+ BUG_ON(req->r_err);
+ BUG_ON(req->r_got_result);
+ req->r_num_fwd = fwd_seq;
+ req->r_resend_mds = next_mds;
+ put_request_session(req);
+ __do_request(mdsc, req);
+ }
+ ceph_mdsc_put_request(req);
+out:
+ mutex_unlock(&mdsc->mutex);
+ return;
+
+bad:
+ pr_err("mdsc_handle_forward decode error err=%d\n", err);
+}
+
+/*
+ * handle a mds session control message
+ */
+static void handle_session(struct ceph_mds_session *session,
+ struct ceph_msg *msg)
+{
+ struct ceph_mds_client *mdsc = session->s_mdsc;
+ u32 op;
+ u64 seq;
+ int mds = session->s_mds;
+ struct ceph_mds_session_head *h = msg->front.iov_base;
+ int wake = 0;
+
+ /* decode */
+ if (msg->front.iov_len != sizeof(*h))
+ goto bad;
+ op = le32_to_cpu(h->op);
+ seq = le64_to_cpu(h->seq);
+
+ mutex_lock(&mdsc->mutex);
+ if (op == CEPH_SESSION_CLOSE)
+ __unregister_session(mdsc, session);
+ /* FIXME: this ttl calculation is generous */
+ session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose;
+ mutex_unlock(&mdsc->mutex);
+
+ mutex_lock(&session->s_mutex);
+
+ dout("handle_session mds%d %s %p state %s seq %llu\n",
+ mds, ceph_session_op_name(op), session,
+ session_state_name(session->s_state), seq);
+
+ /* any message from the MDS proves a HUNG session is alive again */
+ if (session->s_state == CEPH_MDS_SESSION_HUNG) {
+ session->s_state = CEPH_MDS_SESSION_OPEN;
+ pr_info("mds%d came back\n", session->s_mds);
+ }
+
+ switch (op) {
+ case CEPH_SESSION_OPEN:
+ if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
+ pr_info("mds%d reconnect success\n", session->s_mds);
+ session->s_state = CEPH_MDS_SESSION_OPEN;
+ renewed_caps(mdsc, session, 0);
+ wake = 1;
+ if (mdsc->stopping)
+ __close_session(mdsc, session);
+ break;
+
+ case CEPH_SESSION_RENEWCAPS:
+ if (session->s_renew_seq == seq)
+ renewed_caps(mdsc, session, 1);
+ break;
+
+ case CEPH_SESSION_CLOSE:
+ if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
+ pr_info("mds%d reconnect denied\n", session->s_mds);
+ remove_session_caps(session);
+ wake = 1; /* for good measure */
+ wake_up_all(&mdsc->session_close_wq);
+ kick_requests(mdsc, mds);
+ break;
+
+ case CEPH_SESSION_STALE:
+ pr_info("mds%d caps went stale, renewing\n",
+ session->s_mds);
+ /* bump the cap generation so stale caps/leases are rejected */
+ spin_lock(&session->s_gen_ttl_lock);
+ session->s_cap_gen++;
+ session->s_cap_ttl = jiffies - 1;
+ spin_unlock(&session->s_gen_ttl_lock);
+ send_renew_caps(mdsc, session);
+ break;
+
+ case CEPH_SESSION_RECALL_STATE:
+ trim_caps(mdsc, session, le32_to_cpu(h->max_caps));
+ break;
+
+ case CEPH_SESSION_FLUSHMSG:
+ send_flushmsg_ack(mdsc, session, seq);
+ break;
+
+ default:
+ pr_err("mdsc_handle_session bad op %d mds%d\n", op, mds);
+ WARN_ON(1);
+ }
+
+ mutex_unlock(&session->s_mutex);
+ if (wake) {
+ mutex_lock(&mdsc->mutex);
+ __wake_requests(mdsc, &session->s_waiting);
+ mutex_unlock(&mdsc->mutex);
+ }
+ return;
+
+bad:
+ pr_err("mdsc_handle_session corrupt message mds%d len %d\n", mds,
+ (int)msg->front.iov_len);
+ ceph_msg_dump(msg);
+ return;
+}
+
+
+/*
+ * Resend every request on the session's s_unsafe list (requests for
+ * which only an unsafe reply has been seen) so a recovering MDS can
+ * replay them.
+ *
+ * called under session->mutex.
+ */
+static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ struct ceph_mds_request *req, *nreq;
+ int err;
+
+ dout("replay_unsafe_requests mds%d\n", session->s_mds);
+
+ mutex_lock(&mdsc->mutex);
+ list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) {
+ err = __prepare_send_request(mdsc, req, session->s_mds);
+ if (!err) {
+ ceph_msg_get(req->r_request);
+ ceph_con_send(&session->s_con, req->r_request);
+ }
+ }
+ mutex_unlock(&mdsc->mutex);
+}
+
+/*
+ * Encode information about a cap for a reconnect with the MDS.
+ * Appends the inode number, path, cap record and (if the peer supports
+ * the FLOCK feature) the file-lock records to recon_state->pagelist.
+ */
+static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
+ void *arg)
+{
+ union {
+ struct ceph_mds_cap_reconnect v2;
+ struct ceph_mds_cap_reconnect_v1 v1;
+ } rec;
+ size_t reclen;
+ struct ceph_inode_info *ci;
+ struct ceph_reconnect_state *recon_state = arg;
+ struct ceph_pagelist *pagelist = recon_state->pagelist;
+ char *path;
+ int pathlen, err;
+ u64 pathbase;
+ struct dentry *dentry;
+
+ ci = cap->ci;
+
+ dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
+ inode, ceph_vinop(inode), cap, cap->cap_id,
+ ceph_cap_string(cap->issued));
+ err = ceph_pagelist_encode_64(pagelist, ceph_ino(inode));
+ if (err)
+ return err;
+
+ dentry = d_find_alias(inode);
+ if (dentry) {
+ path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
+ goto out_dput;
+ }
+ } else {
+ /* no dentry for this inode; send an empty path */
+ path = NULL;
+ pathlen = 0;
+ }
+ err = ceph_pagelist_encode_string(pagelist, path, pathlen);
+ if (err)
+ goto out_free;
+
+ spin_lock(&ci->i_ceph_lock);
+ cap->seq = 0; /* reset cap seq */
+ cap->issue_seq = 0; /* and issue_seq */
+ cap->mseq = 0; /* and migrate_seq */
+ cap->cap_gen = cap->session->s_cap_gen;
+
+ if (recon_state->flock) {
+ rec.v2.cap_id = cpu_to_le64(cap->cap_id);
+ rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
+ rec.v2.issued = cpu_to_le32(cap->issued);
+ rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
+ rec.v2.pathbase = cpu_to_le64(pathbase);
+ rec.v2.flock_len = 0;
+ reclen = sizeof(rec.v2);
+ } else {
+ rec.v1.cap_id = cpu_to_le64(cap->cap_id);
+ rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
+ rec.v1.issued = cpu_to_le32(cap->issued);
+ rec.v1.size = cpu_to_le64(inode->i_size);
+ ceph_encode_timespec(&rec.v1.mtime, &inode->i_mtime);
+ ceph_encode_timespec(&rec.v1.atime, &inode->i_atime);
+ rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
+ rec.v1.pathbase = cpu_to_le64(pathbase);
+ reclen = sizeof(rec.v1);
+ }
+ spin_unlock(&ci->i_ceph_lock);
+
+ if (recon_state->flock) {
+ int num_fcntl_locks, num_flock_locks;
+ struct ceph_filelock *flocks;
+
+ /*
+ * The lock count can change between counting and encoding
+ * (i_lock is dropped in between so we can kmalloc); on
+ * -ENOSPC the buffer was too small -- recount and retry.
+ */
+encode_again:
+ spin_lock(&inode->i_lock);
+ ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
+ spin_unlock(&inode->i_lock);
+ flocks = kmalloc((num_fcntl_locks+num_flock_locks) *
+ sizeof(struct ceph_filelock), GFP_NOFS);
+ if (!flocks) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+ spin_lock(&inode->i_lock);
+ err = ceph_encode_locks_to_buffer(inode, flocks,
+ num_fcntl_locks,
+ num_flock_locks);
+ spin_unlock(&inode->i_lock);
+ if (err) {
+ kfree(flocks);
+ if (err == -ENOSPC)
+ goto encode_again;
+ goto out_free;
+ }
+ /*
+ * number of encoded locks is stable, so copy to pagelist
+ */
+ rec.v2.flock_len = cpu_to_le32(2*sizeof(u32) +
+ (num_fcntl_locks+num_flock_locks) *
+ sizeof(struct ceph_filelock));
+ err = ceph_pagelist_append(pagelist, &rec, reclen);
+ if (!err)
+ err = ceph_locks_to_pagelist(flocks, pagelist,
+ num_fcntl_locks,
+ num_flock_locks);
+ kfree(flocks);
+ } else {
+ err = ceph_pagelist_append(pagelist, &rec, reclen);
+ }
+
+ recon_state->nr_caps++;
+out_free:
+ kfree(path);
+out_dput:
+ dput(dentry);
+ return err;
+}
+
+
+/*
+ * If an MDS fails and recovers, clients need to reconnect in order to
+ * reestablish shared state. This includes all caps issued through
+ * this session _and_ the snap_realm hierarchy. Because it's not
+ * clear which snap realms the mds cares about, we send everything we
+ * know about.. that ensures we'll then get any new info the
+ * recovering MDS might have.
+ *
+ * This is a relatively heavyweight operation, but it's rare.
+ *
+ * called with mdsc->mutex held.
+ */
+static void send_mds_reconnect(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ struct ceph_msg *reply;
+ struct rb_node *p;
+ int mds = session->s_mds;
+ int err = -ENOMEM;
+ int s_nr_caps;
+ struct ceph_pagelist *pagelist;
+ struct ceph_reconnect_state recon_state;
+
+ pr_info("mds%d reconnect start\n", mds);
+
+ pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
+ if (!pagelist)
+ goto fail_nopagelist;
+ ceph_pagelist_init(pagelist);
+
+ reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, GFP_NOFS, false);
+ if (!reply)
+ goto fail_nomsg;
+
+ mutex_lock(&session->s_mutex);
+ session->s_state = CEPH_MDS_SESSION_RECONNECTING;
+ session->s_seq = 0;
+
+ /* re-open the connection to the (possibly relocated) MDS */
+ ceph_con_close(&session->s_con);
+ ceph_con_open(&session->s_con,
+ CEPH_ENTITY_TYPE_MDS, mds,
+ ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
+
+ /* replay unsafe requests */
+ replay_unsafe_requests(mdsc, session);
+
+ down_read(&mdsc->snap_rwsem);
+
+ dout("session %p state %s\n", session,
+ session_state_name(session->s_state));
+
+ spin_lock(&session->s_gen_ttl_lock);
+ session->s_cap_gen++;
+ spin_unlock(&session->s_gen_ttl_lock);
+
+ spin_lock(&session->s_cap_lock);
+ /*
+ * notify __ceph_remove_cap() that we are composing cap reconnect.
+ * If a cap get released before being added to the cap reconnect,
+ * __ceph_remove_cap() should skip queuing cap release.
+ */
+ session->s_cap_reconnect = 1;
+ /* drop old cap expires; we're about to reestablish that state */
+ discard_cap_releases(mdsc, session);
+ spin_unlock(&session->s_cap_lock);
+
+ /* traverse this session's caps */
+ s_nr_caps = session->s_nr_caps;
+ err = ceph_pagelist_encode_32(pagelist, s_nr_caps);
+ if (err)
+ goto fail;
+
+ recon_state.nr_caps = 0;
+ recon_state.pagelist = pagelist;
+ recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK;
+ err = iterate_session_caps(session, encode_caps_cb, &recon_state);
+ if (err < 0)
+ goto fail;
+
+ spin_lock(&session->s_cap_lock);
+ session->s_cap_reconnect = 0;
+ spin_unlock(&session->s_cap_lock);
+
+ /*
+ * snaprealms. we provide mds with the ino, seq (version), and
+ * parent for all of our realms. If the mds has any newer info,
+ * it will tell us.
+ */
+ for (p = rb_first(&mdsc->snap_realms); p; p = rb_next(p)) {
+ struct ceph_snap_realm *realm =
+ rb_entry(p, struct ceph_snap_realm, node);
+ struct ceph_mds_snaprealm_reconnect sr_rec;
+
+ dout(" adding snap realm %llx seq %lld parent %llx\n",
+ realm->ino, realm->seq, realm->parent_ino);
+ sr_rec.ino = cpu_to_le64(realm->ino);
+ sr_rec.seq = cpu_to_le64(realm->seq);
+ sr_rec.parent = cpu_to_le64(realm->parent_ino);
+ err = ceph_pagelist_append(pagelist, &sr_rec, sizeof(sr_rec));
+ if (err)
+ goto fail;
+ }
+
+ if (recon_state.flock)
+ reply->hdr.version = cpu_to_le16(2);
+
+ /* raced with cap release? */
+ if (s_nr_caps != recon_state.nr_caps) {
+ /* the cap count was encoded first (at the start of the first
+ * page); patch it in place with the number actually added */
+ struct page *page = list_first_entry(&pagelist->head,
+ struct page, lru);
+ __le32 *addr = kmap_atomic(page);
+ *addr = cpu_to_le32(recon_state.nr_caps);
+ kunmap_atomic(addr);
+ }
+
+ reply->hdr.data_len = cpu_to_le32(pagelist->length);
+ ceph_msg_data_add_pagelist(reply, pagelist);
+ ceph_con_send(&session->s_con, reply);
+
+ mutex_unlock(&session->s_mutex);
+
+ mutex_lock(&mdsc->mutex);
+ __wake_requests(mdsc, &session->s_waiting);
+ mutex_unlock(&mdsc->mutex);
+
+ up_read(&mdsc->snap_rwsem);
+ return;
+
+fail:
+ ceph_msg_put(reply);
+ up_read(&mdsc->snap_rwsem);
+ mutex_unlock(&session->s_mutex);
+fail_nomsg:
+ ceph_pagelist_release(pagelist);
+ kfree(pagelist);
+fail_nopagelist:
+ pr_err("error %d preparing reconnect for mds%d\n", err, mds);
+ return;
+}
+
+
+/*
+ * compare old and new mdsmaps, kicking requests
+ * and closing out old connections as necessary
+ *
+ * called under mdsc->mutex.
+ */
+static void check_new_map(struct ceph_mds_client *mdsc,
+ struct ceph_mdsmap *newmap,
+ struct ceph_mdsmap *oldmap)
+{
+ int i;
+ int oldstate, newstate;
+ struct ceph_mds_session *s;
+
+ dout("check_new_map new %u old %u\n",
+ newmap->m_epoch, oldmap->m_epoch);
+
+ for (i = 0; i < oldmap->m_max_mds && i < mdsc->max_sessions; i++) {
+ if (mdsc->sessions[i] == NULL)
+ continue;
+ s = mdsc->sessions[i];
+ oldstate = ceph_mdsmap_get_state(oldmap, i);
+ newstate = ceph_mdsmap_get_state(newmap, i);
+
+ dout("check_new_map mds%d state %s%s -> %s%s (session %s)\n",
+ i, ceph_mds_state_name(oldstate),
+ ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "",
+ ceph_mds_state_name(newstate),
+ ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
+ session_state_name(s->s_state));
+
+ /* mds rank gone, or its address changed? */
+ if (i >= newmap->m_max_mds ||
+ memcmp(ceph_mdsmap_get_addr(oldmap, i),
+ ceph_mdsmap_get_addr(newmap, i),
+ sizeof(struct ceph_entity_addr))) {
+ if (s->s_state == CEPH_MDS_SESSION_OPENING) {
+ /* the session never opened, just close it
+ * out now */
+ __wake_requests(mdsc, &s->s_waiting);
+ __unregister_session(mdsc, s);
+ } else {
+ /* just close it */
+ /* drop mdsc->mutex so s_mutex is taken first,
+ * then re-take mdsc->mutex under it */
+ mutex_unlock(&mdsc->mutex);
+ mutex_lock(&s->s_mutex);
+ mutex_lock(&mdsc->mutex);
+ ceph_con_close(&s->s_con);
+ mutex_unlock(&s->s_mutex);
+ s->s_state = CEPH_MDS_SESSION_RESTARTING;
+ }
+
+ /* kick any requests waiting on the recovering mds */
+ kick_requests(mdsc, i);
+ } else if (oldstate == newstate) {
+ continue; /* nothing new with this mds */
+ }
+
+ /*
+ * send reconnect?
+ */
+ if (s->s_state == CEPH_MDS_SESSION_RESTARTING &&
+ newstate >= CEPH_MDS_STATE_RECONNECT) {
+ mutex_unlock(&mdsc->mutex);
+ send_mds_reconnect(mdsc, s);
+ mutex_lock(&mdsc->mutex);
+ }
+
+ /*
+ * kick request on any mds that has gone active.
+ */
+ if (oldstate < CEPH_MDS_STATE_ACTIVE &&
+ newstate >= CEPH_MDS_STATE_ACTIVE) {
+ if (oldstate != CEPH_MDS_STATE_CREATING &&
+ oldstate != CEPH_MDS_STATE_STARTING)
+ pr_info("mds%d recovery completed\n", s->s_mds);
+ kick_requests(mdsc, i);
+ ceph_kick_flushing_caps(mdsc, s);
+ wake_up_session_caps(s, 1);
+ }
+ }
+
+ /* second pass: open export-target sessions for laggy MDSes */
+ for (i = 0; i < newmap->m_max_mds && i < mdsc->max_sessions; i++) {
+ s = mdsc->sessions[i];
+ if (!s)
+ continue;
+ if (!ceph_mdsmap_is_laggy(newmap, i))
+ continue;
+ if (s->s_state == CEPH_MDS_SESSION_OPEN ||
+ s->s_state == CEPH_MDS_SESSION_HUNG ||
+ s->s_state == CEPH_MDS_SESSION_CLOSING) {
+ dout(" connecting to export targets of laggy mds%d\n",
+ i);
+ __open_export_target_sessions(mdsc, s);
+ }
+ }
+}
+
+
+
+/*
+ * leases
+ */
+
+/*
+ * Drop the lease session reference held by @dentry's ceph_dentry_info.
+ *
+ * caller must hold session s_mutex, dentry->d_lock
+ */
+void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry)
+{
+ struct ceph_dentry_info *di = ceph_dentry(dentry);
+
+ ceph_put_mds_session(di->lease_session);
+ di->lease_session = NULL;
+}
+
+/*
+ * Handle a dentry lease message from the MDS (revoke or renew).
+ * For a revoke (or any case where we can't find the dentry) we reply
+ * with a REVOKE_ACK by reusing the incoming message.
+ */
+static void handle_lease(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session,
+ struct ceph_msg *msg)
+{
+ struct super_block *sb = mdsc->fsc->sb;
+ struct inode *inode;
+ struct dentry *parent, *dentry;
+ struct ceph_dentry_info *di;
+ int mds = session->s_mds;
+ struct ceph_mds_lease *h = msg->front.iov_base;
+ u32 seq;
+ struct ceph_vino vino;
+ struct qstr dname;
+ int release = 0;
+
+ dout("handle_lease from mds%d\n", mds);
+
+ /* decode */
+ if (msg->front.iov_len < sizeof(*h) + sizeof(u32))
+ goto bad;
+ vino.ino = le64_to_cpu(h->ino);
+ vino.snap = CEPH_NOSNAP;
+ seq = le32_to_cpu(h->seq);
+ /* the dentry name (length-prefixed) follows the fixed header */
+ dname.name = (void *)h + sizeof(*h) + sizeof(u32);
+ dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32);
+ if (dname.len != get_unaligned_le32(h+1))
+ goto bad;
+
+ mutex_lock(&session->s_mutex);
+ session->s_seq++;
+
+ /* lookup inode */
+ inode = ceph_find_inode(sb, vino);
+ dout("handle_lease %s, ino %llx %p %.*s\n",
+ ceph_lease_op_name(h->action), vino.ino, inode,
+ dname.len, dname.name);
+ if (inode == NULL) {
+ dout("handle_lease no inode %llx\n", vino.ino);
+ goto release;
+ }
+
+ /* dentry */
+ parent = d_find_alias(inode);
+ if (!parent) {
+ dout("no parent dentry on inode %p\n", inode);
+ WARN_ON(1);
+ goto release; /* hrm... */
+ }
+ dname.hash = full_name_hash(dname.name, dname.len);
+ dentry = d_lookup(parent, &dname);
+ dput(parent);
+ if (!dentry)
+ goto release;
+
+ spin_lock(&dentry->d_lock);
+ di = ceph_dentry(dentry);
+ switch (h->action) {
+ case CEPH_MDS_LEASE_REVOKE:
+ if (di->lease_session == session) {
+ /* ack with our (possibly newer) lease seq */
+ if (ceph_seq_cmp(di->lease_seq, seq) > 0)
+ h->seq = cpu_to_le32(di->lease_seq);
+ __ceph_mdsc_drop_dentry_lease(dentry);
+ }
+ release = 1;
+ break;
+
+ case CEPH_MDS_LEASE_RENEW:
+ if (di->lease_session == session &&
+ di->lease_gen == session->s_cap_gen &&
+ di->lease_renew_from &&
+ di->lease_renew_after == 0) {
+ unsigned long duration =
+ le32_to_cpu(h->duration_ms) * HZ / 1000;
+
+ di->lease_seq = seq;
+ dentry->d_time = di->lease_renew_from + duration;
+ di->lease_renew_after = di->lease_renew_from +
+ (duration >> 1);
+ di->lease_renew_from = 0;
+ }
+ break;
+ }
+ spin_unlock(&dentry->d_lock);
+ dput(dentry);
+
+ /* fall through into "release" when release == 1 (REVOKE case) */
+ if (!release)
+ goto out;
+
+release:
+ /* let's just reuse the same message */
+ h->action = CEPH_MDS_LEASE_REVOKE_ACK;
+ ceph_msg_get(msg);
+ ceph_con_send(&session->s_con, msg);
+
+out:
+ iput(inode);
+ mutex_unlock(&session->s_mutex);
+ return;
+
+bad:
+ pr_err("corrupt lease message\n");
+ ceph_msg_dump(msg);
+}
+
+/*
+ * Build and send a CEPH_MSG_CLIENT_LEASE message for @dentry to the
+ * session's MDS.  The message body is the lease header followed by the
+ * length-prefixed dentry name.
+ */
+void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
+ struct inode *inode,
+ struct dentry *dentry, char action,
+ u32 seq)
+{
+ struct ceph_msg *msg;
+ struct ceph_mds_lease *lease;
+ int len = sizeof(*lease) + sizeof(u32);
+ int dnamelen = 0;
+
+ dout("lease_send_msg inode %p dentry %p %s to mds%d\n",
+ inode, dentry, ceph_lease_op_name(action), session->s_mds);
+ dnamelen = dentry->d_name.len;
+ len += dnamelen;
+
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS, false);
+ if (!msg)
+ return;
+ lease = msg->front.iov_base;
+ lease->action = action;
+ lease->ino = cpu_to_le64(ceph_vino(inode).ino);
+ lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap);
+ lease->seq = cpu_to_le32(seq);
+ /* name length (u32) then the name bytes, directly after the header */
+ put_unaligned_le32(dnamelen, lease + 1);
+ memcpy((void *)(lease + 1) + 4, dentry->d_name.name, dnamelen);
+
+ /*
+ * if this is a preemptive lease RELEASE, no need to
+ * flush request stream, since the actual request will
+ * soon follow.
+ */
+ msg->more_to_follow = (action == CEPH_MDS_LEASE_RELEASE);
+
+ ceph_con_send(&session->s_con, msg);
+}
+
+/*
+ * Preemptively release a lease we expect to invalidate anyway.
+ * Pass @inode always, @dentry is optional.
+ */
+void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode,
+ struct dentry *dentry)
+{
+ struct ceph_dentry_info *di;
+ struct ceph_mds_session *session;
+ u32 seq;
+
+ BUG_ON(inode == NULL);
+ BUG_ON(dentry == NULL);
+
+ /* is dentry lease valid? */
+ spin_lock(&dentry->d_lock);
+ di = ceph_dentry(dentry);
+ if (!di || !di->lease_session ||
+ di->lease_session->s_mds < 0 ||
+ di->lease_gen != di->lease_session->s_cap_gen ||
+ !time_before(jiffies, dentry->d_time)) {
+ dout("lease_release inode %p dentry %p -- "
+ "no lease\n",
+ inode, dentry);
+ spin_unlock(&dentry->d_lock);
+ return;
+ }
+
+ /* we do have a lease on this dentry; note mds and seq */
+ session = ceph_get_mds_session(di->lease_session);
+ seq = di->lease_seq;
+ __ceph_mdsc_drop_dentry_lease(dentry);
+ spin_unlock(&dentry->d_lock);
+
+ dout("lease_release inode %p dentry %p to mds%d\n",
+ inode, dentry, session->s_mds);
+ ceph_mdsc_lease_send_msg(session, inode, dentry,
+ CEPH_MDS_LEASE_RELEASE, seq);
+ ceph_put_mds_session(session);
+}
+
+/*
+ * drop all leases (and dentry refs) in preparation for umount
+ */
+static void drop_leases(struct ceph_mds_client *mdsc)
+{
+ int i;
+
+ dout("drop_leases\n");
+ mutex_lock(&mdsc->mutex);
+ for (i = 0; i < mdsc->max_sessions; i++) {
+ struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
+ if (!s)
+ continue;
+ mutex_unlock(&mdsc->mutex);
+ /* take and immediately drop each session mutex -- NOTE(review):
+ * looks like a barrier to wait out in-flight handlers holding
+ * s_mutex; confirm */
+ mutex_lock(&s->s_mutex);
+ mutex_unlock(&s->s_mutex);
+ ceph_put_mds_session(s);
+ mutex_lock(&mdsc->mutex);
+ }
+ mutex_unlock(&mdsc->mutex);
+}
+
+
+
+/*
+ * delayed work -- periodically trim expired leases, renew caps with mds
+ */
+static void schedule_delayed(struct ceph_mds_client *mdsc)
+{
+ int delay = 5;
+ /* note: despite the name, 'hz' holds a jiffies delay (~5s),
+ * rounded so wakeups batch with other timers */
+ unsigned hz = round_jiffies_relative(HZ * delay);
+ schedule_delayed_work(&mdsc->delayed_work, hz);
+}
+
+/*
+ * Periodic work: flush delayed caps, renew caps with each open MDS
+ * session (or send a keepalive), push queued cap releases, and mark
+ * sessions HUNG when their ttl has expired.  Re-arms itself.
+ */
+static void delayed_work(struct work_struct *work)
+{
+ int i;
+ struct ceph_mds_client *mdsc =
+ container_of(work, struct ceph_mds_client, delayed_work.work);
+ int renew_interval;
+ int renew_caps;
+
+ dout("mdsc delayed_work\n");
+ ceph_check_delayed_caps(mdsc);
+
+ mutex_lock(&mdsc->mutex);
+ /* renew caps every quarter of the session timeout */
+ renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
+ renew_caps = time_after_eq(jiffies, HZ*renew_interval +
+ mdsc->last_renew_caps);
+ if (renew_caps)
+ mdsc->last_renew_caps = jiffies;
+
+ for (i = 0; i < mdsc->max_sessions; i++) {
+ struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
+ if (s == NULL)
+ continue;
+ if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
+ dout("resending session close request for mds%d\n",
+ s->s_mds);
+ request_close_session(mdsc, s);
+ ceph_put_mds_session(s);
+ continue;
+ }
+ if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
+ if (s->s_state == CEPH_MDS_SESSION_OPEN) {
+ s->s_state = CEPH_MDS_SESSION_HUNG;
+ pr_info("mds%d hung\n", s->s_mds);
+ }
+ }
+ if (s->s_state < CEPH_MDS_SESSION_OPEN) {
+ /* this mds is failed or recovering, just wait */
+ ceph_put_mds_session(s);
+ continue;
+ }
+ /* drop mdsc->mutex before taking the session mutex */
+ mutex_unlock(&mdsc->mutex);
+
+ mutex_lock(&s->s_mutex);
+ if (renew_caps)
+ send_renew_caps(mdsc, s);
+ else
+ ceph_con_keepalive(&s->s_con);
+ ceph_add_cap_releases(mdsc, s);
+ if (s->s_state == CEPH_MDS_SESSION_OPEN ||
+ s->s_state == CEPH_MDS_SESSION_HUNG)
+ ceph_send_cap_releases(mdsc, s);
+ mutex_unlock(&s->s_mutex);
+ ceph_put_mds_session(s);
+
+ mutex_lock(&mdsc->mutex);
+ }
+ mutex_unlock(&mdsc->mutex);
+
+ schedule_delayed(mdsc);
+}
+
+/*
+ * Allocate and initialize the MDS client state for @fsc.
+ * Returns 0 on success or -ENOMEM.
+ */
+int ceph_mdsc_init(struct ceph_fs_client *fsc)
+
+{
+ struct ceph_mds_client *mdsc;
+
+ mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS);
+ if (!mdsc)
+ return -ENOMEM;
+ mdsc->fsc = fsc;
+ fsc->mdsc = mdsc;
+ mutex_init(&mdsc->mutex);
+ /* start with an empty mdsmap; replaced when a real map arrives */
+ mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
+ if (mdsc->mdsmap == NULL) {
+ kfree(mdsc);
+ return -ENOMEM;
+ }
+
+ init_completion(&mdsc->safe_umount_waiters);
+ init_waitqueue_head(&mdsc->session_close_wq);
+ INIT_LIST_HEAD(&mdsc->waiting_for_map);
+ mdsc->sessions = NULL;
+ mdsc->max_sessions = 0;
+ mdsc->stopping = 0;
+ init_rwsem(&mdsc->snap_rwsem);
+ mdsc->snap_realms = RB_ROOT;
+ INIT_LIST_HEAD(&mdsc->snap_empty);
+ spin_lock_init(&mdsc->snap_empty_lock);
+ mdsc->last_tid = 0;
+ mdsc->request_tree = RB_ROOT;
+ INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work);
+ mdsc->last_renew_caps = jiffies;
+ INIT_LIST_HEAD(&mdsc->cap_delay_list);
+ spin_lock_init(&mdsc->cap_delay_lock);
+ INIT_LIST_HEAD(&mdsc->snap_flush_list);
+ spin_lock_init(&mdsc->snap_flush_lock);
+ mdsc->cap_flush_seq = 0;
+ INIT_LIST_HEAD(&mdsc->cap_dirty);
+ INIT_LIST_HEAD(&mdsc->cap_dirty_migrating);
+ mdsc->num_cap_flushing = 0;
+ spin_lock_init(&mdsc->cap_dirty_lock);
+ init_waitqueue_head(&mdsc->cap_flushing_wq);
+ spin_lock_init(&mdsc->dentry_lru_lock);
+ INIT_LIST_HEAD(&mdsc->dentry_lru);
+
+ ceph_caps_init(mdsc);
+ ceph_adjust_min_caps(mdsc, fsc->min_caps);
+
+ return 0;
+}
+
+/*
+ * Wait for safe replies on open mds requests. If we time out, drop
+ * all requests from the tree to avoid dangling dentry refs.
+ */
+static void wait_requests(struct ceph_mds_client *mdsc)
+{
+ struct ceph_mds_request *req;
+ struct ceph_fs_client *fsc = mdsc->fsc;
+
+ mutex_lock(&mdsc->mutex);
+ if (__get_oldest_req(mdsc)) {
+ mutex_unlock(&mdsc->mutex);
+
+ dout("wait_requests waiting for requests\n");
+ /* woken by handle_reply when the last request completes */
+ wait_for_completion_timeout(&mdsc->safe_umount_waiters,
+ fsc->client->options->mount_timeout * HZ);
+
+ /* tear down remaining requests */
+ mutex_lock(&mdsc->mutex);
+ while ((req = __get_oldest_req(mdsc))) {
+ dout("wait_requests timed out on tid %llu\n",
+ req->r_tid);
+ __unregister_request(mdsc, req);
+ }
+ }
+ mutex_unlock(&mdsc->mutex);
+ dout("wait_requests done\n");
+}
+
+/*
+ * called before mount is ro, and before dentries are torn down.
+ * (hmm, does this still race with new lookups?)
+ */
+void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
+{
+ dout("pre_umount\n");
+ mdsc->stopping = 1;
+
+ drop_leases(mdsc);
+ ceph_flush_dirty_caps(mdsc);
+ wait_requests(mdsc);
+
+ /*
+ * wait for reply handlers to drop their request refs and
+ * their inode/dcache refs
+ */
+ ceph_msgr_flush();
+}
+
+/*
+ * wait for all write mds requests to flush.
+ */
+static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
+{
+ struct ceph_mds_request *req = NULL, *nextreq;
+ struct rb_node *n;
+
+ mutex_lock(&mdsc->mutex);
+ dout("wait_unsafe_requests want %lld\n", want_tid);
+restart:
+ req = __get_oldest_req(mdsc);
+ while (req && req->r_tid <= want_tid) {
+ /* find next request */
+ n = rb_next(&req->r_node);
+ if (n)
+ nextreq = rb_entry(n, struct ceph_mds_request, r_node);
+ else
+ nextreq = NULL;
+ if ((req->r_op & CEPH_MDS_OP_WRITE)) {
+ /* write op */
+ /* pin both req and nextreq so they survive the
+ * mutex drop while we sleep */
+ ceph_mdsc_get_request(req);
+ if (nextreq)
+ ceph_mdsc_get_request(nextreq);
+ mutex_unlock(&mdsc->mutex);
+ dout("wait_unsafe_requests wait on %llu (want %llu)\n",
+ req->r_tid, want_tid);
+ wait_for_completion(&req->r_safe_completion);
+ mutex_lock(&mdsc->mutex);
+ ceph_mdsc_put_request(req);
+ if (!nextreq)
+ break; /* next dne before, so we're done! */
+ if (RB_EMPTY_NODE(&nextreq->r_node)) {
+ /* next request was removed from tree */
+ ceph_mdsc_put_request(nextreq);
+ goto restart;
+ }
+ ceph_mdsc_put_request(nextreq); /* won't go away */
+ }
+ req = nextreq;
+ }
+ mutex_unlock(&mdsc->mutex);
+ dout("wait_unsafe_requests done\n");
+}
+
+/*
+ * Flush dirty caps, then wait for (1) every write MDS request with a
+ * tid <= the last tid issued so far to complete safely, and (2) the
+ * cap flush sequence to catch up to the value sampled here.
+ */
+void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
+{
+	u64 want_tid, want_flush;
+
+	/* nothing to wait for on a forced/failed mount */
+	if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
+		return;
+
+	dout("sync\n");
+	mutex_lock(&mdsc->mutex);
+	/* sample both targets under the mutex so they are consistent */
+	want_tid = mdsc->last_tid;
+	want_flush = mdsc->cap_flush_seq;
+	mutex_unlock(&mdsc->mutex);
+	dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush);
+
+	ceph_flush_dirty_caps(mdsc);
+
+	wait_unsafe_requests(mdsc, want_tid);
+	wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush));
+}
+
+/*
+ * true if all sessions are closed, or we force unmount
+ */
+/*
+ * true if all sessions are closed, or we force unmount
+ */
+static bool done_closing_sessions(struct ceph_mds_client *mdsc)
+{
+	bool any_open = false;
+	int i;
+
+	/* a forced unmount never waits for session teardown */
+	if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
+		return true;
+
+	mutex_lock(&mdsc->mutex);
+	for (i = 0; i < mdsc->max_sessions; i++) {
+		if (mdsc->sessions[i]) {
+			any_open = true;
+			break;
+		}
+	}
+	mutex_unlock(&mdsc->mutex);
+	return !any_open;
+}
+
+/*
+ * called after sb is ro.
+ */
+void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
+{
+	struct ceph_mds_session *session;
+	int i;
+	struct ceph_fs_client *fsc = mdsc->fsc;
+	unsigned long timeout = fsc->client->options->mount_timeout * HZ;
+
+	dout("close_sessions\n");
+
+	/* close sessions */
+	mutex_lock(&mdsc->mutex);
+	for (i = 0; i < mdsc->max_sessions; i++) {
+		session = __ceph_lookup_mds_session(mdsc, i);
+		if (!session)
+			continue;
+		/*
+		 * drop mdsc->mutex before taking s_mutex: s_mutex is the
+		 * outer lock (see ordering comment in mds_client.h)
+		 */
+		mutex_unlock(&mdsc->mutex);
+		mutex_lock(&session->s_mutex);
+		__close_session(mdsc, session);
+		mutex_unlock(&session->s_mutex);
+		ceph_put_mds_session(session);	/* drop ref from lookup */
+		mutex_lock(&mdsc->mutex);
+	}
+	mutex_unlock(&mdsc->mutex);
+
+	dout("waiting for sessions to close\n");
+	wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc),
+			   timeout);
+
+	/* tear down remaining sessions */
+	mutex_lock(&mdsc->mutex);
+	for (i = 0; i < mdsc->max_sessions; i++) {
+		if (mdsc->sessions[i]) {
+			session = get_session(mdsc->sessions[i]);
+			__unregister_session(mdsc, session);
+			/* same lock-ordering dance as above */
+			mutex_unlock(&mdsc->mutex);
+			mutex_lock(&session->s_mutex);
+			remove_session_caps(session);
+			mutex_unlock(&session->s_mutex);
+			ceph_put_mds_session(session);
+			mutex_lock(&mdsc->mutex);
+		}
+	}
+	WARN_ON(!list_empty(&mdsc->cap_delay_list));
+	mutex_unlock(&mdsc->mutex);
+
+	ceph_cleanup_empty_realms(mdsc);
+
+	cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
+
+	dout("stopped\n");
+}
+
+/*
+ * Stop background work and free mdsc sub-structures.  The delayed
+ * work is cancelled synchronously first, so none of it can still be
+ * running while the state below is torn down.
+ */
+static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
+{
+	dout("stop\n");
+	cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
+	if (mdsc->mdsmap)
+		ceph_mdsmap_destroy(mdsc->mdsmap);
+	kfree(mdsc->sessions);
+	ceph_caps_finalize(mdsc);
+}
+
+/*
+ * Final teardown of the mds client: stop background work, flush the
+ * messenger workqueues (which may still hold references to us), then
+ * free the mdsc itself.
+ */
+void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
+{
+	struct ceph_mds_client *mdsc = fsc->mdsc;
+
+	dout("mdsc_destroy %p\n", mdsc);
+	ceph_mdsc_stop(mdsc);
+
+	/* flush out any connection work with references to us */
+	ceph_msgr_flush();
+
+	fsc->mdsc = NULL;
+	/*
+	 * log before kfree(): the original printed mdsc after freeing it,
+	 * and using a freed pointer's value is undefined behavior
+	 */
+	dout("mdsc_destroy %p done\n", mdsc);
+	kfree(mdsc);
+}
+
+
+/*
+ * handle mds map update.
+ */
+void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
+{
+	u32 epoch;
+	u32 maplen;
+	void *p = msg->front.iov_base;
+	void *end = p + msg->front.iov_len;
+	struct ceph_mdsmap *newmap, *oldmap;
+	struct ceph_fsid fsid;
+	int err = -EINVAL;
+
+	/* fsid + epoch + map length must all be present up front */
+	ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad);
+	ceph_decode_copy(&p, &fsid, sizeof(fsid));
+	if (ceph_check_fsid(mdsc->fsc->client, &fsid) < 0)
+		return;	/* not our cluster: silently ignore */
+	epoch = ceph_decode_32(&p);
+	maplen = ceph_decode_32(&p);
+	dout("handle_map epoch %u len %d\n", epoch, (int)maplen);
+
+	/* do we need it? */
+	ceph_monc_got_mdsmap(&mdsc->fsc->client->monc, epoch);
+	mutex_lock(&mdsc->mutex);
+	if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) {
+		/* we already have this epoch (or newer) */
+		dout("handle_map epoch %u <= our %u\n",
+		     epoch, mdsc->mdsmap->m_epoch);
+		mutex_unlock(&mdsc->mutex);
+		return;
+	}
+
+	newmap = ceph_mdsmap_decode(&p, end);
+	if (IS_ERR(newmap)) {
+		err = PTR_ERR(newmap);
+		goto bad_unlock;
+	}
+
+	/* swap into place */
+	if (mdsc->mdsmap) {
+		oldmap = mdsc->mdsmap;
+		mdsc->mdsmap = newmap;
+		/* let check_new_map react to differences vs the old map */
+		check_new_map(mdsc, newmap, oldmap);
+		ceph_mdsmap_destroy(oldmap);
+	} else {
+		mdsc->mdsmap = newmap;  /* first mds map */
+	}
+	mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size;
+
+	/* wake requests parked on waiting_for_map */
+	__wake_requests(mdsc, &mdsc->waiting_for_map);
+
+	mutex_unlock(&mdsc->mutex);
+	schedule_delayed(mdsc);
+	return;
+
+bad_unlock:
+	mutex_unlock(&mdsc->mutex);
+bad:
+	pr_err("error decoding mdsmap %d\n", err);
+	return;
+}
+
+/* take a session ref for the messenger; NULL if the session is dying */
+static struct ceph_connection *con_get(struct ceph_connection *con)
+{
+	struct ceph_mds_session *session = con->private;
+
+	if (!get_session(session)) {
+		dout("mdsc con_get %p FAIL\n", session);
+		return NULL;
+	}
+	dout("mdsc con_get %p ok (%d)\n", session,
+	     atomic_read(&session->s_ref));
+	return con;
+}
+
+/* drop the session ref held on behalf of the messenger */
+static void con_put(struct ceph_connection *con)
+{
+	struct ceph_mds_session *session = con->private;
+
+	dout("mdsc con_put %p (%d)\n", session,
+	     atomic_read(&session->s_ref) - 1);
+	ceph_put_mds_session(session);
+}
+
+/*
+ * if the client is unresponsive for long enough, the mds will kill
+ * the session entirely.
+ */
+/*
+ * if the client is unresponsive for long enough, the mds will kill
+ * the session entirely.
+ */
+static void peer_reset(struct ceph_connection *con)
+{
+	struct ceph_mds_session *session = con->private;
+
+	pr_warning("mds%d closed our session\n", session->s_mds);
+	send_mds_reconnect(session->s_mdsc, session);
+}
+
+/*
+ * Route an incoming message from an MDS session to the appropriate
+ * handler based on its message type.  The message is always consumed.
+ */
+static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
+{
+	struct ceph_mds_session *s = con->private;
+	struct ceph_mds_client *mdsc = s->s_mdsc;
+	int type = le16_to_cpu(msg->hdr.type);
+
+	/* ignore messages for sessions that are no longer registered */
+	mutex_lock(&mdsc->mutex);
+	if (__verify_registered_session(mdsc, s) < 0) {
+		mutex_unlock(&mdsc->mutex);
+		goto out;
+	}
+	mutex_unlock(&mdsc->mutex);
+
+	switch (type) {
+	case CEPH_MSG_MDS_MAP:
+		ceph_mdsc_handle_map(mdsc, msg);
+		break;
+	case CEPH_MSG_CLIENT_SESSION:
+		handle_session(s, msg);
+		break;
+	case CEPH_MSG_CLIENT_REPLY:
+		handle_reply(s, msg);
+		break;
+	case CEPH_MSG_CLIENT_REQUEST_FORWARD:
+		handle_forward(mdsc, s, msg);
+		break;
+	case CEPH_MSG_CLIENT_CAPS:
+		ceph_handle_caps(s, msg);
+		break;
+	case CEPH_MSG_CLIENT_SNAP:
+		ceph_handle_snap(mdsc, s, msg);
+		break;
+	case CEPH_MSG_CLIENT_LEASE:
+		handle_lease(mdsc, s, msg);
+		break;
+
+	default:
+		pr_err("received unknown message type %d %s\n", type,
+		       ceph_msg_type_name(type));
+	}
+out:
+	ceph_msg_put(msg);	/* consume the message in all cases */
+}
+
+/*
+ * authentication
+ */
+
+/*
+ * Note: returned pointer is the address of a structure that's
+ * managed separately. Caller must *not* attempt to free it.
+ */
+/*
+ * Note: returned pointer is the address of a structure that's
+ * managed separately. Caller must *not* attempt to free it.
+ */
+static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
+					int *proto, int force_new)
+{
+	struct ceph_mds_session *s = con->private;
+	struct ceph_mds_client *mdsc = s->s_mdsc;
+	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
+	struct ceph_auth_handshake *auth = &s->s_auth;
+	int ret;
+
+	/* throw away any existing authorizer if a fresh one was requested */
+	if (force_new && auth->authorizer) {
+		ceph_auth_destroy_authorizer(ac, auth->authorizer);
+		auth->authorizer = NULL;
+	}
+
+	/* create a new authorizer, or refresh the one we already hold */
+	ret = auth->authorizer ?
+		ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_MDS, auth) :
+		ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_MDS, auth);
+	if (ret)
+		return ERR_PTR(ret);
+
+	*proto = ac->protocol;
+	return auth;
+}
+
+
+/* check the MDS's reply to our authorizer against what we sent */
+static int verify_authorizer_reply(struct ceph_connection *con, int len)
+{
+	struct ceph_mds_session *session = con->private;
+	struct ceph_auth_client *ac =
+		session->s_mdsc->fsc->client->monc.auth;
+
+	return ceph_auth_verify_authorizer_reply(ac,
+					session->s_auth.authorizer, len);
+}
+
+/* our MDS authorizer was rejected: invalidate it and revalidate auth */
+static int invalidate_authorizer(struct ceph_connection *con)
+{
+	struct ceph_mds_session *session = con->private;
+	struct ceph_mds_client *mdsc = session->s_mdsc;
+
+	ceph_auth_invalidate_authorizer(mdsc->fsc->client->monc.auth,
+					CEPH_ENTITY_TYPE_MDS);
+	return ceph_monc_validate_auth(&mdsc->fsc->client->monc);
+}
+
+/*
+ * Allocate a message to receive an incoming frame of front_len bytes,
+ * or hand back the message already prepared on this connection.
+ * NOTE(review): *skip is only initialized on the allocation path; the
+ * messenger presumably pre-initializes it before calling us — confirm
+ * against libceph/messenger.c.
+ */
+static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con,
+				struct ceph_msg_header *hdr, int *skip)
+{
+	struct ceph_msg *msg;
+	int type = (int) le16_to_cpu(hdr->type);
+	int front_len = (int) le32_to_cpu(hdr->front_len);
+
+	if (con->in_msg)
+		return con->in_msg;
+
+	*skip = 0;
+	msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
+	if (!msg) {
+		pr_err("unable to allocate msg type %d len %d\n",
+		       type, front_len);
+		return NULL;
+	}
+
+	return msg;
+}
+
+/* messenger callbacks for MDS session connections */
+static const struct ceph_connection_operations mds_con_ops = {
+	.get = con_get,
+	.put = con_put,
+	.dispatch = dispatch,
+	.get_authorizer = get_authorizer,
+	.verify_authorizer_reply = verify_authorizer_reply,
+	.invalidate_authorizer = invalidate_authorizer,
+	.peer_reset = peer_reset,
+	.alloc_msg = mds_alloc_msg,
+};
+
+/* eof */
diff --git a/ceph/mds_client.h b/ceph/mds_client.h
new file mode 100644
index 0000000..e90cfcc
--- /dev/null
+++ b/ceph/mds_client.h
@@ -0,0 +1,393 @@
+#ifndef _FS_CEPH_MDS_CLIENT_H
+#define _FS_CEPH_MDS_CLIENT_H
+
+#include <linux/completion.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+
+#include <linux/ceph/types.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/mdsmap.h>
+#include <linux/ceph/auth.h>
+
+/*
+ * Some lock dependencies:
+ *
+ * session->s_mutex
+ * mdsc->mutex
+ *
+ * mdsc->snap_rwsem
+ *
+ * ci->i_ceph_lock
+ * mdsc->snap_flush_lock
+ * mdsc->cap_delay_lock
+ *
+ */
+
+struct ceph_fs_client;
+struct ceph_cap;
+
+/*
+ * parsed info about a single inode. pointers are into the encoded
+ * on-wire structures within the mds reply message payload.
+ */
+struct ceph_mds_reply_info_in {
+ struct ceph_mds_reply_inode *in;
+ struct ceph_dir_layout dir_layout;
+ u32 symlink_len;
+ char *symlink;
+ u32 xattr_len;
+ char *xattr_data;
+};
+
+/*
+ * parsed info about an mds reply, including information about
+ * either: 1) the target inode and/or its parent directory and dentry,
+ * and directory contents (for readdir results), or
+ * 2) the file range lock info (for fcntl F_GETLK results).
+ */
+struct ceph_mds_reply_info_parsed {
+ struct ceph_mds_reply_head *head;
+
+ /* trace */
+ struct ceph_mds_reply_info_in diri, targeti;
+ struct ceph_mds_reply_dirfrag *dirfrag;
+ char *dname;
+ u32 dname_len;
+ struct ceph_mds_reply_lease *dlease;
+
+ /* extra */
+ union {
+ /* for fcntl F_GETLK results */
+ struct ceph_filelock *filelock_reply;
+
+ /* for readdir results */
+ struct {
+ struct ceph_mds_reply_dirfrag *dir_dir;
+ size_t dir_buf_size;
+ int dir_nr;
+ char **dir_dname;
+ u32 *dir_dname_len;
+ struct ceph_mds_reply_lease **dir_dlease;
+ struct ceph_mds_reply_info_in *dir_in;
+ u8 dir_complete, dir_end;
+ };
+
+ /* for create results */
+ struct {
+ bool has_create_ino;
+ u64 ino;
+ };
+ };
+
+ /* encoded blob describing snapshot contexts for certain
+ operations (e.g., open) */
+ void *snapblob;
+ int snapblob_len;
+};
+
+
+/*
+ * cap releases are batched and sent to the MDS en masse.
+ */
+#define CEPH_CAPS_PER_RELEASE ((PAGE_CACHE_SIZE - \
+ sizeof(struct ceph_mds_cap_release)) / \
+ sizeof(struct ceph_mds_cap_item))
+
+
+/*
+ * state associated with each MDS<->client session
+ */
+enum {
+ CEPH_MDS_SESSION_NEW = 1,
+ CEPH_MDS_SESSION_OPENING = 2,
+ CEPH_MDS_SESSION_OPEN = 3,
+ CEPH_MDS_SESSION_HUNG = 4,
+ CEPH_MDS_SESSION_CLOSING = 5,
+ CEPH_MDS_SESSION_RESTARTING = 6,
+ CEPH_MDS_SESSION_RECONNECTING = 7,
+};
+
+struct ceph_mds_session {
+ struct ceph_mds_client *s_mdsc;
+ int s_mds;
+ int s_state;
+ unsigned long s_ttl; /* time until mds kills us */
+ u64 s_seq; /* incoming msg seq # */
+ struct mutex s_mutex; /* serialize session messages */
+
+ struct ceph_connection s_con;
+
+ struct ceph_auth_handshake s_auth;
+
+ /* protected by s_gen_ttl_lock */
+ spinlock_t s_gen_ttl_lock;
+ u32 s_cap_gen; /* inc each time we get mds stale msg */
+ unsigned long s_cap_ttl; /* when session caps expire */
+
+ /* protected by s_cap_lock */
+ spinlock_t s_cap_lock;
+ struct list_head s_caps; /* all caps issued by this session */
+ int s_nr_caps, s_trim_caps;
+ int s_num_cap_releases;
+ int s_cap_reconnect;
+ struct list_head s_cap_releases; /* waiting cap_release messages */
+ struct list_head s_cap_releases_done; /* ready to send */
+ struct ceph_cap *s_cap_iterator;
+
+ /* protected by mutex */
+ struct list_head s_cap_flushing; /* inodes w/ flushing caps */
+ struct list_head s_cap_snaps_flushing;
+ unsigned long s_renew_requested; /* last time we sent a renew req */
+ u64 s_renew_seq;
+
+ atomic_t s_ref;
+ struct list_head s_waiting; /* waiting requests */
+ struct list_head s_unsafe; /* unsafe requests */
+};
+
+/*
+ * modes of choosing which MDS to send a request to
+ */
+enum {
+ USE_ANY_MDS,
+ USE_RANDOM_MDS,
+ USE_AUTH_MDS, /* prefer authoritative mds for this metadata item */
+};
+
+struct ceph_mds_request;
+struct ceph_mds_client;
+
+/*
+ * request completion callback
+ */
+typedef void (*ceph_mds_request_callback_t) (struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req);
+
+/*
+ * an in-flight mds request
+ */
+struct ceph_mds_request {
+ u64 r_tid; /* transaction id */
+ struct rb_node r_node;
+ struct ceph_mds_client *r_mdsc;
+
+ int r_op; /* mds op code */
+
+ /* operation on what? */
+ struct inode *r_inode; /* arg1 */
+ struct dentry *r_dentry; /* arg1 */
+ struct dentry *r_old_dentry; /* arg2: rename from or link from */
+ struct inode *r_old_dentry_dir; /* arg2: old dentry's parent dir */
+ char *r_path1, *r_path2;
+ struct ceph_vino r_ino1, r_ino2;
+
+ struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */
+ struct inode *r_target_inode; /* resulting inode */
+
+ struct mutex r_fill_mutex;
+
+ union ceph_mds_request_args r_args;
+ int r_fmode; /* file mode, if expecting cap */
+ kuid_t r_uid;
+ kgid_t r_gid;
+
+ /* for choosing which mds to send this request to */
+ int r_direct_mode;
+ u32 r_direct_hash; /* choose dir frag based on this dentry hash */
+ bool r_direct_is_hash; /* true if r_direct_hash is valid */
+
+ /* data payload is used for xattr ops */
+ struct page **r_pages;
+ int r_num_pages;
+ int r_data_len;
+
+ /* what caps shall we drop? */
+ int r_inode_drop, r_inode_unless;
+ int r_dentry_drop, r_dentry_unless;
+ int r_old_dentry_drop, r_old_dentry_unless;
+ struct inode *r_old_inode;
+ int r_old_inode_drop, r_old_inode_unless;
+
+ struct ceph_msg *r_request; /* original request */
+ int r_request_release_offset;
+ struct ceph_msg *r_reply;
+ struct ceph_mds_reply_info_parsed r_reply_info;
+ int r_err;
+ bool r_aborted;
+
+ unsigned long r_timeout; /* optional. jiffies */
+ unsigned long r_started; /* start time to measure timeout against */
+ unsigned long r_request_started; /* start time for mds request only,
+ used to measure lease durations */
+
+ /* link unsafe requests to parent directory, for fsync */
+ struct inode *r_unsafe_dir;
+ struct list_head r_unsafe_dir_item;
+
+ struct ceph_mds_session *r_session;
+
+ int r_attempts; /* resend attempts */
+ int r_num_fwd; /* number of forward attempts */
+ int r_resend_mds; /* mds to resend to next, if any*/
+ u32 r_sent_on_mseq; /* cap mseq request was sent at*/
+
+ struct kref r_kref;
+ struct list_head r_wait;
+ struct completion r_completion;
+ struct completion r_safe_completion;
+ ceph_mds_request_callback_t r_callback;
+ struct list_head r_unsafe_item; /* per-session unsafe list item */
+ bool r_got_unsafe, r_got_safe, r_got_result;
+
+ bool r_did_prepopulate;
+ u32 r_readdir_offset;
+
+ struct ceph_cap_reservation r_caps_reservation;
+ int r_num_caps;
+};
+
+/*
+ * mds client state
+ */
+struct ceph_mds_client {
+ struct ceph_fs_client *fsc;
+ struct mutex mutex; /* all nested structures */
+
+ struct ceph_mdsmap *mdsmap;
+ struct completion safe_umount_waiters;
+ wait_queue_head_t session_close_wq;
+ struct list_head waiting_for_map;
+
+ struct ceph_mds_session **sessions; /* NULL for mds if no session */
+ int max_sessions; /* len of s_mds_sessions */
+ int stopping; /* true if shutting down */
+
+ /*
+ * snap_rwsem will cover cap linkage into snaprealms, and
+ * realm snap contexts. (later, we can do per-realm snap
+ * contexts locks..) the empty list contains realms with no
+ * references (implying they contain no inodes with caps) that
+ * should be destroyed.
+ */
+ struct rw_semaphore snap_rwsem;
+ struct rb_root snap_realms;
+ struct list_head snap_empty;
+ spinlock_t snap_empty_lock; /* protect snap_empty */
+
+ u64 last_tid; /* most recent mds request */
+ struct rb_root request_tree; /* pending mds requests */
+ struct delayed_work delayed_work; /* delayed work */
+ unsigned long last_renew_caps; /* last time we renewed our caps */
+ struct list_head cap_delay_list; /* caps with delayed release */
+ spinlock_t cap_delay_lock; /* protects cap_delay_list */
+ struct list_head snap_flush_list; /* cap_snaps ready to flush */
+ spinlock_t snap_flush_lock;
+
+ u64 cap_flush_seq;
+ struct list_head cap_dirty; /* inodes with dirty caps */
+ struct list_head cap_dirty_migrating; /* ...that are migration... */
+ int num_cap_flushing; /* # caps we are flushing */
+ spinlock_t cap_dirty_lock; /* protects above items */
+ wait_queue_head_t cap_flushing_wq;
+
+ /*
+ * Cap reservations
+ *
+ * Maintain a global pool of preallocated struct ceph_caps, referenced
+ * by struct ceph_caps_reservations. This ensures that we preallocate
+ * memory needed to successfully process an MDS response. (If an MDS
+ * sends us cap information and we fail to process it, we will have
+ * problems due to the client and MDS being out of sync.)
+ *
+ * Reservations are 'owned' by a ceph_cap_reservation context.
+ */
+ spinlock_t caps_list_lock;
+ struct list_head caps_list; /* unused (reserved or
+ unreserved) */
+ int caps_total_count; /* total caps allocated */
+ int caps_use_count; /* in use */
+ int caps_reserve_count; /* unused, reserved */
+ int caps_avail_count; /* unused, unreserved */
+ int caps_min_count; /* keep at least this many
+ (unreserved) */
+ spinlock_t dentry_lru_lock;
+ struct list_head dentry_lru;
+ int num_dentry;
+};
+
+extern const char *ceph_mds_op_name(int op);
+
+extern struct ceph_mds_session *
+__ceph_lookup_mds_session(struct ceph_mds_client *, int mds);
+
+/*
+ * Take an extra reference on an MDS session.  Returns the session so
+ * the call can be embedded in an expression; release with
+ * ceph_put_mds_session().
+ */
+static inline struct ceph_mds_session *
+ceph_get_mds_session(struct ceph_mds_session *s)
+{
+	atomic_inc(&s->s_ref);
+	return s;
+}
+
+extern void ceph_put_mds_session(struct ceph_mds_session *s);
+
+extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc,
+ struct ceph_msg *msg, int mds);
+
+extern int ceph_mdsc_init(struct ceph_fs_client *fsc);
+extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc);
+extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc);
+
+extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc);
+
+extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc,
+ struct inode *inode,
+ struct dentry *dn);
+
+extern void ceph_invalidate_dir_request(struct ceph_mds_request *req);
+extern int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
+ struct inode *dir);
+extern struct ceph_mds_request *
+ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode);
+extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req);
+extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
+ struct inode *dir,
+ struct ceph_mds_request *req);
+/* take a reference on an in-flight mds request */
+static inline void ceph_mdsc_get_request(struct ceph_mds_request *req)
+{
+	kref_get(&req->r_kref);
+}
+extern void ceph_mdsc_release_request(struct kref *kref);
+/*
+ * drop a request reference; ceph_mdsc_release_request() runs (and
+ * frees the request) when the last reference is dropped
+ */
+static inline void ceph_mdsc_put_request(struct ceph_mds_request *req)
+{
+	kref_put(&req->r_kref, ceph_mdsc_release_request);
+}
+
+extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session);
+extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session);
+
+extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
+
+extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
+ int stop_on_nosnap);
+
+extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry);
+extern void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
+ struct inode *inode,
+ struct dentry *dentry, char action,
+ u32 seq);
+
+extern void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc,
+ struct ceph_msg *msg);
+
+extern struct ceph_mds_session *
+ceph_mdsc_open_export_target_session(struct ceph_mds_client *mdsc, int target);
+extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session);
+
+#endif
diff --git a/ceph/mdsmap.c b/ceph/mdsmap.c
new file mode 100644
index 0000000..132b64e
--- /dev/null
+++ b/ceph/mdsmap.c
@@ -0,0 +1,189 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include <linux/ceph/mdsmap.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/decode.h>
+
+#include "super.h"
+
+
+/*
+ * choose a random mds that is "up" (i.e. has a state > 0), or -1.
+ */
+int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
+{
+	int n = 0;
+	int i;
+
+	/* special case for one mds */
+	if (1 == m->m_max_mds && m->m_info[0].state > 0)
+		return 0;
+
+	/* count the mds daemons that are up */
+	for (i = 0; i < m->m_max_mds; i++)
+		if (m->m_info[i].state > 0)
+			n++;
+	if (n == 0)
+		return -1;
+
+	/*
+	 * pick: return the slot of the (n+1)th "up" mds.  The old scan
+	 * only skipped down slots while n > 0, so a random pick of 0
+	 * could return slot 0 even when that mds was down; always skip
+	 * down slots before stopping.
+	 */
+	n = prandom_u32() % n;
+	for (i = 0; ; i++) {
+		if (m->m_info[i].state > 0) {
+			if (n == 0)
+				break;
+			n--;
+		}
+	}
+
+	return i;
+}
+
+/*
+ * Decode an MDS map
+ *
+ * Ignore any fields we don't care about (there are quite a few of
+ * them).
+ */
+struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
+{
+	struct ceph_mdsmap *m;
+	const void *start = *p;
+	int i, j, n;
+	int err = -EINVAL;
+	u16 version;
+
+	m = kzalloc(sizeof(*m), GFP_NOFS);
+	if (m == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	ceph_decode_16_safe(p, end, version, bad);
+	if (version > 3) {
+		pr_warning("got mdsmap version %d > 3, failing", version);
+		goto bad;
+	}
+
+	ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad);
+	m->m_epoch = ceph_decode_32(p);
+	m->m_client_epoch = ceph_decode_32(p);
+	m->m_last_failure = ceph_decode_32(p);
+	m->m_root = ceph_decode_32(p);
+	m->m_session_timeout = ceph_decode_32(p);
+	m->m_session_autoclose = ceph_decode_32(p);
+	m->m_max_file_size = ceph_decode_64(p);
+	m->m_max_mds = ceph_decode_32(p);
+
+	m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS);
+	if (m->m_info == NULL)
+		goto badmem;
+
+	/* pick out active nodes from mds_info (state > 0) */
+	n = ceph_decode_32(p);
+	for (i = 0; i < n; i++) {
+		u64 global_id;
+		u32 namelen;
+		s32 mds, inc, state;
+		u64 state_seq;
+		u8 infoversion;
+		struct ceph_entity_addr addr;
+		u32 num_export_targets;
+		void *pexport_targets = NULL;
+		struct ceph_timespec laggy_since;
+		struct ceph_mds_info *info;
+
+		ceph_decode_need(p, end, sizeof(u64)*2 + 1 + sizeof(u32), bad);
+		global_id = ceph_decode_64(p);
+		infoversion = ceph_decode_8(p);
+		*p += sizeof(u64);
+		namelen = ceph_decode_32(p);  /* skip mds name */
+		/* namelen comes off the wire: bounds-check the skip */
+		ceph_decode_need(p, end, namelen, bad);
+		*p += namelen;
+
+		ceph_decode_need(p, end,
+				 4*sizeof(u32) + sizeof(u64) +
+				 sizeof(addr) + sizeof(struct ceph_timespec),
+				 bad);
+		mds = ceph_decode_32(p);
+		inc = ceph_decode_32(p);
+		state = ceph_decode_32(p);
+		state_seq = ceph_decode_64(p);
+		ceph_decode_copy(p, &addr, sizeof(addr));
+		ceph_decode_addr(&addr);
+		ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
+		*p += sizeof(u32);
+		ceph_decode_32_safe(p, end, namelen, bad);
+		/* bounds-check this wire-supplied skip as well */
+		ceph_decode_need(p, end, namelen, bad);
+		*p += namelen;
+		if (infoversion >= 2) {
+			ceph_decode_32_safe(p, end, num_export_targets, bad);
+			/*
+			 * validate the export target array length before
+			 * skipping it (64-bit product avoids overflow on
+			 * 32-bit builds)
+			 */
+			ceph_decode_need(p, end,
+					 (u64)num_export_targets * sizeof(u32),
+					 bad);
+			pexport_targets = *p;
+			*p += num_export_targets * sizeof(u32);
+		} else {
+			num_export_targets = 0;
+		}
+
+		dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n",
+		     i+1, n, global_id, mds, inc,
+		     ceph_pr_addr(&addr.in_addr),
+		     ceph_mds_state_name(state));
+
+		/* only record slots that are in range and active */
+		if (mds < 0 || mds >= m->m_max_mds || state <= 0)
+			continue;
+
+		info = &m->m_info[mds];
+		info->global_id = global_id;
+		info->state = state;
+		info->addr = addr;
+		info->laggy = (laggy_since.tv_sec != 0 ||
+			       laggy_since.tv_nsec != 0);
+		info->num_export_targets = num_export_targets;
+		if (num_export_targets) {
+			info->export_targets = kcalloc(num_export_targets,
+						       sizeof(u32), GFP_NOFS);
+			if (info->export_targets == NULL)
+				goto badmem;
+			for (j = 0; j < num_export_targets; j++)
+				info->export_targets[j] =
+				       ceph_decode_32(&pexport_targets);
+		} else {
+			info->export_targets = NULL;
+		}
+	}
+
+	/* pg_pools */
+	ceph_decode_32_safe(p, end, n, bad);
+	m->m_num_data_pg_pools = n;
+	m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS);
+	if (!m->m_data_pg_pools)
+		goto badmem;
+	ceph_decode_need(p, end, sizeof(u64)*(n+1), bad);
+	for (i = 0; i < n; i++)
+		m->m_data_pg_pools[i] = ceph_decode_64(p);
+	m->m_cas_pg_pool = ceph_decode_64(p);
+
+	/* ok, we don't care about the rest. */
+	dout("mdsmap_decode success epoch %u\n", m->m_epoch);
+	return m;
+
+badmem:
+	err = -ENOMEM;
+bad:
+	pr_err("corrupt mdsmap\n");
+	print_hex_dump(KERN_DEBUG, "mdsmap: ",
+		       DUMP_PREFIX_OFFSET, 16, 1,
+		       start, end - start, true);
+	ceph_mdsmap_destroy(m);
+	return ERR_PTR(err);
+}
+
+void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
+{
+	int i;
+
+	/*
+	 * May be called on a partially constructed map from the decode
+	 * error path: m_max_mds can already be set while m_info is still
+	 * NULL (kcalloc failure), so guard before dereferencing m_info.
+	 */
+	if (m->m_info) {
+		for (i = 0; i < m->m_max_mds; i++)
+			kfree(m->m_info[i].export_targets);
+		kfree(m->m_info);
+	}
+	kfree(m->m_data_pg_pools);	/* kfree(NULL) is a no-op */
+	kfree(m);
+}
diff --git a/ceph/snap.c b/ceph/snap.c
new file mode 100644
index 0000000..f01645a
--- /dev/null
+++ b/ceph/snap.c
@@ -0,0 +1,932 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/sort.h>
+#include <linux/slab.h>
+
+#include "super.h"
+#include "mds_client.h"
+
+#include <linux/ceph/decode.h>
+
+/*
+ * Snapshots in ceph are driven in large part by cooperation from the
+ * client. In contrast to local file systems or file servers that
+ * implement snapshots at a single point in the system, ceph's
+ * distributed access to storage requires clients to help decide
+ * whether a write logically occurs before or after a recently created
+ * snapshot.
+ *
+ * This provides a perfect instantaneous client-wide snapshot. Between
+ * clients, however, snapshots may appear to be applied at slightly
+ * different points in time, depending on delays in delivering the
+ * snapshot notification.
+ *
+ * Snapshots are _not_ file system-wide. Instead, each snapshot
+ * applies to the subdirectory nested beneath some directory. This
+ * effectively divides the hierarchy into multiple "realms," where all
+ * of the files contained by each realm share the same set of
+ * snapshots. An individual realm's snap set contains snapshots
+ * explicitly created on that realm, as well as any snaps in its
+ * parent's snap set _after_ the point at which the parent became its
+ * parent (due to, say, a rename). Similarly, snaps from prior parents
+ * during the time intervals during which they were the parent are included.
+ *
+ * The client is spared most of this detail, fortunately... it need only
+ * maintain a hierarchy of realms reflecting the current parent/child
+ * realm relationship, and for each realm has an explicit list of snaps
+ * inherited from prior parents.
+ *
+ * A snap_realm struct is maintained for realms containing every inode
+ * with an open cap in the system. (The needed snap realm information is
+ * provided by the MDS whenever a cap is issued, i.e., on open.) A 'seq'
+ * version number is used to ensure that as realm parameters change (new
+ * snapshot, new parent, etc.) the client's realm hierarchy is updated.
+ *
+ * The realm hierarchy drives the generation of a 'snap context' for each
+ * realm, which simply lists the resulting set of snaps for the realm. This
+ * is attached to any writes sent to OSDs.
+ */
+/*
+ * Unfortunately error handling is a bit mixed here. If we get a snap
+ * update, but don't have enough memory to update our realm hierarchy,
+ * it's not clear what we can do about it (besides complaining to the
+ * console).
+ */
+
+
+/*
+ * increase ref count for the realm
+ *
+ * caller must hold snap_rwsem for write.
+ */
+void ceph_get_snap_realm(struct ceph_mds_client *mdsc,
+ struct ceph_snap_realm *realm)
+{
+ dout("get_realm %p %d -> %d\n", realm,
+ atomic_read(&realm->nref), atomic_read(&realm->nref)+1);
+ /*
+ * since we _only_ increment realm refs or empty the empty
+ * list with snap_rwsem held, adjusting the empty list here is
+ * safe. we do need to protect against concurrent empty list
+ * additions, however.
+ */
+ if (atomic_read(&realm->nref) == 0) {
+ spin_lock(&mdsc->snap_empty_lock);
+ list_del_init(&realm->empty_item);
+ spin_unlock(&mdsc->snap_empty_lock);
+ }
+
+ atomic_inc(&realm->nref);
+}
+
+static void __insert_snap_realm(struct rb_root *root,
+ struct ceph_snap_realm *new)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct ceph_snap_realm *r = NULL;
+
+ while (*p) {
+ parent = *p;
+ r = rb_entry(parent, struct ceph_snap_realm, node);
+ if (new->ino < r->ino)
+ p = &(*p)->rb_left;
+ else if (new->ino > r->ino)
+ p = &(*p)->rb_right;
+ else
+ BUG();
+ }
+
+ rb_link_node(&new->node, parent, p);
+ rb_insert_color(&new->node, root);
+}
+
+/*
+ * create and get the realm rooted at @ino and bump its ref count.
+ *
+ * caller must hold snap_rwsem for write.
+ */
+static struct ceph_snap_realm *ceph_create_snap_realm(
+ struct ceph_mds_client *mdsc,
+ u64 ino)
+{
+ struct ceph_snap_realm *realm;
+
+ realm = kzalloc(sizeof(*realm), GFP_NOFS);
+ if (!realm)
+ return ERR_PTR(-ENOMEM);
+
+ atomic_set(&realm->nref, 0); /* tree does not take a ref */
+ realm->ino = ino;
+ INIT_LIST_HEAD(&realm->children);
+ INIT_LIST_HEAD(&realm->child_item);
+ INIT_LIST_HEAD(&realm->empty_item);
+ INIT_LIST_HEAD(&realm->dirty_item);
+ INIT_LIST_HEAD(&realm->inodes_with_caps);
+ spin_lock_init(&realm->inodes_with_caps_lock);
+ __insert_snap_realm(&mdsc->snap_realms, realm);
+ dout("create_snap_realm %llx %p\n", realm->ino, realm);
+ return realm;
+}
+
+/*
+ * lookup the realm rooted at @ino.
+ *
+ * caller must hold snap_rwsem for write.
+ */
+struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc,
+ u64 ino)
+{
+ struct rb_node *n = mdsc->snap_realms.rb_node;
+ struct ceph_snap_realm *r;
+
+ while (n) {
+ r = rb_entry(n, struct ceph_snap_realm, node);
+ if (ino < r->ino)
+ n = n->rb_left;
+ else if (ino > r->ino)
+ n = n->rb_right;
+ else {
+ dout("lookup_snap_realm %llx %p\n", r->ino, r);
+ return r;
+ }
+ }
+ return NULL;
+}
+
+static void __put_snap_realm(struct ceph_mds_client *mdsc,
+ struct ceph_snap_realm *realm);
+
/*
 * Tear a realm down: unlink it from the realm tree, drop the reference
 * it holds on its parent (if any), and free everything it owns.
 *
 * called with snap_rwsem (write)
 */
static void __destroy_snap_realm(struct ceph_mds_client *mdsc,
				 struct ceph_snap_realm *realm)
{
	dout("__destroy_snap_realm %p %llx\n", realm, realm->ino);

	/* unlink from the realm tree before tearing anything down */
	rb_erase(&realm->node, &mdsc->snap_realms);

	if (realm->parent) {
		/* detach from the parent's children list, then release
		 * the ref this realm held on the parent */
		list_del_init(&realm->child_item);
		__put_snap_realm(mdsc, realm->parent);
	}

	kfree(realm->prior_parent_snaps);
	kfree(realm->snaps);
	ceph_put_snap_context(realm->cached_context);
	kfree(realm);
}
+
+/*
+ * caller holds snap_rwsem (write)
+ */
+static void __put_snap_realm(struct ceph_mds_client *mdsc,
+ struct ceph_snap_realm *realm)
+{
+ dout("__put_snap_realm %llx %p %d -> %d\n", realm->ino, realm,
+ atomic_read(&realm->nref), atomic_read(&realm->nref)-1);
+ if (atomic_dec_and_test(&realm->nref))
+ __destroy_snap_realm(mdsc, realm);
+}
+
/*
 * Drop a reference to @realm.  On the final put, destroy it right away
 * if snap_rwsem can be acquired; otherwise park it on the snap_empty
 * list so __cleanup_empty_realms() reaps it later.
 *
 * caller needn't hold any locks
 */
void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
			 struct ceph_snap_realm *realm)
{
	dout("put_snap_realm %llx %p %d -> %d\n", realm->ino, realm,
	     atomic_read(&realm->nref), atomic_read(&realm->nref)-1);
	if (!atomic_dec_and_test(&realm->nref))
		return;

	/* destruction requires snap_rwsem for write */
	if (down_write_trylock(&mdsc->snap_rwsem)) {
		__destroy_snap_realm(mdsc, realm);
		up_write(&mdsc->snap_rwsem);
	} else {
		/* can't block here; defer to the empty list */
		spin_lock(&mdsc->snap_empty_lock);
		list_add(&realm->empty_item, &mdsc->snap_empty);
		spin_unlock(&mdsc->snap_empty_lock);
	}
}
+
/*
 * Clean up any realms whose ref counts have dropped to zero. Note
 * that this does not include realms who were created but not yet
 * used.
 *
 * Called under snap_rwsem (write)
 */
static void __cleanup_empty_realms(struct ceph_mds_client *mdsc)
{
	struct ceph_snap_realm *realm;

	spin_lock(&mdsc->snap_empty_lock);
	while (!list_empty(&mdsc->snap_empty)) {
		realm = list_first_entry(&mdsc->snap_empty,
				   struct ceph_snap_realm, empty_item);
		list_del(&realm->empty_item);
		/* do the actual destruction outside snap_empty_lock;
		 * __destroy_snap_realm() may drop the parent's ref and
		 * cascade into further destruction */
		spin_unlock(&mdsc->snap_empty_lock);
		__destroy_snap_realm(mdsc, realm);
		spin_lock(&mdsc->snap_empty_lock);
	}
	spin_unlock(&mdsc->snap_empty_lock);
}
+
/* public wrapper: take snap_rwsem (as __cleanup_empty_realms requires)
 * and reap zero-ref realms from the empty list */
void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc)
{
	down_write(&mdsc->snap_rwsem);
	__cleanup_empty_realms(mdsc);
	up_write(&mdsc->snap_rwsem);
}
+
+/*
+ * adjust the parent realm of a given @realm. adjust child list, and parent
+ * pointers, and ref counts appropriately.
+ *
+ * return 1 if parent was changed, 0 if unchanged, <0 on error.
+ *
+ * caller must hold snap_rwsem for write.
+ */
static int adjust_snap_realm_parent(struct ceph_mds_client *mdsc,
				    struct ceph_snap_realm *realm,
				    u64 parentino)
{
	struct ceph_snap_realm *parent;

	/* nothing to do if the parent is already correct */
	if (realm->parent_ino == parentino)
		return 0;

	/* find (or create) the new parent realm */
	parent = ceph_lookup_snap_realm(mdsc, parentino);
	if (!parent) {
		parent = ceph_create_snap_realm(mdsc, parentino);
		if (IS_ERR(parent))
			return PTR_ERR(parent);
	}
	dout("adjust_snap_realm_parent %llx %p: %llx %p -> %llx %p\n",
	     realm->ino, realm, realm->parent_ino, realm->parent,
	     parentino, parent);
	/* detach from and drop our ref on the old parent... */
	if (realm->parent) {
		list_del_init(&realm->child_item);
		ceph_put_snap_realm(mdsc, realm->parent);
	}
	/* ...then take a ref on and link under the new parent */
	realm->parent_ino = parentino;
	realm->parent = parent;
	ceph_get_snap_realm(mdsc, parent);
	list_add(&realm->child_item, &parent->children);
	return 1;
}
+
+
+static int cmpu64_rev(const void *a, const void *b)
+{
+ if (*(u64 *)a < *(u64 *)b)
+ return 1;
+ if (*(u64 *)a > *(u64 *)b)
+ return -1;
+ return 0;
+}
+
/*
 * build the snap context for a given realm.
 *
 * The snap context is the realm's own snaps, plus snaps inherited from
 * prior parents, plus any of the current parent's snaps taken since it
 * became our parent, sorted in descending order.
 *
 * Returns 0 on success (or when the cached context is already current),
 * negative errno on failure; on failure any stale cached context is
 * dropped so a later rebuild starts clean.
 */
static int build_snap_context(struct ceph_snap_realm *realm)
{
	struct ceph_snap_realm *parent = realm->parent;
	struct ceph_snap_context *snapc;
	int err = 0;
	u32 num = realm->num_prior_parent_snaps + realm->num_snaps;

	/*
	 * build parent context, if it hasn't been built.
	 * conservatively estimate that all parent snaps might be
	 * included by us.
	 */
	if (parent) {
		if (!parent->cached_context) {
			err = build_snap_context(parent);	/* recurse upward */
			if (err)
				goto fail;
		}
		num += parent->cached_context->num_snaps;
	}

	/* do i actually need to update?  not if my context seq
	   matches realm seq, and my parent's does too.  (this works
	   because we rebuild_snap_realms() works _downward_ in
	   hierarchy after each update.) */
	if (realm->cached_context &&
	    realm->cached_context->seq == realm->seq &&
	    (!parent ||
	     realm->cached_context->seq >= parent->cached_context->seq)) {
		dout("build_snap_context %llx %p: %p seq %lld (%u snaps)"
		     " (unchanged)\n",
		     realm->ino, realm, realm->cached_context,
		     realm->cached_context->seq,
		     (unsigned int) realm->cached_context->num_snaps);
		return 0;
	}

	/* alloc new snap context */
	err = -ENOMEM;
	/* guard against integer overflow when sizing the allocation */
	if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64))
		goto fail;
	snapc = ceph_create_snap_context(num, GFP_NOFS);
	if (!snapc)
		goto fail;

	/* build (reverse sorted) snap vector */
	num = 0;
	snapc->seq = realm->seq;
	if (parent) {
		u32 i;

		/* include any of parent's snaps occurring _after_ my
		   parent became my parent */
		for (i = 0; i < parent->cached_context->num_snaps; i++)
			if (parent->cached_context->snaps[i] >=
			    realm->parent_since)
				snapc->snaps[num++] =
					parent->cached_context->snaps[i];
		/* context seq is the max over us and our ancestors */
		if (parent->cached_context->seq > snapc->seq)
			snapc->seq = parent->cached_context->seq;
	}
	/* then our own snaps and those inherited from prior parents */
	memcpy(snapc->snaps + num, realm->snaps,
	       sizeof(u64)*realm->num_snaps);
	num += realm->num_snaps;
	memcpy(snapc->snaps + num, realm->prior_parent_snaps,
	       sizeof(u64)*realm->num_prior_parent_snaps);
	num += realm->num_prior_parent_snaps;

	/* descending order, via cmpu64_rev */
	sort(snapc->snaps, num, sizeof(u64), cmpu64_rev, NULL);
	snapc->num_snaps = num;
	dout("build_snap_context %llx %p: %p seq %lld (%u snaps)\n",
	     realm->ino, realm, snapc, snapc->seq,
	     (unsigned int) snapc->num_snaps);

	/* swap in the new context, dropping the old one */
	if (realm->cached_context)
		ceph_put_snap_context(realm->cached_context);
	realm->cached_context = snapc;
	return 0;

fail:
	/*
	 * if we fail, clear old (incorrect) cached_context... hopefully
	 * we'll have better luck building it later
	 */
	if (realm->cached_context) {
		ceph_put_snap_context(realm->cached_context);
		realm->cached_context = NULL;
	}
	pr_err("build_snap_context %llx %p fail %d\n", realm->ino,
	       realm, err);
	return err;
}
+
/*
 * rebuild snap context for the given realm and all of its children,
 * depth-first.
 *
 * NOTE(review): build_snap_context()'s error return is ignored here;
 * on ENOMEM the realm is left without a cached context (see the
 * error-handling caveat at the top of this file).
 */
static void rebuild_snap_realms(struct ceph_snap_realm *realm)
{
	struct ceph_snap_realm *child;

	dout("rebuild_snap_realms %llx %p\n", realm->ino, realm);
	build_snap_context(realm);

	list_for_each_entry(child, &realm->children, child_item)
		rebuild_snap_realms(child);
}
+
+
+/*
+ * helper to allocate and decode an array of snapids. free prior
+ * instance, if any.
+ */
+static int dup_array(u64 **dst, __le64 *src, u32 num)
+{
+ u32 i;
+
+ kfree(*dst);
+ if (num) {
+ *dst = kcalloc(num, sizeof(u64), GFP_NOFS);
+ if (!*dst)
+ return -ENOMEM;
+ for (i = 0; i < num; i++)
+ (*dst)[i] = get_unaligned_le64(src + i);
+ } else {
+ *dst = NULL;
+ }
+ return 0;
+}
+
+
/*
 * When a snapshot is applied, the size/mtime inode metadata is queued
 * in a ceph_cap_snap (one for each snapshot) until writeback
 * completes and the metadata can be flushed back to the MDS.
 *
 * However, if a (sync) write is currently in-progress when we apply
 * the snapshot, we have to wait until the write succeeds or fails
 * (and a final size/mtime is known).  In this case the
 * cap_snap->writing = 1, and is said to be "pending."  When the write
 * finishes, we __ceph_finish_cap_snap().
 *
 * Caller must hold snap_rwsem for read (i.e., the realm topology won't
 * change).
 */
void ceph_queue_cap_snap(struct ceph_inode_info *ci)
{
	struct inode *inode = &ci->vfs_inode;
	struct ceph_cap_snap *capsnap;
	int used, dirty;

	/* allocate before taking i_ceph_lock (allocation may sleep) */
	capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS);
	if (!capsnap) {
		pr_err("ENOMEM allocating ceph_cap_snap on %p\n", inode);
		return;
	}

	spin_lock(&ci->i_ceph_lock);
	used = __ceph_caps_used(ci);
	dirty = __ceph_caps_dirty(ci);

	/*
	 * If there is a write in progress, treat that as a dirty Fw,
	 * even though it hasn't completed yet; by the time we finish
	 * up this capsnap it will be.
	 */
	if (used & CEPH_CAP_FILE_WR)
		dirty |= CEPH_CAP_FILE_WR;

	if (__ceph_have_pending_cap_snap(ci)) {
		/* there is no point in queuing multiple "pending" cap_snaps,
		   as no new writes are allowed to start when pending, so any
		   writes in progress now were started before the previous
		   cap_snap.  lucky us. */
		dout("queue_cap_snap %p already pending\n", inode);
		kfree(capsnap);
	} else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
			    CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) {
		struct ceph_snap_context *snapc = ci->i_head_snapc;

		/*
		 * if we are a sync write, we may need to go to the snaprealm
		 * to get the current snapc.
		 */
		if (!snapc)
			snapc = ci->i_snap_realm->cached_context;

		dout("queue_cap_snap %p cap_snap %p queuing under %p %s\n",
		     inode, capsnap, snapc, ceph_cap_string(dirty));
		/* inode ref held while the cap_snap is outstanding */
		ihold(inode);

		atomic_set(&capsnap->nref, 1);
		capsnap->ci = ci;
		INIT_LIST_HEAD(&capsnap->ci_item);
		INIT_LIST_HEAD(&capsnap->flushing_item);

		capsnap->follows = snapc->seq;
		capsnap->issued = __ceph_caps_issued(ci, NULL);
		capsnap->dirty = dirty;

		/* capture the inode metadata this client is responsible for */
		capsnap->mode = inode->i_mode;
		capsnap->uid = inode->i_uid;
		capsnap->gid = inode->i_gid;

		if (dirty & CEPH_CAP_XATTR_EXCL) {
			/* snapshot the xattr blob as well */
			__ceph_build_xattrs_blob(ci);
			capsnap->xattr_blob =
				ceph_buffer_get(ci->i_xattrs.blob);
			capsnap->xattr_version = ci->i_xattrs.version;
		} else {
			capsnap->xattr_blob = NULL;
			capsnap->xattr_version = 0;
		}

		/* dirty page count moved from _head to this cap_snap;
		   all subsequent writes page dirties occur _after_ this
		   snapshot. */
		capsnap->dirty_pages = ci->i_wrbuffer_ref_head;
		ci->i_wrbuffer_ref_head = 0;
		capsnap->context = snapc;
		/* head writes now accrue against the realm's current context */
		ci->i_head_snapc =
			ceph_get_snap_context(ci->i_snap_realm->cached_context);
		dout(" new snapc is %p\n", ci->i_head_snapc);
		list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);

		if (used & CEPH_CAP_FILE_WR) {
			dout("queue_cap_snap %p cap_snap %p snapc %p"
			     " seq %llu used WR, now pending\n", inode,
			     capsnap, snapc, snapc->seq);
			/* a write is in flight; finalized later via
			 * __ceph_finish_cap_snap() */
			capsnap->writing = 1;
		} else {
			/* note mtime, size NOW. */
			__ceph_finish_cap_snap(ci, capsnap);
		}
	} else {
		dout("queue_cap_snap %p nothing dirty|writing\n", inode);
		kfree(capsnap);
	}

	spin_unlock(&ci->i_ceph_lock);
}
+
/*
 * Finalize the size, mtime for a cap_snap.. that is, settle on final values
 * to be used for the snapshot, to be flushed back to the mds.
 *
 * If capsnap can now be flushed, add to snap_flush list, and return 1.
 * Returns 0 if dirty pages still prevent flushing.
 *
 * Caller must hold i_ceph_lock.
 */
int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
			   struct ceph_cap_snap *capsnap)
{
	struct inode *inode = &ci->vfs_inode;
	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;

	/* must not be called while a write is still pending */
	BUG_ON(capsnap->writing);
	/* record the final inode metadata for this snapshot */
	capsnap->size = inode->i_size;
	capsnap->mtime = inode->i_mtime;
	capsnap->atime = inode->i_atime;
	capsnap->ctime = inode->i_ctime;
	capsnap->time_warp_seq = ci->i_time_warp_seq;
	if (capsnap->dirty_pages) {
		/* can't flush until the snap's dirty pages are written back */
		dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu "
		     "still has %d dirty pages\n", inode, capsnap,
		     capsnap->context, capsnap->context->seq,
		     ceph_cap_string(capsnap->dirty), capsnap->size,
		     capsnap->dirty_pages);
		return 0;
	}
	dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu\n",
	     inode, capsnap, capsnap->context,
	     capsnap->context->seq, ceph_cap_string(capsnap->dirty),
	     capsnap->size);

	/* queue the inode for flushing back to the MDS */
	spin_lock(&mdsc->snap_flush_lock);
	list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
	spin_unlock(&mdsc->snap_flush_lock);
	return 1;  /* caller may want to ceph_flush_snaps */
}
+
/*
 * Queue cap_snaps for snap writeback for this realm and its children.
 * Called under snap_rwsem, so realm topology won't change.
 */
static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
{
	struct ceph_inode_info *ci;
	struct inode *lastinode = NULL;
	struct ceph_snap_realm *child;

	dout("queue_realm_cap_snaps %p %llx inodes\n", realm, realm->ino);

	spin_lock(&realm->inodes_with_caps_lock);
	list_for_each_entry(ci, &realm->inodes_with_caps,
			    i_snap_realm_item) {
		/* igrab fails if the inode is being freed; skip those */
		struct inode *inode = igrab(&ci->vfs_inode);
		if (!inode)
			continue;
		/* drop the spinlock to call ceph_queue_cap_snap() (it
		 * takes i_ceph_lock and allocates); the held inode ref
		 * presumably keeps our list entry valid meanwhile --
		 * NOTE(review): confirm this is what pins the entry */
		spin_unlock(&realm->inodes_with_caps_lock);
		if (lastinode)
			iput(lastinode);	/* drop previous ref outside the lock */
		lastinode = inode;
		ceph_queue_cap_snap(ci);
		spin_lock(&realm->inodes_with_caps_lock);
	}
	spin_unlock(&realm->inodes_with_caps_lock);
	if (lastinode)
		iput(lastinode);

	/*
	 * Chain our children onto our own dirty_item entry.  Since we
	 * are linked on the caller's dirty_realms list, this splices
	 * the children in after us; they will be processed by the
	 * caller's loop, and removing ourselves below leaves them
	 * linked into that list.
	 */
	list_for_each_entry(child, &realm->children, child_item) {
		dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n",
		     realm, realm->ino, child, child->ino);
		list_del_init(&child->dirty_item);
		list_add(&child->dirty_item, &realm->dirty_item);
	}

	/* done with this realm; unlink it from the dirty list */
	list_del_init(&realm->dirty_item);
	dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino);
}
+
/*
 * Parse and apply a snapblob "snap trace" from the MDS.  This specifies
 * the snap realm parameters from a given realm and all of its ancestors,
 * up to the root.
 *
 * @p..@e: encoded trace -- a sequence of ceph_mds_snap_realm records,
 * each followed by its snaps and prior_parent_snaps arrays, ordered
 * from the affected realm up toward the root.
 * @deletion: true when applying a snap deletion.  (Note: only used in
 * debug output below; dirty realms are queued either way.)
 *
 * Returns 0 on success, -EINVAL for a corrupt trace, -ENOMEM on
 * allocation failure.
 *
 * Caller must hold snap_rwsem for write.
 */
int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
			   void *p, void *e, bool deletion)
{
	struct ceph_mds_snap_realm *ri;    /* encoded */
	__le64 *snaps;                     /* encoded */
	__le64 *prior_parent_snaps;        /* encoded */
	struct ceph_snap_realm *realm;
	int invalidate = 0;
	int err = -ENOMEM;
	LIST_HEAD(dirty_realms);

	dout("update_snap_trace deletion=%d\n", deletion);
more:
	/* decode the next realm record and its two snapid arrays */
	ceph_decode_need(&p, e, sizeof(*ri), bad);
	ri = p;
	p += sizeof(*ri);
	ceph_decode_need(&p, e, sizeof(u64)*(le32_to_cpu(ri->num_snaps) +
			    le32_to_cpu(ri->num_prior_parent_snaps)), bad);
	snaps = p;
	p += sizeof(u64) * le32_to_cpu(ri->num_snaps);
	prior_parent_snaps = p;
	p += sizeof(u64) * le32_to_cpu(ri->num_prior_parent_snaps);

	/* find or create the realm this record describes */
	realm = ceph_lookup_snap_realm(mdsc, le64_to_cpu(ri->ino));
	if (!realm) {
		realm = ceph_create_snap_realm(mdsc, le64_to_cpu(ri->ino));
		if (IS_ERR(realm)) {
			err = PTR_ERR(realm);
			goto fail;
		}
	}

	/* ensure the parent is correct */
	err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent));
	if (err < 0)
		goto fail;
	invalidate += err;	/* 1 if the parent changed */

	if (le64_to_cpu(ri->seq) > realm->seq) {
		dout("update_snap_trace updating %llx %p %lld -> %lld\n",
		     realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
		/* update realm parameters, snap lists */
		realm->seq = le64_to_cpu(ri->seq);
		realm->created = le64_to_cpu(ri->created);
		realm->parent_since = le64_to_cpu(ri->parent_since);

		realm->num_snaps = le32_to_cpu(ri->num_snaps);
		err = dup_array(&realm->snaps, snaps, realm->num_snaps);
		if (err < 0)
			goto fail;

		realm->num_prior_parent_snaps =
			le32_to_cpu(ri->num_prior_parent_snaps);
		err = dup_array(&realm->prior_parent_snaps, prior_parent_snaps,
				realm->num_prior_parent_snaps);
		if (err < 0)
			goto fail;

		/* queue realm for cap_snap creation */
		list_add(&realm->dirty_item, &dirty_realms);

		invalidate = 1;
	} else if (!realm->cached_context) {
		/* realm exists but never had a snap context built */
		dout("update_snap_trace %llx %p seq %lld new\n",
		     realm->ino, realm, realm->seq);
		invalidate = 1;
	} else {
		dout("update_snap_trace %llx %p seq %lld unchanged\n",
		     realm->ino, realm, realm->seq);
	}

	dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino,
	     realm, invalidate, p, e);

	/* more records follow, walking toward the root realm */
	if (p < e)
		goto more;

	/* invalidate when we reach the _end_ (root) of the trace */
	if (invalidate)
		rebuild_snap_realms(realm);

	/*
	 * queue cap snaps _after_ we've built the new snap contexts,
	 * so that i_head_snapc can be set appropriately.
	 */
	while (!list_empty(&dirty_realms)) {
		realm = list_first_entry(&dirty_realms, struct ceph_snap_realm,
					 dirty_item);
		/* this also unlinks realm (and splices in its children) */
		queue_realm_cap_snaps(realm);
	}

	__cleanup_empty_realms(mdsc);
	return 0;

bad:
	err = -EINVAL;
fail:
	pr_err("update_snap_trace error %d\n", err);
	return err;
}
+
+
/*
 * Send any cap_snaps that are queued for flush.  Try to carry
 * s_mutex across multiple snap flushes to avoid locking overhead.
 *
 * Caller holds no locks.
 */
static void flush_snaps(struct ceph_mds_client *mdsc)
{
	struct ceph_inode_info *ci;
	struct inode *inode;
	struct ceph_mds_session *session = NULL;

	dout("flush_snaps\n");
	spin_lock(&mdsc->snap_flush_lock);
	while (!list_empty(&mdsc->snap_flush_list)) {
		ci = list_first_entry(&mdsc->snap_flush_list,
				struct ceph_inode_info, i_snap_flush_item);
		inode = &ci->vfs_inode;
		/* pin the inode while we drop snap_flush_lock */
		ihold(inode);
		spin_unlock(&mdsc->snap_flush_lock);
		spin_lock(&ci->i_ceph_lock);
		/* session is passed by reference so the locked session
		 * can be reused across consecutive inodes */
		__ceph_flush_snaps(ci, &session, 0);
		spin_unlock(&ci->i_ceph_lock);
		iput(inode);
		spin_lock(&mdsc->snap_flush_lock);
	}
	spin_unlock(&mdsc->snap_flush_lock);

	/* release the last session __ceph_flush_snaps() left us holding */
	if (session) {
		mutex_unlock(&session->s_mutex);
		ceph_put_mds_session(session);
	}
	dout("flush_snaps done\n");
}
+
+
/*
 * Handle a snap notification from the MDS.
 *
 * This can take two basic forms: the simplest is just a snap creation
 * or deletion notification on an existing realm.  This should update the
 * realm and its children.
 *
 * The more difficult case is realm creation, due to snap creation at a
 * new point in the file hierarchy, or due to a rename that moves a file or
 * directory into another realm.
 */
void ceph_handle_snap(struct ceph_mds_client *mdsc,
		      struct ceph_mds_session *session,
		      struct ceph_msg *msg)
{
	struct super_block *sb = mdsc->fsc->sb;
	int mds = session->s_mds;
	u64 split;
	int op;
	int trace_len;
	struct ceph_snap_realm *realm = NULL;
	void *p = msg->front.iov_base;
	void *e = p + msg->front.iov_len;
	struct ceph_mds_snap_head *h;
	int num_split_inos, num_split_realms;
	__le64 *split_inos = NULL, *split_realms = NULL;
	int i;
	int locked_rwsem = 0;

	/* decode */
	if (msg->front.iov_len < sizeof(*h))
		goto bad;
	h = p;
	op = le32_to_cpu(h->op);
	split = le64_to_cpu(h->split); /* non-zero if we are splitting an
					* existing realm */
	num_split_inos = le32_to_cpu(h->num_split_inos);
	num_split_realms = le32_to_cpu(h->num_split_realms);
	trace_len = le32_to_cpu(h->trace_len);
	p += sizeof(*h);

	dout("handle_snap from mds%d op %s split %llx tracelen %d\n", mds,
	     ceph_snap_op_name(op), split, trace_len);

	/* bump the per-session message sequence number */
	mutex_lock(&session->s_mutex);
	session->s_seq++;
	mutex_unlock(&session->s_mutex);

	down_write(&mdsc->snap_rwsem);
	locked_rwsem = 1;

	if (op == CEPH_SNAP_OP_SPLIT) {
		struct ceph_mds_snap_realm *ri;

		/*
		 * A "split" breaks part of an existing realm off into
		 * a new realm.  The MDS provides a list of inodes
		 * (with caps) and child realms that belong to the new
		 * child.
		 */
		split_inos = p;
		p += sizeof(u64) * num_split_inos;
		split_realms = p;
		p += sizeof(u64) * num_split_realms;
		ceph_decode_need(&p, e, sizeof(*ri), bad);
		/* we will peek at realm info here, but will _not_
		 * advance p, as the realm update will occur below in
		 * ceph_update_snap_trace. */
		ri = p;

		realm = ceph_lookup_snap_realm(mdsc, split);
		if (!realm) {
			realm = ceph_create_snap_realm(mdsc, split);
			if (IS_ERR(realm))
				goto out;	/* NOTE(review): ENOMEM is
						 * dropped without a log here */
		}
		/* hold a ref across the split; dropped below */
		ceph_get_snap_realm(mdsc, realm);

		dout("splitting snap_realm %llx %p\n", realm->ino, realm);
		for (i = 0; i < num_split_inos; i++) {
			struct ceph_vino vino = {
				.ino = le64_to_cpu(split_inos[i]),
				.snap = CEPH_NOSNAP,
			};
			struct inode *inode = ceph_find_inode(sb, vino);
			struct ceph_inode_info *ci;
			struct ceph_snap_realm *oldrealm;

			/* not in our inode cache: nothing to move */
			if (!inode)
				continue;
			ci = ceph_inode(inode);

			spin_lock(&ci->i_ceph_lock);
			if (!ci->i_snap_realm)
				goto skip_inode;
			/*
			 * If this inode belongs to a realm that was
			 * created after our new realm, we experienced
			 * a race (due to another split notifications
			 * arriving from a different MDS).  So skip
			 * this inode.
			 */
			if (ci->i_snap_realm->created >
			    le64_to_cpu(ri->created)) {
				dout(" leaving %p in newer realm %llx %p\n",
				     inode, ci->i_snap_realm->ino,
				     ci->i_snap_realm);
				goto skip_inode;
			}
			dout(" will move %p to split realm %llx %p\n",
			     inode, realm->ino, realm);
			/*
			 * Move the inode to the new realm
			 */
			spin_lock(&realm->inodes_with_caps_lock);
			list_del_init(&ci->i_snap_realm_item);
			list_add(&ci->i_snap_realm_item,
				 &realm->inodes_with_caps);
			oldrealm = ci->i_snap_realm;
			ci->i_snap_realm = realm;
			spin_unlock(&realm->inodes_with_caps_lock);
			spin_unlock(&ci->i_ceph_lock);

			/* the new realm gains a ref for this inode;
			 * the old realm loses one */
			ceph_get_snap_realm(mdsc, realm);
			ceph_put_snap_realm(mdsc, oldrealm);

			iput(inode);
			continue;

skip_inode:
			spin_unlock(&ci->i_ceph_lock);
			iput(inode);
		}

		/* we may have taken some of the old realm's children. */
		for (i = 0; i < num_split_realms; i++) {
			struct ceph_snap_realm *child =
				ceph_lookup_snap_realm(mdsc,
					   le64_to_cpu(split_realms[i]));
			if (!child)
				continue;
			adjust_snap_realm_parent(mdsc, child, realm->ino);
		}
	}

	/*
	 * update using the provided snap trace. if we are deleting a
	 * snap, we can avoid queueing cap_snaps.
	 *
	 * NOTE(review): the return value is ignored; a decode failure
	 * is only logged inside ceph_update_snap_trace().
	 */
	ceph_update_snap_trace(mdsc, p, e,
			       op == CEPH_SNAP_OP_DESTROY);

	if (op == CEPH_SNAP_OP_SPLIT)
		/* we took a reference when we created the realm, above */
		ceph_put_snap_realm(mdsc, realm);

	__cleanup_empty_realms(mdsc);

	up_write(&mdsc->snap_rwsem);

	flush_snaps(mdsc);
	return;

bad:
	pr_err("corrupt snap message from mds%d\n", mds);
	ceph_msg_dump(msg);
out:
	if (locked_rwsem)
		up_write(&mdsc->snap_rwsem);
	return;
}
+
+
+
diff --git a/ceph/strings.c b/ceph/strings.c
new file mode 100644
index 0000000..51cc23e
--- /dev/null
+++ b/ceph/strings.c
@@ -0,0 +1,124 @@
+/*
+ * Ceph fs string constants
+ */
+#include <linux/module.h>
+#include <linux/ceph/types.h>
+
+
+const char *ceph_mds_state_name(int s)
+{
+ switch (s) {
+ /* down and out */
+ case CEPH_MDS_STATE_DNE: return "down:dne";
+ case CEPH_MDS_STATE_STOPPED: return "down:stopped";
+ /* up and out */
+ case CEPH_MDS_STATE_BOOT: return "up:boot";
+ case CEPH_MDS_STATE_STANDBY: return "up:standby";
+ case CEPH_MDS_STATE_STANDBY_REPLAY: return "up:standby-replay";
+ case CEPH_MDS_STATE_REPLAYONCE: return "up:oneshot-replay";
+ case CEPH_MDS_STATE_CREATING: return "up:creating";
+ case CEPH_MDS_STATE_STARTING: return "up:starting";
+ /* up and in */
+ case CEPH_MDS_STATE_REPLAY: return "up:replay";
+ case CEPH_MDS_STATE_RESOLVE: return "up:resolve";
+ case CEPH_MDS_STATE_RECONNECT: return "up:reconnect";
+ case CEPH_MDS_STATE_REJOIN: return "up:rejoin";
+ case CEPH_MDS_STATE_CLIENTREPLAY: return "up:clientreplay";
+ case CEPH_MDS_STATE_ACTIVE: return "up:active";
+ case CEPH_MDS_STATE_STOPPING: return "up:stopping";
+ }
+ return "???";
+}
+
+const char *ceph_session_op_name(int op)
+{
+ switch (op) {
+ case CEPH_SESSION_REQUEST_OPEN: return "request_open";
+ case CEPH_SESSION_OPEN: return "open";
+ case CEPH_SESSION_REQUEST_CLOSE: return "request_close";
+ case CEPH_SESSION_CLOSE: return "close";
+ case CEPH_SESSION_REQUEST_RENEWCAPS: return "request_renewcaps";
+ case CEPH_SESSION_RENEWCAPS: return "renewcaps";
+ case CEPH_SESSION_STALE: return "stale";
+ case CEPH_SESSION_RECALL_STATE: return "recall_state";
+ case CEPH_SESSION_FLUSHMSG: return "flushmsg";
+ case CEPH_SESSION_FLUSHMSG_ACK: return "flushmsg_ack";
+ }
+ return "???";
+}
+
+const char *ceph_mds_op_name(int op)
+{
+ switch (op) {
+ case CEPH_MDS_OP_LOOKUP: return "lookup";
+ case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash";
+ case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent";
+ case CEPH_MDS_OP_LOOKUPINO: return "lookupino";
+ case CEPH_MDS_OP_LOOKUPNAME: return "lookupname";
+ case CEPH_MDS_OP_GETATTR: return "getattr";
+ case CEPH_MDS_OP_SETXATTR: return "setxattr";
+ case CEPH_MDS_OP_SETATTR: return "setattr";
+ case CEPH_MDS_OP_RMXATTR: return "rmxattr";
+ case CEPH_MDS_OP_SETLAYOUT: return "setlayou";
+ case CEPH_MDS_OP_SETDIRLAYOUT: return "setdirlayout";
+ case CEPH_MDS_OP_READDIR: return "readdir";
+ case CEPH_MDS_OP_MKNOD: return "mknod";
+ case CEPH_MDS_OP_LINK: return "link";
+ case CEPH_MDS_OP_UNLINK: return "unlink";
+ case CEPH_MDS_OP_RENAME: return "rename";
+ case CEPH_MDS_OP_MKDIR: return "mkdir";
+ case CEPH_MDS_OP_RMDIR: return "rmdir";
+ case CEPH_MDS_OP_SYMLINK: return "symlink";
+ case CEPH_MDS_OP_CREATE: return "create";
+ case CEPH_MDS_OP_OPEN: return "open";
+ case CEPH_MDS_OP_LOOKUPSNAP: return "lookupsnap";
+ case CEPH_MDS_OP_LSSNAP: return "lssnap";
+ case CEPH_MDS_OP_MKSNAP: return "mksnap";
+ case CEPH_MDS_OP_RMSNAP: return "rmsnap";
+ case CEPH_MDS_OP_SETFILELOCK: return "setfilelock";
+ case CEPH_MDS_OP_GETFILELOCK: return "getfilelock";
+ }
+ return "???";
+}
+
+const char *ceph_cap_op_name(int op)
+{
+ switch (op) {
+ case CEPH_CAP_OP_GRANT: return "grant";
+ case CEPH_CAP_OP_REVOKE: return "revoke";
+ case CEPH_CAP_OP_TRUNC: return "trunc";
+ case CEPH_CAP_OP_EXPORT: return "export";
+ case CEPH_CAP_OP_IMPORT: return "import";
+ case CEPH_CAP_OP_UPDATE: return "update";
+ case CEPH_CAP_OP_DROP: return "drop";
+ case CEPH_CAP_OP_FLUSH: return "flush";
+ case CEPH_CAP_OP_FLUSH_ACK: return "flush_ack";
+ case CEPH_CAP_OP_FLUSHSNAP: return "flushsnap";
+ case CEPH_CAP_OP_FLUSHSNAP_ACK: return "flushsnap_ack";
+ case CEPH_CAP_OP_RELEASE: return "release";
+ case CEPH_CAP_OP_RENEW: return "renew";
+ }
+ return "???";
+}
+
+const char *ceph_lease_op_name(int o)
+{
+ switch (o) {
+ case CEPH_MDS_LEASE_REVOKE: return "revoke";
+ case CEPH_MDS_LEASE_RELEASE: return "release";
+ case CEPH_MDS_LEASE_RENEW: return "renew";
+ case CEPH_MDS_LEASE_REVOKE_ACK: return "revoke_ack";
+ }
+ return "???";
+}
+
+const char *ceph_snap_op_name(int o)
+{
+ switch (o) {
+ case CEPH_SNAP_OP_UPDATE: return "update";
+ case CEPH_SNAP_OP_CREATE: return "create";
+ case CEPH_SNAP_OP_DESTROY: return "destroy";
+ case CEPH_SNAP_OP_SPLIT: return "split";
+ }
+ return "???";
+}
diff --git a/ceph/super.c b/ceph/super.c
new file mode 100644
index 0000000..06150fd
--- /dev/null
+++ b/ceph/super.c
@@ -0,0 +1,1061 @@
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/backing-dev.h>
+#include <linux/ctype.h>
+#include <linux/fs.h>
+#include <linux/inet.h>
+#include <linux/in6.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/parser.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/statfs.h>
+#include <linux/string.h>
+
+#include "super.h"
+#include "mds_client.h"
+#include "cache.h"
+
+#include <linux/ceph/ceph_features.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/debugfs.h>
+
+/*
+ * Ceph superblock operations
+ *
+ * Handle the basics of mounting, unmounting.
+ */
+
+/*
+ * super ops
+ */
+/*
+ * Release the superblock: close all MDS sessions, then detach and
+ * unregister our private bdi before the anon super releases the
+ * device name.
+ */
+static void ceph_put_super(struct super_block *s)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(s);
+
+ dout("put_super\n");
+ ceph_mdsc_close_sessions(fsc->mdsc);
+
+ /*
+ * ensure we release the bdi before put_anon_super releases
+ * the device name.
+ */
+ if (s->s_bdi == &fsc->backing_dev_info) {
+ bdi_unregister(&fsc->backing_dev_info);
+ s->s_bdi = NULL;
+ }
+
+ return;
+}
+
+/*
+ * statfs: query the monitors for cluster-wide usage and translate it
+ * into a kstatfs.  Counts are expressed in CEPH_BLOCK-sized units to
+ * avoid 32-bit overflow; st.kb values are little-endian on the wire.
+ * Returns 0 or a negative errno from the mon request.
+ */
+static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+ struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode);
+ struct ceph_monmap *monmap = fsc->client->monc.monmap;
+ struct ceph_statfs st;
+ u64 fsid;
+ int err;
+
+ dout("statfs\n");
+ err = ceph_monc_do_statfs(&fsc->client->monc, &st);
+ if (err < 0)
+ return err;
+
+ /* fill in kstatfs */
+ buf->f_type = CEPH_SUPER_MAGIC; /* ?? */
+
+ /*
+ * express utilization in terms of large blocks to avoid
+ * overflow on 32-bit machines.
+ *
+ * NOTE: for the time being, we make bsize == frsize to humor
+ * not-yet-ancient versions of glibc that are broken.
+ * Someday, we will probably want to report a real block
+ * size... whatever that may mean for a network file system!
+ */
+ buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
+ buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
+ /* st.kb is in KiB, so shift by (BLOCK_SHIFT - 10) to get blocks */
+ buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
+ buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
+ buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
+
+ buf->f_files = le64_to_cpu(st.num_objects);
+ buf->f_ffree = -1;
+ buf->f_namelen = NAME_MAX;
+
+ /* leave fsid little-endian, regardless of host endianness */
+ fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1);
+ buf->f_fsid.val[0] = fsid & 0xffffffff;
+ buf->f_fsid.val[1] = fsid >> 32;
+
+ return 0;
+}
+
+
+/*
+ * sync_fs: non-blocking mode just kicks off a flush of dirty caps;
+ * blocking mode waits for both the OSD and MDS clients to drain.
+ * Always returns 0.
+ */
+static int ceph_sync_fs(struct super_block *sb, int wait)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
+
+ if (!wait) {
+ dout("sync_fs (non-blocking)\n");
+ ceph_flush_dirty_caps(fsc->mdsc);
+ dout("sync_fs (non-blocking) done\n");
+ return 0;
+ }
+
+ dout("sync_fs (blocking)\n");
+ ceph_osdc_sync(&fsc->client->osdc);
+ ceph_mdsc_sync(fsc->mdsc);
+ dout("sync_fs (blocking) done\n");
+ return 0;
+}
+
+/*
+ * mount options
+ */
+/*
+ * fs-level mount option tokens.  Ordering matters: tokens below
+ * Opt_last_int take an integer argument, tokens between Opt_last_int
+ * and Opt_last_string take a string, the rest are bare flags (see
+ * parse_fsopt_token()).
+ */
+enum {
+ Opt_wsize,
+ Opt_rsize,
+ Opt_rasize,
+ Opt_caps_wanted_delay_min,
+ Opt_caps_wanted_delay_max,
+ Opt_cap_release_safety,
+ Opt_readdir_max_entries,
+ Opt_readdir_max_bytes,
+ Opt_congestion_kb,
+ Opt_last_int,
+ /* int args above */
+ Opt_snapdirname,
+ Opt_last_string,
+ /* string args above */
+ Opt_dirstat,
+ Opt_nodirstat,
+ Opt_rbytes,
+ Opt_norbytes,
+ Opt_asyncreaddir,
+ Opt_noasyncreaddir,
+ Opt_dcache,
+ Opt_nodcache,
+ Opt_ino32,
+ Opt_noino32,
+ Opt_fscache,
+ Opt_nofscache,
+#ifdef CONFIG_CEPH_FS_POSIX_ACL
+ Opt_acl,
+#endif
+ Opt_noacl
+};
+
+/* token -> pattern table consumed by match_token() */
+static match_table_t fsopt_tokens = {
+ {Opt_wsize, "wsize=%d"},
+ {Opt_rsize, "rsize=%d"},
+ {Opt_rasize, "rasize=%d"},
+ {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
+ {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
+ {Opt_cap_release_safety, "cap_release_safety=%d"},
+ {Opt_readdir_max_entries, "readdir_max_entries=%d"},
+ {Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
+ {Opt_congestion_kb, "write_congestion_kb=%d"},
+ /* int args above */
+ {Opt_snapdirname, "snapdirname=%s"},
+ /* string args above */
+ {Opt_dirstat, "dirstat"},
+ {Opt_nodirstat, "nodirstat"},
+ {Opt_rbytes, "rbytes"},
+ {Opt_norbytes, "norbytes"},
+ {Opt_asyncreaddir, "asyncreaddir"},
+ {Opt_noasyncreaddir, "noasyncreaddir"},
+ {Opt_dcache, "dcache"},
+ {Opt_nodcache, "nodcache"},
+ {Opt_ino32, "ino32"},
+ {Opt_noino32, "noino32"},
+ {Opt_fscache, "fsc"},
+ {Opt_nofscache, "nofsc"},
+#ifdef CONFIG_CEPH_FS_POSIX_ACL
+ {Opt_acl, "acl"},
+#endif
+ {Opt_noacl, "noacl"},
+ {-1, NULL}
+};
+
+/*
+ * Parse a single fs-level mount option token into @private (a
+ * struct ceph_mount_options).  Integer tokens have their argument
+ * decoded first; string tokens are duplicated out of argstr.
+ * Returns 0 on success or a negative errno.
+ */
+static int parse_fsopt_token(char *c, void *private)
+{
+ struct ceph_mount_options *fsopt = private;
+ substring_t argstr[MAX_OPT_ARGS];
+ int token, intval, ret;
+
+ token = match_token((char *)c, fsopt_tokens, argstr);
+ if (token < 0)
+ return -EINVAL;
+
+ if (token < Opt_last_int) {
+ ret = match_int(&argstr[0], &intval);
+ if (ret < 0) {
+ pr_err("bad mount option arg (not int) "
+ "at '%s'\n", c);
+ return ret;
+ }
+ dout("got int token %d val %d\n", token, intval);
+ } else if (token > Opt_last_int && token < Opt_last_string) {
+ dout("got string token %d val %s\n", token,
+ argstr[0].from);
+ } else {
+ dout("got token %d\n", token);
+ }
+
+ switch (token) {
+ case Opt_snapdirname:
+ kfree(fsopt->snapdir_name);
+ fsopt->snapdir_name = kstrndup(argstr[0].from,
+ argstr[0].to-argstr[0].from,
+ GFP_KERNEL);
+ if (!fsopt->snapdir_name)
+ return -ENOMEM;
+ break;
+
+ /* misc */
+ case Opt_wsize:
+ fsopt->wsize = intval;
+ break;
+ case Opt_rsize:
+ fsopt->rsize = intval;
+ break;
+ case Opt_rasize:
+ fsopt->rasize = intval;
+ break;
+ case Opt_caps_wanted_delay_min:
+ fsopt->caps_wanted_delay_min = intval;
+ break;
+ case Opt_caps_wanted_delay_max:
+ fsopt->caps_wanted_delay_max = intval;
+ break;
+ case Opt_cap_release_safety:
+ /*
+ * This case was missing even though the token is declared
+ * in fsopt_tokens: a "cap_release_safety=N" mount option
+ * would fall through to the BUG_ON() in the default arm.
+ */
+ fsopt->cap_release_safety = intval;
+ break;
+ case Opt_readdir_max_entries:
+ fsopt->max_readdir = intval;
+ break;
+ case Opt_readdir_max_bytes:
+ fsopt->max_readdir_bytes = intval;
+ break;
+ case Opt_congestion_kb:
+ fsopt->congestion_kb = intval;
+ break;
+ case Opt_dirstat:
+ fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT;
+ break;
+ case Opt_nodirstat:
+ fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
+ break;
+ case Opt_rbytes:
+ fsopt->flags |= CEPH_MOUNT_OPT_RBYTES;
+ break;
+ case Opt_norbytes:
+ fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
+ break;
+ case Opt_asyncreaddir:
+ fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR;
+ break;
+ case Opt_noasyncreaddir:
+ fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
+ break;
+ case Opt_dcache:
+ fsopt->flags |= CEPH_MOUNT_OPT_DCACHE;
+ break;
+ case Opt_nodcache:
+ fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE;
+ break;
+ case Opt_ino32:
+ fsopt->flags |= CEPH_MOUNT_OPT_INO32;
+ break;
+ case Opt_noino32:
+ fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
+ break;
+ case Opt_fscache:
+ fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
+ break;
+ case Opt_nofscache:
+ fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
+ break;
+#ifdef CONFIG_CEPH_FS_POSIX_ACL
+ case Opt_acl:
+ fsopt->sb_flags |= MS_POSIXACL;
+ break;
+#endif
+ case Opt_noacl:
+ fsopt->sb_flags &= ~MS_POSIXACL;
+ break;
+ default:
+ BUG_ON(token);
+ }
+ return 0;
+}
+
+/* Free a ceph_mount_options and its owned snapdir_name string. */
+static void destroy_mount_options(struct ceph_mount_options *args)
+{
+ dout("destroy_mount_options %p\n", args);
+ kfree(args->snapdir_name);
+ kfree(args);
+}
+
+/*
+ * NULL-tolerant strcmp: two NULLs compare equal; a NULL compares
+ * after a non-NULL string (s1 non-NULL/s2 NULL -> -1, and vice versa).
+ */
+static int strcmp_null(const char *s1, const char *s2)
+{
+ if (!s1 && !s2)
+ return 0;
+ if (s1 && !s2)
+ return -1;
+ if (!s1 && s2)
+ return 1;
+ return strcmp(s1, s2);
+}
+
+/*
+ * Compare a candidate set of mount options against an existing fs
+ * client's.  The leading scalar fields (everything before
+ * snapdir_name in struct ceph_mount_options) are compared as a raw
+ * memcmp; snapdir_name and the libceph options are compared
+ * separately.  Returns 0 iff everything matches.
+ */
+static int compare_mount_options(struct ceph_mount_options *new_fsopt,
+ struct ceph_options *new_opt,
+ struct ceph_fs_client *fsc)
+{
+ struct ceph_mount_options *fsopt1 = new_fsopt;
+ struct ceph_mount_options *fsopt2 = fsc->mount_options;
+ int ofs = offsetof(struct ceph_mount_options, snapdir_name);
+ int ret;
+
+ ret = memcmp(fsopt1, fsopt2, ofs);
+ if (ret)
+ return ret;
+
+ ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
+ if (ret)
+ return ret;
+
+ return ceph_compare_options(new_opt, fsc->client);
+}
+
+/*
+ * Allocate and fill a ceph_mount_options from the mount syscall's
+ * flags/options/dev_name, splitting dev_name into the monitor list
+ * (handed to ceph_parse_options()) and the optional server-side
+ * path (*path, without the leading '/').  On success *pfsopt and
+ * *popt are owned by the caller; on error everything allocated here
+ * is freed and a negative errno is returned.
+ */
+static int parse_mount_options(struct ceph_mount_options **pfsopt,
+ struct ceph_options **popt,
+ int flags, char *options,
+ const char *dev_name,
+ const char **path)
+{
+ struct ceph_mount_options *fsopt;
+ const char *dev_name_end;
+ int err;
+
+ if (!dev_name || !*dev_name)
+ return -EINVAL;
+
+ fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL);
+ if (!fsopt)
+ return -ENOMEM;
+
+ dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name);
+
+ fsopt->sb_flags = flags;
+ fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
+
+ /* defaults; individual options may override via parse_fsopt_token */
+ fsopt->rsize = CEPH_RSIZE_DEFAULT;
+ fsopt->rasize = CEPH_RASIZE_DEFAULT;
+ fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
+ fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
+ fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
+ fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
+ fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
+ fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
+ fsopt->congestion_kb = default_congestion_kb();
+
+ /*
+ * Distinguish the server list from the path in "dev_name".
+ * Internally we do not include the leading '/' in the path.
+ *
+ * "dev_name" will look like:
+ * <server_spec>[,<server_spec>...]:[<path>]
+ * where
+ * <server_spec> is <ip>[:<port>]
+ * <path> is optional, but if present must begin with '/'
+ */
+ dev_name_end = strchr(dev_name, '/');
+ if (dev_name_end) {
+ /* skip over leading '/' for path */
+ *path = dev_name_end + 1;
+ } else {
+ /* path is empty */
+ dev_name_end = dev_name + strlen(dev_name);
+ *path = dev_name_end;
+ }
+ err = -EINVAL;
+ dev_name_end--; /* back up to ':' separator */
+ if (dev_name_end < dev_name || *dev_name_end != ':') {
+ pr_err("device name is missing path (no : separator in %s)\n",
+ dev_name);
+ goto out;
+ }
+ dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
+ dout("server path '%s'\n", *path);
+
+ *popt = ceph_parse_options(options, dev_name, dev_name_end,
+ parse_fsopt_token, (void *)fsopt);
+ if (IS_ERR(*popt)) {
+ err = PTR_ERR(*popt);
+ goto out;
+ }
+
+ /* success */
+ *pfsopt = fsopt;
+ return 0;
+
+out:
+ destroy_mount_options(fsopt);
+ return err;
+}
+
+/**
+ * ceph_show_options - Show mount options in /proc/mounts
+ * @m: seq_file to write to
+ * @root: root of that (sub)tree
+ */
+static int ceph_show_options(struct seq_file *m, struct dentry *root)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
+ struct ceph_mount_options *fsopt = fsc->mount_options;
+ struct ceph_options *opt = fsc->client->options;
+
+ /* libceph-level options first; only non-default values are shown */
+ if (opt->flags & CEPH_OPT_FSID)
+ seq_printf(m, ",fsid=%pU", &opt->fsid);
+ if (opt->flags & CEPH_OPT_NOSHARE)
+ seq_puts(m, ",noshare");
+ if (opt->flags & CEPH_OPT_NOCRC)
+ seq_puts(m, ",nocrc");
+
+ if (opt->name)
+ seq_printf(m, ",name=%s", opt->name);
+ if (opt->key)
+ seq_puts(m, ",secret=<hidden>"); /* never expose the key */
+
+ if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
+ seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
+ if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
+ seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
+ if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
+ seq_printf(m, ",osdkeepalivetimeout=%d",
+ opt->osd_keepalive_timeout);
+
+ /* fs-level flags */
+ if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
+ seq_puts(m, ",dirstat");
+ if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0)
+ seq_puts(m, ",norbytes");
+ if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
+ seq_puts(m, ",noasyncreaddir");
+ if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE)
+ seq_puts(m, ",dcache");
+ else
+ seq_puts(m, ",nodcache");
+ if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
+ seq_puts(m, ",fsc");
+ else
+ seq_puts(m, ",nofsc");
+
+#ifdef CONFIG_CEPH_FS_POSIX_ACL
+ if (fsopt->sb_flags & MS_POSIXACL)
+ seq_puts(m, ",acl");
+ else
+ seq_puts(m, ",noacl");
+#endif
+
+ /* integer-valued fs options, again only when non-default */
+ if (fsopt->wsize)
+ seq_printf(m, ",wsize=%d", fsopt->wsize);
+ if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
+ seq_printf(m, ",rsize=%d", fsopt->rsize);
+ if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
+ seq_printf(m, ",rasize=%d", fsopt->rasize);
+ if (fsopt->congestion_kb != default_congestion_kb())
+ seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
+ if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
+ seq_printf(m, ",caps_wanted_delay_min=%d",
+ fsopt->caps_wanted_delay_min);
+ if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
+ seq_printf(m, ",caps_wanted_delay_max=%d",
+ fsopt->caps_wanted_delay_max);
+ if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
+ seq_printf(m, ",cap_release_safety=%d",
+ fsopt->cap_release_safety);
+ if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
+ seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir)<br/>;
+ if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
+ seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
+ if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
+ seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
+ return 0;
+}
+
+/*
+ * handle any mon messages the standard library doesn't understand.
+ * return error if we don't either.
+ */
+/*
+ * Dispatch fs-specific monitor messages (currently only the MDS map).
+ * Returns 0 when handled, -1 so libceph reports an unknown message.
+ */
+static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
+{
+ struct ceph_fs_client *fsc = client->private;
+ int type = le16_to_cpu(msg->hdr.type);
+
+ switch (type) {
+ case CEPH_MSG_MDS_MAP:
+ ceph_mdsc_handle_map(fsc->mdsc, msg);
+ return 0;
+
+ default:
+ return -1;
+ }
+}
+
+/*
+ * create a new fs client
+ */
+static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
+ struct ceph_options *opt)
+{
+ struct ceph_fs_client *fsc;
+ const u64 supported_features =
+ CEPH_FEATURE_FLOCK |
+ CEPH_FEATURE_DIRLAYOUTHASH;
+ const u64 required_features = 0;
+ int page_count;
+ size_t size;
+ int err = -ENOMEM;
+
+ /*
+ * Build the fs client: libceph client, bdi, three single-threaded
+ * workqueues, the writeback pagevec mempool, and (optionally)
+ * fscache.  On success the client owns @fsopt; on failure resources
+ * are unwound in reverse order via the goto chain and an ERR_PTR is
+ * returned (the caller still owns @fsopt/@opt in that case).
+ */
+ fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
+ if (!fsc)
+ return ERR_PTR(-ENOMEM);
+
+ fsc->client = ceph_create_client(opt, fsc, supported_features,
+ required_features);
+ if (IS_ERR(fsc->client)) {
+ err = PTR_ERR(fsc->client);
+ goto fail;
+ }
+ fsc->client->extra_mon_dispatch = extra_mon_dispatch;
+ fsc->client->monc.want_mdsmap = 1;
+
+ fsc->mount_options = fsopt;
+
+ fsc->sb = NULL;
+ fsc->mount_state = CEPH_MOUNT_MOUNTING;
+
+ atomic_long_set(&fsc->writeback_count, 0);
+
+ err = bdi_init(&fsc->backing_dev_info);
+ if (err < 0)
+ goto fail_client;
+
+ err = -ENOMEM;
+ /*
+ * The number of concurrent works can be high but they don't need
+ * to be processed in parallel, limit concurrency.
+ */
+ fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1);
+ if (fsc->wb_wq == NULL)
+ goto fail_bdi;
+ fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
+ if (fsc->pg_inv_wq == NULL)
+ goto fail_wb_wq;
+ fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1);
+ if (fsc->trunc_wq == NULL)
+ goto fail_pg_inv_wq;
+
+ /* set up mempools */
+ err = -ENOMEM;
+ page_count = fsc->mount_options->wsize >> PAGE_CACHE_SHIFT;
+ size = sizeof (struct page *) * (page_count ? page_count : 1);
+ fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size);
+ if (!fsc->wb_pagevec_pool)
+ goto fail_trunc_wq;
+
+ /* setup fscache */
+ if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) &&
+ (ceph_fscache_register_fs(fsc) != 0))
+ goto fail_fscache;
+
+ /* caps */
+ fsc->min_caps = fsopt->max_readdir;
+
+ return fsc;
+
+fail_fscache:
+ ceph_fscache_unregister_fs(fsc);
+fail_trunc_wq:
+ destroy_workqueue(fsc->trunc_wq);
+fail_pg_inv_wq:
+ destroy_workqueue(fsc->pg_inv_wq);
+fail_wb_wq:
+ destroy_workqueue(fsc->wb_wq);
+fail_bdi:
+ bdi_destroy(&fsc->backing_dev_info);
+fail_client:
+ ceph_destroy_client(fsc->client);
+fail:
+ kfree(fsc);
+ return ERR_PTR(err);
+}
+
+/*
+ * Tear down everything create_fs_client() built, in reverse order,
+ * including the mount options it took ownership of and the libceph
+ * client.  The final dout only prints the (now-freed) pointer value.
+ */
+static void destroy_fs_client(struct ceph_fs_client *fsc)
+{
+ dout("destroy_fs_client %p\n", fsc);
+
+ ceph_fscache_unregister_fs(fsc);
+
+ destroy_workqueue(fsc->wb_wq);
+ destroy_workqueue(fsc->pg_inv_wq);
+ destroy_workqueue(fsc->trunc_wq);
+
+ bdi_destroy(&fsc->backing_dev_info);
+
+ mempool_destroy(fsc->wb_pagevec_pool);
+
+ destroy_mount_options(fsc->mount_options);
+
+ ceph_fs_debugfs_cleanup(fsc);
+
+ ceph_destroy_client(fsc->client);
+
+ kfree(fsc);
+ dout("destroy_fs_client %p done\n", fsc);
+}
+
+/*
+ * caches
+ */
+struct kmem_cache *ceph_inode_cachep;
+struct kmem_cache *ceph_cap_cachep;
+struct kmem_cache *ceph_dentry_cachep;
+struct kmem_cache *ceph_file_cachep;
+
+/* slab constructor: initialize the embedded VFS inode exactly once */
+static void ceph_inode_init_once(void *foo)
+{
+ struct ceph_inode_info *ci = foo;
+ inode_init_once(&ci->vfs_inode);
+}
+
+/*
+ * Create the four slab caches used by the filesystem and register
+ * with fscache.  On failure, caches created so far are destroyed via
+ * the fall-through labels.
+ *
+ * NOTE(review): if ceph_fscache_register() fails we goto bad_file,
+ * which does not destroy ceph_file_cachep (created just above) —
+ * looks like a leak on that one path; confirm against upstream.
+ */
+static int __init init_caches(void)
+{
+ int error = -ENOMEM;
+
+ ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
+ sizeof(struct ceph_inode_info),
+ __alignof__(struct ceph_inode_info),
+ (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
+ ceph_inode_init_once);
+ if (ceph_inode_cachep == NULL)
+ return -ENOMEM;
+
+ ceph_cap_cachep = KMEM_CACHE(ceph_cap,
+ SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
+ if (ceph_cap_cachep == NULL)
+ goto bad_cap;
+
+ ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
+ SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
+ if (ceph_dentry_cachep == NULL)
+ goto bad_dentry;
+
+ ceph_file_cachep = KMEM_CACHE(ceph_file_info,
+ SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
+ if (ceph_file_cachep == NULL)
+ goto bad_file;
+
+ if ((error = ceph_fscache_register()))
+ goto bad_file;
+
+ return 0;
+bad_file:
+ kmem_cache_destroy(ceph_dentry_cachep);
+bad_dentry:
+ kmem_cache_destroy(ceph_cap_cachep);
+bad_cap:
+ kmem_cache_destroy(ceph_inode_cachep);
+ return error;
+}
+
+/* Destroy the slab caches and unregister from fscache at module exit. */
+static void destroy_caches(void)
+{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
+
+ kmem_cache_destroy(ceph_inode_cachep);
+ kmem_cache_destroy(ceph_cap_cachep);
+ kmem_cache_destroy(ceph_dentry_cachep);
+ kmem_cache_destroy(ceph_file_cachep);
+
+ ceph_fscache_unregister();
+}
+
+
+/*
+ * ceph_umount_begin - initiate forced umount. Tear down down the
+ * mount, skipping steps that may hang while waiting for server(s).
+ */
+static void ceph_umount_begin(struct super_block *sb)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
+
+ dout("ceph_umount_begin - starting forced umount\n");
+ if (!fsc)
+ return;
+ /* flag shutdown so in-flight ops stop waiting on servers */
+ fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
+ return;
+}
+
+/* superblock operations vector installed by ceph_set_super() */
+static const struct super_operations ceph_super_ops = {
+ .alloc_inode = ceph_alloc_inode,
+ .destroy_inode = ceph_destroy_inode,
+ .write_inode = ceph_write_inode,
+ .drop_inode = ceph_drop_inode,
+ .sync_fs = ceph_sync_fs,
+ .put_super = ceph_put_super,
+ .show_options = ceph_show_options,
+ .statfs = ceph_statfs,
+ .umount_begin = ceph_umount_begin,
+};
+
+/*
+ * Bootstrap mount by opening the root directory. Note the mount
+ * @started time from caller, and time out if this takes too long.
+ */
+static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
+ const char *path,
+ unsigned long started)
+{
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_mds_request *req = NULL;
+ int err;
+ struct dentry *root;
+
+ /* open dir: GETATTR on @path relative to the root inode */
+ dout("open_root_inode opening '%s'\n", path);
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
+ if (IS_ERR(req))
+ return ERR_CAST(req);
+ req->r_path1 = kstrdup(path, GFP_NOFS);
+ req->r_ino1.ino = CEPH_INO_ROOT;
+ req->r_ino1.snap = CEPH_NOSNAP;
+ req->r_started = started;
+ req->r_timeout = fsc->client->options->mount_timeout * HZ;
+ req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
+ req->r_num_caps = 2;
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+ if (err == 0) {
+ /* take the inode reference out of the request */
+ struct inode *inode = req->r_target_inode;
+ req->r_target_inode = NULL;
+ dout("open_root_inode success\n");
+ if (ceph_ino(inode) == CEPH_INO_ROOT &&
+ fsc->sb->s_root == NULL) {
+ root = d_make_root(inode);
+ if (!root) {
+ root = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+ } else {
+ root = d_obtain_alias(inode);
+ }
+ /*
+ * NOTE(review): d_obtain_alias() may return ERR_PTR; this
+ * passes it straight to ceph_init_dentry() — presumably that
+ * tolerates it, but worth confirming.
+ */
+ ceph_init_dentry(root);
+ dout("open_root_inode success, root dentry is %p\n", root);
+ } else {
+ root = ERR_PTR(err);
+ }
+out:
+ ceph_mdsc_put_request(req);
+ return root;
+}
+
+
+
+
+/*
+ * mount: join the ceph cluster, and open root directory.
+ */
+static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
+ const char *path)
+{
+ int err;
+ unsigned long started = jiffies; /* note the start time */
+ struct dentry *root;
+ int first = 0; /* first vfsmount for this super_block */
+
+ dout("mount start\n");
+ mutex_lock(&fsc->client->mount_mutex);
+
+ err = __ceph_open_session(fsc->client, started);
+ if (err < 0)
+ goto out;
+
+ /* always open the true fs root first, then the requested subpath */
+ dout("mount opening root\n");
+ root = open_root_dentry(fsc, "", started);
+ if (IS_ERR(root)) {
+ err = PTR_ERR(root);
+ goto out;
+ }
+ if (fsc->sb->s_root) {
+ dput(root);
+ } else {
+ fsc->sb->s_root = root;
+ first = 1;
+
+ err = ceph_fs_debugfs_init(fsc);
+ if (err < 0)
+ goto fail;
+ }
+
+ if (path[0] == 0) {
+ dget(root);
+ } else {
+ dout("mount opening base mountpoint\n");
+ root = open_root_dentry(fsc, path, started);
+ if (IS_ERR(root)) {
+ err = PTR_ERR(root);
+ goto fail;
+ }
+ }
+
+ fsc->mount_state = CEPH_MOUNT_MOUNTED;
+ dout("mount success\n");
+ mutex_unlock(&fsc->client->mount_mutex);
+ return root;
+
+out:
+ mutex_unlock(&fsc->client->mount_mutex);
+ return ERR_PTR(err);
+
+fail:
+ /* undo the s_root we installed if this was the first mount */
+ if (first) {
+ dput(fsc->sb->s_root);
+ fsc->sb->s_root = NULL;
+ }
+ goto out;
+}
+
+/*
+ * sget() callback: initialize a freshly allocated super_block from
+ * our fs client (ops, xattr handlers, export ops, flags) and link
+ * the two together.  Unwinds the linkage if set_anon_super() fails.
+ */
+static int ceph_set_super(struct super_block *s, void *data)
+{
+ struct ceph_fs_client *fsc = data;
+ int ret;
+
+ dout("set_super %p data %p\n", s, data);
+
+ s->s_flags = fsc->mount_options->sb_flags;
+ s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */
+
+ s->s_xattr = ceph_xattr_handlers;
+ s->s_fs_info = fsc;
+ fsc->sb = s;
+
+ s->s_op = &ceph_super_ops;
+ s->s_export_op = &ceph_export_ops;
+
+ s->s_time_gran = 1000; /* 1000 ns == 1 us */
+
+ ret = set_anon_super(s, NULL); /* what is that second arg for? */
+ if (ret != 0)
+ goto fail;
+
+ return ret;
+
+fail:
+ s->s_fs_info = NULL;
+ fsc->sb = NULL;
+ return ret;
+}
+
+/*
+ * share superblock if same fs AND options
+ */
+/*
+ * sget() comparator: returns 1 when @sb can be shared with the
+ * candidate client in @data (same mount options, same fsid when one
+ * was pinned, same sb flags), 0 otherwise.
+ */
+static int ceph_compare_super(struct super_block *sb, void *data)
+{
+ struct ceph_fs_client *new = data;
+ struct ceph_mount_options *fsopt = new->mount_options;
+ struct ceph_options *opt = new->client->options;
+ struct ceph_fs_client *other = ceph_sb_to_client(sb);
+
+ dout("ceph_compare_super %p\n", sb);
+
+ if (compare_mount_options(fsopt, opt, other)) {
+ dout("monitor(s)/mount options don't match\n");
+ return 0;
+ }
+ if ((opt->flags & CEPH_OPT_FSID) &&
+ ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
+ dout("fsid doesn't match\n");
+ return 0;
+ }
+ if (fsopt->sb_flags != other->mount_options->sb_flags) {
+ dout("flags differ\n");
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * construct our own bdi so we can control readahead, etc.
+ */
+static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
+
+/*
+ * Register our private bdi (named ceph-<seq>) and attach it to the
+ * superblock, sizing ra_pages from the rasize mount option when it
+ * is at least one page, else from the system default.
+ */
+static int ceph_register_bdi(struct super_block *sb,
+ struct ceph_fs_client *fsc)
+{
+ int err;
+
+ /* set ra_pages based on rasize mount option? */
+ if (fsc->mount_options->rasize >= PAGE_CACHE_SIZE)
+ fsc->backing_dev_info.ra_pages =
+ (fsc->mount_options->rasize + PAGE_CACHE_SIZE - 1)
+ >> PAGE_SHIFT;
+ else
+ fsc->backing_dev_info.ra_pages =
+ default_backing_dev_info.ra_pages;
+
+ err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld",
+ atomic_long_inc_return(&bdi_seq));
+ if (!err)
+ sb->s_bdi = &fsc->backing_dev_info;
+ return err;
+}
+
+/*
+ * mount entry point: parse options, build a candidate fs client,
+ * find or create a matching superblock via sget() (the candidate is
+ * discarded if an existing sb is reused), then perform the actual
+ * mount.  Returns the root dentry or an ERR_PTR.
+ */
+static struct dentry *ceph_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
+{
+ struct super_block *sb;
+ struct ceph_fs_client *fsc;
+ struct dentry *res;
+ int err;
+ int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
+ const char *path = NULL;
+ struct ceph_mount_options *fsopt = NULL;
+ struct ceph_options *opt = NULL;
+
+ dout("ceph_mount\n");
+
+#ifdef CONFIG_CEPH_FS_POSIX_ACL
+ flags |= MS_POSIXACL;
+#endif
+ err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
+ if (err < 0) {
+ res = ERR_PTR(err);
+ goto out_final;
+ }
+
+ /* create client (which we may/may not use) */
+ fsc = create_fs_client(fsopt, opt);
+ if (IS_ERR(fsc)) {
+ res = ERR_CAST(fsc);
+ /* create_fs_client failed, so fsopt/opt are still ours */
+ destroy_mount_options(fsopt);
+ ceph_destroy_options(opt);
+ goto out_final;
+ }
+
+ err = ceph_mdsc_init(fsc);
+ if (err < 0) {
+ res = ERR_PTR(err);
+ goto out;
+ }
+
+ /* "noshare" disables superblock sharing entirely */
+ if (ceph_test_opt(fsc->client, NOSHARE))
+ compare_super = NULL;
+ sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc);
+ if (IS_ERR(sb)) {
+ res = ERR_CAST(sb);
+ goto out;
+ }
+
+ if (ceph_sb_to_client(sb) != fsc) {
+ /* reused an existing sb; drop our candidate client */
+ ceph_mdsc_destroy(fsc);
+ destroy_fs_client(fsc);
+ fsc = ceph_sb_to_client(sb);
+ dout("get_sb got existing client %p\n", fsc);
+ } else {
+ dout("get_sb using new client %p\n", fsc);
+ err = ceph_register_bdi(sb, fsc);
+ if (err < 0) {
+ res = ERR_PTR(err);
+ goto out_splat;
+ }
+ }
+
+ res = ceph_real_mount(fsc, path);
+ if (IS_ERR(res))
+ goto out_splat;
+ dout("root %p inode %p ino %llx.%llx\n", res,
+ res->d_inode, ceph_vinop(res->d_inode));
+ return res;
+
+out_splat:
+ ceph_mdsc_close_sessions(fsc->mdsc);
+ deactivate_locked_super(sb);
+ goto out_final;
+
+out:
+ ceph_mdsc_destroy(fsc);
+ destroy_fs_client(fsc);
+out_final:
+ dout("ceph_mount fail %ld\n", PTR_ERR(res));
+ return res;
+}
+
+/*
+ * Kill the superblock: flush MDS state first so the generic teardown
+ * (which calls our put_super once the sb is read-only) can't block,
+ * then destroy the mds client and fs client.
+ */
+static void ceph_kill_sb(struct super_block *s)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(s);
+ dout("kill_sb %p\n", s);
+ ceph_mdsc_pre_umount(fsc->mdsc);
+ kill_anon_super(s); /* will call put_super after sb is r/o */
+ ceph_mdsc_destroy(fsc);
+ destroy_fs_client(fsc);
+}
+
+/* filesystem type registration; MDS rename semantics need D_MOVE */
+static struct file_system_type ceph_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "ceph",
+ .mount = ceph_mount,
+ .kill_sb = ceph_kill_sb,
+ .fs_flags = FS_RENAME_DOES_D_MOVE,
+};
+MODULE_ALIAS_FS("ceph");
+
+#define _STRINGIFY(x) #x
+#define STRINGIFY(x) _STRINGIFY(x)
+
+/*
+ * Module init: slab caches, flock/xattr subsystems, then filesystem
+ * registration; unwinds caches (and xattrs) if registration fails.
+ */
+static int __init init_ceph(void)
+{
+ int ret = init_caches();
+ if (ret)
+ goto out;
+
+ ceph_flock_init();
+ ceph_xattr_init();
+ ret = register_filesystem(&ceph_fs_type);
+ if (ret)
+ goto out_icache;
+
+ pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
+
+ return 0;
+
+out_icache:
+ ceph_xattr_exit();
+ destroy_caches();
+out:
+ return ret;
+}
+
+/* Module exit: unregister the fs, then tear down xattrs and caches. */
+static void __exit exit_ceph(void)
+{
+ dout("exit_ceph\n");
+ unregister_filesystem(&ceph_fs_type);
+ ceph_xattr_exit();
+ destroy_caches();
+}
+
+module_init(init_ceph);
+module_exit(exit_ceph);
+
+MODULE_AUTHOR("Sage Weil <sage at newdream.net>");
+MODULE_AUTHOR("Yehuda Sadeh <yehuda at hq.newdream.net>");
+MODULE_AUTHOR("Patience Warnick <patience at newdream.net>");
+MODULE_DESCRIPTION("Ceph filesystem for Linux");
+MODULE_LICENSE("GPL");
diff --git a/ceph/super.h b/ceph/super.h
new file mode 100644
index 0000000..ead05cc
--- /dev/null
+++ b/ceph/super.h
@@ -0,0 +1,890 @@
+#ifndef _FS_CEPH_SUPER_H
+#define _FS_CEPH_SUPER_H
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <asm/unaligned.h>
+#include <linux/backing-dev.h>
+#include <linux/completion.h>
+#include <linux/exportfs.h>
+#include <linux/fs.h>
+#include <linux/mempool.h>
+#include <linux/pagemap.h>
+#include <linux/wait.h>
+#include <linux/writeback.h>
+#include <linux/slab.h>
+#include <linux/posix_acl.h>
+
+#include <linux/ceph/libceph.h>
+
+#ifdef CONFIG_CEPH_FSCACHE
+#include <linux/fscache.h>
+#endif
+
+/* f_type in struct statfs */
+#define CEPH_SUPER_MAGIC 0x00c36400
+
+/* large granularity for statfs utilization stats to facilitate
+ * large volume sizes on 32-bit machines. */
+#define CEPH_BLOCK_SHIFT 22 /* 4 MB */
+#define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT)
+
+#define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */
+#define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */
+#define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */
+#define CEPH_MOUNT_OPT_INO32 (1<<8) /* 32 bit inos */
+#define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */
+#define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */
+
+#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES)
+
+#define ceph_set_mount_opt(fsc, opt) \
+ (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt;
+#define ceph_test_mount_opt(fsc, opt) \
+ (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
+
+#define CEPH_RSIZE_DEFAULT 0 /* max read size */
+#define CEPH_RASIZE_DEFAULT (8192*1024) /* readahead */
+#define CEPH_MAX_READDIR_DEFAULT 1024
+#define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024)
+#define CEPH_SNAPDIRNAME_DEFAULT ".snap"
+
/*
 * Options parsed at mount time; one instance hangs off each
 * ceph_fs_client.
 */
struct ceph_mount_options {
        int flags;              /* CEPH_MOUNT_OPT_* bitmask */
        int sb_flags;           /* superblock flags handed in at mount */

        int wsize;              /* max write size */
        int rsize;              /* max read size */
        int rasize;             /* max readahead */
        int congestion_kb;      /* max writeback in flight */
        int caps_wanted_delay_min, caps_wanted_delay_max;
                                /* delay before releasing wanted caps;
                                 * NOTE(review): units (secs?) not
                                 * visible here - confirm */
        int cap_release_safety;
        int max_readdir;        /* max readdir result (entries) */
        int max_readdir_bytes;  /* max readdir result (bytes) */

        /*
         * everything above this point can be memcmp'd; everything below
         * is handled in compare_mount_options()
         */

        char *snapdir_name;     /* default ".snap" */
};
+
/*
 * Per-superblock ceph filesystem state.  Reached from the superblock
 * via sb->s_fs_info (see ceph_sb_to_client()).
 */
struct ceph_fs_client {
        struct super_block *sb;         /* back-pointer to our sb */

        struct ceph_mount_options *mount_options;
        struct ceph_client *client;     /* cluster client (holds ->osdc) */

        unsigned long mount_state;      /* mount state machine -
                                         * NOTE(review): confirm values */
        int min_caps;                   /* min caps i added */

        struct ceph_mds_client *mdsc;   /* mds session/request state */

        /* writeback */
        mempool_t *wb_pagevec_pool;
        struct workqueue_struct *wb_wq;
        struct workqueue_struct *pg_inv_wq;
        struct workqueue_struct *trunc_wq;
        atomic_long_t writeback_count;

        struct backing_dev_info backing_dev_info;

#ifdef CONFIG_DEBUG_FS
        /* debugfs entries created for this mount */
        struct dentry *debugfs_dentry_lru, *debugfs_caps;
        struct dentry *debugfs_congestion_kb;
        struct dentry *debugfs_bdi;
        struct dentry *debugfs_mdsc, *debugfs_mdsmap;
#endif

#ifdef CONFIG_CEPH_FSCACHE
        struct fscache_cookie *fscache;
        struct workqueue_struct *revalidate_wq;
#endif
};
+
+
+/*
+ * File i/o capability. This tracks shared state with the metadata
+ * server that allows us to cache or writeback attributes or to read
+ * and write data. For any given inode, we should have one or more
+ * capabilities, one issued by each metadata server, and our
+ * cumulative access is the OR of all issued capabilities.
+ *
+ * Each cap is referenced by the inode's i_caps rbtree and by per-mds
+ * session capability lists.
+ */
+struct ceph_cap {
+ struct ceph_inode_info *ci;
+ struct rb_node ci_node; /* per-ci cap tree */
+ struct ceph_mds_session *session;
+ struct list_head session_caps; /* per-session caplist */
+ int mds;
+ u64 cap_id; /* unique cap id (mds provided) */
+ int issued; /* latest, from the mds */
+ int implemented; /* implemented superset of issued (for revocation) */
+ int mds_wanted;
+ u32 seq, issue_seq, mseq;
+ u32 cap_gen; /* active/stale cycle */
+ unsigned long last_used;
+ struct list_head caps_item;
+};
+
+#define CHECK_CAPS_NODELAY 1 /* do not delay any further */
+#define CHECK_CAPS_AUTHONLY 2 /* only check auth cap */
+#define CHECK_CAPS_FLUSH 4 /* flush any dirty caps */
+
+/*
+ * Snapped cap state that is pending flush to mds. When a snapshot occurs,
+ * we first complete any in-process sync writes and writeback any dirty
+ * data before flushing the snapped state (tracked here) back to the MDS.
+ */
+struct ceph_cap_snap {
+ atomic_t nref;
+ struct ceph_inode_info *ci;
+ struct list_head ci_item, flushing_item;
+
+ u64 follows, flush_tid;
+ int issued, dirty;
+ struct ceph_snap_context *context;
+
+ umode_t mode;
+ kuid_t uid;
+ kgid_t gid;
+
+ struct ceph_buffer *xattr_blob;
+ u64 xattr_version;
+
+ u64 size;
+ struct timespec mtime, atime, ctime;
+ u64 time_warp_seq;
+ int writing; /* a sync write is still in progress */
+ int dirty_pages; /* dirty pages awaiting writeback */
+};
+
/*
 * Drop a reference on a cap_snap.  When the last reference goes away,
 * release the xattr blob reference (if any) and free the structure.
 */
static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
{
        if (atomic_dec_and_test(&capsnap->nref)) {
                if (capsnap->xattr_blob)
                        ceph_buffer_put(capsnap->xattr_blob);
                kfree(capsnap);
        }
}
+
+/*
+ * The frag tree describes how a directory is fragmented, potentially across
+ * multiple metadata servers. It is also used to indicate points where
+ * metadata authority is delegated, and whether/where metadata is replicated.
+ *
+ * A _leaf_ frag will be present in the i_fragtree IFF there is
+ * delegation info. That is, if mds >= 0 || ndist > 0.
+ */
+#define CEPH_MAX_DIRFRAG_REP 4
+
+struct ceph_inode_frag {
+ struct rb_node node;
+
+ /* fragtree state */
+ u32 frag;
+ int split_by; /* i.e. 2^(split_by) children */
+
+ /* delegation and replication info */
+ int mds; /* -1 if same authority as parent */
+ int ndist; /* >0 if replicated */
+ int dist[CEPH_MAX_DIRFRAG_REP];
+};
+
+/*
+ * We cache inode xattrs as an encoded blob until they are first used,
+ * at which point we parse them into an rbtree.
+ */
+struct ceph_inode_xattr {
+ struct rb_node node;
+
+ const char *name;
+ int name_len;
+ const char *val;
+ int val_len;
+ int dirty;
+
+ int should_free_name;
+ int should_free_val;
+};
+
+/*
+ * Ceph dentry state
+ */
+struct ceph_dentry_info {
+ struct ceph_mds_session *lease_session;
+ u32 lease_gen, lease_shared_gen;
+ u32 lease_seq;
+ unsigned long lease_renew_after, lease_renew_from;
+ struct list_head lru;
+ struct dentry *dentry;
+ u64 time;
+ u64 offset;
+};
+
+struct ceph_inode_xattrs_info {
+ /*
+ * (still encoded) xattr blob. we avoid the overhead of parsing
+ * this until someone actually calls getxattr, etc.
+ *
+ * blob->vec.iov_len == 4 implies there are no xattrs; blob ==
+ * NULL means we don't know.
+ */
+ struct ceph_buffer *blob, *prealloc_blob;
+
+ struct rb_root index;
+ bool dirty;
+ int count;
+ int names_size;
+ int vals_size;
+ u64 version, index_version;
+};
+
+/*
+ * Ceph inode.
+ */
+struct ceph_inode_info {
+ struct ceph_vino i_vino; /* ceph ino + snap */
+
+ spinlock_t i_ceph_lock;
+
+ u64 i_version;
+ u32 i_time_warp_seq;
+
+ unsigned i_ceph_flags;
+ atomic_t i_release_count;
+ atomic_t i_complete_count;
+
+ struct ceph_dir_layout i_dir_layout;
+ struct ceph_file_layout i_layout;
+ char *i_symlink;
+
+ /* for dirs */
+ struct timespec i_rctime;
+ u64 i_rbytes, i_rfiles, i_rsubdirs;
+ u64 i_files, i_subdirs;
+
+ struct rb_root i_fragtree;
+ struct mutex i_fragtree_mutex;
+
+ struct ceph_inode_xattrs_info i_xattrs;
+
+ /* capabilities. protected _both_ by i_ceph_lock and cap->session's
+ * s_mutex. */
+ struct rb_root i_caps; /* cap list */
+ struct ceph_cap *i_auth_cap; /* authoritative cap, if any */
+ unsigned i_dirty_caps, i_flushing_caps; /* mask of dirtied fields */
+ struct list_head i_dirty_item, i_flushing_item;
+ u64 i_cap_flush_seq;
+ /* we need to track cap writeback on a per-cap-bit basis, to allow
+ * overlapping, pipelined cap flushes to the mds. we can probably
+ * reduce the tid to 8 bits if we're concerned about inode size. */
+ u16 i_cap_flush_last_tid, i_cap_flush_tid[CEPH_CAP_BITS];
+ wait_queue_head_t i_cap_wq; /* threads waiting on a capability */
+ unsigned long i_hold_caps_min; /* jiffies */
+ unsigned long i_hold_caps_max; /* jiffies */
+ struct list_head i_cap_delay_list; /* for delayed cap release to mds */
+ struct ceph_cap_reservation i_cap_migration_resv;
+ struct list_head i_cap_snaps; /* snapped state pending flush to mds */
+ struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or
+ dirty|flushing caps */
+ unsigned i_snap_caps; /* cap bits for snapped files */
+ unsigned i_cap_exporting_issued;
+
+ int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */
+
+ struct mutex i_truncate_mutex;
+ u32 i_truncate_seq; /* last truncate to smaller size */
+ u64 i_truncate_size; /* and the size we last truncated down to */
+ int i_truncate_pending; /* still need to call vmtruncate */
+
+ u64 i_max_size; /* max file size authorized by mds */
+ u64 i_reported_size; /* (max_)size reported to or requested of mds */
+ u64 i_wanted_max_size; /* offset we'd like to write too */
+ u64 i_requested_max_size; /* max_size we've requested */
+
+ /* held references to caps */
+ int i_pin_ref;
+ int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
+ int i_wrbuffer_ref, i_wrbuffer_ref_head;
+ u32 i_shared_gen; /* increment each time we get FILE_SHARED */
+ u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */
+ u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
+
+ struct list_head i_unsafe_writes; /* uncommitted sync writes */
+ struct list_head i_unsafe_dirops; /* uncommitted mds dir ops */
+ spinlock_t i_unsafe_lock;
+
+ struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */
+ int i_snap_realm_counter; /* snap realm (if caps) */
+ struct list_head i_snap_realm_item;
+ struct list_head i_snap_flush_item;
+
+ struct work_struct i_wb_work; /* writeback work */
+ struct work_struct i_pg_inv_work; /* page invalidation work */
+
+ struct work_struct i_vmtruncate_work;
+
+#ifdef CONFIG_CEPH_FSCACHE
+ struct fscache_cookie *fscache;
+ u32 i_fscache_gen; /* sequence, for delayed fscache validate */
+ struct work_struct i_revalidate_work;
+#endif
+ struct inode vfs_inode; /* at end */
+};
+
/* map a VFS inode to its containing ceph_inode_info */
static inline struct ceph_inode_info *ceph_inode(struct inode *inode)
{
        return container_of(inode, struct ceph_inode_info, vfs_inode);
}

/* fs client for an inode, via its superblock's s_fs_info */
static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode)
{
        return (struct ceph_fs_client *)inode->i_sb->s_fs_info;
}

/* fs client stored in sb->s_fs_info */
static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb)
{
        return (struct ceph_fs_client *)sb->s_fs_info;
}

/* the ceph ino+snap pair identifying this inode */
static inline struct ceph_vino ceph_vino(struct inode *inode)
{
        return ceph_inode(inode)->i_vino;
}
+
+/*
+ * ino_t is <64 bits on many architectures, blech.
+ *
+ * i_ino (kernel inode) st_ino (userspace)
+ * i386 32 32
+ * x86_64+ino32 64 32
+ * x86_64 64 64
+ */
/*
 * Fold a 64-bit ceph ino into 32 bits by XORing the high and low
 * halves.  Never returns 0: a zero fold is remapped to 2 so the
 * result is always usable as an inode number.
 */
static inline u32 ceph_ino_to_ino32(__u64 vino)
{
        u32 folded = (u32)(vino ^ (vino >> 32));

        return folded ? folded : 2;
}
+
/*
 * kernel i_ino value
 *
 * ino_t is narrower than 64 bits on 32-bit kernels (see the table
 * above), so fold the ceph ino with ceph_ino_to_ino32() there; on
 * 64-bit kernels the ino passes through unchanged.
 */
static inline ino_t ceph_vino_to_ino(struct ceph_vino vino)
{
#if BITS_PER_LONG == 32
        return ceph_ino_to_ino32(vino.ino);
#else
        return (ino_t)vino.ino;
#endif
}
+
/*
 * user-visible ino (stat, filldir)
 *
 * On 64-bit kernels the ino32 mount option asks for 32-bit inos to be
 * reported to userspace; on 32-bit kernels the ino is already 32 bits
 * and is returned unchanged.
 */
#if BITS_PER_LONG == 32
static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino)
{
        return ino;
}
#else
static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino)
{
        if (ceph_test_mount_opt(ceph_sb_to_client(sb), INO32))
                ino = ceph_ino_to_ino32(ino);
        return ino;
}
#endif
+
+
+/* for printf-style formatting */
+#define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap
+
+static inline u64 ceph_ino(struct inode *inode)
+{
+ return ceph_inode(inode)->i_vino.ino;
+}
+static inline u64 ceph_snap(struct inode *inode)
+{
+ return ceph_inode(inode)->i_vino.snap;
+}
+
+static inline int ceph_ino_compare(struct inode *inode, void *data)
+{
+ struct ceph_vino *pvino = (struct ceph_vino *)data;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ return ci->i_vino.ino == pvino->ino &&
+ ci->i_vino.snap == pvino->snap;
+}
+
/*
 * Look up an inode in the sb's inode cache by vino.  The hash key is
 * the (possibly folded) kernel ino; ceph_ino_compare() resolves hash
 * collisions using the full ino+snap pair.
 */
static inline struct inode *ceph_find_inode(struct super_block *sb,
                                            struct ceph_vino vino)
{
        ino_t t = ceph_vino_to_ino(vino);
        return ilookup5(sb, t, ceph_ino_compare, &vino);
}
+
+
/*
 * Per-inode behavior flags (NOTE(review): presumably stored in
 * ci->i_ceph_flags - confirm against caps.c).
 */
#define CEPH_I_NODELAY   4  /* do not delay cap release */
#define CEPH_I_FLUSH     8  /* do not delay flush of dirty metadata */
#define CEPH_I_NOFLUSH  16  /* do not flush dirty caps */

/*
 * Directory "completeness" is tracked with two counters:
 * i_release_count is bumped whenever the cached contents are
 * invalidated, and the dir counts as complete only while
 * i_complete_count has caught up with it.  (Presumably "complete"
 * means the cached dentries cover the whole directory - confirm
 * against dir.c.)
 */
static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
                                           int release_count)
{
        atomic_set(&ci->i_complete_count, release_count);
}

/* invalidate: bump release_count so the counters no longer match */
static inline void __ceph_dir_clear_complete(struct ceph_inode_info *ci)
{
        atomic_inc(&ci->i_release_count);
}

static inline bool __ceph_dir_is_complete(struct ceph_inode_info *ci)
{
        return atomic_read(&ci->i_complete_count) ==
                atomic_read(&ci->i_release_count);
}

/* inode-based wrappers for the above */
static inline void ceph_dir_clear_complete(struct inode *inode)
{
        __ceph_dir_clear_complete(ceph_inode(inode));
}

static inline bool ceph_dir_is_complete(struct inode *inode)
{
        return __ceph_dir_is_complete(ceph_inode(inode));
}
+
+
+/* find a specific frag @f */
+extern struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci,
+ u32 f);
+
+/*
+ * choose fragment for value @v. copy frag content to pfrag, if leaf
+ * exists
+ */
+extern u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
+ struct ceph_inode_frag *pfrag,
+ int *found);
+
+static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry)
+{
+ return (struct ceph_dentry_info *)dentry->d_fsdata;
+}
+
/*
 * Pack a readdir position: the directory fragment goes in the high 32
 * bits of the file offset, the offset within the fragment in the low
 * 32 bits.
 */
static inline loff_t ceph_make_fpos(unsigned frag, unsigned off)
{
        loff_t fpos = (loff_t)frag;

        fpos <<= 32;
        fpos |= (loff_t)off;
        return fpos;
}
+
+/*
+ * caps helpers
+ */
/* true iff this inode currently holds at least one mds capability */
static inline bool __ceph_is_any_real_caps(struct ceph_inode_info *ci)
{
        return !RB_EMPTY_ROOT(&ci->i_caps);
}

extern int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented);
extern int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int t);
extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
                                    struct ceph_cap *cap);

/* take i_ceph_lock around __ceph_caps_issued() */
static inline int ceph_caps_issued(struct ceph_inode_info *ci)
{
        int issued;
        spin_lock(&ci->i_ceph_lock);
        issued = __ceph_caps_issued(ci, NULL);
        spin_unlock(&ci->i_ceph_lock);
        return issued;
}

/* take i_ceph_lock around __ceph_caps_issued_mask() */
static inline int ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask,
                                        int touch)
{
        int r;
        spin_lock(&ci->i_ceph_lock);
        r = __ceph_caps_issued_mask(ci, mask, touch);
        spin_unlock(&ci->i_ceph_lock);
        return r;
}

/*
 * cap bits that are dirty or in flight to the mds.  NOTE(review): the
 * __-prefixed helpers here appear to expect i_ceph_lock held by the
 * caller (the locked wrappers above suggest that convention) - confirm.
 */
static inline int __ceph_caps_dirty(struct ceph_inode_info *ci)
{
        return ci->i_dirty_caps | ci->i_flushing_caps;
}
+extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask);
+
+extern int __ceph_caps_revoking_other(struct ceph_inode_info *ci,
+ struct ceph_cap *ocap, int mask);
+extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask);
+extern int __ceph_caps_used(struct ceph_inode_info *ci);
+
+extern int __ceph_caps_file_wanted(struct ceph_inode_info *ci);
+
/*
 * wanted, by virtue of open file modes AND cap refs (buffered/cached data)
 *
 * If there is buffered/dirty data we also want EXCL so we can keep
 * caching it.
 */
static inline int __ceph_caps_wanted(struct ceph_inode_info *ci)
{
        int w = __ceph_caps_file_wanted(ci) | __ceph_caps_used(ci);
        if (w & CEPH_CAP_FILE_BUFFER)
                w |= CEPH_CAP_FILE_EXCL; /* we want EXCL if dirty data */
        return w;
}
+
+/* what the mds thinks we want */
+extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci);
+
+extern void ceph_caps_init(struct ceph_mds_client *mdsc);
+extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
+extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta);
+extern void ceph_reserve_caps(struct ceph_mds_client *mdsc,
+ struct ceph_cap_reservation *ctx, int need);
+extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
+ struct ceph_cap_reservation *ctx);
+extern void ceph_reservation_status(struct ceph_fs_client *client,
+ int *total, int *avail, int *used,
+ int *reserved, int *min);
+
+
+
+/*
+ * we keep buffered readdir results attached to file->private_data
+ */
+#define CEPH_F_SYNC 1
+#define CEPH_F_ATEND 2
+
+struct ceph_file_info {
+ short fmode; /* initialized on open */
+ short flags; /* CEPH_F_* */
+
+ /* readdir: position within the dir */
+ u32 frag;
+ struct ceph_mds_request *last_readdir;
+
+ /* readdir: position within a frag */
+ unsigned offset; /* offset of last chunk, adjusted for . and .. */
+ unsigned next_offset; /* offset of next chunk (last_name's + 1) */
+ char *last_name; /* last entry in previous chunk */
+ struct dentry *dentry; /* next dentry (for dcache readdir) */
+ int dir_release_count;
+
+ /* used for -o dirstat read() on directory thing */
+ char *dir_info;
+ int dir_info_len;
+};
+
+
+
+/*
+ * A "snap realm" describes a subset of the file hierarchy sharing
+ * the same set of snapshots that apply to it. The realms themselves
+ * are organized into a hierarchy, such that children inherit (some of)
+ * the snapshots of their parents.
+ *
+ * All inodes within the realm that have capabilities are linked into a
+ * per-realm list.
+ */
+struct ceph_snap_realm {
+ u64 ino;
+ atomic_t nref;
+ struct rb_node node;
+
+ u64 created, seq;
+ u64 parent_ino;
+ u64 parent_since; /* snapid when our current parent became so */
+
+ u64 *prior_parent_snaps; /* snaps inherited from any parents we */
+ u32 num_prior_parent_snaps; /* had prior to parent_since */
+ u64 *snaps; /* snaps specific to this realm */
+ u32 num_snaps;
+
+ struct ceph_snap_realm *parent;
+ struct list_head children; /* list of child realms */
+ struct list_head child_item;
+
+ struct list_head empty_item; /* if i have ref==0 */
+
+ struct list_head dirty_item; /* if realm needs new context */
+
+ /* the current set of snaps for this realm */
+ struct ceph_snap_context *cached_context;
+
+ struct list_head inodes_with_caps;
+ spinlock_t inodes_with_caps_lock;
+};
+
/*
 * Default writeback congestion threshold in KB, scaled with total RAM
 * (16 * sqrt(RAM pages), converted to KB) and capped at 256MB.
 */
static inline int default_congestion_kb(void)
{
        int congestion_kb;

        /*
         * Copied from NFS
         *
         * congestion size, scale with available memory.
         *
         *  64MB:    8192k
         * 128MB:   11585k
         * 256MB:   16384k
         * 512MB:   23170k
         *   1GB:   32768k
         *   2GB:   46340k
         *   4GB:   65536k
         *   8GB:   92681k
         *  16GB:  131072k
         *
         * This allows larger machines to have larger/more transfers.
         * Limit the default to 256M
         */
        congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
        if (congestion_kb > 256*1024)
                congestion_kb = 256*1024;

        return congestion_kb;
}
+
+
+
+/* snap.c */
+struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc,
+ u64 ino);
+extern void ceph_get_snap_realm(struct ceph_mds_client *mdsc,
+ struct ceph_snap_realm *realm);
+extern void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
+ struct ceph_snap_realm *realm);
+extern int ceph_update_snap_trace(struct ceph_mds_client *m,
+ void *p, void *e, bool deletion);
+extern void ceph_handle_snap(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session,
+ struct ceph_msg *msg);
+extern void ceph_queue_cap_snap(struct ceph_inode_info *ci);
+extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
+ struct ceph_cap_snap *capsnap);
+extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
+
+/*
+ * a cap_snap is "pending" if it is still awaiting an in-progress
+ * sync write (that may/may not still update size, mtime, etc.).
+ */
+static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci)
+{
+ return !list_empty(&ci->i_cap_snaps) &&
+ list_entry(ci->i_cap_snaps.prev, struct ceph_cap_snap,
+ ci_item)->writing;
+}
+
+/* inode.c */
+extern const struct inode_operations ceph_file_iops;
+
+extern struct inode *ceph_alloc_inode(struct super_block *sb);
+extern void ceph_destroy_inode(struct inode *inode);
+extern int ceph_drop_inode(struct inode *inode);
+
+extern struct inode *ceph_get_inode(struct super_block *sb,
+ struct ceph_vino vino);
+extern struct inode *ceph_get_snapdir(struct inode *parent);
+extern int ceph_fill_file_size(struct inode *inode, int issued,
+ u32 truncate_seq, u64 truncate_size, u64 size);
+extern void ceph_fill_file_time(struct inode *inode, int issued,
+ u64 time_warp_seq, struct timespec *ctime,
+ struct timespec *mtime, struct timespec *atime);
+extern int ceph_fill_trace(struct super_block *sb,
+ struct ceph_mds_request *req,
+ struct ceph_mds_session *session);
+extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
+ struct ceph_mds_session *session);
+
+extern int ceph_inode_holds_cap(struct inode *inode, int mask);
+
+extern int ceph_inode_set_size(struct inode *inode, loff_t size);
+extern void __ceph_do_pending_vmtruncate(struct inode *inode);
+extern void ceph_queue_vmtruncate(struct inode *inode);
+
+extern void ceph_queue_invalidate(struct inode *inode);
+extern void ceph_queue_writeback(struct inode *inode);
+
+extern int ceph_do_getattr(struct inode *inode, int mask);
+extern int ceph_permission(struct inode *inode, int mask);
+extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
+extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat);
+
+/* xattr.c */
+extern int ceph_setxattr(struct dentry *, const char *, const void *,
+ size_t, int);
+int __ceph_setxattr(struct dentry *, const char *, const void *, size_t, int);
+ssize_t __ceph_getxattr(struct inode *, const char *, void *, size_t);
+int __ceph_removexattr(struct dentry *, const char *);
+extern ssize_t ceph_getxattr(struct dentry *, const char *, void *, size_t);
+extern ssize_t ceph_listxattr(struct dentry *, char *, size_t);
+extern int ceph_removexattr(struct dentry *, const char *);
+extern void __ceph_build_xattrs_blob(struct ceph_inode_info *ci);
+extern void __ceph_destroy_xattrs(struct ceph_inode_info *ci);
+extern void __init ceph_xattr_init(void);
+extern void ceph_xattr_exit(void);
+
+/* acl.c */
+extern const struct xattr_handler *ceph_xattr_handlers[];
+
#ifdef CONFIG_CEPH_FS_POSIX_ACL

/* real ACL implementations live in acl.c */
struct posix_acl *ceph_get_acl(struct inode *, int);
int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type);
int ceph_init_acl(struct dentry *, struct inode *, struct inode *);

static inline void ceph_forget_all_cached_acls(struct inode *inode)
{
        forget_all_cached_acls(inode);
}

#else

/* no ACL support: inode ops use NULL get/set handlers */
#define ceph_get_acl NULL
#define ceph_set_acl NULL

static inline int ceph_init_acl(struct dentry *dentry, struct inode *inode,
                                struct inode *dir)
{
        return 0;
}

/*
 * NOTE(review): ceph_acl_chmod has a stub only in this !ACL branch; no
 * ACL-enabled declaration is visible above - confirm a real definition
 * exists in acl.c when CONFIG_CEPH_FS_POSIX_ACL is set.
 */
static inline int ceph_acl_chmod(struct dentry *dentry, struct inode *inode)
{
        return 0;
}

static inline void ceph_forget_all_cached_acls(struct inode *inode)
{
}

#endif
+
+/* caps.c */
+extern const char *ceph_cap_string(int c);
+extern void ceph_handle_caps(struct ceph_mds_session *session,
+ struct ceph_msg *msg);
+extern int ceph_add_cap(struct inode *inode,
+ struct ceph_mds_session *session, u64 cap_id,
+ int fmode, unsigned issued, unsigned wanted,
+ unsigned cap, unsigned seq, u64 realmino, int flags,
+ struct ceph_cap_reservation *caps_reservation);
+extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
+extern void ceph_put_cap(struct ceph_mds_client *mdsc,
+ struct ceph_cap *cap);
+extern int ceph_is_any_caps(struct inode *inode);
+
+extern void __queue_cap_release(struct ceph_mds_session *session, u64 ino,
+ u64 cap_id, u32 migrate_seq, u32 issue_seq);
+extern void ceph_queue_caps_release(struct inode *inode);
+extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc);
+extern int ceph_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync);
+extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session);
+extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
+ int mds);
+extern int ceph_get_cap_mds(struct inode *inode);
+extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
+extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
+extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
+ struct ceph_snap_context *snapc);
+extern void __ceph_flush_snaps(struct ceph_inode_info *ci,
+ struct ceph_mds_session **psession,
+ int again);
+extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
+ struct ceph_mds_session *session);
+extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
+extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
+
+extern int ceph_encode_inode_release(void **p, struct inode *inode,
+ int mds, int drop, int unless, int force);
+extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
+ int mds, int drop, int unless);
+
+extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
+ int *got, loff_t endoff);
+
+/* for counting open files by mode */
+static inline void __ceph_get_fmode(struct ceph_inode_info *ci, int mode)
+{
+ ci->i_nr_by_mode[mode]++;
+}
+extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode);
+
+/* addr.c */
+extern const struct address_space_operations ceph_aops;
+extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
+
+/* file.c */
+extern const struct file_operations ceph_file_fops;
+extern const struct address_space_operations ceph_aops;
+
+extern int ceph_open(struct inode *inode, struct file *file);
+extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
+ struct file *file, unsigned flags, umode_t mode,
+ int *opened);
+extern int ceph_release(struct inode *inode, struct file *filp);
+
+/* dir.c */
+extern const struct file_operations ceph_dir_fops;
+extern const struct inode_operations ceph_dir_iops;
+extern const struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops,
+ ceph_snapdir_dentry_ops;
+
+extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry);
+extern int ceph_handle_snapdir(struct ceph_mds_request *req,
+ struct dentry *dentry, int err);
+extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
+ struct dentry *dentry, int err);
+
+extern void ceph_dentry_lru_add(struct dentry *dn);
+extern void ceph_dentry_lru_touch(struct dentry *dn);
+extern void ceph_dentry_lru_del(struct dentry *dn);
+extern void ceph_invalidate_dentry_lease(struct dentry *dentry);
+extern unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn);
+extern struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry);
+
+/*
+ * our d_ops vary depending on whether the inode is live,
+ * snapshotted (read-only), or a virtual ".snap" directory.
+ */
+int ceph_init_dentry(struct dentry *dentry);
+
+
+/* ioctl.c */
+extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+
+/* export.c */
+extern const struct export_operations ceph_export_ops;
+
+/* locks.c */
+extern __init void ceph_flock_init(void);
+extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl);
+extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl);
+extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num);
+extern int ceph_encode_locks_to_buffer(struct inode *inode,
+ struct ceph_filelock *flocks,
+ int num_fcntl_locks,
+ int num_flock_locks);
+extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
+ struct ceph_pagelist *pagelist,
+ int num_fcntl_locks, int num_flock_locks);
+extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c);
+
+/* debugfs.c */
+extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);
+extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client);
+
+#endif /* _FS_CEPH_SUPER_H */
diff --git a/ceph/xattr.c b/ceph/xattr.c
new file mode 100644
index 0000000..c9c2b88
--- /dev/null
+++ b/ceph/xattr.c
@@ -0,0 +1,1128 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include "super.h"
+#include "mds_client.h"
+
+#include <linux/ceph/decode.h>
+
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/slab.h>
+
+#define XATTR_CEPH_PREFIX "ceph."
+#define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1)
+
+static int __remove_xattr(struct ceph_inode_info *ci,
+ struct ceph_inode_xattr *xattr);
+
+/*
+ * List of handlers for synthetic system.* attributes. Other
+ * attributes are handled directly.
+ */
+const struct xattr_handler *ceph_xattr_handlers[] = {
+#ifdef CONFIG_CEPH_FS_POSIX_ACL
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
+#endif
+ NULL,
+};
+
+/*
+ * Only accept xattr names in a namespace the VFS xattr core knows
+ * about: ceph.*, security.*, system.*, trusted.* or user.*.
+ */
+static bool ceph_is_valid_xattr(const char *name)
+{
+ return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
+ !strncmp(name, XATTR_SECURITY_PREFIX,
+ XATTR_SECURITY_PREFIX_LEN) ||
+ !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
+ !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
+ !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
+}
+
+/*
+ * These define virtual xattrs exposing the recursive directory
+ * statistics and layout metadata.
+ */
+struct ceph_vxattr {
+ char *name;
+ size_t name_size; /* strlen(name) + 1 (for '\0') */
+ size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
+ size_t size);
+ bool readonly, hidden;
+ bool (*exists_cb)(struct ceph_inode_info *ci);
+};
+
+/* layouts */
+
+/*
+ * An explicit file layout "exists" iff any byte of ci->i_layout is
+ * non-zero; an all-zero layout means the inode uses the default.
+ */
+static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
+{
+ size_t s;
+ char *p = (char *)&ci->i_layout;
+
+ for (s = 0; s < sizeof(ci->i_layout); s++, p++)
+ if (*p)
+ return true;
+ return false;
+}
+
+/*
+ * Format the full layout vxattr ("stripe_unit=... stripe_count=...
+ * object_size=... pool=...") into *val.  Follows getxattr semantics:
+ * when size == 0 only the required length is returned; when the
+ * buffer is too small, -ERANGE.  The pool is rendered by name when
+ * the osdmap knows it, otherwise by numeric id.
+ */
+static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ int ret;
+ struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+ struct ceph_osd_client *osdc = &fsc->client->osdc;
+ s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
+ const char *pool_name;
+ char buf[128];
+
+ dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
+ /* map_sem protects osdmap (and thus pool_name) while we copy */
+ down_read(&osdc->map_sem);
+ pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
+ if (pool_name) {
+ size_t len = strlen(pool_name);
+ ret = snprintf(buf, sizeof(buf),
+ "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=",
+ (unsigned long long)ceph_file_layout_su(ci->i_layout),
+ (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
+ (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
+ if (!size) {
+ /* length probe: prefix length + pool name length */
+ ret += len;
+ } else if (ret + len > size) {
+ ret = -ERANGE;
+ } else {
+ memcpy(val, buf, ret);
+ memcpy(val + ret, pool_name, len);
+ ret += len;
+ }
+ } else {
+ ret = snprintf(buf, sizeof(buf),
+ "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
+ (unsigned long long)ceph_file_layout_su(ci->i_layout),
+ (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
+ (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
+ (unsigned long long)pool);
+ if (size) {
+ if (ret <= size)
+ memcpy(val, buf, ret);
+ else
+ ret = -ERANGE;
+ }
+ }
+ up_read(&osdc->map_sem);
+ return ret;
+}
+
+/*
+ * Per-field layout vxattr callbacks.  Each returns the snprintf-style
+ * length (so a size of 0 acts as a length probe, per getxattr rules).
+ */
+static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
+ char *val, size_t size)
+{
+ return snprintf(val, size, "%lld",
+ (unsigned long long)ceph_file_layout_su(ci->i_layout));
+}
+
+static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
+ char *val, size_t size)
+{
+ return snprintf(val, size, "%lld",
+ (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout));
+}
+
+static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
+ char *val, size_t size)
+{
+ return snprintf(val, size, "%lld",
+ (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
+}
+
+/* pool is printed by name when the osdmap knows it, else by id */
+static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
+ char *val, size_t size)
+{
+ int ret;
+ struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+ struct ceph_osd_client *osdc = &fsc->client->osdc;
+ s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
+ const char *pool_name;
+
+ down_read(&osdc->map_sem);
+ pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
+ if (pool_name)
+ ret = snprintf(val, size, "%s", pool_name);
+ else
+ ret = snprintf(val, size, "%lld", (unsigned long long)pool);
+ up_read(&osdc->map_sem);
+ return ret;
+}
+
+/* directories */
+
+/*
+ * Directory statistics vxattrs.  i_files/i_subdirs are direct-child
+ * counts; the r-prefixed fields are recursive totals maintained by
+ * the MDS.  All return snprintf-style lengths.
+ */
+static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs);
+}
+
+static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ return snprintf(val, size, "%lld", ci->i_files);
+}
+
+static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ return snprintf(val, size, "%lld", ci->i_subdirs);
+}
+
+static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs);
+}
+
+static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ return snprintf(val, size, "%lld", ci->i_rfiles);
+}
+
+static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ return snprintf(val, size, "%lld", ci->i_rsubdirs);
+}
+
+static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ return snprintf(val, size, "%lld", ci->i_rbytes);
+}
+
+/*
+ * Recursive ctime rendered as "sec.nsec".  Nanoseconds must be
+ * zero-padded to nine digits, i.e. "%09ld".  The previous format
+ * string "%ld.09%ld" emitted a literal "09" followed by unpadded
+ * nanoseconds, producing ambiguous strings such as "12.091" for 1ns.
+ */
+static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ return snprintf(val, size, "%ld.%09ld", (long)ci->i_rctime.tv_sec,
+ (long)ci->i_rctime.tv_nsec);
+}
+
+
+#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
+#define CEPH_XATTR_NAME2(_type, _name, _name2) \
+ XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
+
+#define XATTR_NAME_CEPH(_type, _name) \
+ { \
+ .name = CEPH_XATTR_NAME(_type, _name), \
+ .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
+ .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
+ .readonly = true, \
+ .hidden = false, \
+ .exists_cb = NULL, \
+ }
+#define XATTR_LAYOUT_FIELD(_type, _name, _field) \
+ { \
+ .name = CEPH_XATTR_NAME2(_type, _name, _field), \
+ .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
+ .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \
+ .readonly = false, \
+ .hidden = true, \
+ .exists_cb = ceph_vxattrcb_layout_exists, \
+ }
+
+/*
+ * Directory vxattr table.  The bare "ceph.dir.layout" entry is hidden
+ * (listed only via its per-field children) and exists only when an
+ * explicit layout has been set.
+ */
+static struct ceph_vxattr ceph_dir_vxattrs[] = {
+ {
+ .name = "ceph.dir.layout",
+ .name_size = sizeof("ceph.dir.layout"),
+ .getxattr_cb = ceph_vxattrcb_layout,
+ .readonly = false,
+ .hidden = true,
+ .exists_cb = ceph_vxattrcb_layout_exists,
+ },
+ XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
+ XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
+ XATTR_LAYOUT_FIELD(dir, layout, object_size),
+ XATTR_LAYOUT_FIELD(dir, layout, pool),
+ XATTR_NAME_CEPH(dir, entries),
+ XATTR_NAME_CEPH(dir, files),
+ XATTR_NAME_CEPH(dir, subdirs),
+ XATTR_NAME_CEPH(dir, rentries),
+ XATTR_NAME_CEPH(dir, rfiles),
+ XATTR_NAME_CEPH(dir, rsubdirs),
+ XATTR_NAME_CEPH(dir, rbytes),
+ XATTR_NAME_CEPH(dir, rctime),
+ { .name = NULL, 0 } /* Required table terminator */
+};
+/* sum of non-hidden name sizes; computed once in ceph_xattr_init() */
+static size_t ceph_dir_vxattrs_name_size; /* total size of all names */
+
+/* files */
+
+/*
+ * Regular-file vxattr table: layout only, all entries hidden from
+ * listxattr and present only when an explicit layout is set.
+ */
+static struct ceph_vxattr ceph_file_vxattrs[] = {
+ {
+ .name = "ceph.file.layout",
+ .name_size = sizeof("ceph.file.layout"),
+ .getxattr_cb = ceph_vxattrcb_layout,
+ .readonly = false,
+ .hidden = true,
+ .exists_cb = ceph_vxattrcb_layout_exists,
+ },
+ XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
+ XATTR_LAYOUT_FIELD(file, layout, stripe_count),
+ XATTR_LAYOUT_FIELD(file, layout, object_size),
+ XATTR_LAYOUT_FIELD(file, layout, pool),
+ { .name = NULL, 0 } /* Required table terminator */
+};
+/* sum of non-hidden name sizes; computed once in ceph_xattr_init() */
+static size_t ceph_file_vxattrs_name_size; /* total size of all names */
+
+/* Select the vxattr table for an inode: dirs, regular files, or none. */
+static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
+{
+ if (S_ISDIR(inode->i_mode))
+ return ceph_dir_vxattrs;
+ else if (S_ISREG(inode->i_mode))
+ return ceph_file_vxattrs;
+ return NULL;
+}
+
+/*
+ * Return the precomputed aggregate name size for a known vxattr
+ * table.  BUG() on an unknown table pointer - callers may only pass
+ * what ceph_inode_vxattrs() returned.
+ */
+static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs)
+{
+ if (vxattrs == ceph_dir_vxattrs)
+ return ceph_dir_vxattrs_name_size;
+ if (vxattrs == ceph_file_vxattrs)
+ return ceph_file_vxattrs_name_size;
+ BUG();
+
+ return 0;
+}
+
+/*
+ * Compute the aggregate size (including terminating '\0') of all
+ * virtual extended attribute names in the given vxattr table.
+ */
+static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
+{
+ struct ceph_vxattr *vxattr;
+ size_t size = 0;
+
+ /* hidden entries are never listed, so they don't count */
+ for (vxattr = vxattrs; vxattr->name; vxattr++)
+ if (!vxattr->hidden)
+ size += vxattr->name_size;
+
+ return size;
+}
+
+/* Routines called at initialization and exit time */
+
+/* Precompute listxattr name-size totals for both vxattr tables. */
+void __init ceph_xattr_init(void)
+{
+ ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs);
+ ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs);
+}
+
+void ceph_xattr_exit(void)
+{
+ ceph_dir_vxattrs_name_size = 0;
+ ceph_file_vxattrs_name_size = 0;
+}
+
+/*
+ * Linear search of the inode's vxattr table for an exact name match;
+ * NULL when the inode has no table or the name is not virtual.
+ */
+static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
+ const char *name)
+{
+ struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
+
+ if (vxattr) {
+ while (vxattr->name) {
+ if (!strcmp(vxattr->name, name))
+ return vxattr;
+ vxattr++;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Insert or update one xattr in ci->i_xattrs.index (rb-tree keyed by
+ * name).  Caller holds i_ceph_lock.
+ *
+ * update_xattr == 0: populating from an MDS blob; name/val point into
+ *   the blob and are NOT owned by the tree (should_free_* end up 0).
+ * update_xattr > 0: a local setxattr; this function takes ownership
+ *   of kmalloc'ed name/val and frees them itself on error.
+ * update_xattr < 0: removal - drop any existing node and free name.
+ *
+ * *newxattr is a preallocated node; it is consumed on insert or
+ * freed (and NULLed) when an existing node is reused.
+ */
+static int __set_xattr(struct ceph_inode_info *ci,
+ const char *name, int name_len,
+ const char *val, int val_len,
+ int flags, int update_xattr,
+ struct ceph_inode_xattr **newxattr)
+{
+ struct rb_node **p;
+ struct rb_node *parent = NULL;
+ struct ceph_inode_xattr *xattr = NULL;
+ int c;
+ int new = 0;
+
+ /* descend the tree; on exact match break with xattr set,
+ * otherwise xattr is reset to NULL each iteration */
+ p = &ci->i_xattrs.index.rb_node;
+ while (*p) {
+ parent = *p;
+ xattr = rb_entry(parent, struct ceph_inode_xattr, node);
+ c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
+ if (c < 0)
+ p = &(*p)->rb_left;
+ else if (c > 0)
+ p = &(*p)->rb_right;
+ else {
+ if (name_len == xattr->name_len)
+ break;
+ else if (name_len < xattr->name_len)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+ xattr = NULL;
+ }
+
+ if (update_xattr) {
+ int err = 0;
+ /* honor XATTR_CREATE/XATTR_REPLACE semantics */
+ if (xattr && (flags & XATTR_CREATE))
+ err = -EEXIST;
+ else if (!xattr && (flags & XATTR_REPLACE))
+ err = -ENODATA;
+ if (err) {
+ kfree(name);
+ kfree(val);
+ return err;
+ }
+ if (update_xattr < 0) {
+ if (xattr)
+ __remove_xattr(ci, xattr);
+ kfree(name);
+ return 0;
+ }
+ }
+
+ if (!xattr) {
+ new = 1;
+ xattr = *newxattr;
+ xattr->name = name;
+ xattr->name_len = name_len;
+ xattr->should_free_name = update_xattr;
+
+ ci->i_xattrs.count++;
+ dout("__set_xattr count=%d\n", ci->i_xattrs.count);
+ } else {
+ /* reuse the existing node; the preallocated one is spare */
+ kfree(*newxattr);
+ *newxattr = NULL;
+ if (xattr->should_free_val)
+ kfree((void *)xattr->val);
+
+ if (update_xattr) {
+ /* keep the node's existing name allocation */
+ kfree((void *)name);
+ name = xattr->name;
+ }
+ ci->i_xattrs.names_size -= xattr->name_len;
+ ci->i_xattrs.vals_size -= xattr->val_len;
+ }
+ ci->i_xattrs.names_size += name_len;
+ ci->i_xattrs.vals_size += val_len;
+ if (val)
+ xattr->val = val;
+ else
+ xattr->val = "";
+
+ xattr->val_len = val_len;
+ xattr->dirty = update_xattr;
+ xattr->should_free_val = (val && update_xattr);
+
+ if (new) {
+ rb_link_node(&xattr->node, parent, p);
+ rb_insert_color(&xattr->node, &ci->i_xattrs.index);
+ dout("__set_xattr_val p=%p\n", p);
+ }
+
+ dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n",
+ ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val);
+
+ return 0;
+}
+
+/*
+ * Look up an xattr node by name.  Caller holds i_ceph_lock.  The
+ * stored names are not NUL-terminated, so compare with strncmp over
+ * the node's length and treat a longer query name as "greater".
+ */
+static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
+ const char *name)
+{
+ struct rb_node **p;
+ struct rb_node *parent = NULL;
+ struct ceph_inode_xattr *xattr = NULL;
+ int name_len = strlen(name);
+ int c;
+
+ p = &ci->i_xattrs.index.rb_node;
+ while (*p) {
+ parent = *p;
+ xattr = rb_entry(parent, struct ceph_inode_xattr, node);
+ c = strncmp(name, xattr->name, xattr->name_len);
+ if (c == 0 && name_len > xattr->name_len)
+ c = 1;
+ if (c < 0)
+ p = &(*p)->rb_left;
+ else if (c > 0)
+ p = &(*p)->rb_right;
+ else {
+ dout("__get_xattr %s: found %.*s\n", name,
+ xattr->val_len, xattr->val);
+ return xattr;
+ }
+ }
+
+ dout("__get_xattr %s: not found\n", name);
+
+ return NULL;
+}
+
+/*
+ * Free a detached xattr node, releasing name/val only when this node
+ * owns them (they may point into the shared MDS blob instead).
+ */
+static void __free_xattr(struct ceph_inode_xattr *xattr)
+{
+ BUG_ON(!xattr);
+
+ if (xattr->should_free_name)
+ kfree((void *)xattr->name);
+ if (xattr->should_free_val)
+ kfree((void *)xattr->val);
+
+ kfree(xattr);
+}
+
+/*
+ * Unlink an xattr node from the tree, update the aggregate size
+ * accounting and free it.  Caller holds i_ceph_lock.  Returns
+ * -ENODATA when passed NULL (i.e. the attribute was not found).
+ */
+static int __remove_xattr(struct ceph_inode_info *ci,
+ struct ceph_inode_xattr *xattr)
+{
+ if (!xattr)
+ return -ENODATA;
+
+ rb_erase(&xattr->node, &ci->i_xattrs.index);
+
+ if (xattr->should_free_name)
+ kfree((void *)xattr->name);
+ if (xattr->should_free_val)
+ kfree((void *)xattr->val);
+
+ ci->i_xattrs.names_size -= xattr->name_len;
+ ci->i_xattrs.vals_size -= xattr->val_len;
+ ci->i_xattrs.count--;
+ kfree(xattr);
+
+ return 0;
+}
+
+/*
+ * Remove an xattr by name: look it up and hand it to __remove_xattr,
+ * which returns -ENODATA when it is not present.  Caller holds
+ * i_ceph_lock.  (The former local 'struct rb_node **p' was assigned
+ * but never read, so it has been dropped.)
+ */
+static int __remove_xattr_by_name(struct ceph_inode_info *ci,
+ const char *name)
+{
+ struct ceph_inode_xattr *xattr;
+ int err;
+
+ xattr = __get_xattr(ci, name);
+ err = __remove_xattr(ci, xattr);
+ return err;
+}
+
+/*
+ * Copy all xattr names, each NUL-terminated, into dest in rb-tree
+ * (sorted) order.  Caller holds i_ceph_lock and has already verified
+ * dest is large enough (names_size + count bytes).  Returns the
+ * position just past the last name written.
+ */
+static char *__copy_xattr_names(struct ceph_inode_info *ci,
+ char *dest)
+{
+ struct rb_node *p;
+ struct ceph_inode_xattr *xattr = NULL;
+
+ p = rb_first(&ci->i_xattrs.index);
+ dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count);
+
+ while (p) {
+ xattr = rb_entry(p, struct ceph_inode_xattr, node);
+ memcpy(dest, xattr->name, xattr->name_len);
+ dest[xattr->name_len] = '\0';
+
+ dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name,
+ xattr->name_len, ci->i_xattrs.names_size);
+
+ dest += xattr->name_len + 1;
+ p = rb_next(p);
+ }
+
+ return dest;
+}
+
+/*
+ * Tear down the whole in-memory xattr index: free every node and
+ * reset all accounting to an empty state.  The next rb node is
+ * fetched before erasing the current one, so iteration stays valid.
+ */
+void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
+{
+ struct rb_node *p, *tmp;
+ struct ceph_inode_xattr *xattr = NULL;
+
+ p = rb_first(&ci->i_xattrs.index);
+
+ dout("__ceph_destroy_xattrs p=%p\n", p);
+
+ while (p) {
+ xattr = rb_entry(p, struct ceph_inode_xattr, node);
+ tmp = p;
+ p = rb_next(tmp);
+ dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p,
+ xattr->name_len, xattr->name);
+ rb_erase(tmp, &ci->i_xattrs.index);
+
+ __free_xattr(xattr);
+ }
+
+ ci->i_xattrs.names_size = 0;
+ ci->i_xattrs.vals_size = 0;
+ ci->i_xattrs.index_version = 0;
+ ci->i_xattrs.count = 0;
+ ci->i_xattrs.index = RB_ROOT;
+}
+
+/*
+ * (Re)build the xattr rb-tree from the encoded MDS blob.  Enters and
+ * exits with i_ceph_lock held, but drops it around the node
+ * allocations; if the blob version changed meanwhile, it retries.
+ * Tree nodes point directly into the blob (no per-entry copies).
+ *
+ * Changes vs. the original: the memset after kcalloc was redundant
+ * (kcalloc returns zeroed memory), and the element size now uses
+ * sizeof(*xattrs) instead of sizeof(struct ceph_xattr *), which named
+ * a struct tag that is declared nowhere and only happened to work
+ * because all object pointers have the same size.
+ */
+static int __build_xattrs(struct inode *inode)
+ __releases(ci->i_ceph_lock)
+ __acquires(ci->i_ceph_lock)
+{
+ u32 namelen;
+ u32 numattr = 0;
+ void *p, *end;
+ u32 len;
+ const char *name, *val;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int xattr_version;
+ struct ceph_inode_xattr **xattrs = NULL;
+ int err = 0;
+ int i;
+
+ dout("__build_xattrs() len=%d\n",
+ ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);
+
+ if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
+ return 0; /* already built */
+
+ __ceph_destroy_xattrs(ci);
+
+start:
+ /* updated internal xattr rb tree */
+ if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
+ p = ci->i_xattrs.blob->vec.iov_base;
+ end = p + ci->i_xattrs.blob->vec.iov_len;
+ ceph_decode_32_safe(&p, end, numattr, bad);
+ xattr_version = ci->i_xattrs.version;
+ spin_unlock(&ci->i_ceph_lock);
+
+ /* kcalloc zeroes the array, so kfree of unused slots is safe */
+ xattrs = kcalloc(numattr, sizeof(*xattrs), GFP_NOFS);
+ err = -ENOMEM;
+ if (!xattrs)
+ goto bad_lock;
+ for (i = 0; i < numattr; i++) {
+ xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
+ GFP_NOFS);
+ if (!xattrs[i])
+ goto bad_lock;
+ }
+
+ spin_lock(&ci->i_ceph_lock);
+ if (ci->i_xattrs.version != xattr_version) {
+ /* lost a race, retry */
+ for (i = 0; i < numattr; i++)
+ kfree(xattrs[i]);
+ kfree(xattrs);
+ xattrs = NULL;
+ goto start;
+ }
+ err = -EIO;
+ while (numattr--) {
+ ceph_decode_32_safe(&p, end, len, bad);
+ namelen = len;
+ name = p;
+ p += len;
+ ceph_decode_32_safe(&p, end, len, bad);
+ val = p;
+ p += len;
+
+ /* update_xattr == 0: names/vals stay owned by the blob */
+ err = __set_xattr(ci, name, namelen, val, len,
+ 0, 0, &xattrs[numattr]);
+
+ if (err < 0)
+ goto bad;
+ }
+ kfree(xattrs);
+ }
+ ci->i_xattrs.index_version = ci->i_xattrs.version;
+ ci->i_xattrs.dirty = false;
+
+ return err;
+bad_lock:
+ spin_lock(&ci->i_ceph_lock);
+bad:
+ if (xattrs) {
+ for (i = 0; i < numattr; i++)
+ kfree(xattrs[i]);
+ kfree(xattrs);
+ }
+ ci->i_xattrs.names_size = 0;
+ return err;
+}
+
+/*
+ * Size needed to encode the current xattr set (plus, optionally, one
+ * additional pending name/value pair) into a blob.
+ */
+static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
+ int val_size)
+{
+ /*
+ * 4 bytes for the length, and additional 4 bytes per each xattr name,
+ * 4 bytes per each value
+ */
+ int size = 4 + ci->i_xattrs.count*(4 + 4) +
+ ci->i_xattrs.names_size +
+ ci->i_xattrs.vals_size;
+ dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n",
+ ci->i_xattrs.count, ci->i_xattrs.names_size,
+ ci->i_xattrs.vals_size);
+
+ if (name_size)
+ size += 4 + 4 + name_size + val_size;
+
+ return size;
+}
+
+/*
+ * If there are dirty xattrs, reencode xattrs into the prealloc_blob
+ * and swap into place.
+ */
+void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
+{
+ struct rb_node *p;
+ struct ceph_inode_xattr *xattr = NULL;
+ void *dest;
+
+ dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
+ if (ci->i_xattrs.dirty) {
+ /* prealloc_blob was sized by the setxattr path; it must fit */
+ int need = __get_required_blob_size(ci, 0, 0);
+
+ BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);
+
+ p = rb_first(&ci->i_xattrs.index);
+ dest = ci->i_xattrs.prealloc_blob->vec.iov_base;
+
+ /* format: u32 count, then (u32 len, name, u32 len, val)* */
+ ceph_encode_32(&dest, ci->i_xattrs.count);
+ while (p) {
+ xattr = rb_entry(p, struct ceph_inode_xattr, node);
+
+ ceph_encode_32(&dest, xattr->name_len);
+ memcpy(dest, xattr->name, xattr->name_len);
+ dest += xattr->name_len;
+ ceph_encode_32(&dest, xattr->val_len);
+ memcpy(dest, xattr->val, xattr->val_len);
+ dest += xattr->val_len;
+
+ p = rb_next(p);
+ }
+
+ /* adjust buffer len; it may be larger than we need */
+ ci->i_xattrs.prealloc_blob->vec.iov_len =
+ dest - ci->i_xattrs.prealloc_blob->vec.iov_base;
+
+ /* swap the freshly encoded blob into place */
+ if (ci->i_xattrs.blob)
+ ceph_buffer_put(ci->i_xattrs.blob);
+ ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
+ ci->i_xattrs.prealloc_blob = NULL;
+ ci->i_xattrs.dirty = false;
+ ci->i_xattrs.version++;
+ }
+}
+
+/*
+ * getxattr core: serve virtual xattrs locally, otherwise consult the
+ * in-memory index (fetching from the MDS first unless we hold
+ * XATTR_SHARED caps and the index is current).  Standard getxattr
+ * semantics: size == 0 is a length probe, too-small buffer -> -ERANGE,
+ * missing attribute -> -ENODATA.
+ */
+ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
+ size_t size)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int err;
+ struct ceph_inode_xattr *xattr;
+ struct ceph_vxattr *vxattr = NULL;
+
+ if (!ceph_is_valid_xattr(name))
+ return -ENODATA;
+
+ /* let's see if a virtual xattr was requested */
+ vxattr = ceph_match_vxattr(inode, name);
+ if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
+ err = vxattr->getxattr_cb(ci, value, size);
+ return err;
+ }
+
+ spin_lock(&ci->i_ceph_lock);
+ dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
+ ci->i_xattrs.version, ci->i_xattrs.index_version);
+
+ if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
+ (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
+ goto get_xattr;
+ } else {
+ spin_unlock(&ci->i_ceph_lock);
+ /* get xattrs from mds (if we don't already have them) */
+ err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
+ if (err)
+ return err;
+ }
+
+ spin_lock(&ci->i_ceph_lock);
+
+ err = __build_xattrs(inode);
+ if (err < 0)
+ goto out;
+
+get_xattr:
+ err = -ENODATA; /* == ENOATTR */
+ xattr = __get_xattr(ci, name);
+ if (!xattr)
+ goto out;
+
+ err = -ERANGE;
+ if (size && size < xattr->val_len)
+ goto out;
+
+ err = xattr->val_len;
+ if (size == 0)
+ goto out;
+
+ memcpy(value, xattr->val, xattr->val_len);
+
+out:
+ spin_unlock(&ci->i_ceph_lock);
+ return err;
+}
+
+/*
+ * VFS getxattr entry point: route system.* (POSIX ACLs) through the
+ * generic handler table, everything else through the ceph path.
+ */
+ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
+ size_t size)
+{
+ if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+ return generic_getxattr(dentry, name, value, size);
+
+ return __ceph_getxattr(dentry->d_inode, name, value, size);
+}
+
+/*
+ * VFS listxattr: emit real xattr names (from the index, refreshing
+ * from the MDS if our caps/version are stale) followed by the
+ * non-hidden virtual ones that currently exist.  size == 0 probes
+ * the required length; a too-small buffer yields -ERANGE.
+ */
+ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
+{
+ struct inode *inode = dentry->d_inode;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
+ u32 vir_namelen = 0;
+ u32 namelen;
+ int err;
+ u32 len;
+ int i;
+
+ spin_lock(&ci->i_ceph_lock);
+ dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
+ ci->i_xattrs.version, ci->i_xattrs.index_version);
+
+ if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
+ (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
+ goto list_xattr;
+ } else {
+ spin_unlock(&ci->i_ceph_lock);
+ err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
+ if (err)
+ return err;
+ }
+
+ spin_lock(&ci->i_ceph_lock);
+
+ err = __build_xattrs(inode);
+ if (err < 0)
+ goto out;
+
+list_xattr:
+ /*
+ * Start with virtual dir xattr names (if any) (including
+ * terminating '\0' characters for each).
+ */
+ vir_namelen = ceph_vxattrs_name_size(vxattrs);
+
+ /* adding 1 byte per each variable due to the null termination */
+ namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
+ err = -ERANGE;
+ if (size && vir_namelen + namelen > size)
+ goto out;
+
+ err = namelen + vir_namelen;
+ if (size == 0)
+ goto out;
+
+ names = __copy_xattr_names(ci, names);
+
+ /* virtual xattr names, too */
+ err = namelen;
+ if (vxattrs) {
+ for (i = 0; vxattrs[i].name; i++) {
+ /* skip hidden entries and ones whose exists_cb says no */
+ if (!vxattrs[i].hidden &&
+ !(vxattrs[i].exists_cb &&
+ !vxattrs[i].exists_cb(ci))) {
+ len = sprintf(names, "%s", vxattrs[i].name);
+ names += len + 1;
+ err += len + 1;
+ }
+ }
+ }
+
+out:
+ spin_unlock(&ci->i_ceph_lock);
+ return err;
+}
+
+/*
+ * Synchronous setxattr: copy the value into pages and send a
+ * CEPH_MDS_OP_SETXATTR request to the auth MDS.  A NULL value turns
+ * the operation into a removal (CEPH_XATTR_REMOVE).
+ *
+ * Fix vs. the original: each kmap(pages[i]) is now balanced with a
+ * kunmap(pages[i]); the mapping was previously left in place, which
+ * leaks kmap slots on 32-bit highmem configurations.
+ */
+static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
+ const char *value, size_t size, int flags)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
+ struct inode *inode = dentry->d_inode;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_mds_request *req;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ int err;
+ int i, nr_pages;
+ struct page **pages = NULL;
+ void *kaddr;
+
+ /* copy value into some pages */
+ nr_pages = calc_pages_for(0, size);
+ if (nr_pages) {
+ pages = kmalloc(sizeof(pages[0])*nr_pages, GFP_NOFS);
+ if (!pages)
+ return -ENOMEM;
+ err = -ENOMEM;
+ for (i = 0; i < nr_pages; i++) {
+ pages[i] = __page_cache_alloc(GFP_NOFS);
+ if (!pages[i]) {
+ /* free only what was allocated so far */
+ nr_pages = i;
+ goto out;
+ }
+ kaddr = kmap(pages[i]);
+ memcpy(kaddr, value + i*PAGE_CACHE_SIZE,
+ min(PAGE_CACHE_SIZE, size-i*PAGE_CACHE_SIZE));
+ kunmap(pages[i]);
+ }
+ }
+
+ dout("setxattr value=%.*s\n", (int)size, value);
+
+ if (!value)
+ flags |= CEPH_XATTR_REMOVE;
+
+ /* do request */
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETXATTR,
+ USE_AUTH_MDS);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto out;
+ }
+ req->r_inode = inode;
+ ihold(inode);
+ req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
+ req->r_num_caps = 1;
+ req->r_args.setxattr.flags = cpu_to_le32(flags);
+ req->r_path2 = kstrdup(name, GFP_NOFS);
+
+ req->r_pages = pages;
+ req->r_num_pages = nr_pages;
+ req->r_data_len = size;
+
+ dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+ ceph_mdsc_put_request(req);
+ dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
+
+out:
+ if (pages) {
+ for (i = 0; i < nr_pages; i++)
+ __free_page(pages[i]);
+ kfree(pages);
+ }
+ return err;
+}
+
+/*
+ * setxattr core.  With XATTR_EXCL caps the update is applied to the
+ * local index (marking caps dirty); otherwise it is sent
+ * synchronously to the MDS.  Read-only vxattrs are rejected;
+ * unhandled ceph.* names are always passed through to the MDS.
+ * Copies of name/value and a spare index node are preallocated
+ * outside the spinlock; __set_xattr takes ownership of them.
+ */
+int __ceph_setxattr(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
+{
+ struct inode *inode = dentry->d_inode;
+ struct ceph_vxattr *vxattr;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int issued;
+ int err;
+ int dirty = 0;
+ int name_len = strlen(name);
+ int val_len = size;
+ char *newname = NULL;
+ char *newval = NULL;
+ struct ceph_inode_xattr *xattr = NULL;
+ int required_blob_size;
+
+ if (!ceph_is_valid_xattr(name))
+ return -EOPNOTSUPP;
+
+ vxattr = ceph_match_vxattr(inode, name);
+ if (vxattr && vxattr->readonly)
+ return -EOPNOTSUPP;
+
+ /* pass any unhandled ceph.* xattrs through to the MDS */
+ if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
+ goto do_sync_unlocked;
+
+ /* preallocate memory for xattr name, value, index node */
+ err = -ENOMEM;
+ newname = kmemdup(name, name_len + 1, GFP_NOFS);
+ if (!newname)
+ goto out;
+
+ if (val_len) {
+ newval = kmemdup(value, val_len, GFP_NOFS);
+ if (!newval)
+ goto out;
+ }
+
+ xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
+ if (!xattr)
+ goto out;
+
+ spin_lock(&ci->i_ceph_lock);
+retry:
+ issued = __ceph_caps_issued(ci, NULL);
+ dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
+ if (!(issued & CEPH_CAP_XATTR_EXCL))
+ goto do_sync;
+ __build_xattrs(inode);
+
+ required_blob_size = __get_required_blob_size(ci, name_len, val_len);
+
+ /* grow the prealloc blob outside the lock, then re-check caps */
+ if (!ci->i_xattrs.prealloc_blob ||
+ required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
+ struct ceph_buffer *blob;
+
+ spin_unlock(&ci->i_ceph_lock);
+ dout(" preaallocating new blob size=%d\n", required_blob_size);
+ blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
+ if (!blob)
+ goto out;
+ spin_lock(&ci->i_ceph_lock);
+ if (ci->i_xattrs.prealloc_blob)
+ ceph_buffer_put(ci->i_xattrs.prealloc_blob);
+ ci->i_xattrs.prealloc_blob = blob;
+ goto retry;
+ }
+
+ /* __set_xattr now owns newname/newval and frees them on error */
+ err = __set_xattr(ci, newname, name_len, newval, val_len,
+ flags, value ? 1 : -1, &xattr);
+
+ if (!err) {
+ dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
+ ci->i_xattrs.dirty = true;
+ inode->i_ctime = CURRENT_TIME;
+ }
+
+ spin_unlock(&ci->i_ceph_lock);
+ if (dirty)
+ __mark_inode_dirty(inode, dirty);
+ return err;
+
+do_sync:
+ spin_unlock(&ci->i_ceph_lock);
+do_sync_unlocked:
+ err = ceph_sync_setxattr(dentry, name, value, size, flags);
+out:
+ kfree(newname);
+ kfree(newval);
+ kfree(xattr);
+ return err;
+}
+
+/*
+ * VFS setxattr entry point: snapshots are read-only; system.* (POSIX
+ * ACLs) goes through the generic handler table, the rest to ceph.
+ */
+int ceph_setxattr(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
+{
+ if (ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
+ return -EROFS;
+
+ if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+ return generic_setxattr(dentry, name, value, size, flags);
+
+ return __ceph_setxattr(dentry, name, value, size, flags);
+}
+
+/*
+ * Send a synchronous CEPH_MDS_OP_RMXATTR request for this inode to
+ * the auth MDS, dropping our XATTR_SHARED cap so the change is seen.
+ */
+static int ceph_send_removexattr(struct dentry *dentry, const char *name)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct inode *inode = dentry->d_inode;
+ struct ceph_mds_request *req;
+ int err;
+
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RMXATTR,
+ USE_AUTH_MDS);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ req->r_inode = inode;
+ ihold(inode);
+ req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
+ req->r_num_caps = 1;
+ req->r_path2 = kstrdup(name, GFP_NOFS);
+
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+ ceph_mdsc_put_request(req);
+ return err;
+}
+
+/*
+ * removexattr core, mirroring __ceph_setxattr: with XATTR_EXCL caps
+ * remove locally (after ensuring the prealloc blob can hold the
+ * resulting set) and mark caps dirty; otherwise fall back to a
+ * synchronous MDS request.  Read-only vxattrs are rejected and
+ * unhandled ceph.* names go straight to the MDS.
+ */
+int __ceph_removexattr(struct dentry *dentry, const char *name)
+{
+ struct inode *inode = dentry->d_inode;
+ struct ceph_vxattr *vxattr;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int issued;
+ int err;
+ int required_blob_size;
+ int dirty;
+
+ if (!ceph_is_valid_xattr(name))
+ return -EOPNOTSUPP;
+
+ vxattr = ceph_match_vxattr(inode, name);
+ if (vxattr && vxattr->readonly)
+ return -EOPNOTSUPP;
+
+ /* pass any unhandled ceph.* xattrs through to the MDS */
+ if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
+ goto do_sync_unlocked;
+
+ err = -ENOMEM;
+ spin_lock(&ci->i_ceph_lock);
+retry:
+ issued = __ceph_caps_issued(ci, NULL);
+ dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
+
+ if (!(issued & CEPH_CAP_XATTR_EXCL))
+ goto do_sync;
+ __build_xattrs(inode);
+
+ required_blob_size = __get_required_blob_size(ci, 0, 0);
+
+ /* grow the prealloc blob outside the lock, then re-check caps */
+ if (!ci->i_xattrs.prealloc_blob ||
+ required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
+ struct ceph_buffer *blob;
+
+ spin_unlock(&ci->i_ceph_lock);
+ dout(" preaallocating new blob size=%d\n", required_blob_size);
+ blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
+ if (!blob)
+ goto out;
+ spin_lock(&ci->i_ceph_lock);
+ if (ci->i_xattrs.prealloc_blob)
+ ceph_buffer_put(ci->i_xattrs.prealloc_blob);
+ ci->i_xattrs.prealloc_blob = blob;
+ goto retry;
+ }
+
+ err = __remove_xattr_by_name(ceph_inode(inode), name);
+
+ dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
+ ci->i_xattrs.dirty = true;
+ inode->i_ctime = CURRENT_TIME;
+ spin_unlock(&ci->i_ceph_lock);
+ if (dirty)
+ __mark_inode_dirty(inode, dirty);
+ return err;
+do_sync:
+ spin_unlock(&ci->i_ceph_lock);
+do_sync_unlocked:
+ err = ceph_send_removexattr(dentry, name);
+out:
+ return err;
+}
+
+/*
+ * VFS removexattr entry point: snapshots are read-only; system.*
+ * (POSIX ACLs) goes through the generic handler table.
+ */
+int ceph_removexattr(struct dentry *dentry, const char *name)
+{
+ if (ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
+ return -EROFS;
+
+ if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+ return generic_removexattr(dentry, name);
+
+ return __ceph_removexattr(dentry, name);
+}
diff --git a/keys/ceph-type.h b/keys/ceph-type.h
new file mode 100644
index 0000000..f69c4ac
--- /dev/null
+++ b/keys/ceph-type.h
@@ -0,0 +1,8 @@
+#ifndef _KEYS_CEPH_TYPE_H
+#define _KEYS_CEPH_TYPE_H
+
+#include <linux/key.h>
+
+extern struct key_type key_type_ceph;
+
+#endif
diff --git a/libceph/Kconfig b/libceph/Kconfig
new file mode 100644
index 0000000..e50cc69
--- /dev/null
+++ b/libceph/Kconfig
@@ -0,0 +1,43 @@
+config CEPH_LIB
+ tristate "Ceph core library"
+ depends on INET
+ select LIBCRC32C
+ select CRYPTO_AES
+ select CRYPTO
+ select KEYS
+ default n
+ help
+ Choose Y or M here to include cephlib, which provides the
+ common functionality to both the Ceph filesystem and
+ to the rados block device (rbd).
+
+ More information at http://ceph.newdream.net/.
+
+ If unsure, say N.
+
+config CEPH_LIB_PRETTYDEBUG
+ bool "Include file:line in ceph debug output"
+ depends on CEPH_LIB
+ default n
+ help
+ If you say Y here, debug output will include a filename and
+ line to aid debugging. This increases kernel size and slows
+ execution slightly when debug call sites are enabled (e.g.,
+ via CONFIG_DYNAMIC_DEBUG).
+
+ If unsure, say N.
+
+config CEPH_LIB_USE_DNS_RESOLVER
+ bool "Use in-kernel support for DNS lookup"
+ depends on CEPH_LIB
+ select DNS_RESOLVER
+ default n
+ help
+ If you say Y here, hostnames (e.g. monitor addresses) will
+ be resolved using the CONFIG_DNS_RESOLVER facility.
+
+ For information on how to use CONFIG_DNS_RESOLVER consult
+ Documentation/networking/dns_resolver.txt
+
+ If unsure, say N.
+
diff --git a/libceph/Makefile b/libceph/Makefile
new file mode 100644
index 0000000..958d985
--- /dev/null
+++ b/libceph/Makefile
@@ -0,0 +1,15 @@
+#
+# Makefile for CEPH filesystem.
+#
+obj-$(CONFIG_CEPH_LIB) += libceph.o
+
+libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
+ mon_client.o \
+ osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
+ debugfs.o \
+ auth.o auth_none.o \
+ crypto.o armor.o \
+ auth_x.o \
+ ceph_fs.o ceph_strings.o ceph_hash.o \
+ pagevec.o snapshot.o
+
diff --git a/libceph/armor.c b/libceph/armor.c
new file mode 100644
index 0000000..1fc1ee1
--- /dev/null
+++ b/libceph/armor.c
@@ -0,0 +1,105 @@
+
+#include <linux/errno.h>
+
+int ceph_armor(char *dst, const char *src, const char *end);
+int ceph_unarmor(char *dst, const char *src, const char *end);
+
+/*
+ * base64 encode/decode.
+ */
+
+static const char *pem_key =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+/* Map a 6-bit value (0..63) to its base64 alphabet character. */
+static int encode_bits(int c)
+{
+ return pem_key[c];
+}
+
+/*
+ * Map a base64 character back to its 6-bit value; '=' padding decodes
+ * to 0 (any non-negative value works for the caller's error check),
+ * anything else yields -EINVAL.
+ */
+static int decode_bits(char c)
+{
+ if (c >= 'A' && c <= 'Z')
+ return c - 'A';
+ if (c >= 'a' && c <= 'z')
+ return c - 'a' + 26;
+ if (c >= '0' && c <= '9')
+ return c - '0' + 52;
+ if (c == '+')
+ return 62;
+ if (c == '/')
+ return 63;
+ if (c == '=')
+ return 0; /* just non-negative, please */
+ return -EINVAL;
+}
+
+/*
+ * Base64-encode [src, end) into dst, '='-padding the final group and
+ * inserting a newline every 64 output characters.  Returns the number
+ * of bytes written; the caller must size dst accordingly.
+ */
+int ceph_armor(char *dst, const char *src, const char *end)
+{
+ int olen = 0;
+ int line = 0;
+
+ while (src < end) {
+ unsigned char a, b, c;
+
+ /* consume up to 3 input bytes, emit 4 output chars */
+ a = *src++;
+ *dst++ = encode_bits(a >> 2);
+ if (src < end) {
+ b = *src++;
+ *dst++ = encode_bits(((a & 3) << 4) | (b >> 4));
+ if (src < end) {
+ c = *src++;
+ *dst++ = encode_bits(((b & 15) << 2) |
+ (c >> 6));
+ *dst++ = encode_bits(c & 63);
+ } else {
+ *dst++ = encode_bits((b & 15) << 2);
+ *dst++ = '=';
+ }
+ } else {
+ *dst++ = encode_bits(((a & 3) << 4));
+ *dst++ = '=';
+ *dst++ = '=';
+ }
+ olen += 4;
+ line += 4;
+ if (line == 64) {
+ line = 0;
+ *(dst++) = '\n';
+ olen++;
+ }
+ }
+ return olen;
+}
+
+/*
+ * Base64-decode [src, end) into dst, skipping newlines and honoring
+ * '=' padding.  Returns the number of decoded bytes, or -EINVAL on a
+ * truncated group or an invalid character.
+ */
+int ceph_unarmor(char *dst, const char *src, const char *end)
+{
+ int olen = 0;
+
+ while (src < end) {
+ int a, b, c, d;
+
+ if (src[0] == '\n') {
+ src++;
+ continue;
+ }
+ /* input must come in complete 4-character groups */
+ if (src + 4 > end)
+ return -EINVAL;
+ a = decode_bits(src[0]);
+ b = decode_bits(src[1]);
+ c = decode_bits(src[2]);
+ d = decode_bits(src[3]);
+ if (a < 0 || b < 0 || c < 0 || d < 0)
+ return -EINVAL;
+
+ *dst++ = (a << 2) | (b >> 4);
+ if (src[2] == '=')
+ return olen + 1;
+ *dst++ = ((b & 15) << 4) | (c >> 2);
+ if (src[3] == '=')
+ return olen + 2;
+ *dst++ = ((c & 3) << 6) | d;
+ olen += 3;
+ src += 4;
+ }
+ return olen;
+}
diff --git a/libceph/auth.c b/libceph/auth.c
new file mode 100644
index 0000000..6b923bc
--- /dev/null
+++ b/libceph/auth.c
@@ -0,0 +1,340 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+#include <linux/ceph/types.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/messenger.h>
+#include "auth_none.h"
+#include "auth_x.h"
+
+
+/*
+ * get protocol handler
+ */
+static u32 supported_protocols[] = {
+ CEPH_AUTH_NONE,
+ CEPH_AUTH_CEPHX
+};
+
+/* instantiate the handler for the negotiated protocol; -ENOENT if unknown */
+static int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol)
+{
+ switch (protocol) {
+ case CEPH_AUTH_NONE:
+ return ceph_auth_none_init(ac);
+ case CEPH_AUTH_CEPHX:
+ return ceph_x_init(ac);
+ default:
+ return -ENOENT;
+ }
+}
+
+/*
+ * setup, teardown.
+ */
+/*
+ * Allocate and initialize an auth client in the "negotiating" state.
+ * @name and @key are borrowed, not copied -- the caller must keep them
+ * valid for the client's lifetime.  Returns ERR_PTR(-ENOMEM) on failure.
+ */
+struct ceph_auth_client *ceph_auth_init(const char *name, const struct ceph_crypto_key *key)
+{
+ struct ceph_auth_client *ac;
+ int ret;
+
+ dout("auth_init name '%s'\n", name);
+
+ ret = -ENOMEM;
+ ac = kzalloc(sizeof(*ac), GFP_NOFS);
+ if (!ac)
+ goto out;
+
+ mutex_init(&ac->mutex);
+ ac->negotiating = true;
+ if (name)
+ ac->name = name;
+ else
+ ac->name = CEPH_AUTH_NAME_DEFAULT;
+ dout("auth_init name %s\n", ac->name);
+ ac->key = key;
+ return ac;
+
+out:
+ return ERR_PTR(ret);
+}
+
+/* tear down the protocol handler (if any) and free the client */
+void ceph_auth_destroy(struct ceph_auth_client *ac)
+{
+ dout("auth_destroy %p\n", ac);
+ if (ac->ops)
+ ac->ops->destroy(ac);
+ kfree(ac);
+}
+
+/*
+ * Reset occurs when reconnecting to the monitor.
+ */
+void ceph_auth_reset(struct ceph_auth_client *ac)
+{
+ mutex_lock(&ac->mutex);
+ dout("auth_reset %p\n", ac);
+ if (ac->ops && !ac->negotiating)
+ ac->ops->reset(ac);
+ ac->negotiating = true;
+ mutex_unlock(&ac->mutex);
+}
+
+/*
+ * Encode our entity name at *p as (type, len, name-bytes), advancing *p.
+ * Returns -ERANGE if it does not fit before @end.
+ */
+int ceph_entity_name_encode(const char *name, void **p, void *end)
+{
+ int len = strlen(name);
+
+ if (*p + 2*sizeof(u32) + len > end)
+ return -ERANGE;
+ ceph_encode_32(p, CEPH_ENTITY_TYPE_CLIENT);
+ ceph_encode_32(p, len);
+ ceph_encode_copy(p, name, len);
+ return 0;
+}
+
+/*
+ * Initiate protocol negotiation with monitor.  Include entity name
+ * and list supported protocols.
+ */
+int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len)
+{
+ struct ceph_mon_request_header *monhdr = buf;
+ void *p = monhdr + 1, *end = buf + len, *lenp;
+ int i, num;
+ int ret;
+
+ mutex_lock(&ac->mutex);
+ dout("auth_build_hello\n");
+ monhdr->have_version = 0;
+ monhdr->session_mon = cpu_to_le16(-1);
+ monhdr->session_mon_tid = 0;
+
+ ceph_encode_32(&p, 0); /* no protocol, yet */
+
+ /* remember where the payload length goes; back-filled below */
+ lenp = p;
+ p += sizeof(u32);
+
+ /* ceph_decode_need is used here purely as a space check on the
+ * output buffer before encoding */
+ ceph_decode_need(&p, end, 1 + sizeof(u32), bad);
+ ceph_encode_8(&p, 1);
+ num = ARRAY_SIZE(supported_protocols);
+ ceph_encode_32(&p, num);
+ ceph_decode_need(&p, end, num * sizeof(u32), bad);
+ for (i = 0; i < num; i++)
+ ceph_encode_32(&p, supported_protocols[i]);
+
+ ret = ceph_entity_name_encode(ac->name, &p, end);
+ if (ret < 0)
+ goto out;
+ ceph_decode_need(&p, end, sizeof(u64), bad);
+ ceph_encode_64(&p, ac->global_id);
+
+ ceph_encode_32(&lenp, p - lenp - sizeof(u32));
+ ret = p - buf;
+out:
+ mutex_unlock(&ac->mutex);
+ return ret;
+
+bad:
+ ret = -ERANGE;
+ goto out;
+}
+
+/*
+ * Build a protocol-specific auth request for the monitor.  Returns the
+ * total message length, or a negative error.  Caller holds ac->mutex.
+ */
+static int ceph_build_auth_request(struct ceph_auth_client *ac,
+ void *msg_buf, size_t msg_len)
+{
+ struct ceph_mon_request_header *monhdr = msg_buf;
+ void *p = monhdr + 1;
+ void *end = msg_buf + msg_len;
+ int ret;
+
+ monhdr->have_version = 0;
+ monhdr->session_mon = cpu_to_le16(-1);
+ monhdr->session_mon_tid = 0;
+
+ ceph_encode_32(&p, ac->protocol);
+
+ /* payload is built past a u32 length slot that is filled in below */
+ ret = ac->ops->build_request(ac, p + sizeof(u32), end);
+ if (ret < 0) {
+ pr_err("error %d building auth method %s request\n", ret,
+ ac->ops->name);
+ goto out;
+ }
+ dout(" built request %d bytes\n", ret);
+ ceph_encode_32(&p, ret);
+ ret = p + ret - msg_buf;
+out:
+ return ret;
+}
+
+/*
+ * Handle auth message from monitor.
+ */
+int ceph_handle_auth_reply(struct ceph_auth_client *ac,
+ void *buf, size_t len,
+ void *reply_buf, size_t reply_len)
+{
+ void *p = buf;
+ void *end = buf + len;
+ int protocol;
+ s32 result;
+ u64 global_id;
+ void *payload, *payload_end;
+ int payload_len;
+ char *result_msg;
+ int result_msg_len;
+ int ret = -EINVAL;
+
+ mutex_lock(&ac->mutex);
+ dout("handle_auth_reply %p %p\n", p, end);
+ ceph_decode_need(&p, end, sizeof(u32) * 3 + sizeof(u64), bad);
+ protocol = ceph_decode_32(&p);
+ result = ceph_decode_32(&p);
+ global_id = ceph_decode_64(&p);
+ payload_len = ceph_decode_32(&p);
+ payload = p;
+ p += payload_len;
+ ceph_decode_need(&p, end, sizeof(u32), bad);
+ result_msg_len = ceph_decode_32(&p);
+ result_msg = p;
+ p += result_msg_len;
+ if (p != end)
+ goto bad;
+
+ dout(" result %d '%.*s' gid %llu len %d\n", result, result_msg_len,
+ result_msg, global_id, payload_len);
+
+ payload_end = payload + payload_len;
+
+ if (global_id && ac->global_id != global_id) {
+ dout(" set global_id %lld -> %lld\n", ac->global_id, global_id);
+ ac->global_id = global_id;
+ }
+
+ if (ac->negotiating) {
+ /* server does not support our protocols? */
+ if (!protocol && result < 0) {
+ ret = result;
+ goto out;
+ }
+ /* set up (new) protocol handler? */
+ if (ac->protocol && ac->protocol != protocol) {
+ ac->ops->destroy(ac);
+ ac->protocol = 0;
+ ac->ops = NULL;
+ }
+ if (ac->protocol != protocol) {
+ ret = ceph_auth_init_protocol(ac, protocol);
+ if (ret) {
+ pr_err("error %d on auth protocol %d init\n",
+ ret, protocol);
+ goto out;
+ }
+ }
+
+ ac->negotiating = false;
+ }
+
+ /* -EAGAIN from the handler means "send another request" */
+ ret = ac->ops->handle_reply(ac, result, payload, payload_end);
+ if (ret == -EAGAIN) {
+ ret = ceph_build_auth_request(ac, reply_buf, reply_len);
+ } else if (ret) {
+ pr_err("auth method '%s' error %d\n", ac->ops->name, ret);
+ }
+
+out:
+ mutex_unlock(&ac->mutex);
+ return ret;
+
+bad:
+ pr_err("failed to decode auth msg\n");
+ ret = -EINVAL;
+ goto out;
+}
+
+/*
+ * Build the next outgoing auth message: the initial hello while no
+ * protocol is selected, otherwise a protocol request if the handler
+ * says (re)authentication is needed.  Returns bytes written or 0.
+ */
+int ceph_build_auth(struct ceph_auth_client *ac,
+ void *msg_buf, size_t msg_len)
+{
+ int ret = 0;
+
+ mutex_lock(&ac->mutex);
+ if (!ac->protocol)
+ ret = ceph_auth_build_hello(ac, msg_buf, msg_len);
+ else if (ac->ops->should_authenticate(ac))
+ ret = ceph_build_auth_request(ac, msg_buf, msg_len);
+ mutex_unlock(&ac->mutex);
+ return ret;
+}
+
+/* nonzero iff a protocol handler exists and reports us authenticated */
+int ceph_auth_is_authenticated(struct ceph_auth_client *ac)
+{
+ int ret = 0;
+
+ mutex_lock(&ac->mutex);
+ if (ac->ops)
+ ret = ac->ops->is_authenticated(ac);
+ mutex_unlock(&ac->mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_auth_is_authenticated);
+
+/* delegate authorizer creation to the protocol handler, if supported */
+int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
+ int peer_type,
+ struct ceph_auth_handshake *auth)
+{
+ int ret = 0;
+
+ mutex_lock(&ac->mutex);
+ if (ac->ops && ac->ops->create_authorizer)
+ ret = ac->ops->create_authorizer(ac, peer_type, auth);
+ mutex_unlock(&ac->mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_auth_create_authorizer);
+
+/* delegate authorizer destruction to the protocol handler, if supported */
+void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a)
+{
+ mutex_lock(&ac->mutex);
+ if (ac->ops && ac->ops->destroy_authorizer)
+ ac->ops->destroy_authorizer(ac, a);
+ mutex_unlock(&ac->mutex);
+}
+EXPORT_SYMBOL(ceph_auth_destroy_authorizer);
+
+/* refresh an existing authorizer for @peer_type, if the handler can */
+int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
+ int peer_type,
+ struct ceph_auth_handshake *a)
+{
+ int ret = 0;
+
+ mutex_lock(&ac->mutex);
+ if (ac->ops && ac->ops->update_authorizer)
+ ret = ac->ops->update_authorizer(ac, peer_type, a);
+ mutex_unlock(&ac->mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_auth_update_authorizer);
+
+/* verify the server's reply to an authorizer, if the handler can */
+int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a, size_t len)
+{
+ int ret = 0;
+
+ mutex_lock(&ac->mutex);
+ if (ac->ops && ac->ops->verify_authorizer_reply)
+ ret = ac->ops->verify_authorizer_reply(ac, a, len);
+ mutex_unlock(&ac->mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_auth_verify_authorizer_reply);
+
+/* mark @peer_type's credentials invalid so they get refreshed */
+void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, int peer_type)
+{
+ mutex_lock(&ac->mutex);
+ if (ac->ops && ac->ops->invalidate_authorizer)
+ ac->ops->invalidate_authorizer(ac, peer_type);
+ mutex_unlock(&ac->mutex);
+}
+EXPORT_SYMBOL(ceph_auth_invalidate_authorizer);
diff --git a/libceph/auth_none.c b/libceph/auth_none.c
new file mode 100644
index 0000000..8c93fa8
--- /dev/null
+++ b/libceph/auth_none.c
@@ -0,0 +1,137 @@
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+
+#include <linux/ceph/decode.h>
+#include <linux/ceph/auth.h>
+
+#include "auth_none.h"
+
+/* return to the pre-authentication state */
+static void reset(struct ceph_auth_client *ac)
+{
+ struct ceph_auth_none_info *xi = ac->private;
+
+ xi->starting = true;
+ xi->built_authorizer = false;
+}
+
+/* free the per-client "none" state */
+static void destroy(struct ceph_auth_client *ac)
+{
+ kfree(ac->private);
+ ac->private = NULL;
+}
+
+/* authenticated as soon as the first reply has been seen */
+static int is_authenticated(struct ceph_auth_client *ac)
+{
+ struct ceph_auth_none_info *xi = ac->private;
+
+ return !xi->starting;
+}
+
+/* only the very first exchange requires a request from us */
+static int should_authenticate(struct ceph_auth_client *ac)
+{
+ struct ceph_auth_none_info *xi = ac->private;
+
+ return xi->starting;
+}
+
+/* "none" has no request payload */
+static int build_request(struct ceph_auth_client *ac, void *buf, void *end)
+{
+ return 0;
+}
+
+/*
+ * the generic auth code decode the global_id, and we carry no actual
+ * authenticate state, so nothing happens here.
+ */
+static int handle_reply(struct ceph_auth_client *ac, int result,
+ void *buf, void *end)
+{
+ struct ceph_auth_none_info *xi = ac->private;
+
+ xi->starting = false;
+ return result;
+}
+
+/*
+ * build an 'authorizer' with our entity_name and global_id.  we can
+ * reuse a single static copy since it is identical for all services
+ * we connect to.
+ */
+static int ceph_auth_none_create_authorizer(
+ struct ceph_auth_client *ac, int peer_type,
+ struct ceph_auth_handshake *auth)
+{
+ struct ceph_auth_none_info *ai = ac->private;
+ struct ceph_none_authorizer *au = &ai->au;
+ void *p, *end;
+ int ret;
+
+ if (!ai->built_authorizer) {
+ p = au->buf;
+ end = p + sizeof(au->buf);
+ ceph_encode_8(&p, 1);
+ /* reserve 8 bytes at the end for the global_id below */
+ ret = ceph_entity_name_encode(ac->name, &p, end - 8);
+ if (ret < 0)
+ goto bad;
+ ceph_decode_need(&p, end, sizeof(u64), bad2);
+ ceph_encode_64(&p, ac->global_id);
+ au->buf_len = p - (void *)au->buf;
+ ai->built_authorizer = true;
+ dout("built authorizer len %d\n", au->buf_len);
+ }
+
+ auth->authorizer = (struct ceph_authorizer *) au;
+ auth->authorizer_buf = au->buf;
+ auth->authorizer_buf_len = au->buf_len;
+ auth->authorizer_reply_buf = au->reply_buf;
+ auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
+
+ return 0;
+
+bad2:
+ ret = -ERANGE;
+bad:
+ return ret;
+}
+
+/* the authorizer lives inside ceph_auth_none_info, so nothing to free */
+static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a)
+{
+ /* nothing to do */
+}
+
+static const struct ceph_auth_client_ops ceph_auth_none_ops = {
+ .name = "none",
+ .reset = reset,
+ .destroy = destroy,
+ .is_authenticated = is_authenticated,
+ .should_authenticate = should_authenticate,
+ .build_request = build_request,
+ .handle_reply = handle_reply,
+ .create_authorizer = ceph_auth_none_create_authorizer,
+ .destroy_authorizer = ceph_auth_none_destroy_authorizer,
+};
+
+/* allocate "none" state and attach it (and our ops) to the client */
+int ceph_auth_none_init(struct ceph_auth_client *ac)
+{
+ struct ceph_auth_none_info *xi;
+
+ dout("ceph_auth_none_init %p\n", ac);
+ xi = kzalloc(sizeof(*xi), GFP_NOFS);
+ if (!xi)
+ return -ENOMEM;
+
+ xi->starting = true;
+ xi->built_authorizer = false;
+
+ ac->protocol = CEPH_AUTH_NONE;
+ ac->private = xi;
+ ac->ops = &ceph_auth_none_ops;
+ return 0;
+}
+
+
diff --git a/libceph/auth_none.h b/libceph/auth_none.h
new file mode 100644
index 0000000..059a3ce
--- /dev/null
+++ b/libceph/auth_none.h
@@ -0,0 +1,29 @@
+#ifndef _FS_CEPH_AUTH_NONE_H
+#define _FS_CEPH_AUTH_NONE_H
+
+#include <linux/slab.h>
+#include <linux/ceph/auth.h>
+
+/*
+ * null security mode.
+ *
+ * we use a single static authorizer that simply encodes our entity name
+ * and global id.
+ */
+
+struct ceph_none_authorizer {
+ char buf[128]; /* encoded entity name + global_id */
+ int buf_len; /* valid bytes in buf */
+ char reply_buf[0]; /* zero length: "none" expects no reply payload */
+};
+
+struct ceph_auth_none_info {
+ bool starting; /* true until the first server reply */
+ bool built_authorizer; /* au has been filled in */
+ struct ceph_none_authorizer au; /* we only need one; it's static */
+};
+
+int ceph_auth_none_init(struct ceph_auth_client *ac);
+
+#endif
+
diff --git a/libceph/auth_x.c b/libceph/auth_x.c
new file mode 100644
index 0000000..96238ba
--- /dev/null
+++ b/libceph/auth_x.c
@@ -0,0 +1,711 @@
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+
+#include <linux/ceph/decode.h>
+#include <linux/ceph/auth.h>
+
+#include "crypto.h"
+#include "auth_x.h"
+#include "auth_x_protocol.h"
+
+#define TEMP_TICKET_BUF_LEN 256
+
+static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed);
+
+/* authenticated iff we hold valid tickets for every wanted service */
+static int ceph_x_is_authenticated(struct ceph_auth_client *ac)
+{
+ struct ceph_x_info *xi = ac->private;
+ int need;
+
+ ceph_x_validate_tickets(ac, &need);
+ dout("ceph_x_is_authenticated want=%d need=%d have=%d\n",
+ ac->want_keys, need, xi->have_keys);
+ return (ac->want_keys & xi->have_keys) == ac->want_keys;
+}
+
+/* a new request is needed whenever some wanted ticket is missing/stale */
+static int ceph_x_should_authenticate(struct ceph_auth_client *ac)
+{
+ struct ceph_x_info *xi = ac->private;
+ int need;
+
+ ceph_x_validate_tickets(ac, &need);
+ dout("ceph_x_should_authenticate want=%d need=%d have=%d\n",
+ ac->want_keys, need, xi->have_keys);
+ return need != 0;
+}
+
+/*
+ * Worst-case encrypted size for an @ilen-byte payload: encrypt header,
+ * payload, up to 16 bytes of cipher padding, plus the leading length u32.
+ */
+static int ceph_x_encrypt_buflen(int ilen)
+{
+ return sizeof(struct ceph_x_encrypt_header) + ilen + 16 +
+ sizeof(u32);
+}
+
+/*
+ * Encrypt header+ibuf with @secret into obuf as (u32 len, ciphertext).
+ * Returns total bytes written (len + 4) or a negative error.
+ */
+static int ceph_x_encrypt(struct ceph_crypto_key *secret,
+ void *ibuf, int ilen, void *obuf, size_t olen)
+{
+ struct ceph_x_encrypt_header head = {
+ .struct_v = 1,
+ .magic = cpu_to_le64(CEPHX_ENC_MAGIC)
+ };
+ size_t len = olen - sizeof(u32);
+ int ret;
+
+ ret = ceph_encrypt2(secret, obuf + sizeof(u32), &len,
+ &head, sizeof(head), ibuf, ilen);
+ if (ret)
+ return ret;
+ ceph_encode_32(&obuf, len);
+ return len + sizeof(u32);
+}
+
+/*
+ * Decrypt a (u32 len, ciphertext) blob at *p with @secret into obuf,
+ * verifying the embedded header magic.  Advances *p past the blob and
+ * returns the plaintext length, or a negative error (-EPERM on a magic
+ * mismatch, i.e. wrong key).
+ */
+static int ceph_x_decrypt(struct ceph_crypto_key *secret,
+ void **p, void *end, void *obuf, size_t olen)
+{
+ struct ceph_x_encrypt_header head;
+ size_t head_len = sizeof(head);
+ int len, ret;
+
+ len = ceph_decode_32(p);
+ if (*p + len > end)
+ return -EINVAL;
+
+ dout("ceph_x_decrypt len %d\n", len);
+ ret = ceph_decrypt2(secret, &head, &head_len, obuf, &olen,
+ *p, len);
+ if (ret)
+ return ret;
+ if (head.struct_v != 1 || le64_to_cpu(head.magic) != CEPHX_ENC_MAGIC)
+ return -EPERM;
+ *p += len;
+ return olen;
+}
+
+/*
+ * get existing (or insert new) ticket handler
+ */
+static struct ceph_x_ticket_handler *
+get_ticket_handler(struct ceph_auth_client *ac, int service)
+{
+ struct ceph_x_ticket_handler *th;
+ struct ceph_x_info *xi = ac->private;
+ struct rb_node *parent = NULL, **p = &xi->ticket_handlers.rb_node;
+
+ /* rbtree keyed by service id */
+ while (*p) {
+ parent = *p;
+ th = rb_entry(parent, struct ceph_x_ticket_handler, node);
+ if (service < th->service)
+ p = &(*p)->rb_left;
+ else if (service > th->service)
+ p = &(*p)->rb_right;
+ else
+ return th;
+ }
+
+ /* add it */
+ th = kzalloc(sizeof(*th), GFP_NOFS);
+ if (!th)
+ return ERR_PTR(-ENOMEM);
+ th->service = service;
+ rb_link_node(&th->node, parent, p);
+ rb_insert_color(&th->node, &xi->ticket_handlers);
+ return th;
+}
+
+/* unlink a ticket handler and release its key and blob */
+static void remove_ticket_handler(struct ceph_auth_client *ac,
+ struct ceph_x_ticket_handler *th)
+{
+ struct ceph_x_info *xi = ac->private;
+
+ dout("remove_ticket_handler %p %d\n", th, th->service);
+ rb_erase(&th->node, &xi->ticket_handlers);
+ ceph_crypto_key_destroy(&th->session_key);
+ if (th->ticket_blob)
+ ceph_buffer_put(th->ticket_blob);
+ kfree(th);
+}
+
+/*
+ * Parse a ticket reply from the server: for each ticket, decrypt the
+ * client part with @secret, extract the new session key and validity,
+ * decode the (possibly encrypted) service ticket blob, and install it
+ * in the matching ticket handler.
+ */
+static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
+ struct ceph_crypto_key *secret,
+ void *buf, void *end)
+{
+ struct ceph_x_info *xi = ac->private;
+ int num;
+ void *p = buf;
+ int ret;
+ char *dbuf;
+ char *ticket_buf;
+ u8 reply_struct_v;
+
+ dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
+ if (!dbuf)
+ return -ENOMEM;
+
+ ret = -ENOMEM;
+ ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
+ if (!ticket_buf)
+ goto out_dbuf;
+
+ ceph_decode_need(&p, end, 1 + sizeof(u32), bad);
+ reply_struct_v = ceph_decode_8(&p);
+ if (reply_struct_v != 1)
+ goto bad;
+ num = ceph_decode_32(&p);
+ dout("%d tickets\n", num);
+ while (num--) {
+ int type;
+ u8 tkt_struct_v, blob_struct_v;
+ struct ceph_x_ticket_handler *th;
+ void *dp, *dend;
+ int dlen;
+ char is_enc;
+ struct timespec validity;
+ struct ceph_crypto_key old_key;
+ void *tp, *tpend;
+ struct ceph_timespec new_validity;
+ struct ceph_crypto_key new_session_key;
+ struct ceph_buffer *new_ticket_blob;
+ unsigned long new_expires, new_renew_after;
+ u64 new_secret_id;
+
+ ceph_decode_need(&p, end, sizeof(u32) + 1, bad);
+
+ type = ceph_decode_32(&p);
+ dout(" ticket type %d %s\n", type, ceph_entity_type_name(type));
+
+ tkt_struct_v = ceph_decode_8(&p);
+ if (tkt_struct_v != 1)
+ goto bad;
+
+ th = get_ticket_handler(ac, type);
+ if (IS_ERR(th)) {
+ ret = PTR_ERR(th);
+ goto out;
+ }
+
+ /* blob for me */
+ dlen = ceph_x_decrypt(secret, &p, end, dbuf,
+ TEMP_TICKET_BUF_LEN);
+ if (dlen <= 0) {
+ ret = dlen;
+ goto out;
+ }
+ dout(" decrypted %d bytes\n", dlen);
+ dend = dbuf + dlen;
+ dp = dbuf;
+
+ tkt_struct_v = ceph_decode_8(&dp);
+ if (tkt_struct_v != 1)
+ goto bad;
+
+ /* keep the old session key: the new ticket blob may be
+ * encrypted with it */
+ memcpy(&old_key, &th->session_key, sizeof(old_key));
+ ret = ceph_crypto_key_decode(&new_session_key, &dp, dend);
+ if (ret)
+ goto out;
+
+ ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
+ ceph_decode_timespec(&validity, &new_validity);
+ new_expires = get_seconds() + validity.tv_sec;
+ /* renew once 3/4 of the validity window has elapsed */
+ new_renew_after = new_expires - (validity.tv_sec / 4);
+ dout(" expires=%lu renew_after=%lu\n", new_expires,
+ new_renew_after);
+
+ /* ticket blob for service */
+ ceph_decode_8_safe(&p, end, is_enc, bad);
+ tp = ticket_buf;
+ if (is_enc) {
+ /* encrypted */
+ dout(" encrypted ticket\n");
+ dlen = ceph_x_decrypt(&old_key, &p, end, ticket_buf,
+ TEMP_TICKET_BUF_LEN);
+ if (dlen < 0) {
+ ret = dlen;
+ goto out;
+ }
+ dlen = ceph_decode_32(&tp);
+ } else {
+ /* unencrypted */
+ ceph_decode_32_safe(&p, end, dlen, bad);
+ ceph_decode_need(&p, end, dlen, bad);
+ ceph_decode_copy(&p, ticket_buf, dlen);
+ }
+ tpend = tp + dlen;
+ dout(" ticket blob is %d bytes\n", dlen);
+ ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
+ blob_struct_v = ceph_decode_8(&tp);
+ new_secret_id = ceph_decode_64(&tp);
+ ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
+ if (ret)
+ goto out;
+
+ /* all is well, update our ticket */
+ ceph_crypto_key_destroy(&th->session_key);
+ if (th->ticket_blob)
+ ceph_buffer_put(th->ticket_blob);
+ th->session_key = new_session_key;
+ th->ticket_blob = new_ticket_blob;
+ th->validity = new_validity;
+ th->secret_id = new_secret_id;
+ th->expires = new_expires;
+ th->renew_after = new_renew_after;
+ dout(" got ticket service %d (%s) secret_id %lld len %d\n",
+ type, ceph_entity_type_name(type), th->secret_id,
+ (int)th->ticket_blob->vec.iov_len);
+ xi->have_keys |= th->service;
+ }
+
+ ret = 0;
+out:
+ kfree(ticket_buf);
+out_dbuf:
+ kfree(dbuf);
+ return ret;
+
+bad:
+ ret = -EINVAL;
+ goto out;
+}
+
+/*
+ * Build an authorizer for @th's service into @au: part A (global_id,
+ * service id, ticket blob) followed by part B (a random nonce encrypted
+ * with the ticket's session key).  Reuses au->buf when large enough.
+ */
+static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
+ struct ceph_x_ticket_handler *th,
+ struct ceph_x_authorizer *au)
+{
+ int maxlen;
+ struct ceph_x_authorize_a *msg_a;
+ struct ceph_x_authorize_b msg_b;
+ void *p, *end;
+ int ret;
+ int ticket_blob_len =
+ (th->ticket_blob ? th->ticket_blob->vec.iov_len : 0);
+
+ dout("build_authorizer for %s %p\n",
+ ceph_entity_type_name(th->service), au);
+
+ maxlen = sizeof(*msg_a) + sizeof(msg_b) +
+ ceph_x_encrypt_buflen(ticket_blob_len);
+ dout(" need len %d\n", maxlen);
+ if (au->buf && au->buf->alloc_len < maxlen) {
+ ceph_buffer_put(au->buf);
+ au->buf = NULL;
+ }
+ if (!au->buf) {
+ au->buf = ceph_buffer_new(maxlen, GFP_NOFS);
+ if (!au->buf)
+ return -ENOMEM;
+ }
+ au->service = th->service;
+ au->secret_id = th->secret_id;
+
+ msg_a = au->buf->vec.iov_base;
+ msg_a->struct_v = 1;
+ msg_a->global_id = cpu_to_le64(ac->global_id);
+ msg_a->service_id = cpu_to_le32(th->service);
+ msg_a->ticket_blob.struct_v = 1;
+ msg_a->ticket_blob.secret_id = cpu_to_le64(th->secret_id);
+ msg_a->ticket_blob.blob_len = cpu_to_le32(ticket_blob_len);
+ if (ticket_blob_len) {
+ memcpy(msg_a->ticket_blob.blob, th->ticket_blob->vec.iov_base,
+ th->ticket_blob->vec.iov_len);
+ }
+ dout(" th %p secret_id %lld %lld\n", th, th->secret_id,
+ le64_to_cpu(msg_a->ticket_blob.secret_id));
+
+ /* part B goes right after part A's flexible ticket blob */
+ p = msg_a + 1;
+ p += ticket_blob_len;
+ end = au->buf->vec.iov_base + au->buf->vec.iov_len;
+
+ get_random_bytes(&au->nonce, sizeof(au->nonce));
+ msg_b.struct_v = 1;
+ msg_b.nonce = cpu_to_le64(au->nonce);
+ ret = ceph_x_encrypt(&th->session_key, &msg_b, sizeof(msg_b),
+ p, end - p);
+ if (ret < 0)
+ goto out_buf;
+ p += ret;
+ au->buf->vec.iov_len = p - au->buf->vec.iov_base;
+ dout(" built authorizer nonce %llx len %d\n", au->nonce,
+ (int)au->buf->vec.iov_len);
+ BUG_ON(au->buf->vec.iov_len > maxlen);
+ return 0;
+
+out_buf:
+ ceph_buffer_put(au->buf);
+ au->buf = NULL;
+ return ret;
+}
+
+/*
+ * Encode @th's current ticket (secret_id + blob, empty blob if none)
+ * at *p.  ceph_decode_need here is only a space check on the output
+ * buffer.  Returns 0 or -ERANGE.
+ */
+static int ceph_x_encode_ticket(struct ceph_x_ticket_handler *th,
+ void **p, void *end)
+{
+ ceph_decode_need(p, end, 1 + sizeof(u64), bad);
+ ceph_encode_8(p, 1);
+ ceph_encode_64(p, th->secret_id);
+ if (th->ticket_blob) {
+ const char *buf = th->ticket_blob->vec.iov_base;
+ u32 len = th->ticket_blob->vec.iov_len;
+
+ ceph_encode_32_safe(p, end, len, bad);
+ ceph_encode_copy_safe(p, end, buf, len, bad);
+ } else {
+ ceph_encode_32_safe(p, end, 0, bad);
+ }
+
+ return 0;
+bad:
+ return -ERANGE;
+}
+
+/*
+ * Recompute which wanted service keys are missing, due for renewal, or
+ * expired; expired keys are dropped from have_keys.  *pneed gets the
+ * bitmask of services we must (re)request tickets for.
+ */
+static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
+{
+ int want = ac->want_keys;
+ struct ceph_x_info *xi = ac->private;
+ int service;
+
+ *pneed = ac->want_keys & ~(xi->have_keys);
+
+ /* services are single bits; walk each wanted one */
+ for (service = 1; service <= want; service <<= 1) {
+ struct ceph_x_ticket_handler *th;
+
+ if (!(ac->want_keys & service))
+ continue;
+
+ if (*pneed & service)
+ continue;
+
+ th = get_ticket_handler(ac, service);
+
+ if (IS_ERR(th)) {
+ *pneed |= service;
+ continue;
+ }
+
+ if (get_seconds() >= th->renew_after)
+ *pneed |= service;
+ if (get_seconds() >= th->expires)
+ xi->have_keys &= ~service;
+ }
+}
+
+
+/*
+ * Build the next cephx request into [buf, end): either the initial
+ * GET_AUTH_SESSION_KEY challenge response, or a GET_PRINCIPAL_SESSION_KEY
+ * request for the remaining needed service tickets.  Returns bytes
+ * written, 0 if nothing is needed, or a negative error.
+ */
+static int ceph_x_build_request(struct ceph_auth_client *ac,
+ void *buf, void *end)
+{
+ struct ceph_x_info *xi = ac->private;
+ int need;
+ struct ceph_x_request_header *head = buf;
+ int ret;
+ struct ceph_x_ticket_handler *th =
+ get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
+
+ if (IS_ERR(th))
+ return PTR_ERR(th);
+
+ ceph_x_validate_tickets(ac, &need);
+
+ dout("build_request want %x have %x need %x\n",
+ ac->want_keys, xi->have_keys, need);
+
+ if (need & CEPH_ENTITY_TYPE_AUTH) {
+ struct ceph_x_authenticate *auth = (void *)(head + 1);
+ void *p = auth + 1;
+ struct ceph_x_challenge_blob tmp;
+ char tmp_enc[40];
+ u64 *u;
+
+ if (p > end)
+ return -ERANGE;
+
+ dout(" get_auth_session_key\n");
+ head->op = cpu_to_le16(CEPHX_GET_AUTH_SESSION_KEY);
+
+ /* encrypt and hash */
+ get_random_bytes(&auth->client_challenge, sizeof(u64));
+ tmp.client_challenge = auth->client_challenge;
+ tmp.server_challenge = cpu_to_le64(xi->server_challenge);
+ ret = ceph_x_encrypt(&xi->secret, &tmp, sizeof(tmp),
+ tmp_enc, sizeof(tmp_enc));
+ if (ret < 0)
+ return ret;
+
+ /* proof-of-key: XOR-fold the ciphertext into a u64 */
+ auth->struct_v = 1;
+ auth->key = 0;
+ for (u = (u64 *)tmp_enc; u + 1 <= (u64 *)(tmp_enc + ret); u++)
+ auth->key ^= *(__le64 *)u;
+ dout(" server_challenge %llx client_challenge %llx key %llx\n",
+ xi->server_challenge, le64_to_cpu(auth->client_challenge),
+ le64_to_cpu(auth->key));
+
+ /* now encode the old ticket if exists */
+ ret = ceph_x_encode_ticket(th, &p, end);
+ if (ret < 0)
+ return ret;
+
+ return p - buf;
+ }
+
+ if (need) {
+ void *p = head + 1;
+ struct ceph_x_service_ticket_request *req;
+
+ if (p > end)
+ return -ERANGE;
+ head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY);
+
+ ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer);
+ if (ret)
+ return ret;
+ ceph_encode_copy(&p, xi->auth_authorizer.buf->vec.iov_base,
+ xi->auth_authorizer.buf->vec.iov_len);
+
+ req = p;
+ req->keys = cpu_to_le32(need);
+ p += sizeof(*req);
+ return p - buf;
+ }
+
+ return 0;
+}
+
+/*
+ * Handle a cephx reply: the first message is the server challenge
+ * (answered via -EAGAIN, which makes the caller build a request);
+ * later replies carry tickets.  Returns 0 once all wanted keys are
+ * held, -EAGAIN to continue the exchange, or a negative error.
+ */
+static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
+ void *buf, void *end)
+{
+ struct ceph_x_info *xi = ac->private;
+ struct ceph_x_reply_header *head = buf;
+ struct ceph_x_ticket_handler *th;
+ int len = end - buf;
+ int op;
+ int ret;
+
+ if (result)
+ return result; /* XXX hmm? */
+
+ if (xi->starting) {
+ /* it's a hello */
+ struct ceph_x_server_challenge *sc = buf;
+
+ if (len != sizeof(*sc))
+ return -EINVAL;
+ xi->server_challenge = le64_to_cpu(sc->server_challenge);
+ dout("handle_reply got server challenge %llx\n",
+ xi->server_challenge);
+ xi->starting = false;
+ xi->have_keys &= ~CEPH_ENTITY_TYPE_AUTH;
+ return -EAGAIN;
+ }
+
+ op = le16_to_cpu(head->op);
+ result = le32_to_cpu(head->result);
+ dout("handle_reply op %d result %d\n", op, result);
+ switch (op) {
+ case CEPHX_GET_AUTH_SESSION_KEY:
+ /* verify auth key */
+ ret = ceph_x_proc_ticket_reply(ac, &xi->secret,
+ buf + sizeof(*head), end);
+ break;
+
+ case CEPHX_GET_PRINCIPAL_SESSION_KEY:
+ th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
+ if (IS_ERR(th))
+ return PTR_ERR(th);
+ ret = ceph_x_proc_ticket_reply(ac, &th->session_key,
+ buf + sizeof(*head), end);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ if (ret)
+ return ret;
+ if (ac->want_keys == xi->have_keys)
+ return 0;
+ return -EAGAIN;
+}
+
+/* allocate and build a per-connection authorizer for @peer_type */
+static int ceph_x_create_authorizer(
+ struct ceph_auth_client *ac, int peer_type,
+ struct ceph_auth_handshake *auth)
+{
+ struct ceph_x_authorizer *au;
+ struct ceph_x_ticket_handler *th;
+ int ret;
+
+ th = get_ticket_handler(ac, peer_type);
+ if (IS_ERR(th))
+ return PTR_ERR(th);
+
+ au = kzalloc(sizeof(*au), GFP_NOFS);
+ if (!au)
+ return -ENOMEM;
+
+ ret = ceph_x_build_authorizer(ac, th, au);
+ if (ret) {
+ kfree(au);
+ return ret;
+ }
+
+ auth->authorizer = (struct ceph_authorizer *) au;
+ auth->authorizer_buf = au->buf->vec.iov_base;
+ auth->authorizer_buf_len = au->buf->vec.iov_len;
+ auth->authorizer_reply_buf = au->reply_buf;
+ auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
+
+ return 0;
+}
+
+/* rebuild an authorizer whose ticket is older than the current one */
+static int ceph_x_update_authorizer(
+ struct ceph_auth_client *ac, int peer_type,
+ struct ceph_auth_handshake *auth)
+{
+ struct ceph_x_authorizer *au;
+ struct ceph_x_ticket_handler *th;
+
+ th = get_ticket_handler(ac, peer_type);
+ if (IS_ERR(th))
+ return PTR_ERR(th);
+
+ au = (struct ceph_x_authorizer *)auth->authorizer;
+ if (au->secret_id < th->secret_id) {
+ dout("ceph_x_update_authorizer service %u secret %llu < %llu\n",
+ au->service, au->secret_id, th->secret_id);
+ return ceph_x_build_authorizer(ac, th, au);
+ }
+ return 0;
+}
+
+/*
+ * Decrypt the server's authorizer reply and check it echoes our
+ * nonce + 1; -EPERM on mismatch.
+ */
+static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a, size_t len)
+{
+ struct ceph_x_authorizer *au = (void *)a;
+ struct ceph_x_ticket_handler *th;
+ int ret = 0;
+ struct ceph_x_authorize_reply reply;
+ void *p = au->reply_buf;
+ void *end = p + sizeof(au->reply_buf);
+
+ th = get_ticket_handler(ac, au->service);
+ if (IS_ERR(th))
+ return PTR_ERR(th);
+ ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply));
+ if (ret < 0)
+ return ret;
+ if (ret != sizeof(reply))
+ return -EPERM;
+
+ if (au->nonce + 1 != le64_to_cpu(reply.nonce_plus_one))
+ ret = -EPERM;
+ else
+ ret = 0;
+ dout("verify_authorizer_reply nonce %llx got %llx ret %d\n",
+ au->nonce, le64_to_cpu(reply.nonce_plus_one), ret);
+ return ret;
+}
+
+/* free an authorizer created by ceph_x_create_authorizer */
+static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a)
+{
+ struct ceph_x_authorizer *au = (void *)a;
+
+ ceph_buffer_put(au->buf);
+ kfree(au);
+}
+
+
+/* forget the server challenge; the exchange restarts from scratch */
+static void ceph_x_reset(struct ceph_auth_client *ac)
+{
+ struct ceph_x_info *xi = ac->private;
+
+ dout("reset\n");
+ xi->starting = true;
+ xi->server_challenge = 0;
+}
+
+/* release all cephx state: secret, ticket handlers, cached authorizer */
+static void ceph_x_destroy(struct ceph_auth_client *ac)
+{
+ struct ceph_x_info *xi = ac->private;
+ struct rb_node *p;
+
+ dout("ceph_x_destroy %p\n", ac);
+ ceph_crypto_key_destroy(&xi->secret);
+
+ while ((p = rb_first(&xi->ticket_handlers)) != NULL) {
+ struct ceph_x_ticket_handler *th =
+ rb_entry(p, struct ceph_x_ticket_handler, node);
+ remove_ticket_handler(ac, th);
+ }
+
+ if (xi->auth_authorizer.buf)
+ ceph_buffer_put(xi->auth_authorizer.buf);
+
+ kfree(ac->private);
+ ac->private = NULL;
+}
+
+/* zero the ticket's validity so the next check treats it as stale */
+static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
+ int peer_type)
+{
+ struct ceph_x_ticket_handler *th;
+
+ th = get_ticket_handler(ac, peer_type);
+ if (!IS_ERR(th))
+ memset(&th->validity, 0, sizeof(th->validity));
+}
+
+
+static const struct ceph_auth_client_ops ceph_x_ops = {
+ .name = "x",
+ .is_authenticated = ceph_x_is_authenticated,
+ .should_authenticate = ceph_x_should_authenticate,
+ .build_request = ceph_x_build_request,
+ .handle_reply = ceph_x_handle_reply,
+ .create_authorizer = ceph_x_create_authorizer,
+ .update_authorizer = ceph_x_update_authorizer,
+ .verify_authorizer_reply = ceph_x_verify_authorizer_reply,
+ .destroy_authorizer = ceph_x_destroy_authorizer,
+ .invalidate_authorizer = ceph_x_invalidate_authorizer,
+ .reset = ceph_x_reset,
+ .destroy = ceph_x_destroy,
+};
+
+
+/*
+ * Allocate cephx state, clone the client's secret key into it, and
+ * attach it (and our ops) to the auth client.  Requires ac->key.
+ */
+int ceph_x_init(struct ceph_auth_client *ac)
+{
+ struct ceph_x_info *xi;
+ int ret;
+
+ dout("ceph_x_init %p\n", ac);
+ ret = -ENOMEM;
+ xi = kzalloc(sizeof(*xi), GFP_NOFS);
+ if (!xi)
+ goto out;
+
+ ret = -EINVAL;
+ if (!ac->key) {
+ pr_err("no secret set (for auth_x protocol)\n");
+ goto out_nomem;
+ }
+
+ ret = ceph_crypto_key_clone(&xi->secret, ac->key);
+ if (ret < 0) {
+ pr_err("cannot clone key: %d\n", ret);
+ goto out_nomem;
+ }
+
+ xi->starting = true;
+ xi->ticket_handlers = RB_ROOT;
+
+ ac->protocol = CEPH_AUTH_CEPHX;
+ ac->private = xi;
+ ac->ops = &ceph_x_ops;
+ return 0;
+
+out_nomem:
+ kfree(xi);
+out:
+ return ret;
+}
+
+
+
+
diff --git a/libceph/auth_x.h b/libceph/auth_x.h
new file mode 100644
index 0000000..65ee720
--- /dev/null
+++ b/libceph/auth_x.h
@@ -0,0 +1,51 @@
+#ifndef _FS_CEPH_AUTH_X_H
+#define _FS_CEPH_AUTH_X_H
+
+#include <linux/rbtree.h>
+
+#include <linux/ceph/auth.h>
+
+#include "crypto.h"
+#include "auth_x_protocol.h"
+
+/*
+ * Handle ticket for a single service.
+ */
+struct ceph_x_ticket_handler {
+ struct rb_node node;
+ unsigned int service;
+
+ struct ceph_crypto_key session_key;
+ struct ceph_timespec validity;
+
+ u64 secret_id;
+ struct ceph_buffer *ticket_blob;
+
+ unsigned long renew_after, expires;
+};
+
+
+struct ceph_x_authorizer {
+ struct ceph_buffer *buf;
+ unsigned int service;
+ u64 nonce;
+ u64 secret_id;
+ char reply_buf[128]; /* big enough for encrypted blob */
+};
+
+struct ceph_x_info {
+ struct ceph_crypto_key secret;
+
+ bool starting;
+ u64 server_challenge;
+
+ unsigned int have_keys;
+ struct rb_root ticket_handlers;
+
+ struct ceph_x_authorizer auth_authorizer;
+};
+
+int ceph_x_init(struct ceph_auth_client *ac);
+
+#endif
+
diff --git a/libceph/auth_x_protocol.h b/libceph/auth_x_protocol.h
new file mode 100644
index 0000000..671d305
--- /dev/null
+++ b/libceph/auth_x_protocol.h
@@ -0,0 +1,90 @@
+#ifndef __FS_CEPH_AUTH_X_PROTOCOL
+#define __FS_CEPH_AUTH_X_PROTOCOL
+
+#define CEPHX_GET_AUTH_SESSION_KEY 0x0100
+#define CEPHX_GET_PRINCIPAL_SESSION_KEY 0x0200
+#define CEPHX_GET_ROTATING_KEY 0x0400
+
+/* common bits */
+/* ticket identifier + opaque server-issued blob; trails a message */
+struct ceph_x_ticket_blob {
+ __u8 struct_v;
+ __le64 secret_id;
+ __le32 blob_len;
+ char blob[];
+} __attribute__ ((packed));
+
+
+/* common request/reply headers */
+struct ceph_x_request_header {
+ __le16 op;
+} __attribute__ ((packed));
+
+struct ceph_x_reply_header {
+ __le16 op;
+ __le32 result;
+} __attribute__ ((packed));
+
+
+/* authenticate handshake */
+
+/* initial hello (no reply header) */
+struct ceph_x_server_challenge {
+ __u8 struct_v;
+ __le64 server_challenge;
+} __attribute__ ((packed));
+
+/* client's answer: challenge plus XOR-folded proof of the shared key */
+struct ceph_x_authenticate {
+ __u8 struct_v;
+ __le64 client_challenge;
+ __le64 key;
+ /* ticket blob */
+} __attribute__ ((packed));
+
+/* bitmask of service tickets being requested */
+struct ceph_x_service_ticket_request {
+ __u8 struct_v;
+ __le32 keys;
+} __attribute__ ((packed));
+
+/* plaintext that gets encrypted to prove key possession */
+struct ceph_x_challenge_blob {
+ __le64 server_challenge;
+ __le64 client_challenge;
+} __attribute__ ((packed));
+
+
+
+/* authorize handshake */
+
+/*
+ * The authorizer consists of two pieces:
+ * a - service id, ticket blob
+ * b - encrypted with session key
+ */
+struct ceph_x_authorize_a {
+ __u8 struct_v;
+ __le64 global_id;
+ __le32 service_id;
+ struct ceph_x_ticket_blob ticket_blob;
+} __attribute__ ((packed));
+
+struct ceph_x_authorize_b {
+ __u8 struct_v;
+ __le64 nonce;
+} __attribute__ ((packed));
+
+/* server echoes our nonce incremented by one */
+struct ceph_x_authorize_reply {
+ __u8 struct_v;
+ __le64 nonce_plus_one;
+} __attribute__ ((packed));
+
+
+/*
+ * encyption bundle
+ */
+#define CEPHX_ENC_MAGIC 0xff009cad8826aa55ull
+
+/* prepended to every encrypted payload; magic detects wrong keys */
+struct ceph_x_encrypt_header {
+ __u8 struct_v;
+ __le64 magic;
+} __attribute__ ((packed));
+
+#endif
diff --git a/libceph/buffer.c b/libceph/buffer.c
new file mode 100644
index 0000000..621b5f6
--- /dev/null
+++ b/libceph/buffer.c
@@ -0,0 +1,58 @@
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <linux/ceph/buffer.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/libceph.h> /* for ceph_kv{malloc,free} */
+
+/*
+ * Allocate a refcounted ceph_buffer with a len-byte payload.  The payload
+ * comes from ceph_kvmalloc (kmalloc with vmalloc fallback), so it may or
+ * may not be physically contiguous.  Returns NULL on allocation failure.
+ */
+struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
+{
+ struct ceph_buffer *b;
+
+ b = kmalloc(sizeof(*b), gfp);
+ if (!b)
+ return NULL;
+
+ b->vec.iov_base = ceph_kvmalloc(len, gfp);
+ if (!b->vec.iov_base) {
+ kfree(b);
+ return NULL;
+ }
+
+ kref_init(&b->kref); /* caller holds the initial reference */
+ b->alloc_len = len;
+ b->vec.iov_len = len;
+ dout("buffer_new %p\n", b);
+ return b;
+}
+EXPORT_SYMBOL(ceph_buffer_new);
+
+/*
+ * kref release callback: free the payload (kvmalloc-aware) and the
+ * buffer itself.  Invoked when the last reference is dropped.
+ */
+void ceph_buffer_release(struct kref *kref)
+{
+ struct ceph_buffer *b = container_of(kref, struct ceph_buffer, kref);
+
+ dout("buffer_release %p\n", b);
+ ceph_kvfree(b->vec.iov_base);
+ kfree(b);
+}
+EXPORT_SYMBOL(ceph_buffer_release);
+
+/*
+ * Decode a length-prefixed (u32 length + data) buffer from *p, bounded
+ * by end, into a newly allocated ceph_buffer.  Advances *p past the
+ * consumed bytes.  Returns 0, -EINVAL on short input, -ENOMEM on
+ * allocation failure.
+ */
+int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end)
+{
+ size_t len;
+
+ ceph_decode_need(p, end, sizeof(u32), bad);
+ len = ceph_decode_32(p);
+ dout("decode_buffer len %d\n", (int)len);
+ /* verify the payload is fully present before allocating */
+ ceph_decode_need(p, end, len, bad);
+ *b = ceph_buffer_new(len, GFP_NOFS);
+ if (!*b)
+ return -ENOMEM;
+ ceph_decode_copy(p, (*b)->vec.iov_base, len);
+ return 0;
+bad:
+ return -EINVAL;
+}
diff --git a/libceph/ceph_common.c b/libceph/ceph_common.c
new file mode 100644
index 0000000..67d7721
--- /dev/null
+++ b/libceph/ceph_common.c
@@ -0,0 +1,664 @@
+
+#include <linux/ceph/ceph_debug.h>
+#include <linux/backing-dev.h>
+#include <linux/ctype.h>
+#include <linux/fs.h>
+#include <linux/inet.h>
+#include <linux/in6.h>
+#include <linux/key.h>
+#include <keys/ceph-type.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/parser.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/statfs.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>
+
+
+#include <linux/ceph/ceph_features.h>
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/debugfs.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/auth.h>
+#include "crypto.h"
+
+
+/*
+ * Module compatibility interface. For now it doesn't do anything,
+ * but its existence signals a certain level of functionality.
+ *
+ * The data buffer is used to pass information both to and from
+ * libceph. The return value indicates whether libceph determines
+ * it is compatible with the caller (from another kernel module),
+ * given the provided data.
+ *
+ * The data pointer can be null.
+ */
+bool libceph_compatible(void *data)
+{
+ return true;
+}
+EXPORT_SYMBOL(libceph_compatible);
+
+/*
+ * find filename portion of a path (/foo/bar/baz -> baz)
+ * @s need not be NUL-terminated; @len bounds the scan.  Returns a
+ * pointer into @s just past the last '/', or @s itself if none.
+ */
+const char *ceph_file_part(const char *s, int len)
+{
+ const char *e = s + len;
+
+ while (e != s && *(e-1) != '/')
+ e--;
+ return e;
+}
+EXPORT_SYMBOL(ceph_file_part);
+
+/*
+ * Map a CEPH_MSG_* message type to a human-readable name for debug and
+ * log output.  Returns "unknown" (never NULL) for unrecognized types.
+ */
+const char *ceph_msg_type_name(int type)
+{
+ switch (type) {
+ case CEPH_MSG_SHUTDOWN: return "shutdown";
+ case CEPH_MSG_PING: return "ping";
+ case CEPH_MSG_AUTH: return "auth";
+ case CEPH_MSG_AUTH_REPLY: return "auth_reply";
+ case CEPH_MSG_MON_MAP: return "mon_map";
+ case CEPH_MSG_MON_GET_MAP: return "mon_get_map";
+ case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe";
+ case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack";
+ case CEPH_MSG_STATFS: return "statfs";
+ case CEPH_MSG_STATFS_REPLY: return "statfs_reply";
+ case CEPH_MSG_MDS_MAP: return "mds_map";
+ case CEPH_MSG_CLIENT_SESSION: return "client_session";
+ case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
+ case CEPH_MSG_CLIENT_REQUEST: return "client_request";
+ case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward";
+ case CEPH_MSG_CLIENT_REPLY: return "client_reply";
+ case CEPH_MSG_CLIENT_CAPS: return "client_caps";
+ case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release";
+ case CEPH_MSG_CLIENT_SNAP: return "client_snap";
+ case CEPH_MSG_CLIENT_LEASE: return "client_lease";
+ case CEPH_MSG_OSD_MAP: return "osd_map";
+ case CEPH_MSG_OSD_OP: return "osd_op";
+ case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
+ case CEPH_MSG_WATCH_NOTIFY: return "watch_notify";
+ default: return "unknown";
+ }
+}
+EXPORT_SYMBOL(ceph_msg_type_name);
+
+/*
+ * Initially learn our fsid, or verify that a reported fsid matches the
+ * one already recorded on the client.
+ *
+ * Returns 0 if the fsid was recorded or matches, -1 on mismatch.
+ */
+int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
+{
+ if (client->have_fsid) {
+ if (ceph_fsid_compare(&client->fsid, fsid)) {
+ /* terminate the log line: without '\n' the next
+ * printk can be appended to this message */
+ pr_err("bad fsid, had %pU got %pU\n",
+ &client->fsid, fsid);
+ return -1;
+ }
+ } else {
+ memcpy(&client->fsid, fsid, sizeof(*fsid));
+ }
+ return 0;
+}
+EXPORT_SYMBOL(ceph_check_fsid);
+
+/*
+ * strcmp() that tolerates NULL operands: two NULLs compare equal, and a
+ * NULL sorts before any non-NULL string.
+ */
+static int strcmp_null(const char *s1, const char *s2)
+{
+ if (!s1 && !s2)
+ return 0;
+ if (s1 && !s2)
+ return -1;
+ if (!s1 && s2)
+ return 1;
+ return strcmp(s1, s2);
+}
+
+/*
+ * Compare a new option set against an existing client's options to
+ * decide whether the client can be shared.  Returns 0 on a match,
+ * non-zero otherwise.
+ */
+int ceph_compare_options(struct ceph_options *new_opt,
+ struct ceph_client *client)
+{
+ struct ceph_options *opt1 = new_opt;
+ struct ceph_options *opt2 = client->options;
+ int ofs = offsetof(struct ceph_options, mon_addr);
+ int i;
+ int ret;
+
+ /* bulk-compare the fixed scalar fields that precede mon_addr;
+ * pointer members after it need the field-by-field checks below */
+ ret = memcmp(opt1, opt2, ofs);
+ if (ret)
+ return ret;
+
+ ret = strcmp_null(opt1->name, opt2->name);
+ if (ret)
+ return ret;
+
+ if (opt1->key && !opt2->key)
+ return -1;
+ if (!opt1->key && opt2->key)
+ return 1;
+ if (opt1->key && opt2->key) {
+ if (opt1->key->type != opt2->key->type)
+ return -1;
+ if (opt1->key->created.tv_sec != opt2->key->created.tv_sec)
+ return -1;
+ if (opt1->key->created.tv_nsec != opt2->key->created.tv_nsec)
+ return -1;
+ if (opt1->key->len != opt2->key->len)
+ return -1;
+ if (opt1->key->key && !opt2->key->key)
+ return -1;
+ if (!opt1->key->key && opt2->key->key)
+ return 1;
+ if (opt1->key->key && opt2->key->key) {
+ ret = memcmp(opt1->key->key, opt2->key->key, opt1->key->len);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /* any matching mon ip implies a match */
+ for (i = 0; i < opt1->num_mon; i++) {
+ if (ceph_monmap_contains(client->monc.monmap,
+ &opt1->mon_addr[i]))
+ return 0;
+ }
+ return -1;
+}
+EXPORT_SYMBOL(ceph_compare_options);
+
+/*
+ * Allocate @size bytes, preferring kmalloc for modest sizes and falling
+ * back to vmalloc.  __GFP_NOWARN suppresses the allocation-failure splat
+ * since the vmalloc fallback usually succeeds.  Free with ceph_kvfree().
+ */
+void *ceph_kvmalloc(size_t size, gfp_t flags)
+{
+ if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
+ void *ptr = kmalloc(size, flags | __GFP_NOWARN);
+ if (ptr)
+ return ptr;
+ }
+
+ return __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL);
+}
+
+/* counterpart to ceph_kvmalloc: route to vfree or kfree as appropriate */
+void ceph_kvfree(const void *ptr)
+{
+ if (is_vmalloc_addr(ptr))
+ vfree(ptr);
+ else
+ kfree(ptr);
+}
+
+
+/*
+ * Parse a textual fsid (UUID, e.g. "01234567-89ab-...") into the 16-byte
+ * binary form.  Punctuation between hex pairs is skipped, so both dashed
+ * and undashed forms are accepted.  Returns 0 on success, -EINVAL if
+ * fewer than 16 bytes could be decoded.
+ */
+static int parse_fsid(const char *str, struct ceph_fsid *fsid)
+{
+ int i = 0;
+ char tmp[3];
+ int err = -EINVAL;
+ int d;
+
+ dout("parse_fsid '%s'\n", str);
+ tmp[2] = 0;
+ while (*str && i < 16) {
+ if (ispunct(*str)) {
+ str++;
+ continue;
+ }
+ /* need two hex digits for one output byte */
+ if (!isxdigit(str[0]) || !isxdigit(str[1]))
+ break;
+ tmp[0] = str[0];
+ tmp[1] = str[1];
+ if (sscanf(tmp, "%x", &d) < 1)
+ break;
+ fsid->fsid[i] = d & 0xff;
+ i++;
+ str += 2;
+ }
+
+ if (i == 16)
+ err = 0;
+ dout("parse_fsid ret %d got fsid %pU", err, fsid);
+ return err;
+}
+
+/*
+ * ceph options
+ *
+ * Token ids for the mount-option parser.  The Opt_last_int /
+ * Opt_last_string sentinels partition the ids: ids below Opt_last_int
+ * take an integer argument, ids between the sentinels take a string,
+ * and ids above Opt_last_string are bare flags.  Keep new entries in
+ * the matching section of both the enum and opt_tokens.
+ */
+enum {
+ Opt_osdtimeout,
+ Opt_osdkeepalivetimeout,
+ Opt_mount_timeout,
+ Opt_osd_idle_ttl,
+ Opt_last_int,
+ /* int args above */
+ Opt_fsid,
+ Opt_name,
+ Opt_secret,
+ Opt_key,
+ Opt_ip,
+ Opt_last_string,
+ /* string args above */
+ Opt_share,
+ Opt_noshare,
+ Opt_crc,
+ Opt_nocrc,
+};
+
+/* pattern table consumed by match_token() in ceph_parse_options() */
+static match_table_t opt_tokens = {
+ {Opt_osdtimeout, "osdtimeout=%d"},
+ {Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
+ {Opt_mount_timeout, "mount_timeout=%d"},
+ {Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
+ /* int args above */
+ {Opt_fsid, "fsid=%s"},
+ {Opt_name, "name=%s"},
+ {Opt_secret, "secret=%s"},
+ {Opt_key, "key=%s"},
+ {Opt_ip, "ip=%s"},
+ /* string args above */
+ {Opt_share, "share"},
+ {Opt_noshare, "noshare"},
+ {Opt_crc, "crc"},
+ {Opt_nocrc, "nocrc"},
+ {-1, NULL}
+};
+
+/*
+ * Free a ceph_options and everything it owns (entity name, secret key,
+ * monitor address array).  Safe on a partially populated struct, as
+ * produced by an error path in ceph_parse_options().
+ */
+void ceph_destroy_options(struct ceph_options *opt)
+{
+ dout("destroy_options %p\n", opt);
+ kfree(opt->name);
+ if (opt->key) {
+ /* zero the key material before freeing it */
+ ceph_crypto_key_destroy(opt->key);
+ kfree(opt->key);
+ }
+ kfree(opt->mon_addr);
+ kfree(opt);
+}
+EXPORT_SYMBOL(ceph_destroy_options);
+
+/*
+ * get secret from key store: look up a "ceph"-type key by @name in the
+ * kernel keyring and clone its payload into @dst.  Lookup failures are
+ * logged and collapsed to -EPERM (request_key errors don't map nicely
+ * to mount(2) errors); clone failures are returned as-is.
+ */
+static int get_secret(struct ceph_crypto_key *dst, const char *name) {
+ struct key *ukey;
+ int key_err;
+ int err = 0;
+ struct ceph_crypto_key *ckey;
+
+ ukey = request_key(&key_type_ceph, name, NULL);
+ /* NOTE(review): request_key() returns ERR_PTR, never NULL, so the
+ * !ukey test looks redundant — confirm before removing */
+ if (!ukey || IS_ERR(ukey)) {
+ /* request_key errors don't map nicely to mount(2)
+ errors; don't even try, but still printk */
+ key_err = PTR_ERR(ukey);
+ switch (key_err) {
+ case -ENOKEY:
+ pr_warning("ceph: Mount failed due to key not found: %s\n", name);
+ break;
+ case -EKEYEXPIRED:
+ pr_warning("ceph: Mount failed due to expired key: %s\n", name);
+ break;
+ case -EKEYREVOKED:
+ pr_warning("ceph: Mount failed due to revoked key: %s\n", name);
+ break;
+ default:
+ pr_warning("ceph: Mount failed due to unknown key error"
+ " %d: %s\n", key_err, name);
+ }
+ err = -EPERM;
+ goto out;
+ }
+
+ ckey = ukey->payload.data;
+ err = ceph_crypto_key_clone(dst, ckey);
+ if (err)
+ goto out_key;
+ /* pass through, err is 0 */
+
+out_key:
+ key_put(ukey);
+out:
+ return err;
+}
+
+/*
+ * Parse monitor addresses (from dev_name..dev_name_end) and the
+ * comma-separated option string into a freshly allocated ceph_options.
+ * Tokens not recognized here are offered to @parse_extra_token (e.g.
+ * fs-layer options).  Returns the options on success, or an ERR_PTR;
+ * on failure everything allocated here is released via
+ * ceph_destroy_options().
+ */
+struct ceph_options *
+ceph_parse_options(char *options, const char *dev_name,
+ const char *dev_name_end,
+ int (*parse_extra_token)(char *c, void *private),
+ void *private)
+{
+ struct ceph_options *opt;
+ const char *c;
+ int err = -ENOMEM;
+ substring_t argstr[MAX_OPT_ARGS];
+
+ /* only the initial network namespace is supported */
+ if (current->nsproxy->net_ns != &init_net)
+ return ERR_PTR(-EINVAL);
+
+ opt = kzalloc(sizeof(*opt), GFP_KERNEL);
+ if (!opt)
+ return ERR_PTR(-ENOMEM);
+ opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr),
+ GFP_KERNEL);
+ if (!opt->mon_addr)
+ goto out;
+
+ dout("parse_options %p options '%s' dev_name '%s'\n", opt, options,
+ dev_name);
+
+ /* start with defaults */
+ opt->flags = CEPH_OPT_DEFAULT;
+ opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
+ opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
+ opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */
+
+ /* get mon ip(s) */
+ /* ip1[:port1][,ip2[:port2]...] */
+ err = ceph_parse_ips(dev_name, dev_name_end, opt->mon_addr,
+ CEPH_MAX_MON, &opt->num_mon);
+ if (err < 0)
+ goto out;
+
+ /* parse mount options */
+ while ((c = strsep(&options, ",")) != NULL) {
+ int token, intval, ret;
+ if (!*c)
+ continue;
+ err = -EINVAL;
+ token = match_token((char *)c, opt_tokens, argstr);
+ if (token < 0 && parse_extra_token) {
+ /* extra? */
+ err = parse_extra_token((char *)c, private);
+ if (err < 0) {
+ pr_err("bad option at '%s'\n", c);
+ goto out;
+ }
+ continue;
+ }
+ if (token < Opt_last_int) {
+ ret = match_int(&argstr[0], &intval);
+ if (ret < 0) {
+ pr_err("bad mount option arg (not int) "
+ "at '%s'\n", c);
+ continue;
+ }
+ dout("got int token %d val %d\n", token, intval);
+ } else if (token > Opt_last_int && token < Opt_last_string) {
+ dout("got string token %d val %s\n", token,
+ argstr[0].from);
+ } else {
+ dout("got token %d\n", token);
+ }
+ switch (token) {
+ case Opt_ip:
+ err = ceph_parse_ips(argstr[0].from,
+ argstr[0].to,
+ &opt->my_addr,
+ 1, NULL);
+ if (err < 0)
+ goto out;
+ opt->flags |= CEPH_OPT_MYIP;
+ break;
+
+ case Opt_fsid:
+ err = parse_fsid(argstr[0].from, &opt->fsid);
+ if (err == 0)
+ opt->flags |= CEPH_OPT_FSID;
+ break;
+ case Opt_name:
+ opt->name = kstrndup(argstr[0].from,
+ argstr[0].to-argstr[0].from,
+ GFP_KERNEL);
+ /* a failed dup must not be silently treated as
+ * success (callers would see a NULL name) */
+ if (!opt->name) {
+ err = -ENOMEM;
+ goto out;
+ }
+ break;
+ case Opt_secret:
+ opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL);
+ if (!opt->key) {
+ err = -ENOMEM;
+ goto out;
+ }
+ err = ceph_crypto_key_unarmor(opt->key, argstr[0].from);
+ if (err < 0)
+ goto out;
+ break;
+ case Opt_key:
+ opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL);
+ if (!opt->key) {
+ err = -ENOMEM;
+ goto out;
+ }
+ err = get_secret(opt->key, argstr[0].from);
+ if (err < 0)
+ goto out;
+ break;
+
+ /* misc */
+ case Opt_osdtimeout:
+ pr_warning("ignoring deprecated osdtimeout option\n");
+ break;
+ case Opt_osdkeepalivetimeout:
+ opt->osd_keepalive_timeout = intval;
+ break;
+ case Opt_osd_idle_ttl:
+ opt->osd_idle_ttl = intval;
+ break;
+ case Opt_mount_timeout:
+ opt->mount_timeout = intval;
+ break;
+
+ case Opt_share:
+ opt->flags &= ~CEPH_OPT_NOSHARE;
+ break;
+ case Opt_noshare:
+ opt->flags |= CEPH_OPT_NOSHARE;
+ break;
+
+ case Opt_crc:
+ opt->flags &= ~CEPH_OPT_NOCRC;
+ break;
+ case Opt_nocrc:
+ opt->flags |= CEPH_OPT_NOCRC;
+ break;
+
+ default:
+ /* every token in opt_tokens must be handled above */
+ BUG_ON(token);
+ }
+ }
+
+ /* success */
+ return opt;
+
+out:
+ ceph_destroy_options(opt);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL(ceph_parse_options);
+
+/* return the global id assigned to this client by the auth subsystem */
+u64 ceph_client_id(struct ceph_client *client)
+{
+ return client->monc.auth->global_id;
+}
+EXPORT_SYMBOL(ceph_client_id);
+
+/*
+ * create a fresh client instance
+ *
+ * Takes ownership of @opt on success; on failure the caller keeps it.
+ * The given feature bits are OR'd with the library defaults.  Returns
+ * the client or an ERR_PTR.
+ */
+struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
+ u64 supported_features,
+ u64 required_features)
+{
+ struct ceph_client *client;
+ struct ceph_entity_addr *myaddr = NULL;
+ int err = -ENOMEM;
+
+ client = kzalloc(sizeof(*client), GFP_KERNEL);
+ if (client == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ client->private = private;
+ client->options = opt;
+
+ mutex_init(&client->mount_mutex);
+ init_waitqueue_head(&client->auth_wq);
+ client->auth_err = 0;
+
+ client->extra_mon_dispatch = NULL;
+ client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT |
+ supported_features;
+ client->required_features = CEPH_FEATURES_REQUIRED_DEFAULT |
+ required_features;
+
+ /* msgr: bind to an explicit local address only if "ip=" was given */
+ if (ceph_test_opt(client, MYIP))
+ myaddr = &client->options->my_addr;
+ ceph_messenger_init(&client->msgr, myaddr,
+ client->supported_features,
+ client->required_features,
+ ceph_test_opt(client, NOCRC));
+
+ /* subsystems */
+ err = ceph_monc_init(&client->monc, client);
+ if (err < 0)
+ goto fail;
+ err = ceph_osdc_init(&client->osdc, client);
+ if (err < 0)
+ goto fail_monc;
+
+ return client;
+
+fail_monc:
+ ceph_monc_stop(&client->monc);
+fail:
+ kfree(client);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL(ceph_create_client);
+
+/*
+ * Tear down a client: stop the messenger, OSD and monitor clients,
+ * remove debugfs entries, and free the options taken over at create
+ * time.  Counterpart to ceph_create_client().
+ */
+void ceph_destroy_client(struct ceph_client *client)
+{
+ dout("destroy_client %p\n", client);
+
+ /* tell the messenger workers to stop accepting new work */
+ atomic_set(&client->msgr.stopping, 1);
+
+ /* unmount */
+ ceph_osdc_stop(&client->osdc);
+
+ ceph_monc_stop(&client->monc);
+
+ ceph_debugfs_client_cleanup(client);
+
+ ceph_destroy_options(client->options);
+
+ kfree(client);
+ dout("destroy_client %p done\n", client);
+}
+EXPORT_SYMBOL(ceph_destroy_client);
+
+/*
+ * true if we have the mon map (and have thus joined the cluster)
+ * and a non-trivial osd map, i.e. both subsystems are usable.
+ */
+static int have_mon_and_osd_map(struct ceph_client *client)
+{
+ return client->monc.monmap && client->monc.monmap->epoch &&
+ client->osdc.osdmap && client->osdc.osdmap->epoch;
+}
+
+/*
+ * mount: join the ceph cluster, and open root directory.
+ *
+ * Opens the monitor session and waits (interruptibly) until both mon
+ * and osd maps arrive, auth fails, or @started + mount_timeout elapses.
+ * Caller must hold mount_mutex (see ceph_open_session()).  Returns 0,
+ * -EIO on timeout, the auth error, or -EINTR/-ERESTARTSYS.
+ */
+int __ceph_open_session(struct ceph_client *client, unsigned long started)
+{
+ int err;
+ unsigned long timeout = client->options->mount_timeout * HZ;
+
+ /* open session, and wait for mon and osd maps */
+ err = ceph_monc_open_session(&client->monc);
+ if (err < 0)
+ return err;
+
+ while (!have_mon_and_osd_map(client)) {
+ err = -EIO;
+ /* timeout == 0 means wait forever */
+ if (timeout && time_after_eq(jiffies, started + timeout))
+ return err;
+
+ /* wait */
+ dout("mount waiting for mon_map\n");
+ err = wait_event_interruptible_timeout(client->auth_wq,
+ have_mon_and_osd_map(client) || (client->auth_err < 0),
+ timeout);
+ if (err == -EINTR || err == -ERESTARTSYS)
+ return err;
+ if (client->auth_err < 0)
+ return client->auth_err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(__ceph_open_session);
+
+
+/*
+ * Serialized wrapper around __ceph_open_session(): takes mount_mutex so
+ * only one opener runs at a time, and records the start time used for
+ * the mount timeout.
+ */
+int ceph_open_session(struct ceph_client *client)
+{
+ int ret;
+ unsigned long started = jiffies; /* note the start time */
+
+ dout("open_session start\n");
+ mutex_lock(&client->mount_mutex);
+
+ ret = __ceph_open_session(client, started);
+
+ mutex_unlock(&client->mount_mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_open_session);
+
+
+/*
+ * Module init: bring up debugfs, crypto, the messenger and the OSD
+ * client in order, unwinding in reverse on any failure.
+ */
+static int __init init_ceph_lib(void)
+{
+ int ret = 0;
+
+ ret = ceph_debugfs_init();
+ if (ret < 0)
+ goto out;
+
+ ret = ceph_crypto_init();
+ if (ret < 0)
+ goto out_debugfs;
+
+ ret = ceph_msgr_init();
+ if (ret < 0)
+ goto out_crypto;
+
+ ret = ceph_osdc_setup();
+ if (ret < 0)
+ goto out_msgr;
+
+ pr_info("loaded (mon/osd proto %d/%d)\n",
+ CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL);
+
+ return 0;
+
+out_msgr:
+ ceph_msgr_exit();
+out_crypto:
+ ceph_crypto_shutdown();
+out_debugfs:
+ ceph_debugfs_cleanup();
+out:
+ return ret;
+}
+
+/* Module exit: tear down subsystems in the reverse of init order. */
+static void __exit exit_ceph_lib(void)
+{
+ dout("exit_ceph_lib\n");
+ ceph_osdc_cleanup();
+ ceph_msgr_exit();
+ ceph_crypto_shutdown();
+ ceph_debugfs_cleanup();
+}
+
+module_init(init_ceph_lib);
+module_exit(exit_ceph_lib);
+
+MODULE_AUTHOR("Sage Weil <sage at newdream.net>");
+MODULE_AUTHOR("Yehuda Sadeh <yehuda at hq.newdream.net>");
+MODULE_AUTHOR("Patience Warnick <patience at newdream.net>");
+/* NOTE(review): this is the shared core library (libceph), not the
+ * filesystem module — the description below looks copied from fs/ceph;
+ * confirm before changing */
+MODULE_DESCRIPTION("Ceph filesystem for Linux");
+MODULE_LICENSE("GPL");
diff --git a/libceph/ceph_fs.c b/libceph/ceph_fs.c
new file mode 100644
index 0000000..41466cc
--- /dev/null
+++ b/libceph/ceph_fs.c
@@ -0,0 +1,78 @@
+/*
+ * Some non-inline ceph helpers
+ */
+#include <linux/module.h>
+#include <linux/ceph/types.h>
+
+/*
+ * return true if @layout appears to be valid
+ *
+ * Checks: stripe unit and object size are non-zero multiples of
+ * CEPH_MIN_STRIPE_UNIT (64k), object size is a multiple of the stripe
+ * unit, and stripe count is non-zero.
+ */
+int ceph_file_layout_is_valid(const struct ceph_file_layout *layout)
+{
+ __u32 su = le32_to_cpu(layout->fl_stripe_unit);
+ __u32 sc = le32_to_cpu(layout->fl_stripe_count);
+ __u32 os = le32_to_cpu(layout->fl_object_size);
+
+ /* stripe unit, object size must be non-zero, 64k increment */
+ if (!su || (su & (CEPH_MIN_STRIPE_UNIT-1)))
+ return 0;
+ if (!os || (os & (CEPH_MIN_STRIPE_UNIT-1)))
+ return 0;
+ /* object size must be a multiple of stripe unit */
+ if (os < su || os % su)
+ return 0;
+ /* stripe count must be non-zero */
+ if (!sc)
+ return 0;
+ return 1;
+}
+
+
+/*
+ * Translate open(2) flags into a CEPH_FILE_MODE_* value.  The switch on
+ * O_ACCMODE is exhaustive (all four two-bit values are covered), so
+ * mode is always assigned.
+ */
+int ceph_flags_to_mode(int flags)
+{
+ int mode;
+
+#ifdef O_DIRECTORY /* fixme */
+ if ((flags & O_DIRECTORY) == O_DIRECTORY)
+ return CEPH_FILE_MODE_PIN;
+#endif
+
+ switch (flags & O_ACCMODE) {
+ case O_WRONLY:
+ mode = CEPH_FILE_MODE_WR;
+ break;
+ case O_RDONLY:
+ mode = CEPH_FILE_MODE_RD;
+ break;
+ case O_RDWR:
+ case O_ACCMODE: /* this is what the VFS does */
+ mode = CEPH_FILE_MODE_RDWR;
+ break;
+ }
+#ifdef O_LAZY
+ if (flags & O_LAZY)
+ mode |= CEPH_FILE_MODE_LAZY;
+#endif
+
+ return mode;
+}
+EXPORT_SYMBOL(ceph_flags_to_mode);
+
+/*
+ * Return the capability bits wanted for a CEPH_FILE_MODE_* bitmask.
+ * PIN is always included; RD/WR/LAZY each add their cap sets.
+ */
+int ceph_caps_for_mode(int mode)
+{
+ int caps = CEPH_CAP_PIN;
+
+ if (mode & CEPH_FILE_MODE_RD)
+ caps |= CEPH_CAP_FILE_SHARED |
+ CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE;
+ if (mode & CEPH_FILE_MODE_WR)
+ caps |= CEPH_CAP_FILE_EXCL |
+ CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER |
+ CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL |
+ CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL;
+ if (mode & CEPH_FILE_MODE_LAZY)
+ caps |= CEPH_CAP_FILE_LAZYIO;
+
+ return caps;
+}
+EXPORT_SYMBOL(ceph_caps_for_mode);
diff --git a/libceph/ceph_hash.c b/libceph/ceph_hash.c
new file mode 100644
index 0000000..67bb1f1
--- /dev/null
+++ b/libceph/ceph_hash.c
@@ -0,0 +1,121 @@
+
+#include <linux/ceph/types.h>
+#include <linux/module.h>
+
+/*
+ * Robert Jenkins' hash function.
+ * http://burtleburtle.net/bob/hash/evahash.html
+ * This is in the public domain.
+ */
+#define mix(a, b, c) \
+ do { \
+ a = a - b; a = a - c; a = a ^ (c >> 13); \
+ b = b - c; b = b - a; b = b ^ (a << 8); \
+ c = c - a; c = c - b; c = c ^ (b >> 13); \
+ a = a - b; a = a - c; a = a ^ (c >> 12); \
+ b = b - c; b = b - a; b = b ^ (a << 16); \
+ c = c - a; c = c - b; c = c ^ (b >> 5); \
+ a = a - b; a = a - c; a = a ^ (c >> 3); \
+ b = b - c; b = b - a; b = b ^ (a << 10); \
+ c = c - a; c = c - b; c = c ^ (b >> 15); \
+ } while (0)
+
+/*
+ * Hash @length bytes of @str into a 32-bit value.  Bytes are consumed
+ * 12 at a time through mix(); the tail is folded in by the fallthrough
+ * switch below.  Must produce stable results across architectures, as
+ * the value is used for object placement.
+ */
+unsigned int ceph_str_hash_rjenkins(const char *str, unsigned int length)
+{
+ const unsigned char *k = (const unsigned char *)str;
+ __u32 a, b, c; /* the internal state */
+ __u32 len; /* how many key bytes still need mixing */
+
+ /* Set up the internal state */
+ len = length;
+ a = 0x9e3779b9; /* the golden ratio; an arbitrary value */
+ b = a;
+ c = 0; /* variable initialization of internal state */
+
+ /* handle most of the key */
+ while (len >= 12) {
+ a = a + (k[0] + ((__u32)k[1] << 8) + ((__u32)k[2] << 16) +
+ ((__u32)k[3] << 24));
+ b = b + (k[4] + ((__u32)k[5] << 8) + ((__u32)k[6] << 16) +
+ ((__u32)k[7] << 24));
+ c = c + (k[8] + ((__u32)k[9] << 8) + ((__u32)k[10] << 16) +
+ ((__u32)k[11] << 24));
+ mix(a, b, c);
+ k = k + 12;
+ len = len - 12;
+ }
+
+ /* handle the last 11 bytes */
+ c = c + length;
+ switch (len) { /* all the case statements fall through */
+ case 11:
+ c = c + ((__u32)k[10] << 24);
+ case 10:
+ c = c + ((__u32)k[9] << 16);
+ case 9:
+ c = c + ((__u32)k[8] << 8);
+ /* the first byte of c is reserved for the length */
+ case 8:
+ b = b + ((__u32)k[7] << 24);
+ case 7:
+ b = b + ((__u32)k[6] << 16);
+ case 6:
+ b = b + ((__u32)k[5] << 8);
+ case 5:
+ b = b + k[4];
+ case 4:
+ a = a + ((__u32)k[3] << 24);
+ case 3:
+ a = a + ((__u32)k[2] << 16);
+ case 2:
+ a = a + ((__u32)k[1] << 8);
+ case 1:
+ a = a + k[0];
+ /* case 0: nothing left to add */
+ }
+ mix(a, b, c);
+
+ return c;
+}
+
+/*
+ * linux dcache hash
+ *
+ * Must match the historical dcache hash bit-for-bit; the result is part
+ * of the on-wire/placement protocol, so do not "modernize" it.
+ */
+unsigned int ceph_str_hash_linux(const char *str, unsigned int length)
+{
+ unsigned long hash = 0;
+ unsigned char c;
+
+ while (length--) {
+ c = *str++;
+ hash = (hash + (c << 4) + (c >> 4)) * 11;
+ }
+ return hash;
+}
+
+
+/*
+ * Dispatch on CEPH_STR_HASH_* type.  Returns (unsigned)-1 for an
+ * unknown type.
+ */
+unsigned int ceph_str_hash(int type, const char *s, unsigned int len)
+{
+ switch (type) {
+ case CEPH_STR_HASH_LINUX:
+ return ceph_str_hash_linux(s, len);
+ case CEPH_STR_HASH_RJENKINS:
+ return ceph_str_hash_rjenkins(s, len);
+ default:
+ return -1;
+ }
+}
+EXPORT_SYMBOL(ceph_str_hash);
+
+/* human-readable name of a CEPH_STR_HASH_* type, for debug output */
+const char *ceph_str_hash_name(int type)
+{
+ switch (type) {
+ case CEPH_STR_HASH_LINUX:
+ return "linux";
+ case CEPH_STR_HASH_RJENKINS:
+ return "rjenkins";
+ default:
+ return "unknown";
+ }
+}
+EXPORT_SYMBOL(ceph_str_hash_name);
diff --git a/libceph/ceph_strings.c b/libceph/ceph_strings.c
new file mode 100644
index 0000000..1348df9
--- /dev/null
+++ b/libceph/ceph_strings.c
@@ -0,0 +1,123 @@
+/*
+ * Ceph string constants
+ */
+#include <linux/module.h>
+#include <linux/ceph/types.h>
+
+/* map a CEPH_ENTITY_TYPE_* value to its name; never returns NULL */
+const char *ceph_entity_type_name(int type)
+{
+ switch (type) {
+ case CEPH_ENTITY_TYPE_MDS: return "mds";
+ case CEPH_ENTITY_TYPE_OSD: return "osd";
+ case CEPH_ENTITY_TYPE_MON: return "mon";
+ case CEPH_ENTITY_TYPE_CLIENT: return "client";
+ case CEPH_ENTITY_TYPE_AUTH: return "auth";
+ default: return "unknown";
+ }
+}
+
+/*
+ * Map a CEPH_OSD_OP_* opcode to its name for debug/log output.
+ * Returns "???" (never NULL) for unrecognized opcodes.
+ */
+const char *ceph_osd_op_name(int op)
+{
+ switch (op) {
+ case CEPH_OSD_OP_READ: return "read";
+ case CEPH_OSD_OP_STAT: return "stat";
+ case CEPH_OSD_OP_MAPEXT: return "mapext";
+ case CEPH_OSD_OP_SPARSE_READ: return "sparse-read";
+ case CEPH_OSD_OP_NOTIFY: return "notify";
+ case CEPH_OSD_OP_NOTIFY_ACK: return "notify-ack";
+ case CEPH_OSD_OP_ASSERT_VER: return "assert-version";
+
+ case CEPH_OSD_OP_MASKTRUNC: return "masktrunc";
+
+ case CEPH_OSD_OP_CREATE: return "create";
+ case CEPH_OSD_OP_WRITE: return "write";
+ case CEPH_OSD_OP_DELETE: return "delete";
+ case CEPH_OSD_OP_TRUNCATE: return "truncate";
+ case CEPH_OSD_OP_ZERO: return "zero";
+ case CEPH_OSD_OP_WRITEFULL: return "writefull";
+ case CEPH_OSD_OP_ROLLBACK: return "rollback";
+
+ case CEPH_OSD_OP_APPEND: return "append";
+ case CEPH_OSD_OP_STARTSYNC: return "startsync";
+ case CEPH_OSD_OP_SETTRUNC: return "settrunc";
+ case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc";
+
+ case CEPH_OSD_OP_TMAPUP: return "tmapup";
+ case CEPH_OSD_OP_TMAPGET: return "tmapget";
+ case CEPH_OSD_OP_TMAPPUT: return "tmapput";
+ case CEPH_OSD_OP_WATCH: return "watch";
+
+ case CEPH_OSD_OP_CLONERANGE: return "clonerange";
+ case CEPH_OSD_OP_ASSERT_SRC_VERSION: return "assert-src-version";
+ case CEPH_OSD_OP_SRC_CMPXATTR: return "src-cmpxattr";
+
+ case CEPH_OSD_OP_GETXATTR: return "getxattr";
+ case CEPH_OSD_OP_GETXATTRS: return "getxattrs";
+ case CEPH_OSD_OP_SETXATTR: return "setxattr";
+ case CEPH_OSD_OP_SETXATTRS: return "setxattrs";
+ case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs";
+ case CEPH_OSD_OP_RMXATTR: return "rmxattr";
+ case CEPH_OSD_OP_CMPXATTR: return "cmpxattr";
+
+ case CEPH_OSD_OP_PULL: return "pull";
+ case CEPH_OSD_OP_PUSH: return "push";
+ case CEPH_OSD_OP_BALANCEREADS: return "balance-reads";
+ case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads";
+ case CEPH_OSD_OP_SCRUB: return "scrub";
+ case CEPH_OSD_OP_SCRUB_RESERVE: return "scrub-reserve";
+ case CEPH_OSD_OP_SCRUB_UNRESERVE: return "scrub-unreserve";
+ case CEPH_OSD_OP_SCRUB_STOP: return "scrub-stop";
+ case CEPH_OSD_OP_SCRUB_MAP: return "scrub-map";
+
+ case CEPH_OSD_OP_WRLOCK: return "wrlock";
+ case CEPH_OSD_OP_WRUNLOCK: return "wrunlock";
+ case CEPH_OSD_OP_RDLOCK: return "rdlock";
+ case CEPH_OSD_OP_RDUNLOCK: return "rdunlock";
+ case CEPH_OSD_OP_UPLOCK: return "uplock";
+ case CEPH_OSD_OP_DNLOCK: return "dnlock";
+
+ case CEPH_OSD_OP_CALL: return "call";
+
+ case CEPH_OSD_OP_PGLS: return "pgls";
+ case CEPH_OSD_OP_PGLS_FILTER: return "pgls-filter";
+ case CEPH_OSD_OP_OMAPGETKEYS: return "omap-get-keys";
+ case CEPH_OSD_OP_OMAPGETVALS: return "omap-get-vals";
+ case CEPH_OSD_OP_OMAPGETHEADER: return "omap-get-header";
+ case CEPH_OSD_OP_OMAPGETVALSBYKEYS: return "omap-get-vals-by-keys";
+ case CEPH_OSD_OP_OMAPSETVALS: return "omap-set-vals";
+ case CEPH_OSD_OP_OMAPSETHEADER: return "omap-set-header";
+ case CEPH_OSD_OP_OMAPCLEAR: return "omap-clear";
+ case CEPH_OSD_OP_OMAPRMKEYS: return "omap-rm-keys";
+ }
+ return "???";
+}
+
+/* map a CEPH_OSD_* state bit to its name; "???" for unknown values */
+const char *ceph_osd_state_name(int s)
+{
+ switch (s) {
+ case CEPH_OSD_EXISTS:
+ return "exists";
+ case CEPH_OSD_UP:
+ return "up";
+ case CEPH_OSD_AUTOOUT:
+ return "autoout";
+ case CEPH_OSD_NEW:
+ return "new";
+ default:
+ return "???";
+ }
+}
+
+/* map a POOL_OP_* opcode to its name; "???" for unknown values */
+const char *ceph_pool_op_name(int op)
+{
+ switch (op) {
+ case POOL_OP_CREATE: return "create";
+ case POOL_OP_DELETE: return "delete";
+ case POOL_OP_AUID_CHANGE: return "auid change";
+ case POOL_OP_CREATE_SNAP: return "create snap";
+ case POOL_OP_DELETE_SNAP: return "delete snap";
+ case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap";
+ case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap";
+ }
+ return "???";
+}
diff --git a/libceph/crush/crush.c b/libceph/crush/crush.c
new file mode 100644
index 0000000..16bc199
--- /dev/null
+++ b/libceph/crush/crush.c
@@ -0,0 +1,129 @@
+
+#ifdef __KERNEL__
+# include <linux/slab.h>
+#else
+# include <stdlib.h>
+# include <assert.h>
+# define kfree(x) do { if (x) free(x); } while (0)
+# define BUG_ON(x) assert(!(x))
+#endif
+
+#include <linux/crush/crush.h>
+
+/* map a CRUSH_BUCKET_* algorithm id to its name; never returns NULL */
+const char *crush_bucket_alg_name(int alg)
+{
+ switch (alg) {
+ case CRUSH_BUCKET_UNIFORM: return "uniform";
+ case CRUSH_BUCKET_LIST: return "list";
+ case CRUSH_BUCKET_TREE: return "tree";
+ case CRUSH_BUCKET_STRAW: return "straw";
+ default: return "unknown";
+ }
+}
+
+/**
+ * crush_get_bucket_item_weight - Get weight of an item in given bucket
+ * @b: bucket pointer
+ * @p: item index in bucket
+ *
+ * Returns 0 for an out-of-range index or an unknown bucket algorithm.
+ * Each algorithm stores weights differently, hence the downcasts.
+ */
+int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
+{
+ if ((__u32)p >= b->size)
+ return 0;
+
+ switch (b->alg) {
+ case CRUSH_BUCKET_UNIFORM:
+ /* uniform buckets share one weight for every item */
+ return ((struct crush_bucket_uniform *)b)->item_weight;
+ case CRUSH_BUCKET_LIST:
+ return ((struct crush_bucket_list *)b)->item_weights[p];
+ case CRUSH_BUCKET_TREE:
+ return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
+ case CRUSH_BUCKET_STRAW:
+ return ((struct crush_bucket_straw *)b)->item_weights[p];
+ }
+ return 0;
+}
+
+/*
+ * Per-algorithm bucket destructors: free each algorithm's extra arrays,
+ * then the common header arrays (perm, items), then the bucket itself.
+ * kfree(NULL) is a no-op, so partially built buckets are handled.
+ */
+void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b)
+{
+ kfree(b->h.perm);
+ kfree(b->h.items);
+ kfree(b);
+}
+
+void crush_destroy_bucket_list(struct crush_bucket_list *b)
+{
+ kfree(b->item_weights);
+ kfree(b->sum_weights);
+ kfree(b->h.perm);
+ kfree(b->h.items);
+ kfree(b);
+}
+
+void crush_destroy_bucket_tree(struct crush_bucket_tree *b)
+{
+ kfree(b->h.perm);
+ kfree(b->h.items);
+ kfree(b->node_weights);
+ kfree(b);
+}
+
+void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
+{
+ kfree(b->straws);
+ kfree(b->item_weights);
+ kfree(b->h.perm);
+ kfree(b->h.items);
+ kfree(b);
+}
+
+/* dispatch to the per-algorithm destructor for @b */
+void crush_destroy_bucket(struct crush_bucket *b)
+{
+ switch (b->alg) {
+ case CRUSH_BUCKET_UNIFORM:
+ crush_destroy_bucket_uniform((struct crush_bucket_uniform *)b);
+ break;
+ case CRUSH_BUCKET_LIST:
+ crush_destroy_bucket_list((struct crush_bucket_list *)b);
+ break;
+ case CRUSH_BUCKET_TREE:
+ crush_destroy_bucket_tree((struct crush_bucket_tree *)b);
+ break;
+ case CRUSH_BUCKET_STRAW:
+ crush_destroy_bucket_straw((struct crush_bucket_straw *)b);
+ break;
+ }
+}
+
+/**
+ * crush_destroy - Destroy a crush_map
+ * @map: crush_map pointer
+ *
+ * Frees all buckets and rules, tolerating sparse (NULL) bucket slots.
+ */
+void crush_destroy(struct crush_map *map)
+{
+ /* buckets */
+ if (map->buckets) {
+ __s32 b;
+ for (b = 0; b < map->max_buckets; b++) {
+ if (map->buckets[b] == NULL)
+ continue;
+ crush_destroy_bucket(map->buckets[b]);
+ }
+ kfree(map->buckets);
+ }
+
+ /* rules */
+ if (map->rules) {
+ __u32 b;
+ for (b = 0; b < map->max_rules; b++)
+ crush_destroy_rule(map->rules[b]);
+ kfree(map->rules);
+ }
+
+ kfree(map);
+}
+
+/* rules are a single allocation (steps are a flexible tail) */
+void crush_destroy_rule(struct crush_rule *rule)
+{
+ kfree(rule);
+}
diff --git a/libceph/crush/hash.c b/libceph/crush/hash.c
new file mode 100644
index 0000000..5bb63e3
--- /dev/null
+++ b/libceph/crush/hash.c
@@ -0,0 +1,149 @@
+
+#include <linux/types.h>
+#include <linux/crush/hash.h>
+
+/*
+ * Robert Jenkins' function for mixing 32-bit values
+ * http://burtleburtle.net/bob/hash/evahash.html
+ * a, b = random bits, c = input and output
+ */
+#define crush_hashmix(a, b, c) do { \
+ a = a-b; a = a-c; a = a^(c>>13); \
+ b = b-c; b = b-a; b = b^(a<<8); \
+ c = c-a; c = c-b; c = c^(b>>13); \
+ a = a-b; a = a-c; a = a^(c>>12); \
+ b = b-c; b = b-a; b = b^(a<<16); \
+ c = c-a; c = c-b; c = c^(b>>5); \
+ a = a-b; a = a-c; a = a^(c>>3); \
+ b = b-c; b = b-a; b = b^(a<<10); \
+ c = c-a; c = c-b; c = c^(b>>15); \
+ } while (0)
+
+#define crush_hash_seed 1315423911
+
+/*
+ * Fixed-arity rjenkins1 variants.  The exact constants and mixing order
+ * are part of the CRUSH placement algorithm and must not change, or
+ * existing clusters would map objects differently.
+ */
+static __u32 crush_hash32_rjenkins1(__u32 a)
+{
+ __u32 hash = crush_hash_seed ^ a;
+ __u32 b = a;
+ __u32 x = 231232;
+ __u32 y = 1232;
+ crush_hashmix(b, x, hash);
+ crush_hashmix(y, a, hash);
+ return hash;
+}
+
+static __u32 crush_hash32_rjenkins1_2(__u32 a, __u32 b)
+{
+ __u32 hash = crush_hash_seed ^ a ^ b;
+ __u32 x = 231232;
+ __u32 y = 1232;
+ crush_hashmix(a, b, hash);
+ crush_hashmix(x, a, hash);
+ crush_hashmix(b, y, hash);
+ return hash;
+}
+
+static __u32 crush_hash32_rjenkins1_3(__u32 a, __u32 b, __u32 c)
+{
+ __u32 hash = crush_hash_seed ^ a ^ b ^ c;
+ __u32 x = 231232;
+ __u32 y = 1232;
+ crush_hashmix(a, b, hash);
+ crush_hashmix(c, x, hash);
+ crush_hashmix(y, a, hash);
+ crush_hashmix(b, x, hash);
+ crush_hashmix(y, c, hash);
+ return hash;
+}
+
+static __u32 crush_hash32_rjenkins1_4(__u32 a, __u32 b, __u32 c, __u32 d)
+{
+ __u32 hash = crush_hash_seed ^ a ^ b ^ c ^ d;
+ __u32 x = 231232;
+ __u32 y = 1232;
+ crush_hashmix(a, b, hash);
+ crush_hashmix(c, d, hash);
+ crush_hashmix(a, x, hash);
+ crush_hashmix(y, b, hash);
+ crush_hashmix(c, x, hash);
+ crush_hashmix(y, d, hash);
+ return hash;
+}
+
+static __u32 crush_hash32_rjenkins1_5(__u32 a, __u32 b, __u32 c, __u32 d,
+ __u32 e)
+{
+ __u32 hash = crush_hash_seed ^ a ^ b ^ c ^ d ^ e;
+ __u32 x = 231232;
+ __u32 y = 1232;
+ crush_hashmix(a, b, hash);
+ crush_hashmix(c, d, hash);
+ crush_hashmix(e, x, hash);
+ crush_hashmix(y, a, hash);
+ crush_hashmix(b, x, hash);
+ crush_hashmix(y, c, hash);
+ crush_hashmix(d, x, hash);
+ crush_hashmix(y, e, hash);
+ return hash;
+}
+
+
+/*
+ * Public dispatchers: select a hash implementation by CRUSH_HASH_*
+ * type.  Unknown types hash to 0 rather than failing.
+ */
+__u32 crush_hash32(int type, __u32 a)
+{
+ switch (type) {
+ case CRUSH_HASH_RJENKINS1:
+ return crush_hash32_rjenkins1(a);
+ default:
+ return 0;
+ }
+}
+
+__u32 crush_hash32_2(int type, __u32 a, __u32 b)
+{
+ switch (type) {
+ case CRUSH_HASH_RJENKINS1:
+ return crush_hash32_rjenkins1_2(a, b);
+ default:
+ return 0;
+ }
+}
+
+__u32 crush_hash32_3(int type, __u32 a, __u32 b, __u32 c)
+{
+ switch (type) {
+ case CRUSH_HASH_RJENKINS1:
+ return crush_hash32_rjenkins1_3(a, b, c);
+ default:
+ return 0;
+ }
+}
+
+__u32 crush_hash32_4(int type, __u32 a, __u32 b, __u32 c, __u32 d)
+{
+ switch (type) {
+ case CRUSH_HASH_RJENKINS1:
+ return crush_hash32_rjenkins1_4(a, b, c, d);
+ default:
+ return 0;
+ }
+}
+
+__u32 crush_hash32_5(int type, __u32 a, __u32 b, __u32 c, __u32 d, __u32 e)
+{
+ switch (type) {
+ case CRUSH_HASH_RJENKINS1:
+ return crush_hash32_rjenkins1_5(a, b, c, d, e);
+ default:
+ return 0;
+ }
+}
+
+/* human-readable name of a CRUSH_HASH_* type, for debug output */
+const char *crush_hash_name(int type)
+{
+ switch (type) {
+ case CRUSH_HASH_RJENKINS1:
+ return "rjenkins1";
+ default:
+ return "unknown";
+ }
+}
diff --git a/libceph/crush/mapper.c b/libceph/crush/mapper.c
new file mode 100644
index 0000000..a1ef53c
--- /dev/null
+++ b/libceph/crush/mapper.c
@@ -0,0 +1,819 @@
+
+#ifdef __KERNEL__
+# include <linux/string.h>
+# include <linux/slab.h>
+# include <linux/bug.h>
+# include <linux/kernel.h>
+# ifndef dprintk
+# define dprintk(args...)
+# endif
+#else
+# include <string.h>
+# include <stdio.h>
+# include <stdlib.h>
+# include <assert.h>
+# define BUG_ON(x) assert(!(x))
+# define dprintk(args...) /* printf(args) */
+# define kmalloc(x, f) malloc(x)
+# define kfree(x) free(x)
+#endif
+
+#include <linux/crush/crush.h>
+#include <linux/crush/hash.h>
+#include <linux/crush/mapper.h>
+
+/*
+ * Implement the core CRUSH mapping algorithm.
+ */
+
+/**
+ * crush_find_rule - find a crush_rule id for a given ruleset, type, and size.
+ * @map: the crush_map
+ * @ruleset: the storage ruleset id (user defined)
+ * @type: storage ruleset type (user defined)
+ * @size: output set size
+ *
+ * Returns the index of the first rule whose mask matches the ruleset,
+ * type, and whose [min_size, max_size] range contains @size, or -1 if
+ * no rule matches.
+ */
+int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size)
+{
+ __u32 i;
+
+ for (i = 0; i < map->max_rules; i++) {
+ if (map->rules[i] &&
+ map->rules[i]->mask.ruleset == ruleset &&
+ map->rules[i]->mask.type == type &&
+ map->rules[i]->mask.min_size <= size &&
+ map->rules[i]->mask.max_size >= size)
+ return i;
+ }
+ return -1;
+}
+
+
+/*
+ * bucket choose methods
+ *
+ * For each bucket algorithm, we have a "choose" method that, given a
+ * crush input @x and replica position (usually, position in output set) @r,
+ * will produce an item in the bucket.
+ */
+
+/*
+ * Choose based on a random permutation of the bucket.
+ *
+ * We used to use some prime number arithmetic to do this, but it
+ * wasn't very random, and had some other bad behaviors. Instead, we
+ * calculate an actual random permutation of the bucket members.
+ * Since this is expensive, we optimize for the r=0 case, which
+ * captures the vast majority of calls.
+ *
+ * The permutation state (perm, perm_n, perm_x) is cached in the bucket
+ * itself, keyed by input @x, so consecutive calls with the same x extend
+ * the same Fisher-Yates shuffle instead of recomputing it.
+ * NOTE(review): this mutates shared bucket state; presumably callers
+ * serialize access to the map -- confirm locking at the call sites.
+ */
+static int bucket_perm_choose(struct crush_bucket *bucket,
+ int x, int r)
+{
+ unsigned int pr = r % bucket->size;
+ unsigned int i, s;
+
+ /* start a new permutation if @x has changed */
+ if (bucket->perm_x != (__u32)x || bucket->perm_n == 0) {
+ dprintk("bucket %d new x=%d\n", bucket->id, x);
+ bucket->perm_x = x;
+
+ /* optimize common r=0 case */
+ if (pr == 0) {
+ s = crush_hash32_3(bucket->hash, x, bucket->id, 0) %
+ bucket->size;
+ bucket->perm[0] = s;
+ bucket->perm_n = 0xffff; /* magic value, see below */
+ goto out;
+ }
+
+ for (i = 0; i < bucket->size; i++)
+ bucket->perm[i] = i;
+ bucket->perm_n = 0;
+ } else if (bucket->perm_n == 0xffff) {
+ /* clean up after the r=0 case above */
+ for (i = 1; i < bucket->size; i++)
+ bucket->perm[i] = i;
+ bucket->perm[bucket->perm[0]] = 0;
+ bucket->perm_n = 1;
+ }
+
+ /* calculate permutation up to pr */
+ for (i = 0; i < bucket->perm_n; i++)
+ dprintk(" perm_choose have %d: %d\n", i, bucket->perm[i]);
+ while (bucket->perm_n <= pr) {
+ unsigned int p = bucket->perm_n;
+ /* no point in swapping the final entry */
+ if (p < bucket->size - 1) {
+ i = crush_hash32_3(bucket->hash, x, bucket->id, p) %
+ (bucket->size - p);
+ if (i) {
+ unsigned int t = bucket->perm[p + i];
+ bucket->perm[p + i] = bucket->perm[p];
+ bucket->perm[p] = t;
+ }
+ dprintk(" perm_choose swap %d with %d\n", p, p+i);
+ }
+ bucket->perm_n++;
+ }
+ for (i = 0; i < bucket->size; i++)
+ dprintk(" perm_choose %d: %d\n", i, bucket->perm[i]);
+
+ s = bucket->perm[pr];
+out:
+ dprintk(" perm_choose %d sz=%d x=%d r=%d (%d) s=%d\n", bucket->id,
+ bucket->size, x, r, pr, s);
+ return bucket->items[s];
+}
+
+/* uniform: all items weighted equally, so a permutation pick suffices */
+static int bucket_uniform_choose(struct crush_bucket_uniform *bucket,
+ int x, int r)
+{
+ return bucket_perm_choose(&bucket->h, x, r);
+}
+
+/* list */
+/*
+ * Walk the item list from most- to least-recently added.  For each item,
+ * draw a 16-bit fixed-point random value and keep the item if
+ * draw * sum_weights[i] >> 16 falls below that item's own weight, i.e.
+ * each item is taken with probability item_weight / cumulative_weight.
+ */
+static int bucket_list_choose(struct crush_bucket_list *bucket,
+ int x, int r)
+{
+ int i;
+
+ for (i = bucket->h.size-1; i >= 0; i--) {
+ __u64 w = crush_hash32_4(bucket->h.hash,x, bucket->h.items[i],
+ r, bucket->h.id);
+ w &= 0xffff;
+ dprintk("list_choose i=%d x=%d r=%d item %d weight %x "
+ "sw %x rand %llx",
+ i, x, r, bucket->h.items[i], bucket->item_weights[i],
+ bucket->sum_weights[i], w);
+ w *= bucket->sum_weights[i];
+ w = w >> 16;
+ /*dprintk(" scaled %llx\n", w);*/
+ if (w < bucket->item_weights[i])
+ return bucket->h.items[i];
+ }
+
+ /* should not happen if the precomputed sums are consistent */
+ dprintk("bad list sums for bucket %d\n", bucket->h.id);
+ return bucket->h.items[0];
+}
+
+
+/* (binary) tree */
+/*
+ * The tree bucket stores its nodes in a flat array using a 1-based
+ * numbering where a node's height equals the number of trailing zero
+ * bits in its index: odd indices are leaves (height 0), and the children
+ * of node x at height h are x +/- 2^(h-1).
+ */
+
+/* height of node @n = count of trailing zero bits in its index */
+static int height(int n)
+{
+ int h = 0;
+ while ((n & 1) == 0) {
+ h++;
+ n = n >> 1;
+ }
+ return h;
+}
+
+/* index of the left child of node @x */
+static int left(int x)
+{
+ int h = height(x);
+ return x - (1 << (h-1));
+}
+
+/* index of the right child of node @x */
+static int right(int x)
+{
+ int h = height(x);
+ return x + (1 << (h-1));
+}
+
+/* leaves have odd indices */
+static int terminal(int x)
+{
+ return x & 1;
+}
+
+/*
+ * Descend the weight tree from the root: at each internal node, draw a
+ * hashed point in [0, node_weight) and go left if it lands within the
+ * left subtree's weight, right otherwise.  The leaf index maps to an
+ * item via n >> 1.
+ */
+static int bucket_tree_choose(struct crush_bucket_tree *bucket,
+ int x, int r)
+{
+ int n;
+ __u32 w;
+ __u64 t;
+
+ /* start at root */
+ n = bucket->num_nodes >> 1;
+
+ while (!terminal(n)) {
+ int l;
+ /* pick point in [0, w) */
+ w = bucket->node_weights[n];
+ t = (__u64)crush_hash32_4(bucket->h.hash, x, n, r,
+ bucket->h.id) * (__u64)w;
+ t = t >> 32;
+
+ /* descend to the left or right? */
+ l = left(n);
+ if (t < bucket->node_weights[l])
+ n = l;
+ else
+ n = right(n);
+ }
+
+ return bucket->h.items[n >> 1];
+}
+
+
+/* straw */
+
+/*
+ * Each item draws a hashed 16-bit "straw" scaled by its precomputed
+ * straw length; the item with the longest scaled straw wins.  This gives
+ * weight-proportional selection that is stable under item add/remove.
+ */
+static int bucket_straw_choose(struct crush_bucket_straw *bucket,
+ int x, int r)
+{
+ __u32 i;
+ int high = 0;
+ __u64 high_draw = 0;
+ __u64 draw;
+
+ for (i = 0; i < bucket->h.size; i++) {
+ draw = crush_hash32_3(bucket->h.hash, x, bucket->h.items[i], r);
+ draw &= 0xffff;
+ draw *= bucket->straws[i];
+ if (i == 0 || draw > high_draw) {
+ high = i;
+ high_draw = draw;
+ }
+ }
+ return bucket->h.items[high];
+}
+
+/*
+ * Dispatch to the per-algorithm choose method for bucket @in.
+ * Falls back to the first item (with a debug message) for an
+ * unrecognized algorithm.  The bucket must be non-empty.
+ */
+static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
+{
+ dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
+ BUG_ON(in->size == 0);
+ switch (in->alg) {
+ case CRUSH_BUCKET_UNIFORM:
+ return bucket_uniform_choose((struct crush_bucket_uniform *)in,
+ x, r);
+ case CRUSH_BUCKET_LIST:
+ return bucket_list_choose((struct crush_bucket_list *)in,
+ x, r);
+ case CRUSH_BUCKET_TREE:
+ return bucket_tree_choose((struct crush_bucket_tree *)in,
+ x, r);
+ case CRUSH_BUCKET_STRAW:
+ return bucket_straw_choose((struct crush_bucket_straw *)in,
+ x, r);
+ default:
+ dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
+ return in->items[0];
+ }
+}
+
+/*
+ * true if device is marked "out" (failed, fully offloaded)
+ * of the cluster
+ *
+ * Weights are 16.16 fixed point: >= 0x10000 means fully in, 0 means
+ * fully out, and intermediate values reject a hashed fraction of
+ * inputs proportional to the offload.  Items beyond the weight vector
+ * are treated as out.
+ */
+static int is_out(const struct crush_map *map,
+ const __u32 *weight, int weight_max,
+ int item, int x)
+{
+ if (item >= weight_max)
+ return 1;
+ if (weight[item] >= 0x10000)
+ return 0;
+ if (weight[item] == 0)
+ return 1;
+ if ((crush_hash32_2(CRUSH_HASH_RJENKINS1, x, item) & 0xffff)
+ < weight[item])
+ return 0;
+ return 1;
+}
+
+/**
+ * crush_choose_firstn - choose numrep distinct items of given type
+ * @map: the crush_map
+ * @bucket: the bucket we are choose an item from
+ * @x: crush input value
+ * @numrep: the number of items to choose
+ * @type: the type of item to choose
+ * @out: pointer to output vector
+ * @outpos: our position in that vector
+ * @tries: number of attempts to make
+ * @recurse_tries: number of attempts to have recursive chooseleaf make
+ * @local_retries: localized retries
+ * @local_fallback_retries: localized fallback retries
+ * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
+ * @vary_r: pass r to recursive calls
+ * @out2: second output vector for leaf items (if @recurse_to_leaf)
+ * @parent_r: r value passed from the parent
+ *
+ * Returns the new output position, i.e. outpos plus the number of items
+ * actually placed in @out (and @out2 when recursing); may be less than
+ * @numrep if the retry budgets are exhausted.
+ */
+static int crush_choose_firstn(const struct crush_map *map,
+ struct crush_bucket *bucket,
+ const __u32 *weight, int weight_max,
+ int x, int numrep, int type,
+ int *out, int outpos,
+ unsigned int tries,
+ unsigned int recurse_tries,
+ unsigned int local_retries,
+ unsigned int local_fallback_retries,
+ int recurse_to_leaf,
+ unsigned int vary_r,
+ int *out2,
+ int parent_r)
+{
+ int rep;
+ unsigned int ftotal, flocal;
+ int retry_descent, retry_bucket, skip_rep;
+ struct crush_bucket *in = bucket;
+ int r;
+ int i;
+ int item = 0;
+ int itemtype;
+ int collide, reject;
+
+ dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
+ recurse_to_leaf ? "_LEAF" : "",
+ bucket->id, x, outpos, numrep,
+ tries, recurse_tries, local_retries, local_fallback_retries,
+ parent_r);
+
+ for (rep = outpos; rep < numrep; rep++) {
+ /* keep trying until we get a non-out, non-colliding item */
+ ftotal = 0;
+ skip_rep = 0;
+ do {
+ retry_descent = 0;
+ in = bucket; /* initial bucket */
+
+ /* choose through intervening buckets */
+ flocal = 0;
+ do {
+ collide = 0;
+ retry_bucket = 0;
+ r = rep + parent_r;
+ /* r' = r + f_total */
+ r += ftotal;
+
+ /* bucket choose */
+ if (in->size == 0) {
+ reject = 1;
+ goto reject;
+ }
+ if (local_fallback_retries > 0 &&
+ flocal >= (in->size>>1) &&
+ flocal > local_fallback_retries)
+ item = bucket_perm_choose(in, x, r);
+ else
+ item = crush_bucket_choose(in, x, r);
+ if (item >= map->max_devices) {
+ dprintk(" bad item %d\n", item);
+ skip_rep = 1;
+ break;
+ }
+
+ /* desired type? (negative ids are buckets, >= 0 are devices) */
+ if (item < 0)
+ itemtype = map->buckets[-1-item]->type;
+ else
+ itemtype = 0;
+ dprintk(" item %d type %d\n", item, itemtype);
+
+ /* keep going? */
+ if (itemtype != type) {
+ if (item >= 0 ||
+ (-1-item) >= map->max_buckets) {
+ dprintk(" bad item type %d\n", type);
+ skip_rep = 1;
+ break;
+ }
+ in = map->buckets[-1-item];
+ retry_bucket = 1;
+ continue;
+ }
+
+ /* collision? */
+ for (i = 0; i < outpos; i++) {
+ if (out[i] == item) {
+ collide = 1;
+ break;
+ }
+ }
+
+ reject = 0;
+ if (!collide && recurse_to_leaf) {
+ if (item < 0) {
+ int sub_r;
+ if (vary_r)
+ sub_r = r >> (vary_r-1);
+ else
+ sub_r = 0;
+ if (crush_choose_firstn(map,
+ map->buckets[-1-item],
+ weight, weight_max,
+ x, outpos+1, 0,
+ out2, outpos,
+ recurse_tries, 0,
+ local_retries,
+ local_fallback_retries,
+ 0,
+ vary_r,
+ NULL,
+ sub_r) <= outpos)
+ /* didn't get leaf */
+ reject = 1;
+ } else {
+ /* we already have a leaf! */
+ out2[outpos] = item;
+ }
+ }
+
+ if (!reject) {
+ /* out? */
+ if (itemtype == 0)
+ reject = is_out(map, weight,
+ weight_max,
+ item, x);
+ else
+ reject = 0;
+ }
+
+reject:
+ if (reject || collide) {
+ ftotal++;
+ flocal++;
+
+ if (collide && flocal <= local_retries)
+ /* retry locally a few times */
+ retry_bucket = 1;
+ else if (local_fallback_retries > 0 &&
+ flocal <= in->size + local_fallback_retries)
+ /* exhaustive bucket search */
+ retry_bucket = 1;
+ else if (ftotal < tries)
+ /* then retry descent */
+ retry_descent = 1;
+ else
+ /* else give up */
+ skip_rep = 1;
+ dprintk(" reject %d collide %d "
+ "ftotal %u flocal %u\n",
+ reject, collide, ftotal,
+ flocal);
+ }
+ } while (retry_bucket);
+ } while (retry_descent);
+
+ if (skip_rep) {
+ dprintk("skip rep\n");
+ continue;
+ }
+
+ dprintk("CHOOSE got %d\n", item);
+ out[outpos] = item;
+ outpos++;
+ }
+
+ dprintk("CHOOSE returns %d\n", outpos);
+ return outpos;
+}
+
+
+/**
+ * crush_choose_indep: alternative breadth-first positionally stable mapping
+ *
+ * Unlike crush_choose_firstn, failed slots keep their position: result
+ * slots [outpos, outpos+left) are filled independently, and a slot that
+ * cannot be satisfied within @tries attempts is left as CRUSH_ITEM_NONE
+ * rather than being compacted away.  Used for erasure-coded placement,
+ * where position in the output set is meaningful.
+ */
+static void crush_choose_indep(const struct crush_map *map,
+ struct crush_bucket *bucket,
+ const __u32 *weight, int weight_max,
+ int x, int left, int numrep, int type,
+ int *out, int outpos,
+ unsigned int tries,
+ unsigned int recurse_tries,
+ int recurse_to_leaf,
+ int *out2,
+ int parent_r)
+{
+ struct crush_bucket *in = bucket;
+ int endpos = outpos + left;
+ int rep;
+ unsigned int ftotal;
+ int r;
+ int i;
+ int item = 0;
+ int itemtype;
+ int collide;
+
+ dprintk("CHOOSE%s INDEP bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
+ bucket->id, x, outpos, numrep);
+
+ /* initially my result is undefined */
+ for (rep = outpos; rep < endpos; rep++) {
+ out[rep] = CRUSH_ITEM_UNDEF;
+ if (out2)
+ out2[rep] = CRUSH_ITEM_UNDEF;
+ }
+
+ for (ftotal = 0; left > 0 && ftotal < tries; ftotal++) {
+ for (rep = outpos; rep < endpos; rep++) {
+ if (out[rep] != CRUSH_ITEM_UNDEF)
+ continue;
+
+ in = bucket; /* initial bucket */
+
+ /* choose through intervening buckets */
+ for (;;) {
+ /* note: we base the choice on the position
+ * even in the nested call. that means that
+ * if the first layer chooses the same bucket
+ * in a different position, we will tend to
+ * choose a different item in that bucket.
+ * this will involve more devices in data
+ * movement and tend to distribute the load.
+ */
+ r = rep + parent_r;
+
+ /* be careful */
+ if (in->alg == CRUSH_BUCKET_UNIFORM &&
+ in->size % numrep == 0)
+ /* r'=r+(n+1)*f_total */
+ r += (numrep+1) * ftotal;
+ else
+ /* r' = r + n*f_total */
+ r += numrep * ftotal;
+
+ /* bucket choose */
+ if (in->size == 0) {
+ dprintk(" empty bucket\n");
+ break;
+ }
+
+ item = crush_bucket_choose(in, x, r);
+ if (item >= map->max_devices) {
+ dprintk(" bad item %d\n", item);
+ out[rep] = CRUSH_ITEM_NONE;
+ if (out2)
+ out2[rep] = CRUSH_ITEM_NONE;
+ left--;
+ break;
+ }
+
+ /* desired type? (negative ids are buckets, >= 0 are devices) */
+ if (item < 0)
+ itemtype = map->buckets[-1-item]->type;
+ else
+ itemtype = 0;
+ dprintk(" item %d type %d\n", item, itemtype);
+
+ /* keep going? */
+ if (itemtype != type) {
+ if (item >= 0 ||
+ (-1-item) >= map->max_buckets) {
+ dprintk(" bad item type %d\n", type);
+ out[rep] = CRUSH_ITEM_NONE;
+ if (out2)
+ out2[rep] =
+ CRUSH_ITEM_NONE;
+ left--;
+ break;
+ }
+ in = map->buckets[-1-item];
+ continue;
+ }
+
+ /* collision? */
+ collide = 0;
+ for (i = outpos; i < endpos; i++) {
+ if (out[i] == item) {
+ collide = 1;
+ break;
+ }
+ }
+ if (collide)
+ break;
+
+ if (recurse_to_leaf) {
+ if (item < 0) {
+ crush_choose_indep(map,
+ map->buckets[-1-item],
+ weight, weight_max,
+ x, 1, numrep, 0,
+ out2, rep,
+ recurse_tries, 0,
+ 0, NULL, r);
+ if (out2[rep] == CRUSH_ITEM_NONE) {
+ /* placed nothing; no leaf */
+ break;
+ }
+ } else {
+ /* we already have a leaf! */
+ out2[rep] = item;
+ }
+ }
+
+ /* out? */
+ if (itemtype == 0 &&
+ is_out(map, weight, weight_max, item, x))
+ break;
+
+ /* yay! */
+ out[rep] = item;
+ left--;
+ break;
+ }
+ }
+ }
+ /* any slot still undefined after all tries is marked NONE */
+ for (rep = outpos; rep < endpos; rep++) {
+ if (out[rep] == CRUSH_ITEM_UNDEF) {
+ out[rep] = CRUSH_ITEM_NONE;
+ }
+ if (out2 && out2[rep] == CRUSH_ITEM_UNDEF) {
+ out2[rep] = CRUSH_ITEM_NONE;
+ }
+ }
+}
+
+/**
+ * crush_do_rule - calculate a mapping with the given input and rule
+ * @map: the crush_map
+ * @ruleno: the rule id
+ * @x: hash input
+ * @result: pointer to result vector
+ * @result_max: maximum result size
+ * @weight: weight vector (for map leaves)
+ * @weight_max: size of weight vector
+ * @scratch: scratch vector for private use; must be >= 3 * result_max
+ *
+ * Interprets the rule's step program (take / set_* / choose* / emit)
+ * and returns the number of items written to @result, or 0 for a bad
+ * @ruleno.
+ */
+int crush_do_rule(const struct crush_map *map,
+ int ruleno, int x, int *result, int result_max,
+ const __u32 *weight, int weight_max,
+ int *scratch)
+{
+ int result_len;
+ int *a = scratch;
+ int *b = scratch + result_max;
+ int *c = scratch + result_max*2;
+ int recurse_to_leaf;
+ int *w;
+ int wsize = 0;
+ int *o;
+ int osize;
+ int *tmp;
+ struct crush_rule *rule;
+ __u32 step;
+ int i, j;
+ int numrep;
+ /*
+ * the original choose_total_tries value was off by one (it
+ * counted "retries" and not "tries"). add one.
+ */
+ int choose_tries = map->choose_total_tries + 1;
+ int choose_leaf_tries = 0;
+ /*
+ * the local tries values were counted as "retries", though,
+ * and need no adjustment
+ */
+ int choose_local_retries = map->choose_local_tries;
+ int choose_local_fallback_retries = map->choose_local_fallback_tries;
+
+ int vary_r = map->chooseleaf_vary_r;
+
+ if ((__u32)ruleno >= map->max_rules) {
+ dprintk(" bad ruleno %d\n", ruleno);
+ return 0;
+ }
+
+ rule = map->rules[ruleno];
+ result_len = 0;
+ w = a;
+ o = b;
+
+ for (step = 0; step < rule->len; step++) {
+ int firstn = 0;
+ struct crush_rule_step *curstep = &rule->steps[step];
+
+ switch (curstep->op) {
+ case CRUSH_RULE_TAKE:
+ /* seed the working vector with a single bucket/device id */
+ w[0] = curstep->arg1;
+ wsize = 1;
+ break;
+
+ case CRUSH_RULE_SET_CHOOSE_TRIES:
+ if (curstep->arg1 > 0)
+ choose_tries = curstep->arg1;
+ break;
+
+ case CRUSH_RULE_SET_CHOOSELEAF_TRIES:
+ if (curstep->arg1 > 0)
+ choose_leaf_tries = curstep->arg1;
+ break;
+
+ case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES:
+ if (curstep->arg1 >= 0)
+ choose_local_retries = curstep->arg1;
+ break;
+
+ case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES:
+ if (curstep->arg1 >= 0)
+ choose_local_fallback_retries = curstep->arg1;
+ break;
+
+ case CRUSH_RULE_SET_CHOOSELEAF_VARY_R:
+ if (curstep->arg1 >= 0)
+ vary_r = curstep->arg1;
+ break;
+
+ case CRUSH_RULE_CHOOSELEAF_FIRSTN:
+ case CRUSH_RULE_CHOOSE_FIRSTN:
+ firstn = 1;
+ /* fall through */
+ case CRUSH_RULE_CHOOSELEAF_INDEP:
+ case CRUSH_RULE_CHOOSE_INDEP:
+ if (wsize == 0)
+ break;
+
+ recurse_to_leaf =
+ curstep->op ==
+ CRUSH_RULE_CHOOSELEAF_FIRSTN ||
+ curstep->op ==
+ CRUSH_RULE_CHOOSELEAF_INDEP;
+
+ /* reset output */
+ osize = 0;
+
+ for (i = 0; i < wsize; i++) {
+ /*
+ * see CRUSH_N, CRUSH_N_MINUS macros.
+ * basically, numrep <= 0 means relative to
+ * the provided result_max
+ */
+ numrep = curstep->arg1;
+ if (numrep <= 0) {
+ numrep += result_max;
+ if (numrep <= 0)
+ continue;
+ }
+ j = 0;
+ if (firstn) {
+ int recurse_tries;
+ if (choose_leaf_tries)
+ recurse_tries =
+ choose_leaf_tries;
+ else if (map->chooseleaf_descend_once)
+ recurse_tries = 1;
+ else
+ recurse_tries = choose_tries;
+ osize += crush_choose_firstn(
+ map,
+ map->buckets[-1-w[i]],
+ weight, weight_max,
+ x, numrep,
+ curstep->arg2,
+ o+osize, j,
+ choose_tries,
+ recurse_tries,
+ choose_local_retries,
+ choose_local_fallback_retries,
+ recurse_to_leaf,
+ vary_r,
+ c+osize,
+ 0);
+ } else {
+ crush_choose_indep(
+ map,
+ map->buckets[-1-w[i]],
+ weight, weight_max,
+ x, numrep, numrep,
+ curstep->arg2,
+ o+osize, j,
+ choose_tries,
+ choose_leaf_tries ?
+ choose_leaf_tries : 1,
+ recurse_to_leaf,
+ c+osize,
+ 0);
+ osize += numrep;
+ }
+ }
+
+ if (recurse_to_leaf)
+ /* copy final _leaf_ values to output set */
+ memcpy(o, c, osize*sizeof(*o));
+
+ /* swap o and w arrays */
+ tmp = o;
+ o = w;
+ w = tmp;
+ wsize = osize;
+ break;
+
+
+ case CRUSH_RULE_EMIT:
+ /* append working vector to the caller's result and clear it */
+ for (i = 0; i < wsize && result_len < result_max; i++) {
+ result[result_len] = w[i];
+ result_len++;
+ }
+ wsize = 0;
+ break;
+
+ default:
+ dprintk(" unknown op %d at step %d\n",
+ curstep->op, step);
+ break;
+ }
+ }
+ return result_len;
+}
+
+
diff --git a/libceph/crypto.c b/libceph/crypto.c
new file mode 100644
index 0000000..6e7a236
--- /dev/null
+++ b/libceph/crypto.c
@@ -0,0 +1,487 @@
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/err.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <crypto/hash.h>
+#include <linux/key-type.h>
+
+#include <keys/ceph-type.h>
+#include <linux/ceph/decode.h>
+#include "crypto.h"
+
+/*
+ * Deep-copy @src into @dst, duplicating the key material.  Returns 0 or
+ * -ENOMEM.  On success the caller owns dst->key and must release it via
+ * ceph_crypto_key_destroy().
+ */
+int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
+ const struct ceph_crypto_key *src)
+{
+ memcpy(dst, src, sizeof(struct ceph_crypto_key));
+ dst->key = kmemdup(src->key, src->len, GFP_NOFS);
+ if (!dst->key)
+ return -ENOMEM;
+ return 0;
+}
+
+/*
+ * Serialize @key into the buffer at *p (advancing *p): u16 type,
+ * creation timestamp, u16 length, then the raw key bytes.  Returns
+ * -ERANGE if the encoding would run past @end.
+ */
+int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end)
+{
+ if (*p + sizeof(u16) + sizeof(key->created) +
+ sizeof(u16) + key->len > end)
+ return -ERANGE;
+ ceph_encode_16(p, key->type);
+ ceph_encode_copy(p, &key->created, sizeof(key->created));
+ ceph_encode_16(p, key->len);
+ ceph_encode_copy(p, key->key, key->len);
+ return 0;
+}
+
+/*
+ * Inverse of ceph_crypto_key_encode(): parse a key from *p (advancing
+ * *p) and allocate key->key for the key material.  Returns 0, -EINVAL
+ * on a truncated buffer, or -ENOMEM.  On success the caller owns
+ * key->key.
+ */
+int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end)
+{
+ ceph_decode_need(p, end, 2*sizeof(u16) + sizeof(key->created), bad);
+ key->type = ceph_decode_16(p);
+ ceph_decode_copy(p, &key->created, sizeof(key->created));
+ key->len = ceph_decode_16(p);
+ ceph_decode_need(p, end, key->len, bad);
+ key->key = kmalloc(key->len, GFP_NOFS);
+ if (!key->key)
+ return -ENOMEM;
+ ceph_decode_copy(p, key->key, key->len);
+ return 0;
+
+bad:
+ dout("failed to decode crypto key\n");
+ return -EINVAL;
+}
+
+/*
+ * Parse a base64-armored key string (as found in a keyring file):
+ * unarmor into a temporary buffer, then decode it into @key.
+ * Returns 0 or a negative errno from unarmoring/decoding.
+ */
+int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey)
+{
+ int inlen = strlen(inkey);
+ int blen = inlen * 3 / 4;
+ void *buf, *p;
+ int ret;
+
+ dout("crypto_key_unarmor %s\n", inkey);
+ buf = kmalloc(blen, GFP_NOFS);
+ if (!buf)
+ return -ENOMEM;
+ blen = ceph_unarmor(buf, inkey, inkey+inlen);
+ if (blen < 0) {
+ kfree(buf);
+ return blen;
+ }
+
+ p = buf;
+ ret = ceph_crypto_key_decode(key, &p, p + blen);
+ kfree(buf);
+ if (ret)
+ return ret;
+ dout("crypto_key_unarmor key %p type %d len %d\n", key,
+ key->type, key->len);
+ return 0;
+}
+
+
+
+#define AES_KEY_SIZE 16
+
+/* Allocate a synchronous AES-CBC transform from the kernel crypto API. */
+static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void)
+{
+ return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
+}
+
+/* fixed IV (CEPH_AES_IV) shared by every encrypt/decrypt operation */
+static const u8 *aes_iv = (u8 *)CEPH_AES_IV;
+
+/*
+ * AES-CBC encrypt @src into @dst, appending padding so the plaintext
+ * length is a multiple of 16; every pad byte holds the pad length.
+ * *dst_len is set to the padded ciphertext length; dst must have room
+ * for src_len rounded up to the next 16-byte boundary.
+ */
+static int ceph_aes_encrypt(const void *key, int key_len,
+ void *dst, size_t *dst_len,
+ const void *src, size_t src_len)
+{
+ struct scatterlist sg_in[2], sg_out[1];
+ struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
+ struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 };
+ int ret;
+ void *iv;
+ int ivsize;
+ size_t zero_padding = (0x10 - (src_len & 0x0f));
+ char pad[16];
+
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ memset(pad, zero_padding, zero_padding);
+
+ *dst_len = src_len + zero_padding;
+
+ crypto_blkcipher_setkey((void *)tfm, key, key_len);
+ sg_init_table(sg_in, 2);
+ sg_set_buf(&sg_in[0], src, src_len);
+ sg_set_buf(&sg_in[1], pad, zero_padding);
+ sg_init_table(sg_out, 1);
+ sg_set_buf(sg_out, dst, *dst_len);
+ iv = crypto_blkcipher_crt(tfm)->iv;
+ ivsize = crypto_blkcipher_ivsize(tfm);
+
+ memcpy(iv, aes_iv, ivsize);
+ /*
+ print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1,
+ key, key_len, 1);
+ print_hex_dump(KERN_ERR, "enc src: ", DUMP_PREFIX_NONE, 16, 1,
+ src, src_len, 1);
+ print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1,
+ pad, zero_padding, 1);
+ */
+ ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in,
+ src_len + zero_padding);
+ crypto_free_blkcipher(tfm);
+ if (ret < 0)
+ pr_err("ceph_aes_crypt failed %d\n", ret);
+ /*
+ print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1,
+ dst, *dst_len, 1);
+ */
+ /* NOTE(review): an encrypt failure is only logged; 0 is returned
+ * regardless -- confirm callers tolerate this upstream behavior */
+ return 0;
+}
+
+/*
+ * Like ceph_aes_encrypt(), but encrypts the concatenation of two source
+ * buffers (plus pad-length padding) into @dst without copying them
+ * together first, by chaining them in one scatterlist.
+ */
+static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
+ size_t *dst_len,
+ const void *src1, size_t src1_len,
+ const void *src2, size_t src2_len)
+{
+ struct scatterlist sg_in[3], sg_out[1];
+ struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
+ struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 };
+ int ret;
+ void *iv;
+ int ivsize;
+ size_t zero_padding = (0x10 - ((src1_len + src2_len) & 0x0f));
+ char pad[16];
+
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ memset(pad, zero_padding, zero_padding);
+
+ *dst_len = src1_len + src2_len + zero_padding;
+
+ crypto_blkcipher_setkey((void *)tfm, key, key_len);
+ sg_init_table(sg_in, 3);
+ sg_set_buf(&sg_in[0], src1, src1_len);
+ sg_set_buf(&sg_in[1], src2, src2_len);
+ sg_set_buf(&sg_in[2], pad, zero_padding);
+ sg_init_table(sg_out, 1);
+ sg_set_buf(sg_out, dst, *dst_len);
+ iv = crypto_blkcipher_crt(tfm)->iv;
+ ivsize = crypto_blkcipher_ivsize(tfm);
+
+ memcpy(iv, aes_iv, ivsize);
+ /*
+ print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1,
+ key, key_len, 1);
+ print_hex_dump(KERN_ERR, "enc src1: ", DUMP_PREFIX_NONE, 16, 1,
+ src1, src1_len, 1);
+ print_hex_dump(KERN_ERR, "enc src2: ", DUMP_PREFIX_NONE, 16, 1,
+ src2, src2_len, 1);
+ print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1,
+ pad, zero_padding, 1);
+ */
+ ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in,
+ src1_len + src2_len + zero_padding);
+ crypto_free_blkcipher(tfm);
+ if (ret < 0)
+ pr_err("ceph_aes_crypt2 failed %d\n", ret);
+ /*
+ print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1,
+ dst, *dst_len, 1);
+ */
+ /* NOTE(review): as in ceph_aes_encrypt(), failures are logged but
+ * 0 is still returned */
+ return 0;
+}
+
+/*
+ * AES-CBC decrypt @src into @dst and strip the padding (the last
+ * plaintext byte holds the pad length; overflow past *dst_len lands in
+ * the local pad[] buffer).  On success *dst_len is set to the unpadded
+ * plaintext length.  Returns 0, a crypto-layer error, or -EPERM for a
+ * malformed pad byte.
+ */
+static int ceph_aes_decrypt(const void *key, int key_len,
+ void *dst, size_t *dst_len,
+ const void *src, size_t src_len)
+{
+ struct scatterlist sg_in[1], sg_out[2];
+ struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
+ struct blkcipher_desc desc = { .tfm = tfm };
+ char pad[16];
+ void *iv;
+ int ivsize;
+ int ret;
+ int last_byte;
+
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ crypto_blkcipher_setkey((void *)tfm, key, key_len);
+ sg_init_table(sg_in, 1);
+ sg_init_table(sg_out, 2);
+ sg_set_buf(sg_in, src, src_len);
+ sg_set_buf(&sg_out[0], dst, *dst_len);
+ sg_set_buf(&sg_out[1], pad, sizeof(pad));
+
+ iv = crypto_blkcipher_crt(tfm)->iv;
+ ivsize = crypto_blkcipher_ivsize(tfm);
+
+ memcpy(iv, aes_iv, ivsize);
+
+ /*
+ print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1,
+ key, key_len, 1);
+ print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1,
+ src, src_len, 1);
+ */
+
+ ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len);
+ crypto_free_blkcipher(tfm);
+ if (ret < 0) {
+ pr_err("ceph_aes_decrypt failed %d\n", ret);
+ return ret;
+ }
+
+ /* locate the final plaintext byte: either in dst or in the pad
+ * spill buffer */
+ if (src_len <= *dst_len)
+ last_byte = ((char *)dst)[src_len - 1];
+ else
+ last_byte = pad[src_len - *dst_len - 1];
+ if (last_byte <= 16 && src_len >= last_byte) {
+ *dst_len = src_len - last_byte;
+ } else {
+ pr_err("ceph_aes_decrypt got bad padding %d on src len %d\n",
+ last_byte, (int)src_len);
+ return -EPERM; /* bad padding */
+ }
+ /*
+ print_hex_dump(KERN_ERR, "dec out: ", DUMP_PREFIX_NONE, 16, 1,
+ dst, *dst_len, 1);
+ */
+ return 0;
+}
+
+/*
+ * Like ceph_aes_decrypt(), but splits the plaintext across two output
+ * buffers: dst1 is filled first (up to *dst1_len), the remainder goes
+ * to dst2.  *dst1_len/*dst2_len are updated to the bytes actually
+ * written.  Returns 0, a crypto-layer error, or -EPERM for bad padding.
+ */
+static int ceph_aes_decrypt2(const void *key, int key_len,
+ void *dst1, size_t *dst1_len,
+ void *dst2, size_t *dst2_len,
+ const void *src, size_t src_len)
+{
+ struct scatterlist sg_in[1], sg_out[3];
+ struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher();
+ struct blkcipher_desc desc = { .tfm = tfm };
+ char pad[16];
+ void *iv;
+ int ivsize;
+ int ret;
+ int last_byte;
+
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ sg_init_table(sg_in, 1);
+ sg_set_buf(sg_in, src, src_len);
+ sg_init_table(sg_out, 3);
+ sg_set_buf(&sg_out[0], dst1, *dst1_len);
+ sg_set_buf(&sg_out[1], dst2, *dst2_len);
+ sg_set_buf(&sg_out[2], pad, sizeof(pad));
+
+ crypto_blkcipher_setkey((void *)tfm, key, key_len);
+ iv = crypto_blkcipher_crt(tfm)->iv;
+ ivsize = crypto_blkcipher_ivsize(tfm);
+
+ memcpy(iv, aes_iv, ivsize);
+
+ /*
+ print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1,
+ key, key_len, 1);
+ print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1,
+ src, src_len, 1);
+ */
+
+ ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len);
+ crypto_free_blkcipher(tfm);
+ if (ret < 0) {
+ pr_err("ceph_aes_decrypt failed %d\n", ret);
+ return ret;
+ }
+
+ /* the final plaintext byte (pad length) may land in dst1, dst2,
+ * or the pad spill buffer depending on the output sizes */
+ if (src_len <= *dst1_len)
+ last_byte = ((char *)dst1)[src_len - 1];
+ else if (src_len <= *dst1_len + *dst2_len)
+ last_byte = ((char *)dst2)[src_len - *dst1_len - 1];
+ else
+ last_byte = pad[src_len - *dst1_len - *dst2_len - 1];
+ if (last_byte <= 16 && src_len >= last_byte) {
+ src_len -= last_byte;
+ } else {
+ pr_err("ceph_aes_decrypt got bad padding %d on src len %d\n",
+ last_byte, (int)src_len);
+ return -EPERM; /* bad padding */
+ }
+
+ if (src_len < *dst1_len) {
+ *dst1_len = src_len;
+ *dst2_len = 0;
+ } else {
+ *dst2_len = src_len - *dst1_len;
+ }
+ /*
+ print_hex_dump(KERN_ERR, "dec out1: ", DUMP_PREFIX_NONE, 16, 1,
+ dst1, *dst1_len, 1);
+ print_hex_dump(KERN_ERR, "dec out2: ", DUMP_PREFIX_NONE, 16, 1,
+ dst2, *dst2_len, 1);
+ */
+
+ return 0;
+}
+
+
+/*
+ * Decrypt @src into @dst according to the secret's cipher type:
+ * CEPH_CRYPTO_NONE is a bounds-checked copy, CEPH_CRYPTO_AES delegates
+ * to ceph_aes_decrypt().  Returns 0 or a negative errno.
+ */
+int ceph_decrypt(struct ceph_crypto_key *secret, void *dst, size_t *dst_len,
+ const void *src, size_t src_len)
+{
+ switch (secret->type) {
+ case CEPH_CRYPTO_NONE:
+ if (*dst_len < src_len)
+ return -ERANGE;
+ memcpy(dst, src, src_len);
+ *dst_len = src_len;
+ return 0;
+
+ case CEPH_CRYPTO_AES:
+ return ceph_aes_decrypt(secret->key, secret->len, dst,
+ dst_len, src, src_len);
+
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * Two-output variant of ceph_decrypt(): plaintext fills @dst1 first,
+ * any remainder goes to @dst2.  For CEPH_CRYPTO_NONE this is a plain
+ * split copy; for CEPH_CRYPTO_AES it delegates to ceph_aes_decrypt2().
+ */
+int ceph_decrypt2(struct ceph_crypto_key *secret,
+ void *dst1, size_t *dst1_len,
+ void *dst2, size_t *dst2_len,
+ const void *src, size_t src_len)
+{
+ size_t t;
+
+ switch (secret->type) {
+ case CEPH_CRYPTO_NONE:
+ if (*dst1_len + *dst2_len < src_len)
+ return -ERANGE;
+ t = min(*dst1_len, src_len);
+ memcpy(dst1, src, t);
+ *dst1_len = t;
+ src += t;
+ src_len -= t;
+ if (src_len) {
+ t = min(*dst2_len, src_len);
+ memcpy(dst2, src, t);
+ *dst2_len = t;
+ }
+ return 0;
+
+ case CEPH_CRYPTO_AES:
+ return ceph_aes_decrypt2(secret->key, secret->len,
+ dst1, dst1_len, dst2, dst2_len,
+ src, src_len);
+
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * Encrypt @src into @dst according to the secret's cipher type:
+ * CEPH_CRYPTO_NONE is a bounds-checked copy, CEPH_CRYPTO_AES delegates
+ * to ceph_aes_encrypt().  Returns 0 or a negative errno.
+ */
+int ceph_encrypt(struct ceph_crypto_key *secret, void *dst, size_t *dst_len,
+ const void *src, size_t src_len)
+{
+ switch (secret->type) {
+ case CEPH_CRYPTO_NONE:
+ if (*dst_len < src_len)
+ return -ERANGE;
+ memcpy(dst, src, src_len);
+ *dst_len = src_len;
+ return 0;
+
+ case CEPH_CRYPTO_AES:
+ return ceph_aes_encrypt(secret->key, secret->len, dst,
+ dst_len, src, src_len);
+
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * Encrypt the concatenation of @src1 and @src2 into @dst.  For
+ * CEPH_CRYPTO_NONE the two buffers are simply copied back to back;
+ * for CEPH_CRYPTO_AES it delegates to ceph_aes_encrypt2().
+ */
+int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len,
+ const void *src1, size_t src1_len,
+ const void *src2, size_t src2_len)
+{
+ switch (secret->type) {
+ case CEPH_CRYPTO_NONE:
+ if (*dst_len < src1_len + src2_len)
+ return -ERANGE;
+ memcpy(dst, src1, src1_len);
+ memcpy(dst + src1_len, src2, src2_len);
+ *dst_len = src1_len + src2_len;
+ return 0;
+
+ case CEPH_CRYPTO_AES:
+ return ceph_aes_encrypt2(secret->key, secret->len, dst, dst_len,
+ src1, src1_len, src2, src2_len);
+
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * key_type .instantiate hook: parse the caller-supplied payload into a
+ * freshly allocated ceph_crypto_key and hang it off the kernel key.
+ * Returns 0, -EINVAL for a bad/oversized payload, or -ENOMEM.
+ */
+static int ceph_key_instantiate(struct key *key,
+ struct key_preparsed_payload *prep)
+{
+ struct ceph_crypto_key *ckey;
+ size_t datalen = prep->datalen;
+ int ret;
+ void *p;
+
+ ret = -EINVAL;
+ if (datalen <= 0 || datalen > 32767 || !prep->data)
+ goto err;
+
+ ret = key_payload_reserve(key, datalen);
+ if (ret < 0)
+ goto err;
+
+ ret = -ENOMEM;
+ ckey = kmalloc(sizeof(*ckey), GFP_KERNEL);
+ if (!ckey)
+ goto err;
+
+ /* TODO ceph_crypto_key_decode should really take const input */
+ p = (void *)prep->data;
+ ret = ceph_crypto_key_decode(ckey, &p, (char*)prep->data+datalen);
+ if (ret < 0)
+ goto err_ckey;
+
+ key->payload.data = ckey;
+ return 0;
+
+err_ckey:
+ kfree(ckey);
+err:
+ return ret;
+}
+
+/* key_type .match hook: exact description-string comparison */
+static int ceph_key_match(const struct key *key, const void *description)
+{
+ return strcmp(key->description, description) == 0;
+}
+
+/* key_type .destroy hook: free the decoded key material and wrapper */
+static void ceph_key_destroy(struct key *key) {
+ struct ceph_crypto_key *ckey = key->payload.data;
+
+ ceph_crypto_key_destroy(ckey);
+ kfree(ckey);
+}
+
+/* "ceph" key type, registered with the kernel keyring subsystem */
+struct key_type key_type_ceph = {
+ .name = "ceph",
+ .instantiate = ceph_key_instantiate,
+ .match = ceph_key_match,
+ .destroy = ceph_key_destroy,
+};
+
+/* Register the "ceph" key type; called once at module init. */
+int ceph_crypto_init(void) {
+ return register_key_type(&key_type_ceph);
+}
+
+/* Unregister the "ceph" key type; called at module exit. */
+void ceph_crypto_shutdown(void) {
+ unregister_key_type(&key_type_ceph);
+}
diff --git a/libceph/crypto.h b/libceph/crypto.h
new file mode 100644
index 0000000..d149822
--- /dev/null
+++ b/libceph/crypto.h
@@ -0,0 +1,51 @@
+#ifndef _FS_CEPH_CRYPTO_H
+#define _FS_CEPH_CRYPTO_H
+
+#include <linux/ceph/types.h>
+#include <linux/ceph/buffer.h>
+
+/*
+ * cryptographic secret
+ *
+ * key points to kmalloc'd key material of length len; release it with
+ * ceph_crypto_key_destroy().  type is a CEPH_CRYPTO_* cipher id.
+ */
+struct ceph_crypto_key {
+ int type;
+ struct ceph_timespec created;
+ int len;
+ void *key;
+};
+
+/* Free the key material (the struct itself is owned by the caller). */
+static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
+{
+ if (key)
+ kfree(key->key);
+}
+
+int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
+ const struct ceph_crypto_key *src);
+int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end);
+int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end);
+int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *in);
+
+/* crypto.c */
+int ceph_decrypt(struct ceph_crypto_key *secret,
+ void *dst, size_t *dst_len,
+ const void *src, size_t src_len);
+int ceph_encrypt(struct ceph_crypto_key *secret,
+ void *dst, size_t *dst_len,
+ const void *src, size_t src_len);
+int ceph_decrypt2(struct ceph_crypto_key *secret,
+ void *dst1, size_t *dst1_len,
+ void *dst2, size_t *dst2_len,
+ const void *src, size_t src_len);
+int ceph_encrypt2(struct ceph_crypto_key *secret,
+ void *dst, size_t *dst_len,
+ const void *src1, size_t src1_len,
+ const void *src2, size_t src2_len);
+int ceph_crypto_init(void);
+void ceph_crypto_shutdown(void);
+
+/* armor.c */
+int ceph_armor(char *dst, const char *src, const char *end);
+int ceph_unarmor(char *dst, const char *src, const char *end);
+
+#endif
diff --git a/libceph/debugfs.c b/libceph/debugfs.c
new file mode 100644
index 0000000..10421a4
--- /dev/null
+++ b/libceph/debugfs.c
@@ -0,0 +1,282 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/*
+ * Implement /sys/kernel/debug/ceph fun
+ *
+ * /sys/kernel/debug/ceph/client* - an instance of the ceph client
+ * .../osdmap - current osdmap
+ * .../monmap - current monmap
+ * .../osdc - active osd requests
+ * .../monc - mon client state
+ * .../dentry_lru - dump contents of dentry lru
+ * .../caps - expose cap (reservation) stats
+ * .../bdi - symlink to ../../bdi/something
+ */
+
+static struct dentry *ceph_debugfs_dir;
+
+/* seq_file show: dump the monmap epoch and each monitor's name/address. */
+static int monmap_show(struct seq_file *s, void *p)
+{
+ int i;
+ struct ceph_client *client = s->private;
+
+ /* No monmap received from the monitors yet -- print nothing. */
+ if (client->monc.monmap == NULL)
+ return 0;
+
+ seq_printf(s, "epoch %d\n", client->monc.monmap->epoch);
+ for (i = 0; i < client->monc.monmap->num_mon; i++) {
+ struct ceph_entity_inst *inst =
+ &client->monc.monmap->mon_inst[i];
+
+ seq_printf(s, "\t%s%lld\t%s\n",
+ ENTITY_NAME(inst->name),
+ ceph_pr_addr(&inst->addr.in_addr));
+ }
+ return 0;
+}
+
+/*
+ * seq_file show: dump the current osdmap -- epoch, flags, pools,
+ * per-osd state/weight/affinity, and the pg_temp/primary_temp
+ * remapping trees.
+ * NOTE(review): reads client->osdc.osdmap without an explicit lock
+ * here -- presumably safe for a debugfs snapshot; confirm.
+ */
+static int osdmap_show(struct seq_file *s, void *p)
+{
+ int i;
+ struct ceph_client *client = s->private;
+ struct ceph_osdmap *map = client->osdc.osdmap;
+ struct rb_node *n;
+
+ if (map == NULL)
+ return 0;
+
+ seq_printf(s, "epoch %d\n", map->epoch);
+ seq_printf(s, "flags%s%s\n",
+ (map->flags & CEPH_OSDMAP_NEARFULL) ? " NEARFULL" : "",
+ (map->flags & CEPH_OSDMAP_FULL) ? " FULL" : "");
+
+ /* one line per pool */
+ for (n = rb_first(&map->pg_pools); n; n = rb_next(n)) {
+ struct ceph_pg_pool_info *pool =
+ rb_entry(n, struct ceph_pg_pool_info, node);
+
+ seq_printf(s, "pool %lld pg_num %u (%d) read_tier %lld write_tier %lld\n",
+ pool->id, pool->pg_num, pool->pg_num_mask,
+ pool->read_tier, pool->write_tier);
+ }
+ /* one line per osd; weight/affinity are 16.16 fixed point -> percent */
+ for (i = 0; i < map->max_osd; i++) {
+ struct ceph_entity_addr *addr = &map->osd_addr[i];
+ int state = map->osd_state[i];
+ char sb[64];
+
+ seq_printf(s, "osd%d\t%s\t%3d%%\t(%s)\t%3d%%\n",
+ i, ceph_pr_addr(&addr->in_addr),
+ ((map->osd_weight[i]*100) >> 16),
+ ceph_osdmap_state_str(sb, sizeof(sb), state),
+ ((ceph_get_primary_affinity(map, i)*100) >> 16));
+ }
+ /* temporary PG -> osd-set mappings */
+ for (n = rb_first(&map->pg_temp); n; n = rb_next(n)) {
+ struct ceph_pg_mapping *pg =
+ rb_entry(n, struct ceph_pg_mapping, node);
+
+ seq_printf(s, "pg_temp %llu.%x [", pg->pgid.pool,
+ pg->pgid.seed);
+ for (i = 0; i < pg->pg_temp.len; i++)
+ seq_printf(s, "%s%d", (i == 0 ? "" : ","),
+ pg->pg_temp.osds[i]);
+ seq_printf(s, "]\n");
+ }
+ /* temporary PG -> primary-osd overrides */
+ for (n = rb_first(&map->primary_temp); n; n = rb_next(n)) {
+ struct ceph_pg_mapping *pg =
+ rb_entry(n, struct ceph_pg_mapping, node);
+
+ seq_printf(s, "primary_temp %llu.%x %d\n", pg->pgid.pool,
+ pg->pgid.seed, pg->primary_temp.osd);
+ }
+
+ return 0;
+}
+
+/*
+ * seq_file show: dump mon client state -- which maps we hold / want,
+ * and any in-flight generic (statfs etc.) requests.  Holds monc->mutex
+ * while walking the request tree.
+ */
+static int monc_show(struct seq_file *s, void *p)
+{
+ struct ceph_client *client = s->private;
+ struct ceph_mon_generic_request *req;
+ struct ceph_mon_client *monc = &client->monc;
+ struct rb_node *rp;
+
+ mutex_lock(&monc->mutex);
+
+ if (monc->have_mdsmap)
+ seq_printf(s, "have mdsmap %u\n", (unsigned int)monc->have_mdsmap);
+ if (monc->have_osdmap)
+ seq_printf(s, "have osdmap %u\n", (unsigned int)monc->have_osdmap);
+ if (monc->want_next_osdmap)
+ seq_printf(s, "want next osdmap\n");
+
+ for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) {
+ __u16 op;
+ req = rb_entry(rp, struct ceph_mon_generic_request, node);
+ op = le16_to_cpu(req->request->hdr.type);
+ if (op == CEPH_MSG_STATFS)
+ seq_printf(s, "%lld statfs\n", req->tid);
+ else
+ seq_printf(s, "%lld unknown\n", req->tid);
+ }
+
+ mutex_unlock(&monc->mutex);
+ return 0;
+}
+
+/*
+ * seq_file show: dump active osd requests -- tid, target osd, pgid,
+ * object name, reassert version (if any) and the ops in each request.
+ * Holds osdc->request_mutex while walking the request tree.
+ */
+static int osdc_show(struct seq_file *s, void *pp)
+{
+ struct ceph_client *client = s->private;
+ struct ceph_osd_client *osdc = &client->osdc;
+ struct rb_node *p;
+
+ mutex_lock(&osdc->request_mutex);
+ for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
+ struct ceph_osd_request *req;
+ unsigned int i;
+ int opcode;
+
+ req = rb_entry(p, struct ceph_osd_request, r_node);
+
+ /* -1 means the request is not currently mapped to an osd */
+ seq_printf(s, "%lld\tosd%d\t%lld.%x\t", req->r_tid,
+ req->r_osd ? req->r_osd->o_osd : -1,
+ req->r_pgid.pool, req->r_pgid.seed);
+
+ seq_printf(s, "%.*s", req->r_base_oid.name_len,
+ req->r_base_oid.name);
+
+ if (req->r_reassert_version.epoch)
+ seq_printf(s, "\t%u'%llu",
+ (unsigned int)le32_to_cpu(req->r_reassert_version.epoch),
+ le64_to_cpu(req->r_reassert_version.version));
+ else
+ seq_printf(s, "\t");
+
+ for (i = 0; i < req->r_num_ops; i++) {
+ opcode = req->r_ops[i].op;
+ seq_printf(s, "\t%s", ceph_osd_op_name(opcode));
+ }
+
+ seq_printf(s, "\n");
+ }
+ mutex_unlock(&osdc->request_mutex);
+ return 0;
+}
+
+CEPH_DEFINE_SHOW_FUNC(monmap_show)
+CEPH_DEFINE_SHOW_FUNC(osdmap_show)
+CEPH_DEFINE_SHOW_FUNC(monc_show)
+CEPH_DEFINE_SHOW_FUNC(osdc_show)
+
+/* Create the top-level /sys/kernel/debug/ceph directory. */
+int ceph_debugfs_init(void)
+{
+ ceph_debugfs_dir = debugfs_create_dir("ceph", NULL);
+ if (!ceph_debugfs_dir)
+ return -ENOMEM;
+ return 0;
+}
+
+/* Remove the top-level ceph debugfs directory. */
+void ceph_debugfs_cleanup(void)
+{
+ debugfs_remove(ceph_debugfs_dir);
+}
+
+/*
+ * Create the per-client debugfs directory ("<fsid>.client<gid>") and
+ * its monc/osdc/monmap/osdmap files.  On any failure everything
+ * created so far is torn down via ceph_debugfs_client_cleanup() and
+ * -ENOMEM is returned.
+ */
+int ceph_debugfs_client_init(struct ceph_client *client)
+{
+ int ret = -ENOMEM;
+ char name[80];
+
+ snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
+ client->monc.auth->global_id);
+
+ dout("ceph_debugfs_client_init %p %s\n", client, name);
+
+ /* must not be called twice for the same client */
+ BUG_ON(client->debugfs_dir);
+ client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
+ if (!client->debugfs_dir)
+ goto out;
+
+ client->monc.debugfs_file = debugfs_create_file("monc",
+ 0600,
+ client->debugfs_dir,
+ client,
+ &monc_show_fops);
+ if (!client->monc.debugfs_file)
+ goto out;
+
+ client->osdc.debugfs_file = debugfs_create_file("osdc",
+ 0600,
+ client->debugfs_dir,
+ client,
+ &osdc_show_fops);
+ if (!client->osdc.debugfs_file)
+ goto out;
+
+ client->debugfs_monmap = debugfs_create_file("monmap",
+ 0600,
+ client->debugfs_dir,
+ client,
+ &monmap_show_fops);
+ if (!client->debugfs_monmap)
+ goto out;
+
+ client->debugfs_osdmap = debugfs_create_file("osdmap",
+ 0600,
+ client->debugfs_dir,
+ client,
+ &osdmap_show_fops);
+ if (!client->debugfs_osdmap)
+ goto out;
+
+ return 0;
+
+out:
+ ceph_debugfs_client_cleanup(client);
+ return ret;
+}
+
+/*
+ * Remove the per-client debugfs files and directory.  debugfs_remove()
+ * ignores NULL/error dentries, so this is safe after a partial init.
+ */
+void ceph_debugfs_client_cleanup(struct ceph_client *client)
+{
+ dout("ceph_debugfs_client_cleanup %p\n", client);
+ debugfs_remove(client->debugfs_osdmap);
+ debugfs_remove(client->debugfs_monmap);
+ debugfs_remove(client->osdc.debugfs_file);
+ debugfs_remove(client->monc.debugfs_file);
+ debugfs_remove(client->debugfs_dir);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+/* No-op stubs used when the kernel is built without CONFIG_DEBUG_FS. */
+int ceph_debugfs_init(void)
+{
+ return 0;
+}
+
+void ceph_debugfs_cleanup(void)
+{
+}
+
+int ceph_debugfs_client_init(struct ceph_client *client)
+{
+ return 0;
+}
+
+void ceph_debugfs_client_cleanup(struct ceph_client *client)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
+
+EXPORT_SYMBOL(ceph_debugfs_init);
+EXPORT_SYMBOL(ceph_debugfs_cleanup);
diff --git a/libceph/messenger.c b/libceph/messenger.c
new file mode 100644
index 0000000..4f55f9c
--- /dev/null
+++ b/libceph/messenger.c
@@ -0,0 +1,3316 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/crc32c.h>
+#include <linux/ctype.h>
+#include <linux/highmem.h>
+#include <linux/inet.h>
+#include <linux/kthread.h>
+#include <linux/net.h>
+#include <linux/slab.h>
+#include <linux/socket.h>
+#include <linux/string.h>
+#ifdef CONFIG_BLOCK
+#include <linux/bio.h>
+#endif /* CONFIG_BLOCK */
+#include <linux/dns_resolver.h>
+#include <net/tcp.h>
+
+#include <linux/ceph/ceph_features.h>
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/pagelist.h>
+#include <linux/export.h>
+
+#define list_entry_next(pos, member) \
+ list_entry(pos->member.next, typeof(*pos), member)
+
+/*
+ * Ceph uses the messenger to exchange ceph_msg messages with other
+ * hosts in the system. The messenger provides ordered and reliable
+ * delivery. We tolerate TCP disconnects by reconnecting (with
+ * exponential backoff) in the case of a fault (disconnection, bad
+ * crc, protocol error). Acks allow sent messages to be discarded by
+ * the sender.
+ */
+
+/*
+ * We track the state of the socket on a given connection using
+ * values defined below. The transition to a new socket state is
+ * handled by a function which verifies we aren't coming from an
+ * unexpected state.
+ *
+ * --------
+ * | NEW* | transient initial state
+ * --------
+ * | con_sock_state_init()
+ * v
+ * ----------
+ * | CLOSED | initialized, but no socket (and no
+ * ---------- TCP connection)
+ * ^ \
+ * | \ con_sock_state_connecting()
+ * | ----------------------
+ * | \
+ * + con_sock_state_closed() \
+ * |+--------------------------- \
+ * | \ \ \
+ * | ----------- \ \
+ * | | CLOSING | socket event; \ \
+ * | ----------- await close \ \
+ * | ^ \ |
+ * | | \ |
+ * | + con_sock_state_closing() \ |
+ * | / \ | |
+ * | / --------------- | |
+ * | / \ v v
+ * | / --------------
+ * | / -----------------| CONNECTING | socket created, TCP
+ * | | / -------------- connect initiated
+ * | | | con_sock_state_connected()
+ * | | v
+ * -------------
+ * | CONNECTED | TCP connection established
+ * -------------
+ *
+ * State values for ceph_connection->sock_state; NEW is assumed to be 0.
+ */
+
+#define CON_SOCK_STATE_NEW 0 /* -> CLOSED */
+#define CON_SOCK_STATE_CLOSED 1 /* -> CONNECTING */
+#define CON_SOCK_STATE_CONNECTING 2 /* -> CONNECTED or -> CLOSING */
+#define CON_SOCK_STATE_CONNECTED 3 /* -> CLOSING or -> CLOSED */
+#define CON_SOCK_STATE_CLOSING 4 /* -> CLOSED */
+
+/*
+ * connection states
+ */
+#define CON_STATE_CLOSED 1 /* -> PREOPEN */
+#define CON_STATE_PREOPEN 2 /* -> CONNECTING, CLOSED */
+#define CON_STATE_CONNECTING 3 /* -> NEGOTIATING, CLOSED */
+#define CON_STATE_NEGOTIATING 4 /* -> OPEN, CLOSED */
+#define CON_STATE_OPEN 5 /* -> STANDBY, CLOSED */
+#define CON_STATE_STANDBY 6 /* -> PREOPEN, CLOSED */
+
+/*
+ * ceph_connection flag bits
+ */
+#define CON_FLAG_LOSSYTX 0 /* we can close channel or drop
+ * messages on errors */
+#define CON_FLAG_KEEPALIVE_PENDING 1 /* we need to send a keepalive */
+#define CON_FLAG_WRITE_PENDING 2 /* we have data ready to send */
+#define CON_FLAG_SOCK_CLOSED 3 /* socket state changed to closed */
+#define CON_FLAG_BACKOFF 4 /* need to retry queuing delayed work */
+
+/* Return true iff @con_flag is one of the defined CON_FLAG_* bits. */
+static bool con_flag_valid(unsigned long con_flag)
+{
+ switch (con_flag) {
+ case CON_FLAG_LOSSYTX:
+ case CON_FLAG_KEEPALIVE_PENDING:
+ case CON_FLAG_WRITE_PENDING:
+ case CON_FLAG_SOCK_CLOSED:
+ case CON_FLAG_BACKOFF:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* Atomically clear a CON_FLAG_* bit in con->flags (BUGs on bad flag). */
+static void con_flag_clear(struct ceph_connection *con, unsigned long con_flag)
+{
+ BUG_ON(!con_flag_valid(con_flag));
+
+ clear_bit(con_flag, &con->flags);
+}
+
+/* Atomically set a CON_FLAG_* bit in con->flags. */
+static void con_flag_set(struct ceph_connection *con, unsigned long con_flag)
+{
+ BUG_ON(!con_flag_valid(con_flag));
+
+ set_bit(con_flag, &con->flags);
+}
+
+/* Test a CON_FLAG_* bit. */
+static bool con_flag_test(struct ceph_connection *con, unsigned long con_flag)
+{
+ BUG_ON(!con_flag_valid(con_flag));
+
+ return test_bit(con_flag, &con->flags);
+}
+
+/* Atomically test-and-clear a CON_FLAG_* bit; returns the old value. */
+static bool con_flag_test_and_clear(struct ceph_connection *con,
+ unsigned long con_flag)
+{
+ BUG_ON(!con_flag_valid(con_flag));
+
+ return test_and_clear_bit(con_flag, &con->flags);
+}
+
+/* Atomically test-and-set a CON_FLAG_* bit; returns the old value. */
+static bool con_flag_test_and_set(struct ceph_connection *con,
+ unsigned long con_flag)
+{
+ BUG_ON(!con_flag_valid(con_flag));
+
+ return test_and_set_bit(con_flag, &con->flags);
+}
+
+/* Slab caches for frequently-allocated structures */
+
+static struct kmem_cache *ceph_msg_cache;
+static struct kmem_cache *ceph_msg_data_cache;
+
+/* static tag bytes (protocol control messages) */
+static char tag_msg = CEPH_MSGR_TAG_MSG;
+static char tag_ack = CEPH_MSGR_TAG_ACK;
+static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE;
+
+#ifdef CONFIG_LOCKDEP
+static struct lock_class_key socket_class;
+#endif
+
+/*
+ * When skipping (ignoring) a block of input we read it into a "skip
+ * buffer," which is this many bytes in size.
+ */
+#define SKIP_BUF_SIZE 1024
+
+static void queue_con(struct ceph_connection *con);
+static void con_work(struct work_struct *);
+static void con_fault(struct ceph_connection *con);
+
+/*
+ * Nicely render a sockaddr as a string. An array of formatted
+ * strings is used, to approximate reentrancy.
+ */
+#define ADDR_STR_COUNT_LOG 5 /* log2(# address strings in array) */
+#define ADDR_STR_COUNT (1 << ADDR_STR_COUNT_LOG)
+#define ADDR_STR_COUNT_MASK (ADDR_STR_COUNT - 1)
+#define MAX_ADDR_STR_LEN 64 /* 54 is enough */
+
+static char addr_str[ADDR_STR_COUNT][MAX_ADDR_STR_LEN];
+static atomic_t addr_str_seq = ATOMIC_INIT(0);
+
+static struct page *zero_page; /* used in certain error cases */
+
+/*
+ * Format a sockaddr (v4 or v6) as "addr:port" into one of a small ring
+ * of static buffers.  The ring (ADDR_STR_COUNT entries, rotated with an
+ * atomic counter) only approximates reentrancy: a slot can be reused
+ * while a caller still holds its pointer, so results are short-lived.
+ */
+const char *ceph_pr_addr(const struct sockaddr_storage *ss)
+{
+ int i;
+ char *s;
+ struct sockaddr_in *in4 = (struct sockaddr_in *) ss;
+ struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss;
+
+ i = atomic_inc_return(&addr_str_seq) & ADDR_STR_COUNT_MASK;
+ s = addr_str[i];
+
+ switch (ss->ss_family) {
+ case AF_INET:
+ snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%hu", &in4->sin_addr,
+ ntohs(in4->sin_port));
+ break;
+
+ case AF_INET6:
+ snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%hu", &in6->sin6_addr,
+ ntohs(in6->sin6_port));
+ break;
+
+ default:
+ snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %hu)",
+ ss->ss_family);
+ }
+
+ return s;
+}
+EXPORT_SYMBOL(ceph_pr_addr);
+
+/* Cache a wire-encoded copy of our own address in msgr->my_enc_addr. */
+static void encode_my_addr(struct ceph_messenger *msgr)
+{
+ memcpy(&msgr->my_enc_addr, &msgr->inst.addr, sizeof(msgr->my_enc_addr));
+ ceph_encode_addr(&msgr->my_enc_addr);
+}
+
+/*
+ * work queue for all reading and writing to/from the socket.
+ */
+static struct workqueue_struct *ceph_msgr_wq;
+
+/*
+ * Create the ceph_msg and ceph_msg_data slab caches.  On failure the
+ * partially-created cache is destroyed and -ENOMEM is returned.
+ */
+static int ceph_msgr_slab_init(void)
+{
+ BUG_ON(ceph_msg_cache);
+ ceph_msg_cache = kmem_cache_create("ceph_msg",
+ sizeof (struct ceph_msg),
+ __alignof__(struct ceph_msg), 0, NULL);
+
+ if (!ceph_msg_cache)
+ return -ENOMEM;
+
+ BUG_ON(ceph_msg_data_cache);
+ ceph_msg_data_cache = kmem_cache_create("ceph_msg_data",
+ sizeof (struct ceph_msg_data),
+ __alignof__(struct ceph_msg_data),
+ 0, NULL);
+ if (ceph_msg_data_cache)
+ return 0;
+
+ /* second cache failed -- roll back the first */
+ kmem_cache_destroy(ceph_msg_cache);
+ ceph_msg_cache = NULL;
+
+ return -ENOMEM;
+}
+
+/* Destroy both messenger slab caches; both must exist when called. */
+static void ceph_msgr_slab_exit(void)
+{
+ BUG_ON(!ceph_msg_data_cache);
+ kmem_cache_destroy(ceph_msg_data_cache);
+ ceph_msg_data_cache = NULL;
+
+ BUG_ON(!ceph_msg_cache);
+ kmem_cache_destroy(ceph_msg_cache);
+ ceph_msg_cache = NULL;
+}
+
+/*
+ * Common teardown for ceph_msgr_init() failure and ceph_msgr_exit():
+ * destroy the workqueue (if created), the slab caches, and drop our
+ * reference on the shared zero page.
+ */
+static void _ceph_msgr_exit(void)
+{
+ if (ceph_msgr_wq) {
+ destroy_workqueue(ceph_msgr_wq);
+ ceph_msgr_wq = NULL;
+ }
+
+ ceph_msgr_slab_exit();
+
+ BUG_ON(zero_page == NULL);
+ /*
+ * ceph_msgr_init() only takes a reference (page_cache_get) and
+ * never kmap()s the zero page, so the old kunmap(zero_page) here
+ * was unbalanced -- just drop the reference.
+ */
+ page_cache_release(zero_page);
+ zero_page = NULL;
+}
+
+/*
+ * Global messenger init: pin the shared zero page, create the slab
+ * caches and the ceph-msgr workqueue.  On failure everything is
+ * unwound via _ceph_msgr_exit() and -ENOMEM is returned.
+ */
+int ceph_msgr_init(void)
+{
+ BUG_ON(zero_page != NULL);
+ zero_page = ZERO_PAGE(0);
+ page_cache_get(zero_page);
+
+ if (ceph_msgr_slab_init())
+ return -ENOMEM;
+
+ ceph_msgr_wq = alloc_workqueue("ceph-msgr", 0, 0);
+ if (ceph_msgr_wq)
+ return 0;
+
+ pr_err("msgr_init failed to create workqueue\n");
+ _ceph_msgr_exit();
+
+ return -ENOMEM;
+}
+EXPORT_SYMBOL(ceph_msgr_init);
+
+/* Global messenger teardown; ceph_msgr_init() must have succeeded. */
+void ceph_msgr_exit(void)
+{
+ BUG_ON(ceph_msgr_wq == NULL);
+
+ _ceph_msgr_exit();
+}
+EXPORT_SYMBOL(ceph_msgr_exit);
+
+/* Wait for all queued connection work to finish. */
+void ceph_msgr_flush(void)
+{
+ flush_workqueue(ceph_msgr_wq);
+}
+EXPORT_SYMBOL(ceph_msgr_flush);
+
+/* Connection socket state transition functions */
+
+/*
+ * Each helper atomically moves con->sock_state to a new CON_SOCK_STATE_*
+ * value and WARNs (once, with a printk of the offender) if the previous
+ * state was not one of the legal predecessors in the state diagram above.
+ */
+
+/* NEW -> CLOSED */
+static void con_sock_state_init(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
+ if (WARN_ON(old_state != CON_SOCK_STATE_NEW))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CLOSED);
+}
+
+/* CLOSED -> CONNECTING */
+static void con_sock_state_connecting(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTING);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CONNECTING);
+}
+
+/* CONNECTING -> CONNECTED */
+static void con_sock_state_connected(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CONNECTED);
+}
+
+/* CONNECTING/CONNECTED/CLOSING -> CLOSING */
+static void con_sock_state_closing(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSING);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING &&
+ old_state != CON_SOCK_STATE_CONNECTED &&
+ old_state != CON_SOCK_STATE_CLOSING))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CLOSING);
+}
+
+/* any post-CLOSED state -> CLOSED (idempotent) */
+static void con_sock_state_closed(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED &&
+ old_state != CON_SOCK_STATE_CLOSING &&
+ old_state != CON_SOCK_STATE_CONNECTING &&
+ old_state != CON_SOCK_STATE_CLOSED))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CLOSED);
+}
+
+/*
+ * socket callback functions
+ */
+
+/* data available on socket, or listen socket received a connect */
+/*
+ * sk_data_ready callback: queue connection work unless the messenger
+ * is stopping or the peer has half-closed (TCP_CLOSE_WAIT is handled
+ * by the state-change callback instead).
+ */
+static void ceph_sock_data_ready(struct sock *sk, int count_unused)
+{
+ struct ceph_connection *con = sk->sk_user_data;
+ if (atomic_read(&con->msgr->stopping)) {
+ return;
+ }
+
+ if (sk->sk_state != TCP_CLOSE_WAIT) {
+ dout("%s on %p state = %lu, queueing work\n", __func__,
+ con, con->state);
+ queue_con(con);
+ }
+}
+
+/* socket has buffer space for writing */
+/* sk_write_space callback: requeue work if we have pending output. */
+static void ceph_sock_write_space(struct sock *sk)
+{
+ struct ceph_connection *con = sk->sk_user_data;
+
+ /* only queue to workqueue if there is data we want to write,
+ * and there is sufficient space in the socket buffer to accept
+ * more data. clear SOCK_NOSPACE so that ceph_sock_write_space()
+ * doesn't get called again until try_write() fills the socket
+ * buffer. See net/ipv4/tcp_input.c:tcp_check_space()
+ * and net/core/stream.c:sk_stream_write_space().
+ */
+ if (con_flag_test(con, CON_FLAG_WRITE_PENDING)) {
+ if (sk_stream_is_writeable(sk)) {
+ dout("%s %p queueing write work\n", __func__, con);
+ clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ queue_con(con);
+ }
+ } else {
+ dout("%s %p nothing to write\n", __func__, con);
+ }
+}
+
+/* socket's state has changed */
+/*
+ * sk_state_change callback: on close/close-wait mark the socket
+ * CLOSING and flag SOCK_CLOSED for the worker; on established mark
+ * CONNECTED.  Either way the connection work is (re)queued.
+ */
+static void ceph_sock_state_change(struct sock *sk)
+{
+ struct ceph_connection *con = sk->sk_user_data;
+
+ dout("%s %p state = %lu sk_state = %u\n", __func__,
+ con, con->state, sk->sk_state);
+
+ switch (sk->sk_state) {
+ case TCP_CLOSE:
+ dout("%s TCP_CLOSE\n", __func__);
+ /* fallthrough -- CLOSE and CLOSE_WAIT are handled the same */
+ case TCP_CLOSE_WAIT:
+ dout("%s TCP_CLOSE_WAIT\n", __func__);
+ con_sock_state_closing(con);
+ con_flag_set(con, CON_FLAG_SOCK_CLOSED);
+ queue_con(con);
+ break;
+ case TCP_ESTABLISHED:
+ dout("%s TCP_ESTABLISHED\n", __func__);
+ con_sock_state_connected(con);
+ queue_con(con);
+ break;
+ default: /* Everything else is uninteresting */
+ break;
+ }
+}
+
+/*
+ * set up socket callbacks
+ */
+/* Point the socket's callbacks at our handlers and stash @con in it. */
+static void set_sock_callbacks(struct socket *sock,
+ struct ceph_connection *con)
+{
+ struct sock *sk = sock->sk;
+ sk->sk_user_data = con;
+ sk->sk_data_ready = ceph_sock_data_ready;
+ sk->sk_write_space = ceph_sock_write_space;
+ sk->sk_state_change = ceph_sock_state_change;
+}
+
+
+/*
+ * socket helpers
+ */
+
+/*
+ * initiate connection to a remote socket.
+ */
+/*
+ * Create a kernel TCP socket and start a non-blocking connect to the
+ * peer address.  -EINPROGRESS is treated as success (completion is
+ * reported via ceph_sock_state_change); any other error releases the
+ * socket and is returned.
+ */
+static int ceph_tcp_connect(struct ceph_connection *con)
+{
+ struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
+ struct socket *sock;
+ int ret;
+
+ BUG_ON(con->sock);
+ ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM,
+ IPPROTO_TCP, &sock);
+ if (ret)
+ return ret;
+ /* GFP_NOFS: the messenger may run in fs writeback context */
+ sock->sk->sk_allocation = GFP_NOFS;
+
+#ifdef CONFIG_LOCKDEP
+ lockdep_set_class(&sock->sk->sk_lock, &socket_class);
+#endif
+
+ set_sock_callbacks(sock, con);
+
+ dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr));
+
+ con_sock_state_connecting(con);
+ ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
+ O_NONBLOCK);
+ if (ret == -EINPROGRESS) {
+ dout("connect %s EINPROGRESS sk_state = %u\n",
+ ceph_pr_addr(&con->peer_addr.in_addr),
+ sock->sk->sk_state);
+ } else if (ret < 0) {
+ pr_err("connect %s error %d\n",
+ ceph_pr_addr(&con->peer_addr.in_addr), ret);
+ sock_release(sock);
+ con->error_msg = "connect error";
+
+ return ret;
+ }
+ con->sock = sock;
+ return 0;
+}
+
+/*
+ * Non-blocking receive of up to @len bytes into @buf.  Returns bytes
+ * read, 0 when nothing is available (-EAGAIN is mapped to 0), or a
+ * negative error.
+ */
+static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
+{
+ struct kvec iov = {buf, len};
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
+ int r;
+
+ r = kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags);
+ if (r == -EAGAIN)
+ r = 0;
+ return r;
+}
+
+/*
+ * Receive directly into @page at @page_offset.  The span must fit
+ * within the page; the page is kmapped only for the duration of the
+ * read.  Return semantics are those of ceph_tcp_recvmsg().
+ */
+static int ceph_tcp_recvpage(struct socket *sock, struct page *page,
+ int page_offset, size_t length)
+{
+ void *kaddr;
+ int ret;
+
+ BUG_ON(page_offset + length > PAGE_SIZE);
+
+ kaddr = kmap(page);
+ BUG_ON(!kaddr);
+ ret = ceph_tcp_recvmsg(sock, kaddr + page_offset, length);
+ kunmap(page);
+
+ return ret;
+}
+
+/*
+ * write something. @more is true if caller will be sending more data
+ * shortly.
+ */
+/*
+ * write something. @more is true if caller will be sending more data
+ * shortly.  Returns bytes sent (0 for -EAGAIN) or a negative error.
+ */
+static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
+ size_t kvlen, size_t len, int more)
+{
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
+ int r;
+
+ if (more)
+ msg.msg_flags |= MSG_MORE;
+ else
+ msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */
+
+ r = kernel_sendmsg(sock, &msg, iov, kvlen, len);
+ if (r == -EAGAIN)
+ r = 0;
+ return r;
+}
+
+/*
+ * Zero-copy send of part of @page via kernel_sendpage().  Returns
+ * bytes sent (0 for -EAGAIN) or a negative error.
+ */
+static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
+ int offset, size_t size, bool more)
+{
+ int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR);
+ int ret;
+
+ ret = kernel_sendpage(sock, page, offset, size, flags);
+ if (ret == -EAGAIN)
+ ret = 0;
+
+ return ret;
+}
+
+
+/*
+ * Shutdown/close the socket for the given connection.
+ */
+/*
+ * Shutdown/close the socket for the given connection, clear the
+ * SOCK_CLOSED flag, and move sock_state to CLOSED.  Returns the
+ * shutdown() result (0 if there was no socket).
+ */
+static int con_close_socket(struct ceph_connection *con)
+{
+ int rc = 0;
+
+ dout("con_close_socket on %p sock %p\n", con, con->sock);
+ if (con->sock) {
+ rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
+ sock_release(con->sock);
+ con->sock = NULL;
+ }
+
+ /*
+ * Forcibly clear the SOCK_CLOSED flag. It gets set
+ * independent of the connection mutex, and we could have
+ * received a socket close event before we had the chance to
+ * shut the socket down.
+ */
+ con_flag_clear(con, CON_FLAG_SOCK_CLOSED);
+
+ con_sock_state_closed(con);
+ return rc;
+}
+
+/*
+ * Reset a connection. Discard all incoming and outgoing messages
+ * and clear *_seq state.
+ */
+/*
+ * Detach @msg from its list and its connection: drop the connection
+ * reference the message held, then drop the message reference.
+ */
+static void ceph_msg_remove(struct ceph_msg *msg)
+{
+ list_del_init(&msg->list_head);
+ BUG_ON(msg->con == NULL);
+ msg->con->ops->put(msg->con);
+ msg->con = NULL;
+
+ ceph_msg_put(msg);
+}
+/* Remove and drop every message on @head. */
+static void ceph_msg_remove_list(struct list_head *head)
+{
+ while (!list_empty(head)) {
+ struct ceph_msg *msg = list_first_entry(head, struct ceph_msg,
+ list_head);
+ ceph_msg_remove(msg);
+ }
+}
+
+/*
+ * Discard all queued/sent/in-flight messages on @con and zero its
+ * connect/in/out sequence state.  Caller holds con->mutex (all callers
+ * in this file do -- see ceph_con_close()).
+ */
+static void reset_connection(struct ceph_connection *con)
+{
+ /* reset connection, out_queue, msg_ and connect_seq */
+ /* discard existing out_queue and msg_seq */
+ dout("reset_connection %p\n", con);
+ ceph_msg_remove_list(&con->out_queue);
+ ceph_msg_remove_list(&con->out_sent);
+
+ if (con->in_msg) {
+ BUG_ON(con->in_msg->con != con);
+ con->in_msg->con = NULL;
+ ceph_msg_put(con->in_msg);
+ con->in_msg = NULL;
+ con->ops->put(con);
+ }
+
+ con->connect_seq = 0;
+ con->out_seq = 0;
+ if (con->out_msg) {
+ ceph_msg_put(con->out_msg);
+ con->out_msg = NULL;
+ }
+ con->in_seq = 0;
+ con->in_seq_acked = 0;
+}
+
+/*
+ * mark a peer down. drop any open connections.
+ */
+/*
+ * mark a peer down. drop any open connections.
+ * Moves the connection to CON_STATE_CLOSED, clears all flags, discards
+ * queued messages, cancels pending work and closes the socket.
+ */
+void ceph_con_close(struct ceph_connection *con)
+{
+ mutex_lock(&con->mutex);
+ dout("con_close %p peer %s\n", con,
+ ceph_pr_addr(&con->peer_addr.in_addr));
+ con->state = CON_STATE_CLOSED;
+
+ con_flag_clear(con, CON_FLAG_LOSSYTX); /* so we retry next connect */
+ con_flag_clear(con, CON_FLAG_KEEPALIVE_PENDING);
+ con_flag_clear(con, CON_FLAG_WRITE_PENDING);
+ con_flag_clear(con, CON_FLAG_BACKOFF);
+
+ reset_connection(con);
+ con->peer_global_seq = 0;
+ cancel_delayed_work(&con->work);
+ con_close_socket(con);
+ mutex_unlock(&con->mutex);
+}
+EXPORT_SYMBOL(ceph_con_close);
+
+/*
+ * Reopen a closed connection, with a new peer address.
+ */
+/*
+ * Reopen a closed connection, with a new peer address.
+ * Records the peer identity/address, resets the backoff delay, moves
+ * to PREOPEN and queues the connection work to start connecting.
+ */
+void ceph_con_open(struct ceph_connection *con,
+ __u8 entity_type, __u64 entity_num,
+ struct ceph_entity_addr *addr)
+{
+ mutex_lock(&con->mutex);
+ dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
+
+ WARN_ON(con->state != CON_STATE_CLOSED);
+ con->state = CON_STATE_PREOPEN;
+
+ con->peer_name.type = (__u8) entity_type;
+ con->peer_name.num = cpu_to_le64(entity_num);
+
+ memcpy(&con->peer_addr, addr, sizeof(*addr));
+ con->delay = 0; /* reset backoff memory */
+ mutex_unlock(&con->mutex);
+ queue_con(con);
+}
+EXPORT_SYMBOL(ceph_con_open);
+
+/*
+ * return true if this connection ever successfully opened
+ */
+/* return true if this connection ever successfully opened */
+bool ceph_con_opened(struct ceph_connection *con)
+{
+ return con->connect_seq > 0;
+}
+
+/*
+ * initialize a new connection.
+ */
+/*
+ * initialize a new connection.
+ * Zeroes the struct, wires up @private/@ops/@msgr, initializes the
+ * mutex, queues and work item, and starts in CON_STATE_CLOSED.
+ */
+void ceph_con_init(struct ceph_connection *con, void *private,
+ const struct ceph_connection_operations *ops,
+ struct ceph_messenger *msgr)
+{
+ dout("con_init %p\n", con);
+ memset(con, 0, sizeof(*con));
+ con->private = private;
+ con->ops = ops;
+ con->msgr = msgr;
+
+ con_sock_state_init(con);
+
+ mutex_init(&con->mutex);
+ INIT_LIST_HEAD(&con->out_queue);
+ INIT_LIST_HEAD(&con->out_sent);
+ INIT_DELAYED_WORK(&con->work, con_work);
+
+ con->state = CON_STATE_CLOSED;
+}
+EXPORT_SYMBOL(ceph_con_init);
+
+
+/*
+ * We maintain a global counter to order connection attempts. Get
+ * a unique seq greater than @gt.
+ */
+/*
+ * Return a fresh global connection sequence number strictly greater
+ * than @gt, bumping the shared counter under global_seq_lock.
+ */
+static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt)
+{
+ u32 ret;
+
+ spin_lock(&msgr->global_seq_lock);
+ if (msgr->global_seq < gt)
+ msgr->global_seq = gt;
+ ret = ++msgr->global_seq;
+ spin_unlock(&msgr->global_seq_lock);
+ return ret;
+}
+
+/* Reset the connection's outgoing kvec array to empty. */
+static void con_out_kvec_reset(struct ceph_connection *con)
+{
+ con->out_kvec_left = 0;
+ con->out_kvec_bytes = 0;
+ con->out_kvec_cur = &con->out_kvec[0];
+}
+
+/*
+ * Append a (data, size) entry to the outgoing kvec array; BUGs if the
+ * fixed-size array is already full.
+ */
+static void con_out_kvec_add(struct ceph_connection *con,
+ size_t size, void *data)
+{
+ int index;
+
+ index = con->out_kvec_left;
+ BUG_ON(index >= ARRAY_SIZE(con->out_kvec));
+
+ con->out_kvec[index].iov_len = size;
+ con->out_kvec[index].iov_base = data;
+ con->out_kvec_left++;
+ con->out_kvec_bytes += size;
+}
+
+#ifdef CONFIG_BLOCK
+
+/*
+ * For a bio data item, a piece is whatever remains of the next
+ * entry in the current bio iovec, or the first entry in the next
+ * bio in the list.
+ */
+/*
+ * Initialize a cursor over a bio data item: position at the start of
+ * the first bio, consuming at most @length (or the bio chain length,
+ * whichever is smaller).
+ */
+static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor,
+ size_t length)
+{
+ struct ceph_msg_data *data = cursor->data;
+ struct bio *bio;
+
+ BUG_ON(data->type != CEPH_MSG_DATA_BIO);
+
+ bio = data->bio;
+ BUG_ON(!bio);
+
+ cursor->resid = min(length, data->bio_length);
+ cursor->bio = bio;
+ cursor->bvec_iter = bio->bi_iter;
+ cursor->last_piece =
+ cursor->resid <= bio_iter_len(bio, cursor->bvec_iter);
+}
+
+/*
+ * Return the page holding the next piece of the bio data item, and
+ * its offset/length within that page.  Does not advance the cursor.
+ */
+static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor,
+ size_t *page_offset,
+ size_t *length)
+{
+ struct ceph_msg_data *data = cursor->data;
+ struct bio *bio;
+ struct bio_vec bio_vec;
+
+ BUG_ON(data->type != CEPH_MSG_DATA_BIO);
+
+ bio = cursor->bio;
+ BUG_ON(!bio);
+
+ bio_vec = bio_iter_iovec(bio, cursor->bvec_iter);
+
+ *page_offset = (size_t) bio_vec.bv_offset;
+ BUG_ON(*page_offset >= PAGE_SIZE);
+ if (cursor->last_piece) /* pagelist offset is always 0 */
+ *length = cursor->resid;
+ else
+ *length = (size_t) bio_vec.bv_len;
+ BUG_ON(*length > cursor->resid);
+ BUG_ON(*page_offset + *length > PAGE_SIZE);
+
+ return bio_vec.bv_page;
+}
+
+/*
+ * Consume @bytes from the bio cursor.  Returns false while more bytes
+ * remain in the current segment, true after moving to a new segment
+ * (possibly in the next bio of the chain).
+ */
+static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
+ size_t bytes)
+{
+ struct bio *bio;
+ struct bio_vec bio_vec;
+
+ BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO);
+
+ bio = cursor->bio;
+ BUG_ON(!bio);
+
+ bio_vec = bio_iter_iovec(bio, cursor->bvec_iter);
+
+ /* Advance the cursor offset */
+
+ BUG_ON(cursor->resid < bytes);
+ cursor->resid -= bytes;
+
+ bio_advance_iter(bio, &cursor->bvec_iter, bytes);
+
+ if (bytes < bio_vec.bv_len)
+ return false; /* more bytes to process in this segment */
+
+ /* Move on to the next segment, and possibly the next bio */
+
+ if (!cursor->bvec_iter.bi_size) {
+ bio = bio->bi_next;
+ cursor->bio = bio;
+ if (bio)
+ cursor->bvec_iter = bio->bi_iter;
+ else
+ memset(&cursor->bvec_iter, 0,
+ sizeof(cursor->bvec_iter));
+ }
+
+ if (!cursor->last_piece) {
+ BUG_ON(!cursor->resid);
+ BUG_ON(!bio);
+ /* A short read is OK, so use <= rather than == */
+ if (cursor->resid <= bio_iter_len(bio, cursor->bvec_iter))
+ cursor->last_piece = true;
+ }
+
+ return true;
+}
+#endif /* CONFIG_BLOCK */
+
+/*
+ * For a page array, a piece comes from the first page in the array
+ * that has not already been fully consumed.
+ */
+/*
+ * Initialize a cursor over a page-array data item.  The first page may
+ * start at a non-zero offset (data->alignment within the page).
+ */
+static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor,
+ size_t length)
+{
+ struct ceph_msg_data *data = cursor->data;
+ int page_count;
+
+ BUG_ON(data->type != CEPH_MSG_DATA_PAGES);
+
+ BUG_ON(!data->pages);
+ BUG_ON(!data->length);
+
+ cursor->resid = min(length, data->length);
+ page_count = calc_pages_for(data->alignment, (u64)data->length);
+ cursor->page_offset = data->alignment & ~PAGE_MASK;
+ cursor->page_index = 0;
+ /* page_count is stored in an unsigned short -- guard the narrowing */
+ BUG_ON(page_count > (int)USHRT_MAX);
+ cursor->page_count = (unsigned short)page_count;
+ BUG_ON(length > SIZE_MAX - cursor->page_offset);
+ cursor->last_piece = (size_t)cursor->page_offset + length <= PAGE_SIZE;
+}
+
+/*
+ * Return the current page of the page-array item plus the
+ * offset/length of the piece within it.  Does not advance the cursor.
+ */
+static struct page *
+ceph_msg_data_pages_next(struct ceph_msg_data_cursor *cursor,
+ size_t *page_offset, size_t *length)
+{
+ struct ceph_msg_data *data = cursor->data;
+
+ BUG_ON(data->type != CEPH_MSG_DATA_PAGES);
+
+ BUG_ON(cursor->page_index >= cursor->page_count);
+ BUG_ON(cursor->page_offset >= PAGE_SIZE);
+
+ *page_offset = cursor->page_offset;
+ if (cursor->last_piece)
+ *length = cursor->resid;
+ else
+ *length = PAGE_SIZE - *page_offset;
+
+ return data->pages[cursor->page_index];
+}
+
+/*
+ * Consume @bytes from the page-array cursor.  Returns true only when
+ * the current page is finished and there is more data, i.e. when the
+ * cursor moved to the next page.
+ */
+static bool ceph_msg_data_pages_advance(struct ceph_msg_data_cursor *cursor,
+ size_t bytes)
+{
+ BUG_ON(cursor->data->type != CEPH_MSG_DATA_PAGES);
+
+ BUG_ON(cursor->page_offset + bytes > PAGE_SIZE);
+
+ /* Advance the cursor page offset */
+
+ cursor->resid -= bytes;
+ cursor->page_offset = (cursor->page_offset + bytes) & ~PAGE_MASK;
+ if (!bytes || cursor->page_offset)
+ return false; /* more bytes to process in the current page */
+
+ if (!cursor->resid)
+ return false; /* no more data */
+
+ /* Move on to the next page; offset is already at 0 */
+
+ BUG_ON(cursor->page_index >= cursor->page_count);
+ cursor->page_index++;
+ cursor->last_piece = cursor->resid <= PAGE_SIZE;
+
+ return true;
+}
+
+/*
+ * For a pagelist, a piece is whatever remains to be consumed in the
+ * first page in the list, or the front of the next page.
+ */
+static void
+ceph_msg_data_pagelist_cursor_init(struct ceph_msg_data_cursor *cursor,
+ size_t length)
+{
+ struct ceph_msg_data *data = cursor->data;
+ struct ceph_pagelist *pagelist;
+ struct page *page;
+
+ BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
+
+ pagelist = data->pagelist;
+ BUG_ON(!pagelist);
+
+ /* NOTE: a zero length leaves the cursor fields untouched */
+ if (!length)
+ return; /* pagelist can be assigned but empty */
+
+ BUG_ON(list_empty(&pagelist->head));
+ page = list_first_entry(&pagelist->head, struct page, lru);
+
+ cursor->resid = min(length, pagelist->length);
+ cursor->page = page;
+ cursor->offset = 0;
+ cursor->last_piece = cursor->resid <= PAGE_SIZE;
+}
+
+/*
+ * Return the page holding the next piece of a PAGELIST data item,
+ * along with the offset into that page and the piece length.
+ */
+static struct page *
+ceph_msg_data_pagelist_next(struct ceph_msg_data_cursor *cursor,
+ size_t *page_offset, size_t *length)
+{
+ struct ceph_msg_data *data = cursor->data;
+ struct ceph_pagelist *pagelist;
+
+ BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
+
+ pagelist = data->pagelist;
+ BUG_ON(!pagelist);
+
+ BUG_ON(!cursor->page);
+ BUG_ON(cursor->offset + cursor->resid != pagelist->length);
+
+ /* offset of first page in pagelist is always 0 */
+ *page_offset = cursor->offset & ~PAGE_MASK;
+ if (cursor->last_piece)
+ *length = cursor->resid;
+ else
+ *length = PAGE_SIZE - *page_offset;
+
+ return cursor->page;
+}
+
+/*
+ * Consume "bytes" from a PAGELIST data item.  Returns true if this
+ * moved the cursor onto the next page in the list, false otherwise.
+ */
+static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
+ size_t bytes)
+{
+ struct ceph_msg_data *data = cursor->data;
+ struct ceph_pagelist *pagelist;
+
+ BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
+
+ pagelist = data->pagelist;
+ BUG_ON(!pagelist);
+
+ BUG_ON(cursor->offset + cursor->resid != pagelist->length);
+ BUG_ON((cursor->offset & ~PAGE_MASK) + bytes > PAGE_SIZE);
+
+ /* Advance the cursor offset */
+
+ cursor->resid -= bytes;
+ cursor->offset += bytes;
+ /* offset of first page in pagelist is always 0 */
+ if (!bytes || cursor->offset & ~PAGE_MASK)
+ return false; /* more bytes to process in the current page */
+
+ if (!cursor->resid)
+ return false; /* no more data */
+
+ /* Move on to the next page */
+
+ BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head));
+ cursor->page = list_entry_next(cursor->page, lru);
+ cursor->last_piece = cursor->resid <= PAGE_SIZE;
+
+ return true;
+}
+
+/*
+ * Message data is handled (sent or received) in pieces, where each
+ * piece resides on a single page.  The network layer might not
+ * consume an entire piece at once.  A data item's cursor keeps
+ * track of which piece is next to process and how much remains to
+ * be processed in that piece.  It also tracks whether the current
+ * piece is the last one in the data item.
+ */
+static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
+{
+ size_t length = cursor->total_resid;
+
+ switch (cursor->data->type) {
+ case CEPH_MSG_DATA_PAGELIST:
+ ceph_msg_data_pagelist_cursor_init(cursor, length);
+ break;
+ case CEPH_MSG_DATA_PAGES:
+ ceph_msg_data_pages_cursor_init(cursor, length);
+ break;
+#ifdef CONFIG_BLOCK
+ case CEPH_MSG_DATA_BIO:
+ ceph_msg_data_bio_cursor_init(cursor, length);
+ break;
+#endif /* CONFIG_BLOCK */
+ case CEPH_MSG_DATA_NONE:
+ default:
+ /* deliberately tolerated; caller's BUG_ONs catch misuse */
+ /* BUG(); */
+ break;
+ }
+ cursor->need_crc = true;
+}
+
+/*
+ * Initialize a message's cursor to cover "length" bytes of its
+ * data, starting with the first item on the msg's data list.
+ */
+static void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length)
+{
+ struct ceph_msg_data_cursor *cursor = &msg->cursor;
+ struct ceph_msg_data *data;
+
+ BUG_ON(!length);
+ BUG_ON(length > msg->data_length);
+ BUG_ON(list_empty(&msg->data));
+
+ cursor->data_head = &msg->data;
+ cursor->total_resid = length;
+ data = list_first_entry(&msg->data, struct ceph_msg_data, links);
+ cursor->data = data;
+
+ __ceph_msg_data_cursor_init(cursor);
+}
+
+/*
+ * Return the page containing the next piece to process for a given
+ * data item, and supply the page offset and length of that piece.
+ * Indicate whether this is the last piece in this data item.
+ */
+static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
+ size_t *page_offset, size_t *length,
+ bool *last_piece)
+{
+ struct page *page;
+
+ switch (cursor->data->type) {
+ case CEPH_MSG_DATA_PAGELIST:
+ page = ceph_msg_data_pagelist_next(cursor, page_offset, length);
+ break;
+ case CEPH_MSG_DATA_PAGES:
+ page = ceph_msg_data_pages_next(cursor, page_offset, length);
+ break;
+#ifdef CONFIG_BLOCK
+ case CEPH_MSG_DATA_BIO:
+ page = ceph_msg_data_bio_next(cursor, page_offset, length);
+ break;
+#endif /* CONFIG_BLOCK */
+ case CEPH_MSG_DATA_NONE:
+ default:
+ page = NULL;
+ break;
+ }
+ /* a NULL page or empty piece means the cursor was misused */
+ BUG_ON(!page);
+ BUG_ON(*page_offset + *length > PAGE_SIZE);
+ BUG_ON(!*length);
+ if (last_piece)
+ *last_piece = cursor->last_piece;
+
+ return page;
+}
+
+/*
+ * Returns true if the result moves the cursor on to the next piece
+ * of the data item.
+ */
+static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
+ size_t bytes)
+{
+ bool new_piece;
+
+ BUG_ON(bytes > cursor->resid);
+ switch (cursor->data->type) {
+ case CEPH_MSG_DATA_PAGELIST:
+ new_piece = ceph_msg_data_pagelist_advance(cursor, bytes);
+ break;
+ case CEPH_MSG_DATA_PAGES:
+ new_piece = ceph_msg_data_pages_advance(cursor, bytes);
+ break;
+#ifdef CONFIG_BLOCK
+ case CEPH_MSG_DATA_BIO:
+ new_piece = ceph_msg_data_bio_advance(cursor, bytes);
+ break;
+#endif /* CONFIG_BLOCK */
+ case CEPH_MSG_DATA_NONE:
+ default:
+ BUG();
+ break;
+ }
+ cursor->total_resid -= bytes;
+
+ /* current item exhausted but more message data remains:
+ * step to the next data item and re-init the cursor for it */
+ if (!cursor->resid && cursor->total_resid) {
+ WARN_ON(!cursor->last_piece);
+ BUG_ON(list_is_last(&cursor->data->links, cursor->data_head));
+ cursor->data = list_entry_next(cursor->data, links);
+ __ceph_msg_data_cursor_init(cursor);
+ new_piece = true;
+ }
+ cursor->need_crc = new_piece;
+
+ return new_piece;
+}
+
+/*
+ * Set up a message's data cursor before sending or receiving its
+ * data payload.  data_len must be non-zero.
+ */
+static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
+{
+ BUG_ON(!msg);
+ BUG_ON(!data_len);
+
+ /* Initialize data cursor */
+
+ ceph_msg_data_cursor_init(msg, (size_t)data_len);
+}
+
+/*
+ * Prepare footer for currently outgoing message, and finish things
+ * off.  Assumes out_kvec* are already valid.. we just add on to the end.
+ */
+static void prepare_write_message_footer(struct ceph_connection *con)
+{
+ struct ceph_msg *m = con->out_msg;
+ /* next free kvec slot; assumes the array has room for it */
+ int v = con->out_kvec_left;
+
+ m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
+
+ dout("prepare_write_message_footer %p\n", con);
+ con->out_kvec_is_msg = true;
+ con->out_kvec[v].iov_base = &m->footer;
+ con->out_kvec[v].iov_len = sizeof(m->footer);
+ con->out_kvec_bytes += sizeof(m->footer);
+ con->out_kvec_left++;
+ con->out_more = m->more_to_follow;
+ con->out_msg_done = true;
+}
+
+/*
+ * Prepare headers for the next outgoing message: queue tag, header,
+ * front and middle sections (plus an opportunistic ack), compute
+ * their crcs, and either queue the footer (no data payload) or set
+ * up the data cursor for a later write_partial_message_data().
+ */
+static void prepare_write_message(struct ceph_connection *con)
+{
+ struct ceph_msg *m;
+ u32 crc;
+
+ con_out_kvec_reset(con);
+ con->out_kvec_is_msg = true;
+ con->out_msg_done = false;
+
+ /* Sneak an ack in there first? If we can get it into the same
+ * TCP packet that's a good thing. */
+ if (con->in_seq > con->in_seq_acked) {
+ con->in_seq_acked = con->in_seq;
+ con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
+ con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
+ con_out_kvec_add(con, sizeof (con->out_temp_ack),
+ &con->out_temp_ack);
+ }
+
+ BUG_ON(list_empty(&con->out_queue));
+ m = list_first_entry(&con->out_queue, struct ceph_msg, list_head);
+ con->out_msg = m;
+ BUG_ON(m->con != con);
+
+ /* put message on sent list */
+ ceph_msg_get(m);
+ list_move_tail(&m->list_head, &con->out_sent);
+
+ /*
+ * only assign outgoing seq # if we haven't sent this message
+ * yet. if it is requeued, resend with its original seq.
+ */
+ if (m->needs_out_seq) {
+ m->hdr.seq = cpu_to_le64(++con->out_seq);
+ m->needs_out_seq = false;
+ }
+ WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len));
+
+ dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n",
+ m, con->out_seq, le16_to_cpu(m->hdr.type),
+ le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len),
+ m->data_length);
+ BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len);
+
+ /* tag + hdr + front + middle */
+ con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
+ con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
+ con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
+
+ if (m->middle)
+ con_out_kvec_add(con, m->middle->vec.iov_len,
+ m->middle->vec.iov_base);
+
+ /* fill in crc (except data pages), footer */
+ crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
+ con->out_msg->hdr.crc = cpu_to_le32(crc);
+ con->out_msg->footer.flags = 0;
+
+ crc = crc32c(0, m->front.iov_base, m->front.iov_len);
+ con->out_msg->footer.front_crc = cpu_to_le32(crc);
+ if (m->middle) {
+ crc = crc32c(0, m->middle->vec.iov_base,
+ m->middle->vec.iov_len);
+ con->out_msg->footer.middle_crc = cpu_to_le32(crc);
+ } else
+ con->out_msg->footer.middle_crc = 0;
+ dout("%s front_crc %u middle_crc %u\n", __func__,
+ le32_to_cpu(con->out_msg->footer.front_crc),
+ le32_to_cpu(con->out_msg->footer.middle_crc));
+
+ /* is there a data payload? */
+ con->out_msg->footer.data_crc = 0;
+ if (m->data_length) {
+ prepare_message_data(con->out_msg, m->data_length);
+ con->out_more = 1; /* data + footer will follow */
+ } else {
+ /* no, queue up footer too and be done */
+ prepare_write_message_footer(con);
+ }
+
+ con_flag_set(con, CON_FLAG_WRITE_PENDING);
+}
+
+/*
+ * Prepare an ack: queue the ack tag plus the (le64) acked seq and
+ * flag the connection as having pending output.
+ */
+static void prepare_write_ack(struct ceph_connection *con)
+{
+ dout("prepare_write_ack %p %llu -> %llu\n", con,
+ con->in_seq_acked, con->in_seq);
+ con->in_seq_acked = con->in_seq;
+
+ con_out_kvec_reset(con);
+
+ con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
+
+ con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
+ con_out_kvec_add(con, sizeof (con->out_temp_ack),
+ &con->out_temp_ack);
+
+ con->out_more = 1; /* more will follow.. eventually.. */
+ con_flag_set(con, CON_FLAG_WRITE_PENDING);
+}
+
+/*
+ * Prepare to share the seq during handshake (no tag byte here,
+ * unlike prepare_write_ack: the peer asked for a bare seq).
+ */
+static void prepare_write_seq(struct ceph_connection *con)
+{
+ dout("prepare_write_seq %p %llu -> %llu\n", con,
+ con->in_seq_acked, con->in_seq);
+ con->in_seq_acked = con->in_seq;
+
+ con_out_kvec_reset(con);
+
+ con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
+ con_out_kvec_add(con, sizeof (con->out_temp_ack),
+ &con->out_temp_ack);
+
+ con_flag_set(con, CON_FLAG_WRITE_PENDING);
+}
+
+/*
+ * Prepare to write keepalive byte.
+ */
+static void prepare_write_keepalive(struct ceph_connection *con)
+{
+ dout("prepare_write_keepalive %p\n", con);
+ con_out_kvec_reset(con);
+ con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
+ con_flag_set(con, CON_FLAG_WRITE_PENDING);
+}
+
+/*
+ * Connection negotiation.
+ */
+
+/*
+ * Obtain an authorizer from the upper layer for the connect
+ * handshake.  Returns NULL when no get_authorizer op exists, an
+ * ERR_PTR on failure, or the handshake (also recording its reply
+ * buffer on the con).  Drops con->mutex around the callback, so
+ * the connection state is re-checked afterwards.
+ */
+static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection *con,
+ int *auth_proto)
+{
+ struct ceph_auth_handshake *auth;
+
+ if (!con->ops->get_authorizer) {
+ con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
+ con->out_connect.authorizer_len = 0;
+ return NULL;
+ }
+
+ /* Can't hold the mutex while getting authorizer */
+ mutex_unlock(&con->mutex);
+ auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
+ mutex_lock(&con->mutex);
+
+ if (IS_ERR(auth))
+ return auth;
+ if (con->state != CON_STATE_NEGOTIATING)
+ return ERR_PTR(-EAGAIN);
+
+ con->auth_reply_buf = auth->authorizer_reply_buf;
+ con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
+ return auth;
+}
+
+/*
+ * We connected to a peer and are saying hello: queue the banner
+ * string and our encoded address.
+ */
+static void prepare_write_banner(struct ceph_connection *con)
+{
+ con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
+ con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr),
+ &con->msgr->my_enc_addr);
+
+ con->out_more = 0;
+ con_flag_set(con, CON_FLAG_WRITE_PENDING);
+}
+
+/*
+ * Fill in and queue the ceph_msg_connect structure (plus any
+ * authorizer payload) for the negotiation handshake.  Returns 0 or
+ * a negative error from authorizer setup.
+ */
+static int prepare_write_connect(struct ceph_connection *con)
+{
+ unsigned int global_seq = get_global_seq(con->msgr, 0);
+ int proto;
+ int auth_proto;
+ struct ceph_auth_handshake *auth;
+
+ switch (con->peer_name.type) {
+ case CEPH_ENTITY_TYPE_MON:
+ proto = CEPH_MONC_PROTOCOL;
+ break;
+ case CEPH_ENTITY_TYPE_OSD:
+ proto = CEPH_OSDC_PROTOCOL;
+ break;
+ case CEPH_ENTITY_TYPE_MDS:
+ proto = CEPH_MDSC_PROTOCOL;
+ break;
+ default:
+ BUG();
+ }
+
+ dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
+ con->connect_seq, global_seq, proto);
+
+ con->out_connect.features = cpu_to_le64(con->msgr->supported_features);
+ con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
+ con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
+ con->out_connect.global_seq = cpu_to_le32(global_seq);
+ con->out_connect.protocol_version = cpu_to_le32(proto);
+ con->out_connect.flags = 0;
+
+ auth_proto = CEPH_AUTH_UNKNOWN;
+ auth = get_connect_authorizer(con, &auth_proto);
+ if (IS_ERR(auth))
+ return PTR_ERR(auth);
+
+ con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
+ con->out_connect.authorizer_len = auth ?
+ cpu_to_le32(auth->authorizer_buf_len) : 0;
+
+ con_out_kvec_add(con, sizeof (con->out_connect),
+ &con->out_connect);
+ if (auth && auth->authorizer_buf_len)
+ con_out_kvec_add(con, auth->authorizer_buf_len,
+ auth->authorizer_buf);
+
+ con->out_more = 0;
+ con_flag_set(con, CON_FLAG_WRITE_PENDING);
+
+ return 0;
+}
+
+/*
+ * write as much of pending kvecs to the socket as we can.
+ *  1 -> done
+ *  0 -> socket full, but more to do
+ * <0 -> error
+ */
+static int write_partial_kvec(struct ceph_connection *con)
+{
+ int ret;
+
+ dout("write_partial_kvec %p %d left\n", con, con->out_kvec_bytes);
+ while (con->out_kvec_bytes > 0) {
+ ret = ceph_tcp_sendmsg(con->sock, con->out_kvec_cur,
+ con->out_kvec_left, con->out_kvec_bytes,
+ con->out_more);
+ if (ret <= 0)
+ goto out;
+ con->out_kvec_bytes -= ret;
+ if (con->out_kvec_bytes == 0)
+ break; /* done */
+
+ /* account for full iov entries consumed */
+ /* ret > 0 here, so the signed/unsigned compare is safe */
+ while (ret >= con->out_kvec_cur->iov_len) {
+ BUG_ON(!con->out_kvec_left);
+ ret -= con->out_kvec_cur->iov_len;
+ con->out_kvec_cur++;
+ con->out_kvec_left--;
+ }
+ /* and for a partially-consumed entry */
+ if (ret) {
+ con->out_kvec_cur->iov_len -= ret;
+ con->out_kvec_cur->iov_base += ret;
+ }
+ }
+ con->out_kvec_left = 0;
+ con->out_kvec_is_msg = false;
+ ret = 1;
+out:
+ dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con,
+ con->out_kvec_bytes, con->out_kvec_left, ret);
+ return ret; /* done! */
+}
+
+/*
+ * Fold "length" bytes of a page (starting at page_offset) into a
+ * running crc32c, mapping the page temporarily to do so.
+ */
+static u32 ceph_crc32c_page(u32 crc, struct page *page,
+ unsigned int page_offset,
+ unsigned int length)
+{
+ char *kaddr;
+
+ kaddr = kmap(page);
+ BUG_ON(kaddr == NULL);
+ crc = crc32c(crc, kaddr + page_offset, length);
+ kunmap(page);
+
+ return crc;
+}
+/*
+ * Write as much message data payload as we can.  If we finish, queue
+ * up the footer.
+ *  1 -> done, footer is now queued in out_kvec[].
+ *  0 -> socket full, but more to do
+ * <0 -> error
+ */
+static int write_partial_message_data(struct ceph_connection *con)
+{
+ struct ceph_msg *msg = con->out_msg;
+ struct ceph_msg_data_cursor *cursor = &msg->cursor;
+ bool do_datacrc = !con->msgr->nocrc;
+ u32 crc;
+
+ dout("%s %p msg %p\n", __func__, con, msg);
+
+ if (list_empty(&msg->data))
+ return -EINVAL;
+
+ /*
+ * Iterate through each page that contains data to be
+ * written, and send as much as possible for each.
+ *
+ * If we are calculating the data crc (the default), we will
+ * need to map the page. If we have no pages, they have
+ * been revoked, so use the zero page.
+ */
+ crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0;
+ while (cursor->resid) {
+ struct page *page;
+ size_t page_offset;
+ size_t length;
+ bool last_piece;
+ int ret;
+
+ page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
+ &last_piece);
+ ret = ceph_tcp_sendpage(con->sock, page, page_offset,
+ length, last_piece);
+ if (ret <= 0) {
+ /* stash the partial crc so we can resume later */
+ if (do_datacrc)
+ msg->footer.data_crc = cpu_to_le32(crc);
+
+ return ret;
+ }
+ if (do_datacrc && cursor->need_crc)
+ crc = ceph_crc32c_page(crc, page, page_offset, length);
+ /*
+ * Advance the cursor; it maintains need_crc for the
+ * next piece itself, so the return value (new piece or
+ * not) is not needed here.  The old "need_crc" local
+ * was a dead store and has been dropped.
+ */
+ ceph_msg_data_advance(&msg->cursor, (size_t)ret);
+ }
+
+ dout("%s %p msg %p done\n", __func__, con, msg);
+
+ /* prepare and queue up footer, too */
+ if (do_datacrc)
+ msg->footer.data_crc = cpu_to_le32(crc);
+ else
+ msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
+ con_out_kvec_reset(con);
+ prepare_write_message_footer(con);
+
+ return 1; /* must return > 0 to indicate success */
+}
+
+/*
+ * write some zeros: consume con->out_skip by sending from the
+ * shared zero_page, at most one page per iteration.
+ *  1 -> done, 0 -> socket full, <0 -> error
+ */
+static int write_partial_skip(struct ceph_connection *con)
+{
+ int ret;
+
+ while (con->out_skip > 0) {
+ size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE);
+
+ ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true);
+ if (ret <= 0)
+ goto out;
+ con->out_skip -= ret;
+ }
+ ret = 1;
+out:
+ return ret;
+}
+
+/*
+ * Prepare to read connection handshake, or an ack.
+ */
+static void prepare_read_banner(struct ceph_connection *con)
+{
+ dout("prepare_read_banner %p\n", con);
+ con->in_base_pos = 0;
+}
+
+/* Reset read position ahead of reading the connect reply. */
+static void prepare_read_connect(struct ceph_connection *con)
+{
+ dout("prepare_read_connect %p\n", con);
+ con->in_base_pos = 0;
+}
+
+/* Reset read position ahead of reading an ack. */
+static void prepare_read_ack(struct ceph_connection *con)
+{
+ dout("prepare_read_ack %p\n", con);
+ con->in_base_pos = 0;
+}
+
+/* Reset read position and expect a bare seq (handshake SEQ tag). */
+static void prepare_read_seq(struct ceph_connection *con)
+{
+ dout("prepare_read_seq %p\n", con);
+ con->in_base_pos = 0;
+ con->in_tag = CEPH_MSGR_TAG_SEQ;
+}
+
+/* Reset read position and go back to waiting for the next tag byte. */
+static void prepare_read_tag(struct ceph_connection *con)
+{
+ dout("prepare_read_tag %p\n", con);
+ con->in_base_pos = 0;
+ con->in_tag = CEPH_MSGR_TAG_READY;
+}
+
+/*
+ * Prepare to read a message: reset read position and the per-section
+ * crc accumulators.  in_msg must not already be set.
+ */
+static int prepare_read_message(struct ceph_connection *con)
+{
+ dout("prepare_read_message %p\n", con);
+ BUG_ON(con->in_msg != NULL);
+ con->in_base_pos = 0;
+ con->in_front_crc = con->in_middle_crc = con->in_data_crc = 0;
+ return 0;
+}
+
+
+/*
+ * Read into "object" until con->in_base_pos reaches "end".  "size"
+ * is the object's size, so (end - size) is its starting offset in
+ * the current read sequence.  Returns 1 when the object is complete,
+ * 0 if the socket ran dry, or a negative error.
+ */
+static int read_partial(struct ceph_connection *con,
+ int end, int size, void *object)
+{
+ while (con->in_base_pos < end) {
+ int left = end - con->in_base_pos;
+ int have = size - left;
+ int ret = ceph_tcp_recvmsg(con->sock, object + have, left);
+ if (ret <= 0)
+ return ret;
+ con->in_base_pos += ret;
+ }
+ return 1;
+}
+
+
+/*
+ * Read all or part of the connect-side handshake on a new connection
+ */
+static int read_partial_banner(struct ceph_connection *con)
+{
+ int size;
+ int end;
+ int ret;
+
+ dout("read_partial_banner %p at %d\n", con, con->in_base_pos);
+
+ /* peer's banner */
+ size = strlen(CEPH_BANNER);
+ end = size;
+ ret = read_partial(con, end, size, con->in_banner);
+ if (ret <= 0)
+ goto out;
+
+ size = sizeof (con->actual_peer_addr);
+ end += size;
+ ret = read_partial(con, end, size, &con->actual_peer_addr);
+ if (ret <= 0)
+ goto out;
+
+ size = sizeof (con->peer_addr_for_me);
+ end += size;
+ ret = read_partial(con, end, size, &con->peer_addr_for_me);
+ if (ret <= 0)
+ goto out;
+
+out:
+ return ret;
+}
+
+/*
+ * Read the connect reply, then the variable-length authorizer reply
+ * that follows it.  Returns read_partial() semantics (1/0/<0) or
+ * -EINVAL on an oversized authorizer reply.
+ */
+static int read_partial_connect(struct ceph_connection *con)
+{
+ int size;
+ int end;
+ int ret;
+
+ dout("read_partial_connect %p at %d\n", con, con->in_base_pos);
+
+ size = sizeof (con->in_reply);
+ end = size;
+ ret = read_partial(con, end, size, &con->in_reply);
+ if (ret <= 0)
+ goto out;
+
+ /*
+ * authorizer_len comes straight off the wire; bound it by the
+ * reply buffer we allocated before reading into it, or a
+ * malicious peer can overrun auth_reply_buf.  Compare as
+ * unsigned so values above INT_MAX are rejected as well.
+ */
+ size = le32_to_cpu(con->in_reply.authorizer_len);
+ if ((unsigned int)size > (unsigned int)con->auth_reply_buf_len) {
+ pr_err("authorizer reply too big: %u > %d\n",
+ (unsigned int)size, con->auth_reply_buf_len);
+ ret = -EINVAL;
+ goto out;
+ }
+ end += size;
+ ret = read_partial(con, end, size, con->auth_reply_buf);
+ if (ret <= 0)
+ goto out;
+
+ dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n",
+ con, (int)con->in_reply.tag,
+ le32_to_cpu(con->in_reply.connect_seq),
+ le32_to_cpu(con->in_reply.global_seq));
+out:
+ return ret;
+
+}
+
+/*
+ * Verify the hello banner looks okay.  Returns 0 on match, -1 (and
+ * sets con->error_msg) on a bad banner.
+ */
+static int verify_hello(struct ceph_connection *con)
+{
+ if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) {
+ pr_err("connect to %s got bad banner\n",
+ ceph_pr_addr(&con->peer_addr.in_addr));
+ con->error_msg = "protocol error, bad banner";
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * True if the address is the IPv4/IPv6 "any" address (all zeros);
+ * false for other (or unknown) address families.
+ */
+static bool addr_is_blank(struct sockaddr_storage *ss)
+{
+ switch (ss->ss_family) {
+ case AF_INET:
+ return ((struct sockaddr_in *)ss)->sin_addr.s_addr == 0;
+ case AF_INET6:
+ return
+ ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[0] == 0 &&
+ ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[1] == 0 &&
+ ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[2] == 0 &&
+ ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[3] == 0;
+ }
+ return false;
+}
+
+/* Return the port in host byte order, or 0 for unknown families. */
+static int addr_port(struct sockaddr_storage *ss)
+{
+ switch (ss->ss_family) {
+ case AF_INET:
+ return ntohs(((struct sockaddr_in *)ss)->sin_port);
+ case AF_INET6:
+ return ntohs(((struct sockaddr_in6 *)ss)->sin6_port);
+ }
+ return 0;
+}
+
+/* Set the port (host byte order); no-op for unknown families. */
+static void addr_set_port(struct sockaddr_storage *ss, int p)
+{
+ switch (ss->ss_family) {
+ case AF_INET:
+ ((struct sockaddr_in *)ss)->sin_port = htons(p);
+ break;
+ case AF_INET6:
+ ((struct sockaddr_in6 *)ss)->sin6_port = htons(p);
+ break;
+ }
+}
+
+/*
+ * Unlike other *_pton function semantics, zero indicates success.
+ * Tries IPv4 first, then IPv6; fills *ss and, via *ipend, reports
+ * where parsing stopped.  Returns -EINVAL if neither form parses.
+ */
+static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss,
+ char delim, const char **ipend)
+{
+ struct sockaddr_in *in4 = (struct sockaddr_in *) ss;
+ struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss;
+
+ memset(ss, 0, sizeof(*ss));
+
+ if (in4_pton(str, len, (u8 *)&in4->sin_addr.s_addr, delim, ipend)) {
+ ss->ss_family = AF_INET;
+ return 0;
+ }
+
+ if (in6_pton(str, len, (u8 *)&in6->sin6_addr.s6_addr, delim, ipend)) {
+ ss->ss_family = AF_INET6;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+/*
+ * Extract hostname string and resolve using kernel DNS facility.
+ * The stub below (no CONFIG_CEPH_LIB_USE_DNS_RESOLVER) simply
+ * fails with -EINVAL so callers fall through to their error path.
+ */
+#ifdef CONFIG_CEPH_LIB_USE_DNS_RESOLVER
+static int ceph_dns_resolve_name(const char *name, size_t namelen,
+ struct sockaddr_storage *ss, char delim, const char **ipend)
+{
+ const char *end, *delim_p;
+ char *colon_p, *ip_addr = NULL;
+ int ip_len, ret;
+
+ /*
+ * The end of the hostname occurs immediately preceding the delimiter or
+ * the port marker (':') where the delimiter takes precedence.
+ */
+ delim_p = memchr(name, delim, namelen);
+ colon_p = memchr(name, ':', namelen);
+
+ if (delim_p && colon_p)
+ end = delim_p < colon_p ? delim_p : colon_p;
+ else if (!delim_p && colon_p)
+ end = colon_p;
+ else {
+ end = delim_p;
+ if (!end) /* case: hostname:/ */
+ end = name + namelen;
+ }
+
+ if (end <= name)
+ return -EINVAL;
+
+ /* do dns_resolve upcall */
+ ip_len = dns_query(NULL, name, end - name, NULL, &ip_addr, NULL);
+ if (ip_len > 0)
+ ret = ceph_pton(ip_addr, ip_len, ss, -1, NULL);
+ else
+ ret = -ESRCH;
+
+ kfree(ip_addr);
+
+ *ipend = end;
+
+ pr_info("resolve '%.*s' (ret=%d): %s\n", (int)(end - name), name,
+ ret, ret ? "failed" : ceph_pr_addr(ss));
+
+ return ret;
+}
+#else
+static inline int ceph_dns_resolve_name(const char *name, size_t namelen,
+ struct sockaddr_storage *ss, char delim, const char **ipend)
+{
+ return -EINVAL;
+}
+#endif
+
+/*
+ * Parse a server name (IP or hostname).  If a valid IP address is not found
+ * then try to extract a hostname to resolve using userspace DNS upcall.
+ */
+static int ceph_parse_server_name(const char *name, size_t namelen,
+ struct sockaddr_storage *ss, char delim, const char **ipend)
+{
+ int ret;
+
+ ret = ceph_pton(name, namelen, ss, delim, ipend);
+ if (ret)
+ ret = ceph_dns_resolve_name(name, namelen, ss, delim, ipend);
+
+ return ret;
+}
+
+/*
+ * Parse an ip[:port] list into an addr array.  Use the default
+ * monitor port if a port isn't specified.  On success returns 0 and
+ * stores the number of addresses parsed via *count (if non-NULL);
+ * on failure returns a negative error.
+ */
+int ceph_parse_ips(const char *c, const char *end,
+ struct ceph_entity_addr *addr,
+ int max_count, int *count)
+{
+ int i, ret = -EINVAL;
+ const char *p = c;
+
+ dout("parse_ips on '%.*s'\n", (int)(end-c), c);
+ for (i = 0; i < max_count; i++) {
+ const char *ipend;
+ struct sockaddr_storage *ss = &addr[i].in_addr;
+ int port;
+ char delim = ',';
+
+ if (*p == '[') {
+ delim = ']';
+ p++;
+ }
+
+ ret = ceph_parse_server_name(p, end - p, ss, delim, &ipend);
+ if (ret)
+ goto bad;
+ ret = -EINVAL;
+
+ p = ipend;
+
+ if (delim == ']') {
+ if (*p != ']') {
+ dout("missing matching ']'\n");
+ goto bad;
+ }
+ p++;
+ }
+
+ /* port? */
+ if (p < end && *p == ':') {
+ port = 0;
+ p++;
+ while (p < end && *p >= '0' && *p <= '9') {
+ port = (port * 10) + (*p - '0');
+ p++;
+ /*
+ * Reject an out-of-range port as soon as
+ * it appears; checking only after the loop
+ * let "port" overflow int (undefined
+ * behavior) on a long digit string.
+ */
+ if (port > 65535)
+ goto bad;
+ }
+ if (port == 0)
+ port = CEPH_MON_PORT;
+ } else {
+ port = CEPH_MON_PORT;
+ }
+
+ addr_set_port(ss, port);
+
+ dout("parse_ips got %s\n", ceph_pr_addr(ss));
+
+ if (p == end)
+ break;
+ if (*p != ',')
+ goto bad;
+ p++;
+ }
+
+ if (p != end)
+ goto bad;
+
+ if (count)
+ *count = i + 1;
+ return 0;
+
+bad:
+ pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_parse_ips);
+
+/*
+ * Process the peer's banner exchange: verify the banner, decode the
+ * two addresses that follow, check we reached the intended peer, and
+ * learn our own address if we didn't know it yet.  Returns 0 or -1
+ * (with con->error_msg set).
+ */
+static int process_banner(struct ceph_connection *con)
+{
+ dout("process_banner on %p\n", con);
+
+ if (verify_hello(con) < 0)
+ return -1;
+
+ ceph_decode_addr(&con->actual_peer_addr);
+ ceph_decode_addr(&con->peer_addr_for_me);
+
+ /*
+ * Make sure the other end is who we wanted.  note that the other
+ * end may not yet know their ip address, so if it's 0.0.0.0, give
+ * them the benefit of the doubt.
+ */
+ if (memcmp(&con->peer_addr, &con->actual_peer_addr,
+ sizeof(con->peer_addr)) != 0 &&
+ !(addr_is_blank(&con->actual_peer_addr.in_addr) &&
+ con->actual_peer_addr.nonce == con->peer_addr.nonce)) {
+ pr_warning("wrong peer, want %s/%d, got %s/%d\n",
+ ceph_pr_addr(&con->peer_addr.in_addr),
+ (int)le32_to_cpu(con->peer_addr.nonce),
+ ceph_pr_addr(&con->actual_peer_addr.in_addr),
+ (int)le32_to_cpu(con->actual_peer_addr.nonce));
+ con->error_msg = "wrong peer at address";
+ return -1;
+ }
+
+ /*
+ * did we learn our address?
+ */
+ if (addr_is_blank(&con->msgr->inst.addr.in_addr)) {
+ int port = addr_port(&con->msgr->inst.addr.in_addr);
+
+ memcpy(&con->msgr->inst.addr.in_addr,
+ &con->peer_addr_for_me.in_addr,
+ sizeof(con->peer_addr_for_me.in_addr));
+ addr_set_port(&con->msgr->inst.addr.in_addr, port);
+ encode_my_addr(con->msgr);
+ dout("process_banner learned my addr is %s\n",
+ ceph_pr_addr(&con->msgr->inst.addr.in_addr));
+ }
+
+ return 0;
+}
+
+/*
+ * Act on the peer's reply to our connect attempt.  Depending on the
+ * reply tag this retries with new parameters (BADAUTHORIZER,
+ * RESETSESSION, RETRY_SESSION, RETRY_GLOBAL), transitions the
+ * connection to OPEN (SEQ/READY), or fails with con->error_msg set.
+ * Returns 0, -EAGAIN if state changed under us, or -1 on fatal error.
+ */
+static int process_connect(struct ceph_connection *con)
+{
+ u64 sup_feat = con->msgr->supported_features;
+ u64 req_feat = con->msgr->required_features;
+ u64 server_feat = ceph_sanitize_features(
+ le64_to_cpu(con->in_reply.features));
+ int ret;
+
+ dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
+
+ switch (con->in_reply.tag) {
+ case CEPH_MSGR_TAG_FEATURES:
+ pr_err("%s%lld %s feature set mismatch,"
+ " my %llx < server's %llx, missing %llx\n",
+ ENTITY_NAME(con->peer_name),
+ ceph_pr_addr(&con->peer_addr.in_addr),
+ sup_feat, server_feat, server_feat & ~sup_feat);
+ con->error_msg = "missing required protocol features";
+ reset_connection(con);
+ return -1;
+
+ case CEPH_MSGR_TAG_BADPROTOVER:
+ pr_err("%s%lld %s protocol version mismatch,"
+ " my %d != server's %d\n",
+ ENTITY_NAME(con->peer_name),
+ ceph_pr_addr(&con->peer_addr.in_addr),
+ le32_to_cpu(con->out_connect.protocol_version),
+ le32_to_cpu(con->in_reply.protocol_version));
+ con->error_msg = "protocol version mismatch";
+ reset_connection(con);
+ return -1;
+
+ case CEPH_MSGR_TAG_BADAUTHORIZER:
+ con->auth_retry++;
+ dout("process_connect %p got BADAUTHORIZER attempt %d\n", con,
+ con->auth_retry);
+ /* allow exactly one retry with a fresh authorizer */
+ if (con->auth_retry == 2) {
+ con->error_msg = "connect authorization failure";
+ return -1;
+ }
+ con_out_kvec_reset(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ return ret;
+ prepare_read_connect(con);
+ break;
+
+ case CEPH_MSGR_TAG_RESETSESSION:
+ /*
+ * If we connected with a large connect_seq but the peer
+ * has no record of a session with us (no connection, or
+ * connect_seq == 0), they will send RESETSESSION to indicate
+ * that they must have reset their session, and may have
+ * dropped messages.
+ */
+ dout("process_connect got RESET peer seq %u\n",
+ le32_to_cpu(con->in_reply.connect_seq));
+ pr_err("%s%lld %s connection reset\n",
+ ENTITY_NAME(con->peer_name),
+ ceph_pr_addr(&con->peer_addr.in_addr));
+ reset_connection(con);
+ con_out_kvec_reset(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ return ret;
+ prepare_read_connect(con);
+
+ /* Tell ceph about it. */
+ mutex_unlock(&con->mutex);
+ pr_info("reset on %s%lld\n", ENTITY_NAME(con->peer_name));
+ if (con->ops->peer_reset)
+ con->ops->peer_reset(con);
+ mutex_lock(&con->mutex);
+ if (con->state != CON_STATE_NEGOTIATING)
+ return -EAGAIN;
+ break;
+
+ case CEPH_MSGR_TAG_RETRY_SESSION:
+ /*
+ * If we sent a smaller connect_seq than the peer has, try
+ * again with a larger value.
+ */
+ dout("process_connect got RETRY_SESSION my seq %u, peer %u\n",
+ le32_to_cpu(con->out_connect.connect_seq),
+ le32_to_cpu(con->in_reply.connect_seq));
+ con->connect_seq = le32_to_cpu(con->in_reply.connect_seq);
+ con_out_kvec_reset(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ return ret;
+ prepare_read_connect(con);
+ break;
+
+ case CEPH_MSGR_TAG_RETRY_GLOBAL:
+ /*
+ * If we sent a smaller global_seq than the peer has, try
+ * again with a larger value.
+ */
+ dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n",
+ con->peer_global_seq,
+ le32_to_cpu(con->in_reply.global_seq));
+ get_global_seq(con->msgr,
+ le32_to_cpu(con->in_reply.global_seq));
+ con_out_kvec_reset(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ return ret;
+ prepare_read_connect(con);
+ break;
+
+ case CEPH_MSGR_TAG_SEQ:
+ case CEPH_MSGR_TAG_READY:
+ if (req_feat & ~server_feat) {
+ pr_err("%s%lld %s protocol feature mismatch,"
+ " my required %llx > server's %llx, need %llx\n",
+ ENTITY_NAME(con->peer_name),
+ ceph_pr_addr(&con->peer_addr.in_addr),
+ req_feat, server_feat, req_feat & ~server_feat);
+ con->error_msg = "missing required protocol features";
+ reset_connection(con);
+ return -1;
+ }
+
+ WARN_ON(con->state != CON_STATE_NEGOTIATING);
+ con->state = CON_STATE_OPEN;
+ con->auth_retry = 0; /* we authenticated; clear flag */
+ con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
+ con->connect_seq++;
+ con->peer_features = server_feat;
+ dout("process_connect got READY gseq %d cseq %d (%d)\n",
+ con->peer_global_seq,
+ le32_to_cpu(con->in_reply.connect_seq),
+ con->connect_seq);
+ WARN_ON(con->connect_seq !=
+ le32_to_cpu(con->in_reply.connect_seq));
+
+ if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY)
+ con_flag_set(con, CON_FLAG_LOSSYTX);
+
+ con->delay = 0; /* reset backoff memory */
+
+ if (con->in_reply.tag == CEPH_MSGR_TAG_SEQ) {
+ prepare_write_seq(con);
+ prepare_read_seq(con);
+ } else {
+ prepare_read_tag(con);
+ }
+ break;
+
+ case CEPH_MSGR_TAG_WAIT:
+ /*
+ * If there is a connection race (we are opening
+ * connections to each other), one of us may just have
+ * to WAIT. This shouldn't happen if we are the
+ * client.
+ */
+ pr_err("process_connect got WAIT as client\n");
+ con->error_msg = "protocol error, got WAIT as client";
+ return -1;
+
+ default:
+ pr_err("connect protocol error, will retry\n");
+ con->error_msg = "protocol error, garbage tag during connect";
+ return -1;
+ }
+ return 0;
+}
+
+
+/*
+ * read (part of) an ack
+ */
+static int read_partial_ack(struct ceph_connection *con)
+{
+ int size = sizeof (con->in_temp_ack);
+ int end = size;
+
+ return read_partial(con, end, size, &con->in_temp_ack);
+}
+
+/*
+ * We can finally discard anything that's been acked: drop every
+ * message on out_sent whose seq is <= the acked seq, then go back
+ * to waiting for the next tag.
+ */
+static void process_ack(struct ceph_connection *con)
+{
+ struct ceph_msg *m;
+ u64 ack = le64_to_cpu(con->in_temp_ack);
+ u64 seq;
+
+ while (!list_empty(&con->out_sent)) {
+ m = list_first_entry(&con->out_sent, struct ceph_msg,
+ list_head);
+ seq = le64_to_cpu(m->hdr.seq);
+ if (seq > ack)
+ break;
+ dout("got ack for seq %llu type %d at %p\n", seq,
+ le16_to_cpu(m->hdr.type), m);
+ m->ack_stamp = jiffies;
+ ceph_msg_remove(m);
+ }
+ prepare_read_tag(con);
+}
+
+
+/*
+ * Read (part of) a message section (front or middle) into the given
+ * kvec, tracking progress in section->iov_len.  When the section is
+ * complete its crc32c is stored via *crc.  Returns 1 when done, 0 if
+ * the socket ran dry, or a negative error.
+ */
+static int read_partial_message_section(struct ceph_connection *con,
+ struct kvec *section,
+ unsigned int sec_len, u32 *crc)
+{
+ int ret, left;
+
+ BUG_ON(!section);
+
+ while (section->iov_len < sec_len) {
+ BUG_ON(section->iov_base == NULL);
+ left = sec_len - section->iov_len;
+ ret = ceph_tcp_recvmsg(con->sock, (char *)section->iov_base +
+ section->iov_len, left);
+ if (ret <= 0)
+ return ret;
+ section->iov_len += ret;
+ }
+ if (section->iov_len == sec_len)
+ *crc = crc32c(0, section->iov_base, section->iov_len);
+
+ return 1;
+}
+
+/*
+ * Read (part of) an incoming message's data payload, a cursor piece
+ * at a time, accumulating the data crc in con->in_data_crc as we go.
+ * Returns 1 when the payload is complete, 0 if the socket ran dry,
+ * or a negative error.
+ */
+static int read_partial_msg_data(struct ceph_connection *con)
+{
+ struct ceph_msg *msg = con->in_msg;
+ struct ceph_msg_data_cursor *cursor = &msg->cursor;
+ const bool do_datacrc = !con->msgr->nocrc;
+ struct page *page;
+ size_t page_offset;
+ size_t length;
+ u32 crc = 0;
+ int ret;
+
+ BUG_ON(!msg);
+ if (list_empty(&msg->data))
+ return -EIO;
+
+ if (do_datacrc)
+ crc = con->in_data_crc;
+ while (cursor->resid) {
+ page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
+ NULL);
+ ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
+ if (ret <= 0) {
+ /* stash partial crc before bailing out */
+ if (do_datacrc)
+ con->in_data_crc = crc;
+
+ return ret;
+ }
+
+ if (do_datacrc)
+ crc = ceph_crc32c_page(crc, page, page_offset, ret);
+ (void) ceph_msg_data_advance(&msg->cursor, (size_t)ret);
+ }
+ if (do_datacrc)
+ con->in_data_crc = crc;
+
+ return 1; /* must return > 0 to indicate success */
+}
+
+/*
+ * read (part of) a message.
+ */
+static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip);
+
+/*
+ * Read (the rest of) an incoming message: header, front, middle,
+ * data payload and footer, verifying section crcs as they complete.
+ * Returns <= 0 while incomplete or on error, 1 when the whole
+ * message has been received.
+ */
+static int read_partial_message(struct ceph_connection *con)
+{
+	struct ceph_msg *m = con->in_msg;
+	int size;
+	int end;
+	int ret;
+	unsigned int front_len, middle_len, data_len;
+	bool do_datacrc = !con->msgr->nocrc;
+	u64 seq;
+	u32 crc;
+
+	dout("read_partial_message con %p msg %p\n", con, m);
+
+	/* header */
+	size = sizeof (con->in_hdr);
+	end = size;
+	ret = read_partial(con, end, size, &con->in_hdr);
+	if (ret <= 0)
+		return ret;
+
+	/* header crc covers all fields before the crc field itself */
+	crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc));
+	if (cpu_to_le32(crc) != con->in_hdr.crc) {
+		pr_err("read_partial_message bad hdr "
+		       " crc %u != expected %u\n",
+		       crc, con->in_hdr.crc);
+		return -EBADMSG;
+	}
+
+	/* sanity-check the advertised section lengths */
+	front_len = le32_to_cpu(con->in_hdr.front_len);
+	if (front_len > CEPH_MSG_MAX_FRONT_LEN)
+		return -EIO;
+	middle_len = le32_to_cpu(con->in_hdr.middle_len);
+	if (middle_len > CEPH_MSG_MAX_MIDDLE_LEN)
+		return -EIO;
+	data_len = le32_to_cpu(con->in_hdr.data_len);
+	if (data_len > CEPH_MSG_MAX_DATA_LEN)
+		return -EIO;
+
+	/* verify seq# */
+	seq = le64_to_cpu(con->in_hdr.seq);
+	if ((s64)seq - (s64)con->in_seq < 1) {
+		/*
+		 * Already-seen (resent) message: arrange for try_read
+		 * to discard its payload via a negative in_base_pos.
+		 */
+		pr_info("skipping %s%lld %s seq %lld expected %lld\n",
+			ENTITY_NAME(con->peer_name),
+			ceph_pr_addr(&con->peer_addr.in_addr),
+			seq, con->in_seq + 1);
+		con->in_base_pos = -front_len - middle_len - data_len -
+			sizeof(m->footer);
+		con->in_tag = CEPH_MSGR_TAG_READY;
+		return 0;
+	} else if ((s64)seq - (s64)con->in_seq > 1) {
+		pr_err("read_partial_message bad seq %lld expected %lld\n",
+		       seq, con->in_seq + 1);
+		con->error_msg = "bad message sequence # for incoming message";
+		return -EBADMSG;
+	}
+
+	/* allocate message? */
+	if (!con->in_msg) {
+		int skip = 0;
+
+		dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
+		     front_len, data_len);
+		ret = ceph_con_in_msg_alloc(con, &skip);
+		if (ret < 0)
+			return ret;
+
+		/* exactly one of in_msg / skip must be set */
+		BUG_ON(!con->in_msg ^ skip);
+		if (con->in_msg && data_len > con->in_msg->data_length) {
+			pr_warning("%s skipping long message (%u > %zd)\n",
+				__func__, data_len, con->in_msg->data_length);
+			ceph_msg_put(con->in_msg);
+			con->in_msg = NULL;
+			skip = 1;
+		}
+		if (skip) {
+			/* skip this message */
+			dout("alloc_msg said skip message\n");
+			con->in_base_pos = -front_len - middle_len - data_len -
+				sizeof(m->footer);
+			con->in_tag = CEPH_MSGR_TAG_READY;
+			con->in_seq++;
+			return 0;
+		}
+
+		BUG_ON(!con->in_msg);
+		BUG_ON(con->in_msg->con != con);
+		m = con->in_msg;
+		m->front.iov_len = 0; /* haven't read it yet */
+		if (m->middle)
+			m->middle->vec.iov_len = 0;
+
+		/* prepare for data payload, if any */
+
+		if (data_len)
+			prepare_message_data(con->in_msg, data_len);
+	}
+
+	/* front */
+	ret = read_partial_message_section(con, &m->front, front_len,
+					   &con->in_front_crc);
+	if (ret <= 0)
+		return ret;
+
+	/* middle */
+	if (m->middle) {
+		ret = read_partial_message_section(con, &m->middle->vec,
+						   middle_len,
+						   &con->in_middle_crc);
+		if (ret <= 0)
+			return ret;
+	}
+
+	/* (page) data */
+	if (data_len) {
+		ret = read_partial_msg_data(con);
+		if (ret <= 0)
+			return ret;
+	}
+
+	/* footer (end accumulates past the header read above) */
+	size = sizeof (m->footer);
+	end += size;
+	ret = read_partial(con, end, size, &m->footer);
+	if (ret <= 0)
+		return ret;
+
+	dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n",
+	     m, front_len, m->footer.front_crc, middle_len,
+	     m->footer.middle_crc, data_len, m->footer.data_crc);
+
+	/* crc ok? */
+	if (con->in_front_crc != le32_to_cpu(m->footer.front_crc)) {
+		pr_err("read_partial_message %p front crc %u != exp. %u\n",
+		       m, con->in_front_crc, m->footer.front_crc);
+		return -EBADMSG;
+	}
+	if (con->in_middle_crc != le32_to_cpu(m->footer.middle_crc)) {
+		pr_err("read_partial_message %p middle crc %u != exp %u\n",
+		       m, con->in_middle_crc, m->footer.middle_crc);
+		return -EBADMSG;
+	}
+	if (do_datacrc &&
+	    (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 &&
+	    con->in_data_crc != le32_to_cpu(m->footer.data_crc)) {
+		pr_err("read_partial_message %p data crc %u != exp. %u\n", m,
+		       con->in_data_crc, le32_to_cpu(m->footer.data_crc));
+		return -EBADMSG;
+	}
+
+	return 1; /* done! */
+}
+
+/*
+ * Process message.  This happens in the worker thread.  The callback should
+ * be careful not to do anything that waits on other incoming messages or it
+ * may deadlock.
+ */
+static void process_message(struct ceph_connection *con)
+{
+	struct ceph_msg *msg;
+
+	/* detach msg from con; drop the con ref held on in_msg's behalf */
+	BUG_ON(con->in_msg->con != con);
+	con->in_msg->con = NULL;
+	msg = con->in_msg;
+	con->in_msg = NULL;
+	con->ops->put(con);
+
+	/* if first message, set peer_name */
+	if (con->peer_name.type == 0)
+		con->peer_name = msg->hdr.src;
+
+	con->in_seq++;
+	/* drop the mutex while dispatching so the handler can use the con */
+	mutex_unlock(&con->mutex);
+
+	dout("===== %p %llu from %s%lld %d=%s len %d+%d (%u %u %u) =====\n",
+	     msg, le64_to_cpu(msg->hdr.seq),
+	     ENTITY_NAME(msg->hdr.src),
+	     le16_to_cpu(msg->hdr.type),
+	     ceph_msg_type_name(le16_to_cpu(msg->hdr.type)),
+	     le32_to_cpu(msg->hdr.front_len),
+	     le32_to_cpu(msg->hdr.data_len),
+	     con->in_front_crc, con->in_middle_crc, con->in_data_crc);
+	con->ops->dispatch(con, msg);
+
+	mutex_lock(&con->mutex);
+}
+
+
+/*
+ * Write something to the socket.  Called in a worker thread when the
+ * socket appears to be writeable and we have something ready to send.
+ * Called with con->mutex held (see con_work).
+ */
+static int try_write(struct ceph_connection *con)
+{
+	int ret = 1;
+
+	dout("try_write start %p state %lu\n", con, con->state);
+
+more:
+	dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
+
+	/* open the socket first? */
+	if (con->state == CON_STATE_PREOPEN) {
+		BUG_ON(con->sock);
+		con->state = CON_STATE_CONNECTING;
+
+		/* queue our banner for sending and expect one back */
+		con_out_kvec_reset(con);
+		prepare_write_banner(con);
+		prepare_read_banner(con);
+
+		BUG_ON(con->in_msg);
+		con->in_tag = CEPH_MSGR_TAG_READY;
+		dout("try_write initiating connect on %p new state %lu\n",
+		     con, con->state);
+		ret = ceph_tcp_connect(con);
+		if (ret < 0) {
+			con->error_msg = "connect error";
+			goto out;
+		}
+	}
+
+more_kvec:
+	/* kvec data queued? */
+	if (con->out_skip) {
+		ret = write_partial_skip(con);
+		if (ret <= 0)
+			goto out;
+	}
+	if (con->out_kvec_left) {
+		ret = write_partial_kvec(con);
+		if (ret <= 0)
+			goto out;
+	}
+
+	/* msg pages? */
+	if (con->out_msg) {
+		if (con->out_msg_done) {
+			ceph_msg_put(con->out_msg);
+			con->out_msg = NULL; /* we're done with this one */
+			goto do_next;
+		}
+
+		ret = write_partial_message_data(con);
+		if (ret == 1)
+			goto more_kvec;  /* we need to send the footer, too! */
+		if (ret == 0)
+			goto out;
+		if (ret < 0) {
+			dout("try_write write_partial_message_data err %d\n",
+			     ret);
+			goto out;
+		}
+	}
+
+do_next:
+	if (con->state == CON_STATE_OPEN) {
+		/* is anything else pending? */
+		if (!list_empty(&con->out_queue)) {
+			prepare_write_message(con);
+			goto more;
+		}
+		if (con->in_seq > con->in_seq_acked) {
+			prepare_write_ack(con);
+			goto more;
+		}
+		if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) {
+			prepare_write_keepalive(con);
+			goto more;
+		}
+	}
+
+	/* Nothing to do! */
+	con_flag_clear(con, CON_FLAG_WRITE_PENDING);
+	dout("try_write nothing else to write.\n");
+	ret = 0;
+out:
+	dout("try_write done on %p ret %d\n", con, ret);
+	return ret;
+}
+
+
+
+/*
+ * Read what we can from the socket.
+ * Called with con->mutex held (see con_work).
+ */
+static int try_read(struct ceph_connection *con)
+{
+	int ret = -1;
+
+more:
+	dout("try_read start on %p state %lu\n", con, con->state);
+	if (con->state != CON_STATE_CONNECTING &&
+	    con->state != CON_STATE_NEGOTIATING &&
+	    con->state != CON_STATE_OPEN)
+		return 0;
+
+	BUG_ON(!con->sock);
+
+	dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag,
+	     con->in_base_pos);
+
+	if (con->state == CON_STATE_CONNECTING) {
+		dout("try_read connecting\n");
+		ret = read_partial_banner(con);
+		if (ret <= 0)
+			goto out;
+		ret = process_banner(con);
+		if (ret < 0)
+			goto out;
+
+		con->state = CON_STATE_NEGOTIATING;
+
+		/*
+		 * Received banner is good, exchange connection info.
+		 * Do not reset out_kvec, as sending our banner raced
+		 * with receiving peer banner after connect completed.
+		 */
+		ret = prepare_write_connect(con);
+		if (ret < 0)
+			goto out;
+		prepare_read_connect(con);
+
+		/* Send connection info before awaiting response */
+		goto out;
+	}
+
+	if (con->state == CON_STATE_NEGOTIATING) {
+		dout("try_read negotiating\n");
+		ret = read_partial_connect(con);
+		if (ret <= 0)
+			goto out;
+		ret = process_connect(con);
+		if (ret < 0)
+			goto out;
+		goto more;
+	}
+
+	WARN_ON(con->state != CON_STATE_OPEN);
+
+	/* negative in_base_pos means "discard that many payload bytes" */
+	if (con->in_base_pos < 0) {
+		/*
+		 * skipping + discarding content.
+		 *
+		 * FIXME: there must be a better way to do this!
+		 */
+		static char buf[SKIP_BUF_SIZE];
+		int skip = min((int) sizeof (buf), -con->in_base_pos);
+
+		dout("skipping %d / %d bytes\n", skip, -con->in_base_pos);
+		ret = ceph_tcp_recvmsg(con->sock, buf, skip);
+		if (ret <= 0)
+			goto out;
+		con->in_base_pos += ret;
+		if (con->in_base_pos)
+			goto more;
+	}
+	if (con->in_tag == CEPH_MSGR_TAG_READY) {
+		/*
+		 * what's next?
+		 */
+		ret = ceph_tcp_recvmsg(con->sock, &con->in_tag, 1);
+		if (ret <= 0)
+			goto out;
+		dout("try_read got tag %d\n", (int)con->in_tag);
+		switch (con->in_tag) {
+		case CEPH_MSGR_TAG_MSG:
+			prepare_read_message(con);
+			break;
+		case CEPH_MSGR_TAG_ACK:
+			prepare_read_ack(con);
+			break;
+		case CEPH_MSGR_TAG_CLOSE:
+			con_close_socket(con);
+			con->state = CON_STATE_CLOSED;
+			goto out;
+		default:
+			goto bad_tag;
+		}
+	}
+	if (con->in_tag == CEPH_MSGR_TAG_MSG) {
+		ret = read_partial_message(con);
+		if (ret <= 0) {
+			switch (ret) {
+			case -EBADMSG:
+				con->error_msg = "bad crc";
+				ret = -EIO;
+				break;
+			case -EIO:
+				con->error_msg = "io error";
+				break;
+			}
+			goto out;
+		}
+		/* tag reset to READY means the message was skipped */
+		if (con->in_tag == CEPH_MSGR_TAG_READY)
+			goto more;
+		process_message(con);
+		if (con->state == CON_STATE_OPEN)
+			prepare_read_tag(con);
+		goto more;
+	}
+	if (con->in_tag == CEPH_MSGR_TAG_ACK ||
+	    con->in_tag == CEPH_MSGR_TAG_SEQ) {
+		/*
+		 * the final handshake seq exchange is semantically
+		 * equivalent to an ACK
+		 */
+		ret = read_partial_ack(con);
+		if (ret <= 0)
+			goto out;
+		process_ack(con);
+		goto more;
+	}
+
+out:
+	dout("try_read done on %p ret %d\n", con, ret);
+	return ret;
+
+bad_tag:
+	pr_err("try_read bad con->in_tag = %d\n", (int)con->in_tag);
+	con->error_msg = "protocol error, garbage tag";
+	ret = -1;
+	goto out;
+}
+
+
+/*
+ * Atomically queue work on a connection after the specified delay.
+ * Bump @con reference to avoid races with connection teardown.
+ * Returns 0 if work was queued, or an error code otherwise.
+ */
+static int queue_con_delay(struct ceph_connection *con, unsigned long delay)
+{
+	if (!con->ops->get(con)) {
+		dout("%s %p ref count 0\n", __func__, con);
+
+		return -ENOENT;
+	}
+
+	if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) {
+		/* already queued: drop the ref we just took */
+		dout("%s %p - already queued\n", __func__, con);
+		con->ops->put(con);
+
+		return -EBUSY;
+	}
+
+	dout("%s %p %lu\n", __func__, con, delay);
+
+	return 0;
+}
+
+/* Queue work on a connection immediately; queueing failure is ignored. */
+static void queue_con(struct ceph_connection *con)
+{
+	(void) queue_con_delay(con, 0);
+}
+
+/*
+ * Consume a pending SOCK_CLOSED event, if any, and record a
+ * state-specific error message on the connection.  Returns true if
+ * the socket had been closed out from under us.
+ */
+static bool con_sock_closed(struct ceph_connection *con)
+{
+	if (!con_flag_test_and_clear(con, CON_FLAG_SOCK_CLOSED))
+		return false;
+
+/* expands to one switch case per connection state */
+#define CASE(x)								\
+	case CON_STATE_ ## x:						\
+		con->error_msg = "socket closed (con state " #x ")";	\
+		break;
+
+	switch (con->state) {
+	CASE(CLOSED);
+	CASE(PREOPEN);
+	CASE(CONNECTING);
+	CASE(NEGOTIATING);
+	CASE(OPEN);
+	CASE(STANDBY);
+	default:
+		pr_warning("%s con %p unrecognized state %lu\n",
+			__func__, con, con->state);
+		con->error_msg = "unrecognized con state";
+		BUG();
+		break;
+	}
+#undef CASE
+
+	return true;
+}
+
+/*
+ * Consume a pending BACKOFF flag, if set, and queue delayed work for
+ * the connection.  Returns true if a backoff was pending.
+ */
+static bool con_backoff(struct ceph_connection *con)
+{
+	int ret;
+
+	if (!con_flag_test_and_clear(con, CON_FLAG_BACKOFF))
+		return false;
+
+	ret = queue_con_delay(con, round_jiffies_relative(con->delay));
+	if (ret) {
+		dout("%s: con %p FAILED to back off %lu\n", __func__,
+			con, con->delay);
+		BUG_ON(ret == -ENOENT);
+		/* work already queued; re-arm the flag to retry later */
+		con_flag_set(con, CON_FLAG_BACKOFF);
+	}
+
+	return true;
+}
+
+/* Finish fault handling; con->mutex must *not* be held here */
+
+static void con_fault_finish(struct ceph_connection *con)
+{
+	/*
+	 * in case we faulted due to authentication, invalidate our
+	 * current tickets so that we can get new ones.
+	 */
+	if (con->auth_retry && con->ops->invalidate_authorizer) {
+		dout("calling invalidate_authorizer()\n");
+		con->ops->invalidate_authorizer(con);
+	}
+
+	/* let the connection's owner react to the fault, if it cares */
+	if (con->ops->fault)
+		con->ops->fault(con);
+}
+
+/*
+ * Do some work on a connection.  Drop a connection ref when we're done.
+ *
+ * Workqueue entry point: handles pending socket-closed/backoff events,
+ * then reads and writes until there is nothing left (or a fault).
+ */
+static void con_work(struct work_struct *work)
+{
+	struct ceph_connection *con = container_of(work, struct ceph_connection,
+						   work.work);
+	bool fault;
+
+	mutex_lock(&con->mutex);
+	while (true) {
+		int ret;
+
+		if ((fault = con_sock_closed(con))) {
+			dout("%s: con %p SOCK_CLOSED\n", __func__, con);
+			break;
+		}
+		if (con_backoff(con)) {
+			dout("%s: con %p BACKOFF\n", __func__, con);
+			break;
+		}
+		if (con->state == CON_STATE_STANDBY) {
+			dout("%s: con %p STANDBY\n", __func__, con);
+			break;
+		}
+		if (con->state == CON_STATE_CLOSED) {
+			dout("%s: con %p CLOSED\n", __func__, con);
+			BUG_ON(con->sock);
+			break;
+		}
+		if (con->state == CON_STATE_PREOPEN) {
+			dout("%s: con %p PREOPEN\n", __func__, con);
+			BUG_ON(con->sock);
+		}
+
+		/* -EAGAIN means the state changed under us; start over */
+		ret = try_read(con);
+		if (ret < 0) {
+			if (ret == -EAGAIN)
+				continue;
+			con->error_msg = "socket error on read";
+			fault = true;
+			break;
+		}
+
+		ret = try_write(con);
+		if (ret < 0) {
+			if (ret == -EAGAIN)
+				continue;
+			con->error_msg = "socket error on write";
+			fault = true;
+		}
+
+		break;	/* If we make it to here, we're done */
+	}
+	if (fault)
+		con_fault(con);
+	mutex_unlock(&con->mutex);
+
+	/* fault callbacks run without the mutex (see con_fault_finish) */
+	if (fault)
+		con_fault_finish(con);
+
+	con->ops->put(con);
+}
+
+/*
+ * Generic error/fault handler.  A retry mechanism is used with
+ * exponential backoff.  Called with con->mutex held (see con_work).
+ */
+static void con_fault(struct ceph_connection *con)
+{
+	pr_warning("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
+	       ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
+	dout("fault %p state %lu to peer %s\n",
+	     con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
+
+	WARN_ON(con->state != CON_STATE_CONNECTING &&
+	       con->state != CON_STATE_NEGOTIATING &&
+	       con->state != CON_STATE_OPEN);
+
+	con_close_socket(con);
+
+	/* lossy channels are never retried */
+	if (con_flag_test(con, CON_FLAG_LOSSYTX)) {
+		dout("fault on LOSSYTX channel, marking CLOSED\n");
+		con->state = CON_STATE_CLOSED;
+		return;
+	}
+
+	/* drop any partially received message and its con ref */
+	if (con->in_msg) {
+		BUG_ON(con->in_msg->con != con);
+		con->in_msg->con = NULL;
+		ceph_msg_put(con->in_msg);
+		con->in_msg = NULL;
+		con->ops->put(con);
+	}
+
+	/* Requeue anything that hasn't been acked */
+	list_splice_init(&con->out_sent, &con->out_queue);
+
+	/* If there are no messages queued or keepalive pending, place
+	 * the connection in a STANDBY state */
+	if (list_empty(&con->out_queue) &&
+	    !con_flag_test(con, CON_FLAG_KEEPALIVE_PENDING)) {
+		dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
+		con_flag_clear(con, CON_FLAG_WRITE_PENDING);
+		con->state = CON_STATE_STANDBY;
+	} else {
+		/* retry after a delay. */
+		con->state = CON_STATE_PREOPEN;
+		if (con->delay == 0)
+			con->delay = BASE_DELAY_INTERVAL;
+		else if (con->delay < MAX_DELAY_INTERVAL)
+			con->delay *= 2;	/* exponential backoff */
+		con_flag_set(con, CON_FLAG_BACKOFF);
+		queue_con(con);
+	}
+}
+
+
+
+/*
+ * initialize a new messenger instance
+ *
+ * @myaddr is optional; when NULL the instance address is left as-is
+ * apart from the nonce chosen below.
+ */
+void ceph_messenger_init(struct ceph_messenger *msgr,
+			struct ceph_entity_addr *myaddr,
+			u64 supported_features,
+			u64 required_features,
+			bool nocrc)
+{
+	msgr->supported_features = supported_features;
+	msgr->required_features = required_features;
+
+	spin_lock_init(&msgr->global_seq_lock);
+
+	if (myaddr)
+		msgr->inst.addr = *myaddr;
+
+	/* select a random nonce */
+	msgr->inst.addr.type = 0;
+	get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
+	encode_my_addr(msgr);
+	msgr->nocrc = nocrc;
+
+	atomic_set(&msgr->stopping, 0);
+
+	dout("%s %p\n", __func__, msgr);
+}
+EXPORT_SYMBOL(ceph_messenger_init);
+/*
+ * Wake a connection out of STANDBY: move it to PREOPEN and bump
+ * connect_seq so the peer sees a reconnect.  Caller holds con->mutex.
+ */
+static void clear_standby(struct ceph_connection *con)
+{
+	/* come back from STANDBY? */
+	if (con->state == CON_STATE_STANDBY) {
+		dout("clear_standby %p and ++connect_seq\n", con);
+		con->state = CON_STATE_PREOPEN;
+		con->connect_seq++;
+		WARN_ON(con_flag_test(con, CON_FLAG_WRITE_PENDING));
+		WARN_ON(con_flag_test(con, CON_FLAG_KEEPALIVE_PENDING));
+	}
+}
+
+/*
+ * Queue up an outgoing message on the given connection.
+ *
+ * Consumes the caller's message reference: if the connection is
+ * closed the message is dropped (put) immediately.
+ */
+void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
+{
+	/* set src+dst */
+	msg->hdr.src = con->msgr->inst.name;
+	BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
+	msg->needs_out_seq = true;
+
+	mutex_lock(&con->mutex);
+
+	if (con->state == CON_STATE_CLOSED) {
+		dout("con_send %p closed, dropping %p\n", con, msg);
+		ceph_msg_put(msg);
+		mutex_unlock(&con->mutex);
+		return;
+	}
+
+	/* the queued message holds a reference on the connection */
+	BUG_ON(msg->con != NULL);
+	msg->con = con->ops->get(con);
+	BUG_ON(msg->con == NULL);
+
+	BUG_ON(!list_empty(&msg->list_head));
+	list_add_tail(&msg->list_head, &con->out_queue);
+	dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg,
+	     ENTITY_NAME(con->peer_name), le16_to_cpu(msg->hdr.type),
+	     ceph_msg_type_name(le16_to_cpu(msg->hdr.type)),
+	     le32_to_cpu(msg->hdr.front_len),
+	     le32_to_cpu(msg->hdr.middle_len),
+	     le32_to_cpu(msg->hdr.data_len));
+
+	clear_standby(con);
+	mutex_unlock(&con->mutex);
+
+	/* if there wasn't anything waiting to send before, queue
+	 * new work */
+	if (con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0)
+		queue_con(con);
+}
+EXPORT_SYMBOL(ceph_con_send);
+
+/*
+ * Revoke a message that was previously queued for send.
+ *
+ * Handles both the not-yet-sent case (still on out_queue/out_sent)
+ * and the partially-sent case (con->out_msg), skipping any kvec
+ * bytes already queued for the wire in the latter.
+ */
+void ceph_msg_revoke(struct ceph_msg *msg)
+{
+	struct ceph_connection *con = msg->con;
+
+	if (!con)
+		return;		/* Message not in our possession */
+
+	mutex_lock(&con->mutex);
+	if (!list_empty(&msg->list_head)) {
+		dout("%s %p msg %p - was on queue\n", __func__, con, msg);
+		list_del_init(&msg->list_head);
+		BUG_ON(msg->con == NULL);
+		msg->con->ops->put(msg->con);
+		msg->con = NULL;
+		msg->hdr.seq = 0;
+
+		ceph_msg_put(msg);
+	}
+	if (con->out_msg == msg) {
+		dout("%s %p msg %p - was sending\n", __func__, con, msg);
+		con->out_msg = NULL;
+		if (con->out_kvec_is_msg) {
+			/* discard whatever of this msg is still queued */
+			con->out_skip = con->out_kvec_bytes;
+			con->out_kvec_is_msg = false;
+		}
+		msg->hdr.seq = 0;
+
+		ceph_msg_put(msg);
+	}
+	mutex_unlock(&con->mutex);
+}
+
+/*
+ * Revoke a message that we may be reading data into.
+ *
+ * If @msg is the connection's current incoming message, detach it
+ * and arrange for the rest of its wire bytes to be discarded.
+ */
+void ceph_msg_revoke_incoming(struct ceph_msg *msg)
+{
+	struct ceph_connection *con;
+
+	BUG_ON(msg == NULL);
+	if (!msg->con) {
+		dout("%s msg %p null con\n", __func__, msg);
+
+		return;		/* Message not in our possession */
+	}
+
+	con = msg->con;
+	mutex_lock(&con->mutex);
+	if (con->in_msg == msg) {
+		unsigned int front_len = le32_to_cpu(con->in_hdr.front_len);
+		unsigned int middle_len = le32_to_cpu(con->in_hdr.middle_len);
+		unsigned int data_len = le32_to_cpu(con->in_hdr.data_len);
+
+		/* skip rest of message (negative in_base_pos; see try_read) */
+		dout("%s %p msg %p revoked\n", __func__, con, msg);
+		con->in_base_pos = con->in_base_pos -
+				sizeof(struct ceph_msg_header) -
+				front_len -
+				middle_len -
+				data_len -
+				sizeof(struct ceph_msg_footer);
+		ceph_msg_put(con->in_msg);
+		con->in_msg = NULL;
+		con->in_tag = CEPH_MSGR_TAG_READY;
+		con->in_seq++;
+	} else {
+		dout("%s %p in_msg %p msg %p no-op\n",
+		     __func__, con, con->in_msg, msg);
+	}
+	mutex_unlock(&con->mutex);
+}
+
+/*
+ * Queue a keepalive byte to ensure the tcp connection is alive.
+ */
+void ceph_con_keepalive(struct ceph_connection *con)
+{
+	dout("con_keepalive %p\n", con);
+	mutex_lock(&con->mutex);
+	clear_standby(con);
+	mutex_unlock(&con->mutex);
+	/* only queue work if neither flag was previously set */
+	if (con_flag_test_and_set(con, CON_FLAG_KEEPALIVE_PENDING) == 0 &&
+	    con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0)
+		queue_con(con);
+}
+EXPORT_SYMBOL(ceph_con_keepalive);
+
+/*
+ * Allocate and initialize a new msg data item of the given type.
+ * Returns NULL on invalid type or allocation failure.
+ */
+static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type)
+{
+	struct ceph_msg_data *data;
+
+	if (WARN_ON(!ceph_msg_data_type_valid(type)))
+		return NULL;
+
+	data = kmem_cache_zalloc(ceph_msg_data_cache, GFP_NOFS);
+	if (!data)
+		return NULL;	/* don't touch data->links on failure */
+
+	data->type = type;
+	INIT_LIST_HEAD(&data->links);
+
+	return data;
+}
+
+/*
+ * Free a msg data item.  A PAGELIST item owns its pagelist and frees
+ * it here; other data types are not owned by the item.
+ */
+static void ceph_msg_data_destroy(struct ceph_msg_data *data)
+{
+	if (!data)
+		return;
+
+	/* item must have been unlinked from its message already */
+	WARN_ON(!list_empty(&data->links));
+	if (data->type == CEPH_MSG_DATA_PAGELIST) {
+		ceph_pagelist_release(data->pagelist);
+		kfree(data->pagelist);
+	}
+	kmem_cache_free(ceph_msg_data_cache, data);
+}
+
+/*
+ * Attach a page array to @msg as an additional data item and grow
+ * msg->data_length accordingly.  @alignment is reduced to an offset
+ * within the first page.
+ */
+void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
+		size_t length, size_t alignment)
+{
+	struct ceph_msg_data *item;
+
+	BUG_ON(!pages);
+	BUG_ON(!length);
+
+	item = ceph_msg_data_create(CEPH_MSG_DATA_PAGES);
+	BUG_ON(!item);
+	item->alignment = alignment & ~PAGE_MASK;
+	item->length = length;
+	item->pages = pages;
+
+	msg->data_length += length;
+	list_add_tail(&item->links, &msg->data);
+}
+EXPORT_SYMBOL(ceph_msg_data_add_pages);
+
+/*
+ * Attach a pagelist to @msg as an additional data item.  The message
+ * takes ownership of the pagelist (freed in ceph_msg_data_destroy).
+ */
+void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
+				struct ceph_pagelist *pagelist)
+{
+	struct ceph_msg_data *data;
+
+	BUG_ON(!pagelist);
+	BUG_ON(!pagelist->length);
+
+	data = ceph_msg_data_create(CEPH_MSG_DATA_PAGELIST);
+	BUG_ON(!data);
+	data->pagelist = pagelist;
+
+	list_add_tail(&data->links, &msg->data);
+	msg->data_length += pagelist->length;
+}
+EXPORT_SYMBOL(ceph_msg_data_add_pagelist);
+
+#ifdef CONFIG_BLOCK
+/*
+ * Attach a bio chain carrying @length bytes to @msg as an additional
+ * data item.  The bio is not owned by the message.
+ */
+void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
+		size_t length)
+{
+	struct ceph_msg_data *data;
+
+	BUG_ON(!bio);
+
+	data = ceph_msg_data_create(CEPH_MSG_DATA_BIO);
+	BUG_ON(!data);
+	data->bio = bio;
+	data->bio_length = length;
+
+	list_add_tail(&data->links, &msg->data);
+	msg->data_length += length;
+}
+EXPORT_SYMBOL(ceph_msg_data_add_bio);
+#endif	/* CONFIG_BLOCK */
+
+/*
+ * construct a new message with given type, size
+ * the new msg has a ref count of 1.
+ *
+ * Returns NULL on allocation failure; when @can_fail is false the
+ * failure is additionally reported with a WARN.
+ */
+struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
+			      bool can_fail)
+{
+	struct ceph_msg *m;
+
+	m = kmem_cache_zalloc(ceph_msg_cache, flags);
+	if (m == NULL)
+		goto out;
+
+	m->hdr.type = cpu_to_le16(type);
+	m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
+	m->hdr.front_len = cpu_to_le32(front_len);
+
+	INIT_LIST_HEAD(&m->list_head);
+	kref_init(&m->kref);
+	INIT_LIST_HEAD(&m->data);
+
+	/* front */
+	if (front_len) {
+		m->front.iov_base = ceph_kvmalloc(front_len, flags);
+		if (m->front.iov_base == NULL) {
+			dout("ceph_msg_new can't allocate %d bytes\n",
+			     front_len);
+			goto out2;
+		}
+	} else {
+		m->front.iov_base = NULL;
+	}
+	m->front_alloc_len = m->front.iov_len = front_len;
+
+	dout("ceph_msg_new %p front %d\n", m, front_len);
+	return m;
+
+out2:
+	ceph_msg_put(m);
+out:
+	if (!can_fail) {
+		pr_err("msg_new can't create type %d front %d\n", type,
+		       front_len);
+		WARN_ON(1);
+	} else {
+		dout("msg_new can't create type %d front %d\n", type,
+		     front_len);
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(ceph_msg_new);
+
+/*
+ * Allocate "middle" portion of a message, if it is needed and wasn't
+ * allocated by alloc_msg.  This allows us to read a small fixed-size
+ * per-type header in the front and then gracefully fail (i.e.,
+ * propagate the error to the caller based on info in the front) when
+ * the middle is too large.
+ */
+static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg)
+{
+	int middle_len = le32_to_cpu(msg->hdr.middle_len);
+	int type = le16_to_cpu(msg->hdr.type);
+
+	dout("alloc_middle %p type %d %s middle_len %d\n", msg, type,
+	     ceph_msg_type_name(type), middle_len);
+	BUG_ON(!middle_len);
+	BUG_ON(msg->middle);
+
+	msg->middle = ceph_buffer_new(middle_len, GFP_NOFS);
+	return msg->middle ? 0 : -ENOMEM;
+}
+
+/*
+ * Allocate a message for receiving an incoming message on a
+ * connection, and save the result in con->in_msg.  Uses the
+ * connection's private alloc_msg op if available.
+ *
+ * Returns 0 on success, or a negative error code.
+ *
+ * On success, if we set *skip = 1:
+ *  - the next message should be skipped and ignored.
+ *  - con->in_msg == NULL
+ * or if we set *skip = 0:
+ *  - con->in_msg is non-null.
+ * On error (ENOMEM, EAGAIN, ...),
+ *  - con->in_msg == NULL
+ */
+static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
+{
+	struct ceph_msg_header *hdr = &con->in_hdr;
+	int middle_len = le32_to_cpu(hdr->middle_len);
+	struct ceph_msg *msg;
+	int ret = 0;
+
+	BUG_ON(con->in_msg != NULL);
+	BUG_ON(!con->ops->alloc_msg);
+
+	/* drop the mutex around the callback; it may block or call back */
+	mutex_unlock(&con->mutex);
+	msg = con->ops->alloc_msg(con, hdr, skip);
+	mutex_lock(&con->mutex);
+	/* state may have changed while the mutex was dropped */
+	if (con->state != CON_STATE_OPEN) {
+		if (msg)
+			ceph_msg_put(msg);
+		return -EAGAIN;
+	}
+	if (msg) {
+		BUG_ON(*skip);
+		con->in_msg = msg;
+		con->in_msg->con = con->ops->get(con);
+		BUG_ON(con->in_msg->con == NULL);
+	} else {
+		/*
+		 * Null message pointer means either we should skip
+		 * this message or we couldn't allocate memory.  The
+		 * former is not an error.
+		 */
+		if (*skip)
+			return 0;
+		con->error_msg = "error allocating memory for incoming message";
+
+		return -ENOMEM;
+	}
+	memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
+
+	if (middle_len && !con->in_msg->middle) {
+		ret = ceph_alloc_middle(con, con->in_msg);
+		if (ret < 0) {
+			ceph_msg_put(con->in_msg);
+			con->in_msg = NULL;
+		}
+	}
+
+	return ret;
+}
+
+
+/*
+ * Free a generically kmalloc'd message (one that did not come from a
+ * msgpool), including its front buffer.
+ */
+void ceph_msg_kfree(struct ceph_msg *msg)
+{
+	dout("msg_kfree %p\n", msg);
+	ceph_kvfree(msg->front.iov_base);
+	kmem_cache_free(ceph_msg_cache, msg);
+}
+
+/*
+ * Drop a msg ref.  Destroy as needed.
+ *
+ * kref release callback: releases the middle buffer and every
+ * attached data item, then returns the message to its pool or frees
+ * it outright.
+ */
+void ceph_msg_last_put(struct kref *kref)
+{
+	struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
+	LIST_HEAD(pending);
+	struct list_head *pos;
+	struct list_head *tmp;
+
+	dout("ceph_msg_put last one on %p\n", m);
+	WARN_ON(!list_empty(&m->list_head));
+
+	/* drop middle, data, if any */
+	if (m->middle) {
+		ceph_buffer_put(m->middle);
+		m->middle = NULL;
+	}
+
+	/* detach all data items, then destroy each one */
+	list_splice_init(&m->data, &pending);
+	list_for_each_safe(pos, tmp, &pending) {
+		struct ceph_msg_data *item;
+
+		item = list_entry(pos, struct ceph_msg_data, links);
+		list_del_init(pos);
+		ceph_msg_data_destroy(item);
+	}
+	m->data_length = 0;
+
+	if (m->pool)
+		ceph_msgpool_put(m->pool, m);
+	else
+		ceph_msg_kfree(m);
+}
+EXPORT_SYMBOL(ceph_msg_last_put);
+
+/* Hex-dump a message's header, front, middle and footer for debugging. */
+void ceph_msg_dump(struct ceph_msg *msg)
+{
+	pr_debug("msg_dump %p (front_alloc_len %d length %zd)\n", msg,
+		 msg->front_alloc_len, msg->data_length);
+	print_hex_dump(KERN_DEBUG, "header: ",
+		       DUMP_PREFIX_OFFSET, 16, 1,
+		       &msg->hdr, sizeof(msg->hdr), true);
+	print_hex_dump(KERN_DEBUG, " front: ",
+		       DUMP_PREFIX_OFFSET, 16, 1,
+		       msg->front.iov_base, msg->front.iov_len, true);
+	if (msg->middle)
+		print_hex_dump(KERN_DEBUG, "middle: ",
+			       DUMP_PREFIX_OFFSET, 16, 1,
+			       msg->middle->vec.iov_base,
+			       msg->middle->vec.iov_len, true);
+	print_hex_dump(KERN_DEBUG, "footer: ",
+		       DUMP_PREFIX_OFFSET, 16, 1,
+		       &msg->footer, sizeof(msg->footer), true);
+}
+EXPORT_SYMBOL(ceph_msg_dump);
diff --git a/libceph/mon_client.c b/libceph/mon_client.c
new file mode 100644
index 0000000..2ac9ef3
--- /dev/null
+++ b/libceph/mon_client.c
@@ -0,0 +1,1102 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/sched.h>
+
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/debugfs.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/auth.h>
+
+/*
+ * Interact with Ceph monitor cluster. Handle requests for new map
+ * versions, and periodically resend as needed. Also implement
+ * statfs() and umount().
+ *
+ * A small cluster of Ceph "monitors" are responsible for managing critical
+ * cluster configuration and state information. An odd number (e.g., 3, 5)
+ * of cmon daemons use a modified version of the Paxos part-time parliament
+ * algorithm to manage the MDS map (mds cluster membership), OSD map, and
+ * list of clients who have mounted the file system.
+ *
+ * We maintain an open, active session with a monitor at all times in order to
+ * receive timely MDSMap updates. We periodically send a keepalive byte on the
+ * TCP socket to ensure we detect a failure. If the connection does break, we
+ * randomly hunt for a new monitor. Once the connection is reestablished, we
+ * resend any outstanding requests.
+ */
+
+static const struct ceph_connection_operations mon_con_ops;
+
+static int __validate_auth(struct ceph_mon_client *monc);
+
+/*
+ * Decode a monmap blob (e.g., during mount).
+ *
+ * Returns a freshly kmalloc'd monmap on success (caller owns and must
+ * kfree it), or an ERR_PTR (-EINVAL on decode failure, -ENOMEM).
+ */
+struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
+{
+ struct ceph_monmap *m = NULL;
+ int i, err = -EINVAL;
+ struct ceph_fsid fsid;
+ u32 epoch, num_mon;
+ u16 version;
+ u32 len;
+
+ ceph_decode_32_safe(&p, end, len, bad);
+ ceph_decode_need(&p, end, len, bad);
+
+ dout("monmap_decode %p %p len %d\n", p, end, (int)(end-p));
+
+ ceph_decode_16_safe(&p, end, version, bad);
+
+ ceph_decode_need(&p, end, sizeof(fsid) + 2*sizeof(u32), bad);
+ ceph_decode_copy(&p, &fsid, sizeof(fsid));
+ epoch = ceph_decode_32(&p);
+
+ num_mon = ceph_decode_32(&p);
+ /*
+ * Bound-check num_mon *before* any size arithmetic: num_mon is
+ * peer-supplied and num_mon*sizeof(m->mon_inst[0]) can wrap a
+ * u32, which would defeat both the ceph_decode_need() bounds
+ * check and the kmalloc size computation below.
+ */
+ if (num_mon >= CEPH_MAX_MON)
+ goto bad;
+ ceph_decode_need(&p, end, num_mon*sizeof(m->mon_inst[0]), bad);
+
+ m = kmalloc(sizeof(*m) + sizeof(m->mon_inst[0])*num_mon, GFP_NOFS);
+ if (m == NULL)
+ return ERR_PTR(-ENOMEM);
+ m->fsid = fsid;
+ m->epoch = epoch;
+ m->num_mon = num_mon;
+ ceph_decode_copy(&p, m->mon_inst, num_mon*sizeof(m->mon_inst[0]));
+ /* addresses arrive in wire order; fix up endianness in place */
+ for (i = 0; i < num_mon; i++)
+ ceph_decode_addr(&m->mon_inst[i].addr);
+
+ dout("monmap_decode epoch %d, num_mon %d\n", m->epoch,
+ m->num_mon);
+ for (i = 0; i < m->num_mon; i++)
+ dout("monmap_decode mon%d is %s\n", i,
+ ceph_pr_addr(&m->mon_inst[i].addr.in_addr));
+ return m;
+
+bad:
+ dout("monmap_decode failed with %d\n", err);
+ kfree(m);
+ return ERR_PTR(err);
+}
+
+/*
+ * return true if *addr is included in the monmap.
+ */
+int ceph_monmap_contains(struct ceph_monmap *m, struct ceph_entity_addr *addr)
+{
+ int idx;
+
+ for (idx = 0; idx < m->num_mon; idx++) {
+ if (!memcmp(addr, &m->mon_inst[idx].addr, sizeof(*addr)))
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Send an auth request.
+ *
+ * The auth payload has already been built into m_auth's front buffer;
+ * len is its length. All callers in this file hold monc->mutex.
+ */
+static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len)
+{
+ monc->pending_auth = 1;
+ monc->m_auth->front.iov_len = len;
+ monc->m_auth->hdr.front_len = cpu_to_le32(len);
+ /* revoke any earlier queued copy so only the latest request goes out */
+ ceph_msg_revoke(monc->m_auth);
+ ceph_msg_get(monc->m_auth); /* keep our ref */
+ ceph_con_send(&monc->con, monc->m_auth);
+}
+
+/*
+ * Close monitor session, if any.
+ *
+ * Revokes all in-flight/incoming session messages, closes the
+ * connection, and resets auth state. Caller holds monc->mutex.
+ */
+static void __close_session(struct ceph_mon_client *monc)
+{
+ dout("__close_session closing mon%d\n", monc->cur_mon);
+ ceph_msg_revoke(monc->m_auth);
+ ceph_msg_revoke_incoming(monc->m_auth_reply);
+ ceph_msg_revoke(monc->m_subscribe);
+ ceph_msg_revoke_incoming(monc->m_subscribe_ack);
+ ceph_con_close(&monc->con);
+ /* cur_mon < 0 means "no session"; __open_session will pick a new mon */
+ monc->cur_mon = -1;
+ monc->pending_auth = 0;
+ ceph_auth_reset(monc->auth);
+}
+
+/*
+ * Open a session with a (new) monitor.
+ *
+ * Picks a random monitor if none is current, opens the connection and
+ * kicks off the authentication handshake. Caller holds monc->mutex.
+ */
+static int __open_session(struct ceph_mon_client *monc)
+{
+ u8 r;
+ int ret;
+
+ if (monc->cur_mon < 0) {
+ /*
+ * r must be unsigned: with a plain (possibly signed) char,
+ * r % num_mon could be negative and cur_mon would index
+ * mon_inst[] out of bounds below.
+ */
+ get_random_bytes(&r, 1);
+ monc->cur_mon = r % monc->monmap->num_mon;
+ dout("open_session num=%d r=%d -> mon%d\n",
+ monc->monmap->num_mon, r, monc->cur_mon);
+ monc->sub_sent = 0;
+ monc->sub_renew_after = jiffies; /* i.e., expired */
+ monc->want_next_osdmap = !!monc->want_next_osdmap;
+
+ dout("open_session mon%d opening\n", monc->cur_mon);
+ ceph_con_open(&monc->con,
+ CEPH_ENTITY_TYPE_MON, monc->cur_mon,
+ &monc->monmap->mon_inst[monc->cur_mon].addr);
+
+ /* initiate authentication handshake */
+ ret = ceph_auth_build_hello(monc->auth,
+ monc->m_auth->front.iov_base,
+ monc->m_auth->front_alloc_len);
+ __send_prepared_auth_request(monc, ret);
+ } else {
+ dout("open_session mon%d already open\n", monc->cur_mon);
+ }
+ return 0;
+}
+
+/* Return true once the current subscription lease is due for renewal. */
+static bool __sub_expired(struct ceph_mon_client *monc)
+{
+ return time_after_eq(jiffies, monc->sub_renew_after);
+}
+
+/*
+ * Reschedule delayed work timer.
+ *
+ * Use a short delay while hunting/renewing (no session, or the
+ * subscription lease expired), otherwise a longer keepalive interval.
+ */
+static void __schedule_delayed(struct ceph_mon_client *monc)
+{
+ unsigned int delay;
+
+ if (monc->cur_mon < 0 || __sub_expired(monc))
+ delay = 10 * HZ;
+ else
+ delay = 20 * HZ;
+ dout("__schedule_delayed after %u\n", delay);
+ schedule_delayed_work(&monc->delayed_work, delay);
+}
+
+/*
+ * Send subscribe request for mdsmap and/or osdmap.
+ *
+ * Encodes up to three subscribe items ("osdmap", "mdsmap", "monmap")
+ * into m_subscribe's front buffer and sends it. Only sends when the
+ * lease expired and nothing is outstanding, or when an osdmap was
+ * explicitly requested (want_next_osdmap == 1; it becomes 2 once the
+ * request is in flight). Caller holds monc->mutex.
+ */
+static void __send_subscribe(struct ceph_mon_client *monc)
+{
+ dout("__send_subscribe sub_sent=%u exp=%u want_osd=%d\n",
+ (unsigned int)monc->sub_sent, __sub_expired(monc),
+ monc->want_next_osdmap);
+ if ((__sub_expired(monc) && !monc->sub_sent) ||
+ monc->want_next_osdmap == 1) {
+ struct ceph_msg *msg = monc->m_subscribe;
+ struct ceph_mon_subscribe_item *i;
+ void *p, *end;
+ int num;
+
+ p = msg->front.iov_base;
+ end = p + msg->front_alloc_len;
+
+ /* item count: monmap always, plus osdmap/mdsmap as wanted */
+ num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap;
+ ceph_encode_32(&p, num);
+
+ if (monc->want_next_osdmap) {
+ dout("__send_subscribe to 'osdmap' %u\n",
+ (unsigned int)monc->have_osdmap);
+ ceph_encode_string(&p, end, "osdmap", 6);
+ i = p;
+ i->have = cpu_to_le64(monc->have_osdmap);
+ i->onetime = 1;
+ p += sizeof(*i);
+ monc->want_next_osdmap = 2; /* requested */
+ }
+ if (monc->want_mdsmap) {
+ dout("__send_subscribe to 'mdsmap' %u+\n",
+ (unsigned int)monc->have_mdsmap);
+ ceph_encode_string(&p, end, "mdsmap", 6);
+ i = p;
+ i->have = cpu_to_le64(monc->have_mdsmap);
+ i->onetime = 0;
+ p += sizeof(*i);
+ }
+ /* always keep a standing (non-onetime) monmap subscription */
+ ceph_encode_string(&p, end, "monmap", 6);
+ i = p;
+ i->have = 0;
+ i->onetime = 0;
+ p += sizeof(*i);
+
+ msg->front.iov_len = p - msg->front.iov_base;
+ msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+ ceph_msg_revoke(msg);
+ ceph_con_send(&monc->con, ceph_msg_get(msg));
+
+ monc->sub_sent = jiffies | 1; /* never 0 */
+ }
+}
+
+/*
+ * Handle a MON_SUBSCRIBE_ACK: the monitor grants our subscription for
+ * h->duration seconds. A first ack after hunting means the session is
+ * established.
+ */
+static void handle_subscribe_ack(struct ceph_mon_client *monc,
+ struct ceph_msg *msg)
+{
+ unsigned int seconds;
+ struct ceph_mon_subscribe_ack *h = msg->front.iov_base;
+
+ if (msg->front.iov_len < sizeof(*h))
+ goto bad;
+ seconds = le32_to_cpu(h->duration);
+
+ mutex_lock(&monc->mutex);
+ if (monc->hunting) {
+ pr_info("mon%d %s session established\n",
+ monc->cur_mon,
+ ceph_pr_addr(&monc->con.peer_addr.in_addr));
+ monc->hunting = false;
+ }
+ dout("handle_subscribe_ack after %d seconds\n", seconds);
+ /* renew halfway through the granted lease */
+ monc->sub_renew_after = monc->sub_sent + (seconds >> 1)*HZ - 1;
+ monc->sub_sent = 0;
+ mutex_unlock(&monc->mutex);
+ return;
+bad:
+ pr_err("got corrupt subscribe-ack msg\n");
+ ceph_msg_dump(msg);
+}
+
+/*
+ * Keep track of which maps we have
+ */
+int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 got)
+{
+ /* record the mdsmap epoch we now hold; used in the next subscribe */
+ mutex_lock(&monc->mutex);
+ monc->have_mdsmap = got;
+ mutex_unlock(&monc->mutex);
+ return 0;
+}
+EXPORT_SYMBOL(ceph_monc_got_mdsmap);
+
+/* Record the osdmap epoch we now hold and clear any pending osdmap want. */
+int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got)
+{
+ mutex_lock(&monc->mutex);
+ monc->have_osdmap = got;
+ monc->want_next_osdmap = 0;
+ mutex_unlock(&monc->mutex);
+ return 0;
+}
+
+/*
+ * Register interest in the next osdmap
+ */
+void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
+{
+ /* NOTE(review): have_osdmap is read here before taking the mutex --
+ * debug-only output, but racy in principle */
+ dout("request_next_osdmap have %u\n", monc->have_osdmap);
+ mutex_lock(&monc->mutex);
+ /* 0 -> 1 marks "wanted"; if not already in flight (< 2), send now */
+ if (!monc->want_next_osdmap)
+ monc->want_next_osdmap = 1;
+ if (monc->want_next_osdmap < 2)
+ __send_subscribe(monc);
+ mutex_unlock(&monc->mutex);
+}
+
+/*
+ * Open (or re-open) a session with a monitor and schedule the periodic
+ * keepalive/renewal work.
+ */
+int ceph_monc_open_session(struct ceph_mon_client *monc)
+{
+ mutex_lock(&monc->mutex);
+ __open_session(monc);
+ __schedule_delayed(monc);
+ mutex_unlock(&monc->mutex);
+ return 0;
+}
+EXPORT_SYMBOL(ceph_monc_open_session);
+
+/*
+ * We require the fsid and global_id in order to initialize our
+ * debugfs dir.
+ */
+static bool have_debugfs_info(struct ceph_mon_client *monc)
+{
+ dout("have_debugfs_info fsid %d globalid %lld\n",
+ (int)monc->client->have_fsid, monc->auth->global_id);
+ return monc->client->have_fsid && monc->auth->global_id > 0;
+}
+
+/*
+ * The monitor responds with a mount ack to indicate mount success. The
+ * included client ticket allows the client to talk to MDSs and OSDs.
+ *
+ * Decodes the incoming monmap, swaps it in for the old one, and (on
+ * first fsid) performs one-time debugfs setup outside the mutex.
+ */
+static void ceph_monc_handle_map(struct ceph_mon_client *monc,
+ struct ceph_msg *msg)
+{
+ struct ceph_client *client = monc->client;
+ struct ceph_monmap *monmap = NULL, *old = monc->monmap;
+ void *p, *end;
+ int had_debugfs_info, init_debugfs = 0;
+
+ mutex_lock(&monc->mutex);
+
+ had_debugfs_info = have_debugfs_info(monc);
+
+ dout("handle_monmap\n");
+ p = msg->front.iov_base;
+ end = p + msg->front.iov_len;
+
+ monmap = ceph_monmap_decode(p, end);
+ if (IS_ERR(monmap)) {
+ pr_err("problem decoding monmap, %d\n",
+ (int)PTR_ERR(monmap));
+ goto out;
+ }
+
+ /* fsid mismatch: discard the new map, keep the old one */
+ if (ceph_check_fsid(monc->client, &monmap->fsid) < 0) {
+ kfree(monmap);
+ goto out;
+ }
+
+ client->monc.monmap = monmap;
+ kfree(old);
+
+ if (!client->have_fsid) {
+ client->have_fsid = true;
+ if (!had_debugfs_info && have_debugfs_info(monc)) {
+ pr_info("client%lld fsid %pU\n",
+ ceph_client_id(monc->client),
+ &monc->client->fsid);
+ init_debugfs = 1;
+ }
+ mutex_unlock(&monc->mutex);
+
+ if (init_debugfs) {
+ /*
+ * do debugfs initialization without mutex to avoid
+ * creating a locking dependency
+ */
+ ceph_debugfs_client_init(monc->client);
+ }
+
+ goto out_unlocked;
+ }
+out:
+ mutex_unlock(&monc->mutex);
+out_unlocked:
+ wake_up_all(&client->auth_wq);
+}
+
+/*
+ * generic requests (e.g., statfs, poolop)
+ *
+ * Look up a pending generic request by transaction id in the rbtree.
+ * Caller holds monc->mutex. Returns NULL if not found.
+ */
+static struct ceph_mon_generic_request *__lookup_generic_req(
+ struct ceph_mon_client *monc, u64 tid)
+{
+ struct rb_node *node = monc->generic_request_tree.rb_node;
+
+ while (node) {
+ struct ceph_mon_generic_request *cur =
+ rb_entry(node, struct ceph_mon_generic_request, node);
+
+ if (tid < cur->tid)
+ node = node->rb_left;
+ else if (tid > cur->tid)
+ node = node->rb_right;
+ else
+ return cur;
+ }
+ return NULL;
+}
+
+/*
+ * Insert a new generic request into the tid-keyed rbtree.
+ * Caller holds monc->mutex; duplicate tids are a bug.
+ */
+static void __insert_generic_request(struct ceph_mon_client *monc,
+ struct ceph_mon_generic_request *new)
+{
+ struct rb_node **p = &monc->generic_request_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct ceph_mon_generic_request *req = NULL;
+
+ while (*p) {
+ parent = *p;
+ req = rb_entry(parent, struct ceph_mon_generic_request, node);
+ if (new->tid < req->tid)
+ p = &(*p)->rb_left;
+ else if (new->tid > req->tid)
+ p = &(*p)->rb_right;
+ else
+ BUG();
+ }
+
+ rb_link_node(&new->node, parent, p);
+ rb_insert_color(&new->node, &monc->generic_request_tree);
+}
+
+/*
+ * kref release callback for a generic request: drop the message refs
+ * and free the request itself.
+ */
+static void release_generic_request(struct kref *kref)
+{
+ struct ceph_mon_generic_request *req =
+ container_of(kref, struct ceph_mon_generic_request, kref);
+
+ if (req->reply)
+ ceph_msg_put(req->reply);
+ if (req->request)
+ ceph_msg_put(req->request);
+
+ kfree(req);
+}
+
+/* Drop a ref on a generic request, freeing it on the last put. */
+static void put_generic_request(struct ceph_mon_generic_request *req)
+{
+ kref_put(&req->kref, release_generic_request);
+}
+
+/* Take an additional ref on a generic request. */
+static void get_generic_request(struct ceph_mon_generic_request *req)
+{
+ kref_get(&req->kref);
+}
+
+/*
+ * alloc_msg helper for generic replies: return the preallocated reply
+ * buffer of the matching pending request, or set *skip if the tid is
+ * unknown (e.g., the request already timed out).
+ */
+static struct ceph_msg *get_generic_reply(struct ceph_connection *con,
+ struct ceph_msg_header *hdr,
+ int *skip)
+{
+ struct ceph_mon_client *monc = con->private;
+ struct ceph_mon_generic_request *req;
+ u64 tid = le64_to_cpu(hdr->tid);
+ struct ceph_msg *m;
+
+ mutex_lock(&monc->mutex);
+ req = __lookup_generic_req(monc, tid);
+ if (!req) {
+ dout("get_generic_reply %lld dne\n", tid);
+ *skip = 1;
+ m = NULL;
+ } else {
+ dout("get_generic_reply %lld got %p\n", tid, req->reply);
+ *skip = 0;
+ m = ceph_msg_get(req->reply);
+ /*
+ * we don't need to track the connection reading into
+ * this reply because we only have one open connection
+ * at a time, ever.
+ */
+ }
+ mutex_unlock(&monc->mutex);
+ return m;
+}
+
+/*
+ * Register a generic request, send it, and block until the reply
+ * handler completes it (or the wait is interrupted).
+ *
+ * NOTE(review): on -EINTR the request is unregistered while possibly
+ * still in flight; the con still holds its own msg refs, so this looks
+ * safe, but confirm the reply path tolerates a vanished tid.
+ */
+static int do_generic_request(struct ceph_mon_client *monc,
+ struct ceph_mon_generic_request *req)
+{
+ int err;
+
+ /* register request */
+ mutex_lock(&monc->mutex);
+ req->tid = ++monc->last_tid;
+ req->request->hdr.tid = cpu_to_le64(req->tid);
+ __insert_generic_request(monc, req);
+ monc->num_generic_requests++;
+ ceph_con_send(&monc->con, ceph_msg_get(req->request));
+ mutex_unlock(&monc->mutex);
+
+ err = wait_for_completion_interruptible(&req->completion);
+
+ mutex_lock(&monc->mutex);
+ rb_erase(&req->node, &monc->generic_request_tree);
+ monc->num_generic_requests--;
+ mutex_unlock(&monc->mutex);
+
+ /* on clean completion, report the result set by the reply handler */
+ if (!err)
+ err = req->result;
+ return err;
+}
+
+/*
+ * statfs
+ *
+ * Copy the statfs result into the waiter's buffer and complete the
+ * matching generic request.
+ */
+static void handle_statfs_reply(struct ceph_mon_client *monc,
+ struct ceph_msg *msg)
+{
+ struct ceph_mon_generic_request *req;
+ struct ceph_mon_statfs_reply *reply = msg->front.iov_base;
+ u64 tid = le64_to_cpu(msg->hdr.tid);
+
+ if (msg->front.iov_len != sizeof(*reply))
+ goto bad;
+ dout("handle_statfs_reply %p tid %llu\n", msg, tid);
+
+ mutex_lock(&monc->mutex);
+ req = __lookup_generic_req(monc, tid);
+ if (req) {
+ *(struct ceph_statfs *)req->buf = reply->st;
+ req->result = 0;
+ /* extra ref keeps req alive across the unlocked complete below */
+ get_generic_request(req);
+ }
+ mutex_unlock(&monc->mutex);
+ if (req) {
+ complete_all(&req->completion);
+ put_generic_request(req);
+ }
+ return;
+
+bad:
+ pr_err("corrupt generic reply, tid %llu\n", tid);
+ ceph_msg_dump(msg);
+}
+
+/*
+ * Do a synchronous statfs().
+ *
+ * Allocates a generic request with preallocated request/reply messages,
+ * fills in the statfs header, and blocks in do_generic_request().
+ * Returns 0 on success or a negative errno.
+ */
+int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
+{
+ struct ceph_mon_generic_request *req;
+ struct ceph_mon_statfs *h;
+ int err;
+
+ req = kzalloc(sizeof(*req), GFP_NOFS);
+ if (!req)
+ return -ENOMEM;
+
+ kref_init(&req->kref);
+ req->buf = buf;
+ req->buf_len = sizeof(*buf);
+ init_completion(&req->completion);
+
+ err = -ENOMEM;
+ req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), GFP_NOFS,
+ true);
+ if (!req->request)
+ goto out;
+ req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024, GFP_NOFS,
+ true);
+ if (!req->reply)
+ goto out;
+
+ /* fill out request */
+ h = req->request->front.iov_base;
+ h->monhdr.have_version = 0;
+ h->monhdr.session_mon = cpu_to_le16(-1);
+ h->monhdr.session_mon_tid = 0;
+ h->fsid = monc->monmap->fsid;
+
+ err = do_generic_request(monc, req);
+
+out:
+ /* drops the initial ref; frees req (and any msgs) on failure paths */
+ kref_put(&req->kref, release_generic_request);
+ return err;
+}
+EXPORT_SYMBOL(ceph_monc_do_statfs);
+
+/*
+ * pool ops
+ *
+ * Extract a length-prefixed payload from a poolop reply into dst.
+ * Returns 0 on success, -EINVAL if the sizes don't line up exactly.
+ */
+static int get_poolop_reply_buf(const char *src, size_t src_len,
+ char *dst, size_t dst_len)
+{
+ u32 encoded_len;
+
+ /* payload must be exactly a u32 length prefix plus dst_len bytes */
+ if (src_len != sizeof(u32) + dst_len)
+ return -EINVAL;
+
+ encoded_len = le32_to_cpu(*(u32 *)src);
+ if (encoded_len != dst_len)
+ return -EINVAL;
+
+ memcpy(dst, src + sizeof(u32), dst_len);
+ return 0;
+}
+
+/*
+ * Handle a poolop reply: copy any payload into the waiter's buffer,
+ * record the reply code, and complete the matching generic request.
+ */
+static void handle_poolop_reply(struct ceph_mon_client *monc,
+ struct ceph_msg *msg)
+{
+ struct ceph_mon_generic_request *req;
+ struct ceph_mon_poolop_reply *reply = msg->front.iov_base;
+ u64 tid = le64_to_cpu(msg->hdr.tid);
+
+ if (msg->front.iov_len < sizeof(*reply))
+ goto bad;
+ dout("handle_poolop_reply %p tid %llu\n", msg, tid);
+
+ mutex_lock(&monc->mutex);
+ req = __lookup_generic_req(monc, tid);
+ if (req) {
+ if (req->buf_len &&
+ get_poolop_reply_buf(msg->front.iov_base + sizeof(*reply),
+ msg->front.iov_len - sizeof(*reply),
+ req->buf, req->buf_len) < 0) {
+ mutex_unlock(&monc->mutex);
+ goto bad;
+ }
+ req->result = le32_to_cpu(reply->reply_code);
+ /* extra ref keeps req alive across the unlocked complete below */
+ get_generic_request(req);
+ }
+ mutex_unlock(&monc->mutex);
+ if (req) {
+ /* NOTE(review): statfs uses complete_all() here -- confirm the
+ * single-waiter assumption that makes complete() sufficient */
+ complete(&req->completion);
+ put_generic_request(req);
+ }
+ return;
+
+bad:
+ pr_err("corrupt generic reply, tid %llu\n", tid);
+ ceph_msg_dump(msg);
+}
+
+/*
+ * Do a synchronous pool op.
+ *
+ * Builds a v2 poolop request (op on pool/snapid, optional reply buffer
+ * of len bytes) and blocks in do_generic_request(). Returns the
+ * monitor's reply code or a negative errno.
+ */
+static int do_poolop(struct ceph_mon_client *monc, u32 op,
+ u32 pool, u64 snapid,
+ char *buf, int len)
+{
+ struct ceph_mon_generic_request *req;
+ struct ceph_mon_poolop *h;
+ int err;
+
+ req = kzalloc(sizeof(*req), GFP_NOFS);
+ if (!req)
+ return -ENOMEM;
+
+ kref_init(&req->kref);
+ req->buf = buf;
+ req->buf_len = len;
+ init_completion(&req->completion);
+
+ err = -ENOMEM;
+ req->request = ceph_msg_new(CEPH_MSG_POOLOP, sizeof(*h), GFP_NOFS,
+ true);
+ if (!req->request)
+ goto out;
+ req->reply = ceph_msg_new(CEPH_MSG_POOLOP_REPLY, 1024, GFP_NOFS,
+ true);
+ if (!req->reply)
+ goto out;
+
+ /* fill out request */
+ req->request->hdr.version = cpu_to_le16(2);
+ h = req->request->front.iov_base;
+ h->monhdr.have_version = 0;
+ h->monhdr.session_mon = cpu_to_le16(-1);
+ h->monhdr.session_mon_tid = 0;
+ h->fsid = monc->monmap->fsid;
+ h->pool = cpu_to_le32(pool);
+ h->op = cpu_to_le32(op);
+ h->auid = 0;
+ h->snapid = cpu_to_le64(snapid);
+ h->name_len = 0;
+
+ err = do_generic_request(monc, req);
+
+out:
+ /* drops the initial ref; frees req (and any msgs) on failure paths */
+ kref_put(&req->kref, release_generic_request);
+ return err;
+}
+
+/* Create an unmanaged snapshot in @pool; new snap id returned in *snapid. */
+int ceph_monc_create_snapid(struct ceph_mon_client *monc,
+ u32 pool, u64 *snapid)
+{
+ return do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP,
+ pool, 0, (char *)snapid, sizeof(*snapid));
+
+}
+EXPORT_SYMBOL(ceph_monc_create_snapid);
+
+/* Delete unmanaged snapshot @snapid from @pool. */
+int ceph_monc_delete_snapid(struct ceph_mon_client *monc,
+ u32 pool, u64 snapid)
+{
+ /*
+ * Bug fix: this previously issued POOL_OP_CREATE_UNMANAGED_SNAP,
+ * so a "delete" actually created yet another snapshot. Use the
+ * delete op.
+ */
+ return do_poolop(monc, POOL_OP_DELETE_UNMANAGED_SNAP,
+ pool, snapid, NULL, 0);
+}
+
+/*
+ * Resend pending generic requests.
+ *
+ * Called after (re)authentication; revokes any stale queued copies and
+ * re-queues every registered request. Caller holds monc->mutex.
+ */
+static void __resend_generic_request(struct ceph_mon_client *monc)
+{
+ struct ceph_mon_generic_request *req;
+ struct rb_node *p;
+
+ for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) {
+ req = rb_entry(p, struct ceph_mon_generic_request, node);
+ ceph_msg_revoke(req->request);
+ ceph_msg_revoke_incoming(req->reply);
+ ceph_con_send(&monc->con, ceph_msg_get(req->request));
+ }
+}
+
+/*
+ * Delayed work. If we haven't mounted yet, retry. Otherwise,
+ * renew/retry subscription as needed (in case it is timing out, or we
+ * got an ENOMEM). And keep the monitor connection alive.
+ */
+static void delayed_work(struct work_struct *work)
+{
+ struct ceph_mon_client *monc =
+ container_of(work, struct ceph_mon_client, delayed_work.work);
+
+ dout("monc delayed_work\n");
+ mutex_lock(&monc->mutex);
+ if (monc->hunting) {
+ /* still hunting: drop the failed session, try the next mon */
+ __close_session(monc);
+ __open_session(monc); /* continue hunting */
+ } else {
+ ceph_con_keepalive(&monc->con);
+
+ __validate_auth(monc);
+
+ /* only renew subscriptions once authenticated */
+ if (ceph_auth_is_authenticated(monc->auth))
+ __send_subscribe(monc);
+ }
+ __schedule_delayed(monc);
+ mutex_unlock(&monc->mutex);
+}
+
+/*
+ * On startup, we build a temporary monmap populated with the IPs
+ * provided by mount(2).
+ */
+static int build_initial_monmap(struct ceph_mon_client *monc)
+{
+ struct ceph_options *opt = monc->client->options;
+ struct ceph_entity_addr *mon_addr = opt->mon_addr;
+ int num_mon = opt->num_mon;
+ int i;
+
+ /* build initial monmap */
+ monc->monmap = kzalloc(sizeof(*monc->monmap) +
+ num_mon*sizeof(monc->monmap->mon_inst[0]),
+ GFP_KERNEL);
+ if (!monc->monmap)
+ return -ENOMEM;
+ for (i = 0; i < num_mon; i++) {
+ monc->monmap->mon_inst[i].addr = mon_addr[i];
+ /* nonce 0: a real nonce comes from the actual monmap later */
+ monc->monmap->mon_inst[i].addr.nonce = 0;
+ monc->monmap->mon_inst[i].name.type =
+ CEPH_ENTITY_TYPE_MON;
+ monc->monmap->mon_inst[i].name.num = cpu_to_le64(i);
+ }
+ monc->monmap->num_mon = num_mon;
+ return 0;
+}
+
+/*
+ * Initialize a mon_client: build the initial monmap, set up auth, and
+ * preallocate the fixed session messages. On failure, unwinds in
+ * reverse order via the goto chain. Returns 0 or a negative errno.
+ */
+int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
+{
+ int err = 0;
+
+ dout("init\n");
+ memset(monc, 0, sizeof(*monc));
+ monc->client = cl;
+ monc->monmap = NULL;
+ mutex_init(&monc->mutex);
+
+ err = build_initial_monmap(monc);
+ if (err)
+ goto out;
+
+ /* connection */
+ /* authentication */
+ monc->auth = ceph_auth_init(cl->options->name,
+ cl->options->key);
+ if (IS_ERR(monc->auth)) {
+ err = PTR_ERR(monc->auth);
+ goto out_monmap;
+ }
+ monc->auth->want_keys =
+ CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON |
+ CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MDS;
+
+ /* msgs: preallocated so the session never fails on ENOMEM mid-flight */
+ err = -ENOMEM;
+ monc->m_subscribe_ack = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE_ACK,
+ sizeof(struct ceph_mon_subscribe_ack),
+ GFP_NOFS, true);
+ if (!monc->m_subscribe_ack)
+ goto out_auth;
+
+ monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, GFP_NOFS,
+ true);
+ if (!monc->m_subscribe)
+ goto out_subscribe_ack;
+
+ monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, GFP_NOFS,
+ true);
+ if (!monc->m_auth_reply)
+ goto out_subscribe;
+
+ monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_NOFS, true);
+ monc->pending_auth = 0;
+ if (!monc->m_auth)
+ goto out_auth_reply;
+
+ ceph_con_init(&monc->con, monc, &mon_con_ops,
+ &monc->client->msgr);
+
+ /* cur_mon = -1: no session yet; start in hunting mode */
+ monc->cur_mon = -1;
+ monc->hunting = true;
+ monc->sub_renew_after = jiffies;
+ monc->sub_sent = 0;
+
+ INIT_DELAYED_WORK(&monc->delayed_work, delayed_work);
+ monc->generic_request_tree = RB_ROOT;
+ monc->num_generic_requests = 0;
+ monc->last_tid = 0;
+
+ monc->have_mdsmap = 0;
+ monc->have_osdmap = 0;
+ monc->want_next_osdmap = 1;
+ return 0;
+
+out_auth_reply:
+ ceph_msg_put(monc->m_auth_reply);
+out_subscribe:
+ ceph_msg_put(monc->m_subscribe);
+out_subscribe_ack:
+ ceph_msg_put(monc->m_subscribe_ack);
+out_auth:
+ ceph_auth_destroy(monc->auth);
+out_monmap:
+ kfree(monc->monmap);
+out:
+ return err;
+}
+EXPORT_SYMBOL(ceph_monc_init);
+
+/*
+ * Tear down a mon_client: stop the periodic work, close the session,
+ * drain the messenger, then release auth, messages, and the monmap.
+ */
+void ceph_monc_stop(struct ceph_mon_client *monc)
+{
+ dout("stop\n");
+ cancel_delayed_work_sync(&monc->delayed_work);
+
+ mutex_lock(&monc->mutex);
+ __close_session(monc);
+
+ mutex_unlock(&monc->mutex);
+
+ /*
+ * flush msgr queue before we destroy ourselves to ensure that:
+ * - any work that references our embedded con is finished.
+ * - any osd_client or other work that may reference an authorizer
+ * finishes before we shut down the auth subsystem.
+ */
+ ceph_msgr_flush();
+
+ ceph_auth_destroy(monc->auth);
+
+ ceph_msg_put(monc->m_auth);
+ ceph_msg_put(monc->m_auth_reply);
+ ceph_msg_put(monc->m_subscribe);
+ ceph_msg_put(monc->m_subscribe_ack);
+
+ kfree(monc->monmap);
+}
+EXPORT_SYMBOL(ceph_monc_stop);
+
+/*
+ * Handle an AUTH_REPLY: feed it to the auth layer, which either fails
+ * (wake mount waiters with the error), needs another round trip (send
+ * the prepared request), or completes -- in which case we adopt our
+ * global_id as our entity name and start the session proper.
+ */
+static void handle_auth_reply(struct ceph_mon_client *monc,
+ struct ceph_msg *msg)
+{
+ int ret;
+ int was_auth = 0;
+ int had_debugfs_info, init_debugfs = 0;
+
+ mutex_lock(&monc->mutex);
+ had_debugfs_info = have_debugfs_info(monc);
+ was_auth = ceph_auth_is_authenticated(monc->auth);
+ monc->pending_auth = 0;
+ ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base,
+ msg->front.iov_len,
+ monc->m_auth->front.iov_base,
+ monc->m_auth->front_alloc_len);
+ if (ret < 0) {
+ monc->client->auth_err = ret;
+ wake_up_all(&monc->client->auth_wq);
+ } else if (ret > 0) {
+ /* ret > 0: auth layer prepared a follow-up request of that length */
+ __send_prepared_auth_request(monc, ret);
+ } else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
+ dout("authenticated, starting session\n");
+
+ monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
+ monc->client->msgr.inst.name.num =
+ cpu_to_le64(monc->auth->global_id);
+
+ __send_subscribe(monc);
+ __resend_generic_request(monc);
+ }
+
+ if (!had_debugfs_info && have_debugfs_info(monc)) {
+ pr_info("client%lld fsid %pU\n",
+ ceph_client_id(monc->client),
+ &monc->client->fsid);
+ init_debugfs = 1;
+ }
+ mutex_unlock(&monc->mutex);
+
+ if (init_debugfs) {
+ /*
+ * do debugfs initialization without mutex to avoid
+ * creating a locking dependency
+ */
+ ceph_debugfs_client_init(monc->client);
+ }
+}
+
+/*
+ * If auth needs (re)validation and no request is already pending, build
+ * and send one. Caller holds monc->mutex. Returns 0 or a negative errno.
+ */
+static int __validate_auth(struct ceph_mon_client *monc)
+{
+ int ret;
+
+ if (monc->pending_auth)
+ return 0;
+
+ ret = ceph_build_auth(monc->auth, monc->m_auth->front.iov_base,
+ monc->m_auth->front_alloc_len);
+ if (ret <= 0)
+ return ret; /* either an error, or no need to authenticate */
+ __send_prepared_auth_request(monc, ret);
+ return 0;
+}
+
+/* Locked wrapper around __validate_auth() for external callers. */
+int ceph_monc_validate_auth(struct ceph_mon_client *monc)
+{
+ int ret;
+
+ mutex_lock(&monc->mutex);
+ ret = __validate_auth(monc);
+ mutex_unlock(&monc->mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_monc_validate_auth);
+
+/*
+ * handle incoming message
+ *
+ * Routes each message by type to its handler; unknown types are first
+ * offered to the client's chained dispatcher. Consumes the msg ref.
+ */
+static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
+{
+ struct ceph_mon_client *monc = con->private;
+ int type = le16_to_cpu(msg->hdr.type);
+
+ if (!monc)
+ return;
+
+ switch (type) {
+ case CEPH_MSG_AUTH_REPLY:
+ handle_auth_reply(monc, msg);
+ break;
+
+ case CEPH_MSG_MON_SUBSCRIBE_ACK:
+ handle_subscribe_ack(monc, msg);
+ break;
+
+ case CEPH_MSG_STATFS_REPLY:
+ handle_statfs_reply(monc, msg);
+ break;
+
+ case CEPH_MSG_POOLOP_REPLY:
+ handle_poolop_reply(monc, msg);
+ break;
+
+ case CEPH_MSG_MON_MAP:
+ ceph_monc_handle_map(monc, msg);
+ break;
+
+ case CEPH_MSG_OSD_MAP:
+ ceph_osdc_handle_map(&monc->client->osdc, msg);
+ break;
+
+ default:
+ /* can the chained handler handle it? */
+ if (monc->client->extra_mon_dispatch &&
+ monc->client->extra_mon_dispatch(monc->client, msg) == 0)
+ break;
+
+ pr_err("received unknown message type %d %s\n", type,
+ ceph_msg_type_name(type));
+ }
+ /* drop the ref taken for dispatch */
+ ceph_msg_put(msg);
+}
+
+/*
+ * Allocate memory for incoming message
+ *
+ * Session messages reuse their preallocated buffers; generic replies
+ * are matched to a pending request; map messages get fresh buffers.
+ * *skip = 1 tells the messenger to discard the incoming message.
+ */
+static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
+ struct ceph_msg_header *hdr,
+ int *skip)
+{
+ struct ceph_mon_client *monc = con->private;
+ int type = le16_to_cpu(hdr->type);
+ int front_len = le32_to_cpu(hdr->front_len);
+ struct ceph_msg *m = NULL;
+
+ *skip = 0;
+
+ switch (type) {
+ case CEPH_MSG_MON_SUBSCRIBE_ACK:
+ m = ceph_msg_get(monc->m_subscribe_ack);
+ break;
+ case CEPH_MSG_POOLOP_REPLY:
+ case CEPH_MSG_STATFS_REPLY:
+ return get_generic_reply(con, hdr, skip);
+ case CEPH_MSG_AUTH_REPLY:
+ m = ceph_msg_get(monc->m_auth_reply);
+ break;
+ case CEPH_MSG_MON_MAP:
+ case CEPH_MSG_MDS_MAP:
+ case CEPH_MSG_OSD_MAP:
+ m = ceph_msg_new(type, front_len, GFP_NOFS, false);
+ if (!m)
+ return NULL; /* ENOMEM--return skip == 0 */
+ break;
+ }
+
+ if (!m) {
+ pr_info("alloc_msg unknown type %d\n", type);
+ *skip = 1;
+ }
+ return m;
+}
+
+/*
+ * If the monitor connection resets, pick a new monitor and resubmit
+ * any pending requests.
+ */
+static void mon_fault(struct ceph_connection *con)
+{
+ struct ceph_mon_client *monc = con->private;
+
+ if (!monc)
+ return;
+
+ dout("mon_fault\n");
+ mutex_lock(&monc->mutex);
+ /* re-check under the mutex: con may have been torn down */
+ if (!con->private)
+ goto out;
+
+ if (!monc->hunting)
+ pr_info("mon%d %s session lost, "
+ "hunting for new mon\n", monc->cur_mon,
+ ceph_pr_addr(&monc->con.peer_addr.in_addr));
+
+ __close_session(monc);
+ if (!monc->hunting) {
+ /* start hunting */
+ monc->hunting = true;
+ __open_session(monc);
+ } else {
+ /* already hunting, let's wait a bit */
+ __schedule_delayed(monc);
+ }
+out:
+ mutex_unlock(&monc->mutex);
+}
+
+/*
+ * We can ignore refcounting on the connection struct, as all references
+ * will come from the messenger workqueue, which is drained prior to
+ * mon_client destruction.
+ */
+static struct ceph_connection *con_get(struct ceph_connection *con)
+{
+ return con;
+}
+
+/* no-op counterpart of con_get(); see comment above */
+static void con_put(struct ceph_connection *con)
+{
+}
+
+static const struct ceph_connection_operations mon_con_ops = {
+ .get = con_get,
+ .put = con_put,
+ .dispatch = dispatch,
+ .fault = mon_fault,
+ .alloc_msg = mon_alloc_msg,
+};
diff --git a/libceph/msgpool.c b/libceph/msgpool.c
new file mode 100644
index 0000000..ddec1c1
--- /dev/null
+++ b/libceph/msgpool.c
@@ -0,0 +1,83 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+
+#include <linux/ceph/msgpool.h>
+
+/*
+ * mempool allocation callback: create a message of the pool's fixed
+ * type/front size and tag it as pool-owned so puts recycle it.
+ */
+static void *msgpool_alloc(gfp_t gfp_mask, void *arg)
+{
+ struct ceph_msgpool *pool = arg;
+ struct ceph_msg *msg;
+
+ msg = ceph_msg_new(pool->type, pool->front_len, gfp_mask, true);
+ if (!msg) {
+ dout("msgpool_alloc %s failed\n", pool->name);
+ } else {
+ dout("msgpool_alloc %s %p\n", pool->name, msg);
+ msg->pool = pool;
+ }
+ return msg;
+}
+
+/*
+ * mempool free callback: detach the message from the pool so the final
+ * put frees it instead of recycling it back into the pool.
+ */
+static void msgpool_free(void *element, void *arg)
+{
+ struct ceph_msgpool *pool = arg;
+ struct ceph_msg *msg = element;
+
+ dout("msgpool_release %s %p\n", pool->name, msg);
+ msg->pool = NULL;
+ ceph_msg_put(msg);
+}
+
+/*
+ * Initialize a message pool of @size preallocated messages, each with a
+ * front buffer of @front_len bytes. Returns 0 or -ENOMEM.
+ */
+int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
+ int front_len, int size, bool blocking, const char *name)
+{
+ dout("msgpool %s init\n", name);
+ pool->type = type;
+ pool->front_len = front_len;
+ pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool);
+ if (!pool->pool)
+ return -ENOMEM;
+ pool->name = name;
+ return 0;
+}
+
+/* Destroy the pool; msgpool_free() releases each remaining message. */
+void ceph_msgpool_destroy(struct ceph_msgpool *pool)
+{
+ dout("msgpool %s destroy\n", pool->name);
+ mempool_destroy(pool->pool);
+}
+
+/*
+ * Get a message from the pool. Requests larger than the pool's front
+ * size are unexpected (WARN) and fall back to a one-off allocation that
+ * is not pool-owned.
+ */
+struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
+ int front_len)
+{
+ struct ceph_msg *msg;
+
+ if (front_len > pool->front_len) {
+ dout("msgpool_get %s need front %d, pool size is %d\n",
+ pool->name, front_len, pool->front_len);
+ WARN_ON(1);
+
+ /* try to alloc a fresh message */
+ return ceph_msg_new(pool->type, front_len, GFP_NOFS, false);
+ }
+
+ msg = mempool_alloc(pool->pool, GFP_NOFS);
+ dout("msgpool_get %s %p\n", pool->name, msg);
+ return msg;
+}
+
+/*
+ * Return a message to its pool, restoring the pool's fixed front length
+ * and resetting the refcount to a single reference.
+ */
+void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg)
+{
+ dout("msgpool_put %s %p\n", pool->name, msg);
+
+ /* reset msg front_len; user may have changed it */
+ msg->front.iov_len = pool->front_len;
+ msg->hdr.front_len = cpu_to_le32(pool->front_len);
+
+ kref_init(&msg->kref); /* retake single ref */
+ mempool_free(msg, pool->pool);
+}
diff --git a/libceph/osd_client.c b/libceph/osd_client.c
new file mode 100644
index 0000000..b4157dc
--- /dev/null
+++ b/libceph/osd_client.c
@@ -0,0 +1,2904 @@
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/highmem.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#ifdef CONFIG_BLOCK
+#include <linux/bio.h>
+#endif
+
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/osd_client.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/pagelist.h>
+
+#define OSD_OP_FRONT_LEN 4096
+#define OSD_OPREPLY_FRONT_LEN 512
+
+static struct kmem_cache *ceph_osd_request_cache;
+
+static const struct ceph_connection_operations osd_con_ops;
+
+static void __send_queued(struct ceph_osd_client *osdc);
+static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd);
+static void __register_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req);
+static void __unregister_linger_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req);
+static void __send_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req);
+
+/*
+ * Implement client access to distributed object storage cluster.
+ *
+ * All data objects are stored within a cluster/cloud of OSDs, or
+ * "object storage devices." (Note that Ceph OSDs have _nothing_ to
+ * do with the T10 OSD extensions to SCSI.) Ceph OSDs are simply
+ * remote daemons serving up and coordinating consistent and safe
+ * access to storage.
+ *
+ * Cluster membership and the mapping of data objects onto storage devices
+ * are described by the osd map.
+ *
+ * We keep track of pending OSD requests (read, write), resubmit
+ * requests to different OSDs when the cluster topology/data layout
+ * change, or retry the affected requests when the communications
+ * channel with an OSD is reset.
+ */
+
+/*
+ * calculate the mapping of a file extent onto an object, and fill out the
+ * request accordingly. shorten extent as necessary if it crosses an
+ * object boundary.
+ *
+ * fill osd op in request message.
+ *
+ * On success *objnum/*objoff/*objlen describe the object extent, and
+ * *plen is shortened in place if the extent crossed an object boundary.
+ */
+static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen,
+ u64 *objnum, u64 *objoff, u64 *objlen)
+{
+ u64 orig_len = *plen;
+ int r;
+
+ /* object extent? */
+ r = ceph_calc_file_object_mapping(layout, off, orig_len, objnum,
+ objoff, objlen);
+ if (r < 0)
+ return r;
+ /* clip the request length to what fits in this object */
+ if (*objlen < orig_len) {
+ *plen = *objlen;
+ dout(" skipping last %llu, final file extent %llu~%llu\n",
+ orig_len - *plen, off, *plen);
+ }
+
+ dout("calc_layout objnum=%llx %llu~%llu\n", *objnum, *objoff, *objlen);
+
+ return 0;
+}
+
+/* Reset an osd_data item to the empty (NONE) state. */
+static void ceph_osd_data_init(struct ceph_osd_data *osd_data)
+{
+ memset(osd_data, 0, sizeof (*osd_data));
+ osd_data->type = CEPH_OSD_DATA_TYPE_NONE;
+}
+
+/*
+ * Describe op data as a page vector.  own_pages transfers ownership:
+ * the pages are released when the request is destroyed.
+ */
+static void ceph_osd_data_pages_init(struct ceph_osd_data *osd_data,
+ struct page **pages, u64 length, u32 alignment,
+ bool pages_from_pool, bool own_pages)
+{
+ osd_data->type = CEPH_OSD_DATA_TYPE_PAGES;
+ osd_data->pages = pages;
+ osd_data->length = length;
+ osd_data->alignment = alignment;
+ osd_data->pages_from_pool = pages_from_pool;
+ osd_data->own_pages = own_pages;
+}
+
+/* Describe op data as a ceph_pagelist. */
+static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data,
+ struct ceph_pagelist *pagelist)
+{
+ osd_data->type = CEPH_OSD_DATA_TYPE_PAGELIST;
+ osd_data->pagelist = pagelist;
+}
+
+#ifdef CONFIG_BLOCK
+/* Describe op data as a bio chain of bio_length bytes. */
+static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data,
+ struct bio *bio, size_t bio_length)
+{
+ osd_data->type = CEPH_OSD_DATA_TYPE_BIO;
+ osd_data->bio = bio;
+ osd_data->bio_length = bio_length;
+}
+#endif /* CONFIG_BLOCK */
+
+/*
+ * Yield a pointer to data field @fld within member @typ of op @whch of
+ * request @oreq, BUGging if @whch is out of range.  Note @whch is
+ * evaluated more than once.
+ */
+#define osd_req_op_data(oreq, whch, typ, fld) \
+ ({ \
+ BUG_ON(whch >= (oreq)->r_num_ops); \
+ &(oreq)->r_ops[whch].typ.fld; \
+ })
+
+/* Return the raw incoming-data descriptor of op @which. */
+static struct ceph_osd_data *
+osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
+{
+ BUG_ON(which >= osd_req->r_num_ops);
+
+ return &osd_req->r_ops[which].raw_data_in;
+}
+
+/* Return the extent-op data descriptor of op @which. */
+struct ceph_osd_data *
+osd_req_op_extent_osd_data(struct ceph_osd_request *osd_req,
+ unsigned int which)
+{
+ return osd_req_op_data(osd_req, which, extent, osd_data);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data);
+
+/* Return the class-call response data descriptor of op @which. */
+struct ceph_osd_data *
+osd_req_op_cls_response_data(struct ceph_osd_request *osd_req,
+ unsigned int which)
+{
+ return osd_req_op_data(osd_req, which, cls, response_data);
+}
+EXPORT_SYMBOL(osd_req_op_cls_response_data); /* ??? */
+
+/* Attach a page vector as the raw incoming data of op @which. */
+void osd_req_op_raw_data_in_pages(struct ceph_osd_request *osd_req,
+ unsigned int which, struct page **pages,
+ u64 length, u32 alignment,
+ bool pages_from_pool, bool own_pages)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_raw_data_in(osd_req, which);
+ ceph_osd_data_pages_init(osd_data, pages, length, alignment,
+ pages_from_pool, own_pages);
+}
+EXPORT_SYMBOL(osd_req_op_raw_data_in_pages);
+
+/* Attach a page vector as the extent data of op @which. */
+void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *osd_req,
+ unsigned int which, struct page **pages,
+ u64 length, u32 alignment,
+ bool pages_from_pool, bool own_pages)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+ ceph_osd_data_pages_init(osd_data, pages, length, alignment,
+ pages_from_pool, own_pages);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data_pages);
+
+/* Attach a pagelist as the extent data of op @which. */
+void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *osd_req,
+ unsigned int which, struct ceph_pagelist *pagelist)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+ ceph_osd_data_pagelist_init(osd_data, pagelist);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist);
+
+#ifdef CONFIG_BLOCK
+/* Attach a bio chain as the extent data of op @which. */
+void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
+ unsigned int which, struct bio *bio, size_t bio_length)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+ ceph_osd_data_bio_init(osd_data, bio, bio_length);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio);
+#endif /* CONFIG_BLOCK */
+
+/* Attach a pagelist as the class/method info of a CALL op. */
+static void osd_req_op_cls_request_info_pagelist(
+ struct ceph_osd_request *osd_req,
+ unsigned int which, struct ceph_pagelist *pagelist)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, cls, request_info);
+ ceph_osd_data_pagelist_init(osd_data, pagelist);
+}
+
+/* Attach a pagelist as the outgoing request data of a CALL op. */
+void osd_req_op_cls_request_data_pagelist(
+ struct ceph_osd_request *osd_req,
+ unsigned int which, struct ceph_pagelist *pagelist)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, cls, request_data);
+ ceph_osd_data_pagelist_init(osd_data, pagelist);
+}
+EXPORT_SYMBOL(osd_req_op_cls_request_data_pagelist);
+
+/* Attach a page vector as the outgoing request data of a CALL op. */
+void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req,
+ unsigned int which, struct page **pages, u64 length,
+ u32 alignment, bool pages_from_pool, bool own_pages)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, cls, request_data);
+ ceph_osd_data_pages_init(osd_data, pages, length, alignment,
+ pages_from_pool, own_pages);
+}
+EXPORT_SYMBOL(osd_req_op_cls_request_data_pages);
+
+/* Attach a page vector to receive the response data of a CALL op. */
+void osd_req_op_cls_response_data_pages(struct ceph_osd_request *osd_req,
+ unsigned int which, struct page **pages, u64 length,
+ u32 alignment, bool pages_from_pool, bool own_pages)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, cls, response_data);
+ ceph_osd_data_pages_init(osd_data, pages, length, alignment,
+ pages_from_pool, own_pages);
+}
+EXPORT_SYMBOL(osd_req_op_cls_response_data_pages);
+
+/* Return the byte length of an osd_data item, 0 for NONE or unknown. */
+static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data)
+{
+ switch (osd_data->type) {
+ case CEPH_OSD_DATA_TYPE_NONE:
+ return 0;
+ case CEPH_OSD_DATA_TYPE_PAGES:
+ return osd_data->length;
+ case CEPH_OSD_DATA_TYPE_PAGELIST:
+ return (u64)osd_data->pagelist->length;
+#ifdef CONFIG_BLOCK
+ case CEPH_OSD_DATA_TYPE_BIO:
+ return (u64)osd_data->bio_length;
+#endif /* CONFIG_BLOCK */
+ default:
+ WARN(true, "unrecognized data type %d\n", (int)osd_data->type);
+ return 0;
+ }
+}
+
+/*
+ * Release an osd_data item's resources (only page vectors marked
+ * own_pages carry any) and reset it to the NONE state.
+ */
+static void ceph_osd_data_release(struct ceph_osd_data *osd_data)
+{
+ if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES && osd_data->own_pages) {
+ int num_pages;
+
+ num_pages = calc_pages_for((u64)osd_data->alignment,
+ (u64)osd_data->length);
+ ceph_release_page_vector(osd_data->pages, num_pages);
+ }
+ ceph_osd_data_init(osd_data);
+}
+
+/* Release all data items attached to op @which of @osd_req. */
+static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
+ unsigned int which)
+{
+ struct ceph_osd_req_op *op;
+
+ BUG_ON(which >= osd_req->r_num_ops);
+ op = &osd_req->r_ops[which];
+
+ switch (op->op) {
+ case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_WRITE:
+ ceph_osd_data_release(&op->extent.osd_data);
+ break;
+ case CEPH_OSD_OP_CALL:
+ ceph_osd_data_release(&op->cls.request_info);
+ ceph_osd_data_release(&op->cls.request_data);
+ ceph_osd_data_release(&op->cls.response_data);
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * requests
+ */
+
+/*
+ * kref release callback: free a request, its messages, per-op data
+ * items and snap context.  Requests from the mempool go back to it;
+ * others are returned to the slab cache.
+ */
+void ceph_osdc_release_request(struct kref *kref)
+{
+ struct ceph_osd_request *req;
+ unsigned int which;
+
+ req = container_of(kref, struct ceph_osd_request, r_kref);
+ if (req->r_request)
+ ceph_msg_put(req->r_request);
+ if (req->r_reply) {
+ ceph_msg_revoke_incoming(req->r_reply);
+ ceph_msg_put(req->r_reply);
+ }
+
+ for (which = 0; which < req->r_num_ops; which++)
+ osd_req_op_data_release(req, which);
+
+ ceph_put_snap_context(req->r_snapc);
+ if (req->r_mempool)
+ mempool_free(req, req->r_osdc->req_mempool);
+ else
+ kmem_cache_free(ceph_osd_request_cache, req);
+
+}
+EXPORT_SYMBOL(ceph_osdc_release_request);
+
+/*
+ * Allocate an osd request along with its reply message and an empty,
+ * zeroed request message sized for @num_ops ops plus addressing data.
+ * Returns NULL on allocation failure; the caller owns the initial ref.
+ */
+struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
+ struct ceph_snap_context *snapc,
+ unsigned int num_ops,
+ bool use_mempool,
+ gfp_t gfp_flags)
+{
+ struct ceph_osd_request *req;
+ struct ceph_msg *msg;
+ size_t msg_size;
+
+ BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX);
+ BUG_ON(num_ops > CEPH_OSD_MAX_OP);
+
+ /* worst-case front size of a MOSDOp message for num_ops ops */
+ msg_size = 4 + 4 + 8 + 8 + 4+8;
+ msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
+ msg_size += 1 + 8 + 4 + 4; /* pg_t */
+ msg_size += 4 + CEPH_MAX_OID_NAME_LEN; /* oid */
+ msg_size += 2 + num_ops*sizeof(struct ceph_osd_op);
+ msg_size += 8; /* snapid */
+ msg_size += 8; /* snap_seq */
+ msg_size += 8 * (snapc ? snapc->num_snaps : 0); /* snaps */
+ msg_size += 4;
+
+ if (use_mempool) {
+ req = mempool_alloc(osdc->req_mempool, gfp_flags);
+ memset(req, 0, sizeof(*req));
+ } else {
+ req = kmem_cache_zalloc(ceph_osd_request_cache, gfp_flags);
+ }
+ if (req == NULL)
+ return NULL;
+
+ req->r_osdc = osdc;
+ req->r_mempool = use_mempool;
+ req->r_num_ops = num_ops;
+
+ kref_init(&req->r_kref);
+ init_completion(&req->r_completion);
+ init_completion(&req->r_safe_completion);
+ RB_CLEAR_NODE(&req->r_node);
+ INIT_LIST_HEAD(&req->r_unsafe_item);
+ INIT_LIST_HEAD(&req->r_linger_item);
+ INIT_LIST_HEAD(&req->r_linger_osd);
+ INIT_LIST_HEAD(&req->r_req_lru_item);
+ INIT_LIST_HEAD(&req->r_osd_item);
+
+ /* -1 means "no pool chosen yet" */
+ req->r_base_oloc.pool = -1;
+ req->r_target_oloc.pool = -1;
+
+ /* create reply message */
+ if (use_mempool)
+ msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0);
+ else
+ msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY,
+ OSD_OPREPLY_FRONT_LEN, gfp_flags, true);
+ if (!msg) {
+ ceph_osdc_put_request(req);
+ return NULL;
+ }
+ req->r_reply = msg;
+
+ /* create request message; allow space for oid */
+ if (use_mempool)
+ msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
+ else
+ msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp_flags, true);
+ if (!msg) {
+ ceph_osdc_put_request(req);
+ return NULL;
+ }
+
+ memset(msg->front.iov_base, 0, msg->front.iov_len);
+
+ req->r_request = msg;
+
+ return req;
+}
+EXPORT_SYMBOL(ceph_osdc_alloc_request);
+
+/* Return true iff @opcode is a known OSD operation code. */
+static bool osd_req_opcode_valid(u16 opcode)
+{
+ switch (opcode) {
+ case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_STAT:
+ case CEPH_OSD_OP_MAPEXT:
+ case CEPH_OSD_OP_MASKTRUNC:
+ case CEPH_OSD_OP_SPARSE_READ:
+ case CEPH_OSD_OP_NOTIFY:
+ case CEPH_OSD_OP_NOTIFY_ACK:
+ case CEPH_OSD_OP_ASSERT_VER:
+ case CEPH_OSD_OP_WRITE:
+ case CEPH_OSD_OP_WRITEFULL:
+ case CEPH_OSD_OP_TRUNCATE:
+ case CEPH_OSD_OP_ZERO:
+ case CEPH_OSD_OP_DELETE:
+ case CEPH_OSD_OP_APPEND:
+ case CEPH_OSD_OP_STARTSYNC:
+ case CEPH_OSD_OP_SETTRUNC:
+ case CEPH_OSD_OP_TRIMTRUNC:
+ case CEPH_OSD_OP_TMAPUP:
+ case CEPH_OSD_OP_TMAPPUT:
+ case CEPH_OSD_OP_TMAPGET:
+ case CEPH_OSD_OP_CREATE:
+ case CEPH_OSD_OP_ROLLBACK:
+ case CEPH_OSD_OP_WATCH:
+ case CEPH_OSD_OP_OMAPGETKEYS:
+ case CEPH_OSD_OP_OMAPGETVALS:
+ case CEPH_OSD_OP_OMAPGETHEADER:
+ case CEPH_OSD_OP_OMAPGETVALSBYKEYS:
+ case CEPH_OSD_OP_OMAPSETVALS:
+ case CEPH_OSD_OP_OMAPSETHEADER:
+ case CEPH_OSD_OP_OMAPCLEAR:
+ case CEPH_OSD_OP_OMAPRMKEYS:
+ case CEPH_OSD_OP_OMAP_CMP:
+ case CEPH_OSD_OP_SETALLOCHINT:
+ case CEPH_OSD_OP_CLONERANGE:
+ case CEPH_OSD_OP_ASSERT_SRC_VERSION:
+ case CEPH_OSD_OP_SRC_CMPXATTR:
+ case CEPH_OSD_OP_GETXATTR:
+ case CEPH_OSD_OP_GETXATTRS:
+ case CEPH_OSD_OP_CMPXATTR:
+ case CEPH_OSD_OP_SETXATTR:
+ case CEPH_OSD_OP_SETXATTRS:
+ case CEPH_OSD_OP_RESETXATTRS:
+ case CEPH_OSD_OP_RMXATTR:
+ case CEPH_OSD_OP_PULL:
+ case CEPH_OSD_OP_PUSH:
+ case CEPH_OSD_OP_BALANCEREADS:
+ case CEPH_OSD_OP_UNBALANCEREADS:
+ case CEPH_OSD_OP_SCRUB:
+ case CEPH_OSD_OP_SCRUB_RESERVE:
+ case CEPH_OSD_OP_SCRUB_UNRESERVE:
+ case CEPH_OSD_OP_SCRUB_STOP:
+ case CEPH_OSD_OP_SCRUB_MAP:
+ case CEPH_OSD_OP_WRLOCK:
+ case CEPH_OSD_OP_WRUNLOCK:
+ case CEPH_OSD_OP_RDLOCK:
+ case CEPH_OSD_OP_RDUNLOCK:
+ case CEPH_OSD_OP_UPLOCK:
+ case CEPH_OSD_OP_DNLOCK:
+ case CEPH_OSD_OP_CALL:
+ case CEPH_OSD_OP_PGLS:
+ case CEPH_OSD_OP_PGLS_FILTER:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*
+ * This is an osd op init function for opcodes that have no data or
+ * other information associated with them. It also serves as a
+ * common init routine for all the other init functions, below.
+ *
+ * Zeroes op @which and sets its opcode; returns a pointer to it.
+ */
+static struct ceph_osd_req_op *
+_osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which,
+ u16 opcode)
+{
+ struct ceph_osd_req_op *op;
+
+ BUG_ON(which >= osd_req->r_num_ops);
+ BUG_ON(!osd_req_opcode_valid(opcode));
+
+ op = &osd_req->r_ops[which];
+ memset(op, 0, sizeof (*op));
+ op->op = opcode;
+
+ return op;
+}
+
+/* Public wrapper for data-less ops; discards the op pointer. */
+void osd_req_op_init(struct ceph_osd_request *osd_req,
+ unsigned int which, u16 opcode)
+{
+ (void)_osd_req_op_init(osd_req, which, opcode);
+}
+EXPORT_SYMBOL(osd_req_op_init);
+
+/*
+ * Initialize op @which as an extent op (read/write/delete/zero/
+ * truncate) covering offset~length.  Only writes carry payload.
+ */
+void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
+ unsigned int which, u16 opcode,
+ u64 offset, u64 length,
+ u64 truncate_size, u32 truncate_seq)
+{
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+ size_t payload_len = 0;
+
+ BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
+ opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
+ opcode != CEPH_OSD_OP_TRUNCATE);
+
+ op->extent.offset = offset;
+ op->extent.length = length;
+ op->extent.truncate_size = truncate_size;
+ op->extent.truncate_seq = truncate_seq;
+ if (opcode == CEPH_OSD_OP_WRITE)
+ payload_len += length;
+
+ op->payload_len = payload_len;
+}
+EXPORT_SYMBOL(osd_req_op_extent_init);
+
+/*
+ * Shrink (never grow) the length of an already-initialized extent op,
+ * adjusting the payload length by the same amount.
+ */
+void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
+ unsigned int which, u64 length)
+{
+ struct ceph_osd_req_op *op;
+ u64 previous;
+
+ BUG_ON(which >= osd_req->r_num_ops);
+ op = &osd_req->r_ops[which];
+ previous = op->extent.length;
+
+ if (length == previous)
+ return; /* Nothing to do */
+ BUG_ON(length > previous);
+
+ op->extent.length = length;
+ op->payload_len -= previous - length;
+}
+EXPORT_SYMBOL(osd_req_op_extent_update);
+
+/*
+ * Initialize op @which as a CALL of @class::@method, encoding both
+ * names into a freshly allocated pagelist attached as request_info.
+ * @class and @method must outlive the request (not copied).
+ *
+ * NOTE(review): the kmalloc result is only BUG_ON-checked and the
+ * ceph_pagelist_append() return values are ignored, so allocation
+ * failure here is fatal rather than reported to the caller.
+ */
+void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
+ u16 opcode, const char *class, const char *method)
+{
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+ struct ceph_pagelist *pagelist;
+ size_t payload_len = 0;
+ size_t size;
+
+ BUG_ON(opcode != CEPH_OSD_OP_CALL);
+
+ pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS);
+ BUG_ON(!pagelist);
+ ceph_pagelist_init(pagelist);
+
+ op->cls.class_name = class;
+ size = strlen(class);
+ BUG_ON(size > (size_t) U8_MAX);
+ op->cls.class_len = size;
+ ceph_pagelist_append(pagelist, class, size);
+ payload_len += size;
+
+ op->cls.method_name = method;
+ size = strlen(method);
+ BUG_ON(size > (size_t) U8_MAX);
+ op->cls.method_len = size;
+ ceph_pagelist_append(pagelist, method, size);
+ payload_len += size;
+
+ osd_req_op_cls_request_info_pagelist(osd_req, which, pagelist);
+
+ op->cls.argc = 0; /* currently unused */
+
+ op->payload_len = payload_len;
+}
+EXPORT_SYMBOL(osd_req_op_cls_init);
+
+/*
+ * Initialize op @which as a WATCH or NOTIFY_ACK op.  The flag byte is
+ * set only for WATCH ops when @flag is non-zero.
+ */
+void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
+ unsigned int which, u16 opcode,
+ u64 cookie, u64 version, int flag)
+{
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+
+ BUG_ON(opcode != CEPH_OSD_OP_NOTIFY_ACK && opcode != CEPH_OSD_OP_WATCH);
+
+ op->watch.cookie = cookie;
+ op->watch.ver = version;
+ if (opcode == CEPH_OSD_OP_WATCH && flag)
+ op->watch.flag = (u8)1;
+}
+EXPORT_SYMBOL(osd_req_op_watch_init);
+
+/* Initialize op @which as a SETALLOCHINT op carrying size hints. */
+void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
+ unsigned int which,
+ u64 expected_object_size,
+ u64 expected_write_size)
+{
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
+ CEPH_OSD_OP_SETALLOCHINT);
+
+ op->alloc_hint.expected_object_size = expected_object_size;
+ op->alloc_hint.expected_write_size = expected_write_size;
+
+ /*
+ * CEPH_OSD_OP_SETALLOCHINT op is advisory and therefore deemed
+ * not worth a feature bit. Set FAILOK per-op flag to make
+ * sure older osds don't trip over an unsupported opcode.
+ */
+ op->flags |= CEPH_OSD_OP_FLAG_FAILOK;
+}
+EXPORT_SYMBOL(osd_req_op_alloc_hint_init);
+
+/*
+ * Attach an osd_data item to @msg in whichever form it carries
+ * (pages, pagelist or bio); NONE items are silently skipped.
+ */
+static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
+ struct ceph_osd_data *osd_data)
+{
+ u64 length = ceph_osd_data_length(osd_data);
+
+ if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
+ BUG_ON(length > (u64) SIZE_MAX);
+ if (length)
+ ceph_msg_data_add_pages(msg, osd_data->pages,
+ length, osd_data->alignment);
+ } else if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGELIST) {
+ BUG_ON(!length);
+ ceph_msg_data_add_pagelist(msg, osd_data->pagelist);
+#ifdef CONFIG_BLOCK
+ } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) {
+ ceph_msg_data_add_bio(msg, osd_data->bio, length);
+#endif
+ } else {
+ BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE);
+ }
+}
+
+/*
+ * Encode op @which of @req into wire format in @dst, attaching its
+ * data to the outgoing request message (for writes/CALL input) or to
+ * the reply message (for reads/CALL output/STAT).
+ *
+ * Returns the number of outgoing request data bytes this op carries;
+ * 0 for data-less ops and for unrecognized/unsupported opcodes.
+ */
+static u64 osd_req_encode_op(struct ceph_osd_request *req,
+ struct ceph_osd_op *dst, unsigned int which)
+{
+ struct ceph_osd_req_op *src;
+ struct ceph_osd_data *osd_data;
+ u64 request_data_len = 0;
+ u64 data_length;
+
+ BUG_ON(which >= req->r_num_ops);
+ src = &req->r_ops[which];
+ if (WARN_ON(!osd_req_opcode_valid(src->op))) {
+ pr_err("unrecognized osd opcode %d\n", src->op);
+
+ return 0;
+ }
+
+ switch (src->op) {
+ case CEPH_OSD_OP_STAT:
+ osd_data = &src->raw_data_in;
+ ceph_osdc_msg_data_add(req->r_reply, osd_data);
+ break;
+ case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_WRITE:
+ case CEPH_OSD_OP_ZERO:
+ case CEPH_OSD_OP_DELETE:
+ case CEPH_OSD_OP_TRUNCATE:
+ if (src->op == CEPH_OSD_OP_WRITE)
+ request_data_len = src->extent.length;
+ dst->extent.offset = cpu_to_le64(src->extent.offset);
+ dst->extent.length = cpu_to_le64(src->extent.length);
+ dst->extent.truncate_size =
+ cpu_to_le64(src->extent.truncate_size);
+ dst->extent.truncate_seq =
+ cpu_to_le32(src->extent.truncate_seq);
+ osd_data = &src->extent.osd_data;
+ if (src->op == CEPH_OSD_OP_WRITE)
+ ceph_osdc_msg_data_add(req->r_request, osd_data);
+ else
+ ceph_osdc_msg_data_add(req->r_reply, osd_data);
+ break;
+ case CEPH_OSD_OP_CALL:
+ dst->cls.class_len = src->cls.class_len;
+ dst->cls.method_len = src->cls.method_len;
+ osd_data = &src->cls.request_info;
+ ceph_osdc_msg_data_add(req->r_request, osd_data);
+ BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGELIST);
+ request_data_len = osd_data->pagelist->length;
+
+ osd_data = &src->cls.request_data;
+ data_length = ceph_osd_data_length(osd_data);
+ if (data_length) {
+ BUG_ON(osd_data->type == CEPH_OSD_DATA_TYPE_NONE);
+ dst->cls.indata_len = cpu_to_le32(data_length);
+ ceph_osdc_msg_data_add(req->r_request, osd_data);
+ src->payload_len += data_length;
+ request_data_len += data_length;
+ }
+ osd_data = &src->cls.response_data;
+ ceph_osdc_msg_data_add(req->r_reply, osd_data);
+ break;
+ case CEPH_OSD_OP_STARTSYNC:
+ break;
+ case CEPH_OSD_OP_NOTIFY_ACK:
+ case CEPH_OSD_OP_WATCH:
+ dst->watch.cookie = cpu_to_le64(src->watch.cookie);
+ dst->watch.ver = cpu_to_le64(src->watch.ver);
+ dst->watch.flag = src->watch.flag;
+ break;
+ case CEPH_OSD_OP_SETALLOCHINT:
+ dst->alloc_hint.expected_object_size =
+ cpu_to_le64(src->alloc_hint.expected_object_size);
+ dst->alloc_hint.expected_write_size =
+ cpu_to_le64(src->alloc_hint.expected_write_size);
+ break;
+ default:
+ pr_err("unsupported osd opcode %s\n",
+ ceph_osd_op_name(src->op));
+ WARN_ON(1);
+
+ return 0;
+ }
+
+ dst->op = cpu_to_le16(src->op);
+ dst->flags = cpu_to_le32(src->flags);
+ dst->payload_len = cpu_to_le32(src->payload_len);
+
+ return request_data_len;
+}
+
+/*
+ * build new request AND message, calculate layout, and adjust file
+ * extent as needed.
+ *
+ * if the file was recently truncated, we include information about its
+ * old and new size so that the object can be updated appropriately. (we
+ * avoid synchronously deleting truncated objects because it's slow.)
+ *
+ * if @do_sync, include a 'startsync' command so that the osd will flush
+ * data quickly.
+ *
+ * Returns the request or an ERR_PTR on failure; *plen may be shortened
+ * if the extent crosses an object boundary.
+ */
+struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
+ struct ceph_file_layout *layout,
+ struct ceph_vino vino,
+ u64 off, u64 *plen, int num_ops,
+ int opcode, int flags,
+ struct ceph_snap_context *snapc,
+ u32 truncate_seq,
+ u64 truncate_size,
+ bool use_mempool)
+{
+ struct ceph_osd_request *req;
+ u64 objnum = 0;
+ u64 objoff = 0;
+ u64 objlen = 0;
+ u32 object_size;
+ u64 object_base;
+ int r;
+
+ BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
+ opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
+ opcode != CEPH_OSD_OP_TRUNCATE);
+
+ req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
+ GFP_NOFS);
+ if (!req)
+ return ERR_PTR(-ENOMEM);
+
+ req->r_flags = flags;
+
+ /* calculate max write size */
+ r = calc_layout(layout, off, plen, &objnum, &objoff, &objlen);
+ if (r < 0) {
+ ceph_osdc_put_request(req);
+ return ERR_PTR(r);
+ }
+
+ /* translate the file-relative truncate point into object space */
+ object_size = le32_to_cpu(layout->fl_object_size);
+ object_base = off - objoff;
+ if (!(truncate_seq == 1 && truncate_size == -1ULL)) {
+ if (truncate_size <= object_base) {
+ truncate_size = 0;
+ } else {
+ truncate_size -= object_base;
+ if (truncate_size > object_size)
+ truncate_size = object_size;
+ }
+ }
+
+ osd_req_op_extent_init(req, 0, opcode, objoff, objlen,
+ truncate_size, truncate_seq);
+
+ /*
+ * A second op in the ops array means the caller wants to
+ * also include a 'startsync' command so that the
+ * osd will flush data quickly.
+ */
+ if (num_ops > 1)
+ osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
+
+ req->r_base_oloc.pool = ceph_file_layout_pg_pool(*layout);
+
+ snprintf(req->r_base_oid.name, sizeof(req->r_base_oid.name),
+ "%llx.%08llx", vino.ino, objnum);
+ req->r_base_oid.name_len = strlen(req->r_base_oid.name);
+
+ return req;
+}
+EXPORT_SYMBOL(ceph_osdc_new_request);
+
+/*
+ * We keep osd requests in an rbtree, sorted by ->r_tid.
+ */
+
+/* Insert @new into the request tree; tids must be unique (BUG if not). */
+static void __insert_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *new)
+{
+ struct rb_node **p = &osdc->requests.rb_node;
+ struct rb_node *parent = NULL;
+ struct ceph_osd_request *req = NULL;
+
+ while (*p) {
+ parent = *p;
+ req = rb_entry(parent, struct ceph_osd_request, r_node);
+ if (new->r_tid < req->r_tid)
+ p = &(*p)->rb_left;
+ else if (new->r_tid > req->r_tid)
+ p = &(*p)->rb_right;
+ else
+ BUG();
+ }
+
+ rb_link_node(&new->r_node, parent, p);
+ rb_insert_color(&new->r_node, &osdc->requests);
+}
+
+/* Find the request with exactly @tid, or NULL. */
+static struct ceph_osd_request *__lookup_request(struct ceph_osd_client *osdc,
+ u64 tid)
+{
+ struct ceph_osd_request *req;
+ struct rb_node *n = osdc->requests.rb_node;
+
+ while (n) {
+ req = rb_entry(n, struct ceph_osd_request, r_node);
+ if (tid < req->r_tid)
+ n = n->rb_left;
+ else if (tid > req->r_tid)
+ n = n->rb_right;
+ else
+ return req;
+ }
+ return NULL;
+}
+
+/* Find the request with the smallest r_tid >= @tid, or NULL. */
+static struct ceph_osd_request *
+__lookup_request_ge(struct ceph_osd_client *osdc,
+ u64 tid)
+{
+ struct ceph_osd_request *req;
+ struct rb_node *n = osdc->requests.rb_node;
+
+ while (n) {
+ req = rb_entry(n, struct ceph_osd_request, r_node);
+ if (tid < req->r_tid) {
+ if (!n->rb_left)
+ return req;
+ n = n->rb_left;
+ } else if (tid > req->r_tid) {
+ n = n->rb_right;
+ } else {
+ return req;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Resubmit requests pending on the given osd.
+ *
+ * NOTE(review): callers appear to hold osdc->request_mutex (see
+ * osd_reset) -- confirm before adding new call sites.
+ */
+static void __kick_osd_requests(struct ceph_osd_client *osdc,
+ struct ceph_osd *osd)
+{
+ struct ceph_osd_request *req, *nreq;
+ LIST_HEAD(resend);
+ int err;
+
+ dout("__kick_osd_requests osd%d\n", osd->o_osd);
+ err = __reset_osd(osdc, osd);
+ if (err)
+ return;
+ /*
+ * Build up a list of requests to resend by traversing the
+ * osd's list of requests. Requests for a given object are
+ * sent in tid order, and that is also the order they're
+ * kept on this list. Therefore all requests that are in
+ * flight will be found first, followed by all requests that
+ * have not yet been sent. And to resend requests while
+ * preserving this order we will want to put any sent
+ * requests back on the front of the osd client's unsent
+ * list.
+ *
+ * So we build a separate ordered list of already-sent
+ * requests for the affected osd and splice it onto the
+ * front of the osd client's unsent list. Once we've seen a
+ * request that has not yet been sent we're done. Those
+ * requests are already sitting right where they belong.
+ */
+ list_for_each_entry(req, &osd->o_requests, r_osd_item) {
+ if (!req->r_sent)
+ break;
+ list_move_tail(&req->r_req_lru_item, &resend);
+ dout("requeueing %p tid %llu osd%d\n", req, req->r_tid,
+ osd->o_osd);
+ if (!req->r_linger)
+ req->r_flags |= CEPH_OSD_FLAG_RETRY;
+ }
+ list_splice(&resend, &osdc->req_unsent);
+
+ /*
+ * Linger requests are re-registered before sending, which
+ * sets up a new tid for each. We add them to the unsent
+ * list at the end to keep things in tid order.
+ */
+ list_for_each_entry_safe(req, nreq, &osd->o_linger_requests,
+ r_linger_osd) {
+ /*
+ * reregister request prior to unregistering linger so
+ * that r_osd is preserved.
+ */
+ BUG_ON(!list_empty(&req->r_req_lru_item));
+ __register_request(osdc, req);
+ list_add_tail(&req->r_req_lru_item, &osdc->req_unsent);
+ list_add_tail(&req->r_osd_item, &req->r_osd->o_requests);
+ __unregister_linger_request(osdc, req);
+ dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid,
+ osd->o_osd);
+ }
+}
+
+/*
+ * If the osd connection drops, we need to resubmit all requests.
+ *
+ * Connection reset callback: requeue everything pending on this osd
+ * and push the unsent queue, under map_sem/request_mutex.
+ */
+static void osd_reset(struct ceph_connection *con)
+{
+ struct ceph_osd *osd = con->private;
+ struct ceph_osd_client *osdc;
+
+ if (!osd)
+ return;
+ dout("osd_reset osd%d\n", osd->o_osd);
+ osdc = osd->o_osdc;
+ down_read(&osdc->map_sem);
+ mutex_lock(&osdc->request_mutex);
+ __kick_osd_requests(osdc, osd);
+ __send_queued(osdc);
+ mutex_unlock(&osdc->request_mutex);
+ up_read(&osdc->map_sem);
+}
+
+/*
+ * Track open sessions with osds.
+ */
+
+/*
+ * Allocate and initialize a session for osd number @onum with a single
+ * reference.  Returns NULL on allocation failure.
+ */
+static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
+{
+ struct ceph_osd *osd;
+
+ osd = kzalloc(sizeof(*osd), GFP_NOFS);
+ if (!osd)
+ return NULL;
+
+ atomic_set(&osd->o_ref, 1);
+ osd->o_osdc = osdc;
+ osd->o_osd = onum;
+ RB_CLEAR_NODE(&osd->o_node);
+ INIT_LIST_HEAD(&osd->o_requests);
+ INIT_LIST_HEAD(&osd->o_linger_requests);
+ INIT_LIST_HEAD(&osd->o_osd_lru);
+ osd->o_incarnation = 1;
+
+ ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr);
+
+ INIT_LIST_HEAD(&osd->o_keepalive_item);
+ return osd;
+}
+
+/*
+ * Take a reference on @osd unless its refcount already hit zero;
+ * returns the osd on success, NULL if it is already being destroyed.
+ */
+static struct ceph_osd *get_osd(struct ceph_osd *osd)
+{
+ if (atomic_inc_not_zero(&osd->o_ref)) {
+ dout("get_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref)-1,
+ atomic_read(&osd->o_ref));
+ return osd;
+ } else {
+ dout("get_osd %p FAIL\n", osd);
+ return NULL;
+ }
+}
+
+/*
+ * Drop a reference on @osd; on the final put, destroy its authorizer
+ * (if any) and free the osd itself.
+ *
+ * Fix: the original freed the osd only when an authorizer was present,
+ * leaking struct ceph_osd whenever no authorizer was attached (e.g.
+ * with auth_none).
+ */
+static void put_osd(struct ceph_osd *osd)
+{
+ dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref),
+ atomic_read(&osd->o_ref) - 1);
+ if (atomic_dec_and_test(&osd->o_ref)) {
+ struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;
+
+ if (osd->o_auth.authorizer)
+ ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer);
+ kfree(osd);
+ }
+}
+
+/*
+ * remove an osd from our map
+ */
+static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
+{
+ dout("__remove_osd %p\n", osd);
+ BUG_ON(!list_empty(&osd->o_requests));
+ rb_erase(&osd->o_node, &osdc->osds);
+ list_del_init(&osd->o_osd_lru);
+ ceph_con_close(&osd->o_con);
+ put_osd(osd);
+}
+
+/* Remove every osd session, e.g. on client teardown. */
+static void remove_all_osds(struct ceph_osd_client *osdc)
+{
+ dout("%s %p\n", __func__, osdc);
+ mutex_lock(&osdc->request_mutex);
+ while (!RB_EMPTY_ROOT(&osdc->osds)) {
+ struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds),
+ struct ceph_osd, o_node);
+ __remove_osd(osdc, osd);
+ }
+ mutex_unlock(&osdc->request_mutex);
+}
+
+/*
+ * Put an idle osd on the LRU with an expiry of osd_idle_ttl seconds,
+ * after which remove_old_osds() may close it.
+ */
+static void __move_osd_to_lru(struct ceph_osd_client *osdc,
+ struct ceph_osd *osd)
+{
+ dout("__move_osd_to_lru %p\n", osd);
+ BUG_ON(!list_empty(&osd->o_osd_lru));
+ list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
+ osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ;
+}
+
+/* Take an osd back off the LRU (no-op if it isn't on it). */
+static void __remove_osd_from_lru(struct ceph_osd *osd)
+{
+ dout("__remove_osd_from_lru %p\n", osd);
+ if (!list_empty(&osd->o_osd_lru))
+ list_del_init(&osd->o_osd_lru);
+}
+
+/*
+ * Close and remove every LRU osd whose idle TTL has expired.  The LRU
+ * is ordered by expiry, so stop at the first entry still in the
+ * future.
+ *
+ * Fix: the debug message printed the wrong function name
+ * ("__remove_old_osds"); use __func__ as remove_all_osds() does.
+ */
+static void remove_old_osds(struct ceph_osd_client *osdc)
+{
+ struct ceph_osd *osd, *nosd;
+
+ dout("%s %p\n", __func__, osdc);
+ mutex_lock(&osdc->request_mutex);
+ list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) {
+ if (time_before(jiffies, osd->lru_ttl))
+ break;
+ __remove_osd(osdc, osd);
+ }
+ mutex_unlock(&osdc->request_mutex);
+}
+
+/*
+ * reset osd connect
+ *
+ * Returns 0 after reopening the connection, -ENODEV if the idle osd
+ * was removed instead, or -EAGAIN when the messenger can retry the
+ * existing, never-opened connection.
+ */
+static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
+{
+ struct ceph_entity_addr *peer_addr;
+
+ dout("__reset_osd %p osd%d\n", osd, osd->o_osd);
+ if (list_empty(&osd->o_requests) &&
+ list_empty(&osd->o_linger_requests)) {
+ __remove_osd(osdc, osd);
+
+ return -ENODEV;
+ }
+
+ peer_addr = &osdc->osdmap->osd_addr[osd->o_osd];
+ if (!memcmp(peer_addr, &osd->o_con.peer_addr, sizeof (*peer_addr)) &&
+ !ceph_con_opened(&osd->o_con)) {
+ struct ceph_osd_request *req;
+
+ dout("osd addr hasn't changed and connection never opened, "
+ "letting msgr retry\n");
+ /* touch each r_stamp for handle_timeout()'s benefit */
+ list_for_each_entry(req, &osd->o_requests, r_osd_item)
+ req->r_stamp = jiffies;
+
+ return -EAGAIN;
+ }
+
+ ceph_con_close(&osd->o_con);
+ ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, peer_addr);
+ osd->o_incarnation++;
+
+ return 0;
+}
+
+/* Insert @new into the osd rbtree, keyed by osd number (BUG on dup). */
+static void __insert_osd(struct ceph_osd_client *osdc, struct ceph_osd *new)
+{
+ struct rb_node **p = &osdc->osds.rb_node;
+ struct rb_node *parent = NULL;
+ struct ceph_osd *osd = NULL;
+
+ dout("__insert_osd %p osd%d\n", new, new->o_osd);
+ while (*p) {
+ parent = *p;
+ osd = rb_entry(parent, struct ceph_osd, o_node);
+ if (new->o_osd < osd->o_osd)
+ p = &(*p)->rb_left;
+ else if (new->o_osd > osd->o_osd)
+ p = &(*p)->rb_right;
+ else
+ BUG();
+ }
+
+ rb_link_node(&new->o_node, parent, p);
+ rb_insert_color(&new->o_node, &osdc->osds);
+}
+
+/* Find the session for osd number @o, or NULL. */
+static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o)
+{
+ struct ceph_osd *osd;
+ struct rb_node *n = osdc->osds.rb_node;
+
+ while (n) {
+ osd = rb_entry(n, struct ceph_osd, o_node);
+ if (o < osd->o_osd)
+ n = n->rb_left;
+ else if (o > osd->o_osd)
+ n = n->rb_right;
+ else
+ return osd;
+ }
+ return NULL;
+}
+
+/* Arm the request timeout work after the keepalive interval. */
+static void __schedule_osd_timeout(struct ceph_osd_client *osdc)
+{
+ schedule_delayed_work(&osdc->timeout_work,
+ osdc->client->options->osd_keepalive_timeout * HZ);
+}
+
+/* Cancel pending request timeout work (non-blocking). */
+static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
+{
+ cancel_delayed_work(&osdc->timeout_work);
+}
+
+/*
+ * Register request, assign tid. If this is the first request, set up
+ * the timeout event.
+ */
+static void __register_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req)
+{
+ req->r_tid = ++osdc->last_tid;
+ req->r_request->hdr.tid = cpu_to_le64(req->r_tid);
+ dout("__register_request %p tid %lld\n", req, req->r_tid);
+ __insert_request(osdc, req);
+ ceph_osdc_get_request(req); /* tree holds a ref until unregister */
+ osdc->num_requests++;
+ if (osdc->num_requests == 1) {
+ dout(" first request, scheduling timeout\n");
+ __schedule_osd_timeout(osdc);
+ }
+}
+
+/*
+ * Remove @req from the request tree and drop the tree's ref on it.
+ * Revokes any in-flight message, detaches the request from its osd
+ * (moving a now-idle osd to the lru), and cancels the keepalive
+ * timeout when the last request goes away.
+ *
+ * called under osdc->request_mutex
+ */
+static void __unregister_request(struct ceph_osd_client *osdc,
+				 struct ceph_osd_request *req)
+{
+	if (RB_EMPTY_NODE(&req->r_node)) {
+		dout("__unregister_request %p tid %lld not registered\n",
+		     req, req->r_tid);
+		return;
+	}
+
+	dout("__unregister_request %p tid %lld\n", req, req->r_tid);
+	rb_erase(&req->r_node, &osdc->requests);
+	osdc->num_requests--;
+
+	if (req->r_osd) {
+		/* make sure the original request isn't in flight. */
+		ceph_msg_revoke(req->r_request);
+
+		list_del_init(&req->r_osd_item);
+		if (list_empty(&req->r_osd->o_requests) &&
+		    list_empty(&req->r_osd->o_linger_requests)) {
+			dout("moving osd to %p lru\n", req->r_osd);
+			__move_osd_to_lru(osdc, req->r_osd);
+		}
+		/* keep r_osd if a linger registration still references it */
+		if (list_empty(&req->r_linger_item))
+			req->r_osd = NULL;
+	}
+
+	list_del_init(&req->r_req_lru_item);
+	ceph_osdc_put_request(req);
+
+	if (osdc->num_requests == 0) {
+		dout(" no requests, canceling timeout\n");
+		__cancel_osd_timeout(osdc);
+	}
+}
+
+/*
+ * Cancel a previously queued request message: revoke it from the
+ * messenger and mark the request unsent so it will be transmitted
+ * again the next time it is (re)mapped.
+ */
+static void __cancel_request(struct ceph_osd_request *req)
+{
+	if (!req->r_sent || !req->r_osd)
+		return;
+
+	ceph_msg_revoke(req->r_request);
+	req->r_sent = 0;
+}
+
+/*
+ * Put @req on the linger list (taking a ref) so it is re-sent across
+ * connection resets and map changes, and link it to its current osd,
+ * if any.  Caller must hold osdc->request_mutex.
+ */
+static void __register_linger_request(struct ceph_osd_client *osdc,
+				      struct ceph_osd_request *req)
+{
+	dout("__register_linger_request %p\n", req);
+	ceph_osdc_get_request(req);
+	list_add_tail(&req->r_linger_item, &osdc->req_linger);
+	if (req->r_osd)
+		list_add_tail(&req->r_linger_osd,
+			      &req->r_osd->o_linger_requests);
+}
+
+/*
+ * Drop @req's linger registration (and the ref it held).  Mirrors
+ * __unregister_request(): an osd left with no requests moves to the
+ * lru, and r_osd is cleared unless a normal registration still needs
+ * it.  Caller must hold osdc->request_mutex.
+ */
+static void __unregister_linger_request(struct ceph_osd_client *osdc,
+					struct ceph_osd_request *req)
+{
+	dout("__unregister_linger_request %p\n", req);
+	list_del_init(&req->r_linger_item);
+	if (req->r_osd) {
+		list_del_init(&req->r_linger_osd);
+
+		if (list_empty(&req->r_osd->o_requests) &&
+		    list_empty(&req->r_osd->o_linger_requests)) {
+			dout("moving osd to %p lru\n", req->r_osd);
+			__move_osd_to_lru(osdc, req->r_osd);
+		}
+		/* keep r_osd while the request itself is still registered */
+		if (list_empty(&req->r_osd_item))
+			req->r_osd = NULL;
+	}
+	ceph_osdc_put_request(req);	/* drop the linger list's ref */
+}
+
+/*
+ * Stop @req from lingering: clear r_linger and remove its linger
+ * registration, if any.  Safe to call whether or not the request is
+ * currently registered as a linger request.
+ */
+void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
+					 struct ceph_osd_request *req)
+{
+	mutex_lock(&osdc->request_mutex);
+	if (req->r_linger) {
+		req->r_linger = 0;
+		__unregister_linger_request(osdc, req);
+	}
+	mutex_unlock(&osdc->request_mutex);
+}
+EXPORT_SYMBOL(ceph_osdc_unregister_linger_request);
+
+/*
+ * Mark @req as a lingering request so that, once acked, it stays
+ * registered and is re-sent across osd resets and map changes until
+ * explicitly unregistered.  Idempotent.
+ */
+void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
+				  struct ceph_osd_request *req)
+{
+	if (req->r_linger)
+		return;
+
+	dout("set_request_linger %p\n", req);
+	req->r_linger = 1;
+}
+EXPORT_SYMBOL(ceph_osdc_set_request_linger);
+
+/*
+ * Returns whether a request should be blocked from being sent
+ * based on the current osdmap and osd_client settings.
+ *
+ * Reads pause when the map has PAUSERD set; writes pause when the
+ * map has PAUSEWR or FULL set.
+ *
+ * Caller should hold map_sem for read.
+ */
+static bool __req_should_be_paused(struct ceph_osd_client *osdc,
+				   struct ceph_osd_request *req)
+{
+	bool pauserd = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD);
+	bool pausewr = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR) ||
+		       ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL);
+
+	if ((req->r_flags & CEPH_OSD_FLAG_READ) && pauserd)
+		return true;
+	if ((req->r_flags & CEPH_OSD_FLAG_WRITE) && pausewr)
+		return true;
+	return false;
+}
+
+/*
+ * Calculate mapping of a request to a PG.  Takes tiering into account.
+ *
+ * Lazily initializes the target oloc/oid from the base values, then,
+ * unless IGNORE_OVERLAY is set, redirects the target pool to the
+ * pool's read or write tier as appropriate.  Returns the result of
+ * ceph_oloc_oid_to_pg() with the (possibly retargeted) locator.
+ */
+static int __calc_request_pg(struct ceph_osdmap *osdmap,
+			     struct ceph_osd_request *req,
+			     struct ceph_pg *pg_out)
+{
+	bool need_check_tiering;
+
+	need_check_tiering = false;
+	if (req->r_target_oloc.pool == -1) {
+		req->r_target_oloc = req->r_base_oloc; /* struct */
+		need_check_tiering = true;
+	}
+	if (req->r_target_oid.name_len == 0) {
+		ceph_oid_copy(&req->r_target_oid, &req->r_base_oid);
+		need_check_tiering = true;
+	}
+
+	if (need_check_tiering &&
+	    (req->r_flags & CEPH_OSD_FLAG_IGNORE_OVERLAY) == 0) {
+		struct ceph_pg_pool_info *pi;
+
+		pi = ceph_pg_pool_by_id(osdmap, req->r_target_oloc.pool);
+		if (pi) {
+			/* reads go to the read tier, writes to the write tier */
+			if ((req->r_flags & CEPH_OSD_FLAG_READ) &&
+			    pi->read_tier >= 0)
+				req->r_target_oloc.pool = pi->read_tier;
+			if ((req->r_flags & CEPH_OSD_FLAG_WRITE) &&
+			    pi->write_tier >= 0)
+				req->r_target_oloc.pool = pi->write_tier;
+		}
+		/* !pi is caught in ceph_oloc_oid_to_pg() */
+	}
+
+	return ceph_oloc_oid_to_pg(osdmap, &req->r_target_oloc,
+				   &req->r_target_oid, pg_out);
+}
+
+/*
+ * Pick an osd (the first 'up' osd in the pg), allocate the osd struct
+ * (as needed), and set the request r_osd appropriately.  If there is
+ * no up osd, set r_osd to NULL.  Move the request to the appropriate
+ * list (unsent, homeless) or leave on in-flight lru.
+ *
+ * Return 0 if unchanged, 1 if changed, or negative on error.
+ *
+ * Caller should hold map_sem for read and request_mutex.
+ */
+static int __map_request(struct ceph_osd_client *osdc,
+			 struct ceph_osd_request *req, int force_resend)
+{
+	struct ceph_pg pgid;
+	int acting[CEPH_PG_MAX_SIZE];
+	int num, o;
+	int err;
+	bool was_paused;
+
+	dout("map_request %p tid %lld\n", req, req->r_tid);
+
+	err = __calc_request_pg(osdc->osdmap, req, &pgid);
+	if (err) {
+		list_move(&req->r_req_lru_item, &osdc->req_notarget);
+		return err;
+	}
+	req->r_pgid = pgid;
+
+	num = ceph_calc_pg_acting(osdc->osdmap, pgid, acting, &o);
+	if (num < 0)
+		num = 0;
+
+	/* a request that just became unpaused must be resent */
+	was_paused = req->r_paused;
+	req->r_paused = __req_should_be_paused(osdc, req);
+	if (was_paused && !req->r_paused)
+		force_resend = 1;
+
+	/* no change if target osd, incarnation and acting set all match,
+	 * if the request is homeless and stays so, or if it is paused */
+	if ((!force_resend &&
+	     req->r_osd && req->r_osd->o_osd == o &&
+	     req->r_sent >= req->r_osd->o_incarnation &&
+	     req->r_num_pg_osds == num &&
+	     memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) ||
+	    (req->r_osd == NULL && o == -1) ||
+	    req->r_paused)
+		return 0;  /* no change */
+
+	dout("map_request tid %llu pgid %lld.%x osd%d (was osd%d)\n",
+	     req->r_tid, pgid.pool, pgid.seed, o,
+	     req->r_osd ? req->r_osd->o_osd : -1);
+
+	/* record full pg acting set */
+	memcpy(req->r_pg_osds, acting, sizeof(acting[0]) * num);
+	req->r_num_pg_osds = num;
+
+	/* detach from the old osd before attaching to the new one */
+	if (req->r_osd) {
+		__cancel_request(req);
+		list_del_init(&req->r_osd_item);
+		req->r_osd = NULL;
+	}
+
+	req->r_osd = __lookup_osd(osdc, o);
+	if (!req->r_osd && o >= 0) {
+		err = -ENOMEM;
+		req->r_osd = create_osd(osdc, o);
+		if (!req->r_osd) {
+			list_move(&req->r_req_lru_item, &osdc->req_notarget);
+			goto out;
+		}
+
+		dout("map_request osd %p is osd%d\n", req->r_osd, o);
+		__insert_osd(osdc, req->r_osd);
+
+		ceph_con_open(&req->r_osd->o_con,
+			      CEPH_ENTITY_TYPE_OSD, o,
+			      &osdc->osdmap->osd_addr[o]);
+	}
+
+	if (req->r_osd) {
+		__remove_osd_from_lru(req->r_osd);
+		list_add_tail(&req->r_osd_item, &req->r_osd->o_requests);
+		list_move_tail(&req->r_req_lru_item, &osdc->req_unsent);
+	} else {
+		list_move_tail(&req->r_req_lru_item, &osdc->req_notarget);
+	}
+	err = 1;   /* osd or pg changed */
+
+out:
+	return err;
+}
+
+/*
+ * Fill in the mutable parts of the request message (epoch, flags,
+ * pool, pgid, attempt count, reassert version), move the request to
+ * the in-flight lru and hand it to the messenger.
+ *
+ * caller should hold map_sem (for read) and request_mutex
+ */
+static void __send_request(struct ceph_osd_client *osdc,
+			   struct ceph_osd_request *req)
+{
+	void *p;
+
+	dout("send_request %p tid %llu to osd%d flags %d pg %lld.%x\n",
+	     req, req->r_tid, req->r_osd->o_osd, req->r_flags,
+	     (unsigned long long)req->r_pgid.pool, req->r_pgid.seed);
+
+	/* fill in message content that changes each time we send it */
+	put_unaligned_le32(osdc->osdmap->epoch, req->r_request_osdmap_epoch);
+	put_unaligned_le32(req->r_flags, req->r_request_flags);
+	put_unaligned_le64(req->r_target_oloc.pool, req->r_request_pool);
+	p = req->r_request_pgid;
+	ceph_encode_64(&p, req->r_pgid.pool);
+	ceph_encode_32(&p, req->r_pgid.seed);
+	put_unaligned_le64(1, req->r_request_attempts);  /* FIXME */
+	memcpy(req->r_request_reassert_version, &req->r_reassert_version,
+	       sizeof(req->r_reassert_version));
+
+	req->r_stamp = jiffies;	/* for handle_timeout()'s keepalive check */
+	list_move_tail(&req->r_req_lru_item, &osdc->req_lru);
+
+	ceph_msg_get(req->r_request); /* send consumes a ref */
+
+	/* r_sent records the osd incarnation the message was sent to */
+	req->r_sent = req->r_osd->o_incarnation;
+
+	ceph_con_send(&req->r_osd->o_con, req->r_request);
+}
+
+/*
+ * Send any requests in the queue (req_unsent).
+ *
+ * The _safe iterator is required because __send_request() moves each
+ * request's r_req_lru_item onto the in-flight lru.
+ */
+static void __send_queued(struct ceph_osd_client *osdc)
+{
+	struct ceph_osd_request *req, *tmp;
+
+	dout("__send_queued\n");
+	list_for_each_entry_safe(req, tmp, &osdc->req_unsent, r_req_lru_item)
+		__send_request(osdc, req);
+}
+
+/*
+ * Register @req and try to map it to an osd.  With @nofail, a mapping
+ * failure leaves the request registered (to be retried on the next
+ * map update) and 0 is returned; otherwise the request is
+ * unregistered and the error returned.
+ *
+ * Caller should hold map_sem for read and request_mutex.
+ */
+static int __ceph_osdc_start_request(struct ceph_osd_client *osdc,
+				     struct ceph_osd_request *req,
+				     bool nofail)
+{
+	int rc;
+
+	__register_request(osdc, req);
+	req->r_sent = 0;
+	req->r_got_reply = 0;
+	rc = __map_request(osdc, req, 0);
+	if (rc < 0) {
+		if (nofail) {
+			dout("osdc_start_request failed map, "
+				" will retry %lld\n", req->r_tid);
+			rc = 0;
+		} else {
+			__unregister_request(osdc, req);
+		}
+		return rc;
+	}
+
+	if (req->r_osd == NULL) {
+		/* homeless request: wait for a newer osdmap */
+		dout("send_request %p no up osds in pg\n", req);
+		ceph_monc_request_next_osdmap(&osdc->client->monc);
+	} else {
+		__send_queued(osdc);
+	}
+
+	return 0;
+}
+
+/*
+ * Timeout callback, called every N seconds when 1 or more osd
+ * requests has been active for more than N seconds.  When this
+ * happens, we ping all OSDs with requests who have timed out to
+ * ensure any communications channel reset is detected.  Reset the
+ * request timeouts another N seconds in the future as we go.
+ * Reschedule the timeout event another N seconds in future (unless
+ * there are no open requests).
+ */
+static void handle_timeout(struct work_struct *work)
+{
+	struct ceph_osd_client *osdc =
+		container_of(work, struct ceph_osd_client, timeout_work.work);
+	struct ceph_osd_request *req;
+	struct ceph_osd *osd;
+	unsigned long keepalive =
+		osdc->client->options->osd_keepalive_timeout * HZ;
+	struct list_head slow_osds;
+	dout("timeout\n");
+	down_read(&osdc->map_sem);
+
+	ceph_monc_request_next_osdmap(&osdc->client->monc);
+
+	mutex_lock(&osdc->request_mutex);
+
+	/*
+	 * ping osds that are a bit slow.  this ensures that if there
+	 * is a break in the TCP connection we will notice, and reopen
+	 * a connection with that osd (from the fault callback).
+	 */
+	INIT_LIST_HEAD(&slow_osds);
+	list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) {
+		/* req_lru is ordered by r_stamp: stop at the first fresh one */
+		if (time_before(jiffies, req->r_stamp + keepalive))
+			break;
+
+		osd = req->r_osd;
+		BUG_ON(!osd);
+		dout(" tid %llu is slow, will send keepalive on osd%d\n",
+		     req->r_tid, osd->o_osd);
+		list_move_tail(&osd->o_keepalive_item, &slow_osds);
+	}
+	/* send keepalives outside the iteration over req_lru */
+	while (!list_empty(&slow_osds)) {
+		osd = list_entry(slow_osds.next, struct ceph_osd,
+				 o_keepalive_item);
+		list_del_init(&osd->o_keepalive_item);
+		ceph_con_keepalive(&osd->o_con);
+	}
+
+	__schedule_osd_timeout(osdc);
+	__send_queued(osdc);
+	mutex_unlock(&osdc->request_mutex);
+	up_read(&osdc->map_sem);
+}
+
+/*
+ * Periodic work: drop osd structs that have been idle for too long,
+ * then reschedule itself at a quarter of the idle ttl.
+ */
+static void handle_osds_timeout(struct work_struct *work)
+{
+	struct ceph_osd_client *osdc =
+		container_of(work, struct ceph_osd_client,
+			     osds_timeout_work.work);
+	unsigned long delay =
+		osdc->client->options->osd_idle_ttl * HZ >> 2;
+
+	dout("osds timeout\n");
+	down_read(&osdc->map_sem);
+	remove_old_osds(osdc);
+	up_read(&osdc->map_sem);
+
+	schedule_delayed_work(&osdc->osds_timeout_work,
+			      round_jiffies_relative(delay));
+}
+
+/*
+ * Decode a ceph_object_locator off the wire into @oloc.  Only the
+ * pool field is used; an encoding with a key, namespace or hash set
+ * is rejected with -EINVAL, as are unsupported struct versions.
+ *
+ * NOTE(review): the per-field reads after the length check are not
+ * individually bounded against struct_end; a malformed (too-short)
+ * inner encoding relies on the outer ceph_decode_need(len) bound —
+ * confirm against current upstream.
+ */
+static int ceph_oloc_decode(void **p, void *end,
+			    struct ceph_object_locator *oloc)
+{
+	u8 struct_v, struct_cv;
+	u32 len;
+	void *struct_end;
+	int ret = 0;
+
+	ceph_decode_need(p, end, 1 + 1 + 4, e_inval);
+	struct_v = ceph_decode_8(p);
+	struct_cv = ceph_decode_8(p);
+	if (struct_v < 3) {
+		pr_warn("got v %d < 3 cv %d of ceph_object_locator\n",
+			struct_v, struct_cv);
+		goto e_inval;
+	}
+	if (struct_cv > 6) {
+		pr_warn("got v %d cv %d > 6 of ceph_object_locator\n",
+			struct_v, struct_cv);
+		goto e_inval;
+	}
+	len = ceph_decode_32(p);
+	ceph_decode_need(p, end, len, e_inval);
+	struct_end = *p + len;
+
+	oloc->pool = ceph_decode_64(p);
+	*p += 4; /* skip preferred */
+
+	len = ceph_decode_32(p);
+	if (len > 0) {
+		pr_warn("ceph_object_locator::key is set\n");
+		goto e_inval;
+	}
+
+	if (struct_v >= 5) {
+		len = ceph_decode_32(p);
+		if (len > 0) {
+			pr_warn("ceph_object_locator::nspace is set\n");
+			goto e_inval;
+		}
+	}
+
+	if (struct_v >= 6) {
+		s64 hash = ceph_decode_64(p);
+		if (hash != -1) {
+			pr_warn("ceph_object_locator::hash is set\n");
+			goto e_inval;
+		}
+	}
+
+	/* skip the rest */
+	*p = struct_end;
+out:
+	return ret;
+
+e_inval:
+	ret = -EINVAL;
+	goto out;
+}
+
+/*
+ * Decode a ceph_request_redirect off the wire into @redir.  Only the
+ * embedded object locator is supported; a redirect with an object
+ * name set is rejected with -EINVAL.  osd_instructions are skipped.
+ */
+static int ceph_redirect_decode(void **p, void *end,
+				struct ceph_request_redirect *redir)
+{
+	u8 struct_v, struct_cv;
+	u32 len;
+	void *struct_end;
+	int ret;
+
+	ceph_decode_need(p, end, 1 + 1 + 4, e_inval);
+	struct_v = ceph_decode_8(p);
+	struct_cv = ceph_decode_8(p);
+	if (struct_cv > 1) {
+		pr_warn("got v %d cv %d > 1 of ceph_request_redirect\n",
+			struct_v, struct_cv);
+		goto e_inval;
+	}
+	len = ceph_decode_32(p);
+	ceph_decode_need(p, end, len, e_inval);
+	struct_end = *p + len;
+
+	ret = ceph_oloc_decode(p, end, &redir->oloc);
+	if (ret)
+		goto out;
+
+	len = ceph_decode_32(p);
+	if (len > 0) {
+		pr_warn("ceph_request_redirect::object_name is set\n");
+		goto e_inval;
+	}
+
+	len = ceph_decode_32(p);
+	*p += len; /* skip osd_instructions */
+
+	/* skip the rest */
+	*p = struct_end;
+out:
+	return ret;
+
+e_inval:
+	ret = -EINVAL;
+	goto out;
+}
+
+/* Wake anyone waiting for the "safe" (on-disk) ack, e.g. fsync. */
+static void complete_request(struct ceph_osd_request *req)
+{
+	complete_all(&req->r_safe_completion);  /* fsync waiter */
+}
+
+/*
+ * handle osd op reply.  either call the callback if it is specified,
+ * or do the completion to wake up the waiting thread.
+ *
+ * Decodes the reply, matches it to a registered request by tid,
+ * records per-op lengths/results, handles pool redirects (v6+),
+ * unregisters the request when appropriate and fires the unsafe/
+ * normal/safe completions exactly once each.
+ */
+static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
+			 struct ceph_connection *con)
+{
+	void *p, *end;
+	struct ceph_osd_request *req;
+	struct ceph_request_redirect redir;
+	u64 tid;
+	int object_len;
+	unsigned int numops;
+	int payload_len, flags;
+	s32 result;
+	s32 retry_attempt;
+	struct ceph_pg pg;
+	int err;
+	u32 reassert_epoch;
+	u64 reassert_version;
+	u32 osdmap_epoch;
+	int already_completed;
+	u32 bytes;
+	unsigned int i;
+
+	tid = le64_to_cpu(msg->hdr.tid);
+	dout("handle_reply %p tid %llu\n", msg, tid);
+
+	p = msg->front.iov_base;
+	end = p + msg->front.iov_len;
+
+	/* skip object name; the tid is enough to find the request */
+	ceph_decode_need(&p, end, 4, bad);
+	object_len = ceph_decode_32(&p);
+	ceph_decode_need(&p, end, object_len, bad);
+	p += object_len;
+
+	err = ceph_decode_pgid(&p, end, &pg);
+	if (err)
+		goto bad;
+
+	ceph_decode_need(&p, end, 8 + 4 + 4 + 8 + 4, bad);
+	flags = ceph_decode_64(&p);
+	result = ceph_decode_32(&p);
+	reassert_epoch = ceph_decode_32(&p);
+	reassert_version = ceph_decode_64(&p);
+	osdmap_epoch = ceph_decode_32(&p);
+
+	/* lookup */
+	down_read(&osdc->map_sem);
+	mutex_lock(&osdc->request_mutex);
+	req = __lookup_request(osdc, tid);
+	if (req == NULL) {
+		dout("handle_reply tid %llu dne\n", tid);
+		goto bad_mutex;
+	}
+	ceph_osdc_get_request(req);
+
+	dout("handle_reply %p tid %llu req %p result %d\n", msg, tid,
+	     req, result);
+
+	/* per-op payload lengths must be sane and match the request */
+	ceph_decode_need(&p, end, 4, bad_put);
+	numops = ceph_decode_32(&p);
+	if (numops > CEPH_OSD_MAX_OP)
+		goto bad_put;
+	if (numops != req->r_num_ops)
+		goto bad_put;
+	payload_len = 0;
+	ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad_put);
+	for (i = 0; i < numops; i++) {
+		struct ceph_osd_op *op = p;
+		int len;
+
+		len = le32_to_cpu(op->payload_len);
+		req->r_reply_op_len[i] = len;
+		dout(" op %d has %d bytes\n", i, len);
+		payload_len += len;
+		p += sizeof(*op);
+	}
+	bytes = le32_to_cpu(msg->hdr.data_len);
+	if (payload_len != bytes) {
+		/* use pr_warn with \n, consistent with the rest of the file */
+		pr_warn("sum of op payload lens %d != data_len %d\n",
+			payload_len, bytes);
+		goto bad_put;
+	}
+
+	ceph_decode_need(&p, end, 4 + numops * 4, bad_put);
+	retry_attempt = ceph_decode_32(&p);
+	for (i = 0; i < numops; i++)
+		req->r_reply_op_result[i] = ceph_decode_32(&p);
+
+	if (le16_to_cpu(msg->hdr.version) >= 6) {
+		p += 8 + 4; /* skip replay_version */
+		p += 8; /* skip user_version */
+
+		err = ceph_redirect_decode(&p, end, &redir);
+		if (err)
+			goto bad_put;
+	} else {
+		redir.oloc.pool = -1;
+	}
+
+	if (redir.oloc.pool != -1) {
+		/* osd asked us to retarget another pool: resubmit */
+		dout("redirect pool %lld\n", redir.oloc.pool);
+
+		__unregister_request(osdc, req);
+
+		req->r_target_oloc = redir.oloc; /* struct */
+
+		/*
+		 * Start redirect requests with nofail=true.  If
+		 * mapping fails, request will end up on the notarget
+		 * list, waiting for the new osdmap (which can take
+		 * a while), even though the original request mapped
+		 * successfully.  In the future we might want to follow
+		 * original request's nofail setting here.
+		 */
+		err = __ceph_osdc_start_request(osdc, req, true);
+		BUG_ON(err);
+
+		goto out_unlock;
+	}
+
+	already_completed = req->r_got_reply;
+	if (!req->r_got_reply) {
+		req->r_result = result;
+		dout("handle_reply result %d bytes %d\n", req->r_result,
+		     bytes);
+		if (req->r_result == 0)
+			req->r_result = bytes;
+
+		/* in case this is a write and we need to replay, */
+		req->r_reassert_version.epoch = cpu_to_le32(reassert_epoch);
+		req->r_reassert_version.version = cpu_to_le64(reassert_version);
+
+		req->r_got_reply = 1;
+	} else if ((flags & CEPH_OSD_FLAG_ONDISK) == 0) {
+		dout("handle_reply tid %llu dup ack\n", tid);
+		goto out_unlock;
+	}
+
+	dout("handle_reply tid %llu flags %d\n", tid, flags);
+
+	if (req->r_linger && (flags & CEPH_OSD_FLAG_ONDISK))
+		__register_linger_request(osdc, req);
+
+	/* either this is a read, or we got the safe response */
+	if (result < 0 ||
+	    (flags & CEPH_OSD_FLAG_ONDISK) ||
+	    ((flags & CEPH_OSD_FLAG_WRITE) == 0))
+		__unregister_request(osdc, req);
+
+	mutex_unlock(&osdc->request_mutex);
+	up_read(&osdc->map_sem);
+
+	/* callbacks run without the mutex/rwsem held */
+	if (!already_completed) {
+		if (req->r_unsafe_callback &&
+		    result >= 0 && !(flags & CEPH_OSD_FLAG_ONDISK))
+			req->r_unsafe_callback(req, true);
+		if (req->r_callback)
+			req->r_callback(req, msg);
+		else
+			complete_all(&req->r_completion);
+	}
+
+	if (flags & CEPH_OSD_FLAG_ONDISK) {
+		if (req->r_unsafe_callback && already_completed)
+			req->r_unsafe_callback(req, false);
+		complete_request(req);
+	}
+
+out:
+	dout("req=%p req->r_linger=%d\n", req, req->r_linger);
+	ceph_osdc_put_request(req);
+	return;
+out_unlock:
+	mutex_unlock(&osdc->request_mutex);
+	up_read(&osdc->map_sem);
+	goto out;
+
+bad_put:
+	req->r_result = -EIO;
+	__unregister_request(osdc, req);
+	if (req->r_callback)
+		req->r_callback(req, msg);
+	else
+		complete_all(&req->r_completion);
+	complete_request(req);
+	ceph_osdc_put_request(req);
+bad_mutex:
+	mutex_unlock(&osdc->request_mutex);
+	up_read(&osdc->map_sem);
+bad:
+	pr_err("corrupt osd_op_reply got %d %d\n",
+	       (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len));
+	ceph_msg_dump(msg);
+}
+
+/*
+ * Reset every osd that the current map no longer reports as up, or
+ * whose address in the map differs from the one we are connected to.
+ * rb_next() is fetched before resetting because __reset_osd() may
+ * remove the node from the tree.
+ */
+static void reset_changed_osds(struct ceph_osd_client *osdc)
+{
+	struct rb_node *p, *n;
+
+	for (p = rb_first(&osdc->osds); p; p = n) {
+		struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node);
+
+		n = rb_next(p);
+		if (!ceph_osd_is_up(osdc->osdmap, osd->o_osd) ||
+		    memcmp(&osd->o_con.peer_addr,
+			   ceph_osd_addr(osdc->osdmap,
+					 osd->o_osd),
+			   sizeof(struct ceph_entity_addr)) != 0)
+			__reset_osd(osdc, osd);
+	}
+}
+
+/*
+ * Requeue requests whose mapping to an OSD has changed.  If requests map to
+ * no osd, request a new map.
+ *
+ * Caller should hold map_sem for read.
+ */
+static void kick_requests(struct ceph_osd_client *osdc, bool force_resend,
+			  bool force_resend_writes)
+{
+	struct ceph_osd_request *req, *nreq;
+	struct rb_node *p;
+	int needmap = 0;
+	int err;
+	bool force_resend_req;
+
+	dout("kick_requests %s %s\n", force_resend ? " (force resend)" : "",
+		force_resend_writes ? " (force resend writes)" : "");
+	mutex_lock(&osdc->request_mutex);
+	for (p = rb_first(&osdc->requests); p; ) {
+		req = rb_entry(p, struct ceph_osd_request, r_node);
+		/* advance first: __unregister_request() may erase r_node */
+		p = rb_next(p);
+
+		/*
+		 * For linger requests that have not yet been
+		 * registered, move them to the linger list; they'll
+		 * be sent to the osd in the loop below.  Unregister
+		 * the request before re-registering it as a linger
+		 * request to ensure the __map_request() below
+		 * will decide it needs to be sent.
+		 */
+		if (req->r_linger && list_empty(&req->r_linger_item)) {
+			dout("%p tid %llu restart on osd%d\n",
+			     req, req->r_tid,
+			     req->r_osd ? req->r_osd->o_osd : -1);
+			ceph_osdc_get_request(req);
+			__unregister_request(osdc, req);
+			__register_linger_request(osdc, req);
+			ceph_osdc_put_request(req);
+			continue;
+		}
+
+		force_resend_req = force_resend ||
+			(force_resend_writes &&
+				req->r_flags & CEPH_OSD_FLAG_WRITE);
+		err = __map_request(osdc, req, force_resend_req);
+		if (err < 0)
+			continue; /* error */
+		if (req->r_osd == NULL) {
+			dout("%p tid %llu maps to no osd\n", req, req->r_tid);
+			needmap++; /* request a newer map */
+		} else if (err > 0) {
+			if (!req->r_linger) {
+				dout("%p tid %llu requeued on osd%d\n", req,
+				     req->r_tid,
+				     req->r_osd ? req->r_osd->o_osd : -1);
+				req->r_flags |= CEPH_OSD_FLAG_RETRY;
+			}
+		}
+	}
+
+	/* remap linger requests; re-registered ones get resent */
+	list_for_each_entry_safe(req, nreq, &osdc->req_linger,
+				 r_linger_item) {
+		dout("linger req=%p req->r_osd=%p\n", req, req->r_osd);
+
+		err = __map_request(osdc, req,
+				    force_resend || force_resend_writes);
+		dout("__map_request returned %d\n", err);
+		if (err == 0)
+			continue;  /* no change and no osd was specified */
+		if (err < 0)
+			continue;  /* hrm! */
+		if (req->r_osd == NULL) {
+			dout("tid %llu maps to no valid osd\n", req->r_tid);
+			needmap++;  /* request a newer map */
+			continue;
+		}
+
+		dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid,
+		     req->r_osd ? req->r_osd->o_osd : -1);
+		__register_request(osdc, req);
+		__unregister_linger_request(osdc, req);
+	}
+	reset_changed_osds(osdc);
+	mutex_unlock(&osdc->request_mutex);
+
+	if (needmap) {
+		dout("%d requests for down osds, need new map\n", needmap);
+		ceph_monc_request_next_osdmap(&osdc->client->monc);
+	}
+}
+
+
+/*
+ * Process updated osd map.
+ *
+ * The message contains any number of incremental and full maps, normally
+ * indicating some sort of topology change in the cluster.  Kick requests
+ * off to different OSDs as needed.
+ *
+ * Takes map_sem for write while installing maps, then downgrades to
+ * read before acking the map to the monitor and resending queued
+ * requests.
+ */
+void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
+{
+	void *p, *end, *next;
+	u32 nr_maps, maplen;
+	u32 epoch;
+	struct ceph_osdmap *newmap = NULL, *oldmap;
+	int err;
+	struct ceph_fsid fsid;
+	bool was_full;
+
+	dout("handle_map have %u\n", osdc->osdmap ? osdc->osdmap->epoch : 0);
+	p = msg->front.iov_base;
+	end = p + msg->front.iov_len;
+
+	/* verify fsid */
+	ceph_decode_need(&p, end, sizeof(fsid), bad);
+	ceph_decode_copy(&p, &fsid, sizeof(fsid));
+	if (ceph_check_fsid(osdc->client, &fsid) < 0)
+		return;
+
+	down_write(&osdc->map_sem);
+
+	/* track FULL across map transitions so writes get resent */
+	was_full = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL);
+
+	/* incremental maps */
+	ceph_decode_32_safe(&p, end, nr_maps, bad);
+	dout(" %d inc maps\n", nr_maps);
+	while (nr_maps > 0) {
+		ceph_decode_need(&p, end, 2*sizeof(u32), bad);
+		epoch = ceph_decode_32(&p);
+		maplen = ceph_decode_32(&p);
+		ceph_decode_need(&p, end, maplen, bad);
+		next = p + maplen;
+		/* only apply the increment that follows our epoch */
+		if (osdc->osdmap && osdc->osdmap->epoch+1 == epoch) {
+			dout("applying incremental map %u len %d\n",
+			     epoch, maplen);
+			newmap = osdmap_apply_incremental(&p, next,
+							  osdc->osdmap,
+							  &osdc->client->msgr);
+			if (IS_ERR(newmap)) {
+				err = PTR_ERR(newmap);
+				goto bad;
+			}
+			BUG_ON(!newmap);
+			if (newmap != osdc->osdmap) {
+				ceph_osdmap_destroy(osdc->osdmap);
+				osdc->osdmap = newmap;
+			}
+			was_full = was_full ||
+				ceph_osdmap_flag(osdc->osdmap,
+						 CEPH_OSDMAP_FULL);
+			kick_requests(osdc, 0, was_full);
+		} else {
+			dout("ignoring incremental map %u len %d\n",
+			     epoch, maplen);
+		}
+		p = next;
+		nr_maps--;
+	}
+	if (newmap)
+		goto done;
+
+	/* full maps */
+	ceph_decode_32_safe(&p, end, nr_maps, bad);
+	dout(" %d full maps\n", nr_maps);
+	while (nr_maps) {
+		ceph_decode_need(&p, end, 2*sizeof(u32), bad);
+		epoch = ceph_decode_32(&p);
+		maplen = ceph_decode_32(&p);
+		ceph_decode_need(&p, end, maplen, bad);
+		if (nr_maps > 1) {
+			dout("skipping non-latest full map %u len %d\n",
+			     epoch, maplen);
+		} else if (osdc->osdmap && osdc->osdmap->epoch >= epoch) {
+			dout("skipping full map %u len %d, "
+			     "older than our %u\n", epoch, maplen,
+			     osdc->osdmap->epoch);
+		} else {
+			int skipped_map = 0;
+
+			dout("taking full map %u len %d\n", epoch, maplen);
+			newmap = ceph_osdmap_decode(&p, p+maplen);
+			if (IS_ERR(newmap)) {
+				err = PTR_ERR(newmap);
+				goto bad;
+			}
+			BUG_ON(!newmap);
+			oldmap = osdc->osdmap;
+			osdc->osdmap = newmap;
+			if (oldmap) {
+				/* if epochs were skipped, force a resend */
+				if (oldmap->epoch + 1 < newmap->epoch)
+					skipped_map = 1;
+				ceph_osdmap_destroy(oldmap);
+			}
+			was_full = was_full ||
+				ceph_osdmap_flag(osdc->osdmap,
+						 CEPH_OSDMAP_FULL);
+			kick_requests(osdc, skipped_map, was_full);
+		}
+		p += maplen;
+		nr_maps--;
+	}
+
+	if (!osdc->osdmap)
+		goto bad;
+done:
+	downgrade_write(&osdc->map_sem);
+	ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
+
+	/*
+	 * subscribe to subsequent osdmap updates if full to ensure
+	 * we find out when we are no longer full and stop returning
+	 * ENOSPC.
+	 */
+	if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) ||
+		ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD) ||
+		ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR))
+		ceph_monc_request_next_osdmap(&osdc->client->monc);
+
+	mutex_lock(&osdc->request_mutex);
+	__send_queued(osdc);
+	mutex_unlock(&osdc->request_mutex);
+	up_read(&osdc->map_sem);	/* pairs with downgrade_write above */
+	wake_up_all(&osdc->client->auth_wq);
+	return;
+
+bad:
+	pr_err("osdc handle_map corrupt msg\n");
+	ceph_msg_dump(msg);
+	up_write(&osdc->map_sem);
+	return;
+}
+
+/*
+ * watch/notify callback event infrastructure
+ *
+ * These callbacks are used both for watch and notify operations.
+ */
+
+/* kref release: free the event once the last reference is dropped. */
+static void __release_event(struct kref *kref)
+{
+	struct ceph_osd_event *event =
+		container_of(kref, struct ceph_osd_event, kref);
+
+	dout("__release_event %p\n", event);
+	kfree(event);
+}
+
+/* Take a reference on @event; pairs with ceph_osdc_put_event(). */
+static void get_event(struct ceph_osd_event *event)
+{
+	kref_get(&event->kref);
+}
+
+/* Drop a reference on @event; frees it when the last ref goes away. */
+void ceph_osdc_put_event(struct ceph_osd_event *event)
+{
+	kref_put(&event->kref, __release_event);
+}
+EXPORT_SYMBOL(ceph_osdc_put_event);
+
+/*
+ * Insert @new into the watch/notify event tree, keyed by cookie.
+ * Caller must hold osdc->event_lock; duplicate cookies are a bug.
+ */
+static void __insert_event(struct ceph_osd_client *osdc,
+			   struct ceph_osd_event *new)
+{
+	struct rb_node **link = &osdc->event_tree.rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link != NULL) {
+		struct ceph_osd_event *cur;
+
+		parent = *link;
+		cur = rb_entry(parent, struct ceph_osd_event, node);
+		if (new->cookie < cur->cookie)
+			link = &parent->rb_left;
+		else if (new->cookie > cur->cookie)
+			link = &parent->rb_right;
+		else
+			BUG();	/* cookie already registered */
+	}
+
+	rb_link_node(&new->node, parent, link);
+	rb_insert_color(&new->node, &osdc->event_tree);
+}
+
+/*
+ * Look up the event registered under @cookie, or NULL if none.
+ * Caller must hold osdc->event_lock.
+ */
+static struct ceph_osd_event *__find_event(struct ceph_osd_client *osdc,
+					   u64 cookie)
+{
+	struct rb_node *node = osdc->event_tree.rb_node;
+
+	while (node != NULL) {
+		struct ceph_osd_event *event =
+			rb_entry(node, struct ceph_osd_event, node);
+
+		if (cookie < event->cookie)
+			node = node->rb_left;
+		else if (cookie > event->cookie)
+			node = node->rb_right;
+		else
+			return event;
+	}
+	return NULL;
+}
+
+/*
+ * Unlink @event from the event tree (if linked) and drop the tree's
+ * reference.  Caller must hold osdc->event_lock.
+ */
+static void __remove_event(struct ceph_osd_event *event)
+{
+	struct ceph_osd_client *osdc = event->osdc;
+
+	if (!RB_EMPTY_NODE(&event->node)) {
+		dout("__remove_event removed %p\n", event);
+		rb_erase(&event->node, &osdc->event_tree);
+		ceph_osdc_put_event(event);	/* drop the tree's ref */
+	} else {
+		dout("__remove_event didn't remove %p\n", event);
+	}
+}
+
+/*
+ * Allocate a watch/notify event, assign it a unique cookie and insert
+ * it into the event tree.  Returns 0 and sets *@pevent on success,
+ * -ENOMEM on allocation failure.  The new event carries two refs:
+ * one owned by the tree, one returned to the caller (dropped via
+ * ceph_osdc_cancel_event()/ceph_osdc_put_event()).
+ */
+int ceph_osdc_create_event(struct ceph_osd_client *osdc,
+			   void (*event_cb)(u64, u64, u8, void *),
+			   void *data, struct ceph_osd_event **pevent)
+{
+	struct ceph_osd_event *event;
+
+	/* GFP_NOIO: may be called on the writeback/IO path */
+	event = kmalloc(sizeof(*event), GFP_NOIO);
+	if (!event)
+		return -ENOMEM;
+
+	dout("create_event %p\n", event);
+	event->cb = event_cb;
+	event->one_shot = 0;
+	event->data = data;
+	event->osdc = osdc;
+	INIT_LIST_HEAD(&event->osd_node);
+	RB_CLEAR_NODE(&event->node);
+	kref_init(&event->kref);   /* one ref for us */
+	kref_get(&event->kref);    /* one ref for the caller */
+
+	spin_lock(&osdc->event_lock);
+	event->cookie = ++osdc->event_count;
+	__insert_event(osdc, event);
+	spin_unlock(&osdc->event_lock);
+
+	*pevent = event;
+	return 0;
+}
+EXPORT_SYMBOL(ceph_osdc_create_event);
+
+/*
+ * Remove @event from the event tree and drop both the tree's ref and
+ * the caller's ref taken in ceph_osdc_create_event().
+ */
+void ceph_osdc_cancel_event(struct ceph_osd_event *event)
+{
+	struct ceph_osd_client *osdc = event->osdc;
+
+	dout("cancel_event %p\n", event);
+	spin_lock(&osdc->event_lock);
+	__remove_event(event);
+	spin_unlock(&osdc->event_lock);
+	ceph_osdc_put_event(event); /* caller's */
+}
+EXPORT_SYMBOL(ceph_osdc_cancel_event);
+
+
+/*
+ * Workqueue callback: invoke the event's user callback out of the
+ * message-dispatch context, then drop the ref taken by
+ * handle_watch_notify() and free the work item.
+ */
+static void do_event_work(struct work_struct *work)
+{
+	struct ceph_osd_event_work *event_work =
+		container_of(work, struct ceph_osd_event_work, work);
+	struct ceph_osd_event *event = event_work->event;
+	u64 ver = event_work->ver;
+	u64 notify_id = event_work->notify_id;
+	u8 opcode = event_work->opcode;
+
+	dout("do_event_work completing %p\n", event);
+	event->cb(ver, notify_id, opcode, event->data);
+	dout("do_event_work completed %p\n", event);
+	ceph_osdc_put_event(event);
+	kfree(event_work);
+}
+
+
+/*
+ * Process osd watch notifications
+ *
+ * Decodes the notify message, looks up the registered event by cookie
+ * and queues its callback on the notify workqueue.  A ref is taken on
+ * the event for the work item and dropped in do_event_work() (or on
+ * the error path here).
+ */
+static void handle_watch_notify(struct ceph_osd_client *osdc,
+				struct ceph_msg *msg)
+{
+	void *p, *end;
+	u8 proto_ver;
+	u64 cookie, ver, notify_id;
+	u8 opcode;
+	struct ceph_osd_event *event;
+	struct ceph_osd_event_work *event_work;
+
+	p = msg->front.iov_base;
+	end = p + msg->front.iov_len;
+
+	ceph_decode_8_safe(&p, end, proto_ver, bad);
+	ceph_decode_8_safe(&p, end, opcode, bad);
+	ceph_decode_64_safe(&p, end, cookie, bad);
+	ceph_decode_64_safe(&p, end, ver, bad);
+	ceph_decode_64_safe(&p, end, notify_id, bad);
+
+	spin_lock(&osdc->event_lock);
+	event = __find_event(osdc, cookie);
+	if (event) {
+		BUG_ON(event->one_shot);
+		get_event(event);	/* ref for the queued work */
+	}
+	spin_unlock(&osdc->event_lock);
+	dout("handle_watch_notify cookie %lld ver %lld event %p\n",
+	     cookie, ver, event);
+	if (event) {
+		event_work = kmalloc(sizeof(*event_work), GFP_NOIO);
+		if (!event_work) {
+			dout("ERROR: could not allocate event_work\n");
+			goto done_err;
+		}
+		INIT_WORK(&event_work->work, do_event_work);
+		event_work->event = event;
+		event_work->ver = ver;
+		event_work->notify_id = notify_id;
+		event_work->opcode = opcode;
+		if (!queue_work(osdc->notify_wq, &event_work->work)) {
+			dout("WARNING: failed to queue notify event work\n");
+			goto done_err;
+		}
+	}
+
+	return;
+
+done_err:
+	ceph_osdc_put_event(event);	/* drop the work item's ref */
+	return;
+
+bad:
+	pr_err("osdc handle_watch_notify corrupt msg\n");
+	return;
+}
+
+/*
+ * build new request AND message
+ *
+ * Encodes the full MOSDOp front (epoch, flags, mtime, reassert
+ * version, oloc, pgid, oid, ops, snap context, attempt count),
+ * recording the offsets of the fields that __send_request() rewrites
+ * on every (re)send, and finalizes the message header lengths.
+ *
+ * @off is used only as the data_off alignment hint for writes.
+ */
+void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off,
+			     struct ceph_snap_context *snapc, u64 snap_id,
+			     struct timespec *mtime)
+{
+	struct ceph_msg *msg = req->r_request;
+	void *p;
+	size_t msg_size;
+	int flags = req->r_flags;
+	u64 data_len;
+	unsigned int i;
+
+	req->r_snapid = snap_id;
+	req->r_snapc = ceph_get_snap_context(snapc);
+
+	/* encode request */
+	msg->hdr.version = cpu_to_le16(4);
+
+	p = msg->front.iov_base;
+	ceph_encode_32(&p, 1);   /* client_inc is always 1 */
+	req->r_request_osdmap_epoch = p;
+	p += 4;
+	req->r_request_flags = p;
+	p += 4;
+	/* mtime only encoded for writes; p advances either way */
+	if (req->r_flags & CEPH_OSD_FLAG_WRITE)
+		ceph_encode_timespec(p, mtime);
+	p += sizeof(struct ceph_timespec);
+	req->r_request_reassert_version = p;
+	p += sizeof(struct ceph_eversion); /* will get filled in */
+
+	/* oloc */
+	ceph_encode_8(&p, 4);
+	ceph_encode_8(&p, 4);
+	ceph_encode_32(&p, 8 + 4 + 4);
+	req->r_request_pool = p;
+	p += 8;
+	ceph_encode_32(&p, -1);  /* preferred */
+	ceph_encode_32(&p, 0);   /* key len */
+
+	ceph_encode_8(&p, 1);
+	req->r_request_pgid = p;
+	p += 8 + 4;
+	ceph_encode_32(&p, -1);  /* preferred */
+
+	/* oid */
+	ceph_encode_32(&p, req->r_base_oid.name_len);
+	memcpy(p, req->r_base_oid.name, req->r_base_oid.name_len);
+	dout("oid '%.*s' len %d\n", req->r_base_oid.name_len,
+	     req->r_base_oid.name, req->r_base_oid.name_len);
+	p += req->r_base_oid.name_len;
+
+	/* ops--can imply data */
+	ceph_encode_16(&p, (u16)req->r_num_ops);
+	data_len = 0;
+	for (i = 0; i < req->r_num_ops; i++) {
+		data_len += osd_req_encode_op(req, p, i);
+		p += sizeof(struct ceph_osd_op);
+	}
+
+	/* snaps */
+	ceph_encode_64(&p, req->r_snapid);
+	ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0);
+	ceph_encode_32(&p, req->r_snapc ? req->r_snapc->num_snaps : 0);
+	if (req->r_snapc) {
+		/* NOTE(review): iterates @snapc while reading
+		 * req->r_snapc->snaps; r_snapc was just set from snapc
+		 * above, so they are the same context here */
+		for (i = 0; i < snapc->num_snaps; i++) {
+			ceph_encode_64(&p, req->r_snapc->snaps[i]);
+		}
+	}
+
+	req->r_request_attempts = p;
+	p += 4;
+
+	/* data */
+	if (flags & CEPH_OSD_FLAG_WRITE) {
+		u16 data_off;
+
+		/*
+		 * The header "data_off" is a hint to the receiver
+		 * allowing it to align received data into its
+		 * buffers such that there's no need to re-copy
+		 * it before writing it to disk (direct I/O).
+		 */
+		data_off = (u16) (off & 0xffff);
+		req->r_request->hdr.data_off = cpu_to_le16(data_off);
+	}
+	req->r_request->hdr.data_len = cpu_to_le32(data_len);
+
+	BUG_ON(p > msg->front.iov_base + msg->front.iov_len);
+	msg_size = p - msg->front.iov_base;
+	msg->front.iov_len = msg_size;
+	msg->hdr.front_len = cpu_to_le32(msg_size);
+
+	dout("build_request msg_size was %d\n", (int)msg_size);
+}
+EXPORT_SYMBOL(ceph_osdc_build_request);
+
/*
 * Register request, send initial attempt.
 *
 * Locked wrapper: takes map_sem (read) so the osdmap stays stable while
 * the request is mapped to an OSD, and request_mutex for the request
 * trees, then delegates to __ceph_osdc_start_request().  @nofail is
 * passed straight through (its exact semantics live in the helper,
 * which is defined elsewhere in this file).
 */
int ceph_osdc_start_request(struct ceph_osd_client *osdc,
			    struct ceph_osd_request *req,
			    bool nofail)
{
	int rc;

	down_read(&osdc->map_sem);
	mutex_lock(&osdc->request_mutex);

	rc = __ceph_osdc_start_request(osdc, req, nofail);

	mutex_unlock(&osdc->request_mutex);
	up_read(&osdc->map_sem);

	return rc;
}
EXPORT_SYMBOL(ceph_osdc_start_request);
+
/*
 * wait for a request to complete
 *
 * Returns the request's r_result on completion, or the negative value
 * from wait_for_completion_interruptible() if the wait was interrupted
 * (in which case the request is cancelled and unregistered first).
 */
int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
			   struct ceph_osd_request *req)
{
	int rc;

	rc = wait_for_completion_interruptible(&req->r_completion);
	if (rc < 0) {
		/* interrupted: tear the request down under request_mutex,
		 * then complete it so other waiters are released */
		mutex_lock(&osdc->request_mutex);
		__cancel_request(req);
		__unregister_request(osdc, req);
		mutex_unlock(&osdc->request_mutex);
		complete_request(req);
		dout("wait_request tid %llu canceled/timed out\n", req->r_tid);
		return rc;
	}

	dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result);
	return req->r_result;
}
EXPORT_SYMBOL(ceph_osdc_wait_request);
+
/*
 * sync - wait for all in-flight requests to flush. avoid starvation.
 *
 * Walks the request tree in tid order, waiting for each in-flight WRITE
 * to reach its "safe" (committed to disk) completion.  Only requests
 * with tid <= the last_tid sampled at entry are waited on, so requests
 * submitted after ceph_osdc_sync() starts cannot starve it.
 */
void ceph_osdc_sync(struct ceph_osd_client *osdc)
{
	struct ceph_osd_request *req;
	u64 last_tid, next_tid = 0;

	mutex_lock(&osdc->request_mutex);
	last_tid = osdc->last_tid;
	while (1) {
		req = __lookup_request_ge(osdc, next_tid);
		if (!req)
			break;
		if (req->r_tid > last_tid)
			break;

		next_tid = req->r_tid + 1;
		/* reads don't need to be flushed */
		if ((req->r_flags & CEPH_OSD_FLAG_WRITE) == 0)
			continue;

		/* hold a ref across the unlocked wait so the request
		 * can't be freed under us */
		ceph_osdc_get_request(req);
		mutex_unlock(&osdc->request_mutex);
		dout("sync waiting on tid %llu (last is %llu)\n",
		     req->r_tid, last_tid);
		wait_for_completion(&req->r_safe_completion);
		mutex_lock(&osdc->request_mutex);
		ceph_osdc_put_request(req);
	}
	mutex_unlock(&osdc->request_mutex);
	dout("sync done (thru tid %llu)\n", last_tid);
}
EXPORT_SYMBOL(ceph_osdc_sync);
+
+/*
+ * Call all pending notify callbacks - for use after a watch is
+ * unregistered, to make sure no more callbacks for it will be invoked
+ */
+extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc)
+{
+ flush_workqueue(osdc->notify_wq);
+}
+EXPORT_SYMBOL(ceph_osdc_flush_notifies);
+
+
/*
 * init, shutdown
 */

/*
 * Initialize an osd client: zero/initialize all bookkeeping (osdmap,
 * request and osd trees, lru/unsent/notarget/linger lists, event tree),
 * start the osd idle-timeout delayed work, and allocate the request
 * mempool, the op/op-reply message pools and the watch-notify
 * workqueue.  On failure, everything allocated so far is torn down via
 * the goto-cleanup chain.  Returns 0 or a negative errno.
 */
int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
{
	int err;

	dout("init\n");
	osdc->client = client;
	osdc->osdmap = NULL;
	init_rwsem(&osdc->map_sem);
	init_completion(&osdc->map_waiters);
	osdc->last_requested_map = 0;
	mutex_init(&osdc->request_mutex);
	osdc->last_tid = 0;
	osdc->osds = RB_ROOT;
	INIT_LIST_HEAD(&osdc->osd_lru);
	osdc->requests = RB_ROOT;
	INIT_LIST_HEAD(&osdc->req_lru);
	INIT_LIST_HEAD(&osdc->req_unsent);
	INIT_LIST_HEAD(&osdc->req_notarget);
	INIT_LIST_HEAD(&osdc->req_linger);
	osdc->num_requests = 0;
	INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout);
	INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
	spin_lock_init(&osdc->event_lock);
	osdc->event_tree = RB_ROOT;
	osdc->event_count = 0;

	/* periodically expire idle osd connections */
	schedule_delayed_work(&osdc->osds_timeout_work,
	   round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ));

	err = -ENOMEM;
	osdc->req_mempool = mempool_create_kmalloc_pool(10,
					sizeof(struct ceph_osd_request));
	if (!osdc->req_mempool)
		goto out;

	err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP,
				OSD_OP_FRONT_LEN, 10, true,
				"osd_op");
	if (err < 0)
		goto out_mempool;
	err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY,
				OSD_OPREPLY_FRONT_LEN, 10, true,
				"osd_op_reply");
	if (err < 0)
		goto out_msgpool;

	err = -ENOMEM;
	osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify");
	if (!osdc->notify_wq)
		goto out_msgpool_reply;

	return 0;

out_msgpool_reply:
	ceph_msgpool_destroy(&osdc->msgpool_op_reply);
out_msgpool:
	ceph_msgpool_destroy(&osdc->msgpool_op);
out_mempool:
	mempool_destroy(osdc->req_mempool);
out:
	return err;
}
+
/*
 * Tear down an osd client: drain and destroy the notify workqueue,
 * cancel the timeout works, free the osdmap, drop all osd sessions,
 * then free the request mempool and message pools (reverse of
 * ceph_osdc_init()).
 */
void ceph_osdc_stop(struct ceph_osd_client *osdc)
{
	/* flush before destroy so in-flight notify callbacks finish */
	flush_workqueue(osdc->notify_wq);
	destroy_workqueue(osdc->notify_wq);
	cancel_delayed_work_sync(&osdc->timeout_work);
	cancel_delayed_work_sync(&osdc->osds_timeout_work);
	if (osdc->osdmap) {
		ceph_osdmap_destroy(osdc->osdmap);
		osdc->osdmap = NULL;
	}
	remove_all_osds(osdc);
	mempool_destroy(osdc->req_mempool);
	ceph_msgpool_destroy(&osdc->msgpool_op);
	ceph_msgpool_destroy(&osdc->msgpool_op_reply);
}
+
/*
 * Read some contiguous pages. If we cross a stripe boundary, shorten
 * *plen. Return number of bytes read, or error.
 *
 * Synchronous: builds a single READ request, submits it and waits for
 * the reply.  *plen may be shortened by ceph_osdc_new_request() if the
 * extent crosses an object boundary.
 */
int ceph_osdc_readpages(struct ceph_osd_client *osdc,
			struct ceph_vino vino, struct ceph_file_layout *layout,
			u64 off, u64 *plen,
			u32 truncate_seq, u64 truncate_size,
			struct page **pages, int num_pages, int page_align)
{
	struct ceph_osd_request *req;
	int rc = 0;

	dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
	     vino.snap, off, *plen);
	req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1,
				    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
				    NULL, truncate_seq, truncate_size,
				    false);
	if (IS_ERR(req))
		return PTR_ERR(req);

	/* it may be a short read due to an object boundary */

	osd_req_op_extent_osd_data_pages(req, 0,
				pages, *plen, page_align, false, false);

	dout("readpages final extent is %llu~%llu (%llu bytes align %d)\n",
	     off, *plen, *plen, page_align);

	/* reads carry no snap context and no mtime */
	ceph_osdc_build_request(req, off, NULL, vino.snap, NULL);

	rc = ceph_osdc_start_request(osdc, req, false);
	if (!rc)
		rc = ceph_osdc_wait_request(osdc, req);

	ceph_osdc_put_request(req);
	dout("readpages result %d\n", rc);
	return rc;
}
EXPORT_SYMBOL(ceph_osdc_readpages);
+
/*
 * do a synchronous write on N pages
 *
 * Builds a single WRITE request for [off, off+len), submits it with
 * nofail=true and waits for the result.  Returns the number of bytes
 * written (len) on success, or a negative errno.
 */
int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
			 struct ceph_file_layout *layout,
			 struct ceph_snap_context *snapc,
			 u64 off, u64 len,
			 u32 truncate_seq, u64 truncate_size,
			 struct timespec *mtime,
			 struct page **pages, int num_pages)
{
	struct ceph_osd_request *req;
	int rc = 0;
	int page_align = off & ~PAGE_MASK;

	BUG_ON(vino.snap != CEPH_NOSNAP);	/* snapshots aren't writeable */
	req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1,
				    CEPH_OSD_OP_WRITE,
				    CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
				    snapc, truncate_seq, truncate_size,
				    true);
	if (IS_ERR(req))
		return PTR_ERR(req);

	/* it may be a short write due to an object boundary */
	osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align,
				false, false);
	dout("writepages %llu~%llu (%llu bytes)\n", off, len, len);

	ceph_osdc_build_request(req, off, snapc, CEPH_NOSNAP, mtime);

	rc = ceph_osdc_start_request(osdc, req, true);
	if (!rc)
		rc = ceph_osdc_wait_request(osdc, req);

	ceph_osdc_put_request(req);
	if (rc == 0)
		rc = len;
	dout("writepages result %d\n", rc);
	return rc;
}
EXPORT_SYMBOL(ceph_osdc_writepages);
+
+int ceph_osdc_setup(void)
+{
+ BUG_ON(ceph_osd_request_cache);
+ ceph_osd_request_cache = kmem_cache_create("ceph_osd_request",
+ sizeof (struct ceph_osd_request),
+ __alignof__(struct ceph_osd_request),
+ 0, NULL);
+
+ return ceph_osd_request_cache ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL(ceph_osdc_setup);
+
+void ceph_osdc_cleanup(void)
+{
+ BUG_ON(!ceph_osd_request_cache);
+ kmem_cache_destroy(ceph_osd_request_cache);
+ ceph_osd_request_cache = NULL;
+}
+EXPORT_SYMBOL(ceph_osdc_cleanup);
+
/*
 * handle incoming message
 *
 * Messenger dispatch hook for OSD connections: routes osdmap updates,
 * op replies and watch-notify messages to their handlers.  Consumes
 * (puts) the message in all cases.
 */
static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
{
	struct ceph_osd *osd = con->private;
	struct ceph_osd_client *osdc;
	int type = le16_to_cpu(msg->hdr.type);

	/* connection may already be detached from its osd */
	if (!osd)
		goto out;
	osdc = osd->o_osdc;

	switch (type) {
	case CEPH_MSG_OSD_MAP:
		ceph_osdc_handle_map(osdc, msg);
		break;
	case CEPH_MSG_OSD_OPREPLY:
		handle_reply(osdc, msg, con);
		break;
	case CEPH_MSG_WATCH_NOTIFY:
		handle_watch_notify(osdc, msg);
		break;

	default:
		pr_err("received unknown message type %d %s\n", type,
		       ceph_msg_type_name(type));
	}
out:
	ceph_msg_put(msg);
}
+
/*
 * lookup and return message for incoming reply. set up reply message
 * pages.
 *
 * Called from the messenger to pick the ceph_msg that an incoming
 * OSD_OPREPLY should be read into.  Finds the request by tid, revokes
 * the preallocated reply from any previous connection, grows the front
 * buffer if the incoming reply is larger than what was preallocated,
 * and sanity-checks that the request's data buffer can hold the reply
 * payload.  Sets *skip=1 (and returns NULL) if the message should be
 * discarded.
 */
static struct ceph_msg *get_reply(struct ceph_connection *con,
				  struct ceph_msg_header *hdr,
				  int *skip)
{
	struct ceph_osd *osd = con->private;
	struct ceph_osd_client *osdc = osd->o_osdc;
	struct ceph_msg *m;
	struct ceph_osd_request *req;
	int front_len = le32_to_cpu(hdr->front_len);
	int data_len = le32_to_cpu(hdr->data_len);
	u64 tid;

	tid = le64_to_cpu(hdr->tid);
	mutex_lock(&osdc->request_mutex);
	req = __lookup_request(osdc, tid);
	if (!req) {
		/* request was cancelled/completed; drop the reply */
		*skip = 1;
		m = NULL;
		dout("get_reply unknown tid %llu from osd%d\n", tid,
		     osd->o_osd);
		goto out;
	}

	if (req->r_reply->con)
		dout("%s revoking msg %p from old con %p\n", __func__,
		     req->r_reply, req->r_reply->con);
	ceph_msg_revoke_incoming(req->r_reply);

	if (front_len > req->r_reply->front_alloc_len) {
		/* preallocated front is too small; allocate a bigger one */
		pr_warning("get_reply front %d > preallocated %d (%u#%llu)\n",
			   front_len, req->r_reply->front_alloc_len,
			   (unsigned int)con->peer_name.type,
			   le64_to_cpu(con->peer_name.num));
		m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS,
				 false);
		if (!m)
			goto out;
		ceph_msg_put(req->r_reply);
		req->r_reply = m;
	}
	m = ceph_msg_get(req->r_reply);

	if (data_len > 0) {
		struct ceph_osd_data *osd_data;

		/*
		 * XXX This is assuming there is only one op containing
		 * XXX page data.  Probably OK for reads, but this
		 * XXX ought to be done more generally.
		 */
		osd_data = osd_req_op_extent_osd_data(req, 0);
		if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
			if (osd_data->pages &&
				unlikely(osd_data->length < data_len)) {

				/* reply payload would overflow our pages */
				pr_warning("tid %lld reply has %d bytes "
					"we had only %llu bytes ready\n",
					tid, data_len, osd_data->length);
				*skip = 1;
				ceph_msg_put(m);
				m = NULL;
				goto out;
			}
		}
	}
	*skip = 0;
	dout("get_reply tid %lld %p\n", tid, m);

out:
	mutex_unlock(&osdc->request_mutex);
	return m;

}
+
/*
 * Messenger alloc_msg hook: allocate a message to receive an incoming
 * frame into.  Osdmaps and watch-notify messages get a fresh buffer;
 * op replies are matched to their request via get_reply().  Unknown
 * types are skipped.
 */
static struct ceph_msg *alloc_msg(struct ceph_connection *con,
				  struct ceph_msg_header *hdr,
				  int *skip)
{
	struct ceph_osd *osd = con->private;
	int type = le16_to_cpu(hdr->type);
	int front = le32_to_cpu(hdr->front_len);

	*skip = 0;
	switch (type) {
	case CEPH_MSG_OSD_MAP:
	case CEPH_MSG_WATCH_NOTIFY:
		return ceph_msg_new(type, front, GFP_NOFS, false);
	case CEPH_MSG_OSD_OPREPLY:
		return get_reply(con, hdr, skip);
	default:
		pr_info("alloc_msg unexpected msg type %d from osd%d\n", type,
			osd->o_osd);
		*skip = 1;
		return NULL;
	}
}
+
+/*
+ * Wrappers to refcount containing ceph_osd struct
+ */
+static struct ceph_connection *get_osd_con(struct ceph_connection *con)
+{
+ struct ceph_osd *osd = con->private;
+ if (get_osd(osd))
+ return con;
+ return NULL;
+}
+
+static void put_osd_con(struct ceph_connection *con)
+{
+ struct ceph_osd *osd = con->private;
+ put_osd(osd);
+}
+
/*
 * authentication
 */
/*
 * Note: returned pointer is the address of a structure that's
 * managed separately.  Caller must *not* attempt to free it.
 *
 * Build or refresh the cephx authorizer for this OSD connection.  With
 * @force_new, any existing authorizer is destroyed and recreated (used
 * after an authentication failure).  Returns the per-osd handshake
 * struct, or an ERR_PTR.
 */
static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
					int *proto, int force_new)
{
	struct ceph_osd *o = con->private;
	struct ceph_osd_client *osdc = o->o_osdc;
	struct ceph_auth_client *ac = osdc->client->monc.auth;
	struct ceph_auth_handshake *auth = &o->o_auth;

	if (force_new && auth->authorizer) {
		ceph_auth_destroy_authorizer(ac, auth->authorizer);
		auth->authorizer = NULL;
	}
	if (!auth->authorizer) {
		int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
						      auth);
		if (ret)
			return ERR_PTR(ret);
	} else {
		/* reuse the existing authorizer, refreshing it if needed */
		int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
						auth);
		if (ret)
			return ERR_PTR(ret);
	}
	*proto = ac->protocol;

	return auth;
}
+
+
+static int verify_authorizer_reply(struct ceph_connection *con, int len)
+{
+ struct ceph_osd *o = con->private;
+ struct ceph_osd_client *osdc = o->o_osdc;
+ struct ceph_auth_client *ac = osdc->client->monc.auth;
+
+ return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer, len);
+}
+
+static int invalidate_authorizer(struct ceph_connection *con)
+{
+ struct ceph_osd *o = con->private;
+ struct ceph_osd_client *osdc = o->o_osdc;
+ struct ceph_auth_client *ac = osdc->client->monc.auth;
+
+ ceph_auth_invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD);
+ return ceph_monc_validate_auth(&osdc->client->monc);
+}
+
/*
 * Connection operations for OSD sessions: refcounting of the owning
 * ceph_osd, message dispatch/allocation, fault handling, and the cephx
 * authorizer hooks.
 */
static const struct ceph_connection_operations osd_con_ops = {
	.get = get_osd_con,
	.put = put_osd_con,
	.dispatch = dispatch,
	.get_authorizer = get_authorizer,
	.verify_authorizer_reply = verify_authorizer_reply,
	.invalidate_authorizer = invalidate_authorizer,
	.alloc_msg = alloc_msg,
	.fault = osd_reset,
};
diff --git a/libceph/osdmap.c b/libceph/osdmap.c
new file mode 100644
index 0000000..8b8a5a2
--- /dev/null
+++ b/libceph/osdmap.c
@@ -0,0 +1,1724 @@
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <asm/div64.h>
+
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/osdmap.h>
+#include <linux/ceph/decode.h>
+#include <linux/crush/hash.h>
+#include <linux/crush/mapper.h>
+
+char *ceph_osdmap_state_str(char *str, int len, int state)
+{
+ if (!len)
+ return str;
+
+ if ((state & CEPH_OSD_EXISTS) && (state & CEPH_OSD_UP))
+ snprintf(str, len, "exists, up");
+ else if (state & CEPH_OSD_EXISTS)
+ snprintf(str, len, "exists");
+ else if (state & CEPH_OSD_UP)
+ snprintf(str, len, "up");
+ else
+ snprintf(str, len, "doesn't exist");
+
+ return str;
+}
+
+/* maps */
+
/*
 * Number of significant bits in t (i.e. position of the highest set
 * bit); calc_bits_of(0) == 0.
 */
static int calc_bits_of(unsigned int t)
{
	int bits;

	for (bits = 0; t != 0; t >>= 1)
		bits++;

	return bits;
}
+
/*
 * the foo_mask is the smallest value 2^n-1 that is >= foo.
 *
 * These masks are used when reducing a pg hash onto pg_num/pgp_num.
 */
static void calc_pg_masks(struct ceph_pg_pool_info *pi)
{
	pi->pg_num_mask = (1 << calc_bits_of(pi->pg_num-1)) - 1;
	pi->pgp_num_mask = (1 << calc_bits_of(pi->pgp_num-1)) - 1;
}
+
/*
 * decode crush map
 */

/*
 * Uniform bucket: every item has the same weight; decode just that
 * single per-item weight.  Returns 0 or -EINVAL on short input.
 */
static int crush_decode_uniform_bucket(void **p, void *end,
				       struct crush_bucket_uniform *b)
{
	dout("crush_decode_uniform_bucket %p to %p\n", *p, end);
	ceph_decode_need(p, end, (1+b->h.size) * sizeof(u32), bad);
	b->item_weight = ceph_decode_32(p);
	return 0;
bad:
	return -EINVAL;
}
+
/*
 * List bucket: decode per-item weights and running sums.  On -ENOMEM
 * partially-allocated arrays are left attached to @b; the caller
 * (crush_decode()) presumably releases them via crush_destroy() --
 * NOTE(review): confirm crush_destroy() frees these arrays.
 */
static int crush_decode_list_bucket(void **p, void *end,
				    struct crush_bucket_list *b)
{
	int j;
	dout("crush_decode_list_bucket %p to %p\n", *p, end);
	b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
	if (b->item_weights == NULL)
		return -ENOMEM;
	b->sum_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
	if (b->sum_weights == NULL)
		return -ENOMEM;
	ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad);
	for (j = 0; j < b->h.size; j++) {
		b->item_weights[j] = ceph_decode_32(p);
		b->sum_weights[j] = ceph_decode_32(p);
	}
	return 0;
bad:
	return -EINVAL;
}
+
/*
 * Tree bucket: decode the node count followed by per-node weights.
 * Returns 0, -EINVAL on short input, or -ENOMEM.
 */
static int crush_decode_tree_bucket(void **p, void *end,
				    struct crush_bucket_tree *b)
{
	int j;
	dout("crush_decode_tree_bucket %p to %p\n", *p, end);
	ceph_decode_32_safe(p, end, b->num_nodes, bad);
	b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS);
	if (b->node_weights == NULL)
		return -ENOMEM;
	ceph_decode_need(p, end, b->num_nodes * sizeof(u32), bad);
	for (j = 0; j < b->num_nodes; j++)
		b->node_weights[j] = ceph_decode_32(p);
	return 0;
bad:
	return -EINVAL;
}
+
/*
 * Straw bucket: decode per-item weights and precomputed straw values.
 * Same error/cleanup contract as crush_decode_list_bucket().
 */
static int crush_decode_straw_bucket(void **p, void *end,
				     struct crush_bucket_straw *b)
{
	int j;
	dout("crush_decode_straw_bucket %p to %p\n", *p, end);
	b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
	if (b->item_weights == NULL)
		return -ENOMEM;
	b->straws = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
	if (b->straws == NULL)
		return -ENOMEM;
	ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad);
	for (j = 0; j < b->h.size; j++) {
		b->item_weights[j] = ceph_decode_32(p);
		b->straws[j] = ceph_decode_32(p);
	}
	return 0;
bad:
	return -EINVAL;
}
+
+static int skip_name_map(void **p, void *end)
+{
+ int len;
+ ceph_decode_32_safe(p, end, len ,bad);
+ while (len--) {
+ int strlen;
+ *p += sizeof(u32);
+ ceph_decode_32_safe(p, end, strlen, bad);
+ *p += strlen;
+}
+ return 0;
+bad:
+ return -EINVAL;
+}
+
/*
 * Decode an on-wire CRUSH map into a freshly allocated crush_map:
 * magic + limits, then the bucket array, the rule array, and finally
 * (optionally) the trailing name maps and tunables.  Tunable sections
 * are optional on the wire, so running out of input there is not an
 * error (goto done).  Returns the map or an ERR_PTR; on error all
 * partial allocations are released via crush_destroy().
 */
static struct crush_map *crush_decode(void *pbyval, void *end)
{
	struct crush_map *c;
	int err = -EINVAL;
	int i, j;
	void **p = &pbyval;
	void *start = pbyval;
	u32 magic;
	u32 num_name_maps;

	dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p));

	c = kzalloc(sizeof(*c), GFP_NOFS);
	if (c == NULL)
		return ERR_PTR(-ENOMEM);

	/* set tunables to default values */
	c->choose_local_tries = 2;
	c->choose_local_fallback_tries = 5;
	c->choose_total_tries = 19;
	c->chooseleaf_descend_once = 0;

	ceph_decode_need(p, end, 4*sizeof(u32), bad);
	magic = ceph_decode_32(p);
	if (magic != CRUSH_MAGIC) {
		pr_err("crush_decode magic %x != current %x\n",
		       (unsigned int)magic, (unsigned int)CRUSH_MAGIC);
		goto bad;
	}
	c->max_buckets = ceph_decode_32(p);
	c->max_rules = ceph_decode_32(p);
	c->max_devices = ceph_decode_32(p);

	c->buckets = kcalloc(c->max_buckets, sizeof(*c->buckets), GFP_NOFS);
	if (c->buckets == NULL)
		goto badmem;
	c->rules = kcalloc(c->max_rules, sizeof(*c->rules), GFP_NOFS);
	if (c->rules == NULL)
		goto badmem;

	/* buckets */
	for (i = 0; i < c->max_buckets; i++) {
		int size = 0;
		u32 alg;
		struct crush_bucket *b;

		ceph_decode_32_safe(p, end, alg, bad);
		/* alg 0 marks an unused bucket slot */
		if (alg == 0) {
			c->buckets[i] = NULL;
			continue;
		}
		dout("crush_decode bucket %d off %x %p to %p\n",
		     i, (int)(*p-start), *p, end);

		/* allocate the right concrete bucket type for this alg */
		switch (alg) {
		case CRUSH_BUCKET_UNIFORM:
			size = sizeof(struct crush_bucket_uniform);
			break;
		case CRUSH_BUCKET_LIST:
			size = sizeof(struct crush_bucket_list);
			break;
		case CRUSH_BUCKET_TREE:
			size = sizeof(struct crush_bucket_tree);
			break;
		case CRUSH_BUCKET_STRAW:
			size = sizeof(struct crush_bucket_straw);
			break;
		default:
			err = -EINVAL;
			goto bad;
		}
		BUG_ON(size == 0);
		b = c->buckets[i] = kzalloc(size, GFP_NOFS);
		if (b == NULL)
			goto badmem;

		/* common bucket header */
		ceph_decode_need(p, end, 4*sizeof(u32), bad);
		b->id = ceph_decode_32(p);
		b->type = ceph_decode_16(p);
		b->alg = ceph_decode_8(p);
		b->hash = ceph_decode_8(p);
		b->weight = ceph_decode_32(p);
		b->size = ceph_decode_32(p);

		dout("crush_decode bucket size %d off %x %p to %p\n",
		     b->size, (int)(*p-start), *p, end);

		b->items = kcalloc(b->size, sizeof(__s32), GFP_NOFS);
		if (b->items == NULL)
			goto badmem;
		b->perm = kcalloc(b->size, sizeof(u32), GFP_NOFS);
		if (b->perm == NULL)
			goto badmem;
		b->perm_n = 0;

		ceph_decode_need(p, end, b->size*sizeof(u32), bad);
		for (j = 0; j < b->size; j++)
			b->items[j] = ceph_decode_32(p);

		/* alg-specific payload */
		switch (b->alg) {
		case CRUSH_BUCKET_UNIFORM:
			err = crush_decode_uniform_bucket(p, end,
				  (struct crush_bucket_uniform *)b);
			if (err < 0)
				goto bad;
			break;
		case CRUSH_BUCKET_LIST:
			err = crush_decode_list_bucket(p, end,
			       (struct crush_bucket_list *)b);
			if (err < 0)
				goto bad;
			break;
		case CRUSH_BUCKET_TREE:
			err = crush_decode_tree_bucket(p, end,
				(struct crush_bucket_tree *)b);
			if (err < 0)
				goto bad;
			break;
		case CRUSH_BUCKET_STRAW:
			err = crush_decode_straw_bucket(p, end,
				(struct crush_bucket_straw *)b);
			if (err < 0)
				goto bad;
			break;
		}
	}

	/* rules */
	dout("rule vec is %p\n", c->rules);
	for (i = 0; i < c->max_rules; i++) {
		u32 yes;
		struct crush_rule *r;

		/* "yes" flag: is there a rule in this slot? */
		ceph_decode_32_safe(p, end, yes, bad);
		if (!yes) {
			dout("crush_decode NO rule %d off %x %p to %p\n",
			     i, (int)(*p-start), *p, end);
			c->rules[i] = NULL;
			continue;
		}

		dout("crush_decode rule %d off %x %p to %p\n",
		     i, (int)(*p-start), *p, end);

		/* len */
		ceph_decode_32_safe(p, end, yes, bad);
#if BITS_PER_LONG == 32
		/* guard the kmalloc size computation against overflow */
		err = -EINVAL;
		if (yes > (ULONG_MAX - sizeof(*r))
			  / sizeof(struct crush_rule_step))
			goto bad;
#endif
		r = c->rules[i] = kmalloc(sizeof(*r) +
					  yes*sizeof(struct crush_rule_step),
					  GFP_NOFS);
		if (r == NULL)
			goto badmem;
		dout(" rule %d is at %p\n", i, r);
		r->len = yes;
		ceph_decode_copy_safe(p, end, &r->mask, 4, bad); /* 4 u8's */
		ceph_decode_need(p, end, r->len*3*sizeof(u32), bad);
		for (j = 0; j < r->len; j++) {
			r->steps[j].op = ceph_decode_32(p);
			r->steps[j].arg1 = ceph_decode_32(p);
			r->steps[j].arg2 = ceph_decode_32(p);
		}
	}

	/* ignore trailing name maps. */
	for (num_name_maps = 0; num_name_maps < 3; num_name_maps++) {
		err = skip_name_map(p, end);
		if (err < 0)
			goto done;
	}

	/* tunables -- optional trailing sections; stop quietly at EOF */
	ceph_decode_need(p, end, 3*sizeof(u32), done);
	c->choose_local_tries = ceph_decode_32(p);
	c->choose_local_fallback_tries =  ceph_decode_32(p);
	c->choose_total_tries = ceph_decode_32(p);
	dout("crush decode tunable choose_local_tries = %d",
	     c->choose_local_tries);
	dout("crush decode tunable choose_local_fallback_tries = %d",
	     c->choose_local_fallback_tries);
	dout("crush decode tunable choose_total_tries = %d",
	     c->choose_total_tries);

	ceph_decode_need(p, end, sizeof(u32), done);
	c->chooseleaf_descend_once = ceph_decode_32(p);
	dout("crush decode tunable chooseleaf_descend_once = %d",
	     c->chooseleaf_descend_once);

done:
	dout("crush_decode success\n");
	return c;

badmem:
	err = -ENOMEM;
bad:
	dout("crush_decode fail %d\n", err);
	crush_destroy(c);
	return ERR_PTR(err);
}
+
+/*
+ * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
+ * to a set of osds) and primary_temp (explicit primary setting)
+ */
+static int pgid_cmp(struct ceph_pg l, struct ceph_pg r)
+{
+ if (l.pool < r.pool)
+ return -1;
+ if (l.pool > r.pool)
+ return 1;
+ if (l.seed < r.seed)
+ return -1;
+ if (l.seed > r.seed)
+ return 1;
+ return 0;
+}
+
/*
 * Insert @new into the pg mapping rbtree ordered by pgid_cmp().
 * Returns 0, or -EEXIST if a mapping for that pgid is already present.
 */
static int __insert_pg_mapping(struct ceph_pg_mapping *new,
			       struct rb_root *root)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct ceph_pg_mapping *pg = NULL;
	int c;

	dout("__insert_pg_mapping %llx %p\n", *(u64 *)&new->pgid, new);
	while (*p) {
		parent = *p;
		pg = rb_entry(parent, struct ceph_pg_mapping, node);
		c = pgid_cmp(new->pgid, pg->pgid);
		if (c < 0)
			p = &(*p)->rb_left;
		else if (c > 0)
			p = &(*p)->rb_right;
		else
			return -EEXIST;
	}

	rb_link_node(&new->node, parent, p);
	rb_insert_color(&new->node, root);
	return 0;
}
+
/*
 * Find the mapping for @pgid in the rbtree, or NULL if none.
 */
static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
						   struct ceph_pg pgid)
{
	struct rb_node *n = root->rb_node;
	struct ceph_pg_mapping *pg;
	int c;

	while (n) {
		pg = rb_entry(n, struct ceph_pg_mapping, node);
		c = pgid_cmp(pgid, pg->pgid);
		if (c < 0) {
			n = n->rb_left;
		} else if (c > 0) {
			n = n->rb_right;
		} else {
			dout("__lookup_pg_mapping %lld.%x got %p\n",
			     pgid.pool, pgid.seed, pg);
			return pg;
		}
	}
	return NULL;
}
+
/*
 * Remove and free the mapping for @pgid.  Returns 0, or -ENOENT if no
 * mapping exists for that pgid.
 */
static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid)
{
	struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid);

	if (pg) {
		dout("__remove_pg_mapping %lld.%x %p\n", pgid.pool, pgid.seed,
		     pg);
		rb_erase(&pg->node, root);
		kfree(pg);
		return 0;
	}
	dout("__remove_pg_mapping %lld.%x dne\n", pgid.pool, pgid.seed);
	return -ENOENT;
}
+
/*
 * rbtree of pg pool info
 */

/*
 * Insert @new into the pool-info tree keyed by pool id.  Returns 0 or
 * -EEXIST if that id is already present.
 */
static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct ceph_pg_pool_info *pi = NULL;

	while (*p) {
		parent = *p;
		pi = rb_entry(parent, struct ceph_pg_pool_info, node);
		if (new->id < pi->id)
			p = &(*p)->rb_left;
		else if (new->id > pi->id)
			p = &(*p)->rb_right;
		else
			return -EEXIST;
	}

	rb_link_node(&new->node, parent, p);
	rb_insert_color(&new->node, root);
	return 0;
}
+
/*
 * Find pool info by pool id, or NULL if not present.
 */
static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id)
{
	struct ceph_pg_pool_info *pi;
	struct rb_node *n = root->rb_node;

	while (n) {
		pi = rb_entry(n, struct ceph_pg_pool_info, node);
		if (id < pi->id)
			n = n->rb_left;
		else if (id > pi->id)
			n = n->rb_right;
		else
			return pi;
	}
	return NULL;
}
+
/* Public lookup of pool info by id; NULL if the pool doesn't exist. */
struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map, u64 id)
{
	return __lookup_pg_pool(&map->pg_pools, id);
}
+
/*
 * Return the name of pool @id, or NULL if the pool does not exist,
 * @id is CEPH_NOPOOL, or @id is out of the int range this code
 * supports.
 */
const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id)
{
	struct ceph_pg_pool_info *pi;

	if (id == CEPH_NOPOOL)
		return NULL;

	if (WARN_ON_ONCE(id > (u64) INT_MAX))
		return NULL;

	pi = __lookup_pg_pool(&map->pg_pools, (int) id);

	return pi ? pi->name : NULL;
}
EXPORT_SYMBOL(ceph_pg_pool_name_by_id);
+
/*
 * Linear scan of the pool tree for a pool with the given name.
 * Returns its id, or -ENOENT if no pool matches.
 */
int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
{
	struct rb_node *rbp;

	for (rbp = rb_first(&map->pg_pools); rbp; rbp = rb_next(rbp)) {
		struct ceph_pg_pool_info *pi =
			rb_entry(rbp, struct ceph_pg_pool_info, node);
		/* pools decoded before their name arrives have pi->name NULL */
		if (pi->name && strcmp(pi->name, name) == 0)
			return pi->id;
	}
	return -ENOENT;
}
EXPORT_SYMBOL(ceph_pg_poolid_by_name);
+
/* Unlink @pi from the pool tree and free it, including its name. */
static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
{
	rb_erase(&pi->node, root);
	kfree(pi->name);
	kfree(pi);
}
+
/*
 * Decode one ceph_pg_pool_info from its on-wire encoding.  Only the
 * fields the client cares about are kept; everything else is skipped
 * by advancing *p.  The whole pool record is length-prefixed, so on
 * success *p is set to pool_end regardless of how much was consumed.
 * Supports encoding versions 5..9 (ev), rejecting anything outside
 * that window.
 *
 * NOTE(review): the intermediate skips (*p += ...) inside the record
 * are not individually bounds-checked against pool_end; they rely on
 * the initial ceph_decode_need(len) plus a well-formed encoding --
 * confirm this is acceptable for the trusted-monitor input model.
 */
static int decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
{
	u8 ev, cv;
	unsigned len, num;
	void *pool_end;

	ceph_decode_need(p, end, 2 + 4, bad);
	ev = ceph_decode_8(p);  /* encoding version */
	cv = ceph_decode_8(p);  /* compat version */
	if (ev < 5) {
		pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv);
		return -EINVAL;
	}
	if (cv > 9) {
		pr_warning("got v %d cv %d > 9 of ceph_pg_pool\n", ev, cv);
		return -EINVAL;
	}
	len = ceph_decode_32(p);
	ceph_decode_need(p, end, len, bad);
	pool_end = *p + len;

	pi->type = ceph_decode_8(p);
	pi->size = ceph_decode_8(p);
	pi->crush_ruleset = ceph_decode_8(p);
	pi->object_hash = ceph_decode_8(p);

	pi->pg_num = ceph_decode_32(p);
	pi->pgp_num = ceph_decode_32(p);

	*p += 4 + 4;  /* skip lpg* */
	*p += 4;      /* skip last_change */
	*p += 8 + 4;  /* skip snap_seq, snap_epoch */

	/* skip snaps */
	num = ceph_decode_32(p);
	while (num--) {
		*p += 8;  /* snapid key */
		*p += 1 + 1; /* versions */
		len = ceph_decode_32(p);
		*p += len;
	}

	/* skip removed_snaps */
	num = ceph_decode_32(p);
	*p += num * (8 + 8);

	*p += 8;  /* skip auid */
	pi->flags = ceph_decode_64(p);
	*p += 4;  /* skip crash_replay_interval */

	if (ev >= 7)
		*p += 1;  /* skip min_size */

	if (ev >= 8)
		*p += 8 + 8;  /* skip quota_max_* */

	if (ev >= 9) {
		/* skip tiers */
		num = ceph_decode_32(p);
		*p += num * 8;

		*p += 8;  /* skip tier_of */
		*p += 1;  /* skip cache_mode */

		pi->read_tier = ceph_decode_64(p);
		pi->write_tier = ceph_decode_64(p);
	} else {
		/* no tiering info in older encodings */
		pi->read_tier = -1;
		pi->write_tier = -1;
	}

	/* ignore the rest */

	*p = pool_end;
	calc_pg_masks(pi);
	return 0;

bad:
	return -EINVAL;
}
+
/*
 * Decode the (pool id -> name) map and attach each name to its
 * already-decoded pool info.  Names for unknown pool ids are skipped
 * silently; an existing name is replaced.
 */
static int decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
{
	struct ceph_pg_pool_info *pi;
	u32 num, len;
	u64 pool;

	ceph_decode_32_safe(p, end, num, bad);
	dout(" %d pool names\n", num);
	while (num--) {
		ceph_decode_64_safe(p, end, pool, bad);
		ceph_decode_32_safe(p, end, len, bad);
		dout(" pool %llu len %d\n", pool, len);
		ceph_decode_need(p, end, len, bad);
		pi = __lookup_pg_pool(&map->pg_pools, pool);
		if (pi) {
			char *name = kstrndup(*p, len, GFP_NOFS);

			if (!name)
				return -ENOMEM;
			kfree(pi->name);
			pi->name = name;
			dout(" name is %s\n", pi->name);
		}
		/* advance past the name bytes whether or not we kept them */
		*p += len;
	}
	return 0;

bad:
	return -EINVAL;
}
+
/*
 * osd map
 */

/*
 * Free an osdmap and everything hanging off it: the crush map, the
 * pg_temp/primary_temp mapping trees, the pool-info tree, and the
 * per-osd state/weight/addr/affinity arrays.
 */
void ceph_osdmap_destroy(struct ceph_osdmap *map)
{
	dout("osdmap_destroy %p\n", map);
	if (map->crush)
		crush_destroy(map->crush);
	while (!RB_EMPTY_ROOT(&map->pg_temp)) {
		struct ceph_pg_mapping *pg =
			rb_entry(rb_first(&map->pg_temp),
				 struct ceph_pg_mapping, node);
		rb_erase(&pg->node, &map->pg_temp);
		kfree(pg);
	}
	while (!RB_EMPTY_ROOT(&map->primary_temp)) {
		struct ceph_pg_mapping *pg =
			rb_entry(rb_first(&map->primary_temp),
				 struct ceph_pg_mapping, node);
		rb_erase(&pg->node, &map->primary_temp);
		kfree(pg);
	}
	while (!RB_EMPTY_ROOT(&map->pg_pools)) {
		struct ceph_pg_pool_info *pi =
			rb_entry(rb_first(&map->pg_pools),
				 struct ceph_pg_pool_info, node);
		__remove_pg_pool(&map->pg_pools, pi);
	}
	kfree(map->osd_state);
	kfree(map->osd_weight);
	kfree(map->osd_addr);
	kfree(map->osd_primary_affinity);
	kfree(map);
}
+
+/*
+ * Adjust max_osd value, (re)allocate arrays.
+ *
+ * The new elements are properly initialized.
+ */
+static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
+{
+ u8 *state;
+ u32 *weight;
+ struct ceph_entity_addr *addr;
+ int i;
+
+ state = krealloc(map->osd_state, max*sizeof(*state), GFP_NOFS);
+ weight = krealloc(map->osd_weight, max*sizeof(*weight), GFP_NOFS);
+ addr = krealloc(map->osd_addr, max*sizeof(*addr), GFP_NOFS);
+ if (!state || !weight || !addr) {
+ kfree(state);
+ kfree(weight);
+ kfree(addr);
+
+ return -ENOMEM;
+ }
+
+ for (i = map->max_osd; i < max; i++) {
+ state[i] = 0;
+ weight[i] = CEPH_OSD_OUT;
+ memset(addr + i, 0, sizeof(*addr));
+ }
+
+ map->osd_state = state;
+ map->osd_weight = weight;
+ map->osd_addr = addr;
+
+ if (map->osd_primary_affinity) {
+ u32 *affinity;
+
+ affinity = krealloc(map->osd_primary_affinity,
+ max*sizeof(*affinity), GFP_NOFS);
+ if (!affinity)
+ return -ENOMEM;
+
+ for (i = map->max_osd; i < max; i++)
+ affinity[i] = CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
+
+ map->osd_primary_affinity = affinity;
+ }
+
+ map->max_osd = max;
+
+ return 0;
+}
+
+#define OSDMAP_WRAPPER_COMPAT_VER 7
+#define OSDMAP_CLIENT_DATA_COMPAT_VER 1
+
/*
 * Return 0 or error.  On success, *v is set to 0 for old (v6) osdmaps,
 * to struct_v of the client_data section for new (v7 and above)
 * osdmaps.
 *
 * New-style maps carry a wrapper header (struct_v/struct_compat/len)
 * followed by a client_data header; old-style maps encode a bare u16
 * version.  @prefix is only used in warning messages ("full" vs
 * "inc", per the callers).
 */
static int get_osdmap_client_data_v(void **p, void *end,
				    const char *prefix, u8 *v)
{
	u8 struct_v;

	ceph_decode_8_safe(p, end, struct_v, e_inval);
	if (struct_v >= 7) {
		u8 struct_compat;

		ceph_decode_8_safe(p, end, struct_compat, e_inval);
		if (struct_compat > OSDMAP_WRAPPER_COMPAT_VER) {
			pr_warning("got v %d cv %d > %d of %s ceph_osdmap\n",
				struct_v, struct_compat,
				OSDMAP_WRAPPER_COMPAT_VER, prefix);
			return -EINVAL;
		}
		*p += 4; /* ignore wrapper struct_len */

		ceph_decode_8_safe(p, end, struct_v, e_inval);
		ceph_decode_8_safe(p, end, struct_compat, e_inval);
		if (struct_compat > OSDMAP_CLIENT_DATA_COMPAT_VER) {
			pr_warning("got v %d cv %d > %d of %s ceph_osdmap client data\n",
				struct_v, struct_compat,
				OSDMAP_CLIENT_DATA_COMPAT_VER, prefix);
			return -EINVAL;
		}
		*p += 4; /* ignore client data struct_len */
	} else {
		u16 version;

		/* re-read the byte as the low half of a u16 version */
		*p -= 1;
		ceph_decode_16_safe(p, end, version, e_inval);
		if (version < 6) {
			pr_warning("got v %d < 6 of %s ceph_osdmap\n", version,
				prefix);
			return -EINVAL;
		}

		/* old osdmap enconding */
		struct_v = 0;
	}

	*v = struct_v;
	return 0;

e_inval:
	return -EINVAL;
}
+
+/*
+ * Decode a count-prefixed list of (u64 pool id, pool data) entries.
+ * For an incremental map an already-known pool is updated in place;
+ * otherwise a fresh entry is allocated, inserted and then filled in.
+ */
+static int __decode_pools(void **p, void *end, struct ceph_osdmap *map,
+ bool incremental)
+{
+ u32 n;
+
+ ceph_decode_32_safe(p, end, n, e_inval);
+ while (n--) {
+ struct ceph_pg_pool_info *pi;
+ u64 pool;
+ int ret;
+
+ ceph_decode_64_safe(p, end, pool, e_inval);
+
+ pi = __lookup_pg_pool(&map->pg_pools, pool);
+ if (!incremental || !pi) {
+ pi = kzalloc(sizeof(*pi), GFP_NOFS);
+ if (!pi)
+ return -ENOMEM;
+
+ pi->id = pool;
+
+ ret = __insert_pg_pool(&map->pg_pools, pi);
+ if (ret) {
+ kfree(pi);
+ return ret;
+ }
+ }
+
+ ret = decode_pool(p, end, pi);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+/* Decode the pool list of a full map. */
+static int decode_pools(void **p, void *end, struct ceph_osdmap *map)
+{
+ return __decode_pools(p, end, map, false);
+}
+
+/* Decode the new_pools section of an incremental map. */
+static int decode_new_pools(void **p, void *end, struct ceph_osdmap *map)
+{
+ return __decode_pools(p, end, map, true);
+}
+
+/*
+ * Decode pg_temp mappings. Each entry replaces the mapping for its
+ * pgid; in an incremental map an empty osd list (len == 0) just
+ * removes the existing mapping.
+ */
+static int __decode_pg_temp(void **p, void *end, struct ceph_osdmap *map,
+ bool incremental)
+{
+ u32 n;
+
+ ceph_decode_32_safe(p, end, n, e_inval);
+ while (n--) {
+ struct ceph_pg pgid;
+ u32 len, i;
+ int ret;
+
+ ret = ceph_decode_pgid(p, end, &pgid);
+ if (ret)
+ return ret;
+
+ ceph_decode_32_safe(p, end, len, e_inval);
+
+ /* a full map must not already contain this mapping */
+ ret = __remove_pg_mapping(&map->pg_temp, pgid);
+ BUG_ON(!incremental && ret != -ENOENT);
+
+ if (!incremental || len > 0) {
+ struct ceph_pg_mapping *pg;
+
+ ceph_decode_need(p, end, len*sizeof(u32), e_inval);
+
+ /* guard the kzalloc size computation against overflow */
+ if (len > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
+ return -EINVAL;
+
+ pg = kzalloc(sizeof(*pg) + len*sizeof(u32), GFP_NOFS);
+ if (!pg)
+ return -ENOMEM;
+
+ pg->pgid = pgid;
+ pg->pg_temp.len = len;
+ for (i = 0; i < len; i++)
+ pg->pg_temp.osds[i] = ceph_decode_32(p);
+
+ ret = __insert_pg_mapping(pg, &map->pg_temp);
+ if (ret) {
+ kfree(pg);
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+/* Decode the pg_temp section of a full map. */
+static int decode_pg_temp(void **p, void *end, struct ceph_osdmap *map)
+{
+ return __decode_pg_temp(p, end, map, false);
+}
+
+/* Decode the new_pg_temp section of an incremental map. */
+static int decode_new_pg_temp(void **p, void *end, struct ceph_osdmap *map)
+{
+ return __decode_pg_temp(p, end, map, true);
+}
+
+/*
+ * Decode primary_temp mappings. Each entry pins a primary osd for its
+ * pgid; in an incremental map an osd of (u32)-1 just removes the
+ * existing mapping.
+ */
+static int __decode_primary_temp(void **p, void *end, struct ceph_osdmap *map,
+ bool incremental)
+{
+ u32 n;
+
+ ceph_decode_32_safe(p, end, n, e_inval);
+ while (n--) {
+ struct ceph_pg pgid;
+ u32 osd;
+ int ret;
+
+ ret = ceph_decode_pgid(p, end, &pgid);
+ if (ret)
+ return ret;
+
+ ceph_decode_32_safe(p, end, osd, e_inval);
+
+ /* a full map must not already contain this mapping */
+ ret = __remove_pg_mapping(&map->primary_temp, pgid);
+ BUG_ON(!incremental && ret != -ENOENT);
+
+ if (!incremental || osd != (u32)-1) {
+ struct ceph_pg_mapping *pg;
+
+ pg = kzalloc(sizeof(*pg), GFP_NOFS);
+ if (!pg)
+ return -ENOMEM;
+
+ pg->pgid = pgid;
+ pg->primary_temp.osd = osd;
+
+ ret = __insert_pg_mapping(pg, &map->primary_temp);
+ if (ret) {
+ kfree(pg);
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+/* Decode the primary_temp section of a full map. */
+static int decode_primary_temp(void **p, void *end, struct ceph_osdmap *map)
+{
+ return __decode_primary_temp(p, end, map, false);
+}
+
+/* Decode the new_primary_temp section of an incremental map. */
+static int decode_new_primary_temp(void **p, void *end,
+ struct ceph_osdmap *map)
+{
+ return __decode_primary_temp(p, end, map, true);
+}
+
+/*
+ * Primary affinity of @osd; the default value if the affinity array
+ * has not been allocated yet (see set_primary_affinity()).
+ */
+u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd)
+{
+ BUG_ON(osd >= map->max_osd);
+
+ if (!map->osd_primary_affinity)
+ return CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
+
+ return map->osd_primary_affinity[osd];
+}
+
+/*
+ * Record @aff as the primary affinity of @osd, lazily allocating the
+ * per-osd affinity array (all entries defaulted) on first use.
+ */
+static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff)
+{
+ BUG_ON(osd >= map->max_osd);
+
+ if (!map->osd_primary_affinity) {
+ int i;
+
+ map->osd_primary_affinity = kmalloc(map->max_osd*sizeof(u32),
+ GFP_NOFS);
+ if (!map->osd_primary_affinity)
+ return -ENOMEM;
+
+ for (i = 0; i < map->max_osd; i++)
+ map->osd_primary_affinity[i] =
+ CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
+ }
+
+ map->osd_primary_affinity[osd] = aff;
+
+ return 0;
+}
+
+/*
+ * Decode the full-map osd_primary_affinity vector. A zero length
+ * drops the affinity array entirely; otherwise the length must match
+ * max_osd exactly.
+ */
+static int decode_primary_affinity(void **p, void *end,
+ struct ceph_osdmap *map)
+{
+ u32 len, i;
+
+ ceph_decode_32_safe(p, end, len, e_inval);
+ if (len == 0) {
+ kfree(map->osd_primary_affinity);
+ map->osd_primary_affinity = NULL;
+ return 0;
+ }
+ if (len != map->max_osd)
+ goto e_inval;
+
+ ceph_decode_need(p, end, map->max_osd*sizeof(u32), e_inval);
+
+ for (i = 0; i < map->max_osd; i++) {
+ int ret;
+
+ ret = set_primary_affinity(map, i, ceph_decode_32(p));
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+/* Decode incremental (osd, affinity) pairs, one entry per changed osd. */
+static int decode_new_primary_affinity(void **p, void *end,
+ struct ceph_osdmap *map)
+{
+ u32 n;
+
+ ceph_decode_32_safe(p, end, n, e_inval);
+ while (n--) {
+ u32 osd, aff;
+ int ret;
+
+ ceph_decode_32_safe(p, end, osd, e_inval);
+ ceph_decode_32_safe(p, end, aff, e_inval);
+
+ ret = set_primary_affinity(map, osd, aff);
+ if (ret)
+ return ret;
+
+ pr_info("osd%d primary-affinity 0x%x\n", osd, aff);
+ }
+
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+/*
+ * Decode a full map into @map.
+ *
+ * On failure @map may be partially filled in; the caller is expected
+ * to dispose of it (see ceph_osdmap_decode()).
+ */
+static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
+{
+ u8 struct_v;
+ u32 epoch = 0;
+ void *start = *p;
+ u32 max;
+ u32 len, i;
+ int err;
+
+ dout("%s %p to %p len %d\n", __func__, *p, end, (int)(end - *p));
+
+ err = get_osdmap_client_data_v(p, end, "full", &struct_v);
+ if (err)
+ goto bad;
+
+ /* fsid, epoch, created, modified */
+ ceph_decode_need(p, end, sizeof(map->fsid) + sizeof(u32) +
+ sizeof(map->created) + sizeof(map->modified), e_inval);
+ ceph_decode_copy(p, &map->fsid, sizeof(map->fsid));
+ epoch = map->epoch = ceph_decode_32(p); /* epoch kept for error report */
+ ceph_decode_copy(p, &map->created, sizeof(map->created));
+ ceph_decode_copy(p, &map->modified, sizeof(map->modified));
+
+ /* pools */
+ err = decode_pools(p, end, map);
+ if (err)
+ goto bad;
+
+ /* pool_name */
+ err = decode_pool_names(p, end, map);
+ if (err)
+ goto bad;
+
+ ceph_decode_32_safe(p, end, map->pool_max, e_inval);
+
+ ceph_decode_32_safe(p, end, map->flags, e_inval);
+
+ /* max_osd */
+ ceph_decode_32_safe(p, end, max, e_inval);
+
+ /* (re)alloc osd arrays */
+ err = osdmap_set_max_osd(map, max);
+ if (err)
+ goto bad;
+
+ /* osd_state, osd_weight, osd_addrs->client_addr */
+ ceph_decode_need(p, end, 3*sizeof(u32) +
+ map->max_osd*(1 + sizeof(*map->osd_weight) +
+ sizeof(*map->osd_addr)), e_inval);
+
+ /* each array is preceded by its length, which must equal max_osd */
+ if (ceph_decode_32(p) != map->max_osd)
+ goto e_inval;
+
+ ceph_decode_copy(p, map->osd_state, map->max_osd);
+
+ if (ceph_decode_32(p) != map->max_osd)
+ goto e_inval;
+
+ for (i = 0; i < map->max_osd; i++)
+ map->osd_weight[i] = ceph_decode_32(p);
+
+ if (ceph_decode_32(p) != map->max_osd)
+ goto e_inval;
+
+ ceph_decode_copy(p, map->osd_addr, map->max_osd*sizeof(*map->osd_addr));
+ for (i = 0; i < map->max_osd; i++)
+ ceph_decode_addr(&map->osd_addr[i]);
+
+ /* pg_temp */
+ err = decode_pg_temp(p, end, map);
+ if (err)
+ goto bad;
+
+ /* primary_temp */
+ if (struct_v >= 1) {
+ err = decode_primary_temp(p, end, map);
+ if (err)
+ goto bad;
+ }
+
+ /* primary_affinity */
+ if (struct_v >= 2) {
+ err = decode_primary_affinity(p, end, map);
+ if (err)
+ goto bad;
+ } else {
+ /* XXX can this happen? */
+ kfree(map->osd_primary_affinity);
+ map->osd_primary_affinity = NULL;
+ }
+
+ /* crush (length-prefixed; clamp the decode window to @end) */
+ ceph_decode_32_safe(p, end, len, e_inval);
+ map->crush = crush_decode(*p, min(*p + len, end));
+ if (IS_ERR(map->crush)) {
+ err = PTR_ERR(map->crush);
+ map->crush = NULL;
+ goto bad;
+ }
+ *p += len;
+
+ /* ignore the rest */
+ *p = end;
+
+ dout("full osdmap epoch %d max_osd %d\n", map->epoch, map->max_osd);
+ return 0;
+
+e_inval:
+ err = -EINVAL;
+bad:
+ pr_err("corrupt full osdmap (%d) epoch %d off %d (%p of %p-%p)\n",
+ err, epoch, (int)(*p - start), *p, start, end);
+ print_hex_dump(KERN_DEBUG, "osdmap: ",
+ DUMP_PREFIX_OFFSET, 16, 1,
+ start, end - start, true);
+ return err;
+}
+
+/*
+ * Allocate and decode a full map.
+ *
+ * Returns the new map or an ERR_PTR; on decode failure the partially
+ * built map is destroyed here.
+ */
+struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
+{
+ struct ceph_osdmap *map;
+ int ret;
+
+ map = kzalloc(sizeof(*map), GFP_NOFS);
+ if (!map)
+ return ERR_PTR(-ENOMEM);
+
+ map->pg_temp = RB_ROOT;
+ map->primary_temp = RB_ROOT;
+ mutex_init(&map->crush_scratch_mutex);
+
+ ret = osdmap_decode(p, end, map);
+ if (ret) {
+ ceph_osdmap_destroy(map);
+ return ERR_PTR(ret);
+ }
+
+ return map;
+}
+
+/*
+ * Decode and apply an incremental map update to @map.
+ *
+ * Returns the resulting map: normally @map updated in place, or, when
+ * the incremental carries an embedded full map, a freshly decoded map
+ * (in which case the caller still owns the old @map).
+ */
+struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
+ struct ceph_osdmap *map,
+ struct ceph_messenger *msgr)
+{
+ struct crush_map *newcrush = NULL;
+ struct ceph_fsid fsid;
+ u32 epoch = 0;
+ struct ceph_timespec modified;
+ s32 len;
+ u64 pool;
+ __s64 new_pool_max;
+ __s32 new_flags, max;
+ void *start = *p;
+ int err;
+ u8 struct_v;
+
+ dout("%s %p to %p len %d\n", __func__, *p, end, (int)(end - *p));
+
+ err = get_osdmap_client_data_v(p, end, "inc", &struct_v);
+ if (err)
+ goto bad;
+
+ /* fsid, epoch, modified, new_pool_max, new_flags */
+ ceph_decode_need(p, end, sizeof(fsid) + sizeof(u32) + sizeof(modified) +
+ sizeof(u64) + sizeof(u32), e_inval);
+ ceph_decode_copy(p, &fsid, sizeof(fsid));
+ epoch = ceph_decode_32(p);
+ /* incrementals must be applied strictly in order */
+ BUG_ON(epoch != map->epoch+1);
+ ceph_decode_copy(p, &modified, sizeof(modified));
+ new_pool_max = ceph_decode_64(p);
+ new_flags = ceph_decode_32(p);
+
+ /* full map? */
+ ceph_decode_32_safe(p, end, len, e_inval);
+ if (len > 0) {
+ dout("apply_incremental full map len %d, %p to %p\n",
+ len, *p, end);
+ return ceph_osdmap_decode(p, min(*p+len, end));
+ }
+
+ /* new crush? */
+ ceph_decode_32_safe(p, end, len, e_inval);
+ if (len > 0) {
+ newcrush = crush_decode(*p, min(*p+len, end));
+ if (IS_ERR(newcrush)) {
+ err = PTR_ERR(newcrush);
+ newcrush = NULL;
+ goto bad;
+ }
+ *p += len;
+ }
+
+ /* new flags? (negative values mean "unchanged") */
+ if (new_flags >= 0)
+ map->flags = new_flags;
+ if (new_pool_max >= 0)
+ map->pool_max = new_pool_max;
+
+ /* new max? */
+ ceph_decode_32_safe(p, end, max, e_inval);
+ if (max >= 0) {
+ err = osdmap_set_max_osd(map, max);
+ if (err)
+ goto bad;
+ }
+
+ map->epoch++;
+ map->modified = modified;
+ if (newcrush) {
+ if (map->crush)
+ crush_destroy(map->crush);
+ map->crush = newcrush;
+ newcrush = NULL; /* ownership transferred to map */
+ }
+
+ /* new_pools */
+ err = decode_new_pools(p, end, map);
+ if (err)
+ goto bad;
+
+ /* new_pool_names */
+ err = decode_pool_names(p, end, map);
+ if (err)
+ goto bad;
+
+ /* old_pool */
+ ceph_decode_32_safe(p, end, len, e_inval);
+ while (len--) {
+ struct ceph_pg_pool_info *pi;
+
+ ceph_decode_64_safe(p, end, pool, e_inval);
+ pi = __lookup_pg_pool(&map->pg_pools, pool);
+ if (pi)
+ __remove_pg_pool(&map->pg_pools, pi);
+ }
+
+ /* new_up */
+ ceph_decode_32_safe(p, end, len, e_inval);
+ while (len--) {
+ u32 osd;
+ struct ceph_entity_addr addr;
+ ceph_decode_32_safe(p, end, osd, e_inval);
+ ceph_decode_copy_safe(p, end, &addr, sizeof(addr), e_inval);
+ ceph_decode_addr(&addr);
+ pr_info("osd%d up\n", osd);
+ BUG_ON(osd >= map->max_osd);
+ map->osd_state[osd] |= CEPH_OSD_UP;
+ map->osd_addr[osd] = addr;
+ }
+
+ /* new_state (bits to be xor'ed into osd_state) */
+ ceph_decode_32_safe(p, end, len, e_inval);
+ while (len--) {
+ u32 osd;
+ u8 xorstate;
+ ceph_decode_32_safe(p, end, osd, e_inval);
+ xorstate = **(u8 **)p;
+ (*p)++; /* clean flag */
+ if (xorstate == 0)
+ xorstate = CEPH_OSD_UP;
+ if (xorstate & CEPH_OSD_UP)
+ pr_info("osd%d down\n", osd);
+ if (osd < map->max_osd)
+ map->osd_state[osd] ^= xorstate;
+ }
+
+ /* new_weight */
+ ceph_decode_32_safe(p, end, len, e_inval);
+ while (len--) {
+ u32 osd, off;
+ ceph_decode_need(p, end, sizeof(u32)*2, e_inval);
+ osd = ceph_decode_32(p);
+ off = ceph_decode_32(p);
+ pr_info("osd%d weight 0x%x %s\n", osd, off,
+ off == CEPH_OSD_IN ? "(in)" :
+ (off == CEPH_OSD_OUT ? "(out)" : ""));
+ if (osd < map->max_osd)
+ map->osd_weight[osd] = off;
+ }
+
+ /* new_pg_temp */
+ err = decode_new_pg_temp(p, end, map);
+ if (err)
+ goto bad;
+
+ /* new_primary_temp */
+ if (struct_v >= 1) {
+ err = decode_new_primary_temp(p, end, map);
+ if (err)
+ goto bad;
+ }
+
+ /* new_primary_affinity */
+ if (struct_v >= 2) {
+ err = decode_new_primary_affinity(p, end, map);
+ if (err)
+ goto bad;
+ }
+
+ /* ignore the rest */
+ *p = end;
+
+ dout("inc osdmap epoch %d max_osd %d\n", map->epoch, map->max_osd);
+ return map;
+
+e_inval:
+ err = -EINVAL;
+bad:
+ pr_err("corrupt inc osdmap (%d) epoch %d off %d (%p of %p-%p)\n",
+ err, epoch, (int)(*p - start), *p, start, end);
+ print_hex_dump(KERN_DEBUG, "osdmap: ",
+ DUMP_PREFIX_OFFSET, 16, 1,
+ start, end - start, true);
+ if (newcrush)
+ crush_destroy(newcrush);
+ return ERR_PTR(err);
+}
+
+
+
+
+/*
+ * calculate file layout from given offset, length.
+ * fill in correct oid, logical length, and object extent
+ * offset, length.
+ *
+ * for now, we write only a single su, until we can
+ * pass a stride back to the caller.
+ *
+ * Returns 0, or -EINVAL (with the outputs zeroed) for a layout with a
+ * zero stripe unit/count, su > object size, or a non-page-aligned su.
+ */
+int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
+ u64 off, u64 len,
+ u64 *ono,
+ u64 *oxoff, u64 *oxlen)
+{
+ u32 osize = le32_to_cpu(layout->fl_object_size);
+ u32 su = le32_to_cpu(layout->fl_stripe_unit);
+ u32 sc = le32_to_cpu(layout->fl_stripe_count);
+ u32 bl, stripeno, stripepos, objsetno;
+ u32 su_per_object;
+ u64 t, su_offset;
+
+ dout("mapping %llu~%llu osize %u fl_su %u\n", off, len,
+ osize, su);
+ if (su == 0 || sc == 0)
+ goto invalid;
+ su_per_object = osize / su;
+ if (su_per_object == 0)
+ goto invalid;
+ dout("osize %u / su %u = su_per_object %u\n", osize, su,
+ su_per_object);
+
+ if ((su & ~PAGE_MASK) != 0)
+ goto invalid;
+
+ /* bl = *off / su; */
+ t = off;
+ do_div(t, su);
+ /* NOTE(review): bl is u32; off/su beyond 2^32 would truncate here
+ * — confirm callers' offset ranges. */
+ bl = t;
+ dout("off %llu / su %u = bl %u\n", off, su, bl);
+
+ stripeno = bl / sc;
+ stripepos = bl % sc;
+ objsetno = stripeno / su_per_object;
+
+ *ono = objsetno * sc + stripepos;
+ dout("objset %u * sc %u = ono %u\n", objsetno, sc, (unsigned int)*ono);
+
+ /* *oxoff = *off % layout->fl_stripe_unit; # offset in su */
+ t = off;
+ su_offset = do_div(t, su);
+ *oxoff = su_offset + (stripeno % su_per_object) * su;
+
+ /*
+ * Calculate the length of the extent being written to the selected
+ * object. This is the minimum of the full length requested (len) or
+ * the remainder of the current stripe being written to.
+ */
+ *oxlen = min_t(u64, len, su - su_offset);
+
+ dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
+ return 0;
+
+invalid:
+ dout(" invalid layout\n");
+ *ono = 0;
+ *oxoff = 0;
+ *oxlen = 0;
+ return -EINVAL;
+}
+EXPORT_SYMBOL(ceph_calc_file_object_mapping);
+
+/*
+ * Calculate mapping of a (oloc, oid) pair to a PG. Should only be
+ * called with target's (oloc, oid), since tiering isn't taken into
+ * account.
+ *
+ * Returns -EIO if the pool id in @oloc is not present in the map.
+ */
+int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap,
+ struct ceph_object_locator *oloc,
+ struct ceph_object_id *oid,
+ struct ceph_pg *pg_out)
+{
+ struct ceph_pg_pool_info *pi;
+
+ pi = __lookup_pg_pool(&osdmap->pg_pools, oloc->pool);
+ if (!pi)
+ return -EIO;
+
+ /* seed is the pool's hash of the object name */
+ pg_out->pool = oloc->pool;
+ pg_out->seed = ceph_str_hash(pi->object_hash, oid->name,
+ oid->name_len);
+
+ dout("%s '%.*s' pgid %llu.%x\n", __func__, oid->name_len, oid->name,
+ pg_out->pool, pg_out->seed);
+ return 0;
+}
+EXPORT_SYMBOL(ceph_oloc_oid_to_pg);
+
+/*
+ * Run a crush rule under the map's scratch-buffer lock.
+ * @result must have room for at most CEPH_PG_MAX_SIZE entries.
+ */
+static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
+ int *result, int result_max,
+ const __u32 *weight, int weight_max)
+{
+ int r;
+
+ BUG_ON(result_max > CEPH_PG_MAX_SIZE);
+
+ /* crush_scratch_ary is shared; serialize its use */
+ mutex_lock(&map->crush_scratch_mutex);
+ r = crush_do_rule(map->crush, ruleno, x, result, result_max,
+ weight, weight_max, map->crush_scratch_ary);
+ mutex_unlock(&map->crush_scratch_mutex);
+
+ return r;
+}
+
+/*
+ * Calculate raw (crush) set for given pgid.
+ *
+ * Return raw set length, or error (-ENOENT when no crush rule matches
+ * the pool's ruleset/type/size).
+ */
+static int pg_to_raw_osds(struct ceph_osdmap *osdmap,
+ struct ceph_pg_pool_info *pool,
+ struct ceph_pg pgid, u32 pps, int *osds)
+{
+ int ruleno;
+ int len;
+
+ /* crush */
+ ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset,
+ pool->type, pool->size);
+ if (ruleno < 0) {
+ pr_err("no crush rule: pool %lld ruleset %d type %d size %d\n",
+ pgid.pool, pool->crush_ruleset, pool->type,
+ pool->size);
+ return -ENOENT;
+ }
+
+ len = do_crush(osdmap, ruleno, pps, osds,
+ min_t(int, pool->size, CEPH_PG_MAX_SIZE),
+ osdmap->osd_weight, osdmap->max_osd);
+ if (len < 0) {
+ pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n",
+ len, ruleno, pgid.pool, pool->crush_ruleset,
+ pool->type, pool->size);
+ return len;
+ }
+
+ return len;
+}
+
+/*
+ * Given raw set, calculate up set and up primary.
+ *
+ * Return up set length. *primary is set to up primary osd id, or -1
+ * if up set is empty.
+ */
+static int raw_to_up_osds(struct ceph_osdmap *osdmap,
+ struct ceph_pg_pool_info *pool,
+ int *osds, int len, int *primary)
+{
+ int up_primary = -1;
+ int i;
+
+ if (ceph_can_shift_osds(pool)) {
+ int removed = 0;
+
+ /* shiftable (replicated): compact down osds out of the set */
+ for (i = 0; i < len; i++) {
+ if (ceph_osd_is_down(osdmap, osds[i])) {
+ removed++;
+ continue;
+ }
+ if (removed)
+ osds[i - removed] = osds[i];
+ }
+
+ len -= removed;
+ if (len > 0)
+ up_primary = osds[0];
+ } else {
+ /* positional (e.g. erasure-coded): keep slots, mark holes */
+ for (i = len - 1; i >= 0; i--) {
+ if (ceph_osd_is_down(osdmap, osds[i]))
+ osds[i] = CRUSH_ITEM_NONE;
+ else
+ up_primary = osds[i];
+ }
+ }
+
+ *primary = up_primary;
+ return len;
+}
+
+/*
+ * Possibly reselect the primary based on per-osd primary-affinity
+ * values, rejecting an osd as primary with probability proportional to
+ * how far its affinity is below the maximum.
+ */
+static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps,
+ struct ceph_pg_pool_info *pool,
+ int *osds, int len, int *primary)
+{
+ int i;
+ int pos = -1;
+
+ /*
+ * Do we have any non-default primary_affinity values for these
+ * osds?
+ */
+ if (!osdmap->osd_primary_affinity)
+ return;
+
+ for (i = 0; i < len; i++) {
+ int osd = osds[i];
+
+ if (osd != CRUSH_ITEM_NONE &&
+ osdmap->osd_primary_affinity[osd] !=
+ CEPH_OSD_DEFAULT_PRIMARY_AFFINITY) {
+ break;
+ }
+ }
+ if (i == len)
+ return;
+
+ /*
+ * Pick the primary. Feed both the seed (for the pg) and the
+ * osd into the hash/rng so that a proportional fraction of an
+ * osd's pgs get rejected as primary.
+ */
+ for (i = 0; i < len; i++) {
+ int osd = osds[i];
+ u32 aff;
+
+ if (osd == CRUSH_ITEM_NONE)
+ continue;
+
+ aff = osdmap->osd_primary_affinity[osd];
+ if (aff < CEPH_OSD_MAX_PRIMARY_AFFINITY &&
+ (crush_hash32_2(CRUSH_HASH_RJENKINS1,
+ pps, osd) >> 16) >= aff) {
+ /*
+ * We chose not to use this primary. Note it
+ * anyway as a fallback in case we don't pick
+ * anyone else, but keep looking.
+ */
+ if (pos < 0)
+ pos = i;
+ } else {
+ pos = i;
+ break;
+ }
+ }
+ if (pos < 0)
+ return;
+
+ *primary = osds[pos];
+
+ if (ceph_can_shift_osds(pool) && pos > 0) {
+ /* move the new primary to the front */
+ for (i = pos; i > 0; i--)
+ osds[i] = osds[i - 1];
+ osds[0] = *primary;
+ }
+}
+
+/*
+ * Given up set, apply pg_temp and primary_temp mappings.
+ *
+ * Return acting set length. *primary is set to acting primary osd id,
+ * or -1 if acting set is empty.
+ */
+static int apply_temps(struct ceph_osdmap *osdmap,
+ struct ceph_pg_pool_info *pool, struct ceph_pg pgid,
+ int *osds, int len, int *primary)
+{
+ struct ceph_pg_mapping *pg;
+ int temp_len;
+ int temp_primary;
+ int i;
+
+ /* raw_pg -> pg (temp mappings are keyed by the stable pgid) */
+ pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num,
+ pool->pg_num_mask);
+
+ /* pg_temp? */
+ pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
+ if (pg) {
+ temp_len = 0;
+ temp_primary = -1;
+
+ /* replace the set: skip (or hole out) down osds */
+ for (i = 0; i < pg->pg_temp.len; i++) {
+ if (ceph_osd_is_down(osdmap, pg->pg_temp.osds[i])) {
+ if (ceph_can_shift_osds(pool))
+ continue;
+ else
+ osds[temp_len++] = CRUSH_ITEM_NONE;
+ } else {
+ osds[temp_len++] = pg->pg_temp.osds[i];
+ }
+ }
+
+ /* apply pg_temp's primary */
+ for (i = 0; i < temp_len; i++) {
+ if (osds[i] != CRUSH_ITEM_NONE) {
+ temp_primary = osds[i];
+ break;
+ }
+ }
+ } else {
+ temp_len = len;
+ temp_primary = *primary;
+ }
+
+ /* primary_temp? */
+ pg = __lookup_pg_mapping(&osdmap->primary_temp, pgid);
+ if (pg)
+ temp_primary = pg->primary_temp.osd;
+
+ *primary = temp_primary;
+ return temp_len;
+}
+
+/*
+ * Calculate acting set for given pgid.
+ *
+ * Return acting set length, or error. *primary is set to acting
+ * primary osd id, or -1 if acting set is empty or on error.
+ *
+ * @osds must have room for CEPH_PG_MAX_SIZE entries (see do_crush()).
+ */
+int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
+ int *osds, int *primary)
+{
+ struct ceph_pg_pool_info *pool;
+ u32 pps;
+ int len;
+
+ pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool);
+ if (!pool) {
+ *primary = -1;
+ return -ENOENT;
+ }
+
+ if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) {
+ /* hash pool id and seed so that pool PGs do not overlap */
+ pps = crush_hash32_2(CRUSH_HASH_RJENKINS1,
+ ceph_stable_mod(pgid.seed, pool->pgp_num,
+ pool->pgp_num_mask),
+ pgid.pool);
+ } else {
+ /*
+ * legacy behavior: add ps and pool together. this is
+ * not a great approach because the PGs from each pool
+ * will overlap on top of each other: 0.5 == 1.4 ==
+ * 2.3 == ...
+ */
+ pps = ceph_stable_mod(pgid.seed, pool->pgp_num,
+ pool->pgp_num_mask) +
+ (unsigned)pgid.pool;
+ }
+
+ /* raw crush set -> up set -> affinity-adjusted -> acting set */
+ len = pg_to_raw_osds(osdmap, pool, pgid, pps, osds);
+ if (len < 0) {
+ *primary = -1;
+ return len;
+ }
+
+ len = raw_to_up_osds(osdmap, pool, osds, len, primary);
+
+ apply_primary_affinity(osdmap, pps, pool, osds, len, primary);
+
+ len = apply_temps(osdmap, pool, pgid, osds, len, primary);
+
+ return len;
+}
+
+/*
+ * Return primary osd for given pgid, or -1 if none.
+ */
+int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
+{
+ int osds[CEPH_PG_MAX_SIZE];
+ int primary;
+
+ /* errors from ceph_calc_pg_acting() leave primary == -1 */
+ ceph_calc_pg_acting(osdmap, pgid, osds, &primary);
+
+ return primary;
+}
+EXPORT_SYMBOL(ceph_calc_pg_primary);
diff --git a/libceph/pagelist.c b/libceph/pagelist.c
new file mode 100644
index 0000000..92866be
--- /dev/null
+++ b/libceph/pagelist.c
@@ -0,0 +1,147 @@
+#include <linux/module.h>
+#include <linux/gfp.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/ceph/pagelist.h>
+
+/* Drop the kmap of the pagelist's last page, if one is mapped. */
+static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
+{
+ if (pl->mapped_tail) {
+ struct page *page = list_entry(pl->head.prev, struct page, lru);
+ kunmap(page);
+ pl->mapped_tail = NULL;
+ }
+}
+
+/* Free every page held by the pagelist, including its reserve. */
+int ceph_pagelist_release(struct ceph_pagelist *pl)
+{
+ ceph_pagelist_unmap_tail(pl);
+ while (!list_empty(&pl->head)) {
+ struct page *page = list_first_entry(&pl->head, struct page,
+ lru);
+ list_del(&page->lru);
+ __free_page(page);
+ }
+ ceph_pagelist_free_reserve(pl);
+ return 0;
+}
+EXPORT_SYMBOL(ceph_pagelist_release);
+
+/*
+ * Append one more page to the pagelist, taking it from the reserve
+ * when available, and kmap it as the new tail.
+ */
+static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
+{
+ struct page *page;
+
+ if (!pl->num_pages_free) {
+ page = __page_cache_alloc(GFP_NOFS);
+ } else {
+ page = list_first_entry(&pl->free_list, struct page, lru);
+ list_del(&page->lru);
+ --pl->num_pages_free;
+ }
+ if (!page)
+ return -ENOMEM;
+ pl->room += PAGE_SIZE;
+ ceph_pagelist_unmap_tail(pl);
+ list_add_tail(&page->lru, &pl->head);
+ pl->mapped_tail = kmap(page);
+ return 0;
+}
+
+/*
+ * Append @len bytes from @buf, filling the current tail page and
+ * growing the pagelist one page at a time as needed.
+ */
+int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len)
+{
+ while (pl->room < len) {
+ size_t bit = pl->room;
+ int ret;
+
+ /* fill the remainder of the tail page, then add a page */
+ memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK),
+ buf, bit);
+ pl->length += bit;
+ pl->room -= bit;
+ buf += bit;
+ len -= bit;
+ ret = ceph_pagelist_addpage(pl);
+ if (ret)
+ return ret;
+ }
+
+ memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len);
+ pl->length += len;
+ pl->room -= len;
+ return 0;
+}
+EXPORT_SYMBOL(ceph_pagelist_append);
+
+/* Allocate enough pages for a pagelist to append the given amount
+ * of data without allocating.
+ * Returns: 0 on success, -ENOMEM on error.
+ */
+int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space)
+{
+ if (space <= pl->room)
+ return 0;
+ space -= pl->room;
+ space = (space + PAGE_SIZE - 1) >> PAGE_SHIFT; /* conv to num pages */
+
+ while (space > pl->num_pages_free) {
+ struct page *page = __page_cache_alloc(GFP_NOFS);
+ if (!page)
+ return -ENOMEM;
+ list_add_tail(&page->lru, &pl->free_list);
+ ++pl->num_pages_free;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(ceph_pagelist_reserve);
+
+/* Free any pages that have been preallocated. */
+int ceph_pagelist_free_reserve(struct ceph_pagelist *pl)
+{
+ while (!list_empty(&pl->free_list)) {
+ struct page *page = list_first_entry(&pl->free_list,
+ struct page, lru);
+ list_del(&page->lru);
+ __free_page(page);
+ --pl->num_pages_free;
+ }
+ /* the counter must track the list exactly */
+ BUG_ON(pl->num_pages_free);
+ return 0;
+}
+EXPORT_SYMBOL(ceph_pagelist_free_reserve);
+
+/* Create a truncation point: snapshot the tail page and room left. */
+void ceph_pagelist_set_cursor(struct ceph_pagelist *pl,
+ struct ceph_pagelist_cursor *c)
+{
+ c->pl = pl;
+ c->page_lru = pl->head.prev;
+ c->room = pl->room;
+}
+EXPORT_SYMBOL(ceph_pagelist_set_cursor);
+
+/* Truncate a pagelist to the given point. Move extra pages to reserve.
+ * This won't sleep.
+ * Returns: 0 on success,
+ * -EINVAL if the pagelist doesn't match the trunc point pagelist
+ */
+int ceph_pagelist_truncate(struct ceph_pagelist *pl,
+ struct ceph_pagelist_cursor *c)
+{
+ struct page *page;
+
+ if (pl != c->pl)
+ return -EINVAL;
+ ceph_pagelist_unmap_tail(pl);
+ while (pl->head.prev != c->page_lru) {
+ page = list_entry(pl->head.prev, struct page, lru);
+ /* move from pagelist to reserve */
+ list_move_tail(&page->lru, &pl->free_list);
+ ++pl->num_pages_free;
+ }
+ pl->room = c->room;
+ if (!list_empty(&pl->head)) {
+ /* remap what is now the tail page */
+ page = list_entry(pl->head.prev, struct page, lru);
+ pl->mapped_tail = kmap(page);
+ }
+ return 0;
+}
+EXPORT_SYMBOL(ceph_pagelist_truncate);
diff --git a/libceph/pagevec.c b/libceph/pagevec.c
new file mode 100644
index 0000000..815a224
--- /dev/null
+++ b/libceph/pagevec.c
@@ -0,0 +1,231 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/writeback.h>
+
+#include <linux/ceph/libceph.h>
+
+/*
+ * build a vector of user pages
+ *
+ * Pins @num_pages of the caller's address space starting at @data;
+ * on failure all pages pinned so far are released.
+ */
+struct page **ceph_get_direct_page_vector(const void __user *data,
+ int num_pages, bool write_page)
+{
+ struct page **pages;
+ int got = 0;
+ int rc = 0;
+
+ pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
+ if (!pages)
+ return ERR_PTR(-ENOMEM);
+
+ down_read(&current->mm->mmap_sem);
+ while (got < num_pages) {
+ rc = get_user_pages(current, current->mm,
+ (unsigned long)data + ((unsigned long)got * PAGE_SIZE),
+ num_pages - got, write_page, 0, pages + got, NULL);
+ if (rc < 0)
+ break;
+ BUG_ON(rc == 0);
+ got += rc;
+ }
+ up_read(&current->mm->mmap_sem);
+ if (rc < 0)
+ goto fail;
+ return pages;
+
+fail:
+ ceph_put_page_vector(pages, got, false);
+ return ERR_PTR(rc);
+}
+EXPORT_SYMBOL(ceph_get_direct_page_vector);
+
+/*
+ * Unpin a page vector (optionally marking each page dirty first) and
+ * free the vector itself.
+ */
+void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
+{
+ int i;
+
+ for (i = 0; i < num_pages; i++) {
+ if (dirty)
+ set_page_dirty_lock(pages[i]);
+ put_page(pages[i]);
+ }
+ kfree(pages);
+}
+EXPORT_SYMBOL(ceph_put_page_vector);
+
+/* Free pages allocated by ceph_alloc_page_vector(), then the vector. */
+void ceph_release_page_vector(struct page **pages, int num_pages)
+{
+ int i;
+
+ for (i = 0; i < num_pages; i++)
+ __free_pages(pages[i], 0);
+ kfree(pages);
+}
+EXPORT_SYMBOL(ceph_release_page_vector);
+
+/*
+ * allocate a vector of new pages
+ *
+ * Returns the vector or ERR_PTR(-ENOMEM); nothing leaks on failure.
+ */
+struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
+{
+ struct page **pages;
+ int i;
+
+ pages = kmalloc(sizeof(*pages) * num_pages, flags);
+ if (!pages)
+ return ERR_PTR(-ENOMEM);
+ for (i = 0; i < num_pages; i++) {
+ pages[i] = __page_cache_alloc(flags);
+ if (pages[i] == NULL) {
+ /* release the i pages allocated so far */
+ ceph_release_page_vector(pages, i);
+ return ERR_PTR(-ENOMEM);
+ }
+ }
+ return pages;
+}
+EXPORT_SYMBOL(ceph_alloc_page_vector);
+
+/*
+ * copy user data into a page vector
+ *
+ * @off is the byte offset into the first page. Returns @len, or
+ * -EFAULT if a copy_from_user() makes no progress at all.
+ */
+int ceph_copy_user_to_page_vector(struct page **pages,
+ const void __user *data,
+ loff_t off, size_t len)
+{
+ int i = 0;
+ int po = off & ~PAGE_CACHE_MASK;
+ int left = len;
+ int l, bad;
+
+ while (left > 0) {
+ l = min_t(int, PAGE_CACHE_SIZE-po, left);
+ bad = copy_from_user(page_address(pages[i]) + po, data, l);
+ if (bad == l)
+ return -EFAULT;
+ /* advance only by what was actually copied */
+ data += l - bad;
+ left -= l - bad;
+ po += l - bad;
+ if (po == PAGE_CACHE_SIZE) {
+ po = 0;
+ i++;
+ }
+ }
+ return len;
+}
+EXPORT_SYMBOL(ceph_copy_user_to_page_vector);
+
+/*
+ * Copy kernel data into a page vector; @off is the byte offset into
+ * the first page.
+ */
+void ceph_copy_to_page_vector(struct page **pages,
+ const void *data,
+ loff_t off, size_t len)
+{
+ int i = 0;
+ size_t po = off & ~PAGE_CACHE_MASK;
+ size_t left = len;
+
+ while (left > 0) {
+ size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
+
+ memcpy(page_address(pages[i]) + po, data, l);
+ data += l;
+ left -= l;
+ po += l;
+ if (po == PAGE_CACHE_SIZE) {
+ po = 0;
+ i++;
+ }
+ }
+}
+EXPORT_SYMBOL(ceph_copy_to_page_vector);
+
+/*
+ * Copy data out of a page vector into a kernel buffer; @off is the
+ * byte offset into the first page.
+ */
+void ceph_copy_from_page_vector(struct page **pages,
+ void *data,
+ loff_t off, size_t len)
+{
+ int i = 0;
+ size_t po = off & ~PAGE_CACHE_MASK;
+ size_t left = len;
+
+ while (left > 0) {
+ size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
+
+ memcpy(data, page_address(pages[i]) + po, l);
+ data += l;
+ left -= l;
+ po += l;
+ if (po == PAGE_CACHE_SIZE) {
+ po = 0;
+ i++;
+ }
+ }
+}
+EXPORT_SYMBOL(ceph_copy_from_page_vector);
+
+/*
+ * copy data from a page vector into a user pointer
+ *
+ * @off is the byte offset into the first page. Returns @len, or
+ * -EFAULT if a copy_to_user() makes no progress at all.
+ */
+int ceph_copy_page_vector_to_user(struct page **pages,
+ void __user *data,
+ loff_t off, size_t len)
+{
+ int i = 0;
+ int po = off & ~PAGE_CACHE_MASK;
+ int left = len;
+ int l, bad;
+
+ while (left > 0) {
+ l = min_t(int, left, PAGE_CACHE_SIZE-po);
+ bad = copy_to_user(data, page_address(pages[i]) + po, l);
+ if (bad == l)
+ return -EFAULT;
+ data += l - bad;
+ left -= l - bad;
+ /* NOTE(review): on a partial fault (0 < bad < l) the page
+ * index still advances, skipping the uncopied tail of this
+ * page — confirm callers tolerate this. */
+ if (po) {
+ po += l - bad;
+ if (po == PAGE_CACHE_SIZE)
+ po = 0;
+ }
+ i++;
+ }
+ return len;
+}
+EXPORT_SYMBOL(ceph_copy_page_vector_to_user);
+
+/*
+ * Zero an extent within a page vector. Offset is relative to the
+ * start of the first page.
+ */
+void ceph_zero_page_vector_range(int off, int len, struct page **pages)
+{
+ int i = off >> PAGE_CACHE_SHIFT;
+
+ off &= ~PAGE_CACHE_MASK;
+
+ dout("zero_page_vector_page %u~%u\n", off, len);
+
+ /* leading partial page? */
+ if (off) {
+ int end = min((int)PAGE_CACHE_SIZE, off + len);
+ dout("zeroing %d %p head from %d\n", i, pages[i],
+ (int)off);
+ zero_user_segment(pages[i], off, end);
+ len -= (end - off);
+ i++;
+ }
+ /* whole pages */
+ while (len >= PAGE_CACHE_SIZE) {
+ dout("zeroing %d %p len=%d\n", i, pages[i], len);
+ zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
+ len -= PAGE_CACHE_SIZE;
+ i++;
+ }
+ /* trailing partial page? */
+ if (len) {
+ dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len);
+ zero_user_segment(pages[i], 0, len);
+ }
+}
+EXPORT_SYMBOL(ceph_zero_page_vector_range);
+
diff --git a/libceph/snapshot.c b/libceph/snapshot.c
new file mode 100644
index 0000000..154683f
--- /dev/null
+++ b/libceph/snapshot.c
@@ -0,0 +1,78 @@
+/*
+ * snapshot.c Ceph snapshot context utility routines (part of libceph)
+ *
+ * Copyright (C) 2013 Inktank Storage, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <stddef.h>
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/ceph/libceph.h>
+
+/*
+ * Ceph snapshot contexts are reference counted objects, and the
+ * returned structure holds a single reference. Acquire additional
+ * references with ceph_get_snap_context(), and release them with
+ * ceph_put_snap_context(). When the reference count reaches zero
+ * the entire structure is freed.
+ */
+
+/*
+ * Create a new ceph snapshot context large enough to hold the
+ * indicated number of snapshot ids (which can be 0). Caller has
+ * to fill in snapc->seq and snapc->snaps[0..snap_count-1].
+ *
+ * Returns a null pointer if an error occurs.
+ */
+struct ceph_snap_context *ceph_create_snap_context(u32 snap_count,
+ gfp_t gfp_flags)
+{
+ struct ceph_snap_context *snapc;
+ size_t size;
+
+ /*
+ * Allocate the header plus a trailing array of snap_count ids.
+ * NOTE(review): the multiplication is unchecked; on 32-bit a
+ * huge snap_count could wrap 'size' -- confirm callers bound it.
+ */
+ size = sizeof (struct ceph_snap_context);
+ size += snap_count * sizeof (snapc->snaps[0]);
+ snapc = kzalloc(size, gfp_flags);
+ if (!snapc)
+ return NULL;
+
+ /* kzalloc zeroed seq and snaps[]; caller fills them in */
+ atomic_set(&snapc->nref, 1);
+ snapc->num_snaps = snap_count;
+
+ return snapc;
+}
+EXPORT_SYMBOL(ceph_create_snap_context);
+
+struct ceph_snap_context *ceph_get_snap_context(struct ceph_snap_context *sc)
+{
+ /* Take an additional reference; passing NULL is a harmless no-op. */
+ if (!sc)
+ return NULL;
+ atomic_inc(&sc->nref);
+ return sc;
+}
+EXPORT_SYMBOL(ceph_get_snap_context);
+
+void ceph_put_snap_context(struct ceph_snap_context *sc)
+{
+ /* dropping a NULL context is a no-op */
+ if (!sc)
+ return;
+ /* free the whole structure when the last reference goes away */
+ if (atomic_dec_and_test(&sc->nref)) {
+ /*printk(" deleting snap_context %p\n", sc);*/
+ kfree(sc);
+ }
+}
+EXPORT_SYMBOL(ceph_put_snap_context);
diff --git a/linux/ceph/auth.h b/linux/ceph/auth.h
new file mode 100644
index 0000000..5f33868
--- /dev/null
+++ b/linux/ceph/auth.h
@@ -0,0 +1,116 @@
+#ifndef _FS_CEPH_AUTH_H
+#define _FS_CEPH_AUTH_H
+
+#include <linux/ceph/types.h>
+#include <linux/ceph/buffer.h>
+
+/*
+ * Abstract interface for communicating with the authentication module.
+ * There is some handshake that takes place between us and the monitor
+ * to acquire the necessary keys. These are used to generate an
+ * 'authorizer' that we use when connecting to a service (mds, osd).
+ */
+
+struct ceph_auth_client;
+struct ceph_authorizer;
+
+struct ceph_auth_handshake {
+ struct ceph_authorizer *authorizer;
+ void *authorizer_buf; /* authorizer data sent to the service */
+ size_t authorizer_buf_len;
+ void *authorizer_reply_buf; /* receives the service's reply */
+ size_t authorizer_reply_buf_len;
+};
+
+struct ceph_auth_client_ops {
+ const char *name; /* protocol name, for debugging */
+
+ /*
+ * true if we are authenticated and can connect to
+ * services.
+ */
+ int (*is_authenticated)(struct ceph_auth_client *ac);
+
+ /*
+ * true if we should (re)authenticate, e.g., when our tickets
+ * are getting old and crusty.
+ */
+ int (*should_authenticate)(struct ceph_auth_client *ac);
+
+ /*
+ * build requests and process replies during monitor
+ * handshake. if handle_reply returns -EAGAIN, we build
+ * another request.
+ */
+ int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end);
+ int (*handle_reply)(struct ceph_auth_client *ac, int result,
+ void *buf, void *end);
+
+ /*
+ * Create authorizer for connecting to a service, and verify
+ * the response to authenticate the service.
+ */
+ int (*create_authorizer)(struct ceph_auth_client *ac, int peer_type,
+ struct ceph_auth_handshake *auth);
+ /* ensure that an existing authorizer is up to date */
+ int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type,
+ struct ceph_auth_handshake *auth);
+ int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a, size_t len);
+ void (*destroy_authorizer)(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a);
+ /* drop cached authorizer state for peer_type (presumably to
+ * force renewal -- confirm against the protocol implementations) */
+ void (*invalidate_authorizer)(struct ceph_auth_client *ac,
+ int peer_type);
+
+ /* reset when we (re)connect to a monitor */
+ void (*reset)(struct ceph_auth_client *ac);
+
+ /* tear down all protocol-private state */
+ void (*destroy)(struct ceph_auth_client *ac);
+};
+
+struct ceph_auth_client {
+ u32 protocol; /* CEPH_AUTH_* */
+ void *private; /* for use by protocol implementation */
+ const struct ceph_auth_client_ops *ops; /* null iff protocol==0 */
+
+ bool negotiating; /* true if negotiating protocol */
+ const char *name; /* entity name */
+ u64 global_id; /* our unique id in system */
+ const struct ceph_crypto_key *key; /* our secret key */
+ unsigned want_keys; /* which services we want */
+
+ struct mutex mutex; /* NOTE(review): presumably guards the
+ fields above -- confirm in auth.c */
+};
+
+extern struct ceph_auth_client *ceph_auth_init(const char *name,
+ const struct ceph_crypto_key *key);
+extern void ceph_auth_destroy(struct ceph_auth_client *ac);
+
+extern void ceph_auth_reset(struct ceph_auth_client *ac);
+
+extern int ceph_auth_build_hello(struct ceph_auth_client *ac,
+ void *buf, size_t len);
+extern int ceph_handle_auth_reply(struct ceph_auth_client *ac,
+ void *buf, size_t len,
+ void *reply_buf, size_t reply_len);
+extern int ceph_entity_name_encode(const char *name, void **p, void *end);
+
+extern int ceph_build_auth(struct ceph_auth_client *ac,
+ void *msg_buf, size_t msg_len);
+
+extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac);
+extern int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
+ int peer_type,
+ struct ceph_auth_handshake *auth);
+extern void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a);
+extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
+ int peer_type,
+ struct ceph_auth_handshake *a);
+extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a,
+ size_t len);
+extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
+ int peer_type);
+
+#endif
diff --git a/linux/ceph/buffer.h b/linux/ceph/buffer.h
new file mode 100644
index 0000000..07ad423
--- /dev/null
+++ b/linux/ceph/buffer.h
@@ -0,0 +1,38 @@
+#ifndef __FS_CEPH_BUFFER_H
+#define __FS_CEPH_BUFFER_H
+
+#include <linux/kref.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/types.h>
+#include <linux/uio.h>
+
+/*
+ * a simple reference counted buffer.
+ *
+ * use kmalloc for small sizes (<= one page), vmalloc for larger
+ * sizes.
+ */
+struct ceph_buffer {
+ struct kref kref; /* reference count */
+ struct kvec vec; /* iov_base/iov_len: the buffered data */
+ size_t alloc_len; /* allocated size -- presumably may exceed
+ vec.iov_len; confirm in ceph_buffer_new */
+};
+
+extern struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp);
+extern void ceph_buffer_release(struct kref *kref);
+
+static inline struct ceph_buffer *ceph_buffer_get(struct ceph_buffer *b)
+{
+ /* take an extra reference; caller must not pass NULL */
+ kref_get(&b->kref);
+ return b;
+}
+
+static inline void ceph_buffer_put(struct ceph_buffer *b)
+{
+ /* drop a reference (b must be non-NULL); ceph_buffer_release
+ * runs on the final put */
+ kref_put(&b->kref, ceph_buffer_release);
+}
+
+extern int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end);
+
+#endif
diff --git a/linux/ceph/ceph_debug.h b/linux/ceph/ceph_debug.h
new file mode 100644
index 0000000..aa2e191
--- /dev/null
+++ b/linux/ceph/ceph_debug.h
@@ -0,0 +1,38 @@
+#ifndef _FS_CEPH_DEBUG_H
+#define _FS_CEPH_DEBUG_H
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#ifdef CONFIG_CEPH_LIB_PRETTYDEBUG
+
+/*
+ * wrap pr_debug to include a filename:lineno prefix on each line.
+ * this incurs some overhead (kernel size and execution time) due to
+ * the extra function call at each call site.
+ */
+
+# if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
+extern const char *ceph_file_part(const char *s, int len);
+# define dout(fmt, ...) \
+ pr_debug("%.*s %12.12s:%-4d : " fmt, \
+ 8 - (int)sizeof(KBUILD_MODNAME), " ", \
+ ceph_file_part(__FILE__, sizeof(__FILE__)), \
+ __LINE__, ##__VA_ARGS__)
+# else
+/* faux printk call just to see any compiler warnings. */
+# define dout(fmt, ...) do { \
+ if (0) \
+ printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
+ } while (0)
+# endif
+
+#else
+
+/*
+ * or, just wrap pr_debug
+ */
+# define dout(fmt, ...) pr_debug(" " fmt, ##__VA_ARGS__)
+
+#endif
+
+#endif
diff --git a/linux/ceph/ceph_features.h b/linux/ceph/ceph_features.h
new file mode 100644
index 0000000..d12659c
--- /dev/null
+++ b/linux/ceph/ceph_features.h
@@ -0,0 +1,104 @@
+#ifndef __CEPH_FEATURES
+#define __CEPH_FEATURES
+
+/*
+ * feature bits
+ */
+#define CEPH_FEATURE_UID (1ULL<<0)
+#define CEPH_FEATURE_NOSRCADDR (1ULL<<1)
+#define CEPH_FEATURE_MONCLOCKCHECK (1ULL<<2)
+#define CEPH_FEATURE_FLOCK (1ULL<<3)
+#define CEPH_FEATURE_SUBSCRIBE2 (1ULL<<4)
+#define CEPH_FEATURE_MONNAMES (1ULL<<5)
+#define CEPH_FEATURE_RECONNECT_SEQ (1ULL<<6)
+#define CEPH_FEATURE_DIRLAYOUTHASH (1ULL<<7)
+#define CEPH_FEATURE_OBJECTLOCATOR (1ULL<<8)
+#define CEPH_FEATURE_PGID64 (1ULL<<9)
+#define CEPH_FEATURE_INCSUBOSDMAP (1ULL<<10)
+#define CEPH_FEATURE_PGPOOL3 (1ULL<<11)
+#define CEPH_FEATURE_OSDREPLYMUX (1ULL<<12)
+#define CEPH_FEATURE_OSDENC (1ULL<<13)
+#define CEPH_FEATURE_OMAP (1ULL<<14)
+#define CEPH_FEATURE_MONENC (1ULL<<15)
+#define CEPH_FEATURE_QUERY_T (1ULL<<16)
+#define CEPH_FEATURE_INDEP_PG_MAP (1ULL<<17)
+#define CEPH_FEATURE_CRUSH_TUNABLES (1ULL<<18)
+#define CEPH_FEATURE_CHUNKY_SCRUB (1ULL<<19)
+#define CEPH_FEATURE_MON_NULLROUTE (1ULL<<20)
+#define CEPH_FEATURE_MON_GV (1ULL<<21)
+#define CEPH_FEATURE_BACKFILL_RESERVATION (1ULL<<22)
+#define CEPH_FEATURE_MSG_AUTH (1ULL<<23)
+#define CEPH_FEATURE_RECOVERY_RESERVATION (1ULL<<24)
+#define CEPH_FEATURE_CRUSH_TUNABLES2 (1ULL<<25)
+#define CEPH_FEATURE_CREATEPOOLID (1ULL<<26)
+#define CEPH_FEATURE_REPLY_CREATE_INODE (1ULL<<27)
+#define CEPH_FEATURE_OSD_HBMSGS (1ULL<<28)
+#define CEPH_FEATURE_MDSENC (1ULL<<29)
+#define CEPH_FEATURE_OSDHASHPSPOOL (1ULL<<30)
+#define CEPH_FEATURE_MON_SINGLE_PAXOS (1ULL<<31)
+#define CEPH_FEATURE_OSD_SNAPMAPPER (1ULL<<32)
+#define CEPH_FEATURE_MON_SCRUB (1ULL<<33)
+#define CEPH_FEATURE_OSD_PACKED_RECOVERY (1ULL<<34)
+#define CEPH_FEATURE_OSD_CACHEPOOL (1ULL<<35)
+#define CEPH_FEATURE_CRUSH_V2 (1ULL<<36) /* new indep; SET_* steps */
+#define CEPH_FEATURE_EXPORT_PEER (1ULL<<37)
+#define CEPH_FEATURE_OSD_ERASURE_CODES (1ULL<<38)
+#define CEPH_FEATURE_OSD_TMAP2OMAP (1ULL<<38) /* overlap with EC */
+/* The process supports new-style OSDMap encoding. Monitors also use
+ this bit to determine if peers support NAK messages. */
+#define CEPH_FEATURE_OSDMAP_ENC (1ULL<<39)
+#define CEPH_FEATURE_MDS_INLINE_DATA (1ULL<<40)
+#define CEPH_FEATURE_CRUSH_TUNABLES3 (1ULL<<41)
+#define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41) /* overlap w/ tunables3 */
+
+/*
+ * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
+ * vector to evaluate to 64 bit ~0. To cope, we designate 1ULL << 63
+ * to mean 33 bit ~0, and introduce a helper below to do the
+ * translation.
+ *
+ * This was introduced by ceph.git commit
+ * 9ea02b84104045c2ffd7e7f4e7af512953855ecd v0.58-657-g9ea02b8
+ * and fixed by ceph.git commit
+ * 4255b5c2fb54ae40c53284b3ab700fdfc7e61748 v0.65-263-g4255b5c
+ */
+#define CEPH_FEATURE_RESERVED (1ULL<<63)
+
+static inline u64 ceph_sanitize_features(u64 features)
+{
+ /*
+ * The reserved high bit marks the buggy pre-fix encoding; map it
+ * to "everything through OSD_SNAPMAPPER" (low 33 bits set).
+ */
+ return (features & CEPH_FEATURE_RESERVED) ?
+ 0x1ffffffffull : features;
+}
+
+/*
+ * Features supported.
+ */
+#define CEPH_FEATURES_SUPPORTED_DEFAULT \
+ (CEPH_FEATURE_NOSRCADDR | \
+ CEPH_FEATURE_RECONNECT_SEQ | \
+ CEPH_FEATURE_PGID64 | \
+ CEPH_FEATURE_PGPOOL3 | \
+ CEPH_FEATURE_OSDENC | \
+ CEPH_FEATURE_CRUSH_TUNABLES | \
+ CEPH_FEATURE_CRUSH_TUNABLES2 | \
+ CEPH_FEATURE_REPLY_CREATE_INODE | \
+ CEPH_FEATURE_OSDHASHPSPOOL | \
+ CEPH_FEATURE_OSD_CACHEPOOL | \
+ CEPH_FEATURE_CRUSH_V2 | \
+ CEPH_FEATURE_EXPORT_PEER | \
+ CEPH_FEATURE_OSDMAP_ENC | \
+ CEPH_FEATURE_CRUSH_TUNABLES3 | \
+ CEPH_FEATURE_OSD_PRIMARY_AFFINITY)
+
+#define CEPH_FEATURES_REQUIRED_DEFAULT \
+ (CEPH_FEATURE_NOSRCADDR | \
+ CEPH_FEATURE_RECONNECT_SEQ | \
+ CEPH_FEATURE_PGID64 | \
+ CEPH_FEATURE_PGPOOL3 | \
+ CEPH_FEATURE_OSDENC)
+
+#endif
diff --git a/linux/ceph/ceph_frag.h b/linux/ceph/ceph_frag.h
new file mode 100644
index 0000000..5babb8e
--- /dev/null
+++ b/linux/ceph/ceph_frag.h
@@ -0,0 +1,109 @@
+#ifndef FS_CEPH_FRAG_H
+#define FS_CEPH_FRAG_H
+
+/*
+ * "Frags" are a way to describe a subset of a 32-bit number space,
+ * using a mask and a value to match against that mask. Any given frag
+ * (subset of the number space) can be partitioned into 2^n sub-frags.
+ *
+ * Frags are encoded into a 32-bit word:
+ * 8 upper bits = "bits"
+ * 24 lower bits = "value"
+ * (We could go to 5+27 bits, but who cares.)
+ *
+ * We use the _most_ significant bits of the 24 bit value. This makes
+ * values logically sort.
+ *
+ * Unfortunately, because the "bits" field is still in the high bits, we
+ * can't sort encoded frags numerically. However, it does allow you
+ * to feed encoded frags as values into frag_contains_value.
+ */
+static inline __u32 ceph_frag_make(__u32 b, __u32 v)
+{
+ /* 'b' goes in the top 8 bits; keep only the top b of v's 24 bits */
+ return (b << 24) |
+ (v & (0xffffffu << (24-b)) & 0xffffffu);
+}
+/* number of significant bits this frag specifies (0..24) */
+static inline __u32 ceph_frag_bits(__u32 f)
+{
+ return f >> 24;
+}
+/* the 24-bit value field */
+static inline __u32 ceph_frag_value(__u32 f)
+{
+ return f & 0xffffffu;
+}
+/* mask covering the significant (top) bits of the value */
+static inline __u32 ceph_frag_mask(__u32 f)
+{
+ return (0xffffffu << (24-ceph_frag_bits(f))) & 0xffffffu;
+}
+/* shift that positions the mask: 24 - bits */
+static inline __u32 ceph_frag_mask_shift(__u32 f)
+{
+ return 24 - ceph_frag_bits(f);
+}
+
+/* does value v fall within frag f's subset of the number space? */
+static inline int ceph_frag_contains_value(__u32 f, __u32 v)
+{
+ return (v & ceph_frag_mask(f)) == ceph_frag_value(f);
+}
+static inline int ceph_frag_contains_frag(__u32 f, __u32 sub)
+{
+ /* is sub as specific as us, and contained by us? */
+ return ceph_frag_bits(sub) >= ceph_frag_bits(f) &&
+ (ceph_frag_value(sub) & ceph_frag_mask(f)) == ceph_frag_value(f);
+}
+
+static inline __u32 ceph_frag_parent(__u32 f)
+{
+ /* drop one bit of specificity; frag_make re-masks the value */
+ return ceph_frag_make(ceph_frag_bits(f) - 1,
+ ceph_frag_value(f) & (ceph_frag_mask(f) << 1));
+}
+static inline int ceph_frag_is_left_child(__u32 f)
+{
+ /* left child: the branch bit (0x1000000 >> bits) is clear */
+ return ceph_frag_bits(f) > 0 &&
+ (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 0;
+}
+static inline int ceph_frag_is_right_child(__u32 f)
+{
+ /*
+ * A right child has the branch bit (0x1000000 >> bits) set.
+ * The previous code compared the masked value to 1, which is
+ * never true for bits < 24 (the bit is not at position 0), so
+ * the predicate always returned false; test for non-zero, the
+ * exact complement of ceph_frag_is_left_child().
+ */
+ return ceph_frag_bits(f) > 0 &&
+ (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) != 0;
+}
+static inline __u32 ceph_frag_sibling(__u32 f)
+{
+ /* flip the branch bit to reach the other child of our parent */
+ return ceph_frag_make(ceph_frag_bits(f),
+ ceph_frag_value(f) ^ (0x1000000 >> ceph_frag_bits(f)));
+}
+/* left child keeps the same value with one more significant bit */
+static inline __u32 ceph_frag_left_child(__u32 f)
+{
+ return ceph_frag_make(ceph_frag_bits(f)+1, ceph_frag_value(f));
+}
+static inline __u32 ceph_frag_right_child(__u32 f)
+{
+ /* additionally set the newly-significant branch bit */
+ return ceph_frag_make(ceph_frag_bits(f)+1,
+ ceph_frag_value(f) | (0x1000000 >> (1+ceph_frag_bits(f))));
+}
+static inline __u32 ceph_frag_make_child(__u32 f, int by, int i)
+{
+ /* descend 'by' levels; i selects which of the 2^by children */
+ int newbits = ceph_frag_bits(f) + by;
+ return ceph_frag_make(newbits,
+ ceph_frag_value(f) | (i << (24 - newbits)));
+}
+static inline int ceph_frag_is_leftmost(__u32 f)
+{
+ return ceph_frag_value(f) == 0;
+}
+static inline int ceph_frag_is_rightmost(__u32 f)
+{
+ return ceph_frag_value(f) == ceph_frag_mask(f);
+}
+static inline __u32 ceph_frag_next(__u32 f)
+{
+ /*
+ * NOTE(review): no rightmost guard -- frag_make's masking makes
+ * this wrap to the leftmost frag when f is rightmost; callers
+ * presumably check ceph_frag_is_rightmost() first.
+ */
+ return ceph_frag_make(ceph_frag_bits(f),
+ ceph_frag_value(f) + (0x1000000 >> ceph_frag_bits(f)));
+}
+
+/*
+ * comparator to sort frags logically, as when traversing the
+ * number space in ascending order...
+ */
+int ceph_frag_compare(__u32 a, __u32 b);
+
+#endif
diff --git a/linux/ceph/ceph_fs.h b/linux/ceph/ceph_fs.h
new file mode 100644
index 0000000..5f6db18
--- /dev/null
+++ b/linux/ceph/ceph_fs.h
@@ -0,0 +1,789 @@
+/*
+ * ceph_fs.h - Ceph constants and data types to share between kernel and
+ * user space.
+ *
+ * Most types in this file are defined as little-endian, and are
+ * primarily intended to describe data structures that pass over the
+ * wire or that are stored on disk.
+ *
+ * LGPL2
+ */
+
+#ifndef CEPH_FS_H
+#define CEPH_FS_H
+
+#include <linux/ceph/msgr.h>
+#include <linux/ceph/rados.h>
+
+/*
+ * subprotocol versions. when specific messages types or high-level
+ * protocols change, bump the affected components. we keep rev
+ * internal cluster protocols separately from the public,
+ * client-facing protocol.
+ */
+#define CEPH_OSDC_PROTOCOL 24 /* server/client */
+#define CEPH_MDSC_PROTOCOL 32 /* server/client */
+#define CEPH_MONC_PROTOCOL 15 /* server/client */
+
+
+#define CEPH_INO_ROOT 1
+#define CEPH_INO_CEPH 2 /* hidden .ceph dir */
+#define CEPH_INO_DOTDOT 3 /* used by ceph fuse for parent (..) */
+
+/* arbitrary limit on max # of monitors (cluster of 3 is typical) */
+#define CEPH_MAX_MON 31
+
+/*
+ * ceph_file_layout - describe data layout for a file/inode
+ */
+struct ceph_file_layout {
+ /* file -> object mapping */
+ __le32 fl_stripe_unit; /* stripe unit, in bytes. must be multiple
+ of page size. */
+ __le32 fl_stripe_count; /* over this many objects */
+ __le32 fl_object_size; /* until objects are this big, then move to
+ new objects */
+ __le32 fl_cas_hash; /* UNUSED. 0 = none; 1 = sha256 */
+
+ /* pg -> disk layout */
+ __le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */
+
+ /* object -> pg layout */
+ __le32 fl_unused; /* unused; used to be preferred primary for pg (-1 for none) */
+ __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */
+} __attribute__ ((packed));
+
+#define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit))
+#define ceph_file_layout_stripe_count(l) \
+ ((__s32)le32_to_cpu((l).fl_stripe_count))
+#define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size))
+#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash))
+#define ceph_file_layout_object_su(l) \
+ ((__s32)le32_to_cpu((l).fl_object_stripe_unit))
+#define ceph_file_layout_pg_pool(l) \
+ ((__s32)le32_to_cpu((l).fl_pg_pool))
+
+/* bytes spanned by one full stripe: stripe unit * stripe count */
+static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l)
+{
+ return le32_to_cpu(l->fl_stripe_unit) *
+ le32_to_cpu(l->fl_stripe_count);
+}
+
+/* "period" == bytes before i start on a new set of objects */
+static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l)
+{
+ return le32_to_cpu(l->fl_object_size) *
+ le32_to_cpu(l->fl_stripe_count);
+}
+
+#define CEPH_MIN_STRIPE_UNIT 65536
+
+int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
+
+struct ceph_dir_layout {
+ __u8 dl_dir_hash; /* see ceph_hash.h for ids */
+ __u8 dl_unused1;
+ __u16 dl_unused2;
+ __u32 dl_unused3;
+} __attribute__ ((packed));
+
+/* crypto algorithms */
+#define CEPH_CRYPTO_NONE 0x0
+#define CEPH_CRYPTO_AES 0x1
+
+#define CEPH_AES_IV "cephsageyudagreg"
+
+/* security/authentication protocols */
+#define CEPH_AUTH_UNKNOWN 0x0
+#define CEPH_AUTH_NONE 0x1
+#define CEPH_AUTH_CEPHX 0x2
+
+#define CEPH_AUTH_UID_DEFAULT ((__u64) -1)
+
+
+/*********************************************
+ * message layer
+ */
+
+/*
+ * message types
+ */
+
+/* misc */
+#define CEPH_MSG_SHUTDOWN 1
+#define CEPH_MSG_PING 2
+
+/* client <-> monitor */
+#define CEPH_MSG_MON_MAP 4
+#define CEPH_MSG_MON_GET_MAP 5
+#define CEPH_MSG_STATFS 13
+#define CEPH_MSG_STATFS_REPLY 14
+#define CEPH_MSG_MON_SUBSCRIBE 15
+#define CEPH_MSG_MON_SUBSCRIBE_ACK 16
+#define CEPH_MSG_AUTH 17
+#define CEPH_MSG_AUTH_REPLY 18
+#define CEPH_MSG_MON_GET_VERSION 19
+#define CEPH_MSG_MON_GET_VERSION_REPLY 20
+
+/* client <-> mds */
+#define CEPH_MSG_MDS_MAP 21
+
+#define CEPH_MSG_CLIENT_SESSION 22
+#define CEPH_MSG_CLIENT_RECONNECT 23
+
+#define CEPH_MSG_CLIENT_REQUEST 24
+#define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
+#define CEPH_MSG_CLIENT_REPLY 26
+#define CEPH_MSG_CLIENT_CAPS 0x310
+#define CEPH_MSG_CLIENT_LEASE 0x311
+#define CEPH_MSG_CLIENT_SNAP 0x312
+#define CEPH_MSG_CLIENT_CAPRELEASE 0x313
+
+/* pool ops */
+#define CEPH_MSG_POOLOP_REPLY 48
+#define CEPH_MSG_POOLOP 49
+
+
+/* osd */
+#define CEPH_MSG_OSD_MAP 41
+#define CEPH_MSG_OSD_OP 42
+#define CEPH_MSG_OSD_OPREPLY 43
+#define CEPH_MSG_WATCH_NOTIFY 44
+
+
+/* watch-notify operations */
+enum {
+ WATCH_NOTIFY = 1, /* notifying watcher */
+ WATCH_NOTIFY_COMPLETE = 2, /* notifier notified when done */
+};
+
+
+/* pool operations */
+enum {
+ POOL_OP_CREATE = 0x01,
+ POOL_OP_DELETE = 0x02,
+ POOL_OP_AUID_CHANGE = 0x03,
+ POOL_OP_CREATE_SNAP = 0x11,
+ POOL_OP_DELETE_SNAP = 0x12,
+ POOL_OP_CREATE_UNMANAGED_SNAP = 0x21,
+ POOL_OP_DELETE_UNMANAGED_SNAP = 0x22,
+};
+
+struct ceph_mon_request_header {
+ __le64 have_version;
+ __le16 session_mon;
+ __le64 session_mon_tid;
+} __attribute__ ((packed));
+
+struct ceph_mon_statfs {
+ struct ceph_mon_request_header monhdr;
+ struct ceph_fsid fsid;
+} __attribute__ ((packed));
+
+struct ceph_statfs {
+ __le64 kb, kb_used, kb_avail;
+ __le64 num_objects;
+} __attribute__ ((packed));
+
+struct ceph_mon_statfs_reply {
+ struct ceph_fsid fsid;
+ __le64 version;
+ struct ceph_statfs st;
+} __attribute__ ((packed));
+
+const char *ceph_pool_op_name(int op);
+
+struct ceph_mon_poolop {
+ struct ceph_mon_request_header monhdr;
+ struct ceph_fsid fsid;
+ __le32 pool;
+ __le32 op;
+ __le64 auid;
+ __le64 snapid;
+ __le32 name_len;
+} __attribute__ ((packed));
+
+struct ceph_mon_poolop_reply {
+ struct ceph_mon_request_header monhdr;
+ struct ceph_fsid fsid;
+ __le32 reply_code;
+ __le32 epoch;
+ char has_data;
+ char data[0];
+} __attribute__ ((packed));
+
+struct ceph_mon_unmanaged_snap {
+ __le64 snapid;
+} __attribute__ ((packed));
+
+struct ceph_osd_getmap {
+ struct ceph_mon_request_header monhdr;
+ struct ceph_fsid fsid;
+ __le32 start;
+} __attribute__ ((packed));
+
+struct ceph_mds_getmap {
+ struct ceph_mon_request_header monhdr;
+ struct ceph_fsid fsid;
+} __attribute__ ((packed));
+
+struct ceph_client_mount {
+ struct ceph_mon_request_header monhdr;
+} __attribute__ ((packed));
+
+#define CEPH_SUBSCRIBE_ONETIME 1 /* i want only 1 update after have */
+
+struct ceph_mon_subscribe_item {
+ __le64 have_version; __le64 have;
+ __u8 onetime;
+} __attribute__ ((packed));
+
+struct ceph_mon_subscribe_ack {
+ __le32 duration; /* seconds */
+ struct ceph_fsid fsid;
+} __attribute__ ((packed));
+
+/*
+ * mdsmap flags
+ */
+#define CEPH_MDSMAP_DOWN (1<<0) /* cluster deliberately down */
+
+/*
+ * mds states
+ * > 0 -> in
+ * <= 0 -> out
+ */
+#define CEPH_MDS_STATE_DNE 0 /* down, does not exist. */
+#define CEPH_MDS_STATE_STOPPED -1 /* down, once existed, but no subtrees.
+ empty log. */
+#define CEPH_MDS_STATE_BOOT -4 /* up, boot announcement. */
+#define CEPH_MDS_STATE_STANDBY -5 /* up, idle. waiting for assignment. */
+#define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */
+#define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */
+#define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */
+#define CEPH_MDS_STATE_REPLAYONCE -9 /* up, replaying an active node's journal */
+
+#define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */
+#define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed
+ operations (import, rename, etc.) */
+#define CEPH_MDS_STATE_RECONNECT 10 /* up, reconnect to clients */
+#define CEPH_MDS_STATE_REJOIN 11 /* up, rejoining distributed cache */
+#define CEPH_MDS_STATE_CLIENTREPLAY 12 /* up, replaying client operations */
+#define CEPH_MDS_STATE_ACTIVE 13 /* up, active */
+#define CEPH_MDS_STATE_STOPPING 14 /* up, but exporting metadata */
+
+extern const char *ceph_mds_state_name(int s);
+
+
+/*
+ * metadata lock types.
+ * - these are bitmasks.. we can compose them
+ * - they also define the lock ordering by the MDS
+ * - a few of these are internal to the mds
+ */
+#define CEPH_LOCK_DVERSION 1
+#define CEPH_LOCK_DN 2
+#define CEPH_LOCK_ISNAP 16
+#define CEPH_LOCK_IVERSION 32 /* mds internal */
+#define CEPH_LOCK_IFILE 64
+#define CEPH_LOCK_IAUTH 128
+#define CEPH_LOCK_ILINK 256
+#define CEPH_LOCK_IDFT 512 /* dir frag tree */
+#define CEPH_LOCK_INEST 1024 /* mds internal */
+#define CEPH_LOCK_IXATTR 2048
+#define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */
+#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */
+#define CEPH_LOCK_IPOLICY 16384 /* policy lock on dirs. MDS internal */
+
+/* client_session ops */
+enum {
+ CEPH_SESSION_REQUEST_OPEN,
+ CEPH_SESSION_OPEN,
+ CEPH_SESSION_REQUEST_CLOSE,
+ CEPH_SESSION_CLOSE,
+ CEPH_SESSION_REQUEST_RENEWCAPS,
+ CEPH_SESSION_RENEWCAPS,
+ CEPH_SESSION_STALE,
+ CEPH_SESSION_RECALL_STATE,
+ CEPH_SESSION_FLUSHMSG,
+ CEPH_SESSION_FLUSHMSG_ACK,
+};
+
+extern const char *ceph_session_op_name(int op);
+
+struct ceph_mds_session_head {
+ __le32 op;
+ __le64 seq;
+ struct ceph_timespec stamp;
+ __le32 max_caps, max_leases;
+} __attribute__ ((packed));
+
+/* client_request */
+/*
+ * metadata ops.
+ * & 0x001000 -> write op
+ * & 0x010000 -> follow symlink (e.g. stat(), not lstat()).
+ * & 0x100000 -> use weird ino/path trace
+ */
+#define CEPH_MDS_OP_WRITE 0x001000
+enum {
+ CEPH_MDS_OP_LOOKUP = 0x00100,
+ CEPH_MDS_OP_GETATTR = 0x00101,
+ CEPH_MDS_OP_LOOKUPHASH = 0x00102,
+ CEPH_MDS_OP_LOOKUPPARENT = 0x00103,
+ CEPH_MDS_OP_LOOKUPINO = 0x00104,
+ CEPH_MDS_OP_LOOKUPNAME = 0x00105,
+
+ CEPH_MDS_OP_SETXATTR = 0x01105,
+ CEPH_MDS_OP_RMXATTR = 0x01106,
+ CEPH_MDS_OP_SETLAYOUT = 0x01107,
+ CEPH_MDS_OP_SETATTR = 0x01108,
+ CEPH_MDS_OP_SETFILELOCK= 0x01109,
+ CEPH_MDS_OP_GETFILELOCK= 0x00110,
+ CEPH_MDS_OP_SETDIRLAYOUT=0x0110a,
+
+ CEPH_MDS_OP_MKNOD = 0x01201,
+ CEPH_MDS_OP_LINK = 0x01202,
+ CEPH_MDS_OP_UNLINK = 0x01203,
+ CEPH_MDS_OP_RENAME = 0x01204,
+ CEPH_MDS_OP_MKDIR = 0x01220,
+ CEPH_MDS_OP_RMDIR = 0x01221,
+ CEPH_MDS_OP_SYMLINK = 0x01222,
+
+ CEPH_MDS_OP_CREATE = 0x01301,
+ CEPH_MDS_OP_OPEN = 0x00302,
+ CEPH_MDS_OP_READDIR = 0x00305,
+
+ CEPH_MDS_OP_LOOKUPSNAP = 0x00400,
+ CEPH_MDS_OP_MKSNAP = 0x01400,
+ CEPH_MDS_OP_RMSNAP = 0x01401,
+ CEPH_MDS_OP_LSSNAP = 0x00402,
+};
+
+extern const char *ceph_mds_op_name(int op);
+
+
+#define CEPH_SETATTR_MODE 1
+#define CEPH_SETATTR_UID 2
+#define CEPH_SETATTR_GID 4
+#define CEPH_SETATTR_MTIME 8
+#define CEPH_SETATTR_ATIME 16
+#define CEPH_SETATTR_SIZE 32
+#define CEPH_SETATTR_CTIME 64
+
+/*
+ * Ceph setxattr request flags.
+ */
+#define CEPH_XATTR_CREATE (1 << 0)
+#define CEPH_XATTR_REPLACE (1 << 1)
+#define CEPH_XATTR_REMOVE (1 << 31)
+
+union ceph_mds_request_args {
+ struct {
+ __le32 mask; /* CEPH_CAP_* */
+ } __attribute__ ((packed)) getattr;
+ struct {
+ __le32 mode;
+ __le32 uid;
+ __le32 gid;
+ struct ceph_timespec mtime;
+ struct ceph_timespec atime;
+ __le64 size, old_size; /* old_size needed by truncate */
+ __le32 mask; /* CEPH_SETATTR_* */
+ } __attribute__ ((packed)) setattr;
+ struct {
+ __le32 frag; /* which dir fragment */
+ __le32 max_entries; /* how many dentries to grab */
+ __le32 max_bytes;
+ } __attribute__ ((packed)) readdir;
+ struct {
+ __le32 mode;
+ __le32 rdev;
+ } __attribute__ ((packed)) mknod;
+ struct {
+ __le32 mode;
+ } __attribute__ ((packed)) mkdir;
+ struct {
+ __le32 flags;
+ __le32 mode;
+ __le32 stripe_unit; /* layout for newly created file */
+ __le32 stripe_count; /* ... */
+ __le32 object_size;
+ __le32 file_replication;
+ __le32 unused; /* used to be preferred osd */
+ } __attribute__ ((packed)) open;
+ struct {
+ __le32 flags;
+ } __attribute__ ((packed)) setxattr;
+ struct {
+ struct ceph_file_layout layout;
+ } __attribute__ ((packed)) setlayout;
+ struct {
+ __u8 rule; /* currently fcntl or flock */
+ __u8 type; /* shared, exclusive, remove*/
+ __le64 owner; /* owner of the lock */
+ __le64 pid; /* process id requesting the lock */
+ __le64 start; /* initial location to lock */
+ __le64 length; /* num bytes to lock from start */
+ __u8 wait; /* will caller wait for lock to become available? */
+ } __attribute__ ((packed)) filelock_change;
+} __attribute__ ((packed));
+
+#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */
+#define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */
+
+struct ceph_mds_request_head {
+ __le64 oldest_client_tid;
+ __le32 mdsmap_epoch; /* on client */
+ __le32 flags; /* CEPH_MDS_FLAG_* */
+ __u8 num_retry, num_fwd; /* count retry, fwd attempts */
+ __le16 num_releases; /* # include cap/lease release records */
+ __le32 op; /* mds op code */
+ __le32 caller_uid, caller_gid;
+ __le64 ino; /* use this ino for openc, mkdir, mknod,
+ etc. (if replaying) */
+ union ceph_mds_request_args args;
+} __attribute__ ((packed));
+
+/* cap/lease release record */
+struct ceph_mds_request_release {
+ __le64 ino, cap_id; /* ino and unique cap id */
+ __le32 caps, wanted; /* new issued, wanted */
+ __le32 seq, issue_seq, mseq;
+ __le32 dname_seq; /* if releasing a dentry lease, a */
+ __le32 dname_len; /* string follows. */
+} __attribute__ ((packed));
+
+/* client reply */
+struct ceph_mds_reply_head {
+ __le32 op;
+ __le32 result;
+ __le32 mdsmap_epoch;
+ __u8 safe; /* true if committed to disk */
+ __u8 is_dentry, is_target; /* true if dentry, target inode records
+ are included with reply */
+} __attribute__ ((packed));
+
+/* one for each node split */
+struct ceph_frag_tree_split {
+ __le32 frag; /* this frag splits... */
+ __le32 by; /* ...by this many bits */
+} __attribute__ ((packed));
+
+struct ceph_frag_tree_head {
+ __le32 nsplits; /* num ceph_frag_tree_split records */
+ struct ceph_frag_tree_split splits[];
+} __attribute__ ((packed));
+
+/* capability issue, for bundling with mds reply */
+struct ceph_mds_reply_cap {
+ __le32 caps, wanted; /* caps issued, wanted */
+ __le64 cap_id;
+ __le32 seq, mseq;
+ __le64 realm; /* snap realm */
+ __u8 flags; /* CEPH_CAP_FLAG_* */
+} __attribute__ ((packed));
+
+#define CEPH_CAP_FLAG_AUTH (1 << 0) /* cap is issued by auth mds */
+#define CEPH_CAP_FLAG_RELEASE (1 << 1) /* release the cap */
+
+/* inode record, for bundling with mds reply */
+struct ceph_mds_reply_inode {
+ __le64 ino;
+ __le64 snapid;
+ __le32 rdev;
+ __le64 version; /* inode version */
+ __le64 xattr_version; /* version for xattr blob */
+ struct ceph_mds_reply_cap cap; /* caps issued for this inode */
+ struct ceph_file_layout layout;
+ struct ceph_timespec ctime, mtime, atime;
+ __le32 time_warp_seq;
+ __le64 size, max_size, truncate_size;
+ __le32 truncate_seq;
+ __le32 mode, uid, gid;
+ __le32 nlink;
+ __le64 files, subdirs, rbytes, rfiles, rsubdirs; /* dir stats */
+ struct ceph_timespec rctime;
+ struct ceph_frag_tree_head fragtree; /* (must be at end of struct) */
+} __attribute__ ((packed));
+/* followed by frag array, symlink string, dir layout, xattr blob */
+
+/* reply_lease follows dname, and reply_inode */
+struct ceph_mds_reply_lease {
+ __le16 mask; /* lease type(s) */
+ __le32 duration_ms; /* lease duration */
+ __le32 seq;
+} __attribute__ ((packed));
+
+struct ceph_mds_reply_dirfrag {
+ __le32 frag; /* fragment */
+ __le32 auth; /* auth mds, if this is a delegation point */
+ __le32 ndist; /* number of mds' this is replicated on */
+ __le32 dist[];
+} __attribute__ ((packed));
+
+#define CEPH_LOCK_FCNTL 1
+#define CEPH_LOCK_FLOCK 2
+
+#define CEPH_LOCK_SHARED 1
+#define CEPH_LOCK_EXCL 2
+#define CEPH_LOCK_UNLOCK 4
+
+struct ceph_filelock {
+ __le64 start;/* file offset to start lock at */
+ __le64 length; /* num bytes to lock; 0 for all following start */
+ __le64 client; /* which client holds the lock */
+ __le64 owner; /* owner the lock */
+ __le64 pid; /* process id holding the lock on the client */
+ __u8 type; /* shared lock, exclusive lock, or unlock */
+} __attribute__ ((packed));
+
+
+/* file access modes */
+#define CEPH_FILE_MODE_PIN 0
+#define CEPH_FILE_MODE_RD 1
+#define CEPH_FILE_MODE_WR 2
+#define CEPH_FILE_MODE_RDWR 3 /* RD | WR */
+#define CEPH_FILE_MODE_LAZY 4 /* lazy io */
+#define CEPH_FILE_MODE_NUM 8 /* bc these are bit fields.. mostly */
+
+int ceph_flags_to_mode(int flags);
+
+
+/* capability bits */
+#define CEPH_CAP_PIN 1 /* no specific capabilities beyond the pin */
+
+/* generic cap bits */
+#define CEPH_CAP_GSHARED 1 /* client can reads */
+#define CEPH_CAP_GEXCL 2 /* client can read and update */
+#define CEPH_CAP_GCACHE 4 /* (file) client can cache reads */
+#define CEPH_CAP_GRD 8 /* (file) client can read */
+#define CEPH_CAP_GWR 16 /* (file) client can write */
+#define CEPH_CAP_GBUFFER 32 /* (file) client can buffer writes */
+#define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */
+#define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */
+
+#define CEPH_CAP_SIMPLE_BITS 2
+#define CEPH_CAP_FILE_BITS 8
+
+/* per-lock shift */
+#define CEPH_CAP_SAUTH 2
+#define CEPH_CAP_SLINK 4
+#define CEPH_CAP_SXATTR 6
+#define CEPH_CAP_SFILE 8
+#define CEPH_CAP_SFLOCK 20
+
+#define CEPH_CAP_BITS 22
+
+/* composed values */
+#define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH)
+#define CEPH_CAP_AUTH_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SAUTH)
+#define CEPH_CAP_LINK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SLINK)
+#define CEPH_CAP_LINK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SLINK)
+#define CEPH_CAP_XATTR_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SXATTR)
+#define CEPH_CAP_XATTR_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SXATTR)
+#define CEPH_CAP_FILE(x) (x << CEPH_CAP_SFILE)
+#define CEPH_CAP_FILE_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFILE)
+#define CEPH_CAP_FILE_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFILE)
+#define CEPH_CAP_FILE_CACHE (CEPH_CAP_GCACHE << CEPH_CAP_SFILE)
+#define CEPH_CAP_FILE_RD (CEPH_CAP_GRD << CEPH_CAP_SFILE)
+#define CEPH_CAP_FILE_WR (CEPH_CAP_GWR << CEPH_CAP_SFILE)
+#define CEPH_CAP_FILE_BUFFER (CEPH_CAP_GBUFFER << CEPH_CAP_SFILE)
+#define CEPH_CAP_FILE_WREXTEND (CEPH_CAP_GWREXTEND << CEPH_CAP_SFILE)
+#define CEPH_CAP_FILE_LAZYIO (CEPH_CAP_GLAZYIO << CEPH_CAP_SFILE)
+#define CEPH_CAP_FLOCK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFLOCK)
+#define CEPH_CAP_FLOCK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFLOCK)
+
+
+/* cap masks (for getattr) */
+#define CEPH_STAT_CAP_INODE CEPH_CAP_PIN
+#define CEPH_STAT_CAP_TYPE CEPH_CAP_PIN /* mode >> 12 */
+#define CEPH_STAT_CAP_SYMLINK CEPH_CAP_PIN
+#define CEPH_STAT_CAP_UID CEPH_CAP_AUTH_SHARED
+#define CEPH_STAT_CAP_GID CEPH_CAP_AUTH_SHARED
+#define CEPH_STAT_CAP_MODE CEPH_CAP_AUTH_SHARED
+#define CEPH_STAT_CAP_NLINK CEPH_CAP_LINK_SHARED
+#define CEPH_STAT_CAP_LAYOUT CEPH_CAP_FILE_SHARED
+#define CEPH_STAT_CAP_MTIME CEPH_CAP_FILE_SHARED
+#define CEPH_STAT_CAP_SIZE CEPH_CAP_FILE_SHARED
+#define CEPH_STAT_CAP_ATIME CEPH_CAP_FILE_SHARED /* fixme */
+#define CEPH_STAT_CAP_XATTR CEPH_CAP_XATTR_SHARED
+#define CEPH_STAT_CAP_INODE_ALL (CEPH_CAP_PIN | \
+ CEPH_CAP_AUTH_SHARED | \
+ CEPH_CAP_LINK_SHARED | \
+ CEPH_CAP_FILE_SHARED | \
+ CEPH_CAP_XATTR_SHARED)
+
+#define CEPH_CAP_ANY_SHARED (CEPH_CAP_AUTH_SHARED | \
+ CEPH_CAP_LINK_SHARED | \
+ CEPH_CAP_XATTR_SHARED | \
+ CEPH_CAP_FILE_SHARED)
+#define CEPH_CAP_ANY_RD (CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_RD | \
+ CEPH_CAP_FILE_CACHE)
+
+#define CEPH_CAP_ANY_EXCL (CEPH_CAP_AUTH_EXCL | \
+ CEPH_CAP_LINK_EXCL | \
+ CEPH_CAP_XATTR_EXCL | \
+ CEPH_CAP_FILE_EXCL)
+#define CEPH_CAP_ANY_FILE_WR (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | \
+ CEPH_CAP_FILE_EXCL)
+#define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR)
+#define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \
+ CEPH_CAP_ANY_FILE_WR | CEPH_CAP_FILE_LAZYIO | \
+ CEPH_CAP_PIN)
+
+#define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \
+ CEPH_LOCK_IXATTR)
+
+int ceph_caps_for_mode(int mode);
+
+enum {
+ CEPH_CAP_OP_GRANT, /* mds->client grant */
+ CEPH_CAP_OP_REVOKE, /* mds->client revoke */
+ CEPH_CAP_OP_TRUNC, /* mds->client trunc notify */
+ CEPH_CAP_OP_EXPORT, /* mds has exported the cap */
+ CEPH_CAP_OP_IMPORT, /* mds has imported the cap */
+ CEPH_CAP_OP_UPDATE, /* client->mds update */
+ CEPH_CAP_OP_DROP, /* client->mds drop cap bits */
+ CEPH_CAP_OP_FLUSH, /* client->mds cap writeback */
+ CEPH_CAP_OP_FLUSH_ACK, /* mds->client flushed */
+ CEPH_CAP_OP_FLUSHSNAP, /* client->mds flush snapped metadata */
+ CEPH_CAP_OP_FLUSHSNAP_ACK, /* mds->client flushed snapped metadata */
+ CEPH_CAP_OP_RELEASE, /* client->mds release (clean) cap */
+ CEPH_CAP_OP_RENEW, /* client->mds renewal request */
+};
+
+extern const char *ceph_cap_op_name(int op);
+
+/*
+ * caps message, used for capability callbacks, acks, requests, etc.
+ */
+struct ceph_mds_caps {
+ __le32 op; /* CEPH_CAP_OP_* */
+ __le64 ino, realm;
+ __le64 cap_id;
+ __le32 seq, issue_seq;
+ __le32 caps, wanted, dirty; /* latest issued/wanted/dirty */
+ __le32 migrate_seq;
+ __le64 snap_follows;
+ __le32 snap_trace_len;
+
+ /* authlock */
+ __le32 uid, gid, mode;
+
+ /* linklock */
+ __le32 nlink;
+
+ /* xattrlock */
+ __le32 xattr_len;
+ __le64 xattr_version;
+
+ /* filelock */
+ __le64 size, max_size, truncate_size;
+ __le32 truncate_seq;
+ struct ceph_timespec mtime, atime, ctime;
+ struct ceph_file_layout layout;
+ __le32 time_warp_seq;
+} __attribute__ ((packed));
+
+struct ceph_mds_cap_peer {
+ __le64 cap_id;
+ __le32 seq;
+ __le32 mseq;
+ __le32 mds;
+ __u8 flags;
+} __attribute__ ((packed));
+
+/* cap release msg head */
+struct ceph_mds_cap_release {
+ __le32 num; /* number of cap_items that follow */
+} __attribute__ ((packed));
+
+struct ceph_mds_cap_item {
+ __le64 ino;
+ __le64 cap_id;
+ __le32 migrate_seq, seq;
+} __attribute__ ((packed));
+
+#define CEPH_MDS_LEASE_REVOKE 1 /* mds -> client */
+#define CEPH_MDS_LEASE_RELEASE 2 /* client -> mds */
+#define CEPH_MDS_LEASE_RENEW 3 /* client <-> mds */
+#define CEPH_MDS_LEASE_REVOKE_ACK 4 /* client -> mds */
+
+extern const char *ceph_lease_op_name(int o);
+
+/* lease msg header */
+struct ceph_mds_lease {
+ __u8 action; /* CEPH_MDS_LEASE_* */
+ __le16 mask; /* which lease */
+ __le64 ino;
+ __le64 first, last; /* snap range */
+ __le32 seq;
+ __le32 duration_ms; /* duration of renewal */
+} __attribute__ ((packed));
+/* followed by a __le32+string for dname */
+
+/* client reconnect */
+struct ceph_mds_cap_reconnect {
+ __le64 cap_id;
+ __le32 wanted;
+ __le32 issued;
+ __le64 snaprealm;
+ __le64 pathbase; /* base ino for our path to this ino */
+ __le32 flock_len; /* size of flock state blob, if any */
+} __attribute__ ((packed));
+/* followed by flock blob */
+
+struct ceph_mds_cap_reconnect_v1 {
+ __le64 cap_id;
+ __le32 wanted;
+ __le32 issued;
+ __le64 size;
+ struct ceph_timespec mtime, atime;
+ __le64 snaprealm;
+ __le64 pathbase; /* base ino for our path to this ino */
+} __attribute__ ((packed));
+
+struct ceph_mds_snaprealm_reconnect {
+ __le64 ino; /* snap realm base */
+ __le64 seq; /* snap seq for this snap realm */
+ __le64 parent; /* parent realm */
+} __attribute__ ((packed));
+
+/*
+ * snaps
+ */
+enum {
+ CEPH_SNAP_OP_UPDATE, /* CREATE or DESTROY */
+ CEPH_SNAP_OP_CREATE,
+ CEPH_SNAP_OP_DESTROY,
+ CEPH_SNAP_OP_SPLIT,
+};
+
+extern const char *ceph_snap_op_name(int o);
+
+/* snap msg header */
+struct ceph_mds_snap_head {
+ __le32 op; /* CEPH_SNAP_OP_* */
+ __le64 split; /* ino to split off, if any */
+ __le32 num_split_inos; /* # inos belonging to new child realm */
+ __le32 num_split_realms; /* # child realms under new child realm */
+ __le32 trace_len; /* size of snap trace blob */
+} __attribute__ ((packed));
+/* followed by split ino list, then split realms, then the trace blob */
+
+/*
+ * encode info about a snaprealm, as viewed by a client
+ */
+struct ceph_mds_snap_realm {
+ __le64 ino; /* ino */
+ __le64 created; /* snap: when created */
+ __le64 parent; /* ino: parent realm */
+ __le64 parent_since; /* snap: same parent since */
+ __le64 seq; /* snap: version */
+ __le32 num_snaps;
+ __le32 num_prior_parent_snaps;
+} __attribute__ ((packed));
+/* followed by my snap list, then prior parent snap list */
+
+#endif
diff --git a/linux/ceph/ceph_hash.h b/linux/ceph/ceph_hash.h
new file mode 100644
index 0000000..d099c3f
--- /dev/null
+++ b/linux/ceph/ceph_hash.h
@@ -0,0 +1,13 @@
+#ifndef FS_CEPH_HASH_H
+#define FS_CEPH_HASH_H
+
+#define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */
+#define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */
+
+extern unsigned ceph_str_hash_linux(const char *s, unsigned len);
+extern unsigned ceph_str_hash_rjenkins(const char *s, unsigned len);
+
+extern unsigned ceph_str_hash(int type, const char *s, unsigned len);
+extern const char *ceph_str_hash_name(int type);
+
+#endif
diff --git a/linux/ceph/debugfs.h b/linux/ceph/debugfs.h
new file mode 100644
index 0000000..1df086d
--- /dev/null
+++ b/linux/ceph/debugfs.h
@@ -0,0 +1,33 @@
+#ifndef _FS_CEPH_DEBUGFS_H
+#define _FS_CEPH_DEBUGFS_H
+
+#include <linux/ceph/ceph_debug.h>
+#include <linux/ceph/types.h>
+
+#define CEPH_DEFINE_SHOW_FUNC(name) \
+static int name##_open(struct inode *inode, struct file *file) \
+{ \
+ struct seq_file *sf; \
+ int ret; \
+ \
+ ret = single_open(file, name, NULL); \
+ sf = file->private_data; \
+ sf->private = inode->i_private; \
+ return ret; \
+} \
+ \
+static const struct file_operations name##_fops = { \
+ .open = name##_open, \
+ .read = seq_read, \
+ .llseek = seq_lseek, \
+ .release = single_release, \
+};
+
+/* debugfs.c */
+extern int ceph_debugfs_init(void);
+extern void ceph_debugfs_cleanup(void);
+extern int ceph_debugfs_client_init(struct ceph_client *client);
+extern void ceph_debugfs_client_cleanup(struct ceph_client *client);
+
+#endif
+
diff --git a/linux/ceph/decode.h b/linux/ceph/decode.h
new file mode 100644
index 0000000..a6ef9cc
--- /dev/null
+++ b/linux/ceph/decode.h
@@ -0,0 +1,259 @@
+#ifndef __CEPH_DECODE_H
+#define __CEPH_DECODE_H
+
+#include <linux/err.h>
+#include <linux/bug.h>
+#include <linux/time.h>
+#include <asm/unaligned.h>
+
+#include <linux/ceph/types.h>
+
+/*
+ * in all cases,
+ * void **p pointer to position pointer
+ * void *end pointer to end of buffer (last byte + 1)
+ */
+
+static inline u64 ceph_decode_64(void **p)
+{
+ u64 v = get_unaligned_le64(*p);
+ *p += sizeof(u64);
+ return v;
+}
+static inline u32 ceph_decode_32(void **p)
+{
+ u32 v = get_unaligned_le32(*p);
+ *p += sizeof(u32);
+ return v;
+}
+static inline u16 ceph_decode_16(void **p)
+{
+ u16 v = get_unaligned_le16(*p);
+ *p += sizeof(u16);
+ return v;
+}
+static inline u8 ceph_decode_8(void **p)
+{
+ u8 v = *(u8 *)*p;
+ (*p)++;
+ return v;
+}
+static inline void ceph_decode_copy(void **p, void *pv, size_t n)
+{
+ memcpy(pv, *p, n);
+ *p += n;
+}
+
+/*
+ * bounds check input.
+ */
+static inline int ceph_has_room(void **p, void *end, size_t n)
+{
+ return end >= *p && n <= end - *p;
+}
+
+#define ceph_decode_need(p, end, n, bad) \
+ do { \
+ if (!likely(ceph_has_room(p, end, n))) \
+ goto bad; \
+ } while (0)
+
+#define ceph_decode_64_safe(p, end, v, bad) \
+ do { \
+ ceph_decode_need(p, end, sizeof(u64), bad); \
+ v = ceph_decode_64(p); \
+ } while (0)
+#define ceph_decode_32_safe(p, end, v, bad) \
+ do { \
+ ceph_decode_need(p, end, sizeof(u32), bad); \
+ v = ceph_decode_32(p); \
+ } while (0)
+#define ceph_decode_16_safe(p, end, v, bad) \
+ do { \
+ ceph_decode_need(p, end, sizeof(u16), bad); \
+ v = ceph_decode_16(p); \
+ } while (0)
+#define ceph_decode_8_safe(p, end, v, bad) \
+ do { \
+ ceph_decode_need(p, end, sizeof(u8), bad); \
+ v = ceph_decode_8(p); \
+ } while (0)
+
+#define ceph_decode_copy_safe(p, end, pv, n, bad) \
+ do { \
+ ceph_decode_need(p, end, n, bad); \
+ ceph_decode_copy(p, pv, n); \
+ } while (0)
+
+/*
+ * Allocate a buffer big enough to hold the wire-encoded string, and
+ * decode the string into it. The resulting string will always be
+ * terminated with '\0'. If successful, *p will be advanced
+ * past the decoded data. Also, if lenp is not a null pointer, the
+ * length (not including the terminating '\0') will be recorded in
+ * *lenp. Note that a zero-length string is a valid return value.
+ *
+ * Returns a pointer to the newly-allocated string buffer, or a
+ * pointer-coded errno if an error occurs. Neither *p nor *lenp
+ * will have been updated if an error is returned.
+ *
+ * There are two possible failures:
+ * - converting the string would require accessing memory at or
+ * beyond the "end" pointer provided (-ERANGE)
+ * - memory could not be allocated for the result (-ENOMEM)
+ */
+static inline char *ceph_extract_encoded_string(void **p, void *end,
+ size_t *lenp, gfp_t gfp)
+{
+ u32 len;
+ void *sp = *p;
+ char *buf;
+
+ ceph_decode_32_safe(&sp, end, len, bad);
+ if (!ceph_has_room(&sp, end, len))
+ goto bad;
+
+ buf = kmalloc(len + 1, gfp);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+ if (len)
+ memcpy(buf, sp, len);
+ buf[len] = '\0';
+
+ *p = (char *) *p + sizeof (u32) + len;
+ if (lenp)
+ *lenp = (size_t) len;
+
+ return buf;
+
+bad:
+ return ERR_PTR(-ERANGE);
+}
+
+/*
+ * struct ceph_timespec <-> struct timespec
+ */
+static inline void ceph_decode_timespec(struct timespec *ts,
+ const struct ceph_timespec *tv)
+{
+ ts->tv_sec = (__kernel_time_t)le32_to_cpu(tv->tv_sec);
+ ts->tv_nsec = (long)le32_to_cpu(tv->tv_nsec);
+}
+static inline void ceph_encode_timespec(struct ceph_timespec *tv,
+ const struct timespec *ts)
+{
+ tv->tv_sec = cpu_to_le32((u32)ts->tv_sec);
+ tv->tv_nsec = cpu_to_le32((u32)ts->tv_nsec);
+}
+
+/*
+ * sockaddr_storage <-> ceph_sockaddr
+ */
+static inline void ceph_encode_addr(struct ceph_entity_addr *a)
+{
+ __be16 ss_family = htons(a->in_addr.ss_family);
+ a->in_addr.ss_family = *(__u16 *)&ss_family;
+}
+static inline void ceph_decode_addr(struct ceph_entity_addr *a)
+{
+ __be16 ss_family = *(__be16 *)&a->in_addr.ss_family;
+ a->in_addr.ss_family = ntohs(ss_family);
+ WARN_ON(a->in_addr.ss_family == 512);
+}
+
+/*
+ * encoders
+ */
+static inline void ceph_encode_64(void **p, u64 v)
+{
+ put_unaligned_le64(v, (__le64 *)*p);
+ *p += sizeof(u64);
+}
+static inline void ceph_encode_32(void **p, u32 v)
+{
+ put_unaligned_le32(v, (__le32 *)*p);
+ *p += sizeof(u32);
+}
+static inline void ceph_encode_16(void **p, u16 v)
+{
+ put_unaligned_le16(v, (__le16 *)*p);
+ *p += sizeof(u16);
+}
+static inline void ceph_encode_8(void **p, u8 v)
+{
+ *(u8 *)*p = v;
+ (*p)++;
+}
+static inline void ceph_encode_copy(void **p, const void *s, int len)
+{
+ memcpy(*p, s, len);
+ *p += len;
+}
+
+/*
+ * filepath, string encoders
+ */
+static inline void ceph_encode_filepath(void **p, void *end,
+ u64 ino, const char *path)
+{
+ u32 len = path ? strlen(path) : 0;
+ BUG_ON(*p + 1 + sizeof(ino) + sizeof(len) + len > end);
+ ceph_encode_8(p, 1);
+ ceph_encode_64(p, ino);
+ ceph_encode_32(p, len);
+ if (len)
+ memcpy(*p, path, len);
+ *p += len;
+}
+
+static inline void ceph_encode_string(void **p, void *end,
+ const char *s, u32 len)
+{
+ BUG_ON(*p + sizeof(len) + len > end);
+ ceph_encode_32(p, len);
+ if (len)
+ memcpy(*p, s, len);
+ *p += len;
+}
+
+#define ceph_encode_need(p, end, n, bad) \
+ do { \
+ if (!likely(ceph_has_room(p, end, n))) \
+ goto bad; \
+ } while (0)
+
+#define ceph_encode_64_safe(p, end, v, bad) \
+ do { \
+ ceph_encode_need(p, end, sizeof(u64), bad); \
+ ceph_encode_64(p, v); \
+ } while (0)
+#define ceph_encode_32_safe(p, end, v, bad) \
+ do { \
+ ceph_encode_need(p, end, sizeof(u32), bad); \
+ ceph_encode_32(p, v); \
+ } while (0)
+#define ceph_encode_16_safe(p, end, v, bad) \
+ do { \
+ ceph_encode_need(p, end, sizeof(u16), bad); \
+ ceph_encode_16(p, v); \
+ } while (0)
+#define ceph_encode_8_safe(p, end, v, bad) \
+ do { \
+ ceph_encode_need(p, end, sizeof(u8), bad); \
+ ceph_encode_8(p, v); \
+ } while (0)
+
+#define ceph_encode_copy_safe(p, end, pv, n, bad) \
+ do { \
+ ceph_encode_need(p, end, n, bad); \
+ ceph_encode_copy(p, pv, n); \
+ } while (0)
+#define ceph_encode_string_safe(p, end, s, n, bad) \
+ do { \
+ ceph_encode_need(p, end, n, bad); \
+ ceph_encode_string(p, end, s, n); \
+ } while (0)
+
+
+#endif
diff --git a/linux/ceph/libceph.h b/linux/ceph/libceph.h
new file mode 100644
index 0000000..2f49aa4
--- /dev/null
+++ b/linux/ceph/libceph.h
@@ -0,0 +1,230 @@
+#ifndef _FS_CEPH_LIBCEPH_H
+#define _FS_CEPH_LIBCEPH_H
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <asm/unaligned.h>
+#include <linux/backing-dev.h>
+#include <linux/completion.h>
+#include <linux/exportfs.h>
+#include <linux/bug.h>
+#include <linux/fs.h>
+#include <linux/mempool.h>
+#include <linux/pagemap.h>
+#include <linux/wait.h>
+#include <linux/writeback.h>
+#include <linux/slab.h>
+
+#include <linux/ceph/types.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/msgpool.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/osd_client.h>
+#include <linux/ceph/ceph_fs.h>
+
+/*
+ * mount options
+ */
+#define CEPH_OPT_FSID (1<<0)
+#define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */
+#define CEPH_OPT_MYIP (1<<2) /* specified my ip */
+#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */
+
+#define CEPH_OPT_DEFAULT (0)
+
+#define ceph_set_opt(client, opt) \
+ (client)->options->flags |= CEPH_OPT_##opt;
+#define ceph_test_opt(client, opt) \
+ (!!((client)->options->flags & CEPH_OPT_##opt))
+
+struct ceph_options {
+ int flags;
+ struct ceph_fsid fsid;
+ struct ceph_entity_addr my_addr;
+ int mount_timeout;
+ int osd_idle_ttl;
+ int osd_keepalive_timeout;
+
+ /*
+ * any type that can't be simply compared or doesn't need
+ * to be compared should go beyond this point,
+ * ceph_compare_options() should be updated accordingly
+ */
+
+ struct ceph_entity_addr *mon_addr; /* should be the first
+ pointer type of args */
+ int num_mon;
+ char *name;
+ struct ceph_crypto_key *key;
+};
+
+/*
+ * defaults
+ */
+#define CEPH_MOUNT_TIMEOUT_DEFAULT 60
+#define CEPH_OSD_KEEPALIVE_DEFAULT 5
+#define CEPH_OSD_IDLE_TTL_DEFAULT 60
+
+#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
+#define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024)
+#define CEPH_MSG_MAX_DATA_LEN (16*1024*1024)
+
+#define CEPH_AUTH_NAME_DEFAULT "guest"
+
+/*
+ * Delay telling the MDS we no longer want caps, in case we reopen
+ * the file. Delay a minimum amount of time, even if we send a cap
+ * message for some other reason. Otherwise, take the opportunity to
+ * update the mds to avoid sending another message later.
+ */
+#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */
+#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */
+
+#define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4)
+
+/* mount state */
+enum {
+ CEPH_MOUNT_MOUNTING,
+ CEPH_MOUNT_MOUNTED,
+ CEPH_MOUNT_UNMOUNTING,
+ CEPH_MOUNT_UNMOUNTED,
+ CEPH_MOUNT_SHUTDOWN,
+};
+
+/*
+ * subtract jiffies
+ */
+static inline unsigned long time_sub(unsigned long a, unsigned long b)
+{
+ BUG_ON(time_after(b, a));
+ return (long)a - (long)b;
+}
+
+struct ceph_mds_client;
+
+/*
+ * per client state
+ *
+ * possibly shared by multiple mount points, if they are
+ * mounting the same ceph filesystem/cluster.
+ */
+struct ceph_client {
+ struct ceph_fsid fsid;
+ bool have_fsid;
+
+ void *private;
+
+ struct ceph_options *options;
+
+ struct mutex mount_mutex; /* serialize mount attempts */
+ wait_queue_head_t auth_wq;
+ int auth_err;
+
+ int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *);
+
+ u64 supported_features;
+ u64 required_features;
+
+ struct ceph_messenger msgr; /* messenger instance */
+ struct ceph_mon_client monc;
+ struct ceph_osd_client osdc;
+
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *debugfs_dir;
+ struct dentry *debugfs_monmap;
+ struct dentry *debugfs_osdmap;
+#endif
+};
+
+
+
+/*
+ * snapshots
+ */
+
+/*
+ * A "snap context" is the set of existing snapshots when we
+ * write data. It is used by the OSD to guide its COW behavior.
+ *
+ * The ceph_snap_context is refcounted, and attached to each dirty
+ * page, indicating which context the dirty data belonged when it was
+ * dirtied.
+ */
+struct ceph_snap_context {
+ atomic_t nref;
+ u64 seq;
+ u32 num_snaps;
+ u64 snaps[];
+};
+
+extern struct ceph_snap_context *ceph_create_snap_context(u32 snap_count,
+ gfp_t gfp_flags);
+extern struct ceph_snap_context *ceph_get_snap_context(
+ struct ceph_snap_context *sc);
+extern void ceph_put_snap_context(struct ceph_snap_context *sc);
+
+/*
+ * calculate the number of pages a given length and offset map onto,
+ * if we align the data.
+ */
+static inline int calc_pages_for(u64 off, u64 len)
+{
+ return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) -
+ (off >> PAGE_CACHE_SHIFT);
+}
+
+extern struct kmem_cache *ceph_inode_cachep;
+extern struct kmem_cache *ceph_cap_cachep;
+extern struct kmem_cache *ceph_dentry_cachep;
+extern struct kmem_cache *ceph_file_cachep;
+
+/* ceph_common.c */
+extern bool libceph_compatible(void *data);
+
+extern const char *ceph_msg_type_name(int type);
+extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
+extern void *ceph_kvmalloc(size_t size, gfp_t flags);
+extern void ceph_kvfree(const void *ptr);
+
+extern struct ceph_options *ceph_parse_options(char *options,
+ const char *dev_name, const char *dev_name_end,
+ int (*parse_extra_token)(char *c, void *private),
+ void *private);
+extern void ceph_destroy_options(struct ceph_options *opt);
+extern int ceph_compare_options(struct ceph_options *new_opt,
+ struct ceph_client *client);
+extern struct ceph_client *ceph_create_client(struct ceph_options *opt,
+ void *private,
+ u64 supported_features,
+ u64 required_features);
+extern u64 ceph_client_id(struct ceph_client *client);
+extern void ceph_destroy_client(struct ceph_client *client);
+extern int __ceph_open_session(struct ceph_client *client,
+ unsigned long started);
+extern int ceph_open_session(struct ceph_client *client);
+
+/* pagevec.c */
+extern void ceph_release_page_vector(struct page **pages, int num_pages);
+
+extern struct page **ceph_get_direct_page_vector(const void __user *data,
+ int num_pages,
+ bool write_page);
+extern void ceph_put_page_vector(struct page **pages, int num_pages,
+ bool dirty);
+extern void ceph_release_page_vector(struct page **pages, int num_pages);
+extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
+extern int ceph_copy_user_to_page_vector(struct page **pages,
+ const void __user *data,
+ loff_t off, size_t len);
+extern void ceph_copy_to_page_vector(struct page **pages,
+ const void *data,
+ loff_t off, size_t len);
+extern void ceph_copy_from_page_vector(struct page **pages,
+ void *data,
+ loff_t off, size_t len);
+extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data,
+ loff_t off, size_t len);
+extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
+
+
+#endif /* _FS_CEPH_LIBCEPH_H */
diff --git a/linux/ceph/mdsmap.h b/linux/ceph/mdsmap.h
new file mode 100644
index 0000000..87ed09f
--- /dev/null
+++ b/linux/ceph/mdsmap.h
@@ -0,0 +1,63 @@
+#ifndef _FS_CEPH_MDSMAP_H
+#define _FS_CEPH_MDSMAP_H
+
+#include <linux/bug.h>
+#include <linux/ceph/types.h>
+
+/*
+ * mds map - describe servers in the mds cluster.
+ *
+ * we limit fields to those the client actually cares about
+ */
+struct ceph_mds_info {
+ u64 global_id;
+ struct ceph_entity_addr addr;
+ s32 state;
+ int num_export_targets;
+ bool laggy;
+ u32 *export_targets;
+};
+
+struct ceph_mdsmap {
+ u32 m_epoch, m_client_epoch, m_last_failure;
+ u32 m_root;
+ u32 m_session_timeout; /* seconds */
+ u32 m_session_autoclose; /* seconds */
+ u64 m_max_file_size;
+ u32 m_max_mds; /* size of m_addr, m_state arrays */
+ struct ceph_mds_info *m_info;
+
+ /* which object pools file data can be stored in */
+ int m_num_data_pg_pools;
+ u64 *m_data_pg_pools;
+ u64 m_cas_pg_pool;
+};
+
+static inline struct ceph_entity_addr *
+ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w)
+{
+ if (w >= m->m_max_mds)
+ return NULL;
+ return &m->m_info[w].addr;
+}
+
+static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w)
+{
+ BUG_ON(w < 0);
+ if (w >= m->m_max_mds)
+ return CEPH_MDS_STATE_DNE;
+ return m->m_info[w].state;
+}
+
+static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
+{
+ if (w >= 0 && w < m->m_max_mds)
+ return m->m_info[w].laggy;
+ return false;
+}
+
+extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
+extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
+extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
+
+#endif
diff --git a/linux/ceph/messenger.h b/linux/ceph/messenger.h
new file mode 100644
index 0000000..d21f2db
--- /dev/null
+++ b/linux/ceph/messenger.h
@@ -0,0 +1,304 @@
+#ifndef __FS_CEPH_MESSENGER_H
+#define __FS_CEPH_MESSENGER_H
+
+#include <linux/blk_types.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/net.h>
+#include <linux/radix-tree.h>
+#include <linux/uio.h>
+#include <linux/workqueue.h>
+
+#include <linux/ceph/types.h>
+#include <linux/ceph/buffer.h>
+
+struct ceph_msg;
+struct ceph_connection;
+
+/*
+ * Ceph defines these callbacks for handling connection events.
+ */
+struct ceph_connection_operations {
+ struct ceph_connection *(*get)(struct ceph_connection *);
+ void (*put)(struct ceph_connection *);
+
+ /* handle an incoming message. */
+ void (*dispatch) (struct ceph_connection *con, struct ceph_msg *m);
+
+ /* authorize an outgoing connection */
+ struct ceph_auth_handshake *(*get_authorizer) (
+ struct ceph_connection *con,
+ int *proto, int force_new);
+ int (*verify_authorizer_reply) (struct ceph_connection *con, int len);
+ int (*invalidate_authorizer)(struct ceph_connection *con);
+
+ /* there was some error on the socket (disconnect, whatever) */
+ void (*fault) (struct ceph_connection *con);
+
+ /* a remote host has terminated a message exchange session, and messages
+ * we sent (or they tried to send us) may be lost. */
+ void (*peer_reset) (struct ceph_connection *con);
+
+ struct ceph_msg * (*alloc_msg) (struct ceph_connection *con,
+ struct ceph_msg_header *hdr,
+ int *skip);
+};
+
+/* use format string %s%d */
+#define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num)
+
+struct ceph_messenger {
+ struct ceph_entity_inst inst; /* my name+address */
+ struct ceph_entity_addr my_enc_addr;
+
+ atomic_t stopping;
+ bool nocrc;
+
+ /*
+ * the global_seq counts connections we (attempt to) initiate
+ * in order to disambiguate certain connect race conditions.
+ */
+ u32 global_seq;
+ spinlock_t global_seq_lock;
+
+ u64 supported_features;
+ u64 required_features;
+};
+
+enum ceph_msg_data_type {
+ CEPH_MSG_DATA_NONE, /* message contains no data payload */
+ CEPH_MSG_DATA_PAGES, /* data source/destination is a page array */
+ CEPH_MSG_DATA_PAGELIST, /* data source/destination is a pagelist */
+#ifdef CONFIG_BLOCK
+ CEPH_MSG_DATA_BIO, /* data source/destination is a bio list */
+#endif /* CONFIG_BLOCK */
+};
+
+static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type)
+{
+ switch (type) {
+ case CEPH_MSG_DATA_NONE:
+ case CEPH_MSG_DATA_PAGES:
+ case CEPH_MSG_DATA_PAGELIST:
+#ifdef CONFIG_BLOCK
+ case CEPH_MSG_DATA_BIO:
+#endif /* CONFIG_BLOCK */
+ return true;
+ default:
+ return false;
+ }
+}
+
+struct ceph_msg_data {
+ struct list_head links; /* ceph_msg->data */
+ enum ceph_msg_data_type type;
+ union {
+#ifdef CONFIG_BLOCK
+ struct {
+ struct bio *bio;
+ size_t bio_length;
+ };
+#endif /* CONFIG_BLOCK */
+ struct {
+ struct page **pages; /* NOT OWNER. */
+ size_t length; /* total # bytes */
+ unsigned int alignment; /* first page */
+ };
+ struct ceph_pagelist *pagelist;
+ };
+};
+
+struct ceph_msg_data_cursor {
+ size_t total_resid; /* across all data items */
+ struct list_head *data_head; /* = &ceph_msg->data */
+
+ struct ceph_msg_data *data; /* current data item */
+ size_t resid; /* bytes not yet consumed */
+ bool last_piece; /* current is last piece */
+ bool need_crc; /* crc update needed */
+ union {
+#ifdef CONFIG_BLOCK
+ struct { /* bio */
+ struct bio *bio; /* bio from list */
+ struct bvec_iter bvec_iter;
+ };
+#endif /* CONFIG_BLOCK */
+ struct { /* pages */
+ unsigned int page_offset; /* offset in page */
+ unsigned short page_index; /* index in array */
+ unsigned short page_count; /* pages in array */
+ };
+ struct { /* pagelist */
+ struct page *page; /* page from list */
+ size_t offset; /* bytes from list */
+ };
+ };
+};
+
+/*
+ * a single message. it contains a header (src, dest, message type, etc.),
+ * footer (crc values, mainly), a "front" message body, and possibly a
+ * data payload (stored in some number of pages).
+ */
+struct ceph_msg {
+ struct ceph_msg_header hdr; /* header */
+ struct ceph_msg_footer footer; /* footer */
+ struct kvec front; /* unaligned blobs of message */
+ struct ceph_buffer *middle;
+
+ size_t data_length;
+ struct list_head data;
+ struct ceph_msg_data_cursor cursor;
+
+ struct ceph_connection *con;
+ struct list_head list_head; /* links for connection lists */
+
+ struct kref kref;
+ bool more_to_follow;
+ bool needs_out_seq;
+ int front_alloc_len;
+ unsigned long ack_stamp; /* tx: when we were acked */
+
+ struct ceph_msgpool *pool;
+};
+
+/* ceph connection fault delay defaults, for exponential backoff */
+#define BASE_DELAY_INTERVAL (HZ/2)
+#define MAX_DELAY_INTERVAL (5 * 60 * HZ)
+
+/*
+ * A single connection with another host.
+ *
+ * We maintain a queue of outgoing messages, and some session state to
+ * ensure that we can preserve the lossless, ordered delivery of
+ * messages in the case of a TCP disconnect.
+ */
+struct ceph_connection {
+ void *private;
+
+ const struct ceph_connection_operations *ops;
+
+ struct ceph_messenger *msgr;
+
+ atomic_t sock_state;
+ struct socket *sock;
+ struct ceph_entity_addr peer_addr; /* peer address */
+ struct ceph_entity_addr peer_addr_for_me;
+
+ unsigned long flags;
+ unsigned long state;
+ const char *error_msg; /* error message, if any */
+
+ struct ceph_entity_name peer_name; /* peer name */
+
+ u64 peer_features;
+ u32 connect_seq; /* identify the most recent connection
+ attempt for this connection, client */
+ u32 peer_global_seq; /* peer's global seq for this connection */
+
+ int auth_retry; /* true if we need a newer authorizer */
+ void *auth_reply_buf; /* where to put the authorizer reply */
+ int auth_reply_buf_len;
+
+ struct mutex mutex;
+
+ /* out queue */
+ struct list_head out_queue;
+ struct list_head out_sent; /* sending or sent but unacked */
+ u64 out_seq; /* last message queued for send */
+
+ u64 in_seq, in_seq_acked; /* last message received, acked */
+
+ /* connection negotiation temps */
+ char in_banner[CEPH_BANNER_MAX_LEN];
+ struct ceph_msg_connect out_connect;
+ struct ceph_msg_connect_reply in_reply;
+ struct ceph_entity_addr actual_peer_addr;
+
+ /* message out temps */
+ struct ceph_msg *out_msg; /* sending message (== tail of
+ out_sent) */
+ bool out_msg_done;
+
+ struct kvec out_kvec[8], /* sending header/footer data */
+ *out_kvec_cur;
+ int out_kvec_left; /* kvec's left in out_kvec */
+ int out_skip; /* skip this many bytes */
+ int out_kvec_bytes; /* total bytes left */
+ bool out_kvec_is_msg; /* kvec refers to out_msg */
+ int out_more; /* there is more data after the kvecs */
+ __le64 out_temp_ack; /* for writing an ack */
+
+ /* message in temps */
+ struct ceph_msg_header in_hdr;
+ struct ceph_msg *in_msg;
+ u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */
+
+ char in_tag; /* protocol control byte */
+ int in_base_pos; /* bytes read */
+ __le64 in_temp_ack; /* for reading an ack */
+
+ struct delayed_work work; /* send|recv work */
+ unsigned long delay; /* current delay interval */
+};
+
+
+extern const char *ceph_pr_addr(const struct sockaddr_storage *ss);
+extern int ceph_parse_ips(const char *c, const char *end,
+ struct ceph_entity_addr *addr,
+ int max_count, int *count);
+
+
+extern int ceph_msgr_init(void);
+extern void ceph_msgr_exit(void);
+extern void ceph_msgr_flush(void);
+
+extern void ceph_messenger_init(struct ceph_messenger *msgr,
+ struct ceph_entity_addr *myaddr,
+ u64 supported_features,
+ u64 required_features,
+ bool nocrc);
+
+extern void ceph_con_init(struct ceph_connection *con, void *private,
+ const struct ceph_connection_operations *ops,
+ struct ceph_messenger *msgr);
+extern void ceph_con_open(struct ceph_connection *con,
+ __u8 entity_type, __u64 entity_num,
+ struct ceph_entity_addr *addr);
+extern bool ceph_con_opened(struct ceph_connection *con);
+extern void ceph_con_close(struct ceph_connection *con);
+extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
+
+extern void ceph_msg_revoke(struct ceph_msg *msg);
+extern void ceph_msg_revoke_incoming(struct ceph_msg *msg);
+
+extern void ceph_con_keepalive(struct ceph_connection *con);
+
+extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
+ size_t length, size_t alignment);
+extern void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
+ struct ceph_pagelist *pagelist);
+#ifdef CONFIG_BLOCK
+extern void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
+ size_t length);
+#endif /* CONFIG_BLOCK */
+
+extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
+ bool can_fail);
+extern void ceph_msg_kfree(struct ceph_msg *m);
+
+
+static inline struct ceph_msg *ceph_msg_get(struct ceph_msg *msg)
+{
+ kref_get(&msg->kref);
+ return msg;
+}
+extern void ceph_msg_last_put(struct kref *kref);
+static inline void ceph_msg_put(struct ceph_msg *msg)
+{
+ kref_put(&msg->kref, ceph_msg_last_put);
+}
+
+extern void ceph_msg_dump(struct ceph_msg *msg);
+
+#endif
diff --git a/linux/ceph/mon_client.h b/linux/ceph/mon_client.h
new file mode 100644
index 0000000..a486f39
--- /dev/null
+++ b/linux/ceph/mon_client.h
@@ -0,0 +1,121 @@
+#ifndef _FS_CEPH_MON_CLIENT_H
+#define _FS_CEPH_MON_CLIENT_H
+
+#include <linux/completion.h>
+#include <linux/kref.h>
+#include <linux/rbtree.h>
+
+#include <linux/ceph/messenger.h>
+
+struct ceph_client;
+struct ceph_mount_args;
+struct ceph_auth_client;
+
+/*
+ * The monitor map enumerates the set of all monitors.
+ */
+struct ceph_monmap {
+ struct ceph_fsid fsid;
+ u32 epoch;
+ u32 num_mon;
+ struct ceph_entity_inst mon_inst[0];
+};
+
+struct ceph_mon_client;
+struct ceph_mon_generic_request;
+
+
+/*
+ * Generic mechanism for resending monitor requests.
+ */
+typedef void (*ceph_monc_request_func_t)(struct ceph_mon_client *monc,
+ int newmon);
+
+/* a pending monitor request */
+struct ceph_mon_request {
+ struct ceph_mon_client *monc;
+ struct delayed_work delayed_work;
+ unsigned long delay;
+ ceph_monc_request_func_t do_request;
+};
+
+/*
+ * ceph_mon_generic_request is being used for the statfs and poolop requests
+ * which are being done a bit differently because we need to get data back
+ * to the caller
+ */
+struct ceph_mon_generic_request {
+ struct kref kref;
+ u64 tid;
+ struct rb_node node;
+ int result;
+ void *buf;
+ int buf_len;
+ struct completion completion;
+ struct ceph_msg *request; /* original request */
+ struct ceph_msg *reply; /* and reply */
+};
+
+struct ceph_mon_client {
+ struct ceph_client *client;
+ struct ceph_monmap *monmap;
+
+ struct mutex mutex;
+ struct delayed_work delayed_work;
+
+ struct ceph_auth_client *auth;
+ struct ceph_msg *m_auth, *m_auth_reply, *m_subscribe, *m_subscribe_ack;
+ int pending_auth;
+
+ bool hunting;
+ int cur_mon; /* last monitor i contacted */
+ unsigned long sub_sent, sub_renew_after;
+ struct ceph_connection con;
+
+ /* pending generic requests */
+ struct rb_root generic_request_tree;
+ int num_generic_requests;
+ u64 last_tid;
+
+ /* mds/osd map */
+ int want_mdsmap;
+ int want_next_osdmap; /* 1 = want, 2 = want+asked */
+ u32 have_osdmap, have_mdsmap;
+
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *debugfs_file;
+#endif
+};
+
+extern struct ceph_monmap *ceph_monmap_decode(void *p, void *end);
+extern int ceph_monmap_contains(struct ceph_monmap *m,
+ struct ceph_entity_addr *addr);
+
+extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl);
+extern void ceph_monc_stop(struct ceph_mon_client *monc);
+
+/*
+ * The model here is to indicate that we need a new map of at least
+ * epoch @want, and also call in when we receive a map. We will
+ * periodically rerequest the map from the monitor cluster until we
+ * get what we want.
+ */
+extern int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 have);
+extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have);
+
+extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc);
+
+extern int ceph_monc_do_statfs(struct ceph_mon_client *monc,
+ struct ceph_statfs *buf);
+
+extern int ceph_monc_open_session(struct ceph_mon_client *monc);
+
+extern int ceph_monc_validate_auth(struct ceph_mon_client *monc);
+
+extern int ceph_monc_create_snapid(struct ceph_mon_client *monc,
+ u32 pool, u64 *snapid);
+
+extern int ceph_monc_delete_snapid(struct ceph_mon_client *monc,
+ u32 pool, u64 snapid);
+
+#endif
diff --git a/linux/ceph/msgpool.h b/linux/ceph/msgpool.h
new file mode 100644
index 0000000..4b0d389
--- /dev/null
+++ b/linux/ceph/msgpool.h
@@ -0,0 +1,26 @@
+#ifndef _FS_CEPH_MSGPOOL
+#define _FS_CEPH_MSGPOOL
+
+#include <linux/mempool.h>
+#include <linux/ceph/messenger.h>
+
+/*
+ * we use memory pools for preallocating messages we may receive, to
+ * avoid unexpected OOM conditions.
+ */
+struct ceph_msgpool {
+ const char *name;
+ mempool_t *pool;
+ int type; /* preallocated message type */
+ int front_len; /* preallocated payload size */
+};
+
+extern int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
+ int front_len, int size, bool blocking,
+ const char *name);
+extern void ceph_msgpool_destroy(struct ceph_msgpool *pool);
+extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *,
+ int front_len);
+extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *);
+
+#endif
diff --git a/linux/ceph/msgr.h b/linux/ceph/msgr.h
new file mode 100644
index 0000000..3d94a73
--- /dev/null
+++ b/linux/ceph/msgr.h
@@ -0,0 +1,176 @@
+#ifndef CEPH_MSGR_H
+#define CEPH_MSGR_H
+
+/*
+ * Data types for message passing layer used by Ceph.
+ */
+
+#define CEPH_MON_PORT 6789 /* default monitor port */
+
+/*
+ * client-side processes will try to bind to ports in this
+ * range, simply for the benefit of tools like nmap or wireshark
+ * that would like to identify the protocol.
+ */
+#define CEPH_PORT_FIRST 6789
+#define CEPH_PORT_START 6800 /* non-monitors start here */
+#define CEPH_PORT_LAST 6900
+
+/*
+ * tcp connection banner. include a protocol version. and adjust
+ * whenever the wire protocol changes. try to keep this string length
+ * constant.
+ */
+#define CEPH_BANNER "ceph v027"
+#define CEPH_BANNER_MAX_LEN 30
+
+
+/*
+ * Rollover-safe type and comparator for 32-bit sequence numbers.
+ * Comparator returns -1, 0, or 1.
+ */
+typedef __u32 ceph_seq_t;
+
+static inline __s32 ceph_seq_cmp(__u32 a, __u32 b)
+{
+ return (__s32)a - (__s32)b;
+}
+
+
+/*
+ * entity_name -- logical name for a process participating in the
+ * network, e.g. 'mds0' or 'osd3'.
+ */
+struct ceph_entity_name {
+ __u8 type; /* CEPH_ENTITY_TYPE_* */
+ __le64 num;
+} __attribute__ ((packed));
+
+#define CEPH_ENTITY_TYPE_MON 0x01
+#define CEPH_ENTITY_TYPE_MDS 0x02
+#define CEPH_ENTITY_TYPE_OSD 0x04
+#define CEPH_ENTITY_TYPE_CLIENT 0x08
+#define CEPH_ENTITY_TYPE_AUTH 0x20
+
+#define CEPH_ENTITY_TYPE_ANY 0xFF
+
+extern const char *ceph_entity_type_name(int type);
+
+/*
+ * entity_addr -- network address
+ */
+struct ceph_entity_addr {
+ __le32 type;
+ __le32 nonce; /* unique id for process (e.g. pid) */
+ struct sockaddr_storage in_addr;
+} __attribute__ ((packed));
+
+struct ceph_entity_inst {
+ struct ceph_entity_name name;
+ struct ceph_entity_addr addr;
+} __attribute__ ((packed));
+
+
+/* used by message exchange protocol */
+#define CEPH_MSGR_TAG_READY 1 /* server->client: ready for messages */
+#define CEPH_MSGR_TAG_RESETSESSION 2 /* server->client: reset, try again */
+#define CEPH_MSGR_TAG_WAIT 3 /* server->client: wait for racing
+ incoming connection */
+#define CEPH_MSGR_TAG_RETRY_SESSION 4 /* server->client + cseq: try again
+ with higher cseq */
+#define CEPH_MSGR_TAG_RETRY_GLOBAL 5 /* server->client + gseq: try again
+ with higher gseq */
+#define CEPH_MSGR_TAG_CLOSE 6 /* closing pipe */
+#define CEPH_MSGR_TAG_MSG 7 /* message */
+#define CEPH_MSGR_TAG_ACK 8 /* message ack */
+#define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */
+#define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */
+#define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */
+#define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */
+#define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */
+
+
+/*
+ * connection negotiation
+ */
+struct ceph_msg_connect {
+ __le64 features; /* supported feature bits */
+ __le32 host_type; /* CEPH_ENTITY_TYPE_* */
+ __le32 global_seq; /* count connections initiated by this host */
+ __le32 connect_seq; /* count connections initiated in this session */
+ __le32 protocol_version;
+ __le32 authorizer_protocol;
+ __le32 authorizer_len;
+ __u8 flags; /* CEPH_MSG_CONNECT_* */
+} __attribute__ ((packed));
+
+struct ceph_msg_connect_reply {
+ __u8 tag;
+ __le64 features; /* feature bits for this session */
+ __le32 global_seq;
+ __le32 connect_seq;
+ __le32 protocol_version;
+ __le32 authorizer_len;
+ __u8 flags;
+} __attribute__ ((packed));
+
+#define CEPH_MSG_CONNECT_LOSSY 1 /* messages i send may be safely dropped */
+
+
+/*
+ * message header
+ */
+struct ceph_msg_header_old {
+ __le64 seq; /* message seq# for this session */
+ __le64 tid; /* transaction id */
+ __le16 type; /* message type */
+ __le16 priority; /* priority. higher value == higher priority */
+ __le16 version; /* version of message encoding */
+
+ __le32 front_len; /* bytes in main payload */
+ __le32 middle_len;/* bytes in middle payload */
+ __le32 data_len; /* bytes of data payload */
+ __le16 data_off; /* sender: include full offset;
+ receiver: mask against ~PAGE_MASK */
+
+ struct ceph_entity_inst src, orig_src;
+ __le32 reserved;
+ __le32 crc; /* header crc32c */
+} __attribute__ ((packed));
+
+struct ceph_msg_header {
+ __le64 seq; /* message seq# for this session */
+ __le64 tid; /* transaction id */
+ __le16 type; /* message type */
+ __le16 priority; /* priority. higher value == higher priority */
+ __le16 version; /* version of message encoding */
+
+ __le32 front_len; /* bytes in main payload */
+ __le32 middle_len;/* bytes in middle payload */
+ __le32 data_len; /* bytes of data payload */
+ __le16 data_off; /* sender: include full offset;
+ receiver: mask against ~PAGE_MASK */
+
+ struct ceph_entity_name src;
+ __le32 reserved;
+ __le32 crc; /* header crc32c */
+} __attribute__ ((packed));
+
+#define CEPH_MSG_PRIO_LOW 64
+#define CEPH_MSG_PRIO_DEFAULT 127
+#define CEPH_MSG_PRIO_HIGH 196
+#define CEPH_MSG_PRIO_HIGHEST 255
+
+/*
+ * follows data payload
+ */
+struct ceph_msg_footer {
+ __le32 front_crc, middle_crc, data_crc;
+ __u8 flags;
+} __attribute__ ((packed));
+
+#define CEPH_MSG_FOOTER_COMPLETE (1<<0) /* msg wasn't aborted */
+#define CEPH_MSG_FOOTER_NOCRC (1<<1) /* no data crc */
+
+
+#endif
diff --git a/linux/ceph/osd_client.h b/linux/ceph/osd_client.h
new file mode 100644
index 0000000..94ec696
--- /dev/null
+++ b/linux/ceph/osd_client.h
@@ -0,0 +1,374 @@
+#ifndef _FS_CEPH_OSD_CLIENT_H
+#define _FS_CEPH_OSD_CLIENT_H
+
+#include <linux/completion.h>
+#include <linux/kref.h>
+#include <linux/mempool.h>
+#include <linux/rbtree.h>
+
+#include <linux/ceph/types.h>
+#include <linux/ceph/osdmap.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/auth.h>
+#include <linux/ceph/pagelist.h>
+
+struct ceph_msg;
+struct ceph_snap_context;
+struct ceph_osd_request;
+struct ceph_osd_client;
+struct ceph_authorizer;
+
+/*
+ * completion callback for async writepages
+ */
+typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *,
+ struct ceph_msg *);
+typedef void (*ceph_osdc_unsafe_callback_t)(struct ceph_osd_request *, bool);
+
+/* a given osd we're communicating with */
+struct ceph_osd {
+ atomic_t o_ref;
+ struct ceph_osd_client *o_osdc;
+ int o_osd;
+ int o_incarnation;
+ struct rb_node o_node;
+ struct ceph_connection o_con;
+ struct list_head o_requests;
+ struct list_head o_linger_requests;
+ struct list_head o_osd_lru;
+ struct ceph_auth_handshake o_auth;
+ unsigned long lru_ttl;
+ int o_marked_for_keepalive;
+ struct list_head o_keepalive_item;
+};
+
+
+#define CEPH_OSD_MAX_OP 3
+
+enum ceph_osd_data_type {
+ CEPH_OSD_DATA_TYPE_NONE = 0,
+ CEPH_OSD_DATA_TYPE_PAGES,
+ CEPH_OSD_DATA_TYPE_PAGELIST,
+#ifdef CONFIG_BLOCK
+ CEPH_OSD_DATA_TYPE_BIO,
+#endif /* CONFIG_BLOCK */
+};
+
+struct ceph_osd_data {
+ enum ceph_osd_data_type type;
+ union {
+ struct {
+ struct page **pages;
+ u64 length;
+ u32 alignment;
+ bool pages_from_pool;
+ bool own_pages;
+ };
+ struct ceph_pagelist *pagelist;
+#ifdef CONFIG_BLOCK
+ struct {
+ struct bio *bio; /* list of bios */
+ size_t bio_length; /* total in list */
+ };
+#endif /* CONFIG_BLOCK */
+ };
+};
+
+struct ceph_osd_req_op {
+ u16 op; /* CEPH_OSD_OP_* */
+ u32 flags; /* CEPH_OSD_OP_FLAG_* */
+ u32 payload_len;
+ union {
+ struct ceph_osd_data raw_data_in;
+ struct {
+ u64 offset, length;
+ u64 truncate_size;
+ u32 truncate_seq;
+ struct ceph_osd_data osd_data;
+ } extent;
+ struct {
+ const char *class_name;
+ const char *method_name;
+ struct ceph_osd_data request_info;
+ struct ceph_osd_data request_data;
+ struct ceph_osd_data response_data;
+ __u8 class_len;
+ __u8 method_len;
+ __u8 argc;
+ } cls;
+ struct {
+ u64 cookie;
+ u64 ver;
+ u32 prot_ver;
+ u32 timeout;
+ __u8 flag;
+ } watch;
+ struct {
+ u64 expected_object_size;
+ u64 expected_write_size;
+ } alloc_hint;
+ };
+};
+
+/* an in-flight request */
+struct ceph_osd_request {
+ u64 r_tid; /* unique for this client */
+ struct rb_node r_node;
+ struct list_head r_req_lru_item;
+ struct list_head r_osd_item;
+ struct list_head r_linger_item;
+ struct list_head r_linger_osd;
+ struct ceph_osd *r_osd;
+ struct ceph_pg r_pgid;
+ int r_pg_osds[CEPH_PG_MAX_SIZE];
+ int r_num_pg_osds;
+
+ struct ceph_msg *r_request, *r_reply;
+ int r_flags; /* any additional flags for the osd */
+ u32 r_sent; /* >0 if r_request is sending/sent */
+
+ /* request osd ops array */
+ unsigned int r_num_ops;
+ struct ceph_osd_req_op r_ops[CEPH_OSD_MAX_OP];
+
+ /* these are updated on each send */
+ __le32 *r_request_osdmap_epoch;
+ __le32 *r_request_flags;
+ __le64 *r_request_pool;
+ void *r_request_pgid;
+ __le32 *r_request_attempts;
+ bool r_paused;
+ struct ceph_eversion *r_request_reassert_version;
+
+ int r_result;
+ int r_reply_op_len[CEPH_OSD_MAX_OP];
+ s32 r_reply_op_result[CEPH_OSD_MAX_OP];
+ int r_got_reply;
+ int r_linger;
+
+ struct ceph_osd_client *r_osdc;
+ struct kref r_kref;
+ bool r_mempool;
+ struct completion r_completion, r_safe_completion;
+ ceph_osdc_callback_t r_callback;
+ ceph_osdc_unsafe_callback_t r_unsafe_callback;
+ struct ceph_eversion r_reassert_version;
+ struct list_head r_unsafe_item;
+
+ struct inode *r_inode; /* for use by callbacks */
+ void *r_priv; /* ditto */
+
+ struct ceph_object_locator r_base_oloc;
+ struct ceph_object_id r_base_oid;
+ struct ceph_object_locator r_target_oloc;
+ struct ceph_object_id r_target_oid;
+
+ u64 r_snapid;
+ unsigned long r_stamp; /* send OR check time */
+
+ struct ceph_snap_context *r_snapc; /* snap context for writes */
+};
+
+struct ceph_request_redirect {
+ struct ceph_object_locator oloc;
+};
+
+struct ceph_osd_event {
+ u64 cookie;
+ int one_shot;
+ struct ceph_osd_client *osdc;
+ void (*cb)(u64, u64, u8, void *);
+ void *data;
+ struct rb_node node;
+ struct list_head osd_node;
+ struct kref kref;
+};
+
+struct ceph_osd_event_work {
+ struct work_struct work;
+ struct ceph_osd_event *event;
+ u64 ver;
+ u64 notify_id;
+ u8 opcode;
+};
+
+struct ceph_osd_client {
+ struct ceph_client *client;
+
+ struct ceph_osdmap *osdmap; /* current map */
+ struct rw_semaphore map_sem;
+ struct completion map_waiters;
+ u64 last_requested_map;
+
+ struct mutex request_mutex;
+ struct rb_root osds; /* osds */
+ struct list_head osd_lru; /* idle osds */
+ u64 timeout_tid; /* tid of timeout triggering rq */
+ u64 last_tid; /* tid of last request */
+ struct rb_root requests; /* pending requests */
+ struct list_head req_lru; /* in-flight lru */
+ struct list_head req_unsent; /* unsent/need-resend queue */
+ struct list_head req_notarget; /* map to no osd */
+ struct list_head req_linger; /* lingering requests */
+ int num_requests;
+ struct delayed_work timeout_work;
+ struct delayed_work osds_timeout_work;
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *debugfs_file;
+#endif
+
+ mempool_t *req_mempool;
+
+ struct ceph_msgpool msgpool_op;
+ struct ceph_msgpool msgpool_op_reply;
+
+ spinlock_t event_lock;
+ struct rb_root event_tree;
+ u64 event_count;
+
+ struct workqueue_struct *notify_wq;
+};
+
+extern int ceph_osdc_setup(void);
+extern void ceph_osdc_cleanup(void);
+
+extern int ceph_osdc_init(struct ceph_osd_client *osdc,
+ struct ceph_client *client);
+extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
+
+extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
+ struct ceph_msg *msg);
+extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
+ struct ceph_msg *msg);
+
+extern void osd_req_op_init(struct ceph_osd_request *osd_req,
+ unsigned int which, u16 opcode);
+
+extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *,
+ unsigned int which,
+ struct page **pages, u64 length,
+ u32 alignment, bool pages_from_pool,
+ bool own_pages);
+
+extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
+ unsigned int which, u16 opcode,
+ u64 offset, u64 length,
+ u64 truncate_size, u32 truncate_seq);
+extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
+ unsigned int which, u64 length);
+
+extern struct ceph_osd_data *osd_req_op_extent_osd_data(
+ struct ceph_osd_request *osd_req,
+ unsigned int which);
+extern struct ceph_osd_data *osd_req_op_cls_response_data(
+ struct ceph_osd_request *osd_req,
+ unsigned int which);
+
+extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *,
+ unsigned int which,
+ struct page **pages, u64 length,
+ u32 alignment, bool pages_from_pool,
+ bool own_pages);
+extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *,
+ unsigned int which,
+ struct ceph_pagelist *pagelist);
+#ifdef CONFIG_BLOCK
+extern void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *,
+ unsigned int which,
+ struct bio *bio, size_t bio_length);
+#endif /* CONFIG_BLOCK */
+
+extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
+ unsigned int which,
+ struct ceph_pagelist *pagelist);
+extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
+ unsigned int which,
+ struct page **pages, u64 length,
+ u32 alignment, bool pages_from_pool,
+ bool own_pages);
+extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
+ unsigned int which,
+ struct page **pages, u64 length,
+ u32 alignment, bool pages_from_pool,
+ bool own_pages);
+
+extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req,
+ unsigned int which, u16 opcode,
+ const char *class, const char *method);
+extern void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
+ unsigned int which, u16 opcode,
+ u64 cookie, u64 version, int flag);
+extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
+ unsigned int which,
+ u64 expected_object_size,
+ u64 expected_write_size);
+
+extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
+ struct ceph_snap_context *snapc,
+ unsigned int num_ops,
+ bool use_mempool,
+ gfp_t gfp_flags);
+
+extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off,
+ struct ceph_snap_context *snapc,
+ u64 snap_id,
+ struct timespec *mtime);
+
+extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
+ struct ceph_file_layout *layout,
+ struct ceph_vino vino,
+ u64 offset, u64 *len,
+ int num_ops, int opcode, int flags,
+ struct ceph_snap_context *snapc,
+ u32 truncate_seq, u64 truncate_size,
+ bool use_mempool);
+
+extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req);
+extern void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req);
+
+static inline void ceph_osdc_get_request(struct ceph_osd_request *req)
+{
+ kref_get(&req->r_kref);
+}
+extern void ceph_osdc_release_request(struct kref *kref);
+static inline void ceph_osdc_put_request(struct ceph_osd_request *req)
+{
+ kref_put(&req->r_kref, ceph_osdc_release_request);
+}
+
+extern int ceph_osdc_start_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req,
+ bool nofail);
+extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req);
+extern void ceph_osdc_sync(struct ceph_osd_client *osdc);
+
+extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc);
+
+extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
+ struct ceph_vino vino,
+ struct ceph_file_layout *layout,
+ u64 off, u64 *plen,
+ u32 truncate_seq, u64 truncate_size,
+ struct page **pages, int nr_pages,
+ int page_align);
+
+extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
+ struct ceph_vino vino,
+ struct ceph_file_layout *layout,
+ struct ceph_snap_context *sc,
+ u64 off, u64 len,
+ u32 truncate_seq, u64 truncate_size,
+ struct timespec *mtime,
+ struct page **pages, int nr_pages);
+
+/* watch/notify events */
+extern int ceph_osdc_create_event(struct ceph_osd_client *osdc,
+ void (*event_cb)(u64, u64, u8, void *),
+ void *data, struct ceph_osd_event **pevent);
+extern void ceph_osdc_cancel_event(struct ceph_osd_event *event);
+extern void ceph_osdc_put_event(struct ceph_osd_event *event);
+#endif
+
diff --git a/linux/ceph/osdmap.h b/linux/ceph/osdmap.h
new file mode 100644
index 0000000..561ea89
--- /dev/null
+++ b/linux/ceph/osdmap.h
@@ -0,0 +1,225 @@
+#ifndef _FS_CEPH_OSDMAP_H
+#define _FS_CEPH_OSDMAP_H
+
+#include <linux/rbtree.h>
+#include <linux/ceph/types.h>
+#include <linux/ceph/decode.h>
+#include <linux/ceph/ceph_fs.h>
+#include <linux/crush/crush.h>
+
+/*
+ * The osd map describes the current membership of the osd cluster and
+ * specifies the mapping of objects to placement groups and placement
+ * groups to (sets of) osds. That is, it completely specifies the
+ * (desired) distribution of all data objects in the system at some
+ * point in time.
+ *
+ * Each map version is identified by an epoch, which increases monotonically.
+ *
+ * The map can be updated either via an incremental map (diff) describing
+ * the change between two successive epochs, or as a fully encoded map.
+ */
+struct ceph_pg {
+ uint64_t pool;
+ uint32_t seed;
+};
+
+#define CEPH_POOL_FLAG_HASHPSPOOL 1
+
+struct ceph_pg_pool_info {
+ struct rb_node node;
+ s64 id;
+ u8 type;
+ u8 size;
+ u8 crush_ruleset;
+ u8 object_hash;
+ u32 pg_num, pgp_num;
+ int pg_num_mask, pgp_num_mask;
+ s64 read_tier;
+ s64 write_tier; /* wins for read+write ops */
+ u64 flags;
+ char *name;
+};
+
+static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool)
+{
+ switch (pool->type) {
+ case CEPH_POOL_TYPE_REP:
+ return true;
+ case CEPH_POOL_TYPE_EC:
+ return false;
+ default:
+ BUG_ON(1);
+ }
+}
+
+struct ceph_object_locator {
+ s64 pool;
+};
+
+/*
+ * Maximum supported by kernel client object name length
+ *
+ * (probably outdated: must be >= RBD_MAX_MD_NAME_LEN -- currently 100)
+ */
+#define CEPH_MAX_OID_NAME_LEN 100
+
+struct ceph_object_id {
+ char name[CEPH_MAX_OID_NAME_LEN];
+ int name_len;
+};
+
+struct ceph_pg_mapping {
+ struct rb_node node;
+ struct ceph_pg pgid;
+
+ union {
+ struct {
+ int len;
+ int osds[];
+ } pg_temp;
+ struct {
+ int osd;
+ } primary_temp;
+ };
+};
+
+struct ceph_osdmap {
+ struct ceph_fsid fsid;
+ u32 epoch;
+ u32 mkfs_epoch;
+ struct ceph_timespec created, modified;
+
+ u32 flags; /* CEPH_OSDMAP_* */
+
+ u32 max_osd; /* size of osd_state, _offload, _addr arrays */
+ u8 *osd_state; /* CEPH_OSD_* */
+ u32 *osd_weight; /* 0 = failed, 0x10000 = 100% normal */
+ struct ceph_entity_addr *osd_addr;
+
+ struct rb_root pg_temp;
+ struct rb_root primary_temp;
+
+ u32 *osd_primary_affinity;
+
+ struct rb_root pg_pools;
+ u32 pool_max;
+
+ /* the CRUSH map specifies the mapping of placement groups to
+ * the list of osds that store+replicate them. */
+ struct crush_map *crush;
+
+ struct mutex crush_scratch_mutex;
+ int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3];
+};
+
+static inline void ceph_oid_set_name(struct ceph_object_id *oid,
+ const char *name)
+{
+ int len;
+
+ len = strlen(name);
+ if (len > sizeof(oid->name)) {
+ WARN(1, "ceph_oid_set_name '%s' len %d vs %zu, truncating\n",
+ name, len, sizeof(oid->name));
+ len = sizeof(oid->name);
+ }
+
+ memcpy(oid->name, name, len);
+ oid->name_len = len;
+}
+
+static inline void ceph_oid_copy(struct ceph_object_id *dest,
+ struct ceph_object_id *src)
+{
+ BUG_ON(src->name_len > sizeof(dest->name));
+ memcpy(dest->name, src->name, src->name_len);
+ dest->name_len = src->name_len;
+}
+
+static inline int ceph_osd_exists(struct ceph_osdmap *map, int osd)
+{
+ return osd >= 0 && osd < map->max_osd &&
+ (map->osd_state[osd] & CEPH_OSD_EXISTS);
+}
+
+static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd)
+{
+ return ceph_osd_exists(map, osd) &&
+ (map->osd_state[osd] & CEPH_OSD_UP);
+}
+
+static inline int ceph_osd_is_down(struct ceph_osdmap *map, int osd)
+{
+ return !ceph_osd_is_up(map, osd);
+}
+
+static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag)
+{
+ return map && (map->flags & flag);
+}
+
+extern char *ceph_osdmap_state_str(char *str, int len, int state);
+extern u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd);
+
+static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map,
+ int osd)
+{
+ if (osd >= map->max_osd)
+ return NULL;
+ return &map->osd_addr[osd];
+}
+
+static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
+{
+ __u8 version;
+
+ if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) {
+ pr_warning("incomplete pg encoding");
+
+ return -EINVAL;
+ }
+ version = ceph_decode_8(p);
+ if (version > 1) {
+ pr_warning("do not understand pg encoding %d > 1",
+ (int)version);
+ return -EINVAL;
+ }
+
+ pgid->pool = ceph_decode_64(p);
+ pgid->seed = ceph_decode_32(p);
+ *p += 4; /* skip deprecated preferred value */
+
+ return 0;
+}
+
+extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end);
+extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
+ struct ceph_osdmap *map,
+ struct ceph_messenger *msgr);
+extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
+
+/* calculate mapping of a file extent to an object */
+extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
+ u64 off, u64 len,
+ u64 *bno, u64 *oxoff, u64 *oxlen);
+
+/* calculate mapping of object to a placement group */
+extern int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap,
+ struct ceph_object_locator *oloc,
+ struct ceph_object_id *oid,
+ struct ceph_pg *pg_out);
+
+extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap,
+ struct ceph_pg pgid,
+ int *osds, int *primary);
+extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
+ struct ceph_pg pgid);
+
+extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map,
+ u64 id);
+
+extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
+extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
+
+#endif
diff --git a/linux/ceph/pagelist.h b/linux/ceph/pagelist.h
new file mode 100644
index 0000000..9660d6b
--- /dev/null
+++ b/linux/ceph/pagelist.h
@@ -0,0 +1,75 @@
+#ifndef __FS_CEPH_PAGELIST_H
+#define __FS_CEPH_PAGELIST_H
+
+#include <linux/list.h>
+
+struct ceph_pagelist {
+ struct list_head head;
+ void *mapped_tail;
+ size_t length;
+ size_t room;
+ struct list_head free_list;
+ size_t num_pages_free;
+};
+
+struct ceph_pagelist_cursor {
+ struct ceph_pagelist *pl; /* pagelist, for error checking */
+ struct list_head *page_lru; /* page in list */
+ size_t room; /* room remaining to reset to */
+};
+
+static inline void ceph_pagelist_init(struct ceph_pagelist *pl)
+{
+ INIT_LIST_HEAD(&pl->head);
+ pl->mapped_tail = NULL;
+ pl->length = 0;
+ pl->room = 0;
+ INIT_LIST_HEAD(&pl->free_list);
+ pl->num_pages_free = 0;
+}
+
+extern int ceph_pagelist_release(struct ceph_pagelist *pl);
+
+extern int ceph_pagelist_append(struct ceph_pagelist *pl, const void *d, size_t l);
+
+extern int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space);
+
+extern int ceph_pagelist_free_reserve(struct ceph_pagelist *pl);
+
+extern void ceph_pagelist_set_cursor(struct ceph_pagelist *pl,
+ struct ceph_pagelist_cursor *c);
+
+extern int ceph_pagelist_truncate(struct ceph_pagelist *pl,
+ struct ceph_pagelist_cursor *c);
+
+static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v)
+{
+ __le64 ev = cpu_to_le64(v);
+ return ceph_pagelist_append(pl, &ev, sizeof(ev));
+}
+static inline int ceph_pagelist_encode_32(struct ceph_pagelist *pl, u32 v)
+{
+ __le32 ev = cpu_to_le32(v);
+ return ceph_pagelist_append(pl, &ev, sizeof(ev));
+}
+static inline int ceph_pagelist_encode_16(struct ceph_pagelist *pl, u16 v)
+{
+ __le16 ev = cpu_to_le16(v);
+ return ceph_pagelist_append(pl, &ev, sizeof(ev));
+}
+static inline int ceph_pagelist_encode_8(struct ceph_pagelist *pl, u8 v)
+{
+ return ceph_pagelist_append(pl, &v, 1);
+}
+static inline int ceph_pagelist_encode_string(struct ceph_pagelist *pl,
+ char *s, size_t len)
+{
+ int ret = ceph_pagelist_encode_32(pl, len);
+ if (ret)
+ return ret;
+ if (len)
+ return ceph_pagelist_append(pl, s, len);
+ return 0;
+}
+
+#endif
diff --git a/linux/ceph/rados.h b/linux/ceph/rados.h
new file mode 100644
index 0000000..f20e0d8
--- /dev/null
+++ b/linux/ceph/rados.h
@@ -0,0 +1,436 @@
+#ifndef CEPH_RADOS_H
+#define CEPH_RADOS_H
+
+/*
+ * Data types for the Ceph distributed object storage layer RADOS
+ * (Reliable Autonomic Distributed Object Store).
+ */
+
+#include <linux/ceph/msgr.h>
+
+/*
+ * fs id
+ */
+struct ceph_fsid {
+ unsigned char fsid[16];
+};
+
+static inline int ceph_fsid_compare(const struct ceph_fsid *a,
+ const struct ceph_fsid *b)
+{
+ return memcmp(a, b, sizeof(*a));
+}
+
+/*
+ * ino, object, etc.
+ */
+typedef __le64 ceph_snapid_t;
+#define CEPH_SNAPDIR ((__u64)(-1)) /* reserved for hidden .snap dir */
+#define CEPH_NOSNAP ((__u64)(-2)) /* "head", "live" revision */
+#define CEPH_MAXSNAP ((__u64)(-3)) /* largest valid snapid */
+
+struct ceph_timespec {
+ __le32 tv_sec;
+ __le32 tv_nsec;
+} __attribute__ ((packed));
+
+
+/*
+ * object layout - how objects are mapped into PGs
+ */
+#define CEPH_OBJECT_LAYOUT_HASH 1
+#define CEPH_OBJECT_LAYOUT_LINEAR 2
+#define CEPH_OBJECT_LAYOUT_HASHINO 3
+
+/*
+ * pg layout -- how PGs are mapped onto (sets of) OSDs
+ */
+#define CEPH_PG_LAYOUT_CRUSH 0
+#define CEPH_PG_LAYOUT_HASH 1
+#define CEPH_PG_LAYOUT_LINEAR 2
+#define CEPH_PG_LAYOUT_HYBRID 3
+
+#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */
+
+/*
+ * placement group.
+ * we encode this into one __le64.
+ */
+struct ceph_pg_v1 {
+ __le16 preferred; /* preferred primary osd */
+ __le16 ps; /* placement seed */
+ __le32 pool; /* object pool */
+} __attribute__ ((packed));
+
+/*
+ * pg_pool is a set of pgs storing a pool of objects
+ *
+ * pg_num -- base number of pseudorandomly placed pgs
+ *
+ * pgp_num -- effective number when calculating pg placement. this
+ * is used for pg_num increases. new pgs result in data being "split"
+ * into new pgs. for this to proceed smoothly, new pgs are initially
+ * colocated with their parents; that is, pgp_num doesn't increase
+ * until the new pgs have successfully split. only _then_ are the new
+ * pgs placed independently.
+ *
+ * lpg_num -- localized pg count (per device). replicas are randomly
+ * selected.
+ *
+ * lpgp_num -- as above.
+ */
+#define CEPH_NOPOOL ((__u64) (-1)) /* pool id not defined */
+
+#define CEPH_POOL_TYPE_REP 1
+#define CEPH_POOL_TYPE_RAID4 2 /* never implemented */
+#define CEPH_POOL_TYPE_EC 3
+
+/*
+ * stable_mod func is used to control the number of placement groups.
+ * similar to straight-up modulo, but produces a stable mapping as b
+ * increases over time. b is the number of bins, and bmask is the
+ * containing power of 2 minus 1.
+ *
+ * b <= bmask and bmask=(2**n)-1
+ * e.g., b=12 -> bmask=15, b=123 -> bmask=127
+ */
+static inline int ceph_stable_mod(int x, int b, int bmask)
+{
+ if ((x & bmask) < b)
+ return x & bmask;
+ else
+ return x & (bmask >> 1);
+}
+
+/*
+ * object layout - how a given object should be stored.
+ */
+struct ceph_object_layout {
+ struct ceph_pg_v1 ol_pgid; /* raw pg, with _full_ ps precision. */
+ __le32 ol_stripe_unit; /* for per-object parity, if any */
+} __attribute__ ((packed));
+
+/*
+ * compound epoch+version, used by storage layer to serialize mutations
+ */
+struct ceph_eversion {
+ __le32 epoch;
+ __le64 version;
+} __attribute__ ((packed));
+
+/*
+ * osd map bits
+ */
+
+/* status bits */
+#define CEPH_OSD_EXISTS (1<<0)
+#define CEPH_OSD_UP (1<<1)
+#define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */
+#define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */
+
+extern const char *ceph_osd_state_name(int s);
+
+/* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */
+#define CEPH_OSD_IN 0x10000
+#define CEPH_OSD_OUT 0
+
+/* osd primary-affinity. fixed point value: 0x10000 == baseline */
+#define CEPH_OSD_MAX_PRIMARY_AFFINITY 0x10000
+#define CEPH_OSD_DEFAULT_PRIMARY_AFFINITY 0x10000
+
+
+/*
+ * osd map flag bits
+ */
+#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */
+#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */
+#define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */
+#define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */
+#define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */
+#define CEPH_OSDMAP_NOUP (1<<5) /* block osd boot */
+#define CEPH_OSDMAP_NODOWN (1<<6) /* block osd mark-down/failure */
+#define CEPH_OSDMAP_NOOUT (1<<7) /* block osd auto mark-out */
+#define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */
+#define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */
+#define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */
+
+/*
+ * The error code to return when an OSD can't handle a write
+ * because it is too large.
+ */
+#define OSD_WRITETOOBIG EMSGSIZE
+
+/*
+ * osd ops
+ *
+ * WARNING: do not use these op codes directly. Use the helpers
+ * defined below instead. In certain cases, op code behavior was
+ * redefined, resulting in special-cases in the helpers.
+ */
+#define CEPH_OSD_OP_MODE 0xf000
+#define CEPH_OSD_OP_MODE_RD 0x1000
+#define CEPH_OSD_OP_MODE_WR 0x2000
+#define CEPH_OSD_OP_MODE_RMW 0x3000
+#define CEPH_OSD_OP_MODE_SUB 0x4000
+
+#define CEPH_OSD_OP_TYPE 0x0f00
+#define CEPH_OSD_OP_TYPE_LOCK 0x0100
+#define CEPH_OSD_OP_TYPE_DATA 0x0200
+#define CEPH_OSD_OP_TYPE_ATTR 0x0300
+#define CEPH_OSD_OP_TYPE_EXEC 0x0400
+#define CEPH_OSD_OP_TYPE_PG 0x0500
+#define CEPH_OSD_OP_TYPE_MULTI 0x0600 /* multiobject */
+
+enum {
+ /** data **/
+ /* read */
+ CEPH_OSD_OP_READ = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 1,
+ CEPH_OSD_OP_STAT = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 2,
+ CEPH_OSD_OP_MAPEXT = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 3,
+
+ /* fancy read */
+ CEPH_OSD_OP_MASKTRUNC = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 4,
+ CEPH_OSD_OP_SPARSE_READ = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 5,
+
+ CEPH_OSD_OP_NOTIFY = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 6,
+ CEPH_OSD_OP_NOTIFY_ACK = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 7,
+
+ /* versioning */
+ CEPH_OSD_OP_ASSERT_VER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 8,
+
+ /* write */
+ CEPH_OSD_OP_WRITE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 1,
+ CEPH_OSD_OP_WRITEFULL = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 2,
+ CEPH_OSD_OP_TRUNCATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 3,
+ CEPH_OSD_OP_ZERO = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 4,
+ CEPH_OSD_OP_DELETE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 5,
+
+ /* fancy write */
+ CEPH_OSD_OP_APPEND = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 6,
+ CEPH_OSD_OP_STARTSYNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 7,
+ CEPH_OSD_OP_SETTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 8,
+ CEPH_OSD_OP_TRIMTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 9,
+
+ CEPH_OSD_OP_TMAPUP = CEPH_OSD_OP_MODE_RMW | CEPH_OSD_OP_TYPE_DATA | 10,
+ CEPH_OSD_OP_TMAPPUT = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 11,
+ CEPH_OSD_OP_TMAPGET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 12,
+
+ CEPH_OSD_OP_CREATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13,
+ CEPH_OSD_OP_ROLLBACK= CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 14,
+
+ CEPH_OSD_OP_WATCH = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15,
+
+ /* omap */
+ CEPH_OSD_OP_OMAPGETKEYS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 17,
+ CEPH_OSD_OP_OMAPGETVALS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 18,
+ CEPH_OSD_OP_OMAPGETHEADER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 19,
+ CEPH_OSD_OP_OMAPGETVALSBYKEYS =
+ CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 20,
+ CEPH_OSD_OP_OMAPSETVALS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 21,
+ CEPH_OSD_OP_OMAPSETHEADER = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 22,
+ CEPH_OSD_OP_OMAPCLEAR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 23,
+ CEPH_OSD_OP_OMAPRMKEYS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24,
+ CEPH_OSD_OP_OMAP_CMP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25,
+
+ /* hints */
+ CEPH_OSD_OP_SETALLOCHINT = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 35,
+
+ /** multi **/
+ CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1,
+ CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2,
+ CEPH_OSD_OP_SRC_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 3,
+
+ /** attrs **/
+ /* read */
+ CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1,
+ CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2,
+ CEPH_OSD_OP_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 3,
+
+ /* write */
+ CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1,
+ CEPH_OSD_OP_SETXATTRS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 2,
+ CEPH_OSD_OP_RESETXATTRS = CEPH_OSD_OP_MODE_WR|CEPH_OSD_OP_TYPE_ATTR | 3,
+ CEPH_OSD_OP_RMXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 4,
+
+ /** subop **/
+ CEPH_OSD_OP_PULL = CEPH_OSD_OP_MODE_SUB | 1,
+ CEPH_OSD_OP_PUSH = CEPH_OSD_OP_MODE_SUB | 2,
+ CEPH_OSD_OP_BALANCEREADS = CEPH_OSD_OP_MODE_SUB | 3,
+ CEPH_OSD_OP_UNBALANCEREADS = CEPH_OSD_OP_MODE_SUB | 4,
+ CEPH_OSD_OP_SCRUB = CEPH_OSD_OP_MODE_SUB | 5,
+ CEPH_OSD_OP_SCRUB_RESERVE = CEPH_OSD_OP_MODE_SUB | 6,
+ CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7,
+ CEPH_OSD_OP_SCRUB_STOP = CEPH_OSD_OP_MODE_SUB | 8,
+ CEPH_OSD_OP_SCRUB_MAP = CEPH_OSD_OP_MODE_SUB | 9,
+
+ /** lock **/
+ CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1,
+ CEPH_OSD_OP_WRUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 2,
+ CEPH_OSD_OP_RDLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 3,
+ CEPH_OSD_OP_RDUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 4,
+ CEPH_OSD_OP_UPLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 5,
+ CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6,
+
+ /** exec **/
+ /* note: the RD bit here is wrong; see special-case below in helper */
+ CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1,
+
+ /** pg **/
+ CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1,
+ CEPH_OSD_OP_PGLS_FILTER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 2,
+};
+
+static inline int ceph_osd_op_type_lock(int op)
+{
+ return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_LOCK;
+}
+static inline int ceph_osd_op_type_data(int op)
+{
+ return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_DATA;
+}
+static inline int ceph_osd_op_type_attr(int op)
+{
+ return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_ATTR;
+}
+static inline int ceph_osd_op_type_exec(int op)
+{
+ return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_EXEC;
+}
+static inline int ceph_osd_op_type_pg(int op)
+{
+ return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG;
+}
+static inline int ceph_osd_op_type_multi(int op)
+{
+ return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_MULTI;
+}
+
+static inline int ceph_osd_op_mode_subop(int op)
+{
+ return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_SUB;
+}
+static inline int ceph_osd_op_mode_read(int op)
+{
+ return (op & CEPH_OSD_OP_MODE_RD) &&
+ op != CEPH_OSD_OP_CALL;
+}
+static inline int ceph_osd_op_mode_modify(int op)
+{
+ return op & CEPH_OSD_OP_MODE_WR;
+}
+
+/*
+ * note that the following tmap stuff is also defined in the ceph librados.h
+ * any modification here needs to be updated there
+ */
+#define CEPH_OSD_TMAP_HDR 'h'
+#define CEPH_OSD_TMAP_SET 's'
+#define CEPH_OSD_TMAP_CREATE 'c' /* create key */
+#define CEPH_OSD_TMAP_RM 'r'
+#define CEPH_OSD_TMAP_RMSLOPPY 'R'
+
+extern const char *ceph_osd_op_name(int op);
+
+/*
+ * osd op flags
+ *
+ * An op may be READ, WRITE, or READ|WRITE.
+ */
+enum {
+ CEPH_OSD_FLAG_ACK = 0x0001, /* want (or is) "ack" ack */
+ CEPH_OSD_FLAG_ONNVRAM = 0x0002, /* want (or is) "onnvram" ack */
+ CEPH_OSD_FLAG_ONDISK = 0x0004, /* want (or is) "ondisk" ack */
+ CEPH_OSD_FLAG_RETRY = 0x0008, /* resend attempt */
+ CEPH_OSD_FLAG_READ = 0x0010, /* op may read */
+ CEPH_OSD_FLAG_WRITE = 0x0020, /* op may write */
+ CEPH_OSD_FLAG_ORDERSNAP = 0x0040, /* EOLDSNAP if snapc is out of order */
+ CEPH_OSD_FLAG_PEERSTAT_OLD = 0x0080, /* DEPRECATED msg includes osd_peer_stat */
+ CEPH_OSD_FLAG_BALANCE_READS = 0x0100,
+ CEPH_OSD_FLAG_PARALLELEXEC = 0x0200, /* execute op in parallel */
+ CEPH_OSD_FLAG_PGOP = 0x0400, /* pg op, no object */
+ CEPH_OSD_FLAG_EXEC = 0x0800, /* op may exec */
+ CEPH_OSD_FLAG_EXEC_PUBLIC = 0x1000, /* DEPRECATED op may exec (public) */
+ CEPH_OSD_FLAG_LOCALIZE_READS = 0x2000, /* read from nearby replica, if any */
+ CEPH_OSD_FLAG_RWORDERED = 0x4000, /* order wrt concurrent reads */
+ CEPH_OSD_FLAG_IGNORE_CACHE = 0x8000, /* ignore cache logic */
+ CEPH_OSD_FLAG_SKIPRWLOCKS = 0x10000, /* skip rw locks */
+ CEPH_OSD_FLAG_IGNORE_OVERLAY = 0x20000, /* ignore pool overlay */
+ CEPH_OSD_FLAG_FLUSH = 0x40000, /* this is part of flush */
+};
+
+enum {
+ CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */
+ CEPH_OSD_OP_FLAG_FAILOK = 2, /* continue despite failure */
+};
+
+#define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/
+#define EBLACKLISTED ESHUTDOWN /* blacklisted */
+
+/* xattr comparison */
+enum {
+ CEPH_OSD_CMPXATTR_OP_NOP = 0,
+ CEPH_OSD_CMPXATTR_OP_EQ = 1,
+ CEPH_OSD_CMPXATTR_OP_NE = 2,
+ CEPH_OSD_CMPXATTR_OP_GT = 3,
+ CEPH_OSD_CMPXATTR_OP_GTE = 4,
+ CEPH_OSD_CMPXATTR_OP_LT = 5,
+ CEPH_OSD_CMPXATTR_OP_LTE = 6
+};
+
+enum {
+ CEPH_OSD_CMPXATTR_MODE_STRING = 1,
+ CEPH_OSD_CMPXATTR_MODE_U64 = 2
+};
+
+#define RADOS_NOTIFY_VER 1
+
+/*
+ * an individual object operation. each may be accompanied by some data
+ * payload
+ */
+struct ceph_osd_op {
+ __le16 op; /* CEPH_OSD_OP_* */
+ __le32 flags; /* CEPH_OSD_OP_FLAG_* */
+ union {
+ struct {
+ __le64 offset, length;
+ __le64 truncate_size;
+ __le32 truncate_seq;
+ } __attribute__ ((packed)) extent;
+ struct {
+ __le32 name_len;
+ __le32 value_len;
+ __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */
+ __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */
+ } __attribute__ ((packed)) xattr;
+ struct {
+ __u8 class_len;
+ __u8 method_len;
+ __u8 argc;
+ __le32 indata_len;
+ } __attribute__ ((packed)) cls;
+ struct {
+ __le64 cookie, count;
+ } __attribute__ ((packed)) pgls;
+ struct {
+ __le64 snapid;
+ } __attribute__ ((packed)) snap;
+ struct {
+ __le64 cookie;
+ __le64 ver;
+ __u8 flag; /* 0 = unwatch, 1 = watch */
+ } __attribute__ ((packed)) watch;
+ struct {
+ __le64 offset, length;
+ __le64 src_offset;
+ } __attribute__ ((packed)) clonerange;
+ struct {
+ __le64 expected_object_size;
+ __le64 expected_write_size;
+ } __attribute__ ((packed)) alloc_hint;
+ };
+ __le32 payload_len;
+} __attribute__ ((packed));
+
+
+#endif
diff --git a/linux/ceph/types.h b/linux/ceph/types.h
new file mode 100644
index 0000000..d3ff1cf
--- /dev/null
+++ b/linux/ceph/types.h
@@ -0,0 +1,29 @@
+#ifndef _FS_CEPH_TYPES_H
+#define _FS_CEPH_TYPES_H
+
+/* needed before including ceph_fs.h */
+#include <linux/in.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/string.h>
+
+#include <linux/ceph/ceph_fs.h>
+#include <linux/ceph/ceph_frag.h>
+#include <linux/ceph/ceph_hash.h>
+
+/*
+ * Identify inodes by both their ino AND snapshot id (a u64).
+ */
+struct ceph_vino {
+ u64 ino;
+ u64 snap;
+};
+
+
+/* context for the caps reservation mechanism */
+struct ceph_cap_reservation {
+ int count;
+};
+
+
+#endif
diff --git a/linux/crush/crush.h b/linux/crush/crush.h
new file mode 100644
index 0000000..4fad5f8
--- /dev/null
+++ b/linux/crush/crush.h
@@ -0,0 +1,201 @@
+#ifndef CEPH_CRUSH_CRUSH_H
+#define CEPH_CRUSH_CRUSH_H
+
+#include <linux/types.h>
+
+/*
+ * CRUSH is a pseudo-random data distribution algorithm that
+ * efficiently distributes input values (typically, data objects)
+ * across a heterogeneous, structured storage cluster.
+ *
+ * The algorithm was originally described in detail in this paper
+ * (although the algorithm has evolved somewhat since then):
+ *
+ * http://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf
+ *
+ * LGPL2
+ */
+
+
+#define CRUSH_MAGIC 0x00010000ul /* for detecting algorithm revisions */
+
+#define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */
+
+
+#define CRUSH_ITEM_UNDEF 0x7ffffffe /* undefined result (internal use only) */
+#define CRUSH_ITEM_NONE 0x7fffffff /* no result */
+
+/*
+ * CRUSH uses user-defined "rules" to describe how inputs should be
+ * mapped to devices. A rule consists of a sequence of steps to perform
+ * to generate the set of output devices.
+ */
+struct crush_rule_step {
+ __u32 op;
+ __s32 arg1;
+ __s32 arg2;
+};
+
+/* step op codes */
+enum {
+ CRUSH_RULE_NOOP = 0,
+ CRUSH_RULE_TAKE = 1, /* arg1 = value to start with */
+ CRUSH_RULE_CHOOSE_FIRSTN = 2, /* arg1 = num items to pick */
+ /* arg2 = type */
+ CRUSH_RULE_CHOOSE_INDEP = 3, /* same */
+ CRUSH_RULE_EMIT = 4, /* no args */
+ CRUSH_RULE_CHOOSELEAF_FIRSTN = 6,
+ CRUSH_RULE_CHOOSELEAF_INDEP = 7,
+
+ CRUSH_RULE_SET_CHOOSE_TRIES = 8, /* override choose_total_tries */
+ CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */
+ CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10,
+ CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11,
+ CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12
+};
+
+/*
+ * for specifying choose num (arg1) relative to the max parameter
+ * passed to do_rule
+ */
+#define CRUSH_CHOOSE_N 0
+#define CRUSH_CHOOSE_N_MINUS(x) (-(x))
+
+/*
+ * The rule mask is used to describe what the rule is intended for.
+ * Given a ruleset and size of output set, we search through the
+ * rule list for a matching rule_mask.
+ */
+struct crush_rule_mask {
+ __u8 ruleset;
+ __u8 type;
+ __u8 min_size;
+ __u8 max_size;
+};
+
+struct crush_rule {
+ __u32 len;
+ struct crush_rule_mask mask;
+ struct crush_rule_step steps[0];
+};
+
+#define crush_rule_size(len) (sizeof(struct crush_rule) + \
+ (len)*sizeof(struct crush_rule_step))
+
+
+
+/*
+ * A bucket is a named container of other items (either devices or
+ * other buckets). Items within a bucket are chosen using one of a
+ * few different algorithms. The table summarizes how the speed of
+ * each option measures up against mapping stability when items are
+ * added or removed.
+ *
+ * Bucket Alg Speed Additions Removals
+ * ------------------------------------------------
+ * uniform O(1) poor poor
+ * list O(n) optimal poor
+ * tree O(log n) good good
+ * straw O(n) optimal optimal
+ */
+enum {
+ CRUSH_BUCKET_UNIFORM = 1,
+ CRUSH_BUCKET_LIST = 2,
+ CRUSH_BUCKET_TREE = 3,
+ CRUSH_BUCKET_STRAW = 4
+};
+extern const char *crush_bucket_alg_name(int alg);
+
+struct crush_bucket {
+ __s32 id; /* this'll be negative */
+ __u16 type; /* non-zero; type=0 is reserved for devices */
+ __u8 alg; /* one of CRUSH_BUCKET_* */
+ __u8 hash; /* which hash function to use, CRUSH_HASH_* */
+ __u32 weight; /* 16-bit fixed point */
+ __u32 size; /* num items */
+ __s32 *items;
+
+ /*
+ * cached random permutation: used for uniform bucket and for
+ * the linear search fallback for the other bucket types.
+ */
+ __u32 perm_x; /* @x for which *perm is defined */
+ __u32 perm_n; /* num elements of *perm that are permuted/defined */
+ __u32 *perm;
+};
+
+struct crush_bucket_uniform {
+ struct crush_bucket h;
+ __u32 item_weight; /* 16-bit fixed point; all items equally weighted */
+};
+
+struct crush_bucket_list {
+ struct crush_bucket h;
+ __u32 *item_weights; /* 16-bit fixed point */
+ __u32 *sum_weights; /* 16-bit fixed point. element i is sum
+ of weights 0..i, inclusive */
+};
+
+struct crush_bucket_tree {
+ struct crush_bucket h; /* note: h.size is _tree_ size, not number of
+ actual items */
+ __u8 num_nodes;
+ __u32 *node_weights;
+};
+
+struct crush_bucket_straw {
+ struct crush_bucket h;
+ __u32 *item_weights; /* 16-bit fixed point */
+ __u32 *straws; /* 16-bit fixed point */
+};
+
+
+
+/*
+ * CRUSH map includes all buckets, rules, etc.
+ */
+struct crush_map {
+ struct crush_bucket **buckets;
+ struct crush_rule **rules;
+
+ __s32 max_buckets;
+ __u32 max_rules;
+ __s32 max_devices;
+
+ /* choose local retries before re-descent */
+ __u32 choose_local_tries;
+ /* choose local attempts using a fallback permutation before
+ * re-descent */
+ __u32 choose_local_fallback_tries;
+ /* choose attempts before giving up */
+ __u32 choose_total_tries;
+ /* attempt chooseleaf inner descent once for firstn mode; on
+ * reject retry outer descent. Note that this does *not*
+ * apply to a collision: in that case we will retry as we used
+ * to. */
+ __u32 chooseleaf_descend_once;
+
+ /* if non-zero, feed r into chooseleaf, bit-shifted right by (r-1)
+ * bits. a value of 1 is best for new clusters. for legacy clusters
+ * that want to limit reshuffling, a value of 3 or 4 will make the
+ * mappings line up a bit better with previous mappings. */
+ __u8 chooseleaf_vary_r;
+};
+
+
+/* crush.c */
+extern int crush_get_bucket_item_weight(const struct crush_bucket *b, int pos);
+extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
+extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
+extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
+extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
+extern void crush_destroy_bucket(struct crush_bucket *b);
+extern void crush_destroy_rule(struct crush_rule *r);
+extern void crush_destroy(struct crush_map *map);
+
+static inline int crush_calc_tree_node(int i)
+{
+ return ((i+1) << 1)-1;
+}
+
+#endif
diff --git a/linux/crush/hash.h b/linux/crush/hash.h
new file mode 100644
index 0000000..91e8842
--- /dev/null
+++ b/linux/crush/hash.h
@@ -0,0 +1,17 @@
+#ifndef CEPH_CRUSH_HASH_H
+#define CEPH_CRUSH_HASH_H
+
+#define CRUSH_HASH_RJENKINS1 0
+
+#define CRUSH_HASH_DEFAULT CRUSH_HASH_RJENKINS1
+
+extern const char *crush_hash_name(int type);
+
+extern __u32 crush_hash32(int type, __u32 a);
+extern __u32 crush_hash32_2(int type, __u32 a, __u32 b);
+extern __u32 crush_hash32_3(int type, __u32 a, __u32 b, __u32 c);
+extern __u32 crush_hash32_4(int type, __u32 a, __u32 b, __u32 c, __u32 d);
+extern __u32 crush_hash32_5(int type, __u32 a, __u32 b, __u32 c, __u32 d,
+ __u32 e);
+
+#endif
diff --git a/linux/crush/mapper.h b/linux/crush/mapper.h
new file mode 100644
index 0000000..eab3674
--- /dev/null
+++ b/linux/crush/mapper.h
@@ -0,0 +1,20 @@
+#ifndef CEPH_CRUSH_MAPPER_H
+#define CEPH_CRUSH_MAPPER_H
+
+/*
+ * CRUSH functions for finding rules and then mapping an input to an
+ * output set.
+ *
+ * LGPL2
+ */
+
+#include <linux/crush/crush.h>
+
+extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size);
+extern int crush_do_rule(const struct crush_map *map,
+ int ruleno,
+ int x, int *result, int result_max,
+ const __u32 *weights, int weight_max,
+ int *scratch);
+
+#endif
diff --git a/rbd/Kconfig b/rbd/Kconfig
new file mode 100644
index 0000000..014a1cf
--- /dev/null
+++ b/rbd/Kconfig
@@ -0,0 +1,560 @@
+#
+# Block device driver configuration
+#
+
+menuconfig BLK_DEV
+ bool "Block devices"
+ depends on BLOCK
+ default y
+ ---help---
+ Say Y here to get to see options for various different block device
+ drivers. This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and disabled;
+ only do this if you know what you are doing.
+
+if BLK_DEV
+
+config BLK_DEV_NULL_BLK
+ tristate "Null test block driver"
+
+config BLK_DEV_FD
+ tristate "Normal floppy disk support"
+ depends on ARCH_MAY_HAVE_PC_FDC
+ ---help---
+ If you want to use the floppy disk drive(s) of your PC under Linux,
+ say Y. Information about this driver, especially important for IBM
+ Thinkpad users, is contained in
+ <file:Documentation/blockdev/floppy.txt>.
+ That file also contains the location of the Floppy driver FAQ as
+ well as location of the fdutils package used to configure additional
+ parameters of the driver at run time.
+
+ To compile this driver as a module, choose M here: the
+ module will be called floppy.
+
+config AMIGA_FLOPPY
+ tristate "Amiga floppy support"
+ depends on AMIGA
+
+config ATARI_FLOPPY
+ tristate "Atari floppy support"
+ depends on ATARI
+
+config MAC_FLOPPY
+ tristate "Support for PowerMac floppy"
+ depends on PPC_PMAC && !PPC_PMAC64
+ help
+ If you have a SWIM-3 (Super Woz Integrated Machine 3; from Apple)
+ floppy controller, say Y here. Most commonly found in PowerMacs.
+
+config BLK_DEV_SWIM
+ tristate "Support for SWIM Macintosh floppy"
+ depends on M68K && MAC
+ help
+ You should select this option if you want floppy support
+ and you don't have a II, IIfx, Q900, Q950 or AV series.
+
+config AMIGA_Z2RAM
+ tristate "Amiga Zorro II ramdisk support"
+ depends on ZORRO
+ help
+ This enables support for using Chip RAM and Zorro II RAM as a
+ ramdisk or as a swap partition. Say Y if you want to include this
+ driver in the kernel.
+
+ To compile this driver as a module, choose M here: the
+ module will be called z2ram.
+
+config GDROM
+ tristate "SEGA Dreamcast GD-ROM drive"
+ depends on SH_DREAMCAST
+ help
+ A standard SEGA Dreamcast comes with a modified CD ROM drive called a
+ "GD-ROM" by SEGA to signify it is capable of reading special disks
+ with up to 1 GB of data. This drive will also read standard CD ROM
+ disks. Select this option to access any disks in your GD ROM drive.
+ Most users will want to say "Y" here.
+ You can also build this as a module which will be called gdrom.
+
+config PARIDE
+ tristate "Parallel port IDE device support"
+ depends on PARPORT_PC
+ ---help---
+ There are many external CD-ROM and disk devices that connect through
+ your computer's parallel port. Most of them are actually IDE devices
+ using a parallel port IDE adapter. This option enables the PARIDE
+ subsystem which contains drivers for many of these external drives.
+ Read <file:Documentation/blockdev/paride.txt> for more information.
+
+ If you have said Y to the "Parallel-port support" configuration
+ option, you may share a single port between your printer and other
+ parallel port devices. Answer Y to build PARIDE support into your
+ kernel, or M if you would like to build it as a loadable module. If
+ your parallel port support is in a loadable module, you must build
+ PARIDE as a module. If you built PARIDE support into your kernel,
+ you may still build the individual protocol modules and high-level
+ drivers as loadable modules. If you build this support as a module,
+ it will be called paride.
+
+ To use the PARIDE support, you must say Y or M here and also to at
+ least one high-level driver (e.g. "Parallel port IDE disks",
+ "Parallel port ATAPI CD-ROMs", "Parallel port ATAPI disks" etc.) and
+ to at least one protocol driver (e.g. "ATEN EH-100 protocol",
+ "MicroSolutions backpack protocol", "DataStor Commuter protocol"
+ etc.).
+
+source "drivers/block/paride/Kconfig"
+
+source "drivers/block/mtip32xx/Kconfig"
+
+source "drivers/block/zram/Kconfig"
+
+config BLK_CPQ_DA
+ tristate "Compaq SMART2 support"
+ depends on PCI && VIRT_TO_BUS && 0
+ help
+ This is the driver for Compaq Smart Array controllers. Everyone
+ using these boards should say Y here. See the file
+ <file:Documentation/blockdev/cpqarray.txt> for the current list of
+ boards supported by this driver, and for further information on the
+ use of this driver.
+
+config BLK_CPQ_CISS_DA
+ tristate "Compaq Smart Array 5xxx support"
+ depends on PCI
+ select CHECK_SIGNATURE
+ help
+ This is the driver for Compaq Smart Array 5xxx controllers.
+ Everyone using these boards should say Y here.
+ See <file:Documentation/blockdev/cciss.txt> for the current list of
+ boards supported by this driver, and for further information
+ on the use of this driver.
+
+config CISS_SCSI_TAPE
+ bool "SCSI tape drive support for Smart Array 5xxx"
+ depends on BLK_CPQ_CISS_DA && PROC_FS
+ depends on SCSI=y || SCSI=BLK_CPQ_CISS_DA
+ help
+ When enabled (Y), this option allows SCSI tape drives and SCSI medium
+ changers (tape robots) to be accessed via a Compaq 5xxx array
+ controller. (See <file:Documentation/blockdev/cciss.txt> for more details.)
+
+ "SCSI support" and "SCSI tape support" must also be enabled for this
+ option to work.
+
+ When this option is disabled (N), the SCSI portion of the driver
+ is not compiled.
+
+config BLK_DEV_DAC960
+ tristate "Mylex DAC960/DAC1100 PCI RAID Controller support"
+ depends on PCI
+ help
+ This driver adds support for the Mylex DAC960, AcceleRAID, and
+ eXtremeRAID PCI RAID controllers. See the file
+ <file:Documentation/blockdev/README.DAC960> for further information
+ about this driver.
+
+ To compile this driver as a module, choose M here: the
+ module will be called DAC960.
+
+config BLK_DEV_UMEM
+ tristate "Micro Memory MM5415 Battery Backed RAM support"
+ depends on PCI
+ ---help---
+ Saying Y here will include support for the MM5415 family of
+ battery backed (Non-volatile) RAM cards.
+ <http://www.umem.com/>
+
+ The cards appear as block devices that can be partitioned into
+ as many as 15 partitions.
+
+ To compile this driver as a module, choose M here: the
+ module will be called umem.
+
+ The umem driver has not yet been allocated a MAJOR number, so
+ one is chosen dynamically.
+
+config BLK_DEV_UBD
+ bool "Virtual block device"
+ depends on UML
+ ---help---
+ The User-Mode Linux port includes a driver called UBD which will let
+ you access arbitrary files on the host computer as block devices.
+ Unless you know that you do not need such virtual block devices say
+ Y here.
+
+config BLK_DEV_UBD_SYNC
+ bool "Always do synchronous disk IO for UBD"
+ depends on BLK_DEV_UBD
+ ---help---
+ Writes to the virtual block device are not immediately written to the
+ host's disk; this may cause problems if, for example, the User-Mode
+ Linux 'Virtual Machine' uses a journalling filesystem and the host
+ computer crashes.
+
+ Synchronous operation (i.e. always writing data to the host's disk
+ immediately) is configurable on a per-UBD basis by using a special
+ kernel command line option. Alternatively, you can say Y here to
+ turn on synchronous operation by default for all block devices.
+
+ If you're running a journalling file system (like reiserfs, for
+ example) in your virtual machine, you will want to say Y here. If
+ you care for the safety of the data in your virtual machine, Y is a
+ wise choice too. In all other cases (for example, if you're just
+ playing around with User-Mode Linux) you can choose N.
+
+config BLK_DEV_COW_COMMON
+ bool
+ default BLK_DEV_UBD
+
+config BLK_DEV_LOOP
+ tristate "Loopback device support"
+ ---help---
+ Saying Y here will allow you to use a regular file as a block
+ device; you can then create a file system on that block device and
+ mount it just as you would mount other block devices such as hard
+ drive partitions, CD-ROM drives or floppy drives. The loop devices
+ are block special device files with major number 7 and typically
+ called /dev/loop0, /dev/loop1 etc.
+
+ This is useful if you want to check an ISO 9660 file system before
+ burning the CD, or if you want to use floppy images without first
+ writing them to floppy. Furthermore, some Linux distributions avoid
+ the need for a dedicated Linux partition by keeping their complete
+ root file system inside a DOS FAT file using this loop device
+ driver.
+
+ To use the loop device, you need the losetup utility, found in the
+ util-linux package, see
+ <ftp://ftp.kernel.org/pub/linux/utils/util-linux/>.
+
+ The loop device driver can also be used to "hide" a file system in
+ a disk partition, floppy, or regular file, either using encryption
+ (scrambling the data) or steganography (hiding the data in the low
+ bits of, say, a sound file). This is also safe if the file resides
+ on a remote file server.
+
+ There are several ways of encrypting disks. Some of these require
+ kernel patches. The vanilla kernel offers the cryptoloop option
+ and a Device Mapper target (which is superior, as it supports all
+ file systems). If you want to use the cryptoloop, say Y to both
+ LOOP and CRYPTOLOOP, and make sure you have a recent (version 2.12
+ or later) version of util-linux. Additionally, be aware that
+ the cryptoloop is not safe for storing journaled filesystems.
+
+ Note that this loop device has nothing to do with the loopback
+ device used for network connections from the machine to itself.
+
+ To compile this driver as a module, choose M here: the
+ module will be called loop.
+
+ Most users will answer N here.
+
+config BLK_DEV_LOOP_MIN_COUNT
+ int "Number of loop devices to pre-create at init time"
+ depends on BLK_DEV_LOOP
+ default 8
+ help
+ Static number of loop devices to be unconditionally pre-created
+ at init time.
+
+ This default value can be overwritten on the kernel command
+ line or with module-parameter loop.max_loop.
+
+ The historic default is 8. If a late 2011 version of losetup(8)
+ is used, it can be set to 0, since needed loop devices can be
+ dynamically allocated with the /dev/loop-control interface.
+
+config BLK_DEV_CRYPTOLOOP
+ tristate "Cryptoloop Support"
+ select CRYPTO
+ select CRYPTO_CBC
+ depends on BLK_DEV_LOOP
+ ---help---
+ Say Y here if you want to be able to use the ciphers that are
+ provided by the CryptoAPI as loop transformation. This might be
+ used as hard disk encryption.
+
+ WARNING: This device is not safe for journaled file systems like
+ ext3 or Reiserfs. Please use the Device Mapper crypto module
+ instead, which can be configured to be on-disk compatible with the
+ cryptoloop device.
+
+source "drivers/block/drbd/Kconfig"
+
+config BLK_DEV_NBD
+ tristate "Network block device support"
+ depends on NET
+ ---help---
+ Saying Y here will allow your computer to be a client for network
+ block devices, i.e. it will be able to use block devices exported by
+ servers (mount file systems on them etc.). Communication between
+ client and server works over TCP/IP networking, but to the client
+ program this is hidden: it looks like a regular local file access to
+ a block device special file such as /dev/nd0.
+
+ Network block devices also allows you to run a block-device in
+ userland (making server and client physically the same computer,
+ communicating using the loopback network device).
+
+ Read <file:Documentation/blockdev/nbd.txt> for more information,
+ especially about where to find the server code, which runs in user
+ space and does not need special kernel support.
+
+ Note that this has nothing to do with the network file systems NFS
+ or Coda; you can say N here even if you intend to use NFS or Coda.
+
+ To compile this driver as a module, choose M here: the
+ module will be called nbd.
+
+ If unsure, say N.
+
+config BLK_DEV_NVME
+ tristate "NVM Express block device"
+ depends on PCI
+ ---help---
+ The NVM Express driver is for solid state drives directly
+ connected to the PCI or PCI Express bus. If you know you
+ don't have one of these, it is safe to answer N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called nvme.
+
+config BLK_DEV_SKD
+ tristate "STEC S1120 Block Driver"
+ depends on PCI
+ depends on 64BIT
+ ---help---
+ Saying Y or M here will enable support for the
+ STEC, Inc. S1120 PCIe SSD.
+
+	  Use device /dev/skd$N and /dev/skd$Np$M.
+
+config BLK_DEV_OSD
+ tristate "OSD object-as-blkdev support"
+ depends on SCSI_OSD_ULD
+ ---help---
+ Saying Y or M here will allow the exporting of a single SCSI
+ OSD (object-based storage) object as a Linux block device.
+
+ For example, if you create a 2G object on an OSD device,
+ you can then use this module to present that 2G object as
+ a Linux block device.
+
+ To compile this driver as a module, choose M here: the
+ module will be called osdblk.
+
+ If unsure, say N.
+
+config BLK_DEV_SX8
+ tristate "Promise SATA SX8 support"
+ depends on PCI
+ ---help---
+ Saying Y or M here will enable support for the
+ Promise SATA SX8 controllers.
+
+ Use devices /dev/sx8/$N and /dev/sx8/$Np$M.
+
+config BLK_DEV_RAM
+ tristate "RAM block device support"
+ ---help---
+ Saying Y here will allow you to use a portion of your RAM memory as
+ a block device, so that you can make file systems on it, read and
+ write to it and do all the other things that you can do with normal
+ block devices (such as hard drives). It is usually used to load and
+ store a copy of a minimal root file system off of a floppy into RAM
+ during the initial install of Linux.
+
+ Note that the kernel command line option "ramdisk=XX" is now obsolete.
+ For details, read <file:Documentation/blockdev/ramdisk.txt>.
+
+ To compile this driver as a module, choose M here: the
+ module will be called brd. An alias "rd" has been defined
+ for historical reasons.
+
+ Most normal users won't need the RAM disk functionality, and can
+ thus say N here.
+
+config BLK_DEV_RAM_COUNT
+ int "Default number of RAM disks"
+ default "16"
+ depends on BLK_DEV_RAM
+ help
+ The default value is 16 RAM disks. Change this if you know what you
+ are doing. If you boot from a filesystem that needs to be extracted
+ in memory, you will need at least one RAM disk (e.g. root on cramfs).
+
+config BLK_DEV_RAM_SIZE
+ int "Default RAM disk size (kbytes)"
+ depends on BLK_DEV_RAM
+ default "4096"
+ help
+ The default value is 4096 kilobytes. Only change this if you know
+ what you are doing.
+
+config BLK_DEV_XIP
+ bool "Support XIP filesystems on RAM block device"
+ depends on BLK_DEV_RAM
+ default n
+ help
+ Support XIP filesystems (such as ext2 with XIP support on) on
+ top of block ram device. This will slightly enlarge the kernel, and
+ will prevent RAM block device backing store memory from being
+ allocated from highmem (only a problem for highmem systems).
+
+config CDROM_PKTCDVD
+ tristate "Packet writing on CD/DVD media"
+ depends on !UML
+ help
+ If you have a CDROM/DVD drive that supports packet writing, say
+ Y to include support. It should work with any MMC/Mt Fuji
+ compliant ATAPI or SCSI drive, which is just about any newer
+ DVD/CD writer.
+
+ Currently only writing to CD-RW, DVD-RW, DVD+RW and DVDRAM discs
+ is possible.
+ DVD-RW disks must be in restricted overwrite mode.
+
+ See the file <file:Documentation/cdrom/packet-writing.txt>
+ for further information on the use of this driver.
+
+ To compile this driver as a module, choose M here: the
+ module will be called pktcdvd.
+
+config CDROM_PKTCDVD_BUFFERS
+ int "Free buffers for data gathering"
+ depends on CDROM_PKTCDVD
+ default "8"
+ help
+ This controls the maximum number of active concurrent packets. More
+ concurrent packets can increase write performance, but also require
+ more memory. Each concurrent packet will require approximately 64Kb
+ of non-swappable kernel memory, memory which will be allocated when
+ a disc is opened for writing.
+
+config CDROM_PKTCDVD_WCACHE
+ bool "Enable write caching"
+ depends on CDROM_PKTCDVD
+ help
+ If enabled, write caching will be set for the CD-R/W device. For now
+ this option is dangerous unless the CD-RW media is known good, as we
+ don't do deferred write error handling yet.
+
+config ATA_OVER_ETH
+ tristate "ATA over Ethernet support"
+ depends on NET
+ help
+	  This driver provides support for ATA over Ethernet block
+ devices like the Coraid EtherDrive (R) Storage Blade.
+
+config MG_DISK
+ tristate "mGine mflash, gflash support"
+ depends on ARM && GPIOLIB
+ help
+ mGine mFlash(gFlash) block device driver
+
+config MG_DISK_RES
+ int "Size of reserved area before MBR"
+ depends on MG_DISK
+ default 0
+ help
+ Define size of reserved area that usually used for boot. Unit is KB.
+ All of the block device operation will be taken this value as start
+ offset
+ Examples:
+ 1024 => 1 MB
+
+config SUNVDC
+ tristate "Sun Virtual Disk Client support"
+ depends on SUN_LDOMS
+ help
+ Support for virtual disk devices as a client under Sun
+ Logical Domains.
+
+source "drivers/s390/block/Kconfig"
+
+config XILINX_SYSACE
+ tristate "Xilinx SystemACE support"
+ depends on 4xx || MICROBLAZE
+ help
+ Include support for the Xilinx SystemACE CompactFlash interface
+
+config XEN_BLKDEV_FRONTEND
+ tristate "Xen virtual block device support"
+ depends on XEN
+ default y
+ select XEN_XENBUS_FRONTEND
+ help
+ This driver implements the front-end of the Xen virtual
+ block device driver. It communicates with a back-end driver
+ in another domain which drives the actual block device.
+
+config XEN_BLKDEV_BACKEND
+ tristate "Xen block-device backend driver"
+ depends on XEN_BACKEND
+ help
+ The block-device backend driver allows the kernel to export its
+ block devices to other guests via a high-performance shared-memory
+ interface.
+
+ The corresponding Linux frontend driver is enabled by the
+ CONFIG_XEN_BLKDEV_FRONTEND configuration option.
+
+	  The backend driver attaches itself to any block device specified
+	  in the XenBus configuration. There are no limits to what the block
+	  device can be, as long as it has a major and minor.
+
+ If you are compiling a kernel to run in a Xen block backend driver
+ domain (often this is domain 0) you should say Y here. To
+	  compile this driver as a module, choose M here: the module
+ will be called xen-blkback.
+
+
+config VIRTIO_BLK
+ tristate "Virtio block driver"
+ depends on VIRTIO
+ ---help---
+ This is the virtual block driver for virtio. It can be used with
+ lguest or QEMU based VMMs (like KVM or Xen). Say Y or M.
+
+config BLK_DEV_HD
+ bool "Very old hard disk (MFM/RLL/IDE) driver"
+ depends on HAVE_IDE
+ depends on !ARM || ARCH_RPC || BROKEN
+ help
+ This is a very old hard disk driver that lacks the enhanced
+ functionality of the newer ones.
+
+ It is required for systems with ancient MFM/RLL/ESDI drives.
+
+ If unsure, say N.
+
+config BLK_DEV_RBD
+ tristate "Rados block device (RBD)"
+ depends on INET && BLOCK
+ select CEPH_LIB
+ select LIBCRC32C
+ select CRYPTO_AES
+ select CRYPTO
+ default n
+ help
+	  Say Y here if you want to include the Rados block device, which stripes
+ a block device over objects stored in the Ceph distributed object
+ store.
+
+ More information at http://ceph.newdream.net/.
+
+ If unsure, say N.
+
+config BLK_DEV_RSXX
+ tristate "IBM Flash Adapter 900GB Full Height PCIe Device Driver"
+ depends on PCI
+ help
+ Device driver for IBM's high speed PCIe SSD
+ storage device: Flash Adapter 900GB Full Height.
+
+ To compile this driver as a module, choose M here: the
+ module will be called rsxx.
+
+endif # BLK_DEV
diff --git a/rbd/Makefile b/rbd/Makefile
new file mode 100644
index 0000000..02b688d
--- /dev/null
+++ b/rbd/Makefile
@@ -0,0 +1,49 @@
+#
+# Makefile for the kernel block device drivers.
+#
+# 12 June 2000, Christoph Hellwig <hch at infradead.org>
+# Rewritten to use lists instead of if-statements.
+#
+
+obj-$(CONFIG_MAC_FLOPPY) += swim3.o
+obj-$(CONFIG_BLK_DEV_SWIM) += swim_mod.o
+obj-$(CONFIG_BLK_DEV_FD) += floppy.o
+obj-$(CONFIG_AMIGA_FLOPPY) += amiflop.o
+obj-$(CONFIG_PS3_DISK) += ps3disk.o
+obj-$(CONFIG_PS3_VRAM) += ps3vram.o
+obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o
+obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o
+obj-$(CONFIG_BLK_DEV_RAM) += brd.o
+obj-$(CONFIG_BLK_DEV_LOOP) += loop.o
+obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o
+obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o
+obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o
+obj-$(CONFIG_XILINX_SYSACE) += xsysace.o
+obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o
+obj-$(CONFIG_MG_DISK) += mg_disk.o
+obj-$(CONFIG_SUNVDC) += sunvdc.o
+obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
+obj-$(CONFIG_BLK_DEV_SKD) += skd.o
+obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o
+
+obj-$(CONFIG_BLK_DEV_UMEM) += umem.o
+obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
+obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o
+obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
+
+obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
+obj-$(CONFIG_BLK_DEV_HD) += hd.o
+
+obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
+obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
+obj-$(CONFIG_BLK_DEV_DRBD) += drbd/
+obj-$(CONFIG_BLK_DEV_RBD) += rbd.o
+obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/
+
+obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
+obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o
+obj-$(CONFIG_ZRAM) += zram/
+
+nvme-y := nvme-core.o nvme-scsi.o
+skd-y := skd_main.o
+swim_mod-y := swim.o swim_asm.o
diff --git a/rbd/rbd.c b/rbd/rbd.c
new file mode 100644
index 0000000..4c95b50
--- /dev/null
+++ b/rbd/rbd.c
@@ -0,0 +1,5406 @@
+
+/*
+ rbd.c -- Export ceph rados objects as a Linux block device
+
+
+ based on drivers/block/osdblk.c:
+
+ Copyright 2009 Red Hat, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+
+ For usage instructions, please refer to:
+
+ Documentation/ABI/testing/sysfs-bus-rbd
+
+ */
+
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/osd_client.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/decode.h>
+#include <linux/parser.h>
+#include <linux/bsearch.h>
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+
+#include "rbd_types.h"
+
+#define RBD_DEBUG /* Activate rbd_assert() calls */
+
+/*
+ * The basic unit of block I/O is a sector. It is interpreted in a
+ * number of contexts in Linux (blk, bio, genhd), but the default is
+ * universally 512 bytes. These symbols are just slightly more
+ * meaningful than the bare numbers they represent.
+ */
+#define SECTOR_SHIFT 9
+#define SECTOR_SIZE (1ULL << SECTOR_SHIFT)
+
+/*
+ * Increment the given counter and return its updated value.
+ * If the counter is already 0 it will not be incremented.
+ * If the counter is already at its maximum value returns
+ * -EINVAL without updating it.
+ */
+static int atomic_inc_return_safe(atomic_t *v)
+{
+	unsigned int counter;
+
+	/* __atomic_add_unless() leaves a counter that is already 0
+	 * untouched ("sticky zero"), per the block comment above. */
+	counter = (unsigned int)__atomic_add_unless(v, 1, 0);
+	/* Unsigned compare rejects any value that would not fit in a
+	 * non-negative int (i.e. the counter overflowed INT_MAX). */
+	if (counter <= (unsigned int)INT_MAX)
+		return (int)counter;
+
+	/* Overflowed: undo the increment and report the failure. */
+	atomic_dec(v);
+
+	return -EINVAL;
+}
+
+/* Decrement the counter. Return the resulting value, or -EINVAL */
+static int atomic_dec_return_safe(atomic_t *v)
+{
+	int counter;
+
+	counter = atomic_dec_return(v);
+	if (counter >= 0)
+		return counter;
+
+	/* Went negative, so the counter was already 0: restore it and
+	 * report the underflow instead of returning a bogus value. */
+	atomic_inc(v);
+
+	return -EINVAL;
+}
+
+#define RBD_DRV_NAME "rbd"
+
+#define RBD_MINORS_PER_MAJOR 256
+#define RBD_SINGLE_MAJOR_PART_SHIFT 4
+
+#define RBD_SNAP_DEV_NAME_PREFIX "snap_"
+#define RBD_MAX_SNAP_NAME_LEN \
+ (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))
+
+#define RBD_MAX_SNAP_COUNT 510 /* allows max snapc to fit in 4KB */
+
+#define RBD_SNAP_HEAD_NAME "-"
+
+#define BAD_SNAP_INDEX U32_MAX /* invalid index into snap array */
+
+/* This allows a single page to hold an image name sent by OSD */
+#define RBD_IMAGE_NAME_LEN_MAX (PAGE_SIZE - sizeof (__le32) - 1)
+#define RBD_IMAGE_ID_LEN_MAX 64
+
+#define RBD_OBJ_PREFIX_LEN_MAX 64
+
+/* Feature bits */
+
+#define RBD_FEATURE_LAYERING (1<<0)
+#define RBD_FEATURE_STRIPINGV2 (1<<1)
+#define RBD_FEATURES_ALL \
+ (RBD_FEATURE_LAYERING | RBD_FEATURE_STRIPINGV2)
+
+/* Features supported by this (client software) implementation. */
+
+#define RBD_FEATURES_SUPPORTED (RBD_FEATURES_ALL)
+
+/*
+ * An RBD device name will be "rbd#", where the "rbd" comes from
+ * RBD_DRV_NAME above, and # is a unique integer identifier.
+ * MAX_INT_FORMAT_WIDTH is used in ensuring DEV_NAME_LEN is big
+ * enough to hold all possible device names.
+ */
+#define DEV_NAME_LEN 32
+#define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1)
+
+/*
+ * block device image metadata (in-memory version)
+ */
+struct rbd_image_header {
+ /* These six fields never change for a given rbd image */
+ char *object_prefix;
+ __u8 obj_order;
+ __u8 crypt_type;
+ __u8 comp_type;
+ u64 stripe_unit;
+ u64 stripe_count;
+ u64 features; /* Might be changeable someday? */
+
+ /* The remaining fields need to be updated occasionally */
+ u64 image_size;
+ struct ceph_snap_context *snapc;
+ char *snap_names; /* format 1 only */
+ u64 *snap_sizes; /* format 1 only */
+};
+
+/*
+ * An rbd image specification.
+ *
+ * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely
+ * identify an image. Each rbd_dev structure includes a pointer to
+ * an rbd_spec structure that encapsulates this identity.
+ *
+ * Each of the id's in an rbd_spec has an associated name. For a
+ * user-mapped image, the names are supplied and the id's associated
+ * with them are looked up. For a layered image, a parent image is
+ * defined by the tuple, and the names are looked up.
+ *
+ * An rbd_dev structure contains a parent_spec pointer which is
+ * non-null if the image it represents is a child in a layered
+ * image. This pointer will refer to the rbd_spec structure used
+ * by the parent rbd_dev for its own identity (i.e., the structure
+ * is shared between the parent and child).
+ *
+ * Since these structures are populated once, during the discovery
+ * phase of image construction, they are effectively immutable so
+ * we make no effort to synchronize access to them.
+ *
+ * Note that code herein does not assume the image name is known (it
+ * could be a null pointer).
+ */
+struct rbd_spec {
+ u64 pool_id;
+ const char *pool_name;
+
+ const char *image_id;
+ const char *image_name;
+
+ u64 snap_id;
+ const char *snap_name;
+
+ struct kref kref;
+};
+
+/*
+ * an instance of the client. multiple devices may share an rbd client.
+ */
+struct rbd_client {
+	struct ceph_client	*client;	/* libceph cluster handle */
+	struct kref		kref;		/* shared among mapped devices */
+	struct list_head	node;		/* entry in rbd_client_list */
+};
+
+struct rbd_img_request;
+typedef void (*rbd_img_callback_t)(struct rbd_img_request *);
+
+#define BAD_WHICH U32_MAX /* Good which or bad which, which? */
+
+struct rbd_obj_request;
+typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *);
+
+enum obj_request_type {
+ OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES
+};
+
+enum obj_req_flags {
+ OBJ_REQ_DONE, /* completion flag: not done = 0, done = 1 */
+ OBJ_REQ_IMG_DATA, /* object usage: standalone = 0, image = 1 */
+ OBJ_REQ_KNOWN, /* EXISTS flag valid: no = 0, yes = 1 */
+ OBJ_REQ_EXISTS, /* target exists: no = 0, yes = 1 */
+};
+
+struct rbd_obj_request {
+ const char *object_name;
+ u64 offset; /* object start byte */
+ u64 length; /* bytes from offset */
+ unsigned long flags;
+
+ /*
+ * An object request associated with an image will have its
+ * img_data flag set; a standalone object request will not.
+ *
+ * A standalone object request will have which == BAD_WHICH
+ * and a null obj_request pointer.
+ *
+ * An object request initiated in support of a layered image
+ * object (to check for its existence before a write) will
+ * have which == BAD_WHICH and a non-null obj_request pointer.
+ *
+ * Finally, an object request for rbd image data will have
+ * which != BAD_WHICH, and will have a non-null img_request
+ * pointer. The value of which will be in the range
+ * 0..(img_request->obj_request_count-1).
+ */
+ union {
+ struct rbd_obj_request *obj_request; /* STAT op */
+ struct {
+ struct rbd_img_request *img_request;
+ u64 img_offset;
+ /* links for img_request->obj_requests list */
+ struct list_head links;
+ };
+ };
+ u32 which; /* posn image request list */
+
+ enum obj_request_type type;
+ union {
+ struct bio *bio_list;
+ struct {
+ struct page **pages;
+ u32 page_count;
+ };
+ };
+ struct page **copyup_pages;
+ u32 copyup_page_count;
+
+ struct ceph_osd_request *osd_req;
+
+ u64 xferred; /* bytes transferred */
+ int result;
+
+ rbd_obj_callback_t callback;
+ struct completion completion;
+
+ struct kref kref;
+};
+
+enum img_req_flags {
+ IMG_REQ_WRITE, /* I/O direction: read = 0, write = 1 */
+ IMG_REQ_CHILD, /* initiator: block = 0, child image = 1 */
+ IMG_REQ_LAYERED, /* ENOENT handling: normal = 0, layered = 1 */
+};
+
+struct rbd_img_request {
+ struct rbd_device *rbd_dev;
+ u64 offset; /* starting image byte offset */
+ u64 length; /* byte count from offset */
+ unsigned long flags;
+ union {
+ u64 snap_id; /* for reads */
+ struct ceph_snap_context *snapc; /* for writes */
+ };
+ union {
+ struct request *rq; /* block request */
+ struct rbd_obj_request *obj_request; /* obj req initiator */
+ };
+ struct page **copyup_pages;
+ u32 copyup_page_count;
+ spinlock_t completion_lock;/* protects next_completion */
+ u32 next_completion;
+ rbd_img_callback_t callback;
+ u64 xferred;/* aggregate bytes transferred */
+ int result; /* first nonzero obj_request result */
+
+ u32 obj_request_count;
+ struct list_head obj_requests; /* rbd_obj_request structs */
+
+ struct kref kref;
+};
+
+#define for_each_obj_request(ireq, oreq) \
+ list_for_each_entry(oreq, &(ireq)->obj_requests, links)
+#define for_each_obj_request_from(ireq, oreq) \
+ list_for_each_entry_from(oreq, &(ireq)->obj_requests, links)
+#define for_each_obj_request_safe(ireq, oreq, n) \
+ list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links)
+
+struct rbd_mapping {
+	u64                     size;		/* size of the mapped image */
+	u64                     features;	/* features of the mapped image */
+	bool			read_only;	/* map is read-only (no writes) */
+};
+
+/*
+ * a single device
+ */
+struct rbd_device {
+ int dev_id; /* blkdev unique id */
+
+ int major; /* blkdev assigned major */
+ int minor;
+ struct gendisk *disk; /* blkdev's gendisk and rq */
+
+ u32 image_format; /* Either 1 or 2 */
+ struct rbd_client *rbd_client;
+
+ char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */
+
+ spinlock_t lock; /* queue, flags, open_count */
+
+ struct rbd_image_header header;
+ unsigned long flags; /* possibly lock protected */
+ struct rbd_spec *spec;
+
+ char *header_name;
+
+ struct ceph_file_layout layout;
+
+ struct ceph_osd_event *watch_event;
+ struct rbd_obj_request *watch_request;
+
+ struct rbd_spec *parent_spec;
+ u64 parent_overlap;
+ atomic_t parent_ref;
+ struct rbd_device *parent;
+
+ /* protects updating the header */
+ struct rw_semaphore header_rwsem;
+
+ struct rbd_mapping mapping;
+
+ struct list_head node;
+
+ /* sysfs related */
+ struct device dev;
+ unsigned long open_count; /* protected by lock */
+};
+
+/*
+ * Flag bits for rbd_dev->flags. If atomicity is required,
+ * rbd_dev->lock is used to protect access.
+ *
+ * Currently, only the "removing" flag (which is coupled with the
+ * "open_count" field) requires atomic access.
+ */
+enum rbd_dev_flags {
+ RBD_DEV_FLAG_EXISTS, /* mapped snapshot has not been deleted */
+ RBD_DEV_FLAG_REMOVING, /* this mapping is being removed */
+};
+
+static DEFINE_MUTEX(client_mutex); /* Serialize client creation */
+
+static LIST_HEAD(rbd_dev_list); /* devices */
+static DEFINE_SPINLOCK(rbd_dev_list_lock);
+
+static LIST_HEAD(rbd_client_list); /* clients */
+static DEFINE_SPINLOCK(rbd_client_list_lock);
+
+/* Slab caches for frequently-allocated structures */
+
+static struct kmem_cache *rbd_img_request_cache;
+static struct kmem_cache *rbd_obj_request_cache;
+static struct kmem_cache *rbd_segment_name_cache;
+
+static int rbd_major;
+static DEFINE_IDA(rbd_dev_id_ida);
+
+/*
+ * Default to false for now, as single-major requires >= 0.75 version of
+ * userspace rbd utility.
+ */
+static bool single_major = false;
+module_param(single_major, bool, S_IRUGO);
+MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: false)");
+
+static int rbd_img_request_submit(struct rbd_img_request *img_request);
+
+static void rbd_dev_device_release(struct device *dev);
+
+static ssize_t rbd_add(struct bus_type *bus, const char *buf,
+ size_t count);
+static ssize_t rbd_remove(struct bus_type *bus, const char *buf,
+ size_t count);
+static ssize_t rbd_add_single_major(struct bus_type *bus, const char *buf,
+ size_t count);
+static ssize_t rbd_remove_single_major(struct bus_type *bus, const char *buf,
+ size_t count);
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping);
+static void rbd_spec_put(struct rbd_spec *spec);
+
+/* Map a device id to the first minor of its partition range. */
+static int rbd_dev_id_to_minor(int dev_id)
+{
+	int first_minor = dev_id << RBD_SINGLE_MAJOR_PART_SHIFT;
+
+	return first_minor;
+}
+
+/* Inverse of rbd_dev_id_to_minor(): recover the device id from a minor. */
+static int minor_to_rbd_dev_id(int minor)
+{
+	int dev_id = minor >> RBD_SINGLE_MAJOR_PART_SHIFT;
+
+	return dev_id;
+}
+
+static BUS_ATTR(add, S_IWUSR, NULL, rbd_add);
+static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove);
+static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major);
+static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major);
+
+static struct attribute *rbd_bus_attrs[] = {
+ &bus_attr_add.attr,
+ &bus_attr_remove.attr,
+ &bus_attr_add_single_major.attr,
+ &bus_attr_remove_single_major.attr,
+ NULL,
+};
+
+/*
+ * Hide the *_single_major bus attributes unless the single_major
+ * module parameter was enabled; everything else keeps its mode.
+ */
+static umode_t rbd_bus_is_visible(struct kobject *kobj,
+				  struct attribute *attr, int index)
+{
+	bool single_major_attr;
+
+	single_major_attr = attr == &bus_attr_add_single_major.attr ||
+			    attr == &bus_attr_remove_single_major.attr;
+	if (single_major_attr && !single_major)
+		return 0;
+
+	return attr->mode;
+}
+
+static const struct attribute_group rbd_bus_group = {
+ .attrs = rbd_bus_attrs,
+ .is_visible = rbd_bus_is_visible,
+};
+__ATTRIBUTE_GROUPS(rbd_bus);
+
+static struct bus_type rbd_bus_type = {
+ .name = "rbd",
+ .bus_groups = rbd_bus_groups,
+};
+
+static void rbd_root_dev_release(struct device *dev)
+{
+	/* rbd_root_dev is statically allocated, so there is nothing to
+	 * free here; the empty callback only silences the driver core's
+	 * warning about devices without a release method. */
+}
+
+static struct device rbd_root_dev = {
+ .init_name = "rbd",
+ .release = rbd_root_dev_release,
+};
+
+/*
+ * Emit a warning prefixed with the most specific identity available
+ * for the device: disk name, then image name, then image id, then the
+ * raw rbd_dev pointer.  A NULL rbd_dev is allowed.
+ */
+static __printf(2, 3)
+void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+
+	va_start(args, fmt);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	if (!rbd_dev)
+		printk(KERN_WARNING "%s: %pV\n", RBD_DRV_NAME, &vaf);
+	else if (rbd_dev->disk)
+		printk(KERN_WARNING "%s: %s: %pV\n",
+			RBD_DRV_NAME, rbd_dev->disk->disk_name, &vaf);
+	else if (rbd_dev->spec && rbd_dev->spec->image_name)
+		printk(KERN_WARNING "%s: image %s: %pV\n",
+			RBD_DRV_NAME, rbd_dev->spec->image_name, &vaf);
+	else if (rbd_dev->spec && rbd_dev->spec->image_id)
+		printk(KERN_WARNING "%s: id %s: %pV\n",
+			RBD_DRV_NAME, rbd_dev->spec->image_id, &vaf);
+	else	/* punt */
+		printk(KERN_WARNING "%s: rbd_dev %p: %pV\n",
+			RBD_DRV_NAME, rbd_dev, &vaf);
+	va_end(args);
+}
+
+#ifdef RBD_DEBUG
+#define rbd_assert(expr) \
+ if (unlikely(!(expr))) { \
+ printk(KERN_ERR "\nAssertion failure in %s() " \
+ "at line %d:\n\n" \
+ "\trbd_assert(%s);\n\n", \
+ __func__, __LINE__, #expr); \
+ BUG(); \
+ }
+#else /* !RBD_DEBUG */
+# define rbd_assert(expr) ((void) 0)
+#endif /* !RBD_DEBUG */
+
+static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request);
+static void rbd_img_parent_read(struct rbd_obj_request *obj_request);
+static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
+
+static int rbd_dev_refresh(struct rbd_device *rbd_dev);
+static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev);
+static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev);
+static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
+ u64 snap_id);
+static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
+ u8 *order, u64 *snap_size);
+static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
+ u64 *snap_features);
+static u64 rbd_snap_id_by_name(struct rbd_device *rbd_dev, const char *name);
+
+static int rbd_open(struct block_device *bdev, fmode_t mode)
+{
+	struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
+	bool removing = false;
+
+	/* Refuse writable opens of a read-only mapping up front. */
+	if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only)
+		return -EROFS;
+
+	/* open_count and the REMOVING flag are checked/updated under the
+	 * same lock so that opens and device removal stay mutually
+	 * exclusive (see rbd_dev_flags comment above). */
+	spin_lock_irq(&rbd_dev->lock);
+	if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags))
+		removing = true;
+	else
+		rbd_dev->open_count++;
+	spin_unlock_irq(&rbd_dev->lock);
+	if (removing)
+		return -ENOENT;
+
+	/* Hold a device reference for the lifetime of this open;
+	 * dropped in rbd_release(). */
+	(void) get_device(&rbd_dev->dev);
+	set_device_ro(bdev, rbd_dev->mapping.read_only);
+
+	return 0;
+}
+
+static void rbd_release(struct gendisk *disk, fmode_t mode)
+{
+	struct rbd_device *rbd_dev = disk->private_data;
+	unsigned long open_count_before;
+
+	spin_lock_irq(&rbd_dev->lock);
+	open_count_before = rbd_dev->open_count--;
+	spin_unlock_irq(&rbd_dev->lock);
+	/* A release without a matching open would underflow open_count. */
+	rbd_assert(open_count_before > 0);
+
+	/* Drop the device reference taken in rbd_open(). */
+	put_device(&rbd_dev->dev);
+}
+
+static const struct block_device_operations rbd_bd_ops = {
+ .owner = THIS_MODULE,
+ .open = rbd_open,
+ .release = rbd_release,
+};
+
+/*
+ * Initialize an rbd client instance. Success or not, this function
+ * consumes ceph_opts. Caller holds client_mutex.
+ */
+static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts)
+{
+	struct rbd_client *rbdc;
+	int ret = -ENOMEM;
+
+	dout("%s:\n", __func__);
+	rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL);
+	if (!rbdc)
+		goto out_opt;
+
+	kref_init(&rbdc->kref);
+	INIT_LIST_HEAD(&rbdc->node);
+
+	rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0);
+	if (IS_ERR(rbdc->client)) {
+		/* Propagate the real error instead of masking it
+		 * behind the initial -ENOMEM. */
+		ret = PTR_ERR(rbdc->client);
+		goto out_rbdc;
+	}
+	ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */
+
+	ret = ceph_open_session(rbdc->client);
+	if (ret < 0)
+		goto out_client;
+
+	spin_lock(&rbd_client_list_lock);
+	list_add_tail(&rbdc->node, &rbd_client_list);
+	spin_unlock(&rbd_client_list_lock);
+
+	dout("%s: rbdc %p\n", __func__, rbdc);
+
+	return rbdc;
+out_client:
+	/* ceph_destroy_client() also frees the options it took over. */
+	ceph_destroy_client(rbdc->client);
+out_rbdc:
+	kfree(rbdc);
+out_opt:
+	/* Function consumes ceph_opts: destroy them unless the client
+	 * already took ownership (ceph_opts was NULLed above). */
+	if (ceph_opts)
+		ceph_destroy_options(ceph_opts);
+	dout("%s: error %d\n", __func__, ret);
+
+	return ERR_PTR(ret);
+}
+
+/* Take an additional reference on behalf of the caller; returns rbdc. */
+static struct rbd_client *__rbd_get_client(struct rbd_client *rbdc)
+{
+	kref_get(&rbdc->kref);
+	return rbdc;
+}
+
+/*
+ * Find a ceph client with specific addr and configuration. If
+ * found, bump its reference count.
+ */
+/*
+ * Look up an existing ceph client whose configuration matches
+ * ceph_opts; on a hit, bump its refcount and return it.  Returns NULL
+ * on a miss, or unconditionally when sharing is disabled (NOSHARE).
+ */
+static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts)
+{
+	struct rbd_client *rbdc;
+	struct rbd_client *found = NULL;
+
+	if (ceph_opts->flags & CEPH_OPT_NOSHARE)
+		return NULL;
+
+	spin_lock(&rbd_client_list_lock);
+	list_for_each_entry(rbdc, &rbd_client_list, node) {
+		if (ceph_compare_options(ceph_opts, rbdc->client))
+			continue;
+		/* Reference must be taken while the list lock pins rbdc. */
+		found = __rbd_get_client(rbdc);
+		break;
+	}
+	spin_unlock(&rbd_client_list_lock);
+
+	return found;
+}
+
+/*
+ * mount options
+ */
+enum {
+ Opt_last_int,
+ /* int args above */
+ Opt_last_string,
+ /* string args above */
+ Opt_read_only,
+ Opt_read_write,
+ /* Boolean args above */
+ Opt_last_bool,
+};
+
+static match_table_t rbd_opts_tokens = {
+ /* int args above */
+ /* string args above */
+ {Opt_read_only, "read_only"},
+ {Opt_read_only, "ro"}, /* Alternate spelling */
+ {Opt_read_write, "read_write"},
+ {Opt_read_write, "rw"}, /* Alternate spelling */
+ /* Boolean args above */
+ {-1, NULL}
+};
+
+struct rbd_options {
+ bool read_only;
+};
+
+#define RBD_READ_ONLY_DEFAULT false
+
+/*
+ * Parse a single rbd mount option token into *private (struct
+ * rbd_options).  Returns 0 on success, -EINVAL for an unknown token,
+ * or the match_int() error for a malformed integer argument.
+ */
+static int parse_rbd_opts_token(char *c, void *private)
+{
+	struct rbd_options *rbd_opts = private;
+	substring_t argstr[MAX_OPT_ARGS];
+	int token, intval, ret;
+
+	token = match_token(c, rbd_opts_tokens, argstr);
+	if (token < 0)
+		return -EINVAL;
+
+	/* The Opt_last_* markers in the token enum partition tokens by
+	 * argument type; the range tests below decode argstr accordingly. */
+	if (token < Opt_last_int) {
+		ret = match_int(&argstr[0], &intval);
+		if (ret < 0) {
+			pr_err("bad mount option arg (not int) "
+			       "at '%s'\n", c);
+			return ret;
+		}
+		dout("got int token %d val %d\n", token, intval);
+	} else if (token > Opt_last_int && token < Opt_last_string) {
+		dout("got string token %d val %s\n", token,
+		     argstr[0].from);
+	} else if (token > Opt_last_string && token < Opt_last_bool) {
+		dout("got Boolean token %d\n", token);
+	} else {
+		dout("got token %d\n", token);
+	}
+
+	switch (token) {
+	case Opt_read_only:
+		rbd_opts->read_only = true;
+		break;
+	case Opt_read_write:
+		rbd_opts->read_only = false;
+		break;
+	default:
+		/* rbd_opts_tokens and this switch must stay in sync. */
+		rbd_assert(false);
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Get a ceph client with specific addr and configuration, if one does
+ * not exist create it. Either way, ceph_opts is consumed by this
+ * function.
+ */
+/*
+ * Get a ceph client with specific addr and configuration, creating it
+ * if no shareable one exists.  Either way, ceph_opts is consumed by
+ * this function (freed here, or owned by the client).
+ */
+static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts)
+{
+	struct rbd_client *rbdc;
+
+	mutex_lock_nested(&client_mutex, SINGLE_DEPTH_NESTING);
+	rbdc = rbd_client_find(ceph_opts);
+	if (!rbdc) {
+		/* rbd_client_create() consumes ceph_opts on all paths. */
+		rbdc = rbd_client_create(ceph_opts);
+	} else {
+		/* Reusing an existing client: the options are ours to free. */
+		ceph_destroy_options(ceph_opts);
+	}
+	mutex_unlock(&client_mutex);
+
+	return rbdc;
+}
+
+/*
+ * Destroy ceph client (kref release callback).
+ *
+ * Takes rbd_client_list_lock itself to unlink the client, so the
+ * caller must NOT already hold that lock.  (The previous comment
+ * claimed the opposite, which would deadlock.)
+ */
+static void rbd_client_release(struct kref *kref)
+{
+	struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);
+
+	dout("%s: rbdc %p\n", __func__, rbdc);
+	spin_lock(&rbd_client_list_lock);
+	list_del(&rbdc->node);
+	spin_unlock(&rbd_client_list_lock);
+
+	ceph_destroy_client(rbdc->client);
+	kfree(rbdc);
+}
+
+/*
+ * Drop a reference to a ceph client node; the final reference
+ * triggers rbd_client_release().  NULL is tolerated as a no-op.
+ */
+static void rbd_put_client(struct rbd_client *rbdc)
+{
+	if (!rbdc)
+		return;
+
+	kref_put(&rbdc->kref, rbd_client_release);
+}
+
+/* Only image format 1 and format 2 exist */
+static bool rbd_image_format_valid(u32 image_format)
+{
+	switch (image_format) {
+	case 1:
+	case 2:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Sanity-check an on-disk format 1 image header: the magic header
+ * text must be present, the object order must lie within
+ * [SECTOR_SHIFT, 8*sizeof(int)-1], and the snapshot count and name
+ * length must produce a snapshot header that fits in a size_t.
+ * Returns false if any check fails.
+ */
+static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk)
+{
+ size_t size;
+ u32 snap_count;
+
+ /* The header has to start with the magic rbd header text */
+ if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT)))
+ return false;
+
+ /* The bio layer requires at least sector-sized I/O */
+
+ if (ondisk->options.order < SECTOR_SHIFT)
+ return false;
+
+ /* If we use u64 in a few spots we may be able to loosen this */
+
+ if (ondisk->options.order > 8 * sizeof (int) - 1)
+ return false;
+
+ /*
+ * The size of a snapshot header has to fit in a size_t, and
+ * that limits the number of snapshots.
+ */
+ snap_count = le32_to_cpu(ondisk->snap_count);
+ size = SIZE_MAX - sizeof (struct ceph_snap_context);
+ if (snap_count > size / sizeof (__le64))
+ return false;
+
+ /*
+ * Not only that, but the size of the entire the snapshot
+ * header must also be representable in a size_t.
+ */
+ size -= snap_count * sizeof (__le64);
+ if ((u64) size < le64_to_cpu(ondisk->snap_names_len))
+ return false;
+
+ return true;
+}
+
+/*
+ * Fill an rbd image header with information from the given format 1
+ * on-disk header.
+ *
+ * On first use (object_prefix not yet set) the immutable fields are
+ * filled in as well; on refresh only the snapshot context, names,
+ * sizes and image size are replaced.  Returns 0 on success, -ENOMEM
+ * on allocation failure, or -EIO if the snapshot name data is too
+ * large.  On failure everything allocated here is freed and the
+ * existing header is left untouched.
+ */
+static int rbd_header_from_disk(struct rbd_device *rbd_dev,
+ struct rbd_image_header_ondisk *ondisk)
+{
+ struct rbd_image_header *header = &rbd_dev->header;
+ bool first_time = header->object_prefix == NULL;
+ struct ceph_snap_context *snapc;
+ char *object_prefix = NULL;
+ char *snap_names = NULL;
+ u64 *snap_sizes = NULL;
+ u32 snap_count;
+ size_t size;
+ int ret = -ENOMEM;
+ u32 i;
+
+ /* Allocate this now to avoid having to handle failure below */
+
+ if (first_time) {
+ size_t len;
+
+ /* object_prefix on disk may not be NUL-terminated */
+ len = strnlen(ondisk->object_prefix,
+ sizeof (ondisk->object_prefix));
+ object_prefix = kmalloc(len + 1, GFP_KERNEL);
+ if (!object_prefix)
+ return -ENOMEM;
+ memcpy(object_prefix, ondisk->object_prefix, len);
+ object_prefix[len] = '\0';
+ }
+
+ /* Allocate the snapshot context and fill it in */
+
+ snap_count = le32_to_cpu(ondisk->snap_count);
+ snapc = ceph_create_snap_context(snap_count, GFP_KERNEL);
+ if (!snapc)
+ goto out_err;
+ snapc->seq = le64_to_cpu(ondisk->snap_seq);
+ if (snap_count) {
+ struct rbd_image_snap_ondisk *snaps;
+ u64 snap_names_len = le64_to_cpu(ondisk->snap_names_len);
+
+ /* We'll keep a copy of the snapshot names... */
+
+ if (snap_names_len > (u64)SIZE_MAX)
+ goto out_2big;
+ snap_names = kmalloc(snap_names_len, GFP_KERNEL);
+ if (!snap_names)
+ goto out_err;
+
+ /* ...as well as the array of their sizes. */
+
+ size = snap_count * sizeof (*header->snap_sizes);
+ snap_sizes = kmalloc(size, GFP_KERNEL);
+ if (!snap_sizes)
+ goto out_err;
+
+ /*
+ * Copy the names, and fill in each snapshot's id
+ * and size.
+ *
+ * Note that rbd_dev_v1_header_info() guarantees the
+ * ondisk buffer we're working with has
+ * snap_names_len bytes beyond the end of the
+ * snapshot id array, this memcpy() is safe.
+ */
+ memcpy(snap_names, &ondisk->snaps[snap_count], snap_names_len);
+ snaps = ondisk->snaps;
+ for (i = 0; i < snap_count; i++) {
+ snapc->snaps[i] = le64_to_cpu(snaps[i].id);
+ snap_sizes[i] = le64_to_cpu(snaps[i].image_size);
+ }
+ }
+
+ /* We won't fail any more, fill in the header */
+
+ if (first_time) {
+ header->object_prefix = object_prefix;
+ header->obj_order = ondisk->options.order;
+ header->crypt_type = ondisk->options.crypt_type;
+ header->comp_type = ondisk->options.comp_type;
+ /* The rest aren't used for format 1 images */
+ header->stripe_unit = 0;
+ header->stripe_count = 0;
+ header->features = 0;
+ } else {
+ /* Refresh: drop the previous snapshot data before replacing */
+ ceph_put_snap_context(header->snapc);
+ kfree(header->snap_names);
+ kfree(header->snap_sizes);
+ }
+
+ /* The remaining fields always get updated (when we refresh) */
+
+ header->image_size = le64_to_cpu(ondisk->image_size);
+ header->snapc = snapc;
+ header->snap_names = snap_names;
+ header->snap_sizes = snap_sizes;
+
+ /* Make sure mapping size is consistent with header info */
+
+ if (rbd_dev->spec->snap_id == CEPH_NOSNAP || first_time)
+ if (rbd_dev->mapping.size != header->image_size)
+ rbd_dev->mapping.size = header->image_size;
+
+ return 0;
+out_2big:
+ ret = -EIO;
+out_err:
+ kfree(snap_sizes);
+ kfree(snap_names);
+ ceph_put_snap_context(snapc);
+ kfree(object_prefix);
+
+ return ret;
+}
+
+/*
+ * Return a kstrdup()ed copy of the name of snapshot "which" (an
+ * index into the snapshot context), or NULL on allocation failure.
+ * Names are stored back to back, each NUL-terminated.
+ */
+static const char *_rbd_dev_v1_snap_name(struct rbd_device *rbd_dev, u32 which)
+{
+	const char *name = rbd_dev->header.snap_names;
+	u32 i;
+
+	rbd_assert(which < rbd_dev->header.snapc->num_snaps);
+
+	/* Step over the first "which" names to reach the one we want */
+	for (i = 0; i < which; i++)
+		name += strlen(name) + 1;
+
+	return kstrdup(name, GFP_KERNEL);
+}
+
+/*
+ * Snapshot id comparison function for use with qsort()/bsearch().
+ * Orders snapshot ids in *descending* order (largest id first).
+ */
+static int snapid_compare_reverse(const void *s1, const void *s2)
+{
+	u64 a = *(const u64 *)s1;
+	u64 b = *(const u64 *)s2;
+
+	if (a > b)
+		return -1;
+	return (a < b) ? 1 : 0;
+}
+
+/*
+ * Search a snapshot context to see if the given snapshot id is
+ * present.
+ *
+ * Returns the position of the snapshot id in the array if it's found,
+ * or BAD_SNAP_INDEX otherwise.
+ *
+ * Note: The snapshot array is kept sorted (by the osd) in
+ * reverse order, highest snapshot id first.
+ */
+static u32 rbd_dev_snap_index(struct rbd_device *rbd_dev, u64 snap_id)
+{
+ struct ceph_snap_context *snapc = rbd_dev->header.snapc;
+ u64 *found;
+
+ found = bsearch(&snap_id, &snapc->snaps, snapc->num_snaps,
+ sizeof (snap_id), snapid_compare_reverse);
+
+ return found ? (u32)(found - &snapc->snaps[0]) : BAD_SNAP_INDEX;
+}
+
+/*
+ * Look up the name of the format 1 snapshot with the given id.
+ * Returns an allocated copy of the name, ERR_PTR(-ENOENT) if the
+ * id is not in the snapshot context, or ERR_PTR(-ENOMEM).
+ */
+static const char *rbd_dev_v1_snap_name(struct rbd_device *rbd_dev,
+					u64 snap_id)
+{
+	const char *name;
+	u32 idx = rbd_dev_snap_index(rbd_dev, snap_id);
+
+	if (idx == BAD_SNAP_INDEX)
+		return ERR_PTR(-ENOENT);
+
+	name = _rbd_dev_v1_snap_name(rbd_dev, idx);
+	if (!name)
+		return ERR_PTR(-ENOMEM);
+
+	return name;
+}
+
+/*
+ * Format-agnostic snapshot name lookup.  CEPH_NOSNAP maps to the
+ * static HEAD name; otherwise dispatch on the image format.
+ */
+static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id)
+{
+	if (snap_id == CEPH_NOSNAP)
+		return RBD_SNAP_HEAD_NAME;
+
+	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+
+	return rbd_dev->image_format == 1 ?
+		rbd_dev_v1_snap_name(rbd_dev, snap_id) :
+		rbd_dev_v2_snap_name(rbd_dev, snap_id);
+}
+
+/*
+ * Look up the size of the given snapshot (or of the image HEAD for
+ * CEPH_NOSNAP) and store it in *snap_size.  Format 1 answers come
+ * from the cached header; format 2 queries the osd.  Returns 0 on
+ * success, -ENOENT if the snapshot id is unknown, or an error from
+ * the format 2 size request.
+ */
+static int rbd_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
+ u64 *snap_size)
+{
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+ if (snap_id == CEPH_NOSNAP) {
+ *snap_size = rbd_dev->header.image_size;
+ } else if (rbd_dev->image_format == 1) {
+ u32 which;
+
+ which = rbd_dev_snap_index(rbd_dev, snap_id);
+ if (which == BAD_SNAP_INDEX)
+ return -ENOENT;
+
+ *snap_size = rbd_dev->header.snap_sizes[which];
+ } else {
+ u64 size = 0;
+ int ret;
+
+ ret = _rbd_dev_v2_snap_size(rbd_dev, snap_id, NULL, &size);
+ if (ret)
+ return ret;
+
+ *snap_size = size;
+ }
+ return 0;
+}
+
+/*
+ * Look up the feature bits for the given snapshot (or the image HEAD
+ * for CEPH_NOSNAP) and store them in *snap_features.  Format 1
+ * images have no feature bits.  Returns 0 or an error from the
+ * format 2 feature request.
+ */
+static int rbd_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
+ u64 *snap_features)
+{
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+ if (snap_id == CEPH_NOSNAP) {
+ *snap_features = rbd_dev->header.features;
+ } else if (rbd_dev->image_format == 1) {
+ *snap_features = 0; /* No features for format 1 */
+ } else {
+ u64 features = 0;
+ int ret;
+
+ ret = _rbd_dev_v2_snap_features(rbd_dev, snap_id, &features);
+ if (ret)
+ return ret;
+
+ *snap_features = features;
+ }
+ return 0;
+}
+
+/*
+ * Initialize rbd_dev->mapping (size and features) for the mapped
+ * snapshot.  Returns 0, or an error from the size/feature lookups,
+ * in which case the mapping is left unmodified.
+ */
+static int rbd_dev_mapping_set(struct rbd_device *rbd_dev)
+{
+ u64 snap_id = rbd_dev->spec->snap_id;
+ u64 size = 0;
+ u64 features = 0;
+ int ret;
+
+ ret = rbd_snap_size(rbd_dev, snap_id, &size);
+ if (ret)
+ return ret;
+ ret = rbd_snap_features(rbd_dev, snap_id, &features);
+ if (ret)
+ return ret;
+
+ rbd_dev->mapping.size = size;
+ rbd_dev->mapping.features = features;
+
+ return 0;
+}
+
+/* Reset the mapping info set up by rbd_dev_mapping_set() */
+static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev)
+{
+	rbd_dev->mapping.features = 0;
+	rbd_dev->mapping.size = 0;
+}
+
+/*
+ * Return the object name for the segment containing image "offset",
+ * or NULL on allocation or formatting failure.  The returned buffer
+ * comes from rbd_segment_name_cache and must be released with
+ * rbd_segment_name_free().
+ */
+static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset)
+{
+	char *name;
+	u64 segment;
+	int ret;
+	char *name_format;
+
+	name = kmem_cache_alloc(rbd_segment_name_cache, GFP_NOIO);
+	if (!name)
+		return NULL;
+	segment = offset >> rbd_dev->header.obj_order;
+	/* Format 2 images use a wider (16 hex digit) segment suffix */
+	name_format = "%s.%012llx";
+	if (rbd_dev->image_format == 2)
+		name_format = "%s.%016llx";
+	ret = snprintf(name, CEPH_MAX_OID_NAME_LEN + 1, name_format,
+			rbd_dev->header.object_prefix, segment);
+	if (ret < 0 || ret > CEPH_MAX_OID_NAME_LEN) {
+		pr_err("error formatting segment name for #%llu (%d)\n",
+			segment, ret);
+		/*
+		 * The buffer was allocated from rbd_segment_name_cache,
+		 * so it must be returned with kmem_cache_free(), not
+		 * kfree() (which was a slab-allocator mismatch).
+		 */
+		kmem_cache_free(rbd_segment_name_cache, name);
+		name = NULL;
+	}
+
+	return name;
+}
+
+/* Release a segment name obtained from rbd_segment_name() */
+static void rbd_segment_name_free(const char *name)
+{
+ /* The explicit cast here is needed to drop the const qualifier */
+
+ kmem_cache_free(rbd_segment_name_cache, (void *)name);
+}
+
+/* Byte offset of "offset" within its segment (object) */
+static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset)
+{
+	/* Segments are a power of two; mask off the higher bits */
+	return offset & (((u64)1 << rbd_dev->header.obj_order) - 1);
+}
+
+/*
+ * Clamp "length" so that the byte range starting at "offset" does
+ * not extend beyond the end of the segment containing it.
+ */
+static u64 rbd_segment_length(struct rbd_device *rbd_dev,
+ u64 offset, u64 length)
+{
+ u64 segment_size = (u64) 1 << rbd_dev->header.obj_order;
+
+ offset &= segment_size - 1;
+
+ rbd_assert(length <= U64_MAX - offset);
+ if (offset + length > segment_size)
+ length = segment_size - offset;
+
+ return length;
+}
+
+/*
+ * returns the size of an object in the image
+ */
+static u64 rbd_obj_bytes(struct rbd_image_header *header)
+{
+	/*
+	 * Shift in 64 bits: rbd_dev_ondisk_valid() allows obj_order up
+	 * to 8*sizeof(int)-1 (i.e. 31), and "1 << 31" would overflow a
+	 * signed int (undefined behavior).  This also matches the
+	 * (u64)1 shifts in rbd_segment_offset()/rbd_segment_length().
+	 */
+	return (u64)1 << header->obj_order;
+}
+
+/*
+ * bio helpers
+ */
+
+/* Drop a reference on every bio linked through bi_next */
+static void bio_chain_put(struct bio *chain)
+{
+	while (chain) {
+		struct bio *next = chain->bi_next;
+
+		bio_put(chain);
+		chain = next;
+	}
+}
+
+/*
+ * zeros a bio chain, starting at specific offset
+ *
+ * Every byte from "start_ofs" (relative to the start of the chain's
+ * data) through the end of the chain is overwritten with zeroes,
+ * segment by segment, with the mappings done under local irq
+ * disable via bvec_kmap_irq().
+ */
+static void zero_bio_chain(struct bio *chain, int start_ofs)
+{
+ struct bio_vec bv;
+ struct bvec_iter iter;
+ unsigned long flags;
+ void *buf;
+ int pos = 0;
+
+ while (chain) {
+ bio_for_each_segment(bv, chain, iter) {
+ if (pos + bv.bv_len > start_ofs) {
+ /* Zero only the tail of a segment straddling start_ofs */
+ int remainder = max(start_ofs - pos, 0);
+ buf = bvec_kmap_irq(&bv, &flags);
+ memset(buf + remainder, 0,
+ bv.bv_len - remainder);
+ flush_dcache_page(bv.bv_page);
+ bvec_kunmap_irq(buf, &flags);
+ }
+ pos += bv.bv_len;
+ }
+
+ chain = chain->bi_next;
+ }
+}
+
+/*
+ * similar to zero_bio_chain(), zeros data defined by a page array,
+ * starting at the given byte offset from the start of the array and
+ * continuing up to the given end offset. The pages array is
+ * assumed to be big enough to hold all bytes up to the end.
+ */
+static void zero_pages(struct page **pages, u64 offset, u64 end)
+{
+ struct page **page = &pages[offset >> PAGE_SHIFT];
+
+ rbd_assert(end > offset);
+ rbd_assert(end - offset <= (u64)SIZE_MAX);
+ while (offset < end) {
+ size_t page_offset;
+ size_t length;
+ unsigned long flags;
+ void *kaddr;
+
+ /* Zero at most one page worth per iteration */
+ page_offset = offset & ~PAGE_MASK;
+ length = min_t(size_t, PAGE_SIZE - page_offset, end - offset);
+ local_irq_save(flags);
+ kaddr = kmap_atomic(*page);
+ memset(kaddr + page_offset, 0, length);
+ flush_dcache_page(*page);
+ kunmap_atomic(kaddr);
+ local_irq_restore(flags);
+
+ offset += length;
+ page++;
+ }
+}
+
+/*
+ * Clone a portion of a bio, starting at the given byte offset
+ * and continuing for the number of bytes indicated.  Returns the
+ * clone, or NULL on allocation failure.
+ */
+static struct bio *bio_clone_range(struct bio *bio_src,
+					unsigned int offset,
+					unsigned int len,
+					gfp_t gfpmask)
+{
+	struct bio *clone = bio_clone(bio_src, gfpmask);
+
+	if (!clone)
+		return NULL;	/* ENOMEM */
+
+	/* Trim the clone down to the requested byte range */
+	bio_advance(clone, offset);
+	clone->bi_iter.bi_size = len;
+
+	return clone;
+}
+
+/*
+ * Clone a portion of a bio chain, starting at the given byte offset
+ * into the first bio in the source chain and continuing for the
+ * number of bytes indicated. The result is another bio chain of
+ * exactly the given length, or a null pointer on error.
+ *
+ * The bio_src and offset parameters are both in-out. On entry they
+ * refer to the first source bio and the offset into that bio where
+ * the start of data to be cloned is located.
+ *
+ * On return, bio_src is updated to refer to the bio in the source
+ * chain that contains first un-cloned byte, and *offset will
+ * contain the offset of that byte within that bio.
+ */
+static struct bio *bio_chain_clone_range(struct bio **bio_src,
+ unsigned int *offset,
+ unsigned int len,
+ gfp_t gfpmask)
+{
+ struct bio *bi = *bio_src;
+ unsigned int off = *offset;
+ struct bio *chain = NULL;
+ struct bio **end;
+
+ /* Build up a chain of clone bios up to the limit */
+
+ if (!bi || off >= bi->bi_iter.bi_size || !len)
+ return NULL; /* Nothing to clone */
+
+ end = &chain;
+ while (len) {
+ unsigned int bi_size;
+ struct bio *bio;
+
+ if (!bi) {
+ rbd_warn(NULL, "bio_chain exhausted with %u left", len);
+ goto out_err; /* EINVAL; ran out of bio's */
+ }
+ /* Clone no more than the remainder of this source bio */
+ bi_size = min_t(unsigned int, bi->bi_iter.bi_size - off, len);
+ bio = bio_clone_range(bi, off, bi_size, gfpmask);
+ if (!bio)
+ goto out_err; /* ENOMEM */
+
+ /* Append the clone to the result chain */
+ *end = bio;
+ end = &bio->bi_next;
+
+ off += bi_size;
+ if (off == bi->bi_iter.bi_size) {
+ bi = bi->bi_next;
+ off = 0;
+ }
+ len -= bi_size;
+ }
+ *bio_src = bi;
+ *offset = off;
+
+ return chain;
+out_err:
+ bio_chain_put(chain);
+
+ return NULL;
+}
+
+/*
+ * The default/initial value for all object request flags is 0. For
+ * each flag, once its value is set to 1 it is never reset to 0
+ * again.
+ */
+static void obj_request_img_data_set(struct rbd_obj_request *obj_request)
+{
+ /* Setting the flag twice indicates a caller bug; warn about it */
+ if (test_and_set_bit(OBJ_REQ_IMG_DATA, &obj_request->flags)) {
+ struct rbd_device *rbd_dev;
+
+ rbd_dev = obj_request->img_request->rbd_dev;
+ rbd_warn(rbd_dev, "obj_request %p already marked img_data\n",
+ obj_request);
+ }
+}
+
+/* Is this object request part of an image request?  (Barrier pairs
+ * with the implicit barrier in test_and_set_bit() in the setter.) */
+static bool obj_request_img_data_test(struct rbd_obj_request *obj_request)
+{
+ smp_mb();
+ return test_bit(OBJ_REQ_IMG_DATA, &obj_request->flags) != 0;
+}
+
+/* Mark an object request complete; warns if already marked done */
+static void obj_request_done_set(struct rbd_obj_request *obj_request)
+{
+ if (test_and_set_bit(OBJ_REQ_DONE, &obj_request->flags)) {
+ struct rbd_device *rbd_dev = NULL;
+
+ if (obj_request_img_data_test(obj_request))
+ rbd_dev = obj_request->img_request->rbd_dev;
+ rbd_warn(rbd_dev, "obj_request %p already marked done\n",
+ obj_request);
+ }
+}
+
+/* Has this object request completed? */
+static bool obj_request_done_test(struct rbd_obj_request *obj_request)
+{
+ smp_mb();
+ return test_bit(OBJ_REQ_DONE, &obj_request->flags) != 0;
+}
+
+/*
+ * This sets the KNOWN flag after (possibly) setting the EXISTS
+ * flag. The latter is set based on the "exists" value provided.
+ *
+ * Note that for our purposes once an object exists it never goes
+ * away again. It's possible that the response from two existence
+ * checks are separated by the creation of the target object, and
+ * the first ("doesn't exist") response arrives *after* the second
+ * ("does exist"). In that case we ignore the second one.
+ */
+static void obj_request_existence_set(struct rbd_obj_request *obj_request,
+ bool exists)
+{
+ /* EXISTS must be visible before KNOWN; smp_mb() publishes both */
+ if (exists)
+ set_bit(OBJ_REQ_EXISTS, &obj_request->flags);
+ set_bit(OBJ_REQ_KNOWN, &obj_request->flags);
+ smp_mb();
+}
+
+/* Has an existence check for this object completed? */
+static bool obj_request_known_test(struct rbd_obj_request *obj_request)
+{
+ smp_mb();
+ return test_bit(OBJ_REQ_KNOWN, &obj_request->flags) != 0;
+}
+
+/* Is the target object known to exist?  Only meaningful once the
+ * KNOWN flag is set (see obj_request_existence_set()). */
+static bool obj_request_exists_test(struct rbd_obj_request *obj_request)
+{
+ smp_mb();
+ return test_bit(OBJ_REQ_EXISTS, &obj_request->flags) != 0;
+}
+
+/* Take an additional reference on an object request */
+static void rbd_obj_request_get(struct rbd_obj_request *obj_request)
+{
+ dout("%s: obj %p (was %d)\n", __func__, obj_request,
+ atomic_read(&obj_request->kref.refcount));
+ kref_get(&obj_request->kref);
+}
+
+static void rbd_obj_request_destroy(struct kref *kref);
+/* Drop a reference on an object request; last one destroys it */
+static void rbd_obj_request_put(struct rbd_obj_request *obj_request)
+{
+ rbd_assert(obj_request != NULL);
+ dout("%s: obj %p (was %d)\n", __func__, obj_request,
+ atomic_read(&obj_request->kref.refcount));
+ kref_put(&obj_request->kref, rbd_obj_request_destroy);
+}
+
+static bool img_request_child_test(struct rbd_img_request *img_request);
+static void rbd_parent_request_destroy(struct kref *kref);
+static void rbd_img_request_destroy(struct kref *kref);
+/*
+ * Drop a reference on an image request.  Child (parent-read)
+ * requests use a different destructor than top-level ones.
+ */
+static void rbd_img_request_put(struct rbd_img_request *img_request)
+{
+ rbd_assert(img_request != NULL);
+ dout("%s: img %p (was %d)\n", __func__, img_request,
+ atomic_read(&img_request->kref.refcount));
+ if (img_request_child_test(img_request))
+ kref_put(&img_request->kref, rbd_parent_request_destroy);
+ else
+ kref_put(&img_request->kref, rbd_img_request_destroy);
+}
+
+/*
+ * Append an object request to an image request's list, assigning it
+ * the next "which" slot.  The image request takes over the object
+ * request's original reference.
+ */
+static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request,
+ struct rbd_obj_request *obj_request)
+{
+ rbd_assert(obj_request->img_request == NULL);
+
+ /* Image request now owns object's original reference */
+ obj_request->img_request = img_request;
+ obj_request->which = img_request->obj_request_count;
+ rbd_assert(!obj_request_img_data_test(obj_request));
+ obj_request_img_data_set(obj_request);
+ rbd_assert(obj_request->which != BAD_WHICH);
+ img_request->obj_request_count++;
+ list_add_tail(&obj_request->links, &img_request->obj_requests);
+ dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request,
+ obj_request->which);
+}
+
+/*
+ * Unlink an object request from its image request and drop the
+ * reference the image request held.  Object requests must be
+ * removed in reverse order of addition (the "which" assert below
+ * enforces this).
+ */
+static inline void rbd_img_obj_request_del(struct rbd_img_request *img_request,
+ struct rbd_obj_request *obj_request)
+{
+ rbd_assert(obj_request->which != BAD_WHICH);
+
+ dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request,
+ obj_request->which);
+ list_del(&obj_request->links);
+ rbd_assert(img_request->obj_request_count > 0);
+ img_request->obj_request_count--;
+ rbd_assert(obj_request->which == img_request->obj_request_count);
+ obj_request->which = BAD_WHICH;
+ rbd_assert(obj_request_img_data_test(obj_request));
+ rbd_assert(obj_request->img_request == img_request);
+ obj_request->img_request = NULL;
+ obj_request->callback = NULL;
+ rbd_obj_request_put(obj_request);
+}
+
+/* Is "type" one of the three known object request data types? */
+static bool obj_request_type_valid(enum obj_request_type type)
+{
+	return type == OBJ_REQUEST_NODATA ||
+	       type == OBJ_REQUEST_BIO ||
+	       type == OBJ_REQUEST_PAGES;
+}
+
+/* Hand the object request's osd request to the osd client */
+static int rbd_obj_request_submit(struct ceph_osd_client *osdc,
+ struct rbd_obj_request *obj_request)
+{
+ dout("%s: osdc %p obj %p\n", __func__, osdc, obj_request);
+
+ return ceph_osdc_start_request(osdc, obj_request->osd_req, false);
+}
+
+/*
+ * Finish an image request: on success, total up the bytes
+ * transferred by its object requests, then invoke the completion
+ * callback if one is registered, otherwise drop the reference.
+ */
+static void rbd_img_request_complete(struct rbd_img_request *img_request)
+{
+
+ dout("%s: img %p\n", __func__, img_request);
+
+ /*
+ * If no error occurred, compute the aggregate transfer
+ * count for the image request. We could instead use
+ * atomic64_cmpxchg() to update it as each object request
+ * completes; not clear which way is better off hand.
+ */
+ if (!img_request->result) {
+ struct rbd_obj_request *obj_request;
+ u64 xferred = 0;
+
+ for_each_obj_request(img_request, obj_request)
+ xferred += obj_request->xferred;
+ img_request->xferred = xferred;
+ }
+
+ if (img_request->callback)
+ img_request->callback(img_request);
+ else
+ rbd_img_request_put(img_request);
+}
+
+/* Caller is responsible for rbd_obj_request_destroy(obj_request) */
+
+/* Block (interruptibly) until the object request completes */
+static int rbd_obj_request_wait(struct rbd_obj_request *obj_request)
+{
+ dout("%s: obj %p\n", __func__, obj_request);
+
+ return wait_for_completion_interruptible(&obj_request->completion);
+}
+
+/*
+ * The default/initial value for all image request flags is 0. Each
+ * is conditionally set to 1 at image request initialization time
+ * and currently never change thereafter.
+ */
+static void img_request_write_set(struct rbd_img_request *img_request)
+{
+ set_bit(IMG_REQ_WRITE, &img_request->flags);
+ smp_mb();
+}
+
+/* Is this image request a write? */
+static bool img_request_write_test(struct rbd_img_request *img_request)
+{
+ smp_mb();
+ return test_bit(IMG_REQ_WRITE, &img_request->flags) != 0;
+}
+
+/* Mark this image request as a child (parent-image read) request */
+static void img_request_child_set(struct rbd_img_request *img_request)
+{
+ set_bit(IMG_REQ_CHILD, &img_request->flags);
+ smp_mb();
+}
+
+/* Clear the child flag again (used when tearing a child down) */
+static void img_request_child_clear(struct rbd_img_request *img_request)
+{
+ clear_bit(IMG_REQ_CHILD, &img_request->flags);
+ smp_mb();
+}
+
+/* Is this image request a child (parent-image read) request? */
+static bool img_request_child_test(struct rbd_img_request *img_request)
+{
+ smp_mb();
+ return test_bit(IMG_REQ_CHILD, &img_request->flags) != 0;
+}
+
+/* Mark this image request as targeting a layered (cloned) image */
+static void img_request_layered_set(struct rbd_img_request *img_request)
+{
+ set_bit(IMG_REQ_LAYERED, &img_request->flags);
+ smp_mb();
+}
+
+/* Clear the layered flag (e.g. after the image is flattened) */
+static void img_request_layered_clear(struct rbd_img_request *img_request)
+{
+ clear_bit(IMG_REQ_LAYERED, &img_request->flags);
+ smp_mb();
+}
+
+/* Does this image request target a layered (cloned) image? */
+static bool img_request_layered_test(struct rbd_img_request *img_request)
+{
+ smp_mb();
+ return test_bit(IMG_REQ_LAYERED, &img_request->flags) != 0;
+}
+
+/*
+ * Completion handler for a read object request that is part of an
+ * image request: zero-fill holes (ENOENT) and short reads, report
+ * the full request length as transferred, and mark the request done.
+ */
+static void
+rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request)
+{
+ u64 xferred = obj_request->xferred;
+ u64 length = obj_request->length;
+
+ dout("%s: obj %p img %p result %d %llu/%llu\n", __func__,
+ obj_request, obj_request->img_request, obj_request->result,
+ xferred, length);
+ /*
+ * ENOENT means a hole in the image. We zero-fill the entire
+ * length of the request. A short read also implies zero-fill
+ * to the end of the request. An error requires the whole
+ * length of the request to be reported finished with an error
+ * to the block layer. In each case we update the xferred
+ * count to indicate the whole request was satisfied.
+ */
+ rbd_assert(obj_request->type != OBJ_REQUEST_NODATA);
+ if (obj_request->result == -ENOENT) {
+ if (obj_request->type == OBJ_REQUEST_BIO)
+ zero_bio_chain(obj_request->bio_list, 0);
+ else
+ zero_pages(obj_request->pages, 0, length);
+ obj_request->result = 0;
+ } else if (xferred < length && !obj_request->result) {
+ if (obj_request->type == OBJ_REQUEST_BIO)
+ zero_bio_chain(obj_request->bio_list, xferred);
+ else
+ zero_pages(obj_request->pages, xferred, length);
+ }
+ obj_request->xferred = length;
+ obj_request_done_set(obj_request);
+}
+
+/*
+ * Deliver completion of an object request: invoke its registered
+ * callback if any, otherwise wake anyone in rbd_obj_request_wait().
+ */
+static void rbd_obj_request_complete(struct rbd_obj_request *obj_request)
+{
+	dout("%s: obj %p cb %p\n", __func__, obj_request,
+		obj_request->callback);
+	if (!obj_request->callback)
+		complete_all(&obj_request->completion);
+	else
+		obj_request->callback(obj_request);
+}
+
+/* osd op callback that only needs to mark the request done */
+static void rbd_osd_trivial_callback(struct rbd_obj_request *obj_request)
+{
+ dout("%s: obj %p\n", __func__, obj_request);
+ obj_request_done_set(obj_request);
+}
+
+/*
+ * osd completion for a read op.  For a layered image, an ENOENT
+ * inside the parent overlap is retried against the parent image;
+ * other image reads go through the zero-fill read callback; bare
+ * (non-image) reads are simply marked done.
+ */
+static void rbd_osd_read_callback(struct rbd_obj_request *obj_request)
+{
+ struct rbd_img_request *img_request = NULL;
+ struct rbd_device *rbd_dev = NULL;
+ bool layered = false;
+
+ if (obj_request_img_data_test(obj_request)) {
+ img_request = obj_request->img_request;
+ layered = img_request && img_request_layered_test(img_request);
+ rbd_dev = img_request->rbd_dev;
+ }
+
+ dout("%s: obj %p img %p result %d %llu/%llu\n", __func__,
+ obj_request, img_request, obj_request->result,
+ obj_request->xferred, obj_request->length);
+ if (layered && obj_request->result == -ENOENT &&
+ obj_request->img_offset < rbd_dev->parent_overlap)
+ rbd_img_parent_read(obj_request);
+ else if (img_request)
+ rbd_img_obj_request_read_callback(obj_request);
+ else
+ obj_request_done_set(obj_request);
+}
+
+/* osd completion for a write op */
+static void rbd_osd_write_callback(struct rbd_obj_request *obj_request)
+{
+ dout("%s: obj %p result %d %llu\n", __func__, obj_request,
+ obj_request->result, obj_request->length);
+ /*
+ * There is no such thing as a successful short write. Set
+ * it to our originally-requested length.
+ */
+ obj_request->xferred = obj_request->length;
+ obj_request_done_set(obj_request);
+}
+
+/*
+ * For a simple stat call there's nothing to do. We'll do more if
+ * this is part of a write sequence for a layered image.
+ */
+static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request)
+{
+ dout("%s: obj %p\n", __func__, obj_request);
+ obj_request_done_set(obj_request);
+}
+
+/*
+ * Main osd request completion callback.  Records the result and
+ * transfer count from the first op, then dispatches to the
+ * per-opcode handler; if that handler marked the request done, the
+ * completion is propagated via rbd_obj_request_complete().
+ */
+static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
+ struct ceph_msg *msg)
+{
+ struct rbd_obj_request *obj_request = osd_req->r_priv;
+ u16 opcode;
+
+ dout("%s: osd_req %p msg %p\n", __func__, osd_req, msg);
+ rbd_assert(osd_req == obj_request->osd_req);
+ if (obj_request_img_data_test(obj_request)) {
+ rbd_assert(obj_request->img_request);
+ rbd_assert(obj_request->which != BAD_WHICH);
+ } else {
+ rbd_assert(obj_request->which == BAD_WHICH);
+ }
+
+ if (osd_req->r_result < 0)
+ obj_request->result = osd_req->r_result;
+
+ rbd_assert(osd_req->r_num_ops <= CEPH_OSD_MAX_OP);
+
+ /*
+ * We support a 64-bit length, but ultimately it has to be
+ * passed to blk_end_request(), which takes an unsigned int.
+ */
+ obj_request->xferred = osd_req->r_reply_op_len[0];
+ rbd_assert(obj_request->xferred < (u64)UINT_MAX);
+
+ opcode = osd_req->r_ops[0].op;
+ switch (opcode) {
+ case CEPH_OSD_OP_READ:
+ rbd_osd_read_callback(obj_request);
+ break;
+ case CEPH_OSD_OP_SETALLOCHINT:
+ /* Writes are hint+write pairs; handle via the write path */
+ rbd_assert(osd_req->r_ops[1].op == CEPH_OSD_OP_WRITE);
+ /* fall through */
+ case CEPH_OSD_OP_WRITE:
+ rbd_osd_write_callback(obj_request);
+ break;
+ case CEPH_OSD_OP_STAT:
+ rbd_osd_stat_callback(obj_request);
+ break;
+ case CEPH_OSD_OP_CALL:
+ case CEPH_OSD_OP_NOTIFY_ACK:
+ case CEPH_OSD_OP_WATCH:
+ rbd_osd_trivial_callback(obj_request);
+ break;
+ default:
+ rbd_warn(NULL, "%s: unsupported op %hu\n",
+ obj_request->object_name, (unsigned short) opcode);
+ break;
+ }
+
+ if (obj_request_done_test(obj_request))
+ rbd_obj_request_complete(obj_request);
+}
+
+/*
+ * Finalize a read osd request.  Reads from an image request use its
+ * mapped snapshot id; bare requests read the HEAD (CEPH_NOSNAP).
+ */
+static void rbd_osd_req_format_read(struct rbd_obj_request *obj_request)
+{
+ struct rbd_img_request *img_request = obj_request->img_request;
+ struct ceph_osd_request *osd_req = obj_request->osd_req;
+ u64 snap_id;
+
+ rbd_assert(osd_req != NULL);
+
+ snap_id = img_request ? img_request->snap_id : CEPH_NOSNAP;
+ ceph_osdc_build_request(osd_req, obj_request->offset,
+ NULL, snap_id, NULL);
+}
+
+/*
+ * Finalize a write osd request, attaching the image request's
+ * snapshot context (if any) and the current time as mtime.
+ */
+static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request)
+{
+ struct rbd_img_request *img_request = obj_request->img_request;
+ struct ceph_osd_request *osd_req = obj_request->osd_req;
+ struct ceph_snap_context *snapc;
+ struct timespec mtime = CURRENT_TIME;
+
+ rbd_assert(osd_req != NULL);
+
+ snapc = img_request ? img_request->snapc : NULL;
+ ceph_osdc_build_request(osd_req, obj_request->offset,
+ snapc, CEPH_NOSNAP, &mtime);
+}
+
+/*
+ * Create an osd request. A read request has one osd op (read).
+ * A write request has either one (watch) or two (hint+write) osd ops.
+ * (All rbd data writes are prefixed with an allocation hint op, but
+ * technically osd watch is a write request, hence this distinction.)
+ *
+ * Returns the allocated request with flags, callback, priv and
+ * target object filled in, or NULL on allocation failure.
+ */
+static struct ceph_osd_request *rbd_osd_req_create(
+ struct rbd_device *rbd_dev,
+ bool write_request,
+ unsigned int num_ops,
+ struct rbd_obj_request *obj_request)
+{
+ struct ceph_snap_context *snapc = NULL;
+ struct ceph_osd_client *osdc;
+ struct ceph_osd_request *osd_req;
+
+ if (obj_request_img_data_test(obj_request)) {
+ struct rbd_img_request *img_request = obj_request->img_request;
+
+ rbd_assert(write_request ==
+ img_request_write_test(img_request));
+ if (write_request)
+ snapc = img_request->snapc;
+ }
+
+ rbd_assert(num_ops == 1 || (write_request && num_ops == 2));
+
+ /* Allocate and initialize the request, for the num_ops ops */
+
+ osdc = &rbd_dev->rbd_client->client->osdc;
+ osd_req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false,
+ GFP_ATOMIC);
+ if (!osd_req)
+ return NULL; /* ENOMEM */
+
+ if (write_request)
+ osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
+ else
+ osd_req->r_flags = CEPH_OSD_FLAG_READ;
+
+ osd_req->r_callback = rbd_osd_req_callback;
+ osd_req->r_priv = obj_request;
+
+ osd_req->r_base_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout);
+ ceph_oid_set_name(&osd_req->r_base_oid, obj_request->object_name);
+
+ return osd_req;
+}
+
+/*
+ * Create a copyup osd request based on the information in the
+ * object request supplied. A copyup request has three osd ops,
+ * a copyup method call, a hint op, and a write op.
+ *
+ * Returns the allocated write request, or NULL on allocation
+ * failure.  Only valid for write object requests that belong to an
+ * image request (asserted below).
+ */
+static struct ceph_osd_request *
+rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request)
+{
+ struct rbd_img_request *img_request;
+ struct ceph_snap_context *snapc;
+ struct rbd_device *rbd_dev;
+ struct ceph_osd_client *osdc;
+ struct ceph_osd_request *osd_req;
+
+ rbd_assert(obj_request_img_data_test(obj_request));
+ img_request = obj_request->img_request;
+ rbd_assert(img_request);
+ rbd_assert(img_request_write_test(img_request));
+
+ /* Allocate and initialize the request, for the three ops */
+
+ snapc = img_request->snapc;
+ rbd_dev = img_request->rbd_dev;
+ osdc = &rbd_dev->rbd_client->client->osdc;
+ osd_req = ceph_osdc_alloc_request(osdc, snapc, 3, false, GFP_ATOMIC);
+ if (!osd_req)
+ return NULL; /* ENOMEM */
+
+ osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
+ osd_req->r_callback = rbd_osd_req_callback;
+ osd_req->r_priv = obj_request;
+
+ osd_req->r_base_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout);
+ ceph_oid_set_name(&osd_req->r_base_oid, obj_request->object_name);
+
+ return osd_req;
+}
+
+
+/* Release an osd request created by the rbd_osd_req_create*() helpers */
+static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req)
+{
+ ceph_osdc_put_request(osd_req);
+}
+
+/* object_name is assumed to be a non-null pointer and NUL-terminated */
+
+/*
+ * Allocate and initialize an object request for the named object
+ * covering [offset, offset+length).  A private copy of the name is
+ * taken (freed with kfree() in rbd_obj_request_destroy()).  Returns
+ * NULL on allocation failure.
+ */
+static struct rbd_obj_request *rbd_obj_request_create(const char *object_name,
+						u64 offset, u64 length,
+						enum obj_request_type type)
+{
+	struct rbd_obj_request *obj_request;
+	char *name;
+
+	rbd_assert(obj_request_type_valid(type));
+
+	/* kstrdup() replaces the open-coded strlen/kmalloc/memcpy */
+	name = kstrdup(object_name, GFP_KERNEL);
+	if (!name)
+		return NULL;
+
+	obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_KERNEL);
+	if (!obj_request) {
+		kfree(name);
+		return NULL;
+	}
+
+	obj_request->object_name = name;
+	obj_request->offset = offset;
+	obj_request->length = length;
+	obj_request->flags = 0;
+	obj_request->which = BAD_WHICH;
+	obj_request->type = type;
+	INIT_LIST_HEAD(&obj_request->links);
+	init_completion(&obj_request->completion);
+	kref_init(&obj_request->kref);
+
+	dout("%s: \"%s\" %llu/%llu %d -> obj %p\n", __func__, object_name,
+		offset, length, (int)type, obj_request);
+
+	return obj_request;
+}
+
+/*
+ * kref release callback for an object request: frees the osd
+ * request, any attached bio chain or page vector, the name copy,
+ * and the request itself.  The request must already be detached
+ * from any image request (asserted below).
+ */
+static void rbd_obj_request_destroy(struct kref *kref)
+{
+ struct rbd_obj_request *obj_request;
+
+ obj_request = container_of(kref, struct rbd_obj_request, kref);
+
+ dout("%s: obj %p\n", __func__, obj_request);
+
+ rbd_assert(obj_request->img_request == NULL);
+ rbd_assert(obj_request->which == BAD_WHICH);
+
+ if (obj_request->osd_req)
+ rbd_osd_req_destroy(obj_request->osd_req);
+
+ rbd_assert(obj_request_type_valid(obj_request->type));
+ switch (obj_request->type) {
+ case OBJ_REQUEST_NODATA:
+ break; /* Nothing to do */
+ case OBJ_REQUEST_BIO:
+ if (obj_request->bio_list)
+ bio_chain_put(obj_request->bio_list);
+ break;
+ case OBJ_REQUEST_PAGES:
+ if (obj_request->pages)
+ ceph_release_page_vector(obj_request->pages,
+ obj_request->page_count);
+ break;
+ }
+
+ kfree(obj_request->object_name);
+ obj_request->object_name = NULL;
+ kmem_cache_free(rbd_obj_request_cache, obj_request);
+}
+
+/* It's OK to call this for a device with no parent */
+
+static void rbd_spec_put(struct rbd_spec *spec);
+/* Detach a device from its parent image and drop the parent spec */
+static void rbd_dev_unparent(struct rbd_device *rbd_dev)
+{
+ rbd_dev_remove_parent(rbd_dev);
+ rbd_spec_put(rbd_dev->parent_spec);
+ rbd_dev->parent_spec = NULL;
+ rbd_dev->parent_overlap = 0;
+}
+
+/*
+ * Parent image reference counting is used to determine when an
+ * image's parent fields can be safely torn down--after there are no
+ * more in-flight requests to the parent image. When the last
+ * reference is dropped, cleaning them up is safe.
+ */
+static void rbd_dev_parent_put(struct rbd_device *rbd_dev)
+{
+ int counter;
+
+ if (!rbd_dev->parent_spec)
+ return;
+
+ /* atomic_dec_return_safe() returns a negative value on underflow */
+ counter = atomic_dec_return_safe(&rbd_dev->parent_ref);
+ if (counter > 0)
+ return;
+
+ /* Last reference; clean up parent data structures */
+
+ if (!counter)
+ rbd_dev_unparent(rbd_dev);
+ else
+ rbd_warn(rbd_dev, "parent reference underflow\n");
+}
+
+/*
+ * If an image has a non-zero parent overlap, get a reference to its
+ * parent.
+ *
+ * We must get the reference before checking for the overlap to
+ * coordinate properly with zeroing the parent overlap in
+ * rbd_dev_v2_parent_info() when an image gets flattened. We
+ * drop it again if there is no overlap.
+ *
+ * Returns true if the rbd device has a parent with a non-zero
+ * overlap and a reference for it was successfully taken, or
+ * false otherwise.
+ */
+static bool rbd_dev_parent_get(struct rbd_device *rbd_dev)
+{
+ int counter;
+
+ if (!rbd_dev->parent_spec)
+ return false;
+
+ /*
+ * NOTE(review): a non-positive return from
+ * atomic_inc_return_safe() is treated as "no reference taken";
+ * negative additionally means the counter saturated -- confirm
+ * against its definition.
+ */
+ counter = atomic_inc_return_safe(&rbd_dev->parent_ref);
+ if (counter > 0 && rbd_dev->parent_overlap)
+ return true;
+
+ /* Image was flattened, but parent is not yet torn down */
+
+ if (counter < 0)
+ rbd_warn(rbd_dev, "parent reference overflow\n");
+
+ return false;
+}
+
+/*
+ * Caller is responsible for filling in the list of object requests
+ * that comprises the image request, and the Linux request pointer
+ * (if there is one).
+ */
+/*
+ * Allocate and initialize an image request covering [offset, length)
+ * of the image.  For writes, a reference to the current snapshot
+ * context is taken (under header_rwsem); for reads, the mapped
+ * snapshot id is recorded.  Returns NULL on allocation failure.
+ */
+static struct rbd_img_request *rbd_img_request_create(
+ struct rbd_device *rbd_dev,
+ u64 offset, u64 length,
+ bool write_request)
+{
+ struct rbd_img_request *img_request;
+
+ /*
+ * NOTE(review): GFP_ATOMIC here, yet the write path below takes
+ * header_rwsem, which can sleep -- presumably callers are always
+ * in sleepable context and GFP_ATOMIC is just conservative;
+ * confirm.
+ */
+ img_request = kmem_cache_alloc(rbd_img_request_cache, GFP_ATOMIC);
+ if (!img_request)
+ return NULL;
+
+ if (write_request) {
+ /* Pin the snapshot context the write will be tagged with */
+ down_read(&rbd_dev->header_rwsem);
+ ceph_get_snap_context(rbd_dev->header.snapc);
+ up_read(&rbd_dev->header_rwsem);
+ }
+
+ img_request->rq = NULL;
+ img_request->rbd_dev = rbd_dev;
+ img_request->offset = offset;
+ img_request->length = length;
+ img_request->flags = 0;
+ if (write_request) {
+ img_request_write_set(img_request);
+ img_request->snapc = rbd_dev->header.snapc;
+ } else {
+ img_request->snap_id = rbd_dev->spec->snap_id;
+ }
+ /* Take a parent ref if the image is layered with a live overlap */
+ if (rbd_dev_parent_get(rbd_dev))
+ img_request_layered_set(img_request);
+ spin_lock_init(&img_request->completion_lock);
+ img_request->next_completion = 0;
+ img_request->callback = NULL;
+ img_request->result = 0;
+ img_request->obj_request_count = 0;
+ INIT_LIST_HEAD(&img_request->obj_requests);
+ kref_init(&img_request->kref);
+
+ dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev,
+ write_request ? "write" : "read", offset, length,
+ img_request);
+
+ return img_request;
+}
+
+/*
+ * kref release callback for an rbd_img_request.  Detaches and drops
+ * all constituent object requests, releases the layered-image parent
+ * reference and (for writes) the snapshot context, then frees the
+ * request.
+ */
+static void rbd_img_request_destroy(struct kref *kref)
+{
+ struct rbd_img_request *img_request;
+ struct rbd_obj_request *obj_request;
+ struct rbd_obj_request *next_obj_request;
+
+ img_request = container_of(kref, struct rbd_img_request, kref);
+
+ dout("%s: img %p\n", __func__, img_request);
+
+ /* _safe variant: each del unlinks the entry we're standing on */
+ for_each_obj_request_safe(img_request, obj_request, next_obj_request)
+ rbd_img_obj_request_del(img_request, obj_request);
+ rbd_assert(img_request->obj_request_count == 0);
+
+ if (img_request_layered_test(img_request)) {
+ img_request_layered_clear(img_request);
+ /* Undo the rbd_dev_parent_get() from creation time */
+ rbd_dev_parent_put(img_request->rbd_dev);
+ }
+
+ /* Writes pinned the snapshot context at creation time */
+ if (img_request_write_test(img_request))
+ ceph_put_snap_context(img_request->snapc);
+
+ kmem_cache_free(rbd_img_request_cache, img_request);
+}
+
+/*
+ * Create a child image request against the parent image, covering
+ * [img_offset, img_offset + length) of the parent, on behalf of the
+ * given object request.  The child holds a reference to the object
+ * request (dropped in rbd_parent_request_destroy()).  Returns NULL
+ * on allocation failure.
+ */
+static struct rbd_img_request *rbd_parent_request_create(
+ struct rbd_obj_request *obj_request,
+ u64 img_offset, u64 length)
+{
+ struct rbd_img_request *parent_request;
+ struct rbd_device *rbd_dev;
+
+ rbd_assert(obj_request->img_request);
+ rbd_dev = obj_request->img_request->rbd_dev;
+
+ /* Reads from the parent are always against rbd_dev->parent */
+ parent_request = rbd_img_request_create(rbd_dev->parent,
+ img_offset, length, false);
+ if (!parent_request)
+ return NULL;
+
+ img_request_child_set(parent_request);
+ rbd_obj_request_get(obj_request);
+ parent_request->obj_request = obj_request;
+
+ return parent_request;
+}
+
+/*
+ * kref release callback for a child (parent-read) image request.
+ * Drops the reference to the originating object request taken in
+ * rbd_parent_request_create(), then destroys the image request.
+ *
+ * NOTE(review): rbd_img_obj_parent_read_full()'s error path clears
+ * parent_request->obj_request before dropping the request, so
+ * orig_request may be NULL here -- presumably rbd_obj_request_put()
+ * tolerates NULL; confirm.
+ */
+static void rbd_parent_request_destroy(struct kref *kref)
+{
+ struct rbd_img_request *parent_request;
+ struct rbd_obj_request *orig_request;
+
+ parent_request = container_of(kref, struct rbd_img_request, kref);
+ orig_request = parent_request->obj_request;
+
+ parent_request->obj_request = NULL;
+ rbd_obj_request_put(orig_request);
+ img_request_child_clear(parent_request);
+
+ rbd_img_request_destroy(kref);
+}
+
+/*
+ * Account for the completion of one object request belonging to an
+ * image request: record the first error seen on the image request,
+ * and tell the block layer (or, for a child request, compute
+ * directly) whether more completions are still expected.
+ *
+ * Returns true if more object requests remain to complete.
+ */
+static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
+{
+ struct rbd_img_request *img_request;
+ unsigned int xferred;
+ int result;
+ bool more;
+
+ rbd_assert(obj_request_img_data_test(obj_request));
+ img_request = obj_request->img_request;
+
+ rbd_assert(obj_request->xferred <= (u64)UINT_MAX);
+ xferred = (unsigned int)obj_request->xferred;
+ result = obj_request->result;
+ if (result) {
+ struct rbd_device *rbd_dev = img_request->rbd_dev;
+
+ rbd_warn(rbd_dev, "%s %llx at %llx (%llx)\n",
+ img_request_write_test(img_request) ? "write" : "read",
+ obj_request->length, obj_request->img_offset,
+ obj_request->offset);
+ rbd_warn(rbd_dev, " result %d xferred %x\n",
+ result, xferred);
+ /* Only the first error is recorded on the image request */
+ if (!img_request->result)
+ img_request->result = result;
+ }
+
+ /* Image object requests don't own their page array */
+
+ if (obj_request->type == OBJ_REQUEST_PAGES) {
+ obj_request->pages = NULL;
+ obj_request->page_count = 0;
+ }
+
+ if (img_request_child_test(img_request)) {
+ /* No block request behind a child; count completions */
+ rbd_assert(img_request->obj_request != NULL);
+ more = obj_request->which < img_request->obj_request_count - 1;
+ } else {
+ /* blk_end_request() returns true while bytes remain */
+ rbd_assert(img_request->rq != NULL);
+ more = blk_end_request(img_request->rq, result, xferred);
+ }
+
+ return more;
+}
+
+/*
+ * Per-object-request completion callback for image requests.
+ *
+ * Object requests may complete out of order, but the block layer is
+ * notified strictly in order: under completion_lock, walk forward
+ * from next_completion, ending every request already marked done,
+ * and stop at the first one still in flight (it will resume the walk
+ * when its own callback fires).  When the last request has been
+ * ended, complete the image request as a whole.
+ */
+static void rbd_img_obj_callback(struct rbd_obj_request *obj_request)
+{
+ struct rbd_img_request *img_request;
+ u32 which = obj_request->which;
+ bool more = true;
+
+ rbd_assert(obj_request_img_data_test(obj_request));
+ img_request = obj_request->img_request;
+
+ dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
+ rbd_assert(img_request != NULL);
+ rbd_assert(img_request->obj_request_count > 0);
+ rbd_assert(which != BAD_WHICH);
+ rbd_assert(which < img_request->obj_request_count);
+
+ spin_lock_irq(&img_request->completion_lock);
+ /* Out-of-order completion; an earlier request will end us later */
+ if (which != img_request->next_completion)
+ goto out;
+
+ for_each_obj_request_from(img_request, obj_request) {
+ rbd_assert(more);
+ rbd_assert(which < img_request->obj_request_count);
+
+ if (!obj_request_done_test(obj_request))
+ break;
+ more = rbd_img_obj_end_request(obj_request);
+ which++;
+ }
+
+ /* "no more" exactly when every request has been ended */
+ rbd_assert(more ^ (which == img_request->obj_request_count));
+ img_request->next_completion = which;
+out:
+ spin_unlock_irq(&img_request->completion_lock);
+
+ if (!more)
+ rbd_img_request_complete(img_request);
+}
+
+/*
+ * Split up an image request into one or more object requests, each
+ * to a different object. The "type" parameter indicates whether
+ * "data_desc" is the pointer to the head of a list of bio
+ * structures, or the base of a page array. In either case this
+ * function assumes data_desc describes memory sufficient to hold
+ * all data described by the image request.
+ *
+ * Returns 0 on success.  On failure, all object requests created so
+ * far are unwound and -ENOMEM is returned.
+ */
+static int rbd_img_request_fill(struct rbd_img_request *img_request,
+ enum obj_request_type type,
+ void *data_desc)
+{
+ struct rbd_device *rbd_dev = img_request->rbd_dev;
+ struct rbd_obj_request *obj_request = NULL;
+ struct rbd_obj_request *next_obj_request;
+ bool write_request = img_request_write_test(img_request);
+ struct bio *bio_list = NULL;
+ unsigned int bio_offset = 0;
+ struct page **pages = NULL;
+ u64 img_offset;
+ u64 resid;
+ u16 opcode;
+
+ dout("%s: img %p type %d data_desc %p\n", __func__, img_request,
+ (int)type, data_desc);
+
+ opcode = write_request ? CEPH_OSD_OP_WRITE : CEPH_OSD_OP_READ;
+ img_offset = img_request->offset;
+ resid = img_request->length;
+ rbd_assert(resid > 0);
+
+ if (type == OBJ_REQUEST_BIO) {
+ bio_list = data_desc;
+ /* The bio chain must start where the image request starts */
+ rbd_assert(img_offset ==
+ bio_list->bi_iter.bi_sector << SECTOR_SHIFT);
+ } else {
+ rbd_assert(type == OBJ_REQUEST_PAGES);
+ pages = data_desc;
+ }
+
+ /* One object request per object the image range touches */
+ while (resid) {
+ struct ceph_osd_request *osd_req;
+ const char *object_name;
+ u64 offset;
+ u64 length;
+ unsigned int which = 0;
+
+ object_name = rbd_segment_name(rbd_dev, img_offset);
+ if (!object_name)
+ goto out_unwind;
+ offset = rbd_segment_offset(rbd_dev, img_offset);
+ length = rbd_segment_length(rbd_dev, img_offset, resid);
+ obj_request = rbd_obj_request_create(object_name,
+ offset, length, type);
+ /* object request has its own copy of the object name */
+ rbd_segment_name_free(object_name);
+ if (!obj_request)
+ goto out_unwind;
+
+ /*
+ * set obj_request->img_request before creating the
+ * osd_request so that it gets the right snapc
+ */
+ rbd_img_obj_request_add(img_request, obj_request);
+
+ if (type == OBJ_REQUEST_BIO) {
+ /* Clone just this object's slice of the bio chain */
+ unsigned int clone_size;
+
+ rbd_assert(length <= (u64)UINT_MAX);
+ clone_size = (unsigned int)length;
+ obj_request->bio_list =
+ bio_chain_clone_range(&bio_list,
+ &bio_offset,
+ clone_size,
+ GFP_ATOMIC);
+ if (!obj_request->bio_list)
+ goto out_unwind;
+ } else {
+ /*
+ * Page-array requests share the caller's array;
+ * advance past the pages this object consumes.
+ */
+ unsigned int page_count;
+
+ obj_request->pages = pages;
+ page_count = (u32)calc_pages_for(offset, length);
+ obj_request->page_count = page_count;
+ /* last page is shared with the next object's data */
+ if ((offset + length) & ~PAGE_MASK)
+ page_count--; /* more on last page */
+ pages += page_count;
+ }
+
+ /* Writes get 2 ops (alloc hint + extent), reads just 1 */
+ osd_req = rbd_osd_req_create(rbd_dev, write_request,
+ (write_request ? 2 : 1),
+ obj_request);
+ if (!osd_req)
+ goto out_unwind;
+ obj_request->osd_req = osd_req;
+ obj_request->callback = rbd_img_obj_callback;
+
+ if (write_request) {
+ osd_req_op_alloc_hint_init(osd_req, which,
+ rbd_obj_bytes(&rbd_dev->header),
+ rbd_obj_bytes(&rbd_dev->header));
+ which++;
+ }
+
+ osd_req_op_extent_init(osd_req, which, opcode, offset, length,
+ 0, 0);
+ if (type == OBJ_REQUEST_BIO)
+ osd_req_op_extent_osd_data_bio(osd_req, which,
+ obj_request->bio_list, length);
+ else
+ osd_req_op_extent_osd_data_pages(osd_req, which,
+ obj_request->pages, length,
+ offset & ~PAGE_MASK, false, false);
+
+ if (write_request)
+ rbd_osd_req_format_write(obj_request);
+ else
+ rbd_osd_req_format_read(obj_request);
+
+ obj_request->img_offset = img_offset;
+
+ img_offset += length;
+ resid -= length;
+ }
+
+ return 0;
+
+out_unwind:
+ /* Drop every object request added so far (del drops its ref) */
+ for_each_obj_request_safe(img_request, obj_request, next_obj_request)
+ rbd_img_obj_request_del(img_request, obj_request);
+
+ return -ENOMEM;
+}
+
+/*
+ * Completion callback for an object request that was turned into a
+ * copyup request.  Releases the copyup page vector (the parent data
+ * that was written to the target object), fixes up the transfer
+ * count, and hands off to the normal image-object callback.
+ */
+static void
+rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request)
+{
+ struct rbd_img_request *img_request;
+ struct rbd_device *rbd_dev;
+ struct page **pages;
+ u32 page_count;
+
+ rbd_assert(obj_request->type == OBJ_REQUEST_BIO);
+ rbd_assert(obj_request_img_data_test(obj_request));
+ img_request = obj_request->img_request;
+ rbd_assert(img_request);
+
+ rbd_dev = img_request->rbd_dev;
+ rbd_assert(rbd_dev);
+
+ /* The copyup pages were attached by the parent-read callback */
+ pages = obj_request->copyup_pages;
+ rbd_assert(pages != NULL);
+ obj_request->copyup_pages = NULL;
+ page_count = obj_request->copyup_page_count;
+ rbd_assert(page_count);
+ obj_request->copyup_page_count = 0;
+ ceph_release_page_vector(pages, page_count);
+
+ /*
+ * We want the transfer count to reflect the size of the
+ * original write request. There is no such thing as a
+ * successful short write, so if the request was successful
+ * we can just set it to the originally-requested length.
+ */
+ if (!obj_request->result)
+ obj_request->xferred = obj_request->length;
+
+ /* Finish up with the normal image object callback */
+
+ rbd_img_obj_callback(obj_request);
+}
+
+/*
+ * Completion callback for the child image request that read the full
+ * target-object range from the parent image.  On success, rebuild the
+ * original object request as a 3-op copyup (CALL "rbd.copyup" with
+ * the parent data + alloc hint + the original write) and resubmit it.
+ * If the image was flattened meanwhile (overlap now 0), just resubmit
+ * the original write.  On any error, record it on the original
+ * request and complete it.
+ *
+ * Fix: the previous version leaked the parent-read page vector when
+ * the parent read itself failed or when allocating the copyup osd
+ * request failed -- both paths reached out_err while "pages" was
+ * still locally owned and never released.  We now track ownership of
+ * "pages" explicitly (NULL once released or handed to orig_request)
+ * and free it on out_err if still held.
+ */
+static void
+rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
+{
+ struct rbd_obj_request *orig_request;
+ struct ceph_osd_request *osd_req;
+ struct ceph_osd_client *osdc;
+ struct rbd_device *rbd_dev;
+ struct page **pages;
+ u32 page_count;
+ int img_result;
+ u64 parent_length;
+ u64 offset;
+ u64 length;
+
+ rbd_assert(img_request_child_test(img_request));
+
+ /* First get what we need from the image request */
+
+ pages = img_request->copyup_pages;
+ rbd_assert(pages != NULL);
+ img_request->copyup_pages = NULL;
+ page_count = img_request->copyup_page_count;
+ rbd_assert(page_count);
+ img_request->copyup_page_count = 0;
+
+ orig_request = img_request->obj_request;
+ rbd_assert(orig_request != NULL);
+ rbd_assert(obj_request_type_valid(orig_request->type));
+ img_result = img_request->result;
+ parent_length = img_request->length;
+ rbd_assert(parent_length == img_request->xferred);
+ rbd_img_request_put(img_request);
+
+ rbd_assert(orig_request->img_request);
+ rbd_dev = orig_request->img_request->rbd_dev;
+ rbd_assert(rbd_dev);
+
+ /*
+ * If the overlap has become 0 (most likely because the
+ * image has been flattened) we need to free the pages
+ * and re-submit the original write request.
+ */
+ if (!rbd_dev->parent_overlap) {
+ struct ceph_osd_client *osdc;
+
+ ceph_release_page_vector(pages, page_count);
+ pages = NULL; /* don't release again on the error path */
+ osdc = &rbd_dev->rbd_client->client->osdc;
+ img_result = rbd_obj_request_submit(osdc, orig_request);
+ if (!img_result)
+ return;
+ }
+
+ /* Parent read failed; propagate its result */
+ if (img_result)
+ goto out_err;
+
+ /*
+ * The original osd request is of no use to us any more.
+ * We need a new one that can hold the three ops in a copyup
+ * request. Allocate the new copyup osd request for the
+ * original request, and release the old one.
+ */
+ img_result = -ENOMEM;
+ osd_req = rbd_osd_req_create_copyup(orig_request);
+ if (!osd_req)
+ goto out_err;
+ rbd_osd_req_destroy(orig_request->osd_req);
+ orig_request->osd_req = osd_req;
+ orig_request->copyup_pages = pages;
+ orig_request->copyup_page_count = page_count;
+
+ /* Initialize the copyup op */
+
+ osd_req_op_cls_init(osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup");
+ osd_req_op_cls_request_data_pages(osd_req, 0, pages, parent_length, 0,
+ false, false);
+
+ /*
+ * orig_request now owns the pages; they are released by
+ * rbd_img_obj_copyup_callback() when the request completes.
+ */
+ pages = NULL;
+
+ /* Then the hint op */
+
+ osd_req_op_alloc_hint_init(osd_req, 1, rbd_obj_bytes(&rbd_dev->header),
+ rbd_obj_bytes(&rbd_dev->header));
+
+ /* And the original write request op */
+
+ offset = orig_request->offset;
+ length = orig_request->length;
+ osd_req_op_extent_init(osd_req, 2, CEPH_OSD_OP_WRITE,
+ offset, length, 0, 0);
+ if (orig_request->type == OBJ_REQUEST_BIO)
+ osd_req_op_extent_osd_data_bio(osd_req, 2,
+ orig_request->bio_list, length);
+ else
+ osd_req_op_extent_osd_data_pages(osd_req, 2,
+ orig_request->pages, length,
+ offset & ~PAGE_MASK, false, false);
+
+ rbd_osd_req_format_write(orig_request);
+
+ /* All set, send it off. */
+
+ orig_request->callback = rbd_img_obj_copyup_callback;
+ osdc = &rbd_dev->rbd_client->client->osdc;
+ img_result = rbd_obj_request_submit(osdc, orig_request);
+ if (!img_result)
+ return;
+out_err:
+ /* Release the parent data if ownership was never handed off */
+ if (pages)
+ ceph_release_page_vector(pages, page_count);
+
+ /* Record the error code and complete the request */
+
+ orig_request->result = img_result;
+ orig_request->xferred = 0;
+ obj_request_done_set(orig_request);
+ rbd_obj_request_complete(orig_request);
+}
+
+/*
+ * Read from the parent image the range of data that covers the
+ * entire target of the given object request. This is used for
+ * satisfying a layered image write request when the target of an
+ * object request from the image request does not exist.
+ *
+ * A page array big enough to hold the returned data is allocated
+ * and supplied to rbd_img_request_fill() as the "data descriptor."
+ * When the read completes, this page array will be transferred to
+ * the original object request for the copyup operation.
+ *
+ * If an error occurs, record it as the result of the original
+ * object request and mark it done so it gets completed.
+ */
+static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
+{
+ struct rbd_img_request *img_request = NULL;
+ struct rbd_img_request *parent_request = NULL;
+ struct rbd_device *rbd_dev;
+ u64 img_offset;
+ u64 length;
+ struct page **pages = NULL;
+ u32 page_count;
+ int result;
+
+ rbd_assert(obj_request_img_data_test(obj_request));
+ rbd_assert(obj_request_type_valid(obj_request->type));
+
+ img_request = obj_request->img_request;
+ rbd_assert(img_request != NULL);
+ rbd_dev = img_request->rbd_dev;
+ rbd_assert(rbd_dev->parent != NULL);
+
+ /*
+ * Determine the byte range covered by the object in the
+ * child image to which the original request was to be sent.
+ */
+ img_offset = obj_request->img_offset - obj_request->offset;
+ length = (u64)1 << rbd_dev->header.obj_order;
+
+ /*
+ * There is no defined parent data beyond the parent
+ * overlap, so limit what we read at that boundary if
+ * necessary.
+ */
+ if (img_offset + length > rbd_dev->parent_overlap) {
+ rbd_assert(img_offset < rbd_dev->parent_overlap);
+ length = rbd_dev->parent_overlap - img_offset;
+ }
+
+ /*
+ * Allocate a page array big enough to receive the data read
+ * from the parent.
+ */
+ page_count = (u32)calc_pages_for(0, length);
+ pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
+ if (IS_ERR(pages)) {
+ result = PTR_ERR(pages);
+ pages = NULL;
+ goto out_err;
+ }
+
+ result = -ENOMEM;
+ parent_request = rbd_parent_request_create(obj_request,
+ img_offset, length);
+ if (!parent_request)
+ goto out_err;
+
+ result = rbd_img_request_fill(parent_request, OBJ_REQUEST_PAGES, pages);
+ if (result)
+ goto out_err;
+ /* Pages transfer to the original request when the read completes */
+ parent_request->copyup_pages = pages;
+ parent_request->copyup_page_count = page_count;
+
+ parent_request->callback = rbd_img_obj_parent_read_full_callback;
+ result = rbd_img_request_submit(parent_request);
+ if (!result)
+ return 0;
+
+ /*
+ * Submit failed: take back the pages and the obj_request
+ * reference so the put below doesn't double-handle them.
+ * NOTE(review): clearing parent_request->obj_request means
+ * rbd_parent_request_destroy() will put a NULL pointer --
+ * presumably rbd_obj_request_put() tolerates that; confirm.
+ */
+ parent_request->copyup_pages = NULL;
+ parent_request->copyup_page_count = 0;
+ parent_request->obj_request = NULL;
+ rbd_obj_request_put(obj_request);
+out_err:
+ if (pages)
+ ceph_release_page_vector(pages, page_count);
+ if (parent_request)
+ rbd_img_request_put(parent_request);
+ obj_request->result = result;
+ obj_request->xferred = 0;
+ obj_request_done_set(obj_request);
+
+ return result;
+}
+
+/*
+ * Completion callback for the STAT request issued by
+ * rbd_img_obj_exists_submit().  Records whether the target object
+ * exists on the original object request and resubmits it; if the
+ * image was flattened meanwhile, just resubmits the original write.
+ */
+static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request)
+{
+ struct rbd_obj_request *orig_request;
+ struct rbd_device *rbd_dev;
+ int result;
+
+ rbd_assert(!obj_request_img_data_test(obj_request));
+
+ /*
+ * All we need from the object request is the original
+ * request and the result of the STAT op. Grab those, then
+ * we're done with the request.
+ */
+ orig_request = obj_request->obj_request;
+ obj_request->obj_request = NULL;
+ /*
+ * NOTE(review): the stat request's reference to orig_request is
+ * dropped here but orig_request is still used below --
+ * presumably safe because its image request holds another
+ * reference; confirm.
+ */
+ rbd_obj_request_put(orig_request);
+ rbd_assert(orig_request);
+ rbd_assert(orig_request->img_request);
+
+ result = obj_request->result;
+ obj_request->result = 0;
+
+ dout("%s: obj %p for obj %p result %d %llu/%llu\n", __func__,
+ obj_request, orig_request, result,
+ obj_request->xferred, obj_request->length);
+ rbd_obj_request_put(obj_request);
+
+ /*
+ * If the overlap has become 0 (most likely because the
+ * image has been flattened) we need to free the pages
+ * and re-submit the original write request.
+ */
+ rbd_dev = orig_request->img_request->rbd_dev;
+ if (!rbd_dev->parent_overlap) {
+ struct ceph_osd_client *osdc;
+
+ osdc = &rbd_dev->rbd_client->client->osdc;
+ result = rbd_obj_request_submit(osdc, orig_request);
+ if (!result)
+ return;
+ }
+
+ /*
+ * Our only purpose here is to determine whether the object
+ * exists, and we don't want to treat the non-existence as
+ * an error. If something else comes back, transfer the
+ * error to the original request and complete it now.
+ */
+ if (!result) {
+ obj_request_existence_set(orig_request, true);
+ } else if (result == -ENOENT) {
+ obj_request_existence_set(orig_request, false);
+ } else if (result) {
+ orig_request->result = result;
+ goto out;
+ }
+
+ /*
+ * Resubmit the original request now that we have recorded
+ * whether the target object exists.
+ */
+ orig_request->result = rbd_img_obj_request_submit(orig_request);
+out:
+ if (orig_request->result)
+ rbd_obj_request_complete(orig_request);
+}
+
+/*
+ * Issue a STAT request against the target object of a layered write
+ * to find out whether it exists.  The result is handled by
+ * rbd_img_obj_exists_callback(), which records the existence state
+ * and resubmits the original object request.
+ *
+ * Returns 0 if the stat request was submitted, or a negative errno.
+ *
+ * Fix: the old single "out:" error path (a) dropped a reference on
+ * obj_request that had not been taken yet when rbd_obj_request_create()
+ * failed (refcount underflow), and (b) leaked stat_request -- and the
+ * page vector it owns -- on the later failures.  Errors are now
+ * unwound in stages.
+ */
+static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
+{
+ struct rbd_obj_request *stat_request;
+ struct rbd_device *rbd_dev;
+ struct ceph_osd_client *osdc;
+ struct page **pages = NULL;
+ u32 page_count;
+ size_t size;
+ int ret;
+
+ /*
+ * The response data for a STAT call consists of:
+ * le64 length;
+ * struct {
+ * le32 tv_sec;
+ * le32 tv_nsec;
+ * } mtime;
+ */
+ size = sizeof (__le64) + sizeof (__le32) + sizeof (__le32);
+ page_count = (u32)calc_pages_for(0, size);
+ pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ ret = -ENOMEM;
+ stat_request = rbd_obj_request_create(obj_request->object_name, 0, 0,
+ OBJ_REQUEST_PAGES);
+ if (!stat_request)
+ goto fail_pages;
+
+ rbd_obj_request_get(obj_request);
+ stat_request->obj_request = obj_request;
+ stat_request->pages = pages; /* stat_request now owns the pages */
+ stat_request->page_count = page_count;
+
+ rbd_assert(obj_request->img_request);
+ rbd_dev = obj_request->img_request->rbd_dev;
+ stat_request->osd_req = rbd_osd_req_create(rbd_dev, false, 1,
+ stat_request);
+ if (!stat_request->osd_req)
+ goto fail_stat_request;
+ stat_request->callback = rbd_img_obj_exists_callback;
+
+ osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT);
+ osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0,
+ false, false);
+ rbd_osd_req_format_read(stat_request);
+
+ osdc = &rbd_dev->rbd_client->client->osdc;
+ ret = rbd_obj_request_submit(osdc, stat_request);
+ if (ret)
+ goto fail_stat_request;
+
+ return 0;
+
+fail_stat_request:
+ /* Balance the get above, then free stat_request (and its pages) */
+ rbd_obj_request_put(obj_request);
+ rbd_obj_request_put(stat_request);
+ return ret;
+
+fail_pages:
+ ceph_release_page_vector(pages, page_count);
+ return ret;
+}
+
+/*
+ * Submit one object request belonging to an image request, choosing
+ * between a plain submission and the layered-write machinery
+ * (existence check, then parent read + copyup if needed).
+ *
+ * Note: "known" is assigned inside the short-circuited condition
+ * below.  Whenever the simple-submit branch is NOT taken, the first
+ * three operands were all false, so the fourth was evaluated and
+ * "known" is initialized before its later use.
+ *
+ * Fix: removed the inner "struct rbd_device *rbd_dev" that shadowed
+ * the outer variable and re-derived the identical pointer.
+ */
+static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request)
+{
+ struct rbd_img_request *img_request;
+ struct rbd_device *rbd_dev;
+ bool known;
+
+ rbd_assert(obj_request_img_data_test(obj_request));
+
+ img_request = obj_request->img_request;
+ rbd_assert(img_request);
+ rbd_dev = img_request->rbd_dev;
+
+ /*
+ * Only writes to layered images need special handling.
+ * Reads and non-layered writes are simple object requests.
+ * Layered writes that start beyond the end of the overlap
+ * with the parent have no parent data, so they too are
+ * simple object requests. Finally, if the target object is
+ * known to already exist, its parent data has already been
+ * copied, so a write to the object can also be handled as a
+ * simple object request.
+ */
+ if (!img_request_write_test(img_request) ||
+ !img_request_layered_test(img_request) ||
+ rbd_dev->parent_overlap <= obj_request->img_offset ||
+ ((known = obj_request_known_test(obj_request)) &&
+ obj_request_exists_test(obj_request))) {
+ struct ceph_osd_client *osdc;
+
+ osdc = &rbd_dev->rbd_client->client->osdc;
+ return rbd_obj_request_submit(osdc, obj_request);
+ }
+
+ /*
+ * It's a layered write. The target object might exist but
+ * we may not know that yet. If we know it doesn't exist,
+ * start by reading the data for the full target object from
+ * the parent so we can use it for a copyup to the target.
+ */
+ if (known)
+ return rbd_img_obj_parent_read_full(obj_request);
+
+ /* We don't know whether the target exists. Go find out. */
+
+ return rbd_img_obj_exists_submit(obj_request);
+}
+
+/*
+ * Submit every object request that makes up the given image request.
+ * Stops at the first submission failure and returns its error code;
+ * returns 0 when all were submitted.
+ */
+static int rbd_img_request_submit(struct rbd_img_request *img_request)
+{
+ struct rbd_obj_request *obj_request;
+ struct rbd_obj_request *next_obj_request;
+ int ret = 0;
+
+ dout("%s: img %p\n", __func__, img_request);
+ for_each_obj_request_safe(img_request, obj_request, next_obj_request) {
+ ret = rbd_img_obj_request_submit(obj_request);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * Completion callback for a child image request created by
+ * rbd_img_parent_read().  Transfers result and byte count from the
+ * parent read back onto the original object request, clamping the
+ * transfer count at the parent-overlap boundary so data beyond it
+ * gets zeroed, then completes the original request.
+ */
+static void rbd_img_parent_read_callback(struct rbd_img_request *img_request)
+{
+ struct rbd_obj_request *obj_request;
+ struct rbd_device *rbd_dev;
+ u64 obj_end;
+ u64 img_xferred;
+ int img_result;
+
+ rbd_assert(img_request_child_test(img_request));
+
+ /* First get what we need from the image request and release it */
+
+ obj_request = img_request->obj_request;
+ img_xferred = img_request->xferred;
+ img_result = img_request->result;
+ rbd_img_request_put(img_request);
+
+ /*
+ * If the overlap has become 0 (most likely because the
+ * image has been flattened) we need to re-submit the
+ * original request.
+ */
+ rbd_assert(obj_request);
+ rbd_assert(obj_request->img_request);
+ rbd_dev = obj_request->img_request->rbd_dev;
+ if (!rbd_dev->parent_overlap) {
+ struct ceph_osd_client *osdc;
+
+ osdc = &rbd_dev->rbd_client->client->osdc;
+ img_result = rbd_obj_request_submit(osdc, obj_request);
+ if (!img_result)
+ return;
+ }
+
+ obj_request->result = img_result;
+ if (obj_request->result)
+ goto out;
+
+ /*
+ * We need to zero anything beyond the parent overlap
+ * boundary. Since rbd_img_obj_request_read_callback()
+ * will zero anything beyond the end of a short read, an
+ * easy way to do this is to pretend the data from the
+ * parent came up short--ending at the overlap boundary.
+ */
+ rbd_assert(obj_request->img_offset < U64_MAX - obj_request->length);
+ obj_end = obj_request->img_offset + obj_request->length;
+ if (obj_end > rbd_dev->parent_overlap) {
+ u64 xferred = 0;
+
+ if (obj_request->img_offset < rbd_dev->parent_overlap)
+ xferred = rbd_dev->parent_overlap -
+ obj_request->img_offset;
+
+ obj_request->xferred = min(img_xferred, xferred);
+ } else {
+ obj_request->xferred = img_xferred;
+ }
+out:
+ rbd_img_obj_request_read_callback(obj_request);
+ rbd_obj_request_complete(obj_request);
+}
+
+/*
+ * Satisfy a read that found no data in the child image (result was
+ * -ENOENT) by reading the same range from the parent image.  Builds
+ * and submits a child image request whose completion is handled by
+ * rbd_img_parent_read_callback().  On failure the original request
+ * is completed with the error.
+ */
+static void rbd_img_parent_read(struct rbd_obj_request *obj_request)
+{
+ struct rbd_img_request *img_request;
+ int result;
+
+ rbd_assert(obj_request_img_data_test(obj_request));
+ rbd_assert(obj_request->img_request != NULL);
+ /* Only taken when the child object did not exist */
+ rbd_assert(obj_request->result == (s32) -ENOENT);
+ rbd_assert(obj_request_type_valid(obj_request->type));
+
+ /* rbd_read_finish(obj_request, obj_request->length); */
+ img_request = rbd_parent_request_create(obj_request,
+ obj_request->img_offset,
+ obj_request->length);
+ result = -ENOMEM;
+ if (!img_request)
+ goto out_err;
+
+ /* Reuse the original request's data buffer for the parent read */
+ if (obj_request->type == OBJ_REQUEST_BIO)
+ result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
+ obj_request->bio_list);
+ else
+ result = rbd_img_request_fill(img_request, OBJ_REQUEST_PAGES,
+ obj_request->pages);
+ if (result)
+ goto out_err;
+
+ img_request->callback = rbd_img_parent_read_callback;
+ result = rbd_img_request_submit(img_request);
+ if (result)
+ goto out_err;
+
+ return;
+out_err:
+ if (img_request)
+ rbd_img_request_put(img_request);
+ obj_request->result = result;
+ obj_request->xferred = 0;
+ obj_request_done_set(obj_request);
+}
+
+/*
+ * Synchronously acknowledge a watch notification on the image header
+ * object.  Builds a NOTIFY_ACK osd request, submits it, and waits for
+ * completion.  Returns 0 on success or a negative errno.
+ */
+static int rbd_obj_notify_ack_sync(struct rbd_device *rbd_dev, u64 notify_id)
+{
+ struct rbd_obj_request *obj_request;
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ int ret;
+
+ obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
+ OBJ_REQUEST_NODATA);
+ if (!obj_request)
+ return -ENOMEM;
+
+ ret = -ENOMEM;
+ obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, 1,
+ obj_request);
+ if (!obj_request->osd_req)
+ goto out;
+
+ osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_NOTIFY_ACK,
+ notify_id, 0, 0);
+ rbd_osd_req_format_read(obj_request);
+
+ ret = rbd_obj_request_submit(osdc, obj_request);
+ if (ret)
+ goto out;
+ ret = rbd_obj_request_wait(obj_request);
+out:
+ /* put frees the request and its osd_req on all paths */
+ rbd_obj_request_put(obj_request);
+
+ return ret;
+}
+
+/*
+ * Watch callback invoked when the image header object changes.
+ * Refreshes the device's view of the header and acknowledges the
+ * notification.
+ */
+static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
+{
+ struct rbd_device *rbd_dev = data;
+ int ret;
+
+ if (!rbd_dev)
+ return;
+
+ dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__,
+ rbd_dev->header_name, (unsigned long long)notify_id,
+ (unsigned int)opcode);
+
+ ret = rbd_dev_refresh(rbd_dev);
+ if (ret)
+ rbd_warn(rbd_dev, "header refresh error (%d)\n", ret);
+
+ rbd_obj_notify_ack_sync(rbd_dev, notify_id);
+}
+
+/*
+ * Request sync osd watch/unwatch. The value of "start" determines
+ * whether a watch request is being initiated or torn down.
+ */
+/*
+ * On start: create a watch event, register a lingering WATCH request
+ * on the header object, and stash it in rbd_dev->watch_request.
+ * On stop: unregister the lingering request, send the unwatch, and
+ * drop the stashed reference.  Returns 0 or a negative errno.
+ */
+static int __rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, bool start)
+{
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ struct rbd_obj_request *obj_request;
+ int ret;
+
+ /* start implies no existing watch; stop implies one exists */
+ rbd_assert(start ^ !!rbd_dev->watch_event);
+ rbd_assert(start ^ !!rbd_dev->watch_request);
+
+ if (start) {
+ ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev,
+ &rbd_dev->watch_event);
+ if (ret < 0)
+ return ret;
+ rbd_assert(rbd_dev->watch_event != NULL);
+ }
+
+ ret = -ENOMEM;
+ obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
+ OBJ_REQUEST_NODATA);
+ if (!obj_request)
+ goto out_cancel;
+
+ obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, 1,
+ obj_request);
+ if (!obj_request->osd_req)
+ goto out_cancel;
+
+ if (start)
+ ceph_osdc_set_request_linger(osdc, obj_request->osd_req);
+ else
+ ceph_osdc_unregister_linger_request(osdc,
+ rbd_dev->watch_request->osd_req);
+
+ /* final argument: 1 = watch, 0 = unwatch */
+ osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH,
+ rbd_dev->watch_event->cookie, 0, start ? 1 : 0);
+ rbd_osd_req_format_write(obj_request);
+
+ ret = rbd_obj_request_submit(osdc, obj_request);
+ if (ret)
+ goto out_cancel;
+ ret = rbd_obj_request_wait(obj_request);
+ if (ret)
+ goto out_cancel;
+ ret = obj_request->result;
+ if (ret)
+ goto out_cancel;
+
+ /*
+ * A watch request is set to linger, so the underlying osd
+ * request won't go away until we unregister it. We retain
+ * a pointer to the object request during that time (in
+ * rbd_dev->watch_request), so we'll keep a reference to
+ * it. We'll drop that reference (below) after we've
+ * unregistered it.
+ */
+ if (start) {
+ rbd_dev->watch_request = obj_request;
+
+ return 0;
+ }
+
+ /* We have successfully torn down the watch request */
+
+ rbd_obj_request_put(rbd_dev->watch_request);
+ rbd_dev->watch_request = NULL;
+out_cancel:
+ /*
+ * Cancel the event if we're tearing down, or on error.
+ * NOTE(review): on a teardown failure this cancels the event
+ * while rbd_dev->watch_request is left set, and the successful
+ * teardown path also falls through here -- the state left
+ * behind on teardown errors looks inconsistent; confirm
+ * against later upstream reworks of this function.
+ */
+ ceph_osdc_cancel_event(rbd_dev->watch_event);
+ rbd_dev->watch_event = NULL;
+ if (obj_request)
+ rbd_obj_request_put(obj_request);
+
+ return ret;
+}
+
+/* Establish a watch on the image header object (see helper above) */
+static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev)
+{
+ return __rbd_dev_header_watch_sync(rbd_dev, true);
+}
+
+/*
+ * Tear down the watch on the image header object, warning (but not
+ * failing -- callers have no recovery) if the teardown errors out.
+ */
+static void rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev)
+{
+ int ret = __rbd_dev_header_watch_sync(rbd_dev, false);
+
+ if (ret) {
+ rbd_warn(rbd_dev, "unable to tear down watch request: %d\n",
+ ret);
+ }
+}
+
+/*
+ * Synchronous osd object method call. Returns the number of bytes
+ * returned in the outbound buffer, or a negative error code.
+ */
+static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
+ const char *object_name,
+ const char *class_name,
+ const char *method_name,
+ const void *outbound,
+ size_t outbound_size,
+ void *inbound,
+ size_t inbound_size)
+{
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ struct rbd_obj_request *obj_request;
+ struct page **pages;
+ u32 page_count;
+ int ret;
+
+ /*
+ * Method calls are ultimately read operations. The result
+ * should placed into the inbound buffer provided. They
+ * also supply outbound data--parameters for the object
+ * method. Currently if this is present it will be a
+ * snapshot id.
+ */
+ page_count = (u32)calc_pages_for(0, inbound_size);
+ pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ ret = -ENOMEM;
+ obj_request = rbd_obj_request_create(object_name, 0, inbound_size,
+ OBJ_REQUEST_PAGES);
+ if (!obj_request)
+ goto out;
+
+ /* obj_request now owns the pages; its put below frees them */
+ obj_request->pages = pages;
+ obj_request->page_count = page_count;
+
+ obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, 1,
+ obj_request);
+ if (!obj_request->osd_req)
+ goto out;
+
+ osd_req_op_cls_init(obj_request->osd_req, 0, CEPH_OSD_OP_CALL,
+ class_name, method_name);
+ if (outbound_size) {
+ struct ceph_pagelist *pagelist;
+
+ pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS);
+ if (!pagelist)
+ goto out;
+
+ /*
+ * NOTE(review): ceph_pagelist_append() can fail but its
+ * return value is ignored here; also the pagelist is
+ * presumably owned and freed by the osd request after
+ * the call below -- confirm both against libceph.
+ */
+ ceph_pagelist_init(pagelist);
+ ceph_pagelist_append(pagelist, outbound, outbound_size);
+ osd_req_op_cls_request_data_pagelist(obj_request->osd_req, 0,
+ pagelist);
+ }
+ osd_req_op_cls_response_data_pages(obj_request->osd_req, 0,
+ obj_request->pages, inbound_size,
+ 0, false, false);
+ rbd_osd_req_format_read(obj_request);
+
+ ret = rbd_obj_request_submit(osdc, obj_request);
+ if (ret)
+ goto out;
+ ret = rbd_obj_request_wait(obj_request);
+ if (ret)
+ goto out;
+
+ ret = obj_request->result;
+ if (ret < 0)
+ goto out;
+
+ /* Copy the method's reply out of the page vector */
+ rbd_assert(obj_request->xferred < (u64)INT_MAX);
+ ret = (int)obj_request->xferred;
+ ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred);
+out:
+ if (obj_request)
+ rbd_obj_request_put(obj_request);
+ else
+ ceph_release_page_vector(pages, page_count);
+
+ return ret;
+}
+
+/*
+ * Block-layer request_fn: drain the request queue, converting each
+ * filesystem request into an rbd image request.
+ *
+ * Entered with q->queue_lock held; the lock is dropped around the
+ * per-request setup/submit work and re-acquired before touching the
+ * queue again (hence the sparse annotations below).
+ */
+static void rbd_request_fn(struct request_queue *q)
+ __releases(q->queue_lock) __acquires(q->queue_lock)
+{
+ struct rbd_device *rbd_dev = q->queuedata;
+ bool read_only = rbd_dev->mapping.read_only;
+ struct request *rq;
+ int result;
+
+ while ((rq = blk_fetch_request(q))) {
+ bool write_request = rq_data_dir(rq) == WRITE;
+ struct rbd_img_request *img_request;
+ u64 offset;
+ u64 length;
+
+ /* Ignore any non-FS requests that filter through. */
+
+ if (rq->cmd_type != REQ_TYPE_FS) {
+ dout("%s: non-fs request type %d\n", __func__,
+ (int) rq->cmd_type);
+ __blk_end_request_all(rq, 0);
+ continue;
+ }
+
+ /* Ignore/skip any zero-length requests */
+
+ offset = (u64) blk_rq_pos(rq) << SECTOR_SHIFT;
+ length = (u64) blk_rq_bytes(rq);
+
+ if (!length) {
+ dout("%s: zero-length request\n", __func__);
+ __blk_end_request_all(rq, 0);
+ continue;
+ }
+
+ spin_unlock_irq(q->queue_lock);
+
+ /* Disallow writes to a read-only device */
+
+ if (write_request) {
+ result = -EROFS;
+ if (read_only)
+ goto end_request;
+ /* Writes are only valid against the base image. */
+ rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP);
+ }
+
+ /*
+ * Quit early if the mapped snapshot no longer
+ * exists. It's still possible the snapshot will
+ * have disappeared by the time our request arrives
+ * at the osd, but there's no sense in sending it if
+ * we already know.
+ */
+ if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) {
+ dout("request for non-existent snapshot");
+ rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
+ result = -ENXIO;
+ goto end_request;
+ }
+
+ /* Guard against u64 wrap of offset + length. */
+ result = -EINVAL;
+ if (offset && length > U64_MAX - offset + 1) {
+ rbd_warn(rbd_dev, "bad request range (%llu~%llu)\n",
+ offset, length);
+ goto end_request; /* Shouldn't happen */
+ }
+
+ result = -EIO;
+ if (offset + length > rbd_dev->mapping.size) {
+ rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)\n",
+ offset, length, rbd_dev->mapping.size);
+ goto end_request;
+ }
+
+ result = -ENOMEM;
+ img_request = rbd_img_request_create(rbd_dev, offset, length,
+ write_request);
+ if (!img_request)
+ goto end_request;
+
+ img_request->rq = rq;
+
+ result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
+ rq->bio);
+ if (!result)
+ result = rbd_img_request_submit(img_request);
+ if (result)
+ rbd_img_request_put(img_request);
+end_request:
+ spin_lock_irq(q->queue_lock);
+ if (result < 0) {
+ rbd_warn(rbd_dev, "%s %llx at %llx result %d\n",
+ write_request ? "write" : "read",
+ length, offset, result);
+
+ /*
+ * Only failed requests complete here.
+ * NOTE(review): submitted ones appear to complete
+ * asynchronously via img_request->rq -- confirm
+ * against the img_request callback path.
+ */
+ __blk_end_request_all(rq, result);
+ }
+ }
+}
+
+/*
+ * a queue callback. Makes sure that we don't create a bio that spans across
+ * multiple osd objects. One exception would be with a single page bios,
+ * which we handle later at bio_chain_clone_range()
+ *
+ * Returns the number of bytes of @bvec that may be added to the bio
+ * described by @bmd (0 to reject, bvec->bv_len to accept fully).
+ */
+static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
+ struct bio_vec *bvec)
+{
+ struct rbd_device *rbd_dev = q->queuedata;
+ sector_t sector_offset;
+ sector_t sectors_per_obj;
+ sector_t obj_sector_offset;
+ int ret;
+
+ /*
+ * Find how far into its rbd object the partition-relative
+ * bio start sector is to offset relative to the enclosing
+ * device.
+ */
+ sector_offset = get_start_sect(bmd->bi_bdev) + bmd->bi_sector;
+ /* sectors_per_obj is a power of two, so masking below is valid. */
+ sectors_per_obj = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT);
+ obj_sector_offset = sector_offset & (sectors_per_obj - 1);
+
+ /*
+ * Compute the number of bytes from that offset to the end
+ * of the object. Account for what's already used by the bio.
+ */
+ ret = (int) (sectors_per_obj - obj_sector_offset) << SECTOR_SHIFT;
+ if (ret > bmd->bi_size)
+ ret -= bmd->bi_size;
+ else
+ ret = 0;
+
+ /*
+ * Don't send back more than was asked for. And if the bio
+ * was empty, let the whole thing through because: "Note
+ * that a block device *must* allow a single page to be
+ * added to an empty bio."
+ */
+ rbd_assert(bvec->bv_len <= PAGE_SIZE);
+ if (ret > (int) bvec->bv_len || !bmd->bi_size)
+ ret = (int) bvec->bv_len;
+
+ return ret;
+}
+
+/*
+ * Release the gendisk (and its request queue) for @rbd_dev.
+ * Safe to call when no disk was ever allocated.
+ */
+static void rbd_free_disk(struct rbd_device *rbd_dev)
+{
+ struct gendisk *disk = rbd_dev->disk;
+
+ if (!disk)
+ return;
+
+ rbd_dev->disk = NULL;
+ /* Only a disk that was add_disk()'d needs del_gendisk();
+ * the queue must be cleaned up after the disk is removed. */
+ if (disk->flags & GENHD_FL_UP) {
+ del_gendisk(disk);
+ if (disk->queue)
+ blk_cleanup_queue(disk->queue);
+ }
+ put_disk(disk);
+}
+
+/*
+ * Synchronously read @length bytes starting at @offset from the
+ * named osd object into @buf. Returns the number of bytes actually
+ * read, or a negative error code.
+ */
+static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
+ const char *object_name,
+ u64 offset, u64 length, void *buf)
+
+{
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ struct rbd_obj_request *obj_request;
+ struct page **pages = NULL;
+ u32 page_count;
+ size_t size;
+ int ret;
+
+ page_count = (u32) calc_pages_for(offset, length);
+ pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages); /* was: "ret = PTR_ERR(pages);" with no
+    * return -- ret was then clobbered by
+    * -ENOMEM and the ERR_PTR value used as
+    * a page vector.  Matches the early
+    * return in rbd_obj_method_sync(). */
+
+ /* -ENOMEM covers every allocation failure until submit. */
+ ret = -ENOMEM;
+ obj_request = rbd_obj_request_create(object_name, offset, length,
+ OBJ_REQUEST_PAGES);
+ if (!obj_request)
+ goto out;
+
+ /* Ownership of the page vector passes to obj_request here. */
+ obj_request->pages = pages;
+ obj_request->page_count = page_count;
+
+ obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, 1,
+ obj_request);
+ if (!obj_request->osd_req)
+ goto out;
+
+ osd_req_op_extent_init(obj_request->osd_req, 0, CEPH_OSD_OP_READ,
+ offset, length, 0, 0);
+ osd_req_op_extent_osd_data_pages(obj_request->osd_req, 0,
+ obj_request->pages,
+ obj_request->length,
+ obj_request->offset & ~PAGE_MASK,
+ false, false);
+ rbd_osd_req_format_read(obj_request);
+
+ ret = rbd_obj_request_submit(osdc, obj_request);
+ if (ret)
+ goto out;
+ ret = rbd_obj_request_wait(obj_request);
+ if (ret)
+ goto out;
+
+ /* obj_request->result holds the osd's status for the read. */
+ ret = obj_request->result;
+ if (ret < 0)
+ goto out;
+
+ rbd_assert(obj_request->xferred <= (u64) SIZE_MAX);
+ size = (size_t) obj_request->xferred;
+ ceph_copy_from_page_vector(pages, buf, 0, size);
+ rbd_assert(size <= (size_t)INT_MAX);
+ ret = (int)size;
+out:
+ /* The request frees the pages once it owns them; free them
+ * directly only if request creation itself failed. */
+ if (obj_request)
+ rbd_obj_request_put(obj_request);
+ else
+ ceph_release_page_vector(pages, page_count);
+
+ return ret;
+}
+
+/*
+ * Read the complete header for the given rbd device. On successful
+ * return, the rbd_dev->header field will contain up-to-date
+ * information about the image.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
+{
+ struct rbd_image_header_ondisk *ondisk = NULL;
+ u32 snap_count = 0;
+ u64 names_size = 0;
+ u32 want_count;
+ int ret;
+
+ /*
+ * The complete header will include an array of its 64-bit
+ * snapshot ids, followed by the names of those snapshots as
+ * a contiguous block of NUL-terminated strings. Note that
+ * the number of snapshots could change by the time we read
+ * it in, in which case we re-read it.
+ */
+ do {
+ size_t size;
+
+ /* Free the previous (too small) attempt; kfree(NULL) is ok. */
+ kfree(ondisk);
+
+ size = sizeof (*ondisk);
+ size += snap_count * sizeof (struct rbd_image_snap_ondisk);
+ size += names_size;
+ ondisk = kmalloc(size, GFP_KERNEL);
+ if (!ondisk)
+ return -ENOMEM;
+
+ ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name,
+ 0, size, ondisk);
+ if (ret < 0)
+ goto out;
+ if ((size_t)ret < size) {
+ ret = -ENXIO;
+ rbd_warn(rbd_dev, "short header read (want %zd got %d)",
+ size, ret);
+ goto out;
+ }
+ if (!rbd_dev_ondisk_valid(ondisk)) {
+ ret = -ENXIO;
+ rbd_warn(rbd_dev, "invalid header");
+ goto out;
+ }
+
+ /* Retry with the sizes the header just reported, until
+ * the snapshot count is stable across two reads. */
+ names_size = le64_to_cpu(ondisk->snap_names_len);
+ want_count = snap_count;
+ snap_count = le32_to_cpu(ondisk->snap_count);
+ } while (snap_count != want_count);
+
+ ret = rbd_header_from_disk(rbd_dev, ondisk);
+out:
+ kfree(ondisk);
+
+ return ret;
+}
+
+/*
+ * Clear the rbd device's EXISTS flag if the snapshot it's mapped to
+ * has disappeared from the (just updated) snapshot context.
+ */
+static void rbd_exists_validate(struct rbd_device *rbd_dev)
+{
+ u64 mapped_snap_id;
+
+ /* Nothing to do unless the device currently claims to exist. */
+ if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags))
+ return;
+
+ /* The base image (CEPH_NOSNAP) is never invalidated here. */
+ mapped_snap_id = rbd_dev->spec->snap_id;
+ if (mapped_snap_id != CEPH_NOSNAP &&
+     rbd_dev_snap_index(rbd_dev, mapped_snap_id) == BAD_SNAP_INDEX)
+ clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
+}
+
+/*
+ * Propagate a changed mapping size to the gendisk, unless the
+ * device is being removed (in which case the disk is gone).
+ */
+static void rbd_dev_update_size(struct rbd_device *rbd_dev)
+{
+ sector_t size;
+ bool removing;
+
+ /*
+ * Don't hold the lock while doing disk operations,
+ * or lock ordering will conflict with the bdev mutex via:
+ * rbd_add() -> blkdev_get() -> rbd_open()
+ */
+ spin_lock_irq(&rbd_dev->lock);
+ removing = test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags);
+ spin_unlock_irq(&rbd_dev->lock);
+ /*
+ * If the device is being removed, rbd_dev->disk has
+ * been destroyed, so don't try to update its size
+ */
+ if (!removing) {
+ /* NOTE(review): the cast binds before the division, so this
+ * assumes sector_t is 64-bit (large block device config) --
+ * confirm for 32-bit builds. */
+ size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE;
+ dout("setting size to %llu sectors", (unsigned long long)size);
+ set_capacity(rbd_dev->disk, size);
+ revalidate_disk(rbd_dev->disk);
+ }
+}
+
+/*
+ * Re-read the image header (v1 or v2 as appropriate), revalidate
+ * the EXISTS flag, and resize the disk if the mapping size changed.
+ * Returns the header-read result (0 on success, negative on error).
+ */
+static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+{
+ u64 mapping_size;
+ int ret;
+
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+ down_write(&rbd_dev->header_rwsem);
+ mapping_size = rbd_dev->mapping.size;
+ if (rbd_dev->image_format == 1)
+ ret = rbd_dev_v1_header_info(rbd_dev);
+ else
+ ret = rbd_dev_v2_header_info(rbd_dev);
+
+ /* If it's a mapped snapshot, validate its EXISTS flag */
+
+ rbd_exists_validate(rbd_dev);
+ up_write(&rbd_dev->header_rwsem);
+
+ /* NOTE(review): mapping.size is re-read here after the rwsem is
+ * released, so a concurrent refresh could race this comparison --
+ * confirm whether the window matters in practice. */
+ if (mapping_size != rbd_dev->mapping.size) {
+ rbd_dev_update_size(rbd_dev);
+ }
+
+ return ret;
+}
+
+/*
+ * Allocate and configure the gendisk and request queue for
+ * @rbd_dev. The disk is not added here (no add_disk() call).
+ * Returns 0 on success, -ENOMEM on any allocation failure.
+ */
+static int rbd_init_disk(struct rbd_device *rbd_dev)
+{
+ struct gendisk *disk;
+ struct request_queue *q;
+ u64 segment_size;
+
+ /* create gendisk info */
+ disk = alloc_disk(single_major ?
+ (1 << RBD_SINGLE_MAJOR_PART_SHIFT) :
+ RBD_MINORS_PER_MAJOR);
+ if (!disk)
+ return -ENOMEM;
+
+ snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
+ rbd_dev->dev_id);
+ disk->major = rbd_dev->major;
+ disk->first_minor = rbd_dev->minor;
+ if (single_major)
+ disk->flags |= GENHD_FL_EXT_DEVT;
+ disk->fops = &rbd_bd_ops;
+ disk->private_data = rbd_dev;
+
+ q = blk_init_queue(rbd_request_fn, &rbd_dev->lock);
+ if (!q)
+ goto out_disk;
+
+ /* We use the default size, but let's be explicit about it. */
+ blk_queue_physical_block_size(q, SECTOR_SIZE);
+
+ /* set io sizes to object size */
+ segment_size = rbd_obj_bytes(&rbd_dev->header);
+ blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
+ blk_queue_max_segment_size(q, segment_size);
+ blk_queue_io_min(q, segment_size);
+ blk_queue_io_opt(q, segment_size);
+
+ /* Keep bios from spanning osd object boundaries. */
+ blk_queue_merge_bvec(q, rbd_merge_bvec);
+ disk->queue = q;
+
+ q->queuedata = rbd_dev;
+
+ rbd_dev->disk = disk;
+
+ return 0;
+out_disk:
+ put_disk(disk);
+
+ /* Only queue allocation can reach here; report out-of-memory. */
+ return -ENOMEM;
+}
+
+/*
+ sysfs
+*/
+
+/* Map a sysfs struct device back to its containing rbd_device. */
+static struct rbd_device *dev_to_rbd_dev(struct device *dev)
+{
+ return container_of(dev, struct rbd_device, dev);
+}
+
+/* sysfs: mapped image (or snapshot) size in bytes. */
+static ssize_t rbd_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+ unsigned long long bytes = rbd_dev->mapping.size;
+
+ return sprintf(buf, "%llu\n", bytes);
+}
+
+/*
+ * Note this shows the features for whatever's mapped, which is not
+ * necessarily the base image.
+ */
+static ssize_t rbd_features_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+ unsigned long long features = rbd_dev->mapping.features;
+
+ return sprintf(buf, "0x%016llx\n", features);
+}
+
+/* sysfs: block device major number, or "(none)" if not assigned. */
+static ssize_t rbd_major_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+
+ return rbd_dev->major ? sprintf(buf, "%d\n", rbd_dev->major)
+       : sprintf(buf, "(none)\n");
+}
+
+/* sysfs: block device minor number. */
+static ssize_t rbd_minor_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int minor = dev_to_rbd_dev(dev)->minor;
+
+ return sprintf(buf, "%d\n", minor);
+}
+
+/* sysfs: the ceph client id ("clientNNN") used for this mapping. */
+static ssize_t rbd_client_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct ceph_client *client = dev_to_rbd_dev(dev)->rbd_client->client;
+
+ return sprintf(buf, "client%lld\n", ceph_client_id(client));
+}
+
+/* sysfs: name of the pool holding the image. */
+static ssize_t rbd_pool_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_spec *spec = dev_to_rbd_dev(dev)->spec;
+
+ return sprintf(buf, "%s\n", spec->pool_name);
+}
+
+/* sysfs: numeric id of the pool holding the image. */
+static ssize_t rbd_pool_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_spec *spec = dev_to_rbd_dev(dev)->spec;
+
+ return sprintf(buf, "%llu\n", (unsigned long long) spec->pool_id);
+}
+
+/* sysfs: image name, or "(unknown)" when only the id is known. */
+static ssize_t rbd_name_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ const char *image_name = dev_to_rbd_dev(dev)->spec->image_name;
+
+ if (!image_name)
+ return sprintf(buf, "(unknown)\n");
+
+ return sprintf(buf, "%s\n", image_name);
+}
+
+/* sysfs: the image's unique id string. */
+static ssize_t rbd_image_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_spec *spec = dev_to_rbd_dev(dev)->spec;
+
+ return sprintf(buf, "%s\n", spec->image_id);
+}
+
+/*
+ * Shows the name of the currently-mapped snapshot (or
+ * RBD_SNAP_HEAD_NAME for the base image).
+ */
+static ssize_t rbd_snap_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct rbd_spec *spec = dev_to_rbd_dev(dev)->spec;
+
+ return sprintf(buf, "%s\n", spec->snap_name);
+}
+
+/*
+ * For an rbd v2 image, shows the pool id, image id, and snapshot id
+ * for the parent image. If there is no parent, simply shows
+ * "(no parent image)".
+ *
+ * Returns the total number of bytes written to @buf.
+ */
+static ssize_t rbd_parent_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+ struct rbd_spec *spec = rbd_dev->parent_spec;
+ int count;
+ char *bufp = buf;
+
+ /* A NULL parent spec means this image has no parent. */
+ if (!spec)
+ return sprintf(buf, "(no parent image)\n");
+
+ /* Each section appends to buf; the <0 checks are defensive. */
+ count = sprintf(bufp, "pool_id %llu\npool_name %s\n",
+ (unsigned long long) spec->pool_id, spec->pool_name);
+ if (count < 0)
+ return count;
+ bufp += count;
+
+ count = sprintf(bufp, "image_id %s\nimage_name %s\n", spec->image_id,
+ spec->image_name ? spec->image_name : "(unknown)");
+ if (count < 0)
+ return count;
+ bufp += count;
+
+ count = sprintf(bufp, "snap_id %llu\nsnap_name %s\n",
+ (unsigned long long) spec->snap_id, spec->snap_name);
+ if (count < 0)
+ return count;
+ bufp += count;
+
+ count = sprintf(bufp, "overlap %llu\n", rbd_dev->parent_overlap);
+ if (count < 0)
+ return count;
+ bufp += count;
+
+ return (ssize_t) (bufp - buf);
+}
+
+/*
+ * sysfs store: any write to the "refresh" attribute forces a
+ * header refresh. Consumes the whole write on success.
+ */
+static ssize_t rbd_image_refresh(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t size)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+ int ret = rbd_dev_refresh(rbd_dev);
+
+ if (ret)
+ rbd_warn(rbd_dev, ": manual header refresh error (%d)\n", ret);
+
+ return ret < 0 ? ret : size;
+}
+
+/* sysfs attributes exposed under /sys/bus/rbd/devices/<id>/.
+ * All are read-only except "refresh", which is write-only. */
+static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
+static DEVICE_ATTR(features, S_IRUGO, rbd_features_show, NULL);
+static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
+static DEVICE_ATTR(minor, S_IRUGO, rbd_minor_show, NULL);
+static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
+static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
+static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
+static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
+static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL);
+static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
+static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
+static DEVICE_ATTR(parent, S_IRUGO, rbd_parent_show, NULL);
+
+static struct attribute *rbd_attrs[] = {
+ &dev_attr_size.attr,
+ &dev_attr_features.attr,
+ &dev_attr_major.attr,
+ &dev_attr_minor.attr,
+ &dev_attr_client_id.attr,
+ &dev_attr_pool.attr,
+ &dev_attr_pool_id.attr,
+ &dev_attr_name.attr,
+ &dev_attr_image_id.attr,
+ &dev_attr_current_snap.attr,
+ &dev_attr_parent.attr,
+ &dev_attr_refresh.attr,
+ NULL
+};
+
+static struct attribute_group rbd_attr_group = {
+ .attrs = rbd_attrs,
+};
+
+static const struct attribute_group *rbd_attr_groups[] = {
+ &rbd_attr_group,
+ NULL
+};
+
+/* Empty release: rbd_device lifetime is managed elsewhere, not by
+ * the embedded struct device's refcount. */
+static void rbd_sysfs_dev_release(struct device *dev)
+{
+}
+
+static struct device_type rbd_device_type = {
+ .name = "rbd",
+ .groups = rbd_attr_groups,
+ .release = rbd_sysfs_dev_release,
+};
+
+/* Take a reference on @spec and return it, for caller convenience. */
+static struct rbd_spec *rbd_spec_get(struct rbd_spec *spec)
+{
+ kref_get(&spec->kref);
+ return spec;
+}
+
+static void rbd_spec_free(struct kref *kref);
+/* Drop a reference on @spec; NULL is tolerated, like kfree(). */
+static void rbd_spec_put(struct rbd_spec *spec)
+{
+ if (!spec)
+ return;
+ kref_put(&spec->kref, rbd_spec_free);
+}
+
+/*
+ * Allocate a zeroed rbd_spec with one reference held.
+ * Returns NULL on allocation failure.
+ */
+static struct rbd_spec *rbd_spec_alloc(void)
+{
+ struct rbd_spec *spec = kzalloc(sizeof (*spec), GFP_KERNEL);
+
+ if (spec)
+ kref_init(&spec->kref);
+
+ return spec;
+}
+
+/* kref release callback: free a spec and all its owned strings.
+ * kfree(NULL) is a no-op, so unset names need no checks. */
+static void rbd_spec_free(struct kref *kref)
+{
+ struct rbd_spec *spec = container_of(kref, struct rbd_spec, kref);
+
+ kfree(spec->pool_name);
+ kfree(spec->image_id);
+ kfree(spec->image_name);
+ kfree(spec->snap_name);
+ kfree(spec);
+}
+
+/*
+ * Allocate and initialize an rbd_device. Takes over the caller's
+ * references to @rbdc and @spec (released by rbd_dev_destroy()).
+ * Returns NULL on allocation failure.
+ */
+static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
+ struct rbd_spec *spec)
+{
+ struct rbd_device *rbd_dev;
+
+ rbd_dev = kzalloc(sizeof (*rbd_dev), GFP_KERNEL);
+ if (!rbd_dev)
+ return NULL;
+
+ spin_lock_init(&rbd_dev->lock);
+ rbd_dev->flags = 0;
+ atomic_set(&rbd_dev->parent_ref, 0);
+ INIT_LIST_HEAD(&rbd_dev->node);
+ init_rwsem(&rbd_dev->header_rwsem);
+
+ rbd_dev->spec = spec;
+ rbd_dev->rbd_client = rbdc;
+
+ /* Initialize the layout used for all rbd requests */
+
+ rbd_dev->layout.fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
+ rbd_dev->layout.fl_stripe_count = cpu_to_le32(1);
+ rbd_dev->layout.fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
+ rbd_dev->layout.fl_pg_pool = cpu_to_le32((u32) spec->pool_id);
+
+ return rbd_dev;
+}
+
+/* Counterpart of rbd_dev_create(): drop the client and spec
+ * references it took over, then free the device. */
+static void rbd_dev_destroy(struct rbd_device *rbd_dev)
+{
+ rbd_put_client(rbd_dev->rbd_client);
+ rbd_spec_put(rbd_dev->spec);
+ kfree(rbd_dev);
+}
+
+/*
+ * Get the size and object order for an image snapshot, or if
+ * snap_id is CEPH_NOSNAP, gets this information for the base
+ * image.
+ *
+ * @order may be NULL if the caller only wants the size.
+ * Returns 0 on success, -ERANGE on a short reply, or another
+ * negative error code.
+ */
+static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
+ u8 *order, u64 *snap_size)
+{
+ __le64 snapid = cpu_to_le64(snap_id);
+ int ret;
+ /* Wire format of the "get_size" reply: order byte + le64 size. */
+ struct {
+ u8 order;
+ __le64 size;
+ } __attribute__ ((packed)) size_buf = { 0 };
+
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ "rbd", "get_size",
+ &snapid, sizeof (snapid),
+ &size_buf, sizeof (size_buf));
+ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+ if (ret < 0)
+ return ret;
+ if (ret < sizeof (size_buf))
+ return -ERANGE;
+
+ if (order) {
+ *order = size_buf.order;
+ dout(" order %u", (unsigned int)*order);
+ }
+ *snap_size = le64_to_cpu(size_buf.size);
+
+ dout(" snap_id 0x%016llx snap_size = %llu\n",
+ (unsigned long long)snap_id,
+ (unsigned long long)*snap_size);
+
+ return 0;
+}
+
+/* Fetch size and object order for the base image (CEPH_NOSNAP). */
+static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev)
+{
+ struct rbd_image_header *header = &rbd_dev->header;
+
+ return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
+ &header->obj_order, &header->image_size);
+}
+
+/*
+ * Fetch the image's object-name prefix from the header object and
+ * store it in rbd_dev->header.object_prefix (newly allocated).
+ * Returns 0 on success or a negative error code.
+ */
+static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
+{
+ void *reply_buf;
+ int ret;
+ void *p;
+
+ reply_buf = kzalloc(RBD_OBJ_PREFIX_LEN_MAX, GFP_KERNEL);
+ if (!reply_buf)
+ return -ENOMEM;
+
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ "rbd", "get_object_prefix", NULL, 0,
+ reply_buf, RBD_OBJ_PREFIX_LEN_MAX);
+ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+ if (ret < 0)
+ goto out;
+
+ /* ret (>= 0) is the number of reply bytes; decode the string. */
+ p = reply_buf;
+ rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p,
+ p + ret, NULL, GFP_NOIO);
+ /* Assume success; replaced below if extraction failed. */
+ ret = 0;
+
+ if (IS_ERR(rbd_dev->header.object_prefix)) {
+ ret = PTR_ERR(rbd_dev->header.object_prefix);
+ rbd_dev->header.object_prefix = NULL;
+ } else {
+ dout(" object_prefix = %s\n", rbd_dev->header.object_prefix);
+ }
+out:
+ kfree(reply_buf);
+
+ return ret;
+}
+
+/*
+ * Fetch the feature bits for a snapshot (or the base image when
+ * snap_id is CEPH_NOSNAP). Returns 0 on success, -ERANGE on a
+ * short reply, -ENXIO if the image requires incompatible features,
+ * or another negative error code.
+ */
+static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
+ u64 *snap_features)
+{
+ __le64 snapid = cpu_to_le64(snap_id);
+ /* Wire format of the "get_features" reply. */
+ struct {
+ __le64 features;
+ __le64 incompat;
+ } __attribute__ ((packed)) features_buf = { 0 };
+ u64 incompat;
+ int ret;
+
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ "rbd", "get_features",
+ &snapid, sizeof (snapid),
+ &features_buf, sizeof (features_buf));
+ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+ if (ret < 0)
+ return ret;
+ if (ret < sizeof (features_buf))
+ return -ERANGE;
+
+ /* Refuse the image if it needs features we don't implement. */
+ incompat = le64_to_cpu(features_buf.incompat);
+ if (incompat & ~RBD_FEATURES_SUPPORTED)
+ return -ENXIO;
+
+ *snap_features = le64_to_cpu(features_buf.features);
+
+ dout(" snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n",
+ (unsigned long long)snap_id,
+ (unsigned long long)*snap_features,
+ (unsigned long long)le64_to_cpu(features_buf.incompat));
+
+ return 0;
+}
+
+/* Fetch the feature bits for the base image (CEPH_NOSNAP). */
+static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
+{
+ u64 *features = &rbd_dev->header.features;
+
+ return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP, features);
+}
+
+/*
+ * Query the osd for this image's parent (pool id, image id, snap
+ * id, overlap). Records the parent spec on the first successful
+ * probe and keeps rbd_dev->parent_overlap up to date thereafter,
+ * handling the image being flattened or resized to zero overlap.
+ * Returns 0 on success (including "no parent") or a negative error.
+ */
+static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
+{
+ struct rbd_spec *parent_spec;
+ size_t size;
+ void *reply_buf = NULL;
+ __le64 snapid;
+ void *p;
+ void *end;
+ u64 pool_id;
+ char *image_id;
+ u64 snap_id;
+ u64 overlap;
+ int ret;
+
+ parent_spec = rbd_spec_alloc();
+ if (!parent_spec)
+ return -ENOMEM;
+
+ /* Worst-case encoded size of the "get_parent" reply. */
+ size = sizeof (__le64) + /* pool_id */
+ sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX + /* image_id */
+ sizeof (__le64) + /* snap_id */
+ sizeof (__le64); /* overlap */
+ reply_buf = kmalloc(size, GFP_KERNEL);
+ if (!reply_buf) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ snapid = cpu_to_le64(CEPH_NOSNAP);
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ "rbd", "get_parent",
+ &snapid, sizeof (snapid),
+ reply_buf, size);
+ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+ if (ret < 0)
+ goto out_err;
+
+ p = reply_buf;
+ end = reply_buf + ret;
+ /* -ERANGE if any of the _safe decodes below run off the end. */
+ ret = -ERANGE;
+ ceph_decode_64_safe(&p, end, pool_id, out_err);
+ if (pool_id == CEPH_NOPOOL) {
+ /*
+ * Either the parent never existed, or we have
+ * record of it but the image got flattened so it no
+ * longer has a parent. When the parent of a
+ * layered image disappears we immediately set the
+ * overlap to 0. The effect of this is that all new
+ * requests will be treated as if the image had no
+ * parent.
+ */
+ if (rbd_dev->parent_overlap) {
+ rbd_dev->parent_overlap = 0;
+ smp_mb();
+ rbd_dev_parent_put(rbd_dev);
+ pr_info("%s: clone image has been flattened\n",
+ rbd_dev->disk->disk_name);
+ }
+
+ goto out; /* No parent? No problem. */
+ }
+
+ /* The ceph file layout needs to fit pool id in 32 bits */
+
+ ret = -EIO;
+ if (pool_id > (u64)U32_MAX) {
+ rbd_warn(NULL, "parent pool id too large (%llu > %u)\n",
+ (unsigned long long)pool_id, U32_MAX);
+ goto out_err;
+ }
+
+ image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
+ if (IS_ERR(image_id)) {
+ ret = PTR_ERR(image_id);
+ goto out_err;
+ }
+ ceph_decode_64_safe(&p, end, snap_id, out_err);
+ ceph_decode_64_safe(&p, end, overlap, out_err);
+
+ /*
+ * The parent won't change (except when the clone is
+ * flattened, already handled that). So we only need to
+ * record the parent spec we have not already done so.
+ */
+ if (!rbd_dev->parent_spec) {
+ parent_spec->pool_id = pool_id;
+ parent_spec->image_id = image_id;
+ parent_spec->snap_id = snap_id;
+ rbd_dev->parent_spec = parent_spec;
+ parent_spec = NULL; /* rbd_dev now owns this */
+ }
+
+ /*
+ * We always update the parent overlap. If it's zero we
+ * treat it specially.
+ */
+ rbd_dev->parent_overlap = overlap;
+ smp_mb();
+ if (!overlap) {
+
+ /* A null parent_spec indicates it's the initial probe */
+
+ if (parent_spec) {
+ /*
+ * The overlap has become zero, so the clone
+ * must have been resized down to 0 at some
+ * point. Treat this the same as a flatten.
+ */
+ rbd_dev_parent_put(rbd_dev);
+ pr_info("%s: clone image now standalone\n",
+ rbd_dev->disk->disk_name);
+ } else {
+ /*
+ * For the initial probe, if we find the
+ * overlap is zero we just pretend there was
+ * no parent image.
+ */
+ rbd_warn(rbd_dev, "ignoring parent of "
+ "clone with overlap 0\n");
+ }
+ }
+out:
+ ret = 0;
+out_err:
+ /* parent_spec is NULL here if ownership moved to rbd_dev. */
+ kfree(reply_buf);
+ rbd_spec_put(parent_spec);
+
+ return ret;
+}
+
+/*
+ * Fetch the image's stripe unit/count and record them in the
+ * header. Fancy striping (STRIPINGV2) is not implemented, so any
+ * non-default values are rejected with -EINVAL.
+ */
+static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
+{
+ /* Wire format of the "get_stripe_unit_count" reply. */
+ struct {
+ __le64 stripe_unit;
+ __le64 stripe_count;
+ } __attribute__ ((packed)) striping_info_buf = { 0 };
+ size_t size = sizeof (striping_info_buf);
+ void *p;
+ u64 obj_size;
+ u64 stripe_unit;
+ u64 stripe_count;
+ int ret;
+
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ "rbd", "get_stripe_unit_count", NULL, 0,
+ (char *)&striping_info_buf, size);
+ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+ if (ret < 0)
+ return ret;
+ if (ret < size)
+ return -ERANGE;
+
+ /*
+ * We don't actually support the "fancy striping" feature
+ * (STRIPINGV2) yet, but if the striping sizes are the
+ * defaults the behavior is the same as before. So find
+ * out, and only fail if the image has non-default values.
+ */
+ ret = -EINVAL;
+ obj_size = (u64)1 << rbd_dev->header.obj_order;
+ p = &striping_info_buf;
+ stripe_unit = ceph_decode_64(&p);
+ if (stripe_unit != obj_size) {
+ rbd_warn(rbd_dev, "unsupported stripe unit "
+ "(got %llu want %llu)",
+ stripe_unit, obj_size);
+ return -EINVAL;
+ }
+ stripe_count = ceph_decode_64(&p);
+ if (stripe_count != 1) {
+ rbd_warn(rbd_dev, "unsupported stripe count "
+ "(got %llu want 1)", stripe_count);
+ return -EINVAL;
+ }
+ rbd_dev->header.stripe_unit = stripe_unit;
+ rbd_dev->header.stripe_count = stripe_count;
+
+ return 0;
+}
+
+/*
+ * Look up the image name for this device's image id via the rbd
+ * directory object. Returns a newly allocated name string (caller
+ * frees), or NULL on any failure -- callers treat the name as
+ * optional.
+ */
+static char *rbd_dev_image_name(struct rbd_device *rbd_dev)
+{
+ size_t image_id_size;
+ char *image_id;
+ void *p;
+ void *end;
+ size_t size;
+ void *reply_buf = NULL;
+ size_t len = 0;
+ char *image_name = NULL;
+ int ret;
+
+ rbd_assert(!rbd_dev->spec->image_name);
+
+ /* Build the ceph-encoded (length-prefixed) image id argument. */
+ len = strlen(rbd_dev->spec->image_id);
+ image_id_size = sizeof (__le32) + len;
+ image_id = kmalloc(image_id_size, GFP_KERNEL);
+ if (!image_id)
+ return NULL;
+
+ p = image_id;
+ end = image_id + image_id_size;
+ ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32)len);
+
+ size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX;
+ reply_buf = kmalloc(size, GFP_KERNEL);
+ if (!reply_buf)
+ goto out;
+
+ ret = rbd_obj_method_sync(rbd_dev, RBD_DIRECTORY,
+ "rbd", "dir_get_name",
+ image_id, image_id_size,
+ reply_buf, size);
+ if (ret < 0)
+ goto out;
+ p = reply_buf;
+ end = reply_buf + ret;
+
+ /* Any extraction failure is folded into the NULL return. */
+ image_name = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL);
+ if (IS_ERR(image_name))
+ image_name = NULL;
+ else
+ dout("%s: name is %s len is %zd\n", __func__, image_name, len);
+out:
+ kfree(reply_buf);
+ kfree(image_id);
+
+ return image_name;
+}
+
+/*
+ * Format 1: the snapshot ids and their NUL-terminated names are
+ * stored in parallel; walk them together looking for @name.
+ * Returns the matching id, or CEPH_NOSNAP if not found.
+ */
+static u64 rbd_v1_snap_id_by_name(struct rbd_device *rbd_dev, const char *name)
+{
+ struct ceph_snap_context *snapc = rbd_dev->header.snapc;
+ const char *cur_name = rbd_dev->header.snap_names;
+ u32 i;
+
+ for (i = 0; i < snapc->num_snaps; i++) {
+ if (!strcmp(name, cur_name))
+ return snapc->snaps[i];
+ /* Advance past this name's terminating NUL. */
+ cur_name += strlen(cur_name) + 1;
+ }
+
+ return CEPH_NOSNAP;
+}
+
+/*
+ * Format 2: snapshot names must be fetched per-id from the osd.
+ * Returns the id whose name matches @name, or CEPH_NOSNAP if not
+ * found or on error.
+ */
+static u64 rbd_v2_snap_id_by_name(struct rbd_device *rbd_dev, const char *name)
+{
+ struct ceph_snap_context *snapc = rbd_dev->header.snapc;
+ u32 which;
+ bool found = false;
+ u64 snap_id;
+
+ for (which = 0; !found && which < snapc->num_snaps; which++) {
+ const char *snap_name;
+
+ snap_id = snapc->snaps[which];
+ snap_name = rbd_dev_v2_snap_name(rbd_dev, snap_id);
+ if (IS_ERR(snap_name)) {
+ /* ignore no-longer existing snapshots */
+ if (PTR_ERR(snap_name) == -ENOENT)
+ continue;
+ else
+ break;
+ }
+ found = !strcmp(name, snap_name);
+ /* The fetched name is a fresh allocation; free each one. */
+ kfree(snap_name);
+ }
+ return found ? snap_id : CEPH_NOSNAP;
+}
+
+/*
+ * Assumes name is never RBD_SNAP_HEAD_NAME; returns CEPH_NOSNAP if
+ * no snapshot by that name is found, or if an error occurs.
+ */
+static u64 rbd_snap_id_by_name(struct rbd_device *rbd_dev, const char *name)
+{
+ /* Dispatch on image format: 1 = legacy, otherwise format 2. */
+ return rbd_dev->image_format == 1 ?
+ rbd_v1_snap_id_by_name(rbd_dev, name) :
+ rbd_v2_snap_id_by_name(rbd_dev, name);
+}
+
+/*
+ * When an rbd image has a parent image, it is identified by the
+ * pool, image, and snapshot ids (not names). This function fills
+ * in the names for those ids. (It's OK if we can't figure out the
+ * name for an image id, but the pool and snapshot ids should always
+ * exist and have names.) All names in an rbd spec are dynamically
+ * allocated.
+ *
+ * When an image being mapped (not a parent) is probed, we have the
+ * pool name and pool id, image name and image id, and the snapshot
+ * name. The only thing we're missing is the snapshot id.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int rbd_dev_spec_update(struct rbd_device *rbd_dev)
+{
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ struct rbd_spec *spec = rbd_dev->spec;
+ const char *pool_name;
+ const char *image_name;
+ const char *snap_name;
+ int ret;
+
+ /*
+ * An image being mapped will have the pool name (etc.), but
+ * we need to look up the snapshot id.
+ */
+ if (spec->pool_name) {
+ if (strcmp(spec->snap_name, RBD_SNAP_HEAD_NAME)) {
+ u64 snap_id;
+
+ snap_id = rbd_snap_id_by_name(rbd_dev, spec->snap_name);
+ if (snap_id == CEPH_NOSNAP)
+ return -ENOENT;
+ spec->snap_id = snap_id;
+ } else {
+ /* Mapping the base image, not a snapshot. */
+ spec->snap_id = CEPH_NOSNAP;
+ }
+
+ return 0;
+ }
+
+ /* Get the pool name; we have to make our own copy of this */
+
+ pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, spec->pool_id);
+ if (!pool_name) {
+ rbd_warn(rbd_dev, "no pool with id %llu", spec->pool_id);
+ return -EIO;
+ }
+ pool_name = kstrdup(pool_name, GFP_KERNEL);
+ if (!pool_name)
+ return -ENOMEM;
+
+ /* Fetch the image name; tolerate failure here */
+
+ image_name = rbd_dev_image_name(rbd_dev);
+ if (!image_name)
+ rbd_warn(rbd_dev, "unable to get image name");
+
+ /* Look up the snapshot name, and make a copy */
+
+ snap_name = rbd_snap_name(rbd_dev, spec->snap_id);
+ if (IS_ERR(snap_name)) {
+ ret = PTR_ERR(snap_name);
+ goto out_err;
+ }
+
+ /* The spec now owns all three allocated names. */
+ spec->pool_name = pool_name;
+ spec->image_name = image_name;
+ spec->snap_name = snap_name;
+
+ return 0;
+out_err:
+ kfree(image_name);
+ kfree(pool_name);
+
+ return ret;
+}
+
+/*
+ * Fetch the image's snapshot context (seq + snapshot id array)
+ * from the osd and install it as rbd_dev->header.snapc, replacing
+ * any previous context. Returns 0 on success or a negative error.
+ */
+static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
+{
+ size_t size;
+ int ret;
+ void *reply_buf;
+ void *p;
+ void *end;
+ u64 seq;
+ u32 snap_count;
+ struct ceph_snap_context *snapc;
+ u32 i;
+
+ /*
+ * We'll need room for the seq value (maximum snapshot id),
+ * snapshot count, and array of that many snapshot ids.
+ * For now we have a fixed upper limit on the number we're
+ * prepared to receive.
+ */
+ size = sizeof (__le64) + sizeof (__le32) +
+ RBD_MAX_SNAP_COUNT * sizeof (__le64);
+ reply_buf = kzalloc(size, GFP_KERNEL);
+ if (!reply_buf)
+ return -ENOMEM;
+
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ "rbd", "get_snapcontext", NULL, 0,
+ reply_buf, size);
+ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+ if (ret < 0)
+ goto out;
+
+ p = reply_buf;
+ end = reply_buf + ret;
+ /* -ERANGE if either _safe decode runs off the end. */
+ ret = -ERANGE;
+ ceph_decode_64_safe(&p, end, seq, out);
+ ceph_decode_32_safe(&p, end, snap_count, out);
+
+ /*
+ * Make sure the reported number of snapshot ids wouldn't go
+ * beyond the end of our buffer. But before checking that,
+ * make sure the computed size of the snapshot context we
+ * allocate is representable in a size_t.
+ */
+ if (snap_count > (SIZE_MAX - sizeof (struct ceph_snap_context))
+ / sizeof (u64)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!ceph_has_room(&p, end, snap_count * sizeof (__le64)))
+ goto out;
+ ret = 0;
+
+ snapc = ceph_create_snap_context(snap_count, GFP_KERNEL);
+ if (!snapc) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ snapc->seq = seq;
+ for (i = 0; i < snap_count; i++)
+ snapc->snaps[i] = ceph_decode_64(&p);
+
+ /* Swap in the new context; drop our ref on the old one. */
+ ceph_put_snap_context(rbd_dev->header.snapc);
+ rbd_dev->header.snapc = snapc;
+
+ dout(" snap context seq = %llu, snap_count = %u\n",
+ (unsigned long long)seq, (unsigned int)snap_count);
+out:
+ kfree(reply_buf);
+
+ return ret;
+}
+
+/*
+ * Look up the name of the snapshot with the given id by calling the
+ * "get_snapshot_name" object class method on the image header object.
+ * Returns a dynamically-allocated, NUL-terminated name that the caller
+ * must kfree(), or an ERR_PTR()-encoded errno on failure.
+ */
+static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
+ u64 snap_id)
+{
+ size_t size;
+ void *reply_buf;
+ __le64 snapid;
+ int ret;
+ void *p;
+ void *end;
+ char *snap_name;
+
+ /* Reply is a length-prefixed (encoded) string. */
+ size = sizeof (__le32) + RBD_MAX_SNAP_NAME_LEN;
+ reply_buf = kmalloc(size, GFP_KERNEL);
+ if (!reply_buf)
+ return ERR_PTR(-ENOMEM);
+
+ snapid = cpu_to_le64(snap_id);
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ "rbd", "get_snapshot_name",
+ &snapid, sizeof (snapid),
+ reply_buf, size);
+ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+ if (ret < 0) {
+ snap_name = ERR_PTR(ret);
+ goto out;
+ }
+
+ p = reply_buf;
+ end = reply_buf + ret;
+ /* An ERR_PTR() from the extract is returned to the caller as-is. */
+ snap_name = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
+ if (IS_ERR(snap_name))
+ goto out;
+
+ dout(" snap_id 0x%016llx snap_name = %s\n",
+ (unsigned long long)snap_id, snap_name);
+out:
+ kfree(reply_buf);
+
+ return snap_name;
+}
+
+/*
+ * Refresh the header information for a format 2 image: size, the
+ * one-time fields (on the first call only), parent info for layered
+ * images, the mapping size, and the snapshot context.  Returns 0 on
+ * success or a negative errno.
+ */
+static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev)
+{
+ /* A null object_prefix means the header has never been filled in. */
+ bool first_time = rbd_dev->header.object_prefix == NULL;
+ int ret;
+
+ ret = rbd_dev_v2_image_size(rbd_dev);
+ if (ret)
+ return ret;
+
+ if (first_time) {
+ ret = rbd_dev_v2_header_onetime(rbd_dev);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * If the image supports layering, get the parent info. We
+ * need to probe the first time regardless. Thereafter we
+ * only need to if there's a parent, to see if it has
+ * disappeared due to the mapped image getting flattened.
+ */
+ if (rbd_dev->header.features & RBD_FEATURE_LAYERING &&
+ (first_time || rbd_dev->parent_spec)) {
+ bool warn;
+
+ ret = rbd_dev_v2_parent_info(rbd_dev);
+ if (ret)
+ return ret;
+
+ /*
+ * Print a warning if this is the initial probe and
+ * the image has a parent. Don't print it if the
+ * image now being probed is itself a parent. We
+ * can tell at this point because we won't know its
+ * pool name yet (just its pool id).
+ */
+ warn = rbd_dev->parent_spec && rbd_dev->spec->pool_name;
+ if (first_time && warn)
+ rbd_warn(rbd_dev, "WARNING: kernel layering "
+ "is EXPERIMENTAL!");
+ }
+
+ /* Only a head (non-snapshot) mapping tracks the live image size. */
+ if (rbd_dev->spec->snap_id == CEPH_NOSNAP)
+ if (rbd_dev->mapping.size != rbd_dev->header.image_size)
+ rbd_dev->mapping.size = rbd_dev->header.image_size;
+
+ ret = rbd_dev_v2_snap_context(rbd_dev);
+ dout("rbd_dev_v2_snap_context returned %d\n", ret);
+
+ return ret;
+}
+
+/*
+ * Register the rbd device with the driver core on the rbd bus,
+ * parented under the rbd root device and named after its dev id.
+ * Returns the device_register() result.
+ */
+static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
+{
+ struct device *dev;
+ int ret;
+
+ dev = &rbd_dev->dev;
+ dev->bus = &rbd_bus_type;
+ dev->type = &rbd_device_type;
+ dev->parent = &rbd_root_dev;
+ dev->release = rbd_dev_device_release;
+ dev_set_name(dev, "%d", rbd_dev->dev_id);
+ ret = device_register(dev);
+
+ return ret;
+}
+
+/* Undo rbd_bus_add_dev(); the release callback does the teardown. */
+static void rbd_bus_del_dev(struct rbd_device *rbd_dev)
+{
+ device_unregister(&rbd_dev->dev);
+}
+
+/*
+ * Get a unique rbd identifier for the given new rbd_dev, and add
+ * the rbd_dev to the global list.
+ */
+static int rbd_dev_id_get(struct rbd_device *rbd_dev)
+{
+ int new_dev_id;
+
+ /*
+ * Upper bound keeps the id within the range that can be mapped
+ * back to a minor number (see minor_to_rbd_dev_id()/MINORBITS).
+ */
+ new_dev_id = ida_simple_get(&rbd_dev_id_ida,
+ 0, minor_to_rbd_dev_id(1 << MINORBITS),
+ GFP_KERNEL);
+ if (new_dev_id < 0)
+ return new_dev_id;
+
+ rbd_dev->dev_id = new_dev_id;
+
+ spin_lock(&rbd_dev_list_lock);
+ list_add_tail(&rbd_dev->node, &rbd_dev_list);
+ spin_unlock(&rbd_dev_list_lock);
+
+ dout("rbd_dev %p given dev id %d\n", rbd_dev, rbd_dev->dev_id);
+
+ return 0;
+}
+
+/*
+ * Remove an rbd_dev from the global list, and record that its
+ * identifier is no longer in use.
+ */
+static void rbd_dev_id_put(struct rbd_device *rbd_dev)
+{
+ /* Unlink from the global device list, then free the id. */
+ spin_lock(&rbd_dev_list_lock);
+ list_del_init(&rbd_dev->node);
+ spin_unlock(&rbd_dev_list_lock);
+
+ ida_simple_remove(&rbd_dev_id_ida, rbd_dev->dev_id);
+
+ dout("rbd_dev %p released dev id %d\n", rbd_dev, rbd_dev->dev_id);
+}
+
+/*
+ * Skips over white space at *buf, and updates *buf to point to the
+ * first found non-space character (if any). Returns the length of
+ * the token (string of non-white space characters) found. Note
+ * that *buf must be terminated with '\0'.
+ */
+static inline size_t next_token(const char **buf)
+{
+ /*
+ * These are the characters that produce nonzero for
+ * isspace() in the "C" and "POSIX" locales.
+ */
+ const char *spaces = " \f\n\r\t\v";
+
+ *buf += strspn(*buf, spaces); /* Find start of token */
+
+ /* *buf is left pointing at the token itself, not advanced past it. */
+ return strcspn(*buf, spaces); /* Return token length */
+}
+
+/*
+ * Finds the next token in *buf, and if the provided token buffer is
+ * big enough, copies the found token into it. The result, if
+ * copied, is guaranteed to be terminated with '\0'. Note that *buf
+ * must be terminated with '\0' on entry.
+ *
+ * Returns the length of the token found (not including the '\0').
+ * Return value will be 0 if no token is found, and it will be >=
+ * token_size if the token would not fit.
+ *
+ * The *buf pointer will be updated to point beyond the end of the
+ * found token. Note that this occurs even if the token buffer is
+ * too small to hold it.
+ */
+static inline size_t copy_token(const char **buf,
+ char *token,
+ size_t token_size)
+{
+ size_t len;
+
+ len = next_token(buf);
+ /* Copy only if the token (plus its NUL) fits in the caller's buffer. */
+ if (len < token_size) {
+ memcpy(token, *buf, len);
+ *(token + len) = '\0';
+ }
+ /* *buf advances past the token even when it didn't fit. */
+ *buf += len;
+
+ return len;
+}
+
+/*
+ * Finds the next token in *buf, dynamically allocates a buffer big
+ * enough to hold a copy of it, and copies the token into the new
+ * buffer. The copy is guaranteed to be terminated with '\0'. Note
+ * that a duplicate buffer is created even for a zero-length token.
+ *
+ * Returns a pointer to the newly-allocated duplicate, or a null
+ * pointer if memory for the duplicate was not available. If
+ * the lenp argument is a non-null pointer, the length of the token
+ * (not including the '\0') is returned in *lenp.
+ *
+ * If successful, the *buf pointer will be updated to point beyond
+ * the end of the found token.
+ *
+ * Note: uses GFP_KERNEL for allocation.
+ */
+static inline char *dup_token(const char **buf, size_t *lenp)
+{
+ char *dup;
+ size_t len;
+
+ len = next_token(buf);
+ /*
+ * kmemdup() copies len + 1 bytes; the byte after the token is
+ * either whitespace or the string's terminating NUL, so the read
+ * stays in bounds, and we overwrite that byte with '\0' below.
+ */
+ dup = kmemdup(*buf, len + 1, GFP_KERNEL);
+ if (!dup)
+ return NULL;
+ *(dup + len) = '\0';
+ *buf += len;
+
+ if (lenp)
+ *lenp = len;
+
+ return dup;
+}
+
+/*
+ * Parse the options provided for an "rbd add" (i.e., rbd image
+ * mapping) request. These arrive via a write to /sys/bus/rbd/add,
+ * and the data written is passed here via a NUL-terminated buffer.
+ * Returns 0 if successful or an error code otherwise.
+ *
+ * The information extracted from these options is recorded in
+ * the other parameters which return dynamically-allocated
+ * structures:
+ * ceph_opts
+ * The address of a pointer that will refer to a ceph options
+ * structure. Caller must release the returned pointer using
+ * ceph_destroy_options() when it is no longer needed.
+ * rbd_opts
+ * Address of an rbd options pointer. Fully initialized by
+ * this function; caller must release with kfree().
+ * spec
+ * Address of an rbd image specification pointer. Fully
+ * initialized by this function based on parsed options.
+ * Caller must release with rbd_spec_put().
+ *
+ * The options passed take this form:
+ * <mon_addrs> <options> <pool_name> <image_name> [<snap_id>]
+ * where:
+ * <mon_addrs>
+ * A comma-separated list of one or more monitor addresses.
+ * A monitor address is an ip address, optionally followed
+ * by a port number (separated by a colon).
+ * I.e.: ip1[:port1][,ip2[:port2]...]
+ * <options>
+ * A comma-separated list of ceph and/or rbd options.
+ * <pool_name>
+ * The name of the rados pool containing the rbd image.
+ * <image_name>
+ * The name of the image in that pool to map.
+ * <snap_id>
+ * An optional snapshot id. If provided, the mapping will
+ * present data from the image at the time that snapshot was
+ * created. The image head is used if no snapshot id is
+ * provided. Snapshot mappings are always read-only.
+ */
+static int rbd_add_parse_args(const char *buf,
+ struct ceph_options **ceph_opts,
+ struct rbd_options **opts,
+ struct rbd_spec **rbd_spec)
+{
+ size_t len;
+ char *options;
+ const char *mon_addrs;
+ char *snap_name;
+ size_t mon_addrs_size;
+ struct rbd_spec *spec = NULL;
+ struct rbd_options *rbd_opts = NULL;
+ struct ceph_options *copts;
+ int ret;
+
+ /* The first four tokens are required */
+
+ /*
+ * The monitor addresses are not copied; mon_addrs points into the
+ * caller's buffer and is handed to ceph_parse_options() below.
+ */
+ len = next_token(&buf);
+ if (!len) {
+ rbd_warn(NULL, "no monitor address(es) provided");
+ return -EINVAL;
+ }
+ mon_addrs = buf;
+ mon_addrs_size = len + 1;
+ buf += len;
+
+ /* Default error for the empty-token checks that follow. */
+ ret = -EINVAL;
+ options = dup_token(&buf, NULL);
+ if (!options)
+ return -ENOMEM;
+ if (!*options) {
+ rbd_warn(NULL, "no options provided");
+ goto out_err;
+ }
+
+ spec = rbd_spec_alloc();
+ if (!spec)
+ goto out_mem;
+
+ spec->pool_name = dup_token(&buf, NULL);
+ if (!spec->pool_name)
+ goto out_mem;
+ if (!*spec->pool_name) {
+ rbd_warn(NULL, "no pool name provided");
+ goto out_err;
+ }
+
+ spec->image_name = dup_token(&buf, NULL);
+ if (!spec->image_name)
+ goto out_mem;
+ if (!*spec->image_name) {
+ rbd_warn(NULL, "no image name provided");
+ goto out_err;
+ }
+
+ /*
+ * Snapshot name is optional; default is to use "-"
+ * (indicating the head/no snapshot).
+ */
+ len = next_token(&buf);
+ if (!len) {
+ buf = RBD_SNAP_HEAD_NAME; /* No snapshot supplied */
+ len = sizeof (RBD_SNAP_HEAD_NAME) - 1;
+ } else if (len > RBD_MAX_SNAP_NAME_LEN) {
+ ret = -ENAMETOOLONG;
+ goto out_err;
+ }
+ snap_name = kmemdup(buf, len + 1, GFP_KERNEL);
+ if (!snap_name)
+ goto out_mem;
+ *(snap_name + len) = '\0';
+ spec->snap_name = snap_name;
+
+ /* Initialize all rbd options to the defaults */
+
+ rbd_opts = kzalloc(sizeof (*rbd_opts), GFP_KERNEL);
+ if (!rbd_opts)
+ goto out_mem;
+
+ rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
+
+ /* rbd-specific option tokens are routed to parse_rbd_opts_token(). */
+ copts = ceph_parse_options(options, mon_addrs,
+ mon_addrs + mon_addrs_size - 1,
+ parse_rbd_opts_token, rbd_opts);
+ if (IS_ERR(copts)) {
+ ret = PTR_ERR(copts);
+ goto out_err;
+ }
+ kfree(options);
+
+ /* Success: ownership of all three results passes to the caller. */
+ *ceph_opts = copts;
+ *opts = rbd_opts;
+ *rbd_spec = spec;
+
+ return 0;
+out_mem:
+ ret = -ENOMEM;
+out_err:
+ kfree(rbd_opts);
+ rbd_spec_put(spec);
+ kfree(options);
+
+ return ret;
+}
+
+/*
+ * An rbd format 2 image has a unique identifier, distinct from the
+ * name given to it by the user. Internally, that identifier is
+ * what's used to specify the names of objects related to the image.
+ *
+ * A special "rbd id" object is used to map an rbd image name to its
+ * id. If that object doesn't exist, then there is no v2 rbd image
+ * with the supplied name.
+ *
+ * This function will record the given rbd_dev's image_id field if
+ * it can be determined, and in that case will return 0. If any
+ * errors occur a negative errno will be returned and the rbd_dev's
+ * image_id field will be unchanged (and should be NULL).
+ */
+static int rbd_dev_image_id(struct rbd_device *rbd_dev)
+{
+ int ret;
+ size_t size;
+ char *object_name;
+ void *response;
+ char *image_id;
+
+ /*
+ * When probing a parent image, the image id is already
+ * known (and the image name likely is not). There's no
+ * need to fetch the image id again in this case. We
+ * do still need to set the image format though.
+ */
+ if (rbd_dev->spec->image_id) {
+ /* An empty image id is the convention for a format 1 image. */
+ rbd_dev->image_format = *rbd_dev->spec->image_id ? 2 : 1;
+
+ return 0;
+ }
+
+ /*
+ * First, see if the format 2 image id file exists, and if
+ * so, get the image's persistent id from it.
+ */
+ size = sizeof (RBD_ID_PREFIX) + strlen(rbd_dev->spec->image_name);
+ object_name = kmalloc(size, GFP_NOIO);
+ if (!object_name)
+ return -ENOMEM;
+ sprintf(object_name, "%s%s", RBD_ID_PREFIX, rbd_dev->spec->image_name);
+ dout("rbd id object name is %s\n", object_name);
+
+ /* Response will be an encoded string, which includes a length */
+
+ size = sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX;
+ response = kzalloc(size, GFP_NOIO);
+ if (!response) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* If it doesn't exist we'll assume it's a format 1 image */
+
+ ret = rbd_obj_method_sync(rbd_dev, object_name,
+ "rbd", "get_id", NULL, 0,
+ response, RBD_IMAGE_ID_LEN_MAX);
+ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
+ if (ret == -ENOENT) {
+ /* No id object: format 1 image, record an empty id. */
+ image_id = kstrdup("", GFP_KERNEL);
+ ret = image_id ? 0 : -ENOMEM;
+ if (!ret)
+ rbd_dev->image_format = 1;
+ } else if (ret > sizeof (__le32)) {
+ void *p = response;
+
+ image_id = ceph_extract_encoded_string(&p, p + ret,
+ NULL, GFP_NOIO);
+ ret = IS_ERR(image_id) ? PTR_ERR(image_id) : 0;
+ if (!ret)
+ rbd_dev->image_format = 2;
+ } else {
+ /* Reply too short to hold even the length prefix. */
+ ret = -EINVAL;
+ }
+
+ if (!ret) {
+ rbd_dev->spec->image_id = image_id;
+ dout("image_id is %s\n", image_id);
+ }
+out:
+ kfree(response);
+ kfree(object_name);
+
+ return ret;
+}
+
+/*
+ * Undo whatever state changes are made by v1 or v2 header info
+ * call.
+ */
+static void rbd_dev_unprobe(struct rbd_device *rbd_dev)
+{
+ struct rbd_image_header *header;
+
+ /* Drop parent reference unless it's already been done (or none) */
+
+ if (rbd_dev->parent_overlap)
+ rbd_dev_parent_put(rbd_dev);
+
+ /* Free dynamic fields from the header, then zero it out */
+
+ header = &rbd_dev->header;
+ ceph_put_snap_context(header->snapc);
+ kfree(header->snap_sizes);
+ kfree(header->snap_names);
+ kfree(header->object_prefix);
+ /* Zeroing also resets object_prefix, re-arming the "first time"
+ * detection in rbd_dev_v2_header_info(). */
+ memset(header, 0, sizeof (*header));
+}
+
+/*
+ * Fetch the format 2 header fields that are only read once: the
+ * object prefix, the feature bits, and (when the image uses fancy
+ * striping) the striping parameters.  On failure all one-time state
+ * set here is rolled back.
+ */
+static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev)
+{
+ int ret;
+
+ ret = rbd_dev_v2_object_prefix(rbd_dev);
+ if (ret)
+ goto out_err;
+
+ /*
+ * Get and check the features for the image. Currently the
+ * features are assumed to never change.
+ */
+ ret = rbd_dev_v2_features(rbd_dev);
+ if (ret)
+ goto out_err;
+
+ /* If the image supports fancy striping, get its parameters */
+
+ if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) {
+ ret = rbd_dev_v2_striping_info(rbd_dev);
+ if (ret < 0)
+ goto out_err;
+ }
+ /* No support for crypto and compression type format 2 images */
+
+ return 0;
+out_err:
+ rbd_dev->header.features = 0;
+ kfree(rbd_dev->header.object_prefix);
+ rbd_dev->header.object_prefix = NULL;
+
+ return ret;
+}
+
+/*
+ * If this image has a parent (layering), create and probe an rbd_dev
+ * for the parent image, sharing the client and parent spec.  A no-op
+ * for images without a parent spec.  Returns 0 or a negative errno.
+ */
+static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
+{
+ struct rbd_device *parent = NULL;
+ struct rbd_spec *parent_spec;
+ struct rbd_client *rbdc;
+ int ret;
+
+ if (!rbd_dev->parent_spec)
+ return 0;
+ /*
+ * We need to pass a reference to the client and the parent
+ * spec when creating the parent rbd_dev. Images related by
+ * parent/child relationships always share both.
+ */
+ parent_spec = rbd_spec_get(rbd_dev->parent_spec);
+ rbdc = __rbd_get_client(rbd_dev->rbd_client);
+
+ ret = -ENOMEM;
+ parent = rbd_dev_create(rbdc, parent_spec);
+ if (!parent)
+ goto out_err;
+
+ /* false: the parent is not the mapped image, so no header watch. */
+ ret = rbd_dev_image_probe(parent, false);
+ if (ret < 0)
+ goto out_err;
+ rbd_dev->parent = parent;
+ atomic_set(&rbd_dev->parent_ref, 1);
+
+ return 0;
+out_err:
+ if (parent) {
+ /* The references taken above are now owned by parent and
+ * dropped by rbd_dev_destroy().
+ * NOTE(review): freeing rbd_dev->header_name here without
+ * NULLing it looks suspicious -- confirm against the
+ * cleanup done in rbd_dev_image_probe(). */
+ rbd_dev_unparent(rbd_dev);
+ kfree(rbd_dev->header_name);
+ rbd_dev_destroy(parent);
+ } else {
+ rbd_put_client(rbdc);
+ rbd_spec_put(parent_spec);
+ }
+
+ return ret;
+}
+
+/*
+ * Make a probed image visible as a block device: allocate a dev id,
+ * a major/minor, the gendisk and mapping, register with sysfs, and
+ * finally announce the disk.  Returns 0 or a negative errno; on
+ * failure all steps taken so far are undone.
+ */
+static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
+{
+ int ret;
+
+ /* Get an id and fill in device name. */
+
+ ret = rbd_dev_id_get(rbd_dev);
+ if (ret)
+ return ret;
+
+ BUILD_BUG_ON(DEV_NAME_LEN
+ < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH);
+ sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id);
+
+ /* Record our major and minor device numbers. */
+
+ if (!single_major) {
+ ret = register_blkdev(0, rbd_dev->name);
+ if (ret < 0)
+ goto err_out_id;
+
+ rbd_dev->major = ret;
+ rbd_dev->minor = 0;
+ } else {
+ /* Shared major: derive the minor from the dev id. */
+ rbd_dev->major = rbd_major;
+ rbd_dev->minor = rbd_dev_id_to_minor(rbd_dev->dev_id);
+ }
+
+ /* Set up the blkdev mapping. */
+
+ ret = rbd_init_disk(rbd_dev);
+ if (ret)
+ goto err_out_blkdev;
+
+ ret = rbd_dev_mapping_set(rbd_dev);
+ if (ret)
+ goto err_out_disk;
+ set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
+
+ ret = rbd_bus_add_dev(rbd_dev);
+ if (ret)
+ goto err_out_mapping;
+
+ /* Everything's ready. Announce the disk to the world. */
+
+ set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
+ add_disk(rbd_dev->disk);
+
+ pr_info("%s: added with size 0x%llx\n", rbd_dev->disk->disk_name,
+ (unsigned long long) rbd_dev->mapping.size);
+
+ return ret;
+
+err_out_mapping:
+ rbd_dev_mapping_clear(rbd_dev);
+err_out_disk:
+ rbd_free_disk(rbd_dev);
+err_out_blkdev:
+ if (!single_major)
+ unregister_blkdev(rbd_dev->major, rbd_dev->name);
+err_out_id:
+ rbd_dev_id_put(rbd_dev);
+ /* NOTE(review): rbd_dev_mapping_clear() may run twice on this
+ * path (also at err_out_mapping above) -- confirm it is safe to
+ * call on an unset/cleared mapping. */
+ rbd_dev_mapping_clear(rbd_dev);
+
+ return ret;
+}
+
+static int rbd_dev_header_name(struct rbd_device *rbd_dev)
+{
+ struct rbd_spec *spec = rbd_dev->spec;
+ size_t size;
+
+ /* Record the header object name for this rbd image. */
+
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+
+ /*
+ * Format 1: "<image_name>.rbd"; format 2: "rbd_header.<image_id>".
+ * sizeof of the string constant accounts for the trailing NUL.
+ */
+ if (rbd_dev->image_format == 1)
+ size = strlen(spec->image_name) + sizeof (RBD_SUFFIX);
+ else
+ size = sizeof (RBD_HEADER_PREFIX) + strlen(spec->image_id);
+
+ rbd_dev->header_name = kmalloc(size, GFP_KERNEL);
+ if (!rbd_dev->header_name)
+ return -ENOMEM;
+
+ if (rbd_dev->image_format == 1)
+ sprintf(rbd_dev->header_name, "%s%s",
+ spec->image_name, RBD_SUFFIX);
+ else
+ sprintf(rbd_dev->header_name, "%s%s",
+ RBD_HEADER_PREFIX, spec->image_id);
+ return 0;
+}
+
+/*
+ * Tear down everything set up by rbd_dev_image_probe() and destroy
+ * the rbd_dev itself.
+ */
+static void rbd_dev_image_release(struct rbd_device *rbd_dev)
+{
+ rbd_dev_unprobe(rbd_dev);
+ kfree(rbd_dev->header_name);
+ rbd_dev->header_name = NULL;
+ rbd_dev->image_format = 0;
+ kfree(rbd_dev->spec->image_id);
+ rbd_dev->spec->image_id = NULL;
+
+ rbd_dev_destroy(rbd_dev);
+}
+
+/*
+ * Probe for the existence of the header object for the given rbd
+ * device. If this image is the one being mapped (i.e., not a
+ * parent), initiate a watch on its header object before using that
+ * object to get detailed information about the rbd image.
+ */
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
+{
+ int ret;
+
+ /*
+ * Get the id from the image id object. Unless there's an
+ * error, rbd_dev->spec->image_id will be filled in with
+ * a dynamically-allocated string, and rbd_dev->image_format
+ * will be set to either 1 or 2.
+ */
+ ret = rbd_dev_image_id(rbd_dev);
+ if (ret)
+ return ret;
+ rbd_assert(rbd_dev->spec->image_id);
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+
+ ret = rbd_dev_header_name(rbd_dev);
+ if (ret)
+ goto err_out_format;
+
+ /* Only the mapped (top-level) image watches its header object. */
+ if (mapping) {
+ ret = rbd_dev_header_watch_sync(rbd_dev);
+ if (ret)
+ goto out_header_name;
+ }
+
+ if (rbd_dev->image_format == 1)
+ ret = rbd_dev_v1_header_info(rbd_dev);
+ else
+ ret = rbd_dev_v2_header_info(rbd_dev);
+ if (ret)
+ goto err_out_watch;
+
+ ret = rbd_dev_spec_update(rbd_dev);
+ if (ret)
+ goto err_out_probe;
+
+ ret = rbd_dev_probe_parent(rbd_dev);
+ if (ret)
+ goto err_out_probe;
+
+ dout("discovered format %u image, header name is %s\n",
+ rbd_dev->image_format, rbd_dev->header_name);
+
+ return 0;
+/* Error labels unwind in reverse order of the setup steps above. */
+err_out_probe:
+ rbd_dev_unprobe(rbd_dev);
+err_out_watch:
+ if (mapping)
+ rbd_dev_header_unwatch_sync(rbd_dev);
+out_header_name:
+ kfree(rbd_dev->header_name);
+ rbd_dev->header_name = NULL;
+err_out_format:
+ rbd_dev->image_format = 0;
+ kfree(rbd_dev->spec->image_id);
+ rbd_dev->spec->image_id = NULL;
+
+ dout("probe failed, returning %d\n", ret);
+
+ return ret;
+}
+
+/*
+ * Handle a write to the sysfs "add" attribute: parse the mapping
+ * request in buf, probe the image, and set up the block device.
+ * Returns count on success or a negative errno.  A successful map
+ * keeps the module reference taken here until removal.
+ */
+static ssize_t do_rbd_add(struct bus_type *bus,
+ const char *buf,
+ size_t count)
+{
+ struct rbd_device *rbd_dev = NULL;
+ struct ceph_options *ceph_opts = NULL;
+ struct rbd_options *rbd_opts = NULL;
+ struct rbd_spec *spec = NULL;
+ struct rbd_client *rbdc;
+ struct ceph_osd_client *osdc;
+ bool read_only;
+ int rc = -ENOMEM;
+
+ if (!try_module_get(THIS_MODULE))
+ return -ENODEV;
+
+ /* parse add command */
+ rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec);
+ if (rc < 0)
+ goto err_out_module;
+ /* Only the read_only flag is needed from the rbd options. */
+ read_only = rbd_opts->read_only;
+ kfree(rbd_opts);
+ rbd_opts = NULL; /* done with this */
+
+ /* rbd_get_client() consumes ceph_opts (even on failure). */
+ rbdc = rbd_get_client(ceph_opts);
+ if (IS_ERR(rbdc)) {
+ rc = PTR_ERR(rbdc);
+ goto err_out_args;
+ }
+
+ /* pick the pool */
+ osdc = &rbdc->client->osdc;
+ rc = ceph_pg_poolid_by_name(osdc->osdmap, spec->pool_name);
+ if (rc < 0)
+ goto err_out_client;
+ spec->pool_id = (u64)rc;
+
+ /* The ceph file layout needs to fit pool id in 32 bits */
+
+ if (spec->pool_id > (u64)U32_MAX) {
+ rbd_warn(NULL, "pool id too large (%llu > %u)\n",
+ (unsigned long long)spec->pool_id, U32_MAX);
+ rc = -EIO;
+ goto err_out_client;
+ }
+
+ rbd_dev = rbd_dev_create(rbdc, spec);
+ if (!rbd_dev)
+ goto err_out_client;
+ rbdc = NULL; /* rbd_dev now owns this */
+ spec = NULL; /* rbd_dev now owns this */
+
+ rc = rbd_dev_image_probe(rbd_dev, true);
+ if (rc < 0)
+ goto err_out_rbd_dev;
+
+ /* If we are mapping a snapshot it must be marked read-only */
+
+ if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
+ read_only = true;
+ rbd_dev->mapping.read_only = read_only;
+
+ rc = rbd_dev_device_setup(rbd_dev);
+ if (rc) {
+ /*
+ * rbd_dev_header_unwatch_sync() can't be moved into
+ * rbd_dev_image_release() without refactoring, see
+ * commit 1f3ef78861ac.
+ */
+ rbd_dev_header_unwatch_sync(rbd_dev);
+ rbd_dev_image_release(rbd_dev);
+ goto err_out_module;
+ }
+
+ return count;
+
+err_out_rbd_dev:
+ rbd_dev_destroy(rbd_dev);
+err_out_client:
+ rbd_put_client(rbdc);
+err_out_args:
+ rbd_spec_put(spec);
+err_out_module:
+ module_put(THIS_MODULE);
+
+ dout("Error adding device %s\n", buf);
+
+ return (ssize_t)rc;
+}
+
+/* sysfs "add" handler used when each device gets its own major. */
+static ssize_t rbd_add(struct bus_type *bus,
+ const char *buf,
+ size_t count)
+{
+ if (single_major)
+ return -EINVAL;
+
+ return do_rbd_add(bus, buf, count);
+}
+
+/* sysfs "add_single_major" handler used in single-major mode. */
+static ssize_t rbd_add_single_major(struct bus_type *bus,
+ const char *buf,
+ size_t count)
+{
+ return do_rbd_add(bus, buf, count);
+}
+
+/*
+ * Driver-core release callback (see rbd_bus_add_dev()); undoes what
+ * rbd_dev_device_setup() did.
+ */
+static void rbd_dev_device_release(struct device *dev)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+
+ rbd_free_disk(rbd_dev);
+ clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
+ rbd_dev_mapping_clear(rbd_dev);
+ if (!single_major)
+ unregister_blkdev(rbd_dev->major, rbd_dev->name);
+ rbd_dev_id_put(rbd_dev);
+ /* NOTE(review): second rbd_dev_mapping_clear() call appears
+ * redundant with the one above -- confirm. */
+ rbd_dev_mapping_clear(rbd_dev);
+}
+
+/*
+ * Release the whole chain of parent images above rbd_dev.  Each pass
+ * walks to the deepest ancestor (the one with no grandparent) and
+ * releases it, so the chain is torn down leaf-most first.
+ */
+static void rbd_dev_remove_parent(struct rbd_device *rbd_dev)
+{
+ while (rbd_dev->parent) {
+ struct rbd_device *first = rbd_dev;
+ struct rbd_device *second = first->parent;
+ struct rbd_device *third;
+
+ /*
+ * Follow to the parent with no grandparent and
+ * remove it.
+ */
+ while (second && (third = second->parent)) {
+ first = second;
+ second = third;
+ }
+ rbd_assert(second);
+ rbd_dev_image_release(second);
+ /* Detach the released parent from its child. */
+ first->parent = NULL;
+ first->parent_overlap = 0;
+
+ rbd_assert(first->parent_spec);
+ rbd_spec_put(first->parent_spec);
+ first->parent_spec = NULL;
+ }
+}
+
+/*
+ * Handle a write to the sysfs "remove" attribute: buf holds the
+ * decimal dev id of the mapping to remove.  Fails with -EBUSY if the
+ * device is open, -ENOENT if no such device exists.  Returns count
+ * on success.
+ */
+static ssize_t do_rbd_remove(struct bus_type *bus,
+ const char *buf,
+ size_t count)
+{
+ struct rbd_device *rbd_dev = NULL;
+ struct list_head *tmp;
+ int dev_id;
+ unsigned long ul;
+ bool already = false;
+ int ret;
+
+ ret = kstrtoul(buf, 10, &ul);
+ if (ret)
+ return ret;
+
+ /* convert to int; abort if we lost anything in the conversion */
+ dev_id = (int)ul;
+ if (dev_id != ul)
+ return -EINVAL;
+
+ ret = -ENOENT;
+ spin_lock(&rbd_dev_list_lock);
+ list_for_each(tmp, &rbd_dev_list) {
+ rbd_dev = list_entry(tmp, struct rbd_device, node);
+ if (rbd_dev->dev_id == dev_id) {
+ ret = 0;
+ break;
+ }
+ }
+ if (!ret) {
+ /*
+ * Mark the device as being removed under its lock so a
+ * concurrent open either sees the flag or bumps open_count
+ * first; "already" means another removal won the race.
+ */
+ spin_lock_irq(&rbd_dev->lock);
+ if (rbd_dev->open_count)
+ ret = -EBUSY;
+ else
+ already = test_and_set_bit(RBD_DEV_FLAG_REMOVING,
+ &rbd_dev->flags);
+ spin_unlock_irq(&rbd_dev->lock);
+ }
+ spin_unlock(&rbd_dev_list_lock);
+ if (ret < 0 || already)
+ return ret;
+
+ rbd_dev_header_unwatch_sync(rbd_dev);
+ /*
+ * flush remaining watch callbacks - these must be complete
+ * before the osd_client is shutdown
+ */
+ dout("%s: flushing notifies", __func__);
+ ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
+
+ /*
+ * Don't free anything from rbd_dev->disk until after all
+ * notifies are completely processed. Otherwise
+ * rbd_bus_del_dev() will race with rbd_watch_cb(), resulting
+ * in a potential use after free of rbd_dev->disk or rbd_dev.
+ */
+ rbd_bus_del_dev(rbd_dev);
+ rbd_dev_image_release(rbd_dev);
+ /* Drop the module reference taken when the device was added. */
+ module_put(THIS_MODULE);
+
+ return count;
+}
+
+/* sysfs "remove" handler used when each device gets its own major. */
+static ssize_t rbd_remove(struct bus_type *bus,
+ const char *buf,
+ size_t count)
+{
+ if (single_major)
+ return -EINVAL;
+
+ return do_rbd_remove(bus, buf, count);
+}
+
+/* sysfs "remove_single_major" handler used in single-major mode. */
+static ssize_t rbd_remove_single_major(struct bus_type *bus,
+ const char *buf,
+ size_t count)
+{
+ return do_rbd_remove(bus, buf, count);
+}
+
+/*
+ * create control files in sysfs
+ * /sys/bus/rbd/...
+ */
+static int rbd_sysfs_init(void)
+{
+ int ret;
+
+ ret = device_register(&rbd_root_dev);
+ if (ret < 0)
+ return ret;
+
+ /* Roll back the root device if bus registration fails. */
+ ret = bus_register(&rbd_bus_type);
+ if (ret < 0)
+ device_unregister(&rbd_root_dev);
+
+ return ret;
+}
+
+/* Reverse of rbd_sysfs_init(), in the opposite order. */
+static void rbd_sysfs_cleanup(void)
+{
+ bus_unregister(&rbd_bus_type);
+ device_unregister(&rbd_root_dev);
+}
+
+/*
+ * Create the three slab caches used by the driver (image requests,
+ * object requests, segment names).  Returns 0 or -ENOMEM; on failure
+ * any caches already created are destroyed again.
+ */
+static int rbd_slab_init(void)
+{
+ rbd_assert(!rbd_img_request_cache);
+ rbd_img_request_cache = kmem_cache_create("rbd_img_request",
+ sizeof (struct rbd_img_request),
+ __alignof__(struct rbd_img_request),
+ 0, NULL);
+ if (!rbd_img_request_cache)
+ return -ENOMEM;
+
+ rbd_assert(!rbd_obj_request_cache);
+ rbd_obj_request_cache = kmem_cache_create("rbd_obj_request",
+ sizeof (struct rbd_obj_request),
+ __alignof__(struct rbd_obj_request),
+ 0, NULL);
+ if (!rbd_obj_request_cache)
+ goto out_err;
+
+ rbd_assert(!rbd_segment_name_cache);
+ rbd_segment_name_cache = kmem_cache_create("rbd_segment_name",
+ CEPH_MAX_OID_NAME_LEN + 1, 1, 0, NULL);
+ if (rbd_segment_name_cache)
+ return 0;
+out_err:
+ if (rbd_obj_request_cache) {
+ kmem_cache_destroy(rbd_obj_request_cache);
+ rbd_obj_request_cache = NULL;
+ }
+
+ kmem_cache_destroy(rbd_img_request_cache);
+ rbd_img_request_cache = NULL;
+
+ return -ENOMEM;
+}
+
+/* Destroy the slab caches created by rbd_slab_init(). */
+static void rbd_slab_exit(void)
+{
+ rbd_assert(rbd_segment_name_cache);
+ kmem_cache_destroy(rbd_segment_name_cache);
+ rbd_segment_name_cache = NULL;
+
+ rbd_assert(rbd_obj_request_cache);
+ kmem_cache_destroy(rbd_obj_request_cache);
+ rbd_obj_request_cache = NULL;
+
+ rbd_assert(rbd_img_request_cache);
+ kmem_cache_destroy(rbd_img_request_cache);
+ rbd_img_request_cache = NULL;
+}
+
+/*
+ * Module init: check libceph compatibility, create slab caches,
+ * reserve the shared block major in single_major mode, and register
+ * the sysfs bus/device.  Unwinds everything on failure.
+ */
+static int __init rbd_init(void)
+{
+ int rc;
+
+ if (!libceph_compatible(NULL)) {
+ rbd_warn(NULL, "libceph incompatibility (quitting)");
+ return -EINVAL;
+ }
+
+ rc = rbd_slab_init();
+ if (rc)
+ return rc;
+
+ if (single_major) {
+ /* One shared major for all devices; 0 = dynamic allocation. */
+ rbd_major = register_blkdev(0, RBD_DRV_NAME);
+ if (rbd_major < 0) {
+ rc = rbd_major;
+ goto err_out_slab;
+ }
+ }
+
+ rc = rbd_sysfs_init();
+ if (rc)
+ goto err_out_blkdev;
+
+ if (single_major)
+ pr_info("loaded (major %d)\n", rbd_major);
+ else
+ pr_info("loaded\n");
+
+ return 0;
+
+err_out_blkdev:
+ if (single_major)
+ unregister_blkdev(rbd_major, RBD_DRV_NAME);
+err_out_slab:
+ rbd_slab_exit();
+ return rc;
+}
+
+/* Module exit: undo rbd_init() in reverse order. */
+static void __exit rbd_exit(void)
+{
+ rbd_sysfs_cleanup();
+ if (single_major)
+ unregister_blkdev(rbd_major, RBD_DRV_NAME);
+ rbd_slab_exit();
+}
+
+module_init(rbd_init);
+module_exit(rbd_exit);
+
+MODULE_AUTHOR("Alex Elder <elder at inktank.com>");
+MODULE_AUTHOR("Sage Weil <sage at newdream.net>");
+MODULE_AUTHOR("Yehuda Sadeh <yehuda at hq.newdream.net>");
+/* following authorship retained from original osdblk.c */
+MODULE_AUTHOR("Jeff Garzik <jeff at garzik.org>");
+
+MODULE_DESCRIPTION("RADOS Block Device (RBD) driver");
+MODULE_LICENSE("GPL");
diff --git a/rbd/rbd_types.h b/rbd/rbd_types.h
new file mode 100644
index 0000000..49d77cb
--- /dev/null
+++ b/rbd/rbd_types.h
@@ -0,0 +1,81 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2010 Sage Weil <sage at newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_RBD_TYPES_H
+#define CEPH_RBD_TYPES_H
+
+#include <linux/types.h>
+
+/* For format version 2, rbd image 'foo' consists of objects
+ * rbd_id.foo - id of image
+ * rbd_header.<id> - image metadata
+ * rbd_data.<id>.0000000000000000
+ * rbd_data.<id>.0000000000000001
+ * ... - data
+ * Clients do not access header data directly in rbd format 2.
+ */
+
+#define RBD_HEADER_PREFIX "rbd_header."
+#define RBD_DATA_PREFIX "rbd_data."
+#define RBD_ID_PREFIX "rbd_id."
+
+/*
+ * For format version 1, rbd image 'foo' consists of objects
+ * foo.rbd - image metadata
+ * rb.<idhi>.<idlo>.00000000
+ * rb.<idhi>.<idlo>.00000001
+ * ... - data
+ * There is no notion of a persistent image id in rbd format 1.
+ */
+
+#define RBD_SUFFIX ".rbd"
+
+#define RBD_DIRECTORY "rbd_directory"
+#define RBD_INFO "rbd_info"
+
+#define RBD_DEFAULT_OBJ_ORDER 22 /* 4MB */
+#define RBD_MIN_OBJ_ORDER 16
+#define RBD_MAX_OBJ_ORDER 30
+
+#define RBD_COMP_NONE 0
+#define RBD_CRYPT_NONE 0
+
+#define RBD_HEADER_TEXT "<<< Rados Block Device Image >>>\n"
+#define RBD_HEADER_SIGNATURE "RBD"
+#define RBD_HEADER_VERSION "001.005"
+
+/* On-disk (format 1) record for a single snapshot: id and the image
+ * size at the time the snapshot was taken.  All fields little-endian. */
+struct rbd_image_snap_ondisk {
+ __le64 id;
+ __le64 image_size;
+} __attribute__((packed));
+
+/* On-disk layout of a format 1 image header object ("<name>.rbd").
+ * Fixed-size fields are followed by a variable-length snapshot array. */
+struct rbd_image_header_ondisk {
+ char text[40]; /* RBD_HEADER_TEXT banner */
+ char object_prefix[24];
+ char signature[4]; /* RBD_HEADER_SIGNATURE */
+ char version[8]; /* RBD_HEADER_VERSION */
+ struct {
+ __u8 order;
+ __u8 crypt_type;
+ __u8 comp_type;
+ __u8 unused;
+ } __attribute__((packed)) options;
+ __le64 image_size;
+ __le64 snap_seq;
+ __le32 snap_count;
+ __le32 reserved;
+ __le64 snap_names_len;
+ /* Old-style flexible array: snap_count entries follow the header. */
+ struct rbd_image_snap_ondisk snaps[0];
+} __attribute__((packed));
+
+
+#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ceph/ceph-dkms.git
More information about the Pkg-ceph-commits
mailing list