[Pkg-ceph-commits] [ceph] 05/06: added bunch of backported patches
Dmitry Smirnov
onlyjob at moszumanska.debian.org
Wed May 7 08:33:07 UTC 2014
This is an automated email from the git hooks/post-receive script.
onlyjob pushed a commit to branch master
in repository ceph.
commit a7b1d1e
Author: Dmitry Smirnov <onlyjob at member.fsf.org>
Date: Wed May 7 06:40:22 2014
added bunch of backported patches
---
debian/patches/8113.patch | 389 ++++++++++++++++++++++++++++++++++
debian/patches/8175.patch | 66 ++++++
debian/patches/8282.patch | 32 +++
debian/patches/8291.patch | 132 ++++++++++++
debian/patches/bp0001.patch | 28 +++
debian/patches/sample.ceph.conf.patch | 365 +++++++++++++++++++++++++++++++
debian/patches/series | 9 +
7 files changed, 1021 insertions(+)
diff --git a/debian/patches/8113.patch b/debian/patches/8113.patch
new file mode 100644
index 0000000..3a712c2
--- /dev/null
+++ b/debian/patches/8113.patch
@@ -0,0 +1,389 @@
+From 022d467b5d6b77c17b6fdaeec8369cae61e9e5a4 Mon Sep 17 00:00:00 2001
+From: David Zafman <david.zafman at inktank.com>
+Date: Mon, 21 Apr 2014 23:52:04 -0700
+Subject: [PATCH] osd, common: If agent_work() finds no objs to work on delay 5
+ (default) secs
+
+Add config osd_agent_delay_time of 5 seconds
+Honor delay by ignoring agent_choose_mode() calls
+Add tier_delay to logger
+Treat restart after delay like we were previously idle
+
+Fixes: #8113
+Backport: firefly
+
+Signed-off-by: David Zafman <david.zafman at inktank.com>
+(cherry picked from commit b7d31e5f5952c631dd4172bcb825e77a13fc60bc)
+
+--- a/src/common/config_opts.h
++++ b/src/common/config_opts.h
+@@ -398,8 +398,9 @@
+ // max agent flush ops
+ OPTION(osd_agent_max_ops, OPT_INT, 4)
+ OPTION(osd_agent_min_evict_effort, OPT_FLOAT, .1)
+ OPTION(osd_agent_quantize_effort, OPT_FLOAT, .1)
++OPTION(osd_agent_delay_time, OPT_FLOAT, 5.0)
+
+ // decay atime and hist histograms after how many objects go by
+ OPTION(osd_agent_hist_halflife, OPT_INT, 1000)
+
+--- a/src/osd/OSD.cc
++++ b/src/osd/OSD.cc
+@@ -200,8 +200,10 @@
+ agent_ops(0),
+ agent_active(true),
+ agent_thread(this),
+ agent_stop_flag(false),
++ agent_timer_lock("OSD::agent_timer_lock"),
++ agent_timer(osd->client_messenger->cct, agent_timer_lock),
+ objecter_lock("OSD::objecter_lock"),
+ objecter_timer(osd->client_messenger->cct, objecter_lock),
+ objecter(new Objecter(osd->client_messenger->cct, osd->objecter_messenger, osd->monc, &objecter_osdmap,
+ objecter_lock, objecter_timer, 0, 0)),
+@@ -434,8 +436,12 @@
+ {
+ Mutex::Locker l(backfill_request_lock);
+ backfill_request_timer.shutdown();
+ }
++ {
++ Mutex::Locker l(agent_timer_lock);
++ agent_timer.shutdown();
++ }
+ osdmap = OSDMapRef();
+ next_osdmap = OSDMapRef();
+ }
+
+@@ -450,8 +456,9 @@
+ objecter->set_client_incarnation(0);
+ objecter->init_locked();
+ }
+ watch_timer.init();
++ agent_timer.init();
+
+ agent_thread.create();
+ }
+
+@@ -465,8 +472,17 @@
+ agent_cond.Signal();
+ agent_lock.Unlock();
+ }
+
++class AgentTimeoutCB : public Context {
++ PGRef pg;
++public:
++ AgentTimeoutCB(PGRef _pg) : pg(_pg) {}
++ void finish(int) {
++ pg->agent_choose_mode_restart();
++ }
++};
++
+ void OSDService::agent_entry()
+ {
+ dout(10) << __func__ << " start" << dendl;
+ agent_lock.Lock();
+@@ -500,9 +516,20 @@
+ }
+ PGRef pg = *agent_queue_pos;
+ int max = g_conf->osd_agent_max_ops - agent_ops;
+ agent_lock.Unlock();
+- pg->agent_work(max);
++ if (!pg->agent_work(max)) {
++ dout(10) << __func__ << " " << *pg
++ << " no agent_work, delay for " << g_conf->osd_agent_delay_time
++ << " seconds" << dendl;
++
++ osd->logger->inc(l_osd_tier_delay);
++ // Queue a timer to call agent_choose_mode for this pg in 5 seconds
++ agent_timer_lock.Lock();
++ Context *cb = new AgentTimeoutCB(pg);
++ agent_timer.add_event_after(g_conf->osd_agent_delay_time, cb);
++ agent_timer_lock.Unlock();
++ }
+ agent_lock.Lock();
+ }
+ agent_lock.Unlock();
+ dout(10) << __func__ << " finish" << dendl;
+@@ -1477,8 +1504,9 @@
+ osd_plb.add_u64_counter(l_osd_tier_evict, "tier_evict");
+ osd_plb.add_u64_counter(l_osd_tier_whiteout, "tier_whiteout");
+ osd_plb.add_u64_counter(l_osd_tier_dirty, "tier_dirty");
+ osd_plb.add_u64_counter(l_osd_tier_clean, "tier_clean");
++ osd_plb.add_u64_counter(l_osd_tier_delay, "tier_delay");
+
+ osd_plb.add_u64_counter(l_osd_agent_wake, "agent_wake");
+ osd_plb.add_u64_counter(l_osd_agent_skip, "agent_skip");
+ osd_plb.add_u64_counter(l_osd_agent_flush, "agent_flush");
+--- a/src/osd/OSD.h
++++ b/src/osd/OSD.h
+@@ -132,8 +132,9 @@
+ l_osd_tier_evict,
+ l_osd_tier_whiteout,
+ l_osd_tier_dirty,
+ l_osd_tier_clean,
++ l_osd_tier_delay,
+
+ l_osd_agent_wake,
+ l_osd_agent_skip,
+ l_osd_agent_flush,
+@@ -465,8 +466,10 @@
+ return NULL;
+ }
+ } agent_thread;
+ bool agent_stop_flag;
++ Mutex agent_timer_lock;
++ SafeTimer agent_timer;
+
+ void agent_entry();
+ void agent_stop();
+
+--- a/src/osd/PG.h
++++ b/src/osd/PG.h
+@@ -2130,11 +2130,13 @@
+ virtual void on_shutdown() = 0;
+ virtual void check_blacklisted_watchers() = 0;
+ virtual void get_watchers(std::list<obj_watch_item_t>&) = 0;
+
+- virtual void agent_work(int max) = 0;
++ virtual bool agent_work(int max) = 0;
+ virtual void agent_stop() = 0;
++ virtual void agent_delay() = 0;
+ virtual void agent_clear() = 0;
++ virtual void agent_choose_mode_restart() = 0;
+ };
+
+ ostream& operator<<(ostream& out, const PG& pg);
+
+--- a/src/osd/ReplicatedPG.cc
++++ b/src/osd/ReplicatedPG.cc
+@@ -10965,8 +10965,9 @@
+ agent_state->position.pool = info.pgid.pool();
+ agent_state->position.hash = pool.info.get_random_pg_position(
+ info.pgid.pgid,
+ rand());
++ agent_state->start = agent_state->position;
+
+ dout(10) << __func__ << " allocated new state, position "
+ << agent_state->position << dendl;
+ } else {
+@@ -10985,23 +10986,24 @@
+ agent_stop();
+ agent_state.reset(NULL);
+ }
+
+-void ReplicatedPG::agent_work(int start_max)
++// Return false if no objects operated on since start of object hash space
++bool ReplicatedPG::agent_work(int start_max)
+ {
+ lock();
+ if (!agent_state) {
+ dout(10) << __func__ << " no agent state, stopping" << dendl;
+ unlock();
+- return;
++ return true;
+ }
+
+ assert(!deleting);
+
+ if (agent_state->is_idle()) {
+ dout(10) << __func__ << " idle, stopping" << dendl;
+ unlock();
+- return;
++ return true;
+ }
+
+ osd->logger->inc(l_osd_agent_wake);
+
+@@ -11100,15 +11102,44 @@
+ agent_state->atime_hist.decay();
+ agent_state->temp_hist.decay();
+ }
+
++ // Total objects operated on so far
++ int total_started = agent_state->started + started;
++ bool need_delay = false;
++
++ dout(20) << __func__ << " start pos " << agent_state->position
++ << " next start pos " << next
++ << " started " << total_started << dendl;
++
++ // See if we've made a full pass over the object hash space
++ // This might check at most ls_max objects a second time to notice that
++ // we've checked every object at least once.
++ if (agent_state->position < agent_state->start && next >= agent_state->start) {
++ dout(20) << __func__ << " wrap around " << agent_state->start << dendl;
++ if (total_started == 0)
++ need_delay = true;
++ else
++ total_started = 0;
++ agent_state->start = next;
++ }
++ agent_state->started = total_started;
++
++ // See if we are starting from beginning
+ if (next.is_max())
+ agent_state->position = hobject_t();
+ else
+ agent_state->position = next;
+- dout(20) << __func__ << " final position " << agent_state->position << dendl;
++
++ if (need_delay) {
++ assert(agent_state->delaying == false);
++ agent_delay();
++ unlock();
++ return false;
++ }
+ agent_choose_mode();
+ unlock();
++ return true;
+ }
+
+ void ReplicatedPG::agent_load_hit_sets()
+ {
+@@ -11308,10 +11339,37 @@
+ osd->agent_disable_pg(this, agent_state->evict_effort);
+ }
+ }
+
+-void ReplicatedPG::agent_choose_mode()
++void ReplicatedPG::agent_delay()
+ {
++ dout(20) << __func__ << dendl;
++ if (agent_state && !agent_state->is_idle()) {
++ assert(agent_state->delaying == false);
++ agent_state->delaying = true;
++ osd->agent_disable_pg(this, agent_state->evict_effort);
++ }
++}
++
++void ReplicatedPG::agent_choose_mode_restart()
++{
++ dout(20) << __func__ << dendl;
++ lock();
++ if (agent_state && agent_state->delaying) {
++ agent_state->delaying = false;
++ agent_choose_mode(true);
++ }
++ unlock();
++}
++
++void ReplicatedPG::agent_choose_mode(bool restart)
++{
++ // Let delay play out
++ if (agent_state->delaying) {
++ dout(20) << __func__ << this << " delaying, ignored" << dendl;
++ return;
++ }
++
+ uint64_t divisor = pool.info.get_pg_num_divisor(info.pgid.pgid);
+
+ uint64_t num_user_objects = info.stats.stats.sum.num_objects;
+
+@@ -11383,9 +11441,9 @@
+ // flush mode
+ TierAgentState::flush_mode_t flush_mode = TierAgentState::FLUSH_MODE_IDLE;
+ uint64_t flush_target = pool.info.cache_target_dirty_ratio_micro;
+ uint64_t flush_slop = (float)flush_target * g_conf->osd_agent_slop;
+- if (agent_state->flush_mode == TierAgentState::FLUSH_MODE_IDLE)
++ if (restart || agent_state->flush_mode == TierAgentState::FLUSH_MODE_IDLE)
+ flush_target += flush_slop;
+ else
+ flush_target -= MIN(flush_target, flush_slop);
+
+@@ -11400,9 +11458,9 @@
+ TierAgentState::evict_mode_t evict_mode = TierAgentState::EVICT_MODE_IDLE;
+ unsigned evict_effort = 0;
+ uint64_t evict_target = pool.info.cache_target_full_ratio_micro;
+ uint64_t evict_slop = (float)evict_target * g_conf->osd_agent_slop;
+- if (agent_state->evict_mode == TierAgentState::EVICT_MODE_IDLE)
++ if (restart || agent_state->evict_mode == TierAgentState::EVICT_MODE_IDLE)
+ evict_target += evict_slop;
+ else
+ evict_target -= MIN(evict_target, evict_slop);
+
+@@ -11464,13 +11522,13 @@
+ // NOTE: we are using evict_effort as a proxy for *all* agent effort
+ // (including flush). This is probably fine (they should be
+ // correlated) but it is not precisely correct.
+ if (agent_state->is_idle()) {
+- if (!old_idle) {
++ if (!restart && !old_idle) {
+ osd->agent_disable_pg(this, old_effort);
+ }
+ } else {
+- if (old_idle) {
++ if (restart || old_idle) {
+ osd->agent_enable_pg(this, agent_state->evict_effort);
+ } else if (old_effort != agent_state->evict_effort) {
+ osd->agent_adjust_pg(this, old_effort, agent_state->evict_effort);
+ }
+--- a/src/osd/ReplicatedPG.h
++++ b/src/osd/ReplicatedPG.h
+@@ -808,9 +808,9 @@
+ friend class C_AgentFlushStartStop;
+ friend class C_HitSetFlushing;
+
+ void agent_setup(); ///< initialize agent state
+- void agent_work(int max); ///< entry point to do some agent work
++ bool agent_work(int max); ///< entry point to do some agent work
+ bool agent_maybe_flush(ObjectContextRef& obc); ///< maybe flush
+ bool agent_maybe_evict(ObjectContextRef& obc); ///< maybe evict
+
+ void agent_load_hit_sets(); ///< load HitSets, if needed
+@@ -824,13 +824,15 @@
+ int *atime, int *temperature);
+
+ /// stop the agent
+ void agent_stop();
++ void agent_delay();
+
+ /// clear agent state
+ void agent_clear();
+
+- void agent_choose_mode(); ///< choose (new) agent mode(s)
++ void agent_choose_mode(bool restart = false); ///< choose (new) agent mode(s)
++ void agent_choose_mode_restart();
+
+ /// true if we can send an ondisk/commit for v
+ bool already_complete(eversion_t v) {
+ for (xlist<RepGather*>::iterator i = repop_queue.begin();
+--- a/src/osd/TierAgentState.h
++++ b/src/osd/TierAgentState.h
+@@ -16,8 +16,12 @@
+
+ struct TierAgentState {
+ /// current position iterating across pool
+ hobject_t position;
++ /// Count of agent_work since "start" position of object hash space
++ int started;
++ hobject_t start;
++ bool delaying;
+
+ /// histogram of ages we've encountered
+ pow2_hist_t atime_hist;
+ pow2_hist_t temp_hist;
+@@ -65,19 +69,22 @@
+ /// distributed) that i should aim to evict.
+ unsigned evict_effort;
+
+ TierAgentState()
+- : hist_age(0),
++ : started(0),
++ delaying(false),
++ hist_age(0),
+ flush_mode(FLUSH_MODE_IDLE),
+ evict_mode(EVICT_MODE_IDLE),
+ evict_effort(0)
+ {}
+
+ /// false if we have any work to do
+ bool is_idle() const {
+ return
+- flush_mode == FLUSH_MODE_IDLE &&
+- evict_mode == EVICT_MODE_IDLE;
++ delaying ||
++ (flush_mode == FLUSH_MODE_IDLE &&
++ evict_mode == EVICT_MODE_IDLE);
+ }
+
+ /// add archived HitSet
+ void add_hit_set(time_t start, HitSetRef hs) {
diff --git a/debian/patches/8175.patch b/debian/patches/8175.patch
new file mode 100644
index 0000000..69790c9
--- /dev/null
+++ b/debian/patches/8175.patch
@@ -0,0 +1,66 @@
+From e7df73dd7aaf5a0b1171f73d6695d26cd25b7b35 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage at inktank.com>
+Date: Thu, 1 May 2014 16:53:17 -0700
+Subject: [PATCH] osd: Prevent divide by zero in agent_choose_mode()
+
+Fixes: #8175
+Backport: firefly
+
+Signed-off-by: David Zafman <david.zafman at inktank.com>
+Signed-off-by: Sage Weil <sage at inktank.com>
+(cherry picked from commit f47f867952e6b2a16a296c82bb9b585b21cde6c8)
+
+--- a/src/osd/ReplicatedPG.cc
++++ b/src/osd/ReplicatedPG.cc
+@@ -11369,8 +11369,9 @@
+ return;
+ }
+
+ uint64_t divisor = pool.info.get_pg_num_divisor(info.pgid.pgid);
++ assert(divisor > 0);
+
+ uint64_t num_user_objects = info.stats.stats.sum.num_objects;
+
+ // adjust (effective) user objects down based on the number
+@@ -11417,21 +11418,22 @@
+ uint64_t avg_size = info.stats.stats.sum.num_bytes /
+ info.stats.stats.sum.num_objects;
+ dirty_micro =
+ num_dirty * avg_size * 1000000 /
+- (pool.info.target_max_bytes / divisor);
++ MAX(pool.info.target_max_bytes / divisor, 1);
+ full_micro =
+ num_user_objects * avg_size * 1000000 /
+- (pool.info.target_max_bytes / divisor);
++ MAX(pool.info.target_max_bytes / divisor, 1);
+ }
+ if (pool.info.target_max_objects) {
+ uint64_t dirty_objects_micro =
+ num_dirty * 1000000 /
+- (pool.info.target_max_objects / divisor);
++ MAX(pool.info.target_max_objects / divisor, 1);
+ if (dirty_objects_micro > dirty_micro)
+ dirty_micro = dirty_objects_micro;
+ uint64_t full_objects_micro =
+- num_user_objects * 1000000 / (pool.info.target_max_objects / divisor);
++ num_user_objects * 1000000 /
++ MAX(pool.info.target_max_objects / divisor, 1);
+ if (full_objects_micro > full_micro)
+ full_micro = full_objects_micro;
+ }
+ dout(20) << __func__ << " dirty " << ((float)dirty_micro / 1000000.0)
+@@ -11473,9 +11475,13 @@
+ } else if (full_micro > evict_target) {
+ // set effort in [0..1] range based on where we are between
+ evict_mode = TierAgentState::EVICT_MODE_SOME;
+ uint64_t over = full_micro - evict_target;
+- uint64_t span = 1000000 - evict_target;
++ uint64_t span;
++ if (evict_target >= 1000000)
++ span = 1;
++ else
++ span = 1000000 - evict_target;
+ evict_effort = MAX(over * 1000000 / span,
+ (unsigned)(1000000.0 * g_conf->osd_agent_min_evict_effort));
+
+ // quantize effort to avoid too much reordering in the agent_queue.
diff --git a/debian/patches/8282.patch b/debian/patches/8282.patch
new file mode 100644
index 0000000..d0d3f3e
--- /dev/null
+++ b/debian/patches/8282.patch
@@ -0,0 +1,32 @@
+From 1b899148a729235ab2835d368077f18e62a36a93 Mon Sep 17 00:00:00 2001
+From: Haomai Wang <haomaiwang at gmail.com>
+Date: Sat, 3 May 2014 12:53:06 +0800
+Subject: [PATCH] Fix clone problem
+
+When a clone happens, the origin header will also be updated in GenericObjectMap,
+so the new header wrapper (StripObjectHeader) should be updated too.
+
+Fix #8282
+Signed-off-by: Haomai Wang <haomaiwang at gmail.com>
+(cherry picked from commit 3aee1e0ffe0583f74c02d9c9e86c7fb267f3515c)
+
+--- a/src/os/KeyValueStore.cc
++++ b/src/os/KeyValueStore.cc
+@@ -203,13 +203,16 @@
+ Header new_origin_header;
+
+ if (target_header)
+ *target_header = old_header;
++ if (origin_header)
++ *origin_header = old_header;
+
+ clone(old_header.header, cid, oid, t, &new_origin_header,
+ &target_header->header);
+
+- old_header.header = new_origin_header;
++ if(origin_header)
++ origin_header->header = new_origin_header;
+
+ if (target_header) {
+ target_header->oid = oid;
+ target_header->cid = cid;
diff --git a/debian/patches/8291.patch b/debian/patches/8291.patch
new file mode 100644
index 0000000..b50de53
--- /dev/null
+++ b/debian/patches/8291.patch
@@ -0,0 +1,132 @@
+From 09a1bc5a4601d356b9cc69be8541e6515d763861 Mon Sep 17 00:00:00 2001
+From: "Yan, Zheng" <zheng.z.yan at intel.com>
+Date: Fri, 11 Apr 2014 15:03:37 +0800
+Subject: [PATCH] client: add asok command to kick sessions that were remote
+ reset
+
+Fixes: #8021
+Signed-off-by: Yan, Zheng <zheng.z.yan at intel.com>
+
+--- a/src/client/Client.cc
++++ b/src/client/Client.cc
+@@ -119,8 +119,10 @@
+ else if (command == "mds_sessions")
+ m_client->dump_mds_sessions(f);
+ else if (command == "dump_cache")
+ m_client->dump_cache(f);
++ else if (command == "kick_stale_sessions")
++ m_client->_kick_stale_sessions();
+ else
+ assert(0 == "bad command registered");
+ m_client->client_lock.Unlock();
+ f->close_section();
+@@ -403,8 +405,16 @@
+ if (ret < 0) {
+ lderr(cct) << "error registering admin socket command: "
+ << cpp_strerror(-ret) << dendl;
+ }
++ ret = admin_socket->register_command("kick_stale_sessions",
++ "kick_stale_sessions",
++ &m_command_hook,
++ "kick sessions that were remote reset");
++ if (ret < 0) {
++ lderr(cct) << "error registering admin socket command: "
++ << cpp_strerror(-ret) << dendl;
++ }
+
+ client_lock.Lock();
+ initialized = true;
+ client_lock.Unlock();
+@@ -418,8 +428,9 @@
+ AdminSocket* admin_socket = cct->get_admin_socket();
+ admin_socket->unregister_command("mds_requests");
+ admin_socket->unregister_command("mds_sessions");
+ admin_socket->unregister_command("dump_cache");
++ admin_socket->unregister_command("kick_stale_sessions");
+
+ if (ino_invalidate_cb) {
+ ldout(cct, 10) << "shutdown stopping cache invalidator finisher" << dendl;
+ async_ino_invalidator.wait_for_empty();
+@@ -1537,9 +1548,10 @@
+ bool Client::have_open_session(int mds)
+ {
+ return
+ mds_sessions.count(mds) &&
+- mds_sessions[mds]->state == MetaSession::STATE_OPEN;
++ (mds_sessions[mds]->state == MetaSession::STATE_OPEN ||
++ mds_sessions[mds]->state == MetaSession::STATE_STALE);
+ }
+
+ MetaSession *Client::_get_mds_session(int mds, Connection *con)
+ {
+@@ -1648,8 +1660,21 @@
+
+ m->put();
+ }
+
++void Client::_kick_stale_sessions()
++{
++ ldout(cct, 1) << "kick_stale_sessions" << dendl;
++
++ for (map<int,MetaSession*>::iterator p = mds_sessions.begin();
++ p != mds_sessions.end(); ) {
++ MetaSession *s = p->second;
++ ++p;
++ if (s->state == MetaSession::STATE_STALE)
++ _closed_mds_session(s);
++ }
++}
++
+ void Client::send_request(MetaRequest *request, MetaSession *session)
+ {
+ // make the request
+ int mds = session->mds_num;
+@@ -8959,8 +8984,12 @@
+ }
+ break;
+
+ case MetaSession::STATE_OPEN:
++ ldout(cct, 1) << "reset from mds we were open; mark session as stale" << dendl;
++ s->state = MetaSession::STATE_STALE;
++ break;
++
+ case MetaSession::STATE_NEW:
+ case MetaSession::STATE_CLOSED:
+ default:
+ break;
+--- a/src/client/Client.h
++++ b/src/client/Client.h
+@@ -249,8 +249,9 @@
+ MetaSession *_get_or_open_mds_session(int mds);
+ MetaSession *_open_mds_session(int mds);
+ void _close_mds_session(MetaSession *s);
+ void _closed_mds_session(MetaSession *s);
++ void _kick_stale_sessions();
+ void handle_client_session(MClientSession *m);
+ void send_reconnect(MetaSession *s);
+ void resend_unsafe_requests(MetaSession *s);
+
+--- a/src/client/MetaSession.cc
++++ b/src/client/MetaSession.cc
+@@ -14,8 +14,9 @@
+ case STATE_OPENING: return "opening";
+ case STATE_OPEN: return "open";
+ case STATE_CLOSING: return "closing";
+ case STATE_CLOSED: return "closed";
++ case STATE_STALE: return "stale";
+ default: return "unknown";
+ }
+ }
+
+--- a/src/client/MetaSession.h
++++ b/src/client/MetaSession.h
+@@ -32,8 +32,9 @@
+ STATE_OPENING,
+ STATE_OPEN,
+ STATE_CLOSING,
+ STATE_CLOSED,
++ STATE_STALE,
+ } state;
+
+ list<Context*> waiting_for_open;
+
diff --git a/debian/patches/bp0001.patch b/debian/patches/bp0001.patch
new file mode 100644
index 0000000..cd8e595
--- /dev/null
+++ b/debian/patches/bp0001.patch
@@ -0,0 +1,28 @@
+From 6a55c3bc3caf46652e962fa9434900fb494d1e6c Mon Sep 17 00:00:00 2001
+From: David Zafman <david.zafman at inktank.com>
+Date: Thu, 1 May 2014 18:54:30 -0700
+Subject: [PATCH] osd/ReplicatedPG: agent_work() fix next if finished early due to start_max
+
+Backport: firefly
+
+Signed-off-by: David Zafman <david.zafman at inktank.com>
+(cherry picked from commit 9cf470cac8dd4d8f769e768f2de6b9eb67a3c3af)
+
+--- a/src/osd/ReplicatedPG.cc
++++ b/src/osd/ReplicatedPG.cc
+@@ -11091,10 +11091,14 @@
+ ++started;
+ if (agent_state->evict_mode != TierAgentState::EVICT_MODE_IDLE &&
+ agent_maybe_evict(obc))
+ ++started;
+- if (started >= start_max)
++ if (started >= start_max) {
++ // If finishing early, set "next" to the next object
++ if (++p != ls.end())
++ next = *p;
+ break;
++ }
+ }
+
+ if (++agent_state->hist_age > g_conf->osd_agent_hist_halflife) {
+ dout(20) << __func__ << " resetting atime and temp histograms" << dendl;
diff --git a/debian/patches/sample.ceph.conf.patch b/debian/patches/sample.ceph.conf.patch
new file mode 100644
index 0000000..88c085a
--- /dev/null
+++ b/debian/patches/sample.ceph.conf.patch
@@ -0,0 +1,365 @@
+Last-Update: 2014-05-07
+Forwarded: yes
+Description: sample.ceph.conf update:
+
+ * corrected URLs.
+ * added [client] section.
+ * more options and descriptions.
+ * filestore settings were moved under [osd].
+ * cephx settings to reflect lack of support for authentication in kernel RBD client.
+ * many minor corrections and updates.
+
+--- a/src/sample.ceph.conf
++++ b/src/sample.ceph.conf
+@@ -30,9 +30,9 @@
+ # $name ; Expands to $type.$id.
+ # ; Example: /var/run/ceph/$cluster-$name.asok
+
+ [global]
+-### http://ceph.com/docs/master/rados/configuration/general-config-ref/
++### http://ceph.com/docs/firefly/rados/configuration/general-config-ref/
+
+ ;fsid = {UUID} # use `uuidgen` to generate your own UUID
+ ;public network = 192.168.0.0/24
+ ;cluster network = 192.168.0.0/24
+@@ -50,10 +50,10 @@
+ # (Default: 0)
+ ;max open files = 131072
+
+
+-### http://ceph.com/docs/master/rados/operations/authentication
+-### http://ceph.com/docs/master/rados/configuration/auth-config-ref/
++### http://ceph.com/docs/firefly/rados/operations/authentication
++### http://ceph.com/docs/firefly/rados/configuration/auth-config-ref/
+
+ # If enabled, the Ceph Storage Cluster daemons (i.e., ceph-mon, ceph-osd,
+ # and ceph-mds) must authenticate with each other.
+ # Type: String (optional); Valid settings are "cephx" or "none".
+@@ -77,23 +77,27 @@
+ # the Ceph Client and the Ceph Storage Cluster, and between daemons
+ # comprising the Ceph Storage Cluster.
+ # Type: Boolean (optional)
+ # (Default: false)
+- cephx require signatures = true ; everywhere possible
++ ;cephx require signatures = true
++
++ # kernel RBD client does not support authentication yet:
++ cephx cluster require signatures = true
++ cephx service require signatures = false
+
+ # The path to the keyring file.
+ # Type: String (optional)
+ # Default: /etc/ceph/$cluster.$name.keyring,/etc/ceph/$cluster.keyring,/etc/ceph/keyring,/etc/ceph/keyring.bin
+ ;keyring = /etc/ceph/$cluster.$name.keyring
+
+
+-### http://ceph.com/docs/master/rados/configuration/pool-pg-config-ref/
++### http://ceph.com/docs/firefly/rados/configuration/pool-pg-config-ref/
+
+
+ ## Replication level, number of data copies.
+ # Type: 32-bit Integer
+ # (Default: 2)
+- ;osd pool default size = 2
++ ;osd pool default size = 3
+
+ ## Replication level in degraded state, less than 'osd pool default size' value.
+ # Sets the minimum number of written replicas for objects in the
+ # pool in order to acknowledge a write operation to the client. If
+@@ -101,9 +105,9 @@
+ # client. This setting ensures a minimum number of replicas when
+ # operating in degraded mode.
+ # Type: 32-bit Integer
+ # (Default: 0), which means no particular minimum. If 0, minimum is size - (size / 2).
+- ;osd pool default min size = 1
++ ;osd pool default min size = 2
+
+ ## Ensure you have a realistic number of placement groups. We recommend
+ ## approximately 100 per OSD. E.g., total number of OSDs multiplied by 100
+ ## divided by the number of replicas (i.e., osd pool default size). So for
+@@ -113,16 +117,16 @@
+ # Description: The default number of placement groups for a pool. The
+ # default value is the same as pg_num with mkpool.
+ # Type: 32-bit Integer
+ # (Default: 8)
+- ;osd pool default pg num = 100
++ ;osd pool default pg num = 128
+
+ # Description: The default number of placement groups for placement for a
+ # pool. The default value is the same as pgp_num with mkpool.
+ # PG and PGP should be equal (for now).
+ # Type: 32-bit Integer
+ # (Default: 8)
+- ;osd pool default pgp num = 100
++ ;osd pool default pgp num = 128
+
+ # The default CRUSH ruleset to use when creating a pool
+ # Type: 32-bit Integer
+ # (Default: 0)
+@@ -134,47 +138,38 @@
+ # (Default: 1) Typically a host containing one or more Ceph OSD Daemons.
+ ;osd crush chooseleaf type = 1
+
+
+-### http://ceph.com/docs/bobtail/rados/configuration/log-and-debug-ref/
++### http://ceph.com/docs/firefly/rados/troubleshooting/log-and-debug/
+
++ # The location of the logging file for your cluster.
++ # Type: String
++ # Required: No
+ # Default: /var/log/ceph/$cluster-$name.log
+ ;log file = /var/log/ceph/$cluster-$name.log
+
++ # Determines if logging messages should appear in syslog.
++ # Type: Boolean
++ # Required: No
++ # (Default: false)
+ ;log to syslog = true
+
+
+-### http://ceph.com/docs/master/rados/configuration/ms-ref/
++### http://ceph.com/docs/firefly/rados/configuration/ms-ref/
+
+ # Enable if you want your daemons to bind to IPv6 address instead of
+ # IPv4 ones. (Not required if you specify a daemon or cluster IP.)
+ # Type: Boolean
+ # (Default: false)
+ ;ms bind ipv6 = true
+
+-
+-### http://ceph.com/docs/master/rados/configuration/filestore-config-ref/
+-
+- # The maximum interval in seconds for synchronizing the filestore.
+- # Type: Double (optional)
+- # (Default: 5)
+- ;filestore max sync interval = 5
+-
+- # Use object map for XATTRS. Set to true for ext4 file systems only.
+- # Type: Boolean (optional)
+- # (Default: false)
+- ;filestore xattr use omap = true
+-
+-### http://ceph.com/docs/master/rados/configuration/journal-ref/
+-
+ ##################
+ ## Monitors
+ ## You need at least one. You need at least three if you want to
+ ## tolerate any node failures. Always create an odd number.
+ [mon]
+-### http://ceph.com/docs/argonaut/config-ref/mon-config/
+-### http://ceph.com/docs/master/rados/configuration/mon-config-ref/
+-### http://ceph.com/docs/dumpling/rados/configuration/mon-osd-interaction/
++### http://ceph.com/docs/firefly/rados/configuration/mon-config-ref/
++### http://ceph.com/docs/firefly/rados/configuration/mon-osd-interaction/
+
+ # The IDs of initial monitors in a cluster during startup.
+ # If specified, Ceph requires an odd number of monitors to form an
+ # initial quorum (e.g., 3).
+@@ -184,9 +179,9 @@
+
+ ;mon host = cephhost01,cephhost02
+ ;mon addr = 192.168.0.101,192.168.0.102
+
+- # The monitor’s data location
++ # The monitor's data location
+ # Default: /var/lib/ceph/mon/$cluster-$id
+ ;mon data = /var/lib/ceph/mon/$name
+
+ # The clock drift in seconds allowed between monitors.
+@@ -196,9 +191,9 @@
+
+ # Exponential backoff for clock drift warnings
+ # Type: Float
+ # (Default: 5)
+- ;mon clock drift warn backoff = 30 ; Tell the monitor to backoff from this warning for 30 seconds
++ ;mon clock drift warn backoff = 30 # Tell the monitor to backoff from this warning for 30 seconds
+
+ # The percentage of disk space used before an OSD is considered full.
+ # Type: Float
+ # (Default: .95)
+@@ -208,10 +203,15 @@
+ # Type: Float
+ # (Default: .85)
+ ;mon osd nearfull ratio = .85
+
++ # The number of seconds Ceph waits before marking a Ceph OSD
++ # Daemon "down" and "out" if it doesn't respond.
++ # Type: 32-bit Integer
++ # (Default: 300)
++ ;mon osd down out interval = 300
+
+-### http://ceph.com/docs/next/rados/troubleshooting/log-and-debug/
++### http://ceph.com/docs/firefly/rados/troubleshooting/log-and-debug/
+
+ # logging, for debugging monitor crashes, in order of
+ # their likelihood of being helpful :)
+ ;debug ms = 1
+@@ -238,18 +238,30 @@
+ # You must deploy at least one metadata server to use CephFS. There is
+ # experimental support for running multiple metadata servers. Do not run
+ # multiple metadata servers in production.
+ [mds]
+-### http://ceph.com/docs/argonaut/config-ref/mds-config/
+-### http://ceph.com/docs/master/cephfs/mds-config-ref/
++### http://ceph.com/docs/firefly/cephfs/mds-config-ref/
+
+ # where the mds keeps it's secret encryption keys
+ ;keyring = /var/lib/ceph/mds/$name/keyring
+
++ # Determines whether a 'ceph-mds' daemon should poll and
++ # replay the log of an active MDS (hot standby).
++ # Type: Boolean
++ # (Default: false)
++ ;mds standby replay = true
++
+ ; mds logging to debug issues.
+ ;debug ms = 1
+ ;debug mds = 20
++ ;debug journaler = 20
+
++ # The number of inodes to cache.
++ # Type: 32-bit Integer
++ # (Default: 100000)
++ ;mds cache size = 250000
++
++ ;mds mem max = 1048576 # KB
+
+ ;[mds.alpha]
+ ; host = alpha
+
+@@ -260,10 +272,9 @@
+ ## osd
+ # You need at least one. Two or more if you want data to be replicated.
+ # Define as many as you like.
+ [osd]
+-### http://ceph.com/docs/argonaut/config-ref/osd-config/
+-### http://ceph.com/docs/bobtail/rados/configuration/osd-config-ref/
++### http://ceph.com/docs/firefly/rados/configuration/osd-config-ref/
+
+ # The path to the OSDs data.
+ # You must create the directory when deploying Ceph.
+ # You should mount a drive for OSD data at this mount point.
+@@ -302,17 +313,22 @@
+ ## hundred MB should be enough; more if you have fast or many
+ ## disks. You can use a file under the osd data dir if need be
+ ## (e.g. /data/$name/journal), but it will be slower than a
+ ## separate disk or partition.
+- # The path to the OSD’s journal. This may be a path to a file or a block
++ # The path to the OSD's journal. This may be a path to a file or a block
+ # device (such as a partition of an SSD). If it is a file, you must
+ # create the directory to contain it.
+ # We recommend using a drive separate from the osd data drive.
+ # Type: String
+ # Default: /var/lib/ceph/osd/$cluster-$id/journal
+ ;osd journal = /var/lib/ceph/osd/$name/journal
+
+-### http://ceph.com/docs/master/rados/configuration/journal-ref/
++ # Check log files for corruption. Can be computationally expensive.
++ # Type: Boolean
++ # (Default: false)
++ ;osd check for log corruption = true
++
++### http://ceph.com/docs/firefly/rados/configuration/journal-ref/
+
+ # The size of the journal in megabytes. If this is 0,
+ # and the journal is a block device, the entire block device is used.
+ # Since v0.54, this is ignored if the journal is a block device,
+@@ -320,23 +336,18 @@
+ # Type: 32-bit Integer
+ # (Default: 5120)
+ # Recommended: Begin with 1GB. Should be at least twice the product
+ # of the expected speed multiplied by "filestore max sync interval".
+- ;osd journal size = 1000 ; journal size, in megabytes
++ ;osd journal size = 2048 ; journal size, in megabytes
+
+ ## If you want to run the journal on a tmpfs, disable DirectIO
+ # Enables direct i/o to the journal.
+- # Requires journal block align set to true.
++ # Requires "journal block align" set to "true".
+ # Type: Boolean
+ # Required: Yes when using aio.
+ # (Default: true)
+ ;journal dio = false
+
+- # Check log files for corruption. Can be computationally expensive.
+- # Type: Boolean
+- # (Default: false)
+- ;osd check for log corruption = true
+-
+ # osd logging to debug osd issues, in order of likelihood of being helpful
+ ;debug ms = 1
+ ;debug osd = 20
+ ;debug filestore = 20
+@@ -361,4 +372,72 @@
+
+ ;[osd.3]
+ ; host = eta
+ ; devs = /dev/sdy
++
++### http://ceph.com/docs/firefly/rados/configuration/filestore-config-ref/
++
++ # The maximum interval in seconds for synchronizing the filestore.
++ # Type: Double (optional)
++ # (Default: 5)
++ ;filestore max sync interval = 5
++
++ # Enable snapshots for a btrfs filestore.
++ # Type: Boolean
++ # Required: No. Only used for btrfs.
++ # (Default: true)
++ ;filestore btrfs snap = false
++
++ # Enables the filestore flusher.
++ # Type: Boolean
++ # Required: No
++ # (Default: false)
++ ;filestore flusher = true
++
++##################
++## client settings
++[client]
++
++### http://ceph.com/docs/firefly/rbd/rbd-config-ref/
++
++ # Enable caching for RADOS Block Device (RBD).
++ # Type: Boolean
++ # Required: No
++ # (Default: false)
++ rbd cache = true
++
++ # The RBD cache size in bytes.
++ # Type: 64-bit Integer
++ # Required: No
++ # (Default: 32 MiB)
++ ;rbd cache size = 33554432
++
++ # The dirty limit in bytes at which the cache triggers write-back.
++ # If 0, uses write-through caching.
++ # Type: 64-bit Integer
++ # Required: No
++ # Constraint: Must be less than rbd cache size.
++ # (Default: 24 MiB)
++ ;rbd cache max dirty = 25165824
++
++ # The dirty target before the cache begins writing data to the data storage.
++ # Does not block writes to the cache.
++ # Type: 64-bit Integer
++ # Required: No
++ # Constraint: Must be less than rbd cache max dirty.
++ # (Default: 16 MiB)
++ ;rbd cache target dirty = 16777216
++
++ # The number of seconds dirty data is in the cache before writeback starts.
++ # Type: Float
++ # Required: No
++ # (Default: 1.0)
++ ;rbd cache max dirty age = 1.0
++
++ # Start out in write-through mode, and switch to write-back after the
++ # first flush request is received. Enabling this is a conservative but
++ # safe setting in case VMs running on rbd are too old to send flushes,
++ # like the virtio driver in Linux before 2.6.32.
++ # Type: Boolean
++ # Required: No
++ # (Default: false)
++ ;rbd cache writethrough until flush = false
diff --git a/debian/patches/series b/debian/patches/series
index bd3a340..57e993a 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,4 +1,13 @@
+## Backported
+8113.patch
+8175.patch
+8282.patch
+8291.patch
+bp0001.patch
+
+## Debian
arch.patch
gcj.patch
modules.patch
+sample.ceph.conf.patch
virtualenv-never-download.patch
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ceph/ceph.git
More information about the Pkg-ceph-commits
mailing list