[Pkg-ceph-commits] [ceph] 05/06: added a bunch of backported patches

Dmitry Smirnov onlyjob at moszumanska.debian.org
Wed May 7 08:33:07 UTC 2014


This is an automated email from the git hooks/post-receive script.

onlyjob pushed a commit to branch master
in repository ceph.

commit a7b1d1e
Author: Dmitry Smirnov <onlyjob at member.fsf.org>
Date:   Wed May 7 06:40:22 2014

    added a bunch of backported patches
---
 debian/patches/8113.patch             | 389 ++++++++++++++++++++++++++++++++++
 debian/patches/8175.patch             |  66 ++++++
 debian/patches/8282.patch             |  32 +++
 debian/patches/8291.patch             | 132 ++++++++++++
 debian/patches/bp0001.patch           |  28 +++
 debian/patches/sample.ceph.conf.patch | 365 +++++++++++++++++++++++++++++++
 debian/patches/series                 |   9 +
 7 files changed, 1021 insertions(+)

diff --git a/debian/patches/8113.patch b/debian/patches/8113.patch
new file mode 100644
index 0000000..3a712c2
--- /dev/null
+++ b/debian/patches/8113.patch
@@ -0,0 +1,389 @@
+From 022d467b5d6b77c17b6fdaeec8369cae61e9e5a4 Mon Sep 17 00:00:00 2001
+From: David Zafman <david.zafman at inktank.com>
+Date: Mon, 21 Apr 2014 23:52:04 -0700
+Subject: [PATCH] osd, common: If agent_work() finds no objs to work on delay 5
+ (default) secs
+
+Add config osd_agent_delay_time of 5 seconds
+Honor delay by ignoring agent_choose_mode() calls
+Add tier_delay to logger
+Treat restart after delay like we were previously idle
+
+Fixes: #8113
+Backport: firefly
+
+Signed-off-by: David Zafman <david.zafman at inktank.com>
+(cherry picked from commit b7d31e5f5952c631dd4172bcb825e77a13fc60bc)
+
+--- a/src/common/config_opts.h
++++ b/src/common/config_opts.h
+@@ -398,8 +398,9 @@
+ // max agent flush ops
+ OPTION(osd_agent_max_ops, OPT_INT, 4)
+ OPTION(osd_agent_min_evict_effort, OPT_FLOAT, .1)
+ OPTION(osd_agent_quantize_effort, OPT_FLOAT, .1)
++OPTION(osd_agent_delay_time, OPT_FLOAT, 5.0)
+ 
+ // decay atime and hist histograms after how many objects go by
+ OPTION(osd_agent_hist_halflife, OPT_INT, 1000)
+ 
+--- a/src/osd/OSD.cc
++++ b/src/osd/OSD.cc
+@@ -200,8 +200,10 @@
+   agent_ops(0),
+   agent_active(true),
+   agent_thread(this),
+   agent_stop_flag(false),
++  agent_timer_lock("OSD::agent_timer_lock"),
++  agent_timer(osd->client_messenger->cct, agent_timer_lock),
+   objecter_lock("OSD::objecter_lock"),
+   objecter_timer(osd->client_messenger->cct, objecter_lock),
+   objecter(new Objecter(osd->client_messenger->cct, osd->objecter_messenger, osd->monc, &objecter_osdmap,
+ 			objecter_lock, objecter_timer, 0, 0)),
+@@ -434,8 +436,12 @@
+   {
+     Mutex::Locker l(backfill_request_lock);
+     backfill_request_timer.shutdown();
+   }
++  {
++    Mutex::Locker l(agent_timer_lock);
++    agent_timer.shutdown();
++  }
+   osdmap = OSDMapRef();
+   next_osdmap = OSDMapRef();
+ }
+ 
+@@ -450,8 +456,9 @@
+     objecter->set_client_incarnation(0);
+     objecter->init_locked();
+   }
+   watch_timer.init();
++  agent_timer.init();
+ 
+   agent_thread.create();
+ }
+ 
+@@ -465,8 +472,17 @@
+   agent_cond.Signal();
+   agent_lock.Unlock();
+ }
+ 
++class AgentTimeoutCB : public Context {
++  PGRef pg;
++public:
++  AgentTimeoutCB(PGRef _pg) : pg(_pg) {}
++  void finish(int) {
++    pg->agent_choose_mode_restart();
++  }
++};
++
+ void OSDService::agent_entry()
+ {
+   dout(10) << __func__ << " start" << dendl;
+   agent_lock.Lock();
+@@ -500,9 +516,20 @@
+     }
+     PGRef pg = *agent_queue_pos;
+     int max = g_conf->osd_agent_max_ops - agent_ops;
+     agent_lock.Unlock();
+-    pg->agent_work(max);
++    if (!pg->agent_work(max)) {
++      dout(10) << __func__ << " " << *pg
++	<< " no agent_work, delay for " << g_conf->osd_agent_delay_time
++	<< " seconds" << dendl;
++
++      osd->logger->inc(l_osd_tier_delay);
++      // Queue a timer to call agent_choose_mode for this pg in 5 seconds
++      agent_timer_lock.Lock();
++      Context *cb = new AgentTimeoutCB(pg);
++      agent_timer.add_event_after(g_conf->osd_agent_delay_time, cb);
++      agent_timer_lock.Unlock();
++    }
+     agent_lock.Lock();
+   }
+   agent_lock.Unlock();
+   dout(10) << __func__ << " finish" << dendl;
+@@ -1477,8 +1504,9 @@
+   osd_plb.add_u64_counter(l_osd_tier_evict, "tier_evict");
+   osd_plb.add_u64_counter(l_osd_tier_whiteout, "tier_whiteout");
+   osd_plb.add_u64_counter(l_osd_tier_dirty, "tier_dirty");
+   osd_plb.add_u64_counter(l_osd_tier_clean, "tier_clean");
++  osd_plb.add_u64_counter(l_osd_tier_delay, "tier_delay");
+ 
+   osd_plb.add_u64_counter(l_osd_agent_wake, "agent_wake");
+   osd_plb.add_u64_counter(l_osd_agent_skip, "agent_skip");
+   osd_plb.add_u64_counter(l_osd_agent_flush, "agent_flush");
+--- a/src/osd/OSD.h
++++ b/src/osd/OSD.h
+@@ -132,8 +132,9 @@
+   l_osd_tier_evict,
+   l_osd_tier_whiteout,
+   l_osd_tier_dirty,
+   l_osd_tier_clean,
++  l_osd_tier_delay,
+ 
+   l_osd_agent_wake,
+   l_osd_agent_skip,
+   l_osd_agent_flush,
+@@ -465,8 +466,10 @@
+       return NULL;
+     }
+   } agent_thread;
+   bool agent_stop_flag;
++  Mutex agent_timer_lock;
++  SafeTimer agent_timer;
+ 
+   void agent_entry();
+   void agent_stop();
+ 
+--- a/src/osd/PG.h
++++ b/src/osd/PG.h
+@@ -2130,11 +2130,13 @@
+   virtual void on_shutdown() = 0;
+   virtual void check_blacklisted_watchers() = 0;
+   virtual void get_watchers(std::list<obj_watch_item_t>&) = 0;
+ 
+-  virtual void agent_work(int max) = 0;
++  virtual bool agent_work(int max) = 0;
+   virtual void agent_stop() = 0;
++  virtual void agent_delay() = 0;
+   virtual void agent_clear() = 0;
++  virtual void agent_choose_mode_restart() = 0;
+ };
+ 
+ ostream& operator<<(ostream& out, const PG& pg);
+ 
+--- a/src/osd/ReplicatedPG.cc
++++ b/src/osd/ReplicatedPG.cc
+@@ -10965,8 +10965,9 @@
+     agent_state->position.pool = info.pgid.pool();
+     agent_state->position.hash = pool.info.get_random_pg_position(
+       info.pgid.pgid,
+       rand());
++    agent_state->start = agent_state->position;
+ 
+     dout(10) << __func__ << " allocated new state, position "
+ 	     << agent_state->position << dendl;
+   } else {
+@@ -10985,23 +10986,24 @@
+   agent_stop();
+   agent_state.reset(NULL);
+ }
+ 
+-void ReplicatedPG::agent_work(int start_max)
++// Return false if no objects operated on since start of object hash space
++bool ReplicatedPG::agent_work(int start_max)
+ {
+   lock();
+   if (!agent_state) {
+     dout(10) << __func__ << " no agent state, stopping" << dendl;
+     unlock();
+-    return;
++    return true;
+   }
+ 
+   assert(!deleting);
+ 
+   if (agent_state->is_idle()) {
+     dout(10) << __func__ << " idle, stopping" << dendl;
+     unlock();
+-    return;
++    return true;
+   }
+ 
+   osd->logger->inc(l_osd_agent_wake);
+ 
+@@ -11100,15 +11102,44 @@
+     agent_state->atime_hist.decay();
+     agent_state->temp_hist.decay();
+   }
+ 
++  // Total objects operated on so far
++  int total_started = agent_state->started + started;
++  bool need_delay = false;
++
++  dout(20) << __func__ << " start pos " << agent_state->position
++    << " next start pos " << next
++    << " started " << total_started << dendl;
++
++  // See if we've made a full pass over the object hash space
++  // This might check at most ls_max objects a second time to notice that
++  // we've checked every objects at least once.
++  if (agent_state->position < agent_state->start && next >= agent_state->start) {
++    dout(20) << __func__ << " wrap around " << agent_state->start << dendl;
++    if (total_started == 0)
++      need_delay = true;
++    else
++      total_started = 0;
++    agent_state->start = next;
++  }
++  agent_state->started = total_started;
++
++  // See if we are starting from beginning
+   if (next.is_max())
+     agent_state->position = hobject_t();
+   else
+     agent_state->position = next;
+-  dout(20) << __func__ << " final position " << agent_state->position << dendl;
++
++  if (need_delay) {
++    assert(agent_state->delaying == false);
++    agent_delay();
++    unlock();
++    return false;
++  }
+   agent_choose_mode();
+   unlock();
++  return true;
+ }
+ 
+ void ReplicatedPG::agent_load_hit_sets()
+ {
+@@ -11308,10 +11339,37 @@
+     osd->agent_disable_pg(this, agent_state->evict_effort);
+   }
+ }
+ 
+-void ReplicatedPG::agent_choose_mode()
++void ReplicatedPG::agent_delay()
+ {
++  dout(20) << __func__ << dendl;
++  if (agent_state && !agent_state->is_idle()) {
++    assert(agent_state->delaying == false);
++    agent_state->delaying = true;
++    osd->agent_disable_pg(this, agent_state->evict_effort);
++  }
++}
++
++void ReplicatedPG::agent_choose_mode_restart()
++{
++  dout(20) << __func__ << dendl;
++  lock();
++  if (agent_state && agent_state->delaying) {
++    agent_state->delaying = false;
++    agent_choose_mode(true);
++  }
++  unlock();
++}
++
++void ReplicatedPG::agent_choose_mode(bool restart)
++{
++  // Let delay play out
++  if (agent_state->delaying) {
++    dout(20) << __func__ << this << " delaying, ignored" << dendl;
++    return;
++  }
++
+   uint64_t divisor = pool.info.get_pg_num_divisor(info.pgid.pgid);
+ 
+   uint64_t num_user_objects = info.stats.stats.sum.num_objects;
+ 
+@@ -11383,9 +11441,9 @@
+   // flush mode
+   TierAgentState::flush_mode_t flush_mode = TierAgentState::FLUSH_MODE_IDLE;
+   uint64_t flush_target = pool.info.cache_target_dirty_ratio_micro;
+   uint64_t flush_slop = (float)flush_target * g_conf->osd_agent_slop;
+-  if (agent_state->flush_mode == TierAgentState::FLUSH_MODE_IDLE)
++  if (restart || agent_state->flush_mode == TierAgentState::FLUSH_MODE_IDLE)
+     flush_target += flush_slop;
+   else
+     flush_target -= MIN(flush_target, flush_slop);
+ 
+@@ -11400,9 +11458,9 @@
+   TierAgentState::evict_mode_t evict_mode = TierAgentState::EVICT_MODE_IDLE;
+   unsigned evict_effort = 0;
+   uint64_t evict_target = pool.info.cache_target_full_ratio_micro;
+   uint64_t evict_slop = (float)evict_target * g_conf->osd_agent_slop;
+-  if (agent_state->evict_mode == TierAgentState::EVICT_MODE_IDLE)
++  if (restart || agent_state->evict_mode == TierAgentState::EVICT_MODE_IDLE)
+     evict_target += evict_slop;
+   else
+     evict_target -= MIN(evict_target, evict_slop);
+ 
+@@ -11464,13 +11522,13 @@
+   // NOTE: we are using evict_effort as a proxy for *all* agent effort
+   // (including flush).  This is probably fine (they should be
+   // correlated) but it is not precisely correct.
+   if (agent_state->is_idle()) {
+-    if (!old_idle) {
++    if (!restart && !old_idle) {
+       osd->agent_disable_pg(this, old_effort);
+     }
+   } else {
+-    if (old_idle) {
++    if (restart || old_idle) {
+       osd->agent_enable_pg(this, agent_state->evict_effort);
+     } else if (old_effort != agent_state->evict_effort) {
+       osd->agent_adjust_pg(this, old_effort, agent_state->evict_effort);
+     }
+--- a/src/osd/ReplicatedPG.h
++++ b/src/osd/ReplicatedPG.h
+@@ -808,9 +808,9 @@
+   friend class C_AgentFlushStartStop;
+   friend class C_HitSetFlushing;
+ 
+   void agent_setup();       ///< initialize agent state
+-  void agent_work(int max); ///< entry point to do some agent work
++  bool agent_work(int max); ///< entry point to do some agent work
+   bool agent_maybe_flush(ObjectContextRef& obc);  ///< maybe flush
+   bool agent_maybe_evict(ObjectContextRef& obc);  ///< maybe evict
+ 
+   void agent_load_hit_sets();  ///< load HitSets, if needed
+@@ -824,13 +824,15 @@
+ 				 int *atime, int *temperature);
+ 
+   /// stop the agent
+   void agent_stop();
++  void agent_delay();
+ 
+   /// clear agent state
+   void agent_clear();
+ 
+-  void agent_choose_mode();  ///< choose (new) agent mode(s)
++  void agent_choose_mode(bool restart = false);  ///< choose (new) agent mode(s)
++  void agent_choose_mode_restart();
+ 
+   /// true if we can send an ondisk/commit for v
+   bool already_complete(eversion_t v) {
+     for (xlist<RepGather*>::iterator i = repop_queue.begin();
+--- a/src/osd/TierAgentState.h
++++ b/src/osd/TierAgentState.h
+@@ -16,8 +16,12 @@
+ 
+ struct TierAgentState {
+   /// current position iterating across pool
+   hobject_t position;
++  /// Count of agent_work since "start" position of object hash space
++  int started;
++  hobject_t start;
++  bool delaying;
+ 
+   /// histogram of ages we've encountered
+   pow2_hist_t atime_hist;
+   pow2_hist_t temp_hist;
+@@ -65,19 +69,22 @@
+   /// distributed) that i should aim to evict.
+   unsigned evict_effort;
+ 
+   TierAgentState()
+-    : hist_age(0),
++    : started(0),
++      delaying(false),
++      hist_age(0),
+       flush_mode(FLUSH_MODE_IDLE),
+       evict_mode(EVICT_MODE_IDLE),
+       evict_effort(0)
+   {}
+ 
+   /// false if we have any work to do
+   bool is_idle() const {
+     return
+-      flush_mode == FLUSH_MODE_IDLE &&
+-      evict_mode == EVICT_MODE_IDLE;
++      delaying ||
++      (flush_mode == FLUSH_MODE_IDLE &&
++      evict_mode == EVICT_MODE_IDLE);
+   }
+ 
+   /// add archived HitSet
+   void add_hit_set(time_t start, HitSetRef hs) {
diff --git a/debian/patches/8175.patch b/debian/patches/8175.patch
new file mode 100644
index 0000000..69790c9
--- /dev/null
+++ b/debian/patches/8175.patch
@@ -0,0 +1,66 @@
+From e7df73dd7aaf5a0b1171f73d6695d26cd25b7b35 Mon Sep 17 00:00:00 2001
+From: Sage Weil <sage at inktank.com>
+Date: Thu, 1 May 2014 16:53:17 -0700
+Subject: [PATCH] osd: Prevent divide by zero in agent_choose_mode()
+
+Fixes: #8175
+Backport: firefly
+
+Signed-off-by: David Zafman <david.zafman at inktank.com>
+Signed-off-by: Sage Weil <sage at inktank.com>
+(cherry picked from commit f47f867952e6b2a16a296c82bb9b585b21cde6c8)
+
+--- a/src/osd/ReplicatedPG.cc
++++ b/src/osd/ReplicatedPG.cc
+@@ -11369,8 +11369,9 @@
+     return;
+   }
+ 
+   uint64_t divisor = pool.info.get_pg_num_divisor(info.pgid.pgid);
++  assert(divisor > 0);
+ 
+   uint64_t num_user_objects = info.stats.stats.sum.num_objects;
+ 
+   // adjust (effective) user objects down based on the number
+@@ -11417,21 +11418,22 @@
+     uint64_t avg_size = info.stats.stats.sum.num_bytes /
+       info.stats.stats.sum.num_objects;
+     dirty_micro =
+       num_dirty * avg_size * 1000000 /
+-      (pool.info.target_max_bytes / divisor);
++      MAX(pool.info.target_max_bytes / divisor, 1);
+     full_micro =
+       num_user_objects * avg_size * 1000000 /
+-      (pool.info.target_max_bytes / divisor);
++      MAX(pool.info.target_max_bytes / divisor, 1);
+   }
+   if (pool.info.target_max_objects) {
+     uint64_t dirty_objects_micro =
+       num_dirty * 1000000 /
+-      (pool.info.target_max_objects / divisor);
++      MAX(pool.info.target_max_objects / divisor, 1);
+     if (dirty_objects_micro > dirty_micro)
+       dirty_micro = dirty_objects_micro;
+     uint64_t full_objects_micro =
+-      num_user_objects * 1000000 / (pool.info.target_max_objects / divisor);
++      num_user_objects * 1000000 /
++      MAX(pool.info.target_max_objects / divisor, 1);
+     if (full_objects_micro > full_micro)
+       full_micro = full_objects_micro;
+   }
+   dout(20) << __func__ << " dirty " << ((float)dirty_micro / 1000000.0)
+@@ -11473,9 +11475,13 @@
+   } else if (full_micro > evict_target) {
+     // set effort in [0..1] range based on where we are between
+     evict_mode = TierAgentState::EVICT_MODE_SOME;
+     uint64_t over = full_micro - evict_target;
+-    uint64_t span = 1000000 - evict_target;
++    uint64_t span;
++    if (evict_target >= 1000000)
++      span = 1;
++    else
++      span = 1000000 - evict_target;
+     evict_effort = MAX(over * 1000000 / span,
+ 		       (unsigned)(1000000.0 * g_conf->osd_agent_min_evict_effort));
+ 
+     // quantize effort to avoid too much reordering in the agent_queue.
diff --git a/debian/patches/8282.patch b/debian/patches/8282.patch
new file mode 100644
index 0000000..d0d3f3e
--- /dev/null
+++ b/debian/patches/8282.patch
@@ -0,0 +1,32 @@
+From 1b899148a729235ab2835d368077f18e62a36a93 Mon Sep 17 00:00:00 2001
+From: Haomai Wang <haomaiwang at gmail.com>
+Date: Sat, 3 May 2014 12:53:06 +0800
+Subject: [PATCH] Fix clone problem
+
+When clone happened, the origin header also will be updated in GenericObjectMap,
+so the new header wraper(StripObjectHeader) should be updated too.
+
+Fix #8282
+Signed-off-by: Haomai Wang <haomaiwang at gmail.com>
+(cherry picked from commit 3aee1e0ffe0583f74c02d9c9e86c7fb267f3515c)
+
+--- a/src/os/KeyValueStore.cc
++++ b/src/os/KeyValueStore.cc
+@@ -203,13 +203,16 @@
+   Header new_origin_header;
+ 
+   if (target_header)
+     *target_header = old_header;
++  if (origin_header)
++    *origin_header = old_header;
+ 
+   clone(old_header.header, cid, oid, t, &new_origin_header,
+         &target_header->header);
+ 
+-  old_header.header = new_origin_header;
++  if(origin_header)
++    origin_header->header = new_origin_header;
+ 
+   if (target_header) {
+     target_header->oid = oid;
+     target_header->cid = cid;
diff --git a/debian/patches/8291.patch b/debian/patches/8291.patch
new file mode 100644
index 0000000..b50de53
--- /dev/null
+++ b/debian/patches/8291.patch
@@ -0,0 +1,132 @@
+From 09a1bc5a4601d356b9cc69be8541e6515d763861 Mon Sep 17 00:00:00 2001
+From: "Yan, Zheng" <zheng.z.yan at intel.com>
+Date: Fri, 11 Apr 2014 15:03:37 +0800
+Subject: [PATCH] client: add asok command to kick sessions that were remote
+ reset
+
+Fixes: #8021
+Signed-off-by: Yan, Zheng <zheng.z.yan at intel.com>
+
+--- a/src/client/Client.cc
++++ b/src/client/Client.cc
+@@ -119,8 +119,10 @@
+   else if (command == "mds_sessions")
+     m_client->dump_mds_sessions(f);
+   else if (command == "dump_cache")
+     m_client->dump_cache(f);
++  else if (command == "kick_stale_sessions")
++    m_client->_kick_stale_sessions();
+   else
+     assert(0 == "bad command registered");
+   m_client->client_lock.Unlock();
+   f->close_section();
+@@ -403,8 +405,16 @@
+   if (ret < 0) {
+     lderr(cct) << "error registering admin socket command: "
+ 	       << cpp_strerror(-ret) << dendl;
+   }
++  ret = admin_socket->register_command("kick_stale_sessions",
++				       "kick_stale_sessions",
++				       &m_command_hook,
++				       "kick sessions that were remote reset");
++  if (ret < 0) {
++    lderr(cct) << "error registering admin socket command: "
++	       << cpp_strerror(-ret) << dendl;
++  }
+ 
+   client_lock.Lock();
+   initialized = true;
+   client_lock.Unlock();
+@@ -418,8 +428,9 @@
+   AdminSocket* admin_socket = cct->get_admin_socket();
+   admin_socket->unregister_command("mds_requests");
+   admin_socket->unregister_command("mds_sessions");
+   admin_socket->unregister_command("dump_cache");
++  admin_socket->unregister_command("kick_stale_sessions");
+ 
+   if (ino_invalidate_cb) {
+     ldout(cct, 10) << "shutdown stopping cache invalidator finisher" << dendl;
+     async_ino_invalidator.wait_for_empty();
+@@ -1537,9 +1548,10 @@
+ bool Client::have_open_session(int mds)
+ {
+   return
+     mds_sessions.count(mds) &&
+-    mds_sessions[mds]->state == MetaSession::STATE_OPEN;
++    (mds_sessions[mds]->state == MetaSession::STATE_OPEN ||
++     mds_sessions[mds]->state == MetaSession::STATE_STALE);
+ }
+ 
+ MetaSession *Client::_get_mds_session(int mds, Connection *con)
+ {
+@@ -1648,8 +1660,21 @@
+ 
+   m->put();
+ }
+ 
++void Client::_kick_stale_sessions()
++{
++  ldout(cct, 1) << "kick_stale_sessions" << dendl;
++
++  for (map<int,MetaSession*>::iterator p = mds_sessions.begin();
++       p != mds_sessions.end(); ) {
++    MetaSession *s = p->second;
++    ++p;
++    if (s->state == MetaSession::STATE_STALE)
++      _closed_mds_session(s);
++  }
++}
++
+ void Client::send_request(MetaRequest *request, MetaSession *session)
+ {
+   // make the request
+   int mds = session->mds_num;
+@@ -8959,8 +8984,12 @@
+ 	  }
+ 	  break;
+ 
+ 	case MetaSession::STATE_OPEN:
++	  ldout(cct, 1) << "reset from mds we were open; mark session as stale" << dendl;
++	  s->state = MetaSession::STATE_STALE;
++	  break;
++
+ 	case MetaSession::STATE_NEW:
+ 	case MetaSession::STATE_CLOSED:
+ 	default:
+ 	  break;
+--- a/src/client/Client.h
++++ b/src/client/Client.h
+@@ -249,8 +249,9 @@
+   MetaSession *_get_or_open_mds_session(int mds);
+   MetaSession *_open_mds_session(int mds);
+   void _close_mds_session(MetaSession *s);
+   void _closed_mds_session(MetaSession *s);
++  void _kick_stale_sessions();
+   void handle_client_session(MClientSession *m);
+   void send_reconnect(MetaSession *s);
+   void resend_unsafe_requests(MetaSession *s);
+ 
+--- a/src/client/MetaSession.cc
++++ b/src/client/MetaSession.cc
+@@ -14,8 +14,9 @@
+   case STATE_OPENING: return "opening";
+   case STATE_OPEN: return "open";
+   case STATE_CLOSING: return "closing";
+   case STATE_CLOSED: return "closed";
++  case STATE_STALE: return "stale";
+   default: return "unknown";
+   }
+ }
+ 
+--- a/src/client/MetaSession.h
++++ b/src/client/MetaSession.h
+@@ -32,8 +32,9 @@
+     STATE_OPENING,
+     STATE_OPEN,
+     STATE_CLOSING,
+     STATE_CLOSED,
++    STATE_STALE,
+   } state;
+ 
+   list<Context*> waiting_for_open;
+ 
diff --git a/debian/patches/bp0001.patch b/debian/patches/bp0001.patch
new file mode 100644
index 0000000..cd8e595
--- /dev/null
+++ b/debian/patches/bp0001.patch
@@ -0,0 +1,28 @@
+From 6a55c3bc3caf46652e962fa9434900fb494d1e6c Mon Sep 17 00:00:00 2001
+From: David Zafman <david.zafman at inktank.com>
+Date: Thu, 1 May 2014 18:54:30 -0700
+Subject: [PATCH] osd/ReplicatedPG: agent_work() fix next if finished early due to start_max
+
+Backport: firefly
+
+Signed-off-by: David Zafman <david.zafman at inktank.com>
+(cherry picked from commit 9cf470cac8dd4d8f769e768f2de6b9eb67a3c3af)
+
+--- a/src/osd/ReplicatedPG.cc
++++ b/src/osd/ReplicatedPG.cc
+@@ -11091,10 +11091,14 @@
+       ++started;
+     if (agent_state->evict_mode != TierAgentState::EVICT_MODE_IDLE &&
+ 	agent_maybe_evict(obc))
+       ++started;
+-    if (started >= start_max)
++    if (started >= start_max) {
++      // If finishing early, set "next" to the next object
++      if (++p != ls.end())
++	next = *p;
+       break;
++    }
+   }
+ 
+   if (++agent_state->hist_age > g_conf->osd_agent_hist_halflife) {
+     dout(20) << __func__ << " resetting atime and temp histograms" << dendl;
diff --git a/debian/patches/sample.ceph.conf.patch b/debian/patches/sample.ceph.conf.patch
new file mode 100644
index 0000000..88c085a
--- /dev/null
+++ b/debian/patches/sample.ceph.conf.patch
@@ -0,0 +1,365 @@
+Last-Update: 2014-05-07
+Forwarded: yes
+Description: sample.ceph.conf update:
+
+ * corrected URLs.
+ * added [client] section.
+ * more options and descriptions.
+ * filestore settings were moved under [osd].
+ * cephx settings to reflect lack of support for authentication in kernel RBD client.
+ * many minor corrections and updates.
+
+--- a/src/sample.ceph.conf
++++ b/src/sample.ceph.conf
+@@ -30,9 +30,9 @@
+ # $name       ; Expands to $type.$id.
+ #             ; Example: /var/run/ceph/$cluster-$name.asok
+ 
+ [global]
+-### http://ceph.com/docs/master/rados/configuration/general-config-ref/
++### http://ceph.com/docs/firefly/rados/configuration/general-config-ref/
+ 
+     ;fsid                       = {UUID}    # use `uuidgen` to generate your own UUID
+     ;public network             = 192.168.0.0/24
+     ;cluster network            = 192.168.0.0/24
+@@ -50,10 +50,10 @@
+     # (Default: 0)
+     ;max open files             = 131072
+ 
+ 
+-### http://ceph.com/docs/master/rados/operations/authentication
+-### http://ceph.com/docs/master/rados/configuration/auth-config-ref/
++### http://ceph.com/docs/firefly/rados/operations/authentication
++### http://ceph.com/docs/firefly/rados/configuration/auth-config-ref/
+ 
+     # If enabled, the Ceph Storage Cluster daemons (i.e., ceph-mon, ceph-osd,
+     # and ceph-mds) must authenticate with each other.
+     # Type: String (optional); Valid settings are "cephx" or "none".
+@@ -77,23 +77,27 @@
+     # the Ceph Client and the Ceph Storage Cluster, and between daemons
+     # comprising the Ceph Storage Cluster.
+     # Type: Boolean (optional)
+     # (Default: false)
+-    cephx require signatures   = true    ; everywhere possible
++    ;cephx require signatures   = true
++
++    # kernel RBD client do not support authentication yet:
++    cephx cluster require signatures = true
++    cephx service require signatures = false
+ 
+     # The path to the keyring file.
+     # Type: String (optional)
+     # Default: /etc/ceph/$cluster.$name.keyring,/etc/ceph/$cluster.keyring,/etc/ceph/keyring,/etc/ceph/keyring.bin
+     ;keyring                  = /etc/ceph/$cluster.$name.keyring
+ 
+ 
+-### http://ceph.com/docs/master/rados/configuration/pool-pg-config-ref/
++### http://ceph.com/docs/firefly/rados/configuration/pool-pg-config-ref/
+ 
+ 
+     ## Replication level, number of data copies.
+     # Type: 32-bit Integer
+     # (Default: 2)
+-    ;osd pool default size      = 2
++    ;osd pool default size      = 3
+ 
+     ## Replication level in degraded state, less than 'osd pool default size' value.
+     # Sets the minimum number of written replicas for objects in the
+     # pool in order to acknowledge a write operation to the client. If
+@@ -101,9 +105,9 @@
+     # client. This setting ensures a minimum number of replicas when
+     # operating in degraded mode.
+     # Type: 32-bit Integer
+     # (Default: 0), which means no particular minimum. If 0, minimum is size - (size / 2).
+-    ;osd pool default min size  = 1
++    ;osd pool default min size  = 2
+ 
+     ## Ensure you have a realistic number of placement groups. We recommend
+     ## approximately 100 per OSD. E.g., total number of OSDs multiplied by 100
+     ## divided by the number of replicas (i.e., osd pool default size). So for
+@@ -113,16 +117,16 @@
+     # Description: The default number of placement groups for a pool. The
+     #              default value is the same as pg_num with mkpool.
+     # Type: 32-bit Integer
+     # (Default: 8)
+-    ;osd pool default pg num    = 100
++    ;osd pool default pg num    = 128
+ 
+     # Description: The default number of placement groups for placement for a
+     #              pool. The default value is the same as pgp_num with mkpool.
+     #              PG and PGP should be equal (for now).
+     # Type: 32-bit Integer
+     # (Default: 8)
+-    ;osd pool default pgp num   = 100
++    ;osd pool default pgp num   = 128
+ 
+     # The default CRUSH ruleset to use when creating a pool
+     # Type: 32-bit Integer
+     # (Default: 0)
+@@ -134,47 +138,38 @@
+     # (Default: 1) Typically a host containing one or more Ceph OSD Daemons.
+     ;osd crush chooseleaf type = 1
+ 
+ 
+-### http://ceph.com/docs/bobtail/rados/configuration/log-and-debug-ref/
++### http://ceph.com/docs/firefly/rados/troubleshooting/log-and-debug/
+ 
++    # The location of the logging file for your cluster.
++    # Type: String
++    # Required: No
+     # Default: /var/log/ceph/$cluster-$name.log
+     ;log file                   = /var/log/ceph/$cluster-$name.log
+ 
++    # Determines if logging messages should appear in syslog.
++    # Type: Boolean
++    # Required: No
++    # (Default: false)
+     ;log to syslog              = true
+ 
+ 
+-### http://ceph.com/docs/master/rados/configuration/ms-ref/
++### http://ceph.com/docs/firefly/rados/configuration/ms-ref/
+ 
+     # Enable if you want your daemons to bind to IPv6 address instead of
+     # IPv4 ones. (Not required if you specify a daemon or cluster IP.)
+     # Type: Boolean
+     # (Default: false)
+     ;ms bind ipv6               = true
+ 
+-
+-### http://ceph.com/docs/master/rados/configuration/filestore-config-ref/
+-
+-    # The maximum interval in seconds for synchronizing the filestore.
+-    # Type: Double (optional)
+-    # (Default: 5)
+-    ;filestore max sync interval = 5
+-
+-    # Use object map for XATTRS. Set to true for ext4 file systems only.
+-    # Type: Boolean (optional)
+-    # (Default: false)
+-    ;filestore xattr use omap    = true
+-
+-### http://ceph.com/docs/master/rados/configuration/journal-ref/
+-
+ ##################
+ ## Monitors
+ ## You need at least one. You need at least three if you want to
+ ## tolerate any node failures. Always create an odd number.
+ [mon]
+-### http://ceph.com/docs/argonaut/config-ref/mon-config/
+-### http://ceph.com/docs/master/rados/configuration/mon-config-ref/
+-### http://ceph.com/docs/dumpling/rados/configuration/mon-osd-interaction/
++### http://ceph.com/docs/firefly/rados/configuration/mon-config-ref/
++### http://ceph.com/docs/firefly/rados/configuration/mon-osd-interaction/
+ 
+     # The IDs of initial monitors in a cluster during startup.
+     # If specified, Ceph requires an odd number of monitors to form an
+     # initial quorum (e.g., 3).
+@@ -184,9 +179,9 @@
+ 
+     ;mon host                   = cephhost01,cephhost02
+     ;mon addr                   = 192.168.0.101,192.168.0.102
+ 
+-    # The monitor’s data location
++    # The monitor's data location
+     # Default: /var/lib/ceph/mon/$cluster-$id
+     ;mon data                   = /var/lib/ceph/mon/$name
+ 
+     # The clock drift in seconds allowed between monitors.
+@@ -196,9 +191,9 @@
+ 
+     # Exponential backoff for clock drift warnings
+     # Type: Float
+     # (Default: 5)
+-    ;mon clock drift warn backoff = 30    ; Tell the monitor to backoff from this warning for 30 seconds
++    ;mon clock drift warn backoff = 30    # Tell the monitor to backoff from this warning for 30 seconds
+ 
+     # The percentage of disk space used before an OSD is considered full.
+     # Type: Float
+     # (Default: .95)
+@@ -208,10 +203,15 @@
+     # Type: Float
+     # (Default: .85)
+     ;mon osd nearfull ratio     = .85
+ 
++    # The number of seconds Ceph waits before marking a Ceph OSD
++    # Daemon "down" and "out" if it doesn't respond.
++    # Type: 32-bit Integer
++    # (Default: 300)
++    ;mon osd down out interval  = 300
+ 
+-### http://ceph.com/docs/next/rados/troubleshooting/log-and-debug/
++### http://ceph.com/docs/firefly/rados/troubleshooting/log-and-debug/
+ 
+     # logging, for debugging monitor crashes, in order of
+     # their likelihood of being helpful :)
+     ;debug ms                   = 1
+@@ -238,18 +238,30 @@
+ # You must deploy at least one metadata server to use CephFS. There is
+ # experimental support for running multiple metadata servers. Do not run
+ # multiple metadata servers in production.
+ [mds]
+-### http://ceph.com/docs/argonaut/config-ref/mds-config/
+-### http://ceph.com/docs/master/cephfs/mds-config-ref/
++### http://ceph.com/docs/firefly/cephfs/mds-config-ref/
+ 
+     # where the mds keeps it's secret encryption keys
+     ;keyring                    = /var/lib/ceph/mds/$name/keyring
+ 
++    # Determines whether a 'ceph-mds' daemon should poll and
++    # replay the log of an active MDS (hot standby).
++    # Type:  Boolean
++    # (Default: false)
++    ;mds standby replay          = true
++
+     ; mds logging to debug issues.
+     ;debug ms                   = 1
+     ;debug mds                  = 20
++    ;debug journaler            = 20
+ 
++    # The number of inodes to cache.
++    # Type: 32-bit Integer
++    # (Default: 100000)
++    ;mds cache size             = 250000
++
++    ;mds mem max                = 1048576     # KB
+ 
+ ;[mds.alpha]
+ ;    host                       = alpha
+ 
+@@ -260,10 +272,9 @@
+ ## osd
+ # You need at least one.  Two or more if you want data to be replicated.
+ # Define as many as you like.
+ [osd]
+-### http://ceph.com/docs/argonaut/config-ref/osd-config/
+-### http://ceph.com/docs/bobtail/rados/configuration/osd-config-ref/
++### http://ceph.com/docs/firefly/rados/configuration/osd-config-ref/
+ 
+     # The path to the OSDs data.
+     # You must create the directory when deploying Ceph.
+     # You should mount a drive for OSD data at this mount point.
+@@ -302,17 +313,22 @@
+     ## hundred MB should be enough; more if you have fast or many
+     ## disks.  You can use a file under the osd data dir if need be
+     ## (e.g. /data/$name/journal), but it will be slower than a
+     ## separate disk or partition.
+-    # The path to the OSD’s journal. This may be a path to a file or a block
++    # The path to the OSD's journal. This may be a path to a file or a block
+     # device (such as a partition of an SSD). If it is a file, you must
+     # create the directory to contain it.
+     # We recommend using a drive separate from the osd data drive.
+     # Type: String
+     # Default: /var/lib/ceph/osd/$cluster-$id/journal
+     ;osd journal                  = /var/lib/ceph/osd/$name/journal
+ 
+-### http://ceph.com/docs/master/rados/configuration/journal-ref/
++    # Check log files for corruption. Can be computationally expensive.
++    # Type: Boolean
++    # (Default: false)
++    ;osd check for log corruption = true
++
++### http://ceph.com/docs/firefly/rados/configuration/journal-ref/
+ 
+     # The size of the journal in megabytes. If this is 0,
+     # and the journal is a block device, the entire block device is used.
+     # Since v0.54, this is ignored if the journal is a block device,
+@@ -320,23 +336,18 @@
+     # Type: 32-bit Integer
+     # (Default: 5120)
+     # Recommended: Begin with 1GB. Should be at least twice the product
+     # of the expected speed multiplied by "filestore max sync interval".
+-    ;osd journal size             = 1000     ; journal size, in megabytes
++    ;osd journal size             = 2048     ; journal size, in megabytes
+ 
+     ## If you want to run the journal on a tmpfs, disable DirectIO
+     # Enables direct i/o to the journal.
+-    # Requires journal block align set to true.
++    # Requires "journal block align" set to "true".
+     # Type: Boolean
+     # Required: Yes when using aio.
+     # (Default: true)
+     ;journal dio                  = false
+ 
+-    # Check log files for corruption. Can be computationally expensive.
+-    # Type: Boolean
+-    # (Default: false)
+-    ;osd check for log corruption = true
+-
+     # osd logging to debug osd issues, in order of likelihood of being helpful
+     ;debug ms                     = 1
+     ;debug osd                    = 20
+     ;debug filestore              = 20
+@@ -361,4 +372,72 @@
+ 
+ ;[osd.3]
+ ;    host                         = eta
+ ;    devs                         = /dev/sdy
++
++### http://ceph.com/docs/firefly/rados/configuration/filestore-config-ref/
++
++    # The maximum interval in seconds for synchronizing the filestore.
++    # Type: Double (optional)
++    # (Default: 5)
++    ;filestore max sync interval = 5
++
++    # Enable snapshots for a btrfs filestore.
++    # Type: Boolean
++    # Required: No. Only used for btrfs.
++    # (Default: true)
++    ;filestore btrfs snap        = false
++
++    # Enables the filestore flusher.
++    # Type: Boolean
++    # Required: No
++    # (Default: false)
++    ;filestore flusher            = true
++
++##################
++## client settings
++[client]
++
++### http://ceph.com/docs/firefly/rbd/rbd-config-ref/
++
++    # Enable caching for RADOS Block Device (RBD).
++    # Type: Boolean
++    # Required: No
++    # (Default: false)
++    rbd cache                  = true
++
++    # The RBD cache size in bytes.
++    # Type: 64-bit Integer
++    # Required: No
++    # (Default: 32 MiB)
++    ;rbd cache size            = 33554432
++
++    # The dirty limit in bytes at which the cache triggers write-back.
++    # If 0, uses write-through caching.
++    # Type: 64-bit Integer
++    # Required: No
++    # Constraint: Must be less than rbd cache size.
++    # (Default: 24 MiB)
++    ;rbd cache max dirty       = 25165824
++
++    # The dirty target before the cache begins writing data to the data storage.
++    # Does not block writes to the cache.
++    # Type: 64-bit Integer
++    # Required: No
++    # Constraint: Must be less than rbd cache max dirty.
++    # (Default: 16 MiB)
++    ;rbd cache target dirty    = 16777216
++
++    # The number of seconds dirty data is in the cache before writeback starts.
++    # Type: Float
++    # Required: No
++    # (Default: 1.0)
++    ;rbd cache max dirty age   = 1.0
++
++    # Start out in write-through mode, and switch to write-back after the
++    # first flush request is received. Enabling this is a conservative but
++    # safe setting in case VMs running on rbd are too old to send flushes,
++    # like the virtio driver in Linux before 2.6.32.
++    # Type: Boolean
++    # Required: No
++    # (Default: false)
++    ;rbd cache writethrough until flush = false
diff --git a/debian/patches/series b/debian/patches/series
index bd3a340..57e993a 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,4 +1,13 @@
+## Backported
+8113.patch
+8175.patch
+8282.patch
+8291.patch
+bp0001.patch
+
+## Debian
 arch.patch
 gcj.patch
 modules.patch
+sample.ceph.conf.patch
 virtualenv-never-download.patch

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ceph/ceph.git