summary refs log tree commit diff
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-08-04 10:15:11 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-08-04 10:15:11 -0700
commitc63716ab4d77f3df7d12260fc62cbf847c2a85d1 (patch)
treee87e666aab9be7caa1fcfd41a8bca5607c3795a4 /net
parenta64c40e79fb20c15e42e184d7cde30f900d138eb (diff)
parentae78dd8139ce93a528beb7f3914531b7a7be9e30 (diff)
downloadlinux-c63716ab4d77f3df7d12260fc62cbf847c2a85d1.tar.gz
Merge tag 'ceph-for-4.13-rc4' of git://github.com/ceph/ceph-client
Pull ceph fixes from Ilya Dryomov:
 "A bunch of fixes and follow-ups for -rc1 Luminous patches: issues with
  ->reencode_message() and last minute RADOS semantic changes in
  v12.1.2"

* tag 'ceph-for-4.13-rc4' of git://github.com/ceph/ceph-client:
  libceph: make RECOVERY_DELETES feature create a new interval
  libceph: upmap semantic changes
  crush: assume weight_set != null imples weight_set_size > 0
  libceph: fallback for when there isn't a pool-specific choose_arg
  libceph: don't call ->reencode_message() more than once per message
  libceph: make encode_request_*() work with r_mempool requests
Diffstat (limited to 'net')
-rw-r--r--net/ceph/crush/mapper.c2
-rw-r--r--net/ceph/messenger.c6
-rw-r--r--net/ceph/osd_client.c14
-rw-r--r--net/ceph/osdmap.c60
4 files changed, 45 insertions, 37 deletions
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 746b145bfd11..417df675c71b 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -306,7 +306,7 @@ static __u32 *get_choose_arg_weights(const struct crush_bucket_straw2 *bucket,
 				     const struct crush_choose_arg *arg,
 				     int position)
 {
-	if (!arg || !arg->weight_set || arg->weight_set_size == 0)
+	if (!arg || !arg->weight_set)
 		return bucket->item_weights;
 
 	if (position >= arg->weight_set_size)
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index b7cc615d42ef..a67298c7e0cd 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1287,10 +1287,10 @@ static void prepare_write_message(struct ceph_connection *con)
 	if (m->needs_out_seq) {
 		m->hdr.seq = cpu_to_le64(++con->out_seq);
 		m->needs_out_seq = false;
-	}
 
-	if (con->ops->reencode_message)
-		con->ops->reencode_message(m);
+		if (con->ops->reencode_message)
+			con->ops->reencode_message(m);
+	}
 
 	dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n",
 	     m, con->out_seq, le16_to_cpu(m->hdr.type),
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 901bb8221366..dcfbdd74dfd1 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1337,6 +1337,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 	bool legacy_change;
 	bool split = false;
 	bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE);
+	bool recovery_deletes = ceph_osdmap_flag(osdc,
+						 CEPH_OSDMAP_RECOVERY_DELETES);
 	enum calc_target_result ct_res;
 	int ret;
 
@@ -1399,6 +1401,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 				 pi->pg_num,
 				 t->sort_bitwise,
 				 sort_bitwise,
+				 t->recovery_deletes,
+				 recovery_deletes,
 				 &last_pgid))
 		force_resend = true;
 
@@ -1421,6 +1425,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 		t->pg_num = pi->pg_num;
 		t->pg_num_mask = pi->pg_num_mask;
 		t->sort_bitwise = sort_bitwise;
+		t->recovery_deletes = recovery_deletes;
 
 		t->osd = acting.primary;
 	}
@@ -1918,10 +1923,12 @@ static void encode_request_partial(struct ceph_osd_request *req,
 	}
 
 	ceph_encode_32(&p, req->r_attempts); /* retry_attempt */
-	BUG_ON(p != end - 8); /* space for features */
+	BUG_ON(p > end - 8); /* space for features */
 
 	msg->hdr.version = cpu_to_le16(8); /* MOSDOp v8 */
 	/* front_len is finalized in encode_request_finish() */
+	msg->front.iov_len = p - msg->front.iov_base;
+	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
 	msg->hdr.data_len = cpu_to_le32(data_len);
 	/*
 	 * The header "data_off" is a hint to the receiver allowing it
@@ -1937,11 +1944,12 @@ static void encode_request_partial(struct ceph_osd_request *req,
 static void encode_request_finish(struct ceph_msg *msg)
 {
 	void *p = msg->front.iov_base;
+	void *const partial_end = p + msg->front.iov_len;
 	void *const end = p + msg->front_alloc_len;
 
 	if (CEPH_HAVE_FEATURE(msg->con->peer_features, RESEND_ON_SPLIT)) {
 		/* luminous OSD -- encode features and be done */
-		p = end - 8;
+		p = partial_end;
 		ceph_encode_64(&p, msg->con->peer_features);
 	} else {
 		struct {
@@ -1984,7 +1992,7 @@ static void encode_request_finish(struct ceph_msg *msg)
 		oid_len = p - oid;
 
 		tail = p;
-		tail_len = (end - p) - 8;
+		tail_len = partial_end - p;
 
 		p = msg->front.iov_base;
 		ceph_encode_copy(&p, &head.client_inc, sizeof(head.client_inc));
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 64ae9f89773a..f358d0bfa76b 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -295,6 +295,10 @@ static int decode_choose_args(void **p, void *end, struct crush_map *c)
 			ret = decode_choose_arg(p, end, arg);
 			if (ret)
 				goto fail;
+
+			if (arg->ids_size &&
+			    arg->ids_size != c->buckets[bucket_index]->size)
+				goto e_inval;
 		}
 
 		insert_choose_arg_map(&c->choose_args, arg_map);
@@ -2078,6 +2082,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting,
 			  u32 new_pg_num,
 			  bool old_sort_bitwise,
 			  bool new_sort_bitwise,
+			  bool old_recovery_deletes,
+			  bool new_recovery_deletes,
 			  const struct ceph_pg *pgid)
 {
 	return !osds_equal(old_acting, new_acting) ||
@@ -2085,7 +2091,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting,
 	       old_size != new_size ||
 	       old_min_size != new_min_size ||
 	       ceph_pg_is_split(pgid, old_pg_num, new_pg_num) ||
-	       old_sort_bitwise != new_sort_bitwise;
+	       old_sort_bitwise != new_sort_bitwise ||
+	       old_recovery_deletes != new_recovery_deletes;
 }
 
 static int calc_pg_rank(int osd, const struct ceph_osds *acting)
@@ -2301,10 +2308,17 @@ static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi,
 	}
 }
 
+/*
+ * Magic value used for a "default" fallback choose_args, used if the
+ * crush_choose_arg_map passed to do_crush() does not exist.  If this
+ * also doesn't exist, fall back to canonical weights.
+ */
+#define CEPH_DEFAULT_CHOOSE_ARGS	-1
+
 static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
 		    int *result, int result_max,
 		    const __u32 *weight, int weight_max,
-		    u64 choose_args_index)
+		    s64 choose_args_index)
 {
 	struct crush_choose_arg_map *arg_map;
 	int r;
@@ -2313,6 +2327,9 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
 
 	arg_map = lookup_choose_arg_map(&map->crush->choose_args,
 					choose_args_index);
+	if (!arg_map)
+		arg_map = lookup_choose_arg_map(&map->crush->choose_args,
+						CEPH_DEFAULT_CHOOSE_ARGS);
 
 	mutex_lock(&map->crush_workspace_mutex);
 	r = crush_do_rule(map->crush, ruleno, x, result, result_max,
@@ -2423,40 +2440,23 @@ static void apply_upmap(struct ceph_osdmap *osdmap,
 		for (i = 0; i < pg->pg_upmap.len; i++)
 			raw->osds[i] = pg->pg_upmap.osds[i];
 		raw->size = pg->pg_upmap.len;
-		return;
+		/* check and apply pg_upmap_items, if any */
 	}
 
 	pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid);
 	if (pg) {
-		/*
-		 * Note: this approach does not allow a bidirectional swap,
-		 * e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1].
-		 */
-		for (i = 0; i < pg->pg_upmap_items.len; i++) {
-			int from = pg->pg_upmap_items.from_to[i][0];
-			int to = pg->pg_upmap_items.from_to[i][1];
-			int pos = -1;
-			bool exists = false;
-
-			/* make sure replacement doesn't already appear */
-			for (j = 0; j < raw->size; j++) {
-				int osd = raw->osds[j];
-
-				if (osd == to) {
-					exists = true;
+		for (i = 0; i < raw->size; i++) {
+			for (j = 0; j < pg->pg_upmap_items.len; j++) {
+				int from = pg->pg_upmap_items.from_to[j][0];
+				int to = pg->pg_upmap_items.from_to[j][1];
+
+				if (from == raw->osds[i]) {
+					if (!(to != CRUSH_ITEM_NONE &&
+					      to < osdmap->max_osd &&
+					      osdmap->osd_weight[to] == 0))
+						raw->osds[i] = to;
 					break;
 				}
-				/* ignore mapping if target is marked out */
-				if (osd == from && pos < 0 &&
-				    !(to != CRUSH_ITEM_NONE &&
-				      to < osdmap->max_osd &&
-				      osdmap->osd_weight[to] == 0)) {
-					pos = j;
-				}
-			}
-			if (!exists && pos >= 0) {
-				raw->osds[pos] = to;
-				return;
 			}
 		}
 	}