[Pkg-ofed-commits] [rds-tools] 01/11: Imported Upstream version 2.0.4
Ana Beatriz Guerrero López
ana at moszumanska.debian.org
Fri Jul 18 13:53:27 UTC 2014
This is an automated email from the git hooks/post-receive script.
ana pushed a commit to branch master
in repository rds-tools.
commit a543ed962ec24065a71afd2804f17dc3610f9544
Author: Ana Guerrero López <ana at ekaia.org>
Date: Fri Jul 18 13:47:36 2014 +0200
Imported Upstream version 2.0.4
---
Makefile | 22 +-
Makefile.in | 16 +-
configure | 2 +-
configure.in | 2 +-
examples/Makefile | 2 +-
examples/rds-sample.c | 316 +++++++++++++++++----------
net/ib_rds.h => include/rds.h | 67 +++---
net/rds.h | 50 -----
options.c | 481 ------------------------------------------
pfhack.c | 24 +--
pfhack.h | 12 +-
rds-gen.1 | 89 --------
rds-gen.c | 322 ----------------------------
rds-info.c | 68 +++---
rds-ping.c | 19 +-
rds-rdma.7 | 2 +-
rds-sink.1 | 1 -
rds-sink.c | 250 ----------------------
rds-stress.c | 219 ++++++++++++-------
rds-tools.spec | 25 ++-
rds-tools.spec.in | 23 +-
rds-tools.txt | 39 ----
rdstool.h | 112 ----------
stap/README | 15 --
stap/rds.stp | 35 ---
stats.c | 227 --------------------
26 files changed, 508 insertions(+), 1932 deletions(-)
diff --git a/Makefile b/Makefile
index f52710e..98b048f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,16 +1,16 @@
-prefix = $(DESTDIR)/usr
+prefix = $(DESTDIR)/usr/local
exec_prefix = $(DESTDIR)${prefix}
bindir = $(DESTDIR)${exec_prefix}/bin
-mandir = $(DESTDIR)${prefix}/share/man
+mandir = $(DESTDIR)${prefix}/man
incdir = $(DESTDIR)${prefix}/include
all: all-programs
-CFLAGS = -O2 -Wall
-CPPFLAGS = -DDEBUG_EXE -MD -MP -MF $(@D)/.$(basename $(@F)).d
+CFLAGS = -O2 -Wall -Iinclude
+CPPFLAGS = -DDEBUG_EXE -DRDS_VERSION=\"2.0.4\" -MD -MP -MF $(@D)/.$(basename $(@F)).d
-HEADERS = kernel-list.h rdstool.h pfhack.h net/rds.h net/ib_rds.h
-COMMON_SOURCES = options.c stats.c pfhack.c
+HEADERS = kernel-list.h pfhack.h include/rds.h
+COMMON_SOURCES = pfhack.c
SOURCES = $(addsuffix .c,$(PROGRAMS)) $(COMMON_SOURCES)
CLEAN_OBJECTS = $(addsuffix .o,$(PROGRAMS)) $(subst .c,.o,$(COMMON_SOURCES))
@@ -24,7 +24,7 @@ else
COMMON_OBJECTS = $(subst .c,.o,$(filter-out pfhack.c,$(COMMON_SOURCES)))
endif
-PROGRAMS = rds-gen rds-sink rds-info rds-stress rds-ping
+PROGRAMS = rds-info rds-stress rds-ping
all-programs: $(PROGRAMS)
@@ -36,7 +36,7 @@ install: $(PROGRAMS)
install -m 644 *.1 $(mandir)/man1
install -m 644 *.7 $(mandir)/man7
install -d $(incdir)/net
- install -m 444 net/rds.h $(incdir)/net
+ install -m 444 include/rds.h $(incdir)/net
clean:
rm -f $(PROGRAMS) $(CLEAN_OBJECTS)
@@ -55,15 +55,13 @@ ifneq ($(LOCAL_DFILES),)
-include $(LOCAL_DFILES)
endif
-VERSION := 1.4
+VERSION := 2.0.4
RELEASE := 1
-TAR_PREFIX := rds-tools-$(VERSION)-$(RELEASE)
+TAR_PREFIX := rds-tools-$(VERSION)
TAR_FILE := $(TAR_PREFIX).tar.gz
EXTRA_DIST := rds-info.1 \
- rds-gen.1 \
- rds-sink.1 \
rds-stress.1 \
rds-ping.1 \
rds.7 \
diff --git a/Makefile.in b/Makefile.in
index 088ee69..3d96b15 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -6,11 +6,11 @@ incdir = $(DESTDIR)@includedir@
all: all-programs
-CFLAGS = -O2 -Wall
-CPPFLAGS = -DDEBUG_EXE -MD -MP -MF $(@D)/.$(basename $(@F)).d
+CFLAGS = -O2 -Wall -Iinclude
+CPPFLAGS = -DDEBUG_EXE -DRDS_VERSION=\"@VERSION@\" -MD -MP -MF $(@D)/.$(basename $(@F)).d
-HEADERS = kernel-list.h rdstool.h pfhack.h net/rds.h net/ib_rds.h
-COMMON_SOURCES = options.c stats.c pfhack.c
+HEADERS = kernel-list.h pfhack.h include/rds.h
+COMMON_SOURCES = pfhack.c
SOURCES = $(addsuffix .c,$(PROGRAMS)) $(COMMON_SOURCES)
CLEAN_OBJECTS = $(addsuffix .o,$(PROGRAMS)) $(subst .c,.o,$(COMMON_SOURCES))
@@ -24,7 +24,7 @@ else
COMMON_OBJECTS = $(subst .c,.o,$(filter-out pfhack.c,$(COMMON_SOURCES)))
endif
-PROGRAMS = rds-gen rds-sink rds-info rds-stress rds-ping
+PROGRAMS = rds-info rds-stress rds-ping
all-programs: $(PROGRAMS)
@@ -36,7 +36,7 @@ install: $(PROGRAMS)
install -m 644 *.1 $(mandir)/man1
install -m 644 *.7 $(mandir)/man7
install -d $(incdir)/net
- install -m 444 net/rds.h $(incdir)/net
+ install -m 444 include/rds.h $(incdir)/net
clean:
rm -f $(PROGRAMS) $(CLEAN_OBJECTS)
@@ -58,12 +58,10 @@ endif
VERSION := @VERSION@
RELEASE := @RELEASE@
-TAR_PREFIX := rds-tools-$(VERSION)-$(RELEASE)
+TAR_PREFIX := rds-tools-$(VERSION)
TAR_FILE := $(TAR_PREFIX).tar.gz
EXTRA_DIST := rds-info.1 \
- rds-gen.1 \
- rds-sink.1 \
rds-stress.1 \
rds-ping.1 \
rds.7 \
diff --git a/configure b/configure
index 67b6316..6282b47 100755
--- a/configure
+++ b/configure
@@ -1215,7 +1215,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
-VERSION=1.4
+VERSION=2.0.4
RELEASE=1
diff --git a/configure.in b/configure.in
index 9cccaff..96c574d 100644
--- a/configure.in
+++ b/configure.in
@@ -1,7 +1,7 @@
AC_PREREQ(2.55)
AC_INIT()
-VERSION=1.4
+VERSION=2.0.4
RELEASE=1
AC_SUBST(VERSION)
diff --git a/examples/Makefile b/examples/Makefile
index ef35c1f..cfb439f 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -3,4 +3,4 @@ all: rds-sample
rds-sample: rds-sample.o
-CFLAGS = -I ../net
+CFLAGS = -I ../include
diff --git a/examples/rds-sample.c b/examples/rds-sample.c
index b7cd325..c66bdc3 100644
--- a/examples/rds-sample.c
+++ b/examples/rds-sample.c
@@ -25,12 +25,11 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
-#include <unistd.h>
/* FIXME - this is a hack to getaround RDS not exporting any header files.
- * This is a local copy.
+ * This is a local copy of the file found at net/rds/
*/
-#include "ib_rds.h"
+#include "rds.h"
/* These are defined in rds.h....but that file is not happily included */
#define SOL_RDS 272
#define PF_RDS 28
@@ -39,13 +38,67 @@
#define TESTPORT 4000
#define BUFSIZE 94
-static int do_rdma_read(int sock, struct msghdr *msg, void *buf)
+#define NUM_PRINTABLE_CHARS 94
+#define PRINTABLE_CHARS_OFFSET 33
+
+#define VERBOSE_FLAG (1 << 0)
+#define RDMA_READ_FLAG (1 << 1)
+#define RDMA_WRITE_FLAG (1 << 2)
+
+struct rdss_message {
+ int count;
+ uint32_t flags;
+ char msg[BUFSIZE];
+};
+
+static void print_orb(int i)
+{
+ char buf;
+
+ switch (i % 6) {
+ case 0:
+ buf = '.';
+ break;
+ case 1:
+ buf = 'o';
+ break;
+ case 2:
+ buf = 'O';
+ break;
+ case 3:
+ buf = '0';
+ break;
+ case 4:
+ buf = 'O';
+ break;
+ case 5:
+ buf = 'o';
+ break;
+ }
+
+ printf("\b%c", buf);
+ fflush(stdout);
+}
+
+static void create_message(char *buf, uint32_t start)
+{
+ int i;
+
+ for (i = 0; i < BUFSIZE; i++)
+ buf[i] = ((i + start) % NUM_PRINTABLE_CHARS) + PRINTABLE_CHARS_OFFSET;
+}
+
+static int do_rdma_read(int sock, struct msghdr *msg, struct rdss_message *buf,
+ uint32_t remote_flags)
{
struct rds_rdma_args *args;
struct rds_iovec iov;
struct cmsghdr *cmsg;
int rc;
+ if (remote_flags & RDMA_WRITE_FLAG)
+ create_message(buf->msg, buf->count);
+
cmsg = CMSG_FIRSTHDR(msg);
args = (struct rds_rdma_args *)CMSG_DATA(cmsg);
@@ -55,13 +108,14 @@ static int do_rdma_read(int sock, struct msghdr *msg, void *buf)
cmsg->cmsg_len = CMSG_LEN(sizeof(struct rds_rdma_args));
iov.addr = (uint64_t) buf;
- iov.bytes = BUFSIZE * sizeof(char);
+ iov.bytes = sizeof(struct rdss_message);
args->remote_vec.addr = 0;
- args->remote_vec.bytes = BUFSIZE * sizeof(char);
+ args->remote_vec.bytes = sizeof(struct rdss_message);
args->local_vec_addr = (uint64_t) &iov;
args->nr_local = 1;
- args->flags = RDS_RDMA_NOTIFY_ME;
+ args->flags = remote_flags ? (RDS_RDMA_READWRITE | RDS_RDMA_FENCE) : 0;
+ args->flags |= RDS_RDMA_NOTIFY_ME;
args->user_token = 0;
msg->msg_controllen = CMSG_SPACE(sizeof(struct rds_rdma_args));
@@ -72,26 +126,24 @@ static int do_rdma_read(int sock, struct msghdr *msg, void *buf)
return -1;
}
- sleep(1);
-
- rc = recvmsg(sock, msg, 0);
- if (rc < 0) {
- printf("%s: Error receiving message: %d %d\n", __func__, rc, errno);
- return -1;
- }
+ /* Spin waiting for the confirmation that the RDMA operation has completed */
+ do {
+ rc = recvmsg(sock, msg, MSG_DONTWAIT);
+ } while (rc < 0 && errno == EAGAIN);
return 0;
}
-static void server(char *address)
+static void server(char *address, uint32_t flags)
{
struct sockaddr_in sin, din;
- void *buf, *ctlbuf;
+ struct rdss_message *buf;
struct msghdr msg;
struct iovec *iov;
- int rc, sock;
+ void *ctlbuf;
+ int rc, sock, count = 0;
- buf = calloc(BUFSIZE, sizeof(char));
+ buf = calloc(1, sizeof(struct rdss_message));
if (!buf) {
printf("%s: calloc failed\n", __func__);
return;
@@ -123,14 +175,14 @@ static void server(char *address)
goto out;
}
- ctlbuf = calloc(1, sizeof(struct rds_rdma_args));
+ ctlbuf = calloc(1, CMSG_SPACE(sizeof(struct rds_rdma_args)));
if (!ctlbuf) {
printf("%s: calloc failed\n", __func__);
goto out1;
}
iov[0].iov_base = buf;
- iov[0].iov_len = BUFSIZE * sizeof(char);
+ iov[0].iov_len = sizeof(struct rdss_message);
memset(&msg, 0, sizeof(msg));
msg.msg_name = &din;
@@ -140,25 +192,39 @@ static void server(char *address)
msg.msg_control = ctlbuf;
msg.msg_controllen = CMSG_SPACE(sizeof(struct rds_rdma_args));
- printf("server listening on %s\n", inet_ntoa(sin.sin_addr));
+ if (flags & VERBOSE_FLAG)
+ printf("server listening on %s\n", inet_ntoa(sin.sin_addr));
- rc = recvmsg(sock, &msg, 0);
- if (rc < 0) {
- printf("%s: Error receiving message: %d %d\n", __func__, rc, errno);
- goto out2;
- }
+ do {
+ rc = recvmsg(sock, &msg, 0);
+ if (rc < 0) {
+ printf("%s: Error receiving message: %d %d\n", __func__, rc, errno);
+ goto out2;
+ }
- printf("Received a packet len %d, cmsg len %d, on port %d\n",
- (uint32_t) iov[0].iov_len,
- (uint32_t) msg.msg_controllen,
- din.sin_port);
+ if (flags & VERBOSE_FLAG)
+ printf("Received %s packet %d of len %d, cmsg len %d, on port %d\n",
+ msg.msg_controllen ? "RDS RDMA" : "RDS",
+ count,
+ (uint32_t) iov[0].iov_len,
+ (uint32_t) msg.msg_controllen,
+ din.sin_port);
+
+ if (msg.msg_controllen) {
+ rc = do_rdma_read(sock, &msg, buf, buf->flags);
+ if (rc < 0)
+ goto out2;
+ }
- if (msg.msg_controllen) {
- rc = do_rdma_read(sock, &msg, buf);
- if (rc < 0)
- goto out2;
- }
- printf("payload contains: %s\n", (char *)buf);
+ count++;
+
+ if (flags & VERBOSE_FLAG && !(buf->flags & RDMA_WRITE_FLAG))
+ printf("payload contains: %d %s\n", buf->count, buf->msg);
+
+ if (!(flags & VERBOSE_FLAG))
+ print_orb(count);
+
+ } while (buf->count - 1);
out2:
free(ctlbuf);
@@ -166,26 +232,22 @@ out1:
free(iov);
out:
free(buf);
-}
-static void create_message(char *buf)
-{
- int i;
-
- for (i = 0; i < BUFSIZE; i++)
- buf[i] = i + 0x21;
+ printf("\n%d packets received\n", count);
}
-static int build_rds_rdma_packet(int sock, struct msghdr *msg, void *buf, uint64_t *cookie)
+static int build_rds_rdma_packet(int sock, struct msghdr *msg, void *buf,
+ uint64_t *cookie, uint32_t *flags)
{
struct rds_get_mr_args mr_args;
struct cmsghdr *cmsg;
void *ctlbuf;
+ struct iovec *iov;
mr_args.vec.addr = (uint64_t) buf;
- mr_args.vec.bytes = BUFSIZE * sizeof(char);
+ mr_args.vec.bytes = sizeof(struct rdss_message);
mr_args.cookie_addr = (uint64_t) cookie;
- mr_args.flags = RDS_RDMA_READWRITE;
+ mr_args.flags = RDS_RDMA_USE_ONCE;
ctlbuf = calloc(1, CMSG_SPACE(sizeof(mr_args)));
if (!ctlbuf) {
@@ -202,13 +264,22 @@ static int build_rds_rdma_packet(int sock, struct msghdr *msg, void *buf, uint64
cmsg->cmsg_len = CMSG_LEN(sizeof(mr_args));
memcpy(CMSG_DATA(cmsg), &mr_args, sizeof(mr_args));
- msg->msg_iov = NULL;
- msg->msg_iovlen = 0;
+ iov = calloc(1, sizeof(struct iovec));
+ if (!iov) {
+ printf("%s: calloc failed\n", __func__);
+ return -1;
+ }
+
+ msg->msg_iov = iov;
+ msg->msg_iovlen = 1;
+
+ iov[0].iov_base = buf;
+ iov[0].iov_len = sizeof(struct rdss_message);
return 0;
}
-static int build_rds_packet(struct msghdr *msg, char *buf)
+static int build_rds_packet(struct msghdr *msg, void *buf)
{
struct iovec *iov;
@@ -222,17 +293,15 @@ static int build_rds_packet(struct msghdr *msg, char *buf)
msg->msg_iovlen = 1;
iov[0].iov_base = buf;
- iov[0].iov_len = BUFSIZE * sizeof(char);
+ iov[0].iov_len = sizeof(struct rdss_message);
return 0;
}
-static void client(char *localaddr, char *remoteaddr, int rdma)
+static void client(char *localaddr, char *remoteaddr, uint32_t flags, int count)
{
struct sockaddr_in sin, din;
- struct msghdr msg;
- uint64_t cookie = 0;
- int rc, sock;
+ int rc, sock, num_mess;
void *buf;
buf = calloc(BUFSIZE, sizeof(char));
@@ -241,8 +310,6 @@ static void client(char *localaddr, char *remoteaddr, int rdma)
return;
}
- create_message((char *)buf);
-
sock = socket(PF_RDS, SOCK_SEQPACKET, 0);
if (sock < 0) {
printf("%s: Error creating Socket: %d\n", __func__, sock);
@@ -259,56 +326,84 @@ static void client(char *localaddr, char *remoteaddr, int rdma)
goto out;
}
- memset(&msg, 0, sizeof(msg));
- msg.msg_name = &din;
- msg.msg_namelen = sizeof(din);
+ for (num_mess = count; num_mess || count == -1; num_mess--) {
+ struct rdss_message mess;
+ uint64_t cookie = 0;
+ struct msghdr msg;
- memset(&din, 0, sizeof(din));
- din.sin_family = AF_INET;
- din.sin_addr.s_addr = inet_addr(remoteaddr);
- din.sin_port = TESTPORT;
+ /* For an RDMA_WRITE, it is not necessary to write anything to the buf. As
+ * this is going to be over-written when the server performs a RDMA_WRITE into
+ * this buffer
+ */
+ if (!(flags & RDMA_WRITE_FLAG))
+ create_message((char *)buf, (uint32_t) num_mess);
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_name = &din;
+ msg.msg_namelen = sizeof(din);
+
+ memset(&din, 0, sizeof(din));
+ din.sin_family = AF_INET;
+ din.sin_addr.s_addr = inet_addr(remoteaddr);
+ din.sin_port = TESTPORT;
+
+ mess.count = num_mess;
+ mess.flags = flags;
+ memcpy(&mess.msg, buf, sizeof(mess.msg));
+
+ if (flags & RDMA_READ_FLAG || flags & RDMA_WRITE_FLAG) {
+ rc = build_rds_rdma_packet(sock, &msg, &mess, &cookie, &flags);
+ if (rc < 0)
+ goto out;
+
+ if (flags & VERBOSE_FLAG)
+ printf("Client Sending RDMA message %d from %s to %s\n",
+ count - num_mess, localaddr, remoteaddr);
+ } else {
+ rc = build_rds_packet(&msg, &mess);
+ if (rc < 0)
+ goto out;
+
+ if (flags & VERBOSE_FLAG)
+ printf("client sending %d byte message %s from %s to %s\n",
+ (uint32_t) msg.msg_iov->iov_len,
+ (char *)buf,
+ localaddr,
+ remoteaddr);
+ }
- if (rdma) {
- rc = build_rds_rdma_packet(sock, &msg, buf, &cookie);
- if (rc < 0)
- goto out;
+ rc = sendmsg(sock, &msg, 0);
+ if (rc < 0) {
+ printf("%s: Error sending message: %d %d\n", __func__, rc, errno);
+ goto out1;
+ }
- printf("Client Sending RDMA message from %s to %s\n",
- localaddr, remoteaddr);
- } else {
- rc = build_rds_packet(&msg, buf);
- if (rc < 0)
- goto out;
-
- printf("client sending %d byte message %s from %s to %s on port %d\n",
- (uint32_t) msg.msg_iov->iov_len,
- (char *)buf,
- localaddr,
- remoteaddr,
- sin.sin_port);
- }
+ if (flags & RDMA_READ_FLAG || flags & RDMA_WRITE_FLAG) {
+ /* reuse the same msg, as it should no longer be necessary and this incoming
+ * msg should be empty
+ */
+ rc = recvmsg(sock, &msg, 0);
+ if (rc < 0) {
+ printf("%s: Error receiving message: %d %d\n", __func__, rc, errno);
+ }
+ }
- rc = sendmsg(sock, &msg, 0);
- if (rc < 0) {
- printf("%s: Error sending message: %d %d\n", __func__, rc, errno);
- goto out1;
- }
+ if (flags & VERBOSE_FLAG && flags & RDMA_WRITE_FLAG)
+ printf("payload contains: %d %s\n", mess.count, mess.msg);
- if (rdma) {
- /* reuse the same msg, as it should no longer be necessary and this incoming
- * msg should be empty
- */
- rc = recvmsg(sock, &msg, 0);
- if (rc < 0) {
- printf("%s: Error receiving message: %d %d\n", __func__, rc, errno);
- }
+out1:
+ if (msg.msg_control)
+ free(msg.msg_control);
+ if (msg.msg_iov)
+ free(msg.msg_iov);
+ if (rc < 0)
+ break;
+
+ if (!(flags & VERBOSE_FLAG))
+ print_orb(count - num_mess);
}
-out1:
- if (msg.msg_control)
- free(msg.msg_control);
- if (msg.msg_iov)
- free(msg.msg_iov);
+ printf("\n%d messages sent\n", count - num_mess);
out:
free(buf);
}
@@ -316,7 +411,8 @@ out:
int main(int argc, char **argv)
{
char *serveraddr = NULL, *clientaddr = NULL;
- int i, rdma = 0;
+ uint32_t flags = 0;
+ int i, count = -1;
if (argc < 3) {
printf("not enough args\n");
@@ -324,24 +420,30 @@ int main(int argc, char **argv)
}
for (i = 1; i < argc; i++) {
- if (!strcmp("-s", argv[i])) {
+ if (!strcmp("-s", argv[i]) || !strcmp("--server", argv[i])) {
serveraddr = argv[i+1];
i++;
- } else if (!strcmp("-c", argv[i])) {
+ } else if (!strcmp("-c", argv[i]) || !strcmp("--client", argv[i])) {
clientaddr = argv[i+1];
i++;
- } else if (!strcmp("--rdma", argv[i])) {
- rdma = 1;
+ } else if (!strcmp("-C", argv[i]) || !strcmp("--count", argv[i])) {
+ count = atoi(argv[i+1]);
+ i++;
+ } else if (!strcmp("-rr", argv[i]) || !strcmp("--rdma-read", argv[i])) {
+ flags |= RDMA_READ_FLAG;
+ } else if (!strcmp("-rw", argv[i]) || !strcmp("--rdma-write", argv[i])) {
+ flags |= RDMA_WRITE_FLAG;
+ } else if (!strcmp("-v", argv[i]) || !strcmp("--verbose", argv[i])) {
+ flags |= VERBOSE_FLAG;
} else
printf("Invalid param\n");
}
if (serveraddr && !clientaddr) {
- server(serveraddr);
+ server(serveraddr, flags);
} else if (serveraddr && clientaddr) {
- client(clientaddr, serveraddr, rdma);
+ client(clientaddr, serveraddr, flags, count);
}
return 0;
}
-
diff --git a/net/ib_rds.h b/include/rds.h
similarity index 86%
rename from net/ib_rds.h
rename to include/rds.h
index 992139c..7e4d217 100644
--- a/net/ib_rds.h
+++ b/include/rds.h
@@ -31,8 +31,8 @@
*
*/
-#ifndef IB_RDS_H
-#define IB_RDS_H
+#ifndef _LINUX_RDS_H
+#define _LINUX_RDS_H
#include <linux/types.h>
@@ -56,6 +56,7 @@
/* deprecated: RDS_BARRIER 4 */
#define RDS_RECVERR 5
#define RDS_CONG_MONITOR 6
+#define RDS_GET_MR_FOR_DEST 7
/*
* Control message types for SOL_RDS.
@@ -81,7 +82,10 @@
#define RDS_CMSG_RDMA_MAP 3
#define RDS_CMSG_RDMA_STATUS 4
#define RDS_CMSG_CONG_UPDATE 5
+#define RDS_CMSG_ATOMIC_FADD 6
+#define RDS_CMSG_ATOMIC_CSWP 7
+#define RDS_INFO_FIRST 10000
#define RDS_INFO_COUNTERS 10000
#define RDS_INFO_CONNECTIONS 10001
/* 10002 aka RDS_INFO_FLOWS is deprecated */
@@ -91,6 +95,9 @@
#define RDS_INFO_SOCKETS 10006
#define RDS_INFO_TCP_SOCKETS 10007
#define RDS_INFO_IB_CONNECTIONS 10008
+#define RDS_INFO_CONNECTION_STATS 10009
+#define RDS_INFO_IWARP_CONNECTIONS 10010
+#define RDS_INFO_LAST 10010
struct rds_info_counter {
u_int8_t name[32];
@@ -101,12 +108,14 @@ struct rds_info_counter {
#define RDS_INFO_CONNECTION_FLAG_CONNECTING 0x02
#define RDS_INFO_CONNECTION_FLAG_CONNECTED 0x04
+#define TRANSNAMSIZ 16
+
struct rds_info_connection {
u_int64_t next_tx_seq;
u_int64_t next_rx_seq;
__be32 laddr;
__be32 faddr;
- u_int8_t transport[15]; /* null term ascii */
+ u_int8_t transport[TRANSNAMSIZ]; /* null term ascii */
u_int8_t flags;
} __attribute__((packed));
@@ -138,32 +147,23 @@ struct rds_info_socket {
__be16 bound_port;
__be16 connected_port;
u_int32_t rcvbuf;
- uint64_t inum;
-} __attribute__((packed));
-
-struct rds_info_socket_v1 {
- u_int32_t sndbuf;
- __be32 bound_addr;
- __be32 connected_addr;
- __be16 bound_port;
- __be16 connected_port;
- u_int32_t rcvbuf;
+ u_int64_t inum;
} __attribute__((packed));
struct rds_info_tcp_socket {
- __be32 local_addr;
- __be16 local_port;
- __be32 peer_addr;
- __be16 peer_port;
- u_int64_t hdr_rem;
- u_int64_t data_rem;
- u_int32_t last_sent_nxt;
- u_int32_t last_expected_una;
- u_int32_t last_seen_una;
+ __be32 local_addr;
+ __be16 local_port;
+ __be32 peer_addr;
+ __be16 peer_port;
+ u_int64_t hdr_rem;
+ u_int64_t data_rem;
+ u_int32_t last_sent_nxt;
+ u_int32_t last_expected_una;
+ u_int32_t last_seen_una;
} __attribute__((packed));
#define RDS_IB_GID_LEN 16
-struct rds_info_ib_connection {
+struct rds_info_rdma_connection {
__be32 src_addr;
__be32 dst_addr;
uint8_t src_gid[RDS_IB_GID_LEN];
@@ -172,8 +172,8 @@ struct rds_info_ib_connection {
uint32_t max_send_wr;
uint32_t max_recv_wr;
uint32_t max_send_sge;
- uint32_t rdma_fmr_max;
- uint32_t rdma_fmr_size;
+ uint32_t rdma_mr_max;
+ uint32_t rdma_mr_size;
};
/*
@@ -227,6 +227,13 @@ struct rds_get_mr_args {
uint64_t flags;
};
+struct rds_get_mr_for_dest_args {
+ struct sockaddr_storage dest_addr;
+ struct rds_iovec vec;
+ u_int64_t cookie_addr;
+ uint64_t flags;
+};
+
struct rds_free_mr_args {
rds_rdma_cookie_t cookie;
u_int64_t flags;
@@ -241,6 +248,16 @@ struct rds_rdma_args {
u_int64_t user_token;
};
+struct rds_atomic_args {
+ rds_rdma_cookie_t cookie;
+ uint64_t local_addr;
+ uint64_t remote_addr;
+ uint64_t swap_add;
+ uint64_t compare;
+ u_int64_t flags;
+ u_int64_t user_token;
+};
+
struct rds_rdma_notify {
u_int64_t user_token;
int32_t status;
diff --git a/net/rds.h b/net/rds.h
deleted file mode 100644
index ec9aa6c..0000000
--- a/net/rds.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * net/rds.h - user space interface for RDS
- *
- * Copyright (c) 2006 Oracle. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __NET_RDS_H
-#define __NET_RDS_H
-
-#include "ib_rds.h"
-
-static inline int
-rds_rdma_id_sign(uint64_t id1, uint64_t id2)
-{
- int64_t diff = id1 - id2;
-
- return (diff < 0)? -1 : ((diff == 0)? 0 : 1);
-}
-
-#define rds_rdma_id_cmp(id1, cmp, id2) (rds_rdma_id_sign((id1), (id2)) cmp 0)
-
-#endif /* __NET_RDS_H */
diff --git a/options.c b/options.c
deleted file mode 100644
index 0956b25..0000000
--- a/options.c
+++ /dev/null
@@ -1,481 +0,0 @@
-/*
- * Copyright (c) 2006 Oracle. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * options.c - options and stuff
- */
-
-#define _LARGEFILE64_SOURCE
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <limits.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <netdb.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <signal.h>
-#include <getopt.h>
-#include <libgen.h>
-#include <inttypes.h>
-#include <errno.h>
-
-#include "kernel-list.h"
-#include "rdstool.h"
-
-
-/* This gets changed in parse_options() */
-char *progname = "rds-generic-tool";
-unsigned int verbose = 1;
-
-sig_atomic_t running = 1;
-
-
-/*
- * Take "address:port" and return a sockaddr(_in) that describes it.
- * Since RDS is IPv4 only, we don't worry about PF_INET6.
- *
- * XXX: Should we try a default IP or default port? RDS is very
- * endpoint-oriented; right now we require explicitness.
- *
- * Since getaddrinfo(3) returns multiple addresses, we simply find the
- * first SOCK_DGRAM AF_INET result. Note that RDS actually uses
- * SOCK_SEQPACKET, but we're lying to getaddrinfo(3).
- */
-static int parse_endpoint(struct rds_endpoint *nep)
-{
- int rc;
- char *host, *port;
- struct addrinfo *list, *try;
- struct addrinfo hint = {
- .ai_family = PF_INET,
- .ai_socktype = SOCK_DGRAM,
- };
-
- host = strdup(nep->re_name);
- if (!host) {
- rc = -ENOMEM;
- verbosef(0, stderr, "%s: Unable to allocate memory\n",
- progname);
- goto out;
- }
-
- port = strchr(host, ':');
- if (!port) {
- rc = -EINVAL;
- verbosef(0, stderr, "%s: Invalid endpoint: %s\n",
- progname, nep->re_name);
- goto out;
- }
-
- *port = '\0';
- port++;
-
- rc = getaddrinfo(host, port, &hint, &list);
- if (rc) {
- verbosef(0, stderr, "%s: Unable to resolve \"%s\": %s\n",
- progname, nep->re_name, gai_strerror(rc));
- goto out;
- }
-
- for (try = list; try; try = try->ai_next) {
- if ((try->ai_family == PF_INET) &&
- (try->ai_socktype == SOCK_DGRAM))
- break;
- }
-
- if (try) {
- if (try->ai_addrlen != sizeof(struct sockaddr_in))
- verbosef(0, stderr,
- "%s: OMG WTF BBQ! try->ai_addrlen = %d, sizeof(struct sockaddr_in) = %zd\n",
- progname, try->ai_addrlen,
- sizeof(struct sockaddr_in));
-
- memcpy(&nep->re_addr, try->ai_addr, try->ai_addrlen);
- }
-
- if (list)
- freeaddrinfo(list);
-
-out:
- return rc;
-}
-
-static int add_endpoint(const char *endpoint, struct list_head *list)
-{
- int rc;
- struct rds_endpoint *nep;
-
- nep = malloc(sizeof(struct rds_endpoint));
- if (!nep)
- return -ENOMEM;
-
- nep->re_name = strdup(endpoint);
- if (!nep->re_name) {
- free(nep);
- return -ENOMEM;
- }
-
- rc = parse_endpoint(nep);
- if (!rc) {
- list_add_tail(&nep->re_item, list);
- } else {
- free(nep->re_name);
- free(nep);
- }
-
- return rc;
-}
-
-static int get_number(char *arg, uint64_t *res)
-{
- char *ptr = NULL;
- uint64_t num;
-
- num = strtoull(arg, &ptr, 0);
-
- if ((ptr == arg) || (num == UINT64_MAX))
- return(-EINVAL);
-
- switch (*ptr) {
- case '\0':
- break;
-
- case 'g':
- case 'G':
- num *= 1024;
- /* FALL THROUGH */
-
- case 'm':
- case 'M':
- num *= 1024;
- /* FALL THROUGH */
-
- case 'k':
- case 'K':
- num *= 1024;
- /* FALL THROUGH */
-
- case 'b':
- case 'B':
- break;
-
- default:
- return -EINVAL;
- }
-
- *res = num;
-
- return 0;
-}
-
-extern char *optarg;
-extern int optopt;
-extern int optind;
-extern int opterr;
-int parse_options(int argc, char *argv[], const char *opts,
- struct rds_context *ctxt)
-{
- int c, rc = 0;
- uint64_t val;
- struct list_head saddrs;
-
- if (argc && argv[0])
- progname = basename(argv[0]);
-
- INIT_LIST_HEAD(&saddrs);
- opterr = 0;
- while ((c = getopt(argc, argv, opts)) != EOF) {
- switch (c) {
- case 's':
- if (!list_empty(&saddrs)) {
- verbosef(0, stderr,
- "%s: Only one source address allowed\n",
- progname);
- rc = -EINVAL;
- } else
- rc = add_endpoint(optarg, &saddrs);
- break;
-
- case 'd':
- rc = add_endpoint(optarg, &ctxt->rc_daddrs);
- break;
-
- case 'm':
- rc = get_number(optarg, &val);
- if (rc) {
- verbosef(0, stderr,
- "%s: Invalid number: %s\n",
- progname, optarg);
- break;
- }
-
- if (val > UINT32_MAX) {
- rc = -EINVAL;
- verbosef(0, stderr,
- "%s: Message size too large: %"PRIu64"\n",
- progname, val);
- } else
- ctxt->rc_msgsize = (uint32_t)val;
- break;
-
- case 'l':
- rc = get_number(optarg, &ctxt->rc_total);
- if (rc) {
- verbosef(0, stderr,
- "%s: Invalid number: %s\n",
- progname, optarg);
- }
- break;
-
- case 'f':
- ctxt->rc_filename = optarg;
- stats_extended(1);
- break;
-
- case 'i':
- rc = get_number(optarg, &val);
- if (rc) {
- verbosef(0, stderr,
- "%s: Invalid number: %s\n",
- progname, optarg);
- break;
- }
-
- if (val > LONG_MAX) {
- rc = -EINVAL;
- verbosef(0, stderr,
- "%s: Sleep interval too large: %"PRIu64"\n",
- progname, val);
- } else {
- rc = stats_init((long)val);
- }
-
- break;
-
-
- case 'v':
- verbose++;
- break;
-
- case 'q':
- if (verbose)
- verbose--;
- break;
-
- case 'V':
- print_version();
- break;
-
- case 'h':
- print_usage(0);
- break;
-
- case '-':
- if (!strcmp(optarg, "help"))
- print_usage(0);
- else if (!strcmp(optarg, "version"))
- print_version();
- else {
- rc = -EINVAL;
- verbosef(0, stderr,
- "%s: Invalid argument: \'--%s\'\n",
- progname, optarg);
- }
- break;
-
- case '?':
- verbosef(0, stderr,
- "%s: Invalid option \'-%c\'\n",
- progname, optopt);
- rc = -EINVAL;
- break;
-
- case ':':
- verbosef(0, stderr,
- "%s: Option \'-%c\' requires an argument\n",
- progname, optopt);
- rc = -EINVAL;
- break;
-
- default:
- verbosef(0, stderr,
- "%s: Shouldn't get here %c %c\n",
- progname, optopt, c);
- rc = -EINVAL;
- break;
- }
-
- if (rc)
- goto out;
- }
-
- if (list_empty(&saddrs)) {
- verbosef(0, stderr, "%s: Source endpoint address required\n",
- progname);
- rc = -EINVAL;
- goto out;
- }
-
- ctxt->rc_saddr = list_entry(saddrs.prev, struct rds_endpoint,
- re_item);
-
-out:
- return rc;
-}
-
-int rds_bind(struct rds_context *ctxt)
-{
- int rc;
- struct rds_endpoint *e = ctxt->rc_saddr;
-
- rc = socket(PF_RDS, SOCK_SEQPACKET, 0);
- if (rc < 0) {
- rc = -errno;
- verbosef(0, stderr, "%s: Unable to create socket: %s\n",
- progname, strerror(-rc));
- goto out;
- }
-
- e->re_fd = rc;
- rc = bind(e->re_fd, (struct sockaddr *)&e->re_addr,
- sizeof(struct sockaddr_in));
- if (rc) {
- rc = -errno;
- verbosef(0, stderr, "%s: Unable to bind socket: %s\n",
- progname, strerror(-rc));
-
- close(e->re_fd);
- e->re_fd = -1;
- goto out;
- }
-
-out:
- return rc;
-}
-
-int dup_file(struct rds_context *ctxt, int fd, int flags)
-{
- int tmp_fd, rc = 0;
- char *type;
-
- /* "-" is stdin/stdout */
- if (!strcmp(ctxt->rc_filename, "-"))
- goto out;
-
- tmp_fd = open64(ctxt->rc_filename, flags);
- if (tmp_fd < 0) {
- rc = -errno;
- verbosef(0, stderr, "%s: Unable to open file \"%s\": %s\n",
- progname, ctxt->rc_filename, strerror(-rc));
- goto out;
- }
-
- if (tmp_fd != fd) {
- rc = dup2(tmp_fd, fd);
- if (rc < 0) {
- rc = -errno;
- switch (fd) {
- case STDIN_FILENO:
- type = "stdin";
- break;
-
- case STDOUT_FILENO:
- type = "stdout";
- break;
-
- case STDERR_FILENO:
- type = "stderr";
- break;
-
- default:
- type = "random fd";
- break;
- }
-
- verbosef(0, stderr,
- "%s: Unable to set file \"%s\" as %s: %s\n",
- progname, ctxt->rc_filename, type,
- strerror(-rc));
- } else if (rc != fd) {
- verbosef(0, stderr,
- "%s: dup2(2) failed for some reason!\n",
- progname);
- rc = -EBADF;
- } else
- rc = 0;
- }
-
-out:
- return rc;
-}
-
-int runningp(void)
-{
- return running;
-}
-
-void handler(int signum)
-{
- running = 0;
-}
-
-int setup_signals(void)
-{
- int rc = -EINVAL;
- struct sigaction act;
-
- sigemptyset(&act.sa_mask);
- act.sa_handler = handler;
- act.sa_flags = 0;
-
- if (sigaction(SIGTERM, &act, NULL))
- goto out;
-
- if (sigaction(SIGINT, &act, NULL))
- goto out;
-
- act.sa_handler = SIG_IGN;
- if (sigaction(SIGPIPE, &act, NULL)) /* Get EPIPE instead */
- goto out;
-
- rc = 0;
-
-out:
- return rc;
-}
diff --git a/pfhack.c b/pfhack.c
index 7e320db..aebfa50 100644
--- a/pfhack.c
+++ b/pfhack.c
@@ -53,21 +53,10 @@
#include "kernel-list.h"
#include "pfhack.h"
-#include "rdstool.h"
#define PF_RDS_PATH "/proc/sys/net/rds/pf_rds"
#define SOL_RDS_PATH "/proc/sys/net/rds/sol_rds"
-/* We don't allow any system that can't read pf_rds */
-static void explode(const char *reason)
-{
- fprintf(stderr,
- "%s: Unable to determine RDS constant: %s\n",
- progname, reason);
-
- exit(1);
-}
-
static int discover_constant(const char *path, int official, int *found)
{
int fd;
@@ -98,12 +87,11 @@ static int discover_constant(const char *path, int official, int *found)
close(fd);
- if (ret < 0)
- explode("Error reading address constant");
-
val = strtoul(buf, &ptr, 0);
- if ((val > INT_MAX) || !ptr || (*ptr && (*ptr != '\n')))
- explode("Invalid address constant");
+ if ((val > INT_MAX) || !ptr || (*ptr && (*ptr != '\n'))) {
+ fprintf(stderr, "Unable to determine RDS constant: invalid address constant\n");
+ exit(1);
+ }
*found = val;
return (int)val;
@@ -113,12 +101,12 @@ int discover_pf_rds()
{
static int pf_rds = -1;
- return discover_constant(PF_RDS_PATH, OFFICIAL_PF_RDS, &pf_rds);
+ return discover_constant(PF_RDS_PATH, PF_RDS, &pf_rds);
}
int discover_sol_rds()
{
static int sol_rds = -1;
- return discover_constant(SOL_RDS_PATH, OFFICIAL_SOL_RDS, &sol_rds);
+ return discover_constant(SOL_RDS_PATH, SOL_RDS, &sol_rds);
}
diff --git a/pfhack.h b/pfhack.h
index 2a55b25..4b7d7d3 100644
--- a/pfhack.h
+++ b/pfhack.h
@@ -44,17 +44,11 @@
#ifndef __PF_HACK_H
#define __PF_HACK_H
-#define OFFICIAL_PF_RDS 21
-#define OFFICIAL_SOL_RDS 276
+#define PF_RDS 21
+#define AF_RDS 21
+#define SOL_RDS 276
-
-#ifdef DYNAMIC_PF_RDS
extern int discover_pf_rds();
extern int discover_sol_rds();
-#define AF_RDS discover_pf_rds()
-#define PF_RDS AF_RDS
-#define SOL_RDS discover_sol_rds()
-#endif /* DYNAMIC_PF_RDS */
-
#endif /* __PF_HACK_H */
diff --git a/rds-gen.1 b/rds-gen.1
deleted file mode 100644
index f203f59..0000000
--- a/rds-gen.1
+++ /dev/null
@@ -1,89 +0,0 @@
-.Dd October 30, 2006
-.Dt RDS-GEN-SINK 1
-.Os
-.Sh NAME
-.Nm rds-gen
-.Nd write data from a file to an RDS socket
-.Pp
-.Nm rds-sink
-.Nd write data from an RDS socket to a file
-.Sh SYNOPSIS
-.Nm rds-gen
-.Bk -words
-.Op Fl s Ar source_address:source_port
-.Op Fl d Ar destination_address:destination_port
-.Op Fl f Ar input_file
-.Op Fl m Ar message_size
-.Op Fl l Ar total_bytes
-.Op Fl i Ar interval
-
-.Nm rds-sink
-.Bk -words
-.Op Fl s Ar listen_address:listen_port
-.Op Fl f Ar output_file
-.Op Fl i Ar interval
-
-.Sh DESCRIPTION
-The
-.Nm
-and
-.Nm rds-sink
-utilities are used to stream data through RDS sockets. rds-gen
-reads data from a file descriptor and sends it as messages
-down an RDS socket. rds-sink receives messages from an RDS
-socket and writes it to a file descriptor.
-
-The following options are shared between rds-gen and rds-sink:
-.Bl -tag -width Ds
-.It Fl s Ar address:port
-Binds the RDS socket to the given address and port. rds-gen will
-send messages from this address and port. rds-sink will receive messages
-sent to this address and port.
-.It Fl f Ar file
-rds-gen will read data from this file and rds-sink will write
-data to this file. If '-' is given as the filename then rds-gen
-will use standard input and rds-sink will use standard output.
-.It Fl i Ar interval_seconds
-An iterative summary of the number and size of messages that are sent and
-received is written to standard error at this interval.
-.El
-.Pp
-
-In addition, rds-gen supports the following options:
-.Bl -tag -width Ds
-.It Fl d Ar address:port
-Messages are sent to this destination address and port. If this option
-is specified multiple times then the messages are sent to each destination
-address in a round-robin fashion.
-.It Fl m Ar message_size
-Specifies the size of the messages that are sent down the RDS socket. The default
-message size is 4k. The message size must not be greater than the buffer size.
-.It Fl l Ar total_bytes
-Specifies the number of bytes that will be sent out the socket before rds-gen
-exits. If this is not specified and rds-gen was given a source file then it
-will run until it gets EOF from the file. If no file was given and this
-option is not specified then rds-gen will send data indefinitely.
-.El
-.Pp
-
-.Sh EXAMPLES
-rds-gen on host src sends infinite data to rds-sink on dest who
-prints out the amount of data it receives every second.
-.Pp
-
-.Dl $ rds-sink -s dest:22222 -i 1
-.Dl $ rds-gen -s src:11111 -d dest:22222
-.Pp
-
-Read 100M from /dev/zero on src and write it to /dev/null on dest,
-printing stats on both sides every minute.
-
-.Dl $ rds-sink -s dest:22222 -f /dev/null -i 60
-.Dl $ rds-gen -s src:11111 -f /dev/zero -d dest:22222 -i 60
-.Pp
-
-Watch rds-gen write data as fast as it can into a local black hole because
-there is no bound receiving socket.
-
-.Dl $ rds-gen -s src:11111 -d localhost:31337 -i 1
-.Pp
diff --git a/rds-gen.c b/rds-gen.c
deleted file mode 100644
index f9420c7..0000000
--- a/rds-gen.c
+++ /dev/null
@@ -1,322 +0,0 @@
-/*
- * Copyright (c) 2006 Oracle. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * rds-gen.c: Spew some RDS packets
- */
-
-#define _LARGEFILE64_SOURCE
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <sys/socket.h>
-#include <arpa/inet.h>
-#include <unistd.h>
-#include <errno.h>
-
-#include "kernel-list.h"
-#include "rdstool.h"
-
-void print_usage(int rc)
-{
- int namelen = strlen(progname);
- FILE *output = rc ? stderr : stdout;
-
- verbosef(0, output,
- "Usage: %s -s <source_ip>:<source_port> [[-d <dest_ip>:<dest_port>] ...]\n"
- " %*s [-f <input_file>] [-m <msg_size>]\n"
- " %*s [-l <total_bytes>] [-i <interval>]\n"
- " %*s [-v ...] [-q ...]\n"
- " %s -h\n"
- " %s -V\n",
- progname, namelen, "", namelen, "", namelen, "", progname,
- progname);
-
- exit(rc);
-}
-
-void print_version()
-{
- verbosef(0, stdout, "%s version VERSION\n", progname);
-
- exit(0);
-}
-
-/*
- * Pick the next destination.
- * Currently round-robin, but could be made fancy
- */
-static struct rds_endpoint *pick_dest(struct rds_context *ctxt,
- struct rds_endpoint *de)
-{
- struct list_head *next;
-
- if (!de || (de->re_item.next == &ctxt->rc_daddrs))
- next = ctxt->rc_daddrs.next;
- else
- next = de->re_item.next;
-
- return list_entry(next, struct rds_endpoint, re_item);
-}
-
-static ssize_t fill_stdin(struct rds_context *ctxt, char *bytes,
- ssize_t len)
-{
- ssize_t ret = 0;
- char *ptr = bytes;
-
- static int first = 1;
-
- if (!first)
- return ret;
-
- if (ctxt->rc_filename && strcmp(ctxt->rc_filename,"-"))
- first = 0;
-
- while (len && runningp()) {
- stats_print();
- ret = read(STDIN_FILENO, ptr, len);
- if (!ret) {
- if (ptr != bytes) {
- verbosef(0, stderr,
- "%s: Unexpected end of file reading from %s\n",
- progname, ctxt->rc_filename);
- ret = -EPIPE;
- }
- break;
- }
- if (ret < 0) {
- ret = -errno;
- if (ret == -EINTR)
- continue;
-
- verbosef(0, stderr,
- "%s: Error reading from %s: %s\n",
- progname, ctxt->rc_filename,
- strerror(-ret));
- break;
- }
-
- stats_add_read(ret);
- ptr += ret;
- len -= ret;
- ret = 0; /* If this filled the buffer, we return success */
- }
- verbosef(3, stderr, "Read %zd bytes from stdin\n",
- ptr - bytes);
-
- return ret;
-}
-
-static ssize_t fill_pattern(struct rds_context *ctxt, char *bytes,
- ssize_t len)
-{
- static int first = 1;
-
- stats_print();
-
- if (first) {
- memset(bytes, 0, len);
- first = 0;
- }
-
- return 0;
-}
-
-static ssize_t fill_buff(struct rds_context *ctxt, char *bytes, ssize_t len)
-{
- ssize_t ret;
-
- /* Each possible method must handle calling stats_print() */
- if (ctxt->rc_filename)
- ret = fill_stdin(ctxt, bytes, len);
- else
- ret = fill_pattern(ctxt, bytes, len);
-
- return ret;
-}
-
-static ssize_t send_buff(struct rds_endpoint *se, struct msghdr *msg)
-{
- ssize_t ret = 0;
-
- while (runningp()) {
- stats_print();
-
- ret = sendmsg(se->re_fd, msg, 0);
- if (ret < 0) {
- ret = -errno;
- if (ret == -EINTR)
- continue;
-
- verbosef(0, stderr,
- "%s: Error from sendmsg: %s\n",
- progname, strerror(-ret));
- }
-
- /* Success */
- break;
- }
-
- return ret;
-}
-
-
-static int wli_do_send(struct rds_context *ctxt)
-{
- char bytes[ctxt->rc_msgsize];
- int ret = 0;
- struct rds_endpoint *de = NULL, *se = ctxt->rc_saddr;
- struct iovec iov = {
- .iov_base = bytes,
- .iov_len = ctxt->rc_msgsize,
- };
- struct msghdr msg = {
- .msg_name = NULL, /* Picked later */
- .msg_namelen = sizeof(struct sockaddr_in),
- .msg_iov = &iov,
- .msg_iovlen = 1,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = 0,
- };
-
- verbosef(2, stderr, "Starting send loop\n");
-
- stats_start();
-
- while (runningp()) {
- /* Calls stats_print() */
- ret = fill_buff(ctxt, bytes, ctxt->rc_msgsize);
- if (ret) {
- if (ret == -EINTR)
- continue;
- else
- break;
- }
-
- de = pick_dest(ctxt, de);
- verbosef(2, stderr, "Destination %s\n", de->re_name);
-
- msg.msg_name = &de->re_addr;
- if (ctxt->rc_total &&
- ((stats_get_send() + ctxt->rc_msgsize) > ctxt->rc_total))
- iov.iov_len = ctxt->rc_total - stats_get_send();
-
- /* Calls stats_print() */
- ret = send_buff(se, &msg);
- if (ret < 0)
- break;
-
- stats_add_send(ret);
-
- if (ctxt->rc_total && (stats_get_send() >= ctxt->rc_total))
- break;
- }
- verbosef(2, stderr, "Stopping send loop\n");
-
- stats_total();
-
- return ret;
-}
-
-
-int main(int argc, char *argv[])
-{
- int rc;
- char ipbuf[INET_ADDRSTRLEN];
- struct rds_endpoint *e;
- struct rds_context ctxt = {
- .rc_msgsize = RDS_DEFAULT_MSG_SIZE,
- };
-
- INIT_LIST_HEAD(&ctxt.rc_daddrs);
-
- rc = parse_options(argc, argv, RDS_TOOL_BASE_OPTS RDS_GEN_OPTS,
- &ctxt);
- if (rc)
- print_usage(rc);
-
- if (list_empty(&ctxt.rc_daddrs)) {
- verbosef(0, stderr,
- "%s: Destination endpoint address required\n",
- progname);
- print_usage(-EINVAL);
- }
-
- inet_ntop(PF_INET, &ctxt.rc_saddr->re_addr.sin_addr, ipbuf,
- INET_ADDRSTRLEN);
- verbosef(2, stderr, "Binding endpoint %s:%d\n",
- ipbuf, ntohs(ctxt.rc_saddr->re_addr.sin_port));
-
- rc = rds_bind(&ctxt);
- if (rc)
- goto out;
-
- if (ctxt.rc_filename) {
- rc = dup_file(&ctxt, STDIN_FILENO, O_RDONLY);
- if (rc)
- goto out;
- if (!strcmp(ctxt.rc_filename, "-"))
- ctxt.rc_filename = "<standard input>";
- }
-
- list_for_each_entry(e, &ctxt.rc_daddrs, re_item) {
- inet_ntop(PF_INET, &e->re_addr.sin_addr, ipbuf,
- INET_ADDRSTRLEN);
- verbosef(2, stderr,
- "Adding destination %s:%d\n", ipbuf,
- ntohs(e->re_addr.sin_port));
- }
-
- rc = setup_signals();
- if (rc) {
- verbosef(0, stderr, "%s: Unable to initialize signals\n",
- progname);
- goto out;
- }
-
- rc = wli_do_send(&ctxt);
-
-out:
- free(ctxt.rc_saddr->re_name);
- free(ctxt.rc_saddr);
-
- return rc;
-}
diff --git a/rds-info.c b/rds-info.c
index d90cc16..a4f9008 100644
--- a/rds-info.c
+++ b/rds-info.c
@@ -47,8 +47,8 @@
#include <netinet/in.h>
#include <arpa/inet.h>
-#include "net/rds.h"
-#include "rdstool.h"
+#include "rds.h"
+#include "pfhack.h"
#define rds_conn_flag(conn, flag, letter) \
(conn.flags & RDS_INFO_CONNECTION_FLAG_##flag ? letter : '-')
@@ -67,8 +67,15 @@
for (;len > 0 && copy_into(var, data, each); \
data += each, len -= min(len, each))
+#define verbosef(lvl, f, fmt, a...) do { \
+ if (opt_verbose >= (lvl)) \
+ fprintf((f), fmt, ##a); \
+} while (0)
+
static int opt_verbose = 0;
+char *progname = "rds-info";
+
/* Like inet_ntoa, but can be re-entered several times without clobbering
* the previously returned string. */
static const char *paddr(int af, const void *addrp)
@@ -104,33 +111,10 @@ static void print_counters(void *data, int each, socklen_t len, void *extra)
printf("%25s %16"PRIu64"\n", ctr.name, ctr.value);
}
-static void print_sockets_v1(void *data, int each, socklen_t len, void *extra)
-{
- struct rds_info_socket_v1 sk;
-
- printf("\nRDS Sockets:\n%15s %5s %15s %5s %10s %10s\n",
- "BoundAddr", "BPort", "ConnAddr", "CPort", "SndBuf",
- "RcvBuf");
-
- for_each(sk, data, each, len) {
- printf("%15s %5u %15s %5u %10u %10u\n",
- ipv4addr(sk.bound_addr),
- ntohs(sk.bound_port),
- ipv4addr(sk.connected_addr),
- ntohs(sk.connected_port),
- sk.sndbuf, sk.rcvbuf);
- }
-}
-
static void print_sockets(void *data, int each, socklen_t len, void *extra)
{
struct rds_info_socket sk;
- if (each == sizeof(struct rds_info_socket_v1)) {
- print_sockets_v1(data, each, len, extra);
- return;
- }
-
printf("\nRDS Sockets:\n%15s %5s %15s %5s %10s %10s %8s\n",
"BoundAddr", "BPort", "ConnAddr", "CPort", "SndBuf",
"RcvBuf", "Inode");
@@ -205,7 +189,7 @@ static void print_tcp_socks(void *data, int each, socklen_t len, void *extra)
static void print_ib_conns(void *data, int each, socklen_t len, void *extra)
{
- struct rds_info_ib_connection ic;
+ struct rds_info_rdma_connection ic;
printf("\nRDS IB Connections:\n%15s %15s %32s %32s\n",
"LocalAddr", "RemoteAddr", "LocalDev", "RemoteDev");
@@ -221,8 +205,8 @@ static void print_ib_conns(void *data, int each, socklen_t len, void *extra)
printf(" send_wr=%u", ic.max_send_wr);
printf(", recv_wr=%u", ic.max_recv_wr);
printf(", send_sge=%u", ic.max_send_sge);
- printf(", rdma_fmr_max=%u", ic.rdma_fmr_max);
- printf(", rdma_fmr_size=%u", ic.rdma_fmr_size);
+ printf(", rdma_mr_max=%u", ic.rdma_mr_max);
+ printf(", rdma_mr_size=%u", ic.rdma_mr_size);
}
printf("\n");
@@ -256,11 +240,13 @@ struct info infos[] = {
print_ib_conns, NULL, 0 },
};
-void print_usage(int rc)
+static void print_usage(int rc)
{
FILE *output = rc ? stderr : stdout;
int i;
+ fprintf(stderr, "rds-info version %s\n", RDS_VERSION);
+
verbosef(0, output, "The following options limit output to the given "
"sources:\n");
@@ -275,10 +261,6 @@ void print_usage(int rc)
exit(rc);
}
-void print_version()
-{
-}
-
int main(int argc, char **argv)
{
char optstring[258] = "v+";
@@ -290,6 +272,8 @@ int main(int argc, char **argv)
int c;
char *last;
int i;
+ int pf;
+ int sol;
/* quickly append all our info options to the optstring */
last = &optstring[strlen(optstring)];
@@ -318,7 +302,14 @@ int main(int argc, char **argv)
given_options++;
}
- fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
+#ifdef DYNAMIC_PF_RDS
+ pf = discover_pf_rds();
+ sol = discover_sol_rds();
+#else
+ pf = PF_RDS;
+ sol = SOL_RDS;
+#endif
+ fd = socket(pf, SOCK_SEQPACKET, 0);
if (fd < 0) {
verbosef(0, stderr, "%s: Unable to create socket: %s\n",
progname, strerror(errno));
@@ -326,18 +317,20 @@ int main(int argc, char **argv)
}
for (i = 0; i < array_size(infos); i++) {
+ int invalid_opt = 0;
if (!infos[i].opt_val ||
(given_options && !infos[i].option_given))
continue;
/* read in the info until we get a full snapshot */
- while ((each = getsockopt(fd, SOL_RDS, infos[i].opt_val, data,
+ while ((each = getsockopt(fd, sol, infos[i].opt_val, data,
&len)) < 0) {
if (errno != ENOSPC) {
verbosef(0, stderr,
"%s: Unable get statistics: %s\n",
progname, strerror(errno));
- return 1;
+ invalid_opt = 1;
+ break;
}
if (data)
data = realloc(data, len);
@@ -353,6 +346,9 @@ int main(int argc, char **argv)
}
}
+ if (invalid_opt)
+ continue;
+
infos[i].print(data, each, len, infos[i].extra);
if (given_options && --given_options == 0)
diff --git a/rds-ping.c b/rds-ping.c
index e9c88fc..2d0ac19 100644
--- a/rds-ping.c
+++ b/rds-ping.c
@@ -48,11 +48,9 @@
#include <sys/poll.h>
#include <fcntl.h>
#include <getopt.h>
-#include "net/rds.h"
+#include "rds.h"
-#ifdef DYNAMIC_PF_RDS
#include "pfhack.h"
-#endif
#define die(fmt...) do { \
fprintf(stderr, fmt); \
@@ -257,11 +255,17 @@ rds_socket(struct in_addr *src, struct in_addr *dst)
{
struct sockaddr_in sin;
int fd;
+ int pf;
memset(&sin, 0, sizeof(sin));
sin.sin_family = AF_INET;
- fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
+#ifdef DYNAMIC_PF_RDS
+ pf = discover_pf_rds();
+#else
+ pf = PF_RDS;
+#endif
+ fd = socket(pf, SOCK_SEQPACKET, 0);
if (fd < 0)
die_errno("unable to create RDS socket");
@@ -299,6 +303,8 @@ rds_socket(struct in_addr *src, struct in_addr *dst)
static void
usage(const char *complaint)
{
+ fprintf(stderr, "rds-ping version %s\n", RDS_VERSION);
+
fprintf(stderr,
"%s\nUsage: rds-ping [options] dst_addr\n"
"Options:\n"
@@ -378,8 +384,3 @@ parse_addr(const char *ptr, struct in_addr *ret)
return 0;
}
-/*
- * This are completely stupid. options.c should be removed.
- */
-void print_usage(int durr) { }
-void print_version() { }
diff --git a/rds-rdma.7 b/rds-rdma.7
index 20b1575..c09c656 100644
--- a/rds-rdma.7
+++ b/rds-rdma.7
@@ -1,6 +1,6 @@
.TH "RDS zerocopy" 7
.SH NAME
-RDS-rdma \- Zerocopy Interface for RDMA over RDS
+RDS zerocopy \- Interface for RDMA over RDS
.SH DESCRIPTION
This manual page describes the zerocopy interface of RDS, which
was added in RDSv3. For a description of the basic RDS interface,
diff --git a/rds-sink.1 b/rds-sink.1
deleted file mode 100644
index 05c9d73..0000000
--- a/rds-sink.1
+++ /dev/null
@@ -1 +0,0 @@
-.so man1/rds-gen.1
diff --git a/rds-sink.c b/rds-sink.c
deleted file mode 100644
index 2d47ade..0000000
--- a/rds-sink.c
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright (c) 2006 Oracle. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * rds-sink.c: Collect some RDS packets.
- */
-#define _LARGEFILE64_SOURCE
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <sys/socket.h>
-#include <arpa/inet.h>
-#include <unistd.h>
-#include <errno.h>
-#include <inttypes.h>
-
-#include "kernel-list.h"
-#include "rdstool.h"
-
-void print_usage(int rc)
-{
- int namelen = strlen(progname);
- FILE *output = rc ? stderr : stdout;
-
- verbosef(0, output,
- "Usage: %s -s <source_ip>:<source_port>\n"
- " %*s [-f <output_file>] [-i <interval>]\n"
- " %*s [-v ...] [-q ...]\n"
- " %s -h\n"
- " %s -V\n",
- progname, namelen, "", namelen, "", progname, progname);
-
- exit(rc);
-}
-
-void print_version()
-{
- verbosef(0, stdout, "%s version VERSION\n", progname);
-
- exit(0);
-}
-
-static int empty_buff(struct rds_context *ctxt, char *bytes, ssize_t len)
-{
- int ret = 0;
- char *ptr = bytes;
-
- if (!ctxt->rc_filename)
- len = 0; /* Throw it away */
-
- while (len && runningp()) {
- stats_print();
-
- ret = write(STDOUT_FILENO, ptr, len);
- if (!ret) {
- verbosef(0, stderr,
- "%s: Unexpected end of file writing to %s\n",
- progname, ctxt->rc_filename);
- ret = -EPIPE;
- break;
- }
- if (ret < 0) {
- ret = -errno;
- if (ret == -EINTR)
- continue;
-
- verbosef(0, stderr,
- "%s: Error writing to %s: %s\n",
- progname, ctxt->rc_filename,
- strerror(-ret));
- break;
- }
-
- stats_add_write(ret);
- ptr += ret;
- len -= ret;
- ret = 0;
- }
-
- return ret;
-}
-
-static ssize_t recv_buff(struct rds_endpoint *e, struct msghdr *msg,
- int flags)
-{
- ssize_t ret = 0;
-
- while (runningp()) {
- stats_print();
-
- ret = recvmsg(e->re_fd, msg, flags);
- if (ret < 0) {
- ret = -errno;
- if (ret == -EINTR)
- continue;
-
- verbosef(0, stderr,
- "%s: Error from recvmsg: %s\n",
- progname, strerror(-ret));
- }
-
- /* Success */
- break;
- }
-
- return ret;
-}
-
-static int wli_do_recv(struct rds_context *ctxt)
-{
- struct rds_endpoint *e = ctxt->rc_saddr;
- ssize_t alloced = 0;
- ssize_t ret = 0;
- struct iovec iov = {
- .iov_base = NULL,
- };
- struct msghdr msg = {
- .msg_name = &e->re_addr,
- .msg_namelen = sizeof(struct sockaddr_in),
- .msg_iov = &iov,
- .msg_iovlen = 1,
- };
-
- verbosef(2, stderr, "Starting receive loop\n");
-
- stats_start();
-
- while (runningp()) {
- /* Calls stats_print() */
- iov.iov_len = 0;
- ret = recv_buff(e, &msg, MSG_PEEK|MSG_TRUNC);
- if (ret < 0)
- break;
-
- if (ret > alloced) {
- verbosef(3, stderr,
- "Growing buffer to %zd bytes\n",
- ret);
- iov.iov_base = realloc(iov.iov_base, ret);
- if (iov.iov_base == NULL) {
- ret = -ENOMEM;
- break;
- }
- alloced = ret;
- }
-
- /* Calls stats_print() */
- iov.iov_len = ret;
- ret = recv_buff(e, &msg, 0);
- if (ret < 0)
- break;
-
- stats_add_recv(ret);
-
- /* Calls stats_print() */
- ret = empty_buff(ctxt, iov.iov_base, ret);
- if (ret)
- break;
- }
- verbosef(2, stderr, "Stopping receive loop\n");
-
- stats_total();
-
- return ret;
-}
-
-int main(int argc, char *argv[])
-{
- int rc;
- char ipbuf[INET_ADDRSTRLEN];
- struct rds_context ctxt = {
- .rc_filename = "-",
- };
-
-
- INIT_LIST_HEAD(&ctxt.rc_daddrs);
-
- rc = parse_options(argc, argv, RDS_TOOL_BASE_OPTS RDS_SINK_OPTS,
- &ctxt);
- if (rc)
- print_usage(rc);
-
- inet_ntop(PF_INET, &ctxt.rc_saddr->re_addr.sin_addr, ipbuf,
- INET_ADDRSTRLEN);
- verbosef(2, stderr, "Binding endpoint %s:%d\n",
- ipbuf, ntohs(ctxt.rc_saddr->re_addr.sin_port));
-
- rc = rds_bind(&ctxt);
- if (rc)
- goto out;
-
- if (ctxt.rc_filename) {
- rc = dup_file(&ctxt, STDOUT_FILENO, O_CREAT|O_WRONLY);
- if (rc)
- goto out;
- if (!strcmp(ctxt.rc_filename, "-"))
- ctxt.rc_filename = "<standard output>";
- }
-
- setup_signals();
- if (rc) {
- verbosef(0, stderr, "%s: Unable to initialize signals\n",
- progname);
- goto out;
- }
-
- rc = wli_do_recv(&ctxt);
-
-out:
- free(ctxt.rc_saddr->re_name);
- free(ctxt.rc_saddr);
-
- return rc;
-}
diff --git a/rds-stress.c b/rds-stress.c
index dfe68c5..9f0623c 100644
--- a/rds-stress.c
+++ b/rds-stress.c
@@ -23,11 +23,9 @@
#include <sched.h>
#include <getopt.h>
#include <byteswap.h>
-#include "net/rds.h"
+#include "rds.h"
-#ifdef DYNAMIC_PF_RDS
#include "pfhack.h"
-#endif
/*
*
@@ -42,6 +40,12 @@
* - final stats summary page
*/
+enum {
+ M_RDMA_READWRITE = 0,
+ M_RDMA_READ_ONLY,
+ M_RDMA_WRITE_ONLY
+};
+
struct options {
uint32_t req_depth;
uint32_t req_size;
@@ -65,7 +69,9 @@ struct options {
uint8_t rdma_cache_mrs;
uint8_t rdma_key_o_meter;
uint8_t suppress_warnings;
-
+ uint8_t simplex;
+ uint8_t rw_mode;
+ uint32_t rdma_vector;
uint32_t rdma_alignment;
uint32_t connect_retries;
} __attribute__((packed));
@@ -87,6 +93,8 @@ enum {
S_ACK_RX_BYTES,
S_RDMA_WRITE_BYTES,
S_RDMA_READ_BYTES,
+ S_MBUS_IN_BYTES,
+ S_MBUS_OUT_BYTES,
S_SENDMSG_USECS,
S_RTT_USECS,
S__LAST
@@ -152,6 +160,7 @@ struct header {
uint64_t rdma_pattern;
uint64_t rdma_key;
uint32_t rdma_size;
+ uint32_t rdma_vector;
uint8_t data[0];
} __attribute__((packed));
@@ -188,6 +197,10 @@ static unsigned long sys_page_size;
*/
#define ptr64(p) ((unsigned long) (p))
+/* need vars as long as dynamic vals are possible */
+int pf = PF_RDS;
+int sol = SOL_RDS;
+
/* zero is undefined */
static inline uint64_t minz(uint64_t a, uint64_t b)
{
@@ -244,7 +257,9 @@ static uint32_t parse_addr(char *ptr)
static void usage(void)
{
- printf(
+ fprintf(stderr, "rds-stress version %s\n", RDS_VERSION);
+
+ fprintf(stderr,
"\n"
"Send & Recv parameters:\n"
" -r [addr] use this local address\n"
@@ -254,10 +269,13 @@ static void usage(void)
" -s [addr] send to this address (required)\n"
" -a [bytes, %u] ack message length\n"
" -q [bytes, 1024] request message length\n"
+ " -o datagrams sent one way only (default is both)\n"
" -d [depth, 1] request pipeline depth, nr outstanding\n"
" -t [nr, 1] number of child tasks\n"
" -T [seconds, 0] runtime of test, 0 means infinite\n"
- " -D [bytes] RDMA size (RDSv3 only)\n"
+ " -D [bytes] RDMA: size\n"
+ " -I [iovecs, 1] RDMA: number of user buffers to target (max 512)\n"
+ " -M [nr, 0] RDMA: mode (0=readwrite,1=readonly,2=writeonly)\n"
"\n"
"Optional flags:\n"
" -c measure cpu use with per-cpu soak processes\n"
@@ -342,6 +360,7 @@ static void encode_hdr(struct header *dst, const struct header *hdr)
dst->rdma_pattern = htonll(hdr->rdma_pattern);
dst->rdma_key = htonll(hdr->rdma_key);
dst->rdma_size = htonl(hdr->rdma_size);
+ dst->rdma_vector = htonl(hdr->rdma_vector);
}
static void decode_hdr(struct header *dst, const struct header *hdr)
@@ -362,6 +381,7 @@ static void decode_hdr(struct header *dst, const struct header *hdr)
dst->rdma_pattern = ntohll(hdr->rdma_pattern);
dst->rdma_key = ntohll(hdr->rdma_key);
dst->rdma_size = ntohl(hdr->rdma_size);
+ dst->rdma_vector = ntohl(hdr->rdma_vector);
}
static void fill_hdr(void *message, uint32_t bytes, struct header *hdr)
@@ -480,14 +500,14 @@ static int bound_socket(int domain, int type, int protocol,
struct sockaddr_in *sin)
{
int fd;
- int opt;
+ int sockopt;
fd = socket(domain, type, protocol);
if (fd < 0)
die_errno("socket(%d, %d, %d) failed", domain, type, protocol);
- opt = 1;
- if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)))
+ sockopt = 1;
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &sockopt, sizeof(sockopt)))
die_errno("setsockopt(SO_REUSEADDR) failed");
if (bind(fd, (struct sockaddr *)sin, sizeof(struct sockaddr_in)))
@@ -512,7 +532,7 @@ static int rds_socket(struct options *opts, struct sockaddr_in *sin)
int val;
socklen_t optlen;
- fd = bound_socket(PF_RDS, SOCK_SEQPACKET, 0, sin);
+ fd = bound_socket(pf, SOCK_SEQPACKET, 0, sin);
bytes = opts->nr_tasks * opts->req_depth *
(opts->req_size + opts->ack_size) * 2;
@@ -540,7 +560,7 @@ static int rds_socket(struct options *opts, struct sockaddr_in *sin)
val = 1;
if (opts->use_cong_monitor
- && setsockopt(fd, SOL_RDS, RDS_CONG_MONITOR, &val, sizeof(val))) {
+ && setsockopt(fd, sol, RDS_CONG_MONITOR, &val, sizeof(val))) {
if (errno != ENOPROTOOPT)
die_errno("setsockopt(RDS_CONG_MONITOR) failed");
printf("Kernel does not support congestion monitoring; disabled\n");
@@ -568,10 +588,10 @@ static int check_rdma_support(struct options *opts)
sin.sin_port = htons(opts->starting_port);
sin.sin_addr.s_addr = htonl(opts->receive_addr);
- fd = bound_socket(AF_RDS, SOCK_SEQPACKET, 0, &sin);
+ fd = bound_socket(pf, SOCK_SEQPACKET, 0, &sin);
memset(&args, 0, sizeof(args));
- if (setsockopt(fd, SOL_RDS, RDS_FREE_MR, &args, sizeof(args)) >= 0) {
+ if (setsockopt(fd, sol, RDS_FREE_MR, &args, sizeof(args)) >= 0) {
okay = 1;
} else if (errno == ENOPROTOOPT) {
okay = 0;
@@ -596,7 +616,7 @@ static uint64_t get_rdma_key(int fd, uint64_t addr, uint32_t size)
if (opt.rdma_use_once)
mr_args.flags |= RDS_RDMA_USE_ONCE;
- if (setsockopt(fd, SOL_RDS, RDS_GET_MR, &mr_args, sizeof(mr_args)))
+ if (setsockopt(fd, sol, RDS_GET_MR, &mr_args, sizeof(mr_args)))
die_errno("setsockopt(RDS_GET_MR) failed (%u allocated)", mrs_allocated);
trace("RDS get_rdma_key() = %Lx\n",
@@ -618,7 +638,7 @@ static void free_rdma_key(int fd, uint64_t key)
#else
mr_args.flags = RDS_FREE_MR_ARGS_INVALIDATE;
#endif
- if (setsockopt(fd, SOL_RDS, RDS_FREE_MR, &mr_args, sizeof(mr_args)))
+ if (setsockopt(fd, sol, RDS_FREE_MR, &mr_args, sizeof(mr_args)))
die_errno("setsockopt(RDS_FREE_MR) failed");
mrs_allocated--;
}
@@ -844,7 +864,7 @@ static void alloc_rdma_buffers(struct task *t, struct options *opts)
/* We use mmap here rather than malloc, because it is always
* page aligned. */
- len = 2 * opts->nr_tasks * opts->req_depth * opts->rdma_size + sys_page_size;
+ len = 2 * opts->nr_tasks * opts->req_depth * (opts->rdma_vector * opts->rdma_size) + sys_page_size;
base = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
if (base == MAP_FAILED)
die_errno("alloc_rdma_buffers: mmap failed");
@@ -854,10 +874,10 @@ static void alloc_rdma_buffers(struct task *t, struct options *opts)
for (i = 0; i < opts->nr_tasks; ++i, ++t) {
for (j = 0; j < opts->req_depth; ++j) {
t->rdma_buf[j] = (uint64_t *) base;
- base += opts->rdma_size;
+ base += opts->rdma_size * opts->rdma_vector;
t->local_buf[j] = (uint64_t *) base;
- base += opts->rdma_size;
+ base += opts->rdma_size * opts->rdma_vector;
t->rdma_req_key[j] = 0;
t->rdma_inflight[j] = 0;
@@ -866,7 +886,7 @@ static void alloc_rdma_buffers(struct task *t, struct options *opts)
}
static void rdma_build_req(int fd, struct header *hdr, struct task *t,
- unsigned int rdma_size, unsigned int req_depth)
+ unsigned int rdma_size, unsigned int req_depth, int rw_mode, int rdma_vector)
{
uint64_t *rdma_addr, *rdma_key_p;
@@ -874,17 +894,23 @@ static void rdma_build_req(int fd, struct header *hdr, struct task *t,
rdma_key_p = &t->rdma_req_key[t->send_index];
if (opt.rdma_use_get_mr && *rdma_key_p == 0)
- *rdma_key_p = get_rdma_key(fd, ptr64(rdma_addr), rdma_size);
+ *rdma_key_p = get_rdma_key(fd, ptr64(rdma_addr), rdma_size * rdma_vector);
/* We alternate between RDMA READ and WRITEs */
hdr->rdma_op = t->rdma_next_op;
- t->rdma_next_op = RDMA_OP_TOGGLE(t->rdma_next_op);
+ if (M_RDMA_READWRITE == rw_mode)
+ t->rdma_next_op = RDMA_OP_TOGGLE(t->rdma_next_op);
+ else if (M_RDMA_READ_ONLY == rw_mode)
+ t->rdma_next_op = RDMA_OP_READ;
+ else
+ t->rdma_next_op = RDMA_OP_WRITE;
hdr->rdma_pattern = (((uint64_t) t->send_seq) << 32) | getpid();
hdr->rdma_addr = ptr64(rdma_addr);
hdr->rdma_phyaddr = 0;
hdr->rdma_size = rdma_size;
hdr->rdma_key = *rdma_key_p;
+ hdr->rdma_vector = rdma_vector;
if (RDMA_OP_READ == hdr->rdma_op) {
if (opt.verify)
@@ -906,11 +932,17 @@ static void rdma_build_req(int fd, struct header *hdr, struct task *t,
static void rdma_validate(const struct header *in_hdr, struct options *opts)
{
unsigned long rdma_size;
+ unsigned long rdma_vector;
rdma_size = in_hdr->rdma_size;
if (rdma_size != opts->rdma_size)
die("Unexpected RDMA size %lu in request\n", rdma_size);
+ rdma_vector = in_hdr->rdma_vector;
+ if (rdma_vector != opts->rdma_vector)
+ die("Unexpected RDMA vector %lu in request %u \n", rdma_vector, opts->rdma_vector);
+
+
if (in_hdr->rdma_op != RDMA_OP_READ && in_hdr->rdma_op != RDMA_OP_WRITE)
die("Unexpected RDMA op %u in request\n", in_hdr->rdma_op);
@@ -931,6 +963,7 @@ static void rdma_build_ack(struct header *hdr, const struct header *in_hdr)
hdr->rdma_phyaddr = in_hdr->rdma_phyaddr; /* remote's address to rdma to / from */
hdr->rdma_addr = in_hdr->rdma_addr; /* remote's address to rdma to / from */
hdr->rdma_pattern = in_hdr->rdma_pattern;
+ hdr->rdma_vector = in_hdr->rdma_vector;
}
static inline unsigned int rdma_user_token(struct task *t, unsigned int qindex)
@@ -989,7 +1022,7 @@ static void rdma_put_cmsg(struct msghdr *msg, int type,
msg->msg_controllen = CMSG_SPACE(size);
cmsg = CMSG_FIRSTHDR(msg);
- cmsg->cmsg_level = SOL_RDS;
+ cmsg->cmsg_level = sol;
cmsg->cmsg_type = type;
cmsg->cmsg_len = CMSG_LEN(size);
memcpy(CMSG_DATA(cmsg), ptr, size);
@@ -1003,31 +1036,41 @@ static void rdma_put_cmsg(struct msghdr *msg, int type,
static void rdma_build_cmsg_xfer(struct msghdr *msg, const struct header *hdr,
unsigned int user_token, void *local_buf)
{
- static struct rds_iovec iov;
+
+#define RDS_MAX_IOV 512 /* FIX_ME - put this into rds.h or use socket max ?*/
+
+ static struct rds_iovec iov[RDS_MAX_IOV];
struct rds_rdma_args args;
unsigned int rdma_size;
+ unsigned int rdma_vector;
+ unsigned int v;
rdma_size = hdr->rdma_size;
+ rdma_vector = hdr->rdma_vector;
- trace("RDS issuing rdma for token %x key %Lx len %u local_buf %p\n",
+ trace("RDS issuing rdma for token %x key %Lx len %u local_buf %p vector %u\n",
user_token,
(unsigned long long) hdr->rdma_key,
- rdma_size, local_buf);
+ rdma_size, local_buf,
+ rdma_vector);
/* rdma args */
memset(&args, 0, sizeof(args));
/* Set up the iovec pointing to the RDMA buffer */
- args.local_vec_addr = (uint64_t) &iov;
- args.nr_local = 1;
- iov.addr = ptr64(local_buf);
- iov.bytes = rdma_size;
+ args.local_vec_addr = (uint64_t) iov;
+ args.nr_local = rdma_vector;
+
+ for (v = 0; v < rdma_vector; v++) {
+ iov[v].addr = ptr64((local_buf + (rdma_size * v)));
+ iov[v].bytes = rdma_size;
+ }
/* The remote could either give us a physical address, or
* an index into a zero-based FMR. Either way, we just copy it.
*/
args.remote_vec.addr = hdr->rdma_phyaddr;
- args.remote_vec.bytes = rdma_size;
+ args.remote_vec.bytes = rdma_size * rdma_vector;
args.cookie = hdr->rdma_key;
/* read or write */
@@ -1045,7 +1088,12 @@ static void rdma_build_cmsg_xfer(struct msghdr *msg, const struct header *hdr,
}
/* Fence off subsequent SENDs - this is the default */
- if (opt.rdma_use_fence)
+ /* for rdma read operations. We force this fence to control */
+ /* order of remote immediate data rm completion */
+ /* If we do not use a fence the immediate send rm can complete */
+ /* before rdma data arrives.. */
+
+ if (!(args.flags & RDS_RDMA_READWRITE) && opt.rdma_use_fence)
args.flags |= RDS_RDMA_FENCE;
args.flags |= RDS_RDMA_NOTIFY_ME;
@@ -1096,7 +1144,7 @@ static void rdma_process_ack(int fd, struct header *hdr,
/* remote node wrote local buffer check pattern
* sent via immediate data in rdma buffer
*/
- stat_inc(&ctl->cur[S_RDMA_READ_BYTES], hdr->rdma_size);
+ stat_inc(&ctl->cur[S_MBUS_IN_BYTES], hdr->rdma_size);
if (opt.verify) {
/* This funny looking cast avoids compile warnings
@@ -1108,7 +1156,7 @@ static void rdma_process_ack(int fd, struct header *hdr,
break;
case RDMA_OP_READ:
- stat_inc(&ctl->cur[S_RDMA_WRITE_BYTES], hdr->rdma_size);
+ stat_inc(&ctl->cur[S_MBUS_OUT_BYTES], hdr->rdma_size);
break;
}
}
@@ -1163,7 +1211,8 @@ static int send_packet(int fd, struct task *t,
/* Use the RDMA_MAP cmsg to have sendmsg do the
* mapping on the fly. */
rdma_build_cmsg_map(&msg, hdr->rdma_addr,
- hdr->rdma_size, &cookie);
+ hdr->rdma_size * hdr->rdma_vector,
+ &cookie);
}
}
@@ -1224,7 +1273,9 @@ static int send_one(int fd, struct task *t,
if (opts->rdma_size && t->send_seq > 10)
rdma_build_req(fd, &hdr, t,
opts->rdma_size,
- opts->req_depth);
+ opts->req_depth,
+ opts->rw_mode,
+ opts->rdma_vector);
gettimeofday(&start, NULL);
@@ -1265,11 +1316,11 @@ static int send_ack(int fd, struct task *t, unsigned int qindex,
/* need separate rdma stats cells for send/recv */
switch (hdr->rdma_op) {
case RDMA_OP_WRITE:
- stat_inc(&ctl->cur[S_RDMA_WRITE_BYTES], opts->rdma_size);
+ stat_inc(&ctl->cur[S_MBUS_OUT_BYTES], opts->rdma_size);
break;
case RDMA_OP_READ:
- stat_inc(&ctl->cur[S_RDMA_READ_BYTES], opts->rdma_size);
+ stat_inc(&ctl->cur[S_MBUS_IN_BYTES], opts->rdma_size);
break;
}
@@ -1301,11 +1352,11 @@ eagain:
static int send_anything(int fd, struct task *t,
struct options *opts,
struct child_control *ctl,
- int can_send)
+ int can_send, int do_work)
{
if (ack_anything(fd, t, opts, ctl, can_send) < 0)
return -1;
- while (t->pending < opts->req_depth) {
+ while (do_work && t->pending < opts->req_depth) {
if (!can_send)
goto eagain;
if (send_one(fd, t, opts, ctl) < 0)
@@ -1357,7 +1408,7 @@ static int recv_message(int fd,
for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
struct rds_rdma_notify notify;
- if (cmsg->cmsg_level != SOL_RDS)
+ if (cmsg->cmsg_level != sol)
continue;
switch (cmsg->cmsg_type) {
case RDS_CMSG_CONG_UPDATE:
@@ -1498,7 +1549,7 @@ static int recv_one(int fd, struct task *tasks,
}
static void run_child(pid_t parent_pid, struct child_control *ctl,
- struct options *opts, uint16_t id)
+ struct options *opts, uint16_t id, int active)
{
struct sockaddr_in sin;
struct pollfd pfd;
@@ -1507,6 +1558,7 @@ static void run_child(pid_t parent_pid, struct child_control *ctl,
ssize_t ret;
struct task tasks[opts->nr_tasks];
struct timeval start;
+ int do_work = opts->simplex ? active : 1;
sin.sin_family = AF_INET;
sin.sin_port = htons(opts->starting_port + 1 + id);
@@ -1515,6 +1567,7 @@ static void run_child(pid_t parent_pid, struct child_control *ctl,
/* give main display thread a little edge? */
nice(5);
+ /* send to *all* remote tasks */
memset(tasks, 0, sizeof(tasks));
for (i = 0; i < opts->nr_tasks; i++) {
tasks[i].nr = i;
@@ -1579,7 +1632,7 @@ static void run_child(pid_t parent_pid, struct child_control *ctl,
continue;
if (t->drain_rdmas)
continue;
- if (send_anything(fd, t, opts, ctl, can_send) < 0) {
+ if (send_anything(fd, t, opts, ctl, can_send, do_work) < 0) {
pfd.events |= POLLOUT;
/* If the send queue is full, we will see EAGAIN.
@@ -1603,7 +1656,7 @@ static void run_child(pid_t parent_pid, struct child_control *ctl,
}
}
-static struct child_control *start_children(struct options *opts)
+static struct child_control *start_children(struct options *opts, int active)
{
struct child_control *ctl;
pid_t parent = getpid();
@@ -1635,7 +1688,7 @@ static struct child_control *start_children(struct options *opts)
control_fd = -1;
}
rdma_key_o_meter_set_self(i);
- run_child(parent, ctl + i, opts, i);
+ run_child(parent, ctl + i, opts, i, active);
exit(0);
}
ctl[i].pid = pid;
@@ -1668,9 +1721,14 @@ static double throughput(struct counter *disp)
disp[S_ACK_TX_BYTES].sum + disp[S_ACK_RX_BYTES].sum;
}
-static double throughput_rdma(struct counter *disp)
+static double throughput_mbi(struct counter *disp)
{
- return disp[S_RDMA_WRITE_BYTES].sum + disp[S_RDMA_READ_BYTES].sum;
+ return disp[S_MBUS_IN_BYTES].sum;
+}
+
+static double throughput_mbo(struct counter *disp)
+{
+ return disp[S_MBUS_OUT_BYTES].sum;
}
void stat_snapshot(struct counter *disp, struct child_control *ctl,
@@ -1847,14 +1905,14 @@ get_perfdata(int initialize)
int i, count, item_size;
if (sock_fd < 0) {
- sock_fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
+ sock_fd = socket(pf, SOCK_SEQPACKET, 0);
if (sock_fd < 0)
die_errno("Unable to create socket");
}
/* We should only loop once on the first call; after that the
* buffer requirements for RDS counters should not change. */
- while ((item_size = getsockopt(sock_fd, SOL_RDS, RDS_INFO_COUNTERS, curr, &buflen)) < 0) {
+ while ((item_size = getsockopt(sock_fd, sol, RDS_INFO_COUNTERS, curr, &buflen)) < 0) {
if (errno != ENOSPC)
die_errno("getsockopt(RDS_INFO_COUNTERS) failed");
curr = realloc(curr, buflen);
@@ -1992,9 +2050,9 @@ static void release_children_and_wait(struct options *opts,
get_perfdata(1);
printf("\n");
} else {
- printf("%4s %6s %10s %10s %7s %8s %5s\n",
- "tsks", "tx/s", "tx+rx K/s", "rw+rr K/s",
- "tx us/c", "rtt us", "cpu %");
+ printf("%4s %6s %6s %10s %10s %10s %7s %8s %5s\n",
+ "tsks", "tx/s", "rx/s", "tx+rx K/s", "mbi K/s",
+ "mbo K/s", "tx us/c", "rtt us", "cpu %");
}
last_ts = first_ts;
@@ -2028,11 +2086,13 @@ static void release_children_and_wait(struct options *opts,
scale = 1e6 / usec_sub(&now, &last_ts);
if (!opt.show_perfdata) {
- printf("%4u %6"PRIu64" %10.2f %10.2f %7.2f %8.2f %5.2f\n",
+ printf("%4u %6"PRIu64" %6"PRIu64" %10.2f %10.2f %10.2f %7.2f %8.2f %5.2f\n",
nr_running,
disp[S_REQ_TX_BYTES].nr,
+ disp[S_REQ_RX_BYTES].nr,
scale * throughput(disp) / 1024.0,
- scale * throughput_rdma(disp) / 1024.0,
+ scale * throughput_mbi(disp) / 1024.0,
+ scale * throughput_mbo(disp) / 1024.0,
scale * avg(&disp[S_SENDMSG_USECS]),
scale * avg(&disp[S_RTT_USECS]),
scale * cpu);
@@ -2042,10 +2102,11 @@ static void release_children_and_wait(struct options *opts,
opts->nr_tasks, opts->req_size,
opts->ack_size, opts->rdma_size);
- printf("%Lu,%f,%f,%f,%f,%f",
+ printf("%Lu,%f,%f,%f,%f,%f,%f",
(unsigned long long) disp[S_REQ_TX_BYTES].nr,
scale * throughput(disp) / 1024.0,
- scale * throughput_rdma(disp) / 1024.0,
+ scale * throughput_mbi(disp) / 1024.0,
+ scale * throughput_mbo(disp) / 1024.0,
scale * avg(&disp[S_SENDMSG_USECS]),
scale * avg(&disp[S_RTT_USECS]),
cpu >= 0? scale * cpu : 0);
@@ -2096,11 +2157,13 @@ static void release_children_and_wait(struct options *opts,
scale = 1e6 / usec_sub(&last_ts, &first_ts);
- printf("%4u %6lu %10.2f %10.2f %7.2f %8.2f %5.2f (average)\n",
+ printf("%4u %6lu %6lu %10.2f %10.2f %10.2f %7.2f %8.2f %5.2f (average)\n",
opts->nr_tasks,
(long) (scale * summary[S_REQ_TX_BYTES].nr),
+ (long) (scale * summary[S_REQ_RX_BYTES].nr),
scale * throughput(summary) / 1024.0,
- scale * throughput_rdma(disp) / 1024.0,
+ scale * throughput_mbi(summary) / 1024.0,
+ scale * throughput_mbo(summary) / 1024.0,
avg(&summary[S_SENDMSG_USECS]),
avg(&summary[S_RTT_USECS]),
soak_arr? scale * cpu_total : -1.0);
@@ -2196,6 +2259,9 @@ static void encode_options(struct options *dst, const struct options *src)
dst->connect_retries = htonl(src->connect_retries);
dst->suppress_warnings = src->suppress_warnings;/* byte sized */
+ dst->simplex = src->simplex; /* byte sized */
+ dst->rw_mode = src->rw_mode; /* byte sized */
+ dst->rdma_vector = htonl(src->rdma_vector);
}
static void decode_options(struct options *dst, const struct options *src)
@@ -2226,6 +2292,9 @@ static void decode_options(struct options *dst, const struct options *src)
dst->connect_retries = ntohl(src->connect_retries);
dst->suppress_warnings = src->suppress_warnings;/* byte sized */
+ dst->simplex = src->simplex; /* byte sized */
+ dst->rw_mode = src->rw_mode; /* byte sized */
+ dst->rdma_vector = ntohl(src->rdma_vector);
}
static void verify_option_encdec(const struct options *opts)
@@ -2296,7 +2365,7 @@ static int active_parent(struct options *opts, struct soak_control *soak_arr)
verify_option_encdec(opts);
sin.sin_family = AF_INET;
- sin.sin_port = htons(opts->starting_port);
+ sin.sin_port = htons(0);
sin.sin_addr.s_addr = htonl(opts->receive_addr);
fd = bound_socket(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sin);
@@ -2321,7 +2390,7 @@ static int active_parent(struct options *opts, struct soak_control *soak_arr)
peer_send(fd, &enc_options, sizeof(struct options));
printf("negotiated options, tasks will start in 2 seconds\n");
- ctl = start_children(opts);
+ ctl = start_children(opts, 1);
/* Tell the peer to start up. This is necessary when testing
* with a large number of tasks, because otherwise the peer
@@ -2351,6 +2420,8 @@ static int passive_parent(uint32_t addr, uint16_t port,
lfd = bound_socket(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sin);
+ printf("waiting for incoming connection on %s:%d\n", inet_ntoa(sin.sin_addr), port);
+
if (listen(lfd, 255))
die_errno("listen() failed");
@@ -2386,7 +2457,7 @@ static int passive_parent(uint32_t addr, uint16_t port,
opts->receive_addr = addr;
opt = *opts;
- ctl = start_children(opts);
+ ctl = start_children(opts, 0);
/* Wait for "GO" from the initiating peer */
peer_recv(fd, &ok, sizeof(ok));
@@ -2481,10 +2552,10 @@ void stop_soakers(struct soak_control *soak_arr)
}
void check_size(uint32_t size, uint32_t unspec, uint32_t max, char *desc,
- char *opt)
+ char *option)
{
if (size == ~0)
- die("specify %s with %s\n", desc, opt);
+ die("specify %s with %s\n", desc, option);
if (size < max)
die("%s must be at least %u bytes\n", desc, max);
}
@@ -2540,9 +2611,8 @@ int main(int argc, char **argv)
struct soak_control *soak_arr = NULL;
#ifdef DYNAMIC_PF_RDS
- /* Discover PF_RDS/SOL_RDS once, and be done with it */
- (void) discover_pf_rds();
- (void) discover_sol_rds();
+ pf = discover_pf_rds();
+ sol = discover_sol_rds();
#endif
#ifdef _SC_PAGESIZE
@@ -2575,11 +2645,14 @@ int main(int argc, char **argv)
opts.show_params = 0;
opts.connect_retries = 0;
opts.show_perfdata = 0;
+ opts.simplex = 0;
+ opts.rw_mode = 0;
+ opts.rdma_vector = 1;
while(1) {
int c, index;
- c = getopt_long(argc, argv, "+a:cD:d:hp:q:Rr:s:t:T:vVz",
+ c = getopt_long(argc, argv, "+a:cD:d:hI:M:op:q:Rr:s:t:T:vVz",
long_options, &index);
if (c == -1)
break;
@@ -2597,6 +2670,15 @@ int main(int argc, char **argv)
case 'd':
opts.req_depth = parse_ull(optarg,(uint32_t)~0);
break;
+ case 'I':
+ opts.rdma_vector = parse_ull(optarg,512);
+ break;
+ case 'M':
+ opts.rw_mode = parse_ull(optarg,2);
+ break;
+ case 'o':
+ opts.simplex = 1;
+ break;
case 'p':
opts.starting_port = parse_ull(optarg,
(uint16_t)~0);
@@ -2708,8 +2790,3 @@ int main(int argc, char **argv)
return active_parent(&opts, soak_arr);
}
-/*
- * This are completely stupid. options.c should be removed.
- */
-void print_usage(int durr) { }
-void print_version() { }
diff --git a/rds-tools.spec b/rds-tools.spec
index e49a728..b38c06d 100644
--- a/rds-tools.spec
+++ b/rds-tools.spec
@@ -1,18 +1,27 @@
Summary: RDS support tools
Name: rds-tools
-Version: 1.4
+Version: 2.0.4
Release: 1
License: GPL/BSD
Group: Applications/Internet
URL: http://oss.oracle.com/projects/rds/
-Source: rds-tools-%{version}-%{release}.tar.gz
-BuildRoot: /var/tmp/rds-tools-%{version}-%{release}
+Source: rds-tools-%{version}.tar.gz
+BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot
%description
rds-tools is a collection of support tools for the RDS socket API.
+It includes rds-stress, rds-info, and rds-ping.
+
+%package -n rds-devel
+Summary: Header files for RDS development
+Group: Development/Libraries
+
+%description -n rds-devel
+Header file and manpages for rds and rds-rdma that describe
+how to use the socket interface.
%prep
-%setup -n rds-tools-%{version}-%{release}
+%setup -q
%build
%configure
@@ -20,7 +29,7 @@ make %{?_smp_mflags}
%install
rm -rf $RPM_BUILD_ROOT
-make DESTDIR=$RPM_BUILD_ROOT install
+%makeinstall DESTDIR=$RPM_BUILD_ROOT
%clean
rm -rf $RPM_BUILD_ROOT
@@ -28,8 +37,12 @@ rm -rf $RPM_BUILD_ROOT
%files
%defattr(-,root,root)
%{_bindir}/*
-%{_mandir}/*
+%{_mandir}/man1/*
+
+%files -n rds-devel
%{_includedir}/*
+%{_mandir}/man7/*
+%doc docs examples
%changelog
* Sun Nov 25 2007 Vladimir Sokolovsky <vlad at mellanox.co.il>
diff --git a/rds-tools.spec.in b/rds-tools.spec.in
index 6dd8f32..e3d4634 100644
--- a/rds-tools.spec.in
+++ b/rds-tools.spec.in
@@ -5,14 +5,23 @@ Release: @RELEASE@
License: GPL/BSD
Group: Applications/Internet
URL: http://oss.oracle.com/projects/rds/
-Source: rds-tools-%{version}-%{release}.tar.gz
-BuildRoot: /var/tmp/rds-tools-%{version}-%{release}
+Source: rds-tools-%{version}.tar.gz
+BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot
%description
rds-tools is a collection of support tools for the RDS socket API.
+It includes rds-stress, rds-info, and rds-ping.
+
+%package -n rds-devel
+Summary: Header files for RDS development
+Group: Development/Libraries
+
+%description -n rds-devel
+Header file and manpages for rds and rds-rdma that describe
+how to use the socket interface.
%prep
-%setup -n rds-tools-%{version}-%{release}
+%setup -q
%build
%configure
@@ -20,7 +29,7 @@ make %{?_smp_mflags}
%install
rm -rf $RPM_BUILD_ROOT
-make DESTDIR=$RPM_BUILD_ROOT install
+%makeinstall DESTDIR=$RPM_BUILD_ROOT
%clean
rm -rf $RPM_BUILD_ROOT
@@ -28,8 +37,12 @@ rm -rf $RPM_BUILD_ROOT
%files
%defattr(-,root,root)
%{_bindir}/*
-%{_mandir}/*
+%{_mandir}/man1/*
+
+%files -n rds-devel
%{_includedir}/*
+%{_mandir}/man7/*
+%doc docs examples
%changelog
* Sun Nov 25 2007 Vladimir Sokolovsky <vlad at mellanox.co.il>
diff --git a/rds-tools.txt b/rds-tools.txt
deleted file mode 100644
index 2dac8b4..0000000
--- a/rds-tools.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-
-
-So, rds-get-stats is easy and I already have it done. we'd just import
-that.
-
-rds-gen would just send down a socket. I'm hoping for options like:
-
- -s addr:port
- to bind the source address
- -d addr:port
- dest to send to, maybe just round-robin between multiple to
- start?
- -m units
- the size of each sent message
- -b units
- the size of the socket buffer
- -5
- include an md5sum at the tail of each message
- -f file
- read from a file until eof
- -p units
- send from a memory pool of the given length
- -S file
- put the -p pool in this mmaped/mlocked file, use sendfile
- -l units
- only send this many bytes total
- -i timespec
- output vmstat-like line at this interval
-
-I guess that gives us enough to chew on for now :) I want this stuff to
-be dirt simple. trivial arg parser helpers, maybe some list.h from the
-kernel, no glib complexity explosion. I guess I could send you some
-snippets of code along those lines.
-
-Oh, and I guess we'll need a little helper amongst the tools to get
-pf_rds and sol_rds from /proc/sys/net/rds/.
-
-- z
-
diff --git a/rdstool.h b/rdstool.h
deleted file mode 100644
index cc7988f..0000000
--- a/rdstool.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2006 Oracle. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * tools header stuff
- */
-
-#ifndef __RDS_TOOL_H
-#define __RDS_TOOL_H
-
-#include <netinet/in.h>
-
-#include "kernel-list.h"
-#include "pfhack.h"
-
-#ifndef AF_RDS
-# define AF_RDS OFFICIAL_PF_RDS
-#endif
-#ifndef PF_RDS
-# define PF_RDS AF_RDS
-#endif
-#ifndef SOL_RDS
-# define SOL_RDS OFFICIAL_SOL_RDS
-#endif
-
-#define RDS_TOOL_BASE_OPTS ":s:m:f:i:-:vqhV"
-#define RDS_SINK_OPTS
-#define RDS_GEN_OPTS "d:b:l:"
-
-#define RDS_DEFAULT_MSG_SIZE 4096
-
-#define verbosef(lvl, f, fmt, a...) do { \
- if (verbose >= (lvl)) \
- fprintf((f), fmt, ##a); \
-} while (0)
-
-struct rds_endpoint {
- struct list_head re_item;
- char *re_name;
- struct sockaddr_in re_addr;
- int re_fd;
-};
-
-struct rds_context {
- struct rds_endpoint *rc_saddr;
- struct list_head rc_daddrs;
- const char *rc_filename;
- uint32_t rc_msgsize;
- uint64_t rc_total;
-};
-
-/* Set by parse_options() */
-extern char *progname;
-extern unsigned int verbose;
-
-extern int parse_options(int argc, char *argv[], const char *opts,
- struct rds_context *ctxt);
-extern int rds_bind(struct rds_context *ctxt);
-extern int dup_file(struct rds_context *ctxt, int fd, int flags);
-extern int setup_signals(void);
-extern int runningp(void);
-
-/* stats.c */
-extern int stats_init(int delay);
-extern void stats_extended(int extendedp);
-extern void stats_start(void);
-extern void stats_print(void);
-extern void stats_total(void);
-
-extern void stats_add_recv(uint64_t bytes);
-extern void stats_add_send(uint64_t bytes);
-extern uint64_t stats_get_send(void);
-extern void stats_add_read(uint64_t bytes);
-extern void stats_add_write(uint64_t bytes);
-
-
-/* Provided by C files with main() */
-extern void print_usage(int rc);
-extern void print_version(void);
-#endif /* __RDS_TOOL_H */
diff --git a/stap/README b/stap/README
deleted file mode 100644
index d74f0fb..0000000
--- a/stap/README
+++ /dev/null
@@ -1,15 +0,0 @@
-SystemTap script for RDS
-
-SystemTap: http://sourceware.org/systemtap/
-SystemTap wiki: http://sourceware.org/systemtap/wiki
-
-To use SystemTap for tracing RDS, please ensure you
-have debugging symbols available for both your installed kernel
-as well as RDS module. These usually take the form of
-*-debuginfo RPMs, and may be available via your distro's
-update repository, a distro repository disabled by
-default, or via your distro's website.
-
-Please send any comments or improvement patches to
-rds-devel at oss.oracle.com.
-
diff --git a/stap/rds.stp b/stap/rds.stp
deleted file mode 100644
index 4baaf0e..0000000
--- a/stap/rds.stp
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
-probe module("rds").function("rds_*")
-{
- printf("RDS %s\n", pp())
-}
-*/
-
-global reads
-
-probe begin
-{
- reads <<< 0
-}
-
-probe module("rds").function("rds_recvmsg").return
-{
- reads <<< ret
-}
-
-probe module("rds").function("rds_send_pong").return
-{
- reads <<< ret
- //println(caller())
-}
-
-probe timer.sec(5)
-{
- println("RDS bytes received")
- print(@hist_log(reads))
-}
-
-probe end
-{
- printf("end!\n")
-}
diff --git a/stats.c b/stats.c
deleted file mode 100644
index daaabc8..0000000
--- a/stats.c
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- * Copyright (c) 2006 Oracle. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * stats.c - Print stats at an interval
- */
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <signal.h>
-#include <sys/time.h>
-#include <unistd.h>
-#include <time.h>
-#include <errno.h>
-#include <inttypes.h>
-#include <arpa/inet.h>
-
-#include "kernel-list.h"
-#include "rdstool.h"
-
-static int stats_delay = 0; /* Delay in seconds */
-static int print_extended = 0; /* Print read/write stats? */
-static sig_atomic_t time_to_print = 0;
-
-struct rds_tool_stats {
- uint64_t rs_send_bytes;
- uint64_t rs_send_bytes_interval;
- uint64_t rs_send_packets;
- uint64_t rs_send_packets_interval;
- uint64_t rs_recv_bytes;
- uint64_t rs_recv_bytes_interval;
- uint64_t rs_recv_packets;
- uint64_t rs_recv_packets_interval;
- uint64_t rs_read_bytes;
- uint64_t rs_read_bytes_interval;
- uint64_t rs_write_bytes;
- uint64_t rs_write_bytes_interval;
-} tool_stats;
-
-#define inc_net_stat(type, val) do { \
- tool_stats.rs_##type##_bytes += val; \
- tool_stats.rs_##type##_bytes_interval += val; \
- tool_stats.rs_##type##_packets += 1; \
- tool_stats.rs_##type##_packets_interval += 1; \
-} while (0)
-
-#define inc_io_stat(type, val) do { \
- tool_stats.rs_##type##_bytes += val; \
- tool_stats.rs_##type##_bytes_interval += val; \
-} while (0)
-
-#define clear_interval() do { \
- tool_stats.rs_send_bytes_interval = 0; \
- tool_stats.rs_recv_bytes_interval = 0; \
- tool_stats.rs_send_packets_interval = 0; \
- tool_stats.rs_recv_packets_interval = 0; \
- tool_stats.rs_read_bytes_interval = 0; \
- tool_stats.rs_write_bytes_interval = 0; \
-} while (0)
-
-static void handler(int signum)
-{
- time_to_print = 1;
-}
-
-static int setup_alarm(void)
-{
- int rc = 0;
- struct sigaction act;
-
- sigemptyset(&act.sa_mask);
- act.sa_handler = handler;
- act.sa_flags = 0;
-
- rc = sigaction(SIGALRM, &act, NULL);
- if (rc) {
- rc = -errno;
- verbosef(0, stderr,
- "%s: Unable to initialize timer: %s\n",
- progname, strerror(-rc));
- }
-
- return rc;
-}
-
-void stats_add_read(uint64_t bytes)
-{
- inc_io_stat(read, bytes);
-}
-
-void stats_add_write(uint64_t bytes)
-{
- inc_io_stat(write, bytes);
-}
-
-void stats_add_send(uint64_t bytes)
-{
- inc_net_stat(send, bytes);
-}
-
-uint64_t stats_get_send(void)
-{
- return tool_stats.rs_send_bytes;
-}
-
-void stats_add_recv(uint64_t bytes)
-{
- inc_net_stat(recv, bytes);
-}
-
-static void stats_arm(void)
-{
- time_to_print = 0;
- alarm(stats_delay);
-}
-
-int stats_init(int delay)
-{
- int rc = 0;
-
- stats_delay = delay;
- if (stats_delay)
- rc = setup_alarm();
-
- return rc;
-}
-
-void stats_extended(int extendedp)
-{
- print_extended = !!extendedp;
-}
-
-void stats_start(void)
-{
- if (stats_delay) {
- verbosef(1, stderr,
- "%19s %19s %19s %19s\n",
- "Bytes sent/s", "Packets sent/s",
- "Bytes recv/s", "Packets recv/s");
- if (print_extended)
- verbosef(1, stderr, " %19s %19s",
- "Bytes read/s", "Bytes written/s");
- verbosef(1, stderr, "\n");
-
- stats_arm();
- }
-}
-
-static void stats_output(void)
-{
- verbosef(0, stderr,
- "%19"PRIu64" %19"PRIu64" %19"PRIu64" %19"PRIu64,
- tool_stats.rs_send_bytes_interval / stats_delay,
- tool_stats.rs_send_packets_interval / stats_delay,
- tool_stats.rs_recv_bytes_interval / stats_delay,
- tool_stats.rs_recv_packets_interval / stats_delay);
- if (print_extended)
- verbosef(0, stderr, " %19"PRIu64" %19"PRIu64,
- tool_stats.rs_read_bytes_interval / stats_delay,
- tool_stats.rs_write_bytes_interval / stats_delay);
- verbosef(0, stderr, "\n");
-}
-
-void stats_print(void)
-{
- /* Are stats on? */
- if (stats_delay && time_to_print) {
- stats_output();
- clear_interval();
- stats_arm();
- }
-}
-
-void stats_total(void)
-{
- if (!stats_delay)
- return;
-
- verbosef(0, stderr,
- "Total:\n"
- "%19"PRIu64" %19"PRIu64" %19"PRIu64" %19"PRIu64,
- tool_stats.rs_send_bytes,
- tool_stats.rs_send_packets,
- tool_stats.rs_recv_bytes,
- tool_stats.rs_recv_packets);
- if (print_extended)
- verbosef(0, stderr, " %19"PRIu64" %19"PRIu64,
- tool_stats.rs_read_bytes,
- tool_stats.rs_write_bytes);
-
- verbosef(0, stderr, "\n");
-}
-
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ofed/rds-tools.git
More information about the Pkg-ofed-commits mailing list