libceph: use MSG_SENDPAGE_NOTLAST with ceph_tcp_sendpage()
Prevent do_tcp_sendpages() from calling tcp_push() (at least) once per page. Instead, arrange for tcp_push() to be called (at least) once per data payload. This results in more MSS-sized packets and fewer packets overall (5-10% reduction in my tests with typical OSD request sizes). See commits 2f53384424
("tcp: allow splice() to build full TSO packets"), 35f9c09fe9
("tcp: tcp_sendpages() should call tcp_push() once") and ae62ca7b03
("tcp: fix MSG_SENDPAGE_NOTLAST logic") for details.

Here is an example of a packet size histogram for 128K OSD requests (MSS = 1448, top 5):

Before:

    SIZE    COUNT
    1448   777700
     952   127915
    1200    39238
    1219     9806
      21     5675

After:

    SIZE    COUNT
    1448   897280
      21     6201
    1019     2797
     643     2739
     376     2479

We could do slightly better by explicitly corking the socket, but it's not clear it's worth it.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
parent
3239eb5215
commit
433b0a1295
|
@ -560,12 +560,15 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
|
|||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* @more: either or both of MSG_MORE and MSG_SENDPAGE_NOTLAST
|
||||
*/
|
||||
static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
|
||||
int offset, size_t size, bool more)
|
||||
int offset, size_t size, int more)
|
||||
{
|
||||
ssize_t (*sendpage)(struct socket *sock, struct page *page,
|
||||
int offset, size_t size, int flags);
|
||||
int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : 0);
|
||||
int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
|
@ -1552,6 +1555,7 @@ static int write_partial_message_data(struct ceph_connection *con)
|
|||
struct ceph_msg *msg = con->out_msg;
|
||||
struct ceph_msg_data_cursor *cursor = &msg->cursor;
|
||||
bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
|
||||
int more = MSG_MORE | MSG_SENDPAGE_NOTLAST;
|
||||
u32 crc;
|
||||
|
||||
dout("%s %p msg %p\n", __func__, con, msg);
|
||||
|
@ -1580,8 +1584,10 @@ static int write_partial_message_data(struct ceph_connection *con)
|
|||
}
|
||||
|
||||
page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
|
||||
if (length == cursor->total_resid)
|
||||
more = MSG_MORE;
|
||||
ret = ceph_tcp_sendpage(con->sock, page, page_offset, length,
|
||||
true);
|
||||
more);
|
||||
if (ret <= 0) {
|
||||
if (do_datacrc)
|
||||
msg->footer.data_crc = cpu_to_le32(crc);
|
||||
|
@ -1611,13 +1617,16 @@ static int write_partial_message_data(struct ceph_connection *con)
|
|||
*/
|
||||
static int write_partial_skip(struct ceph_connection *con)
|
||||
{
|
||||
int more = MSG_MORE | MSG_SENDPAGE_NOTLAST;
|
||||
int ret;
|
||||
|
||||
dout("%s %p %d left\n", __func__, con, con->out_skip);
|
||||
while (con->out_skip > 0) {
|
||||
size_t size = min(con->out_skip, (int) PAGE_SIZE);
|
||||
|
||||
ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true);
|
||||
if (size == con->out_skip)
|
||||
more = MSG_MORE;
|
||||
ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, more);
|
||||
if (ret <= 0)
|
||||
goto out;
|
||||
con->out_skip -= ret;
|
||||
|
|
Loading…
Reference in New Issue