diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 0e58a3187345..757f4692b5bd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1030,6 +1030,9 @@ randomize: sock.socket->sk->sk_allocation = GFP_NOIO; msock.socket->sk->sk_allocation = GFP_NOIO; + sock.socket->sk->sk_use_task_frag = false; + msock.socket->sk->sk_use_task_frag = false; + sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index e379ccc63c52..592cfa8b765a 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -512,6 +512,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, noreclaim_flag = memalloc_noreclaim_save(); do { sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC; + sock->sk->sk_use_task_frag = false; msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_control = NULL; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index b69b89166b6b..8cedc1ef496c 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1537,6 +1537,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid) queue->sock->sk->sk_rcvtimeo = 10 * HZ; queue->sock->sk->sk_allocation = GFP_ATOMIC; + queue->sock->sk->sk_use_task_frag = false; nvme_tcp_set_queue_io_cpu(queue); queue->request = NULL; queue->data_remaining = 0; diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 5fb1f364e815..1d1cf641937c 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -738,6 +738,7 @@ iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session, sk->sk_reuse = SK_CAN_REUSE; sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */ sk->sk_allocation = GFP_ATOMIC; + sk->sk_use_task_frag = false; sk_set_memalloc(sk); sock_no_linger(sk); diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c index f8b326eed54d..a2b2da1255dd 100644 --- a/drivers/usb/usbip/usbip_common.c +++ b/drivers/usb/usbip/usbip_common.c @@ -315,6 +315,7 @@ int usbip_recv(struct socket *sock, void *buf, int size) do { sock->sk->sk_allocation = GFP_NOIO; + sock->sk->sk_use_task_frag = false; result = sock_recvmsg(sock, &msg, MSG_WAITALL); if (result <= 0) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index e80252a83225..7bc7b5e03c51 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2944,6 +2944,7 @@ generic_ip_connect(struct TCP_Server_Info *server) cifs_dbg(FYI, "Socket created\n"); server->ssocket = socket; socket->sk->sk_allocation = GFP_NOFS; + socket->sk->sk_use_task_frag = false; if (sfamily == AF_INET6) cifs_reclassify_socket6(socket); else diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 8b80ca0cd65f..4450721ec83c 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -645,6 +645,7 @@ static void add_sock(struct socket *sock, struct connection *con) if (dlm_config.ci_protocol == DLM_PROTO_SCTP) sk->sk_state_change = lowcomms_state_change; sk->sk_allocation = GFP_NOFS; + sk->sk_use_task_frag = false; sk->sk_error_report = lowcomms_error_report; release_sock(sk); } @@ -1769,6 +1770,7 @@ static int dlm_listen_for_all(void) listen_con.sock = sock; sock->sk->sk_allocation = GFP_NOFS; + sock->sk->sk_use_task_frag = false; sock->sk->sk_data_ready = lowcomms_listen_data_ready; release_sock(sock->sk); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 37d222bdfc8c..a07b24d170f2 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1602,6 +1602,7 @@ static void o2net_start_connect(struct work_struct *work) sc->sc_sock = sock; /* freed by sc_kref_release */ sock->sk->sk_allocation = GFP_ATOMIC; + sock->sk->sk_use_task_frag = false; myaddr.sin_family = AF_INET; myaddr.sin_addr.s_addr = mynode->nd_ipv4_address; diff --git a/include/net/sock.h b/include/net/sock.h index ecea3dcc2217..dcd72e6285b2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -318,6 +318,9 @@ struct sk_filter; * @sk_stamp: time stamp of last packet received * @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only * @sk_tsflags: SO_TIMESTAMPING flags + * @sk_use_task_frag: allow sk_page_frag() to use current->task_frag. + * Sockets that can be used under memory reclaim should + * set this to false. * @sk_bind_phc: SO_TIMESTAMPING bind PHC index of PTP virtual clock * for timestamping * @sk_tskey: counter to disambiguate concurrent tstamp requests @@ -512,6 +515,7 @@ struct sock { u8 sk_txtime_deadline_mode : 1, sk_txtime_report_errors : 1, sk_txtime_unused : 6; + bool sk_use_task_frag; struct socket *sk_socket; void *sk_user_data; @@ -2560,16 +2564,14 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk) * Both direct reclaim and page faults can nest inside other * socket operations and end up recursing into sk_page_frag() * while it's already in use: explicitly avoid task page_frag - * usage if the caller is potentially doing any of them. - * This assumes that page fault handlers use the GFP_NOFS flags. + * when users disable sk_use_task_frag. * * Return: a per task page_frag if context allows that, * otherwise a per socket one. */ static inline struct page_frag *sk_page_frag(struct sock *sk) { - if ((sk->sk_allocation & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC | __GFP_FS)) == - (__GFP_DIRECT_RECLAIM | __GFP_FS)) + if (sk->sk_use_task_frag) return ¤t->task_frag; return &sk->sk_frag; diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 07db2f436d44..d9120f14684b 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -868,6 +868,7 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket) } csocket->sk->sk_allocation = GFP_NOIO; + csocket->sk->sk_use_task_frag = false; file = sock_alloc_file(csocket, 0, NULL); if (IS_ERR(file)) { pr_err("%s (%d): failed to map fd\n", diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index dfa237fbd5a3..1d06e114ba3f 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -446,6 +446,7 @@ int ceph_tcp_connect(struct ceph_connection *con) if (ret) return ret; sock->sk->sk_allocation = GFP_NOFS; + sock->sk->sk_use_task_frag = false; #ifdef CONFIG_LOCKDEP lockdep_set_class(&sock->sk->sk_lock, &socket_class); diff --git a/net/core/sock.c b/net/core/sock.c index d2587d8712db..f954d5893e79 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3390,6 +3390,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default); sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default); sk->sk_state = TCP_CLOSE; + sk->sk_use_task_frag = true; sk_set_socket(sk, sock); sock_set_flag(sk, SOCK_ZAPPED); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c0506d0d7478..aaa5b2741b79 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1882,6 +1882,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, sk->sk_write_space = xs_udp_write_space; sk->sk_state_change = xs_local_state_change; sk->sk_error_report = xs_error_report; + sk->sk_use_task_frag = false; xprt_clear_connected(xprt); @@ -2082,6 +2083,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_user_data = xprt; sk->sk_data_ready = xs_data_ready; sk->sk_write_space = xs_udp_write_space; + sk->sk_use_task_frag = false; xprt_set_connected(xprt); @@ -2249,6 +2251,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_state_change = xs_tcp_state_change; sk->sk_write_space = xs_tcp_write_space; sk->sk_error_report = xs_error_report; + sk->sk_use_task_frag = false; /* socket options */ sock_reset_flag(sk, SOCK_LINGER); diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index d6fece1ed982..74a54295c164 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -489,6 +489,7 @@ static int espintcp_init_sk(struct sock *sk) /* avoid using task_frag */ sk->sk_allocation = GFP_ATOMIC; + sk->sk_use_task_frag = false; return 0;