author		2025-03-08 22:04:20 +0800
committer	2025-03-08 22:04:20 +0800
commit		a07bb8fd1299070229f0e8f3dcb57ffd5ef9870a (patch)
tree		84f21bd0bf7071bc5fc7dd989e77d7ceb5476682 /net/smc
Initial commit: OpenHarmony-v4.0-Release
Diffstat (limited to 'net/smc')

-rw-r--r--   net/smc/Kconfig        21
-rw-r--r--   net/smc/Makefile        5
-rw-r--r--   net/smc/af_smc.c     2624
-rw-r--r--   net/smc/smc.h         300
-rw-r--r--   net/smc/smc_cdc.c     476
-rw-r--r--   net/smc/smc_cdc.h     305
-rw-r--r--   net/smc/smc_clc.c     784
-rw-r--r--   net/smc/smc_clc.h     333
-rw-r--r--   net/smc/smc_close.c   499
-rw-r--r--   net/smc/smc_close.h    30
-rw-r--r--   net/smc/smc_core.c   1973
-rw-r--r--   net/smc/smc_core.h    425
-rw-r--r--   net/smc/smc_diag.c    283
-rw-r--r--   net/smc/smc_ib.c      643
-rw-r--r--   net/smc/smc_ib.h       91
-rw-r--r--   net/smc/smc_ism.c     439
-rw-r--r--   net/smc/smc_ism.h      56
-rw-r--r--   net/smc/smc_llc.c    1974
-rw-r--r--   net/smc/smc_llc.h     109
-rw-r--r--   net/smc/smc_netns.h    21
-rw-r--r--   net/smc/smc_pnet.c   1174
-rw-r--r--   net/smc/smc_pnet.h     70
-rw-r--r--   net/smc/smc_rx.c      444
-rw-r--r--   net/smc/smc_rx.h       31
-rw-r--r--   net/smc/smc_tx.c      646
-rw-r--r--   net/smc/smc_tx.h       39
-rw-r--r--   net/smc/smc_wr.c      720
-rw-r--r--   net/smc/smc_wr.h      131

28 files changed, 14646 insertions, 0 deletions
diff --git a/net/smc/Kconfig b/net/smc/Kconfig
new file mode 100644
index 000000000..1ab3c5a2c
--- /dev/null
+++ b/net/smc/Kconfig
@@ -0,0 +1,21 @@
# SPDX-License-Identifier: GPL-2.0-only
config SMC
	tristate "SMC socket protocol family"
	depends on INET && INFINIBAND
	help
	  SMC-R provides a "sockets over RDMA" solution making use of
	  RDMA over Converged Ethernet (RoCE) technology to upgrade
	  AF_INET TCP connections transparently.
	  The Linux implementation of the SMC-R solution is designed as
	  a separate socket family SMC.

	  Select this option if you want to run SMC socket applications.

config SMC_DIAG
	tristate "SMC: socket monitoring interface"
	depends on SMC
	help
	  Support for the SMC socket monitoring interface used by tools
	  such as smcss.

	  If unsure, say Y.
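The help text above is the entire user-visible surface: applications opt in by opening a socket of the new AF_SMC family, and everything else stays the plain BSD socket API. A minimal client sketch (illustrative only, not part of this commit; the fallback definitions of AF_SMC and SMCPROTO_SMC are assumptions matching linux/socket.h and net/smc/smc.h, for systems whose libc headers lack them):

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <arpa/inet.h>

	#ifndef AF_SMC
	#define AF_SMC		43	/* assumed value, see linux/socket.h */
	#endif
	#define SMCPROTO_SMC	0	/* SMC over IPv4, see net/smc/smc.h */

	int main(void)
	{
		struct sockaddr_in sa = {
			.sin_family = AF_INET,	/* SMC reuses AF_INET addresses */
			.sin_port = htons(12345),
			.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
		};
		/* SOCK_STREAM only: af_smc.c handles stream sockets exclusively */
		int fd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);

		if (fd < 0) {
			perror("socket(AF_SMC)");	/* CONFIG_SMC off or module missing */
			return 1;
		}
		if (connect(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
			perror("connect");
		close(fd);
		return 0;
	}

If the peer never signalled SMC capability, the connect path in af_smc.c below falls back to plain TCP, so the same binary also talks to ordinary servers.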
diff --git a/net/smc/Makefile b/net/smc/Makefile
new file mode 100644
index 000000000..cb1254541
--- /dev/null
+++ b/net/smc/Makefile
@@ -0,0 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_SMC)	+= smc.o
obj-$(CONFIG_SMC_DIAG)	+= smc_diag.o
smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o
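A configuration fragment that exercises both Makefile rules above (an illustration, not mandated by the commit):

	CONFIG_SMC=m
	CONFIG_SMC_DIAG=m

With both symbols set to m, the objects collected in smc-y link into smc.ko, and smc_diag.o builds as a separate smc_diag.ko.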
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
new file mode 100644
index 000000000..41cbc7c89
--- /dev/null
+++ b/net/smc/af_smc.c
@@ -0,0 +1,2624 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * AF_SMC protocol family socket handler keeping the AF_INET sock address type
 * applies to SOCK_STREAM sockets only
 * offers an alternative communication option for TCP-protocol sockets
 * applicable with RoCE-cards only
 *
 * Initial restrictions:
 *   - support for alternate links postponed
 *
 * Copyright IBM Corp. 2016, 2018
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 *             based on prototype from Frank Blaschka
 */

#define KMSG_COMPONENT "smc"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
#include <linux/if_vlan.h>
#include <linux/rcupdate_wait.h>
#include <linux/ctype.h>

#include <net/sock.h>
#include <net/tcp.h>
#include <net/smc.h>
#include <asm/ioctls.h>

#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include "smc_netns.h"

#include "smc.h"
#include "smc_clc.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_ism.h"
#include "smc_pnet.h"
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"

static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
						 * creation on server
						 */
static DEFINE_MUTEX(smc_client_lgr_pending);	/* serialize link group
						 * creation on client
						 */

struct workqueue_struct	*smc_hs_wq;	/* wq for handshake work */
struct workqueue_struct	*smc_close_wq;	/* wq for close work */

static void smc_tcp_listen_work(struct work_struct *);
static void smc_connect_work(struct work_struct *);

static void smc_set_keepalive(struct sock *sk, int val)
{
	struct smc_sock *smc = smc_sk(sk);

	smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}

static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

static struct smc_hashinfo smc_v6_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};

int smc_hash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
	struct hlist_head *head;

	head = &h->ht;

	write_lock_bh(&h->lock);
	sk_add_node(sk, head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	write_unlock_bh(&h->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(smc_hash_sk);

void smc_unhash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);

struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v4_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);

struct proto smc_proto6 = {
	.name		= "SMC6",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v6_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto6);

static void smc_restore_fallback_changes(struct smc_sock *smc)
{
	if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
		smc->clcsock->file->private_data = smc->sk.sk_socket;
		smc->clcsock->file = NULL;
	}
}

static int __smc_release(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;
	int rc = 0;

	if (!smc->use_fallback) {
		rc = smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	} else {
		if (sk->sk_state != SMC_CLOSED) {
			if (sk->sk_state != SMC_LISTEN &&
			    sk->sk_state != SMC_INIT)
				sock_put(sk); /* passive closing */
			if (sk->sk_state == SMC_LISTEN) {
				/* wake up clcsock accept */
				rc = kernel_sock_shutdown(smc->clcsock,
							  SHUT_RDWR);
			}
			sk->sk_state = SMC_CLOSED;
			sk->sk_state_change(sk);
		}
		smc_restore_fallback_changes(smc);
	}

	sk->sk_prot->unhash(sk);

	if (sk->sk_state == SMC_CLOSED) {
		if (smc->clcsock) {
			release_sock(sk);
			smc_clcsock_release(smc);
			lock_sock(sk);
		}
		if (!smc->use_fallback)
			smc_conn_free(&smc->conn);
	}

	return rc;
}

static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int old_state, rc = 0;

	if (!sk)
		goto out;

	sock_hold(sk); /* sock_put below */
	smc = smc_sk(sk);

	old_state = sk->sk_state;

	/* cleanup for a dangling non-blocking connect */
	if (smc->connect_nonblock && old_state == SMC_INIT)
		tcp_abort(smc->clcsock->sk, ECONNABORTED);

	if (cancel_work_sync(&smc->connect_work))
		sock_put(&smc->sk); /* sock_hold in smc_connect for passive closing */

	if (sk->sk_state == SMC_LISTEN)
		/* smc_close_non_accepted() is called and acquires
		 * sock lock for child sockets again
		 */
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	else
		lock_sock(sk);

	if (old_state == SMC_INIT && sk->sk_state == SMC_ACTIVE &&
	    !smc->use_fallback)
		smc_close_active_abort(smc);

	rc = __smc_release(smc);

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	release_sock(sk);

	sock_put(sk); /* sock_hold above */
	sock_put(sk); /* final sock_put */
out:
	return rc;
}

static void smc_destruct(struct sock *sk)
{
	if (sk->sk_state != SMC_CLOSED)
		return;
	if (!sock_flag(sk, SOCK_DEAD))
		return;

	sk_refcnt_debug_dec(sk);
}

static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
				   int protocol)
{
	struct smc_sock *smc;
	struct proto *prot;
	struct sock *sk;

	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = protocol;
	smc = smc_sk(sk);
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_WORK(&smc->connect_work, smc_connect_work);
	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
	INIT_LIST_HEAD(&smc->accept_q);
	spin_lock_init(&smc->accept_q_lock);
	spin_lock_init(&smc->conn.send_lock);
	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);
	mutex_init(&smc->clcsock_release_lock);

	return sk;
}

static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	if (addr->sin_family != AF_INET &&
	    addr->sin_family != AF_INET6 &&
	    addr->sin_family != AF_UNSPEC)
		goto out;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if (addr->sin_family == AF_UNSPEC &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT || smc->connect_nonblock)
		goto out_rel;

	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}

static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
				   unsigned long mask)
{
	/* options we don't get control via setsockopt for */
	nsk->sk_type = osk->sk_type;
	nsk->sk_sndbuf = osk->sk_sndbuf;
	nsk->sk_rcvbuf = osk->sk_rcvbuf;
	nsk->sk_sndtimeo = osk->sk_sndtimeo;
	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
	nsk->sk_mark = osk->sk_mark;
	nsk->sk_priority = osk->sk_priority;
	nsk->sk_rcvlowat = osk->sk_rcvlowat;
	nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
	nsk->sk_err = osk->sk_err;

	nsk->sk_flags &= ~mask;
	nsk->sk_flags |= osk->sk_flags & mask;
}

#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED) | \
			     (1UL << SOCK_TSTAMP_NEW))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 * clc socket (since smc is not called for these options from net/core)
 */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}

#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_DBG))
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
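/* The two SK_FLAGS_* masks above choose which SOL_SOCKET flags travel in
 * each direction; smc_copy_sock_settings() applies them with a
 * clear-then-or bitmask idiom. A standalone sketch of just that idiom
 * (userspace C, illustrative only, not part of af_smc.c):
 */
#include <assert.h>

/* take only the bits selected by 'mask' from src, keep the rest of dst */
static unsigned long copy_masked(unsigned long dst, unsigned long src,
				 unsigned long mask)
{
	dst &= ~mask;		/* clear the maskable bits ...          */
	dst |= src & mask;	/* ... then refill them from the source */
	return dst;
}

int main(void)
{
	/* bit 0 is maskable, bit 1 is private to dst */
	assert(copy_masked(0x2UL, 0x1UL, 0x1UL) == 0x3UL); /* bit 0 copied in   */
	assert(copy_masked(0x3UL, 0x0UL, 0x1UL) == 0x2UL); /* bit 0 cleared out */
	return 0;
}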

/* register the new rmb on all links */
static int smcr_lgr_reg_rmbs(struct smc_link *link,
			     struct smc_buf_desc *rmb_desc)
{
	struct smc_link_group *lgr = link->lgr;
	int i, rc = 0;

	rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
	if (rc)
		return rc;
	/* protect against parallel smc_llc_cli_rkey_exchange() and
	 * parallel smcr_link_reg_rmb()
	 */
	mutex_lock(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&lgr->lnk[i]))
			continue;
		rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc);
		if (rc)
			goto out;
	}

	/* exchange confirm_rkey msg with peer */
	rc = smc_llc_do_confirm_rkey(link, rmb_desc);
	if (rc) {
		rc = -EFAULT;
		goto out;
	}
	rmb_desc->is_conf_rkey = true;
out:
	mutex_unlock(&lgr->llc_conf_mutex);
	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
	return rc;
}

static int smcr_clnt_conf_first_link(struct smc_sock *smc)
{
	struct smc_link *link = smc->conn.lnk;
	struct smc_llc_qentry *qentry;
	int rc;

	/* receive CONFIRM LINK request from server over RoCE fabric */
	qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
			      SMC_LLC_CONFIRM_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}
	smc_llc_save_peer_uid(qentry);
	rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
	smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
	if (rc)
		return SMC_CLC_DECL_RMBE_EC;

	rc = smc_ib_modify_qp_rts(link);
	if (rc)
		return SMC_CLC_DECL_ERR_RDYLNK;

	smc_wr_remember_qp_attr(link);

	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
		return SMC_CLC_DECL_ERR_REGRMB;

	/* confirm_rkey is implicit on 1st contact */
	smc->conn.rmb_desc->is_conf_rkey = true;

	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	smc_llc_link_active(link);
	smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);

	/* optional 2nd link, receive ADD LINK request from server */
	qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
			      SMC_LLC_ADD_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		if (rc == -EAGAIN)
			rc = 0; /* no DECLINE received, go with one link */
		return rc;
	}
	smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
	smc_llc_cli_add_link(link, qentry);
	return 0;
}

static void smcr_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->r0.rmbe_size);

	smc->conn.peer_rmbe_idx = clc->r0.rmbe_idx;
	smc->conn.local_tx_ctrl.token = ntohl(clc->r0.rmbe_alert_token);
	smc->conn.peer_rmbe_size = bufsize;
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
}

static bool smc_isascii(char *hostname)
{
	int i;

	for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++)
		if (!isascii(hostname[i]))
			return false;
	return true;
}

static void smcd_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->d0.dmbe_size);

	smc->conn.peer_rmbe_idx = clc->d0.dmbe_idx;
	smc->conn.peer_token = clc->d0.token;
	/* msg header takes up space in the buffer */
	smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
	if (clc->hdr.version > SMC_V1 &&
	    (clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) {
		struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
			(struct smc_clc_msg_accept_confirm_v2 *)clc;
		struct smc_clc_first_contact_ext *fce =
			(struct smc_clc_first_contact_ext *)
				(((u8 *)clc_v2) + sizeof(*clc_v2));

		memcpy(smc->conn.lgr->negotiated_eid, clc_v2->eid,
		       SMC_MAX_EID_LEN);
		smc->conn.lgr->peer_os = fce->os_type;
		smc->conn.lgr->peer_smc_release = fce->release;
		if (smc_isascii(fce->hostname))
			memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
			       SMC_MAX_HOSTNAME_LEN);
	}
}

static void smc_conn_save_peer_info(struct smc_sock *smc,
				    struct smc_clc_msg_accept_confirm *clc)
{
	if (smc->conn.lgr->is_smcd)
		smcd_conn_save_peer_info(smc, clc);
	else
		smcr_conn_save_peer_info(smc, clc);
}

static void smc_link_save_peer_info(struct smc_link *link,
				    struct smc_clc_msg_accept_confirm *clc)
{
	link->peer_qpn = ntoh24(clc->r0.qpn);
	memcpy(link->peer_gid, clc->r0.lcl.gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, clc->r0.lcl.mac, sizeof(link->peer_mac));
	link->peer_psn = ntoh24(clc->r0.psn);
	link->peer_mtu = clc->r0.qp_mtu;
}

static void smc_switch_to_fallback(struct smc_sock *smc)
{
	wait_queue_head_t *smc_wait = sk_sleep(&smc->sk);
	wait_queue_head_t *clc_wait = sk_sleep(smc->clcsock->sk);
	unsigned long flags;

	smc->use_fallback = true;
	if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
		smc->clcsock->file = smc->sk.sk_socket->file;
		smc->clcsock->file->private_data = smc->clcsock;
		smc->clcsock->wq.fasync_list =
			smc->sk.sk_socket->wq.fasync_list;

		/* There may be some entries remaining in
		 * smc socket->wq, which should be removed
		 * to clcsocket->wq during the fallback.
		 */
		spin_lock_irqsave(&smc_wait->lock, flags);
		spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING);
		list_splice_init(&smc_wait->head, &clc_wait->head);
		spin_unlock(&clc_wait->lock);
		spin_unlock_irqrestore(&smc_wait->lock, flags);
	}
}

/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
{
	smc_switch_to_fallback(smc);
	smc->fallback_rsn = reason_code;
	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;
	return 0;
}

/* decline and fall back during connect */
static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code,
					u8 version)
{
	int rc;

	if (reason_code < 0) { /* error, fallback is not possible */
		if (smc->sk.sk_state == SMC_INIT)
			sock_put(&smc->sk); /* passive closing */
		return reason_code;
	}
	if (reason_code != SMC_CLC_DECL_PEERDECL) {
		rc = smc_clc_send_decline(smc, reason_code, version);
		if (rc < 0) {
			if (smc->sk.sk_state == SMC_INIT)
				sock_put(&smc->sk); /* passive closing */
			return rc;
		}
	}
	return smc_connect_fallback(smc, reason_code);
}

/* abort connecting */
static void smc_connect_abort(struct smc_sock *smc, int local_first)
{
	if (local_first)
		smc_lgr_cleanup_early(&smc->conn);
	else
		smc_conn_free(&smc->conn);
}

/* check if there is a rdma device available for this connection. */
/* called for connect and listen */
static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
{
	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
	if (!ini->ib_dev)
		return SMC_CLC_DECL_NOSMCRDEV;
	return 0;
}

/* check if there is an ISM device available for this connection. */
/* called for connect and listen */
static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
{
	/* Find ISM device with same PNETID as connecting interface */
	smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
	if (!ini->ism_dev[0])
		return SMC_CLC_DECL_NOSMCDDEV;
	else
		ini->ism_chid[0] = smc_ism_get_chid(ini->ism_dev[0]);
	return 0;
}

/* is chid unique for the ism devices that are already determined? */
static bool smc_find_ism_v2_is_unique_chid(u16 chid, struct smc_init_info *ini,
					   int cnt)
{
	int i = (!ini->ism_dev[0]) ? 1 : 0;

	for (; i < cnt; i++)
		if (ini->ism_chid[i] == chid)
			return false;
	return true;
}

/* determine possible V2 ISM devices (either without PNETID or with PNETID plus
 * PNETID matching net_device)
 */
static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
				       struct smc_init_info *ini)
{
	int rc = SMC_CLC_DECL_NOSMCDDEV;
	struct smcd_dev *smcd;
	int i = 1;
	u16 chid;

	if (smcd_indicated(ini->smc_type_v1))
		rc = 0;		/* already initialized for V1 */
	mutex_lock(&smcd_dev_list.mutex);
	list_for_each_entry(smcd, &smcd_dev_list.list, list) {
		if (smcd->going_away || smcd == ini->ism_dev[0])
			continue;
		chid = smc_ism_get_chid(smcd);
		if (!smc_find_ism_v2_is_unique_chid(chid, ini, i))
			continue;
		if (!smc_pnet_is_pnetid_set(smcd->pnetid) ||
		    smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) {
			ini->ism_dev[i] = smcd;
			ini->ism_chid[i] = chid;
			ini->is_smcd = true;
			rc = 0;
			i++;
			if (i > SMC_MAX_ISM_DEVS)
				break;
		}
	}
	mutex_unlock(&smcd_dev_list.mutex);
	ini->ism_offered_cnt = i - 1;
	if (!ini->ism_dev[0] && !ini->ism_dev[1])
		ini->smcd_version = 0;

	return rc;
}

/* Check for VLAN ID and register it on ISM device just for CLC handshake */
static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
				      struct smc_init_info *ini)
{
	if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev[0], ini->vlan_id))
		return SMC_CLC_DECL_ISMVLANERR;
	return 0;
}

static int smc_find_proposal_devices(struct smc_sock *smc,
				     struct smc_init_info *ini)
{
	int rc = 0;

	/* check if there is an ism device available */
	if (ini->smcd_version & SMC_V1) {
		if (smc_find_ism_device(smc, ini) ||
		    smc_connect_ism_vlan_setup(smc, ini)) {
			if (ini->smc_type_v1 == SMC_TYPE_B)
				ini->smc_type_v1 = SMC_TYPE_R;
			else
				ini->smc_type_v1 = SMC_TYPE_N;
		} /* else ISM V1 is supported for this connection */
		if (smc_find_rdma_device(smc, ini)) {
			if (ini->smc_type_v1 == SMC_TYPE_B)
				ini->smc_type_v1 = SMC_TYPE_D;
			else
				ini->smc_type_v1 = SMC_TYPE_N;
		} /* else RDMA is supported for this connection */
	}
	if (smc_ism_v2_capable && smc_find_ism_v2_device_clnt(smc, ini))
		ini->smc_type_v2 = SMC_TYPE_N;

	/* if neither ISM nor RDMA are supported, fallback */
	if (!smcr_indicated(ini->smc_type_v1) &&
	    ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N)
		rc = SMC_CLC_DECL_NOSMCDEV;

	return rc;
}

/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
 * used, the VLAN ID will be registered again during the connection setup.
 */
static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc,
					struct smc_init_info *ini)
{
	if (!smcd_indicated(ini->smc_type_v1))
		return 0;
	if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev[0], ini->vlan_id))
		return SMC_CLC_DECL_CNFERR;
	return 0;
}

#define SMC_CLC_MAX_ACCEPT_LEN \
	(sizeof(struct smc_clc_msg_accept_confirm_v2) + \
	 sizeof(struct smc_clc_first_contact_ext) + \
	 sizeof(struct smc_clc_msg_trail))

/* CLC handshake during connect */
static int smc_connect_clc(struct smc_sock *smc,
			   struct smc_clc_msg_accept_confirm_v2 *aclc2,
			   struct smc_init_info *ini)
{
	int rc = 0;

	/* do inband token exchange */
	rc = smc_clc_send_proposal(smc, ini);
	if (rc)
		return rc;
	/* receive SMC Accept CLC message */
	return smc_clc_wait_msg(smc, aclc2, SMC_CLC_MAX_ACCEPT_LEN,
				SMC_CLC_ACCEPT, CLC_WAIT_TIME);
}

/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc,
			    struct smc_clc_msg_accept_confirm *aclc,
			    struct smc_init_info *ini)
{
	int i, reason_code = 0;
	struct smc_link *link;

	ini->is_smcd = false;
	ini->ib_lcl = &aclc->r0.lcl;
	ini->ib_clcqpn = ntoh24(aclc->r0.qpn);
	ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;

	mutex_lock(&smc_client_lgr_pending);
	reason_code = smc_conn_create(smc, ini);
	if (reason_code) {
		mutex_unlock(&smc_client_lgr_pending);
		return reason_code;
	}

	smc_conn_save_peer_info(smc, aclc);

	if (ini->first_contact_local) {
		link = smc->conn.lnk;
	} else {
		/* set link that was assigned by server */
		link = NULL;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *l = &smc->conn.lgr->lnk[i];

			if (l->peer_qpn == ntoh24(aclc->r0.qpn) &&
			    !memcmp(l->peer_gid, &aclc->r0.lcl.gid,
				    SMC_GID_SIZE) &&
			    !memcmp(l->peer_mac, &aclc->r0.lcl.mac,
				    sizeof(l->peer_mac))) {
				link = l;
				break;
			}
		}
		if (!link) {
			reason_code = SMC_CLC_DECL_NOSRVLINK;
			goto connect_abort;
		}
		smc->conn.lnk = link;
	}

	/* create send buffer and rmb */
	if (smc_buf_create(smc, false)) {
		reason_code = SMC_CLC_DECL_MEM;
		goto connect_abort;
	}

	if (ini->first_contact_local)
		smc_link_save_peer_info(link, aclc);

	if (smc_rmb_rtoken_handling(&smc->conn, link, aclc)) {
		reason_code = SMC_CLC_DECL_ERR_RTOK;
		goto connect_abort;
	}

	smc_close_init(smc);
	smc_rx_init(smc);

	if (ini->first_contact_local) {
		if (smc_ib_ready_link(link)) {
			reason_code = SMC_CLC_DECL_ERR_RDYLNK;
			goto connect_abort;
		}
	} else {
		if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc)) {
			reason_code = SMC_CLC_DECL_ERR_REGRMB;
			goto connect_abort;
		}
	}
	smc_rmb_sync_sg_for_device(&smc->conn);

	reason_code = smc_clc_send_confirm(smc, ini->first_contact_local,
					   SMC_V1);
	if (reason_code)
		goto connect_abort;

	smc_tx_init(smc);

	if (ini->first_contact_local) {
		/* QP confirmation over RoCE fabric */
		smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
		reason_code = smcr_clnt_conf_first_link(smc);
		smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
		if (reason_code)
			goto connect_abort;
	}
	mutex_unlock(&smc_client_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
connect_abort:
	smc_connect_abort(smc, ini->first_contact_local);
	mutex_unlock(&smc_client_lgr_pending);
	smc->connect_nonblock = 0;

	return reason_code;
}

/* The server has chosen one of the proposed ISM devices for the communication.
 * Determine from the CHID of the received CLC ACCEPT the ISM device chosen.
 */
static int
smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc,
			       struct smc_init_info *ini)
{
	int i;

	for (i = 0; i < ini->ism_offered_cnt + 1; i++) {
		if (ini->ism_chid[i] == ntohs(aclc->chid)) {
			ini->ism_selected = i;
			return 0;
		}
	}

	return -EPROTO;
}

/* setup for ISM connection of client */
static int smc_connect_ism(struct smc_sock *smc,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smc_init_info *ini)
{
	int rc = 0;

	ini->is_smcd = true;
	ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;

	if (aclc->hdr.version == SMC_V2) {
		struct smc_clc_msg_accept_confirm_v2 *aclc_v2 =
			(struct smc_clc_msg_accept_confirm_v2 *)aclc;

		rc = smc_v2_determine_accepted_chid(aclc_v2, ini);
		if (rc)
			return rc;
	}
	ini->ism_peer_gid[ini->ism_selected] = aclc->d0.gid;

	/* there is only one lgr role for SMC-D; use server lock */
	mutex_lock(&smc_server_lgr_pending);
	rc = smc_conn_create(smc, ini);
	if (rc) {
		mutex_unlock(&smc_server_lgr_pending);
		return rc;
	}

	/* Create send and receive buffers */
	rc = smc_buf_create(smc, true);
	if (rc) {
		rc = (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB : SMC_CLC_DECL_MEM;
		goto connect_abort;
	}

	smc_conn_save_peer_info(smc, aclc);
	smc_close_init(smc);
	smc_rx_init(smc);
	smc_tx_init(smc);

	rc = smc_clc_send_confirm(smc, ini->first_contact_local,
				  aclc->hdr.version);
	if (rc)
		goto connect_abort;
	mutex_unlock(&smc_server_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
connect_abort:
	smc_connect_abort(smc, ini->first_contact_local);
	mutex_unlock(&smc_server_lgr_pending);
	smc->connect_nonblock = 0;

	return rc;
}

/* check if received accept type and version matches a proposed one */
static int smc_connect_check_aclc(struct smc_init_info *ini,
				  struct smc_clc_msg_accept_confirm *aclc)
{
	if ((aclc->hdr.typev1 == SMC_TYPE_R &&
	     !smcr_indicated(ini->smc_type_v1)) ||
	    (aclc->hdr.typev1 == SMC_TYPE_D &&
	     ((!smcd_indicated(ini->smc_type_v1) &&
	       !smcd_indicated(ini->smc_type_v2)) ||
	      (aclc->hdr.version == SMC_V1 &&
	       !smcd_indicated(ini->smc_type_v1)) ||
	      (aclc->hdr.version == SMC_V2 &&
	       !smcd_indicated(ini->smc_type_v2)))))
		return SMC_CLC_DECL_MODEUNSUPP;

	return 0;
}

/* perform steps before actually connecting */
static int __smc_connect(struct smc_sock *smc)
{
	u8 version = smc_ism_v2_capable ? SMC_V2 : SMC_V1;
	struct smc_clc_msg_accept_confirm_v2 *aclc2;
	struct smc_clc_msg_accept_confirm *aclc;
	struct smc_init_info *ini = NULL;
	u8 *buf = NULL;
	int rc = 0;

	if (smc->use_fallback)
		return smc_connect_fallback(smc, smc->fallback_rsn);

	/* if peer has not signalled SMC-capability, fall back */
	if (!tcp_sk(smc->clcsock->sk)->syn_smc)
		return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);

	/* IPSec connections opt out of SMC optimizations */
	if (using_ipsec(smc))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC,
						    version);

	ini = kzalloc(sizeof(*ini), GFP_KERNEL);
	if (!ini)
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM,
						    version);

	ini->smcd_version = SMC_V1;
	ini->smcd_version |= smc_ism_v2_capable ? SMC_V2 : 0;
	ini->smc_type_v1 = SMC_TYPE_B;
	ini->smc_type_v2 = smc_ism_v2_capable ? SMC_TYPE_D : SMC_TYPE_N;

	/* get vlan id from IP device */
	if (smc_vlan_by_tcpsk(smc->clcsock, ini)) {
		ini->smcd_version &= ~SMC_V1;
		ini->smc_type_v1 = SMC_TYPE_N;
		if (!ini->smcd_version) {
			rc = SMC_CLC_DECL_GETVLANERR;
			goto fallback;
		}
	}

	rc = smc_find_proposal_devices(smc, ini);
	if (rc)
		goto fallback;

	buf = kzalloc(SMC_CLC_MAX_ACCEPT_LEN, GFP_KERNEL);
	if (!buf) {
		rc = SMC_CLC_DECL_MEM;
		goto fallback;
	}
	aclc2 = (struct smc_clc_msg_accept_confirm_v2 *)buf;
	aclc = (struct smc_clc_msg_accept_confirm *)aclc2;

	/* perform CLC handshake */
	rc = smc_connect_clc(smc, aclc2, ini);
	if (rc)
		goto vlan_cleanup;

	/* check if smc modes and versions of CLC proposal and accept match */
	rc = smc_connect_check_aclc(ini, aclc);
	version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2;
	ini->smcd_version = version;
	if (rc)
		goto vlan_cleanup;

	/* depending on previous steps, connect using rdma or ism */
	if (aclc->hdr.typev1 == SMC_TYPE_R)
		rc = smc_connect_rdma(smc, aclc, ini);
	else if (aclc->hdr.typev1 == SMC_TYPE_D)
		rc = smc_connect_ism(smc, aclc, ini);
	if (rc)
		goto vlan_cleanup;

	smc_connect_ism_vlan_cleanup(smc, ini);
	kfree(buf);
	kfree(ini);
	return 0;

vlan_cleanup:
	smc_connect_ism_vlan_cleanup(smc, ini);
	kfree(buf);
fallback:
	kfree(ini);
	return smc_connect_decline_fallback(smc, rc, version);
}

static void smc_connect_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(work, struct smc_sock,
					    connect_work);
	long timeo = smc->sk.sk_sndtimeo;
	int rc = 0;

	if (!timeo)
		timeo = MAX_SCHEDULE_TIMEOUT;
	lock_sock(smc->clcsock->sk);
	if (smc->clcsock->sk->sk_err) {
		smc->sk.sk_err = smc->clcsock->sk->sk_err;
	} else if ((1 << smc->clcsock->sk->sk_state) &
					(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
		if ((rc == -EPIPE) &&
		    ((1 << smc->clcsock->sk->sk_state) &
					(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))
			rc = 0;
	}
	release_sock(smc->clcsock->sk);
	lock_sock(&smc->sk);
	if (rc != 0 || smc->sk.sk_err) {
		smc->sk.sk_state = SMC_CLOSED;
		if (rc == -EPIPE || rc == -EAGAIN)
			smc->sk.sk_err = EPIPE;
		else if (rc == -ECONNREFUSED)
			smc->sk.sk_err = ECONNREFUSED;
		else if (signal_pending(current))
			smc->sk.sk_err = -sock_intr_errno(timeo);
		sock_put(&smc->sk); /* passive closing */
		goto out;
	}

	rc = __smc_connect(smc);
	if (rc < 0)
		smc->sk.sk_err = -rc;

out:
	if (!sock_flag(&smc->sk, SOCK_DEAD)) {
		if (smc->sk.sk_err) {
			smc->sk.sk_state_change(&smc->sk);
		} else { /* allow polling before and after fallback decision */
			smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
			smc->sk.sk_write_space(&smc->sk);
		}
	}
	release_sock(&smc->sk);
}

static int smc_connect(struct socket *sock, struct sockaddr *addr,
		       int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
		goto out_err;

	lock_sock(sk);
	switch (sk->sk_state) {
	default:
		goto out;
	case SMC_ACTIVE:
		rc = -EISCONN;
		goto out;
	case SMC_INIT:
		rc = 0;
		break;
	}

	smc_copy_sock_settings_to_clc(smc);
	tcp_sk(smc->clcsock->sk)->syn_smc = 1;
	if (smc->connect_nonblock) {
		rc = -EALREADY;
		goto out;
	}
	rc = kernel_connect(smc->clcsock, addr, alen, flags);
	if (rc && rc != -EINPROGRESS)
		goto out;

	if (smc->use_fallback)
		goto out;
	sock_hold(&smc->sk); /* sock put in passive closing */
	if (flags & O_NONBLOCK) {
		if (queue_work(smc_hs_wq, &smc->connect_work))
			smc->connect_nonblock = 1;
		rc = -EINPROGRESS;
	} else {
		rc = __smc_connect(smc);
		if (rc < 0)
			goto out;
		else
			rc = 0; /* success cases including fallback */
	}

out:
	release_sock(sk);
out_err:
	return rc;
}
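/* Seen from userspace, the O_NONBLOCK branch of smc_connect() above acts
 * like a nonblocking TCP connect: the first call fails with EINPROGRESS
 * while smc_connect_work() runs, a repeated call yields EALREADY, and the
 * socket turns writable once the handshake (or the fallback decision) is
 * done. A poll()-based caller might look like this (illustrative sketch,
 * not part of af_smc.c; assumes fd is an AF_SMC stream socket):
 */
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <sys/socket.h>

static int connect_nonblock(int fd, const struct sockaddr *sa, socklen_t len)
{
	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
	socklen_t elen = sizeof(int);
	int err = 0;

	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_NONBLOCK);
	if (connect(fd, sa, len) == 0)
		return 0;			/* connected immediately */
	if (errno != EINPROGRESS)
		return -errno;			/* hard failure */
	if (poll(&pfd, 1, 5000) <= 0)
		return -ETIMEDOUT;		/* timed out or poll error */
	/* fetch the status the handshake worker left behind */
	getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &elen);
	return err ? -err : 0;
}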

static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
	struct socket *new_clcsock = NULL;
	struct sock *lsk = &lsmc->sk;
	struct sock *new_sk;
	int rc = -EINVAL;

	release_sock(lsk);
	new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
	if (!new_sk) {
		rc = -ENOMEM;
		lsk->sk_err = ENOMEM;
		*new_smc = NULL;
		lock_sock(lsk);
		goto out;
	}
	*new_smc = smc_sk(new_sk);

	mutex_lock(&lsmc->clcsock_release_lock);
	if (lsmc->clcsock)
		rc = kernel_accept(lsmc->clcsock, &new_clcsock, SOCK_NONBLOCK);
	mutex_unlock(&lsmc->clcsock_release_lock);
	lock_sock(lsk);
	if (rc < 0 && rc != -EAGAIN)
		lsk->sk_err = -rc;
	if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
		new_sk->sk_prot->unhash(new_sk);
		if (new_clcsock)
			sock_release(new_clcsock);
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		sock_put(new_sk); /* final */
		*new_smc = NULL;
		goto out;
	}

	/* new clcsock has inherited the smc listen-specific sk_data_ready
	 * function; switch it back to the original sk_data_ready function
	 */
	new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready;
	(*new_smc)->clcsock = new_clcsock;
out:
	return rc;
}

/* add a just created sock to the accept queue of the listen sock as
 * candidate for a following socket accept call from user space
 */
static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
	struct smc_sock *par = smc_sk(parent);

	sock_hold(sk); /* sock_put in smc_accept_unlink() */
	spin_lock(&par->accept_q_lock);
	list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_added(parent);
}

/* remove a socket from the accept queue of its parental listening socket */
static void smc_accept_unlink(struct sock *sk)
{
	struct smc_sock *par = smc_sk(sk)->listen_smc;

	spin_lock(&par->accept_q_lock);
	list_del_init(&smc_sk(sk)->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
	sock_put(sk); /* sock_hold in smc_accept_enqueue */
}

/* remove a sock from the accept queue to bind it to a new socket created
 * for a socket accept call from user space
 */
struct sock *smc_accept_dequeue(struct sock *parent,
				struct socket *new_sock)
{
	struct smc_sock *isk, *n;
	struct sock *new_sk;

	list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
		new_sk = (struct sock *)isk;

		smc_accept_unlink(new_sk);
		if (new_sk->sk_state == SMC_CLOSED) {
			new_sk->sk_prot->unhash(new_sk);
			if (isk->clcsock) {
				sock_release(isk->clcsock);
				isk->clcsock = NULL;
			}
			sock_put(new_sk); /* final */
			continue;
		}
		if (new_sock) {
			sock_graft(new_sk, new_sock);
			if (isk->use_fallback) {
				smc_sk(new_sk)->clcsock->file = new_sock->file;
				isk->clcsock->file->private_data = isk->clcsock;
			}
		}
		return new_sk;
	}
	return NULL;
}
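/* None of the queueing above is visible to applications; plain
 * listen()/accept() calls drive it. A minimal echo server over AF_SMC
 * (userspace sketch, illustrative only; the AF_SMC fallback definition is
 * an assumption, as in the client example further up):
 */
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>

#ifndef AF_SMC
#define AF_SMC 43
#endif

static int smc_echo_server(unsigned short port)
{
	struct sockaddr_in sa = {
		.sin_family = AF_INET,
		.sin_port = htons(port),
		.sin_addr.s_addr = htonl(INADDR_ANY),
	};
	int lfd = socket(AF_SMC, SOCK_STREAM, 0 /* SMCPROTO_SMC */);
	char buf[256];

	if (lfd < 0 || bind(lfd, (struct sockaddr *)&sa, sizeof(sa)) < 0 ||
	    listen(lfd, 16) < 0)
		return -1;
	for (;;) {
		/* accept() hands out a child prepared by the listen worker */
		int cfd = accept(lfd, NULL, NULL);
		ssize_t n;

		if (cfd < 0)
			continue;
		while ((n = read(cfd, buf, sizeof(buf))) > 0)
			write(cfd, buf, n);	/* echo back */
		close(cfd);
	}
}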
1246 | |||
1247 | /* clean up for a created but never accepted sock */ | ||
1248 | void smc_close_non_accepted(struct sock *sk) | ||
1249 | { | ||
1250 | struct smc_sock *smc = smc_sk(sk); | ||
1251 | |||
1252 | sock_hold(sk); /* sock_put below */ | ||
1253 | lock_sock(sk); | ||
1254 | if (!sk->sk_lingertime) | ||
1255 | /* wait for peer closing */ | ||
1256 | sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT; | ||
1257 | __smc_release(smc); | ||
1258 | release_sock(sk); | ||
1259 | sock_put(sk); /* sock_hold above */ | ||
1260 | sock_put(sk); /* final sock_put */ | ||
1261 | } | ||
1262 | |||
1263 | static int smcr_serv_conf_first_link(struct smc_sock *smc) | ||
1264 | { | ||
1265 | struct smc_link *link = smc->conn.lnk; | ||
1266 | struct smc_llc_qentry *qentry; | ||
1267 | int rc; | ||
1268 | |||
1269 | if (smcr_link_reg_rmb(link, smc->conn.rmb_desc)) | ||
1270 | return SMC_CLC_DECL_ERR_REGRMB; | ||
1271 | |||
1272 | /* send CONFIRM LINK request to client over the RoCE fabric */ | ||
1273 | rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ); | ||
1274 | if (rc < 0) | ||
1275 | return SMC_CLC_DECL_TIMEOUT_CL; | ||
1276 | |||
1277 | /* receive CONFIRM LINK response from client over the RoCE fabric */ | ||
1278 | qentry = smc_llc_wait(link->lgr, link, SMC_LLC_WAIT_TIME, | ||
1279 | SMC_LLC_CONFIRM_LINK); | ||
1280 | if (!qentry) { | ||
1281 | struct smc_clc_msg_decline dclc; | ||
1282 | |||
1283 | rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), | ||
1284 | SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT); | ||
1285 | return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc; | ||
1286 | } | ||
1287 | smc_llc_save_peer_uid(qentry); | ||
1288 | rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP); | ||
1289 | smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl); | ||
1290 | if (rc) | ||
1291 | return SMC_CLC_DECL_RMBE_EC; | ||
1292 | |||
1293 | /* confirm_rkey is implicit on 1st contact */ | ||
1294 | smc->conn.rmb_desc->is_conf_rkey = true; | ||
1295 | |||
1296 | smc_llc_link_active(link); | ||
1297 | smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE); | ||
1298 | |||
1299 | /* initial contact - try to establish second link */ | ||
1300 | smc_llc_srv_add_link(link); | ||
1301 | return 0; | ||
1302 | } | ||
1303 | |||
1304 | /* listen worker: finish */ | ||
1305 | static void smc_listen_out(struct smc_sock *new_smc) | ||
1306 | { | ||
1307 | struct smc_sock *lsmc = new_smc->listen_smc; | ||
1308 | struct sock *newsmcsk = &new_smc->sk; | ||
1309 | |||
1310 | if (lsmc->sk.sk_state == SMC_LISTEN) { | ||
1311 | lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); | ||
1312 | smc_accept_enqueue(&lsmc->sk, newsmcsk); | ||
1313 | release_sock(&lsmc->sk); | ||
1314 | } else { /* no longer listening */ | ||
1315 | smc_close_non_accepted(newsmcsk); | ||
1316 | } | ||
1317 | |||
1318 | /* Wake up accept */ | ||
1319 | lsmc->sk.sk_data_ready(&lsmc->sk); | ||
1320 | sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */ | ||
1321 | } | ||
1322 | |||
1323 | /* listen worker: finish in state connected */ | ||
1324 | static void smc_listen_out_connected(struct smc_sock *new_smc) | ||
1325 | { | ||
1326 | struct sock *newsmcsk = &new_smc->sk; | ||
1327 | |||
1328 | if (newsmcsk->sk_state == SMC_INIT) | ||
1329 | newsmcsk->sk_state = SMC_ACTIVE; | ||
1330 | |||
1331 | smc_listen_out(new_smc); | ||
1332 | } | ||
1333 | |||
1334 | /* listen worker: finish in error state */ | ||
1335 | static void smc_listen_out_err(struct smc_sock *new_smc) | ||
1336 | { | ||
1337 | struct sock *newsmcsk = &new_smc->sk; | ||
1338 | |||
1339 | if (newsmcsk->sk_state == SMC_INIT) | ||
1340 | sock_put(&new_smc->sk); /* passive closing */ | ||
1341 | newsmcsk->sk_state = SMC_CLOSED; | ||
1342 | |||
1343 | smc_listen_out(new_smc); | ||
1344 | } | ||
1345 | |||
1346 | /* listen worker: decline and fall back if possible */ | ||
1347 | static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, | ||
1348 | int local_first, u8 version) | ||
1349 | { | ||
1350 | /* RDMA setup failed, switch back to TCP */ | ||
1351 | if (local_first) | ||
1352 | smc_lgr_cleanup_early(&new_smc->conn); | ||
1353 | else | ||
1354 | smc_conn_free(&new_smc->conn); | ||
1355 | if (reason_code < 0) { /* error, no fallback possible */ | ||
1356 | smc_listen_out_err(new_smc); | ||
1357 | return; | ||
1358 | } | ||
1359 | smc_switch_to_fallback(new_smc); | ||
1360 | new_smc->fallback_rsn = reason_code; | ||
1361 | if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) { | ||
1362 | if (smc_clc_send_decline(new_smc, reason_code, version) < 0) { | ||
1363 | smc_listen_out_err(new_smc); | ||
1364 | return; | ||
1365 | } | ||
1366 | } | ||
1367 | smc_listen_out_connected(new_smc); | ||
1368 | } | ||
1369 | |||
1370 | /* listen worker: version checking */ | ||
1371 | static int smc_listen_v2_check(struct smc_sock *new_smc, | ||
1372 | struct smc_clc_msg_proposal *pclc, | ||
1373 | struct smc_init_info *ini) | ||
1374 | { | ||
1375 | struct smc_clc_smcd_v2_extension *pclc_smcd_v2_ext; | ||
1376 | struct smc_clc_v2_extension *pclc_v2_ext; | ||
1377 | |||
1378 | ini->smc_type_v1 = pclc->hdr.typev1; | ||
1379 | ini->smc_type_v2 = pclc->hdr.typev2; | ||
1380 | ini->smcd_version = ini->smc_type_v1 != SMC_TYPE_N ? SMC_V1 : 0; | ||
1381 | if (pclc->hdr.version > SMC_V1) | ||
1382 | ini->smcd_version |= | ||
1383 | ini->smc_type_v2 != SMC_TYPE_N ? SMC_V2 : 0; | ||
1384 | if (!smc_ism_v2_capable) { | ||
1385 | ini->smcd_version &= ~SMC_V2; | ||
1386 | goto out; | ||
1387 | } | ||
1388 | pclc_v2_ext = smc_get_clc_v2_ext(pclc); | ||
1389 | if (!pclc_v2_ext) { | ||
1390 | ini->smcd_version &= ~SMC_V2; | ||
1391 | goto out; | ||
1392 | } | ||
1393 | pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext); | ||
1394 | if (!pclc_smcd_v2_ext) | ||
1395 | ini->smcd_version &= ~SMC_V2; | ||
1396 | |||
1397 | out: | ||
1398 | if (!ini->smcd_version) { | ||
1399 | if (pclc->hdr.typev1 == SMC_TYPE_B || | ||
1400 | pclc->hdr.typev2 == SMC_TYPE_B) | ||
1401 | return SMC_CLC_DECL_NOSMCDEV; | ||
1402 | if (pclc->hdr.typev1 == SMC_TYPE_D || | ||
1403 | pclc->hdr.typev2 == SMC_TYPE_D) | ||
1404 | return SMC_CLC_DECL_NOSMCDDEV; | ||
1405 | return SMC_CLC_DECL_NOSMCRDEV; | ||
1406 | } | ||
1407 | |||
1408 | return 0; | ||
1409 | } | ||
1410 | |||
1411 | /* listen worker: check prefixes */ | ||
1412 | static int smc_listen_prfx_check(struct smc_sock *new_smc, | ||
1413 | struct smc_clc_msg_proposal *pclc) | ||
1414 | { | ||
1415 | struct smc_clc_msg_proposal_prefix *pclc_prfx; | ||
1416 | struct socket *newclcsock = new_smc->clcsock; | ||
1417 | |||
1418 | if (pclc->hdr.typev1 == SMC_TYPE_N) | ||
1419 | return 0; | ||
1420 | pclc_prfx = smc_clc_proposal_get_prefix(pclc); | ||
1421 | if (smc_clc_prfx_match(newclcsock, pclc_prfx)) | ||
1422 | return SMC_CLC_DECL_DIFFPREFIX; | ||
1423 | |||
1424 | return 0; | ||
1425 | } | ||
1426 | |||
1427 | /* listen worker: initialize connection and buffers */ | ||
1428 | static int smc_listen_rdma_init(struct smc_sock *new_smc, | ||
1429 | struct smc_init_info *ini) | ||
1430 | { | ||
1431 | int rc; | ||
1432 | |||
1433 | /* allocate connection / link group */ | ||
1434 | rc = smc_conn_create(new_smc, ini); | ||
1435 | if (rc) | ||
1436 | return rc; | ||
1437 | |||
1438 | /* create send buffer and rmb */ | ||
1439 | if (smc_buf_create(new_smc, false)) | ||
1440 | return SMC_CLC_DECL_MEM; | ||
1441 | |||
1442 | return 0; | ||
1443 | } | ||
1444 | |||
1445 | /* listen worker: initialize connection and buffers for SMC-D */ | ||
1446 | static int smc_listen_ism_init(struct smc_sock *new_smc, | ||
1447 | struct smc_init_info *ini) | ||
1448 | { | ||
1449 | int rc; | ||
1450 | |||
1451 | rc = smc_conn_create(new_smc, ini); | ||
1452 | if (rc) | ||
1453 | return rc; | ||
1454 | |||
1455 | /* Create send and receive buffers */ | ||
1456 | rc = smc_buf_create(new_smc, true); | ||
1457 | if (rc) { | ||
1458 | if (ini->first_contact_local) | ||
1459 | smc_lgr_cleanup_early(&new_smc->conn); | ||
1460 | else | ||
1461 | smc_conn_free(&new_smc->conn); | ||
1462 | return (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB : | ||
1463 | SMC_CLC_DECL_MEM; | ||
1464 | } | ||
1465 | |||
1466 | return 0; | ||
1467 | } | ||
1468 | |||
1469 | static bool smc_is_already_selected(struct smcd_dev *smcd, | ||
1470 | struct smc_init_info *ini, | ||
1471 | int matches) | ||
1472 | { | ||
1473 | int i; | ||
1474 | |||
1475 | for (i = 0; i < matches; i++) | ||
1476 | if (smcd == ini->ism_dev[i]) | ||
1477 | return true; | ||
1478 | |||
1479 | return false; | ||
1480 | } | ||
1481 | |||
1482 | /* check for ISM devices matching proposed ISM devices */ | ||
1483 | static void smc_check_ism_v2_match(struct smc_init_info *ini, | ||
1484 | u16 proposed_chid, u64 proposed_gid, | ||
1485 | unsigned int *matches) | ||
1486 | { | ||
1487 | struct smcd_dev *smcd; | ||
1488 | |||
1489 | list_for_each_entry(smcd, &smcd_dev_list.list, list) { | ||
1490 | if (smcd->going_away) | ||
1491 | continue; | ||
1492 | if (smc_is_already_selected(smcd, ini, *matches)) | ||
1493 | continue; | ||
1494 | if (smc_ism_get_chid(smcd) == proposed_chid && | ||
1495 | !smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) { | ||
1496 | ini->ism_peer_gid[*matches] = proposed_gid; | ||
1497 | ini->ism_dev[*matches] = smcd; | ||
1498 | (*matches)++; | ||
1499 | break; | ||
1500 | } | ||
1501 | } | ||
1502 | } | ||
1503 | |||
1504 | static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, | ||
1505 | struct smc_clc_msg_proposal *pclc, | ||
1506 | struct smc_init_info *ini) | ||
1507 | { | ||
1508 | struct smc_clc_smcd_v2_extension *smcd_v2_ext; | ||
1509 | struct smc_clc_v2_extension *smc_v2_ext; | ||
1510 | struct smc_clc_msg_smcd *pclc_smcd; | ||
1511 | unsigned int matches = 0; | ||
1512 | u8 smcd_version; | ||
1513 | u8 *eid = NULL; | ||
1514 | int i; | ||
1515 | |||
1516 | if (!(ini->smcd_version & SMC_V2) || !smcd_indicated(ini->smc_type_v2)) | ||
1517 | goto not_found; | ||
1518 | |||
1519 | pclc_smcd = smc_get_clc_msg_smcd(pclc); | ||
1520 | smc_v2_ext = smc_get_clc_v2_ext(pclc); | ||
1521 | smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext); | ||
1522 | if (!smcd_v2_ext || | ||
1523 | !smc_v2_ext->hdr.flag.seid) /* no system EID support for SMCD */ | ||
1524 | goto not_found; | ||
1525 | |||
1526 | mutex_lock(&smcd_dev_list.mutex); | ||
1527 | if (pclc_smcd->ism.chid) | ||
1528 | /* check for ISM device matching proposed native ISM device */ | ||
1529 | smc_check_ism_v2_match(ini, ntohs(pclc_smcd->ism.chid), | ||
1530 | ntohll(pclc_smcd->ism.gid), &matches); | ||
1531 | for (i = 1; i <= smc_v2_ext->hdr.ism_gid_cnt; i++) { | ||
1532 | /* check for ISM devices matching proposed non-native ISM | ||
1533 | * devices | ||
1534 | */ | ||
1535 | smc_check_ism_v2_match(ini, | ||
1536 | ntohs(smcd_v2_ext->gidchid[i - 1].chid), | ||
1537 | ntohll(smcd_v2_ext->gidchid[i - 1].gid), | ||
1538 | &matches); | ||
1539 | } | ||
1540 | mutex_unlock(&smcd_dev_list.mutex); | ||
1541 | |||
1542 | if (ini->ism_dev[0]) { | ||
1543 | smc_ism_get_system_eid(ini->ism_dev[0], &eid); | ||
1544 | if (memcmp(eid, smcd_v2_ext->system_eid, SMC_MAX_EID_LEN)) | ||
1545 | goto not_found; | ||
1546 | } else { | ||
1547 | goto not_found; | ||
1548 | } | ||
1549 | |||
1550 | /* separate - outside the smcd_dev_list.mutex */ | ||
1551 | smcd_version = ini->smcd_version; | ||
1552 | for (i = 0; i < matches; i++) { | ||
1553 | ini->smcd_version = SMC_V2; | ||
1554 | ini->is_smcd = true; | ||
1555 | ini->ism_selected = i; | ||
1556 | if (smc_listen_ism_init(new_smc, ini)) | ||
1557 | /* try next active ISM device */ | ||
1558 | continue; | ||
1559 | return; /* matching and usable V2 ISM device found */ | ||
1560 | } | ||
1561 | /* no V2 ISM device could be initialized */ | ||
1562 | ini->smcd_version = smcd_version; /* restore original value */ | ||
1563 | |||
1564 | not_found: | ||
1565 | ini->smcd_version &= ~SMC_V2; | ||
1566 | ini->ism_dev[0] = NULL; | ||
1567 | ini->is_smcd = false; | ||
1568 | } | ||
1569 | |||
1570 | static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc, | ||
1571 | struct smc_clc_msg_proposal *pclc, | ||
1572 | struct smc_init_info *ini) | ||
1573 | { | ||
1574 | struct smc_clc_msg_smcd *pclc_smcd = smc_get_clc_msg_smcd(pclc); | ||
1575 | |||
1576 | /* check if ISM V1 is available */ | ||
1577 | if (!(ini->smcd_version & SMC_V1) || !smcd_indicated(ini->smc_type_v1)) | ||
1578 | goto not_found; | ||
1579 | ini->is_smcd = true; /* prepare ISM check */ | ||
1580 | ini->ism_peer_gid[0] = ntohll(pclc_smcd->ism.gid); | ||
1581 | if (smc_find_ism_device(new_smc, ini)) | ||
1582 | goto not_found; | ||
1583 | ini->ism_selected = 0; | ||
1584 | if (!smc_listen_ism_init(new_smc, ini)) | ||
1585 | return; /* V1 ISM device found */ | ||
1586 | |||
1587 | not_found: | ||
1588 | ini->ism_dev[0] = NULL; | ||
1589 | ini->is_smcd = false; | ||
1590 | } | ||
1591 | |||
1592 | /* listen worker: register buffers */ | ||
1593 | static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first) | ||
1594 | { | ||
1595 | struct smc_connection *conn = &new_smc->conn; | ||
1596 | |||
1597 | if (!local_first) { | ||
1598 | if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc)) | ||
1599 | return SMC_CLC_DECL_ERR_REGRMB; | ||
1600 | } | ||
1601 | smc_rmb_sync_sg_for_device(&new_smc->conn); | ||
1602 | |||
1603 | return 0; | ||
1604 | } | ||
1605 | |||
1606 | static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc, | ||
1607 | struct smc_clc_msg_proposal *pclc, | ||
1608 | struct smc_init_info *ini) | ||
1609 | { | ||
1610 | int rc; | ||
1611 | |||
1612 | if (!smcr_indicated(ini->smc_type_v1)) | ||
1613 | return SMC_CLC_DECL_NOSMCDEV; | ||
1614 | |||
1615 | /* prepare RDMA check */ | ||
1616 | ini->ib_lcl = &pclc->lcl; | ||
1617 | rc = smc_find_rdma_device(new_smc, ini); | ||
1618 | if (rc) { | ||
1619 | /* no RDMA device found */ | ||
1620 | if (ini->smc_type_v1 == SMC_TYPE_B) | ||
1621 | /* neither ISM nor RDMA device found */ | ||
1622 | rc = SMC_CLC_DECL_NOSMCDEV; | ||
1623 | return rc; | ||
1624 | } | ||
1625 | rc = smc_listen_rdma_init(new_smc, ini); | ||
1626 | if (rc) | ||
1627 | return rc; | ||
1628 | return smc_listen_rdma_reg(new_smc, ini->first_contact_local); | ||
1629 | } | ||
1630 | |||
1631 | /* determine the local device matching to proposal */ | ||
1632 | static int smc_listen_find_device(struct smc_sock *new_smc, | ||
1633 | struct smc_clc_msg_proposal *pclc, | ||
1634 | struct smc_init_info *ini) | ||
1635 | { | ||
1636 | int rc; | ||
1637 | |||
1638 | /* check for ISM device matching V2 proposed device */ | ||
1639 | smc_find_ism_v2_device_serv(new_smc, pclc, ini); | ||
1640 | if (ini->ism_dev[0]) | ||
1641 | return 0; | ||
1642 | |||
1643 | if (!(ini->smcd_version & SMC_V1)) | ||
1644 | return SMC_CLC_DECL_NOSMCDEV; | ||
1645 | |||
1646 | /* check for matching IP prefix and subnet length */ | ||
1647 | rc = smc_listen_prfx_check(new_smc, pclc); | ||
1648 | if (rc) | ||
1649 | return rc; | ||
1650 | |||
1651 | /* get vlan id from IP device */ | ||
1652 | if (smc_vlan_by_tcpsk(new_smc->clcsock, ini)) | ||
1653 | return SMC_CLC_DECL_GETVLANERR; | ||
1654 | |||
1655 | /* check for ISM device matching V1 proposed device */ | ||
1656 | smc_find_ism_v1_device_serv(new_smc, pclc, ini); | ||
1657 | if (ini->ism_dev[0]) | ||
1658 | return 0; | ||
1659 | |||
1660 | if (pclc->hdr.typev1 == SMC_TYPE_D) | ||
1661 | return SMC_CLC_DECL_NOSMCDDEV; /* skip RDMA and decline */ | ||
1662 | |||
1663 | /* check if RDMA is available */ | ||
1664 | return smc_find_rdma_v1_device_serv(new_smc, pclc, ini); | ||
1665 | } | ||
1666 | |||
1667 | /* listen worker: finish RDMA setup */ | ||
1668 | static int smc_listen_rdma_finish(struct smc_sock *new_smc, | ||
1669 | struct smc_clc_msg_accept_confirm *cclc, | ||
1670 | bool local_first) | ||
1671 | { | ||
1672 | struct smc_link *link = new_smc->conn.lnk; | ||
1673 | int reason_code = 0; | ||
1674 | |||
1675 | if (local_first) | ||
1676 | smc_link_save_peer_info(link, cclc); | ||
1677 | |||
1678 | if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) | ||
1679 | return SMC_CLC_DECL_ERR_RTOK; | ||
1680 | |||
1681 | if (local_first) { | ||
1682 | if (smc_ib_ready_link(link)) | ||
1683 | return SMC_CLC_DECL_ERR_RDYLNK; | ||
1684 | /* QP confirmation over RoCE fabric */ | ||
1685 | smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK); | ||
1686 | reason_code = smcr_serv_conf_first_link(new_smc); | ||
1687 | smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl); | ||
1688 | } | ||
1689 | return reason_code; | ||
1690 | } | ||
1691 | |||
1692 | /* setup for connection of server */ | ||
1693 | static void smc_listen_work(struct work_struct *work) | ||
1694 | { | ||
1695 | struct smc_sock *new_smc = container_of(work, struct smc_sock, | ||
1696 | smc_listen_work); | ||
1697 | u8 version = smc_ism_v2_capable ? SMC_V2 : SMC_V1; | ||
1698 | struct socket *newclcsock = new_smc->clcsock; | ||
1699 | struct smc_clc_msg_accept_confirm *cclc; | ||
1700 | struct smc_clc_msg_proposal_area *buf; | ||
1701 | struct smc_clc_msg_proposal *pclc; | ||
1702 | struct smc_init_info *ini = NULL; | ||
1703 | int rc = 0; | ||
1704 | |||
1705 | if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN) | ||
1706 | return smc_listen_out_err(new_smc); | ||
1707 | |||
1708 | if (new_smc->use_fallback) { | ||
1709 | smc_listen_out_connected(new_smc); | ||
1710 | return; | ||
1711 | } | ||
1712 | |||
1713 | /* check if peer is smc capable */ | ||
1714 | if (!tcp_sk(newclcsock->sk)->syn_smc) { | ||
1715 | smc_switch_to_fallback(new_smc); | ||
1716 | new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC; | ||
1717 | smc_listen_out_connected(new_smc); | ||
1718 | return; | ||
1719 | } | ||
1720 | |||
1721 | /* do inband token exchange - | ||
1722 | * wait for and receive SMC Proposal CLC message | ||
1723 | */ | ||
1724 | buf = kzalloc(sizeof(*buf), GFP_KERNEL); | ||
1725 | if (!buf) { | ||
1726 | rc = SMC_CLC_DECL_MEM; | ||
1727 | goto out_decl; | ||
1728 | } | ||
1729 | pclc = (struct smc_clc_msg_proposal *)buf; | ||
1730 | rc = smc_clc_wait_msg(new_smc, pclc, sizeof(*buf), | ||
1731 | SMC_CLC_PROPOSAL, CLC_WAIT_TIME); | ||
1732 | if (rc) | ||
1733 | goto out_decl; | ||
1734 | version = pclc->hdr.version == SMC_V1 ? SMC_V1 : version; | ||
1735 | |||
1736 | /* IPSec connections opt out of SMC optimizations */ | ||
1737 | if (using_ipsec(new_smc)) { | ||
1738 | rc = SMC_CLC_DECL_IPSEC; | ||
1739 | goto out_decl; | ||
1740 | } | ||
1741 | |||
1742 | ini = kzalloc(sizeof(*ini), GFP_KERNEL); | ||
1743 | if (!ini) { | ||
1744 | rc = SMC_CLC_DECL_MEM; | ||
1745 | goto out_decl; | ||
1746 | } | ||
1747 | |||
1748 | /* initial version checking */ | ||
1749 | rc = smc_listen_v2_check(new_smc, pclc, ini); | ||
1750 | if (rc) | ||
1751 | goto out_decl; | ||
1752 | |||
1753 | mutex_lock(&smc_server_lgr_pending); | ||
1754 | smc_close_init(new_smc); | ||
1755 | smc_rx_init(new_smc); | ||
1756 | smc_tx_init(new_smc); | ||
1757 | |||
1758 | /* determine ISM or RoCE device used for connection */ | ||
1759 | rc = smc_listen_find_device(new_smc, pclc, ini); | ||
1760 | if (rc) | ||
1761 | goto out_unlock; | ||
1762 | |||
1763 | /* send SMC Accept CLC message */ | ||
1764 | rc = smc_clc_send_accept(new_smc, ini->first_contact_local, | ||
1765 | ini->smcd_version == SMC_V2 ? SMC_V2 : SMC_V1); | ||
1766 | if (rc) | ||
1767 | goto out_unlock; | ||
1768 | |||
1769 | /* SMC-D does not need this lock any more */ | ||
1770 | if (ini->is_smcd) | ||
1771 | mutex_unlock(&smc_server_lgr_pending); | ||
1772 | |||
1773 | /* receive SMC Confirm CLC message */ | ||
1774 | memset(buf, 0, sizeof(*buf)); | ||
1775 | cclc = (struct smc_clc_msg_accept_confirm *)buf; | ||
1776 | rc = smc_clc_wait_msg(new_smc, cclc, sizeof(*buf), | ||
1777 | SMC_CLC_CONFIRM, CLC_WAIT_TIME); | ||
1778 | if (rc) { | ||
1779 | if (!ini->is_smcd) | ||
1780 | goto out_unlock; | ||
1781 | goto out_decl; | ||
1782 | } | ||
1783 | |||
1784 | /* finish worker */ | ||
1785 | if (!ini->is_smcd) { | ||
1786 | rc = smc_listen_rdma_finish(new_smc, cclc, | ||
1787 | ini->first_contact_local); | ||
1788 | if (rc) | ||
1789 | goto out_unlock; | ||
1790 | mutex_unlock(&smc_server_lgr_pending); | ||
1791 | } | ||
1792 | smc_conn_save_peer_info(new_smc, cclc); | ||
1793 | smc_listen_out_connected(new_smc); | ||
1794 | goto out_free; | ||
1795 | |||
1796 | out_unlock: | ||
1797 | mutex_unlock(&smc_server_lgr_pending); | ||
1798 | out_decl: | ||
1799 | smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0, | ||
1800 | version); | ||
1801 | out_free: | ||
1802 | kfree(ini); | ||
1803 | kfree(buf); | ||
1804 | } | ||
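
Condensed from the worker above, the server-side CLC message flow looks as follows (decline and fallback paths omitted; this is derived purely from the calls visible in smc_listen_work()):

	/*
	 *  client                          server (smc_listen_work)
	 *  ------                          ------------------------
	 *  SMC Proposal  --------------->  smc_clc_wait_msg(SMC_CLC_PROPOSAL)
	 *                                  smc_listen_v2_check()
	 *                                  smc_listen_find_device()
	 *  SMC Accept    <---------------  smc_clc_send_accept()
	 *  SMC Confirm   --------------->  smc_clc_wait_msg(SMC_CLC_CONFIRM)
	 *                                  smc_listen_rdma_finish() (SMC-R only)
	 */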
1805 | |||
1806 | static void smc_tcp_listen_work(struct work_struct *work) | ||
1807 | { | ||
1808 | struct smc_sock *lsmc = container_of(work, struct smc_sock, | ||
1809 | tcp_listen_work); | ||
1810 | struct sock *lsk = &lsmc->sk; | ||
1811 | struct smc_sock *new_smc; | ||
1812 | int rc = 0; | ||
1813 | |||
1814 | lock_sock(lsk); | ||
1815 | while (lsk->sk_state == SMC_LISTEN) { | ||
1816 | rc = smc_clcsock_accept(lsmc, &new_smc); | ||
1817 | if (rc) /* clcsock accept queue empty or error */ | ||
1818 | goto out; | ||
1819 | if (!new_smc) | ||
1820 | continue; | ||
1821 | |||
1822 | new_smc->listen_smc = lsmc; | ||
1823 | new_smc->use_fallback = lsmc->use_fallback; | ||
1824 | new_smc->fallback_rsn = lsmc->fallback_rsn; | ||
1825 | sock_hold(lsk); /* sock_put in smc_listen_work */ | ||
1826 | INIT_WORK(&new_smc->smc_listen_work, smc_listen_work); | ||
1827 | smc_copy_sock_settings_to_smc(new_smc); | ||
1828 | new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf; | ||
1829 | new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf; | ||
1830 | sock_hold(&new_smc->sk); /* sock_put in passive closing */ | ||
1831 | if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work)) | ||
1832 | sock_put(&new_smc->sk); | ||
1833 | } | ||
1834 | |||
1835 | out: | ||
1836 | release_sock(lsk); | ||
1837 | sock_put(&lsmc->sk); /* sock_hold in smc_clcsock_data_ready() */ | ||
1838 | } | ||
1839 | |||
1840 | static void smc_clcsock_data_ready(struct sock *listen_clcsock) | ||
1841 | { | ||
1842 | struct smc_sock *lsmc; | ||
1843 | |||
1844 | lsmc = (struct smc_sock *) | ||
1845 | ((uintptr_t)listen_clcsock->sk_user_data & ~SK_USER_DATA_NOCOPY); | ||
1846 | if (!lsmc) | ||
1847 | return; | ||
1848 | lsmc->clcsk_data_ready(listen_clcsock); | ||
1849 | if (lsmc->sk.sk_state == SMC_LISTEN) { | ||
1850 | sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */ | ||
1851 | if (!queue_work(smc_hs_wq, &lsmc->tcp_listen_work)) | ||
1852 | sock_put(&lsmc->sk); | ||
1853 | } | ||
1854 | } | ||
1855 | |||
1856 | static int smc_listen(struct socket *sock, int backlog) | ||
1857 | { | ||
1858 | struct sock *sk = sock->sk; | ||
1859 | struct smc_sock *smc; | ||
1860 | int rc; | ||
1861 | |||
1862 | smc = smc_sk(sk); | ||
1863 | lock_sock(sk); | ||
1864 | |||
1865 | rc = -EINVAL; | ||
1866 | if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) || | ||
1867 | smc->connect_nonblock) | ||
1868 | goto out; | ||
1869 | |||
1870 | rc = 0; | ||
1871 | if (sk->sk_state == SMC_LISTEN) { | ||
1872 | sk->sk_max_ack_backlog = backlog; | ||
1873 | goto out; | ||
1874 | } | ||
1875 | /* some socket options are handled in core, so we cannot apply them | ||
1876 | * to the clc socket -- copy smc socket options to the clc socket | ||
1877 | */ | ||
1878 | smc_copy_sock_settings_to_clc(smc); | ||
1879 | if (!smc->use_fallback) | ||
1880 | tcp_sk(smc->clcsock->sk)->syn_smc = 1; | ||
1881 | |||
1882 | /* save original sk_data_ready function and establish | ||
1883 | * smc-specific sk_data_ready function | ||
1884 | */ | ||
1885 | smc->clcsk_data_ready = smc->clcsock->sk->sk_data_ready; | ||
1886 | smc->clcsock->sk->sk_data_ready = smc_clcsock_data_ready; | ||
1887 | smc->clcsock->sk->sk_user_data = | ||
1888 | (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); | ||
1889 | rc = kernel_listen(smc->clcsock, backlog); | ||
1890 | if (rc) { | ||
1891 | smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready; | ||
1892 | goto out; | ||
1893 | } | ||
1894 | sk->sk_max_ack_backlog = backlog; | ||
1895 | sk->sk_ack_backlog = 0; | ||
1896 | sk->sk_state = SMC_LISTEN; | ||
1897 | |||
1898 | out: | ||
1899 | release_sock(sk); | ||
1900 | return rc; | ||
1901 | } | ||
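
For context, the listen path above is driven by an ordinary sockets program. Below is a minimal sketch of an AF_SMC server; AF_SMC (43 in include/linux/socket.h) and SMCPROTO_SMC (0, see smc.h further down) are the only SMC-specific pieces, while addressing reuses AF_INET, which is why the SMC handlers delegate to the internal TCP clcsock. The port number is arbitrary.

	#include <netinet/in.h>
	#include <stdio.h>
	#include <sys/socket.h>
	#include <unistd.h>

	#ifndef AF_SMC
	#define AF_SMC		43	/* from include/linux/socket.h */
	#endif
	#define SMCPROTO_SMC	0	/* SMC over IPv4 */

	int main(void)
	{
		struct sockaddr_in sa = {
			.sin_family = AF_INET,
			.sin_port = htons(12345),
			.sin_addr.s_addr = htonl(INADDR_ANY),
		};
		int lfd, cfd;

		/* handled by smc_create(): allocates the SMC sock and the
		 * internal TCP clcsock used for the CLC handshake
		 */
		lfd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);
		if (lfd < 0) {
			perror("socket");
			return 1;
		}
		/* AF_INET address: SMC reuses TCP addressing end to end */
		if (bind(lfd, (struct sockaddr *)&sa, sizeof(sa)) < 0 ||
		    listen(lfd, 128) < 0) {	/* lands in smc_listen() */
			perror("bind/listen");
			return 1;
		}
		/* blocks in smc_accept(); the CLC handshake already ran in
		 * smc_listen_work() before the socket was queued
		 */
		cfd = accept(lfd, NULL, NULL);
		if (cfd >= 0)
			close(cfd);
		close(lfd);
		return 0;
	}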
1902 | |||
1903 | static int smc_accept(struct socket *sock, struct socket *new_sock, | ||
1904 | int flags, bool kern) | ||
1905 | { | ||
1906 | struct sock *sk = sock->sk, *nsk; | ||
1907 | DECLARE_WAITQUEUE(wait, current); | ||
1908 | struct smc_sock *lsmc; | ||
1909 | long timeo; | ||
1910 | int rc = 0; | ||
1911 | |||
1912 | lsmc = smc_sk(sk); | ||
1913 | sock_hold(sk); /* sock_put below */ | ||
1914 | lock_sock(sk); | ||
1915 | |||
1916 | if (lsmc->sk.sk_state != SMC_LISTEN) { | ||
1917 | rc = -EINVAL; | ||
1918 | release_sock(sk); | ||
1919 | goto out; | ||
1920 | } | ||
1921 | |||
1922 | /* Wait for an incoming connection */ | ||
1923 | timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); | ||
1924 | add_wait_queue_exclusive(sk_sleep(sk), &wait); | ||
1925 | while (!(nsk = smc_accept_dequeue(sk, new_sock))) { | ||
1926 | set_current_state(TASK_INTERRUPTIBLE); | ||
1927 | if (!timeo) { | ||
1928 | rc = -EAGAIN; | ||
1929 | break; | ||
1930 | } | ||
1931 | release_sock(sk); | ||
1932 | timeo = schedule_timeout(timeo); | ||
1933 | /* wakeup by sk_data_ready in smc_listen_work() */ | ||
1934 | sched_annotate_sleep(); | ||
1935 | lock_sock(sk); | ||
1936 | if (signal_pending(current)) { | ||
1937 | rc = sock_intr_errno(timeo); | ||
1938 | break; | ||
1939 | } | ||
1940 | } | ||
1941 | set_current_state(TASK_RUNNING); | ||
1942 | remove_wait_queue(sk_sleep(sk), &wait); | ||
1943 | |||
1944 | if (!rc) | ||
1945 | rc = sock_error(nsk); | ||
1946 | release_sock(sk); | ||
1947 | if (rc) | ||
1948 | goto out; | ||
1949 | |||
1950 | if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) { | ||
1951 | /* wait till data arrives on the socket */ | ||
1952 | timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept * | ||
1953 | MSEC_PER_SEC); | ||
1954 | if (smc_sk(nsk)->use_fallback) { | ||
1955 | struct sock *clcsk = smc_sk(nsk)->clcsock->sk; | ||
1956 | |||
1957 | lock_sock(clcsk); | ||
1958 | if (skb_queue_empty(&clcsk->sk_receive_queue)) | ||
1959 | sk_wait_data(clcsk, &timeo, NULL); | ||
1960 | release_sock(clcsk); | ||
1961 | } else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) { | ||
1962 | lock_sock(nsk); | ||
1963 | smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available); | ||
1964 | release_sock(nsk); | ||
1965 | } | ||
1966 | } | ||
1967 | |||
1968 | out: | ||
1969 | sock_put(sk); /* sock_hold above */ | ||
1970 | return rc; | ||
1971 | } | ||
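
The sockopt_defer_accept branch above emulates TCP_DEFER_ACCEPT for non-fallback connections: accept() only returns once payload data has arrived. A sketch of arming it from userspace; the value is in seconds (smc_accept() multiplies it by MSEC_PER_SEC) and is stored by the TCP_DEFER_ACCEPT case in smc_setsockopt() further down.

	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <sys/socket.h>

	/* 'lfd' is an AF_SMC listen socket, e.g. from the earlier sketch */
	static int arm_defer_accept(int lfd)
	{
		int secs = 5;	/* accept() also waits up to 5s for data */

		return setsockopt(lfd, IPPROTO_TCP, TCP_DEFER_ACCEPT,
				  &secs, sizeof(secs));
	}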
1972 | |||
1973 | static int smc_getname(struct socket *sock, struct sockaddr *addr, | ||
1974 | int peer) | ||
1975 | { | ||
1976 | struct smc_sock *smc; | ||
1977 | |||
1978 | if (peer && (sock->sk->sk_state != SMC_ACTIVE) && | ||
1979 | (sock->sk->sk_state != SMC_APPCLOSEWAIT1)) | ||
1980 | return -ENOTCONN; | ||
1981 | |||
1982 | smc = smc_sk(sock->sk); | ||
1983 | |||
1984 | return smc->clcsock->ops->getname(smc->clcsock, addr, peer); | ||
1985 | } | ||
1986 | |||
1987 | static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) | ||
1988 | { | ||
1989 | struct sock *sk = sock->sk; | ||
1990 | struct smc_sock *smc; | ||
1991 | int rc = -EPIPE; | ||
1992 | |||
1993 | smc = smc_sk(sk); | ||
1994 | lock_sock(sk); | ||
1995 | if ((sk->sk_state != SMC_ACTIVE) && | ||
1996 | (sk->sk_state != SMC_APPCLOSEWAIT1) && | ||
1997 | (sk->sk_state != SMC_INIT)) | ||
1998 | goto out; | ||
1999 | |||
2000 | if (msg->msg_flags & MSG_FASTOPEN) { | ||
2001 | if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { | ||
2002 | smc_switch_to_fallback(smc); | ||
2003 | smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; | ||
2004 | } else { | ||
2005 | rc = -EINVAL; | ||
2006 | goto out; | ||
2007 | } | ||
2008 | } | ||
2009 | |||
2010 | if (smc->use_fallback) | ||
2011 | rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len); | ||
2012 | else | ||
2013 | rc = smc_tx_sendmsg(smc, msg, len); | ||
2014 | out: | ||
2015 | release_sock(sk); | ||
2016 | return rc; | ||
2017 | } | ||
2018 | |||
2019 | static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, | ||
2020 | int flags) | ||
2021 | { | ||
2022 | struct sock *sk = sock->sk; | ||
2023 | struct smc_sock *smc; | ||
2024 | int rc = -ENOTCONN; | ||
2025 | |||
2026 | smc = smc_sk(sk); | ||
2027 | lock_sock(sk); | ||
2028 | if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { | ||
2029 | /* socket was connected before, no more data to read */ | ||
2030 | rc = 0; | ||
2031 | goto out; | ||
2032 | } | ||
2033 | if ((sk->sk_state == SMC_INIT) || | ||
2034 | (sk->sk_state == SMC_LISTEN) || | ||
2035 | (sk->sk_state == SMC_CLOSED)) | ||
2036 | goto out; | ||
2037 | |||
2038 | if (sk->sk_state == SMC_PEERFINCLOSEWAIT) { | ||
2039 | rc = 0; | ||
2040 | goto out; | ||
2041 | } | ||
2042 | |||
2043 | if (smc->use_fallback) { | ||
2044 | rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags); | ||
2045 | } else { | ||
2046 | msg->msg_namelen = 0; | ||
2047 | rc = smc_rx_recvmsg(smc, msg, NULL, len, flags); | ||
2048 | } | ||
2049 | |||
2050 | out: | ||
2051 | release_sock(sk); | ||
2052 | return rc; | ||
2053 | } | ||
2054 | |||
2055 | static __poll_t smc_accept_poll(struct sock *parent) | ||
2056 | { | ||
2057 | struct smc_sock *isk = smc_sk(parent); | ||
2058 | __poll_t mask = 0; | ||
2059 | |||
2060 | spin_lock(&isk->accept_q_lock); | ||
2061 | if (!list_empty(&isk->accept_q)) | ||
2062 | mask = EPOLLIN | EPOLLRDNORM; | ||
2063 | spin_unlock(&isk->accept_q_lock); | ||
2064 | |||
2065 | return mask; | ||
2066 | } | ||
2067 | |||
2068 | static __poll_t smc_poll(struct file *file, struct socket *sock, | ||
2069 | poll_table *wait) | ||
2070 | { | ||
2071 | struct sock *sk = sock->sk; | ||
2072 | struct smc_sock *smc; | ||
2073 | __poll_t mask = 0; | ||
2074 | |||
2075 | if (!sk) | ||
2076 | return EPOLLNVAL; | ||
2077 | |||
2078 | smc = smc_sk(sock->sk); | ||
2079 | if (smc->use_fallback) { | ||
2080 | /* delegate to CLC child sock */ | ||
2081 | mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); | ||
2082 | sk->sk_err = smc->clcsock->sk->sk_err; | ||
2083 | } else { | ||
2084 | if (sk->sk_state != SMC_CLOSED) | ||
2085 | sock_poll_wait(file, sock, wait); | ||
2086 | if (sk->sk_err) | ||
2087 | mask |= EPOLLERR; | ||
2088 | if ((sk->sk_shutdown == SHUTDOWN_MASK) || | ||
2089 | (sk->sk_state == SMC_CLOSED)) | ||
2090 | mask |= EPOLLHUP; | ||
2091 | if (sk->sk_state == SMC_LISTEN) { | ||
2092 | /* woken up by sk_data_ready in smc_listen_work() */ | ||
2093 | mask |= smc_accept_poll(sk); | ||
2094 | } else if (smc->use_fallback) { /* as result of connect_work() */ | ||
2095 | mask |= smc->clcsock->ops->poll(file, smc->clcsock, | ||
2096 | wait); | ||
2097 | sk->sk_err = smc->clcsock->sk->sk_err; | ||
2098 | } else { | ||
2099 | if ((sk->sk_state != SMC_INIT && | ||
2100 | atomic_read(&smc->conn.sndbuf_space)) || | ||
2101 | sk->sk_shutdown & SEND_SHUTDOWN) { | ||
2102 | mask |= EPOLLOUT | EPOLLWRNORM; | ||
2103 | } else { | ||
2104 | sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); | ||
2105 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | ||
2106 | } | ||
2107 | if (atomic_read(&smc->conn.bytes_to_rcv)) | ||
2108 | mask |= EPOLLIN | EPOLLRDNORM; | ||
2109 | if (sk->sk_shutdown & RCV_SHUTDOWN) | ||
2110 | mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; | ||
2111 | if (sk->sk_state == SMC_APPCLOSEWAIT1) | ||
2112 | mask |= EPOLLIN; | ||
2113 | if (smc->conn.urg_state == SMC_URG_VALID) | ||
2114 | mask |= EPOLLPRI; | ||
2115 | } | ||
2116 | } | ||
2117 | |||
2118 | return mask; | ||
2119 | } | ||
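
In the non-fallback branch above, EPOLLPRI is reported once urg_state reaches SMC_URG_VALID, mirroring TCP's urgent-data signalling. A sketch of waiting for it with plain poll(2), assuming fd is a connected AF_SMC socket (POLLPRI and EPOLLPRI share the same bit value):

	#include <poll.h>

	/* returns 1 if urgent (out-of-band) data is pending, 0 otherwise */
	static int wait_for_oob(int fd, int timeout_ms)
	{
		struct pollfd pfd = { .fd = fd, .events = POLLPRI };

		if (poll(&pfd, 1, timeout_ms) <= 0)
			return 0;
		return !!(pfd.revents & POLLPRI);
	}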
2120 | |||
2121 | static int smc_shutdown(struct socket *sock, int how) | ||
2122 | { | ||
2123 | struct sock *sk = sock->sk; | ||
2124 | bool do_shutdown = true; | ||
2125 | struct smc_sock *smc; | ||
2126 | int rc = -EINVAL; | ||
2127 | int old_state; | ||
2128 | int rc1 = 0; | ||
2129 | |||
2130 | smc = smc_sk(sk); | ||
2131 | |||
2132 | if ((how < SHUT_RD) || (how > SHUT_RDWR)) | ||
2133 | return rc; | ||
2134 | |||
2135 | lock_sock(sk); | ||
2136 | |||
2137 | rc = -ENOTCONN; | ||
2138 | if ((sk->sk_state != SMC_ACTIVE) && | ||
2139 | (sk->sk_state != SMC_PEERCLOSEWAIT1) && | ||
2140 | (sk->sk_state != SMC_PEERCLOSEWAIT2) && | ||
2141 | (sk->sk_state != SMC_APPCLOSEWAIT1) && | ||
2142 | (sk->sk_state != SMC_APPCLOSEWAIT2) && | ||
2143 | (sk->sk_state != SMC_APPFINCLOSEWAIT)) | ||
2144 | goto out; | ||
2145 | if (smc->use_fallback) { | ||
2146 | rc = kernel_sock_shutdown(smc->clcsock, how); | ||
2147 | sk->sk_shutdown = smc->clcsock->sk->sk_shutdown; | ||
2148 | if (sk->sk_shutdown == SHUTDOWN_MASK) { | ||
2149 | sk->sk_state = SMC_CLOSED; | ||
2150 | sock_put(sk); | ||
2151 | } | ||
2152 | goto out; | ||
2153 | } | ||
2154 | switch (how) { | ||
2155 | case SHUT_RDWR: /* shutdown in both directions */ | ||
2156 | old_state = sk->sk_state; | ||
2157 | rc = smc_close_active(smc); | ||
2158 | if (old_state == SMC_ACTIVE && | ||
2159 | sk->sk_state == SMC_PEERCLOSEWAIT1) | ||
2160 | do_shutdown = false; | ||
2161 | break; | ||
2162 | case SHUT_WR: | ||
2163 | rc = smc_close_shutdown_write(smc); | ||
2164 | break; | ||
2165 | case SHUT_RD: | ||
2166 | rc = 0; | ||
2167 | /* nothing more to do because peer is not involved */ | ||
2168 | break; | ||
2169 | } | ||
2170 | if (do_shutdown && smc->clcsock) | ||
2171 | rc1 = kernel_sock_shutdown(smc->clcsock, how); | ||
2172 | /* map sock_shutdown_cmd constants to sk_shutdown value range */ | ||
2173 | sk->sk_shutdown |= how + 1; | ||
2174 | |||
2175 | out: | ||
2176 | release_sock(sk); | ||
2177 | return rc ? rc : rc1; | ||
2178 | } | ||
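
The `sk->sk_shutdown |= how + 1` at the end relies on the numeric layout of the two constant sets; spelled out with the values from the kernel headers:

	/* sock_shutdown_cmd         ->  sk_shutdown bits
	 * SHUT_RD   (0) + 1 == 0x1 ==  RCV_SHUTDOWN
	 * SHUT_WR   (1) + 1 == 0x2 ==  SEND_SHUTDOWN
	 * SHUT_RDWR (2) + 1 == 0x3 ==  RCV_SHUTDOWN | SEND_SHUTDOWN
	 *                              (== SHUTDOWN_MASK)
	 */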
2179 | |||
2180 | static int smc_setsockopt(struct socket *sock, int level, int optname, | ||
2181 | sockptr_t optval, unsigned int optlen) | ||
2182 | { | ||
2183 | struct sock *sk = sock->sk; | ||
2184 | struct smc_sock *smc; | ||
2185 | int val, rc; | ||
2186 | |||
2187 | if (level == SOL_TCP && optname == TCP_ULP) | ||
2188 | return -EOPNOTSUPP; | ||
2189 | |||
2190 | smc = smc_sk(sk); | ||
2191 | |||
2192 | /* generic setsockopts reaching us here always apply to the | ||
2193 | * CLC socket | ||
2194 | */ | ||
2195 | if (unlikely(!smc->clcsock->ops->setsockopt)) | ||
2196 | rc = -EOPNOTSUPP; | ||
2197 | else | ||
2198 | rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname, | ||
2199 | optval, optlen); | ||
2200 | if (smc->clcsock->sk->sk_err) { | ||
2201 | sk->sk_err = smc->clcsock->sk->sk_err; | ||
2202 | sk->sk_error_report(sk); | ||
2203 | } | ||
2204 | |||
2205 | if (optlen < sizeof(int)) | ||
2206 | return -EINVAL; | ||
2207 | if (copy_from_sockptr(&val, optval, sizeof(int))) | ||
2208 | return -EFAULT; | ||
2209 | |||
2210 | lock_sock(sk); | ||
2211 | if (rc || smc->use_fallback) | ||
2212 | goto out; | ||
2213 | switch (optname) { | ||
2214 | case TCP_FASTOPEN: | ||
2215 | case TCP_FASTOPEN_CONNECT: | ||
2216 | case TCP_FASTOPEN_KEY: | ||
2217 | case TCP_FASTOPEN_NO_COOKIE: | ||
2218 | /* option not supported by SMC */ | ||
2219 | if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { | ||
2220 | smc_switch_to_fallback(smc); | ||
2221 | smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; | ||
2222 | } else { | ||
2223 | rc = -EINVAL; | ||
2224 | } | ||
2225 | break; | ||
2226 | case TCP_NODELAY: | ||
2227 | if (sk->sk_state != SMC_INIT && | ||
2228 | sk->sk_state != SMC_LISTEN && | ||
2229 | sk->sk_state != SMC_CLOSED) { | ||
2230 | if (val) | ||
2231 | mod_delayed_work(smc->conn.lgr->tx_wq, | ||
2232 | &smc->conn.tx_work, 0); | ||
2233 | } | ||
2234 | break; | ||
2235 | case TCP_CORK: | ||
2236 | if (sk->sk_state != SMC_INIT && | ||
2237 | sk->sk_state != SMC_LISTEN && | ||
2238 | sk->sk_state != SMC_CLOSED) { | ||
2239 | if (!val) | ||
2240 | mod_delayed_work(smc->conn.lgr->tx_wq, | ||
2241 | &smc->conn.tx_work, 0); | ||
2242 | } | ||
2243 | break; | ||
2244 | case TCP_DEFER_ACCEPT: | ||
2245 | smc->sockopt_defer_accept = val; | ||
2246 | break; | ||
2247 | default: | ||
2248 | break; | ||
2249 | } | ||
2250 | out: | ||
2251 | release_sock(sk); | ||
2252 | |||
2253 | return rc; | ||
2254 | } | ||
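
SMC cannot carry payload data in its handshake, so the TCP_FASTOPEN* cases above quietly downgrade a not-yet-connected socket to the TCP fallback instead of failing. A sketch of what triggers that from the client side (a hypothetical helper; any of the four FASTOPEN options has the same effect, and TCP_FASTOPEN_CONNECT needs reasonably recent libc headers):

	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <sys/socket.h>

	/* 'fd' is an unconnected AF_SMC socket; afterwards it runs as plain
	 * TCP (use_fallback set, fallback_rsn = SMC_CLC_DECL_OPTUNSUPP)
	 */
	static int request_fastopen(int fd)
	{
		int on = 1;

		return setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_CONNECT,
				  &on, sizeof(on));
	}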
2255 | |||
2256 | static int smc_getsockopt(struct socket *sock, int level, int optname, | ||
2257 | char __user *optval, int __user *optlen) | ||
2258 | { | ||
2259 | struct smc_sock *smc; | ||
2260 | |||
2261 | smc = smc_sk(sock->sk); | ||
2262 | /* socket options apply to the CLC socket */ | ||
2263 | if (unlikely(!smc->clcsock->ops->getsockopt)) | ||
2264 | return -EOPNOTSUPP; | ||
2265 | return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname, | ||
2266 | optval, optlen); | ||
2267 | } | ||
2268 | |||
2269 | static int smc_ioctl(struct socket *sock, unsigned int cmd, | ||
2270 | unsigned long arg) | ||
2271 | { | ||
2272 | union smc_host_cursor cons, urg; | ||
2273 | struct smc_connection *conn; | ||
2274 | struct smc_sock *smc; | ||
2275 | int answ; | ||
2276 | |||
2277 | smc = smc_sk(sock->sk); | ||
2278 | conn = &smc->conn; | ||
2279 | lock_sock(&smc->sk); | ||
2280 | if (smc->use_fallback) { | ||
2281 | if (!smc->clcsock) { | ||
2282 | release_sock(&smc->sk); | ||
2283 | return -EBADF; | ||
2284 | } | ||
2285 | answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg); | ||
2286 | release_sock(&smc->sk); | ||
2287 | return answ; | ||
2288 | } | ||
2289 | switch (cmd) { | ||
2290 | case SIOCINQ: /* same as FIONREAD */ | ||
2291 | if (smc->sk.sk_state == SMC_LISTEN) { | ||
2292 | release_sock(&smc->sk); | ||
2293 | return -EINVAL; | ||
2294 | } | ||
2295 | if (smc->sk.sk_state == SMC_INIT || | ||
2296 | smc->sk.sk_state == SMC_CLOSED) | ||
2297 | answ = 0; | ||
2298 | else | ||
2299 | answ = atomic_read(&smc->conn.bytes_to_rcv); | ||
2300 | break; | ||
2301 | case SIOCOUTQ: | ||
2302 | /* output queue size (not sent + not acked) */ | ||
2303 | if (smc->sk.sk_state == SMC_LISTEN) { | ||
2304 | release_sock(&smc->sk); | ||
2305 | return -EINVAL; | ||
2306 | } | ||
2307 | if (smc->sk.sk_state == SMC_INIT || | ||
2308 | smc->sk.sk_state == SMC_CLOSED) | ||
2309 | answ = 0; | ||
2310 | else | ||
2311 | answ = smc->conn.sndbuf_desc->len - | ||
2312 | atomic_read(&smc->conn.sndbuf_space); | ||
2313 | break; | ||
2314 | case SIOCOUTQNSD: | ||
2315 | /* output queue size (not sent only) */ | ||
2316 | if (smc->sk.sk_state == SMC_LISTEN) { | ||
2317 | release_sock(&smc->sk); | ||
2318 | return -EINVAL; | ||
2319 | } | ||
2320 | if (smc->sk.sk_state == SMC_INIT || | ||
2321 | smc->sk.sk_state == SMC_CLOSED) | ||
2322 | answ = 0; | ||
2323 | else | ||
2324 | answ = smc_tx_prepared_sends(&smc->conn); | ||
2325 | break; | ||
2326 | case SIOCATMARK: | ||
2327 | if (smc->sk.sk_state == SMC_LISTEN) { | ||
2328 | release_sock(&smc->sk); | ||
2329 | return -EINVAL; | ||
2330 | } | ||
2331 | if (smc->sk.sk_state == SMC_INIT || | ||
2332 | smc->sk.sk_state == SMC_CLOSED) { | ||
2333 | answ = 0; | ||
2334 | } else { | ||
2335 | smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn); | ||
2336 | smc_curs_copy(&urg, &conn->urg_curs, conn); | ||
2337 | answ = smc_curs_diff(conn->rmb_desc->len, | ||
2338 | &cons, &urg) == 1; | ||
2339 | } | ||
2340 | break; | ||
2341 | default: | ||
2342 | release_sock(&smc->sk); | ||
2343 | return -ENOIOCTLCMD; | ||
2344 | } | ||
2345 | release_sock(&smc->sk); | ||
2346 | |||
2347 | return put_user(answ, (int __user *)arg); | ||
2348 | } | ||
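
The SIOCINQ/SIOCOUTQ branches map directly onto the connection counters (bytes_to_rcv, and sndbuf length minus sndbuf_space). A sketch of querying them from userspace; FIONREAD is the portable alias for SIOCINQ:

	#include <linux/sockios.h>	/* SIOCOUTQ, SIOCOUTQNSD */
	#include <stdio.h>
	#include <sys/ioctl.h>

	static void print_queue_sizes(int fd)
	{
		int inq = 0, outq = 0;

		if (!ioctl(fd, FIONREAD, &inq))		/* = bytes_to_rcv */
			printf("unread:  %d\n", inq);
		if (!ioctl(fd, SIOCOUTQ, &outq))	/* = len - sndbuf_space */
			printf("unacked: %d\n", outq);
	}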
2349 | |||
2350 | static ssize_t smc_sendpage(struct socket *sock, struct page *page, | ||
2351 | int offset, size_t size, int flags) | ||
2352 | { | ||
2353 | struct sock *sk = sock->sk; | ||
2354 | struct smc_sock *smc; | ||
2355 | int rc = -EPIPE; | ||
2356 | |||
2357 | smc = smc_sk(sk); | ||
2358 | lock_sock(sk); | ||
2359 | if (sk->sk_state != SMC_ACTIVE) { | ||
2360 | release_sock(sk); | ||
2361 | goto out; | ||
2362 | } | ||
2363 | release_sock(sk); | ||
2364 | if (smc->use_fallback) | ||
2365 | rc = kernel_sendpage(smc->clcsock, page, offset, | ||
2366 | size, flags); | ||
2367 | else | ||
2368 | rc = sock_no_sendpage(sock, page, offset, size, flags); | ||
2369 | |||
2370 | out: | ||
2371 | return rc; | ||
2372 | } | ||
2373 | |||
2374 | /* Map the affected portions of the rmbe into an spd, note the number of bytes | ||
2375 | * to splice in conn->splice_pending, and press 'go'. Consumer cursor updates | ||
2376 | * are delayed until the respective page has been fully processed. | ||
2377 | * Note that subsequent recv() calls have to wait until all splice() processing | ||
2378 | * has completed. | ||
2379 | */ | ||
2380 | static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos, | ||
2381 | struct pipe_inode_info *pipe, size_t len, | ||
2382 | unsigned int flags) | ||
2383 | { | ||
2384 | struct sock *sk = sock->sk; | ||
2385 | struct smc_sock *smc; | ||
2386 | int rc = -ENOTCONN; | ||
2387 | |||
2388 | smc = smc_sk(sk); | ||
2389 | lock_sock(sk); | ||
2390 | if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { | ||
2391 | /* socket was connected before, no more data to read */ | ||
2392 | rc = 0; | ||
2393 | goto out; | ||
2394 | } | ||
2395 | if (sk->sk_state == SMC_INIT || | ||
2396 | sk->sk_state == SMC_LISTEN || | ||
2397 | sk->sk_state == SMC_CLOSED) | ||
2398 | goto out; | ||
2399 | |||
2400 | if (sk->sk_state == SMC_PEERFINCLOSEWAIT) { | ||
2401 | rc = 0; | ||
2402 | goto out; | ||
2403 | } | ||
2404 | |||
2405 | if (smc->use_fallback) { | ||
2406 | rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos, | ||
2407 | pipe, len, flags); | ||
2408 | } else { | ||
2409 | if (*ppos) { | ||
2410 | rc = -ESPIPE; | ||
2411 | goto out; | ||
2412 | } | ||
2413 | if (flags & SPLICE_F_NONBLOCK) | ||
2414 | flags = MSG_DONTWAIT; | ||
2415 | else | ||
2416 | flags = 0; | ||
2417 | rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags); | ||
2418 | } | ||
2419 | out: | ||
2420 | release_sock(sk); | ||
2421 | |||
2422 | return rc; | ||
2423 | } | ||
2424 | |||
2425 | /* must look like tcp */ | ||
2426 | static const struct proto_ops smc_sock_ops = { | ||
2427 | .family = PF_SMC, | ||
2428 | .owner = THIS_MODULE, | ||
2429 | .release = smc_release, | ||
2430 | .bind = smc_bind, | ||
2431 | .connect = smc_connect, | ||
2432 | .socketpair = sock_no_socketpair, | ||
2433 | .accept = smc_accept, | ||
2434 | .getname = smc_getname, | ||
2435 | .poll = smc_poll, | ||
2436 | .ioctl = smc_ioctl, | ||
2437 | .listen = smc_listen, | ||
2438 | .shutdown = smc_shutdown, | ||
2439 | .setsockopt = smc_setsockopt, | ||
2440 | .getsockopt = smc_getsockopt, | ||
2441 | .sendmsg = smc_sendmsg, | ||
2442 | .recvmsg = smc_recvmsg, | ||
2443 | .mmap = sock_no_mmap, | ||
2444 | .sendpage = smc_sendpage, | ||
2445 | .splice_read = smc_splice_read, | ||
2446 | }; | ||
2447 | |||
2448 | static int smc_create(struct net *net, struct socket *sock, int protocol, | ||
2449 | int kern) | ||
2450 | { | ||
2451 | int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET; | ||
2452 | struct smc_sock *smc; | ||
2453 | struct sock *sk; | ||
2454 | int rc; | ||
2455 | |||
2456 | rc = -ESOCKTNOSUPPORT; | ||
2457 | if (sock->type != SOCK_STREAM) | ||
2458 | goto out; | ||
2459 | |||
2460 | rc = -EPROTONOSUPPORT; | ||
2461 | if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6) | ||
2462 | goto out; | ||
2463 | |||
2464 | rc = -ENOBUFS; | ||
2465 | sock->ops = &smc_sock_ops; | ||
2466 | sk = smc_sock_alloc(net, sock, protocol); | ||
2467 | if (!sk) | ||
2468 | goto out; | ||
2469 | |||
2470 | /* create internal TCP socket for CLC handshake and fallback */ | ||
2471 | smc = smc_sk(sk); | ||
2472 | smc->use_fallback = false; /* assume rdma capability first */ | ||
2473 | smc->fallback_rsn = 0; | ||
2474 | rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, | ||
2475 | &smc->clcsock); | ||
2476 | if (rc) { | ||
2477 | sk_common_release(sk); | ||
2478 | goto out; | ||
2479 | } | ||
2480 | smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE); | ||
2481 | smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE); | ||
2482 | |||
2483 | out: | ||
2484 | return rc; | ||
2485 | } | ||
2486 | |||
2487 | static const struct net_proto_family smc_sock_family_ops = { | ||
2488 | .family = PF_SMC, | ||
2489 | .owner = THIS_MODULE, | ||
2490 | .create = smc_create, | ||
2491 | }; | ||
2492 | |||
2493 | unsigned int smc_net_id; | ||
2494 | |||
2495 | static __net_init int smc_net_init(struct net *net) | ||
2496 | { | ||
2497 | return smc_pnet_net_init(net); | ||
2498 | } | ||
2499 | |||
2500 | static void __net_exit smc_net_exit(struct net *net) | ||
2501 | { | ||
2502 | smc_pnet_net_exit(net); | ||
2503 | } | ||
2504 | |||
2505 | static struct pernet_operations smc_net_ops = { | ||
2506 | .init = smc_net_init, | ||
2507 | .exit = smc_net_exit, | ||
2508 | .id = &smc_net_id, | ||
2509 | .size = sizeof(struct smc_net), | ||
2510 | }; | ||
2511 | |||
2512 | static int __init smc_init(void) | ||
2513 | { | ||
2514 | int rc; | ||
2515 | |||
2516 | rc = register_pernet_subsys(&smc_net_ops); | ||
2517 | if (rc) | ||
2518 | return rc; | ||
2519 | |||
2520 | smc_ism_init(); | ||
2521 | smc_clc_init(); | ||
2522 | |||
2523 | rc = smc_pnet_init(); | ||
2524 | if (rc) | ||
2525 | goto out_pernet_subsys; | ||
2526 | |||
2527 | rc = -ENOMEM; | ||
2528 | smc_hs_wq = alloc_workqueue("smc_hs_wq", 0, 0); | ||
2529 | if (!smc_hs_wq) | ||
2530 | goto out_pnet; | ||
2531 | |||
2532 | smc_close_wq = alloc_workqueue("smc_close_wq", 0, 0); | ||
2533 | if (!smc_close_wq) | ||
2534 | goto out_alloc_hs_wq; | ||
2535 | |||
2536 | rc = smc_core_init(); | ||
2537 | if (rc) { | ||
2538 | pr_err("%s: smc_core_init fails with %d\n", __func__, rc); | ||
2539 | goto out_alloc_wqs; | ||
2540 | } | ||
2541 | |||
2542 | rc = smc_llc_init(); | ||
2543 | if (rc) { | ||
2544 | pr_err("%s: smc_llc_init fails with %d\n", __func__, rc); | ||
2545 | goto out_core; | ||
2546 | } | ||
2547 | |||
2548 | rc = smc_cdc_init(); | ||
2549 | if (rc) { | ||
2550 | pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc); | ||
2551 | goto out_core; | ||
2552 | } | ||
2553 | |||
2554 | rc = proto_register(&smc_proto, 1); | ||
2555 | if (rc) { | ||
2556 | pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc); | ||
2557 | goto out_core; | ||
2558 | } | ||
2559 | |||
2560 | rc = proto_register(&smc_proto6, 1); | ||
2561 | if (rc) { | ||
2562 | pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc); | ||
2563 | goto out_proto; | ||
2564 | } | ||
2565 | |||
2566 | rc = sock_register(&smc_sock_family_ops); | ||
2567 | if (rc) { | ||
2568 | pr_err("%s: sock_register fails with %d\n", __func__, rc); | ||
2569 | goto out_proto6; | ||
2570 | } | ||
2571 | INIT_HLIST_HEAD(&smc_v4_hashinfo.ht); | ||
2572 | INIT_HLIST_HEAD(&smc_v6_hashinfo.ht); | ||
2573 | |||
2574 | rc = smc_ib_register_client(); | ||
2575 | if (rc) { | ||
2576 | pr_err("%s: ib_register fails with %d\n", __func__, rc); | ||
2577 | goto out_sock; | ||
2578 | } | ||
2579 | |||
2580 | static_branch_enable(&tcp_have_smc); | ||
2581 | return 0; | ||
2582 | |||
2583 | out_sock: | ||
2584 | sock_unregister(PF_SMC); | ||
2585 | out_proto6: | ||
2586 | proto_unregister(&smc_proto6); | ||
2587 | out_proto: | ||
2588 | proto_unregister(&smc_proto); | ||
2589 | out_core: | ||
2590 | smc_core_exit(); | ||
2591 | out_alloc_wqs: | ||
2592 | destroy_workqueue(smc_close_wq); | ||
2593 | out_alloc_hs_wq: | ||
2594 | destroy_workqueue(smc_hs_wq); | ||
2595 | out_pnet: | ||
2596 | smc_pnet_exit(); | ||
2597 | out_pernet_subsys: | ||
2598 | unregister_pernet_subsys(&smc_net_ops); | ||
2599 | |||
2600 | return rc; | ||
2601 | } | ||
2602 | |||
2603 | static void __exit smc_exit(void) | ||
2604 | { | ||
2605 | static_branch_disable(&tcp_have_smc); | ||
2606 | sock_unregister(PF_SMC); | ||
2607 | smc_core_exit(); | ||
2608 | smc_ib_unregister_client(); | ||
2609 | destroy_workqueue(smc_close_wq); | ||
2610 | destroy_workqueue(smc_hs_wq); | ||
2611 | proto_unregister(&smc_proto6); | ||
2612 | proto_unregister(&smc_proto); | ||
2613 | smc_pnet_exit(); | ||
2614 | unregister_pernet_subsys(&smc_net_ops); | ||
2615 | rcu_barrier(); | ||
2616 | } | ||
2617 | |||
2618 | module_init(smc_init); | ||
2619 | module_exit(smc_exit); | ||
2620 | |||
2621 | MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>"); | ||
2622 | MODULE_DESCRIPTION("smc socket address family"); | ||
2623 | MODULE_LICENSE("GPL"); | ||
2624 | MODULE_ALIAS_NETPROTO(PF_SMC); | ||
diff --git a/net/smc/smc.h b/net/smc/smc.h new file mode 100644 index 000000000..e6919fe31 --- /dev/null +++ b/net/smc/smc.h | |||
@@ -0,0 +1,300 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * Definitions for the SMC module (socket related) | ||
6 | * | ||
7 | * Copyright IBM Corp. 2016 | ||
8 | * | ||
9 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> | ||
10 | */ | ||
11 | #ifndef __SMC_H | ||
12 | #define __SMC_H | ||
13 | |||
14 | #include <linux/socket.h> | ||
15 | #include <linux/types.h> | ||
16 | #include <linux/compiler.h> /* __aligned */ | ||
17 | #include <net/sock.h> | ||
18 | |||
19 | #include "smc_ib.h" | ||
20 | |||
21 | #define SMC_V1 1 /* SMC version V1 */ | ||
22 | #define SMC_V2 2 /* SMC version V2 */ | ||
23 | #define SMC_RELEASE 0 | ||
24 | |||
25 | #define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */ | ||
26 | #define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */ | ||
27 | |||
28 | #define SMC_MAX_ISM_DEVS 8 /* max # of proposed non-native ISM | ||
29 | * devices | ||
30 | */ | ||
31 | |||
32 | #define SMC_MAX_HOSTNAME_LEN 32 | ||
33 | #define SMC_MAX_EID_LEN 32 | ||
34 | |||
35 | extern struct proto smc_proto; | ||
36 | extern struct proto smc_proto6; | ||
37 | |||
38 | #ifdef ATOMIC64_INIT | ||
39 | #define KERNEL_HAS_ATOMIC64 | ||
40 | #endif | ||
41 | |||
42 | enum smc_state { /* possible states of an SMC socket */ | ||
43 | SMC_ACTIVE = 1, | ||
44 | SMC_INIT = 2, | ||
45 | SMC_CLOSED = 7, | ||
46 | SMC_LISTEN = 10, | ||
47 | /* normal close */ | ||
48 | SMC_PEERCLOSEWAIT1 = 20, | ||
49 | SMC_PEERCLOSEWAIT2 = 21, | ||
50 | SMC_APPFINCLOSEWAIT = 24, | ||
51 | SMC_APPCLOSEWAIT1 = 22, | ||
52 | SMC_APPCLOSEWAIT2 = 23, | ||
53 | SMC_PEERFINCLOSEWAIT = 25, | ||
54 | /* abnormal close */ | ||
55 | SMC_PEERABORTWAIT = 26, | ||
56 | SMC_PROCESSABORT = 27, | ||
57 | }; | ||
58 | |||
59 | struct smc_link_group; | ||
60 | |||
61 | struct smc_wr_rx_hdr { /* common prefix part of LLC and CDC to demultiplex */ | ||
62 | u8 type; | ||
63 | } __aligned(1); | ||
64 | |||
65 | struct smc_cdc_conn_state_flags { | ||
66 | #if defined(__BIG_ENDIAN_BITFIELD) | ||
67 | u8 peer_done_writing : 1; /* Sending done indicator */ | ||
68 | u8 peer_conn_closed : 1; /* Peer connection closed indicator */ | ||
69 | u8 peer_conn_abort : 1; /* Abnormal close indicator */ | ||
70 | u8 reserved : 5; | ||
71 | #elif defined(__LITTLE_ENDIAN_BITFIELD) | ||
72 | u8 reserved : 5; | ||
73 | u8 peer_conn_abort : 1; | ||
74 | u8 peer_conn_closed : 1; | ||
75 | u8 peer_done_writing : 1; | ||
76 | #endif | ||
77 | }; | ||
78 | |||
79 | struct smc_cdc_producer_flags { | ||
80 | #if defined(__BIG_ENDIAN_BITFIELD) | ||
81 | u8 write_blocked : 1; /* Writing Blocked, no rx buf space */ | ||
82 | u8 urg_data_pending : 1; /* Urgent Data Pending */ | ||
83 | u8 urg_data_present : 1; /* Urgent Data Present */ | ||
84 | u8 cons_curs_upd_req : 1; /* cursor update requested */ | ||
85 | u8 failover_validation : 1;/* message replay due to failover */ | ||
86 | u8 reserved : 3; | ||
87 | #elif defined(__LITTLE_ENDIAN_BITFIELD) | ||
88 | u8 reserved : 3; | ||
89 | u8 failover_validation : 1; | ||
90 | u8 cons_curs_upd_req : 1; | ||
91 | u8 urg_data_present : 1; | ||
92 | u8 urg_data_pending : 1; | ||
93 | u8 write_blocked : 1; | ||
94 | #endif | ||
95 | }; | ||
96 | |||
97 | /* in host byte order */ | ||
98 | union smc_host_cursor { /* SMC cursor - an offset in an RMBE */ | ||
99 | struct { | ||
100 | u16 reserved; | ||
101 | u16 wrap; /* window wrap sequence number */ | ||
102 | u32 count; /* cursor (= offset) part */ | ||
103 | }; | ||
104 | #ifdef KERNEL_HAS_ATOMIC64 | ||
105 | atomic64_t acurs; /* for atomic processing */ | ||
106 | #else | ||
107 | u64 acurs; /* for atomic processing */ | ||
108 | #endif | ||
109 | } __aligned(8); | ||
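
A userspace sketch of the aliasing trick this union relies on: the wrap/count pair and the 64-bit word occupy the same storage, so a cursor can be published or snapshotted with a single atomic access instead of taking a lock (without KERNEL_HAS_ATOMIC64, the fallback is the acurs_lock spinlock declared in smc_connection below). The C11 `_Atomic` member stands in for the kernel's atomic64_t; the layout assumption mirrors the struct above.

	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>

	union host_cursor {
		struct {
			uint16_t reserved;
			uint16_t wrap;	/* window wrap sequence number */
			uint32_t count;	/* offset within the buffer */
		};
		_Atomic uint64_t acurs;	/* whole cursor as one word */
	};

	int main(void)
	{
		union host_cursor shared = { 0 }, next = { 0 }, snap;

		next.wrap = 1;		/* writer: buffer wrapped once... */
		next.count = 4096;	/* ...and 4 KiB were produced */
		atomic_store(&shared.acurs, next.acurs);

		/* reader: one load yields a consistent wrap/count pair */
		snap.acurs = atomic_load(&shared.acurs);
		printf("wrap=%u count=%u\n", snap.wrap, snap.count);
		return 0;
	}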
110 | |||
111 | /* in host byte order, except for flag bitfields in network byte order */ | ||
112 | struct smc_host_cdc_msg { /* Connection Data Control message */ | ||
113 | struct smc_wr_rx_hdr common; /* .type = 0xFE */ | ||
114 | u8 len; /* length = 44 */ | ||
115 | u16 seqno; /* connection seq # */ | ||
116 | u32 token; /* alert_token */ | ||
117 | union smc_host_cursor prod; /* producer cursor */ | ||
118 | union smc_host_cursor cons; /* consumer cursor, | ||
119 | * piggy backed "ack" | ||
120 | */ | ||
121 | struct smc_cdc_producer_flags prod_flags; /* conn. tx/rx status */ | ||
122 | struct smc_cdc_conn_state_flags conn_state_flags; /* peer conn. status*/ | ||
123 | u8 reserved[18]; | ||
124 | } __aligned(8); | ||
125 | |||
126 | enum smc_urg_state { | ||
127 | SMC_URG_VALID = 1, /* data present */ | ||
128 | SMC_URG_NOTYET = 2, /* data pending */ | ||
129 | SMC_URG_READ = 3, /* data was already read */ | ||
130 | }; | ||
131 | |||
132 | struct smc_connection { | ||
133 | struct rb_node alert_node; | ||
134 | struct smc_link_group *lgr; /* link group of connection */ | ||
135 | struct smc_link *lnk; /* assigned SMC-R link */ | ||
136 | u32 alert_token_local; /* unique conn. id */ | ||
137 | u8 peer_rmbe_idx; /* from tcp handshake */ | ||
138 | int peer_rmbe_size; /* size of peer rx buffer */ | ||
139 | atomic_t peer_rmbe_space;/* remaining free bytes in peer | ||
140 | * rmbe | ||
141 | */ | ||
142 | int rtoken_idx; /* idx to peer RMB rkey/addr */ | ||
143 | |||
144 | struct smc_buf_desc *sndbuf_desc; /* send buffer descriptor */ | ||
145 | struct smc_buf_desc *rmb_desc; /* RMBE descriptor */ | ||
146 | int rmbe_size_short;/* compressed notation */ | ||
147 | int rmbe_update_limit; | ||
148 | /* lower limit for consumer | ||
149 | * cursor update | ||
150 | */ | ||
151 | |||
152 | struct smc_host_cdc_msg local_tx_ctrl; /* host byte order staging | ||
153 | * buffer for CDC msg send | ||
154 | * .prod cf. TCP snd_nxt | ||
155 | * .cons cf. TCP sends ack | ||
156 | */ | ||
157 | union smc_host_cursor local_tx_ctrl_fin; | ||
158 | /* prod crsr - confirmed by peer | ||
159 | */ | ||
160 | union smc_host_cursor tx_curs_prep; /* tx - prepared data | ||
161 | * snd_max..wmem_alloc | ||
162 | */ | ||
163 | union smc_host_cursor tx_curs_sent; /* tx - sent data | ||
164 | * snd_nxt ? | ||
165 | */ | ||
166 | union smc_host_cursor tx_curs_fin; /* tx - confirmed by peer | ||
167 | * snd-wnd-begin ? | ||
168 | */ | ||
169 | atomic_t sndbuf_space; /* remaining space in sndbuf */ | ||
170 | u16 tx_cdc_seq; /* sequence # for CDC send */ | ||
171 | u16 tx_cdc_seq_fin; /* sequence # - tx completed */ | ||
172 | spinlock_t send_lock; /* protect wr_sends */ | ||
173 | atomic_t cdc_pend_tx_wr; /* number of pending tx CDC wqe | ||
174 | * - inc when post wqe, | ||
175 | * - dec on polled tx cqe | ||
176 | */ | ||
177 | wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/ | ||
178 | struct delayed_work tx_work; /* retry of smc_cdc_msg_send */ | ||
179 | u32 tx_off; /* base offset in peer rmb */ | ||
180 | |||
181 | struct smc_host_cdc_msg local_rx_ctrl; /* filled during event_handl. | ||
182 | * .prod cf. TCP rcv_nxt | ||
183 | * .cons cf. TCP snd_una | ||
184 | */ | ||
185 | union smc_host_cursor rx_curs_confirmed; /* confirmed to peer | ||
186 | * source of snd_una ? | ||
187 | */ | ||
188 | union smc_host_cursor urg_curs; /* points at urgent byte */ | ||
189 | enum smc_urg_state urg_state; | ||
190 | bool urg_tx_pend; /* urgent data staged */ | ||
191 | bool urg_rx_skip_pend; | ||
192 | /* indicate urgent oob data | ||
193 | * read, but previous regular | ||
194 | * data still pending | ||
195 | */ | ||
196 | char urg_rx_byte; /* urgent byte */ | ||
197 | atomic_t bytes_to_rcv; /* arrived data, | ||
198 | * not yet received | ||
199 | */ | ||
200 | atomic_t splice_pending; /* number of spliced bytes | ||
201 | * pending processing | ||
202 | */ | ||
203 | #ifndef KERNEL_HAS_ATOMIC64 | ||
204 | spinlock_t acurs_lock; /* protect cursors */ | ||
205 | #endif | ||
206 | struct work_struct close_work; /* peer sent some closing */ | ||
207 | struct work_struct abort_work; /* abort the connection */ | ||
208 | struct tasklet_struct rx_tsklet; /* Receiver tasklet for SMC-D */ | ||
209 | u8 rx_off; /* receive offset: | ||
210 | * 0 for SMC-R, 32 for SMC-D | ||
211 | */ | ||
212 | u64 peer_token; /* SMC-D token of peer */ | ||
213 | u8 killed : 1; /* abnormal termination */ | ||
214 | u8 out_of_sync : 1; /* out of sync with peer */ | ||
215 | }; | ||
216 | |||
217 | struct smc_sock { /* smc sock container */ | ||
218 | struct sock sk; | ||
219 | struct socket *clcsock; /* internal tcp socket */ | ||
220 | void (*clcsk_data_ready)(struct sock *sk); | ||
221 | /* original data_ready fct. */ | ||
222 | struct smc_connection conn; /* smc connection */ | ||
223 | struct smc_sock *listen_smc; /* listen parent */ | ||
224 | struct work_struct connect_work; /* handle non-blocking connect*/ | ||
225 | struct work_struct tcp_listen_work;/* handle tcp socket accepts */ | ||
226 | struct work_struct smc_listen_work;/* prepare new accept socket */ | ||
227 | struct list_head accept_q; /* sockets to be accepted */ | ||
228 | spinlock_t accept_q_lock; /* protects accept_q */ | ||
229 | bool use_fallback; /* fallback to tcp */ | ||
230 | int fallback_rsn; /* reason for fallback */ | ||
231 | u32 peer_diagnosis; /* decline reason from peer */ | ||
232 | int sockopt_defer_accept; | ||
233 | /* sockopt TCP_DEFER_ACCEPT | ||
234 | * value | ||
235 | */ | ||
236 | u8 wait_close_tx_prepared : 1; | ||
237 | /* shutdown wr or close | ||
238 | * started, waiting for unsent | ||
239 | * data to be sent | ||
240 | */ | ||
241 | u8 connect_nonblock : 1; | ||
242 | /* non-blocking connect in | ||
243 | * flight | ||
244 | */ | ||
245 | struct mutex clcsock_release_lock; | ||
246 | /* protects clcsock of a listen | ||
247 | * socket | ||
248 | */ | ||
249 | }; | ||
250 | |||
251 | static inline struct smc_sock *smc_sk(const struct sock *sk) | ||
252 | { | ||
253 | return (struct smc_sock *)sk; | ||
254 | } | ||
255 | |||
256 | extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */ | ||
257 | extern struct workqueue_struct *smc_close_wq; /* wq for close work */ | ||
258 | |||
259 | #define SMC_SYSTEMID_LEN 8 | ||
260 | |||
261 | extern u8 local_systemid[SMC_SYSTEMID_LEN]; /* unique system identifier */ | ||
262 | |||
263 | #define ntohll(x) be64_to_cpu(x) | ||
264 | #define htonll(x) cpu_to_be64(x) | ||
265 | |||
266 | /* convert a u32 value into network byte order and store it in a 3-byte field */ | ||
267 | static inline void hton24(u8 *net, u32 host) | ||
268 | { | ||
269 | __be32 t; | ||
270 | |||
271 | t = cpu_to_be32(host); | ||
272 | memcpy(net, ((u8 *)&t) + 1, 3); | ||
273 | } | ||
274 | |||
275 | /* convert a received 3-byte field into host byte order */ | ||
276 | static inline u32 ntoh24(u8 *net) | ||
277 | { | ||
278 | __be32 t = 0; | ||
279 | |||
280 | memcpy(((u8 *)&t) + 1, net, 3); | ||
281 | return be32_to_cpu(t); | ||
282 | } | ||
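
These helpers truncate to, and widen from, the 24-bit on-wire fields used in the CLC/LLC messages. A self-contained userspace round trip of the same logic, reimplemented with the libc byte-order functions so it compiles outside the kernel:

	#include <arpa/inet.h>	/* htonl/ntohl */
	#include <assert.h>
	#include <stdint.h>
	#include <string.h>

	static void hton24(uint8_t *net, uint32_t host)
	{
		uint32_t t = htonl(host);

		memcpy(net, ((uint8_t *)&t) + 1, 3);	/* drop high byte */
	}

	static uint32_t ntoh24(const uint8_t *net)
	{
		uint32_t t = 0;

		memcpy(((uint8_t *)&t) + 1, net, 3);	/* low 24 bits */
		return ntohl(t);
	}

	int main(void)
	{
		uint8_t buf[3];

		hton24(buf, 0x123456);			/* fits in 24 bits */
		assert(buf[0] == 0x12 && buf[1] == 0x34 && buf[2] == 0x56);
		assert(ntoh24(buf) == 0x123456);	/* lossless round trip */
		return 0;
	}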
283 | |||
284 | #ifdef CONFIG_XFRM | ||
285 | static inline bool using_ipsec(struct smc_sock *smc) | ||
286 | { | ||
287 | return (smc->clcsock->sk->sk_policy[0] || | ||
288 | smc->clcsock->sk->sk_policy[1]) ? true : false; | ||
289 | } | ||
290 | #else | ||
291 | static inline bool using_ipsec(struct smc_sock *smc) | ||
292 | { | ||
293 | return false; | ||
294 | } | ||
295 | #endif | ||
296 | |||
297 | struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock); | ||
298 | void smc_close_non_accepted(struct sock *sk); | ||
299 | |||
300 | #endif /* __SMC_H */ | ||
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c new file mode 100644 index 000000000..94503f36b --- /dev/null +++ b/net/smc/smc_cdc.c | |||
@@ -0,0 +1,476 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * Connection Data Control (CDC) | ||
6 | * handles flow control | ||
7 | * | ||
8 | * Copyright IBM Corp. 2016 | ||
9 | * | ||
10 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> | ||
11 | */ | ||
12 | |||
13 | #include <linux/spinlock.h> | ||
14 | |||
15 | #include "smc.h" | ||
16 | #include "smc_wr.h" | ||
17 | #include "smc_cdc.h" | ||
18 | #include "smc_tx.h" | ||
19 | #include "smc_rx.h" | ||
20 | #include "smc_close.h" | ||
21 | |||
22 | /********************************** send *************************************/ | ||
23 | |||
24 | /* handler for send/transmission completion of a CDC msg */ | ||
25 | static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, | ||
26 | struct smc_link *link, | ||
27 | enum ib_wc_status wc_status) | ||
28 | { | ||
29 | struct smc_cdc_tx_pend *cdcpend = (struct smc_cdc_tx_pend *)pnd_snd; | ||
30 | struct smc_connection *conn = cdcpend->conn; | ||
31 | struct smc_sock *smc; | ||
32 | int diff; | ||
33 | |||
34 | smc = container_of(conn, struct smc_sock, conn); | ||
35 | bh_lock_sock(&smc->sk); | ||
36 | if (!wc_status) { | ||
37 | diff = smc_curs_diff(cdcpend->conn->sndbuf_desc->len, | ||
38 | &cdcpend->conn->tx_curs_fin, | ||
39 | &cdcpend->cursor); | ||
40 | /* sndbuf_space is decreased in smc_sendmsg */ | ||
41 | smp_mb__before_atomic(); | ||
42 | atomic_add(diff, &cdcpend->conn->sndbuf_space); | ||
43 | /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */ | ||
44 | smp_mb__after_atomic(); | ||
45 | smc_curs_copy(&conn->tx_curs_fin, &cdcpend->cursor, conn); | ||
46 | smc_curs_copy(&conn->local_tx_ctrl_fin, &cdcpend->p_cursor, | ||
47 | conn); | ||
48 | conn->tx_cdc_seq_fin = cdcpend->ctrl_seq; | ||
49 | } | ||
50 | |||
51 | if (atomic_dec_and_test(&conn->cdc_pend_tx_wr) && | ||
52 | unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq))) | ||
53 | wake_up(&conn->cdc_pend_tx_wq); | ||
54 | WARN_ON(atomic_read(&conn->cdc_pend_tx_wr) < 0); | ||
55 | |||
56 | smc_tx_sndbuf_nonfull(smc); | ||
57 | bh_unlock_sock(&smc->sk); | ||
58 | } | ||
59 | |||
60 | int smc_cdc_get_free_slot(struct smc_connection *conn, | ||
61 | struct smc_link *link, | ||
62 | struct smc_wr_buf **wr_buf, | ||
63 | struct smc_rdma_wr **wr_rdma_buf, | ||
64 | struct smc_cdc_tx_pend **pend) | ||
65 | { | ||
66 | int rc; | ||
67 | |||
68 | rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, | ||
69 | wr_rdma_buf, | ||
70 | (struct smc_wr_tx_pend_priv **)pend); | ||
71 | if (conn->killed) { | ||
72 | /* abnormal termination */ | ||
73 | if (!rc) | ||
74 | smc_wr_tx_put_slot(link, | ||
75 | (struct smc_wr_tx_pend_priv *)(*pend)); | ||
76 | rc = -EPIPE; | ||
77 | } | ||
78 | return rc; | ||
79 | } | ||
80 | |||
81 | static inline void smc_cdc_add_pending_send(struct smc_connection *conn, | ||
82 | struct smc_cdc_tx_pend *pend) | ||
83 | { | ||
84 | BUILD_BUG_ON_MSG( | ||
85 | sizeof(struct smc_cdc_msg) > SMC_WR_BUF_SIZE, | ||
86 | "must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_cdc_msg)"); | ||
87 | BUILD_BUG_ON_MSG( | ||
88 | offsetofend(struct smc_cdc_msg, reserved) > SMC_WR_TX_SIZE, | ||
89 | "must adapt SMC_WR_TX_SIZE to sizeof(struct smc_cdc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()"); | ||
90 | BUILD_BUG_ON_MSG( | ||
91 | sizeof(struct smc_cdc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE, | ||
92 | "must increase SMC_WR_TX_PEND_PRIV_SIZE to at least sizeof(struct smc_cdc_tx_pend)"); | ||
93 | pend->conn = conn; | ||
94 | pend->cursor = conn->tx_curs_sent; | ||
95 | pend->p_cursor = conn->local_tx_ctrl.prod; | ||
96 | pend->ctrl_seq = conn->tx_cdc_seq; | ||
97 | } | ||
98 | |||
99 | int smc_cdc_msg_send(struct smc_connection *conn, | ||
100 | struct smc_wr_buf *wr_buf, | ||
101 | struct smc_cdc_tx_pend *pend) | ||
102 | { | ||
103 | struct smc_link *link = conn->lnk; | ||
104 | union smc_host_cursor cfed; | ||
105 | int rc; | ||
106 | |||
107 | smc_cdc_add_pending_send(conn, pend); | ||
108 | |||
109 | conn->tx_cdc_seq++; | ||
110 | conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; | ||
111 | smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed); | ||
112 | |||
113 | atomic_inc(&conn->cdc_pend_tx_wr); | ||
114 | smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */ | ||
115 | |||
116 | rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); | ||
117 | if (!rc) { | ||
118 | smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn); | ||
119 | conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; | ||
120 | } else { | ||
121 | conn->tx_cdc_seq--; /* roll back, msg was never posted */ | ||
122 | conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; | ||
123 | atomic_dec(&conn->cdc_pend_tx_wr); | ||
124 | } | ||
125 | |||
126 | return rc; | ||
127 | } | ||
128 | |||
129 | /* send a validation msg indicating the move of a conn to another QP link */ | ||
130 | int smcr_cdc_msg_send_validation(struct smc_connection *conn, | ||
131 | struct smc_cdc_tx_pend *pend, | ||
132 | struct smc_wr_buf *wr_buf) | ||
133 | { | ||
134 | struct smc_host_cdc_msg *local = &conn->local_tx_ctrl; | ||
135 | struct smc_link *link = conn->lnk; | ||
136 | struct smc_cdc_msg *peer; | ||
137 | int rc; | ||
138 | |||
139 | peer = (struct smc_cdc_msg *)wr_buf; | ||
140 | peer->common.type = local->common.type; | ||
141 | peer->len = local->len; | ||
142 | peer->seqno = htons(conn->tx_cdc_seq_fin); /* seqno last compl. tx */ | ||
143 | peer->token = htonl(local->token); | ||
144 | peer->prod_flags.failover_validation = 1; | ||
145 | |||
146 | /* We need to set pend->conn here to make sure smc_cdc_tx_handler() | ||
147 | * can handle it properly | ||
148 | */ | ||
149 | smc_cdc_add_pending_send(conn, pend); | ||
150 | |||
151 | atomic_inc(&conn->cdc_pend_tx_wr); | ||
152 | smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */ | ||
153 | |||
154 | rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); | ||
155 | if (unlikely(rc)) | ||
156 | atomic_dec(&conn->cdc_pend_tx_wr); | ||
157 | |||
158 | return rc; | ||
159 | } | ||
160 | |||
161 | static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn) | ||
162 | { | ||
163 | struct smc_cdc_tx_pend *pend; | ||
164 | struct smc_wr_buf *wr_buf; | ||
165 | struct smc_link *link; | ||
166 | bool again = false; | ||
167 | int rc; | ||
168 | |||
169 | again: | ||
170 | link = conn->lnk; | ||
171 | if (!smc_wr_tx_link_hold(link)) | ||
172 | return -ENOLINK; | ||
173 | rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend); | ||
174 | if (rc) | ||
175 | goto put_out; | ||
176 | |||
177 | spin_lock_bh(&conn->send_lock); | ||
178 | if (link != conn->lnk) { | ||
179 | /* link of connection changed, try again once */ | ||
180 | spin_unlock_bh(&conn->send_lock); | ||
181 | smc_wr_tx_put_slot(link, | ||
182 | (struct smc_wr_tx_pend_priv *)pend); | ||
183 | smc_wr_tx_link_put(link); | ||
184 | if (again) | ||
185 | return -ENOLINK; | ||
186 | again = true; | ||
187 | goto again; | ||
188 | } | ||
189 | rc = smc_cdc_msg_send(conn, wr_buf, pend); | ||
190 | spin_unlock_bh(&conn->send_lock); | ||
191 | put_out: | ||
192 | smc_wr_tx_link_put(link); | ||
193 | return rc; | ||
194 | } | ||
195 | |||
196 | int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) | ||
197 | { | ||
198 | int rc; | ||
199 | |||
200 | if (!conn->lgr || (conn->lgr->is_smcd && conn->lgr->peer_shutdown)) | ||
201 | return -EPIPE; | ||
202 | |||
203 | if (conn->lgr->is_smcd) { | ||
204 | spin_lock_bh(&conn->send_lock); | ||
205 | rc = smcd_cdc_msg_send(conn); | ||
206 | spin_unlock_bh(&conn->send_lock); | ||
207 | } else { | ||
208 | rc = smcr_cdc_get_slot_and_msg_send(conn); | ||
209 | } | ||
210 | |||
211 | return rc; | ||
212 | } | ||
213 | |||
214 | void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn) | ||
215 | { | ||
216 | wait_event(conn->cdc_pend_tx_wq, !atomic_read(&conn->cdc_pend_tx_wr)); | ||
217 | } | ||
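/* This wait pairs with the completion path at the top of this file:
 * smc_cdc_tx_handler() drops cdc_pend_tx_wr for every completed CDC send
 * and wakes the queue once the counter reaches zero. The waker side boils
 * down to the usual atomic/waitqueue idiom (a sketch, not a verbatim copy):
 *
 *	if (atomic_dec_and_test(&conn->cdc_pend_tx_wr) &&
 *	    wq_has_sleeper(&conn->cdc_pend_tx_wq))
 *		wake_up(&conn->cdc_pend_tx_wq);
 */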
218 | |||
219 | /* Send an SMC-D CDC header. | ||
220 | * This increments the free space available in our send buffer. | ||
221 | * Also update the confirmed receive buffer with what was sent to the peer. | ||
222 | */ | ||
223 | int smcd_cdc_msg_send(struct smc_connection *conn) | ||
224 | { | ||
225 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); | ||
226 | union smc_host_cursor curs; | ||
227 | struct smcd_cdc_msg cdc; | ||
228 | int rc, diff; | ||
229 | |||
230 | memset(&cdc, 0, sizeof(cdc)); | ||
231 | cdc.common.type = SMC_CDC_MSG_TYPE; | ||
232 | curs.acurs.counter = atomic64_read(&conn->local_tx_ctrl.prod.acurs); | ||
233 | cdc.prod.wrap = curs.wrap; | ||
234 | cdc.prod.count = curs.count; | ||
235 | curs.acurs.counter = atomic64_read(&conn->local_tx_ctrl.cons.acurs); | ||
236 | cdc.cons.wrap = curs.wrap; | ||
237 | cdc.cons.count = curs.count; | ||
238 | cdc.cons.prod_flags = conn->local_tx_ctrl.prod_flags; | ||
239 | cdc.cons.conn_state_flags = conn->local_tx_ctrl.conn_state_flags; | ||
240 | rc = smcd_tx_ism_write(conn, &cdc, sizeof(cdc), 0, 1); | ||
241 | if (rc) | ||
242 | return rc; | ||
243 | smc_curs_copy(&conn->rx_curs_confirmed, &curs, conn); | ||
244 | conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; | ||
245 | /* Calculate transmitted data and increment free send buffer space */ | ||
246 | diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin, | ||
247 | &conn->tx_curs_sent); | ||
248 | /* increased by confirmed number of bytes */ | ||
249 | smp_mb__before_atomic(); | ||
250 | atomic_add(diff, &conn->sndbuf_space); | ||
251 | /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */ | ||
252 | smp_mb__after_atomic(); | ||
253 | smc_curs_copy(&conn->tx_curs_fin, &conn->tx_curs_sent, conn); | ||
254 | |||
255 | smc_tx_sndbuf_nonfull(smc); | ||
256 | return rc; | ||
257 | } | ||
258 | |||
259 | /********************************* receive ***********************************/ | ||
260 | |||
261 | static inline bool smc_cdc_before(u16 seq1, u16 seq2) | ||
262 | { | ||
263 | return (s16)(seq1 - seq2) < 0; | ||
264 | } | ||
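/* The cast to s16 makes the comparison wraparound-safe: two sequence
 * numbers are ordered by their signed 16-bit distance, which stays correct
 * as long as they are less than 0x8000 apart. For example:
 *
 *	smc_cdc_before(5, 9);           // true: (s16)(5 - 9) == -4
 *	smc_cdc_before(0xfffe, 0x0002); // true: (s16)0xfffc  == -4,
 *	                                //       ordering survives the wrap
 */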
265 | |||
266 | static void smc_cdc_handle_urg_data_arrival(struct smc_sock *smc, | ||
267 | int *diff_prod) | ||
268 | { | ||
269 | struct smc_connection *conn = &smc->conn; | ||
270 | char *base; | ||
271 | |||
272 | /* new data included urgent business */ | ||
273 | smc_curs_copy(&conn->urg_curs, &conn->local_rx_ctrl.prod, conn); | ||
274 | conn->urg_state = SMC_URG_VALID; | ||
275 | if (!sock_flag(&smc->sk, SOCK_URGINLINE)) | ||
276 | /* we'll skip the urgent byte, so don't account for it */ | ||
277 | (*diff_prod)--; | ||
278 | base = (char *)conn->rmb_desc->cpu_addr + conn->rx_off; | ||
279 | if (conn->urg_curs.count) | ||
280 | conn->urg_rx_byte = *(base + conn->urg_curs.count - 1); | ||
281 | else | ||
282 | conn->urg_rx_byte = *(base + conn->rmb_desc->len - 1); | ||
283 | sk_send_sigurg(&smc->sk); | ||
284 | } | ||
285 | |||
286 | static void smc_cdc_msg_validate(struct smc_sock *smc, struct smc_cdc_msg *cdc, | ||
287 | struct smc_link *link) | ||
288 | { | ||
289 | struct smc_connection *conn = &smc->conn; | ||
290 | u16 recv_seq = ntohs(cdc->seqno); | ||
291 | s16 diff; | ||
292 | |||
293 | /* check that seqnum was seen before */ | ||
294 | diff = conn->local_rx_ctrl.seqno - recv_seq; | ||
295 | if (diff < 0) { /* diff larger than 0x7fff */ | ||
296 | /* drop connection */ | ||
297 | conn->out_of_sync = 1; /* prevent any further receives */ | ||
298 | spin_lock_bh(&conn->send_lock); | ||
299 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; | ||
300 | conn->lnk = link; | ||
301 | spin_unlock_bh(&conn->send_lock); | ||
302 | sock_hold(&smc->sk); /* sock_put in abort_work */ | ||
303 | if (!queue_work(smc_close_wq, &conn->abort_work)) | ||
304 | sock_put(&smc->sk); | ||
305 | } | ||
306 | } | ||
307 | |||
308 | static void smc_cdc_msg_recv_action(struct smc_sock *smc, | ||
309 | struct smc_cdc_msg *cdc) | ||
310 | { | ||
311 | union smc_host_cursor cons_old, prod_old; | ||
312 | struct smc_connection *conn = &smc->conn; | ||
313 | int diff_cons, diff_prod; | ||
314 | |||
315 | smc_curs_copy(&prod_old, &conn->local_rx_ctrl.prod, conn); | ||
316 | smc_curs_copy(&cons_old, &conn->local_rx_ctrl.cons, conn); | ||
317 | smc_cdc_msg_to_host(&conn->local_rx_ctrl, cdc, conn); | ||
318 | |||
319 | diff_cons = smc_curs_diff(conn->peer_rmbe_size, &cons_old, | ||
320 | &conn->local_rx_ctrl.cons); | ||
321 | if (diff_cons) { | ||
322 | /* peer_rmbe_space is decreased during data transfer with RDMA | ||
323 | * write | ||
324 | */ | ||
325 | smp_mb__before_atomic(); | ||
326 | atomic_add(diff_cons, &conn->peer_rmbe_space); | ||
327 | /* guarantee 0 <= peer_rmbe_space <= peer_rmbe_size */ | ||
328 | smp_mb__after_atomic(); | ||
329 | } | ||
330 | |||
331 | diff_prod = smc_curs_diff(conn->rmb_desc->len, &prod_old, | ||
332 | &conn->local_rx_ctrl.prod); | ||
333 | if (diff_prod) { | ||
334 | if (conn->local_rx_ctrl.prod_flags.urg_data_present) | ||
335 | smc_cdc_handle_urg_data_arrival(smc, &diff_prod); | ||
336 | /* bytes_to_rcv is decreased in smc_recvmsg */ | ||
337 | smp_mb__before_atomic(); | ||
338 | atomic_add(diff_prod, &conn->bytes_to_rcv); | ||
339 | /* guarantee 0 <= bytes_to_rcv <= rmb_desc->len */ | ||
340 | smp_mb__after_atomic(); | ||
341 | smc->sk.sk_data_ready(&smc->sk); | ||
342 | } else { | ||
343 | if (conn->local_rx_ctrl.prod_flags.write_blocked) | ||
344 | smc->sk.sk_data_ready(&smc->sk); | ||
345 | if (conn->local_rx_ctrl.prod_flags.urg_data_pending) | ||
346 | conn->urg_state = SMC_URG_NOTYET; | ||
347 | } | ||
348 | |||
349 | /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */ | ||
350 | if ((diff_cons && smc_tx_prepared_sends(conn)) || | ||
351 | conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || | ||
352 | conn->local_rx_ctrl.prod_flags.urg_data_pending) | ||
353 | smc_tx_sndbuf_nonempty(conn); | ||
354 | |||
355 | if (diff_cons && conn->urg_tx_pend && | ||
356 | atomic_read(&conn->peer_rmbe_space) == conn->peer_rmbe_size) { | ||
357 | /* urg data confirmed by peer, indicate we're ready for more */ | ||
358 | conn->urg_tx_pend = false; | ||
359 | smc->sk.sk_write_space(&smc->sk); | ||
360 | } | ||
361 | |||
362 | if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) { | ||
363 | smc->sk.sk_err = ECONNRESET; | ||
364 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; | ||
365 | } | ||
366 | if (smc_cdc_rxed_any_close_or_senddone(conn)) { | ||
367 | smc->sk.sk_shutdown |= RCV_SHUTDOWN; | ||
368 | if (smc->clcsock && smc->clcsock->sk) | ||
369 | smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN; | ||
370 | sock_set_flag(&smc->sk, SOCK_DONE); | ||
371 | sock_hold(&smc->sk); /* sock_put in close_work */ | ||
372 | if (!queue_work(smc_close_wq, &conn->close_work)) | ||
373 | sock_put(&smc->sk); | ||
374 | } | ||
375 | } | ||
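/* Summary of the accounting above: diff_cons means the peer consumed more
 * of our data, so peer_rmbe_space (how much we may still RDMA-write into
 * the peer's RMBE) is replenished; diff_prod means the peer produced new
 * data, so bytes_to_rcv grows and the socket is woken via sk_data_ready,
 * to be drained later by smc_recvmsg().
 */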
376 | |||
377 | /* called in tasklet context */ | ||
378 | static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc) | ||
379 | { | ||
380 | sock_hold(&smc->sk); | ||
381 | bh_lock_sock(&smc->sk); | ||
382 | smc_cdc_msg_recv_action(smc, cdc); | ||
383 | bh_unlock_sock(&smc->sk); | ||
384 | sock_put(&smc->sk); /* no free sk in softirq-context */ | ||
385 | } | ||
386 | |||
387 | /* Schedule a tasklet for this connection. Triggered from the ISM device IRQ | ||
388 | * handler to indicate update in the DMBE. | ||
389 | * | ||
390 | * Context: | ||
391 | * - tasklet context | ||
392 | */ | ||
393 | static void smcd_cdc_rx_tsklet(unsigned long data) | ||
394 | { | ||
395 | struct smc_connection *conn = (struct smc_connection *)data; | ||
396 | struct smcd_cdc_msg *data_cdc; | ||
397 | struct smcd_cdc_msg cdc; | ||
398 | struct smc_sock *smc; | ||
399 | |||
400 | if (!conn || conn->killed) | ||
401 | return; | ||
402 | |||
403 | data_cdc = (struct smcd_cdc_msg *)conn->rmb_desc->cpu_addr; | ||
404 | smcd_curs_copy(&cdc.prod, &data_cdc->prod, conn); | ||
405 | smcd_curs_copy(&cdc.cons, &data_cdc->cons, conn); | ||
406 | smc = container_of(conn, struct smc_sock, conn); | ||
407 | smc_cdc_msg_recv(smc, (struct smc_cdc_msg *)&cdc); | ||
408 | } | ||
409 | |||
410 | /* Initialize receive tasklet. Called from ISM device IRQ handler to start | ||
411 | * receiver side. | ||
412 | */ | ||
413 | void smcd_cdc_rx_init(struct smc_connection *conn) | ||
414 | { | ||
415 | tasklet_init(&conn->rx_tsklet, smcd_cdc_rx_tsklet, (unsigned long)conn); | ||
416 | } | ||
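/* Once initialized, the ISM interrupt path only has to defer the work,
 * e.g. (sketch, assuming the IRQ handler already looked up the conn):
 *
 *	tasklet_schedule(&conn->rx_tsklet);
 *
 * smcd_cdc_rx_tsklet() then runs in softirq context and snapshots the
 * cursors out of the DMB.
 */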
417 | |||
418 | /***************************** init, exit, misc ******************************/ | ||
419 | |||
420 | static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf) | ||
421 | { | ||
422 | struct smc_link *link = (struct smc_link *)wc->qp->qp_context; | ||
423 | struct smc_cdc_msg *cdc = buf; | ||
424 | struct smc_connection *conn; | ||
425 | struct smc_link_group *lgr; | ||
426 | struct smc_sock *smc; | ||
427 | |||
428 | if (wc->byte_len < offsetof(struct smc_cdc_msg, reserved)) | ||
429 | return; /* short message */ | ||
430 | if (cdc->len != SMC_WR_TX_SIZE) | ||
431 | return; /* invalid message */ | ||
432 | |||
433 | /* lookup connection */ | ||
434 | lgr = smc_get_lgr(link); | ||
435 | read_lock_bh(&lgr->conns_lock); | ||
436 | conn = smc_lgr_find_conn(ntohl(cdc->token), lgr); | ||
437 | read_unlock_bh(&lgr->conns_lock); | ||
438 | if (!conn || conn->out_of_sync) | ||
439 | return; | ||
440 | smc = container_of(conn, struct smc_sock, conn); | ||
441 | |||
442 | if (cdc->prod_flags.failover_validation) { | ||
443 | smc_cdc_msg_validate(smc, cdc, link); | ||
444 | return; | ||
445 | } | ||
446 | if (smc_cdc_before(ntohs(cdc->seqno), | ||
447 | conn->local_rx_ctrl.seqno)) | ||
448 | /* received seqno is old */ | ||
449 | return; | ||
450 | |||
451 | smc_cdc_msg_recv(smc, cdc); | ||
452 | } | ||
453 | |||
454 | static struct smc_wr_rx_handler smc_cdc_rx_handlers[] = { | ||
455 | { | ||
456 | .handler = smc_cdc_rx_handler, | ||
457 | .type = SMC_CDC_MSG_TYPE | ||
458 | }, | ||
459 | { | ||
460 | .handler = NULL, | ||
461 | } | ||
462 | }; | ||
463 | |||
464 | int __init smc_cdc_init(void) | ||
465 | { | ||
466 | struct smc_wr_rx_handler *handler; | ||
467 | int rc = 0; | ||
468 | |||
469 | for (handler = smc_cdc_rx_handlers; handler->handler; handler++) { | ||
470 | INIT_HLIST_NODE(&handler->list); | ||
471 | rc = smc_wr_rx_register_handler(handler); | ||
472 | if (rc) | ||
473 | break; | ||
474 | } | ||
475 | return rc; | ||
476 | } | ||
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
new file mode 100644
index 000000000..696cc11f2
--- /dev/null
+++ b/net/smc/smc_cdc.h
@@ -0,0 +1,305 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * Connection Data Control (CDC) | ||
6 | * | ||
7 | * Copyright IBM Corp. 2016 | ||
8 | * | ||
9 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> | ||
10 | */ | ||
11 | |||
12 | #ifndef SMC_CDC_H | ||
13 | #define SMC_CDC_H | ||
14 | |||
15 | #include <linux/kernel.h> /* max_t */ | ||
16 | #include <linux/atomic.h> | ||
17 | #include <linux/in.h> | ||
18 | #include <linux/compiler.h> | ||
19 | |||
20 | #include "smc.h" | ||
21 | #include "smc_core.h" | ||
22 | #include "smc_wr.h" | ||
23 | |||
24 | #define SMC_CDC_MSG_TYPE 0xFE | ||
25 | |||
26 | /* in network byte order */ | ||
27 | union smc_cdc_cursor { /* SMC cursor */ | ||
28 | struct { | ||
29 | __be16 reserved; | ||
30 | __be16 wrap; | ||
31 | __be32 count; | ||
32 | }; | ||
33 | #ifdef KERNEL_HAS_ATOMIC64 | ||
34 | atomic64_t acurs; /* for atomic processing */ | ||
35 | #else | ||
36 | u64 acurs; /* for atomic processing */ | ||
37 | #endif | ||
38 | } __aligned(8); | ||
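/* Overlaying an atomic64_t on the (reserved, wrap, count) triple lets a
 * whole cursor be read or written as one 8-byte atomic operation, so a
 * concurrent reader can never see the wrap of one update combined with the
 * count of another; without KERNEL_HAS_ATOMIC64 the copy helpers fall back
 * to conn->acurs_lock instead.
 */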
39 | |||
40 | /* in network byte order */ | ||
41 | struct smc_cdc_msg { | ||
42 | struct smc_wr_rx_hdr common; /* .type = 0xFE */ | ||
43 | u8 len; /* 44 */ | ||
44 | __be16 seqno; | ||
45 | __be32 token; | ||
46 | union smc_cdc_cursor prod; | ||
47 | union smc_cdc_cursor cons; /* piggy backed "ack" */ | ||
48 | struct smc_cdc_producer_flags prod_flags; | ||
49 | struct smc_cdc_conn_state_flags conn_state_flags; | ||
50 | u8 reserved[18]; | ||
51 | }; | ||
52 | |||
53 | /* SMC-D cursor format */ | ||
54 | union smcd_cdc_cursor { | ||
55 | struct { | ||
56 | u16 wrap; | ||
57 | u32 count; | ||
58 | struct smc_cdc_producer_flags prod_flags; | ||
59 | struct smc_cdc_conn_state_flags conn_state_flags; | ||
60 | } __packed; | ||
61 | #ifdef KERNEL_HAS_ATOMIC64 | ||
62 | atomic64_t acurs; /* for atomic processing */ | ||
63 | #else | ||
64 | u64 acurs; /* for atomic processing */ | ||
65 | #endif | ||
66 | } __aligned(8); | ||
67 | |||
68 | /* CDC message for SMC-D */ | ||
69 | struct smcd_cdc_msg { | ||
70 | struct smc_wr_rx_hdr common; /* Type = 0xFE */ | ||
71 | u8 res1[7]; | ||
72 | union smcd_cdc_cursor prod; | ||
73 | union smcd_cdc_cursor cons; | ||
74 | u8 res3[8]; | ||
75 | } __aligned(8); | ||
76 | |||
77 | static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn) | ||
78 | { | ||
79 | return conn->local_rx_ctrl.conn_state_flags.peer_conn_abort || | ||
80 | conn->local_rx_ctrl.conn_state_flags.peer_conn_closed; | ||
81 | } | ||
82 | |||
83 | static inline bool smc_cdc_rxed_any_close_or_senddone( | ||
84 | struct smc_connection *conn) | ||
85 | { | ||
86 | return smc_cdc_rxed_any_close(conn) || | ||
87 | conn->local_rx_ctrl.conn_state_flags.peer_done_writing; | ||
88 | } | ||
89 | |||
90 | static inline void smc_curs_add(int size, union smc_host_cursor *curs, | ||
91 | int value) | ||
92 | { | ||
93 | curs->count += value; | ||
94 | if (curs->count >= size) { | ||
95 | curs->wrap++; | ||
96 | curs->count -= size; | ||
97 | } | ||
98 | } | ||
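/* Cursors address a ring buffer, so advancing past the end bumps the wrap
 * counter. Worked example with a 16-byte ring (cursor layout as in smc.h):
 *
 *	union smc_host_cursor c = { .wrap = 7, .count = 14 };
 *
 *	smc_curs_add(16, &c, 5);   // 14 + 5 = 19 >= 16
 *	// now c.wrap == 8, c.count == 3
 */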
99 | |||
100 | /* Copy cursor src into tgt */ | ||
101 | static inline void smc_curs_copy(union smc_host_cursor *tgt, | ||
102 | union smc_host_cursor *src, | ||
103 | struct smc_connection *conn) | ||
104 | { | ||
105 | #ifndef KERNEL_HAS_ATOMIC64 | ||
106 | unsigned long flags; | ||
107 | |||
108 | spin_lock_irqsave(&conn->acurs_lock, flags); | ||
109 | tgt->acurs = src->acurs; | ||
110 | spin_unlock_irqrestore(&conn->acurs_lock, flags); | ||
111 | #else | ||
112 | atomic64_set(&tgt->acurs, atomic64_read(&src->acurs)); | ||
113 | #endif | ||
114 | } | ||
115 | |||
116 | static inline void smc_curs_copy_net(union smc_cdc_cursor *tgt, | ||
117 | union smc_cdc_cursor *src, | ||
118 | struct smc_connection *conn) | ||
119 | { | ||
120 | #ifndef KERNEL_HAS_ATOMIC64 | ||
121 | unsigned long flags; | ||
122 | |||
123 | spin_lock_irqsave(&conn->acurs_lock, flags); | ||
124 | tgt->acurs = src->acurs; | ||
125 | spin_unlock_irqrestore(&conn->acurs_lock, flags); | ||
126 | #else | ||
127 | atomic64_set(&tgt->acurs, atomic64_read(&src->acurs)); | ||
128 | #endif | ||
129 | } | ||
130 | |||
131 | static inline void smcd_curs_copy(union smcd_cdc_cursor *tgt, | ||
132 | union smcd_cdc_cursor *src, | ||
133 | struct smc_connection *conn) | ||
134 | { | ||
135 | #ifndef KERNEL_HAS_ATOMIC64 | ||
136 | unsigned long flags; | ||
137 | |||
138 | spin_lock_irqsave(&conn->acurs_lock, flags); | ||
139 | tgt->acurs = src->acurs; | ||
140 | spin_unlock_irqrestore(&conn->acurs_lock, flags); | ||
141 | #else | ||
142 | atomic64_set(&tgt->acurs, atomic64_read(&src->acurs)); | ||
143 | #endif | ||
144 | } | ||
145 | |||
146 | /* calculate cursor difference between old and new, where old <= new and | ||
147 | * difference cannot exceed size | ||
148 | */ | ||
149 | static inline int smc_curs_diff(unsigned int size, | ||
150 | union smc_host_cursor *old, | ||
151 | union smc_host_cursor *new) | ||
152 | { | ||
153 | if (old->wrap != new->wrap) | ||
154 | return max_t(int, 0, | ||
155 | ((size - old->count) + new->count)); | ||
156 | |||
157 | return max_t(int, 0, (new->count - old->count)); | ||
158 | } | ||
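/* Worked example for a 16-byte ring: old = {wrap 2, count 10} and
 * new = {wrap 3, count 4} are one wrap apart, so the bytes in between are
 * (16 - 10) + 4 = 10:
 *
 *	union smc_host_cursor old = { .wrap = 2, .count = 10 };
 *	union smc_host_cursor new = { .wrap = 3, .count = 4 };
 *
 *	smc_curs_diff(16, &old, &new);   // returns 10
 */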
159 | |||
160 | /* calculate cursor difference between old and new - returns negative | ||
161 | * value in case old > new | ||
162 | */ | ||
163 | static inline int smc_curs_comp(unsigned int size, | ||
164 | union smc_host_cursor *old, | ||
165 | union smc_host_cursor *new) | ||
166 | { | ||
167 | if (old->wrap > new->wrap || | ||
168 | (old->wrap == new->wrap && old->count > new->count)) | ||
169 | return -smc_curs_diff(size, new, old); | ||
170 | return smc_curs_diff(size, old, new); | ||
171 | } | ||
172 | |||
173 | /* calculate cursor difference between old and new, where old <= new and | ||
174 | * difference may exceed size | ||
175 | */ | ||
176 | static inline int smc_curs_diff_large(unsigned int size, | ||
177 | union smc_host_cursor *old, | ||
178 | union smc_host_cursor *new) | ||
179 | { | ||
180 | if (old->wrap < new->wrap) | ||
181 | return min_t(int, | ||
182 | (size - old->count) + new->count + | ||
183 | (new->wrap - old->wrap - 1) * size, | ||
184 | size); | ||
185 | |||
186 | if (old->wrap > new->wrap) /* wrap has switched from 0xffff to 0x0000 */ | ||
187 | return min_t(int, | ||
188 | (size - old->count) + new->count + | ||
189 | (new->wrap + 0xffff - old->wrap) * size, | ||
190 | size); | ||
191 | |||
192 | return max_t(int, 0, (new->count - old->count)); | ||
193 | } | ||
194 | |||
195 | static inline void smc_host_cursor_to_cdc(union smc_cdc_cursor *peer, | ||
196 | union smc_host_cursor *local, | ||
197 | union smc_host_cursor *save, | ||
198 | struct smc_connection *conn) | ||
199 | { | ||
200 | smc_curs_copy(save, local, conn); | ||
201 | peer->count = htonl(save->count); | ||
202 | peer->wrap = htons(save->wrap); | ||
203 | /* peer->reserved = htons(0); must be ensured by caller */ | ||
204 | } | ||
205 | |||
206 | static inline void smc_host_msg_to_cdc(struct smc_cdc_msg *peer, | ||
207 | struct smc_connection *conn, | ||
208 | union smc_host_cursor *save) | ||
209 | { | ||
210 | struct smc_host_cdc_msg *local = &conn->local_tx_ctrl; | ||
211 | |||
212 | peer->common.type = local->common.type; | ||
213 | peer->len = local->len; | ||
214 | peer->seqno = htons(local->seqno); | ||
215 | peer->token = htonl(local->token); | ||
216 | smc_host_cursor_to_cdc(&peer->prod, &local->prod, save, conn); | ||
217 | smc_host_cursor_to_cdc(&peer->cons, &local->cons, save, conn); | ||
218 | peer->prod_flags = local->prod_flags; | ||
219 | peer->conn_state_flags = local->conn_state_flags; | ||
220 | } | ||
221 | |||
222 | static inline void smc_cdc_cursor_to_host(union smc_host_cursor *local, | ||
223 | union smc_cdc_cursor *peer, | ||
224 | struct smc_connection *conn) | ||
225 | { | ||
226 | union smc_host_cursor temp, old; | ||
227 | union smc_cdc_cursor net; | ||
228 | |||
229 | smc_curs_copy(&old, local, conn); | ||
230 | smc_curs_copy_net(&net, peer, conn); | ||
231 | temp.count = ntohl(net.count); | ||
232 | temp.wrap = ntohs(net.wrap); | ||
233 | if ((old.wrap > temp.wrap) && temp.wrap) | ||
234 | return; | ||
235 | if ((old.wrap == temp.wrap) && | ||
236 | (old.count > temp.count)) | ||
237 | return; | ||
238 | smc_curs_copy(local, &temp, conn); | ||
239 | } | ||
240 | |||
241 | static inline void smcr_cdc_msg_to_host(struct smc_host_cdc_msg *local, | ||
242 | struct smc_cdc_msg *peer, | ||
243 | struct smc_connection *conn) | ||
244 | { | ||
245 | local->common.type = peer->common.type; | ||
246 | local->len = peer->len; | ||
247 | local->seqno = ntohs(peer->seqno); | ||
248 | local->token = ntohl(peer->token); | ||
249 | smc_cdc_cursor_to_host(&local->prod, &peer->prod, conn); | ||
250 | smc_cdc_cursor_to_host(&local->cons, &peer->cons, conn); | ||
251 | local->prod_flags = peer->prod_flags; | ||
252 | local->conn_state_flags = peer->conn_state_flags; | ||
253 | } | ||
254 | |||
255 | static inline void smcd_cdc_msg_to_host(struct smc_host_cdc_msg *local, | ||
256 | struct smcd_cdc_msg *peer, | ||
257 | struct smc_connection *conn) | ||
258 | { | ||
259 | union smc_host_cursor temp; | ||
260 | |||
261 | temp.wrap = peer->prod.wrap; | ||
262 | temp.count = peer->prod.count; | ||
263 | smc_curs_copy(&local->prod, &temp, conn); | ||
264 | |||
265 | temp.wrap = peer->cons.wrap; | ||
266 | temp.count = peer->cons.count; | ||
267 | smc_curs_copy(&local->cons, &temp, conn); | ||
268 | local->prod_flags = peer->cons.prod_flags; | ||
269 | local->conn_state_flags = peer->cons.conn_state_flags; | ||
270 | } | ||
271 | |||
272 | static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local, | ||
273 | struct smc_cdc_msg *peer, | ||
274 | struct smc_connection *conn) | ||
275 | { | ||
276 | if (conn->lgr->is_smcd) | ||
277 | smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer, conn); | ||
278 | else | ||
279 | smcr_cdc_msg_to_host(local, peer, conn); | ||
280 | } | ||
281 | |||
282 | struct smc_cdc_tx_pend { | ||
283 | struct smc_connection *conn; /* socket connection */ | ||
284 | union smc_host_cursor cursor; /* tx sndbuf cursor sent */ | ||
285 | union smc_host_cursor p_cursor; /* rx RMBE cursor produced */ | ||
286 | u16 ctrl_seq; /* conn. tx sequence # */ | ||
287 | }; | ||
288 | |||
289 | int smc_cdc_get_free_slot(struct smc_connection *conn, | ||
290 | struct smc_link *link, | ||
291 | struct smc_wr_buf **wr_buf, | ||
292 | struct smc_rdma_wr **wr_rdma_buf, | ||
293 | struct smc_cdc_tx_pend **pend); | ||
294 | void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn); | ||
295 | int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, | ||
296 | struct smc_cdc_tx_pend *pend); | ||
297 | int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); | ||
298 | int smcd_cdc_msg_send(struct smc_connection *conn); | ||
299 | int smcr_cdc_msg_send_validation(struct smc_connection *conn, | ||
300 | struct smc_cdc_tx_pend *pend, | ||
301 | struct smc_wr_buf *wr_buf); | ||
302 | int smc_cdc_init(void) __init; | ||
303 | void smcd_cdc_rx_init(struct smc_connection *conn); | ||
304 | |||
305 | #endif /* SMC_CDC_H */ | ||
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
new file mode 100644
index 000000000..5ee5b2ce2
--- /dev/null
+++ b/net/smc/smc_clc.c
@@ -0,0 +1,784 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * CLC (connection layer control) handshake over initial TCP socket to | ||
6 | * prepare for RDMA traffic | ||
7 | * | ||
8 | * Copyright IBM Corp. 2016, 2018 | ||
9 | * | ||
10 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> | ||
11 | */ | ||
12 | |||
13 | #include <linux/in.h> | ||
14 | #include <linux/inetdevice.h> | ||
15 | #include <linux/if_ether.h> | ||
16 | #include <linux/sched/signal.h> | ||
17 | #include <linux/utsname.h> | ||
18 | #include <linux/ctype.h> | ||
19 | |||
20 | #include <net/addrconf.h> | ||
21 | #include <net/sock.h> | ||
22 | #include <net/tcp.h> | ||
23 | |||
24 | #include "smc.h" | ||
25 | #include "smc_core.h" | ||
26 | #include "smc_clc.h" | ||
27 | #include "smc_ib.h" | ||
28 | #include "smc_ism.h" | ||
29 | |||
30 | #define SMCR_CLC_ACCEPT_CONFIRM_LEN 68 | ||
31 | #define SMCD_CLC_ACCEPT_CONFIRM_LEN 48 | ||
32 | #define SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 78 | ||
33 | #define SMC_CLC_RECV_BUF_LEN 100 | ||
34 | |||
35 | /* eye catcher "SMCR" EBCDIC for CLC messages */ | ||
36 | static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; | ||
37 | /* eye catcher "SMCD" EBCDIC for CLC messages */ | ||
38 | static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'}; | ||
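/* The byte values are the EBCDIC encodings of the ASCII strings "SMCR" and
 * "SMCD": 'S' = 0xE2, 'M' = 0xD4, 'C' = 0xC3, 'R' = 0xD9, 'D' = 0xC4.
 */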
39 | |||
40 | static u8 smc_hostname[SMC_MAX_HOSTNAME_LEN]; | ||
41 | |||
42 | /* check arriving CLC proposal */ | ||
43 | static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc) | ||
44 | { | ||
45 | struct smc_clc_msg_proposal_prefix *pclc_prfx; | ||
46 | struct smc_clc_smcd_v2_extension *smcd_v2_ext; | ||
47 | struct smc_clc_msg_hdr *hdr = &pclc->hdr; | ||
48 | struct smc_clc_v2_extension *v2_ext; | ||
49 | |||
50 | v2_ext = smc_get_clc_v2_ext(pclc); | ||
51 | pclc_prfx = smc_clc_proposal_get_prefix(pclc); | ||
52 | if (hdr->version == SMC_V1) { | ||
53 | if (hdr->typev1 == SMC_TYPE_N) | ||
54 | return false; | ||
55 | if (ntohs(hdr->length) != | ||
56 | sizeof(*pclc) + ntohs(pclc->iparea_offset) + | ||
57 | sizeof(*pclc_prfx) + | ||
58 | pclc_prfx->ipv6_prefixes_cnt * | ||
59 | sizeof(struct smc_clc_ipv6_prefix) + | ||
60 | sizeof(struct smc_clc_msg_trail)) | ||
61 | return false; | ||
62 | } else { | ||
63 | if (ntohs(hdr->length) != | ||
64 | sizeof(*pclc) + | ||
65 | sizeof(struct smc_clc_msg_smcd) + | ||
66 | (hdr->typev1 != SMC_TYPE_N ? | ||
67 | sizeof(*pclc_prfx) + | ||
68 | pclc_prfx->ipv6_prefixes_cnt * | ||
69 | sizeof(struct smc_clc_ipv6_prefix) : 0) + | ||
70 | (hdr->typev2 != SMC_TYPE_N ? | ||
71 | sizeof(*v2_ext) + | ||
72 | v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN : 0) + | ||
73 | (smcd_indicated(hdr->typev2) ? | ||
74 | sizeof(*smcd_v2_ext) + v2_ext->hdr.ism_gid_cnt * | ||
75 | sizeof(struct smc_clc_smcd_gid_chid) : | ||
76 | 0) + | ||
77 | sizeof(struct smc_clc_msg_trail)) | ||
78 | return false; | ||
79 | } | ||
80 | return true; | ||
81 | } | ||
82 | |||
83 | /* check arriving CLC accept or confirm */ | ||
84 | static bool | ||
85 | smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2) | ||
86 | { | ||
87 | struct smc_clc_msg_hdr *hdr = &clc_v2->hdr; | ||
88 | |||
89 | if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D) | ||
90 | return false; | ||
91 | if (hdr->version == SMC_V1) { | ||
92 | if ((hdr->typev1 == SMC_TYPE_R && | ||
93 | ntohs(hdr->length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) || | ||
94 | (hdr->typev1 == SMC_TYPE_D && | ||
95 | ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN)) | ||
96 | return false; | ||
97 | } else { | ||
98 | if (hdr->typev1 == SMC_TYPE_D && | ||
99 | ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 && | ||
100 | (ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 + | ||
101 | sizeof(struct smc_clc_first_contact_ext))) | ||
102 | return false; | ||
103 | } | ||
104 | return true; | ||
105 | } | ||
106 | |||
107 | static void smc_clc_fill_fce(struct smc_clc_first_contact_ext *fce, int *len) | ||
108 | { | ||
109 | memset(fce, 0, sizeof(*fce)); | ||
110 | fce->os_type = SMC_CLC_OS_LINUX; | ||
111 | fce->release = SMC_RELEASE; | ||
112 | memcpy(fce->hostname, smc_hostname, sizeof(smc_hostname)); | ||
113 | (*len) += sizeof(*fce); | ||
114 | } | ||
115 | |||
116 | /* check if received message has a correct header length and contains valid | ||
117 | * leading and trailing eyecatchers | ||
118 | */ | ||
119 | static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) | ||
120 | { | ||
121 | struct smc_clc_msg_accept_confirm_v2 *clc_v2; | ||
122 | struct smc_clc_msg_proposal *pclc; | ||
123 | struct smc_clc_msg_decline *dclc; | ||
124 | struct smc_clc_msg_trail *trl; | ||
125 | |||
126 | if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) && | ||
127 | memcmp(clcm->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER))) | ||
128 | return false; | ||
129 | switch (clcm->type) { | ||
130 | case SMC_CLC_PROPOSAL: | ||
131 | pclc = (struct smc_clc_msg_proposal *)clcm; | ||
132 | if (!smc_clc_msg_prop_valid(pclc)) | ||
133 | return false; | ||
134 | trl = (struct smc_clc_msg_trail *) | ||
135 | ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl)); | ||
136 | break; | ||
137 | case SMC_CLC_ACCEPT: | ||
138 | case SMC_CLC_CONFIRM: | ||
139 | clc_v2 = (struct smc_clc_msg_accept_confirm_v2 *)clcm; | ||
140 | if (!smc_clc_msg_acc_conf_valid(clc_v2)) | ||
141 | return false; | ||
142 | trl = (struct smc_clc_msg_trail *) | ||
143 | ((u8 *)clc_v2 + ntohs(clc_v2->hdr.length) - | ||
144 | sizeof(*trl)); | ||
145 | break; | ||
146 | case SMC_CLC_DECLINE: | ||
147 | dclc = (struct smc_clc_msg_decline *)clcm; | ||
148 | if (ntohs(dclc->hdr.length) != sizeof(*dclc)) | ||
149 | return false; | ||
150 | trl = &dclc->trl; | ||
151 | break; | ||
152 | default: | ||
153 | return false; | ||
154 | } | ||
155 | if (check_trl && | ||
156 | memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) && | ||
157 | memcmp(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER))) | ||
158 | return false; | ||
159 | return true; | ||
160 | } | ||
161 | |||
162 | /* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */ | ||
163 | static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4, | ||
164 | struct smc_clc_msg_proposal_prefix *prop) | ||
165 | { | ||
166 | struct in_device *in_dev = __in_dev_get_rcu(dst->dev); | ||
167 | const struct in_ifaddr *ifa; | ||
168 | |||
169 | if (!in_dev) | ||
170 | return -ENODEV; | ||
171 | |||
172 | in_dev_for_each_ifa_rcu(ifa, in_dev) { | ||
173 | if (!inet_ifa_match(ipv4, ifa)) | ||
174 | continue; | ||
175 | prop->prefix_len = inet_mask_len(ifa->ifa_mask); | ||
176 | prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask; | ||
177 | /* prop->ipv6_prefixes_cnt = 0; already done by memset before */ | ||
178 | return 0; | ||
179 | } | ||
180 | return -ENOENT; | ||
181 | } | ||
182 | |||
183 | /* fill CLC proposal msg with ipv6 prefixes from device */ | ||
184 | static int smc_clc_prfx_set6_rcu(struct dst_entry *dst, | ||
185 | struct smc_clc_msg_proposal_prefix *prop, | ||
186 | struct smc_clc_ipv6_prefix *ipv6_prfx) | ||
187 | { | ||
188 | #if IS_ENABLED(CONFIG_IPV6) | ||
189 | struct inet6_dev *in6_dev = __in6_dev_get(dst->dev); | ||
190 | struct inet6_ifaddr *ifa; | ||
191 | int cnt = 0; | ||
192 | |||
193 | if (!in6_dev) | ||
194 | return -ENODEV; | ||
195 | /* use a maximum of 8 IPv6 prefixes from device */ | ||
196 | list_for_each_entry(ifa, &in6_dev->addr_list, if_list) { | ||
197 | if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL) | ||
198 | continue; | ||
199 | ipv6_addr_prefix(&ipv6_prfx[cnt].prefix, | ||
200 | &ifa->addr, ifa->prefix_len); | ||
201 | ipv6_prfx[cnt].prefix_len = ifa->prefix_len; | ||
202 | cnt++; | ||
203 | if (cnt == SMC_CLC_MAX_V6_PREFIX) | ||
204 | break; | ||
205 | } | ||
206 | prop->ipv6_prefixes_cnt = cnt; | ||
207 | if (cnt) | ||
208 | return 0; | ||
209 | #endif | ||
210 | return -ENOENT; | ||
211 | } | ||
212 | |||
213 | /* retrieve and set prefixes in CLC proposal msg */ | ||
214 | static int smc_clc_prfx_set(struct socket *clcsock, | ||
215 | struct smc_clc_msg_proposal_prefix *prop, | ||
216 | struct smc_clc_ipv6_prefix *ipv6_prfx) | ||
217 | { | ||
218 | struct dst_entry *dst = sk_dst_get(clcsock->sk); | ||
219 | struct sockaddr_storage addrs; | ||
220 | struct sockaddr_in6 *addr6; | ||
221 | struct sockaddr_in *addr; | ||
222 | int rc = -ENOENT; | ||
223 | |||
224 | if (!dst) { | ||
225 | rc = -ENOTCONN; | ||
226 | goto out; | ||
227 | } | ||
228 | if (!dst->dev) { | ||
229 | rc = -ENODEV; | ||
230 | goto out_rel; | ||
231 | } | ||
232 | /* get address to which the internal TCP socket is bound */ | ||
233 | if (kernel_getsockname(clcsock, (struct sockaddr *)&addrs) < 0) | ||
234 | goto out_rel; | ||
235 | /* analyze IP specific data of net_device belonging to TCP socket */ | ||
236 | addr6 = (struct sockaddr_in6 *)&addrs; | ||
237 | rcu_read_lock(); | ||
238 | if (addrs.ss_family == PF_INET) { | ||
239 | /* IPv4 */ | ||
240 | addr = (struct sockaddr_in *)&addrs; | ||
241 | rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop); | ||
242 | } else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) { | ||
243 | /* mapped IPv4 address - peer is IPv4 only */ | ||
244 | rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3], | ||
245 | prop); | ||
246 | } else { | ||
247 | /* IPv6 */ | ||
248 | rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx); | ||
249 | } | ||
250 | rcu_read_unlock(); | ||
251 | out_rel: | ||
252 | dst_release(dst); | ||
253 | out: | ||
254 | return rc; | ||
255 | } | ||
256 | |||
257 | /* match ipv4 addrs of dev against addr in CLC proposal */ | ||
258 | static int smc_clc_prfx_match4_rcu(struct net_device *dev, | ||
259 | struct smc_clc_msg_proposal_prefix *prop) | ||
260 | { | ||
261 | struct in_device *in_dev = __in_dev_get_rcu(dev); | ||
262 | const struct in_ifaddr *ifa; | ||
263 | |||
264 | if (!in_dev) | ||
265 | return -ENODEV; | ||
266 | in_dev_for_each_ifa_rcu(ifa, in_dev) { | ||
267 | if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) && | ||
268 | inet_ifa_match(prop->outgoing_subnet, ifa)) | ||
269 | return 0; | ||
270 | } | ||
271 | |||
272 | return -ENOENT; | ||
273 | } | ||
274 | |||
275 | /* match ipv6 addrs of dev against addrs in CLC proposal */ | ||
276 | static int smc_clc_prfx_match6_rcu(struct net_device *dev, | ||
277 | struct smc_clc_msg_proposal_prefix *prop) | ||
278 | { | ||
279 | #if IS_ENABLED(CONFIG_IPV6) | ||
280 | struct inet6_dev *in6_dev = __in6_dev_get(dev); | ||
281 | struct smc_clc_ipv6_prefix *ipv6_prfx; | ||
282 | struct inet6_ifaddr *ifa; | ||
283 | int i, max; | ||
284 | |||
285 | if (!in6_dev) | ||
286 | return -ENODEV; | ||
287 | /* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */ | ||
288 | ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop)); | ||
289 | max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX); | ||
290 | list_for_each_entry(ifa, &in6_dev->addr_list, if_list) { | ||
291 | if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL) | ||
292 | continue; | ||
293 | for (i = 0; i < max; i++) { | ||
294 | if (ifa->prefix_len == ipv6_prfx[i].prefix_len && | ||
295 | ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix, | ||
296 | ifa->prefix_len)) | ||
297 | return 0; | ||
298 | } | ||
299 | } | ||
300 | #endif | ||
301 | return -ENOENT; | ||
302 | } | ||
303 | |||
304 | /* check if proposed prefixes match one of our device prefixes */ | ||
305 | int smc_clc_prfx_match(struct socket *clcsock, | ||
306 | struct smc_clc_msg_proposal_prefix *prop) | ||
307 | { | ||
308 | struct dst_entry *dst = sk_dst_get(clcsock->sk); | ||
309 | int rc; | ||
310 | |||
311 | if (!dst) { | ||
312 | rc = -ENOTCONN; | ||
313 | goto out; | ||
314 | } | ||
315 | if (!dst->dev) { | ||
316 | rc = -ENODEV; | ||
317 | goto out_rel; | ||
318 | } | ||
319 | rcu_read_lock(); | ||
320 | if (!prop->ipv6_prefixes_cnt) | ||
321 | rc = smc_clc_prfx_match4_rcu(dst->dev, prop); | ||
322 | else | ||
323 | rc = smc_clc_prfx_match6_rcu(dst->dev, prop); | ||
324 | rcu_read_unlock(); | ||
325 | out_rel: | ||
326 | dst_release(dst); | ||
327 | out: | ||
328 | return rc; | ||
329 | } | ||
330 | |||
331 | /* Wait for data on the tcp-socket, analyze received data | ||
332 | * Returns: | ||
333 | * 0 if success and it was not a decline that we received. | ||
334 | * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send. | ||
335 | * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise. | ||
336 | */ | ||
337 | int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, | ||
338 | u8 expected_type, unsigned long timeout) | ||
339 | { | ||
340 | long rcvtimeo = smc->clcsock->sk->sk_rcvtimeo; | ||
341 | struct sock *clc_sk = smc->clcsock->sk; | ||
342 | struct smc_clc_msg_hdr *clcm = buf; | ||
343 | struct msghdr msg = {NULL, 0}; | ||
344 | int reason_code = 0; | ||
345 | struct kvec vec = {buf, buflen}; | ||
346 | int len, datlen, recvlen; | ||
347 | bool check_trl = true; | ||
348 | int krflags; | ||
349 | |||
350 | /* peek the first few bytes to determine length of data to receive | ||
351 | * so we don't consume any subsequent CLC message or payload data | ||
352 | * in the TCP byte stream | ||
353 | */ | ||
354 | /* | ||
355 | * Caller must make sure that buflen is no less than | ||
356 | * sizeof(struct smc_clc_msg_hdr) | ||
357 | */ | ||
358 | krflags = MSG_PEEK | MSG_WAITALL; | ||
359 | clc_sk->sk_rcvtimeo = timeout; | ||
360 | iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, | ||
361 | sizeof(struct smc_clc_msg_hdr)); | ||
362 | len = sock_recvmsg(smc->clcsock, &msg, krflags); | ||
363 | if (signal_pending(current)) { | ||
364 | reason_code = -EINTR; | ||
365 | clc_sk->sk_err = EINTR; | ||
366 | smc->sk.sk_err = EINTR; | ||
367 | goto out; | ||
368 | } | ||
369 | if (clc_sk->sk_err) { | ||
370 | reason_code = -clc_sk->sk_err; | ||
371 | if (clc_sk->sk_err == EAGAIN && | ||
372 | expected_type == SMC_CLC_DECLINE) | ||
373 | clc_sk->sk_err = 0; /* reset for fallback usage */ | ||
374 | else | ||
375 | smc->sk.sk_err = clc_sk->sk_err; | ||
376 | goto out; | ||
377 | } | ||
378 | if (!len) { /* peer has performed orderly shutdown */ | ||
379 | smc->sk.sk_err = ECONNRESET; | ||
380 | reason_code = -ECONNRESET; | ||
381 | goto out; | ||
382 | } | ||
383 | if (len < 0) { | ||
384 | if (len != -EAGAIN || expected_type != SMC_CLC_DECLINE) | ||
385 | smc->sk.sk_err = -len; | ||
386 | reason_code = len; | ||
387 | goto out; | ||
388 | } | ||
389 | datlen = ntohs(clcm->length); | ||
390 | if ((len < sizeof(struct smc_clc_msg_hdr)) || | ||
391 | (clcm->version < SMC_V1) || | ||
392 | ((clcm->type != SMC_CLC_DECLINE) && | ||
393 | (clcm->type != expected_type))) { | ||
394 | smc->sk.sk_err = EPROTO; | ||
395 | reason_code = -EPROTO; | ||
396 | goto out; | ||
397 | } | ||
398 | |||
399 | /* receive the complete CLC message */ | ||
400 | memset(&msg, 0, sizeof(struct msghdr)); | ||
401 | if (datlen > buflen) { | ||
402 | check_trl = false; | ||
403 | recvlen = buflen; | ||
404 | } else { | ||
405 | recvlen = datlen; | ||
406 | } | ||
407 | iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, recvlen); | ||
408 | krflags = MSG_WAITALL; | ||
409 | len = sock_recvmsg(smc->clcsock, &msg, krflags); | ||
410 | if (len < recvlen || !smc_clc_msg_hdr_valid(clcm, check_trl)) { | ||
411 | smc->sk.sk_err = EPROTO; | ||
412 | reason_code = -EPROTO; | ||
413 | goto out; | ||
414 | } | ||
415 | datlen -= len; | ||
416 | while (datlen) { | ||
417 | u8 tmp[SMC_CLC_RECV_BUF_LEN]; | ||
418 | |||
419 | vec.iov_base = &tmp; | ||
420 | vec.iov_len = SMC_CLC_RECV_BUF_LEN; | ||
421 | /* receive remaining proposal message */ | ||
422 | recvlen = datlen > SMC_CLC_RECV_BUF_LEN ? | ||
423 | SMC_CLC_RECV_BUF_LEN : datlen; | ||
424 | iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, recvlen); | ||
425 | len = sock_recvmsg(smc->clcsock, &msg, krflags); | ||
426 | datlen -= len; | ||
427 | } | ||
428 | if (clcm->type == SMC_CLC_DECLINE) { | ||
429 | struct smc_clc_msg_decline *dclc; | ||
430 | |||
431 | dclc = (struct smc_clc_msg_decline *)clcm; | ||
432 | reason_code = SMC_CLC_DECL_PEERDECL; | ||
433 | smc->peer_diagnosis = ntohl(dclc->peer_diagnosis); | ||
434 | if (((struct smc_clc_msg_decline *)buf)->hdr.typev2 & | ||
435 | SMC_FIRST_CONTACT_MASK) { | ||
436 | smc->conn.lgr->sync_err = 1; | ||
437 | smc_lgr_terminate_sched(smc->conn.lgr); | ||
438 | } | ||
439 | } | ||
440 | |||
441 | out: | ||
442 | clc_sk->sk_rcvtimeo = rcvtimeo; | ||
443 | return reason_code; | ||
444 | } | ||
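/* The receive above is two-phase: first peek only the fixed header with
 * MSG_PEEK | MSG_WAITALL to learn the total message length, then consume
 * exactly that many bytes, so any later CLC message or payload data stays
 * untouched in the TCP byte stream. Stripped to its core (hypothetical
 * clcsock/buf/buflen, capping and error handling omitted):
 *
 *	struct smc_clc_msg_hdr *hdr = buf;
 *	struct kvec vec = {buf, buflen};
 *	struct msghdr msg = {NULL, 0};
 *
 *	iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, sizeof(*hdr));
 *	sock_recvmsg(clcsock, &msg, MSG_PEEK | MSG_WAITALL);
 *	iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, ntohs(hdr->length));
 *	sock_recvmsg(clcsock, &msg, MSG_WAITALL);
 */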
445 | |||
446 | /* send CLC DECLINE message across internal TCP socket */ | ||
447 | int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version) | ||
448 | { | ||
449 | struct smc_clc_msg_decline dclc; | ||
450 | struct msghdr msg; | ||
451 | struct kvec vec; | ||
452 | int len; | ||
453 | |||
454 | memset(&dclc, 0, sizeof(dclc)); | ||
455 | memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); | ||
456 | dclc.hdr.type = SMC_CLC_DECLINE; | ||
457 | dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline)); | ||
458 | dclc.hdr.version = version; | ||
459 | dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX; | ||
460 | dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? | ||
461 | SMC_FIRST_CONTACT_MASK : 0; | ||
462 | if ((!smc->conn.lgr || !smc->conn.lgr->is_smcd) && | ||
463 | smc_ib_is_valid_local_systemid()) | ||
464 | memcpy(dclc.id_for_peer, local_systemid, | ||
465 | sizeof(local_systemid)); | ||
466 | dclc.peer_diagnosis = htonl(peer_diag_info); | ||
467 | memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); | ||
468 | |||
469 | memset(&msg, 0, sizeof(msg)); | ||
470 | vec.iov_base = &dclc; | ||
471 | vec.iov_len = sizeof(struct smc_clc_msg_decline); | ||
472 | len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, | ||
473 | sizeof(struct smc_clc_msg_decline)); | ||
474 | if (len < 0 || len < sizeof(struct smc_clc_msg_decline)) | ||
475 | len = -EPROTO; | ||
476 | return len > 0 ? 0 : len; | ||
477 | } | ||
478 | |||
479 | /* send CLC PROPOSAL message across internal TCP socket */ | ||
480 | int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) | ||
481 | { | ||
482 | struct smc_clc_smcd_v2_extension *smcd_v2_ext; | ||
483 | struct smc_clc_msg_proposal_prefix *pclc_prfx; | ||
484 | struct smc_clc_msg_proposal *pclc_base; | ||
485 | struct smc_clc_smcd_gid_chid *gidchids; | ||
486 | struct smc_clc_msg_proposal_area *pclc; | ||
487 | struct smc_clc_ipv6_prefix *ipv6_prfx; | ||
488 | struct smc_clc_v2_extension *v2_ext; | ||
489 | struct smc_clc_msg_smcd *pclc_smcd; | ||
490 | struct smc_clc_msg_trail *trl; | ||
491 | int len, i, plen, rc; | ||
492 | int reason_code = 0; | ||
493 | struct kvec vec[8]; | ||
494 | struct msghdr msg; | ||
495 | |||
496 | pclc = kzalloc(sizeof(*pclc), GFP_KERNEL); | ||
497 | if (!pclc) | ||
498 | return -ENOMEM; | ||
499 | |||
500 | pclc_base = &pclc->pclc_base; | ||
501 | pclc_smcd = &pclc->pclc_smcd; | ||
502 | pclc_prfx = &pclc->pclc_prfx; | ||
503 | ipv6_prfx = pclc->pclc_prfx_ipv6; | ||
504 | v2_ext = &pclc->pclc_v2_ext; | ||
505 | smcd_v2_ext = &pclc->pclc_smcd_v2_ext; | ||
506 | gidchids = pclc->pclc_gidchids; | ||
507 | trl = &pclc->pclc_trl; | ||
508 | |||
509 | pclc_base->hdr.version = SMC_V2; | ||
510 | pclc_base->hdr.typev1 = ini->smc_type_v1; | ||
511 | pclc_base->hdr.typev2 = ini->smc_type_v2; | ||
512 | plen = sizeof(*pclc_base) + sizeof(*pclc_smcd) + sizeof(*trl); | ||
513 | |||
514 | /* retrieve ip prefixes for CLC proposal msg */ | ||
515 | if (ini->smc_type_v1 != SMC_TYPE_N) { | ||
516 | rc = smc_clc_prfx_set(smc->clcsock, pclc_prfx, ipv6_prfx); | ||
517 | if (rc) { | ||
518 | if (ini->smc_type_v2 == SMC_TYPE_N) { | ||
519 | kfree(pclc); | ||
520 | return SMC_CLC_DECL_CNFERR; | ||
521 | } | ||
522 | pclc_base->hdr.typev1 = SMC_TYPE_N; | ||
523 | } else { | ||
524 | pclc_base->iparea_offset = htons(sizeof(*pclc_smcd)); | ||
525 | plen += sizeof(*pclc_prfx) + | ||
526 | pclc_prfx->ipv6_prefixes_cnt * | ||
527 | sizeof(ipv6_prfx[0]); | ||
528 | } | ||
529 | } | ||
530 | |||
531 | /* build SMC Proposal CLC message */ | ||
532 | memcpy(pclc_base->hdr.eyecatcher, SMC_EYECATCHER, | ||
533 | sizeof(SMC_EYECATCHER)); | ||
534 | pclc_base->hdr.type = SMC_CLC_PROPOSAL; | ||
535 | if (smcr_indicated(ini->smc_type_v1)) { | ||
536 | /* add SMC-R specifics */ | ||
537 | memcpy(pclc_base->lcl.id_for_peer, local_systemid, | ||
538 | sizeof(local_systemid)); | ||
539 | memcpy(pclc_base->lcl.gid, ini->ib_gid, SMC_GID_SIZE); | ||
540 | memcpy(pclc_base->lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1], | ||
541 | ETH_ALEN); | ||
542 | } | ||
543 | if (smcd_indicated(ini->smc_type_v1)) { | ||
544 | /* add SMC-D specifics */ | ||
545 | if (ini->ism_dev[0]) { | ||
546 | pclc_smcd->ism.gid = htonll(ini->ism_dev[0]->local_gid); | ||
547 | pclc_smcd->ism.chid = | ||
548 | htons(smc_ism_get_chid(ini->ism_dev[0])); | ||
549 | } | ||
550 | } | ||
551 | if (ini->smc_type_v2 == SMC_TYPE_N) { | ||
552 | pclc_smcd->v2_ext_offset = 0; | ||
553 | } else { | ||
554 | u16 v2_ext_offset; | ||
555 | u8 *eid = NULL; | ||
556 | |||
557 | v2_ext_offset = sizeof(*pclc_smcd) - | ||
558 | offsetofend(struct smc_clc_msg_smcd, v2_ext_offset); | ||
559 | if (ini->smc_type_v1 != SMC_TYPE_N) | ||
560 | v2_ext_offset += sizeof(*pclc_prfx) + | ||
561 | pclc_prfx->ipv6_prefixes_cnt * | ||
562 | sizeof(ipv6_prfx[0]); | ||
563 | pclc_smcd->v2_ext_offset = htons(v2_ext_offset); | ||
564 | v2_ext->hdr.eid_cnt = 0; | ||
565 | v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt; | ||
566 | v2_ext->hdr.flag.release = SMC_RELEASE; | ||
567 | v2_ext->hdr.flag.seid = 1; | ||
568 | v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) - | ||
569 | offsetofend(struct smc_clnt_opts_area_hdr, | ||
570 | smcd_v2_ext_offset) + | ||
571 | v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN); | ||
572 | if (ini->ism_dev[0]) | ||
573 | smc_ism_get_system_eid(ini->ism_dev[0], &eid); | ||
574 | else | ||
575 | smc_ism_get_system_eid(ini->ism_dev[1], &eid); | ||
576 | if (eid) | ||
577 | memcpy(smcd_v2_ext->system_eid, eid, SMC_MAX_EID_LEN); | ||
578 | plen += sizeof(*v2_ext) + sizeof(*smcd_v2_ext); | ||
579 | if (ini->ism_offered_cnt) { | ||
580 | for (i = 1; i <= ini->ism_offered_cnt; i++) { | ||
581 | gidchids[i - 1].gid = | ||
582 | htonll(ini->ism_dev[i]->local_gid); | ||
583 | gidchids[i - 1].chid = | ||
584 | htons(smc_ism_get_chid(ini->ism_dev[i])); | ||
585 | } | ||
586 | plen += ini->ism_offered_cnt * | ||
587 | sizeof(struct smc_clc_smcd_gid_chid); | ||
588 | } | ||
589 | } | ||
590 | pclc_base->hdr.length = htons(plen); | ||
591 | memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); | ||
592 | |||
593 | /* send SMC Proposal CLC message */ | ||
594 | memset(&msg, 0, sizeof(msg)); | ||
595 | i = 0; | ||
596 | vec[i].iov_base = pclc_base; | ||
597 | vec[i++].iov_len = sizeof(*pclc_base); | ||
598 | vec[i].iov_base = pclc_smcd; | ||
599 | vec[i++].iov_len = sizeof(*pclc_smcd); | ||
600 | if (ini->smc_type_v1 != SMC_TYPE_N) { | ||
601 | vec[i].iov_base = pclc_prfx; | ||
602 | vec[i++].iov_len = sizeof(*pclc_prfx); | ||
603 | if (pclc_prfx->ipv6_prefixes_cnt > 0) { | ||
604 | vec[i].iov_base = ipv6_prfx; | ||
605 | vec[i++].iov_len = pclc_prfx->ipv6_prefixes_cnt * | ||
606 | sizeof(ipv6_prfx[0]); | ||
607 | } | ||
608 | } | ||
609 | if (ini->smc_type_v2 != SMC_TYPE_N) { | ||
610 | vec[i].iov_base = v2_ext; | ||
611 | vec[i++].iov_len = sizeof(*v2_ext); | ||
612 | vec[i].iov_base = smcd_v2_ext; | ||
613 | vec[i++].iov_len = sizeof(*smcd_v2_ext); | ||
614 | if (ini->ism_offered_cnt) { | ||
615 | vec[i].iov_base = gidchids; | ||
616 | vec[i++].iov_len = ini->ism_offered_cnt * | ||
617 | sizeof(struct smc_clc_smcd_gid_chid); | ||
618 | } | ||
619 | } | ||
620 | vec[i].iov_base = trl; | ||
621 | vec[i++].iov_len = sizeof(*trl); | ||
622 | /* due to the few bytes needed for the CLC handshake this cannot block */ | ||
623 | len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen); | ||
624 | if (len < 0) { | ||
625 | smc->sk.sk_err = smc->clcsock->sk->sk_err; | ||
626 | reason_code = -smc->sk.sk_err; | ||
627 | } else if (len < ntohs(pclc_base->hdr.length)) { | ||
628 | reason_code = -ENETUNREACH; | ||
629 | smc->sk.sk_err = -reason_code; | ||
630 | } | ||
631 | |||
632 | kfree(pclc); | ||
633 | return reason_code; | ||
634 | } | ||
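/* Note on the send above: the proposal is gathered from up to eight kvec
 * entries (base header, SMC-D area, IP prefixes, v2 extensions, trailer),
 * so kernel_sendmsg() can transmit the variable-length pieces in one TCP
 * send without first copying them into a contiguous buffer; plen adds up
 * the very same pieces that smc_clc_msg_prop_valid() recomputes on the
 * receiving side.
 */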
635 | |||
636 | /* build and send CLC CONFIRM / ACCEPT message */ | ||
637 | static int smc_clc_send_confirm_accept(struct smc_sock *smc, | ||
638 | struct smc_clc_msg_accept_confirm_v2 *clc_v2, | ||
639 | int first_contact, u8 version) | ||
640 | { | ||
641 | struct smc_connection *conn = &smc->conn; | ||
642 | struct smc_clc_msg_accept_confirm *clc; | ||
643 | struct smc_clc_first_contact_ext fce; | ||
644 | struct smc_clc_msg_trail trl; | ||
645 | struct kvec vec[3]; | ||
646 | struct msghdr msg; | ||
647 | int i, len; | ||
648 | |||
649 | /* send SMC Confirm CLC msg */ | ||
650 | clc = (struct smc_clc_msg_accept_confirm *)clc_v2; | ||
651 | clc->hdr.version = version; /* SMC version */ | ||
652 | if (first_contact) | ||
653 | clc->hdr.typev2 |= SMC_FIRST_CONTACT_MASK; | ||
654 | if (conn->lgr->is_smcd) { | ||
655 | /* SMC-D specific settings */ | ||
656 | memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER, | ||
657 | sizeof(SMCD_EYECATCHER)); | ||
658 | clc->hdr.typev1 = SMC_TYPE_D; | ||
659 | clc->d0.gid = conn->lgr->smcd->local_gid; | ||
660 | clc->d0.token = conn->rmb_desc->token; | ||
661 | clc->d0.dmbe_size = conn->rmbe_size_short; | ||
662 | clc->d0.dmbe_idx = 0; | ||
663 | memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); | ||
664 | if (version == SMC_V1) { | ||
665 | clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); | ||
666 | } else { | ||
667 | u8 *eid = NULL; | ||
668 | |||
669 | clc_v2->chid = htons(smc_ism_get_chid(conn->lgr->smcd)); | ||
670 | smc_ism_get_system_eid(conn->lgr->smcd, &eid); | ||
671 | if (eid) | ||
672 | memcpy(clc_v2->eid, eid, SMC_MAX_EID_LEN); | ||
673 | len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; | ||
674 | if (first_contact) | ||
675 | smc_clc_fill_fce(&fce, &len); | ||
676 | clc_v2->hdr.length = htons(len); | ||
677 | } | ||
678 | memcpy(trl.eyecatcher, SMCD_EYECATCHER, | ||
679 | sizeof(SMCD_EYECATCHER)); | ||
680 | } else { | ||
681 | struct smc_link *link = conn->lnk; | ||
682 | |||
683 | |||
684 | /* SMC-R specific settings */ | ||
685 | memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER, | ||
686 | sizeof(SMC_EYECATCHER)); | ||
687 | clc->hdr.typev1 = SMC_TYPE_R; | ||
688 | clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); | ||
689 | memcpy(clc->r0.lcl.id_for_peer, local_systemid, | ||
690 | sizeof(local_systemid)); | ||
691 | memcpy(&clc->r0.lcl.gid, link->gid, SMC_GID_SIZE); | ||
692 | memcpy(&clc->r0.lcl.mac, &link->smcibdev->mac[link->ibport - 1], | ||
693 | ETH_ALEN); | ||
694 | hton24(clc->r0.qpn, link->roce_qp->qp_num); | ||
695 | clc->r0.rmb_rkey = | ||
696 | htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey); | ||
697 | clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ | ||
698 | clc->r0.rmbe_alert_token = htonl(conn->alert_token_local); | ||
699 | switch (clc->hdr.type) { | ||
700 | case SMC_CLC_ACCEPT: | ||
701 | clc->r0.qp_mtu = link->path_mtu; | ||
702 | break; | ||
703 | case SMC_CLC_CONFIRM: | ||
704 | clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu); | ||
705 | break; | ||
706 | } | ||
707 | clc->r0.rmbe_size = conn->rmbe_size_short; | ||
708 | clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address | ||
709 | (conn->rmb_desc->sgt[link->link_idx].sgl)); | ||
710 | hton24(clc->r0.psn, link->psn_initial); | ||
711 | memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); | ||
712 | } | ||
713 | |||
714 | memset(&msg, 0, sizeof(msg)); | ||
715 | i = 0; | ||
716 | vec[i].iov_base = clc_v2; | ||
717 | if (version > SMC_V1) | ||
718 | vec[i++].iov_len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 - sizeof(trl); | ||
719 | else | ||
720 | vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ? | ||
721 | SMCD_CLC_ACCEPT_CONFIRM_LEN : | ||
722 | SMCR_CLC_ACCEPT_CONFIRM_LEN) - | ||
723 | sizeof(trl); | ||
724 | if (version > SMC_V1 && first_contact) { | ||
725 | vec[i].iov_base = &fce; | ||
726 | vec[i++].iov_len = sizeof(fce); | ||
727 | } | ||
728 | vec[i].iov_base = &trl; | ||
729 | vec[i++].iov_len = sizeof(trl); | ||
730 | return kernel_sendmsg(smc->clcsock, &msg, vec, i, | ||
731 | ntohs(clc->hdr.length)); | ||
732 | } | ||
733 | |||
734 | /* send CLC CONFIRM message across internal TCP socket */ | ||
735 | int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, | ||
736 | u8 version) | ||
737 | { | ||
738 | struct smc_clc_msg_accept_confirm_v2 cclc_v2; | ||
739 | int reason_code = 0; | ||
740 | int len; | ||
741 | |||
742 | /* send SMC Confirm CLC msg */ | ||
743 | memset(&cclc_v2, 0, sizeof(cclc_v2)); | ||
744 | cclc_v2.hdr.type = SMC_CLC_CONFIRM; | ||
745 | len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact, | ||
746 | version); | ||
747 | if (len < ntohs(cclc_v2.hdr.length)) { | ||
748 | if (len >= 0) { | ||
749 | reason_code = -ENETUNREACH; | ||
750 | smc->sk.sk_err = -reason_code; | ||
751 | } else { | ||
752 | smc->sk.sk_err = smc->clcsock->sk->sk_err; | ||
753 | reason_code = -smc->sk.sk_err; | ||
754 | } | ||
755 | } | ||
756 | return reason_code; | ||
757 | } | ||
758 | |||
759 | /* send CLC ACCEPT message across internal TCP socket */ | ||
760 | int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, | ||
761 | u8 version) | ||
762 | { | ||
763 | struct smc_clc_msg_accept_confirm_v2 aclc_v2; | ||
764 | int len; | ||
765 | |||
766 | memset(&aclc_v2, 0, sizeof(aclc_v2)); | ||
767 | aclc_v2.hdr.type = SMC_CLC_ACCEPT; | ||
768 | len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact, | ||
769 | version); | ||
770 | if (len < ntohs(aclc_v2.hdr.length)) | ||
771 | len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err; | ||
772 | |||
773 | return len > 0 ? 0 : len; | ||
774 | } | ||
775 | |||
776 | void __init smc_clc_init(void) | ||
777 | { | ||
778 | struct new_utsname *u; | ||
779 | |||
780 | memset(smc_hostname, _S, sizeof(smc_hostname)); /* ASCII blanks */ | ||
781 | u = utsname(); | ||
782 | memcpy(smc_hostname, u->nodename, | ||
783 | min_t(size_t, strlen(u->nodename), sizeof(smc_hostname))); | ||
784 | } | ||
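/* Note: _S comes from <linux/ctype.h>, where it is defined as 0x20, the
 * code point of an ASCII space, so the memset() above blank-pads the
 * hostname buffer before the nodename is copied over it.
 */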
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
new file mode 100644
index 000000000..c579d1d59
--- /dev/null
+++ b/net/smc/smc_clc.h
@@ -0,0 +1,333 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * CLC (connection layer control) handshake over initial TCP socket to | ||
6 | * prepare for RDMA traffic | ||
7 | * | ||
8 | * Copyright IBM Corp. 2016 | ||
9 | * | ||
10 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> | ||
11 | */ | ||
12 | |||
13 | #ifndef _SMC_CLC_H | ||
14 | #define _SMC_CLC_H | ||
15 | |||
16 | #include <rdma/ib_verbs.h> | ||
17 | |||
18 | #include "smc.h" | ||
19 | |||
20 | #define SMC_CLC_PROPOSAL 0x01 | ||
21 | #define SMC_CLC_ACCEPT 0x02 | ||
22 | #define SMC_CLC_CONFIRM 0x03 | ||
23 | #define SMC_CLC_DECLINE 0x04 | ||
24 | |||
25 | #define SMC_TYPE_R 0 /* SMC-R only */ | ||
26 | #define SMC_TYPE_D 1 /* SMC-D only */ | ||
27 | #define SMC_TYPE_N 2 /* neither SMC-R nor SMC-D */ | ||
28 | #define SMC_TYPE_B 3 /* SMC-R and SMC-D */ | ||
29 | #define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */ | ||
30 | #define CLC_WAIT_TIME_SHORT HZ /* short wait time on clcsock */ | ||
31 | #define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */ | ||
32 | #define SMC_CLC_DECL_TIMEOUT_CL 0x02010000 /* timeout w4 QP confirm link */ | ||
33 | #define SMC_CLC_DECL_TIMEOUT_AL 0x02020000 /* timeout w4 QP add link */ | ||
34 | #define SMC_CLC_DECL_CNFERR 0x03000000 /* configuration error */ | ||
35 | #define SMC_CLC_DECL_PEERNOSMC 0x03010000 /* peer did not indicate SMC */ | ||
36 | #define SMC_CLC_DECL_IPSEC 0x03020000 /* IPsec usage */ | ||
37 | #define SMC_CLC_DECL_NOSMCDEV 0x03030000 /* no SMC device found (R or D) */ | ||
38 | #define SMC_CLC_DECL_NOSMCDDEV 0x03030001 /* no SMC-D device found */ | ||
39 | #define SMC_CLC_DECL_NOSMCRDEV 0x03030002 /* no SMC-R device found */ | ||
40 | #define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/ | ||
41 | #define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */ | ||
42 | #define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */ | ||
43 | #define SMC_CLC_DECL_DIFFPREFIX 0x03070000 /* IP prefix / subnet mismatch */ | ||
44 | #define SMC_CLC_DECL_GETVLANERR 0x03080000 /* err to get vlan id of ip device*/ | ||
45 | #define SMC_CLC_DECL_ISMVLANERR 0x03090000 /* err to reg vlan id on ism dev */ | ||
46 | #define SMC_CLC_DECL_NOACTLINK 0x030a0000 /* no active smc-r link in lgr */ | ||
47 | #define SMC_CLC_DECL_NOSRVLINK 0x030b0000 /* SMC-R link from srv not found */ | ||
48 | #define SMC_CLC_DECL_VERSMISMAT 0x030c0000 /* SMC version mismatch */ | ||
49 | #define SMC_CLC_DECL_MAX_DMB 0x030d0000 /* SMC-D DMB limit exceeded */ | ||
50 | #define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */ | ||
51 | #define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */ | ||
52 | #define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */ | ||
53 | #define SMC_CLC_DECL_ERR_RTOK 0x09990001 /* rtoken handling failed */ | ||
54 | #define SMC_CLC_DECL_ERR_RDYLNK 0x09990002 /* ib ready link failed */ | ||
55 | #define SMC_CLC_DECL_ERR_REGRMB 0x09990003 /* reg rmb failed */ | ||
56 | |||
57 | #define SMC_FIRST_CONTACT_MASK 0b10 /* first contact bit within typev2 */ | ||
58 | |||
59 | struct smc_clc_msg_hdr { /* header1 of clc messages */ | ||
60 | u8 eyecatcher[4]; /* eye catcher */ | ||
61 | u8 type; /* proposal / accept / confirm / decline */ | ||
62 | __be16 length; | ||
63 | #if defined(__BIG_ENDIAN_BITFIELD) | ||
64 | u8 version : 4, | ||
65 | typev2 : 2, | ||
66 | typev1 : 2; | ||
67 | #elif defined(__LITTLE_ENDIAN_BITFIELD) | ||
68 | u8 typev1 : 2, | ||
69 | typev2 : 2, | ||
70 | version : 4; | ||
71 | #endif | ||
72 | } __packed; /* format defined in RFC7609 */ | ||
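
The two bitfield orderings keep the on-wire byte identical on big- and little-endian hosts while letting C code access the fields uniformly. A sketch of how a receiver can test the header; the helper names are hypothetical, the kernel open-codes such checks:

	static inline bool clc_first_contact(const struct smc_clc_msg_hdr *hdr)
	{
		return hdr->typev2 & SMC_FIRST_CONTACT_MASK;	/* 0b10, see above */
	}

	static inline u8 clc_version(const struct smc_clc_msg_hdr *hdr)
	{
		return hdr->version;	/* 4-bit CLC version */
	}
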
73 | |||
74 | struct smc_clc_msg_trail { /* trailer of clc messages */ | ||
75 | u8 eyecatcher[4]; | ||
76 | }; | ||
77 | |||
78 | struct smc_clc_msg_local { /* header2 of clc messages */ | ||
79 | u8 id_for_peer[SMC_SYSTEMID_LEN]; /* unique system id */ | ||
80 | u8 gid[16]; /* gid of ib_device port */ | ||
81 | u8 mac[6]; /* mac of ib_device port */ | ||
82 | }; | ||
83 | |||
84 | /* Struct would be 4 byte aligned, but it is used in an array that is sent | ||
85 | * to peers and must conform to RFC7609, hence we need to use packed here. | ||
86 | */ | ||
87 | struct smc_clc_ipv6_prefix { | ||
88 | struct in6_addr prefix; | ||
89 | u8 prefix_len; | ||
90 | } __packed; /* format defined in RFC7609 */ | ||
91 | |||
92 | #if defined(__BIG_ENDIAN_BITFIELD) | ||
93 | struct smc_clc_v2_flag { | ||
94 | u8 release : 4, | ||
95 | rsvd : 3, | ||
96 | seid : 1; | ||
97 | }; | ||
98 | #elif defined(__LITTLE_ENDIAN_BITFIELD) | ||
99 | struct smc_clc_v2_flag { | ||
100 | u8 seid : 1, | ||
101 | rsvd : 3, | ||
102 | release : 4; | ||
103 | }; | ||
104 | #endif | ||
105 | |||
106 | struct smc_clnt_opts_area_hdr { | ||
107 | u8 eid_cnt; /* number of user defined EIDs */ | ||
108 | u8 ism_gid_cnt; /* number of ISMv2 GIDs */ | ||
109 | u8 reserved1; | ||
110 | struct smc_clc_v2_flag flag; | ||
111 | u8 reserved2[2]; | ||
112 | __be16 smcd_v2_ext_offset; /* SMC-Dv2 Extension Offset */ | ||
113 | }; | ||
114 | |||
115 | struct smc_clc_smcd_gid_chid { | ||
116 | __be64 gid; /* ISM GID */ | ||
117 | __be16 chid; /* ISMv2 CHID */ | ||
118 | } __packed; /* format defined in | ||
119 | * IBM Shared Memory Communications Version 2 | ||
120 | * (https://www.ibm.com/support/pages/node/6326337) | ||
121 | */ | ||
122 | |||
123 | struct smc_clc_v2_extension { | ||
124 | struct smc_clnt_opts_area_hdr hdr; | ||
125 | u8 roce[16]; /* RoCEv2 GID */ | ||
126 | u8 reserved[16]; | ||
127 | u8 user_eids[][SMC_MAX_EID_LEN]; | ||
128 | }; | ||
129 | |||
130 | struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/ | ||
131 | __be32 outgoing_subnet; /* subnet mask */ | ||
132 | u8 prefix_len; /* number of significant bits in mask */ | ||
133 | u8 reserved[2]; | ||
134 | u8 ipv6_prefixes_cnt; /* number of IPv6 prefixes in prefix array */ | ||
135 | } __aligned(4); | ||
136 | |||
137 | struct smc_clc_msg_smcd { /* SMC-D GID information */ | ||
138 | struct smc_clc_smcd_gid_chid ism; /* ISM native GID+CHID of requestor */ | ||
139 | __be16 v2_ext_offset; /* SMC Version 2 Extension Offset */ | ||
140 | u8 reserved[28]; | ||
141 | }; | ||
142 | |||
143 | struct smc_clc_smcd_v2_extension { | ||
144 | u8 system_eid[SMC_MAX_EID_LEN]; | ||
145 | u8 reserved[16]; | ||
146 | struct smc_clc_smcd_gid_chid gidchid[]; | ||
147 | }; | ||
148 | |||
149 | struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ | ||
150 | struct smc_clc_msg_hdr hdr; | ||
151 | struct smc_clc_msg_local lcl; | ||
152 | __be16 iparea_offset; /* offset to IP address information area */ | ||
153 | } __aligned(4); | ||
154 | |||
155 | #define SMC_CLC_MAX_V6_PREFIX 8 | ||
156 | |||
157 | struct smc_clc_msg_proposal_area { | ||
158 | struct smc_clc_msg_proposal pclc_base; | ||
159 | struct smc_clc_msg_smcd pclc_smcd; | ||
160 | struct smc_clc_msg_proposal_prefix pclc_prfx; | ||
161 | struct smc_clc_ipv6_prefix pclc_prfx_ipv6[SMC_CLC_MAX_V6_PREFIX]; | ||
162 | struct smc_clc_v2_extension pclc_v2_ext; | ||
163 | struct smc_clc_smcd_v2_extension pclc_smcd_v2_ext; | ||
164 | struct smc_clc_smcd_gid_chid pclc_gidchids[SMC_MAX_ISM_DEVS]; | ||
165 | struct smc_clc_msg_trail pclc_trl; | ||
166 | }; | ||
167 | |||
168 | struct smcr_clc_msg_accept_confirm { /* SMCR accept/confirm */ | ||
169 | struct smc_clc_msg_local lcl; | ||
170 | u8 qpn[3]; /* QP number */ | ||
171 | __be32 rmb_rkey; /* RMB rkey */ | ||
172 | u8 rmbe_idx; /* Index of RMBE in RMB */ | ||
173 | __be32 rmbe_alert_token; /* unique connection id */ | ||
174 | #if defined(__BIG_ENDIAN_BITFIELD) | ||
175 | u8 rmbe_size : 4, /* buf size (compressed) */ | ||
176 | qp_mtu : 4; /* QP mtu */ | ||
177 | #elif defined(__LITTLE_ENDIAN_BITFIELD) | ||
178 | u8 qp_mtu : 4, | ||
179 | rmbe_size : 4; | ||
180 | #endif | ||
181 | u8 reserved; | ||
182 | __be64 rmb_dma_addr; /* RMB virtual address */ | ||
183 | u8 reserved2; | ||
184 | u8 psn[3]; /* packet sequence number */ | ||
185 | } __packed; | ||
186 | |||
187 | struct smcd_clc_msg_accept_confirm_common { /* SMCD accept/confirm */ | ||
188 | u64 gid; /* Sender GID */ | ||
189 | u64 token; /* DMB token */ | ||
190 | u8 dmbe_idx; /* DMBE index */ | ||
191 | #if defined(__BIG_ENDIAN_BITFIELD) | ||
192 | u8 dmbe_size : 4, /* buf size (compressed) */ | ||
193 | reserved3 : 4; | ||
194 | #elif defined(__LITTLE_ENDIAN_BITFIELD) | ||
195 | u8 reserved3 : 4, | ||
196 | dmbe_size : 4; | ||
197 | #endif | ||
198 | u16 reserved4; | ||
199 | __be32 linkid; /* Link identifier */ | ||
200 | } __packed; | ||
201 | |||
202 | #define SMC_CLC_OS_ZOS 1 | ||
203 | #define SMC_CLC_OS_LINUX 2 | ||
204 | #define SMC_CLC_OS_AIX 3 | ||
205 | |||
206 | struct smc_clc_first_contact_ext { | ||
207 | u8 reserved1; | ||
208 | #if defined(__BIG_ENDIAN_BITFIELD) | ||
209 | u8 os_type : 4, | ||
210 | release : 4; | ||
211 | #elif defined(__LITTLE_ENDIAN_BITFIELD) | ||
212 | u8 release : 4, | ||
213 | os_type : 4; | ||
214 | #endif | ||
215 | u8 reserved2[2]; | ||
216 | u8 hostname[SMC_MAX_HOSTNAME_LEN]; | ||
217 | }; | ||
218 | |||
219 | struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ | ||
220 | struct smc_clc_msg_hdr hdr; | ||
221 | union { | ||
222 | struct smcr_clc_msg_accept_confirm r0; /* SMC-R */ | ||
223 | struct { /* SMC-D */ | ||
224 | struct smcd_clc_msg_accept_confirm_common d0; | ||
225 | u32 reserved5[3]; | ||
226 | }; | ||
227 | }; | ||
228 | } __packed; /* format defined in RFC7609 */ | ||
229 | |||
230 | struct smc_clc_msg_accept_confirm_v2 { /* clc accept / confirm message */ | ||
231 | struct smc_clc_msg_hdr hdr; | ||
232 | union { | ||
233 | struct smcr_clc_msg_accept_confirm r0; /* SMC-R */ | ||
234 | struct { /* SMC-D */ | ||
235 | struct smcd_clc_msg_accept_confirm_common d0; | ||
236 | __be16 chid; | ||
237 | u8 eid[SMC_MAX_EID_LEN]; | ||
238 | u8 reserved5[8]; | ||
239 | }; | ||
240 | }; | ||
241 | }; | ||
242 | |||
243 | struct smc_clc_msg_decline { /* clc decline message */ | ||
244 | struct smc_clc_msg_hdr hdr; | ||
245 | u8 id_for_peer[SMC_SYSTEMID_LEN]; /* sender peer_id */ | ||
246 | __be32 peer_diagnosis; /* diagnosis information */ | ||
247 | #if defined(__BIG_ENDIAN_BITFIELD) | ||
248 | u8 os_type : 4, | ||
249 | reserved : 4; | ||
250 | #elif defined(__LITTLE_ENDIAN_BITFIELD) | ||
251 | u8 reserved : 4, | ||
252 | os_type : 4; | ||
253 | #endif | ||
254 | u8 reserved2[3]; | ||
255 | struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */ | ||
256 | } __aligned(4); | ||
257 | |||
258 | /* determine start of the prefix area within the proposal message */ | ||
259 | static inline struct smc_clc_msg_proposal_prefix * | ||
260 | smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc) | ||
261 | { | ||
262 | return (struct smc_clc_msg_proposal_prefix *) | ||
263 | ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset)); | ||
264 | } | ||
265 | |||
266 | static inline bool smcr_indicated(int smc_type) | ||
267 | { | ||
268 | return smc_type == SMC_TYPE_R || smc_type == SMC_TYPE_B; | ||
269 | } | ||
270 | |||
271 | static inline bool smcd_indicated(int smc_type) | ||
272 | { | ||
273 | return smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B; | ||
274 | } | ||
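
Since SMC_TYPE_B satisfies both predicates, callers can branch on the helpers without decoding the raw type constant. An illustrative (hypothetical) classifier:

	static const char *clc_type_str(int smc_type)
	{
		if (smcr_indicated(smc_type) && smcd_indicated(smc_type))
			return "SMC-R and SMC-D";	/* SMC_TYPE_B */
		if (smcr_indicated(smc_type))
			return "SMC-R only";		/* SMC_TYPE_R */
		if (smcd_indicated(smc_type))
			return "SMC-D only";		/* SMC_TYPE_D */
		return "neither";			/* SMC_TYPE_N */
	}
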
275 | |||
276 | /* get SMC-D info from proposal message */ | ||
277 | static inline struct smc_clc_msg_smcd * | ||
278 | smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop) | ||
279 | { | ||
280 | if (smcd_indicated(prop->hdr.typev1) && | ||
281 | ntohs(prop->iparea_offset) != sizeof(struct smc_clc_msg_smcd)) | ||
282 | return NULL; | ||
283 | |||
284 | return (struct smc_clc_msg_smcd *)(prop + 1); | ||
285 | } | ||
286 | |||
287 | static inline struct smc_clc_v2_extension * | ||
288 | smc_get_clc_v2_ext(struct smc_clc_msg_proposal *prop) | ||
289 | { | ||
290 | struct smc_clc_msg_smcd *prop_smcd = smc_get_clc_msg_smcd(prop); | ||
291 | |||
292 | if (!prop_smcd || !ntohs(prop_smcd->v2_ext_offset)) | ||
293 | return NULL; | ||
294 | |||
295 | return (struct smc_clc_v2_extension *) | ||
296 | ((u8 *)prop_smcd + | ||
297 | offsetof(struct smc_clc_msg_smcd, v2_ext_offset) + | ||
298 | sizeof(prop_smcd->v2_ext_offset) + | ||
299 | ntohs(prop_smcd->v2_ext_offset)); | ||
300 | } | ||
301 | |||
302 | static inline struct smc_clc_smcd_v2_extension * | ||
303 | smc_get_clc_smcd_v2_ext(struct smc_clc_v2_extension *prop_v2ext) | ||
304 | { | ||
305 | if (!prop_v2ext) | ||
306 | return NULL; | ||
307 | if (!ntohs(prop_v2ext->hdr.smcd_v2_ext_offset)) | ||
308 | return NULL; | ||
309 | |||
310 | return (struct smc_clc_smcd_v2_extension *) | ||
311 | ((u8 *)prop_v2ext + | ||
312 | offsetof(struct smc_clc_v2_extension, hdr) + | ||
313 | offsetof(struct smc_clnt_opts_area_hdr, smcd_v2_ext_offset) + | ||
314 | sizeof(prop_v2ext->hdr.smcd_v2_ext_offset) + | ||
315 | ntohs(prop_v2ext->hdr.smcd_v2_ext_offset)); | ||
316 | } | ||
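
The accessors above compose into a NULL-safe walk from the base proposal down to the SMC-Dv2 extension; each on-wire offset is relative to the end of the field that carries it, which is why the offsetof()/sizeof() arithmetic recurs at every step. Since smc_get_clc_smcd_v2_ext() tolerates a NULL argument, the lookups chain directly (get_smcd_v2_ext_of() is an illustrative name):

	static inline struct smc_clc_smcd_v2_extension *
	get_smcd_v2_ext_of(struct smc_clc_msg_proposal *pclc)
	{
		/* NULL at any stage propagates to a NULL result */
		return smc_get_clc_smcd_v2_ext(smc_get_clc_v2_ext(pclc));
	}
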
317 | |||
318 | struct smcd_dev; | ||
319 | struct smc_init_info; | ||
320 | |||
321 | int smc_clc_prfx_match(struct socket *clcsock, | ||
322 | struct smc_clc_msg_proposal_prefix *prop); | ||
323 | int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, | ||
324 | u8 expected_type, unsigned long timeout); | ||
325 | int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version); | ||
326 | int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini); | ||
327 | int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, | ||
328 | u8 version); | ||
329 | int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact, | ||
330 | u8 version); | ||
331 | void smc_clc_init(void) __init; | ||
332 | |||
333 | #endif | ||
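
Reading the SMC_CLC_DECL_* values as structured codes rather than opaque numbers: the top byte appears to group reasons by class (0x01 memory, 0x02 timeouts, 0x03 configuration, 0x04 synchronization, 0x05 peer decline, 0x09 internal errors), with sub-reasons in the lower bytes. Under that assumption, a diagnostic helper could look like:

	/* assumption: the reason class lives in the top byte of the code */
	static inline u8 smc_clc_decl_class(u32 peer_diagnosis)
	{
		return peer_diagnosis >> 24;	/* e.g. 0x03 for all config errors */
	}
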
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c new file mode 100644 index 000000000..84102db5b --- /dev/null +++ b/net/smc/smc_close.c | |||
@@ -0,0 +1,499 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * Socket Closing - normal and abnormal | ||
6 | * | ||
7 | * Copyright IBM Corp. 2016 | ||
8 | * | ||
9 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> | ||
10 | */ | ||
11 | |||
12 | #include <linux/workqueue.h> | ||
13 | #include <linux/sched/signal.h> | ||
14 | |||
15 | #include <net/sock.h> | ||
16 | #include <net/tcp.h> | ||
17 | |||
18 | #include "smc.h" | ||
19 | #include "smc_tx.h" | ||
20 | #include "smc_cdc.h" | ||
21 | #include "smc_close.h" | ||
22 | |||
23 | /* release the clcsock that is assigned to the smc_sock */ | ||
24 | void smc_clcsock_release(struct smc_sock *smc) | ||
25 | { | ||
26 | struct socket *tcp; | ||
27 | |||
28 | if (smc->listen_smc && current_work() != &smc->smc_listen_work) | ||
29 | cancel_work_sync(&smc->smc_listen_work); | ||
30 | mutex_lock(&smc->clcsock_release_lock); | ||
31 | if (smc->clcsock) { | ||
32 | tcp = smc->clcsock; | ||
33 | smc->clcsock = NULL; | ||
34 | sock_release(tcp); | ||
35 | } | ||
36 | mutex_unlock(&smc->clcsock_release_lock); | ||
37 | } | ||
38 | |||
39 | static void smc_close_cleanup_listen(struct sock *parent) | ||
40 | { | ||
41 | struct sock *sk; | ||
42 | |||
43 | /* Close non-accepted connections */ | ||
44 | while ((sk = smc_accept_dequeue(parent, NULL))) | ||
45 | smc_close_non_accepted(sk); | ||
46 | } | ||
47 | |||
48 | /* wait for sndbuf data to be transmitted */ | ||
49 | static void smc_close_stream_wait(struct smc_sock *smc, long timeout) | ||
50 | { | ||
51 | DEFINE_WAIT_FUNC(wait, woken_wake_function); | ||
52 | struct sock *sk = &smc->sk; | ||
53 | |||
54 | if (!timeout) | ||
55 | return; | ||
56 | |||
57 | if (!smc_tx_prepared_sends(&smc->conn)) | ||
58 | return; | ||
59 | |||
60 | smc->wait_close_tx_prepared = 1; | ||
61 | add_wait_queue(sk_sleep(sk), &wait); | ||
62 | while (!signal_pending(current) && timeout) { | ||
63 | int rc; | ||
64 | |||
65 | rc = sk_wait_event(sk, &timeout, | ||
66 | !smc_tx_prepared_sends(&smc->conn) || | ||
67 | sk->sk_err == ECONNABORTED || | ||
68 | sk->sk_err == ECONNRESET || | ||
69 | smc->conn.killed, | ||
70 | &wait); | ||
71 | if (rc) | ||
72 | break; | ||
73 | } | ||
74 | remove_wait_queue(sk_sleep(sk), &wait); | ||
75 | smc->wait_close_tx_prepared = 0; | ||
76 | } | ||
77 | |||
78 | void smc_close_wake_tx_prepared(struct smc_sock *smc) | ||
79 | { | ||
80 | if (smc->wait_close_tx_prepared) | ||
81 | /* wake up socket closing */ | ||
82 | smc->sk.sk_state_change(&smc->sk); | ||
83 | } | ||
84 | |||
85 | static int smc_close_wr(struct smc_connection *conn) | ||
86 | { | ||
87 | conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1; | ||
88 | |||
89 | return smc_cdc_get_slot_and_msg_send(conn); | ||
90 | } | ||
91 | |||
92 | static int smc_close_final(struct smc_connection *conn) | ||
93 | { | ||
94 | if (atomic_read(&conn->bytes_to_rcv)) | ||
95 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; | ||
96 | else | ||
97 | conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1; | ||
98 | if (conn->killed) | ||
99 | return -EPIPE; | ||
100 | |||
101 | return smc_cdc_get_slot_and_msg_send(conn); | ||
102 | } | ||
103 | |||
104 | int smc_close_abort(struct smc_connection *conn) | ||
105 | { | ||
106 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; | ||
107 | |||
108 | return smc_cdc_get_slot_and_msg_send(conn); | ||
109 | } | ||
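
The three helpers above differ only in which conn_state_flags bit they raise before pushing a CDC message to the peer. Summarized as a comment sketch:

	/* smc_close_wr()    -> peer_done_writing   (half close, like SHUT_WR)
	 * smc_close_final() -> peer_conn_closed, or peer_conn_abort if the
	 *                      receive buffer still holds unread bytes
	 * smc_close_abort() -> peer_conn_abort     (unconditional abort)
	 * each then transmits the flags via smc_cdc_get_slot_and_msg_send()
	 */
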
110 | |||
111 | static void smc_close_cancel_work(struct smc_sock *smc) | ||
112 | { | ||
113 | struct sock *sk = &smc->sk; | ||
114 | |||
115 | release_sock(sk); | ||
116 | cancel_work_sync(&smc->conn.close_work); | ||
117 | cancel_delayed_work_sync(&smc->conn.tx_work); | ||
118 | lock_sock(sk); | ||
119 | } | ||
120 | |||
121 | /* terminate smc socket abnormally - active abort | ||
122 | * link group is terminated, i.e. RDMA communication no longer possible | ||
123 | */ | ||
124 | void smc_close_active_abort(struct smc_sock *smc) | ||
125 | { | ||
126 | struct sock *sk = &smc->sk; | ||
127 | bool release_clcsock = false; | ||
128 | |||
129 | if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) { | ||
130 | sk->sk_err = ECONNABORTED; | ||
131 | if (smc->clcsock && smc->clcsock->sk) | ||
132 | tcp_abort(smc->clcsock->sk, ECONNABORTED); | ||
133 | } | ||
134 | switch (sk->sk_state) { | ||
135 | case SMC_ACTIVE: | ||
136 | case SMC_APPCLOSEWAIT1: | ||
137 | case SMC_APPCLOSEWAIT2: | ||
138 | sk->sk_state = SMC_PEERABORTWAIT; | ||
139 | smc_close_cancel_work(smc); | ||
140 | if (sk->sk_state != SMC_PEERABORTWAIT) | ||
141 | break; | ||
142 | sk->sk_state = SMC_CLOSED; | ||
143 | sock_put(sk); /* (postponed) passive closing */ | ||
144 | break; | ||
145 | case SMC_PEERCLOSEWAIT1: | ||
146 | case SMC_PEERCLOSEWAIT2: | ||
147 | case SMC_PEERFINCLOSEWAIT: | ||
148 | sk->sk_state = SMC_PEERABORTWAIT; | ||
149 | smc_close_cancel_work(smc); | ||
150 | if (sk->sk_state != SMC_PEERABORTWAIT) | ||
151 | break; | ||
152 | sk->sk_state = SMC_CLOSED; | ||
153 | smc_conn_free(&smc->conn); | ||
154 | release_clcsock = true; | ||
155 | sock_put(sk); /* passive closing */ | ||
156 | break; | ||
157 | case SMC_PROCESSABORT: | ||
158 | case SMC_APPFINCLOSEWAIT: | ||
159 | sk->sk_state = SMC_PEERABORTWAIT; | ||
160 | smc_close_cancel_work(smc); | ||
161 | if (sk->sk_state != SMC_PEERABORTWAIT) | ||
162 | break; | ||
163 | sk->sk_state = SMC_CLOSED; | ||
164 | smc_conn_free(&smc->conn); | ||
165 | release_clcsock = true; | ||
166 | break; | ||
167 | case SMC_INIT: | ||
168 | case SMC_PEERABORTWAIT: | ||
169 | case SMC_CLOSED: | ||
170 | break; | ||
171 | } | ||
172 | |||
173 | sock_set_flag(sk, SOCK_DEAD); | ||
174 | sk->sk_state_change(sk); | ||
175 | |||
176 | if (release_clcsock) { | ||
177 | release_sock(sk); | ||
178 | smc_clcsock_release(smc); | ||
179 | lock_sock(sk); | ||
180 | } | ||
181 | } | ||
182 | |||
183 | static inline bool smc_close_sent_any_close(struct smc_connection *conn) | ||
184 | { | ||
185 | return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort || | ||
186 | conn->local_tx_ctrl.conn_state_flags.peer_conn_closed; | ||
187 | } | ||
188 | |||
189 | int smc_close_active(struct smc_sock *smc) | ||
190 | { | ||
191 | struct smc_cdc_conn_state_flags *txflags = | ||
192 | &smc->conn.local_tx_ctrl.conn_state_flags; | ||
193 | struct smc_connection *conn = &smc->conn; | ||
194 | struct sock *sk = &smc->sk; | ||
195 | int old_state; | ||
196 | long timeout; | ||
197 | int rc = 0; | ||
198 | int rc1 = 0; | ||
199 | |||
200 | timeout = current->flags & PF_EXITING ? | ||
201 | 0 : sock_flag(sk, SOCK_LINGER) ? | ||
202 | sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; | ||
203 | |||
204 | old_state = sk->sk_state; | ||
205 | again: | ||
206 | switch (sk->sk_state) { | ||
207 | case SMC_INIT: | ||
208 | sk->sk_state = SMC_CLOSED; | ||
209 | break; | ||
210 | case SMC_LISTEN: | ||
211 | sk->sk_state = SMC_CLOSED; | ||
212 | sk->sk_state_change(sk); /* wake up accept */ | ||
213 | if (smc->clcsock && smc->clcsock->sk) { | ||
214 | smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready; | ||
215 | smc->clcsock->sk->sk_user_data = NULL; | ||
216 | rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); | ||
217 | } | ||
218 | smc_close_cleanup_listen(sk); | ||
219 | release_sock(sk); | ||
220 | flush_work(&smc->tcp_listen_work); | ||
221 | lock_sock(sk); | ||
222 | break; | ||
223 | case SMC_ACTIVE: | ||
224 | smc_close_stream_wait(smc, timeout); | ||
225 | release_sock(sk); | ||
226 | cancel_delayed_work_sync(&conn->tx_work); | ||
227 | lock_sock(sk); | ||
228 | if (sk->sk_state == SMC_ACTIVE) { | ||
229 | /* send close request */ | ||
230 | rc = smc_close_final(conn); | ||
231 | sk->sk_state = SMC_PEERCLOSEWAIT1; | ||
232 | |||
233 | /* actively shut down the clcsock before the peer closes it, | ||
234 | * to prevent the peer from entering TIME_WAIT state. | ||
235 | */ | ||
236 | if (smc->clcsock && smc->clcsock->sk) { | ||
237 | rc1 = kernel_sock_shutdown(smc->clcsock, | ||
238 | SHUT_RDWR); | ||
239 | rc = rc ? rc : rc1; | ||
240 | } | ||
241 | } else { | ||
242 | /* peer event has changed the state */ | ||
243 | goto again; | ||
244 | } | ||
245 | break; | ||
246 | case SMC_APPFINCLOSEWAIT: | ||
247 | /* socket already shut down for write or both (active close) */ | ||
248 | if (txflags->peer_done_writing && | ||
249 | !smc_close_sent_any_close(conn)) { | ||
250 | /* just shutdown wr done, send close request */ | ||
251 | rc = smc_close_final(conn); | ||
252 | } | ||
253 | sk->sk_state = SMC_CLOSED; | ||
254 | break; | ||
255 | case SMC_APPCLOSEWAIT1: | ||
256 | case SMC_APPCLOSEWAIT2: | ||
257 | if (!smc_cdc_rxed_any_close(conn)) | ||
258 | smc_close_stream_wait(smc, timeout); | ||
259 | release_sock(sk); | ||
260 | cancel_delayed_work_sync(&conn->tx_work); | ||
261 | lock_sock(sk); | ||
262 | if (sk->sk_state != SMC_APPCLOSEWAIT1 && | ||
263 | sk->sk_state != SMC_APPCLOSEWAIT2) | ||
264 | goto again; | ||
265 | /* confirm close from peer */ | ||
266 | rc = smc_close_final(conn); | ||
267 | if (smc_cdc_rxed_any_close(conn)) { | ||
268 | /* peer has closed the socket already */ | ||
269 | sk->sk_state = SMC_CLOSED; | ||
270 | sock_put(sk); /* postponed passive closing */ | ||
271 | } else { | ||
272 | /* peer has just issued a shutdown write */ | ||
273 | sk->sk_state = SMC_PEERFINCLOSEWAIT; | ||
274 | } | ||
275 | break; | ||
276 | case SMC_PEERCLOSEWAIT1: | ||
277 | case SMC_PEERCLOSEWAIT2: | ||
278 | if (txflags->peer_done_writing && | ||
279 | !smc_close_sent_any_close(conn)) { | ||
280 | /* just shutdown wr done, send close request */ | ||
281 | rc = smc_close_final(conn); | ||
282 | } | ||
283 | /* peer sending PeerConnectionClosed will cause transition */ | ||
284 | break; | ||
285 | case SMC_PEERFINCLOSEWAIT: | ||
286 | /* peer sending PeerConnectionClosed will cause transition */ | ||
287 | break; | ||
288 | case SMC_PROCESSABORT: | ||
289 | rc = smc_close_abort(conn); | ||
290 | sk->sk_state = SMC_CLOSED; | ||
291 | break; | ||
292 | case SMC_PEERABORTWAIT: | ||
293 | sk->sk_state = SMC_CLOSED; | ||
294 | break; | ||
295 | case SMC_CLOSED: | ||
296 | /* nothing to do, add tracing in future patch */ | ||
297 | break; | ||
298 | } | ||
299 | |||
300 | if (old_state != sk->sk_state) | ||
301 | sk->sk_state_change(sk); | ||
302 | return rc; | ||
303 | } | ||
304 | |||
305 | static void smc_close_passive_abort_received(struct smc_sock *smc) | ||
306 | { | ||
307 | struct smc_cdc_conn_state_flags *txflags = | ||
308 | &smc->conn.local_tx_ctrl.conn_state_flags; | ||
309 | struct sock *sk = &smc->sk; | ||
310 | |||
311 | switch (sk->sk_state) { | ||
312 | case SMC_INIT: | ||
313 | case SMC_ACTIVE: | ||
314 | case SMC_APPCLOSEWAIT1: | ||
315 | sk->sk_state = SMC_PROCESSABORT; | ||
316 | sock_put(sk); /* passive closing */ | ||
317 | break; | ||
318 | case SMC_APPFINCLOSEWAIT: | ||
319 | sk->sk_state = SMC_PROCESSABORT; | ||
320 | break; | ||
321 | case SMC_PEERCLOSEWAIT1: | ||
322 | case SMC_PEERCLOSEWAIT2: | ||
323 | if (txflags->peer_done_writing && | ||
324 | !smc_close_sent_any_close(&smc->conn)) | ||
325 | /* just shutdown, but not yet closed locally */ | ||
326 | sk->sk_state = SMC_PROCESSABORT; | ||
327 | else | ||
328 | sk->sk_state = SMC_CLOSED; | ||
329 | sock_put(sk); /* passive closing */ | ||
330 | break; | ||
331 | case SMC_APPCLOSEWAIT2: | ||
332 | case SMC_PEERFINCLOSEWAIT: | ||
333 | sk->sk_state = SMC_CLOSED; | ||
334 | sock_put(sk); /* passive closing */ | ||
335 | break; | ||
336 | case SMC_PEERABORTWAIT: | ||
337 | sk->sk_state = SMC_CLOSED; | ||
338 | break; | ||
339 | case SMC_PROCESSABORT: | ||
340 | /* nothing to do, add tracing in future patch */ | ||
341 | break; | ||
342 | } | ||
343 | } | ||
344 | |||
345 | /* Either some kind of closing has been received (peer_conn_closed, | ||
346 | * peer_conn_abort, or peer_done_writing), | ||
347 | * or the link group of the connection terminates abnormally. | ||
348 | */ | ||
349 | static void smc_close_passive_work(struct work_struct *work) | ||
350 | { | ||
351 | struct smc_connection *conn = container_of(work, | ||
352 | struct smc_connection, | ||
353 | close_work); | ||
354 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); | ||
355 | struct smc_cdc_conn_state_flags *rxflags; | ||
356 | bool release_clcsock = false; | ||
357 | struct sock *sk = &smc->sk; | ||
358 | int old_state; | ||
359 | |||
360 | lock_sock(sk); | ||
361 | old_state = sk->sk_state; | ||
362 | |||
363 | rxflags = &conn->local_rx_ctrl.conn_state_flags; | ||
364 | if (rxflags->peer_conn_abort) { | ||
365 | /* peer has not received all data */ | ||
366 | smc_close_passive_abort_received(smc); | ||
367 | release_sock(&smc->sk); | ||
368 | cancel_delayed_work_sync(&conn->tx_work); | ||
369 | lock_sock(&smc->sk); | ||
370 | goto wakeup; | ||
371 | } | ||
372 | |||
373 | switch (sk->sk_state) { | ||
374 | case SMC_INIT: | ||
375 | sk->sk_state = SMC_APPCLOSEWAIT1; | ||
376 | break; | ||
377 | case SMC_ACTIVE: | ||
378 | sk->sk_state = SMC_APPCLOSEWAIT1; | ||
379 | /* postpone sock_put() for passive closing to cover | ||
380 | * received SEND_SHUTDOWN as well | ||
381 | */ | ||
382 | break; | ||
383 | case SMC_PEERCLOSEWAIT1: | ||
384 | if (rxflags->peer_done_writing) | ||
385 | sk->sk_state = SMC_PEERCLOSEWAIT2; | ||
386 | fallthrough; | ||
387 | /* to check for closing */ | ||
388 | case SMC_PEERCLOSEWAIT2: | ||
389 | if (!smc_cdc_rxed_any_close(conn)) | ||
390 | break; | ||
391 | if (sock_flag(sk, SOCK_DEAD) && | ||
392 | smc_close_sent_any_close(conn)) { | ||
393 | /* smc_release has already been called locally */ | ||
394 | sk->sk_state = SMC_CLOSED; | ||
395 | } else { | ||
396 | /* just shutdown, but not yet closed locally */ | ||
397 | sk->sk_state = SMC_APPFINCLOSEWAIT; | ||
398 | } | ||
399 | sock_put(sk); /* passive closing */ | ||
400 | break; | ||
401 | case SMC_PEERFINCLOSEWAIT: | ||
402 | if (smc_cdc_rxed_any_close(conn)) { | ||
403 | sk->sk_state = SMC_CLOSED; | ||
404 | sock_put(sk); /* passive closing */ | ||
405 | } | ||
406 | break; | ||
407 | case SMC_APPCLOSEWAIT1: | ||
408 | case SMC_APPCLOSEWAIT2: | ||
409 | /* postpone sock_put() for passive closing to cover | ||
410 | * received SEND_SHUTDOWN as well | ||
411 | */ | ||
412 | break; | ||
413 | case SMC_APPFINCLOSEWAIT: | ||
414 | case SMC_PEERABORTWAIT: | ||
415 | case SMC_PROCESSABORT: | ||
416 | case SMC_CLOSED: | ||
417 | /* nothing to do, add tracing in future patch */ | ||
418 | break; | ||
419 | } | ||
420 | |||
421 | wakeup: | ||
422 | sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */ | ||
423 | sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */ | ||
424 | |||
425 | if (old_state != sk->sk_state) { | ||
426 | sk->sk_state_change(sk); | ||
427 | if ((sk->sk_state == SMC_CLOSED) && | ||
428 | (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { | ||
429 | smc_conn_free(conn); | ||
430 | if (smc->clcsock) | ||
431 | release_clcsock = true; | ||
432 | } | ||
433 | } | ||
434 | release_sock(sk); | ||
435 | if (release_clcsock) | ||
436 | smc_clcsock_release(smc); | ||
437 | sock_put(sk); /* sock_hold done by schedulers of close_work */ | ||
438 | } | ||
439 | |||
440 | int smc_close_shutdown_write(struct smc_sock *smc) | ||
441 | { | ||
442 | struct smc_connection *conn = &smc->conn; | ||
443 | struct sock *sk = &smc->sk; | ||
444 | int old_state; | ||
445 | long timeout; | ||
446 | int rc = 0; | ||
447 | |||
448 | timeout = current->flags & PF_EXITING ? | ||
449 | 0 : sock_flag(sk, SOCK_LINGER) ? | ||
450 | sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; | ||
451 | |||
452 | old_state = sk->sk_state; | ||
453 | again: | ||
454 | switch (sk->sk_state) { | ||
455 | case SMC_ACTIVE: | ||
456 | smc_close_stream_wait(smc, timeout); | ||
457 | release_sock(sk); | ||
458 | cancel_delayed_work_sync(&conn->tx_work); | ||
459 | lock_sock(sk); | ||
460 | if (sk->sk_state != SMC_ACTIVE) | ||
461 | goto again; | ||
462 | /* send close wr request */ | ||
463 | rc = smc_close_wr(conn); | ||
464 | sk->sk_state = SMC_PEERCLOSEWAIT1; | ||
465 | break; | ||
466 | case SMC_APPCLOSEWAIT1: | ||
467 | /* passive close */ | ||
468 | if (!smc_cdc_rxed_any_close(conn)) | ||
469 | smc_close_stream_wait(smc, timeout); | ||
470 | release_sock(sk); | ||
471 | cancel_delayed_work_sync(&conn->tx_work); | ||
472 | lock_sock(sk); | ||
473 | if (sk->sk_state != SMC_APPCLOSEWAIT1) | ||
474 | goto again; | ||
475 | /* confirm close from peer */ | ||
476 | rc = smc_close_wr(conn); | ||
477 | sk->sk_state = SMC_APPCLOSEWAIT2; | ||
478 | break; | ||
479 | case SMC_APPCLOSEWAIT2: | ||
480 | case SMC_PEERFINCLOSEWAIT: | ||
481 | case SMC_PEERCLOSEWAIT1: | ||
482 | case SMC_PEERCLOSEWAIT2: | ||
483 | case SMC_APPFINCLOSEWAIT: | ||
484 | case SMC_PROCESSABORT: | ||
485 | case SMC_PEERABORTWAIT: | ||
486 | /* nothing to do, add tracing in future patch */ | ||
487 | break; | ||
488 | } | ||
489 | |||
490 | if (old_state != sk->sk_state) | ||
491 | sk->sk_state_change(sk); | ||
492 | return rc; | ||
493 | } | ||
494 | |||
495 | /* Initialize close properties on connection establishment. */ | ||
496 | void smc_close_init(struct smc_sock *smc) | ||
497 | { | ||
498 | INIT_WORK(&smc->conn.close_work, smc_close_passive_work); | ||
499 | } | ||
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h new file mode 100644 index 000000000..634fea2b7 --- /dev/null +++ b/net/smc/smc_close.h | |||
@@ -0,0 +1,30 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * Socket Closing | ||
6 | * | ||
7 | * Copyright IBM Corp. 2016 | ||
8 | * | ||
9 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> | ||
10 | */ | ||
11 | |||
12 | #ifndef SMC_CLOSE_H | ||
13 | #define SMC_CLOSE_H | ||
14 | |||
15 | #include <linux/workqueue.h> | ||
16 | |||
17 | #include "smc.h" | ||
18 | |||
19 | #define SMC_MAX_STREAM_WAIT_TIMEOUT (2 * HZ) | ||
20 | #define SMC_CLOSE_SOCK_PUT_DELAY HZ | ||
21 | |||
22 | void smc_close_wake_tx_prepared(struct smc_sock *smc); | ||
23 | int smc_close_active(struct smc_sock *smc); | ||
24 | int smc_close_shutdown_write(struct smc_sock *smc); | ||
25 | void smc_close_init(struct smc_sock *smc); | ||
26 | void smc_clcsock_release(struct smc_sock *smc); | ||
27 | int smc_close_abort(struct smc_connection *conn); | ||
28 | void smc_close_active_abort(struct smc_sock *smc); | ||
29 | |||
30 | #endif /* SMC_CLOSE_H */ | ||
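
How the socket layer is expected to drive this API; the mapping below is inferred from the function names and their use in af_smc.c earlier in this patch, as a comment sketch:

	/* connection setup          -> smc_close_init()  (arms close_work)
	 * close(2)                  -> smc_close_active()
	 * shutdown(2) with SHUT_WR  -> smc_close_shutdown_write()
	 * fatal transport failure   -> smc_close_active_abort()
	 */
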
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c new file mode 100644 index 000000000..bf485a201 --- /dev/null +++ b/net/smc/smc_core.c | |||
@@ -0,0 +1,1973 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * Basic Transport Functions exploiting Infiniband API | ||
6 | * | ||
7 | * Copyright IBM Corp. 2016 | ||
8 | * | ||
9 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> | ||
10 | */ | ||
11 | |||
12 | #include <linux/socket.h> | ||
13 | #include <linux/if_vlan.h> | ||
14 | #include <linux/random.h> | ||
15 | #include <linux/workqueue.h> | ||
16 | #include <linux/wait.h> | ||
17 | #include <linux/reboot.h> | ||
18 | #include <linux/mutex.h> | ||
19 | #include <net/tcp.h> | ||
20 | #include <net/sock.h> | ||
21 | #include <rdma/ib_verbs.h> | ||
22 | #include <rdma/ib_cache.h> | ||
23 | |||
24 | #include "smc.h" | ||
25 | #include "smc_clc.h" | ||
26 | #include "smc_core.h" | ||
27 | #include "smc_ib.h" | ||
28 | #include "smc_wr.h" | ||
29 | #include "smc_llc.h" | ||
30 | #include "smc_cdc.h" | ||
31 | #include "smc_close.h" | ||
32 | #include "smc_ism.h" | ||
33 | |||
34 | #define SMC_LGR_NUM_INCR 256 | ||
35 | #define SMC_LGR_FREE_DELAY_SERV (600 * HZ) | ||
36 | #define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ) | ||
37 | |||
38 | static struct smc_lgr_list smc_lgr_list = { /* established link groups */ | ||
39 | .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock), | ||
40 | .list = LIST_HEAD_INIT(smc_lgr_list.list), | ||
41 | .num = 0, | ||
42 | }; | ||
43 | |||
44 | static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */ | ||
45 | static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted); | ||
46 | |||
47 | static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, | ||
48 | struct smc_buf_desc *buf_desc); | ||
49 | static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft); | ||
50 | |||
51 | static void smc_link_down_work(struct work_struct *work); | ||
52 | |||
53 | /* return head of link group list and its lock for a given link group */ | ||
54 | static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr, | ||
55 | spinlock_t **lgr_lock) | ||
56 | { | ||
57 | if (lgr->is_smcd) { | ||
58 | *lgr_lock = &lgr->smcd->lgr_lock; | ||
59 | return &lgr->smcd->lgr_list; | ||
60 | } | ||
61 | |||
62 | *lgr_lock = &smc_lgr_list.lock; | ||
63 | return &smc_lgr_list.list; | ||
64 | } | ||
65 | |||
66 | static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) | ||
67 | { | ||
68 | /* client link group creation always follows the server link group | ||
69 | * creation. For the client, use a somewhat higher removal delay time; | ||
70 | * otherwise there is a risk of out-of-sync link groups. | ||
71 | */ | ||
72 | if (!lgr->freeing) { | ||
73 | mod_delayed_work(system_wq, &lgr->free_work, | ||
74 | (!lgr->is_smcd && lgr->role == SMC_CLNT) ? | ||
75 | SMC_LGR_FREE_DELAY_CLNT : | ||
76 | SMC_LGR_FREE_DELAY_SERV); | ||
77 | } | ||
78 | } | ||
79 | |||
80 | /* Register connection's alert token in our lookup structure. | ||
81 | * To use rbtrees we have to implement our own insert core. | ||
82 | * Requires @conns_lock | ||
83 | * @smc connection to register | ||
84 | * (insertion itself cannot fail; the function returns void) | ||
85 | */ | ||
86 | static void smc_lgr_add_alert_token(struct smc_connection *conn) | ||
87 | { | ||
88 | struct rb_node **link, *parent = NULL; | ||
89 | u32 token = conn->alert_token_local; | ||
90 | |||
91 | link = &conn->lgr->conns_all.rb_node; | ||
92 | while (*link) { | ||
93 | struct smc_connection *cur = rb_entry(*link, | ||
94 | struct smc_connection, alert_node); | ||
95 | |||
96 | parent = *link; | ||
97 | if (cur->alert_token_local > token) | ||
98 | link = &parent->rb_left; | ||
99 | else | ||
100 | link = &parent->rb_right; | ||
101 | } | ||
102 | /* Put the new node there */ | ||
103 | rb_link_node(&conn->alert_node, parent, link); | ||
104 | rb_insert_color(&conn->alert_node, &conn->lgr->conns_all); | ||
105 | } | ||
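
The insert above keeps the tree ordered by token, with larger tokens to the right. The lookup counterpart, smc_lgr_find_conn() (used in smc_lgr_register_conn() below; its definition falls outside this excerpt), presumably walks the same order. A minimal sketch of such a lookup:

	static struct smc_connection *find_conn_sketch(u32 token,
						       struct smc_link_group *lgr)
	{
		struct rb_node *node = lgr->conns_all.rb_node;

		while (node) {
			struct smc_connection *cur =
				rb_entry(node, struct smc_connection, alert_node);

			if (cur->alert_token_local > token)
				node = node->rb_left;
			else if (cur->alert_token_local < token)
				node = node->rb_right;
			else
				return cur;
		}
		return NULL;	/* token not registered in this link group */
	}
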
106 | |||
107 | /* assign an SMC-R link to the connection */ | ||
108 | static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first) | ||
109 | { | ||
110 | enum smc_link_state expected = first ? SMC_LNK_ACTIVATING : | ||
111 | SMC_LNK_ACTIVE; | ||
112 | int i, j; | ||
113 | |||
114 | /* do link balancing */ | ||
115 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { | ||
116 | struct smc_link *lnk = &conn->lgr->lnk[i]; | ||
117 | |||
118 | if (lnk->state != expected || lnk->link_is_asym) | ||
119 | continue; | ||
120 | if (conn->lgr->role == SMC_CLNT) { | ||
121 | conn->lnk = lnk; /* temporary, SMC server assigns link */ | ||
122 | break; | ||
123 | } | ||
124 | if (conn->lgr->conns_num % 2) { | ||
125 | for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) { | ||
126 | struct smc_link *lnk2; | ||
127 | |||
128 | lnk2 = &conn->lgr->lnk[j]; | ||
129 | if (lnk2->state == expected && | ||
130 | !lnk2->link_is_asym) { | ||
131 | conn->lnk = lnk2; | ||
132 | break; | ||
133 | } | ||
134 | } | ||
135 | } | ||
136 | if (!conn->lnk) | ||
137 | conn->lnk = lnk; | ||
138 | break; | ||
139 | } | ||
140 | if (!conn->lnk) | ||
141 | return SMC_CLC_DECL_NOACTLINK; | ||
142 | return 0; | ||
143 | } | ||
144 | |||
145 | /* Register connection in link group by assigning an alert token | ||
146 | * registered in a search tree. | ||
147 | * Requires @conns_lock | ||
148 | * Note that '0' is a reserved value and not assigned. | ||
149 | */ | ||
150 | static int smc_lgr_register_conn(struct smc_connection *conn, bool first) | ||
151 | { | ||
152 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); | ||
153 | static atomic_t nexttoken = ATOMIC_INIT(0); | ||
154 | int rc; | ||
155 | |||
156 | if (!conn->lgr->is_smcd) { | ||
157 | rc = smcr_lgr_conn_assign_link(conn, first); | ||
158 | if (rc) | ||
159 | return rc; | ||
160 | } | ||
161 | /* find a new alert_token_local value not yet used by some connection | ||
162 | * in this link group | ||
163 | */ | ||
164 | sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */ | ||
165 | while (!conn->alert_token_local) { | ||
166 | conn->alert_token_local = atomic_inc_return(&nexttoken); | ||
167 | if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr)) | ||
168 | conn->alert_token_local = 0; | ||
169 | } | ||
170 | smc_lgr_add_alert_token(conn); | ||
171 | conn->lgr->conns_num++; | ||
172 | return 0; | ||
173 | } | ||
174 | |||
175 | /* Unregister connection and reset the alert token of the given connection. | ||
176 | */ | ||
177 | static void __smc_lgr_unregister_conn(struct smc_connection *conn) | ||
178 | { | ||
179 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); | ||
180 | struct smc_link_group *lgr = conn->lgr; | ||
181 | |||
182 | rb_erase(&conn->alert_node, &lgr->conns_all); | ||
183 | lgr->conns_num--; | ||
184 | conn->alert_token_local = 0; | ||
185 | sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */ | ||
186 | } | ||
187 | |||
188 | /* Unregister connection from lgr | ||
189 | */ | ||
190 | static void smc_lgr_unregister_conn(struct smc_connection *conn) | ||
191 | { | ||
192 | struct smc_link_group *lgr = conn->lgr; | ||
193 | |||
194 | if (!lgr) | ||
195 | return; | ||
196 | write_lock_bh(&lgr->conns_lock); | ||
197 | if (conn->alert_token_local) { | ||
198 | __smc_lgr_unregister_conn(conn); | ||
199 | } | ||
200 | write_unlock_bh(&lgr->conns_lock); | ||
201 | conn->lgr = NULL; | ||
202 | } | ||
203 | |||
204 | void smc_lgr_cleanup_early(struct smc_connection *conn) | ||
205 | { | ||
206 | struct smc_link_group *lgr = conn->lgr; | ||
207 | spinlock_t *lgr_lock; | ||
208 | |||
209 | if (!lgr) | ||
210 | return; | ||
211 | |||
212 | smc_conn_free(conn); | ||
213 | smc_lgr_list_head(lgr, &lgr_lock); | ||
214 | spin_lock_bh(lgr_lock); | ||
215 | /* do not use this link group for new connections */ | ||
216 | if (!list_empty(&lgr->list)) | ||
217 | list_del_init(&lgr->list); | ||
218 | spin_unlock_bh(lgr_lock); | ||
219 | __smc_lgr_terminate(lgr, true); | ||
220 | } | ||
221 | |||
222 | static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr) | ||
223 | { | ||
224 | int i; | ||
225 | |||
226 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { | ||
227 | struct smc_link *lnk = &lgr->lnk[i]; | ||
228 | |||
229 | if (smc_link_sendable(lnk)) | ||
230 | lnk->state = SMC_LNK_INACTIVE; | ||
231 | } | ||
232 | wake_up_all(&lgr->llc_msg_waiter); | ||
233 | wake_up_all(&lgr->llc_flow_waiter); | ||
234 | } | ||
235 | |||
236 | static void smc_lgr_free(struct smc_link_group *lgr); | ||
237 | |||
238 | static void smc_lgr_free_work(struct work_struct *work) | ||
239 | { | ||
240 | struct smc_link_group *lgr = container_of(to_delayed_work(work), | ||
241 | struct smc_link_group, | ||
242 | free_work); | ||
243 | spinlock_t *lgr_lock; | ||
244 | bool conns; | ||
245 | |||
246 | smc_lgr_list_head(lgr, &lgr_lock); | ||
247 | spin_lock_bh(lgr_lock); | ||
248 | if (lgr->freeing) { | ||
249 | spin_unlock_bh(lgr_lock); | ||
250 | return; | ||
251 | } | ||
252 | read_lock_bh(&lgr->conns_lock); | ||
253 | conns = RB_EMPTY_ROOT(&lgr->conns_all); | ||
254 | read_unlock_bh(&lgr->conns_lock); | ||
255 | if (!conns) { /* number of lgr connections is no longer zero */ | ||
256 | spin_unlock_bh(lgr_lock); | ||
257 | return; | ||
258 | } | ||
259 | list_del_init(&lgr->list); /* remove from smc_lgr_list */ | ||
260 | lgr->freeing = 1; /* this instance does the freeing, no new schedule */ | ||
261 | spin_unlock_bh(lgr_lock); | ||
262 | cancel_delayed_work(&lgr->free_work); | ||
263 | |||
264 | if (!lgr->is_smcd && !lgr->terminating) | ||
265 | smc_llc_send_link_delete_all(lgr, true, | ||
266 | SMC_LLC_DEL_PROG_INIT_TERM); | ||
267 | if (lgr->is_smcd && !lgr->terminating) | ||
268 | smc_ism_signal_shutdown(lgr); | ||
269 | if (!lgr->is_smcd) | ||
270 | smcr_lgr_link_deactivate_all(lgr); | ||
271 | smc_lgr_free(lgr); | ||
272 | } | ||
273 | |||
274 | static void smc_lgr_terminate_work(struct work_struct *work) | ||
275 | { | ||
276 | struct smc_link_group *lgr = container_of(work, struct smc_link_group, | ||
277 | terminate_work); | ||
278 | |||
279 | __smc_lgr_terminate(lgr, true); | ||
280 | } | ||
281 | |||
282 | /* return next unique link id for the lgr */ | ||
283 | static u8 smcr_next_link_id(struct smc_link_group *lgr) | ||
284 | { | ||
285 | u8 link_id; | ||
286 | int i; | ||
287 | |||
288 | while (1) { | ||
289 | again: | ||
290 | link_id = ++lgr->next_link_id; | ||
291 | if (!link_id) /* skip zero as link_id */ | ||
292 | link_id = ++lgr->next_link_id; | ||
293 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { | ||
294 | if (smc_link_usable(&lgr->lnk[i]) && | ||
295 | lgr->lnk[i].link_id == link_id) | ||
296 | goto again; | ||
297 | } | ||
298 | break; | ||
299 | } | ||
300 | return link_id; | ||
301 | } | ||
302 | |||
303 | int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, | ||
304 | u8 link_idx, struct smc_init_info *ini) | ||
305 | { | ||
306 | u8 rndvec[3]; | ||
307 | int rc; | ||
308 | |||
309 | get_device(&ini->ib_dev->ibdev->dev); | ||
310 | atomic_inc(&ini->ib_dev->lnk_cnt); | ||
311 | lnk->link_id = smcr_next_link_id(lgr); | ||
312 | lnk->lgr = lgr; | ||
313 | lnk->link_idx = link_idx; | ||
314 | lnk->smcibdev = ini->ib_dev; | ||
315 | lnk->ibport = ini->ib_port; | ||
316 | lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu; | ||
317 | smc_llc_link_set_uid(lnk); | ||
318 | INIT_WORK(&lnk->link_down_wrk, smc_link_down_work); | ||
319 | if (!ini->ib_dev->initialized) { | ||
320 | rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev); | ||
321 | if (rc) | ||
322 | goto out; | ||
323 | } | ||
324 | get_random_bytes(rndvec, sizeof(rndvec)); | ||
325 | lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + | ||
326 | (rndvec[2] << 16); | ||
327 | rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport, | ||
328 | ini->vlan_id, lnk->gid, &lnk->sgid_index); | ||
329 | if (rc) | ||
330 | goto out; | ||
331 | rc = smc_llc_link_init(lnk); | ||
332 | if (rc) | ||
333 | goto out; | ||
334 | rc = smc_wr_alloc_link_mem(lnk); | ||
335 | if (rc) | ||
336 | goto clear_llc_lnk; | ||
337 | rc = smc_ib_create_protection_domain(lnk); | ||
338 | if (rc) | ||
339 | goto free_link_mem; | ||
340 | rc = smc_ib_create_queue_pair(lnk); | ||
341 | if (rc) | ||
342 | goto dealloc_pd; | ||
343 | rc = smc_wr_create_link(lnk); | ||
344 | if (rc) | ||
345 | goto destroy_qp; | ||
346 | lnk->state = SMC_LNK_ACTIVATING; | ||
347 | return 0; | ||
348 | |||
349 | destroy_qp: | ||
350 | smc_ib_destroy_queue_pair(lnk); | ||
351 | dealloc_pd: | ||
352 | smc_ib_dealloc_protection_domain(lnk); | ||
353 | free_link_mem: | ||
354 | smc_wr_free_link_mem(lnk); | ||
355 | clear_llc_lnk: | ||
356 | smc_llc_link_clear(lnk, false); | ||
357 | out: | ||
358 | put_device(&ini->ib_dev->ibdev->dev); | ||
359 | memset(lnk, 0, sizeof(struct smc_link)); | ||
360 | lnk->state = SMC_LNK_UNUSED; | ||
361 | if (!atomic_dec_return(&ini->ib_dev->lnk_cnt)) | ||
362 | wake_up(&ini->ib_dev->lnks_deleted); | ||
363 | return rc; | ||
364 | } | ||
365 | |||
366 | /* create a new SMC link group */ | ||
367 | static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) | ||
368 | { | ||
369 | struct smc_link_group *lgr; | ||
370 | struct list_head *lgr_list; | ||
371 | struct smc_link *lnk; | ||
372 | spinlock_t *lgr_lock; | ||
373 | u8 link_idx; | ||
374 | int rc = 0; | ||
375 | int i; | ||
376 | |||
377 | if (ini->is_smcd && ini->vlan_id) { | ||
378 | if (smc_ism_get_vlan(ini->ism_dev[ini->ism_selected], | ||
379 | ini->vlan_id)) { | ||
380 | rc = SMC_CLC_DECL_ISMVLANERR; | ||
381 | goto out; | ||
382 | } | ||
383 | } | ||
384 | |||
385 | lgr = kzalloc(sizeof(*lgr), GFP_KERNEL); | ||
386 | if (!lgr) { | ||
387 | rc = SMC_CLC_DECL_MEM; | ||
388 | goto ism_put_vlan; | ||
389 | } | ||
390 | lgr->tx_wq = alloc_workqueue("smc_tx_wq-%*phN", 0, 0, | ||
391 | SMC_LGR_ID_SIZE, &lgr->id); | ||
392 | if (!lgr->tx_wq) { | ||
393 | rc = -ENOMEM; | ||
394 | goto free_lgr; | ||
395 | } | ||
396 | lgr->is_smcd = ini->is_smcd; | ||
397 | lgr->sync_err = 0; | ||
398 | lgr->terminating = 0; | ||
399 | lgr->freeing = 0; | ||
400 | lgr->vlan_id = ini->vlan_id; | ||
401 | mutex_init(&lgr->sndbufs_lock); | ||
402 | mutex_init(&lgr->rmbs_lock); | ||
403 | rwlock_init(&lgr->conns_lock); | ||
404 | for (i = 0; i < SMC_RMBE_SIZES; i++) { | ||
405 | INIT_LIST_HEAD(&lgr->sndbufs[i]); | ||
406 | INIT_LIST_HEAD(&lgr->rmbs[i]); | ||
407 | } | ||
408 | lgr->next_link_id = 0; | ||
409 | smc_lgr_list.num += SMC_LGR_NUM_INCR; | ||
410 | memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE); | ||
411 | INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work); | ||
412 | INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work); | ||
413 | lgr->conns_all = RB_ROOT; | ||
414 | if (ini->is_smcd) { | ||
415 | /* SMC-D specific settings */ | ||
416 | get_device(&ini->ism_dev[ini->ism_selected]->dev); | ||
417 | lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected]; | ||
418 | lgr->smcd = ini->ism_dev[ini->ism_selected]; | ||
419 | lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list; | ||
420 | lgr_lock = &lgr->smcd->lgr_lock; | ||
421 | lgr->smc_version = ini->smcd_version; | ||
422 | lgr->peer_shutdown = 0; | ||
423 | atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt); | ||
424 | } else { | ||
425 | /* SMC-R specific settings */ | ||
426 | lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; | ||
427 | memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer, | ||
428 | SMC_SYSTEMID_LEN); | ||
429 | memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1], | ||
430 | SMC_MAX_PNETID_LEN); | ||
431 | smc_llc_lgr_init(lgr, smc); | ||
432 | |||
433 | link_idx = SMC_SINGLE_LINK; | ||
434 | lnk = &lgr->lnk[link_idx]; | ||
435 | rc = smcr_link_init(lgr, lnk, link_idx, ini); | ||
436 | if (rc) | ||
437 | goto free_wq; | ||
438 | lgr_list = &smc_lgr_list.list; | ||
439 | lgr_lock = &smc_lgr_list.lock; | ||
440 | atomic_inc(&lgr_cnt); | ||
441 | } | ||
442 | smc->conn.lgr = lgr; | ||
443 | spin_lock_bh(lgr_lock); | ||
444 | list_add_tail(&lgr->list, lgr_list); | ||
445 | spin_unlock_bh(lgr_lock); | ||
446 | return 0; | ||
447 | |||
448 | free_wq: | ||
449 | destroy_workqueue(lgr->tx_wq); | ||
450 | free_lgr: | ||
451 | kfree(lgr); | ||
452 | ism_put_vlan: | ||
453 | if (ini->is_smcd && ini->vlan_id) | ||
454 | smc_ism_put_vlan(ini->ism_dev[ini->ism_selected], ini->vlan_id); | ||
455 | out: | ||
456 | if (rc < 0) { | ||
457 | if (rc == -ENOMEM) | ||
458 | rc = SMC_CLC_DECL_MEM; | ||
459 | else | ||
460 | rc = SMC_CLC_DECL_INTERR; | ||
461 | } | ||
462 | return rc; | ||
463 | } | ||
464 | |||
465 | static int smc_write_space(struct smc_connection *conn) | ||
466 | { | ||
467 | int buffer_len = conn->peer_rmbe_size; | ||
468 | union smc_host_cursor prod; | ||
469 | union smc_host_cursor cons; | ||
470 | int space; | ||
471 | |||
472 | smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn); | ||
473 | smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn); | ||
474 | /* determine rx_buf space */ | ||
475 | space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod); | ||
476 | return space; | ||
477 | } | ||
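
A worked example of the cursor arithmetic, with hypothetical numbers:

	/* with peer_rmbe_size = 65536, cons = 200 and prod = 1000 in the same
	 * wrap: smc_curs_diff() = 800 bytes are still unconsumed by the peer,
	 * so space = 65536 - 800 = 64736 bytes may still be produced
	 */
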
478 | |||
479 | static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend, | ||
480 | struct smc_wr_buf *wr_buf) | ||
481 | { | ||
482 | struct smc_connection *conn = &smc->conn; | ||
483 | union smc_host_cursor cons, fin; | ||
484 | int rc = 0; | ||
485 | int diff; | ||
486 | |||
487 | smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn); | ||
488 | smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn); | ||
489 | /* set prod cursor to old state, enforce tx_rdma_writes() */ | ||
490 | smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn); | ||
491 | smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn); | ||
492 | |||
493 | if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) { | ||
494 | /* cons cursor advanced more than fin, and prod was set | ||
495 | * to fin above, so now prod is smaller than cons. Fix that. | ||
496 | */ | ||
497 | diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons); | ||
498 | smc_curs_add(conn->sndbuf_desc->len, | ||
499 | &conn->tx_curs_sent, diff); | ||
500 | smc_curs_add(conn->sndbuf_desc->len, | ||
501 | &conn->tx_curs_fin, diff); | ||
502 | |||
503 | smp_mb__before_atomic(); | ||
504 | atomic_add(diff, &conn->sndbuf_space); | ||
505 | smp_mb__after_atomic(); | ||
506 | |||
507 | smc_curs_add(conn->peer_rmbe_size, | ||
508 | &conn->local_tx_ctrl.prod, diff); | ||
509 | smc_curs_add(conn->peer_rmbe_size, | ||
510 | &conn->local_tx_ctrl_fin, diff); | ||
511 | } | ||
512 | /* recalculate, value is used by tx_rdma_writes() */ | ||
513 | atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn)); | ||
514 | |||
515 | if (smc->sk.sk_state != SMC_INIT && | ||
516 | smc->sk.sk_state != SMC_CLOSED) { | ||
517 | rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf); | ||
518 | if (!rc) { | ||
519 | queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, 0); | ||
520 | smc->sk.sk_data_ready(&smc->sk); | ||
521 | } | ||
522 | } else { | ||
523 | smc_wr_tx_put_slot(conn->lnk, | ||
524 | (struct smc_wr_tx_pend_priv *)pend); | ||
525 | } | ||
526 | return rc; | ||
527 | } | ||
528 | |||
529 | struct smc_link *smc_switch_conns(struct smc_link_group *lgr, | ||
530 | struct smc_link *from_lnk, bool is_dev_err) | ||
531 | { | ||
532 | struct smc_link *to_lnk = NULL; | ||
533 | struct smc_cdc_tx_pend *pend; | ||
534 | struct smc_connection *conn; | ||
535 | struct smc_wr_buf *wr_buf; | ||
536 | struct smc_sock *smc; | ||
537 | struct rb_node *node; | ||
538 | int i, rc = 0; | ||
539 | |||
540 | /* link is inactive, wake up tx waiters */ | ||
541 | smc_wr_wakeup_tx_wait(from_lnk); | ||
542 | |||
543 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { | ||
544 | if (!smc_link_active(&lgr->lnk[i]) || i == from_lnk->link_idx) | ||
545 | continue; | ||
546 | if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev && | ||
547 | from_lnk->ibport == lgr->lnk[i].ibport) { | ||
548 | continue; | ||
549 | } | ||
550 | to_lnk = &lgr->lnk[i]; | ||
551 | break; | ||
552 | } | ||
553 | if (!to_lnk || !smc_wr_tx_link_hold(to_lnk)) { | ||
554 | smc_lgr_terminate_sched(lgr); | ||
555 | return NULL; | ||
556 | } | ||
557 | again: | ||
558 | read_lock_bh(&lgr->conns_lock); | ||
559 | for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) { | ||
560 | conn = rb_entry(node, struct smc_connection, alert_node); | ||
561 | if (conn->lnk != from_lnk) | ||
562 | continue; | ||
563 | smc = container_of(conn, struct smc_sock, conn); | ||
564 | /* conn->lnk not yet set in SMC_INIT state */ | ||
565 | if (smc->sk.sk_state == SMC_INIT) | ||
566 | continue; | ||
567 | if (smc->sk.sk_state == SMC_CLOSED || | ||
568 | smc->sk.sk_state == SMC_PEERCLOSEWAIT1 || | ||
569 | smc->sk.sk_state == SMC_PEERCLOSEWAIT2 || | ||
570 | smc->sk.sk_state == SMC_APPFINCLOSEWAIT || | ||
571 | smc->sk.sk_state == SMC_APPCLOSEWAIT1 || | ||
572 | smc->sk.sk_state == SMC_APPCLOSEWAIT2 || | ||
573 | smc->sk.sk_state == SMC_PEERFINCLOSEWAIT || | ||
574 | smc->sk.sk_state == SMC_PEERABORTWAIT || | ||
575 | smc->sk.sk_state == SMC_PROCESSABORT) { | ||
576 | spin_lock_bh(&conn->send_lock); | ||
577 | conn->lnk = to_lnk; | ||
578 | spin_unlock_bh(&conn->send_lock); | ||
579 | continue; | ||
580 | } | ||
581 | sock_hold(&smc->sk); | ||
582 | read_unlock_bh(&lgr->conns_lock); | ||
583 | /* pre-fetch buffer outside of send_lock, might sleep */ | ||
584 | rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend); | ||
585 | if (rc) | ||
586 | goto err_out; | ||
587 | /* avoid race with smcr_tx_sndbuf_nonempty() */ | ||
588 | spin_lock_bh(&conn->send_lock); | ||
589 | conn->lnk = to_lnk; | ||
590 | rc = smc_switch_cursor(smc, pend, wr_buf); | ||
591 | spin_unlock_bh(&conn->send_lock); | ||
592 | sock_put(&smc->sk); | ||
593 | if (rc) | ||
594 | goto err_out; | ||
595 | goto again; | ||
596 | } | ||
597 | read_unlock_bh(&lgr->conns_lock); | ||
598 | smc_wr_tx_link_put(to_lnk); | ||
599 | return to_lnk; | ||
600 | |||
601 | err_out: | ||
602 | smcr_link_down_cond_sched(to_lnk); | ||
603 | smc_wr_tx_link_put(to_lnk); | ||
604 | return NULL; | ||
605 | } | ||
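
Why the walk restarts from rb_first() after every migrated connection: conns_lock must be dropped before smc_cdc_get_free_slot(), which may sleep, and the rbtree can change while it is unlocked. The restart still terminates because every successful pass moves one more connection off from_lnk. As a comment sketch:

	/* per-iteration pattern of the loop above:
	 *   hold conns_lock  -> find the next conn still using from_lnk
	 *   drop conns_lock  -> reserve a send slot (may sleep), switch cursors
	 *   goto again       -> re-walk the tree; conns already switched are
	 *                       skipped since conn->lnk != from_lnk by now
	 */
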
606 | |||
607 | static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc, | ||
608 | struct smc_link_group *lgr) | ||
609 | { | ||
610 | int rc; | ||
611 | |||
612 | if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) { | ||
613 | /* unregister rmb with peer */ | ||
614 | rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY); | ||
615 | if (!rc) { | ||
616 | /* protect against smc_llc_cli_rkey_exchange() */ | ||
617 | mutex_lock(&lgr->llc_conf_mutex); | ||
618 | smc_llc_do_delete_rkey(lgr, rmb_desc); | ||
619 | rmb_desc->is_conf_rkey = false; | ||
620 | mutex_unlock(&lgr->llc_conf_mutex); | ||
621 | smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl); | ||
622 | } | ||
623 | } | ||
624 | |||
625 | if (rmb_desc->is_reg_err) { | ||
626 | /* buf registration failed, reuse not possible */ | ||
627 | mutex_lock(&lgr->rmbs_lock); | ||
628 | list_del(&rmb_desc->list); | ||
629 | mutex_unlock(&lgr->rmbs_lock); | ||
630 | |||
631 | smc_buf_free(lgr, true, rmb_desc); | ||
632 | } else { | ||
633 | rmb_desc->used = 0; | ||
634 | } | ||
635 | } | ||
636 | |||
637 | static void smc_buf_unuse(struct smc_connection *conn, | ||
638 | struct smc_link_group *lgr) | ||
639 | { | ||
640 | if (conn->sndbuf_desc) | ||
641 | conn->sndbuf_desc->used = 0; | ||
642 | if (conn->rmb_desc && lgr->is_smcd) | ||
643 | conn->rmb_desc->used = 0; | ||
644 | else if (conn->rmb_desc) | ||
645 | smcr_buf_unuse(conn->rmb_desc, lgr); | ||
646 | } | ||
647 | |||
648 | /* remove a finished connection from its link group */ | ||
649 | void smc_conn_free(struct smc_connection *conn) | ||
650 | { | ||
651 | struct smc_link_group *lgr = conn->lgr; | ||
652 | |||
653 | if (!lgr) | ||
654 | return; | ||
655 | if (lgr->is_smcd) { | ||
656 | if (!list_empty(&lgr->list)) | ||
657 | smc_ism_unset_conn(conn); | ||
658 | tasklet_kill(&conn->rx_tsklet); | ||
659 | } else { | ||
660 | smc_cdc_wait_pend_tx_wr(conn); | ||
661 | if (current_work() != &conn->abort_work) | ||
662 | cancel_work_sync(&conn->abort_work); | ||
663 | } | ||
664 | if (!list_empty(&lgr->list)) { | ||
665 | smc_buf_unuse(conn, lgr); /* allow buffer reuse */ | ||
666 | smc_lgr_unregister_conn(conn); | ||
667 | } | ||
668 | |||
669 | if (!lgr->conns_num) | ||
670 | smc_lgr_schedule_free_work(lgr); | ||
671 | } | ||
672 | |||
673 | /* unregister a link from a buf_desc */ | ||
674 | static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb, | ||
675 | struct smc_link *lnk) | ||
676 | { | ||
677 | if (is_rmb) | ||
678 | buf_desc->is_reg_mr[lnk->link_idx] = false; | ||
679 | if (!buf_desc->is_map_ib[lnk->link_idx]) | ||
680 | return; | ||
681 | if (is_rmb) { | ||
682 | if (buf_desc->mr_rx[lnk->link_idx]) { | ||
683 | smc_ib_put_memory_region( | ||
684 | buf_desc->mr_rx[lnk->link_idx]); | ||
685 | buf_desc->mr_rx[lnk->link_idx] = NULL; | ||
686 | } | ||
687 | smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE); | ||
688 | } else { | ||
689 | smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE); | ||
690 | } | ||
691 | sg_free_table(&buf_desc->sgt[lnk->link_idx]); | ||
692 | buf_desc->is_map_ib[lnk->link_idx] = false; | ||
693 | } | ||
694 | |||
695 | /* unmap all buffers of lgr for a deleted link */ | ||
696 | static void smcr_buf_unmap_lgr(struct smc_link *lnk) | ||
697 | { | ||
698 | struct smc_link_group *lgr = lnk->lgr; | ||
699 | struct smc_buf_desc *buf_desc, *bf; | ||
700 | int i; | ||
701 | |||
702 | for (i = 0; i < SMC_RMBE_SIZES; i++) { | ||
703 | mutex_lock(&lgr->rmbs_lock); | ||
704 | list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) | ||
705 | smcr_buf_unmap_link(buf_desc, true, lnk); | ||
706 | mutex_unlock(&lgr->rmbs_lock); | ||
707 | mutex_lock(&lgr->sndbufs_lock); | ||
708 | list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], | ||
709 | list) | ||
710 | smcr_buf_unmap_link(buf_desc, false, lnk); | ||
711 | mutex_unlock(&lgr->sndbufs_lock); | ||
712 | } | ||
713 | } | ||
714 | |||
715 | static void smcr_rtoken_clear_link(struct smc_link *lnk) | ||
716 | { | ||
717 | struct smc_link_group *lgr = lnk->lgr; | ||
718 | int i; | ||
719 | |||
720 | for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { | ||
721 | lgr->rtokens[i][lnk->link_idx].rkey = 0; | ||
722 | lgr->rtokens[i][lnk->link_idx].dma_addr = 0; | ||
723 | } | ||
724 | } | ||
725 | |||
726 | /* must be called under lgr->llc_conf_mutex lock */ | ||
727 | void smcr_link_clear(struct smc_link *lnk, bool log) | ||
728 | { | ||
729 | struct smc_ib_device *smcibdev; | ||
730 | |||
731 | if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED) | ||
732 | return; | ||
733 | lnk->peer_qpn = 0; | ||
734 | smc_llc_link_clear(lnk, log); | ||
735 | smcr_buf_unmap_lgr(lnk); | ||
736 | smcr_rtoken_clear_link(lnk); | ||
737 | smc_ib_modify_qp_error(lnk); | ||
738 | smc_wr_free_link(lnk); | ||
739 | smc_ib_destroy_queue_pair(lnk); | ||
740 | smc_ib_dealloc_protection_domain(lnk); | ||
741 | smc_wr_free_link_mem(lnk); | ||
742 | put_device(&lnk->smcibdev->ibdev->dev); | ||
743 | smcibdev = lnk->smcibdev; | ||
744 | memset(lnk, 0, sizeof(struct smc_link)); | ||
745 | lnk->state = SMC_LNK_UNUSED; | ||
746 | if (!atomic_dec_return(&smcibdev->lnk_cnt)) | ||
747 | wake_up(&smcibdev->lnks_deleted); | ||
748 | } | ||
749 | |||
750 | static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, | ||
751 | struct smc_buf_desc *buf_desc) | ||
752 | { | ||
753 | int i; | ||
754 | |||
755 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) | ||
756 | smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]); | ||
757 | |||
758 | if (buf_desc->pages) | ||
759 | __free_pages(buf_desc->pages, buf_desc->order); | ||
760 | kfree(buf_desc); | ||
761 | } | ||
762 | |||
763 | static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb, | ||
764 | struct smc_buf_desc *buf_desc) | ||
765 | { | ||
766 | if (is_dmb) { | ||
767 | /* restore original buf len */ | ||
768 | buf_desc->len += sizeof(struct smcd_cdc_msg); | ||
769 | smc_ism_unregister_dmb(lgr->smcd, buf_desc); | ||
770 | } else { | ||
771 | kfree(buf_desc->cpu_addr); | ||
772 | } | ||
773 | kfree(buf_desc); | ||
774 | } | ||
775 | |||
776 | static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, | ||
777 | struct smc_buf_desc *buf_desc) | ||
778 | { | ||
779 | if (lgr->is_smcd) | ||
780 | smcd_buf_free(lgr, is_rmb, buf_desc); | ||
781 | else | ||
782 | smcr_buf_free(lgr, is_rmb, buf_desc); | ||
783 | } | ||
784 | |||
785 | static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb) | ||
786 | { | ||
787 | struct smc_buf_desc *buf_desc, *bf_desc; | ||
788 | struct list_head *buf_list; | ||
789 | int i; | ||
790 | |||
791 | for (i = 0; i < SMC_RMBE_SIZES; i++) { | ||
792 | if (is_rmb) | ||
793 | buf_list = &lgr->rmbs[i]; | ||
794 | else | ||
795 | buf_list = &lgr->sndbufs[i]; | ||
796 | list_for_each_entry_safe(buf_desc, bf_desc, buf_list, | ||
797 | list) { | ||
798 | list_del(&buf_desc->list); | ||
799 | smc_buf_free(lgr, is_rmb, buf_desc); | ||
800 | } | ||
801 | } | ||
802 | } | ||
803 | |||
804 | static void smc_lgr_free_bufs(struct smc_link_group *lgr) | ||
805 | { | ||
806 | /* free send buffers */ | ||
807 | __smc_lgr_free_bufs(lgr, false); | ||
808 | /* free rmbs */ | ||
809 | __smc_lgr_free_bufs(lgr, true); | ||
810 | } | ||
811 | |||
812 | /* remove a link group */ | ||
813 | static void smc_lgr_free(struct smc_link_group *lgr) | ||
814 | { | ||
815 | int i; | ||
816 | |||
817 | if (!lgr->is_smcd) { | ||
818 | mutex_lock(&lgr->llc_conf_mutex); | ||
819 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { | ||
820 | if (lgr->lnk[i].state != SMC_LNK_UNUSED) | ||
821 | smcr_link_clear(&lgr->lnk[i], false); | ||
822 | } | ||
823 | mutex_unlock(&lgr->llc_conf_mutex); | ||
824 | smc_llc_lgr_clear(lgr); | ||
825 | } | ||
826 | |||
827 | smc_lgr_free_bufs(lgr); | ||
828 | destroy_workqueue(lgr->tx_wq); | ||
829 | if (lgr->is_smcd) { | ||
830 | smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); | ||
831 | put_device(&lgr->smcd->dev); | ||
832 | if (!atomic_dec_return(&lgr->smcd->lgr_cnt)) | ||
833 | wake_up(&lgr->smcd->lgrs_deleted); | ||
834 | } else { | ||
835 | if (!atomic_dec_return(&lgr_cnt)) | ||
836 | wake_up(&lgrs_deleted); | ||
837 | } | ||
838 | kfree(lgr); | ||
839 | } | ||
840 | |||
841 | static void smcd_unregister_all_dmbs(struct smc_link_group *lgr) | ||
842 | { | ||
843 | int i; | ||
844 | |||
845 | for (i = 0; i < SMC_RMBE_SIZES; i++) { | ||
846 | struct smc_buf_desc *buf_desc; | ||
847 | |||
848 | list_for_each_entry(buf_desc, &lgr->rmbs[i], list) { | ||
849 | buf_desc->len += sizeof(struct smcd_cdc_msg); | ||
850 | smc_ism_unregister_dmb(lgr->smcd, buf_desc); | ||
851 | } | ||
852 | } | ||
853 | } | ||
854 | |||
855 | static void smc_sk_wake_ups(struct smc_sock *smc) | ||
856 | { | ||
857 | smc->sk.sk_write_space(&smc->sk); | ||
858 | smc->sk.sk_data_ready(&smc->sk); | ||
859 | smc->sk.sk_state_change(&smc->sk); | ||
860 | } | ||
861 | |||
862 | /* kill a connection */ | ||
863 | static void smc_conn_kill(struct smc_connection *conn, bool soft) | ||
864 | { | ||
865 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); | ||
866 | |||
867 | if (conn->lgr->is_smcd && conn->lgr->peer_shutdown) | ||
868 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; | ||
869 | else | ||
870 | smc_close_abort(conn); | ||
871 | conn->killed = 1; | ||
872 | smc->sk.sk_err = ECONNABORTED; | ||
873 | smc_sk_wake_ups(smc); | ||
874 | if (conn->lgr->is_smcd) { | ||
875 | smc_ism_unset_conn(conn); | ||
876 | if (soft) | ||
877 | tasklet_kill(&conn->rx_tsklet); | ||
878 | else | ||
879 | tasklet_unlock_wait(&conn->rx_tsklet); | ||
880 | } else { | ||
881 | smc_cdc_wait_pend_tx_wr(conn); | ||
882 | } | ||
883 | smc_lgr_unregister_conn(conn); | ||
884 | smc_close_active_abort(smc); | ||
885 | } | ||
886 | |||
887 | static void smc_lgr_cleanup(struct smc_link_group *lgr) | ||
888 | { | ||
889 | if (lgr->is_smcd) { | ||
890 | smc_ism_signal_shutdown(lgr); | ||
891 | smcd_unregister_all_dmbs(lgr); | ||
892 | } else { | ||
893 | u32 rsn = lgr->llc_termination_rsn; | ||
894 | |||
895 | if (!rsn) | ||
896 | rsn = SMC_LLC_DEL_PROG_INIT_TERM; | ||
897 | smc_llc_send_link_delete_all(lgr, false, rsn); | ||
898 | smcr_lgr_link_deactivate_all(lgr); | ||
899 | } | ||
900 | } | ||
901 | |||
902 | /* terminate link group | ||
903 | * @soft: true if link group shutdown can take its time | ||
904 | * false if immediate link group shutdown is required | ||
905 | */ | ||
906 | static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft) | ||
907 | { | ||
908 | struct smc_connection *conn; | ||
909 | struct smc_sock *smc; | ||
910 | struct rb_node *node; | ||
911 | |||
912 | if (lgr->terminating) | ||
913 | return; /* lgr already terminating */ | ||
914 | /* cancel free_work sync, will terminate when lgr->freeing is set */ | ||
915 | cancel_delayed_work_sync(&lgr->free_work); | ||
916 | lgr->terminating = 1; | ||
917 | |||
918 | /* kill remaining link group connections */ | ||
919 | read_lock_bh(&lgr->conns_lock); | ||
920 | node = rb_first(&lgr->conns_all); | ||
921 | while (node) { | ||
922 | read_unlock_bh(&lgr->conns_lock); | ||
923 | conn = rb_entry(node, struct smc_connection, alert_node); | ||
924 | smc = container_of(conn, struct smc_sock, conn); | ||
925 | sock_hold(&smc->sk); /* sock_put below */ | ||
926 | lock_sock(&smc->sk); | ||
927 | smc_conn_kill(conn, soft); | ||
928 | release_sock(&smc->sk); | ||
929 | sock_put(&smc->sk); /* sock_hold above */ | ||
930 | read_lock_bh(&lgr->conns_lock); | ||
931 | node = rb_first(&lgr->conns_all); | ||
932 | } | ||
933 | read_unlock_bh(&lgr->conns_lock); | ||
934 | smc_lgr_cleanup(lgr); | ||
935 | smc_lgr_free(lgr); | ||
936 | } | ||
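The loop above re-fetches rb_first() after every kill instead of walking with rb_next(): conns_lock is dropped while each socket is locked and killed, and smc_conn_kill() removes the node from the tree, so a cached successor could be freed under the walker. A minimal userspace sketch of the same drop-lock-and-re-read pattern, with a plain list standing in for the kernel rbtree (names illustrative, not kernel code):

```c
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct conn {
	struct conn *next;
	int id;
};

static pthread_rwlock_t conns_lock = PTHREAD_RWLOCK_INITIALIZER;
static struct conn *conns_head;		/* stands in for lgr->conns_all */

static void conn_kill(struct conn *c)
{
	/* like smc_conn_kill(): unlink under the write lock, then free;
	 * only handles the list head, which is all this demo needs
	 */
	pthread_rwlock_wrlock(&conns_lock);
	conns_head = c->next;
	pthread_rwlock_unlock(&conns_lock);
	printf("killed conn %d\n", c->id);
	free(c);
}

static void terminate_all(void)
{
	struct conn *c;

	pthread_rwlock_rdlock(&conns_lock);
	while ((c = conns_head)) {
		/* drop the lock before the heavyweight kill ... */
		pthread_rwlock_unlock(&conns_lock);
		conn_kill(c);
		/* ... and re-read the first element afterwards */
		pthread_rwlock_rdlock(&conns_lock);
	}
	pthread_rwlock_unlock(&conns_lock);
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct conn *c = malloc(sizeof(*c));

		c->id = i;
		c->next = conns_head;
		conns_head = c;
	}
	terminate_all();
	return 0;
}
```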
937 | |||
938 | /* unlink link group and schedule termination */ | ||
939 | void smc_lgr_terminate_sched(struct smc_link_group *lgr) | ||
940 | { | ||
941 | spinlock_t *lgr_lock; | ||
942 | |||
943 | smc_lgr_list_head(lgr, &lgr_lock); | ||
944 | spin_lock_bh(lgr_lock); | ||
945 | if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) { | ||
946 | spin_unlock_bh(lgr_lock); | ||
947 | return; /* lgr already terminating */ | ||
948 | } | ||
949 | list_del_init(&lgr->list); | ||
950 | lgr->freeing = 1; | ||
951 | spin_unlock_bh(lgr_lock); | ||
952 | schedule_work(&lgr->terminate_work); | ||
953 | } | ||
954 | |||
955 | /* Called when peer lgr shutdown (regularly or abnormally) is received */ | ||
956 | void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) | ||
957 | { | ||
958 | struct smc_link_group *lgr, *l; | ||
959 | LIST_HEAD(lgr_free_list); | ||
960 | |||
961 | /* run common cleanup function and build free list */ | ||
962 | spin_lock_bh(&dev->lgr_lock); | ||
963 | list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) { | ||
964 | if ((!peer_gid || lgr->peer_gid == peer_gid) && | ||
965 | (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) { | ||
966 | if (peer_gid) /* peer triggered termination */ | ||
967 | lgr->peer_shutdown = 1; | ||
968 | list_move(&lgr->list, &lgr_free_list); | ||
969 | lgr->freeing = 1; | ||
970 | } | ||
971 | } | ||
972 | spin_unlock_bh(&dev->lgr_lock); | ||
973 | |||
974 | /* cancel the regular free workers and actually free lgrs */ | ||
975 | list_for_each_entry_safe(lgr, l, &lgr_free_list, list) { | ||
976 | list_del_init(&lgr->list); | ||
977 | schedule_work(&lgr->terminate_work); | ||
978 | } | ||
979 | } | ||
980 | |||
981 | /* Called when an SMCD device is removed or the smc module is unloaded */ | ||
982 | void smc_smcd_terminate_all(struct smcd_dev *smcd) | ||
983 | { | ||
984 | struct smc_link_group *lgr, *lg; | ||
985 | LIST_HEAD(lgr_free_list); | ||
986 | |||
987 | spin_lock_bh(&smcd->lgr_lock); | ||
988 | list_splice_init(&smcd->lgr_list, &lgr_free_list); | ||
989 | list_for_each_entry(lgr, &lgr_free_list, list) | ||
990 | lgr->freeing = 1; | ||
991 | spin_unlock_bh(&smcd->lgr_lock); | ||
992 | |||
993 | list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) { | ||
994 | list_del_init(&lgr->list); | ||
995 | __smc_lgr_terminate(lgr, false); | ||
996 | } | ||
997 | |||
998 | if (atomic_read(&smcd->lgr_cnt)) | ||
999 | wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt)); | ||
1000 | } | ||
1001 | |||
1002 | /* Called when an SMCR device is removed or the smc module is unloaded. | ||
1003 | * If smcibdev is given, all SMCR link groups using this device are terminated. | ||
1004 | * If smcibdev is NULL, all SMCR link groups are terminated. | ||
1005 | */ | ||
1006 | void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) | ||
1007 | { | ||
1008 | struct smc_link_group *lgr, *lg; | ||
1009 | LIST_HEAD(lgr_free_list); | ||
1010 | int i; | ||
1011 | |||
1012 | spin_lock_bh(&smc_lgr_list.lock); | ||
1013 | if (!smcibdev) { | ||
1014 | list_splice_init(&smc_lgr_list.list, &lgr_free_list); | ||
1015 | list_for_each_entry(lgr, &lgr_free_list, list) | ||
1016 | lgr->freeing = 1; | ||
1017 | } else { | ||
1018 | list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) { | ||
1019 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { | ||
1020 | if (lgr->lnk[i].smcibdev == smcibdev) | ||
1021 | smcr_link_down_cond_sched(&lgr->lnk[i]); | ||
1022 | } | ||
1023 | } | ||
1024 | } | ||
1025 | spin_unlock_bh(&smc_lgr_list.lock); | ||
1026 | |||
1027 | list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) { | ||
1028 | list_del_init(&lgr->list); | ||
1029 | smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM); | ||
1030 | __smc_lgr_terminate(lgr, false); | ||
1031 | } | ||
1032 | |||
1033 | if (smcibdev) { | ||
1034 | if (atomic_read(&smcibdev->lnk_cnt)) | ||
1035 | wait_event(smcibdev->lnks_deleted, | ||
1036 | !atomic_read(&smcibdev->lnk_cnt)); | ||
1037 | } else { | ||
1038 | if (atomic_read(&lgr_cnt)) | ||
1039 | wait_event(lgrs_deleted, !atomic_read(&lgr_cnt)); | ||
1040 | } | ||
1041 | } | ||
1042 | |||
1043 | /* set new lgr type and clear all asymmetric link tagging */ | ||
1044 | void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type) | ||
1045 | { | ||
1046 | char *lgr_type = ""; | ||
1047 | int i; | ||
1048 | |||
1049 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) | ||
1050 | if (smc_link_usable(&lgr->lnk[i])) | ||
1051 | lgr->lnk[i].link_is_asym = false; | ||
1052 | if (lgr->type == new_type) | ||
1053 | return; | ||
1054 | lgr->type = new_type; | ||
1055 | |||
1056 | switch (lgr->type) { | ||
1057 | case SMC_LGR_NONE: | ||
1058 | lgr_type = "NONE"; | ||
1059 | break; | ||
1060 | case SMC_LGR_SINGLE: | ||
1061 | lgr_type = "SINGLE"; | ||
1062 | break; | ||
1063 | case SMC_LGR_SYMMETRIC: | ||
1064 | lgr_type = "SYMMETRIC"; | ||
1065 | break; | ||
1066 | case SMC_LGR_ASYMMETRIC_PEER: | ||
1067 | lgr_type = "ASYMMETRIC_PEER"; | ||
1068 | break; | ||
1069 | case SMC_LGR_ASYMMETRIC_LOCAL: | ||
1070 | lgr_type = "ASYMMETRIC_LOCAL"; | ||
1071 | break; | ||
1072 | } | ||
1073 | pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: " | ||
1074 | "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id, | ||
1075 | lgr_type, lgr->pnet_id); | ||
1076 | } | ||
1077 | |||
1078 | /* set new lgr type and tag a link as asymmetric */ | ||
1079 | void smcr_lgr_set_type_asym(struct smc_link_group *lgr, | ||
1080 | enum smc_lgr_type new_type, int asym_lnk_idx) | ||
1081 | { | ||
1082 | smcr_lgr_set_type(lgr, new_type); | ||
1083 | lgr->lnk[asym_lnk_idx].link_is_asym = true; | ||
1084 | } | ||
1085 | |||
1086 | /* abort connection, abort_work scheduled from tasklet context */ | ||
1087 | static void smc_conn_abort_work(struct work_struct *work) | ||
1088 | { | ||
1089 | struct smc_connection *conn = container_of(work, | ||
1090 | struct smc_connection, | ||
1091 | abort_work); | ||
1092 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); | ||
1093 | |||
1094 | lock_sock(&smc->sk); | ||
1095 | smc_conn_kill(conn, true); | ||
1096 | release_sock(&smc->sk); | ||
1097 | sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */ | ||
1098 | } | ||
1099 | |||
1100 | void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport) | ||
1101 | { | ||
1102 | struct smc_link_group *lgr, *n; | ||
1103 | |||
1104 | list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) { | ||
1105 | struct smc_link *link; | ||
1106 | |||
1107 | if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id, | ||
1108 | SMC_MAX_PNETID_LEN) || | ||
1109 | lgr->type == SMC_LGR_SYMMETRIC || | ||
1110 | lgr->type == SMC_LGR_ASYMMETRIC_PEER) | ||
1111 | continue; | ||
1112 | |||
1113 | /* trigger local add link processing */ | ||
1114 | link = smc_llc_usable_link(lgr); | ||
1115 | if (link) | ||
1116 | smc_llc_add_link_local(link); | ||
1117 | } | ||
1118 | } | ||
1119 | |||
1120 | /* link is down - switch connections to alternate link, | ||
1121 | * must be called under lgr->llc_conf_mutex lock | ||
1122 | */ | ||
1123 | static void smcr_link_down(struct smc_link *lnk) | ||
1124 | { | ||
1125 | struct smc_link_group *lgr = lnk->lgr; | ||
1126 | struct smc_link *to_lnk; | ||
1127 | int del_link_id; | ||
1128 | |||
1129 | if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list)) | ||
1130 | return; | ||
1131 | |||
1132 | to_lnk = smc_switch_conns(lgr, lnk, true); | ||
1133 | if (!to_lnk) { /* no backup link available */ | ||
1134 | smcr_link_clear(lnk, true); | ||
1135 | return; | ||
1136 | } | ||
1137 | smcr_lgr_set_type(lgr, SMC_LGR_SINGLE); | ||
1138 | del_link_id = lnk->link_id; | ||
1139 | |||
1140 | if (lgr->role == SMC_SERV) { | ||
1141 | /* trigger local delete link processing */ | ||
1142 | smc_llc_srv_delete_link_local(to_lnk, del_link_id); | ||
1143 | } else { | ||
1144 | if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { | ||
1145 | /* another llc task is ongoing */ | ||
1146 | mutex_unlock(&lgr->llc_conf_mutex); | ||
1147 | wait_event_timeout(lgr->llc_flow_waiter, | ||
1148 | (list_empty(&lgr->list) || | ||
1149 | lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE), | ||
1150 | SMC_LLC_WAIT_TIME); | ||
1151 | mutex_lock(&lgr->llc_conf_mutex); | ||
1152 | } | ||
1153 | if (!list_empty(&lgr->list)) { | ||
1154 | smc_llc_send_delete_link(to_lnk, del_link_id, | ||
1155 | SMC_LLC_REQ, true, | ||
1156 | SMC_LLC_DEL_LOST_PATH); | ||
1157 | smcr_link_clear(lnk, true); | ||
1158 | } | ||
1159 | wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */ | ||
1160 | } | ||
1161 | } | ||
1162 | |||
1163 | /* must be called under lgr->llc_conf_mutex lock */ | ||
1164 | void smcr_link_down_cond(struct smc_link *lnk) | ||
1165 | { | ||
1166 | if (smc_link_downing(&lnk->state)) | ||
1167 | smcr_link_down(lnk); | ||
1168 | } | ||
1169 | |||
1170 | /* will get the lgr->llc_conf_mutex lock */ | ||
1171 | void smcr_link_down_cond_sched(struct smc_link *lnk) | ||
1172 | { | ||
1173 | if (smc_link_downing(&lnk->state)) | ||
1174 | schedule_work(&lnk->link_down_wrk); | ||
1175 | } | ||
1176 | |||
1177 | void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport) | ||
1178 | { | ||
1179 | struct smc_link_group *lgr, *n; | ||
1180 | int i; | ||
1181 | |||
1182 | list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) { | ||
1183 | if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id, | ||
1184 | SMC_MAX_PNETID_LEN)) | ||
1185 | continue; /* lgr is not affected */ | ||
1186 | if (list_empty(&lgr->list)) | ||
1187 | continue; | ||
1188 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { | ||
1189 | struct smc_link *lnk = &lgr->lnk[i]; | ||
1190 | |||
1191 | if (smc_link_usable(lnk) && | ||
1192 | lnk->smcibdev == smcibdev && lnk->ibport == ibport) | ||
1193 | smcr_link_down_cond_sched(lnk); | ||
1194 | } | ||
1195 | } | ||
1196 | } | ||
1197 | |||
1198 | static void smc_link_down_work(struct work_struct *work) | ||
1199 | { | ||
1200 | struct smc_link *link = container_of(work, struct smc_link, | ||
1201 | link_down_wrk); | ||
1202 | struct smc_link_group *lgr = link->lgr; | ||
1203 | |||
1204 | if (list_empty(&lgr->list)) | ||
1205 | return; | ||
1206 | wake_up_all(&lgr->llc_msg_waiter); | ||
1207 | mutex_lock(&lgr->llc_conf_mutex); | ||
1208 | smcr_link_down(link); | ||
1209 | mutex_unlock(&lgr->llc_conf_mutex); | ||
1210 | } | ||
1211 | |||
1212 | static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev, | ||
1213 | struct netdev_nested_priv *priv) | ||
1214 | { | ||
1215 | unsigned short *vlan_id = (unsigned short *)priv->data; | ||
1216 | |||
1217 | if (is_vlan_dev(lower_dev)) { | ||
1218 | *vlan_id = vlan_dev_vlan_id(lower_dev); | ||
1219 | return 1; | ||
1220 | } | ||
1221 | |||
1222 | return 0; | ||
1223 | } | ||
1224 | |||
1225 | /* Determine vlan of internal TCP socket. */ | ||
1226 | int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini) | ||
1227 | { | ||
1228 | struct dst_entry *dst = sk_dst_get(clcsock->sk); | ||
1229 | struct netdev_nested_priv priv; | ||
1230 | struct net_device *ndev; | ||
1231 | int rc = 0; | ||
1232 | |||
1233 | ini->vlan_id = 0; | ||
1234 | if (!dst) { | ||
1235 | rc = -ENOTCONN; | ||
1236 | goto out; | ||
1237 | } | ||
1238 | if (!dst->dev) { | ||
1239 | rc = -ENODEV; | ||
1240 | goto out_rel; | ||
1241 | } | ||
1242 | |||
1243 | ndev = dst->dev; | ||
1244 | if (is_vlan_dev(ndev)) { | ||
1245 | ini->vlan_id = vlan_dev_vlan_id(ndev); | ||
1246 | goto out_rel; | ||
1247 | } | ||
1248 | |||
1249 | priv.data = (void *)&ini->vlan_id; | ||
1250 | rtnl_lock(); | ||
1251 | netdev_walk_all_lower_dev(ndev, smc_vlan_by_tcpsk_walk, &priv); | ||
1252 | rtnl_unlock(); | ||
1253 | |||
1254 | out_rel: | ||
1255 | dst_release(dst); | ||
1256 | out: | ||
1257 | return rc; | ||
1258 | } | ||
1259 | |||
1260 | static bool smcr_lgr_match(struct smc_link_group *lgr, | ||
1261 | struct smc_clc_msg_local *lcl, | ||
1262 | enum smc_lgr_role role, u32 clcqpn) | ||
1263 | { | ||
1264 | int i; | ||
1265 | |||
1266 | if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) || | ||
1267 | lgr->role != role) | ||
1268 | return false; | ||
1269 | |||
1270 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { | ||
1271 | if (!smc_link_active(&lgr->lnk[i])) | ||
1272 | continue; | ||
1273 | if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) && | ||
1274 | !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) && | ||
1275 | !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac))) | ||
1276 | return true; | ||
1277 | } | ||
1278 | return false; | ||
1279 | } | ||
1280 | |||
1281 | static bool smcd_lgr_match(struct smc_link_group *lgr, | ||
1282 | struct smcd_dev *smcismdev, u64 peer_gid) | ||
1283 | { | ||
1284 | return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev; | ||
1285 | } | ||
1286 | |||
1287 | /* create a new SMC connection (and a new link group if necessary) */ | ||
1288 | int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) | ||
1289 | { | ||
1290 | struct smc_connection *conn = &smc->conn; | ||
1291 | struct list_head *lgr_list; | ||
1292 | struct smc_link_group *lgr; | ||
1293 | enum smc_lgr_role role; | ||
1294 | spinlock_t *lgr_lock; | ||
1295 | int rc = 0; | ||
1296 | |||
1297 | lgr_list = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_list : | ||
1298 | &smc_lgr_list.list; | ||
1299 | lgr_lock = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_lock : | ||
1300 | &smc_lgr_list.lock; | ||
1301 | ini->first_contact_local = 1; | ||
1302 | role = smc->listen_smc ? SMC_SERV : SMC_CLNT; | ||
1303 | if (role == SMC_CLNT && ini->first_contact_peer) | ||
1304 | /* create new link group as well */ | ||
1305 | goto create; | ||
1306 | |||
1307 | /* determine if an existing link group can be reused */ | ||
1308 | spin_lock_bh(lgr_lock); | ||
1309 | list_for_each_entry(lgr, lgr_list, list) { | ||
1310 | write_lock_bh(&lgr->conns_lock); | ||
1311 | if ((ini->is_smcd ? | ||
1312 | smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected], | ||
1313 | ini->ism_peer_gid[ini->ism_selected]) : | ||
1314 | smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) && | ||
1315 | !lgr->sync_err && | ||
1316 | (ini->smcd_version == SMC_V2 || | ||
1317 | lgr->vlan_id == ini->vlan_id) && | ||
1318 | (role == SMC_CLNT || ini->is_smcd || | ||
1319 | (lgr->conns_num < SMC_RMBS_PER_LGR_MAX && | ||
1320 | !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) { | ||
1321 | /* link group found */ | ||
1322 | ini->first_contact_local = 0; | ||
1323 | conn->lgr = lgr; | ||
1324 | rc = smc_lgr_register_conn(conn, false); | ||
1325 | write_unlock_bh(&lgr->conns_lock); | ||
1326 | if (!rc && delayed_work_pending(&lgr->free_work)) | ||
1327 | cancel_delayed_work(&lgr->free_work); | ||
1328 | break; | ||
1329 | } | ||
1330 | write_unlock_bh(&lgr->conns_lock); | ||
1331 | } | ||
1332 | spin_unlock_bh(lgr_lock); | ||
1333 | if (rc) | ||
1334 | return rc; | ||
1335 | |||
1336 | if (role == SMC_CLNT && !ini->first_contact_peer && | ||
1337 | ini->first_contact_local) { | ||
1338 | /* Server reuses a link group, but Client wants to start | ||
1339 | * a new one; | ||
1340 | * send an out_of_sync decline, reason: synchronization error | ||
1341 | */ | ||
1342 | return SMC_CLC_DECL_SYNCERR; | ||
1343 | } | ||
1344 | |||
1345 | create: | ||
1346 | if (ini->first_contact_local) { | ||
1347 | rc = smc_lgr_create(smc, ini); | ||
1348 | if (rc) | ||
1349 | goto out; | ||
1350 | lgr = conn->lgr; | ||
1351 | write_lock_bh(&lgr->conns_lock); | ||
1352 | rc = smc_lgr_register_conn(conn, true); | ||
1353 | write_unlock_bh(&lgr->conns_lock); | ||
1354 | if (rc) | ||
1355 | goto out; | ||
1356 | } | ||
1357 | conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; | ||
1358 | conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; | ||
1359 | conn->urg_state = SMC_URG_READ; | ||
1360 | init_waitqueue_head(&conn->cdc_pend_tx_wq); | ||
1361 | INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work); | ||
1362 | if (ini->is_smcd) { | ||
1363 | conn->rx_off = sizeof(struct smcd_cdc_msg); | ||
1364 | smcd_cdc_rx_init(conn); /* init tasklet for this conn */ | ||
1365 | } else { | ||
1366 | conn->rx_off = 0; | ||
1367 | } | ||
1368 | #ifndef KERNEL_HAS_ATOMIC64 | ||
1369 | spin_lock_init(&conn->acurs_lock); | ||
1370 | #endif | ||
1371 | |||
1372 | out: | ||
1373 | return rc; | ||
1374 | } | ||
1375 | |||
1376 | /* convert the RMB size into the compressed notation - minimum 16K. | ||
1377 | * In contrast to plain ilog2, this rounds towards the next power of 2, | ||
1378 | * so the socket application gets at least its desired sndbuf / rcvbuf size. | ||
1379 | */ | ||
1380 | static u8 smc_compress_bufsize(int size) | ||
1381 | { | ||
1382 | u8 compressed; | ||
1383 | |||
1384 | if (size <= SMC_BUF_MIN_SIZE) | ||
1385 | return 0; | ||
1386 | |||
1387 | size = (size - 1) >> 14; | ||
1388 | compressed = ilog2(size) + 1; | ||
1389 | if (compressed >= SMC_RMBE_SIZES) | ||
1390 | compressed = SMC_RMBE_SIZES - 1; | ||
1391 | return compressed; | ||
1392 | } | ||
1393 | |||
1394 | /* convert the RMB size from compressed notation into integer */ | ||
1395 | int smc_uncompress_bufsize(u8 compressed) | ||
1396 | { | ||
1397 | u32 size; | ||
1398 | |||
1399 | size = 0x00000001 << (((int)compressed) + 14); | ||
1400 | return (int)size; | ||
1401 | } | ||
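A worked round-trip of the two size helpers above, as a standalone sketch with the kernel's ilog2() open-coded (SMC_BUF_MIN_SIZE and SMC_RMBE_SIZES match the definitions in smc_core.h below):

```c
/* Standalone demo of the compressed RMB-size notation: sizes encode as
 * log2(size / 16K), rounded up, capped at SMC_RMBE_SIZES - 1.
 */
#include <stdio.h>

#define SMC_BUF_MIN_SIZE 16384	/* minimum RMB size, encodes to 0 */
#define SMC_RMBE_SIZES   16	/* 16 distinct codes: 0..15 */

static unsigned char compress_bufsize(int size)
{
	unsigned char compressed = 0;

	if (size <= SMC_BUF_MIN_SIZE)
		return 0;
	size = (size - 1) >> 14;	/* count 16K units, rounding up */
	while (size) {			/* open-coded ilog2(size) + 1 */
		compressed++;
		size >>= 1;
	}
	if (compressed >= SMC_RMBE_SIZES)
		compressed = SMC_RMBE_SIZES - 1;
	return compressed;
}

static int uncompress_bufsize(unsigned char compressed)
{
	return 1 << (compressed + 14);
}

int main(void)
{
	int sizes[] = { 8192, 16384, 16385, 65536, 100000 };

	/* e.g. 100000 -> code 3 -> 131072: the app gets at least its ask */
	for (unsigned i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("%6d -> code %u -> %d\n", sizes[i],
		       compress_bufsize(sizes[i]),
		       uncompress_bufsize(compress_bufsize(sizes[i])));
	return 0;
}
```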
1402 | |||
1403 | /* try to reuse a sndbuf or rmb description slot for a certain | ||
1404 | * buffer size; if not available, return NULL | ||
1405 | */ | ||
1406 | static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize, | ||
1407 | struct mutex *lock, | ||
1408 | struct list_head *buf_list) | ||
1409 | { | ||
1410 | struct smc_buf_desc *buf_slot; | ||
1411 | |||
1412 | mutex_lock(lock); | ||
1413 | list_for_each_entry(buf_slot, buf_list, list) { | ||
1414 | if (cmpxchg(&buf_slot->used, 0, 1) == 0) { | ||
1415 | mutex_unlock(lock); | ||
1416 | return buf_slot; | ||
1417 | } | ||
1418 | } | ||
1419 | mutex_unlock(lock); | ||
1420 | return NULL; | ||
1421 | } | ||
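The cmpxchg() above is what makes slot reuse safe: the list walk is serialized by the mutex, but the `used` flag is cleared on release paths (for example in smc_buf_unuse() earlier in this file) without taking that mutex, so the claim itself must be an atomic compare-and-swap. A portable C11 sketch of the same claim/release idiom (illustrative only):

```c
#include <stdatomic.h>
#include <stdio.h>

struct buf_slot {
	atomic_int used;	/* 0 = free, 1 = claimed */
	int len;
};

static struct buf_slot *get_slot(struct buf_slot *slots, int n)
{
	for (int i = 0; i < n; i++) {
		int expected = 0;

		/* the claim succeeds for exactly one caller */
		if (atomic_compare_exchange_strong(&slots[i].used,
						   &expected, 1))
			return &slots[i];
	}
	return NULL;	/* no reusable slot; caller allocates a new one */
}

static void put_slot(struct buf_slot *slot)
{
	atomic_store(&slot->used, 0);	/* like buf_desc->used = 0 */
}

int main(void)
{
	struct buf_slot slots[2] = { { 1, 16384 }, { 0, 32768 } };
	struct buf_slot *s = get_slot(slots, 2);

	printf("claimed slot with len %d\n", s ? s->len : -1);
	if (s)
		put_slot(s);
	return 0;
}
```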
1422 | |||
1423 | /* one of the conditions for announcing a receiver's current window size is | ||
1424 | * that it "results in a minimum increase in the window size of 10% of the | ||
1425 | * receive buffer space" [RFC7609] | ||
1426 | */ | ||
1427 | static inline int smc_rmb_wnd_update_limit(int rmbe_size) | ||
1428 | { | ||
1429 | return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); | ||
1430 | } | ||
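For example, with a 64 KiB RMB the limit works out to max(65536 / 10, SOCK_MIN_SNDBUF / 2) = 6553 bytes (assuming SOCK_MIN_SNDBUF / 2 is the smaller operand), so a consumer-cursor update is only advertised once roughly a tenth of the receive buffer has been freed, which bounds CDC update traffic.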
1431 | |||
1432 | /* map an rmb buf to a link */ | ||
1433 | static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb, | ||
1434 | struct smc_link *lnk) | ||
1435 | { | ||
1436 | int rc; | ||
1437 | |||
1438 | if (buf_desc->is_map_ib[lnk->link_idx]) | ||
1439 | return 0; | ||
1440 | |||
1441 | rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL); | ||
1442 | if (rc) | ||
1443 | return rc; | ||
1444 | sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl, | ||
1445 | buf_desc->cpu_addr, buf_desc->len); | ||
1446 | |||
1447 | /* map sg table to DMA address */ | ||
1448 | rc = smc_ib_buf_map_sg(lnk, buf_desc, | ||
1449 | is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); | ||
1450 | /* SMC protocol depends on mapping to one DMA address only */ | ||
1451 | if (rc != 1) { | ||
1452 | rc = -EAGAIN; | ||
1453 | goto free_table; | ||
1454 | } | ||
1455 | |||
1456 | /* create a new memory region for the RMB */ | ||
1457 | if (is_rmb) { | ||
1458 | rc = smc_ib_get_memory_region(lnk->roce_pd, | ||
1459 | IB_ACCESS_REMOTE_WRITE | | ||
1460 | IB_ACCESS_LOCAL_WRITE, | ||
1461 | buf_desc, lnk->link_idx); | ||
1462 | if (rc) | ||
1463 | goto buf_unmap; | ||
1464 | smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE); | ||
1465 | } | ||
1466 | buf_desc->is_map_ib[lnk->link_idx] = true; | ||
1467 | return 0; | ||
1468 | |||
1469 | buf_unmap: | ||
1470 | smc_ib_buf_unmap_sg(lnk, buf_desc, | ||
1471 | is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); | ||
1472 | free_table: | ||
1473 | sg_free_table(&buf_desc->sgt[lnk->link_idx]); | ||
1474 | return rc; | ||
1475 | } | ||
1476 | |||
1477 | /* register a new rmb on IB device, | ||
1478 | * must be called under lgr->llc_conf_mutex lock | ||
1479 | */ | ||
1480 | int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc) | ||
1481 | { | ||
1482 | if (list_empty(&link->lgr->list)) | ||
1483 | return -ENOLINK; | ||
1484 | if (!rmb_desc->is_reg_mr[link->link_idx]) { | ||
1485 | /* register memory region for new rmb */ | ||
1486 | if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) { | ||
1487 | rmb_desc->is_reg_err = true; | ||
1488 | return -EFAULT; | ||
1489 | } | ||
1490 | rmb_desc->is_reg_mr[link->link_idx] = true; | ||
1491 | } | ||
1492 | return 0; | ||
1493 | } | ||
1494 | |||
1495 | static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock, | ||
1496 | struct list_head *lst, bool is_rmb) | ||
1497 | { | ||
1498 | struct smc_buf_desc *buf_desc, *bf; | ||
1499 | int rc = 0; | ||
1500 | |||
1501 | mutex_lock(lock); | ||
1502 | list_for_each_entry_safe(buf_desc, bf, lst, list) { | ||
1503 | if (!buf_desc->used) | ||
1504 | continue; | ||
1505 | rc = smcr_buf_map_link(buf_desc, is_rmb, lnk); | ||
1506 | if (rc) | ||
1507 | goto out; | ||
1508 | } | ||
1509 | out: | ||
1510 | mutex_unlock(lock); | ||
1511 | return rc; | ||
1512 | } | ||
1513 | |||
1514 | /* map all used buffers of lgr for a new link */ | ||
1515 | int smcr_buf_map_lgr(struct smc_link *lnk) | ||
1516 | { | ||
1517 | struct smc_link_group *lgr = lnk->lgr; | ||
1518 | int i, rc = 0; | ||
1519 | |||
1520 | for (i = 0; i < SMC_RMBE_SIZES; i++) { | ||
1521 | rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock, | ||
1522 | &lgr->rmbs[i], true); | ||
1523 | if (rc) | ||
1524 | return rc; | ||
1525 | rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock, | ||
1526 | &lgr->sndbufs[i], false); | ||
1527 | if (rc) | ||
1528 | return rc; | ||
1529 | } | ||
1530 | return 0; | ||
1531 | } | ||
1532 | |||
1533 | /* register all used buffers of lgr for a new link, | ||
1534 | * must be called under lgr->llc_conf_mutex lock | ||
1535 | */ | ||
1536 | int smcr_buf_reg_lgr(struct smc_link *lnk) | ||
1537 | { | ||
1538 | struct smc_link_group *lgr = lnk->lgr; | ||
1539 | struct smc_buf_desc *buf_desc, *bf; | ||
1540 | int i, rc = 0; | ||
1541 | |||
1542 | mutex_lock(&lgr->rmbs_lock); | ||
1543 | for (i = 0; i < SMC_RMBE_SIZES; i++) { | ||
1544 | list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) { | ||
1545 | if (!buf_desc->used) | ||
1546 | continue; | ||
1547 | rc = smcr_link_reg_rmb(lnk, buf_desc); | ||
1548 | if (rc) | ||
1549 | goto out; | ||
1550 | } | ||
1551 | } | ||
1552 | out: | ||
1553 | mutex_unlock(&lgr->rmbs_lock); | ||
1554 | return rc; | ||
1555 | } | ||
1556 | |||
1557 | static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, | ||
1558 | bool is_rmb, int bufsize) | ||
1559 | { | ||
1560 | struct smc_buf_desc *buf_desc; | ||
1561 | |||
1562 | /* try to alloc a new buffer */ | ||
1563 | buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); | ||
1564 | if (!buf_desc) | ||
1565 | return ERR_PTR(-ENOMEM); | ||
1566 | |||
1567 | buf_desc->order = get_order(bufsize); | ||
1568 | buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN | | ||
1569 | __GFP_NOMEMALLOC | __GFP_COMP | | ||
1570 | __GFP_NORETRY | __GFP_ZERO, | ||
1571 | buf_desc->order); | ||
1572 | if (!buf_desc->pages) { | ||
1573 | kfree(buf_desc); | ||
1574 | return ERR_PTR(-EAGAIN); | ||
1575 | } | ||
1576 | buf_desc->cpu_addr = (void *)page_address(buf_desc->pages); | ||
1577 | buf_desc->len = bufsize; | ||
1578 | return buf_desc; | ||
1579 | } | ||
1580 | |||
1581 | /* map buf_desc on all usable links, | ||
1582 | * unused buffers stay mapped as long as the link is up | ||
1583 | */ | ||
1584 | static int smcr_buf_map_usable_links(struct smc_link_group *lgr, | ||
1585 | struct smc_buf_desc *buf_desc, bool is_rmb) | ||
1586 | { | ||
1587 | int i, rc = 0, cnt = 0; | ||
1588 | |||
1589 | /* protect against parallel link reconfiguration */ | ||
1590 | mutex_lock(&lgr->llc_conf_mutex); | ||
1591 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { | ||
1592 | struct smc_link *lnk = &lgr->lnk[i]; | ||
1593 | |||
1594 | if (!smc_link_usable(lnk)) | ||
1595 | continue; | ||
1596 | if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) { | ||
1597 | rc = -ENOMEM; | ||
1598 | goto out; | ||
1599 | } | ||
1600 | cnt++; | ||
1601 | } | ||
1602 | out: | ||
1603 | mutex_unlock(&lgr->llc_conf_mutex); | ||
1604 | if (!rc && !cnt) | ||
1605 | rc = -EINVAL; | ||
1606 | return rc; | ||
1607 | } | ||
1608 | |||
1609 | #define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */ | ||
1610 | |||
1611 | static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr, | ||
1612 | bool is_dmb, int bufsize) | ||
1613 | { | ||
1614 | struct smc_buf_desc *buf_desc; | ||
1615 | int rc; | ||
1616 | |||
1617 | if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES) | ||
1618 | return ERR_PTR(-EAGAIN); | ||
1619 | |||
1620 | /* try to alloc a new DMB */ | ||
1621 | buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); | ||
1622 | if (!buf_desc) | ||
1623 | return ERR_PTR(-ENOMEM); | ||
1624 | if (is_dmb) { | ||
1625 | rc = smc_ism_register_dmb(lgr, bufsize, buf_desc); | ||
1626 | if (rc) { | ||
1627 | kfree(buf_desc); | ||
1628 | if (rc == -ENOMEM) | ||
1629 | return ERR_PTR(-EAGAIN); | ||
1630 | if (rc == -ENOSPC) | ||
1631 | return ERR_PTR(-ENOSPC); | ||
1632 | return ERR_PTR(-EIO); | ||
1633 | } | ||
1634 | buf_desc->pages = virt_to_page(buf_desc->cpu_addr); | ||
1635 | /* CDC header stored in buf. So, pretend it was smaller */ | ||
1636 | buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg); | ||
1637 | } else { | ||
1638 | buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL | | ||
1639 | __GFP_NOWARN | __GFP_NORETRY | | ||
1640 | __GFP_NOMEMALLOC); | ||
1641 | if (!buf_desc->cpu_addr) { | ||
1642 | kfree(buf_desc); | ||
1643 | return ERR_PTR(-EAGAIN); | ||
1644 | } | ||
1645 | buf_desc->len = bufsize; | ||
1646 | } | ||
1647 | return buf_desc; | ||
1648 | } | ||
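The length bookkeeping here mirrors smcd_buf_free() and smcd_unregister_all_dmbs() above: sizeof(struct smcd_cdc_msg) is carved off the front of a DMB for the CDC header when it is registered, and added back before it is unregistered. A standalone sketch of the layout math (the header struct below is a stand-in; the real definition lives in smc_cdc.h):

```c
/* Illustrative layout math for an SMC-D DMB, not kernel code: the CDC
 * header lives at the start of the buffer, so usable payload length is
 * bufsize - sizeof(header) and rx data starts at rx_off == sizeof(header),
 * cf. smc_conn_create() above.
 */
#include <stdio.h>
#include <stddef.h>

struct smcd_cdc_msg_demo {	/* stand-in for struct smcd_cdc_msg */
	unsigned char raw[48];
};

int main(void)
{
	int bufsize = 16384;			/* smallest DMB */
	size_t hdr = sizeof(struct smcd_cdc_msg_demo);

	printf("registered len : %d\n", bufsize);
	printf("usable len     : %zu\n", bufsize - hdr);/* buf_desc->len */
	printf("rx data offset : %zu\n", hdr);		/* conn->rx_off */
	return 0;
}
```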
1649 | |||
1650 | static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) | ||
1651 | { | ||
1652 | struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM); | ||
1653 | struct smc_connection *conn = &smc->conn; | ||
1654 | struct smc_link_group *lgr = conn->lgr; | ||
1655 | struct list_head *buf_list; | ||
1656 | int bufsize, bufsize_short; | ||
1657 | struct mutex *lock; /* lock buffer list */ | ||
1658 | int sk_buf_size; | ||
1659 | |||
1660 | if (is_rmb) | ||
1661 | /* use socket recv buffer size (w/o overhead) as start value */ | ||
1662 | sk_buf_size = smc->sk.sk_rcvbuf / 2; | ||
1663 | else | ||
1664 | /* use socket send buffer size (w/o overhead) as start value */ | ||
1665 | sk_buf_size = smc->sk.sk_sndbuf / 2; | ||
1666 | |||
1667 | for (bufsize_short = smc_compress_bufsize(sk_buf_size); | ||
1668 | bufsize_short >= 0; bufsize_short--) { | ||
1669 | |||
1670 | if (is_rmb) { | ||
1671 | lock = &lgr->rmbs_lock; | ||
1672 | buf_list = &lgr->rmbs[bufsize_short]; | ||
1673 | } else { | ||
1674 | lock = &lgr->sndbufs_lock; | ||
1675 | buf_list = &lgr->sndbufs[bufsize_short]; | ||
1676 | } | ||
1677 | bufsize = smc_uncompress_bufsize(bufsize_short); | ||
1678 | if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC) | ||
1679 | continue; | ||
1680 | |||
1681 | /* check for reusable slot in the link group */ | ||
1682 | buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list); | ||
1683 | if (buf_desc) { | ||
1684 | memset(buf_desc->cpu_addr, 0, bufsize); | ||
1685 | break; /* found reusable slot */ | ||
1686 | } | ||
1687 | |||
1688 | if (is_smcd) | ||
1689 | buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize); | ||
1690 | else | ||
1691 | buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize); | ||
1692 | |||
1693 | if (PTR_ERR(buf_desc) == -ENOMEM) | ||
1694 | break; | ||
1695 | if (IS_ERR(buf_desc)) | ||
1696 | continue; | ||
1697 | |||
1698 | buf_desc->used = 1; | ||
1699 | mutex_lock(lock); | ||
1700 | list_add(&buf_desc->list, buf_list); | ||
1701 | mutex_unlock(lock); | ||
1702 | break; /* found */ | ||
1703 | } | ||
1704 | |||
1705 | if (IS_ERR(buf_desc)) | ||
1706 | return PTR_ERR(buf_desc); | ||
1707 | |||
1708 | if (!is_smcd) { | ||
1709 | if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) { | ||
1710 | smcr_buf_unuse(buf_desc, lgr); | ||
1711 | return -ENOMEM; | ||
1712 | } | ||
1713 | } | ||
1714 | |||
1715 | if (is_rmb) { | ||
1716 | conn->rmb_desc = buf_desc; | ||
1717 | conn->rmbe_size_short = bufsize_short; | ||
1718 | smc->sk.sk_rcvbuf = bufsize * 2; | ||
1719 | atomic_set(&conn->bytes_to_rcv, 0); | ||
1720 | conn->rmbe_update_limit = | ||
1721 | smc_rmb_wnd_update_limit(buf_desc->len); | ||
1722 | if (is_smcd) | ||
1723 | smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */ | ||
1724 | } else { | ||
1725 | conn->sndbuf_desc = buf_desc; | ||
1726 | smc->sk.sk_sndbuf = bufsize * 2; | ||
1727 | atomic_set(&conn->sndbuf_space, bufsize); | ||
1728 | } | ||
1729 | return 0; | ||
1730 | } | ||
1731 | |||
1732 | void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) | ||
1733 | { | ||
1734 | if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk)) | ||
1735 | return; | ||
1736 | smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); | ||
1737 | } | ||
1738 | |||
1739 | void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) | ||
1740 | { | ||
1741 | if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk)) | ||
1742 | return; | ||
1743 | smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); | ||
1744 | } | ||
1745 | |||
1746 | void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) | ||
1747 | { | ||
1748 | int i; | ||
1749 | |||
1750 | if (!conn->lgr || conn->lgr->is_smcd) | ||
1751 | return; | ||
1752 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { | ||
1753 | if (!smc_link_active(&conn->lgr->lnk[i])) | ||
1754 | continue; | ||
1755 | smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc, | ||
1756 | DMA_FROM_DEVICE); | ||
1757 | } | ||
1758 | } | ||
1759 | |||
1760 | void smc_rmb_sync_sg_for_device(struct smc_connection *conn) | ||
1761 | { | ||
1762 | int i; | ||
1763 | |||
1764 | if (!conn->lgr || conn->lgr->is_smcd) | ||
1765 | return; | ||
1766 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { | ||
1767 | if (!smc_link_active(&conn->lgr->lnk[i])) | ||
1768 | continue; | ||
1769 | smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc, | ||
1770 | DMA_FROM_DEVICE); | ||
1771 | } | ||
1772 | } | ||
1773 | |||
1774 | /* create the send and receive buffer for an SMC socket; | ||
1775 | * receive buffers are called RMBs; | ||
1776 | * (even though the SMC protocol allows more than one RMB-element per RMB, | ||
1777 | * the Linux implementation uses just one RMB-element per RMB, i.e. uses an | ||
1778 | * extra RMB for every connection in a link group) | ||
1779 | */ | ||
1780 | int smc_buf_create(struct smc_sock *smc, bool is_smcd) | ||
1781 | { | ||
1782 | int rc; | ||
1783 | |||
1784 | /* create send buffer */ | ||
1785 | rc = __smc_buf_create(smc, is_smcd, false); | ||
1786 | if (rc) | ||
1787 | return rc; | ||
1788 | /* create rmb */ | ||
1789 | rc = __smc_buf_create(smc, is_smcd, true); | ||
1790 | if (rc) { | ||
1791 | mutex_lock(&smc->conn.lgr->sndbufs_lock); | ||
1792 | list_del(&smc->conn.sndbuf_desc->list); | ||
1793 | mutex_unlock(&smc->conn.lgr->sndbufs_lock); | ||
1794 | smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc); | ||
1795 | smc->conn.sndbuf_desc = NULL; | ||
1796 | } | ||
1797 | return rc; | ||
1798 | } | ||
1799 | |||
1800 | static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr) | ||
1801 | { | ||
1802 | int i; | ||
1803 | |||
1804 | for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) { | ||
1805 | if (!test_and_set_bit(i, lgr->rtokens_used_mask)) | ||
1806 | return i; | ||
1807 | } | ||
1808 | return -ENOSPC; | ||
1809 | } | ||
1810 | |||
1811 | static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx, | ||
1812 | u32 rkey) | ||
1813 | { | ||
1814 | int i; | ||
1815 | |||
1816 | for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { | ||
1817 | if (test_bit(i, lgr->rtokens_used_mask) && | ||
1818 | lgr->rtokens[i][lnk_idx].rkey == rkey) | ||
1819 | return i; | ||
1820 | } | ||
1821 | return -ENOENT; | ||
1822 | } | ||
1823 | |||
1824 | /* set rtoken for a new link to an existing rmb */ | ||
1825 | void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new, | ||
1826 | __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey) | ||
1827 | { | ||
1828 | int rtok_idx; | ||
1829 | |||
1830 | rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known)); | ||
1831 | if (rtok_idx == -ENOENT) | ||
1832 | return; | ||
1833 | lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey); | ||
1834 | lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr); | ||
1835 | } | ||
1836 | |||
1837 | /* set rtoken for a new link whose link_id is given */ | ||
1838 | void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id, | ||
1839 | __be64 nw_vaddr, __be32 nw_rkey) | ||
1840 | { | ||
1841 | u64 dma_addr = be64_to_cpu(nw_vaddr); | ||
1842 | u32 rkey = ntohl(nw_rkey); | ||
1843 | bool found = false; | ||
1844 | int link_idx; | ||
1845 | |||
1846 | for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) { | ||
1847 | if (lgr->lnk[link_idx].link_id == link_id) { | ||
1848 | found = true; | ||
1849 | break; | ||
1850 | } | ||
1851 | } | ||
1852 | if (!found) | ||
1853 | return; | ||
1854 | lgr->rtokens[rtok_idx][link_idx].rkey = rkey; | ||
1855 | lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr; | ||
1856 | } | ||
1857 | |||
1858 | /* add a new rtoken from peer */ | ||
1859 | int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey) | ||
1860 | { | ||
1861 | struct smc_link_group *lgr = smc_get_lgr(lnk); | ||
1862 | u64 dma_addr = be64_to_cpu(nw_vaddr); | ||
1863 | u32 rkey = ntohl(nw_rkey); | ||
1864 | int i; | ||
1865 | |||
1866 | for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { | ||
1867 | if (lgr->rtokens[i][lnk->link_idx].rkey == rkey && | ||
1868 | lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr && | ||
1869 | test_bit(i, lgr->rtokens_used_mask)) { | ||
1870 | /* already in list */ | ||
1871 | return i; | ||
1872 | } | ||
1873 | } | ||
1874 | i = smc_rmb_reserve_rtoken_idx(lgr); | ||
1875 | if (i < 0) | ||
1876 | return i; | ||
1877 | lgr->rtokens[i][lnk->link_idx].rkey = rkey; | ||
1878 | lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr; | ||
1879 | return i; | ||
1880 | } | ||
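smc_rtoken_add() fills one column of the per-lgr rtokens[rmb_idx][link_idx] table, while smc_rtoken_set() and smc_rtoken_set2() copy entries sideways when a link is added or replaced. A toy model of that two-dimensional indexing (all values invented for illustration):

```c
/* Toy model of the lgr->rtokens table: one row per remote RMB, one
 * column per link; the same RMB has a distinct rkey/addr per link.
 */
#include <stdio.h>
#include <stdint.h>

#define RMBS_MAX  4	/* kernel: SMC_RMBS_PER_LGR_MAX (255) */
#define LINKS_MAX 3	/* kernel: SMC_LINKS_PER_LGR_MAX */

struct rtoken {
	uint64_t dma_addr;
	uint32_t rkey;
};

static struct rtoken rtokens[RMBS_MAX][LINKS_MAX];

int main(void)
{
	int rmb = 1, old_link = 0, new_link = 2;

	/* like smc_rtoken_add(): peer announces rkey/addr on link 0 */
	rtokens[rmb][old_link] = (struct rtoken){ 0x7f0000010000ULL, 0x1234 };

	/* like smc_rtoken_set(): the same RMB becomes reachable via a new
	 * link, with a different rkey/addr pair for that link's HCA
	 */
	rtokens[rmb][new_link] = (struct rtoken){ 0x7f0000020000ULL, 0x5678 };

	for (int l = 0; l < LINKS_MAX; l++)
		printf("rmb %d link %d: rkey 0x%x addr 0x%llx\n", rmb, l,
		       (unsigned)rtokens[rmb][l].rkey,
		       (unsigned long long)rtokens[rmb][l].dma_addr);
	return 0;
}
```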
1881 | |||
1882 | /* delete an rtoken from all links */ | ||
1883 | int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey) | ||
1884 | { | ||
1885 | struct smc_link_group *lgr = smc_get_lgr(lnk); | ||
1886 | u32 rkey = ntohl(nw_rkey); | ||
1887 | int i, j; | ||
1888 | |||
1889 | for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { | ||
1890 | if (lgr->rtokens[i][lnk->link_idx].rkey == rkey && | ||
1891 | test_bit(i, lgr->rtokens_used_mask)) { | ||
1892 | for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) { | ||
1893 | lgr->rtokens[i][j].rkey = 0; | ||
1894 | lgr->rtokens[i][j].dma_addr = 0; | ||
1895 | } | ||
1896 | clear_bit(i, lgr->rtokens_used_mask); | ||
1897 | return 0; | ||
1898 | } | ||
1899 | } | ||
1900 | return -ENOENT; | ||
1901 | } | ||
1902 | |||
1903 | /* save rkey and dma_addr received from peer during clc handshake */ | ||
1904 | int smc_rmb_rtoken_handling(struct smc_connection *conn, | ||
1905 | struct smc_link *lnk, | ||
1906 | struct smc_clc_msg_accept_confirm *clc) | ||
1907 | { | ||
1908 | conn->rtoken_idx = smc_rtoken_add(lnk, clc->r0.rmb_dma_addr, | ||
1909 | clc->r0.rmb_rkey); | ||
1910 | if (conn->rtoken_idx < 0) | ||
1911 | return conn->rtoken_idx; | ||
1912 | return 0; | ||
1913 | } | ||
1914 | |||
1915 | static void smc_core_going_away(void) | ||
1916 | { | ||
1917 | struct smc_ib_device *smcibdev; | ||
1918 | struct smcd_dev *smcd; | ||
1919 | |||
1920 | mutex_lock(&smc_ib_devices.mutex); | ||
1921 | list_for_each_entry(smcibdev, &smc_ib_devices.list, list) { | ||
1922 | int i; | ||
1923 | |||
1924 | for (i = 0; i < SMC_MAX_PORTS; i++) | ||
1925 | set_bit(i, smcibdev->ports_going_away); | ||
1926 | } | ||
1927 | mutex_unlock(&smc_ib_devices.mutex); | ||
1928 | |||
1929 | mutex_lock(&smcd_dev_list.mutex); | ||
1930 | list_for_each_entry(smcd, &smcd_dev_list.list, list) { | ||
1931 | smcd->going_away = 1; | ||
1932 | } | ||
1933 | mutex_unlock(&smcd_dev_list.mutex); | ||
1934 | } | ||
1935 | |||
1936 | /* Clean up all SMC link groups */ | ||
1937 | static void smc_lgrs_shutdown(void) | ||
1938 | { | ||
1939 | struct smcd_dev *smcd; | ||
1940 | |||
1941 | smc_core_going_away(); | ||
1942 | |||
1943 | smc_smcr_terminate_all(NULL); | ||
1944 | |||
1945 | mutex_lock(&smcd_dev_list.mutex); | ||
1946 | list_for_each_entry(smcd, &smcd_dev_list.list, list) | ||
1947 | smc_smcd_terminate_all(smcd); | ||
1948 | mutex_unlock(&smcd_dev_list.mutex); | ||
1949 | } | ||
1950 | |||
1951 | static int smc_core_reboot_event(struct notifier_block *this, | ||
1952 | unsigned long event, void *ptr) | ||
1953 | { | ||
1954 | smc_lgrs_shutdown(); | ||
1955 | smc_ib_unregister_client(); | ||
1956 | return 0; | ||
1957 | } | ||
1958 | |||
1959 | static struct notifier_block smc_reboot_notifier = { | ||
1960 | .notifier_call = smc_core_reboot_event, | ||
1961 | }; | ||
1962 | |||
1963 | int __init smc_core_init(void) | ||
1964 | { | ||
1965 | return register_reboot_notifier(&smc_reboot_notifier); | ||
1966 | } | ||
1967 | |||
1968 | /* Called (from smc_exit) when module is removed */ | ||
1969 | void smc_core_exit(void) | ||
1970 | { | ||
1971 | unregister_reboot_notifier(&smc_reboot_notifier); | ||
1972 | smc_lgrs_shutdown(); | ||
1973 | } | ||
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h new file mode 100644 index 000000000..9364d0f35 --- /dev/null +++ b/net/smc/smc_core.h | |||
@@ -0,0 +1,425 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * Definitions for SMC Connections, Link Groups and Links | ||
6 | * | ||
7 | * Copyright IBM Corp. 2016 | ||
8 | * | ||
9 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> | ||
10 | */ | ||
11 | |||
12 | #ifndef _SMC_CORE_H | ||
13 | #define _SMC_CORE_H | ||
14 | |||
15 | #include <linux/atomic.h> | ||
16 | #include <rdma/ib_verbs.h> | ||
17 | |||
18 | #include "smc.h" | ||
19 | #include "smc_ib.h" | ||
20 | |||
21 | #define SMC_RMBS_PER_LGR_MAX 255 /* max. # of RMBs per link group */ | ||
22 | |||
23 | struct smc_lgr_list { /* list of link group definition */ | ||
24 | struct list_head list; | ||
25 | spinlock_t lock; /* protects list of link groups */ | ||
26 | u32 num; /* unique link group number */ | ||
27 | }; | ||
28 | |||
29 | enum smc_lgr_role { /* possible roles of a link group */ | ||
30 | SMC_CLNT, /* client */ | ||
31 | SMC_SERV /* server */ | ||
32 | }; | ||
33 | |||
34 | enum smc_link_state { /* possible states of a link */ | ||
35 | SMC_LNK_UNUSED, /* link is unused */ | ||
36 | SMC_LNK_INACTIVE, /* link is inactive */ | ||
37 | SMC_LNK_ACTIVATING, /* link is being activated */ | ||
38 | SMC_LNK_ACTIVE, /* link is active */ | ||
39 | }; | ||
40 | |||
41 | #define SMC_WR_BUF_SIZE 48 /* size of work request buffer */ | ||
42 | |||
43 | struct smc_wr_buf { | ||
44 | u8 raw[SMC_WR_BUF_SIZE]; | ||
45 | }; | ||
46 | |||
47 | #define SMC_WR_REG_MR_WAIT_TIME (5 * HZ)/* wait time for ib_wr_reg_mr result */ | ||
48 | |||
49 | enum smc_wr_reg_state { | ||
50 | POSTED, /* ib_wr_reg_mr request posted */ | ||
51 | CONFIRMED, /* ib_wr_reg_mr response: successful */ | ||
52 | FAILED /* ib_wr_reg_mr response: failure */ | ||
53 | }; | ||
54 | |||
55 | struct smc_rdma_sge { /* sges for RDMA writes */ | ||
56 | struct ib_sge wr_tx_rdma_sge[SMC_IB_MAX_SEND_SGE]; | ||
57 | }; | ||
58 | |||
59 | #define SMC_MAX_RDMA_WRITES 2 /* max. # of RDMA writes per | ||
60 | * message send | ||
61 | */ | ||
62 | |||
63 | struct smc_rdma_sges { /* sges per message send */ | ||
64 | struct smc_rdma_sge tx_rdma_sge[SMC_MAX_RDMA_WRITES]; | ||
65 | }; | ||
66 | |||
67 | struct smc_rdma_wr { /* work requests per message | ||
68 | * send | ||
69 | */ | ||
70 | struct ib_rdma_wr wr_tx_rdma[SMC_MAX_RDMA_WRITES]; | ||
71 | }; | ||
72 | |||
73 | #define SMC_LGR_ID_SIZE 4 | ||
74 | |||
75 | struct smc_link { | ||
76 | struct smc_ib_device *smcibdev; /* ib-device */ | ||
77 | u8 ibport; /* port - values 1 | 2 */ | ||
78 | struct ib_pd *roce_pd; /* IB protection domain, | ||
79 | * unique for every RoCE QP | ||
80 | */ | ||
81 | struct ib_qp *roce_qp; /* IB queue pair */ | ||
82 | struct ib_qp_attr qp_attr; /* IB queue pair attributes */ | ||
83 | |||
84 | struct smc_wr_buf *wr_tx_bufs; /* WR send payload buffers */ | ||
85 | struct ib_send_wr *wr_tx_ibs; /* WR send meta data */ | ||
86 | struct ib_sge *wr_tx_sges; /* WR send gather meta data */ | ||
87 | struct smc_rdma_sges *wr_tx_rdma_sges;/* RDMA WRITE gather meta data */ | ||
88 | struct smc_rdma_wr *wr_tx_rdmas; /* WR RDMA WRITE */ | ||
89 | struct smc_wr_tx_pend *wr_tx_pends; /* WR send waiting for CQE */ | ||
90 | struct completion *wr_tx_compl; /* WR send CQE completion */ | ||
91 | /* above seven vectors have wr_tx_cnt elements and use the same index */ | ||
92 | dma_addr_t wr_tx_dma_addr; /* DMA address of wr_tx_bufs */ | ||
93 | atomic_long_t wr_tx_id; /* seq # of last sent WR */ | ||
94 | unsigned long *wr_tx_mask; /* bit mask of used indexes */ | ||
95 | u32 wr_tx_cnt; /* number of WR send buffers */ | ||
96 | wait_queue_head_t wr_tx_wait; /* wait for free WR send buf */ | ||
97 | atomic_t wr_tx_refcnt; /* tx refs to link */ | ||
98 | |||
99 | struct smc_wr_buf *wr_rx_bufs; /* WR recv payload buffers */ | ||
100 | struct ib_recv_wr *wr_rx_ibs; /* WR recv meta data */ | ||
101 | struct ib_sge *wr_rx_sges; /* WR recv scatter meta data */ | ||
102 | /* above three vectors have wr_rx_cnt elements and use the same index */ | ||
103 | dma_addr_t wr_rx_dma_addr; /* DMA address of wr_rx_bufs */ | ||
104 | u64 wr_rx_id; /* seq # of last recv WR */ | ||
105 | u32 wr_rx_cnt; /* number of WR recv buffers */ | ||
106 | unsigned long wr_rx_tstamp; /* jiffies when last buf rx */ | ||
107 | |||
108 | struct ib_reg_wr wr_reg; /* WR register memory region */ | ||
109 | wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */ | ||
110 | atomic_t wr_reg_refcnt; /* reg refs to link */ | ||
111 | enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */ | ||
112 | |||
113 | u8 gid[SMC_GID_SIZE];/* gid matching used vlan id*/ | ||
114 | u8 sgid_index; /* gid index for vlan id */ | ||
115 | u32 peer_qpn; /* QP number of peer */ | ||
116 | enum ib_mtu path_mtu; /* used mtu */ | ||
117 | enum ib_mtu peer_mtu; /* mtu size of peer */ | ||
118 | u32 psn_initial; /* QP tx initial packet seqno */ | ||
119 | u32 peer_psn; /* QP rx initial packet seqno */ | ||
120 | u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */ | ||
121 | u8 peer_gid[SMC_GID_SIZE]; /* gid of peer*/ | ||
122 | u8 link_id; /* unique # within link group */ | ||
123 | u8 link_uid[SMC_LGR_ID_SIZE]; /* unique lnk id */ | ||
124 | u8 peer_link_uid[SMC_LGR_ID_SIZE]; /* peer uid */ | ||
125 | u8 link_idx; /* index in lgr link array */ | ||
126 | u8 link_is_asym; /* is link asymmetric? */ | ||
127 | struct smc_link_group *lgr; /* parent link group */ | ||
128 | struct work_struct link_down_wrk; /* wrk to bring link down */ | ||
129 | |||
130 | enum smc_link_state state; /* state of link */ | ||
131 | struct delayed_work llc_testlink_wrk; /* testlink worker */ | ||
132 | struct completion llc_testlink_resp; /* wait for rx of testlink */ | ||
133 | int llc_testlink_time; /* testlink interval */ | ||
134 | }; | ||
135 | |||
136 | /* For now we allow at most two active parallel links per link group; the | ||
137 | * third array slot serves link replacement. The SMC protocol allows up to 8. | ||
138 | */ | ||
139 | #define SMC_LINKS_PER_LGR_MAX 3 | ||
140 | #define SMC_SINGLE_LINK 0 | ||
141 | |||
142 | /* tx/rx buffer list element for sndbufs list and rmbs list of a lgr */ | ||
143 | struct smc_buf_desc { | ||
144 | struct list_head list; | ||
145 | void *cpu_addr; /* virtual address of buffer */ | ||
146 | struct page *pages; | ||
147 | int len; /* length of buffer */ | ||
148 | u32 used; /* currently used / unused */ | ||
149 | union { | ||
150 | struct { /* SMC-R */ | ||
151 | struct sg_table sgt[SMC_LINKS_PER_LGR_MAX]; | ||
152 | /* virtual buffer */ | ||
153 | struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX]; | ||
154 | /* for rmb only: memory region | ||
155 | * incl. rkey provided to peer | ||
156 | */ | ||
157 | u32 order; /* allocation order */ | ||
158 | |||
159 | u8 is_conf_rkey; | ||
160 | /* confirm_rkey done */ | ||
161 | u8 is_reg_mr[SMC_LINKS_PER_LGR_MAX]; | ||
162 | /* mem region registered */ | ||
163 | u8 is_map_ib[SMC_LINKS_PER_LGR_MAX]; | ||
164 | /* mem region mapped to lnk */ | ||
165 | u8 is_reg_err; | ||
166 | /* buffer registration err */ | ||
167 | }; | ||
168 | struct { /* SMC-D */ | ||
169 | unsigned short sba_idx; | ||
170 | /* SBA index number */ | ||
171 | u64 token; | ||
172 | /* DMB token number */ | ||
173 | dma_addr_t dma_addr; | ||
174 | /* DMA address */ | ||
175 | }; | ||
176 | }; | ||
177 | }; | ||
178 | |||
179 | struct smc_rtoken { /* address/key of remote RMB */ | ||
180 | u64 dma_addr; | ||
181 | u32 rkey; | ||
182 | }; | ||
183 | |||
184 | #define SMC_BUF_MIN_SIZE 16384 /* minimum size of an RMB */ | ||
185 | #define SMC_RMBE_SIZES 16 /* number of distinct RMBE sizes */ | ||
186 | /* theoretically, the RFC limits the largest RMBE size to 512K, i.e. | ||
187 | * compressed code 5 and thus 6 sizes (0..5), even though | ||
188 | * struct smc_clc_msg_accept_confirm.rmbe_size is a 4 bit value (0..15) | ||
189 | */ | ||
190 | |||
191 | struct smcd_dev; | ||
192 | |||
193 | enum smc_lgr_type { /* redundancy state of lgr */ | ||
194 | SMC_LGR_NONE, /* no active links, lgr to be deleted */ | ||
195 | SMC_LGR_SINGLE, /* 1 active RNIC on each peer */ | ||
196 | SMC_LGR_SYMMETRIC, /* 2 active RNICs on each peer */ | ||
197 | SMC_LGR_ASYMMETRIC_PEER, /* local has 2, peer 1 active RNICs */ | ||
198 | SMC_LGR_ASYMMETRIC_LOCAL, /* local has 1, peer 2 active RNICs */ | ||
199 | }; | ||
200 | |||
201 | enum smc_llc_flowtype { | ||
202 | SMC_LLC_FLOW_NONE = 0, | ||
203 | SMC_LLC_FLOW_ADD_LINK = 2, | ||
204 | SMC_LLC_FLOW_DEL_LINK = 4, | ||
205 | SMC_LLC_FLOW_RKEY = 6, | ||
206 | }; | ||
207 | |||
208 | struct smc_llc_qentry; | ||
209 | |||
210 | struct smc_llc_flow { | ||
211 | enum smc_llc_flowtype type; | ||
212 | struct smc_llc_qentry *qentry; | ||
213 | }; | ||
214 | |||
215 | struct smc_link_group { | ||
216 | struct list_head list; | ||
217 | struct rb_root conns_all; /* connection tree */ | ||
218 | rwlock_t conns_lock; /* protects conns_all */ | ||
219 | unsigned int conns_num; /* current # of connections */ | ||
220 | unsigned short vlan_id; /* vlan id of link group */ | ||
221 | |||
222 | struct list_head sndbufs[SMC_RMBE_SIZES];/* tx buffers */ | ||
223 | struct mutex sndbufs_lock; /* protects tx buffers */ | ||
224 | struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */ | ||
225 | struct mutex rmbs_lock; /* protects rx buffers */ | ||
226 | |||
227 | u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */ | ||
228 | struct delayed_work free_work; /* delayed freeing of an lgr */ | ||
229 | struct work_struct terminate_work; /* abnormal lgr termination */ | ||
230 | struct workqueue_struct *tx_wq; /* wq for conn. tx workers */ | ||
231 | u8 sync_err : 1; /* lgr no longer fits to peer */ | ||
232 | u8 terminating : 1;/* lgr is terminating */ | ||
233 | u8 freeing : 1; /* lgr is being freed */ | ||
234 | |||
235 | bool is_smcd; /* SMC-R or SMC-D */ | ||
236 | u8 smc_version; | ||
237 | u8 negotiated_eid[SMC_MAX_EID_LEN]; | ||
238 | u8 peer_os; /* peer operating system */ | ||
239 | u8 peer_smc_release; | ||
240 | u8 peer_hostname[SMC_MAX_HOSTNAME_LEN]; | ||
241 | union { | ||
242 | struct { /* SMC-R */ | ||
243 | enum smc_lgr_role role; | ||
244 | /* client or server */ | ||
245 | struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; | ||
246 | /* smc link */ | ||
247 | char peer_systemid[SMC_SYSTEMID_LEN]; | ||
248 | /* unique system_id of peer */ | ||
249 | struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX] | ||
250 | [SMC_LINKS_PER_LGR_MAX]; | ||
251 | /* remote addr/key pairs */ | ||
252 | DECLARE_BITMAP(rtokens_used_mask, SMC_RMBS_PER_LGR_MAX); | ||
253 | /* used rtoken elements */ | ||
254 | u8 next_link_id; | ||
255 | enum smc_lgr_type type; | ||
256 | /* redundancy state */ | ||
257 | u8 pnet_id[SMC_MAX_PNETID_LEN + 1]; | ||
258 | /* pnet id of this lgr */ | ||
259 | struct list_head llc_event_q; | ||
260 | /* queue for llc events */ | ||
261 | spinlock_t llc_event_q_lock; | ||
262 | /* protects llc_event_q */ | ||
263 | struct mutex llc_conf_mutex; | ||
264 | /* protects lgr reconfig. */ | ||
265 | struct work_struct llc_add_link_work; | ||
266 | struct work_struct llc_del_link_work; | ||
267 | struct work_struct llc_event_work; | ||
268 | /* llc event worker */ | ||
269 | wait_queue_head_t llc_flow_waiter; | ||
270 | /* w4 next llc event */ | ||
271 | wait_queue_head_t llc_msg_waiter; | ||
272 | /* w4 next llc msg */ | ||
273 | struct smc_llc_flow llc_flow_lcl; | ||
274 | /* llc local control field */ | ||
275 | struct smc_llc_flow llc_flow_rmt; | ||
276 | /* llc remote control field */ | ||
277 | struct smc_llc_qentry *delayed_event; | ||
278 | /* arrived when flow active */ | ||
279 | spinlock_t llc_flow_lock; | ||
280 | /* protects llc flow */ | ||
281 | int llc_testlink_time; | ||
282 | /* link keep alive time */ | ||
283 | u32 llc_termination_rsn; | ||
284 | /* rsn code for termination */ | ||
285 | }; | ||
286 | struct { /* SMC-D */ | ||
287 | u64 peer_gid; | ||
288 | /* Peer GID (remote) */ | ||
289 | struct smcd_dev *smcd; | ||
290 | /* ISM device for VLAN reg. */ | ||
291 | u8 peer_shutdown : 1; | ||
292 | /* peer triggered shutdown */ | ||
293 | }; | ||
294 | }; | ||
295 | }; | ||
296 | |||
297 | struct smc_clc_msg_local; | ||
298 | |||
299 | struct smc_init_info { | ||
300 | u8 is_smcd; | ||
301 | u8 smc_type_v1; | ||
302 | u8 smc_type_v2; | ||
303 | u8 first_contact_peer; | ||
304 | u8 first_contact_local; | ||
305 | unsigned short vlan_id; | ||
306 | /* SMC-R */ | ||
307 | struct smc_clc_msg_local *ib_lcl; | ||
308 | struct smc_ib_device *ib_dev; | ||
309 | u8 ib_gid[SMC_GID_SIZE]; | ||
310 | u8 ib_port; | ||
311 | u32 ib_clcqpn; | ||
312 | /* SMC-D */ | ||
313 | u64 ism_peer_gid[SMC_MAX_ISM_DEVS + 1]; | ||
314 | struct smcd_dev *ism_dev[SMC_MAX_ISM_DEVS + 1]; | ||
315 | u16 ism_chid[SMC_MAX_ISM_DEVS + 1]; | ||
316 | u8 ism_offered_cnt; /* # of ISM devices offered */ | ||
317 | u8 ism_selected; /* index of selected ISM dev*/ | ||
318 | u8 smcd_version; | ||
319 | }; | ||
320 | |||
321 | /* Find the connection associated with the given alert token in the link group. | ||
322 | * To use rbtrees we have to implement our own search core. | ||
323 | * Requires @conns_lock | ||
324 | * @token alert token to search for | ||
325 | * @lgr link group to search in | ||
326 | * Returns connection associated with token if found, NULL otherwise. | ||
327 | */ | ||
328 | static inline struct smc_connection *smc_lgr_find_conn( | ||
329 | u32 token, struct smc_link_group *lgr) | ||
330 | { | ||
331 | struct smc_connection *res = NULL; | ||
332 | struct rb_node *node; | ||
333 | |||
334 | node = lgr->conns_all.rb_node; | ||
335 | while (node) { | ||
336 | struct smc_connection *cur = rb_entry(node, | ||
337 | struct smc_connection, alert_node); | ||
338 | |||
339 | if (cur->alert_token_local > token) { | ||
340 | node = node->rb_left; | ||
341 | } else { | ||
342 | if (cur->alert_token_local < token) { | ||
343 | node = node->rb_right; | ||
344 | } else { | ||
345 | res = cur; | ||
346 | break; | ||
347 | } | ||
348 | } | ||
349 | } | ||
350 | |||
351 | return res; | ||
352 | } | ||
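As the comment above notes, smc_lgr_find_conn() must run under conns_lock. A minimal caller sketch (reader's illustration modeled on the CDC receive path; the wrapper name is hypothetical and not part of this patch):

/* Hypothetical wrapper (not in this patch): look up a connection by
 * alert token while holding conns_lock against concurrent connection
 * register/unregister.
 */
static struct smc_connection *smc_lgr_conn_get(struct smc_link_group *lgr,
					       u32 token)
{
	struct smc_connection *conn;

	read_lock_bh(&lgr->conns_lock);
	conn = smc_lgr_find_conn(token, lgr);
	read_unlock_bh(&lgr->conns_lock);
	return conn;
}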
353 | |||
354 | /* returns true if the specified link is usable */ | ||
355 | static inline bool smc_link_usable(struct smc_link *lnk) | ||
356 | { | ||
357 | if (lnk->state == SMC_LNK_UNUSED || lnk->state == SMC_LNK_INACTIVE) | ||
358 | return false; | ||
359 | return true; | ||
360 | } | ||
361 | |||
362 | static inline bool smc_link_sendable(struct smc_link *lnk) | ||
363 | { | ||
364 | return smc_link_usable(lnk) && | ||
365 | lnk->qp_attr.cur_qp_state == IB_QPS_RTS; | ||
366 | } | ||
367 | |||
368 | static inline bool smc_link_active(struct smc_link *lnk) | ||
369 | { | ||
370 | return lnk->state == SMC_LNK_ACTIVE; | ||
371 | } | ||
372 | |||
373 | struct smc_sock; | ||
374 | struct smc_clc_msg_accept_confirm; | ||
375 | struct smc_clc_msg_local; | ||
376 | |||
377 | void smc_lgr_cleanup_early(struct smc_connection *conn); | ||
378 | void smc_lgr_terminate_sched(struct smc_link_group *lgr); | ||
379 | void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport); | ||
380 | void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport); | ||
381 | void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, | ||
382 | unsigned short vlan); | ||
383 | void smc_smcd_terminate_all(struct smcd_dev *dev); | ||
384 | void smc_smcr_terminate_all(struct smc_ib_device *smcibdev); | ||
385 | int smc_buf_create(struct smc_sock *smc, bool is_smcd); | ||
386 | int smc_uncompress_bufsize(u8 compressed); | ||
387 | int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link, | ||
388 | struct smc_clc_msg_accept_confirm *clc); | ||
389 | int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey); | ||
390 | int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey); | ||
391 | void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new, | ||
392 | __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey); | ||
393 | void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id, | ||
394 | __be64 nw_vaddr, __be32 nw_rkey); | ||
395 | void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn); | ||
396 | void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn); | ||
397 | void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn); | ||
398 | void smc_rmb_sync_sg_for_device(struct smc_connection *conn); | ||
399 | int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini); | ||
400 | |||
401 | void smc_conn_free(struct smc_connection *conn); | ||
402 | int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini); | ||
403 | void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr); | ||
404 | int smc_core_init(void); | ||
405 | void smc_core_exit(void); | ||
406 | |||
407 | int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, | ||
408 | u8 link_idx, struct smc_init_info *ini); | ||
409 | void smcr_link_clear(struct smc_link *lnk, bool log); | ||
410 | int smcr_buf_map_lgr(struct smc_link *lnk); | ||
411 | int smcr_buf_reg_lgr(struct smc_link *lnk); | ||
412 | void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type); | ||
413 | void smcr_lgr_set_type_asym(struct smc_link_group *lgr, | ||
414 | enum smc_lgr_type new_type, int asym_lnk_idx); | ||
415 | int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc); | ||
416 | struct smc_link *smc_switch_conns(struct smc_link_group *lgr, | ||
417 | struct smc_link *from_lnk, bool is_dev_err); | ||
418 | void smcr_link_down_cond(struct smc_link *lnk); | ||
419 | void smcr_link_down_cond_sched(struct smc_link *lnk); | ||
420 | |||
421 | static inline struct smc_link_group *smc_get_lgr(struct smc_link *link) | ||
422 | { | ||
423 | return link->lgr; | ||
424 | } | ||
425 | #endif | ||
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c new file mode 100644 index 000000000..f15fca59b --- /dev/null +++ b/net/smc/smc_diag.c | |||
@@ -0,0 +1,283 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0-only | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * Monitoring SMC transport protocol sockets | ||
6 | * | ||
7 | * Copyright IBM Corp. 2016 | ||
8 | * | ||
9 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> | ||
10 | */ | ||
11 | |||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/init.h> | ||
16 | #include <linux/sock_diag.h> | ||
17 | #include <linux/inet_diag.h> | ||
18 | #include <linux/smc_diag.h> | ||
19 | #include <net/netlink.h> | ||
20 | #include <net/smc.h> | ||
21 | |||
22 | #include "smc.h" | ||
23 | #include "smc_core.h" | ||
24 | |||
25 | struct smc_diag_dump_ctx { | ||
26 | int pos[2]; | ||
27 | }; | ||
28 | |||
29 | static struct smc_diag_dump_ctx *smc_dump_context(struct netlink_callback *cb) | ||
30 | { | ||
31 | return (struct smc_diag_dump_ctx *)cb->ctx; | ||
32 | } | ||
33 | |||
34 | static void smc_gid_be16_convert(__u8 *buf, u8 *gid_raw) | ||
35 | { | ||
36 | sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x", | ||
37 | be16_to_cpu(((__be16 *)gid_raw)[0]), | ||
38 | be16_to_cpu(((__be16 *)gid_raw)[1]), | ||
39 | be16_to_cpu(((__be16 *)gid_raw)[2]), | ||
40 | be16_to_cpu(((__be16 *)gid_raw)[3]), | ||
41 | be16_to_cpu(((__be16 *)gid_raw)[4]), | ||
42 | be16_to_cpu(((__be16 *)gid_raw)[5]), | ||
43 | be16_to_cpu(((__be16 *)gid_raw)[6]), | ||
44 | be16_to_cpu(((__be16 *)gid_raw)[7])); | ||
45 | } | ||
46 | |||
47 | static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk) | ||
48 | { | ||
49 | struct smc_sock *smc = smc_sk(sk); | ||
50 | |||
51 | memset(r, 0, sizeof(*r)); | ||
52 | r->diag_family = sk->sk_family; | ||
53 | sock_diag_save_cookie(sk, r->id.idiag_cookie); | ||
54 | if (!smc->clcsock) | ||
55 | return; | ||
56 | r->id.idiag_sport = htons(smc->clcsock->sk->sk_num); | ||
57 | r->id.idiag_dport = smc->clcsock->sk->sk_dport; | ||
58 | r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if; | ||
59 | if (sk->sk_protocol == SMCPROTO_SMC) { | ||
60 | r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr; | ||
61 | r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr; | ||
62 | #if IS_ENABLED(CONFIG_IPV6) | ||
63 | } else if (sk->sk_protocol == SMCPROTO_SMC6) { | ||
64 | memcpy(&r->id.idiag_src, &smc->clcsock->sk->sk_v6_rcv_saddr, | ||
65 | sizeof(smc->clcsock->sk->sk_v6_rcv_saddr)); | ||
66 | memcpy(&r->id.idiag_dst, &smc->clcsock->sk->sk_v6_daddr, | ||
67 | sizeof(smc->clcsock->sk->sk_v6_daddr)); | ||
68 | #endif | ||
69 | } | ||
70 | } | ||
71 | |||
72 | static int smc_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, | ||
73 | struct smc_diag_msg *r, | ||
74 | struct user_namespace *user_ns) | ||
75 | { | ||
76 | if (nla_put_u8(skb, SMC_DIAG_SHUTDOWN, sk->sk_shutdown)) | ||
77 | return 1; | ||
78 | |||
79 | r->diag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); | ||
80 | r->diag_inode = sock_i_ino(sk); | ||
81 | return 0; | ||
82 | } | ||
83 | |||
84 | static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, | ||
85 | struct netlink_callback *cb, | ||
86 | const struct smc_diag_req *req, | ||
87 | struct nlattr *bc) | ||
88 | { | ||
89 | struct smc_sock *smc = smc_sk(sk); | ||
90 | struct smc_diag_fallback fallback; | ||
91 | struct user_namespace *user_ns; | ||
92 | struct smc_diag_msg *r; | ||
93 | struct nlmsghdr *nlh; | ||
94 | |||
95 | nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, | ||
96 | cb->nlh->nlmsg_type, sizeof(*r), NLM_F_MULTI); | ||
97 | if (!nlh) | ||
98 | return -EMSGSIZE; | ||
99 | |||
100 | r = nlmsg_data(nlh); | ||
101 | smc_diag_msg_common_fill(r, sk); | ||
102 | r->diag_state = sk->sk_state; | ||
103 | if (smc->use_fallback) | ||
104 | r->diag_mode = SMC_DIAG_MODE_FALLBACK_TCP; | ||
105 | else if (smc->conn.lgr && smc->conn.lgr->is_smcd) | ||
106 | r->diag_mode = SMC_DIAG_MODE_SMCD; | ||
107 | else | ||
108 | r->diag_mode = SMC_DIAG_MODE_SMCR; | ||
109 | user_ns = sk_user_ns(NETLINK_CB(cb->skb).sk); | ||
110 | if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns)) | ||
111 | goto errout; | ||
112 | |||
113 | fallback.reason = smc->fallback_rsn; | ||
114 | fallback.peer_diagnosis = smc->peer_diagnosis; | ||
115 | if (nla_put(skb, SMC_DIAG_FALLBACK, sizeof(fallback), &fallback) < 0) | ||
116 | goto errout; | ||
117 | |||
118 | if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && | ||
119 | smc->conn.alert_token_local) { | ||
120 | struct smc_connection *conn = &smc->conn; | ||
121 | struct smc_diag_conninfo cinfo = { | ||
122 | .token = conn->alert_token_local, | ||
123 | .sndbuf_size = conn->sndbuf_desc ? | ||
124 | conn->sndbuf_desc->len : 0, | ||
125 | .rmbe_size = conn->rmb_desc ? conn->rmb_desc->len : 0, | ||
126 | .peer_rmbe_size = conn->peer_rmbe_size, | ||
127 | |||
128 | .rx_prod.wrap = conn->local_rx_ctrl.prod.wrap, | ||
129 | .rx_prod.count = conn->local_rx_ctrl.prod.count, | ||
130 | .rx_cons.wrap = conn->local_rx_ctrl.cons.wrap, | ||
131 | .rx_cons.count = conn->local_rx_ctrl.cons.count, | ||
132 | |||
133 | .tx_prod.wrap = conn->local_tx_ctrl.prod.wrap, | ||
134 | .tx_prod.count = conn->local_tx_ctrl.prod.count, | ||
135 | .tx_cons.wrap = conn->local_tx_ctrl.cons.wrap, | ||
136 | .tx_cons.count = conn->local_tx_ctrl.cons.count, | ||
137 | |||
138 | .tx_prod_flags = | ||
139 | *(u8 *)&conn->local_tx_ctrl.prod_flags, | ||
140 | .tx_conn_state_flags = | ||
141 | *(u8 *)&conn->local_tx_ctrl.conn_state_flags, | ||
142 | .rx_prod_flags = *(u8 *)&conn->local_rx_ctrl.prod_flags, | ||
143 | .rx_conn_state_flags = | ||
144 | *(u8 *)&conn->local_rx_ctrl.conn_state_flags, | ||
145 | |||
146 | .tx_prep.wrap = conn->tx_curs_prep.wrap, | ||
147 | .tx_prep.count = conn->tx_curs_prep.count, | ||
148 | .tx_sent.wrap = conn->tx_curs_sent.wrap, | ||
149 | .tx_sent.count = conn->tx_curs_sent.count, | ||
150 | .tx_fin.wrap = conn->tx_curs_fin.wrap, | ||
151 | .tx_fin.count = conn->tx_curs_fin.count, | ||
152 | }; | ||
153 | |||
154 | if (nla_put(skb, SMC_DIAG_CONNINFO, sizeof(cinfo), &cinfo) < 0) | ||
155 | goto errout; | ||
156 | } | ||
157 | |||
158 | if (smc->conn.lgr && !smc->conn.lgr->is_smcd && | ||
159 | (req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && | ||
160 | !list_empty(&smc->conn.lgr->list)) { | ||
161 | struct smc_diag_lgrinfo linfo = { | ||
162 | .role = smc->conn.lgr->role, | ||
163 | .lnk[0].ibport = smc->conn.lgr->lnk[0].ibport, | ||
164 | .lnk[0].link_id = smc->conn.lgr->lnk[0].link_id, | ||
165 | }; | ||
166 | |||
167 | memcpy(linfo.lnk[0].ibname, | ||
168 | smc->conn.lgr->lnk[0].smcibdev->ibdev->name, | ||
169 | sizeof(smc->conn.lgr->lnk[0].smcibdev->ibdev->name)); | ||
170 | smc_gid_be16_convert(linfo.lnk[0].gid, | ||
171 | smc->conn.lgr->lnk[0].gid); | ||
172 | smc_gid_be16_convert(linfo.lnk[0].peer_gid, | ||
173 | smc->conn.lgr->lnk[0].peer_gid); | ||
174 | |||
175 | if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0) | ||
176 | goto errout; | ||
177 | } | ||
178 | if (smc->conn.lgr && smc->conn.lgr->is_smcd && | ||
179 | (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) && | ||
180 | !list_empty(&smc->conn.lgr->list)) { | ||
181 | struct smc_connection *conn = &smc->conn; | ||
182 | struct smcd_diag_dmbinfo dinfo; | ||
183 | |||
184 | memset(&dinfo, 0, sizeof(dinfo)); | ||
185 | |||
186 | dinfo.linkid = *((u32 *)conn->lgr->id); | ||
187 | dinfo.peer_gid = conn->lgr->peer_gid; | ||
188 | dinfo.my_gid = conn->lgr->smcd->local_gid; | ||
189 | dinfo.token = conn->rmb_desc->token; | ||
190 | dinfo.peer_token = conn->peer_token; | ||
191 | |||
192 | if (nla_put(skb, SMC_DIAG_DMBINFO, sizeof(dinfo), &dinfo) < 0) | ||
193 | goto errout; | ||
194 | } | ||
195 | |||
196 | nlmsg_end(skb, nlh); | ||
197 | return 0; | ||
198 | |||
199 | errout: | ||
200 | nlmsg_cancel(skb, nlh); | ||
201 | return -EMSGSIZE; | ||
202 | } | ||
203 | |||
204 | static int smc_diag_dump_proto(struct proto *prot, struct sk_buff *skb, | ||
205 | struct netlink_callback *cb, int p_type) | ||
206 | { | ||
207 | struct smc_diag_dump_ctx *cb_ctx = smc_dump_context(cb); | ||
208 | struct net *net = sock_net(skb->sk); | ||
209 | int snum = cb_ctx->pos[p_type]; | ||
210 | struct nlattr *bc = NULL; | ||
211 | struct hlist_head *head; | ||
212 | int rc = 0, num = 0; | ||
213 | struct sock *sk; | ||
214 | |||
215 | read_lock(&prot->h.smc_hash->lock); | ||
216 | head = &prot->h.smc_hash->ht; | ||
217 | if (hlist_empty(head)) | ||
218 | goto out; | ||
219 | |||
220 | sk_for_each(sk, head) { | ||
221 | if (!net_eq(sock_net(sk), net)) | ||
222 | continue; | ||
223 | if (num < snum) | ||
224 | goto next; | ||
225 | rc = __smc_diag_dump(sk, skb, cb, nlmsg_data(cb->nlh), bc); | ||
226 | if (rc < 0) | ||
227 | goto out; | ||
228 | next: | ||
229 | num++; | ||
230 | } | ||
231 | |||
232 | out: | ||
233 | read_unlock(&prot->h.smc_hash->lock); | ||
234 | cb_ctx->pos[p_type] = num; | ||
235 | return rc; | ||
236 | } | ||
237 | |||
238 | static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) | ||
239 | { | ||
240 | int rc = 0; | ||
241 | |||
242 | rc = smc_diag_dump_proto(&smc_proto, skb, cb, SMCPROTO_SMC); | ||
243 | if (!rc) | ||
244 | smc_diag_dump_proto(&smc_proto6, skb, cb, SMCPROTO_SMC6); | ||
245 | return skb->len; | ||
246 | } | ||
247 | |||
248 | static int smc_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) | ||
249 | { | ||
250 | struct net *net = sock_net(skb->sk); | ||
251 | |||
252 | if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY && | ||
253 | h->nlmsg_flags & NLM_F_DUMP) { | ||
254 | { | ||
255 | struct netlink_dump_control c = { | ||
256 | .dump = smc_diag_dump, | ||
257 | .min_dump_alloc = SKB_WITH_OVERHEAD(32768), | ||
258 | }; | ||
259 | return netlink_dump_start(net->diag_nlsk, skb, h, &c); | ||
260 | } | ||
261 | } | ||
262 | return 0; | ||
263 | } | ||
264 | |||
265 | static const struct sock_diag_handler smc_diag_handler = { | ||
266 | .family = AF_SMC, | ||
267 | .dump = smc_diag_handler_dump, | ||
268 | }; | ||
269 | |||
270 | static int __init smc_diag_init(void) | ||
271 | { | ||
272 | return sock_diag_register(&smc_diag_handler); | ||
273 | } | ||
274 | |||
275 | static void __exit smc_diag_exit(void) | ||
276 | { | ||
277 | sock_diag_unregister(&smc_diag_handler); | ||
278 | } | ||
279 | |||
280 | module_init(smc_diag_init); | ||
281 | module_exit(smc_diag_exit); | ||
282 | MODULE_LICENSE("GPL"); | ||
283 | MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 43 /* AF_SMC */); | ||
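For orientation, the dump interface above is what user-space tools such as smcss drive over the sock_diag netlink channel. A hedged user-space sketch — the helper name is invented, reply parsing and error handling are omitted, the socket is assumed to have been opened with socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG), and AF_SMC is defined locally in case the libc headers predate it:

#include <linux/inet_diag.h>
#include <linux/netlink.h>
#include <linux/smc_diag.h>
#include <linux/sock_diag.h>
#include <string.h>
#include <sys/socket.h>

#ifndef AF_SMC
#define AF_SMC 43	/* matches the module alias above */
#endif

/* Send a dump request for all AF_SMC sockets, asking for CONNINFO. */
static int smc_diag_send_dump(int nl_fd)
{
	struct {
		struct nlmsghdr nlh;
		struct smc_diag_req req;
	} msg;

	memset(&msg, 0, sizeof(msg));
	msg.nlh.nlmsg_len = sizeof(msg);
	msg.nlh.nlmsg_type = SOCK_DIAG_BY_FAMILY;
	msg.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	msg.req.diag_family = AF_SMC;
	msg.req.diag_ext = 1 << (SMC_DIAG_CONNINFO - 1);
	return send(nl_fd, &msg, sizeof(msg), 0) < 0 ? -1 : 0;
}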
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c new file mode 100644 index 000000000..f1ffbd414 --- /dev/null +++ b/net/smc/smc_ib.c | |||
@@ -0,0 +1,643 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * IB infrastructure: | ||
6 | * Establish SMC-R as an Infiniband Client to be notified about added and | ||
7 | * removed IB devices of type RDMA. | ||
8 | * Determine device and port characteristics for these IB devices. | ||
9 | * | ||
10 | * Copyright IBM Corp. 2016 | ||
11 | * | ||
12 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> | ||
13 | */ | ||
14 | |||
15 | #include <linux/random.h> | ||
16 | #include <linux/workqueue.h> | ||
17 | #include <linux/scatterlist.h> | ||
18 | #include <linux/wait.h> | ||
19 | #include <linux/mutex.h> | ||
20 | #include <rdma/ib_verbs.h> | ||
21 | #include <rdma/ib_cache.h> | ||
22 | |||
23 | #include "smc_pnet.h" | ||
24 | #include "smc_ib.h" | ||
25 | #include "smc_core.h" | ||
26 | #include "smc_wr.h" | ||
27 | #include "smc.h" | ||
28 | |||
29 | #define SMC_MAX_CQE 32766 /* max. # of completion queue elements */ | ||
30 | |||
31 | #define SMC_QP_MIN_RNR_TIMER 5 | ||
32 | #define SMC_QP_TIMEOUT 15 /* 4096 * 2 ** timeout usec */ | ||
33 | #define SMC_QP_RETRY_CNT 7 /* 7: infinite */ | ||
34 | #define SMC_QP_RNR_RETRY 7 /* 7: infinite */ | ||
35 | |||
36 | struct smc_ib_devices smc_ib_devices = { /* smc-registered ib devices */ | ||
37 | .mutex = __MUTEX_INITIALIZER(smc_ib_devices.mutex), | ||
38 | .list = LIST_HEAD_INIT(smc_ib_devices.list), | ||
39 | }; | ||
40 | |||
41 | u8 local_systemid[SMC_SYSTEMID_LEN]; /* unique system identifier */ | ||
42 | |||
43 | static int smc_ib_modify_qp_init(struct smc_link *lnk) | ||
44 | { | ||
45 | struct ib_qp_attr qp_attr; | ||
46 | |||
47 | memset(&qp_attr, 0, sizeof(qp_attr)); | ||
48 | qp_attr.qp_state = IB_QPS_INIT; | ||
49 | qp_attr.pkey_index = 0; | ||
50 | qp_attr.port_num = lnk->ibport; | ||
51 | qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE | ||
52 | | IB_ACCESS_REMOTE_WRITE; | ||
53 | return ib_modify_qp(lnk->roce_qp, &qp_attr, | ||
54 | IB_QP_STATE | IB_QP_PKEY_INDEX | | ||
55 | IB_QP_ACCESS_FLAGS | IB_QP_PORT); | ||
56 | } | ||
57 | |||
58 | static int smc_ib_modify_qp_rtr(struct smc_link *lnk) | ||
59 | { | ||
60 | enum ib_qp_attr_mask qp_attr_mask = | ||
61 | IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | | ||
62 | IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; | ||
63 | struct ib_qp_attr qp_attr; | ||
64 | |||
65 | memset(&qp_attr, 0, sizeof(qp_attr)); | ||
66 | qp_attr.qp_state = IB_QPS_RTR; | ||
67 | qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu); | ||
68 | qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; | ||
69 | rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport); | ||
70 | rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, 1, 0); | ||
71 | rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid); | ||
72 | memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac, | ||
73 | sizeof(lnk->peer_mac)); | ||
74 | qp_attr.dest_qp_num = lnk->peer_qpn; | ||
75 | qp_attr.rq_psn = lnk->peer_psn; /* starting receive packet seq # */ | ||
76 | qp_attr.max_dest_rd_atomic = 1; /* max # of resources for incoming | ||
77 | * requests | ||
78 | */ | ||
79 | qp_attr.min_rnr_timer = SMC_QP_MIN_RNR_TIMER; | ||
80 | |||
81 | return ib_modify_qp(lnk->roce_qp, &qp_attr, qp_attr_mask); | ||
82 | } | ||
83 | |||
84 | int smc_ib_modify_qp_rts(struct smc_link *lnk) | ||
85 | { | ||
86 | struct ib_qp_attr qp_attr; | ||
87 | |||
88 | memset(&qp_attr, 0, sizeof(qp_attr)); | ||
89 | qp_attr.qp_state = IB_QPS_RTS; | ||
90 | qp_attr.timeout = SMC_QP_TIMEOUT; /* local ack timeout */ | ||
91 | qp_attr.retry_cnt = SMC_QP_RETRY_CNT; /* retry count */ | ||
92 | qp_attr.rnr_retry = SMC_QP_RNR_RETRY; /* RNR retries, 7=infinite */ | ||
93 | qp_attr.sq_psn = lnk->psn_initial; /* starting send packet seq # */ | ||
94 | qp_attr.max_rd_atomic = 1; /* # of outstanding RDMA reads and | ||
95 | * atomic ops allowed | ||
96 | */ | ||
97 | return ib_modify_qp(lnk->roce_qp, &qp_attr, | ||
98 | IB_QP_STATE | IB_QP_TIMEOUT | IB_QP_RETRY_CNT | | ||
99 | IB_QP_SQ_PSN | IB_QP_RNR_RETRY | | ||
100 | IB_QP_MAX_QP_RD_ATOMIC); | ||
101 | } | ||
102 | |||
103 | int smc_ib_modify_qp_error(struct smc_link *lnk) | ||
104 | { | ||
105 | struct ib_qp_attr qp_attr; | ||
106 | |||
107 | memset(&qp_attr, 0, sizeof(qp_attr)); | ||
108 | qp_attr.qp_state = IB_QPS_ERR; | ||
109 | return ib_modify_qp(lnk->roce_qp, &qp_attr, IB_QP_STATE); | ||
110 | } | ||
111 | |||
112 | int smc_ib_ready_link(struct smc_link *lnk) | ||
113 | { | ||
114 | struct smc_link_group *lgr = smc_get_lgr(lnk); | ||
115 | int rc = 0; | ||
116 | |||
117 | rc = smc_ib_modify_qp_init(lnk); | ||
118 | if (rc) | ||
119 | goto out; | ||
120 | |||
121 | rc = smc_ib_modify_qp_rtr(lnk); | ||
122 | if (rc) | ||
123 | goto out; | ||
124 | smc_wr_remember_qp_attr(lnk); | ||
125 | rc = ib_req_notify_cq(lnk->smcibdev->roce_cq_recv, | ||
126 | IB_CQ_SOLICITED_MASK); | ||
127 | if (rc) | ||
128 | goto out; | ||
129 | rc = smc_wr_rx_post_init(lnk); | ||
130 | if (rc) | ||
131 | goto out; | ||
132 | smc_wr_remember_qp_attr(lnk); | ||
133 | |||
134 | if (lgr->role == SMC_SERV) { | ||
135 | rc = smc_ib_modify_qp_rts(lnk); | ||
136 | if (rc) | ||
137 | goto out; | ||
138 | smc_wr_remember_qp_attr(lnk); | ||
139 | } | ||
140 | out: | ||
141 | return rc; | ||
142 | } | ||
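Reader's note: only the server role (SMC_SERV) drives the QP all the way to RTS inside smc_ib_ready_link(); on the client the RTR-to-RTS transition is deferred until the CLC/LLC handshake confirms the first link, which is why smc_ib_modify_qp_rts() is a separately callable non-static helper here.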
143 | |||
144 | static int smc_ib_fill_mac(struct smc_ib_device *smcibdev, u8 ibport) | ||
145 | { | ||
146 | const struct ib_gid_attr *attr; | ||
147 | int rc; | ||
148 | |||
149 | attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, 0); | ||
150 | if (IS_ERR(attr)) | ||
151 | return -ENODEV; | ||
152 | |||
153 | rc = rdma_read_gid_l2_fields(attr, NULL, smcibdev->mac[ibport - 1]); | ||
154 | rdma_put_gid_attr(attr); | ||
155 | return rc; | ||
156 | } | ||
157 | |||
158 | /* Create an identifier unique for this instance of SMC-R. | ||
159 | * The MAC-address of the first active registered IB device | ||
160 | * plus a random 2-byte number is used to create this identifier. | ||
161 | * This name is delivered to the peer during connection initialization. | ||
162 | */ | ||
163 | static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev, | ||
164 | u8 ibport) | ||
165 | { | ||
166 | memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1], | ||
167 | sizeof(smcibdev->mac[ibport - 1])); | ||
168 | } | ||
169 | |||
170 | bool smc_ib_is_valid_local_systemid(void) | ||
171 | { | ||
172 | return !is_zero_ether_addr(&local_systemid[2]); | ||
173 | } | ||
174 | |||
175 | static void smc_ib_init_local_systemid(void) | ||
176 | { | ||
177 | get_random_bytes(&local_systemid[0], 2); | ||
178 | } | ||
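Taken together, the two helpers above yield the following 8-byte system identifier layout (reader's sketch, derived from the code):

/*
 *   bytes 0-1: random instance qualifier (smc_ib_init_local_systemid)
 *   bytes 2-7: MAC address of the first active registered port
 *              (smc_ib_define_local_systemid)
 *
 * smc_ib_is_valid_local_systemid() treats an all-zero MAC part as
 * "no systemid defined yet".
 */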
179 | |||
180 | bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport) | ||
181 | { | ||
182 | return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE; | ||
183 | } | ||
184 | |||
185 | /* determine the gid for an ib-device port and vlan id */ | ||
186 | int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, | ||
187 | unsigned short vlan_id, u8 gid[], u8 *sgid_index) | ||
188 | { | ||
189 | const struct ib_gid_attr *attr; | ||
190 | const struct net_device *ndev; | ||
191 | int i; | ||
192 | |||
193 | for (i = 0; i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) { | ||
194 | attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, i); | ||
195 | if (IS_ERR(attr)) | ||
196 | continue; | ||
197 | |||
198 | rcu_read_lock(); | ||
199 | ndev = rdma_read_gid_attr_ndev_rcu(attr); | ||
200 | if (!IS_ERR(ndev) && | ||
201 | ((!vlan_id && !is_vlan_dev(ndev)) || | ||
202 | (vlan_id && is_vlan_dev(ndev) && | ||
203 | vlan_dev_vlan_id(ndev) == vlan_id)) && | ||
204 | attr->gid_type == IB_GID_TYPE_ROCE) { | ||
205 | rcu_read_unlock(); | ||
206 | if (gid) | ||
207 | memcpy(gid, &attr->gid, SMC_GID_SIZE); | ||
208 | if (sgid_index) | ||
209 | *sgid_index = attr->index; | ||
210 | rdma_put_gid_attr(attr); | ||
211 | return 0; | ||
212 | } | ||
213 | rcu_read_unlock(); | ||
214 | rdma_put_gid_attr(attr); | ||
215 | } | ||
216 | return -ENODEV; | ||
217 | } | ||
218 | |||
219 | static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport) | ||
220 | { | ||
221 | int rc; | ||
222 | |||
223 | memset(&smcibdev->pattr[ibport - 1], 0, | ||
224 | sizeof(smcibdev->pattr[ibport - 1])); | ||
225 | rc = ib_query_port(smcibdev->ibdev, ibport, | ||
226 | &smcibdev->pattr[ibport - 1]); | ||
227 | if (rc) | ||
228 | goto out; | ||
229 | /* the SMC protocol requires specification of the RoCE MAC address */ | ||
230 | rc = smc_ib_fill_mac(smcibdev, ibport); | ||
231 | if (rc) | ||
232 | goto out; | ||
233 | if (!smc_ib_is_valid_local_systemid() && | ||
234 | smc_ib_port_active(smcibdev, ibport)) | ||
235 | /* create unique system identifier */ | ||
236 | smc_ib_define_local_systemid(smcibdev, ibport); | ||
237 | out: | ||
238 | return rc; | ||
239 | } | ||
240 | |||
241 | /* process context wrapper for might_sleep smc_ib_remember_port_attr */ | ||
242 | static void smc_ib_port_event_work(struct work_struct *work) | ||
243 | { | ||
244 | struct smc_ib_device *smcibdev = container_of( | ||
245 | work, struct smc_ib_device, port_event_work); | ||
246 | u8 port_idx; | ||
247 | |||
248 | for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) { | ||
249 | smc_ib_remember_port_attr(smcibdev, port_idx + 1); | ||
250 | clear_bit(port_idx, &smcibdev->port_event_mask); | ||
251 | if (!smc_ib_port_active(smcibdev, port_idx + 1)) { | ||
252 | set_bit(port_idx, smcibdev->ports_going_away); | ||
253 | smcr_port_err(smcibdev, port_idx + 1); | ||
254 | } else { | ||
255 | clear_bit(port_idx, smcibdev->ports_going_away); | ||
256 | smcr_port_add(smcibdev, port_idx + 1); | ||
257 | } | ||
258 | } | ||
259 | } | ||
260 | |||
261 | /* can be called in IRQ context */ | ||
262 | static void smc_ib_global_event_handler(struct ib_event_handler *handler, | ||
263 | struct ib_event *ibevent) | ||
264 | { | ||
265 | struct smc_ib_device *smcibdev; | ||
266 | bool schedule = false; | ||
267 | u8 port_idx; | ||
268 | |||
269 | smcibdev = container_of(handler, struct smc_ib_device, event_handler); | ||
270 | |||
271 | switch (ibevent->event) { | ||
272 | case IB_EVENT_DEVICE_FATAL: | ||
273 | /* terminate all ports on device */ | ||
274 | for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++) { | ||
275 | set_bit(port_idx, &smcibdev->port_event_mask); | ||
276 | if (!test_and_set_bit(port_idx, | ||
277 | smcibdev->ports_going_away)) | ||
278 | schedule = true; | ||
279 | } | ||
280 | if (schedule) | ||
281 | schedule_work(&smcibdev->port_event_work); | ||
282 | break; | ||
283 | case IB_EVENT_PORT_ACTIVE: | ||
284 | port_idx = ibevent->element.port_num - 1; | ||
285 | if (port_idx >= SMC_MAX_PORTS) | ||
286 | break; | ||
287 | set_bit(port_idx, &smcibdev->port_event_mask); | ||
288 | if (test_and_clear_bit(port_idx, smcibdev->ports_going_away)) | ||
289 | schedule_work(&smcibdev->port_event_work); | ||
290 | break; | ||
291 | case IB_EVENT_PORT_ERR: | ||
292 | port_idx = ibevent->element.port_num - 1; | ||
293 | if (port_idx >= SMC_MAX_PORTS) | ||
294 | break; | ||
295 | set_bit(port_idx, &smcibdev->port_event_mask); | ||
296 | if (!test_and_set_bit(port_idx, smcibdev->ports_going_away)) | ||
297 | schedule_work(&smcibdev->port_event_work); | ||
298 | break; | ||
299 | case IB_EVENT_GID_CHANGE: | ||
300 | port_idx = ibevent->element.port_num - 1; | ||
301 | if (port_idx >= SMC_MAX_PORTS) | ||
302 | break; | ||
303 | set_bit(port_idx, &smcibdev->port_event_mask); | ||
304 | schedule_work(&smcibdev->port_event_work); | ||
305 | break; | ||
306 | default: | ||
307 | break; | ||
308 | } | ||
309 | } | ||
310 | |||
311 | void smc_ib_dealloc_protection_domain(struct smc_link *lnk) | ||
312 | { | ||
313 | if (lnk->roce_pd) | ||
314 | ib_dealloc_pd(lnk->roce_pd); | ||
315 | lnk->roce_pd = NULL; | ||
316 | } | ||
317 | |||
318 | int smc_ib_create_protection_domain(struct smc_link *lnk) | ||
319 | { | ||
320 | int rc; | ||
321 | |||
322 | lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0); | ||
323 | rc = PTR_ERR_OR_ZERO(lnk->roce_pd); | ||
324 | if (IS_ERR(lnk->roce_pd)) | ||
325 | lnk->roce_pd = NULL; | ||
326 | return rc; | ||
327 | } | ||
328 | |||
329 | static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) | ||
330 | { | ||
331 | struct smc_link *lnk = (struct smc_link *)priv; | ||
332 | struct smc_ib_device *smcibdev = lnk->smcibdev; | ||
333 | u8 port_idx; | ||
334 | |||
335 | switch (ibevent->event) { | ||
336 | case IB_EVENT_QP_FATAL: | ||
337 | case IB_EVENT_QP_ACCESS_ERR: | ||
338 | port_idx = ibevent->element.qp->port - 1; | ||
339 | if (port_idx >= SMC_MAX_PORTS) | ||
340 | break; | ||
341 | set_bit(port_idx, &smcibdev->port_event_mask); | ||
342 | if (!test_and_set_bit(port_idx, smcibdev->ports_going_away)) | ||
343 | schedule_work(&smcibdev->port_event_work); | ||
344 | break; | ||
345 | default: | ||
346 | break; | ||
347 | } | ||
348 | } | ||
349 | |||
350 | void smc_ib_destroy_queue_pair(struct smc_link *lnk) | ||
351 | { | ||
352 | if (lnk->roce_qp) | ||
353 | ib_destroy_qp(lnk->roce_qp); | ||
354 | lnk->roce_qp = NULL; | ||
355 | } | ||
356 | |||
357 | /* create a queue pair within the protection domain for a link */ | ||
358 | int smc_ib_create_queue_pair(struct smc_link *lnk) | ||
359 | { | ||
360 | struct ib_qp_init_attr qp_attr = { | ||
361 | .event_handler = smc_ib_qp_event_handler, | ||
362 | .qp_context = lnk, | ||
363 | .send_cq = lnk->smcibdev->roce_cq_send, | ||
364 | .recv_cq = lnk->smcibdev->roce_cq_recv, | ||
365 | .srq = NULL, | ||
366 | .cap = { | ||
367 | /* include unsolicited rdma_writes as well, | ||
368 | * there are max. 2 RDMA_WRITE per 1 WR_SEND | ||
369 | */ | ||
370 | .max_send_wr = SMC_WR_BUF_CNT * 3, | ||
371 | .max_recv_wr = SMC_WR_BUF_CNT * 3, | ||
372 | .max_send_sge = SMC_IB_MAX_SEND_SGE, | ||
373 | .max_recv_sge = 1, | ||
374 | }, | ||
375 | .sq_sig_type = IB_SIGNAL_REQ_WR, | ||
376 | .qp_type = IB_QPT_RC, | ||
377 | }; | ||
378 | int rc; | ||
379 | |||
380 | lnk->roce_qp = ib_create_qp(lnk->roce_pd, &qp_attr); | ||
381 | rc = PTR_ERR_OR_ZERO(lnk->roce_qp); | ||
382 | if (IS_ERR(lnk->roce_qp)) | ||
383 | lnk->roce_qp = NULL; | ||
384 | else | ||
385 | smc_wr_remember_qp_attr(lnk); | ||
386 | return rc; | ||
387 | } | ||
388 | |||
389 | void smc_ib_put_memory_region(struct ib_mr *mr) | ||
390 | { | ||
391 | ib_dereg_mr(mr); | ||
392 | } | ||
393 | |||
394 | static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot, u8 link_idx) | ||
395 | { | ||
396 | unsigned int offset = 0; | ||
397 | int sg_num; | ||
398 | |||
399 | /* map the largest prefix of a dma mapped SG list */ | ||
400 | sg_num = ib_map_mr_sg(buf_slot->mr_rx[link_idx], | ||
401 | buf_slot->sgt[link_idx].sgl, | ||
402 | buf_slot->sgt[link_idx].orig_nents, | ||
403 | &offset, PAGE_SIZE); | ||
404 | |||
405 | return sg_num; | ||
406 | } | ||
407 | |||
408 | /* Allocate a memory region and map the dma mapped SG list of buf_slot */ | ||
409 | int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, | ||
410 | struct smc_buf_desc *buf_slot, u8 link_idx) | ||
411 | { | ||
412 | if (buf_slot->mr_rx[link_idx]) | ||
413 | return 0; /* already done */ | ||
414 | |||
415 | buf_slot->mr_rx[link_idx] = | ||
416 | ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order); | ||
417 | if (IS_ERR(buf_slot->mr_rx[link_idx])) { | ||
418 | int rc; | ||
419 | |||
420 | rc = PTR_ERR(buf_slot->mr_rx[link_idx]); | ||
421 | buf_slot->mr_rx[link_idx] = NULL; | ||
422 | return rc; | ||
423 | } | ||
424 | |||
425 | if (smc_ib_map_mr_sg(buf_slot, link_idx) != 1) | ||
426 | return -EINVAL; | ||
427 | |||
428 | return 0; | ||
429 | } | ||
430 | |||
431 | /* synchronize buffer usage for cpu access */ | ||
432 | void smc_ib_sync_sg_for_cpu(struct smc_link *lnk, | ||
433 | struct smc_buf_desc *buf_slot, | ||
434 | enum dma_data_direction data_direction) | ||
435 | { | ||
436 | struct scatterlist *sg; | ||
437 | unsigned int i; | ||
438 | |||
439 | /* for now there is just one DMA address */ | ||
440 | for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg, | ||
441 | buf_slot->sgt[lnk->link_idx].nents, i) { | ||
442 | if (!sg_dma_len(sg)) | ||
443 | break; | ||
444 | ib_dma_sync_single_for_cpu(lnk->smcibdev->ibdev, | ||
445 | sg_dma_address(sg), | ||
446 | sg_dma_len(sg), | ||
447 | data_direction); | ||
448 | } | ||
449 | } | ||
450 | |||
451 | /* synchronize buffer usage for device access */ | ||
452 | void smc_ib_sync_sg_for_device(struct smc_link *lnk, | ||
453 | struct smc_buf_desc *buf_slot, | ||
454 | enum dma_data_direction data_direction) | ||
455 | { | ||
456 | struct scatterlist *sg; | ||
457 | unsigned int i; | ||
458 | |||
459 | /* for now there is just one DMA address */ | ||
460 | for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg, | ||
461 | buf_slot->sgt[lnk->link_idx].nents, i) { | ||
462 | if (!sg_dma_len(sg)) | ||
463 | break; | ||
464 | ib_dma_sync_single_for_device(lnk->smcibdev->ibdev, | ||
465 | sg_dma_address(sg), | ||
466 | sg_dma_len(sg), | ||
467 | data_direction); | ||
468 | } | ||
469 | } | ||
470 | |||
471 | /* Map a new TX or RX buffer SG-table to DMA */ | ||
472 | int smc_ib_buf_map_sg(struct smc_link *lnk, | ||
473 | struct smc_buf_desc *buf_slot, | ||
474 | enum dma_data_direction data_direction) | ||
475 | { | ||
476 | int mapped_nents; | ||
477 | |||
478 | mapped_nents = ib_dma_map_sg(lnk->smcibdev->ibdev, | ||
479 | buf_slot->sgt[lnk->link_idx].sgl, | ||
480 | buf_slot->sgt[lnk->link_idx].orig_nents, | ||
481 | data_direction); | ||
482 | if (!mapped_nents) | ||
483 | return -ENOMEM; | ||
484 | |||
485 | return mapped_nents; | ||
486 | } | ||
487 | |||
488 | void smc_ib_buf_unmap_sg(struct smc_link *lnk, | ||
489 | struct smc_buf_desc *buf_slot, | ||
490 | enum dma_data_direction data_direction) | ||
491 | { | ||
492 | if (!buf_slot->sgt[lnk->link_idx].sgl->dma_address) | ||
493 | return; /* already unmapped */ | ||
494 | |||
495 | ib_dma_unmap_sg(lnk->smcibdev->ibdev, | ||
496 | buf_slot->sgt[lnk->link_idx].sgl, | ||
497 | buf_slot->sgt[lnk->link_idx].orig_nents, | ||
498 | data_direction); | ||
499 | buf_slot->sgt[lnk->link_idx].sgl->dma_address = 0; | ||
500 | } | ||
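Reader's note: clearing dma_address on unmap doubles as the "already unmapped" sentinel tested at the top of smc_ib_buf_unmap_sg(), which makes the function idempotent and lets error paths call it unconditionally.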
501 | |||
502 | long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) | ||
503 | { | ||
504 | struct ib_cq_init_attr cqattr = { | ||
505 | .cqe = SMC_MAX_CQE, .comp_vector = 0 }; | ||
506 | int cqe_size_order, smc_order; | ||
507 | long rc; | ||
508 | |||
509 | mutex_lock(&smcibdev->mutex); | ||
510 | rc = 0; | ||
511 | if (smcibdev->initialized) | ||
512 | goto out; | ||
513 | /* the calculated number of cq entries fits to mlx5 cq allocation */ | ||
514 | cqe_size_order = cache_line_size() == 128 ? 7 : 6; | ||
515 | smc_order = MAX_ORDER - cqe_size_order - 1; | ||
516 | if (SMC_MAX_CQE + 2 > (0x00000001 << smc_order) * PAGE_SIZE) | ||
517 | cqattr.cqe = (0x00000001 << smc_order) * PAGE_SIZE - 2; | ||
518 | smcibdev->roce_cq_send = ib_create_cq(smcibdev->ibdev, | ||
519 | smc_wr_tx_cq_handler, NULL, | ||
520 | smcibdev, &cqattr); | ||
521 | rc = PTR_ERR_OR_ZERO(smcibdev->roce_cq_send); | ||
522 | if (IS_ERR(smcibdev->roce_cq_send)) { | ||
523 | smcibdev->roce_cq_send = NULL; | ||
524 | goto out; | ||
525 | } | ||
526 | smcibdev->roce_cq_recv = ib_create_cq(smcibdev->ibdev, | ||
527 | smc_wr_rx_cq_handler, NULL, | ||
528 | smcibdev, &cqattr); | ||
529 | rc = PTR_ERR_OR_ZERO(smcibdev->roce_cq_recv); | ||
530 | if (IS_ERR(smcibdev->roce_cq_recv)) { | ||
531 | smcibdev->roce_cq_recv = NULL; | ||
532 | goto err; | ||
533 | } | ||
534 | smc_wr_add_dev(smcibdev); | ||
535 | smcibdev->initialized = 1; | ||
536 | goto out; | ||
537 | |||
538 | err: | ||
539 | ib_destroy_cq(smcibdev->roce_cq_send); | ||
540 | out: | ||
541 | mutex_unlock(&smcibdev->mutex); | ||
542 | return rc; | ||
543 | } | ||
544 | |||
545 | static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev) | ||
546 | { | ||
547 | mutex_lock(&smcibdev->mutex); | ||
548 | if (!smcibdev->initialized) | ||
549 | goto out; | ||
550 | smcibdev->initialized = 0; | ||
551 | ib_destroy_cq(smcibdev->roce_cq_recv); | ||
552 | ib_destroy_cq(smcibdev->roce_cq_send); | ||
553 | smc_wr_remove_dev(smcibdev); | ||
554 | out: | ||
555 | mutex_unlock(&smcibdev->mutex); | ||
556 | } | ||
557 | |||
558 | static struct ib_client smc_ib_client; | ||
559 | |||
560 | /* callback function for ib_register_client() */ | ||
561 | static int smc_ib_add_dev(struct ib_device *ibdev) | ||
562 | { | ||
563 | struct smc_ib_device *smcibdev; | ||
564 | u8 port_cnt; | ||
565 | int i; | ||
566 | |||
567 | if (ibdev->node_type != RDMA_NODE_IB_CA) | ||
568 | return -EOPNOTSUPP; | ||
569 | |||
570 | smcibdev = kzalloc(sizeof(*smcibdev), GFP_KERNEL); | ||
571 | if (!smcibdev) | ||
572 | return -ENOMEM; | ||
573 | |||
574 | smcibdev->ibdev = ibdev; | ||
575 | INIT_WORK(&smcibdev->port_event_work, smc_ib_port_event_work); | ||
576 | atomic_set(&smcibdev->lnk_cnt, 0); | ||
577 | init_waitqueue_head(&smcibdev->lnks_deleted); | ||
578 | mutex_init(&smcibdev->mutex); | ||
579 | mutex_lock(&smc_ib_devices.mutex); | ||
580 | list_add_tail(&smcibdev->list, &smc_ib_devices.list); | ||
581 | mutex_unlock(&smc_ib_devices.mutex); | ||
582 | ib_set_client_data(ibdev, &smc_ib_client, smcibdev); | ||
583 | INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev, | ||
584 | smc_ib_global_event_handler); | ||
585 | ib_register_event_handler(&smcibdev->event_handler); | ||
586 | |||
587 | /* trigger reading of the port attributes */ | ||
588 | port_cnt = smcibdev->ibdev->phys_port_cnt; | ||
589 | pr_warn_ratelimited("smc: adding ib device %s with port count %d\n", | ||
590 | smcibdev->ibdev->name, port_cnt); | ||
591 | for (i = 0; | ||
592 | i < min_t(size_t, port_cnt, SMC_MAX_PORTS); | ||
593 | i++) { | ||
594 | set_bit(i, &smcibdev->port_event_mask); | ||
595 | /* determine pnetids of the port */ | ||
596 | if (smc_pnetid_by_dev_port(ibdev->dev.parent, i, | ||
597 | smcibdev->pnetid[i])) | ||
598 | smc_pnetid_by_table_ib(smcibdev, i + 1); | ||
599 | pr_warn_ratelimited("smc: ib device %s port %d has pnetid " | ||
600 | "%.16s%s\n", | ||
601 | smcibdev->ibdev->name, i + 1, | ||
602 | smcibdev->pnetid[i], | ||
603 | smcibdev->pnetid_by_user[i] ? | ||
604 | " (user defined)" : | ||
605 | ""); | ||
606 | } | ||
607 | schedule_work(&smcibdev->port_event_work); | ||
608 | return 0; | ||
609 | } | ||
610 | |||
611 | /* callback function for ib_unregister_client() */ | ||
612 | static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) | ||
613 | { | ||
614 | struct smc_ib_device *smcibdev = client_data; | ||
615 | |||
616 | mutex_lock(&smc_ib_devices.mutex); | ||
617 | list_del_init(&smcibdev->list); /* remove from smc_ib_devices */ | ||
618 | mutex_unlock(&smc_ib_devices.mutex); | ||
619 | pr_warn_ratelimited("smc: removing ib device %s\n", | ||
620 | smcibdev->ibdev->name); | ||
621 | smc_smcr_terminate_all(smcibdev); | ||
622 | smc_ib_cleanup_per_ibdev(smcibdev); | ||
623 | ib_unregister_event_handler(&smcibdev->event_handler); | ||
624 | cancel_work_sync(&smcibdev->port_event_work); | ||
625 | kfree(smcibdev); | ||
626 | } | ||
627 | |||
628 | static struct ib_client smc_ib_client = { | ||
629 | .name = "smc_ib", | ||
630 | .add = smc_ib_add_dev, | ||
631 | .remove = smc_ib_remove_dev, | ||
632 | }; | ||
633 | |||
634 | int __init smc_ib_register_client(void) | ||
635 | { | ||
636 | smc_ib_init_local_systemid(); | ||
637 | return ib_register_client(&smc_ib_client); | ||
638 | } | ||
639 | |||
640 | void smc_ib_unregister_client(void) | ||
641 | { | ||
642 | ib_unregister_client(&smc_ib_client); | ||
643 | } | ||
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h new file mode 100644 index 000000000..f90d15eae --- /dev/null +++ b/net/smc/smc_ib.h | |||
@@ -0,0 +1,91 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * Definitions for IB environment | ||
6 | * | ||
7 | * Copyright IBM Corp. 2016 | ||
8 | * | ||
9 | Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> | ||
10 | */ | ||
11 | |||
12 | #ifndef _SMC_IB_H | ||
13 | #define _SMC_IB_H | ||
14 | |||
15 | #include <linux/interrupt.h> | ||
16 | #include <linux/if_ether.h> | ||
17 | #include <linux/mutex.h> | ||
18 | #include <linux/wait.h> | ||
19 | #include <rdma/ib_verbs.h> | ||
20 | #include <net/smc.h> | ||
21 | |||
22 | #define SMC_MAX_PORTS 2 /* Max # of ports */ | ||
23 | #define SMC_GID_SIZE sizeof(union ib_gid) | ||
24 | |||
25 | #define SMC_IB_MAX_SEND_SGE 2 | ||
26 | |||
27 | struct smc_ib_devices { /* list of smc ib devices definition */ | ||
28 | struct list_head list; | ||
29 | struct mutex mutex; /* protects list of smc ib devices */ | ||
30 | }; | ||
31 | |||
32 | extern struct smc_ib_devices smc_ib_devices; /* list of smc ib devices */ | ||
33 | |||
34 | struct smc_ib_device { /* ib-device infos for smc */ | ||
35 | struct list_head list; | ||
36 | struct ib_device *ibdev; | ||
37 | struct ib_port_attr pattr[SMC_MAX_PORTS]; /* ib dev. port attrs */ | ||
38 | struct ib_event_handler event_handler; /* global ib_event handler */ | ||
39 | struct ib_cq *roce_cq_send; /* send completion queue */ | ||
40 | struct ib_cq *roce_cq_recv; /* recv completion queue */ | ||
41 | struct tasklet_struct send_tasklet; /* called by send cq handler */ | ||
42 | struct tasklet_struct recv_tasklet; /* called by recv cq handler */ | ||
43 | char mac[SMC_MAX_PORTS][ETH_ALEN]; | ||
44 | /* mac address per port*/ | ||
45 | u8 pnetid[SMC_MAX_PORTS][SMC_MAX_PNETID_LEN]; | ||
46 | /* pnetid per port */ | ||
47 | bool pnetid_by_user[SMC_MAX_PORTS]; | ||
48 | /* pnetid defined by user? */ | ||
49 | u8 initialized : 1; /* ib dev CQ, evthdl done */ | ||
50 | struct work_struct port_event_work; | ||
51 | unsigned long port_event_mask; | ||
52 | DECLARE_BITMAP(ports_going_away, SMC_MAX_PORTS); | ||
53 | atomic_t lnk_cnt; /* number of links on ibdev */ | ||
54 | wait_queue_head_t lnks_deleted; /* wait 4 removal of all links*/ | ||
55 | struct mutex mutex; /* protect dev setup+cleanup */ | ||
56 | }; | ||
57 | |||
58 | struct smc_buf_desc; | ||
59 | struct smc_link; | ||
60 | |||
61 | int smc_ib_register_client(void) __init; | ||
62 | void smc_ib_unregister_client(void); | ||
63 | bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport); | ||
64 | int smc_ib_buf_map_sg(struct smc_link *lnk, | ||
65 | struct smc_buf_desc *buf_slot, | ||
66 | enum dma_data_direction data_direction); | ||
67 | void smc_ib_buf_unmap_sg(struct smc_link *lnk, | ||
68 | struct smc_buf_desc *buf_slot, | ||
69 | enum dma_data_direction data_direction); | ||
70 | void smc_ib_dealloc_protection_domain(struct smc_link *lnk); | ||
71 | int smc_ib_create_protection_domain(struct smc_link *lnk); | ||
72 | void smc_ib_destroy_queue_pair(struct smc_link *lnk); | ||
73 | int smc_ib_create_queue_pair(struct smc_link *lnk); | ||
74 | int smc_ib_ready_link(struct smc_link *lnk); | ||
75 | int smc_ib_modify_qp_rts(struct smc_link *lnk); | ||
76 | int smc_ib_modify_qp_reset(struct smc_link *lnk); | ||
77 | int smc_ib_modify_qp_error(struct smc_link *lnk); | ||
78 | long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev); | ||
79 | int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, | ||
80 | struct smc_buf_desc *buf_slot, u8 link_idx); | ||
81 | void smc_ib_put_memory_region(struct ib_mr *mr); | ||
82 | void smc_ib_sync_sg_for_cpu(struct smc_link *lnk, | ||
83 | struct smc_buf_desc *buf_slot, | ||
84 | enum dma_data_direction data_direction); | ||
85 | void smc_ib_sync_sg_for_device(struct smc_link *lnk, | ||
86 | struct smc_buf_desc *buf_slot, | ||
87 | enum dma_data_direction data_direction); | ||
88 | int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, | ||
89 | unsigned short vlan_id, u8 gid[], u8 *sgid_index); | ||
90 | bool smc_ib_is_valid_local_systemid(void); | ||
91 | #endif | ||
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c new file mode 100644 index 000000000..8e33c0128 --- /dev/null +++ b/net/smc/smc_ism.c | |||
@@ -0,0 +1,439 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* Shared Memory Communications Direct over ISM devices (SMC-D) | ||
3 | * | ||
4 | * Functions for ISM device. | ||
5 | * | ||
6 | * Copyright IBM Corp. 2018 | ||
7 | */ | ||
8 | |||
9 | #include <linux/spinlock.h> | ||
10 | #include <linux/mutex.h> | ||
11 | #include <linux/slab.h> | ||
12 | #include <asm/page.h> | ||
13 | |||
14 | #include "smc.h" | ||
15 | #include "smc_core.h" | ||
16 | #include "smc_ism.h" | ||
17 | #include "smc_pnet.h" | ||
18 | |||
19 | struct smcd_dev_list smcd_dev_list = { | ||
20 | .list = LIST_HEAD_INIT(smcd_dev_list.list), | ||
21 | .mutex = __MUTEX_INITIALIZER(smcd_dev_list.mutex) | ||
22 | }; | ||
23 | |||
24 | bool smc_ism_v2_capable; | ||
25 | |||
26 | /* Test if an ISM communication is possible - same CPC */ | ||
27 | int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd) | ||
28 | { | ||
29 | return smcd->ops->query_remote_gid(smcd, peer_gid, vlan_id ? 1 : 0, | ||
30 | vlan_id); | ||
31 | } | ||
32 | |||
33 | int smc_ism_write(struct smcd_dev *smcd, const struct smc_ism_position *pos, | ||
34 | void *data, size_t len) | ||
35 | { | ||
36 | int rc; | ||
37 | |||
38 | rc = smcd->ops->move_data(smcd, pos->token, pos->index, pos->signal, | ||
39 | pos->offset, data, len); | ||
40 | |||
41 | return rc < 0 ? rc : 0; | ||
42 | } | ||
43 | |||
44 | void smc_ism_get_system_eid(struct smcd_dev *smcd, u8 **eid) | ||
45 | { | ||
46 | smcd->ops->get_system_eid(smcd, eid); | ||
47 | } | ||
48 | |||
49 | u16 smc_ism_get_chid(struct smcd_dev *smcd) | ||
50 | { | ||
51 | return smcd->ops->get_chid(smcd); | ||
52 | } | ||
53 | |||
54 | /* Set a connection using this DMBE. */ | ||
55 | void smc_ism_set_conn(struct smc_connection *conn) | ||
56 | { | ||
57 | unsigned long flags; | ||
58 | |||
59 | spin_lock_irqsave(&conn->lgr->smcd->lock, flags); | ||
60 | conn->lgr->smcd->conn[conn->rmb_desc->sba_idx] = conn; | ||
61 | spin_unlock_irqrestore(&conn->lgr->smcd->lock, flags); | ||
62 | } | ||
63 | |||
64 | /* Unset a connection using this DMBE. */ | ||
65 | void smc_ism_unset_conn(struct smc_connection *conn) | ||
66 | { | ||
67 | unsigned long flags; | ||
68 | |||
69 | if (!conn->rmb_desc) | ||
70 | return; | ||
71 | |||
72 | spin_lock_irqsave(&conn->lgr->smcd->lock, flags); | ||
73 | conn->lgr->smcd->conn[conn->rmb_desc->sba_idx] = NULL; | ||
74 | spin_unlock_irqrestore(&conn->lgr->smcd->lock, flags); | ||
75 | } | ||
76 | |||
77 | /* Register a VLAN identifier with the ISM device. Use a reference count | ||
78 | * and add a VLAN identifier only when the first DMB using this VLAN is | ||
79 | * registered. | ||
80 | */ | ||
81 | int smc_ism_get_vlan(struct smcd_dev *smcd, unsigned short vlanid) | ||
82 | { | ||
83 | struct smc_ism_vlanid *new_vlan, *vlan; | ||
84 | unsigned long flags; | ||
85 | int rc = 0; | ||
86 | |||
87 | if (!vlanid) /* No valid vlan id */ | ||
88 | return -EINVAL; | ||
89 | |||
90 | /* create new vlan entry, in case we need it */ | ||
91 | new_vlan = kzalloc(sizeof(*new_vlan), GFP_KERNEL); | ||
92 | if (!new_vlan) | ||
93 | return -ENOMEM; | ||
94 | new_vlan->vlanid = vlanid; | ||
95 | refcount_set(&new_vlan->refcnt, 1); | ||
96 | |||
97 | /* if there is an existing entry, increase count and return */ | ||
98 | spin_lock_irqsave(&smcd->lock, flags); | ||
99 | list_for_each_entry(vlan, &smcd->vlan, list) { | ||
100 | if (vlan->vlanid == vlanid) { | ||
101 | refcount_inc(&vlan->refcnt); | ||
102 | kfree(new_vlan); | ||
103 | goto out; | ||
104 | } | ||
105 | } | ||
106 | |||
107 | /* no existing entry found. | ||
108 | * add new entry to device; might fail, e.g., if HW limit reached | ||
109 | */ | ||
110 | if (smcd->ops->add_vlan_id(smcd, vlanid)) { | ||
111 | kfree(new_vlan); | ||
112 | rc = -EIO; | ||
113 | goto out; | ||
114 | } | ||
115 | list_add_tail(&new_vlan->list, &smcd->vlan); | ||
116 | out: | ||
117 | spin_unlock_irqrestore(&smcd->lock, flags); | ||
118 | return rc; | ||
119 | } | ||
120 | |||
121 | /* Unregister a VLAN identifier with the ISM device. Use a reference count | ||
122 | * and remove a VLAN identifier only when the last DMB using this VLAN is | ||
123 | * unregistered. | ||
124 | */ | ||
125 | int smc_ism_put_vlan(struct smcd_dev *smcd, unsigned short vlanid) | ||
126 | { | ||
127 | struct smc_ism_vlanid *vlan; | ||
128 | unsigned long flags; | ||
129 | bool found = false; | ||
130 | int rc = 0; | ||
131 | |||
132 | if (!vlanid) /* No valid vlan id */ | ||
133 | return -EINVAL; | ||
134 | |||
135 | spin_lock_irqsave(&smcd->lock, flags); | ||
136 | list_for_each_entry(vlan, &smcd->vlan, list) { | ||
137 | if (vlan->vlanid == vlanid) { | ||
138 | if (!refcount_dec_and_test(&vlan->refcnt)) | ||
139 | goto out; | ||
140 | found = true; | ||
141 | break; | ||
142 | } | ||
143 | } | ||
144 | if (!found) { | ||
145 | rc = -ENOENT; | ||
146 | goto out; /* VLAN id not in table */ | ||
147 | } | ||
148 | |||
149 | /* Found and the last reference just gone */ | ||
150 | if (smcd->ops->del_vlan_id(smcd, vlanid)) | ||
151 | rc = -EIO; | ||
152 | list_del(&vlan->list); | ||
153 | kfree(vlan); | ||
154 | out: | ||
155 | spin_unlock_irqrestore(&smcd->lock, flags); | ||
156 | return rc; | ||
157 | } | ||
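The get/put pair above is reference counted per VLAN id, so the device-level add_vlan_id()/del_vlan_id() callbacks fire only for the first and the last user. An illustrative pairing (reader's sketch; the function name is hypothetical):

static int example_use_vlan(struct smcd_dev *smcd, unsigned short vlanid)
{
	int rc = smc_ism_get_vlan(smcd, vlanid); /* first user adds the id */

	if (rc)
		return rc;
	/* ... register and use DMBs tagged with vlanid ... */
	return smc_ism_put_vlan(smcd, vlanid);	 /* last user removes it */
}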
158 | |||
159 | int smc_ism_unregister_dmb(struct smcd_dev *smcd, struct smc_buf_desc *dmb_desc) | ||
160 | { | ||
161 | struct smcd_dmb dmb; | ||
162 | int rc = 0; | ||
163 | |||
164 | if (!dmb_desc->dma_addr) | ||
165 | return rc; | ||
166 | |||
167 | memset(&dmb, 0, sizeof(dmb)); | ||
168 | dmb.dmb_tok = dmb_desc->token; | ||
169 | dmb.sba_idx = dmb_desc->sba_idx; | ||
170 | dmb.cpu_addr = dmb_desc->cpu_addr; | ||
171 | dmb.dma_addr = dmb_desc->dma_addr; | ||
172 | dmb.dmb_len = dmb_desc->len; | ||
173 | rc = smcd->ops->unregister_dmb(smcd, &dmb); | ||
174 | if (!rc || rc == ISM_ERROR) { | ||
175 | dmb_desc->cpu_addr = NULL; | ||
176 | dmb_desc->dma_addr = 0; | ||
177 | } | ||
178 | |||
179 | return rc; | ||
180 | } | ||
181 | |||
182 | int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len, | ||
183 | struct smc_buf_desc *dmb_desc) | ||
184 | { | ||
185 | struct smcd_dmb dmb; | ||
186 | int rc; | ||
187 | |||
188 | memset(&dmb, 0, sizeof(dmb)); | ||
189 | dmb.dmb_len = dmb_len; | ||
190 | dmb.sba_idx = dmb_desc->sba_idx; | ||
191 | dmb.vlan_id = lgr->vlan_id; | ||
192 | dmb.rgid = lgr->peer_gid; | ||
193 | rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb); | ||
194 | if (!rc) { | ||
195 | dmb_desc->sba_idx = dmb.sba_idx; | ||
196 | dmb_desc->token = dmb.dmb_tok; | ||
197 | dmb_desc->cpu_addr = dmb.cpu_addr; | ||
198 | dmb_desc->dma_addr = dmb.dma_addr; | ||
199 | dmb_desc->len = dmb.dmb_len; | ||
200 | } | ||
201 | return rc; | ||
202 | } | ||
203 | |||
204 | struct smc_ism_event_work { | ||
205 | struct work_struct work; | ||
206 | struct smcd_dev *smcd; | ||
207 | struct smcd_event event; | ||
208 | }; | ||
209 | |||
210 | #define ISM_EVENT_REQUEST 0x0001 | ||
211 | #define ISM_EVENT_RESPONSE 0x0002 | ||
212 | #define ISM_EVENT_REQUEST_IR 0x00000001 | ||
213 | #define ISM_EVENT_CODE_SHUTDOWN 0x80 | ||
214 | #define ISM_EVENT_CODE_TESTLINK 0x83 | ||
215 | |||
216 | union smcd_sw_event_info { | ||
217 | u64 info; | ||
218 | struct { | ||
219 | u8 uid[SMC_LGR_ID_SIZE]; | ||
220 | unsigned short vlan_id; | ||
221 | u16 code; | ||
222 | }; | ||
223 | }; | ||
224 | |||
225 | static void smcd_handle_sw_event(struct smc_ism_event_work *wrk) | ||
226 | { | ||
227 | union smcd_sw_event_info ev_info; | ||
228 | |||
229 | ev_info.info = wrk->event.info; | ||
230 | switch (wrk->event.code) { | ||
231 | case ISM_EVENT_CODE_SHUTDOWN: /* Peer shut down DMBs */ | ||
232 | smc_smcd_terminate(wrk->smcd, wrk->event.tok, ev_info.vlan_id); | ||
233 | break; | ||
234 | case ISM_EVENT_CODE_TESTLINK: /* Activity timer */ | ||
235 | if (ev_info.code == ISM_EVENT_REQUEST) { | ||
236 | ev_info.code = ISM_EVENT_RESPONSE; | ||
237 | wrk->smcd->ops->signal_event(wrk->smcd, | ||
238 | wrk->event.tok, | ||
239 | ISM_EVENT_REQUEST_IR, | ||
240 | ISM_EVENT_CODE_TESTLINK, | ||
241 | ev_info.info); | ||
242 | } | ||
243 | break; | ||
244 | } | ||
245 | } | ||
246 | |||
247 | int smc_ism_signal_shutdown(struct smc_link_group *lgr) | ||
248 | { | ||
249 | int rc; | ||
250 | union smcd_sw_event_info ev_info; | ||
251 | |||
252 | if (lgr->peer_shutdown) | ||
253 | return 0; | ||
254 | |||
255 | memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE); | ||
256 | ev_info.vlan_id = lgr->vlan_id; | ||
257 | ev_info.code = ISM_EVENT_REQUEST; | ||
258 | rc = lgr->smcd->ops->signal_event(lgr->smcd, lgr->peer_gid, | ||
259 | ISM_EVENT_REQUEST_IR, | ||
260 | ISM_EVENT_CODE_SHUTDOWN, | ||
261 | ev_info.info); | ||
262 | return rc; | ||
263 | } | ||
264 | |||
265 | /* worker for SMC-D events */ | ||
266 | static void smc_ism_event_work(struct work_struct *work) | ||
267 | { | ||
268 | struct smc_ism_event_work *wrk = | ||
269 | container_of(work, struct smc_ism_event_work, work); | ||
270 | |||
271 | switch (wrk->event.type) { | ||
272 | case ISM_EVENT_GID: /* GID event, token is peer GID */ | ||
273 | smc_smcd_terminate(wrk->smcd, wrk->event.tok, VLAN_VID_MASK); | ||
274 | break; | ||
275 | case ISM_EVENT_DMB: | ||
276 | break; | ||
277 | case ISM_EVENT_SWR: /* Software defined event */ | ||
278 | smcd_handle_sw_event(wrk); | ||
279 | break; | ||
280 | } | ||
281 | kfree(wrk); | ||
282 | } | ||
283 | |||
284 | static void smcd_release(struct device *dev) | ||
285 | { | ||
286 | struct smcd_dev *smcd = container_of(dev, struct smcd_dev, dev); | ||
287 | |||
288 | kfree(smcd->conn); | ||
289 | kfree(smcd); | ||
290 | } | ||
291 | |||
292 | struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, | ||
293 | const struct smcd_ops *ops, int max_dmbs) | ||
294 | { | ||
295 | struct smcd_dev *smcd; | ||
296 | |||
297 | smcd = kzalloc(sizeof(*smcd), GFP_KERNEL); | ||
298 | if (!smcd) | ||
299 | return NULL; | ||
300 | smcd->conn = kcalloc(max_dmbs, sizeof(struct smc_connection *), | ||
301 | GFP_KERNEL); | ||
302 | if (!smcd->conn) { | ||
303 | kfree(smcd); | ||
304 | return NULL; | ||
305 | } | ||
306 | |||
307 | smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s", | ||
308 | WQ_MEM_RECLAIM, name); | ||
309 | if (!smcd->event_wq) { | ||
310 | kfree(smcd->conn); | ||
311 | kfree(smcd); | ||
312 | return NULL; | ||
313 | } | ||
314 | |||
315 | smcd->dev.parent = parent; | ||
316 | smcd->dev.release = smcd_release; | ||
317 | device_initialize(&smcd->dev); | ||
318 | dev_set_name(&smcd->dev, name); | ||
319 | smcd->ops = ops; | ||
320 | if (smc_pnetid_by_dev_port(parent, 0, smcd->pnetid)) | ||
321 | smc_pnetid_by_table_smcd(smcd); | ||
322 | |||
323 | spin_lock_init(&smcd->lock); | ||
324 | spin_lock_init(&smcd->lgr_lock); | ||
325 | INIT_LIST_HEAD(&smcd->vlan); | ||
326 | INIT_LIST_HEAD(&smcd->lgr_list); | ||
327 | init_waitqueue_head(&smcd->lgrs_deleted); | ||
328 | return smcd; | ||
329 | } | ||
330 | EXPORT_SYMBOL_GPL(smcd_alloc_dev); | ||
331 | |||
332 | int smcd_register_dev(struct smcd_dev *smcd) | ||
333 | { | ||
334 | int rc; | ||
335 | |||
336 | mutex_lock(&smcd_dev_list.mutex); | ||
337 | if (list_empty(&smcd_dev_list.list)) { | ||
338 | u8 *system_eid = NULL; | ||
339 | |||
340 | smc_ism_get_system_eid(smcd, &system_eid); | ||
341 | if (system_eid[24] != '0' || system_eid[28] != '0') | ||
342 | smc_ism_v2_capable = true; | ||
343 | } | ||
344 | /* sort list: devices without pnetid before devices with pnetid */ | ||
345 | if (smcd->pnetid[0]) | ||
346 | list_add_tail(&smcd->list, &smcd_dev_list.list); | ||
347 | else | ||
348 | list_add(&smcd->list, &smcd_dev_list.list); | ||
349 | mutex_unlock(&smcd_dev_list.mutex); | ||
350 | |||
351 | pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n", | ||
352 | dev_name(&smcd->dev), smcd->pnetid, | ||
353 | smcd->pnetid_by_user ? " (user defined)" : ""); | ||
354 | |||
355 | rc = device_add(&smcd->dev); | ||
356 | if (rc) { | ||
357 | mutex_lock(&smcd_dev_list.mutex); | ||
358 | list_del(&smcd->list); | ||
359 | mutex_unlock(&smcd_dev_list.mutex); | ||
360 | } | ||
361 | |||
362 | return rc; | ||
363 | } | ||
364 | EXPORT_SYMBOL_GPL(smcd_register_dev); | ||
365 | |||
366 | void smcd_unregister_dev(struct smcd_dev *smcd) | ||
367 | { | ||
368 | pr_warn_ratelimited("smc: removing smcd device %s\n", | ||
369 | dev_name(&smcd->dev)); | ||
370 | mutex_lock(&smcd_dev_list.mutex); | ||
371 | list_del_init(&smcd->list); | ||
372 | mutex_unlock(&smcd_dev_list.mutex); | ||
373 | smcd->going_away = 1; | ||
374 | smc_smcd_terminate_all(smcd); | ||
375 | flush_workqueue(smcd->event_wq); | ||
376 | destroy_workqueue(smcd->event_wq); | ||
377 | |||
378 | device_del(&smcd->dev); | ||
379 | } | ||
380 | EXPORT_SYMBOL_GPL(smcd_unregister_dev); | ||
381 | |||
382 | void smcd_free_dev(struct smcd_dev *smcd) | ||
383 | { | ||
384 | put_device(&smcd->dev); | ||
385 | } | ||
386 | EXPORT_SYMBOL_GPL(smcd_free_dev); | ||
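The four exports above (smcd_alloc_dev, smcd_register_dev, smcd_unregister_dev, smcd_free_dev) form the ISM device lifecycle as seen from a platform driver. A minimal sketch of that lifecycle, assuming a hypothetical my_ism driver with a my_ism_ops table implementing struct smcd_ops and a driver-defined MY_ISM_MAX_DMBS constant (all three names are illustrative, not part of this file):

	/* illustrative only: hypothetical ISM driver glue */
	static const struct smcd_ops my_ism_ops; /* register_dmb, signal_event, ... */

	static int my_ism_probe(struct device *parent)
	{
		struct smcd_dev *smcd;
		int rc;

		smcd = smcd_alloc_dev(parent, dev_name(parent), &my_ism_ops,
				      MY_ISM_MAX_DMBS);	/* assumed driver constant */
		if (!smcd)
			return -ENOMEM;
		rc = smcd_register_dev(smcd);	/* links it into smcd_dev_list */
		if (rc)
			smcd_free_dev(smcd);	/* drops the only device reference */
		return rc;
	}

	static void my_ism_remove(struct smcd_dev *smcd)
	{
		smcd_unregister_dev(smcd);	/* terminates lgrs, drains event_wq */
		smcd_free_dev(smcd);		/* put_device(); smcd_release() frees */
	}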
387 | |||
388 | /* SMCD Device event handler. Called from ISM device interrupt handler. | ||
389 | * Parameters are smcd device pointer, | ||
390 | * - event->type (0 --> DMB, 1 --> GID), | ||
391 | * - event->code (event code), | ||
392 | * - event->tok (either DMB token when event type 0, or GID when event type 1) | ||
393 | * - event->time (time of day) | ||
394 | * - event->info (debug info). | ||
395 | * | ||
396 | * Context: | ||
397 | * - Function called in IRQ context from ISM device driver event handler. | ||
398 | */ | ||
399 | void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event) | ||
400 | { | ||
401 | struct smc_ism_event_work *wrk; | ||
402 | |||
403 | if (smcd->going_away) | ||
404 | return; | ||
405 | /* copy event to event work queue, and let it be handled there */ | ||
406 | wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC); | ||
407 | if (!wrk) | ||
408 | return; | ||
409 | INIT_WORK(&wrk->work, smc_ism_event_work); | ||
410 | wrk->smcd = smcd; | ||
411 | wrk->event = *event; | ||
412 | queue_work(smcd->event_wq, &wrk->work); | ||
413 | } | ||
414 | EXPORT_SYMBOL_GPL(smcd_handle_event); | ||
415 | |||
416 | /* SMCD Device interrupt handler. Called from ISM device interrupt handler. | ||
417 | * Parameters are smcd device pointer and DMB number. Find the connection and | ||
418 | * schedule the tasklet for this connection. | ||
419 | * | ||
420 | * Context: | ||
421 | * - Function called in IRQ context from ISM device driver IRQ handler. | ||
422 | */ | ||
423 | void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno) | ||
424 | { | ||
425 | struct smc_connection *conn = NULL; | ||
426 | unsigned long flags; | ||
427 | |||
428 | spin_lock_irqsave(&smcd->lock, flags); | ||
429 | conn = smcd->conn[dmbno]; | ||
430 | if (conn && !conn->killed) | ||
431 | tasklet_schedule(&conn->rx_tsklet); | ||
432 | spin_unlock_irqrestore(&smcd->lock, flags); | ||
433 | } | ||
434 | EXPORT_SYMBOL_GPL(smcd_handle_irq); | ||
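Both entry points above are meant to be called from the ISM driver's interrupt path: smcd_handle_event() copies the event with GFP_ATOMIC and defers all real work to event_wq, while smcd_handle_irq() merely schedules the per-connection RX tasklet under smcd->lock. A sketch of a driver IRQ handler fanning out to them; the my_ism_read_* decode helpers are assumptions, not a real API:

	static irqreturn_t my_ism_irq(int irq, void *data)
	{
		struct smcd_dev *smcd = data;
		struct smcd_event ev;
		unsigned int dmbno;

		if (my_ism_read_event(smcd, &ev))	  /* assumed hw decode helper */
			smcd_handle_event(smcd, &ev);	  /* atomic copy, then worker */
		else if (my_ism_read_dmbno(smcd, &dmbno)) /* assumed hw decode helper */
			smcd_handle_irq(smcd, dmbno);	  /* schedule conn->rx_tsklet */
		return IRQ_HANDLED;
	}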
435 | |||
436 | void __init smc_ism_init(void) | ||
437 | { | ||
438 | smc_ism_v2_capable = false; | ||
439 | } | ||
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h new file mode 100644 index 000000000..8048e09dd --- /dev/null +++ b/net/smc/smc_ism.h | |||
@@ -0,0 +1,56 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* Shared Memory Communications Direct over ISM devices (SMC-D) | ||
3 | * | ||
4 | * SMC-D ISM device structure definitions. | ||
5 | * | ||
6 | * Copyright IBM Corp. 2018 | ||
7 | */ | ||
8 | |||
9 | #ifndef SMCD_ISM_H | ||
10 | #define SMCD_ISM_H | ||
11 | |||
12 | #include <linux/uio.h> | ||
13 | #include <linux/mutex.h> | ||
14 | |||
15 | #include "smc.h" | ||
16 | |||
17 | struct smcd_dev_list { /* List of SMCD devices */ | ||
18 | struct list_head list; | ||
19 | struct mutex mutex; /* Protects list of devices */ | ||
20 | }; | ||
21 | |||
22 | extern struct smcd_dev_list smcd_dev_list; /* list of smcd devices */ | ||
23 | extern bool smc_ism_v2_capable; /* HW supports ISM V2 and thus | ||
24 | * System EID is defined | ||
25 | */ | ||
26 | |||
27 | struct smc_ism_vlanid { /* VLAN id set on ISM device */ | ||
28 | struct list_head list; | ||
29 | unsigned short vlanid; /* Vlan id */ | ||
30 | refcount_t refcnt; /* Reference count */ | ||
31 | }; | ||
32 | |||
33 | struct smc_ism_position { /* ISM device position to write to */ | ||
34 | u64 token; /* Token of DMB */ | ||
35 | u32 offset; /* Offset into DMBE */ | ||
36 | u8 index; /* Index of DMBE */ | ||
37 | u8 signal; /* Generate interrupt on owner side */ | ||
38 | }; | ||
39 | |||
40 | struct smcd_dev; | ||
41 | |||
42 | int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *dev); | ||
43 | void smc_ism_set_conn(struct smc_connection *conn); | ||
44 | void smc_ism_unset_conn(struct smc_connection *conn); | ||
45 | int smc_ism_get_vlan(struct smcd_dev *dev, unsigned short vlan_id); | ||
46 | int smc_ism_put_vlan(struct smcd_dev *dev, unsigned short vlan_id); | ||
47 | int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size, | ||
48 | struct smc_buf_desc *dmb_desc); | ||
49 | int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc); | ||
50 | int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos, | ||
51 | void *data, size_t len); | ||
52 | int smc_ism_signal_shutdown(struct smc_link_group *lgr); | ||
53 | void smc_ism_get_system_eid(struct smcd_dev *dev, u8 **eid); | ||
54 | u16 smc_ism_get_chid(struct smcd_dev *dev); | ||
55 | void smc_ism_init(void); | ||
56 | #endif | ||
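To see how the declarations above fit together: a sender fills a struct smc_ism_position describing where in the peer's DMB the data should land, then hands it to smc_ism_write(). A minimal sketch, with the token, index, and offset values assumed rather than taken from a live connection:

	static int demo_ism_write(struct smcd_dev *dev, u64 dmb_token,
				  void *data, size_t len)
	{
		struct smc_ism_position pos = {
			.token	= dmb_token,	/* DMB token learned during CLC */
			.index	= 0,		/* DMBE index within the DMB */
			.offset	= 0,		/* byte offset within the DMBE */
			.signal	= 1,		/* raise an interrupt at the owner */
		};

		return smc_ism_write(dev, &pos, data, len);
	}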
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c new file mode 100644 index 000000000..0ef15f8fb --- /dev/null +++ b/net/smc/smc_llc.c | |||
@@ -0,0 +1,1974 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * Link Layer Control (LLC) | ||
6 | * | ||
7 | * Copyright IBM Corp. 2016 | ||
8 | * | ||
9 | * Author(s): Klaus Wacker <Klaus.Wacker@de.ibm.com> | ||
10 | * Ursula Braun <ubraun@linux.vnet.ibm.com> | ||
11 | */ | ||
12 | |||
13 | #include <net/tcp.h> | ||
14 | #include <rdma/ib_verbs.h> | ||
15 | |||
16 | #include "smc.h" | ||
17 | #include "smc_core.h" | ||
18 | #include "smc_clc.h" | ||
19 | #include "smc_llc.h" | ||
20 | #include "smc_pnet.h" | ||
21 | |||
22 | #define SMC_LLC_DATA_LEN 40 | ||
23 | |||
24 | struct smc_llc_hdr { | ||
25 | struct smc_wr_rx_hdr common; | ||
26 | u8 length; /* 44 */ | ||
27 | #if defined(__BIG_ENDIAN_BITFIELD) | ||
28 | u8 reserved:4, | ||
29 | add_link_rej_rsn:4; | ||
30 | #elif defined(__LITTLE_ENDIAN_BITFIELD) | ||
31 | u8 add_link_rej_rsn:4, | ||
32 | reserved:4; | ||
33 | #endif | ||
34 | u8 flags; | ||
35 | }; | ||
36 | |||
37 | #define SMC_LLC_FLAG_NO_RMBE_EYEC 0x03 | ||
38 | |||
39 | struct smc_llc_msg_confirm_link { /* type 0x01 */ | ||
40 | struct smc_llc_hdr hd; | ||
41 | u8 sender_mac[ETH_ALEN]; | ||
42 | u8 sender_gid[SMC_GID_SIZE]; | ||
43 | u8 sender_qp_num[3]; | ||
44 | u8 link_num; | ||
45 | u8 link_uid[SMC_LGR_ID_SIZE]; | ||
46 | u8 max_links; | ||
47 | u8 reserved[9]; | ||
48 | }; | ||
49 | |||
50 | #define SMC_LLC_FLAG_ADD_LNK_REJ 0x40 | ||
51 | #define SMC_LLC_REJ_RSN_NO_ALT_PATH 1 | ||
52 | |||
53 | #define SMC_LLC_ADD_LNK_MAX_LINKS 2 | ||
54 | |||
55 | struct smc_llc_msg_add_link { /* type 0x02 */ | ||
56 | struct smc_llc_hdr hd; | ||
57 | u8 sender_mac[ETH_ALEN]; | ||
58 | u8 reserved2[2]; | ||
59 | u8 sender_gid[SMC_GID_SIZE]; | ||
60 | u8 sender_qp_num[3]; | ||
61 | u8 link_num; | ||
62 | #if defined(__BIG_ENDIAN_BITFIELD) | ||
63 | u8 reserved3 : 4, | ||
64 | qp_mtu : 4; | ||
65 | #elif defined(__LITTLE_ENDIAN_BITFIELD) | ||
66 | u8 qp_mtu : 4, | ||
67 | reserved3 : 4; | ||
68 | #endif | ||
69 | u8 initial_psn[3]; | ||
70 | u8 reserved[8]; | ||
71 | }; | ||
72 | |||
73 | struct smc_llc_msg_add_link_cont_rt { | ||
74 | __be32 rmb_key; | ||
75 | __be32 rmb_key_new; | ||
76 | __be64 rmb_vaddr_new; | ||
77 | }; | ||
78 | |||
79 | #define SMC_LLC_RKEYS_PER_CONT_MSG 2 | ||
80 | |||
81 | struct smc_llc_msg_add_link_cont { /* type 0x03 */ | ||
82 | struct smc_llc_hdr hd; | ||
83 | u8 link_num; | ||
84 | u8 num_rkeys; | ||
85 | u8 reserved2[2]; | ||
86 | struct smc_llc_msg_add_link_cont_rt rt[SMC_LLC_RKEYS_PER_CONT_MSG]; | ||
87 | u8 reserved[4]; | ||
88 | } __packed; /* format defined in RFC7609 */ | ||
89 | |||
90 | #define SMC_LLC_FLAG_DEL_LINK_ALL 0x40 | ||
91 | #define SMC_LLC_FLAG_DEL_LINK_ORDERLY 0x20 | ||
92 | |||
93 | struct smc_llc_msg_del_link { /* type 0x04 */ | ||
94 | struct smc_llc_hdr hd; | ||
95 | u8 link_num; | ||
96 | __be32 reason; | ||
97 | u8 reserved[35]; | ||
98 | } __packed; /* format defined in RFC7609 */ | ||
99 | |||
100 | struct smc_llc_msg_test_link { /* type 0x07 */ | ||
101 | struct smc_llc_hdr hd; | ||
102 | u8 user_data[16]; | ||
103 | u8 reserved[24]; | ||
104 | }; | ||
105 | |||
106 | struct smc_rmb_rtoken { | ||
107 | union { | ||
108 | u8 num_rkeys; /* first rtoken byte of CONFIRM LINK msg */ | ||
109 | /* is actually the num of rtokens, first */ | ||
110 | /* rtoken is always for the current link */ | ||
111 | u8 link_id; /* link id of the rtoken */ | ||
112 | }; | ||
113 | __be32 rmb_key; | ||
114 | __be64 rmb_vaddr; | ||
115 | } __packed; /* format defined in RFC7609 */ | ||
116 | |||
117 | #define SMC_LLC_RKEYS_PER_MSG 3 | ||
118 | |||
119 | struct smc_llc_msg_confirm_rkey { /* type 0x06 */ | ||
120 | struct smc_llc_hdr hd; | ||
121 | struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG]; | ||
122 | u8 reserved; | ||
123 | }; | ||
124 | |||
125 | #define SMC_LLC_DEL_RKEY_MAX 8 | ||
126 | #define SMC_LLC_FLAG_RKEY_RETRY 0x10 | ||
127 | #define SMC_LLC_FLAG_RKEY_NEG 0x20 | ||
128 | |||
129 | struct smc_llc_msg_delete_rkey { /* type 0x09 */ | ||
130 | struct smc_llc_hdr hd; | ||
131 | u8 num_rkeys; | ||
132 | u8 err_mask; | ||
133 | u8 reserved[2]; | ||
134 | __be32 rkey[8]; | ||
135 | u8 reserved2[4]; | ||
136 | }; | ||
137 | |||
138 | union smc_llc_msg { | ||
139 | struct smc_llc_msg_confirm_link confirm_link; | ||
140 | struct smc_llc_msg_add_link add_link; | ||
141 | struct smc_llc_msg_add_link_cont add_link_cont; | ||
142 | struct smc_llc_msg_del_link delete_link; | ||
143 | |||
144 | struct smc_llc_msg_confirm_rkey confirm_rkey; | ||
145 | struct smc_llc_msg_delete_rkey delete_rkey; | ||
146 | |||
147 | struct smc_llc_msg_test_link test_link; | ||
148 | struct { | ||
149 | struct smc_llc_hdr hdr; | ||
150 | u8 data[SMC_LLC_DATA_LEN]; | ||
151 | } raw; | ||
152 | }; | ||
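Every member of this union is sized to fill exactly one 44-byte LLC message: the 4-byte struct smc_llc_hdr (assuming struct smc_wr_rx_hdr is the single type byte it is in smc_wr.h) plus SMC_LLC_DATA_LEN (40) payload bytes, matching the hd.length value of 44 noted above. A compile-time sketch of that invariant; smc_llc_add_pending_send() below enforces the same thing against SMC_WR_TX_SIZE with BUILD_BUG_ON_MSG:

	/* sketch: the union must stay one fixed-size 44-byte LLC message */
	static_assert(sizeof(struct smc_llc_hdr) == 4);
	static_assert(sizeof(union smc_llc_msg) ==
		      sizeof(struct smc_llc_hdr) + SMC_LLC_DATA_LEN);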
153 | |||
154 | #define SMC_LLC_FLAG_RESP 0x80 | ||
155 | |||
156 | struct smc_llc_qentry { | ||
157 | struct list_head list; | ||
158 | struct smc_link *link; | ||
159 | union smc_llc_msg msg; | ||
160 | }; | ||
161 | |||
162 | static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc); | ||
163 | |||
164 | struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow) | ||
165 | { | ||
166 | struct smc_llc_qentry *qentry = flow->qentry; | ||
167 | |||
168 | flow->qentry = NULL; | ||
169 | return qentry; | ||
170 | } | ||
171 | |||
172 | void smc_llc_flow_qentry_del(struct smc_llc_flow *flow) | ||
173 | { | ||
174 | struct smc_llc_qentry *qentry; | ||
175 | |||
176 | if (flow->qentry) { | ||
177 | qentry = flow->qentry; | ||
178 | flow->qentry = NULL; | ||
179 | kfree(qentry); | ||
180 | } | ||
181 | } | ||
182 | |||
183 | static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow, | ||
184 | struct smc_llc_qentry *qentry) | ||
185 | { | ||
186 | flow->qentry = qentry; | ||
187 | } | ||
188 | |||
189 | static void smc_llc_flow_parallel(struct smc_link_group *lgr, u8 flow_type, | ||
190 | struct smc_llc_qentry *qentry) | ||
191 | { | ||
192 | u8 msg_type = qentry->msg.raw.hdr.common.type; | ||
193 | |||
194 | if ((msg_type == SMC_LLC_ADD_LINK || msg_type == SMC_LLC_DELETE_LINK) && | ||
195 | flow_type != msg_type && !lgr->delayed_event) { | ||
196 | lgr->delayed_event = qentry; | ||
197 | return; | ||
198 | } | ||
199 | /* drop parallel or already-in-progress llc requests */ | ||
200 | if (flow_type != msg_type) | ||
201 | pr_warn_once("smc: SMC-R lg %*phN dropped parallel " | ||
202 | "LLC msg: msg %d flow %d role %d\n", | ||
203 | SMC_LGR_ID_SIZE, &lgr->id, | ||
204 | qentry->msg.raw.hdr.common.type, | ||
205 | flow_type, lgr->role); | ||
206 | kfree(qentry); | ||
207 | } | ||
208 | |||
209 | /* try to start a new llc flow, initiated by an incoming llc msg */ | ||
210 | static bool smc_llc_flow_start(struct smc_llc_flow *flow, | ||
211 | struct smc_llc_qentry *qentry) | ||
212 | { | ||
213 | struct smc_link_group *lgr = qentry->link->lgr; | ||
214 | |||
215 | spin_lock_bh(&lgr->llc_flow_lock); | ||
216 | if (flow->type) { | ||
217 | /* a flow is already active */ | ||
218 | smc_llc_flow_parallel(lgr, flow->type, qentry); | ||
219 | spin_unlock_bh(&lgr->llc_flow_lock); | ||
220 | return false; | ||
221 | } | ||
222 | switch (qentry->msg.raw.hdr.common.type) { | ||
223 | case SMC_LLC_ADD_LINK: | ||
224 | flow->type = SMC_LLC_FLOW_ADD_LINK; | ||
225 | break; | ||
226 | case SMC_LLC_DELETE_LINK: | ||
227 | flow->type = SMC_LLC_FLOW_DEL_LINK; | ||
228 | break; | ||
229 | case SMC_LLC_CONFIRM_RKEY: | ||
230 | case SMC_LLC_DELETE_RKEY: | ||
231 | flow->type = SMC_LLC_FLOW_RKEY; | ||
232 | break; | ||
233 | default: | ||
234 | flow->type = SMC_LLC_FLOW_NONE; | ||
235 | } | ||
236 | smc_llc_flow_qentry_set(flow, qentry); | ||
237 | spin_unlock_bh(&lgr->llc_flow_lock); | ||
238 | return true; | ||
239 | } | ||
240 | |||
241 | /* start a new local llc flow, wait till the current flow has finished */ | ||
242 | int smc_llc_flow_initiate(struct smc_link_group *lgr, | ||
243 | enum smc_llc_flowtype type) | ||
244 | { | ||
245 | enum smc_llc_flowtype allowed_remote = SMC_LLC_FLOW_NONE; | ||
246 | int rc; | ||
247 | |||
248 | /* all flows except confirm_rkey and delete_rkey are exclusive, | ||
249 | * confirm/delete rkey flows can run concurrently (local and remote) | ||
250 | */ | ||
251 | if (type == SMC_LLC_FLOW_RKEY) | ||
252 | allowed_remote = SMC_LLC_FLOW_RKEY; | ||
253 | again: | ||
254 | if (list_empty(&lgr->list)) | ||
255 | return -ENODEV; | ||
256 | spin_lock_bh(&lgr->llc_flow_lock); | ||
257 | if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE && | ||
258 | (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE || | ||
259 | lgr->llc_flow_rmt.type == allowed_remote)) { | ||
260 | lgr->llc_flow_lcl.type = type; | ||
261 | spin_unlock_bh(&lgr->llc_flow_lock); | ||
262 | return 0; | ||
263 | } | ||
264 | spin_unlock_bh(&lgr->llc_flow_lock); | ||
265 | rc = wait_event_timeout(lgr->llc_flow_waiter, (list_empty(&lgr->list) || | ||
266 | (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE && | ||
267 | (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE || | ||
268 | lgr->llc_flow_rmt.type == allowed_remote))), | ||
269 | SMC_LLC_WAIT_TIME * 10); | ||
270 | if (!rc) | ||
271 | return -ETIMEDOUT; | ||
272 | goto again; | ||
273 | } | ||
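Every locally initiated flow brackets its work between this initiate call and the smc_llc_flow_stop() defined just below. The canonical shape, sketched with a placeholder flow body (demo_rkey_flow_body stands in for, e.g., the confirm-rkey send/wait sequence later in this file):

	static int demo_run_rkey_flow(struct smc_link_group *lgr)
	{
		int rc;

		rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
		if (rc)
			return rc;		/* -ENODEV or -ETIMEDOUT */
		rc = demo_rkey_flow_body(lgr);	/* placeholder: send/wait/consume */
		smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
		return rc;
	}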
274 | |||
275 | /* finish the current llc flow */ | ||
276 | void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow) | ||
277 | { | ||
278 | spin_lock_bh(&lgr->llc_flow_lock); | ||
279 | memset(flow, 0, sizeof(*flow)); | ||
280 | flow->type = SMC_LLC_FLOW_NONE; | ||
281 | spin_unlock_bh(&lgr->llc_flow_lock); | ||
282 | if (!list_empty(&lgr->list) && lgr->delayed_event && | ||
283 | flow == &lgr->llc_flow_lcl) | ||
284 | schedule_work(&lgr->llc_event_work); | ||
285 | else | ||
286 | wake_up(&lgr->llc_flow_waiter); | ||
287 | } | ||
288 | |||
289 | /* lnk is optional; it is used for early wakeup when the link goes down, | ||
290 |  * useful when we wait for a response on the link after we have sent a request | ||
291 |  */ | ||
292 | struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr, | ||
293 | struct smc_link *lnk, | ||
294 | int time_out, u8 exp_msg) | ||
295 | { | ||
296 | struct smc_llc_flow *flow = &lgr->llc_flow_lcl; | ||
297 | u8 rcv_msg; | ||
298 | |||
299 | wait_event_timeout(lgr->llc_msg_waiter, | ||
300 | (flow->qentry || | ||
301 | (lnk && !smc_link_usable(lnk)) || | ||
302 | list_empty(&lgr->list)), | ||
303 | time_out); | ||
304 | if (!flow->qentry || | ||
305 | (lnk && !smc_link_usable(lnk)) || list_empty(&lgr->list)) { | ||
306 | smc_llc_flow_qentry_del(flow); | ||
307 | goto out; | ||
308 | } | ||
309 | rcv_msg = flow->qentry->msg.raw.hdr.common.type; | ||
310 | if (exp_msg && rcv_msg != exp_msg) { | ||
311 | if (exp_msg == SMC_LLC_ADD_LINK && | ||
312 | rcv_msg == SMC_LLC_DELETE_LINK) { | ||
313 | /* flow_start will delay the unexpected msg */ | ||
314 | smc_llc_flow_start(&lgr->llc_flow_lcl, | ||
315 | smc_llc_flow_qentry_clr(flow)); | ||
316 | return NULL; | ||
317 | } | ||
318 | pr_warn_once("smc: SMC-R lg %*phN dropped unexpected LLC msg: " | ||
319 | "msg %d exp %d flow %d role %d flags %x\n", | ||
320 | SMC_LGR_ID_SIZE, &lgr->id, rcv_msg, exp_msg, | ||
321 | flow->type, lgr->role, | ||
322 | flow->qentry->msg.raw.hdr.flags); | ||
323 | smc_llc_flow_qentry_del(flow); | ||
324 | } | ||
325 | out: | ||
326 | return flow->qentry; | ||
327 | } | ||
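The calling convention around smc_llc_wait() is send-request, wait for the typed response, consume the qentry. A condensed sketch using the TEST_LINK message (error handling trimmed; the same pattern appears in the rkey-exchange and add/delete-link flows below):

	static int demo_test_link(struct smc_link_group *lgr, struct smc_link *lnk,
				  u8 user_data[16])
	{
		struct smc_llc_qentry *qentry;

		if (smc_llc_send_test_link(lnk, user_data))
			return -ENOLINK;
		qentry = smc_llc_wait(lgr, lnk, SMC_LLC_WAIT_TIME,
				      SMC_LLC_TEST_LINK);
		if (!qentry)
			return -ETIMEDOUT;	/* timeout, link down or lgr gone */
		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); /* free the response */
		return 0;
	}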
328 | |||
329 | /********************************** send *************************************/ | ||
330 | |||
331 | struct smc_llc_tx_pend { | ||
332 | }; | ||
333 | |||
334 | /* handler for send/transmission completion of an LLC msg */ | ||
335 | static void smc_llc_tx_handler(struct smc_wr_tx_pend_priv *pend, | ||
336 | struct smc_link *link, | ||
337 | enum ib_wc_status wc_status) | ||
338 | { | ||
339 | /* future work: handle wc_status error for recovery and failover */ | ||
340 | } | ||
341 | |||
342 | /** | ||
343 | * smc_llc_add_pending_send() - add LLC control message to pending WQE transmits | ||
344 | * @link: Pointer to SMC link used for sending LLC control message. | ||
345 | * @wr_buf: Out variable returning pointer to work request payload buffer. | ||
346 | * @pend: Out variable returning pointer to private pending WR tracking. | ||
347 | * It's the context the transmit complete handler will get. | ||
348 | * | ||
349 | * Reserves and pre-fills an entry for a pending work request send/tx. | ||
350 | * Used by mid-level smc_llc_send_msg() to prepare for later actual send/tx. | ||
351 |  * Can sleep due to smc_wr_tx_get_free_slot (if not in softirq context). | ||
352 | * | ||
353 | * Return: 0 on success, otherwise an error value. | ||
354 | */ | ||
355 | static int smc_llc_add_pending_send(struct smc_link *link, | ||
356 | struct smc_wr_buf **wr_buf, | ||
357 | struct smc_wr_tx_pend_priv **pend) | ||
358 | { | ||
359 | int rc; | ||
360 | |||
361 | rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, NULL, | ||
362 | pend); | ||
363 | if (rc < 0) | ||
364 | return rc; | ||
365 | BUILD_BUG_ON_MSG( | ||
366 | sizeof(union smc_llc_msg) > SMC_WR_BUF_SIZE, | ||
367 | 		"must increase SMC_WR_BUF_SIZE to at least sizeof(union smc_llc_msg)"); | ||
368 | BUILD_BUG_ON_MSG( | ||
369 | sizeof(union smc_llc_msg) != SMC_WR_TX_SIZE, | ||
370 | 		"must adapt SMC_WR_TX_SIZE to sizeof(union smc_llc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()"); | ||
371 | BUILD_BUG_ON_MSG( | ||
372 | sizeof(struct smc_llc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE, | ||
373 | "must increase SMC_WR_TX_PEND_PRIV_SIZE to at least sizeof(struct smc_llc_tx_pend)"); | ||
374 | return 0; | ||
375 | } | ||
376 | |||
377 | /* high-level API to send LLC confirm link */ | ||
378 | int smc_llc_send_confirm_link(struct smc_link *link, | ||
379 | enum smc_llc_reqresp reqresp) | ||
380 | { | ||
381 | struct smc_llc_msg_confirm_link *confllc; | ||
382 | struct smc_wr_tx_pend_priv *pend; | ||
383 | struct smc_wr_buf *wr_buf; | ||
384 | int rc; | ||
385 | |||
386 | if (!smc_wr_tx_link_hold(link)) | ||
387 | return -ENOLINK; | ||
388 | rc = smc_llc_add_pending_send(link, &wr_buf, &pend); | ||
389 | if (rc) | ||
390 | goto put_out; | ||
391 | confllc = (struct smc_llc_msg_confirm_link *)wr_buf; | ||
392 | memset(confllc, 0, sizeof(*confllc)); | ||
393 | confllc->hd.common.type = SMC_LLC_CONFIRM_LINK; | ||
394 | confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link); | ||
395 | confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC; | ||
396 | if (reqresp == SMC_LLC_RESP) | ||
397 | confllc->hd.flags |= SMC_LLC_FLAG_RESP; | ||
398 | memcpy(confllc->sender_mac, link->smcibdev->mac[link->ibport - 1], | ||
399 | ETH_ALEN); | ||
400 | memcpy(confllc->sender_gid, link->gid, SMC_GID_SIZE); | ||
401 | hton24(confllc->sender_qp_num, link->roce_qp->qp_num); | ||
402 | confllc->link_num = link->link_id; | ||
403 | memcpy(confllc->link_uid, link->link_uid, SMC_LGR_ID_SIZE); | ||
404 | confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; | ||
405 | /* send llc message */ | ||
406 | rc = smc_wr_tx_send(link, pend); | ||
407 | put_out: | ||
408 | smc_wr_tx_link_put(link); | ||
409 | return rc; | ||
410 | } | ||
411 | |||
412 | /* send LLC confirm rkey request */ | ||
413 | static int smc_llc_send_confirm_rkey(struct smc_link *send_link, | ||
414 | struct smc_buf_desc *rmb_desc) | ||
415 | { | ||
416 | struct smc_llc_msg_confirm_rkey *rkeyllc; | ||
417 | struct smc_wr_tx_pend_priv *pend; | ||
418 | struct smc_wr_buf *wr_buf; | ||
419 | struct smc_link *link; | ||
420 | int i, rc, rtok_ix; | ||
421 | |||
422 | if (!smc_wr_tx_link_hold(send_link)) | ||
423 | return -ENOLINK; | ||
424 | rc = smc_llc_add_pending_send(send_link, &wr_buf, &pend); | ||
425 | if (rc) | ||
426 | goto put_out; | ||
427 | rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf; | ||
428 | memset(rkeyllc, 0, sizeof(*rkeyllc)); | ||
429 | rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY; | ||
430 | rkeyllc->hd.length = sizeof(struct smc_llc_msg_confirm_rkey); | ||
431 | |||
432 | rtok_ix = 1; | ||
433 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { | ||
434 | link = &send_link->lgr->lnk[i]; | ||
435 | if (smc_link_active(link) && link != send_link) { | ||
436 | rkeyllc->rtoken[rtok_ix].link_id = link->link_id; | ||
437 | rkeyllc->rtoken[rtok_ix].rmb_key = | ||
438 | htonl(rmb_desc->mr_rx[link->link_idx]->rkey); | ||
439 | rkeyllc->rtoken[rtok_ix].rmb_vaddr = cpu_to_be64( | ||
440 | (u64)sg_dma_address( | ||
441 | rmb_desc->sgt[link->link_idx].sgl)); | ||
442 | rtok_ix++; | ||
443 | } | ||
444 | } | ||
445 | /* rkey of send_link is in rtoken[0] */ | ||
446 | rkeyllc->rtoken[0].num_rkeys = rtok_ix - 1; | ||
447 | rkeyllc->rtoken[0].rmb_key = | ||
448 | htonl(rmb_desc->mr_rx[send_link->link_idx]->rkey); | ||
449 | rkeyllc->rtoken[0].rmb_vaddr = cpu_to_be64( | ||
450 | (u64)sg_dma_address(rmb_desc->sgt[send_link->link_idx].sgl)); | ||
451 | /* send llc message */ | ||
452 | rc = smc_wr_tx_send(send_link, pend); | ||
453 | put_out: | ||
454 | smc_wr_tx_link_put(send_link); | ||
455 | return rc; | ||
456 | } | ||
457 | |||
458 | /* send LLC delete rkey request */ | ||
459 | static int smc_llc_send_delete_rkey(struct smc_link *link, | ||
460 | struct smc_buf_desc *rmb_desc) | ||
461 | { | ||
462 | struct smc_llc_msg_delete_rkey *rkeyllc; | ||
463 | struct smc_wr_tx_pend_priv *pend; | ||
464 | struct smc_wr_buf *wr_buf; | ||
465 | int rc; | ||
466 | |||
467 | if (!smc_wr_tx_link_hold(link)) | ||
468 | return -ENOLINK; | ||
469 | rc = smc_llc_add_pending_send(link, &wr_buf, &pend); | ||
470 | if (rc) | ||
471 | goto put_out; | ||
472 | rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf; | ||
473 | memset(rkeyllc, 0, sizeof(*rkeyllc)); | ||
474 | rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY; | ||
475 | rkeyllc->hd.length = sizeof(struct smc_llc_msg_delete_rkey); | ||
476 | rkeyllc->num_rkeys = 1; | ||
477 | rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey); | ||
478 | /* send llc message */ | ||
479 | rc = smc_wr_tx_send(link, pend); | ||
480 | put_out: | ||
481 | smc_wr_tx_link_put(link); | ||
482 | return rc; | ||
483 | } | ||
484 | |||
485 | /* send ADD LINK request or response */ | ||
486 | int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], | ||
487 | struct smc_link *link_new, | ||
488 | enum smc_llc_reqresp reqresp) | ||
489 | { | ||
490 | struct smc_llc_msg_add_link *addllc; | ||
491 | struct smc_wr_tx_pend_priv *pend; | ||
492 | struct smc_wr_buf *wr_buf; | ||
493 | int rc; | ||
494 | |||
495 | if (!smc_wr_tx_link_hold(link)) | ||
496 | return -ENOLINK; | ||
497 | rc = smc_llc_add_pending_send(link, &wr_buf, &pend); | ||
498 | if (rc) | ||
499 | goto put_out; | ||
500 | addllc = (struct smc_llc_msg_add_link *)wr_buf; | ||
501 | |||
502 | memset(addllc, 0, sizeof(*addllc)); | ||
503 | addllc->hd.common.type = SMC_LLC_ADD_LINK; | ||
504 | addllc->hd.length = sizeof(struct smc_llc_msg_add_link); | ||
505 | if (reqresp == SMC_LLC_RESP) | ||
506 | addllc->hd.flags |= SMC_LLC_FLAG_RESP; | ||
507 | memcpy(addllc->sender_mac, mac, ETH_ALEN); | ||
508 | memcpy(addllc->sender_gid, gid, SMC_GID_SIZE); | ||
509 | if (link_new) { | ||
510 | addllc->link_num = link_new->link_id; | ||
511 | hton24(addllc->sender_qp_num, link_new->roce_qp->qp_num); | ||
512 | hton24(addllc->initial_psn, link_new->psn_initial); | ||
513 | if (reqresp == SMC_LLC_REQ) | ||
514 | addllc->qp_mtu = link_new->path_mtu; | ||
515 | else | ||
516 | addllc->qp_mtu = min(link_new->path_mtu, | ||
517 | link_new->peer_mtu); | ||
518 | } | ||
519 | /* send llc message */ | ||
520 | rc = smc_wr_tx_send(link, pend); | ||
521 | put_out: | ||
522 | smc_wr_tx_link_put(link); | ||
523 | return rc; | ||
524 | } | ||
525 | |||
526 | /* send DELETE LINK request or response */ | ||
527 | int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id, | ||
528 | enum smc_llc_reqresp reqresp, bool orderly, | ||
529 | u32 reason) | ||
530 | { | ||
531 | struct smc_llc_msg_del_link *delllc; | ||
532 | struct smc_wr_tx_pend_priv *pend; | ||
533 | struct smc_wr_buf *wr_buf; | ||
534 | int rc; | ||
535 | |||
536 | if (!smc_wr_tx_link_hold(link)) | ||
537 | return -ENOLINK; | ||
538 | rc = smc_llc_add_pending_send(link, &wr_buf, &pend); | ||
539 | if (rc) | ||
540 | goto put_out; | ||
541 | delllc = (struct smc_llc_msg_del_link *)wr_buf; | ||
542 | |||
543 | memset(delllc, 0, sizeof(*delllc)); | ||
544 | delllc->hd.common.type = SMC_LLC_DELETE_LINK; | ||
545 | delllc->hd.length = sizeof(struct smc_llc_msg_del_link); | ||
546 | if (reqresp == SMC_LLC_RESP) | ||
547 | delllc->hd.flags |= SMC_LLC_FLAG_RESP; | ||
548 | if (orderly) | ||
549 | delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; | ||
550 | if (link_del_id) | ||
551 | delllc->link_num = link_del_id; | ||
552 | else | ||
553 | delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL; | ||
554 | delllc->reason = htonl(reason); | ||
555 | /* send llc message */ | ||
556 | rc = smc_wr_tx_send(link, pend); | ||
557 | put_out: | ||
558 | smc_wr_tx_link_put(link); | ||
559 | return rc; | ||
560 | } | ||
561 | |||
562 | /* send LLC test link request */ | ||
563 | static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16]) | ||
564 | { | ||
565 | struct smc_llc_msg_test_link *testllc; | ||
566 | struct smc_wr_tx_pend_priv *pend; | ||
567 | struct smc_wr_buf *wr_buf; | ||
568 | int rc; | ||
569 | |||
570 | if (!smc_wr_tx_link_hold(link)) | ||
571 | return -ENOLINK; | ||
572 | rc = smc_llc_add_pending_send(link, &wr_buf, &pend); | ||
573 | if (rc) | ||
574 | goto put_out; | ||
575 | testllc = (struct smc_llc_msg_test_link *)wr_buf; | ||
576 | memset(testllc, 0, sizeof(*testllc)); | ||
577 | testllc->hd.common.type = SMC_LLC_TEST_LINK; | ||
578 | testllc->hd.length = sizeof(struct smc_llc_msg_test_link); | ||
579 | memcpy(testllc->user_data, user_data, sizeof(testllc->user_data)); | ||
580 | /* send llc message */ | ||
581 | rc = smc_wr_tx_send(link, pend); | ||
582 | put_out: | ||
583 | smc_wr_tx_link_put(link); | ||
584 | return rc; | ||
585 | } | ||
586 | |||
587 | /* schedule an llc send on link, may wait for buffers */ | ||
588 | static int smc_llc_send_message(struct smc_link *link, void *llcbuf) | ||
589 | { | ||
590 | struct smc_wr_tx_pend_priv *pend; | ||
591 | struct smc_wr_buf *wr_buf; | ||
592 | int rc; | ||
593 | |||
594 | if (!smc_wr_tx_link_hold(link)) | ||
595 | return -ENOLINK; | ||
596 | rc = smc_llc_add_pending_send(link, &wr_buf, &pend); | ||
597 | if (rc) | ||
598 | goto put_out; | ||
599 | memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg)); | ||
600 | rc = smc_wr_tx_send(link, pend); | ||
601 | put_out: | ||
602 | smc_wr_tx_link_put(link); | ||
603 | return rc; | ||
604 | } | ||
605 | |||
606 | /* schedule an llc send on link, may wait for buffers, | ||
607 | * and wait for send completion notification. | ||
608 |  * Returns 0 on success. | ||
609 | */ | ||
610 | static int smc_llc_send_message_wait(struct smc_link *link, void *llcbuf) | ||
611 | { | ||
612 | struct smc_wr_tx_pend_priv *pend; | ||
613 | struct smc_wr_buf *wr_buf; | ||
614 | int rc; | ||
615 | |||
616 | if (!smc_wr_tx_link_hold(link)) | ||
617 | return -ENOLINK; | ||
618 | rc = smc_llc_add_pending_send(link, &wr_buf, &pend); | ||
619 | if (rc) | ||
620 | goto put_out; | ||
621 | memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg)); | ||
622 | rc = smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME); | ||
623 | put_out: | ||
624 | smc_wr_tx_link_put(link); | ||
625 | return rc; | ||
626 | } | ||
627 | |||
628 | /********************************* receive ***********************************/ | ||
629 | |||
630 | static int smc_llc_alloc_alt_link(struct smc_link_group *lgr, | ||
631 | enum smc_lgr_type lgr_new_t) | ||
632 | { | ||
633 | int i; | ||
634 | |||
635 | if (lgr->type == SMC_LGR_SYMMETRIC || | ||
636 | (lgr->type != SMC_LGR_SINGLE && | ||
637 | (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL || | ||
638 | lgr_new_t == SMC_LGR_ASYMMETRIC_PEER))) | ||
639 | return -EMLINK; | ||
640 | |||
641 | if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL || | ||
642 | lgr_new_t == SMC_LGR_ASYMMETRIC_PEER) { | ||
643 | for (i = SMC_LINKS_PER_LGR_MAX - 1; i >= 0; i--) | ||
644 | if (lgr->lnk[i].state == SMC_LNK_UNUSED) | ||
645 | return i; | ||
646 | } else { | ||
647 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) | ||
648 | if (lgr->lnk[i].state == SMC_LNK_UNUSED) | ||
649 | return i; | ||
650 | } | ||
651 | return -EMLINK; | ||
652 | } | ||
653 | |||
654 | /* return first buffer from any of the next buf lists */ | ||
655 | static struct smc_buf_desc *_smc_llc_get_next_rmb(struct smc_link_group *lgr, | ||
656 | int *buf_lst) | ||
657 | { | ||
658 | struct smc_buf_desc *buf_pos; | ||
659 | |||
660 | while (*buf_lst < SMC_RMBE_SIZES) { | ||
661 | buf_pos = list_first_entry_or_null(&lgr->rmbs[*buf_lst], | ||
662 | struct smc_buf_desc, list); | ||
663 | if (buf_pos) | ||
664 | return buf_pos; | ||
665 | (*buf_lst)++; | ||
666 | } | ||
667 | return NULL; | ||
668 | } | ||
669 | |||
670 | /* return next rmb from buffer lists */ | ||
671 | static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr, | ||
672 | int *buf_lst, | ||
673 | struct smc_buf_desc *buf_pos) | ||
674 | { | ||
675 | struct smc_buf_desc *buf_next; | ||
676 | |||
677 | if (!buf_pos || list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) { | ||
678 | (*buf_lst)++; | ||
679 | return _smc_llc_get_next_rmb(lgr, buf_lst); | ||
680 | } | ||
681 | buf_next = list_next_entry(buf_pos, list); | ||
682 | return buf_next; | ||
683 | } | ||
684 | |||
685 | static struct smc_buf_desc *smc_llc_get_first_rmb(struct smc_link_group *lgr, | ||
686 | int *buf_lst) | ||
687 | { | ||
688 | *buf_lst = 0; | ||
689 | 	return _smc_llc_get_next_rmb(lgr, buf_lst); /* start at rmbs[0]; going via smc_llc_get_next_rmb() would pre-increment *buf_lst and skip it */ | ||
690 | } | ||
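Together these three helpers form a flat iterator over all of the link group's RMB size lists; smc_llc_add_link_cont() below advances it while skipping descriptors that are not in use. The standalone loop shape, as a sketch:

	static void demo_walk_rmbs(struct smc_link_group *lgr)
	{
		struct smc_buf_desc *buf_pos;
		int buf_lst;

		for (buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
		     buf_pos;
		     buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos)) {
			if (!buf_pos->used)
				continue;	/* skip unused descriptors */
			/* ... publish buf_pos->mr_rx[]/sgt[] for the new link ... */
		}
	}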
691 | |||
692 | /* send one add_link_continue msg */ | ||
693 | static int smc_llc_add_link_cont(struct smc_link *link, | ||
694 | struct smc_link *link_new, u8 *num_rkeys_todo, | ||
695 | int *buf_lst, struct smc_buf_desc **buf_pos) | ||
696 | { | ||
697 | struct smc_llc_msg_add_link_cont *addc_llc; | ||
698 | struct smc_link_group *lgr = link->lgr; | ||
699 | int prim_lnk_idx, lnk_idx, i, rc; | ||
700 | struct smc_wr_tx_pend_priv *pend; | ||
701 | struct smc_wr_buf *wr_buf; | ||
702 | struct smc_buf_desc *rmb; | ||
703 | u8 n; | ||
704 | |||
705 | if (!smc_wr_tx_link_hold(link)) | ||
706 | return -ENOLINK; | ||
707 | rc = smc_llc_add_pending_send(link, &wr_buf, &pend); | ||
708 | if (rc) | ||
709 | goto put_out; | ||
710 | addc_llc = (struct smc_llc_msg_add_link_cont *)wr_buf; | ||
711 | memset(addc_llc, 0, sizeof(*addc_llc)); | ||
712 | |||
713 | prim_lnk_idx = link->link_idx; | ||
714 | lnk_idx = link_new->link_idx; | ||
715 | addc_llc->link_num = link_new->link_id; | ||
716 | addc_llc->num_rkeys = *num_rkeys_todo; | ||
717 | n = *num_rkeys_todo; | ||
718 | for (i = 0; i < min_t(u8, n, SMC_LLC_RKEYS_PER_CONT_MSG); i++) { | ||
719 | if (!*buf_pos) { | ||
720 | addc_llc->num_rkeys = addc_llc->num_rkeys - | ||
721 | *num_rkeys_todo; | ||
722 | *num_rkeys_todo = 0; | ||
723 | break; | ||
724 | } | ||
725 | rmb = *buf_pos; | ||
726 | |||
727 | addc_llc->rt[i].rmb_key = htonl(rmb->mr_rx[prim_lnk_idx]->rkey); | ||
728 | addc_llc->rt[i].rmb_key_new = htonl(rmb->mr_rx[lnk_idx]->rkey); | ||
729 | addc_llc->rt[i].rmb_vaddr_new = | ||
730 | cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl)); | ||
731 | |||
732 | (*num_rkeys_todo)--; | ||
733 | *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos); | ||
734 | while (*buf_pos && !(*buf_pos)->used) | ||
735 | *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos); | ||
736 | } | ||
737 | addc_llc->hd.common.type = SMC_LLC_ADD_LINK_CONT; | ||
738 | addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont); | ||
739 | if (lgr->role == SMC_CLNT) | ||
740 | addc_llc->hd.flags |= SMC_LLC_FLAG_RESP; | ||
741 | rc = smc_wr_tx_send(link, pend); | ||
742 | put_out: | ||
743 | smc_wr_tx_link_put(link); | ||
744 | return rc; | ||
745 | } | ||
746 | |||
747 | static int smc_llc_cli_rkey_exchange(struct smc_link *link, | ||
748 | struct smc_link *link_new) | ||
749 | { | ||
750 | struct smc_llc_msg_add_link_cont *addc_llc; | ||
751 | struct smc_link_group *lgr = link->lgr; | ||
752 | u8 max, num_rkeys_send, num_rkeys_recv; | ||
753 | struct smc_llc_qentry *qentry; | ||
754 | struct smc_buf_desc *buf_pos; | ||
755 | int buf_lst; | ||
756 | int rc = 0; | ||
757 | int i; | ||
758 | |||
759 | mutex_lock(&lgr->rmbs_lock); | ||
760 | num_rkeys_send = lgr->conns_num; | ||
761 | buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst); | ||
762 | do { | ||
763 | qentry = smc_llc_wait(lgr, NULL, SMC_LLC_WAIT_TIME, | ||
764 | SMC_LLC_ADD_LINK_CONT); | ||
765 | if (!qentry) { | ||
766 | rc = -ETIMEDOUT; | ||
767 | break; | ||
768 | } | ||
769 | addc_llc = &qentry->msg.add_link_cont; | ||
770 | num_rkeys_recv = addc_llc->num_rkeys; | ||
771 | max = min_t(u8, num_rkeys_recv, SMC_LLC_RKEYS_PER_CONT_MSG); | ||
772 | for (i = 0; i < max; i++) { | ||
773 | smc_rtoken_set(lgr, link->link_idx, link_new->link_idx, | ||
774 | addc_llc->rt[i].rmb_key, | ||
775 | addc_llc->rt[i].rmb_vaddr_new, | ||
776 | addc_llc->rt[i].rmb_key_new); | ||
777 | num_rkeys_recv--; | ||
778 | } | ||
779 | smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); | ||
780 | rc = smc_llc_add_link_cont(link, link_new, &num_rkeys_send, | ||
781 | &buf_lst, &buf_pos); | ||
782 | if (rc) | ||
783 | break; | ||
784 | } while (num_rkeys_send || num_rkeys_recv); | ||
785 | |||
786 | mutex_unlock(&lgr->rmbs_lock); | ||
787 | return rc; | ||
788 | } | ||
789 | |||
790 | /* prepare and send an add link reject response */ | ||
791 | static int smc_llc_cli_add_link_reject(struct smc_llc_qentry *qentry) | ||
792 | { | ||
793 | qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP; | ||
794 | qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_ADD_LNK_REJ; | ||
795 | qentry->msg.raw.hdr.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH; | ||
796 | return smc_llc_send_message(qentry->link, &qentry->msg); | ||
797 | } | ||
798 | |||
799 | static int smc_llc_cli_conf_link(struct smc_link *link, | ||
800 | struct smc_init_info *ini, | ||
801 | struct smc_link *link_new, | ||
802 | enum smc_lgr_type lgr_new_t) | ||
803 | { | ||
804 | struct smc_link_group *lgr = link->lgr; | ||
805 | struct smc_llc_qentry *qentry = NULL; | ||
806 | int rc = 0; | ||
807 | |||
808 | /* receive CONFIRM LINK request over RoCE fabric */ | ||
809 | qentry = smc_llc_wait(lgr, NULL, SMC_LLC_WAIT_FIRST_TIME, 0); | ||
810 | if (!qentry) { | ||
811 | rc = smc_llc_send_delete_link(link, link_new->link_id, | ||
812 | SMC_LLC_REQ, false, | ||
813 | SMC_LLC_DEL_LOST_PATH); | ||
814 | return -ENOLINK; | ||
815 | } | ||
816 | if (qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) { | ||
817 | /* received DELETE_LINK instead */ | ||
818 | qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP; | ||
819 | smc_llc_send_message(link, &qentry->msg); | ||
820 | smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); | ||
821 | return -ENOLINK; | ||
822 | } | ||
823 | smc_llc_save_peer_uid(qentry); | ||
824 | smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); | ||
825 | |||
826 | rc = smc_ib_modify_qp_rts(link_new); | ||
827 | if (rc) { | ||
828 | smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ, | ||
829 | false, SMC_LLC_DEL_LOST_PATH); | ||
830 | return -ENOLINK; | ||
831 | } | ||
832 | smc_wr_remember_qp_attr(link_new); | ||
833 | |||
834 | rc = smcr_buf_reg_lgr(link_new); | ||
835 | if (rc) { | ||
836 | smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ, | ||
837 | false, SMC_LLC_DEL_LOST_PATH); | ||
838 | return -ENOLINK; | ||
839 | } | ||
840 | |||
841 | /* send CONFIRM LINK response over RoCE fabric */ | ||
842 | rc = smc_llc_send_confirm_link(link_new, SMC_LLC_RESP); | ||
843 | if (rc) { | ||
844 | smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ, | ||
845 | false, SMC_LLC_DEL_LOST_PATH); | ||
846 | return -ENOLINK; | ||
847 | } | ||
848 | smc_llc_link_active(link_new); | ||
849 | if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL || | ||
850 | lgr_new_t == SMC_LGR_ASYMMETRIC_PEER) | ||
851 | smcr_lgr_set_type_asym(lgr, lgr_new_t, link_new->link_idx); | ||
852 | else | ||
853 | smcr_lgr_set_type(lgr, lgr_new_t); | ||
854 | return 0; | ||
855 | } | ||
856 | |||
857 | static void smc_llc_save_add_link_info(struct smc_link *link, | ||
858 | struct smc_llc_msg_add_link *add_llc) | ||
859 | { | ||
860 | link->peer_qpn = ntoh24(add_llc->sender_qp_num); | ||
861 | memcpy(link->peer_gid, add_llc->sender_gid, SMC_GID_SIZE); | ||
862 | memcpy(link->peer_mac, add_llc->sender_mac, ETH_ALEN); | ||
863 | link->peer_psn = ntoh24(add_llc->initial_psn); | ||
864 | link->peer_mtu = add_llc->qp_mtu; | ||
865 | } | ||
866 | |||
867 | /* as an SMC client, process an add link request */ | ||
868 | int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry) | ||
869 | { | ||
870 | struct smc_llc_msg_add_link *llc = &qentry->msg.add_link; | ||
871 | enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC; | ||
872 | struct smc_link_group *lgr = smc_get_lgr(link); | ||
873 | struct smc_link *lnk_new = NULL; | ||
874 | struct smc_init_info ini; | ||
875 | int lnk_idx, rc = 0; | ||
876 | |||
877 | if (!llc->qp_mtu) | ||
878 | goto out_reject; | ||
879 | |||
880 | ini.vlan_id = lgr->vlan_id; | ||
881 | smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev); | ||
882 | if (!memcmp(llc->sender_gid, link->peer_gid, SMC_GID_SIZE) && | ||
883 | !memcmp(llc->sender_mac, link->peer_mac, ETH_ALEN)) { | ||
884 | if (!ini.ib_dev) | ||
885 | goto out_reject; | ||
886 | lgr_new_t = SMC_LGR_ASYMMETRIC_PEER; | ||
887 | } | ||
888 | if (!ini.ib_dev) { | ||
889 | lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL; | ||
890 | ini.ib_dev = link->smcibdev; | ||
891 | ini.ib_port = link->ibport; | ||
892 | } | ||
893 | lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t); | ||
894 | if (lnk_idx < 0) | ||
895 | goto out_reject; | ||
896 | lnk_new = &lgr->lnk[lnk_idx]; | ||
897 | rc = smcr_link_init(lgr, lnk_new, lnk_idx, &ini); | ||
898 | if (rc) | ||
899 | goto out_reject; | ||
900 | smc_llc_save_add_link_info(lnk_new, llc); | ||
901 | lnk_new->link_id = llc->link_num; /* SMC server assigns link id */ | ||
902 | smc_llc_link_set_uid(lnk_new); | ||
903 | |||
904 | rc = smc_ib_ready_link(lnk_new); | ||
905 | if (rc) | ||
906 | goto out_clear_lnk; | ||
907 | |||
908 | rc = smcr_buf_map_lgr(lnk_new); | ||
909 | if (rc) | ||
910 | goto out_clear_lnk; | ||
911 | |||
912 | rc = smc_llc_send_add_link(link, | ||
913 | lnk_new->smcibdev->mac[ini.ib_port - 1], | ||
914 | lnk_new->gid, lnk_new, SMC_LLC_RESP); | ||
915 | if (rc) | ||
916 | goto out_clear_lnk; | ||
917 | rc = smc_llc_cli_rkey_exchange(link, lnk_new); | ||
918 | if (rc) { | ||
919 | rc = 0; | ||
920 | goto out_clear_lnk; | ||
921 | } | ||
922 | rc = smc_llc_cli_conf_link(link, &ini, lnk_new, lgr_new_t); | ||
923 | if (!rc) | ||
924 | goto out; | ||
925 | out_clear_lnk: | ||
926 | lnk_new->state = SMC_LNK_INACTIVE; | ||
927 | smcr_link_clear(lnk_new, false); | ||
928 | out_reject: | ||
929 | smc_llc_cli_add_link_reject(qentry); | ||
930 | out: | ||
931 | kfree(qentry); | ||
932 | return rc; | ||
933 | } | ||
934 | |||
935 | /* as an SMC client, invite server to start the add_link processing */ | ||
936 | static void smc_llc_cli_add_link_invite(struct smc_link *link, | ||
937 | struct smc_llc_qentry *qentry) | ||
938 | { | ||
939 | struct smc_link_group *lgr = smc_get_lgr(link); | ||
940 | struct smc_init_info ini; | ||
941 | |||
942 | if (lgr->type == SMC_LGR_SYMMETRIC || | ||
943 | lgr->type == SMC_LGR_ASYMMETRIC_PEER) | ||
944 | goto out; | ||
945 | |||
946 | ini.vlan_id = lgr->vlan_id; | ||
947 | smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev); | ||
948 | if (!ini.ib_dev) | ||
949 | goto out; | ||
950 | |||
951 | smc_llc_send_add_link(link, ini.ib_dev->mac[ini.ib_port - 1], | ||
952 | ini.ib_gid, NULL, SMC_LLC_REQ); | ||
953 | out: | ||
954 | kfree(qentry); | ||
955 | } | ||
956 | |||
957 | static bool smc_llc_is_empty_llc_message(union smc_llc_msg *llc) | ||
958 | { | ||
959 | int i; | ||
960 | |||
961 | for (i = 0; i < ARRAY_SIZE(llc->raw.data); i++) | ||
962 | if (llc->raw.data[i]) | ||
963 | return false; | ||
964 | return true; | ||
965 | } | ||
966 | |||
967 | static bool smc_llc_is_local_add_link(union smc_llc_msg *llc) | ||
968 | { | ||
969 | if (llc->raw.hdr.common.type == SMC_LLC_ADD_LINK && | ||
970 | smc_llc_is_empty_llc_message(llc)) | ||
971 | return true; | ||
972 | return false; | ||
973 | } | ||
974 | |||
975 | static void smc_llc_process_cli_add_link(struct smc_link_group *lgr) | ||
976 | { | ||
977 | struct smc_llc_qentry *qentry; | ||
978 | |||
979 | qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl); | ||
980 | |||
981 | mutex_lock(&lgr->llc_conf_mutex); | ||
982 | if (smc_llc_is_local_add_link(&qentry->msg)) | ||
983 | smc_llc_cli_add_link_invite(qentry->link, qentry); | ||
984 | else | ||
985 | smc_llc_cli_add_link(qentry->link, qentry); | ||
986 | mutex_unlock(&lgr->llc_conf_mutex); | ||
987 | } | ||
988 | |||
989 | static int smc_llc_active_link_count(struct smc_link_group *lgr) | ||
990 | { | ||
991 | int i, link_count = 0; | ||
992 | |||
993 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { | ||
994 | if (!smc_link_active(&lgr->lnk[i])) | ||
995 | continue; | ||
996 | link_count++; | ||
997 | } | ||
998 | return link_count; | ||
999 | } | ||
1000 | |||
1001 | /* find the asymmetric link when 3 links are established */ | ||
1002 | static struct smc_link *smc_llc_find_asym_link(struct smc_link_group *lgr) | ||
1003 | { | ||
1004 | int asym_idx = -ENOENT; | ||
1005 | int i, j, k; | ||
1006 | bool found; | ||
1007 | |||
1008 | /* determine asymmetric link */ | ||
1009 | found = false; | ||
1010 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { | ||
1011 | for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) { | ||
1012 | if (!smc_link_usable(&lgr->lnk[i]) || | ||
1013 | !smc_link_usable(&lgr->lnk[j])) | ||
1014 | continue; | ||
1015 | if (!memcmp(lgr->lnk[i].gid, lgr->lnk[j].gid, | ||
1016 | SMC_GID_SIZE)) { | ||
1017 | found = true; /* asym_lnk is i or j */ | ||
1018 | break; | ||
1019 | } | ||
1020 | } | ||
1021 | if (found) | ||
1022 | break; | ||
1023 | } | ||
1024 | if (!found) | ||
1025 | goto out; /* no asymmetric link */ | ||
1026 | for (k = 0; k < SMC_LINKS_PER_LGR_MAX; k++) { | ||
1027 | if (!smc_link_usable(&lgr->lnk[k])) | ||
1028 | continue; | ||
1029 | if (k != i && | ||
1030 | !memcmp(lgr->lnk[i].peer_gid, lgr->lnk[k].peer_gid, | ||
1031 | SMC_GID_SIZE)) { | ||
1032 | asym_idx = i; | ||
1033 | break; | ||
1034 | } | ||
1035 | if (k != j && | ||
1036 | !memcmp(lgr->lnk[j].peer_gid, lgr->lnk[k].peer_gid, | ||
1037 | SMC_GID_SIZE)) { | ||
1038 | asym_idx = j; | ||
1039 | break; | ||
1040 | } | ||
1041 | } | ||
1042 | out: | ||
1043 | return (asym_idx < 0) ? NULL : &lgr->lnk[asym_idx]; | ||
1044 | } | ||
1045 | |||
1046 | static void smc_llc_delete_asym_link(struct smc_link_group *lgr) | ||
1047 | { | ||
1048 | struct smc_link *lnk_new = NULL, *lnk_asym; | ||
1049 | struct smc_llc_qentry *qentry; | ||
1050 | int rc; | ||
1051 | |||
1052 | lnk_asym = smc_llc_find_asym_link(lgr); | ||
1053 | if (!lnk_asym) | ||
1054 | return; /* no asymmetric link */ | ||
1055 | if (!smc_link_downing(&lnk_asym->state)) | ||
1056 | return; | ||
1057 | lnk_new = smc_switch_conns(lgr, lnk_asym, false); | ||
1058 | smc_wr_tx_wait_no_pending_sends(lnk_asym); | ||
1059 | if (!lnk_new) | ||
1060 | goto out_free; | ||
1061 | /* change flow type from ADD_LINK into DEL_LINK */ | ||
1062 | lgr->llc_flow_lcl.type = SMC_LLC_FLOW_DEL_LINK; | ||
1063 | rc = smc_llc_send_delete_link(lnk_new, lnk_asym->link_id, SMC_LLC_REQ, | ||
1064 | true, SMC_LLC_DEL_NO_ASYM_NEEDED); | ||
1065 | if (rc) { | ||
1066 | smcr_link_down_cond(lnk_new); | ||
1067 | goto out_free; | ||
1068 | } | ||
1069 | qentry = smc_llc_wait(lgr, lnk_new, SMC_LLC_WAIT_TIME, | ||
1070 | SMC_LLC_DELETE_LINK); | ||
1071 | if (!qentry) { | ||
1072 | smcr_link_down_cond(lnk_new); | ||
1073 | goto out_free; | ||
1074 | } | ||
1075 | smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); | ||
1076 | out_free: | ||
1077 | smcr_link_clear(lnk_asym, true); | ||
1078 | } | ||
1079 | |||
1080 | static int smc_llc_srv_rkey_exchange(struct smc_link *link, | ||
1081 | struct smc_link *link_new) | ||
1082 | { | ||
1083 | struct smc_llc_msg_add_link_cont *addc_llc; | ||
1084 | struct smc_link_group *lgr = link->lgr; | ||
1085 | u8 max, num_rkeys_send, num_rkeys_recv; | ||
1086 | struct smc_llc_qentry *qentry = NULL; | ||
1087 | struct smc_buf_desc *buf_pos; | ||
1088 | int buf_lst; | ||
1089 | int rc = 0; | ||
1090 | int i; | ||
1091 | |||
1092 | mutex_lock(&lgr->rmbs_lock); | ||
1093 | num_rkeys_send = lgr->conns_num; | ||
1094 | buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst); | ||
1095 | do { | ||
1096 | smc_llc_add_link_cont(link, link_new, &num_rkeys_send, | ||
1097 | &buf_lst, &buf_pos); | ||
1098 | qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_TIME, | ||
1099 | SMC_LLC_ADD_LINK_CONT); | ||
1100 | if (!qentry) { | ||
1101 | rc = -ETIMEDOUT; | ||
1102 | goto out; | ||
1103 | } | ||
1104 | addc_llc = &qentry->msg.add_link_cont; | ||
1105 | num_rkeys_recv = addc_llc->num_rkeys; | ||
1106 | max = min_t(u8, num_rkeys_recv, SMC_LLC_RKEYS_PER_CONT_MSG); | ||
1107 | for (i = 0; i < max; i++) { | ||
1108 | smc_rtoken_set(lgr, link->link_idx, link_new->link_idx, | ||
1109 | addc_llc->rt[i].rmb_key, | ||
1110 | addc_llc->rt[i].rmb_vaddr_new, | ||
1111 | addc_llc->rt[i].rmb_key_new); | ||
1112 | num_rkeys_recv--; | ||
1113 | } | ||
1114 | smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); | ||
1115 | } while (num_rkeys_send || num_rkeys_recv); | ||
1116 | out: | ||
1117 | mutex_unlock(&lgr->rmbs_lock); | ||
1118 | return rc; | ||
1119 | } | ||
1120 | |||
1121 | static int smc_llc_srv_conf_link(struct smc_link *link, | ||
1122 | struct smc_link *link_new, | ||
1123 | enum smc_lgr_type lgr_new_t) | ||
1124 | { | ||
1125 | struct smc_link_group *lgr = link->lgr; | ||
1126 | struct smc_llc_qentry *qentry = NULL; | ||
1127 | int rc; | ||
1128 | |||
1129 | /* send CONFIRM LINK request over the RoCE fabric */ | ||
1130 | rc = smc_llc_send_confirm_link(link_new, SMC_LLC_REQ); | ||
1131 | if (rc) | ||
1132 | return -ENOLINK; | ||
1133 | /* receive CONFIRM LINK response over the RoCE fabric */ | ||
1134 | qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME, 0); | ||
1135 | if (!qentry || | ||
1136 | qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) { | ||
1137 | /* send DELETE LINK */ | ||
1138 | smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ, | ||
1139 | false, SMC_LLC_DEL_LOST_PATH); | ||
1140 | if (qentry) | ||
1141 | smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); | ||
1142 | return -ENOLINK; | ||
1143 | } | ||
1144 | smc_llc_save_peer_uid(qentry); | ||
1145 | smc_llc_link_active(link_new); | ||
1146 | if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL || | ||
1147 | lgr_new_t == SMC_LGR_ASYMMETRIC_PEER) | ||
1148 | smcr_lgr_set_type_asym(lgr, lgr_new_t, link_new->link_idx); | ||
1149 | else | ||
1150 | smcr_lgr_set_type(lgr, lgr_new_t); | ||
1151 | smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); | ||
1152 | return 0; | ||
1153 | } | ||
1154 | |||
1155 | int smc_llc_srv_add_link(struct smc_link *link) | ||
1156 | { | ||
1157 | enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC; | ||
1158 | struct smc_link_group *lgr = link->lgr; | ||
1159 | struct smc_llc_msg_add_link *add_llc; | ||
1160 | struct smc_llc_qentry *qentry = NULL; | ||
1161 | struct smc_link *link_new; | ||
1162 | struct smc_init_info ini; | ||
1163 | int lnk_idx, rc = 0; | ||
1164 | |||
1165 | /* ignore client add link recommendation, start new flow */ | ||
1166 | ini.vlan_id = lgr->vlan_id; | ||
1167 | smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev); | ||
1168 | if (!ini.ib_dev) { | ||
1169 | lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL; | ||
1170 | ini.ib_dev = link->smcibdev; | ||
1171 | ini.ib_port = link->ibport; | ||
1172 | } | ||
1173 | lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t); | ||
1174 | if (lnk_idx < 0) | ||
1175 | return 0; | ||
1176 | |||
1177 | rc = smcr_link_init(lgr, &lgr->lnk[lnk_idx], lnk_idx, &ini); | ||
1178 | if (rc) | ||
1179 | return rc; | ||
1180 | link_new = &lgr->lnk[lnk_idx]; | ||
1181 | rc = smc_llc_send_add_link(link, | ||
1182 | link_new->smcibdev->mac[ini.ib_port - 1], | ||
1183 | link_new->gid, link_new, SMC_LLC_REQ); | ||
1184 | if (rc) | ||
1185 | goto out_err; | ||
1186 | /* receive ADD LINK response over the RoCE fabric */ | ||
1187 | qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_TIME, SMC_LLC_ADD_LINK); | ||
1188 | if (!qentry) { | ||
1189 | rc = -ETIMEDOUT; | ||
1190 | goto out_err; | ||
1191 | } | ||
1192 | add_llc = &qentry->msg.add_link; | ||
1193 | if (add_llc->hd.flags & SMC_LLC_FLAG_ADD_LNK_REJ) { | ||
1194 | smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); | ||
1195 | rc = -ENOLINK; | ||
1196 | goto out_err; | ||
1197 | } | ||
1198 | if (lgr->type == SMC_LGR_SINGLE && | ||
1199 | (!memcmp(add_llc->sender_gid, link->peer_gid, SMC_GID_SIZE) && | ||
1200 | !memcmp(add_llc->sender_mac, link->peer_mac, ETH_ALEN))) { | ||
1201 | lgr_new_t = SMC_LGR_ASYMMETRIC_PEER; | ||
1202 | } | ||
1203 | smc_llc_save_add_link_info(link_new, add_llc); | ||
1204 | smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); | ||
1205 | |||
1206 | rc = smc_ib_ready_link(link_new); | ||
1207 | if (rc) | ||
1208 | goto out_err; | ||
1209 | rc = smcr_buf_map_lgr(link_new); | ||
1210 | if (rc) | ||
1211 | goto out_err; | ||
1212 | rc = smcr_buf_reg_lgr(link_new); | ||
1213 | if (rc) | ||
1214 | goto out_err; | ||
1215 | rc = smc_llc_srv_rkey_exchange(link, link_new); | ||
1216 | if (rc) | ||
1217 | goto out_err; | ||
1218 | rc = smc_llc_srv_conf_link(link, link_new, lgr_new_t); | ||
1219 | if (rc) | ||
1220 | goto out_err; | ||
1221 | return 0; | ||
1222 | out_err: | ||
1223 | link_new->state = SMC_LNK_INACTIVE; | ||
1224 | smcr_link_clear(link_new, false); | ||
1225 | return rc; | ||
1226 | } | ||
1227 | |||
1228 | static void smc_llc_process_srv_add_link(struct smc_link_group *lgr) | ||
1229 | { | ||
1230 | struct smc_link *link = lgr->llc_flow_lcl.qentry->link; | ||
1231 | int rc; | ||
1232 | |||
1233 | smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); | ||
1234 | |||
1235 | mutex_lock(&lgr->llc_conf_mutex); | ||
1236 | rc = smc_llc_srv_add_link(link); | ||
1237 | if (!rc && lgr->type == SMC_LGR_SYMMETRIC) { | ||
1238 | /* delete any asymmetric link */ | ||
1239 | smc_llc_delete_asym_link(lgr); | ||
1240 | } | ||
1241 | mutex_unlock(&lgr->llc_conf_mutex); | ||
1242 | } | ||
1243 | |||
1244 | /* enqueue a local add_link req to trigger a new add_link flow */ | ||
1245 | void smc_llc_add_link_local(struct smc_link *link) | ||
1246 | { | ||
1247 | struct smc_llc_msg_add_link add_llc = {}; | ||
1248 | |||
1249 | add_llc.hd.length = sizeof(add_llc); | ||
1250 | add_llc.hd.common.type = SMC_LLC_ADD_LINK; | ||
1251 | /* no dev and port needed */ | ||
1252 | smc_llc_enqueue(link, (union smc_llc_msg *)&add_llc); | ||
1253 | } | ||
1254 | |||
1255 | /* worker to process an add link message */ | ||
1256 | static void smc_llc_add_link_work(struct work_struct *work) | ||
1257 | { | ||
1258 | struct smc_link_group *lgr = container_of(work, struct smc_link_group, | ||
1259 | llc_add_link_work); | ||
1260 | |||
1261 | if (list_empty(&lgr->list)) { | ||
1262 | /* link group is terminating */ | ||
1263 | smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); | ||
1264 | goto out; | ||
1265 | } | ||
1266 | |||
1267 | if (lgr->role == SMC_CLNT) | ||
1268 | smc_llc_process_cli_add_link(lgr); | ||
1269 | else | ||
1270 | smc_llc_process_srv_add_link(lgr); | ||
1271 | out: | ||
1272 | smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl); | ||
1273 | } | ||
1274 | |||
1275 | /* enqueue a local del_link msg to trigger a new del_link flow, | ||
1276 | * called only for role SMC_SERV | ||
1277 | */ | ||
1278 | void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id) | ||
1279 | { | ||
1280 | struct smc_llc_msg_del_link del_llc = {}; | ||
1281 | |||
1282 | del_llc.hd.length = sizeof(del_llc); | ||
1283 | del_llc.hd.common.type = SMC_LLC_DELETE_LINK; | ||
1284 | del_llc.link_num = del_link_id; | ||
1285 | del_llc.reason = htonl(SMC_LLC_DEL_LOST_PATH); | ||
1286 | del_llc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; | ||
1287 | smc_llc_enqueue(link, (union smc_llc_msg *)&del_llc); | ||
1288 | } | ||
1289 | |||
1290 | static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr) | ||
1291 | { | ||
1292 | struct smc_link *lnk_del = NULL, *lnk_asym, *lnk; | ||
1293 | struct smc_llc_msg_del_link *del_llc; | ||
1294 | struct smc_llc_qentry *qentry; | ||
1295 | int active_links; | ||
1296 | int lnk_idx; | ||
1297 | |||
1298 | qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl); | ||
1299 | lnk = qentry->link; | ||
1300 | del_llc = &qentry->msg.delete_link; | ||
1301 | |||
1302 | if (del_llc->hd.flags & SMC_LLC_FLAG_DEL_LINK_ALL) { | ||
1303 | smc_lgr_terminate_sched(lgr); | ||
1304 | goto out; | ||
1305 | } | ||
1306 | mutex_lock(&lgr->llc_conf_mutex); | ||
1307 | /* delete single link */ | ||
1308 | for (lnk_idx = 0; lnk_idx < SMC_LINKS_PER_LGR_MAX; lnk_idx++) { | ||
1309 | if (lgr->lnk[lnk_idx].link_id != del_llc->link_num) | ||
1310 | continue; | ||
1311 | lnk_del = &lgr->lnk[lnk_idx]; | ||
1312 | break; | ||
1313 | } | ||
1314 | del_llc->hd.flags |= SMC_LLC_FLAG_RESP; | ||
1315 | if (!lnk_del) { | ||
1316 | /* link was not found */ | ||
1317 | del_llc->reason = htonl(SMC_LLC_DEL_NOLNK); | ||
1318 | smc_llc_send_message(lnk, &qentry->msg); | ||
1319 | goto out_unlock; | ||
1320 | } | ||
1321 | lnk_asym = smc_llc_find_asym_link(lgr); | ||
1322 | |||
1323 | del_llc->reason = 0; | ||
1324 | smc_llc_send_message(lnk, &qentry->msg); /* response */ | ||
1325 | |||
1326 | if (smc_link_downing(&lnk_del->state)) | ||
1327 | smc_switch_conns(lgr, lnk_del, false); | ||
1328 | smcr_link_clear(lnk_del, true); | ||
1329 | |||
1330 | active_links = smc_llc_active_link_count(lgr); | ||
1331 | if (lnk_del == lnk_asym) { | ||
1332 | /* expected deletion of asym link, don't change lgr state */ | ||
1333 | } else if (active_links == 1) { | ||
1334 | smcr_lgr_set_type(lgr, SMC_LGR_SINGLE); | ||
1335 | } else if (!active_links) { | ||
1336 | smcr_lgr_set_type(lgr, SMC_LGR_NONE); | ||
1337 | smc_lgr_terminate_sched(lgr); | ||
1338 | } | ||
1339 | out_unlock: | ||
1340 | mutex_unlock(&lgr->llc_conf_mutex); | ||
1341 | out: | ||
1342 | kfree(qentry); | ||
1343 | } | ||
1344 | |||
1345 | /* try to send a DELETE LINK ALL request on any active link, | ||
1346 | * waiting for send completion | ||
1347 | */ | ||
1348 | void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn) | ||
1349 | { | ||
1350 | struct smc_llc_msg_del_link delllc = {}; | ||
1351 | int i; | ||
1352 | |||
1353 | delllc.hd.common.type = SMC_LLC_DELETE_LINK; | ||
1354 | delllc.hd.length = sizeof(delllc); | ||
1355 | if (ord) | ||
1356 | delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; | ||
1357 | delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL; | ||
1358 | delllc.reason = htonl(rsn); | ||
1359 | |||
1360 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { | ||
1361 | if (!smc_link_sendable(&lgr->lnk[i])) | ||
1362 | continue; | ||
1364 | if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc)) | ||
1365 | break; /* first successful send is enough */ | ||
1365 | } | ||
1366 | } | ||
1367 | |||
1368 | static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr) | ||
1369 | { | ||
1370 | struct smc_llc_msg_del_link *del_llc; | ||
1371 | struct smc_link *lnk, *lnk_del; | ||
1372 | struct smc_llc_qentry *qentry; | ||
1373 | int active_links; | ||
1374 | int i; | ||
1375 | |||
1376 | mutex_lock(&lgr->llc_conf_mutex); | ||
1377 | qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl); | ||
1378 | lnk = qentry->link; | ||
1379 | del_llc = &qentry->msg.delete_link; | ||
1380 | |||
1381 | if (qentry->msg.delete_link.hd.flags & SMC_LLC_FLAG_DEL_LINK_ALL) { | ||
1382 | /* delete entire lgr */ | ||
1383 | smc_llc_send_link_delete_all(lgr, true, ntohl( | ||
1384 | qentry->msg.delete_link.reason)); | ||
1385 | smc_lgr_terminate_sched(lgr); | ||
1386 | goto out; | ||
1387 | } | ||
1388 | /* delete single link */ | ||
1389 | lnk_del = NULL; | ||
1390 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { | ||
1391 | if (lgr->lnk[i].link_id == del_llc->link_num) { | ||
1392 | lnk_del = &lgr->lnk[i]; | ||
1393 | break; | ||
1394 | } | ||
1395 | } | ||
1396 | if (!lnk_del) | ||
1397 | goto out; /* asymmetric link already deleted */ | ||
1398 | |||
1399 | if (smc_link_downing(&lnk_del->state)) { | ||
1400 | if (smc_switch_conns(lgr, lnk_del, false)) | ||
1401 | smc_wr_tx_wait_no_pending_sends(lnk_del); | ||
1402 | } | ||
1403 | if (!list_empty(&lgr->list)) { | ||
1404 | /* qentry is either a request from peer (send it back to | ||
1405 | * initiate the DELETE_LINK processing), or a locally | ||
1406 | * enqueued DELETE_LINK request (forward it) | ||
1407 | */ | ||
1408 | if (!smc_llc_send_message(lnk, &qentry->msg)) { | ||
1409 | struct smc_llc_qentry *qentry2; | ||
1410 | |||
1411 | qentry2 = smc_llc_wait(lgr, lnk, SMC_LLC_WAIT_TIME, | ||
1412 | SMC_LLC_DELETE_LINK); | ||
1413 | if (qentry2) | ||
1414 | smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); | ||
1415 | } | ||
1416 | } | ||
1417 | smcr_link_clear(lnk_del, true); | ||
1418 | |||
1419 | active_links = smc_llc_active_link_count(lgr); | ||
1420 | if (active_links == 1) { | ||
1421 | smcr_lgr_set_type(lgr, SMC_LGR_SINGLE); | ||
1422 | } else if (!active_links) { | ||
1423 | smcr_lgr_set_type(lgr, SMC_LGR_NONE); | ||
1424 | smc_lgr_terminate_sched(lgr); | ||
1425 | } | ||
1426 | |||
1427 | if (lgr->type == SMC_LGR_SINGLE && !list_empty(&lgr->list)) { | ||
1428 | /* trigger setup of asymm alt link */ | ||
1429 | smc_llc_add_link_local(lnk); | ||
1430 | } | ||
1431 | out: | ||
1432 | mutex_unlock(&lgr->llc_conf_mutex); | ||
1433 | kfree(qentry); | ||
1434 | } | ||
1435 | |||
1436 | static void smc_llc_delete_link_work(struct work_struct *work) | ||
1437 | { | ||
1438 | struct smc_link_group *lgr = container_of(work, struct smc_link_group, | ||
1439 | llc_del_link_work); | ||
1440 | |||
1441 | if (list_empty(&lgr->list)) { | ||
1442 | /* link group is terminating */ | ||
1443 | smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); | ||
1444 | goto out; | ||
1445 | } | ||
1446 | |||
1447 | if (lgr->role == SMC_CLNT) | ||
1448 | smc_llc_process_cli_delete_link(lgr); | ||
1449 | else | ||
1450 | smc_llc_process_srv_delete_link(lgr); | ||
1451 | out: | ||
1452 | smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl); | ||
1453 | } | ||
1454 | |||
1455 | /* process a confirm_rkey request from peer, remote flow */ | ||
1456 | static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr) | ||
1457 | { | ||
1458 | struct smc_llc_msg_confirm_rkey *llc; | ||
1459 | struct smc_llc_qentry *qentry; | ||
1460 | struct smc_link *link; | ||
1461 | int num_entries; | ||
1462 | int rk_idx; | ||
1463 | int i; | ||
1464 | |||
1465 | qentry = lgr->llc_flow_rmt.qentry; | ||
1466 | llc = &qentry->msg.confirm_rkey; | ||
1467 | link = qentry->link; | ||
1468 | |||
1469 | num_entries = llc->rtoken[0].num_rkeys; | ||
1470 | /* first rkey entry is for receiving link */ | ||
1471 | rk_idx = smc_rtoken_add(link, | ||
1472 | llc->rtoken[0].rmb_vaddr, | ||
1473 | llc->rtoken[0].rmb_key); | ||
1474 | if (rk_idx < 0) | ||
1475 | goto out_err; | ||
1476 | |||
1477 | for (i = 1; i <= min_t(u8, num_entries, SMC_LLC_RKEYS_PER_MSG - 1); i++) | ||
1478 | smc_rtoken_set2(lgr, rk_idx, llc->rtoken[i].link_id, | ||
1479 | llc->rtoken[i].rmb_vaddr, | ||
1480 | llc->rtoken[i].rmb_key); | ||
1481 | /* max links is 3 so there is no need to support conf_rkey_cont msgs */ | ||
1482 | goto out; | ||
1483 | out_err: | ||
1484 | llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG; | ||
1485 | llc->hd.flags |= SMC_LLC_FLAG_RKEY_RETRY; | ||
1486 | out: | ||
1487 | llc->hd.flags |= SMC_LLC_FLAG_RESP; | ||
1488 | smc_llc_send_message(link, &qentry->msg); | ||
1489 | smc_llc_flow_qentry_del(&lgr->llc_flow_rmt); | ||
1490 | } | ||
1491 | |||
1492 | /* process a delete_rkey request from peer, remote flow */ | ||
1493 | static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr) | ||
1494 | { | ||
1495 | struct smc_llc_msg_delete_rkey *llc; | ||
1496 | struct smc_llc_qentry *qentry; | ||
1497 | struct smc_link *link; | ||
1498 | u8 err_mask = 0; | ||
1499 | int i, max; | ||
1500 | |||
1501 | qentry = lgr->llc_flow_rmt.qentry; | ||
1502 | llc = &qentry->msg.delete_rkey; | ||
1503 | link = qentry->link; | ||
1504 | |||
1505 | max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX); | ||
1506 | for (i = 0; i < max; i++) { | ||
1507 | if (smc_rtoken_delete(link, llc->rkey[i])) | ||
1508 | err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i); | ||
1509 | } | ||
1510 | if (err_mask) { | ||
1511 | llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG; | ||
1512 | llc->err_mask = err_mask; | ||
1513 | } | ||
1514 | llc->hd.flags |= SMC_LLC_FLAG_RESP; | ||
1515 | smc_llc_send_message(link, &qentry->msg); | ||
1516 | smc_llc_flow_qentry_del(&lgr->llc_flow_rmt); | ||
1517 | } | ||
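/* Worked example (illustration only, not part of the kernel source): how the
 * err_mask computed above maps failed deletions to bits.  Assuming
 * SMC_LLC_DEL_RKEY_MAX == 8 (its value in mainline Linux), a failure of
 * rkey[i] sets bit (7 - i): rkey[0] maps to the most significant bit 0x80,
 * rkey[7] to the least significant bit 0x01.
 */
#include <stdint.h>
#include <stdio.h>

#define DEL_RKEY_MAX 8	/* stands in for SMC_LLC_DEL_RKEY_MAX */

int main(void)
{
	uint8_t err_mask = 0;
	int failed[] = { 0, 3 };	/* pretend rkey[0] and rkey[3] failed */

	for (unsigned int i = 0; i < sizeof(failed) / sizeof(failed[0]); i++)
		err_mask |= 1 << (DEL_RKEY_MAX - 1 - failed[i]);
	printf("err_mask = 0x%02x\n", err_mask);	/* prints 0x90 */
	return 0;
}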
1518 | |||
1519 | static void smc_llc_protocol_violation(struct smc_link_group *lgr, u8 type) | ||
1520 | { | ||
1521 | pr_warn_ratelimited("smc: SMC-R lg %*phN LLC protocol violation: " | ||
1522 | "llc_type %d\n", SMC_LGR_ID_SIZE, &lgr->id, type); | ||
1523 | smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_PROT_VIOL); | ||
1524 | smc_lgr_terminate_sched(lgr); | ||
1525 | } | ||
1526 | |||
1527 | /* flush the llc event queue */ | ||
1528 | static void smc_llc_event_flush(struct smc_link_group *lgr) | ||
1529 | { | ||
1530 | struct smc_llc_qentry *qentry, *q; | ||
1531 | |||
1532 | spin_lock_bh(&lgr->llc_event_q_lock); | ||
1533 | list_for_each_entry_safe(qentry, q, &lgr->llc_event_q, list) { | ||
1534 | list_del_init(&qentry->list); | ||
1535 | kfree(qentry); | ||
1536 | } | ||
1537 | spin_unlock_bh(&lgr->llc_event_q_lock); | ||
1538 | } | ||
1539 | |||
1540 | static void smc_llc_event_handler(struct smc_llc_qentry *qentry) | ||
1541 | { | ||
1542 | union smc_llc_msg *llc = &qentry->msg; | ||
1543 | struct smc_link *link = qentry->link; | ||
1544 | struct smc_link_group *lgr = link->lgr; | ||
1545 | |||
1546 | if (!smc_link_usable(link)) | ||
1547 | goto out; | ||
1548 | |||
1549 | switch (llc->raw.hdr.common.type) { | ||
1550 | case SMC_LLC_TEST_LINK: | ||
1551 | llc->test_link.hd.flags |= SMC_LLC_FLAG_RESP; | ||
1552 | smc_llc_send_message(link, llc); | ||
1553 | break; | ||
1554 | case SMC_LLC_ADD_LINK: | ||
1555 | if (list_empty(&lgr->list)) | ||
1556 | goto out; /* lgr is terminating */ | ||
1557 | if (lgr->role == SMC_CLNT) { | ||
1558 | if (smc_llc_is_local_add_link(llc)) { | ||
1559 | if (lgr->llc_flow_lcl.type == | ||
1560 | SMC_LLC_FLOW_ADD_LINK) | ||
1561 | break; /* add_link in progress */ | ||
1562 | if (smc_llc_flow_start(&lgr->llc_flow_lcl, | ||
1563 | qentry)) { | ||
1564 | schedule_work(&lgr->llc_add_link_work); | ||
1565 | } | ||
1566 | return; | ||
1567 | } | ||
1568 | if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK && | ||
1569 | !lgr->llc_flow_lcl.qentry) { | ||
1570 | /* a flow is waiting for this message */ | ||
1571 | smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, | ||
1572 | qentry); | ||
1573 | wake_up(&lgr->llc_msg_waiter); | ||
1574 | } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, | ||
1575 | qentry)) { | ||
1576 | schedule_work(&lgr->llc_add_link_work); | ||
1577 | } | ||
1578 | } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { | ||
1579 | /* as smc server, handle client suggestion */ | ||
1580 | schedule_work(&lgr->llc_add_link_work); | ||
1581 | } | ||
1582 | return; | ||
1583 | case SMC_LLC_CONFIRM_LINK: | ||
1584 | case SMC_LLC_ADD_LINK_CONT: | ||
1585 | if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { | ||
1586 | /* a flow is waiting for this message */ | ||
1587 | smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry); | ||
1588 | wake_up(&lgr->llc_msg_waiter); | ||
1589 | return; | ||
1590 | } | ||
1591 | break; | ||
1592 | case SMC_LLC_DELETE_LINK: | ||
1593 | if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK && | ||
1594 | !lgr->llc_flow_lcl.qentry) { | ||
1595 | /* DEL LINK REQ during ADD LINK SEQ */ | ||
1596 | smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry); | ||
1597 | wake_up(&lgr->llc_msg_waiter); | ||
1598 | } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { | ||
1599 | schedule_work(&lgr->llc_del_link_work); | ||
1600 | } | ||
1601 | return; | ||
1602 | case SMC_LLC_CONFIRM_RKEY: | ||
1603 | /* new request from remote, assign to remote flow */ | ||
1604 | if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) { | ||
1605 | /* process here, does not wait for more llc msgs */ | ||
1606 | smc_llc_rmt_conf_rkey(lgr); | ||
1607 | smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt); | ||
1608 | } | ||
1609 | return; | ||
1610 | case SMC_LLC_CONFIRM_RKEY_CONT: | ||
1611 | /* not used because max links is 3, and 3 rkeys fit into | ||
1612 | * one CONFIRM_RKEY message | ||
1613 | */ | ||
1614 | break; | ||
1615 | case SMC_LLC_DELETE_RKEY: | ||
1616 | /* new request from remote, assign to remote flow */ | ||
1617 | if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) { | ||
1618 | /* process here, does not wait for more llc msgs */ | ||
1619 | smc_llc_rmt_delete_rkey(lgr); | ||
1620 | smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt); | ||
1621 | } | ||
1622 | return; | ||
1623 | default: | ||
1624 | smc_llc_protocol_violation(lgr, llc->raw.hdr.common.type); | ||
1625 | break; | ||
1626 | } | ||
1627 | out: | ||
1628 | kfree(qentry); | ||
1629 | } | ||
1630 | |||
1631 | /* worker to process llc messages on the event queue */ | ||
1632 | static void smc_llc_event_work(struct work_struct *work) | ||
1633 | { | ||
1634 | struct smc_link_group *lgr = container_of(work, struct smc_link_group, | ||
1635 | llc_event_work); | ||
1636 | struct smc_llc_qentry *qentry; | ||
1637 | |||
1638 | if (!lgr->llc_flow_lcl.type && lgr->delayed_event) { | ||
1639 | qentry = lgr->delayed_event; | ||
1640 | lgr->delayed_event = NULL; | ||
1641 | if (smc_link_usable(qentry->link)) | ||
1642 | smc_llc_event_handler(qentry); | ||
1643 | else | ||
1644 | kfree(qentry); | ||
1645 | } | ||
1646 | |||
1647 | again: | ||
1648 | spin_lock_bh(&lgr->llc_event_q_lock); | ||
1649 | if (!list_empty(&lgr->llc_event_q)) { | ||
1650 | qentry = list_first_entry(&lgr->llc_event_q, | ||
1651 | struct smc_llc_qentry, list); | ||
1652 | list_del_init(&qentry->list); | ||
1653 | spin_unlock_bh(&lgr->llc_event_q_lock); | ||
1654 | smc_llc_event_handler(qentry); | ||
1655 | goto again; | ||
1656 | } | ||
1657 | spin_unlock_bh(&lgr->llc_event_q_lock); | ||
1658 | } | ||
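/* A minimal userspace sketch (names invented) of the drain pattern used by
 * smc_llc_event_work() above: take one entry off the queue under the lock,
 * drop the lock before invoking the handler, and loop until the queue is
 * empty.  A pthread mutex stands in for the kernel spinlock.
 */
#include <pthread.h>
#include <stdlib.h>

struct qentry {
	struct qentry *next;
	int payload;
};

struct evq {
	pthread_mutex_t lock;	/* protects head */
	struct qentry *head;
};

static void handle(struct qentry *e)
{
	/* ... process the event ..., then release it */
	free(e);
}

static void drain(struct evq *q)
{
	for (;;) {
		pthread_mutex_lock(&q->lock);
		struct qentry *e = q->head;

		if (e)
			q->head = e->next;
		pthread_mutex_unlock(&q->lock);
		if (!e)
			break;		/* queue is empty */
		handle(e);		/* run without holding the lock */
	}
}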
1659 | |||
1660 | /* process llc responses in tasklet context */ | ||
1661 | static void smc_llc_rx_response(struct smc_link *link, | ||
1662 | struct smc_llc_qentry *qentry) | ||
1663 | { | ||
1664 | enum smc_llc_flowtype flowtype = link->lgr->llc_flow_lcl.type; | ||
1665 | struct smc_llc_flow *flow = &link->lgr->llc_flow_lcl; | ||
1666 | u8 llc_type = qentry->msg.raw.hdr.common.type; | ||
1667 | |||
1668 | switch (llc_type) { | ||
1669 | case SMC_LLC_TEST_LINK: | ||
1670 | if (smc_link_active(link)) | ||
1671 | complete(&link->llc_testlink_resp); | ||
1672 | break; | ||
1673 | case SMC_LLC_ADD_LINK: | ||
1674 | case SMC_LLC_ADD_LINK_CONT: | ||
1675 | case SMC_LLC_CONFIRM_LINK: | ||
1676 | if (flowtype != SMC_LLC_FLOW_ADD_LINK || flow->qentry) | ||
1677 | break; /* drop out-of-flow response */ | ||
1678 | goto assign; | ||
1679 | case SMC_LLC_DELETE_LINK: | ||
1680 | if (flowtype != SMC_LLC_FLOW_DEL_LINK || flow->qentry) | ||
1681 | break; /* drop out-of-flow response */ | ||
1682 | goto assign; | ||
1683 | case SMC_LLC_CONFIRM_RKEY: | ||
1684 | case SMC_LLC_DELETE_RKEY: | ||
1685 | if (flowtype != SMC_LLC_FLOW_RKEY || flow->qentry) | ||
1686 | break; /* drop out-of-flow response */ | ||
1687 | goto assign; | ||
1688 | case SMC_LLC_CONFIRM_RKEY_CONT: | ||
1689 | /* not used because max links is 3 */ | ||
1690 | break; | ||
1691 | default: | ||
1692 | smc_llc_protocol_violation(link->lgr, llc_type); | ||
1693 | break; | ||
1694 | } | ||
1695 | kfree(qentry); | ||
1696 | return; | ||
1697 | assign: | ||
1698 | /* assign responses to the local flow, we requested them */ | ||
1699 | smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry); | ||
1700 | wake_up(&link->lgr->llc_msg_waiter); | ||
1701 | } | ||
1702 | |||
1703 | static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc) | ||
1704 | { | ||
1705 | struct smc_link_group *lgr = link->lgr; | ||
1706 | struct smc_llc_qentry *qentry; | ||
1707 | unsigned long flags; | ||
1708 | |||
1709 | qentry = kmalloc(sizeof(*qentry), GFP_ATOMIC); | ||
1710 | if (!qentry) | ||
1711 | return; | ||
1712 | qentry->link = link; | ||
1713 | INIT_LIST_HEAD(&qentry->list); | ||
1714 | memcpy(&qentry->msg, llc, sizeof(union smc_llc_msg)); | ||
1715 | |||
1716 | /* process responses immediately */ | ||
1717 | if (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) { | ||
1718 | smc_llc_rx_response(link, qentry); | ||
1719 | return; | ||
1720 | } | ||
1721 | |||
1722 | /* add requests to event queue */ | ||
1723 | spin_lock_irqsave(&lgr->llc_event_q_lock, flags); | ||
1724 | list_add_tail(&qentry->list, &lgr->llc_event_q); | ||
1725 | spin_unlock_irqrestore(&lgr->llc_event_q_lock, flags); | ||
1726 | queue_work(system_highpri_wq, &lgr->llc_event_work); | ||
1727 | } | ||
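/* Sketch of the dispatch rule smc_llc_enqueue() implements (types and
 * helpers invented for the illustration): replies carry the response flag
 * and are handled synchronously in the receive path, while requests are
 * queued for the worker because their processing may need to sleep.
 */
#define FLAG_RESP 0x80	/* mirrors SMC_LLC_FLAG_RESP */

struct msg { unsigned char flags; };

static void handle_response(struct msg *m) { (void)m; /* wake requester */ }
static void enqueue_for_worker(struct msg *m) { (void)m; /* defer */ }

static void rx_dispatch(struct msg *m)
{
	if (m->flags & FLAG_RESP) {
		handle_response(m);	/* a requester is already waiting */
		return;
	}
	enqueue_for_worker(m);		/* requests are processed later */
}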
1728 | |||
1729 | /* copy received msg and add it to the event queue */ | ||
1730 | static void smc_llc_rx_handler(struct ib_wc *wc, void *buf) | ||
1731 | { | ||
1732 | struct smc_link *link = (struct smc_link *)wc->qp->qp_context; | ||
1733 | union smc_llc_msg *llc = buf; | ||
1734 | |||
1735 | if (wc->byte_len < sizeof(*llc)) | ||
1736 | return; /* short message */ | ||
1737 | if (llc->raw.hdr.length != sizeof(*llc)) | ||
1738 | return; /* invalid message */ | ||
1739 | |||
1740 | smc_llc_enqueue(link, llc); | ||
1741 | } | ||
1742 | |||
1743 | /***************************** worker, utils *********************************/ | ||
1744 | |||
1745 | static void smc_llc_testlink_work(struct work_struct *work) | ||
1746 | { | ||
1747 | struct smc_link *link = container_of(to_delayed_work(work), | ||
1748 | struct smc_link, llc_testlink_wrk); | ||
1749 | unsigned long next_interval; | ||
1750 | unsigned long expire_time; | ||
1751 | u8 user_data[16] = { 0 }; | ||
1752 | int rc; | ||
1753 | |||
1754 | if (!smc_link_active(link)) | ||
1755 | return; /* don't reschedule worker */ | ||
1756 | expire_time = link->wr_rx_tstamp + link->llc_testlink_time; | ||
1757 | if (time_is_after_jiffies(expire_time)) { | ||
1758 | next_interval = expire_time - jiffies; | ||
1759 | goto out; | ||
1760 | } | ||
1761 | reinit_completion(&link->llc_testlink_resp); | ||
1762 | smc_llc_send_test_link(link, user_data); | ||
1763 | /* receive TEST LINK response over RoCE fabric */ | ||
1764 | rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp, | ||
1765 | SMC_LLC_WAIT_TIME); | ||
1766 | if (!smc_link_active(link)) | ||
1767 | return; /* link state changed */ | ||
1768 | if (rc <= 0) { | ||
1769 | smcr_link_down_cond_sched(link); | ||
1770 | return; | ||
1771 | } | ||
1772 | next_interval = link->llc_testlink_time; | ||
1773 | out: | ||
1774 | schedule_delayed_work(&link->llc_testlink_wrk, next_interval); | ||
1775 | } | ||
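/* The keepalive arithmetic above, transposed to a userspace sketch with
 * time in seconds instead of jiffies.  last_rx and keepalive stand in for
 * link->wr_rx_tstamp and link->llc_testlink_time; the names are invented.
 */
#include <time.h>

/* seconds until the next probe is due; 0 means probe immediately */
static long next_probe_delay(time_t last_rx, long keepalive, time_t now)
{
	time_t expire = last_rx + keepalive;

	if (expire > now)	/* traffic seen recently: just re-arm */
		return (long)(expire - now);
	return 0;		/* idle too long: send TEST_LINK now */
}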
1776 | |||
1777 | void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) | ||
1778 | { | ||
1779 | struct net *net = sock_net(smc->clcsock->sk); | ||
1780 | |||
1781 | INIT_WORK(&lgr->llc_event_work, smc_llc_event_work); | ||
1782 | INIT_WORK(&lgr->llc_add_link_work, smc_llc_add_link_work); | ||
1783 | INIT_WORK(&lgr->llc_del_link_work, smc_llc_delete_link_work); | ||
1784 | INIT_LIST_HEAD(&lgr->llc_event_q); | ||
1785 | spin_lock_init(&lgr->llc_event_q_lock); | ||
1786 | spin_lock_init(&lgr->llc_flow_lock); | ||
1787 | init_waitqueue_head(&lgr->llc_flow_waiter); | ||
1788 | init_waitqueue_head(&lgr->llc_msg_waiter); | ||
1789 | mutex_init(&lgr->llc_conf_mutex); | ||
1790 | lgr->llc_testlink_time = READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); | ||
1791 | } | ||
1792 | |||
1793 | /* called after lgr was removed from lgr_list */ | ||
1794 | void smc_llc_lgr_clear(struct smc_link_group *lgr) | ||
1795 | { | ||
1796 | smc_llc_event_flush(lgr); | ||
1797 | wake_up_all(&lgr->llc_flow_waiter); | ||
1798 | wake_up_all(&lgr->llc_msg_waiter); | ||
1799 | cancel_work_sync(&lgr->llc_event_work); | ||
1800 | cancel_work_sync(&lgr->llc_add_link_work); | ||
1801 | cancel_work_sync(&lgr->llc_del_link_work); | ||
1802 | if (lgr->delayed_event) { | ||
1803 | kfree(lgr->delayed_event); | ||
1804 | lgr->delayed_event = NULL; | ||
1805 | } | ||
1806 | } | ||
1807 | |||
1808 | int smc_llc_link_init(struct smc_link *link) | ||
1809 | { | ||
1810 | init_completion(&link->llc_testlink_resp); | ||
1811 | INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work); | ||
1812 | return 0; | ||
1813 | } | ||
1814 | |||
1815 | void smc_llc_link_active(struct smc_link *link) | ||
1816 | { | ||
1817 | pr_warn_ratelimited("smc: SMC-R lg %*phN link added: id %*phN, " | ||
1818 | "peerid %*phN, ibdev %s, ibport %d\n", | ||
1819 | SMC_LGR_ID_SIZE, &link->lgr->id, | ||
1820 | SMC_LGR_ID_SIZE, &link->link_uid, | ||
1821 | SMC_LGR_ID_SIZE, &link->peer_link_uid, | ||
1822 | link->smcibdev->ibdev->name, link->ibport); | ||
1823 | link->state = SMC_LNK_ACTIVE; | ||
1824 | if (link->lgr->llc_testlink_time) { | ||
1825 | link->llc_testlink_time = link->lgr->llc_testlink_time; | ||
1826 | schedule_delayed_work(&link->llc_testlink_wrk, | ||
1827 | link->llc_testlink_time); | ||
1828 | } | ||
1829 | } | ||
1830 | |||
1831 | /* called in worker context */ | ||
1832 | void smc_llc_link_clear(struct smc_link *link, bool log) | ||
1833 | { | ||
1834 | if (log) | ||
1835 | pr_warn_ratelimited("smc: SMC-R lg %*phN link removed: id %*phN" | ||
1836 | ", peerid %*phN, ibdev %s, ibport %d\n", | ||
1837 | SMC_LGR_ID_SIZE, &link->lgr->id, | ||
1838 | SMC_LGR_ID_SIZE, &link->link_uid, | ||
1839 | SMC_LGR_ID_SIZE, &link->peer_link_uid, | ||
1840 | link->smcibdev->ibdev->name, link->ibport); | ||
1841 | complete(&link->llc_testlink_resp); | ||
1842 | cancel_delayed_work_sync(&link->llc_testlink_wrk); | ||
1843 | } | ||
1844 | |||
1845 | /* register a new rtoken at the remote peer (for all links) */ | ||
1846 | int smc_llc_do_confirm_rkey(struct smc_link *send_link, | ||
1847 | struct smc_buf_desc *rmb_desc) | ||
1848 | { | ||
1849 | struct smc_link_group *lgr = send_link->lgr; | ||
1850 | struct smc_llc_qentry *qentry = NULL; | ||
1851 | int rc = 0; | ||
1852 | |||
1853 | rc = smc_llc_send_confirm_rkey(send_link, rmb_desc); | ||
1854 | if (rc) | ||
1855 | goto out; | ||
1856 | /* receive CONFIRM RKEY response from server over RoCE fabric */ | ||
1857 | qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME, | ||
1858 | SMC_LLC_CONFIRM_RKEY); | ||
1859 | if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG)) | ||
1860 | rc = -EFAULT; | ||
1861 | out: | ||
1862 | if (qentry) | ||
1863 | smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); | ||
1864 | return rc; | ||
1865 | } | ||
1866 | |||
1867 | /* unregister an rtoken at the remote peer */ | ||
1868 | int smc_llc_do_delete_rkey(struct smc_link_group *lgr, | ||
1869 | struct smc_buf_desc *rmb_desc) | ||
1870 | { | ||
1871 | struct smc_llc_qentry *qentry = NULL; | ||
1872 | struct smc_link *send_link; | ||
1873 | int rc = 0; | ||
1874 | |||
1875 | send_link = smc_llc_usable_link(lgr); | ||
1876 | if (!send_link) | ||
1877 | return -ENOLINK; | ||
1878 | |||
1879 | /* protected by llc_flow control */ | ||
1880 | rc = smc_llc_send_delete_rkey(send_link, rmb_desc); | ||
1881 | if (rc) | ||
1882 | goto out; | ||
1883 | /* receive DELETE RKEY response from server over RoCE fabric */ | ||
1884 | qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME, | ||
1885 | SMC_LLC_DELETE_RKEY); | ||
1886 | if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG)) | ||
1887 | rc = -EFAULT; | ||
1888 | out: | ||
1889 | if (qentry) | ||
1890 | smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); | ||
1891 | return rc; | ||
1892 | } | ||
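/* Both rkey helpers above follow the same contract: send the LLC request,
 * wait a bounded time for the matching reply, and treat a missing reply or
 * one carrying the negative flag as failure.  Condensed into one
 * userspace-style helper; all names and the flag value are invented.
 */
#include <errno.h>

struct reply { unsigned char flags; };

#define FLAG_RKEY_NEG 0x20	/* hypothetical bit, not the wire value */

/* send_fn issues the request; wait_fn returns the reply or NULL on timeout */
static int do_rkey_op(int (*send_fn)(void), struct reply *(*wait_fn)(void))
{
	struct reply *r;
	int rc = send_fn();

	if (rc)
		return rc;
	r = wait_fn();			/* bounded wait for the reply */
	if (!r || (r->flags & FLAG_RKEY_NEG))
		return -EFAULT;		/* timeout or negative response */
	return 0;
}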
1893 | |||
1894 | void smc_llc_link_set_uid(struct smc_link *link) | ||
1895 | { | ||
1896 | __be32 link_uid; | ||
1897 | |||
1898 | link_uid = htonl(*((u32 *)link->lgr->id) + link->link_id); | ||
1899 | memcpy(link->link_uid, &link_uid, SMC_LGR_ID_SIZE); | ||
1900 | } | ||
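/* Example of the uid construction above, as a standalone rendition: the
 * 4-byte link group id is read as a host-endian u32, the link_id is added,
 * and the sum is stored back in network byte order.
 */
#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>

static void link_set_uid(const uint8_t lgr_id[4], uint8_t link_id,
			 uint8_t uid[4])
{
	uint32_t host;

	memcpy(&host, lgr_id, 4);	/* group id as host-endian u32 */
	host = htonl(host + link_id);	/* add link id, store big-endian */
	memcpy(uid, &host, 4);
}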
1901 | |||
1902 | /* save the peer's link user id, used for debug purposes */ | ||
1903 | void smc_llc_save_peer_uid(struct smc_llc_qentry *qentry) | ||
1904 | { | ||
1905 | memcpy(qentry->link->peer_link_uid, qentry->msg.confirm_link.link_uid, | ||
1906 | SMC_LGR_ID_SIZE); | ||
1907 | } | ||
1908 | |||
1909 | /* evaluate confirm link request or response */ | ||
1910 | int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry, | ||
1911 | enum smc_llc_reqresp type) | ||
1912 | { | ||
1913 | if (type == SMC_LLC_REQ) { /* SMC server assigns link_id */ | ||
1914 | qentry->link->link_id = qentry->msg.confirm_link.link_num; | ||
1915 | smc_llc_link_set_uid(qentry->link); | ||
1916 | } | ||
1917 | if (!(qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)) | ||
1918 | return -ENOTSUPP; | ||
1919 | return 0; | ||
1920 | } | ||
1921 | |||
1922 | /***************************** init, exit, misc ******************************/ | ||
1923 | |||
1924 | static struct smc_wr_rx_handler smc_llc_rx_handlers[] = { | ||
1925 | { | ||
1926 | .handler = smc_llc_rx_handler, | ||
1927 | .type = SMC_LLC_CONFIRM_LINK | ||
1928 | }, | ||
1929 | { | ||
1930 | .handler = smc_llc_rx_handler, | ||
1931 | .type = SMC_LLC_TEST_LINK | ||
1932 | }, | ||
1933 | { | ||
1934 | .handler = smc_llc_rx_handler, | ||
1935 | .type = SMC_LLC_ADD_LINK | ||
1936 | }, | ||
1937 | { | ||
1938 | .handler = smc_llc_rx_handler, | ||
1939 | .type = SMC_LLC_ADD_LINK_CONT | ||
1940 | }, | ||
1941 | { | ||
1942 | .handler = smc_llc_rx_handler, | ||
1943 | .type = SMC_LLC_DELETE_LINK | ||
1944 | }, | ||
1945 | { | ||
1946 | .handler = smc_llc_rx_handler, | ||
1947 | .type = SMC_LLC_CONFIRM_RKEY | ||
1948 | }, | ||
1949 | { | ||
1950 | .handler = smc_llc_rx_handler, | ||
1951 | .type = SMC_LLC_CONFIRM_RKEY_CONT | ||
1952 | }, | ||
1953 | { | ||
1954 | .handler = smc_llc_rx_handler, | ||
1955 | .type = SMC_LLC_DELETE_RKEY | ||
1956 | }, | ||
1957 | { | ||
1958 | .handler = NULL, | ||
1959 | } | ||
1960 | }; | ||
1961 | |||
1962 | int __init smc_llc_init(void) | ||
1963 | { | ||
1964 | struct smc_wr_rx_handler *handler; | ||
1965 | int rc = 0; | ||
1966 | |||
1967 | for (handler = smc_llc_rx_handlers; handler->handler; handler++) { | ||
1968 | INIT_HLIST_NODE(&handler->list); | ||
1969 | rc = smc_wr_rx_register_handler(handler); | ||
1970 | if (rc) | ||
1971 | break; | ||
1972 | } | ||
1973 | return rc; | ||
1974 | } | ||
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h new file mode 100644 index 000000000..cc00a2ec4 --- /dev/null +++ b/net/smc/smc_llc.h | |||
@@ -0,0 +1,109 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * Definitions for LLC (link layer control) message handling | ||
6 | * | ||
7 | * Copyright IBM Corp. 2016 | ||
8 | * | ||
9 | * Author(s): Klaus Wacker <Klaus.Wacker@de.ibm.com> | ||
10 | * Ursula Braun <ubraun@linux.vnet.ibm.com> | ||
11 | */ | ||
12 | |||
13 | #ifndef SMC_LLC_H | ||
14 | #define SMC_LLC_H | ||
15 | |||
16 | #include "smc_wr.h" | ||
17 | |||
18 | #define SMC_LLC_FLAG_RESP 0x80 | ||
19 | |||
20 | #define SMC_LLC_WAIT_FIRST_TIME (5 * HZ) | ||
21 | #define SMC_LLC_WAIT_TIME (2 * HZ) | ||
22 | |||
23 | enum smc_llc_reqresp { | ||
24 | SMC_LLC_REQ, | ||
25 | SMC_LLC_RESP | ||
26 | }; | ||
27 | |||
28 | enum smc_llc_msg_type { | ||
29 | SMC_LLC_CONFIRM_LINK = 0x01, | ||
30 | SMC_LLC_ADD_LINK = 0x02, | ||
31 | SMC_LLC_ADD_LINK_CONT = 0x03, | ||
32 | SMC_LLC_DELETE_LINK = 0x04, | ||
33 | SMC_LLC_CONFIRM_RKEY = 0x06, | ||
34 | SMC_LLC_TEST_LINK = 0x07, | ||
35 | SMC_LLC_CONFIRM_RKEY_CONT = 0x08, | ||
36 | SMC_LLC_DELETE_RKEY = 0x09, | ||
37 | }; | ||
38 | |||
39 | #define smc_link_downing(state) \ | ||
40 | (cmpxchg(state, SMC_LNK_ACTIVE, SMC_LNK_INACTIVE) == SMC_LNK_ACTIVE) | ||
41 | |||
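/* What smc_link_downing() above expresses: exactly one caller wins the
 * ACTIVE -> INACTIVE transition, so teardown runs once even if several
 * paths notice a failure concurrently.  A userspace analogue using C11
 * atomics (enum values invented for the sketch):
 */
#include <stdatomic.h>
#include <stdbool.h>

enum lnk_state { LNK_ACTIVE = 1, LNK_INACTIVE = 2 };

static bool link_downing(_Atomic int *state)
{
	int expected = LNK_ACTIVE;

	/* true for the first caller only; later callers see INACTIVE */
	return atomic_compare_exchange_strong(state, &expected, LNK_INACTIVE);
}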
42 | /* LLC DELETE LINK Request Reason Codes */ | ||
43 | #define SMC_LLC_DEL_LOST_PATH 0x00010000 | ||
44 | #define SMC_LLC_DEL_OP_INIT_TERM 0x00020000 | ||
45 | #define SMC_LLC_DEL_PROG_INIT_TERM 0x00030000 | ||
46 | #define SMC_LLC_DEL_PROT_VIOL 0x00040000 | ||
47 | #define SMC_LLC_DEL_NO_ASYM_NEEDED 0x00050000 | ||
48 | /* LLC DELETE LINK Response Reason Codes */ | ||
49 | #define SMC_LLC_DEL_NOLNK 0x00100000 /* Unknown Link ID (no link) */ | ||
50 | #define SMC_LLC_DEL_NOLGR 0x00200000 /* Unknown Link Group */ | ||
51 | |||
52 | /* returns a usable link of the link group, or NULL */ | ||
53 | static inline struct smc_link *smc_llc_usable_link(struct smc_link_group *lgr) | ||
54 | { | ||
55 | int i; | ||
56 | |||
57 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) | ||
58 | if (smc_link_usable(&lgr->lnk[i])) | ||
59 | return &lgr->lnk[i]; | ||
60 | return NULL; | ||
61 | } | ||
62 | |||
63 | /* set the termination reason code for the link group */ | ||
64 | static inline void smc_llc_set_termination_rsn(struct smc_link_group *lgr, | ||
65 | u32 rsn) | ||
66 | { | ||
67 | if (!lgr->llc_termination_rsn) | ||
68 | lgr->llc_termination_rsn = rsn; | ||
69 | } | ||
70 | |||
71 | /* transmit */ | ||
72 | int smc_llc_send_confirm_link(struct smc_link *lnk, | ||
73 | enum smc_llc_reqresp reqresp); | ||
74 | int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], | ||
75 | struct smc_link *link_new, | ||
76 | enum smc_llc_reqresp reqresp); | ||
77 | int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id, | ||
78 | enum smc_llc_reqresp reqresp, bool orderly, | ||
79 | u32 reason); | ||
80 | void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id); | ||
81 | void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc); | ||
82 | void smc_llc_lgr_clear(struct smc_link_group *lgr); | ||
83 | int smc_llc_link_init(struct smc_link *link); | ||
84 | void smc_llc_link_active(struct smc_link *link); | ||
85 | void smc_llc_link_clear(struct smc_link *link, bool log); | ||
86 | int smc_llc_do_confirm_rkey(struct smc_link *send_link, | ||
87 | struct smc_buf_desc *rmb_desc); | ||
88 | int smc_llc_do_delete_rkey(struct smc_link_group *lgr, | ||
89 | struct smc_buf_desc *rmb_desc); | ||
90 | int smc_llc_flow_initiate(struct smc_link_group *lgr, | ||
91 | enum smc_llc_flowtype type); | ||
92 | void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow); | ||
93 | int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry, | ||
94 | enum smc_llc_reqresp type); | ||
95 | void smc_llc_link_set_uid(struct smc_link *link); | ||
96 | void smc_llc_save_peer_uid(struct smc_llc_qentry *qentry); | ||
97 | struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr, | ||
98 | struct smc_link *lnk, | ||
99 | int time_out, u8 exp_msg); | ||
100 | struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow); | ||
101 | void smc_llc_flow_qentry_del(struct smc_llc_flow *flow); | ||
102 | void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, | ||
103 | u32 rsn); | ||
104 | int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry); | ||
105 | int smc_llc_srv_add_link(struct smc_link *link); | ||
106 | void smc_llc_add_link_local(struct smc_link *link); | ||
107 | int smc_llc_init(void) __init; | ||
108 | |||
109 | #endif /* SMC_LLC_H */ | ||
diff --git a/net/smc/smc_netns.h b/net/smc/smc_netns.h new file mode 100644 index 000000000..0f4f35aa4 --- /dev/null +++ b/net/smc/smc_netns.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* Shared Memory Communications | ||
3 | * | ||
4 | * Network namespace definitions. | ||
5 | * | ||
6 | * Copyright IBM Corp. 2018 | ||
7 | */ | ||
8 | |||
9 | #ifndef SMC_NETNS_H | ||
10 | #define SMC_NETNS_H | ||
11 | |||
12 | #include "smc_pnet.h" | ||
13 | |||
14 | extern unsigned int smc_net_id; | ||
15 | |||
16 | /* per-network namespace private data */ | ||
17 | struct smc_net { | ||
18 | struct smc_pnettable pnettable; | ||
19 | struct smc_pnetids_ndev pnetids_ndev; | ||
20 | }; | ||
21 | #endif | ||
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c new file mode 100644 index 000000000..30bae60d6 --- /dev/null +++ b/net/smc/smc_pnet.c | |||
@@ -0,0 +1,1174 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * Generic netlink support functions to configure an SMC-R PNET table | ||
6 | * | ||
7 | * Copyright IBM Corp. 2016 | ||
8 | * | ||
9 | * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com> | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/list.h> | ||
14 | #include <linux/ctype.h> | ||
15 | #include <linux/mutex.h> | ||
16 | #include <net/netlink.h> | ||
17 | #include <net/genetlink.h> | ||
18 | |||
19 | #include <uapi/linux/if.h> | ||
20 | #include <uapi/linux/smc.h> | ||
21 | |||
22 | #include <rdma/ib_verbs.h> | ||
23 | |||
24 | #include <net/netns/generic.h> | ||
25 | #include "smc_netns.h" | ||
26 | |||
27 | #include "smc_pnet.h" | ||
28 | #include "smc_ib.h" | ||
29 | #include "smc_ism.h" | ||
30 | #include "smc_core.h" | ||
31 | |||
32 | static struct net_device *__pnet_find_base_ndev(struct net_device *ndev); | ||
33 | static struct net_device *pnet_find_base_ndev(struct net_device *ndev); | ||
34 | |||
35 | static const struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { | ||
36 | [SMC_PNETID_NAME] = { | ||
37 | .type = NLA_NUL_STRING, | ||
38 | .len = SMC_MAX_PNETID_LEN | ||
39 | }, | ||
40 | [SMC_PNETID_ETHNAME] = { | ||
41 | .type = NLA_NUL_STRING, | ||
42 | .len = IFNAMSIZ - 1 | ||
43 | }, | ||
44 | [SMC_PNETID_IBNAME] = { | ||
45 | .type = NLA_NUL_STRING, | ||
46 | .len = IB_DEVICE_NAME_MAX - 1 | ||
47 | }, | ||
48 | [SMC_PNETID_IBPORT] = { .type = NLA_U8 } | ||
49 | }; | ||
50 | |||
51 | static struct genl_family smc_pnet_nl_family; | ||
52 | |||
53 | enum smc_pnet_nametype { | ||
54 | SMC_PNET_ETH = 1, | ||
55 | SMC_PNET_IB = 2, | ||
56 | }; | ||
57 | |||
58 | /* pnet entry stored in pnet table */ | ||
59 | struct smc_pnetentry { | ||
60 | struct list_head list; | ||
61 | char pnet_name[SMC_MAX_PNETID_LEN + 1]; | ||
62 | enum smc_pnet_nametype type; | ||
63 | union { | ||
64 | struct { | ||
65 | char eth_name[IFNAMSIZ + 1]; | ||
66 | struct net_device *ndev; | ||
67 | }; | ||
68 | struct { | ||
69 | char ib_name[IB_DEVICE_NAME_MAX + 1]; | ||
70 | u8 ib_port; | ||
71 | }; | ||
72 | }; | ||
73 | }; | ||
74 | |||
75 | /* Check if the pnetid is set */ | ||
76 | bool smc_pnet_is_pnetid_set(u8 *pnetid) | ||
77 | { | ||
78 | if (pnetid[0] == 0 || pnetid[0] == _S) /* _S: ctype blank flag, 0x20 */ | ||
79 | return false; | ||
80 | return true; | ||
81 | } | ||
82 | |||
83 | /* Check if two given pnetids match */ | ||
84 | static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2) | ||
85 | { | ||
86 | int i; | ||
87 | |||
88 | for (i = 0; i < SMC_MAX_PNETID_LEN; i++) { | ||
89 | if ((pnetid1[i] == 0 || pnetid1[i] == _S) && | ||
90 | (pnetid2[i] == 0 || pnetid2[i] == _S)) | ||
91 | break; | ||
92 | if (pnetid1[i] != pnetid2[i]) | ||
93 | return false; | ||
94 | } | ||
95 | return true; | ||
96 | } | ||
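/* Illustration of the matching rule above: pnetids are 16-byte fields
 * padded with zeroes or ASCII blanks, and the two padding styles compare
 * equal.  Standalone userspace rendition with a small self-test:
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define PNETID_LEN 16

static bool pnet_match(const unsigned char *a, const unsigned char *b)
{
	for (int i = 0; i < PNETID_LEN; i++) {
		if ((a[i] == 0 || a[i] == ' ') && (b[i] == 0 || b[i] == ' '))
			break;			/* both ended: match */
		if (a[i] != b[i])
			return false;
	}
	return true;
}

int main(void)
{
	unsigned char blank_padded[PNETID_LEN];
	unsigned char zero_padded[PNETID_LEN] = { 'A', 'B', 'C' };

	memset(blank_padded, ' ', sizeof(blank_padded));
	memcpy(blank_padded, "ABC", 3);
	printf("%d\n", pnet_match(blank_padded, zero_padded)); /* prints 1 */
	return 0;
}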
97 | |||
98 | /* Remove a pnetid from the pnet table. | ||
99 | */ | ||
100 | static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) | ||
101 | { | ||
102 | struct smc_pnetentry *pnetelem, *tmp_pe; | ||
103 | struct smc_pnettable *pnettable; | ||
104 | struct smc_ib_device *ibdev; | ||
105 | struct smcd_dev *smcd_dev; | ||
106 | struct smc_net *sn; | ||
107 | int rc = -ENOENT; | ||
108 | int ibport; | ||
109 | |||
110 | /* get pnettable for namespace */ | ||
111 | sn = net_generic(net, smc_net_id); | ||
112 | pnettable = &sn->pnettable; | ||
113 | |||
114 | /* remove table entry */ | ||
115 | mutex_lock(&pnettable->lock); | ||
116 | list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, | ||
117 | list) { | ||
118 | if (!pnet_name || | ||
119 | smc_pnet_match(pnetelem->pnet_name, pnet_name)) { | ||
120 | list_del(&pnetelem->list); | ||
121 | if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) { | ||
122 | dev_put(pnetelem->ndev); | ||
123 | pr_warn_ratelimited("smc: net device %s " | ||
124 | "erased user defined " | ||
125 | "pnetid %.16s\n", | ||
126 | pnetelem->eth_name, | ||
127 | pnetelem->pnet_name); | ||
128 | } | ||
129 | kfree(pnetelem); | ||
130 | rc = 0; | ||
131 | } | ||
132 | } | ||
133 | mutex_unlock(&pnettable->lock); | ||
134 | |||
135 | /* if this is not the initial namespace, stop here */ | ||
136 | if (net != &init_net) | ||
137 | return rc; | ||
138 | |||
139 | /* remove ib devices */ | ||
140 | mutex_lock(&smc_ib_devices.mutex); | ||
141 | list_for_each_entry(ibdev, &smc_ib_devices.list, list) { | ||
142 | for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { | ||
143 | if (ibdev->pnetid_by_user[ibport] && | ||
144 | (!pnet_name || | ||
145 | smc_pnet_match(pnet_name, | ||
146 | ibdev->pnetid[ibport]))) { | ||
147 | pr_warn_ratelimited("smc: ib device %s ibport " | ||
148 | "%d erased user defined " | ||
149 | "pnetid %.16s\n", | ||
150 | ibdev->ibdev->name, | ||
151 | ibport + 1, | ||
152 | ibdev->pnetid[ibport]); | ||
153 | memset(ibdev->pnetid[ibport], 0, | ||
154 | SMC_MAX_PNETID_LEN); | ||
155 | ibdev->pnetid_by_user[ibport] = false; | ||
156 | rc = 0; | ||
157 | } | ||
158 | } | ||
159 | } | ||
160 | mutex_unlock(&smc_ib_devices.mutex); | ||
161 | /* remove smcd devices */ | ||
162 | mutex_lock(&smcd_dev_list.mutex); | ||
163 | list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { | ||
164 | if (smcd_dev->pnetid_by_user && | ||
165 | (!pnet_name || | ||
166 | smc_pnet_match(pnet_name, smcd_dev->pnetid))) { | ||
167 | pr_warn_ratelimited("smc: smcd device %s " | ||
168 | "erased user defined pnetid " | ||
169 | "%.16s\n", dev_name(&smcd_dev->dev), | ||
170 | smcd_dev->pnetid); | ||
171 | memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN); | ||
172 | smcd_dev->pnetid_by_user = false; | ||
173 | rc = 0; | ||
174 | } | ||
175 | } | ||
176 | mutex_unlock(&smcd_dev_list.mutex); | ||
177 | return rc; | ||
178 | } | ||
179 | |||
180 | /* Add the reference to a given network device to the pnet table. | ||
181 | */ | ||
182 | static int smc_pnet_add_by_ndev(struct net_device *ndev) | ||
183 | { | ||
184 | struct smc_pnetentry *pnetelem, *tmp_pe; | ||
185 | struct smc_pnettable *pnettable; | ||
186 | struct net *net = dev_net(ndev); | ||
187 | struct smc_net *sn; | ||
188 | int rc = -ENOENT; | ||
189 | |||
190 | /* get pnettable for namespace */ | ||
191 | sn = net_generic(net, smc_net_id); | ||
192 | pnettable = &sn->pnettable; | ||
193 | |||
194 | mutex_lock(&pnettable->lock); | ||
195 | list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { | ||
196 | if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev && | ||
197 | !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) { | ||
198 | dev_hold(ndev); | ||
199 | pnetelem->ndev = ndev; | ||
200 | rc = 0; | ||
201 | pr_warn_ratelimited("smc: adding net device %s with " | ||
202 | "user defined pnetid %.16s\n", | ||
203 | pnetelem->eth_name, | ||
204 | pnetelem->pnet_name); | ||
205 | break; | ||
206 | } | ||
207 | } | ||
208 | mutex_unlock(&pnettable->lock); | ||
209 | return rc; | ||
210 | } | ||
211 | |||
212 | /* Remove the reference to a given network device from the pnet table. | ||
213 | */ | ||
214 | static int smc_pnet_remove_by_ndev(struct net_device *ndev) | ||
215 | { | ||
216 | struct smc_pnetentry *pnetelem, *tmp_pe; | ||
217 | struct smc_pnettable *pnettable; | ||
218 | struct net *net = dev_net(ndev); | ||
219 | struct smc_net *sn; | ||
220 | int rc = -ENOENT; | ||
221 | |||
222 | /* get pnettable for namespace */ | ||
223 | sn = net_generic(net, smc_net_id); | ||
224 | pnettable = &sn->pnettable; | ||
225 | |||
226 | mutex_lock(&pnettable->lock); | ||
227 | list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { | ||
228 | if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) { | ||
229 | dev_put(pnetelem->ndev); | ||
230 | pnetelem->ndev = NULL; | ||
231 | rc = 0; | ||
232 | pr_warn_ratelimited("smc: removing net device %s with " | ||
233 | "user defined pnetid %.16s\n", | ||
234 | pnetelem->eth_name, | ||
235 | pnetelem->pnet_name); | ||
236 | break; | ||
237 | } | ||
238 | } | ||
239 | mutex_unlock(&pnettable->lock); | ||
240 | return rc; | ||
241 | } | ||
242 | |||
243 | /* Apply pnetid to ib device when no pnetid is set. | ||
244 | */ | ||
245 | static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port, | ||
246 | char *pnet_name) | ||
247 | { | ||
248 | bool applied = false; | ||
249 | |||
250 | mutex_lock(&smc_ib_devices.mutex); | ||
251 | if (!smc_pnet_is_pnetid_set(ib_dev->pnetid[ib_port - 1])) { | ||
252 | memcpy(ib_dev->pnetid[ib_port - 1], pnet_name, | ||
253 | SMC_MAX_PNETID_LEN); | ||
254 | ib_dev->pnetid_by_user[ib_port - 1] = true; | ||
255 | applied = true; | ||
256 | } | ||
257 | mutex_unlock(&smc_ib_devices.mutex); | ||
258 | return applied; | ||
259 | } | ||
260 | |||
261 | /* Apply pnetid to smcd device when no pnetid is set. | ||
262 | */ | ||
263 | static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name) | ||
264 | { | ||
265 | bool applied = false; | ||
266 | |||
267 | mutex_lock(&smcd_dev_list.mutex); | ||
268 | if (!smc_pnet_is_pnetid_set(smcd_dev->pnetid)) { | ||
269 | memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN); | ||
270 | smcd_dev->pnetid_by_user = true; | ||
271 | applied = true; | ||
272 | } | ||
273 | mutex_unlock(&smcd_dev_list.mutex); | ||
274 | return applied; | ||
275 | } | ||
276 | |||
277 | /* The limit for pnetid is 16 characters. | ||
278 | * Valid characters should be (single-byte character set) a-z, A-Z, 0-9. | ||
279 | * Lower case letters are converted to upper case. | ||
280 | * Interior blanks should not be used. | ||
281 | */ | ||
282 | static bool smc_pnetid_valid(const char *pnet_name, char *pnetid) | ||
283 | { | ||
284 | char *bf = skip_spaces(pnet_name); | ||
285 | size_t len = strlen(bf); | ||
286 | char *end = bf + len; | ||
287 | |||
288 | if (!len) | ||
289 | return false; | ||
290 | while (--end >= bf && isspace(*end)) | ||
291 | ; | ||
292 | if (end - bf >= SMC_MAX_PNETID_LEN) | ||
293 | return false; | ||
294 | while (bf <= end) { | ||
295 | if (!isalnum(*bf)) | ||
296 | return false; | ||
297 | *pnetid++ = islower(*bf) ? toupper(*bf) : *bf; | ||
298 | bf++; | ||
299 | } | ||
300 | *pnetid = '\0'; | ||
301 | return true; | ||
302 | } | ||
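/* Userspace rendition of the rule stated above: strip surrounding blanks,
 * reject empty names, names longer than 16 characters, and any
 * non-alphanumeric character, and upper-case the result into the caller's
 * buffer (which must hold at least 17 bytes).  A sketch, not the kernel
 * helper itself:
 */
#include <ctype.h>
#include <stdbool.h>
#include <string.h>

#define PNETID_MAX 16

static bool pnetid_valid(const char *name, char *out)
{
	size_t len;

	while (isspace((unsigned char)*name))
		name++;				/* skip leading blanks */
	len = strlen(name);
	while (len && isspace((unsigned char)name[len - 1]))
		len--;				/* drop trailing blanks */
	if (!len || len > PNETID_MAX)
		return false;
	for (size_t i = 0; i < len; i++) {
		if (!isalnum((unsigned char)name[i]))
			return false;
		out[i] = (char)toupper((unsigned char)name[i]);
	}
	out[len] = '\0';
	return true;
}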
303 | |||
304 | /* Find an infiniband device by a given name. The device might not exist. */ | ||
305 | static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) | ||
306 | { | ||
307 | struct smc_ib_device *ibdev; | ||
308 | |||
309 | mutex_lock(&smc_ib_devices.mutex); | ||
310 | list_for_each_entry(ibdev, &smc_ib_devices.list, list) { | ||
311 | if (!strncmp(ibdev->ibdev->name, ib_name, | ||
312 | sizeof(ibdev->ibdev->name)) || | ||
313 | (ibdev->ibdev->dev.parent && | ||
314 | !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, | ||
315 | IB_DEVICE_NAME_MAX - 1))) { | ||
316 | goto out; | ||
317 | } | ||
318 | } | ||
319 | ibdev = NULL; | ||
320 | out: | ||
321 | mutex_unlock(&smc_ib_devices.mutex); | ||
322 | return ibdev; | ||
323 | } | ||
324 | |||
325 | /* Find an smcd device by a given name. The device might not exist. */ | ||
326 | static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name) | ||
327 | { | ||
328 | struct smcd_dev *smcd_dev; | ||
329 | |||
330 | mutex_lock(&smcd_dev_list.mutex); | ||
331 | list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { | ||
332 | if (!strncmp(dev_name(&smcd_dev->dev), smcd_name, | ||
333 | IB_DEVICE_NAME_MAX - 1)) | ||
334 | goto out; | ||
335 | } | ||
336 | smcd_dev = NULL; | ||
337 | out: | ||
338 | mutex_unlock(&smcd_dev_list.mutex); | ||
339 | return smcd_dev; | ||
340 | } | ||
341 | |||
342 | static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, | ||
343 | char *eth_name, char *pnet_name) | ||
344 | { | ||
345 | struct smc_pnetentry *tmp_pe, *new_pe; | ||
346 | struct net_device *ndev, *base_ndev; | ||
347 | u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; | ||
348 | bool new_netdev; | ||
349 | int rc; | ||
350 | |||
351 | /* check if (base) netdev already has a pnetid. If there is one, we do | ||
352 | * not want to add a pnet table entry | ||
353 | */ | ||
354 | rc = -EEXIST; | ||
355 | ndev = dev_get_by_name(net, eth_name); /* dev_hold() */ | ||
356 | if (ndev) { | ||
357 | base_ndev = pnet_find_base_ndev(ndev); | ||
358 | if (!smc_pnetid_by_dev_port(base_ndev->dev.parent, | ||
359 | base_ndev->dev_port, ndev_pnetid)) | ||
360 | goto out_put; | ||
361 | } | ||
362 | |||
363 | /* add a new netdev entry to the pnet table if there isn't one */ | ||
364 | rc = -ENOMEM; | ||
365 | new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL); | ||
366 | if (!new_pe) | ||
367 | goto out_put; | ||
368 | new_pe->type = SMC_PNET_ETH; | ||
369 | memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN); | ||
370 | strncpy(new_pe->eth_name, eth_name, IFNAMSIZ); | ||
371 | new_pe->ndev = ndev; | ||
372 | |||
373 | rc = -EEXIST; | ||
374 | new_netdev = true; | ||
375 | mutex_lock(&pnettable->lock); | ||
376 | list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { | ||
377 | if (tmp_pe->type == SMC_PNET_ETH && | ||
378 | !strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) { | ||
379 | new_netdev = false; | ||
380 | break; | ||
381 | } | ||
382 | } | ||
383 | if (new_netdev) { | ||
384 | list_add_tail(&new_pe->list, &pnettable->pnetlist); | ||
385 | mutex_unlock(&pnettable->lock); | ||
386 | } else { | ||
387 | mutex_unlock(&pnettable->lock); | ||
388 | kfree(new_pe); | ||
389 | goto out_put; | ||
390 | } | ||
391 | if (ndev) | ||
392 | pr_warn_ratelimited("smc: net device %s " | ||
393 | "applied user defined pnetid %.16s\n", | ||
394 | new_pe->eth_name, new_pe->pnet_name); | ||
395 | return 0; | ||
396 | |||
397 | out_put: | ||
398 | if (ndev) | ||
399 | dev_put(ndev); | ||
400 | return rc; | ||
401 | } | ||
402 | |||
403 | static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name, | ||
404 | u8 ib_port, char *pnet_name) | ||
405 | { | ||
406 | struct smc_pnetentry *tmp_pe, *new_pe; | ||
407 | struct smc_ib_device *ib_dev; | ||
408 | bool smcddev_applied = true; | ||
409 | bool ibdev_applied = true; | ||
410 | struct smcd_dev *smcd_dev; | ||
411 | bool new_ibdev; | ||
412 | |||
413 | /* try to apply the pnetid to active devices */ | ||
414 | ib_dev = smc_pnet_find_ib(ib_name); | ||
415 | if (ib_dev) { | ||
416 | ibdev_applied = smc_pnet_apply_ib(ib_dev, ib_port, pnet_name); | ||
417 | if (ibdev_applied) | ||
418 | pr_warn_ratelimited("smc: ib device %s ibport %d " | ||
419 | "applied user defined pnetid " | ||
420 | "%.16s\n", ib_dev->ibdev->name, | ||
421 | ib_port, | ||
422 | ib_dev->pnetid[ib_port - 1]); | ||
423 | } | ||
424 | smcd_dev = smc_pnet_find_smcd(ib_name); | ||
425 | if (smcd_dev) { | ||
426 | smcddev_applied = smc_pnet_apply_smcd(smcd_dev, pnet_name); | ||
427 | if (smcddev_applied) | ||
428 | pr_warn_ratelimited("smc: smcd device %s " | ||
429 | "applied user defined pnetid " | ||
430 | "%.16s\n", dev_name(&smcd_dev->dev), | ||
431 | smcd_dev->pnetid); | ||
432 | } | ||
433 | /* Apply fails when a device has a hardware-defined pnetid set; do not | ||
434 | * add a pnet table entry in that case. | ||
435 | */ | ||
436 | if (!ibdev_applied || !smcddev_applied) | ||
437 | return -EEXIST; | ||
438 | |||
439 | /* add a new ib entry to the pnet table if there isn't one */ | ||
440 | new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL); | ||
441 | if (!new_pe) | ||
442 | return -ENOMEM; | ||
443 | new_pe->type = SMC_PNET_IB; | ||
444 | memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN); | ||
445 | strncpy(new_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX); | ||
446 | new_pe->ib_port = ib_port; | ||
447 | |||
448 | new_ibdev = true; | ||
449 | mutex_lock(&pnettable->lock); | ||
450 | list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { | ||
451 | if (tmp_pe->type == SMC_PNET_IB && | ||
452 | !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) { | ||
453 | new_ibdev = false; | ||
454 | break; | ||
455 | } | ||
456 | } | ||
457 | if (new_ibdev) { | ||
458 | list_add_tail(&new_pe->list, &pnettable->pnetlist); | ||
459 | mutex_unlock(&pnettable->lock); | ||
460 | } else { | ||
461 | mutex_unlock(&pnettable->lock); | ||
462 | kfree(new_pe); | ||
463 | } | ||
464 | return (new_ibdev) ? 0 : -EEXIST; | ||
465 | } | ||
466 | |||
467 | /* Append a pnetid to the end of the pnet table if not already on this list. | ||
468 | */ | ||
469 | static int smc_pnet_enter(struct net *net, struct nlattr *tb[]) | ||
470 | { | ||
471 | char pnet_name[SMC_MAX_PNETID_LEN + 1]; | ||
472 | struct smc_pnettable *pnettable; | ||
473 | bool new_netdev = false; | ||
474 | bool new_ibdev = false; | ||
475 | struct smc_net *sn; | ||
476 | u8 ibport = 1; | ||
477 | char *string; | ||
478 | int rc; | ||
479 | |||
480 | /* get pnettable for namespace */ | ||
481 | sn = net_generic(net, smc_net_id); | ||
482 | pnettable = &sn->pnettable; | ||
483 | |||
484 | rc = -EINVAL; | ||
485 | if (!tb[SMC_PNETID_NAME]) | ||
486 | goto error; | ||
487 | string = (char *)nla_data(tb[SMC_PNETID_NAME]); | ||
488 | if (!smc_pnetid_valid(string, pnet_name)) | ||
489 | goto error; | ||
490 | |||
491 | if (tb[SMC_PNETID_ETHNAME]) { | ||
492 | string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); | ||
493 | rc = smc_pnet_add_eth(pnettable, net, string, pnet_name); | ||
494 | if (!rc) | ||
495 | new_netdev = true; | ||
496 | else if (rc != -EEXIST) | ||
497 | goto error; | ||
498 | } | ||
499 | |||
500 | /* if this is not the initial namespace, stop here */ | ||
501 | if (net != &init_net) | ||
502 | return new_netdev ? 0 : -EEXIST; | ||
503 | |||
504 | rc = -EINVAL; | ||
505 | if (tb[SMC_PNETID_IBNAME]) { | ||
506 | string = (char *)nla_data(tb[SMC_PNETID_IBNAME]); | ||
507 | string = strim(string); | ||
508 | if (tb[SMC_PNETID_IBPORT]) { | ||
509 | ibport = nla_get_u8(tb[SMC_PNETID_IBPORT]); | ||
510 | if (ibport < 1 || ibport > SMC_MAX_PORTS) | ||
511 | goto error; | ||
512 | } | ||
513 | rc = smc_pnet_add_ib(pnettable, string, ibport, pnet_name); | ||
514 | if (!rc) | ||
515 | new_ibdev = true; | ||
516 | else if (rc != -EEXIST) | ||
517 | goto error; | ||
518 | } | ||
519 | return (new_netdev || new_ibdev) ? 0 : -EEXIST; | ||
520 | |||
521 | error: | ||
522 | return rc; | ||
523 | } | ||
524 | |||
525 | /* Convert an smc_pnetentry to a netlink attribute sequence */ | ||
526 | static int smc_pnet_set_nla(struct sk_buff *msg, | ||
527 | struct smc_pnetentry *pnetelem) | ||
528 | { | ||
529 | if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name)) | ||
530 | return -1; | ||
531 | if (pnetelem->type == SMC_PNET_ETH) { | ||
532 | if (nla_put_string(msg, SMC_PNETID_ETHNAME, | ||
533 | pnetelem->eth_name)) | ||
534 | return -1; | ||
535 | } else { | ||
536 | if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a")) | ||
537 | return -1; | ||
538 | } | ||
539 | if (pnetelem->type == SMC_PNET_IB) { | ||
540 | if (nla_put_string(msg, SMC_PNETID_IBNAME, pnetelem->ib_name) || | ||
541 | nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port)) | ||
542 | return -1; | ||
543 | } else { | ||
544 | if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") || | ||
545 | nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff)) | ||
546 | return -1; | ||
547 | } | ||
548 | |||
549 | return 0; | ||
550 | } | ||
551 | |||
552 | static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) | ||
553 | { | ||
554 | struct net *net = genl_info_net(info); | ||
555 | |||
556 | return smc_pnet_enter(net, info->attrs); | ||
557 | } | ||
558 | |||
559 | static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) | ||
560 | { | ||
561 | struct net *net = genl_info_net(info); | ||
562 | |||
563 | if (!info->attrs[SMC_PNETID_NAME]) | ||
564 | return -EINVAL; | ||
565 | return smc_pnet_remove_by_pnetid(net, | ||
566 | (char *)nla_data(info->attrs[SMC_PNETID_NAME])); | ||
567 | } | ||
568 | |||
569 | static int smc_pnet_dump_start(struct netlink_callback *cb) | ||
570 | { | ||
571 | cb->args[0] = 0; | ||
572 | return 0; | ||
573 | } | ||
574 | |||
575 | static int smc_pnet_dumpinfo(struct sk_buff *skb, | ||
576 | u32 portid, u32 seq, u32 flags, | ||
577 | struct smc_pnetentry *pnetelem) | ||
578 | { | ||
579 | void *hdr; | ||
580 | |||
581 | hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family, | ||
582 | flags, SMC_PNETID_GET); | ||
583 | if (!hdr) | ||
584 | return -ENOMEM; | ||
585 | if (smc_pnet_set_nla(skb, pnetelem) < 0) { | ||
586 | genlmsg_cancel(skb, hdr); | ||
587 | return -EMSGSIZE; | ||
588 | } | ||
589 | genlmsg_end(skb, hdr); | ||
590 | return 0; | ||
591 | } | ||
592 | |||
593 | static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid, | ||
594 | u32 seq, u8 *pnetid, int start_idx) | ||
595 | { | ||
596 | struct smc_pnettable *pnettable; | ||
597 | struct smc_pnetentry *pnetelem; | ||
598 | struct smc_net *sn; | ||
599 | int idx = 0; | ||
600 | |||
601 | /* get pnettable for namespace */ | ||
602 | sn = net_generic(net, smc_net_id); | ||
603 | pnettable = &sn->pnettable; | ||
604 | |||
605 | /* dump pnettable entries */ | ||
606 | mutex_lock(&pnettable->lock); | ||
607 | list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { | ||
608 | if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid)) | ||
609 | continue; | ||
610 | if (idx++ < start_idx) | ||
611 | continue; | ||
612 | /* if this is not the initial namespace, dump only netdev */ | ||
613 | if (net != &init_net && pnetelem->type != SMC_PNET_ETH) | ||
614 | continue; | ||
615 | if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI, | ||
616 | pnetelem)) { | ||
617 | --idx; | ||
618 | break; | ||
619 | } | ||
620 | } | ||
621 | mutex_unlock(&pnettable->lock); | ||
622 | return idx; | ||
623 | } | ||
624 | |||
625 | static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb) | ||
626 | { | ||
627 | struct net *net = sock_net(skb->sk); | ||
628 | int idx; | ||
629 | |||
630 | idx = _smc_pnet_dump(net, skb, NETLINK_CB(cb->skb).portid, | ||
631 | cb->nlh->nlmsg_seq, NULL, cb->args[0]); | ||
632 | |||
633 | cb->args[0] = idx; | ||
634 | return skb->len; | ||
635 | } | ||
636 | |||
637 | /* Retrieve one PNETID entry */ | ||
638 | static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) | ||
639 | { | ||
640 | struct net *net = genl_info_net(info); | ||
641 | struct sk_buff *msg; | ||
642 | void *hdr; | ||
643 | |||
644 | if (!info->attrs[SMC_PNETID_NAME]) | ||
645 | return -EINVAL; | ||
646 | |||
647 | msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); | ||
648 | if (!msg) | ||
649 | return -ENOMEM; | ||
650 | |||
651 | _smc_pnet_dump(net, msg, info->snd_portid, info->snd_seq, | ||
652 | nla_data(info->attrs[SMC_PNETID_NAME]), 0); | ||
653 | |||
654 | /* finish multi part message and send it */ | ||
655 | hdr = nlmsg_put(msg, info->snd_portid, info->snd_seq, NLMSG_DONE, 0, | ||
656 | NLM_F_MULTI); | ||
657 | if (!hdr) { | ||
658 | nlmsg_free(msg); | ||
659 | return -EMSGSIZE; | ||
660 | } | ||
661 | return genlmsg_reply(msg, info); | ||
662 | } | ||
663 | |||
664 | /* Remove all pnetids from the pnet table. | ||
665 | */ | ||
666 | static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) | ||
667 | { | ||
668 | struct net *net = genl_info_net(info); | ||
669 | |||
670 | smc_pnet_remove_by_pnetid(net, NULL); | ||
671 | return 0; | ||
672 | } | ||
673 | |||
674 | /* SMC_PNETID generic netlink operation definition */ | ||
675 | static const struct genl_ops smc_pnet_ops[] = { | ||
676 | { | ||
677 | .cmd = SMC_PNETID_GET, | ||
678 | .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, | ||
679 | /* can be retrieved by unprivileged users */ | ||
680 | .doit = smc_pnet_get, | ||
681 | .dumpit = smc_pnet_dump, | ||
682 | .start = smc_pnet_dump_start | ||
683 | }, | ||
684 | { | ||
685 | .cmd = SMC_PNETID_ADD, | ||
686 | .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, | ||
687 | .flags = GENL_ADMIN_PERM, | ||
688 | .doit = smc_pnet_add | ||
689 | }, | ||
690 | { | ||
691 | .cmd = SMC_PNETID_DEL, | ||
692 | .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, | ||
693 | .flags = GENL_ADMIN_PERM, | ||
694 | .doit = smc_pnet_del | ||
695 | }, | ||
696 | { | ||
697 | .cmd = SMC_PNETID_FLUSH, | ||
698 | .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, | ||
699 | .flags = GENL_ADMIN_PERM, | ||
700 | .doit = smc_pnet_flush | ||
701 | } | ||
702 | }; | ||
703 | |||
704 | /* SMC_PNETID family definition */ | ||
705 | static struct genl_family smc_pnet_nl_family __ro_after_init = { | ||
706 | .hdrsize = 0, | ||
707 | .name = SMCR_GENL_FAMILY_NAME, | ||
708 | .version = SMCR_GENL_FAMILY_VERSION, | ||
709 | .maxattr = SMC_PNETID_MAX, | ||
710 | .policy = smc_pnet_policy, | ||
711 | .netnsok = true, | ||
712 | .module = THIS_MODULE, | ||
713 | .ops = smc_pnet_ops, | ||
714 | .n_ops = ARRAY_SIZE(smc_pnet_ops) | ||
715 | }; | ||
716 | |||
717 | bool smc_pnet_is_ndev_pnetid(struct net *net, u8 *pnetid) | ||
718 | { | ||
719 | struct smc_net *sn = net_generic(net, smc_net_id); | ||
720 | struct smc_pnetids_ndev_entry *pe; | ||
721 | bool rc = false; | ||
722 | |||
723 | read_lock(&sn->pnetids_ndev.lock); | ||
724 | list_for_each_entry(pe, &sn->pnetids_ndev.list, list) { | ||
725 | if (smc_pnet_match(pnetid, pe->pnetid)) { | ||
726 | rc = true; | ||
727 | goto unlock; | ||
728 | } | ||
729 | } | ||
730 | |||
731 | unlock: | ||
732 | read_unlock(&sn->pnetids_ndev.lock); | ||
733 | return rc; | ||
734 | } | ||
735 | |||
736 | static int smc_pnet_add_pnetid(struct net *net, u8 *pnetid) | ||
737 | { | ||
738 | struct smc_net *sn = net_generic(net, smc_net_id); | ||
739 | struct smc_pnetids_ndev_entry *pe, *pi; | ||
740 | |||
741 | pe = kzalloc(sizeof(*pe), GFP_KERNEL); | ||
742 | if (!pe) | ||
743 | return -ENOMEM; | ||
744 | |||
745 | write_lock(&sn->pnetids_ndev.lock); | ||
746 | list_for_each_entry(pi, &sn->pnetids_ndev.list, list) { | ||
747 | if (smc_pnet_match(pnetid, pi->pnetid)) { /* compare the listed entry, not the new one */ | ||
748 | refcount_inc(&pi->refcnt); | ||
749 | kfree(pe); | ||
750 | goto unlock; | ||
751 | } | ||
752 | } | ||
753 | refcount_set(&pe->refcnt, 1); | ||
754 | memcpy(pe->pnetid, pnetid, SMC_MAX_PNETID_LEN); | ||
755 | list_add_tail(&pe->list, &sn->pnetids_ndev.list); | ||
756 | |||
757 | unlock: | ||
758 | write_unlock(&sn->pnetids_ndev.lock); | ||
759 | return 0; | ||
760 | } | ||
761 | |||
762 | static void smc_pnet_remove_pnetid(struct net *net, u8 *pnetid) | ||
763 | { | ||
764 | struct smc_net *sn = net_generic(net, smc_net_id); | ||
765 | struct smc_pnetids_ndev_entry *pe, *pe2; | ||
766 | |||
767 | write_lock(&sn->pnetids_ndev.lock); | ||
768 | list_for_each_entry_safe(pe, pe2, &sn->pnetids_ndev.list, list) { | ||
769 | if (smc_pnet_match(pnetid, pe->pnetid)) { | ||
770 | if (refcount_dec_and_test(&pe->refcnt)) { | ||
771 | list_del(&pe->list); | ||
772 | kfree(pe); | ||
773 | } | ||
774 | break; | ||
775 | } | ||
776 | } | ||
777 | write_unlock(&sn->pnetids_ndev.lock); | ||
778 | } | ||
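/* The two helpers above implement a refcounted set of pnetids: adding an
 * existing id bumps its count, removing drops one reference and frees the
 * entry on the last put.  A compact single-threaded userspace model of the
 * same life cycle (locking omitted, names invented):
 */
#include <stdlib.h>
#include <string.h>

#define PNETID_LEN 16

struct pnetid_ref {
	struct pnetid_ref *next;
	unsigned int refcnt;
	unsigned char id[PNETID_LEN];
};

static void ref_add(struct pnetid_ref **head, const unsigned char *id)
{
	struct pnetid_ref *p, *n;

	for (p = *head; p; p = p->next)
		if (!memcmp(p->id, id, PNETID_LEN)) {
			p->refcnt++;		/* already present */
			return;
		}
	n = calloc(1, sizeof(*n));
	if (!n)
		return;
	memcpy(n->id, id, PNETID_LEN);
	n->refcnt = 1;
	n->next = *head;
	*head = n;
}

static void ref_del(struct pnetid_ref **head, const unsigned char *id)
{
	struct pnetid_ref **pp, *dead;

	for (pp = head; *pp; pp = &(*pp)->next)
		if (!memcmp((*pp)->id, id, PNETID_LEN)) {
			if (--(*pp)->refcnt == 0) {	/* last put */
				dead = *pp;
				*pp = dead->next;
				free(dead);
			}
			return;
		}
}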
779 | |||
780 | static void smc_pnet_add_base_pnetid(struct net *net, struct net_device *dev, | ||
781 | u8 *ndev_pnetid) | ||
782 | { | ||
783 | struct net_device *base_dev; | ||
784 | |||
785 | base_dev = __pnet_find_base_ndev(dev); | ||
786 | if (base_dev->flags & IFF_UP && | ||
787 | !smc_pnetid_by_dev_port(base_dev->dev.parent, base_dev->dev_port, | ||
788 | ndev_pnetid)) { | ||
789 | /* add to PNETIDs list */ | ||
790 | smc_pnet_add_pnetid(net, ndev_pnetid); | ||
791 | } | ||
792 | } | ||
793 | |||
794 | /* create initial list of netdevice pnetids */ | ||
795 | static void smc_pnet_create_pnetids_list(struct net *net) | ||
796 | { | ||
797 | u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; | ||
798 | struct net_device *dev; | ||
799 | |||
800 | rtnl_lock(); | ||
801 | for_each_netdev(net, dev) | ||
802 | smc_pnet_add_base_pnetid(net, dev, ndev_pnetid); | ||
803 | rtnl_unlock(); | ||
804 | } | ||
805 | |||
806 | /* clean up list of netdevice pnetids */ | ||
807 | static void smc_pnet_destroy_pnetids_list(struct net *net) | ||
808 | { | ||
809 | struct smc_net *sn = net_generic(net, smc_net_id); | ||
810 | struct smc_pnetids_ndev_entry *pe, *temp_pe; | ||
811 | |||
812 | write_lock(&sn->pnetids_ndev.lock); | ||
813 | list_for_each_entry_safe(pe, temp_pe, &sn->pnetids_ndev.list, list) { | ||
814 | list_del(&pe->list); | ||
815 | kfree(pe); | ||
816 | } | ||
817 | write_unlock(&sn->pnetids_ndev.lock); | ||
818 | } | ||
819 | |||
820 | static int smc_pnet_netdev_event(struct notifier_block *this, | ||
821 | unsigned long event, void *ptr) | ||
822 | { | ||
823 | struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); | ||
824 | struct net *net = dev_net(event_dev); | ||
825 | u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; | ||
826 | |||
827 | switch (event) { | ||
828 | case NETDEV_REBOOT: | ||
829 | case NETDEV_UNREGISTER: | ||
830 | smc_pnet_remove_by_ndev(event_dev); | ||
831 | return NOTIFY_OK; | ||
832 | case NETDEV_REGISTER: | ||
833 | smc_pnet_add_by_ndev(event_dev); | ||
834 | return NOTIFY_OK; | ||
835 | case NETDEV_UP: | ||
836 | smc_pnet_add_base_pnetid(net, event_dev, ndev_pnetid); | ||
837 | return NOTIFY_OK; | ||
838 | case NETDEV_DOWN: | ||
839 | event_dev = __pnet_find_base_ndev(event_dev); | ||
840 | if (!smc_pnetid_by_dev_port(event_dev->dev.parent, | ||
841 | event_dev->dev_port, ndev_pnetid)) { | ||
842 | /* remove from PNETIDs list */ | ||
843 | smc_pnet_remove_pnetid(net, ndev_pnetid); | ||
844 | } | ||
845 | return NOTIFY_OK; | ||
846 | default: | ||
847 | return NOTIFY_DONE; | ||
848 | } | ||
849 | } | ||
850 | |||
851 | static struct notifier_block smc_netdev_notifier = { | ||
852 | .notifier_call = smc_pnet_netdev_event | ||
853 | }; | ||
854 | |||
855 | /* init network namespace */ | ||
856 | int smc_pnet_net_init(struct net *net) | ||
857 | { | ||
858 | struct smc_net *sn = net_generic(net, smc_net_id); | ||
859 | struct smc_pnettable *pnettable = &sn->pnettable; | ||
860 | struct smc_pnetids_ndev *pnetids_ndev = &sn->pnetids_ndev; | ||
861 | |||
862 | INIT_LIST_HEAD(&pnettable->pnetlist); | ||
863 | mutex_init(&pnettable->lock); | ||
864 | INIT_LIST_HEAD(&pnetids_ndev->list); | ||
865 | rwlock_init(&pnetids_ndev->lock); | ||
866 | |||
867 | smc_pnet_create_pnetids_list(net); | ||
868 | |||
869 | return 0; | ||
870 | } | ||
871 | |||
872 | int __init smc_pnet_init(void) | ||
873 | { | ||
874 | int rc; | ||
875 | |||
876 | rc = genl_register_family(&smc_pnet_nl_family); | ||
877 | if (rc) | ||
878 | return rc; | ||
879 | rc = register_netdevice_notifier(&smc_netdev_notifier); | ||
880 | if (rc) | ||
881 | genl_unregister_family(&smc_pnet_nl_family); | ||
882 | |||
883 | return rc; | ||
884 | } | ||
885 | |||
886 | /* exit network namespace */ | ||
887 | void smc_pnet_net_exit(struct net *net) | ||
888 | { | ||
889 | /* flush pnet table */ | ||
890 | smc_pnet_remove_by_pnetid(net, NULL); | ||
891 | smc_pnet_destroy_pnetids_list(net); | ||
892 | } | ||
893 | |||
894 | void smc_pnet_exit(void) | ||
895 | { | ||
896 | unregister_netdevice_notifier(&smc_netdev_notifier); | ||
897 | genl_unregister_family(&smc_pnet_nl_family); | ||
898 | } | ||
899 | |||
900 | static struct net_device *__pnet_find_base_ndev(struct net_device *ndev) | ||
901 | { | ||
902 | int i, nest_lvl; | ||
903 | |||
904 | ASSERT_RTNL(); | ||
905 | nest_lvl = ndev->lower_level; | ||
906 | for (i = 0; i < nest_lvl; i++) { | ||
907 | struct list_head *lower = &ndev->adj_list.lower; | ||
908 | |||
909 | if (list_empty(lower)) | ||
910 | break; | ||
911 | lower = lower->next; | ||
912 | ndev = netdev_lower_get_next(ndev, &lower); | ||
913 | } | ||
914 | return ndev; | ||
915 | } | ||
916 | |||
917 | /* Determine one base device for stacked net devices. | ||
918 | * If the lower device level contains more than one devices | ||
919 | * (for instance with bonding slaves), just the first device | ||
920 | * is used to reach a base device. | ||
921 | */ | ||
922 | static struct net_device *pnet_find_base_ndev(struct net_device *ndev) | ||
923 | { | ||
924 | rtnl_lock(); | ||
925 | ndev = __pnet_find_base_ndev(ndev); | ||
926 | rtnl_unlock(); | ||
927 | return ndev; | ||
928 | } | ||
929 | |||
930 | static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, | ||
931 | u8 *pnetid) | ||
932 | { | ||
933 | struct smc_pnettable *pnettable; | ||
934 | struct net *net = dev_net(ndev); | ||
935 | struct smc_pnetentry *pnetelem; | ||
936 | struct smc_net *sn; | ||
937 | int rc = -ENOENT; | ||
938 | |||
939 | /* get pnettable for namespace */ | ||
940 | sn = net_generic(net, smc_net_id); | ||
941 | pnettable = &sn->pnettable; | ||
942 | |||
943 | mutex_lock(&pnettable->lock); | ||
944 | list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { | ||
945 | if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) { | ||
946 | /* get pnetid of netdev device */ | ||
947 | memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN); | ||
948 | rc = 0; | ||
949 | break; | ||
950 | } | ||
951 | } | ||
952 | mutex_unlock(&pnettable->lock); | ||
953 | return rc; | ||
954 | } | ||
955 | |||
956 | /* find a roce device for the given pnetid */ | ||
957 | static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, | ||
958 | struct smc_init_info *ini, | ||
959 | struct smc_ib_device *known_dev) | ||
960 | { | ||
961 | struct smc_ib_device *ibdev; | ||
962 | int i; | ||
963 | |||
964 | ini->ib_dev = NULL; | ||
965 | mutex_lock(&smc_ib_devices.mutex); | ||
966 | list_for_each_entry(ibdev, &smc_ib_devices.list, list) { | ||
967 | if (ibdev == known_dev) | ||
968 | continue; | ||
969 | for (i = 1; i <= SMC_MAX_PORTS; i++) { | ||
970 | if (!rdma_is_port_valid(ibdev->ibdev, i)) | ||
971 | continue; | ||
972 | if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) && | ||
973 | smc_ib_port_active(ibdev, i) && | ||
974 | !test_bit(i - 1, ibdev->ports_going_away) && | ||
975 | !smc_ib_determine_gid(ibdev, i, ini->vlan_id, | ||
976 | ini->ib_gid, NULL)) { | ||
977 | ini->ib_dev = ibdev; | ||
978 | ini->ib_port = i; | ||
979 | goto out; | ||
980 | } | ||
981 | } | ||
982 | } | ||
983 | out: | ||
984 | mutex_unlock(&smc_ib_devices.mutex); | ||
985 | } | ||
986 | |||
987 | /* find alternate roce device with same pnet_id and vlan_id */ | ||
988 | void smc_pnet_find_alt_roce(struct smc_link_group *lgr, | ||
989 | struct smc_init_info *ini, | ||
990 | struct smc_ib_device *known_dev) | ||
991 | { | ||
992 | _smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev); | ||
993 | } | ||
994 | |||
995 | /* if handshake network device belongs to a roce device, return its | ||
996 | * IB device and port | ||
997 | */ | ||
998 | static void smc_pnet_find_rdma_dev(struct net_device *netdev, | ||
999 | struct smc_init_info *ini) | ||
1000 | { | ||
1001 | struct smc_ib_device *ibdev; | ||
1002 | |||
1003 | mutex_lock(&smc_ib_devices.mutex); | ||
1004 | list_for_each_entry(ibdev, &smc_ib_devices.list, list) { | ||
1005 | struct net_device *ndev; | ||
1006 | int i; | ||
1007 | |||
1008 | for (i = 1; i <= SMC_MAX_PORTS; i++) { | ||
1009 | if (!rdma_is_port_valid(ibdev->ibdev, i)) | ||
1010 | continue; | ||
1011 | if (!ibdev->ibdev->ops.get_netdev) | ||
1012 | continue; | ||
1013 | ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i); | ||
1014 | if (!ndev) | ||
1015 | continue; | ||
1016 | dev_put(ndev); | ||
1017 | if (netdev == ndev && | ||
1018 | smc_ib_port_active(ibdev, i) && | ||
1019 | !test_bit(i - 1, ibdev->ports_going_away) && | ||
1020 | !smc_ib_determine_gid(ibdev, i, ini->vlan_id, | ||
1021 | ini->ib_gid, NULL)) { | ||
1022 | ini->ib_dev = ibdev; | ||
1023 | ini->ib_port = i; | ||
1024 | break; | ||
1025 | } | ||
1026 | } | ||
1027 | } | ||
1028 | mutex_unlock(&smc_ib_devices.mutex); | ||
1029 | } | ||
1030 | |||
1031 | /* Determine the corresponding IB device port based on the hardware PNETID. | ||
1032 | * Searching stops at the first matching active IB device port with vlan_id | ||
1033 | * configured. | ||
1034 | * If nothing found, check pnetid table. | ||
1035 | * If nothing found, try to use handshake device | ||
1036 | */ | ||
1037 | static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, | ||
1038 | struct smc_init_info *ini) | ||
1039 | { | ||
1040 | u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; | ||
1041 | |||
1042 | ndev = pnet_find_base_ndev(ndev); | ||
1043 | if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, | ||
1044 | ndev_pnetid) && | ||
1045 | smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) { | ||
1046 | smc_pnet_find_rdma_dev(ndev, ini); | ||
1047 | return; /* pnetid could not be determined */ | ||
1048 | } | ||
1049 | _smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL); | ||
1050 | } | ||
1051 | |||
1052 | static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, | ||
1053 | struct smc_init_info *ini) | ||
1054 | { | ||
1055 | u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; | ||
1056 | struct smcd_dev *ismdev; | ||
1057 | |||
1058 | ndev = pnet_find_base_ndev(ndev); | ||
1059 | if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, | ||
1060 | ndev_pnetid) && | ||
1061 | smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) | ||
1062 | return; /* pnetid could not be determined */ | ||
1063 | |||
1064 | mutex_lock(&smcd_dev_list.mutex); | ||
1065 | list_for_each_entry(ismdev, &smcd_dev_list.list, list) { | ||
1066 | if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) && | ||
1067 | !ismdev->going_away && | ||
1068 | (!ini->ism_peer_gid[0] || | ||
1069 | !smc_ism_cantalk(ini->ism_peer_gid[0], ini->vlan_id, | ||
1070 | ismdev))) { | ||
1071 | ini->ism_dev[0] = ismdev; | ||
1072 | break; | ||
1073 | } | ||
1074 | } | ||
1075 | mutex_unlock(&smcd_dev_list.mutex); | ||
1076 | } | ||
1077 | |||
1078 | /* PNET table analysis for a given sock: | ||
1079 | * determine ib_device and port belonging to used internal TCP socket | ||
1080 | * ethernet interface. | ||
1081 | */ | ||
1082 | void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini) | ||
1083 | { | ||
1084 | struct dst_entry *dst = sk_dst_get(sk); | ||
1085 | |||
1086 | ini->ib_dev = NULL; | ||
1087 | ini->ib_port = 0; | ||
1088 | if (!dst) | ||
1089 | goto out; | ||
1090 | if (!dst->dev) | ||
1091 | goto out_rel; | ||
1092 | |||
1093 | smc_pnet_find_roce_by_pnetid(dst->dev, ini); | ||
1094 | |||
1095 | out_rel: | ||
1096 | dst_release(dst); | ||
1097 | out: | ||
1098 | return; | ||
1099 | } | ||
1100 | |||
1101 | void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini) | ||
1102 | { | ||
1103 | struct dst_entry *dst = sk_dst_get(sk); | ||
1104 | |||
1105 | ini->ism_dev[0] = NULL; | ||
1106 | if (!dst) | ||
1107 | goto out; | ||
1108 | if (!dst->dev) | ||
1109 | goto out_rel; | ||
1110 | |||
1111 | smc_pnet_find_ism_by_pnetid(dst->dev, ini); | ||
1112 | |||
1113 | out_rel: | ||
1114 | dst_release(dst); | ||
1115 | out: | ||
1116 | return; | ||
1117 | } | ||
1118 | |||
1119 | /* Lookup and apply a pnet table entry to the given ib device. | ||
1120 | */ | ||
1121 | int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port) | ||
1122 | { | ||
1123 | char *ib_name = smcibdev->ibdev->name; | ||
1124 | struct smc_pnettable *pnettable; | ||
1125 | struct smc_pnetentry *tmp_pe; | ||
1126 | struct smc_net *sn; | ||
1127 | int rc = -ENOENT; | ||
1128 | |||
1129 | /* get pnettable for init namespace */ | ||
1130 | sn = net_generic(&init_net, smc_net_id); | ||
1131 | pnettable = &sn->pnettable; | ||
1132 | |||
1133 | mutex_lock(&pnettable->lock); | ||
1134 | list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { | ||
1135 | if (tmp_pe->type == SMC_PNET_IB && | ||
1136 | !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX) && | ||
1137 | tmp_pe->ib_port == ib_port) { | ||
1138 | smc_pnet_apply_ib(smcibdev, ib_port, tmp_pe->pnet_name); | ||
1139 | rc = 0; | ||
1140 | break; | ||
1141 | } | ||
1142 | } | ||
1143 | mutex_unlock(&pnettable->lock); | ||
1144 | |||
1145 | return rc; | ||
1146 | } | ||
1147 | |||
1148 | /* Lookup and apply a pnet table entry to the given smcd device. | ||
1149 | */ | ||
1150 | int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev) | ||
1151 | { | ||
1152 | const char *ib_name = dev_name(&smcddev->dev); | ||
1153 | struct smc_pnettable *pnettable; | ||
1154 | struct smc_pnetentry *tmp_pe; | ||
1155 | struct smc_net *sn; | ||
1156 | int rc = -ENOENT; | ||
1157 | |||
1158 | /* get pnettable for init namespace */ | ||
1159 | sn = net_generic(&init_net, smc_net_id); | ||
1160 | pnettable = &sn->pnettable; | ||
1161 | |||
1162 | mutex_lock(&pnettable->lock); | ||
1163 | list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { | ||
1164 | if (tmp_pe->type == SMC_PNET_IB && | ||
1165 | !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) { | ||
1166 | smc_pnet_apply_smcd(smcddev, tmp_pe->pnet_name); | ||
1167 | rc = 0; | ||
1168 | break; | ||
1169 | } | ||
1170 | } | ||
1171 | mutex_unlock(&pnettable->lock); | ||
1172 | |||
1173 | return rc; | ||
1174 | } | ||
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h new file mode 100644 index 000000000..80a88eea4 --- /dev/null +++ b/net/smc/smc_pnet.h | |||
@@ -0,0 +1,70 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * PNET table queries | ||
6 | * | ||
7 | * Copyright IBM Corp. 2016 | ||
8 | * | ||
9 | * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com> | ||
10 | */ | ||
11 | |||
12 | #ifndef _SMC_PNET_H | ||
13 | #define _SMC_PNET_H | ||
14 | |||
15 | #include <net/smc.h> | ||
16 | |||
17 | #if IS_ENABLED(CONFIG_HAVE_PNETID) | ||
18 | #include <asm/pnet.h> | ||
19 | #endif | ||
20 | |||
21 | struct smc_ib_device; | ||
22 | struct smcd_dev; | ||
23 | struct smc_init_info; | ||
24 | struct smc_link_group; | ||
25 | |||
26 | /** | ||
27 | * struct smc_pnettable - SMC PNET table anchor | ||
28 | * @lock: Lock for list action | ||
29 | * @pnetlist: List of PNETIDs | ||
30 | */ | ||
31 | struct smc_pnettable { | ||
32 | struct mutex lock; | ||
33 | struct list_head pnetlist; | ||
34 | }; | ||
35 | |||
36 | struct smc_pnetids_ndev { /* list of pnetids for net devices in UP state*/ | ||
37 | struct list_head list; | ||
38 | rwlock_t lock; | ||
39 | }; | ||
40 | |||
41 | struct smc_pnetids_ndev_entry { | ||
42 | struct list_head list; | ||
43 | u8 pnetid[SMC_MAX_PNETID_LEN]; | ||
44 | refcount_t refcnt; | ||
45 | }; | ||
46 | |||
47 | static inline int smc_pnetid_by_dev_port(struct device *dev, | ||
48 | unsigned short port, u8 *pnetid) | ||
49 | { | ||
50 | #if IS_ENABLED(CONFIG_HAVE_PNETID) | ||
51 | return pnet_id_by_dev_port(dev, port, pnetid); | ||
52 | #else | ||
53 | return -ENOENT; | ||
54 | #endif | ||
55 | } | ||
56 | |||
57 | int smc_pnet_init(void) __init; | ||
58 | int smc_pnet_net_init(struct net *net); | ||
59 | void smc_pnet_exit(void); | ||
60 | void smc_pnet_net_exit(struct net *net); | ||
61 | void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini); | ||
62 | void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini); | ||
63 | int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port); | ||
64 | int smc_pnetid_by_table_smcd(struct smcd_dev *smcd); | ||
65 | void smc_pnet_find_alt_roce(struct smc_link_group *lgr, | ||
66 | struct smc_init_info *ini, | ||
67 | struct smc_ib_device *known_dev); | ||
68 | bool smc_pnet_is_ndev_pnetid(struct net *net, u8 *pnetid); | ||
69 | bool smc_pnet_is_pnetid_set(u8 *pnetid); | ||
70 | #endif | ||
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c new file mode 100644 index 000000000..7f7e983e4 --- /dev/null +++ b/net/smc/smc_rx.c | |||
@@ -0,0 +1,444 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * Manage RMBE | ||
6 | * copy new RMBE data into user space | ||
7 | * | ||
8 | * Copyright IBM Corp. 2016 | ||
9 | * | ||
10 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> | ||
11 | */ | ||
12 | |||
13 | #include <linux/net.h> | ||
14 | #include <linux/rcupdate.h> | ||
15 | #include <linux/sched/signal.h> | ||
16 | |||
17 | #include <net/sock.h> | ||
18 | |||
19 | #include "smc.h" | ||
20 | #include "smc_core.h" | ||
21 | #include "smc_cdc.h" | ||
22 | #include "smc_tx.h" /* smc_tx_consumer_update() */ | ||
23 | #include "smc_rx.h" | ||
24 | |||
25 | /* callback implementation to wakeup consumers blocked with smc_rx_wait(). | ||
26 | * indirectly called by smc_cdc_msg_recv_action(). | ||
27 | */ | ||
28 | static void smc_rx_wake_up(struct sock *sk) | ||
29 | { | ||
30 | struct socket_wq *wq; | ||
31 | |||
32 | /* derived from sock_def_readable() */ | ||
33 | /* called already in smc_listen_work() */ | ||
34 | rcu_read_lock(); | ||
35 | wq = rcu_dereference(sk->sk_wq); | ||
36 | if (skwq_has_sleeper(wq)) | ||
37 | wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI | | ||
38 | EPOLLRDNORM | EPOLLRDBAND); | ||
39 | sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); | ||
40 | if ((sk->sk_shutdown == SHUTDOWN_MASK) || | ||
41 | (sk->sk_state == SMC_CLOSED)) | ||
42 | sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); | ||
43 | rcu_read_unlock(); | ||
44 | } | ||
45 | |||
46 | /* Update consumer cursor | ||
47 | * @conn connection to update | ||
48 | * @cons consumer cursor | ||
49 | * @len number of Bytes consumed | ||
50 | * Returns: | ||
51 | * 1 if we should end our receive, 0 otherwise | ||
52 | */ | ||
53 | static int smc_rx_update_consumer(struct smc_sock *smc, | ||
54 | union smc_host_cursor cons, size_t len) | ||
55 | { | ||
56 | struct smc_connection *conn = &smc->conn; | ||
57 | struct sock *sk = &smc->sk; | ||
58 | bool force = false; | ||
59 | int diff, rc = 0; | ||
60 | |||
61 | smc_curs_add(conn->rmb_desc->len, &cons, len); | ||
62 | |||
63 | /* did we process urgent data? */ | ||
64 | if (conn->urg_state == SMC_URG_VALID || conn->urg_rx_skip_pend) { | ||
65 | diff = smc_curs_comp(conn->rmb_desc->len, &cons, | ||
66 | &conn->urg_curs); | ||
67 | if (sock_flag(sk, SOCK_URGINLINE)) { | ||
68 | if (diff == 0) { | ||
69 | force = true; | ||
70 | rc = 1; | ||
71 | conn->urg_state = SMC_URG_READ; | ||
72 | } | ||
73 | } else { | ||
74 | if (diff == 1) { | ||
75 | /* skip urgent byte */ | ||
76 | force = true; | ||
77 | smc_curs_add(conn->rmb_desc->len, &cons, 1); | ||
78 | conn->urg_rx_skip_pend = false; | ||
79 | } else if (diff < -1) | ||
80 | /* we read past urgent byte */ | ||
81 | conn->urg_state = SMC_URG_READ; | ||
82 | } | ||
83 | } | ||
84 | |||
85 | smc_curs_copy(&conn->local_tx_ctrl.cons, &cons, conn); | ||
86 | |||
87 | /* send consumer cursor update if required */ | ||
88 | /* similar to advertising new TCP rcv_wnd if required */ | ||
89 | smc_tx_consumer_update(conn, force); | ||
90 | |||
91 | return rc; | ||
92 | } | ||
93 | |||
94 | static void smc_rx_update_cons(struct smc_sock *smc, size_t len) | ||
95 | { | ||
96 | struct smc_connection *conn = &smc->conn; | ||
97 | union smc_host_cursor cons; | ||
98 | |||
99 | smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn); | ||
100 | smc_rx_update_consumer(smc, cons, len); | ||
101 | } | ||
102 | |||
103 | struct smc_spd_priv { | ||
104 | struct smc_sock *smc; | ||
105 | size_t len; | ||
106 | }; | ||
107 | |||
108 | static void smc_rx_pipe_buf_release(struct pipe_inode_info *pipe, | ||
109 | struct pipe_buffer *buf) | ||
110 | { | ||
111 | struct smc_spd_priv *priv = (struct smc_spd_priv *)buf->private; | ||
112 | struct smc_sock *smc = priv->smc; | ||
113 | struct smc_connection *conn; | ||
114 | struct sock *sk = &smc->sk; | ||
115 | |||
116 | if (sk->sk_state == SMC_CLOSED || | ||
117 | sk->sk_state == SMC_PEERFINCLOSEWAIT || | ||
118 | sk->sk_state == SMC_APPFINCLOSEWAIT) | ||
119 | goto out; | ||
120 | conn = &smc->conn; | ||
121 | lock_sock(sk); | ||
122 | smc_rx_update_cons(smc, priv->len); | ||
123 | release_sock(sk); | ||
124 | if (atomic_sub_and_test(priv->len, &conn->splice_pending)) | ||
125 | smc_rx_wake_up(sk); | ||
126 | out: | ||
127 | kfree(priv); | ||
128 | put_page(buf->page); | ||
129 | sock_put(sk); | ||
130 | } | ||
131 | |||
132 | static const struct pipe_buf_operations smc_pipe_ops = { | ||
133 | .release = smc_rx_pipe_buf_release, | ||
134 | .get = generic_pipe_buf_get | ||
135 | }; | ||
136 | |||
137 | static void smc_rx_spd_release(struct splice_pipe_desc *spd, | ||
138 | unsigned int i) | ||
139 | { | ||
140 | put_page(spd->pages[i]); | ||
141 | } | ||
142 | |||
143 | static int smc_rx_splice(struct pipe_inode_info *pipe, char *src, size_t len, | ||
144 | struct smc_sock *smc) | ||
145 | { | ||
146 | struct splice_pipe_desc spd; | ||
147 | struct partial_page partial; | ||
148 | struct smc_spd_priv *priv; | ||
149 | int bytes; | ||
150 | |||
151 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | ||
152 | if (!priv) | ||
153 | return -ENOMEM; | ||
154 | priv->len = len; | ||
155 | priv->smc = smc; | ||
156 | partial.offset = src - (char *)smc->conn.rmb_desc->cpu_addr; | ||
157 | partial.len = len; | ||
158 | partial.private = (unsigned long)priv; | ||
159 | |||
160 | spd.nr_pages_max = 1; | ||
161 | spd.nr_pages = 1; | ||
162 | spd.pages = &smc->conn.rmb_desc->pages; | ||
163 | spd.partial = &partial; | ||
164 | spd.ops = &smc_pipe_ops; | ||
165 | spd.spd_release = smc_rx_spd_release; | ||
166 | |||
167 | bytes = splice_to_pipe(pipe, &spd); | ||
168 | if (bytes > 0) { | ||
169 | sock_hold(&smc->sk); | ||
170 | get_page(smc->conn.rmb_desc->pages); | ||
171 | atomic_add(bytes, &smc->conn.splice_pending); | ||
172 | } | ||
173 | |||
174 | return bytes; | ||
175 | } | ||
176 | |||
177 | static int smc_rx_data_available_and_no_splice_pend(struct smc_connection *conn) | ||
178 | { | ||
179 | return atomic_read(&conn->bytes_to_rcv) && | ||
180 | !atomic_read(&conn->splice_pending); | ||
181 | } | ||
182 | |||
183 | /* blocks rcvbuf consumer until >=len bytes available or timeout or interrupted | ||
184 | * @smc smc socket | ||
185 | * @timeo pointer to max seconds to wait, pointer to value 0 for no timeout | ||
186 | * @fcrit add'l criterion to evaluate as function pointer | ||
187 | * Returns: | ||
188 | * 1 if at least 1 byte available in rcvbuf or if socket error/shutdown. | ||
189 | * 0 otherwise (nothing in rcvbuf nor timeout, e.g. interrupted). | ||
190 | */ | ||
191 | int smc_rx_wait(struct smc_sock *smc, long *timeo, | ||
192 | int (*fcrit)(struct smc_connection *conn)) | ||
193 | { | ||
194 | DEFINE_WAIT_FUNC(wait, woken_wake_function); | ||
195 | struct smc_connection *conn = &smc->conn; | ||
196 | struct smc_cdc_conn_state_flags *cflags = | ||
197 | &conn->local_tx_ctrl.conn_state_flags; | ||
198 | struct sock *sk = &smc->sk; | ||
199 | int rc; | ||
200 | |||
201 | if (fcrit(conn)) | ||
202 | return 1; | ||
203 | sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); | ||
204 | add_wait_queue(sk_sleep(sk), &wait); | ||
205 | rc = sk_wait_event(sk, timeo, | ||
206 | sk->sk_err || | ||
207 | cflags->peer_conn_abort || | ||
208 | sk->sk_shutdown & RCV_SHUTDOWN || | ||
209 | conn->killed || | ||
210 | fcrit(conn), | ||
211 | &wait); | ||
212 | remove_wait_queue(sk_sleep(sk), &wait); | ||
213 | sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); | ||
214 | return rc; | ||
215 | } | ||
216 | |||
217 | static int smc_rx_recv_urg(struct smc_sock *smc, struct msghdr *msg, int len, | ||
218 | int flags) | ||
219 | { | ||
220 | struct smc_connection *conn = &smc->conn; | ||
221 | union smc_host_cursor cons; | ||
222 | struct sock *sk = &smc->sk; | ||
223 | int rc = 0; | ||
224 | |||
225 | if (sock_flag(sk, SOCK_URGINLINE) || | ||
226 | !(conn->urg_state == SMC_URG_VALID) || | ||
227 | conn->urg_state == SMC_URG_READ) | ||
228 | return -EINVAL; | ||
229 | |||
230 | if (conn->urg_state == SMC_URG_VALID) { | ||
231 | if (!(flags & MSG_PEEK)) | ||
232 | smc->conn.urg_state = SMC_URG_READ; | ||
233 | msg->msg_flags |= MSG_OOB; | ||
234 | if (len > 0) { | ||
235 | if (!(flags & MSG_TRUNC)) | ||
236 | rc = memcpy_to_msg(msg, &conn->urg_rx_byte, 1); | ||
237 | len = 1; | ||
238 | smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn); | ||
239 | if (smc_curs_diff(conn->rmb_desc->len, &cons, | ||
240 | &conn->urg_curs) > 1) | ||
241 | conn->urg_rx_skip_pend = true; | ||
242 | /* Urgent Byte was already accounted for, but trigger | ||
243 | * skipping the urgent byte in non-inline case | ||
244 | */ | ||
245 | if (!(flags & MSG_PEEK)) | ||
246 | smc_rx_update_consumer(smc, cons, 0); | ||
247 | } else { | ||
248 | msg->msg_flags |= MSG_TRUNC; | ||
249 | } | ||
250 | |||
251 | return rc ? -EFAULT : len; | ||
252 | } | ||
253 | |||
254 | if (sk->sk_state == SMC_CLOSED || sk->sk_shutdown & RCV_SHUTDOWN) | ||
255 | return 0; | ||
256 | |||
257 | return -EAGAIN; | ||
258 | } | ||
259 | |||
260 | static bool smc_rx_recvmsg_data_available(struct smc_sock *smc) | ||
261 | { | ||
262 | struct smc_connection *conn = &smc->conn; | ||
263 | |||
264 | if (smc_rx_data_available(conn)) | ||
265 | return true; | ||
266 | else if (conn->urg_state == SMC_URG_VALID) | ||
267 | /* we received a single urgent Byte - skip */ | ||
268 | smc_rx_update_cons(smc, 0); | ||
269 | return false; | ||
270 | } | ||
271 | |||
272 | /* smc_rx_recvmsg - receive data from RMBE | ||
273 | * @msg: copy data to receive buffer | ||
274 | * @pipe: copy data to pipe if set - indicates splice() call | ||
275 | * | ||
276 | * rcvbuf consumer: main API called by socket layer. | ||
277 | * Called under sk lock. | ||
278 | */ | ||
279 | int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, | ||
280 | struct pipe_inode_info *pipe, size_t len, int flags) | ||
281 | { | ||
282 | size_t copylen, read_done = 0, read_remaining = len; | ||
283 | size_t chunk_len, chunk_off, chunk_len_sum; | ||
284 | struct smc_connection *conn = &smc->conn; | ||
285 | int (*func)(struct smc_connection *conn); | ||
286 | union smc_host_cursor cons; | ||
287 | int readable, chunk; | ||
288 | char *rcvbuf_base; | ||
289 | struct sock *sk; | ||
290 | int splbytes; | ||
291 | long timeo; | ||
292 | int target; /* Read at least these many bytes */ | ||
293 | int rc; | ||
294 | |||
295 | if (unlikely(flags & MSG_ERRQUEUE)) | ||
296 | return -EINVAL; /* future work for sk.sk_family == AF_SMC */ | ||
297 | |||
298 | sk = &smc->sk; | ||
299 | if (sk->sk_state == SMC_LISTEN) | ||
300 | return -ENOTCONN; | ||
301 | if (flags & MSG_OOB) | ||
302 | return smc_rx_recv_urg(smc, msg, len, flags); | ||
303 | timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); | ||
304 | target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); | ||
305 | |||
306 | /* we currently use 1 RMBE per RMB, so RMBE == RMB base addr */ | ||
307 | rcvbuf_base = conn->rx_off + conn->rmb_desc->cpu_addr; | ||
308 | |||
309 | do { /* while (read_remaining) */ | ||
310 | if (read_done >= target || (pipe && read_done)) | ||
311 | break; | ||
312 | |||
313 | if (conn->killed) | ||
314 | break; | ||
315 | |||
316 | if (smc_rx_recvmsg_data_available(smc)) | ||
317 | goto copy; | ||
318 | |||
319 | if (sk->sk_shutdown & RCV_SHUTDOWN) { | ||
320 | /* smc_cdc_msg_recv_action() could have run after | ||
321 | * above smc_rx_recvmsg_data_available() | ||
322 | */ | ||
323 | if (smc_rx_recvmsg_data_available(smc)) | ||
324 | goto copy; | ||
325 | break; | ||
326 | } | ||
327 | |||
328 | if (read_done) { | ||
329 | if (sk->sk_err || | ||
330 | sk->sk_state == SMC_CLOSED || | ||
331 | !timeo || | ||
332 | signal_pending(current)) | ||
333 | break; | ||
334 | } else { | ||
335 | if (sk->sk_err) { | ||
336 | read_done = sock_error(sk); | ||
337 | break; | ||
338 | } | ||
339 | if (sk->sk_state == SMC_CLOSED) { | ||
340 | if (!sock_flag(sk, SOCK_DONE)) { | ||
341 | /* This occurs when user tries to read | ||
342 | * from never connected socket. | ||
343 | */ | ||
344 | read_done = -ENOTCONN; | ||
345 | break; | ||
346 | } | ||
347 | break; | ||
348 | } | ||
349 | if (!timeo) | ||
350 | return -EAGAIN; | ||
351 | if (signal_pending(current)) { | ||
352 | read_done = sock_intr_errno(timeo); | ||
353 | break; | ||
354 | } | ||
355 | } | ||
356 | |||
357 | if (!smc_rx_data_available(conn)) { | ||
358 | smc_rx_wait(smc, &timeo, smc_rx_data_available); | ||
359 | continue; | ||
360 | } | ||
361 | |||
362 | copy: | ||
363 | /* initialize variables for 1st iteration of subsequent loop */ | ||
364 | /* could be just 1 byte, even after waiting on data above */ | ||
365 | readable = atomic_read(&conn->bytes_to_rcv); | ||
366 | splbytes = atomic_read(&conn->splice_pending); | ||
367 | if (!readable || (msg && splbytes)) { | ||
368 | if (splbytes) | ||
369 | func = smc_rx_data_available_and_no_splice_pend; | ||
370 | else | ||
371 | func = smc_rx_data_available; | ||
372 | smc_rx_wait(smc, &timeo, func); | ||
373 | continue; | ||
374 | } | ||
375 | |||
376 | smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn); | ||
377 | /* subsequent splice() calls pick up where previous left */ | ||
378 | if (splbytes) | ||
379 | smc_curs_add(conn->rmb_desc->len, &cons, splbytes); | ||
380 | if (conn->urg_state == SMC_URG_VALID && | ||
381 | sock_flag(&smc->sk, SOCK_URGINLINE) && | ||
382 | readable > 1) | ||
383 | readable--; /* always stop at urgent Byte */ | ||
384 | /* not more than what user space asked for */ | ||
385 | copylen = min_t(size_t, read_remaining, readable); | ||
386 | /* determine chunks where to read from rcvbuf */ | ||
387 | /* either unwrapped case, or 1st chunk of wrapped case */ | ||
388 | chunk_len = min_t(size_t, copylen, conn->rmb_desc->len - | ||
389 | cons.count); | ||
390 | chunk_len_sum = chunk_len; | ||
391 | chunk_off = cons.count; | ||
392 | smc_rmb_sync_sg_for_cpu(conn); | ||
393 | for (chunk = 0; chunk < 2; chunk++) { | ||
394 | if (!(flags & MSG_TRUNC)) { | ||
395 | if (msg) { | ||
396 | rc = memcpy_to_msg(msg, rcvbuf_base + | ||
397 | chunk_off, | ||
398 | chunk_len); | ||
399 | } else { | ||
400 | rc = smc_rx_splice(pipe, rcvbuf_base + | ||
401 | chunk_off, chunk_len, | ||
402 | smc); | ||
403 | } | ||
404 | if (rc < 0) { | ||
405 | if (!read_done) | ||
406 | read_done = -EFAULT; | ||
407 | smc_rmb_sync_sg_for_device(conn); | ||
408 | goto out; | ||
409 | } | ||
410 | } | ||
411 | read_remaining -= chunk_len; | ||
412 | read_done += chunk_len; | ||
413 | |||
414 | if (chunk_len_sum == copylen) | ||
415 | break; /* either on 1st or 2nd iteration */ | ||
416 | /* prepare next (== 2nd) iteration */ | ||
417 | chunk_len = copylen - chunk_len; /* remainder */ | ||
418 | chunk_len_sum += chunk_len; | ||
419 | chunk_off = 0; /* modulo offset in recv ring buffer */ | ||
420 | } | ||
421 | smc_rmb_sync_sg_for_device(conn); | ||
422 | |||
423 | /* update cursors */ | ||
424 | if (!(flags & MSG_PEEK)) { | ||
425 | /* increased in recv tasklet smc_cdc_msg_rcv() */ | ||
426 | smp_mb__before_atomic(); | ||
427 | atomic_sub(copylen, &conn->bytes_to_rcv); | ||
428 | /* guarantee 0 <= bytes_to_rcv <= rmb_desc->len */ | ||
429 | smp_mb__after_atomic(); | ||
430 | if (msg && smc_rx_update_consumer(smc, cons, copylen)) | ||
431 | goto out; | ||
432 | } | ||
433 | } while (read_remaining); | ||
434 | out: | ||
435 | return read_done; | ||
436 | } | ||
437 | |||
438 | /* Initialize receive properties on connection establishment. NB: not __init! */ | ||
439 | void smc_rx_init(struct smc_sock *smc) | ||
440 | { | ||
441 | smc->sk.sk_data_ready = smc_rx_wake_up; | ||
442 | atomic_set(&smc->conn.splice_pending, 0); | ||
443 | smc->conn.urg_state = SMC_URG_READ; | ||
444 | } | ||
diff --git a/net/smc/smc_rx.h b/net/smc/smc_rx.h new file mode 100644 index 000000000..db823c97d --- /dev/null +++ b/net/smc/smc_rx.h | |||
@@ -0,0 +1,31 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * Manage RMBE | ||
6 | * | ||
7 | * Copyright IBM Corp. 2016 | ||
8 | * | ||
9 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> | ||
10 | */ | ||
11 | |||
12 | #ifndef SMC_RX_H | ||
13 | #define SMC_RX_H | ||
14 | |||
15 | #include <linux/socket.h> | ||
16 | #include <linux/types.h> | ||
17 | |||
18 | #include "smc.h" | ||
19 | |||
20 | void smc_rx_init(struct smc_sock *smc); | ||
21 | |||
22 | int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, | ||
23 | struct pipe_inode_info *pipe, size_t len, int flags); | ||
24 | int smc_rx_wait(struct smc_sock *smc, long *timeo, | ||
25 | int (*fcrit)(struct smc_connection *conn)); | ||
26 | static inline int smc_rx_data_available(struct smc_connection *conn) | ||
27 | { | ||
28 | return atomic_read(&conn->bytes_to_rcv); | ||
29 | } | ||
30 | |||
31 | #endif /* SMC_RX_H */ | ||
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c new file mode 100644 index 000000000..52ef1fca0 --- /dev/null +++ b/net/smc/smc_tx.c | |||
@@ -0,0 +1,646 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * Manage send buffer. | ||
6 | * Producer: | ||
7 | * Copy user space data into send buffer, if send buffer space available. | ||
8 | * Consumer: | ||
9 | * Trigger RDMA write into RMBE of peer and send CDC, if RMBE space available. | ||
10 | * | ||
11 | * Copyright IBM Corp. 2016 | ||
12 | * | ||
13 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> | ||
14 | */ | ||
15 | |||
16 | #include <linux/net.h> | ||
17 | #include <linux/rcupdate.h> | ||
18 | #include <linux/workqueue.h> | ||
19 | #include <linux/sched/signal.h> | ||
20 | |||
21 | #include <net/sock.h> | ||
22 | #include <net/tcp.h> | ||
23 | |||
24 | #include "smc.h" | ||
25 | #include "smc_wr.h" | ||
26 | #include "smc_cdc.h" | ||
27 | #include "smc_close.h" | ||
28 | #include "smc_ism.h" | ||
29 | #include "smc_tx.h" | ||
30 | |||
31 | #define SMC_TX_WORK_DELAY 0 | ||
32 | #define SMC_TX_CORK_DELAY (HZ >> 2) /* 250 ms */ | ||
33 | |||
34 | /***************************** sndbuf producer *******************************/ | ||
35 | |||
36 | /* callback implementation for sk.sk_write_space() | ||
37 | * to wakeup sndbuf producers that blocked with smc_tx_wait(). | ||
38 | * called under sk_socket lock. | ||
39 | */ | ||
40 | static void smc_tx_write_space(struct sock *sk) | ||
41 | { | ||
42 | struct socket *sock = sk->sk_socket; | ||
43 | struct smc_sock *smc = smc_sk(sk); | ||
44 | struct socket_wq *wq; | ||
45 | |||
46 | /* similar to sk_stream_write_space */ | ||
47 | if (atomic_read(&smc->conn.sndbuf_space) && sock) { | ||
48 | clear_bit(SOCK_NOSPACE, &sock->flags); | ||
49 | rcu_read_lock(); | ||
50 | wq = rcu_dereference(sk->sk_wq); | ||
51 | if (skwq_has_sleeper(wq)) | ||
52 | wake_up_interruptible_poll(&wq->wait, | ||
53 | EPOLLOUT | EPOLLWRNORM | | ||
54 | EPOLLWRBAND); | ||
55 | if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) | ||
56 | sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT); | ||
57 | rcu_read_unlock(); | ||
58 | } | ||
59 | } | ||
60 | |||
61 | /* Wakeup sndbuf producers that blocked with smc_tx_wait(). | ||
62 | * Cf. tcp_data_snd_check()=>tcp_check_space()=>tcp_new_space(). | ||
63 | */ | ||
64 | void smc_tx_sndbuf_nonfull(struct smc_sock *smc) | ||
65 | { | ||
66 | if (smc->sk.sk_socket && | ||
67 | test_bit(SOCK_NOSPACE, &smc->sk.sk_socket->flags)) | ||
68 | smc->sk.sk_write_space(&smc->sk); | ||
69 | } | ||
70 | |||
71 | /* blocks sndbuf producer until at least one byte of free space available | ||
72 | * or urgent Byte was consumed | ||
73 | */ | ||
74 | static int smc_tx_wait(struct smc_sock *smc, int flags) | ||
75 | { | ||
76 | DEFINE_WAIT_FUNC(wait, woken_wake_function); | ||
77 | struct smc_connection *conn = &smc->conn; | ||
78 | struct sock *sk = &smc->sk; | ||
79 | long timeo; | ||
80 | int rc = 0; | ||
81 | |||
82 | /* similar to sk_stream_wait_memory */ | ||
83 | timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); | ||
84 | add_wait_queue(sk_sleep(sk), &wait); | ||
85 | while (1) { | ||
86 | sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); | ||
87 | if (sk->sk_err || | ||
88 | (sk->sk_shutdown & SEND_SHUTDOWN) || | ||
89 | conn->killed || | ||
90 | conn->local_tx_ctrl.conn_state_flags.peer_done_writing) { | ||
91 | rc = -EPIPE; | ||
92 | break; | ||
93 | } | ||
94 | if (smc_cdc_rxed_any_close(conn)) { | ||
95 | rc = -ECONNRESET; | ||
96 | break; | ||
97 | } | ||
98 | if (!timeo) { | ||
99 | /* ensure EPOLLOUT is subsequently generated */ | ||
100 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | ||
101 | rc = -EAGAIN; | ||
102 | break; | ||
103 | } | ||
104 | if (signal_pending(current)) { | ||
105 | rc = sock_intr_errno(timeo); | ||
106 | break; | ||
107 | } | ||
108 | sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); | ||
109 | if (atomic_read(&conn->sndbuf_space) && !conn->urg_tx_pend) | ||
110 | break; /* at least 1 byte of free & no urgent data */ | ||
111 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | ||
112 | sk_wait_event(sk, &timeo, | ||
113 | sk->sk_err || | ||
114 | (sk->sk_shutdown & SEND_SHUTDOWN) || | ||
115 | smc_cdc_rxed_any_close(conn) || | ||
116 | (atomic_read(&conn->sndbuf_space) && | ||
117 | !conn->urg_tx_pend), | ||
118 | &wait); | ||
119 | } | ||
120 | remove_wait_queue(sk_sleep(sk), &wait); | ||
121 | return rc; | ||
122 | } | ||
123 | |||
124 | static bool smc_tx_is_corked(struct smc_sock *smc) | ||
125 | { | ||
126 | struct tcp_sock *tp = tcp_sk(smc->clcsock->sk); | ||
127 | |||
128 | return (tp->nonagle & TCP_NAGLE_CORK) ? true : false; | ||
129 | } | ||
130 | |||
131 | /* sndbuf producer: main API called by socket layer. | ||
132 | * called under sock lock. | ||
133 | */ | ||
134 | int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) | ||
135 | { | ||
136 | size_t copylen, send_done = 0, send_remaining = len; | ||
137 | size_t chunk_len, chunk_off, chunk_len_sum; | ||
138 | struct smc_connection *conn = &smc->conn; | ||
139 | union smc_host_cursor prep; | ||
140 | struct sock *sk = &smc->sk; | ||
141 | char *sndbuf_base; | ||
142 | int tx_cnt_prep; | ||
143 | int writespace; | ||
144 | int rc, chunk; | ||
145 | |||
146 | /* This should be in poll */ | ||
147 | sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); | ||
148 | |||
149 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) { | ||
150 | rc = -EPIPE; | ||
151 | goto out_err; | ||
152 | } | ||
153 | |||
154 | while (msg_data_left(msg)) { | ||
155 | if (sk->sk_state == SMC_INIT) | ||
156 | return -ENOTCONN; | ||
157 | if (smc->sk.sk_shutdown & SEND_SHUTDOWN || | ||
158 | (smc->sk.sk_err == ECONNABORTED) || | ||
159 | conn->killed) | ||
160 | return -EPIPE; | ||
161 | if (smc_cdc_rxed_any_close(conn)) | ||
162 | return send_done ?: -ECONNRESET; | ||
163 | |||
164 | if (msg->msg_flags & MSG_OOB) | ||
165 | conn->local_tx_ctrl.prod_flags.urg_data_pending = 1; | ||
166 | |||
167 | if (!atomic_read(&conn->sndbuf_space) || conn->urg_tx_pend) { | ||
168 | if (send_done) | ||
169 | return send_done; | ||
170 | rc = smc_tx_wait(smc, msg->msg_flags); | ||
171 | if (rc) | ||
172 | goto out_err; | ||
173 | continue; | ||
174 | } | ||
175 | |||
176 | /* initialize variables for 1st iteration of subsequent loop */ | ||
177 | /* could be just 1 byte, even after smc_tx_wait above */ | ||
178 | writespace = atomic_read(&conn->sndbuf_space); | ||
179 | /* not more than what user space asked for */ | ||
180 | copylen = min_t(size_t, send_remaining, writespace); | ||
181 | /* determine start of sndbuf */ | ||
182 | sndbuf_base = conn->sndbuf_desc->cpu_addr; | ||
183 | smc_curs_copy(&prep, &conn->tx_curs_prep, conn); | ||
184 | tx_cnt_prep = prep.count; | ||
185 | /* determine chunks where to write into sndbuf */ | ||
186 | /* either unwrapped case, or 1st chunk of wrapped case */ | ||
187 | chunk_len = min_t(size_t, copylen, conn->sndbuf_desc->len - | ||
188 | tx_cnt_prep); | ||
189 | chunk_len_sum = chunk_len; | ||
190 | chunk_off = tx_cnt_prep; | ||
191 | smc_sndbuf_sync_sg_for_cpu(conn); | ||
192 | for (chunk = 0; chunk < 2; chunk++) { | ||
193 | rc = memcpy_from_msg(sndbuf_base + chunk_off, | ||
194 | msg, chunk_len); | ||
195 | if (rc) { | ||
196 | smc_sndbuf_sync_sg_for_device(conn); | ||
197 | if (send_done) | ||
198 | return send_done; | ||
199 | goto out_err; | ||
200 | } | ||
201 | send_done += chunk_len; | ||
202 | send_remaining -= chunk_len; | ||
203 | |||
204 | if (chunk_len_sum == copylen) | ||
205 | break; /* either on 1st or 2nd iteration */ | ||
206 | /* prepare next (== 2nd) iteration */ | ||
207 | chunk_len = copylen - chunk_len; /* remainder */ | ||
208 | chunk_len_sum += chunk_len; | ||
209 | chunk_off = 0; /* modulo offset in send ring buffer */ | ||
210 | } | ||
211 | smc_sndbuf_sync_sg_for_device(conn); | ||
212 | /* update cursors */ | ||
213 | smc_curs_add(conn->sndbuf_desc->len, &prep, copylen); | ||
214 | smc_curs_copy(&conn->tx_curs_prep, &prep, conn); | ||
215 | /* increased in send tasklet smc_cdc_tx_handler() */ | ||
216 | smp_mb__before_atomic(); | ||
217 | atomic_sub(copylen, &conn->sndbuf_space); | ||
218 | /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */ | ||
219 | smp_mb__after_atomic(); | ||
220 | /* since we just produced more new data into sndbuf, | ||
221 | * trigger sndbuf consumer: RDMA write into peer RMBE and CDC | ||
222 | */ | ||
223 | if ((msg->msg_flags & MSG_OOB) && !send_remaining) | ||
224 | conn->urg_tx_pend = true; | ||
225 | if ((msg->msg_flags & MSG_MORE || smc_tx_is_corked(smc)) && | ||
226 | (atomic_read(&conn->sndbuf_space) > | ||
227 | (conn->sndbuf_desc->len >> 1))) | ||
228 | /* for a corked socket defer the RDMA writes if there | ||
229 | * is still sufficient sndbuf_space available | ||
230 | */ | ||
231 | queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, | ||
232 | SMC_TX_CORK_DELAY); | ||
233 | else | ||
234 | smc_tx_sndbuf_nonempty(conn); | ||
235 | } /* while (msg_data_left(msg)) */ | ||
236 | |||
237 | return send_done; | ||
238 | |||
239 | out_err: | ||
240 | rc = sk_stream_error(sk, msg->msg_flags, rc); | ||
241 | /* make sure we wake any epoll edge trigger waiter */ | ||
242 | if (unlikely(rc == -EAGAIN)) | ||
243 | sk->sk_write_space(sk); | ||
244 | return rc; | ||
245 | } | ||
246 | |||
247 | /***************************** sndbuf consumer *******************************/ | ||
248 | |||
249 | /* sndbuf consumer: actual data transfer of one target chunk with ISM write */ | ||
250 | int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len, | ||
251 | u32 offset, int signal) | ||
252 | { | ||
253 | struct smc_ism_position pos; | ||
254 | int rc; | ||
255 | |||
256 | memset(&pos, 0, sizeof(pos)); | ||
257 | pos.token = conn->peer_token; | ||
258 | pos.index = conn->peer_rmbe_idx; | ||
259 | pos.offset = conn->tx_off + offset; | ||
260 | pos.signal = signal; | ||
261 | rc = smc_ism_write(conn->lgr->smcd, &pos, data, len); | ||
262 | if (rc) | ||
263 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; | ||
264 | return rc; | ||
265 | } | ||
266 | |||
267 | /* sndbuf consumer: actual data transfer of one target chunk with RDMA write */ | ||
268 | static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, | ||
269 | int num_sges, struct ib_rdma_wr *rdma_wr) | ||
270 | { | ||
271 | struct smc_link_group *lgr = conn->lgr; | ||
272 | struct smc_link *link = conn->lnk; | ||
273 | int rc; | ||
274 | |||
275 | rdma_wr->wr.wr_id = smc_wr_tx_get_next_wr_id(link); | ||
276 | rdma_wr->wr.num_sge = num_sges; | ||
277 | rdma_wr->remote_addr = | ||
278 | lgr->rtokens[conn->rtoken_idx][link->link_idx].dma_addr + | ||
279 | /* RMBE within RMB */ | ||
280 | conn->tx_off + | ||
281 | /* offset within RMBE */ | ||
282 | peer_rmbe_offset; | ||
283 | rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][link->link_idx].rkey; | ||
284 | rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL); | ||
285 | if (rc) | ||
286 | smcr_link_down_cond_sched(link); | ||
287 | return rc; | ||
288 | } | ||
289 | |||
290 | /* sndbuf consumer */ | ||
291 | static inline void smc_tx_advance_cursors(struct smc_connection *conn, | ||
292 | union smc_host_cursor *prod, | ||
293 | union smc_host_cursor *sent, | ||
294 | size_t len) | ||
295 | { | ||
296 | smc_curs_add(conn->peer_rmbe_size, prod, len); | ||
297 | /* increased in recv tasklet smc_cdc_msg_rcv() */ | ||
298 | smp_mb__before_atomic(); | ||
299 | /* data in flight reduces usable snd_wnd */ | ||
300 | atomic_sub(len, &conn->peer_rmbe_space); | ||
301 | /* guarantee 0 <= peer_rmbe_space <= peer_rmbe_size */ | ||
302 | smp_mb__after_atomic(); | ||
303 | smc_curs_add(conn->sndbuf_desc->len, sent, len); | ||
304 | } | ||
305 | |||
306 | /* SMC-R helper for smc_tx_rdma_writes() */ | ||
307 | static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len, | ||
308 | size_t src_off, size_t src_len, | ||
309 | size_t dst_off, size_t dst_len, | ||
310 | struct smc_rdma_wr *wr_rdma_buf) | ||
311 | { | ||
312 | struct smc_link *link = conn->lnk; | ||
313 | |||
314 | dma_addr_t dma_addr = | ||
315 | sg_dma_address(conn->sndbuf_desc->sgt[link->link_idx].sgl); | ||
316 | int src_len_sum = src_len, dst_len_sum = dst_len; | ||
317 | int sent_count = src_off; | ||
318 | int srcchunk, dstchunk; | ||
319 | int num_sges; | ||
320 | int rc; | ||
321 | |||
322 | for (dstchunk = 0; dstchunk < 2; dstchunk++) { | ||
323 | struct ib_sge *sge = | ||
324 | wr_rdma_buf->wr_tx_rdma[dstchunk].wr.sg_list; | ||
325 | |||
326 | num_sges = 0; | ||
327 | for (srcchunk = 0; srcchunk < 2; srcchunk++) { | ||
328 | sge[srcchunk].addr = dma_addr + src_off; | ||
329 | sge[srcchunk].length = src_len; | ||
330 | num_sges++; | ||
331 | |||
332 | src_off += src_len; | ||
333 | if (src_off >= conn->sndbuf_desc->len) | ||
334 | src_off -= conn->sndbuf_desc->len; | ||
335 | /* modulo in send ring */ | ||
336 | if (src_len_sum == dst_len) | ||
337 | break; /* either on 1st or 2nd iteration */ | ||
338 | /* prepare next (== 2nd) iteration */ | ||
339 | src_len = dst_len - src_len; /* remainder */ | ||
340 | src_len_sum += src_len; | ||
341 | } | ||
342 | rc = smc_tx_rdma_write(conn, dst_off, num_sges, | ||
343 | &wr_rdma_buf->wr_tx_rdma[dstchunk]); | ||
344 | if (rc) | ||
345 | return rc; | ||
346 | if (dst_len_sum == len) | ||
347 | break; /* either on 1st or 2nd iteration */ | ||
348 | /* prepare next (== 2nd) iteration */ | ||
349 | dst_off = 0; /* modulo offset in RMBE ring buffer */ | ||
350 | dst_len = len - dst_len; /* remainder */ | ||
351 | dst_len_sum += dst_len; | ||
352 | src_len = min_t(int, dst_len, conn->sndbuf_desc->len - | ||
353 | sent_count); | ||
354 | src_len_sum = src_len; | ||
355 | } | ||
356 | return 0; | ||
357 | } | ||
358 | |||
359 | /* SMC-D helper for smc_tx_rdma_writes() */ | ||
360 | static int smcd_tx_rdma_writes(struct smc_connection *conn, size_t len, | ||
361 | size_t src_off, size_t src_len, | ||
362 | size_t dst_off, size_t dst_len) | ||
363 | { | ||
364 | int src_len_sum = src_len, dst_len_sum = dst_len; | ||
365 | int srcchunk, dstchunk; | ||
366 | int rc; | ||
367 | |||
368 | for (dstchunk = 0; dstchunk < 2; dstchunk++) { | ||
369 | for (srcchunk = 0; srcchunk < 2; srcchunk++) { | ||
370 | void *data = conn->sndbuf_desc->cpu_addr + src_off; | ||
371 | |||
372 | rc = smcd_tx_ism_write(conn, data, src_len, dst_off + | ||
373 | sizeof(struct smcd_cdc_msg), 0); | ||
374 | if (rc) | ||
375 | return rc; | ||
376 | dst_off += src_len; | ||
377 | src_off += src_len; | ||
378 | if (src_off >= conn->sndbuf_desc->len) | ||
379 | src_off -= conn->sndbuf_desc->len; | ||
380 | /* modulo in send ring */ | ||
381 | if (src_len_sum == dst_len) | ||
382 | break; /* either on 1st or 2nd iteration */ | ||
383 | /* prepare next (== 2nd) iteration */ | ||
384 | src_len = dst_len - src_len; /* remainder */ | ||
385 | src_len_sum += src_len; | ||
386 | } | ||
387 | if (dst_len_sum == len) | ||
388 | break; /* either on 1st or 2nd iteration */ | ||
389 | /* prepare next (== 2nd) iteration */ | ||
390 | dst_off = 0; /* modulo offset in RMBE ring buffer */ | ||
391 | dst_len = len - dst_len; /* remainder */ | ||
392 | dst_len_sum += dst_len; | ||
393 | src_len = min_t(int, dst_len, conn->sndbuf_desc->len - src_off); | ||
394 | src_len_sum = src_len; | ||
395 | } | ||
396 | return 0; | ||
397 | } | ||
398 | |||
399 | /* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit; | ||
400 | * usable snd_wnd as max transmit | ||
401 | */ | ||
402 | static int smc_tx_rdma_writes(struct smc_connection *conn, | ||
403 | struct smc_rdma_wr *wr_rdma_buf) | ||
404 | { | ||
405 | size_t len, src_len, dst_off, dst_len; /* current chunk values */ | ||
406 | union smc_host_cursor sent, prep, prod, cons; | ||
407 | struct smc_cdc_producer_flags *pflags; | ||
408 | int to_send, rmbespace; | ||
409 | int rc; | ||
410 | |||
411 | /* source: sndbuf */ | ||
412 | smc_curs_copy(&sent, &conn->tx_curs_sent, conn); | ||
413 | smc_curs_copy(&prep, &conn->tx_curs_prep, conn); | ||
414 | /* cf. wmem_alloc - (snd_max - snd_una) */ | ||
415 | to_send = smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep); | ||
416 | if (to_send <= 0) | ||
417 | return 0; | ||
418 | |||
419 | /* destination: RMBE */ | ||
420 | /* cf. snd_wnd */ | ||
421 | rmbespace = atomic_read(&conn->peer_rmbe_space); | ||
422 | if (rmbespace <= 0) | ||
423 | return 0; | ||
424 | smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn); | ||
425 | smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn); | ||
426 | |||
427 | /* if usable snd_wnd closes ask peer to advertise once it opens again */ | ||
428 | pflags = &conn->local_tx_ctrl.prod_flags; | ||
429 | pflags->write_blocked = (to_send >= rmbespace); | ||
430 | /* cf. usable snd_wnd */ | ||
431 | len = min(to_send, rmbespace); | ||
432 | |||
433 | /* initialize variables for first iteration of subsequent nested loop */ | ||
434 | dst_off = prod.count; | ||
435 | if (prod.wrap == cons.wrap) { | ||
436 | /* the filled destination area is unwrapped, | ||
437 | * hence the available free destination space is wrapped | ||
438 | * and we need 2 destination chunks of sum len; start with 1st | ||
439 | * which is limited by what's available in sndbuf | ||
440 | */ | ||
441 | dst_len = min_t(size_t, | ||
442 | conn->peer_rmbe_size - prod.count, len); | ||
443 | } else { | ||
444 | /* the filled destination area is wrapped, | ||
445 | * hence the available free destination space is unwrapped | ||
446 | * and we need a single destination chunk of entire len | ||
447 | */ | ||
448 | dst_len = len; | ||
449 | } | ||
450 | /* dst_len determines the maximum src_len */ | ||
451 | if (sent.count + dst_len <= conn->sndbuf_desc->len) { | ||
452 | /* unwrapped src case: single chunk of entire dst_len */ | ||
453 | src_len = dst_len; | ||
454 | } else { | ||
455 | /* wrapped src case: 2 chunks of sum dst_len; start with 1st: */ | ||
456 | src_len = conn->sndbuf_desc->len - sent.count; | ||
457 | } | ||
458 | |||
459 | if (conn->lgr->is_smcd) | ||
460 | rc = smcd_tx_rdma_writes(conn, len, sent.count, src_len, | ||
461 | dst_off, dst_len); | ||
462 | else | ||
463 | rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len, | ||
464 | dst_off, dst_len, wr_rdma_buf); | ||
465 | if (rc) | ||
466 | return rc; | ||
467 | |||
468 | if (conn->urg_tx_pend && len == to_send) | ||
469 | pflags->urg_data_present = 1; | ||
470 | smc_tx_advance_cursors(conn, &prod, &sent, len); | ||
471 | /* update connection's cursors with advanced local cursors */ | ||
472 | smc_curs_copy(&conn->local_tx_ctrl.prod, &prod, conn); | ||
473 | /* dst: peer RMBE */ | ||
474 | smc_curs_copy(&conn->tx_curs_sent, &sent, conn);/* src: local sndbuf */ | ||
475 | |||
476 | return 0; | ||
477 | } | ||
478 | |||
479 | /* Wakeup sndbuf consumers from any context (IRQ or process) | ||
480 | * since there is more data to transmit; usable snd_wnd as max transmit | ||
481 | */ | ||
482 | static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) | ||
483 | { | ||
484 | struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags; | ||
485 | struct smc_link *link = conn->lnk; | ||
486 | struct smc_rdma_wr *wr_rdma_buf; | ||
487 | struct smc_cdc_tx_pend *pend; | ||
488 | struct smc_wr_buf *wr_buf; | ||
489 | int rc; | ||
490 | |||
491 | if (!link || !smc_wr_tx_link_hold(link)) | ||
492 | return -ENOLINK; | ||
493 | rc = smc_cdc_get_free_slot(conn, link, &wr_buf, &wr_rdma_buf, &pend); | ||
494 | if (rc < 0) { | ||
495 | smc_wr_tx_link_put(link); | ||
496 | if (rc == -EBUSY) { | ||
497 | struct smc_sock *smc = | ||
498 | container_of(conn, struct smc_sock, conn); | ||
499 | |||
500 | if (smc->sk.sk_err == ECONNABORTED) | ||
501 | return sock_error(&smc->sk); | ||
502 | if (conn->killed) | ||
503 | return -EPIPE; | ||
504 | rc = 0; | ||
505 | mod_delayed_work(conn->lgr->tx_wq, &conn->tx_work, | ||
506 | SMC_TX_WORK_DELAY); | ||
507 | } | ||
508 | return rc; | ||
509 | } | ||
510 | |||
511 | spin_lock_bh(&conn->send_lock); | ||
512 | if (link != conn->lnk) { | ||
513 | /* link of connection changed, tx_work will restart */ | ||
514 | smc_wr_tx_put_slot(link, | ||
515 | (struct smc_wr_tx_pend_priv *)pend); | ||
516 | rc = -ENOLINK; | ||
517 | goto out_unlock; | ||
518 | } | ||
519 | if (!pflags->urg_data_present) { | ||
520 | rc = smc_tx_rdma_writes(conn, wr_rdma_buf); | ||
521 | if (rc) { | ||
522 | smc_wr_tx_put_slot(link, | ||
523 | (struct smc_wr_tx_pend_priv *)pend); | ||
524 | goto out_unlock; | ||
525 | } | ||
526 | } | ||
527 | |||
528 | rc = smc_cdc_msg_send(conn, wr_buf, pend); | ||
529 | if (!rc && pflags->urg_data_present) { | ||
530 | pflags->urg_data_pending = 0; | ||
531 | pflags->urg_data_present = 0; | ||
532 | } | ||
533 | |||
534 | out_unlock: | ||
535 | spin_unlock_bh(&conn->send_lock); | ||
536 | smc_wr_tx_link_put(link); | ||
537 | return rc; | ||
538 | } | ||
539 | |||
540 | static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn) | ||
541 | { | ||
542 | struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags; | ||
543 | int rc = 0; | ||
544 | |||
545 | spin_lock_bh(&conn->send_lock); | ||
546 | if (!pflags->urg_data_present) | ||
547 | rc = smc_tx_rdma_writes(conn, NULL); | ||
548 | if (!rc) | ||
549 | rc = smcd_cdc_msg_send(conn); | ||
550 | |||
551 | if (!rc && pflags->urg_data_present) { | ||
552 | pflags->urg_data_pending = 0; | ||
553 | pflags->urg_data_present = 0; | ||
554 | } | ||
555 | spin_unlock_bh(&conn->send_lock); | ||
556 | return rc; | ||
557 | } | ||
558 | |||
559 | int smc_tx_sndbuf_nonempty(struct smc_connection *conn) | ||
560 | { | ||
561 | int rc; | ||
562 | |||
563 | if (conn->killed || | ||
564 | conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) | ||
565 | return -EPIPE; /* connection being aborted */ | ||
566 | if (conn->lgr->is_smcd) | ||
567 | rc = smcd_tx_sndbuf_nonempty(conn); | ||
568 | else | ||
569 | rc = smcr_tx_sndbuf_nonempty(conn); | ||
570 | |||
571 | if (!rc) { | ||
572 | /* trigger socket release if connection is closing */ | ||
573 | struct smc_sock *smc = container_of(conn, struct smc_sock, | ||
574 | conn); | ||
575 | smc_close_wake_tx_prepared(smc); | ||
576 | } | ||
577 | return rc; | ||
578 | } | ||
579 | |||
580 | /* Wakeup sndbuf consumers from process context | ||
581 | * since there is more data to transmit | ||
582 | */ | ||
583 | void smc_tx_work(struct work_struct *work) | ||
584 | { | ||
585 | struct smc_connection *conn = container_of(to_delayed_work(work), | ||
586 | struct smc_connection, | ||
587 | tx_work); | ||
588 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); | ||
589 | int rc; | ||
590 | |||
591 | lock_sock(&smc->sk); | ||
592 | if (smc->sk.sk_err) | ||
593 | goto out; | ||
594 | |||
595 | rc = smc_tx_sndbuf_nonempty(conn); | ||
596 | if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked && | ||
597 | !atomic_read(&conn->bytes_to_rcv)) | ||
598 | conn->local_rx_ctrl.prod_flags.write_blocked = 0; | ||
599 | |||
600 | out: | ||
601 | release_sock(&smc->sk); | ||
602 | } | ||
603 | |||
604 | void smc_tx_consumer_update(struct smc_connection *conn, bool force) | ||
605 | { | ||
606 | union smc_host_cursor cfed, cons, prod; | ||
607 | int sender_free = conn->rmb_desc->len; | ||
608 | int to_confirm; | ||
609 | |||
610 | smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn); | ||
611 | smc_curs_copy(&cfed, &conn->rx_curs_confirmed, conn); | ||
612 | to_confirm = smc_curs_diff(conn->rmb_desc->len, &cfed, &cons); | ||
613 | if (to_confirm > conn->rmbe_update_limit) { | ||
614 | smc_curs_copy(&prod, &conn->local_rx_ctrl.prod, conn); | ||
615 | sender_free = conn->rmb_desc->len - | ||
616 | smc_curs_diff_large(conn->rmb_desc->len, | ||
617 | &cfed, &prod); | ||
618 | } | ||
619 | |||
620 | if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || | ||
621 | force || | ||
622 | ((to_confirm > conn->rmbe_update_limit) && | ||
623 | ((sender_free <= (conn->rmb_desc->len / 2)) || | ||
624 | conn->local_rx_ctrl.prod_flags.write_blocked))) { | ||
625 | if (conn->killed || | ||
626 | conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) | ||
627 | return; | ||
628 | if ((smc_cdc_get_slot_and_msg_send(conn) < 0) && | ||
629 | !conn->killed) { | ||
630 | queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, | ||
631 | SMC_TX_WORK_DELAY); | ||
632 | return; | ||
633 | } | ||
634 | } | ||
635 | if (conn->local_rx_ctrl.prod_flags.write_blocked && | ||
636 | !atomic_read(&conn->bytes_to_rcv)) | ||
637 | conn->local_rx_ctrl.prod_flags.write_blocked = 0; | ||
638 | } | ||
639 | |||
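The update decision in smc_tx_consumer_update() above is easier to see with concrete numbers. Below is a hedged userspace model of its arithmetic: plain integers stand in for union smc_host_cursor, and curs_diff() mimics what smc_curs_diff() computes on the ring (distance from the old to the new cursor, modulo the buffer length). The buffer length and update limit are illustrative values, not the kernel's defaults, and the force/cons_curs_upd_req inputs are left out.

    #include <stdio.h>

    /* distance from old to new on a ring of len bytes (cf. smc_curs_diff()) */
    static int curs_diff(int len, int old, int new)
    {
            return (new - old + len) % len;
    }

    int main(void)
    {
            int len = 65536;          /* RMB size (illustrative) */
            int limit = len / 10;     /* rmbe_update_limit (illustrative) */
            int cfed = 1000;          /* cursor last confirmed to the peer */
            int cons = 30000;         /* bytes actually consumed locally */
            int prod = 60000;         /* peer's producer cursor */

            int to_confirm = curs_diff(len, cfed, cons);         /* 29000 */
            int sender_free = len - curs_diff(len, cfed, prod);  /* 6536 */

            /* same shape as the condition above, minus force and flags */
            if (to_confirm > limit && sender_free <= len / 2)
                    printf("send consumer cursor update (free=%d)\n",
                           sender_free);
            else
                    printf("defer update (to_confirm=%d, free=%d)\n",
                           to_confirm, sender_free);
            return 0;
    }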
640 | /***************************** send initialize *******************************/ | ||
641 | |||
642 | /* Initialize send properties on connection establishment. NB: not __init! */ | ||
643 | void smc_tx_init(struct smc_sock *smc) | ||
644 | { | ||
645 | smc->sk.sk_write_space = smc_tx_write_space; | ||
646 | } | ||
diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h new file mode 100644 index 000000000..07e6ad762 --- /dev/null +++ b/net/smc/smc_tx.h | |||
@@ -0,0 +1,39 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * Manage send buffer | ||
6 | * | ||
7 | * Copyright IBM Corp. 2016 | ||
8 | * | ||
9 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> | ||
10 | */ | ||
11 | |||
12 | #ifndef SMC_TX_H | ||
13 | #define SMC_TX_H | ||
14 | |||
15 | #include <linux/socket.h> | ||
16 | #include <linux/types.h> | ||
17 | |||
18 | #include "smc.h" | ||
19 | #include "smc_cdc.h" | ||
20 | |||
21 | static inline int smc_tx_prepared_sends(struct smc_connection *conn) | ||
22 | { | ||
23 | union smc_host_cursor sent, prep; | ||
24 | |||
25 | smc_curs_copy(&sent, &conn->tx_curs_sent, conn); | ||
26 | smc_curs_copy(&prep, &conn->tx_curs_prep, conn); | ||
27 | return smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep); | ||
28 | } | ||
29 | |||
30 | void smc_tx_work(struct work_struct *work); | ||
31 | void smc_tx_init(struct smc_sock *smc); | ||
32 | int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len); | ||
33 | int smc_tx_sndbuf_nonempty(struct smc_connection *conn); | ||
34 | void smc_tx_sndbuf_nonfull(struct smc_sock *smc); | ||
35 | void smc_tx_consumer_update(struct smc_connection *conn, bool force); | ||
36 | int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len, | ||
37 | u32 offset, int signal); | ||
38 | |||
39 | #endif /* SMC_TX_H */ | ||
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c new file mode 100644 index 000000000..5a81f8c9e --- /dev/null +++ b/net/smc/smc_wr.c | |||
@@ -0,0 +1,720 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * Work Requests exploiting the InfiniBand API | ||
6 | * | ||
7 | * Work requests (WR) are submitted via ib_post_send or ib_post_recv | ||
8 | * to the RC SQ or RC RQ (reliably connected send or receive queue), | ||
9 | * respectively, | ||
10 | * and become work queue entries (WQEs). | ||
11 | * While an SQ WR/WQE is pending, we track it until transmission completion. | ||
12 | * Through the corresponding send or receive completion queue (CQ), | ||
13 | * we get completion queue entries (CQEs), also known as work completions (WCs). | ||
14 | * Since the CQ callback is called from IRQ context, we split work by using | ||
15 | * bottom halves implemented by tasklets. | ||
16 | * | ||
17 | * SMC uses this to exchange LLC (link layer control) | ||
18 | * and CDC (connection data control) messages. | ||
19 | * | ||
20 | * Copyright IBM Corp. 2016 | ||
21 | * | ||
22 | * Author(s): Steffen Maier <maier@linux.vnet.ibm.com> | ||
23 | */ | ||
24 | |||
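The two tasklet functions in this file share one polling pattern whose shape is worth calling out: drain the CQ, re-arm the completion notification exactly once, then poll one more full round. The second round closes the race where a CQE lands between the last empty poll and the re-arm; with IB_CQ_REPORT_MISSED_EVENTS the re-arm itself can also report missed completions. The hedged sketch below condenses that control flow (it assumes rdma/ib_verbs.h; it is a reading aid, not a replacement for the real tasklets further down):

    /* condensed form of smc_wr_tx_tasklet_fn()/smc_wr_rx_tasklet_fn() below */
    static void smc_wr_poll_pattern(struct ib_cq *cq)
    {
            struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
            int polled = 0;
            int rc;

    again:
            polled++;
            do {
                    rc = ib_poll_cq(cq, SMC_WR_MAX_POLL_CQE, wc);
                    if (polled == 1)        /* re-arm once, on the first round */
                            ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
                                                 IB_CQ_REPORT_MISSED_EVENTS);
                    if (!rc)
                            break;
                    /* ... hand each wc[0..rc-1] to its completion handler ... */
            } while (rc > 0);
            if (polled == 1)                /* second round catches a racing CQE */
                    goto again;
    }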
25 | #include <linux/atomic.h> | ||
26 | #include <linux/hashtable.h> | ||
27 | #include <linux/wait.h> | ||
28 | #include <rdma/ib_verbs.h> | ||
29 | #include <asm/div64.h> | ||
30 | |||
31 | #include "smc.h" | ||
32 | #include "smc_wr.h" | ||
33 | |||
34 | #define SMC_WR_MAX_POLL_CQE 10 /* max. # of compl. queue elements in 1 poll */ | ||
35 | |||
36 | #define SMC_WR_RX_HASH_BITS 4 | ||
37 | static DEFINE_HASHTABLE(smc_wr_rx_hash, SMC_WR_RX_HASH_BITS); | ||
38 | static DEFINE_SPINLOCK(smc_wr_rx_hash_lock); | ||
39 | |||
40 | struct smc_wr_tx_pend { /* control data for a pending send request */ | ||
41 | u64 wr_id; /* work request id sent */ | ||
42 | smc_wr_tx_handler handler; | ||
43 | enum ib_wc_status wc_status; /* CQE status */ | ||
44 | struct smc_link *link; | ||
45 | u32 idx; | ||
46 | struct smc_wr_tx_pend_priv priv; | ||
47 | u8 compl_requested; | ||
48 | }; | ||
49 | |||
50 | /******************************** send queue *********************************/ | ||
51 | |||
52 | /*------------------------------- completion --------------------------------*/ | ||
53 | |||
54 | /* returns true if at least one tx work request is pending on the given link */ | ||
55 | static inline bool smc_wr_is_tx_pend(struct smc_link *link) | ||
56 | { | ||
57 | if (find_first_bit(link->wr_tx_mask, link->wr_tx_cnt) != | ||
58 | link->wr_tx_cnt) { | ||
59 | return true; | ||
60 | } | ||
61 | return false; | ||
62 | } | ||
63 | |||
64 | /* wait till all pending tx work requests on the given link are completed */ | ||
65 | void smc_wr_tx_wait_no_pending_sends(struct smc_link *link) | ||
66 | { | ||
67 | wait_event(link->wr_tx_wait, !smc_wr_is_tx_pend(link)); | ||
68 | } | ||
69 | |||
70 | static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id) | ||
71 | { | ||
72 | u32 i; | ||
73 | |||
74 | for (i = 0; i < link->wr_tx_cnt; i++) { | ||
75 | if (link->wr_tx_pends[i].wr_id == wr_id) | ||
76 | return i; | ||
77 | } | ||
78 | return link->wr_tx_cnt; | ||
79 | } | ||
80 | |||
81 | static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) | ||
82 | { | ||
83 | struct smc_wr_tx_pend pnd_snd; | ||
84 | struct smc_link *link; | ||
85 | u32 pnd_snd_idx; | ||
86 | |||
87 | link = wc->qp->qp_context; | ||
88 | |||
89 | if (wc->opcode == IB_WC_REG_MR) { | ||
90 | if (wc->status) | ||
91 | link->wr_reg_state = FAILED; | ||
92 | else | ||
93 | link->wr_reg_state = CONFIRMED; | ||
94 | smc_wr_wakeup_reg_wait(link); | ||
95 | return; | ||
96 | } | ||
97 | |||
98 | pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id); | ||
99 | if (pnd_snd_idx == link->wr_tx_cnt) | ||
100 | return; | ||
101 | link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status; | ||
102 | if (link->wr_tx_pends[pnd_snd_idx].compl_requested) | ||
103 | complete(&link->wr_tx_compl[pnd_snd_idx]); | ||
104 | memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], sizeof(pnd_snd)); | ||
105 | /* clear the full struct smc_wr_tx_pend including .priv */ | ||
106 | memset(&link->wr_tx_pends[pnd_snd_idx], 0, | ||
107 | sizeof(link->wr_tx_pends[pnd_snd_idx])); | ||
108 | memset(&link->wr_tx_bufs[pnd_snd_idx], 0, | ||
109 | sizeof(link->wr_tx_bufs[pnd_snd_idx])); | ||
110 | if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask)) | ||
111 | return; | ||
112 | if (wc->status) { | ||
113 | /* terminate link */ | ||
114 | smcr_link_down_cond_sched(link); | ||
115 | } | ||
116 | if (pnd_snd.handler) | ||
117 | pnd_snd.handler(&pnd_snd.priv, link, wc->status); | ||
118 | wake_up(&link->wr_tx_wait); | ||
119 | } | ||
120 | |||
121 | static void smc_wr_tx_tasklet_fn(unsigned long data) | ||
122 | { | ||
123 | struct smc_ib_device *dev = (struct smc_ib_device *)data; | ||
124 | struct ib_wc wc[SMC_WR_MAX_POLL_CQE]; | ||
125 | int i = 0, rc; | ||
126 | int polled = 0; | ||
127 | |||
128 | again: | ||
129 | polled++; | ||
130 | do { | ||
131 | memset(&wc, 0, sizeof(wc)); | ||
132 | rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc); | ||
133 | if (polled == 1) { | ||
134 | ib_req_notify_cq(dev->roce_cq_send, | ||
135 | IB_CQ_NEXT_COMP | | ||
136 | IB_CQ_REPORT_MISSED_EVENTS); | ||
137 | } | ||
138 | if (!rc) | ||
139 | break; | ||
140 | for (i = 0; i < rc; i++) | ||
141 | smc_wr_tx_process_cqe(&wc[i]); | ||
142 | } while (rc > 0); | ||
143 | if (polled == 1) | ||
144 | goto again; | ||
145 | } | ||
146 | |||
147 | void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context) | ||
148 | { | ||
149 | struct smc_ib_device *dev = (struct smc_ib_device *)cq_context; | ||
150 | |||
151 | tasklet_schedule(&dev->send_tasklet); | ||
152 | } | ||
153 | |||
154 | /*---------------------------- request submission ---------------------------*/ | ||
155 | |||
156 | static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx) | ||
157 | { | ||
158 | *idx = link->wr_tx_cnt; | ||
159 | if (!smc_link_sendable(link)) | ||
160 | return -ENOLINK; | ||
161 | for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) { | ||
162 | if (!test_and_set_bit(*idx, link->wr_tx_mask)) | ||
163 | return 0; | ||
164 | } | ||
165 | *idx = link->wr_tx_cnt; | ||
166 | return -EBUSY; | ||
167 | } | ||
168 | |||
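smc_wr_tx_get_free_slot_index() above claims a send slot without taking a lock: it scans wr_tx_mask for a clear bit and claims it with test_and_set_bit(); losing a race on one bit simply moves the scan on to the next. A hedged userspace model of the same idea, using C11 atomics in place of the kernel bitops (names and the 16-slot count are illustrative):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define SLOT_CNT 16                     /* cf. SMC_WR_BUF_CNT */

    static _Atomic unsigned int slot_mask;  /* bit i set = slot i in use */

    static bool claim_slot(unsigned int *idx)
    {
            for (unsigned int i = 0; i < SLOT_CNT; i++) {
                    /* skip bits that already look busy */
                    if (atomic_load(&slot_mask) & (1u << i))
                            continue;
                    /* atomically set the bit; if it was clear, it is ours */
                    if (!(atomic_fetch_or(&slot_mask, 1u << i) & (1u << i))) {
                            *idx = i;
                            return true;
                    }
            }
            return false;                   /* all busy: caller sees -EBUSY */
    }

    int main(void)
    {
            unsigned int idx;

            while (claim_slot(&idx))
                    printf("claimed slot %u\n", idx);
            return 0;
    }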
169 | /** | ||
170 | * smc_wr_tx_get_free_slot() - returns buffer for message assembly, | ||
171 | * and sets info for pending transmit tracking | ||
172 | * @link: Pointer to smc_link used to later send the message. | ||
173 | * @handler: Send completion handler function pointer. | ||
174 | * @wr_buf: Out parameter; returns a pointer to the message buffer. | ||
175 | * @wr_rdma_buf: Out parameter; returns a pointer to the RDMA work request. | ||
176 | * @wr_pend_priv: Out parameter; returns a pointer serving as handler context. | ||
177 | * | ||
178 | * Return: 0 on success, or -errno on error. | ||
179 | */ | ||
180 | int smc_wr_tx_get_free_slot(struct smc_link *link, | ||
181 | smc_wr_tx_handler handler, | ||
182 | struct smc_wr_buf **wr_buf, | ||
183 | struct smc_rdma_wr **wr_rdma_buf, | ||
184 | struct smc_wr_tx_pend_priv **wr_pend_priv) | ||
185 | { | ||
186 | struct smc_link_group *lgr = smc_get_lgr(link); | ||
187 | struct smc_wr_tx_pend *wr_pend; | ||
188 | u32 idx = link->wr_tx_cnt; | ||
189 | struct ib_send_wr *wr_ib; | ||
190 | u64 wr_id; | ||
191 | int rc; | ||
192 | |||
193 | *wr_buf = NULL; | ||
194 | *wr_pend_priv = NULL; | ||
195 | if (in_softirq() || lgr->terminating) { | ||
196 | rc = smc_wr_tx_get_free_slot_index(link, &idx); | ||
197 | if (rc) | ||
198 | return rc; | ||
199 | } else { | ||
200 | rc = wait_event_interruptible_timeout( | ||
201 | link->wr_tx_wait, | ||
202 | !smc_link_sendable(link) || | ||
203 | lgr->terminating || | ||
204 | (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), | ||
205 | SMC_WR_TX_WAIT_FREE_SLOT_TIME); | ||
206 | if (!rc) { | ||
207 | /* timeout - terminate link */ | ||
208 | smcr_link_down_cond_sched(link); | ||
209 | return -EPIPE; | ||
210 | } | ||
211 | if (idx == link->wr_tx_cnt) | ||
212 | return -EPIPE; | ||
213 | } | ||
214 | wr_id = smc_wr_tx_get_next_wr_id(link); | ||
215 | wr_pend = &link->wr_tx_pends[idx]; | ||
216 | wr_pend->wr_id = wr_id; | ||
217 | wr_pend->handler = handler; | ||
218 | wr_pend->link = link; | ||
219 | wr_pend->idx = idx; | ||
220 | wr_ib = &link->wr_tx_ibs[idx]; | ||
221 | wr_ib->wr_id = wr_id; | ||
222 | *wr_buf = &link->wr_tx_bufs[idx]; | ||
223 | if (wr_rdma_buf) | ||
224 | *wr_rdma_buf = &link->wr_tx_rdmas[idx]; | ||
225 | *wr_pend_priv = &wr_pend->priv; | ||
226 | return 0; | ||
227 | } | ||
228 | |||
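Putting the pieces together: a caller of this API holds the link, reserves a slot, assembles its message in the returned buffer, and posts it. The sketch below is a hedged illustration, not an actual caller in this file; send_one_msg and the flow are hypothetical, but every call is one declared in smc_wr.h:

    /* hedged usage sketch of the slot API */
    static int send_one_msg(struct smc_link *link, smc_wr_tx_handler handler)
    {
            struct smc_wr_tx_pend_priv *pend;
            struct smc_wr_buf *wr_buf;
            int rc;

            if (!smc_wr_tx_link_hold(link))
                    return -ENOLINK;        /* link no longer sendable */
            rc = smc_wr_tx_get_free_slot(link, handler, &wr_buf, NULL, &pend);
            if (rc)
                    goto out;
            /* ... assemble at most SMC_WR_TX_SIZE bytes in *wr_buf ... */
            rc = smc_wr_tx_send(link, pend); /* frees the slot itself on error */
    out:
            smc_wr_tx_link_put(link);
            return rc;
    }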
229 | int smc_wr_tx_put_slot(struct smc_link *link, | ||
230 | struct smc_wr_tx_pend_priv *wr_pend_priv) | ||
231 | { | ||
232 | struct smc_wr_tx_pend *pend; | ||
233 | |||
234 | pend = container_of(wr_pend_priv, struct smc_wr_tx_pend, priv); | ||
235 | if (pend->idx < link->wr_tx_cnt) { | ||
236 | u32 idx = pend->idx; | ||
237 | |||
238 | /* clear the full struct smc_wr_tx_pend including .priv */ | ||
239 | memset(&link->wr_tx_pends[idx], 0, | ||
240 | sizeof(link->wr_tx_pends[idx])); | ||
241 | memset(&link->wr_tx_bufs[idx], 0, | ||
242 | sizeof(link->wr_tx_bufs[idx])); | ||
243 | test_and_clear_bit(idx, link->wr_tx_mask); | ||
244 | wake_up(&link->wr_tx_wait); | ||
245 | return 1; | ||
246 | } | ||
247 | |||
248 | return 0; | ||
249 | } | ||
250 | |||
251 | /* Send prepared WR slot via ib_post_send. | ||
252 | * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer | ||
253 | */ | ||
254 | int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) | ||
255 | { | ||
256 | struct smc_wr_tx_pend *pend; | ||
257 | int rc; | ||
258 | |||
259 | ib_req_notify_cq(link->smcibdev->roce_cq_send, | ||
260 | IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); | ||
261 | pend = container_of(priv, struct smc_wr_tx_pend, priv); | ||
262 | rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL); | ||
263 | if (rc) { | ||
264 | smc_wr_tx_put_slot(link, priv); | ||
265 | smcr_link_down_cond_sched(link); | ||
266 | } | ||
267 | return rc; | ||
268 | } | ||
269 | |||
270 | /* Send prepared WR slot via ib_post_send and wait for send completion | ||
271 | * notification. | ||
272 | * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer | ||
273 | */ | ||
274 | int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, | ||
275 | unsigned long timeout) | ||
276 | { | ||
277 | struct smc_wr_tx_pend *pend; | ||
278 | u32 pnd_idx; | ||
279 | int rc; | ||
280 | |||
281 | pend = container_of(priv, struct smc_wr_tx_pend, priv); | ||
282 | pend->compl_requested = 1; | ||
283 | pnd_idx = pend->idx; | ||
284 | init_completion(&link->wr_tx_compl[pnd_idx]); | ||
285 | |||
286 | rc = smc_wr_tx_send(link, priv); | ||
287 | if (rc) | ||
288 | return rc; | ||
289 | /* wait for completion by smc_wr_tx_process_cqe() */ | ||
290 | rc = wait_for_completion_interruptible_timeout( | ||
291 | &link->wr_tx_compl[pnd_idx], timeout); | ||
292 | if (rc <= 0) | ||
293 | rc = -ENODATA; | ||
294 | else | ||
295 | rc = 0; | ||
296 | return rc; | ||
297 | } | ||
298 | |||
299 | /* Register a memory region and wait for result. */ | ||
300 | int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) | ||
301 | { | ||
302 | int rc; | ||
303 | |||
304 | ib_req_notify_cq(link->smcibdev->roce_cq_send, | ||
305 | IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); | ||
306 | link->wr_reg_state = POSTED; | ||
307 | link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr; | ||
308 | link->wr_reg.mr = mr; | ||
309 | link->wr_reg.key = mr->rkey; | ||
310 | rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, NULL); | ||
311 | if (rc) | ||
312 | return rc; | ||
313 | |||
314 | atomic_inc(&link->wr_reg_refcnt); | ||
315 | rc = wait_event_interruptible_timeout(link->wr_reg_wait, | ||
316 | (link->wr_reg_state != POSTED), | ||
317 | SMC_WR_REG_MR_WAIT_TIME); | ||
318 | if (atomic_dec_and_test(&link->wr_reg_refcnt)) | ||
319 | wake_up_all(&link->wr_reg_wait); | ||
320 | if (!rc) { | ||
321 | /* timeout - terminate link */ | ||
322 | smcr_link_down_cond_sched(link); | ||
323 | return -EPIPE; | ||
324 | } | ||
325 | if (rc == -ERESTARTSYS) | ||
326 | return -EINTR; | ||
327 | switch (link->wr_reg_state) { | ||
328 | case CONFIRMED: | ||
329 | rc = 0; | ||
330 | break; | ||
331 | case FAILED: | ||
332 | rc = -EIO; | ||
333 | break; | ||
334 | case POSTED: | ||
335 | rc = -EPIPE; | ||
336 | break; | ||
337 | } | ||
338 | return rc; | ||
339 | } | ||
340 | |||
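The error handling in smc_wr_reg_send() leans on the return-value convention of wait_event_interruptible_timeout(): 0 means the timeout expired with the condition still false (mapped to -EPIPE plus link teardown above), -ERESTARTSYS means a signal arrived (mapped to -EINTR), and a positive value means the condition became true in time, after which wr_reg_state is inspected. A hedged reference sketch of that convention (decode_wait_rc is a hypothetical helper):

    /* hedged sketch: decoding wait_event_interruptible_timeout() results */
    static int decode_wait_rc(long rc)
    {
            if (rc == 0)
                    return -EPIPE;   /* timed out, condition never true */
            if (rc == -ERESTARTSYS)
                    return -EINTR;   /* interrupted by a signal */
            return 0;                /* rc > 0: condition met in time */
    }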
341 | /****************************** receive queue ********************************/ | ||
342 | |||
343 | int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler) | ||
344 | { | ||
345 | struct smc_wr_rx_handler *h_iter; | ||
346 | int rc = 0; | ||
347 | |||
348 | spin_lock(&smc_wr_rx_hash_lock); | ||
349 | hash_for_each_possible(smc_wr_rx_hash, h_iter, list, handler->type) { | ||
350 | if (h_iter->type == handler->type) { | ||
351 | rc = -EEXIST; | ||
352 | goto out_unlock; | ||
353 | } | ||
354 | } | ||
355 | hash_add(smc_wr_rx_hash, &handler->list, handler->type); | ||
356 | out_unlock: | ||
357 | spin_unlock(&smc_wr_rx_hash_lock); | ||
358 | return rc; | ||
359 | } | ||
360 | |||
361 | /* Demultiplex a received work request to its handler based on the message type. | ||
362 | * Relies on smc_wr_rx_hash having been completely filled before any IB WRs are | ||
363 | * posted, and not being modified afterwards, so no locking is needed. | ||
364 | */ | ||
365 | static inline void smc_wr_rx_demultiplex(struct ib_wc *wc) | ||
366 | { | ||
367 | struct smc_link *link = (struct smc_link *)wc->qp->qp_context; | ||
368 | struct smc_wr_rx_handler *handler; | ||
369 | struct smc_wr_rx_hdr *wr_rx; | ||
370 | u64 temp_wr_id; | ||
371 | u32 index; | ||
372 | |||
373 | if (wc->byte_len < sizeof(*wr_rx)) | ||
374 | return; /* short message */ | ||
375 | temp_wr_id = wc->wr_id; | ||
376 | index = do_div(temp_wr_id, link->wr_rx_cnt); | ||
377 | wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[index]; | ||
378 | hash_for_each_possible(smc_wr_rx_hash, handler, list, wr_rx->type) { | ||
379 | if (handler->type == wr_rx->type) | ||
380 | handler->handler(wc, wr_rx); | ||
381 | } | ||
382 | } | ||
383 | |||
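For reference, this is the shape of a consumer of the receive path: it declares an smc_wr_rx_handler keyed by the first byte of the wire message and registers it once at init time, before any receive buffers are posted. The sketch below is hedged; the names, the handler body, and the type value 0xfe are illustrative (the real CDC and LLC handlers live in smc_cdc.c and smc_llc.c):

    /* hedged sketch of a message-type consumer; names are illustrative */
    static void my_rx_handler(struct ib_wc *wc, void *buf)
    {
            /* buf points at the smc_wr_rx_hdr-prefixed message */
    }

    static struct smc_wr_rx_handler my_rx = {
            .handler = my_rx_handler,
            .type    = 0xfe,        /* first byte of the wire message */
    };

    static int my_init(void)
    {
            return smc_wr_rx_register_handler(&my_rx); /* -EEXIST if dup */
    }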
384 | static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num) | ||
385 | { | ||
386 | struct smc_link *link; | ||
387 | int i; | ||
388 | |||
389 | for (i = 0; i < num; i++) { | ||
390 | link = wc[i].qp->qp_context; | ||
391 | if (wc[i].status == IB_WC_SUCCESS) { | ||
392 | link->wr_rx_tstamp = jiffies; | ||
393 | smc_wr_rx_demultiplex(&wc[i]); | ||
394 | smc_wr_rx_post(link); /* refill WR RX */ | ||
395 | } else { | ||
396 | /* handle status errors */ | ||
397 | switch (wc[i].status) { | ||
398 | case IB_WC_RETRY_EXC_ERR: | ||
399 | case IB_WC_RNR_RETRY_EXC_ERR: | ||
400 | case IB_WC_WR_FLUSH_ERR: | ||
401 | smcr_link_down_cond_sched(link); | ||
402 | break; | ||
403 | default: | ||
404 | smc_wr_rx_post(link); /* refill WR RX */ | ||
405 | break; | ||
406 | } | ||
407 | } | ||
408 | } | ||
409 | } | ||
410 | |||
411 | static void smc_wr_rx_tasklet_fn(unsigned long data) | ||
412 | { | ||
413 | struct smc_ib_device *dev = (struct smc_ib_device *)data; | ||
414 | struct ib_wc wc[SMC_WR_MAX_POLL_CQE]; | ||
415 | int polled = 0; | ||
416 | int rc; | ||
417 | |||
418 | again: | ||
419 | polled++; | ||
420 | do { | ||
421 | memset(&wc, 0, sizeof(wc)); | ||
422 | rc = ib_poll_cq(dev->roce_cq_recv, SMC_WR_MAX_POLL_CQE, wc); | ||
423 | if (polled == 1) { | ||
424 | ib_req_notify_cq(dev->roce_cq_recv, | ||
425 | IB_CQ_SOLICITED_MASK | ||
426 | | IB_CQ_REPORT_MISSED_EVENTS); | ||
427 | } | ||
428 | if (!rc) | ||
429 | break; | ||
430 | smc_wr_rx_process_cqes(&wc[0], rc); | ||
431 | } while (rc > 0); | ||
432 | if (polled == 1) | ||
433 | goto again; | ||
434 | } | ||
435 | |||
436 | void smc_wr_rx_cq_handler(struct ib_cq *ib_cq, void *cq_context) | ||
437 | { | ||
438 | struct smc_ib_device *dev = (struct smc_ib_device *)cq_context; | ||
439 | |||
440 | tasklet_schedule(&dev->recv_tasklet); | ||
441 | } | ||
442 | |||
443 | int smc_wr_rx_post_init(struct smc_link *link) | ||
444 | { | ||
445 | u32 i; | ||
446 | int rc = 0; | ||
447 | |||
448 | for (i = 0; i < link->wr_rx_cnt; i++) | ||
449 | rc = smc_wr_rx_post(link); | ||
450 | return rc; | ||
451 | } | ||
452 | |||
453 | /***************************** init, exit, misc ******************************/ | ||
454 | |||
455 | void smc_wr_remember_qp_attr(struct smc_link *lnk) | ||
456 | { | ||
457 | struct ib_qp_attr *attr = &lnk->qp_attr; | ||
458 | struct ib_qp_init_attr init_attr; | ||
459 | |||
460 | memset(attr, 0, sizeof(*attr)); | ||
461 | memset(&init_attr, 0, sizeof(init_attr)); | ||
462 | ib_query_qp(lnk->roce_qp, attr, | ||
463 | IB_QP_STATE | | ||
464 | IB_QP_CUR_STATE | | ||
465 | IB_QP_PKEY_INDEX | | ||
466 | IB_QP_PORT | | ||
467 | IB_QP_QKEY | | ||
468 | IB_QP_AV | | ||
469 | IB_QP_PATH_MTU | | ||
470 | IB_QP_TIMEOUT | | ||
471 | IB_QP_RETRY_CNT | | ||
472 | IB_QP_RNR_RETRY | | ||
473 | IB_QP_RQ_PSN | | ||
474 | IB_QP_ALT_PATH | | ||
475 | IB_QP_MIN_RNR_TIMER | | ||
476 | IB_QP_SQ_PSN | | ||
477 | IB_QP_PATH_MIG_STATE | | ||
478 | IB_QP_CAP | | ||
479 | IB_QP_DEST_QPN, | ||
480 | &init_attr); | ||
481 | |||
482 | lnk->wr_tx_cnt = min_t(size_t, SMC_WR_BUF_CNT, | ||
483 | lnk->qp_attr.cap.max_send_wr); | ||
484 | lnk->wr_rx_cnt = min_t(size_t, SMC_WR_BUF_CNT * 3, | ||
485 | lnk->qp_attr.cap.max_recv_wr); | ||
486 | } | ||
487 | |||
488 | static void smc_wr_init_sge(struct smc_link *lnk) | ||
489 | { | ||
490 | u32 i; | ||
491 | |||
492 | for (i = 0; i < lnk->wr_tx_cnt; i++) { | ||
493 | lnk->wr_tx_sges[i].addr = | ||
494 | lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE; | ||
495 | lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE; | ||
496 | lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey; | ||
497 | lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[0].lkey = | ||
498 | lnk->roce_pd->local_dma_lkey; | ||
499 | lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[1].lkey = | ||
500 | lnk->roce_pd->local_dma_lkey; | ||
501 | lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[0].lkey = | ||
502 | lnk->roce_pd->local_dma_lkey; | ||
503 | lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[1].lkey = | ||
504 | lnk->roce_pd->local_dma_lkey; | ||
505 | lnk->wr_tx_ibs[i].next = NULL; | ||
506 | lnk->wr_tx_ibs[i].sg_list = &lnk->wr_tx_sges[i]; | ||
507 | lnk->wr_tx_ibs[i].num_sge = 1; | ||
508 | lnk->wr_tx_ibs[i].opcode = IB_WR_SEND; | ||
509 | lnk->wr_tx_ibs[i].send_flags = | ||
510 | IB_SEND_SIGNALED | IB_SEND_SOLICITED; | ||
511 | lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE; | ||
512 | lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE; | ||
513 | lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list = | ||
514 | lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge; | ||
515 | lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list = | ||
516 | lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge; | ||
517 | } | ||
518 | for (i = 0; i < lnk->wr_rx_cnt; i++) { | ||
519 | lnk->wr_rx_sges[i].addr = | ||
520 | lnk->wr_rx_dma_addr + i * SMC_WR_BUF_SIZE; | ||
521 | lnk->wr_rx_sges[i].length = SMC_WR_BUF_SIZE; | ||
522 | lnk->wr_rx_sges[i].lkey = lnk->roce_pd->local_dma_lkey; | ||
523 | lnk->wr_rx_ibs[i].next = NULL; | ||
524 | lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i]; | ||
525 | lnk->wr_rx_ibs[i].num_sge = 1; | ||
526 | } | ||
527 | lnk->wr_reg.wr.next = NULL; | ||
528 | lnk->wr_reg.wr.num_sge = 0; | ||
529 | lnk->wr_reg.wr.send_flags = IB_SEND_SIGNALED; | ||
530 | lnk->wr_reg.wr.opcode = IB_WR_REG_MR; | ||
531 | lnk->wr_reg.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE; | ||
532 | } | ||
533 | |||
534 | void smc_wr_free_link(struct smc_link *lnk) | ||
535 | { | ||
536 | struct ib_device *ibdev; | ||
537 | |||
538 | if (!lnk->smcibdev) | ||
539 | return; | ||
540 | ibdev = lnk->smcibdev->ibdev; | ||
541 | |||
542 | smc_wr_wakeup_reg_wait(lnk); | ||
543 | smc_wr_wakeup_tx_wait(lnk); | ||
544 | |||
545 | smc_wr_tx_wait_no_pending_sends(lnk); | ||
546 | wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt))); | ||
547 | wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt))); | ||
548 | |||
549 | if (lnk->wr_rx_dma_addr) { | ||
550 | ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr, | ||
551 | SMC_WR_BUF_SIZE * lnk->wr_rx_cnt, | ||
552 | DMA_FROM_DEVICE); | ||
553 | lnk->wr_rx_dma_addr = 0; | ||
554 | } | ||
555 | if (lnk->wr_tx_dma_addr) { | ||
556 | ib_dma_unmap_single(ibdev, lnk->wr_tx_dma_addr, | ||
557 | SMC_WR_BUF_SIZE * lnk->wr_tx_cnt, | ||
558 | DMA_TO_DEVICE); | ||
559 | lnk->wr_tx_dma_addr = 0; | ||
560 | } | ||
561 | } | ||
562 | |||
563 | void smc_wr_free_link_mem(struct smc_link *lnk) | ||
564 | { | ||
565 | kfree(lnk->wr_tx_compl); | ||
566 | lnk->wr_tx_compl = NULL; | ||
567 | kfree(lnk->wr_tx_pends); | ||
568 | lnk->wr_tx_pends = NULL; | ||
569 | kfree(lnk->wr_tx_mask); | ||
570 | lnk->wr_tx_mask = NULL; | ||
571 | kfree(lnk->wr_tx_sges); | ||
572 | lnk->wr_tx_sges = NULL; | ||
573 | kfree(lnk->wr_tx_rdma_sges); | ||
574 | lnk->wr_tx_rdma_sges = NULL; | ||
575 | kfree(lnk->wr_rx_sges); | ||
576 | lnk->wr_rx_sges = NULL; | ||
577 | kfree(lnk->wr_tx_rdmas); | ||
578 | lnk->wr_tx_rdmas = NULL; | ||
579 | kfree(lnk->wr_rx_ibs); | ||
580 | lnk->wr_rx_ibs = NULL; | ||
581 | kfree(lnk->wr_tx_ibs); | ||
582 | lnk->wr_tx_ibs = NULL; | ||
583 | kfree(lnk->wr_tx_bufs); | ||
584 | lnk->wr_tx_bufs = NULL; | ||
585 | kfree(lnk->wr_rx_bufs); | ||
586 | lnk->wr_rx_bufs = NULL; | ||
587 | } | ||
588 | |||
589 | int smc_wr_alloc_link_mem(struct smc_link *link) | ||
590 | { | ||
591 | /* allocate link related memory */ | ||
592 | link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL); | ||
593 | if (!link->wr_tx_bufs) | ||
594 | goto no_mem; | ||
595 | link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT * 3, SMC_WR_BUF_SIZE, | ||
596 | GFP_KERNEL); | ||
597 | if (!link->wr_rx_bufs) | ||
598 | goto no_mem_wr_tx_bufs; | ||
599 | link->wr_tx_ibs = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_ibs[0]), | ||
600 | GFP_KERNEL); | ||
601 | if (!link->wr_tx_ibs) | ||
602 | goto no_mem_wr_rx_bufs; | ||
603 | link->wr_rx_ibs = kcalloc(SMC_WR_BUF_CNT * 3, | ||
604 | sizeof(link->wr_rx_ibs[0]), | ||
605 | GFP_KERNEL); | ||
606 | if (!link->wr_rx_ibs) | ||
607 | goto no_mem_wr_tx_ibs; | ||
608 | link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT, | ||
609 | sizeof(link->wr_tx_rdmas[0]), | ||
610 | GFP_KERNEL); | ||
611 | if (!link->wr_tx_rdmas) | ||
612 | goto no_mem_wr_rx_ibs; | ||
613 | link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT, | ||
614 | sizeof(link->wr_tx_rdma_sges[0]), | ||
615 | GFP_KERNEL); | ||
616 | if (!link->wr_tx_rdma_sges) | ||
617 | goto no_mem_wr_tx_rdmas; | ||
618 | link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]), | ||
619 | GFP_KERNEL); | ||
620 | if (!link->wr_tx_sges) | ||
621 | goto no_mem_wr_tx_rdma_sges; | ||
622 | link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3, | ||
623 | sizeof(link->wr_rx_sges[0]), | ||
624 | GFP_KERNEL); | ||
625 | if (!link->wr_rx_sges) | ||
626 | goto no_mem_wr_tx_sges; | ||
627 | link->wr_tx_mask = kcalloc(BITS_TO_LONGS(SMC_WR_BUF_CNT), | ||
628 | sizeof(*link->wr_tx_mask), | ||
629 | GFP_KERNEL); | ||
630 | if (!link->wr_tx_mask) | ||
631 | goto no_mem_wr_rx_sges; | ||
632 | link->wr_tx_pends = kcalloc(SMC_WR_BUF_CNT, | ||
633 | sizeof(link->wr_tx_pends[0]), | ||
634 | GFP_KERNEL); | ||
635 | if (!link->wr_tx_pends) | ||
636 | goto no_mem_wr_tx_mask; | ||
637 | link->wr_tx_compl = kcalloc(SMC_WR_BUF_CNT, | ||
638 | sizeof(link->wr_tx_compl[0]), | ||
639 | GFP_KERNEL); | ||
640 | if (!link->wr_tx_compl) | ||
641 | goto no_mem_wr_tx_pends; | ||
642 | return 0; | ||
643 | |||
644 | no_mem_wr_tx_pends: | ||
645 | kfree(link->wr_tx_pends); | ||
646 | no_mem_wr_tx_mask: | ||
647 | kfree(link->wr_tx_mask); | ||
648 | no_mem_wr_rx_sges: | ||
649 | kfree(link->wr_rx_sges); | ||
650 | no_mem_wr_tx_sges: | ||
651 | kfree(link->wr_tx_sges); | ||
652 | no_mem_wr_tx_rdma_sges: | ||
653 | kfree(link->wr_tx_rdma_sges); | ||
654 | no_mem_wr_tx_rdmas: | ||
655 | kfree(link->wr_tx_rdmas); | ||
656 | no_mem_wr_rx_ibs: | ||
657 | kfree(link->wr_rx_ibs); | ||
658 | no_mem_wr_tx_ibs: | ||
659 | kfree(link->wr_tx_ibs); | ||
660 | no_mem_wr_rx_bufs: | ||
661 | kfree(link->wr_rx_bufs); | ||
662 | no_mem_wr_tx_bufs: | ||
663 | kfree(link->wr_tx_bufs); | ||
664 | no_mem: | ||
665 | return -ENOMEM; | ||
666 | } | ||
667 | |||
668 | void smc_wr_remove_dev(struct smc_ib_device *smcibdev) | ||
669 | { | ||
670 | tasklet_kill(&smcibdev->recv_tasklet); | ||
671 | tasklet_kill(&smcibdev->send_tasklet); | ||
672 | } | ||
673 | |||
674 | void smc_wr_add_dev(struct smc_ib_device *smcibdev) | ||
675 | { | ||
676 | tasklet_init(&smcibdev->recv_tasklet, smc_wr_rx_tasklet_fn, | ||
677 | (unsigned long)smcibdev); | ||
678 | tasklet_init(&smcibdev->send_tasklet, smc_wr_tx_tasklet_fn, | ||
679 | (unsigned long)smcibdev); | ||
680 | } | ||
681 | |||
682 | int smc_wr_create_link(struct smc_link *lnk) | ||
683 | { | ||
684 | struct ib_device *ibdev = lnk->smcibdev->ibdev; | ||
685 | int rc = 0; | ||
686 | |||
687 | smc_wr_tx_set_wr_id(&lnk->wr_tx_id, 0); | ||
688 | lnk->wr_rx_id = 0; | ||
689 | lnk->wr_rx_dma_addr = ib_dma_map_single( | ||
690 | ibdev, lnk->wr_rx_bufs, SMC_WR_BUF_SIZE * lnk->wr_rx_cnt, | ||
691 | DMA_FROM_DEVICE); | ||
692 | if (ib_dma_mapping_error(ibdev, lnk->wr_rx_dma_addr)) { | ||
693 | lnk->wr_rx_dma_addr = 0; | ||
694 | rc = -EIO; | ||
695 | goto out; | ||
696 | } | ||
697 | lnk->wr_tx_dma_addr = ib_dma_map_single( | ||
698 | ibdev, lnk->wr_tx_bufs, SMC_WR_BUF_SIZE * lnk->wr_tx_cnt, | ||
699 | DMA_TO_DEVICE); | ||
700 | if (ib_dma_mapping_error(ibdev, lnk->wr_tx_dma_addr)) { | ||
701 | rc = -EIO; | ||
702 | goto dma_unmap; | ||
703 | } | ||
704 | smc_wr_init_sge(lnk); | ||
705 | memset(lnk->wr_tx_mask, 0, | ||
706 | BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask)); | ||
707 | init_waitqueue_head(&lnk->wr_tx_wait); | ||
708 | atomic_set(&lnk->wr_tx_refcnt, 0); | ||
709 | init_waitqueue_head(&lnk->wr_reg_wait); | ||
710 | atomic_set(&lnk->wr_reg_refcnt, 0); | ||
711 | return rc; | ||
712 | |||
713 | dma_unmap: | ||
714 | ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr, | ||
715 | SMC_WR_BUF_SIZE * lnk->wr_rx_cnt, | ||
716 | DMA_FROM_DEVICE); | ||
717 | lnk->wr_rx_dma_addr = 0; | ||
718 | out: | ||
719 | return rc; | ||
720 | } | ||
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h new file mode 100644 index 000000000..cb58e6007 --- /dev/null +++ b/net/smc/smc_wr.h | |||
@@ -0,0 +1,131 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * Work Requests exploiting the InfiniBand API | ||
6 | * | ||
7 | * Copyright IBM Corp. 2016 | ||
8 | * | ||
9 | * Author(s): Steffen Maier <maier@linux.vnet.ibm.com> | ||
10 | */ | ||
11 | |||
12 | #ifndef SMC_WR_H | ||
13 | #define SMC_WR_H | ||
14 | |||
15 | #include <linux/atomic.h> | ||
16 | #include <rdma/ib_verbs.h> | ||
17 | #include <asm/div64.h> | ||
18 | |||
19 | #include "smc.h" | ||
20 | #include "smc_core.h" | ||
21 | |||
22 | #define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */ | ||
23 | |||
24 | #define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ) | ||
25 | |||
26 | #define SMC_WR_TX_SIZE 44 /* actual size of wr_send data (<=SMC_WR_BUF_SIZE) */ | ||
27 | |||
28 | #define SMC_WR_TX_PEND_PRIV_SIZE 32 | ||
29 | |||
30 | struct smc_wr_tx_pend_priv { | ||
31 | u8 priv[SMC_WR_TX_PEND_PRIV_SIZE]; | ||
32 | }; | ||
33 | |||
34 | typedef void (*smc_wr_tx_handler)(struct smc_wr_tx_pend_priv *, | ||
35 | struct smc_link *, | ||
36 | enum ib_wc_status); | ||
37 | |||
38 | typedef bool (*smc_wr_tx_filter)(struct smc_wr_tx_pend_priv *, | ||
39 | unsigned long); | ||
40 | |||
41 | typedef void (*smc_wr_tx_dismisser)(struct smc_wr_tx_pend_priv *); | ||
42 | |||
43 | struct smc_wr_rx_handler { | ||
44 | struct hlist_node list; /* hash table collision resolution */ | ||
45 | void (*handler)(struct ib_wc *, void *); | ||
46 | u8 type; | ||
47 | }; | ||
48 | |||
49 | /* Only used by RDMA write WRs; | ||
50 | * all other WRs (CDC/LLC) go through smc_wr_tx_send(), which handles the WR ID implicitly. | ||
51 | */ | ||
52 | static inline long smc_wr_tx_get_next_wr_id(struct smc_link *link) | ||
53 | { | ||
54 | return atomic_long_inc_return(&link->wr_tx_id); | ||
55 | } | ||
56 | |||
57 | static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val) | ||
58 | { | ||
59 | atomic_long_set(wr_tx_id, val); | ||
60 | } | ||
61 | |||
62 | static inline bool smc_wr_tx_link_hold(struct smc_link *link) | ||
63 | { | ||
64 | if (!smc_link_sendable(link)) | ||
65 | return false; | ||
66 | atomic_inc(&link->wr_tx_refcnt); | ||
67 | return true; | ||
68 | } | ||
69 | |||
70 | static inline void smc_wr_tx_link_put(struct smc_link *link) | ||
71 | { | ||
72 | if (atomic_dec_and_test(&link->wr_tx_refcnt)) | ||
73 | wake_up_all(&link->wr_tx_wait); | ||
74 | } | ||
75 | |||
76 | static inline void smc_wr_wakeup_tx_wait(struct smc_link *lnk) | ||
77 | { | ||
78 | wake_up_all(&lnk->wr_tx_wait); | ||
79 | } | ||
80 | |||
81 | static inline void smc_wr_wakeup_reg_wait(struct smc_link *lnk) | ||
82 | { | ||
83 | wake_up(&lnk->wr_reg_wait); | ||
84 | } | ||
85 | |||
86 | /* post a new receive work request to refill the slot of a completed one */ | ||
87 | static inline int smc_wr_rx_post(struct smc_link *link) | ||
88 | { | ||
89 | int rc; | ||
90 | u64 wr_id, temp_wr_id; | ||
91 | u32 index; | ||
92 | |||
93 | wr_id = ++link->wr_rx_id; /* tasklet context, thus not atomic */ | ||
94 | temp_wr_id = wr_id; | ||
95 | index = do_div(temp_wr_id, link->wr_rx_cnt); | ||
96 | link->wr_rx_ibs[index].wr_id = wr_id; | ||
97 | rc = ib_post_recv(link->roce_qp, &link->wr_rx_ibs[index], NULL); | ||
98 | return rc; | ||
99 | } | ||
100 | |||
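The wr_id handed to the hardware grows monotonically, while the receive ring only has wr_rx_cnt buffers; do_div() divides its first argument in place and returns the remainder, so the index simply cycles through the ring as wr_id increases. A hedged userspace demonstration of the mapping (48 matches SMC_WR_BUF_CNT * 3, but is otherwise illustrative):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t wr_rx_cnt = 48;    /* SMC_WR_BUF_CNT * 3 receive bufs */
            uint64_t wr_id;

            /* same remainder do_div(temp_wr_id, wr_rx_cnt) yields above */
            for (wr_id = 46; wr_id <= 50; wr_id++)
                    printf("wr_id=%llu -> ring index %llu\n",
                           (unsigned long long)wr_id,
                           (unsigned long long)(wr_id % wr_rx_cnt));
            return 0;   /* indexes wrap: 46, 47, 0, 1, 2 */
    }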
101 | int smc_wr_create_link(struct smc_link *lnk); | ||
102 | int smc_wr_alloc_link_mem(struct smc_link *lnk); | ||
103 | void smc_wr_free_link(struct smc_link *lnk); | ||
104 | void smc_wr_free_link_mem(struct smc_link *lnk); | ||
105 | void smc_wr_remember_qp_attr(struct smc_link *lnk); | ||
106 | void smc_wr_remove_dev(struct smc_ib_device *smcibdev); | ||
107 | void smc_wr_add_dev(struct smc_ib_device *smcibdev); | ||
108 | |||
109 | int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler, | ||
110 | struct smc_wr_buf **wr_buf, | ||
111 | struct smc_rdma_wr **wrs, | ||
112 | struct smc_wr_tx_pend_priv **wr_pend_priv); | ||
113 | int smc_wr_tx_put_slot(struct smc_link *link, | ||
114 | struct smc_wr_tx_pend_priv *wr_pend_priv); | ||
115 | int smc_wr_tx_send(struct smc_link *link, | ||
116 | struct smc_wr_tx_pend_priv *wr_pend_priv); | ||
117 | int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, | ||
118 | unsigned long timeout); | ||
119 | void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context); | ||
120 | void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, | ||
121 | smc_wr_tx_filter filter, | ||
122 | smc_wr_tx_dismisser dismisser, | ||
123 | unsigned long data); | ||
124 | void smc_wr_tx_wait_no_pending_sends(struct smc_link *link); | ||
125 | |||
126 | int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler); | ||
127 | int smc_wr_rx_post_init(struct smc_link *link); | ||
128 | void smc_wr_rx_cq_handler(struct ib_cq *ib_cq, void *cq_context); | ||
129 | int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr); | ||
130 | |||
131 | #endif /* SMC_WR_H */ | ||