diff options
Diffstat (limited to 'net/smc/smc_ib.c')
-rw-r--r-- | net/smc/smc_ib.c | 643 |
1 files changed, 643 insertions, 0 deletions
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c new file mode 100644 index 000000000..f1ffbd414 --- /dev/null +++ b/net/smc/smc_ib.c | |||
@@ -0,0 +1,643 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | ||
4 | * | ||
5 | * IB infrastructure: | ||
6 | * Establish SMC-R as an Infiniband Client to be notified about added and | ||
7 | * removed IB devices of type RDMA. | ||
8 | * Determine device and port characteristics for these IB devices. | ||
9 | * | ||
10 | * Copyright IBM Corp. 2016 | ||
11 | * | ||
12 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> | ||
13 | */ | ||
14 | |||
15 | #include <linux/random.h> | ||
16 | #include <linux/workqueue.h> | ||
17 | #include <linux/scatterlist.h> | ||
18 | #include <linux/wait.h> | ||
19 | #include <linux/mutex.h> | ||
20 | #include <rdma/ib_verbs.h> | ||
21 | #include <rdma/ib_cache.h> | ||
22 | |||
23 | #include "smc_pnet.h" | ||
24 | #include "smc_ib.h" | ||
25 | #include "smc_core.h" | ||
26 | #include "smc_wr.h" | ||
27 | #include "smc.h" | ||
28 | |||
/* max. # of completion queue elements */
#define SMC_MAX_CQE		32766

/* values programmed into the queue pair attributes of a link */
#define SMC_QP_MIN_RNR_TIMER	5
/* 4096 * 2 ** timeout usec */
#define SMC_QP_TIMEOUT		15
/* 7: infinite */
#define SMC_QP_RETRY_CNT	7
/* 7: infinite */
#define SMC_QP_RNR_RETRY	7
35 | |||
36 | struct smc_ib_devices smc_ib_devices = { /* smc-registered ib devices */ | ||
37 | .mutex = __MUTEX_INITIALIZER(smc_ib_devices.mutex), | ||
38 | .list = LIST_HEAD_INIT(smc_ib_devices.list), | ||
39 | }; | ||
40 | |||
41 | u8 local_systemid[SMC_SYSTEMID_LEN]; /* unique system identifier */ | ||
42 | |||
43 | static int smc_ib_modify_qp_init(struct smc_link *lnk) | ||
44 | { | ||
45 | struct ib_qp_attr qp_attr; | ||
46 | |||
47 | memset(&qp_attr, 0, sizeof(qp_attr)); | ||
48 | qp_attr.qp_state = IB_QPS_INIT; | ||
49 | qp_attr.pkey_index = 0; | ||
50 | qp_attr.port_num = lnk->ibport; | ||
51 | qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE | ||
52 | | IB_ACCESS_REMOTE_WRITE; | ||
53 | return ib_modify_qp(lnk->roce_qp, &qp_attr, | ||
54 | IB_QP_STATE | IB_QP_PKEY_INDEX | | ||
55 | IB_QP_ACCESS_FLAGS | IB_QP_PORT); | ||
56 | } | ||
57 | |||
58 | static int smc_ib_modify_qp_rtr(struct smc_link *lnk) | ||
59 | { | ||
60 | enum ib_qp_attr_mask qp_attr_mask = | ||
61 | IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | | ||
62 | IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; | ||
63 | struct ib_qp_attr qp_attr; | ||
64 | |||
65 | memset(&qp_attr, 0, sizeof(qp_attr)); | ||
66 | qp_attr.qp_state = IB_QPS_RTR; | ||
67 | qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu); | ||
68 | qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; | ||
69 | rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport); | ||
70 | rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, 1, 0); | ||
71 | rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid); | ||
72 | memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac, | ||
73 | sizeof(lnk->peer_mac)); | ||
74 | qp_attr.dest_qp_num = lnk->peer_qpn; | ||
75 | qp_attr.rq_psn = lnk->peer_psn; /* starting receive packet seq # */ | ||
76 | qp_attr.max_dest_rd_atomic = 1; /* max # of resources for incoming | ||
77 | * requests | ||
78 | */ | ||
79 | qp_attr.min_rnr_timer = SMC_QP_MIN_RNR_TIMER; | ||
80 | |||
81 | return ib_modify_qp(lnk->roce_qp, &qp_attr, qp_attr_mask); | ||
82 | } | ||
83 | |||
84 | int smc_ib_modify_qp_rts(struct smc_link *lnk) | ||
85 | { | ||
86 | struct ib_qp_attr qp_attr; | ||
87 | |||
88 | memset(&qp_attr, 0, sizeof(qp_attr)); | ||
89 | qp_attr.qp_state = IB_QPS_RTS; | ||
90 | qp_attr.timeout = SMC_QP_TIMEOUT; /* local ack timeout */ | ||
91 | qp_attr.retry_cnt = SMC_QP_RETRY_CNT; /* retry count */ | ||
92 | qp_attr.rnr_retry = SMC_QP_RNR_RETRY; /* RNR retries, 7=infinite */ | ||
93 | qp_attr.sq_psn = lnk->psn_initial; /* starting send packet seq # */ | ||
94 | qp_attr.max_rd_atomic = 1; /* # of outstanding RDMA reads and | ||
95 | * atomic ops allowed | ||
96 | */ | ||
97 | return ib_modify_qp(lnk->roce_qp, &qp_attr, | ||
98 | IB_QP_STATE | IB_QP_TIMEOUT | IB_QP_RETRY_CNT | | ||
99 | IB_QP_SQ_PSN | IB_QP_RNR_RETRY | | ||
100 | IB_QP_MAX_QP_RD_ATOMIC); | ||
101 | } | ||
102 | |||
103 | int smc_ib_modify_qp_error(struct smc_link *lnk) | ||
104 | { | ||
105 | struct ib_qp_attr qp_attr; | ||
106 | |||
107 | memset(&qp_attr, 0, sizeof(qp_attr)); | ||
108 | qp_attr.qp_state = IB_QPS_ERR; | ||
109 | return ib_modify_qp(lnk->roce_qp, &qp_attr, IB_QP_STATE); | ||
110 | } | ||
111 | |||
112 | int smc_ib_ready_link(struct smc_link *lnk) | ||
113 | { | ||
114 | struct smc_link_group *lgr = smc_get_lgr(lnk); | ||
115 | int rc = 0; | ||
116 | |||
117 | rc = smc_ib_modify_qp_init(lnk); | ||
118 | if (rc) | ||
119 | goto out; | ||
120 | |||
121 | rc = smc_ib_modify_qp_rtr(lnk); | ||
122 | if (rc) | ||
123 | goto out; | ||
124 | smc_wr_remember_qp_attr(lnk); | ||
125 | rc = ib_req_notify_cq(lnk->smcibdev->roce_cq_recv, | ||
126 | IB_CQ_SOLICITED_MASK); | ||
127 | if (rc) | ||
128 | goto out; | ||
129 | rc = smc_wr_rx_post_init(lnk); | ||
130 | if (rc) | ||
131 | goto out; | ||
132 | smc_wr_remember_qp_attr(lnk); | ||
133 | |||
134 | if (lgr->role == SMC_SERV) { | ||
135 | rc = smc_ib_modify_qp_rts(lnk); | ||
136 | if (rc) | ||
137 | goto out; | ||
138 | smc_wr_remember_qp_attr(lnk); | ||
139 | } | ||
140 | out: | ||
141 | return rc; | ||
142 | } | ||
143 | |||
144 | static int smc_ib_fill_mac(struct smc_ib_device *smcibdev, u8 ibport) | ||
145 | { | ||
146 | const struct ib_gid_attr *attr; | ||
147 | int rc; | ||
148 | |||
149 | attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, 0); | ||
150 | if (IS_ERR(attr)) | ||
151 | return -ENODEV; | ||
152 | |||
153 | rc = rdma_read_gid_l2_fields(attr, NULL, smcibdev->mac[ibport - 1]); | ||
154 | rdma_put_gid_attr(attr); | ||
155 | return rc; | ||
156 | } | ||
157 | |||
158 | /* Create an identifier unique for this instance of SMC-R. | ||
159 | * The MAC-address of the first active registered IB device | ||
160 | * plus a random 2-byte number is used to create this identifier. | ||
161 | * This name is delivered to the peer during connection initialization. | ||
162 | */ | ||
163 | static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev, | ||
164 | u8 ibport) | ||
165 | { | ||
166 | memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1], | ||
167 | sizeof(smcibdev->mac[ibport - 1])); | ||
168 | } | ||
169 | |||
170 | bool smc_ib_is_valid_local_systemid(void) | ||
171 | { | ||
172 | return !is_zero_ether_addr(&local_systemid[2]); | ||
173 | } | ||
174 | |||
175 | static void smc_ib_init_local_systemid(void) | ||
176 | { | ||
177 | get_random_bytes(&local_systemid[0], 2); | ||
178 | } | ||
179 | |||
180 | bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport) | ||
181 | { | ||
182 | return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE; | ||
183 | } | ||
184 | |||
185 | /* determine the gid for an ib-device port and vlan id */ | ||
186 | int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, | ||
187 | unsigned short vlan_id, u8 gid[], u8 *sgid_index) | ||
188 | { | ||
189 | const struct ib_gid_attr *attr; | ||
190 | const struct net_device *ndev; | ||
191 | int i; | ||
192 | |||
193 | for (i = 0; i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) { | ||
194 | attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, i); | ||
195 | if (IS_ERR(attr)) | ||
196 | continue; | ||
197 | |||
198 | rcu_read_lock(); | ||
199 | ndev = rdma_read_gid_attr_ndev_rcu(attr); | ||
200 | if (!IS_ERR(ndev) && | ||
201 | ((!vlan_id && !is_vlan_dev(ndev)) || | ||
202 | (vlan_id && is_vlan_dev(ndev) && | ||
203 | vlan_dev_vlan_id(ndev) == vlan_id)) && | ||
204 | attr->gid_type == IB_GID_TYPE_ROCE) { | ||
205 | rcu_read_unlock(); | ||
206 | if (gid) | ||
207 | memcpy(gid, &attr->gid, SMC_GID_SIZE); | ||
208 | if (sgid_index) | ||
209 | *sgid_index = attr->index; | ||
210 | rdma_put_gid_attr(attr); | ||
211 | return 0; | ||
212 | } | ||
213 | rcu_read_unlock(); | ||
214 | rdma_put_gid_attr(attr); | ||
215 | } | ||
216 | return -ENODEV; | ||
217 | } | ||
218 | |||
219 | static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport) | ||
220 | { | ||
221 | int rc; | ||
222 | |||
223 | memset(&smcibdev->pattr[ibport - 1], 0, | ||
224 | sizeof(smcibdev->pattr[ibport - 1])); | ||
225 | rc = ib_query_port(smcibdev->ibdev, ibport, | ||
226 | &smcibdev->pattr[ibport - 1]); | ||
227 | if (rc) | ||
228 | goto out; | ||
229 | /* the SMC protocol requires specification of the RoCE MAC address */ | ||
230 | rc = smc_ib_fill_mac(smcibdev, ibport); | ||
231 | if (rc) | ||
232 | goto out; | ||
233 | if (!smc_ib_is_valid_local_systemid() && | ||
234 | smc_ib_port_active(smcibdev, ibport)) | ||
235 | /* create unique system identifier */ | ||
236 | smc_ib_define_local_systemid(smcibdev, ibport); | ||
237 | out: | ||
238 | return rc; | ||
239 | } | ||
240 | |||
241 | /* process context wrapper for might_sleep smc_ib_remember_port_attr */ | ||
242 | static void smc_ib_port_event_work(struct work_struct *work) | ||
243 | { | ||
244 | struct smc_ib_device *smcibdev = container_of( | ||
245 | work, struct smc_ib_device, port_event_work); | ||
246 | u8 port_idx; | ||
247 | |||
248 | for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) { | ||
249 | smc_ib_remember_port_attr(smcibdev, port_idx + 1); | ||
250 | clear_bit(port_idx, &smcibdev->port_event_mask); | ||
251 | if (!smc_ib_port_active(smcibdev, port_idx + 1)) { | ||
252 | set_bit(port_idx, smcibdev->ports_going_away); | ||
253 | smcr_port_err(smcibdev, port_idx + 1); | ||
254 | } else { | ||
255 | clear_bit(port_idx, smcibdev->ports_going_away); | ||
256 | smcr_port_add(smcibdev, port_idx + 1); | ||
257 | } | ||
258 | } | ||
259 | } | ||
260 | |||
261 | /* can be called in IRQ context */ | ||
262 | static void smc_ib_global_event_handler(struct ib_event_handler *handler, | ||
263 | struct ib_event *ibevent) | ||
264 | { | ||
265 | struct smc_ib_device *smcibdev; | ||
266 | bool schedule = false; | ||
267 | u8 port_idx; | ||
268 | |||
269 | smcibdev = container_of(handler, struct smc_ib_device, event_handler); | ||
270 | |||
271 | switch (ibevent->event) { | ||
272 | case IB_EVENT_DEVICE_FATAL: | ||
273 | /* terminate all ports on device */ | ||
274 | for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++) { | ||
275 | set_bit(port_idx, &smcibdev->port_event_mask); | ||
276 | if (!test_and_set_bit(port_idx, | ||
277 | smcibdev->ports_going_away)) | ||
278 | schedule = true; | ||
279 | } | ||
280 | if (schedule) | ||
281 | schedule_work(&smcibdev->port_event_work); | ||
282 | break; | ||
283 | case IB_EVENT_PORT_ACTIVE: | ||
284 | port_idx = ibevent->element.port_num - 1; | ||
285 | if (port_idx >= SMC_MAX_PORTS) | ||
286 | break; | ||
287 | set_bit(port_idx, &smcibdev->port_event_mask); | ||
288 | if (test_and_clear_bit(port_idx, smcibdev->ports_going_away)) | ||
289 | schedule_work(&smcibdev->port_event_work); | ||
290 | break; | ||
291 | case IB_EVENT_PORT_ERR: | ||
292 | port_idx = ibevent->element.port_num - 1; | ||
293 | if (port_idx >= SMC_MAX_PORTS) | ||
294 | break; | ||
295 | set_bit(port_idx, &smcibdev->port_event_mask); | ||
296 | if (!test_and_set_bit(port_idx, smcibdev->ports_going_away)) | ||
297 | schedule_work(&smcibdev->port_event_work); | ||
298 | break; | ||
299 | case IB_EVENT_GID_CHANGE: | ||
300 | port_idx = ibevent->element.port_num - 1; | ||
301 | if (port_idx >= SMC_MAX_PORTS) | ||
302 | break; | ||
303 | set_bit(port_idx, &smcibdev->port_event_mask); | ||
304 | schedule_work(&smcibdev->port_event_work); | ||
305 | break; | ||
306 | default: | ||
307 | break; | ||
308 | } | ||
309 | } | ||
310 | |||
311 | void smc_ib_dealloc_protection_domain(struct smc_link *lnk) | ||
312 | { | ||
313 | if (lnk->roce_pd) | ||
314 | ib_dealloc_pd(lnk->roce_pd); | ||
315 | lnk->roce_pd = NULL; | ||
316 | } | ||
317 | |||
318 | int smc_ib_create_protection_domain(struct smc_link *lnk) | ||
319 | { | ||
320 | int rc; | ||
321 | |||
322 | lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0); | ||
323 | rc = PTR_ERR_OR_ZERO(lnk->roce_pd); | ||
324 | if (IS_ERR(lnk->roce_pd)) | ||
325 | lnk->roce_pd = NULL; | ||
326 | return rc; | ||
327 | } | ||
328 | |||
329 | static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) | ||
330 | { | ||
331 | struct smc_link *lnk = (struct smc_link *)priv; | ||
332 | struct smc_ib_device *smcibdev = lnk->smcibdev; | ||
333 | u8 port_idx; | ||
334 | |||
335 | switch (ibevent->event) { | ||
336 | case IB_EVENT_QP_FATAL: | ||
337 | case IB_EVENT_QP_ACCESS_ERR: | ||
338 | port_idx = ibevent->element.qp->port - 1; | ||
339 | if (port_idx >= SMC_MAX_PORTS) | ||
340 | break; | ||
341 | set_bit(port_idx, &smcibdev->port_event_mask); | ||
342 | if (!test_and_set_bit(port_idx, smcibdev->ports_going_away)) | ||
343 | schedule_work(&smcibdev->port_event_work); | ||
344 | break; | ||
345 | default: | ||
346 | break; | ||
347 | } | ||
348 | } | ||
349 | |||
350 | void smc_ib_destroy_queue_pair(struct smc_link *lnk) | ||
351 | { | ||
352 | if (lnk->roce_qp) | ||
353 | ib_destroy_qp(lnk->roce_qp); | ||
354 | lnk->roce_qp = NULL; | ||
355 | } | ||
356 | |||
357 | /* create a queue pair within the protection domain for a link */ | ||
358 | int smc_ib_create_queue_pair(struct smc_link *lnk) | ||
359 | { | ||
360 | struct ib_qp_init_attr qp_attr = { | ||
361 | .event_handler = smc_ib_qp_event_handler, | ||
362 | .qp_context = lnk, | ||
363 | .send_cq = lnk->smcibdev->roce_cq_send, | ||
364 | .recv_cq = lnk->smcibdev->roce_cq_recv, | ||
365 | .srq = NULL, | ||
366 | .cap = { | ||
367 | /* include unsolicited rdma_writes as well, | ||
368 | * there are max. 2 RDMA_WRITE per 1 WR_SEND | ||
369 | */ | ||
370 | .max_send_wr = SMC_WR_BUF_CNT * 3, | ||
371 | .max_recv_wr = SMC_WR_BUF_CNT * 3, | ||
372 | .max_send_sge = SMC_IB_MAX_SEND_SGE, | ||
373 | .max_recv_sge = 1, | ||
374 | }, | ||
375 | .sq_sig_type = IB_SIGNAL_REQ_WR, | ||
376 | .qp_type = IB_QPT_RC, | ||
377 | }; | ||
378 | int rc; | ||
379 | |||
380 | lnk->roce_qp = ib_create_qp(lnk->roce_pd, &qp_attr); | ||
381 | rc = PTR_ERR_OR_ZERO(lnk->roce_qp); | ||
382 | if (IS_ERR(lnk->roce_qp)) | ||
383 | lnk->roce_qp = NULL; | ||
384 | else | ||
385 | smc_wr_remember_qp_attr(lnk); | ||
386 | return rc; | ||
387 | } | ||
388 | |||
/* Deregister a memory region; thin wrapper around ib_dereg_mr() */
void smc_ib_put_memory_region(struct ib_mr *mr)
{
	ib_dereg_mr(mr);
}
393 | |||
394 | static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot, u8 link_idx) | ||
395 | { | ||
396 | unsigned int offset = 0; | ||
397 | int sg_num; | ||
398 | |||
399 | /* map the largest prefix of a dma mapped SG list */ | ||
400 | sg_num = ib_map_mr_sg(buf_slot->mr_rx[link_idx], | ||
401 | buf_slot->sgt[link_idx].sgl, | ||
402 | buf_slot->sgt[link_idx].orig_nents, | ||
403 | &offset, PAGE_SIZE); | ||
404 | |||
405 | return sg_num; | ||
406 | } | ||
407 | |||
408 | /* Allocate a memory region and map the dma mapped SG list of buf_slot */ | ||
409 | int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, | ||
410 | struct smc_buf_desc *buf_slot, u8 link_idx) | ||
411 | { | ||
412 | if (buf_slot->mr_rx[link_idx]) | ||
413 | return 0; /* already done */ | ||
414 | |||
415 | buf_slot->mr_rx[link_idx] = | ||
416 | ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order); | ||
417 | if (IS_ERR(buf_slot->mr_rx[link_idx])) { | ||
418 | int rc; | ||
419 | |||
420 | rc = PTR_ERR(buf_slot->mr_rx[link_idx]); | ||
421 | buf_slot->mr_rx[link_idx] = NULL; | ||
422 | return rc; | ||
423 | } | ||
424 | |||
425 | if (smc_ib_map_mr_sg(buf_slot, link_idx) != 1) | ||
426 | return -EINVAL; | ||
427 | |||
428 | return 0; | ||
429 | } | ||
430 | |||
431 | /* synchronize buffer usage for cpu access */ | ||
432 | void smc_ib_sync_sg_for_cpu(struct smc_link *lnk, | ||
433 | struct smc_buf_desc *buf_slot, | ||
434 | enum dma_data_direction data_direction) | ||
435 | { | ||
436 | struct scatterlist *sg; | ||
437 | unsigned int i; | ||
438 | |||
439 | /* for now there is just one DMA address */ | ||
440 | for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg, | ||
441 | buf_slot->sgt[lnk->link_idx].nents, i) { | ||
442 | if (!sg_dma_len(sg)) | ||
443 | break; | ||
444 | ib_dma_sync_single_for_cpu(lnk->smcibdev->ibdev, | ||
445 | sg_dma_address(sg), | ||
446 | sg_dma_len(sg), | ||
447 | data_direction); | ||
448 | } | ||
449 | } | ||
450 | |||
451 | /* synchronize buffer usage for device access */ | ||
452 | void smc_ib_sync_sg_for_device(struct smc_link *lnk, | ||
453 | struct smc_buf_desc *buf_slot, | ||
454 | enum dma_data_direction data_direction) | ||
455 | { | ||
456 | struct scatterlist *sg; | ||
457 | unsigned int i; | ||
458 | |||
459 | /* for now there is just one DMA address */ | ||
460 | for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg, | ||
461 | buf_slot->sgt[lnk->link_idx].nents, i) { | ||
462 | if (!sg_dma_len(sg)) | ||
463 | break; | ||
464 | ib_dma_sync_single_for_device(lnk->smcibdev->ibdev, | ||
465 | sg_dma_address(sg), | ||
466 | sg_dma_len(sg), | ||
467 | data_direction); | ||
468 | } | ||
469 | } | ||
470 | |||
471 | /* Map a new TX or RX buffer SG-table to DMA */ | ||
472 | int smc_ib_buf_map_sg(struct smc_link *lnk, | ||
473 | struct smc_buf_desc *buf_slot, | ||
474 | enum dma_data_direction data_direction) | ||
475 | { | ||
476 | int mapped_nents; | ||
477 | |||
478 | mapped_nents = ib_dma_map_sg(lnk->smcibdev->ibdev, | ||
479 | buf_slot->sgt[lnk->link_idx].sgl, | ||
480 | buf_slot->sgt[lnk->link_idx].orig_nents, | ||
481 | data_direction); | ||
482 | if (!mapped_nents) | ||
483 | return -ENOMEM; | ||
484 | |||
485 | return mapped_nents; | ||
486 | } | ||
487 | |||
488 | void smc_ib_buf_unmap_sg(struct smc_link *lnk, | ||
489 | struct smc_buf_desc *buf_slot, | ||
490 | enum dma_data_direction data_direction) | ||
491 | { | ||
492 | if (!buf_slot->sgt[lnk->link_idx].sgl->dma_address) | ||
493 | return; /* already unmapped */ | ||
494 | |||
495 | ib_dma_unmap_sg(lnk->smcibdev->ibdev, | ||
496 | buf_slot->sgt[lnk->link_idx].sgl, | ||
497 | buf_slot->sgt[lnk->link_idx].orig_nents, | ||
498 | data_direction); | ||
499 | buf_slot->sgt[lnk->link_idx].sgl->dma_address = 0; | ||
500 | } | ||
501 | |||
502 | long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) | ||
503 | { | ||
504 | struct ib_cq_init_attr cqattr = { | ||
505 | .cqe = SMC_MAX_CQE, .comp_vector = 0 }; | ||
506 | int cqe_size_order, smc_order; | ||
507 | long rc; | ||
508 | |||
509 | mutex_lock(&smcibdev->mutex); | ||
510 | rc = 0; | ||
511 | if (smcibdev->initialized) | ||
512 | goto out; | ||
513 | /* the calculated number of cq entries fits to mlx5 cq allocation */ | ||
514 | cqe_size_order = cache_line_size() == 128 ? 7 : 6; | ||
515 | smc_order = MAX_ORDER - cqe_size_order - 1; | ||
516 | if (SMC_MAX_CQE + 2 > (0x00000001 << smc_order) * PAGE_SIZE) | ||
517 | cqattr.cqe = (0x00000001 << smc_order) * PAGE_SIZE - 2; | ||
518 | smcibdev->roce_cq_send = ib_create_cq(smcibdev->ibdev, | ||
519 | smc_wr_tx_cq_handler, NULL, | ||
520 | smcibdev, &cqattr); | ||
521 | rc = PTR_ERR_OR_ZERO(smcibdev->roce_cq_send); | ||
522 | if (IS_ERR(smcibdev->roce_cq_send)) { | ||
523 | smcibdev->roce_cq_send = NULL; | ||
524 | goto out; | ||
525 | } | ||
526 | smcibdev->roce_cq_recv = ib_create_cq(smcibdev->ibdev, | ||
527 | smc_wr_rx_cq_handler, NULL, | ||
528 | smcibdev, &cqattr); | ||
529 | rc = PTR_ERR_OR_ZERO(smcibdev->roce_cq_recv); | ||
530 | if (IS_ERR(smcibdev->roce_cq_recv)) { | ||
531 | smcibdev->roce_cq_recv = NULL; | ||
532 | goto err; | ||
533 | } | ||
534 | smc_wr_add_dev(smcibdev); | ||
535 | smcibdev->initialized = 1; | ||
536 | goto out; | ||
537 | |||
538 | err: | ||
539 | ib_destroy_cq(smcibdev->roce_cq_send); | ||
540 | out: | ||
541 | mutex_unlock(&smcibdev->mutex); | ||
542 | return rc; | ||
543 | } | ||
544 | |||
545 | static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev) | ||
546 | { | ||
547 | mutex_lock(&smcibdev->mutex); | ||
548 | if (!smcibdev->initialized) | ||
549 | goto out; | ||
550 | smcibdev->initialized = 0; | ||
551 | ib_destroy_cq(smcibdev->roce_cq_recv); | ||
552 | ib_destroy_cq(smcibdev->roce_cq_send); | ||
553 | smc_wr_remove_dev(smcibdev); | ||
554 | out: | ||
555 | mutex_unlock(&smcibdev->mutex); | ||
556 | } | ||
557 | |||
558 | static struct ib_client smc_ib_client; | ||
559 | |||
560 | /* callback function for ib_register_client() */ | ||
561 | static int smc_ib_add_dev(struct ib_device *ibdev) | ||
562 | { | ||
563 | struct smc_ib_device *smcibdev; | ||
564 | u8 port_cnt; | ||
565 | int i; | ||
566 | |||
567 | if (ibdev->node_type != RDMA_NODE_IB_CA) | ||
568 | return -EOPNOTSUPP; | ||
569 | |||
570 | smcibdev = kzalloc(sizeof(*smcibdev), GFP_KERNEL); | ||
571 | if (!smcibdev) | ||
572 | return -ENOMEM; | ||
573 | |||
574 | smcibdev->ibdev = ibdev; | ||
575 | INIT_WORK(&smcibdev->port_event_work, smc_ib_port_event_work); | ||
576 | atomic_set(&smcibdev->lnk_cnt, 0); | ||
577 | init_waitqueue_head(&smcibdev->lnks_deleted); | ||
578 | mutex_init(&smcibdev->mutex); | ||
579 | mutex_lock(&smc_ib_devices.mutex); | ||
580 | list_add_tail(&smcibdev->list, &smc_ib_devices.list); | ||
581 | mutex_unlock(&smc_ib_devices.mutex); | ||
582 | ib_set_client_data(ibdev, &smc_ib_client, smcibdev); | ||
583 | INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev, | ||
584 | smc_ib_global_event_handler); | ||
585 | ib_register_event_handler(&smcibdev->event_handler); | ||
586 | |||
587 | /* trigger reading of the port attributes */ | ||
588 | port_cnt = smcibdev->ibdev->phys_port_cnt; | ||
589 | pr_warn_ratelimited("smc: adding ib device %s with port count %d\n", | ||
590 | smcibdev->ibdev->name, port_cnt); | ||
591 | for (i = 0; | ||
592 | i < min_t(size_t, port_cnt, SMC_MAX_PORTS); | ||
593 | i++) { | ||
594 | set_bit(i, &smcibdev->port_event_mask); | ||
595 | /* determine pnetids of the port */ | ||
596 | if (smc_pnetid_by_dev_port(ibdev->dev.parent, i, | ||
597 | smcibdev->pnetid[i])) | ||
598 | smc_pnetid_by_table_ib(smcibdev, i + 1); | ||
599 | pr_warn_ratelimited("smc: ib device %s port %d has pnetid " | ||
600 | "%.16s%s\n", | ||
601 | smcibdev->ibdev->name, i + 1, | ||
602 | smcibdev->pnetid[i], | ||
603 | smcibdev->pnetid_by_user[i] ? | ||
604 | " (user defined)" : | ||
605 | ""); | ||
606 | } | ||
607 | schedule_work(&smcibdev->port_event_work); | ||
608 | return 0; | ||
609 | } | ||
610 | |||
611 | /* callback function for ib_unregister_client() */ | ||
612 | static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) | ||
613 | { | ||
614 | struct smc_ib_device *smcibdev = client_data; | ||
615 | |||
616 | mutex_lock(&smc_ib_devices.mutex); | ||
617 | list_del_init(&smcibdev->list); /* remove from smc_ib_devices */ | ||
618 | mutex_unlock(&smc_ib_devices.mutex); | ||
619 | pr_warn_ratelimited("smc: removing ib device %s\n", | ||
620 | smcibdev->ibdev->name); | ||
621 | smc_smcr_terminate_all(smcibdev); | ||
622 | smc_ib_cleanup_per_ibdev(smcibdev); | ||
623 | ib_unregister_event_handler(&smcibdev->event_handler); | ||
624 | cancel_work_sync(&smcibdev->port_event_work); | ||
625 | kfree(smcibdev); | ||
626 | } | ||
627 | |||
628 | static struct ib_client smc_ib_client = { | ||
629 | .name = "smc_ib", | ||
630 | .add = smc_ib_add_dev, | ||
631 | .remove = smc_ib_remove_dev, | ||
632 | }; | ||
633 | |||
634 | int __init smc_ib_register_client(void) | ||
635 | { | ||
636 | smc_ib_init_local_systemid(); | ||
637 | return ib_register_client(&smc_ib_client); | ||
638 | } | ||
639 | |||
640 | void smc_ib_unregister_client(void) | ||
641 | { | ||
642 | ib_unregister_client(&smc_ib_client); | ||
643 | } | ||