kernel_samsung_a53x/drivers/infiniband/ulp/iser/iser_memory.c
Sergey Gorenko 0332ae0092 IB/iser: Prevent invalidating wrong MR
[ Upstream commit 2f1888281e67205bd80d3e8f54dbd519a9653f26 ]

The iser_reg_resources structure has two pointers to MR but only one
mr_valid field. The implementation assumes that we use only *sig_mr when
pi_enable is true. Otherwise, we use only *mr. However, it is only
sometimes correct. Read commands without protection information occur even
when pi_enble is true. For example, the following SCSI commands have a
Data-In buffer but never have protection information: READ CAPACITY (16),
INQUIRY, MODE SENSE(6), MAINTENANCE IN. So, we use
*sig_mr for some SCSI commands and *mr for the other SCSI commands.

In most cases, it works fine because the remote invalidation is applied.
However, there are two cases when the remote invalidation is not
applicable.
 1. Small write commands when all data is sent as an immediate.
 2. The target does not support the remote invalidation feature.

The lazy invalidation is used if the remote invalidation is impossible.
Since, at the lazy invalidation, we always invalidate the MR we want to
use, the wrong MR may be invalidated.

To fix the issue, we need a field per MR that indicates the MR needs
invalidation. Since the ib_mr structure already has such a field, let's
use ib_mr.need_inval instead of iser_reg_resources.mr_valid.

Fixes: b76a439982f8 ("IB/iser: Use IB_WR_REG_MR_INTEGRITY for PI handover")
Link: https://lore.kernel.org/r/20231219072311.40989-1-sergeygo@nvidia.com
Acked-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Signed-off-by: Sergey Gorenko <sergeygo@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
2024-11-18 12:12:43 +01:00

393 lines
11 KiB
C
Executable file

/*
* Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
* Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/scatterlist.h>
#include "iscsi_iser.h"
void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc)
{
iser_err_comp(wc, "memreg");
}
static struct iser_fr_desc *
iser_reg_desc_get_fr(struct ib_conn *ib_conn)
{
struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
struct iser_fr_desc *desc;
unsigned long flags;
spin_lock_irqsave(&fr_pool->lock, flags);
desc = list_first_entry(&fr_pool->list,
struct iser_fr_desc, list);
list_del(&desc->list);
spin_unlock_irqrestore(&fr_pool->lock, flags);
return desc;
}
static void
iser_reg_desc_put_fr(struct ib_conn *ib_conn,
struct iser_fr_desc *desc)
{
struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
unsigned long flags;
spin_lock_irqsave(&fr_pool->lock, flags);
list_add(&desc->list, &fr_pool->list);
spin_unlock_irqrestore(&fr_pool->lock, flags);
}
int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
struct iser_data_buf *data,
enum iser_data_dir iser_dir,
enum dma_data_direction dma_dir)
{
struct ib_device *dev;
iser_task->dir[iser_dir] = 1;
dev = iser_task->iser_conn->ib_conn.device->ib_device;
data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, dma_dir);
if (unlikely(data->dma_nents == 0)) {
iser_err("dma_map_sg failed!!!\n");
return -EINVAL;
}
return 0;
}
void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
struct iser_data_buf *data,
enum dma_data_direction dir)
{
struct ib_device *dev;
dev = iser_task->iser_conn->ib_conn.device->ib_device;
ib_dma_unmap_sg(dev, data->sg, data->size, dir);
}
static int
iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
struct iser_mem_reg *reg)
{
struct scatterlist *sg = mem->sg;
reg->sge.lkey = device->pd->local_dma_lkey;
/*
* FIXME: rework the registration code path to differentiate
* rkey/lkey use cases
*/
if (device->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
reg->rkey = device->pd->unsafe_global_rkey;
else
reg->rkey = 0;
reg->sge.addr = sg_dma_address(&sg[0]);
reg->sge.length = sg_dma_len(&sg[0]);
iser_dbg("Single DMA entry: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
" length=0x%x\n", reg->sge.lkey, reg->rkey,
reg->sge.addr, reg->sge.length);
return 0;
}
void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
struct iser_fr_desc *desc;
struct ib_mr_status mr_status;
desc = reg->mem_h;
if (!desc)
return;
/*
* The signature MR cannot be invalidated and reused without checking.
* libiscsi calls the check_protection transport handler only if
* SCSI-Response is received. And the signature MR is not checked if
* the task is completed for some other reason like a timeout or error
* handling. That's why we must check the signature MR here before
* putting it to the free pool.
*/
if (unlikely(desc->sig_protected)) {
desc->sig_protected = false;
ib_check_mr_status(desc->rsc.sig_mr, IB_MR_CHECK_SIG_STATUS,
&mr_status);
}
iser_reg_desc_put_fr(&iser_task->iser_conn->ib_conn, reg->mem_h);
reg->mem_h = NULL;
}
static void
iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_domain *domain)
{
domain->sig_type = IB_SIG_TYPE_T10_DIF;
domain->sig.dif.pi_interval = scsi_prot_interval(sc);
domain->sig.dif.ref_tag = t10_pi_ref_tag(sc->request);
/*
* At the moment we hard code those, but in the future
* we will take them from sc.
*/
domain->sig.dif.apptag_check_mask = 0xffff;
domain->sig.dif.app_escape = true;
domain->sig.dif.ref_escape = true;
if (sc->prot_flags & SCSI_PROT_REF_INCREMENT)
domain->sig.dif.ref_remap = true;
};
static int
iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
{
switch (scsi_get_prot_op(sc)) {
case SCSI_PROT_WRITE_INSERT:
case SCSI_PROT_READ_STRIP:
sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
iser_set_dif_domain(sc, &sig_attrs->wire);
sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
break;
case SCSI_PROT_READ_INSERT:
case SCSI_PROT_WRITE_STRIP:
sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
iser_set_dif_domain(sc, &sig_attrs->mem);
sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
IB_T10DIF_CSUM : IB_T10DIF_CRC;
break;
case SCSI_PROT_READ_PASS:
case SCSI_PROT_WRITE_PASS:
iser_set_dif_domain(sc, &sig_attrs->wire);
sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
iser_set_dif_domain(sc, &sig_attrs->mem);
sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
IB_T10DIF_CSUM : IB_T10DIF_CRC;
break;
default:
iser_err("Unsupported PI operation %d\n",
scsi_get_prot_op(sc));
return -EINVAL;
}
return 0;
}
static inline void
iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
{
*mask = 0;
if (sc->prot_flags & SCSI_PROT_REF_CHECK)
*mask |= IB_SIG_CHECK_REFTAG;
if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
*mask |= IB_SIG_CHECK_GUARD;
}
static inline void
iser_inv_rkey(struct ib_send_wr *inv_wr,
struct ib_mr *mr,
struct ib_cqe *cqe,
struct ib_send_wr *next_wr)
{
inv_wr->opcode = IB_WR_LOCAL_INV;
inv_wr->wr_cqe = cqe;
inv_wr->ex.invalidate_rkey = mr->rkey;
inv_wr->send_flags = 0;
inv_wr->num_sge = 0;
inv_wr->next = next_wr;
}
static int
iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
struct iser_data_buf *mem,
struct iser_data_buf *sig_mem,
struct iser_reg_resources *rsc,
struct iser_mem_reg *sig_reg)
{
struct iser_tx_desc *tx_desc = &iser_task->desc;
struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
struct ib_mr *mr = rsc->sig_mr;
struct ib_sig_attrs *sig_attrs = mr->sig_attrs;
struct ib_reg_wr *wr = &tx_desc->reg_wr;
int ret;
memset(sig_attrs, 0, sizeof(*sig_attrs));
ret = iser_set_sig_attrs(iser_task->sc, sig_attrs);
if (ret)
goto err;
iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask);
if (rsc->sig_mr->need_inval)
iser_inv_rkey(&tx_desc->inv_wr, mr, cqe, &wr->wr);
ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
ret = ib_map_mr_sg_pi(mr, mem->sg, mem->dma_nents, NULL,
sig_mem->sg, sig_mem->dma_nents, NULL, SZ_4K);
if (unlikely(ret)) {
iser_err("failed to map PI sg (%d)\n",
mem->dma_nents + sig_mem->dma_nents);
goto err;
}
memset(wr, 0, sizeof(*wr));
wr->wr.next = &tx_desc->send_wr;
wr->wr.opcode = IB_WR_REG_MR_INTEGRITY;
wr->wr.wr_cqe = cqe;
wr->wr.num_sge = 0;
wr->wr.send_flags = 0;
wr->mr = mr;
wr->key = mr->rkey;
wr->access = IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_WRITE;
rsc->sig_mr->need_inval = true;
sig_reg->sge.lkey = mr->lkey;
sig_reg->rkey = mr->rkey;
sig_reg->sge.addr = mr->iova;
sig_reg->sge.length = mr->length;
iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=%u\n",
sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr,
sig_reg->sge.length);
err:
return ret;
}
static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
struct iser_data_buf *mem,
struct iser_reg_resources *rsc,
struct iser_mem_reg *reg)
{
struct iser_tx_desc *tx_desc = &iser_task->desc;
struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
struct ib_mr *mr = rsc->mr;
struct ib_reg_wr *wr = &tx_desc->reg_wr;
int n;
if (rsc->mr->need_inval)
iser_inv_rkey(&tx_desc->inv_wr, mr, cqe, &wr->wr);
ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
n = ib_map_mr_sg(mr, mem->sg, mem->dma_nents, NULL, SZ_4K);
if (unlikely(n != mem->dma_nents)) {
iser_err("failed to map sg (%d/%d)\n",
n, mem->dma_nents);
return n < 0 ? n : -EINVAL;
}
wr->wr.next = &tx_desc->send_wr;
wr->wr.opcode = IB_WR_REG_MR;
wr->wr.wr_cqe = cqe;
wr->wr.send_flags = 0;
wr->wr.num_sge = 0;
wr->mr = mr;
wr->key = mr->rkey;
wr->access = IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_READ;
rsc->mr->need_inval = true;
reg->sge.lkey = mr->lkey;
reg->rkey = mr->rkey;
reg->sge.addr = mr->iova;
reg->sge.length = mr->length;
iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=0x%x\n",
reg->sge.lkey, reg->rkey, reg->sge.addr, reg->sge.length);
return 0;
}
static int
iser_reg_data_sg(struct iscsi_iser_task *task,
struct iser_data_buf *mem,
struct iser_fr_desc *desc,
bool use_dma_key,
struct iser_mem_reg *reg)
{
struct iser_device *device = task->iser_conn->ib_conn.device;
if (use_dma_key)
return iser_reg_dma(device, mem, reg);
return iser_fast_reg_mr(task, mem, &desc->rsc, reg);
}
int iser_reg_mem_fastreg(struct iscsi_iser_task *task,
enum iser_data_dir dir,
bool all_imm)
{
struct ib_conn *ib_conn = &task->iser_conn->ib_conn;
struct iser_data_buf *mem = &task->data[dir];
struct iser_mem_reg *reg = &task->rdma_reg[dir];
struct iser_fr_desc *desc = NULL;
bool use_dma_key;
int err;
use_dma_key = mem->dma_nents == 1 && (all_imm || !iser_always_reg) &&
scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL;
if (!use_dma_key) {
desc = iser_reg_desc_get_fr(ib_conn);
reg->mem_h = desc;
}
if (scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL) {
err = iser_reg_data_sg(task, mem, desc, use_dma_key, reg);
if (unlikely(err))
goto err_reg;
} else {
err = iser_reg_sig_mr(task, mem, &task->prot[dir],
&desc->rsc, reg);
if (unlikely(err))
goto err_reg;
desc->sig_protected = true;
}
return 0;
err_reg:
if (desc)
iser_reg_desc_put_fr(ib_conn, desc);
return err;
}