From patchwork Mon May 25 12:56:37 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [RHEL7,COMMIT] fs/fuse kio: align CS messages to 512 bytes From: Konstantin Khorenko X-Patchwork-Id: 12796 Message-Id: <202005251256.04PCubXR003041@finist-ce7.sw.ru> To: Ildar Ismagilov Cc: OpenVZ devel Date: Mon, 25 May 2020 15:56:37 +0300 The commit is pushed to "branch-rh7-3.10.0-1127.8.2.vz7.161.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-1127.8.2.vz7.161.1 ------> commit 94fa9d799079e071295ec87761f6df55b94f60b3 Author: Ildar Ismagilov Date: Mon May 25 15:56:37 2020 +0300 fs/fuse kio: align CS messages to 512 bytes CS now receives client messages in a batch into large contiguous buffers. The csd_aio and csd_next modes require data buffers to be 512-byte aligned so that they can be used for O_DIRECT. This means that every message (including the message header) must now be aligned. Message alignment is used only if the storage version is greater than or equal to PCS_CS_MSG_ALIGNED_VERSION. 
https://pmc.acronis.com/browse/VSTOR-33830 Signed-off-by: Ildar Ismagilov Acked-by: Andrey Zaitsev Acked-by: Alexey Kuznetsov --- fs/fuse/kio/pcs/fuse_stat.c | 1 + fs/fuse/kio/pcs/pcs_cs.c | 86 +++++++++++++++++++++++++++++++++++----- fs/fuse/kio/pcs/pcs_cs.h | 3 ++ fs/fuse/kio/pcs/pcs_cs_prot.h | 9 +++++ fs/fuse/kio/pcs/pcs_map.c | 15 ++++--- fs/fuse/kio/pcs/pcs_prot_types.h | 1 + fs/fuse/kio/pcs/pcs_req.h | 2 + 7 files changed, 100 insertions(+), 17 deletions(-) diff --git a/fs/fuse/kio/pcs/fuse_stat.c b/fs/fuse/kio/pcs/fuse_stat.c index 1bcffa9641a36..47e7f1c404eb5 100644 --- a/fs/fuse/kio/pcs/fuse_stat.c +++ b/fs/fuse/kio/pcs/fuse_stat.c @@ -725,6 +725,7 @@ struct fuse_val_stat *req_stat_entry(struct pcs_fuse_io_stat *io, u32 type) return &io->read_bytes; case PCS_CS_WRITE_SYNC_RESP: case PCS_CS_WRITE_RESP: + case PCS_CS_WRITE_AL_RESP: return &io->write_bytes; case PCS_CS_SYNC_RESP: return &io->flush_cnt; diff --git a/fs/fuse/kio/pcs/pcs_cs.c b/fs/fuse/kio/pcs/pcs_cs.c index 9487e614bce8b..abf714620b9c9 100644 --- a/fs/fuse/kio/pcs/pcs_cs.c +++ b/fs/fuse/kio/pcs/pcs_cs.c @@ -82,6 +82,31 @@ static int pcs_cs_percpu_stat_alloc(struct pcs_cs *cs) return -ENOMEM; } +u32 pcs_cs_msg_size(u32 size, u32 storage_version) +{ + if (pcs_cs_use_aligned_io(storage_version)) + size = ALIGN(size, PCS_CS_MSG_ALIGNMENT); + + return size; +} + +struct pcs_msg* pcs_alloc_cs_msg(u32 type, u32 size, u32 storage_version) +{ + struct pcs_msg* msg; + struct pcs_rpc_hdr* h; + + msg = pcs_rpc_alloc_output_msg(pcs_cs_msg_size(size, storage_version)); + if (!msg) + return NULL; + + h = (struct pcs_rpc_hdr*)msg_inline_head(msg); + memset(h, 0, msg->size); + h->len = msg->size; + h->type = type; + + return msg; +} + static void pcs_cs_percpu_stat_free(struct pcs_cs *cs) { free_percpu(cs->stat.sync_ops_rate); @@ -354,6 +379,7 @@ void pcs_cs_update_stat(struct pcs_cs *cs, u32 iolat, u32 netlat, int op_type) switch (op_type) { case PCS_CS_WRITE_SYNC_RESP: case PCS_CS_WRITE_RESP: + case 
PCS_CS_WRITE_AL_RESP: this_cpu_inc(cs->stat.write_ops_rate->total); break; case PCS_CS_READ_RESP: @@ -549,6 +575,40 @@ static void cs_get_data(struct pcs_msg *msg, int offset, struct iov_iter *it) } } +static void cs_get_data_aligned(struct pcs_msg *msg, int offset, struct iov_iter *it) +{ + struct pcs_int_request * ireq = ireq_from_msg(msg); + int storage_version = atomic_read(&ireq->cc->storage_version); + unsigned hdrsize = pcs_cs_msg_size(sizeof(struct pcs_cs_iohdr), + storage_version); + unsigned padding; + + if (offset < sizeof(struct pcs_cs_iohdr)) { + cs_get_data(msg, offset, it); + return; + } + + if (offset < hdrsize) { + BUILD_BUG_ON(sizeof(ireq->cc->nilbuffer) < PCS_CS_MSG_ALIGNMENT); + iov_iter_init_plain(it, ireq->cc->nilbuffer, hdrsize - offset, 0); + return; + } + + if (offset < hdrsize + ireq->iochunk.size) { + /* cs_get_data() does not know about header padding, so fixup the offset */ + offset -= hdrsize - sizeof(struct pcs_cs_iohdr); + cs_get_data(msg, offset, it); + return; + } + + padding = pcs_cs_msg_size(ireq->iochunk.size, storage_version) - + ireq->iochunk.size; + BUG_ON(offset >= hdrsize + ireq->iochunk.size + padding); + + iov_iter_init_plain(it, ireq->cc->nilbuffer, + hdrsize + ireq->iochunk.size + padding - offset, 0); +} + static void cs_sent(struct pcs_msg *msg) { msg->done = cs_response_done; @@ -565,6 +625,8 @@ void pcs_cs_submit(struct pcs_cs *cs, struct pcs_int_request *ireq) struct pcs_cs_iohdr *ioh; struct pcs_cs_list *csl = ireq->iochunk.csl; struct pcs_map_entry *map = ireq->iochunk.map; /* ireq keeps reference to map */ + int storage_version = atomic_read(&ireq->cc->storage_version); + int aligned_msg; msg->private = cs; @@ -572,15 +634,21 @@ void pcs_cs_submit(struct pcs_cs *cs, struct pcs_int_request *ireq) msg->private2 = ireq; ioh = &ireq->iochunk.hbuf; - ioh->hdr.len = sizeof(struct pcs_cs_iohdr); + ioh->hdr.len = pcs_cs_msg_size(sizeof(struct pcs_cs_iohdr), + storage_version); + aligned_msg = 
pcs_cs_use_aligned_io(storage_version); switch (ireq->iochunk.cmd) { case PCS_REQ_T_READ: ioh->hdr.type = PCS_CS_READ_REQ; break; case PCS_REQ_T_WRITE: - ioh->hdr.type = (ireq->dentry->fileinfo.attr.attrib & PCS_FATTR_IMMEDIATE_WRITE) ? - PCS_CS_WRITE_SYNC_REQ : PCS_CS_WRITE_REQ; - ioh->hdr.len += ireq->iochunk.size; + if (aligned_msg) + ioh->hdr.type = PCS_CS_WRITE_AL_REQ; + else + ioh->hdr.type = (ireq->dentry->fileinfo.attr.attrib & PCS_FATTR_IMMEDIATE_WRITE) ? + PCS_CS_WRITE_SYNC_REQ : PCS_CS_WRITE_REQ; + ioh->hdr.len = pcs_cs_msg_size(ioh->hdr.len + ireq->iochunk.size, + storage_version); break; case PCS_REQ_T_WRITE_HOLE: ioh->hdr.type = PCS_CS_WRITE_HOLE_REQ; @@ -611,7 +679,7 @@ void pcs_cs_submit(struct pcs_cs *cs, struct pcs_int_request *ireq) msg->rpc = NULL; pcs_clear_error(&msg->error); msg->done = cs_sent; - msg->get_iter = cs_get_data; + msg->get_iter = aligned_msg ? cs_get_data_aligned : cs_get_data; if ((map->state & PCS_MAP_DEAD) || (map->cs_list != csl)) { ireq->error.value = PCS_ERR_CSD_STALE_MAP; @@ -1085,17 +1153,13 @@ static struct pcs_msg *cs_prep_probe(struct pcs_cs *cs) struct pcs_msg *msg; struct pcs_cs_map_prop *m; unsigned int msg_sz = offsetof(struct pcs_cs_map_prop, nodes) + sizeof(struct pcs_cs_node_desc); + int storage_version = atomic_read(&cc_from_csset(cs->css)->storage_version); - - msg = pcs_rpc_alloc_output_msg(msg_sz); + msg = pcs_alloc_cs_msg(PCS_CS_MAP_PROP_REQ, msg_sz, storage_version); if (!msg) return NULL; m = (struct pcs_cs_map_prop *)msg_inline_head(msg); - memset(m, 0, msg_sz); - - m->hdr.h.type = PCS_CS_MAP_PROP_REQ; - m->hdr.h.len = msg_sz; m->flags = CS_MAPF_PING; m->nnodes = 1; diff --git a/fs/fuse/kio/pcs/pcs_cs.h b/fs/fuse/kio/pcs/pcs_cs.h index 5a7bee151be8d..81743fd8a3e11 100644 --- a/fs/fuse/kio/pcs/pcs_cs.h +++ b/fs/fuse/kio/pcs/pcs_cs.h @@ -201,4 +201,7 @@ static inline bool cs_is_blacklisted(struct pcs_cs *cs) void pcs_cs_set_stat_up(struct pcs_cs_set *set); +u32 pcs_cs_msg_size(u32 size, u32 
storage_version); +struct pcs_msg* pcs_alloc_cs_msg(u32 type, u32 size, u32 storage_version); + #endif /* _PCS_CS_H_ */ diff --git a/fs/fuse/kio/pcs/pcs_cs_prot.h b/fs/fuse/kio/pcs/pcs_cs_prot.h index 8ca6cbabf7418..12ffbf94cb2e7 100644 --- a/fs/fuse/kio/pcs/pcs_cs_prot.h +++ b/fs/fuse/kio/pcs/pcs_cs_prot.h @@ -6,6 +6,8 @@ #define PCS_CS_FLUSH_WEIGHT (128*1024) #define PCS_CS_HOLE_WEIGHT (4096) +#define PCS_CS_MSG_ALIGNMENT (512ULL) + struct pcs_cs_sync_data { PCS_INTEGRITY_SEQ_T integrity_seq; /* Invariant. Changed only on CS host crash */ @@ -67,6 +69,10 @@ struct pcs_cs_iohdr { struct pcs_cs_sync_resp sync_resp[0]; /* Used only in response to write/sync */ } __attribute__((aligned(8))); +static inline int pcs_cs_use_aligned_io(u32 storage_version) +{ + return (storage_version >= PCS_CS_MSG_ALIGNED_VERSION); +} /* Maximal message size. Actually, random */ #define PCS_CS_MSG_MAX_SIZE (1024*1024 + sizeof(struct pcs_cs_iohdr)) @@ -86,6 +92,9 @@ struct pcs_cs_iohdr { #define PCS_CS_WRITE_SYNC_REQ (PCS_RPC_CS_CLIENT_BASE + 8) #define PCS_CS_WRITE_SYNC_RESP (PCS_CS_WRITE_SYNC_REQ|PCS_RPC_DIRECTION) +#define PCS_CS_WRITE_AL_REQ (PCS_RPC_CS_CLIENT_BASE + 20) +#define PCS_CS_WRITE_AL_RESP (PCS_CS_WRITE_AL_REQ|PCS_RPC_DIRECTION) + struct pcs_cs_cong_notification { struct pcs_rpc_hdr hdr; diff --git a/fs/fuse/kio/pcs/pcs_map.c b/fs/fuse/kio/pcs/pcs_map.c index d70ef8fea70e8..89caac4284a1d 100644 --- a/fs/fuse/kio/pcs/pcs_map.c +++ b/fs/fuse/kio/pcs/pcs_map.c @@ -2575,6 +2575,7 @@ static int commit_cs_record(struct pcs_map_entry * m, struct pcs_cs_record * rec BUG_ON(srec->dirty_integrity && srec->dirty_integrity != sync->integrity_seq); dirtify = (op_type == PCS_CS_WRITE_SYNC_RESP || op_type == PCS_CS_WRITE_RESP || + op_type == PCS_CS_WRITE_AL_RESP || op_type == PCS_CS_WRITE_HOLE_RESP || op_type == PCS_CS_WRITE_ZERO_RESP); /* The following looks scary, could be more clear. 
* The goal is to update sync seq numbers: @@ -2926,17 +2927,15 @@ static void prepare_map_flush_msg(struct pcs_map_entry * m, struct pcs_int_reque { struct pcs_cs_iohdr * ioh; struct pcs_cs_sync_resp * arr; + unsigned varsize = 0; assert_spin_locked(&m->lock); ioh = (struct pcs_cs_iohdr *)msg->_inline_buffer; arr = (struct pcs_cs_sync_resp *)(ioh + 1); - ioh->hdr.len = sizeof(struct pcs_cs_iohdr); - ioh->hdr.type = PCS_CS_SYNC_REQ; memset(&ioh->sync, 0, sizeof(ioh->sync)); ioh->offset = 0; - ioh->size = 0; ioh->_reserved = 0; ioh->sync.misc = PCS_CS_IO_SEQ; @@ -2959,7 +2958,7 @@ static void prepare_map_flush_msg(struct pcs_map_entry * m, struct pcs_int_reque arr->sync.ts_io = 0; arr->sync.ts_net = 0; arr->sync._reserved = 0; - ioh->hdr.len += sizeof(struct pcs_cs_sync_resp); + varsize += sizeof(struct pcs_cs_sync_resp); FUSE_KLOG(cc_from_maps(m->maps)->fc, LOG_DEBUG5, "fill sync "NODE_FMT" [%d,%d,%d,%d]", NODE_ARGS(arr->cs_id), arr->sync.integrity_seq, arr->sync.sync_epoch, arr->sync.sync_dirty, arr->sync.sync_current); @@ -2967,6 +2966,9 @@ static void prepare_map_flush_msg(struct pcs_map_entry * m, struct pcs_int_reque } } } + ioh->size = varsize; + ioh->hdr.len = pcs_cs_msg_size(sizeof(struct pcs_cs_iohdr) + varsize, + atomic_read(&cc_from_map(m)->storage_version)); msg->size = ioh->hdr.len; msg->private = sreq; msg->done = sync_done; @@ -3019,8 +3021,9 @@ static int prepare_map_flush_ireq(struct pcs_map_entry *m, if (!sreq) goto err_cslist; - msg = pcs_rpc_alloc_output_msg(sizeof(struct pcs_cs_iohdr) + - cslist->nsrv * sizeof(struct pcs_cs_sync_resp)); + msg = pcs_alloc_cs_msg(PCS_CS_SYNC_REQ, sizeof(struct pcs_cs_iohdr) + + cslist->nsrv * sizeof(struct pcs_cs_sync_resp), + atomic_read(&cc_from_map(m)->storage_version)); if (!msg) goto err_ireq; diff --git a/fs/fuse/kio/pcs/pcs_prot_types.h b/fs/fuse/kio/pcs/pcs_prot_types.h index 638b076674678..d48cfc4f0470d 100644 --- a/fs/fuse/kio/pcs/pcs_prot_types.h +++ b/fs/fuse/kio/pcs/pcs_prot_types.h @@ -15,6 +15,7 @@ 
#define PCS_VERSION_UNKNOWN 0 #define PCS_VZ7_VERSION 100 +#define PCS_CS_MSG_ALIGNED_VERSION 134 /* milliseconds since Jan 1970 */ typedef u64 PCS_FILETIME_T; diff --git a/fs/fuse/kio/pcs/pcs_req.h b/fs/fuse/kio/pcs/pcs_req.h index 33f0fe9e7cb55..722175a1132f6 100644 --- a/fs/fuse/kio/pcs/pcs_req.h +++ b/fs/fuse/kio/pcs/pcs_req.h @@ -239,6 +239,8 @@ struct pcs_cluster_core char cluster_name[NAME_MAX]; atomic_t storage_version; + + char nilbuffer[PCS_CS_MSG_ALIGNMENT]; }; static inline struct pcs_cluster_core *cc_from_csset(struct pcs_cs_set * css)