Last active
January 29, 2019 01:12
-
-
Save ggouaillardet/332c3bff74f88aac756e3fc076bbb346 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| diff --git a/ompi/datatype/ompi_datatype_args.c b/ompi/datatype/ompi_datatype_args.c | |
| index 737d3e5..cc50bc9 100644 | |
| --- a/ompi/datatype/ompi_datatype_args.c | |
| +++ b/ompi/datatype/ompi_datatype_args.c | |
| @@ -13,8 +13,8 @@ | |
| * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. | |
| * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights | |
| * reserved. | |
| - * Copyright (c) 2015-2017 Research Organization for Information Science | |
| - * and Technology (RIST). All rights reserved. | |
| + * Copyright (c) 2015-2019 Research Organization for Information Science | |
| + * and Technology (RIST). All rights reserved. | |
| * Copyright (c) 2017 IBM Corporation. All rights reserved. | |
| * $COPYRIGHT$ | |
| * | |
| @@ -237,6 +237,8 @@ int32_t ompi_datatype_set_args( ompi_datatype_t* pData, | |
| */ | |
| OBJ_RETAIN( d[pos] ); | |
| pArgs->total_pack_size += ((ompi_datatype_args_t*)d[pos]->args)->total_pack_size; | |
| + } else { | |
| + pArgs->total_pack_size += sizeof(int); /* _NAMED */ | |
| } | |
| pArgs->total_pack_size += sizeof(int); /* each data has an ID */ | |
| } | |
| diff --git a/ompi/mca/osc/rdma/osc_rdma_accumulate.c b/ompi/mca/osc/rdma/osc_rdma_accumulate.c | |
| index 31c3fc2..e582e0d 100644 | |
| --- a/ompi/mca/osc/rdma/osc_rdma_accumulate.c | |
| +++ b/ompi/mca/osc/rdma/osc_rdma_accumulate.c | |
| @@ -2,9 +2,11 @@ | |
| /* | |
| * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights | |
| * reserved. | |
| - * Copyright (c) 2016-2017 Research Organization for Information Science | |
| - * and Technology (RIST). All rights reserved. | |
| + * Copyright (c) 2016-2019 Research Organization for Information Science | |
| + * and Technology (RIST). All rights reserved. | |
| * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. | |
| + * Copyright (c) 2019 Triad National Security, LLC. All rights | |
| + * reserved. | |
| * $COPYRIGHT$ | |
| * | |
| * Additional copyrights may follow | |
| @@ -50,71 +52,6 @@ struct ompi_osc_rdma_event_t { | |
| typedef struct ompi_osc_rdma_event_t ompi_osc_rdma_event_t; | |
| -#if 0 | |
| -static void *ompi_osc_rdma_event_put (int fd, int flags, void *context) | |
| -{ | |
| - ompi_osc_rdma_event_t *event = (ompi_osc_rdma_event_t *) context; | |
| - int ret; | |
| - | |
| - ret = event->module->selected_btl->btl_put (event->module->selected_btl, event->endpoint, event->local_address, | |
| - event->remote_address, event->local_handle, event->remote_handle, | |
| - event->length, 0, MCA_BTL_NO_ORDER, event->cbfunc, event->cbcontext, | |
| - event->cbdata); | |
| - if (OPAL_LIKELY(OPAL_SUCCESS == ret)) { | |
| - /* done with this event */ | |
| - opal_event_del (&event->super); | |
| - free (event); | |
| - } else { | |
| - /* re-activate the event */ | |
| - opal_event_active (&event->super, OPAL_EV_READ, 1); | |
| - } | |
| - | |
| - return NULL; | |
| -} | |
| - | |
| -static int ompi_osc_rdma_event_queue (ompi_osc_rdma_module_t *module, struct mca_btl_base_endpoint_t *endpoint, | |
| - ompi_osc_rdma_event_type_t event_type, void *local_address, mca_btl_base_registration_handle_t *local_handle, | |
| - uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle, | |
| - uint64_t length, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, | |
| - void *cbdata) | |
| -{ | |
| - ompi_osc_rdma_event_t *event = malloc (sizeof (*event)); | |
| - void *(*event_func) (int, int, void *); | |
| - | |
| - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "queueing event type %d", event_type); | |
| - | |
| - if (OPAL_UNLIKELY(NULL == event)) { | |
| - return OMPI_ERR_OUT_OF_RESOURCE; | |
| - } | |
| - | |
| - event->module = module; | |
| - event->endpoint = endpoint; | |
| - event->local_address = local_address; | |
| - event->local_handle = local_handle; | |
| - event->remote_address = remote_address; | |
| - event->remote_handle = remote_handle; | |
| - event->length = length; | |
| - event->cbfunc = cbfunc; | |
| - event->cbcontext = cbcontext; | |
| - event->cbdata = cbdata; | |
| - | |
| - switch (event_type) { | |
| - case OMPI_OSC_RDMA_EVENT_TYPE_PUT: | |
| - event_func = ompi_osc_rdma_event_put; | |
| - break; | |
| - default: | |
| - opal_output(0, "osc/rdma: cannot queue unknown event type %d", event_type); | |
| - abort (); | |
| - } | |
| - | |
| - opal_event_set (opal_sync_event_base, &event->super, -1, OPAL_EV_READ, | |
| - event_func, event); | |
| - opal_event_active (&event->super, OPAL_EV_READ, 1); | |
| - | |
| - return OMPI_SUCCESS; | |
| -} | |
| -#endif | |
| - | |
| static int ompi_osc_rdma_gacc_local (const void *source_buffer, int source_count, ompi_datatype_t *source_datatype, | |
| void *result_buffer, int result_count, ompi_datatype_t *result_datatype, | |
| ompi_osc_rdma_peer_t *peer, uint64_t target_address, | |
| @@ -127,7 +64,7 @@ static int ompi_osc_rdma_gacc_local (const void *source_buffer, int source_count | |
| do { | |
| OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "performing accumulate with local region(s)"); | |
| - if (NULL != result_buffer) { | |
| + if (NULL != result_datatype) { | |
| /* get accumulate */ | |
| ret = ompi_datatype_sndrcv ((void *) (intptr_t) target_address, target_count, target_datatype, | |
| @@ -184,7 +121,8 @@ static inline int ompi_osc_rdma_cas_local (const void *source_addr, const void * | |
| static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const void *source, int source_count, | |
| ompi_datatype_t *source_datatype, void *result, int result_count, | |
| - ompi_datatype_t *result_datatype, ompi_osc_rdma_peer_t *peer, uint64_t target_address, | |
| + ompi_datatype_t *result_datatype, opal_convertor_t *result_convertor, | |
| + ompi_osc_rdma_peer_t *peer, uint64_t target_address, | |
| mca_btl_base_registration_handle_t *target_handle, int target_count, | |
| ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_osc_rdma_request_t *request) | |
| { | |
| @@ -219,8 +157,7 @@ static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const v | |
| uint32_t iov_count = 1; | |
| size_t size = request->len; | |
| - opal_convertor_unpack (&request->convertor, &iov, &iov_count, &size); | |
| - opal_convertor_cleanup (&request->convertor); | |
| + opal_convertor_unpack (result_convertor, &iov, &iov_count, &size); | |
| } else { | |
| /* copy contiguous data to the result buffer */ | |
| ompi_datatype_sndrcv (ptr, len, MPI_BYTE, result, result_count, result_datatype); | |
| @@ -264,7 +201,7 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v | |
| struct iovec source_iovec[OMPI_OSC_RDMA_DECODE_MAX], target_iovec[OMPI_OSC_RDMA_DECODE_MAX]; | |
| const size_t acc_limit = (mca_osc_rdma_component.buffer_size >> 3); | |
| uint32_t source_primitive_count, target_primitive_count; | |
| - opal_convertor_t source_convertor, target_convertor; | |
| + opal_convertor_t source_convertor, target_convertor, result_convertor; | |
| uint32_t source_iov_count, target_iov_count; | |
| uint32_t source_iov_index, target_iov_index; | |
| ompi_datatype_t *source_primitive, *target_primitive; | |
| @@ -281,6 +218,13 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v | |
| request->internal = true; | |
| } | |
| + if (&ompi_mpi_op_no_op.op == op) { | |
| + /* NTH: just zero these out to catch any coding errors (they should be ignored in the no-op case) */ | |
| + source_count = 0; | |
| + source_datatype = NULL; | |
| + source_addr = NULL; | |
| + } | |
| + | |
| request->cleanup = ompi_osc_rdma_gacc_master_cleanup; | |
| request->type = result_datatype ? OMPI_OSC_RDMA_TYPE_GET_ACC : OMPI_OSC_RDMA_TYPE_ACC; | |
| @@ -303,7 +247,7 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v | |
| } | |
| ret = ompi_osc_rdma_gacc_contig (sync, source_addr, source_count, source_datatype, result_addr, | |
| - result_count, result_datatype, peer, target_address, | |
| + result_count, result_datatype, NULL, peer, target_address, | |
| target_handle, target_count, target_datatype, op, | |
| request); | |
| if (OPAL_LIKELY(OMPI_SUCCESS == ret)) { | |
| @@ -357,6 +301,20 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v | |
| if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { | |
| return ret; | |
| } | |
| + source_iov_count = 0; | |
| + } else { | |
| + source_iovec[0].iov_len = (size_t) -1; | |
| + source_iovec[0].iov_base = NULL; | |
| + source_iov_count = 1; | |
| + } | |
| + | |
| + if (result_datatype) { | |
| + OBJ_CONSTRUCT(&result_convertor, opal_convertor_t); | |
| + ret = opal_convertor_copy_and_prepare_for_recv (ompi_mpi_local_convertor, &result_datatype->super, result_count, result_addr, | |
| + 0, &result_convertor); | |
| + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { | |
| + return ret; | |
| + } | |
| } | |
| /* target_datatype can never be NULL */ | |
| @@ -372,59 +330,42 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v | |
| target_iov_index = 0; | |
| target_iov_count = 0; | |
| + source_iov_index = 0; | |
| result_position = 0; | |
| subreq = NULL; | |
| do { | |
| - /* decode segments of the source data */ | |
| - source_iov_count = OMPI_OSC_RDMA_DECODE_MAX; | |
| - source_iov_index = 0; | |
| - /* opal_convertor_raw returns done when it has reached the end of the data */ | |
| - if (!source_datatype) { | |
| - done = true; | |
| - source_iovec[0].iov_len = (size_t) -1; | |
| - source_iovec[0].iov_base = NULL; | |
| - source_iov_count = 1; | |
| - } else { | |
| - done = opal_convertor_raw (&source_convertor, source_iovec, &source_iov_count, &source_size); | |
| - } | |
| - | |
| - /* loop on the target segments until we have exhaused the decoded source data */ | |
| - while (source_iov_index != source_iov_count) { | |
| - if (target_iov_index == target_iov_count) { | |
| - /* decode segments of the target buffer */ | |
| - target_iov_count = OMPI_OSC_RDMA_DECODE_MAX; | |
| - target_iov_index = 0; | |
| - (void) opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size); | |
| + /* decode segments of the target buffer */ | |
| + target_iov_count = OMPI_OSC_RDMA_DECODE_MAX; | |
| + target_iov_index = 0; | |
| + done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size); | |
| + | |
| + /* loop on the source segments (if any) until we have exhausted the decoded target data */ | |
| + while (target_iov_index != target_iov_count) { | |
| + if (source_iov_count == source_iov_index) { | |
| + /* decode segments of the source data */ | |
| + source_iov_count = OMPI_OSC_RDMA_DECODE_MAX; | |
| + source_iov_index = 0; | |
| + (void) opal_convertor_raw (&source_convertor, source_iovec, &source_iov_count, &source_size); | |
| } | |
| /* we already checked that the target was large enough. this should be impossible */ | |
| assert (0 != target_iov_count); | |
| /* determine how much to put in this operation */ | |
| - acc_len = min(target_iovec[target_iov_index].iov_len, source_iovec[source_iov_index].iov_len); | |
| - acc_len = min((size_t) acc_len, acc_limit); | |
| + acc_len = min(min(target_iovec[target_iov_index].iov_len, source_iovec[source_iov_index].iov_len), acc_limit); | |
| - /* execute the get */ | |
| + /* execute the get-accumulate */ | |
| if (!subreq) { | |
| OMPI_OSC_RDMA_REQUEST_ALLOC(module, peer, subreq); | |
| subreq->internal = true; | |
| subreq->parent_request = request; | |
| + subreq->type = result_datatype ? OMPI_OSC_RDMA_TYPE_GET_ACC : OMPI_OSC_RDMA_TYPE_ACC; | |
| (void) OPAL_THREAD_ADD_FETCH32 (&request->outstanding_requests, 1); | |
| } | |
| - if (result_datatype) { | |
| - /* prepare a convertor for this part of the result */ | |
| - opal_convertor_copy_and_prepare_for_recv (ompi_mpi_local_convertor, &result_datatype->super, result_count, | |
| - result_addr, 0, &subreq->convertor); | |
| - opal_convertor_set_position (&subreq->convertor, &result_position); | |
| - subreq->type = OMPI_OSC_RDMA_TYPE_GET_ACC; | |
| - } else { | |
| - subreq->type = OMPI_OSC_RDMA_TYPE_ACC; | |
| - } | |
| - | |
| ret = ompi_osc_rdma_gacc_contig (sync, source_iovec[source_iov_index].iov_base, acc_len / target_primitive->super.size, | |
| - target_primitive, NULL, 0, NULL, peer, | |
| + target_primitive, NULL, 0, NULL, &result_convertor, peer, | |
| (uint64_t) (intptr_t) target_iovec[target_iov_index].iov_base, target_handle, | |
| acc_len / target_primitive->super.size, target_primitive, op, subreq); | |
| if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { | |
| @@ -444,13 +385,16 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v | |
| /* adjust io vectors */ | |
| target_iovec[target_iov_index].iov_len -= acc_len; | |
| - source_iovec[source_iov_index].iov_len -= acc_len; | |
| target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + acc_len); | |
| - source_iovec[source_iov_index].iov_base = (void *)((intptr_t) source_iovec[source_iov_index].iov_base + acc_len); | |
| + target_iov_index += (0 == target_iovec[target_iov_index].iov_len); | |
| + | |
| result_position += acc_len; | |
| - source_iov_index += !source_datatype || (0 == source_iovec[source_iov_index].iov_len); | |
| - target_iov_index += (0 == target_iovec[target_iov_index].iov_len); | |
| + if (source_datatype) { | |
| + source_iov_index += (0 == source_iovec[source_iov_index].iov_len); | |
| + source_iovec[source_iov_index].iov_len -= acc_len; | |
| + source_iovec[source_iov_index].iov_base = (void *)((intptr_t) source_iovec[source_iov_index].iov_base + acc_len); | |
| + } | |
| } | |
| } while (!done); | |
| @@ -462,6 +406,11 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v | |
| OBJ_DESTRUCT(&source_convertor); | |
| } | |
| + if (result_datatype) { | |
| + opal_convertor_cleanup (&result_convertor); | |
| + OBJ_DESTRUCT(&result_convertor); | |
| + } | |
| + | |
| OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "finished scheduling rdma on non-contiguous datatype(s)"); | |
| opal_convertor_cleanup (&target_convertor); | |
| @@ -587,9 +536,9 @@ static int ompi_osc_rdma_fetch_and_op_cas (ompi_osc_rdma_sync_t *sync, const voi | |
| new_value = old_value; | |
| if (&ompi_mpi_op_replace.op == op) { | |
| - memcpy ((void *)((intptr_t) &new_value + offset), origin_addr, extent); | |
| + memcpy ((void *)((intptr_t) &new_value + offset), origin_addr + dt->super.true_lb, extent); | |
| } else if (&ompi_mpi_op_no_op.op != op) { | |
| - ompi_op_reduce (op, (void *) origin_addr, (void*)((intptr_t) &new_value + offset), 1, dt); | |
| + ompi_op_reduce (op, (void *) origin_addr + dt->super.true_lb, (void*)((intptr_t) &new_value + offset), 1, dt); | |
| } | |
| ret = ompi_osc_rdma_btl_cswap (module, peer->data_endpoint, address, target_handle, | |
| @@ -854,7 +803,7 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo | |
| ompi_osc_rdma_module_t *module = sync->module; | |
| mca_btl_base_registration_handle_t *target_handle; | |
| uint64_t target_address; | |
| - ptrdiff_t lb, origin_extent, target_span; | |
| + ptrdiff_t lb, target_lb, origin_extent, target_span; | |
| bool lock_acquired = false; | |
| int ret; | |
| @@ -867,11 +816,11 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo | |
| return OMPI_SUCCESS; | |
| } | |
| - target_span = opal_datatype_span(&target_datatype->super, target_count, &lb); | |
| + target_span = opal_datatype_span(&target_datatype->super, target_count, &target_lb); | |
| // a buffer defined by (buf, count, dt) | |
| // will have data starting at buf+offset and ending len bytes later: | |
| - ret = osc_rdma_get_remote_segment (module, peer, target_disp, target_span+lb, &target_address, &target_handle); | |
| + ret = osc_rdma_get_remote_segment (module, peer, target_disp, target_span+target_lb, &target_address, &target_handle); | |
| if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { | |
| return ret; | |
| } | |
| @@ -895,10 +844,10 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo | |
| if (origin_extent <= 8 && 1 == origin_count && !ompi_osc_rdma_peer_local_base (peer)) { | |
| if (module->acc_use_amo && ompi_datatype_is_predefined (origin_datatype)) { | |
| if (NULL == result_addr) { | |
| - ret = ompi_osc_rdma_acc_single_atomic (sync, origin_addr, origin_datatype, origin_extent, peer, target_address, | |
| + ret = ompi_osc_rdma_acc_single_atomic (sync, origin_addr, origin_datatype, origin_extent, peer, target_address+target_lb, | |
| target_handle, op, request, lock_acquired); | |
| } else { | |
| - ret = ompi_osc_rdma_fetch_and_op_atomic (sync, origin_addr, result_addr, origin_datatype, origin_extent, peer, target_address, | |
| + ret = ompi_osc_rdma_fetch_and_op_atomic (sync, origin_addr, result_addr, origin_datatype, origin_extent, peer, target_address+target_lb, | |
| target_handle, op, request, lock_acquired); | |
| } | |
| @@ -907,7 +856,7 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo | |
| } | |
| } | |
| - ret = ompi_osc_rdma_fetch_and_op_cas (sync, origin_addr, result_addr, origin_datatype, origin_extent, peer, target_address, | |
| + ret = ompi_osc_rdma_fetch_and_op_cas (sync, origin_addr, result_addr, origin_datatype, origin_extent, peer, target_address+target_lb, | |
| target_handle, op, request, lock_acquired); | |
| if (OMPI_SUCCESS == ret) { | |
| return OMPI_SUCCESS; | |
| diff --git a/ompi/mca/osc/rdma/osc_rdma_request.c b/ompi/mca/osc/rdma/osc_rdma_request.c | |
| index eb06368..d1cf115 100644 | |
| --- a/ompi/mca/osc/rdma/osc_rdma_request.c | |
| +++ b/ompi/mca/osc/rdma/osc_rdma_request.c | |
| @@ -6,6 +6,8 @@ | |
| * Copyright (c) 2016 The University of Tennessee and The University | |
| * of Tennessee Research Foundation. All rights | |
| * reserved. | |
| + * Copyright (c) 2019 Triad National Security, LLC. All rights | |
| + * reserved. | |
| * $COPYRIGHT$ | |
| * | |
| * Additional copyrights may follow | |
| @@ -56,15 +58,7 @@ static void request_construct(ompi_osc_rdma_request_t *request) | |
| request->internal = false; | |
| request->cleanup = NULL; | |
| request->outstanding_requests = 0; | |
| - OBJ_CONSTRUCT(&request->convertor, opal_convertor_t); | |
| -} | |
| - | |
| -static void request_destruct(ompi_osc_rdma_request_t *request) | |
| -{ | |
| - OBJ_DESTRUCT(&request->convertor); | |
| } | |
| -OBJ_CLASS_INSTANCE(ompi_osc_rdma_request_t, | |
| - ompi_request_t, | |
| - request_construct, | |
| - request_destruct); | |
| +OBJ_CLASS_INSTANCE(ompi_osc_rdma_request_t, ompi_request_t, | |
| + request_construct, NULL); | |
| diff --git a/ompi/mca/osc/rdma/osc_rdma_request.h b/ompi/mca/osc/rdma/osc_rdma_request.h | |
| index ad052e1..3a57d84 100644 | |
| --- a/ompi/mca/osc/rdma/osc_rdma_request.h | |
| +++ b/ompi/mca/osc/rdma/osc_rdma_request.h | |
| @@ -3,6 +3,8 @@ | |
| * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. | |
| * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights | |
| * reserved. | |
| + * Copyright (c) 2019 Triad National Security, LLC. All rights | |
| + * reserved. | |
| * $COPYRIGHT$ | |
| * | |
| * Additional copyrights may follow | |
| @@ -51,8 +53,6 @@ struct ompi_osc_rdma_request_t { | |
| uint64_t target_address; | |
| struct ompi_osc_rdma_request_t *parent_request; | |
| - /* used for non-contiguous get accumulate operations */ | |
| - opal_convertor_t convertor; | |
| /** synchronization object */ | |
| struct ompi_osc_rdma_sync_t *sync; | |
| diff --git a/ompi/op/op.h b/ompi/op/op.h | |
| index aa52688..7e1fe33 100644 | |
| --- a/ompi/op/op.h | |
| +++ b/ompi/op/op.h | |
| @@ -15,6 +15,8 @@ | |
| * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. | |
| * Copyright (c) 2015 Los Alamos National Security, LLC. All rights | |
| * reserved. | |
| + * Copyright (c) 2019 Research Organization for Information Science | |
| + * and Technology (RIST). All rights reserved. | |
| * $COPYRIGHT$ | |
| * | |
| * Additional copyrights may follow | |
| @@ -569,9 +571,16 @@ static inline void ompi_op_reduce(ompi_op_t * op, void *source, | |
| /* For intrinsics, we also pass the corresponding op module */ | |
| if (0 != (op->o_flags & OMPI_OP_FLAGS_INTRINSIC)) { | |
| - op->o_func.intrinsic.fns[ompi_op_ddt_map[dtype->id]](source, target, | |
| - &count, &dtype, | |
| - op->o_func.intrinsic.modules[ompi_op_ddt_map[dtype->id]]); | |
| + int dtype_id; | |
| + if (!ompi_datatype_is_predefined(dtype)) { | |
| + ompi_datatype_t *dt = ompi_datatype_get_single_predefined_type_from_args(dtype); | |
| + dtype_id = ompi_op_ddt_map[dt->id]; | |
| + } else { | |
| + dtype_id = ompi_op_ddt_map[dtype->id]; | |
| + } | |
| + op->o_func.intrinsic.fns[dtype_id](source, target, | |
| + &count, &dtype, | |
| + op->o_func.intrinsic.modules[dtype_id]); | |
| return; | |
| } | |
| diff --git a/opal/datatype/opal_convertor_raw.c b/opal/datatype/opal_convertor_raw.c | |
| index 0901938..777fbc7 100644 | |
| --- a/opal/datatype/opal_convertor_raw.c | |
| +++ b/opal/datatype/opal_convertor_raw.c | |
| @@ -5,8 +5,8 @@ | |
| * reserved. | |
| * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. | |
| * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. | |
| - * Copyright (c) 2017 Research Organization for Information Science | |
| - * and Technology (RIST). All rights reserved. | |
| + * Copyright (c) 2017-2019 Research Organization for Information Science | |
| + * and Technology (RIST). All rights reserved. | |
| * $COPYRIGHT$ | |
| * | |
| * Additional copyrights may follow | |
| @@ -170,9 +170,18 @@ opal_convertor_raw( opal_convertor_t* pConvertor, | |
| ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)(pElem + pElem->loop.items); | |
| if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { | |
| - uint32_t i; | |
| - source_base += end_loop->first_elem_disp; | |
| - for( i = count_desc; (i > 0) && (index < *iov_count); i--, index++ ) { | |
| + ptrdiff_t offset = end_loop->first_elem_disp; | |
| + source_base += offset; | |
| + for(uint32_t i = count_desc; i > 0; i--, index++ ) { | |
| + if (index >= *iov_count) { | |
| + dt_elem_desc_t* nElem = pElem + 1; | |
| + while (nElem->elem.common.type == OPAL_DATATYPE_LOOP) { | |
| + nElem++; | |
| + } | |
| + assert(OPAL_DATATYPE_END_LOOP != nElem->elem.common.type); | |
| + offset = nElem->elem.disp; | |
| + break; | |
| + } | |
| OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, end_loop->size, pConvertor->pBaseBuf, | |
| pConvertor->pDesc, pConvertor->count ); | |
| iov[index].iov_base = (IOVBASE_TYPE *) source_base; | |
| @@ -181,7 +190,7 @@ opal_convertor_raw( opal_convertor_t* pConvertor, | |
| raw_data += end_loop->size; | |
| count_desc--; | |
| } | |
| - source_base -= end_loop->first_elem_disp; | |
| + source_base -= offset; | |
| if( 0 == count_desc ) { /* completed */ | |
| pos_desc += pElem->loop.items + 1; | |
| goto update_loop_description; | |
| diff --git a/test/datatype/Makefile.am b/test/datatype/Makefile.am | |
| index cd86713..8efd034 100644 | |
| --- a/test/datatype/Makefile.am | |
| +++ b/test/datatype/Makefile.am | |
| @@ -4,8 +4,8 @@ | |
| # reserved. | |
| # Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. | |
| # Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. | |
| -# Copyright (c) 2014-2015 Research Organization for Information Science | |
| -# and Technology (RIST). All rights reserved. | |
| +# Copyright (c) 2014-2019 Research Organization for Information Science | |
| +# and Technology (RIST). All rights reserved. | |
| # Copyright (c) 2016 IBM Corporation. All rights reserved. | |
| # $COPYRIGHT$ | |
| # | |
| @@ -15,7 +15,7 @@ | |
| # | |
| if PROJECT_OMPI | |
| - MPI_TESTS = checksum position position_noncontig ddt_test ddt_raw unpack_ooo ddt_pack external32 | |
| + MPI_TESTS = checksum position position_noncontig ddt_test ddt_raw ddt_raw2 unpack_ooo ddt_pack external32 | |
| MPI_CHECKS = to_self | |
| endif | |
| TESTS = opal_datatype_test unpack_hetero $(MPI_TESTS) | |
| @@ -40,6 +40,12 @@ ddt_raw_LDADD = \ | |
| $(top_builddir)/ompi/lib@[email protected] \ | |
| $(top_builddir)/opal/lib@[email protected] | |
| +ddt_raw2_SOURCES = ddt_raw2.c ddt_lib.c ddt_lib.h | |
| +ddt_raw2_LDFLAGS = $(OMPI_PKG_CONFIG_LDFLAGS) | |
| +ddt_raw2_LDADD = \ | |
| + $(top_builddir)/ompi/lib@[email protected] \ | |
| + $(top_builddir)/opal/lib@[email protected] | |
| + | |
| ddt_pack_SOURCES = ddt_pack.c | |
| ddt_pack_LDFLAGS = $(OMPI_PKG_CONFIG_LDFLAGS) | |
| ddt_pack_LDADD = \ | |
| diff --git a/test/datatype/ddt_pack.c b/test/datatype/ddt_pack.c | |
| index 1164e6f..423574e 100644 | |
| --- a/test/datatype/ddt_pack.c | |
| +++ b/test/datatype/ddt_pack.c | |
| @@ -51,7 +51,7 @@ main(int argc, char* argv[]) | |
| int ret = 0; | |
| int blen[4]; | |
| ptrdiff_t disp[4]; | |
| - ompi_datatype_t *newType, *types[4], *struct_type, *vec_type; | |
| + ompi_datatype_t *newType, *types[4], *struct_type, *vec_type, *dup_type; | |
| ptrdiff_t old_lb, old_extent, old_true_lb, old_true_extent; | |
| ptrdiff_t lb, extent, true_lb, true_extent; | |
| @@ -394,6 +394,53 @@ main(int argc, char* argv[]) | |
| ret = ompi_datatype_destroy(&unpacked_dt); | |
| if (ret != 0) goto cleanup; | |
| + /** | |
| + * | |
| + * TEST 7 | |
| + * | |
| + */ | |
| + printf("---> Basic test with dup'ed MPI_INT\n"); | |
| + | |
| + ret = get_extents(&ompi_mpi_int.dt, &old_lb, &old_extent, &old_true_lb, &old_true_extent); | |
| + if (ret != 0) goto cleanup; | |
| + ret = ompi_datatype_duplicate(&ompi_mpi_int.dt, &dup_type); | |
| + if (ret != 0) goto cleanup; | |
| + ompi_datatype_t * type = &ompi_mpi_int.dt; | |
| + ret = ompi_datatype_set_args(dup_type, 0, NULL, 0, NULL, 1, &type, MPI_COMBINER_DUP); | |
| + if (ret != 0) goto cleanup; | |
| + packed_ddt_len = ompi_datatype_pack_description_length(dup_type); | |
| + ptr = payload = malloc(packed_ddt_len); | |
| + ret = ompi_datatype_get_pack_description(dup_type, &packed_ddt); | |
| + if (ret != 0) goto cleanup; | |
| + | |
| + memcpy(payload, packed_ddt, packed_ddt_len); | |
| + unpacked_dt = ompi_datatype_create_from_packed_description(&payload, | |
| + ompi_proc_local()); | |
| + free(ptr); | |
| + if (unpacked_dt == NULL) { | |
| + printf("\tFAILED: could not unpack datatype\n"); | |
| + ret = 1; | |
| + goto cleanup; | |
| + } else { | |
| + ret = get_extents(unpacked_dt, &lb, &extent, &true_lb, &true_extent); | |
| + if (ret != 0) goto cleanup; | |
| + | |
| + if (old_lb != lb || old_extent != extent || | |
| + old_true_lb != true_lb || old_true_extent != extent) { | |
| + printf("\tFAILED: datatypes don't match\n"); | |
| + ret = 1; | |
| + goto cleanup; | |
| + } | |
| + printf("\tPASSED\n"); | |
| + } | |
| + if (unpacked_dt == &ompi_mpi_int32_t.dt) { | |
| + printf("\tPASSED\n"); | |
| + } else { | |
| + printf("\tFAILED: datatypes don't match\n"); | |
| + ret = 1; | |
| + goto cleanup; | |
| + } | |
| + ompi_datatype_destroy(&dup_type); | |
| cleanup: | |
| ompi_datatype_finalize(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment