| 1 | /* -*- Mode: C; c-basic-offset:4 ; -*- */ |
|---|
| 2 | /* |
|---|
| 3 | * (C) 2001 by Argonne National Laboratory. |
|---|
| 4 | * See COPYRIGHT in top-level directory. |
|---|
| 5 | */ |
|---|
| 6 | |
|---|
| 7 | #include "mpid_nem_impl.h" |
|---|
| 8 | |
|---|
/* Sentinel kept in a barrier_vars slot's context_id field while the slot is
   not claimed by any communicator.  The replacement list is parenthesized so
   that the negative literal cannot bind to a neighbouring operator at the
   point of use. */
#define NULL_CONTEXT_ID (-1)
|---|
| 10 | |
|---|
| 11 | static int barrier (MPID_Comm *comm_ptr); |
|---|
| 12 | static int alloc_barrier_vars (MPID_Comm *comm, MPID_nem_barrier_vars_t **vars); |
|---|
| 13 | |
|---|
| 14 | static MPID_Collops collective_functions = { |
|---|
| 15 | 0, /* ref_count */ |
|---|
| 16 | barrier, /* Barrier */ |
|---|
| 17 | NULL, /* Bcast */ |
|---|
| 18 | NULL, /* Gather */ |
|---|
| 19 | NULL, /* Gatherv */ |
|---|
| 20 | NULL, /* Scatter */ |
|---|
| 21 | NULL, /* Scatterv */ |
|---|
| 22 | NULL, /* Allgather */ |
|---|
| 23 | NULL, /* Allgatherv */ |
|---|
| 24 | NULL, /* Alltoall */ |
|---|
| 25 | NULL, /* Alltoallv */ |
|---|
| 26 | NULL, /* Alltoallw */ |
|---|
| 27 | NULL, /* Reduce */ |
|---|
| 28 | NULL, /* Allreduce */ |
|---|
| 29 | NULL, /* Reduce_scatter */ |
|---|
| 30 | NULL, /* Scan */ |
|---|
| 31 | NULL /* Exscan */ |
|---|
| 32 | }; |
|---|
| 33 | |
|---|
| 34 | #undef FUNCNAME |
|---|
| 35 | #define FUNCNAME MPIDI_CH3I_comm_create |
|---|
| 36 | #undef FCNAME |
|---|
| 37 | #define FCNAME MPIDI_QUOTE(FUNCNAME) |
|---|
| 38 | int MPIDI_CH3I_comm_create (MPID_Comm *comm) |
|---|
| 39 | { |
|---|
| 40 | int mpi_errno = MPI_SUCCESS; |
|---|
| 41 | MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_COMM_CREATE); |
|---|
| 42 | |
|---|
| 43 | MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_COMM_CREATE); |
|---|
| 44 | comm->ch.barrier_vars = NULL; |
|---|
| 45 | |
|---|
| 46 | mpi_errno = MPIU_Find_local_and_external(comm, &comm->ch.local_size, &comm->ch.local_rank, |
|---|
| 47 | &comm->ch.local_ranks, &comm->ch.external_size, |
|---|
| 48 | &comm->ch.external_rank, &comm->ch.external_ranks, |
|---|
| 49 | &comm->ch.intranode_table, &comm->ch.internode_table); |
|---|
| 50 | if (mpi_errno) MPIU_ERR_POP (mpi_errno); |
|---|
| 51 | |
|---|
| 52 | comm->coll_fns = &collective_functions; |
|---|
| 53 | |
|---|
| 54 | fn_exit: |
|---|
| 55 | MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_COMM_CREATE); |
|---|
| 56 | return mpi_errno; |
|---|
| 57 | fn_fail: |
|---|
| 58 | goto fn_exit; |
|---|
| 59 | } |
|---|
| 60 | |
|---|
| 61 | #undef FUNCNAME |
|---|
| 62 | #define FUNCNAME MPIDI_CH3I_comm_destroy |
|---|
| 63 | #undef FCNAME |
|---|
| 64 | #define FCNAME MPIDI_QUOTE(FUNCNAME) |
|---|
| 65 | int MPIDI_CH3I_comm_destroy (MPID_Comm *comm) |
|---|
| 66 | { |
|---|
| 67 | int mpi_errno = MPI_SUCCESS; |
|---|
| 68 | MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_COMM_DESTROY); |
|---|
| 69 | |
|---|
| 70 | MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_COMM_DESTROY); |
|---|
| 71 | if (comm->ch.barrier_vars && OPA_fetch_and_decr_int(&comm->ch.barrier_vars->usage_cnt) == 1) |
|---|
| 72 | { |
|---|
| 73 | OPA_write_barrier(); |
|---|
| 74 | OPA_store_int(&comm->ch.barrier_vars->context_id, NULL_CONTEXT_ID); |
|---|
| 75 | } |
|---|
| 76 | if (comm->ch.local_size) |
|---|
| 77 | MPIU_Free (comm->ch.local_ranks); |
|---|
| 78 | if (comm->ch.external_size) |
|---|
| 79 | MPIU_Free (comm->ch.external_ranks); |
|---|
| 80 | |
|---|
| 81 | MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_COMM_DESTROY); |
|---|
| 82 | return mpi_errno; |
|---|
| 83 | } |
|---|
| 84 | |
|---|
| 85 | #undef FUNCNAME |
|---|
| 86 | #define FUNCNAME alloc_barrier_vars |
|---|
| 87 | #undef FCNAME |
|---|
| 88 | #define FCNAME MPIDI_QUOTE(FUNCNAME) |
|---|
| 89 | static int alloc_barrier_vars (MPID_Comm *comm, MPID_nem_barrier_vars_t **vars) |
|---|
| 90 | { |
|---|
| 91 | int mpi_errno = MPI_SUCCESS; |
|---|
| 92 | int i; |
|---|
| 93 | int c; |
|---|
| 94 | |
|---|
| 95 | for (i = 0; i < MPID_NEM_NUM_BARRIER_VARS; ++i) |
|---|
| 96 | { |
|---|
| 97 | c = OPA_cas_int(&MPID_nem_mem_region.barrier_vars[i].context_id, NULL_CONTEXT_ID, comm->context_id); |
|---|
| 98 | if (c == NULL_CONTEXT_ID || c == comm->context_id) |
|---|
| 99 | { |
|---|
| 100 | *vars = &MPID_nem_mem_region.barrier_vars[i]; |
|---|
| 101 | OPA_write_barrier(); |
|---|
| 102 | OPA_incr_int(&(*vars)->usage_cnt); |
|---|
| 103 | goto fn_exit; |
|---|
| 104 | } |
|---|
| 105 | } |
|---|
| 106 | |
|---|
| 107 | *vars = NULL; |
|---|
| 108 | |
|---|
| 109 | fn_exit: |
|---|
| 110 | return mpi_errno; |
|---|
| 111 | } |
|---|
| 112 | |
|---|
| 113 | #undef FUNCNAME |
|---|
| 114 | #define FUNCNAME barrier |
|---|
| 115 | #undef FCNAME |
|---|
| 116 | #define FCNAME MPIDI_QUOTE(FUNCNAME) |
|---|
| 117 | static int msg_barrier (MPID_Comm *comm_ptr, int rank, int size, int *rank_array) |
|---|
| 118 | { |
|---|
| 119 | int mpi_errno = MPI_SUCCESS; |
|---|
| 120 | int src, dst, mask; |
|---|
| 121 | MPI_Comm comm = comm_ptr->handle; |
|---|
| 122 | |
|---|
| 123 | mask = 0x1; |
|---|
| 124 | while (mask < size) |
|---|
| 125 | { |
|---|
| 126 | dst = rank_array[(rank + mask) % size]; |
|---|
| 127 | src = rank_array[(rank - mask + size) % size]; |
|---|
| 128 | mpi_errno = MPIC_Sendrecv (NULL, 0, MPI_BYTE, dst, MPIR_BARRIER_TAG, |
|---|
| 129 | NULL, 0, MPI_BYTE, src, MPIR_BARRIER_TAG, comm, MPI_STATUS_IGNORE); |
|---|
| 130 | if (mpi_errno) MPIU_ERR_POP (mpi_errno); |
|---|
| 131 | mask <<= 1; |
|---|
| 132 | } |
|---|
| 133 | |
|---|
| 134 | fn_exit: |
|---|
| 135 | return mpi_errno; |
|---|
| 136 | fn_fail: |
|---|
| 137 | goto fn_exit; |
|---|
| 138 | } |
|---|
| 139 | |
|---|
| 140 | |
|---|
| 141 | #undef FUNCNAME |
|---|
| 142 | #define FUNCNAME barrier |
|---|
| 143 | #undef FCNAME |
|---|
| 144 | #define FCNAME MPIDI_QUOTE(FUNCNAME) |
|---|
| 145 | static int barrier (MPID_Comm *comm_ptr) |
|---|
| 146 | { |
|---|
| 147 | int mpi_errno = MPI_SUCCESS; |
|---|
| 148 | MPID_nem_barrier_vars_t *barrier_vars; |
|---|
| 149 | int local_size = comm_ptr->ch.local_size; |
|---|
| 150 | int external_size = comm_ptr->ch.external_size; |
|---|
| 151 | |
|---|
| 152 | /* Trivial barriers return immediately */ |
|---|
| 153 | if (comm_ptr->local_size == 1) |
|---|
| 154 | return MPI_SUCCESS; |
|---|
| 155 | |
|---|
| 156 | /* Only one collective operation per communicator can be active at any |
|---|
| 157 | time */ |
|---|
| 158 | MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER (comm_ptr); |
|---|
| 159 | |
|---|
| 160 | if (local_size == 1) |
|---|
| 161 | { |
|---|
| 162 | /* there are only external processes -- do msg barrier only */ |
|---|
| 163 | mpi_errno = msg_barrier (comm_ptr, comm_ptr->ch.external_rank, external_size, comm_ptr->ch.external_ranks); |
|---|
| 164 | if (mpi_errno) MPIU_ERR_POP (mpi_errno); |
|---|
| 165 | |
|---|
| 166 | goto fn_exit; |
|---|
| 167 | } |
|---|
| 168 | |
|---|
| 169 | if (comm_ptr->ch.barrier_vars == NULL) |
|---|
| 170 | { |
|---|
| 171 | mpi_errno = alloc_barrier_vars (comm_ptr, &comm_ptr->ch.barrier_vars); |
|---|
| 172 | if (mpi_errno) MPIU_ERR_POP (mpi_errno); |
|---|
| 173 | |
|---|
| 174 | if (comm_ptr->ch.barrier_vars == NULL) |
|---|
| 175 | { |
|---|
| 176 | /* no barrier_vars left -- revert to safe but inefficient |
|---|
| 177 | implementation: do a barrier using messages with local |
|---|
| 178 | procs, then with external procs, then again with local |
|---|
| 179 | procs. */ |
|---|
| 180 | /* FIXME: need a better solution here. e.g., allocate |
|---|
| 181 | some barrier_vars on the first barrier for the life of |
|---|
| 182 | the communicator (as is the case now), others must be |
|---|
| 183 | allocated for each barrier, then released. If we run |
|---|
| 184 | out of barrier_vars after that, then use msg_barrier. |
|---|
| 185 | */ |
|---|
| 186 | mpi_errno = msg_barrier (comm_ptr, comm_ptr->ch.local_rank, local_size, comm_ptr->ch.local_ranks); |
|---|
| 187 | if (mpi_errno) MPIU_ERR_POP (mpi_errno); |
|---|
| 188 | |
|---|
| 189 | if (comm_ptr->ch.local_rank == 0) |
|---|
| 190 | { |
|---|
| 191 | mpi_errno = msg_barrier (comm_ptr, comm_ptr->ch.external_rank, external_size, comm_ptr->ch.external_ranks); |
|---|
| 192 | if (mpi_errno) MPIU_ERR_POP (mpi_errno); |
|---|
| 193 | } |
|---|
| 194 | |
|---|
| 195 | mpi_errno = msg_barrier (comm_ptr, comm_ptr->ch.local_rank, local_size, comm_ptr->ch.local_ranks); |
|---|
| 196 | if (mpi_errno) MPIU_ERR_POP (mpi_errno); |
|---|
| 197 | |
|---|
| 198 | goto fn_exit; |
|---|
| 199 | } |
|---|
| 200 | } |
|---|
| 201 | |
|---|
| 202 | barrier_vars = comm_ptr->ch.barrier_vars; |
|---|
| 203 | |
|---|
| 204 | if (external_size == 1) |
|---|
| 205 | { |
|---|
| 206 | /* there are only local procs -- do shared memory barrier only */ |
|---|
| 207 | int prev; |
|---|
| 208 | int sense; |
|---|
| 209 | |
|---|
| 210 | sense = OPA_load_int(&barrier_vars->sig); |
|---|
| 211 | OPA_read_barrier(); |
|---|
| 212 | |
|---|
| 213 | prev = OPA_fetch_and_incr_int(&barrier_vars->cnt); |
|---|
| 214 | if (prev == local_size - 1) |
|---|
| 215 | { |
|---|
| 216 | OPA_store_int(&barrier_vars->cnt, 0); |
|---|
| 217 | OPA_write_barrier(); |
|---|
| 218 | OPA_store_int(&barrier_vars->sig, 1 - sense); |
|---|
| 219 | } |
|---|
| 220 | else |
|---|
| 221 | { |
|---|
| 222 | while (OPA_load_int(&barrier_vars->sig) == sense) |
|---|
| 223 | MPIDU_Yield(); |
|---|
| 224 | } |
|---|
| 225 | |
|---|
| 226 | goto fn_exit; |
|---|
| 227 | } |
|---|
| 228 | |
|---|
| 229 | /* there are both local and external processes */ |
|---|
| 230 | |
|---|
| 231 | if (comm_ptr->ch.local_rank == 0) |
|---|
| 232 | { |
|---|
| 233 | /* do barrier between local and external */ |
|---|
| 234 | int external_rank = comm_ptr->ch.external_rank; |
|---|
| 235 | int *external_ranks = comm_ptr->ch.external_ranks; |
|---|
| 236 | |
|---|
| 237 | /* wait for local procs to reach barrier */ |
|---|
| 238 | if (local_size > 1) |
|---|
| 239 | while (OPA_load_int(&barrier_vars->sig0) == 0) |
|---|
| 240 | MPIDU_Yield(); |
|---|
| 241 | |
|---|
| 242 | /* now do a barrier with external processes */ |
|---|
| 243 | mpi_errno = msg_barrier (comm_ptr, external_rank, external_size, external_ranks); |
|---|
| 244 | if (mpi_errno) MPIU_ERR_POP (mpi_errno); |
|---|
| 245 | |
|---|
| 246 | /* reset ctr and release local procs */ |
|---|
| 247 | if (local_size > 1) |
|---|
| 248 | { |
|---|
| 249 | OPA_store_int(&barrier_vars->sig0, 0); |
|---|
| 250 | OPA_store_int(&barrier_vars->cnt, 0); |
|---|
| 251 | OPA_write_barrier(); |
|---|
| 252 | OPA_store_int(&barrier_vars->sig, 1 - OPA_load_int(&barrier_vars->sig)); |
|---|
| 253 | } |
|---|
| 254 | } |
|---|
| 255 | else |
|---|
| 256 | { |
|---|
| 257 | /* just do the local barrier -- Decrement a counter. If |
|---|
| 258 | counter is 1 (i.e., only root is left), set sig0 to signal |
|---|
| 259 | root. Then, wait on signal variable. */ |
|---|
| 260 | int prev; |
|---|
| 261 | int sense; |
|---|
| 262 | sense = OPA_load_int(&barrier_vars->sig); |
|---|
| 263 | OPA_read_barrier(); |
|---|
| 264 | |
|---|
| 265 | prev = OPA_fetch_and_incr_int(&barrier_vars->cnt); |
|---|
| 266 | if (prev == local_size - 2) /* - 2 because it's the value before we added 1 and we're not waiting for root */ |
|---|
| 267 | { |
|---|
| 268 | OPA_write_barrier(); |
|---|
| 269 | OPA_store_int(&barrier_vars->sig0, 1); |
|---|
| 270 | } |
|---|
| 271 | |
|---|
| 272 | while (OPA_load_int(&barrier_vars->sig) == sense) |
|---|
| 273 | MPIDU_Yield(); |
|---|
| 274 | } |
|---|
| 275 | |
|---|
| 276 | fn_exit: |
|---|
| 277 | MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr ); |
|---|
| 278 | return mpi_errno; |
|---|
| 279 | fn_fail: |
|---|
| 280 | goto fn_exit; |
|---|
| 281 | } |
|---|
| 282 | |
|---|
| 283 | |
|---|
| 284 | #undef FUNCNAME |
|---|
| 285 | #define FUNCNAME MPID_nem_barrier_vars_init |
|---|
| 286 | #undef FCNAME |
|---|
| 287 | #define FCNAME MPIDI_QUOTE(FUNCNAME) |
|---|
| 288 | int MPID_nem_barrier_vars_init (MPID_nem_barrier_vars_t *barrier_region) |
|---|
| 289 | { |
|---|
| 290 | int mpi_errno = MPI_SUCCESS; |
|---|
| 291 | int i; |
|---|
| 292 | MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_BARRIER_VARS_INIT); |
|---|
| 293 | |
|---|
| 294 | MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_BARRIER_VARS_INIT); |
|---|
| 295 | if (MPID_nem_mem_region.local_rank == 0) |
|---|
| 296 | for (i = 0; i < MPID_NEM_NUM_BARRIER_VARS; ++i) |
|---|
| 297 | { |
|---|
| 298 | OPA_store_int(&barrier_region[i].context_id, NULL_CONTEXT_ID); |
|---|
| 299 | OPA_store_int(&barrier_region[i].usage_cnt, 0); |
|---|
| 300 | OPA_store_int(&barrier_region[i].cnt, 0); |
|---|
| 301 | OPA_store_int(&barrier_region[i].sig0, 0); |
|---|
| 302 | OPA_store_int(&barrier_region[i].sig, 0); |
|---|
| 303 | } |
|---|
| 304 | |
|---|
| 305 | MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_BARRIER_VARS_INIT); |
|---|
| 306 | return mpi_errno; |
|---|
| 307 | } |
|---|
| 308 | |
|---|
| 309 | #undef FUNCNAME |
|---|
| 310 | #define FUNCNAME MPID_nem_barrier_vars_init |
|---|
| 311 | #undef FCNAME |
|---|
| 312 | #define FCNAME MPIDI_QUOTE(FUNCNAME) |
|---|
| 313 | int MPID_nem_coll_barrier_init(void) |
|---|
| 314 | { |
|---|
| 315 | int mpi_errno = MPI_SUCCESS; |
|---|
| 316 | MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_COLL_BARRIER_INIT); |
|---|
| 317 | |
|---|
| 318 | MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_COLL_BARRIER_INIT); |
|---|
| 319 | |
|---|
| 320 | /* mpi_errno = MPIDI_CH3I_comm_create (MPIR_Process.comm_world); */ |
|---|
| 321 | /* if (mpi_errno) MPIU_ERR_POP (mpi_errno); */ |
|---|
| 322 | |
|---|
| 323 | MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_COLL_BARRIER_INIT); |
|---|
| 324 | return mpi_errno; |
|---|
| 325 | } |
|---|