|
| 1 | +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * |
| 2 | + * |
| 3 | + * Copyright (C) 2024, Northwestern University |
| 4 | + * See COPYRIGHT notice in top-level directory. |
| 5 | + * |
| 6 | + * This program shows 3 ways of performing all-to-many MPI communication. |
| 7 | + * |
| 8 | + * Command-line option '-a' uses MPI_Alltoallv |
| 9 | + * Command-line option '-s' uses MPI_Issend/MPI_Irecv/MPI_Waitall |
| 10 | + * The default is to use MPI_Isend/MPI_Irecv/MPI_Waitall |
| 11 | + * |
| 12 | + * Command-line option '-n' sets the number of iterations |
| 13 | + * Command-line option '-m' sets the maximal number of receivers |
| 14 | + * Command-line option '-r' sets the ratio of all processes to receivers, so |
| 15 | + * every ratio-th rank is a receiver. For example, when running 128 MPI |
| 16 | + * processes per compute node, '-r 128' will pick one receiver per compute node. |
| 17 | + * Command-line option '-l' can be used to set the message size. |
| 18 | + * |
| 19 | + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ |
| 20 | + |
| 21 | +#include <stdio.h> |
| 22 | +#include <stdlib.h> |
| 23 | +#include <string.h> |
| 24 | +#include <unistd.h> |
| 25 | + |
| 26 | +#include <mpi.h> |
| 27 | + |
/*
 * ERR -- check the MPI return code stored in the caller's `err` variable.
 *
 * On failure it prints the MPI error text together with the source line,
 * increments the caller's `nerrs` counter and jumps to the caller's
 * `err_out` label. The enclosing function must therefore provide `err`,
 * `nerrs` and `err_out`. The macro expands to a complete `if` statement and
 * is written at call sites without a trailing semicolon.
 */
#define ERR \
    if (err != MPI_SUCCESS) { \
        char mpi_msg[MPI_MAX_ERROR_STRING]; \
        int mpi_msg_len; \
        MPI_Error_string(err, mpi_msg, &mpi_msg_len); \
        printf("Error at line %d: %s\n", __LINE__, mpi_msg); \
        nerrs++; \
        goto err_out; \
    }

/*
 * STRINGIFY turns its argument into a string literal as written; TOSTRING
 * adds one level of indirection so that a macro argument is expanded first
 * and its value, not its name, is stringified.
 */
#define STRINGIFY(token) #token
#define TOSTRING(token) STRINGIFY(token)
| 39 | + |
| 40 | +/*----< usage() >------------------------------------------------------------*/ |
/*
 * Print the command-line help text to stderr.
 *
 * argv0: program name substituted into the "Usage:" line.
 *
 * The format string is a literal so the compiler can check it against the
 * argument list.
 */
static void usage(const char *argv0)
{
    fprintf(stderr,
            "Usage: %s [OPTION]\n"
            "       [-h] Print this help message\n"
            "       [-v] Verbose mode (default: no)\n"
            "       [-a] use MPI_Alltoallv (default: MPI_Isend/Irecv)\n"
            "       [-s] use MPI_Issend (default: MPI_Isend/Irecv)\n"
            "       [-n num] number of iterations (default: 1)\n"
            "       [-m num] maximal number of receivers (default: total number of processes / ratio)\n"
            "       [-r ratio] ratio of all processes to number of receivers (default: 1)\n"
            "       [-l len] individual message size in bytes (default: 48)\n",
            argv0);
}
| 53 | + |
| 54 | +/*----< main() >------------------------------------------------------------*/ |
| 55 | +int main(int argc, char **argv) { |
| 56 | + extern int optind; |
| 57 | + extern char *optarg; |
| 58 | + int i, j, rank, nprocs, err, nerrs=0, verbose, len, ntimes, ratio; |
| 59 | + int use_alltoall, use_issend, is_recver, num_recvers, *recver_rank; |
| 60 | + int max_num_recvers; |
| 61 | + char *buf; |
| 62 | + double timing, maxt; |
| 63 | + |
| 64 | + MPI_Init(&argc, &argv); |
| 65 | + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); |
| 66 | + MPI_Comm_rank(MPI_COMM_WORLD, &rank); |
| 67 | + |
| 68 | + verbose = 0; |
| 69 | + use_alltoall = 0; |
| 70 | + use_issend = 0; |
| 71 | + len = 48; |
| 72 | + ntimes = 1; |
| 73 | + ratio = 1; |
| 74 | + |
| 75 | + /* command-line arguments */ |
| 76 | + while ((i = getopt (argc, argv, "hvasl:n:r:m:")) != EOF) |
| 77 | + switch (i) { |
| 78 | + case 'v': |
| 79 | + verbose = 1; |
| 80 | + break; |
| 81 | + case 's': |
| 82 | + use_issend = 1; |
| 83 | + break; |
| 84 | + case 'a': |
| 85 | + use_alltoall = 1; |
| 86 | + break; |
| 87 | + case 'l': |
| 88 | + len = atoi(optarg); |
| 89 | + break; |
| 90 | + case 'n': |
| 91 | + ntimes = atoi(optarg); |
| 92 | + break; |
| 93 | + case 'r': |
| 94 | + ratio = atoi(optarg); |
| 95 | + break; |
| 96 | + case 'm': |
| 97 | + max_num_recvers = atoi(optarg); |
| 98 | + break; |
| 99 | + case 'h': |
| 100 | + default: |
| 101 | + if (rank == 0) usage(argv[0]); |
| 102 | + goto err_out; |
| 103 | + } |
| 104 | + |
| 105 | + if (use_alltoall == 1 && use_issend == 1) { |
| 106 | + if (rank == 0) |
| 107 | + printf("Error: command-line options '-a' and '-s' cannot be both set\n"); |
| 108 | + goto err_out; |
| 109 | + } |
| 110 | + |
| 111 | + is_recver = 0; |
| 112 | + num_recvers = nprocs / ratio; |
| 113 | + if (num_recvers > max_num_recvers) num_recvers = max_num_recvers; |
| 114 | + |
| 115 | + if (rank == 0) { |
| 116 | + if (use_alltoall) |
| 117 | + printf("---- Using MPI_Alltoallv\n"); |
| 118 | + else if (use_issend) |
| 119 | + printf("---- Using MPI_Issend/Irecv\n"); |
| 120 | + else |
| 121 | + printf("---- Using MPI_Isend/Irecv\n"); |
| 122 | +#ifdef MPICH_VERSION |
| 123 | + printf("---- This MPI is based on MPICH version %s\n",MPICH_VERSION); |
| 124 | +#endif |
| 125 | +#ifdef CRAY_MPICH_VERSION |
| 126 | + printf("---- This MPI is based on Cray MPICH version %s\n", |
| 127 | + TOSTRING(CRAY_MPICH_VERSION)); |
| 128 | +#endif |
| 129 | + printf("nprocs = %d\n", nprocs); |
| 130 | + printf("len = %d\n", len); |
| 131 | + printf("ntimes = %d\n", ntimes); |
| 132 | + printf("ratio = %d\n", ratio); |
| 133 | + printf("num_recvers = %d\n", num_recvers); |
| 134 | + } |
| 135 | + |
| 136 | + recver_rank = (int*) malloc(sizeof(int) * num_recvers); |
| 137 | + if (verbose && rank == 0) printf("recver_rank: "); |
| 138 | + for (i=0; i<num_recvers; i++) { |
| 139 | + recver_rank[i] = i * ratio; |
| 140 | + if (rank == recver_rank[i]) is_recver = 1; |
| 141 | + if (verbose && rank == 0) printf(" %d", recver_rank[i]); |
| 142 | + } |
| 143 | + if (verbose && rank == 0) printf("\n"); |
| 144 | + if (verbose) fflush(stdout); |
| 145 | + MPI_Barrier(MPI_COMM_WORLD); |
| 146 | + timing = MPI_Wtime(); |
| 147 | + |
| 148 | + buf = (char*) malloc((nprocs + num_recvers) * len); |
| 149 | + |
| 150 | + if (use_alltoall == 0) { |
| 151 | + MPI_Request *reqs; |
| 152 | + MPI_Status *st; |
| 153 | + |
| 154 | + reqs = (MPI_Request*) calloc(nprocs + num_recvers, sizeof(MPI_Request)); |
| 155 | + st = (MPI_Status*)malloc(sizeof(MPI_Status) * (nprocs + num_recvers)); |
| 156 | + |
| 157 | + for (i=0; i<ntimes; i++) { |
| 158 | + int nreqs=0; |
| 159 | + char *ptr = buf; |
| 160 | + |
| 161 | + /* post recv requests */ |
| 162 | + if (is_recver) { |
| 163 | + for (j=0; j<nprocs; j++) { |
| 164 | + err = MPI_Irecv(ptr, len, MPI_BYTE, j, 0, MPI_COMM_WORLD, |
| 165 | + &reqs[nreqs++]); |
| 166 | + ERR |
| 167 | + ptr += len; |
| 168 | + } |
| 169 | + } |
| 170 | + |
| 171 | + /* post send requests */ |
| 172 | + for (j=0; j<num_recvers; j++) { |
| 173 | + if (use_issend) |
| 174 | + err = MPI_Issend(ptr, len, MPI_BYTE, recver_rank[j], 0, |
| 175 | + MPI_COMM_WORLD, &reqs[nreqs++]); |
| 176 | + else |
| 177 | + err = MPI_Isend(ptr, len, MPI_BYTE, recver_rank[j], 0, |
| 178 | + MPI_COMM_WORLD, &reqs[nreqs++]); |
| 179 | + ERR |
| 180 | + ptr += len; |
| 181 | + } |
| 182 | + |
| 183 | + err = MPI_Waitall(nreqs, reqs, st); |
| 184 | + ERR |
| 185 | + } |
| 186 | + free(st); |
| 187 | + free(reqs); |
| 188 | + } |
| 189 | + else { |
| 190 | + int *sendCounts, *recvCounts, *sdispls, *rdispls; |
| 191 | + char *r_buf, *s_buf; |
| 192 | + |
| 193 | + sendCounts = (int*) calloc(nprocs * 2, sizeof(int)); |
| 194 | + recvCounts = sendCounts + nprocs; |
| 195 | + sdispls = (int*) calloc(nprocs * 2, sizeof(int)); |
| 196 | + rdispls = sdispls + nprocs; |
| 197 | + |
| 198 | + if (is_recver) { |
| 199 | + for (i=0; i<nprocs; i++) { |
| 200 | + recvCounts[i] = len; |
| 201 | + rdispls[i] = len * i; |
| 202 | + } |
| 203 | + } |
| 204 | + r_buf = buf; |
| 205 | + |
| 206 | + for (i=0; i<num_recvers; i++) { |
| 207 | + sendCounts[recver_rank[i]] = len; |
| 208 | + sdispls[recver_rank[i]] = len * i; |
| 209 | + } |
| 210 | + s_buf = r_buf + nprocs * len; |
| 211 | + |
| 212 | + for (i=0; i<ntimes; i++) { |
| 213 | + err = MPI_Alltoallv(s_buf, sendCounts, sdispls, MPI_BYTE, |
| 214 | + r_buf, recvCounts, rdispls, MPI_BYTE, |
| 215 | + MPI_COMM_WORLD); |
| 216 | + ERR |
| 217 | + } |
| 218 | + free(sendCounts); |
| 219 | + free(sdispls); |
| 220 | + } |
| 221 | + free(buf); |
| 222 | + free(recver_rank); |
| 223 | + |
| 224 | + timing = MPI_Wtime() - timing; |
| 225 | + MPI_Reduce(&timing, &maxt, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); |
| 226 | + if (rank == 0) { |
| 227 | + double wb = (double)len * nprocs * ntimes * num_recvers; |
| 228 | + wb /= 1048576.0; /* in MB */ |
| 229 | + printf("Total message amount: %.2f MiB\n", wb); |
| 230 | + printf("Max time: %.2f sec\n", maxt); |
| 231 | + printf("Comm bandwidth: %.2f MiB/sec\n", wb / maxt); |
| 232 | + } |
| 233 | + |
| 234 | +err_out: |
| 235 | + MPI_Finalize(); |
| 236 | + return 0; |
| 237 | +} |
| 238 | + |
| 239 | + |
0 commit comments