mpirun noticed that process rank 1 with PID 0 on node X540UV exited on signal 11 (Segmentation fault)

I am an MPI beginner. I want to multiply 2D matrices with MPI. When I wrote this program with plain 2D arrays mat_a[][] and mat_b[][] I did not have this problem, but after defining the matrices as 2D vectors I get the problem below:

#include <stdio.h>
#include <mpi.h>
#include <iostream>
#include <curses.h>
#include <vector>
#define NUM_ROWS_A 12 //rows of input [A]
#define NUM_COLUMNS_A 12 //columns of input [A]
#define NUM_ROWS_B 12 //rows of input [B]
#define NUM_COLUMNS_B 12 //columns of input [B]
#define MASTER_TO_SLAVE_TAG 1 //tag for messages sent from master to slaves
#define SLAVE_TO_MASTER_TAG 4 //tag for messages sent from slaves to master
void makeAB(); //makes the [A] and [B] matrices
void printArray(); //print the content of output matrix [C];
int rank; //process rank
int size; //number of processes
int i,j,k; //helper variables
std::vector<std::vector<double> >mat_a(NUM_ROWS_A,std::vector<double>(NUM_COLUMNS_A)); //declare input [A]
std::vector<std::vector<double> >mat_b(NUM_ROWS_B,std::vector<double>(NUM_COLUMNS_B)); //declare input [B]
std::vector<std::vector<double> >mat_result(NUM_ROWS_A,std::vector<double>(NUM_COLUMNS_B)); //declare output [C]
double start_time; //hold start time
double end_time; // hold end time
int low_bound; //low bound of the number of rows of [A] allocated to a slave
int upper_bound; //upper bound of the number of rows of [A] allocated to a slave
int portion; //portion of the number of rows of [A] allocated to a slave
MPI_Status status; // store status of a MPI_Recv
MPI_Request request; //capture request of a MPI_Isend
int main(int argc,char *argv[])
{
    MPI_Init(&argc,&argv); //initialize MPI operations
    MPI_Comm_rank(MPI_COMM_WORLD,&rank); //get the rank
    MPI_Comm_size(MPI_COMM_WORLD,&size); //get number of processes
    /* master initializes work*/
    if (rank == 0) {
        makeAB();
        start_time = MPI_Wtime();
        for (i = 1; i < size; i++) {//for each slave other than the master
            portion = (NUM_ROWS_A / (size - 1)); // calculate portion without master
            low_bound = (i - 1) * portion;
            if (((i + 1) == size) && ((NUM_ROWS_A % (size - 1)) != 0)) {//if rows of [A] cannot be equally divided among slaves
                upper_bound = NUM_ROWS_A; //last slave gets all the remaining rows
            } else {
                upper_bound = low_bound + portion; //rows of [A] are equally divisible among slaves
            }
            //send the low bound first without blocking,to the intended slave
            MPI_Isend(&low_bound,1,MPI_INT,i,MASTER_TO_SLAVE_TAG,MPI_COMM_WORLD,&request);
            //next send the upper bound without blocking,to the intended slave
            MPI_Isend(&upper_bound,1,MPI_INT,i,MASTER_TO_SLAVE_TAG + 1,MPI_COMM_WORLD,&request);
            //finally send the allocated row portion of [A] without blocking,to the intended slave
            MPI_Isend(&mat_a[low_bound][0],(upper_bound - low_bound) * NUM_COLUMNS_A,MPI_DOUBLE,i,MASTER_TO_SLAVE_TAG + 2,MPI_COMM_WORLD,&request);
        }
    }
    //broadcast [B] to all the slaves
    MPI_Bcast(&mat_b,NUM_ROWS_B*NUM_COLUMNS_B,MPI_DOUBLE,0,MPI_COMM_WORLD);
    /* work done by slaves*/
    if (rank > 0) {
        //receive low bound from the master
        MPI_Recv(&low_bound,1,MPI_INT,0,MASTER_TO_SLAVE_TAG,MPI_COMM_WORLD,&status);
        //next receive upper bound from the master
        MPI_Recv(&upper_bound,1,MPI_INT,0,MASTER_TO_SLAVE_TAG + 1,MPI_COMM_WORLD,&status);
        //finally receive row portion of [A] to be processed from the master
        MPI_Recv(&mat_a[low_bound][0],(upper_bound - low_bound) * NUM_COLUMNS_A,MPI_DOUBLE,0,MASTER_TO_SLAVE_TAG + 2,MPI_COMM_WORLD,&status);
        for (i = low_bound; i < upper_bound; i++) {//iterate through a given set of rows of [A]
            for (j = 0; j < NUM_COLUMNS_B; j++) {//iterate through columns of [B]
                for (k = 0; k < NUM_ROWS_B; k++) {//iterate through rows of [B]
                    mat_result[i][j] += (mat_a[i][k] * mat_b[k][j]);
                }
            }
        }
        //send back the low bound first without blocking,to the master
        MPI_Isend(&low_bound,1,MPI_INT,0,SLAVE_TO_MASTER_TAG,MPI_COMM_WORLD,&request);
        //send the upper bound next without blocking,to the master
        MPI_Isend(&upper_bound,1,MPI_INT,0,SLAVE_TO_MASTER_TAG + 1,MPI_COMM_WORLD,&request);
        //finally send the processed portion of data without blocking,to the master
        MPI_Isend(&mat_result[low_bound][0],(upper_bound - low_bound) * NUM_COLUMNS_B,MPI_DOUBLE,0,SLAVE_TO_MASTER_TAG + 2,MPI_COMM_WORLD,&request);
    }
    /* master gathers processed work*/
    if (rank == 0) {
        for (i = 1; i < size; i++) {// until all slaves have handed back the processed data
            //receive low bound from a slave
            MPI_Recv(&low_bound,1,MPI_INT,i,SLAVE_TO_MASTER_TAG,MPI_COMM_WORLD,&status);
            //receive upper bound from a slave
            MPI_Recv(&upper_bound,1,MPI_INT,i,SLAVE_TO_MASTER_TAG + 1,MPI_COMM_WORLD,&status);
            //receive processed data from a slave
            MPI_Recv(&mat_result[low_bound][0],(upper_bound - low_bound) * NUM_COLUMNS_B,MPI_DOUBLE,i,SLAVE_TO_MASTER_TAG + 2,MPI_COMM_WORLD,&status);
        }
        end_time = MPI_Wtime();
        printf("\nRunning Time = %f\n\n",end_time - start_time);
        printArray();
    }
    MPI_Finalize(); //finalize MPI operations
    return 0;
}
void makeAB()
{
    for (i = 0; i < NUM_ROWS_A; i++) {
        for (j = 0; j < NUM_COLUMNS_A; j++) {
            mat_a[i][j] = i + j;
        }
    }
    for (i = 0; i < NUM_ROWS_B; i++) {
        for (j = 0; j < NUM_COLUMNS_B; j++) {
            mat_b[i][j] = i*j;
        }
    }
}
void printArray()
{
    for (i = 0; i < NUM_ROWS_A; i++) {
        printf("\n");
        for (j = 0; j < NUM_COLUMNS_A; j++)
            printf("%8.2f  ",mat_a[i][j]);
    }
    printf("\n\n\n");
    for (i = 0; i < NUM_ROWS_B; i++) {
        printf("\n");
        for (j = 0; j < NUM_COLUMNS_B; j++)
            printf("%8.2f  ",mat_b[i][j]);
    }
    printf("\n\n\n");
    for (i = 0; i < NUM_ROWS_A; i++) {
        printf("\n");
        for (j = 0; j < NUM_COLUMNS_B; j++)
            printf("%8.2f  ",mat_result[i][j]);
    }
    printf("\n\n");
}

I compile the program with mpic++ multi.cpp -o multi and run it with mpirun -np 3 multi, but I get this error:

    [X540UV:08026] *** Process received signal ***
    [X540UV:08028] *** Process received signal ***
    [X540UV:08028] Signal: Segmentation fault (11)
    [X540UV:08028] Signal code: Address not mapped (1)
    [X540UV:08028] Failing at address: 0xf6e680
    [X540UV:08027] *** Process received signal ***
    [X540UV:08027] Signal: Segmentation fault (11)
    [X540UV:08027] Signal code: Address not mapped (1)
    [X540UV:08027] Failing at address: 0xf6e680
    [X540UV:08027] [ 0] /lib/x86_64-linux-gnu/libc.so.6(+0x354b0)[0x7f1a9cae74b0]
    [X540UV:08027] [ 1] multi3[0x401cfa]
    [X540UV:08027] [ 2] multi3[0x4012d5]
    [X540UV:08027] [ 3] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f1a9cad2830]
    [X540UV:08027] [ 4] multi3[0x400e79]
    [X540UV:08027] *** End of error message ***
    [X540UV:08028] [ 0] [X540UV:08026] Signal: Segmentation fault (11)
    [X540UV:08026] Signal code: Address not mapped (1)
    [X540UV:08026] Failing at address: 0xf6e680
    /lib/x86_64-linux-gnu/libc.so.6(+0x354b0)[0x7fd43be274b0]
    [X540UV:08026] [ 0] [X540UV:08028] /lib/x86_64-linux-gnu/libc.so.6(+0x354b0)[0x7fde584744b0]
    [X540UV:08026] [ 1] multi3[0x401cfa]
    [X540UV:08026] [ 2] multi3[0x4012d5]
    [ 1] [X540UV:08026] [ 3] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7fde5845f830]
    [X540UV:08026] [ 4] multi3[0x400e79]
    [X540UV:08026] *** End of error message ***
    multi3[0x401cfa]
    [X540UV:08028] [ 2] multi3[0x4012d5]
    [X540UV:08028] [ 3] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7fd43be12830]
    [X540UV:08028] [ 4] multi3[0x400e79]
    [X540UV:08028] *** End of error message ***
    --------------------------------------------------------------------------
    mpirun noticed that process rank 1 with PID 0 on node X540UV exited on signal 11 (Segmentation fault).
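
A plain 2D array such as double mat_a[12][12] is one contiguous block, whereas std::vector<std::vector<double>> allocates every row separately, so a call like MPI_Isend(&mat_a[low_bound][0], (upper_bound - low_bound) * NUM_COLUMNS_A, ...) reads past the end of a single row, and &mat_b in the MPI_Bcast points at the vector object rather than at its elements. The code below is only a minimal sketch, assuming the goal is to keep the same row-block communication pattern but with contiguous storage; the names flat_a, ROWS, COLS, low and up are invented for this illustration and are not part of the program above.

// Sketch: store each matrix in ONE contiguous std::vector<double>,
// indexed as row*COLS + col, so a block of rows is one pointer plus one count.
#include <mpi.h>
#include <vector>

static const int ROWS = 12, COLS = 12;

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // One allocation => all rows are adjacent in memory,
    // unlike vector<vector<double>> where each row is a separate allocation.
    std::vector<double> flat_a(ROWS * COLS);
    if (rank == 0)
        for (int i = 0; i < ROWS; i++)
            for (int j = 0; j < COLS; j++)
                flat_a[i * COLS + j] = i + j;   // element (i, j)

    // flat_a.data() (or &flat_a[0]) is a valid buffer for the whole matrix.
    MPI_Bcast(flat_a.data(), ROWS * COLS, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    // Sending rows [low, up) as one message, as the original code intends:
    int low = 0, up = ROWS / 2;
    if (rank == 0 && size > 1) {
        MPI_Send(&flat_a[low * COLS], (up - low) * COLS, MPI_DOUBLE, 1, 1, MPI_COMM_WORLD);
    } else if (rank == 1) {
        MPI_Recv(&flat_a[low * COLS], (up - low) * COLS, MPI_DOUBLE, 0, 1,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }

    MPI_Finalize();
    return 0;
}

Built and launched the same way as above (for example mpic++ flat.cpp -o flat and mpirun -np 2 flat, with hypothetical file names), every element (i, j) lives at flat_a[i * COLS + j], so a multi-row block can be described safely by a single pointer and count.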