SuperLU Dist

De Wiki de Calcul Québec
Aller à : Navigation, rechercher
Cette page est une traduction de la page SuperLU Dist et la traduction est complétée à 100 % et à jour.

Autres langues :anglais 100% • ‎français 100%

Description

This library is the parallel version of SuperLU. Parallelizing is done using the MPI library. For more details, check our page on SuperLU. Since using the parallel version results in difference code, here is an example using SuperLU_Dist.

Example

The following file contains an example (in C) to solve a system of complex linear equations with SuperLU_Dist:

File : ex_superludist.c
/*
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 *
 */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <superlu_ddefs.h>
#include <machines.h>
 
int dcreate_matrix(SuperMatrix *A, int nrhs, double **rhs,
                   int *ldb, double **x, int *ldx,
                   FILE *fp, gridinfo_t *grid)
{
/* 
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 *
 *
 * Purpose
 * =======
 * 
 * DCREATE_MATRIX read the matrix from data file in Harwell-Boeing format,
 * and distribute it to processors in a distributed compressed row format.
 * It also generate the distributed true solution X and the right-hand
 * side RHS.
 *
 *
 * Arguments   
 * =========      
 *
 * A     (output) SuperMatrix*
 *       Local matrix A in NR_loc format. 
 *
 * NRHS  (input) int_t
 *       Number of right-hand sides.
 *
 * RHS   (output) double**
 *       The right-hand side matrix.
 *
 * LDB   (output) int*
 *       Leading dimension of the right-hand side matrix.
 *
 * X     (output) double**
 *       The true solution matrix.
 *
 * LDX   (output) int*
 *       The leading dimension of the true solution matrix.
 *
 * FP    (input) FILE*
 *       The matrix file pointer.
 *
 * GRID  (input) gridinof_t*
 *       The 2D process mesh.
 *
 */
 
    SuperMatrix GA;              /* global A */
    double   *b_global, *xtrue_global;  /* replicated on all processes */
    int_t    *rowind, *colptr;	 /* global */
    double   *nzval;             /* global */
    double   *nzval_loc;         /* local */
    int_t    *colind, *rowptr;	 /* local */
    int_t    m, n, nnz;
    int_t    m_loc, fst_row, nnz_loc;
    int_t    m_loc_fst; /* Record m_loc of the first p-1 processors,
			   when mod(m, p) is not zero. */ 
    int_t    iam, row, col, i, j, relpos;
    char     trans[1];
    int_t      *marker;
 
    iam = grid->iam;
 
#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Enter dcreate_matrix()");
#endif
 
    if ( !iam ) {
        /* Read the matrix stored on disk in Harwell-Boeing format. */
        dreadhb_dist(iam, fp, &m, &n, &nnz, &nzval, &rowind, &colptr);
 
	/* Broadcast matrix A to the other PEs. */
	MPI_Bcast( &m,     1,   mpi_int_t,  0, grid->comm );
	MPI_Bcast( &n,     1,   mpi_int_t,  0, grid->comm );
	MPI_Bcast( &nnz,   1,   mpi_int_t,  0, grid->comm );
	MPI_Bcast( nzval,  nnz, MPI_DOUBLE, 0, grid->comm );
	MPI_Bcast( rowind, nnz, mpi_int_t,  0, grid->comm );
	MPI_Bcast( colptr, n+1, mpi_int_t,  0, grid->comm );
    } else {
	/* Receive matrix A from PE 0. */
	MPI_Bcast( &m,   1,   mpi_int_t,  0, grid->comm );
	MPI_Bcast( &n,   1,   mpi_int_t,  0, grid->comm );
	MPI_Bcast( &nnz, 1,   mpi_int_t,  0, grid->comm );
 
	/* Allocate storage for compressed column representation. */
	dallocateA_dist(n, nnz, &nzval, &rowind, &colptr);
 
	MPI_Bcast( nzval,   nnz, MPI_DOUBLE, 0, grid->comm );
	MPI_Bcast( rowind,  nnz, mpi_int_t,  0, grid->comm );
	MPI_Bcast( colptr,  n+1, mpi_int_t,  0, grid->comm );
    }
 
#if 0
    nzval[0]=0.1;
#endif
 
    /* Compute the number of rows to be distributed to local process */
    m_loc = m / (grid->nprow * grid->npcol); 
    m_loc_fst = m_loc;
    /* When m / procs is not an integer */
    if ((m_loc * grid->nprow * grid->npcol) != m) {
      m_loc = m_loc+1;
      m_loc_fst = m_loc;
      if (iam == (grid->nprow * grid->npcol - 1)) 
	m_loc = m - m_loc_fst * (grid->nprow * grid->npcol - 1);
    }
 
    /* Create compressed column matrix for GA. */
    dCreate_CompCol_Matrix_dist(&GA, m, n, nnz, nzval, rowind, colptr,
				SLU_NC, SLU_D, SLU_GE);
 
    /* Generate the exact solution and compute the right-hand side. */
    if ( !(b_global = doubleMalloc_dist(m*nrhs)) )
        ABORT("Malloc fails for b[]");
    if ( !(xtrue_global = doubleMalloc_dist(n*nrhs)) )
        ABORT("Malloc fails for xtrue[]");
    *trans = 'N';
 
    dGenXtrue_dist(n, nrhs, xtrue_global, n);
    dFillRHS_dist(trans, nrhs, xtrue_global, n, &GA, b_global, m);
 
    /*************************************************
     * Change GA to a local A with NR_loc format     *
     *************************************************/
 
    rowptr = (int_t *) intMalloc_dist(m_loc+1);
    marker = (int_t *) intCalloc_dist(n);
 
    /* Get counts of each row of GA */
    for (i = 0; i < n; ++i)
      for (j = colptr[i]; j < colptr[i+1]; ++j) ++marker[rowind[j]];
    /* Set up row pointers */
    rowptr[0] = 0;
    fst_row = iam * m_loc_fst;
    nnz_loc = 0;
    for (j = 0; j < m_loc; ++j) {
      row = fst_row + j;
      rowptr[j+1] = rowptr[j] + marker[row];
      marker[j] = rowptr[j];
    }
    nnz_loc = rowptr[m_loc];
 
    nzval_loc = (double *) doubleMalloc_dist(nnz_loc);
    colind = (int_t *) intMalloc_dist(nnz_loc);
 
    /* Transfer the matrix into the compressed row storage */
    for (i = 0; i < n; ++i) {
      for (j = colptr[i]; j < colptr[i+1]; ++j) {
	row = rowind[j];
	if ( (row>=fst_row) && (row<fst_row+m_loc) ) {
	  row = row - fst_row;
	  relpos = marker[row];
	  colind[relpos] = i;
	  nzval_loc[relpos] = nzval[j];
	  ++marker[row];
	}
      }
    }
 
#if ( DEBUGlevel>=2 )
    if ( !iam ) dPrint_CompCol_Matrix_dist(&GA);
#endif   
 
    /* Destroy GA */
    Destroy_CompCol_Matrix_dist(&GA);
 
    /******************************************************/
    /* Change GA to a local A with NR_loc format */
    /******************************************************/
 
    /* Set up the local A in NR_loc format */
    dCreate_CompRowLoc_Matrix_dist(A, m, n, nnz_loc, m_loc, fst_row,
				   nzval_loc, colind, rowptr,
				   SLU_NR_loc, SLU_D, SLU_GE);
 
    /* Get the local B */
    if ( !((*rhs) = doubleMalloc_dist(m_loc*nrhs)) )
        ABORT("Malloc fails for rhs[]");
    for (j =0; j < nrhs; ++j) {
	for (i = 0; i < m_loc; ++i) {
	    row = fst_row + i;
	    (*rhs)[j*m_loc+i] = b_global[j*n+row];
	}
    }
    *ldb = m_loc;
 
    /* Set the all true X */    
    *ldx = m_loc;
    if ( !((*x) = doubleMalloc_dist(*ldx * nrhs)) )
        ABORT("Malloc fails for x_loc[]");
 
    /* Get the local part of xtrue_global */
    for (j = 0; j < nrhs; ++j) {
      for (i = 0; i < m_loc; ++i)
	(*x)[i + j*(*ldx)] = xtrue_global[i + fst_row + j*n];
    }
 
    SUPERLU_FREE(b_global);
    SUPERLU_FREE(xtrue_global);
    SUPERLU_FREE(marker);
 
#if ( DEBUGlevel>=1 )
    printf("sizeof(NRforamt_loc) %d\n", sizeof(NRformat_loc));
    CHECK_MALLOC(iam, "Exit dcreate_matrix()");
#endif
    return 0;
}
 
/*
 * Prototypes
 */
static void ReadVector(FILE *, int_t, int_t *, int_t, int_t);
static void dReadValues(FILE *, int_t, double *, int_t, int_t);
static void FormFullA(int_t, int_t *, double **, int_t **, int_t **);
static int DumpLine(FILE *);
static int ParseIntFormat(char *, int_t *, int_t *);
static int ParseFloatFormat(char *, int_t *, int_t *);
 
 
void
dreadhb_dist(int iam, FILE *fp, int_t *nrow, int_t *ncol, int_t *nonz,
	     double **nzval, int_t **rowind, int_t **colptr)
{
/* 
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 *
 *
 * Purpose
 * =======
 * 
 * Read a DOUBLE PRECISION matrix stored in Harwell-Boeing format 
 * as described below.
 * 
 * Line 1 (A72,A8) 
 *  	Col. 1 - 72   Title (TITLE) 
 *	Col. 73 - 80  Key (KEY) 
 * 
 * Line 2 (5I14) 
 * 	Col. 1 - 14   Total number of lines excluding header (TOTCRD) 
 * 	Col. 15 - 28  Number of lines for pointers (PTRCRD) 
 * 	Col. 29 - 42  Number of lines for row (or variable) indices (INDCRD) 
 * 	Col. 43 - 56  Number of lines for numerical values (VALCRD) 
 *	Col. 57 - 70  Number of lines for right-hand sides (RHSCRD) 
 *                    (including starting guesses and solution vectors 
 *		       if present) 
 *           	      (zero indicates no right-hand side data is present) 
 *
 * Line 3 (A3, 11X, 4I14) 
 *   	Col. 1 - 3    Matrix type (see below) (MXTYPE) 
 * 	Col. 15 - 28  Number of rows (or variables) (NROW) 
 * 	Col. 29 - 42  Number of columns (or elements) (NCOL) 
 *	Col. 43 - 56  Number of row (or variable) indices (NNZERO) 
 *	              (equal to number of entries for assembled matrices) 
 * 	Col. 57 - 70  Number of elemental matrix entries (NELTVL) 
 *	              (zero in the case of assembled matrices) 
 * Line 4 (2A16, 2A20) 
 * 	Col. 1 - 16   Format for pointers (PTRFMT) 
 *	Col. 17 - 32  Format for row (or variable) indices (INDFMT) 
 *	Col. 33 - 52  Format for numerical values of coefficient matrix (VALFMT) 
 * 	Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) 
 *
 * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present 
 *    	Col. 1 	      Right-hand side type: 
 *	         	  F for full storage or M for same format as matrix 
 *    	Col. 2        G if a starting vector(s) (Guess) is supplied. (RHSTYP) 
 *    	Col. 3        X if an exact solution vector(s) is supplied. 
 *	Col. 15 - 28  Number of right-hand sides (NRHS) 
 *	Col. 29 - 42  Number of row indices (NRHSIX) 
 *          	      (ignored in case of unassembled matrices) 
 *
 * The three character type field on line 3 describes the matrix type. 
 * The following table lists the permitted values for each of the three 
 * characters. As an example of the type field, RSA denotes that the matrix 
 * is real, symmetric, and assembled. 
 *
 * First Character: 
 *	R Real matrix 
 *	C Complex matrix 
 *	P Pattern only (no numerical values supplied) 
 *
 * Second Character: 
 *	S Symmetric 
 *	U Unsymmetric 
 *	H Hermitian 
 *	Z Skew symmetric 
 *	R Rectangular 
 *
 * Third Character: 
 *	A Assembled 
 *	E Elemental matrices (unassembled) 
 *
 */
 
    register int_t i, numer_lines, rhscrd = 0;
    int_t tmp, colnum, colsize, rownum, rowsize, valnum, valsize;
    char buf[100], type[4];
    int_t sym;
 
#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(0, "Enter dreadhb_dist()");
#endif
 
    /* Line 1 */
    fgets(buf, 100, fp);
 
    /* Line 2 */
    for (i=0; i<5; i++) {
	fscanf(fp, "%14c", buf); buf[14] = 0;
	tmp = atoi(buf); /*sscanf(buf, "%d", &tmp);*/
	if (i == 3) numer_lines = tmp;
	if (i == 4 && tmp) rhscrd = tmp;
    }
    DumpLine(fp);
 
    /* Line 3 */
    fscanf(fp, "%3c", type);
    fscanf(fp, "%11c", buf); /* pad */
    type[3] = 0;
#if ( DEBUGlevel>=1 )
    if ( !iam ) printf("Matrix type %s\n", type);
#endif
 
    fscanf(fp, "%14c", buf); *nrow = atoi(buf); 
    fscanf(fp, "%14c", buf); *ncol = atoi(buf); 
    fscanf(fp, "%14c", buf); *nonz = atoi(buf); 
    fscanf(fp, "%14c", buf); tmp = atoi(buf);   
 
    if (tmp != 0)
	if ( !iam ) printf("This is not an assembled matrix!\n");
    if (*nrow != *ncol)
	if ( !iam ) printf("Matrix is not square.\n");
    DumpLine(fp);
 
    /* Allocate storage for the three arrays ( nzval, rowind, colptr ) */
    dallocateA_dist(*ncol, *nonz, nzval, rowind, colptr);
 
    /* Line 4: format statement */
    fscanf(fp, "%16c", buf);
    ParseIntFormat(buf, &colnum, &colsize);
    fscanf(fp, "%16c", buf);
    ParseIntFormat(buf, &rownum, &rowsize);
    fscanf(fp, "%20c", buf);
    ParseFloatFormat(buf, &valnum, &valsize);
    fscanf(fp, "%20c", buf);
    DumpLine(fp);
 
    /* Line 5: right-hand side */    
    if ( rhscrd ) DumpLine(fp); /* skip RHSFMT */
 
#if ( DEBUGlevel>=1 )
    if ( !iam ) {
	printf("%d rows, %d nonzeros\n", *nrow, *nonz);
	printf("colnum %d, colsize %d\n", colnum, colsize);
	printf("rownum %d, rowsize %d\n", rownum, rowsize);
	printf("valnum %d, valsize %d\n", valnum, valsize);
    }
#endif
 
    ReadVector(fp, *ncol+1, *colptr, colnum, colsize);
#if ( DEBUGlevel>=1 )
    if ( !iam )	printf("read colptr[%d] = %d\n", *ncol, (*colptr)[*ncol]);
#endif
    ReadVector(fp, *nonz, *rowind, rownum, rowsize);
#if ( DEBUGlevel>=1 )
    if ( !iam )	printf("read rowind[%d] = %d\n", *nonz-1, (*rowind)[*nonz-1]);
#endif
    if ( numer_lines ) {
        dReadValues(fp, *nonz, *nzval, valnum, valsize);
#if ( DEBUGlevel>=1 )
	if ( !iam ) {
	  printf("read nzval[%d] = %e\n", 1, (*nzval)[1]);
	  printf("read nzval[%d] = %e\n", 4, (*nzval)[4]);
	  printf("read nzval[%d] = %e\n", 5, (*nzval)[5]);
	  printf("read nzval[%d] = %e\n", *nonz-1, (*nzval)[*nonz-1]);
	}
#endif
    }
 
    sym = (type[1] == 'S'


The example can be compiled with a command line such as the following if a SuperLU_Dist module is loaded:

[nom@serveur $] 
mpicc ex_superludist.c -lsuperlu_dist -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm


Outils personnels
Espaces de noms

Variantes
Actions
Navigation
Ressources de Calcul Québec
Outils
Partager