SuperLU Dist

De Wiki de Calcul Québec
Aller à : Navigation, rechercher
Autres langues :anglais 100% • ‎français 100%

Description

Cette bibliothèque est la version parallèle de SuperLU. La parallélisation est faite à l'aide de la bibliothèque MPI. Pour plus de détails, consultez notre documentation sur SuperLU. Puisque l'écriture du code est différente, nous vous fournissons un exemple pour SuperLU_Dist.

Exemple

Le fichier suivant contient un exemple en langage C de résolution d'équations linéaires complexes avec la bibliothèque SuperLU_Dist :

Fichier : ex_superludist.c
/*
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 *
 */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <superlu_ddefs.h>
#include <machines.h>
 
int dcreate_matrix(SuperMatrix *A, int nrhs, double **rhs,
                   int *ldb, double **x, int *ldx,
                   FILE *fp, gridinfo_t *grid)
{
/* 
 * -- Distributed SuperLU routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * March 15, 2003
 *
 *
 * Purpose
 * =======
 * 
 * DCREATE_MATRIX read the matrix from data file in Harwell-Boeing format,
 * and distribute it to processors in a distributed compressed row format.
 * It also generate the distributed true solution X and the right-hand
 * side RHS.
 *
 *
 * Arguments   
 * =========      
 *
 * A     (output) SuperMatrix*
 *       Local matrix A in NR_loc format. 
 *
 * NRHS  (input) int_t
 *       Number of right-hand sides.
 *
 * RHS   (output) double**
 *       The right-hand side matrix.
 *
 * LDB   (output) int*
 *       Leading dimension of the right-hand side matrix.
 *
 * X     (output) double**
 *       The true solution matrix.
 *
 * LDX   (output) int*
 *       The leading dimension of the true solution matrix.
 *
 * FP    (input) FILE*
 *       The matrix file pointer.
 *
 * GRID  (input) gridinof_t*
 *       The 2D process mesh.
 *
 */
 
    SuperMatrix GA;              /* global A */
    double   *b_global, *xtrue_global;  /* replicated on all processes */
    int_t    *rowind, *colptr;	 /* global */
    double   *nzval;             /* global */
    double   *nzval_loc;         /* local */
    int_t    *colind, *rowptr;	 /* local */
    int_t    m, n, nnz;
    int_t    m_loc, fst_row, nnz_loc;
    int_t    m_loc_fst; /* Record m_loc of the first p-1 processors,
			   when mod(m, p) is not zero. */ 
    int_t    iam, row, col, i, j, relpos;
    char     trans[1];
    int_t      *marker;
 
    iam = grid->iam;
 
#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Enter dcreate_matrix()");
#endif
 
    if ( !iam ) {
        /* Read the matrix stored on disk in Harwell-Boeing format. */
        dreadhb_dist(iam, fp, &m, &n, &nnz, &nzval, &rowind, &colptr);
 
	/* Broadcast matrix A to the other PEs. */
	MPI_Bcast( &m,     1,   mpi_int_t,  0, grid->comm );
	MPI_Bcast( &n,     1,   mpi_int_t,  0, grid->comm );
	MPI_Bcast( &nnz,   1,   mpi_int_t,  0, grid->comm );
	MPI_Bcast( nzval,  nnz, MPI_DOUBLE, 0, grid->comm );
	MPI_Bcast( rowind, nnz, mpi_int_t,  0, grid->comm );
	MPI_Bcast( colptr, n+1, mpi_int_t,  0, grid->comm );
    } else {
	/* Receive matrix A from PE 0. */
	MPI_Bcast( &m,   1,   mpi_int_t,  0, grid->comm );
	MPI_Bcast( &n,   1,   mpi_int_t,  0, grid->comm );
	MPI_Bcast( &nnz, 1,   mpi_int_t,  0, grid->comm );
 
	/* Allocate storage for compressed column representation. */
	dallocateA_dist(n, nnz, &nzval, &rowind, &colptr);
 
	MPI_Bcast( nzval,   nnz, MPI_DOUBLE, 0, grid->comm );
	MPI_Bcast( rowind,  nnz, mpi_int_t,  0, grid->comm );
	MPI_Bcast( colptr,  n+1, mpi_int_t,  0, grid->comm );
    }
 
#if 0
    nzval[0]=0.1;
#endif
 
    /* Compute the number of rows to be distributed to local process */
    m_loc = m / (grid->nprow * grid->npcol); 
    m_loc_fst = m_loc;
    /* When m / procs is not an integer */
    if ((m_loc * grid->nprow * grid->npcol) != m) {
      m_loc = m_loc+1;
      m_loc_fst = m_loc;
      if (iam == (grid->nprow * grid->npcol - 1)) 
	m_loc = m - m_loc_fst * (grid->nprow * grid->npcol - 1);
    }
 
    /* Create compressed column matrix for GA. */
    dCreate_CompCol_Matrix_dist(&GA, m, n, nnz, nzval, rowind, colptr,
				SLU_NC, SLU_D, SLU_GE);
 
    /* Generate the exact solution and compute the right-hand side. */
    if ( !(b_global = doubleMalloc_dist(m*nrhs)) )
        ABORT("Malloc fails for b[]");
    if ( !(xtrue_global = doubleMalloc_dist(n*nrhs)) )
        ABORT("Malloc fails for xtrue[]");
    *trans = 'N';
 
    dGenXtrue_dist(n, nrhs, xtrue_global, n);
    dFillRHS_dist(trans, nrhs, xtrue_global, n, &GA, b_global, m);
 
    /*************************************************
     * Change GA to a local A with NR_loc format     *
     *************************************************/
 
    rowptr = (int_t *) intMalloc_dist(m_loc+1);
    marker = (int_t *) intCalloc_dist(n);
 
    /* Get counts of each row of GA */
    for (i = 0; i < n; ++i)
      for (j = colptr[i]; j < colptr[i+1]; ++j) ++marker[rowind[j]];
    /* Set up row pointers */
    rowptr[0] = 0;
    fst_row = iam * m_loc_fst;
    nnz_loc = 0;
    for (j = 0; j < m_loc; ++j) {
      row = fst_row + j;
      rowptr[j+1] = rowptr[j] + marker[row];
      marker[j] = rowptr[j];
    }
    nnz_loc = rowptr[m_loc];
 
    nzval_loc = (double *) doubleMalloc_dist(nnz_loc);
    colind = (int_t *) intMalloc_dist(nnz_loc);
 
    /* Transfer the matrix into the compressed row storage */
    for (i = 0; i < n; ++i) {
      for (j = colptr[i]; j < colptr[i+1]; ++j) {
	row = rowind[j];
	if ( (row>=fst_row) && (row<fst_row+m_loc) ) {
	  row = row - fst_row;
	  relpos = marker[row];
	  colind[relpos] = i;
	  nzval_loc[relpos] = nzval[j];
	  ++marker[row];
	}
      }
    }
 
#if ( DEBUGlevel>=2 )
    if ( !iam ) dPrint_CompCol_Matrix_dist(&GA);
#endif   
 
    /* Destroy GA */
    Destroy_CompCol_Matrix_dist(&GA);
 
    /******************************************************/
    /* Change GA to a local A with NR_loc format */
    /******************************************************/
 
    /* Set up the local A in NR_loc format */
    dCreate_CompRowLoc_Matrix_dist(A, m, n, nnz_loc, m_loc, fst_row,
				   nzval_loc, colind, rowptr,
				   SLU_NR_loc, SLU_D, SLU_GE);
 
    /* Get the local B */
    if ( !((*rhs) = doubleMalloc_dist(m_loc*nrhs)) )
        ABORT("Malloc fails for rhs[]");
    for (j =0; j < nrhs; ++j) {
	for (i = 0; i < m_loc; ++i) {
	    row = fst_row + i;
	    (*rhs)[j*m_loc+i] = b_global[j*n+row];
	}
    }
    *ldb = m_loc;
 
    /* Set the all true X */    
    *ldx = m_loc;
    if ( !((*x) = doubleMalloc_dist(*ldx * nrhs)) )
        ABORT("Malloc fails for x_loc[]");
 
    /* Get the local part of xtrue_global */
    for (j = 0; j < nrhs; ++j) {
      for (i = 0; i < m_loc; ++i)
	(*x)[i + j*(*ldx)] = xtrue_global[i + fst_row + j*n];
    }
 
    SUPERLU_FREE(b_global);
    SUPERLU_FREE(xtrue_global);
    SUPERLU_FREE(marker);
 
#if ( DEBUGlevel>=1 )
    printf("sizeof(NRforamt_loc) %d\n", sizeof(NRformat_loc));
    CHECK_MALLOC(iam, "Exit dcreate_matrix()");
#endif
    return 0;
}
 
/*
 * Prototypes
 */
static void ReadVector(FILE *, int_t, int_t *, int_t, int_t);
static void dReadValues(FILE *, int_t, double *, int_t, int_t);
static void FormFullA(int_t, int_t *, double **, int_t **, int_t **);
static int DumpLine(FILE *);
static int ParseIntFormat(char *, int_t *, int_t *);
static int ParseFloatFormat(char *, int_t *, int_t *);
 
 
void
dreadhb_dist(int iam, FILE *fp, int_t *nrow, int_t *ncol, int_t *nonz,
	     double **nzval, int_t **rowind, int_t **colptr)
{
/* 
 * -- Distributed SuperLU routine (version 1.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley.
 * September 1, 1999
 *
 *
 * Purpose
 * =======
 * 
 * Read a DOUBLE PRECISION matrix stored in Harwell-Boeing format 
 * as described below.
 * 
 * Line 1 (A72,A8) 
 *  	Col. 1 - 72   Title (TITLE) 
 *	Col. 73 - 80  Key (KEY) 
 * 
 * Line 2 (5I14) 
 * 	Col. 1 - 14   Total number of lines excluding header (TOTCRD) 
 * 	Col. 15 - 28  Number of lines for pointers (PTRCRD) 
 * 	Col. 29 - 42  Number of lines for row (or variable) indices (INDCRD) 
 * 	Col. 43 - 56  Number of lines for numerical values (VALCRD) 
 *	Col. 57 - 70  Number of lines for right-hand sides (RHSCRD) 
 *                    (including starting guesses and solution vectors 
 *		       if present) 
 *           	      (zero indicates no right-hand side data is present) 
 *
 * Line 3 (A3, 11X, 4I14) 
 *   	Col. 1 - 3    Matrix type (see below) (MXTYPE) 
 * 	Col. 15 - 28  Number of rows (or variables) (NROW) 
 * 	Col. 29 - 42  Number of columns (or elements) (NCOL) 
 *	Col. 43 - 56  Number of row (or variable) indices (NNZERO) 
 *	              (equal to number of entries for assembled matrices) 
 * 	Col. 57 - 70  Number of elemental matrix entries (NELTVL) 
 *	              (zero in the case of assembled matrices) 
 * Line 4 (2A16, 2A20) 
 * 	Col. 1 - 16   Format for pointers (PTRFMT) 
 *	Col. 17 - 32  Format for row (or variable) indices (INDFMT) 
 *	Col. 33 - 52  Format for numerical values of coefficient matrix (VALFMT) 
 * 	Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) 
 *
 * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present 
 *    	Col. 1 	      Right-hand side type: 
 *	         	  F for full storage or M for same format as matrix 
 *    	Col. 2        G if a starting vector(s) (Guess) is supplied. (RHSTYP) 
 *    	Col. 3        X if an exact solution vector(s) is supplied. 
 *	Col. 15 - 28  Number of right-hand sides (NRHS) 
 *	Col. 29 - 42  Number of row indices (NRHSIX) 
 *          	      (ignored in case of unassembled matrices) 
 *
 * The three character type field on line 3 describes the matrix type. 
 * The following table lists the permitted values for each of the three 
 * characters. As an example of the type field, RSA denotes that the matrix 
 * is real, symmetric, and assembled. 
 *
 * First Character: 
 *	R Real matrix 
 *	C Complex matrix 
 *	P Pattern only (no numerical values supplied) 
 *
 * Second Character: 
 *	S Symmetric 
 *	U Unsymmetric 
 *	H Hermitian 
 *	Z Skew symmetric 
 *	R Rectangular 
 *
 * Third Character: 
 *	A Assembled 
 *	E Elemental matrices (unassembled) 
 *
 */
 
    register int_t i, numer_lines, rhscrd = 0;
    int_t tmp, colnum, colsize, rownum, rowsize, valnum, valsize;
    char buf[100], type[4];
    int_t sym;
 
#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(0, "Enter dreadhb_dist()");
#endif
 
    /* Line 1 */
    fgets(buf, 100, fp);
 
    /* Line 2 */
    for (i=0; i<5; i++) {
	fscanf(fp, "%14c", buf); buf[14] = 0;
	tmp = atoi(buf); /*sscanf(buf, "%d", &tmp);*/
	if (i == 3) numer_lines = tmp;
	if (i == 4 && tmp) rhscrd = tmp;
    }
    DumpLine(fp);
 
    /* Line 3 */
    fscanf(fp, "%3c", type);
    fscanf(fp, "%11c", buf); /* pad */
    type[3] = 0;
#if ( DEBUGlevel>=1 )
    if ( !iam ) printf("Matrix type %s\n", type);
#endif
 
    fscanf(fp, "%14c", buf); *nrow = atoi(buf); 
    fscanf(fp, "%14c", buf); *ncol = atoi(buf); 
    fscanf(fp, "%14c", buf); *nonz = atoi(buf); 
    fscanf(fp, "%14c", buf); tmp = atoi(buf);   
 
    if (tmp != 0)
	if ( !iam ) printf("This is not an assembled matrix!\n");
    if (*nrow != *ncol)
	if ( !iam ) printf("Matrix is not square.\n");
    DumpLine(fp);
 
    /* Allocate storage for the three arrays ( nzval, rowind, colptr ) */
    dallocateA_dist(*ncol, *nonz, nzval, rowind, colptr);
 
    /* Line 4: format statement */
    fscanf(fp, "%16c", buf);
    ParseIntFormat(buf, &colnum, &colsize);
    fscanf(fp, "%16c", buf);
    ParseIntFormat(buf, &rownum, &rowsize);
    fscanf(fp, "%20c", buf);
    ParseFloatFormat(buf, &valnum, &valsize);
    fscanf(fp, "%20c", buf);
    DumpLine(fp);
 
    /* Line 5: right-hand side */    
    if ( rhscrd ) DumpLine(fp); /* skip RHSFMT */
 
#if ( DEBUGlevel>=1 )
    if ( !iam ) {
	printf("%d rows, %d nonzeros\n", *nrow, *nonz);
	printf("colnum %d, colsize %d\n", colnum, colsize);
	printf("rownum %d, rowsize %d\n", rownum, rowsize);
	printf("valnum %d, valsize %d\n", valnum, valsize);
    }
#endif
 
    ReadVector(fp, *ncol+1, *colptr, colnum, colsize);
#if ( DEBUGlevel>=1 )
    if ( !iam )	printf("read colptr[%d] = %d\n", *ncol, (*colptr)[*ncol]);
#endif
    ReadVector(fp, *nonz, *rowind, rownum, rowsize);
#if ( DEBUGlevel>=1 )
    if ( !iam )	printf("read rowind[%d] = %d\n", *nonz-1, (*rowind)[*nonz-1]);
#endif
    if ( numer_lines ) {
        dReadValues(fp, *nonz, *nzval, valnum, valsize);
#if ( DEBUGlevel>=1 )
	if ( !iam ) {
	  printf("read nzval[%d] = %e\n", 1, (*nzval)[1]);
	  printf("read nzval[%d] = %e\n", 4, (*nzval)[4]);
	  printf("read nzval[%d] = %e\n", 5, (*nzval)[5]);
	  printf("read nzval[%d] = %e\n", *nonz-1, (*nzval)[*nonz-1]);
	}
#endif
    }
 
    sym = (type[1] == 'S'


Il est possible de compiler l'exemple avec une ligne de commande ressemblant à ceci, si la bibliothèque SuperLU_Dist se trouve dans vos modules.

[nom@serveur $] 
mpicc ex_superludist.c -lsuperlu_dist -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm


Outils personnels
Espaces de noms

Variantes
Actions
Navigation
Ressources de Calcul Québec
Outils
Partager