Using petsc (parallel)
Here is a quick parallel code. Compile it as described in Using petsc, but run it like
mpirun -n <N procs> ./a.out <other options>
On my workstation, n = 2 actually gives better performance than n = 1 (it has four cores, but multicore CPUs usually do little for PETSc), though not if I bump the problem up to 1024x1024.
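For instance, that comparison is just the same binary launched with different process counts, e.g.

 mpirun -n 1 ./a.out
 mpirun -n 2 ./a.out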
On bluecrystalp1, running a 1024x1024 problem with -pc_type sor -ksp_type bcgs, I get
1 node, 1 core: 70s
1 node, 2 cores: 45s
1 node, 4 cores: 32s
2 nodes, 4 cores each: 14s
compared to the 49s required on my workstation with one core. Asking for 4 nodes seems to result in lengthy queuing.
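For reference, each of those timings comes from the same kind of invocation, just with more MPI processes (the launch actually goes through the cluster's batch system, which is omitted here); PETSc's -log_summary option can be added to get its own timing breakdown:

 mpirun -n 8 ./a.out -pc_type sor -ksp_type bcgs -log_summary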
Using MUMPS instead (-pc_type lu -pc_factor_mat_solver_package mumps), I get
1 node, 1 core: 77s
1 node, 2 cores: 48s
1 node, 4 cores:
2 nodes, 1 core each: 47s
2 nodes, 2 cores each: 29s
2 nodes, 4 cores each:
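The MUMPS runs use the same binary; only the options change, since the solver is picked at run time by KSPSetFromOptions (batch-system details again omitted):

 mpirun -n 4 ./a.out -pc_type lu -pc_factor_mat_solver_package mumps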
!! solve a 2D n x n  (a(x,y)u')' + b(x,y) u = c(x,y)
!! with periodic boundary conditions in y, u(x=0,y) = 0 and u'(x=1,y) = d
!! a(x,y) = 1 for x < 1/2, h for x > 1/2
!! b(x,y) = 1 for x < 1/2, r for x > 1/2
!! c(x,y) = 1 for x < 1/2, p for x > 1/2
program main
  implicit none
#include "finclude/petsc.h"
#include "finclude/petscvec.h"
#include "finclude/petscmat.h"
#include "finclude/petscksp.h"
#include "finclude/petscpc.h"

  Mat A
  Vec x, b, res
  KSP ksp
  PetscInt ierr, ndof, n, m, i, j, lbegin, lend
  PetscScalar dfluxe(0:1), dfluxw(0:1), dfluxn(0:1), dfluxs(0:1), su(0:0), sc, &
       h, r, p, d, rnorm, bnorm, one
  PetscInt row(0:0), fcol(0:1)

  n = 256
  m = n/2
  ndof = n * n
  ! x > 1/2 diffusion = h u'
  h = 1e-2
  ! x > 1/2 dissipation = r u ; set r small to make the matrix hard to invert,
  ! so that direct solvers win
  r = 1e-6
  ! x > 1/2 source = p
  p = 0.5
  ! u' at x = 1
  d = 100

  ! this has to be the first petsc call
  call PetscInitialize(PETSC_NULL_CHARACTER, ierr)

  ! allocate space for matrix elements
  call MatCreateMPIAIJ(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, &
       ndof, ndof, 5, PETSC_NULL_INTEGER, 1, PETSC_NULL_INTEGER, A, ierr)

  ! allocate space for rhs, residual and solution
  call MatGetVecs(A, x, b, ierr)
  call MatGetVecs(A, x, res, ierr)

  ! establish which dofs are on this processor
  call MatGetOwnershipRange(A, lbegin, lend, ierr)

  ! bulk
  dfluxe(0) = 1.0
  dfluxe(1) = -1.0
  su(0) = 1.0
  sc = 1.0
  do i = 1, n-2
     dfluxw(0) = dfluxe(0)
     dfluxw(1) = dfluxe(1)
     dfluxn(0) = 1.0
     dfluxn(1) = -1.0
     if (i > m) then
        su(0) = r
        sc = p
        dfluxe(0) = h
        dfluxe(1) = -h
        dfluxn(0) = h
        dfluxn(1) = -h
     endif
     dfluxs(0) = dfluxn(0)
     dfluxs(1) = dfluxn(1)
     do j = 1, n-2
        row(0) = j*n + i
        if ((lbegin .le. row(0)) .and. (lend .gt. row(0))) then
           ! source term
           call MatSetValues(A, 1, row, 1, row, su, ADD_VALUES, ierr)
           call VecSetValue(b, row(0), sc, INSERT_VALUES, ierr)
           ! fluxes
           fcol(0) = row(0)
           fcol(1) = row(0) + 1
           call MatSetValues(A, 1, row, 2, fcol, dfluxe, ADD_VALUES, ierr)
           fcol(1) = row(0) - 1
           call MatSetValues(A, 1, row, 2, fcol, dfluxw, ADD_VALUES, ierr)
           fcol(1) = row(0) + n
           call MatSetValues(A, 1, row, 2, fcol, dfluxn, ADD_VALUES, ierr)
           fcol(1) = row(0) - n
           call MatSetValues(A, 1, row, 2, fcol, dfluxs, ADD_VALUES, ierr)
        end if
     end do

     ! periodic boundaries in y: j = 0 couples to j = n-1 and vice versa
     j = 0
     row(0) = j*n + i
     if ((lbegin .le. row(0)) .and. (lend .gt. row(0))) then
        ! source term
        call MatSetValues(A, 1, row, 1, row, su, ADD_VALUES, ierr)
        call VecSetValue(b, row(0), sc, INSERT_VALUES, ierr)
        ! fluxes
        fcol(0) = row(0)
        fcol(1) = row(0) + 1
        call MatSetValues(A, 1, row, 2, fcol, dfluxe, ADD_VALUES, ierr)
        fcol(1) = row(0) - 1
        call MatSetValues(A, 1, row, 2, fcol, dfluxw, ADD_VALUES, ierr)
        fcol(1) = row(0) + n
        call MatSetValues(A, 1, row, 2, fcol, dfluxn, ADD_VALUES, ierr)
        fcol(1) = (n-1)*n + i
        call MatSetValues(A, 1, row, 2, fcol, dfluxs, ADD_VALUES, ierr)
     end if

     j = n-1
     row(0) = j*n + i
     if ((lbegin .le. row(0)) .and. (lend .gt. row(0))) then
        ! source term
        call MatSetValues(A, 1, row, 1, row, su, ADD_VALUES, ierr)
        call VecSetValue(b, row(0), sc, INSERT_VALUES, ierr)
        ! fluxes
        fcol(0) = row(0)
        fcol(1) = row(0) + 1
        call MatSetValues(A, 1, row, 2, fcol, dfluxe, ADD_VALUES, ierr)
        fcol(1) = row(0) - 1
        call MatSetValues(A, 1, row, 2, fcol, dfluxw, ADD_VALUES, ierr)
        fcol(1) = row(0) - n
        call MatSetValues(A, 1, row, 2, fcol, dfluxn, ADD_VALUES, ierr)
        fcol(1) = i
        call MatSetValues(A, 1, row, 2, fcol, dfluxs, ADD_VALUES, ierr)
     end if
  end do

  ! x-boundaries: u = 0 at x = 0, u' = d at x = 1
  dfluxw(0) = h
  dfluxw(1) = -h
  do j = 0, n - 1
     i = 0
     row(0) = j*n + i
     if ((lbegin .le. row(0)) .and. (lend .gt. row(0))) then
        call MatSetValues(A, 1, row, 1, row, su, ADD_VALUES, ierr)
     end if
     i = n - 1
     row(0) = j*n + i
     if ((lbegin .le. row(0)) .and. (lend .gt. row(0))) then
        su = r
        sc = h*d
        call MatSetValues(A, 1, row, 1, row, su, ADD_VALUES, ierr)
        call VecSetValue(b, row(0), sc, INSERT_VALUES, ierr)
        fcol(0) = row(0)
        fcol(1) = row(0) - 1
        call MatSetValues(A, 1, row, 2, fcol, dfluxw, ADD_VALUES, ierr)
     end if
  end do

  ! needs to be done before A and b can be used
  call MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY, ierr)
  call MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY, ierr)
  call VecAssemblyBegin(b, ierr)
  call VecAssemblyEnd(b, ierr)

  ! KSP context: KSP is petsc's interface to linear solvers
  call KSPCreate(PETSC_COMM_WORLD, ksp, ierr)
  ! A defines both the operator and the preconditioner (but it need not)
  call KSPSetOperators(ksp, A, A, DIFFERENT_NONZERO_PATTERN, ierr)
  ! grab all the options from the command line
  call KSPSetFromOptions(ksp, ierr)
  ! solve
  call KSPSolve(ksp, b, x, ierr)

  ! check results
  call MatMult(A, x, res, ierr)
  one = -1.0
  call VecAXPY(res, one, b, ierr)
  call VecNorm(res, NORM_2, rnorm, ierr)
  call VecNorm(b, NORM_2, bnorm, ierr)
  write(*,*) "||Ax - b||/||b|| = ", rnorm/bnorm

  ! clean up
  call VecDestroy(x, ierr)
  call VecDestroy(b, ierr)
  call VecDestroy(res, ierr)
  call MatDestroy(A, ierr)
  call KSPDestroy(ksp, ierr)

  ! last petsc call
  call PetscFinalize(ierr)
end program main
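To see which solver and preconditioner a run actually used, and to watch convergence, the standard PETSc options -ksp_view and -ksp_monitor can be appended to any of the commands above, for example

 mpirun -n 2 ./a.out -ksp_type bcgs -pc_type sor -ksp_monitor -ksp_view

-ksp_monitor prints the residual norm at each iteration and -ksp_view prints the solver configuration after the solve; the program's own check of ||Ax - b||/||b|| is printed at the end either way.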