!!--------------------------------------------------------------------------- ! gfortran -ffree-form -Wall -Wextra -lblas ge.f90 !!--------------------------------------------------------------------------- ! Gaussian elimination without pivoting ! ! [1] J. Demmel "Numerical Linear Algebra" ! [2] N. J. Nigham "Gaussian Elimination" ! ! ELIMINATION ! for k = 1: n-1 ! for i = k+1: n ! a(i, k) /= a(k, k) ! for i = k+1: n ! for j = k+1: n ! a(i, j) -= a(i, k) * a(k, j) ! end ! end ! end ! ! BACKSUBSTITUTION - L U y = f => L x = f => x = L \ f => y = U \ x ! for i = 1: n ! x(i) = f(i) ! for j = 1: i-1 ! x(i) -= a(i, j) * x(j) ! end ! end ! ! for i = n: 1 ! y(i) = x(i) ! for j = n: i+1 ! y(i) -= a(i, j) * y(j) ! end ! y(i) /= a(i, i) ! end !!--------------------------------------------------------------------------- program ge !!--------------------------------------------------------------------------- implicit none ! Matrix of coefficients; the one is filled by random_number() real, dimension(:, :), allocatable :: A ! "Analytical" solution; the one is filled by random_number() real, dimension(:), allocatable :: u ! Right-hand side (RHS); the one is calculated as f = A * u ! Numerical solution (NS) of the equation A y = f ! RHS is overwritten by NS real, dimension(:), allocatable :: y ! Size of matrix integer, parameter :: n = 5 ! Allocate memory allocate(A(1: n, 1: n)) allocate(u(1: n)) allocate(y(1: n)) ! Algorithm uses straightforward formulas call Generate_Data() call Straightforward_Elimination() call Straightforward_Backsubstition() call Print_Norms() ! Algorithm uses Fortran 90/95 features call Generate_Data() call Fortran9x_Elimination() call Fortran9x_Backsubstition() call Print_Norms() ! Algorithm uses BLAS call Generate_Data() call BLAS_Elimination() call BLAS_Backsubstition() call Print_Norms() ! Free memory deallocate(A) deallocate(u) deallocate(y) !!--------------------------------------------------------------------------- contains !!--------------------------------------------------------------------------- subroutine Print_Norms() write (*, *) maxval(abs(u)), maxval(abs(y - u)) end subroutine Print_Norms !!--------------------------------------------------------------------------- ! This version is a simplified modification of ! http://gcc.gnu.org/onlinedocs/gfortran/RANDOM_005fSEED.html subroutine Init_Random_Seed() integer :: i, n integer, dimension(:), allocatable :: seed call random_seed(size = n) allocate(seed(n)) seed = 37 * (/ (i - 1, i = 1, n) /) call random_seed(put = seed) deallocate(seed) end subroutine Init_Random_Seed !!--------------------------------------------------------------------------- subroutine Generate_Data() call Init_Random_Seed() call random_number(A) call random_number(u) y = matmul(A, u) end subroutine Generate_Data !!--------------------------------------------------------------------------- subroutine Straightforward_Elimination() integer :: i, j, k do k = 1, n-1 do i = k+1, n a(i, k) = a(i, k) / a(k, k) end do do j = k+1, n do i = k+1, n a(i, j) = a(i, j) - a(i, k) * a(k, j) end do end do end do end subroutine Straightforward_Elimination !!--------------------------------------------------------------------------- subroutine Fortran9x_Elimination() integer :: k do k = 1, n-1 a(k+1: n, k) = a(k+1: n, k) / a(k, k) a(k+1: n, k+1: n) = a(k+1: n, k+1: n) - & matmul(a(k+1: n, k: k), a(k: k, k+1: n)) end do end subroutine Fortran9x_Elimination !!--------------------------------------------------------------------------- subroutine BLAS_Elimination() integer :: k do k = 1, n-1 ! x = a*x call sscal(n-k, 1.0 / a(k, k), a(k+1, k), 1) ! A := alpha*x*y'+ A call sger(n-k, n-k, -1.0, & a(k+1, k), 1, & a(k, k+1), n, & a(k+1, k+1), n) end do end subroutine BLAS_Elimination !!--------------------------------------------------------------------------- subroutine Straightforward_Backsubstition() integer :: i, j ! L x = f => x = L \ f do i = 1, n do j = 1, i-1 y(i) = y(i) - a(i, j) * y(j) end do end do ! U y = x => y = U \ x do i = n, 1, -1 do j = i+1, n y(i) = y(i) - a(i, j) * y(j) end do y(i) = y(i) / a(i, i) end do end subroutine Straightforward_Backsubstition !!--------------------------------------------------------------------------- subroutine Fortran9x_Backsubstition() integer :: i ! L x = f => x = L \ f do i = 1, n y(i) = y(i) - dot_product(a(i, 1: i-1), y(1: i-1)) end do ! U y = x => y = U \ x do i = n, 1, -1 y(i) = y(i) - dot_product(a(i, i+1: n), y(i+1: n)) y(i) = y(i) / a(i, i) end do end subroutine Fortran9x_Backsubstition !!--------------------------------------------------------------------------- subroutine BLAS_Backsubstition() ! L x = f => x = L \ f ! op(A)*X = alpha*B call strsm('L', 'L', 'N', 'U', n, 1, 1.0, a, n, y, n) ! U y = x => y = U \ x ! op(A)*X = alpha*B call strsm('L', 'U', 'N', 'N', n, 1, 1.0, a, n, y, n) end subroutine BLAS_Backsubstition !!--------------------------------------------------------------------------- end program ge !!---------------------------------------------------------------------------
Pieces of Code
Sunday, February 27, 2011
Gaussian elimination without pivoting using straightforward formulas, array syntax and BLAS routines
Saturday, February 26, 2011
Calculate pi in OpenMP-parallel
/* * Calculate $\pi = 4 \int_0^1 \sqrt{1 - x^2} \, dx$ * using a rectangle rule $\sum_{i=1}^N f(x_0 + i h - h/2) h$ * * Compile as $> gcc -Wall -Wextra -fopenmp -lm * Run as $> OMP_NUM_THREADS=2 ./a.out */ #include <stdio.h> #include <math.h> #include <omp.h> int main() { const int N = 10000000; const double L = 1.0; const double h = L / N; const double x_0 = 0.0; double pi; double t_1, t_2; int i; double sum = 0.0; t_1 = omp_get_wtime(); #pragma omp parallel for reduction(+: sum) schedule(static) for (i = 0; i < N; ++i) { double x = x_0 + i * h + h/2; sum += sqrt(1 - x*x); } t_2 = omp_get_wtime(); pi = sum * h * 4.0; printf("omp_get_max_threads(): %d\n", omp_get_max_threads()); printf("time: %f\n", t_2 - t_1); printf("pi ~ %f\n", pi); return 0; }
Friday, February 25, 2011
Count number of bits set
/* $> gcc -msse4.2 -Wall -Wextra http://stackoverflow.com/questions/109023/best-algorithm-to-count-the-number-of-set-bits-in-a-32-bit-integer */ #include <stdio.h> int main() { unsigned int number; int num_bits_set; scanf("%u", &number); num_bits_set = __builtin_popcount(number); printf("%d\n", num_bits_set); return 0; }
Tuesday, November 30, 2010
MPI and Cartesian communicators
/* ----------------------------------------------------------------------- */ /* * file: mpi-cart.c * compilation: mpicc -Wall -Wextra mpi-cart.c -o mpi-cart * invocation: mpirun -n 8 mpi-cart */ /* ----------------------------------------------------------------------- */ /* Studying MPI functions related to the Cartesian topologies */ /* ----------------------------------------------------------------------- */ #include <stdio.h> #include <stdlib.h> /* ----------------------------------------------------------------------- */ #include <mpi.h> /* ----------------------------------------------------------------------- */ typedef struct MPI_Data_s { MPI_Comm comm; int size; int rank; int root; } MPI_Data; /* ----------------------------------------------------------------------- */ #define CART2D_NDIMS 2 typedef struct MPI_Cart2D_Data_s { MPI_Data mpi_data; int ndims; int dims[CART2D_NDIMS]; int periods[CART2D_NDIMS]; int coords[CART2D_NDIMS]; } MPI_Cart2D_Data; /* ----------------------------------------------------------------------- */ #define CART4D_NDIMS 4 typedef struct MPI_Cart4D_Data_s { MPI_Data mpi_data; int ndims; int dims[CART4D_NDIMS]; int periods[CART4D_NDIMS]; int coords[CART4D_NDIMS]; } MPI_Cart4D_Data; /* ----------------------------------------------------------------------- */ void Create_2D_Subcomms(MPI_Cart4D_Data *cart4d, int *remain_dims, MPI_Cart2D_Data *cart2d) { MPI_Cart_sub(cart4d->mpi_data.comm, remain_dims, &(cart2d->mpi_data.comm) ); MPI_Comm_size(cart2d->mpi_data.comm, &(cart2d->mpi_data.size)); MPI_Comm_rank(cart2d->mpi_data.comm, &(cart2d->mpi_data.rank)); cart2d->mpi_data.root = 0; cart2d->ndims = CART2D_NDIMS; MPI_Cart_get(cart2d->mpi_data.comm, cart2d->ndims, cart2d->dims, cart2d->periods, cart2d->coords); } /* ----------------------------------------------------------------------- */ int main(int argc, char **argv) { /* Some variables */ MPI_Data world; MPI_Cart4D_Data cart4d; int remain_dims[CART4D_NDIMS]; MPI_Cart2D_Data cart2d_01, cart2d_12; /* Beginning of the program */ MPI_Init(&argc, &argv); /* Fill-in MPI_COMM_WORLD data */ world.comm = MPI_COMM_WORLD; MPI_Comm_size(world.comm, &(world.size)); MPI_Comm_rank(world.comm, &(world.rank)); world.root = 0; /* Create a division of processors in a 4D Cartesian grid */ cart4d.ndims = CART4D_NDIMS; cart4d.dims[0] = 0; cart4d.dims[1] = 0; cart4d.dims[2] = 0; cart4d.dims[3] = 1; MPI_Dims_create(world.size, cart4d.ndims, cart4d.dims); if (world.rank == world.root) printf("cart4d dims: <%d, %d, %d, %d>\n", cart4d.dims[0], cart4d.dims[1], cart4d.dims[2], cart4d.dims[3] ); /* Create a new communicator with 4D Cartesian topology attached */ cart4d.periods[0] = 0; cart4d.periods[1] = 0; cart4d.periods[2] = 0; cart4d.periods[3] = 0; MPI_Cart_create(world.comm, cart4d.ndims, cart4d.dims, cart4d.periods, 1, &(cart4d.mpi_data.comm) ); /* Fill-in 4D Cartesian communicator data */ MPI_Comm_size(cart4d.mpi_data.comm, &(cart4d.mpi_data.size)); MPI_Comm_rank(cart4d.mpi_data.comm, &(cart4d.mpi_data.rank)); cart4d.mpi_data.root = 0; MPI_Cart_coords(cart4d.mpi_data.comm, cart4d.mpi_data.rank, cart4d.ndims, cart4d.coords ); /* Create 2D subcommunicators and fill-in associated data */ remain_dims[0] = 1; remain_dims[1] = 1; remain_dims[2] = 0; remain_dims[3] = 0; Create_2D_Subcomms(&cart4d, remain_dims, &cart2d_01); remain_dims[0] = 0; remain_dims[1] = 1; remain_dims[2] = 1; remain_dims[3] = 0; Create_2D_Subcomms(&cart4d, remain_dims, &cart2d_12); /* Print info on communicators */ printf("world: %d; cart4d: %d, (%d, %d, %d, %d); " "cart2d_01: %d, (%d, %d); cart2d_12: %d, (%d, %d)\n", world.rank, cart4d.mpi_data.rank, cart4d.coords[0], cart4d.coords[1], cart4d.coords[2], cart4d.coords[3], cart2d_01.mpi_data.rank, cart2d_01.coords[0], cart2d_01.coords[1], cart2d_12.mpi_data.rank, cart2d_12.coords[0], cart2d_12.coords[1] ); /* End of the program */ MPI_Finalize(); return EXIT_SUCCESS; } /* ----------------------------------------------------------------------- */
Subscribe to:
Posts (Atom)