Sunday, February 27, 2011

Gaussian elimination without pivoting using straightforward formulas, array syntax and BLAS routines

!!---------------------------------------------------------------------------
! gfortran -ffree-form -Wall -Wextra -lblas ge.f90
!!---------------------------------------------------------------------------
! Gaussian elimination without pivoting
!
! [1] J. Demmel "Numerical Linear Algebra"
! [2] N. J. Nigham "Gaussian Elimination"
!
!   ELIMINATION
!   for k = 1: n-1
!       for i = k+1: n
!           a(i, k) /= a(k, k)
!       for i = k+1: n
!           for j = k+1: n
!               a(i, j) -= a(i, k) * a(k, j)
!           end
!       end
!   end
!
!   BACKSUBSTITUTION  -  L U y = f  =>  L x = f  =>  x = L \ f  =>  y = U \ x
!   for i = 1: n
!       x(i) = f(i)
!       for j = 1: i-1
!           x(i) -= a(i, j) * x(j)
!       end
!   end
!
!   for i = n: 1
!       y(i) = x(i)
!       for j = n: i+1
!           y(i) -= a(i, j) * y(j)
!       end
!       y(i) /= a(i, i)
!   end 
!!---------------------------------------------------------------------------
program ge
!!---------------------------------------------------------------------------
    implicit none

    ! Matrix of coefficients; the one is filled by random_number()
    real, dimension(:, :), allocatable :: A

    ! "Analytical" solution; the one is filled by random_number()
    real, dimension(:), allocatable :: u

    ! Right-hand side (RHS); the one is calculated as f = A * u
    ! Numerical solution (NS) of the equation A y = f
    ! RHS is overwritten by NS
    real, dimension(:), allocatable :: y

    ! Size of matrix
    integer, parameter :: n = 5

    ! Allocate memory
    allocate(A(1: n, 1: n))
    allocate(u(1: n))
    allocate(y(1: n))

    ! Algorithm uses straightforward formulas
    call Generate_Data()
    call Straightforward_Elimination()
    call Straightforward_Backsubstition()
    call Print_Norms()

    ! Algorithm uses Fortran 90/95 features
    call Generate_Data()
    call Fortran9x_Elimination()
    call Fortran9x_Backsubstition()
    call Print_Norms()

    ! Algorithm uses BLAS
    call Generate_Data()
    call BLAS_Elimination()
    call BLAS_Backsubstition()
    call Print_Norms()

    ! Free memory
    deallocate(A)
    deallocate(u)
    deallocate(y)
!!---------------------------------------------------------------------------
contains
!!---------------------------------------------------------------------------
subroutine Print_Norms()
    write (*, *) maxval(abs(u)), maxval(abs(y - u))
end subroutine Print_Norms
!!---------------------------------------------------------------------------
! This version is a simplified modification of
! http://gcc.gnu.org/onlinedocs/gfortran/RANDOM_005fSEED.html
subroutine Init_Random_Seed()
    integer :: i, n
    integer, dimension(:), allocatable :: seed

    call random_seed(size = n)
    allocate(seed(n))

    seed = 37 * (/ (i - 1, i = 1, n) /)
    call random_seed(put = seed)

    deallocate(seed)
end subroutine Init_Random_Seed
!!---------------------------------------------------------------------------
subroutine Generate_Data()
    call Init_Random_Seed()
    call random_number(A)
    call random_number(u)
    y = matmul(A, u)
end subroutine Generate_Data
!!---------------------------------------------------------------------------
subroutine Straightforward_Elimination()
    integer :: i, j, k

    do k = 1, n-1
        do i = k+1, n
            a(i, k) = a(i, k) / a(k, k)
        end do

        do j = k+1, n
            do i = k+1, n
                a(i, j) = a(i, j) - a(i, k) * a(k, j)
            end do
        end do
    end do
end subroutine Straightforward_Elimination
!!---------------------------------------------------------------------------
subroutine Fortran9x_Elimination()
    integer :: k

    do k = 1, n-1
        a(k+1: n, k) = a(k+1: n, k) / a(k, k)

        a(k+1: n, k+1: n) = a(k+1: n, k+1: n) - &
                matmul(a(k+1: n, k: k), a(k: k, k+1: n))
    end do
end subroutine Fortran9x_Elimination
!!---------------------------------------------------------------------------
subroutine BLAS_Elimination()
    integer :: k

    do k = 1, n-1
        ! x = a*x
        call sscal(n-k, 1.0 / a(k, k), a(k+1, k), 1)

        ! A := alpha*x*y'+ A
        call sger(n-k, n-k, -1.0, &
                a(k+1, k), 1, &
                a(k, k+1), n, &
                a(k+1, k+1), n)
    end do
end subroutine BLAS_Elimination
!!---------------------------------------------------------------------------
subroutine Straightforward_Backsubstition()
    integer :: i, j

    ! L x = f  =>  x = L \ f
    do i = 1, n
        do j = 1, i-1
            y(i) = y(i) - a(i, j) * y(j)
        end do
    end do

    ! U y = x  =>  y = U \ x
    do i = n, 1, -1
        do j = i+1, n
            y(i) = y(i) - a(i, j) * y(j)
        end do

        y(i) = y(i) / a(i, i)
    end do
end subroutine Straightforward_Backsubstition
!!---------------------------------------------------------------------------
subroutine Fortran9x_Backsubstition()
    integer :: i

    ! L x = f  =>  x = L \ f
    do i = 1, n
        y(i) = y(i) - dot_product(a(i, 1: i-1), y(1: i-1))
    end do

    ! U y = x  =>  y = U \ x
    do i = n, 1, -1
        y(i) = y(i) - dot_product(a(i, i+1: n), y(i+1: n))
        y(i) = y(i) / a(i, i)
    end do
end subroutine Fortran9x_Backsubstition
!!---------------------------------------------------------------------------
subroutine BLAS_Backsubstition()
    ! L x = f  =>  x = L \ f
    ! op(A)*X = alpha*B
    call strsm('L', 'L', 'N', 'U', n, 1, 1.0, a, n, y, n)

    ! U y = x  =>  y = U \ x
    ! op(A)*X = alpha*B
    call strsm('L', 'U', 'N', 'N', n, 1, 1.0, a, n, y, n)
end subroutine BLAS_Backsubstition
!!---------------------------------------------------------------------------
end program ge
!!---------------------------------------------------------------------------

Saturday, February 26, 2011

Calculate pi in OpenMP-parallel

/*
 * Calculate $\pi = 4 \int_0^1 \sqrt{1 - x^2} \, dx$
 * using a rectangle rule $\sum_{i=1}^N f(x_0 + i h - h/2) h$
 *
 * Compile as   $> gcc -Wall -Wextra -fopenmp -lm
 * Run as       $> OMP_NUM_THREADS=2 ./a.out
 */
#include <stdio.h>
#include <math.h>
#include <omp.h>

int main()
{
    const int N = 10000000;
    const double L = 1.0;
    const double h = L / N;
    const double x_0 = 0.0;

    double pi;
    double t_1, t_2;

    int i;
    double sum = 0.0;

    t_1 = omp_get_wtime();

#pragma omp parallel for reduction(+: sum) schedule(static)
    for (i = 0; i < N; ++i)
    {
        double x = x_0 + i * h + h/2;
        sum += sqrt(1 - x*x);
    }

    t_2 = omp_get_wtime();

    pi = sum * h * 4.0;

    printf("omp_get_max_threads(): %d\n", omp_get_max_threads());
    printf("time: %f\n", t_2 - t_1);
    printf("pi ~ %f\n", pi);

    return 0;
}

Friday, February 25, 2011

Count number of bits set

/*
    $> gcc -msse4.2 -Wall -Wextra
    http://stackoverflow.com/questions/109023/best-algorithm-to-count-the-number-of-set-bits-in-a-32-bit-integer
*/
#include <stdio.h>

int main()
{
    unsigned int number;
    int num_bits_set;

    scanf("%u", &number);
    num_bits_set = __builtin_popcount(number);

    printf("%d\n", num_bits_set);

    return 0;
}

Tuesday, November 30, 2010

MPI and Cartesian communicators

/* ----------------------------------------------------------------------- */
/*
 * file:        mpi-cart.c
 * compilation: mpicc -Wall -Wextra mpi-cart.c -o mpi-cart
 * invocation:  mpirun -n 8 mpi-cart
 */
/* ----------------------------------------------------------------------- */
/* Studying MPI functions related to the Cartesian topologies */
/* ----------------------------------------------------------------------- */
#include <stdio.h>
#include <stdlib.h>
/* ----------------------------------------------------------------------- */
#include <mpi.h>
/* ----------------------------------------------------------------------- */
typedef struct MPI_Data_s
{
    MPI_Comm comm;
    int size;
    int rank;
    int root;
} MPI_Data;
/* ----------------------------------------------------------------------- */
#define CART2D_NDIMS 2
typedef struct MPI_Cart2D_Data_s
{
    MPI_Data mpi_data;
    int ndims;
    int dims[CART2D_NDIMS];
    int periods[CART2D_NDIMS];
    int coords[CART2D_NDIMS];
} MPI_Cart2D_Data;
/* ----------------------------------------------------------------------- */
#define CART4D_NDIMS 4
typedef struct MPI_Cart4D_Data_s
{
    MPI_Data mpi_data;
    int ndims;
    int dims[CART4D_NDIMS];
    int periods[CART4D_NDIMS];
    int coords[CART4D_NDIMS];
} MPI_Cart4D_Data;
/* ----------------------------------------------------------------------- */
void Create_2D_Subcomms(MPI_Cart4D_Data *cart4d, int *remain_dims,
        MPI_Cart2D_Data *cart2d)
{
    MPI_Cart_sub(cart4d->mpi_data.comm, remain_dims,
            &(cart2d->mpi_data.comm)
    );

    MPI_Comm_size(cart2d->mpi_data.comm, &(cart2d->mpi_data.size));
    MPI_Comm_rank(cart2d->mpi_data.comm, &(cart2d->mpi_data.rank));
    cart2d->mpi_data.root = 0;
    cart2d->ndims = CART2D_NDIMS;
    MPI_Cart_get(cart2d->mpi_data.comm, cart2d->ndims,
            cart2d->dims, cart2d->periods, cart2d->coords);
}
/* ----------------------------------------------------------------------- */
int main(int argc, char **argv)
{
    /* Some variables */
    MPI_Data world;
    MPI_Cart4D_Data cart4d;
    int remain_dims[CART4D_NDIMS];
    MPI_Cart2D_Data cart2d_01, cart2d_12;

    /* Beginning of the program */
    MPI_Init(&argc, &argv);

    /* Fill-in MPI_COMM_WORLD data */
    world.comm = MPI_COMM_WORLD;
    MPI_Comm_size(world.comm, &(world.size));
    MPI_Comm_rank(world.comm, &(world.rank));
    world.root = 0;

    /* Create a division of processors in a 4D Cartesian grid */
    cart4d.ndims = CART4D_NDIMS;
    cart4d.dims[0] = 0;
    cart4d.dims[1] = 0;
    cart4d.dims[2] = 0;
    cart4d.dims[3] = 1;
    MPI_Dims_create(world.size, cart4d.ndims, cart4d.dims);
    if (world.rank == world.root)
        printf("cart4d dims: <%d, %d, %d, %d>\n",
                cart4d.dims[0],
                cart4d.dims[1],
                cart4d.dims[2],
                cart4d.dims[3]
        );

    /* Create a new communicator with 4D Cartesian topology attached */
    cart4d.periods[0] = 0;
    cart4d.periods[1] = 0;
    cart4d.periods[2] = 0;
    cart4d.periods[3] = 0;
    MPI_Cart_create(world.comm, cart4d.ndims, cart4d.dims, cart4d.periods,
            1, &(cart4d.mpi_data.comm)
    );

    /* Fill-in 4D Cartesian communicator data */
    MPI_Comm_size(cart4d.mpi_data.comm, &(cart4d.mpi_data.size));
    MPI_Comm_rank(cart4d.mpi_data.comm, &(cart4d.mpi_data.rank));
    cart4d.mpi_data.root = 0;
    MPI_Cart_coords(cart4d.mpi_data.comm, cart4d.mpi_data.rank,
            cart4d.ndims, cart4d.coords
    );

    /* Create 2D subcommunicators and fill-in associated data */
    remain_dims[0] = 1;
    remain_dims[1] = 1;
    remain_dims[2] = 0;
    remain_dims[3] = 0;
    Create_2D_Subcomms(&cart4d, remain_dims, &cart2d_01);

    remain_dims[0] = 0;
    remain_dims[1] = 1;
    remain_dims[2] = 1;
    remain_dims[3] = 0;
    Create_2D_Subcomms(&cart4d, remain_dims, &cart2d_12);

    /* Print info on communicators */
    printf("world: %d; cart4d: %d, (%d, %d, %d, %d); "
            "cart2d_01: %d, (%d, %d); cart2d_12: %d, (%d, %d)\n",
            world.rank,
            cart4d.mpi_data.rank,
            cart4d.coords[0],
            cart4d.coords[1],
            cart4d.coords[2],
            cart4d.coords[3],
            cart2d_01.mpi_data.rank,
            cart2d_01.coords[0],
            cart2d_01.coords[1],
            cart2d_12.mpi_data.rank,
            cart2d_12.coords[0],
            cart2d_12.coords[1]
    );

    /* End of the program */
    MPI_Finalize();

    return EXIT_SUCCESS;
}
/* ----------------------------------------------------------------------- */