Sunday, February 27, 2011

Gaussian elimination without pivoting using straightforward formulas, array syntax and BLAS routines

!!---------------------------------------------------------------------------
! gfortran -ffree-form -Wall -Wextra -lblas ge.f90
!!---------------------------------------------------------------------------
! Gaussian elimination without pivoting
!
! [1] J. Demmel "Numerical Linear Algebra"
! [2] N. J. Nigham "Gaussian Elimination"
!
!   ELIMINATION
!   for k = 1: n-1
!       for i = k+1: n
!           a(i, k) /= a(k, k)
!       for i = k+1: n
!           for j = k+1: n
!               a(i, j) -= a(i, k) * a(k, j)
!           end
!       end
!   end
!
!   BACKSUBSTITUTION  -  L U y = f  =>  L x = f  =>  x = L \ f  =>  y = U \ x
!   for i = 1: n
!       x(i) = f(i)
!       for j = 1: i-1
!           x(i) -= a(i, j) * x(j)
!       end
!   end
!
!   for i = n: 1
!       y(i) = x(i)
!       for j = n: i+1
!           y(i) -= a(i, j) * y(j)
!       end
!       y(i) /= a(i, i)
!   end 
!!---------------------------------------------------------------------------
program ge
!!---------------------------------------------------------------------------
    implicit none

    ! Matrix of coefficients; the one is filled by random_number()
    real, dimension(:, :), allocatable :: A

    ! "Analytical" solution; the one is filled by random_number()
    real, dimension(:), allocatable :: u

    ! Right-hand side (RHS); the one is calculated as f = A * u
    ! Numerical solution (NS) of the equation A y = f
    ! RHS is overwritten by NS
    real, dimension(:), allocatable :: y

    ! Size of matrix
    integer, parameter :: n = 5

    ! Allocate memory
    allocate(A(1: n, 1: n))
    allocate(u(1: n))
    allocate(y(1: n))

    ! Algorithm uses straightforward formulas
    call Generate_Data()
    call Straightforward_Elimination()
    call Straightforward_Backsubstition()
    call Print_Norms()

    ! Algorithm uses Fortran 90/95 features
    call Generate_Data()
    call Fortran9x_Elimination()
    call Fortran9x_Backsubstition()
    call Print_Norms()

    ! Algorithm uses BLAS
    call Generate_Data()
    call BLAS_Elimination()
    call BLAS_Backsubstition()
    call Print_Norms()

    ! Free memory
    deallocate(A)
    deallocate(u)
    deallocate(y)
!!---------------------------------------------------------------------------
contains
!!---------------------------------------------------------------------------
subroutine Print_Norms()
    write (*, *) maxval(abs(u)), maxval(abs(y - u))
end subroutine Print_Norms
!!---------------------------------------------------------------------------
! This version is a simplified modification of
! http://gcc.gnu.org/onlinedocs/gfortran/RANDOM_005fSEED.html
subroutine Init_Random_Seed()
    integer :: i, n
    integer, dimension(:), allocatable :: seed

    call random_seed(size = n)
    allocate(seed(n))

    seed = 37 * (/ (i - 1, i = 1, n) /)
    call random_seed(put = seed)

    deallocate(seed)
end subroutine Init_Random_Seed
!!---------------------------------------------------------------------------
subroutine Generate_Data()
    call Init_Random_Seed()
    call random_number(A)
    call random_number(u)
    y = matmul(A, u)
end subroutine Generate_Data
!!---------------------------------------------------------------------------
subroutine Straightforward_Elimination()
    integer :: i, j, k

    do k = 1, n-1
        do i = k+1, n
            a(i, k) = a(i, k) / a(k, k)
        end do

        do j = k+1, n
            do i = k+1, n
                a(i, j) = a(i, j) - a(i, k) * a(k, j)
            end do
        end do
    end do
end subroutine Straightforward_Elimination
!!---------------------------------------------------------------------------
subroutine Fortran9x_Elimination()
    integer :: k

    do k = 1, n-1
        a(k+1: n, k) = a(k+1: n, k) / a(k, k)

        a(k+1: n, k+1: n) = a(k+1: n, k+1: n) - &
                matmul(a(k+1: n, k: k), a(k: k, k+1: n))
    end do
end subroutine Fortran9x_Elimination
!!---------------------------------------------------------------------------
subroutine BLAS_Elimination()
    integer :: k

    do k = 1, n-1
        ! x = a*x
        call sscal(n-k, 1.0 / a(k, k), a(k+1, k), 1)

        ! A := alpha*x*y'+ A
        call sger(n-k, n-k, -1.0, &
                a(k+1, k), 1, &
                a(k, k+1), n, &
                a(k+1, k+1), n)
    end do
end subroutine BLAS_Elimination
!!---------------------------------------------------------------------------
subroutine Straightforward_Backsubstition()
    integer :: i, j

    ! L x = f  =>  x = L \ f
    do i = 1, n
        do j = 1, i-1
            y(i) = y(i) - a(i, j) * y(j)
        end do
    end do

    ! U y = x  =>  y = U \ x
    do i = n, 1, -1
        do j = i+1, n
            y(i) = y(i) - a(i, j) * y(j)
        end do

        y(i) = y(i) / a(i, i)
    end do
end subroutine Straightforward_Backsubstition
!!---------------------------------------------------------------------------
subroutine Fortran9x_Backsubstition()
    integer :: i

    ! L x = f  =>  x = L \ f
    do i = 1, n
        y(i) = y(i) - dot_product(a(i, 1: i-1), y(1: i-1))
    end do

    ! U y = x  =>  y = U \ x
    do i = n, 1, -1
        y(i) = y(i) - dot_product(a(i, i+1: n), y(i+1: n))
        y(i) = y(i) / a(i, i)
    end do
end subroutine Fortran9x_Backsubstition
!!---------------------------------------------------------------------------
subroutine BLAS_Backsubstition()
    ! L x = f  =>  x = L \ f
    ! op(A)*X = alpha*B
    call strsm('L', 'L', 'N', 'U', n, 1, 1.0, a, n, y, n)

    ! U y = x  =>  y = U \ x
    ! op(A)*X = alpha*B
    call strsm('L', 'U', 'N', 'N', n, 1, 1.0, a, n, y, n)
end subroutine BLAS_Backsubstition
!!---------------------------------------------------------------------------
end program ge
!!---------------------------------------------------------------------------

Saturday, February 26, 2011

Calculate pi in OpenMP-parallel

/*
 * Calculate $\pi = 4 \int_0^1 \sqrt{1 - x^2} \, dx$
 * using a rectangle rule $\sum_{i=1}^N f(x_0 + i h - h/2) h$
 *
 * Compile as   $> gcc -Wall -Wextra -fopenmp -lm
 * Run as       $> OMP_NUM_THREADS=2 ./a.out
 */
#include <stdio.h>
#include <math.h>
#include <omp.h>

int main()
{
    const int N = 10000000;
    const double L = 1.0;
    const double h = L / N;
    const double x_0 = 0.0;

    double pi;
    double t_1, t_2;

    int i;
    double sum = 0.0;

    t_1 = omp_get_wtime();

#pragma omp parallel for reduction(+: sum) schedule(static)
    for (i = 0; i < N; ++i)
    {
        double x = x_0 + i * h + h/2;
        sum += sqrt(1 - x*x);
    }

    t_2 = omp_get_wtime();

    pi = sum * h * 4.0;

    printf("omp_get_max_threads(): %d\n", omp_get_max_threads());
    printf("time: %f\n", t_2 - t_1);
    printf("pi ~ %f\n", pi);

    return 0;
}

Friday, February 25, 2011

Count number of bits set

/*
    $> gcc -msse4.2 -Wall -Wextra
    http://stackoverflow.com/questions/109023/best-algorithm-to-count-the-number-of-set-bits-in-a-32-bit-integer
*/
#include <stdio.h>

int main()
{
    unsigned int number;
    int num_bits_set;

    scanf("%u", &number);
    num_bits_set = __builtin_popcount(number);

    printf("%d\n", num_bits_set);

    return 0;
}