On Sat, 17 Oct 1998, Loren P. Meissner wrote:
> I have just been communicating with a person about the problem of
> reading a "huge" file of data when you can't predict in advance how big
> it will be. For purposes of illustration, suppose that the idea is to do
> an internal sort (say, Quicksort) after the data has been read in. I am
> trying to compare two options:
>
. . .
! Here is a different solution to the "huge file problem"
! It has been constructed using the following guidelines:
!
! (0) The solution should be "pure Fortran 90", i.e. run on
! an "abstract Fortran 90 machine", without reference to
! any operating system peculiarities
!
! (1) We assume that the (virtual) size of the computer memory
! available to the user is large enough to hold the file
!
! (2) The file should only be read once (reading it twice just to
! find out its length is considered a waste of resources)
!
! (3) Unused memory overhead should be reasonably small (therefore
! first to allocate N units of memory, then 2*N units and to
! copy the data to the new memory is not really feasible
! because it uses up to 3 times the really needed memory and
! also the copy operation is wasteful.
!
! (4) Dynamic memory extension is handled via a pointer listi p(:),
! each list element responsible for mm2 records, where mm2>>1
!
! (5) The way the list is extended, only one copy needs to be done
!
! (6) For accessing the records, a function "record" is supplied
! which returns the value of the required record
!
! (7) If the file is to be sorted (to assume just some usage for it),
! one could generate a list of integers xlist = (/ 0, 1, ... n-1 /)
! and do sorting by a generic sort algorithm which might be fed
! the comparison function "greater(i,j)" (this is all it has to know
! about the nature of the objects to be sorted).
!
! (8) It is assumed that the records are all of the same size. If not, the file
! should be read with ADVANCE="no" and stored differently. Things would
! get more complicated, but still could be done.
!
! Below is a sample program which has been run on:
! IBM RS/6000, Cray T3E, NEC SX-4, SUN (NAG compiler).
!
!
! Regards,
! Friedrich Hertweck
!=============================================================================
module Handle_Huge
implicit none
integer, parameter :: mm1= 3 ! increment for record groups
integer, parameter :: mm2=10 ! number of records per group
type :: rec_group
character(len=64), pointer :: r(:)
end type
!
! the array p(:) implements a two-level data structure to contain the records
!
type (rec_group), pointer :: p(:)
type (rec_group), pointer, private :: temp(:) ! needed to extend p(:)
!
! the following diagram shows the situation for mm1=3, mm2=10 and n=43 records
!
! p(0:2*mm1-1)
! ------- +=====+ p(0)%r(0:mm2-1)
! f a | | +---+---+---+---+---+---+---+---+---+---+
! i l | 0----> | R | R | R | R | R | R | R | R | R | R |
! r l | | +---+---+---+---+---+---+---+---+---+---+
! s o +-----+
! t c | | +---+---+---+---+---+---+---+---+---+---+
! a | 0----> | R | R | R | R | R | R | R | R | R | R |
! t | | +---+---+---+---+---+---+---+---+---+---+
! i +-----+
! o | | +---+---+---+---+---+---+---+---+---+---+
! n | 0----> | R | R | R | R | R | R | R | R | R | R |
! | | +---+---+---+---+---+---+---+---+---+---+
! ------- +=====+
! s a | | +---+---+---+---+---+---+---+---+---+---+
! e l | 0----> | R | R | R | R | R | R | R | R | R | R |
! c l | | +---+---+---+---+---+---+---+---+---+---+
! o o +-----+
! n c | | +---+---+---+---+---+---+---+---+---+---+ last array
! d a | 0----> | R | R | R | - | - | - | - | - | - | - | filled only
! t | | +---+---+---+---+---+---+---+---+---+---+ partially
! i +-----+
! o | |
! n | # | no array has been allocated, pointer is NULL()
! | |
! ------- +=====+
!
contains
!===================================================================================
subroutine read_data (u, n, m1) ! read the data from "huge file".
integer, intent(in) :: u ! unit number from which to read
integer, intent(out) :: n ! number of records read from unit u
integer, intent(out) :: m1 ! length of array p(:) is m1, i.e. p(0:m1-1)
! n/mm2 + 1 elements are used
integer :: rc, i, j, i1
m1 = mm1
i1 = 0
n = 0
allocate (p(0:m1-1)) ! allocate first set of m1 record groups
do i=0,m1-1
nullify (p(i)%r) ! the m1 pointers r(:) in the set are nullified
end do
do
do i=i1,m1-1
allocate (p(i)%r(0:mm2-1)) ! allocate space for mm2 new records
do j=0,mm2-1
read(u,"(a)",iostat=rc) p(i)%r(j)
if (rc<0) return
n = n + 1 ! n counts the number of records read
end do
end do
!
! the set of record groups has been filled; we need more space
!
temp => p
allocate (p(0:m1+mm1-1)) ! allocate more space
do i=0,m1-1 ! the first m1 pointers in the new set
p(i)%r => temp(i)%r ! point to the data in the old set
end do
do i=m1,m1+mm1-1 ! the mm1 new pointers in the new set
nullify (p(i)%r) ! are nullified
end do
deallocate (temp) ! get rid of old set
i1 = m1 ! i1 is the index of the first new group
m1 = m1 + mm1 ! update length of new group set
end do
end subroutine read_data
!-----------------------------------------------------------------------------------
function record(i) result (r) ! function returns the record for index i
integer, intent(in) :: i
character(len=64) :: r
r = p(i/mm2)%r(modulo(i,mm2))
end function record
!-----------------------------------------------------------------------------------
function greater(i,j) result (g) ! result is T if record-i is > record-j
integer, intent(in) :: i, j ! when comparing the x-coordinate
logical :: g
g = p(i/mm2)%r(modulo(i,mm2))(17:28) > p(j/mm2)%r(modulo(j,mm2))(17:28)
end function greater
end module Handle_Huge
!===================================================================================
program HugeFile
use Handle_Huge
implicit none
integer :: i, n, s
call generate_file("huge.data")
open(1,file="huge.data",action="read")
call read_data (1, n, s)
close(1,status="keep")
print *, n, " records read" ! NOTE: the records MUST be indexed 0:n-1
print *, s, " is the length of p(:)" ! p(0:s-1), entries 0:s/mm2 are defined
!
! print the records; if i>0 print result of comparison to preceding rexcord
!
print "(a,L4)", record(0)
print "(a,L4)", (record(i), greater(i,i-1), i=1,n-1)
end program
!-----------------------------------------------------------------------------------
subroutine generate_file(fname)
implicit none
character(len=*), intent(in) :: fname
integer :: i
real, dimension(43) :: x, y
call random_number(x)
call random_number(y)
open(1,file=fname,action="write")
do i=1,SIZE(x)
write(1,"(a,i4,a,2f12.8)") "(x,y)-pair ", i, ":", x(i), y(i)
end do
close(1,status="keep")
end subroutine generate_file
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|