File: //usr/share/perl5/File/RandomAccess.pm
#------------------------------------------------------------------------------
# File:         RandomAccess.pm
#
# Description:  Buffer to support random access reading of sequential file
#
# Revisions:    02/11/2004 - P. Harvey Created
#               02/20/2004 - P. Harvey Added flag to disable SeekTest in new()
#               11/18/2004 - P. Harvey Fixed bug with seek relative to end of file
#               01/02/2005 - P. Harvey Added DEBUG code
#               01/09/2006 - P. Harvey Fixed bug in ReadLine() when using
#                            multi-character EOL sequences
#               02/20/2006 - P. Harvey Fixed bug where seek past end of file could
#                            generate "substr outside string" warning
#               06/10/2006 - P. Harvey Decreased $CHUNK_SIZE from 64k to 8k
#               11/23/2006 - P. Harvey Limit reads to < 0x80000000 bytes
#               11/26/2008 - P. Harvey Fixed bug in ReadLine when reading from a
#                            scalar with a multi-character newline
#               01/24/2009 - PH Protect against reading too much at once
#               10/04/2018 - PH Added NoBuffer option
#
# Notes:        Calls the normal file i/o routines unless SeekTest() fails, in
#               which case the file is buffered in memory to allow random access.
#               SeekTest() is called automatically when the object is created
#               unless specified.
#
#               May also be used for string i/o (just pass a scalar reference)
#
# Legal:        Copyright (c) 2003-2022 Phil Harvey (philharvey66 at gmail.com)
#               This library is free software; you can redistribute it and/or
#               modify it under the same terms as Perl itself.
#------------------------------------------------------------------------------
package File::RandomAccess;
use strict;
require 5.002;
require Exporter;
use vars qw($VERSION @ISA @EXPORT_OK);
$VERSION = '1.11';
@ISA = qw(Exporter);
sub Read($$$);
# constants
my $CHUNK_SIZE = 8192;  # size of chunks to read from file (must be power of 2)
my $SKIP_SIZE = 65536;  # size to skip when fast-forwarding over sequential data
my $SLURP_CHUNKS = 16;  # read this many chunks at a time when slurping
#------------------------------------------------------------------------------
# Create new RandomAccess object
# Inputs: 0) reference to RandomAccess object or RandomAccess class name
#         1) file reference or scalar reference
#         2) flag set if file is already random access (disables automatic SeekTest)
sub new($$;$)
{
    my ($that, $filePt, $isRandom) = @_;
    my $class = ref($that) || $that;
    my $self;
    if (ref $filePt eq 'SCALAR') {
        # string i/o
        $self = {
            BUFF_PT => $filePt,
            BASE => 0,
            POS => 0,
            LEN => length($$filePt),
            TESTED => -1,
        };
        bless $self, $class;
    } else {
        # file i/o
        my $buff = '';
        $self = {
            FILE_PT => $filePt, # file pointer
            BUFF_PT => \$buff,  # reference to file data
            BASE => 0,          # location of start of buffer in file
            POS => 0,           # current position in buffer
            LEN => 0,           # length of data in buffer
            TESTED => 0,        # 0=untested, 1=passed, -1=failed (requires buffering)
        };
        bless $self, $class;
        $self->SeekTest() unless $isRandom;
    }
    return $self;
}
#------------------------------------------------------------------------------
# Enable DEBUG code
# Inputs: 0) reference to RandomAccess object
sub Debug($)
{
    my $self = shift;
    $self->{DEBUG} = { };
}
#------------------------------------------------------------------------------
# Perform seek test and turn on buffering if necessary
# Inputs: 0) reference to RandomAccess object
# Returns: 1 if seek test passed (ie. no buffering required)
# Notes: Must be done before any other i/o
sub SeekTest($)
{
    my $self = shift;
    unless ($self->{TESTED}) {
        my $fp = $self->{FILE_PT};
        if (seek($fp, 1, 1) and seek($fp, -1, 1)) {
            $self->{TESTED} = 1;    # test passed
        } else {
            $self->{TESTED} = -1;   # test failed (requires buffering)
        }
    }
    return $self->{TESTED} == 1 ? 1 : 0;
}
#------------------------------------------------------------------------------
# Get current position in file
# Inputs: 0) reference to RandomAccess object
# Returns: current position in file
sub Tell($)
{
    my $self = shift;
    my $rtnVal;
    if ($self->{TESTED} < 0) {
        $rtnVal = $self->{POS} + $self->{BASE};
    } else {
        $rtnVal = tell($self->{FILE_PT});
    }
    return $rtnVal;
}
#------------------------------------------------------------------------------
# Seek to position in file
# Inputs: 0) reference to RandomAccess object
#         1) position, 2) whence (0 or undef=from start, 1=from cur pos, 2=from end)
# Returns: 1 on success
# Notes: When buffered, this doesn't quite behave like seek() since it will return
#        success even if you seek outside the limits of the file.  However if you
#        do this, you will get an error on your next Read().
sub Seek($$;$)
{
    my ($self, $num, $whence) = @_;
    $whence = 0 unless defined $whence;
    my $rtnVal;
    if ($self->{TESTED} < 0) {
        my $newPos;
        if ($whence == 0) {
            $newPos = $num - $self->{BASE}; # from start of file
        } elsif ($whence == 1) {
            $newPos = $num + $self->{POS};  # relative to current position
        } elsif ($self->{NoBuffer} and $self->{FILE_PT}) {
            $newPos = -1;   # (can't seek relative to end if no buffering)
        } else {
            $self->Slurp();                 # read whole file into buffer
            $newPos = $num + $self->{LEN};  # relative to end of file
        }
        if ($newPos >= 0) {
            $self->{POS} = $newPos;
            $rtnVal = 1;
        }
    } else {
        $rtnVal = seek($self->{FILE_PT}, $num, $whence);
    }
    return $rtnVal;
}
#------------------------------------------------------------------------------
# Read from the file
# Inputs: 0) reference to RandomAccess object, 1) buffer, 2) bytes to read
# Returns: Number of bytes read
sub Read($$$)
{
    my $self = shift;
    my $len = $_[1];
    my $rtnVal;
    # protect against reading too much at once
    # (also from dying with a "Negative length" error)
    if ($len & 0xf8000000) {
        return 0 if $len < 0;
        # read in smaller blocks because Windows attempts to pre-allocate
        # memory for the full size, which can lead to an out-of-memory error
        my $maxLen = 0x4000000; # (MUST be less than bitmask in "if" above)
        my $num = Read($self, $_[0], $maxLen);
        return $num if $num < $maxLen;
        for (;;) {
            $len -= $maxLen;
            last if $len <= 0;
            my $l = $len < $maxLen ? $len : $maxLen;
            my $buff;
            my $n = Read($self, $buff, $l);
            last unless $n;
            $_[0] .= $buff;
            $num += $n;
            last if $n < $l;
        }
        return $num;
    }
    # read through our buffer if necessary
    if ($self->{TESTED} < 0) {
        # purge old data before reading in NoBuffer mode
        $self->Purge() or return 0 if $self->{NoBuffer};
        my $buff;
        my $newPos = $self->{POS} + $len;
        # number of bytes to read from file
        my $num = $newPos - $self->{LEN};
        if ($num > 0 and $self->{FILE_PT}) {
            # read data from file in multiples of $CHUNK_SIZE
            $num = (($num - 1) | ($CHUNK_SIZE - 1)) + 1;
            $num = read($self->{FILE_PT}, $buff, $num);
            if ($num) {
                ${$self->{BUFF_PT}} .= $buff;
                $self->{LEN} += $num;
            }
        }
        # number of bytes left in data buffer
        $num = $self->{LEN} - $self->{POS};
        if ($len <= $num) {
            $rtnVal = $len;
        } elsif ($num <= 0) {
            $_[0] = '';
            return 0;
        } else {
            $rtnVal = $num;
        }
        # return data from our buffer
        $_[0] = substr(${$self->{BUFF_PT}}, $self->{POS}, $rtnVal);
        $self->{POS} += $rtnVal;
    } else {
        # read directly from file
        $_[0] = '' unless defined $_[0];
        $rtnVal = read($self->{FILE_PT}, $_[0], $len) || 0;
    }
    if ($self->{DEBUG}) {
        my $pos = $self->Tell() - $rtnVal;
        unless ($self->{DEBUG}->{$pos} and $self->{DEBUG}->{$pos} > $rtnVal) {
            $self->{DEBUG}->{$pos} = $rtnVal;
        }
    }
    return $rtnVal;
}
#------------------------------------------------------------------------------
# Read a line from file (end of line is $/)
# Inputs: 0) reference to RandomAccess object, 1) buffer
# Returns: Number of bytes read
sub ReadLine($$)
{
    my $self = shift;
    my $rtnVal;
    my $fp = $self->{FILE_PT};
    if ($self->{TESTED} < 0) {
        my ($num, $buff);
        $self->Purge() or return 0 if $self->{NoBuffer};
        my $pos = $self->{POS};
        if ($fp) {
            # make sure we have some data after the current position
            while ($self->{LEN} <= $pos) {
                $num = read($fp, $buff, $CHUNK_SIZE);
                return 0 unless $num;
                ${$self->{BUFF_PT}} .= $buff;
                $self->{LEN} += $num;
            }
            # scan and read until we find the EOL (or hit EOF)
            for (;;) {
                $pos = index(${$self->{BUFF_PT}}, $/, $pos);
                if ($pos >= 0) {
                    $pos += length($/);
                    last;
                }
                $pos = $self->{LEN};    # have scanned to end of buffer
                $num = read($fp, $buff, $CHUNK_SIZE) or last;
                ${$self->{BUFF_PT}} .= $buff;
                $self->{LEN} += $num;
            }
        } else {
            # string i/o
            $pos = index(${$self->{BUFF_PT}}, $/, $pos);
            if ($pos < 0) {
                $pos = $self->{LEN};
                $self->{POS} = $pos if $self->{POS} > $pos;
            } else {
                $pos += length($/);
            }
        }
        # read the line from our buffer
        $rtnVal = $pos - $self->{POS};
        $_[0] = substr(${$self->{BUFF_PT}}, $self->{POS}, $rtnVal);
        $self->{POS} = $pos;
    } else {
        $_[0] = <$fp>;
        if (defined $_[0]) {
            $rtnVal = length($_[0]);
        } else {
            $rtnVal = 0;
        }
    }
    if ($self->{DEBUG}) {
        my $pos = $self->Tell() - $rtnVal;
        unless ($self->{DEBUG}->{$pos} and $self->{DEBUG}->{$pos} > $rtnVal) {
            $self->{DEBUG}->{$pos} = $rtnVal;
        }
    }
    return $rtnVal;
}
#------------------------------------------------------------------------------
# Read whole file into buffer (without changing read pointer)
# Inputs: 0) reference to RandomAccess object
sub Slurp($)
{
    my $self = shift;
    my $fp = $self->{FILE_PT} || return;
    # read whole file into buffer (in large chunks)
    my ($buff, $num);
    while (($num = read($fp, $buff, $CHUNK_SIZE * $SLURP_CHUNKS)) != 0) {
        ${$self->{BUFF_PT}} .= $buff;
        $self->{LEN} += $num;
    }
}
#------------------------------------------------------------------------------
# Purge internal buffer [internal use only]
# Inputs: 0) reference to RandomAccess object
# Returns: 1 on success, or 0 if current buffer position is negative
# Notes: This is called only in NoBuffer mode
sub Purge($)
{
    my $self = shift;
    return 1 unless $self->{FILE_PT};
    return 0 if $self->{POS} < 0;   # error if we can't read from here
    if ($self->{POS} > $CHUNK_SIZE) {
        my $purge = $self->{POS} - ($self->{POS} % $CHUNK_SIZE);
        if ($purge >= $self->{LEN}) {
            # read up to current position in 64k chunks, discarding as we go
            while ($self->{POS} > $self->{LEN}) {
                $self->{BASE} += $self->{LEN};
                $self->{POS} -= $self->{LEN};
                ${$self->{BUFF_PT}} = '';
                $self->{LEN} = read($self->{FILE_PT}, ${$self->{BUFF_PT}}, $SKIP_SIZE);
                last if $self->{LEN} < $SKIP_SIZE;
            }
        } elsif ($purge > 0) {
            ${$self->{BUFF_PT}} = substr ${$self->{BUFF_PT}}, $purge;
            $self->{BASE} += $purge;
            $self->{POS} -= $purge;
            $self->{LEN} -= $purge;
        }
    }
    return 1;
}
#------------------------------------------------------------------------------
# Set binary mode
# Inputs: 0) reference to RandomAccess object
sub BinMode($)
{
    my $self = shift;
    binmode($self->{FILE_PT}) if $self->{FILE_PT};
}
#------------------------------------------------------------------------------
# Close the file and free the buffer
# Inputs: 0) reference to RandomAccess object
sub Close($)
{
    my $self = shift;
    if ($self->{DEBUG}) {
        local $_;
        if ($self->Seek(0,2)) {
            $self->{DEBUG}->{$self->Tell()} = 0;    # set EOF marker
            my $last;
            my $tot = 0;
            my $bad = 0;
            foreach (sort { $a <=> $b } keys %{$self->{DEBUG}}) {
                my $pos = $_;
                my $len = $self->{DEBUG}->{$_};
                if (defined $last and $last < $pos) {
                    my $bytes = $pos - $last;
                    $tot += $bytes;
                    $self->Seek($last);
                    my $buff;
                    $self->Read($buff, $bytes);
                    my $warn = '';
                    if ($buff =~ /[^\0]/) {
                        $bad += ($pos - $last);
                        $warn = ' - NON-ZERO!';
                    }
                    printf "0x%.8x - 0x%.8x (%d bytes)$warn\n", $last, $pos, $bytes;
                }
                my $cur = $pos + $len;
                $last = $cur unless defined $last and $last > $cur;
            }
            print "$tot bytes missed";
            $bad and print ", $bad non-zero!";
            print "\n";
        } else {
            warn "File::RandomAccess DEBUG not working (file already closed?)\n";
        }
        delete $self->{DEBUG};
    }
    # close the file
    if ($self->{FILE_PT}) {
        close($self->{FILE_PT});
        delete $self->{FILE_PT};
    }
    # reset the buffer
    my $emptyBuff = '';
    $self->{BUFF_PT} = \$emptyBuff;
    $self->{BASE} = 0;
    $self->{LEN} = 0;
    $self->{POS} = 0;
}
#------------------------------------------------------------------------------
1;  # end