CPAN-Search-Lite documentation

CPAN::Search::Lite::Info - extract information from CPAN indices

Code Index:


__top


NAME

CPAN::Search::Lite::Info - extract information from CPAN indices

__top


DESCRIPTION

This module extracts information from the CPAN indices $CPAN/indices/ls-lR.gz, $CPAN/modules/03modlist.data.gz, $CPAN/modules/02packages.details.txt.gz, and $CPAN/authors/01mailrc.txt.gz. If a local CPAN mirror isn't present, it will use the files fetched from a remote CPAN mirror under CPAN by the CPAN::Search::Lite::Index manpage.

A CPAN::Search::Lite::Info object is created with

    my $info = CPAN::Search::Lite::Info(CPAN => $cpan);

where $cpan specifies the top-level CPAN directory underneath which the index files are found. Calling

    $info->fetch_info();

will result in the object being populated with 3 hash references:

__top


SEE ALSO

the CPAN::Search::Lite::Index manpage

__top


package CPAN::Search::Lite::Info;
use strict;
use warnings;
use Storable;
use CPAN::DistnameInfo;
use File::Spec::Functions;
use Compress::Zlib;
use File::Listing;
use File::Basename;
use Safe;
use CPAN::Search::Lite::Util qw(vcmp);
our ($ext);
$ext = qr/\.(tar\.gz|tar\.Z|tgz|zip)$/;
our ($VERSION);
$VERSION = 0.66;

sub new {
    my ($class, %args) = @_;
    die "Must supply the top-level CPAN directory" unless $args{CPAN};
    my $self = {CPAN => $args{CPAN}, ignore => $args{ignore},
                dists => {}, auths => {}, mods => {}};
    bless $self, $class;
}

sub fetch_info {
    my $self = shift;
    $self->mailrc();
    $self->dists_and_mods();
    return 1;
}

sub dists_and_mods {
    my $self = shift;
    my $modlist = $self->modlist();
    my ($packages, $cpan_files) = $self->packages();

    my ($dists, $mods);
    my $ignore = $self->{ignore};
    my $pat;
    if ($ignore and ref($ignore) eq 'ARRAY') {
      $pat = join '|', @$ignore;
    }
    foreach my $cpan_file (keys %$cpan_files) {
        my $d = CPAN::DistnameInfo->new($cpan_file);
        next unless ($d->maturity eq 'released');
        my $dist = $d->dist;
        my $version = $d->version;
        my $cpanid = $d->cpanid;
        my $filename = $d->filename;
        unless ($dist and $version and $cpanid) {
            print "No dist_name/version/cpanid for $cpan_file: skipping\n";
            delete $cpan_files->{$cpan_file};
            next;
        }
        # ignore specified dists
        if ($pat and ($dist =~ /^($pat)$/)) {
             delete $cpan_files->{$cpan_file};
             print "Ignoring $dist\n";
             next;
        }
        if (not $dists->{$dist} or 
            vcmp($version, $dists->{$dist}->{version}) > 0) {
            $dists->{$dist}->{version} = $version;
            $dists->{$dist}->{filename} = $filename;
            $dists->{$dist}->{cpanid} = $cpanid;
        }
    }

    my $wanted;
    foreach my $dist (keys %$dists) {
        $wanted->{basename($dists->{$dist}->{filename})} = $dist;
    }
    $self->parse_ls($dists, $wanted);
    foreach my $module (keys %$packages) {
        my $file = basename($packages->{$module}->{file});
        my $dist;
        unless ($dist = $wanted->{$file} and $dists->{$dist}) {
            delete $packages->{$module};
            next;
        }
        $mods->{$module}->{dist} = $dist;
        $dists->{$dist}->{modules}->{$module}++; 
        my $version = $packages->{$module}->{version};
        $mods->{$module}->{version} = $version;
        if (my $info = $modlist->{$module}) {
            if (my $desc = $info->{description}) {
                $mods->{$module}->{description} =  $desc;
                (my $trial_dist = $module) =~ s!::!-!g;
                if ($trial_dist eq $dist) {
                    $dists->{$dist}->{description} = $desc;
                }
            }
            if (my $chapterid = $info->{chapterid} + 0) {
                $mods->{$module}->{chapterid} = $chapterid;
                (my $sub_chapter = $module) =~ s!^([^:]+).*!$1!;
                $dists->{$dist}->{chapterid}->{$chapterid}->{$sub_chapter}++;
            } 
            my %dslip = ();
            for (qw(statd stats statl stati statp) ) {
                next unless defined $info->{$_};
                $dslip{$_} = $info->{$_};
            }
            if (%dslip) {
                my $value = '';
                foreach (qw(d s l i p)) {
                    my $key = 'stat' . $_;
                    $value .= (defined $dslip{$key} ?
                               $dslip{$key} : '?');
                }
                $mods->{$module}->{dslip} = $value;
            }
        }
    }
    $self->{dists} = $dists;
    $self->{mods} = $mods;
}
  
sub parse_ls {
    my ($self, $dists, $wanted) = @_;
    my $ls = catfile $self->{CPAN}, 'indices', 'ls-lR.gz';
    print "Reading information from $ls\n";
    my ($buffer, $dir, $lines, $listing);
    my $gz = gzopen($ls, 'rb')
        or die "Cannot open $ls: $gzerrno";
    while ($gz->gzreadline($buffer) > 0) {
        next unless $buffer =~ /^-r.*$ext/;
        push @$lines, $buffer;
    }
    die "Error reading from $ls: $gzerrno" . ($gzerrno+0)
        if $gzerrno != Z_STREAM_END;
    $gz->gzclose();
    $dir = parse_dir($lines, '+0000');
    for (@$dir) {
        next unless ($_->[1] eq 'f' and $wanted->{$_->[0]});
        $listing->{$_->[0]} = {size => $_->[2], time => $_->[3]};
    }
    foreach my $dist (keys %$dists) {
        my $filename = $dists->{$dist}->{filename};
        my $base = basename($filename);
        unless ($listing->{$base} and $listing->{$base}->{size}
                and $listing->{$base}->{time}) {
            delete $dists->{$dist};
            next;
        }
        my ($mday, $mon, $year) = (gmtime($listing->{$base}->{time}))[3,4,5];
        $mon++;
        $year += 1900;
        $dists->{$dist}->{filename} = $filename;
        $dists->{$dist}->{size} = $listing->{$base}->{size};
        $dists->{$dist}->{date} = "$year-$mon-$mday";
    }
}

sub modlist {
    my $self = shift;
    my $mod = catfile $self->{CPAN}, 'modules', '03modlist.data.gz';
    print "Reading information from $mod\n";
    my $lines = zcat($mod);
    while (@$lines) {
        my $shift = shift(@$lines);
        last if $shift =~ /^\s*$/;
    }
    push @$lines, q{CPAN::Modulelist->data;};
    my($comp) = Safe->new("CPAN::Safe1");
    my($eval) = join("\n", @$lines);
    my $ret = $comp->reval($eval);
    die "Cannot eval $mod: $@" if $@;
    return $ret;
}

sub packages {
    my $self = shift;
    my $packages = catfile $self->{CPAN}, 'modules', 
        '02packages.details.txt.gz';
    print "Reading information from $packages\n";
    my $lines = zcat($packages);
    while (@$lines) {
        my $shift = shift(@$lines);
        last if $shift =~ /^\s*$/;
    }
    my ($ret, $cpan_files);
    foreach (@$lines) {
	my ($mod,$version,$file,$comment) = split " ", $_, 4;
        $version = undef if $version eq 'undef';
        $ret->{$mod} = {version => $version, file => $file};
        $cpan_files->{$file}++;
    }
    return ($ret, $cpan_files);
}

sub mailrc {
    my $self = shift;
    my $mailrc = catfile $self->{CPAN}, 'authors', '01mailrc.txt.gz';
    print "Reading information from $mailrc\n";
    my $lines = zcat($mailrc);
    my $auths;
    foreach (@$lines) {
	#my($userid,$fullname,$email) =
	    #m/alias\s+(\S+)\s+\"([^\"\<]+)\s+\<([^\>]+)\>\"/;
        my ($userid, $authinfo) = m/alias\s+(\S+)\s+\"([^\"]+)\"/;
        next unless $userid;
        my ($fullname, $email);
        if ($authinfo =~ m/([^<]+)\<(.*)\>/) {
            $fullname = $1;
            $email = $2;
        }
        else {
            $fullname = '';
            $email = lc($userid) . '@cpan.org';
        }
       $auths->{$userid} = {fullname => trim($fullname),
                            email => trim($email)};
    }
    $self->{auths} = $auths;
}

sub zcat {
    my $file = shift;
    my ($buffer, $lines);
    my $gz = gzopen($file, 'rb')
        or die "Cannot open $file: $gzerrno";
    while ($gz->gzreadline($buffer) > 0) {
        push @$lines, $buffer;
    }
    die "Error reading from $file: $gzerrno" . ($gzerrno+0)
        if $gzerrno != Z_STREAM_END;
    $gz->gzclose();
    return $lines;
}

sub trim {
    my $string = shift;
    return '' unless $string;
    $string =~ s/^\s+//;
    $string =~ s/\s+$//;
    $string =~ s/\s+/ /g;
    return $string;
}

1;

__END__


CPAN-Search-Lite documentation