CPAN::Search::Lite::Info - extract information from CPAN indices
This module extracts information from the CPAN indices
$CPAN/indices/ls-lR.gz, $CPAN/modules/03modlist.data.gz,
$CPAN/modules/02packages.details.txt.gz, and
$CPAN/authors/01mailrc.txt.gz. If a local CPAN mirror
isn't present, it will use the files fetched from a remote CPAN mirror
under CPAN
by the CPAN::Search::Lite::Index manpage.
A CPAN::Search::Lite::Info
object is created with
my $info = CPAN::Search::Lite::Info(CPAN => $cpan);
where $cpan
specifies the top-level CPAN directory
underneath which the index files are found. Calling
$info->fetch_info();
will result in the object being populated with 3 hash references:
$info->{dists}
version
- the version of the CPAN filefilename
- the CPAN filenamecpanid
- the CPAN author iddescription
- a description, if availablesize
- the size of the filedate
- the last modified date (YYYY/MM/DD) of the filemd5
- the CPAN md5 checksum of the filemodules
- specifies the modules present in the distribution:for my $module (keys %{$info->{$distname}->{modules}}) { print "Module: $module\n"; }
chapterid
- specifies the chapterid and the subchapter
for the distribution:for my $id (keys %{$info->{$distname}->{chapterid}}) { print "For chapterid $id\n"; for my $sc (keys %{$info->{$distname}->{chapterid}->{$id}}) { print " Subchapter: $sc\n"; } }
requires
- a hash reference whose keys are the names of
prerequisite modules required for the package and whose values are
the associated module versions. This information comes from the
META.yml file processed in the CPAN::Search::Lite::Extract manpage.$info->{mods}
dist
- the distribution name containing the moduleversion
- the versiondescription
- a description, if availablechapterid
- the chapter id of the module, if presentdslip
- a 5 character string specifying the dslip
(development, support, language, interface, public licence) information.$info->{auths}
fullname
- the author's full nameemail
- the author's email addressthe CPAN::Search::Lite::Index manpage
package CPAN::Search::Lite::Info; use strict; use warnings; use Storable; use CPAN::DistnameInfo; use File::Spec::Functions; use Compress::Zlib; use File::Listing; use File::Basename; use Safe; use CPAN::Search::Lite::Util qw(vcmp); our ($ext); $ext = qr/\.(tar\.gz|tar\.Z|tgz|zip)$/; our ($VERSION); $VERSION = 0.66; sub new { my ($class, %args) = @_; die "Must supply the top-level CPAN directory" unless $args{CPAN}; my $self = {CPAN => $args{CPAN}, ignore => $args{ignore}, dists => {}, auths => {}, mods => {}}; bless $self, $class; } sub fetch_info { my $self = shift; $self->mailrc(); $self->dists_and_mods(); return 1; } sub dists_and_mods { my $self = shift; my $modlist = $self->modlist(); my ($packages, $cpan_files) = $self->packages(); my ($dists, $mods); my $ignore = $self->{ignore}; my $pat; if ($ignore and ref($ignore) eq 'ARRAY') { $pat = join '|', @$ignore; } foreach my $cpan_file (keys %$cpan_files) { my $d = CPAN::DistnameInfo->new($cpan_file); next unless ($d->maturity eq 'released'); my $dist = $d->dist; my $version = $d->version; my $cpanid = $d->cpanid; my $filename = $d->filename; unless ($dist and $version and $cpanid) { print "No dist_name/version/cpanid for $cpan_file: skipping\n"; delete $cpan_files->{$cpan_file}; next; } # ignore specified dists if ($pat and ($dist =~ /^($pat)$/)) { delete $cpan_files->{$cpan_file}; print "Ignoring $dist\n"; next; } if (not $dists->{$dist} or vcmp($version, $dists->{$dist}->{version}) > 0) { $dists->{$dist}->{version} = $version; $dists->{$dist}->{filename} = $filename; $dists->{$dist}->{cpanid} = $cpanid; } } my $wanted; foreach my $dist (keys %$dists) { $wanted->{basename($dists->{$dist}->{filename})} = $dist; } $self->parse_ls($dists, $wanted); foreach my $module (keys %$packages) { my $file = basename($packages->{$module}->{file}); my $dist; unless ($dist = $wanted->{$file} and $dists->{$dist}) { delete $packages->{$module}; next; } $mods->{$module}->{dist} = $dist; $dists->{$dist}->{modules}->{$module}++; my $version = $packages->{$module}->{version}; $mods->{$module}->{version} = $version; if (my $info = $modlist->{$module}) { if (my $desc = $info->{description}) { $mods->{$module}->{description} = $desc; (my $trial_dist = $module) =~ s!::!-!g; if ($trial_dist eq $dist) { $dists->{$dist}->{description} = $desc; } } if (my $chapterid = $info->{chapterid} + 0) { $mods->{$module}->{chapterid} = $chapterid; (my $sub_chapter = $module) =~ s!^([^:]+).*!$1!; $dists->{$dist}->{chapterid}->{$chapterid}->{$sub_chapter}++; } my %dslip = (); for (qw(statd stats statl stati statp) ) { next unless defined $info->{$_}; $dslip{$_} = $info->{$_}; } if (%dslip) { my $value = ''; foreach (qw(d s l i p)) { my $key = 'stat' . $_; $value .= (defined $dslip{$key} ? $dslip{$key} : '?'); } $mods->{$module}->{dslip} = $value; } } } $self->{dists} = $dists; $self->{mods} = $mods; } sub parse_ls { my ($self, $dists, $wanted) = @_; my $ls = catfile $self->{CPAN}, 'indices', 'ls-lR.gz'; print "Reading information from $ls\n"; my ($buffer, $dir, $lines, $listing); my $gz = gzopen($ls, 'rb') or die "Cannot open $ls: $gzerrno"; while ($gz->gzreadline($buffer) > 0) { next unless $buffer =~ /^-r.*$ext/; push @$lines, $buffer; } die "Error reading from $ls: $gzerrno" . ($gzerrno+0) if $gzerrno != Z_STREAM_END; $gz->gzclose(); $dir = parse_dir($lines, '+0000'); for (@$dir) { next unless ($_->[1] eq 'f' and $wanted->{$_->[0]}); $listing->{$_->[0]} = {size => $_->[2], time => $_->[3]}; } foreach my $dist (keys %$dists) { my $filename = $dists->{$dist}->{filename}; my $base = basename($filename); unless ($listing->{$base} and $listing->{$base}->{size} and $listing->{$base}->{time}) { delete $dists->{$dist}; next; } my ($mday, $mon, $year) = (gmtime($listing->{$base}->{time}))[3,4,5]; $mon++; $year += 1900; $dists->{$dist}->{filename} = $filename; $dists->{$dist}->{size} = $listing->{$base}->{size}; $dists->{$dist}->{date} = "$year-$mon-$mday"; } } sub modlist { my $self = shift; my $mod = catfile $self->{CPAN}, 'modules', '03modlist.data.gz'; print "Reading information from $mod\n"; my $lines = zcat($mod); while (@$lines) { my $shift = shift(@$lines); last if $shift =~ /^\s*$/; } push @$lines, q{CPAN::Modulelist->data;}; my($comp) = Safe->new("CPAN::Safe1"); my($eval) = join("\n", @$lines); my $ret = $comp->reval($eval); die "Cannot eval $mod: $@" if $@; return $ret; } sub packages { my $self = shift; my $packages = catfile $self->{CPAN}, 'modules', '02packages.details.txt.gz'; print "Reading information from $packages\n"; my $lines = zcat($packages); while (@$lines) { my $shift = shift(@$lines); last if $shift =~ /^\s*$/; } my ($ret, $cpan_files); foreach (@$lines) { my ($mod,$version,$file,$comment) = split " ", $_, 4; $version = undef if $version eq 'undef'; $ret->{$mod} = {version => $version, file => $file}; $cpan_files->{$file}++; } return ($ret, $cpan_files); } sub mailrc { my $self = shift; my $mailrc = catfile $self->{CPAN}, 'authors', '01mailrc.txt.gz'; print "Reading information from $mailrc\n"; my $lines = zcat($mailrc); my $auths; foreach (@$lines) { #my($userid,$fullname,$email) = #m/alias\s+(\S+)\s+\"([^\"\<]+)\s+\<([^\>]+)\>\"/; my ($userid, $authinfo) = m/alias\s+(\S+)\s+\"([^\"]+)\"/; next unless $userid; my ($fullname, $email); if ($authinfo =~ m/([^<]+)\<(.*)\>/) { $fullname = $1; $email = $2; } else { $fullname = ''; $email = lc($userid) . '@cpan.org'; } $auths->{$userid} = {fullname => trim($fullname), email => trim($email)}; } $self->{auths} = $auths; } sub zcat { my $file = shift; my ($buffer, $lines); my $gz = gzopen($file, 'rb') or die "Cannot open $file: $gzerrno"; while ($gz->gzreadline($buffer) > 0) { push @$lines, $buffer; } die "Error reading from $file: $gzerrno" . ($gzerrno+0) if $gzerrno != Z_STREAM_END; $gz->gzclose(); return $lines; } sub trim { my $string = shift; return '' unless $string; $string =~ s/^\s+//; $string =~ s/\s+$//; $string =~ s/\s+/ /g; return $string; } 1; __END__