Skip to content

Commit 82e44c3

Browse files
committed
author import: handle duplicate ids
When reading 00whois.xml, there can be cpanid elements containing duplicate id element values. This can happen when there is both a list entry and an author entry for the same name. In these cases, the author entry is extraneous. Rather than relying on XML::Simple's built-in array-to-hash transformation (keyed on id values), do the work ourselves so we can handle the duplicate id entries. Also configure XML::Simple to be stricter about how it parses.
1 parent 9d3f047 commit 82e44c3

File tree

1 file changed

+41
-1
lines changed

1 file changed

+41
-1
lines changed

lib/MetaCPAN/Script/Author.pm

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,49 @@ my @compare_fields = do {
7878
sort grep !$seen{$_}++, @cpan_fields, @author_config_fields;
7979
};
8080

81+
# Read-only attribute holding the parsed whois data. It is lazy, so the
# value is produced by _build_whois_data on first access rather than at
# construction time.
# NOTE(review): the NoGetopt trait presumably keeps this attribute from being
# exposed as a command-line option -- confirm against the class's Getopt setup.
has whois_data => (
82+
is => 'ro',
83+
traits => ['NoGetopt'],
84+
lazy => 1,
85+
builder => '_build_whois_data',
86+
);
87+
88+
# Builder for the lazy whois_data attribute.
#
# Parses the XML available from $self->author_fh and returns a hash
# reference keyed by each cpanid entry's id value. Each value is a flat
# hash of that entry's simple fields (field name => scalar text).
#
# Only fields that occur exactly once in an entry and hold plain text are
# kept; repeated or nested elements are dropped. Entries without a single,
# non-empty id are skipped, since they cannot be keyed.
#
# When several entries share the same id, the first one seen wins, except
# that an entry of type 'list' replaces a previously stored entry of type
# 'author' (the author entry is considered extraneous in that case).
sub _build_whois_data {
    my $self = shift;

    # Parse strictly: every element is an array ref (ForceArray), XML
    # attributes are ignored (NoAttr), empty elements become '' and nothing
    # is folded into hashes by key (KeyAttr => []), so duplicate ids survive
    # parsing and can be resolved below.
    my $xml = XMLin(
        $self->author_fh,
        ForceArray    => 1,
        SuppressEmpty => '',
        NoAttr        => 1,
        KeyAttr       => [],
    );

    my $whois_data = {};

    # "|| []" avoids autovivifying cpanid when the document has no entries.
    for my $entry ( @{ $xml->{cpanid} || [] } ) {

        # Flatten the entry: keep only single-valued, plain-text fields.
        my %fields;
        for my $name ( keys %$entry ) {
            my $content = $entry->{$name};
            next unless ref $content eq 'ARRAY';
            next unless @$content == 1 && !ref $content->[0];
            $fields{$name} = $content->[0];
        }

        # An entry without a usable id cannot be keyed; storing it would
        # create a bogus '' key (and emit "uninitialized" warnings).
        my $pauseid = $fields{id};
        next unless defined $pauseid && length $pauseid;

        my $existing = $whois_data->{$pauseid};

        # "|| ''" guards the string comparison against a missing type.
        my $list_replaces_author = $existing
            && ( $existing->{type} || '' ) eq 'author'
            && ( $fields{type}     || '' ) eq 'list';

        if ( !$existing || $list_replaces_author ) {
            $whois_data->{$pauseid} = \%fields;
        }
    }

    return $whois_data;
}
120+
81121
sub index_authors {
82122
my $self = shift;
83-
my $authors = XMLin( $self->author_fh )->{cpanid};
123+
my $authors = $self->whois_data;
84124

85125
if ( $self->pauseid ) {
86126
log_info {"Indexing 1 author"};

0 commit comments

Comments
 (0)