[Rpm-maint] Rpm Database musings
Michael Schroeder
mls at suse.de
Mon Mar 4 15:25:26 UTC 2013
On Mon, Mar 04, 2013 at 03:12:51PM +0100, Florian Festi wrote:
> On 03/01/2013 05:32 PM, Michael Schroeder wrote:
> > (the median is quite different from the avg, that means that
> > some packages are quite big.)
>
> ...
>
> > - That means, if I have 2000 packages installed on my system
> > (which is about the real number), the concatenated headers will
> > use 20 MByte (using the median), 10 MByte when using LZO
> > compression, 7.5 with xz.
>
> My guess is that the typically installed packages are the bigger ones
> while a large number of rarely installed add-on packages are rather
> small (at least in header size). So it might be worth using one machine
> as an example instead of doing calculations with the "average package".
My little number generator is attached. Use
rpm -qa --qf '%{HEADERIMMUTABLE}\n' | perl ./rpmhdrc
to run it against the installed packages.
Cheers,
Michael.
--
Michael Schroeder mls at suse.de
SUSE LINUX Products GmbH, GF Jeff Hawn, HRB 16746 AG Nuernberg
main(_){while(_=~getchar())putchar(~_-1/(~(_|32)/13*2-11)*13);}
-------------- next part --------------
#!/usr/bin/perl -w
use Data::Dumper;
use Compress::LZO;
#use Compress::Zlib;
#use Compress::Raw::Lzma;
# xzencode($data, $level) - compress $data in memory with the lzma "easy"
# encoder and return the compressed bytes.
#
#   $data   bytes to compress
#   $level  optional encoder preset; LZMA_PRESET_DEFAULT when omitted
#
# Dies if the encoder cannot be created or if either compression step
# reports an error.
#
# NOTE: this needs Compress::Raw::Lzma (and the LZMA_* constants it
# exports) to be loaded; the matching "use" line at the top of the script
# is commented out, so enable it before calling this function.
sub xzencode {
    my ($data, $level) = @_;
    $level = LZMA_PRESET_DEFAULT unless defined $level;

    # In list context the constructor hands back (object, status), so a
    # trailing "or die" on the list assignment can never trigger: the
    # assignment always sees two elements. Test the object itself instead.
    my ($lz, $status) = Compress::Raw::Lzma::EasyEncoder->new('AppendOutput' => 1, 'Preset' => $level);
    die("Cannot create lzma object\n") unless defined $lz;

    my $output = '';
    $status = $lz->code($data, $output);
    die("code error\n") if $status;

    # Finishing the stream must not be allowed to fail silently, otherwise
    # a truncated result would be measured as if it were complete.
    $status = $lz->flush($output, LZMA_FINISH);
    die("flush error\n") unless $status == LZMA_OK || $status == LZMA_STREAM_END;

    return $output;
}
# headfilter($head, $filt) - build a reduced copy of a header blob that
# contains only the entries whose tag is selected by $filt.
#
#   $head  header blob: 16-byte intro (index count at byte 8, data size at
#          byte 12), then one 16-byte index entry per tag
#          (tag, type, offset, count as big-endian 32-bit values), then
#          the data store
#   $filt  hash reference; an entry is kept when $filt->{tag} is true
#
# Returns ($out, $maxcnt): $out is the original 16-byte intro followed by,
# for every kept entry, its index entry and its slice of the data store;
# $maxcnt is the largest "count" field among the kept entries (0 if none).
#
# The length of an entry's data is taken as the distance to the next
# entry's offset, so the function dies with "argh!" when offsets are not
# strictly increasing at a kept entry.
sub headfilter {
    my ($head, $filt) = @_;
    my ($cnt, $cntdata) = unpack('@8NN', $head);
    my $datastart = 16 + $cnt * 16;
    my $out = substr($head, 0, 16);
    my $maxcnt = 0;
    for my $i (1 .. $cnt) {
        my $x = substr($head, $i * 16, 16);
        my ($tag, $type, $offset, $count) = unpack('N4', $x);
        next unless $filt->{$tag};
        # The last index entry has no successor; reading one would pick up
        # bytes from the data store. Its data ends where the store ends.
        my $yoffset = $i < $cnt
            ? unpack('@8N', substr($head, $i * 16 + 16, 16))
            : $cntdata;
        die("argh!\n") if $yoffset <= $offset;
        $out .= $x . substr($head, $datastart + $offset, $yoffset - $offset);
        $maxcnt = $count if $maxcnt < $count;
    }
    return ($out, $maxcnt);
}
# headfilterx($head, $filt) - return a copy of a header blob with the
# entries selected by $filt cut out (the complement of keeping them).
#
#   $head  header blob: 16-byte intro (index count at byte 8, data size at
#          byte 12), then one 16-byte index entry per tag
#          (tag, type, offset, count as big-endian 32-bit values), then
#          the data store
#   $filt  hash reference; an entry is removed when $filt->{tag} is true
#
# For every selected entry both its index entry and its slice of the data
# store are deleted. The count and size fields in the intro are left
# untouched. Dies with "argh!" when offsets are not strictly increasing
# at a selected entry.
sub headfilterx {
    my ($head, $filt) = @_;
    my ($cnt, $cntdata) = unpack('@8NN', $head);
    my $datastart = 16 + $cnt * 16;
    my @xlist;
    for my $i (1 .. $cnt) {
        my ($tag, $type, $offset, $count) = unpack('N4', substr($head, $i * 16, 16));
        next unless $filt->{$tag};
        # The last index entry has no successor; reading one would pick up
        # bytes from the data store. Its data ends where the store ends.
        my $yoffset = $i < $cnt
            ? unpack('@8N', substr($head, $i * 16 + 16, 16))
            : $cntdata;
        die("argh!\n") if $yoffset <= $offset;
        push @xlist, [$i * 16, 16];
        push @xlist, [$datastart + $offset, $yoffset - $offset];
    }
    # Cut from the back of the blob towards the front so that earlier
    # positions stay valid while later spans are removed.
    for my $span (sort { $b->[0] <=> $a->[0] } @xlist) {
        substr($head, $span->[0], $span->[1], '');
    }
    return $head;
}
# Per-input measurements collected by the scan loop, keyed by input line.
my %data = ();
# Running count of inputs processed, used for progress output.
my $scn = 0;
print "scanning\n";

# Tag-number sets handed to headfilter()/headfilterx(); every value is 1.
# NOTE(review): 1080-1082 look like rpm's changelog tags, the %fltags
# numbers like the per-file tags and 1117 like the basenames tag --
# confirm against rpm's tag table.
my %cltags;
@cltags{1080 .. 1082} = (1) x 3;

my %fltags;
$fltags{$_} = 1 for (
    1028 .. 1030, 1033 .. 1037, 1039, 1040, 1045,
    1095 .. 1097, 1116 .. 1118, 1140 .. 1145, 5008,
);

my %bntags = (1117 => 1);

# Union of the two larger sets: everything that gets stripped for the
# "xx" measurement.
my %xxtags = map { $_ => 1 } (keys %cltags, keys %fltags);
# Read one header section from the open handle $fh: a 16-byte intro
# (magic 0x8eade801, index count at byte 8, data size at byte 12) followed
# by the index entries and the data store. When $pad is true the data size
# is rounded up to a multiple of 8 before reading.
# Returns the raw bytes of the section, or undef on a short read or a bad
# magic number.
my $read_section = sub {
    my ($fh, $pad) = @_;
    my $section;
    return undef unless (read($fh, $section, 16) || 0) == 16;
    my ($headmagic, $cnt, $cntdata) = unpack('N@8NN', $section);
    return undef unless $headmagic == 0x8eade801;
    $cntdata = ($cntdata + 7) & ~7 if $pad;
    my $want = $cntdata + $cnt * 16;
    return undef unless (read($fh, $section, $want, 16) || 0) == $want;
    return $section;
};

# Main scan: every input line is either the hex dump of a header blob (as
# printed by the rpm query format shown in the usage note) or the path of
# an rpm file. For each one the header is measured raw and LZO-compressed,
# whole and in several filtered variants, and the numbers are stored in
# %data.
while (my $rpm = <STDIN>) {
    chomp $rpm;
    next if $rpm eq '(none)';
    my $head;
    if ($rpm =~ /^[0-9a-f]+$/) {
        # Hex input lacks the 8 magic/reserved bytes; prepend them so the
        # blob has the same layout as a header read from a file.
        $head = pack('H*', "8eade80100000000".$rpm);
    } else {
        open(my $fh, '<', $rpm) or die("Cannot open $rpm: $!\n");
        binmode($fh);
        my $lead;
        my $ok = (read($fh, $lead, 96) || 0) == 96;
        if ($ok) {
            # 32-bit magic at byte 0, 16-bit signature type at byte 78.
            my ($magic, $sigtype) = unpack('N@78n', $lead);
            $ok = $magic == 0xedabeedb && $sigtype == 5;
        }
        # The signature header is only read to get past it; it is padded
        # to an 8-byte boundary in the file.
        $ok &&= defined $read_section->($fh, 1);
        # The main header is followed directly by the payload, so no
        # padding here: padding would count payload bytes as header.
        $head = $read_section->($fh, 0) if $ok;
        close $fh;
        unless (defined $head) {
            warn("Bad rpm $rpm\n");
            next;
        }
    }

    # Whole header.
    my $ucsize = length($head);
    my $lzosize = length(Compress::LZO::compress($head, 9));

    # Only the %cltags entries.
    my ($clhead, $clnum) = headfilter($head, \%cltags);
    my $clucsize = length($clhead);
    my $cllzosize = length(Compress::LZO::compress($clhead, 9));

    # Only the %fltags entries.
    my ($flhead, $flnum) = headfilter($head, \%fltags);
    my $flucsize = length($flhead);
    my $fllzosize = length(Compress::LZO::compress($flhead, 9));

    # Only the %bntags entries.
    my ($bnhead, $bnnum) = headfilter($head, \%bntags);
    my $bnucsize = length($bnhead);
    my $bnlzosize = length(Compress::LZO::compress($bnhead, 9));

    # Everything except the %xxtags entries.
    my ($xxhead) = headfilterx($head, \%xxtags);
    my $xxucsize = length($xxhead);
    my $xxlzosize = length(Compress::LZO::compress($xxhead, 9));

    # my $gzsize = length(Compress::Zlib::compress($head, 9));
    # my $xzsize = length(xzencode($head, 9));
    # $data{$rpm} = [$ucsize, $lzosize, $gzsize, $xzsize];
    # Column order must match the label list used by the report.
    $data{$rpm} = [$ucsize, $lzosize, $clucsize, $cllzosize, $clnum, $flucsize, $fllzosize, $flnum, $xxucsize, $xxlzosize, $bnucsize, $bnlzosize, $bnnum];
    #print "$rpm $ucsize $lzosize $gzsize $xzsize\n";
    $scn++;
    print "$scn done\n" if $scn % 10 == 0;
}
# Report: for every measured column print the sum, the average and the
# median over all scanned inputs. The labels are in the same order as the
# values stored per input in %data.
#my @n = qw{uncompressed lzo gzip xz};
my @n = ('unc ', 'lzo ', 'clunc', 'cllzo', 'clnum', 'flunc', 'fllzo', 'flnum', 'xxunc', 'xxlzo', 'bnunc', 'bnlzo', 'bnnum');
my @rows = values %data;
print "\nscanned ".scalar(@rows)." rpms\n";
die("no rpms scanned\n") unless @rows;
for my $i (0 .. $#n) {
    my @s = sort {$a->[$i] <=> $b->[$i]} @rows;
    print "$n[$i]: ";
    my $sum = 0;
    $sum += $_->[$i] for @rows;
    # NOTE(review): "+ 1" before truncation rounds up, and also bumps an
    # exact average by one -- kept as is so the figures stay comparable.
    my $avg = int($sum / scalar(@rows) + 1);
    # Upper middle element for an even number of rows.
    printf "sum: %9d, avg: %7d, median: %7d\n", $sum, $avg, $s[int(scalar(@s) / 2)]->[$i];
}
More information about the Rpm-maint
mailing list