Jump to content

SearchShards

From Wikitech

what

Search/Lucene index sharding.

finest quality script to gather index distribution data (which index shards are present on which search hosts; the output is a sortable wikitable)

#!/usr/bin/perl
#
# Gather index distribution data from the search hosts and print it as a
# sortable wikitable (one row per shard, one column per host).
#
# For every host search1..search20 the script runs `find` over ssh as root,
# listing the numerically named directories under /a/search/indexes/update
# that match `-ctime 0`. The parent directory name of each hit is taken as
# the shard name. A cell shows 'y' when the shard was seen on that host and
# an em dash otherwise.
#
# Hosts that cannot be reached are reported on STDERR and simply show up as
# empty columns; the script keeps going (best effort).
#
# NOTE: `use utf8` is deliberately NOT enabled, so the em dash literal below
# is emitted as the same raw UTF-8 bytes that are stored in this file.
use strict;
use warnings;

my @hosts = (1 .. 20);

# The glob is single-quoted so that the *remote* shell hands it to find
# untouched instead of expanding it against root's working directory.
my $remote_find
    = q{find /a/search/indexes/update -ctime 0 -type d -name '[0-9]*'};

# $hosts_for{$shard}{$host_number} = 1 when the shard was found on that host.
my %hosts_for;

HOST:
for my $n (@hosts) {
    # List-form pipe open: no local shell is involved, and failures to
    # start ssh are reported rather than silently ignored.
    open(my $ssh, '-|', 'ssh', "root\@search$n", $remote_find) or do {
        warn "search$n: cannot run ssh: $!\n";
        next HOST;
    };

    while (my $path = <$ssh>) {
        # .../<shard>/<numeric dir>  ->  capture <shard>
        if ($path =~ m{([^/]+)/\d+$}) {
            $hosts_for{$1}{$n} = 1;
        }
    }

    # close() on a pipe returns false when the command exited non-zero;
    # $! is only meaningful if the close itself failed.
    if (!close $ssh) {
        my $reason = $! ? "close failed: $!"
                        : 'exit status ' . ($? >> 8);
        warn "search$n: ssh/find did not finish cleanly ($reason)\n";
    }
}

# Table header: "shard" followed by one column per host number.
print "{| class=\"wikitable sortable\"\n|-\n! shard";
for my $n (@hosts) {
    print " !! $n";
}
print "\n";

# One row per shard, sorted by shard name.
for my $shard (sort keys %hosts_for) {
    print "|-\n| $shard";
    for my $n (@hosts) {
        my $cell = exists $hosts_for{$shard}{$n} ? 'y' : '—';
        print " || $cell";
    }
    print "\n";
}
print "|}\n";