Verify-copy.sh

From Wikitech

You probably want to use compare.py instead and do a full verification.

A script comparing ES with metadata would be nice (does that exist?).

This page was last updated in 2011 and may be outdated. Please update it if you can.
#!/bin/bash

#
# script to compare random samples from two external store databases and report whether they match
# asks one db for the rowcount
# generates 2000 random numbers within the rowcount range
# selects the blob with the random ID, md5sums it from both DBs
# compares the md5sum
# reports how many match / fail
#
# example usage:
#   ./verify-copy.sh 10.0.2.158 yowiki blobs es1002.eqiad.wmnet yowiki blobs_cluster10

# passwords
mysqluser="wikiadmin"
mysqlpass="xxxxxxxxx"
# get the database name and the two hostnames from the command line
hosta=$1
dbnamea=$2
tablea=$3
hostb=$4
dbnameb=$5
tableb=$6

# print help
if [[ -z "$hosta" || $hosta == '-h' || $hosta == '--help' ]]
then
        echo "Compares random entries in the external database tables."
        echo "Example usage:"
        echo "  ./verify-copy.sh 10.0.2.158 yowiki blobs es1002.eqiad.wmnet yowiki blobs_cluster10"
        exit 0
fi

## find out how many rows we've got
num_rows=$(mysql --batch --skip-column-names -u $mysqluser -p$mysqlpass -h $hosta $dbnamea -e "select count(*) from $tablea;")

## select 2000 random rows (in 10 groups of 200 rows) and md5sum them, compare, record status
num_failed=0
num_succeeded=0
for j in {1..10}
do
        idlist='1'
        for i in {1..200}
        do
                id=$RANDOM
                let "id %= $num_rows"
                idlist="${idlist},$id"
        done
        suma=$(mysql --batch --skip-column-names -u $mysqluser -p$mysqlpass -h $hosta $dbnamea -e "select blob_text from $tablea where blob_id in ($idlist)" | md5sum | cut -f 1 -d\  )
        sumb=$(mysql --batch --skip-column-names -u $mysqluser -p$mysqlpass -h $hostb $dbnameb -e "select blob_text from $tableb where blob_id in ($idlist)" | md5sum | cut -f 1 -d\  )
        if [ $suma == $sumb ]
        then
                let "num_succeeded += 1"
        else
                let "num_failed += 1"
        fi
done

echo "matched: $num_succeeded, failed: $num_failed"