Verify-copy.sh
Appearance
You probably want to use compare.py
instead and do a full verification.
A script comparing ES with metadata would be nice (does that exist?).
This page was last updated in 2011 and may be outdated. Please update it if you can.
#!/bin/bash # # script to compare random samples from two external store databases and report whether they match # asks one db for the rowcount # generates 2000 random numbers within the rowcount range # selects the blob with the random ID, md5sums it from both DBs # compares the md5sum # reports how many match / fail # # example usage: # ./verify-copy.sh 10.0.2.158 yowiki blobs es1002.eqiad.wmnet yowiki blobs_cluster10 # passwords mysqluser="wikiadmin" mysqlpass="xxxxxxxxx" # get the database name and the two hostnames from the command line hosta=$1 dbnamea=$2 tablea=$3 hostb=$4 dbnameb=$5 tableb=$6 # print help if [[ -z "$hosta" || $hosta == '-h' || $hosta == '--help' ]] then echo "Compares random entries in the external database tables." echo "Example usage:" echo " ./verify-copy.sh 10.0.2.158 yowiki blobs es1002.eqiad.wmnet yowiki blobs_cluster10" exit 0 fi ## find out how many rows we've got num_rows=$(mysql --batch --skip-column-names -u $mysqluser -p$mysqlpass -h $hosta $dbnamea -e "select count(*) from $tablea;") ## select 2000 random rows (in 10 groups of 200 rows) and md5sum them, compare, record status num_failed=0 num_succeeded=0 for j in {1..10} do idlist='1' for i in {1..200} do id=$RANDOM let "id %= $num_rows" idlist="${idlist},$id" done suma=$(mysql --batch --skip-column-names -u $mysqluser -p$mysqlpass -h $hosta $dbnamea -e "select blob_text from $tablea where blob_id in ($idlist)" | md5sum | cut -f 1 -d\ ) sumb=$(mysql --batch --skip-column-names -u $mysqluser -p$mysqlpass -h $hostb $dbnameb -e "select blob_text from $tableb where blob_id in ($idlist)" | md5sum | cut -f 1 -d\ ) if [ $suma == $sumb ] then let "num_succeeded += 1" else let "num_failed += 1" fi done echo "matched: $num_succeeded, failed: $num_failed"