Jump to content

Verify-copy.sh

From Wikitech
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.

You probably want to use compare.py instead and do a full verification.

A script that compares the external store (ES) contents against the metadata would be nice (does one already exist?).

This page was last updated in 2011 and may be outdated. Please update it if you can.
#!/bin/bash

#
# script to compare random samples from two external store databases and report whether they match
# asks one db for the rowcount
# generates 10 groups of 200 random numbers within the rowcount range
# for each group, selects the blobs with those random IDs from both DBs and md5sums the output
# compares the two md5sums of each group
# reports how many groups match / fail
#
# arguments (all six are required):
#   $1 host A   $2 database A   $3 table A
#   $4 host B   $5 database B   $6 table B
#
# example usage:
#   ./verify-copy.sh 10.0.2.158 yowiki blobs es1002.eqiad.wmnet yowiki blobs_cluster10

# credentials, used for both hosts (the password below is a placeholder)
mysqluser="wikiadmin"
mysqlpass="xxxxxxxxx"
# get the database names, table names and the two hostnames from the command line
hosta=$1
dbnamea=$2
tablea=$3
hostb=$4
dbnameb=$5
tableb=$6

# print help
usage() {
        echo "Compares random entries in the external database tables."
        echo "Example usage:"
        echo "  ./verify-copy.sh 10.0.2.158 yowiki blobs es1002.eqiad.wmnet yowiki blobs_cluster10"
}

# no arguments at all, or an explicit help request: show the help and stop
if [[ -z "$hosta" || $hosta == '-h' || $hosta == '--help' ]]
then
        usage
        exit 0
fi

# a partial argument list would otherwise reach mysql with an empty host,
# database or table name, so reject it here with a clear message
for arg in "$dbnamea" "$tablea" "$hostb" "$dbnameb" "$tableb"
do
        if [[ -z "$arg" ]]
        then
                echo "error: expected 6 arguments: HOST_A DB_A TABLE_A HOST_B DB_B TABLE_B" >&2
                usage >&2
                exit 1
        fi
done

# let a pipeline report failure when mysql fails, so that the md5sum of an
# empty (failed) result is never mistaken for the checksum of real data
set -o pipefail

# Print the md5sum of the blob_text of the given blob IDs.
# Arguments: $1 - host, $2 - database, $3 - table, $4 - comma-separated blob IDs
# Globals:   mysqluser, mysqlpass (read)
# Outputs:   the md5 hex digest on stdout
# Returns:   non-zero if any stage of the query/checksum pipeline failed
blob_md5() {
        local host=$1 db=$2 table=$3 ids=$4
        # order by blob_id so both servers emit the rows in the same order;
        # without it the row order of the two results is not guaranteed to agree
        mysql --batch --skip-column-names -u "$mysqluser" "-p$mysqlpass" -h "$host" "$db" \
                -e "select blob_text from $table where blob_id in ($ids) order by blob_id" \
                | md5sum | cut -f 1 -d ' '
}

## find out how many rows we've got
if ! num_rows=$(mysql --batch --skip-column-names -u "$mysqluser" "-p$mysqlpass" -h "$hosta" "$dbnamea" -e "select count(*) from $tablea;")
then
        echo "error: could not get the row count of $dbnamea.$tablea from $hosta" >&2
        exit 1
fi

# the row count is used as a modulus below, so it must be a positive integer
if [[ ! $num_rows =~ ^[1-9][0-9]*$ ]]
then
        echo "error: $dbnamea.$tablea on $hosta has no rows to sample (row count: '$num_rows')" >&2
        exit 1
fi

## select 2000 random rows (in 10 groups of 200 rows) and md5sum them, compare, record status
num_failed=0
num_succeeded=0
for j in {1..10}
do
        # the list is seeded with id 1 so every later id can simply be appended after a comma
        idlist='1'
        for i in {1..200}
        do
                # $RANDOM alone only reaches 32767, which would restrict the sample to the
                # first ~32k ids; two draws are combined into a 30-bit value instead.
                # The +1 maps the result onto 1..num_rows
                # (assumes blob_id values start at 1 - TODO confirm against the table).
                id=$(( ((RANDOM << 15) | RANDOM) % num_rows + 1 ))
                idlist="${idlist},$id"
        done
        # a group only counts as matched when both queries succeeded AND the sums agree;
        # a failed query (mysql prints its own error) counts the group as failed
        if suma=$(blob_md5 "$hosta" "$dbnamea" "$tablea" "$idlist") \
                && sumb=$(blob_md5 "$hostb" "$dbnameb" "$tableb" "$idlist") \
                && [[ "$suma" == "$sumb" ]]
        then
                num_succeeded=$(( num_succeeded + 1 ))
        else
                num_failed=$(( num_failed + 1 ))
        fi
done

echo "matched: $num_succeeded, failed: $num_failed"