Jump to content

User:Jhedden/notes/ElasticSearch

From Wikitech
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.

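Script to run a remote reindex from one Elasticsearch cluster into another, or, for indexes that already exist on both clusters, to compare their document counts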

 #!/bin/bash
 # Copyright 2020 Wikimedia Foundation Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may
 # not use this file except in compliance with the License. You may obtain
 # a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 # License for the specific language governing permissions and limitations
 # under the License.
 
 # Strict mode: abort on command failure, on failure of any pipeline stage,
 # and on expansion of an unset variable.
 set -o errexit
 set -o pipefail
 set -o nounset
 
 # Set to "true" to also print informational (non-CHANGE) log messages.
 VERBOSE="false"
 # Cluster that receives the _reindex and _settings requests.
 LOCAL_ES='http://localhost:9200'
 # Cluster that the index list and the reindexed documents are read from.
 REMOTE_ES='http://tools-elastic-01.tools.eqiad.wmflabs:80'
 
 # Usage: <script> REMOTE_PASSWORD
 # The password is the first positional argument. If that is missing or
 # empty, a REMOTE_PASS value inherited from the environment is used instead.
 # "${1:-...}" is required here: with nounset active, a bare "$1" aborts the
 # script with "unbound variable" before the error below could be printed.
 REMOTE_PASS="${1:-${REMOTE_PASS:-}}"
 if [ -z "$REMOTE_PASS" ]; then
   echo "ERROR: No remote password provided" >&2
   # Exit non-zero so callers can detect the failure.
   exit 1
 fi
 
 # logmsg LEVEL MESSAGE
 #
 # Print MESSAGE on stdout, followed by a newline.
 #   LEVEL "CHANGE"  - always printed.
 #   any other LEVEL - printed only when the global VERBOSE is "true".
 # Globals:   VERBOSE (read; treated as "false" when unset)
 # Arguments: $1 - level, $2 - message text
 logmsg () {
   # Keep these local so a call does not overwrite a caller's variables.
   local level="${1:-}"
   local msg="${2:-}"
 
   case "$level" in
     CHANGE)
       # printf, not echo: echo would treat a message such as "-n" as an option.
       printf '%s\n' "$msg"
       ;;
     *)
       if [ "${VERBOSE:-false}" = "true" ]; then
         printf '%s\n' "$msg"
       fi
       ;;
   esac
 }
 
 
 # Load all the remote indexes into an array, one compact JSON object per
 # element. Process substitution is used instead of a here-string so that an
 # empty response gives an empty array rather than one empty element, and the
 # jq filter ignores anything that is not the expected JSON array.
 mapfile -t DATA < <(curl -s "$REMOTE_ES/_cat/indices?format=json" \
   | jq -c 'if type == "array" then .[] else empty end')
 
 if [ "${#DATA[@]}" -eq 0 ]; then
   echo "ERROR: No indexes returned by $REMOTE_ES" >&2
   exit 1
 fi
 
 for entry in "${DATA[@]}"; do
   unset INDEX HEALTH LOCAL_DATA LOCAL_INDEX
   # Pre-seed the keys so later expansions are safe under nounset even if
   # jq produces no output for this entry.
   declare -A INDEX=([name]="" [docs]="")
   while IFS='|' read -r key value; do
     INDEX[name]=$key
     INDEX[docs]=$value
   done < <(jq -r '"\(.index)|\(."docs.count")"' <<< "$entry")
 
   # Skip entries that could not be parsed and the internal .tasks index
   if [ -z "${INDEX[name]}" ] || [ "${INDEX[name]}" = ".tasks" ]; then
     continue
   fi
 
   # Start a remote reindex if the index doesn't exist locally
   logmsg "INFO" "Checking index: ${INDEX[name]}"
   # The filter below prints .status when the response is a JSON object and 0
   # when it is an array; only a 404 status triggers a reindex.
   # "|| echo 0" covers a failed curl or unparsable output.
   HEALTH=$(curl -s "$LOCAL_ES/_cat/indices/${INDEX[name]}?format=json" \
     | jq -r 'if type == "object" then .status else 0 end' 2>/dev/null || echo 0)
 
   if [ "$HEALTH" = '404' ]; then
     logmsg "CHANGE" "Starting remote reindex on ${INDEX[name]}"
     # Build the request body with jq so the password and index name are
     # JSON-escaped (a '"' or '\' in either would otherwise corrupt the
     # request), and hand it to curl on stdin.
     jq -nc \
       --arg host "$REMOTE_ES" \
       --arg password "$REMOTE_PASS" \
       --arg index "${INDEX[name]}" \
       '{
         source: {
           remote: {host: $host, username: "reindex", password: $password},
           index: $index,
           size: "200"
         },
         dest: {index: $index}
       }' \
       | curl -HContent-Type:application/json -XPOST "$LOCAL_ES/_reindex?pretty" -d @-
 
     # Configure replicas on the new index
     logmsg "CHANGE" "Adding replicas on ${INDEX[name]}"
     curl -s -HContent-Type:application/json -XPUT "$LOCAL_ES/${INDEX[name]}/_settings" -d '{"index.number_of_replicas" : 2}'
   else
     # If the index exists locally, compare .docs.count between remote and local
     logmsg "INFO" "Found existing index ${INDEX[name]} checking doc count"
     mapfile -t LOCAL_DATA < <(curl -s "$LOCAL_ES/_cat/indices/${INDEX[name]}?format=json" \
       | jq -c 'if type == "array" then .[] else empty end')
     declare -A LOCAL_INDEX=([name]="" [docs]="")
     if [ "${#LOCAL_DATA[@]}" -gt 0 ]; then
       while IFS='|' read -r key value; do
         LOCAL_INDEX[name]=$key
         LOCAL_INDEX[docs]=$value
       done < <(jq -r '"\(.index)|\(."docs.count")"' <<< "${LOCAL_DATA[0]}")
     fi
 
     if [ -z "${LOCAL_INDEX[docs]}" ]; then
       # The local lookup returned nothing usable (for example the request
       # failed); report it and move on instead of aborting the whole run.
       echo "ERROR: Could not read doc count for index ${INDEX[name]} from $LOCAL_ES" >&2
     elif [ "${LOCAL_INDEX[docs]}" = "${INDEX[docs]}" ]; then
       logmsg "INFO" "doc count in sync on index ${INDEX[name]}"
     else
       logmsg "CHANGE" "OUT OF SYNC index ${INDEX[name]} found: ${LOCAL_INDEX[docs]} expected: ${INDEX[docs]}"
     fi
   fi
 done