User:Jhedden/notes/ElasticSearch

From Wikitech

Script to remote reindex or compare indexes between clusters

 #!/bin/bash
 # Copyright 2020 Wikimedia Foundation Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may
 # not use this file except in compliance with the License. You may obtain
 # a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 # License for the specific language governing permissions and limitations
 # under the License.
 
 # Strict mode: abort on command failure, on failure of any pipeline stage,
 # and on use of an unset variable.
 set -o errexit
 set -o pipefail
 set -o nounset
 
 # Set VERBOSE=true in the environment to also print INFO messages;
 # CHANGE messages are always printed.
 VERBOSE="${VERBOSE:-false}"
 # Cluster that receives the reindexed data.
 LOCAL_ES='http://localhost:9200'
 # Cluster the indexes are copied from / compared against.
 REMOTE_ES='http://tools-elastic-01.tools.eqiad.wmflabs:80'
 
 # Usage: <script> REMOTE_PASSWORD
 # "${1:-}" keeps nounset from aborting with "unbound variable" before the
 # friendly error below can be printed when no argument is given.
 if [ -n "${1:-}" ]; then
   REMOTE_PASS=$1
 else
   echo "ERROR: No remote password provided" >&2
   # A missing password is a failure, so do not report success to the caller.
   exit 1
 fi
 
 #######################################
 # Print a log message, filtered by level.
 # Globals:   VERBOSE (read) - "true" enables messages of every level
 # Arguments: $1 - level; "CHANGE" is always printed, any other level is
 #                 printed only in verbose mode
 #            $2 - message text
 # Outputs:   the message on stdout, followed by a newline
 #######################################
 logmsg () {
   # Keep these local so logging never overwrites caller variables that
   # happen to be named "level" or "msg".
   local level="$1"
   local msg="$2"
 
   case "$level" in
     CHANGE)
       # printf, unlike echo, prints messages such as "-n" verbatim.
       printf '%s\n' "$msg"
       ;;
     *)
       if [ "$VERBOSE" = "true" ]; then
         printf '%s\n' "$msg"
       fi
       ;;
   esac
 }
 
 
 # Load all the remote indexes into an array, one compact JSON object per
 # element. The listing is captured in a variable first so that a curl or jq
 # failure (pipefail is enabled above) is reported instead of being silently
 # turned into an empty list.
 if ! REMOTE_JSON=$(curl -sf "$REMOTE_ES/_cat/indices?format=json" | jq -c '.[]'); then
   echo "ERROR: Unable to list indexes on $REMOTE_ES" >&2
   exit 1
 fi
 DATA=()
 if [ -n "$REMOTE_JSON" ]; then
   # Guarded so an empty listing yields an empty array rather than a single
   # blank element.
   mapfile -t DATA <<< "$REMOTE_JSON"
 fi
 
 for i in "${!DATA[@]}"; do
   unset INDEX HEALTH LOCAL_DATA LOCAL_INDEX
   declare -A INDEX
   # Name and document count of the remote index.
   INDEX[name]=$(jq -r '.index' <<< "${DATA[$i]}")
   INDEX[docs]=$(jq -r '."docs.count"' <<< "${DATA[$i]}")
 
   # Skip internal .tasks index
   if [ "${INDEX[name]}" = ".tasks" ]; then
     continue
   fi
 
   # Ask the local cluster whether the index exists by looking at the HTTP
   # status code of the request. "|| true" keeps errexit from aborting when
   # curl cannot connect; curl then reports the status as 000.
   logmsg "INFO" "Checking index: ${INDEX[name]}"
   HEALTH=$(curl -s -o /dev/null -w '%{http_code}' "$LOCAL_ES/_cat/indices/${INDEX[name]}?format=json" || true)
 
   if [ "$HEALTH" = '404' ]; then
     # Start a remote reindex if the index doesn't exist locally
     logmsg "CHANGE" "Starting remote reindex on ${INDEX[name]}"
     # Build the request with jq so the password and index name are
     # JSON-escaped instead of being spliced into the document verbatim.
     REINDEX_BODY=$(jq -n \
       --arg host "$REMOTE_ES" \
       --arg pass "$REMOTE_PASS" \
       --arg index "${INDEX[name]}" \
       '{
         source: {
           remote: {host: $host, username: "reindex", password: $pass},
           index: $index,
           size: "200"
         },
         dest: {index: $index}
       }')
     curl -HContent-Type:application/json -XPOST "$LOCAL_ES/_reindex?pretty" -d "$REINDEX_BODY"
 
     # Configure replicas on the new index
     logmsg "CHANGE" "Adding replicas on ${INDEX[name]}"
     curl -s -HContent-Type:application/json -XPUT "$LOCAL_ES/${INDEX[name]}/_settings" -d '{"index.number_of_replicas" : 2}'
   elif [ "$HEALTH" = '200' ]; then
     # If the index exists locally, compare .docs.count between remote and local
     logmsg "INFO" "Found existing index ${INDEX[name]} checking doc count"
     LOCAL_DATA=$(curl -s "$LOCAL_ES/_cat/indices/${INDEX[name]}?format=json" | jq -c '.[0]')
     declare -A LOCAL_INDEX
     LOCAL_INDEX[name]=$(jq -r '.index' <<< "$LOCAL_DATA")
     LOCAL_INDEX[docs]=$(jq -r '."docs.count"' <<< "$LOCAL_DATA")
 
     if [ "${LOCAL_INDEX[docs]}" = "${INDEX[docs]}" ]; then
       logmsg "INFO" "doc count in sync on index ${INDEX[name]}"
     else
       logmsg "CHANGE" "OUT OF SYNC index ${INDEX[name]} found: ${LOCAL_INDEX[docs]} expected: ${INDEX[docs]}"
     fi
   else
     # Neither "exists" nor "missing" (for example the local cluster is
     # unreachable): report it and move on rather than treating the index
     # as present.
     echo "ERROR: Unexpected HTTP status $HEALTH checking ${INDEX[name]} on $LOCAL_ES" >&2
   fi
 done