#!/bin/ksh
#
#H# netdata.sh
#H#
#H# Function: start or stop the netdata daemon
#H# 
#H# Usage:    netdata.sh [-v|--verbose] [-q|--quiet] [-f|--force] [-p|--port netdataport] [-c|--cleanup] 
#H#                      [start [netdata_options]|stop|restart [netdata_options]|status|cleanup|daemon_help]
#H#
#H# Parameter
#H#   -v - verbose mode
#H#   -q - quiet mode
#H#   -f - force execution
#H#   -p - port for netdata (default: 19999)
#H#   -c - cleanup the netdata data directories; this parameter is only valid with start or stop
#H#
#H#   cleanup - cleanup the netdata data directories;  this parameter is only valid if netdata is not running
#H#   netdata_options - add. options for starting the netdata daemon
#H#   daemon_help - list the usage help for the netdata daemon
#H#
#H# Author:
#H#   Bernd Schemmer (Bernd.Schemmer@gmx.de)
#H#
#
# History:  
#   23.10.2016 v1.0.0 /bs
#     initial release
#

# for debugging
#
# NOTE(review): in the visible code PREFIX is only assigned (here and by the
# parameter -d/+d) but never prepended to a command -- confirm the intended use
#
#PREFIX="echo "
PREFIX=""

# define constants; the values follow the shell convention for return codes
#
__TRUE=0
__FALSE=1

# variables for the trap handler
# (die sets INSIDE_DIE to ${__TRUE}; signal_handler checks it)
#
INSIDE_DIE=${__FALSE}

# script name and directory
#
typeset -r SCRIPTNAME="${0##*/}"
typeset SCRIPTDIR="${0%/*}"
if [ "${SCRIPTNAME}"x = "${SCRIPTDIR}"x ] ; then
  # $0 contains no directory part -> search the script via the PATH
  SCRIPTDIR="$( whence "${SCRIPTNAME}" )"
  SCRIPTDIR="${SCRIPTDIR%/*}"

  # the script was not found via the PATH (e.g. it was started with
  # "ksh scriptname" from the current directory); without this fallback
  # the "cd" below would be called without a directory and switch to ${HOME}
  if [ "${SCRIPTDIR}"x = ""x ] ; then
    SCRIPTDIR="."
  fi
fi

# physical directory of the script (symbolic links resolved); the quotes
# are necessary for directories with whitespace in the name
#
REAL_SCRIPTDIR="$( cd -P "${SCRIPTDIR}" ; pwd )"

# the logfile can be preset via the environment variable LOGFILE
#
LOGFILE="${LOGFILE:=/var/tmp/${SCRIPTNAME}.log}"

# define variables
# (the defaults for these switches can be preset via environment variables)
#
FORCE=${FORCE:=${__FALSE}}
QUIET=${QUIET:=${__FALSE}}
VERBOSE=${VERBOSE:=${__FALSE}}

# default for the port used by netdata
#
NETDATA_PORT="19999"

# additional options for the netdata daemon
#
NETDATA_OPTIONS=""

# (relative) netdata data directories; one entry per line
#
NETDATA_DATA_DIRS="./var/cache/netdata/
./var/log/netdata/
./var/run/
"

# parameter -c or cleanup used?
#
CLEANUP_DATA_DIRS=${__FALSE}

# ----------------------------------------------------------------------
# general functions
#
# LogMsg - write a message to STDOUT and append it to the logfile
#
# Usage:   LogMsg [-] message
#
# Without the leading "-" the message is prefixed with the current date,
# the time, and the name of the script; with "-" as first parameter the
# message is written as it is.
#
# Globals: QUIET (read)      - nothing is written if QUIET is ${__TRUE}
#          SCRIPTNAME (read) - script name used in the message prefix
#          LOGFILE (read)    - the message is also appended to this file
#                              if the variable is not empty
#
# Returns: 0
#
LogMsg () {
  [[ ${QUIET} = ${__TRUE} ]] && return

  typeset THISMSG=""

  if [ "$1"x = "-"x ] ; then
    shift
    THISMSG="$*"
  else
    # SCRIPTNAME is the variable defined by this script for the script name
    THISMSG="[$( date +"%d.%m.%Y %H:%M" ) ${SCRIPTNAME}] $*"
  fi

  echo "${THISMSG}"
  if [ "${LOGFILE}"x != ""x ] ; then
    echo "${THISMSG}" >>"${LOGFILE}"
  fi

  # do not return the result of the logfile test to the caller
  return 0
}

# ----------------------------------------------------------------------
#
# LogInfo - write an INFO message via LogMsg, but only in verbose mode
#
# Usage:   LogInfo message
#
# Returns: the result of the verbose test if verbose mode is not active,
#          else the return code of LogMsg
#
LogInfo () {
  # leave the function with the result of the test if not in verbose mode
  [ ${VERBOSE} = ${__TRUE} ] || return

  LogMsg "INFO: $*"
}

# ----------------------------------------------------------------------
#
# LogError - write a message with the prefix "ERROR: " via LogMsg
#
# Usage:   LogError message
#
LogError () {
  LogMsg "ERROR: ${*}"
}

# ----------------------------------------------------------------------
#
# LogWarning - write a message with the prefix "WARNING: " via LogMsg
#
# Usage:   LogWarning message
#
LogWarning () {
  LogMsg "WARNING: ${*}"
}

# ----------------------------------------------------------------------
#
# cleanup - house keeping routine called by die before the script ends
#
# There are currently no house keeping tasks defined; the function
# always returns 0.
#
cleanup() {
  # (the two LogMsg "-" calls that were here are still disabled)
  return 0
}

# ----------------------------------------------------------------------
# general exit routine
#      
# die - general exit routine
#
# Usage:   die returncode [message]
#
# The function disables the EXIT trap, calls cleanup, writes the optional
# message (via LogMsg if the return code is 0, else via LogError with the
# return code appended), writes the end message of the script, and ends
# the script with the return code.
#
die () {
  typeset THISRC=$1

  # tell the trap handler that this is a regular script end
  INSIDE_DIE=${__TRUE}
  trap "" EXIT

  cleanup

  # there is only a message if there are parameter after the return code
  if [ $# -gt 1 ] ; then
    shift
    if [ ${THISRC} = 0 ] ; then
      LogMsg "$*"
    else
      LogError "$*! RC=${THISRC}"
    fi
  fi

  LogMsg "### ${SCRIPTNAME} ended at $( date )"
  exit ${THISRC}
}

# ----------------------------------------------------------------------
#
# signal_handler - trap handler installed for EXIT by the main code
#
# If the script ends without going through die (INSIDE_DIE is still
# ${__FALSE}) the script is ended via die with the return code 100.
#
signal_handler() {
  trap "" exit

  # nothing to do if die is already running
  [ ${INSIDE_DIE} = ${__FALSE} ] || return 0

  die 100 "Script aborted for unknown reason"
}

# ----------------------------------------------------------------------

# cleanup_netdata_directories - remove the netdata runtime data
#
# Usage:   cleanup_netdata_directories
#
# Globals: NETDATA_DATA_DIRS (read) - whitespace separated list of entries
#
# For each entry of the list:
#   - directory: the entries matched by "*" inside the directory are
#     removed recursively, the directory itself is kept
#   - readable file: the file is removed
#   - else: the entry is ignored (message in verbose mode only)
#
# The output of rm (if any) is written via LogMsg.
#
# Returns: 0
#
cleanup_netdata_directories() {
  typeset CUR_ENTRY=""
  typeset OUTPUT=""

  # the list is a whitespace separated string, so word splitting is
  # intended for the for loop
  for CUR_ENTRY in ${NETDATA_DATA_DIRS} ; do
    if [ -d "${CUR_ENTRY}" ] ; then
      LogMsg "Removing all files in the directory ${CUR_ENTRY} ..."
      # only the "*" must be expanded by the shell -- the directory name
      # is quoted so that special characters in the name are used as they are
      OUTPUT="$( rm -rf -- "${CUR_ENTRY}"/* 2>&1 )"
      if [ "${OUTPUT}"x != ""x ] ; then
        LogMsg "-" "${OUTPUT}"
      fi
    elif [ -r "${CUR_ENTRY}" ] ; then
      LogMsg "Removing the file ${CUR_ENTRY} ..."
      OUTPUT="$( rm -f -- "${CUR_ENTRY}" 2>&1 )"
      if [ "${OUTPUT}"x != ""x ] ; then
        LogMsg "-" "${OUTPUT}"
      fi
    else
      LogInfo "${CUR_ENTRY} does not exist - ignoring this entry"
    fi
  done

  return 0
}
  	
# ----------------------------------------------------------------------

# show_netdata_daemon_status - print the status of the running netdata daemon
#
# Usage:   show_netdata_daemon_status [text]
#
# Parameter: text - optional text inserted between "is" and "running"
#                   in the status message (e.g. " already")
#
# Globals: NETDATA_PID, NETDATA_PID_USER, CUR_USER_NAME (read)
#
# The function prints the ps output for the PID and, if the daemon runs
# under the current user or the current user is root, the netstat lines
# for the PID.
#
show_netdata_daemon_status() {
  LogMsg "The netdata daemon is${1} running; the PID is ${NETDATA_PID}"
  LogMsg "-"
  LogMsg "-" "$( ps -f -p${NETDATA_PID} )"
  LogMsg "-"

  [ "${NETDATA_PID_USER}"x = "${CUR_USER_NAME}"x ] || \
    LogWarning "The netdata daemon was started from this user: ${NETDATA_PID_USER} (you're ${CUR_USER_NAME})"

  # the ports are only listed for the owner of the process and for root
  if [ "${NETDATA_PID_USER}"x != "${CUR_USER_NAME}"x ] && [ "${CUR_USER_NAME}"x != "root"x ] ; then
    LogWarning "Can not detect the port used by the netdata daemon"
    return
  fi

  LogMsg "The netdata daemon is listening on these ports:"
  LogMsg "-" 
  LogMsg "-" "$( netstat -tlpn  | grep " ${NETDATA_PID}/" )"
  LogMsg "-"
}

# ----------------------------------------------------------------------
#
# main code: write the start message, install the trap handler, and
# process the parameter of the script
#
LogMsg "### ${SCRIPTNAME} started at $( date )"

# install the trap handler
#
trap "signal_handler" EXIT

# get the parameter
#
# The switches are processed until the first action keyword is found.
# The action keyword ends the parameter processing:
#   - for "start" and "restart" all remaining parameter are saved in
#     NETDATA_OPTIONS
#   - for all other actions additional parameter are an error
# A switch starting with "+" resets the corresponding "-" switch.
#
PARAMETER_OKAY=${__TRUE}
ACTION=""

while [ $# -ne 0 ] ; do
  case $1 in

    -h | --help | -H | help |  "" )
       # the usage help are the lines starting with "#H#" in this script
       grep "^#H#" "$0" | cut -c4-
       die 0
       ;;

    -v | --verbose )
       VERBOSE=${__TRUE}
       ;;

    +v | ++verbose )
       VERBOSE=${__FALSE}
       ;;

    -q | --quiet )
       QUIET=${__TRUE}
       ;;

    +q | ++quiet )
       QUIET=${__FALSE}
       ;;

    -f | --force )
       FORCE=${__TRUE}
       ;;

    +f | ++force )
       FORCE=${__FALSE}
       ;;

    -d | --debug )
       PREFIX="echo "
       ;;

    +d | ++debug )
       PREFIX=""
       ;;

    -p | --port )
       # the port is the next parameter
       if [ $# -ge 2 ] ; then
         NETDATA_PORT="$2"
         shift
       else
         die 7 "Incomplete paraemter -p found"
       fi
       ;;

    -c | --cleanup )
       CLEANUP_DATA_DIRS=${__TRUE}
       ;;

    cleanup )
       ACTION="cleanup"
       CLEANUP_DATA_DIRS=${__TRUE}
       shift 
       [ $# -ne 0 ] && die 8 "Invalid parameter found: $*"
       break
       ;;
    
    start )
       [ "${ACTION}"x != ""x ] && die 11 "Duplicate action parameter found"
       ACTION="start"
       shift 
       NETDATA_OPTIONS="$*"
       break
       ;;

    stop )
       [ "${ACTION}"x != ""x ] && die 11 "Duplicate action parameter found"
       ACTION="stop"
       shift 
       [ $# -ne 0 ] && die 8 "Invalid parameter found: $*"
       break
       ;;

    status )
       [ "${ACTION}"x != ""x ] && die 11 "Duplicate action parameter found"
       ACTION="status"
       shift 
       [ $# -ne 0 ] && die 8 "Invalid parameter found: $*"
       break
       ;;

    restart )
       [ "${ACTION}"x != ""x ] && die 11 "Duplicate action parameter found"
       ACTION="restart"
       shift 
       NETDATA_OPTIONS="$*"
       break
       ;;

    daemon_help   )
       [ "${ACTION}"x != ""x ] && die 11 "Duplicate action parameter found"
       ACTION="daemon_help"
       shift 
       [ $# -ne 0 ] && die 8 "Invalid parameter found: $*"
       break
       ;;

    * ) 
       # remember the error and continue so that all unknown parameter
       # are reported before the script ends with the usage help
       LogError "Unknown parameter found: $1"
       PARAMETER_OKAY=${__FALSE}
       ;;
     
  esac
  shift
done

# the default action is "status"
#
[ "${ACTION}"x = ""x ] && ACTION="status"

if [ ${PARAMETER_OKAY} != ${__TRUE} ] ; then
  grep "^#H#" "$0" | cut -c4-
  die 2
fi

# ----------------------------------------------------------------------

# return code of the script; the initial value is ${__FALSE}
#
THISRC=${__FALSE}

# user and group executing this script
#
CUR_USER_ID="$( id -u )"
CUR_USER_NAME="$( id -un )"
CUR_GROUP_ID="$( id -g )"
CUR_GROUP_NAME="$( id -gn )"

# [ "${CUR_USER_ID}"x != "0"x ] && die 101 "This script must be executed by root only"

# PID file used for the netdata daemon (see the parameter -P used to start
# the daemon)
#
NETDATA_PID_FILE="/tmp/netdata.pid"

# get the netdata base directory: either ${JWM_DIR}/netdata or the directory
# ../netdata relative to the directory with this script; if that directory
# does not exist the directory netdata in the current directory is used
#
if [ "${JWM_DIR}"x != ""x ] ; then
  LogMsg "Using the netdata directory based on the the environment variable JWM_DIR (\"${JWM_DIR}\") "
  NETDATA_BASE_DIR="${JWM_DIR}/netdata"
else
  # the quotes are necessary for script directories with whitespace in the name
  NETDATA_BASE_DIR="$( cd "$( dirname "$0" )/../netdata" 2>/dev/null && pwd  || echo "${PWD}/netdata" )"
  LogMsg "Environment variable JWM_DIR not set - using the directory based on the script directory ($( dirname "$0" )) "
fi

LogMsg "The netdata base directory is ${NETDATA_BASE_DIR}"

# all other path names used in this script are relative to the netdata
# base directory
#
[ ! -d "${NETDATA_BASE_DIR}" ] && die 9 "The directory \"${NETDATA_BASE_DIR}\" does not exist"
cd "${NETDATA_BASE_DIR}" || die 10 "Can not change to the directory \"${NETDATA_BASE_DIR}\" "

NETDATA_BINARY="./usr/sbin/netdata"
[ ! -x "${NETDATA_BINARY}" ] && die 11 "The netdata daemon binary ${NETDATA_BINARY} does not exist or is not executable"

NETDATA_CONF_FILE="./etc/netdata/netdata.conf"

# get the PID of the running netdata daemon: use the PID from the PID file
# if the file exists and the process is still running, else search the
# binary in the process list
#
if [ -r "${NETDATA_PID_FILE}" ] ; then
  NETDATA_PID="$( cat  "${NETDATA_PID_FILE}" )"
  # ignore the PID from the PID file if it is empty or the process does not exist anymore
  [ "${NETDATA_PID}"x != ""x ] && ps -p "${NETDATA_PID}" 2>/dev/null 1>/dev/null || NETDATA_PID=""
else 
  # NOTE(review): this can return more than one PID if more than one process
  # matches the name of the binary -- the code below expects a single PID
  NETDATA_PID="$( ps -ef | grep -v grep |  grep "${NETDATA_BINARY}"  | awk '{ print $2};' )"
fi

# get the owner of the running netdata daemon
#
if [ "${NETDATA_PID}"x != ""x ] ; then
  NETDATA_PID_USER="$( ps -p "${NETDATA_PID}" -o user= )"
else
  NETDATA_PID_USER=""
fi

# execute the action requested via the parameter
#
# Each action sets THISRC to ${__TRUE} or ${__FALSE} (restart: to the
# return code of the called script); THISRC is the return code of the
# script.
#
case ${ACTION} in

  cleanup)
    if [ "${NETDATA_PID}"x != ""x ] ; then
      show_netdata_daemon_status 
      die 57 "Can not cleanup the data directories while netdata is running"
    else
      cleanup_netdata_directories
      # without this assignment the script would end with the initial
      # value of THISRC even though the cleanup was done
      THISRC=${__TRUE}
    fi
    ;;  
    
  start )
    if [ "${NETDATA_PID}"x != ""x ] ; then
      show_netdata_daemon_status " already"
      THISRC=${__FALSE}
    else
      if [ ${CLEANUP_DATA_DIRS} = ${__TRUE} ] ; then
         cleanup_netdata_directories
      fi
    
      LogMsg "Starting the netdata daemon ..."
      LogMsg "The port to use for the netdata daemon is ${NETDATA_PORT}"

      # only the ports below 1024 are reserved for root
      if [ ${NETDATA_PORT} -lt 1024 ] ; then
        [ "${CUR_USER_ID}"x != "0"x ] && die 101 "This script must be executed by root only if using a port less then 1024"
      fi
      
      if [ "${NETDATA_OPTIONS}"x != ""x ] ; then
        LogMsg "The additional options to use for the netdata daemon are \"${NETDATA_OPTIONS}\" "
        if [[ ${NETDATA_OPTIONS} == *-D* ]] ; then
          die 50 "You can not use this script to start the netdata daemon in the foreground"
        fi
      fi    
 
      if [ -r "${NETDATA_CONF_FILE}" ] ; then
        LogMsg "Using the existing config file ${NETDATA_CONF_FILE}"
      else
        # no config file: start the daemon with the current user
        NETDATA_OPTIONS="${NETDATA_OPTIONS} -u ${CUR_USER_NAME}"
      fi
              
      # NOTE(review): this only finds files that belong neither to the
      # current user nor to the current group -- confirm that this is intended
      CUR_OUTPUT="$( find . ! -user ${CUR_USER_ID} ! -group ${CUR_GROUP_ID} 2>/dev/null )"
      if [ "${CUR_OUTPUT}"x != ""x ] ; then
        LogWarning "All files in the directory tree $PWD should be owned by ${CUR_USER_NAME}:${CUR_GROUP_NAME} to make netdata work"
      fi

      # NETDATA_OPTIONS is not quoted because it may contain more than one
      # option; "set -x" traces the command used to start the daemon
      CUR_OUTPUT="$( set -x ; "${NETDATA_BINARY}" -P "${NETDATA_PID_FILE}" -p ${NETDATA_PORT} ${NETDATA_OPTIONS} 2>&1 )"
      THISRC=$?
      LogMsg "-" "${CUR_OUTPUT}"
      if [ ${THISRC} -ne 0 ] ; then
        LogMsg "Failed to start the netdata daemon -- please check the messages for the cause of the error"
        THISRC=${__FALSE}
      else
        LogMsg "netdata daemon started; the PID is $( cat "${NETDATA_PID_FILE}" )"
        LogMsg "Use http://localhost:${NETDATA_PORT} or http://${HOSTNAME}:${NETDATA_PORT} to access netdata in your Webbrowser"
        LogMsg "Use http://localhost:${NETDATA_PORT}/netdata.conf to view the configuration of the running netdata"
        THISRC=${__TRUE}
      fi
    fi  
    ;;

  stop )
    if [ "${NETDATA_PID}"x = ""x ] ; then
      LogMsg "The netdata daemon is NOT running (well, it may be running but it was not started by this script)"
      THISRC=${__FALSE}
    else
      THISRC=${__TRUE}
      LogMsg "The netdata daemon is running; the PID is ${NETDATA_PID}"

      # only the owner of the process and root can stop the daemon
      if [ "${NETDATA_PID_USER}"x != "${CUR_USER_NAME}"x ] ; then
        if [ "${CUR_USER_NAME}"x != "root"x ] ; then
          die 55 "The netdata daemon was started from this user: ${NETDATA_PID_USER} but you're ${CUR_USER_NAME} -- can not stop the netdata daemon"
        else
          LogWarning "The netdata daemon was started from this user: ${NETDATA_PID_USER} (you're ${CUR_USER_NAME}) ,,,"
        fi
      fi
              
      LogMsg "Stopping the netdata daemon ..."
      kill "${NETDATA_PID}"
      sleep 2
      ps -p "${NETDATA_PID}" 2>/dev/null 1>/dev/null
      if [ $? -eq 0 ] ; then
        # the daemon did not end after the default signal -- use SIGKILL
        LogMsg "The netdata daemon is still running -- now using \"kill -9  ${NETDATA_PID} \" ..."
        kill -9 "${NETDATA_PID}"
        ps -p "${NETDATA_PID}" 2>/dev/null 1>/dev/null
        if [ $? -eq 0 ] ; then
          LogMsg "The netdata daemon is still running -- please kill the process ${NETDATA_PID} manually"
          THISRC=${__FALSE}        
        fi
      fi
      if [ ${THISRC} -eq ${__TRUE} ] ; then
        [ -r "${NETDATA_PID_FILE}" ] && rm -f  "${NETDATA_PID_FILE}" 2>/dev/null      
        if [ ${CLEANUP_DATA_DIRS} = ${__TRUE} ] ; then
           cleanup_netdata_directories
        fi
      fi  
    fi        
    ;;

  restart )
    # restart is done by calling this script with "stop" and then "start";
    # the port must be passed on, otherwise the daemon would be restarted
    # with the default port
    "${REAL_SCRIPTDIR}/${SCRIPTNAME}" stop && "${REAL_SCRIPTDIR}/${SCRIPTNAME}" -p "${NETDATA_PORT}" start ${NETDATA_OPTIONS}
    THISRC=$?
    ;;

  status )
    if [ ${CLEANUP_DATA_DIRS} = ${__TRUE} ] ; then
      LogWarning "Can not cleanup the data directories while netdata is running"
    fi
    
    if [ "${NETDATA_PID}"x != ""x ] ; then
      show_netdata_daemon_status
      THISRC=${__TRUE}
    else
      LogMsg "The netdata daemon is NOT running (well, it may be running but it was not started by this script)"
      THISRC=${__FALSE}
    fi
    ;;

  daemon_help )
    LogMsg "Additional parameter supported by the netdata daemon are:"    
    LogMsg "-" "$( "${NETDATA_BINARY}" -h )"
    # printing the usage help of the daemon is not an error
    THISRC=${__TRUE}
    ;;

  esac
  
# ----------------------------------------------------------------------
# end the script with the return code of the action
#

die ${THISRC}

# ----------------------------------------------------------------------
#
