基于 shell 的自动邮件任务监测器

前端之家收集整理的这篇文章主要介绍了基于 shell 的自动邮件任务监测器,前端之家小编觉得挺不错的,现在分享给大家,也给大家做个参考。

1. 需求场景

自动邮件报表中,经常会出现邮件发送不及时、邮件发送出错等问题。因此开发了此系统:当邮件任务执行出错或者未执行时,发送短信报警。

2. 基本思路

在执行邮件任务之前,即在系统调用发送邮件方法之前,先在邮件检测系统表中记录,状态为失败,发送成功之后更新数据库,置为成功。

shell 邮件检测系统每 1 分钟执行一次,遍历邮件任务表和检测表,对比两者以找出失败的任务和未执行的任务。一旦发现,就调用短信发送脚本发送短信报警。

3. 整体代码

  1. #!/bin/sh
  2. ### DOUGUO JOB INSPECTOSCOPE SNIFFER ###
  3. ### @author zjf ###
  4. ### @date 2017-12-27 ###
  5.  
  6. # INITALIZE SHELL
  7. . /etc/profile
  8. . ~/.bash_profile
  9.  
  10. # DEFINE ERROE CODE
  11. ERROR_CODE_001="job non-executed"
  12. ERROR_CODE_002="data result non-compliant formats"
  13. ERROR_CODE_003="send-mail/exec-job non-succeed"
  14.  
  15. # INITALIZE WORKING DIR
  16. _SCRIPT_NAME=$0
  17. _WORK_DIR=`dirname ${_SCRIPT_NAME}`
  18. cd ${_WORK_DIR}
  19.  
  20. # TIME ARGS
  21. CUR_DATE=`date -d "-0 day" "+%Y-%m-%d"`
  22. JUDGE_MINUTE=`date -d "-5 minute" +%H:%M`
  23. #JUDGE_MINUTE="08:05"
  24. echo "CUR_DATE:$CUR_DATE"
  25. echo "JUDGE_MINUTE:$JUDGE_MINUTE"
  26.  
  27. # DB ARGS
  28. HOSTNAME="192.168.1.135"
  29. USERNAME="hadooper"
  30. PASSWORD="hadoop@K+IhBOS"
  31. DBNAME="douguo_data"
  32.  
  33. # DEFINE _FUN RETURN RESULT
  34. g_contain_rs=""
  35.  
  36. # the area to define functions
  37. # functions must be defined before they are used
  # _FUN
  #######################################
  # Report whether a list of records holds one with the same key as VALUE.
  # The key of a record is everything before its first comma.
  # Globals:
  #   g_contain_rs (written) - "1" when a matching record exists, else "0"
  # Arguments:
  #   $1 - records joined into one whitespace-separated string
  #   $2 - record (or bare key) to look for
  #######################################
  contain() {
    local record_list=$1
    # The wanted key never changes, so compute it once outside the loop.
    local wanted_key=${2%%,*}
    local -a records=()
    local record

    g_contain_rs="0"

    # Split on whitespace with read rather than an unquoted expansion, so a
    # record such as "*" is never expanded against files in the current
    # directory. read returns non-zero at end of input, hence "|| true".
    read -r -d '' -a records <<<"${record_list}" || true

    for record in "${records[@]}"; do
      if [[ "${record%%,*}" == "${wanted_key}" ]]; then
        # already recorded
        g_contain_rs="1"
        break
      fi
    done
  }
  56.  
  # _FUN
  #######################################
  # Update one column of a job's row in dd_job_monitor for a given day.
  # Globals:
  #   HOSTNAME, USERNAME, PASSWORD, DBNAME (read) - MySQL connection settings
  #   MYSQL_BIN (read, optional) - client to execute; defaults to
  #       /usr/local/mysql/bin/mysql
  #   query_result (written) - standard output of the client
  # Arguments:
  #   $1 - job id (digits only)
  #   $2 - name of the column to change
  #   $3 - new value, written as a quoted SQL string
  #   $4 - day compared with date(statdate)
  # Returns:
  #   2 when the job id or column name is malformed (nothing is executed),
  #   otherwise the exit status of the mysql client.
  #######################################
  update_job_status() {
    local job_id=$1
    local column=$2
    local new_value=$3
    local stat_date=$4
    local bs='\'
    local sq="'"
    local sql

    # job_id and the column name are spliced into the statement unquoted, so
    # only plain numbers / identifiers are accepted.
    if [[ ! "${job_id}" =~ ^[0-9]+$ ]]; then
      printf 'update_job_status: invalid job id: %s\n' "${job_id}" >&2
      return 2
    fi
    if [[ ! "${column}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
      printf 'update_job_status: invalid column name: %s\n' "${column}" >&2
      return 2
    fi

    # Escape backslashes first, then single quotes, so string values cannot
    # terminate the SQL literal early.
    new_value=${new_value//"${bs}"/"${bs}${bs}"}
    new_value=${new_value//"${sq}"/"${bs}${sq}"}
    stat_date=${stat_date//"${bs}"/"${bs}${bs}"}
    stat_date=${stat_date//"${sq}"/"${bs}${sq}"}

    sql="update dd_job_monitor set ${column}='${new_value}' where job_id=${job_id} and date(statdate)='${stat_date}'"

    # NOTE(review): -p on the command line exposes the password in the process
    # list; an option file would avoid that.
    query_result=$("${MYSQL_BIN:-/usr/local/mysql/bin/mysql}" \
      -h"${HOSTNAME}" -u"${USERNAME}" -p"${PASSWORD}" -D "${DBNAME}" -e "${sql}")
  }
  67.  
  # _FUN
  #######################################
  # Insert an already-alarmed placeholder row for a job into dd_job_monitor;
  # without such a row the alarm module would keep alarming for the same job.
  # Globals:
  #   HOSTNAME, USERNAME, PASSWORD, DBNAME (read) - MySQL connection settings
  #   MYSQL_BIN (read, optional) - client to execute; defaults to
  #       /usr/local/mysql/bin/mysql
  #   query_result (written) - standard output of the client
  # Arguments:
  #   $1 - job id (digits only)
  #   $2 - job name
  #   $3 - value stored in statdate
  # Outputs:
  #   Prints the generated INSERT statement to stdout.
  # Returns:
  #   2 when the job id is malformed (nothing is executed), otherwise the exit
  #   status of the mysql client.
  #######################################
  insert_job() {
    local job_id=$1
    local job_name=$2
    local stat_date=$3
    local bs='\'
    local sq="'"
    local sql

    # The id is spliced into the statement unquoted: digits only.
    if [[ ! "${job_id}" =~ ^[0-9]+$ ]]; then
      printf 'insert_job: invalid job id: %s\n' "${job_id}" >&2
      return 2
    fi

    # Escape backslashes first, then single quotes, so a name containing a
    # quote cannot break the statement.
    job_name=${job_name//"${bs}"/"${bs}${bs}"}
    job_name=${job_name//"${sq}"/"${bs}${sq}"}
    stat_date=${stat_date//"${bs}"/"${bs}${bs}"}
    stat_date=${stat_date//"${sq}"/"${bs}${sq}"}

    # The previous statement listed ten columns but supplied six values, which
    # MySQL rejects. Only the columns that have a value are listed now.
    # NOTE(review): mapping the empty string to msg_reciver is an assumption,
    # and the omitted columns (send_type, run_time, data_status, job_status)
    # fall back to their table defaults - confirm against the table schema.
    sql="INSERT INTO dd_job_monitor(job_id,job_type,job_name,msg_reciver,alarm_status,statdate) VALUES(${job_id},'alarm','${job_name}','',1,'${stat_date}')"
    printf '%s\n' "${sql}"

    # NOTE(review): -p on the command line exposes the password in the process
    # list; an option file would avoid that.
    query_result=$("${MYSQL_BIN:-/usr/local/mysql/bin/mysql}" \
      -h"${HOSTNAME}" -u"${USERNAME}" -p"${PASSWORD}" -D "${DBNAME}" -e "${sql}")
  }
  78.  
  # query languages
  ## !modify log 2018-02-26 :
  # QUERYA excludes mails 36 and 12 (id not in (36,12)). Both are sent within
  # the last five minutes of the day; because this monitor looks five minutes
  # back they produced duplicate alarms, so they are no longer checked.
  #
  # Record layout produced by the queries (one record per row):
  #   QUERYA: <id>:<subject>,<last 5 chars of send_time>
  #   QUERYB: <job_id>:<job_name>,<last 5 chars of run_time>,
  #           <data_status>,<job_status>,<alarm_status>
  # NOTE(review): the id/name separator and the QUERYB status columns were
  # reconstructed from how the loop below reads the records (five comma
  # fields); ':' was chosen as separator - confirm against the deployed script.
  # NOTE(review): subjects containing commas or whitespace do not fit this
  # record format (contain() receives the records whitespace-joined).
  QUERYA="select concat_ws(',',concat_ws(':',id,subject),right(send_time,5)) from dd_sys_mail_set where send_type='day' and id not in (36,12);"
  #QUERYA="select concat_ws(',',concat_ws(':',id,subject),right(send_time,5)) from dd_sys_mail_set where send_type='day';"
  QUERYB="select concat_ws(',',concat_ws(':',job_id,job_name),right(run_time,5),data_status,job_status,alarm_status) from dd_job_monitor where date(statdate)='${CUR_DATE}'"

  # query from MySQL
  # The client invocation is kept in an array so every argument stays one word.
  _MYSQL_CMD=("${MYSQL_BIN:-/usr/local/mysql/bin/mysql}"
    -h"${HOSTNAME}" -u"${USERNAME}" -p"${PASSWORD}" -D "${DBNAME}"
    --skip-column-names)
  count_a=$("${_MYSQL_CMD[@]}" -e "${QUERYA}")
  count_b=$("${_MYSQL_CMD[@]}" -e "${QUERYB}")

  # Write the results to data files, turning tabs into commas. Only lines that
  # look like a record ("<digits>:...") are kept: this removes the leading
  # 'log in file /var/mysql/query.log' line when the client prints one, without
  # discarding a real first row when it does not.
  mkdir -p "${_WORK_DIR}/tmp"
  printf '%s\n' "${count_a}" \
    | sed -n -e 's/\t/,/g' -e '/^[0-9][0-9]*:/p' > "${_WORK_DIR}/tmp/queryloga.log"
  printf '%s\n' "${count_b}" \
    | sed -n -e 's/\t/,/g' -e '/^[0-9][0-9]*:/p' > "${_WORK_DIR}/tmp/querylogb.log"

  # read log a and create array a (one record per element)
  ARR_RS_A=()
  while IFS= read -r line; do
    ARR_RS_A+=("${line}")
  done < "${_WORK_DIR}/tmp/queryloga.log"

  # read log b and create array b (one record per element)
  ARR_RS_B=()
  while IFS= read -r line; do
    ARR_RS_B+=("${line}")
  done < "${_WORK_DIR}/tmp/querylogb.log"

  # Append the alarm text to job.log and hand it to the SMS sender.
  # The sender is called directly; wrapping the call in backticks would try to
  # execute whatever the sender prints as a second command.
  _send_alarm() {
    echo "$1" >> "${_WORK_DIR}/job.log"
    sh /opt/DATA/goldmine/src/utils/sms/sms_send.sh "$1"
  }

  # loop the result arr and judge if can send the alarm
  for SYSJOB in "${ARR_RS_A[@]}"; do
    # split the job info: "<id>:<subject>,<minute>"
    IFS=',' read -r SYSJOB_NAME SYSJOB_MINUTE _ <<<"${SYSJOB}"
    SYSJOB_NAME_ID=${SYSJOB_NAME%%:*}
    SYSJOB_NAME_SUBJECT=${SYSJOB_NAME#*:}

    # Only jobs whose scheduled minute equals "five minutes ago" are judged;
    # by then they should have been recorded in ARR_RS_B.
    if [[ "${SYSJOB_MINUTE}" != "${JUDGE_MINUTE}" ]]; then
      continue
    fi

    # if job logged in job table ? 1 : 0
    contain "${ARR_RS_B[*]}" "${SYSJOB_NAME}"

    # 0: job not in tb -> alarm and insert a row so the alarm is not repeated
    if [[ "${g_contain_rs}" == "0" ]]; then
      _send_alarm "DC_WEB: MAIL: ERROR:001: ${SYSJOB_NAME_ID}: ${SYSJOB_NAME_SUBJECT}: ${ERROR_CODE_001}"
      insert_job "${SYSJOB_NAME_ID}" "${SYSJOB_NAME_SUBJECT}" "${CUR_DATE}"
      continue
    fi

    # test...
    echo "logging...<${CUR_DATE}> the inspectoscope has logged the job:${SYSJOB_NAME_ID}:${SYSJOB_NAME_SUBJECT}" >> "${_WORK_DIR}/job.log"

    for _LOGTMPJOB in "${ARR_RS_B[@]}"; do
      # "<job_id>:<job_name>,<minute>,<data_status>,<job_status>,<alarm_status>"
      IFS=',' read -r LOGJOB_NAME LOGJOB_MINUTE LOGJOB_DATA_STATUS \
        LOGJOB_JOB_STATUS LOGJOB_ALARM_STATUS _ <<<"${_LOGTMPJOB}"
      LOGJOB_JOB_ID=${LOGJOB_NAME%%:*}
      LOGJOB_JOB_NAME=${LOGJOB_NAME#*:}

      if [[ "${SYSJOB_NAME}" != "${LOGJOB_NAME}" ]]; then
        continue
      fi

      # LOGJOB_ALARM_STATUS 1:has warned,0:not warned
      if [[ "${LOGJOB_DATA_STATUS}" == "-1" && "${LOGJOB_ALARM_STATUS}" == "0" ]]; then
        _send_alarm "DC_WEB: MAIL: ERROR:002: ${LOGJOB_JOB_ID}: ${LOGJOB_JOB_NAME}: ${ERROR_CODE_002}"
      fi
      if [[ "${LOGJOB_JOB_STATUS}" == "-1" && "${LOGJOB_ALARM_STATUS}" == "0" ]]; then
        _send_alarm "DC_WEB: MAIL: ERROR:003: ${LOGJOB_JOB_ID}: ${LOGJOB_JOB_NAME}: ${ERROR_CODE_003}"
      fi

      # update job alarm status (done for every matching record, alarmed or not)
      _fun_param_schema_name="alarm_status"
      _fun_param_schema_data="1"
      update_job_status "${LOGJOB_JOB_ID}" "${_fun_param_schema_name}" "${_fun_param_schema_data}" "${CUR_DATE}"

      # test...
      echo "logging...<${CUR_DATE}> the inspectoscope has alarm the job:${LOGJOB_JOB_ID}:${LOGJOB_JOB_NAME}" >> "${_WORK_DIR}/job.log"
    done
  done

猜你在找的Bash相关文章