0
0
Fork 0
mirror of https://github.com/alerta/alerta-contrib.git synced 2025-03-17 22:12:38 +00:00
alerta_alerta-contrib/integrations/cloudwatch/cloudwatch.py

157 lines
4.5 KiB
Python
Raw Normal View History

2014-06-15 00:54:46 +00:00
2014-08-12 12:08:03 +00:00
import os
2014-06-15 00:54:46 +00:00
import sys
import json
import time
import datetime
2014-10-13 22:11:22 +00:00
import logging
2014-06-15 00:54:46 +00:00
import boto.sqs
from boto.sqs.message import RawMessage
from boto import exception
2014-08-09 22:18:49 +00:00
from alerta.api import ApiClient
from alerta.alert import Alert
from alerta.heartbeat import Heartbeat
2014-06-15 00:54:46 +00:00
2014-10-13 22:11:22 +00:00
# Version of this integration; sent as a tag on every heartbeat.
__version__ = '3.3.0'

# Runtime configuration, read once from the environment at import time.
# Each value is None when the corresponding variable is not set.
AWS_SQS_QUEUE = os.environ.get('AWS_SQS_QUEUE')
AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY')
AWS_REGION = os.environ.get('AWS_REGION')

# Maps an AWS account id (as found in an alarm's 'AWSAccountId' field) to the
# name reported as the alert's service. Ids missing from this table are
# reported as 'AWSAccountId:<id>' instead.
AWS_ACCOUNT_ID = {
    '101234567890': 'aws-account-name'
}

# Module logger. basicConfig() installs a root handler at DEBUG level as a
# side effect of importing this module.
LOG = logging.getLogger("alerta.cloudwatch")
logging.basicConfig(format="%(asctime)s - %(name)s: %(levelname)s - %(message)s", level=logging.DEBUG)
2014-06-15 00:54:46 +00:00
2014-08-09 22:18:49 +00:00
class CloudWatch(object):
    """Forward CloudWatch alarm notifications from an SQS queue to alerta.

    Each message read from the queue is expected to be an SNS envelope whose
    'Message' field is itself a JSON-encoded CloudWatch alarm. Alarms are
    converted to alerta ``Alert`` objects and sent through ``ApiClient``; a
    ``Heartbeat`` is sent after every polling cycle.
    """

    def __init__(self):
        """Create the alerta API client and open the SQS queue.

        Calls ``sys.exit(1)`` if boto raises ``SQSError`` while connecting to
        the region or while creating/opening the queue.
        """
        self.api = ApiClient()

        try:
            connection = boto.sqs.connect_to_region(
                AWS_REGION,
                aws_access_key_id=AWS_ACCESS_KEY_ID,
                aws_secret_access_key=AWS_SECRET_ACCESS_KEY
            )
        except boto.exception.SQSError as e:
            LOG.error('SQS API call failed: %s', e)
            sys.exit(1)

        # NOTE(review): connect_to_region() presumably returns None for an
        # unknown region name, in which case create_queue() below fails with
        # AttributeError rather than SQSError -- confirm against boto docs.
        try:
            self.sqs = connection.create_queue(AWS_SQS_QUEUE)
            # Raw messages: the body is handed over as-is, without boto's
            # default message decoding.
            self.sqs.set_message_class(RawMessage)
        except boto.exception.SQSError as e:
            LOG.error('SQS queue error: %s', e)
            sys.exit(1)

    def run(self):
        """Poll the queue forever, forwarding alarms and sending heartbeats.

        Never returns. A failed queue read is logged and retried after a
        20 second pause; failures to send an alert or a heartbeat are logged
        and otherwise ignored.
        """
        while True:
            LOG.debug('Waiting for CloudWatch alarms on %s...', AWS_SQS_QUEUE)
            try:
                notification = self.sqs.read(wait_time_seconds=20)
            except boto.exception.SQSError as e:
                LOG.warning('Could not read from queue: %s', e)
                time.sleep(20)
                continue

            if notification:
                self._forward(notification)

            LOG.debug('Send heartbeat...')
            heartbeat = Heartbeat(tags=[__version__])
            try:
                self.api.send(heartbeat)
            except Exception as e:
                LOG.warning('Failed to send heartbeat: %s', e)

    def _forward(self, notification):
        """Send one queue message to alerta, then delete it from the queue.

        The message is always deleted, whether or not it produced an alert:
        delivery is best-effort. Messages that are not alarms are skipped
        silently, and messages that cannot be parsed are logged and dropped so
        that a single malformed message cannot stop the polling loop or be
        redelivered indefinitely.
        """
        try:
            cloudwatchAlert = self.parse_notification(notification)
        except (ValueError, KeyError, IndexError, TypeError) as e:
            LOG.warning('Could not parse notification: %s', e)
            cloudwatchAlert = None

        # parse_notification() returns None for messages without a 'Trigger';
        # there is nothing to send for those.
        if cloudwatchAlert is not None:
            try:
                self.api.send(cloudwatchAlert)
            except Exception as e:
                LOG.warning('Failed to send alert: %s', e)

        self.sqs.delete_message(notification)

    def parse_notification(self, notification):
        """Convert a queue message into an alerta ``Alert``.

        :param notification: queue message exposing ``get_body()``, whose body
            is a JSON document with 'Message', 'TopicArn', 'Timestamp' and
            'MessageId' fields; 'Message' is a JSON-encoded alarm.
        :returns: the ``Alert``, or ``None`` when the alarm has no 'Trigger'
            field.
        :raises ValueError: if either JSON document is invalid or 'Timestamp'
            does not match ``%Y-%m-%dT%H:%M:%S.%fZ``.
        :raises KeyError: if an expected field is missing.
        :raises IndexError: if the trigger has an empty 'Dimensions' list.
        """
        notification = json.loads(notification.get_body())
        alarm = json.loads(notification['Message'])

        if 'Trigger' not in alarm:
            return

        trigger = alarm['Trigger']
        # The resource is named after the first dimension only.
        dimension = trigger['Dimensions'][0]

        # Defaults
        resource = '%s:%s' % (dimension['name'], dimension['value'])
        event = alarm['AlarmName']
        severity = self.cw_state_to_severity(alarm['NewStateValue'])
        group = 'CloudWatch'
        value = trigger['MetricName']
        text = alarm['AlarmDescription']
        account_id = alarm['AWSAccountId']
        service = [AWS_ACCOUNT_ID.get(account_id, 'AWSAccountId:' + account_id)]
        tags = [trigger['Namespace']]
        correlate = list()
        origin = notification['TopicArn']
        timeout = None
        create_time = datetime.datetime.strptime(notification['Timestamp'], '%Y-%m-%dT%H:%M:%S.%fZ')
        raw_data = notification['Message']

        cloudwatchAlert = Alert(
            resource=resource,
            event=event,
            correlate=correlate,
            group=group,
            value=value,
            severity=severity,
            environment='Production',
            service=service,
            text=text,
            event_type='cloudwatchAlarm',
            tags=tags,
            attributes={
                'awsMessageId': notification['MessageId'],
                'awsRegion': alarm['Region'],
                'thresholdInfo': alarm['NewStateReason']
            },
            origin=origin,
            timeout=timeout,
            create_time=create_time,
            raw_data=raw_data,
        )

        return cloudwatchAlert

    @staticmethod
    def cw_state_to_severity(state):
        """Map a CloudWatch alarm state to an alerta severity name.

        'ALARM' -> 'major', 'INSUFFICIENT_DATA' -> 'warning', 'OK' -> 'normal';
        any other value -> 'unknown'.
        """
        if state == 'ALARM':
            return 'major'
        elif state == 'INSUFFICIENT_DATA':
            return 'warning'
        elif state == 'OK':
            return 'normal'
        else:
            return 'unknown'
2014-08-11 19:44:54 +00:00
def main():
    """Run the CloudWatch integration until it is interrupted or fails.

    Exit status:
      * 0 when stopped by KeyboardInterrupt;
      * whatever code was passed to ``sys.exit()`` when SystemExit is raised
        inside the integration (e.g. 1 when the SQS queue cannot be opened);
      * 1 on any other unhandled exception, which is logged with a traceback.
    """
    LOG = logging.getLogger("alerta.cloudwatch")

    try:
        CloudWatch().run()
    except KeyboardInterrupt:
        LOG.info("Exiting alerta cloudwatch.")
        sys.exit(0)
    except SystemExit:
        # Re-raise so the original exit code is kept; converting it to 0
        # would report start-up failures as a clean shutdown.
        LOG.info("Exiting alerta cloudwatch.")
        raise
    except Exception as e:
        LOG.error(e, exc_info=1)
        sys.exit(1)


if __name__ == '__main__':
    main()
2014-08-12 12:08:03 +00:00