plugins/OpenStack/Aodh/alarming.py

   1 # Copyright 2017 Intel Research and Development Ireland Limited
   2 # *************************************************************
   3
   4 # This file is part of OSM Monitoring module
   5 # All Rights Reserved to Intel Corporation
   6
   7 # Licensed under the Apache License, Version 2.0 (the "License"); you may
   8 # not use this file except in compliance with the License. You may obtain
   9 # a copy of the License at
  10
  11 #         http://www.apache.org/licenses/LICENSE-2.0
  12
  13 # Unless required by applicable law or agreed to in writing, software
  14 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  15 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  16 # License for the specific language governing permissions and limitations
  17 # under the License.
  18
  19 # For those usages not covered by the Apache License, Version 2.0 please
  20 # contact: helena.mcgough@intel.com or adrian.hoban@intel.com
  21 ##
  22 """Carry out alarming requests via Aodh API."""
  23
  24 import json
  25 import logging as log
  26
  27 from core.message_bus.producer import KafkaProducer
  28
  29 from kafka import KafkaConsumer
  30
  31 from plugins.OpenStack.common import Common
  32 from plugins.OpenStack.response import OpenStack_Response
  33
  34 __author__ = "Helena McGough"
  35
  36 ALARM_NAMES = {
  37     "average_memory_usage_above_threshold": "average_memory_utilization",
  38     "disk_read_ops": "disk_read_ops",
  39     "disk_write_ops": "disk_write_ops",
  40     "disk_read_bytes": "disk_read_bytes",
  41     "disk_write_bytes": "disk_write_bytes",
  42     "net_packets_dropped": "packets_dropped",
  43     "packets_in_above_threshold": "packets_received",
  44     "packets_out_above_threshold": "packets_sent",
  45     "cpu_utilization_above_threshold": "cpu_utilization"}
  46
  47 SEVERITIES = {
  48     "warning": "low",
  49     "minor": "low",
  50     "major": "moderate",
  51     "critical": "critical",
  52     "indeterminate": "critical"}
  53
  54 STATISTICS = {
  55     "average": "avg",
  56     "minimum": "min",
  57     "maximum": "max",
  58     "count": "count",
  59     "sum": "sum"}
  60
  61
  62 class Alarming(object):
  63     """Carries out alarming requests and responses via Aodh API."""
  64
  65     def __init__(self):
  66         """Create the OpenStack alarming instance."""
  67         self._common = Common()
  68
  69         # TODO(mcgoughh): Remove hardcoded kafkaconsumer
  70         # Initialize a generic consumer object to consume message from the SO
  71         server = {'server': 'localhost:9092', 'topic': 'alarm_request'}
  72         self._consumer = KafkaConsumer(server['topic'],
  73                                        group_id='osm_mon',
  74                                        bootstrap_servers=server['server'])
  75
  76         # Use the Response class to generate valid json response messages
  77         self._response = OpenStack_Response()
  78
  79         # Initializer a producer to send responses back to SO
  80         self._producer = KafkaProducer("alarm_response")
  81
  82     def alarming(self):
  83         """Consume info from the message bus to manage alarms."""
  84         # Check the alarming functionlity that needs to be performed
  85         for message in self._consumer:
  86
  87             values = json.loads(message.value)
  88             vim_type = values['vim_type'].lower()
  89
  90             if vim_type == "openstack":
  91                 log.info("Alarm action required: %s" % (message.topic))
  92
  93                 # Generate and auth_token and endpoint for request
  94                 auth_token, endpoint = self.authenticate()
  95
  96                 if message.key == "create_alarm_request":
  97                     # Configure/Update an alarm
  98                     alarm_details = values['alarm_create_request']
  99
 100                     alarm_id, alarm_status = self.configure_alarm(
 101                         endpoint, auth_token, alarm_details)
 102
 103                     # Generate a valid response message, send via producer
 104                     try:
 105                         resp_message = self._response.generate_response(
 106                             'create_alarm_response', status=alarm_status,
 107                             alarm_id=alarm_id,
 108                             cor_id=alarm_details['correlation_id'])
 109                         self._producer.create_alarm_response(
 110                             'create_alarm_resonse', resp_message,
 111                             'alarm_response')
 112                     except Exception as exc:
 113                         log.warn("Response creation failed: %s", exc)
 114
 115                 elif message.key == "list_alarm_request":
 116                     # Check for a specifed: alarm_name, resource_uuid, severity
 117                     # and generate the appropriate list
 118                     list_details = values['alarm_list_request']
 119                     try:
 120                         name = list_details['alarm_name'].lower()
 121                         alarm_list = self.list_alarms(
 122                             endpoint, auth_token, alarm_name=name)
 123                     except Exception as a_name:
 124                         log.debug("No name specified for list:%s", a_name)
 125                         try:
 126                             resource = list_details['resource_uuid']
 127                             alarm_list = self.list_alarms(
 128                                 endpoint, auth_token, resource_id=resource)
 129                         except Exception as r_id:
 130                             log.debug("No resource id specified for this list:\
 131                                        %s", r_id)
 132                             try:
 133                                 severe = list_details['severity'].lower()
 134                                 alarm_list = self.list_alarms(
 135                                     endpoint, auth_token, severity=severe)
 136                             except Exception as exc:
 137                                 log.info("No severity specified for list: %s.\
 138                                            will return full list.", exc)
 139                                 alarm_list = self.list_alarms(
 140                                     endpoint, auth_token)
 141
 142                     try:
 143                         # Generate and send a list response back
 144                         resp_message = self._response.generate_response(
 145                             'list_alarm_response', alarm_list=alarm_list,
 146                             cor_id=list_details['correlation_id'])
 147                         self._producer.list_alarm_response(
 148                             'list_alarm_response', resp_message,
 149                             'alarm_response')
 150                     except Exception as exc:
 151                         log.warn("Failed to send a valid response back.")
 152
 153                 elif message.key == "delete_alarm_request":
 154                     request_details = values['alarm_delete_request']
 155                     alarm_id = request_details['alarm_uuid']
 156
 157                     resp_status = self.delete_alarm(
 158                         endpoint, auth_token, alarm_id)
 159
 160                     # Generate and send a response message
 161                     try:
 162                         resp_message = self._response.generate_response(
 163                             'delete_alarm_response', alarm_id=alarm_id,
 164                             status=resp_status,
 165                             cor_id=request_details['correlation_id'])
 166                         self._producer.delete_alarm_response(
 167                             'delete_alarm_response', resp_message,
 168                             'alarm_response')
 169                     except Exception as exc:
 170                         log.warn("Failed to create delete reponse:%s", exc)
 171
 172                 elif message.key == "acknowledge_alarm":
 173                     # Acknowledge that an alarm has been dealt with by the SO
 174                     alarm_id = values['ack_details']['alarm_uuid']
 175
 176                     response = self.update_alarm_state(
 177                         endpoint, auth_token, alarm_id)
 178
 179                     # Log if an alarm was reset
 180                     if response is True:
 181                         log.info("Acknowledged the alarm and cleared it.")
 182                     else:
 183                         log.warn("Failed to acknowledge/clear the alarm.")
 184
 185                 elif message.key == "update_alarm_request":
 186                     # Update alarm configurations
 187                     alarm_details = values['alarm_update_request']
 188
 189                     alarm_id, status = self.update_alarm(
 190                         endpoint, auth_token, alarm_details)
 191
 192                     # Generate a response for an update request
 193                     try:
 194                         resp_message = self._response.generate_response(
 195                             'update_alarm_response', alarm_id=alarm_id,
 196                             cor_id=alarm_details['correlation_id'],
 197                             status=status)
 198                         self._producer.update_alarm_response(
 199                             'update_alarm_response', resp_message,
 200                             'alarm_response')
 201                     except Exception as exc:
 202                         log.warn("Failed to send an update response:%s", exc)
 203
 204                 else:
 205                     log.debug("Unknown key, no action will be performed")
 206             else:
 207                 log.info("Message topic not relevant to this plugin: %s",
 208                          message.topic)
 209
 210         return
 211
 212     def configure_alarm(self, endpoint, auth_token, values):
 213         """Create requested alarm in Aodh."""
 214         url = "{}/v2/alarms/".format(endpoint)
 215
 216         # Check if the desired alarm is supported
 217         alarm_name = values['alarm_name'].lower()
 218         metric_name = values['metric_name'].lower()
 219         resource_id = values['resource_uuid']
 220
 221         if alarm_name not in ALARM_NAMES.keys():
 222             log.warn("This alarm is not supported, by a valid metric.")
 223             return None, False
 224         if ALARM_NAMES[alarm_name] != metric_name:
 225             log.warn("This is not the correct metric for this alarm.")
 226             return None, False
 227
 228         # Check for the required metric
 229         metric_id = self.check_for_metric(auth_token, metric_name, resource_id)
 230
 231         try:
 232             if metric_id is not None:
 233                 # Create the alarm if metric is available
 234                 payload = self.check_payload(values, metric_name, resource_id,
 235                                              alarm_name)
 236                 new_alarm = self._common._perform_request(
 237                     url, auth_token, req_type="post", payload=payload)
 238                 return json.loads(new_alarm.text)['alarm_id'], True
 239             else:
 240                 log.warn("The required Gnocchi metric does not exist.")
 241                 return None, False
 242
 243         except Exception as exc:
 244             log.warn("Failed to create the alarm: %s", exc)
 245         return None, False
 246
 247     def delete_alarm(self, endpoint, auth_token, alarm_id):
 248         """Delete alarm function."""
 249         url = "{}/v2/alarms/%s".format(endpoint) % (alarm_id)
 250
 251         try:
 252             result = self._common._perform_request(
 253                 url, auth_token, req_type="delete")
 254             if str(result.status_code) == "404":
 255                 # If status code is 404 alarm did not exist
 256                 return False
 257             else:
 258                 return True
 259
 260         except Exception as exc:
 261             log.warn("Failed to delete alarm: %s because %s.", alarm_id, exc)
 262         return False
 263
 264     def list_alarms(self, endpoint, auth_token,
 265                     alarm_name=None, resource_id=None, severity=None):
 266         """Generate the requested list of alarms."""
 267         url = "{}/v2/alarms/".format(endpoint)
 268         alarm_list = []
 269
 270         result = self._common._perform_request(
 271             url, auth_token, req_type="get")
 272         if result is not None:
 273             # Check for a specified list based on:
 274             # alarm_name, severity, resource_id
 275             if alarm_name is not None:
 276                 for alarm in json.loads(result.text):
 277                     if alarm_name in str(alarm):
 278                         alarm_list.append(str(alarm))
 279             elif resource_id is not None:
 280                 for alarm in json.loads(result.text):
 281                     if resource_id in str(alarm):
 282                         alarm_list.append(str(alarm))
 283             elif severity is not None:
 284                 for alarm in json.loads(result.text):
 285                     if severity in str(alarm):
 286                         alarm_list.append(str(alarm))
 287             else:
 288                 alarm_list = result.text
 289         else:
 290             return None
 291         return alarm_list
 292
 293     def update_alarm_state(self, endpoint, auth_token, alarm_id):
 294         """Set the state of an alarm to ok when ack message is received."""
 295         url = "{}/v2/alarms/%s/state".format(endpoint) % alarm_id
 296         payload = json.dumps("ok")
 297
 298         try:
 299             self._common._perform_request(
 300                 url, auth_token, req_type="put", payload=payload)
 301             return True
 302         except Exception as exc:
 303             log.warn("Unable to update alarm state: %s", exc)
 304         return False
 305
 306     def update_alarm(self, endpoint, auth_token, values):
 307         """Get alarm name for an alarm configuration update."""
 308         # Get already existing alarm details
 309         url = "{}/v2/alarms/%s".format(endpoint) % values['alarm_uuid']
 310
 311         # Gets current configurations about the alarm
 312         try:
 313             result = self._common._perform_request(
 314                 url, auth_token, req_type="get")
 315             alarm_name = json.loads(result.text)['name']
 316             rule = json.loads(result.text)['gnocchi_resources_threshold_rule']
 317             alarm_state = json.loads(result.text)['state']
 318             resource_id = rule['resource_id']
 319             metric_name = rule['metric']
 320         except Exception as exc:
 321             log.warn("Failed to retreive existing alarm info: %s.\
 322                      Can only update OSM alarms.", exc)
 323             return None, False
 324
 325         # Generates and check payload configuration for alarm update
 326         payload = self.check_payload(values, metric_name, resource_id,
 327                                      alarm_name, alarm_state=alarm_state)
 328
 329         # Updates the alarm configurations with the valid payload
 330         if payload is not None:
 331             try:
 332                 update_alarm = self._common._perform_request(
 333                     url, auth_token, req_type="put", payload=payload)
 334
 335                 return json.loads(update_alarm.text)['alarm_id'], True
 336             except Exception as exc:
 337                 log.warn("Alarm update could not be performed: %s", exc)
 338                 return None, False
 339         return None, False
 340
 341     def check_payload(self, values, metric_name, resource_id,
 342                       alarm_name, alarm_state=None):
 343         """Check that the payload is configuration for update/create alarm."""
 344         try:
 345             # Check state and severity
 346             severity = values['severity'].lower()
 347             if severity == "indeterminate":
 348                 alarm_state = "insufficient data"
 349             if alarm_state is None:
 350                 alarm_state = "ok"
 351
 352             statistic = values['statistic'].lower()
 353             # Try to configure the payload for the update/create request
 354             # Can only update: threshold, operation, statistic and
 355             # the severity of the alarm
 356             rule = {'threshold': values['threshold_value'],
 357                     'comparison_operator': values['operation'].lower(),
 358                     'metric': metric_name,
 359                     'resource_id': resource_id,
 360                     'resource_type': 'generic',
 361                     'aggregation_method': STATISTICS[statistic]}
 362             payload = json.dumps({'state': alarm_state,
 363                                   'name': alarm_name,
 364                                   'severity': SEVERITIES[severity],
 365                                   'type': 'gnocchi_resources_threshold',
 366                                   'gnocchi_resources_threshold_rule': rule, })
 367             return payload
 368         except KeyError as exc:
 369             log.warn("Alarm is not configured correctly: %s", exc)
 370         return None
 371
 372     def authenticate(self):
 373         """Generate an authentication token and endpoint for alarm request."""
 374         try:
 375             # Check for a tenant_id
 376             auth_token = self._common._authenticate()
 377             endpoint = self._common.get_endpoint("alarming")
 378             return auth_token, endpoint
 379         except Exception as exc:
 380             log.warn("Authentication to Keystone failed:%s", exc)
 381         return None, None
 382
 383     def get_alarm_state(self, endpoint, auth_token, alarm_id):
 384         """Get the state of the alarm."""
 385         url = "{}/v2/alarms/%s/state".format(endpoint) % alarm_id
 386
 387         try:
 388             alarm_state = self._common._perform_request(
 389                 url, auth_token, req_type="get")
 390             return json.loads(alarm_state.text)
 391         except Exception as exc:
 392             log.warn("Failed to get the state of the alarm:%s", exc)
 393         return None
 394
 395     def check_for_metric(self, auth_token, m_name, r_id):
 396         """Check for the alarm metric."""
 397         try:
 398             endpoint = self._common.get_endpoint("metric")
 399
 400             url = "{}/v1/metric/".format(endpoint)
 401             metric_list = self._common._perform_request(
 402                 url, auth_token, req_type="get")
 403
 404             for metric in json.loads(metric_list.text):
 405                 name = metric['name']
 406                 resource = metric['resource_id']
 407                 if (name == m_name and resource == r_id):
 408                     metric_id = metric['id']
 409             log.info("The required metric exists, an alarm will be created.")
 410             return metric_id
 411         except Exception as exc:
 412             log.info("Desired Gnocchi metric not found:%s", exc)
 413         return None