plugins/OpenStack/Aodh/alarming.py

   1 # Copyright 2017 Intel Research and Development Ireland Limited
   2 # *************************************************************
   3
   4 # This file is part of OSM Monitoring module
   5 # All Rights Reserved to Intel Corporation
   6
   7 # Licensed under the Apache License, Version 2.0 (the "License"); you may
   8 # not use this file except in compliance with the License. You may obtain
   9 # a copy of the License at
  10
  11 #         http://www.apache.org/licenses/LICENSE-2.0
  12
  13 # Unless required by applicable law or agreed to in writing, software
  14 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  15 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  16 # License for the specific language governing permissions and limitations
  17 # under the License.
  18
  19 # For those usages not covered by the Apache License, Version 2.0 please
  20 # contact: helena.mcgough@intel.com or adrian.hoban@intel.com
  21 ##
  22 """Carry out alarming requests via Aodh API."""
  23
  24 import json
  25 import logging
  26 log = logging.getLogger(__name__)
  27
  28 from core.message_bus.producer import KafkaProducer
  29
  30 from kafka import KafkaConsumer
  31
  32 from plugins.OpenStack.common import Common
  33 from plugins.OpenStack.response import OpenStack_Response
  34
  35 __author__ = "Helena McGough"
  36
  37 ALARM_NAMES = {
  38     "average_memory_usage_above_threshold": "average_memory_utilization",
  39     "disk_read_ops": "disk_read_ops",
  40     "disk_write_ops": "disk_write_ops",
  41     "disk_read_bytes": "disk_read_bytes",
  42     "disk_write_bytes": "disk_write_bytes",
  43     "net_packets_dropped": "packets_dropped",
  44     "packets_in_above_threshold": "packets_received",
  45     "packets_out_above_threshold": "packets_sent",
  46     "cpu_utilization_above_threshold": "cpu_utilization"}
  47
  48 SEVERITIES = {
  49     "warning": "low",
  50     "minor": "low",
  51     "major": "moderate",
  52     "critical": "critical",
  53     "indeterminate": "critical"}
  54
  55 STATISTICS = {
  56     "average": "avg",
  57     "minimum": "min",
  58     "maximum": "max",
  59     "count": "count",
  60     "sum": "sum"}
  61
  62
  63 class Alarming(object):
  64     """Carries out alarming requests and responses via Aodh API."""
  65
  66     def __init__(self):
  67         """Create the OpenStack alarming instance."""
  68         self._common = Common()
  69
  70         # TODO(mcgoughh): Remove hardcoded kafkaconsumer
  71         # Initialize a generic consumer object to consume message from the SO
  72         server = {'server': 'localhost:9092', 'topic': 'alarm_request'}
  73         self._consumer = KafkaConsumer(server['topic'],
  74                                        group_id='osm_mon',
  75                                        bootstrap_servers=server['server'])
  76
  77         # Use the Response class to generate valid json response messages
  78         self._response = OpenStack_Response()
  79
  80         # Initializer a producer to send responses back to SO
  81         self._producer = KafkaProducer("alarm_response")
  82
  83     def alarming(self):
  84         """Consume info from the message bus to manage alarms."""
  85         # Check the alarming functionlity that needs to be performed
  86         for message in self._consumer:
  87
  88             values = json.loads(message.value)
  89             vim_type = values['vim_type'].lower()
  90
  91             if vim_type == "openstack":
  92                 log.info("Alarm action required: %s" % (message.topic))
  93
  94                 # Generate and auth_token and endpoint for request
  95                 auth_token, endpoint = self.authenticate()
  96
  97                 if message.key == "create_alarm_request":
  98                     # Configure/Update an alarm
  99                     alarm_details = values['alarm_create_request']
 100
 101                     alarm_id, alarm_status = self.configure_alarm(
 102                         endpoint, auth_token, alarm_details)
 103
 104                     # Generate a valid response message, send via producer
 105                     try:
 106                         if alarm_status is True:
 107                             log.info("Alarm successfully created")
 108
 109                         resp_message = self._response.generate_response(
 110                             'create_alarm_response', status=alarm_status,
 111                             alarm_id=alarm_id,
 112                             cor_id=alarm_details['correlation_id'])
 113                         log.info("Response Message: %s", resp_message)
 114                         self._producer.create_alarm_response(
 115                             'create_alarm_resonse', resp_message,
 116                             'alarm_response')
 117                     except Exception as exc:
 118                         log.warn("Response creation failed: %s", exc)
 119
 120                 elif message.key == "list_alarm_request":
 121                     # Check for a specifed: alarm_name, resource_uuid, severity
 122                     # and generate the appropriate list
 123                     list_details = values['alarm_list_request']
 124
 125                     alarm_list = self.list_alarms(
 126                         endpoint, auth_token, list_details)
 127
 128                     try:
 129                         # Generate and send a list response back
 130                         resp_message = self._response.generate_response(
 131                             'list_alarm_response', alarm_list=alarm_list,
 132                             cor_id=list_details['correlation_id'])
 133                         log.info("Response Message: %s", resp_message)
 134                         self._producer.list_alarm_response(
 135                             'list_alarm_response', resp_message,
 136                             'alarm_response')
 137                     except Exception as exc:
 138                         log.warn("Failed to send a valid response back.")
 139
 140                 elif message.key == "delete_alarm_request":
 141                     request_details = values['alarm_delete_request']
 142                     alarm_id = request_details['alarm_uuid']
 143
 144                     resp_status = self.delete_alarm(
 145                         endpoint, auth_token, alarm_id)
 146
 147                     # Generate and send a response message
 148                     try:
 149                         resp_message = self._response.generate_response(
 150                             'delete_alarm_response', alarm_id=alarm_id,
 151                             status=resp_status,
 152                             cor_id=request_details['correlation_id'])
 153                         log.info("Response message: %s", resp_message)
 154                         self._producer.delete_alarm_response(
 155                             'delete_alarm_response', resp_message,
 156                             'alarm_response')
 157                     except Exception as exc:
 158                         log.warn("Failed to create delete reponse:%s", exc)
 159
 160                 elif message.key == "acknowledge_alarm":
 161                     # Acknowledge that an alarm has been dealt with by the SO
 162                     alarm_id = values['ack_details']['alarm_uuid']
 163
 164                     response = self.update_alarm_state(
 165                         endpoint, auth_token, alarm_id)
 166
 167                     # Log if an alarm was reset
 168                     if response is True:
 169                         log.info("Acknowledged the alarm and cleared it.")
 170                     else:
 171                         log.warn("Failed to acknowledge/clear the alarm.")
 172
 173                 elif message.key == "update_alarm_request":
 174                     # Update alarm configurations
 175                     alarm_details = values['alarm_update_request']
 176
 177                     alarm_id, status = self.update_alarm(
 178                         endpoint, auth_token, alarm_details)
 179
 180                     # Generate a response for an update request
 181                     try:
 182                         resp_message = self._response.generate_response(
 183                             'update_alarm_response', alarm_id=alarm_id,
 184                             cor_id=alarm_details['correlation_id'],
 185                             status=status)
 186                         log.info("Response message: %s", resp_message)
 187                         self._producer.update_alarm_response(
 188                             'update_alarm_response', resp_message,
 189                             'alarm_response')
 190                     except Exception as exc:
 191                         log.warn("Failed to send an update response:%s", exc)
 192
 193                 else:
 194                     log.debug("Unknown key, no action will be performed")
 195             else:
 196                 log.info("Message topic not relevant to this plugin: %s",
 197                          message.topic)
 198
 199         return
 200
 201     def configure_alarm(self, endpoint, auth_token, values):
 202         """Create requested alarm in Aodh."""
 203         url = "{}/v2/alarms/".format(endpoint)
 204
 205         # Check if the desired alarm is supported
 206         alarm_name = values['alarm_name'].lower()
 207         metric_name = values['metric_name'].lower()
 208         resource_id = values['resource_uuid']
 209
 210         if alarm_name not in ALARM_NAMES.keys():
 211             log.warn("This alarm is not supported, by a valid metric.")
 212             return None, False
 213         if ALARM_NAMES[alarm_name] != metric_name:
 214             log.warn("This is not the correct metric for this alarm.")
 215             return None, False
 216
 217         # Check for the required metric
 218         metric_id = self.check_for_metric(auth_token, metric_name, resource_id)
 219
 220         try:
 221             if metric_id is not None:
 222                 # Create the alarm if metric is available
 223                 payload = self.check_payload(values, metric_name, resource_id,
 224                                              alarm_name)
 225                 new_alarm = self._common._perform_request(
 226                     url, auth_token, req_type="post", payload=payload)
 227                 return json.loads(new_alarm.text)['alarm_id'], True
 228             else:
 229                 log.warn("The required Gnocchi metric does not exist.")
 230                 return None, False
 231
 232         except Exception as exc:
 233             log.warn("Failed to create the alarm: %s", exc)
 234         return None, False
 235
 236     def delete_alarm(self, endpoint, auth_token, alarm_id):
 237         """Delete alarm function."""
 238         url = "{}/v2/alarms/%s".format(endpoint) % (alarm_id)
 239
 240         try:
 241             result = self._common._perform_request(
 242                 url, auth_token, req_type="delete")
 243             if str(result.status_code) == "404":
 244                 log.info("Alarm doesn't exist: %s", result.status_code)
 245                 # If status code is 404 alarm did not exist
 246                 return False
 247             else:
 248                 return True
 249
 250         except Exception as exc:
 251             log.warn("Failed to delete alarm: %s because %s.", alarm_id, exc)
 252         return False
 253
 254     def list_alarms(self, endpoint, auth_token, list_details):
 255         """Generate the requested list of alarms."""
 256         url = "{}/v2/alarms/".format(endpoint)
 257         a_list, name_list, sev_list, res_list = [], [], [], []
 258
 259         # TODO(mcgoughh): for now resource_id is a mandatory field
 260         resource = list_details['resource_uuid']
 261
 262         # Checking what fields are specified for a list request
 263         try:
 264             name = list_details['alarm_name'].lower()
 265             if name not in ALARM_NAMES.keys():
 266                 log.warn("This alarm is not supported, won't be used!")
 267                 name = None
 268         except KeyError as exc:
 269             log.info("Alarm name isn't specified.")
 270             name = None
 271
 272         try:
 273             severity = list_details['severity'].lower()
 274             sev = SEVERITIES[severity]
 275         except KeyError as exc:
 276             log.info("Severity is unspecified/incorrectly configured")
 277             sev = None
 278
 279         # Perform the request to get the desired list
 280         try:
 281             result = self._common._perform_request(
 282                 url, auth_token, req_type="get")
 283
 284             if result is not None:
 285                 # Get list based on resource id
 286                 for alarm in json.loads(result.text):
 287                     rule = alarm['gnocchi_resources_threshold_rule']
 288                     if resource == rule['resource_id']:
 289                         res_list.append(str(alarm))
 290                     if not res_list:
 291                         log.info("No alarms for this resource")
 292                         return a_list
 293
 294                 # Generate specified listed if requested
 295                 if name is not None and sev is not None:
 296                     log.info("Return a list of %s alarms with %s severity.",
 297                              name, sev)
 298                     for alarm in json.loads(result.text):
 299                         if name == alarm['name']:
 300                             name_list.append(str(alarm))
 301                     for alarm in json.loads(result.text):
 302                         if sev == alarm['severity']:
 303                             sev_list.append(str(alarm))
 304                     name_sev_list = list(set(name_list).intersection(sev_list))
 305                     a_list = list(set(name_sev_list).intersection(res_list))
 306                 elif name is not None:
 307                     log.info("Returning a %s list of alarms.", name)
 308                     for alarm in json.loads(result.text):
 309                         if name == alarm['name']:
 310                             name_list.append(str(alarm))
 311                     a_list = list(set(name_list).intersection(res_list))
 312                 elif sev is not None:
 313                     log.info("Returning %s severity alarm list.", sev)
 314                     for alarm in json.loads(result.text):
 315                         if sev == alarm['severity']:
 316                             sev_list.append(str(alarm))
 317                     a_list = list(set(sev_list).intersection(res_list))
 318                 else:
 319                     log.info("Returning an entire list of alarms.")
 320                     a_list = res_list
 321             else:
 322                 log.info("There are no alarms!")
 323
 324         except Exception as exc:
 325             log.info("Failed to generate required list: %s", exc)
 326             return None
 327
 328         return a_list
 329
 330     def update_alarm_state(self, endpoint, auth_token, alarm_id):
 331         """Set the state of an alarm to ok when ack message is received."""
 332         url = "{}/v2/alarms/%s/state".format(endpoint) % alarm_id
 333         payload = json.dumps("ok")
 334
 335         try:
 336             self._common._perform_request(
 337                 url, auth_token, req_type="put", payload=payload)
 338             return True
 339         except Exception as exc:
 340             log.warn("Unable to update alarm state: %s", exc)
 341         return False
 342
 343     def update_alarm(self, endpoint, auth_token, values):
 344         """Get alarm name for an alarm configuration update."""
 345         # Get already existing alarm details
 346         url = "{}/v2/alarms/%s".format(endpoint) % values['alarm_uuid']
 347
 348         # Gets current configurations about the alarm
 349         try:
 350             result = self._common._perform_request(
 351                 url, auth_token, req_type="get")
 352             alarm_name = json.loads(result.text)['name']
 353             rule = json.loads(result.text)['gnocchi_resources_threshold_rule']
 354             alarm_state = json.loads(result.text)['state']
 355             resource_id = rule['resource_id']
 356             metric_name = rule['metric']
 357         except Exception as exc:
 358             log.warn("Failed to retreive existing alarm info: %s.\
 359                      Can only update OSM alarms.", exc)
 360             return None, False
 361
 362         # Generates and check payload configuration for alarm update
 363         payload = self.check_payload(values, metric_name, resource_id,
 364                                      alarm_name, alarm_state=alarm_state)
 365
 366         # Updates the alarm configurations with the valid payload
 367         if payload is not None:
 368             try:
 369                 update_alarm = self._common._perform_request(
 370                     url, auth_token, req_type="put", payload=payload)
 371
 372                 return json.loads(update_alarm.text)['alarm_id'], True
 373             except Exception as exc:
 374                 log.warn("Alarm update could not be performed: %s", exc)
 375                 return None, False
 376         return None, False
 377
 378     def check_payload(self, values, metric_name, resource_id,
 379                       alarm_name, alarm_state=None):
 380         """Check that the payload is configuration for update/create alarm."""
 381         try:
 382             # Check state and severity
 383             severity = values['severity'].lower()
 384             if severity == "indeterminate":
 385                 alarm_state = "insufficient data"
 386             if alarm_state is None:
 387                 alarm_state = "ok"
 388
 389             statistic = values['statistic'].lower()
 390             # Try to configure the payload for the update/create request
 391             # Can only update: threshold, operation, statistic and
 392             # the severity of the alarm
 393             rule = {'threshold': values['threshold_value'],
 394                     'comparison_operator': values['operation'].lower(),
 395                     'metric': metric_name,
 396                     'resource_id': resource_id,
 397                     'resource_type': 'generic',
 398                     'aggregation_method': STATISTICS[statistic]}
 399             payload = json.dumps({'state': alarm_state,
 400                                   'name': alarm_name,
 401                                   'severity': SEVERITIES[severity],
 402                                   'type': 'gnocchi_resources_threshold',
 403                                   'gnocchi_resources_threshold_rule': rule, })
 404             return payload
 405         except KeyError as exc:
 406             log.warn("Alarm is not configured correctly: %s", exc)
 407         return None
 408
 409     def authenticate(self):
 410         """Generate an authentication token and endpoint for alarm request."""
 411         try:
 412             # Check for a tenant_id
 413             auth_token = self._common._authenticate()
 414             endpoint = self._common.get_endpoint("alarming")
 415             return auth_token, endpoint
 416         except Exception as exc:
 417             log.warn("Authentication to Keystone failed:%s", exc)
 418         return None, None
 419
 420     def get_alarm_state(self, endpoint, auth_token, alarm_id):
 421         """Get the state of the alarm."""
 422         url = "{}/v2/alarms/%s/state".format(endpoint) % alarm_id
 423
 424         try:
 425             alarm_state = self._common._perform_request(
 426                 url, auth_token, req_type="get")
 427             return json.loads(alarm_state.text)
 428         except Exception as exc:
 429             log.warn("Failed to get the state of the alarm:%s", exc)
 430         return None
 431
 432     def check_for_metric(self, auth_token, m_name, r_id):
 433         """Check for the alarm metric."""
 434         try:
 435             endpoint = self._common.get_endpoint("metric")
 436
 437             url = "{}/v1/metric/".format(endpoint)
 438             metric_list = self._common._perform_request(
 439                 url, auth_token, req_type="get")
 440
 441             for metric in json.loads(metric_list.text):
 442                 name = metric['name']
 443                 resource = metric['resource_id']
 444                 if (name == m_name and resource == r_id):
 445                     metric_id = metric['id']
 446             log.info("The required metric exists, an alarm will be created.")
 447             return metric_id
 448         except Exception as exc:
 449             log.info("Desired Gnocchi metric not found:%s", exc)
 450         return None