plugins/OpenStack/Aodh/alarming.py

   1 # Copyright 2017 Intel Research and Development Ireland Limited
   2 # *************************************************************
   3
   4 # This file is part of OSM Monitoring module
   5 # All Rights Reserved to Intel Corporation
   6
   7 # Licensed under the Apache License, Version 2.0 (the "License"); you may
   8 # not use this file except in compliance with the License. You may obtain
   9 # a copy of the License at
  10
  11 #         http://www.apache.org/licenses/LICENSE-2.0
  12
  13 # Unless required by applicable law or agreed to in writing, software
  14 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  15 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  16 # License for the specific language governing permissions and limitations
  17 # under the License.
  18
  19 # For those usages not covered by the Apache License, Version 2.0 please
  20 # contact: helena.mcgough@intel.com or adrian.hoban@intel.com
  21 ##
  22 """Carry out alarming requests via Aodh API."""
  23
  24 import json
  25 import logging
  26 log = logging.getLogger(__name__)
  27
  28 from core.message_bus.producer import KafkaProducer
  29
  30 from kafka import KafkaConsumer
  31
  32 from plugins.OpenStack.common import Common
  33 from plugins.OpenStack.response import OpenStack_Response
  34
  35 __author__ = "Helena McGough"
  36
  37 ALARM_NAMES = {
  38     "average_memory_usage_above_threshold": "average_memory_utilization",
  39     "disk_read_ops": "disk_read_ops",
  40     "disk_write_ops": "disk_write_ops",
  41     "disk_read_bytes": "disk_read_bytes",
  42     "disk_write_bytes": "disk_write_bytes",
  43     "net_packets_dropped": "packets_dropped",
  44     "packets_in_above_threshold": "packets_received",
  45     "packets_out_above_threshold": "packets_sent",
  46     "cpu_utilization_above_threshold": "cpu_utilization"}
  47
  48 SEVERITIES = {
  49     "warning": "low",
  50     "minor": "low",
  51     "major": "moderate",
  52     "critical": "critical",
  53     "indeterminate": "critical"}
  54
  55 STATISTICS = {
  56     "average": "avg",
  57     "minimum": "min",
  58     "maximum": "max",
  59     "count": "count",
  60     "sum": "sum"}
  61
  62
  63 class Alarming(object):
  64     """Carries out alarming requests and responses via Aodh API."""
  65
  66     def __init__(self):
  67         """Create the OpenStack alarming instance."""
  68         self._common = Common()
  69
  70         # TODO(mcgoughh): Remove hardcoded kafkaconsumer
  71         # Initialize a generic consumer object to consume message from the SO
  72         server = {'server': 'localhost:9092', 'topic': 'alarm_request'}
  73         self._consumer = KafkaConsumer(server['topic'],
  74                                        group_id='osm_mon',
  75                                        bootstrap_servers=server['server'])
  76
  77         # Use the Response class to generate valid json response messages
  78         self._response = OpenStack_Response()
  79
  80         # Initializer a producer to send responses back to SO
  81         self._producer = KafkaProducer("alarm_response")
  82
  83     def alarming(self):
  84         """Consume info from the message bus to manage alarms."""
  85         # Check the alarming functionlity that needs to be performed
  86         for message in self._consumer:
  87
  88             values = json.loads(message.value)
  89             vim_type = values['vim_type'].lower()
  90
  91             if vim_type == "openstack":
  92                 log.info("Alarm action required: %s" % (message.topic))
  93
  94                 # Generate and auth_token and endpoint for request
  95                 auth_token, endpoint = self.authenticate()
  96
  97                 if message.key == "create_alarm_request":
  98                     # Configure/Update an alarm
  99                     alarm_details = values['alarm_create_request']
 100
 101                     alarm_id, alarm_status = self.configure_alarm(
 102                         endpoint, auth_token, alarm_details)
 103
 104                     # Generate a valid response message, send via producer
 105                     try:
 106                         if alarm_status is True:
 107                             log.info("Alarm successfully created")
 108
 109                         resp_message = self._response.generate_response(
 110                             'create_alarm_response', status=alarm_status,
 111                             alarm_id=alarm_id,
 112                             cor_id=alarm_details['correlation_id'])
 113                         self._producer.create_alarm_response(
 114                             'create_alarm_resonse', resp_message,
 115                             'alarm_response')
 116                     except Exception as exc:
 117                         log.warn("Response creation failed: %s", exc)
 118
 119                 elif message.key == "list_alarm_request":
 120                     # Check for a specifed: alarm_name, resource_uuid, severity
 121                     # and generate the appropriate list
 122                     list_details = values['alarm_list_request']
 123
 124                     alarm_list = self.list_alarms(
 125                         endpoint, auth_token, list_details)
 126
 127                     try:
 128                         # Generate and send a list response back
 129                         resp_message = self._response.generate_response(
 130                             'list_alarm_response', alarm_list=alarm_list,
 131                             cor_id=list_details['correlation_id'])
 132                         self._producer.list_alarm_response(
 133                             'list_alarm_response', resp_message,
 134                             'alarm_response')
 135                     except Exception as exc:
 136                         log.warn("Failed to send a valid response back.")
 137
 138                 elif message.key == "delete_alarm_request":
 139                     request_details = values['alarm_delete_request']
 140                     alarm_id = request_details['alarm_uuid']
 141
 142                     resp_status = self.delete_alarm(
 143                         endpoint, auth_token, alarm_id)
 144
 145                     # Generate and send a response message
 146                     try:
 147                         resp_message = self._response.generate_response(
 148                             'delete_alarm_response', alarm_id=alarm_id,
 149                             status=resp_status,
 150                             cor_id=request_details['correlation_id'])
 151                         self._producer.delete_alarm_response(
 152                             'delete_alarm_response', resp_message,
 153                             'alarm_response')
 154                     except Exception as exc:
 155                         log.warn("Failed to create delete reponse:%s", exc)
 156
 157                 elif message.key == "acknowledge_alarm":
 158                     # Acknowledge that an alarm has been dealt with by the SO
 159                     alarm_id = values['ack_details']['alarm_uuid']
 160
 161                     response = self.update_alarm_state(
 162                         endpoint, auth_token, alarm_id)
 163
 164                     # Log if an alarm was reset
 165                     if response is True:
 166                         log.info("Acknowledged the alarm and cleared it.")
 167                     else:
 168                         log.warn("Failed to acknowledge/clear the alarm.")
 169
 170                 elif message.key == "update_alarm_request":
 171                     # Update alarm configurations
 172                     alarm_details = values['alarm_update_request']
 173
 174                     alarm_id, status = self.update_alarm(
 175                         endpoint, auth_token, alarm_details)
 176
 177                     # Generate a response for an update request
 178                     try:
 179                         resp_message = self._response.generate_response(
 180                             'update_alarm_response', alarm_id=alarm_id,
 181                             cor_id=alarm_details['correlation_id'],
 182                             status=status)
 183                         self._producer.update_alarm_response(
 184                             'update_alarm_response', resp_message,
 185                             'alarm_response')
 186                     except Exception as exc:
 187                         log.warn("Failed to send an update response:%s", exc)
 188
 189                 else:
 190                     log.debug("Unknown key, no action will be performed")
 191             else:
 192                 log.info("Message topic not relevant to this plugin: %s",
 193                          message.topic)
 194
 195         return
 196
 197     def configure_alarm(self, endpoint, auth_token, values):
 198         """Create requested alarm in Aodh."""
 199         url = "{}/v2/alarms/".format(endpoint)
 200
 201         # Check if the desired alarm is supported
 202         alarm_name = values['alarm_name'].lower()
 203         metric_name = values['metric_name'].lower()
 204         resource_id = values['resource_uuid']
 205
 206         if alarm_name not in ALARM_NAMES.keys():
 207             log.warn("This alarm is not supported, by a valid metric.")
 208             return None, False
 209         if ALARM_NAMES[alarm_name] != metric_name:
 210             log.warn("This is not the correct metric for this alarm.")
 211             return None, False
 212
 213         # Check for the required metric
 214         metric_id = self.check_for_metric(auth_token, metric_name, resource_id)
 215
 216         try:
 217             if metric_id is not None:
 218                 # Create the alarm if metric is available
 219                 payload = self.check_payload(values, metric_name, resource_id,
 220                                              alarm_name)
 221                 new_alarm = self._common._perform_request(
 222                     url, auth_token, req_type="post", payload=payload)
 223                 return json.loads(new_alarm.text)['alarm_id'], True
 224             else:
 225                 log.warn("The required Gnocchi metric does not exist.")
 226                 return None, False
 227
 228         except Exception as exc:
 229             log.warn("Failed to create the alarm: %s", exc)
 230         return None, False
 231
 232     def delete_alarm(self, endpoint, auth_token, alarm_id):
 233         """Delete alarm function."""
 234         url = "{}/v2/alarms/%s".format(endpoint) % (alarm_id)
 235
 236         try:
 237             result = self._common._perform_request(
 238                 url, auth_token, req_type="delete")
 239             if str(result.status_code) == "404":
 240                 log.info("Alarm doesn't exist: %s", result.status_code)
 241                 # If status code is 404 alarm did not exist
 242                 return False
 243             else:
 244                 return True
 245
 246         except Exception as exc:
 247             log.warn("Failed to delete alarm: %s because %s.", alarm_id, exc)
 248         return False
 249
 250     def list_alarms(self, endpoint, auth_token, list_details):
 251         """Generate the requested list of alarms."""
 252         url = "{}/v2/alarms/".format(endpoint)
 253         a_list, name_list, sev_list, res_list = [], [], [], []
 254
 255         # TODO(mcgoughh): for now resource_id is a mandatory field
 256         resource = list_details['resource_uuid']
 257
 258         # Checking what fields are specified for a list request
 259         try:
 260             name = list_details['alarm_name'].lower()
 261             if name not in ALARM_NAMES.keys():
 262                 log.warn("This alarm is not supported, won't be used!")
 263                 name = None
 264         except KeyError as exc:
 265             log.info("Alarm name isn't specified.")
 266             name = None
 267
 268         try:
 269             severity = list_details['severity'].lower()
 270             sev = SEVERITIES[severity]
 271         except KeyError as exc:
 272             log.info("Severity is unspecified/incorrectly configured")
 273             sev = None
 274
 275         # Perform the request to get the desired list
 276         try:
 277             result = self._common._perform_request(
 278                 url, auth_token, req_type="get")
 279
 280             if result is not None:
 281                 # Get list based on resource id
 282                 for alarm in json.loads(result.text):
 283                     rule = alarm['gnocchi_resources_threshold_rule']
 284                     if resource == rule['resource_id']:
 285                         res_list.append(str(alarm))
 286                     if not res_list:
 287                         log.info("No alarms for this resource")
 288                         return a_list
 289
 290                 # Generate specified listed if requested
 291                 if name is not None and sev is not None:
 292                     log.info("Return a list of %s alarms with %s severity.",
 293                              name, sev)
 294                     for alarm in json.loads(result.text):
 295                         if name == alarm['name']:
 296                             name_list.append(str(alarm))
 297                     for alarm in json.loads(result.text):
 298                         if sev == alarm['severity']:
 299                             sev_list.append(str(alarm))
 300                     name_sev_list = list(set(name_list).intersection(sev_list))
 301                     a_list = list(set(name_sev_list).intersection(res_list))
 302                 elif name is not None:
 303                     log.info("Returning a %s list of alarms.", name)
 304                     for alarm in json.loads(result.text):
 305                         if name == alarm['name']:
 306                             name_list.append(str(alarm))
 307                     a_list = list(set(name_list).intersection(res_list))
 308                 elif sev is not None:
 309                     log.info("Returning %s severity alarm list.", sev)
 310                     for alarm in json.loads(result.text):
 311                         if sev == alarm['severity']:
 312                             sev_list.append(str(alarm))
 313                     a_list = list(set(sev_list).intersection(res_list))
 314                 else:
 315                     log.info("Returning an entire list of alarms.")
 316                     a_list = res_list
 317             else:
 318                 log.info("There are no alarms!")
 319
 320         except Exception as exc:
 321             log.info("Failed to generate required list: %s", exc)
 322             return None
 323
 324         return a_list
 325
 326     def update_alarm_state(self, endpoint, auth_token, alarm_id):
 327         """Set the state of an alarm to ok when ack message is received."""
 328         url = "{}/v2/alarms/%s/state".format(endpoint) % alarm_id
 329         payload = json.dumps("ok")
 330
 331         try:
 332             self._common._perform_request(
 333                 url, auth_token, req_type="put", payload=payload)
 334             return True
 335         except Exception as exc:
 336             log.warn("Unable to update alarm state: %s", exc)
 337         return False
 338
 339     def update_alarm(self, endpoint, auth_token, values):
 340         """Get alarm name for an alarm configuration update."""
 341         # Get already existing alarm details
 342         url = "{}/v2/alarms/%s".format(endpoint) % values['alarm_uuid']
 343
 344         # Gets current configurations about the alarm
 345         try:
 346             result = self._common._perform_request(
 347                 url, auth_token, req_type="get")
 348             alarm_name = json.loads(result.text)['name']
 349             rule = json.loads(result.text)['gnocchi_resources_threshold_rule']
 350             alarm_state = json.loads(result.text)['state']
 351             resource_id = rule['resource_id']
 352             metric_name = rule['metric']
 353         except Exception as exc:
 354             log.warn("Failed to retreive existing alarm info: %s.\
 355                      Can only update OSM alarms.", exc)
 356             return None, False
 357
 358         # Generates and check payload configuration for alarm update
 359         payload = self.check_payload(values, metric_name, resource_id,
 360                                      alarm_name, alarm_state=alarm_state)
 361
 362         # Updates the alarm configurations with the valid payload
 363         if payload is not None:
 364             try:
 365                 update_alarm = self._common._perform_request(
 366                     url, auth_token, req_type="put", payload=payload)
 367
 368                 return json.loads(update_alarm.text)['alarm_id'], True
 369             except Exception as exc:
 370                 log.warn("Alarm update could not be performed: %s", exc)
 371                 return None, False
 372         return None, False
 373
 374     def check_payload(self, values, metric_name, resource_id,
 375                       alarm_name, alarm_state=None):
 376         """Check that the payload is configuration for update/create alarm."""
 377         try:
 378             # Check state and severity
 379             severity = values['severity'].lower()
 380             if severity == "indeterminate":
 381                 alarm_state = "insufficient data"
 382             if alarm_state is None:
 383                 alarm_state = "ok"
 384
 385             statistic = values['statistic'].lower()
 386             # Try to configure the payload for the update/create request
 387             # Can only update: threshold, operation, statistic and
 388             # the severity of the alarm
 389             rule = {'threshold': values['threshold_value'],
 390                     'comparison_operator': values['operation'].lower(),
 391                     'metric': metric_name,
 392                     'resource_id': resource_id,
 393                     'resource_type': 'generic',
 394                     'aggregation_method': STATISTICS[statistic]}
 395             payload = json.dumps({'state': alarm_state,
 396                                   'name': alarm_name,
 397                                   'severity': SEVERITIES[severity],
 398                                   'type': 'gnocchi_resources_threshold',
 399                                   'gnocchi_resources_threshold_rule': rule, })
 400             return payload
 401         except KeyError as exc:
 402             log.warn("Alarm is not configured correctly: %s", exc)
 403         return None
 404
 405     def authenticate(self):
 406         """Generate an authentication token and endpoint for alarm request."""
 407         try:
 408             # Check for a tenant_id
 409             auth_token = self._common._authenticate()
 410             endpoint = self._common.get_endpoint("alarming")
 411             return auth_token, endpoint
 412         except Exception as exc:
 413             log.warn("Authentication to Keystone failed:%s", exc)
 414         return None, None
 415
 416     def get_alarm_state(self, endpoint, auth_token, alarm_id):
 417         """Get the state of the alarm."""
 418         url = "{}/v2/alarms/%s/state".format(endpoint) % alarm_id
 419
 420         try:
 421             alarm_state = self._common._perform_request(
 422                 url, auth_token, req_type="get")
 423             return json.loads(alarm_state.text)
 424         except Exception as exc:
 425             log.warn("Failed to get the state of the alarm:%s", exc)
 426         return None
 427
 428     def check_for_metric(self, auth_token, m_name, r_id):
 429         """Check for the alarm metric."""
 430         try:
 431             endpoint = self._common.get_endpoint("metric")
 432
 433             url = "{}/v1/metric/".format(endpoint)
 434             metric_list = self._common._perform_request(
 435                 url, auth_token, req_type="get")
 436
 437             for metric in json.loads(metric_list.text):
 438                 name = metric['name']
 439                 resource = metric['resource_id']
 440                 if (name == m_name and resource == r_id):
 441                     metric_id = metric['id']
 442             log.info("The required metric exists, an alarm will be created.")
 443             return metric_id
 444         except Exception as exc:
 445             log.info("Desired Gnocchi metric not found:%s", exc)
 446         return None