osm_mon/plugins/OpenStack/Aodh/alarming.py

   1 # Copyright 2017 Intel Research and Development Ireland Limited
   2 # *************************************************************
   3
   4 # This file is part of OSM Monitoring module
   5 # All Rights Reserved to Intel Corporation
   6
   7 # Licensed under the Apache License, Version 2.0 (the "License"); you may
   8 # not use this file except in compliance with the License. You may obtain
   9 # a copy of the License at
  10
  11 #         http://www.apache.org/licenses/LICENSE-2.0
  12
  13 # Unless required by applicable law or agreed to in writing, software
  14 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  15 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  16 # License for the specific language governing permissions and limitations
  17 # under the License.
  18
  19 # For those usages not covered by the Apache License, Version 2.0 please
  20 # contact: helena.mcgough@intel.com or adrian.hoban@intel.com
  21 ##
  22 """Carry out alarming requests via Aodh API."""
  23
  24 import json
  25 import logging
  26
  27 import six
  28 import yaml
  29
  30 from osm_mon.core.auth import AuthManager
  31 from osm_mon.core.database import DatabaseManager
  32 from osm_mon.core.message_bus.producer import KafkaProducer
  33 from osm_mon.core.settings import Config
  34 from osm_mon.plugins.OpenStack.Gnocchi.metrics import METRIC_MAPPINGS
  35 from osm_mon.plugins.OpenStack.common import Common
  36 from osm_mon.plugins.OpenStack.response import OpenStack_Response
  37
  38 log = logging.getLogger(__name__)
  39
  40 SEVERITIES = {
  41     "warning": "low",
  42     "minor": "low",
  43     "major": "moderate",
  44     "critical": "critical",
  45     "indeterminate": "critical"}
  46
  47 STATISTICS = {
  48     "average": "mean",
  49     "minimum": "min",
  50     "maximum": "max",
  51     "count": "count",
  52     "sum": "sum"}
  53
  54
  55 class Alarming(object):
  56     """Carries out alarming requests and responses via Aodh API."""
  57
  58     def __init__(self):
  59         """Create the OpenStack alarming instance."""
  60         # Initialize configuration and notifications
  61         config = Config.instance()
  62         config.read_environ()
  63
  64         self._database_manager = DatabaseManager()
  65         self._auth_manager = AuthManager()
  66
  67         # Use the Response class to generate valid json response messages
  68         self._response = OpenStack_Response()
  69
  70         # Initializer a producer to send responses back to SO
  71         self._producer = KafkaProducer("alarm_response")
  72
  73     def configure_alarm(self, alarm_endpoint, metric_endpoint, auth_token, values, vim_config):
  74         """Create requested alarm in Aodh."""
  75         url = "{}/v2/alarms/".format(alarm_endpoint)
  76
  77         # Check if the desired alarm is supported
  78         alarm_name = values['alarm_name'].lower()
  79         metric_name = values['metric_name'].lower()
  80         resource_id = values['resource_uuid']
  81
  82         if metric_name not in METRIC_MAPPINGS.keys():
  83             log.warning("This metric is not supported.")
  84             return None, False
  85
  86         # Check for the required metric
  87         metric_id = self.check_for_metric(auth_token, metric_endpoint, metric_name, resource_id)
  88
  89         try:
  90             if metric_id is not None:
  91                 # Create the alarm if metric is available
  92                 if 'granularity' in vim_config and 'granularity' not in values:
  93                     values['granularity'] = vim_config['granularity']
  94                 payload = self.check_payload(values, metric_name, resource_id,
  95                                              alarm_name)
  96                 new_alarm = Common.perform_request(
  97                     url, auth_token, req_type="post", payload=payload)
  98                 return json.loads(new_alarm.text)['alarm_id'], True
  99             else:
 100                 log.warning("The required Gnocchi metric does not exist.")
 101                 return None, False
 102
 103         except Exception as exc:
 104             log.warning("Failed to create the alarm: %s", exc)
 105         return None, False
 106
 107     def alarming(self, message, vim_uuid):
 108         """Consume info from the message bus to manage alarms."""
 109         try:
 110             values = json.loads(message.value)
 111         except ValueError:
 112             values = yaml.safe_load(message.value)
 113
 114         log.info("OpenStack alarm action required.")
 115
 116         auth_token = Common.get_auth_token(vim_uuid)
 117
 118         alarm_endpoint = Common.get_endpoint("alarming", vim_uuid)
 119         metric_endpoint = Common.get_endpoint("metric", vim_uuid)
 120
 121         vim_account = self._auth_manager.get_credentials(vim_uuid)
 122         vim_config = json.loads(vim_account.config)
 123
 124         if message.key == "create_alarm_request":
 125             # Configure/Update an alarm
 126             alarm_details = values['alarm_create_request']
 127
 128             alarm_id, alarm_status = self.configure_alarm(
 129                 alarm_endpoint, metric_endpoint, auth_token, alarm_details, vim_config)
 130
 131             # Generate a valid response message, send via producer
 132             if alarm_status is True:
 133                 log.info("Alarm successfully created")
 134                 self._database_manager.save_alarm(alarm_id,
 135                                                   vim_uuid,
 136                                                   alarm_details['threshold_value'],
 137                                                   alarm_details['operation'].lower(),
 138                                                   alarm_details['metric_name'].lower(),
 139                                                   alarm_details['vdu_name'].lower(),
 140                                                   alarm_details['vnf_member_index'].lower(),
 141                                                   alarm_details['ns_id'].lower()
 142                                                   )
 143             try:
 144                 resp_message = self._response.generate_response(
 145                     'create_alarm_response', status=alarm_status,
 146                     alarm_id=alarm_id,
 147                     cor_id=alarm_details['correlation_id'])
 148                 log.info("Response Message: %s", resp_message)
 149                 self._producer.create_alarm_response(
 150                     'create_alarm_response', resp_message)
 151             except Exception:
 152                 log.exception("Response creation failed:")
 153
 154         elif message.key == "list_alarm_request":
 155             # Check for a specified: alarm_name, resource_uuid, severity
 156             # and generate the appropriate list
 157             list_details = values['alarm_list_request']
 158
 159             alarm_list = self.list_alarms(
 160                 alarm_endpoint, auth_token, list_details)
 161
 162             try:
 163                 # Generate and send a list response back
 164                 resp_message = self._response.generate_response(
 165                     'list_alarm_response', alarm_list=alarm_list,
 166                     cor_id=list_details['correlation_id'])
 167                 log.info("Response Message: %s", resp_message)
 168                 self._producer.list_alarm_response(
 169                     'list_alarm_response', resp_message)
 170             except Exception:
 171                 log.exception("Failed to send a valid response back.")
 172
 173         elif message.key == "delete_alarm_request":
 174             request_details = values['alarm_delete_request']
 175             alarm_id = request_details['alarm_uuid']
 176
 177             resp_status = self.delete_alarm(
 178                 alarm_endpoint, auth_token, alarm_id)
 179
 180             # Generate and send a response message
 181             try:
 182                 resp_message = self._response.generate_response(
 183                     'delete_alarm_response', alarm_id=alarm_id,
 184                     status=resp_status,
 185                     cor_id=request_details['correlation_id'])
 186                 log.info("Response message: %s", resp_message)
 187                 self._producer.delete_alarm_response(
 188                     'delete_alarm_response', resp_message)
 189             except Exception:
 190                 log.exception("Failed to create delete response: ")
 191
 192         elif message.key == "acknowledge_alarm":
 193             # Acknowledge that an alarm has been dealt with by the SO
 194             alarm_id = values['ack_details']['alarm_uuid']
 195
 196             response = self.update_alarm_state(
 197                 alarm_endpoint, auth_token, alarm_id)
 198
 199             # Log if an alarm was reset
 200             if response is True:
 201                 log.info("Acknowledged the alarm and cleared it.")
 202             else:
 203                 log.warning("Failed to acknowledge/clear the alarm.")
 204
 205         elif message.key == "update_alarm_request":
 206             # Update alarm configurations
 207             alarm_details = values['alarm_update_request']
 208
 209             alarm_id, status = self.update_alarm(
 210                 alarm_endpoint, auth_token, alarm_details, vim_config)
 211
 212             # Generate a response for an update request
 213             try:
 214                 resp_message = self._response.generate_response(
 215                     'update_alarm_response', alarm_id=alarm_id,
 216                     cor_id=alarm_details['correlation_id'],
 217                     status=status)
 218                 log.info("Response message: %s", resp_message)
 219                 self._producer.update_alarm_response(
 220                     'update_alarm_response', resp_message)
 221             except Exception:
 222                 log.exception("Failed to send an update response: ")
 223
 224         else:
 225             log.debug("Unknown key, no action will be performed")
 226
 227         return
 228
 229     def delete_alarm(self, endpoint, auth_token, alarm_id):
 230         """Delete alarm function."""
 231         url = "{}/v2/alarms/%s".format(endpoint) % alarm_id
 232
 233         try:
 234             result = Common.perform_request(
 235                 url, auth_token, req_type="delete")
 236             if str(result.status_code) == "404":
 237                 log.info("Alarm doesn't exist: %s", result.status_code)
 238                 # If status code is 404 alarm did not exist
 239                 return False
 240             else:
 241                 return True
 242
 243         except Exception:
 244             log.exception("Failed to delete alarm %s :", alarm_id)
 245         return False
 246
 247     def list_alarms(self, endpoint, auth_token, list_details):
 248         """Generate the requested list of alarms."""
 249         url = "{}/v2/alarms/".format(endpoint)
 250         a_list, name_list, sev_list, res_list = [], [], [], []
 251
 252         # TODO(mcgoughh): for now resource_id is a mandatory field
 253         # Check for a resource id
 254         try:
 255             resource = list_details['resource_uuid']
 256         except KeyError as exc:
 257             log.warning("Resource id not specified for list request: %s", exc)
 258             return None
 259
 260         # Checking what fields are specified for a list request
 261         try:
 262             name = list_details['alarm_name'].lower()
 263         except KeyError as exc:
 264             log.info("Alarm name isn't specified.")
 265             name = None
 266
 267         try:
 268             severity = list_details['severity'].lower()
 269             sev = SEVERITIES[severity]
 270         except KeyError as exc:
 271             log.info("Severity is unspecified/incorrectly configured")
 272             sev = None
 273
 274         # Perform the request to get the desired list
 275         try:
 276             result = Common.perform_request(
 277                 url, auth_token, req_type="get")
 278
 279             if result is not None:
 280                 # Get list based on resource id
 281                 for alarm in json.loads(result.text):
 282                     rule = alarm['gnocchi_resources_threshold_rule']
 283                     if resource == rule['resource_id']:
 284                         res_list.append(alarm)
 285                     if not res_list:
 286                         log.info("No alarms for this resource")
 287                         return a_list
 288
 289                 # Generate specified listed if requested
 290                 if name is not None and sev is not None:
 291                     log.info("Return a list of %s alarms with %s severity.",
 292                              name, sev)
 293                     for alarm in json.loads(result.text):
 294                         if name == alarm['name']:
 295                             name_list.append(alarm)
 296                     for alarm in json.loads(result.text):
 297                         if sev == alarm['severity']:
 298                             sev_list.append(alarm)
 299                     name_sev_list = list(set(name_list).intersection(sev_list))
 300                     a_list = list(set(name_sev_list).intersection(res_list))
 301                 elif name is not None:
 302                     log.info("Returning a %s list of alarms.", name)
 303                     for alarm in json.loads(result.text):
 304                         if name == alarm['name']:
 305                             name_list.append(alarm)
 306                     a_list = list(set(name_list).intersection(res_list))
 307                 elif sev is not None:
 308                     log.info("Returning %s severity alarm list.", sev)
 309                     for alarm in json.loads(result.text):
 310                         if sev == alarm['severity']:
 311                             sev_list.append(alarm)
 312                     a_list = list(set(sev_list).intersection(res_list))
 313                 else:
 314                     log.info("Returning an entire list of alarms.")
 315                     a_list = res_list
 316             else:
 317                 log.info("There are no alarms!")
 318
 319         except Exception as exc:
 320             log.info("Failed to generate required list: %s", exc)
 321             return None
 322
 323         return a_list
 324
 325     def update_alarm_state(self, endpoint, auth_token, alarm_id):
 326         """Set the state of an alarm to ok when ack message is received."""
 327         url = "{}/v2/alarms/%s/state".format(endpoint) % alarm_id
 328         payload = json.dumps("ok")
 329
 330         try:
 331             Common.perform_request(
 332                 url, auth_token, req_type="put", payload=payload)
 333             return True
 334         except Exception:
 335             log.exception("Unable to update alarm state: ")
 336         return False
 337
 338     def update_alarm(self, endpoint, auth_token, values, vim_config):
 339         """Get alarm name for an alarm configuration update."""
 340         # Get already existing alarm details
 341         url = "{}/v2/alarms/%s".format(endpoint) % values['alarm_uuid']
 342
 343         # Gets current configurations about the alarm
 344         try:
 345             result = Common.perform_request(
 346                 url, auth_token, req_type="get")
 347             alarm_name = json.loads(result.text)['name']
 348             rule = json.loads(result.text)['gnocchi_resources_threshold_rule']
 349             alarm_state = json.loads(result.text)['state']
 350             resource_id = rule['resource_id']
 351             metric_name = [key for key, value in six.iteritems(METRIC_MAPPINGS) if value == rule['metric']][0]
 352         except Exception as exc:
 353             log.exception("Failed to retrieve existing alarm info. Can only update OSM alarms.")
 354             return None, False
 355
 356         # Generates and check payload configuration for alarm update
 357         if 'granularity' in vim_config and 'granularity' not in values:
 358             values['granularity'] = vim_config['granularity']
 359         payload = self.check_payload(values, metric_name, resource_id,
 360                                      alarm_name, alarm_state=alarm_state)
 361
 362         # Updates the alarm configurations with the valid payload
 363         if payload is not None:
 364             try:
 365                 update_alarm = Common.perform_request(
 366                     url, auth_token, req_type="put", payload=payload)
 367
 368                 return json.loads(update_alarm.text)['alarm_id'], True
 369             except Exception as exc:
 370                 log.exception("Alarm update could not be performed: ")
 371         return None, False
 372
 373     def check_payload(self, values, metric_name, resource_id,
 374                       alarm_name, alarm_state=None):
 375         """Check that the payload is configuration for update/create alarm."""
 376         try:
 377             cfg = Config.instance()
 378             # Check state and severity
 379
 380             severity = 'critical'
 381             if 'severity' in values:
 382                 severity = values['severity'].lower()
 383
 384             if severity == "indeterminate":
 385                 alarm_state = "insufficient data"
 386             if alarm_state is None:
 387                 alarm_state = "ok"
 388
 389             statistic = values['statistic'].lower()
 390
 391             granularity = cfg.OS_DEFAULT_GRANULARITY
 392             if 'granularity' in values:
 393                 granularity = values['granularity']
 394
 395             resource_type = 'generic'
 396             if 'resource_type' in values:
 397                 resource_type = values['resource_type'].lower()
 398
 399             # Try to configure the payload for the update/create request
 400             # Can only update: threshold, operation, statistic and
 401             # the severity of the alarm
 402             rule = {'threshold': values['threshold_value'],
 403                     'comparison_operator': values['operation'].lower(),
 404                     'metric': METRIC_MAPPINGS[metric_name],
 405                     'resource_id': resource_id,
 406                     'resource_type': resource_type,
 407                     'aggregation_method': STATISTICS[statistic],
 408                     'granularity': granularity, }
 409             payload = json.dumps({'state': alarm_state,
 410                                   'name': alarm_name,
 411                                   'severity': SEVERITIES[severity],
 412                                   'type': 'gnocchi_resources_threshold',
 413                                   'gnocchi_resources_threshold_rule': rule,
 414                                   'alarm_actions': [cfg.OS_NOTIFIER_URI], })
 415             return payload
 416         except KeyError as exc:
 417             log.warning("Alarm is not configured correctly: %s", exc)
 418         return None
 419
 420     def get_alarm_state(self, endpoint, auth_token, alarm_id):
 421         """Get the state of the alarm."""
 422         url = "{}/v2/alarms/%s/state".format(endpoint) % alarm_id
 423
 424         try:
 425             alarm_state = Common.perform_request(
 426                 url, auth_token, req_type="get")
 427             return json.loads(alarm_state.text)
 428         except Exception as exc:
 429             log.warning("Failed to get the state of the alarm:%s", exc)
 430         return None
 431
 432     def check_for_metric(self, auth_token, metric_endpoint, m_name, r_id):
 433         """Check for the alarm metric."""
 434         try:
 435             url = "{}/v1/resource/generic/{}".format(metric_endpoint, r_id)
 436             result = Common.perform_request(
 437                 url, auth_token, req_type="get")
 438             resource = json.loads(result.text)
 439             metric_list = resource['metrics']
 440             if metric_list.get(METRIC_MAPPINGS[m_name]):
 441                 metric_id = metric_list[METRIC_MAPPINGS[m_name]]
 442             else:
 443                 metric_id = None
 444                 log.info("Desired Gnocchi metric not found")
 445             return metric_id
 446         except Exception as exc:
 447             log.info("Desired Gnocchi metric not found:%s", exc)
 448         return None