Merge pull request #154 from stevenvanrossem/master
[osm/vim-emu.git] / src / emuvim / dcemulator / monitoring.py
index 8db10da..ba04771 100755 (executable)
@@ -1,18 +1,42 @@
-__author__ = 'Administrator'\r
+"""\r
+Copyright (c) 2015 SONATA-NFV\r
+ALL RIGHTS RESERVED.\r
+\r
+Licensed under the Apache License, Version 2.0 (the "License");\r
+you may not use this file except in compliance with the License.\r
+You may obtain a copy of the License at\r
+\r
+    http://www.apache.org/licenses/LICENSE-2.0\r
+\r
+Unless required by applicable law or agreed to in writing, software\r
+distributed under the License is distributed on an "AS IS" BASIS,\r
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+See the License for the specific language governing permissions and\r
+limitations under the License.\r
+\r
+Neither the name of the SONATA-NFV [, ANY ADDITIONAL AFFILIATION]\r
+nor the names of its contributors may be used to endorse or promote\r
+products derived from this software without specific prior written\r
+permission.\r
+\r
+This work has been performed in the framework of the SONATA project,\r
+funded by the European Commission under Grant number 671517 through\r
+the Horizon 2020 and 5G-PPP programmes. The authors would like to\r
+acknowledge the contributions of their colleagues of the SONATA\r
+partner consortium (www.sonata-nfv.eu).\r
+"""\r
 \r
-import urllib2\r
 import logging\r
+import sys\r
 from mininet.node import  OVSSwitch\r
 import ast\r
 import time\r
 from prometheus_client import start_http_server, Summary, Histogram, Gauge, Counter, REGISTRY, CollectorRegistry, \\r
     pushadd_to_gateway, push_to_gateway, delete_from_gateway\r
 import threading\r
-from subprocess import Popen, PIPE\r
+from subprocess import Popen\r
 import os\r
 \r
-import paramiko\r
-import gevent\r
 \r
 logging.basicConfig(level=logging.INFO)\r
 \r
@@ -24,16 +48,20 @@ class DCNetworkMonitor():
     def __init__(self, net):\r
         self.net = net\r
 \r
-        prometheus_ip = '0.0.0.0'\r
+        # TODO: these hard-coded endpoint settings should be read from a config file\r
+        '''\r
+        # prometheus is started outside of son-emu\r
+        prometheus_ip = '127.0.0.1'\r
         prometheus_port = '9090'\r
         self.prometheus_REST_api = 'http://{0}:{1}'.format(prometheus_ip, prometheus_port)\r
-\r
-\r
-\r
+        '''\r
         # helper variables to calculate the metrics\r
+        # the pushgateway is started outside of son-emu; since son-emu is started\r
+        # with net=host, it can be reached at localhost:9091\r
         self.pushgateway = 'localhost:9091'\r
-        # Start up the server to expose the metrics to Prometheus.\r
-        #start_http_server(8000)\r
+        # when sdk is started with docker-compose, we could use\r
+        # self.pushgateway = 'pushgateway:9091'\r
+\r
         # supported Prometheus metrics\r
         self.registry = CollectorRegistry()\r
         self.prom_tx_packet_count = Gauge('sonemu_tx_count_packets', 'Total number of packets sent',\r
@@ -75,9 +103,8 @@ class DCNetworkMonitor():
         self.monitor_flow_thread.start()\r
 \r
         # helper tools\r
-        #self.pushgateway_process = self.start_PushGateway()\r
-        #self.prometheus_process = self.start_Prometheus()\r
-        self.cadvisor_process = self.start_cadvisor()\r
+        # cAdvisor, the Prometheus pushgateway and the DB are started as external containers, outside of son-emu\r
+\r
 \r
     # first set some parameters, before measurement can start\r
     def setup_flow(self, vnf_name, vnf_interface=None, metric='tx_packets', cookie=0):\r
@@ -98,10 +125,8 @@ class DCNetworkMonitor():
         for connected_sw in self.net.DCNetwork_graph.neighbors(vnf_name):\r
             link_dict = self.net.DCNetwork_graph[vnf_name][connected_sw]\r
             for link in link_dict:\r
-                # logging.info("{0},{1}".format(link_dict[link],vnf_interface))\r
                 if link_dict[link]['src_port_id'] == vnf_interface:\r
                     # found the right link and connected switch\r
-                    # logging.info("{0},{1}".format(link_dict[link]['src_port_id'], vnf_source_interface))\r
                     vnf_switch = connected_sw\r
                     flow_metric['mon_port'] = link_dict[link]['dst_port_nr']\r
                     break\r
@@ -139,7 +164,15 @@ class DCNetworkMonitor():
             logging.exception("setup_metric error.")\r
             return ex.message\r
 \r
-    def stop_flow(self, vnf_name, vnf_interface=None, metric=None, cookie=0):\r
+    def stop_flow(self, vnf_name, vnf_interface=None, metric=None, cookie=0,):\r
+\r
+        # check if port is specified (vnf:port)\r
+        if vnf_interface is None and metric is not None:\r
+            # take first interface by default\r
+            connected_sw = self.net.DCNetwork_graph.neighbors(vnf_name)[0]\r
+            link_dict = self.net.DCNetwork_graph[vnf_name][connected_sw]\r
+            vnf_interface = link_dict[0]['src_port_id']\r
+\r
         for flow_dict in self.flow_metrics:\r
             if flow_dict['vnf_name'] == vnf_name and flow_dict['vnf_interface'] == vnf_interface \\r
                     and flow_dict['metric_key'] == metric and flow_dict['cookie'] == cookie:\r
@@ -150,8 +183,6 @@ class DCNetworkMonitor():
 \r
                 for collector in self.registry._collectors:\r
                     if (vnf_name, vnf_interface, cookie) in collector._metrics:\r
-                        #logging.info('2 name:{0} labels:{1} metrics:{2}'.format(collector._name, collector._labelnames,\r
-                        #                                                        collector._metrics))\r
                         collector.remove(vnf_name, vnf_interface, cookie)\r
 \r
                 delete_from_gateway(self.pushgateway, job='sonemu-SDNcontroller')\r
@@ -161,6 +192,8 @@ class DCNetworkMonitor():
                 logging.info('Stopped monitoring flow {3}: {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric, cookie))\r
                 return 'Stopped monitoring flow {3}: {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric, cookie)\r
 \r
+        return 'Error stopping monitoring flow: {0} on {1}:{2}'.format(metric, vnf_name, vnf_interface)\r
+\r
 \r
     # first set some parameters, before measurement can start\r
     def setup_metric(self, vnf_name, vnf_interface=None, metric='tx_packets'):\r
@@ -180,10 +213,8 @@ class DCNetworkMonitor():
         for connected_sw in self.net.DCNetwork_graph.neighbors(vnf_name):\r
             link_dict = self.net.DCNetwork_graph[vnf_name][connected_sw]\r
             for link in link_dict:\r
-                # logging.info("{0},{1}".format(link_dict[link],vnf_interface))\r
                 if link_dict[link]['src_port_id'] == vnf_interface:\r
                     # found the right link and connected switch\r
-                    # logging.info("{0},{1}".format(link_dict[link]['src_port_id'], vnf_source_interface))\r
                     network_metric['mon_port'] = link_dict[link]['dst_port_nr']\r
                     break\r
 \r
@@ -234,8 +265,14 @@ class DCNetworkMonitor():
 \r
     def stop_metric(self, vnf_name, vnf_interface=None, metric=None):\r
 \r
+        # check if port is specified (vnf:port)\r
+        if vnf_interface is None and metric is not None:\r
+            # take first interface by default\r
+            connected_sw = self.net.DCNetwork_graph.neighbors(vnf_name)[0]\r
+            link_dict = self.net.DCNetwork_graph[vnf_name][connected_sw]\r
+            vnf_interface = link_dict[0]['src_port_id']\r
+\r
         for metric_dict in self.network_metrics:\r
-            #logging.info('start Stopped monitoring: {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric_dict))\r
             if metric_dict['vnf_name'] == vnf_name and metric_dict['vnf_interface'] == vnf_interface \\r
                     and metric_dict['metric_key'] == metric:\r
 \r
@@ -248,7 +285,7 @@ class DCNetworkMonitor():
                 #self.registry.unregister(self.prom_metrics[metric_dict['metric_key']])\r
 \r
                 for collector in self.registry._collectors :\r
-                    #logging.info('name:{0} labels:{1} metrics:{2}'.format(collector._name, collector._labelnames, collector._metrics))\r
+\r
                     """\r
                     INFO:root:name:sonemu_rx_count_packets\r
                     labels:('vnf_name', 'vnf_interface')\r
@@ -258,11 +295,10 @@ class DCNetworkMonitor():
                     0x7f353447fd10 >}\r
                     """\r
                     logging.info('{0}'.format(collector._metrics.values()))\r
-                    #if self.prom_metrics[metric_dict['metric_key']]\r
+\r
                     if (vnf_name, vnf_interface, 'None') in collector._metrics:\r
                         logging.info('2 name:{0} labels:{1} metrics:{2}'.format(collector._name, collector._labelnames,\r
                                                                               collector._metrics))\r
-                        #collector._metrics = {}\r
                         collector.remove(vnf_name, vnf_interface, 'None')\r
 \r
                 # set values to NaN, prometheus api currently does not support removal of metrics\r
@@ -297,8 +333,13 @@ class DCNetworkMonitor():
                 logging.info('Stopped monitoring vnf: {0}'.format(vnf_name))\r
                 return 'Stopped monitoring: {0}'.format(vnf_name)\r
 \r
+        return 'Error stopping monitoring metric: {0} on {1}:{2}'.format(metric, vnf_name, vnf_interface)\r
+\r
+\r
 \r
-    # get all metrics defined in the list and export it to Prometheus\r
+\r
+\r
+    # get all flow metrics defined in the list and export them to Prometheus\r
     def get_flow_metrics(self):\r
         while self.start_monitoring:\r
 \r
@@ -308,6 +349,7 @@ class DCNetworkMonitor():
                 data = {}\r
 \r
                 data['cookie'] = flow_dict['cookie']\r
+                data['cookie_mask'] = flow_dict['cookie']\r
 \r
                 if 'tx' in flow_dict['metric_key']:\r
                     data['match'] = {'in_port':flow_dict['mon_port']}\r
@@ -317,9 +359,15 @@ class DCNetworkMonitor():
 \r
                 # query Ryu\r
                 ret = self.net.ryu_REST('stats/flow', dpid=flow_dict['switch_dpid'], data=data)\r
-                flow_stat_dict = ast.literal_eval(ret)\r
+                if isinstance(ret, dict):\r
+                    flow_stat_dict = ret\r
+                elif isinstance(ret, basestring):\r
+                    flow_stat_dict = ast.literal_eval(ret.rstrip())\r
+                else:\r
+                    flow_stat_dict = None\r
+\r
+                logging.debug('received flow stat:{0} '.format(flow_stat_dict))\r
 \r
-                #logging.info('received flow stat:{0} '.format(flow_stat_dict))\r
                 self.set_flow_metric(flow_dict, flow_stat_dict)\r
 \r
             self.monitor_flow_lock.release()\r
@@ -342,7 +390,7 @@ class DCNetworkMonitor():
 \r
                 metric_list = [metric_dict for metric_dict in self.network_metrics\r
                                if int(metric_dict['switch_dpid'])==int(dpid)]\r
-                #logging.info('1set prom packets:{0} '.format(self.network_metrics))\r
+\r
                 for metric_dict in metric_list:\r
                     self.set_network_metric(metric_dict, port_stat_dict)\r
 \r
@@ -364,23 +412,21 @@ class DCNetworkMonitor():
             if int(port_stat['port_no']) == int(mon_port):\r
                 port_uptime = port_stat['duration_sec'] + port_stat['duration_nsec'] * 10 ** (-9)\r
                 this_measurement = int(port_stat[metric_key])\r
-                #logging.info('set prom packets:{0} {1}:{2}'.format(this_measurement, vnf_name, vnf_interface))\r
 \r
                 # set prometheus metric\r
                 self.prom_metrics[metric_dict['metric_key']].\\r
                     labels({'vnf_name': vnf_name, 'vnf_interface': vnf_interface, 'flow_id': None}).\\r
                     set(this_measurement)\r
-                #push_to_gateway(self.pushgateway, job='SDNcontroller',\r
-                #                grouping_key={'metric':metric_dict['metric_key']}, registry=self.registry)\r
 \r
                 # 1 single monitor job for all metrics of the SDN controller\r
                 pushadd_to_gateway(self.pushgateway, job='sonemu-SDNcontroller', registry=self.registry)\r
 \r
+                # the rate is also calculated here, but it is not used for now\r
+                # (the rate can also be queried easily from Prometheus)\r
                 if previous_monitor_time <= 0 or previous_monitor_time >= port_uptime:\r
                     metric_dict['previous_measurement'] = int(port_stat[metric_key])\r
                     metric_dict['previous_monitor_time'] = port_uptime\r
                     # do first measurement\r
-                    #logging.info('first measurement')\r
                     time.sleep(1)\r
                     self.monitor_lock.release()\r
 \r
@@ -390,7 +436,6 @@ class DCNetworkMonitor():
                 else:\r
                     time_delta = (port_uptime - metric_dict['previous_monitor_time'])\r
                     metric_rate = (this_measurement - metric_dict['previous_measurement']) / float(time_delta)\r
-                    #logging.info('metric: {0} rate:{1}'.format(metric_dict['metric_key'], metric_rate))\r
 \r
                 metric_dict['previous_measurement'] = this_measurement\r
                 metric_dict['previous_monitor_time'] = port_uptime\r
@@ -401,7 +446,6 @@ class DCNetworkMonitor():
 \r
     def set_flow_metric(self, metric_dict, flow_stat_dict):\r
         # vnf tx is the datacenter switch rx and vice-versa\r
-        #metric_key = self.switch_tx_rx(metric_dict['metric_key'])\r
         metric_key = metric_dict['metric_key']\r
         switch_dpid = metric_dict['switch_dpid']\r
         vnf_name = metric_dict['vnf_name']\r
@@ -410,44 +454,24 @@ class DCNetworkMonitor():
         previous_monitor_time = metric_dict['previous_monitor_time']\r
         cookie = metric_dict['cookie']\r
 \r
-        # TODO aggregate all found flow stats\r
-        flow_stat = flow_stat_dict[str(switch_dpid)][0]\r
-        if 'bytes' in  metric_key:\r
-            counter = flow_stat['byte_count']\r
-        elif 'packet' in metric_key:\r
-            counter = flow_stat['packet_count']\r
+        counter = 0\r
+        for flow_stat in flow_stat_dict[str(switch_dpid)]:\r
+            if 'bytes' in metric_key:\r
+                counter += flow_stat['byte_count']\r
+            elif 'packet' in metric_key:\r
+                counter += flow_stat['packet_count']\r
 \r
+        flow_stat = flow_stat_dict[str(switch_dpid)][0]\r
         flow_uptime = flow_stat['duration_sec'] + flow_stat['duration_nsec'] * 10 ** (-9)\r
 \r
         self.prom_metrics[metric_dict['metric_key']]. \\r
             labels({'vnf_name': vnf_name, 'vnf_interface': vnf_interface, 'flow_id': cookie}). \\r
             set(counter)\r
-        pushadd_to_gateway(self.pushgateway, job='sonemu-SDNcontroller', registry=self.registry)\r
-\r
-        #logging.exception('metric {0} not found on {1}:{2}'.format(metric_key, vnf_name, vnf_interface))\r
-        #return 'metric {0} not found on {1}:{2}'.format(metric_key, vnf_name, vnf_interface)\r
+        try:\r
+            pushadd_to_gateway(self.pushgateway, job='sonemu-SDNcontroller', registry=self.registry)\r
+        except Exception, e:\r
+            logging.warning("Pushgateway not reachable: {0} {1}".format(Exception, e))\r
 \r
-    def query_Prometheus(self, query):\r
-        '''\r
-        escaped_chars='{}[]'\r
-        for old in escaped_chars:\r
-            new = '\{0}'.format(old)\r
-            query = query.replace(old, new)\r
-        '''\r
-        url = self.prometheus_REST_api + '/' + 'api/v1/query?query=' + query\r
-        #logging.info('query:{0}'.format(url))\r
-        req = urllib2.Request(url)\r
-        ret = urllib2.urlopen(req).read()\r
-        ret = ast.literal_eval(ret)\r
-        if ret['status'] == 'success':\r
-            #logging.info('return:{0}'.format(ret))\r
-            try:\r
-                ret = ret['data']['result'][0]['value']\r
-            except:\r
-                ret = None\r
-        else:\r
-            ret = None\r
-        return ret\r
 \r
     def start_Prometheus(self, port=9090):\r
         # prometheus.yml configuration file is located in the same directory as this file\r
@@ -496,6 +520,7 @@ class DCNetworkMonitor():
         self.monitor_thread.join()\r
         self.monitor_flow_thread.join()\r
 \r
+        # these containers are used for monitoring but are now started outside of son-emu\r
         '''\r
         if self.prometheus_process is not None:\r
             logging.info('stopping prometheus container')\r
@@ -508,13 +533,13 @@ class DCNetworkMonitor():
             self.pushgateway_process.terminate()\r
             self.pushgateway_process.kill()\r
             self._stop_container('pushgateway')\r
-        '''\r
 \r
         if self.cadvisor_process is not None:\r
             logging.info('stopping cadvisor container')\r
             self.cadvisor_process.terminate()\r
             self.cadvisor_process.kill()\r
             self._stop_container('cadvisor')\r
+        '''\r
 \r
     def switch_tx_rx(self,metric=''):\r
         # when monitoring vnfs, the tx of the datacenter switch is actually the rx of the vnf\r
@@ -537,33 +562,3 @@ class DCNetworkMonitor():
                name]\r
         Popen(cmd).wait()\r
 \r
-    def profile(self, mgmt_ip, rate, input_ip, vnf_uuid ):\r
-\r
-        ssh = paramiko.SSHClient()\r
-        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())\r
-        #ssh.connect(mgmt_ip, username='steven', password='test')\r
-        ssh.connect(mgmt_ip, username='root', password='root')\r
-\r
-        iperf_cmd = 'iperf -c {0} -u -l18 -b{1}M -t1000 &'.format(input_ip, rate)\r
-        if rate > 0:\r
-            stdin, stdout, stderr = ssh.exec_command(iperf_cmd)\r
-\r
-        start_time = time.time()\r
-        query_cpu = '(sum(rate(container_cpu_usage_seconds_total{{id="/docker/{0}"}}[{1}s])))'.format(vnf_uuid, 1)\r
-        while (time.time() - start_time) < 15:\r
-            data = self.query_Prometheus(query_cpu)\r
-            # logging.info('rate: {1} data:{0}'.format(data, rate))\r
-            gevent.sleep(0)\r
-            time.sleep(1)\r
-\r
-        query_cpu2 = '(sum(rate(container_cpu_usage_seconds_total{{id="/docker/{0}"}}[{1}s])))'.format(vnf_uuid, 8)\r
-        cpu_load = float(self.query_Prometheus(query_cpu2)[1])\r
-        output = 'rate: {1}Mbps; cpu_load: {0}%'.format(round(cpu_load * 100, 2), rate)\r
-        output_line = output\r
-        logging.info(output_line)\r
-\r
-        stop_iperf = 'pkill -9 iperf'\r
-        stdin, stdout, stderr = ssh.exec_command(stop_iperf)\r
-\r
-        return output_line\r
-\r