Merge pull request #85 from stevenvanrossem/master
author    stevenvanrossem <steven.vanrossem@intec.ugent.be>
Fri, 22 Apr 2016 20:20:52 +0000 (22:20 +0200)
committer stevenvanrossem <steven.vanrossem@intec.ugent.be>
Fri, 22 Apr 2016 20:20:52 +0000 (22:20 +0200)
update monitoring features of the emulator

src/emuvim/api/zerorpc/network.py
src/emuvim/cli/monitor.py
src/emuvim/dcemulator/monitoring.py
src/emuvim/dcemulator/prometheus.yml
src/emuvim/examples/monitoring_demo_topology.py [new file with mode: 0755]

index ac17e25..8278422 100644 (file)
@@ -102,9 +102,9 @@ class DCNetworkApi(object):
 
     # remove the rate measurement for a vnf interface
     def stop_metric(self, vnf_name, vnf_interface, metric):
-        logging.debug("RPC CALL: setup metric")
+        logging.debug("RPC CALL: stop metric")
         try:
-            c = self.net.monitor_agent.remove_metric(vnf_name, vnf_interface, metric)
+            c = self.net.monitor_agent.stop_metric(vnf_name, vnf_interface, metric)
             return c
         except Exception as ex:
             logging.exception("RPC error.")
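
With the rename, the RPC method now matches the monitor agent call it forwards to. A minimal sketch of invoking it over zerorpc, assuming the monitoring endpoint listens on port 5151 (as in the demo topology added below); the VNF name, interface, and metric values are placeholders:

    import zerorpc

    c = zerorpc.Client()
    c.connect("tcp://127.0.0.1:5151")
    # stop the rate measurement that was set up earlier for this interface
    print c.stop_metric("vnf1", "vnf1-eth0", "tx_packets")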
index 040fa13..14cce08 100755 (executable)
@@ -1,5 +1,5 @@
 """\r
-son-emu network CLI\r
+son-emu monitor CLI\r
 (c) 2016 by Manuel Peuster <manuel.peuster@upb.de>\r
 """\r
 \r
@@ -40,7 +40,7 @@ class ZeroRpcClient(object):
     def stop_metric(self, args):\r
         vnf_name = self._parse_vnf_name(args.get("vnf_name"))\r
         vnf_interface = self._parse_vnf_interface(args.get("vnf_name"))\r
-        r = self.c.remove_metric(\r
+        r = self.c.stop_metric(\r
             vnf_name,\r
             vnf_interface,\r
             args.get("metric"))\r
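
Note that both vnf_name and vnf_interface are parsed out of the single "vnf_name" argument. A sketch of driving the CLI class from Python, assuming ZeroRpcClient() can be constructed with its defaults and that the "vnf1:intf1" name:interface form is what _parse_vnf_interface() expects (neither is confirmed by this diff):

    from emuvim.cli.monitor import ZeroRpcClient

    client = ZeroRpcClient()
    client.stop_metric({"vnf_name": "vnf1:intf1", "metric": "tx_packets"})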
index 82411fd..54e7986 100755 (executable)
@@ -5,7 +5,8 @@ import logging
 from mininet.node import  OVSSwitch
 import ast
 import time
-from prometheus_client import start_http_server, Summary, Histogram, Gauge, Counter, REGISTRY
+from prometheus_client import start_http_server, Summary, Histogram, Gauge, Counter, REGISTRY, CollectorRegistry, \
+    pushadd_to_gateway, push_to_gateway, delete_from_gateway
 import threading
 from subprocess import Popen, PIPE
 import os
@@ -25,17 +26,19 @@ class DCNetworkMonitor():
         self.REST_api = 'http://{0}:{1}'.format(self.ip,self.port)
 
         # helper variables to calculate the metrics
+        self.pushgateway = 'localhost:9091'
         # Start up the server to expose the metrics to Prometheus.
-        start_http_server(8000)
+        #start_http_server(8000)
         # supported Prometheus metrics
+        self.registry = CollectorRegistry()
         self.prom_tx_packet_count = Gauge('sonemu_tx_count_packets', 'Total number of packets sent',
-                                          ['vnf_name', 'vnf_interface'])
+                                          ['vnf_name', 'vnf_interface'], registry=self.registry)
         self.prom_rx_packet_count = Gauge('sonemu_rx_count_packets', 'Total number of packets received',
-                                          ['vnf_name', 'vnf_interface'])
+                                          ['vnf_name', 'vnf_interface'], registry=self.registry)
         self.prom_tx_byte_count = Gauge('sonemu_tx_count_bytes', 'Total number of bytes sent',
-                                        ['vnf_name', 'vnf_interface'])
+                                        ['vnf_name', 'vnf_interface'], registry=self.registry)
         self.prom_rx_byte_count = Gauge('sonemu_rx_count_bytes', 'Total number of bytes received',
-                                        ['vnf_name', 'vnf_interface'])
+                                        ['vnf_name', 'vnf_interface'], registry=self.registry)
 
         self.prom_metrics={'tx_packets':self.prom_tx_packet_count, 'rx_packets':self.prom_rx_packet_count,
                            'tx_bytes':self.prom_tx_byte_count,'rx_bytes':self.prom_rx_byte_count}
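
The gauges now live in a dedicated CollectorRegistry rather than the process-global REGISTRY, so they can be pushed (and later deleted) as one group. A minimal standalone sketch of the pattern, with placeholder label values:

    from prometheus_client import CollectorRegistry, Gauge, pushadd_to_gateway

    registry = CollectorRegistry()  # private registry, not the global REGISTRY
    tx_packets = Gauge('sonemu_tx_count_packets', 'Total number of packets sent',
                       ['vnf_name', 'vnf_interface'], registry=registry)
    tx_packets.labels('vnf1', 'vnf1-eth0').set(42)  # label values are placeholders
    # push the whole registry to the gateway under a single job name
    pushadd_to_gateway('localhost:9091', job='sonemu-SDNcontroller', registry=registry)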
@@ -53,6 +56,7 @@ class DCNetworkMonitor():
         mon_port = None
         }
         '''
+        self.monitor_lock = threading.Lock()
         self.network_metrics = []
 
         # start monitoring thread
@@ -61,6 +65,7 @@ class DCNetworkMonitor():
         self.monitor_thread.start()
 
         # helper tools
+        self.pushgateway_process = self.start_PushGateway()
         self.prometheus_process = self.start_Prometheus()
         self.cadvisor_process = self.start_cadvisor()
 
@@ -121,7 +126,11 @@ class DCNetworkMonitor():
             network_metric['switch_dpid'] = int(str(next_node.dpid), 16)
             network_metric['metric_key'] = metric
 
+            self.monitor_lock.acquire()
+
             self.network_metrics.append(network_metric)
+            self.monitor_lock.release()
+
 
             logging.info('Started monitoring: {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric))
             return 'Started monitoring: {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric)
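
The bare acquire()/release() pairs added around self.network_metrics protect the list against the monitoring thread, but they leak the lock if an exception is raised in between. An equivalent, exception-safe form (a sketch, not part of this patch):

    # a with-block releases monitor_lock even when an exception escapes
    with self.monitor_lock:
        self.network_metrics.append(network_metric)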
@@ -131,17 +140,48 @@ class DCNetworkMonitor():
             return ex.message
 
     def stop_metric(self, vnf_name, vnf_interface, metric):
+
         for metric_dict in self.network_metrics:
             if metric_dict['vnf_name'] == vnf_name and metric_dict['vnf_interface'] == vnf_interface \
                     and metric_dict['metric_key'] == metric:
 
+                self.monitor_lock.acquire()
+
                 self.network_metrics.remove(metric_dict)
 
                 #this removes the complete metric, all labels...
                 #REGISTRY.unregister(self.prom_metrics[metric_dict['metric_key']])
+                #self.registry.unregister(self.prom_metrics[metric_dict['metric_key']])
+
+                for collector in self.registry._collectors:
+                    logging.info('name:{0} labels:{1} metrics:{2}'.format(collector._name, collector._labelnames, collector._metrics))
+                    """
+                    INFO:root:name:sonemu_rx_count_packets
+                    labels:('vnf_name', 'vnf_interface')
+                    metrics:{(u'tsrc', u'output'): <prometheus_client.core.Gauge object at 0x7f353447fd10>}
+                    """
+                    logging.info('{0}'.format(collector._metrics.values()))
+                    #if self.prom_metrics[metric_dict['metric_key']]
+                    if (vnf_name, vnf_interface) in collector._metrics:
+                        logging.info('2 name:{0} labels:{1} metrics:{2}'.format(collector._name, collector._labelnames,
+                                                                                collector._metrics))
+                        #collector._metrics = {}
+                        collector.remove(vnf_name, vnf_interface)
 
                 # set values to NaN, prometheus api currently does not support removal of metrics
-                self.prom_metrics[metric_dict['metric_key']].labels(vnf_name, vnf_interface).set(float('nan'))
+                #self.prom_metrics[metric_dict['metric_key']].labels(vnf_name, vnf_interface).set(float('nan'))
+
+                # this removes the complete metric, all labels...
+                # 1 single monitor job for all metrics of the SDN controller
+                # we can only remove from the pushgateway the grouping keys (labels) that we defined for pushadd_to_gateway
+                # we cannot specify labels of individual metrics to be removed
+                # if we need to remove the metrics separately, we need to give each a separate grouping key, and probably a different registry as well
+                delete_from_gateway(self.pushgateway, job='sonemu-SDNcontroller')
+
+                self.monitor_lock.release()
 
                 logging.info('Stopped monitoring: {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric))
                 return 'Stopped monitoring: {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric)
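
As the comments above explain, delete_from_gateway() can only drop a whole grouping key, so with the single 'sonemu-SDNcontroller' job every pushed metric vanishes at once. Per-metric deletion would need per-metric grouping keys, roughly like this sketch (the {'metric': ...} key layout is an assumption, not what the patch does):

    from prometheus_client import pushadd_to_gateway, delete_from_gateway

    # push each metric under its own grouping key ...
    pushadd_to_gateway('localhost:9091', job='sonemu-SDNcontroller',
                       grouping_key={'metric': 'tx_packets'}, registry=registry)
    # ... so it can later be deleted without touching the other metrics
    delete_from_gateway('localhost:9091', job='sonemu-SDNcontroller',
                        grouping_key={'metric': 'tx_packets'})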
@@ -150,6 +190,9 @@ class DCNetworkMonitor():
     # get all metrics defined in the list and export them to Prometheus
     def get_network_metrics(self):
         while self.start_monitoring:
+
+            self.monitor_lock.acquire()
+
             # group metrics by dpid to optimize the rest api calls
             dpid_list = [metric_dict['switch_dpid'] for metric_dict in self.network_metrics]
             dpid_set = set(dpid_list)
@@ -166,6 +209,7 @@ class DCNetworkMonitor():
                 for metric_dict in metric_list:
                     self.set_network_metric(metric_dict, port_stat_dict)
 
+            self.monitor_lock.release()
             time.sleep(1)
 
     # add metric to the list to export to Prometheus, parse the Ryu port-stats reply
@@ -186,7 +230,14 @@ class DCNetworkMonitor():
                 #logging.info('set prom packets:{0} {1}:{2}'.format(this_measurement, vnf_name, vnf_interface))
 
                 # set prometheus metric
-                self.prom_metrics[metric_dict['metric_key']].labels(vnf_name, vnf_interface).set(this_measurement)
+                self.prom_metrics[metric_dict['metric_key']].\
+                    labels({'vnf_name':vnf_name, 'vnf_interface':vnf_interface}).\
+                    set(this_measurement)
+                #push_to_gateway(self.pushgateway, job='SDNcontroller',
+                #                grouping_key={'metric':metric_dict['metric_key']}, registry=self.registry)
+
+                # 1 single monitor job for all metrics of the SDN controller
+                pushadd_to_gateway(self.pushgateway, job='sonemu-SDNcontroller', registry=self.registry)
 
                 if previous_monitor_time <= 0 or previous_monitor_time >= port_uptime:
                     metric_dict['previous_measurement'] = int(port_stat[metric_key])
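
push_to_gateway() and pushadd_to_gateway() differ in how the gateway treats metrics already stored under the same grouping key: push (HTTP PUT) replaces the entire group, while pushadd (HTTP POST) only replaces metrics with the same name and keeps the rest. That is presumably why the per-metric push above stays commented out while one pushadd per cycle covers all gauges:

    # replaces everything stored under the job's grouping key (HTTP PUT)
    push_to_gateway('localhost:9091', job='sonemu-SDNcontroller', registry=registry)
    # only replaces same-named metrics, keeps the rest of the group (HTTP POST)
    pushadd_to_gateway('localhost:9091', job='sonemu-SDNcontroller', registry=registry)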
@@ -194,16 +245,17 @@ class DCNetworkMonitor():
                     # do first measurement
                     #logging.info('first measurement')
                     time.sleep(1)
-                    byte_rate = self.get_network_metrics()
-                    return byte_rate
+                    self.monitor_lock.release()
+                    metric_rate = self.get_network_metrics()
+                    return metric_rate
                 else:
                     time_delta = (port_uptime - metric_dict['previous_monitor_time'])
-                    byte_rate = (this_measurement - metric_dict['previous_measurement']) / float(time_delta)
+                    metric_rate = (this_measurement - metric_dict['previous_measurement']) / float(time_delta)
                     # logging.info('uptime:{2} delta:{0} rate:{1}'.format(time_delta,byte_rate,port_uptime))
 
                 metric_dict['previous_measurement'] = this_measurement
                 metric_dict['previous_monitor_time'] = port_uptime
-                return byte_rate
+                return metric_rate
 
         logging.exception('metric {0} not found on {1}:{2}'.format(metric_key, vnf_name, vnf_interface))
         return 'metric {0} not found on {1}:{2}'.format(metric_key, vnf_name, vnf_interface)
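
The rate is simply the difference between two cumulative counter samples divided by the difference in port uptime reported by Ryu. A worked sketch with hypothetical numbers:

    # two consecutive port-stats samples (hypothetical values)
    previous_measurement, previous_monitor_time = 1000, 10.0
    this_measurement, port_uptime = 6000, 15.0
    time_delta = port_uptime - previous_monitor_time
    metric_rate = (this_measurement - previous_measurement) / float(time_delta)
    # 5000 bytes in 5 seconds -> 1000 bytes/s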
@@ -228,6 +280,18 @@ class DCNetworkMonitor():
         logging.info('Start Prometheus container {0}'.format(cmd))
         return Popen(cmd)
 
+    def start_PushGateway(self, port=9091):
+        cmd = ["docker",
+               "run",
+               "-d",
+               "-p", "{0}:9091".format(port),
+               "--name", "pushgateway",
+               "prom/pushgateway"
+               ]
+
+        logging.info('Start Prometheus Push Gateway container {0}'.format(cmd))
+        return Popen(cmd)
+
     def start_cadvisor(self, port=8090):
         cmd = ["docker",
                "run",
@@ -254,6 +318,12 @@ class DCNetworkMonitor():
             self.prometheus_process.kill()
             self._stop_container('prometheus')
 
+        if self.pushgateway_process is not None:
+            logging.info('stopping pushgateway container')
+            self.pushgateway_process.terminate()
+            self.pushgateway_process.kill()
+            self._stop_container('pushgateway')
+
         if self.cadvisor_process is not None:
             logging.info('stopping cadvisor container')
             self.cadvisor_process.terminate()
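
The pushgateway container maps host port 9091, so a quick sanity check is to fetch its /metrics endpoint and look for the sonemu_* gauges. A sketch, assuming the default port mapping and Python 2 (matching this code base):

    import urllib2

    metrics = urllib2.urlopen('http://localhost:9091/metrics').read()
    print [line for line in metrics.splitlines() if line.startswith('sonemu_')]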
index 2915578..4db5b6f 100644 (file)
@@ -34,3 +34,12 @@ scrape_configs:
     target_groups:
       - targets: ['172.17.0.1:8090']
 
+  - job_name: 'PushGateway'
+
+    # Override the global default and scrape targets from this job every 5 seconds.
+    scrape_interval: 5s
+
+    target_groups:
+      - targets: ['172.17.0.1:9091']
+
+
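
With this scrape job, Prometheus pulls the pushed gauges from the gateway every 5 seconds, and they become queryable like any scraped series. A sketch, assuming Prometheus listens on localhost:9090 and supports the v1 HTTP API:

    import urllib, urllib2

    params = urllib.urlencode({'query': 'sonemu_rx_count_packets'})
    print urllib2.urlopen('http://localhost:9090/api/v1/query?' + params).read()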
diff --git a/src/emuvim/examples/monitoring_demo_topology.py b/src/emuvim/examples/monitoring_demo_topology.py
new file mode 100755 (executable)
index 0000000..16c2b50
--- /dev/null
@@ -0,0 +1,121 @@
+"""
+This is an example topology for the distributed cloud emulator (dcemulator).
+(c) 2015 by Manuel Peuster <manuel.peuster@upb.de>
+
+
+This is an example that shows how a user of the emulation tool can
+define network topologies with multiple emulated cloud data centers.
+
+The definition is done with a Python API which looks very similar to the
+Mininet API (in fact it is a wrapper for it).
+
+We only specify the topology *between* data centers, not within a single
+data center (data center internal setups or placements are not of interest,
+we want to experiment with VNF chains deployed across multiple PoPs).
+
+The original Mininet API is completely hidden and should not be used by this
+script.
+"""
+import logging
+from mininet.log import setLogLevel
+from emuvim.dcemulator.net import DCNetwork
+from emuvim.api.zerorpc.compute import ZeroRpcApiEndpoint
+from emuvim.api.zerorpc.network import ZeroRpcApiEndpointDCNetwork
+
+logging.basicConfig(level=logging.INFO)
+
+
+def create_topology1():
+    """
+    1. Create a data center network object (DCNetwork) with monitoring enabled
+    """
+    net = DCNetwork(monitor=True)
+
+    """
+    1b. add a monitoring agent to the DCNetwork
+    """
+    mon_api = ZeroRpcApiEndpointDCNetwork("0.0.0.0", 5151)
+    mon_api.connectDCNetwork(net)
+    mon_api.start()
+    """
+    2. Add (logical) data centers to the topology
+       (each data center is one "bigswitch" in our simplified
+        first prototype)
+    """
+    dc1 = net.addDatacenter("datacenter1")
+    dc2 = net.addDatacenter("datacenter2")
+    dc3 = net.addDatacenter("long_data_center_name3")
+    dc4 = net.addDatacenter(
+        "datacenter4",
+        metadata={"mydata": "we can also add arbitrary metadata to each DC"})
+
+    """
+    3. You can add additional SDN switches for data center
+       interconnections to the network.
+    """
+    s1 = net.addSwitch("s1")
+
+    """
+    4. Add links between your data centers and additional switches
+       to define your topology.
+       These links can use Mininet's features to limit bandwidth and add
+       delay or jitter.
+    """
+    net.addLink(dc1, dc2)
+    net.addLink("datacenter1", s1)
+    net.addLink(s1, dc3)
+    net.addLink(s1, "datacenter4")
+
+    """
+    5. We want to access and control our data centers from the outside,
+       e.g., we want to connect an orchestrator to start/stop compute
+       resources, a.k.a. VNFs (represented by Docker containers in the
+       emulated network).
+
+       So we need to instantiate API endpoints (e.g. a zerorpc or REST
+       interface). Depending on the endpoint implementations, we can connect
+       one or more data centers to it, which can then be controlled through
+       this API, e.g., start/stop/list compute instances.
+    """
+    # create a new instance of an endpoint implementation
+    zapi1 = ZeroRpcApiEndpoint("0.0.0.0", 4242)
+    # connect data centers to this endpoint
+    zapi1.connectDatacenter(dc1)
+    zapi1.connectDatacenter(dc2)
+    zapi1.connectDatacenter(dc3)
+    zapi1.connectDatacenter(dc4)
+    # run API endpoint server (in another thread, don't block)
+    zapi1.start()
+
+    """
+    5.1. For our example, we create a second endpoint to illustrate that
+         this is supported by our design. This feature allows us to have
+         one API endpoint for each data center. This makes the emulation
+         environment more realistic because you can easily create one
+         OpenStack-like REST API endpoint for *each* data center.
+         This will look like a real-world multi-PoP/data-center deployment
+         from the perspective of an orchestrator.
+    """
+    zapi2 = ZeroRpcApiEndpoint("0.0.0.0", 4343)
+    zapi2.connectDatacenter(dc3)
+    zapi2.connectDatacenter(dc4)
+    zapi2.start()
+
+    """
+    6. Finally, we are done and can start our network (the emulator).
+       We can also enter the Mininet CLI to interact with our compute
+       resources interactively (just like in default Mininet).
+       But we can also implement fully automated experiments that
+       can be executed again and again.
+    """
+    net.start()
+    net.CLI()
+    # when the user types exit in the CLI, we stop the emulator
+    net.stop()
+
+
+def main():
+    setLogLevel('info')  # set Mininet loglevel
+    create_topology1()
+
+
+if __name__ == '__main__':
+    main()
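
To try the new topology, the script can be run directly (Mininet requires root); the monitoring endpoint is then reachable on port 5151 as configured above. A minimal sketch of reusing it from another script:

    # run as root, e.g. via: sudo python monitoring_demo_topology.py
    from emuvim.examples.monitoring_demo_topology import create_topology1

    create_topology1()  # blocks in the Mininet CLI until 'exit'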