Merge pull request #85 from stevenvanrossem/master
update monitoring features of the emulator
diff --git a/src/emuvim/api/zerorpc/network.py b/src/emuvim/api/zerorpc/network.py
index ac17e25..8278422 100644
--- a/src/emuvim/api/zerorpc/network.py
+++ b/src/emuvim/api/zerorpc/network.py
@@ -102,9 +102,9 @@
# remove the rate measurement for a vnf interface
def stop_metric(self, vnf_name, vnf_interface, metric):
- logging.debug("RPC CALL: setup metric")
+ logging.debug("RPC CALL: stop metric")
try:
- c = self.net.monitor_agent.remove_metric(vnf_name, vnf_interface, metric)
+ c = self.net.monitor_agent.stop_metric(vnf_name, vnf_interface, metric)
return c
except Exception as ex:
logging.exception("RPC error.")
diff --git a/src/emuvim/cli/monitor.py b/src/emuvim/cli/monitor.py
index 040fa13..14cce08 100755
--- a/src/emuvim/cli/monitor.py
+++ b/src/emuvim/cli/monitor.py
@@ -1,5 +1,5 @@
"""
-son-emu network CLI
+son-emu monitor CLI
(c) 2016 by Manuel Peuster <manuel.peuster@upb.de>
"""
@@ -40,7 +40,7 @@
def stop_metric(self, args):
vnf_name = self._parse_vnf_name(args.get("vnf_name"))
vnf_interface = self._parse_vnf_interface(args.get("vnf_name"))
- r = self.c.remove_metric(
+ r = self.c.stop_metric(
vnf_name,
vnf_interface,
args.get("metric"))
diff --git a/src/emuvim/dcemulator/monitoring.py b/src/emuvim/dcemulator/monitoring.py
index 82411fd..54e7986 100755
--- a/src/emuvim/dcemulator/monitoring.py
+++ b/src/emuvim/dcemulator/monitoring.py
@@ -5,7 +5,8 @@
from mininet.node import OVSSwitch
import ast
import time
-from prometheus_client import start_http_server, Summary, Histogram, Gauge, Counter, REGISTRY
+from prometheus_client import start_http_server, Summary, Histogram, Gauge, Counter, REGISTRY, CollectorRegistry, \
+ pushadd_to_gateway, push_to_gateway, delete_from_gateway
import threading
from subprocess import Popen, PIPE
import os
@@ -25,17 +26,19 @@
self.REST_api = 'http://{0}:{1}'.format(self.ip,self.port)
# helper variables to calculate the metrics
+ self.pushgateway = 'localhost:9091'
# Start up the server to expose the metrics to Prometheus.
- start_http_server(8000)
+ #start_http_server(8000)
# supported Prometheus metrics
+ self.registry = CollectorRegistry()
self.prom_tx_packet_count = Gauge('sonemu_tx_count_packets', 'Total number of packets sent',
- ['vnf_name', 'vnf_interface'])
+ ['vnf_name', 'vnf_interface'], registry=self.registry)
self.prom_rx_packet_count = Gauge('sonemu_rx_count_packets', 'Total number of packets received',
- ['vnf_name', 'vnf_interface'])
+ ['vnf_name', 'vnf_interface'], registry=self.registry)
self.prom_tx_byte_count = Gauge('sonemu_tx_count_bytes', 'Total number of bytes sent',
- ['vnf_name', 'vnf_interface'])
+ ['vnf_name', 'vnf_interface'], registry=self.registry)
self.prom_rx_byte_count = Gauge('sonemu_rx_count_bytes', 'Total number of bytes received',
- ['vnf_name', 'vnf_interface'])
+ ['vnf_name', 'vnf_interface'], registry=self.registry)
self.prom_metrics={'tx_packets':self.prom_tx_packet_count, 'rx_packets':self.prom_rx_packet_count,
'tx_bytes':self.prom_tx_byte_count,'rx_bytes':self.prom_rx_byte_count}
@@ -53,6 +56,7 @@
mon_port = None
}
'''
+ self.monitor_lock = threading.Lock()
self.network_metrics = []
# start monitoring thread
@@ -61,6 +65,7 @@
self.monitor_thread.start()
# helper tools
+ self.pushgateway_process = self.start_PushGateway()
self.prometheus_process = self.start_Prometheus()
self.cadvisor_process = self.start_cadvisor()
@@ -121,7 +126,11 @@
network_metric['switch_dpid'] = int(str(next_node.dpid), 16)
network_metric['metric_key'] = metric
+ self.monitor_lock.acquire()
+
self.network_metrics.append(network_metric)
+ self.monitor_lock.release()
+
logging.info('Started monitoring: {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric))
return 'Started monitoring: {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric)
@@ -131,17 +140,48 @@
return ex.message
def stop_metric(self, vnf_name, vnf_interface, metric):
+
for metric_dict in self.network_metrics:
if metric_dict['vnf_name'] == vnf_name and metric_dict['vnf_interface'] == vnf_interface \
and metric_dict['metric_key'] == metric:
+ self.monitor_lock.acquire()
+
self.network_metrics.remove(metric_dict)
#this removes the complete metric, all labels...
#REGISTRY.unregister(self.prom_metrics[metric_dict['metric_key']])
+ #self.registry.unregister(self.prom_metrics[metric_dict['metric_key']])
+
+ for collector in self.registry._collectors :
+ logging.info('name:{0} labels:{1} metrics:{2}'.format(collector._name, collector._labelnames, collector._metrics))
+ """
+ INFO:root:name:sonemu_rx_count_packets
+ labels:('vnf_name', 'vnf_interface')
+ metrics:{(u'tsrc', u'output'): < prometheus_client.core.Gauge
+ object
+ at
+ 0x7f353447fd10 >}
+ """
+ logging.info('{0}'.format(collector._metrics.values()))
+ #if self.prom_metrics[metric_dict['metric_key']]
+ if (vnf_name, vnf_interface) in collector._metrics:
+ logging.info('2 name:{0} labels:{1} metrics:{2}'.format(collector._name, collector._labelnames,
+ collector._metrics))
+ #collector._metrics = {}
+ collector.remove(vnf_name, vnf_interface)
# set values to NaN, prometheus api currently does not support removal of metrics
- self.prom_metrics[metric_dict['metric_key']].labels(vnf_name, vnf_interface).set(float('nan'))
+ #self.prom_metrics[metric_dict['metric_key']].labels(vnf_name, vnf_interface).set(float('nan'))
+
+ # this removes the complete metric, all labels...
+ # 1 single monitor job for all metrics of the SDN controller
+ # we can only remove from the pushgateway grouping keys(labels) which we have defined for the add_to_pushgateway
+    # we cannot specify labels from the metrics to be removed
+    # if we need to remove the metrics separately, we need to give them a separate grouping key, and probably a different registry also
+ delete_from_gateway(self.pushgateway, job='sonemu-SDNcontroller')
+
+ self.monitor_lock.release()
logging.info('Stopped monitoring: {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric))
return 'Stopped monitoring: {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric)
@@ -150,6 +190,9 @@
# get all metrics defined in the list and export it to Prometheus
def get_network_metrics(self):
while self.start_monitoring:
+
+ self.monitor_lock.acquire()
+
# group metrics by dpid to optimize the rest api calls
dpid_list = [metric_dict['switch_dpid'] for metric_dict in self.network_metrics]
dpid_set = set(dpid_list)
@@ -166,6 +209,7 @@
for metric_dict in metric_list:
self.set_network_metric(metric_dict, port_stat_dict)
+ self.monitor_lock.release()
time.sleep(1)
# add metric to the list to export to Prometheus, parse the Ryu port-stats reply
@@ -186,7 +230,14 @@
#logging.info('set prom packets:{0} {1}:{2}'.format(this_measurement, vnf_name, vnf_interface))
# set prometheus metric
- self.prom_metrics[metric_dict['metric_key']].labels(vnf_name, vnf_interface).set(this_measurement)
+ self.prom_metrics[metric_dict['metric_key']].\
+ labels({'vnf_name':vnf_name, 'vnf_interface':vnf_interface}).\
+ set(this_measurement)
+ #push_to_gateway(self.pushgateway, job='SDNcontroller',
+ # grouping_key={'metric':metric_dict['metric_key']}, registry=self.registry)
+
+ # 1 single monitor job for all metrics of the SDN controller
+ pushadd_to_gateway(self.pushgateway, job='sonemu-SDNcontroller', registry=self.registry)
if previous_monitor_time <= 0 or previous_monitor_time >= port_uptime:
metric_dict['previous_measurement'] = int(port_stat[metric_key])
@@ -194,16 +245,17 @@
# do first measurement
#logging.info('first measurement')
time.sleep(1)
- byte_rate = self.get_network_metrics()
- return byte_rate
+ self.monitor_lock.release()
+ metric_rate = self.get_network_metrics()
+ return metric_rate
else:
time_delta = (port_uptime - metric_dict['previous_monitor_time'])
- byte_rate = (this_measurement - metric_dict['previous_measurement']) / float(time_delta)
+ metric_rate = (this_measurement - metric_dict['previous_measurement']) / float(time_delta)
# logging.info('uptime:{2} delta:{0} rate:{1}'.format(time_delta,byte_rate,port_uptime))
metric_dict['previous_measurement'] = this_measurement
metric_dict['previous_monitor_time'] = port_uptime
- return byte_rate
+ return metric_rate
logging.exception('metric {0} not found on {1}:{2}'.format(metric_key, vnf_name, vnf_interface))
return 'metric {0} not found on {1}:{2}'.format(metric_key, vnf_name, vnf_interface)
@@ -228,6 +280,18 @@
logging.info('Start Prometheus container {0}'.format(cmd))
return Popen(cmd)
+ def start_PushGateway(self, port=9091):
+ cmd = ["docker",
+ "run",
+ "-d",
+ "-p", "{0}:9091".format(port),
+ "--name", "pushgateway",
+ "prom/pushgateway"
+ ]
+
+ logging.info('Start Prometheus Push Gateway container {0}'.format(cmd))
+ return Popen(cmd)
+
def start_cadvisor(self, port=8090):
cmd = ["docker",
"run",
@@ -254,6 +318,12 @@
self.prometheus_process.kill()
self._stop_container('prometheus')
+ if self.pushgateway_process is not None:
+ logging.info('stopping pushgateway container')
+ self.pushgateway_process.terminate()
+ self.pushgateway_process.kill()
+ self._stop_container('pushgateway')
+
if self.cadvisor_process is not None:
logging.info('stopping cadvisor container')
self.cadvisor_process.terminate()
diff --git a/src/emuvim/dcemulator/prometheus.yml b/src/emuvim/dcemulator/prometheus.yml
index 2915578..4db5b6f 100644
--- a/src/emuvim/dcemulator/prometheus.yml
+++ b/src/emuvim/dcemulator/prometheus.yml
@@ -34,3 +34,12 @@
target_groups:
- targets: ['172.17.0.1:8090']
+ - job_name: 'PushGateway'
+
+ # Override the global default and scrape targets from this job every 5 seconds.
+ scrape_interval: 5s
+
+ target_groups:
+ - targets: ['172.17.0.1:9091']
+
+
diff --git a/src/emuvim/examples/monitoring_demo_topology.py b/src/emuvim/examples/monitoring_demo_topology.py
new file mode 100755
index 0000000..16c2b50
--- /dev/null
+++ b/src/emuvim/examples/monitoring_demo_topology.py
@@ -0,0 +1,121 @@
+"""
+This is an example topology for the distributed cloud emulator (dcemulator).
+(c) 2015 by Manuel Peuster <manuel.peuster@upb.de>
+
+
+This is an example that shows how a user of the emulation tool can
+define network topologies with multiple emulated cloud data centers.
+
+The definition is done with a Python API which looks very similar to the
+Mininet API (in fact it is a wrapper for it).
+
+We only specify the topology *between* data centers not within a single
+data center (data center internal setups or placements are not of interest,
+we want to experiment with VNF chains deployed across multiple PoPs).
+
+The original Mininet API has to be completely hidden and not be used by this
+script.
+"""
+import logging
+from mininet.log import setLogLevel
+from emuvim.dcemulator.net import DCNetwork
+from emuvim.api.zerorpc.compute import ZeroRpcApiEndpoint
+from emuvim.api.zerorpc.network import ZeroRpcApiEndpointDCNetwork
+
+logging.basicConfig(level=logging.INFO)
+
+
+def create_topology1():
+ """
+ 1. Create a data center network object (DCNetwork) with monitoring enabled
+ """
+ net = DCNetwork(monitor=True)
+
+ """
+ 1b. add a monitoring agent to the DCNetwork
+ """
+ mon_api = ZeroRpcApiEndpointDCNetwork("0.0.0.0", 5151)
+ mon_api.connectDCNetwork(net)
+ mon_api.start()
+ """
+ 2. Add (logical) data centers to the topology
+ (each data center is one "bigswitch" in our simplified
+ first prototype)
+ """
+ dc1 = net.addDatacenter("datacenter1")
+ dc2 = net.addDatacenter("datacenter2")
+ dc3 = net.addDatacenter("long_data_center_name3")
+ dc4 = net.addDatacenter(
+ "datacenter4",
+ metadata={"mydata": "we can also add arbitrary metadata to each DC"})
+
+ """
+ 3. You can add additional SDN switches for data center
+ interconnections to the network.
+ """
+ s1 = net.addSwitch("s1")
+
+ """
+ 4. Add links between your data centers and additional switches
+ to define you topology.
+ These links can use Mininet's features to limit bw, add delay or jitter.
+ """
+ net.addLink(dc1, dc2)
+ net.addLink("datacenter1", s1)
+ net.addLink(s1, dc3)
+ net.addLink(s1, "datacenter4")
+
+ """
+ 5. We want to access and control our data centers from the outside,
+ e.g., we want to connect an orchestrator to start/stop compute
+ resources aka. VNFs (represented by Docker containers in the emulated)
+
+ So we need to instantiate API endpoints (e.g. a zerorpc or REST
+ interface). Depending on the endpoint implementations, we can connect
+ one or more data centers to it, which can then be controlled through
+ this API, e.g., start/stop/list compute instances.
+ """
+ # create a new instance of a endpoint implementation
+ zapi1 = ZeroRpcApiEndpoint("0.0.0.0", 4242)
+ # connect data centers to this endpoint
+ zapi1.connectDatacenter(dc1)
+ zapi1.connectDatacenter(dc2)
+ zapi1.connectDatacenter(dc3)
+ zapi1.connectDatacenter(dc4)
+ # run API endpoint server (in another thread, don't block)
+ zapi1.start()
+
+ """
+ 5.1. For our example, we create a second endpoint to illustrate that
+ this is supported by our design. This feature allows us to have
+ one API endpoint for each data center. This makes the emulation
+ environment more realistic because you can easily create one
+ OpenStack-like REST API endpoint for *each* data center.
+ This will look like a real-world multi PoP/data center deployment
+ from the perspective of an orchestrator.
+ """
+ zapi2 = ZeroRpcApiEndpoint("0.0.0.0", 4343)
+ zapi2.connectDatacenter(dc3)
+ zapi2.connectDatacenter(dc4)
+ zapi2.start()
+
+ """
+ 6. Finally we are done and can start our network (the emulator).
+ We can also enter the Mininet CLI to interactively interact
+ with our compute resources (just like in default Mininet).
+ But we can also implement fully automated experiments that
+ can be executed again and again.
+ """
+ net.start()
+ net.CLI()
+ # when the user types exit in the CLI, we stop the emulator
+ net.stop()
+
+
+def main():
+ setLogLevel('info') # set Mininet loglevel
+ create_topology1()
+
+
+if __name__ == '__main__':
+ main()