Merge pull request #85 from stevenvanrossem/master
update monitoring features of the emulator
diff --git a/src/emuvim/api/zerorpc/network.py b/src/emuvim/api/zerorpc/network.py
index ac17e25..8278422 100644
--- a/src/emuvim/api/zerorpc/network.py
+++ b/src/emuvim/api/zerorpc/network.py
@@ -102,9 +102,9 @@
# remove the rate measurement for a vnf interface
def stop_metric(self, vnf_name, vnf_interface, metric):
- logging.debug("RPC CALL: setup metric")
+ logging.debug("RPC CALL: stop metric")
try:
- c = self.net.monitor_agent.remove_metric(vnf_name, vnf_interface, metric)
+ c = self.net.monitor_agent.stop_metric(vnf_name, vnf_interface, metric)
return c
except Exception as ex:
logging.exception("RPC error.")
diff --git a/src/emuvim/cli/monitor.py b/src/emuvim/cli/monitor.py
index 040fa13..14cce08 100755
--- a/src/emuvim/cli/monitor.py
+++ b/src/emuvim/cli/monitor.py
@@ -1,5 +1,5 @@
"""
-son-emu network CLI
+son-emu monitor CLI
(c) 2016 by Manuel Peuster <manuel.peuster@upb.de>
"""
@@ -40,7 +40,7 @@
def stop_metric(self, args):
vnf_name = self._parse_vnf_name(args.get("vnf_name"))
vnf_interface = self._parse_vnf_interface(args.get("vnf_name"))
- r = self.c.remove_metric(
+ r = self.c.stop_metric(
vnf_name,
vnf_interface,
args.get("metric"))
diff --git a/src/emuvim/dcemulator/monitoring.py b/src/emuvim/dcemulator/monitoring.py
index 82411fd..54e7986 100755
--- a/src/emuvim/dcemulator/monitoring.py
+++ b/src/emuvim/dcemulator/monitoring.py
@@ -5,7 +5,8 @@
from mininet.node import OVSSwitch
import ast
import time
-from prometheus_client import start_http_server, Summary, Histogram, Gauge, Counter, REGISTRY
+from prometheus_client import start_http_server, Summary, Histogram, Gauge, Counter, REGISTRY, CollectorRegistry, \
+ pushadd_to_gateway, push_to_gateway, delete_from_gateway
import threading
from subprocess import Popen, PIPE
import os
@@ -25,17 +26,19 @@
self.REST_api = 'http://{0}:{1}'.format(self.ip,self.port)
# helper variables to calculate the metrics
+ self.pushgateway = 'localhost:9091'
# Start up the server to expose the metrics to Prometheus.
- start_http_server(8000)
+ #start_http_server(8000)
# supported Prometheus metrics
+ self.registry = CollectorRegistry()
self.prom_tx_packet_count = Gauge('sonemu_tx_count_packets', 'Total number of packets sent',
- ['vnf_name', 'vnf_interface'])
+ ['vnf_name', 'vnf_interface'], registry=self.registry)
self.prom_rx_packet_count = Gauge('sonemu_rx_count_packets', 'Total number of packets received',
- ['vnf_name', 'vnf_interface'])
+ ['vnf_name', 'vnf_interface'], registry=self.registry)
self.prom_tx_byte_count = Gauge('sonemu_tx_count_bytes', 'Total number of bytes sent',
- ['vnf_name', 'vnf_interface'])
+ ['vnf_name', 'vnf_interface'], registry=self.registry)
self.prom_rx_byte_count = Gauge('sonemu_rx_count_bytes', 'Total number of bytes received',
- ['vnf_name', 'vnf_interface'])
+ ['vnf_name', 'vnf_interface'], registry=self.registry)
self.prom_metrics={'tx_packets':self.prom_tx_packet_count, 'rx_packets':self.prom_rx_packet_count,
'tx_bytes':self.prom_tx_byte_count,'rx_bytes':self.prom_rx_byte_count}
@@ -53,6 +56,7 @@
mon_port = None
}
'''
+ self.monitor_lock = threading.Lock()
self.network_metrics = []
# start monitoring thread
@@ -61,6 +65,7 @@
self.monitor_thread.start()
# helper tools
+ self.pushgateway_process = self.start_PushGateway()
self.prometheus_process = self.start_Prometheus()
self.cadvisor_process = self.start_cadvisor()
@@ -121,7 +126,11 @@
network_metric['switch_dpid'] = int(str(next_node.dpid), 16)
network_metric['metric_key'] = metric
+ self.monitor_lock.acquire()
+
self.network_metrics.append(network_metric)
+ self.monitor_lock.release()
+
logging.info('Started monitoring: {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric))
return 'Started monitoring: {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric)
@@ -131,17 +140,48 @@
return ex.message
def stop_metric(self, vnf_name, vnf_interface, metric):
+
for metric_dict in self.network_metrics:
if metric_dict['vnf_name'] == vnf_name and metric_dict['vnf_interface'] == vnf_interface \
and metric_dict['metric_key'] == metric:
+ self.monitor_lock.acquire()
+
self.network_metrics.remove(metric_dict)
#this removes the complete metric, all labels...
#REGISTRY.unregister(self.prom_metrics[metric_dict['metric_key']])
+ #self.registry.unregister(self.prom_metrics[metric_dict['metric_key']])
+
+ for collector in self.registry._collectors :
+ logging.info('name:{0} labels:{1} metrics:{2}'.format(collector._name, collector._labelnames, collector._metrics))
+ """
+ INFO:root:name:sonemu_rx_count_packets
+ labels:('vnf_name', 'vnf_interface')
+ metrics:{(u'tsrc', u'output'): < prometheus_client.core.Gauge
+ object
+ at
+ 0x7f353447fd10 >}
+ """
+ logging.info('{0}'.format(collector._metrics.values()))
+ #if self.prom_metrics[metric_dict['metric_key']]
+ if (vnf_name, vnf_interface) in collector._metrics:
+ logging.info('2 name:{0} labels:{1} metrics:{2}'.format(collector._name, collector._labelnames,
+ collector._metrics))
+ #collector._metrics = {}
+ collector.remove(vnf_name, vnf_interface)
# set values to NaN, prometheus api currently does not support removal of metrics
- self.prom_metrics[metric_dict['metric_key']].labels(vnf_name, vnf_interface).set(float('nan'))
+ #self.prom_metrics[metric_dict['metric_key']].labels(vnf_name, vnf_interface).set(float('nan'))
+
+ # this removes the complete metric, all labels...
+ # 1 single monitor job for all metrics of the SDN controller
+ # we can only remove from the pushgateway grouping keys(labels) which we have defined for the add_to_pushgateway
+    # we cannot specify labels from the metrics to be removed
+    # if we need to remove the metrics separately, we need to give them a separate grouping key, and probably a different registry also
+ delete_from_gateway(self.pushgateway, job='sonemu-SDNcontroller')
+
+ self.monitor_lock.release()
logging.info('Stopped monitoring: {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric))
return 'Stopped monitoring: {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric)
@@ -150,6 +190,9 @@
# get all metrics defined in the list and export it to Prometheus
def get_network_metrics(self):
while self.start_monitoring:
+
+ self.monitor_lock.acquire()
+
# group metrics by dpid to optimize the rest api calls
dpid_list = [metric_dict['switch_dpid'] for metric_dict in self.network_metrics]
dpid_set = set(dpid_list)
@@ -166,6 +209,7 @@
for metric_dict in metric_list:
self.set_network_metric(metric_dict, port_stat_dict)
+ self.monitor_lock.release()
time.sleep(1)
# add metric to the list to export to Prometheus, parse the Ryu port-stats reply
@@ -186,7 +230,14 @@
#logging.info('set prom packets:{0} {1}:{2}'.format(this_measurement, vnf_name, vnf_interface))
# set prometheus metric
- self.prom_metrics[metric_dict['metric_key']].labels(vnf_name, vnf_interface).set(this_measurement)
+ self.prom_metrics[metric_dict['metric_key']].\
+ labels({'vnf_name':vnf_name, 'vnf_interface':vnf_interface}).\
+ set(this_measurement)
+ #push_to_gateway(self.pushgateway, job='SDNcontroller',
+ # grouping_key={'metric':metric_dict['metric_key']}, registry=self.registry)
+
+ # 1 single monitor job for all metrics of the SDN controller
+ pushadd_to_gateway(self.pushgateway, job='sonemu-SDNcontroller', registry=self.registry)
if previous_monitor_time <= 0 or previous_monitor_time >= port_uptime:
metric_dict['previous_measurement'] = int(port_stat[metric_key])
@@ -194,16 +245,17 @@
# do first measurement
#logging.info('first measurement')
time.sleep(1)
- byte_rate = self.get_network_metrics()
- return byte_rate
+ self.monitor_lock.release()
+ metric_rate = self.get_network_metrics()
+ return metric_rate
else:
time_delta = (port_uptime - metric_dict['previous_monitor_time'])
- byte_rate = (this_measurement - metric_dict['previous_measurement']) / float(time_delta)
+ metric_rate = (this_measurement - metric_dict['previous_measurement']) / float(time_delta)
# logging.info('uptime:{2} delta:{0} rate:{1}'.format(time_delta,byte_rate,port_uptime))
metric_dict['previous_measurement'] = this_measurement
metric_dict['previous_monitor_time'] = port_uptime
- return byte_rate
+ return metric_rate
logging.exception('metric {0} not found on {1}:{2}'.format(metric_key, vnf_name, vnf_interface))
return 'metric {0} not found on {1}:{2}'.format(metric_key, vnf_name, vnf_interface)
@@ -228,6 +280,18 @@
logging.info('Start Prometheus container {0}'.format(cmd))
return Popen(cmd)
+ def start_PushGateway(self, port=9091):
+ cmd = ["docker",
+ "run",
+ "-d",
+ "-p", "{0}:9091".format(port),
+ "--name", "pushgateway",
+ "prom/pushgateway"
+ ]
+
+ logging.info('Start Prometheus Push Gateway container {0}'.format(cmd))
+ return Popen(cmd)
+
def start_cadvisor(self, port=8090):
cmd = ["docker",
"run",
@@ -254,6 +318,12 @@
self.prometheus_process.kill()
self._stop_container('prometheus')
+ if self.pushgateway_process is not None:
+ logging.info('stopping pushgateway container')
+ self.pushgateway_process.terminate()
+ self.pushgateway_process.kill()
+ self._stop_container('pushgateway')
+
if self.cadvisor_process is not None:
logging.info('stopping cadvisor container')
self.cadvisor_process.terminate()
diff --git a/src/emuvim/dcemulator/prometheus.yml b/src/emuvim/dcemulator/prometheus.yml
index 2915578..4db5b6f 100644
--- a/src/emuvim/dcemulator/prometheus.yml
+++ b/src/emuvim/dcemulator/prometheus.yml
@@ -34,3 +34,12 @@
target_groups:
- targets: ['172.17.0.1:8090']
+ - job_name: 'PushGateway'
+
+ # Override the global default and scrape targets from this job every 5 seconds.
+ scrape_interval: 5s
+
+ target_groups:
+ - targets: ['172.17.0.1:9091']
+
+
diff --git a/src/emuvim/examples/monitoring_demo_topology.py b/src/emuvim/examples/monitoring_demo_topology.py
new file mode 100755
index 0000000..16c2b50
--- /dev/null
+++ b/src/emuvim/examples/monitoring_demo_topology.py
@@ -0,0 +1,121 @@
+"""
+This is an example topology for the distributed cloud emulator (dcemulator).
+(c) 2015 by Manuel Peuster <manuel.peuster@upb.de>
+
+
+This is an example that shows how a user of the emulation tool can
+define network topologies with multiple emulated cloud data centers.
+
+The definition is done with a Python API which looks very similar to the
+Mininet API (in fact it is a wrapper for it).
+
+We only specify the topology *between* data centers not within a single
+data center (data center internal setups or placements are not of interest,
+we want to experiment with VNF chains deployed across multiple PoPs).
+
+The original Mininet API has to be completely hidden and not be used by this
+script.
+"""
+import logging
+from mininet.log import setLogLevel
+from emuvim.dcemulator.net import DCNetwork
+from emuvim.api.zerorpc.compute import ZeroRpcApiEndpoint
+from emuvim.api.zerorpc.network import ZeroRpcApiEndpointDCNetwork
+
+logging.basicConfig(level=logging.INFO)
+
+
+def create_topology1():
+ """
+ 1. Create a data center network object (DCNetwork) with monitoring enabled
+ """
+ net = DCNetwork(monitor=True)
+
+ """
+ 1b. add a monitoring agent to the DCNetwork
+ """
+ mon_api = ZeroRpcApiEndpointDCNetwork("0.0.0.0", 5151)
+ mon_api.connectDCNetwork(net)
+ mon_api.start()
+ """
+ 2. Add (logical) data centers to the topology
+ (each data center is one "bigswitch" in our simplified
+ first prototype)
+ """
+ dc1 = net.addDatacenter("datacenter1")
+ dc2 = net.addDatacenter("datacenter2")
+ dc3 = net.addDatacenter("long_data_center_name3")
+ dc4 = net.addDatacenter(
+ "datacenter4",
+ metadata={"mydata": "we can also add arbitrary metadata to each DC"})
+
+ """
+ 3. You can add additional SDN switches for data center
+ interconnections to the network.
+ """
+ s1 = net.addSwitch("s1")
+
+ """
+ 4. Add links between your data centers and additional switches
+ to define you topology.
+ These links can use Mininet's features to limit bw, add delay or jitter.
+ """
+ net.addLink(dc1, dc2)
+ net.addLink("datacenter1", s1)
+ net.addLink(s1, dc3)
+ net.addLink(s1, "datacenter4")
+
+ """
+ 5. We want to access and control our data centers from the outside,
+ e.g., we want to connect an orchestrator to start/stop compute
+ resources aka. VNFs (represented by Docker containers in the emulated)
+
+ So we need to instantiate API endpoints (e.g. a zerorpc or REST
+ interface). Depending on the endpoint implementations, we can connect
+ one or more data centers to it, which can then be controlled through
+ this API, e.g., start/stop/list compute instances.
+ """
+ # create a new instance of a endpoint implementation
+ zapi1 = ZeroRpcApiEndpoint("0.0.0.0", 4242)
+ # connect data centers to this endpoint
+ zapi1.connectDatacenter(dc1)
+ zapi1.connectDatacenter(dc2)
+ zapi1.connectDatacenter(dc3)
+ zapi1.connectDatacenter(dc4)
+ # run API endpoint server (in another thread, don't block)
+ zapi1.start()
+
+ """
+ 5.1. For our example, we create a second endpoint to illustrate that
+ this is supported by our design. This feature allows us to have
+ one API endpoint for each data center. This makes the emulation
+ environment more realistic because you can easily create one
+ OpenStack-like REST API endpoint for *each* data center.
+ This will look like a real-world multi PoP/data center deployment
+ from the perspective of an orchestrator.
+ """
+ zapi2 = ZeroRpcApiEndpoint("0.0.0.0", 4343)
+ zapi2.connectDatacenter(dc3)
+ zapi2.connectDatacenter(dc4)
+ zapi2.start()
+
+ """
+ 6. Finally we are done and can start our network (the emulator).
+ We can also enter the Mininet CLI to interactively interact
+ with our compute resources (just like in default Mininet).
+ But we can also implement fully automated experiments that
+ can be executed again and again.
+ """
+ net.start()
+ net.CLI()
+ # when the user types exit in the CLI, we stop the emulator
+ net.stop()
+
+
+def main():
+ setLogLevel('info') # set Mininet loglevel
+ create_topology1()
+
+
+if __name__ == '__main__':
+ main()