blob: b3ec7ee1be65dd62e6c0def94e76966bd6c0220f [file] [log] [blame]
stevenvanrossemc5a536a2016-02-16 14:52:39 +01001__author__ = 'Administrator'
2
3import urllib2
4import logging
5from mininet.node import OVSSwitch
6import ast
stevenvanrossem9315da42016-04-11 12:10:06 +02007import time
stevenvanrossem300e1e52016-04-22 22:17:51 +02008from prometheus_client import start_http_server, Summary, Histogram, Gauge, Counter, REGISTRY, CollectorRegistry, \
9 pushadd_to_gateway, push_to_gateway, delete_from_gateway
stevenvanrossema24b4372016-04-14 09:55:20 +020010import threading
stevenvanrossemb098cb52016-04-15 13:28:23 +020011from subprocess import Popen, PIPE
12import os
stevenvanrossema24b4372016-04-14 09:55:20 +020013
stevenvanrossemc5a536a2016-02-16 14:52:39 +010014logging.basicConfig(level=logging.INFO)
15
16"""
stevenvanrossema24b4372016-04-14 09:55:20 +020017class to read openflow stats from the Ryu controller of the DCNetwork
stevenvanrossemc5a536a2016-02-16 14:52:39 +010018"""
19
class DCNetworkMonitor(object):
    """Export OpenFlow port counters of the DCNetwork to Prometheus.

    A background thread queries the Ryu REST API once per second for the
    port statistics of every switch that has a registered metric, stores the
    counters in Prometheus gauges and pushes the gauges to a Push Gateway.
    The constructor also launches the Push Gateway, Prometheus and cAdvisor
    Docker containers; ``stop()`` shuts the thread and the containers down.
    """

    def __init__(self, net):
        """Create the gauges, start the polling thread and the containers.

        :param net: network object; the monitor reads its
            ``DCNetwork_graph`` attribute and calls ``getNodeByName()``.
        """
        self.net = net
        # link to Ryu REST_API
        self.ip = '0.0.0.0'
        self.port = '8080'
        self.REST_api = 'http://{0}:{1}'.format(self.ip, self.port)

        # address of the Push Gateway the gauges are pushed to
        self.pushgateway = 'localhost:9091'

        # supported Prometheus metrics, collected in a dedicated registry
        self.registry = CollectorRegistry()
        label_names = ['vnf_name', 'vnf_interface']
        self.prom_tx_packet_count = Gauge(
            'sonemu_tx_count_packets', 'Total number of packets sent',
            label_names, registry=self.registry)
        self.prom_rx_packet_count = Gauge(
            'sonemu_rx_count_packets', 'Total number of packets received',
            label_names, registry=self.registry)
        self.prom_tx_byte_count = Gauge(
            'sonemu_tx_count_bytes', 'Total number of bytes sent',
            label_names, registry=self.registry)
        self.prom_rx_byte_count = Gauge(
            'sonemu_rx_count_bytes', 'Total number of bytes received',
            label_names, registry=self.registry)

        # metric key (as passed to setup_metric) -> gauge
        self.prom_metrics = {
            'tx_packets': self.prom_tx_packet_count,
            'rx_packets': self.prom_rx_packet_count,
            'tx_bytes': self.prom_tx_byte_count,
            'rx_bytes': self.prom_rx_byte_count,
        }

        # list of installed metrics to monitor; each entry is a dict with
        # the keys: switch_dpid, vnf_name, vnf_interface,
        # previous_measurement, previous_monitor_time, metric_key, mon_port
        self.monitor_lock = threading.Lock()
        self.network_metrics = []

        # start monitoring thread
        self.start_monitoring = True
        self.monitor_thread = threading.Thread(target=self.get_network_metrics)
        self.monitor_thread.start()

        # helper tools
        self.pushgateway_process = self.start_PushGateway()
        self.prometheus_process = self.start_Prometheus()
        self.cadvisor_process = self.start_cadvisor()

    def _find_mon_port(self, vnf_name, vnf_interface):
        """Return the switch port linked to ``vnf_name:vnf_interface``.

        Searches the links from the VNF to each neighbour for the one whose
        ``src_port_id`` equals ``vnf_interface`` and returns that link's
        ``dst_port``; returns ``None`` when no link matches.
        """
        graph = self.net.DCNetwork_graph
        for connected_sw in graph.neighbors(vnf_name):
            link_dict = graph[vnf_name][connected_sw]
            for link in link_dict:
                if link_dict[link]['src_port_id'] == vnf_interface:
                    return link_dict[link]['dst_port']
        return None

    # first set some parameters, before measurement can start
    def setup_metric(self, vnf_name, vnf_interface=None, metric='tx_packets'):
        """Register a counter of a VNF interface for periodic export.

        :param vnf_name: name of the VNF node in ``net.DCNetwork_graph``
        :param vnf_interface: interface of the VNF; when ``None`` the
            ``src_port_id`` of the first link to the first neighbour is used
        :param metric: one of the keys of ``self.prom_metrics``; ``None``
            selects ``'tx_packets'``
        :return: a status message; on failure the message describes the
            problem (no exception is raised to the caller)
        """
        try:
            graph = self.net.DCNetwork_graph
            # list() keeps indexing/len() valid if neighbors() yields lazily
            neighbours = list(graph.neighbors(vnf_name))

            if not neighbours:
                msg = "vnf: {0} is not connected".format(vnf_name)
                logging.info(msg)
                return msg

            # check if port is specified (vnf:port)
            if vnf_interface is None:
                # take first interface by default
                link_dict = graph[vnf_name][neighbours[0]]
                vnf_interface = link_dict[0]['src_port_id']

            mon_port = self._find_mon_port(vnf_name, vnf_interface)
            if mon_port is None:
                msg = "vnf interface {0}:{1} not found!".format(vnf_name, vnf_interface)
                logging.error(msg)
                return msg

            # default port direction to monitor
            if metric is None:
                metric = 'tx_packets'

            # an unknown key would make every later poll cycle fail
            if metric not in self.prom_metrics:
                msg = "metric {0} is not supported".format(metric)
                logging.error(msg)
                return msg

            if len(neighbours) > 1:
                msg = "vnf: {0} has multiple ports".format(vnf_name)
                logging.info(msg)
                return msg

            next_node = self.net.getNodeByName(neighbours[0])
            if not isinstance(next_node, OVSSwitch):
                msg = "vnf: {0} is not connected to switch".format(vnf_name)
                logging.info(msg)
                return msg

            network_metric = {
                'vnf_name': vnf_name,
                'vnf_interface': vnf_interface,
                'mon_port': mon_port,
                'previous_measurement': 0,
                'previous_monitor_time': 0,
                # the dpid attribute is parsed as a hexadecimal string
                'switch_dpid': int(str(next_node.dpid), 16),
                'metric_key': metric,
            }

            with self.monitor_lock:
                self.network_metrics.append(network_metric)

            msg = 'Started monitoring: {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric)
            logging.info(msg)
            return msg

        except Exception as ex:
            logging.exception("setup_metric error.")
            return str(ex)

    def stop_metric(self, vnf_name, vnf_interface, metric):
        """Stop exporting a metric that was registered with setup_metric.

        Removes the first matching entry from ``self.network_metrics``,
        drops the label set from the metric's gauge and deletes the job from
        the Push Gateway.

        :return: a status message, also when no matching entry exists
        """
        with self.monitor_lock:
            for metric_dict in self.network_metrics:
                if metric_dict['vnf_name'] == vnf_name \
                        and metric_dict['vnf_interface'] == vnf_interface \
                        and metric_dict['metric_key'] == metric:
                    break
            else:
                msg = 'Not monitoring: {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric)
                logging.info(msg)
                return msg

            self.network_metrics.remove(metric_dict)

            # drop only this label set; unregistering the gauge would remove
            # the complete metric, all labels...
            try:
                self.prom_metrics[metric].remove(vnf_name, vnf_interface)
            except KeyError:
                # nothing was exported yet for this vnf/interface pair
                pass

            # 1 single monitor job for all metrics of the SDN controller:
            # only the grouping key used when pushing can be deleted from
            # the pushgateway, not individual labels of a metric. Metrics
            # that are still monitored are pushed again on the next poll.
            # Removing metrics separately would need a separate grouping
            # key (and probably a separate registry) per metric.
            delete_from_gateway(self.pushgateway, job='sonemu-SDNcontroller')

        msg = 'Stopped monitoring: {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric)
        logging.info(msg)
        return msg

    # get all metrics defined in the list and export it to Prometheus
    def get_network_metrics(self):
        """Body of the monitoring thread: poll once per second until stopped.

        Errors of a poll cycle are logged and the loop continues, so an
        unreachable controller or gateway does not end the thread or leave
        ``monitor_lock`` held.
        """
        while self.start_monitoring:
            try:
                with self.monitor_lock:
                    self._poll_switches()
            except Exception:
                logging.exception('error while collecting network metrics')
            time.sleep(1)

    def _poll_switches(self):
        """Query each monitored switch once and update its metrics.

        Must be called with ``monitor_lock`` held.
        """
        # group metrics by dpid to optimize the rest api calls
        metrics_by_dpid = {}
        for metric_dict in self.network_metrics:
            dpid = int(metric_dict['switch_dpid'])
            metrics_by_dpid.setdefault(dpid, []).append(metric_dict)

        for dpid, metric_list in metrics_by_dpid.items():
            # query Ryu
            ret = self.REST_cmd('stats/port', dpid)
            port_stat_dict = ast.literal_eval(ret)

            for metric_dict in metric_list:
                self.set_network_metric(metric_dict, port_stat_dict)

    # add metric to the list to export to Prometheus, parse the Ryu port-stats reply
    def set_network_metric(self, metric_dict, port_stat_dict):
        """Export one metric from a port-stats reply and update its rate.

        :param metric_dict: entry of ``self.network_metrics``
        :param port_stat_dict: parsed reply of the ``stats/port`` request,
            indexed by the dpid as a string
        :return: the rate since the previous measurement, ``None`` for the
            first measurement, or an error message when the monitored port
            is not part of the reply
        """
        # vnf tx is the datacenter switch rx and vice-versa
        metric_key = self.switch_tx_rx(metric_dict['metric_key'])
        switch_dpid = metric_dict['switch_dpid']
        vnf_name = metric_dict['vnf_name']
        vnf_interface = metric_dict['vnf_interface']
        mon_port = metric_dict['mon_port']

        for port_stat in port_stat_dict[str(switch_dpid)]:
            try:
                if int(port_stat['port_no']) != int(mon_port):
                    continue
            except (TypeError, ValueError):
                # a non-numeric port identifier cannot be the monitored port
                continue

            port_uptime = port_stat['duration_sec'] + port_stat['duration_nsec'] * 10 ** (-9)
            this_measurement = int(port_stat[metric_key])

            # set prometheus metric; positional label values follow the
            # label-name order (vnf_name, vnf_interface) of the gauges
            self.prom_metrics[metric_dict['metric_key']].\
                labels(vnf_name, vnf_interface).\
                set(this_measurement)

            # 1 single monitor job for all metrics of the SDN controller
            pushadd_to_gateway(self.pushgateway, job='sonemu-SDNcontroller', registry=self.registry)

            return self._update_rate(metric_dict, this_measurement, port_uptime)

        msg = 'metric {0} not found on {1}:{2}'.format(metric_key, vnf_name, vnf_interface)
        logging.error(msg)
        return msg

    @staticmethod
    def _update_rate(metric_dict, this_measurement, port_uptime):
        """Store the new sample in ``metric_dict`` and return the rate.

        Returns ``None`` when there is no usable previous sample: either no
        measurement was taken yet, or the reported port uptime did not
        advance. In that case the sample only becomes the new baseline; the
        rate is computed on a later poll cycle.
        """
        previous_monitor_time = metric_dict['previous_monitor_time']
        previous_measurement = metric_dict['previous_measurement']

        metric_dict['previous_measurement'] = this_measurement
        metric_dict['previous_monitor_time'] = port_uptime

        if previous_monitor_time <= 0 or previous_monitor_time >= port_uptime:
            return None

        time_delta = port_uptime - previous_monitor_time
        metric_rate = (this_measurement - previous_measurement) / float(time_delta)
        logging.info('metric: {0} rate:{1}'.format(metric_dict['metric_key'], metric_rate))
        return metric_rate

    def REST_cmd(self, prefix, dpid):
        """GET ``<REST_api>/<prefix>/<dpid>`` and return the response body."""
        url = self.REST_api + '/' + str(prefix) + '/' + str(dpid)
        req = urllib2.Request(url)
        response = urllib2.urlopen(req)
        try:
            return response.read()
        finally:
            # always give the connection back, also when read() fails
            response.close()

    def start_Prometheus(self, port=9090):
        """Run the ``prom/prometheus`` container and return its Popen object.

        :param port: host port that is mapped to container port 9090
        """
        # prometheus.yml configuration file is located in the same directory as this file
        config_dir = os.path.dirname(os.path.abspath(__file__))
        cmd = ["docker",
               "run",
               "--rm",
               "-p", "{0}:9090".format(port),
               "-v", "{0}/prometheus.yml:/etc/prometheus/prometheus.yml".format(config_dir),
               "-v", "{0}/profile.rules:/etc/prometheus/profile.rules".format(config_dir),
               "--name", "prometheus",
               "prom/prometheus"
               ]
        logging.info('Start Prometheus container {0}'.format(cmd))
        return Popen(cmd)

    def start_PushGateway(self, port=9091):
        """Run the ``prom/pushgateway`` container (detached).

        :param port: host port that is mapped to container port 9091
        :return: Popen object of the ``docker run`` command
        """
        cmd = ["docker",
               "run",
               "-d",
               "-p", "{0}:9091".format(port),
               "--name", "pushgateway",
               "prom/pushgateway"
               ]

        logging.info('Start Prometheus Push Gateway container {0}'.format(cmd))
        return Popen(cmd)

    def start_cadvisor(self, port=8090):
        """Run the ``google/cadvisor`` container and return its Popen object.

        :param port: host port that is published for container port 8080
        """
        cmd = ["docker",
               "run",
               "--rm",
               "--volume=/:/rootfs:ro",
               "--volume=/var/run:/var/run:rw",
               "--volume=/sys:/sys:ro",
               "--volume=/var/lib/docker/:/var/lib/docker:ro",
               "--publish={0}:8080".format(port),
               "--name=cadvisor",
               "google/cadvisor:latest"
               ]
        logging.info('Start cAdvisor container {0}'.format(cmd))
        return Popen(cmd)

    def stop(self):
        """Stop the monitoring thread and the three helper containers."""
        # stop the monitoring thread
        self.start_monitoring = False
        self.monitor_thread.join()

        self._stop_helper('prometheus', self.prometheus_process)
        self._stop_helper('pushgateway', self.pushgateway_process)
        self._stop_helper('cadvisor', self.cadvisor_process)

    def _stop_helper(self, name, process):
        """End the docker client ``process`` and remove container ``name``.

        Does nothing when ``process`` is ``None``.
        """
        if process is None:
            return

        logging.info('stopping {0} container'.format(name))
        try:
            process.terminate()
            process.kill()
            # reap the child so that it does not stay behind as a zombie
            process.wait()
        except OSError:
            # the docker client is already gone; the container itself is
            # still stopped and removed below
            logging.info('{0} docker client already exited'.format(name))
        self._stop_container(name)

    def switch_tx_rx(self, metric=''):
        """Swap ``tx`` and ``rx`` in a metric name.

        When monitoring vnfs, the tx of the datacenter switch is actually
        the rx of the vnf, so the metric name is changed to be consistent
        with the vnf rx or tx. Names containing neither are returned as is.
        """
        if 'tx' in metric:
            metric = metric.replace('tx', 'rx')
        elif 'rx' in metric:
            metric = metric.replace('rx', 'tx')

        return metric

    def _stop_container(self, name):
        """Run ``docker stop`` and then ``docker rm`` for container ``name``."""
        cmd = ["docker",
               "stop",
               name]
        Popen(cmd).wait()

        cmd = ["docker",
               "rm",
               name]
        Popen(cmd).wait()
356
357