blob: ce24a4000867508f92e0cce7c7b326b3c325218a [file] [log] [blame]
"""
Copyright (c) 2015 SONATA-NFV
ALL RIGHTS RESERVED.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Neither the name of the SONATA-NFV [, ANY ADDITIONAL AFFILIATION]
nor the names of its contributors may be used to endorse or promote
products derived from this software without specific prior written
permission.

This work has been performed in the framework of the SONATA project,
funded by the European Commission under Grant number 671517 through
the Horizon 2020 and 5G-PPP programmes. The authors would like to
acknowledge the contributions of their colleagues of the SONATA
partner consortium (www.sonata-nfv.eu).
"""
stevenvanrossemc5a536a2016-02-16 14:52:39 +010028
stevenvanrossemc5a536a2016-02-16 14:52:39 +010029import logging
stevenvanrossem51d4ae72016-08-10 13:22:53 +020030import sys
stevenvanrossemc5a536a2016-02-16 14:52:39 +010031from mininet.node import OVSSwitch
32import ast
stevenvanrossem9315da42016-04-11 12:10:06 +020033import time
stevenvanrossem300e1e52016-04-22 22:17:51 +020034from prometheus_client import start_http_server, Summary, Histogram, Gauge, Counter, REGISTRY, CollectorRegistry, \
35 pushadd_to_gateway, push_to_gateway, delete_from_gateway
stevenvanrossema24b4372016-04-14 09:55:20 +020036import threading
stevenvanrossemce032e12017-04-05 17:31:20 +020037from subprocess import Popen, check_call
stevenvanrossemb098cb52016-04-15 13:28:23 +020038import os
stevenvanrossembeba14d2017-01-28 15:40:30 +010039import docker
stevenvanrossemfb8c8392017-01-28 17:29:11 +010040import json
stevenvanrossem461941c2016-05-10 11:41:29 +020041
logging.basicConfig(level=logging.INFO)

# This module holds the class that reads OpenFlow stats from the Ryu
# controller of the DCNetwork.

# host port on which the Prometheus pushgateway container is published
PUSHGATEWAY_PORT = 9091
# we cannot use port 8080 because ryu-ofrest api is already using that one
CADVISOR_PORT = 8081

# sent as 'cookie_mask' next to the flow cookie when querying flow stats
COOKIE_MASK = 0xffffffff
53
class DCNetworkMonitor(object):
    """Read OpenFlow statistics from the Ryu controller of a DCNetwork.

    Two background threads poll the controller once per second (port
    statistics and flow statistics), store the counters in Prometheus
    gauges and push them to a Prometheus pushgateway.  The pushgateway
    and cAdvisor containers are launched from the constructor.
    """

    def __init__(self, net):
        """Create the gauges, start both polling threads and the helper containers.

        :param net: network object; the class uses its ``DCNetwork_graph``,
            ``getNodeByName`` and ``ryu_REST`` members.
        """
        self.net = net
        self.dockercli = docker.from_env()

        # pushgateway address
        self.pushgateway = 'localhost:{0}'.format(PUSHGATEWAY_PORT)

        # supported Prometheus metrics
        self.registry = CollectorRegistry()
        label_names = ['vnf_name', 'vnf_interface', 'flow_id']
        self.prom_tx_packet_count = Gauge('sonemu_tx_count_packets', 'Total number of packets sent',
                                          label_names, registry=self.registry)
        self.prom_rx_packet_count = Gauge('sonemu_rx_count_packets', 'Total number of packets received',
                                          label_names, registry=self.registry)
        self.prom_tx_byte_count = Gauge('sonemu_tx_count_bytes', 'Total number of bytes sent',
                                        label_names, registry=self.registry)
        self.prom_rx_byte_count = Gauge('sonemu_rx_count_bytes', 'Total number of bytes received',
                                        label_names, registry=self.registry)

        self.prom_metrics = {'tx_packets': self.prom_tx_packet_count,
                             'rx_packets': self.prom_rx_packet_count,
                             'tx_bytes': self.prom_tx_byte_count,
                             'rx_bytes': self.prom_rx_byte_count}

        # Lists of installed metrics to monitor.  Each entry is a dict with
        # the keys: switch_dpid, vnf_name, vnf_interface, previous_measurement,
        # previous_monitor_time, metric_key, mon_port (flow entries also
        # carry a 'cookie').
        self.monitor_lock = threading.Lock()
        self.monitor_flow_lock = threading.Lock()
        self.network_metrics = []
        self.flow_metrics = []
        self.skewmon_metrics = {}

        # start monitoring threads
        self.start_monitoring = True
        self.monitor_thread = threading.Thread(target=self.get_network_metrics)
        self.monitor_thread.start()

        self.monitor_flow_thread = threading.Thread(target=self.get_flow_metrics)
        self.monitor_flow_thread.start()

        # helper tools
        # cAdvisor, Prometheus pushgateway are started as external container,
        # to gather monitoring metric in son-emu
        self.pushgateway_process = self.start_PushGateway()
        self.cadvisor_process = self.start_cAdvisor()

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _neighbors(self, vnf_name):
        """Return the graph neighbours of ``vnf_name`` as a list."""
        # list() keeps indexing/len() valid if neighbors() yields an iterator
        return list(self.net.DCNetwork_graph.neighbors(vnf_name))

    def _default_interface(self, vnf_name):
        """Return the source port id of the first link of the first neighbour."""
        connected_sw = self._neighbors(vnf_name)[0]
        link_dict = self.net.DCNetwork_graph[vnf_name][connected_sw]
        return link_dict[0]['src_port_id']

    def _find_attachment(self, vnf_name, vnf_interface):
        """Locate the link whose ``src_port_id`` equals ``vnf_interface``.

        :return: ``(connected_switch, dst_port_nr)`` of the first matching
            link, or ``None`` when no link of the vnf uses that interface.
        """
        for connected_sw in self._neighbors(vnf_name):
            link_dict = self.net.DCNetwork_graph[vnf_name][connected_sw]
            for link in link_dict:
                if link_dict[link]['src_port_id'] == vnf_interface:
                    return connected_sw, link_dict[link]['dst_port_nr']
        return None

    @staticmethod
    def _parse_ryu_reply(ret):
        """Turn a ``ryu_REST`` reply into a dict.

        Dicts are returned unchanged, strings are parsed with
        ``ast.literal_eval`` (literals only, no code execution) and any
        other reply type yields ``None``.
        """
        if isinstance(ret, dict):
            return ret
        try:
            string_types = basestring  # Python 2: covers str and unicode
        except NameError:
            string_types = str
        if isinstance(ret, string_types):
            return ast.literal_eval(ret.rstrip())
        return None

    def _push_registry(self):
        """Push all gauges to the pushgateway; an unreachable gateway is only logged."""
        try:
            pushadd_to_gateway(self.pushgateway, job='sonemu-SDNcontroller', registry=self.registry)
        except Exception as e:
            logging.warning("Pushgateway not reachable: {0} {1}".format(type(e).__name__, e))

    # ------------------------------------------------------------------
    # flow metrics
    # ------------------------------------------------------------------
    # first set some parameters, before measurement can start
    def setup_flow(self, vnf_name, vnf_interface=None, metric='tx_packets', cookie=0):
        """Register a flow metric so that the flow thread starts exporting it.

        :param vnf_name: name of the vnf node in the DCNetwork graph
        :param vnf_interface: interface (src_port_id) of the vnf; the first
            interface is used when None
        :param metric: one of the keys of ``self.prom_metrics``; None means
            'tx_packets'
        :param cookie: flow cookie, also used as the ``flow_id`` label
        :return: a status or error message (str)
        """
        # check if port is specified (vnf:port)
        if vnf_interface is None:
            # take first interface by default
            vnf_interface = self._default_interface(vnf_name)

        attachment = self._find_attachment(vnf_name, vnf_interface)
        if attachment is None or not attachment[0]:
            msg = "vnf switch of {0}:{1} not found!".format(vnf_name, vnf_interface)
            logging.error(msg)
            return msg
        vnf_switch, mon_port = attachment

        try:
            # default port direction to monitor
            if metric is None:
                metric = 'tx_packets'
            if metric not in self.prom_metrics:
                # reject it here, otherwise the monitor thread fails on it every cycle
                msg = "metric {0} is not supported (choose from {1})".format(metric, sorted(self.prom_metrics))
                logging.error(msg)
                return msg

            next_node = self.net.getNodeByName(vnf_switch)
            if not isinstance(next_node, OVSSwitch):
                msg = "vnf: {0} is not connected to switch".format(vnf_name)
                logging.info(msg)
                return msg

            flow_metric = {
                'vnf_name': vnf_name,
                'vnf_interface': vnf_interface,
                'mon_port': mon_port,
                'previous_measurement': 0,
                'previous_monitor_time': 0,
                'switch_dpid': int(str(next_node.dpid), 16),
                'metric_key': metric,
                'cookie': cookie,
            }

            with self.monitor_flow_lock:
                self.flow_metrics.append(flow_metric)

            msg = 'Started monitoring flow:{3} {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric, cookie)
            logging.info(msg)
            return msg

        except Exception as ex:
            logging.exception("setup_flow error.")
            return str(ex)

    def stop_flow(self, vnf_name, vnf_interface=None, metric=None, cookie=0,):
        """Stop exporting one flow metric and clear the job on the pushgateway.

        The entry is matched on vnf name, interface, metric and cookie.

        :return: a status or error message (str)
        """
        # check if port is specified (vnf:port)
        if vnf_interface is None and metric is not None:
            # take first interface by default
            vnf_interface = self._default_interface(vnf_name)

        # the list is shared with the flow thread: search and modify under the lock
        with self.monitor_flow_lock:
            for flow_dict in self.flow_metrics:
                if flow_dict['vnf_name'] == vnf_name and flow_dict['vnf_interface'] == vnf_interface \
                        and flow_dict['metric_key'] == metric and flow_dict['cookie'] == cookie:

                    self.flow_metrics.remove(flow_dict)

                    # set metric to NaN
                    self.prom_metrics[flow_dict['metric_key']]. \
                        labels(vnf_name=vnf_name, vnf_interface=vnf_interface, flow_id=cookie). \
                        set(float('nan'))

                    delete_from_gateway(self.pushgateway, job='sonemu-SDNcontroller')

                    msg = 'Stopped monitoring flow {3}: {2} on {0}:{1}'.format(
                        vnf_name, vnf_interface, metric, cookie)
                    logging.info(msg)
                    return msg

        return 'Error stopping monitoring flow: {0} on {1}:{2}'.format(metric, vnf_name, vnf_interface)

    # ------------------------------------------------------------------
    # port (network) metrics
    # ------------------------------------------------------------------
    # first set some parameters, before measurement can start
    def setup_metric(self, vnf_name, vnf_interface=None, metric='tx_packets'):
        """Register a port metric so that the network thread starts exporting it.

        :param vnf_name: name of the vnf node in the DCNetwork graph
        :param vnf_interface: interface (src_port_id) of the vnf; the first
            interface is used when None
        :param metric: one of the keys of ``self.prom_metrics``; None means
            'tx_packets'
        :return: a status or error message (str)
        """
        # check if port is specified (vnf:port)
        if vnf_interface is None:
            # take first interface by default
            vnf_interface = self._default_interface(vnf_name)

        attachment = self._find_attachment(vnf_name, vnf_interface)
        if attachment is None:
            msg = "vnf interface {0}:{1} not found!".format(vnf_name, vnf_interface)
            logging.error(msg)
            return msg
        mon_port = attachment[1]

        try:
            # default port direction to monitor
            if metric is None:
                metric = 'tx_packets'
            if metric not in self.prom_metrics:
                # reject it here, otherwise the monitor thread fails on it every cycle
                msg = "metric {0} is not supported (choose from {1})".format(metric, sorted(self.prom_metrics))
                logging.error(msg)
                return msg

            vnf_switch = self._neighbors(str(vnf_name))

            if len(vnf_switch) > 1:
                msg = "vnf: {0} has multiple ports".format(vnf_name)
                logging.info(msg)
                return msg
            elif len(vnf_switch) == 0:
                msg = "vnf: {0} is not connected".format(vnf_name)
                logging.info(msg)
                return msg

            next_node = self.net.getNodeByName(vnf_switch[0])
            if not isinstance(next_node, OVSSwitch):
                msg = "vnf: {0} is not connected to switch".format(vnf_name)
                logging.info(msg)
                return msg

            network_metric = {
                'vnf_name': vnf_name,
                'vnf_interface': vnf_interface,
                'mon_port': mon_port,
                'previous_measurement': 0,
                'previous_monitor_time': 0,
                'switch_dpid': int(str(next_node.dpid), 16),
                'metric_key': metric,
            }

            with self.monitor_lock:
                self.network_metrics.append(network_metric)

            msg = 'Started monitoring: {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric)
            logging.info(msg)
            return msg

        except Exception as ex:
            logging.exception("setup_metric error.")
            return str(ex)

    def stop_metric(self, vnf_name, vnf_interface=None, metric=None):
        """Stop exporting port metrics and clear the job on the pushgateway.

        With a ``metric`` the single matching entry is removed.  With both
        ``vnf_interface`` and ``metric`` set to None every port metric of
        the vnf is removed.

        :return: a status or error message (str)
        """
        # check if port is specified (vnf:port)
        if vnf_interface is None and metric is not None:
            # take first interface by default
            vnf_interface = self._default_interface(vnf_name)

        # the list is shared with the network thread: search and modify under the lock
        with self.monitor_lock:
            if vnf_interface is None and metric is None:
                # delete everything from this vnf
                remaining = [entry for entry in self.network_metrics if entry['vnf_name'] != vnf_name]
                if len(remaining) != len(self.network_metrics):
                    self.network_metrics[:] = remaining
                    self._remove_port_series(vnf_name)
                    delete_from_gateway(self.pushgateway, job='sonemu-SDNcontroller')
                    logging.info('Stopped monitoring vnf: {0}'.format(vnf_name))
                    return 'Stopped monitoring: {0}'.format(vnf_name)
            else:
                for metric_dict in self.network_metrics:
                    if metric_dict['vnf_name'] == vnf_name and metric_dict['vnf_interface'] == vnf_interface \
                            and metric_dict['metric_key'] == metric:

                        self.network_metrics.remove(metric_dict)

                        # set values to NaN, prometheus api currently does not support removal of metrics
                        self.prom_metrics[metric_dict['metric_key']]. \
                            labels(vnf_name=vnf_name, vnf_interface=vnf_interface, flow_id=None). \
                            set(float('nan'))

                        # This removes the complete job, all metrics and labels:
                        # there is 1 single monitor job for all metrics of the SDN controller
                        # and the pushgateway can only delete by grouping key.  To remove
                        # metrics separately they would need a separate grouping key, and
                        # probably a different registry as well.
                        delete_from_gateway(self.pushgateway, job='sonemu-SDNcontroller')

                        msg = 'Stopped monitoring: {2} on {0}:{1}'.format(vnf_name, vnf_interface, metric)
                        logging.info(msg)
                        return msg

        return 'Error stopping monitoring metric: {0} on {1}:{2}'.format(metric, vnf_name, vnf_interface)

    def _remove_port_series(self, vnf_name):
        """Drop the port-level gauge series (flow_id label 'None') of ``vnf_name``."""
        # NOTE(review): relies on private prometheus_client attributes
        # (_collectors, _metrics); verify when upgrading the client library.
        for collector in self.registry._collectors:
            for labels in list(collector._metrics):
                name, interface, flow_id = labels
                # only remove series that exist and belong to port metrics
                if name == vnf_name and flow_id == 'None':
                    logging.debug('removing name:{0} labels:{1}'.format(collector._name, labels))
                    collector.remove(name, interface, flow_id)

    # ------------------------------------------------------------------
    # polling threads
    # ------------------------------------------------------------------
    # get all metrics defined in the list and export it to Prometheus
    def get_flow_metrics(self):
        """Thread body: poll flow statistics once per second until ``stop()``."""
        while self.start_monitoring:
            with self.monitor_flow_lock:
                for flow_dict in self.flow_metrics:
                    try:
                        data = {'cookie': flow_dict['cookie'], 'cookie_mask': COOKIE_MASK}
                        if 'tx' in flow_dict['metric_key']:
                            data['match'] = {'in_port': flow_dict['mon_port']}
                        elif 'rx' in flow_dict['metric_key']:
                            data['out_port'] = flow_dict['mon_port']

                        # query Ryu
                        ret = self.net.ryu_REST('stats/flow', dpid=flow_dict['switch_dpid'], data=data)
                        flow_stat_dict = self._parse_ryu_reply(ret)
                        logging.debug('received flow stat:{0} '.format(flow_stat_dict))

                        self.set_flow_metric(flow_dict, flow_stat_dict)
                    except Exception:
                        # one failing entry must not kill the monitoring thread
                        logging.exception('could not update flow metric {0}'.format(flow_dict))

                if len(self.flow_metrics) > 0:
                    self._push_registry()

            # sleep outside the lock so setup/stop calls are not blocked
            time.sleep(1)

    def get_network_metrics(self):
        """Thread body: poll port statistics once per second until ``stop()``."""
        while self.start_monitoring:
            with self.monitor_lock:
                # group metrics by dpid to optimize the rest api calls
                dpid_set = set(metric_dict['switch_dpid'] for metric_dict in self.network_metrics)

                for dpid in dpid_set:
                    try:
                        # query Ryu
                        ret = self.net.ryu_REST('stats/port', dpid=dpid)
                        port_stat_dict = self._parse_ryu_reply(ret)
                    except Exception:
                        logging.exception('could not read port stats of dpid {0}'.format(dpid))
                        continue

                    for metric_dict in self.network_metrics:
                        if int(metric_dict['switch_dpid']) != int(dpid):
                            continue
                        try:
                            self.set_network_metric(metric_dict, port_stat_dict)
                        except Exception:
                            # one failing entry must not kill the monitoring thread
                            logging.exception('could not update network metric {0}'.format(metric_dict))

                if len(self.network_metrics) > 0:
                    self._push_registry()

            # sleep outside the lock so setup/stop calls are not blocked
            time.sleep(1)

    # add metric to the list to export to Prometheus, parse the Ryu port-stats reply
    def set_network_metric(self, metric_dict, port_stat_dict):
        """Copy the counter of the monitored port into its Prometheus gauge.

        :param metric_dict: entry of ``self.network_metrics``
        :param port_stat_dict: parsed port-stats reply, keyed by the dpid as
            string; may be None when the reply could not be parsed
        :return: None on success, an error message when the port is missing
        """
        # vnf tx is the datacenter switch rx and vice-versa
        metric_key = self.switch_tx_rx(metric_dict['metric_key'])
        switch_dpid = metric_dict['switch_dpid']
        vnf_name = metric_dict['vnf_name']
        vnf_interface = metric_dict['vnf_interface']
        mon_port = metric_dict['mon_port']

        port_stats = (port_stat_dict or {}).get(str(switch_dpid), [])
        for port_stat in port_stats:
            # compare as text so a non-numeric port_no cannot raise
            # NOTE(review): presumably the controller can report reserved ports by name - confirm
            if str(port_stat['port_no']) != str(mon_port):
                continue

            port_uptime = port_stat['duration_sec'] + port_stat['duration_nsec'] * 10 ** (-9)
            this_measurement = int(port_stat[metric_key])

            # set prometheus metric
            self.prom_metrics[metric_dict['metric_key']]. \
                labels(vnf_name=vnf_name, vnf_interface=vnf_interface, flow_id=None). \
                set(this_measurement)

            # remember the sample; the rate itself is not computed here
            # (rate can be easily queried from prometheus)
            metric_dict['previous_measurement'] = this_measurement
            metric_dict['previous_monitor_time'] = port_uptime
            return

        msg = 'metric {0} not found on {1}:{2}'.format(metric_key, vnf_name, vnf_interface)
        logging.error(msg)
        logging.error('monport:{0}, dpid:{1}'.format(mon_port, switch_dpid))
        logging.error('port dict:{0}'.format(port_stat_dict))
        return msg

    def set_flow_metric(self, metric_dict, flow_stat_dict):
        """Sum the byte or packet counters of the returned flows into the gauge.

        :param metric_dict: entry of ``self.flow_metrics``
        :param flow_stat_dict: parsed flow-stats reply, keyed by the dpid as
            string; may be None when the reply could not be parsed
        """
        metric_key = metric_dict['metric_key']
        switch_dpid = metric_dict['switch_dpid']
        vnf_name = metric_dict['vnf_name']
        vnf_interface = metric_dict['vnf_interface']
        cookie = metric_dict['cookie']

        if not flow_stat_dict or str(switch_dpid) not in flow_stat_dict:
            logging.warning('no flow stats received for dpid {0}'.format(switch_dpid))
            return

        counter = 0
        for flow_stat in flow_stat_dict[str(switch_dpid)]:
            if 'bytes' in metric_key:
                counter += flow_stat['byte_count']
            elif 'packet' in metric_key:
                counter += flow_stat['packet_count']

        self.prom_metrics[metric_key]. \
            labels(vnf_name=vnf_name, vnf_interface=vnf_interface, flow_id=cookie). \
            set(counter)

    # ------------------------------------------------------------------
    # helper containers
    # ------------------------------------------------------------------
    def start_Prometheus(self, port=9090):
        """Launch a Prometheus container published on ``port``; return the Popen object."""
        # prometheus.yml configuration file is located in the same directory as this file
        here = os.path.dirname(os.path.abspath(__file__))
        cmd = ["docker",
               "run",
               "--rm",
               "-p", "{0}:9090".format(port),
               "-v", "{0}/prometheus.yml:/etc/prometheus/prometheus.yml".format(here),
               "-v", "{0}/profile.rules:/etc/prometheus/profile.rules".format(here),
               "--name", "prometheus",
               "prom/prometheus"
               ]
        logging.info('Start Prometheus container {0}'.format(cmd))
        return Popen(cmd)

    def start_PushGateway(self, port=PUSHGATEWAY_PORT):
        """Launch the Prometheus pushgateway container; return the Popen object."""
        cmd = ["docker",
               "run",
               "-d",
               "-p", "{0}:9091".format(port),
               "--name", "pushgateway",
               "--label", 'com.containernet=""',
               "prom/pushgateway"
               ]

        logging.info('Start Prometheus Push Gateway container {0}'.format(cmd))
        return Popen(cmd)

    def start_cAdvisor(self, port=CADVISOR_PORT):
        """Launch the cAdvisor container published on ``port``; return the Popen object."""
        cmd = ["docker",
               "run",
               "--rm",
               "--volume=/:/rootfs:ro",
               "--volume=/var/run:/var/run:rw",
               "--volume=/sys:/sys:ro",
               "--volume=/var/lib/docker/:/var/lib/docker:ro",
               "--publish={0}:8080".format(port),
               "--name=cadvisor",
               "--label", 'com.containernet=""',
               "google/cadvisor:latest"
               ]
        logging.info('Start cAdvisor container {0}'.format(cmd))
        return Popen(cmd)

    def stop(self):
        """Stop both polling threads and remove the helper containers."""
        # stop the monitoring threads
        self.start_monitoring = False
        self.monitor_thread.join()
        self.monitor_flow_thread.join()

        if self.pushgateway_process is not None:
            logging.info('stopping pushgateway container')
            self._stop_container('pushgateway')

        if self.cadvisor_process is not None:
            logging.info('stopping cadvisor container')
            self._stop_container('cadvisor')

    def switch_tx_rx(self, metric=''):
        """Swap 'tx' and 'rx' in a metric name.

        When monitoring vnfs, the tx of the datacenter switch is actually the
        rx of the vnf, so the name is changed to be consistent with the vnf.
        """
        if 'tx' in metric:
            metric = metric.replace('tx', 'rx')
        elif 'rx' in metric:
            metric = metric.replace('rx', 'tx')

        return metric

    def _stop_container(self, name):
        """Force-remove the docker container ``name`` through the docker CLI."""
        # the only robust way to stop these containers is via Popen, it seems
        time.sleep(1)
        cmd = ['docker', 'rm', '-f', name]
        Popen(cmd)

    # ------------------------------------------------------------------
    # skewness monitor
    # ------------------------------------------------------------------
    def update_skewmon(self, vnf_name, resource_name, action):
        """Add or remove a vnf resource in the skewness monitor configuration.

        The configuration is stored as JSON in /tmp/skewmon.cfg, which is
        mounted into the 'skewmon' container.  The container is started
        when it does not exist yet and removed when the configuration
        becomes empty.

        :param vnf_name: vnf name; its container is looked up as 'mn.<vnf_name>'
        :param resource_name: name of the monitored resource
        :param action: 'start' or 'stop'; other values only rewrite the file
        :return: a status message (str)
        """
        ret = ''
        config_file_path = '/tmp/skewmon.cfg'

        if len(self.skewmon_metrics) == 0:
            # nothing monitored by this instance yet: start from a clean config
            config = {}
        else:
            try:
                with open(config_file_path) as configfile:
                    config = json.load(configfile)
            except (IOError, ValueError):
                # missing, empty or not a valid json file
                config = {}
            if not isinstance(config, dict):
                config = {}

        docker_name = 'mn.' + vnf_name
        vnf_container = self.dockercli.containers.get(docker_name)
        key = resource_name + '_' + vnf_container.short_id
        vnf_id = vnf_container.id

        if action == 'start':
            # add a new vnf to monitor
            config[key] = dict(VNF_NAME=vnf_name,
                               VNF_ID=vnf_id,
                               VNF_METRIC=resource_name)
            ret = 'adding to skewness monitor: {0} {1} '.format(vnf_name, resource_name)
            logging.info(ret)
        elif action == 'stop':
            # remove vnf to monitor (no error when it was not monitored)
            config.pop(key, None)
            ret = 'removing from skewness monitor: {0} {1} '.format(vnf_name, resource_name)
            logging.info(ret)

        self.skewmon_metrics = config
        # single write, only after the container lookup above succeeded
        with open(config_file_path, 'w') as configfile:
            json.dump(config, configfile)

        try:
            skewmon_container = self.dockercli.containers.get('skewmon')

            # remove container if config is empty
            if len(config) == 0:
                ret += 'stopping skewness monitor'
                logging.info('stopping skewness monitor')
                skewmon_container.remove(force=True)

        except docker.errors.NotFound:
            # start container if not running
            ret += 'starting skewness monitor'
            logging.info('starting skewness monitor')
            volumes = {'/sys/fs/cgroup': {'bind': '/sys/fs/cgroup', 'mode': 'ro'},
                       '/tmp/skewmon.cfg': {'bind': '/config.txt', 'mode': 'ro'}}
            self.dockercli.containers.run('skewmon',
                                          detach=True,
                                          volumes=volumes,
                                          labels=['com.containernet'],
                                          name='skewmon'
                                          )
            # Wait a while for containers to be completely started
            # NOTE(review): this polls for a container named 'prometheus',
            # not 'skewmon' - confirm that this is intended.
            started = False
            wait_time = 0
            while not started:
                running = self.dockercli.containers.list(filters={'status': 'running', 'name': 'prometheus'})
                if len(running) >= 1:
                    time.sleep(1)
                    started = True
                if wait_time > 5:
                    return 'skewmon not started'
                time.sleep(1)
                wait_time += 1
        return ret
623
624
625
626
stevenvanrossemb098cb52016-04-15 13:28:23 +0200627