Added logging functionality to resource models.
[osm/vim-emu.git] / src / emuvim / dcemulator / node.py
1 """
2 Distributed Cloud Emulator (dcemulator)
3 (c) 2015 by Manuel Peuster <manuel.peuster@upb.de>
4 """
5 from mininet.node import Docker
6 from mininet.link import Link
7 import logging
8 import time
9 import json
10
# Module-wide logger; all classes in this file log through it.
LOG = logging.getLogger("dcemulator")
# NOTE: the level is forced to DEBUG as soon as this module is imported.
LOG.setLevel(logging.DEBUG)


# Module-global counter from which data center switch dpids are derived;
# Datacenter._get_next_dc_dpid() increments it before handing out a value.
DCDPID_BASE = 1000 # start of switch dpid's used for data center switches
16
17
class EmulatorCompute(Docker):
    """
    Emulator specific compute node class.
    Inherits from Dockernet's Docker host class.
    Represents a single container connected to a (logical)
    data center.
    We can add emulator specific helper functions to it.
    """

    def __init__(
            self, name, dimage, **kwargs):
        """
        Remember the emulator specific settings and create the container.

        :param name: name of the compute instance (string)
        :param dimage: name of the Docker image to use (string)
        :param kwargs: forwarded unchanged to Docker.__init__; the keys
                       "datacenter" and "flavor_name" are additionally
                       stored on this object (None if not given)
        """
        self.datacenter = kwargs.get("datacenter")  # pointer to current DC
        self.flavor_name = kwargs.get("flavor_name")
        # pass the values as logger arguments so the message is only
        # formatted if the record is actually emitted
        LOG.debug("Starting compute instance %r in data center %r",
                  name, str(self.datacenter))
        # call original Docker.__init__
        Docker.__init__(self, name, dimage, **kwargs)

    def getNetworkStatus(self):
        """
        Helper method to receive information about the virtual networks
        this compute instance is connected to.

        :return: list of tuples (name, IP, MAC, isUp, status), one per
                 interface returned by intfList()
        """
        return [(str(i), i.IP(), i.MAC(), i.isUp(), i.status())
                for i in self.intfList()]

    def getStatus(self):
        """
        Helper method to receive information about this compute instance.

        :return: dict with name, network status, image, resource limits,
                 container state, container id and the label of the data
                 center this instance belongs to (None if unassigned)
        """
        status = {}
        status["name"] = self.name
        status["network"] = self.getNetworkStatus()
        status["image"] = self.dimage
        status["cpu_quota"] = self.cpu_quota
        status["cpu_period"] = self.cpu_period
        status["cpu_shares"] = self.cpu_shares
        status["cpuset"] = self.cpuset
        status["mem_limit"] = self.mem_limit
        status["memswap_limit"] = self.memswap_limit
        # inspect the container only once: avoids a second request and
        # guarantees that "state" and "id" stem from the same snapshot
        info = self.dcli.inspect_container(self.dc)
        status["state"] = info["State"]
        status["id"] = info["Id"]
        status["datacenter"] = (None if self.datacenter is None
                                else self.datacenter.label)
        return status
63
64
class Datacenter(object):
    """
    Represents a logical data center to which compute resources
    (Docker containers) can be added at runtime.

    Will also implement resource bookkeeping in later versions.
    """

    # class-wide counter used to build the short internal names ("dc<N>")
    DC_COUNTER = 1

    def __init__(self, label, metadata=None, resource_log_path=None):
        """
        :param label: user defined name of this data center
        :param metadata: dict with arbitrary metadata (e.g. latitude and
                         longitude); a fresh empty dict is used if omitted
        :param resource_log_path: path to which resource information should
                                  be logged (e.g. for experiments).
                                  None = no logging
        """
        self.net = None  # DCNetwork to which we belong
        # each node (DC) has a short internal name used by Mininet
        # this is caused by Mininets naming limitations for switches etc.
        self.name = "dc%d" % Datacenter.DC_COUNTER
        Datacenter.DC_COUNTER += 1
        # use this for user defined names that can be longer than self.name
        self.label = label
        # dict to store arbitrary metadata (e.g. latitude and longitude);
        # created per instance so that data centers never share one dict
        self.metadata = {} if metadata is None else metadata
        # path to which resource information should be logged. None = no logging
        self.resource_log_path = resource_log_path
        # first prototype assumes one "bigswitch" per DC
        self.switch = None
        # keep track of running containers (name -> container object)
        self.containers = {}
        # pointer to assigned resource model
        self._resource_model = None

    def __repr__(self):
        return self.label

    def _get_next_dc_dpid(self):
        """
        Increment the module-global DCDPID_BASE and return the new value.
        """
        global DCDPID_BASE
        DCDPID_BASE += 1
        return DCDPID_BASE

    def _get_rm_state(self):
        """
        Return the state dict of the assigned resource model (None if no
        resource model is assigned).
        """
        if self._resource_model is None:
            return None
        return self._resource_model.get_state_dict()

    def _append_resource_log(self, entry):
        """
        Append the given dict as one JSON encoded line to the resource log.
        Callers have to check that self.resource_log_path is set.
        """
        with open(self.resource_log_path, "a") as f:
            f.write("%s\n" % json.dumps(entry))

    def create(self):
        """
        Each data center is represented by a single switch to which
        compute resources can be connected at run time.

        TODO: This will be changed in the future to support multiple networks
        per data center
        """
        self.switch = self.net.addSwitch(
            "%s.s1" % self.name, dpid=hex(self._get_next_dc_dpid())[2:])
        LOG.debug("created data center switch: %s", self.switch)

    def start(self):
        pass

    def startCompute(self, name, image=None, command=None, network=None, flavor_name="tiny"):
        """
        Create a new container as compute resource and connect it to this
        data center.
        :param name: name (string)
        :param image: image name (string), defaults to "ubuntu"
        :param command: command (string)
        :param network: networks list({"ip": "10.0.0.254/8"}, {"ip": "11.0.0.254/24"})
                        a single dict is accepted as well
        :param flavor_name: name of the flavor for this compute container
        :return: the created container object
        :raises Exception: if a container with this name already exists
        """
        assert name is not None
        # no duplications
        if any(c.name == name for c in self.net.getAllContainers()):
            raise Exception("Container with name %s already exists." % name)
        # set default parameter
        if image is None:
            image = "ubuntu"
        if network is None:
            network = {}  # {"ip": "10.0.0.254/8"}
        if isinstance(network, dict):
            network = [network]  # if we have only one network, put it in a list
        if isinstance(network, list) and len(network) < 1:
            # always connect at least one (default) network; use a new list
            # so that the list object passed by the caller is not modified
            network = [{}]

        # allocate in resource model and compute resource limits for new container
        cpu_limit = mem_limit = disk_limit = -1
        cpu_period = cpu_quota = None
        if self._resource_model is not None:
            # call allocate in resource model to calculate resource limit for this container
            (cpu_limit, mem_limit, disk_limit) = alloc = self._resource_model.allocate(name, flavor_name)
            LOG.debug("Allocation result: %r", str(alloc))
            # check if we have a cpu_limit given by the used resource model
            if cpu_limit > 0:
                # calculate cpu period and quota for CFS
                # (see: https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt)
                # TODO consider multi core machines etc! non trivial!
                # Attention minimum cpu_quota is 1ms (1000 microseconds)
                cpu_period = 100000  # lets consider a fixed period of 100000 microseconds for now
                cpu_quota = cpu_period * cpu_limit  # calculate the fraction of cpu time for this container
                LOG.debug(
                    "CPU limit: cpu_quota = cpu_period * cpu_limit = %f * %f = %f",
                    cpu_period, cpu_limit, cpu_quota)
                # ATTENTION >= 1000 to avoid a invalid argument system error ... no idea why
                if cpu_quota < 1000:
                    cpu_quota = 1000
                    LOG.warning("Increased CPU quota for %r to avoid system error.", name)
            # check if we have a mem_limit given by the used resource model
            if mem_limit > 0:
                LOG.debug(
                    "MEM limit: mem_limit = %f MB", mem_limit)
                # ATTENTION minimum mem_limit per container is 4MB
                if mem_limit < 4:
                    mem_limit = 4
                    LOG.warning("Increased MEM limit for %r because it was less than 4.0 MB.", name)

        # translate the limits into container arguments (None = unlimited)
        has_cpu_limit = cpu_limit > 0
        mem_arg = "%dm" % int(mem_limit) if mem_limit > 0 else None
        # create the container
        d = self.net.addDocker(
            "%s" % (name),
            dimage=image,
            dcmd=command,
            datacenter=self,
            flavor_name=flavor_name,
            cpu_period=int(cpu_period) if has_cpu_limit else None,  # set cpu limits if needed
            cpu_quota=int(cpu_quota) if has_cpu_limit else None,
            mem_limit=mem_arg,  # set mem limits if needed
            memswap_limit=mem_arg  # lets set swap to mem limit for now
        )
        # connect all given networks
        for nw in network:
            # TODO we cannot use TCLink here (see: https://github.com/mpeuster/dockernet/issues/3)
            self.net.addLink(d, self.switch, params1=nw, cls=Link)
        # do bookkeeping
        self.containers[name] = d

        # write resource log if a path is given
        if self.resource_log_path is not None:
            entry = dict()
            entry["t"] = time.time()
            entry["name"] = name
            entry["compute"] = d.getStatus()
            entry["flavor_name"] = flavor_name
            entry["action"] = "allocate"
            entry["cpu_limit"] = cpu_limit
            entry["mem_limit"] = mem_limit
            entry["disk_limit"] = disk_limit
            entry["rm_state"] = self._get_rm_state()
            self._append_resource_log(entry)
        return d  # we might use UUIDs for naming later on

    def stopCompute(self, name):
        """
        Stop and remove a container from this data center.
        :param name: name of the container (string)
        :return: True
        :raises Exception: if no container with this name is known to this DC
        """
        assert name is not None
        if name not in self.containers:
            raise Exception("Container with name %s not found." % name)
        LOG.debug("Stopping compute instance %r in data center %r", name, str(self))
        self.net.removeLink(
            link=None, node1=self.containers[name], node2=self.switch)
        self.net.removeDocker("%s" % (name))
        del self.containers[name]
        # call resource model and free resources
        if self._resource_model is not None:
            self._resource_model.free(name)

        # write resource log if a path is given
        if self.resource_log_path is not None:
            entry = dict()
            entry["t"] = time.time()
            entry["name"] = name
            entry["flavor_name"] = None
            entry["action"] = "free"
            entry["cpu_limit"] = -1
            entry["mem_limit"] = -1
            entry["disk_limit"] = -1
            entry["rm_state"] = self._get_rm_state()
            self._append_resource_log(entry)
        return True

    def listCompute(self):
        """
        Return a list of all running containers assigned to this
        data center.
        """
        # values() instead of itervalues(): works on Python 2 and Python 3
        return list(self.containers.values())

    def getStatus(self):
        """
        Return a dict with status information about this DC.
        """
        return {
            "label": self.label,
            "internalname": self.name,
            "switch": self.switch.name,
            "n_running_containers": len(self.containers),
            "metadata": self.metadata
        }

    def assignResourceModel(self, rm):
        """
        Assign a resource model to this DC.
        :param rm: a BaseResourceModel object
        :return:
        :raises Exception: if a resource model is already assigned
        """
        if self._resource_model is not None:
            raise Exception("There is already an resource model assigned to this DC.")
        self._resource_model = rm
        self.net.rm_registrar.register(self, rm)
        LOG.info("Assigned RM: %r to DC: %r", rm, self)
270