Re-wrote resource model API and UPB simple resource model. Due to an update of Docker...
[osm/vim-emu.git] / src / emuvim / dcemulator / node.py
1 """
2 Distributed Cloud Emulator (dcemulator)
3 (c) 2015 by Manuel Peuster <manuel.peuster@upb.de>
4 """
5 from mininet.node import Docker
6 from mininet.link import Link
7 import logging
8 import time
9 import json
10
# logger shared by the data center emulator code in this module;
# debug output is enabled for it
LOG = logging.getLogger("dcemulator")
LOG.setLevel(logging.DEBUG)


# start of the switch dpids used for data center switches; the value is
# incremented for every data center switch that gets created
DCDPID_BASE = 1000
16
17
class EmulatorCompute(Docker):
    """
    Emulator specific compute node class.

    Inherits from Dockernet's Docker host class and represents a single
    container connected to a (logical) data center.
    We can add emulator specific helper functions to it.
    """

    def __init__(self, name, dimage, **kwargs):
        """
        Create a compute instance.

        :param name: name of the container
        :param dimage: name of the Docker image to use
        :param kwargs: passed on unchanged to Docker.__init__; the optional
            keys "datacenter" and "flavor_name" are additionally stored on
            this instance (None if not given)
        """
        self.datacenter = kwargs.get("datacenter")  # pointer to current DC
        self.flavor_name = kwargs.get("flavor_name")
        LOG.debug("Starting compute instance %r in data center %r",
                  name, str(self.datacenter))
        # call original Docker.__init__
        Docker.__init__(self, name, dimage, **kwargs)

    def getNetworkStatus(self):
        """
        Helper method to receive information about the virtual networks
        this compute instance is connected to.

        :return: list of tuples (name, IP, MAC, isUp, status), one per
            interface returned by intfList()
        """
        return [(str(intf), intf.IP(), intf.MAC(), intf.isUp(), intf.status())
                for intf in self.intfList()]

    def getStatus(self):
        """
        Helper method to receive information about this compute instance.

        :return: dict with name, network status, image, the configured
            resource limits, container state/id and the label of the
            data center this instance belongs to (None if unassigned)
        """
        status = {
            "name": self.name,
            "network": self.getNetworkStatus(),
            "image": self.dimage,
            "cpu_quota": self.cpu_quota,
            "cpu_period": self.cpu_period,
            "cpu_shares": self.cpu_shares,
            "cpuset": self.cpuset,
            "mem_limit": self.mem_limit,
            "memswap_limit": self.memswap_limit,
        }
        # query the container only once so that "state" and "id" are taken
        # from the same inspect result instead of two separate calls
        info = self.dcli.inspect_container(self.dc)
        status["state"] = info["State"]
        status["id"] = info["Id"]
        status["datacenter"] = (None if self.datacenter is None
                                else self.datacenter.label)
        return status
63
64
class Datacenter(object):
    """
    Represents a logical data center to which compute resources
    (Docker containers) can be added at runtime.

    Will also implement resource bookkeeping in later versions.
    """

    # class-wide counter used to build the internal names "dc1", "dc2", ...
    DC_COUNTER = 1

    def __init__(self, label, metadata=None, resource_log_path=None):
        """
        :param label: user defined name of this data center
        :param metadata: dict with arbitrary metadata (e.g. latitude and
            longitude); a new empty dict is created if omitted so that
            instances never share one default dict
        :param resource_log_path: path to which resource information should
            be logged (e.g. for experiments). None = no logging.
            The value is only stored at the moment (see the logging TODOs
            in startCompute / stopCompute).
        """
        self.net = None  # DCNetwork to which we belong
        # each node (DC) has a short internal name used by Mininet
        # this is caused by Mininet's naming limitations for switches etc.
        self.name = "dc%d" % Datacenter.DC_COUNTER
        Datacenter.DC_COUNTER += 1
        # use this for user defined names that can be longer than self.name
        self.label = label
        # dict to store arbitrary metadata (e.g. latitude and longitude)
        self.metadata = {} if metadata is None else metadata
        # path to which resource information should be logged
        self.resource_log_path = resource_log_path
        # first prototype assumes one "bigswitch" per DC
        self.switch = None
        # keep track of running containers (name -> container object)
        self.containers = {}
        # pointer to assigned resource model
        self._resource_model = None

    def __repr__(self):
        return self.label

    def _get_next_dc_dpid(self):
        """
        Increment the module-wide dpid counter and return its new value.
        """
        global DCDPID_BASE
        DCDPID_BASE += 1
        return DCDPID_BASE

    def create(self):
        """
        Each data center is represented by a single switch to which
        compute resources can be connected at run time.

        TODO: This will be changed in the future to support multiple networks
        per data center
        """
        # the dpid is passed as hex string without the "0x" prefix
        dpid = "%x" % self._get_next_dc_dpid()
        self.switch = self.net.addSwitch("%s.s1" % self.name, dpid=dpid)
        LOG.debug("created data center switch: %s", str(self.switch))

    def start(self):
        pass

    def startCompute(self, name, image=None, command=None, network=None, flavor_name="tiny"):
        """
        Create a new container as compute resource and connect it to this
        data center.

        :param name: name (string)
        :param image: image name (string), defaults to "ubuntu"
        :param command: command (string)
        :param network: a single network dict or a list of them, e.g.
            [{"ip": "10.0.0.254/8"}, {"ip": "11.0.0.254/24"}];
            None or an empty list result in one default interface
        :param flavor_name: name of the flavor for this compute container
        :return: the created container object
        :raises Exception: if a container with this name already exists
        """
        assert name is not None
        # no duplications
        if any(c.name == name for c in self.net.getAllContainers()):
            raise Exception("Container with name %s already exists." % name)
        # set default parameters
        if image is None:
            image = "ubuntu"
        if network is None:
            network = {}  # {"ip": "10.0.0.254/8"}
        if isinstance(network, dict):
            # if we have only one network, put it in a list
            network = [network]
        elif isinstance(network, list) and not network:
            # rebind instead of appending so the caller's list is not mutated
            network = [{}]

        # create the container
        d = self.net.addDocker(
            "%s" % (name),
            dimage=image,
            dcmd=command,
            datacenter=self,
            flavor_name=flavor_name
        )

        # apply resource limits to container if a resource model is defined
        if self._resource_model is not None:
            self._resource_model.allocate(d)

        # connect all given networks
        # if no --net option is given, network = [{}], so 1 empty dict in the list
        # this results in 1 default interface with a default ip address
        for nw in network:
            # TODO we cannot use TCLink here (see: https://github.com/mpeuster/dockernet/issues/3)
            self.net.addLink(d, self.switch, params1=nw, cls=Link)
        # do bookkeeping
        self.containers[name] = d

        # TODO re-enable logging: if self.resource_log_path is set, append one
        # JSON line per allocation to that file (timestamp, name, compute
        # status, flavor name, action "allocate", cpu/mem/disk limits and the
        # state dict of the resource model)
        return d  # we might use UUIDs for naming later on

    def stopCompute(self, name):
        """
        Stop and remove a container from this data center.

        :param name: name of the container to stop
        :return: True
        :raises Exception: if no container with this name is known to this DC
        """
        assert name is not None
        if name not in self.containers:
            raise Exception("Container with name %s not found." % name)
        LOG.debug("Stopping compute instance %r in data center %r",
                  name, str(self))
        container = self.containers[name]

        # call resource model and free resources
        if self._resource_model is not None:
            self._resource_model.free(container)

        # remove links
        self.net.removeLink(link=None, node1=container, node2=self.switch)

        # remove container
        self.net.removeDocker("%s" % (name))
        del self.containers[name]

        # TODO re-enable logging: if self.resource_log_path is set, append one
        # JSON line per release to that file (timestamp, name, action "free"
        # and the state dict of the resource model)
        return True

    def listCompute(self):
        """
        Return a list of all running containers assigned to this
        data center.
        """
        # values() instead of itervalues() works on Python 2 and Python 3
        return list(self.containers.values())

    def getStatus(self):
        """
        Return a dict with status information about this DC.

        "switch" is None as long as create() has not been called.
        """
        return {
            "label": self.label,
            "internalname": self.name,
            "switch": None if self.switch is None else self.switch.name,
            "n_running_containers": len(self.containers),
            "metadata": self.metadata
        }

    def assignResourceModel(self, rm):
        """
        Assign a resource model to this DC.

        :param rm: a BaseResourceModel object
        :return:
        :raises Exception: if a resource model is already assigned
        """
        if self._resource_model is not None:
            raise Exception("There is already an resource model assigned to this DC.")
        self._resource_model = rm
        self.net.rm_registrar.register(self, rm)
        LOG.info("Assigned RM: %r to DC: %r", rm, self)