Merge pull request #76 from mpeuster/master
[osm/vim-emu.git] / src / emuvim / dcemulator / node.py
1 """
2 Distributed Cloud Emulator (dcemulator)
3 (c) 2015 by Manuel Peuster <manuel.peuster@upb.de>
4 """
5 from mininet.node import Docker
6 from mininet.link import Link
7 import logging
8 import time
9 import json
10
11 LOG = logging.getLogger("dcemulator")
12 LOG.setLevel(logging.DEBUG)
13
14
15 DCDPID_BASE = 1000 # start of switch dpid's used for data center switches
16
17
class EmulatorCompute(Docker):
    """
    Emulator specific compute node class.
    Inherits from Dockernet's Docker host class.
    Represents a single container connected to a (logical)
    data center.
    We can add emulator specific helper functions to it.
    """

    def __init__(
            self, name, dimage, **kwargs):
        # pointer to the Datacenter object this container belongs to
        self.datacenter = kwargs.get("datacenter")
        # name of the flavor used to size this container (may be None)
        self.flavor_name = kwargs.get("flavor_name")
        LOG.debug("Starting compute instance %r in data center %r" % (name, str(self.datacenter)))
        # call original Docker.__init__
        Docker.__init__(self, name, dimage, **kwargs)

    def getNetworkStatus(self):
        """
        Helper method to receive information about the virtual networks
        this compute instance is connected to.
        :return: list of tuples (name, IP, MAC, isUp, status),
            one per attached interface
        """
        return [(str(i), i.IP(), i.MAC(), i.isUp(), i.status())
                for i in self.intfList()]

    def getStatus(self):
        """
        Helper method to receive information about this compute instance.
        :return: dict with name, network, image, resource limit and
            Docker state information
        """
        # query the Docker daemon once and reuse the result instead of
        # issuing two identical inspect_container calls
        inspection = self.dcli.inspect_container(self.dc)
        status = {}
        status["name"] = self.name
        status["network"] = self.getNetworkStatus()
        status["image"] = self.dimage
        status["cpu_quota"] = self.cpu_quota
        status["cpu_period"] = self.cpu_period
        status["cpu_shares"] = self.cpu_shares
        status["cpuset"] = self.cpuset
        status["mem_limit"] = self.mem_limit
        status["memswap_limit"] = self.memswap_limit
        status["state"] = inspection["State"]
        status["id"] = inspection["Id"]
        status["datacenter"] = (None if self.datacenter is None
                                else self.datacenter.label)
        return status
63
64
class Datacenter(object):
    """
    Represents a logical data center to which compute resources
    (Docker containers) can be added at runtime.

    Will also implement resource bookkeeping in later versions.
    """

    # class-wide counter used to generate unique internal Mininet names
    DC_COUNTER = 1

    def __init__(self, label, metadata=None, resource_log_path=None):
        """
        :param label: user defined name for this DC (can be longer than
            the internal Mininet name)
        :param metadata: dict with arbitrary metadata
            (e.g. latitude and longitude)
        :param resource_log_path: path to which resource information should
            be logged (e.g. for experiments). None = no logging
        """
        self.net = None  # DCNetwork to which we belong
        # each node (DC) has a short internal name used by Mininet
        # this is caused by Mininets naming limitations for swtiches etc.
        self.name = "dc%d" % Datacenter.DC_COUNTER
        Datacenter.DC_COUNTER += 1
        # use this for user defined names that can be longer than self.name
        self.label = label
        # dict to store arbitrary metadata; created per instance to avoid
        # the shared mutable default argument pitfall
        self.metadata = {} if metadata is None else metadata
        # path to which resource information should be logged. None = no logging
        self.resource_log_path = resource_log_path
        # first prototype assumes one "bigswitch" per DC
        self.switch = None
        # keep track of running containers (name -> container object)
        self.containers = {}
        # pointer to assigned resource model
        self._resource_model = None

    def __repr__(self):
        return self.label

    def _get_next_dc_dpid(self):
        """
        Return the next globally unique dpid to be used for a data
        center switch.
        """
        global DCDPID_BASE
        DCDPID_BASE += 1
        return DCDPID_BASE

    def create(self):
        """
        Each data center is represented by a single switch to which
        compute resources can be connected at run time.

        TODO: This will be changed in the future to support multiple networks
        per data center
        """
        self.switch = self.net.addSwitch(
            "%s.s1" % self.name, dpid=hex(self._get_next_dc_dpid())[2:])
        LOG.debug("created data center switch: %s" % str(self.switch))

    def start(self):
        # nothing to do on start for now
        pass

    def _write_resource_log(self, name, flavor_name, action,
                            cpu_limit=-1, mem_limit=-1, disk_limit=-1,
                            compute_status=None):
        """
        Append one JSON-encoded record about an allocate/free action to
        self.resource_log_path. No-op if no log path is configured.
        :param name: container name (string)
        :param flavor_name: flavor of the container (string or None)
        :param action: "allocate" or "free"
        :param cpu_limit: CPU limit computed by the RM (-1 = no limit)
        :param mem_limit: MEM limit computed by the RM (-1 = no limit)
        :param disk_limit: disk limit computed by the RM (-1 = no limit)
        :param compute_status: optional status dict of the container;
            only included in the record when given
        """
        if self.resource_log_path is None:
            return
        record = dict()
        record["t"] = time.time()
        record["name"] = name
        record["flavor_name"] = flavor_name
        record["action"] = action
        record["cpu_limit"] = cpu_limit
        record["mem_limit"] = mem_limit
        record["disk_limit"] = disk_limit
        record["rm_state"] = (None if self._resource_model is None
                              else self._resource_model.get_state_dict())
        if compute_status is not None:
            record["compute"] = compute_status
        # append to logfile
        with open(self.resource_log_path, "a") as f:
            f.write("%s\n" % json.dumps(record))

    def startCompute(self, name, image=None, command=None, network=None, flavor_name="tiny"):
        """
        Create a new container as compute resource and connect it to this
        data center.
        :param name: name (string)
        :param image: image name (string)
        :param command: command (string)
        :param network: networks list({"ip": "10.0.0.254/8"}, {"ip": "11.0.0.254/24"})
        :param flavor_name: name of the flavor for this compute container
        :return: the created container object
        """
        assert name is not None
        # no duplications
        if name in [c.name for c in self.net.getAllContainers()]:
            raise Exception("Container with name %s already exists." % name)
        # set default parameter
        if image is None:
            image = "ubuntu"
        if network is None:
            network = {}  # {"ip": "10.0.0.254/8"}
        if isinstance(network, dict):
            network = [network]  # if we have only one network, put it in a list
        if isinstance(network, list):
            if len(network) < 1:
                network.append({})

        # allocate in resource resource model and compute resource limits for new container
        cpu_limit = mem_limit = disk_limit = -1
        cpu_period = cpu_quota = None
        if self._resource_model is not None:
            # call allocate in resource model to calculate resource limit for this container
            (cpu_limit, mem_limit, disk_limit) = alloc = self._resource_model.allocate(name, flavor_name)
            LOG.debug("Allocation result: %r" % str(alloc))
            # check if we have a cpu_limit given by the used resource model
            if cpu_limit > 0:
                # calculate cpu period and quota for CFS
                # (see: https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt)
                # TODO consider multi core machines etc! non trivial!
                # Attention minimum cpu_quota is 1ms (micro)
                cpu_period = 100000  # lets consider a fixed period of 100000 microseconds for now
                cpu_quota = cpu_period * cpu_limit  # calculate the fraction of cpu time for this container
                LOG.debug(
                    "CPU limit: cpu_quota = cpu_period * cpu_limit = %f * %f = %f" % (cpu_period, cpu_limit, cpu_quota))
                # ATTENTION >= 1000 to avoid a invalid argument system error ... no idea why
                if cpu_quota < 1000:
                    cpu_quota = 1000
                    LOG.warning("Increased CPU quota for %r to avoid system error." % name)
            # check if we have a mem_limit given by the used resource model
            if mem_limit > 0:
                LOG.debug(
                    "MEM limit: mem_limit = %f MB" % mem_limit)
                # ATTENTION minimum mem_limit per container is 4MB
                if mem_limit < 4:
                    mem_limit = 4
                    LOG.warning("Increased MEM limit for %r because it was less than 4.0 MB." % name)
        # create the container
        d = self.net.addDocker(
            "%s" % (name),
            dimage=image,
            dcmd=command,
            datacenter=self,
            flavor_name=flavor_name,
            cpu_period=int(cpu_period) if cpu_limit > 0 else None,  # set cpu limits if needed
            cpu_quota=int(cpu_quota) if cpu_limit > 0 else None,
            #mem_limit="%dm" % int(mem_limit) if mem_limit > 0 else None,  # set mem limits if needed
            #memswap_limit="%dm" % int(mem_limit) if mem_limit > 0 else None  # lets set swap to mem limit for now
        )
        # connect all given networks
        # if no --net option is given, network = [{}], so 1 empty dict in the list
        # this results in 1 default interface with a default ip address
        for nw in network:
            # TODO we cannot use TCLink here (see: https://github.com/mpeuster/dockernet/issues/3)
            self.net.addLink(d, self.switch, params1=nw, cls=Link)
        # do bookkeeping
        self.containers[name] = d

        # write resource log if a path is given; the getStatus() call is
        # guarded so no Docker inspection happens when logging is disabled
        if self.resource_log_path is not None:
            self._write_resource_log(
                name, flavor_name, "allocate",
                cpu_limit=cpu_limit, mem_limit=mem_limit,
                disk_limit=disk_limit, compute_status=d.getStatus())
        return d  # we might use UUIDs for naming later on

    def stopCompute(self, name):
        """
        Stop and remove a container from this data center.
        :param name: name of the container to stop (string)
        :return: True on success
        :raises Exception: if no container with this name is known
        """
        assert name is not None
        if name not in self.containers:
            raise Exception("Container with name %s not found." % name)
        LOG.debug("Stopping compute instance %r in data center %r" % (name, str(self)))
        self.net.removeLink(
            link=None, node1=self.containers[name], node2=self.switch)
        self.net.removeDocker("%s" % (name))
        del self.containers[name]
        # call resource model and free resources
        if self._resource_model is not None:
            self._resource_model.free(name)

        # write resource log if a path is given
        # (free records carry no flavor and -1 limits, as before)
        self._write_resource_log(name, None, "free")
        return True

    def listCompute(self):
        """
        Return a list of all running containers assigned to this
        data center.
        """
        # .values() instead of .itervalues() keeps this compatible with
        # both Python 2 and Python 3
        return list(self.containers.values())

    def getStatus(self):
        """
        Return a dict with status information about this DC.
        """
        return {
            "label": self.label,
            "internalname": self.name,
            "switch": self.switch.name,
            "n_running_containers": len(self.containers),
            "metadata": self.metadata
        }

    def assignResourceModel(self, rm):
        """
        Assign a resource model to this DC.
        :param rm: a BaseResourceModel object
        :return:
        :raises Exception: if a resource model is already assigned
        """
        if self._resource_model is not None:
            raise Exception("There is already an resource model assigned to this DC.")
        self._resource_model = rm
        self.net.rm_registrar.register(self, rm)
        LOG.info("Assigned RM: %r to DC: %r" % (rm, self))
272