Re-wrote resource model API and UPB simple resource model. Due to an update of Docker...

[osm/vim-emu.git] / src / emuvim / dcemulator / node.py
diff --git a/src/emuvim/dcemulator/node.py b/src/emuvim/dcemulator/node.py

index 336126c..1c85700 100755 (executable)
--- a/src/emuvim/dcemulator/node.py
+++ b/src/emuvim/dcemulator/node.py
@@ -3,7 +3,13 @@ Distributed Cloud Emulator (dcemulator)
  (c) 2015 by Manuel Peuster <manuel.peuster@upb.de>
  """
  from mininet.node import Docker
+from mininet.link import Link
  import logging
+import time
+import json
+
+LOG = logging.getLogger("dcemulator")
+LOG.setLevel(logging.DEBUG)
  
  
  DCDPID_BASE = 1000  # start of switch dpid's used for data center switches
@@ -20,9 +26,9 @@ class EmulatorCompute(Docker):
  
      def __init__(
              self, name, dimage, **kwargs):
-        logging.debug("Create EmulatorCompute instance: %s" % name)
-        self.datacenter = None  # pointer to current DC
-
+        self.datacenter = kwargs.get("datacenter")  # pointer to current DC
+        self.flavor_name = kwargs.get("flavor_name")
+        LOG.debug("Starting compute instance %r in data center %r" % (name, str(self.datacenter)))
          # call original Docker.__init__
          Docker.__init__(self, name, dimage, **kwargs)
  
@@ -66,7 +72,7 @@ class Datacenter(object):
  
      DC_COUNTER = 1
  
-    def __init__(self, label, metadata={}):
+    def __init__(self, label, metadata={}, resource_log_path=None):
          self.net = None  # DCNetwork to which we belong
          # each node (DC) has a short internal name used by Mininet
          # this is caused by Mininets naming limitations for swtiches etc.
@@ -76,8 +82,17 @@ class Datacenter(object):
          self.label = label  
          # dict to store arbitrary metadata (e.g. latitude and longitude)
          self.metadata = metadata
-        self.switch = None  # first prototype assumes one "bigswitch" per DC
-        self.containers = {}  # keep track of running containers
+        # path to which resource information should be logged (e.g. for experiments). None = no logging
+        self.resource_log_path = resource_log_path
+        # first prototype assumes one "bigswitch" per DC
+        self.switch = None
+        # keep track of running containers
+        self.containers = {}
+        # pointer to assigned resource model
+        self._resource_model = None
+
+    def __repr__(self):
+        return self.label
  
      def _get_next_dc_dpid(self):
          global DCDPID_BASE
@@ -94,12 +109,12 @@ class Datacenter(object):
          """
          self.switch = self.net.addSwitch(
              "%s.s1" % self.name, dpid=hex(self._get_next_dc_dpid())[2:])
-        logging.debug("created data center switch: %s" % str(self.switch))
+        LOG.debug("created data center switch: %s" % str(self.switch))
  
      def start(self):
          pass
  
-    def startCompute(self, name, image=None, command=None, network=None):
+    def startCompute(self, name, image=None, command=None, network=None, flavor_name="tiny"):
          """
          Create a new container as compute resource and connect it to this
          data center.
@@ -107,6 +122,7 @@ class Datacenter(object):
          :param image: image name (string)
          :param command: command (string)
          :param network: networks list({"ip": "10.0.0.254/8"}, {"ip": "11.0.0.254/24"})
+        :param flavor_name: name of the flavor for this compute container
          :return:
          """
          assert name is not None
@@ -124,14 +140,77 @@ class Datacenter(object):
              if len(network) < 1:
                  network.append({})
  
+        """
+        # allocate in resource resource model and compute resource limits for new container
+        cpu_limit = mem_limit = disk_limit = -1
+        cpu_period = cpu_quota = None
+        if self._resource_model is not None:
+            # call allocate in resource model to calculate resource limit for this container
+            (cpu_limit, mem_limit, disk_limit) = alloc = self._resource_model.allocate(name, flavor_name)
+            LOG.debug("Allocation result: %r" % str(alloc))
+            # check if we have a cpu_limit given by the used resource model
+            if cpu_limit > 0:
+                # calculate cpu period and quota for CFS
+                # (see: https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt)
+                # TODO consider multi core machines etc! non trivial!
+                # Attention minimum cpu_quota is 1ms (micro)
+                cpu_period = 100000  # lets consider a fixed period of 100000 microseconds for now
+                cpu_quota = cpu_period * cpu_limit  # calculate the fraction of cpu time for this container
+                LOG.debug(
+                    "CPU limit: cpu_quota = cpu_period * cpu_limit = %f * %f = %f" % (cpu_period, cpu_limit, cpu_quota))
+                # ATTENTION >= 1000 to avoid a invalid argument system error ... no idea why
+                if cpu_quota < 1000:
+                    cpu_quota = 1000
+                    LOG.warning("Increased CPU quota for %r to avoid system error." % name)
+            # check if we have a mem_limit given by the used resource model
+            if mem_limit > 0:
+                LOG.debug(
+                    "MEM limit: mem_limit = %f MB" % mem_limit)
+                # ATTENTION minimum mem_limit per container is 4MB
+                if mem_limit < 4:
+                    mem_limit = 4
+                    LOG.warning("Increased MEM limit for %r because it was less than 4.0 MB." % name)
+        """
          # create the container
-        d = self.net.addDocker("%s" % (name), dimage=image, dcmd=command)
+        d = self.net.addDocker(
+            "%s" % (name),
+            dimage=image,
+            dcmd=command,
+            datacenter=self,
+            flavor_name=flavor_name
+        )
+
+        # apply resource limits to container if a resource model is defined
+        if self._resource_model is not None:
+            self._resource_model.allocate(d)
+
          # connect all given networks
+        # if no --net option is given, network = [{}], so 1 empty dict in the list
+        # this results in 1 default interface with a default ip address
          for nw in network:
-            self.net.addLink(d, self.switch, params1=nw)
+            # TODO we cannot use TCLink here (see: https://github.com/mpeuster/dockernet/issues/3)
+            self.net.addLink(d, self.switch, params1=nw, cls=Link)
          # do bookkeeping
          self.containers[name] = d
-        d.datacenter = self
+
+        # TODO re-enable logging
+        """
+        # write resource log if a path is given
+        if self.resource_log_path is not None:
+            l = dict()
+            l["t"] = time.time()
+            l["name"] = name
+            l["compute"] = d.getStatus()
+            l["flavor_name"] = flavor_name
+            l["action"] = "allocate"
+            l["cpu_limit"] = cpu_limit
+            l["mem_limit"] = mem_limit
+            l["disk_limit"] = disk_limit
+            l["rm_state"] = None if self._resource_model is None else self._resource_model.get_state_dict()
+            # append to logfile
+            with open(self.resource_log_path, "a") as f:
+                f.write("%s\n" % json.dumps(l))
+        """
          return d  # we might use UUIDs for naming later on
  
      def stopCompute(self, name):
@@ -141,10 +220,37 @@ class Datacenter(object):
          assert name is not None
          if name not in self.containers:
              raise Exception("Container with name %s not found." % name)
+        LOG.debug("Stopping compute instance %r in data center %r" % (name, str(self)))
+
+        # call resource model and free resources
+        if self._resource_model is not None:
+            self._resource_model.free(self.containers[name])
+
+        # remove links
          self.net.removeLink(
              link=None, node1=self.containers[name], node2=self.switch)
+
+        # remove container
          self.net.removeDocker("%s" % (name))
          del self.containers[name]
+
+        # TODO re-enable logging
+        """
+        # write resource log if a path is given
+        if self.resource_log_path is not None:
+            l = dict()
+            l["t"] = time.time()
+            l["name"] = name
+            l["flavor_name"] = None
+            l["action"] = "free"
+            l["cpu_limit"] = -1
+            l["mem_limit"] = -1
+            l["disk_limit"] = -1
+            l["rm_state"] = None if self._resource_model is None else self._resource_model.get_state_dict()
+            # append to logfile
+            with open(self.resource_log_path, "a") as f:
+                f.write("%s\n" % json.dumps(l))
+        """
          return True
  
      def listCompute(self):
@@ -165,3 +271,16 @@ class Datacenter(object):
              "n_running_containers": len(self.containers),
              "metadata": self.metadata
          }
+
+    def assignResourceModel(self, rm):
+        """
+        Assign a resource model to this DC.
+        :param rm: a BaseResourceModel object
+        :return:
+        """
+        if self._resource_model is not None:
+            raise Exception("There is already an resource model assigned to this DC.")
+        self._resource_model = rm
+        self.net.rm_registrar.register(self, rm)
+        LOG.info("Assigned RM: %r to DC: %r" % (rm, self))
+