Added reverse sync
[osm/common.git] / osm_common / fsmongo.py
index c558d8e..f43dca6 100644 (file)
 # contact: eduardo.sousa@canonical.com
 ##
 
+import errno
+from http import HTTPStatus
 from io import BytesIO, StringIO
-from pymongo import MongoClient
-from gridfs import GridFSBucket, errors
 import logging
-from http import HTTPStatus
 import os
+import datetime
+
+from gridfs import GridFSBucket, errors
 from osm_common.fsbase import FsBase, FsException
+from pymongo import MongoClient
+
 
 __author__ = "Eduardo Sousa <eduardo.sousa@canonical.com>"
 
@@ -34,6 +38,7 @@ class GridByteStream(BytesIO):
         self.filename = filename
         self.fs = fs
         self.mode = mode
+        self.file_type = "file"  # Set "file" as default file_type
 
         self.__initialize__()
 
@@ -48,8 +53,9 @@ class GridByteStream(BytesIO):
             if exception_file:
                 raise FsException("Multiple files found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR)
 
-            if requested_file.metadata["type"] == "file":
+            if requested_file.metadata["type"] in ("file", "sym"):
                 grid_file = requested_file
+                self.file_type = requested_file.metadata["type"]
             else:
                 raise FsException("Type isn't file", http_code=HTTPStatus.INTERNAL_SERVER_ERROR)
 
@@ -84,13 +90,13 @@ class GridByteStream(BytesIO):
                 self._id,
                 self.filename,
                 self,
-                metadata={"type": "file"}
+                metadata={"type": self.file_type}
             )
         else:
             self.fs.upload_from_stream(
                 self.filename,
                 self,
-                metadata={"type": "file"}
+                metadata={"type": self.file_type}
             )
         super(GridByteStream, self).close()
 
@@ -108,6 +114,7 @@ class GridStringStream(StringIO):
         self.filename = filename
         self.fs = fs
         self.mode = mode
+        self.file_type = "file"  # Set "file" as default file_type
 
         self.__initialize__()
 
@@ -122,8 +129,9 @@ class GridStringStream(StringIO):
             if exception_file:
                 raise FsException("Multiple files found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR)
 
-            if requested_file.metadata["type"] == "file":
+            if requested_file.metadata["type"] in ("file", "dir"):
                 grid_file = requested_file
+                self.file_type = requested_file.metadata["type"]
             else:
                 raise FsException("File type isn't file", http_code=HTTPStatus.INTERNAL_SERVER_ERROR)
 
@@ -165,13 +173,13 @@ class GridStringStream(StringIO):
                 self._id,
                 self.filename,
                 stream,
-                metadata={"type": "file"}
+                metadata={"type": self.file_type}
             )
         else:
             self.fs.upload_from_stream(
                 self.filename,
                 stream,
-                metadata={"type": "file"}
+                metadata={"type": self.file_type}
             )
         stream.close()
         super(GridStringStream, self).close()
@@ -191,25 +199,41 @@ class FsMongo(FsBase):
         self.client = None
         self.fs = None
 
-    def __update_local_fs(self):
+    def __update_local_fs(self, from_path=None):
         dir_cursor = self.fs.find({"metadata.type": "dir"}, no_cursor_timeout=True)
 
         for directory in dir_cursor:
+            if from_path and not directory.filename.startswith(from_path):
+                continue
             os.makedirs(self.path + directory.filename, exist_ok=True)
 
-        file_cursor = self.fs.find({"metadata.type": "file"}, no_cursor_timeout=True)
+        file_cursor = self.fs.find({"metadata.type": {"$in": ["file", "sym"]}}, no_cursor_timeout=True)
 
         for writing_file in file_cursor:
+            if from_path and not writing_file.filename.startswith(from_path):
+                continue
             file_path = self.path + writing_file.filename
-            file_stream = open(file_path, 'wb+')
-            self.fs.download_to_stream(writing_file._id, file_stream)
-            file_stream.close()
-            if "permissions" in writing_file.metadata:
-                os.chmod(file_path, writing_file.metadata["permissions"])
 
-    def get_params(self):
-        self.__update_local_fs()
+            if writing_file.metadata["type"] == "sym":
+                with BytesIO() as b:
+                    self.fs.download_to_stream(writing_file._id, b)
+                    b.seek(0)
+                    link = b.read().decode("utf-8")
+
+                try:
+                    os.remove(file_path)
+                except OSError as e:
+                    if e.errno != errno.ENOENT:
+                        # This is probably permission denied or worse
+                        raise
+                os.symlink(link, file_path)
+            else:
+                with open(file_path, 'wb+') as file_stream:
+                    self.fs.download_to_stream(writing_file._id, file_stream)
+                if "permissions" in writing_file.metadata:
+                    os.chmod(file_path, writing_file.metadata["permissions"])
 
+    def get_params(self):
         return {"fs": "mongo", "path": self.path}
 
     def fs_connect(self, config):
@@ -302,9 +326,16 @@ class FsMongo(FsBase):
             if exception_file:
                 raise FsException("Multiple files found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR)
 
+            # if no special mode is required just check it does exists
+            if not mode:
+                return True
+
             if requested_file.metadata["type"] == mode:
                 return True
 
+            if requested_file.metadata["type"] == "sym" and mode == "file":
+                return True
+
         return False
 
     def file_size(self, storage):
@@ -337,11 +368,20 @@ class FsMongo(FsBase):
         for member in tar_object.getmembers():
             if member.isfile():
                 stream = tar_object.extractfile(member)
+            elif member.issym():
+                stream = BytesIO(member.linkname.encode("utf-8"))
             else:
                 stream = BytesIO()
 
+            if member.isfile():
+                file_type = "file"
+            elif member.issym():
+                file_type = "sym"
+            else:
+                file_type = "dir"
+
             metadata = {
-                "type": "file" if member.isfile() else "dir",
+                "type": file_type,
                 "permissions": member.mode
             }
 
@@ -427,6 +467,125 @@ class FsMongo(FsBase):
                 else:
                     self.fs.delete(requested_file._id)
             if not found and not ignore_non_exist:
-                raise FsException("File {} does not exist".format(storage), http_code=HTTPStatus.NOT_FOUND)    
+                raise FsException("File {} does not exist".format(storage), http_code=HTTPStatus.NOT_FOUND)
         except IOError as e:
             raise FsException("File {} cannot be deleted: {}".format(f, e), http_code=HTTPStatus.INTERNAL_SERVER_ERROR)
+
+    def sync(self, from_path=None):
+        """
+        Sync from FSMongo to local storage
+        :param from_path: if supplied, only copy content from this path, not all
+        :return: None
+        """
+        if from_path:
+            if os.path.isabs(from_path):
+                from_path = os.path.relpath(from_path, self.path)
+        self.__update_local_fs(from_path=from_path)
+
+    def _update_mongo_fs(self, from_path):
+
+        os_path = self.path + from_path
+
+        # Obtain list of files and dirs in filesystem
+        members = []
+        for root, dirs, files in os.walk(os_path):
+            for folder in dirs:
+                member = {
+                    "filename": os.path.join(root, folder),
+                    "type": "dir"
+                }
+                members.append(member)
+            for file in files:
+                filename = os.path.join(root, file)
+                if os.path.islink(filename):
+                    file_type = "sym"
+                else:
+                    file_type = "file"
+                member = {
+                    "filename": os.path.join(root, file),
+                    "type": file_type
+                }
+                members.append(member)
+
+        # Obtain files in mongo dict
+        remote_files = self._get_mongo_files(from_path)
+
+        # Upload members if they do not exists or have been modified
+        # We will do this for performance (avoid updating unmodified files) and to avoid
+        # updating a file with an older one in case there are two sources for synchronization
+        # in high availability scenarios
+        for member in members:
+            # obtain permission
+            mask = int(oct(os.stat(member["filename"]).st_mode)[-3:], 8)
+
+            # convert to relative path
+            rel_filename = os.path.relpath(member["filename"], self.path)
+            last_modified_date = datetime.datetime.fromtimestamp(os.path.getmtime(member["filename"]))
+
+            remote_file = remote_files.get(rel_filename)
+            upload_date = remote_file[0].uploadDate if remote_file else datetime.datetime.min
+            # remove processed files from dict
+            remote_files.pop(rel_filename, None)
+
+            if last_modified_date >= upload_date:
+
+                stream = None
+                fh = None
+                try:
+                    file_type = member["type"]
+                    if file_type == "dir":
+                        stream = BytesIO()
+                    elif file_type == "sym":
+                        stream = BytesIO(os.readlink(member["filename"]).encode("utf-8"))
+                    else:
+                        fh = open(member["filename"], "rb")
+                        stream = BytesIO(fh.read())
+
+                    metadata = {
+                        "type": file_type,
+                        "permissions": mask
+                    }
+
+                    self.fs.upload_from_stream(
+                        rel_filename,
+                        stream,
+                        metadata=metadata
+                    )
+
+                    # delete old files
+                    if remote_file:
+                        for file in remote_file:
+                            self.fs.delete(file._id)
+                finally:
+                    if fh:
+                        fh.close()
+                    if stream:
+                        stream.close()
+
+        # delete files that are not any more in local fs
+        for remote_file in remote_files.values():
+            for file in remote_file:
+                self.fs.delete(file._id)
+
+    def _get_mongo_files(self, from_path=None):
+
+        file_dict = {}
+        file_cursor = self.fs.find(no_cursor_timeout=True, sort=[('uploadDate', -1)])
+        for file in file_cursor:
+            if from_path and not file.filename.startswith(from_path):
+                continue
+            if file.filename in file_dict:
+                file_dict[file.filename].append(file)
+            else:
+                file_dict[file.filename] = [file]
+        return file_dict
+
+    def reverse_sync(self, from_path: str):
+        """
+        Sync from local storage to FSMongo
+        :param from_path: base directory to upload content to mongo fs
+        :return: None
+        """
+        if os.path.isabs(from_path):
+            from_path = os.path.relpath(from_path, self.path)
+        self._update_mongo_fs(from_path=from_path)