X-Git-Url: https://osm.etsi.org/gitweb/?a=blobdiff_plain;f=osm_common%2Ffsmongo.py;h=487eaf8d042685baa53c7a40b373679540a1d095;hb=945fa22703037593544f3ea0c93913d562cf1532;hp=f43dca6ec8317431a59d9f8ce1f2b9e553df6d56;hpb=f296d2a468e38fef4145f526b5bc0726502d77cf;p=osm%2Fcommon.git diff --git a/osm_common/fsmongo.py b/osm_common/fsmongo.py index f43dca6..487eaf8 100644 --- a/osm_common/fsmongo.py +++ b/osm_common/fsmongo.py @@ -22,6 +22,8 @@ from io import BytesIO, StringIO import logging import os import datetime +import tarfile +import zipfile from gridfs import GridFSBucket, errors from osm_common.fsbase import FsBase, FsException @@ -51,13 +53,17 @@ class GridByteStream(BytesIO): exception_file = next(cursor, None) if exception_file: - raise FsException("Multiple files found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR) + raise FsException( + "Multiple files found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR + ) if requested_file.metadata["type"] in ("file", "sym"): grid_file = requested_file self.file_type = requested_file.metadata["type"] else: - raise FsException("Type isn't file", http_code=HTTPStatus.INTERNAL_SERVER_ERROR) + raise FsException( + "Type isn't file", http_code=HTTPStatus.INTERNAL_SERVER_ERROR + ) if grid_file: self._id = grid_file._id @@ -74,29 +80,26 @@ class GridByteStream(BytesIO): if self._id: self.fs.delete(self._id) - cursor = self.fs.find({ - "filename": self.filename.split("/")[0], - "metadata": {"type": "dir"}}) + cursor = self.fs.find( + {"filename": self.filename.split("/")[0], "metadata": {"type": "dir"}} + ) parent_dir = next(cursor, None) if not parent_dir: parent_dir_name = self.filename.split("/")[0] - self.filename = self.filename.replace(parent_dir_name, parent_dir_name[:-1], 1) + self.filename = self.filename.replace( + parent_dir_name, parent_dir_name[:-1], 1 + ) self.seek(0, 0) if self._id: self.fs.upload_from_stream_with_id( - self._id, - self.filename, - self, - metadata={"type": self.file_type} + self._id, self.filename, self, metadata={"type": self.file_type} ) else: self.fs.upload_from_stream( - self.filename, - self, - metadata={"type": self.file_type} + self.filename, self, metadata={"type": self.file_type} ) super(GridByteStream, self).close() @@ -127,13 +130,17 @@ class GridStringStream(StringIO): exception_file = next(cursor, None) if exception_file: - raise FsException("Multiple files found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR) + raise FsException( + "Multiple files found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR + ) if requested_file.metadata["type"] in ("file", "dir"): grid_file = requested_file self.file_type = requested_file.metadata["type"] else: - raise FsException("File type isn't file", http_code=HTTPStatus.INTERNAL_SERVER_ERROR) + raise FsException( + "File type isn't file", http_code=HTTPStatus.INTERNAL_SERVER_ERROR + ) if grid_file: stream = BytesIO() @@ -154,15 +161,17 @@ class GridStringStream(StringIO): if self._id: self.fs.delete(self._id) - cursor = self.fs.find({ - "filename": self.filename.split("/")[0], - "metadata": {"type": "dir"}}) + cursor = self.fs.find( + {"filename": self.filename.split("/")[0], "metadata": {"type": "dir"}} + ) parent_dir = next(cursor, None) if not parent_dir: parent_dir_name = self.filename.split("/")[0] - self.filename = self.filename.replace(parent_dir_name, parent_dir_name[:-1], 1) + self.filename = self.filename.replace( + parent_dir_name, parent_dir_name[:-1], 1 + ) self.seek(0, 0) stream = BytesIO() @@ -170,16 +179,11 @@ class GridStringStream(StringIO): stream.seek(0, 0) if self._id: self.fs.upload_from_stream_with_id( - self._id, - self.filename, - stream, - metadata={"type": self.file_type} + self._id, self.filename, stream, metadata={"type": self.file_type} ) else: self.fs.upload_from_stream( - self.filename, - stream, - metadata={"type": self.file_type} + self.filename, stream, metadata={"type": self.file_type} ) stream.close() super(GridStringStream, self).close() @@ -192,8 +196,7 @@ class GridStringStream(StringIO): class FsMongo(FsBase): - - def __init__(self, logger_name='fs', lock=False): + def __init__(self, logger_name="fs", lock=False): super().__init__(logger_name, lock) self.path = None self.client = None @@ -202,12 +205,17 @@ class FsMongo(FsBase): def __update_local_fs(self, from_path=None): dir_cursor = self.fs.find({"metadata.type": "dir"}, no_cursor_timeout=True) + valid_paths = [] + for directory in dir_cursor: if from_path and not directory.filename.startswith(from_path): continue os.makedirs(self.path + directory.filename, exist_ok=True) + valid_paths.append(self.path + directory.filename) - file_cursor = self.fs.find({"metadata.type": {"$in": ["file", "sym"]}}, no_cursor_timeout=True) + file_cursor = self.fs.find( + {"metadata.type": {"$in": ["file", "sym"]}}, no_cursor_timeout=True + ) for writing_file in file_cursor: if from_path and not writing_file.filename.startswith(from_path): @@ -228,7 +236,10 @@ class FsMongo(FsBase): raise os.symlink(link, file_path) else: - with open(file_path, 'wb+') as file_stream: + folder = os.path.dirname(file_path) + if folder not in valid_paths: + os.makedirs(folder, exist_ok=True) + with open(file_path, "wb+") as file_stream: self.fs.download_to_stream(writing_file._id, file_stream) if "permissions" in writing_file.metadata: os.chmod(file_path, writing_file.metadata["permissions"]) @@ -243,15 +254,21 @@ class FsMongo(FsBase): if "path" in config: self.path = config["path"] else: - raise FsException("Missing parameter \"path\"") + raise FsException('Missing parameter "path"') if not self.path.endswith("/"): self.path += "/" if not os.path.exists(self.path): - raise FsException("Invalid configuration param at '[storage]': path '{}' does not exist".format( - config["path"])) + raise FsException( + "Invalid configuration param at '[storage]': path '{}' does not exist".format( + config["path"] + ) + ) elif not os.access(self.path, os.W_OK): - raise FsException("Invalid configuration param at '[storage]': path '{}' is not writable".format( - config["path"])) + raise FsException( + "Invalid configuration param at '[storage]': path '{}' is not writable".format( + config["path"] + ) + ) if all(key in config.keys() for key in ["uri", "collection"]): self.client = MongoClient(config["uri"]) self.fs = GridFSBucket(self.client[config["collection"]]) @@ -260,9 +277,9 @@ class FsMongo(FsBase): self.fs = GridFSBucket(self.client[config["collection"]]) else: if "collection" not in config.keys(): - raise FsException("Missing parameter \"collection\"") + raise FsException('Missing parameter "collection"') else: - raise FsException("Missing parameters: \"uri\" or \"host\" + \"port\"") + raise FsException('Missing parameters: "uri" or "host" + "port"') except FsException: raise except Exception as e: # TODO refine @@ -278,8 +295,7 @@ class FsMongo(FsBase): :return: None or raises an exception """ try: - self.fs.upload_from_stream( - folder, BytesIO(), metadata={"type": "dir"}) + self.fs.upload_from_stream(folder, BytesIO(), metadata={"type": "dir"}) except errors.FileExists: # make it idempotent pass except Exception as e: @@ -294,15 +310,15 @@ class FsMongo(FsBase): """ try: dst_cursor = self.fs.find( - {"filename": {"$regex": "^{}(/|$)".format(dst)}}, - no_cursor_timeout=True) + {"filename": {"$regex": "^{}(/|$)".format(dst)}}, no_cursor_timeout=True + ) for dst_file in dst_cursor: self.fs.delete(dst_file._id) src_cursor = self.fs.find( - {"filename": {"$regex": "^{}(/|$)".format(src)}}, - no_cursor_timeout=True) + {"filename": {"$regex": "^{}(/|$)".format(src)}}, no_cursor_timeout=True + ) for src_file in src_cursor: self.fs.rename(src_file._id, src_file.filename.replace(src, dst, 1)) @@ -324,7 +340,11 @@ class FsMongo(FsBase): exception_file = next(cursor, None) if exception_file: - raise FsException("Multiple files found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR) + raise FsException( + "Multiple files found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR + ) + + print(requested_file.metadata) # if no special mode is required just check it does exists if not mode: @@ -352,46 +372,66 @@ class FsMongo(FsBase): exception_file = next(cursor, None) if exception_file: - raise FsException("Multiple files found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR) + raise FsException( + "Multiple files found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR + ) return requested_file.length - def file_extract(self, tar_object, path): + def file_extract(self, compressed_object, path): """ extract a tar file - :param tar_object: object of type tar + :param compressed_object: object of type tar or zip :param path: can be a str or a str list, or a tar object where to extract the tar_object :return: None """ f = path if isinstance(path, str) else "/".join(path) - for member in tar_object.getmembers(): - if member.isfile(): - stream = tar_object.extractfile(member) - elif member.issym(): - stream = BytesIO(member.linkname.encode("utf-8")) - else: - stream = BytesIO() + if type(compressed_object) is tarfile.TarFile: + for member in compressed_object.getmembers(): + if member.isfile(): + stream = compressed_object.extractfile(member) + elif member.issym(): + stream = BytesIO(member.linkname.encode("utf-8")) + else: + stream = BytesIO() - if member.isfile(): - file_type = "file" - elif member.issym(): - file_type = "sym" - else: - file_type = "dir" + if member.isfile(): + file_type = "file" + elif member.issym(): + file_type = "sym" + else: + file_type = "dir" - metadata = { - "type": file_type, - "permissions": member.mode - } + metadata = {"type": file_type, "permissions": member.mode} - self.fs.upload_from_stream( - f + "/" + member.name, - stream, - metadata=metadata - ) + self.fs.upload_from_stream( + f + "/" + member.name, stream, metadata=metadata + ) - stream.close() + stream.close() + elif type(compressed_object) is zipfile.ZipFile: + for member in compressed_object.infolist(): + if member.is_dir(): + stream = BytesIO() + else: + stream = compressed_object.read(member) + + if member.is_dir(): + file_type = "dir" + else: + file_type = "file" + + metadata = {"type": file_type} + + print("Now uploading...") + print(f + "/" + member.filename) + self.fs.upload_from_stream( + f + "/" + member.filename, stream, metadata=metadata + ) + + if member.is_dir(): + stream.close() def file_open(self, storage, mode): """ @@ -408,9 +448,13 @@ class FsMongo(FsBase): else: return GridStringStream(f, self.fs, mode) except errors.NoFile: - raise FsException("File {} does not exist".format(f), http_code=HTTPStatus.NOT_FOUND) + raise FsException( + "File {} does not exist".format(f), http_code=HTTPStatus.NOT_FOUND + ) except IOError: - raise FsException("File {} cannot be opened".format(f), http_code=HTTPStatus.BAD_REQUEST) + raise FsException( + "File {} cannot be opened".format(f), http_code=HTTPStatus.BAD_REQUEST + ) def dir_ls(self, storage): """ @@ -427,18 +471,31 @@ class FsMongo(FsBase): exception_dir = next(dir_cursor, None) if exception_dir: - raise FsException("Multiple directories found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR) + raise FsException( + "Multiple directories found", + http_code=HTTPStatus.INTERNAL_SERVER_ERROR, + ) if requested_dir.metadata["type"] != "dir": - raise FsException("File {} does not exist".format(f), http_code=HTTPStatus.NOT_FOUND) + raise FsException( + "File {} does not exist".format(f), + http_code=HTTPStatus.NOT_FOUND, + ) + + if f.endswith("/"): + f = f[:-1] - files_cursor = self.fs.find({"filename": {"$regex": "^{}/([^/])*".format(f)}}) + files_cursor = self.fs.find( + {"filename": {"$regex": "^{}/([^/])*".format(f)}} + ) for children_file in files_cursor: - files += [children_file.filename.replace(f + '/', '', 1)] + files += [children_file.filename.replace(f + "/", "", 1)] return files except IOError: - raise FsException("File {} cannot be opened".format(f), http_code=HTTPStatus.BAD_REQUEST) + raise FsException( + "File {} cannot be opened".format(f), http_code=HTTPStatus.BAD_REQUEST + ) def file_delete(self, storage, ignore_non_exist=False): """ @@ -457,7 +514,10 @@ class FsMongo(FsBase): exception_file = next(file_cursor, None) if exception_file: - raise FsException("Multiple files found", http_code=HTTPStatus.INTERNAL_SERVER_ERROR) + raise FsException( + "Multiple files found", + http_code=HTTPStatus.INTERNAL_SERVER_ERROR, + ) if requested_file.metadata["type"] == "dir": dir_cursor = self.fs.find({"filename": {"$regex": "^{}".format(f)}}) @@ -467,9 +527,15 @@ class FsMongo(FsBase): else: self.fs.delete(requested_file._id) if not found and not ignore_non_exist: - raise FsException("File {} does not exist".format(storage), http_code=HTTPStatus.NOT_FOUND) + raise FsException( + "File {} does not exist".format(storage), + http_code=HTTPStatus.NOT_FOUND, + ) except IOError as e: - raise FsException("File {} cannot be deleted: {}".format(f, e), http_code=HTTPStatus.INTERNAL_SERVER_ERROR) + raise FsException( + "File {} cannot be deleted: {}".format(f, e), + http_code=HTTPStatus.INTERNAL_SERVER_ERROR, + ) def sync(self, from_path=None): """ @@ -490,10 +556,9 @@ class FsMongo(FsBase): members = [] for root, dirs, files in os.walk(os_path): for folder in dirs: - member = { - "filename": os.path.join(root, folder), - "type": "dir" - } + member = {"filename": os.path.join(root, folder), "type": "dir"} + if os.path.islink(member["filename"]): + member["type"] = "sym" members.append(member) for file in files: filename = os.path.join(root, file) @@ -501,10 +566,7 @@ class FsMongo(FsBase): file_type = "sym" else: file_type = "file" - member = { - "filename": os.path.join(root, file), - "type": file_type - } + member = {"filename": os.path.join(root, file), "type": file_type} members.append(member) # Obtain files in mongo dict @@ -520,10 +582,14 @@ class FsMongo(FsBase): # convert to relative path rel_filename = os.path.relpath(member["filename"], self.path) - last_modified_date = datetime.datetime.fromtimestamp(os.path.getmtime(member["filename"])) + last_modified_date = datetime.datetime.fromtimestamp( + os.path.getmtime(member["filename"]) + ) remote_file = remote_files.get(rel_filename) - upload_date = remote_file[0].uploadDate if remote_file else datetime.datetime.min + upload_date = ( + remote_file[0].uploadDate if remote_file else datetime.datetime.min + ) # remove processed files from dict remote_files.pop(rel_filename, None) @@ -536,21 +602,16 @@ class FsMongo(FsBase): if file_type == "dir": stream = BytesIO() elif file_type == "sym": - stream = BytesIO(os.readlink(member["filename"]).encode("utf-8")) + stream = BytesIO( + os.readlink(member["filename"]).encode("utf-8") + ) else: fh = open(member["filename"], "rb") stream = BytesIO(fh.read()) - metadata = { - "type": file_type, - "permissions": mask - } + metadata = {"type": file_type, "permissions": mask} - self.fs.upload_from_stream( - rel_filename, - stream, - metadata=metadata - ) + self.fs.upload_from_stream(rel_filename, stream, metadata=metadata) # delete old files if remote_file: @@ -570,7 +631,7 @@ class FsMongo(FsBase): def _get_mongo_files(self, from_path=None): file_dict = {} - file_cursor = self.fs.find(no_cursor_timeout=True, sort=[('uploadDate', -1)]) + file_cursor = self.fs.find(no_cursor_timeout=True, sort=[("uploadDate", -1)]) for file in file_cursor: if from_path and not file.filename.startswith(from_path): continue