[Bug 244] For nested directories, extract the basename to get the filename after...
[osm/SO.git] / rwlaunchpad / plugins / rwlaunchpadtasklet / rift / package / package.py
1
2 #
3 # Copyright 2016 RIFT.IO Inc
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #
17
18 import io
19 import os
20 import re
21 import shutil
22 import tarfile
23
24 from . import checksums
25 from . import convert
26 from . import image
27
28
class ArchiveError(Exception):
    """ Raised when a package archive cannot be read """
31
32
class ExtractError(Exception):
    """ Raised when package contents cannot be extracted """
35
36
class PackageError(Exception):
    """ Raised for generic package handling failures """
39
40
class PackageValidationError(Exception):
    """ Raised when a package fails validation """
43
44
class PackageAppendError(Exception):
    """ Raised when a file cannot be inserted into (or deleted from) a package """
47
48
class PackageFileChecksumError(PackageValidationError):
    """ Raised when a package file does not match its recorded checksum

    The offending file name is kept on the ``filename`` attribute.
    """
    def __init__(self, filename):
        message = "Checksum mismatch for {}".format(filename)
        self.filename = filename
        super().__init__(message)
53
54
class DescriptorPackage(object):
    """ Base class for a descriptor package

    A descriptor package is a package which contains a single descriptor and any
    associated files (logos, charms, scripts, etc).  This package representation
    attempts to be agnostic as to where the package files are being stored
    (in memory, on disk, etc).

    The package provides a simple interface to interact with the files within the
    package and access the contained descriptor.
    """
    DESCRIPTOR_REGEX = r"{prefix}({descriptor_type}/[^/]*|[^/]*{descriptor_type})\.(xml|yml|yaml|json)$"

    def __init__(self, log, open_fn):
        """ Create an empty package

        Arguments:
            log - A logger
            open_fn - A function which takes a package file path and returns
                      a readable, binary file handle for it.
        """
        self._log = log
        self._open_fn = open_fn

        # Maps each package file path to its permission mode
        self._package_file_mode_map = {}
        self._package_dirs = set()

    @staticmethod
    def _is_within_dir(root_dir, path):
        """ Return True if path (after normalization) lies inside root_dir """
        root = os.path.abspath(root_dir)
        target = os.path.abspath(path)
        try:
            return os.path.commonpath([root, target]) == root
        except ValueError:
            # Raised e.g. when the two paths live on different drives
            return False

    @property
    def prefix(self):
        """ Return the leading parent directories shared by all files in the package

        In order to remain flexible as to where tar was invoked to create the package,
        the prefix represents the common parent directory path which all files in the
        package have in common.  The result is either empty or ends with "/".
        """
        entries = list(self._package_file_mode_map) + list(self._package_dirs)

        if len(entries) > 1:
            prefix = os.path.commonprefix(entries)
            if prefix and not prefix.endswith("/"):
                # commonprefix() compares character by character, so the result
                # may stop in the middle of a path component (e.g. "ab" for
                # "abc.txt" and "abd.txt").  Only accept it when it is a whole
                # component of every entry, otherwise fall back to the last
                # complete directory.
                whole_component = all(
                    entry == prefix or entry.startswith(prefix + "/")
                    for entry in entries
                )
                if whole_component:
                    prefix += "/"
                else:
                    prefix = prefix[:prefix.rfind("/") + 1]
        elif len(entries) == 1:
            entry = entries[0]
            if "/" in entry:
                prefix = os.path.dirname(entry) + "/"
            else:
                prefix = ""
        else:
            prefix = ""

        return prefix

    @property
    def files(self):
        """ Return all files (with the prefix) in the package """
        return list(self._package_file_mode_map)

    @property
    def dirs(self):
        """ Return all directories in the package """
        return list(self._package_dirs)

    @property
    def descriptor_type(self):
        """ A shorthand name for the type of descriptor (e.g. nsd)"""
        raise NotImplementedError("Subclass must implement this property")

    @property
    def serializer(self):
        """ An instance of convert.ProtoMessageSerializer """
        raise NotImplementedError("Subclass must implement this property")

    @property
    def descriptor_file(self):
        """ The descriptor file name (with prefix)

        Raises:
            PackageError - The package contains no descriptor or more than one.
        """
        # The prefix and type are literal text, so escape them: a package
        # directory such as "my+pkg/" must not be interpreted as a regex.
        regex = self.__class__.DESCRIPTOR_REGEX.format(
            descriptor_type=re.escape(self.descriptor_type),
            prefix=re.escape(self.prefix),
        )
        desc_file = None
        for filename in self.files:
            if re.match(regex, filename):
                if desc_file is not None:
                    raise PackageError("Package contains more than one descriptor")
                desc_file = filename

        if desc_file is None:
            raise PackageError("Could not find descriptor file in package")

        return desc_file

    @property
    def descriptor_msg(self):
        """ The proto-GI descriptor message """
        filename = self.descriptor_file
        with self.open(filename) as hdl:
            _, ext = os.path.splitext(filename)
            desc_msg = self.serializer.from_file_hdl(hdl, ext)
            return desc_msg

    @property
    def json_descriptor(self):
        """ The JSON serialized descriptor message"""
        desc_msg = self.descriptor_msg
        return self.serializer.to_json_string(desc_msg)

    @property
    def descriptor_id(self):
        """ The descriptor id which uniquely identifies this descriptor in the system

        Raises:
            PackageError - The descriptor has no id field
        """
        # descriptor_msg re-reads and re-parses the file on every access,
        # so fetch it once.
        desc_msg = self.descriptor_msg
        if not desc_msg.has_field("id"):
            msg = "Descriptor must have an id field"
            self._log.error(msg)
            raise PackageError(msg)

        return desc_msg.id

    @property
    def descriptor_name(self):
        """ The descriptor name of this descriptor in the system

        Raises:
            PackageError - The descriptor has no name field
        """
        desc_msg = self.descriptor_msg
        if not desc_msg.has_field("name"):
            msg = "Descriptor name not present"
            self._log.error(msg)
            raise PackageError(msg)

        return desc_msg.name

    @classmethod
    def get_descriptor_patterns(cls):
        """ Returns a list of (descriptor regex, Package class) tuples """
        package_types = (VnfdPackage, NsdPackage)
        patterns = []

        for pkg_cls in package_types:
            regex = cls.DESCRIPTOR_REGEX.format(
                descriptor_type=pkg_cls.DESCRIPTOR_TYPE,
                prefix=".*"
            )

            patterns.append((regex, pkg_cls))

        return patterns

    @classmethod
    def from_package_files(cls, log, open_fn, files):
        """ Creates a new DescriptorPackage subclass instance from a list of files

        This classmethod detects the Package type from the package contents
        and returns a new Package instance.

        This will NOT subsequently add the files to the package so that must
        be done by the client

        Arguments:
            log - A logger
            open_fn - A function which can take a file name and mode and return
                      a file handle.
            files - A list of files which would be added to the package after
                    instantiation

        Returns:
            A new DescriptorPackage subclass of the correct type for the descriptor

        Raises:
            PackageError - Package type could not be determined from the list of files.
        """
        patterns = cls.get_descriptor_patterns()
        pkg_cls = None
        for name in files:
            # Use a distinct loop name so the classmethod's own cls is not shadowed
            for regex, candidate_cls in patterns:
                if re.match(regex, name) is not None:
                    pkg_cls = candidate_cls
                    break

        if pkg_cls is None:
            # Report every known pattern, even when the file list was empty
            regexes = {regex for regex, _ in patterns}
            log.error("No file in archive matched known descriptor formats: %s", regexes)
            raise PackageError("Could not determine package type from contents")

        package = pkg_cls(log, open_fn)
        return package

    @classmethod
    def from_descriptor_file_hdl(cls, log, file_hdl):
        """ Creates a new DescriptorPackage from a descriptor file handle

        The descriptor file is added to the package before returning.

        Arguments:
            log - A logger
            file_hdl - A file handle whose name attribute can be recognized as
                       particular descriptor type.

        Returns:
            A new DescriptorPackage subclass of the correct type for the descriptor

        Raises:
            PackageError - Package type could not be determined from the list of files.
            ValueError - file_hdl did not have a name attribute provided
        """

        package_types = (VnfdPackage, NsdPackage)
        filename_patterns = []
        for package_cls in package_types:
            filename_patterns.append(
                (r".*{}.*".format(package_cls.DESCRIPTOR_TYPE), package_cls)
            )

        if not hasattr(file_hdl, 'name'):
            raise ValueError("File descriptor must have a name attribute to create a descriptor package")

        # Iterate through the recognized patterns and pick the first match
        package_cls = None
        for pattern, candidate_cls in filename_patterns:
            if re.match(pattern, file_hdl.name):
                package_cls = candidate_cls
                break

        if not package_cls:
            raise PackageError("Could not determine package type from file name: %s" % file_hdl.name)

        # Deserialize once up front purely to verify the descriptor is valid
        _, ext = os.path.splitext(file_hdl.name)
        try:
            package_cls.SERIALIZER.from_file_hdl(file_hdl, ext)
        except convert.SerializationError as e:
            raise PackageError("Could not deserialize descriptor %s" % file_hdl.name) from e

        # Create a new file handle for each open call to prevent independent clients
        # from affecting each other
        file_hdl.seek(0)
        new_hdl = io.BytesIO(file_hdl.read())

        def do_open(file_path):
            assert file_path == file_hdl.name
            hdl = io.BytesIO(new_hdl.getvalue())
            return hdl

        desc_pkg = package_cls(log, do_open)
        desc_pkg.add_file(file_hdl.name)

        return desc_pkg

    def get_file_mode(self, pkg_file):
        """ Returns the file mode for the package file

        Arguments:
            pkg_file - A file name in the package

        Returns:
            The permission mode

        Raises:
            PackageError - The file does not exist in the package
        """
        try:
            return self._package_file_mode_map[pkg_file]
        except KeyError as e:
            msg = "Could not find package_file: %s" % pkg_file
            self._log.error(msg)
            raise PackageError(msg) from e

    def extract_dir(self, src_dir, dest_root_dir, extract_images=False):
        """ Extract a specific directory contents to dest_root_dir

        Arguments:
            src_dir - A directory within the package (None means all files/directories)
            dest_root_dir - A directory to extract directory contents to
            extract_images - A flag indicating whether we want to extract images

        Raises:
            ExtractError - Directory contents could not be extracted
        """
        if src_dir is not None and src_dir not in self._package_dirs:
            raise ExtractError("Could not find source dir: %s" % src_dir)

        # Match on "<src_dir>/" rather than the bare directory name so that a
        # sibling such as "scripts2" is not treated as being inside "scripts".
        dir_prefix = None
        if src_dir is not None:
            if not src_dir or src_dir.endswith("/"):
                dir_prefix = src_dir
            else:
                dir_prefix = src_dir + "/"

        for filename in self.files:
            if not extract_images and image.is_image_file(filename):
                continue

            # Remove the source directory and create the dest filename
            if dir_prefix is not None:
                if not filename.startswith(dir_prefix):
                    continue
                fname = filename[len(dir_prefix):]
                if fname.startswith("/"):
                    fname = fname[1:]
            else:
                fname = filename

            dest_file_path = os.path.join(dest_root_dir, fname)

            # Package member names come from the archive and may contain ".."
            # or be absolute; never write outside of the destination directory.
            if not self._is_within_dir(dest_root_dir, dest_file_path):
                msg = "Refusing to extract %s outside of %s" % (filename, dest_root_dir)
                self._log.error(msg)
                raise ExtractError(msg)

            dest_dir_path = os.path.dirname(dest_file_path)
            if dest_dir_path and not os.path.exists(dest_dir_path):
                os.makedirs(dest_dir_path)

            # Copy the contents of the file to the correct path
            with open(dest_file_path, 'wb') as dst_hdl:
                with self.open(filename) as src_hdl:
                    shutil.copyfileobj(src_hdl, dst_hdl, 10 * 1024 * 1024)

                    # Set the file mode to original
                    os.chmod(dest_file_path, self._package_file_mode_map[filename])

    def insert_file(self, new_file, dest_file, rel_path, mode=0o777):
        """ Register rel_path in the package and copy new_file to dest_file

        Arguments:
            new_file - Path of the source file on disk
            dest_file - Path on disk to copy the file to.  A trailing "/"
                        means only the directory should be created.
            rel_path - The file path relative to the top of the package
            mode - The permission mode to store the file with

        Raises:
            PackageError - rel_path is empty or already in the package
            PackageAppendError - The file could not be copied into place
        """
        self.add_file(rel_path, mode)

        try:
            # Copy the contents of the file to the correct path
            # For folder creation (or nested folders), dest_file appears w/ trailing "/" like: dir1/ or dir1/dir2/
            # For regular file upload, dest_file appears as dir1/abc.txt
            dest_dir_path = os.path.dirname(dest_file)
            # A bare file name has no directory part; makedirs("") would fail
            if dest_dir_path and not os.path.isdir(dest_dir_path):
                os.makedirs(dest_dir_path)
            if not os.path.basename(dest_file):
                self._log.debug("Created dir path, no filename to insert in {}, skipping..".format(dest_dir_path))
                return

            with open(dest_file, 'wb') as dst_hdl:
                with open(new_file, 'rb') as src_hdl:
                    shutil.copyfileobj(src_hdl, dst_hdl, 10 * 1024 * 1024)

                    # Set the file mode to original
                    os.chmod(dest_file, self._package_file_mode_map[rel_path])
        except Exception as e:
            # Undo the registration so the package does not reference a file
            # that was never written, and a later retry is not rejected.
            self._package_file_mode_map.pop(rel_path, None)

            # Clear the file when an exception happens
            if os.path.isfile(dest_file):
                os.remove(dest_file)

            raise PackageAppendError(str(e)) from e

    def delete_file(self, dest_file, rel_path):
        """ Remove rel_path from the package and delete dest_file from disk

        Arguments:
            dest_file - Path of the file on disk
            rel_path - The file path relative to the top of the package

        Raises:
            PackageError - rel_path is empty or not in the package
            PackageAppendError - The file could not be removed from disk
        """
        self.remove_file(rel_path)

        try:
            os.remove(dest_file)
        except Exception as e:
            raise PackageAppendError(str(e)) from e

    def extract_file(self, src_file, dest_file):
        """ Extract a specific package file to dest_file

        The destination directory will be created if it does not exist.

        Arguments:
            src_file - A file within the package
            dest_file - A file path to extract file contents to

        Raises:
            ExtractError - Directory contents could not be extracted
        """
        if src_file not in self._package_file_mode_map:
            msg = "Could not find source file %s" % src_file
            self._log.error(msg)
            raise ExtractError(msg)

        # Copy the contents of the file to the correct path
        dest_dir_path = os.path.dirname(dest_file)
        # A bare file name has no directory part; makedirs("") would fail
        if dest_dir_path and not os.path.isdir(dest_dir_path):
            os.makedirs(dest_dir_path)

        with open(dest_file, 'wb') as dst_hdl:
            with self.open(src_file) as src_hdl:
                shutil.copyfileobj(src_hdl, dst_hdl, 10 * 1024 * 1024)

                # Set the file mode to original
                os.chmod(dest_file, self._package_file_mode_map[src_file])

    def extract(self, dest_root_dir, extract_images=False):
        """ Extract all package contents to a destination directory

        Arguments:
            dest_root_dir - The directory to extract package contents to
            extract_images - A flag indicating whether we want to extract images

        Raises:
            NotADirectoryError - dest_root_dir is not a directory
        """
        if not os.path.isdir(dest_root_dir):
            raise NotADirectoryError(dest_root_dir)

        self.extract_dir(None, dest_root_dir, extract_images)

    def open(self, rel_path):
        """ Open a file contained in the package in read-only, binary mode.

        Arguments:
            rel_path - The file path within the package

        Returns:
            A file-like object opened in read-only mode.

        Raises:
            PackageError - The file could not be opened
        """
        try:
            return self._open_fn(rel_path)
        except Exception as e:
            msg = "Could not open file from package: %s" % rel_path
            self._log.warning(msg)
            raise PackageError(msg) from e

    def add_file(self, rel_path, mode=0o777):
        """ Add a file to the package.

        The file should be specified as a relative path to the package
        root.  The open_fn provided in the constructor must be able to
        take the relative path and open the actual source file from
        wherever the file actually is stored.

        If the file's parent directories do not yet exist, add them to
        the package.

        Arguments:
            rel_path - The file path relative to the top of the package.
            mode - The permission mode the file should be stored with so
                   it can be extracted with the correct permissions.

        Raises:
            PackageError - The file could not be added to the package
        """
        if not rel_path:
            raise PackageError("Empty file name added")

        if rel_path in self._package_file_mode_map:
            raise PackageError("File %s already exists in package" % rel_path)

        # If the file's directory is not in the package add it.
        rel_dir = os.path.dirname(rel_path)
        while rel_dir:
            parent_dir = os.path.dirname(rel_dir)
            # dirname("/") is "/" again, so an absolute path would otherwise
            # never reach the empty string and loop forever.
            if parent_dir == rel_dir:
                break
            self._package_dirs.add(rel_dir)
            rel_dir = parent_dir

        self._package_file_mode_map[rel_path] = mode

    def remove_file(self, rel_path):
        """ Remove a file from the package.

        Parent directories are left registered in the package.

        Arguments:
            rel_path - The file path relative to the top of the package.

        Raises:
            PackageError - rel_path is empty or not in the package
        """
        if not rel_path:
            raise PackageError("Empty file name given")

        if rel_path not in self._package_file_mode_map:
            raise PackageError("File %s does not exist in package" % rel_path)

        del self._package_file_mode_map[rel_path]

    def add_dir(self, rel_path):
        """ Add a directory to the package

        Arguments:
            rel_path - The directories relative path.

        Raises:
            PackageError - A file already exists in the package with the same name.
        """
        if rel_path in self._package_file_mode_map:
            # Exceptions do not apply logging-style arguments; format explicitly
            raise PackageError("File already exists with the same name: %s" % rel_path)

        if rel_path in self._package_dirs:
            self._log.warning("%s directory already exists", rel_path)
            return

        self._package_dirs.add(rel_path)
504
505 self._package_dirs.add(rel_path)
506
507
class NsdPackage(DescriptorPackage):
    """ Descriptor package holding a network service descriptor (nsd) """
    DESCRIPTOR_TYPE = "nsd"
    SERIALIZER = convert.RwNsdSerializer()

    @property
    def descriptor_type(self):
        """ The shorthand descriptor type name for this package """
        return NsdPackage.DESCRIPTOR_TYPE

    @property
    def serializer(self):
        """ The shared serializer instance for nsd descriptors """
        return NsdPackage.SERIALIZER
519
520
class VnfdPackage(DescriptorPackage):
    """ Descriptor package holding a vnfd descriptor """
    DESCRIPTOR_TYPE = "vnfd"
    SERIALIZER = convert.RwVnfdSerializer()

    @property
    def descriptor_type(self):
        """ The shorthand descriptor type name for this package """
        return VnfdPackage.DESCRIPTOR_TYPE

    @property
    def serializer(self):
        """ The shared serializer instance for vnfd descriptors """
        return VnfdPackage.SERIALIZER
532
533
class PackageChecksumValidator(object):
    """ This class uses the checksums.txt file in the package
    and validates that all files in the package match the checksum that exists within
    the file.
    """
    CHECKSUM_FILE = "{prefix}checksums.txt"

    def __init__(self, log):
        """ Create a validator

        Arguments:
            log - A logger
        """
        self._log = log

    @classmethod
    def get_package_checksum_file(cls, package):
        """ Return the path of the checksum file within the package

        Arguments:
            package - The Descriptor Package to look in

        Raises:
            FileNotFoundError - The package has no checksum file under its prefix
        """
        checksum_file = cls.CHECKSUM_FILE.format(prefix=package.prefix)
        if checksum_file not in package.files:
            raise FileNotFoundError("%s does not exist in archive" % checksum_file)

        return checksum_file

    def validate(self, package):
        """ Validate file checksums match that in the checksums.txt

        Files which are not listed in checksums.txt are skipped with a warning.
        If the checksum file is missing or cannot be opened, nothing is validated.

        Arguments:
            package - The Descriptor Package which possibly contains checksums.txt

        Returns: A dictionary of files that were validated by the checksums.txt
                 along with their checksums

        Raises:
            PackageValidationError - The package validation failed for some
              generic reason.
            PackageFileChecksumError - A file within the package did not match the
              checksum within checksums.txt
        """
        validated_file_checksums = {}

        try:
            checksum_file = PackageChecksumValidator.get_package_checksum_file(package)
            with package.open(checksum_file) as checksum_hdl:
                archive_checksums = checksums.ArchiveChecksums.from_file_desc(checksum_hdl)
        except (FileNotFoundError, PackageError):
            self._log.warning("Could not open package checksum file.  Not validating checksums.")
            return validated_file_checksums

        # The prefix is recomputed from every package entry on each access,
        # so look it up once instead of once per file.
        prefix = package.prefix

        for pkg_file in package.files:
            if pkg_file == checksum_file:
                continue

            # Entries in checksums.txt are looked up without the package prefix
            if prefix and pkg_file.startswith(prefix):
                pkg_file_no_prefix = pkg_file[len(prefix):]
            else:
                pkg_file_no_prefix = pkg_file

            if pkg_file_no_prefix not in archive_checksums:
                self._log.warning("File %s not found in checksum file %s",
                                  pkg_file, checksum_file)
                continue

            try:
                with package.open(pkg_file) as pkg_file_hdl:
                    file_checksum = checksums.checksum(pkg_file_hdl)
            except PackageError as e:
                msg = "Could not read package file {} for checksum validation: {}".format(
                      pkg_file, str(e))
                self._log.error(msg)
                raise PackageValidationError(msg) from e

            expected_checksum = archive_checksums[pkg_file_no_prefix]
            if expected_checksum != file_checksum:
                msg = "{} checksum ({}) did not match expected checksum ({})".format(
                        pkg_file, file_checksum, expected_checksum
                        )
                self._log.error(msg)
                raise PackageFileChecksumError(pkg_file)

            validated_file_checksums[pkg_file] = file_checksum

        return validated_file_checksums
606
607
class TarPackageArchive(object):
    """ This class represents a package stored within a tar.gz archive file """
    def __init__(self, log, tar_file_hdl, mode="r"):
        """ Open the archive and index its members

        Arguments:
            log - A logger
            tar_file_hdl - A file handle positioned at the start of the tar data
            mode - The mode passed to tarfile.open()
        """
        self._log = log
        self._tar_filepath = tar_file_hdl
        self._tar_infos = {}

        # Define the attribute before opening so that close() (and therefore
        # __del__) is safe even when tarfile.open() raises.
        self._tarfile = None
        self._tarfile = tarfile.open(fileobj=tar_file_hdl, mode=mode)

        self._load_archive()

    def __repr__(self):
        return "TarPackageArchive(%s)" % self._tar_filepath

    def _get_members(self):
        """ Return the list of TarInfo members in the archive """
        return list(self._tarfile.getmembers())

    def _load_archive(self):
        """ Index the archive members by name, skipping unnamed members """
        self._tar_infos = {info.name: info for info in self._get_members() if info.name}

    def __del__(self):
        self.close()

    def close(self):
        """ Close the opened tarfile"""
        if self._tarfile is not None:
            self._tarfile.close()
            self._tarfile = None

    @property
    def filenames(self):
        """ The list of file members within the tar file """
        return [name for name, info in self._tar_infos.items() if info.isfile()]

    def open_file(self, rel_file_path):
        """ Opens a file within the archive as read-only, byte mode.

        Arguments:
            rel_file_path - The file path within the archive to open

        Returns:
            A file like object (see tarfile.extractfile())

        Raises:
            ArchiveError - The file could not be opened for some generic reason.
        """
        if rel_file_path not in self._tar_infos:
            # Exceptions do not apply logging-style arguments; format explicitly
            raise ArchiveError("Could not find %s in tar file" % rel_file_path)

        if self._tarfile is None:
            raise ArchiveError("Tar file %s is already closed" % self._tar_filepath)

        try:
            file_hdl = self._tarfile.extractfile(rel_file_path)
        except tarfile.TarError as e:
            msg = "Failed to read file {} from tarfile {}: {}".format(
                  rel_file_path, self._tar_filepath, str(e)
                  )
            self._log.error(msg)
            raise ArchiveError(msg) from e

        # extractfile() returns None for members that are not regular files
        # or links (e.g. directories); report that instead of handing back None.
        if file_hdl is None:
            raise ArchiveError("%s is not a regular file in tar file" % rel_file_path)

        return file_hdl

    def create_package(self):
        """  Creates a Descriptor package from the archive contents

        Returns:
            A DescriptorPackage of the correct descriptor type
        """
        package = DescriptorPackage.from_package_files(self._log, self.open_file, self.filenames)
        for pkg_file in self.filenames:
            package.add_file(pkg_file, self._tar_infos[pkg_file].mode)

        return package
677
678
class TemporaryPackage(object):
    """ Container for a DescriptorPackage backed by a temporary file

    Attribute lookups that this class does not define are forwarded to the
    contained DescriptorPackage, so it can be used in place of one.  As a
    context manager it yields the contained package and, on exit, closes
    and removes the file backing it.
    """
    def __init__(self, log, package, file_hdl):
        self._log = log
        self._package = package
        self._file_hdl = file_hdl

        has_name = hasattr(self._file_hdl, "name")
        if not has_name:
            raise ValueError("File handle must have a name attribute")

    def __getattr__(self, attr):
        # Only reached for attributes not found on this object
        wrapped = self._package
        return getattr(wrapped, attr)

    def __enter__(self):
        return self._package

    def __exit__(self, type, value, tb):
        self.close()

    def filename(self):
        """ Returns the path of the file backing the package """
        backing_file = self._file_hdl
        return backing_file.name

    def package(self):
        """ Returns the contained DescriptorPackage instance """
        return self._package

    def close(self):
        """ Close the backing file handle and delete the file from disk

        Failures of either step are logged as warnings, not raised.
        """
        backing_path = self._file_hdl.name

        try:
            self._file_hdl.close()
        except OSError as close_err:
            self._log.warning("Failed to close package file: %s", str(close_err))

        try:
            os.remove(backing_path)
        except OSError as remove_err:
            self._log.warning("Failed to remove package file: %s", str(remove_err))