3ce3500e032facf13e8e80dc19f12b74be1da193
[osm/SO.git] / rwlaunchpad / plugins / rwlaunchpadtasklet / rift / package / package.py
1
2 #
3 # Copyright 2016 RIFT.IO Inc
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #
17
18 import io
19 import os
20 import re
21 import shutil
22 import tarfile
23
24 from . import checksums
25 from . import convert
26 from . import image
27
28
class ArchiveError(Exception):
    """ Raised when a package archive (e.g. a tar file) cannot be read """
31
32
class ExtractError(Exception):
    """ Raised when package contents cannot be extracted """
35
36
class PackageError(Exception):
    """ Raised for generic failures while building or reading a package """
39
40
class PackageValidationError(Exception):
    """ Raised when a package fails validation """
43
44
class PackageAppendError(Exception):
    """ Raised when a file cannot be inserted into or deleted from a package """
47
48
class PackageFileChecksumError(PackageValidationError):
    """ Raised when a package file does not match its expected checksum

    Attributes:
        filename - The package file whose checksum did not match
    """
    def __init__(self, filename):
        message = "Checksum mismatch for {}".format(filename)
        super().__init__(message)
        self.filename = filename
53
54
class DescriptorPackage(object):
    """ Base class for a descriptor package.

    A descriptor package is a package which contains a single descriptor and any
    associated files (logos, charms, scripts, etc).  This package representation
    attempts to be agnostic as to where the package files are being stored
    (in memory, on disk, etc).

    The package provides a simple interface to interact with the files within the
    package and access the contained descriptor.
    """
    DESCRIPTOR_REGEX = r"{prefix}({descriptor_type}/[^/]*|[^/]*{descriptor_type})\.(xml|yml|yaml|json)$"

    def __init__(self, log, open_fn):
        """ Create an empty package

        Arguments:
            log - A logger
            open_fn - A function which takes a package file path and returns
                      a file handle for it.
        """
        self._log = log
        self._open_fn = open_fn

        # Maps each package file path to the permission mode it was added with
        self._package_file_mode_map = {}
        self._package_dirs = set()

    @property
    def prefix(self):
        """ Return the leading parent directories shared by all files in the package

        In order to remain flexible as to where tar was invoked to create the package,
        the prefix represents the common parent directory path which all files in the
        package have in common.  The result is either empty or ends with "/".
        """
        entries = list(self._package_file_mode_map) + list(self._package_dirs)

        if not entries:
            return ""

        if len(entries) == 1:
            entry = entries[0]
            return os.path.dirname(entry) + "/" if "/" in entry else ""

        # Compare whole path components.  os.path.commonprefix() compares
        # character by character, so siblings such as "abc.yaml" and "abd.txt"
        # would otherwise produce the bogus prefix "ab/".
        common = []
        for parts in zip(*(entry.split("/") for entry in entries)):
            if any(part != parts[0] for part in parts):
                break
            common.append(parts[0])

        return "".join(part + "/" for part in common)

    @property
    def files(self):
        """ Return all files (with the prefix) in the package """
        return list(self._package_file_mode_map)

    @property
    def dirs(self):
        """ Return all directories in the package """
        return list(self._package_dirs)

    @property
    def descriptor_type(self):
        """ A shorthand name for the type of descriptor (e.g. nsd)"""
        raise NotImplementedError("Subclass must implement this property")

    @property
    def serializer(self):
        """ An instance of convert.ProtoMessageSerializer """
        raise NotImplementedError("Subclass must implement this property")

    @property
    def descriptor_file(self):
        """ The descriptor file name (with prefix)

        Raises:
            PackageError - The package contains no descriptor, or more than one
        """
        # The prefix is a literal path, so escape it before embedding it in the
        # regex; otherwise characters such as "+" or "(" break the match.
        regex = re.compile(
            self.__class__.DESCRIPTOR_REGEX.format(
                descriptor_type=self.descriptor_type,
                prefix=re.escape(self.prefix),
            )
        )

        desc_file = None
        for filename in self.files:
            if regex.match(filename):
                if desc_file is not None:
                    raise PackageError("Package contains more than one descriptor")
                desc_file = filename

        if desc_file is None:
            raise PackageError("Could not find descriptor file in package")

        return desc_file

    @property
    def descriptor_msg(self):
        """ The proto-GI descriptor message """
        filename = self.descriptor_file
        _, ext = os.path.splitext(filename)
        with self.open(filename) as hdl:
            descriptor = self.serializer.from_file_hdl(hdl, ext)
        return descriptor

    @property
    def json_descriptor(self):
        """ The JSON serialized descriptor message"""
        return self.serializer.to_json_string(self.descriptor_msg)

    @property
    def descriptor_id(self):
        """ The descriptor id which uniquely identifies this descriptor in the system

        Raises:
            PackageError - The descriptor does not have an id field
        """
        # descriptor_msg re-reads and deserializes the descriptor on every
        # access, so fetch it only once.
        descriptor = self.descriptor_msg
        if not descriptor.has_field("id"):
            msg = "Descriptor must have an id field"
            self._log.error(msg)
            raise PackageError(msg)

        return descriptor.id

    @classmethod
    def get_descriptor_patterns(cls):
        """ Returns a list of (descriptor regex, Package class) tuples """
        package_types = (VnfdPackage, NsdPackage)
        return [
            (
                cls.DESCRIPTOR_REGEX.format(
                    descriptor_type=pkg_cls.DESCRIPTOR_TYPE,
                    prefix=".*",
                ),
                pkg_cls,
            )
            for pkg_cls in package_types
        ]

    @classmethod
    def from_package_files(cls, log, open_fn, files):
        """ Creates a new DescriptorPackage subclass instance from a list of files

        This classmethod detects the Package type from the package contents
        and returns a new Package instance.

        This will NOT subsequently add the files to the package so that must
        be done by the client

        Arguments:
            log - A logger
            open_fn - A function which can take a file name and mode and return
                      a file handle.
            files - A list of files which would be added to the package after
                    instantiation

        Returns:
            A new DescriptorPackage subclass of the correct type for the descriptor

        Raises:
            PackageError - Package type could not be determined from the list of files.
        """
        patterns = cls.get_descriptor_patterns()
        regexes = {regex for regex, _ in patterns}

        pkg_cls = None
        for name in files:
            # The first pattern that matches a file decides for that file; a
            # later matching file overrides an earlier one.
            for regex, candidate_cls in patterns:
                if re.match(regex, name) is not None:
                    pkg_cls = candidate_cls
                    break

        if pkg_cls is None:
            log.error("No file in archive matched known descriptor formats: %s", regexes)
            raise PackageError("Could not determine package type from contents")

        return pkg_cls(log, open_fn)

    @classmethod
    def from_descriptor_file_hdl(cls, log, file_hdl):
        """ Creates a new DescriptorPackage from a descriptor file handle

        The descriptor file is added to the package before returning.

        Arguments:
            log - A logger
            file_hdl - A file handle whose name attribute can be recognized as
                       particular descriptor type.

        Returns:
            A new DescriptorPackage subclass of the correct type for the descriptor

        Raises:
            PackageError - Package type could not be determined from the file
                           name, or the descriptor could not be deserialized.
            ValueError - file_hdl did not have a name attribute provided
        """
        if not hasattr(file_hdl, 'name'):
            raise ValueError("File descriptor must have a name attribute to create a descriptor package")

        descriptor_name = file_hdl.name

        package_types = (VnfdPackage, NsdPackage)
        filename_patterns = [
            (r".*{}.*".format(candidate.DESCRIPTOR_TYPE), candidate)
            for candidate in package_types
        ]

        # Pick the first package type whose pattern matches the file name
        package_cls = None
        for pattern, candidate in filename_patterns:
            if re.match(pattern, descriptor_name):
                package_cls = candidate
                break

        if package_cls is None:
            raise PackageError("Could not determine package type from file name: %s" % descriptor_name)

        # Deserialize once purely to validate the descriptor; the result is unused.
        _, ext = os.path.splitext(descriptor_name)
        try:
            package_cls.SERIALIZER.from_file_hdl(file_hdl, ext)
        except convert.SerializationError as e:
            raise PackageError("Could not deserialize descriptor %s" % descriptor_name) from e

        # Create a new file handle for each open call to prevent independent clients
        # from affecting each other
        file_hdl.seek(0)
        contents = file_hdl.read()

        def do_open(file_path):
            # Raise explicitly instead of asserting so the check survives
            # "python -O"; DescriptorPackage.open() wraps it in a PackageError.
            if file_path != descriptor_name:
                raise FileNotFoundError(file_path)
            return io.BytesIO(contents)

        desc_pkg = package_cls(log, do_open)
        desc_pkg.add_file(descriptor_name)

        return desc_pkg

    def get_file_mode(self, pkg_file):
        """ Returns the file mode for the package file

        Arguments:
            pkg_file - A file name in the package

        Returns:
            The permission mode

        Raises:
            PackageError - The file does not exist in the package
        """
        try:
            return self._package_file_mode_map[pkg_file]
        except KeyError as e:
            msg = "Could not find package_file: %s" % pkg_file
            self._log.error(msg)
            raise PackageError(msg) from e

    def extract_dir(self, src_dir, dest_root_dir, extract_images=False):
        """ Extract a specific directory contents to dest_root_dir

        Arguments:
            src_dir - A directory within the package (None means all files/directories)
            dest_root_dir - A directory to extract directory contents to
            extract_images - A flag indicating whether we want to extract images

        Raises:
            ExtractError - Directory contents could not be extracted
        """
        if src_dir is not None and src_dir not in self._package_dirs:
            raise ExtractError("Could not find source dir: %s" % src_dir)

        # Match on a full directory component so that "pkg/scripts" does not
        # also capture files under "pkg/scripts2".
        dir_prefix = None
        if src_dir:
            dir_prefix = src_dir if src_dir.endswith("/") else src_dir + "/"

        dest_root = os.path.abspath(dest_root_dir)

        for filename in self.files:
            if not extract_images and image.is_image_file(filename):
                continue

            # Remove the source directory and create the dest filename
            if dir_prefix is None:
                fname = filename
            elif filename.startswith(dir_prefix):
                fname = filename[len(dir_prefix):]
            else:
                continue

            dest_file_path = os.path.join(dest_root_dir, fname)

            # File names come from the package contents; refuse names (absolute
            # paths, "..") that would be written outside of dest_root_dir.
            if os.path.commonpath([dest_root, os.path.abspath(dest_file_path)]) != dest_root:
                raise ExtractError(
                    "Package file %s would be extracted outside of %s" % (filename, dest_root_dir)
                )

            os.makedirs(os.path.dirname(dest_file_path), exist_ok=True)

            # Copy the contents of the file to the correct path
            with open(dest_file_path, 'wb') as dst_hdl:
                with self.open(filename) as src_hdl:
                    shutil.copyfileobj(src_hdl, dst_hdl, 10 * 1024 * 1024)

            # Set the file mode to original
            os.chmod(dest_file_path, self._package_file_mode_map[filename])

    def insert_file(self, new_file, dest_file, rel_path, mode=0o777):
        """ Add a file to the package and copy its contents to dest_file

        Arguments:
            new_file - Path of the source file to copy from
            dest_file - Path the file contents are copied to
            rel_path - The file path relative to the top of the package
            mode - The permission mode the file is registered and stored with

        Raises:
            PackageError - rel_path is empty or already exists in the package
            PackageAppendError - The file contents could not be copied
        """
        self.add_file(rel_path, mode)

        try:
            # A dest_file without a directory component needs no directory
            dest_dir_path = os.path.dirname(dest_file)
            if dest_dir_path:
                os.makedirs(dest_dir_path, exist_ok=True)

            # Copy the contents of the file to the correct path
            with open(dest_file, 'wb') as dst_hdl:
                with open(new_file, 'rb') as src_hdl:
                    shutil.copyfileobj(src_hdl, dst_hdl, 10 * 1024 * 1024)

            # Set the file mode to original
            os.chmod(dest_file, self._package_file_mode_map[rel_path])
        except Exception as e:
            # Clear the file when an exception happens
            if os.path.isfile(dest_file):
                os.remove(dest_file)

            # Undo the registration so the package does not list a file that
            # was never stored.
            self._package_file_mode_map.pop(rel_path, None)

            raise PackageAppendError(str(e)) from e

    def delete_file(self, dest_file, rel_path):
        """ Remove a file from the package and delete dest_file from disk

        Arguments:
            dest_file - Path of the stored file to delete
            rel_path - The file path relative to the top of the package

        Raises:
            PackageError - rel_path is empty or not in the package
            PackageAppendError - dest_file could not be deleted
        """
        self.remove_file(rel_path)

        try:
            os.remove(dest_file)
        except Exception as e:
            raise PackageAppendError(str(e)) from e

    def extract_file(self, src_file, dest_file):
        """ Extract a specific package file to dest_file

        The destination directory will be created if it does not exist.

        Arguments:
            src_file - A file within the package
            dest_file - A file path to extract file contents to

        Raises:
            ExtractError - The source file does not exist in the package
        """
        if src_file not in self._package_file_mode_map:
            msg = "Could not find source file %s" % src_file
            self._log.error(msg)
            raise ExtractError(msg)

        # A dest_file without a directory component (relative to the current
        # directory) has nothing to create; os.makedirs("") would fail.
        dest_dir_path = os.path.dirname(dest_file)
        if dest_dir_path:
            os.makedirs(dest_dir_path, exist_ok=True)

        # Copy the contents of the file to the correct path
        with open(dest_file, 'wb') as dst_hdl:
            with self.open(src_file) as src_hdl:
                shutil.copyfileobj(src_hdl, dst_hdl, 10 * 1024 * 1024)

        # Set the file mode to original
        os.chmod(dest_file, self._package_file_mode_map[src_file])

    def extract(self, dest_root_dir, extract_images=False):
        """ Extract all package contents to a destination directory

        Arguments:
            dest_root_dir - The directory to extract package contents to
            extract_images - A flag indicating whether we want to extract images

        Raises:
            NotADirectoryError - dest_root_dir is not a directory
        """
        if not os.path.isdir(dest_root_dir):
            raise NotADirectoryError(dest_root_dir)

        self.extract_dir(None, dest_root_dir, extract_images)

    def open(self, rel_path):
        """ Open a file contained in the package using the package's open_fn.

        Arguments:
            rel_path - The file path within the package

        Returns:
            The file-like object returned by open_fn.

        Raises:
            PackageError - The file could not be opened
        """
        try:
            return self._open_fn(rel_path)
        except Exception as e:
            msg = "Could not open file from package: %s" % rel_path
            self._log.warning(msg)
            raise PackageError(msg) from e

    def add_file(self, rel_path, mode=0o777):
        """ Add a file to the package.

        The file should be specified as a relative path to the package
        root.  The open_fn provided in the constructor must be able to
        take the relative path and open the actual source file from
        wherever the file actually is stored.

        If the file's parent directories do not yet exist, add them to
        the package.

        Arguments:
            rel_path - The file path relative to the top of the package.
            mode - The permission mode the file should be stored with so
                   it can be extracted with the correct permissions.

        Raises:
            PackageError - The file could not be added to the package
        """
        if not rel_path:
            raise PackageError("Empty file name added")

        if rel_path in self._package_file_mode_map:
            raise PackageError("File %s already exists in package" % rel_path)

        # Register every ancestor directory of the file
        rel_dir = os.path.dirname(rel_path)
        while rel_dir:
            self._package_dirs.add(rel_dir)
            rel_dir = os.path.dirname(rel_dir)

        self._package_file_mode_map[rel_path] = mode

    def remove_file(self, rel_path):
        """ Remove a file from the package.

        Directories registered for the file are left in the package.

        Arguments:
            rel_path - The file path relative to the top of the package.

        Raises:
            PackageError - rel_path is empty or not in the package
        """
        if not rel_path:
            raise PackageError("Empty file name given for removal")

        if rel_path not in self._package_file_mode_map:
            raise PackageError("File %s does not exist in package" % rel_path)

        del self._package_file_mode_map[rel_path]

    def add_dir(self, rel_path):
        """ Add a directory to the package

        Arguments:
            rel_path - The directories relative path.

        Raises:
            PackageError - A file already exists in the package with the same name.
        """
        if rel_path in self._package_file_mode_map:
            raise PackageError("File already exists with the same name: %s" % rel_path)

        if rel_path in self._package_dirs:
            self._log.warning("%s directory already exists", rel_path)
            return

        self._package_dirs.add(rel_path)
491
492
class NsdPackage(DescriptorPackage):
    """ A descriptor package holding an NSD descriptor """
    DESCRIPTOR_TYPE = "nsd"
    SERIALIZER = convert.RwNsdSerializer()

    @property
    def descriptor_type(self):
        """ The shorthand descriptor type name for this package """
        return NsdPackage.DESCRIPTOR_TYPE

    @property
    def serializer(self):
        """ The serializer instance shared by all NSD packages """
        return NsdPackage.SERIALIZER
504
505
class VnfdPackage(DescriptorPackage):
    """ A descriptor package holding a VNFD descriptor """
    DESCRIPTOR_TYPE = "vnfd"
    SERIALIZER = convert.RwVnfdSerializer()

    @property
    def descriptor_type(self):
        """ The shorthand descriptor type name for this package """
        return VnfdPackage.DESCRIPTOR_TYPE

    @property
    def serializer(self):
        """ The serializer instance shared by all VNFD packages """
        return VnfdPackage.SERIALIZER
517
518
class PackageChecksumValidator(object):
    """ This class uses the checksums.txt file in the package
    and validates that all files in the package match the checksum that exists within
    the file.
    """
    CHECKSUM_FILE = "{prefix}checksums.txt"

    def __init__(self, log):
        self._log = log

    @classmethod
    def get_package_checksum_file(cls, package):
        """ Return the path of the checksum file within the package

        Arguments:
            package - The Descriptor Package to look in

        Raises:
            FileNotFoundError - The package does not contain a checksum file
        """
        checksum_file = cls.CHECKSUM_FILE.format(prefix=package.prefix)
        if checksum_file not in package.files:
            raise FileNotFoundError("%s does not exist in archive" % checksum_file)

        return checksum_file

    def validate(self, package):
        """ Validate file checksums match that in the checksums.txt

        Arguments:
            package - The Descriptor Package which possibly contains checksums.txt

        Returns: A dictionary of files that were validated by the checksums.txt
                 along with their checksums.  The dictionary is empty when the
                 checksum file is missing or cannot be opened.

        Raises:
            PackageValidationError - The package validation failed for some
                                     generic reason.
            PackageFileChecksumError - A file within the package did not match the
                                       checksum within checksums.txt
        """
        validated_file_checksums = {}

        try:
            checksum_file = PackageChecksumValidator.get_package_checksum_file(package)
            with package.open(checksum_file) as checksum_hdl:
                archive_checksums = checksums.ArchiveChecksums.from_file_desc(checksum_hdl)
        except (FileNotFoundError, PackageError):
            self._log.warning("Could not open package checksum file. Not validating checksums.")
            return validated_file_checksums

        # The prefix is derived from every package entry, so look it up once
        # rather than once per file.
        prefix = package.prefix

        for pkg_file in package.files:
            if pkg_file == checksum_file:
                continue

            # Checksum entries are looked up without the package prefix
            if prefix and pkg_file.startswith(prefix):
                pkg_file_no_prefix = pkg_file[len(prefix):]
            else:
                pkg_file_no_prefix = pkg_file

            if pkg_file_no_prefix not in archive_checksums:
                self._log.warning("File %s not found in checksum file %s",
                                  pkg_file, checksum_file)
                continue

            try:
                with package.open(pkg_file) as pkg_file_hdl:
                    file_checksum = checksums.checksum(pkg_file_hdl)
            except PackageError as e:
                msg = "Could not read package file {} for checksum validation: {}".format(
                    pkg_file, str(e))
                self._log.error(msg)
                raise PackageValidationError(msg) from e

            expected_checksum = archive_checksums[pkg_file_no_prefix]
            if expected_checksum != file_checksum:
                msg = "{} checksum ({}) did not match expected checksum ({})".format(
                    pkg_file, file_checksum, expected_checksum
                )
                self._log.error(msg)
                raise PackageFileChecksumError(pkg_file)

            validated_file_checksums[pkg_file] = file_checksum

        return validated_file_checksums
591
592
class TarPackageArchive(object):
    """ This class represents a package stored within a tar.gz archive file """
    def __init__(self, log, tar_file_hdl, mode="r"):
        """ Open the archive and index its members

        Arguments:
            log - A logger
            tar_file_hdl - A file object containing the tar archive
            mode - The mode passed to tarfile.open()
        """
        self._log = log
        self._tar_filepath = tar_file_hdl
        self._tar_infos = {}

        # Define the attribute before opening so close()/__del__ stay safe
        # when tarfile.open() raises.
        self._tarfile = None
        self._tarfile = tarfile.open(fileobj=tar_file_hdl, mode=mode)

        self._load_archive()

    def __repr__(self):
        return "TarPackageArchive(%s)" % self._tar_filepath

    def _get_members(self):
        """ Return the TarInfo objects for all members of the archive """
        return list(self._tarfile.getmembers())

    def _load_archive(self):
        """ Index all named archive members by their name """
        self._tar_infos = {info.name: info for info in self._get_members() if info.name}

    def __del__(self):
        self.close()

    def close(self):
        """ Close the opened tarfile (safe to call more than once) """
        # getattr guards against instances whose __init__ never ran to completion
        tar = getattr(self, "_tarfile", None)
        if tar is not None:
            tar.close()
            self._tarfile = None

    @property
    def filenames(self):
        """ The list of file members within the tar file """
        return [name for name, info in self._tar_infos.items() if info.isfile()]

    def open_file(self, rel_file_path):
        """ Opens a file within the archive as read-only, byte mode.

        Arguments:
            rel_file_path - The file path within the archive to open

        Returns:
            A file like object (see tarfile.extractfile())

        Raises:
            ArchiveError - The file could not be opened for some generic reason.
        """
        if rel_file_path not in self._tar_infos:
            raise ArchiveError("Could not find %s in tar file" % rel_file_path)

        if self._tarfile is None:
            raise ArchiveError("Tar file %s is already closed" % self._tar_filepath)

        try:
            file_hdl = self._tarfile.extractfile(rel_file_path)
        except tarfile.TarError as e:
            msg = "Failed to read file {} from tarfile {}: {}".format(
                rel_file_path, self._tar_filepath, str(e)
            )
            self._log.error(msg)
            raise ArchiveError(msg) from e

        # extractfile() returns None for members that are not regular files
        # or links (e.g. directories)
        if file_hdl is None:
            raise ArchiveError("%s is not a regular file in tar file" % rel_file_path)

        return file_hdl

    def create_package(self):
        """ Creates a Descriptor package from the archive contents

        Returns:
            A DescriptorPackage of the correct descriptor type
        """
        filenames = self.filenames
        package = DescriptorPackage.from_package_files(self._log, self.open_file, filenames)
        for pkg_file in filenames:
            package.add_file(pkg_file, self._tar_infos[pkg_file].mode)

        return package
662
663
class TemporaryPackage(object):
    """ Container pairing a DescriptorPackage with the temporary file backing it

    Attribute lookups that this class does not define are forwarded to the
    contained DescriptorPackage, so it can be used in place of one.  When
    used as a context manager, the backing file is closed and removed on exit.
    """
    def __init__(self, log, package, file_hdl):
        self._log = log
        self._package = package
        self._file_hdl = file_hdl

        # The file name is required later on to remove the backing file
        if not hasattr(file_hdl, "name"):
            raise ValueError("File handle must have a name attribute")

    def __getattr__(self, attr):
        # Only reached for attributes not found on this object
        wrapped = self._package
        return getattr(wrapped, attr)

    def __enter__(self):
        return self._package

    def __exit__(self, type, value, tb):
        self.close()

    def filename(self):
        """ Returns the path of the file backing the Package """
        return self._file_hdl.name

    def package(self):
        """ The contained DescriptorPackage instance """
        return self._package

    def close(self):
        """ Close and remove the backing file

        Failures of either step are logged as warnings and not raised.
        """
        backing_path = self._file_hdl.name

        cleanup_steps = (
            (self._file_hdl.close, "Failed to close package file: %s"),
            (lambda: os.remove(backing_path), "Failed to remove package file: %s"),
        )
        for step, warning in cleanup_steps:
            try:
                step()
            except OSError as err:
                self._log.warning(warning, str(err))