# RIFT OSM R1 Initial Submission
# [osm/SO.git] / rwlaunchpad / plugins / rwlaunchpadtasklet / rift / package / package.py
1
#
#   Copyright 2016 RIFT.IO Inc
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
#
17
18 import io
19 import os
20 import re
21 import shutil
22 import tarfile
23
24 from . import checksums
25 from . import convert
26 from . import image
27
28
class ArchiveError(Exception):
    """ Raised when a file cannot be found in, or read from, a package archive """
31
32
class ExtractError(Exception):
    """ Raised when package contents cannot be extracted """
35
36
class PackageError(Exception):
    """ Raised for generic failures while building or reading a package """
39
40
class PackageValidationError(Exception):
    """ Raised when a package fails validation """
43
44
class PackageFileChecksumError(PackageValidationError):
    """ Raised when a package file does not match its recorded checksum

    The offending file name is kept on the ``filename`` attribute.
    """
    def __init__(self, filename):
        message = "Checksum mismatch for {}".format(filename)
        super().__init__(message)
        self.filename = filename
49
50
class DescriptorPackage(object):
    """ Base class for a descriptor package

    A descriptor package is a package which contains a single descriptor and any
    associated files (logos, charms, scripts, etc).  This package representation
    attempts to be agnostic as to where the package files are being stored
    (in memory, on disk, etc).

    The package provides a simple interface to interact with the files within the
    package and access the contained descriptor.
    """
    DESCRIPTOR_REGEX = r"{prefix}({descriptor_type}/[^/]*|[^/]*{descriptor_type})\.(xml|yml|yaml|json)$"

    # Buffer size handed to shutil.copyfileobj when extracting package files
    _COPY_BUFFER_SIZE = 10 * 1024 * 1024

    def __init__(self, log, open_fn):
        """ Create an empty package

        Arguments:
            log - A logger
            open_fn - A function which is called with a package file path and
                      returns a file handle for that file
        """
        self._log = log
        self._open_fn = open_fn

        self._package_file_mode_map = {}
        self._package_dirs = set()

    @classmethod
    def _package_types(cls):
        """ Return the known DescriptorPackage subclasses in detection order """
        # Resolved lazily because the subclasses are defined after this class
        return (VnfdPackage, NsdPackage)

    @property
    def prefix(self):
        """ Return the leading parent directories shared by all files in the package

        In order to remain flexible as to where tar was invoked to create the package,
        the prefix represents the common parent directory path which all files in the
        package have in common.

        The result is either an empty string or a path ending in "/".
        """
        entries = list(self._package_file_mode_map) + list(self._package_dirs)
        if not entries:
            return ""

        if len(entries) == 1:
            entry = entries[0]
            if "/" in entry:
                return os.path.dirname(entry) + "/"
            return ""

        # Compare whole path components.  A character-wise comparison (such as
        # os.path.commonprefix) would report "foo/" for "foo/a" and "foobar/b".
        common = []
        for components in zip(*(entry.split("/") for entry in entries)):
            first = components[0]
            if any(component != first for component in components):
                break
            common.append(first)

        if not common:
            return ""

        # The trailing empty element yields the closing "/" (and "/" for a
        # shared filesystem root).
        return "/".join(common + [""])

    @property
    def files(self):
        """ Return all files (with the prefix) in the package """
        return list(self._package_file_mode_map)

    @property
    def dirs(self):
        """ Return all directories in the package """
        return list(self._package_dirs)

    @property
    def descriptor_type(self):
        """ A shorthand name for the type of descriptor (e.g. nsd)"""
        raise NotImplementedError("Subclass must implement this property")

    @property
    def serializer(self):
        """ An instance of convert.ProtoMessageSerializer """
        raise NotImplementedError("Subclass must implement this property")

    @property
    def descriptor_file(self):
        """ The descriptor file name (with prefix)

        Raises:
            PackageError - The package contains no descriptor file, or more
                           than one.
        """
        # The prefix is literal path text, so escape it before it becomes part
        # of the pattern (directory names often contain ".", "+", etc).
        regex = re.compile(self.__class__.DESCRIPTOR_REGEX.format(
            descriptor_type=self.descriptor_type,
            prefix=re.escape(self.prefix),
        ))

        matches = [filename for filename in self.files if regex.match(filename)]
        if len(matches) > 1:
            raise PackageError("Package contains more than one descriptor")

        if not matches:
            raise PackageError("Could not find descriptor file in package")

        return matches[0]

    @property
    def descriptor_msg(self):
        """ The proto-GI descriptor message """
        filename = self.descriptor_file
        _, ext = os.path.splitext(filename)
        with self.open(filename) as hdl:
            descriptor = self.serializer.from_file_hdl(hdl, ext)

        return descriptor

    @property
    def json_descriptor(self):
        """ The JSON serialized descriptor message"""
        return self.serializer.to_json_string(self.descriptor_msg)

    @property
    def descriptor_id(self):
        """ The descriptor id which uniquely identifies this descriptor in the system

        Raises:
            PackageError - The descriptor does not have an id field
        """
        # descriptor_msg re-reads the descriptor on every access, so bind it once
        descriptor = self.descriptor_msg
        if not descriptor.has_field("id"):
            msg = "Descriptor must have an id field"
            self._log.error(msg)
            raise PackageError(msg)

        return descriptor.id

    @classmethod
    def get_descriptor_patterns(cls):
        """ Returns a list of (descriptor regex, Package Type) tuples """
        return [
            (
                cls.DESCRIPTOR_REGEX.format(
                    descriptor_type=pkg_cls.DESCRIPTOR_TYPE,
                    prefix=".*"
                ),
                pkg_cls,
            )
            for pkg_cls in cls._package_types()
        ]

    @classmethod
    def from_package_files(cls, log, open_fn, files):
        """ Creates a new DescriptorPackage subclass instance from a list of files

        This classmethod detects the Package type from the package contents
        and returns a new Package instance.  If several files match a
        descriptor pattern, the last matching file decides the type.

        This will NOT subsequently add the files to the package so that must
        be done by the client

        Arguments:
            log - A logger
            open_fn - A function which is called with a file name and returns
                      a file handle.
            files - A list of files which would be added to the package after
                    instantiation

        Returns:
            A new DescriptorPackage subclass of the correct type for the descriptor

        Raises:
            PackageError - Package type could not be determined from the list of files.
        """
        patterns = cls.get_descriptor_patterns()

        pkg_cls = None
        for name in files:
            for regex, candidate_cls in patterns:
                if re.match(regex, name) is not None:
                    pkg_cls = candidate_cls
                    break

        if pkg_cls is None:
            regexes = {regex for regex, _ in patterns}
            log.error("No file in archive matched known descriptor formats: %s", regexes)
            raise PackageError("Could not determine package type from contents")

        return pkg_cls(log, open_fn)

    @classmethod
    def from_descriptor_file_hdl(cls, log, file_hdl):
        """ Creates a new DescriptorPackage from a descriptor file handle

        The descriptor file is added to the package before returning.

        Arguments:
            log - A logger
            file_hdl - A file handle whose name attribute can be recognized as
                       particular descriptor type.

        Returns:
            A new DescriptorPackage subclass of the correct type for the descriptor

        Raises:
            PackageError - Package type could not be determined from the file
                           name, or the descriptor could not be deserialized.
            ValueError - file_hdl did not have a name attribute provided
        """
        if not hasattr(file_hdl, 'name'):
            raise ValueError("File descriptor must have a name attribute to create a descriptor package")

        file_name = file_hdl.name

        # The first package type whose descriptor type appears in the name wins
        package_cls = None
        for candidate_cls in cls._package_types():
            pattern = r".*{}.*".format(candidate_cls.DESCRIPTOR_TYPE)
            if re.match(pattern, file_name):
                package_cls = candidate_cls
                break

        if package_cls is None:
            raise PackageError("Could not determine package type from file name: %s" % file_name)

        # Deserialize once up front so an invalid descriptor is rejected here
        _, ext = os.path.splitext(file_name)
        try:
            package_cls.SERIALIZER.from_file_hdl(file_hdl, ext)
        except convert.SerializationError as e:
            raise PackageError("Could not deserialize descriptor %s" % file_name) from e

        # Snapshot the contents and hand out a new file handle for each open
        # call to prevent independent clients from affecting each other
        file_hdl.seek(0)
        contents = bytes(file_hdl.read())

        def do_open(file_path):
            if file_path != file_name:
                # Reaches clients as a PackageError via DescriptorPackage.open()
                raise FileNotFoundError(file_path)
            return io.BytesIO(contents)

        desc_pkg = package_cls(log, do_open)
        desc_pkg.add_file(file_name)

        return desc_pkg

    def get_file_mode(self, pkg_file):
        """ Returns the file mode for the package file

        Arguments:
            pkg_file - A file name in the package

        Returns:
            The permission mode

        Raises:
            PackageError - The file does not exist in the package
        """
        try:
            return self._package_file_mode_map[pkg_file]
        except KeyError as e:
            msg = "Could not find package_file: %s" % pkg_file
            self._log.error(msg)
            raise PackageError(msg) from e

    def _copy_to_path(self, pkg_file, dest_path):
        """ Copy one package file to dest_path and restore its permission mode """
        dest_dir = os.path.dirname(dest_path)
        # dest_dir is empty when dest_path is a bare file name; makedirs("") fails
        if dest_dir:
            os.makedirs(dest_dir, exist_ok=True)

        # Open the source first so a failed open does not leave an empty file
        with self.open(pkg_file) as src_hdl:
            with open(dest_path, 'wb') as dst_hdl:
                shutil.copyfileobj(src_hdl, dst_hdl, self._COPY_BUFFER_SIZE)

        # Set the file mode to original
        os.chmod(dest_path, self._package_file_mode_map[pkg_file])

    def extract_dir(self, src_dir, dest_root_dir, extract_images=False):
        """ Extract a specific directory contents to dest_root_dir

        Arguments:
            src_dir - A directory within the package (None means all files/directories)
            dest_root_dir - A directory to extract directory contents to
            extract_images - A flag indicating whether we want to extract images

        Raises:
            ExtractError - Directory contents could not be extracted
        """
        if src_dir is not None and src_dir not in self._package_dirs:
            raise ExtractError("Could not find source dir: %s" % src_dir)

        # Match on a whole directory component so that "foo" does not also
        # select files which live under "foobar/".
        dir_prefix = src_dir.rstrip("/") + "/" if src_dir else None

        # Trailing separator keeps "/tmp/out" from matching "/tmp/outside"
        root_prefix = os.path.join(os.path.abspath(dest_root_dir), "")

        for filename in self.files:
            if not extract_images and image.is_image_file(filename):
                continue

            if dir_prefix is not None and not filename.startswith(dir_prefix):
                continue

            dest_file_path = os.path.join(dest_root_dir, filename)

            # Package file names may come from an uploaded archive; refuse
            # names ("../x", absolute paths) which land outside dest_root_dir.
            if not os.path.abspath(dest_file_path).startswith(root_prefix):
                msg = "Package file %s would be extracted outside of %s" % (
                    filename, dest_root_dir)
                self._log.error(msg)
                raise ExtractError(msg)

            self._copy_to_path(filename, dest_file_path)

    def extract_file(self, src_file, dest_file):
        """ Extract a specific package file to dest_file

        The destination directory will be created if it does not exist.

        Arguments:
            src_file - A file within the package
            dest_file - A file path to extract file contents to

        Raises:
            ExtractError - The source file does not exist in the package
        """
        if src_file not in self._package_file_mode_map:
            msg = "Could not find source file %s" % src_file
            self._log.error(msg)
            raise ExtractError(msg)

        self._copy_to_path(src_file, dest_file)

    def extract(self, dest_root_dir, extract_images=False):
        """ Extract all package contents to a destination directory

        Arguments:
            dest_root_dir - The directory to extract package contents to
            extract_images - A flag indicating whether we want to extract images

        Raises:
            NotADirectoryError - dest_root_dir is not a directory
        """
        if not os.path.isdir(dest_root_dir):
            raise NotADirectoryError(dest_root_dir)

        self.extract_dir(None, dest_root_dir, extract_images)

    def open(self, rel_path):
        """ Open a file contained in the package in read-only, binary mode.

        Arguments:
            rel_path - The file path within the package

        Returns:
            A file-like object opened in read-only mode.

        Raises:
            PackageError - The file could not be opened
        """
        try:
            return self._open_fn(rel_path)
        except Exception as e:
            # open_fn is client supplied, so any failure is reported uniformly
            msg = "Could not open file from package: %s" % rel_path
            self._log.warning(msg)
            raise PackageError(msg) from e

    def add_file(self, rel_path, mode=0o777):
        """ Add a file to the package.

        The file should be specified as a relative path to the package
        root.  The open_fn provided in the constructor must be able to
        take the relative path and open the actual source file from
        wherever the file actually is stored.

        If the file's parent directories do not yet exist, add them to
        the package.

        Arguments:
            rel_path - The file path relative to the top of the package.
            mode - The permission mode the file should be stored with so
                   it can be extracted with the correct permissions.

        Raises:
            PackageError - The file could not be added to the package
        """
        if not rel_path:
            raise PackageError("Empty file name added")

        if rel_path in self._package_file_mode_map:
            raise PackageError("File %s already exists in package" % rel_path)

        # Register every ancestor directory of the file with the package
        rel_dir = os.path.dirname(rel_path)
        while rel_dir:
            self._package_dirs.add(rel_dir)
            rel_dir = os.path.dirname(rel_dir)

        self._package_file_mode_map[rel_path] = mode

    def add_dir(self, rel_path):
        """ Add a directory to the package

        Arguments:
            rel_path - The directories relative path.

        Raises:
            PackageError - A file already exists in the package with the same name.
        """
        if rel_path in self._package_file_mode_map:
            raise PackageError("File already exists with the same name: %s" % rel_path)

        if rel_path in self._package_dirs:
            self._log.warning("%s directory already exists", rel_path)
            return

        self._package_dirs.add(rel_path)
441
442
class NsdPackage(DescriptorPackage):
    """ A descriptor package whose descriptor type is "nsd" """
    DESCRIPTOR_TYPE = "nsd"
    SERIALIZER = convert.RwNsdSerializer()

    @property
    def serializer(self):
        """ The serializer instance shared by all NsdPackage objects """
        return NsdPackage.SERIALIZER

    @property
    def descriptor_type(self):
        """ The shorthand descriptor type name for this package """
        return "nsd"
454
455
class VnfdPackage(DescriptorPackage):
    """ A descriptor package whose descriptor type is "vnfd" """
    DESCRIPTOR_TYPE = "vnfd"
    SERIALIZER = convert.RwVnfdSerializer()

    @property
    def serializer(self):
        """ The serializer instance shared by all VnfdPackage objects """
        return VnfdPackage.SERIALIZER

    @property
    def descriptor_type(self):
        """ The shorthand descriptor type name for this package """
        return "vnfd"
467
468
class PackageChecksumValidator(object):
    """ This class uses the checksums.txt file in the package
    and validates that all files in the package match the checksum that exists within
    the file.
    """
    CHECKSUM_FILE = "{prefix}checksums.txt"

    def __init__(self, log):
        """ Create a validator

        Arguments:
            log - A logger
        """
        self._log = log

    @classmethod
    def get_package_checksum_file(cls, package):
        """ Return the name (with prefix) of the package's checksum file

        Arguments:
            package - The Descriptor Package to look in

        Raises:
            FileNotFoundError - The package does not contain a checksum file
        """
        checksum_file = cls.CHECKSUM_FILE.format(prefix=package.prefix)
        if checksum_file not in package.files:
            raise FileNotFoundError("%s does not exist in archive" % checksum_file)

        return checksum_file

    def validate(self, package):
        """ Validate file checksums match that in the checksums.txt

        Files which are not listed in checksums.txt are skipped with a warning.
        When the checksum file is missing or cannot be opened, nothing is
        validated and an empty dictionary is returned.

        Arguments:
            package - The Descriptor Package which possibly contains checksums.txt

        Returns: A dictionary of files that were validated by the checksums.txt
                 along with their checksums

        Raises:
            PackageValidationError - The package validation failed for some
                                     generic reason.
            PackageFileChecksumError - A file within the package did not match the
                                       checksum within checksums.txt
        """
        validated_file_checksums = {}

        try:
            checksum_file = self.get_package_checksum_file(package)
            with package.open(checksum_file) as checksum_hdl:
                archive_checksums = checksums.ArchiveChecksums.from_file_desc(checksum_hdl)
        except (FileNotFoundError, PackageError):
            self._log.warning("Could not open package checksum file. Not validating checksums.")
            return validated_file_checksums

        # The prefix is derived from the full file list; compute it once
        prefix = package.prefix

        for pkg_file in package.files:
            if pkg_file == checksum_file:
                continue

            # checksums.txt is keyed by the file path without the package prefix
            if prefix and pkg_file.startswith(prefix):
                pkg_file_no_prefix = pkg_file[len(prefix):]
            else:
                pkg_file_no_prefix = pkg_file

            if pkg_file_no_prefix not in archive_checksums:
                self._log.warning("File %s not found in checksum file %s",
                                  pkg_file, checksum_file)
                continue

            try:
                with package.open(pkg_file) as pkg_file_hdl:
                    file_checksum = checksums.checksum(pkg_file_hdl)
            except PackageError as e:
                msg = "Could not read package file {} for checksum validation: {}".format(
                    pkg_file, str(e))
                self._log.error(msg)
                raise PackageValidationError(msg) from e

            expected_checksum = archive_checksums[pkg_file_no_prefix]
            if expected_checksum != file_checksum:
                msg = "{} checksum ({}) did not match expected checksum ({})".format(
                    pkg_file, file_checksum, expected_checksum
                )
                self._log.error(msg)
                raise PackageFileChecksumError(pkg_file)

            validated_file_checksums[pkg_file] = file_checksum

        return validated_file_checksums
541
542
class TarPackageArchive(object):
    """ This class represents a package stored within a tar archive file """
    def __init__(self, log, tar_file_hdl, mode="r"):
        """ Open the archive and index its members

        Arguments:
            log - A logger
            tar_file_hdl - A file handle of the tar file, passed to
                           tarfile.open() as fileobj
            mode - The mode passed to tarfile.open()
        """
        self._log = log
        self._tar_filepath = tar_file_hdl
        self._tar_infos = {}

        # Defined before opening so close()/__del__ stay safe if open raises
        self._tarfile = None
        self._tarfile = tarfile.open(fileobj=tar_file_hdl, mode=mode)

        self._load_archive()

    def __repr__(self):
        return "TarPackageArchive(%s)" % self._tar_filepath

    def _get_members(self):
        """ Return a new list of all TarInfo members in the archive """
        return list(self._tarfile.getmembers())

    def _load_archive(self):
        """ Index the archive members by name, skipping members without a name """
        self._tar_infos = {info.name: info for info in self._get_members() if info.name}

    def __del__(self):
        self.close()

    def close(self):
        """ Close the opened tarfile

        Safe to call more than once, and on an instance whose tarfile was
        never opened.
        """
        tar = getattr(self, "_tarfile", None)
        if tar is not None:
            tar.close()
            self._tarfile = None

    @property
    def filenames(self):
        """ The list of file members within the tar file """
        return [name for name, info in self._tar_infos.items() if info.isfile()]

    def open_file(self, rel_file_path):
        """ Opens a file within the archive as read-only, byte mode.

        Arguments:
            rel_file_path - The file path within the archive to open

        Returns:
            A file like object (see tarfile.extractfile())

        Raises:
            ArchiveError - The file could not be opened for some generic reason.
        """
        if rel_file_path not in self._tar_infos:
            raise ArchiveError("Could not find %s in tar file" % rel_file_path)

        if self._tarfile is None:
            raise ArchiveError("Tar file %s is closed" % self._tar_filepath)

        try:
            # Hand over the indexed TarInfo so tarfile need not search by name
            file_hdl = self._tarfile.extractfile(self._tar_infos[rel_file_path])
        except tarfile.TarError as e:
            msg = "Failed to read file {} from tarfile {}: {}".format(
                rel_file_path, self._tar_filepath, str(e)
            )
            self._log.error(msg)
            raise ArchiveError(msg) from e

        # extractfile() returns None for members which are not regular files
        # or links (e.g. directories)
        if file_hdl is None:
            raise ArchiveError("%s is not a regular file in tar file" % rel_file_path)

        return file_hdl

    def create_package(self):
        """ Creates a Descriptor package from the archive contents

        Returns:
            A DescriptorPackage of the correct descriptor type
        """
        filenames = self.filenames
        package = DescriptorPackage.from_package_files(self._log, self.open_file, filenames)
        for pkg_file in filenames:
            package.add_file(pkg_file, self._tar_infos[pkg_file].mode)

        return package
612
613
class TemporaryPackage(object):
    """ This class is a container for a temporary file-backed package

    This class contains a DescriptorPackage and can be used in place of one.
    Provides a useful context manager which will close and destroy the file
    that is backing the DescriptorPackage on exit.
    """
    def __init__(self, log, package, file_hdl):
        """ Wrap a package together with the file which backs it

        Arguments:
            log - A logger
            package - The DescriptorPackage to wrap
            file_hdl - The open file backing the package

        Raises:
            ValueError - file_hdl does not have a name attribute
        """
        if not hasattr(file_hdl, "name"):
            raise ValueError("File handle must have a name attribute")

        self._log = log
        self._package = package
        self._file_hdl = file_hdl

    def __getattr__(self, attr):
        # Only reached when normal lookup fails.  If _package itself is missing
        # (e.g. an instance created without __init__), delegating through
        # self._package would re-enter this method without end.
        if attr == "_package":
            raise AttributeError(attr)

        return getattr(self._package, attr)

    def __enter__(self):
        return self._package

    def __exit__(self, type, value, tb):
        self.close()

    def filename(self):
        """ Returns the filepath which is backing the Package """
        return self._file_hdl.name

    def package(self):
        """ The contained DescriptorPackage instance """
        return self._package

    def close(self):
        """ Close and remove the backed file

        Failures to close or remove the file are logged as warnings and
        otherwise ignored.
        """
        filename = self._file_hdl.name

        try:
            self._file_hdl.close()
        except OSError as e:
            self._log.warning("Failed to close package file: %s", str(e))

        try:
            os.remove(filename)
        except OSError as e:
            self._log.warning("Failed to remove package file: %s", str(e))