Commit 17583c8b authored by Mark Beierl

Merge branch 'master' into 'master'

Update squid charm

See merge request !105
parents e2197769 9c012576
Pipeline #143 passed in 1 minute and 34 seconds
#!/usr/bin/env python3
# Copyright 2019 Canonical Ltd.
# Copyright 2019-2020 Canonical Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,9 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Main entry point to the Operator Framework."""
import inspect
import logging
import os
import shutil
import subprocess
import sys
import typing
import warnings
from pathlib import Path
import yaml
@@ -23,9 +29,10 @@ import yaml
import ops.charm
import ops.framework
import ops.model
import logging
import ops.storage
from ops.log import setup_root_logging
from ops.jujuversion import JujuVersion
CHARM_STATE_FILE = '.unit-state.db'
@@ -33,6 +40,17 @@ CHARM_STATE_FILE = '.unit-state.db'
logger = logging.getLogger()
def _exe_path(path: Path) -> typing.Optional[Path]:
"""Find and return the full path to the given binary.
Here path is the absolute path to a binary, which might be missing an extension.
"""
p = shutil.which(path.name, mode=os.F_OK, path=str(path.parent))
if p is None:
return None
return Path(p)
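# An illustrative sketch, not part of this diff: how _exe_path can be used to
# locate the dispatch binary, tolerating a Windows-style extension such as
# 'dispatch.exe'. The helper name is hypothetical.
def _example_find_dispatch(charm_dir: Path) -> typing.Optional[Path]:
    # Returns charm_dir/'dispatch' (or 'dispatch.exe'), or None if absent.
    return _exe_path(charm_dir / 'dispatch')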
def _get_charm_dir():
charm_dir = os.environ.get("JUJU_CHARM_DIR")
if charm_dir is None:
@@ -43,22 +61,12 @@ def _get_charm_dir():
return charm_dir
def _load_metadata(charm_dir):
metadata = yaml.safe_load((charm_dir / 'metadata.yaml').read_text())
actions_meta = charm_dir / 'actions.yaml'
if actions_meta.exists():
actions_metadata = yaml.safe_load(actions_meta.read_text())
else:
actions_metadata = {}
return metadata, actions_metadata
def _create_event_link(charm, bound_event):
def _create_event_link(charm, bound_event, link_to):
"""Create a symlink for a particular event.
charm -- A charm object.
bound_event -- An event for which to create a symlink.
link_to -- What the event link should point to
"""
if issubclass(bound_event.event_type, ops.charm.HookEvent):
event_dir = charm.framework.charm_dir / 'hooks'
@@ -76,10 +84,7 @@ def _create_event_link(charm, bound_event):
event_dir.mkdir(exist_ok=True)
if not event_path.exists():
# CPython has different implementations for populating sys.argv[0] for Linux and Windows.
# For Windows it is always an absolute path (any symlinks are resolved)
# while for Linux it can be a relative path.
target_path = os.path.relpath(os.path.realpath(sys.argv[0]), str(event_dir))
target_path = os.path.relpath(link_to, str(event_dir))
# Ignore the non-symlink files or directories
# assuming the charm author knows what they are doing.
@@ -103,10 +108,16 @@ def _setup_event_links(charm_dir, charm):
charm -- An instance of the Charm class.
"""
# XXX: on windows this function does not accomplish what it wants to:
# it creates symlinks with no extension pointing to a .py
# and juju only knows how to handle .exe, .bat, .cmd, and .ps1
# so it does its job, but does not accomplish anything as the
# hooks aren't 'callable'.
link_to = os.path.realpath(os.environ.get("JUJU_DISPATCH_PATH", sys.argv[0]))
for bound_event in charm.on.events().values():
# Only events that originate from Juju need symlinks.
if issubclass(bound_event.event_type, (ops.charm.HookEvent, ops.charm.ActionEvent)):
_create_event_link(charm, bound_event)
_create_event_link(charm, bound_event, link_to)
def _emit_charm_event(charm, event_name):
@@ -119,13 +130,13 @@ def _emit_charm_event(charm, event_name):
try:
event_to_emit = getattr(charm.on, event_name)
except AttributeError:
logger.debug("event %s not defined for %s", event_name, charm)
logger.debug("Event %s not defined for %s.", event_name, charm)
# If the event is not supported by the charm implementation, do
# not error out or try to emit it. This is to support rollbacks.
if event_to_emit is not None:
args, kwargs = _get_event_args(charm, event_to_emit)
logger.debug('Emitting Juju event %s', event_name)
logger.debug('Emitting Juju event %s.', event_name)
event_to_emit.emit(*args, **kwargs)
@@ -156,79 +167,239 @@ def _get_event_args(charm, bound_event):
return [], {}
def main(charm_class):
class _Dispatcher:
"""Encapsulate how to figure out what event Juju wants us to run.
Also knows how to run “legacy” hooks when Juju called us via a top-level
``dispatch`` binary.
Args:
charm_dir: the toplevel directory of the charm
Attributes:
event_name: the name of the event to run
is_dispatch_aware: are we running under a Juju that knows about the
dispatch binary, and is that binary present?
"""
def __init__(self, charm_dir: Path):
self._charm_dir = charm_dir
self._exec_path = Path(os.environ.get('JUJU_DISPATCH_PATH', sys.argv[0]))
dispatch = charm_dir / 'dispatch'
if JujuVersion.from_environ().is_dispatch_aware() and _exe_path(dispatch) is not None:
self._init_dispatch()
else:
self._init_legacy()
def ensure_event_links(self, charm):
"""Make sure necessary symlinks are present on disk."""
if self.is_dispatch_aware:
# links aren't needed
return
# When a charm is force-upgraded and a unit is in an error state Juju
# does not run upgrade-charm and instead runs the failed hook followed
# by config-changed. Given the nature of force-upgrading the hook setup
# code is not triggered on config-changed.
#
# 'start' event is included as Juju does not fire the install event for
# K8s charms (see LP: #1854635).
if (self.event_name in ('install', 'start', 'upgrade_charm')
or self.event_name.endswith('_storage_attached')):
_setup_event_links(self._charm_dir, charm)
def run_any_legacy_hook(self):
"""Run any extant legacy hook.
If there is both a dispatch file and a legacy hook for the
current event, run the wanted legacy hook.
"""
if not self.is_dispatch_aware:
# we *are* the legacy hook
return
dispatch_path = _exe_path(self._charm_dir / self._dispatch_path)
if dispatch_path is None:
logger.debug("Legacy %s does not exist.", self._dispatch_path)
return
# super strange that there isn't an is_executable
if not os.access(str(dispatch_path), os.X_OK):
logger.warning("Legacy %s exists but is not executable.", self._dispatch_path)
return
if dispatch_path.resolve() == Path(sys.argv[0]).resolve():
logger.debug("Legacy %s is just a link to ourselves.", self._dispatch_path)
return
argv = sys.argv.copy()
argv[0] = str(dispatch_path)
logger.info("Running legacy %s.", self._dispatch_path)
try:
subprocess.run(argv, check=True)
except subprocess.CalledProcessError as e:
logger.warning("Legacy %s exited with status %d.", self._dispatch_path, e.returncode)
sys.exit(e.returncode)
except OSError as e:
logger.warning("Unable to run legacy %s: %s", self._dispatch_path, e)
sys.exit(1)
else:
logger.debug("Legacy %s exited with status 0.", self._dispatch_path)
def _set_name_from_path(self, path: Path):
"""Sets the name attribute to that which can be inferred from the given path."""
name = path.name.replace('-', '_')
if path.parent.name == 'actions':
name = '{}_action'.format(name)
self.event_name = name
def _init_legacy(self):
"""Set up the 'legacy' dispatcher.
The current Juju doesn't know about 'dispatch' and calls hooks
explicitly.
"""
self.is_dispatch_aware = False
self._set_name_from_path(self._exec_path)
def _init_dispatch(self):
"""Set up the new 'dispatch' dispatcher.
The current Juju will run 'dispatch' if it exists, and otherwise fall
back to the old behaviour.
JUJU_DISPATCH_PATH will be set to the wanted hook, e.g. hooks/install,
in both cases.
"""
self._dispatch_path = Path(os.environ['JUJU_DISPATCH_PATH'])
if 'OPERATOR_DISPATCH' in os.environ:
logger.debug("Charm called itself via %s.", self._dispatch_path)
sys.exit(0)
os.environ['OPERATOR_DISPATCH'] = '1'
self.is_dispatch_aware = True
self._set_name_from_path(self._dispatch_path)
def is_restricted_context(self):
"""Return True if we are running in a restricted Juju context.
When in a restricted context, most commands (relation-get, config-get,
state-get) are not available. As such, we change how we interact with
Juju.
"""
return self.event_name in ('collect_metrics',)
def _should_use_controller_storage(db_path: Path, meta: ops.charm.CharmMeta) -> bool:
"""Figure out whether we want to use controller storage or not."""
# if you've previously used local state, carry on using that
if db_path.exists():
logger.debug("Using local storage: %s already exists", db_path)
return False
# if you're not in k8s you don't need controller storage
if 'kubernetes' not in meta.series:
logger.debug("Using local storage: not a kubernetes charm")
return False
# are we in a new enough Juju?
cur_version = JujuVersion.from_environ()
if cur_version.has_controller_storage():
logger.debug("Using controller storage: JUJU_VERSION=%s", cur_version)
return True
else:
logger.debug("Using local storage: JUJU_VERSION=%s", cur_version)
return False
def main(charm_class: typing.Type[ops.charm.CharmBase], use_juju_for_storage: typing.Optional[bool] = None):
"""Set up the charm and dispatch the observed event.
The event name is based on the way this executable was called (argv[0]).
Args:
charm_class: your charm class.
use_juju_for_storage: whether to use controller-side storage. If not specified
then kubernetes charms that haven't previously used local storage and that
are running on a new enough Juju default to controller-side storage,
otherwise local storage is used.
"""
charm_dir = _get_charm_dir()
model_backend = ops.model.ModelBackend()
model_backend = ops.model._ModelBackend()
debug = ('JUJU_DEBUG' in os.environ)
setup_root_logging(model_backend, debug=debug)
logger.debug("Operator Framework %s up and running.", ops.__version__)
dispatcher = _Dispatcher(charm_dir)
dispatcher.run_any_legacy_hook()
metadata = (charm_dir / 'metadata.yaml').read_text()
actions_meta = charm_dir / 'actions.yaml'
if actions_meta.exists():
actions_metadata = actions_meta.read_text()
else:
actions_metadata = None
if not yaml.__with_libyaml__:
logger.debug('yaml does not have libyaml extensions, using slower pure Python yaml loader')
meta = ops.charm.CharmMeta.from_yaml(metadata, actions_metadata)
model = ops.model.Model(meta, model_backend)
# Process the Juju event relevant to the current hook execution
# JUJU_HOOK_NAME, JUJU_FUNCTION_NAME, and JUJU_ACTION_NAME are not used
# in order to support simulation of events from debugging sessions.
#
# TODO: For Windows, when symlinks are used, this is not a valid
# method of getting an event name (see LP: #1854505).
juju_exec_path = Path(sys.argv[0])
has_dispatch = juju_exec_path.name == 'dispatch'
if has_dispatch:
# The executable was 'dispatch', which means the actual hook we want to
# run needs to be looked up in the JUJU_DISPATCH_PATH env var, where it
# should be a path relative to the charm directory (the directory that
# holds `dispatch`). If that path actually exists, we want to run that
# before continuing.
dispatch_path = juju_exec_path.parent / Path(os.environ['JUJU_DISPATCH_PATH'])
if dispatch_path.exists() and dispatch_path.resolve() != juju_exec_path.resolve():
argv = sys.argv.copy()
argv[0] = str(dispatch_path)
try:
subprocess.run(argv, check=True)
except subprocess.CalledProcessError as e:
logger.warning("hook %s exited with status %d", dispatch_path, e.returncode)
sys.exit(e.returncode)
juju_exec_path = dispatch_path
juju_event_name = juju_exec_path.name.replace('-', '_')
if juju_exec_path.parent.name == 'actions':
juju_event_name = '{}_action'.format(juju_event_name)
metadata, actions_metadata = _load_metadata(charm_dir)
meta = ops.charm.CharmMeta(metadata, actions_metadata)
unit_name = os.environ['JUJU_UNIT_NAME']
model = ops.model.Model(unit_name, meta, model_backend)
# TODO: If Juju unit agent crashes after exit(0) from the charm code
# the framework will commit the snapshot but Juju will not commit its
# operation.
charm_state_path = charm_dir / CHARM_STATE_FILE
framework = ops.framework.Framework(charm_state_path, charm_dir, meta, model)
if use_juju_for_storage and not ops.storage.juju_backend_available():
# raise an exception; the charm is broken and needs fixing.
msg = 'charm set use_juju_for_storage=True, but Juju version {} does not support it'
raise RuntimeError(msg.format(JujuVersion.from_environ()))
if use_juju_for_storage is None:
use_juju_for_storage = _should_use_controller_storage(charm_state_path, meta)
if use_juju_for_storage:
if dispatcher.is_restricted_context():
# TODO: jam 2020-06-30 This unconditionally avoids running a collect metrics event.
# Though we eventually expect that juju will run collect-metrics in a
# non-restricted context. Once we can determine that we are running collect-metrics
# in a non-restricted context, we should fire the event as normal.
logger.debug('"%s" is not supported when using Juju for storage\n'
'see: https://github.com/canonical/operator/issues/348',
dispatcher.event_name)
# Note that we don't exit nonzero, because that would cause Juju to rerun the hook
return
store = ops.storage.JujuStorage()
else:
store = ops.storage.SQLiteStorage(charm_state_path)
framework = ops.framework.Framework(store, charm_dir, meta, model)
framework.set_breakpointhook()
try:
charm = charm_class(framework, None)
if not has_dispatch:
# When a charm is force-upgraded and a unit is in an error state Juju
# does not run upgrade-charm and instead runs the failed hook followed
# by config-changed. Given the nature of force-upgrading the hook setup
# code is not triggered on config-changed.
#
# 'start' event is included as Juju does not fire the install event for
# K8s charms (see LP: #1854635).
if (juju_event_name in ('install', 'start', 'upgrade_charm')
or juju_event_name.endswith('_storage_attached')):
_setup_event_links(charm_dir, charm)
sig = inspect.signature(charm_class)
try:
sig.bind(framework)
except TypeError:
msg = (
"the second argument, 'key', has been deprecated and will be "
"removed after the 0.7 release")
warnings.warn(msg, DeprecationWarning)
charm = charm_class(framework, None)
else:
charm = charm_class(framework)
dispatcher.ensure_event_links(charm)
# TODO: Remove the collect_metrics check below as soon as the relevant
# Juju changes are made.
# Juju changes are made. Also adjust the docstring on
# EventBase.defer().
#
# Skip reemission of deferred events for collect-metrics events because
# they do not have the full access to all hook tools.
if juju_event_name != 'collect_metrics':
if not dispatcher.is_restricted_context():
framework.reemit()
_emit_charm_event(charm, juju_event_name)
_emit_charm_event(charm, dispatcher.event_name)
framework.commit()
finally:
......
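# An illustrative sketch, not part of this diff: a minimal charm whose entry
# point calls main() above. The class and handler names are hypothetical.
import ops.charm
from ops.main import main
from ops.model import ActiveStatus


class MinimalCharm(ops.charm.CharmBase):
    def __init__(self, framework):
        super().__init__(framework)
        self.framework.observe(self.on.start, self._on_start)

    def _on_start(self, event):
        # Report that the workload is up and running.
        self.unit.status = ActiveStatus()


if __name__ == '__main__':
    main(MinimalCharm)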
@@ -12,63 +12,152 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Representations of Juju's model, application, unit, and other entities."""
import datetime
import decimal
import ipaddress
import json
import weakref
import os
import re
import shutil
import tempfile
import time
import datetime
import re
import ipaddress
import decimal
import typing
import weakref
from abc import ABC, abstractmethod
from collections.abc import Mapping, MutableMapping
from pathlib import Path
from subprocess import run, PIPE, CalledProcessError
import yaml
import ops
from ops.jujuversion import JujuVersion
if yaml.__with_libyaml__:
_DefaultDumper = yaml.CSafeDumper
else:
_DefaultDumper = yaml.SafeDumper
class Model:
"""Represents the Juju Model as seen from this unit.
def __init__(self, unit_name, meta, backend):
This should not be instantiated directly by Charmers, but can be accessed as `self.model`
from any class that derives from Object.
"""
def __init__(self, meta: 'ops.charm.CharmMeta', backend: '_ModelBackend'):
self._cache = _ModelCache(backend)
self._backend = backend
self.unit = self.get_unit(unit_name)
self.app = self.unit.app
self.relations = RelationMapping(meta.relations, self.unit, self._backend, self._cache)
self.config = ConfigData(self._backend)
self.resources = Resources(list(meta.resources), self._backend)
self.pod = Pod(self._backend)
self.storages = StorageMapping(list(meta.storages), self._backend)
self._unit = self.get_unit(self._backend.unit_name)
self._relations = RelationMapping(meta.relations, self.unit, self._backend, self._cache)
self._config = ConfigData(self._backend)
self._resources = Resources(list(meta.resources), self._backend)
self._pod = Pod(self._backend)
self._storages = StorageMapping(list(meta.storages), self._backend)
self._bindings = BindingMapping(self._backend)
def get_unit(self, unit_name):
@property
def unit(self) -> 'Unit':
"""A :class:`Unit` that represents the unit that is running this code (eg yourself)."""
return self._unit
@property
def app(self):
"""A :class:`Application` that represents the application this unit is a part of."""
return self._unit.app
@property
def relations(self) -> 'RelationMapping':
"""Mapping of endpoint to list of :class:`Relation`.
Answers the question "what am I currently related to".
See also :meth:`.get_relation`.
"""
return self._relations
@property
def config(self) -> 'ConfigData':
"""Return a mapping of config for the current application."""
return self._config
@property
def resources(self) -> 'Resources':
"""Access to resources for this charm.
Use ``model.resources.fetch(resource_name)`` to get the path on disk
where the resource can be found.
"""
return self._resources
@property
def storages(self) -> 'StorageMapping':
"""Mapping of storage_name to :class:`Storage` as defined in metadata.yaml."""
return self._storages
@property
def pod(self) -> 'Pod':
"""Use ``model.pod.set_spec`` to set the container specification for Kubernetes charms."""
return self._pod
@property
def name(self) -> str:
"""Return the name of the Model that this unit is running in.
This is read from the environment variable ``JUJU_MODEL_NAME``.
"""
return self._backend.model_name
def get_unit(self, unit_name: str) -> 'Unit':
"""Get an arbitrary unit by name.
Internally this uses a cache, so asking for the same unit two times will
return the same object.
"""
return self._cache.get(Unit, unit_name)
def get_app(self, app_name):
def get_app(self, app_name: str) -> 'Application':
"""Get an application by name.
Internally this uses a cache, so asking for the same application two times will
return the same object.
"""
return self._cache.get(Application, app_name)
def get_relation(self, relation_name, relation_id=None):
def get_relation(
self, relation_name: str,
relation_id: typing.Optional[int] = None) -> 'Relation':
"""Get a specific Relation instance.
If relation_id is given, this will return that Relation instance.
If relation_id is not given, this will return the Relation instance if the
relation is established only once, or None if it is not established. If this
same relation is established multiple times, TooManyRelatedAppsError is raised.
Args:
relation_name: The name of the endpoint for this charm
relation_id: An identifier for a specific relation. Used to disambiguate when a
given application has more than one relation on a given endpoint.
Raises:
TooManyRelatedAppsError: is raised if there is more than one relation to the
supplied relation_name and no relation_id was supplied
"""
return self.relations._get_unique(relation_name, relation_id)
def get_binding(self, binding_key):
def get_binding(self, binding_key: typing.Union[str, 'Relation']) -> 'Binding':
"""Get a network space binding.
binding_key -- The relation name or instance to obtain bindings for.
Args:
binding_key: The relation name or instance to obtain bindings for.
If binding_key is a relation name, the method returns the default binding for that
relation. If a relation instance is provided, the method first looks up a more specific
binding for that specific relation ID, and if none is found falls back to the default
binding for the relation name.
Returns:
If ``binding_key`` is a relation name, the method returns the default binding
for that relation. If a relation instance is provided, the method first looks
up a more specific binding for that specific relation ID, and if none is found
falls back to the default binding for the relation name.
"""
return self._bindings.get(binding_key)
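# An illustrative sketch, not part of this diff: typical lookups through the
# Model accessors above. The 'db' endpoint name and helper are hypothetical.
def _example_lookups(charm):
    relation = charm.model.get_relation('db')  # a Relation, or None if not established
    binding = charm.model.get_binding('db')    # the default Binding for the endpoint
    return relation, binding.network.bind_address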
@@ -89,6 +178,16 @@ class _ModelCache:
class Application:
"""Represents a named application in the model.
This might be your application, or might be an application that you are related to.
Charmers should not instantiate Application objects directly, but should use
:meth:`Model.get_app` if they need a reference to a given application.
Attributes:
name: The name of this application (eg, 'mysql'). This name may differ from the name of
the charm, if the user has deployed it to a different name.
"""
def __init__(self, name, backend, cache):
self.name = name
@@ -97,8 +196,27 @@ class Application:
self._is_our_app = self.name == self._backend.app_name
self._status = None
def _invalidate(self):
self._status = None
@property
def status(self):
def status(self) -> 'StatusBase':
"""Used to report or read the status of the overall application.
Can only be read and set by the lead unit of the application.
The status of remote units is always Unknown.
Raises:
RuntimeError: if you try to set the status of another application, or if you try to
set the status of this application as a unit that is not the leader.
InvalidStatusError: if you try to set the status to something that is not a
:class:`StatusBase`
Example::
self.model.app.status = BlockedStatus('I need a human to come help me')
"""
if not self._is_our_app:
return UnknownStatus()
@@ -113,7 +231,7 @@ class Application:
return self._status
@status.setter
def status(self, value):
def status(self, value: 'StatusBase'):
if not isinstance(value, StatusBase):
raise InvalidStatusError(
'invalid value provided for application {} status: {}'.format(self, value)
@@ -133,6 +251,15 @@ class Application:
class Unit:
"""Represents a named unit in the model.
This might be your unit, another unit of your application, or a unit of another application
that you are related to.
Attributes:
name: The name of the unit (eg, 'mysql/0')
app: The Application the unit is a part of.
"""
def __init__(self, name, backend, cache):
self.name = name
@@ -145,8 +272,23 @@ class Unit:
self._is_our_unit = self.name == self._backend.unit_name
self._status = None
def _invalidate(self):
self._status = None
@property
def status(self):
def status(self) -> 'StatusBase':
"""Used to report or read the status of a specific unit.
The status of any unit other than yourself is always Unknown.
Raises:
RuntimeError: if you try to set the status of a unit other than yourself.
InvalidStatusError: if you try to set the status to something other than
a :class:`StatusBase`
Example::
self.model.unit.status = MaintenanceStatus('reconfiguring the frobnicators')
"""
if not self._is_our_unit:
return UnknownStatus()
@@ -158,7 +300,7 @@ class Unit:
return self._status
@status.setter
def status(self, value):
def status(self, value: 'StatusBase'):
if not isinstance(value, StatusBase):
raise InvalidStatusError(
'invalid value provided for unit {} status: {}'.format(self, value)
@@ -173,17 +315,27 @@ class Unit:
def __repr__(self):
return '<{}.{} {}>'.format(type(self).__module__, type(self).__name__, self.name)
def is_leader(self):
def is_leader(self) -> bool:
"""Return whether this unit is the leader of its application.
This can only be called for your own unit.
Returns:
True if you are the leader, False otherwise
Raises:
RuntimeError: if called for a unit that is not yourself
"""
if self._is_our_unit:
# This value is not cached as it is not guaranteed to persist for the whole duration
# of a hook execution.
return self._backend.is_leader()
else:
raise RuntimeError(
'cannot determine leadership status for remote applications: {}'.format(self)
'leadership status of remote units ({}) is not visible to other'
' applications'.format(self)
)
def set_workload_version(self, version):
def set_workload_version(self, version: str) -> None:
"""Record the version of the software running as the workload.
This shouldn't be confused with the revision of the charm. This is informative only;
@@ -196,6 +348,11 @@ class Unit:
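# An illustrative sketch, not part of this diff: guarding leader-only work
# with Unit.is_leader(). The relation and data key are hypothetical.
def _example_publish(charm, relation):
    if charm.model.unit.is_leader():
        # Only the leader may write to the application data bucket.
        relation.data[charm.model.app]['endpoint'] = 'squid.example:3128'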
class LazyMapping(Mapping, ABC):
"""Represents a dict that isn't populated until it is accessed.
Charm authors should generally never need to use this directly, but it forms
the basis for many of the dicts that the framework tracks.
"""
_lazy_data = None
@@ -225,14 +382,17 @@ class LazyMapping(Mapping, ABC):
def __getitem__(self, key):
return self._data[key]
def __repr__(self):
return repr(self._data)
class RelationMapping(Mapping):
"""Map of relation names to lists of Relation instances."""
"""Map of relation names to lists of :class:`Relation` instances."""
def __init__(self, relations_meta, our_unit, backend, cache):
self._peers = set()
for name, relation_meta in relations_meta.items():
if relation_meta.role == 'peers':
if relation_meta.role.is_peer():
self._peers.add(name)
self._our_unit = our_unit
self._backend = backend
@@ -260,6 +420,12 @@ class RelationMapping(Mapping):
return relation_list
def _invalidate(self, relation_name):
"""Used to wipe the cache of a given relation_name.
Not meant to be used by Charm authors. The content of relation data is
static for the lifetime of a hook, so it is safe to cache in memory once
accessed.
"""
self._data[relation_name] = None
def _get_unique(self, relation_name, relation_id=None):
@@ -288,12 +454,21 @@ class RelationMapping(Mapping):
class BindingMapping:
"""Mapping of endpoints to network bindings.
Charm authors should not instantiate this directly, but access it via
:meth:`Model.get_binding`
"""
def __init__(self, backend):
self._backend = backend
self._data = {}
def get(self, binding_key):
def get(self, binding_key: typing.Union[str, 'Relation']) -> 'Binding':
"""Get a specific Binding for an endpoint/relation.
Not used directly by Charm authors. See :meth:`Model.get_binding`
"""
if isinstance(binding_key, Relation):
binding_name = binding_key.name
relation_id = binding_key.id
@@ -311,7 +486,11 @@ class BindingMapping:
class Binding:
"""Binding to a network space."""
"""Binding to a network space.
Attributes:
name: The name of the endpoint this binding represents (eg, 'db')
"""
def __init__(self, name, relation_id, backend):
self.name = name
@@ -320,7 +499,8 @@ class Binding:
self._network = None
@property
def network(self):
def network(self) -> 'Network':
"""The network information for this binding."""
if self._network is None:
try:
self._network = Network(self._backend.network_get(self.name, self._relation_id))
@@ -334,50 +514,122 @@ class Binding:
class Network:
"""Network space details."""
"""Network space details.
Charm authors should not instantiate this directly, but should get access to the Network
definition from :meth:`Model.get_binding` and its ``network`` attribute.
Attributes:
interfaces: A list of :class:`NetworkInterface` details. This includes the
information about how your application should be configured (eg, what
IP addresses should you bind to.)
Note that multiple addresses for a single interface are represented as multiple
interfaces. (eg, ``[NetworkInterface('ens1', '10.1.1.1/32'),
NetworkInterface('ens1', '10.1.2.1/32')]``)
ingress_addresses: A list of :class:`ipaddress.ip_address` objects representing the IP
addresses that other units should use to get in touch with you.
egress_subnets: A list of :class:`ipaddress.ip_network` representing the subnets that
other units will see you connecting from. Due to things like NAT it isn't always
possible to narrow it down to a single address, but when it is clear, the CIDRs
will be constrained to a single address. (eg, 10.0.0.1/32)
Args:
network_info: A dict of network information as returned by ``network-get``.
"""
def __init__(self, network_info):
def __init__(self, network_info: dict):
self.interfaces = []
# Treat multiple addresses on an interface as multiple logical
# interfaces with the same name.
for interface_info in network_info['bind-addresses']:
interface_name = interface_info['interface-name']
for address_info in interface_info['addresses']:
for interface_info in network_info.get('bind-addresses', []):
interface_name = interface_info.get('interface-name')
for address_info in interface_info.get('addresses', []):
self.interfaces.append(NetworkInterface(interface_name, address_info))
self.ingress_addresses = []
for address in network_info['ingress-addresses']:
for address in network_info.get('ingress-addresses', []):
self.ingress_addresses.append(ipaddress.ip_address(address))
self.egress_subnets = []
for subnet in network_info['egress-subnets']:
for subnet in network_info.get('egress-subnets', []):
self.egress_subnets.append(ipaddress.ip_network(subnet))
@property
def bind_address(self):
return self.interfaces[0].address
"""A single address that your application should bind() to.
For the common case where there is a single answer, this represents a single
address from :attr:`.interfaces` that can be used to configure where your
application should bind() and listen().
"""
if self.interfaces:
return self.interfaces[0].address
else:
return None
@property
def ingress_address(self):
return self.ingress_addresses[0]
"""The address other applications should use to connect to your unit.
Due to things like public/private addresses, NAT and tunneling, the address you bind()
to is not always the address other people can use to connect() to you.
This is just the first address from :attr:`.ingress_addresses`.
"""
if self.ingress_addresses:
return self.ingress_addresses[0]
else:
return None
class NetworkInterface:
"""Represents a single network interface that the charm needs to know about.
Charmers should not instantiate this type directly. Instead use :meth:`Model.get_binding`
to get the network information for a given endpoint.
def __init__(self, name, address_info):
Attributes:
name: The name of the interface (eg. 'eth0', or 'ens1')
subnet: An :class:`ipaddress.ip_network` representation of the IP for the network
interface. This may be a single address (eg '10.0.1.2/32')
"""
def __init__(self, name: str, address_info: dict):
self.name = name
# TODO: expose a hardware address here, see LP: #1864070.
self.address = ipaddress.ip_address(address_info['value'])
cidr = address_info['cidr']
if not cidr:
# The cidr field may be empty, see LP: #1864102.
# In this case, make it a /32 or /128 IP network.
self.subnet = ipaddress.ip_network(address_info['value'])
address = address_info.get('value')
# The value field may be empty.
if address:
self.address = ipaddress.ip_address(address)
else:
self.address = None
cidr = address_info.get('cidr')
# The cidr field may be empty, see LP: #1864102.
if cidr:
self.subnet = ipaddress.ip_network(cidr)
elif address:
# If we have an address, convert it to a /32 or /128 IP network.
self.subnet = ipaddress.ip_network(address)
else:
self.subnet = None
# TODO: expose a hostname/canonical name for the address here, see LP: #1864086.
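# An illustrative sketch, not part of this diff: a payload in the shape that
# network-get produces, showing how Network parses it. Addresses are made up.
_example_network = Network({
    'bind-addresses': [{
        'interface-name': 'ens1',
        'addresses': [{'value': '10.1.1.1', 'cidr': '10.1.1.0/24'}],
    }],
    'ingress-addresses': ['10.1.1.1'],
    'egress-subnets': ['10.1.1.0/24'],
})
assert str(_example_network.bind_address) == '10.1.1.1'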
class Relation:
def __init__(self, relation_name, relation_id, is_peer, our_unit, backend, cache):
"""Represents an established relation between this application and another application.
This class should not be instantiated directly, instead use :meth:`Model.get_relation`
or :attr:`ops.charm.RelationEvent.relation`.
Attributes:
name: The name of the local endpoint of the relation (eg 'db')
id: The identifier for a particular relation (integer)
app: An :class:`Application` representing the remote application of this relation.
For peer relations this will be the local application.
units: A set of :class:`Unit` for units that have started and joined this relation.
data: A :class:`RelationData` holding the data buckets for each entity
of a relation. Accessed via eg Relation.data[unit]['foo']
"""
def __init__(
self, relation_name: str, relation_id: int, is_peer: bool, our_unit: Unit,
backend: '_ModelBackend', cache: '_ModelCache'):
self.name = relation_name
self.id = relation_id
self.app = None
@@ -405,7 +657,21 @@ class Relation:
class RelationData(Mapping):
def __init__(self, relation, our_unit, backend):
"""Represents the various data buckets of a given relation.
Each unit and application involved in a relation has their own data bucket.
Eg: ``{entity: RelationDataContent}``
where entity can be either a :class:`Unit` or an :class:`Application`.
Units can read and write their own data, and if they are the leader,
they can read and write their application data. They are allowed to read
remote unit and application data.
This class should not be created directly. It should be accessed via
:attr:`Relation.data`
"""
def __init__(self, relation: Relation, our_unit: Unit, backend: '_ModelBackend'):
self.relation = weakref.proxy(relation)
self._data = {
our_unit: RelationDataContent(self.relation, our_unit, backend),
@@ -432,10 +698,14 @@ class RelationData(Mapping):
def __getitem__(self, key):
return self._data[key]
def __repr__(self):
return repr(self._data)
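# An illustrative sketch, not part of this diff: reading and writing through
# the relation data buckets described above, from inside a relation event
# handler. The key names are hypothetical.
def _example_exchange(charm, event):
    # The bucket for our own unit is writable by this unit.
    event.relation.data[charm.model.unit]['hostname'] = 'squid-0.local'
    # Remote buckets are read-only from this unit.
    return event.relation.data[event.app].get('password')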
# We mix in MutableMapping here to get some convenience implementations, but whether it's actually
# mutable or not is controlled by the flag.
class RelationDataContent(LazyMapping, MutableMapping):
"""Data content of a unit or application in a relation."""
def __init__(self, relation, entity, backend):
self.relation = relation
@@ -444,6 +714,7 @@ class RelationDataContent(LazyMapping, MutableMapping):
self._is_app = isinstance(entity, Application)
def _load(self):
"""Load the data from the current entity / relation."""
try:
return self._backend.relation_get(self.relation.id, self._entity.name, self._is_app)
except RelationNotFoundError:
@@ -451,6 +722,7 @@ class RelationDataContent(LazyMapping, MutableMapping):
return {}
def _is_mutable(self):
"""Return if the data content can be modified."""
if self._is_app:
is_our_app = self._backend.app_name == self._entity.name
if not is_our_app:
@@ -478,7 +750,7 @@ class RelationDataContent(LazyMapping, MutableMapping):
if value == '':
# Match the behavior of Juju, which is that setting the value to an
# empty string will remove the key entirely from the relation data.
del self._data[key]
self._data.pop(key, None)
else:
self._data[key] = value
@@ -489,6 +761,10 @@ class RelationDataContent(LazyMapping, MutableMapping):
class ConfigData(LazyMapping):
"""Configuration data.
This class should not be created directly. It should be accessed via :attr:`Model.config`.
"""
def __init__(self, backend):
self._backend = backend
@@ -498,24 +774,69 @@ class ConfigData(LazyMapping):
class StatusBase:
"""Status values specific to applications and units."""
"""Status values specific to applications and units.
To access a status by name, see :meth:`StatusBase.from_name`; most use cases will just
directly use the child class to indicate their status.
"""
_statuses = {}
name = None
def __init__(self, message):
def __init__(self, message: str):
self.message = message
def __new__(cls, *args, **kwargs):
"""Forbid the usage of StatusBase directly."""
if cls is StatusBase:
raise TypeError("cannot instantiate a base class")
cls._statuses[cls.name] = cls
return super().__new__(cls)
def __eq__(self, other):
if not isinstance(self, type(other)):
return False
return self.message == other.message
def __repr__(self):
return "{.__class__.__name__}({!r})".format(self, self.message)
@classmethod
def from_name(cls, name, message):
return cls._statuses[name](message)
def from_name(cls, name: str, message: str):
"""Get the specific Status for the name (or UnknownStatus if not registered)."""
if name == 'unknown':
# unknown is special
return UnknownStatus()
else:
return cls._statuses[name](message)
@classmethod
def register(cls, child):
"""Register a Status for the child's name."""
if child.name is None:
raise AttributeError('cannot register a Status which has no name')
cls._statuses[child.name] = child
return child
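# An illustrative sketch, not part of this diff: resolving a status object
# from the name/message pair that status-get returns, via the registry above.
def _example_status(data):
    # data is a dict such as {'status': 'blocked', 'message': 'need config'}.
    return StatusBase.from_name(data['status'], data['message'])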
@StatusBase.register
class UnknownStatus(StatusBase):
"""The unit status is unknown.
A unit-agent has finished calling install, config-changed and start, but the
charm has not called status-set yet.
"""
name = 'unknown'
def __init__(self):
# Unknown status cannot be set and does not have a message associated with it.
super().__init__('')
def __repr__(self):
return "UnknownStatus()"
@StatusBase.register
class ActiveStatus(StatusBase):
"""The unit is ready.
@@ -523,10 +844,11 @@ class ActiveStatus(StatusBase):
"""
name = 'active'
def __init__(self, message=None):
super().__init__(message or '')
def __init__(self, message: str = ''):
super().__init__(message)
@StatusBase.register
class BlockedStatus(StatusBase):
"""The unit requires manual intervention.
@@ -535,6 +857,7 @@ class BlockedStatus(StatusBase):
name = 'blocked'
@StatusBase.register
class MaintenanceStatus(StatusBase):
"""The unit is performing maintenance tasks.
@@ -546,20 +869,7 @@ class MaintenanceStatus(StatusBase):
name = 'maintenance'
class UnknownStatus(StatusBase):
"""The unit status is unknown.
A unit-agent has finished calling install, config-changed and start, but the
charm has not called status-set yet.
"""
name = 'unknown'
def __init__(self):
# Unknown status cannot be set and does not have a message associated with it.
super().__init__('')
@StatusBase.register
class WaitingStatus(StatusBase):
"""A unit is unable to progress.
@@ -571,14 +881,13 @@ class WaitingStatus(StatusBase):
class Resources:
"""Object representing resources for the charm.
"""
"""Object representing resources for the charm."""
def __init__(self, names, backend):
def __init__(self, names: typing.Iterable[str], backend: '_ModelBackend'):
self._backend = backend
self._paths = {name: None for name in names}
def fetch(self, name):
def fetch(self, name: str) -> Path:
"""Fetch the resource from the controller or store.
If successfully fetched, this returns a Path object to where the resource is stored
@@ -592,10 +901,26 @@ class Resources:
class Pod:
def __init__(self, backend):
"""Represents the definition of a pod spec in Kubernetes models.
Currently only supports simple access to setting the Juju pod spec via :attr:`.set_spec`.
"""
def __init__(self, backend: '_ModelBackend'):
self._backend = backend
def set_spec(self, spec, k8s_resources=None):
def set_spec(self, spec: typing.Mapping, k8s_resources: typing.Mapping = None):
"""Set the specification for pods that Juju should start in kubernetes.
See `juju help-tool pod-spec-set` for details of what should be passed.
Args:
spec: The mapping defining the pod specification
k8s_resources: Additional kubernetes specific specification.
Returns:
None
"""
if not self._backend.is_leader():
raise ModelError('cannot set a pod spec as this unit is not a leader')
self._backend.pod_spec_set(spec, k8s_resources)
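# An illustrative sketch, not part of this diff: a minimal pod spec set by
# the leader. The container name, image, and port are hypothetical.
def _example_configure_pod(charm):
    if charm.model.unit.is_leader():
        charm.model.pod.set_spec({
            'version': 3,
            'containers': [{
                'name': 'squid',
                'image': 'squid:latest',
                'ports': [{'containerPort': 3128, 'protocol': 'TCP'}],
            }],
        })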
@@ -604,11 +929,11 @@ class Pod:
class StorageMapping(Mapping):
"""Map of storage names to lists of Storage instances."""
def __init__(self, storage_names, backend):
def __init__(self, storage_names: typing.Iterable[str], backend: '_ModelBackend'):
self._backend = backend
self._storage_map = {storage_name: None for storage_name in storage_names}
def __contains__(self, key):
def __contains__(self, key: str):
return key in self._storage_map
def __len__(self):
@@ -617,7 +942,7 @@ class StorageMapping(Mapping):
def __iter__(self):
return iter(self._storage_map)
def __getitem__(self, storage_name):
def __getitem__(self, storage_name: str) -> typing.List['Storage']:
storage_list = self._storage_map[storage_name]
if storage_list is None:
storage_list = self._storage_map[storage_name] = []
@@ -625,7 +950,7 @@ class StorageMapping(Mapping):
storage_list.append(Storage(storage_name, storage_id, self._backend))
return storage_list
def request(self, storage_name, count=1):
def request(self, storage_name: str, count: int = 1):
"""Requests new storage instances of a given name.
Uses storage-add tool to request additional storage. Juju will notify the unit
@@ -638,6 +963,12 @@ class StorageMapping(Mapping):
class Storage:
"""Represents a storage as defined in metadata.yaml.
Attributes:
name: Simple string name of the storage
id: The provider id for storage
"""
def __init__(self, storage_name, storage_id, backend):
self.name = storage_name
@@ -647,6 +978,7 @@ class Storage:
@property
def location(self):
"""Return the location of the storage."""
if self._location is None:
raw = self._backend.storage_get('{}/{}'.format(self.name, self.id), "location")
self._location = Path(raw)
@@ -654,10 +986,13 @@ class Storage:
class ModelError(Exception):
"""Base class for exceptions raised when interacting with the Model."""
pass
class TooManyRelatedAppsError(ModelError):
"""Raised by :meth:`Model.get_relation` if there is more than one related application."""
def __init__(self, relation_name, num_related, max_supported):
super().__init__('Too many remote applications on {} ({} > {})'.format(
relation_name, num_related, max_supported))
@@ -667,34 +1002,51 @@ class TooManyRelatedAppsError(ModelError):
class RelationDataError(ModelError):
pass
"""Raised by ``Relation.data[entity][key] = 'foo'`` if the data is invalid.
This is raised if you're trying to set a value to something that isn't a string,
or trying to set a value in a bucket that you don't have access to (eg, another
application/unit's data, or your own application data when you aren't the leader).
"""
class RelationNotFoundError(ModelError):
pass
"""Backend error when querying juju for a given relation and that relation doesn't exist."""
class InvalidStatusError(ModelError):
pass
"""Raised if trying to set an Application or Unit status to something invalid."""
class ModelBackend:
class _ModelBackend:
"""Represents the connection between the Model representation and talking to Juju.
Charm authors should not directly interact with the ModelBackend; it is a private
implementation of Model.
"""
LEASE_RENEWAL_PERIOD = datetime.timedelta(seconds=30)
def __init__(self):
self.unit_name = os.environ['JUJU_UNIT_NAME']
def __init__(self, unit_name=None, model_name=None):
if unit_name is None:
self.unit_name = os.environ['JUJU_UNIT_NAME']
else:
self.unit_name = unit_name
if model_name is None:
model_name = os.environ.get('JUJU_MODEL_NAME')
self.model_name = model_name
self.app_name = self.unit_name.split('/')[0]
self._is_leader = None
self._leader_check_time = None
def _run(self, *args, return_output=False, use_json=False):
kwargs = dict(stdout=PIPE, stderr=PIPE)
kwargs = dict(stdout=PIPE, stderr=PIPE, check=True)
args = (shutil.which(args[0]),) + args[1:]
if use_json:
args += ('--format=json',)
try:
result = run(args, check=True, **kwargs)
result = run(args, **kwargs)
except CalledProcessError as e:
raise ModelError(e.stderr)
if return_output:
@@ -724,10 +1076,18 @@ class ModelBackend:
if not isinstance(is_app, bool):
raise TypeError('is_app parameter to relation_get must be a boolean')
if is_app:
version = JujuVersion.from_environ()
if not version.has_app_data():
raise RuntimeError(
'getting application data is not supported on Juju version {}'.format(version))
args = ['relation-get', '-r', str(relation_id), '-', member_name]
if is_app:
args.append('--app')
try:
return self._run('relation-get', '-r', str(relation_id),
'-', member_name, '--app={}'.format(is_app),
return_output=True, use_json=True)
return self._run(*args, return_output=True, use_json=True)
except ModelError as e:
if 'relation not found' in str(e):
raise RelationNotFoundError() from e
@@ -737,9 +1097,18 @@ class ModelBackend:
if not isinstance(is_app, bool):
raise TypeError('is_app parameter to relation_set must be a boolean')
if is_app:
version = JujuVersion.from_environ()
if not version.has_app_data():
raise RuntimeError(
'setting application data is not supported on Juju version {}'.format(version))
args = ['relation-set', '-r', str(relation_id), '{}={}'.format(key, value)]
if is_app:
args.append('--app')
try:
return self._run('relation-set', '-r', str(relation_id),
'{}={}'.format(key, value), '--app={}'.format(is_app))
return self._run(*args)
except ModelError as e:
if 'relation not found' in str(e):
raise RelationNotFoundError() from e
@@ -773,12 +1142,14 @@ class ModelBackend:
def pod_spec_set(self, spec, k8s_resources):
tmpdir = Path(tempfile.mkdtemp('-pod-spec-set'))
try:
spec_path = tmpdir / 'spec.json'
spec_path.write_text(json.dumps(spec))
spec_path = tmpdir / 'spec.yaml'
with spec_path.open("wt", encoding="utf8") as f:
yaml.dump(spec, stream=f, Dumper=_DefaultDumper)
args = ['--file', str(spec_path)]
if k8s_resources:
k8s_res_path = tmpdir / 'k8s-resources.json'
k8s_res_path.write_text(json.dumps(k8s_resources))
k8s_res_path = tmpdir / 'k8s-resources.yaml'
with k8s_res_path.open("wt", encoding="utf8") as f:
yaml.dump(k8s_resources, stream=f, Dumper=_DefaultDumper)
args.extend(['--k8s-resources', str(k8s_res_path)])
self._run('pod-spec-set', *args)
finally:
@@ -787,16 +1158,43 @@ class ModelBackend:
def status_get(self, *, is_app=False):
"""Get a status of a unit or an application.
app -- A boolean indicating whether the status should be retrieved for a unit
or an application.
Args:
is_app: A boolean indicating whether the status should be retrieved for a unit
or an application.
"""
return self._run('status-get', '--include-data', '--application={}'.format(is_app))
content = self._run(
'status-get', '--include-data', '--application={}'.format(is_app),
use_json=True,
return_output=True)
# Unit status looks like (in YAML):
# message: 'load: 0.28 0.26 0.26'
# status: active
# status-data: {}
# Application status looks like (in YAML):
# application-status:
# message: 'load: 0.28 0.26 0.26'
# status: active
# status-data: {}
# units:
# uo/0:
# message: 'load: 0.28 0.26 0.26'
# status: active
# status-data: {}
if is_app:
return {'status': content['application-status']['status'],
'message': content['application-status']['message']}
else:
return content
def status_set(self, status, message='', *, is_app=False):
"""Set a status of a unit or an application.
app -- A boolean indicating whether the status should be set for a unit or an
application.
Args:
status: The status to set.
message: The message to set in the status.
is_app: A boolean indicating whether the status should be set for a unit or an
application.
"""
if not isinstance(is_app, bool):
raise TypeError('is_app parameter must be boolean')
@@ -832,13 +1230,14 @@ class ModelBackend:
self._run('application-version-set', '--', version)
def juju_log(self, level, message):
self._run('juju-log', '--log-level', level, message)
self._run('juju-log', '--log-level', level, "--", message)
def network_get(self, binding_name, relation_id=None):
"""Return network info provided by network-get for a given binding.
binding_name -- A name of a binding (relation name or extra-binding name).
relation_id -- An optional relation id to get network info for.
Args:
binding_name: A name of a binding (relation name or extra-binding name).
relation_id: An optional relation id to get network info for.
"""
cmd = ['network-get', binding_name]
if relation_id is not None:
......
# Copyright 2019-2020 Canonical Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Structures to offer storage to the charm (through Juju or locally)."""
from datetime import timedelta
import pickle
import shutil
import subprocess
import sqlite3
import typing
import yaml
def _run(args, **kw):
cmd = shutil.which(args[0])
if cmd is None:
raise FileNotFoundError(args[0])
return subprocess.run([cmd, *args[1:]], **kw)
class SQLiteStorage:
"""Storage using SQLite backend."""
DB_LOCK_TIMEOUT = timedelta(hours=1)
def __init__(self, filename):
# The isolation_level argument is set to None such that the implicit
# transaction management behavior of the sqlite3 module is disabled.
self._db = sqlite3.connect(str(filename),
isolation_level=None,
timeout=self.DB_LOCK_TIMEOUT.total_seconds())
self._setup()
def _setup(self):
"""Make the database ready to be used as storage."""
# Make sure that the database is locked until the connection is closed,
# not until the transaction ends.
self._db.execute("PRAGMA locking_mode=EXCLUSIVE")
c = self._db.execute("BEGIN")
c.execute("SELECT count(name) FROM sqlite_master WHERE type='table' AND name='snapshot'")
if c.fetchone()[0] == 0:
# Keep in mind what might happen if the process dies somewhere below.
# The system must not be rendered permanently broken by that.
self._db.execute("CREATE TABLE snapshot (handle TEXT PRIMARY KEY, data BLOB)")
self._db.execute('''
CREATE TABLE notice (
sequence INTEGER PRIMARY KEY AUTOINCREMENT,
event_path TEXT,
observer_path TEXT,
method_name TEXT)
''')
self._db.commit()
def close(self):
"""Part of the Storage API, close the storage backend."""
self._db.close()
def commit(self):
"""Part of the Storage API, commit latest changes in the storage backend."""
self._db.commit()
# There's commit but no rollback. For abort to be supported, we'll need logic that
# can rollback decisions made by third-party code in terms of the internal state
# of objects that have been snapshotted, and hooks to let them know about it and
# take the needed actions to undo their logic until the last snapshot.
# This is doable, but would significantly increase the chances for mistakes.
def save_snapshot(self, handle_path: str, snapshot_data: typing.Any) -> None:
"""Part of the Storage API, persist a snapshot data under the given handle.
Args:
handle_path: The string identifying the snapshot.
snapshot_data: The data to be persisted (as returned by Object.snapshot()). This
might be a dict/tuple/int, but must only contain 'simple' python types.
"""
# Use pickle for serialization, so the value remains portable.
raw_data = pickle.dumps(snapshot_data)
self._db.execute("REPLACE INTO snapshot VALUES (?, ?)", (handle_path, raw_data))
def load_snapshot(self, handle_path: str) -> typing.Any:
"""Part of the Storage API, retrieve a snapshot that was previously saved.
Args:
handle_path: The string identifying the snapshot.
Raises:
NoSnapshotError: if there is no snapshot for the given handle_path.
"""
c = self._db.cursor()
c.execute("SELECT data FROM snapshot WHERE handle=?", (handle_path,))
row = c.fetchone()
if row:
return pickle.loads(row[0])
raise NoSnapshotError(handle_path)
def drop_snapshot(self, handle_path: str):
"""Part of the Storage API, remove a snapshot that was previously saved.
Dropping a snapshot that doesn't exist is treated as a no-op.
"""
self._db.execute("DELETE FROM snapshot WHERE handle=?", (handle_path,))
def list_snapshots(self) -> typing.Generator[str, None, None]:
"""Return the name of all snapshots that are currently saved."""
c = self._db.cursor()
c.execute("SELECT handle FROM snapshot")
while True:
rows = c.fetchmany()
if not rows:
break
for row in rows:
yield row[0]
def save_notice(self, event_path: str, observer_path: str, method_name: str) -> None:
"""Part of the Storage API, record an notice (event and observer)."""
self._db.execute('INSERT INTO notice VALUES (NULL, ?, ?, ?)',
(event_path, observer_path, method_name))
def drop_notice(self, event_path: str, observer_path: str, method_name: str) -> None:
"""Part of the Storage API, remove a notice that was previously recorded."""
self._db.execute('''
DELETE FROM notice
WHERE event_path=?
AND observer_path=?
AND method_name=?
''', (event_path, observer_path, method_name))
def notices(self, event_path: str = None) ->\
typing.Generator[typing.Tuple[str, str, str], None, None]:
"""Part of the Storage API, return all notices that begin with event_path.
Args:
event_path: If supplied, will only yield events that match event_path. If not
supplied (or None/'') will return all events.
Returns:
Iterable of (event_path, observer_path, method_name) tuples
"""
if event_path:
c = self._db.execute('''
SELECT event_path, observer_path, method_name
FROM notice
WHERE event_path=?
ORDER BY sequence
''', (event_path,))
else:
c = self._db.execute('''
SELECT event_path, observer_path, method_name
FROM notice
ORDER BY sequence
''')
while True:
rows = c.fetchmany()
if not rows:
break
for row in rows:
yield tuple(row)
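# An illustrative sketch, not part of this diff: a notice round trip through
# the SQLite backend above, using an in-memory database.
_example_store = SQLiteStorage(':memory:')
_example_store.save_notice('MyCharm/on/start[1]', 'MyCharm', '_on_start')
assert list(_example_store.notices()) == [('MyCharm/on/start[1]', 'MyCharm', '_on_start')]
_example_store.close()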
class JujuStorage:
"""Storing the content tracked by the Framework in Juju.
This uses :class:`_JujuStorageBackend` to interact with state-get/state-set
as the way to store state for the framework and for components.
"""
NOTICE_KEY = "#notices#"
def __init__(self, backend: '_JujuStorageBackend' = None):
self._backend = backend
if backend is None:
self._backend = _JujuStorageBackend()
def close(self):
"""Part of the Storage API, close the storage backend.
Nothing to be done for Juju backend, as it's transactional.
"""
def commit(self):
"""Part of the Storage API, commit latest changes in the storage backend.
Nothing to be done for Juju backend, as it's transactional.
"""
def save_snapshot(self, handle_path: str, snapshot_data: typing.Any) -> None:
"""Part of the Storage API, persist a snapshot data under the given handle.
Args:
handle_path: The string identifying the snapshot.
snapshot_data: The data to be persisted (as returned by Object.snapshot()). This
might be a dict/tuple/int, but must only contain 'simple' python types.
"""
self._backend.set(handle_path, snapshot_data)
def load_snapshot(self, handle_path):
"""Part of the Storage API, retrieve a snapshot that was previously saved.
Args:
handle_path: The string identifying the snapshot.
Raises:
NoSnapshotError: if there is no snapshot for the given handle_path.
"""
try:
content = self._backend.get(handle_path)
except KeyError:
raise NoSnapshotError(handle_path)
return content
def drop_snapshot(self, handle_path):
"""Part of the Storage API, remove a snapshot that was previously saved.
Dropping a snapshot that doesn't exist is treated as a no-op.
"""
self._backend.delete(handle_path)
def save_notice(self, event_path: str, observer_path: str, method_name: str):
"""Part of the Storage API, record an notice (event and observer)."""
notice_list = self._load_notice_list()
notice_list.append([event_path, observer_path, method_name])
self._save_notice_list(notice_list)
def drop_notice(self, event_path: str, observer_path: str, method_name: str):
"""Part of the Storage API, remove a notice that was previously recorded."""
notice_list = self._load_notice_list()
notice_list.remove([event_path, observer_path, method_name])
self._save_notice_list(notice_list)
def notices(self, event_path: str = None):
"""Part of the Storage API, return all notices that begin with event_path.
Args:
event_path: If supplied, will only yield events that match event_path. If not
supplied (or None/'') will return all events.
Returns:
Iterable of (event_path, observer_path, method_name) tuples
"""
notice_list = self._load_notice_list()
for row in notice_list:
if event_path and row[0] != event_path:
continue
yield tuple(row)
def _load_notice_list(self) -> typing.List[typing.Tuple[str, str, str]]:
"""Load a notice list from current key.
Returns:
List of (event_path, observer_path, method_name) tuples; empty if the key is missing or its value is None.
"""
try:
notice_list = self._backend.get(self.NOTICE_KEY)
except KeyError:
return []
if notice_list is None:
return []
return notice_list
def _save_notice_list(self, notices: typing.List[typing.Tuple[str, str, str]]) -> None:
"""Save a notice list under current key.
Args:
notices: List of (event_path, observer_path, method_name) tuples.
"""
self._backend.set(self.NOTICE_KEY, notices)
class _SimpleLoader(getattr(yaml, 'CSafeLoader', yaml.SafeLoader)):
"""Handle a couple basic python types.
yaml.SafeLoader can handle all the basic int/float/dict/set/etc that we want. The only one
that it *doesn't* handle is tuples. We don't want to support arbitrary types, so we just
subclass SafeLoader and add tuples back in.
"""
# Taken from the example at:
# https://stackoverflow.com/questions/9169025/how-can-i-add-a-python-tuple-to-a-yaml-file-using-pyyaml
construct_python_tuple = yaml.Loader.construct_python_tuple
_SimpleLoader.add_constructor(
u'tag:yaml.org,2002:python/tuple',
_SimpleLoader.construct_python_tuple)
class _SimpleDumper(getattr(yaml, 'CSafeDumper', yaml.SafeDumper)):
"""Add types supported by 'marshal'.
YAML can support arbitrary types, but that is generally considered unsafe (like pickle). So
we want to only support dumping out types that are safe to load.
"""
_SimpleDumper.represent_tuple = yaml.Dumper.represent_tuple
_SimpleDumper.add_representer(tuple, _SimpleDumper.represent_tuple)
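# An illustrative sketch, not part of this diff: the loader/dumper pair above
# round-trips tuples, which plain SafeLoader/SafeDumper reject.
_example_text = yaml.dump((1, 'a'), Dumper=_SimpleDumper)
assert yaml.load(_example_text, Loader=_SimpleLoader) == (1, 'a')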
def juju_backend_available() -> bool:
"""Check if Juju state storage is available."""
p = shutil.which('state-get')
return p is not None
class _JujuStorageBackend:
"""Implements the interface from the Operator framework to Juju's state-get/set/etc."""
def set(self, key: str, value: typing.Any) -> None:
"""Set a key to a given value.
Args:
key: The string key that will be used to find the value later
value: Arbitrary content that will be returned by get().
Raises:
CalledProcessError: if 'state-set' returns an error code.
"""
# default_flow_style=None means that it can use Block for
# complex types (types that have nested types) but use flow
# for simple types (like an array). Not all versions of PyYAML
# have the same default style.
encoded_value = yaml.dump(value, Dumper=_SimpleDumper, default_flow_style=None)
content = yaml.dump(
{key: encoded_value}, encoding='utf8', default_style='|',
default_flow_style=False,
Dumper=_SimpleDumper)
_run(["state-set", "--file", "-"], input=content, check=True)
def get(self, key: str) -> typing.Any:
"""Get the bytes value associated with a given key.
Args:
key: The string key that will be used to find the value
Raises:
CalledProcessError: if 'state-get' returns an error code.
"""
# We don't capture stderr here so it can end up in debug logs.
p = _run(["state-get", key], stdout=subprocess.PIPE, check=True, universal_newlines=True)
if p.stdout == '' or p.stdout == '\n':
raise KeyError(key)
return yaml.load(p.stdout, Loader=_SimpleLoader)
def delete(self, key: str) -> None:
"""Remove a key from being tracked.
Args:
key: The key to stop storing
Raises:
CalledProcessError: if 'state-delete' returns an error code.
"""
_run(["state-delete", key], check=True)
class NoSnapshotError(Exception):
"""Exception to flag that there is no snapshot for the given handle_path."""
def __init__(self, handle_path):
self.handle_path = handle_path
def __str__(self):
return 'no snapshot data found for {} object'.format(self.handle_path)
......@@ -12,12 +12,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Infrastructure to build unittests for Charms using the Operator Framework."""
import inspect
import pathlib
import random
import tempfile
import typing
import yaml
from contextlib import contextmanager
from textwrap import dedent
from ops import (
charm,
framework,
model,
storage,
)
# OptionalYAML is something like metadata.yaml or actions.yaml. You can
......@@ -52,6 +63,9 @@ class Harness:
actions: A string or file-like object containing the contents of
actions.yaml. If not supplied, we will look for an 'actions.yaml' file in the
parent directory of the Charm.
config: A string or file-like object containing the contents of
config.yaml. If not supplied, we will look for a 'config.yaml' file in the
parent directory of the Charm.
"""
def __init__(
......@@ -59,9 +73,8 @@ class Harness:
charm_cls: typing.Type[charm.CharmBase],
*,
meta: OptionalYAML = None,
actions: OptionalYAML = None,
config: OptionalYAML = None):
self._charm_cls = charm_cls
self._charm = None
self._charm_dir = 'no-disk-path' # this may be updated by _create_meta
......@@ -71,8 +84,12 @@ class Harness:
self._hooks_enabled = True
self._relation_id_counter = 0
self._backend = _TestingModelBackend(self._unit_name, self._meta)
self._model = model.Model(self._meta, self._backend)
self._storage = storage.SQLiteStorage(':memory:')
self._oci_resources = {}
self._framework = framework.Framework(
self._storage, self._charm_dir, self._meta, self._model)
self._update_config(key_values=self._load_config_defaults(config))
@property
def charm(self) -> charm.CharmBase:
......@@ -96,8 +113,8 @@ class Harness:
def begin(self) -> None:
"""Instantiate the Charm and start handling events.
Before calling :meth:`begin`, there is no Charm instance, so changes to the Model won't
emit events. You must call :meth:`.begin` before :attr:`.charm` is valid.
"""
if self._charm is not None:
raise RuntimeError('cannot call the begin method on the harness more than once')
......@@ -118,7 +135,96 @@ class Harness:
# Note: jam 2020-03-01 This is so that errors in testing say MyCharm has no attribute foo,
# rather than TestCharm has no attribute foo.
TestCharm.__name__ = self._charm_cls.__name__
self._charm = TestCharm(self._framework)
def begin_with_initial_hooks(self) -> None:
"""Called when you want the Harness to fire the same hooks that Juju would fire at startup.
This triggers install, relation-created, config-changed, start, and any relation-joined
hooks, based on what relations have been defined before you called begin().
Note that all of these are fired before returning control to the test suite, so if you
want to introspect what happens at each step, you need to fire them directly
(e.g. Charm.on.install.emit()).
To use this with all the normal hooks, you should instantiate the harness, set up
any relations that you want active when the charm starts, and then call this method.
Example::
harness = Harness(MyCharm)
# Do initial setup here
relation_id = harness.add_relation('db', 'postgresql')
harness.add_relation_unit(relation_id, 'postgresql/0')
harness.update_relation_data(relation_id, 'postgresql/0', {'key': 'val'})
harness.set_leader(True)
harness.update_config({'initial': 'config'})
harness.begin_with_initial_hooks()
# This will cause
# install, db-relation-created('postgresql'), leader-elected, config-changed, start,
# db-relation-joined('postgresql/0'), db-relation-changed('postgresql/0')
# to be fired.
"""
self.begin()
# TODO: jam 2020-08-03 This should also handle storage-attached hooks once we have support
# for dealing with storage.
self._charm.on.install.emit()
# Juju itself iterates the relations to fire based on a map[int]relation, so it
# doesn't guarantee a stable ordering between relation events. It *does* give a
# stable ordering of joined units for a given relation.
items = list(self._meta.relations.items())
random.shuffle(items)
this_app_name = self._meta.name
for relname, rel_meta in items:
if rel_meta.role == charm.RelationRole.peer:
# If the user has directly added a relation, leave it be, but otherwise ensure
# that peer relations are always established before leader-elected.
rel_ids = self._backend._relation_ids_map.get(relname)
if rel_ids is None:
self.add_relation(relname, self._meta.name)
else:
random.shuffle(rel_ids)
for rel_id in rel_ids:
self._emit_relation_created(relname, rel_id, this_app_name)
else:
rel_ids = self._backend._relation_ids_map.get(relname, [])
random.shuffle(rel_ids)
for rel_id in rel_ids:
app_name = self._backend._relation_app_and_units[rel_id]["app"]
self._emit_relation_created(relname, rel_id, app_name)
if self._backend._is_leader:
self._charm.on.leader_elected.emit()
else:
self._charm.on.leader_settings_changed.emit()
self._charm.on.config_changed.emit()
self._charm.on.start.emit()
all_ids = list(self._backend._relation_names.items())
random.shuffle(all_ids)
for rel_id, rel_name in all_ids:
rel_app_and_units = self._backend._relation_app_and_units[rel_id]
app_name = rel_app_and_units["app"]
# Note: Juju *does* fire relation events for a given relation in the sorted order of
# the unit names. It also always fires relation-changed immediately after
# relation-joined for the same unit.
# Juju only fires relation-changed (app) if there is data for the related application
relation = self._model.get_relation(rel_name, rel_id)
if self._backend._relation_data[rel_id].get(app_name):
app = self._model.get_app(app_name)
self._charm.on[rel_name].relation_changed.emit(
relation, app, None)
for unit_name in sorted(rel_app_and_units["units"]):
remote_unit = self._model.get_unit(unit_name)
self._charm.on[rel_name].relation_joined.emit(
relation, remote_unit.app, remote_unit)
self._charm.on[rel_name].relation_changed.emit(
relation, remote_unit.app, remote_unit)
def cleanup(self) -> None:
"""Called by your test infrastructure to cleanup any temporary directories/files/etc.
Currently this only needs to be called if you test with resources. But it is reasonable
to always include a `testcase.addCleanup(harness.cleanup)` just in case.
"""
self._backend._cleanup()
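# Typical test-suite wiring, as a sketch (MyCharm is a stand-in charm class):
#
#     harness = Harness(MyCharm)
#     self.addCleanup(harness.cleanup)
#     harness.begin()
#     harness.charm.on.config_changed.emit()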
def _create_meta(self, charm_metadata, action_metadata):
"""Create a CharmMeta object.
......@@ -149,6 +255,83 @@ class Harness:
return charm.CharmMeta.from_yaml(charm_metadata, action_metadata)
def _load_config_defaults(self, charm_config):
"""Load default values from config.yaml.
Handle the case where a user doesn't supply explicit config snippets.
"""
filename = inspect.getfile(self._charm_cls)
charm_dir = pathlib.Path(filename).parents[1]
if charm_config is None:
config_path = charm_dir / 'config.yaml'
if config_path.is_file():
charm_config = config_path.read_text()
self._charm_dir = charm_dir
else:
# The simplest of config that the framework can support
charm_config = '{}'
elif isinstance(charm_config, str):
charm_config = dedent(charm_config)
charm_config = yaml.load(charm_config, Loader=yaml.SafeLoader)
charm_config = charm_config.get('options', {})
return {key: value['default'] for key, value in charm_config.items()
if 'default' in value}
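# For example, given a config.yaml of:
#
#     options:
#       log-level:
#         default: info
#       hostname:
#         type: string
#
# this returns {'log-level': 'info'}; options without a 'default' are skipped.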
def add_oci_resource(self, resource_name: str,
contents: typing.Mapping[str, str] = None) -> None:
"""Add oci resources to the backend.
This will register an oci resource and create a temporary file for processing metadata
about the resource. A default set of values will be used for all the file contents
unless a specific contents dict is provided.
Args:
resource_name: Name of the resource to add custom contents to.
contents: Optional custom dict to write for the named resource.
"""
if not contents:
contents = {'registrypath': 'registrypath',
'username': 'username',
'password': 'password',
}
if resource_name not in self._meta.resources.keys():
raise RuntimeError('Resource {} is not a defined resource'.format(resource_name))
if self._meta.resources[resource_name].type != "oci-image":
raise RuntimeError('Resource {} is not an OCI Image'.format(resource_name))
as_yaml = yaml.dump(contents, Dumper=yaml.SafeDumper)
self._backend._resources_map[resource_name] = ('contents.yaml', as_yaml)
def add_resource(self, resource_name: str, content: typing.AnyStr) -> None:
"""Add content for a resource to the backend.
This will register the content, so that a call to `Model.resources.fetch(resource_name)`
will return a path to a file containing that content.
Args:
resource_name: The name of the resource being added
content: Either string or bytes content, which will be the content of the filename
returned by resource-get. If content is a string, it will be encoded as UTF-8.
"""
if resource_name not in self._meta.resources.keys():
raise RuntimeError('Resource {} is not a defined resource'.format(resource_name))
record = self._meta.resources[resource_name]
if record.type != "file":
raise RuntimeError(
'Resource {} is not a file, but actually {}'.format(resource_name, record.type))
filename = record.filename
if filename is None:
filename = resource_name
self._backend._resources_map[resource_name] = (filename, content)
def populate_oci_resources(self) -> None:
"""Populate all OCI resources."""
for name, data in self._meta.resources.items():
if data.type == "oci-image":
self.add_oci_resource(name)
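# Sketch: for a metadata.yaml resource of type oci-image named 'my-image'
# (a stand-in name),
#
#     harness.add_oci_resource('my-image')  # or populate_oci_resources()
#
# after which Model.resources.fetch('my-image') returns a path to the
# generated contents.yaml.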
def disable_hooks(self) -> None:
"""Stop emitting hook events when the model changes.
......@@ -167,6 +350,27 @@ class Harness:
"""
self._hooks_enabled = True
@contextmanager
def hooks_disabled(self):
"""A context manager to run code with hooks disabled.
Example::
with harness.hooks_disabled():
# things in here don't fire events
harness.set_leader(True)
harness.update_config(unset=['foo', 'bar'])
# things here will again fire events
"""
if self._hooks_enabled:
self.disable_hooks()
try:
yield None
finally:
self.enable_hooks()
else:
yield None
def _next_relation_id(self):
rel_id = self._relation_id_counter
self._relation_id_counter += 1
......@@ -191,16 +395,27 @@ class Harness:
self._backend.unit_name: {},
self._backend.app_name: {},
}
self._backend._relation_app_and_units[rel_id] = {
"app": remote_app,
"units": [],
}
# Reload the relation_ids list
if self._model is not None:
self._model.relations._invalidate(relation_name)
self._emit_relation_created(relation_name, rel_id, remote_app)
return rel_id
def _emit_relation_created(self, relation_name: str, relation_id: int,
remote_app: str) -> None:
"""Trigger relation-created for a given relation with a given remote application."""
if self._charm is None or not self._hooks_enabled:
    return
relation = self._model.get_relation(relation_name, relation_id)
app = self._model.get_app(remote_app)
self._charm.on[relation_name].relation_created.emit(relation, app)
def add_relation_unit(self, relation_id: int, remote_unit_name: str) -> None:
"""Add a new unit to a relation.
......@@ -210,24 +425,34 @@ class Harness:
rel_id = harness.add_relation('db', 'postgresql')
harness.add_relation_unit(rel_id, 'postgresql/0')
This will trigger a `relation_joined` event and a `relation_changed` event.
This will trigger a `relation_joined` event. This would naturally be
followed by a `relation_changed` event, which you can trigger with
:meth:`.update_relation_data`. This separation is artificial in the
sense that Juju will always fire the two, but is intended to make
testing relations and their data bags slightly more natural.
Args:
relation_id: The integer relation identifier (as returned by add_relation).
remote_unit_name: A string representing the remote unit that is being added.
Return:
None
"""
self._backend._relation_list_map[relation_id].append(remote_unit_name)
self._backend._relation_data[relation_id][remote_unit_name] = {}
# TODO: jam 2020-08-03 This is where we could assert that the unit name matches the
# application name (eg you don't have a relation to 'foo' but add units of 'bar/0'
self._backend._relation_app_and_units[relation_id]["units"].append(remote_unit_name)
relation_name = self._backend._relation_names[relation_id]
# Make sure that the Model reloads the relation_list for this relation_id, as well as
# reloading the relation data for this unit.
if self._model is not None:
remote_unit = self._model.get_unit(remote_unit_name)
relation = self._model.get_relation(relation_name, relation_id)
unit_cache = relation.data.get(remote_unit, None)
if unit_cache is not None:
unit_cache._invalidate()
self._model.relations._invalidate(relation_name)
if self._charm is None or not self._hooks_enabled:
return
self._charm.on[relation_name].relation_joined.emit(
......@@ -245,15 +470,34 @@ class Harness:
app_or_unit: The name of the application or unit whose data we want to read
Return:
a dict containing the relation data for `app_or_unit` or None.
Raises:
KeyError: if relation_id doesn't exist
"""
return self._backend._relation_data[relation_id].get(app_or_unit, None)
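# Sketch of driving a relation end-to-end with the methods above:
#
#     rel_id = harness.add_relation('db', 'postgresql')
#     harness.add_relation_unit(rel_id, 'postgresql/0')
#     harness.update_relation_data(rel_id, 'postgresql/0', {'port': '5432'})
#     assert harness.get_relation_data(rel_id, 'postgresql/0')['port'] == '5432'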
def get_pod_spec(self) -> (typing.Mapping, typing.Mapping):
"""Return the content of the pod spec as last set by the charm.
This returns both the pod spec and any k8s_resources that were supplied.
See the signature of Model.pod.set_spec
"""
return self._backend._pod_spec
def get_workload_version(self) -> str:
"""Read the workload version that was set by the unit."""
return self._backend._workload_version
def set_model_name(self, name: str) -> None:
"""Set the name of the Model that this is representing.
This cannot be called once begin() has been called, but it lets you set the value that
will be returned by Model.name.
"""
if self._charm is not None:
raise RuntimeError('cannot set the Model name after begin()')
self._backend.model_name = name
def update_relation_data(
self,
relation_id: int,
......@@ -296,7 +540,7 @@ class Harness:
if app_or_unit == self._model.app.name:
# updating our own app only generates an event if it is a peer relation and we
# aren't the leader
is_peer = self._meta.relations[relation_name].role.is_peer()
if not is_peer:
return
if self._model.unit.is_leader():
......@@ -320,30 +564,52 @@ class Harness:
args = (relation, app)
self._charm.on[rel_name].relation_changed.emit(*args)
def _update_config(
self,
key_values: typing.Mapping[str, str] = None,
unset: typing.Iterable[str] = (),
) -> None:
"""Update the config as seen by the charm.
This will *not* trigger a `config_changed` event, and is intended for internal use.
Note that the `key_values` mapping will only add or update configuration items.
To remove existing ones, see the `unset` parameter.
Args:
key_values: A Mapping of key:value pairs to update in config.
unset: An iterable of keys to remove from Config. (Note that this does
not currently reset the config values to the default defined in config.yaml.)
"""
# NOTE: jam 2020-03-01 Note that this sort of works "by accident". Config
# is a LazyMapping, but its _load returns a dict and this method mutates
# the dict that Config is caching. Arguably we should be doing some sort
# of charm.framework.model.config._invalidate()
config = self._backend._config
if key_values is not None:
for key, value in key_values.items():
config[key] = value
for key in unset:
config.pop(key, None)
# NOTE: jam 2020-03-01 Note that this sort of works "by accident". Config
# is a LazyMapping, but its _load returns a dict and this method mutates
# the dict that Config is caching. Arguably we should be doing some sort
# of charm.framework.model.config._invalidate()
def update_config(
self,
key_values: typing.Mapping[str, str] = None,
unset: typing.Iterable[str] = (),
) -> None:
"""Update the config as seen by the charm.
This will trigger a `config_changed` event.
Note that the `key_values` mapping will only add or update configuration items.
To remove existing ones, see the `unset` parameter.
Args:
key_values: A Mapping of key:value pairs to update in config.
unset: An iterable of keys to remove from Config. (Note that this does
not currently reset the config values to the default defined in config.yaml.)
"""
self._update_config(key_values, unset)
if self._charm is None or not self._hooks_enabled:
return
self._charm.on.config_changed.emit()
......@@ -363,7 +629,56 @@ class Harness:
if is_leader and not was_leader and self._charm is not None and self._hooks_enabled:
self._charm.on.leader_elected.emit()
def _get_backend_calls(self, reset: bool = True) -> list:
"""Return the calls that we have made to the TestingModelBackend.
This is useful mostly for testing the framework itself, so that we can assert that we
do/don't trigger extra calls.
Args:
reset: If True, reset the calls list back to empty; if False, the call list is
preserved.
Return:
``[(call1, args...), (call2, args...)]``
"""
calls = self._backend._calls.copy()
if reset:
self._backend._calls.clear()
return calls
def _record_calls(cls):
"""Replace methods on cls with methods that record that they have been called.
Iterate all attributes of cls, and for public methods, replace them with a wrapped method
that records the method called along with the arguments and keyword arguments.
"""
for meth_name, orig_method in cls.__dict__.items():
if meth_name.startswith('_'):
continue
def decorator(orig_method):
def wrapped(self, *args, **kwargs):
full_args = (orig_method.__name__,) + args
if kwargs:
full_args = full_args + (kwargs,)
self._calls.append(full_args)
return orig_method(self, *args, **kwargs)
return wrapped
setattr(cls, meth_name, decorator(orig_method))
return cls
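# Sketch of what the recorder captures, assuming a decorated backend
# instance exposing a _calls list:
#
#     backend.relation_ids('db')
#     # appends ('relation_ids', 'db')
#     backend.status_set('active', is_app=True)
#     # appends ('status_set', 'active', {'is_app': True})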
class _ResourceEntry:
"""Tracks the contents of a Resource."""
def __init__(self, resource_name):
self.name = resource_name
@_record_calls
class _TestingModelBackend:
"""This conforms to the interface for ModelBackend but provides canned data.
......@@ -375,6 +690,7 @@ class _TestingModelBackend:
def __init__(self, unit_name, meta):
self.unit_name = unit_name
self.app_name = self.unit_name.split('/')[0]
self.model_name = None
self._calls = []
self._meta = meta
self._is_leader = None
......@@ -382,13 +698,29 @@ class _TestingModelBackend:
self._relation_names = {} # reverse map from relation_id to relation_name
self._relation_list_map = {} # relation_id: [unit_name,...]
self._relation_data = {} # {relation_id: {name: data}}
# {relation_id: {"app": app_name, "units": ["app/0",...]}
self._relation_app_and_units = {}
self._config = {}
self._is_leader = False
self._resources_map = {}  # {resource_name: resource_content}
self._pod_spec = None
self._app_status = {'status': 'unknown', 'message': ''}
self._unit_status = {'status': 'maintenance', 'message': ''}
self._workload_version = None
self._resource_dir = None
def _cleanup(self):
if self._resource_dir is not None:
self._resource_dir.cleanup()
self._resource_dir = None
def _get_resource_dir(self) -> pathlib.Path:
if self._resource_dir is None:
# In actual Juju, the resource path for a charm's resource is
# $AGENT_DIR/resources/$RESOURCE_NAME/$RESOURCE_FILENAME
# However, charms shouldn't depend on this.
self._resource_dir = tempfile.TemporaryDirectory(prefix='tmp-ops-test-resource-')
return pathlib.Path(self._resource_dir.name)
def relation_ids(self, relation_name):
try:
......@@ -435,7 +767,24 @@ class _TestingModelBackend:
self._workload_version = version
def resource_get(self, resource_name):
if resource_name not in self._resources_map:
raise model.ModelError(
"ERROR could not download resource: HTTP request failed: "
"Get https://.../units/unit-{}/resources/{}: resource#{}/{} not found".format(
self.unit_name.replace('/', '-'), resource_name, self.app_name, resource_name
))
filename, contents = self._resources_map[resource_name]
resource_dir = self._get_resource_dir()
resource_filename = resource_dir / resource_name / filename
if not resource_filename.exists():
if isinstance(contents, bytes):
mode = 'wb'
else:
mode = 'wt'
resource_filename.parent.mkdir(exist_ok=True)
with resource_filename.open(mode=mode) as resource_file:
resource_file.write(contents)
return resource_filename
def pod_spec_set(self, spec, k8s_resources):
self._pod_spec = (spec, k8s_resources)
......@@ -448,9 +797,9 @@ class _TestingModelBackend:
def status_set(self, status, message='', *, is_app=False):
if is_app:
    self._app_status = {'status': status, 'message': message}
else:
    self._unit_status = {'status': status, 'message': message}
def storage_list(self, name):
raise NotImplementedError(self.storage_list)
......
# this is a generated file
version = '1.1.0'
from .error import *
from .tokens import *
from .events import *
from .nodes import *
from .loader import *
from .dumper import *
__version__ = '5.3.1'
try:
from .cyaml import *
__with_libyaml__ = True
except ImportError:
__with_libyaml__ = False
import io
#------------------------------------------------------------------------------
# Warnings control
#------------------------------------------------------------------------------
# 'Global' warnings state:
_warnings_enabled = {
'YAMLLoadWarning': True,
}
# Get or set global warnings' state
def warnings(settings=None):
if settings is None:
return _warnings_enabled
if type(settings) is dict:
for key in settings:
if key in _warnings_enabled:
_warnings_enabled[key] = settings[key]
# Warn when load() is called without Loader=...
class YAMLLoadWarning(RuntimeWarning):
pass
def load_warning(method):
if _warnings_enabled['YAMLLoadWarning'] is False:
return
import warnings
message = (
"calling yaml.%s() without Loader=... is deprecated, as the "
"default Loader is unsafe. Please read "
"https://msg.pyyaml.org/load for full details."
) % method
warnings.warn(message, YAMLLoadWarning, stacklevel=3)
#------------------------------------------------------------------------------
def scan(stream, Loader=Loader):
"""
Scan a YAML stream and produce scanning tokens.
"""
loader = Loader(stream)
try:
while loader.check_token():
yield loader.get_token()
finally:
loader.dispose()
def parse(stream, Loader=Loader):
"""
Parse a YAML stream and produce parsing events.
"""
loader = Loader(stream)
try:
while loader.check_event():
yield loader.get_event()
finally:
loader.dispose()
def compose(stream, Loader=Loader):
"""
Parse the first YAML document in a stream
and produce the corresponding representation tree.
"""
loader = Loader(stream)
try:
return loader.get_single_node()
finally:
loader.dispose()
def compose_all(stream, Loader=Loader):
"""
Parse all YAML documents in a stream
and produce corresponding representation trees.
"""
loader = Loader(stream)
try:
while loader.check_node():
yield loader.get_node()
finally:
loader.dispose()
def load(stream, Loader=None):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
"""
if Loader is None:
load_warning('load')
Loader = FullLoader
loader = Loader(stream)
try:
return loader.get_single_data()
finally:
loader.dispose()
def load_all(stream, Loader=None):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
"""
if Loader is None:
load_warning('load_all')
Loader = FullLoader
loader = Loader(stream)
try:
while loader.check_data():
yield loader.get_data()
finally:
loader.dispose()
def full_load(stream):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
Resolve all tags except those known to be
unsafe on untrusted input.
"""
return load(stream, FullLoader)
def full_load_all(stream):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
Resolve all tags except those known to be
unsafe on untrusted input.
"""
return load_all(stream, FullLoader)
def safe_load(stream):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
Resolve only basic YAML tags. This is known
to be safe for untrusted input.
"""
return load(stream, SafeLoader)
def safe_load_all(stream):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
Resolve only basic YAML tags. This is known
to be safe for untrusted input.
"""
return load_all(stream, SafeLoader)
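# For example:
#
#     safe_load('a: 1')                        # -> {'a': 1}
#     list(safe_load_all('a: 1\n---\nb: 2'))   # -> [{'a': 1}, {'b': 2}]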
def unsafe_load(stream):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
Resolve all tags, even those known to be
unsafe on untrusted input.
"""
return load(stream, UnsafeLoader)
def unsafe_load_all(stream):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
Resolve all tags, even those known to be
unsafe on untrusted input.
"""
return load_all(stream, UnsafeLoader)
def emit(events, stream=None, Dumper=Dumper,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None):
"""
Emit YAML parsing events into a stream.
If stream is None, return the produced string instead.
"""
getvalue = None
if stream is None:
stream = io.StringIO()
getvalue = stream.getvalue
dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
try:
for event in events:
dumper.emit(event)
finally:
dumper.dispose()
if getvalue:
return getvalue()
def serialize_all(nodes, stream=None, Dumper=Dumper,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None):
"""
Serialize a sequence of representation trees into a YAML stream.
If stream is None, return the produced string instead.
"""
getvalue = None
if stream is None:
if encoding is None:
stream = io.StringIO()
else:
stream = io.BytesIO()
getvalue = stream.getvalue
dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break,
encoding=encoding, version=version, tags=tags,
explicit_start=explicit_start, explicit_end=explicit_end)
try:
dumper.open()
for node in nodes:
dumper.serialize(node)
dumper.close()
finally:
dumper.dispose()
if getvalue:
return getvalue()
def serialize(node, stream=None, Dumper=Dumper, **kwds):
"""
Serialize a representation tree into a YAML stream.
If stream is None, return the produced string instead.
"""
return serialize_all([node], stream, Dumper=Dumper, **kwds)
def dump_all(documents, stream=None, Dumper=Dumper,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
"""
Serialize a sequence of Python objects into a YAML stream.
If stream is None, return the produced string instead.
"""
getvalue = None
if stream is None:
if encoding is None:
stream = io.StringIO()
else:
stream = io.BytesIO()
getvalue = stream.getvalue
dumper = Dumper(stream, default_style=default_style,
default_flow_style=default_flow_style,
canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break,
encoding=encoding, version=version, tags=tags,
explicit_start=explicit_start, explicit_end=explicit_end, sort_keys=sort_keys)
try:
dumper.open()
for data in documents:
dumper.represent(data)
dumper.close()
finally:
dumper.dispose()
if getvalue:
return getvalue()
def dump(data, stream=None, Dumper=Dumper, **kwds):
"""
Serialize a Python object into a YAML stream.
If stream is None, return the produced string instead.
"""
return dump_all([data], stream, Dumper=Dumper, **kwds)
def safe_dump_all(documents, stream=None, **kwds):
"""
Serialize a sequence of Python objects into a YAML stream.
Produce only basic YAML tags.
If stream is None, return the produced string instead.
"""
return dump_all(documents, stream, Dumper=SafeDumper, **kwds)
def safe_dump(data, stream=None, **kwds):
"""
Serialize a Python object into a YAML stream.
Produce only basic YAML tags.
If stream is None, return the produced string instead.
"""
return dump_all([data], stream, Dumper=SafeDumper, **kwds)
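# For example:
#
#     safe_dump({'a': 1})   # -> 'a: 1\n'
#     safe_dump([1, 2])     # -> '- 1\n- 2\n'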
def add_implicit_resolver(tag, regexp, first=None,
Loader=None, Dumper=Dumper):
"""
Add an implicit scalar detector.
If an implicit scalar value matches the given regexp,
the corresponding tag is assigned to the scalar.
first is a sequence of possible initial characters or None.
"""
if Loader is None:
loader.Loader.add_implicit_resolver(tag, regexp, first)
loader.FullLoader.add_implicit_resolver(tag, regexp, first)
loader.UnsafeLoader.add_implicit_resolver(tag, regexp, first)
else:
Loader.add_implicit_resolver(tag, regexp, first)
Dumper.add_implicit_resolver(tag, regexp, first)
def add_path_resolver(tag, path, kind=None, Loader=None, Dumper=Dumper):
"""
Add a path based resolver for the given tag.
A path is a list of keys that forms a path
to a node in the representation tree.
Keys can be string values, integers, or None.
"""
if Loader is None:
loader.Loader.add_path_resolver(tag, path, kind)
loader.FullLoader.add_path_resolver(tag, path, kind)
loader.UnsafeLoader.add_path_resolver(tag, path, kind)
else:
Loader.add_path_resolver(tag, path, kind)
Dumper.add_path_resolver(tag, path, kind)
def add_constructor(tag, constructor, Loader=None):
"""
Add a constructor for the given tag.
Constructor is a function that accepts a Loader instance
and a node object and produces the corresponding Python object.
"""
if Loader is None:
loader.Loader.add_constructor(tag, constructor)
loader.FullLoader.add_constructor(tag, constructor)
loader.UnsafeLoader.add_constructor(tag, constructor)
else:
Loader.add_constructor(tag, constructor)
def add_multi_constructor(tag_prefix, multi_constructor, Loader=None):
"""
Add a multi-constructor for the given tag prefix.
Multi-constructor is called for a node if its tag starts with tag_prefix.
Multi-constructor accepts a Loader instance, a tag suffix,
and a node object and produces the corresponding Python object.
"""
if Loader is None:
loader.Loader.add_multi_constructor(tag_prefix, multi_constructor)
loader.FullLoader.add_multi_constructor(tag_prefix, multi_constructor)
loader.UnsafeLoader.add_multi_constructor(tag_prefix, multi_constructor)
else:
Loader.add_multi_constructor(tag_prefix, multi_constructor)
def add_representer(data_type, representer, Dumper=Dumper):
"""
Add a representer for the given type.
Representer is a function accepting a Dumper instance
and an instance of the given data type
and producing the corresponding representation node.
"""
Dumper.add_representer(data_type, representer)
def add_multi_representer(data_type, multi_representer, Dumper=Dumper):
"""
Add a representer for the given type.
Multi-representer is a function accepting a Dumper instance
and an instance of the given data type or subtype
and producing the corresponding representation node.
"""
Dumper.add_multi_representer(data_type, multi_representer)
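# Sketch of registering a representer for a custom type (Point and the
# '!point' tag are stand-ins):
#
#     class Point:
#         def __init__(self, x, y):
#             self.x, self.y = x, y
#
#     def represent_point(dumper, data):
#         return dumper.represent_sequence('!point', [data.x, data.y])
#
#     add_representer(Point, represent_point)
#     dump(Point(1, 2))   # -> '!point\n- 1\n- 2\n'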
class YAMLObjectMetaclass(type):
"""
The metaclass for YAMLObject.
"""
def __init__(cls, name, bases, kwds):
super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds)
if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None:
if isinstance(cls.yaml_loader, list):
for loader in cls.yaml_loader:
loader.add_constructor(cls.yaml_tag, cls.from_yaml)
else:
cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml)
cls.yaml_dumper.add_representer(cls, cls.to_yaml)
class YAMLObject(metaclass=YAMLObjectMetaclass):
"""
An object that can dump itself to a YAML stream
and load itself from a YAML stream.
"""
__slots__ = () # no direct instantiation, so allow immutable subclasses
yaml_loader = [Loader, FullLoader, UnsafeLoader]
yaml_dumper = Dumper
yaml_tag = None
yaml_flow_style = None
@classmethod
def from_yaml(cls, loader, node):
"""
Convert a representation node to a Python object.
"""
return loader.construct_yaml_object(node, cls)
@classmethod
def to_yaml(cls, dumper, data):
"""
Convert a Python object to a representation node.
"""
return dumper.represent_yaml_object(cls.yaml_tag, data, cls,
flow_style=cls.yaml_flow_style)
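# Sketch of a YAMLObject subclass (Monster is a stand-in name):
#
#     class Monster(YAMLObject):
#         yaml_tag = '!Monster'
#         def __init__(self, name, hp):
#             self.name = name
#             self.hp = hp
#
#     load('!Monster {name: Cave lizard, hp: 3}', Loader=FullLoader)
#     # -> a Monster instance with name='Cave lizard' and hp=3
#     # (from_yaml bypasses __init__ and fills __dict__ directly)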
__all__ = ['Composer', 'ComposerError']
from .error import MarkedYAMLError
from .events import *
from .nodes import *
class ComposerError(MarkedYAMLError):
pass
class Composer:
def __init__(self):
self.anchors = {}
def check_node(self):
# Drop the STREAM-START event.
if self.check_event(StreamStartEvent):
self.get_event()
# Are there more documents available?
return not self.check_event(StreamEndEvent)
def get_node(self):
# Get the root node of the next document.
if not self.check_event(StreamEndEvent):
return self.compose_document()
def get_single_node(self):
# Drop the STREAM-START event.
self.get_event()
# Compose a document if the stream is not empty.
document = None
if not self.check_event(StreamEndEvent):
document = self.compose_document()
# Ensure that the stream contains no more documents.
if not self.check_event(StreamEndEvent):
event = self.get_event()
raise ComposerError("expected a single document in the stream",
document.start_mark, "but found another document",
event.start_mark)
# Drop the STREAM-END event.
self.get_event()
return document
def compose_document(self):
# Drop the DOCUMENT-START event.
self.get_event()
# Compose the root node.
node = self.compose_node(None, None)
# Drop the DOCUMENT-END event.
self.get_event()
self.anchors = {}
return node
def compose_node(self, parent, index):
if self.check_event(AliasEvent):
event = self.get_event()
anchor = event.anchor
if anchor not in self.anchors:
raise ComposerError(None, None, "found undefined alias %r"
% anchor, event.start_mark)
return self.anchors[anchor]
event = self.peek_event()
anchor = event.anchor
if anchor is not None:
if anchor in self.anchors:
raise ComposerError("found duplicate anchor %r; first occurrence"
% anchor, self.anchors[anchor].start_mark,
"second occurrence", event.start_mark)
self.descend_resolver(parent, index)
if self.check_event(ScalarEvent):
node = self.compose_scalar_node(anchor)
elif self.check_event(SequenceStartEvent):
node = self.compose_sequence_node(anchor)
elif self.check_event(MappingStartEvent):
node = self.compose_mapping_node(anchor)
self.ascend_resolver()
return node
def compose_scalar_node(self, anchor):
event = self.get_event()
tag = event.tag
if tag is None or tag == '!':
tag = self.resolve(ScalarNode, event.value, event.implicit)
node = ScalarNode(tag, event.value,
event.start_mark, event.end_mark, style=event.style)
if anchor is not None:
self.anchors[anchor] = node
return node
def compose_sequence_node(self, anchor):
start_event = self.get_event()
tag = start_event.tag
if tag is None or tag == '!':
tag = self.resolve(SequenceNode, None, start_event.implicit)
node = SequenceNode(tag, [],
start_event.start_mark, None,
flow_style=start_event.flow_style)
if anchor is not None:
self.anchors[anchor] = node
index = 0
while not self.check_event(SequenceEndEvent):
node.value.append(self.compose_node(node, index))
index += 1
end_event = self.get_event()
node.end_mark = end_event.end_mark
return node
def compose_mapping_node(self, anchor):
start_event = self.get_event()
tag = start_event.tag
if tag is None or tag == '!':
tag = self.resolve(MappingNode, None, start_event.implicit)
node = MappingNode(tag, [],
start_event.start_mark, None,
flow_style=start_event.flow_style)
if anchor is not None:
self.anchors[anchor] = node
while not self.check_event(MappingEndEvent):
#key_event = self.peek_event()
item_key = self.compose_node(node, None)
#if item_key in node.value:
# raise ComposerError("while composing a mapping", start_event.start_mark,
# "found duplicate key", key_event.start_mark)
item_value = self.compose_node(node, item_key)
#node.value[item_key] = item_value
node.value.append((item_key, item_value))
end_event = self.get_event()
node.end_mark = end_event.end_mark
return node
__all__ = [
'BaseConstructor',
'SafeConstructor',
'FullConstructor',
'UnsafeConstructor',
'Constructor',
'ConstructorError'
]
from .error import *
from .nodes import *
import collections.abc, datetime, base64, binascii, re, sys, types
class ConstructorError(MarkedYAMLError):
pass
class BaseConstructor:
yaml_constructors = {}
yaml_multi_constructors = {}
def __init__(self):
self.constructed_objects = {}
self.recursive_objects = {}
self.state_generators = []
self.deep_construct = False
def check_data(self):
# Are there more documents available?
return self.check_node()
def check_state_key(self, key):
"""Block special attributes/methods from being set in a newly created
object, to prevent user-controlled methods from being called during
deserialization"""
if self.get_state_keys_blacklist_regexp().match(key):
raise ConstructorError(None, None,
"blacklisted key '%s' in instance state found" % (key,), None)
def get_data(self):
# Construct and return the next document.
if self.check_node():
return self.construct_document(self.get_node())
def get_single_data(self):
# Ensure that the stream contains a single document and construct it.
node = self.get_single_node()
if node is not None:
return self.construct_document(node)
return None
def construct_document(self, node):
data = self.construct_object(node)
while self.state_generators:
state_generators = self.state_generators
self.state_generators = []
for generator in state_generators:
for dummy in generator:
pass
self.constructed_objects = {}
self.recursive_objects = {}
self.deep_construct = False
return data
def construct_object(self, node, deep=False):
if node in self.constructed_objects:
return self.constructed_objects[node]
if deep:
old_deep = self.deep_construct
self.deep_construct = True
if node in self.recursive_objects:
raise ConstructorError(None, None,
"found unconstructable recursive node", node.start_mark)
self.recursive_objects[node] = None
constructor = None
tag_suffix = None
if node.tag in self.yaml_constructors:
constructor = self.yaml_constructors[node.tag]
else:
for tag_prefix in self.yaml_multi_constructors:
if tag_prefix is not None and node.tag.startswith(tag_prefix):
tag_suffix = node.tag[len(tag_prefix):]
constructor = self.yaml_multi_constructors[tag_prefix]
break
else:
if None in self.yaml_multi_constructors:
tag_suffix = node.tag
constructor = self.yaml_multi_constructors[None]
elif None in self.yaml_constructors:
constructor = self.yaml_constructors[None]
elif isinstance(node, ScalarNode):
constructor = self.__class__.construct_scalar
elif isinstance(node, SequenceNode):
constructor = self.__class__.construct_sequence
elif isinstance(node, MappingNode):
constructor = self.__class__.construct_mapping
if tag_suffix is None:
data = constructor(self, node)
else:
data = constructor(self, tag_suffix, node)
if isinstance(data, types.GeneratorType):
generator = data
data = next(generator)
if self.deep_construct:
for dummy in generator:
pass
else:
self.state_generators.append(generator)
self.constructed_objects[node] = data
del self.recursive_objects[node]
if deep:
self.deep_construct = old_deep
return data
def construct_scalar(self, node):
if not isinstance(node, ScalarNode):
raise ConstructorError(None, None,
"expected a scalar node, but found %s" % node.id,
node.start_mark)
return node.value
def construct_sequence(self, node, deep=False):
if not isinstance(node, SequenceNode):
raise ConstructorError(None, None,
"expected a sequence node, but found %s" % node.id,
node.start_mark)
return [self.construct_object(child, deep=deep)
for child in node.value]
def construct_mapping(self, node, deep=False):
if not isinstance(node, MappingNode):
raise ConstructorError(None, None,
"expected a mapping node, but found %s" % node.id,
node.start_mark)
mapping = {}
for key_node, value_node in node.value:
key = self.construct_object(key_node, deep=deep)
if not isinstance(key, collections.abc.Hashable):
raise ConstructorError("while constructing a mapping", node.start_mark,
"found unhashable key", key_node.start_mark)
value = self.construct_object(value_node, deep=deep)
mapping[key] = value
return mapping
def construct_pairs(self, node, deep=False):
if not isinstance(node, MappingNode):
raise ConstructorError(None, None,
"expected a mapping node, but found %s" % node.id,
node.start_mark)
pairs = []
for key_node, value_node in node.value:
key = self.construct_object(key_node, deep=deep)
value = self.construct_object(value_node, deep=deep)
pairs.append((key, value))
return pairs
@classmethod
def add_constructor(cls, tag, constructor):
if 'yaml_constructors' not in cls.__dict__:
cls.yaml_constructors = cls.yaml_constructors.copy()
cls.yaml_constructors[tag] = constructor
@classmethod
def add_multi_constructor(cls, tag_prefix, multi_constructor):
if 'yaml_multi_constructors' not in cls.__dict__:
cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy()
cls.yaml_multi_constructors[tag_prefix] = multi_constructor
class SafeConstructor(BaseConstructor):
def construct_scalar(self, node):
if isinstance(node, MappingNode):
for key_node, value_node in node.value:
if key_node.tag == 'tag:yaml.org,2002:value':
return self.construct_scalar(value_node)
return super().construct_scalar(node)
def flatten_mapping(self, node):
merge = []
index = 0
while index < len(node.value):
key_node, value_node = node.value[index]
if key_node.tag == 'tag:yaml.org,2002:merge':
del node.value[index]
if isinstance(value_node, MappingNode):
self.flatten_mapping(value_node)
merge.extend(value_node.value)
elif isinstance(value_node, SequenceNode):
submerge = []
for subnode in value_node.value:
if not isinstance(subnode, MappingNode):
raise ConstructorError("while constructing a mapping",
node.start_mark,
"expected a mapping for merging, but found %s"
% subnode.id, subnode.start_mark)
self.flatten_mapping(subnode)
submerge.append(subnode.value)
submerge.reverse()
for value in submerge:
merge.extend(value)
else:
raise ConstructorError("while constructing a mapping", node.start_mark,
"expected a mapping or list of mappings for merging, but found %s"
% value_node.id, value_node.start_mark)
elif key_node.tag == 'tag:yaml.org,2002:value':
key_node.tag = 'tag:yaml.org,2002:str'
index += 1
else:
index += 1
if merge:
node.value = merge + node.value
def construct_mapping(self, node, deep=False):
if isinstance(node, MappingNode):
self.flatten_mapping(node)
return super().construct_mapping(node, deep=deep)
def construct_yaml_null(self, node):
self.construct_scalar(node)
return None
bool_values = {
'yes': True,
'no': False,
'true': True,
'false': False,
'on': True,
'off': False,
}
def construct_yaml_bool(self, node):
value = self.construct_scalar(node)
return self.bool_values[value.lower()]
def construct_yaml_int(self, node):
value = self.construct_scalar(node)
value = value.replace('_', '')
sign = +1
if value[0] == '-':
sign = -1
if value[0] in '+-':
value = value[1:]
if value == '0':
return 0
elif value.startswith('0b'):
return sign*int(value[2:], 2)
elif value.startswith('0x'):
return sign*int(value[2:], 16)
elif value[0] == '0':
return sign*int(value, 8)
elif ':' in value:
digits = [int(part) for part in value.split(':')]
digits.reverse()
base = 1
value = 0
for digit in digits:
value += digit*base
base *= 60
return sign*value
else:
return sign*int(value)
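# Worked examples of the branches above: '0b101' -> 5, '0x1A' -> 26,
# '010' -> 8 (octal), and the base-60 form '1:30' -> 1*60 + 30 == 90.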
inf_value = 1e300
while inf_value != inf_value*inf_value:
inf_value *= inf_value
nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99).
def construct_yaml_float(self, node):
value = self.construct_scalar(node)
value = value.replace('_', '').lower()
sign = +1
if value[0] == '-':
sign = -1
if value[0] in '+-':
value = value[1:]
if value == '.inf':
return sign*self.inf_value
elif value == '.nan':
return self.nan_value
elif ':' in value:
digits = [float(part) for part in value.split(':')]
digits.reverse()
base = 1
value = 0.0
for digit in digits:
value += digit*base
base *= 60
return sign*value
else:
return sign*float(value)
def construct_yaml_binary(self, node):
try:
value = self.construct_scalar(node).encode('ascii')
except UnicodeEncodeError as exc:
raise ConstructorError(None, None,
"failed to convert base64 data into ascii: %s" % exc,
node.start_mark)
try:
if hasattr(base64, 'decodebytes'):
return base64.decodebytes(value)
else:
return base64.decodestring(value)
except binascii.Error as exc:
raise ConstructorError(None, None,
"failed to decode base64 data: %s" % exc, node.start_mark)
timestamp_regexp = re.compile(
r'''^(?P<year>[0-9][0-9][0-9][0-9])
-(?P<month>[0-9][0-9]?)
-(?P<day>[0-9][0-9]?)
(?:(?:[Tt]|[ \t]+)
(?P<hour>[0-9][0-9]?)
:(?P<minute>[0-9][0-9])
:(?P<second>[0-9][0-9])
(?:\.(?P<fraction>[0-9]*))?
(?:[ \t]*(?P<tz>Z|(?P<tz_sign>[-+])(?P<tz_hour>[0-9][0-9]?)
(?::(?P<tz_minute>[0-9][0-9]))?))?)?$''', re.X)
def construct_yaml_timestamp(self, node):
value = self.construct_scalar(node)
match = self.timestamp_regexp.match(node.value)
values = match.groupdict()
year = int(values['year'])
month = int(values['month'])
day = int(values['day'])
if not values['hour']:
return datetime.date(year, month, day)
hour = int(values['hour'])
minute = int(values['minute'])
second = int(values['second'])
fraction = 0
tzinfo = None
if values['fraction']:
fraction = values['fraction'][:6]
while len(fraction) < 6:
fraction += '0'
fraction = int(fraction)
if values['tz_sign']:
tz_hour = int(values['tz_hour'])
tz_minute = int(values['tz_minute'] or 0)
delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute)
if values['tz_sign'] == '-':
delta = -delta
tzinfo = datetime.timezone(delta)
elif values['tz']:
tzinfo = datetime.timezone.utc
return datetime.datetime(year, month, day, hour, minute, second, fraction,
tzinfo=tzinfo)
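# For example, '2020-03-01' constructs a datetime.date, while
# '2020-03-01 14:30:00.5+01:00' constructs a timezone-aware
# datetime.datetime with microsecond=500000.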
def construct_yaml_omap(self, node):
# Note: we do not check for duplicate keys, because it's too
# CPU-expensive.
omap = []
yield omap
if not isinstance(node, SequenceNode):
raise ConstructorError("while constructing an ordered map", node.start_mark,
"expected a sequence, but found %s" % node.id, node.start_mark)
for subnode in node.value:
if not isinstance(subnode, MappingNode):
raise ConstructorError("while constructing an ordered map", node.start_mark,
"expected a mapping of length 1, but found %s" % subnode.id,
subnode.start_mark)
if len(subnode.value) != 1:
raise ConstructorError("while constructing an ordered map", node.start_mark,
"expected a single mapping item, but found %d items" % len(subnode.value),
subnode.start_mark)
key_node, value_node = subnode.value[0]
key = self.construct_object(key_node)
value = self.construct_object(value_node)
omap.append((key, value))
def construct_yaml_pairs(self, node):
# Note: the same code as `construct_yaml_omap`.
pairs = []
yield pairs
if not isinstance(node, SequenceNode):
raise ConstructorError("while constructing pairs", node.start_mark,
"expected a sequence, but found %s" % node.id, node.start_mark)
for subnode in node.value:
if not isinstance(subnode, MappingNode):
raise ConstructorError("while constructing pairs", node.start_mark,
"expected a mapping of length 1, but found %s" % subnode.id,
subnode.start_mark)
if len(subnode.value) != 1:
raise ConstructorError("while constructing pairs", node.start_mark,
"expected a single mapping item, but found %d items" % len(subnode.value),
subnode.start_mark)
key_node, value_node = subnode.value[0]
key = self.construct_object(key_node)
value = self.construct_object(value_node)
pairs.append((key, value))
def construct_yaml_set(self, node):
data = set()
yield data
value = self.construct_mapping(node)
data.update(value)
def construct_yaml_str(self, node):
return self.construct_scalar(node)
def construct_yaml_seq(self, node):
data = []
yield data
data.extend(self.construct_sequence(node))
def construct_yaml_map(self, node):
data = {}
yield data
value = self.construct_mapping(node)
data.update(value)
def construct_yaml_object(self, node, cls):
data = cls.__new__(cls)
yield data
if hasattr(data, '__setstate__'):
state = self.construct_mapping(node, deep=True)
data.__setstate__(state)
else:
state = self.construct_mapping(node)
data.__dict__.update(state)
def construct_undefined(self, node):
raise ConstructorError(None, None,
"could not determine a constructor for the tag %r" % node.tag,
node.start_mark)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:null',
SafeConstructor.construct_yaml_null)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:bool',
SafeConstructor.construct_yaml_bool)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:int',
SafeConstructor.construct_yaml_int)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:float',
SafeConstructor.construct_yaml_float)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:binary',
SafeConstructor.construct_yaml_binary)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:timestamp',
SafeConstructor.construct_yaml_timestamp)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:omap',
SafeConstructor.construct_yaml_omap)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:pairs',
SafeConstructor.construct_yaml_pairs)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:set',
SafeConstructor.construct_yaml_set)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:str',
SafeConstructor.construct_yaml_str)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:seq',
SafeConstructor.construct_yaml_seq)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:map',
SafeConstructor.construct_yaml_map)
SafeConstructor.add_constructor(None,
SafeConstructor.construct_undefined)
class FullConstructor(SafeConstructor):
# 'extend' is blacklisted because it is used by
# construct_python_object_apply to add `listitems` to a newly generated
# Python instance.
def get_state_keys_blacklist(self):
return ['^extend$', '^__.*__$']
def get_state_keys_blacklist_regexp(self):
if not hasattr(self, 'state_keys_blacklist_regexp'):
self.state_keys_blacklist_regexp = re.compile('(' + '|'.join(self.get_state_keys_blacklist()) + ')')
return self.state_keys_blacklist_regexp
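# For example, an instance state containing 'extend' or any dunder key such
# as '__class__' is rejected by check_state_key, blocking method-overwrite
# gadgets during deserialization.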
def construct_python_str(self, node):
return self.construct_scalar(node)
def construct_python_unicode(self, node):
return self.construct_scalar(node)
def construct_python_bytes(self, node):
try:
value = self.construct_scalar(node).encode('ascii')
except UnicodeEncodeError as exc:
raise ConstructorError(None, None,
"failed to convert base64 data into ascii: %s" % exc,
node.start_mark)
try:
if hasattr(base64, 'decodebytes'):
return base64.decodebytes(value)
else:
return base64.decodestring(value)
except binascii.Error as exc:
raise ConstructorError(None, None,
"failed to decode base64 data: %s" % exc, node.start_mark)
def construct_python_long(self, node):
return self.construct_yaml_int(node)
def construct_python_complex(self, node):
return complex(self.construct_scalar(node))
def construct_python_tuple(self, node):
return tuple(self.construct_sequence(node))
def find_python_module(self, name, mark, unsafe=False):
if not name:
raise ConstructorError("while constructing a Python module", mark,
"expected non-empty name appended to the tag", mark)
if unsafe:
try:
__import__(name)
except ImportError as exc:
raise ConstructorError("while constructing a Python module", mark,
"cannot find module %r (%s)" % (name, exc), mark)
if name not in sys.modules:
raise ConstructorError("while constructing a Python module", mark,
"module %r is not imported" % name, mark)
return sys.modules[name]
def find_python_name(self, name, mark, unsafe=False):
if not name:
raise ConstructorError("while constructing a Python object", mark,
"expected non-empty name appended to the tag", mark)
if '.' in name:
module_name, object_name = name.rsplit('.', 1)
else:
module_name = 'builtins'
object_name = name
if unsafe:
try:
__import__(module_name)
except ImportError as exc:
raise ConstructorError("while constructing a Python object", mark,
"cannot find module %r (%s)" % (module_name, exc), mark)
if module_name not in sys.modules:
raise ConstructorError("while constructing a Python object", mark,
"module %r is not imported" % module_name, mark)
module = sys.modules[module_name]
if not hasattr(module, object_name):
raise ConstructorError("while constructing a Python object", mark,
"cannot find %r in the module %r"
% (object_name, module.__name__), mark)
return getattr(module, object_name)
def construct_python_name(self, suffix, node):
value = self.construct_scalar(node)
if value:
raise ConstructorError("while constructing a Python name", node.start_mark,
"expected the empty value, but found %r" % value, node.start_mark)
return self.find_python_name(suffix, node.start_mark)
def construct_python_module(self, suffix, node):
value = self.construct_scalar(node)
if value:
raise ConstructorError("while constructing a Python module", node.start_mark,
"expected the empty value, but found %r" % value, node.start_mark)
return self.find_python_module(suffix, node.start_mark)
def make_python_instance(self, suffix, node,
args=None, kwds=None, newobj=False, unsafe=False):
if not args:
args = []
if not kwds:
kwds = {}
cls = self.find_python_name(suffix, node.start_mark)
if not (unsafe or isinstance(cls, type)):
raise ConstructorError("while constructing a Python instance", node.start_mark,
"expected a class, but found %r" % type(cls),
node.start_mark)
if newobj and isinstance(cls, type):
return cls.__new__(cls, *args, **kwds)
else:
return cls(*args, **kwds)
def set_python_instance_state(self, instance, state, unsafe=False):
if hasattr(instance, '__setstate__'):
instance.__setstate__(state)
else:
slotstate = {}
if isinstance(state, tuple) and len(state) == 2:
state, slotstate = state
if hasattr(instance, '__dict__'):
if not unsafe and state:
for key in state.keys():
self.check_state_key(key)
instance.__dict__.update(state)
elif state:
slotstate.update(state)
for key, value in slotstate.items():
if not unsafe:
self.check_state_key(key)
setattr(instance, key, value)
def construct_python_object(self, suffix, node):
# Format:
# !!python/object:module.name { ... state ... }
instance = self.make_python_instance(suffix, node, newobj=True)
yield instance
deep = hasattr(instance, '__setstate__')
state = self.construct_mapping(node, deep=deep)
self.set_python_instance_state(instance, state)
def construct_python_object_apply(self, suffix, node, newobj=False):
# Format:
# !!python/object/apply # (or !!python/object/new)
# args: [ ... arguments ... ]
# kwds: { ... keywords ... }
# state: ... state ...
# listitems: [ ... listitems ... ]
# dictitems: { ... dictitems ... }
# or short format:
# !!python/object/apply [ ... arguments ... ]
# The difference between !!python/object/apply and !!python/object/new
# is how the object is created; see make_python_instance for details.
if isinstance(node, SequenceNode):
args = self.construct_sequence(node, deep=True)
kwds = {}
state = {}
listitems = []
dictitems = {}
else:
value = self.construct_mapping(node, deep=True)
args = value.get('args', [])
kwds = value.get('kwds', {})
state = value.get('state', {})
listitems = value.get('listitems', [])
dictitems = value.get('dictitems', {})
instance = self.make_python_instance(suffix, node, args, kwds, newobj)
if state:
self.set_python_instance_state(instance, state)
if listitems:
instance.extend(listitems)
if dictitems:
for key in dictitems:
instance[key] = dictitems[key]
return instance
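# Illustrative documents for the apply form above (a hedged sketch, not part
# of the original source). The tag is only registered on UnsafeConstructor
# below, so these require UnsafeLoader (or the legacy Loader):
#
#   !!python/object/apply:complex [1.0, 2.0]
#       # constructed roughly as complex(*[1.0, 2.0])
#
#   !!python/object/apply:collections.OrderedDict
#   args: [[[a, 1], [b, 2]]]
#       # args/kwds are passed through make_python_instance above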
def construct_python_object_new(self, suffix, node):
return self.construct_python_object_apply(suffix, node, newobj=True)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/none',
FullConstructor.construct_yaml_null)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/bool',
FullConstructor.construct_yaml_bool)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/str',
FullConstructor.construct_python_str)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/unicode',
FullConstructor.construct_python_unicode)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/bytes',
FullConstructor.construct_python_bytes)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/int',
FullConstructor.construct_yaml_int)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/long',
FullConstructor.construct_python_long)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/float',
FullConstructor.construct_yaml_float)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/complex',
FullConstructor.construct_python_complex)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/list',
FullConstructor.construct_yaml_seq)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/tuple',
FullConstructor.construct_python_tuple)
FullConstructor.add_constructor(
'tag:yaml.org,2002:python/dict',
FullConstructor.construct_yaml_map)
FullConstructor.add_multi_constructor(
'tag:yaml.org,2002:python/name:',
FullConstructor.construct_python_name)
FullConstructor.add_multi_constructor(
'tag:yaml.org,2002:python/module:',
FullConstructor.construct_python_module)
FullConstructor.add_multi_constructor(
'tag:yaml.org,2002:python/object:',
FullConstructor.construct_python_object)
FullConstructor.add_multi_constructor(
'tag:yaml.org,2002:python/object/new:',
FullConstructor.construct_python_object_new)
class UnsafeConstructor(FullConstructor):
def find_python_module(self, name, mark):
return super(UnsafeConstructor, self).find_python_module(name, mark, unsafe=True)
def find_python_name(self, name, mark):
return super(UnsafeConstructor, self).find_python_name(name, mark, unsafe=True)
def make_python_instance(self, suffix, node, args=None, kwds=None, newobj=False):
return super(UnsafeConstructor, self).make_python_instance(
suffix, node, args, kwds, newobj, unsafe=True)
def set_python_instance_state(self, instance, state):
return super(UnsafeConstructor, self).set_python_instance_state(
instance, state, unsafe=True)
UnsafeConstructor.add_multi_constructor(
'tag:yaml.org,2002:python/object/apply:',
UnsafeConstructor.construct_python_object_apply)
# Constructor is the same as UnsafeConstructor. It needs to stay in place in
# case people have extended it directly.
class Constructor(UnsafeConstructor):
pass
__all__ = [
'CBaseLoader', 'CSafeLoader', 'CFullLoader', 'CUnsafeLoader', 'CLoader',
'CBaseDumper', 'CSafeDumper', 'CDumper'
]
from _yaml import CParser, CEmitter
from .constructor import *
from .serializer import *
from .representer import *
from .resolver import *
class CBaseLoader(CParser, BaseConstructor, BaseResolver):
def __init__(self, stream):
CParser.__init__(self, stream)
BaseConstructor.__init__(self)
BaseResolver.__init__(self)
class CSafeLoader(CParser, SafeConstructor, Resolver):
def __init__(self, stream):
CParser.__init__(self, stream)
SafeConstructor.__init__(self)
Resolver.__init__(self)
class CFullLoader(CParser, FullConstructor, Resolver):
def __init__(self, stream):
CParser.__init__(self, stream)
FullConstructor.__init__(self)
Resolver.__init__(self)
class CUnsafeLoader(CParser, UnsafeConstructor, Resolver):
def __init__(self, stream):
CParser.__init__(self, stream)
UnsafeConstructor.__init__(self)
Resolver.__init__(self)
class CLoader(CParser, Constructor, Resolver):
def __init__(self, stream):
CParser.__init__(self, stream)
Constructor.__init__(self)
Resolver.__init__(self)
class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver):
def __init__(self, stream,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
CEmitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width, encoding=encoding,
allow_unicode=allow_unicode, line_break=line_break,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)
class CSafeDumper(CEmitter, SafeRepresenter, Resolver):
def __init__(self, stream,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
CEmitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width, encoding=encoding,
allow_unicode=allow_unicode, line_break=line_break,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
SafeRepresenter.__init__(self, default_style=default_style,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)
class CDumper(CEmitter, Serializer, Representer, Resolver):
def __init__(self, stream,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
CEmitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width, encoding=encoding,
allow_unicode=allow_unicode, line_break=line_break,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)
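# The C-accelerated classes above are drop-in replacements for the pure-Python
# loaders and dumpers when libyaml is available. A common fallback pattern
# (hedged sketch against the public yaml package API):
#
#     try:
#         from yaml import CSafeLoader as SafeLoader
#     except ImportError:
#         from yaml import SafeLoader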
__all__ = ['BaseDumper', 'SafeDumper', 'Dumper']
from .emitter import *
from .serializer import *
from .representer import *
from .resolver import *
class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver):
def __init__(self, stream,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
Emitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
Serializer.__init__(self, encoding=encoding,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)
class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver):
def __init__(self, stream,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
Emitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
Serializer.__init__(self, encoding=encoding,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
SafeRepresenter.__init__(self, default_style=default_style,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)
class Dumper(Emitter, Serializer, Representer, Resolver):
def __init__(self, stream,
default_style=None, default_flow_style=False,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None, sort_keys=True):
Emitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
Serializer.__init__(self, encoding=encoding,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style, sort_keys=sort_keys)
Resolver.__init__(self)
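# Usage sketch for the dumpers above (assumes the top-level yaml.dump helper,
# which forwards keyword arguments to the Dumper constructors):
#
#     import yaml
#     print(yaml.dump({'b': 2, 'a': 1}, Dumper=yaml.SafeDumper,
#                     default_flow_style=False, sort_keys=True))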
# Emitter expects events obeying the following grammar:
# stream ::= STREAM-START document* STREAM-END
# document ::= DOCUMENT-START node DOCUMENT-END
# node ::= SCALAR | sequence | mapping
# sequence ::= SEQUENCE-START node* SEQUENCE-END
# mapping ::= MAPPING-START (node node)* MAPPING-END
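# For example, the document "- a" corresponds to the event sequence
#   StreamStart, DocumentStart, SequenceStart, Scalar('a'),
#   SequenceEnd, DocumentEnd, StreamEnd.
# A round-trip sketch using the assumed top-level helpers yaml.parse/yaml.emit:
#
#     import yaml
#     events = list(yaml.parse("- a"))
#     print(yaml.emit(events))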
__all__ = ['Emitter', 'EmitterError']
from .error import YAMLError
from .events import *
class EmitterError(YAMLError):
pass
class ScalarAnalysis:
def __init__(self, scalar, empty, multiline,
allow_flow_plain, allow_block_plain,
allow_single_quoted, allow_double_quoted,
allow_block):
self.scalar = scalar
self.empty = empty
self.multiline = multiline
self.allow_flow_plain = allow_flow_plain
self.allow_block_plain = allow_block_plain
self.allow_single_quoted = allow_single_quoted
self.allow_double_quoted = allow_double_quoted
self.allow_block = allow_block
class Emitter:
DEFAULT_TAG_PREFIXES = {
'!' : '!',
'tag:yaml.org,2002:' : '!!',
}
def __init__(self, stream, canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None):
# The stream should have the methods `write` and possibly `flush`.
self.stream = stream
# Encoding can be overridden by STREAM-START.
self.encoding = None
# Emitter is a state machine with a stack of states to handle nested
# structures.
self.states = []
self.state = self.expect_stream_start
# Current event and the event queue.
self.events = []
self.event = None
# The current indentation level and the stack of previous indents.
self.indents = []
self.indent = None
# Flow level.
self.flow_level = 0
# Contexts.
self.root_context = False
self.sequence_context = False
self.mapping_context = False
self.simple_key_context = False
# Characteristics of the last emitted character:
# - current position.
# - is it a whitespace?
# - is it an indention character
# (indentation space, '-', '?', or ':')?
self.line = 0
self.column = 0
self.whitespace = True
self.indention = True
# Whether the document requires an explicit document indicator
self.open_ended = False
# Formatting details.
self.canonical = canonical
self.allow_unicode = allow_unicode
self.best_indent = 2
if indent and 1 < indent < 10:
self.best_indent = indent
self.best_width = 80
if width and width > self.best_indent*2:
self.best_width = width
self.best_line_break = '\n'
if line_break in ['\r', '\n', '\r\n']:
self.best_line_break = line_break
# Tag prefixes.
self.tag_prefixes = None
# Prepared anchor and tag.
self.prepared_anchor = None
self.prepared_tag = None
# Scalar analysis and style.
self.analysis = None
self.style = None
def dispose(self):
# Reset the state attributes (to clear self-references)
self.states = []
self.state = None
def emit(self, event):
self.events.append(event)
while not self.need_more_events():
self.event = self.events.pop(0)
self.state()
self.event = None
# In some cases, we wait for a few next events before emitting: the
# lookahead lets the emitter detect empty documents and empty collections
# before committing to a style (see the check_empty_* helpers below).
def need_more_events(self):
if not self.events:
return True
event = self.events[0]
if isinstance(event, DocumentStartEvent):
return self.need_events(1)
elif isinstance(event, SequenceStartEvent):
return self.need_events(2)
elif isinstance(event, MappingStartEvent):
return self.need_events(3)
else:
return False
def need_events(self, count):
level = 0
for event in self.events[1:]:
if isinstance(event, (DocumentStartEvent, CollectionStartEvent)):
level += 1
elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)):
level -= 1
elif isinstance(event, StreamEndEvent):
level = -1
if level < 0:
return False
return (len(self.events) < count+1)
def increase_indent(self, flow=False, indentless=False):
self.indents.append(self.indent)
if self.indent is None:
if flow:
self.indent = self.best_indent
else:
self.indent = 0
elif not indentless:
self.indent += self.best_indent
# States.
# Stream handlers.
def expect_stream_start(self):
if isinstance(self.event, StreamStartEvent):
if self.event.encoding and not hasattr(self.stream, 'encoding'):
self.encoding = self.event.encoding
self.write_stream_start()
self.state = self.expect_first_document_start
else:
raise EmitterError("expected StreamStartEvent, but got %s"
% self.event)
def expect_nothing(self):
raise EmitterError("expected nothing, but got %s" % self.event)
# Document handlers.
def expect_first_document_start(self):
return self.expect_document_start(first=True)
def expect_document_start(self, first=False):
if isinstance(self.event, DocumentStartEvent):
if (self.event.version or self.event.tags) and self.open_ended:
self.write_indicator('...', True)
self.write_indent()
if self.event.version:
version_text = self.prepare_version(self.event.version)
self.write_version_directive(version_text)
self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
if self.event.tags:
handles = sorted(self.event.tags.keys())
for handle in handles:
prefix = self.event.tags[handle]
self.tag_prefixes[prefix] = handle
handle_text = self.prepare_tag_handle(handle)
prefix_text = self.prepare_tag_prefix(prefix)
self.write_tag_directive(handle_text, prefix_text)
implicit = (first and not self.event.explicit and not self.canonical
and not self.event.version and not self.event.tags
and not self.check_empty_document())
if not implicit:
self.write_indent()
self.write_indicator('---', True)
if self.canonical:
self.write_indent()
self.state = self.expect_document_root
elif isinstance(self.event, StreamEndEvent):
if self.open_ended:
self.write_indicator('...', True)
self.write_indent()
self.write_stream_end()
self.state = self.expect_nothing
else:
raise EmitterError("expected DocumentStartEvent, but got %s"
% self.event)
def expect_document_end(self):
if isinstance(self.event, DocumentEndEvent):
self.write_indent()
if self.event.explicit:
self.write_indicator('...', True)
self.write_indent()
self.flush_stream()
self.state = self.expect_document_start
else:
raise EmitterError("expected DocumentEndEvent, but got %s"
% self.event)
def expect_document_root(self):
self.states.append(self.expect_document_end)
self.expect_node(root=True)
# Node handlers.
def expect_node(self, root=False, sequence=False, mapping=False,
simple_key=False):
self.root_context = root
self.sequence_context = sequence
self.mapping_context = mapping
self.simple_key_context = simple_key
if isinstance(self.event, AliasEvent):
self.expect_alias()
elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)):
self.process_anchor('&')
self.process_tag()
if isinstance(self.event, ScalarEvent):
self.expect_scalar()
elif isinstance(self.event, SequenceStartEvent):
if self.flow_level or self.canonical or self.event.flow_style \
or self.check_empty_sequence():
self.expect_flow_sequence()
else:
self.expect_block_sequence()
elif isinstance(self.event, MappingStartEvent):
if self.flow_level or self.canonical or self.event.flow_style \
or self.check_empty_mapping():
self.expect_flow_mapping()
else:
self.expect_block_mapping()
else:
raise EmitterError("expected NodeEvent, but got %s" % self.event)
def expect_alias(self):
if self.event.anchor is None:
raise EmitterError("anchor is not specified for alias")
self.process_anchor('*')
self.state = self.states.pop()
def expect_scalar(self):
self.increase_indent(flow=True)
self.process_scalar()
self.indent = self.indents.pop()
self.state = self.states.pop()
# Flow sequence handlers.
def expect_flow_sequence(self):
self.write_indicator('[', True, whitespace=True)
self.flow_level += 1
self.increase_indent(flow=True)
self.state = self.expect_first_flow_sequence_item
def expect_first_flow_sequence_item(self):
if isinstance(self.event, SequenceEndEvent):
self.indent = self.indents.pop()
self.flow_level -= 1
self.write_indicator(']', False)
self.state = self.states.pop()
else:
if self.canonical or self.column > self.best_width:
self.write_indent()
self.states.append(self.expect_flow_sequence_item)
self.expect_node(sequence=True)
def expect_flow_sequence_item(self):
if isinstance(self.event, SequenceEndEvent):
self.indent = self.indents.pop()
self.flow_level -= 1
if self.canonical:
self.write_indicator(',', False)
self.write_indent()
self.write_indicator(']', False)
self.state = self.states.pop()
else:
self.write_indicator(',', False)
if self.canonical or self.column > self.best_width:
self.write_indent()
self.states.append(self.expect_flow_sequence_item)
self.expect_node(sequence=True)
# Flow mapping handlers.
def expect_flow_mapping(self):
self.write_indicator('{', True, whitespace=True)
self.flow_level += 1
self.increase_indent(flow=True)
self.state = self.expect_first_flow_mapping_key
def expect_first_flow_mapping_key(self):
if isinstance(self.event, MappingEndEvent):
self.indent = self.indents.pop()
self.flow_level -= 1
self.write_indicator('}', False)
self.state = self.states.pop()
else:
if self.canonical or self.column > self.best_width:
self.write_indent()
if not self.canonical and self.check_simple_key():
self.states.append(self.expect_flow_mapping_simple_value)
self.expect_node(mapping=True, simple_key=True)
else:
self.write_indicator('?', True)
self.states.append(self.expect_flow_mapping_value)
self.expect_node(mapping=True)
def expect_flow_mapping_key(self):
if isinstance(self.event, MappingEndEvent):
self.indent = self.indents.pop()
self.flow_level -= 1
if self.canonical:
self.write_indicator(',', False)
self.write_indent()
self.write_indicator('}', False)
self.state = self.states.pop()
else:
self.write_indicator(',', False)
if self.canonical or self.column > self.best_width:
self.write_indent()
if not self.canonical and self.check_simple_key():
self.states.append(self.expect_flow_mapping_simple_value)
self.expect_node(mapping=True, simple_key=True)
else:
self.write_indicator('?', True)
self.states.append(self.expect_flow_mapping_value)
self.expect_node(mapping=True)
def expect_flow_mapping_simple_value(self):
self.write_indicator(':', False)
self.states.append(self.expect_flow_mapping_key)
self.expect_node(mapping=True)
def expect_flow_mapping_value(self):
if self.canonical or self.column > self.best_width:
self.write_indent()
self.write_indicator(':', True)
self.states.append(self.expect_flow_mapping_key)
self.expect_node(mapping=True)
# Block sequence handlers.
def expect_block_sequence(self):
indentless = (self.mapping_context and not self.indention)
self.increase_indent(flow=False, indentless=indentless)
self.state = self.expect_first_block_sequence_item
def expect_first_block_sequence_item(self):
return self.expect_block_sequence_item(first=True)
def expect_block_sequence_item(self, first=False):
if not first and isinstance(self.event, SequenceEndEvent):
self.indent = self.indents.pop()
self.state = self.states.pop()
else:
self.write_indent()
self.write_indicator('-', True, indention=True)
self.states.append(self.expect_block_sequence_item)
self.expect_node(sequence=True)
# Block mapping handlers.
def expect_block_mapping(self):
self.increase_indent(flow=False)
self.state = self.expect_first_block_mapping_key
def expect_first_block_mapping_key(self):
return self.expect_block_mapping_key(first=True)
def expect_block_mapping_key(self, first=False):
if not first and isinstance(self.event, MappingEndEvent):
self.indent = self.indents.pop()
self.state = self.states.pop()
else:
self.write_indent()
if self.check_simple_key():
self.states.append(self.expect_block_mapping_simple_value)
self.expect_node(mapping=True, simple_key=True)
else:
self.write_indicator('?', True, indention=True)
self.states.append(self.expect_block_mapping_value)
self.expect_node(mapping=True)
def expect_block_mapping_simple_value(self):
self.write_indicator(':', False)
self.states.append(self.expect_block_mapping_key)
self.expect_node(mapping=True)
def expect_block_mapping_value(self):
self.write_indent()
self.write_indicator(':', True, indention=True)
self.states.append(self.expect_block_mapping_key)
self.expect_node(mapping=True)
# Checkers.
def check_empty_sequence(self):
return (isinstance(self.event, SequenceStartEvent) and self.events
and isinstance(self.events[0], SequenceEndEvent))
def check_empty_mapping(self):
return (isinstance(self.event, MappingStartEvent) and self.events
and isinstance(self.events[0], MappingEndEvent))
def check_empty_document(self):
if not isinstance(self.event, DocumentStartEvent) or not self.events:
return False
event = self.events[0]
return (isinstance(event, ScalarEvent) and event.anchor is None
and event.tag is None and event.implicit and event.value == '')
def check_simple_key(self):
length = 0
if isinstance(self.event, NodeEvent) and self.event.anchor is not None:
if self.prepared_anchor is None:
self.prepared_anchor = self.prepare_anchor(self.event.anchor)
length += len(self.prepared_anchor)
if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \
and self.event.tag is not None:
if self.prepared_tag is None:
self.prepared_tag = self.prepare_tag(self.event.tag)
length += len(self.prepared_tag)
if isinstance(self.event, ScalarEvent):
if self.analysis is None:
self.analysis = self.analyze_scalar(self.event.value)
length += len(self.analysis.scalar)
return (length < 128 and (isinstance(self.event, AliasEvent)
or (isinstance(self.event, ScalarEvent)
and not self.analysis.empty and not self.analysis.multiline)
or self.check_empty_sequence() or self.check_empty_mapping()))
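# (Added note: the 128-character cap above is the emitter's own conservative
# limit for simple keys; the YAML spec itself allows up to 1024 characters.)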
# Anchor, Tag, and Scalar processors.
def process_anchor(self, indicator):
if self.event.anchor is None:
self.prepared_anchor = None
return
if self.prepared_anchor is None:
self.prepared_anchor = self.prepare_anchor(self.event.anchor)
if self.prepared_anchor:
self.write_indicator(indicator+self.prepared_anchor, True)
self.prepared_anchor = None
def process_tag(self):
tag = self.event.tag
if isinstance(self.event, ScalarEvent):
if self.style is None:
self.style = self.choose_scalar_style()
if ((not self.canonical or tag is None) and
((self.style == '' and self.event.implicit[0])
or (self.style != '' and self.event.implicit[1]))):
self.prepared_tag = None
return
if self.event.implicit[0] and tag is None:
tag = '!'
self.prepared_tag = None
else:
if (not self.canonical or tag is None) and self.event.implicit:
self.prepared_tag = None
return
if tag is None:
raise EmitterError("tag is not specified")
if self.prepared_tag is None:
self.prepared_tag = self.prepare_tag(tag)
if self.prepared_tag:
self.write_indicator(self.prepared_tag, True)
self.prepared_tag = None
def choose_scalar_style(self):
if self.analysis is None:
self.analysis = self.analyze_scalar(self.event.value)
if self.event.style == '"' or self.canonical:
return '"'
if not self.event.style and self.event.implicit[0]:
if (not (self.simple_key_context and
(self.analysis.empty or self.analysis.multiline))
and (self.flow_level and self.analysis.allow_flow_plain
or (not self.flow_level and self.analysis.allow_block_plain))):
return ''
if self.event.style and self.event.style in '|>':
if (not self.flow_level and not self.simple_key_context
and self.analysis.allow_block):
return self.event.style
if not self.event.style or self.event.style == '\'':
if (self.analysis.allow_single_quoted and
not (self.simple_key_context and self.analysis.multiline)):
return '\''
return '"'
def process_scalar(self):
if self.analysis is None:
self.analysis = self.analyze_scalar(self.event.value)
if self.style is None:
self.style = self.choose_scalar_style()
split = (not self.simple_key_context)
#if self.analysis.multiline and split \
# and (not self.style or self.style in '\'\"'):
# self.write_indent()
if self.style == '"':
self.write_double_quoted(self.analysis.scalar, split)
elif self.style == '\'':
self.write_single_quoted(self.analysis.scalar, split)
elif self.style == '>':
self.write_folded(self.analysis.scalar)
elif self.style == '|':
self.write_literal(self.analysis.scalar)
else:
self.write_plain(self.analysis.scalar, split)
self.analysis = None
self.style = None
# Analyzers.
def prepare_version(self, version):
major, minor = version
if major != 1:
raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
return '%d.%d' % (major, minor)
def prepare_tag_handle(self, handle):
if not handle:
raise EmitterError("tag handle must not be empty")
if handle[0] != '!' or handle[-1] != '!':
raise EmitterError("tag handle must start and end with '!': %r" % handle)
for ch in handle[1:-1]:
if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
or ch in '-_'):
raise EmitterError("invalid character %r in the tag handle: %r"
% (ch, handle))
return handle
def prepare_tag_prefix(self, prefix):
if not prefix:
raise EmitterError("tag prefix must not be empty")
chunks = []
start = end = 0
if prefix[0] == '!':
end = 1
while end < len(prefix):
ch = prefix[end]
if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
or ch in '-;/?!:@&=+$,_.~*\'()[]':
end += 1
else:
if start < end:
chunks.append(prefix[start:end])
start = end = end+1
data = ch.encode('utf-8')
for ch in data:
chunks.append('%%%02X' % ch)  # iterating bytes yields ints in Python 3
if start < end:
chunks.append(prefix[start:end])
return ''.join(chunks)
def prepare_tag(self, tag):
if not tag:
raise EmitterError("tag must not be empty")
if tag == '!':
return tag
handle = None
suffix = tag
prefixes = sorted(self.tag_prefixes.keys())
for prefix in prefixes:
if tag.startswith(prefix) \
and (prefix == '!' or len(prefix) < len(tag)):
handle = self.tag_prefixes[prefix]
suffix = tag[len(prefix):]
chunks = []
start = end = 0
while end < len(suffix):
ch = suffix[end]
if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
or ch in '-;/?:@&=+$,_.~*\'()[]' \
or (ch == '!' and handle != '!'):
end += 1
else:
if start < end:
chunks.append(suffix[start:end])
start = end = end+1
data = ch.encode('utf-8')
for ch in data:
chunks.append('%%%02X' % ch)
if start < end:
chunks.append(suffix[start:end])
suffix_text = ''.join(chunks)
if handle:
return '%s%s' % (handle, suffix_text)
else:
return '!<%s>' % suffix_text
def prepare_anchor(self, anchor):
if not anchor:
raise EmitterError("anchor must not be empty")
for ch in anchor:
if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
or ch in '-_'):
raise EmitterError("invalid character %r in the anchor: %r"
% (ch, anchor))
return anchor
def analyze_scalar(self, scalar):
# Empty scalar is a special case.
if not scalar:
return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
allow_flow_plain=False, allow_block_plain=True,
allow_single_quoted=True, allow_double_quoted=True,
allow_block=False)
# Indicators and special characters.
block_indicators = False
flow_indicators = False
line_breaks = False
special_characters = False
# Important whitespace combinations.
leading_space = False
leading_break = False
trailing_space = False
trailing_break = False
break_space = False
space_break = False
# Check document indicators.
if scalar.startswith('---') or scalar.startswith('...'):
block_indicators = True
flow_indicators = True
# First character or preceded by a whitespace.
preceded_by_whitespace = True
# Last character or followed by a whitespace.
followed_by_whitespace = (len(scalar) == 1 or
scalar[1] in '\0 \t\r\n\x85\u2028\u2029')
# The previous character is a space.
previous_space = False
# The previous character is a break.
previous_break = False
index = 0
while index < len(scalar):
ch = scalar[index]
# Check for indicators.
if index == 0:
# Leading indicators are special characters.
if ch in '#,[]{}&*!|>\'\"%@`':
flow_indicators = True
block_indicators = True
if ch in '?:':
flow_indicators = True
if followed_by_whitespace:
block_indicators = True
if ch == '-' and followed_by_whitespace:
flow_indicators = True
block_indicators = True
else:
# Some indicators cannot appear in the middle of a scalar either.
if ch in ',?[]{}':
flow_indicators = True
if ch == ':':
flow_indicators = True
if followed_by_whitespace:
block_indicators = True
if ch == '#' and preceded_by_whitespace:
flow_indicators = True
block_indicators = True
# Check for line breaks, special, and unicode characters.
if ch in '\n\x85\u2028\u2029':
line_breaks = True
if not (ch == '\n' or '\x20' <= ch <= '\x7E'):
if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF'
or '\uE000' <= ch <= '\uFFFD'
or '\U00010000' <= ch < '\U0010ffff') and ch != '\uFEFF':
unicode_characters = True  # recorded but not used further in this version
if not self.allow_unicode:
special_characters = True
else:
special_characters = True
# Detect important whitespace combinations.
if ch == ' ':
if index == 0:
leading_space = True
if index == len(scalar)-1:
trailing_space = True
if previous_break:
break_space = True
previous_space = True
previous_break = False
elif ch in '\n\x85\u2028\u2029':
if index == 0:
leading_break = True
if index == len(scalar)-1:
trailing_break = True
if previous_space:
space_break = True
previous_space = False
previous_break = True
else:
previous_space = False
previous_break = False
# Prepare for the next character.
index += 1
preceded_by_whitespace = (ch in '\0 \t\r\n\x85\u2028\u2029')
followed_by_whitespace = (index+1 >= len(scalar) or
scalar[index+1] in '\0 \t\r\n\x85\u2028\u2029')
# Let's decide what styles are allowed.
allow_flow_plain = True
allow_block_plain = True
allow_single_quoted = True
allow_double_quoted = True
allow_block = True
# Leading and trailing whitespaces are bad for plain scalars.
if (leading_space or leading_break
or trailing_space or trailing_break):
allow_flow_plain = allow_block_plain = False
# We do not permit trailing spaces for block scalars.
if trailing_space:
allow_block = False
# Spaces at the beginning of a new line are only acceptable for block
# scalars.
if break_space:
allow_flow_plain = allow_block_plain = allow_single_quoted = False
# Spaces followed by breaks, as well as special characters, are only
# allowed for double-quoted scalars.
if space_break or special_characters:
allow_flow_plain = allow_block_plain = \
allow_single_quoted = allow_block = False
# Although the plain scalar writer supports breaks, we never emit
# multiline plain scalars.
if line_breaks:
allow_flow_plain = allow_block_plain = False
# Flow indicators are forbidden for flow plain scalars.
if flow_indicators:
allow_flow_plain = False
# Block indicators are forbidden for block plain scalars.
if block_indicators:
allow_block_plain = False
return ScalarAnalysis(scalar=scalar,
empty=False, multiline=line_breaks,
allow_flow_plain=allow_flow_plain,
allow_block_plain=allow_block_plain,
allow_single_quoted=allow_single_quoted,
allow_double_quoted=allow_double_quoted,
allow_block=allow_block)
# Writers.
def flush_stream(self):
if hasattr(self.stream, 'flush'):
self.stream.flush()
def write_stream_start(self):
# Write BOM if needed.
if self.encoding and self.encoding.startswith('utf-16'):
self.stream.write('\uFEFF'.encode(self.encoding))
def write_stream_end(self):
self.flush_stream()
def write_indicator(self, indicator, need_whitespace,
whitespace=False, indention=False):
if self.whitespace or not need_whitespace:
data = indicator
else:
data = ' '+indicator
self.whitespace = whitespace
self.indention = self.indention and indention
self.column += len(data)
self.open_ended = False
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
def write_indent(self):
indent = self.indent or 0
if not self.indention or self.column > indent \
or (self.column == indent and not self.whitespace):
self.write_line_break()
if self.column < indent:
self.whitespace = True
data = ' '*(indent-self.column)
self.column = indent
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
def write_line_break(self, data=None):
if data is None:
data = self.best_line_break
self.whitespace = True
self.indention = True
self.line += 1
self.column = 0
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
def write_version_directive(self, version_text):
data = '%%YAML %s' % version_text
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
self.write_line_break()
def write_tag_directive(self, handle_text, prefix_text):
data = '%%TAG %s %s' % (handle_text, prefix_text)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
self.write_line_break()
# Scalar streams.
def write_single_quoted(self, text, split=True):
self.write_indicator('\'', True)
spaces = False
breaks = False
start = end = 0
while end <= len(text):
ch = None
if end < len(text):
ch = text[end]
if spaces:
if ch is None or ch != ' ':
if start+1 == end and self.column > self.best_width and split \
and start != 0 and end != len(text):
self.write_indent()
else:
data = text[start:end]
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
start = end
elif breaks:
if ch is None or ch not in '\n\x85\u2028\u2029':
if text[start] == '\n':
self.write_line_break()
for br in text[start:end]:
if br == '\n':
self.write_line_break()
else:
self.write_line_break(br)
self.write_indent()
start = end
else:
if ch is None or ch in ' \n\x85\u2028\u2029' or ch == '\'':
if start < end:
data = text[start:end]
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
start = end
if ch == '\'':
data = '\'\''
self.column += 2
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
start = end + 1
if ch is not None:
spaces = (ch == ' ')
breaks = (ch in '\n\x85\u2028\u2029')
end += 1
self.write_indicator('\'', False)
ESCAPE_REPLACEMENTS = {
'\0': '0',
'\x07': 'a',
'\x08': 'b',
'\x09': 't',
'\x0A': 'n',
'\x0B': 'v',
'\x0C': 'f',
'\x0D': 'r',
'\x1B': 'e',
'\"': '\"',
'\\': '\\',
'\x85': 'N',
'\xA0': '_',
'\u2028': 'L',
'\u2029': 'P',
}
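# So, for instance, a BEL character is emitted as \a and U+2028 (LINE
# SEPARATOR) as \L inside double-quoted scalars; characters without a named
# escape fall through to the \xXX / \uXXXX / \UXXXXXXXX branches below.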
def write_double_quoted(self, text, split=True):
self.write_indicator('"', True)
start = end = 0
while end <= len(text):
ch = None
if end < len(text):
ch = text[end]
if ch is None or ch in '"\\\x85\u2028\u2029\uFEFF' \
or not ('\x20' <= ch <= '\x7E'
or (self.allow_unicode
and ('\xA0' <= ch <= '\uD7FF'
or '\uE000' <= ch <= '\uFFFD'))):
if start < end:
data = text[start:end]
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
start = end
if ch is not None:
if ch in self.ESCAPE_REPLACEMENTS:
data = '\\'+self.ESCAPE_REPLACEMENTS[ch]
elif ch <= '\xFF':
data = '\\x%02X' % ord(ch)
elif ch <= '\uFFFF':
data = '\\u%04X' % ord(ch)
else:
data = '\\U%08X' % ord(ch)
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
start = end+1
if 0 < end < len(text)-1 and (ch == ' ' or start >= end) \
and self.column+(end-start) > self.best_width and split:
data = text[start:end]+'\\'
if start < end:
start = end
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
self.write_indent()
self.whitespace = False
self.indention = False
if text[start] == ' ':
data = '\\'
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
end += 1
self.write_indicator('"', False)
def determine_block_hints(self, text):
hints = ''
if text:
if text[0] in ' \n\x85\u2028\u2029':
hints += str(self.best_indent)
if text[-1] not in '\n\x85\u2028\u2029':
hints += '-'
elif len(text) == 1 or text[-2] in '\n\x85\u2028\u2029':
hints += '+'
return hints
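# Illustrative hint outcomes (added commentary): "text\n" needs no hint;
# "text" (no trailing break) yields '-' (strip chomping); "text\n\n" yields
# '+' (keep chomping); a leading space or break adds an explicit indentation
# digit so the parser can recover the block indent.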
def write_folded(self, text):
hints = self.determine_block_hints(text)
self.write_indicator('>'+hints, True)
if hints[-1:] == '+':
self.open_ended = True
self.write_line_break()
leading_space = True
spaces = False
breaks = True
start = end = 0
while end <= len(text):
ch = None
if end < len(text):
ch = text[end]
if breaks:
if ch is None or ch not in '\n\x85\u2028\u2029':
if not leading_space and ch is not None and ch != ' ' \
and text[start] == '\n':
self.write_line_break()
leading_space = (ch == ' ')
for br in text[start:end]:
if br == '\n':
self.write_line_break()
else:
self.write_line_break(br)
if ch is not None:
self.write_indent()
start = end
elif spaces:
if ch != ' ':
if start+1 == end and self.column > self.best_width:
self.write_indent()
else:
data = text[start:end]
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
start = end
else:
if ch is None or ch in ' \n\x85\u2028\u2029':
data = text[start:end]
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
if ch is None:
self.write_line_break()
start = end
if ch is not None:
breaks = (ch in '\n\x85\u2028\u2029')
spaces = (ch == ' ')
end += 1
def write_literal(self, text):
hints = self.determine_block_hints(text)
self.write_indicator('|'+hints, True)
if hints[-1:] == '+':
self.open_ended = True
self.write_line_break()
breaks = True
start = end = 0
while end <= len(text):
ch = None
if end < len(text):
ch = text[end]
if breaks:
if ch is None or ch not in '\n\x85\u2028\u2029':
for br in text[start:end]:
if br == '\n':
self.write_line_break()
else:
self.write_line_break(br)
if ch is not None:
self.write_indent()
start = end
else:
if ch is None or ch in '\n\x85\u2028\u2029':
data = text[start:end]
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
if ch is None:
self.write_line_break()
start = end
if ch is not None:
breaks = (ch in '\n\x85\u2028\u2029')
end += 1
def write_plain(self, text, split=True):
if self.root_context:
self.open_ended = True
if not text:
return
if not self.whitespace:
data = ' '
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
self.whitespace = False
self.indention = False
spaces = False
breaks = False
start = end = 0
while end <= len(text):
ch = None
if end < len(text):
ch = text[end]
if spaces:
if ch != ' ':
if start+1 == end and self.column > self.best_width and split:
self.write_indent()
self.whitespace = False
self.indention = False
else:
data = text[start:end]
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
start = end
elif breaks:
if ch not in '\n\x85\u2028\u2029':
if text[start] == '\n':
self.write_line_break()
for br in text[start:end]:
if br == '\n':
self.write_line_break()
else:
self.write_line_break(br)
self.write_indent()
self.whitespace = False
self.indention = False
start = end
else:
if ch is None or ch in ' \n\x85\u2028\u2029':
data = text[start:end]
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
start = end
if ch is not None:
spaces = (ch == ' ')
breaks = (ch in '\n\x85\u2028\u2029')
end += 1
__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError']
class Mark:
def __init__(self, name, index, line, column, buffer, pointer):
self.name = name
self.index = index
self.line = line
self.column = column
self.buffer = buffer
self.pointer = pointer
def get_snippet(self, indent=4, max_length=75):
if self.buffer is None:
return None
head = ''
start = self.pointer
while start > 0 and self.buffer[start-1] not in '\0\r\n\x85\u2028\u2029':
start -= 1
if self.pointer-start > max_length/2-1:
head = ' ... '
start += 5
break
tail = ''
end = self.pointer
while end < len(self.buffer) and self.buffer[end] not in '\0\r\n\x85\u2028\u2029':
end += 1
if end-self.pointer > max_length/2-1:
tail = ' ... '
end -= 5
break
snippet = self.buffer[start:end]
return ' '*indent + head + snippet + tail + '\n' \
+ ' '*(indent+self.pointer-start+len(head)) + '^'
def __str__(self):
snippet = self.get_snippet()
where = " in \"%s\", line %d, column %d" \
% (self.name, self.line+1, self.column+1)
if snippet is not None:
where += ":\n"+snippet
return where
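# A hedged example of the formatting produced above: printing a Mark pointing
# at line 3, column 5 of a string input might yield
#
#     in "<unicode string>", line 3, column 5:
#         key: [1, 2
#             ^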
class YAMLError(Exception):
pass
class MarkedYAMLError(YAMLError):
def __init__(self, context=None, context_mark=None,
problem=None, problem_mark=None, note=None):
self.context = context
self.context_mark = context_mark
self.problem = problem
self.problem_mark = problem_mark
self.note = note
def __str__(self):
lines = []
if self.context is not None:
lines.append(self.context)
if self.context_mark is not None \
and (self.problem is None or self.problem_mark is None
or self.context_mark.name != self.problem_mark.name
or self.context_mark.line != self.problem_mark.line
or self.context_mark.column != self.problem_mark.column):
lines.append(str(self.context_mark))
if self.problem is not None:
lines.append(self.problem)
if self.problem_mark is not None:
lines.append(str(self.problem_mark))
if self.note is not None:
lines.append(self.note)
return '\n'.join(lines)
# Abstract classes.
class Event(object):
def __init__(self, start_mark=None, end_mark=None):
self.start_mark = start_mark
self.end_mark = end_mark
def __repr__(self):
attributes = [key for key in ['anchor', 'tag', 'implicit', 'value']
if hasattr(self, key)]
arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
for key in attributes])
return '%s(%s)' % (self.__class__.__name__, arguments)
class NodeEvent(Event):
def __init__(self, anchor, start_mark=None, end_mark=None):
self.anchor = anchor
self.start_mark = start_mark
self.end_mark = end_mark
class CollectionStartEvent(NodeEvent):
def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None,
flow_style=None):
self.anchor = anchor
self.tag = tag
self.implicit = implicit
self.start_mark = start_mark
self.end_mark = end_mark
self.flow_style = flow_style
class CollectionEndEvent(Event):
pass
# Implementations.
class StreamStartEvent(Event):
def __init__(self, start_mark=None, end_mark=None, encoding=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.encoding = encoding
class StreamEndEvent(Event):
pass
class DocumentStartEvent(Event):
def __init__(self, start_mark=None, end_mark=None,
explicit=None, version=None, tags=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.explicit = explicit
self.version = version
self.tags = tags
class DocumentEndEvent(Event):
def __init__(self, start_mark=None, end_mark=None,
explicit=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.explicit = explicit
class AliasEvent(NodeEvent):
pass
class ScalarEvent(NodeEvent):
def __init__(self, anchor, tag, implicit, value,
start_mark=None, end_mark=None, style=None):
self.anchor = anchor
self.tag = tag
self.implicit = implicit
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
self.style = style
class SequenceStartEvent(CollectionStartEvent):
pass
class SequenceEndEvent(CollectionEndEvent):
pass
class MappingStartEvent(CollectionStartEvent):
pass
class MappingEndEvent(CollectionEndEvent):
pass
__all__ = ['BaseLoader', 'FullLoader', 'SafeLoader', 'Loader', 'UnsafeLoader']
from .reader import *
from .scanner import *
from .parser import *
from .composer import *
from .constructor import *
from .resolver import *
class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
BaseConstructor.__init__(self)
BaseResolver.__init__(self)
class FullLoader(Reader, Scanner, Parser, Composer, FullConstructor, Resolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
FullConstructor.__init__(self)
Resolver.__init__(self)
class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
SafeConstructor.__init__(self)
Resolver.__init__(self)
class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
Constructor.__init__(self)
Resolver.__init__(self)
# UnsafeLoader is the same as Loader (which is and was always unsafe on
# untrusted input). Use of either Loader or UnsafeLoader should be rare, since
# FullLoader should be able to load almost all YAML safely. Loader is left intact
# to ensure backwards compatibility.
class UnsafeLoader(Reader, Scanner, Parser, Composer, Constructor, Resolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
Constructor.__init__(self)
Resolver.__init__(self)
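# Usage sketch for the loaders above (assumes the top-level yaml.load helper):
#
#     import yaml
#     yaml.load("a: 1", Loader=yaml.SafeLoader)   # plain data types only
#     yaml.load("a: 1", Loader=yaml.FullLoader)   # most python/* tags too
#     # UnsafeLoader additionally enables python/object/apply and behaves
#     # like the legacy Loader; never use either on untrusted input.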
class Node(object):
def __init__(self, tag, value, start_mark, end_mark):
self.tag = tag
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
def __repr__(self):
value = self.value
#if isinstance(value, list):
# if len(value) == 0:
# value = '<empty>'
# elif len(value) == 1:
# value = '<1 item>'
# else:
# value = '<%d items>' % len(value)
#else:
# if len(value) > 75:
# value = repr(value[:70]+u' ... ')
# else:
# value = repr(value)
value = repr(value)
return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value)
class ScalarNode(Node):
id = 'scalar'
def __init__(self, tag, value,
start_mark=None, end_mark=None, style=None):
self.tag = tag
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
self.style = style
class CollectionNode(Node):
def __init__(self, tag, value,
start_mark=None, end_mark=None, flow_style=None):
self.tag = tag
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
self.flow_style = flow_style
class SequenceNode(CollectionNode):
id = 'sequence'
class MappingNode(CollectionNode):
id = 'mapping'
# The following YAML grammar is LL(1) and is parsed by a recursive descent
# parser.
#
# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
# implicit_document ::= block_node DOCUMENT-END*
# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
# block_node_or_indentless_sequence ::=
# ALIAS
# | properties (block_content | indentless_block_sequence)?
# | block_content
# | indentless_block_sequence
# block_node ::= ALIAS
# | properties block_content?
# | block_content
# flow_node ::= ALIAS
# | properties flow_content?
# | flow_content
# properties ::= TAG ANCHOR? | ANCHOR TAG?
# block_content ::= block_collection | flow_collection | SCALAR
# flow_content ::= flow_collection | SCALAR
# block_collection ::= block_sequence | block_mapping
# flow_collection ::= flow_sequence | flow_mapping
# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
# indentless_sequence ::= (BLOCK-ENTRY block_node?)+
# block_mapping ::= BLOCK-MAPPING_START
# ((KEY block_node_or_indentless_sequence?)?
# (VALUE block_node_or_indentless_sequence?)?)*
# BLOCK-END
# flow_sequence ::= FLOW-SEQUENCE-START
# (flow_sequence_entry FLOW-ENTRY)*
# flow_sequence_entry?
# FLOW-SEQUENCE-END
# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
# flow_mapping ::= FLOW-MAPPING-START
# (flow_mapping_entry FLOW-ENTRY)*
# flow_mapping_entry?
# FLOW-MAPPING-END
# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
#
# FIRST sets:
#
# stream: { STREAM-START }
# explicit_document: { DIRECTIVE DOCUMENT-START }
# implicit_document: FIRST(block_node)
# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_sequence: { BLOCK-SEQUENCE-START }
# block_mapping: { BLOCK-MAPPING-START }
# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY }
# indentless_sequence: { ENTRY }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_sequence: { FLOW-SEQUENCE-START }
# flow_mapping: { FLOW-MAPPING-START }
# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
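# For instance, the document "a: 1" is parsed into the event stream
#   StreamStart, DocumentStart, MappingStart, Scalar('a'), Scalar('1'),
#   MappingEnd, DocumentEnd, StreamEnd,
# which can be inspected via the assumed top-level helper yaml.parse:
#
#     import yaml
#     for event in yaml.parse("a: 1"):
#         print(event)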
__all__ = ['Parser', 'ParserError']
from .error import MarkedYAMLError
from .tokens import *
from .events import *
from .scanner import *
class ParserError(MarkedYAMLError):
pass
class Parser:
# Since writing a recursive-descent parser is a straightforward task, we
# do not give many comments here.
DEFAULT_TAGS = {
'!': '!',
'!!': 'tag:yaml.org,2002:',
}
def __init__(self):
self.current_event = None
self.yaml_version = None
self.tag_handles = {}
self.states = []
self.marks = []
self.state = self.parse_stream_start
def dispose(self):
# Reset the state attributes (to clear self-references)
self.states = []
self.state = None
def check_event(self, *choices):
# Check the type of the next event.
if self.current_event is None:
if self.state:
self.current_event = self.state()
if self.current_event is not None:
if not choices:
return True
for choice in choices:
if isinstance(self.current_event, choice):
return True
return False
def peek_event(self):
# Get the next event.
if self.current_event is None:
if self.state:
self.current_event = self.state()
return self.current_event
def get_event(self):
# Get the next event and proceed further.
if self.current_event is None:
if self.state:
self.current_event = self.state()
value = self.current_event
self.current_event = None
return value
# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
# implicit_document ::= block_node DOCUMENT-END*
# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
def parse_stream_start(self):
# Parse the stream start.
token = self.get_token()
event = StreamStartEvent(token.start_mark, token.end_mark,
encoding=token.encoding)
# Prepare the next state.
self.state = self.parse_implicit_document_start
return event
def parse_implicit_document_start(self):
# Parse an implicit document.
if not self.check_token(DirectiveToken, DocumentStartToken,
StreamEndToken):
self.tag_handles = self.DEFAULT_TAGS
token = self.peek_token()
start_mark = end_mark = token.start_mark
event = DocumentStartEvent(start_mark, end_mark,
explicit=False)
# Prepare the next state.
self.states.append(self.parse_document_end)
self.state = self.parse_block_node
return event
else:
return self.parse_document_start()
def parse_document_start(self):
# Parse any extra document end indicators.
while self.check_token(DocumentEndToken):
self.get_token()
# Parse an explicit document.
if not self.check_token(StreamEndToken):
token = self.peek_token()
start_mark = token.start_mark
version, tags = self.process_directives()
if not self.check_token(DocumentStartToken):
raise ParserError(None, None,
"expected '<document start>', but found %r"
% self.peek_token().id,
self.peek_token().start_mark)
token = self.get_token()
end_mark = token.end_mark
event = DocumentStartEvent(start_mark, end_mark,
explicit=True, version=version, tags=tags)
self.states.append(self.parse_document_end)
self.state = self.parse_document_content
else:
# Parse the end of the stream.
token = self.get_token()
event = StreamEndEvent(token.start_mark, token.end_mark)
assert not self.states
assert not self.marks
self.state = None
return event
def parse_document_end(self):
# Parse the document end.
token = self.peek_token()
start_mark = end_mark = token.start_mark
explicit = False
if self.check_token(DocumentEndToken):
token = self.get_token()
end_mark = token.end_mark
explicit = True
event = DocumentEndEvent(start_mark, end_mark,
explicit=explicit)
# Prepare the next state.
self.state = self.parse_document_start
return event
def parse_document_content(self):
if self.check_token(DirectiveToken,
DocumentStartToken, DocumentEndToken, StreamEndToken):
event = self.process_empty_scalar(self.peek_token().start_mark)
self.state = self.states.pop()
return event
else:
return self.parse_block_node()
def process_directives(self):
self.yaml_version = None
self.tag_handles = {}
while self.check_token(DirectiveToken):
token = self.get_token()
if token.name == 'YAML':
if self.yaml_version is not None:
raise ParserError(None, None,
"found duplicate YAML directive", token.start_mark)
major, minor = token.value
if major != 1:
raise ParserError(None, None,
"found incompatible YAML document (version 1.* is required)",
token.start_mark)
self.yaml_version = token.value
elif token.name == 'TAG':
handle, prefix = token.value
if handle in self.tag_handles:
raise ParserError(None, None,
"duplicate tag handle %r" % handle,
token.start_mark)
self.tag_handles[handle] = prefix
if self.tag_handles:
value = self.yaml_version, self.tag_handles.copy()
else:
value = self.yaml_version, None
for key in self.DEFAULT_TAGS:
if key not in self.tag_handles:
self.tag_handles[key] = self.DEFAULT_TAGS[key]
return value
# block_node_or_indentless_sequence ::= ALIAS
# | properties (block_content | indentless_block_sequence)?
# | block_content
# | indentless_block_sequence
# block_node ::= ALIAS
# | properties block_content?
# | block_content
# flow_node ::= ALIAS
# | properties flow_content?
# | flow_content
# properties ::= TAG ANCHOR? | ANCHOR TAG?
# block_content ::= block_collection | flow_collection | SCALAR
# flow_content ::= flow_collection | SCALAR
# block_collection ::= block_sequence | block_mapping
# flow_collection ::= flow_sequence | flow_mapping
def parse_block_node(self):
return self.parse_node(block=True)
def parse_flow_node(self):
return self.parse_node()
def parse_block_node_or_indentless_sequence(self):
return self.parse_node(block=True, indentless_sequence=True)
def parse_node(self, block=False, indentless_sequence=False):
if self.check_token(AliasToken):
token = self.get_token()
event = AliasEvent(token.value, token.start_mark, token.end_mark)
self.state = self.states.pop()
else:
anchor = None
tag = None
start_mark = end_mark = tag_mark = None
if self.check_token(AnchorToken):
token = self.get_token()
start_mark = token.start_mark
end_mark = token.end_mark
anchor = token.value
if self.check_token(TagToken):
token = self.get_token()
tag_mark = token.start_mark
end_mark = token.end_mark
tag = token.value
elif self.check_token(TagToken):
token = self.get_token()
start_mark = tag_mark = token.start_mark
end_mark = token.end_mark
tag = token.value
if self.check_token(AnchorToken):
token = self.get_token()
end_mark = token.end_mark
anchor = token.value
if tag is not None:
handle, suffix = tag
if handle is not None:
if handle not in self.tag_handles:
raise ParserError("while parsing a node", start_mark,
"found undefined tag handle %r" % handle,
tag_mark)
tag = self.tag_handles[handle]+suffix
else:
tag = suffix
#if tag == '!':
# raise ParserError("while parsing a node", start_mark,
# "found non-specific tag '!'", tag_mark,
# "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.")
if start_mark is None:
start_mark = end_mark = self.peek_token().start_mark
event = None
implicit = (tag is None or tag == '!')
if indentless_sequence and self.check_token(BlockEntryToken):
end_mark = self.peek_token().end_mark
event = SequenceStartEvent(anchor, tag, implicit,
start_mark, end_mark)
self.state = self.parse_indentless_sequence_entry
else:
if self.check_token(ScalarToken):
token = self.get_token()
end_mark = token.end_mark
if (token.plain and tag is None) or tag == '!':
implicit = (True, False)
elif tag is None:
implicit = (False, True)
else:
implicit = (False, False)
event = ScalarEvent(anchor, tag, implicit, token.value,
start_mark, end_mark, style=token.style)
self.state = self.states.pop()
elif self.check_token(FlowSequenceStartToken):
end_mark = self.peek_token().end_mark
event = SequenceStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=True)
self.state = self.parse_flow_sequence_first_entry
elif self.check_token(FlowMappingStartToken):
end_mark = self.peek_token().end_mark
event = MappingStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=True)
self.state = self.parse_flow_mapping_first_key
elif block and self.check_token(BlockSequenceStartToken):
end_mark = self.peek_token().start_mark
event = SequenceStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=False)
self.state = self.parse_block_sequence_first_entry
elif block and self.check_token(BlockMappingStartToken):
end_mark = self.peek_token().start_mark
event = MappingStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=False)
self.state = self.parse_block_mapping_first_key
elif anchor is not None or tag is not None:
# Empty scalars are allowed even if a tag or an anchor is
# specified.
event = ScalarEvent(anchor, tag, (implicit, False), '',
start_mark, end_mark)
self.state = self.states.pop()
else:
if block:
node = 'block'
else:
node = 'flow'
token = self.peek_token()
raise ParserError("while parsing a %s node" % node, start_mark,
"expected the node content, but found %r" % token.id,
token.start_mark)
return event
# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
def parse_block_sequence_first_entry(self):
token = self.get_token()
self.marks.append(token.start_mark)
return self.parse_block_sequence_entry()
def parse_block_sequence_entry(self):
if self.check_token(BlockEntryToken):
token = self.get_token()
if not self.check_token(BlockEntryToken, BlockEndToken):
self.states.append(self.parse_block_sequence_entry)
return self.parse_block_node()
else:
self.state = self.parse_block_sequence_entry
return self.process_empty_scalar(token.end_mark)
if not self.check_token(BlockEndToken):
token = self.peek_token()
raise ParserError("while parsing a block collection", self.marks[-1],
"expected <block end>, but found %r" % token.id, token.start_mark)
token = self.get_token()
event = SequenceEndEvent(token.start_mark, token.end_mark)
self.state = self.states.pop()
self.marks.pop()
return event
# indentless_sequence ::= (BLOCK-ENTRY block_node?)+
def parse_indentless_sequence_entry(self):
if self.check_token(BlockEntryToken):
token = self.get_token()
if not self.check_token(BlockEntryToken,
KeyToken, ValueToken, BlockEndToken):
self.states.append(self.parse_indentless_sequence_entry)
return self.parse_block_node()
else:
self.state = self.parse_indentless_sequence_entry
return self.process_empty_scalar(token.end_mark)
token = self.peek_token()
event = SequenceEndEvent(token.start_mark, token.start_mark)
self.state = self.states.pop()
return event
# block_mapping ::= BLOCK-MAPPING_START
# ((KEY block_node_or_indentless_sequence?)?
# (VALUE block_node_or_indentless_sequence?)?)*
# BLOCK-END
def parse_block_mapping_first_key(self):
token = self.get_token()
self.marks.append(token.start_mark)
return self.parse_block_mapping_key()
def parse_block_mapping_key(self):
if self.check_token(KeyToken):
token = self.get_token()
if not self.check_token(KeyToken, ValueToken, BlockEndToken):
self.states.append(self.parse_block_mapping_value)
return self.parse_block_node_or_indentless_sequence()
else:
self.state = self.parse_block_mapping_value
return self.process_empty_scalar(token.end_mark)
if not self.check_token(BlockEndToken):
token = self.peek_token()
raise ParserError("while parsing a block mapping", self.marks[-1],
"expected <block end>, but found %r" % token.id, token.start_mark)
token = self.get_token()
event = MappingEndEvent(token.start_mark, token.end_mark)
self.state = self.states.pop()
self.marks.pop()
return event
def parse_block_mapping_value(self):
if self.check_token(ValueToken):
token = self.get_token()
if not self.check_token(KeyToken, ValueToken, BlockEndToken):
self.states.append(self.parse_block_mapping_key)
return self.parse_block_node_or_indentless_sequence()
else:
self.state = self.parse_block_mapping_key
return self.process_empty_scalar(token.end_mark)
else:
self.state = self.parse_block_mapping_key
token = self.peek_token()
return self.process_empty_scalar(token.start_mark)
# flow_sequence ::= FLOW-SEQUENCE-START
# (flow_sequence_entry FLOW-ENTRY)*
# flow_sequence_entry?
# FLOW-SEQUENCE-END
# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
#
# Note that while the production rules for both flow_sequence_entry and
# flow_mapping_entry are identical, their interpretations differ.
# For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
# generates an inline mapping (set syntax).
def parse_flow_sequence_first_entry(self):
token = self.get_token()
self.marks.append(token.start_mark)
return self.parse_flow_sequence_entry(first=True)
def parse_flow_sequence_entry(self, first=False):
if not self.check_token(FlowSequenceEndToken):
if not first:
if self.check_token(FlowEntryToken):
self.get_token()
else:
token = self.peek_token()
raise ParserError("while parsing a flow sequence", self.marks[-1],
"expected ',' or ']', but got %r" % token.id, token.start_mark)
if self.check_token(KeyToken):
token = self.peek_token()
event = MappingStartEvent(None, None, True,
token.start_mark, token.end_mark,
flow_style=True)
self.state = self.parse_flow_sequence_entry_mapping_key
return event
elif not self.check_token(FlowSequenceEndToken):
self.states.append(self.parse_flow_sequence_entry)
return self.parse_flow_node()
token = self.get_token()
event = SequenceEndEvent(token.start_mark, token.end_mark)
self.state = self.states.pop()
self.marks.pop()
return event
def parse_flow_sequence_entry_mapping_key(self):
token = self.get_token()
if not self.check_token(ValueToken,
FlowEntryToken, FlowSequenceEndToken):
self.states.append(self.parse_flow_sequence_entry_mapping_value)
return self.parse_flow_node()
else:
self.state = self.parse_flow_sequence_entry_mapping_value
return self.process_empty_scalar(token.end_mark)
def parse_flow_sequence_entry_mapping_value(self):
if self.check_token(ValueToken):
token = self.get_token()
if not self.check_token(FlowEntryToken, FlowSequenceEndToken):
self.states.append(self.parse_flow_sequence_entry_mapping_end)
return self.parse_flow_node()
else:
self.state = self.parse_flow_sequence_entry_mapping_end
return self.process_empty_scalar(token.end_mark)
else:
self.state = self.parse_flow_sequence_entry_mapping_end
token = self.peek_token()
return self.process_empty_scalar(token.start_mark)
def parse_flow_sequence_entry_mapping_end(self):
self.state = self.parse_flow_sequence_entry
token = self.peek_token()
return MappingEndEvent(token.start_mark, token.start_mark)
# flow_mapping ::= FLOW-MAPPING-START
# (flow_mapping_entry FLOW-ENTRY)*
# flow_mapping_entry?
# FLOW-MAPPING-END
# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
def parse_flow_mapping_first_key(self):
token = self.get_token()
self.marks.append(token.start_mark)
return self.parse_flow_mapping_key(first=True)
def parse_flow_mapping_key(self, first=False):
if not self.check_token(FlowMappingEndToken):
if not first:
if self.check_token(FlowEntryToken):
self.get_token()
else:
token = self.peek_token()
raise ParserError("while parsing a flow mapping", self.marks[-1],
"expected ',' or '}', but got %r" % token.id, token.start_mark)
if self.check_token(KeyToken):
token = self.get_token()
if not self.check_token(ValueToken,
FlowEntryToken, FlowMappingEndToken):
self.states.append(self.parse_flow_mapping_value)
return self.parse_flow_node()
else:
self.state = self.parse_flow_mapping_value
return self.process_empty_scalar(token.end_mark)
elif not self.check_token(FlowMappingEndToken):
self.states.append(self.parse_flow_mapping_empty_value)
return self.parse_flow_node()
token = self.get_token()
event = MappingEndEvent(token.start_mark, token.end_mark)
self.state = self.states.pop()
self.marks.pop()
return event
def parse_flow_mapping_value(self):
if self.check_token(ValueToken):
token = self.get_token()
if not self.check_token(FlowEntryToken, FlowMappingEndToken):
self.states.append(self.parse_flow_mapping_key)
return self.parse_flow_node()
else:
self.state = self.parse_flow_mapping_key
return self.process_empty_scalar(token.end_mark)
else:
self.state = self.parse_flow_mapping_key
token = self.peek_token()
return self.process_empty_scalar(token.start_mark)
def parse_flow_mapping_empty_value(self):
self.state = self.parse_flow_mapping_key
return self.process_empty_scalar(self.peek_token().start_mark)
def process_empty_scalar(self, mark):
return ScalarEvent(None, None, (True, False), '', mark, mark)
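# A small sketch of the empty-scalar machinery above (assuming the usual
# top-level `yaml` package): a mapping key without a value still yields a
# ScalarEvent, carrying an empty string:
#
# >>> import yaml
# >>> [e.value for e in yaml.parse("key:") if hasattr(e, 'value')]
# ['key', '']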
# This module contains abstractions for the input stream. You don't have to
# look further; there is no pretty code here.
#
# We define two classes here.
#
# Mark(source, line, column)
# It's just a record and its only use is producing nice error messages.
# Parser does not use it for any other purpose.
#
# Reader(source, data)
# Reader determines the encoding of `data` and converts it to unicode.
# Reader provides the following methods and attributes:
# reader.peek(index=0) - return the character `index` positions ahead.
# reader.prefix(length=1) - return the next `length` characters.
# reader.forward(length=1) - move the current position `length` characters forward.
# reader.index - the index of the current character.
# reader.line, reader.column - the line and the column of the current character.
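#
# A minimal sketch of driving the Reader directly (names as defined below):
#
# >>> from yaml.reader import Reader
# >>> r = Reader('ab\ncd')
# >>> r.peek(), r.peek(1)
# ('a', 'b')
# >>> r.forward(3)                      # consume 'a', 'b' and the newline
# >>> r.line, r.column, r.peek()
# (1, 0, 'c')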
__all__ = ['Reader', 'ReaderError']
from .error import YAMLError, Mark
import codecs, re
class ReaderError(YAMLError):
def __init__(self, name, position, character, encoding, reason):
self.name = name
self.character = character
self.position = position
self.encoding = encoding
self.reason = reason
def __str__(self):
if isinstance(self.character, bytes):
return "'%s' codec can't decode byte #x%02x: %s\n" \
" in \"%s\", position %d" \
% (self.encoding, ord(self.character), self.reason,
self.name, self.position)
else:
return "unacceptable character #x%04x: %s\n" \
" in \"%s\", position %d" \
% (self.character, self.reason,
self.name, self.position)
class Reader(object):
# Reader:
# - determines the data encoding and converts it to a unicode string,
# - checks if characters are in allowed range,
# - adds '\0' to the end.
# Reader accepts
# - a `bytes` object,
# - a `str` object,
# - a file-like object with its `read` method returning `str`,
# - a file-like object with its `read` method returning `bytes`.
# Yeah, it's ugly and slow.
def __init__(self, stream):
self.name = None
self.stream = None
self.stream_pointer = 0
self.eof = True
self.buffer = ''
self.pointer = 0
self.raw_buffer = None
self.raw_decode = None
self.encoding = None
self.index = 0
self.line = 0
self.column = 0
if isinstance(stream, str):
self.name = "<unicode string>"
self.check_printable(stream)
self.buffer = stream+'\0'
elif isinstance(stream, bytes):
self.name = "<byte string>"
self.raw_buffer = stream
self.determine_encoding()
else:
self.stream = stream
self.name = getattr(stream, 'name', "<file>")
self.eof = False
self.raw_buffer = None
self.determine_encoding()
def peek(self, index=0):
try:
return self.buffer[self.pointer+index]
except IndexError:
self.update(index+1)
return self.buffer[self.pointer+index]
def prefix(self, length=1):
if self.pointer+length >= len(self.buffer):
self.update(length)
return self.buffer[self.pointer:self.pointer+length]
def forward(self, length=1):
if self.pointer+length+1 >= len(self.buffer):
self.update(length+1)
while length:
ch = self.buffer[self.pointer]
self.pointer += 1
self.index += 1
if ch in '\n\x85\u2028\u2029' \
or (ch == '\r' and self.buffer[self.pointer] != '\n'):
self.line += 1
self.column = 0
elif ch != '\uFEFF':
self.column += 1
length -= 1
def get_mark(self):
if self.stream is None:
return Mark(self.name, self.index, self.line, self.column,
self.buffer, self.pointer)
else:
return Mark(self.name, self.index, self.line, self.column,
None, None)
def determine_encoding(self):
while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2):
self.update_raw()
if isinstance(self.raw_buffer, bytes):
if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
self.raw_decode = codecs.utf_16_le_decode
self.encoding = 'utf-16-le'
elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
self.raw_decode = codecs.utf_16_be_decode
self.encoding = 'utf-16-be'
else:
self.raw_decode = codecs.utf_8_decode
self.encoding = 'utf-8'
self.update(1)
NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]')
def check_printable(self, data):
match = self.NON_PRINTABLE.search(data)
if match:
character = match.group()
position = self.index+(len(self.buffer)-self.pointer)+match.start()
raise ReaderError(self.name, position, ord(character),
'unicode', "special characters are not allowed")
def update(self, length):
if self.raw_buffer is None:
return
self.buffer = self.buffer[self.pointer:]
self.pointer = 0
while len(self.buffer) < length:
if not self.eof:
self.update_raw()
if self.raw_decode is not None:
try:
data, converted = self.raw_decode(self.raw_buffer,
'strict', self.eof)
except UnicodeDecodeError as exc:
character = self.raw_buffer[exc.start]
if self.stream is not None:
position = self.stream_pointer-len(self.raw_buffer)+exc.start
else:
position = exc.start
raise ReaderError(self.name, position, character,
exc.encoding, exc.reason)
else:
data = self.raw_buffer
converted = len(data)
self.check_printable(data)
self.buffer += data
self.raw_buffer = self.raw_buffer[converted:]
if self.eof:
self.buffer += '\0'
self.raw_buffer = None
break
def update_raw(self, size=4096):
data = self.stream.read(size)
if self.raw_buffer is None:
self.raw_buffer = data
else:
self.raw_buffer += data
self.stream_pointer += len(data)
if not data:
self.eof = True
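# The BOM sniffing above is what lets the top-level API accept raw bytes; a
# quick sketch (assuming the standard `yaml.safe_load` entry point):
#
# >>> import yaml
# >>> yaml.safe_load(b'a: 1')                    # no BOM: decoded as UTF-8
# {'a': 1}
# >>> yaml.safe_load('a: 1'.encode('utf-16'))    # BOM selects UTF-16
# {'a': 1}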
__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer',
'RepresenterError']
from .error import *
from .nodes import *
import datetime, copyreg, types, base64, collections
class RepresenterError(YAMLError):
pass
class BaseRepresenter:
yaml_representers = {}
yaml_multi_representers = {}
def __init__(self, default_style=None, default_flow_style=False, sort_keys=True):
self.default_style = default_style
self.sort_keys = sort_keys
self.default_flow_style = default_flow_style
self.represented_objects = {}
self.object_keeper = []
self.alias_key = None
def represent(self, data):
node = self.represent_data(data)
self.serialize(node)
self.represented_objects = {}
self.object_keeper = []
self.alias_key = None
def represent_data(self, data):
if self.ignore_aliases(data):
self.alias_key = None
else:
self.alias_key = id(data)
if self.alias_key is not None:
if self.alias_key in self.represented_objects:
node = self.represented_objects[self.alias_key]
#if node is None:
# raise RepresenterError("recursive objects are not allowed: %r" % data)
return node
#self.represented_objects[alias_key] = None
self.object_keeper.append(data)
data_types = type(data).__mro__
if data_types[0] in self.yaml_representers:
node = self.yaml_representers[data_types[0]](self, data)
else:
for data_type in data_types:
if data_type in self.yaml_multi_representers:
node = self.yaml_multi_representers[data_type](self, data)
break
else:
if None in self.yaml_multi_representers:
node = self.yaml_multi_representers[None](self, data)
elif None in self.yaml_representers:
node = self.yaml_representers[None](self, data)
else:
node = ScalarNode(None, str(data))
#if alias_key is not None:
# self.represented_objects[alias_key] = node
return node
@classmethod
def add_representer(cls, data_type, representer):
if not 'yaml_representers' in cls.__dict__:
cls.yaml_representers = cls.yaml_representers.copy()
cls.yaml_representers[data_type] = representer
@classmethod
def add_multi_representer(cls, data_type, representer):
if not 'yaml_multi_representers' in cls.__dict__:
cls.yaml_multi_representers = cls.yaml_multi_representers.copy()
cls.yaml_multi_representers[data_type] = representer
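# A hedged sketch of the registration hooks above, via the usual top-level
# helper `yaml.add_representer`; `Point` and `point_representer` are made up:
#
# >>> import yaml
# >>> class Point:
# ...     def __init__(self, x, y): self.x, self.y = x, y
# >>> def point_representer(dumper, data):
# ...     return dumper.represent_mapping('!point', {'x': data.x, 'y': data.y})
# >>> yaml.add_representer(Point, point_representer)
# >>> print(yaml.dump(Point(1, 2)), end='')
# !point
# x: 1
# y: 2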
def represent_scalar(self, tag, value, style=None):
if style is None:
style = self.default_style
node = ScalarNode(tag, value, style=style)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
return node
def represent_sequence(self, tag, sequence, flow_style=None):
value = []
node = SequenceNode(tag, value, flow_style=flow_style)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
best_style = True
for item in sequence:
node_item = self.represent_data(item)
if not (isinstance(node_item, ScalarNode) and not node_item.style):
best_style = False
value.append(node_item)
if flow_style is None:
if self.default_flow_style is not None:
node.flow_style = self.default_flow_style
else:
node.flow_style = best_style
return node
def represent_mapping(self, tag, mapping, flow_style=None):
value = []
node = MappingNode(tag, value, flow_style=flow_style)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
best_style = True
if hasattr(mapping, 'items'):
mapping = list(mapping.items())
if self.sort_keys:
try:
mapping = sorted(mapping)
except TypeError:
pass
for item_key, item_value in mapping:
node_key = self.represent_data(item_key)
node_value = self.represent_data(item_value)
if not (isinstance(node_key, ScalarNode) and not node_key.style):
best_style = False
if not (isinstance(node_value, ScalarNode) and not node_value.style):
best_style = False
value.append((node_key, node_value))
if flow_style is None:
if self.default_flow_style is not None:
node.flow_style = self.default_flow_style
else:
node.flow_style = best_style
return node
def ignore_aliases(self, data):
return False
class SafeRepresenter(BaseRepresenter):
def ignore_aliases(self, data):
if data is None:
return True
if isinstance(data, tuple) and data == ():
return True
if isinstance(data, (str, bytes, bool, int, float)):
return True
def represent_none(self, data):
return self.represent_scalar('tag:yaml.org,2002:null', 'null')
def represent_str(self, data):
return self.represent_scalar('tag:yaml.org,2002:str', data)
def represent_binary(self, data):
if hasattr(base64, 'encodebytes'):
data = base64.encodebytes(data).decode('ascii')
else:
data = base64.encodestring(data).decode('ascii')
return self.represent_scalar('tag:yaml.org,2002:binary', data, style='|')
def represent_bool(self, data):
if data:
value = 'true'
else:
value = 'false'
return self.represent_scalar('tag:yaml.org,2002:bool', value)
def represent_int(self, data):
return self.represent_scalar('tag:yaml.org,2002:int', str(data))
inf_value = 1e300
while repr(inf_value) != repr(inf_value*inf_value):
inf_value *= inf_value
def represent_float(self, data):
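# Note: `data != data` is the standard NaN test; the second clause is a
# fallback for platforms where NaN compares equal to everything (no
# ordinary float satisfies both data == 0.0 and data == 1.0).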
if data != data or (data == 0.0 and data == 1.0):
value = '.nan'
elif data == self.inf_value:
value = '.inf'
elif data == -self.inf_value:
value = '-.inf'
else:
value = repr(data).lower()
# Note that in some cases `repr(data)` represents a float number
# without the decimal part. For instance:
# >>> repr(1e17)
# '1e+17'
# Unfortunately, this is not a valid float representation according
# to the definition of the `!!float` tag. We fix this by adding
# '.0' before the 'e' symbol.
if '.' not in value and 'e' in value:
value = value.replace('e', '.0e', 1)
return self.represent_scalar('tag:yaml.org,2002:float', value)
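# A quick sketch of the '.0e' fix above (assuming the standard
# `yaml.safe_dump`):
#
# >>> import yaml
# >>> yaml.safe_dump(1e17)
# '1.0e+17\n...\n'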
def represent_list(self, data):
#pairs = (len(data) > 0 and isinstance(data, list))
#if pairs:
# for item in data:
# if not isinstance(item, tuple) or len(item) != 2:
# pairs = False
# break
#if not pairs:
return self.represent_sequence('tag:yaml.org,2002:seq', data)
#value = []
#for item_key, item_value in data:
# value.append(self.represent_mapping(u'tag:yaml.org,2002:map',
# [(item_key, item_value)]))
#return SequenceNode(u'tag:yaml.org,2002:pairs', value)
def represent_dict(self, data):
return self.represent_mapping('tag:yaml.org,2002:map', data)
def represent_set(self, data):
value = {}
for key in data:
value[key] = None
return self.represent_mapping('tag:yaml.org,2002:set', value)
def represent_date(self, data):
value = data.isoformat()
return self.represent_scalar('tag:yaml.org,2002:timestamp', value)
def represent_datetime(self, data):
value = data.isoformat(' ')
return self.represent_scalar('tag:yaml.org,2002:timestamp', value)
def represent_yaml_object(self, tag, data, cls, flow_style=None):
if hasattr(data, '__getstate__'):
state = data.__getstate__()
else:
state = data.__dict__.copy()
return self.represent_mapping(tag, state, flow_style=flow_style)
def represent_undefined(self, data):
raise RepresenterError("cannot represent an object", data)
SafeRepresenter.add_representer(type(None),
SafeRepresenter.represent_none)
SafeRepresenter.add_representer(str,
SafeRepresenter.represent_str)
SafeRepresenter.add_representer(bytes,
SafeRepresenter.represent_binary)
SafeRepresenter.add_representer(bool,
SafeRepresenter.represent_bool)
SafeRepresenter.add_representer(int,
SafeRepresenter.represent_int)
SafeRepresenter.add_representer(float,
SafeRepresenter.represent_float)
SafeRepresenter.add_representer(list,
SafeRepresenter.represent_list)
SafeRepresenter.add_representer(tuple,
SafeRepresenter.represent_list)
SafeRepresenter.add_representer(dict,
SafeRepresenter.represent_dict)
SafeRepresenter.add_representer(set,
SafeRepresenter.represent_set)
SafeRepresenter.add_representer(datetime.date,
SafeRepresenter.represent_date)
SafeRepresenter.add_representer(datetime.datetime,
SafeRepresenter.represent_datetime)
SafeRepresenter.add_representer(None,
SafeRepresenter.represent_undefined)
class Representer(SafeRepresenter):
def represent_complex(self, data):
if data.imag == 0.0:
data = '%r' % data.real
elif data.real == 0.0:
data = '%rj' % data.imag
elif data.imag > 0:
data = '%r+%rj' % (data.real, data.imag)
else:
data = '%r%rj' % (data.real, data.imag)
return self.represent_scalar('tag:yaml.org,2002:python/complex', data)
def represent_tuple(self, data):
return self.represent_sequence('tag:yaml.org,2002:python/tuple', data)
def represent_name(self, data):
name = '%s.%s' % (data.__module__, data.__name__)
return self.represent_scalar('tag:yaml.org,2002:python/name:'+name, '')
def represent_module(self, data):
return self.represent_scalar(
'tag:yaml.org,2002:python/module:'+data.__name__, '')
def represent_object(self, data):
# We use __reduce__ API to save the data. data.__reduce__ returns
# a tuple of length 2-5:
# (function, args, state, listitems, dictitems)
# For reconstructing, we call function(*args), then set its state,
# listitems, and dictitems if they are not None.
# A special case is when function.__name__ == '__newobj__'. In this
# case we create the object with args[0].__new__(*args).
# Another special case is when __reduce__ returns a string - we don't
# support it.
# We produce a !!python/object, !!python/object/new or
# !!python/object/apply node.
cls = type(data)
if cls in copyreg.dispatch_table:
reduce = copyreg.dispatch_table[cls](data)
elif hasattr(data, '__reduce_ex__'):
reduce = data.__reduce_ex__(2)
elif hasattr(data, '__reduce__'):
reduce = data.__reduce__()
else:
raise RepresenterError("cannot represent an object", data)
reduce = (list(reduce)+[None]*5)[:5]
function, args, state, listitems, dictitems = reduce
args = list(args)
if state is None:
state = {}
if listitems is not None:
listitems = list(listitems)
if dictitems is not None:
dictitems = dict(dictitems)
if function.__name__ == '__newobj__':
function = args[0]
args = args[1:]
tag = 'tag:yaml.org,2002:python/object/new:'
newobj = True
else:
tag = 'tag:yaml.org,2002:python/object/apply:'
newobj = False
function_name = '%s.%s' % (function.__module__, function.__name__)
if not args and not listitems and not dictitems \
and isinstance(state, dict) and newobj:
return self.represent_mapping(
'tag:yaml.org,2002:python/object:'+function_name, state)
if not listitems and not dictitems \
and isinstance(state, dict) and not state:
return self.represent_sequence(tag+function_name, args)
value = {}
if args:
value['args'] = args
if state or not isinstance(state, dict):
value['state'] = state
if listitems:
value['listitems'] = listitems
if dictitems:
value['dictitems'] = dictitems
return self.represent_mapping(tag+function_name, value)
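# A rough sketch of the __reduce__ path above; `Foo` is a made-up class, and
# note that loading such tags back requires an unsafe loader:
#
# >>> import yaml
# >>> class Foo:
# ...     def __init__(self): self.x = 1
# >>> print(yaml.dump(Foo()), end='')
# !!python/object:__main__.Foo
# x: 1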
def represent_ordered_dict(self, data):
# Provide uniform representation across different Python versions.
data_type = type(data)
tag = 'tag:yaml.org,2002:python/object/apply:%s.%s' \
% (data_type.__module__, data_type.__name__)
items = [[key, value] for key, value in data.items()]
return self.represent_sequence(tag, [items])
Representer.add_representer(complex,
Representer.represent_complex)
Representer.add_representer(tuple,
Representer.represent_tuple)
Representer.add_representer(type,
Representer.represent_name)
Representer.add_representer(collections.OrderedDict,
Representer.represent_ordered_dict)
Representer.add_representer(types.FunctionType,
Representer.represent_name)
Representer.add_representer(types.BuiltinFunctionType,
Representer.represent_name)
Representer.add_representer(types.ModuleType,
Representer.represent_module)
Representer.add_multi_representer(object,
Representer.represent_object)
__all__ = ['BaseResolver', 'Resolver']
from .error import *
from .nodes import *
import re
class ResolverError(YAMLError):
pass
class BaseResolver:
DEFAULT_SCALAR_TAG = 'tag:yaml.org,2002:str'
DEFAULT_SEQUENCE_TAG = 'tag:yaml.org,2002:seq'
DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map'
yaml_implicit_resolvers = {}
yaml_path_resolvers = {}
def __init__(self):
self.resolver_exact_paths = []
self.resolver_prefix_paths = []
@classmethod
def add_implicit_resolver(cls, tag, regexp, first):
if not 'yaml_implicit_resolvers' in cls.__dict__:
implicit_resolvers = {}
for key in cls.yaml_implicit_resolvers:
implicit_resolvers[key] = cls.yaml_implicit_resolvers[key][:]
cls.yaml_implicit_resolvers = implicit_resolvers
if first is None:
first = [None]
for ch in first:
cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp))
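# Sketch of the hook above through the usual top-level helper
# `yaml.add_implicit_resolver`; the '!celsius' tag and pattern are made up:
#
# >>> import yaml, re
# >>> yaml.add_implicit_resolver('!celsius', re.compile(r'^-?\d+C$'),
# ...                            list('-0123456789'))
#
# Afterwards a plain scalar like `21C` resolves to '!celsius' instead of
# falling through to the default '!!str'.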
@classmethod
def add_path_resolver(cls, tag, path, kind=None):
# Note: `add_path_resolver` is experimental. The API could be changed.
# `path` is a pattern that is matched against the path from the
# root to the node that is being considered. `path` elements are
# tuples `(node_check, index_check)`. `node_check` is a node class:
# `ScalarNode`, `SequenceNode`, `MappingNode` or `None`. `None`
# matches any kind of a node. `index_check` could be `None`, a boolean
# value, a string value, or a number. `None` and `False` match against
# any _value_ of sequence and mapping nodes. `True` matches against
# any _key_ of a mapping node. A string `index_check` matches against
# a mapping value that corresponds to a scalar key whose content is
# equal to the `index_check` value. An integer `index_check` matches
# against a sequence value with the index equal to `index_check`.
if not 'yaml_path_resolvers' in cls.__dict__:
cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy()
new_path = []
for element in path:
if isinstance(element, (list, tuple)):
if len(element) == 2:
node_check, index_check = element
elif len(element) == 1:
node_check = element[0]
index_check = True
else:
raise ResolverError("Invalid path element: %s" % element)
else:
node_check = None
index_check = element
if node_check is str:
node_check = ScalarNode
elif node_check is list:
node_check = SequenceNode
elif node_check is dict:
node_check = MappingNode
elif node_check not in [ScalarNode, SequenceNode, MappingNode] \
and not isinstance(node_check, str) \
and node_check is not None:
raise ResolverError("Invalid node checker: %s" % node_check)
if not isinstance(index_check, (str, int)) \
and index_check is not None:
raise ResolverError("Invalid index checker: %s" % index_check)
new_path.append((node_check, index_check))
if kind is str:
kind = ScalarNode
elif kind is list:
kind = SequenceNode
elif kind is dict:
kind = MappingNode
elif kind not in [ScalarNode, SequenceNode, MappingNode] \
and kind is not None:
raise ResolverError("Invalid node kind: %s" % kind)
cls.yaml_path_resolvers[tuple(new_path), kind] = tag
def descend_resolver(self, current_node, current_index):
if not self.yaml_path_resolvers:
return
exact_paths = {}
prefix_paths = []
if current_node:
depth = len(self.resolver_prefix_paths)
for path, kind in self.resolver_prefix_paths[-1]:
if self.check_resolver_prefix(depth, path, kind,
current_node, current_index):
if len(path) > depth:
prefix_paths.append((path, kind))
else:
exact_paths[kind] = self.yaml_path_resolvers[path, kind]
else:
for path, kind in self.yaml_path_resolvers:
if not path:
exact_paths[kind] = self.yaml_path_resolvers[path, kind]
else:
prefix_paths.append((path, kind))
self.resolver_exact_paths.append(exact_paths)
self.resolver_prefix_paths.append(prefix_paths)
def ascend_resolver(self):
if not self.yaml_path_resolvers:
return
self.resolver_exact_paths.pop()
self.resolver_prefix_paths.pop()
def check_resolver_prefix(self, depth, path, kind,
current_node, current_index):
node_check, index_check = path[depth-1]
if isinstance(node_check, str):
if current_node.tag != node_check:
return
elif node_check is not None:
if not isinstance(current_node, node_check):
return
if index_check is True and current_index is not None:
return
if (index_check is False or index_check is None) \
and current_index is None:
return
if isinstance(index_check, str):
if not (isinstance(current_index, ScalarNode)
and index_check == current_index.value):
return
elif isinstance(index_check, int) and not isinstance(index_check, bool):
if index_check != current_index:
return
return True
def resolve(self, kind, value, implicit):
if kind is ScalarNode and implicit[0]:
if value == '':
resolvers = self.yaml_implicit_resolvers.get('', [])
else:
resolvers = self.yaml_implicit_resolvers.get(value[0], [])
resolvers += self.yaml_implicit_resolvers.get(None, [])
for tag, regexp in resolvers:
if regexp.match(value):
return tag
implicit = implicit[1]
if self.yaml_path_resolvers:
exact_paths = self.resolver_exact_paths[-1]
if kind in exact_paths:
return exact_paths[kind]
if None in exact_paths:
return exact_paths[None]
if kind is ScalarNode:
return self.DEFAULT_SCALAR_TAG
elif kind is SequenceNode:
return self.DEFAULT_SEQUENCE_TAG
elif kind is MappingNode:
return self.DEFAULT_MAPPING_TAG
class Resolver(BaseResolver):
pass
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:bool',
re.compile(r'''^(?:yes|Yes|YES|no|No|NO
|true|True|TRUE|false|False|FALSE
|on|On|ON|off|Off|OFF)$''', re.X),
list('yYnNtTfFoO'))
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:float',
re.compile(r'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)?
|\.[0-9_]+(?:[eE][-+][0-9]+)?
|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]*
|[-+]?\.(?:inf|Inf|INF)
|\.(?:nan|NaN|NAN))$''', re.X),
list('-+0123456789.'))
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:int',
re.compile(r'''^(?:[-+]?0b[0-1_]+
|[-+]?0[0-7_]+
|[-+]?(?:0|[1-9][0-9_]*)
|[-+]?0x[0-9a-fA-F_]+
|[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X),
list('-+0123456789'))
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:merge',
re.compile(r'^(?:<<)$'),
['<'])
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:null',
re.compile(r'''^(?: ~
|null|Null|NULL
| )$''', re.X),
['~', 'n', 'N', ''])
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:timestamp',
re.compile(r'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]
|[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]?
(?:[Tt]|[ \t]+)[0-9][0-9]?
:[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)?
(?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X),
list('0123456789'))
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:value',
re.compile(r'^(?:=)$'),
['='])
# The following resolver is only for documentation purposes. It cannot work
# because plain scalars cannot start with '!', '&', or '*'.
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:yaml',
re.compile(r'^(?:!|&|\*)$'),
list('!&*'))
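# The net effect of the registrations above, in YAML 1.1 terms (assuming the
# standard `yaml.safe_load`):
#
# >>> import yaml
# >>> yaml.safe_load('on'), yaml.safe_load('~'), yaml.safe_load('.inf')
# (True, None, inf)
# >>> yaml.safe_load('12:34')    # sexagesimal integer
# 754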
# Scanner produces tokens of the following types:
# STREAM-START
# STREAM-END
# DIRECTIVE(name, value)
# DOCUMENT-START
# DOCUMENT-END
# BLOCK-SEQUENCE-START
# BLOCK-MAPPING-START
# BLOCK-END
# FLOW-SEQUENCE-START
# FLOW-MAPPING-START
# FLOW-SEQUENCE-END
# FLOW-MAPPING-END
# BLOCK-ENTRY
# FLOW-ENTRY
# KEY
# VALUE
# ALIAS(value)
# ANCHOR(value)
# TAG(value)
# SCALAR(value, plain, style)
#
# Read comments in the Scanner code for more details.
#
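# A quick sketch of the token stream (assuming the standard `yaml.scan`
# entry point):
#
# >>> import yaml
# >>> [type(t).__name__ for t in yaml.scan("a: 1")]
# ['StreamStartToken', 'BlockMappingStartToken', 'KeyToken', 'ScalarToken',
# 'ValueToken', 'ScalarToken', 'BlockEndToken', 'StreamEndToken']
#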
__all__ = ['Scanner', 'ScannerError']
from .error import MarkedYAMLError
from .tokens import *
class ScannerError(MarkedYAMLError):
pass
class SimpleKey:
# See the simple keys treatment below.
def __init__(self, token_number, required, index, line, column, mark):
self.token_number = token_number
self.required = required
self.index = index
self.line = line
self.column = column
self.mark = mark
class Scanner:
def __init__(self):
"""Initialize the scanner."""
# It is assumed that Scanner and Reader will have a common descendant.
# Reader does the dirty work of checking for BOM and converting the
# input data to Unicode. It also adds NUL to the end.
#
# Reader supports the following methods
# self.peek(i=0) # peek the next i-th character
# self.prefix(l=1) # peek the next l characters
# self.forward(l=1) # read the next l characters and move the pointer.
# Have we reached the end of the stream?
self.done = False
# The number of unclosed '{' and '['. `flow_level == 0` means block
# context.
self.flow_level = 0
# List of processed tokens that are not yet emitted.
self.tokens = []
# Add the STREAM-START token.
self.fetch_stream_start()
# Number of tokens that were emitted through the `get_token` method.
self.tokens_taken = 0
# The current indentation level.
self.indent = -1
# Past indentation levels.
self.indents = []
# Variables related to simple keys treatment.
# A simple key is a key that is not denoted by the '?' indicator.
# Example of simple keys:
# ---
# block simple key: value
# ? not a simple key:
# : { flow simple key: value }
# We emit the KEY token before all keys, so when we find a potential
# simple key, we try to locate the corresponding ':' indicator.
# Simple keys should be limited to a single line and 1024 characters.
# Can a simple key start at the current position? A simple key may
# start:
# - at the beginning of the line, not counting indentation spaces
# (in block context),
# - after '{', '[', ',' (in the flow context),
# - after '?', ':', '-' (in the block context).
# In the block context, this flag also signifies if a block collection
# may start at the current position.
self.allow_simple_key = True
# Keep track of possible simple keys. This is a dictionary. The key
# is `flow_level`; there can be no more than one possible simple key
# for each level. The value is a SimpleKey record:
# (token_number, required, index, line, column, mark)
# A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
# '[', or '{' tokens.
self.possible_simple_keys = {}
# Public methods.
def check_token(self, *choices):
# Check if the next token is one of the given types.
while self.need_more_tokens():
self.fetch_more_tokens()
if self.tokens:
if not choices:
return True
for choice in choices:
if isinstance(self.tokens[0], choice):
return True
return False
def peek_token(self):
# Return the next token, but do not delete it from the queue.
# Return None if no more tokens.
while self.need_more_tokens():
self.fetch_more_tokens()
if self.tokens:
return self.tokens[0]
else:
return None
def get_token(self):
# Return the next token.
while self.need_more_tokens():
self.fetch_more_tokens()
if self.tokens:
self.tokens_taken += 1
return self.tokens.pop(0)
# Private methods.
def need_more_tokens(self):
if self.done:
return False
if not self.tokens:
return True
# The current token may be a potential simple key, so we
# need to look further.
self.stale_possible_simple_keys()
if self.next_possible_simple_key() == self.tokens_taken:
return True
def fetch_more_tokens(self):
# Eat whitespaces and comments until we reach the next token.
self.scan_to_next_token()
# Remove obsolete possible simple keys.
self.stale_possible_simple_keys()
# Compare the current indentation and column. It may add some tokens
# and decrease the current indentation level.
self.unwind_indent(self.column)
# Peek the next character.
ch = self.peek()
# Is it the end of stream?
if ch == '\0':
return self.fetch_stream_end()
# Is it a directive?
if ch == '%' and self.check_directive():
return self.fetch_directive()
# Is it the document start?
if ch == '-' and self.check_document_start():
return self.fetch_document_start()
# Is it the document end?
if ch == '.' and self.check_document_end():
return self.fetch_document_end()
# TODO: support for BOM within a stream.
#if ch == '\uFEFF':
# return self.fetch_bom() <-- issue BOMToken
# Note: the order of the following checks is NOT significant.
# Is it the flow sequence start indicator?
if ch == '[':
return self.fetch_flow_sequence_start()
# Is it the flow mapping start indicator?
if ch == '{':
return self.fetch_flow_mapping_start()
# Is it the flow sequence end indicator?
if ch == ']':
return self.fetch_flow_sequence_end()
# Is it the flow mapping end indicator?
if ch == '}':
return self.fetch_flow_mapping_end()
# Is it the flow entry indicator?
if ch == ',':
return self.fetch_flow_entry()
# Is it the block entry indicator?
if ch == '-' and self.check_block_entry():
return self.fetch_block_entry()
# Is it the key indicator?
if ch == '?' and self.check_key():
return self.fetch_key()
# Is it the value indicator?
if ch == ':' and self.check_value():
return self.fetch_value()
# Is it an alias?
if ch == '*':
return self.fetch_alias()
# Is it an anchor?
if ch == '&':
return self.fetch_anchor()
# Is it a tag?
if ch == '!':
return self.fetch_tag()
# Is it a literal scalar?
if ch == '|' and not self.flow_level:
return self.fetch_literal()
# Is it a folded scalar?
if ch == '>' and not self.flow_level:
return self.fetch_folded()
# Is it a single quoted scalar?
if ch == '\'':
return self.fetch_single()
# Is it a double quoted scalar?
if ch == '\"':
return self.fetch_double()
# It must be a plain scalar then.
if self.check_plain():
return self.fetch_plain()
# No? It's an error. Let's produce a nice error message.
raise ScannerError("while scanning for the next token", None,
"found character %r that cannot start any token" % ch,
self.get_mark())
# Simple keys treatment.
def next_possible_simple_key(self):
# Return the number of the nearest possible simple key. Actually we
# don't need to loop through the whole dictionary. We may replace it
# with the following code:
# if not self.possible_simple_keys:
# return None
# return self.possible_simple_keys[
# min(self.possible_simple_keys.keys())].token_number
min_token_number = None
for level in self.possible_simple_keys:
key = self.possible_simple_keys[level]
if min_token_number is None or key.token_number < min_token_number:
min_token_number = key.token_number
return min_token_number
def stale_possible_simple_keys(self):
# Remove entries that are no longer possible simple keys. According to
# the YAML specification, simple keys
# - should be limited to a single line,
# - should be no longer than 1024 characters.
# Disabling this procedure will allow simple keys of any length and
# height (may cause problems if indentation is broken though).
for level in list(self.possible_simple_keys):
key = self.possible_simple_keys[level]
if key.line != self.line \
or self.index-key.index > 1024:
if key.required:
raise ScannerError("while scanning a simple key", key.mark,
"could not find expected ':'", self.get_mark())
del self.possible_simple_keys[level]
def save_possible_simple_key(self):
# The next token may start a simple key. We check if it's possible
# and save its position. This function is called for
# ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
# Check if a simple key is required at the current position.
required = not self.flow_level and self.indent == self.column
# The next token might be a simple key. Let's save its number and
# position.
if self.allow_simple_key:
self.remove_possible_simple_key()
token_number = self.tokens_taken+len(self.tokens)
key = SimpleKey(token_number, required,
self.index, self.line, self.column, self.get_mark())
self.possible_simple_keys[self.flow_level] = key
def remove_possible_simple_key(self):
# Remove the saved possible key position at the current flow level.
if self.flow_level in self.possible_simple_keys:
key = self.possible_simple_keys[self.flow_level]
if key.required:
raise ScannerError("while scanning a simple key", key.mark,
"could not find expected ':'", self.get_mark())
del self.possible_simple_keys[self.flow_level]
# Indentation functions.
def unwind_indent(self, column):
## In flow context, tokens should respect indentation.
## Actually the condition should be `self.indent >= column` according to
## the spec. But this condition will prohibit intuitively correct
## constructions such as
## key : {
## }
#if self.flow_level and self.indent > column:
# raise ScannerError(None, None,
# "invalid indentation or unclosed '[' or '{'",
# self.get_mark())
# In the flow context, indentation is ignored. We make the scanner less
# restrictive than the specification requires.
if self.flow_level:
return
# In block context, we may need to issue the BLOCK-END tokens.
while self.indent > column:
mark = self.get_mark()
self.indent = self.indents.pop()
self.tokens.append(BlockEndToken(mark, mark))
def add_indent(self, column):
# Check if we need to increase indentation.
if self.indent < column:
self.indents.append(self.indent)
self.indent = column
return True
return False
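# A sketch of the indent bookkeeping above: every dedent emits a BLOCK-END
# token (again assuming the standard `yaml.scan`):
#
# >>> import yaml
# >>> [type(t).__name__ for t in yaml.scan("a:\n  b: 1\nc: 2")
# ...  if 'Block' in type(t).__name__]
# ['BlockMappingStartToken', 'BlockMappingStartToken',
# 'BlockEndToken', 'BlockEndToken']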
# Fetchers.
def fetch_stream_start(self):
# We always add STREAM-START as the first token and STREAM-END as the
# last token.
# Read the token.
mark = self.get_mark()
# Add STREAM-START.
self.tokens.append(StreamStartToken(mark, mark,
encoding=self.encoding))
def fetch_stream_end(self):
# Set the current indentation to -1.
self.unwind_indent(-1)
# Reset simple keys.
self.remove_possible_simple_key()
self.allow_simple_key = False
self.possible_simple_keys = {}
# Read the token.
mark = self.get_mark()
# Add STREAM-END.
self.tokens.append(StreamEndToken(mark, mark))
# The stream is finished.
self.done = True
def fetch_directive(self):
# Set the current indentation to -1.
self.unwind_indent(-1)
# Reset simple keys.
self.remove_possible_simple_key()
self.allow_simple_key = False
# Scan and add DIRECTIVE.
self.tokens.append(self.scan_directive())
def fetch_document_start(self):
self.fetch_document_indicator(DocumentStartToken)
def fetch_document_end(self):
self.fetch_document_indicator(DocumentEndToken)
def fetch_document_indicator(self, TokenClass):
# Set the current indentation to -1.
self.unwind_indent(-1)
# Reset simple keys. Note that there cannot be a block collection
# after '---'.
self.remove_possible_simple_key()
self.allow_simple_key = False
# Add DOCUMENT-START or DOCUMENT-END.
start_mark = self.get_mark()
self.forward(3)
end_mark = self.get_mark()
self.tokens.append(TokenClass(start_mark, end_mark))
def fetch_flow_sequence_start(self):
self.fetch_flow_collection_start(FlowSequenceStartToken)
def fetch_flow_mapping_start(self):
self.fetch_flow_collection_start(FlowMappingStartToken)
def fetch_flow_collection_start(self, TokenClass):
# '[' and '{' may start a simple key.
self.save_possible_simple_key()
# Increase the flow level.
self.flow_level += 1
# Simple keys are allowed after '[' and '{'.
self.allow_simple_key = True
# Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
start_mark = self.get_mark()
self.forward()
end_mark = self.get_mark()
self.tokens.append(TokenClass(start_mark, end_mark))
def fetch_flow_sequence_end(self):
self.fetch_flow_collection_end(FlowSequenceEndToken)
def fetch_flow_mapping_end(self):
self.fetch_flow_collection_end(FlowMappingEndToken)
def fetch_flow_collection_end(self, TokenClass):
# Reset possible simple key on the current level.
self.remove_possible_simple_key()
# Decrease the flow level.
self.flow_level -= 1
# No simple keys after ']' or '}'.
self.allow_simple_key = False
# Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
start_mark = self.get_mark()
self.forward()
end_mark = self.get_mark()
self.tokens.append(TokenClass(start_mark, end_mark))
def fetch_flow_entry(self):
# Simple keys are allowed after ','.
self.allow_simple_key = True
# Reset possible simple key on the current level.
self.remove_possible_simple_key()
# Add FLOW-ENTRY.
start_mark = self.get_mark()
self.forward()
end_mark = self.get_mark()
self.tokens.append(FlowEntryToken(start_mark, end_mark))
def fetch_block_entry(self):
# Block context needs additional checks.
if not self.flow_level:
# Are we allowed to start a new entry?
if not self.allow_simple_key:
raise ScannerError(None, None,
"sequence entries are not allowed here",
self.get_mark())
# We may need to add BLOCK-SEQUENCE-START.
if self.add_indent(self.column):
mark = self.get_mark()
self.tokens.append(BlockSequenceStartToken(mark, mark))
# It's an error for the block entry to occur in the flow context,
# but we let the parser detect this.
else:
pass
# Simple keys are allowed after '-'.
self.allow_simple_key = True
# Reset possible simple key on the current level.
self.remove_possible_simple_key()
# Add BLOCK-ENTRY.
start_mark = self.get_mark()
self.forward()
end_mark = self.get_mark()
self.tokens.append(BlockEntryToken(start_mark, end_mark))
def fetch_key(self):
# Block context needs additional checks.
if not self.flow_level:
# Are we allowed to start a key (not necessarily a simple one)?
if not self.allow_simple_key:
raise ScannerError(None, None,
"mapping keys are not allowed here",
self.get_mark())
# We may need to add BLOCK-MAPPING-START.
if self.add_indent(self.column):
mark = self.get_mark()
self.tokens.append(BlockMappingStartToken(mark, mark))
# Simple keys are allowed after '?' in the block context.
self.allow_simple_key = not self.flow_level
# Reset possible simple key on the current level.
self.remove_possible_simple_key()
# Add KEY.
start_mark = self.get_mark()
self.forward()
end_mark = self.get_mark()
self.tokens.append(KeyToken(start_mark, end_mark))
def fetch_value(self):
# Have we found a simple key?
if self.flow_level in self.possible_simple_keys:
# Add KEY.
key = self.possible_simple_keys[self.flow_level]
del self.possible_simple_keys[self.flow_level]
self.tokens.insert(key.token_number-self.tokens_taken,
KeyToken(key.mark, key.mark))
# If this key starts a new block mapping, we need to add
# BLOCK-MAPPING-START.
if not self.flow_level:
if self.add_indent(key.column):
self.tokens.insert(key.token_number-self.tokens_taken,
BlockMappingStartToken(key.mark, key.mark))
# There cannot be two simple keys one after another.
self.allow_simple_key = False
# It must be part of a complex key.
else:
# Block context needs additional checks.
# (Do we really need them? They will be caught by the parser
# anyway.)
if not self.flow_level:
# We are allowed to start a complex value if and only if
# we can start a simple key.
if not self.allow_simple_key:
raise ScannerError(None, None,
"mapping values are not allowed here",
self.get_mark())
# If this value starts a new block mapping, we need to add
# BLOCK-MAPPING-START. It will be detected as an error later by
# the parser.
if not self.flow_level:
if self.add_indent(self.column):
mark = self.get_mark()
self.tokens.append(BlockMappingStartToken(mark, mark))
# Simple keys are allowed after ':' in the block context.
self.allow_simple_key = not self.flow_level
# Reset possible simple key on the current level.
self.remove_possible_simple_key()
# Add VALUE.
start_mark = self.get_mark()
self.forward()
end_mark = self.get_mark()
self.tokens.append(ValueToken(start_mark, end_mark))
def fetch_alias(self):
# ALIAS could be a simple key.
self.save_possible_simple_key()
# No simple keys after ALIAS.
self.allow_simple_key = False
# Scan and add ALIAS.
self.tokens.append(self.scan_anchor(AliasToken))
def fetch_anchor(self):
# ANCHOR could start a simple key.
self.save_possible_simple_key()
# No simple keys after ANCHOR.
self.allow_simple_key = False
# Scan and add ANCHOR.
self.tokens.append(self.scan_anchor(AnchorToken))
def fetch_tag(self):
# TAG could start a simple key.
self.save_possible_simple_key()
# No simple keys after TAG.
self.allow_simple_key = False
# Scan and add TAG.
self.tokens.append(self.scan_tag())
def fetch_literal(self):
self.fetch_block_scalar(style='|')
def fetch_folded(self):
self.fetch_block_scalar(style='>')
def fetch_block_scalar(self, style):
# A simple key may follow a block scalar.
self.allow_simple_key = True
# Reset possible simple key on the current level.
self.remove_possible_simple_key()
# Scan and add SCALAR.
self.tokens.append(self.scan_block_scalar(style))
def fetch_single(self):
self.fetch_flow_scalar(style='\'')
def fetch_double(self):
self.fetch_flow_scalar(style='"')
def fetch_flow_scalar(self, style):
# A flow scalar could be a simple key.
self.save_possible_simple_key()
# No simple keys after flow scalars.
self.allow_simple_key = False
# Scan and add SCALAR.
self.tokens.append(self.scan_flow_scalar(style))
def fetch_plain(self):
# A plain scalar could be a simple key.
self.save_possible_simple_key()
# No simple keys after plain scalars. But note that `scan_plain` will
# change this flag if the scan is finished at the beginning of the
# line.
self.allow_simple_key = False
# Scan and add SCALAR. May change `allow_simple_key`.
self.tokens.append(self.scan_plain())
# Checkers.
def check_directive(self):
# DIRECTIVE: ^ '%' ...
# The '%' indicator is already checked.
if self.column == 0:
return True
def check_document_start(self):
# DOCUMENT-START: ^ '---' (' '|'\n')
if self.column == 0:
if self.prefix(3) == '---' \
and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
return True
def check_document_end(self):
# DOCUMENT-END: ^ '...' (' '|'\n')
if self.column == 0:
if self.prefix(3) == '...' \
and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
return True
def check_block_entry(self):
# BLOCK-ENTRY: '-' (' '|'\n')
return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029'
def check_key(self):
# KEY(flow context): '?'
if self.flow_level:
return True
# KEY(block context): '?' (' '|'\n')
else:
return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029'
def check_value(self):
# VALUE(flow context): ':'
if self.flow_level:
return True
# VALUE(block context): ':' (' '|'\n')
else:
return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029'
def check_plain(self):
# A plain scalar may start with any non-space character except:
# '-', '?', ':', ',', '[', ']', '{', '}',
# '#', '&', '*', '!', '|', '>', '\'', '\"',
# '%', '@', '`'.
#
# It may also start with
# '-', '?', ':'
# if it is followed by a non-space character.
#
# Note that we limit the last rule to the block context (except the
# '-' character) because we want the flow context to be space
# independent.
ch = self.peek()
return ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \
or (self.peek(1) not in '\0 \t\r\n\x85\u2028\u2029'
and (ch == '-' or (not self.flow_level and ch in '?:')))
# Scanners.
def scan_to_next_token(self):
# We ignore spaces, line breaks and comments.
# If we find a line break in the block context, we set the flag
# `allow_simple_key` on.
# The byte order mark is stripped if it's the first character in the
# stream. We do not yet support BOM inside the stream as the
# specification requires. Any such mark will be considered as a part
# of the document.
#
# TODO: We need to make tab handling rules more sane. A good rule is:
# tabs cannot precede the tokens
# BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
# KEY(block), VALUE(block), BLOCK-ENTRY
# So the checking code is
# if <TAB>:
# self.allow_simple_keys = False
# We also need to add the check for `allow_simple_keys == True` to
# `unwind_indent` before issuing BLOCK-END.
# Scanners for block, flow, and plain scalars need to be modified.
if self.index == 0 and self.peek() == '\uFEFF':
self.forward()
found = False
while not found:
while self.peek() == ' ':
self.forward()
if self.peek() == '#':
while self.peek() not in '\0\r\n\x85\u2028\u2029':
self.forward()
if self.scan_line_break():
if not self.flow_level:
self.allow_simple_key = True
else:
found = True
def scan_directive(self):
# See the specification for details.
start_mark = self.get_mark()
self.forward()
name = self.scan_directive_name(start_mark)
value = None
if name == 'YAML':
value = self.scan_yaml_directive_value(start_mark)
end_mark = self.get_mark()
elif name == 'TAG':
value = self.scan_tag_directive_value(start_mark)
end_mark = self.get_mark()
else:
end_mark = self.get_mark()
while self.peek() not in '\0\r\n\x85\u2028\u2029':
self.forward()
self.scan_directive_ignored_line(start_mark)
return DirectiveToken(name, value, start_mark, end_mark)
def scan_directive_name(self, start_mark):
# See the specification for details.
length = 0
ch = self.peek(length)
while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
or ch in '-_':
length += 1
ch = self.peek(length)
if not length:
raise ScannerError("while scanning a directive", start_mark,
"expected alphabetic or numeric character, but found %r"
% ch, self.get_mark())
value = self.prefix(length)
self.forward(length)
ch = self.peek()
if ch not in '\0 \r\n\x85\u2028\u2029':
raise ScannerError("while scanning a directive", start_mark,
"expected alphabetic or numeric character, but found %r"
% ch, self.get_mark())
return value
def scan_yaml_directive_value(self, start_mark):
# See the specification for details.
while self.peek() == ' ':
self.forward()
major = self.scan_yaml_directive_number(start_mark)
if self.peek() != '.':
raise ScannerError("while scanning a directive", start_mark,
"expected a digit or '.', but found %r" % self.peek(),
self.get_mark())
self.forward()
minor = self.scan_yaml_directive_number(start_mark)
if self.peek() not in '\0 \r\n\x85\u2028\u2029':
raise ScannerError("while scanning a directive", start_mark,
"expected a digit or ' ', but found %r" % self.peek(),
self.get_mark())
return (major, minor)
def scan_yaml_directive_number(self, start_mark):
# See the specification for details.
ch = self.peek()
if not ('0' <= ch <= '9'):
raise ScannerError("while scanning a directive", start_mark,
"expected a digit, but found %r" % ch, self.get_mark())
length = 0
while '0' <= self.peek(length) <= '9':
length += 1
value = int(self.prefix(length))
self.forward(length)
return value
def scan_tag_directive_value(self, start_mark):
# See the specification for details.
while self.peek() == ' ':
self.forward()
handle = self.scan_tag_directive_handle(start_mark)
while self.peek() == ' ':
self.forward()
prefix = self.scan_tag_directive_prefix(start_mark)
return (handle, prefix)
def scan_tag_directive_handle(self, start_mark):
# See the specification for details.
value = self.scan_tag_handle('directive', start_mark)
ch = self.peek()
if ch != ' ':
raise ScannerError("while scanning a directive", start_mark,
"expected ' ', but found %r" % ch, self.get_mark())
return value
def scan_tag_directive_prefix(self, start_mark):
# See the specification for details.
value = self.scan_tag_uri('directive', start_mark)
ch = self.peek()
if ch not in '\0 \r\n\x85\u2028\u2029':
raise ScannerError("while scanning a directive", start_mark,
"expected ' ', but found %r" % ch, self.get_mark())
return value
def scan_directive_ignored_line(self, start_mark):
# See the specification for details.
while self.peek() == ' ':
self.forward()
if self.peek() == '#':
while self.peek() not in '\0\r\n\x85\u2028\u2029':
self.forward()
ch = self.peek()
if ch not in '\0\r\n\x85\u2028\u2029':
raise ScannerError("while scanning a directive", start_mark,
"expected a comment or a line break, but found %r"
% ch, self.get_mark())
self.scan_line_break()
def scan_anchor(self, TokenClass):
# The specification does not restrict characters for anchors and
# aliases. This may lead to problems, for instance, the document:
# [ *alias, value ]
# can be interpreted in two ways, as
# [ "value" ]
# and
# [ *alias , "value" ]
# Therefore we restrict aliases to ASCII letters, digits, '-' and '_'.
start_mark = self.get_mark()
indicator = self.peek()
if indicator == '*':
name = 'alias'
else:
name = 'anchor'
self.forward()
length = 0
ch = self.peek(length)
while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
or ch in '-_':
length += 1
ch = self.peek(length)
if not length:
raise ScannerError("while scanning an %s" % name, start_mark,
"expected alphabetic or numeric character, but found %r"
% ch, self.get_mark())
value = self.prefix(length)
self.forward(length)
ch = self.peek()
if ch not in '\0 \t\r\n\x85\u2028\u2029?:,]}%@`':
raise ScannerError("while scanning an %s" % name, start_mark,
"expected alphabetic or numeric character, but found %r"
% ch, self.get_mark())
end_mark = self.get_mark()
return TokenClass(value, start_mark, end_mark)
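# The anchor/alias machinery above at work (standard `yaml.safe_load`):
#
# >>> import yaml
# >>> doc = yaml.safe_load("a: &x [1, 2]\nb: *x")
# >>> doc['a'] is doc['b']
# True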
def scan_tag(self):
# See the specification for details.
start_mark = self.get_mark()
ch = self.peek(1)
if ch == '<':
handle = None
self.forward(2)
suffix = self.scan_tag_uri('tag', start_mark)
if self.peek() != '>':
raise ScannerError("while parsing a tag", start_mark,
"expected '>', but found %r" % self.peek(),
self.get_mark())
self.forward()
elif ch in '\0 \t\r\n\x85\u2028\u2029':
handle = None
suffix = '!'
self.forward()
else:
length = 1
use_handle = False
while ch not in '\0 \r\n\x85\u2028\u2029':
if ch == '!':
use_handle = True
break
length += 1
ch = self.peek(length)
handle = '!'
if use_handle:
handle = self.scan_tag_handle('tag', start_mark)
else:
handle = '!'
self.forward()
suffix = self.scan_tag_uri('tag', start_mark)
ch = self.peek()
if ch not in '\0 \r\n\x85\u2028\u2029':
raise ScannerError("while scanning a tag", start_mark,
"expected ' ', but found %r" % ch, self.get_mark())
value = (handle, suffix)
end_mark = self.get_mark()
return TagToken(value, start_mark, end_mark)
def scan_block_scalar(self, style):
# See the specification for details.
folded = (style == '>')
chunks = []
start_mark = self.get_mark()
# Scan the header.
self.forward()
chomping, increment = self.scan_block_scalar_indicators(start_mark)
self.scan_block_scalar_ignored_line(start_mark)
# Determine the indentation level and go to the first non-empty line.
min_indent = self.indent+1
if min_indent < 1:
min_indent = 1
if increment is None:
breaks, max_indent, end_mark = self.scan_block_scalar_indentation()
indent = max(min_indent, max_indent)
else:
indent = min_indent+increment-1
breaks, end_mark = self.scan_block_scalar_breaks(indent)
line_break = ''
# Scan the inner part of the block scalar.
while self.column == indent and self.peek() != '\0':
chunks.extend(breaks)
leading_non_space = self.peek() not in ' \t'
length = 0
while self.peek(length) not in '\0\r\n\x85\u2028\u2029':
length += 1
chunks.append(self.prefix(length))
self.forward(length)
line_break = self.scan_line_break()
breaks, end_mark = self.scan_block_scalar_breaks(indent)
if self.column == indent and self.peek() != '\0':
# Unfortunately, folding rules are ambiguous.
#
# This is the folding according to the specification:
if folded and line_break == '\n' \
and leading_non_space and self.peek() not in ' \t':
if not breaks:
chunks.append(' ')
else:
chunks.append(line_break)
# This is Clark Evans's interpretation (also in the spec
# examples):
#
#if folded and line_break == '\n':
# if not breaks:
# if self.peek() not in ' \t':
# chunks.append(' ')
# else:
# chunks.append(line_break)
#else:
# chunks.append(line_break)
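# To illustrate the rule implemented above: the folded scalar
#   >
#     a
#     b
# loads as 'a b\n' (a single line break is folded to a space), while
#   >
#     a
#
#     b
# loads as 'a\nb\n' (the extra break is preserved via `breaks`).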
else:
break
# Chomp the tail.
if chomping is not False:
chunks.append(line_break)
if chomping is True:
chunks.extend(breaks)
# We are done.
return ScalarToken(''.join(chunks), False, start_mark, end_mark,
style)
def scan_block_scalar_indicators(self, start_mark):
# See the specification for details.
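# A few illustrative headers and how they parse (not exhaustive):
#   |     chomping=None, increment=None  (clip: keep one final break)
#   |-    chomping=False                 (strip trailing breaks)
#   |+2   chomping=True, increment=2     (keep breaks, explicit indent)
#   >2-   increment=2, chomping=False    (indicators in either order)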
chomping = None
increment = None
ch = self.peek()
if ch in '+-':
if ch == '+':
chomping = True
else:
chomping = False
self.forward()
ch = self.peek()
if ch in '0123456789':
increment = int(ch)
if increment == 0:
raise ScannerError("while scanning a block scalar", start_mark,
"expected indentation indicator in the range 1-9, but found 0",
self.get_mark())
self.forward()
elif ch in '0123456789':
increment = int(ch)
if increment == 0:
raise ScannerError("while scanning a block scalar", start_mark,
"expected indentation indicator in the range 1-9, but found 0",
self.get_mark())
self.forward()
ch = self.peek()
if ch in '+-':
if ch == '+':
chomping = True
else:
chomping = False
self.forward()
ch = self.peek()
if ch not in '\0 \r\n\x85\u2028\u2029':
raise ScannerError("while scanning a block scalar", start_mark,
"expected chomping or indentation indicators, but found %r"
% ch, self.get_mark())
return chomping, increment
def scan_block_scalar_ignored_line(self, start_mark):
# See the specification for details.
while self.peek() == ' ':
self.forward()
if self.peek() == '#':
while self.peek() not in '\0\r\n\x85\u2028\u2029':
self.forward()
ch = self.peek()
if ch not in '\0\r\n\x85\u2028\u2029':
raise ScannerError("while scanning a block scalar", start_mark,
"expected a comment or a line break, but found %r" % ch,
self.get_mark())
self.scan_line_break()
def scan_block_scalar_indentation(self):
# See the specification for details.
chunks = []
max_indent = 0
end_mark = self.get_mark()
while self.peek() in ' \r\n\x85\u2028\u2029':
if self.peek() != ' ':
chunks.append(self.scan_line_break())
end_mark = self.get_mark()
else:
self.forward()
if self.column > max_indent:
max_indent = self.column
return chunks, max_indent, end_mark
def scan_block_scalar_breaks(self, indent):
# See the specification for details.
chunks = []
end_mark = self.get_mark()
while self.column < indent and self.peek() == ' ':
self.forward()
while self.peek() in '\r\n\x85\u2028\u2029':
chunks.append(self.scan_line_break())
end_mark = self.get_mark()
while self.column < indent and self.peek() == ' ':
self.forward()
return chunks, end_mark
def scan_flow_scalar(self, style):
# See the specification for details.
# Note that we relax the indentation rules for quoted scalars. Quoted
# scalars don't need to adhere to indentation because '"' and "'"
# clearly mark their beginning and end. Therefore we are less
# restrictive than the specification requires. We only need to check
# that document separators are not included in scalars.
if style == '"':
double = True
else:
double = False
chunks = []
start_mark = self.get_mark()
quote = self.peek()
self.forward()
chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
while self.peek() != quote:
chunks.extend(self.scan_flow_scalar_spaces(double, start_mark))
chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
self.forward()
end_mark = self.get_mark()
return ScalarToken(''.join(chunks), False, start_mark, end_mark,
style)
ESCAPE_REPLACEMENTS = {
'0': '\0',
'a': '\x07',
'b': '\x08',
't': '\x09',
'\t': '\x09',
'n': '\x0A',
'v': '\x0B',
'f': '\x0C',
'r': '\x0D',
'e': '\x1B',
' ': '\x20',
'\"': '\"',
'\\': '\\',
'/': '/',
'N': '\x85',
'_': '\xA0',
'L': '\u2028',
'P': '\u2029',
}
ESCAPE_CODES = {
'x': 2,
'u': 4,
'U': 8,
}
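# For example, in a double-quoted scalar '\x41' decodes to 'A' (2 hex
# digits), '\u263A' to U+263A (4 digits), and '\U0001F600' to U+1F600
# (8 digits), matching the lengths listed in ESCAPE_CODES.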
def scan_flow_scalar_non_spaces(self, double, start_mark):
# See the specification for details.
chunks = []
while True:
length = 0
while self.peek(length) not in '\'\"\\\0 \t\r\n\x85\u2028\u2029':
length += 1
if length:
chunks.append(self.prefix(length))
self.forward(length)
ch = self.peek()
if not double and ch == '\'' and self.peek(1) == '\'':
chunks.append('\'')
self.forward(2)
elif (double and ch == '\'') or (not double and ch in '\"\\'):
chunks.append(ch)
self.forward()
elif double and ch == '\\':
self.forward()
ch = self.peek()
if ch in self.ESCAPE_REPLACEMENTS:
chunks.append(self.ESCAPE_REPLACEMENTS[ch])
self.forward()
elif ch in self.ESCAPE_CODES:
length = self.ESCAPE_CODES[ch]
self.forward()
for k in range(length):
if self.peek(k) not in '0123456789ABCDEFabcdef':
raise ScannerError("while scanning a double-quoted scalar", start_mark,
"expected escape sequence of %d hexdecimal numbers, but found %r" %
(length, self.peek(k)), self.get_mark())
code = int(self.prefix(length), 16)
chunks.append(chr(code))
self.forward(length)
elif ch in '\r\n\x85\u2028\u2029':
self.scan_line_break()
chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
else:
raise ScannerError("while scanning a double-quoted scalar", start_mark,
"found unknown escape character %r" % ch, self.get_mark())
else:
return chunks
def scan_flow_scalar_spaces(self, double, start_mark):
# See the specification for details.
chunks = []
length = 0
while self.peek(length) in ' \t':
length += 1
whitespaces = self.prefix(length)
self.forward(length)
ch = self.peek()
if ch == '\0':
raise ScannerError("while scanning a quoted scalar", start_mark,
"found unexpected end of stream", self.get_mark())
elif ch in '\r\n\x85\u2028\u2029':
line_break = self.scan_line_break()
breaks = self.scan_flow_scalar_breaks(double, start_mark)
if line_break != '\n':
chunks.append(line_break)
elif not breaks:
chunks.append(' ')
chunks.extend(breaks)
else:
chunks.append(whitespaces)
return chunks
def scan_flow_scalar_breaks(self, double, start_mark):
# See the specification for details.
chunks = []
while True:
# Instead of checking indentation, we check for document
# separators.
prefix = self.prefix(3)
if (prefix == '---' or prefix == '...') \
and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
raise ScannerError("while scanning a quoted scalar", start_mark,
"found unexpected document separator", self.get_mark())
while self.peek() in ' \t':
self.forward()
if self.peek() in '\r\n\x85\u2028\u2029':
chunks.append(self.scan_line_break())
else:
return chunks
def scan_plain(self):
# See the specification for details.
# We add an additional restriction for the flow context:
# plain scalars in the flow context cannot contain ',' or '?'.
# We also keep track of the `allow_simple_key` flag here.
# Indentation rules are loosened for the flow context.
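# For example, in flow context the plain scalar in '{key: a,b}' ends
# at the ',' (the value is 'a'), while in block context 'key: a,b'
# keeps the comma (the value is 'a,b').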
chunks = []
start_mark = self.get_mark()
end_mark = start_mark
indent = self.indent+1
# We allow zero indentation for scalars, but then we need to check for
# document separators at the beginning of the line.
#if indent == 0:
# indent = 1
spaces = []
while True:
length = 0
if self.peek() == '#':
break
while True:
ch = self.peek(length)
if ch in '\0 \t\r\n\x85\u2028\u2029' \
or (ch == ':' and
self.peek(length+1) in '\0 \t\r\n\x85\u2028\u2029'
+ (',[]{}' if self.flow_level else ''))\
or (self.flow_level and ch in ',?[]{}'):
break
length += 1
if length == 0:
break
self.allow_simple_key = False
chunks.extend(spaces)
chunks.append(self.prefix(length))
self.forward(length)
end_mark = self.get_mark()
spaces = self.scan_plain_spaces(indent, start_mark)
if not spaces or self.peek() == '#' \
or (not self.flow_level and self.column < indent):
break
return ScalarToken(''.join(chunks), True, start_mark, end_mark)
def scan_plain_spaces(self, indent, start_mark):
# See the specification for details.
# The specification is really confusing about tabs in plain scalars.
# We just forbid them completely. Do not use tabs in YAML!
chunks = []
length = 0
while self.peek(length) in ' ':
length += 1
whitespaces = self.prefix(length)
self.forward(length)
ch = self.peek()
if ch in '\r\n\x85\u2028\u2029':
line_break = self.scan_line_break()
self.allow_simple_key = True
prefix = self.prefix(3)
if (prefix == '---' or prefix == '...') \
and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
return
breaks = []
while self.peek() in ' \r\n\x85\u2028\u2029':
if self.peek() == ' ':
self.forward()
else:
breaks.append(self.scan_line_break())
prefix = self.prefix(3)
if (prefix == '---' or prefix == '...') \
and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
return
if line_break != '\n':
chunks.append(line_break)
elif not breaks:
chunks.append(' ')
chunks.extend(breaks)
elif whitespaces:
chunks.append(whitespaces)
return chunks
def scan_tag_handle(self, name, start_mark):
# See the specification for details.
# For some strange reason, the specification does not allow '_' in
# tag handles. I have allowed it anyway.
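# For example, this accepts '!', '!!' and '!my_handle!' (the last one
# only because of the '_' extension mentioned above).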
ch = self.peek()
if ch != '!':
raise ScannerError("while scanning a %s" % name, start_mark,
"expected '!', but found %r" % ch, self.get_mark())
length = 1
ch = self.peek(length)
if ch != ' ':
while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
or ch in '-_':
length += 1
ch = self.peek(length)
if ch != '!':
self.forward(length)
raise ScannerError("while scanning a %s" % name, start_mark,
"expected '!', but found %r" % ch, self.get_mark())
length += 1
value = self.prefix(length)
self.forward(length)
return value
def scan_tag_uri(self, name, start_mark):
# See the specification for details.
# Note: we do not check if the URI is well-formed.
chunks = []
length = 0
ch = self.peek(length)
while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
or ch in '-;/?:@&=+$,_.!~*\'()[]%':
if ch == '%':
chunks.append(self.prefix(length))
self.forward(length)
length = 0
chunks.append(self.scan_uri_escapes(name, start_mark))
else:
length += 1
ch = self.peek(length)
if length:
chunks.append(self.prefix(length))
self.forward(length)
length = 0
if not chunks:
raise ScannerError("while parsing a %s" % name, start_mark,
"expected URI, but found %r" % ch, self.get_mark())
return ''.join(chunks)
def scan_uri_escapes(self, name, start_mark):
# See the specification for details.
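# For example, the escaped run '%C3%A9' in a tag URI decodes, as
# UTF-8, to the single character 'é'.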
codes = []
mark = self.get_mark()
while self.peek() == '%':
self.forward()
for k in range(2):
if self.peek(k) not in '0123456789ABCDEFabcdef':
raise ScannerError("while scanning a %s" % name, start_mark,
"expected URI escape sequence of 2 hexdecimal numbers, but found %r"
% self.peek(k), self.get_mark())
codes.append(int(self.prefix(2), 16))
self.forward(2)
try:
value = bytes(codes).decode('utf-8')
except UnicodeDecodeError as exc:
raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark)
return value
def scan_line_break(self):
# Transforms:
# '\r\n' : '\n'
# '\r' : '\n'
# '\n' : '\n'
# '\x85' : '\n'
# '\u2028' : '\u2028'
# '\u2029' : '\u2029'
# default : ''
ch = self.peek()
if ch in '\r\n\x85':
if self.prefix(2) == '\r\n':
self.forward(2)
else:
self.forward()
return '\n'
elif ch in '\u2028\u2029':
self.forward()
return ch
return ''