| Adam Israel | 0cd1c02 | 2019-09-03 18:26:08 -0400 | [diff] [blame] | 1 | # Copyright 2019 Canonical Ltd. |
| 2 | # |
| 3 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | # you may not use this file except in compliance with the License. |
| 5 | # You may obtain a copy of the License at |
| 6 | # |
| 7 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | # |
| 9 | # Unless required by applicable law or agreed to in writing, software |
| 10 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | # See the License for the specific language governing permissions and |
| 13 | # limitations under the License. |
import asyncio
import logging
import os
import re
import shlex
import tempfile
import time
import uuid
from subprocess import CalledProcessError

import paramiko
import n2vc.exceptions

from juju.client import client
| 27 | |
# Ordered lookup table of [compiled pattern, normalized architecture name].
# Patterns are tried top to bottom with `match`, i.e. anchored at the start
# of the raw string only; the first hit wins.
arches = [
    [re.compile(expression), normalized]
    for expression, normalized in (
        (r"amd64|x86_64", "amd64"),
        (r"i?[3-9]86", "i386"),
        (r"(arm$)|(armv.*)", "armhf"),
        (r"aarch64", "arm64"),
        (r"ppc64|ppc64el|ppc64le", "ppc64el"),
        (r"s390x?", "s390x"),
    )
]


def normalize_arch(rawArch):
    """Translate a raw architecture string into its normalized name.

    :param str rawArch: The architecture as reported by the machine
        (`uname -m` style, e.g. "x86_64" or "armv7l").

    :return: str: The normalized architecture name, or None when no entry
        of `arches` matches.
    """
    candidates = (
        normalized
        for pattern, normalized in arches
        if pattern.match(rawArch)
    )
    return next(candidates, None)
| 44 | |
| 45 | |
# Bash script executed on the target machine to collect the facts needed to
# register it. Its output is consumed positionally, line by line:
#   1. the OS series: "centos<VERSION_ID>" when /etc/os-release reports
#      ID=centos, otherwise the codename printed by `lsb_release -cs`
#   2. the machine architecture (`uname -m`)
#   3. the MemTotal line of /proc/meminfo
#   4+ the complete contents of /proc/cpuinfo
# `set -e` aborts the script as soon as any of these commands fails.
DETECTION_SCRIPT = """#!/bin/bash
set -e
os_id=$(grep '^ID=' /etc/os-release | tr -d '"' | cut -d= -f2)
if [ "$os_id" = 'centos' ]; then
os_version=$(grep '^VERSION_ID=' /etc/os-release | tr -d '"' | cut -d= -f2)
echo "centos$os_version"
else
lsb_release -cs
fi
uname -m
grep MemTotal /proc/meminfo
cat /proc/cpuinfo
"""
| 59 | |
# Shell script template that prepares the `ubuntu` account on the target
# machine. It:
#   - creates the `ubuntu` user (with a home directory and bash as its shell)
#     if `id ubuntu` fails
#   - installs /etc/sudoers.d/90-juju-ubuntu (mode 0440) granting that user
#     passwordless sudo
#   - creates an empty ~/.ssh/authorized_keys with mode 0600 for that user
#   - appends the authorized key(s) to that file when the value is non-empty
# The single `{}` placeholder is filled through `str.format` with the public
# key text, so the script must not contain any other literal braces.
INITIALIZE_UBUNTU_SCRIPT = """set -e
(id ubuntu &> /dev/null) || useradd -m ubuntu -s /bin/bash
umask 0077
temp=$(mktemp)
echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' > $temp
install -m 0440 $temp /etc/sudoers.d/90-juju-ubuntu
rm $temp
su ubuntu -c 'install -D -m 0600 /dev/null ~/.ssh/authorized_keys'
export authorized_keys="{}"
if [ ! -z "$authorized_keys" ]; then
su ubuntu -c 'echo $authorized_keys >> ~/.ssh/authorized_keys'
fi
"""
| 73 | |
# Bash script template that installs `iptables-persistent`, appends a DNAT
# rule to the nat table's OUTPUT chain and saves the rule set with
# `netfilter-persistent save`.
# It has two positional `{}` placeholders filled through `str.format`:
#   1. the destination address whose outgoing TCP traffic is matched (`-d`)
#   2. the address that traffic is rewritten to (`--to-destination`)
# `apt-get update` runs first so the package install does not depend on an
# already populated package index.
IPTABLES_SCRIPT = """#!/bin/bash
set -e
apt-get update
DEBIAN_FRONTEND=noninteractive apt-get install -yqq iptables-persistent
iptables -t nat -A OUTPUT -p tcp -d {} -j DNAT --to-destination {}
netfilter-persistent save
"""
| 81 | |
class SSHProvisioner:
    """Provision a manually created machine via SSH.

    The provisioner connects to `host` as `user` with the private key stored
    at `private_key_path`, prepares the `ubuntu` account, detects the
    machine's hardware/OS and finally runs the provisioning script obtained
    from the Juju API.
    """

    def __init__(self, user, host, private_key_path, log=None):
        """Remember the connection parameters.

        :param str user: The user to connect as.
        :param str host: The host to connect to.
        :param str private_key_path: Path of the private key used to
            authenticate. The matching public key is expected at
            `<private_key_path>.pub`.
        :param log: Optional logger; defaults to this module's logger.
        """
        self.host = host
        self.user = user
        self.private_key_path = private_key_path
        self.log = log if log else logging.getLogger(__name__)

    def _get_ssh_client(self, host=None, user=None, private_key_path=None):
        """Return a connected Paramiko ssh object.

        Every argument falls back to the value given to the constructor.

        :param str host: The host to connect to.
        :param str user: The user to connect as.
        :param str private_key_path: The private key to authenticate with.

        :return: object: A paramiko.SSHClient
        :raises: :class:`paramiko.ssh_exception.SSHException` if the
            connection failed
        """
        host = host or self.host
        user = user or self.user
        private_key_path = private_key_path or self.private_key_path

        ssh = paramiko.SSHClient()
        # NOTE(review): unknown host keys are accepted automatically. That
        # is convenient for freshly created machines but provides no
        # protection against a man-in-the-middle.
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

        # Read the private key into a paramiko.RSAKey. When the file is
        # missing, pkey stays None and paramiko uses its own key discovery.
        pkey = None
        if os.path.exists(private_key_path):
            with open(private_key_path, 'r') as f:
                pkey = paramiko.RSAKey.from_private_key(f)

        #######################################################################
        # There is a bug in some versions of OpenSSH 4.3 (CentOS/RHEL5) where #
        # the server may not send the SSH_MSG_USERAUTH_BANNER message except  #
        # when responding to an auth_none request. For example, paramiko will #
        # attempt to use password authentication when a password is set, but  #
        # the server could deny that, instead requesting keyboard-interactive.#
        # The hack to workaround this is to attempt a reconnect, which will   #
        # receive the right banner, and authentication can proceed. See the   #
        # following for more info:                                            #
        # https://github.com/paramiko/paramiko/issues/432                     #
        # https://github.com/paramiko/paramiko/pull/438                       #
        #######################################################################

        retry = 10
        attempts = 0
        delay = 15
        while attempts <= retry:
            attempts += 1
            try:
                # Attempt to establish a SSH connection
                ssh.connect(host, port=22, username=user, pkey=pkey)
                break
            except paramiko.ssh_exception.SSHException as e:
                if str(e) != 'Error reading SSH protocol banner':
                    self.log.debug("Unhandled exception caught: {}".format(e))
                    raise
                # Once more, with feeling
                ssh.connect(host, port=22, username=user, pkey=pkey)
                # The reconnect succeeded: stop here instead of looping and
                # opening yet another connection on top of this one.
                break
            except Exception as e:
                if 'Unable to connect to port' not in str(e):
                    self.log.debug(e)
                    raise
                # The machine is most likely still booting.
                self.log.debug(
                    "Waiting for VM to boot, sleeping {} seconds".format(delay)
                )
                if attempts > retry:
                    raise
                time.sleep(delay)
                # Slowly back off the retry
                delay += 15
        return ssh

    def _run_command(self, ssh, cmd, pty=True):
        """Run a command remotely via SSH.

        :param object ssh: The SSHClient
        :param cmd: The command to execute. A `str` is sent to the remote
            shell verbatim, so its quoting is preserved. A list/tuple of
            fragments is joined with single spaces; fragments must already
            be quoted by the caller where necessary.
        :param bool pty: Whether to allocate a pty

        :return: tuple: The decoded, stripped stdout and stderr of the
            command execution
        :raises: :class:`CalledProcessError` if the command exits with a
            positive status. `cmd` holds the command as a list and `output`
            the decoded stderr, or stdout when stderr is empty (with a pty
            both streams arrive on stdout).
        """
        if isinstance(cmd, str):
            # Splitting and re-joining would strip the caller's quoting, so
            # the split form is only kept for error reporting.
            command_line = cmd
            argv = shlex.split(cmd)
        else:
            argv = list(cmd) if isinstance(cmd, (list, tuple)) else [cmd]
            command_line = ' '.join(argv)

        _, stdout, stderr = ssh.exec_command(command_line, get_pty=pty)

        # Drain the output before asking for the exit status: waiting for
        # the status first can block forever once the remote side has
        # produced more output than the channel window holds.
        out = stdout.read().decode('utf-8').strip()
        err = stderr.read().decode('utf-8').strip()
        retcode = stdout.channel.recv_exit_status()

        if retcode > 0:
            raise CalledProcessError(
                returncode=retcode,
                cmd=argv,
                output=err or out,
            )
        return (out, err)

    def _init_ubuntu_user(self):
        """Initialize the ubuntu user.

        Checks that `self.user` can run sudo without a password, then runs
        INITIALIZE_UBUNTU_SCRIPT with the public key found next to the
        private key.

        :return: bool: True once the initialization was successful
        :raises: :class:`n2vc.exceptions.AuthenticationFailed` if the first
            connection cannot be authenticated
        :raises: :class:`n2vc.exceptions.NoRouteToHost` if the host cannot
            be reached
        :raises: :class:`FileNotFoundError` if the public key is missing
        :raises: :class:`CalledProcessError` if a remote command fails
        """
        ssh = None
        try:
            # Run w/o allocating a pty, so we fail if sudo prompts for a passwd
            ssh = self._get_ssh_client()
            self._run_command(ssh, "sudo -n true", pty=False)
        except paramiko.ssh_exception.AuthenticationException:
            raise n2vc.exceptions.AuthenticationFailed(self.user)
        except paramiko.ssh_exception.NoValidConnectionsError:
            raise n2vc.exceptions.NoRouteToHost(self.host)
        finally:
            if ssh:
                ssh.close()

        # Infer the public key
        public_key_path = "{}.pub".format(self.private_key_path)
        if not os.path.exists(public_key_path):
            raise FileNotFoundError(
                "Public key '{}' doesn't exist.".format(public_key_path)
            )

        with open(public_key_path, "r") as f:
            # Only the first line is used; the trailing newline is dropped
            # because the script appends the key with `echo`.
            public_key = f.readline().strip()

        script = INITIALIZE_UBUNTU_SCRIPT.format(public_key)

        ssh = None
        try:
            ssh = self._get_ssh_client()
            self._run_command(
                ssh,
                ["sudo", "/bin/bash -c " + shlex.quote(script)],
                pty=True
            )
        finally:
            if ssh:
                ssh.close()

        return True

    @staticmethod
    def _count_cpu_cores(cpuinfo_lines):
        """Count the CPU cores described by /proc/cpuinfo lines.

        The "cpu cores" value is added once per distinct "physical id".
        When that yields nothing (no "cpu cores" entries at all), the number
        of "processor" entries is returned instead.

        :param list cpuinfo_lines: The lines of /proc/cpuinfo
        :return: int: The number of cores, 0 if nothing could be detected
        """
        total = 0
        processors = 0
        counted = set()
        # Must survive across lines: "physical id" precedes the "cpu cores"
        # line it belongs to.
        physical_id = ""
        for line in cpuinfo_lines:
            if line.startswith("physical id"):
                physical_id = line.split(":", 1)[1].strip()
            elif line.startswith("cpu cores"):
                cores = int(line.split(":", 1)[1].strip())
                if physical_id not in counted:
                    total += cores
                    counted.add(physical_id)
            elif line.startswith("processor"):
                processors += 1
        return total or processors

    def _detect_hardware_and_os(self, ssh):
        """Detect the target hardware capabilities and OS series.

        Runs DETECTION_SCRIPT through sudo and parses its positional output.

        :param object ssh: The SSHClient
        :return: dict: The keys 'series' (str), 'arch' (str, or None when
            the architecture is not recognized), 'mem' (int, megabytes) and
            'cpu-cores' (int).
        """
        stdout, _ = self._run_command(
            ssh,
            ["sudo", "/bin/bash -c " + shlex.quote(DETECTION_SCRIPT)],
            pty=True,
        )

        # With a pty the lines may end in "\r"; strip every line up front.
        lines = [line.strip() for line in stdout.split("\n")]

        # Remove extraneous line if DNS resolution of hostname fails
        # i.e. sudo: unable to resolve host test-1-mgmtvm-1: Connection timed out
        if 'unable to resolve host' in lines[0]:
            lines = lines[1:]

        # "MemTotal:  <n> kB" -> the second whitespace-separated field
        mem_kb = lines[2].split()[1]

        return {
            'series': lines[0],
            'arch': normalize_arch(lines[1]),
            # Convert kilobytes -> megabytes
            'mem': round(int(mem_kb) / 1024),
            'cpu-cores': self._count_cpu_cores(lines[3:]),
        }

    def provision_machine(self):
        """Perform the initial provisioning of the target machine.

        :return: The client.AddMachineParams describing the machine
        :raises: :class:`paramiko.ssh_exception.AuthenticationException`
            if the connection cannot be authenticated
        """
        params = client.AddMachineParams()

        if self._init_ubuntu_user():
            # Stays None when connecting fails, so the cleanup below cannot
            # mask the original error with an UnboundLocalError.
            ssh = None
            try:
                ssh = self._get_ssh_client()

                hw = self._detect_hardware_and_os(ssh)
                params.series = hw['series']
                params.instance_id = "manual:{}".format(self.host)
                params.nonce = "manual:{}:{}".format(
                    self.host,
                    str(uuid.uuid4()),  # a nop for Juju w/manual machines
                )
                params.hardware_characteristics = {
                    'arch': hw['arch'],
                    'mem': int(hw['mem']),
                    'cpu-cores': int(hw['cpu-cores']),
                }
                params.addresses = [{
                    'value': self.host,
                    'type': 'ipv4',
                    'scope': 'public',
                }]
            finally:
                if ssh:
                    ssh.close()

        return params

    async def install_agent(self, connection, nonce, machine_id, api):
        """Install the Juju agent on the target machine.

        :param object connection: Connection to Juju API
        :param str nonce: The nonce machine specification
        :param str machine_id: The id assigned to the machine
        :param str api: The address that traffic for the controller's API
            address is redirected to (DNAT `--to-destination`)

        :return: None
        :raises: :class:`ValueError` if no controller API address can be
            found in the provisioning script
        """
        # The path where the Juju agent should be installed.
        data_dir = "/var/lib/juju"

        # Disabling this prevents `apt-get update` from running initially, so
        # charms will fail to deploy
        disable_package_commands = False

        client_facade = client.ClientFacade.from_connection(connection)
        results = await client_facade.ProvisioningScript(
            data_dir=data_dir,
            disable_package_commands=disable_package_commands,
            machine_id=machine_id,
            nonce=nonce,
        )

        # Get the IP of the controller
        #
        # Parse the provisioning script, looking for the first apiaddress.
        #
        # Example:
        #   apiaddresses:
        #   - 10.195.8.2:17070
        #   - 127.0.0.1:17070
        #   - '[::1]:17070'
        m = re.search(
            r'apiaddresses:\n- (\d+\.\d+\.\d+\.\d+):17070',
            results.script,
        )
        if m is None:
            raise ValueError(
                "Unable to find the controller API address in the "
                "provisioning script"
            )
        apiaddress = m.group(1)

        # Add IP Table rule
        #
        # In order to route the traffic to the private ip of the Juju
        # controller we use a DNAT rule to tell the machine that the
        # destination for the private address is the public address of the
        # machine where the Juju controller is running in LXD. That machine
        # will have a complementary iptables rule, routing traffic to the
        # appropriate LXD container.
        script = IPTABLES_SCRIPT.format(apiaddress, api)

        # Run this in a retry loop, because dpkg may be running and cause the
        # script to fail.
        retry = 10
        attempts = 0
        delay = 15

        while attempts <= retry:
            attempts += 1
            try:
                self._run_configure_script(script)
                break
            except Exception:
                self.log.debug(
                    "Waiting for dpkg, sleeping {} seconds".format(delay)
                )
                if attempts > retry:
                    raise
                # Yield to the event loop while waiting instead of blocking
                # every other coroutine.
                await asyncio.sleep(delay)
                # Slowly back off the retry
                delay += 15

        self._run_configure_script(results.script)

    def _run_configure_script(self, script: str):
        """Upload a script to the target machine and run it with sudo.

        The script is written to a local temporary file, copied over SFTP to
        the same path on the remote machine (connecting as `ubuntu`) and
        executed there with `sudo /bin/bash`.

        :param str script: The script to run, e.g. the one returned by the
            ProvisioningScript API
        :raises: :class:`paramiko.ssh_exception.AuthenticationException`
            if the connection cannot be authenticated
        :raises: :class:`CalledProcessError` if the script fails
        """
        # mkstemp returns an already open descriptor; wrap it so it gets
        # closed instead of leaked.
        fd, tmpFile = tempfile.mkstemp()
        ssh = None
        try:
            with os.fdopen(fd, 'w') as f:
                f.write(script)

            # get ssh client
            ssh = self._get_ssh_client(
                user="ubuntu",
            )

            # copy the local copy of the script to the remote machine
            sftp = paramiko.SFTPClient.from_transport(ssh.get_transport())
            try:
                sftp.put(
                    tmpFile,
                    tmpFile,
                )
            finally:
                sftp.close()

            # run the provisioning script
            self._run_command(
                ssh,
                "sudo /bin/bash {}".format(tmpFile),
            )
        finally:
            try:
                if ssh:
                    ssh.close()
            finally:
                os.remove(tmpFile)