Enable lint, flake8 and unit tests
[osm/N2VC.git] / n2vc / provisioner.py
1 # Copyright 2019 Canonical Ltd.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 import logging
15 import os
16 import re
17 import shlex
18 from subprocess import CalledProcessError
19 import tempfile
20 import time
21 import uuid
22
23 from juju.client import client
24 import n2vc.exceptions
25 import paramiko
26
27
# Ordered table of [compiled pattern, canonical architecture name] pairs.
# Patterns are tried top to bottom with ``re.match`` (anchored at the start of
# the string); the first one that matches wins.
arches = [
    [re.compile(r"amd64|x86_64"), "amd64"],
    [re.compile(r"i?[3-9]86"), "i386"],
    [re.compile(r"(arm$)|(armv.*)"), "armhf"],
    [re.compile(r"aarch64"), "arm64"],
    [re.compile(r"ppc64|ppc64el|ppc64le"), "ppc64el"],
    [re.compile(r"s390x?"), "s390x"],
]


def normalize_arch(rawArch):
    """Map a raw machine architecture string to its canonical name.

    :param str rawArch: The architecture string to normalize (for example
        the output of ``uname -m``).

    :return: str: The canonical name of the first entry in ``arches`` whose
        pattern matches the start of ``rawArch``, or None if none matches.
    """
    matching_names = (
        canonical for pattern, canonical in arches if pattern.match(rawArch)
    )
    return next(matching_names, None)
43
44
# Bash script executed on the target machine (see
# SSHProvisioner._detect_hardware_and_os) to collect OS and hardware facts.
# It prints, in this order:
#   1. the OS series: "centos<VERSION_ID>" when /etc/os-release has ID=centos,
#      otherwise the codename reported by `lsb_release -cs`
#   2. the machine architecture (`uname -m`)
#   3. the MemTotal line from /proc/meminfo
#   4. the full contents of /proc/cpuinfo
# `set -e` makes the script abort on the first failing command.
DETECTION_SCRIPT = """#!/bin/bash
set -e
os_id=$(grep '^ID=' /etc/os-release | tr -d '"' | cut -d= -f2)
if [ "$os_id" = 'centos' ]; then
os_version=$(grep '^VERSION_ID=' /etc/os-release | tr -d '"' | cut -d= -f2)
echo "centos$os_version"
else
lsb_release -cs
fi
uname -m
grep MemTotal /proc/meminfo
cat /proc/cpuinfo
"""
58
# Shell script template used to prepare the `ubuntu` account on the target.
# The single `{}` placeholder is filled with the public key via str.format
# (see SSHProvisioner._init_ubuntu_user). The script:
#   - creates the `ubuntu` user (with a home directory and bash shell) if
#     `id ubuntu` fails
#   - installs /etc/sudoers.d/90-juju-ubuntu (mode 0440) granting that user
#     passwordless sudo
#   - creates ~ubuntu/.ssh/authorized_keys with mode 0600
#   - appends the substituted key to that file when it is non-empty
INITIALIZE_UBUNTU_SCRIPT = """set -e
(id ubuntu &> /dev/null) || useradd -m ubuntu -s /bin/bash
umask 0077
temp=$(mktemp)
echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' > $temp
install -m 0440 $temp /etc/sudoers.d/90-juju-ubuntu
rm $temp
su ubuntu -c 'install -D -m 0600 /dev/null ~/.ssh/authorized_keys'
export authorized_keys="{}"
if [ ! -z "$authorized_keys" ]; then
su ubuntu -c 'echo $authorized_keys >> ~/.ssh/authorized_keys'
fi
"""
72
# Bash script template that installs `iptables-persistent`, appends a DNAT
# rule to the nat table's OUTPUT chain and persists it with
# `netfilter-persistent save`. It has two `{}` placeholders filled via
# str.format (see SSHProvisioner.install_agent): the first is the destination
# address to match on outgoing TCP traffic, the second is the address that
# traffic is rewritten to.
IPTABLES_SCRIPT = """#!/bin/bash
set -e
apt-get update
DEBIAN_FRONTEND=noninteractive apt-get install -yqq iptables-persistent
iptables -t nat -A OUTPUT -p tcp -d {} -j DNAT --to-destination {}
netfilter-persistent save
"""
80
81
class SSHProvisioner:
    """Provision a manually created machine via SSH.

    The provisioner connects to ``host`` as ``user`` with the private key at
    ``private_key_path``, prepares the ``ubuntu`` account, detects the
    machine's OS series and hardware, and finally runs the Juju provisioning
    script on the machine.
    """

    def __init__(self, user, host, private_key_path, log=None):
        """Store the connection settings.

        :param str user: The user to connect as.
        :param str host: The host to connect to.
        :param str private_key_path: Path of the private key used to
            authenticate. The matching public key is expected at
            ``<private_key_path>.pub``.
        :param log: Optional logger; a module-level logger is used when
            omitted.
        """
        self.host = host
        self.user = user
        self.private_key_path = private_key_path
        self.log = log if log else logging.getLogger(__name__)

    def _get_ssh_client(self, host=None, user=None, private_key_path=None):
        """Return a connected Paramiko ssh object.

        Any argument left empty falls back to the value given to the
        constructor.

        :param str host: The host to connect to.
        :param str user: The user to connect as.
        :param str private_key_path: Path of the private key to authenticate
            with.

        :return: object: A paramiko.SSHClient
        :raises: :class:`paramiko.ssh_exception.SSHException` if the
            connection failed
        """
        if not host:
            host = self.host

        if not user:
            user = self.user

        if not private_key_path:
            private_key_path = self.private_key_path

        ssh = paramiko.SSHClient()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

        pkey = None

        # Read the private key into a paramiko.RSAKey. The path is checked
        # for emptiness first because os.path.exists(None) raises TypeError.
        if private_key_path and os.path.exists(private_key_path):
            with open(private_key_path, "r") as f:
                pkey = paramiko.RSAKey.from_private_key(f)

        #######################################################################
        # There is a bug in some versions of OpenSSH 4.3 (CentOS/RHEL5) where #
        # the server may not send the SSH_MSG_USERAUTH_BANNER message except  #
        # when responding to an auth_none request. For example, paramiko will #
        # attempt to use password authentication when a password is set, but  #
        # the server could deny that, instead requesting keyboard-interactive.#
        # The hack to workaround this is to attempt a reconnect, which will   #
        # receive the right banner, and authentication can proceed. See the   #
        # following for more info:                                            #
        # https://github.com/paramiko/paramiko/issues/432                     #
        # https://github.com/paramiko/paramiko/pull/438                       #
        #######################################################################

        retry = 10
        attempts = 0
        delay = 15
        while attempts <= retry:
            try:
                attempts += 1

                # Attempt to establish a SSH connection
                ssh.connect(host, port=22, username=user, pkey=pkey)
                break
            except paramiko.ssh_exception.SSHException as e:
                if "Error reading SSH protocol banner" != str(e):
                    self.log.debug("Unhandled exception caught: {}".format(e))
                    raise
                # Once more, with feeling
                ssh.connect(host, port=22, username=user, pkey=pkey)
                # The reconnect succeeded: stop here instead of looping and
                # connecting yet again on an already-connected client.
                break
            except Exception as e:
                if "Unable to connect to port" not in str(e):
                    self.log.debug(e)
                    raise
                self.log.debug(
                    "Waiting for VM to boot, sleeping {} seconds".format(delay)
                )
                if attempts > retry:
                    raise
                time.sleep(delay)
                # Slowly back off the retry
                delay += 15
        return ssh

    def _run_command(self, ssh, cmd, pty=True):
        """Run a command remotely via SSH.

        :param object ssh: The SSHClient
        :param cmd: The command to execute, either as a single string (sent
            to the remote side unchanged) or as a list of fragments that are
            joined with single spaces.
        :param bool pty: Whether to allocate a pty

        :return: tuple: The stdout and stderr of the command execution
        :raises: :class:`CalledProcessError` if the command fails
        """
        if isinstance(cmd, str):
            # Send the string as written. Splitting and re-joining it would
            # drop the quoting and change what the remote shell parses.
            command_line = cmd
            cmd = shlex.split(cmd)
        else:
            if isinstance(cmd, tuple):
                cmd = list(cmd)
            elif not isinstance(cmd, list):
                cmd = [cmd]
            command_line = " ".join(cmd)

        _, stdout, stderr = ssh.exec_command(command_line, get_pty=pty)
        retcode = stdout.channel.recv_exit_status()

        if retcode > 0:
            output = stderr.read().strip()
            raise CalledProcessError(returncode=retcode, cmd=cmd, output=output)
        return (
            stdout.read().decode("utf-8").strip(),
            stderr.read().decode("utf-8").strip(),
        )

    def _init_ubuntu_user(self):
        """Initialize the ubuntu user.

        First checks that the configured user can run ``sudo`` without a
        password, then runs INITIALIZE_UBUNTU_SCRIPT with the public key
        read from ``<private_key_path>.pub``.

        :return: bool: If the initialization was successful
        :raises: :class:`n2vc.exceptions.AuthenticationFailed` if the
            authentication fails during the sudo check
        :raises: :class:`n2vc.exceptions.NoRouteToHost` if no connection
            could be made during the sudo check
        :raises: :class:`FileNotFoundError` if the public key is missing
        :raises: :class:`paramiko.ssh_exception.AuthenticationException`
            if the authentication fails while running the script
        """
        ssh = None
        try:
            # Run w/o allocating a pty, so we fail if sudo prompts for a passwd
            ssh = self._get_ssh_client()
            self._run_command(ssh, "sudo -n true", pty=False)
        except paramiko.ssh_exception.AuthenticationException:
            raise n2vc.exceptions.AuthenticationFailed(self.user)
        except paramiko.ssh_exception.NoValidConnectionsError:
            raise n2vc.exceptions.NoRouteToHost(self.host)
        finally:
            if ssh:
                ssh.close()

        # Infer the public key
        public_key_path = "{}.pub".format(self.private_key_path)

        if not os.path.exists(public_key_path):
            raise FileNotFoundError(
                "Public key '{}' doesn't exist.".format(public_key_path)
            )

        with open(public_key_path, "r") as f:
            # Only the first line is used; drop its trailing newline so it
            # is not embedded in the generated script.
            public_key = f.readline().strip()

        script = INITIALIZE_UBUNTU_SCRIPT.format(public_key)

        ssh = None
        try:
            ssh = self._get_ssh_client()
            self._run_command(
                ssh, ["sudo", "/bin/bash -c " + shlex.quote(script)], pty=True
            )
        finally:
            if ssh:
                ssh.close()

        return True

    @staticmethod
    def _count_cpu_cores(cpuinfo_lines):
        """Count CPU cores from the lines of a ``/proc/cpuinfo`` dump.

        The "cpu cores" value is added once per distinct "physical id".
        When no "cpu cores" line is present at all, the number of lines
        starting with "processor" is returned instead.

        :param list cpuinfo_lines: The lines of /proc/cpuinfo.
        :return: int: The number of cores found (0 if nothing was found).
        """
        cores_by_package = {}
        processors = 0
        physical_id = ""

        for line in cpuinfo_lines:
            if not line.strip():
                # A blank line ends the current processor entry.
                physical_id = ""
            elif line.startswith("processor"):
                processors += 1
            elif line.startswith("physical id"):
                physical_id = line.split(":", 1)[1].strip()
            elif line.startswith("cpu cores"):
                # Every processor entry repeats the value of its package, so
                # record it only the first time a package is seen.
                if physical_id not in cores_by_package:
                    cores = line.split(":", 1)[1].strip()
                    cores_by_package[physical_id] = int(cores)

        return sum(cores_by_package.values()) or processors

    def _detect_hardware_and_os(self, ssh):
        """Detect the target hardware capabilities and OS series.

        Runs DETECTION_SCRIPT on the target and parses its output.

        :param object ssh: The SSHClient
        :return: dict: With the keys "series" (str), "arch" (the result of
            normalize_arch), "mem" (int, in megabytes) and "cpu-cores"
            (int).
        """
        info = {
            "series": "",
            "arch": "",
            "cpu-cores": "",
            "mem": "",
        }

        stdout, _ = self._run_command(
            ssh, ["sudo", "/bin/bash -c " + shlex.quote(DETECTION_SCRIPT)], pty=True
        )

        lines = stdout.split("\n")

        # Remove extraneous line if DNS resolution of hostname fails
        # i.e. sudo: unable to resolve host test-1-mgmtvm-1: Connection timed out
        if "unable to resolve host" in lines[0]:
            lines = lines[1:]

        info["series"] = lines[0].strip()
        info["arch"] = normalize_arch(lines[1].strip())

        # The third line looks like "MemTotal:   16384256 kB"
        mem_kb = re.split(r"\s+", lines[2])[1]

        # Convert kilobytes -> megabytes
        info["mem"] = round(int(mem_kb) / 1024)

        # Everything after the first three lines is /proc/cpuinfo
        info["cpu-cores"] = self._count_cpu_cores(lines[3:])

        return info

    def provision_machine(self):
        """Perform the initial provisioning of the target machine.

        Initializes the ubuntu user, then fills a ``client.AddMachineParams``
        with the detected series and hardware characteristics plus the
        instance id, nonce and address derived from the host.

        :return: object: The client.AddMachineParams
        :raises: :class:`paramiko.ssh_exception.AuthenticationException`
            if the authentication fails
        """
        params = client.AddMachineParams()

        if self._init_ubuntu_user():
            ssh = None
            try:
                ssh = self._get_ssh_client()

                hw = self._detect_hardware_and_os(ssh)
                params.series = hw["series"]
                params.instance_id = "manual:{}".format(self.host)
                params.nonce = "manual:{}:{}".format(
                    self.host, str(uuid.uuid4()),  # a nop for Juju w/manual machines
                )
                params.hardware_characteristics = {
                    "arch": hw["arch"],
                    "mem": int(hw["mem"]),
                    "cpu-cores": int(hw["cpu-cores"]),
                }
                params.addresses = [
                    {"value": self.host, "type": "ipv4", "scope": "public"}
                ]
            finally:
                # ssh is still None when the connection itself failed; guard
                # so the original error is not masked.
                if ssh:
                    ssh.close()

        return params

    async def install_agent(self, connection, nonce, machine_id, api):
        """Install the Juju agent on the target machine.

        :param object connection: Connection to Juju API
        :param str nonce: The nonce machine specification
        :param str machine_id: The id assigned to the machine
        :param str api: The address that traffic for the controller's first
            API address is redirected to (the DNAT destination).

        :raises: :class:`ValueError` if no API address can be found in the
            provisioning script
        """
        # Imported here so this method does not rely on a module-level import.
        import asyncio

        # The path where the Juju agent should be installed.
        data_dir = "/var/lib/juju"

        # Disabling this prevents `apt-get update` from running initially, so
        # charms will fail to deploy
        disable_package_commands = False

        client_facade = client.ClientFacade.from_connection(connection)
        results = await client_facade.ProvisioningScript(
            data_dir=data_dir,
            disable_package_commands=disable_package_commands,
            machine_id=machine_id,
            nonce=nonce,
        )

        # Get the IP of the controller
        #
        # Parse the provisioning script, looking for the first apiaddress.
        #
        # Example:
        # apiaddresses:
        # - 10.195.8.2:17070
        # - 127.0.0.1:17070
        # - '[::1]:17070'
        m = re.search(r"apiaddresses:\n- (\d+\.\d+\.\d+\.\d+):17070", results.script)
        if m is None:
            raise ValueError(
                "Unable to find the controller API address in the provisioning script"
            )
        apiaddress = m.group(1)

        # Add IP Table rule
        #
        # In order to route the traffic to the private ip of the Juju
        # controller we use a DNAT rule to tell the machine that the
        # destination for the private address is the public address of the
        # machine where the Juju controller is running in LXD. That machine
        # will have a complementary iptables rule, routing traffic to the
        # appropriate LXD container.
        script = IPTABLES_SCRIPT.format(apiaddress, api)

        # Run this in a retry loop, because dpkg may be running and cause the
        # script to fail.
        retry = 10
        attempts = 0
        delay = 15

        while attempts <= retry:
            try:
                attempts += 1

                self._run_configure_script(script)
                break
            except Exception:
                self.log.debug("Waiting for dpkg, sleeping {} seconds".format(delay))
                if attempts > retry:
                    raise
                # Sleep cooperatively: time.sleep() would stall the event
                # loop for the whole delay.
                await asyncio.sleep(delay)
                # Slowly back off the retry
                delay += 15

        self._run_configure_script(results.script)

    def _run_configure_script(self, script: str):
        """Copy a script to the target machine and run it with sudo.

        The script is written to a local temporary file, uploaded over SFTP
        to the same path on the target (connecting as the ``ubuntu`` user)
        and executed with ``sudo /bin/bash``. The local temporary file is
        always removed afterwards.

        :param str script: The script to run, e.g. the one returned by the
            ProvisioningScript API
        :raises: :class:`paramiko.ssh_exception.AuthenticationException`
            if the upload fails
        """
        fd, tmp_path = tempfile.mkstemp()
        ssh = None
        try:
            # Wrap the descriptor returned by mkstemp so it gets closed;
            # reopening the path by name would leak it.
            with os.fdopen(fd, "w") as f:
                f.write(script)

            # get ssh client
            ssh = self._get_ssh_client(user="ubuntu")

            # copy the local copy of the script to the remote machine
            sftp = paramiko.SFTPClient.from_transport(ssh.get_transport())
            try:
                sftp.put(tmp_path, tmp_path)
            finally:
                sftp.close()

            # run the provisioning script
            self._run_command(ssh, "sudo /bin/bash {}".format(tmp_path))
        finally:
            try:
                # ssh is still None when the connection itself failed.
                if ssh:
                    ssh.close()
            finally:
                os.remove(tmp_path)