243d67da30f4a7981defd9569ff6d188c324b349
[osm/N2VC.git] / n2vc / provisioner.py
1 # Copyright 2019 Canonical Ltd.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 import logging
15 import os
16 import re
17 import shlex
18 import tempfile
19 import time
20 import uuid
21 from subprocess import CalledProcessError
22
23 import paramiko
24 import n2vc.exceptions
25
26 from juju.client import client
27
# Ordered table of [compiled pattern, Juju architecture name] pairs. The
# patterns are tried top to bottom against the machine string reported by
# `uname -m`; the first pattern that matches at the start of the string wins.
arches = [
    [re.compile(r"amd64|x86_64"), "amd64"],
    [re.compile(r"i?[3-9]86"), "i386"],
    [re.compile(r"(arm$)|(armv.*)"), "armhf"],
    [re.compile(r"aarch64"), "arm64"],
    [re.compile(r"ppc64|ppc64el|ppc64le"), "ppc64el"],
    [re.compile(r"s390x?"), "s390x"],
]


def normalize_arch(rawArch):
    """Normalize the architecture string.

    :param str rawArch: The raw machine architecture, e.g. the output of
        `uname -m`.

    :return: str: The normalized architecture name from `arches`, or None
        when no pattern matches the start of `rawArch`.
    """
    for pattern, normalized in arches:
        if pattern.match(rawArch):
            return normalized

    # Unknown architecture: make the "no match" result explicit.
    return None
44
45
# Shell script executed on the target machine to gather facts about it.
# Its output is consumed line by line, so the order matters:
#   line 1: OS series ("centos<VERSION_ID>" on CentOS, else `lsb_release -cs`)
#   line 2: machine architecture (`uname -m`)
#   line 3: the MemTotal line of /proc/meminfo
#   rest:   the contents of /proc/cpuinfo
DETECTION_SCRIPT = """#!/bin/bash
set -e
os_id=$(grep '^ID=' /etc/os-release | tr -d '"' | cut -d= -f2)
if [ "$os_id" = 'centos' ]; then
  os_version=$(grep '^VERSION_ID=' /etc/os-release | tr -d '"' | cut -d= -f2)
  echo "centos$os_version"
else
  lsb_release -cs
fi
uname -m
grep MemTotal /proc/meminfo
cat /proc/cpuinfo
"""
59
# Shell script template that prepares the `ubuntu` account on the target:
# creates the user if missing, grants it passwordless sudo through
# /etc/sudoers.d/90-juju-ubuntu, (re)creates ~/.ssh/authorized_keys with
# mode 0600 and appends the supplied public key when it is non-empty.
# The single `{}` placeholder is filled, via str.format, with the public key.
INITIALIZE_UBUNTU_SCRIPT = """set -e
(id ubuntu &> /dev/null) || useradd -m ubuntu -s /bin/bash
umask 0077
temp=$(mktemp)
echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' > $temp
install -m 0440 $temp /etc/sudoers.d/90-juju-ubuntu
rm $temp
su ubuntu -c 'install -D -m 0600 /dev/null ~/.ssh/authorized_keys'
export authorized_keys="{}"
if [ ! -z "$authorized_keys" ]; then
    su ubuntu -c 'echo $authorized_keys >> ~/.ssh/authorized_keys'
fi
"""
73
# Shell script template that installs iptables-persistent, appends a DNAT
# rule to the nat/OUTPUT chain and saves the rules so they persist.
# Two positional `{}` placeholders are filled via str.format:
#   1st: the destination address to match (-d)
#   2nd: the address the traffic is rewritten to (--to-destination)
IPTABLES_SCRIPT = """#!/bin/bash
set -e
DEBIAN_FRONTEND=noninteractive apt-get install -yqq iptables-persistent
iptables -t nat -A OUTPUT -p tcp -d {} -j DNAT --to-destination {}
netfilter-persistent save
"""
80
class SSHProvisioner:
    """Provision a manually created machine via SSH.

    The provisioner connects to `host` as `user` with the RSA private key at
    `private_key_path`, prepares the `ubuntu` account, detects the machine's
    hardware and OS series, and finally installs the Juju agent.
    """

    def __init__(self, user, host, private_key_path, log=None):
        """Store the connection settings.

        :param str user: The user to connect as.
        :param str host: The host to connect to.
        :param str private_key_path: Path to the RSA private key. The public
            key is expected next to it, at `<private_key_path>.pub`.
        :param object log: Optional logger; defaults to this module's logger.
        """
        self.host = host
        self.user = user
        self.private_key_path = private_key_path
        self.log = log if log else logging.getLogger(__name__)

    def _get_ssh_client(self, host=None, user=None, private_key_path=None):
        """Return a connected Paramiko ssh object.

        :param str host: The host to connect to (defaults to `self.host`).
        :param str user: The user to connect as (defaults to `self.user`).
        :param str private_key_path: The private key to authenticate with
            (defaults to `self.private_key_path`).

        :return: object: A paramiko.SSHClient
        :raises: :class:`paramiko.ssh_exception.SSHException` if the
            connection failed
        """
        host = host or self.host
        user = user or self.user
        private_key_path = private_key_path or self.private_key_path

        ssh = paramiko.SSHClient()
        # NOTE(security): AutoAddPolicy accepts any unknown host key without
        # verification. Kept as-is because callers rely on it for freshly
        # created machines, but it offers no protection against MITM.
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

        # Read the private key into a paramiko.RSAKey. Without a usable key
        # file, pkey stays None and paramiko falls back to its own lookup.
        pkey = None
        if private_key_path and os.path.exists(private_key_path):
            with open(private_key_path, 'r') as f:
                pkey = paramiko.RSAKey.from_private_key(f)

        try:
            self._connect_with_retry(ssh, host, user, pkey)
        except Exception:
            # Do not leak a half-open transport when the connection fails.
            ssh.close()
            raise
        return ssh

    def _connect_with_retry(self, ssh, host, user, pkey):
        """Connect `ssh`, retrying while the machine is still booting."""
        #######################################################################
        # There is a bug in some versions of OpenSSH 4.3 (CentOS/RHEL5) where #
        # the server may not send the SSH_MSG_USERAUTH_BANNER message except  #
        # when responding to an auth_none request. For example, paramiko will #
        # attempt to use password authentication when a password is set, but  #
        # the server could deny that, instead requesting keyboard-interactive.#
        # The hack to workaround this is to attempt a reconnect, which will   #
        # receive the right banner, and authentication can proceed. See the   #
        # following for more info:                                            #
        # https://github.com/paramiko/paramiko/issues/432                     #
        # https://github.com/paramiko/paramiko/pull/438                       #
        #######################################################################
        retry = 10
        attempts = 0
        delay = 15
        while True:
            attempts += 1
            try:
                # Attempt to establish a SSH connection
                ssh.connect(
                    host,
                    port=22,
                    username=user,
                    pkey=pkey,
                    # allow_agent=False,
                    # look_for_keys=False,
                )
                return
            except paramiko.ssh_exception.SSHException as e:
                if 'Error reading SSH protocol banner' != str(e):
                    # Reraise the original exception
                    self.log.debug("Unhandled exception caught: {}".format(e))
                    raise
                # Once more, with feeling. A success here IS the connection,
                # so return instead of looping and connecting yet again.
                ssh.connect(host, port=22, username=user, pkey=pkey)
                return
            except Exception as e:
                if 'Unable to connect to port' not in str(e):
                    self.log.debug(e)
                    raise
                self.log.debug(
                    "Waiting for VM to boot, sleeping {} seconds".format(delay)
                )
                if attempts > retry:
                    raise
                time.sleep(delay)
                # Slowly back off the retry
                delay += 15

    def _run_command(self, ssh, cmd, pty=True):
        """Run a command remotely via SSH.

        :param object ssh: The SSHClient
        :param cmd: The command to execute. A `str` is sent to the remote
            shell verbatim (so its quoting is preserved); a list or tuple is
            treated as shell fragments that are joined with single spaces.
        :param bool pty: Whether to allocate a pty

        :return: tuple: The stdout and stderr of the command execution
        :raises: :class:`CalledProcessError` if the command fails
        """
        if isinstance(cmd, str):
            # Splitting and re-joining would strip the caller's quoting.
            command = cmd
        elif isinstance(cmd, (list, tuple)):
            command = ' '.join(str(part) for part in cmd)
        else:
            command = str(cmd)

        _, stdout, stderr = ssh.exec_command(command, get_pty=pty)

        # Drain the streams BEFORE asking for the exit status: paramiko can
        # hang in recv_exit_status() when unread output fills the window.
        out = stdout.read().decode('utf-8', errors='replace').strip()
        err = stderr.read().decode('utf-8', errors='replace').strip()
        retcode = stdout.channel.recv_exit_status()

        # recv_exit_status() yields -1 when the server sent no status, so
        # anything other than 0 is a failure.
        if retcode != 0:
            # With a pty, stderr is merged into stdout; fall back to it so
            # the error output is not lost.
            raise CalledProcessError(
                returncode=retcode,
                cmd=cmd,
                output=err or out,
                stderr=err,
            )
        return (out, err)

    def _init_ubuntu_user(self):
        """Initialize the ubuntu user.

        Verifies that `self.user` can use sudo without a password, then runs
        INITIALIZE_UBUNTU_SCRIPT on the target with the public key found at
        `<private_key_path>.pub`.

        :return: bool: If the initialization was successful
        :raises: :class:`n2vc.exceptions.AuthenticationFailed` if the
            authentication fails during the sudo check
        :raises: :class:`n2vc.exceptions.NoRouteToHost` if the host cannot
            be reached during the sudo check
        :raises: :class:`FileNotFoundError` if the public key is missing
        :raises: :class:`CalledProcessError` if a remote command fails
        """
        ssh = None
        try:
            # Run w/o allocating a pty, so we fail if sudo prompts for a passwd
            ssh = self._get_ssh_client()
            self._run_command(ssh, "sudo -n true", pty=False)
        except paramiko.ssh_exception.AuthenticationException:
            raise n2vc.exceptions.AuthenticationFailed(self.user)
        except paramiko.ssh_exception.NoValidConnectionsError:
            raise n2vc.exceptions.NoRouteToHost(self.host)
        finally:
            if ssh:
                ssh.close()

        # Infer the public key
        public_key_path = "{}.pub".format(self.private_key_path)

        if not os.path.exists(public_key_path):
            raise FileNotFoundError(
                "Public key '{}' doesn't exist.".format(public_key_path)
            )

        with open(public_key_path, "r") as f:
            # Drop the trailing newline so it is not embedded in the script.
            public_key = f.readline().strip()

        script = INITIALIZE_UBUNTU_SCRIPT.format(public_key)

        # Connect first, so the finally clause only ever closes this client.
        ssh = self._get_ssh_client()
        try:
            self._run_command(
                ssh,
                ["sudo", "/bin/bash -c " + shlex.quote(script)],
                pty=True
            )
        finally:
            ssh.close()

        return True

    @staticmethod
    def _count_cpu_cores(cpuinfo_lines):
        """Count the CPU cores described by /proc/cpuinfo lines.

        Sums the "cpu cores" value once per distinct "physical id". When no
        "cpu cores" line is present, falls back to the number of "processor"
        entries, and to 1 if there are none either.

        :param list cpuinfo_lines: The lines of /proc/cpuinfo
        :return: int: The number of cores (always >= 1)
        """
        cores_per_socket = {}
        processors = 0
        # Remembered across lines: "physical id" precedes "cpu cores" within
        # each processor entry. Stays "" when the kernel reports none.
        physical_id = ""

        for line in cpuinfo_lines:
            key, sep, value = line.partition(":")
            if not sep:
                continue
            key = key.strip()
            value = value.strip()

            if key == "processor":
                processors += 1
            elif key == "physical id":
                physical_id = value
            elif key == "cpu cores":
                if physical_id in cores_per_socket:
                    continue
                try:
                    cores_per_socket[physical_id] = int(value)
                except ValueError:
                    # Ignore a malformed count rather than abort detection.
                    continue

        total = sum(cores_per_socket.values())
        return total if total > 0 else max(processors, 1)

    def _detect_hardware_and_os(self, ssh):
        """Detect the target hardware capabilities and OS series.

        :param object ssh: The SSHClient
        :return: dict: With the keys 'series' (str), 'arch' (the result of
            `normalize_arch`), 'mem' (int, megabytes) and 'cpu-cores' (int).
        :raises: :class:`CalledProcessError` if the detection script fails
        """
        stdout, _ = self._run_command(
            ssh,
            ["sudo", "/bin/bash -c " + shlex.quote(DETECTION_SCRIPT)],
            pty=True,
        )

        # A pty yields "\r\n" line endings; strip() removes the stray "\r".
        lines = [line.strip() for line in stdout.split("\n")]

        # Remove extraneous line if DNS resolution of hostname fails
        # i.e. sudo: unable to resolve host test-1-mgmtvm-1: Connection timed out
        if 'unable to resolve host' in lines[0]:
            lines = lines[1:]

        # "MemTotal:   16384 kB" -> the second field is the size in kilobytes
        mem_kb = int(re.split(r'\s+', lines[2])[1])

        return {
            'series': lines[0],
            'arch': normalize_arch(lines[1]),
            'cpu-cores': self._count_cpu_cores(lines[3:]),
            # Convert kilobytes -> megabytes
            'mem': round(mem_kb / 1024),
        }

    def provision_machine(self):
        """Perform the initial provisioning of the target machine.

        :return: object: The client.AddMachineParams describing the machine
        :raises: :class:`paramiko.ssh_exception.AuthenticationException`
            if the authentication fails
        """
        params = client.AddMachineParams()

        if self._init_ubuntu_user():
            # Connect outside the try block so a failed connection is
            # reported as such, not as an unbound `ssh` in the cleanup.
            ssh = self._get_ssh_client()
            try:
                hw = self._detect_hardware_and_os(ssh)
                params.series = hw['series']
                params.instance_id = "manual:{}".format(self.host)
                params.nonce = "manual:{}:{}".format(
                    self.host,
                    str(uuid.uuid4()),  # a nop for Juju w/manual machines
                )
                params.hardware_characteristics = {
                    'arch': hw['arch'],
                    'mem': int(hw['mem']),
                    'cpu-cores': int(hw['cpu-cores']),
                }
                params.addresses = [{
                    'value': self.host,
                    'type': 'ipv4',
                    'scope': 'public',
                }]
            finally:
                ssh.close()

        return params

    async def install_agent(self, connection, nonce, machine_id, api):
        """Install the Juju agent on the target machine.

        Fetches the provisioning script from the Juju API, adds a DNAT rule
        on the target (see IPTABLES_SCRIPT) and then runs the provisioning
        script there.

        :param object connection: Connection to Juju API
        :param str nonce: The nonce machine specification
        :param str machine_id: The id assigned to the machine
        :param str api: The address used as the DNAT `--to-destination` for
            traffic addressed to the controller address found in the script

        :return: None
        :raises: :class:`ValueError` if no controller address can be found
            in the provisioning script
        """
        # Imported here so the coroutine can wait without blocking the
        # event loop; the module itself does not import asyncio.
        import asyncio

        # The path where the Juju agent should be installed.
        data_dir = "/var/lib/juju"

        # Disabling this prevents `apt-get update` from running initially, so
        # charms will fail to deploy
        disable_package_commands = False

        client_facade = client.ClientFacade.from_connection(connection)
        results = await client_facade.ProvisioningScript(
            data_dir=data_dir,
            disable_package_commands=disable_package_commands,
            machine_id=machine_id,
            nonce=nonce,
        )

        # Get the IP of the controller
        #
        # Parse the provisioning script, looking for the first apiaddress.
        #
        # Example:
        # apiaddresses:
        # - 10.195.8.2:17070
        # - 127.0.0.1:17070
        # - '[::1]:17070'
        m = re.search(
            r'apiaddresses:\n- (\d+\.\d+\.\d+\.\d+):17070',
            results.script,
        )
        if m is None:
            raise ValueError(
                "Unable to find an IPv4 apiaddress on port 17070 in the "
                "provisioning script"
            )
        apiaddress = m.group(1)

        # Add IP Table rule
        #
        # In order to route the traffic to the private ip of the Juju
        # controller we use a DNAT rule to tell the machine that the
        # destination for the private address is the public address of the
        # machine where the Juju controller is running in LXD. That machine
        # will have a complementary iptables rule, routing traffic to the
        # appropriate LXD container.
        script = IPTABLES_SCRIPT.format(apiaddress, api)

        # Run this in a retry loop, because dpkg may be running and cause the
        # script to fail.
        retry = 10
        attempts = 0
        delay = 15
        while True:
            attempts += 1
            try:
                self._run_configure_script(script)
                break
            except Exception:
                self.log.debug(
                    "Waiting for dpkg, sleeping {} seconds".format(delay)
                )
                if attempts > retry:
                    raise
                await asyncio.sleep(delay)
                # Slowly back off the retry
                delay += 15

        # self.log.debug("Running configure script")
        self._run_configure_script(results.script)
        # self.log.debug("Configure script finished")

    def _run_configure_script(self, script: str):
        """Run the script to install the Juju agent on the target machine.

        The script is written to a local temporary file, uploaded over SFTP
        to the same path on the target, and executed there with sudo as the
        `ubuntu` user.

        :param str script: The script returned by the ProvisioningScript API
        :raises: :class:`paramiko.ssh_exception.AuthenticationException`
            if the upload fails
        :raises: :class:`CalledProcessError` if the script fails
        """
        fd, tmp_path = tempfile.mkstemp()
        ssh = None
        try:
            # fdopen takes ownership of the descriptor mkstemp returned, so
            # it is closed when the with-block ends instead of leaking.
            with os.fdopen(fd, 'w') as f:
                f.write(script)

            # get ssh client
            ssh = self._get_ssh_client(
                user="ubuntu",
            )

            # copy the local copy of the script to the remote machine
            sftp = paramiko.SFTPClient.from_transport(ssh.get_transport())
            try:
                sftp.put(
                    tmp_path,
                    tmp_path,
                )

                # run the provisioning script
                self._run_command(
                    ssh,
                    "sudo /bin/bash {}".format(shlex.quote(tmp_path)),
                )
            finally:
                try:
                    # Best effort: do not leave the script on the target.
                    sftp.remove(tmp_path)
                except Exception as e:
                    self.log.debug(
                        "Unable to remove remote script {}: {}".format(
                            tmp_path, e
                        )
                    )
                sftp.close()
        finally:
            os.remove(tmp_path)
            if ssh is not None:
                ssh.close()