Wait for cloud-init to finish before provisioning
[osm/N2VC.git] / n2vc / provisioner.py
1 # Copyright 2019 Canonical Ltd.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 import logging
15 import os
16 import re
17 from subprocess import CalledProcessError
18 import tempfile
19 import uuid
20
21 from juju.client import client
22 import asyncio
23
# Ordered lookup table used by normalize_arch(): each entry is a
# [compiled pattern, normalized architecture name] pair, and entries are
# tried from top to bottom.
arches = [
    [re.compile(pattern), normalized]
    for pattern, normalized in (
        (r"amd64|x86_64", "amd64"),
        (r"i?[3-9]86", "i386"),
        (r"(arm$)|(armv.*)", "armhf"),
        (r"aarch64", "arm64"),
        (r"ppc64|ppc64el|ppc64le", "ppc64el"),
        (r"s390x?", "s390x"),
    )
]
32
33
def normalize_arch(rawArch):
    """Map a raw machine architecture string to its normalized name.

    The patterns in the module-level ``arches`` table are tried in order,
    each anchored at the start of ``rawArch``; the name paired with the
    first matching pattern is returned.

    :param str rawArch: Architecture string to normalize
    :return: str: The normalized architecture name, or None when no pattern
        in ``arches`` matches
    """
    candidates = (entry[1] for entry in arches if entry[0].match(rawArch))
    return next(candidates, None)
39
40
# Shell script run on the target machine by
# AsyncSSHProvisioner._detect_hardware_and_os(). Its output is consumed
# positionally, so the order of the commands matters:
#   line 1  - "<os_id><version_id>" on centos/rhel, otherwise the output of
#             `lsb_release -cs`
#   line 2  - machine architecture as printed by `uname -m`
#   line 3  - the MemTotal line from /proc/meminfo
#   line 4+ - the full contents of /proc/cpuinfo
DETECTION_SCRIPT = """#!/bin/bash
set -e
os_id=$(grep '^ID=' /etc/os-release | tr -d '"' | cut -d= -f2)
if [ "$os_id" = 'centos' ] || [ "$os_id" = 'rhel' ] ; then
os_version=$(grep '^VERSION_ID=' /etc/os-release | tr -d '"' | cut -d= -f2)
echo "$os_id$os_version"
else
lsb_release -cs
fi
uname -m
grep MemTotal /proc/meminfo
cat /proc/cpuinfo
"""
54
# Shell script template used by AsyncSSHProvisioner._init_ubuntu_user().
# The single "{}" placeholder is filled via str.format() with the public
# key text. The script:
#   - creates the "ubuntu" user if `id ubuntu` fails,
#   - installs a sudoers drop-in granting that user passwordless sudo,
#   - creates ~ubuntu/.ssh/authorized_keys if it does not exist,
#   - appends the supplied key to that file when the key is non-empty.
INITIALIZE_UBUNTU_SCRIPT = """set -e
(id ubuntu &> /dev/null) || useradd -m ubuntu -s /bin/bash
umask 0077
temp=$(mktemp)
echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' > $temp
install -m 0440 $temp /etc/sudoers.d/90-juju-ubuntu
rm $temp
su ubuntu -c '[ -f ~/.ssh/authorized_keys ] || install -D -m 0600 /dev/null ~/.ssh/authorized_keys'
export authorized_keys="{}"
if [ ! -z "$authorized_keys" ]; then
su ubuntu -c 'echo $authorized_keys >> ~/.ssh/authorized_keys'
fi
"""
68
# Shell script template used by AsyncSSHProvisioner.install_agent() for
# every series that is not centos/rhel. The two "{}" placeholders are
# filled via str.format() with, in order, the destination address to match
# and the address to DNAT it to. The script installs iptables-persistent
# when needed, adds the nat OUTPUT rule and saves it with
# netfilter-persistent.
# NOTE: this is not a raw string, so the trailing backslash joins the two
# physical lines of the install command into one line of the script.
IPTABLES_SCRIPT = """#!/bin/bash
set -e
[ -v `which netfilter-persistent` ] && apt update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -yqq iptables-persistent
iptables -t nat -A OUTPUT -p tcp -d {} -j DNAT --to-destination {}
netfilter-persistent save
"""
76
# Shell script template used by AsyncSSHProvisioner.install_agent() when
# the series contains "centos" or "rhel". The two "{}" placeholders are
# filled via str.format() with, in order, the destination address to match
# and the address to DNAT it to. The script installs, starts and enables
# firewalld as needed, adds a permanent direct nat OUTPUT rule and reloads
# the firewall.
# NOTE: this is not a raw string, so each trailing backslash joins the
# following physical line onto the same line of the script.
IPTABLES_SCRIPT_RHEL = """#!/bin/bash
set -e
[ -v `which firewalld` ] && yum install -q -y firewalld
systemctl is-active --quiet firewalld || systemctl start firewalld \
&& firewall-cmd --permanent --zone=public --set-target=ACCEPT
systemctl is-enabled --quiet firewalld || systemctl enable firewalld
firewall-cmd --direct --permanent --add-rule ipv4 nat OUTPUT 0 -d {} -p tcp \
-j DNAT --to-destination {}
firewall-cmd --reload
"""
87
# Shell script run by AsyncSSHProvisioner.install_agent() before any other
# configuration step; it runs `cloud-init status --wait` on the target.
# install_agent() treats any failure of this script as "cloud-init not
# present" and carries on.
CLOUD_INIT_WAIT_SCRIPT = """#!/bin/bash
set -e
cloud-init status --wait
"""
92
93
class AsyncSSHProvisioner:
    """Provision a manually created machine via SSH.

    All remote work is done by shelling out to the local ``ssh`` and ``scp``
    binaries as ``user@host``, authenticating with ``private_key_path``.
    """

    user = ""
    host = ""
    private_key_path = ""

    def __init__(self, user, host, private_key_path, log=None):
        """Store the connection details.

        :param str user: Remote user to connect as
        :param str host: Address of the machine to provision
        :param str private_key_path: Path to the SSH private key; the
            matching public key is expected at the same path plus ``.pub``
        :param log: Optional logger; defaults to this module's logger
        """
        self.host = host
        self.user = user
        self.private_key_path = private_key_path
        self.log = log if log else logging.getLogger(__name__)

    async def _scp(self, source_file, destination_file):
        """Execute an scp command. Requires a fully qualified source and
        destination.

        :param str source_file: Path to the source file
        :param str destination_file: Path to the destination file
        :raises: :class:`CalledProcessError` if scp exits non-zero
        """
        destination = "{}@{}:{}".format(self.user, self.host, destination_file)
        cmd = [
            "scp",
            "-i",
            os.path.expanduser(self.private_key_path),
            "-o",
            "StrictHostKeyChecking=no",
            "-q",
            "-B",
            source_file,
            destination,
        ]
        process = await asyncio.create_subprocess_exec(*cmd)
        await process.wait()
        if process.returncode != 0:
            raise CalledProcessError(returncode=process.returncode, cmd=cmd)

    async def _ssh(self, command):
        """Run a command remotely via SSH.

        :param str command: The command to execute
        :return: tuple: The decoded, stripped stdout and stderr of the
            command execution
        :raises: :class:`CalledProcessError` if the command fails; its
            ``output`` attribute carries the command's stderr
        """
        destination = "{}@{}".format(self.user, self.host)
        cmd = [
            "ssh",
            "-i",
            os.path.expanduser(self.private_key_path),
            "-o",
            "StrictHostKeyChecking=no",
            "-q",
            destination,
            command,
        ]
        process = await asyncio.create_subprocess_exec(
            *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
        )
        stdout, stderr = await process.communicate()

        if process.returncode != 0:
            output = stderr.decode("utf-8").strip()
            raise CalledProcessError(
                returncode=process.returncode, cmd=cmd, output=output
            )
        return (stdout.decode("utf-8").strip(), stderr.decode("utf-8").strip())

    async def _init_ubuntu_user(self):
        """Initialize the ubuntu user.

        Waits until ``sudo -n true`` succeeds over SSH (retrying with a
        growing delay), then runs INITIALIZE_UBUNTU_SCRIPT with the public
        key that sits next to the configured private key.

        :return: bool: If the initialization was successful
        :raises: :class:`CalledProcessError` if the _ssh command fails
        :raises: :class:`FileNotFoundError` if the public key is missing
        """
        retry = 10
        attempts = 0
        delay = 15
        while attempts <= retry:
            try:
                attempts += 1
                # Attempt to establish a SSH connection
                await self._ssh("sudo -n true")
                break
            except CalledProcessError:
                self.log.debug(
                    "Waiting for VM to boot, sleeping {} seconds".format(delay)
                )
                if attempts > retry:
                    raise
                await asyncio.sleep(delay)
                # Slowly back off the retry
                delay += 15

        # Infer the public key. Expand "~" exactly like _ssh/_scp do for the
        # private key, otherwise a key given as "~/.ssh/id_rsa" is never
        # found.
        public_key_path = os.path.expanduser(
            "{}.pub".format(self.private_key_path)
        )

        if not os.path.exists(public_key_path):
            raise FileNotFoundError(
                "Public key '{}' doesn't exist.".format(public_key_path)
            )

        with open(public_key_path, "r") as f:
            # Drop the trailing newline so it is not embedded in the script.
            public_key = f.readline().strip()

        script = INITIALIZE_UBUNTU_SCRIPT.format(public_key)

        await self._run_configure_script(script)

        return True

    @staticmethod
    def _count_cpu_cores(cpuinfo_lines):
        """Count CPU cores from the lines of a /proc/cpuinfo dump.

        The "cpu cores" value is added once per distinct "physical id"
        (i.e. once per socket). When no "cpu cores" field is present at
        all, the number of "processor" entries is used instead.

        :param list cpuinfo_lines: Lines of /proc/cpuinfo output
        :return: int: The number of cores found (0 if none could be found)
        """
        seen_physical_ids = set()
        physical_id = ""
        cores = 0
        processors = 0

        for line in cpuinfo_lines:
            if not line.strip():
                # A blank line separates two processor stanzas.
                physical_id = ""
            elif line.startswith("physical id"):
                physical_id = line.partition(":")[2].strip()
            elif line.startswith("cpu cores"):
                if physical_id not in seen_physical_ids:
                    cores += int(line.partition(":")[2].strip())
                    seen_physical_ids.add(physical_id)
            elif line.startswith("processor"):
                processors += 1

        return cores or processors

    async def _detect_hardware_and_os(self):
        """Detect the target hardware capabilities and OS series.

        Runs DETECTION_SCRIPT on the target and parses its output.

        :return: dict: With keys "series" (str), "arch" (str or None),
            "cpu-cores" (int) and "mem" (int, megabytes)
        :raises: :class:`CalledProcessError` if the script fails
        """
        stdout, _ = await self._run_configure_script(DETECTION_SCRIPT)

        lines = stdout.split("\n")

        # Line 3 looks like "MemTotal:  16384256 kB".
        mem_kb = re.split(r"\s+", lines[2])[1]

        return {
            "series": lines[0].strip(),
            "arch": normalize_arch(lines[1].strip()),
            "cpu-cores": self._count_cpu_cores(lines[3:]),
            # Convert kilobytes -> megabytes
            "mem": round(int(mem_kb) / 1024),
        }

    async def provision_machine(self):
        """Perform the initial provisioning of the target machine.

        :return: The client.AddMachineParams describing the machine
        """
        params = client.AddMachineParams()

        if await self._init_ubuntu_user():
            hw = await self._detect_hardware_and_os()
            params.series = hw["series"]
            params.instance_id = "manual:{}".format(self.host)
            params.nonce = "manual:{}:{}".format(
                self.host, str(uuid.uuid4()),
            )  # a nop for Juju w/manual machines
            params.hardware_characteristics = {
                "arch": hw["arch"],
                "mem": int(hw["mem"]),
                "cpu-cores": int(hw["cpu-cores"]),
            }
            params.addresses = [{"value": self.host, "type": "ipv4", "scope": "public"}]

        return params

    async def install_agent(self, connection, nonce, machine_id, proxy=None, series=None):
        """Install the Juju agent on the target machine.

        Fetches the provisioning script from the controller, waits for
        cloud-init to finish, optionally adds a DNAT rule towards ``proxy``,
        and finally runs the provisioning script on the target.

        :param object connection: Connection to Juju API
        :param str nonce: The nonce machine specification
        :param str machine_id: The id assigned to the machine
        :param str proxy: IP of the API_PROXY
        :param str series: OS name

        :return: None
        :raises: :class:`ValueError` if ``proxy`` is given but no controller
            address can be found in the provisioning script
        """
        # The path where the Juju agent should be installed.
        data_dir = "/var/lib/juju"

        # Disabling this prevents `apt-get update` from running initially, so
        # charms will fail to deploy
        disable_package_commands = False

        client_facade = client.ClientFacade.from_connection(connection)
        results = await client_facade.ProvisioningScript(
            data_dir=data_dir,
            disable_package_commands=disable_package_commands,
            machine_id=machine_id,
            nonce=nonce,
        )

        try:
            # Wait until cloud-init finish
            await self._run_configure_script(CLOUD_INIT_WAIT_SCRIPT)
        except Exception:
            # Deliberately best-effort: any failure is treated as cloud-init
            # not being available on the machine.
            self.log.debug("cloud-init not present in machine {}".format(machine_id))

        if proxy:
            # Get the IP of the controller
            #
            # Parse the provisioning script, looking for the first apiaddress.
            #
            # Example:
            # apiaddresses:
            # - 10.195.8.2:17070
            # - 127.0.0.1:17070
            # - '[::1]:17070'
            m = re.search(r"apiaddresses:\n- (\d+\.\d+\.\d+\.\d+):17070", results.script)
            if m is None:
                raise ValueError(
                    "Could not find an IPv4 apiaddress on port 17070 in the "
                    "provisioning script for machine {}".format(machine_id)
                )
            apiaddress = m.group(1)

            # Add IP Table rule
            #
            # In order to route the traffic to the private ip of the Juju
            # controller we use a DNAT rule to tell the machine that the
            # destination for the private address is the public address of
            # the machine where the Juju controller is running in LXD. That
            # machine will have a complimentary iptables rule, routing
            # traffic to the appropriate LXD container.
            if series and ("centos" in series or "rhel" in series):
                script = IPTABLES_SCRIPT_RHEL.format(apiaddress, proxy)
            else:
                script = IPTABLES_SCRIPT.format(apiaddress, proxy)

            # Run this in a retry loop, because dpkg may be running and cause
            # the script to fail.
            retry = 10
            attempts = 0
            delay = 15

            while attempts <= retry:
                try:
                    attempts += 1
                    await self._run_configure_script(script)
                    break
                except Exception:
                    self.log.debug("Waiting for DNAT rules to be applied and saved, "
                                   "sleeping {} seconds".format(delay))
                    if attempts > retry:
                        raise
                    await asyncio.sleep(delay)
                    # Slowly back off the retry
                    delay += 15

        await self._run_configure_script(results.script)

    async def _run_configure_script(self, script, root=True):
        """Copy a script to the target machine and run it with bash.

        The script is written to a local temporary file, copied to the same
        path on the target and executed there. The local temporary file is
        always removed afterwards; the remote copy is left in place.

        :param str script: The script to be executed
        :param bool root: Run the script through sudo when True
        :return: tuple: The stdout and stderr of the script execution
        :raises: :class:`CalledProcessError` if the copy or the script fails
        """
        fd, tmpFile = tempfile.mkstemp()
        try:
            # fdopen takes ownership of the descriptor returned by mkstemp,
            # so it is closed together with the file object.
            with os.fdopen(fd, "w") as f:
                f.write(script)

            # copy the local copy of the script to the remote machine
            await self._scp(tmpFile, tmpFile)

            # run the provisioning script
            return await self._ssh(
                "{} /bin/bash {}".format("sudo" if root else "", tmpFile)
            )
        finally:
            # Only remove the local file once the remote run is over, in
            # case the target is the local machine itself.
            try:
                os.remove(tmpFile)
            except FileNotFoundError:
                pass