831190bb6499e1234286b8d6f1465fde586d11a8
[osm/LCM.git] / osm_lcm / lcm_helm_conn.py
1 ##
2 # Copyright 2020 Telefonica Investigacion y Desarrollo, S.A.U.
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 # http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13 # implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #
17 ##
18 import functools
19 import yaml
20 import asyncio
21 import socket
22 import uuid
23 import os
24
25 from grpclib.client import Channel
26
27 from osm_lcm.frontend_pb2 import PrimitiveRequest
28 from osm_lcm.frontend_pb2 import SshKeyRequest, SshKeyReply
29 from osm_lcm.frontend_grpc import FrontendExecutorStub
30 from osm_lcm.lcm_utils import LcmBase
31
32 from osm_lcm.data_utils.database.database import Database
33 from osm_lcm.data_utils.filesystem.filesystem import Filesystem
34
35 from n2vc.n2vc_conn import N2VCConnector
36 from n2vc.k8s_helm_conn import K8sHelmConnector
37 from n2vc.k8s_helm3_conn import K8sHelm3Connector
38 from n2vc.exceptions import N2VCBadArgumentsException, N2VCException, N2VCExecutionException
39
40 from osm_lcm.lcm_utils import deep_get
41
42
def retryer(max_wait_time=60, delay_time=10):
    """
    Decorator factory that retries an async callable on connection errors.

    The wrapped coroutine is invoked repeatedly, sleeping ``delay_time``
    seconds between attempts, until it succeeds or the total retry budget
    ``max_wait_time`` (seconds) is exhausted.

    :param max_wait_time: total time budget, in seconds, spent retrying
    :param delay_time: delay, in seconds, between consecutive retries
    :raises ConnectionRefusedError: if the retry budget is exhausted without
        a successful call
    """
    def wrapper(func):
        # Exceptions that trigger a retry instead of propagating immediately.
        # Note the trailing comma: this must be a tuple for "except".
        retry_exceptions = (
            ConnectionRefusedError,
        )

        @functools.wraps(func)
        async def wrapped(*args, **kwargs):
            wait_time = max_wait_time
            while wait_time > 0:
                try:
                    return await func(*args, **kwargs)
                except retry_exceptions:
                    wait_time = wait_time - delay_time
                    await asyncio.sleep(delay_time)
            # Retry budget exhausted: raise so the caller sees a failure
            # (previously the exception *class* was returned as a value,
            # which callers would mistake for a successful result).
            raise ConnectionRefusedError("maximum retry time exceeded")
        return wrapped
    return wrapper
63
64
class LCMHelmConn(N2VCConnector, LcmBase):
    """
    N2VC connector that manages helm-chart based execution environments (EE).

    Execution environments are deployed as helm charts into the OSM system
    kubernetes cluster and reached through a gRPC frontend service resolved
    by kubernetes DNS from the helm release name.
    """

    _KUBECTL_OSM_NAMESPACE = "osm"
    _KUBECTL_OSM_CLUSTER_NAME = "_system-osm-k8s"
    _EE_SERVICE_PORT = 50050

    # Time between retries
    _EE_RETRY_DELAY = 10
    # Initial max retry time
    _MAX_INITIAL_RETRY_TIME = 300
    # Other retry time
    _MAX_RETRY_TIME = 30

    def __init__(self,
                 log: object = None,
                 loop: object = None,
                 url: str = None,
                 username: str = None,
                 vca_config: dict = None,
                 on_update_db=None, ):
        """
        Initialize EE helm connector.
        """

        self.db = Database().instance.db
        self.fs = Filesystem().instance.fs

        # parent class constructor
        N2VCConnector.__init__(
            self,
            log=log,
            loop=loop,
            url=url,
            username=username,
            vca_config=vca_config,
            on_update_db=on_update_db,
            db=self.db,
            fs=self.fs
        )

        self.log.debug("Initialize helm N2VC connector")

        # TODO - Obtain data from configuration
        self._ee_service_port = self._EE_SERVICE_PORT

        self._retry_delay = self._EE_RETRY_DELAY
        self._max_retry_time = self._MAX_RETRY_TIME
        self._initial_retry_time = self._MAX_INITIAL_RETRY_TIME

        # initialize helm connector for helmv2 and helmv3
        self._k8sclusterhelm2 = K8sHelmConnector(
            kubectl_command=self.vca_config.get("kubectlpath"),
            helm_command=self.vca_config.get("helmpath"),
            fs=self.fs,
            db=self.db,
            log=self.log,
            on_update_db=None,
        )

        self._k8sclusterhelm3 = K8sHelm3Connector(
            kubectl_command=self.vca_config.get("kubectlpath"),
            helm_command=self.vca_config.get("helm3path"),
            fs=self.fs,
            log=self.log,
            db=self.db,
            on_update_db=None,
        )

        # Cached system cluster id, resolved lazily by _get_system_cluster_id
        self._system_cluster_id = None
        self.log.info("Helm N2VC connector initialized")

    # TODO - ¿reuse_ee_id?
    async def create_execution_environment(self,
                                           namespace: str,
                                           db_dict: dict,
                                           reuse_ee_id: str = None,
                                           progress_timeout: float = None,
                                           total_timeout: float = None,
                                           config: dict = None,
                                           artifact_path: str = None,
                                           vca_type: str = None,
                                           *kargs, **kwargs) -> (str, dict):
        """
        Creates a new helm execution environment deploying the helm-chart indicated in the
        artifact_path
        :param str namespace: This param is not used, all helm charts are deployed in the osm
        system namespace
        :param dict db_dict: where to write to database when the status changes.
            It contains a dictionary with {collection: str, filter: {}, path: str},
                e.g. {collection: "nsrs", filter: {_id: <nsd-id>, path:
                "_admin.deployed.VCA.3"}
        :param str reuse_ee_id: ee id from an older execution. TODO - right now this params is not used
        :param float progress_timeout:
        :param float total_timeout:
        :param dict config: General variables to instantiate KDU
        :param str artifact_path: path of package content
        :param str vca_type: Type of vca, must be type helm or helm-v3
        :returns str, dict: id of the new execution environment including namespace.helm_id
        and credentials object set to None as all credentials should be osm kubernetes .kubeconfig
        """

        self.log.info(
            "create_execution_environment: namespace: {}, artifact_path: {}, db_dict: {}, "
            "reuse_ee_id: {}".format(
                namespace, artifact_path, db_dict, reuse_ee_id)
        )

        # Validate artifact-path is provided
        if artifact_path is None or len(artifact_path) == 0:
            raise N2VCBadArgumentsException(
                message="artifact_path is mandatory", bad_args=["artifact_path"]
            )

        # Validate artifact-path exists and sync path
        from_path = os.path.split(artifact_path)[0]
        self.fs.sync(from_path)

        # remove / in charm path
        while artifact_path.find("//") >= 0:
            artifact_path = artifact_path.replace("//", "/")

        # check charm path
        if self.fs.file_exists(artifact_path):
            helm_chart_path = artifact_path
        else:
            msg = "artifact path does not exist: {}".format(artifact_path)
            raise N2VCBadArgumentsException(message=msg, bad_args=["artifact_path"])

        if artifact_path.startswith("/"):
            full_path = self.fs.path + helm_chart_path
        else:
            full_path = self.fs.path + "/" + helm_chart_path

        while full_path.find("//") >= 0:
            full_path = full_path.replace("//", "/")

        try:
            # Call helm conn install
            # Obtain system cluster id from database
            system_cluster_uuid = await self._get_system_cluster_id()
            # Add parameter osm if exist to global
            if config and config.get("osm"):
                if not config.get("global"):
                    config["global"] = {}
                config["global"]["osm"] = config.get("osm")

            self.log.debug("install helm chart: {}".format(full_path))
            if vca_type == "helm":
                helm_id = await self._k8sclusterhelm2.install(system_cluster_uuid, kdu_model=full_path,
                                                              namespace=self._KUBECTL_OSM_NAMESPACE,
                                                              params=config,
                                                              db_dict=db_dict,
                                                              timeout=progress_timeout)
            else:
                helm_id = await self._k8sclusterhelm3.install(system_cluster_uuid, kdu_model=full_path,
                                                              namespace=self._KUBECTL_OSM_NAMESPACE,
                                                              params=config,
                                                              db_dict=db_dict,
                                                              timeout=progress_timeout)

            ee_id = "{}:{}.{}".format(vca_type, self._KUBECTL_OSM_NAMESPACE, helm_id)
            return ee_id, None
        except N2VCException:
            raise
        except Exception as e:
            self.log.error("Error deploying chart ee: {}".format(e), exc_info=True)
            raise N2VCException("Error deploying chart ee: {}".format(e))

    async def register_execution_environment(self, namespace: str, credentials: dict, db_dict: dict,
                                             progress_timeout: float = None, total_timeout: float = None,
                                             *kargs, **kwargs) -> str:
        # nothing to do
        pass

    async def install_configuration_sw(self,
                                       ee_id: str,
                                       artifact_path: str,
                                       db_dict: dict,
                                       progress_timeout: float = None,
                                       total_timeout: float = None,
                                       config: dict = None,
                                       num_units: int = 1,
                                       vca_type: str = None
                                       ):
        # nothing to do
        pass

    async def add_relation(self, ee_id_1: str, ee_id_2: str, endpoint_1: str, endpoint_2: str):
        # nothing to do
        pass

    async def remove_relation(self):
        # nothing to do
        pass

    async def get_status(self, namespace: str, yaml_format: bool = True):
        # not used for this connector
        pass

    async def get_ee_ssh_public__key(self, ee_id: str, db_dict: dict, progress_timeout: float = None,
                                     total_timeout: float = None) -> str:
        """
        Obtains ssh-public key from ee executing GetSShKey method from the ee.

        :param str ee_id: the id of the execution environment returned by
            create_execution_environment or register_execution_environment
        :param dict db_dict:
        :param float progress_timeout:
        :param float total_timeout:
        :returns: public key of the execution environment
        """

        self.log.info(
            "get_ee_ssh_public_key: ee_id: {}, db_dict: {}".format(
                ee_id, db_dict)
        )

        # check arguments
        if ee_id is None or len(ee_id) == 0:
            raise N2VCBadArgumentsException(
                message="ee_id is mandatory", bad_args=["ee_id"]
            )

        try:
            # Obtain ip_addr for the ee service, it is resolved by dns from the ee name by kubernetes
            version, namespace, helm_id = self._get_ee_id_parts(ee_id)
            ip_addr = socket.gethostbyname(helm_id)

            # Obtain ssh_key from the ee, this method will implement retries to allow the ee
            # install libraries and start successfully
            ssh_key = await self._get_ssh_key(ip_addr)
            return ssh_key
        except Exception as e:
            self.log.error("Error obtaining ee ssh_key: {}".format(e), exc_info=True)
            raise N2VCException("Error obtaining ee ssh_key: {}".format(e))

    async def exec_primitive(self, ee_id: str, primitive_name: str, params_dict: dict, db_dict: dict = None,
                             progress_timeout: float = None, total_timeout: float = None) -> str:
        """
        Execute a primitive in the execution environment

        :param str ee_id: the one returned by create_execution_environment or
            register_execution_environment with the format namespace.helm_id
        :param str primitive_name: must be one defined in the software. There is one
            called 'config', where, for the proxy case, the 'credentials' of VM are
            provided
        :param dict params_dict: parameters of the action
        :param dict db_dict: where to write into database when the status changes.
            It contains a dict with
                {collection: <str>, filter: {}, path: <str>},
                e.g. {collection: "nslcmops", filter:
                    {_id: <nslcmop_id>, path: "_admin.VCA"}
            It will be used to store information about intermediate notifications
        :param float progress_timeout:
        :param float total_timeout:
        :returns str: primitive result, if ok. It raises exceptions in case of fail
        """

        self.log.info("exec primitive for ee_id : {}, primitive_name: {}, params_dict: {}, db_dict: {}".format(
            ee_id, primitive_name, params_dict, db_dict
        ))

        # check arguments
        if ee_id is None or len(ee_id) == 0:
            raise N2VCBadArgumentsException(
                message="ee_id is mandatory", bad_args=["ee_id"]
            )
        if primitive_name is None or len(primitive_name) == 0:
            raise N2VCBadArgumentsException(
                message="action_name is mandatory", bad_args=["action_name"]
            )
        if params_dict is None:
            params_dict = dict()

        try:
            version, namespace, helm_id = self._get_ee_id_parts(ee_id)
            ip_addr = socket.gethostbyname(helm_id)
        except Exception as e:
            self.log.error("Error getting ee ip ee: {}".format(e))
            raise N2VCException("Error getting ee ip ee: {}".format(e))

        if primitive_name == "config":
            try:
                # Execute config primitive, higher timeout to check the case ee is starting
                status, detailed_message = await self._execute_config_primitive(ip_addr, params_dict, db_dict=db_dict)
                self.log.debug("Executed config primitive ee_id_ {}, status: {}, message: {}".format(
                    ee_id, status, detailed_message))
                if status != "OK":
                    self.log.error("Error configuring helm ee, status: {}, message: {}".format(
                        status, detailed_message))
                    raise N2VCExecutionException(
                        message="Error configuring helm ee_id: {}, status: {}, message: {}: ".format(
                            ee_id, status, detailed_message
                        ),
                        primitive_name=primitive_name,
                    )
            except Exception as e:
                self.log.error("Error configuring helm ee: {}".format(e))
                raise N2VCExecutionException(
                    message="Error configuring helm ee_id: {}, {}".format(
                        ee_id, e
                    ),
                    primitive_name=primitive_name,
                )
            return "CONFIG OK"
        else:
            try:
                # Execute primitive
                status, detailed_message = await self._execute_primitive(ip_addr, primitive_name,
                                                                         params_dict, db_dict=db_dict)
                self.log.debug("Executed primitive {} ee_id_ {}, status: {}, message: {}".format(
                    primitive_name, ee_id, status, detailed_message))
                if status != "OK" and status != "PROCESSING":
                    self.log.error(
                        "Execute primitive {} returned not ok status: {}, message: {}".format(
                            primitive_name, status, detailed_message)
                    )
                    raise N2VCExecutionException(
                        message="Execute primitive {} returned not ok status: {}, message: {}".format(
                            primitive_name, status, detailed_message
                        ),
                        primitive_name=primitive_name,
                    )
            except Exception as e:
                self.log.error(
                    "Error executing primitive {}: {}".format(primitive_name, e)
                )
                raise N2VCExecutionException(
                    message="Error executing primitive {} into ee={} : {}".format(
                        primitive_name, ee_id, e
                    ),
                    primitive_name=primitive_name,
                )
            return detailed_message

    async def deregister_execution_environments(self):
        # nothing to be done
        pass

    async def delete_execution_environment(self, ee_id: str, db_dict: dict = None, total_timeout: float = None):
        """
        Delete an execution environment
        :param str ee_id: id of the execution environment to delete, included namespace.helm_id
        :param dict db_dict: where to write into database when the status changes.
            It contains a dict with
                {collection: <str>, filter: {}, path: <str>},
                e.g. {collection: "nsrs", filter:
                    {_id: <nsd-id>, path: "_admin.deployed.VCA.3"}
        :param float total_timeout:
        """

        self.log.info("ee_id: {}".format(ee_id))

        # check arguments
        if ee_id is None:
            raise N2VCBadArgumentsException(
                message="ee_id is mandatory", bad_args=["ee_id"]
            )

        try:

            # Obtain cluster_uuid
            system_cluster_uuid = await self._get_system_cluster_id()

            # Get helm_id
            version, namespace, helm_id = self._get_ee_id_parts(ee_id)

            # Uninstall chart, for backward compatibility we must assume that if there is no
            # version it is helm-v2
            if version == "helm-v3":
                await self._k8sclusterhelm3.uninstall(system_cluster_uuid, helm_id)
            else:
                await self._k8sclusterhelm2.uninstall(system_cluster_uuid, helm_id)
            self.log.info("ee_id: {} deleted".format(ee_id))
        except N2VCException:
            raise
        except Exception as e:
            self.log.error("Error deleting ee id: {}: {}".format(ee_id, e), exc_info=True)
            raise N2VCException("Error deleting ee id {}: {}".format(ee_id, e))

    async def delete_namespace(self, namespace: str, db_dict: dict = None, total_timeout: float = None):
        # method not implemented for this connector, execution environments must be deleted individually
        pass

    async def install_k8s_proxy_charm(
        self,
        charm_name: str,
        namespace: str,
        artifact_path: str,
        db_dict: dict,
        progress_timeout: float = None,
        total_timeout: float = None,
        config: dict = None,
        *kargs, **kwargs
    ) -> str:
        pass

    @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
    async def _get_ssh_key(self, ip_addr):
        # Ask the EE frontend gRPC service for its public ssh key; retried
        # while the EE is still starting up (decorator handles retries)
        channel = Channel(ip_addr, self._ee_service_port)
        try:
            stub = FrontendExecutorStub(channel)
            self.log.debug("get ssh key, ip_addr: {}".format(ip_addr))
            reply: SshKeyReply = await stub.GetSshKey(SshKeyRequest())
            return reply.message
        finally:
            channel.close()

    @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
    async def _execute_config_primitive(self, ip_addr, params, db_dict=None):
        # Higher retry budget than regular primitives: the EE may still be starting
        return await self._execute_primitive_internal(ip_addr, "config", params, db_dict=db_dict)

    @retryer(max_wait_time=_MAX_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
    async def _execute_primitive(self, ip_addr, primitive_name, params, db_dict=None):
        return await self._execute_primitive_internal(ip_addr, primitive_name, params, db_dict=db_dict)

    async def _execute_primitive_internal(self, ip_addr, primitive_name, params, db_dict=None):
        """
        Run a primitive through the EE gRPC frontend and stream back status
        replies, optionally persisting each intermediate status to the database.

        :returns: tuple (status, detailed_message) from the last reply received,
            or ("ERROR", "No result received") if the stream produced nothing
        """
        channel = Channel(ip_addr, self._ee_service_port)
        try:
            stub = FrontendExecutorStub(channel)
            async with stub.RunPrimitive.open() as stream:
                primitive_id = str(uuid.uuid1())
                result = None
                self.log.debug("Execute primitive internal: id:{}, name:{}, params: {}".
                               format(primitive_id, primitive_name, params))
                await stream.send_message(
                    PrimitiveRequest(id=primitive_id, name=primitive_name, params=yaml.dump(params)), end=True)
                async for reply in stream:
                    self.log.debug("Received reply: {}".format(reply))
                    result = reply
                    # If db_dict provided write notifs in database
                    if db_dict:
                        self._write_op_detailed_status(db_dict, reply.status, reply.detailed_message)
                if result:
                    return reply.status, reply.detailed_message
                else:
                    return "ERROR", "No result received"
        finally:
            channel.close()

    def _write_op_detailed_status(self, db_dict, status, detailed_message):
        """Best-effort write of an intermediate detailed-status into the database."""
        # write ee_id to database: _admin.deployed.VCA.x
        try:
            the_table = db_dict["collection"]
            the_filter = db_dict["filter"]
            update_dict = {"detailed-status": "{}: {}".format(status, detailed_message)}
            # self.log.debug('Writing ee_id to database: {}'.format(the_path))
            self.db.set_one(
                table=the_table,
                q_filter=the_filter,
                update_dict=update_dict,
                fail_on_empty=True,
            )
        except asyncio.CancelledError:
            raise
        except Exception as e:
            # best-effort: a failed status notification must not abort the primitive
            self.log.error("Error writing detailedStatus to database: {}".format(e))

    async def _get_system_cluster_id(self):
        """
        Return (and cache) the helm-chart-v3 id of the OSM system k8s cluster,
        initializing the cluster for helm v3 on first use if needed.
        """
        if not self._system_cluster_id:
            db_k8cluster = self.db.get_one("k8sclusters", {"name": self._KUBECTL_OSM_CLUSTER_NAME})
            k8s_hc_id = deep_get(db_k8cluster, ("_admin", "helm-chart-v3", "id"))
            if not k8s_hc_id:
                try:
                    # backward compatibility for existing clusters that have not been initialized for helm v3
                    cluster_id = db_k8cluster.get("_id")
                    k8s_credentials = yaml.safe_dump(db_k8cluster.get("credentials"))
                    k8s_hc_id, uninstall_sw = await self._k8sclusterhelm3.init_env(k8s_credentials,
                                                                                   reuse_cluster_uuid=cluster_id)
                    # NOTE: key fixed from "_admin.helm-chart-v3}.created" (stray
                    # brace) so the created flag lands on the intended path
                    db_k8scluster_update = {"_admin.helm-chart-v3.error_msg": None,
                                            "_admin.helm-chart-v3.id": k8s_hc_id,
                                            "_admin.helm-chart-v3.created": uninstall_sw,
                                            "_admin.helm-chart-v3.operationalState": "ENABLED"}
                    self.update_db_2("k8sclusters", cluster_id, db_k8scluster_update)
                except Exception as e:
                    self.log.error("error initializing helm-v3 cluster: {}".format(str(e)))
                    raise N2VCException("K8s system cluster '{}' has not been initialized for helm-chart-v3".format(
                        cluster_id))
            self._system_cluster_id = k8s_hc_id
        return self._system_cluster_id

    def _get_ee_id_parts(self, ee_id):
        """
        Parses ee_id stored at database that can be either 'version:namespace.helm_id' or only
        namespace.helm_id for backward compatibility
        If exists helm version can be helm-v3 or helm (helm-v2 old version)
        """
        version, _, part_id = ee_id.rpartition(':')
        namespace, _, helm_id = part_id.rpartition('.')
        return version, namespace, helm_id