Reformat LCM to standardized format
[osm/LCM.git] osm_lcm/lcm_helm_conn.py
##
# Copyright 2020 Telefonica Investigacion y Desarrollo, S.A.U.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
##
import functools
import yaml
import asyncio
import socket
import uuid
import os

from grpclib.client import Channel

from osm_lcm.frontend_pb2 import PrimitiveRequest
from osm_lcm.frontend_pb2 import SshKeyRequest, SshKeyReply
from osm_lcm.frontend_grpc import FrontendExecutorStub
from osm_lcm.lcm_utils import LcmBase

from osm_lcm.data_utils.database.database import Database
from osm_lcm.data_utils.filesystem.filesystem import Filesystem

from n2vc.n2vc_conn import N2VCConnector
from n2vc.k8s_helm_conn import K8sHelmConnector
from n2vc.k8s_helm3_conn import K8sHelm3Connector
from n2vc.exceptions import (
    N2VCBadArgumentsException,
    N2VCException,
    N2VCExecutionException,
)

from osm_lcm.lcm_utils import deep_get


def retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay"):
    def wrapper(func):
        retry_exceptions = ConnectionRefusedError

        @functools.wraps(func)
        async def wrapped(*args, **kwargs):
            # default values for wait time and delay_time
            delay_time = 10
            max_wait_time = 300

            # obtain arguments from variable names
            self = args[0]
            if self.__dict__.get(max_wait_time_var):
                max_wait_time = self.__dict__.get(max_wait_time_var)
            if self.__dict__.get(delay_time_var):
                delay_time = self.__dict__.get(delay_time_var)

            wait_time = max_wait_time
            while wait_time > 0:
                try:
                    return await func(*args, **kwargs)
                except retry_exceptions:
                    wait_time = wait_time - delay_time
                    await asyncio.sleep(delay_time)
                    continue
            # retries exhausted, give up and propagate the connection error
            raise ConnectionRefusedError("maximum retry time exceeded")

        return wrapped

    return wrapper
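# Note (usage sketch): retryer reads its retry budget from attributes of the
# decorated method's object, e.g.
#
#     @retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay")
#     async def _get_ssh_key(self, ip_addr): ...
#
# If the named attributes are missing it falls back to a 300s total wait with a
# 10s delay, and it only retries on ConnectionRefusedError (typically seen while
# the execution environment service is not yet reachable).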


class LCMHelmConn(N2VCConnector, LcmBase):
    _KUBECTL_OSM_NAMESPACE = "osm"
    _KUBECTL_OSM_CLUSTER_NAME = "_system-osm-k8s"
    _EE_SERVICE_PORT = 50050

    # Initial max retry time
    _MAX_INITIAL_RETRY_TIME = 600
    # Max retry time for normal operations
    _MAX_RETRY_TIME = 30
    # Time between retries, retry time after a connection error is raised
    _EE_RETRY_DELAY = 10

    def __init__(
        self,
        log: object = None,
        loop: object = None,
        vca_config: dict = None,
        on_update_db=None,
    ):
        """
        Initialize EE helm connector.
        """

        self.db = Database().instance.db
        self.fs = Filesystem().instance.fs

        # parent class constructor
        N2VCConnector.__init__(
            self, log=log, loop=loop, on_update_db=on_update_db, db=self.db, fs=self.fs
        )

        self.vca_config = vca_config
        self.log.debug("Initialize helm N2VC connector")
        self.log.debug("initial vca_config: {}".format(vca_config))

        # TODO - Obtain data from configuration
        self._ee_service_port = self._EE_SERVICE_PORT

        self._retry_delay = self._EE_RETRY_DELAY

        if self.vca_config and self.vca_config.get("eegrpcinittimeout"):
            self._initial_retry_time = self.vca_config.get("eegrpcinittimeout")
            self.log.debug("Initial retry time: {}".format(self._initial_retry_time))
        else:
            self._initial_retry_time = self._MAX_INITIAL_RETRY_TIME
            self.log.debug(
                "Applied default retry time: {}".format(self._initial_retry_time)
            )

        if self.vca_config and self.vca_config.get("eegrpctimeout"):
            self._max_retry_time = self.vca_config.get("eegrpctimeout")
            self.log.debug("Retry time: {}".format(self._max_retry_time))
        else:
            self._max_retry_time = self._MAX_RETRY_TIME
            self.log.debug(
                "Applied default retry time: {}".format(self._max_retry_time)
            )
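        # Illustrative vca_config contents (example values only, not defaults):
        #   {"eegrpcinittimeout": 600, "eegrpctimeout": 30,
        #    "kubectlpath": "/usr/bin/kubectl",
        #    "helmpath": "/usr/bin/helm", "helm3path": "/usr/bin/helm3"}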

        # initialize helm connector for helmv2 and helmv3
        self._k8sclusterhelm2 = K8sHelmConnector(
            kubectl_command=self.vca_config.get("kubectlpath"),
            helm_command=self.vca_config.get("helmpath"),
            fs=self.fs,
            db=self.db,
            log=self.log,
            on_update_db=None,
        )

        self._k8sclusterhelm3 = K8sHelm3Connector(
            kubectl_command=self.vca_config.get("kubectlpath"),
            helm_command=self.vca_config.get("helm3path"),
            fs=self.fs,
            log=self.log,
            db=self.db,
            on_update_db=None,
        )

        self._system_cluster_id = None
        self.log.info("Helm N2VC connector initialized")

    # TODO - reuse_ee_id?
    async def create_execution_environment(
        self,
        namespace: str,
        db_dict: dict,
        reuse_ee_id: str = None,
        progress_timeout: float = None,
        total_timeout: float = None,
        config: dict = None,
        artifact_path: str = None,
        vca_type: str = None,
        *kargs,
        **kwargs,
    ) -> (str, dict):
174 """
175 Creates a new helm execution environment deploying the helm-chat indicated in the
176 attifact_path
177 :param str namespace: This param is not used, all helm charts are deployed in the osm
178 system namespace
179 :param dict db_dict: where to write to database when the status changes.
180 It contains a dictionary with {collection: str, filter: {}, path: str},
181 e.g. {collection: "nsrs", filter: {_id: <nsd-id>, path:
182 "_admin.deployed.VCA.3"}
183 :param str reuse_ee_id: ee id from an older execution. TODO - right now this params is not used
184 :param float progress_timeout:
185 :param float total_timeout:
186 :param dict config: General variables to instantiate KDU
187 :param str artifact_path: path of package content
188 :param str vca_type: Type of vca, must be type helm or helm-v3
189 :returns str, dict: id of the new execution environment including namespace.helm_id
190 and credentials object set to None as all credentials should be osm kubernetes .kubeconfig
191 """

        self.log.info(
            "create_execution_environment: namespace: {}, artifact_path: {}, db_dict: {}, "
            "reuse_ee_id: {}".format(namespace, artifact_path, db_dict, reuse_ee_id)
        )

        # Validate artifact-path is provided
        if artifact_path is None or len(artifact_path) == 0:
            raise N2VCBadArgumentsException(
                message="artifact_path is mandatory", bad_args=["artifact_path"]
            )

        # Validate artifact-path exists and sync path
        from_path = os.path.split(artifact_path)[0]
        self.fs.sync(from_path)

        # remove duplicate slashes in charm path
        while artifact_path.find("//") >= 0:
            artifact_path = artifact_path.replace("//", "/")

        # check charm path
        if self.fs.file_exists(artifact_path):
            helm_chart_path = artifact_path
        else:
            msg = "artifact path does not exist: {}".format(artifact_path)
            raise N2VCBadArgumentsException(message=msg, bad_args=["artifact_path"])

        if artifact_path.startswith("/"):
            full_path = self.fs.path + helm_chart_path
        else:
            full_path = self.fs.path + "/" + helm_chart_path

        while full_path.find("//") >= 0:
            full_path = full_path.replace("//", "/")

        try:
            # Call helm conn install
            # Obtain system cluster id from database
            system_cluster_uuid = await self._get_system_cluster_id()
            # Add parameter osm to global if it exists
            if config and config.get("osm"):
                if not config.get("global"):
                    config["global"] = {}
                config["global"]["osm"] = config.get("osm")

            self.log.debug("install helm chart: {}".format(full_path))
            if vca_type == "helm":
                helm_id = self._k8sclusterhelm2.generate_kdu_instance_name(
                    db_dict=db_dict,
                    kdu_model=full_path,
                )
                await self._k8sclusterhelm2.install(
                    system_cluster_uuid,
                    kdu_model=full_path,
                    kdu_instance=helm_id,
                    namespace=self._KUBECTL_OSM_NAMESPACE,
                    params=config,
                    db_dict=db_dict,
                    timeout=progress_timeout,
                )
            else:
                helm_id = self._k8sclusterhelm3.generate_kdu_instance_name(
                    db_dict=db_dict,
                    kdu_model=full_path,
                )
                await self._k8sclusterhelm3.install(
                    system_cluster_uuid,
                    kdu_model=full_path,
                    kdu_instance=helm_id,
                    namespace=self._KUBECTL_OSM_NAMESPACE,
                    params=config,
                    db_dict=db_dict,
                    timeout=progress_timeout,
                )

            ee_id = "{}:{}.{}".format(vca_type, self._KUBECTL_OSM_NAMESPACE, helm_id)
            return ee_id, None
        except N2VCException:
            raise
        except Exception as e:
            self.log.error("Error deploying chart ee: {}".format(e), exc_info=True)
            raise N2VCException("Error deploying chart ee: {}".format(e))

    async def register_execution_environment(
        self,
        namespace: str,
        credentials: dict,
        db_dict: dict,
        progress_timeout: float = None,
        total_timeout: float = None,
        *kargs,
        **kwargs,
    ) -> str:
        # nothing to do
        pass

    async def install_configuration_sw(self, *args, **kwargs):
        # nothing to do
        pass

    async def add_relation(self, *args, **kwargs):
        # nothing to do
        pass

    async def remove_relation(self):
        # nothing to do
        pass

    async def get_status(self, *args, **kwargs):
        # not used for this connector
        pass

    async def get_ee_ssh_public__key(
        self,
        ee_id: str,
        db_dict: dict,
        progress_timeout: float = None,
        total_timeout: float = None,
        **kwargs,
    ) -> str:
        """
        Obtains the ssh public key from the ee by executing the GetSshKey method of the ee.

        :param str ee_id: the id of the execution environment returned by
            create_execution_environment or register_execution_environment
        :param dict db_dict:
        :param float progress_timeout:
        :param float total_timeout:
        :returns: public key of the execution environment
        """

        self.log.info(
            "get_ee_ssh_public_key: ee_id: {}, db_dict: {}".format(ee_id, db_dict)
        )

        # check arguments
        if ee_id is None or len(ee_id) == 0:
            raise N2VCBadArgumentsException(
                message="ee_id is mandatory", bad_args=["ee_id"]
            )

        try:
            # Obtain ip_addr for the ee service; it is resolved by dns from the ee name by kubernetes
            version, namespace, helm_id = self._get_ee_id_parts(ee_id)
            ip_addr = socket.gethostbyname(helm_id)

            # Obtain ssh_key from the ee, this method will implement retries to allow the ee
            # install libraries and start successfully
            ssh_key = await self._get_ssh_key(ip_addr)
            return ssh_key
        except Exception as e:
            self.log.error("Error obtaining ee ssh_key: {}".format(e), exc_info=True)
            raise N2VCException("Error obtaining ee ssh_key: {}".format(e))

    async def exec_primitive(
        self,
        ee_id: str,
        primitive_name: str,
        params_dict: dict,
        db_dict: dict = None,
        progress_timeout: float = None,
        total_timeout: float = None,
        **kwargs,
    ) -> str:
356 """
357 Execute a primitive in the execution environment
358
359 :param str ee_id: the one returned by create_execution_environment or
360 register_execution_environment with the format namespace.helm_id
361 :param str primitive_name: must be one defined in the software. There is one
362 called 'config', where, for the proxy case, the 'credentials' of VM are
363 provided
364 :param dict params_dict: parameters of the action
365 :param dict db_dict: where to write into database when the status changes.
366 It contains a dict with
367 {collection: <str>, filter: {}, path: <str>},
368 e.g. {collection: "nslcmops", filter:
369 {_id: <nslcmop_id>, path: "_admin.VCA"}
370 It will be used to store information about intermediate notifications
371 :param float progress_timeout:
372 :param float total_timeout:
373 :returns str: primitive result, if ok. It raises exceptions in case of fail
374 """

        self.log.info(
            "exec primitive for ee_id: {}, primitive_name: {}, params_dict: {}, db_dict: {}".format(
                ee_id, primitive_name, params_dict, db_dict
            )
        )

        # check arguments
        if ee_id is None or len(ee_id) == 0:
            raise N2VCBadArgumentsException(
                message="ee_id is mandatory", bad_args=["ee_id"]
            )
        if primitive_name is None or len(primitive_name) == 0:
            raise N2VCBadArgumentsException(
                message="action_name is mandatory", bad_args=["action_name"]
            )
        if params_dict is None:
            params_dict = dict()

        try:
            version, namespace, helm_id = self._get_ee_id_parts(ee_id)
            ip_addr = socket.gethostbyname(helm_id)
        except Exception as e:
            self.log.error("Error getting ee ip: {}".format(e))
            raise N2VCException("Error getting ee ip: {}".format(e))

        if primitive_name == "config":
            try:
                # Execute config primitive, higher timeout to check the case ee is starting
                status, detailed_message = await self._execute_config_primitive(
                    ip_addr, params_dict, db_dict=db_dict
                )
                self.log.debug(
                    "Executed config primitive ee_id: {}, status: {}, message: {}".format(
                        ee_id, status, detailed_message
                    )
                )
                if status != "OK":
                    self.log.error(
                        "Error configuring helm ee, status: {}, message: {}".format(
                            status, detailed_message
                        )
                    )
                    raise N2VCExecutionException(
                        message="Error configuring helm ee_id: {}, status: {}, message: {}".format(
                            ee_id, status, detailed_message
                        ),
                        primitive_name=primitive_name,
                    )
            except Exception as e:
                self.log.error("Error configuring helm ee: {}".format(e))
                raise N2VCExecutionException(
                    message="Error configuring helm ee_id: {}, {}".format(ee_id, e),
                    primitive_name=primitive_name,
                )
            return "CONFIG OK"
        else:
            try:
                # Execute primitive
                status, detailed_message = await self._execute_primitive(
                    ip_addr, primitive_name, params_dict, db_dict=db_dict
                )
                self.log.debug(
                    "Executed primitive {} ee_id: {}, status: {}, message: {}".format(
                        primitive_name, ee_id, status, detailed_message
                    )
                )
                if status != "OK" and status != "PROCESSING":
                    self.log.error(
                        "Execute primitive {} returned not ok status: {}, message: {}".format(
                            primitive_name, status, detailed_message
                        )
                    )
                    raise N2VCExecutionException(
                        message="Execute primitive {} returned not ok status: {}, message: {}".format(
                            primitive_name, status, detailed_message
                        ),
                        primitive_name=primitive_name,
                    )
            except Exception as e:
                self.log.error(
                    "Error executing primitive {}: {}".format(primitive_name, e)
                )
                raise N2VCExecutionException(
                    message="Error executing primitive {} into ee={} : {}".format(
                        primitive_name, ee_id, e
                    ),
                    primitive_name=primitive_name,
                )
            return detailed_message

    async def deregister_execution_environments(self):
        # nothing to be done
        pass

    async def delete_execution_environment(
        self,
        ee_id: str,
        db_dict: dict = None,
        total_timeout: float = None,
        **kwargs,
    ):
        """
        Delete an execution environment
        :param str ee_id: id of the execution environment to delete, including namespace.helm_id
        :param dict db_dict: where to write into database when the status changes.
            It contains a dict with
                {collection: <str>, filter: {}, path: <str>},
                e.g. {collection: "nsrs", filter:
                {_id: <nsd-id>, path: "_admin.deployed.VCA.3"}
        :param float total_timeout:
        """

        self.log.info("ee_id: {}".format(ee_id))

        # check arguments
        if ee_id is None:
            raise N2VCBadArgumentsException(
                message="ee_id is mandatory", bad_args=["ee_id"]
            )

        try:
            # Obtain cluster_uuid
            system_cluster_uuid = await self._get_system_cluster_id()

            # Get helm_id
            version, namespace, helm_id = self._get_ee_id_parts(ee_id)

            # Uninstall chart, for backward compatibility we must assume that if there is no
            # version it is helm-v2
            if version == "helm-v3":
                await self._k8sclusterhelm3.uninstall(system_cluster_uuid, helm_id)
            else:
                await self._k8sclusterhelm2.uninstall(system_cluster_uuid, helm_id)
            self.log.info("ee_id: {} deleted".format(ee_id))
        except N2VCException:
            raise
        except Exception as e:
            self.log.error(
                "Error deleting ee id: {}: {}".format(ee_id, e), exc_info=True
            )
            raise N2VCException("Error deleting ee id {}: {}".format(ee_id, e))

    async def delete_namespace(
        self, namespace: str, db_dict: dict = None, total_timeout: float = None
    ):
        # method not implemented for this connector, execution environments must be deleted individually
        pass

    async def install_k8s_proxy_charm(
        self,
        charm_name: str,
        namespace: str,
        artifact_path: str,
        db_dict: dict,
        progress_timeout: float = None,
        total_timeout: float = None,
        config: dict = None,
        *kargs,
        **kwargs,
    ) -> str:
        pass

    @retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay")
    async def _get_ssh_key(self, ip_addr):
        channel = Channel(ip_addr, self._ee_service_port)
        try:
            stub = FrontendExecutorStub(channel)
            self.log.debug("get ssh key, ip_addr: {}".format(ip_addr))
            reply: SshKeyReply = await stub.GetSshKey(SshKeyRequest())
            return reply.message
        finally:
            channel.close()

    @retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay")
    async def _execute_config_primitive(self, ip_addr, params, db_dict=None):
        return await self._execute_primitive_internal(
            ip_addr, "config", params, db_dict=db_dict
        )

    @retryer(max_wait_time_var="_max_retry_time", delay_time_var="_retry_delay")
    async def _execute_primitive(self, ip_addr, primitive_name, params, db_dict=None):
        return await self._execute_primitive_internal(
            ip_addr, primitive_name, params, db_dict=db_dict
        )

    async def _execute_primitive_internal(
        self, ip_addr, primitive_name, params, db_dict=None
    ):
        channel = Channel(ip_addr, self._ee_service_port)
        try:
            stub = FrontendExecutorStub(channel)
            async with stub.RunPrimitive.open() as stream:
                primitive_id = str(uuid.uuid1())
                result = None
                self.log.debug(
                    "Execute primitive internal: id:{}, name:{}, params: {}".format(
                        primitive_id, primitive_name, params
                    )
                )
                await stream.send_message(
                    PrimitiveRequest(
                        id=primitive_id, name=primitive_name, params=yaml.dump(params)
                    ),
                    end=True,
                )
                async for reply in stream:
                    self.log.debug("Received reply: {}".format(reply))
                    result = reply
                    # If db_dict provided write notifs in database
                    if db_dict:
                        self._write_op_detailed_status(
                            db_dict, reply.status, reply.detailed_message
                        )
                if result:
                    return reply.status, reply.detailed_message
                else:
                    return "ERROR", "No result received"
        finally:
            channel.close()

    def _write_op_detailed_status(self, db_dict, status, detailed_message):
        # write detailed-status to database
        try:
            the_table = db_dict["collection"]
            the_filter = db_dict["filter"]
            update_dict = {"detailed-status": "{}: {}".format(status, detailed_message)}
            # self.log.debug('Writing ee_id to database: {}'.format(the_path))
            self.db.set_one(
                table=the_table,
                q_filter=the_filter,
                update_dict=update_dict,
                fail_on_empty=True,
            )
        except asyncio.CancelledError:
            raise
        except Exception as e:
            self.log.error("Error writing detailedStatus to database: {}".format(e))

    async def _get_system_cluster_id(self):
        if not self._system_cluster_id:
            db_k8cluster = self.db.get_one(
                "k8sclusters", {"name": self._KUBECTL_OSM_CLUSTER_NAME}
            )
            k8s_hc_id = deep_get(db_k8cluster, ("_admin", "helm-chart-v3", "id"))
            if not k8s_hc_id:
                try:
                    # backward compatibility for existing clusters that have not been initialized for helm v3
                    cluster_id = db_k8cluster.get("_id")
                    k8s_credentials = yaml.safe_dump(db_k8cluster.get("credentials"))
                    k8s_hc_id, uninstall_sw = await self._k8sclusterhelm3.init_env(
                        k8s_credentials, reuse_cluster_uuid=cluster_id
                    )
                    db_k8scluster_update = {
                        "_admin.helm-chart-v3.error_msg": None,
                        "_admin.helm-chart-v3.id": k8s_hc_id,
                        "_admin.helm-chart-v3.created": uninstall_sw,
                        "_admin.helm-chart-v3.operationalState": "ENABLED",
                    }
                    self.update_db_2("k8sclusters", cluster_id, db_k8scluster_update)
                except Exception as e:
                    self.log.error(
                        "error initializing helm-v3 cluster: {}".format(str(e))
                    )
                    raise N2VCException(
                        "K8s system cluster '{}' has not been initialized for helm-chart-v3".format(
                            cluster_id
                        )
                    )
            self._system_cluster_id = k8s_hc_id
        return self._system_cluster_id

    def _get_ee_id_parts(self, ee_id):
        """
        Parses the ee_id stored in the database, which can be either 'version:namespace.helm_id'
        or only 'namespace.helm_id' for backward compatibility.
        If present, the helm version can be helm-v3 or helm (the old helm-v2 version).
        """
        version, _, part_id = ee_id.rpartition(":")
        namespace, _, helm_id = part_id.rpartition(".")
        return version, namespace, helm_id