978b061eca02dcc32c1c46bed55311ce62b8f36b
[osm/LCM.git] / osm_lcm / lcm_helm_conn.py
1 ##
2 # Copyright 2020 Telefonica Investigacion y Desarrollo, S.A.U.
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 # http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13 # implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #
17 ##
18 import functools
19 import yaml
20 import asyncio
21 import socket
22 import uuid
23 import os
24
25 from grpclib.client import Channel
26
27 from osm_lcm.frontend_pb2 import PrimitiveRequest
28 from osm_lcm.frontend_pb2 import SshKeyRequest, SshKeyReply
29 from osm_lcm.frontend_grpc import FrontendExecutorStub
30 from osm_lcm.lcm_utils import LcmBase
31
32 from osm_lcm.data_utils.database.database import Database
33 from osm_lcm.data_utils.filesystem.filesystem import Filesystem
34
35 from n2vc.n2vc_conn import N2VCConnector
36 from n2vc.k8s_helm_conn import K8sHelmConnector
37 from n2vc.k8s_helm3_conn import K8sHelm3Connector
38 from n2vc.exceptions import N2VCBadArgumentsException, N2VCException, N2VCExecutionException
39
40 from osm_lcm.lcm_utils import deep_get
41
42
def retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay"):
    """
    Decorator factory that retries a coroutine method while it raises ConnectionRefusedError.

    The retry budget and the pause between attempts are read, on every call, from attributes
    of the first positional argument (the instance the decorated method is called on). The
    budget is decreased by the delay after each failed attempt; time spent inside the
    decorated call itself is not accounted for.

    :param str max_wait_time_var: name of the attribute holding the retry budget in seconds;
        300 is used when the attribute is missing or falsy
    :param str delay_time_var: name of the attribute holding the pause between attempts in
        seconds; 10 is used when the attribute is missing or falsy
    :returns: decorator to apply to an ``async def`` method
    :raises ConnectionRefusedError: when the budget is exhausted; the error raised by the last
        attempt is propagated instead of being returned as a value
    """
    def wrapper(func):
        # a real tuple, so further retryable exception types can simply be appended
        retry_exceptions = (ConnectionRefusedError,)

        @functools.wraps(func)
        async def wrapped(*args, **kwargs):
            # the decorated callable is a method: its first positional argument is the instance
            self = args[0]
            max_wait_time = getattr(self, max_wait_time_var, None) or 300
            delay_time = getattr(self, delay_time_var, None) or 10

            wait_time = max_wait_time
            last_error = None
            while wait_time > 0:
                try:
                    return await func(*args, **kwargs)
                except retry_exceptions as e:
                    last_error = e
                    wait_time = wait_time - delay_time
                    # no point in sleeping when there is no attempt left
                    if wait_time > 0:
                        await asyncio.sleep(delay_time)

            # budget exhausted (or not positive to begin with): fail loudly
            if last_error is None:
                last_error = ConnectionRefusedError(
                    "retry time exhausted before any attempt could be made"
                )
            raise last_error
        return wrapped
    return wrapper
74
75
class LCMHelmConn(N2VCConnector, LcmBase):
    """
    N2VC connector that deploys execution environments as helm charts and talks to them
    through a gRPC channel.
    """
    # Namespace where the execution environment charts are installed
    _KUBECTL_OSM_NAMESPACE = "osm"
    # Name used to look up the system cluster in the "k8sclusters" collection
    _KUBECTL_OSM_CLUSTER_NAME = "_system-osm-k8s"
    # Port used to open the gRPC channel towards an execution environment
    _EE_SERVICE_PORT = 50050

    # Initial max retry time (seconds), used when "eegrpcinittimeout" is not configured
    _MAX_INITIAL_RETRY_TIME = 600
    # Max retry time for normal operations (seconds), used when "eegrpctimeout" is not configured
    _MAX_RETRY_TIME = 30
    # Time between retries (seconds), retry time after a connection error is raised
    _EE_RETRY_DELAY = 10
87
def __init__(self,
             log: object = None,
             loop: object = None,
             url: str = None,
             username: str = None,
             vca_config: dict = None,
             on_update_db=None):
    """
    Initialize EE helm connector.

    Obtains the database and filesystem handles, runs the N2VCConnector constructor, resolves
    the gRPC retry settings and creates one helm connector per supported helm version.

    :param object log: forwarded to the N2VCConnector constructor
    :param object loop: forwarded to the N2VCConnector constructor
    :param str url: forwarded to the N2VCConnector constructor
    :param str username: forwarded to the N2VCConnector constructor
    :param dict vca_config: forwarded to the N2VCConnector constructor; the keys read here are
        "eegrpcinittimeout", "eegrpctimeout", "kubectlpath", "helmpath" and "helm3path"
    :param on_update_db: forwarded to the N2VCConnector constructor
    """

    self.db = Database().instance.db
    self.fs = Filesystem().instance.fs

    # parent class constructor
    N2VCConnector.__init__(self, log=log, loop=loop, url=url, username=username,
                           vca_config=vca_config, on_update_db=on_update_db,
                           db=self.db, fs=self.fs)

    self.log.debug("Initialize helm N2VC connector")
    self.log.debug("initial vca_config: {}".format(vca_config))

    # TODO - Obtain data from configuration
    self._ee_service_port = self._EE_SERVICE_PORT

    self._retry_delay = self._EE_RETRY_DELAY

    # retry budget while the execution environment is still starting
    init_timeout = self.vca_config.get("eegrpcinittimeout") if self.vca_config else None
    if init_timeout:
        self._initial_retry_time = init_timeout
        self.log.debug("Initial retry time: {}".format(self._initial_retry_time))
    else:
        self._initial_retry_time = self._MAX_INITIAL_RETRY_TIME
        self.log.debug("Applied default retry time: {}".format(self._initial_retry_time))

    # retry budget for regular operations
    op_timeout = self.vca_config.get("eegrpctimeout") if self.vca_config else None
    if op_timeout:
        self._max_retry_time = op_timeout
        self.log.debug("Retry time: {}".format(self._max_retry_time))
    else:
        self._max_retry_time = self._MAX_RETRY_TIME
        self.log.debug("Applied default retry time: {}".format(self._max_retry_time))

    # initialize helm connector for helmv2 and helmv3
    kubectl_path = self.vca_config.get("kubectlpath")

    helm2_args = dict(
        kubectl_command=kubectl_path,
        helm_command=self.vca_config.get("helmpath"),
        fs=self.fs,
        db=self.db,
        log=self.log,
        on_update_db=None,
    )
    self._k8sclusterhelm2 = K8sHelmConnector(**helm2_args)

    helm3_args = dict(
        kubectl_command=kubectl_path,
        helm_command=self.vca_config.get("helm3path"),
        fs=self.fs,
        log=self.log,
        db=self.db,
        on_update_db=None,
    )
    self._k8sclusterhelm3 = K8sHelm3Connector(**helm3_args)

    self._system_cluster_id = None
    self.log.info("Helm N2VC connector initialized")
158
159 # TODO - ¿reuse_ee_id?
async def create_execution_environment(self,
                                       namespace: str,
                                       db_dict: dict,
                                       reuse_ee_id: str = None,
                                       progress_timeout: float = None,
                                       total_timeout: float = None,
                                       config: dict = None,
                                       artifact_path: str = None,
                                       vca_type: str = None,
                                       *kargs, **kwargs) -> (str, dict):
    """
    Creates a new helm execution environment deploying the helm chart indicated in the
    artifact_path
    :param str namespace: This param is not used, all helm charts are deployed in the osm
    system namespace
    :param dict db_dict: where to write to database when the status changes.
        It contains a dictionary with {collection: str, filter: {}, path: str},
            e.g. {collection: "nsrs", filter: {_id: <nsd-id>, path:
            "_admin.deployed.VCA.3"}
    :param str reuse_ee_id: ee id from an older execution. TODO - right now this param is not used
    :param float progress_timeout: passed as timeout to the helm install
    :param float total_timeout: not used by this method
    :param dict config: General variables to instantiate KDU. When it contains an "osm" entry,
        that entry is also copied into config["global"]["osm"] (the dict is updated in place)
    :param str artifact_path: path of package content
    :param str vca_type: Type of vca, must be type helm or helm-v3
    :returns str, dict: id of the new execution environment with the format
        vca_type:namespace.helm_id, and credentials object set to None as all credentials
        should be osm kubernetes .kubeconfig
    :raises N2VCBadArgumentsException: if artifact_path is empty or does not exist
    :raises N2VCException: on any error while deploying the chart
    """

    self.log.info(
        "create_execution_environment: namespace: {}, artifact_path: {}, db_dict: {}, "
        "reuse_ee_id: {}".format(
            namespace, artifact_path, db_dict, reuse_ee_id)
    )

    # Validate artifact-path is provided
    if not artifact_path:
        raise N2VCBadArgumentsException(
            message="artifact_path is mandatory", bad_args=["artifact_path"]
        )

    # Validate artifact-path exists and sync path
    from_path = os.path.split(artifact_path)[0]
    self.fs.sync(from_path)

    # collapse repeated / in chart path
    while "//" in artifact_path:
        artifact_path = artifact_path.replace("//", "/")

    # check chart path
    if not self.fs.file_exists(artifact_path):
        msg = "artifact path does not exist: {}".format(artifact_path)
        raise N2VCBadArgumentsException(message=msg, bad_args=["artifact_path"])

    if artifact_path.startswith("/"):
        full_path = self.fs.path + artifact_path
    else:
        full_path = self.fs.path + "/" + artifact_path

    while "//" in full_path:
        full_path = full_path.replace("//", "/")

    try:
        # Call helm conn install
        # Obtain system cluster id from database
        system_cluster_uuid = await self._get_system_cluster_id()
        # Add parameter osm if exist to global
        if config and config.get("osm"):
            if not config.get("global"):
                config["global"] = {}
            config["global"]["osm"] = config.get("osm")

        self.log.debug("install helm chart: {}".format(full_path))

        # "helm" selects the helm v2 connector, any other value is deployed with helm v3.
        # The same connector is used to name and to install the release.
        if vca_type == "helm":
            k8s_connector = self._k8sclusterhelm2
        else:
            k8s_connector = self._k8sclusterhelm3

        helm_id = k8s_connector.generate_kdu_instance_name(
            db_dict=db_dict,
            kdu_model=full_path,
        )
        await k8s_connector.install(system_cluster_uuid, kdu_model=full_path,
                                    kdu_instance=helm_id,
                                    namespace=self._KUBECTL_OSM_NAMESPACE,
                                    params=config,
                                    db_dict=db_dict,
                                    timeout=progress_timeout)

        ee_id = "{}:{}.{}".format(vca_type, self._KUBECTL_OSM_NAMESPACE, helm_id)
        return ee_id, None
    except N2VCException:
        raise
    except Exception as e:
        self.log.error("Error deploying chart ee: {}".format(e), exc_info=True)
        raise N2VCException("Error deploying chart ee: {}".format(e))
265
async def register_execution_environment(self, namespace: str, credentials: dict, db_dict: dict,
                                         progress_timeout: float = None, total_timeout: float = None,
                                         *kargs, **kwargs) -> str:
    """
    No-op for this connector: all arguments are ignored and None is returned.
    """
    return None
271
async def install_configuration_sw(self,
                                   ee_id: str,
                                   artifact_path: str,
                                   db_dict: dict,
                                   progress_timeout: float = None,
                                   total_timeout: float = None,
                                   config: dict = None,
                                   num_units: int = 1,
                                   vca_type: str = None):
    """
    No-op for this connector: all arguments are ignored and None is returned.
    """
    return None
284
async def add_relation(self, ee_id_1: str, ee_id_2: str, endpoint_1: str, endpoint_2: str):
    """
    No-op for this connector: all arguments are ignored and None is returned.
    """
    return None
288
async def remove_relation(self):
    """
    No-op for this connector: returns None.
    """
    return None
292
async def get_status(self, namespace: str, yaml_format: bool = True):
    """
    Not used for this connector: all arguments are ignored and None is returned.
    """
    return None
296
async def get_ee_ssh_public__key(self, ee_id: str, db_dict: dict, progress_timeout: float = None,
                                 total_timeout: float = None) -> str:
    """
    Obtains ssh-public key from ee executing GetSshKey method from the ee.

    :param str ee_id: the id of the execution environment returned by
        create_execution_environment or register_execution_environment
    :param dict db_dict: only logged by this method
    :param float progress_timeout: not used by this method
    :param float total_timeout: not used by this method
    :returns: public key of the execution environment
    :raises N2VCBadArgumentsException: if ee_id is empty
    :raises N2VCException: if the address cannot be resolved or the key cannot be obtained
    """

    self.log.info(
        "get_ee_ssh_public_key: ee_id: {}, db_dict: {}".format(
            ee_id, db_dict)
    )

    # check arguments
    if not ee_id:
        raise N2VCBadArgumentsException(
            message="ee_id is mandatory", bad_args=["ee_id"]
        )

    try:
        # Obtain ip_addr for the ee service, it is resolved by dns from the ee name by kubernetes
        # NOTE(review): gethostbyname is a blocking call made from a coroutine - confirm this
        # is acceptable for the event loop running LCM
        _, _, helm_id = self._get_ee_id_parts(ee_id)
        ip_addr = socket.gethostbyname(helm_id)

        # Obtain ssh_key from the ee, this method will implement retries to allow the ee
        # install libraries and start successfully
        ssh_key = await self._get_ssh_key(ip_addr)
        return ssh_key
    except Exception as e:
        self.log.error("Error obtaining ee ssh_key: {}".format(e), exc_info=True)
        # keep the original error as the cause so the traceback is not lost
        raise N2VCException("Error obtaining ee ssh_key: {}".format(e)) from e
333
async def exec_primitive(self, ee_id: str, primitive_name: str, params_dict: dict, db_dict: dict = None,
                         progress_timeout: float = None, total_timeout: float = None) -> str:
    """
    Execute a primitive in the execution environment

    :param str ee_id: the one returned by create_execution_environment or
        register_execution_environment with the format namespace.helm_id
    :param str primitive_name: must be one defined in the software. There is one
        called 'config', where, for the proxy case, the 'credentials' of VM are
        provided
    :param dict params_dict: parameters of the action
    :param dict db_dict: where to write into database when the status changes.
                    It contains a dict with
                        {collection: <str>, filter: {},  path: <str>},
                        e.g. {collection: "nslcmops", filter:
                            {_id: <nslcmop_id>, path: "_admin.VCA"}
                    It will be used to store information about intermediate notifications
    :param float progress_timeout: not used by this method
    :param float total_timeout: not used by this method
    :returns str: "CONFIG OK" for the 'config' primitive, otherwise the detailed message
        reported by the execution environment. It raises exceptions in case of fail
    :raises N2VCBadArgumentsException: if ee_id or primitive_name is empty
    :raises N2VCException: if the execution environment address cannot be resolved
    :raises N2VCExecutionException: if the primitive fails or reports a not ok status
    """

    self.log.info("exec primitive for ee_id : {}, primitive_name: {}, params_dict: {}, db_dict: {}".format(
        ee_id, primitive_name, params_dict, db_dict
    ))

    # check arguments
    if not ee_id:
        raise N2VCBadArgumentsException(
            message="ee_id is mandatory", bad_args=["ee_id"]
        )
    if not primitive_name:
        raise N2VCBadArgumentsException(
            message="action_name is mandatory", bad_args=["action_name"]
        )
    if params_dict is None:
        params_dict = dict()

    try:
        _, _, helm_id = self._get_ee_id_parts(ee_id)
        ip_addr = socket.gethostbyname(helm_id)
    except Exception as e:
        self.log.error("Error getting ee ip ee: {}".format(e))
        raise N2VCException("Error getting ee ip ee: {}".format(e))

    if primitive_name == "config":
        try:
            # Execute config primitive, higher timeout to check the case ee is starting
            status, detailed_message = await self._execute_config_primitive(ip_addr, params_dict, db_dict=db_dict)
            self.log.debug("Executed config primitive ee_id_ {}, status: {}, message: {}".format(
                ee_id, status, detailed_message))
            if status != "OK":
                self.log.error("Error configuring helm ee, status: {}, message: {}".format(
                    status, detailed_message))
                raise N2VCExecutionException(
                    message="Error configuring helm ee_id: {}, status: {}, message: {}: ".format(
                        ee_id, status, detailed_message
                    ),
                    primitive_name=primitive_name,
                )
        except N2VCExecutionException:
            # already logged and carrying its final message: do not wrap it a second time
            raise
        except Exception as e:
            self.log.error("Error configuring helm ee: {}".format(e))
            raise N2VCExecutionException(
                message="Error configuring helm ee_id: {}, {}".format(
                    ee_id, e
                ),
                primitive_name=primitive_name,
            )
        return "CONFIG OK"
    else:
        try:
            # Execute primitive
            status, detailed_message = await self._execute_primitive(ip_addr, primitive_name,
                                                                     params_dict, db_dict=db_dict)
            self.log.debug("Executed primitive {} ee_id_ {}, status: {}, message: {}".format(
                primitive_name, ee_id, status, detailed_message))
            # "PROCESSING" is accepted as a valid final status for regular primitives
            if status not in ("OK", "PROCESSING"):
                self.log.error(
                    "Execute primitive {} returned not ok status: {}, message: {}".format(
                        primitive_name, status, detailed_message)
                )
                raise N2VCExecutionException(
                    message="Execute primitive {} returned not ok status: {}, message: {}".format(
                        primitive_name, status, detailed_message
                    ),
                    primitive_name=primitive_name,
                )
        except N2VCExecutionException:
            # already logged and carrying its final message: do not wrap it a second time
            raise
        except Exception as e:
            self.log.error(
                "Error executing primitive {}: {}".format(primitive_name, e)
            )
            raise N2VCExecutionException(
                message="Error executing primitive {} into ee={} : {}".format(
                    primitive_name, ee_id, e
                ),
                primitive_name=primitive_name,
            )
        return detailed_message
432
async def deregister_execution_environments(self):
    """
    No-op for this connector: returns None.
    """
    return None
436
async def delete_execution_environment(self, ee_id: str, db_dict: dict = None, total_timeout: float = None):
    """
    Delete an execution environment
    :param str ee_id: id of the execution environment to delete, included namespace.helm_id
    :param dict db_dict: where to write into database when the status changes.
                    It contains a dict with
                        {collection: <str>, filter: {},  path: <str>},
                        e.g. {collection: "nsrs", filter:
                            {_id: <nsd-id>, path: "_admin.deployed.VCA.3"}
                    Not used by this method
    :param float total_timeout: not used by this method
    :raises N2VCBadArgumentsException: if ee_id is None or empty
    :raises N2VCException: on any error while uninstalling the chart
    """

    self.log.info("ee_id: {}".format(ee_id))

    # check arguments: an empty id is rejected too, as the other entry points do
    if not ee_id:
        raise N2VCBadArgumentsException(
            message="ee_id is mandatory", bad_args=["ee_id"]
        )

    try:

        # Obtain cluster_uuid
        system_cluster_uuid = await self._get_system_cluster_id()

        # Get helm_id
        version, _, helm_id = self._get_ee_id_parts(ee_id)

        # Uninstall chart, for backward compatibility we must assume that if there is no
        # version it is helm-v2
        if version == "helm-v3":
            await self._k8sclusterhelm3.uninstall(system_cluster_uuid, helm_id)
        else:
            await self._k8sclusterhelm2.uninstall(system_cluster_uuid, helm_id)
        self.log.info("ee_id: {} deleted".format(ee_id))
    except N2VCException:
        raise
    except Exception as e:
        self.log.error("Error deleting ee id: {}: {}".format(ee_id, e), exc_info=True)
        # keep the original error as the cause so the traceback is not lost
        raise N2VCException("Error deleting ee id {}: {}".format(ee_id, e)) from e
477
async def delete_namespace(self, namespace: str, db_dict: dict = None, total_timeout: float = None):
    """
    Not implemented for this connector, execution environments must be deleted individually.

    All arguments are ignored and None is returned.
    """
    return None
481
async def install_k8s_proxy_charm(
    self,
    charm_name: str,
    namespace: str,
    artifact_path: str,
    db_dict: dict,
    progress_timeout: float = None,
    total_timeout: float = None,
    config: dict = None,
    *kargs, **kwargs
) -> str:
    """
    No-op for this connector: all arguments are ignored and None is returned.
    """
    return None
494
@retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay")
async def _get_ssh_key(self, ip_addr):
    """
    Asks the execution environment listening at ip_addr for its ssh key through gRPC.

    The channel opened for the request is always closed before returning.

    :param ip_addr: address used to open the channel, together with the ee service port
    :returns: the ``message`` field of the GetSshKey reply
    """
    grpc_channel = Channel(ip_addr, self._ee_service_port)
    try:
        executor = FrontendExecutorStub(grpc_channel)
        self.log.debug("get ssh key, ip_addr: {}".format(ip_addr))
        response: SshKeyReply = await executor.GetSshKey(SshKeyRequest())
        return response.message
    finally:
        grpc_channel.close()
505
@retryer(max_wait_time_var="_initial_retry_time", delay_time_var="_retry_delay")
async def _execute_config_primitive(self, ip_addr, params, db_dict=None):
    """
    Runs the "config" primitive, retried with the initial retry time.

    :returns: whatever _execute_primitive_internal returns for the "config" primitive
    """
    outcome = await self._execute_primitive_internal(ip_addr, "config", params, db_dict=db_dict)
    return outcome
509
@retryer(max_wait_time_var="_max_retry_time", delay_time_var="_retry_delay")
async def _execute_primitive(self, ip_addr, primitive_name, params, db_dict=None):
    """
    Runs the given primitive, retried with the max retry time for normal operations.

    :returns: whatever _execute_primitive_internal returns for that primitive
    """
    outcome = await self._execute_primitive_internal(ip_addr, primitive_name, params, db_dict=db_dict)
    return outcome
513
async def _execute_primitive_internal(self, ip_addr, primitive_name, params, db_dict=None):
    """
    Sends one primitive request over a RunPrimitive stream and consumes all the replies.

    Every reply is logged and, when db_dict is provided, written to the database as the
    detailed status. The channel is always closed before returning.

    :param ip_addr: address used to open the channel, together with the ee service port
    :param primitive_name: name placed in the request
    :param params: parameters, sent yaml-dumped in the request
    :param db_dict: optional database location for the intermediate notifications
    :returns: (status, detailed_message) of the last reply, or
        ("ERROR", "No result received") when there is no last reply
    """

    grpc_channel = Channel(ip_addr, self._ee_service_port)
    try:
        executor = FrontendExecutorStub(grpc_channel)
        async with executor.RunPrimitive.open() as stream:
            request_id = str(uuid.uuid1())
            last_reply = None
            self.log.debug("Execute primitive internal: id:{}, name:{}, params: {}".
                           format(request_id, primitive_name, params))
            request = PrimitiveRequest(id=request_id, name=primitive_name, params=yaml.dump(params))
            await stream.send_message(request, end=True)
            async for reply in stream:
                self.log.debug("Received reply: {}".format(reply))
                last_reply = reply
                # If db_dict provided write notifs in database
                if db_dict:
                    self._write_op_detailed_status(db_dict, reply.status, reply.detailed_message)
            if not last_reply:
                return "ERROR", "No result received"
            return last_reply.status, last_reply.detailed_message
    finally:
        grpc_channel.close()
538
539 def _write_op_detailed_status(self, db_dict, status, detailed_message):
540
541 # write ee_id to database: _admin.deployed.VCA.x
542 try:
543 the_table = db_dict["collection"]
544 the_filter = db_dict["filter"]
545 update_dict = {"detailed-status": "{}: {}".format(status, detailed_message)}
546 # self.log.debug('Writing ee_id to database: {}'.format(the_path))
547 self.db.set_one(
548 table=the_table,
549 q_filter=the_filter,
550 update_dict=update_dict,
551 fail_on_empty=True,
552 )
553 except asyncio.CancelledError:
554 raise
555 except Exception as e:
556 self.log.error("Error writing detailedStatus to database: {}".format(e))
557
async def _get_system_cluster_id(self):
    """
    Returns the helm-chart-v3 id of the OSM system k8s cluster, caching it on the instance.

    The cluster is read from the "k8sclusters" collection by name. When it has no
    helm-chart-v3 id yet, the helm v3 environment is initialized for it and the result is
    written back to the database.

    :returns: the cluster id to use in the helm connectors
    :raises N2VCException: if the cluster cannot be initialized for helm v3
    """
    if not self._system_cluster_id:
        db_k8cluster = self.db.get_one("k8sclusters", {"name": self._KUBECTL_OSM_CLUSTER_NAME})
        k8s_hc_id = deep_get(db_k8cluster, ("_admin", "helm-chart-v3", "id"))
        if not k8s_hc_id:
            # bound before the try block so the error path below can always report it
            cluster_id = None
            try:
                # backward compatibility for existing clusters that have not been initialized for helm v3
                cluster_id = db_k8cluster.get("_id")
                k8s_credentials = yaml.safe_dump(db_k8cluster.get("credentials"))
                k8s_hc_id, uninstall_sw = await self._k8sclusterhelm3.init_env(k8s_credentials,
                                                                               reuse_cluster_uuid=cluster_id)
                # all keys live under "_admin.helm-chart-v3", the same path read above
                db_k8scluster_update = {"_admin.helm-chart-v3.error_msg": None,
                                        "_admin.helm-chart-v3.id": k8s_hc_id,
                                        "_admin.helm-chart-v3.created": uninstall_sw,
                                        "_admin.helm-chart-v3.operationalState": "ENABLED"}
                self.update_db_2("k8sclusters", cluster_id, db_k8scluster_update)
            except Exception as e:
                self.log.error("error initializing helm-v3 cluster: {}".format(str(e)))
                raise N2VCException("K8s system cluster '{}' has not been initialized for helm-chart-v3".format(
                    cluster_id)) from e
        self._system_cluster_id = k8s_hc_id
    return self._system_cluster_id
580
581 def _get_ee_id_parts(self, ee_id):
582 """
583 Parses ee_id stored at database that can be either 'version:namespace.helm_id' or only
584 namespace.helm_id for backward compatibility
585 If exists helm version can be helm-v3 or helm (helm-v2 old version)
586 """
587 version, _, part_id = ee_id.rpartition(':')
588 namespace, _, helm_id = part_id.rpartition('.')
589 return version, namespace, helm_id