# Feature 9952: Distributed Proxy Charms
# osm/LCM.git / osm_lcm/lcm_helm_conn.py
1 ##
2 # Copyright 2020 Telefonica Investigacion y Desarrollo, S.A.U.
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 # http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13 # implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #
17 ##
18 import functools
19 import yaml
20 import asyncio
21 import socket
22 import uuid
23 import os
24
25 from grpclib.client import Channel
26
27 from osm_lcm.frontend_pb2 import PrimitiveRequest
28 from osm_lcm.frontend_pb2 import SshKeyRequest, SshKeyReply
29 from osm_lcm.frontend_grpc import FrontendExecutorStub
30 from osm_lcm.lcm_utils import LcmBase
31
32 from n2vc.n2vc_conn import N2VCConnector
33 from n2vc.k8s_helm_conn import K8sHelmConnector
34 from n2vc.k8s_helm3_conn import K8sHelm3Connector
35 from n2vc.exceptions import N2VCBadArgumentsException, N2VCException, N2VCExecutionException
36
37 from osm_lcm.lcm_utils import deep_get
38
39
def retryer(max_wait_time=60, delay_time=10):
    """
    Decorator factory that retries an async callable while it raises
    ConnectionRefusedError (the execution environment grpc service may
    not be reachable yet while its pod is starting).

    :param max_wait_time: maximum accumulated waiting time (seconds)
        spent between retries before giving up
    :param delay_time: time (seconds) slept between consecutive retries
    :returns: decorator to apply to a coroutine function
    """
    def wrapper(func):
        # Exceptions considered transient and therefore retried
        retry_exceptions = (
            ConnectionRefusedError,
        )

        @functools.wraps(func)
        async def wrapped(*args, **kwargs):
            wait_time = max_wait_time
            while wait_time > 0:
                try:
                    return await func(*args, **kwargs)
                except retry_exceptions:
                    wait_time = wait_time - delay_time
                    await asyncio.sleep(delay_time)
            # Retries exhausted: propagate the error to the caller.
            # (The original code returned the ConnectionRefusedError class
            # here, which callers would mistake for a successful result.)
            raise ConnectionRefusedError(
                "Maximum retry time exceeded ({} seconds)".format(max_wait_time)
            )
        return wrapped
    return wrapper
60
61
class LCMHelmConn(N2VCConnector, LcmBase):
    """
    N2VC connector for execution environments deployed as helm charts.

    Each execution environment (ee) is a helm chart installed on the OSM
    system cluster; this connector talks to the ee through the grpc
    frontend service exposed by the chart (GetSshKey / RunPrimitive).
    """

    # Namespace where all ee helm charts are installed
    _KUBECTL_OSM_NAMESPACE = "osm"
    # Name of the system cluster registered in the "k8sclusters" collection
    _KUBECTL_OSM_CLUSTER_NAME = "_system-osm-k8s"
    # Port of the grpc frontend service exposed by the ee
    _EE_SERVICE_PORT = 50050

    # Time between retries
    _EE_RETRY_DELAY = 10
    # Initial max retry time (the ee may need time to install and start)
    _MAX_INITIAL_RETRY_TIME = 300
    # Max retry time for other operations
    _MAX_RETRY_TIME = 30

    def __init__(self,
                 db: object,
                 fs: object,
                 log: object = None,
                 loop: object = None,
                 url: str = None,
                 username: str = None,
                 vca_config: dict = None,
                 on_update_db=None, ):
        """
        Initialize EE helm connector.
        """

        # parent class constructor
        N2VCConnector.__init__(
            self,
            db=db,
            fs=fs,
            log=log,
            loop=loop,
            url=url,
            username=username,
            vca_config=vca_config,
            on_update_db=on_update_db,
        )

        self.log.debug("Initialize helm N2VC connector")

        # TODO - Obtain data from configuration
        self._ee_service_port = self._EE_SERVICE_PORT

        self._retry_delay = self._EE_RETRY_DELAY
        self._max_retry_time = self._MAX_RETRY_TIME
        self._initial_retry_time = self._MAX_INITIAL_RETRY_TIME

        # initialize helm connector for helmv2 and helmv3
        self._k8sclusterhelm2 = K8sHelmConnector(
            kubectl_command=self.vca_config.get("kubectlpath"),
            helm_command=self.vca_config.get("helmpath"),
            fs=self.fs,
            log=self.log,
            db=self.db,
            on_update_db=None,
        )

        self._k8sclusterhelm3 = K8sHelm3Connector(
            kubectl_command=self.vca_config.get("kubectlpath"),
            helm_command=self.vca_config.get("helm3path"),
            fs=self.fs,
            log=self.log,
            db=self.db,
            on_update_db=None,
        )

        # Cached system cluster id, resolved lazily by _get_system_cluster_id
        self._system_cluster_id = None
        self.log.info("Helm N2VC connector initialized")

    # TODO - ¿reuse_ee_id?
    async def create_execution_environment(self,
                                           namespace: str,
                                           db_dict: dict,
                                           reuse_ee_id: str = None,
                                           progress_timeout: float = None,
                                           total_timeout: float = None,
                                           config: dict = None,
                                           artifact_path: str = None,
                                           vca_type: str = None,
                                           *kargs, **kwargs) -> (str, dict):
        """
        Creates a new helm execution environment deploying the helm chart indicated in the
        artifact_path
        :param str namespace: This param is not used, all helm charts are deployed in the osm
        system namespace
        :param dict db_dict: where to write to database when the status changes.
            It contains a dictionary with {collection: str, filter: {}, path: str},
                e.g. {collection: "nsrs", filter: {_id: <nsd-id>, path:
                "_admin.deployed.VCA.3"}
        :param str reuse_ee_id: ee id from an older execution. TODO - right now this param is not used
        :param float progress_timeout:
        :param float total_timeout:
        :param dict config: General variables to instantiate KDU
        :param str artifact_path: path of package content
        :param str vca_type: Type of vca, must be type helm or helm-v3
        :returns str, dict: id of the new execution environment including namespace.helm_id
        and credentials object set to None as all credentials should be osm kubernetes .kubeconfig
        """

        self.log.info(
            "create_execution_environment: namespace: {}, artifact_path: {}, db_dict: {}, "
            "reuse_ee_id: {}".format(
                namespace, artifact_path, db_dict, reuse_ee_id)
        )

        # Validate artifact-path is provided
        if artifact_path is None or len(artifact_path) == 0:
            raise N2VCBadArgumentsException(
                message="artifact_path is mandatory", bad_args=["artifact_path"]
            )

        # Validate artifact-path exists and sync path
        from_path = os.path.split(artifact_path)[0]
        self.fs.sync(from_path)

        # remove // in charm path
        while artifact_path.find("//") >= 0:
            artifact_path = artifact_path.replace("//", "/")

        # check charm path
        if self.fs.file_exists(artifact_path):
            helm_chart_path = artifact_path
        else:
            msg = "artifact path does not exist: {}".format(artifact_path)
            raise N2VCBadArgumentsException(message=msg, bad_args=["artifact_path"])

        if artifact_path.startswith("/"):
            full_path = self.fs.path + helm_chart_path
        else:
            full_path = self.fs.path + "/" + helm_chart_path

        try:
            # Call helm conn install
            # Obtain system cluster id from database
            system_cluster_uuid = await self._get_system_cluster_id()
            # Add parameter osm if exists to global
            if config and config.get("osm"):
                if not config.get("global"):
                    config["global"] = {}
                config["global"]["osm"] = config.get("osm")

            self.log.debug("install helm chart: {}".format(full_path))
            if vca_type == "helm":
                helm_id = await self._k8sclusterhelm2.install(system_cluster_uuid, kdu_model=full_path,
                                                              namespace=self._KUBECTL_OSM_NAMESPACE,
                                                              params=config,
                                                              db_dict=db_dict,
                                                              timeout=progress_timeout)
            else:
                helm_id = await self._k8sclusterhelm3.install(system_cluster_uuid, kdu_model=full_path,
                                                              namespace=self._KUBECTL_OSM_NAMESPACE,
                                                              params=config,
                                                              db_dict=db_dict,
                                                              timeout=progress_timeout)

            # ee_id format: "<version>:<namespace>.<helm_id>", parsed back by _get_ee_id_parts
            ee_id = "{}:{}.{}".format(vca_type, self._KUBECTL_OSM_NAMESPACE, helm_id)
            return ee_id, None
        except N2VCException:
            raise
        except Exception as e:
            self.log.error("Error deploying chart ee: {}".format(e), exc_info=True)
            raise N2VCException("Error deploying chart ee: {}".format(e))

    async def register_execution_environment(self, namespace: str, credentials: dict, db_dict: dict,
                                             progress_timeout: float = None, total_timeout: float = None,
                                             *kargs, **kwargs) -> str:
        # nothing to do
        pass

    async def install_configuration_sw(self,
                                       ee_id: str,
                                       artifact_path: str,
                                       db_dict: dict,
                                       progress_timeout: float = None,
                                       total_timeout: float = None,
                                       config: dict = None,
                                       num_units: int = 1,
                                       vca_type: str = None
                                       ):
        # nothing to do
        pass

    async def add_relation(self, ee_id_1: str, ee_id_2: str, endpoint_1: str, endpoint_2: str):
        # nothing to do
        pass

    async def remove_relation(self):
        # nothing to do
        pass

    async def get_status(self, namespace: str, yaml_format: bool = True):
        # not used for this connector
        pass

    async def get_ee_ssh_public__key(self, ee_id: str, db_dict: dict, progress_timeout: float = None,
                                     total_timeout: float = None) -> str:
        """
        Obtains ssh-public key from ee executing GetSShKey method from the ee.

        :param str ee_id: the id of the execution environment returned by
            create_execution_environment or register_execution_environment
        :param dict db_dict:
        :param float progress_timeout:
        :param float total_timeout:
        :returns: public key of the execution environment
        """

        self.log.info(
            "get_ee_ssh_public_key: ee_id: {}, db_dict: {}".format(
                ee_id, db_dict)
        )

        # check arguments
        if ee_id is None or len(ee_id) == 0:
            raise N2VCBadArgumentsException(
                message="ee_id is mandatory", bad_args=["ee_id"]
            )

        try:
            # Obtain ip_addr for the ee service, it is resolved by dns from the ee name by kubernetes
            version, namespace, helm_id = self._get_ee_id_parts(ee_id)
            ip_addr = socket.gethostbyname(helm_id)

            # Obtain ssh_key from the ee, this method will implement retries to allow the ee
            # install libraries and start successfully
            ssh_key = await self._get_ssh_key(ip_addr)
            return ssh_key
        except Exception as e:
            self.log.error("Error obtaining ee ssh_key: {}".format(e), exc_info=True)
            # NOTE: message typo ("ssh_ke") fixed
            raise N2VCException("Error obtaining ee ssh_key: {}".format(e))

    async def exec_primitive(self, ee_id: str, primitive_name: str, params_dict: dict, db_dict: dict = None,
                             progress_timeout: float = None, total_timeout: float = None) -> str:
        """
        Execute a primitive in the execution environment

        :param str ee_id: the one returned by create_execution_environment or
            register_execution_environment with the format namespace.helm_id
        :param str primitive_name: must be one defined in the software. There is one
            called 'config', where, for the proxy case, the 'credentials' of VM are
            provided
        :param dict params_dict: parameters of the action
        :param dict db_dict: where to write into database when the status changes.
            It contains a dict with
                {collection: <str>, filter: {}, path: <str>},
                e.g. {collection: "nslcmops", filter:
                {_id: <nslcmop_id>, path: "_admin.VCA"}
            It will be used to store information about intermediate notifications
        :param float progress_timeout:
        :param float total_timeout:
        :returns str: primitive result, if ok. It raises exceptions in case of fail
        """

        self.log.info("exec primitive for ee_id : {}, primitive_name: {}, params_dict: {}, db_dict: {}".format(
            ee_id, primitive_name, params_dict, db_dict
        ))

        # check arguments
        if ee_id is None or len(ee_id) == 0:
            raise N2VCBadArgumentsException(
                message="ee_id is mandatory", bad_args=["ee_id"]
            )
        if primitive_name is None or len(primitive_name) == 0:
            raise N2VCBadArgumentsException(
                message="action_name is mandatory", bad_args=["action_name"]
            )
        if params_dict is None:
            params_dict = dict()

        try:
            # The ee hostname (helm_id) is resolved to an ip by kubernetes dns
            version, namespace, helm_id = self._get_ee_id_parts(ee_id)
            ip_addr = socket.gethostbyname(helm_id)
        except Exception as e:
            self.log.error("Error getting ee ip ee: {}".format(e))
            raise N2VCException("Error getting ee ip ee: {}".format(e))

        if primitive_name == "config":
            try:
                # Execute config primitive, higher timeout to check the case ee is starting
                status, detailed_message = await self._execute_config_primitive(ip_addr, params_dict, db_dict=db_dict)
                self.log.debug("Executed config primitive ee_id_ {}, status: {}, message: {}".format(
                    ee_id, status, detailed_message))
                if status != "OK":
                    self.log.error("Error configuring helm ee, status: {}, message: {}".format(
                        status, detailed_message))
                    raise N2VCExecutionException(
                        message="Error configuring helm ee_id: {}, status: {}, message: {}: ".format(
                            ee_id, status, detailed_message
                        ),
                        primitive_name=primitive_name,
                    )
            except Exception as e:
                self.log.error("Error configuring helm ee: {}".format(e))
                raise N2VCExecutionException(
                    message="Error configuring helm ee_id: {}, {}".format(
                        ee_id, e
                    ),
                    primitive_name=primitive_name,
                )
            return "CONFIG OK"
        else:
            try:
                # Execute primitive
                status, detailed_message = await self._execute_primitive(ip_addr, primitive_name,
                                                                         params_dict, db_dict=db_dict)
                self.log.debug("Executed primitive {} ee_id_ {}, status: {}, message: {}".format(
                    primitive_name, ee_id, status, detailed_message))
                if status != "OK" and status != "PROCESSING":
                    self.log.error(
                        "Execute primitive {} returned not ok status: {}, message: {}".format(
                            primitive_name, status, detailed_message)
                    )
                    raise N2VCExecutionException(
                        message="Execute primitive {} returned not ok status: {}, message: {}".format(
                            primitive_name, status, detailed_message
                        ),
                        primitive_name=primitive_name,
                    )
            except Exception as e:
                self.log.error(
                    "Error executing primitive {}: {}".format(primitive_name, e)
                )
                raise N2VCExecutionException(
                    message="Error executing primitive {} into ee={} : {}".format(
                        primitive_name, ee_id, e
                    ),
                    primitive_name=primitive_name,
                )
            return detailed_message

    async def deregister_execution_environments(self):
        # nothing to be done
        pass

    async def delete_execution_environment(self, ee_id: str, db_dict: dict = None, total_timeout: float = None):
        """
        Delete an execution environment
        :param str ee_id: id of the execution environment to delete, included namespace.helm_id
        :param dict db_dict: where to write into database when the status changes.
            It contains a dict with
                {collection: <str>, filter: {}, path: <str>},
                e.g. {collection: "nsrs", filter:
                {_id: <nsd-id>, path: "_admin.deployed.VCA.3"}
        :param float total_timeout:
        """

        self.log.info("ee_id: {}".format(ee_id))

        # check arguments
        if ee_id is None:
            raise N2VCBadArgumentsException(
                message="ee_id is mandatory", bad_args=["ee_id"]
            )

        try:

            # Obtain cluster_uuid
            system_cluster_uuid = await self._get_system_cluster_id()

            # Get helm_id
            version, namespace, helm_id = self._get_ee_id_parts(ee_id)

            # Uninstall chart, for backward compatibility we must assume that if there is no
            # version it is helm-v2
            if version == "helm-v3":
                await self._k8sclusterhelm3.uninstall(system_cluster_uuid, helm_id)
            else:
                await self._k8sclusterhelm2.uninstall(system_cluster_uuid, helm_id)
            self.log.info("ee_id: {} deleted".format(ee_id))
        except N2VCException:
            raise
        except Exception as e:
            self.log.error("Error deleting ee id: {}: {}".format(ee_id, e), exc_info=True)
            raise N2VCException("Error deleting ee id {}: {}".format(ee_id, e))

    async def delete_namespace(self, namespace: str, db_dict: dict = None, total_timeout: float = None):
        # method not implemented for this connector, execution environments must be deleted individually
        pass

    async def install_k8s_proxy_charm(
        self,
        charm_name: str,
        namespace: str,
        artifact_path: str,
        db_dict: dict,
        progress_timeout: float = None,
        total_timeout: float = None,
        config: dict = None,
        *kargs, **kwargs
    ) -> str:
        pass

    @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
    async def _get_ssh_key(self, ip_addr):
        """Call the ee GetSshKey grpc method; retried while the ee is starting."""
        channel = Channel(ip_addr, self._ee_service_port)
        try:
            stub = FrontendExecutorStub(channel)
            self.log.debug("get ssh key, ip_addr: {}".format(ip_addr))
            reply: SshKeyReply = await stub.GetSshKey(SshKeyRequest())
            return reply.message
        finally:
            channel.close()

    @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
    async def _execute_config_primitive(self, ip_addr, params, db_dict=None):
        # config uses the longer initial retry window, the ee may still be starting
        return await self._execute_primitive_internal(ip_addr, "config", params, db_dict=db_dict)

    @retryer(max_wait_time=_MAX_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
    async def _execute_primitive(self, ip_addr, primitive_name, params, db_dict=None):
        return await self._execute_primitive_internal(ip_addr, primitive_name, params, db_dict=db_dict)

    async def _execute_primitive_internal(self, ip_addr, primitive_name, params, db_dict=None):
        """
        Open a grpc stream to the ee and run the primitive, forwarding every
        intermediate status notification to the database when db_dict is provided.
        :returns: (status, detailed_message) of the last reply received
        """

        channel = Channel(ip_addr, self._ee_service_port)
        try:
            stub = FrontendExecutorStub(channel)
            async with stub.RunPrimitive.open() as stream:
                primitive_id = str(uuid.uuid1())
                result = None
                self.log.debug("Execute primitive internal: id:{}, name:{}, params: {}".
                               format(primitive_id, primitive_name, params))
                await stream.send_message(
                    PrimitiveRequest(id=primitive_id, name=primitive_name, params=yaml.dump(params)), end=True)
                async for reply in stream:
                    self.log.debug("Received reply: {}".format(reply))
                    result = reply
                    # If db_dict provided write notifs in database
                    if db_dict:
                        self._write_op_detailed_status(db_dict, reply.status, reply.detailed_message)
                if result:
                    return result.status, result.detailed_message
                else:
                    return "ERROR", "No result received"
        finally:
            channel.close()

    def _write_op_detailed_status(self, db_dict, status, detailed_message):
        """Best-effort write of an intermediate status notification to the db."""

        # write ee_id to database: _admin.deployed.VCA.x
        try:
            the_table = db_dict["collection"]
            the_filter = db_dict["filter"]
            update_dict = {"detailed-status": "{}: {}".format(status, detailed_message)}
            # self.log.debug('Writing ee_id to database: {}'.format(the_path))
            self.db.set_one(
                table=the_table,
                q_filter=the_filter,
                update_dict=update_dict,
                fail_on_empty=True,
            )
        except asyncio.CancelledError:
            raise
        except Exception as e:
            # best-effort: a failed notification must not abort the primitive
            self.log.error("Error writing detailedStatus to database: {}".format(e))

    async def _get_system_cluster_id(self):
        """
        Return (and cache) the helm-chart-v3 cluster id of the OSM system cluster,
        initializing the cluster for helm v3 if it was registered before v3 support.
        """
        if not self._system_cluster_id:
            db_k8cluster = self.db.get_one("k8sclusters", {"name": self._KUBECTL_OSM_CLUSTER_NAME})
            k8s_hc_id = deep_get(db_k8cluster, ("_admin", "helm-chart-v3", "id"))
            if not k8s_hc_id:
                try:
                    # backward compatibility for existing clusters that have not been initialized for helm v3
                    cluster_id = db_k8cluster.get("_id")
                    k8s_credentials = yaml.safe_dump(db_k8cluster.get("credentials"))
                    k8s_hc_id, uninstall_sw = await self._k8sclusterhelm3.init_env(k8s_credentials,
                                                                                   reuse_cluster_uuid=cluster_id)
                    # NOTE: stray "}" removed from the ".created" key below (was
                    # "_admin.helm-chart-v3}.created", which wrote a wrong db field)
                    db_k8scluster_update = {"_admin.helm-chart-v3.error_msg": None,
                                            "_admin.helm-chart-v3.id": k8s_hc_id,
                                            "_admin.helm-chart-v3.created": uninstall_sw,
                                            "_admin.helm-chart-v3.operationalState": "ENABLED"}
                    self.update_db_2("k8sclusters", cluster_id, db_k8scluster_update)
                except Exception as e:
                    self.log.error("error initializing helm-v3 cluster: {}".format(str(e)))
                    raise N2VCException("K8s system cluster '{}' has not been initialized for helm-chart-v3".format(
                        cluster_id))
            self._system_cluster_id = k8s_hc_id
        return self._system_cluster_id

    def _get_ee_id_parts(self, ee_id):
        """
        Parses ee_id stored at database that can be either 'version:namespace.helm_id' or only
        namespace.helm_id for backward compatibility
        If exists helm version can be helm-v3 or helm (helm-v2 old version)
        """
        version, _, part_id = ee_id.rpartition(':')
        namespace, _, helm_id = part_id.rpartition('.')
        return version, namespace, helm_id