Synchronize with fs before deploying package with ee
[osm/LCM.git] / osm_lcm / lcm_helm_conn.py
1 ##
2 # Copyright 2020 Telefonica Investigacion y Desarrollo, S.A.U.
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 # http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13 # implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #
17 ##
18 import functools
19 import yaml
20 import asyncio
21 import socket
22 import uuid
23 import os
24
25 from grpclib.client import Channel
26
27 from osm_lcm.frontend_pb2 import PrimitiveRequest
28 from osm_lcm.frontend_pb2 import SshKeyRequest, SshKeyReply
29 from osm_lcm.frontend_grpc import FrontendExecutorStub
30
31 from n2vc.n2vc_conn import N2VCConnector
32 from n2vc.k8s_helm_conn import K8sHelmConnector
33 from n2vc.exceptions import N2VCBadArgumentsException, N2VCException, N2VCExecutionException
34
35 from osm_lcm.lcm_utils import deep_get
36
37
def retryer(max_wait_time=60, delay_time=10):
    """
    Decorator factory that retries an async callable while the peer refuses the connection.

    The wrapped coroutine function is awaited repeatedly until it succeeds or the retry
    budget is used up. Only ConnectionRefusedError triggers a retry; any other exception
    propagates immediately.

    :param max_wait_time: total retry budget in seconds; each failed attempt consumes
        delay_time seconds of it
    :param delay_time: seconds to sleep between two consecutive attempts
    :returns: a decorator for coroutine functions
    :raises ConnectionRefusedError: from the decorated function, once the budget is exhausted
    """
    def wrapper(func):
        retry_exceptions = (ConnectionRefusedError,)

        @functools.wraps(func)
        async def wrapped(*args, **kwargs):
            wait_time = max_wait_time
            last_error = None
            while wait_time > 0:
                try:
                    return await func(*args, **kwargs)
                except retry_exceptions as e:
                    last_error = e
                    wait_time -= delay_time
                    # Only sleep when another attempt is going to be made
                    if wait_time > 0:
                        await asyncio.sleep(delay_time)

            # Budget exhausted: surface the failure as an exception instead of handing
            # the exception class back to the caller as if it were a valid result
            if last_error is not None:
                raise last_error
            raise ConnectionRefusedError("retry time exhausted before any attempt was made")
        return wrapped
    return wrapper
58
59
class LCMHelmConn(N2VCConnector):
    """
    N2VC connector whose execution environments (ee) are helm charts.

    Charts are installed through K8sHelmConnector in the "osm" namespace of the cluster
    registered in the database as "_system-osm-k8s". Once deployed, the ee is reached over
    gRPC (FrontendExecutorStub) on the ee service port to obtain its ssh key and to run
    primitives.
    """
    _KUBECTL_OSM_NAMESPACE = "osm"
    _KUBECTL_OSM_CLUSTER_NAME = "_system-osm-k8s"
    _EE_SERVICE_PORT = 50050

    # Time between retries
    _EE_RETRY_DELAY = 10
    # Initial max retry time
    _MAX_INITIAL_RETRY_TIME = 300
    # Other retry time
    _MAX_RETRY_TIME = 30

    def __init__(self,
                 db: object,
                 fs: object,
                 log: object = None,
                 loop: object = None,
                 url: str = None,
                 username: str = None,
                 vca_config: dict = None,
                 on_update_db=None):
        """
        Initialize EE helm connector.

        All arguments are forwarded to the N2VCConnector constructor. vca_config is also
        read for the "kubectlpath" and "helmpath" entries used to build the helm connector.
        """

        # parent class constructor
        N2VCConnector.__init__(
            self,
            db=db,
            fs=fs,
            log=log,
            loop=loop,
            url=url,
            username=username,
            vca_config=vca_config,
            on_update_db=on_update_db,
        )

        self.log.debug("Initialize helm N2VC connector")

        # TODO - Obtain data from configuration
        self._ee_service_port = self._EE_SERVICE_PORT

        self._retry_delay = self._EE_RETRY_DELAY
        self._max_retry_time = self._MAX_RETRY_TIME
        self._initial_retry_time = self._MAX_INITIAL_RETRY_TIME

        # initialize helm connector
        self._k8sclusterhelm = K8sHelmConnector(
            kubectl_command=self.vca_config.get("kubectlpath"),
            helm_command=self.vca_config.get("helmpath"),
            fs=self.fs,
            log=self.log,
            db=self.db,
            on_update_db=None,
        )

        # Lazily resolved and cached by _get_system_cluster_id
        self._system_cluster_id = None
        self.log.info("Helm N2VC connector initialized")

    # TODO - reuse_ee_id?
    async def create_execution_environment(self,
                                           namespace: str,
                                           db_dict: dict,
                                           reuse_ee_id: str = None,
                                           progress_timeout: float = None,
                                           total_timeout: float = None,
                                           config: dict = None,
                                           artifact_path: str = None,
                                           vca_type: str = None) -> (str, dict):
        """
        Creates a new helm execution environment deploying the helm-chart indicated in the
        artifact_path
        :param str namespace: This param is not used, all helm charts are deployed in the osm
        system namespace
        :param dict db_dict: where to write to database when the status changes.
            It contains a dictionary with {collection: str, filter: {}, path: str},
                e.g. {collection: "nsrs", filter: {_id: <nsd-id>}, path:
                "_admin.deployed.VCA.3"}
        :param str reuse_ee_id: ee id from an older execution. TODO - right now this param is not used
        :param float progress_timeout: passed to the helm install as its timeout
        :param float total_timeout: not used by this method
        :param dict config: General variables to instantiate KDU. If it contains an "osm" entry,
            that entry is also copied under config["global"]["osm"] (config is modified in place)
        :param str artifact_path: path of package content
        :param str vca_type: Type of vca, not used as assumed of type helm
        :returns str, dict: id of the new execution environment including namespace.helm_id
        and credentials object set to None as all credentials should be osm kubernetes .kubeconfig
        :raises N2VCBadArgumentsException: if artifact_path is missing or does not exist
        :raises N2VCException: on any error deploying the chart
        """

        self.log.info(
            "create_execution_environment: namespace: {}, artifact_path: {}, db_dict: {}, "
            "reuse_ee_id: {}".format(
                namespace, artifact_path, db_dict, reuse_ee_id)
        )

        # Validate artifact-path is provided
        if not artifact_path:
            raise N2VCBadArgumentsException(
                message="artifact_path is mandatory", bad_args=["artifact_path"]
            )

        # Validate artifact-path exists and sync path
        from_path = os.path.split(artifact_path)[0]
        self.fs.sync(from_path)

        # remove duplicated / in chart path
        while "//" in artifact_path:
            artifact_path = artifact_path.replace("//", "/")

        # check chart path
        if not self.fs.file_exists(artifact_path):
            msg = "artifact path does not exist: {}".format(artifact_path)
            raise N2VCBadArgumentsException(message=msg, bad_args=["artifact_path"])
        helm_chart_path = artifact_path

        if artifact_path.startswith("/"):
            full_path = self.fs.path + helm_chart_path
        else:
            full_path = self.fs.path + "/" + helm_chart_path

        try:
            # Call helm conn install
            # Obtain system cluster id from database
            system_cluster_uuid = self._get_system_cluster_id()
            # Add parameter osm if exist to global
            if config and config.get("osm"):
                if not config.get("global"):
                    config["global"] = {}
                config["global"]["osm"] = config.get("osm")

            self.log.debug("install helm chart: {}".format(full_path))
            helm_id = await self._k8sclusterhelm.install(system_cluster_uuid, kdu_model=full_path,
                                                         namespace=self._KUBECTL_OSM_NAMESPACE,
                                                         params=config,
                                                         db_dict=db_dict,
                                                         timeout=progress_timeout)

            ee_id = "{}.{}".format(self._KUBECTL_OSM_NAMESPACE, helm_id)
            return ee_id, None
        except N2VCException:
            raise
        except Exception as e:
            self.log.error("Error deploying chart ee: {}".format(e), exc_info=True)
            raise N2VCException("Error deploying chart ee: {}".format(e)) from e

    async def register_execution_environment(self, namespace: str, credentials: dict, db_dict: dict,
                                             progress_timeout: float = None, total_timeout: float = None) -> str:
        # nothing to do
        pass

    async def install_configuration_sw(self,
                                       ee_id: str,
                                       artifact_path: str,
                                       db_dict: dict,
                                       progress_timeout: float = None,
                                       total_timeout: float = None,
                                       config: dict = None,
                                       num_units: int = 1,
                                       vca_type: str = None
                                       ):
        # nothing to do
        pass

    async def add_relation(self, ee_id_1: str, ee_id_2: str, endpoint_1: str, endpoint_2: str):
        # nothing to do
        pass

    async def remove_relation(self):
        # nothing to do
        pass

    async def get_status(self, namespace: str, yaml_format: bool = True):
        # not used for this connector
        pass

    async def get_ee_ssh_public__key(self, ee_id: str, db_dict: dict, progress_timeout: float = None,
                                     total_timeout: float = None) -> str:
        """
        Obtains ssh-public key from ee executing GetSshKey method from the ee.

        :param str ee_id: the id of the execution environment returned by
            create_execution_environment or register_execution_environment
        :param dict db_dict: only logged by this method
        :param float progress_timeout: not used by this method
        :param float total_timeout: not used by this method
        :returns: public key of the execution environment
        :raises N2VCBadArgumentsException: if ee_id is empty
        :raises N2VCException: on any error resolving or contacting the ee
        """

        self.log.info(
            "get_ee_ssh_public_key: ee_id: {}, db_dict: {}".format(
                ee_id, db_dict)
        )

        # check arguments
        if not ee_id:
            raise N2VCBadArgumentsException(
                message="ee_id is mandatory", bad_args=["ee_id"]
            )

        try:
            # Obtain ip_addr for the ee service, it is resolved by dns from the ee name by kubernetes
            # NOTE(review): socket.gethostbyname is a blocking call made from a coroutine
            namespace, helm_id = self._get_ee_id_parts(ee_id)
            ip_addr = socket.gethostbyname(helm_id)

            # Obtain ssh_key from the ee, this method will implement retries to allow the ee
            # install libraries and start successfully
            ssh_key = await self._get_ssh_key(ip_addr)
            return ssh_key
        except Exception as e:
            self.log.error("Error obtaining ee ssh_key: {}".format(e), exc_info=True)
            raise N2VCException("Error obtaining ee ssh_key: {}".format(e)) from e

    async def exec_primitive(self, ee_id: str, primitive_name: str, params_dict: dict, db_dict: dict = None,
                             progress_timeout: float = None, total_timeout: float = None) -> str:
        """
        Execute a primitive in the execution environment

        :param str ee_id: the one returned by create_execution_environment or
            register_execution_environment with the format namespace.helm_id
        :param str primitive_name: must be one defined in the software. There is one
            called 'config', where, for the proxy case, the 'credentials' of VM are
            provided
        :param dict params_dict: parameters of the action
        :param dict db_dict: where to write into database when the status changes.
                        It contains a dict with
                            {collection: <str>, filter: {},  path: <str>},
                            e.g. {collection: "nslcmops", filter:
                                {_id: <nslcmop_id>}, path: "_admin.VCA"}
                        It will be used to store information about intermediate notifications
        :param float progress_timeout: not used by this method
        :param float total_timeout: not used by this method
        :returns str: primitive result, if ok ("CONFIG OK" for the 'config' primitive, the
            detailed message of the last reply otherwise). It raises exceptions in case of fail
        :raises N2VCBadArgumentsException: if ee_id or primitive_name is empty
        :raises N2VCException: if the ee address cannot be resolved
        :raises N2VCExecutionException: if the primitive fails or returns a not ok status
        """

        self.log.info("exec primitive for ee_id : {}, primitive_name: {}, params_dict: {}, db_dict: {}".format(
            ee_id, primitive_name, params_dict, db_dict
        ))

        # check arguments
        if not ee_id:
            raise N2VCBadArgumentsException(
                message="ee_id is mandatory", bad_args=["ee_id"]
            )
        if not primitive_name:
            raise N2VCBadArgumentsException(
                message="action_name is mandatory", bad_args=["action_name"]
            )
        if params_dict is None:
            params_dict = dict()

        try:
            namespace, helm_id = self._get_ee_id_parts(ee_id)
            ip_addr = socket.gethostbyname(helm_id)
        except Exception as e:
            self.log.error("Error getting ee ip ee: {}".format(e))
            raise N2VCException("Error getting ee ip ee: {}".format(e)) from e

        if primitive_name == "config":
            try:
                # Execute config primitive, higher timeout to check the case ee is starting
                status, detailed_message = await self._execute_config_primitive(ip_addr, params_dict, db_dict=db_dict)
                self.log.debug("Executed config primitive ee_id_ {}, status: {}, message: {}".format(
                    ee_id, status, detailed_message))
                if status != "OK":
                    self.log.error("Error configuring helm ee, status: {}, message: {}".format(
                        status, detailed_message))
                    raise N2VCExecutionException(
                        message="Error configuring helm ee_id: {}, status: {}, message: {}: ".format(
                            ee_id, status, detailed_message
                        ),
                        primitive_name=primitive_name,
                    )
            except N2VCExecutionException:
                # Already logged and carrying the final message: do not wrap it a second time
                raise
            except Exception as e:
                self.log.error("Error configuring helm ee: {}".format(e))
                raise N2VCExecutionException(
                    message="Error configuring helm ee_id: {}, {}".format(
                        ee_id, e
                    ),
                    primitive_name=primitive_name,
                ) from e
            return "CONFIG OK"
        else:
            try:
                # Execute primitive
                status, detailed_message = await self._execute_primitive(ip_addr, primitive_name,
                                                                         params_dict, db_dict=db_dict)
                self.log.debug("Executed primitive {} ee_id_ {}, status: {}, message: {}".format(
                    primitive_name, ee_id, status, detailed_message))
                if status != "OK" and status != "PROCESSING":
                    self.log.error(
                        "Execute primitive {} returned not ok status: {}, message: {}".format(
                            primitive_name, status, detailed_message)
                    )
                    raise N2VCExecutionException(
                        message="Execute primitive {} returned not ok status: {}, message: {}".format(
                            primitive_name, status, detailed_message
                        ),
                        primitive_name=primitive_name,
                    )
            except N2VCExecutionException:
                # Already logged and carrying the final message: do not wrap it a second time
                raise
            except Exception as e:
                self.log.error(
                    "Error executing primitive {}: {}".format(primitive_name, e)
                )
                raise N2VCExecutionException(
                    message="Error executing primitive {} into ee={} : {}".format(
                        primitive_name, ee_id, e
                    ),
                    primitive_name=primitive_name,
                ) from e
            return detailed_message

    async def deregister_execution_environments(self):
        # nothing to be done
        pass

    async def delete_execution_environment(self, ee_id: str, db_dict: dict = None, total_timeout: float = None):
        """
        Delete an execution environment
        :param str ee_id: id of the execution environment to delete, included namespace.helm_id
        :param dict db_dict: where to write into database when the status changes.
                        It contains a dict with
                            {collection: <str>, filter: {},  path: <str>},
                            e.g. {collection: "nsrs", filter:
                                {_id: <nsd-id>}, path: "_admin.deployed.VCA.3"}
                        Not used by this method
        :param float total_timeout: not used by this method
        :raises N2VCBadArgumentsException: if ee_id is empty
        :raises N2VCException: on any error uninstalling the chart
        """

        self.log.info("ee_id: {}".format(ee_id))

        # check arguments (an empty id would otherwise request the uninstall of an empty helm id)
        if not ee_id:
            raise N2VCBadArgumentsException(
                message="ee_id is mandatory", bad_args=["ee_id"]
            )

        try:

            # Obtain cluster_uuid
            system_cluster_uuid = self._get_system_cluster_id()

            # Get helm_id
            namespace, helm_id = self._get_ee_id_parts(ee_id)

            # Uninstall chart
            await self._k8sclusterhelm.uninstall(system_cluster_uuid, helm_id)
            self.log.info("ee_id: {} deleted".format(ee_id))
        except N2VCException:
            raise
        except Exception as e:
            self.log.error("Error deleting ee id: {}: {}".format(ee_id, e), exc_info=True)
            raise N2VCException("Error deleting ee id {}: {}".format(ee_id, e)) from e

    async def delete_namespace(self, namespace: str, db_dict: dict = None, total_timeout: float = None):
        # method not implemented for this connector, execution environments must be deleted individually
        pass

    async def install_k8s_proxy_charm(
        self,
        charm_name: str,
        namespace: str,
        artifact_path: str,
        db_dict: dict,
        progress_timeout: float = None,
        total_timeout: float = None,
        config: dict = None,
    ) -> str:
        # nothing to do
        pass

    @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
    async def _get_ssh_key(self, ip_addr):
        """Ask the ee at ip_addr for its ssh public key (retried while the connection is refused)."""
        channel = Channel(ip_addr, self._ee_service_port)
        try:
            stub = FrontendExecutorStub(channel)
            self.log.debug("get ssh key, ip_addr: {}".format(ip_addr))
            reply: SshKeyReply = await stub.GetSshKey(SshKeyRequest())
            return reply.message
        finally:
            channel.close()

    @retryer(max_wait_time=_MAX_INITIAL_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
    async def _execute_config_primitive(self, ip_addr, params, db_dict=None):
        """Run the "config" primitive using the initial (longer) retry time."""
        return await self._execute_primitive_internal(ip_addr, "config", params, db_dict=db_dict)

    @retryer(max_wait_time=_MAX_RETRY_TIME, delay_time=_EE_RETRY_DELAY)
    async def _execute_primitive(self, ip_addr, primitive_name, params, db_dict=None):
        """Run a primitive using the regular (shorter) retry time."""
        return await self._execute_primitive_internal(ip_addr, primitive_name, params, db_dict=db_dict)

    async def _execute_primitive_internal(self, ip_addr, primitive_name, params, db_dict=None):
        """
        Send a RunPrimitive request to the ee and consume the stream of replies.

        :param ip_addr: address of the ee service
        :param primitive_name: name of the primitive to execute
        :param params: primitive parameters, sent yaml-encoded
        :param db_dict: if provided, every reply is written to the database as detailed status
        :returns: (status, detailed_message) of the last reply received, or
            ("ERROR", "No result received") if the stream yielded no reply
        """

        channel = Channel(ip_addr, self._ee_service_port)
        try:
            stub = FrontendExecutorStub(channel)
            async with stub.RunPrimitive.open() as stream:
                primitive_id = str(uuid.uuid1())
                result = None
                self.log.debug("Execute primitive internal: id:{}, name:{}, params: {}".
                               format(primitive_id, primitive_name, params))
                await stream.send_message(
                    PrimitiveRequest(id=primitive_id, name=primitive_name, params=yaml.dump(params)), end=True)
                async for reply in stream:
                    self.log.debug("Received reply: {}".format(reply))
                    result = reply
                    # If db_dict provided write notifs in database
                    if db_dict:
                        self._write_op_detailed_status(db_dict, reply.status, reply.detailed_message)
                # Read from the saved last reply, not from the loop variable, which is
                # unbound when the stream yields nothing
                if result is not None:
                    return result.status, result.detailed_message
                return "ERROR", "No result received"
        finally:
            channel.close()

    def _write_op_detailed_status(self, db_dict, status, detailed_message):
        """
        Best-effort write of "<status>: <detailed_message>" as "detailed-status" of the record
        selected by db_dict ("collection" and "filter" keys). Errors are logged, not raised.
        """

        # write detailed status to database
        try:
            the_table = db_dict["collection"]
            the_filter = db_dict["filter"]
            update_dict = {"detailed-status": "{}: {}".format(status, detailed_message)}
            # self.log.debug('Writing ee_id to database: {}'.format(the_path))
            self.db.set_one(
                table=the_table,
                q_filter=the_filter,
                update_dict=update_dict,
                fail_on_empty=True,
            )
        except asyncio.CancelledError:
            raise
        except Exception as e:
            self.log.error("Error writing detailedStatus to database: {}".format(e))

    def _get_system_cluster_id(self):
        """
        Return the helm-chart id of the osm system cluster, reading it from the database on
        first use and caching it afterwards.

        :raises N2VCException: if the cluster record has no helm-chart id
        """
        if not self._system_cluster_id:
            db_k8cluster = self.db.get_one("k8sclusters", {"name": self._KUBECTL_OSM_CLUSTER_NAME})
            k8s_hc_id = deep_get(db_k8cluster, ("_admin", "helm-chart", "id"))
            if not k8s_hc_id:
                self.log.error("osm system cluster has not been properly initialized for helm connector, "
                               "helm-chart id is not defined")
                raise N2VCException("osm system cluster has not been properly initialized for helm connector")
            self._system_cluster_id = k8s_hc_id
        return self._system_cluster_id

    def _get_ee_id_parts(self, ee_id):
        """Split an ee_id of the form namespace.helm_id at the first dot into (namespace, helm_id)."""
        namespace, _, helm_id = ee_id.partition('.')
        return namespace, helm_id