GcpDatastoreCleanUp.py 1.68 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import json
import os
from google.api_core.retry import Retry
from google.cloud import datastore

from Utility import Utility, RunEnv

# Optional overrides taken from the environment; each is None when the
# corresponding variable is not set.
schema_namespace = os.getenv("SCHEMA_NAMESPACE")
schema_kind = os.getenv("SCHEMA_KIND")

# Fallback Datastore namespace / kind used when no override is supplied.
default_namespace = "dataecosystem"
default_kind = "schema"


def cleanup_datastore():
  """Delete the bootstrapped schema entities from Google Cloud Datastore.

  The Datastore namespace and kind are taken from the module-level
  ``schema_namespace`` / ``schema_kind`` values (populated from the
  SCHEMA_NAMESPACE and SCHEMA_KIND environment variables), falling back to
  ``default_namespace`` / ``default_kind`` when a value is unset or empty.

  For every entry of the JSON list in ``RunEnv.BOOTSTRAP_OPTIONS`` the
  load-sequence file ``<deployments>/<SCHEMAS_FOLDER>/<folder>/<load-sequence>``
  is read, and for each item in it one key is deleted.  The key name is the
  item's ``kind`` with the ``{{schema-authority}}`` placeholder replaced by
  ``RunEnv.DATA_PARTITION``.

  Raises:
    KeyError: if a bootstrap option lacks ``folder`` / ``load-sequence`` or a
      sequence item lacks ``kind``.
  """
  # Treat an exported-but-empty variable like an unset one: the log messages
  # below already describe this case as "empty", and an empty string would
  # otherwise be passed straight through as the namespace / kind.
  if not schema_namespace:
    namespace_to_use = default_namespace
    print(
        "SCHEMA_NAMESPACE is empty, using default namespace: " + default_namespace)
  else:
    namespace_to_use = schema_namespace
    print("SCHEMA_NAMESPACE not empty, using var value: " + schema_namespace)
  datastore_client = datastore.Client(namespace=namespace_to_use)

  if not schema_kind:
    print("SCHEMA_KIND is empty, using default kind: " + default_kind)
    kind_to_use = default_kind
  else:
    print("SCHEMA_KIND not empty, using var value: " + schema_kind)
    kind_to_use = schema_kind

  # The retry policy is identical for every delete, so build it once instead
  # of once per key (and avoid shadowing the ``range`` builtin).
  delete_retry = Retry(initial=5, maximum=20, multiplier=2, deadline=120)

  deployments = Utility.path_to_deployments()
  bootstrap_options = json.loads(RunEnv.BOOTSTRAP_OPTIONS)

  for option in bootstrap_options:
    schema_path = option['folder']
    load_sequence = option['load-sequence']
    path = os.path.join(deployments, RunEnv.SCHEMAS_FOLDER, schema_path,
                        load_sequence)
    print("Schemas sequence location: " + path)
    sequence = Utility.load_json(path)
    for item in sequence:
      # Resolve the partition placeholder to obtain the stored entity name.
      entity_name = item['kind'].replace(
          '{{schema-authority}}', RunEnv.DATA_PARTITION)
      complete_key = datastore_client.key(kind_to_use, entity_name)
      print("Key to delete: " + str(complete_key))
      datastore_client.delete(key=complete_key, retry=delete_retry)


# Run the cleanup only when executed as a script, not when imported.
if __name__ == "__main__":
  cleanup_datastore()