Commit 238ba023 authored by Cyril Monmouton's avatar Cyril Monmouton
Browse files

Restore optimization to remove select distinct index to be loaded

parent e21cad5a
Pipeline #83339 failed with stages
in 12 minutes and 24 seconds
......@@ -345,10 +345,14 @@ class DaskBulkStorage:
async def _build_session_index(
self, chunk_metas: List[session_meta.SessionFileMeta], record_id: str, from_bulk_id: str
) -> pd.Index:
"""Combine all chunks indexes + previous version index"""
# list one file per different index_hash.
# read chunks indexes from paquet
indexes = self._map_with_trace(_load_index_from_meta, chunk_metas,
"""
Combine all chunks indexes + previous version index
List one file per different index_hash.
Read chunks indexes from parquet
"""
chunks_meta_with_different_indexes = {hash_index: meta
for hash_index, meta in chunk_metas}.values()
indexes = self._map_with_trace(_load_index_from_meta, chunks_meta_with_different_indexes,
storage_options=self._parameters.storage_options)
if from_bulk_id:
# read the index of previous version
......@@ -419,6 +423,7 @@ class DaskBulkStorage:
catalog.change_columns_info(chunk_group)
@capture_timings('_save_session_index')
@with_trace('_save_session_index')
async def _save_session_index(self, path: str, index: pd.Index) -> str:
index_folder = pathBuilder.join(path, '_wdms_index_')
self._fs.mkdirs(pathBuilder.remove_protocol(index_folder)[0]) # TODO for local storage
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment