Commit 9212a6bd authored by Jeremie Hallal
Browse files

persist after calling set_index

parent 796b3f03
Pipeline #65640 failed with stages
in 49 seconds
......@@ -76,7 +76,7 @@ class SessionFileMeta:
def set_index(ddf: dd.DataFrame):
"""Set index of the dask dataFrame only if needed."""
if not ddf.known_divisions:
return ddf.set_index(ddf.index, sorted=True)
return ddf.set_index(ddf.index, sorted=True).persist()
return ddf
......@@ -91,3 +91,4 @@ def do_merge(df1: dd.DataFrame, df2: dd.DataFrame):
if share_items(df1.columns, df2.columns):
return df2.combine_first(df1)
return df1.join(df2, how='outer') # join seems faster when there are no columns in common
......@@ -20,6 +20,7 @@ import random
import numpy.testing as npt
import pandas as pd
import pytest
import dask.dataframe as dd
from .fixtures import with_wdms_env
from ..request_builders.wdms.crud.log import build_request_create_log, build_request_delete_log
......@@ -24,7 +24,6 @@ import mock
from app.utils import DaskException
from app.utils import DaskClient
from dask.utils import parse_bytes
from app.helper import logger
from app.bulk_persistence.dask.dask_bulk_storage import (BulkNotFound,
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment