diff --git a/app/bulk_persistence/__init__.py b/app/bulk_persistence/__init__.py index d75320c60588cbc5b33040b314beb447667ceba1..3be6223fb3d4ea387159b0d3199ba3197964e3a0 100644 --- a/app/bulk_persistence/__init__.py +++ b/app/bulk_persistence/__init__.py @@ -17,13 +17,10 @@ from .bulk_filter import BulkReadFilters, BulkReadFilterOperator from .model_chunking import GetDataParams, DataframeBasicDescribe, DataframeDescribe from .dask.dask_bulk_storage import DaskBulkStorage from .dask.dask_bulk_storage_local import make_local_dask_bulk_storage -from .dask.storage_path_builder import hash_record_id -from .dask.traces import trace_dataframe_attributes, submit_with_trace, trace_attributes_root_span from .dataframe_persistence import create_and_store_dataframe, get_dataframe, download_bulk from .dataframe_serializer import DataframeSerializerAsync, DataframeSerializerSync -from .dataframe_validators import auto_cast_columns_to_string, columns_type_must_be_string, DataFrameValidationFunc, no_validation from .json_orient import JSONOrient -from .mime_types import MimeTypes, MimeType +from .mime_types import MimeTypes from .tenant_provider import resolve_tenant from .exceptions import UnknownChannelsException, InvalidBulkException, NoBulkException, NoDataException, RecordNotFoundException from .consistency_checks import ConsistencyException, DataConsistencyChecks @@ -33,7 +30,6 @@ from .capture_timings import capture_timings from .sessions_storage import Session, SessionsStorage, \ SessionNotFound, SessionInvalidState, SessionUpdatedEtagUnmatched, SessionException, \ SessionState, SessionUpdateMode, SessionInternal, CommitSessionResponse -from .dask.errors import BulkError, BulkRecordNotFound, BulkCurvesNotFound, TooManyColumnsRequested, FilterError, internal_bulk_exceptions # TMP: this should probably not be exposed outside of the bulk_persistence package from .temp_dir import get_temp_dir diff --git a/app/bulk_persistence/blob_storage.py b/app/bulk_persistence/blob_storage.py index 95d69411236c9ad7d71601e401f63c1de1df3130..0bfd476a6ccf1db7b52217090978496929709ffa 100644 --- a/app/bulk_persistence/blob_storage.py +++ b/app/bulk_persistence/blob_storage.py @@ -52,7 +52,7 @@ from .mime_types import MimeType, MimeTypes # - using faster format, e.g. hd5 # - threshold about the busyness of the service (if not busy and not huge data -> direct write) # - better proc fork and arg serialization -from app.helper.traces import with_trace +from ..helper.traces import with_trace def export_to_parquet( diff --git a/app/bulk_persistence/capture_timings.py b/app/bulk_persistence/capture_timings.py index fafe58ff13a1ed71c1fedf4cae7128d26eaf26f0..10e3f1e7723e50706e0a5e4d304bbcf40a141ffa 100644 --- a/app/bulk_persistence/capture_timings.py +++ b/app/bulk_persistence/capture_timings.py @@ -4,7 +4,7 @@ from functools import wraps import asyncio from time import perf_counter, process_time -from app.helper.logger import get_logger +from ..helper.logger import get_logger def make_log_captured_timing_handler(level=INFO): diff --git a/app/bulk_persistence/dask/__init__.py b/app/bulk_persistence/dask/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..ab4e198a3be638df9e91aee2a28956d0c8f16e58 100644 --- a/app/bulk_persistence/dask/__init__.py +++ b/app/bulk_persistence/dask/__init__.py @@ -0,0 +1 @@ +from . import dask_config diff --git a/app/bulk_persistence/dask/bulk_catalog.py b/app/bulk_persistence/dask/bulk_catalog.py index 22e2b2d11a1ecaecc4be5f2cc7487fc54e0677b4..3f091266905db80f5b1107f3e2992c59e00c4cac 100644 --- a/app/bulk_persistence/dask/bulk_catalog.py +++ b/app/bulk_persistence/dask/bulk_catalog.py @@ -22,7 +22,7 @@ from contextlib import suppress from dataclasses import dataclass from typing import Dict, Iterable, List, NamedTuple, Optional, Set -from app.helper.traces import with_trace +from ...helper.traces import with_trace from ..capture_timings import capture_timings from .storage_path_builder import join, remove_protocol from .utils import worker_capture_timing_handlers diff --git a/app/bulk_persistence/dask/session_file_meta.py b/app/bulk_persistence/dask/session_file_meta.py index ae8f2faacd98a6b8910ed7d34710af152d6fed50..448bd1d479a74a1f8ffb13d39860b83e314c5405 100644 --- a/app/bulk_persistence/dask/session_file_meta.py +++ b/app/bulk_persistence/dask/session_file_meta.py @@ -23,8 +23,8 @@ from distributed.worker import get_client import pandas as pd from .utils import share_items -from app.helper.logger import get_logger -from app.helper.traces import with_trace +from ...helper.logger import get_logger +from ...helper.traces import with_trace from ..sessions_storage import Session from ..capture_timings import capture_timings diff --git a/app/bulk_persistence/dask/utils.py b/app/bulk_persistence/dask/utils.py index 06750ef66d2278b99ec33afd17d945022809a42e..350f1d736ed549197536c7008995c7d6f8ebfbdb 100644 --- a/app/bulk_persistence/dask/utils.py +++ b/app/bulk_persistence/dask/utils.py @@ -20,7 +20,7 @@ import dask.dataframe as dd import pandas as pd import pyarrow.parquet as pa -from app.helper.logger import get_logger +from ...helper.logger import get_logger from ..capture_timings import capture_timings diff --git a/app/bulk_persistence/dataframe_persistence.py b/app/bulk_persistence/dataframe_persistence.py index 89d5585aaf4aec1f36ddccdcd69387fc34f55b36..6f677a599982b6933ecdd81de15c9208b186ad3d 100644 --- a/app/bulk_persistence/dataframe_persistence.py +++ b/app/bulk_persistence/dataframe_persistence.py @@ -30,7 +30,7 @@ from .bulk_id import new_bulk_id from .dask.errors import internal_bulk_exceptions from .mime_types import MimeTypes, MimeType from .tenant_provider import resolve_tenant -from app.helper.traces import with_trace +from ..helper.traces import with_trace async def create_and_store_dataframe(ctx: Context, df: pd.DataFrame) -> str: diff --git a/app/bulk_persistence/dataframe_serializer.py b/app/bulk_persistence/dataframe_serializer.py index 84cdee1c7ac75374e4a87ad0e04122aff0a1f7b3..a2b059eab0cd6b7bf782877f7437b9b00f9411d6 100644 --- a/app/bulk_persistence/dataframe_serializer.py +++ b/app/bulk_persistence/dataframe_serializer.py @@ -24,7 +24,7 @@ from pydantic import BaseModel from .json_orient import JSONOrient from .mime_types import MimeTypes, MimeType from app.pool_executor import get_pool_executor -from app.helper.traces import with_trace +from ..helper.traces import with_trace class DataframeSerializerSync: diff --git a/app/consistency/reference_check.py b/app/consistency/reference_check.py index 40eb75f0bb83a1fed0dca0cd6e2c11f1bb8a3e55..a473d4eef3d01774979a3ca0bc63385b87b119cf 100644 --- a/app/consistency/reference_check.py +++ b/app/consistency/reference_check.py @@ -1,7 +1,7 @@ import pandas as pd import math from pydantic import BaseModel -from app.bulk_persistence import ConsistencyException, DataConsistencyChecks +from app.bulk_persistence.consistency_checks import ConsistencyException, DataConsistencyChecks class ReferenceCurveException(ConsistencyException): diff --git a/app/consistency/trajectory_consistency.py b/app/consistency/trajectory_consistency.py index 205599fef106e6d7a29db9c259a6a6ee2ba3a757..982ee0487bf03f0af8c4f951160b54f5eedd1e5a 100644 --- a/app/consistency/trajectory_consistency.py +++ b/app/consistency/trajectory_consistency.py @@ -7,9 +7,10 @@ from odes_storage.models import Record from app.model.osdu_model import WellboreTrajectory110 from app.helper.traces import with_trace -from app.bulk_persistence import ConsistencyException, DataConsistencyChecks, \ - BulkRecordNotFound, \ - DaskBulkStorage, submit_with_trace +from app.bulk_persistence.consistency_checks import ConsistencyException, DataConsistencyChecks +from app.bulk_persistence.dask.dask_bulk_storage import BulkRecordNotFound +from app.bulk_persistence import DaskBulkStorage +from app.bulk_persistence.dask.traces import submit_with_trace from app.model.model_utils import from_record from app.context import get_ctx diff --git a/app/consistency/welllog_consistency.py b/app/consistency/welllog_consistency.py index 5409a8471652ec58ed5770c2159719128b844539..650d7c7a53cbe0bc67302dd7cdeb6d9a1ece9d0e 100644 --- a/app/consistency/welllog_consistency.py +++ b/app/consistency/welllog_consistency.py @@ -7,8 +7,10 @@ from dask.dataframe.core import DataFrame as DaskDataFrame from odes_storage.models import Record from app.helper.traces import with_trace -from app.bulk_persistence import BulkRecordNotFound, \ - DaskBulkStorage, ConsistencyException, DataConsistencyChecks, submit_with_trace +from app.bulk_persistence.consistency_checks import ConsistencyException, DataConsistencyChecks +from app.bulk_persistence.dask.dask_bulk_storage import BulkRecordNotFound +from app.bulk_persistence import DaskBulkStorage +from app.bulk_persistence.dask.traces import submit_with_trace from app.model.model_utils import from_record from app.model.osdu_model import WellLog110 from app.context import get_ctx diff --git a/app/model/log_bulk.py b/app/model/log_bulk.py index acf15efa0b9183ee5a160e3d952dd461c69db204..1ab79edc1a7eb9a7d8a9ab657fb0750a936bd8ce 100644 --- a/app/model/log_bulk.py +++ b/app/model/log_bulk.py @@ -18,7 +18,7 @@ from jsonpath_ng import parse as parse_jsonpath from jsonpath_ng.jsonpath import Parent as JsonParent from odes_storage.models import Record -from app.bulk_persistence import BulkURI +from app.bulk_persistence.bulk_uri import BulkURI class LogBulkHelper: diff --git a/app/routers/bulk/bulk_routes.py b/app/routers/bulk/bulk_routes.py index 08f23c18ae198bfd65f7a5722ddefe1711494870..6f1c3d1ed5f46ce07970caf226ac7f05b663190c 100644 --- a/app/routers/bulk/bulk_routes.py +++ b/app/routers/bulk/bulk_routes.py @@ -57,16 +57,18 @@ from app.routers.sessions import ( ) # imports from bulk persistence -from app.bulk_persistence import (auto_cast_columns_to_string, - DataFrameValidationFunc, no_validation, - JSONOrient, - get_dataframe, download_bulk, - DaskBulkStorage, - MimeTypes, MimeType, - trace_dataframe_attributes, trace_attributes_root_span, - BulkError, BulkRecordNotFound, FilterError, TooManyColumnsRequested, - DataConsistencyChecks - ) +from app.bulk_persistence.dataframe_validators import (auto_cast_columns_to_string, + DataFrameValidationFunc, + no_validation) +from app.bulk_persistence import JSONOrient, get_dataframe, download_bulk +from app.bulk_persistence import DaskBulkStorage +from app.bulk_persistence.dask.errors import BulkError, BulkRecordNotFound, FilterError, TooManyColumnsRequested +from app.bulk_persistence.mime_types import MimeTypes, MimeType +from app.bulk_persistence.dask.traces import trace_dataframe_attributes, trace_attributes_root_span + + +from app.bulk_persistence import DataConsistencyChecks + router = APIRouter(route_class=TracingRoute) # router dedicated to bulk APIs diff --git a/app/routers/bulk/bulk_uri_dependencies.py b/app/routers/bulk/bulk_uri_dependencies.py index 7840d07a3f94478a735afa2873817d9388b8fe0a..cc5923984cf733a40ca38e59e5ff96141643f471 100644 --- a/app/routers/bulk/bulk_uri_dependencies.py +++ b/app/routers/bulk/bulk_uri_dependencies.py @@ -3,7 +3,7 @@ from abc import ABC from typing import Optional from fastapi import Request -from app.bulk_persistence import BulkURI +from app.bulk_persistence.bulk_uri import BulkURI from app.model.log_bulk import LogBulkHelper diff --git a/app/routers/bulk/utils.py b/app/routers/bulk/utils.py index 2d50124571c24ac1b2cb9ee29964b09757d066bb..3351b6389b76952c36664f30e4a1e4c1a8978649 100644 --- a/app/routers/bulk/utils.py +++ b/app/routers/bulk/utils.py @@ -11,11 +11,16 @@ import dask.dataframe as dd import pandas as pd from pyarrow.lib import ArrowInvalid -from app.bulk_persistence import DaskBulkStorage, DataframeSerializerAsync, \ - MimeTypes, MimeType, JSONOrient, trace_dataframe_attributes, capture_timings, \ - auto_cast_columns_to_string, columns_type_must_be_string, \ - no_validation, DataFrameValidationFunc, \ - FilterError, internal_bulk_exceptions, BulkCurvesNotFound +from app.bulk_persistence.dask.errors import FilterError, internal_bulk_exceptions, BulkCurvesNotFound +from app.bulk_persistence.dask.traces import trace_dataframe_attributes +from app.bulk_persistence import DaskBulkStorage +from app.bulk_persistence.dataframe_validators import auto_cast_columns_to_string, columns_type_must_be_string, \ + no_validation, DataFrameValidationFunc +from app.bulk_persistence import DataframeSerializerAsync +from app.bulk_persistence.mime_types import MimeTypes, MimeType +from app.bulk_persistence import JSONOrient +from app.bulk_persistence import capture_timings + from app.clients.storage_service_client import get_storage_record_service from app.context import get_ctx, Context from app.utils import OpenApiHandler diff --git a/app/routers/common_parameters.py b/app/routers/common_parameters.py index 4ab2d78fce0dfd0ae3950af4bd8edde1e3edd854..bfdf16b3d10fd791f05caefd2c4a8212be7660e7 100644 --- a/app/routers/common_parameters.py +++ b/app/routers/common_parameters.py @@ -1,7 +1,7 @@ from fastapi import Query, Request, HTTPException from pandas import DataFrame -from app.bulk_persistence import MimeType, MimeTypes +from app.bulk_persistence.mime_types import MimeType, MimeTypes from app.bulk_persistence import JSONOrient diff --git a/app/routers/ddms_v2/log_ddms_v2.py b/app/routers/ddms_v2/log_ddms_v2.py index d05107bb33a1a20acf4bc5cad116d42142e91224..bb30a694abea7cb1c602ec566e133a23809b1073 100644 --- a/app/routers/ddms_v2/log_ddms_v2.py +++ b/app/routers/ddms_v2/log_ddms_v2.py @@ -38,7 +38,7 @@ from odes_storage.models import ( from pydantic import BaseModel, Field from app.bulk_persistence import DataframeSerializerAsync, DataframeSerializerSync, JSONOrient, MimeTypes, get_dataframe -from app.bulk_persistence import BulkURI +from app.bulk_persistence.bulk_uri import BulkURI from app.clients.storage_service_client import get_storage_record_service from app.model.log_bulk import LogBulkHelper from app.model.model_curated import log diff --git a/app/routers/ddms_v2/persistence.py b/app/routers/ddms_v2/persistence.py index 4d6ea54df8f7422ba402a531a4389d4adc1c33e1..ce5eb4af3b06cc95c1dd9b4c95f5c4703c3402ca 100644 --- a/app/routers/ddms_v2/persistence.py +++ b/app/routers/ddms_v2/persistence.py @@ -16,10 +16,12 @@ import pandas as pd from odes_storage.models import Record -from app.bulk_persistence import create_and_store_dataframe, get_dataframe, trace_dataframe_attributes +from app.bulk_persistence import create_and_store_dataframe +from app.bulk_persistence import get_dataframe from app.context import Context from app.model.log_bulk import LogBulkHelper +from app.bulk_persistence.dask.traces import trace_dataframe_attributes from app.helper.traces import with_trace from app.helper.logger import get_logger diff --git a/app/routers/delete/delete_bulk_data.py b/app/routers/delete/delete_bulk_data.py index 53ef583636ba3ccd949ded382b43f86624f26c4c..e1544396d244065882db225ee465ad0176cc57c5 100644 --- a/app/routers/delete/delete_bulk_data.py +++ b/app/routers/delete/delete_bulk_data.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from app.bulk_persistence import resolve_tenant, hash_record_id +from app.bulk_persistence import resolve_tenant from osdu.core.api.storage.blob_storage_base import BlobStorageBase import asyncio +from app.bulk_persistence.dask.storage_path_builder import hash_record_id from app.clients import StorageRecordServiceClient from app.clients.storage_service_client import get_storage_record_service from app.routers.bulk.bulk_uri_dependencies import BulkIdAccess diff --git a/tests/unit/model/log_bulk_test.py b/tests/unit/model/log_bulk_test.py index 31fd7ca431ed55c7c7b639f47df4724743e2fde1..b5483639f16e6390fba91b4721997f308a861dd2 100644 --- a/tests/unit/model/log_bulk_test.py +++ b/tests/unit/model/log_bulk_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from app.model.log_bulk import LogBulkHelper -from app.bulk_persistence import BulkURI +from app.bulk_persistence.bulk_uri import BulkURI from tests.unit.test_utils import basic_record import uuid import pytest diff --git a/tests/unit/routers/filter_test.py b/tests/unit/routers/filter_test.py index 2cebb3386fff90c0970e3544875613523e5bd44b..b7a76ba3db1105e3348915ee6cf446efe0d6737e 100644 --- a/tests/unit/routers/filter_test.py +++ b/tests/unit/routers/filter_test.py @@ -1,7 +1,8 @@ import pytest +from app.bulk_persistence.dask.errors import FilterError from app.bulk_persistence import GetDataParams, \ - BulkReadFilterOperator, BulkReadFilters, FilterError + BulkReadFilterOperator, BulkReadFilters @pytest.mark.parametrize("filters, expected", [