diff --git a/app/bulk_persistence/__init__.py b/app/bulk_persistence/__init__.py index 3be6223fb3d4ea387159b0d3199ba3197964e3a0..d75320c60588cbc5b33040b314beb447667ceba1 100644 --- a/app/bulk_persistence/__init__.py +++ b/app/bulk_persistence/__init__.py @@ -17,10 +17,13 @@ from .bulk_filter import BulkReadFilters, BulkReadFilterOperator from .model_chunking import GetDataParams, DataframeBasicDescribe, DataframeDescribe from .dask.dask_bulk_storage import DaskBulkStorage from .dask.dask_bulk_storage_local import make_local_dask_bulk_storage +from .dask.storage_path_builder import hash_record_id +from .dask.traces import trace_dataframe_attributes, submit_with_trace, trace_attributes_root_span from .dataframe_persistence import create_and_store_dataframe, get_dataframe, download_bulk from .dataframe_serializer import DataframeSerializerAsync, DataframeSerializerSync +from .dataframe_validators import auto_cast_columns_to_string, columns_type_must_be_string, DataFrameValidationFunc, no_validation from .json_orient import JSONOrient -from .mime_types import MimeTypes +from .mime_types import MimeTypes, MimeType from .tenant_provider import resolve_tenant from .exceptions import UnknownChannelsException, InvalidBulkException, NoBulkException, NoDataException, RecordNotFoundException from .consistency_checks import ConsistencyException, DataConsistencyChecks @@ -30,6 +33,7 @@ from .capture_timings import capture_timings from .sessions_storage import Session, SessionsStorage, \ SessionNotFound, SessionInvalidState, SessionUpdatedEtagUnmatched, SessionException, \ SessionState, SessionUpdateMode, SessionInternal, CommitSessionResponse +from .dask.errors import BulkError, BulkRecordNotFound, BulkCurvesNotFound, TooManyColumnsRequested, FilterError, internal_bulk_exceptions # TMP: this should probably not be exposed outside of the bulk_persistence package from .temp_dir import get_temp_dir diff --git a/app/bulk_persistence/blob_storage.py b/app/bulk_persistence/blob_storage.py index 0bfd476a6ccf1db7b52217090978496929709ffa..95d69411236c9ad7d71601e401f63c1de1df3130 100644 --- a/app/bulk_persistence/blob_storage.py +++ b/app/bulk_persistence/blob_storage.py @@ -52,7 +52,7 @@ from .mime_types import MimeType, MimeTypes # - using faster format, e.g. hd5 # - threshold about the busyness of the service (if not busy and not huge data -> direct write) # - better proc fork and arg serialization -from ..helper.traces import with_trace +from app.helper.traces import with_trace def export_to_parquet( diff --git a/app/bulk_persistence/capture_timings.py b/app/bulk_persistence/capture_timings.py index 10e3f1e7723e50706e0a5e4d304bbcf40a141ffa..fafe58ff13a1ed71c1fedf4cae7128d26eaf26f0 100644 --- a/app/bulk_persistence/capture_timings.py +++ b/app/bulk_persistence/capture_timings.py @@ -4,7 +4,7 @@ from functools import wraps import asyncio from time import perf_counter, process_time -from ..helper.logger import get_logger +from app.helper.logger import get_logger def make_log_captured_timing_handler(level=INFO): diff --git a/app/bulk_persistence/dask/__init__.py b/app/bulk_persistence/dask/__init__.py index ab4e198a3be638df9e91aee2a28956d0c8f16e58..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644 --- a/app/bulk_persistence/dask/__init__.py +++ b/app/bulk_persistence/dask/__init__.py @@ -1 +0,0 @@ -from . import dask_config diff --git a/app/bulk_persistence/dask/bulk_catalog.py b/app/bulk_persistence/dask/bulk_catalog.py index 3f091266905db80f5b1107f3e2992c59e00c4cac..22e2b2d11a1ecaecc4be5f2cc7487fc54e0677b4 100644 --- a/app/bulk_persistence/dask/bulk_catalog.py +++ b/app/bulk_persistence/dask/bulk_catalog.py @@ -22,7 +22,7 @@ from contextlib import suppress from dataclasses import dataclass from typing import Dict, Iterable, List, NamedTuple, Optional, Set -from ...helper.traces import with_trace +from app.helper.traces import with_trace from ..capture_timings import capture_timings from .storage_path_builder import join, remove_protocol from .utils import worker_capture_timing_handlers diff --git a/app/bulk_persistence/dask/session_file_meta.py b/app/bulk_persistence/dask/session_file_meta.py index 448bd1d479a74a1f8ffb13d39860b83e314c5405..ae8f2faacd98a6b8910ed7d34710af152d6fed50 100644 --- a/app/bulk_persistence/dask/session_file_meta.py +++ b/app/bulk_persistence/dask/session_file_meta.py @@ -23,8 +23,8 @@ from distributed.worker import get_client import pandas as pd from .utils import share_items -from ...helper.logger import get_logger -from ...helper.traces import with_trace +from app.helper.logger import get_logger +from app.helper.traces import with_trace from ..sessions_storage import Session from ..capture_timings import capture_timings diff --git a/app/bulk_persistence/dask/utils.py b/app/bulk_persistence/dask/utils.py index 350f1d736ed549197536c7008995c7d6f8ebfbdb..06750ef66d2278b99ec33afd17d945022809a42e 100644 --- a/app/bulk_persistence/dask/utils.py +++ b/app/bulk_persistence/dask/utils.py @@ -20,7 +20,7 @@ import dask.dataframe as dd import pandas as pd import pyarrow.parquet as pa -from ...helper.logger import get_logger +from app.helper.logger import get_logger from ..capture_timings import capture_timings diff --git a/app/bulk_persistence/dataframe_persistence.py b/app/bulk_persistence/dataframe_persistence.py index 6f677a599982b6933ecdd81de15c9208b186ad3d..89d5585aaf4aec1f36ddccdcd69387fc34f55b36 100644 --- a/app/bulk_persistence/dataframe_persistence.py +++ b/app/bulk_persistence/dataframe_persistence.py @@ -30,7 +30,7 @@ from .bulk_id import new_bulk_id from .dask.errors import internal_bulk_exceptions from .mime_types import MimeTypes, MimeType from .tenant_provider import resolve_tenant -from ..helper.traces import with_trace +from app.helper.traces import with_trace async def create_and_store_dataframe(ctx: Context, df: pd.DataFrame) -> str: diff --git a/app/bulk_persistence/dataframe_serializer.py b/app/bulk_persistence/dataframe_serializer.py index a2b059eab0cd6b7bf782877f7437b9b00f9411d6..84cdee1c7ac75374e4a87ad0e04122aff0a1f7b3 100644 --- a/app/bulk_persistence/dataframe_serializer.py +++ b/app/bulk_persistence/dataframe_serializer.py @@ -24,7 +24,7 @@ from pydantic import BaseModel from .json_orient import JSONOrient from .mime_types import MimeTypes, MimeType from app.pool_executor import get_pool_executor -from ..helper.traces import with_trace +from app.helper.traces import with_trace class DataframeSerializerSync: diff --git a/app/consistency/reference_check.py b/app/consistency/reference_check.py index a473d4eef3d01774979a3ca0bc63385b87b119cf..40eb75f0bb83a1fed0dca0cd6e2c11f1bb8a3e55 100644 --- a/app/consistency/reference_check.py +++ b/app/consistency/reference_check.py @@ -1,7 +1,7 @@ import pandas as pd import math from pydantic import BaseModel -from app.bulk_persistence.consistency_checks import ConsistencyException, DataConsistencyChecks +from app.bulk_persistence import ConsistencyException, DataConsistencyChecks class ReferenceCurveException(ConsistencyException): diff --git a/app/consistency/trajectory_consistency.py b/app/consistency/trajectory_consistency.py index 982ee0487bf03f0af8c4f951160b54f5eedd1e5a..205599fef106e6d7a29db9c259a6a6ee2ba3a757 100644 --- a/app/consistency/trajectory_consistency.py +++ b/app/consistency/trajectory_consistency.py @@ -7,10 +7,9 @@ from odes_storage.models import Record from app.model.osdu_model import WellboreTrajectory110 from app.helper.traces import with_trace -from app.bulk_persistence.consistency_checks import ConsistencyException, DataConsistencyChecks -from app.bulk_persistence.dask.dask_bulk_storage import BulkRecordNotFound -from app.bulk_persistence import DaskBulkStorage -from app.bulk_persistence.dask.traces import submit_with_trace +from app.bulk_persistence import ConsistencyException, DataConsistencyChecks, \ + BulkRecordNotFound, \ + DaskBulkStorage, submit_with_trace from app.model.model_utils import from_record from app.context import get_ctx diff --git a/app/consistency/welllog_consistency.py b/app/consistency/welllog_consistency.py index 650d7c7a53cbe0bc67302dd7cdeb6d9a1ece9d0e..5409a8471652ec58ed5770c2159719128b844539 100644 --- a/app/consistency/welllog_consistency.py +++ b/app/consistency/welllog_consistency.py @@ -7,10 +7,8 @@ from dask.dataframe.core import DataFrame as DaskDataFrame from odes_storage.models import Record from app.helper.traces import with_trace -from app.bulk_persistence.consistency_checks import ConsistencyException, DataConsistencyChecks -from app.bulk_persistence.dask.dask_bulk_storage import BulkRecordNotFound -from app.bulk_persistence import DaskBulkStorage -from app.bulk_persistence.dask.traces import submit_with_trace +from app.bulk_persistence import BulkRecordNotFound, \ + DaskBulkStorage, ConsistencyException, DataConsistencyChecks, submit_with_trace from app.model.model_utils import from_record from app.model.osdu_model import WellLog110 from app.context import get_ctx diff --git a/app/model/log_bulk.py b/app/model/log_bulk.py index 1ab79edc1a7eb9a7d8a9ab657fb0750a936bd8ce..acf15efa0b9183ee5a160e3d952dd461c69db204 100644 --- a/app/model/log_bulk.py +++ b/app/model/log_bulk.py @@ -18,7 +18,7 @@ from jsonpath_ng import parse as parse_jsonpath from jsonpath_ng.jsonpath import Parent as JsonParent from odes_storage.models import Record -from app.bulk_persistence.bulk_uri import BulkURI +from app.bulk_persistence import BulkURI class LogBulkHelper: diff --git a/app/routers/bulk/bulk_routes.py b/app/routers/bulk/bulk_routes.py index 6f1c3d1ed5f46ce07970caf226ac7f05b663190c..08f23c18ae198bfd65f7a5722ddefe1711494870 100644 --- a/app/routers/bulk/bulk_routes.py +++ b/app/routers/bulk/bulk_routes.py @@ -57,18 +57,16 @@ from app.routers.sessions import ( ) # imports from bulk persistence -from app.bulk_persistence.dataframe_validators import (auto_cast_columns_to_string, - DataFrameValidationFunc, - no_validation) -from app.bulk_persistence import JSONOrient, get_dataframe, download_bulk -from app.bulk_persistence import DaskBulkStorage -from app.bulk_persistence.dask.errors import BulkError, BulkRecordNotFound, FilterError, TooManyColumnsRequested -from app.bulk_persistence.mime_types import MimeTypes, MimeType -from app.bulk_persistence.dask.traces import trace_dataframe_attributes, trace_attributes_root_span - - -from app.bulk_persistence import DataConsistencyChecks - +from app.bulk_persistence import (auto_cast_columns_to_string, + DataFrameValidationFunc, no_validation, + JSONOrient, + get_dataframe, download_bulk, + DaskBulkStorage, + MimeTypes, MimeType, + trace_dataframe_attributes, trace_attributes_root_span, + BulkError, BulkRecordNotFound, FilterError, TooManyColumnsRequested, + DataConsistencyChecks + ) router = APIRouter(route_class=TracingRoute) # router dedicated to bulk APIs diff --git a/app/routers/bulk/bulk_uri_dependencies.py b/app/routers/bulk/bulk_uri_dependencies.py index cc5923984cf733a40ca38e59e5ff96141643f471..7840d07a3f94478a735afa2873817d9388b8fe0a 100644 --- a/app/routers/bulk/bulk_uri_dependencies.py +++ b/app/routers/bulk/bulk_uri_dependencies.py @@ -3,7 +3,7 @@ from abc import ABC from typing import Optional from fastapi import Request -from app.bulk_persistence.bulk_uri import BulkURI +from app.bulk_persistence import BulkURI from app.model.log_bulk import LogBulkHelper diff --git a/app/routers/bulk/utils.py b/app/routers/bulk/utils.py index 3351b6389b76952c36664f30e4a1e4c1a8978649..2d50124571c24ac1b2cb9ee29964b09757d066bb 100644 --- a/app/routers/bulk/utils.py +++ b/app/routers/bulk/utils.py @@ -11,16 +11,11 @@ import dask.dataframe as dd import pandas as pd from pyarrow.lib import ArrowInvalid -from app.bulk_persistence.dask.errors import FilterError, internal_bulk_exceptions, BulkCurvesNotFound -from app.bulk_persistence.dask.traces import trace_dataframe_attributes -from app.bulk_persistence import DaskBulkStorage -from app.bulk_persistence.dataframe_validators import auto_cast_columns_to_string, columns_type_must_be_string, \ - no_validation, DataFrameValidationFunc -from app.bulk_persistence import DataframeSerializerAsync -from app.bulk_persistence.mime_types import MimeTypes, MimeType -from app.bulk_persistence import JSONOrient -from app.bulk_persistence import capture_timings - +from app.bulk_persistence import DaskBulkStorage, DataframeSerializerAsync, \ + MimeTypes, MimeType, JSONOrient, trace_dataframe_attributes, capture_timings, \ + auto_cast_columns_to_string, columns_type_must_be_string, \ + no_validation, DataFrameValidationFunc, \ + FilterError, internal_bulk_exceptions, BulkCurvesNotFound from app.clients.storage_service_client import get_storage_record_service from app.context import get_ctx, Context from app.utils import OpenApiHandler diff --git a/app/routers/common_parameters.py b/app/routers/common_parameters.py index bfdf16b3d10fd791f05caefd2c4a8212be7660e7..4ab2d78fce0dfd0ae3950af4bd8edde1e3edd854 100644 --- a/app/routers/common_parameters.py +++ b/app/routers/common_parameters.py @@ -1,7 +1,7 @@ from fastapi import Query, Request, HTTPException from pandas import DataFrame -from app.bulk_persistence.mime_types import MimeType, MimeTypes +from app.bulk_persistence import MimeType, MimeTypes from app.bulk_persistence import JSONOrient diff --git a/app/routers/ddms_v2/log_ddms_v2.py b/app/routers/ddms_v2/log_ddms_v2.py index bb30a694abea7cb1c602ec566e133a23809b1073..d05107bb33a1a20acf4bc5cad116d42142e91224 100644 --- a/app/routers/ddms_v2/log_ddms_v2.py +++ b/app/routers/ddms_v2/log_ddms_v2.py @@ -38,7 +38,7 @@ from odes_storage.models import ( from pydantic import BaseModel, Field from app.bulk_persistence import DataframeSerializerAsync, DataframeSerializerSync, JSONOrient, MimeTypes, get_dataframe -from app.bulk_persistence.bulk_uri import BulkURI +from app.bulk_persistence import BulkURI from app.clients.storage_service_client import get_storage_record_service from app.model.log_bulk import LogBulkHelper from app.model.model_curated import log diff --git a/app/routers/ddms_v2/persistence.py b/app/routers/ddms_v2/persistence.py index ce5eb4af3b06cc95c1dd9b4c95f5c4703c3402ca..4d6ea54df8f7422ba402a531a4389d4adc1c33e1 100644 --- a/app/routers/ddms_v2/persistence.py +++ b/app/routers/ddms_v2/persistence.py @@ -16,12 +16,10 @@ import pandas as pd from odes_storage.models import Record -from app.bulk_persistence import create_and_store_dataframe -from app.bulk_persistence import get_dataframe +from app.bulk_persistence import create_and_store_dataframe, get_dataframe, trace_dataframe_attributes from app.context import Context from app.model.log_bulk import LogBulkHelper -from app.bulk_persistence.dask.traces import trace_dataframe_attributes from app.helper.traces import with_trace from app.helper.logger import get_logger diff --git a/app/routers/delete/delete_bulk_data.py b/app/routers/delete/delete_bulk_data.py index e1544396d244065882db225ee465ad0176cc57c5..53ef583636ba3ccd949ded382b43f86624f26c4c 100644 --- a/app/routers/delete/delete_bulk_data.py +++ b/app/routers/delete/delete_bulk_data.py @@ -12,11 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from app.bulk_persistence import resolve_tenant +from app.bulk_persistence import resolve_tenant, hash_record_id from osdu.core.api.storage.blob_storage_base import BlobStorageBase import asyncio -from app.bulk_persistence.dask.storage_path_builder import hash_record_id from app.clients import StorageRecordServiceClient from app.clients.storage_service_client import get_storage_record_service from app.routers.bulk.bulk_uri_dependencies import BulkIdAccess diff --git a/tests/unit/model/log_bulk_test.py b/tests/unit/model/log_bulk_test.py index b5483639f16e6390fba91b4721997f308a861dd2..31fd7ca431ed55c7c7b639f47df4724743e2fde1 100644 --- a/tests/unit/model/log_bulk_test.py +++ b/tests/unit/model/log_bulk_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from app.model.log_bulk import LogBulkHelper -from app.bulk_persistence.bulk_uri import BulkURI +from app.bulk_persistence import BulkURI from tests.unit.test_utils import basic_record import uuid import pytest diff --git a/tests/unit/routers/filter_test.py b/tests/unit/routers/filter_test.py index b7a76ba3db1105e3348915ee6cf446efe0d6737e..2cebb3386fff90c0970e3544875613523e5bd44b 100644 --- a/tests/unit/routers/filter_test.py +++ b/tests/unit/routers/filter_test.py @@ -1,8 +1,7 @@ import pytest -from app.bulk_persistence.dask.errors import FilterError from app.bulk_persistence import GetDataParams, \ - BulkReadFilterOperator, BulkReadFilters + BulkReadFilterOperator, BulkReadFilters, FilterError @pytest.mark.parametrize("filters, expected", [