Commit c9040373 authored by Luc Yriarte's avatar Luc Yriarte
Browse files

Merge branch 'bulk-persistence-module-exports' into 'master'

Bulk persistence module exports

See merge request !441
parents 3cff952c 67891573
Pipeline #103934 failed with stages
in 54 minutes and 47 seconds
......@@ -17,10 +17,13 @@ from .bulk_filter import BulkReadFilters, BulkReadFilterOperator
from .model_chunking import GetDataParams, DataframeBasicDescribe, DataframeDescribe
from .dask.dask_bulk_storage import DaskBulkStorage
from .dask.dask_bulk_storage_local import make_local_dask_bulk_storage
from .dask.storage_path_builder import hash_record_id
from .dask.traces import trace_dataframe_attributes, submit_with_trace, trace_attributes_root_span
from .dataframe_persistence import create_and_store_dataframe, get_dataframe, download_bulk
from .dataframe_serializer import DataframeSerializerAsync, DataframeSerializerSync
from .dataframe_validators import auto_cast_columns_to_string, columns_type_must_be_string, DataFrameValidationFunc, no_validation
from .json_orient import JSONOrient
from .mime_types import MimeTypes
from .mime_types import MimeTypes, MimeType
from .tenant_provider import resolve_tenant
from .exceptions import UnknownChannelsException, InvalidBulkException, NoBulkException, NoDataException, RecordNotFoundException
from .consistency_checks import ConsistencyException, DataConsistencyChecks
......@@ -30,6 +33,7 @@ from .capture_timings import capture_timings
from .sessions_storage import Session, SessionsStorage, \
SessionNotFound, SessionInvalidState, SessionUpdatedEtagUnmatched, SessionException, \
SessionState, SessionUpdateMode, SessionInternal, CommitSessionResponse
from .dask.errors import BulkError, BulkRecordNotFound, BulkCurvesNotFound, TooManyColumnsRequested, FilterError, internal_bulk_exceptions
# TMP: this should probably not be exposed outside of the bulk_persistence package
from .temp_dir import get_temp_dir
......@@ -52,7 +52,7 @@ from .mime_types import MimeType, MimeTypes
# - using faster format, e.g. hd5
# - threshold about the busyness of the service (if not busy and not huge data -> direct write)
# - better proc fork and arg serialization
from ..helper.traces import with_trace
from app.helper.traces import with_trace
def export_to_parquet(
......
......@@ -4,7 +4,7 @@ from functools import wraps
import asyncio
from time import perf_counter, process_time
from ..helper.logger import get_logger
from app.helper.logger import get_logger
def make_log_captured_timing_handler(level=INFO):
......
......@@ -22,7 +22,7 @@ from contextlib import suppress
from dataclasses import dataclass
from typing import Dict, Iterable, List, NamedTuple, Optional, Set
from ...helper.traces import with_trace
from app.helper.traces import with_trace
from ..capture_timings import capture_timings
from .storage_path_builder import join, remove_protocol
from .utils import worker_capture_timing_handlers
......
......@@ -23,8 +23,8 @@ from distributed.worker import get_client
import pandas as pd
from .utils import share_items
from ...helper.logger import get_logger
from ...helper.traces import with_trace
from app.helper.logger import get_logger
from app.helper.traces import with_trace
from ..sessions_storage import Session
from ..capture_timings import capture_timings
......
......@@ -20,7 +20,7 @@ import dask.dataframe as dd
import pandas as pd
import pyarrow.parquet as pa
from ...helper.logger import get_logger
from app.helper.logger import get_logger
from ..capture_timings import capture_timings
......
......@@ -30,7 +30,7 @@ from .bulk_id import new_bulk_id
from .dask.errors import internal_bulk_exceptions
from .mime_types import MimeTypes, MimeType
from .tenant_provider import resolve_tenant
from ..helper.traces import with_trace
from app.helper.traces import with_trace
async def create_and_store_dataframe(ctx: Context, df: pd.DataFrame) -> str:
......
......@@ -24,7 +24,7 @@ from pydantic import BaseModel
from .json_orient import JSONOrient
from .mime_types import MimeTypes, MimeType
from app.pool_executor import get_pool_executor
from ..helper.traces import with_trace
from app.helper.traces import with_trace
class DataframeSerializerSync:
......
import pandas as pd
import math
from pydantic import BaseModel
from app.bulk_persistence.consistency_checks import ConsistencyException, DataConsistencyChecks
from app.bulk_persistence import ConsistencyException, DataConsistencyChecks
class ReferenceCurveException(ConsistencyException):
......
......@@ -7,10 +7,9 @@ from odes_storage.models import Record
from app.model.osdu_model import WellboreTrajectory110
from app.helper.traces import with_trace
from app.bulk_persistence.consistency_checks import ConsistencyException, DataConsistencyChecks
from app.bulk_persistence.dask.dask_bulk_storage import BulkRecordNotFound
from app.bulk_persistence import DaskBulkStorage
from app.bulk_persistence.dask.traces import submit_with_trace
from app.bulk_persistence import ConsistencyException, DataConsistencyChecks, \
BulkRecordNotFound, \
DaskBulkStorage, submit_with_trace
from app.model.model_utils import from_record
from app.context import get_ctx
......
......@@ -7,10 +7,8 @@ from dask.dataframe.core import DataFrame as DaskDataFrame
from odes_storage.models import Record
from app.helper.traces import with_trace
from app.bulk_persistence.consistency_checks import ConsistencyException, DataConsistencyChecks
from app.bulk_persistence.dask.dask_bulk_storage import BulkRecordNotFound
from app.bulk_persistence import DaskBulkStorage
from app.bulk_persistence.dask.traces import submit_with_trace
from app.bulk_persistence import BulkRecordNotFound, \
DaskBulkStorage, ConsistencyException, DataConsistencyChecks, submit_with_trace
from app.model.model_utils import from_record
from app.model.osdu_model import WellLog110
from app.context import get_ctx
......
......@@ -18,7 +18,7 @@ from jsonpath_ng import parse as parse_jsonpath
from jsonpath_ng.jsonpath import Parent as JsonParent
from odes_storage.models import Record
from app.bulk_persistence.bulk_uri import BulkURI
from app.bulk_persistence import BulkURI
class LogBulkHelper:
......
......@@ -57,18 +57,16 @@ from app.routers.sessions import (
)
# imports from bulk persistence
from app.bulk_persistence.dataframe_validators import (auto_cast_columns_to_string,
DataFrameValidationFunc,
no_validation)
from app.bulk_persistence import JSONOrient, get_dataframe, download_bulk
from app.bulk_persistence import DaskBulkStorage
from app.bulk_persistence.dask.errors import BulkError, BulkRecordNotFound, FilterError, TooManyColumnsRequested
from app.bulk_persistence.mime_types import MimeTypes, MimeType
from app.bulk_persistence.dask.traces import trace_dataframe_attributes, trace_attributes_root_span
from app.bulk_persistence import DataConsistencyChecks
from app.bulk_persistence import (auto_cast_columns_to_string,
DataFrameValidationFunc, no_validation,
JSONOrient,
get_dataframe, download_bulk,
DaskBulkStorage,
MimeTypes, MimeType,
trace_dataframe_attributes, trace_attributes_root_span,
BulkError, BulkRecordNotFound, FilterError, TooManyColumnsRequested,
DataConsistencyChecks
)
router = APIRouter(route_class=TracingRoute) # router dedicated to bulk APIs
......
......@@ -3,7 +3,7 @@ from abc import ABC
from typing import Optional
from fastapi import Request
from app.bulk_persistence.bulk_uri import BulkURI
from app.bulk_persistence import BulkURI
from app.model.log_bulk import LogBulkHelper
......
......@@ -11,16 +11,11 @@ import dask.dataframe as dd
import pandas as pd
from pyarrow.lib import ArrowInvalid
from app.bulk_persistence.dask.errors import FilterError, internal_bulk_exceptions, BulkCurvesNotFound
from app.bulk_persistence.dask.traces import trace_dataframe_attributes
from app.bulk_persistence import DaskBulkStorage
from app.bulk_persistence.dataframe_validators import auto_cast_columns_to_string, columns_type_must_be_string, \
no_validation, DataFrameValidationFunc
from app.bulk_persistence import DataframeSerializerAsync
from app.bulk_persistence.mime_types import MimeTypes, MimeType
from app.bulk_persistence import JSONOrient
from app.bulk_persistence import capture_timings
from app.bulk_persistence import DaskBulkStorage, DataframeSerializerAsync, \
MimeTypes, MimeType, JSONOrient, trace_dataframe_attributes, capture_timings, \
auto_cast_columns_to_string, columns_type_must_be_string, \
no_validation, DataFrameValidationFunc, \
FilterError, internal_bulk_exceptions, BulkCurvesNotFound
from app.clients.storage_service_client import get_storage_record_service
from app.context import get_ctx, Context
from app.utils import OpenApiHandler
......
from fastapi import Query, Request, HTTPException
from pandas import DataFrame
from app.bulk_persistence.mime_types import MimeType, MimeTypes
from app.bulk_persistence import MimeType, MimeTypes
from app.bulk_persistence import JSONOrient
......
......@@ -38,7 +38,7 @@ from odes_storage.models import (
from pydantic import BaseModel, Field
from app.bulk_persistence import DataframeSerializerAsync, DataframeSerializerSync, JSONOrient, MimeTypes, get_dataframe
from app.bulk_persistence.bulk_uri import BulkURI
from app.bulk_persistence import BulkURI
from app.clients.storage_service_client import get_storage_record_service
from app.model.log_bulk import LogBulkHelper
from app.model.model_curated import log
......
......@@ -16,12 +16,10 @@ import pandas as pd
from odes_storage.models import Record
from app.bulk_persistence import create_and_store_dataframe
from app.bulk_persistence import get_dataframe
from app.bulk_persistence import create_and_store_dataframe, get_dataframe, trace_dataframe_attributes
from app.context import Context
from app.model.log_bulk import LogBulkHelper
from app.bulk_persistence.dask.traces import trace_dataframe_attributes
from app.helper.traces import with_trace
from app.helper.logger import get_logger
......
......@@ -12,11 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from app.bulk_persistence import resolve_tenant
from app.bulk_persistence import resolve_tenant, hash_record_id
from osdu.core.api.storage.blob_storage_base import BlobStorageBase
import asyncio
from app.bulk_persistence.dask.storage_path_builder import hash_record_id
from app.clients import StorageRecordServiceClient
from app.clients.storage_service_client import get_storage_record_service
from app.routers.bulk.bulk_uri_dependencies import BulkIdAccess
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment