Commit f8873f68 authored by Cyril Monmouton


Merge branch 'master' of https://community.opengroup.org/osdu/platform/domain-data-mgmt-services/wellbore/wellbore-domain-services into feature/remove-alpha-v3-APIs

# Conflicts:
#	spec/generated/openapi.json
parents c96f02ab 8ac809e5
Pipeline #51693 failed in 16 minutes and 24 seconds
@@ -15,7 +15,7 @@
 import json
 import asyncio
 from io import BytesIO
-from typing import Union, AnyStr, IO, Optional, List
+from typing import Union, AnyStr, IO, Optional, List, Dict
 from pathlib import Path

 import numpy as np
@@ -48,23 +48,12 @@ class DataframeSerializerSync:
             columns: List[Union[str, int, float]] = None
             index: List[Union[str, int, float]] = None

-        class IndexFormat(BaseModel):
-            # TODO
-            pass
-
         class ColumnFormat(BaseModel):
-            # TODO
-            pass
-
-        class RecordsFormat(BaseModel):
-            # TODO
-            pass
+            __root__: Dict[str, Dict[Union[str, int, float], Union[str, int, float]]]

         schema_dict = {
             JSONOrient.split: SplitFormat.schema(),
-            JSONOrient.index: IndexFormat.schema(),
-            JSONOrient.columns: ColumnFormat.schema(),
-            JSONOrient.records: RecordsFormat.schema()
+            JSONOrient.columns: ColumnFormat.schema()
         }
         return schema_dict[JSONOrient.get(orient)]
......
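For illustration only (not part of this commit): the new ColumnFormat root model accepts a 'columns'-oriented payload, i.e. a mapping of column name to {row index -> value}. A minimal sketch, assuming pydantic v1 __root__ semantics:

from typing import Dict, Union

from pydantic import BaseModel

class ColumnFormat(BaseModel):
    # column name -> {row index -> cell value}
    __root__: Dict[str, Dict[Union[str, int, float], Union[str, int, float]]]

# the shape produced by DataFrame.to_json(orient='columns'):
payload = {
    "Ref": {"0": 0.0, "1": 0.5, "2": 1.0},
    "col_100X": {"0": 1001, "1": 1002, "2": 1003},
}
ColumnFormat.parse_obj(payload)  # validates; raises ValidationError on a bad shape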
@@ -17,12 +17,8 @@ from typing import Union
 class JSONOrient(str, Enum):
     # not allow 'table' because very verbose then comes with significant overhead
     # not allow 'values' because cannot carry index nor column
     split = "split"
-    index = "index"
     columns = "columns"
-    records = "records"

     @classmethod
     def get(cls, orient: Union[str, "JSONOrient"]) -> "JSONOrient":
......
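As a reminder of what the two remaining orients produce for the same frame, using plain pandas (illustrative, not from this commit):

import pandas as pd

df = pd.DataFrame({"Ref": [0.0, 0.5], "col_100X": [1001, 1002]})

df.to_json(orient="split")
# '{"columns":["Ref","col_100X"],"index":[0,1],"data":[[0.0,1001],[0.5,1002]]}'

df.to_json(orient="columns")
# '{"Ref":{"0":0.0,"1":0.5},"col_100X":{"0":1001,"1":1002}}'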
@@ -152,7 +152,7 @@ class DataFrameRender:
     @staticmethod
     @with_trace('df_render')
-    async def df_render(df, params: GetDataParams, accept: str = None):
+    async def df_render(df, params: GetDataParams, accept: str = None, orient: Optional[JSONOrient] = None):
         if params.describe:
             return {
                 "numberOfRows": await DataFrameRender.get_size(df),
@@ -166,7 +166,9 @@ class DataFrameRender:
         return Response(pdf.to_parquet(engine="pyarrow"), media_type=MimeTypes.PARQUET.type)

     if MimeTypes.JSON.type in accept:
-        return Response(pdf.to_json(index=True, date_format='iso'), media_type=MimeTypes.JSON.type)
+        return Response(
+            pdf.to_json(index=True, date_format='iso', orient=orient.value), media_type=MimeTypes.JSON.type
+        )

     if MimeTypes.CSV.type in accept:
         return Response(pdf.to_csv(), media_type=MimeTypes.CSV.type)
@@ -281,6 +283,7 @@ async def get_data_version(
     record_id: str, version: int,
     request: Request,
     ctrl_p: GetDataParams = Depends(),
+    orient: JSONOrient = Depends(json_orient_parameter),
     ctx: Context = Depends(get_ctx),
     dask_blob_storage: DaskBulkStorage = Depends(with_dask_blob_storage),
 ):
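json_orient_parameter is referenced here but its body is not shown in this diff; one plausible shape for such a FastAPI dependency is sketched below (an assumption, not the actual implementation):

from fastapi import Query

from app.bulk_persistence import JSONOrient

def json_orient_parameter(
    orient: JSONOrient = Query(JSONOrient.split, description="orient of the JSON output"),
) -> JSONOrient:
    # declaring the parameter as the JSONOrient enum makes FastAPI reject
    # removed values such as 'index' or 'records' with a 422
    return orient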
@@ -303,7 +306,7 @@ async def get_data_version(
             raise BulkNotFound(record_id=record_id, bulk_id=bulk_id)

         df = await DataFrameRender.process_params(df, ctrl_p)
-        return await DataFrameRender.df_render(df, ctrl_p, request.headers.get('Accept'))
+        return await DataFrameRender.df_render(df, ctrl_p, request.headers.get('Accept'), orient=orient)
     except BulkError as ex:
         ex.raise_as_http()
@@ -330,10 +333,11 @@ async def get_data(
     record_id: str,
     request: Request,
     ctrl_p: GetDataParams = Depends(),
+    orient: JSONOrient = Depends(json_orient_parameter),
     ctx: Context = Depends(get_ctx),
     dask_blob_storage: DaskBulkStorage = Depends(with_dask_blob_storage),
 ):
-    return await get_data_version(record_id, None, request, ctrl_p, ctx, dask_blob_storage)
+    return await get_data_version(record_id, None, request, ctrl_p, orient, ctx, dask_blob_storage)

 @router_bulk.patch(
......
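Client-side, the new query parameter would be used like this (an illustrative sketch; the log data path appears in the tests below, everything else is assumed):

import pandas as pd
import requests

base_url = "https://example.com"          # placeholder host
log_id = "some-log-id"                    # placeholder record id
headers = {"Accept": "application/json"}  # plus auth headers in practice

resp = requests.get(
    f"{base_url}/ddms/v2/logs/{log_id}/data",
    params={"orient": "split"},
    headers=headers,
)
df = pd.read_json(resp.text, orient="split")  # orient must match the request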
This diff is collapsed.
@@ -152,10 +152,8 @@ class ParquetSerializer:
 class JsonSerializer:
     mime_type = 'application/json'

-    # TODO There's an inconsistency in service, cannot specify orient in json and default is 'columns'
-    # (which different from legacy which is 'split')
     def read(self, json_content):
-        return pd.read_json(json_content, orient='columns')
+        return pd.read_json(json_content, orient='split')

     def dump(self, df):
         return df.to_json(orient='split')
......
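With read and dump now both using orient='split', the serializer round-trips a frame unchanged. A minimal check (illustrative):

import pandas as pd

class JsonSerializer:  # trimmed copy of the class above
    def read(self, json_content):
        return pd.read_json(json_content, orient='split')

    def dump(self, df):
        return df.to_json(orient='split')

s = JsonSerializer()
df = pd.DataFrame({"Ref": [0.0, 0.5], "col_100X": [1001, 1002]})
pd.testing.assert_frame_equal(df, s.read(s.dump(df)))  # passes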
@@ -3,6 +3,7 @@ from io import BytesIO
 from fastapi import HTTPException

+from app.bulk_persistence import JSONOrient
 from app.model.model_chunking import GetDataParams
 from app.routers.bulk_utils import DataFrameRender, get_df_from_request

 import pandas as pd
@@ -75,10 +76,11 @@ async def test_df_render_accept_parquet(default_get_params, basic_dataframe, acc
 @pytest.mark.asyncio
-async def test_df_render_accept_json(default_get_params, basic_dataframe):
-    response = await DataFrameRender.df_render(basic_dataframe, default_get_params, "application/json")
+@pytest.mark.parametrize("orient", [JSONOrient.split, JSONOrient.columns])
+async def test_df_render_accept_json(default_get_params, basic_dataframe, orient):
+    response = await DataFrameRender.df_render(basic_dataframe, default_get_params, "application/json", orient)

     assert 'application/json' == response.headers.get('Content-Type')
-    actual = pd.read_json(response.body, orient='columns')
+    actual = pd.read_json(response.body, orient=orient)
     assert_frame_equal(basic_dataframe, actual)
......
@@ -33,17 +33,11 @@ dataframe_dict = {
     'split': {'index': Reference_df.index.tolist(),
               'columns': Reference_df.columns.tolist(),
               'data': Reference_df.values.tolist()},
-    'index': {
-        str(row_val): {
-            str(col_val): Reference_df[col_val].tolist()[count] for col_val in Reference_df.columns.tolist()
-        } for count, row_val in enumerate(Reference_df.index.tolist())
-    },
     'columns': {
         str(col_val): {
             str(row_val): Reference_df[col_val].tolist()[count] for count, row_val in enumerate(Reference_df.index.tolist())
         } for col_val in Reference_df.columns.tolist()
-    },
-    'records': [{c: v for c, v in zip(Reference_df.columns, row_values)} for row_values in Reference_df.values]
+    }
 }
......
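The kept 'columns' comprehension above is equivalent to letting pandas build the payload (a sketch with a stand-in Reference_df):

import json

import pandas as pd

Reference_df = pd.DataFrame({"Ref": [0.0, 0.5], "col_100X": [1001, 1002]})

by_hand = {
    str(col_val): {
        str(row_val): Reference_df[col_val].tolist()[count]
        for count, row_val in enumerate(Reference_df.index.tolist())
    } for col_val in Reference_df.columns.tolist()
}
assert by_hand == json.loads(Reference_df.to_json(orient='columns'))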
@@ -75,7 +75,7 @@ def _create_df_from_response(response):
     elif content_type == 'text/csv; charset=utf-8':
         return pd.read_csv(f, index_col=0)
     elif content_type == 'application/json':
-        return pd.read_json(f, dtype=True)
+        return pd.read_json(f, dtype=True, orient='split')
     else:
         raise ValueError(f"Unknown content-type: '{content_type}'")
@@ -486,7 +486,7 @@ def test_add_curve_by_chunk_overlap_different_cols(setup_client, entity_type):
         (['E'], range(15)),  # overlap both side
     ])

-    data_response = client.get(f'{chunking_url}/{record_id}/data', headers={'Accept': 'application/json'})
+    data_response = client.get(f'{chunking_url}/{record_id}/data?orient=columns', headers={'Accept': 'application/json'})
     assert data_response.status_code == 200
     with_new_col = pd.DataFrame.from_dict(data_response.json())
     assert list(with_new_col.columns) == ['A', 'B', 'C', 'D', 'E', 'MD']
......
@@ -127,7 +127,7 @@ def client_with_log(client):
     assert response.status_code in range(200, 209), "Delete test log failed"

-@pytest.mark.parametrize("orient_value", ["split", "index", "columns", "records"])
+@pytest.mark.parametrize("orient_value", ["split", "columns"])
 def test_log_get_data_orient_param_validation(client_with_log, orient_value):
     client, log_id, _ = client_with_log
     response = client.get(f"/ddms/v2/logs/{log_id}/data", params={"orient":orient_value}, headers=headers)
response = client.get(f"/ddms/v2/logs/{log_id}/data", params={"orient":orient_value}, headers=headers)
@@ -150,28 +150,12 @@ def test_log_get_orient_param_validation_negative(client_with_log):
             "data": [[0.0, 1001], [0.5, 1002], [1.0, 1003]]
         }
     ),
-    (
-        "index",
-        {
-            "0": {"Ref": 0.0, "col_100X": 1001},
-            "1": {"Ref": 0.5, "col_100X": 1002},
-            "2": {"Ref": 1.0, "col_100X": 1003},
-        }
-    ),
     (
         "columns",
         {
             "Ref": {"0": 0.0, "1": 0.5, "2": 1.0},
             "col_100X": {"0": 1001, "1": 1002, "2": 1003},
         }
-    ),
-    (
-        "records",
-        [
-            {"Ref": 0.0, "col_100X": 1001},
-            {"Ref": 0.5, "col_100X": 1002},
-            {"Ref": 1.0, "col_100X": 1003}
-        ]
-    )
+    )
 ])
 def test_log_post_data_orient_param_validation(client_with_log, orient_value, data):
@@ -195,7 +179,7 @@ def test_log_version_data(client_with_log):
     assert response.json() == prev_data, "response json body should match previous version data"

-@pytest.mark.parametrize("orient_value", ["split", "index", "columns", "records"])
+@pytest.mark.parametrize("orient_value", ["split", "columns"])
 def test_log_version_data_orient_param_validation(client_with_log, orient_value):
     client, log_id, version_id = client_with_log
......
@@ -143,14 +143,6 @@ def client_with_log(client):
             "data": [[1.0, 10, 11, 12], [1.5, 20, 21, 22], [2.0, 30, 31, 32]]
         }
     ),
-    (
-        "index",
-        {
-            "0": {"MD": 1.0, "X": 10, "Y": 11, "Z": 12},
-            "1": {"MD": 1.5, "X": 20, "Y": 21, "Z": 22},
-            "2": {"MD": 2.0, "X": 30, "Y": 31, "Z": 32},
-        }
-    ),
     (
         "columns",
         {
@@ -159,14 +151,6 @@ def client_with_log(client):
             "Y": {"0": 11, "1": 21, "2": 31},
             "Z": {"0": 12, "1": 22, "2": 32},
         }
-    ),
-    (
-        "records",
-        [
-            {"MD": 1.0, "X": 10, "Y": 11, "Z": 12},
-            {"MD": 1.5, "X": 20, "Y": 21, "Z": 22},
-            {"MD": 2.0, "X": 30, "Y": 31, "Z": 32}
-        ]
-    )
+    )
 ])
 def test_traj_bulk(client, orient_value, data):
@@ -267,7 +251,7 @@ def test_log_version_data(client_with_log):
     assert response.json() == prev_data, "response json body should match previous version data"

-@pytest.mark.parametrize("orient_value", ["split", "index", "columns", "records"])
+@pytest.mark.parametrize("orient_value", ["split", "columns"])
 def test_log_version_data_orient_param_validation(client_with_log, orient_value):
     client, log_id, version_id = client_with_log
......