Commit c60b8ae9 authored by Cyril Monmouton

Merge branch 'fix/legacy-logs-columns-not-converted' into 'master'

Legacy-Log/Bug: after retrieving legacy Log, convert column names to ensure...

See merge request !245
parents 1045c8e1 f435411b
Pipeline #68034 canceled with stages in 19 seconds
@@ -33,7 +33,7 @@ from app.routers.sessions import (SessionInternal, UpdateSessionState, UpdateSes
                                   WithSessionStorages, get_session_dependencies)
 from app.routers.record_utils import fetch_record
 from app.routers.bulk.utils import (with_dask_blob_storage, get_check_input_df_func, get_df_from_request,
-                                    set_bulk_field_and_send_record, DataFrameRender)
+                                    set_bulk_field_and_send_record, DataFrameRender, _check_df_columns_type_legacy)
 from app.routers.bulk.bulk_uri_dependencies import (get_bulk_id_access, BulkIdAccess,
                                                     BULK_URN_PREFIX_VERSION)
@@ -153,6 +153,7 @@ async def get_data_version(
         df = await dask_blob_storage.load_bulk(record_id, bulk_id)
     elif prefix is None:
         df = await get_dataframe(ctx, bulk_id)
+        _check_df_columns_type_legacy(df)
     else:
         raise BulkNotFound(record_id=record_id, bulk_id=bulk_id)
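Note that this diff only shows the call site; the body of _check_df_columns_type_legacy lives in app/routers/bulk/utils.py and is not part of the visible hunks. A minimal sketch of what such a guard plausibly does, assuming it simply coerces non-string column names in place before the dataframe is rendered:

import pandas as pd

def _check_df_columns_type_legacy(df: pd.DataFrame) -> None:
    # Hypothetical sketch, not the actual implementation from this MR.
    # Parquet serialization requires string column names; legacy v2 Log
    # payloads can produce numeric ones (e.g. 42 or -42.0), so coerce
    # any non-string labels to strings.
    if not all(isinstance(name, str) for name in df.columns):
        df.columns = df.columns.astype(str)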
@@ -7,6 +7,7 @@ from fastapi.testclient import TestClient
 import pytest
 import numpy as np
 import pandas as pd
+import pandas.api.types as ptypes
 import pyarrow.parquet as pq
 import pyarrow as pa
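The new pandas.api.types import backs the assertion at the end of the new test below: is_string_dtype applied to a DataFrame's columns Index reports whether the column labels are strings. A quick standalone illustration, using only plain pandas:

import pandas as pd
import pandas.api.types as ptypes

# An object-dtype Index of string labels passes the check...
assert ptypes.is_string_dtype(pd.Index(['42', '-42.0']))
# ...while a numeric Index of int labels does not.
assert not ptypes.is_string_dtype(pd.Index([42, -42]))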
@@ -215,7 +216,6 @@ def test_post_data_merge_extension_properties(setup_client):
     assert get_response.json()["data"]["ExtensionProperties"] == expected
 
-
 @pytest.mark.parametrize("entity_type", EntityTypeParams)
 @pytest.mark.parametrize("content_type_header,create_func", [
     ('application/x-parquet', lambda df: df.to_parquet(engine="pyarrow")),
@@ -769,6 +769,34 @@ def test_session_chunk_int(setup_client, entity_type, content_type_header, creat
     assert chunk_response_1.status_code == expected_code
 
 
+def test_legacy_logs_int_columns(setup_client):
+    """
+    Ensure column names of a legacy v2 Log that arrive as numeric types (int/float)
+    are converted to strings so that to_parquet is possible.
+    """
+    client = setup_client
+    entity_type = "Log"
+    record_id = _create_record(client, entity_type)
+    chunking_url = Definitions[entity_type]['chunking_url']
+    base_url = Definitions[entity_type]['base_url']
+
+    json_data = {t: np.random.rand(10) for t in [int(42), float(-42)]}
+    df_data = pd.DataFrame(json_data)
+    data_to_send = df_data.to_json(orient='split', date_format='iso')
+    write_legacy_log_response = client.post(f'{base_url}/{record_id}/data',
+                                            data=data_to_send,
+                                            headers={'content-type': 'application/json'})
+    assert write_legacy_log_response.status_code == 200
+
+    read_dask_log_response = client.get(f'{chunking_url}/{record_id}/data',
+                                        headers={'content-type': 'application/parquet'})
+    assert read_dask_log_response.status_code == 200
+
+    result_df = _create_df_from_response(read_dask_log_response)
+    assert ptypes.is_string_dtype(result_df.columns)
+
+
 @pytest.mark.parametrize("data_format", ['parquet', 'json'])
 @pytest.mark.parametrize("accept_content", ['application/x-parquet', 'application/json'])
 @pytest.mark.parametrize("columns_name", [
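To see the failure mode the new test guards against, here is a minimal repro outside the service. The DataFrame mirrors the test's json_data, with one int and one float column name; the exact ValueError message may vary across pandas versions:

import numpy as np
import pandas as pd

df = pd.DataFrame({int(42): np.random.rand(10), float(-42): np.random.rand(10)})
try:
    df.to_parquet(engine="pyarrow")   # pandas rejects non-string column names
except ValueError as exc:
    print(exc)                        # e.g. "parquet must have string column names"

df.columns = df.columns.astype(str)   # coerce labels to strings
parquet_bytes = df.to_parquet(engine="pyarrow")   # now succeeds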