Commit 755878f0 authored by Luc Yriarte

Merge branch 'master' into can_run_check_disabled

parents a0b7a107 5d8130d3
Pipeline #86460 failed with stage in 41 seconds
......@@ -34,7 +34,7 @@ variables:
OSDU_GCP_VENDOR: gcp
OSDU_GCP_HELM_PACKAGE_CHARTS: "devops/gcp/deploy devops/gcp/configmap"
OSDU_GCP_HELM_CONFIG_SERVICE_VARS: "--set data.os_wellbore_ddms_data_project_id=$OSDU_GCP_PROJECT"
OSDU_GCP_HELM_DEPLOYMENT_SERVICE_VARS: "--set data.image=$CI_REGISTRY_IMAGE/osdu-gcp:$CI_COMMIT_SHORT_SHA --set data.serviceAccountName=workload-identity-wellbore"
OSDU_GCP_HELM_DEPLOYMENT_SERVICE_VARS: "--set data.image=$CI_REGISTRY_IMAGE/osdu-gcp:$CI_COMMIT_SHORT_SHA --set data.serviceAccountName=$OSDU_GCP_SERVICE-k8s"
OSDU_GCP_HELM_CONFIG_SERVICE: wellbore-config
OSDU_GCP_HELM_DEPLOYMENT_SERVICE: wellbore-deploy
OSDU_GCP_INT_TEST_TYPE: python
......@@ -46,9 +46,6 @@ include:
- project: "osdu/platform/ci-cd-pipelines"
file: "build/python.yml"
- project: "osdu/platform/ci-cd-pipelines"
file: "scanners/fossa-python.yml"
- project: "osdu/platform/ci-cd-pipelines"
file: "scanners/gitlab-ultimate.yml"
......@@ -90,7 +87,6 @@ verify_existing_requirements:
expire_in: 2 days
compile-and-unit-test:
artifacts:
when: always
......@@ -98,8 +94,6 @@ compile-and-unit-test:
- all-requirements.txt
- spec/generated/openapi.json
containerize:
extends: .skipForTriggeringMergeRequests
stage: containerize
......@@ -230,9 +224,136 @@ osdu-gcp-test:
extends:
- .osdu-gcp-variables
# Allow failure on private development deployments
ibm-deploy-devpri:
allow_failure: true
# --------------------------------------------------------------------------------
# Experimental FOSSA jobs. These will be promoted to the standard ci-cd-pipelines after
# they've had some testing in a real project
fossa-analyze:
image: $CI_REGISTRY/divido/fossa-with-cache/incremental:latest
stage: scan
needs: ['compile-and-unit-test']
rules:
- if: $FOSSA_API_KEY
variables:
FOSSA_OUTPUT_DIR: fossa-output
artifacts:
paths:
- fossa-output
when: always
expire_in: 2 days
script:
# fossa-with-cache needs a CI_COMMIT_BRANCH defined to know how to parse the FOSSA API results
# When building tags, this isn't defined by GitLab. In that case, we use the tag name instead. If that's not defined
# then things will fail and we'll have to make this smarter
- test -z "$CI_COMMIT_BRANCH" && export CI_COMMIT_BRANCH="$CI_COMMIT_TAG"
- |
if [ ! -e all-requirements.txt ]; then
echo "I was expecting a file named 'all-requirements.txt' to have been generated by compile-and-unit-test"
echo "However, that file doesn't seem to exist"
echo "----------------------------------------"
echo "That file should have been the output of a 'pip freeze', so that I knew what the full list of deep"
echo "dependencies were. I can't reasonably generate that in this job, because I don't know what python image"
echo "is appropriate. If this structure has been changed in the build/python.yml, you may need to update this"
echo "logic as well (in scanners/fossa-python.yml)"
exit 1
fi
# This variable is used by the python build environment to refer to the set of requirements that need to
# be compiled down into the single 'all-requirements.txt'. Here, we override it to supply fossa-with-cache
# with a direct answer.
- PIP_REQUIREMENTS=all-requirements.txt fossa-with-cache
fossa-check-notice:
image: $CI_REGISTRY/divido/fossa-with-cache/incremental:latest
stage: scan
needs: ['fossa-analyze']
tags: ['osdu-small']
rules:
- if: $FOSSA_API_KEY
artifacts:
when: on_failure
paths:
- fossa-output/cached-NOTICE
- fossa-output/generated-clean-NOTICE
expire_in: 2 days
script:
# Check whether a newer commit exists on the pipeline's branch; if it does, use its NOTICE file instead of this one's
- |
if [ "$CI_COMMIT_BRANCH" != "" ]; then
colorCmd="\e[32;1m"
colorReset="\e[0m"
function echoCmd() {
echo -e "${colorCmd}>" "$@" "${colorReset}"
}
echoCmd git fetch
git fetch
echoCmd git diff --name-only HEAD origin/$CI_COMMIT_BRANCH
branchDiffs="$(git diff --name-only HEAD origin/$CI_COMMIT_BRANCH)"
echo "$branchDiffs"
echo "--------------------"
if [ "$branchDiffs" == "NOTICE" ]; then
echo "The branch associated with this pipeline ($CI_COMMIT_BRANCH) has been changed, but the only changes are the NOTICE file"
echo "I will use the NOTICE file from origin/$CI_COMMIT_BRANCH ($(git rev-parse --short origin/$CI_COMMIT_BRANCH)) as the basis for comparison"
echoCmd git checkout origin/$CI_COMMIT_BRANCH -- NOTICE
git checkout origin/$CI_COMMIT_BRANCH -- NOTICE
elif [ "$branchDiffs" == "" ]; then
echo "The branch associated with this pipeline ($CI_COMMIT_BRANCH) has not been changed since the commit that spawned this pipeline"
echo "I will use the NOTICE file from the pipeline's commit ($CI_COMMIT_SHORT_SHA) as the basis for comparison"
else
echo "The branch associated with this pipeline ($CI_COMMIT_BRANCH) has been changed, but the changes include more than just the NOTICE file"
echo "I will use the NOTICE file from the pipeline's commit ($CI_COMMIT_SHORT_SHA) as the basis for comparison"
fi
fi
# Use a cached NOTICE if available, otherwise use a generated one
- |
if [ -e fossa-output/cached-NOTICE ]; then
fossaGeneratedNotice=fossa-output/cached-NOTICE;
elif [ -e fossa-output/generated-clean-NOTICE ]; then
fossaGeneratedNotice=fossa-output/generated-clean-NOTICE
else
echo "Couldn't find either a cached-NOTICE or generated-clean-NOTICE in the fossa-output/ directory"
echo
echo "At least one of these should have been generated by a previous job stage (fossa-analyze) and stored"
echo "as an artifact. Something must be wrong in the CI setup"
exit 1
fi
echo "Comparing with $fossaGeneratedNotice"
# If the comparison finds differences, let the user know what to do next
- |
if ! fossa-compare-notices NOTICE $fossaGeneratedNotice; then
echo --------------------------------------------------------------------------------
echo "There are differences in the NOTICE file"
echo "Please review these differences, and if they look appropriate based on your"
echo "changes, update the committed NOTICE file"
echo "--------------------"
echo "If you make changes to the NOTICE file (and only the NOTICE file), you can"
echo "re-run this single stage of the pipeline alone rather than the whole pipeline"
echo "One way to achieve this:"
echo "$ wget -O NOTICE '${CI_PROJECT_URL}/-/jobs/${CI_JOB_ID}/artifacts/raw/${fossaGeneratedNotice}?inline=false'"
echo "$ git add NOTICE"
echo "$ git commit -m 'Updating NOTICE'"
echo "$ git push -o ci.skip"
echo "Then retry this job"
exit 1
fi
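The comparison itself is done by the fossa-compare-notices tool from the fossa-with-cache image. Purely as an illustration of the kind of check it performs (this is not the real tool), a difflib-based sketch:

    import difflib
    import sys

    def notices_match(committed: str, generated: str) -> bool:
        """Print a unified diff of the two NOTICE files; True when identical."""
        with open(committed) as a, open(generated) as b:
            diff = list(difflib.unified_diff(a.readlines(), b.readlines(),
                                             fromfile=committed, tofile=generated))
        sys.stdout.writelines(diff)
        return not diff

    # e.g. notices_match('NOTICE', 'fossa-output/cached-NOTICE')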
# 3rd-Party Software License Notice
Generated by fossa-cli (https://github.com/fossas/fossa-cli).
Formatted by fossa-with-cache (https://community.opengroup.org/divido/fossa-with-cache).
This software includes the following software and licenses:
========================================================================
......@@ -15,18 +16,18 @@ The following software have components provided under the terms of this license:
- coverage (from https://github.com/nedbat/coveragepy)
- cryptography (from https://github.com/pyca/cryptography)
- google-api-core (from https://github.com/GoogleCloudPlatform/google-cloud-python)
- google-auth (from https://github.com/googleapis/google-auth-library-python)
- google-auth (from https://github.com/GoogleCloudPlatform/google-auth-library-python, https://github.com/googleapis/google-auth-library-python)
- google-auth-oauthlib (from https://github.com/GoogleCloudPlatform/google-auth-library-python-oauthlib)
- google-cloud-core (from https://github.com/GoogleCloudPlatform/google-cloud-python)
- google-cloud-monitoring (from https://github.com/GoogleCloudPlatform/google-cloud-python)
- google-cloud-trace (from https://github.com/googleapis/googleapis)
- googleapis-common-protos (from https://github.com/googleapis/googleapis)
- grpcio (from https://grpc.io)
- importlib-metadata (from )
- importlib-metadata
- jsonpath-ng (from https://github.com/h2non/jsonpath-ng)
- msgpack (from http://msgpack.org/)
- multidict (from https://github.com/aio-libs/multidict/)
- numpy (from )
- numpy
- openapi-spec-validator (from https://github.com/p1c2u/openapi-spec-validator)
- opencensus (from https://github.com/census-instrumentation/opencensus-python)
- opencensus-context (from https://github.com/census-instrumentation/opencensus-python/tree/master/context/opencensus-context)
......@@ -36,14 +37,14 @@ The following software have components provided under the terms of this license:
- opencensus-ext-stackdriver (from https://github.com/census-instrumentation/opencensus-python/tree/master/contrib/opencensus-ext-stackdriver)
- opencensus-proto (from https://github.com/census-instrumentation/opencensus-proto/tree/master/gen-python)
- packaging (from https://github.com/pypa/packaging)
- pandas (from https://pandas.pydata.org)
- pandas (from http://pandas.pydata.org, https://pandas.pydata.org)
- pep517 (from https://github.com/takluyver/pep517)
- pyarrow (from https://arrow.apache.org/)
- pytest-asyncio (from https://github.com/pytest-dev/pytest-asyncio)
- pytest-dependency (from https://github.com/RKrahl/pytest-dependency)
- python-dateutil (from https://dateutil.readthedocs.org)
- python-multipart (from http://github.com/andrew-d/python-multipart)
- requests (from https://requests.readthedocs.io)
- requests (from http://python-requests.org, https://requests.readthedocs.io)
- rfc3986 (from https://rfc3986.readthedocs.org)
- rsa (from https://stuvel.eu/rsa)
- s3transfer (from https://github.com/boto/s3transfer)
......@@ -64,7 +65,7 @@ The following software have components provided under the terms of this license:
- grpcio (from https://grpc.io)
- locket (from http://github.com/mwilliamson/locket.py)
- mock (from https://github.com/testing-cabal/mock)
- numpy (from )
- numpy
- packaging (from https://github.com/pypa/packaging)
- ply (from http://www.dabeaz.com/ply/)
- pyasn1 (from http://sourceforge.net/projects/pyasn1/)
......@@ -79,7 +80,7 @@ BSD-3-Clause
The following software have components provided under the terms of this license:
- HeapDict (from http://stutzbachenterprises.com/)
- Jinja2 (from https://palletsprojects.com/p/jinja/)
- Jinja2 (from http://jinja.pocoo.org/, https://palletsprojects.com/p/jinja/)
- MarkupSafe (from https://palletsprojects.com/p/markupsafe/)
- adlfs (from https://github.com/hayesgb/adlfs/)
- asgiref (from http://github.com/django/asgiref/)
......@@ -100,11 +101,11 @@ The following software have components provided under the terms of this license:
- isodate (from http://cheeseshop.python.org/pypi/isodate)
- locket (from http://github.com/mwilliamson/locket.py)
- mock (from https://github.com/testing-cabal/mock)
- numpy (from )
- numpy
- oauthlib (from https://github.com/idan/oauthlib)
- openapi-schema-validator (from https://github.com/p1c2u/openapi-schema-validator)
- packaging (from https://github.com/pypa/packaging)
- pandas (from https://pandas.pydata.org)
- pandas (from http://pandas.pydata.org, https://pandas.pydata.org)
- partd (from http://github.com/dask/partd/)
- pip-tools (from https://github.com/jazzband/pip-tools/)
- ply (from http://www.dabeaz.com/ply/)
......@@ -136,8 +137,8 @@ The following software have components provided under the terms of this license:
- distributed (from https://distributed.readthedocs.io/en/latest/)
- fsspec (from http://github.com/intake/filesystem_spec)
- gcsfs (from https://github.com/dask/gcsfs)
- numpy (from )
- pandas (from https://pandas.pydata.org)
- numpy
- pandas (from http://pandas.pydata.org, https://pandas.pydata.org)
- partd (from http://github.com/dask/partd/)
- s3fs (from http://github.com/dask/s3fs/)
- toolz (from http://github.com/pytoolz/toolz/)
......@@ -147,7 +148,7 @@ CC-BY-SA-3.0
========================================================================
The following software have components provided under the terms of this license:
- numpy (from )
- numpy
========================================================================
GPL-2.0-only
......@@ -225,14 +226,14 @@ MIT
The following software have components provided under the terms of this license:
- PyJWT (from http://github.com/jpadilla/pyjwt)
- PyYAML (from )
- PyYAML
- adal (from https://github.com/AzureAD/azure-activedirectory-library-for-python)
- aiohttp (from https://github.com/aio-libs/aiohttp/)
- aioitertools (from https://github.com/jreese/aioitertools)
- aioredis (from https://github.com/aio-libs/aioredis)
- anyio (from https://pypi.org/project/anyio/3.4.0/)
- anyio (from https://pypi.org/project/anyio/3.3.0/, https://pypi.org/project/anyio/3.4.0/)
- asgiref (from http://github.com/django/asgiref/)
- attrs (from https://attrs.readthedocs.io/)
- attrs (from https://attrs.readthedocs.io/, https://www.attrs.org/)
- azure-common (from https://github.com/Azure/azure-sdk-for-python)
- azure-core (from https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/core/azure-core)
- azure-datalake-store (from https://github.com/Azure/azure-data-lake-store-python)
......@@ -242,28 +243,30 @@ The following software have components provided under the terms of this license:
- azure-keyvault-keys (from https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/keyvault/azure-keyvault-keys)
- azure-keyvault-secrets (from https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/keyvault/azure-keyvault-secrets)
- azure-storage-blob (from https://github.com/Azure/azure-storage-python)
- backoff (from )
- backoff
- botocore (from https://github.com/boto/botocore)
- cachetools (from https://github.com/tkem/cachetools)
- cffi (from )
- cffi
- charset-normalizer (from https://github.com/ousret/charset_normalizer)
- coverage (from https://github.com/nedbat/coveragepy)
- deepdiff (from https://github.com/seperman/deepdiff)
- fastapi (from https://github.com/tiangolo/fastapi)
- grpcio (from https://grpc.io)
- h11 (from )
- h11
- iniconfig (from http://github.com/RonnyPfannschmidt/iniconfig)
- jmespath (from https://github.com/jmespath/jmespath.py)
- jsonschema (from )
- jsonschema
- mockito (from https://github.com/kaste/mockito-python)
- msal (from https://github.com/AzureAD/microsoft-authentication-library-for-python)
- msal-extensions (from https://pypi.org/project/msal-extensions/0.1.3/)
- msrest (from https://github.com/Azure/msrest-for-python)
- munch (from http://github.com/Infinidat/munch)
- natsort (from https://github.com/SethMMorton/natsort)
- numpy (from )
- pandas (from https://pandas.pydata.org)
- numpy
- ordered-set (from http://github.com/LuminosoInsight/ordered-set)
- pandas (from http://pandas.pydata.org, https://pandas.pydata.org)
- pep517 (from https://github.com/takluyver/pep517)
- pluggy (from )
- pluggy
- py (from http://pylib.readthedocs.org/)
- pyarrow (from https://arrow.apache.org/)
- pydantic (from https://github.com/samuelcolvin/pydantic)
......@@ -281,7 +284,7 @@ The following software have components provided under the terms of this license:
- sniffio (from https://github.com/python-trio/sniffio)
- structlog (from http://www.structlog.org/)
- toml (from https://github.com/uiri/toml)
- tomli (from https://pypi.org/project/tomli/2.0.0/)
- tomli (from https://pypi.org/project/tomli/1.2.2/, https://pypi.org/project/tomli/2.0.0/)
- urllib3 (from https://urllib3.readthedocs.io/)
- xmltodict (from https://github.com/martinblech/xmltodict)
- zipp (from https://github.com/jaraco/zipp)
......@@ -299,14 +302,14 @@ NCSA
========================================================================
The following software have components provided under the terms of this license:
- numpy (from )
- numpy
========================================================================
OPL-1.0
========================================================================
The following software have components provided under the terms of this license:
- numpy (from )
- numpy
========================================================================
OpenSSL
......@@ -323,10 +326,10 @@ The following software have components provided under the terms of this license:
- async-timeout (from https://github.com/aio-libs/async_timeout/)
- coverage (from https://github.com/nedbat/coveragepy)
- distributed (from https://distributed.readthedocs.io/en/latest/)
- google-auth (from https://github.com/googleapis/google-auth-library-python)
- google-auth (from https://github.com/GoogleCloudPlatform/google-auth-library-python, https://github.com/googleapis/google-auth-library-python)
- google-auth-oauthlib (from https://github.com/GoogleCloudPlatform/google-auth-library-python-oauthlib)
- numpy (from )
- pandas (from https://pandas.pydata.org)
- numpy
- pandas (from http://pandas.pydata.org, https://pandas.pydata.org)
- ply (from http://www.dabeaz.com/ply/)
- portalocker (from https://github.com/WoLpH/portalocker)
- python-dateutil (from https://dateutil.readthedocs.org)
......@@ -341,7 +344,7 @@ SunPro
========================================================================
The following software have components provided under the terms of this license:
- numpy (from )
- numpy
========================================================================
Unlicense
......@@ -370,7 +373,7 @@ Zlib
The following software have components provided under the terms of this license:
- grpcio (from https://grpc.io)
- numpy (from )
- numpy
========================================================================
public-domain
......@@ -379,9 +382,7 @@ The following software have components provided under the terms of this license:
- botocore (from https://github.com/boto/botocore)
- grpcio (from https://grpc.io)
- numpy (from )
- pandas (from https://pandas.pydata.org)
- numpy
- pandas (from http://pandas.pydata.org, https://pandas.pydata.org)
- py (from http://pylib.readthedocs.org/)
- pytz (from http://pythonhosted.org/pytz)
......@@ -36,7 +36,7 @@ from .errors import BulkRecordNotFound, BulkNotProcessable, internal_bulk_except
from .traces import map_with_trace, submit_with_trace
from .utils import (by_pairs, do_merge, worker_capture_timing_handlers,
get_num_rows, set_index, index_union)
from ..dataframe_validators import (assert_df_validate, validate_index,
from ..dataframe_validators import (assert_df_validate, validate_index, validate_number_of_columns,
columns_not_in_reserved_names, is_reserved_column_name)
from .. import DataframeSerializerSync
from . import storage_path_builder as pathBuilder
......@@ -243,7 +243,9 @@ class DaskBulkStorage:
"""Write the data frame to the blob storage."""
bulk_id = bulk_id or new_bulk_id()
assert_df_validate(dataframe=ddf, validation_funcs=[validate_index, columns_not_in_reserved_names])
assert_df_validate(dataframe=ddf, validation_funcs=[validate_number_of_columns,
validate_index,
columns_not_in_reserved_names])
ddf = dd.from_pandas(ddf, npartitions=1, name=f"from_pandas-{uuid.uuid4()}")
ddf = await self.client.scatter(ddf)
......@@ -259,8 +261,9 @@ class DaskBulkStorage:
@with_trace('session_add_chunk')
async def session_add_chunk(self, session: Session, pdf: pd.DataFrame):
"""add new chunk to the given session"""
assert_df_validate(dataframe=pdf, validation_funcs=[validate_index, columns_not_in_reserved_names])
assert_df_validate(dataframe=pdf, validation_funcs=[validate_number_of_columns,
validate_index,
columns_not_in_reserved_names])
# sort column by names
pdf = pdf[sorted(pdf.columns)]
filename = session_meta.generate_chunk_filename(pdf)
......@@ -316,8 +319,9 @@ class DaskBulkStorage:
catalog.nb_rows = max(get_num_rows(d) for d in datasets)
for file, schema in zip(files, schemas):
index_columns = schema.pandas_metadata.get('index_columns', [])
columns = {name: str(dtype) for name, dtype in zip(schema.names, schema.types)
if not is_reserved_column_name(name)}
if name not in index_columns and not is_reserved_column_name(name)}
chunk_group = ChunkGroup(set(columns.keys()), [self._relative_path(record_id, file)], list(columns.values()))
catalog.add_chunk(chunk_group)
......
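The columns mapping above is built straight from each Parquet file's schema; pandas records its index columns in the schema's pandas metadata, and the change filters them out. A minimal pyarrow illustration of that filtering (the sample frame and names are made up):

    import pandas as pd
    import pyarrow as pa

    pdf = pd.DataFrame({"a": [1, 2], "b": [1.5, 2.5]},
                       index=pd.Index([10, 20], name="depth"))
    schema = pa.Table.from_pandas(pdf).schema

    index_columns = schema.pandas_metadata.get("index_columns", [])
    columns = {name: str(dtype) for name, dtype in zip(schema.names, schema.types)
               if name not in index_columns}
    print(columns)  # {'a': 'int64', 'b': 'double'} -- 'depth' is filtered out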
......@@ -23,12 +23,14 @@ from distributed.worker import get_client
import pandas as pd
from app.bulk_persistence.dask.utils import share_items
from app.helper.logger import get_logger
from app.helper.traces import with_trace
from app.persistence.sessions_storage import Session
from app.utils import capture_timings
from .storage_path_builder import add_protocol, record_session_path
class SessionFileMeta:
"""The class extract information about chunks."""
......@@ -161,13 +163,21 @@ def get_next_chunk_files(
"""
chunks_info.sort(key=attrgetter('time'))
cache: Dict[str, SessionFileMeta] = {}
cache: Dict[str, List[SessionFileMeta]] = {}
columns_in_cache = set() # keep track of columns present in the cache
for chunk in chunks_info:
if chunk.shape in cache: # if other chunks share the same shape
if any(chunk.overlap(c) for c in cache[chunk.shape]): # rows overlaps
yield cache[chunk.shape]
del cache[chunk.shape]
# look for an overlapping chunk
for i, cached_chunk in enumerate(cache[chunk.shape]):
if chunk.overlap(cached_chunk):
if chunk.index_hash == cached_chunk.index_hash:
# if chunks are identical in shape and index, just keep the last one
get_logger().info(f"Duplicated chunk skipped: '{chunk.path}'")
cache[chunk.shape].pop(i)
else:
yield cache[chunk.shape]
del cache[chunk.shape]
break
elif not columns_in_cache.isdisjoint(chunk.columns): # else if columns conflict
conflicting_chunk = next(metas[0] for metas in cache.values()
if chunk.has_common_columns(metas[0]))
......
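In short, a chunk that matches a cached chunk in both shape and index hash is a duplicate, so the older copy is dropped instead of flushing the whole group. A standalone sketch of that rule, with a hypothetical Chunk class standing in for SessionFileMeta:

    from dataclasses import dataclass
    from typing import Dict, Iterator, List, Tuple

    @dataclass
    class Chunk:  # hypothetical stand-in for SessionFileMeta
        shape: Tuple[int, int]
        time: float
        index_hash: str

        def overlap(self, other: "Chunk") -> bool:
            # simplification: identical index hash means identical rows
            return self.index_hash == other.index_hash

    def chunk_groups(chunks: List[Chunk]) -> Iterator[List[Chunk]]:
        chunks.sort(key=lambda c: c.time)
        cache: Dict[Tuple[int, int], List[Chunk]] = {}
        for chunk in chunks:
            group = cache.setdefault(chunk.shape, [])
            for i, cached in enumerate(group):
                if chunk.overlap(cached):
                    if chunk.index_hash == cached.index_hash:
                        group.pop(i)  # duplicate: keep only the newest chunk
                    else:
                        yield group  # rows overlap but differ: flush the group
                        group = cache[chunk.shape] = []
                    break
            group.append(chunk)
        yield from cache.values()

    older = Chunk(shape=(3, 2), time=1.0, index_hash="abc")
    newer = Chunk(shape=(3, 2), time=2.0, index_hash="abc")
    assert list(chunk_groups([older, newer])) == [[newer]]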
......@@ -4,6 +4,8 @@ import re
import pandas as pd
from app.bulk_persistence.dask.errors import BulkNotProcessable
from app.conf import Config
ValidationResult = Tuple[bool, str] # Tuple (is_dataframe_valid, failure_reason)
......@@ -63,6 +65,13 @@ def validate_index(df: pd.DataFrame) -> ValidationResult:
return ValidationSuccess
def validate_number_of_columns(df: pd.DataFrame) -> ValidationResult:
""" Verify max number of columns """
if len(df.columns) > Config.max_columns_per_chunk_write.value:
return False, f"Too many columns : maximum allowed '{Config.max_columns_per_chunk_write.value}'"
return ValidationSuccess
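For illustration, the same check with a hard-coded limit of 3 in place of the configured Config.max_columns_per_chunk_write (default 500):

    import pandas as pd

    def check_width(df: pd.DataFrame, limit: int = 3):
        # mirrors validate_number_of_columns with a fixed limit
        if len(df.columns) > limit:
            return False, f"Too many columns: maximum allowed '{limit}'"
        return True, ""

    ok, reason = check_width(pd.DataFrame(columns=list("abcd")))
    print(ok, reason)  # False Too many columns: maximum allowed '3'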
PandasReservedIndexColRegexp = re.compile(r'__index_level_\d+__')
def is_reserved_column_name(name: str) -> bool:
......
......@@ -192,6 +192,12 @@ class ConfigurationContainer:
default="500",
factory=lambda x: int(x))
max_columns_per_chunk_write: EnvVar = EnvVar(
key='MAX_COLUMNS_PER_CHUNK_WRITE',
description='Max number of columns that can be written per chunk',
default="500",
factory=lambda x: int(x))
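Assuming EnvVar resolves its key from the process environment through the given factory, the limit can be raised per deployment; a standalone sketch of that resolution:

    import os

    # hypothetical resolution; the real lookup goes through the EnvVar class
    os.environ["MAX_COLUMNS_PER_CHUNK_WRITE"] = "1000"
    limit = int(os.environ.get("MAX_COLUMNS_PER_CHUNK_WRITE", "500"))
    assert limit == 1000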
_environment_dict: Dict = os.environ
_contextual_loader: Callable = None
......
......@@ -13,11 +13,15 @@
# limitations under the License.
from functools import wraps
from asyncio import iscoroutinefunction
from typing import Callable
from fastapi.routing import APIRoute
from opencensus.common.transports.async_ import AsyncTransport
from opencensus.trace import base_exporter
from opencensus.trace.propagation.trace_context_http_header_format import TraceContextPropagator
from opencensus.trace.span import SpanKind
from starlette.requests import Request
from starlette.responses import Response
from app.conf import Config
from app.helper.utils import rename_cloud_role_func, COMPONENT
......@@ -39,6 +43,20 @@ How to add specific span in a method
"""
class TracingRoute(APIRoute):
def get_route_handler(self) -> Callable:
original_route_handler = super().get_route_handler()
path = self.path
async def custom_route_handler(request: Request) -> Response:
# https://www.starlette.io/requests/#other-state
request.state.traced_route = path
response: Response = await original_route_handler(request)
return response
return custom_route_handler
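A hedged usage sketch: wiring a router through TracingRoute (as the log-recognition router does further below) makes the raw template, not the resolved path, available on the request state. The handler mirrors the get_wellbore example from the removed docstring:

    from fastapi import APIRouter, Request

    router = APIRouter(route_class=TracingRoute)

    @router.get('/wellbores/{wellboreid}')
    async def get_wellbore(wellboreid: str, request: Request):
        # TracingRoute stored '/wellbores/{wellboreid}', not the resolved path
        return {"route": request.state.traced_route, "id": wellboreid}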
def get_trace_propagator() -> TraceContextPropagator:
"""
Returns the implementation of standard tracing propagation as defined
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any
from typing import Any, Callable
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
......@@ -26,7 +26,6 @@ from opencensus.trace.span import SpanKind
from app.helper import traces, utils
from app.utils import get_or_create_ctx
from app import conf
from inspect import isfunction as is_function
class TracingMiddleware(BaseHTTPMiddleware):
......@@ -34,34 +33,6 @@ class TracingMiddleware(BaseHTTPMiddleware):
super().__init__(**kwargs)
self._trace_propagator = traces.get_trace_propagator()
@staticmethod
def _retrieve_raw_path(request):
"""
Returns the raw path of the given request, else defaults to the request's URL path
E.g.:
/ddms/v2/wellbores/{wellboreid} instead of /ddms/v2/wellbores/opendes:doc:blablabla14587
It retrieves the raw path by finding the APIRoute object by name. By default, the name of the route is the
name of the Python method that implements it.
>>> @router.get('/wellbores/{wellboreid}')
>>> async def get_wellbore(wellboreid: str, ctx: Context):
>>> # instructions here
In this example, 'get_wellbore' is the called_endpoint_func variable; its name is needed to retrieve
the APIRoute that contains the raw path.
"""
called_endpoint_func = request.scope.get('endpoint')
if called_endpoint_func and is_function(called_endpoint_func):
function_name = called_endpoint_func.__name__
called_routes = [route for route in request.app.routes
if route.name == function_name]
if called_routes:
return called_routes[0].path
return request.url.path
@staticmethod
def _before_request(request: Request, tracer: open_tracer.Tracer):
tracer.add_attribute_to_current_span(
......@@ -121,8 +92,11 @@ class TracingMiddleware(BaseHTTPMiddleware):
tracer.add_attribute_to_current_span(attribute_key=utils.HTTP_STATUS_CODE,
attribute_value=status)
tracer.add_attribute_to_current_span(attribute_key=utils.HTTP_ROUTE,
attribute_value=TracingMiddleware._retrieve_raw_path(request))
if hasattr(request.state, "traced_route"):
# This is set in the Request state by the appropriate TracingRoute instance;
# otherwise the value set in _before_request is used
tracer.add_attribute_to_current_span(attribute_key=utils.HTTP_ROUTE,
attribute_value=request.state.traced_route)
if response:
response_content_type = response.headers.get("Content-type")
......
......@@ -27,9 +27,10 @@ from app.clients.storage_service_client import get_storage_record_service
from app.conf import Config
from app.routers.common_parameters import REQUIRED_ROLES_WRITE
from app.utils import Context, get_ctx
from app.helper.traces import TracingRoute
router = APIRouter()
router = APIRouter(route_class=TracingRoute)
router.prefix = '/log-recognition'
router.tags = ['log-recognition']
......
......@@ -18,8 +18,9 @@ from app import __version__, __app_name__, __build_number__
from app.conf import Config
from typing import Dict