Commit 60d1e7c9 authored by Spencer Sutton's avatar Spencer Sutton
Browse files

Merge branch 'master' into dev

parents dd3111c3 12801b32
......@@ -27,6 +27,7 @@ variables:
OSDU_GCP_STORAGE_URL: https://os-storage-dot-nice-etching-277309.uc.r.appspot.com/api/storage/v2/
OSDU_SECURITY_HTTPS_CERTIFICATE_TRUST: 'true'
OSDU_GCP_TEST_SUBDIR: testing/integration-tests/$OSDU_GCP_SERVICE-test-$OSDU_GCP_VENDOR
OSDU_GCP_HELM_PACKAGE_CHARTS: "devops/gcp/deploy devops/gcp/configmap"
IBM_BUILD_SUBDIR: provider/search-ibm
IBM_INT_TEST_SUBDIR: testing/integration-tests/search-test-ibm
......@@ -62,6 +63,9 @@ include:
- project: 'osdu/platform/ci-cd-pipelines'
file: 'cloud-providers/osdu-gcp-cloudrun.yml'
- project: 'osdu/platform/ci-cd-pipelines'
file: 'publishing/pages.yml'
aws-test-java:
tags: ['aws-internal-test']
......
This diff is collapsed.
......@@ -21,4 +21,4 @@ image:
tag: latest
policy:
enabled: false
\ No newline at end of file
enabled: true
\ No newline at end of file
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
# Chart metadata for the chart that ships the search service ConfigMap.
apiVersion: v2
name: gcp-search-configmap
description: A Helm chart for Kubernetes

# 'application' charts contain templates and can be packaged and deployed.
# 'library' charts only provide helpers to other charts, define no templates
# of their own, and therefore cannot be deployed.
type: application

# Version of the chart itself (Semantic Versioning, https://semver.org/).
# Bump it on every change to the chart or its templates, including appVersion.
version: 0.1.0

# Version of the application being deployed. It need not follow Semantic
# Versioning and is kept quoted so it is always read as a string.
# NOTE(review): confirm this value tracks the actual search service version.
appVersion: '1.16.0'
{{/*
Chart name for use in resource names: .Values.nameOverride when it is set,
otherwise the chart's own name; cut to 63 characters with a trailing "-" removed.
*/}}
{{- define "configmap.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Fully qualified app name, limited to 63 characters because some Kubernetes
name fields are restricted to that length by the DNS naming spec.
Precedence:
  1. .Values.fullnameOverride, when set;
  2. the release name alone, when it already contains the chart name;
  3. "<release name>-<chart name>" otherwise.
*/}}
{{- define "configmap.fullname" -}}
{{- $chartName := .Values.nameOverride | default .Chart.Name }}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else if .Release.Name | contains $chartName }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $chartName | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{/*
"<chart name>-<chart version>" for the helm.sh/chart label: "+" is replaced
by "_", and the result is cut to 63 characters with a trailing "-" removed.
*/}}
{{- define "configmap.chart" -}}
{{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version }}
{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels: the chart label, the selector labels, the app version
(emitted only when the chart declares one) and the managing service.
*/}}
{{- define "configmap.labels" -}}
helm.sh/chart: {{ include "configmap.chart" . }}
{{ include "configmap.selectorLabels" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels: the chart name (from "configmap.name") and the release name,
emitted as the app.kubernetes.io/name and app.kubernetes.io/instance labels.
*/}}
{{- define "configmap.selectorLabels" -}}
app.kubernetes.io/name: {{ include "configmap.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Name of the service account to use.
When .Values.serviceAccount.create is true, the result is
.Values.serviceAccount.name, falling back to the chart's full name.
Otherwise it is .Values.serviceAccount.name, falling back to "default".
The serviceAccount section is optional: this chart's values do not define it,
so it is read through an empty dict to avoid a nil-pointer rendering error.
*/}}
{{- define "configmap.serviceAccountName" -}}
{{- $serviceAccount := .Values.serviceAccount | default dict }}
{{- if $serviceAccount.create }}
{{- default (include "configmap.fullname" .) $serviceAccount.name }}
{{- else }}
{{- default "default" $serviceAccount.name }}
{{- end }}
{{- end }}
# ConfigMap with the runtime settings of the search service.
# Every value goes through `quote`, so an empty or number/boolean-looking
# setting is still rendered as a YAML string: ConfigMap data values must be
# strings, and an unquoted empty value would be rendered as null.
apiVersion: v1
kind: ConfigMap
metadata:
  labels:
    app: {{ .Values.conf.app_name | quote }}
  name: {{ .Values.conf.configmap | quote }}
  namespace: {{ .Release.Namespace }}
data:
  LOG_LEVEL: {{ .Values.data.log_level | quote }}
  ENVIRONMENT: {{ .Values.data.environment | quote }}
  GOOGLE_CLOUD_PROJECT: {{ .Values.data.google_cloud_project_id | quote }}
  REDIS_GROUP_HOST: {{ .Values.data.redis_group_host | quote }}
  REDIS_SEARCH_HOST: {{ .Values.data.redis_search_host | quote }}
  REDIS_SEARCH_PORT: {{ .Values.data.redis_search_port | quote }}
  INDEXER_HOST: {{ .Values.data.indexer_host | quote }}
  AUTHORIZE_API: {{ .Values.data.authorize_api | quote }}
  # NOTE(review): ENTITLEMENTS_HOST is filled from the same authorize_api value
  # as AUTHORIZE_API; confirm this is intentional.
  ENTITLEMENTS_HOST: {{ .Values.data.authorize_api | quote }}
  SECURITY_HTTPS_CERTIFICATE_TRUST: {{ .Values.data.security_https_certificate_trust | quote }}
  GOOGLE_AUDIENCES: {{ .Values.data.audiences | quote }}
  PARTITION_API: {{ .Values.data.partition_api | quote }}
  POLICY_API: {{ .Values.data.policy_api | quote }}
# Default values for the configmap chart (YAML).
# The entries under `data` are rendered into the ConfigMap; `conf` names the
# ConfigMap and the app label it carries.
data:
  log_level: 'INFO'
  environment: 'dev'
  google_cloud_project_id: ''
  redis_group_host: ''
  redis_search_host: ''
  # Kept as a string so it is rendered as a string in the ConfigMap.
  redis_search_port: '6379'
  indexer_host: ''
  authorize_api: ''
  security_https_certificate_trust: 'true'
  audiences: ''
  partition_api: ''
  policy_api: ''
conf:
  configmap: 'search-config'
  app_name: 'search'
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
# Chart metadata for the chart that deploys the search service workload.
apiVersion: v2
name: gcp-search-deploy
description: A Helm chart for Kubernetes

# 'application' charts contain templates and can be packaged and deployed.
# 'library' charts only provide helpers to other charts, define no templates
# of their own, and therefore cannot be deployed.
type: application

# Version of the chart itself (Semantic Versioning, https://semver.org/).
# Bump it on every change to the chart or its templates, including appVersion.
version: 0.1.0

# Version of the application being deployed. It need not follow Semantic
# Versioning and is kept quoted so it is always read as a string.
# NOTE(review): confirm this value tracks the actual search service version.
appVersion: '1.16.0'
{{/*
Chart name for use in resource names: .Values.nameOverride when it is set,
otherwise the chart's own name; cut to 63 characters with a trailing "-" removed.
*/}}
{{- define "deploy.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Fully qualified app name, limited to 63 characters because some Kubernetes
name fields are restricted to that length by the DNS naming spec.
Precedence:
  1. .Values.fullnameOverride, when set;
  2. the release name alone, when it already contains the chart name;
  3. "<release name>-<chart name>" otherwise.
*/}}
{{- define "deploy.fullname" -}}
{{- $chartName := .Values.nameOverride | default .Chart.Name }}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else if .Release.Name | contains $chartName }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $chartName | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{/*
"<chart name>-<chart version>" for the helm.sh/chart label: "+" is replaced
by "_", and the result is cut to 63 characters with a trailing "-" removed.
*/}}
{{- define "deploy.chart" -}}
{{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version }}
{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels: the chart label, the selector labels, the app version
(emitted only when the chart declares one) and the managing service.
*/}}
{{- define "deploy.labels" -}}
helm.sh/chart: {{ include "deploy.chart" . }}
{{ include "deploy.selectorLabels" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels: the chart name (from "deploy.name") and the release name,
emitted as the app.kubernetes.io/name and app.kubernetes.io/instance labels.
*/}}
{{- define "deploy.selectorLabels" -}}
app.kubernetes.io/name: {{ include "deploy.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Name of the service account to use.
When .Values.serviceAccount.create is true, the result is
.Values.serviceAccount.name, falling back to the chart's full name.
Otherwise it is .Values.serviceAccount.name, falling back to "default".
The serviceAccount section is optional: this chart's values do not define it,
so it is read through an empty dict to avoid a nil-pointer rendering error.
*/}}
{{- define "deploy.serviceAccountName" -}}
{{- $serviceAccount := .Values.serviceAccount | default dict }}
{{- if $serviceAccount.create }}
{{- default (include "deploy.fullname" .) $serviceAccount.name }}
{{- else }}
{{- default "default" $serviceAccount.name }}
{{- end }}
{{- end }}
# Deployment of the service named by .Values.conf.app_name: a single replica
# of the .Values.app.image container listening on port 8080.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: "{{ .Values.conf.app_name }}"
  namespace: {{ .Release.Namespace }}
  labels:
    app: "{{ .Values.conf.app_name }}"
spec:
  replicas: 1
  selector:
    matchLabels:
      app: "{{ .Values.conf.app_name }}"
  template:
    metadata:
      labels:
        app: "{{ .Values.conf.app_name }}"
    spec:
      containers:
        - name: "{{ .Values.conf.app_name }}"
          image: "{{ .Values.app.image }}"
          # Each environment variable is read from the key of the same name in
          # the ConfigMap named by .Values.conf.configmap.
          env:
            {{- range $envKey := list "LOG_LEVEL" "ENVIRONMENT" "GOOGLE_CLOUD_PROJECT" "REDIS_GROUP_HOST" "REDIS_SEARCH_HOST" "REDIS_SEARCH_PORT" "INDEXER_HOST" "AUTHORIZE_API" "ENTITLEMENTS_HOST" "SECURITY_HTTPS_CERTIFICATE_TRUST" "GOOGLE_AUDIENCES" "PARTITION_API" "POLICY_API" }}
            - name: "{{ $envKey }}"
              valueFrom:
                configMapKeyRef:
                  key: {{ $envKey }}
                  name: "{{ $.Values.conf.configmap }}"
            {{- end }}
          securityContext:
            allowPrivilegeEscalation: false
            # NOTE(review): UID 0 is root; confirm the image needs to run as root.
            runAsUser: 0
          ports:
            - containerPort: 8080
          resources:
            requests:
              cpu: "{{ .Values.app.requests_cpu }}"
              memory: "{{ .Values.app.requests_memory }}"
            limits:
              cpu: "{{ .Values.app.limits_cpu }}"
              memory: "{{ .Values.app.limits_memory }}"
      serviceAccountName: "{{ .Values.app.serviceAccountName }}"
# Service in front of the pods labelled app=<conf.app_name>: it accepts TCP
# traffic on port 80 and forwards it to container port 8080.
apiVersion: v1
kind: Service
metadata:
  name: "{{ .Values.conf.app_name }}"
  namespace: {{ .Release.Namespace }}
  labels:
    app: "{{ .Values.conf.app_name }}"
    service: "{{ .Values.conf.app_name }}"
  annotations:
    cloud.google.com/neg: '{"ingress": true}'
spec:
  selector:
    app: "{{ .Values.conf.app_name }}"
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 8080
# Istio VirtualService bound to the "service-gateway" gateway: requests for any
# host whose URI starts with /api/search are routed to port 80 of the service's
# cluster-local DNS name.
apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
  name: "{{ .Values.conf.app_name }}"
  namespace: {{ .Release.Namespace }}
spec:
  gateways:
    - service-gateway
  hosts:
    - "*"
  http:
    - match:
        - uri:
            prefix: "/api/search"
      route:
        - destination:
            host: {{ .Values.conf.app_name }}.{{ .Release.Namespace }}.svc.cluster.local
            port:
              number: 80
# Default values for the search deploy chart (YAML).
# `app` holds the container resources, service account and image used by the
# Deployment; `conf` names the ConfigMap to read and the application itself.
app:
  requests_cpu: '0.25'
  requests_memory: '128M'
  limits_cpu: '1'
  limits_memory: '1G'
  serviceAccountName: ''
  image: ''
conf:
  configmap: 'search-config'
  app_name: 'search'
......@@ -297,7 +297,7 @@ definitions:
type: integer
format: int32
minimum: 0
description: The maximum number of results to return from the given offset. If no limit is provided, then it will return 10 items. Max number of items which can be fetched by the query is 100.
description: The maximum number of results to return from the given offset. If no limit is provided, then it will return 10 items. Max number of items which can be fetched by the query is 1000.
query:
type: string
description: The query string in Lucene query string syntax.
......@@ -369,7 +369,7 @@ definitions:
type: integer
format: int32
minimum: 0
description: The maximum number of results to return from the given offset. If no limit is provided, then it will return 10 items. Max number of items which can be fetched by the query is 100.
description: The maximum number of results to return from the given offset. If no limit is provided, then it will return 10 items. Max number of items which can be fetched by the query is 1000.
query:
type: string
description: The query string in Lucene query string syntax.
......
......@@ -10,7 +10,11 @@
+ [Grouping](#grouping)
+ [Reserved characters](#reserved-characters)
+ [Wildcards](#wildcards)
* [Query by "nested" arrays objects](#nested-queries)
* [Sort](#sort-queries)
+ [Sort by "nested" arrays objects](#nested-sort)
* [Aggregation](#aggregation)
+ [Aggregation by "nested" arrays objects](#nested-aggregation)
* [Range Queries](#range-queries)
* [Geo-Spatial Queries](#geo-spatial-queries)
+ [Geo Distance](#geo-distance)
......@@ -70,7 +74,7 @@ Data Ecosystem search provides a JSON-style domain-specific language that you ca
POST /api/search/v2/query
{
"kind": "common:welldb:wellbore:1.0.0",
"query": "data.Status:Active",
"query": "data.Status:Active AND nested(data.VerticalMeasurements)",
"offset": 0,
"limit": 30,
"sort": {
......@@ -140,9 +144,9 @@ __Note:__ : It can take a delay of atleast 30 seconds once records are successfu
| Parameter | Description |
| :--- | :--- |
| kind | The kind of the record to query e.g. 'common:welldb:wellbore:1.0.0'. kind is a __required__ field and can be formatted as OSDU-Data-Partition-Id:data-source-id:entity-type:schema-version |
| query | The query string in Lucene query string syntax. |
| query | Query string based on Lucene query string syntax, supplemented with a specific format for describing queries to fields of object arrays indexed with the "nested" hint. |
| offset | The starting offset from which to return results. |
| limit | The maximum number of results to return from the given offset. If no limit is provided, then it will return __10__ items. Max number of items which can be fetched by the query is __100__. (If you wish to fetch large set of items, please use [query_with_cursor](#query-with-cursor) API). |
| limit | The maximum number of results to return from the given offset. If no limit is provided, then it will return __10__ items. Max number of items which can be fetched by the query is __1000__. (If you wish to fetch large set of items, please use [query_with_cursor](#query-with-cursor) API). |
| sort | Allows you to add one or more sorts on specific fields. The length of fields and the length of order must match. Order value must be either ASC or DESC (case insensitive). For more details about the abilities and limitations of this feature, please refer to [Sort](#sort-queries). |
| queryAsOwner | If true, the result only contains the records that the user owns. If false, the result contains all records that the user is entitled to see. Default value is false |
| spatialFilter | A spatial filter to apply, please see [Geo-Spatial Queries](#geo-spatial-queries). |
......@@ -318,8 +322,98 @@ If you need to use date in your query, it has to be in one of the following form
For more info please refer [Date format](http://www.joda.org/joda-time/apidocs/org/joda/time/format/ISODateTimeFormat.html#dateOptionalTimeParser--)
## Query by "nested" arrays objects <a name="nested-queries"></a>
Starting from version 0.9.0, object-array nodes in data schemas can be marked with a "nested" hint.
The hint makes the underlying Elasticsearch engine index the objects of such arrays accurately:
each object of a hinted array is indexed as a separate document,
which makes it possible to query parent documents by the field values of the objects in their child arrays.
Such "nested array" queries require a more complicated syntax in the native Elasticsearch Query DSL,
which we want to keep out of the OSDU Search query DSL. The Search service therefore provides its own simplified dialect
for describing such requests, in the form of the ```nested()``` function:
- for one level "nested array":
```json
nested(<path-to-root-nested-array-node>, <root-nested-array-object-fields-query>)
```
- for nested (multi-level) "nested array" queries
```json
nested(<path-to-root-nested-array-node>, nested(<path-to-subrootA-nested-array-node>, <subrootA-nested-array-object-fields-query>))
```
Multi-level nested queries are not limited in depth; nest them as deeply as the schema requires.
Examples of single-level and multi-level nested queries are given in the sections below.
The syntax of these queries is the same as described in the sections above.
The only difference is that their conditions are scoped to the fields of the objects in the array
named by the first argument of the enclosing nested(path, (conditions)) function.
### Single-level one condition nested query
It queries for wellboremarkerset WPCs having any Marker with MarkerMeasuredDepth field value greater than 10000
```json
{
"kind" : "osdu:wks:work-product-component--wellboremarkerset:1.0.0",
"query":"nested(data.Markers, (MarkerMeasuredDepth:(>10000)))"
}
```
### Single-level several conditions nested query
It queries for wellboremarkerset WPCs having any Marker that matches both conditions:
(MarkerMeasuredDepth > 10000 AND PositiveVerticalDelta < 13000)
```json
{
"kind" : "osdu:wks:work-product-component--wellboremarkerset:1.0.0",
"query":"nested(data.Markers, (MarkerMeasuredDepth:(>10000) AND PositiveVerticalDelta:(<13000)))"
}
```
### Combination of single-level nested queries
It queries for wellboremarkerset WPCs having any Marker that matches at least one of the conditions:
(MarkerMeasuredDepth > 10000 OR SurfaceDipAzimuth < 30000)
```json
{
"kind" : "osdu:wks:work-product-component--wellboremarkerset:1.0.0",
"query":"nested(data.Markers, (MarkerMeasuredDepth:(>10000))) OR nested(data.Markers, (SurfaceDipAzimuth:(<30000)))"
}
```
### Multi-level nested queries
Assume we have a schema (one that does not yet exist among the OSDU schemas) describing a document with a "nested" array
that contains another "nested" array as the value of one of its fields.
For example, let's imagine that each Marker object in data.Markers has a nested 'Revisions' array of Revision objects
with two fields of their own: "RevisionDate" and "RevisionEngineer". An indexed document might then look like this:
```json
"data": {
...
"Markers": [
{
...
"MarkerMeasuredDepth": 12345.6,
"PositiveVerticalDelta": 12345.6,
"Revisions": [
{
"RevisionDate": "2020-02-13T09:13:15.55+0000",
"RevisionEngineer": "John Smith"
}
]
}
]
}
```
We then might wish to search for wellboremarkerset WPCs having any Marker revised on a certain date by a certain engineer:
```json
{
"kind" : "osdu:wks:work-product-component--wellboremarkerset:1.0.0",
"query":"nested(data.Markers, nested(data.Markers.Revisions, (RevisionDate:\"2020-02-13T09:13:15.55+0000\" AND RevisionEngineer:\"John Smith\")))"
}
```
### Nested and "non-nested" queries parts combinations
We can combine both types of queries in one request, eg:
```json
{
"kind" : "osdu:wks:work-product-component--wellboremarkerset:1.0.0",
"query":"data.Name:\"Example Name\" AND nested(data.Markers, (MarkerMeasuredDepth:(>10000)))"
}
```
## Sort <a name="sort-queries"></a>
The sort feature supports int, float, double, long and datetime fields. It does not currently support array object, nested object or string fields, and records in which the sort field is of one of those types will not be returned in the response.
Starting from version 0.9.0, object-array nodes in data schemas can be marked with a "nested" hint, and data indexed that way can be used for sorting. See "Sort by nested arrays objects" below.
Records that either do not have the sorted fields or have an empty value for them will be listed last in the result.
......@@ -339,12 +433,79 @@ E.g. Given
}
}
```
The above request payload asks search service to sort on "data.Id" in an ascending order, and the expected response will have "totalCount: 10" (instead of 20, please note that the 10 returned records are only from common:welldb:wellbore:1.0.0 because the data.Id in common:welldb:well:1.0.0 is of data type string, which is not currently supported - and therefore, will not be returned) and should list the 5 records which have empty data.Id value at last.
The above request payload asks search service to sort on "data.Id" in ascending order, and the expected response will have "totalCount: 10" (instead of 20, please note that the 10 returned records are only from common:welldb:wellbore:1.0.0 because the data.Id in common:welldb:well:1.0.0 is of data type string, which is not currently supported - and therefore, will not be returned) and should list the 5 records which have empty data.Id value last.