Commit e04b8189 authored by snehal jagtap's avatar snehal jagtap
Browse files

initial commit

parent 64976ee6
Pipeline #62035 failed with stages
in 3 minutes and 7 seconds
# Created by .ignore support plugin (hsz.mobi)
### Scala template
*.class
*.log
# sbt specific
.cache
.history
.lib/
dist/*
target/
lib_managed/
src_managed/
project/boot/
project/plugins/project/
# Scala-IDE specific
.scala_dependencies
.worksheet
# ENSIME specific
.ensime_cache/
.ensime
/.idea/
/.vs
# Intro
This service consumes inbound streams from Google Pub/Sub topics and stores them in Bigtable bi-temporal storage.
## Planned -
Ability to stream individual time series to consumers on a pub sub endpoint ( one topic per stream)
## Steps to build
sbt clean test coverageReport universal:packageZipTarball
Note: integration tests require access to the application default credentials in the runtime environment; these must be set before executing `it:test`. (Not enabled as of now.)
run build.sh to create docker container -
build.sh $(Build.BuildNumber) $(SONAR_URL) $(SONAR_LOGIN) $(ACCOUNT_JSON_PATH)
gcloud docker -- push gcr.io/production-data-integration/ingestion-pipeline:$(Build.BuildNumber)
gcloud docker -- push gcr.io/production-data-integration/ingestion-pipeline:latest
docker rmi -f gcr.io/production-data-integration/ingestion-pipeline:$(Build.BuildNumber) gcr.io/production-data-integration/ingestion-pipeline:latest
## Steps to build locally
Set environment variable BIGTABLE_EMULATOR_HOST to localhost:8461 and run BigTable Emulator on that port
Run following commands in Terminal
set BIGTABLE_EMULATOR_HOST=localhost:8461
gcloud beta emulators bigtable start --host-port=localhost:8461
Run
sbt clean coverage test it:test coverageReport coverageOff
Run
## Steps to deploy
to dev -
$(System.DefaultWorkingDirectory)/Ingestion-Pipeline/$(Build.BuildNumber)/deploy.sh $(Build.BuildNumber) dev-env $(ACCOUNT_JSON_PATH)
## Configuration
A single configuration file named app-config.conf is required to load the app inside container.
It should be made available in the k8s environment using "secret" named ingestion-pipeline-<ENV_TAG>-config and mounted on volume path /opt/config-mount
\ No newline at end of file
# Azure DevOps pipeline for osdu-cloud-data-ingestion.
# Build number: date plus daily revision (e.g. 20230401.1).
name: $(Date:yyyyMMdd)$(Rev:.r)

# CI runs on master and on feature branches.
trigger:
  - master
  - feature/*

variables:
  # Selects which deploy-stage template is expanded below.
  - name: splitPipelineEnabled
    value: true

resources:
  # Shared pipeline templates and release-management variables live in
  # sibling git repos referenced by @templates / @variables below.
  repositories:
    - repository: templates
      type: git
      name: azure-pipeline-templates
      ref: master
    - repository: variables
      type: git
      name: vsts-release-management
      ref: master
  # Downstream test pipelines this pipeline consumes artifacts from.
  pipelines:
    - pipeline: Automation-Tests
      project: PDI
      source: Automation-Tests
      branch: master
    - pipeline: vsts-testresult-uploader
      project: PDI
      source: vsts-testresult-uploader
      branch: master
    - pipeline: pds-restassured-tests
      project: PDI
      source: pds-restassured-tests
      branch: master

stages:
  - template: build-stage.yml@templates
    parameters:
      name: osdu-cloud-data-ingestion
  # Exactly one of the two deploy-stage templates is expanded at compile
  # time, depending on splitPipelineEnabled.
  - ${{ if eq( variables['splitPipelineEnabled'], 'false' ) }}:
    - template: run-deploy-stage-list.yml@templates
      parameters:
        name: osdu-cloud-data-ingestion
        restAssuredSanityTest:
          enabled: true
          tagName: AvocetSanity
        smokeTest:
          enabled: false
        sanityTest:
          enabled: false
          testXMLFile: PDI-SanityTest-AvocetTestHarness.xml
        regressionTest:
          enabled: false
  - ${{ if eq( variables['splitPipelineEnabled'], 'true' ) }}:
    - template: run-deploy-stage-list-sp.yml@templates
      parameters:
        name: osdu-cloud-data-ingestion
        restAssuredSanityTest:
          enabled: true
          tagName: AvocetSanity
        smokeTest:
          enabled: false
        sanityTest:
          enabled: false
          testXMLFile: PDI-SanityTest-AvocetTestHarness.xml
        regressionTest:
          enabled: false
\ No newline at end of file
// build.sbt — sbt build definition for the osdu-cloud-data-ingestion service.
import org.opengroup.osdu.production.dependencies.CommanCompilerFlags._
import org.opengroup.osdu.production.dependencies.CommonLibs._
import sbt.Keys._
import sbt._

lazy val scalaVer = "2.12.4"
lazy val osduOrg = "org.opengroup.osdu.production"

// Dedicated "it" configuration so integration tests run via `it:test`.
lazy val ItTest = config("it") extend Test

// Integration tests run in-process and sequentially (they share external
// state such as the Bigtable emulator).
fork in IntegrationTest := false
parallelExecution in IntegrationTest := false

// Generated and backup/restore code is excluded from the coverage gate;
// the build fails if overall coverage drops below 90%.
coverageExcludedPackages := "org.opengroup.osdu.production.domain.generated;org.opengroup.osdu.production.backup;org.opengroup.osdu.production.restore.*"
coverageMinimum := 90
coverageFailOnMinimum := true

lazy val core =
  Project("osdu-cloud-data-ingestion", file("."))
    .configs(IntegrationTest)
    .configs(ItTest)
    .settings(Defaults.itSettings: _*)
    .settings(
      organization := osduOrg,
      scalaVersion := scalaVer,
      scalacOptions ++= org.opengroup.osdu.production.dependencies.CommanCompilerFlags.scalaOptions,
      javacOptions ++= javac18,
      fork in Test := true,
      // Private ivy repositories hosted on GCS buckets.
      resolvers ++= Seq(GCSResolver.forBucket("pdi-ivy-snapshots-repo"), GCSResolver.forBucket("pdi-ivy-release-repo")),
      dependencyOverrides += "commons-codec" % "commons-codec" % "1.13",
      // Quartz is added separately so the blanket exclusion below (which
      // strips the transitive quartz pulled in by other deps) keeps this one.
      libraryDependencies ++= Seq( "org.quartz-scheduler" % "quartz" % "2.3.2" ),
      libraryDependencies ++= Seq(
        typeSafeconfig,
        scalactic,
        scalaLogging,
        logBack,
        scalaTest,
        akkaActor,
        akkaTestKit,
        akkaSlf4j,
        akkaStream,
        scoverage,
        mockito,
        scalaMock,
        scalaCheck,
        json4sNative,
        bigtableGrpcClient,
        cloudPubsub,
        protobuf,
        googleCloudStorage,
        "org.opengroup.osdu.production" %% "osdu-redis-common" % "0.2.13" exclude("io.spray", "spray-json_2.12") exclude("commons-io", "commons-io"),
        "org.opengroup.osdu.production" %% "osdu-akka-stream-commons" % "0.2.26",
        "org.opengroup.osdu.production" %% "osdu-scala-commons" % "0.3.36",
        "org.opengroup.osdu.production" %% "osdu-security-commons" % "0.1.40" exclude("com.fasterxml.jackson.core","jackson-databind"),
        "com.fasterxml.jackson.core" % "jackson-databind" % "2.11.2",
        akkaQuartzScheduler,
        "commons-io" % "commons-io" % "2.7"
      // Blanket exclusions of conflicting transitive deps (logging, guava,
      // codec, oauth, c3p0, quartz, httpclient).
      ).map(_.exclude("org.slf4j", "slf4j-log4j12"))
        .map(_.exclude("com.google.guava","guava-20.0"))
        .map(_.exclude("com.google.guava","guava-19.0"))
        .map(_.exclude("commons-codec" , "commons-codec"))
        .map(_.exclude("com.google.oauth-client", "google-oauth-client"))
        .map(_.exclude("com.mchange", "c3p0"))
        .map(_.exclude("org.quartz-scheduler", "quartz"))
        .map(_.exclude("org.apache.httpcomponents", "httpclient")),
      // Re-add the single pinned httpclient after the exclusion above.
      libraryDependencies ++= Seq ("org.apache.httpcomponents" % "httpclient" % "4.5.13")
    ).enablePlugins(JavaAppPackaging)
    .enablePlugins(UniversalPlugin)

name := "osdu-cloud-data-ingestion"
version := "1.0.0-SNAPSHOT"
name in Universal := name.value
packageName in Universal := packageName.value
mainClass in Compile := Some("org.opengroup.osdu.production.App")

// Ship logback configs and the restore config inside the universal package.
mappings in Universal += ((resourceDirectory in Compile).value / "logback.xml") -> "logback.xml"
mappings in Universal += ((resourceDirectory in Compile).value / "logback-restore.xml") -> "logback-restore.xml"
mappings in Universal += ((resourceDirectory in Compile).value / "restore.conf") -> "restore.conf"

// No javadoc/source artifacts in published packages.
publishArtifact in(Compile, packageDoc) := false
publishArtifact in(Universal, packageDoc) := false
publishArtifact in(Universal, packageSrc) := false

// Prepend "." to the generated bash launcher's classpath so config files in
// the working directory are picked up at runtime.
lazy val ClasspathPattern = "declare -r app_classpath=\"(.*)\"\n".r
bashScriptDefines := bashScriptDefines.value.map {
  case ClasspathPattern(classpath) => "declare -r app_classpath=\".:" + classpath + "\"\n"
  case _@entry => entry
}

// Artifact file names omit the Scala version suffix: name-revision-type.ext
artifactName := { (sv: ScalaVersion, module: ModuleID, artifact: Artifact) =>
  artifact.name + "-" + module.revision + "-" + artifact.`type` + "." + artifact.extension
}
#!/bin/bash
# build.sh — CI build script.
# Usage: build.sh <SONAR_LOGIN> <NEXUS_PASSWORD> <BINTRAY_PASSWORD>
# Also requires BUILD_BUILDNUMBER, GOOGLE_PROJECT_NAME,
# CONTAINER_REGISTRY_NAME and SONAR_URL in the environment.
# Steps: validate env -> sbt tests with coverage -> package -> dependency
# check -> SonarQube scan -> build ingestion/backup/restore docker images.
declare -A envVars=( ["BUILD_BUILDNUMBER"]=$BUILD_BUILDNUMBER ["GOOGLE_PROJECT_NAME"]=$GOOGLE_PROJECT_NAME ["CONTAINER_REGISTRY_NAME"]=$CONTAINER_REGISTRY_NAME ["SONAR_URL"]=$SONAR_URL
["SONAR_LOGIN"]=$1 ["NEXUS_PASSWORD"]=$2 ["BINTRAY_PASSWORD"]=$3 )
declare -a missingEnvVars
# Collect every required variable that is empty or unset.
for key in ${!envVars[@]}
do
if [[ -z ${envVars[${key}]} ]]
then
missingEnvVars+=( ${key} )
fi
done
# Abort early, listing what is missing (${joined:1} strips the leading ",").
if [ ${#missingEnvVars[@]} -gt 0 ]
then
joined=$(printf ", %s" "${missingEnvVars[@]}")
echo "Missing ${joined:1}"
exit 1
fi
export SONAR_LOGIN=$1
export NEXUS_PASSWORD=$2
export BINTRAY_PASSWORD=$3
# Integration tests talk to the Bigtable emulator on host "bigtable".
export BIGTABLE_EMULATOR_HOST=bigtable:8461
# Inject build/project values into the sonar and integration-test configs.
sed -i -e "s/PROJECT_BUILD_VERSION/${BUILD_BUILDNUMBER}/g" sonar-project.properties
sed -i -e "s/GLOBAL_GOOGLE_PROJECT_NAME/${GLOBAL_GOOGLE_PROJECT_NAME}/g" src/it/resources/it-test.conf
sed -i -e "s/GOOGLE_PROJECT_NAME/${GOOGLE_PROJECT_NAME}/g" src/it/resources/it-test.conf
sbt -sbt-launch-repo https://repo1.maven.org/maven2 clean coverage test it:test coverageReport coverageOff
#Fail if coverage has dropped
rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
sbt universal:packageBin
sbt dependencyCheckAggregate
# Install the sonar scanner on first use only.
# NOTE(review): sonarsource.bintray.com has been sunset — this download will
# likely fail on a clean agent; confirm and switch to binaries.sonarsource.com.
if [ ! -f /usr/bin/sonar-scanner ]
then
wget https://sonarsource.bintray.com/SonarQube/org/sonarsource/scanner/cli/sonar-scanner-cli/3.0.3.778/sonar-scanner-cli-3.0.3.778.zip && unzip sonar-scanner-cli-3.0.3.778.zip
cp -fR sonar-scanner-3.0.3.778/* /usr/
fi
sonar-scanner -Dsonar.host.url=${SONAR_URL} -Dsonar.login=${SONAR_LOGIN}
# Archive the coverage report for the pipeline to pick up.
tar -cvzf coverage.tar.gz target/scala-2.12/scoverage-report
# Unpack the universal zip into build/ so the Dockerfiles can COPY its tree.
cp target/universal/osdu-cloud-data-ingestion.zip build/osdu-cloud-data-ingestion.zip
unzip build/osdu-cloud-data-ingestion.zip -d build
rm build/osdu-cloud-data-ingestion.zip
cd build
# Build the three runtime images: ingestion service, backup cron, restore jobs.
docker build -f Dockerfile -t gcr.io/${CONTAINER_REGISTRY_NAME}/osdu-cloud-data-ingestion:${BUILD_BUILDNUMBER} -t gcr.io/${CONTAINER_REGISTRY_NAME}/osdu-cloud-data-ingestion:latest .
## Backup container
docker build -f Dockerfile_backup -t gcr.io/${CONTAINER_REGISTRY_NAME}/timeseries-backup:${BUILD_BUILDNUMBER} -t gcr.io/${CONTAINER_REGISTRY_NAME}/timeseries-backup:latest .
## restore container
docker build -f Dockerfile_restore -t gcr.io/${CONTAINER_REGISTRY_NAME}/timeseries-restore:${BUILD_BUILDNUMBER} -t gcr.io/${CONTAINER_REGISTRY_NAME}/timeseries-restore:latest .
# Runtime image for the ingestion service (JRE-only, slim).
# Expects the unpacked universal distribution in the build context
# (prepared by build.sh).
FROM openjdk:8u265-jre-slim
COPY osdu-cloud-data-ingestion /opt/osdu-cloud-data-ingestion
COPY run.sh /opt/osdu-cloud-data-ingestion/bin
# chmod already uses an absolute path, so the original `cd /opt &&` was a
# no-op; conventional option-first form.
RUN chmod -R +x /opt/osdu-cloud-data-ingestion/bin
WORKDIR /opt/osdu-cloud-data-ingestion/bin
ENTRYPOINT [ "/opt/osdu-cloud-data-ingestion/bin/run.sh", "app-conf" ]
\ No newline at end of file
# Runtime image for the timeseries backup CronJob (JRE-only, slim).
FROM openjdk:8u265-jre-slim
COPY osdu-cloud-data-ingestion /opt/osdu-cloud-data-ingestion
COPY run_backup.sh /opt/osdu-cloud-data-ingestion/bin
# chmod already uses an absolute path, so the original `cd /opt &&` was a
# no-op; conventional option-first form.
RUN chmod -R +x /opt/osdu-cloud-data-ingestion/bin
WORKDIR /opt/osdu-cloud-data-ingestion/bin
ENTRYPOINT [ "/opt/osdu-cloud-data-ingestion/bin/run_backup.sh"]
\ No newline at end of file
# Runtime image for the timeseries restore Jobs (JRE-only, slim).
# Both restore entrypoints are shipped; the data-restore script is the
# default ENTRYPOINT, the metadata-publish Job overrides `command`.
FROM openjdk:8u265-jre-slim
COPY osdu-cloud-data-ingestion /opt/osdu-cloud-data-ingestion
COPY restore-data-job.sh /opt/osdu-cloud-data-ingestion/bin
COPY restore-metadata-publish-job.sh /opt/osdu-cloud-data-ingestion/bin
# chmod already uses an absolute path, so the original `cd /opt &&` was a
# no-op; conventional option-first form.
RUN chmod -R +x /opt/osdu-cloud-data-ingestion/bin
WORKDIR /opt/osdu-cloud-data-ingestion/bin
ENTRYPOINT [ "/opt/osdu-cloud-data-ingestion/bin/restore-data-job.sh"]
\ No newline at end of file
#!/bin/bash
# Kubernetes Job entrypoint: restores time-series data from backup.
# Expects GOOGLE_PROJECT_NAME, BACKUP_TENANT_NAME and RESTORE_TENANT_NAME in
# the environment (set by the Job manifest); echoed here for job-log debugging.
echo $GOOGLE_PROJECT_NAME
echo $BACKUP_TENANT_NAME
echo $RESTORE_TENANT_NAME
# Swap in the restore-specific logback config. Copies rather than moves, so
# logback-restore.xml stays available in the image's filesystem.
rm -f /opt/osdu-cloud-data-ingestion/logback.xml
cp /opt/osdu-cloud-data-ingestion/logback-restore.xml /opt/osdu-cloud-data-ingestion/logback.xml
# Launch the packaged restore binary with its dedicated config.
/opt/osdu-cloud-data-ingestion/bin/restore-data-job /opt/osdu-cloud-data-ingestion/restore.conf
\ No newline at end of file
#!/bin/bash
# Kubernetes Job entrypoint: publishes restored time-series metadata.
# Expects GOOGLE_PROJECT_NAME, BACKUP_TENANT_NAME and RESTORE_TENANT_NAME in
# the environment (set by the Job manifest); echoed here for job-log debugging.
echo $GOOGLE_PROJECT_NAME
echo $BACKUP_TENANT_NAME
echo $RESTORE_TENANT_NAME
# Replace the default logback config with the restore variant (mv: the
# original logback-restore.xml is consumed, unlike the cp in the data job).
mv -f /opt/osdu-cloud-data-ingestion/logback-restore.xml /opt/osdu-cloud-data-ingestion/logback.xml
# Launch the packaged metadata-publish binary with its dedicated config.
/opt/osdu-cloud-data-ingestion/bin/restore-metadata-publish-job /opt/osdu-cloud-data-ingestion/restore.conf
\ No newline at end of file
#!/bin/bash
# Container entrypoint for the ingestion service.
# Links the secret-mounted config files (see README: secret mounted at
# /opt/config-mount) into the app bin directory, then starts the service.
# $1: config base name passed to the app (the Dockerfile passes "app-conf").
ln -s /opt/config-mount/app-config.conf /opt/osdu-cloud-data-ingestion/bin/app-conf.conf
ln -s /opt/config-mount/metrics.conf /opt/osdu-cloud-data-ingestion/bin/metrics.conf
# Quote "$1" so an empty or whitespace-containing argument is passed through
# verbatim instead of being word-split or dropped.
/opt/osdu-cloud-data-ingestion/bin/osdu-cloud-data-ingestion "$1"
#!/bin/bash
# CronJob container entrypoint: launches the time-series backup binary from
# the packaged distribution. Configuration comes from the container env.
/opt/osdu-cloud-data-ingestion/bin/backup-job
# CronJob running the timeseries backup container.
# IMAGE_NAME / ENV_TAG / GLOBAL_GOOGLE_PROJECT_NAME are placeholder tokens
# substituted by sed in deploy.sh before `kubectl apply`.
apiVersion: batch/v1beta1
kind: CronJob
metadata:
  name: timeseries-backup-job
spec:
  # Every 5 minutes.
  schedule: "*/5 * * * *"
  # Skip a run if the previous one is still active.
  concurrencyPolicy: Forbid
  startingDeadlineSeconds: 100
  jobTemplate:
    spec:
      template:
        spec:
          containers:
            - name: timeseries-backup-job
              image: IMAGE_NAME
              resources:
                requests:
                  memory: "4Gi"
              env:
                # NOTE(review): dashed env-var names (ENV-TAG-KEY etc.) are
                # legal in k8s but unusual for process environments — confirm
                # the app reads them by these exact names.
                - name: ENV-TAG-KEY
                  value: "ENV_TAG"
                - name: GOOGLE-PROJECT-NAME-KEY
                  value: "GLOBAL_GOOGLE_PROJECT_NAME"
                - name: BUCKET-NAME-KEY
                  value: "ENV_TAG-prodops-backup-bucket"
          restartPolicy: Never
#!/bin/bash
# deploy.sh — copies this build's images into the target registry (if not
# already present) and applies the backup CronJob, ingestion Deployment and
# metrics Service to the target GKE cluster.
# Requires: BUILD_BUILDNUMBER, GOOGLE_PROJECT_NAME, CONTAINER_REGISTRY_NAME,
# COMPUTE_ZONE, CLUSTER_NAME, CLUSTER_TYPE, SOURCE_CONTAINER_REGISTRY_NAME.
declare -A envVars=( ["BUILD_BUILDNUMBER"]=$BUILD_BUILDNUMBER ["GOOGLE_PROJECT_NAME"]=$GOOGLE_PROJECT_NAME ["CONTAINER_REGISTRY_NAME"]=$CONTAINER_REGISTRY_NAME
["COMPUTE_ZONE"]=$COMPUTE_ZONE ["CLUSTER_NAME"]=$CLUSTER_NAME ["CLUSTER_TYPE"]=$CLUSTER_TYPE ["SOURCE_CONTAINER_REGISTRY_NAME"]=$SOURCE_CONTAINER_REGISTRY_NAME )
declare -a missingEnvVars
declare -a imageList
# Collect every required variable that is empty or unset.
for key in ${!envVars[@]}
do
if [[ -z ${envVars[${key}]} ]]
then
missingEnvVars+=(${key})
fi
done
# Abort early, listing what is missing (${joined:1} strips the leading ",").
if [ ${#missingEnvVars[@]} -gt 0 ]
then
joined=$(printf ", %s" "${missingEnvVars[@]}")
echo "Missing ${joined:1}"
exit 1
fi
# Fetch cluster credentials; regional clusters need the beta command.
# NOTE(review): ${CLUSTER_TYPE} is unquoted inside [ ] — an empty value is a
# syntax error, and `==` in [ ] is non-POSIX; prefer [[ "$CLUSTER_TYPE" == ... ]].
if [ ${CLUSTER_TYPE} == 'Zonal' ]; then
gcloud container clusters get-credentials ${CLUSTER_NAME} --zone ${COMPUTE_ZONE}
elif [ ${CLUSTER_TYPE} == "Regional" ]; then
export CLOUDSDK_CONTAINER_USE_V1_API_CLIENT=false && export CLOUDSDK_CONTAINER_USE_V1_API=false && gcloud beta container clusters get-credentials ${CLUSTER_NAME} --region ${COMPUTE_ZONE} --project ${GOOGLE_PROJECT_NAME}
fi
kubectl cluster-info --token=`gcloud auth application-default print-access-token` --cluster=gke_${GOOGLE_PROJECT_NAME}_${COMPUTE_ZONE}_${CLUSTER_NAME}
# Copy images from the source registry unless this build tag already exists
# in the target registry.
imageList=$(gcloud container images list-tags gcr.io/${CONTAINER_REGISTRY_NAME}/ingestion-pipeline --filter=${BUILD_BUILDNUMBER} --format=text)
if [ -z "${imageList[0]}" ]; then
gcloud container images -q add-tag gcr.io/${SOURCE_CONTAINER_REGISTRY_NAME}/ingestion-pipeline:${BUILD_BUILDNUMBER} gcr.io/${CONTAINER_REGISTRY_NAME}/ingestion-pipeline:${BUILD_BUILDNUMBER}
gcloud container images -q add-tag gcr.io/${SOURCE_CONTAINER_REGISTRY_NAME}/timeseries-backup:${BUILD_BUILDNUMBER} gcr.io/${CONTAINER_REGISTRY_NAME}/timeseries-backup:${BUILD_BUILDNUMBER}
# NOTE(review): `echo $?` only reflects the second add-tag above and the
# backtick form is an antipattern; test $? directly after each command.
if [ `echo $?` == 1 ]; then
echo "Error occured while copying image"
exit 1
fi
else
echo "Image already exist(s)"
fi
# Substitute image names and environment tags into the manifests in place.
sed -i -e "s/IMAGE_NAME/gcr.io\/${CONTAINER_REGISTRY_NAME}\/ingestion-pipeline:${BUILD_BUILDNUMBER}/g" ingestion-pipeline-deployment.yml
sed -i -e "s/IMAGE_NAME/gcr.io\/${CONTAINER_REGISTRY_NAME}\/timeseries-backup:${BUILD_BUILDNUMBER}/g" backup-cronjob.yaml
sed -i -e "s/ENV_TAG/${CLUSTER_NAME}/g" backup-cronjob.yaml
sed -i -e "s/GLOBAL_GOOGLE_PROJECT_NAME/${GLOBAL_GOOGLE_PROJECT_NAME}/g" backup-cronjob.yaml
# Point the deployment at the newest ingestion-pipeline config secret
# (config_list is sorted by creation time; [-1] is the most recent).
if [ "`kubectl get secrets -l name=ingestion-pipeline 2>/dev/null | wc -l`" -gt "0" ]; then
declare -a config_list=(`kubectl get secrets -l name=ingestion-pipeline --sort-by=.metadata.creationTimestamp -o jsonpath={.items[*].metadata.name}`)
current_config_name=`echo ${config_list[-1]}`
sed -i -e "s/CONFIGNAME/$current_config_name/g" ingestion-pipeline-deployment.yml
else
echo "Configs for this deployment are required and does not exist. Kindly create one"
fi
kubectl apply -f backup-cronjob.yaml --cluster=gke_${GOOGLE_PROJECT_NAME}_${COMPUTE_ZONE}_${CLUSTER_NAME}
kubectl apply -f ingestion-pipeline-deployment.yml --cluster=gke_${GOOGLE_PROJECT_NAME}_${COMPUTE_ZONE}_${CLUSTER_NAME}
kubectl apply -f timeseries-ingestion-service.yaml
# NOTE(review): the HPA manifest in this repo is named "ingestion-pipeline",
# not "osdu-cloud-data-ingestion" — confirm which name is live before relying
# on this delete.
kubectl delete hpa osdu-cloud-data-ingestion --ignore-not-found=true
# Deployment for the ingestion-pipeline service.
# IMAGE_NAME and CONFIGNAME are placeholder tokens substituted by sed in
# deploy.sh (CONFIGNAME becomes the newest ingestion-pipeline config secret).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ingestion-pipeline
spec:
  replicas: 3
  template:
    metadata:
      labels:
        app: ingestion-pipeline
    spec:
      containers:
        - name: ingestion-pipeline
          image: IMAGE_NAME
          resources:
            requests:
              memory: "1Gi"
          volumeMounts:
            # App config is read from /opt/config-mount (see run.sh symlinks).
            - name: application-config
              mountPath: /opt/config-mount
      volumes:
        - name: application-config
          secret:
            secretName: CONFIGNAME
  selector:
    matchLabels:
      app: ingestion-pipeline
# Job that restores time-series data from backup.
# <REPLACE_*> tokens must be substituted with environment-specific values
# before applying.
kind: Job
apiVersion: batch/v1
metadata:
  name: timeseries-restore-data-job
  namespace: default
spec:
  # High fan-out restore: up to 400 pods in parallel.
  parallelism: 400
  template:
    spec:
      containers:
        - name: timeseries-restore-data-job
          image: gcr.io/<REPLACE_CONTAINER_REGISTRY_GOOGLE_PROJECT_NAME>/timeseries-restore:latest
          imagePullPolicy: Always
          resources:
            requests:
              memory: "6Gi"
          # Overrides the image default only nominally — this is the same
          # script as the image ENTRYPOINT.
          command: ["/opt/osdu-cloud-data-ingestion/bin/restore-data-job.sh"]
          env:
            - name: GOOGLE_PROJECT_NAME
              value: <REPLACE_GOOGLE_PROJECT_NAME>
            - name: GLOBAL_GOOGLE_PROJECT_NAME
              value: <REPLACE_GLOBAL_GOOGLE_PROJECT_NAME>
            - name: BACKUP_TENANT_NAME
              value: <REPLACE_BACKUP_TENANT_NAME>
            - name: RESTORE_TENANT_NAME
              value: <REPLACE_RESTORE_TENANT_NAME>
      restartPolicy: Never
# Job that re-publishes restored time-series metadata.
# Uses the same restore image as the data job but overrides the command to
# run the metadata-publish entrypoint. <REPLACE_*> tokens must be substituted
# with environment-specific values before applying.
kind: Job
apiVersion: batch/v1
metadata:
  name: timeseries-restore-metadata-publish-job
  namespace: default
spec:
  template:
    spec:
      containers:
        - name: timeseries-restore-metadata-publish-job
          image: gcr.io/<REPLACE_CONTAINER_REGISTRY_GOOGLE_PROJECT_NAME>/timeseries-restore:latest
          command: ["/opt/osdu-cloud-data-ingestion/bin/restore-metadata-publish-job.sh"]
          imagePullPolicy: Always
          env:
            - name: GOOGLE_PROJECT_NAME
              value: <REPLACE_GOOGLE_PROJECT_NAME>
            - name: GLOBAL_GOOGLE_PROJECT_NAME
              value: <REPLACE_GLOBAL_GOOGLE_PROJECT_NAME>
            - name: BACKUP_TENANT_NAME
              value: <REPLACE_BACKUP_TENANT_NAME>
            - name: RESTORE_TENANT_NAME
              value: <REPLACE_RESTORE_TENANT_NAME>
      restartPolicy: Never
# HorizontalPodAutoscaler scaling the ingestion-pipeline Deployment on
# custom network-packet-rate pod metrics.
apiVersion: autoscaling/v2beta1
kind: HorizontalPodAutoscaler
metadata:
  name: ingestion-pipeline
spec:
  scaleTargetRef:
    # apps/v1 matches the Deployment manifest in this repo; the previous
    # extensions/v1beta1 group is deprecated and no longer serves Deployments.
    apiVersion: apps/v1
    kind: Deployment
    name: ingestion-pipeline
  minReplicas: 1
  maxReplicas: 4
  metrics:
    - type: Pods
      pods:
        metricName: network_packets_received_rate
        targetAverageValue: 10000m
    - type: Pods
      pods:
        metricName: network_packets_transmit_rate
        targetAverageValue: 10000m
\ No newline at end of file
# NodePort Service exposing the ingestion-pipeline metrics endpoint,
# annotated for Prometheus scraping on port 9091.
apiVersion: v1
kind: Service
metadata:
  name: ingestion-pipeline-metrics
  annotations:
    prometheus.io/port: "9091"
    prometheus.io/scrape: "true"
spec:
  type: NodePort
  selector:
    # Fixed: no space before the colon (was `app :`), per YAML convention.
    app: ingestion-pipeline
  ports:
    - protocol: TCP
      port: 9091
      targetPort: 9091
      name: ingestion-pipeline-metrics
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment