Commit e04b8189 authored by snehal jagtap

initial commit

parent 64976ee6
metrics_config {
stack_driver {
kubernetesHostNameValue = "KUBERNETES_SERVICE_HOST",
monitoredResourceName = "k8s_cluster",
labels = {
clusterNameValue = "cluster_name",
locationNameValue = "location"
}
}
custom_metrics = [
{
"name": "sli_pss_writeback_api_data_ingestion_latency",
"description": "Metric to capture the data ingestion latency of writeback API (single + batch)",
"type": {
"count": false,
"sum": false,
"distribution": true
},
"unit": "ms",
"labels": {
"project": "project",
"tenant": "tenant",
"request_type":"request_type"
}
}
]
}
package org.opengroup.osdu.production
import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import akka.util.Timeout
import com.google.bigtable.v2.{ReadRowsRequest, RowSet}
import com.google.cloud.bigtable.grpc.BigtableSession
import com.google.cloud.pubsub.v1.{Publisher, TopicAdminClient}
import com.google.common.primitives.Longs
import com.google.protobuf.ByteString
import com.google.pubsub.v1.{ProjectTopicName, PubsubMessage}
import org.opengroup.osdu.production.common.LogF
import org.opengroup.osdu.production.common.auth.{CfsServiceAuthConfiguration, ServiceOAuthProvider}
import com.typesafe.config.{Config, ConfigFactory}
import org.opengroup.osdu.production.config.{BigTableConfiguration, ServiceConfiguration}
import org.opengroup.osdu.production.domain.generated.DataPointsList
import org.opengroup.osdu.production.domain.generated.DataPointsList.DataPoints
import org.opengroup.osdu.production.graph.GraphBuilder
import org.opengroup.osdu.production.storage.BigTableMigration
import org.scalatest.concurrent.ScalaFutures
import org.scalatest.time.{Millis, Seconds, Span}
import org.scalatest.{AsyncFunSpec, BeforeAndAfterAll, DiagrammedAssertions, Matchers}
import java.util.concurrent.{Executors, TimeUnit}
import scala.concurrent.ExecutionContext
import scala.concurrent.duration._
import scala.language.postfixOps
class EndToEndITTests extends AsyncFunSpec with Matchers with DiagrammedAssertions with ScalaFutures with BeforeAndAfterAll with LogF with TestHelper {
implicit val ec: ExecutionContext = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(20))
private val testConf: Config = ConfigFactory.load("it-test")
implicit val sc: ServiceConfiguration = ServiceConfiguration(testConf)
private val currentTime = System.currentTimeMillis()
private val topicAdminClient = TopicAdminClient.create()
implicit val actorSystem: ActorSystem = ActorSystem("test-system", Some(testConf), defaultExecutionContext = Some(ec))
implicit val defaultPatience = PatienceConfig(timeout = Span(5, Seconds), interval = Span(500, Millis))
implicit val akkaTimeout = Timeout(50000 millis)
implicit val mat: ActorMaterializer = ActorMaterializer()
implicit val bigTableConf: BigTableConfiguration = BigTableConfiguration(testConf)
val session = new BigtableSession(sc.bigTable.opts)
val serviceAuthProvider = new ServiceOAuthProvider {
override def getToken(cfsServiceAuthConfiguration: CfsServiceAuthConfiguration): String = "DummyToken"
}
val cachecoherencePublisher: Publisher = Publisher.newBuilder(ProjectTopicName.of(testConf.getString("cloud.global-project.name"), testConf.getString("cloud.pubsub.topic.name"))).build()
val cachecoherencePublisher2: Publisher = Publisher.newBuilder(ProjectTopicName.of(testConf.getString("cloud.global-project.name"), testConf.getString("cloud.pubsub.cache-coherence-outbound-2.outbound.topic.name"))).build()
implicit val serviceContext = ServiceContext()(mat, actorSystem, sc, serviceAuthProvider, cachecoherencePublisher, cachecoherencePublisher2)
override protected def beforeAll(): Unit = {
Option(topicAdminClient.getTopic(ProjectTopicName.of(sc.pubsub.project, sc.pubsub.topicId))).getOrElse(topicAdminClient.createTopic(ProjectTopicName.of(sc.pubsub.project, sc.pubsub.topicId)))
BigTableMigration.runMigration(testConf)(session)
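// publish an empty DataPoints message to each topic first, presumably as a connectivity/warm-up check before the real payloads below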
cachecoherencePublisher.publish(PubsubMessage.newBuilder().setData(DataPoints.newBuilder().build().toByteString).putAttributes("test", "test").putAttributes("requestId", "requestId").build()).get(10, TimeUnit.SECONDS)
cachecoherencePublisher2.publish(PubsubMessage.newBuilder().setData(DataPointsList.DataPoints.newBuilder().build().toByteString).putAttributes("test", "test").putAttributes("requestId", "requestId").build()).get(10, TimeUnit.SECONDS)
val messageIds = cachecoherencePublisher.publish(createPubSubMessageForSpecificTime(20, 1, currentTime)).get(10, TimeUnit.SECONDS)
val messageIds2 = cachecoherencePublisher2.publish(createPubSubMessageForSpecificTime(20, 1, currentTime)).get(10, TimeUnit.SECONDS)
}
describe("Cloud data ingestion pipeline with streaming data collection and storage") {
it("should be able to handle valid messages flowing through pub sub to bigtable") {
GraphBuilder.runnableGraph(session, testConf).run()
val StreamId: Long = 10000
val rowSetFilter = RowSet.newBuilder()
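// expected row keys are 8 bytes of big-endian stream id followed by 8 bytes of big-endian timestamp, one per published data point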
val expectedTimeStamps = (1 to 20).map(n => currentTime + n).toSet
expectedTimeStamps.foreach(n => rowSetFilter.addRowKeys(ByteString.copyFrom(Longs.toByteArray(StreamId) ++ Longs.toByteArray(n))))
val request = ReadRowsRequest.newBuilder().setRowsLimit(10000).setRows(rowSetFilter.build()).setTableName(s"${sc.bigTable.instanceName}/tables/${sc.bigTable.dataTableName}").build()
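// give the running graph time to drain Pub/Sub and persist to Bigtable before reading the rows back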
Thread.sleep(10000)
val resultSet = session.getDataClient.readRows(request)
val resultTimeStamps = resultSet.next(10000).map(row => Longs.fromByteArray(row.getKey.toByteArray.slice(8, 16))).toSet
assertResult(expectedTimeStamps)(resultTimeStamps)
assert(resultTimeStamps.size == 20)
}
it("should handle existing tables gracefully") {
BigTableMigration.runMigration(testConf)(session)
assert(true)
}
}
override protected def afterAll(): Unit = {
cachecoherencePublisher.shutdown()
cachecoherencePublisher2.shutdown()
topicAdminClient.shutdownNow()
session.getTableAdminClientWrapper.deleteTable(sc.bigTable.dataTableName)
session.getTableAdminClientWrapper.deleteTable(sc.bigTable.indexTableName)
session.getTableAdminClientWrapper.deleteTable(sc.bigTable.changeLogTable)
mat.shutdown()
session.close()
actorSystem.terminate().foreach(print)
}
}
package org.opengroup.osdu.production
import com.google.pubsub.v1.PubsubMessage
import org.opengroup.osdu.production.domain.generated.DataPointsList.{ DataPoint, DataPointValue, DataPoints }
import scala.language.postfixOps
trait TestHelper {
def createPubSubMessage(number: Int, status: Int): Seq[PubsubMessage] = {
(1 to number).map { _ =>
val values = createDataPoint(status, 1, Math.random() * 10000)
val points = DataPoints.newBuilder()
points.addDataPoints(0, values)
PubsubMessage.newBuilder().setData(points.build().toByteString).build()
}
}
def createPubSubMessagesForSpecificTime(number: Int, status: Int, time: Long): Seq[PubsubMessage] = {
// this will create multiple versions in bigtable, all datapoints have same physical time.
(1 to number).map { _ =>
val values = createDataPoint(status, 1, Math.random() * 10000, time)
val points = DataPoints.newBuilder()
points.addDataPoints(0, values)
PubsubMessage.newBuilder().setData(points.build().toByteString).build()
}
}
def createPubSubMessageForSpecificTime(number: Int, status: Int, time: Long): PubsubMessage = {
// builds one message whose data points are offset by 1..number milliseconds from `time`, so each point gets its own row key in bigtable.
val points = DataPoints.newBuilder()
(1 to number).foreach { t =>
val values = createDataPoint(status, 1, Math.random() * 10000, time + t)
points.addDataPoints(t - 1, values)
}
PubsubMessage.newBuilder().setData(points.build().toByteString).build()
}
def createDataPoint(status: Int, valueType: Int, value: Double, time: Long = System.currentTimeMillis()): DataPoint = {
val point = DataPoint.newBuilder()
point.setAgentId(10000000)
point.setSchemaVersion(134534534)
point.setStatus(status)
point.setStreamId(10000)
point.setTimeStamp(time)
val dpValue = DataPointValue.newBuilder()
dpValue.setDataPointDouble(value)
point.setValueType(valueType)
point.setValue(dpValue.build())
point.build()
}
}
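A minimal usage sketch, not part of this commit, of what createPubSubMessageForSpecificTime above produces; the object name TestHelperSketch and the literal start time are hypothetical.
package org.opengroup.osdu.production
import org.opengroup.osdu.production.domain.generated.DataPointsList.DataPoints
object TestHelperSketch extends App with TestHelper {
// hypothetical fixed start time, used only for illustration
val t0 = 1700000000000L
// one Pub/Sub message wrapping three data points stamped at t0 + 1, t0 + 2 and t0 + 3 milliseconds
val msg = createPubSubMessageForSpecificTime(3, 1, t0)
val points = DataPoints.parseFrom(msg.getData)
assert(points.getDataPointsCount == 3)
assert((0 until 3).map(i => points.getDataPoints(i).getTimeStamp).toSet == (1 to 3).map(t0 + _).toSet)
}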
package org.opengroup.osdu.production
import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.Sink
import com.google.cloud.pubsub.v1.Publisher
import com.google.pubsub.v1.ProjectTopicName
import org.opengroup.osdu.production.common.LogF
import org.opengroup.osdu.production.common.auth.{ CfsServiceAuthConfiguration, ServiceOAuthProvider }
import org.opengroup.osdu.production.common.entities.{ EventTypes, ServiceLog }
import org.opengroup.osdu.production.config.ServiceConfiguration
import org.opengroup.osdu.production.domain.UnitCatalogChangeWrapper
import org.opengroup.osdu.production.graph.UnitCatalogUpdatePipeline
import org.opengroup.osdu.production.impl.{ RedisCommandImpl, RedisMessageListenerImpl }
import org.opengroup.osdu.production.storage.{ TargetUnitConversionInfo, UnitCatalog }
import com.typesafe.config.{ Config, ConfigFactory }
import org.scalactic.{ ErrorMessage, Or }
import org.scalatest.{ AsyncFunSpec, BeforeAndAfterAll, DiagrammedAssertions, Matchers }
import redis.embedded.RedisServer
import java.util.concurrent.{ CountDownLatch, Executors, TimeUnit }
import scala.concurrent.ExecutionContext
import scala.util.Try
class UnitCatalogUpdateITTests extends AsyncFunSpec with Matchers with DiagrammedAssertions with BeforeAndAfterAll with LogF with UnitCatalogUpdateTestHelper {
implicit val ec: ExecutionContext = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(20))
private val testConf: Config = ConfigFactory.load("it-test")
implicit val sc: ServiceConfiguration = ServiceConfiguration(testConf)
implicit val actorSystem: ActorSystem = ActorSystem("test-system", Some(testConf), defaultExecutionContext = Some(ec))
implicit val mat: ActorMaterializer = ActorMaterializer()
implicit val redisCommand = RedisCommandImpl(sc.redisConfig.host, sc.redisConfig.port, Array())
private val redisServer = RedisServer.builder().setting(s"bind ${sc.redisConfig.host}").port(sc.redisConfig.port).build()
val serviceAuthProvider = new ServiceOAuthProvider {
override def getToken(cfsServiceAuthConfiguration: CfsServiceAuthConfiguration): String = "DummyToken"
}
val cachecoherencePublisher: Publisher = Publisher.newBuilder(ProjectTopicName.of(testConf.getString("cloud.global-project.name"), testConf.getString("cloud.pubsub.topic.name"))).build()
val cachecoherencePublisher2: Publisher = Publisher.newBuilder(ProjectTopicName.of(testConf.getString("cloud.global-project.name"), testConf.getString("cloud.pubsub.topic.name"))).build()
implicit val serviceContext = ServiceContext()(mat, actorSystem, sc, serviceAuthProvider, cachecoherencePublisher, cachecoherencePublisher2)
describe("Update unit catalog pipeline test") {
it("should received test message and react") {
val latch = new CountDownLatch(1)
val config = ConfigFactory.load("it-test")
implicit val sc = ServiceConfiguration(config)
val sink =
Sink.foreach[Or[UnitCatalogChangeWrapper, ErrorMessage]](x => latch.countDown().logInfo(x => ServiceLog(s"processed the command successfully $x", EventTypes.DataProcess, "")))
val queue = UnitCatalogUpdatePipeline.create(sink)
RedisMessageListenerImpl(sc.redisConfig.host, sc.redisConfig.port).listenToRedisMessage(sc.redisConfig.messageChannel, (bytes: Array[Byte]) => {
queue.offer(bytes)
()
})
assert(latch.await(30, TimeUnit.SECONDS))
}
}
override protected def beforeAll(): Unit = {
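// stop any embedded Redis left over from a previous run, seed the unit catalog, then start a fresh server before publishing the change messages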
Try(redisServer.stop())
UnitCatalog.catalog.clear()
UnitCatalog.addUnitToCatalog(unitModify.unit.sourceUnit, TargetUnitConversionInfo("toUnit1", 1, 1, 1, 1))
UnitCatalog.addUnitToCatalog(unitDelete.unit.sourceUnit, TargetUnitConversionInfo(unitDelete.unit.toUnit, unitDelete.unit.a, unitDelete.unit.b, unitDelete.unit.c, unitDelete.unit.d))
redisServer.start()
publishRedisMessage(redisCommand, sc.redisConfig.messageChannel, Seq(unitAdd, unitModify, unitDelete))
()
}
override protected def afterAll(): Unit = {
if (redisServer != null) redisServer.stop()
mat.shutdown()
actorSystem.terminate().foreach(print)
}
}
syntax = "proto3";
message BigTableRecord {
int32 storageSchemaVersion = 1;
int32 statusCode = 2;
int64 dataTypeCode = 3;
DataPointValue value = 4;
int64 sourceVersionTime = 5;
}
message DataPointValue {
int64 DataPoint_Long = 1;
double DataPoint_Double = 2;
int64 DataPoint_DateTime = 3;
string DataPoint_String = 4;
bool DataPoint_Boolean = 5;
}
env = "local"
cloud {
global-project {
name = "production-data-integration"
}
project {
name = "production-data-integration"
}
pubsub {
subscription {
name = "data-test-naman"
ack-deadline-seconds = 60
parallel-pull-count = 10
retain-acked-messages = true
}
topic {
name = "A0000001-test"
}
cache-coherence-outbound {
outbound {
topic {
name = "buckets-gateway-local"
}
}
publishRetryCount = 3
}
}
bigtable {
instance {
name = "timeseries"
}
table {
data {
name = "ts-local"
}
index {
name = "tsi-local"
}
changelog {
name = "changelog-local"
}
}
columnFamily {
names = ["d"]
}
user {
agent = "osdu-user-agent"
}
}
memorystore {
host = "localhost"
port = 6379
message-channel = "unit-catalog-channel"
}
}
bootloader.class = org.opengroup.osdu.production.PipelineApp
quartz {
schedules {
probeSchedule {
description = "A cron job that runs every 2 minutes"
expression = "0 * * ? * * *"
}
}
}
batch{
dataPoints{
throttleCost = 25000,
segmentSize = 1000
}
}
modifyAckDeadlineJob{
jobIntervalMilliseconds = 100000
ackDeadlineMilliseconds = 600000
#this should be less than pubSub message ack deadline which is 600 seconds
oldPubSubMessageOffsetMilliseconds = 500000
}
unitCatalogService{
hostname = "unit-catalog",
port = 9090,
geturi = "/unit-catalog/api/v1/catalog"
secret = ""
}
<configuration>
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n</pattern>
</encoder>
</appender>
<root level="warn">
<appender-ref ref="STDOUT" />
</root>
<logger name="org.opengroup.osdu.production" level="info" />
</configuration>
<configuration>
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%msg</pattern>
</encoder>
</appender>
<root level="error">
<appender-ref ref="STDOUT" />
</root>
<logger name="org.opengroup.osdu.production" level="info" />
</configuration>
metrics_config {
stack_driver {
kubernetesHostNameValue = "KUBERNETES_SERVICE_HOST",
monitoredResourceName = "k8s_cluster",
labels = {
clusterNameValue = "cluster_name",
locationNameValue = "location"
}
}
custom_metrics = [
{
"name": "sli_pss_writeback_api_data_ingestion_latency",
"description": "Metric to capture the data ingestion latency of writeback API (single + batch)",
"type": {
"count": false,
"sum": false,
"distribution": true
},
"unit": "ms",
"labels": {
"project": "project",
"tenant": "tenant",
"request_type":"request_type"
}
}
]
}
cloud {
project {
name = "GOOGLE-PROJECT-NAME"
}
pubsub {
subscription = "bigtable-restore-metadata-subscription-ENV-TAG",
topic = "bigtable-restore-metadata-ENV-TAG",
}
bigtable {
instance {
name = "timeseries"
}
table {
data {
name = "ts-ENV-TAG-v2"
}
index {
name = "tsi-ENV-TAG-v2"
}
changelog {
name = "changelog-ENV-TAG"
}
}
columnFamily {
names = ["d"]
}
user {
agent = "osdu-user-agent"
}
}
}
modifyAckDeadlineJob{
jobIntervalMilliseconds = 100000
ackDeadlineMilliseconds = 600000
#this should be less than pubSub message ack deadline which is 600 seconds
oldPubSubMessageOffsetMilliseconds = 500000
}
package org.opengroup.osdu.production
import akka.actor.ActorSystem
import akka.stream.{ ActorMaterializer, ActorMaterializerSettings, Supervision }
import com.codahale.metrics.ConsoleReporter
import com.google.cloud.bigtable.metrics.{ BigtableClientMetrics, DropwizardMetricRegistry }
import org.opengroup.osdu.production.common.LogF
import org.opengroup.osdu.production.common.auth.{ RawHttpExecutor, ServiceToServiceAuthTokenProvider }
import org.opengroup.osdu.production.common.entities.{ EventTypes, ServiceLog }
import com.typesafe.config.ConfigFactory
import org.opengroup.osdu.production.config.ServiceConfiguration
import java.util.concurrent.TimeUnit
import scala.util.Try
object App extends App with LogF {
val conf = ConfigFactory.load(args(0))
val rawRequestExecutor = new RawHttpExecutor()
implicit val serviceAuthProvider = new ServiceToServiceAuthTokenProvider(rawRequestExecutor)
implicit val system: ActorSystem = ActorSystem("osdu-cloud-data-ingestion-bigtable", conf)
implicit val sc: ServiceConfiguration = ServiceConfiguration(conf)
val registry = new DropwizardMetricRegistry()
BigtableClientMetrics.setMetricRegistry(registry)
val reporter: ConsoleReporter = ConsoleReporter.forRegistry(registry.getRegistry)
.convertRatesTo(TimeUnit.SECONDS)
.convertDurationsTo(TimeUnit.MILLISECONDS)
.build()
reporter.start(30, TimeUnit.MINUTES)
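// any exception escaping a stream stage is treated as fatal: log it and exit so the process can be restarted with a clean state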
val decider: Supervision.Decider = {
case ex: Exception =>
logger.info(s"System shutting down : ${ex}")
System.exit(-1); Supervision.stop
}
implicit val mat = ActorMaterializer(ActorMaterializerSettings(system).withSupervisionStrategy(decider))
val done = Class.forName(Try(args(1)).toOption.fold("org.opengroup.osdu.production.PipelineApp")(identity)).newInstance.asInstanceOf[IngestionRunnableApp].start(conf)
done.recover {
case ex => loggerF.logError(_ => ServiceLog(s"error received from pipeline $ex", EventTypes.Service_Terminate, "")); System.exit(-1)
}(mat.system.dispatcher)
}
package org.opengroup.osdu.production
import akka.Done
import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.Sink
import com.google.cloud.bigtable.grpc.BigtableSession
import com.google.cloud.pubsub.v1.Publisher
import com.google.pubsub.v1.ProjectTopicName
import org.opengroup.osdu.production.common.auth.ServiceOAuthProvider
import org.opengroup.osdu.production.common.entities.{ EventTypes, LogCategories, StructuredLog }
import org.opengroup.osdu.production.common.{ LogF, ProbesJobScheduler }
import org.opengroup.osdu.production.config.ServiceConfiguration
import org.opengroup.osdu.production.graph.{ GraphBuilder, UnitCatalogUpdateGraphBuilder }
import org.opengroup.osdu.production.impl.RedisMessageListenerImpl
import org.opengroup.osdu.production.storage.UnitCatalog
import com.typesafe.config.Config
import org.scalactic.{ Bad, Good }
import scala.concurrent.{ ExecutionContext, Future }
trait IngestionRunnableApp {
def start(conf: Config)(implicit mat: ActorMaterializer, system: ActorSystem, sc: ServiceConfiguration, serviceOAuthProvider: ServiceOAuthProvider): Future[Done]
}
class PipelineApp extends IngestionRunnableApp with LogF {
def start(conf: Config)(implicit mat: ActorMaterializer, system: ActorSystem, sc: ServiceConfiguration, serviceOAuthProvider: ServiceOAuthProvider): Future[Done] = {
val bigTableSession = new BigtableSession(sc.bigTable.opts)
ProbesJobScheduler.scheduleJob(conf, bigTableSession)
implicit val ec: ExecutionContext = system.dispatcher
implicit val cachecoherencePublisher: Publisher = Publisher.newBuilder(ProjectTopicName.of(sc.cachecoherencePublishConfig.project, sc.cachecoherencePublishConfig.topicId)).build()
implicit val cachecoherencePublisher2: Publisher = Publisher.newBuilder(ProjectTopicName.of(sc.cachecoherencePublishConfig2.project, sc.cachecoherencePublishConfig2.topicId)).build()
implicit val serviceContext: ServiceContext = ServiceContext()(mat, system, sc, serviceOAuthProvider, cachecoherencePublisher, cachecoherencePublisher2)
sc.init()
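// populate the in-memory unit catalog first; the update graph below then applies subsequent catalog change notifications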
UnitCatalog.populateCatalogCache.flatMap { _ =>
val queue = UnitCatalogUpdateGraphBuilder.runnableGraph(Sink.foreach {
case Good(unitCatalogChangeWrapper) =>
val discarded = loggerF.logInfo(_ => StructuredLog(s"Processing of notification from unit catalog for unit ${unitCatalogChangeWrapper.unit} and action ${unitCatalogChangeWrapper.action} was successful", EventTypes.DataProcess, "", LogCategories.Synchronous))
case Bad(ex) =>
// if something fails in the notification pipeline, make sure the pod restarts so we pick up the latest unit catalog