Skip to main content

datahub

ben.wangz · Less than 1 minute

datahub

prepare

  1. k8s is ready
  2. argocd is ready and logged in
  3. Elasticsearch is ready
  4. Kafka (ZooKeeper mode) is ready
  5. MariaDB/MySQL is ready

installation

  1. create secret datahub-credentials
    • kafka-sasl-plaintext
      # Create the datahub-credentials secret in the application namespace.
      # The MariaDB root password and the first comma-separated Kafka client
      # password are read from existing secrets in the database namespace when
      # the command runs.
      kubectl --namespace application create secret generic datahub-credentials \
          --from-literal=security.protocol="SASL_PLAINTEXT" \
          --from-literal=sasl.mechanism="SCRAM-SHA-256" \
          --from-literal=sasl.jaas.config="org.apache.kafka.common.security.scram.ScramLoginModule required username=\"user1\" password=\"$(kubectl --namespace database get secret kafka-user-passwords --output jsonpath='{.data.client-passwords}' | base64 --decode | cut -d, -f1)\";" \
          --from-literal=mysql-root-password="$(kubectl --namespace database get secret mariadb-credentials --output jsonpath='{.data.mariadb-root-password}' | base64 --decode)"
      
  2. prepare datahub.yaml
    • kafka-sasl-plaintext
      # Argo CD Application that deploys the `datahub` Helm chart (release name
      # `datahub`) from https://helm.datahubproject.io into the `application`
      # namespace.
      apiVersion: argoproj.io/v1alpha1
      kind: Application
      metadata:
        name: datahub
      spec:
        # Let Argo CD create the destination namespace if it does not exist yet.
        syncPolicy:
          syncOptions:
          - CreateNamespace=true
        project: default
        source:
          repoURL: https://helm.datahubproject.io
          chart: datahub
          # Chart version to deploy.
          targetRevision: 0.4.8
          helm:
            releaseName: datahub
            # Helm values, passed to the chart as one literal block. They point
            # DataHub at the external Elasticsearch, Kafka/ZooKeeper and MariaDB
            # services and read the Kafka JAAS config and the MySQL root
            # password from the `datahub-credentials` secret.
            # NOTE(review): the neo4j section references key `neo4j-password`
            # in `datahub-credentials`; confirm that key exists in the secret,
            # or that the chart does not use neo4j in this setup.
            values: |
              global:
                springKafkaConfigurationOverrides:
                  security.protocol: SASL_PLAINTEXT
                  sasl.mechanism: SCRAM-SHA-256
                credentialsAndCertsSecrets:
                  name: datahub-credentials
                  secureEnv:
                    sasl.jaas.config: sasl.jaas.config
                elasticsearch:
                  host: elastic-search-elasticsearch.application.svc.cluster.local
                  port: 9200
                  skipcheck: "false"
                  insecure: "false"
                  useSSL: "false"
                kafka:
                  bootstrap:
                    server: kafka.database.svc.cluster.local:9092
                  zookeeper:
                    server: kafka-zookeeper.database.svc.cluster.local:2181
                neo4j:
                  host: neo4j.database.svc.cluster.local:7474
                  uri: bolt://neo4j.database.svc.cluster.local
                  username: neo4j
                  password:
                    secretRef: datahub-credentials
                    secretKey: neo4j-password
                sql:
                  datasource:
                    host: mariadb.database.svc.cluster.local:3306
                    hostForMysqlClient: mariadb.database.svc.cluster.local
                    port: 3306
                    url: jdbc:mysql://mariadb.database.svc.cluster.local:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
                    driver: com.mysql.cj.jdbc.Driver
                    username: root
                    password:
                      secretRef: datahub-credentials
                      secretKey: mysql-root-password
              datahub-gms:
                enabled: true
                replicaCount: 1
                image:
                  repository: docker.io/acryldata/datahub-gms
                service:
                  type: ClusterIP
                ingress:
                  enabled: false
              datahub-frontend:
                enabled: true
                replicaCount: 1
                image:
                  repository: docker.io/acryldata/datahub-frontend-react
                defaultUserCredentials:
                  randomAdminPassword: true
                service:
                  type: ClusterIP
                ingress:
                  enabled: true
                  className: nginx
                  annotations:
                    cert-manager.io/cluster-issuer: self-signed-ca-issuer
                  hosts:
                  - host: datahub.dev.geekcity.tech
                    paths:
                    - /
                  tls:
                  - secretName: "datahub.dev.geekcity.tech-tls"
                    hosts:
                    - datahub.dev.geekcity.tech
              acryl-datahub-actions:
                enabled: true
                replicaCount: 1
                image:
                  repository: docker.io/acryldata/datahub-actions
              datahub-mae-consumer:
                replicaCount: 1
                image:
                  repository: docker.io/acryldata/datahub-mae-consumer
                ingress:
                  enabled: false
              datahub-mce-consumer:
                replicaCount: 1
                image:
                  repository: docker.io/acryldata/datahub-mce-consumer
                ingress:
                  enabled: false
              datahub-ingestion-cron:
                enabled: false
                image:
                  repository: docker.io/acryldata/datahub-ingestion
              elasticsearchSetupJob:
                enabled: true
                image:
                  repository: docker.io/acryldata/datahub-elasticsearch-setup
              kafkaSetupJob:
                enabled: true
                image:
                  repository: docker.io/acryldata/datahub-kafka-setup
              mysqlSetupJob:
                enabled: true
                image:
                  repository: docker.io/acryldata/datahub-mysql-setup
              postgresqlSetupJob:
                enabled: false
                image:
                  repository: docker.io/acryldata/datahub-postgres-setup
              datahubUpgrade:
                enabled: true
                image:
                  repository: docker.io/acryldata/datahub-upgrade
              datahubSystemUpdate:
                image:
                  repository: docker.io/acryldata/datahub-upgrade
        # Target cluster and namespace for the rendered chart resources.
        destination:
          server: https://kubernetes.default.svc
          namespace: application
      
      
  3. apply to k8s
    • kubectl -n argocd apply -f datahub.yaml
      
  4. sync by argocd
    • argocd app sync argocd/datahub
      

visit with browser

  1. extract credentials
    • kubectl -n application get secret datahub-user-secret -o jsonpath='{.data.user\.props}' | base64 -d
      
  2. with https
    • datahub.dev.geekcity.tech should resolve to the nginx-ingress address
      • for example, add the line "$K8S_MASTER_IP datahub.dev.geekcity.tech" to /etc/hosts
    • datahub frontend: https://datahub.dev.geekcity.tech:32443
    • api
      • NOTE: login first
      • restful: https://datahub.dev.geekcity.tech:32443/openapi/swagger-ui/index.html
      • graphql: https://datahub.dev.geekcity.tech:32443/api/graphiql

ingest metadata from s3