Hadoop-spark-kafka-zookeeper docker compose

Docker Compose is awesome, especially when you need to spin up a local development environment.

The following is the docker-compose.yml I use at home. It brings up a full local big-data playground: ZooKeeper, a single Kafka broker with Schema Registry, REST proxy, Kafka Connect and KSQL, the Landoop and ZooNavigator web UIs, and a small HDFS/YARN cluster with Spark.

version: '2.1'

services:
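  # Single-node ZooKeeper; the Kafka broker registers itself here, and
  # ZooNavigator (further down) lets you browse its contents.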
  zoo1:
    image: zookeeper:3.4.9
    restart: unless-stopped
    hostname: zoo1
    ports:
      - "2181:2181"
    environment:
      ZOO_MY_ID: 1
      ZOO_PORT: 2181
      ZOO_SERVERS: server.1=zoo1:2888:3888
    volumes:
      - ./full-stack/zoo1/data:/data
      - ./full-stack/zoo1/datalog:/datalog

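  # Single Kafka broker with two listeners: containers on the compose network
  # reach it at kafka1:19092, clients on the host at localhost:9092.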
  kafka1:
    image: confluentinc/cp-kafka:5.2.1
    hostname: kafka1
    ports:
      - "9092:9092"
    environment:
      KAFKA_ADVERTISED_LISTENERS: LISTENER_DOCKER_INTERNAL://kafka1:19092,LISTENER_DOCKER_EXTERNAL://${DOCKER_HOST_IP:-127.0.0.1}:9092
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: LISTENER_DOCKER_INTERNAL:PLAINTEXT,LISTENER_DOCKER_EXTERNAL:PLAINTEXT
      KAFKA_INTER_BROKER_LISTENER_NAME: LISTENER_DOCKER_INTERNAL
      KAFKA_ZOOKEEPER_CONNECT: "zoo1:2181"
      KAFKA_BROKER_ID: 1
      KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
    volumes:
      - ./full-stack/kafka1/data:/var/lib/kafka/data
    depends_on:
      - zoo1

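  # Confluent Schema Registry: stores the Avro schemas used on Kafka topics.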
  kafka-schema-registry:
    image: confluentinc/cp-schema-registry:5.2.1
    hostname: kafka-schema-registry
    ports:
      - "8081:8081"
    environment:
      SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
      SCHEMA_REGISTRY_HOST_NAME: kafka-schema-registry
      SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:8081
    depends_on:
      - zoo1
      - kafka1

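  # Landoop web UI for the Schema Registry, on http://localhost:8001.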
  schema-registry-ui:
    image: landoop/schema-registry-ui:0.9.4
    hostname: kafka-schema-registry-ui
    ports:
      - "8001:8000"
    environment:
      SCHEMAREGISTRY_URL: http://kafka-schema-registry:8081/
      PROXY: "true"
    depends_on:
      - kafka-schema-registry

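  # Confluent REST proxy: produce and consume over plain HTTP on port 8082.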
  kafka-rest-proxy:
    image: confluentinc/cp-kafka-rest:5.2.1
    hostname: kafka-rest-proxy
    ports:
      - "8082:8082"
    environment:
      # KAFKA_REST_ZOOKEEPER_CONNECT: zoo1:2181
      KAFKA_REST_LISTENERS: http://0.0.0.0:8082/
      KAFKA_REST_SCHEMA_REGISTRY_URL: http://kafka-schema-registry:8081/
      KAFKA_REST_HOST_NAME: kafka-rest-proxy
      KAFKA_REST_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
    depends_on:
      - zoo1
      - kafka1
      - kafka-schema-registry

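  # Landoop web UI for browsing topics (talks to the REST proxy), on http://localhost:8000.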
  kafka-topics-ui:
    image: landoop/kafka-topics-ui:0.9.4
    hostname: kafka-topics-ui
    ports:
      - "8000:8000"
    environment:
      KAFKA_REST_PROXY_URL: "http://kafka-rest-proxy:8082/"
      PROXY: "true"
    depends_on:
      - zoo1
      - kafka1
      - kafka-schema-registry
      - kafka-rest-proxy

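  # Kafka Connect worker in distributed mode; drop connector jars into
  # ./connectors on the host and they are picked up via the plugin path.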
  kafka-connect:
    image: confluentinc/cp-kafka-connect:5.2.1
    hostname: kafka-connect
    ports:
      - "8083:8083"
    environment:
      CONNECT_BOOTSTRAP_SERVERS: "kafka1:19092"
      CONNECT_REST_PORT: 8083
      CONNECT_GROUP_ID: compose-connect-group
      CONNECT_CONFIG_STORAGE_TOPIC: docker-connect-configs
      CONNECT_OFFSET_STORAGE_TOPIC: docker-connect-offsets
      CONNECT_STATUS_STORAGE_TOPIC: docker-connect-status
      CONNECT_KEY_CONVERTER: io.confluent.connect.avro.AvroConverter
      CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL: 'http://kafka-schema-registry:8081'
      CONNECT_VALUE_CONVERTER: io.confluent.connect.avro.AvroConverter
      CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: 'http://kafka-schema-registry:8081'
      CONNECT_INTERNAL_KEY_CONVERTER: "org.apache.kafka.connect.json.JsonConverter"
      CONNECT_INTERNAL_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter"
      CONNECT_REST_ADVERTISED_HOST_NAME: "kafka-connect"
      CONNECT_LOG4J_ROOT_LOGLEVEL: "INFO"
      CONNECT_LOG4J_LOGGERS: "org.apache.kafka.connect.runtime.rest=WARN,org.reflections=ERROR"
      CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: "1"
      CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: "1"
      CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: "1"
      CONNECT_PLUGIN_PATH: '/usr/share/java,/etc/kafka-connect/jars'
    volumes:
      - ./connectors:/etc/kafka-connect/jars/
    depends_on:
      - zoo1
      - kafka1
      - kafka-schema-registry
      - kafka-rest-proxy

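  # Landoop web UI for managing connectors, on http://localhost:8003.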
  kafka-connect-ui:
    image: landoop/kafka-connect-ui:0.9.4
    hostname: kafka-connect-ui
    ports:
      - "8003:8000"
    environment:
      CONNECT_URL: "http://kafka-connect:8083/"
      PROXY: "true"
    depends_on:
      - kafka-connect

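  # KSQL server: SQL-like stream processing on top of Kafka, on port 8088.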
  ksql-server:
    image: confluentinc/cp-ksql-server:5.2.1
    hostname: ksql-server
    ports:
      - "8088:8088"
    environment:
      KSQL_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
      KSQL_LISTENERS: http://0.0.0.0:8088/
      KSQL_KSQL_SERVICE_ID: ksql-server_
    depends_on:
      - zoo1
      - kafka1

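  # ZooNavigator: web UI for inspecting the ZooKeeper tree, on http://localhost:8004.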
  zoonavigator-web:
    image: elkozmon/zoonavigator-web:0.5.1
    ports:
      - "8004:8000"
    environment:
      API_HOST: "zoonavigator-api"
      API_PORT: 9000
      AUTO_CONNECT_CONNECTION_STRING: "zoo1:2181"
    depends_on:
      - zoonavigator-api

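  # Backend API for the ZooNavigator web UI above.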
  zoonavigator-api:
    image: elkozmon/zoonavigator-api:0.5.1
    environment:
      SERVER_HTTP_PORT: 9000
    depends_on:
      - zoo1

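  # HDFS NameNode; the other Hadoop containers point at it through
  # fs.defaultFS (hdfs://namenode:8020).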
  namenode:
    image: uhopper/hadoop-namenode
    hostname: namenode
    container_name: namenode
    domainname: hadoop
    volumes:
      - ./hadoop/dfs/name:/hadoop/dfs/name
    environment:
      - CLUSTER_NAME=cluster

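  # HDFS DataNode: stores the actual file blocks.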
  datanode1:
    image: uhopper/hadoop-datanode
    hostname: datanode1
    container_name: datanode1
    domainname: hadoop
    volumes:
      - ./hadoop/dfs/data:/hadoop/dfs/data
    environment:
      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020

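  # YARN ResourceManager. The uhopper images turn env vars into Hadoop config:
  # "_" becomes "." and "___" becomes "-", so YARN_CONF_yarn_log___aggregation___enable
  # ends up as yarn.log-aggregation-enable in yarn-site.xml.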
  resourcemanager:
    image: uhopper/hadoop-resourcemanager
    hostname: resourcemanager
    container_name: resourcemanager
    domainname: hadoop
    environment:
      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
      - YARN_CONF_yarn_log___aggregation___enable=true

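  # YARN NodeManager: runs the application containers (e.g. Spark executors).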
  nodemanager1:
    image: uhopper/hadoop-nodemanager
    hostname: nodemanager1
    container_name: nodemanager1
    domainname: hadoop
    environment:
      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
      - YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
      - YARN_CONF_yarn_log___aggregation___enable=true
      - YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs

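  # Spark client; "tail -f" just keeps the container alive so you can
  # docker exec into it and submit jobs to YARN with spark-submit.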
  spark:
    image: uhopper/hadoop-spark
    hostname: spark
    container_name: spark
    domainname: hadoop
    environment:
      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
      - YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
    command: tail -f /var/log/dmesg
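
Save the file as docker-compose.yml and bring everything up with docker-compose up -d. Once the containers are running you get, among others, the topics UI on http://localhost:8000, the Schema Registry UI on http://localhost:8001, the Connect UI on http://localhost:8003, ZooNavigator on http://localhost:8004, the REST proxy on http://localhost:8082 and KSQL on http://localhost:8088. The Kafka broker itself is reachable from the host on localhost:9092 thanks to the external listener.

A quick way to verify the broker is to round-trip one message from the host. The snippet below is a minimal smoke test, not part of the stack: it assumes you have the confluent-kafka Python package installed (pip install confluent-kafka), and the topic name smoke-test is arbitrary (it relies on topic auto-creation, which is on by default).

# smoke_test.py -- produce one message and read it back.
from confluent_kafka import Producer, Consumer

BOOTSTRAP = "localhost:9092"  # the LISTENER_DOCKER_EXTERNAL port from the compose file
TOPIC = "smoke-test"          # arbitrary; auto-created on first use

# Produce a single message and wait for the broker to acknowledge it.
producer = Producer({"bootstrap.servers": BOOTSTRAP})
producer.produce(TOPIC, key="k1", value="hello from the host")
producer.flush(10)

# Read it back from the beginning of the topic.
consumer = Consumer({
    "bootstrap.servers": BOOTSTRAP,
    "group.id": "smoke-test-group",
    "auto.offset.reset": "earliest",
})
consumer.subscribe([TOPIC])
msg = consumer.poll(30.0)
assert msg is not None and not msg.error(), "no message came back"
print(msg.key(), msg.value())  # b'k1' b'hello from the host'
consumer.close()

The HTTP services can be poked the same way; another small sketch, this time assuming the requests package:

import requests

# List the Avro subjects known to the Schema Registry (empty on a fresh stack).
print(requests.get("http://localhost:8081/subjects").json())

# Produce a JSON message through the REST proxy (v2 API).
resp = requests.post(
    "http://localhost:8082/topics/smoke-test",
    headers={"Content-Type": "application/vnd.kafka.json.v2+json"},
    json={"records": [{"value": {"greeting": "hello via REST"}}]},
)
print(resp.json())  # partition/offset of the written record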