Skip to content

docker-compose 快速搭建 hadoop环境

本人只是用作本地临时来测试 hadoop 功能,完全够用了

尝试了几个版本,测试了这个版本可用

参考来自 https://cloud.tencent.com/developer/beta/article/1150829

docker 镜像

docker pull bde2020/hadoop-namenode:1.1.0-hadoop2.7.1-java8
docker pull bde2020/hadoop-datanode:1.1.0-hadoop2.7.1-java8
docker pull bde2020/hadoop-resourcemanager:1.1.0-hadoop2.7.1-java8
docker pull bde2020/hadoop-historyserver:1.1.0-hadoop2.7.1-java8
docker pull bde2020/hadoop-nodemanager:1.1.0-hadoop2.7.1-java8

hadoop.env

CORE_CONF_fs_defaultFS=hdfs://namenode:8020
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*

HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false

YARN_CONF_yarn_log___aggregation___enable=true
YARN_CONF_yarn_resourcemanager_recovery_enabled=true
YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
YARN_CONF_yarn_timeline___service_enabled=true
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
YARN_CONF_yarn_timeline___service_hostname=historyserver
YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
YARN_CONF_yarn_resourcemanager_resource___tracker_address=resourcemanager:8031

docker-compose.yml

以下我暴露了8020端口来调试

version: "2"

services:
  namenode:
    image: bde2020/hadoop-namenode:1.1.0-hadoop2.7.1-java8
    container_name: namenode
    ports:
      - 8020:8020
    volumes:
      - hadoop_namenode:/hadoop/dfs/name
    environment:
      - CLUSTER_NAME=test
    env_file:
      - ./hadoop.env

  resourcemanager:
    image: bde2020/hadoop-resourcemanager:1.1.0-hadoop2.7.1-java8
    container_name: resourcemanager
    depends_on:
      - namenode
      - datanode1
      - datanode2
      - datanode3
    env_file:
      - ./hadoop.env

  historyserver:
    image: bde2020/hadoop-historyserver:1.1.0-hadoop2.7.1-java8
    container_name: historyserver
    depends_on:
      - namenode
      - datanode1
      - datanode2
      - datanode3
    volumes:
      - hadoop_historyserver:/hadoop/yarn/timeline
    env_file:
      - ./hadoop.env

  nodemanager1:
    image: bde2020/hadoop-nodemanager:1.1.0-hadoop2.7.1-java8
    container_name: nodemanager1
    depends_on:
      - namenode
      - datanode1
      - datanode2
      - datanode3
    env_file:
      - ./hadoop.env

  datanode1:
    image: bde2020/hadoop-datanode:1.1.0-hadoop2.7.1-java8
    container_name: datanode1
    depends_on:
      - namenode
    volumes:
      - hadoop_datanode1:/hadoop/dfs/data
    env_file:
      - ./hadoop.env

  datanode2:
    image: bde2020/hadoop-datanode:1.1.0-hadoop2.7.1-java8
    container_name: datanode2
    depends_on:
      - namenode
    volumes:
      - hadoop_datanode2:/hadoop/dfs/data
    env_file:
      - ./hadoop.env

  datanode3:
    image: bde2020/hadoop-datanode:1.1.0-hadoop2.7.1-java8
    container_name: datanode3
    depends_on:
      - namenode
    volumes:
      - hadoop_datanode3:/hadoop/dfs/data
    env_file:
      - ./hadoop.env

volumes:
  hadoop_namenode:
  hadoop_datanode1:
  hadoop_datanode2:
  hadoop_datanode3:
  hadoop_historyserver:

启动

docker-compose up -d

查看状态

╰─ docker-compose ps
NAME                COMMAND                  SERVICE             STATUS              PORTS
datanode1           "/entrypoint.sh /run…"   datanode1           running (healthy)   50075/tcp
datanode2           "/entrypoint.sh /run…"   datanode2           running (healthy)   50075/tcp
datanode3           "/entrypoint.sh /run…"   datanode3           running (healthy)   50075/tcp
historyserver       "/entrypoint.sh /run…"   historyserver       exited (139)
namenode            "/entrypoint.sh /run…"   namenode            running (healthy)   50070/tcp
nodemanager1        "/entrypoint.sh /run…"   nodemanager1        running (healthy)   8042/tcp
resourcemanager     "/entrypoint.sh /run…"   resourcemanager     running (healthy)   8088/tcp

我这边 historyserver 是退出状态,不清楚啥原因

提交作用

sudo docker exec -it namenode /bin/bash

准备数据来提交

cd /opt/hadoop-2.7.1

# 创建用户目录
hdfs dfs -mkdir /user
hdfs dfs -mkdir /user/root

# 准备数据
hdfs dfs -mkdir input
hdfs dfs -put etc/hadoop/*.xml input

# 提交作业
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar grep input output 'dfs[a-z.]+'

# 查看作业执行结果
hdfs dfs -cat output/*

停止集群

可以通过CTRL+C来终止集群,也可以通过"sudodocker-composestop"o
停止集群后,创建的容器并不会被删除,此时可以使用"SUdOdocker-composerm"来删除已经停止的容器。也可以使用"SUdO
docker-composedown"来停止并删除容器。
删除容器后,使用"SUdOdockervolumels”可以看到上面集群使用的volume信息,我们可以使用"SUdOdockerrm<volume>”来删
除。

Comments