1.安装JDK
# add-apt-repository ppa:webupd8team/java
# apt-get update
# apt-get install oracle-java8-installer
# apt-get install oracle-java8-set-default
2.配置SSH
$ sudo apt-get install ssh -y
$ ssh-keygen
$ ssh-copy-id localhost
3.安装Hadoop
$ sudo chown -R hadoop:hadoop /opt
$ tar -zxvf hadoop-2.7.2.tar.gz -C /opt
4.配置Hadoop
4.1.~/.bashrc 尾部追加如下内容:
export JAVA_HOME=/usr/lib/jvm/java-8-oracle
export HADOOP_INSTALL=/opt/hadoop-2.7.2
export HADOOP_HOME=$HADOOP_INSTALL
export PATH=$PATH:$HADOOP_INSTALL/bin
export PATH=$PATH:$HADOOP_INSTALL/sbin
export HADOOP_MAPRED_HOME=$HADOOP_INSTALL
export HADOOP_COMMON_HOME=$HADOOP_INSTALL
export HADOOP_HDFS_HOME=$HADOOP_INSTALL
export HADOOP_CONF_DIR=$HADOOP_INSTALL/etc/hadoop
export YARN_HOME=$HADOOP_INSTALL
export YARN_CONF_DIR=$HADOOP_INSTALL/etc/hadoop
4.2.编辑文件 ${HADOOP_INSTALL}/etc/hadoop/core-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000/</value>
<description>NameNode URI</description>
</property>
</configuration>
4.3.编辑文件 ${HADOOP_INSTALL}/etc/hadoop/hdfs-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///home/bd/hdfs/datanode</value>
<description>DataNode directory</description>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///home/bd/hdfs/namenode</value>
<description>NameNode directory for namespace and transaction logs storage.</description>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<property>
<name>dfs.datanode.use.datanode.hostname</name>
<value>false</value>
</property>
<property>
<name>dfs.namenode.datanode.registration.ip-hostname-check</name>
<value>false</value>
</property>
</configuration>
4.4.编辑文件 ${HADOOP_INSTALL}/etc/hadoop/mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
4.5.编辑文件 ${HADOOP_INSTALL}/etc/hadoop/yarn-site.xml
<?xml version="1.0"?>
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>localhost:8025</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>localhost:8030</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>localhost:8040</value>
</property>
<property>
<name>yarn.nodemanager.address</name>
<value>localhost:8050</value>
</property>
<property>
<name>yarn.nodemanager.localizer.address</name>
<value>localhost:8060</value>
</property>
</configuration>
4.6.编辑文件 ${HADOOP_INSTALL}/etc/hadoop/slaves
localhost
4.7.修改文件 ${HADOOP_INSTALL}/etc/hadoop/hadoop-env.sh
# 只修改这里
# The java implementation to use.
export JAVA_HOME=/usr/lib/jvm/java-8-oracle
5.启动Hadoop
5.1.格式化文件系统
$ hdfs namenode -format
5.2.启动Hadoop
$ start-dfs.sh
$ start-yarn.sh
5.3.查看进程
$ jps
5825 SecondaryNameNode
5971 ResourceManager
5492 NameNode
6100 NodeManager
5614 DataNode
6399 Jps
上述进程全部启动成功,证明集群运行正常。