Last active
September 17, 2017 08:06
-
-
Save selvait90/deffd2308708726f6877 to your computer and use it in GitHub Desktop.
Hadoop Workshop
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Prerequisite | |
| sudo apt-get update | |
| java -version | |
| sudo apt-get install default-jre | |
| sudo apt-get install default-jdk | |
| sudo apt-get remove openssh-client | |
| sudo apt-get install openssh-client | |
| sudo apt-get install openssh-server | |
| # Java | |
| sudo apt-get update | |
| java -version | |
| sudo apt-get install default-jre | |
| sudo apt-get install default-jdk | |
| - Java installed location /usr/lib/jvm/java-8-openjdk-amd64/ | |
| # Environment | |
| sudo apt-get install vim | |
| # ssh keys | |
| sudo apt-get remove openssh-client | |
| sudo apt-get install openssh-client | |
| sudo apt-get install openssh-server | |
| ssh localhost | |
| ssh-keygen -t rsa | |
| cd .ssh/ | |
| touch authorized_keys | |
| cat id_rsa.pub >> authorized_keys | |
| chmod 644 authorized_keys | |
| ssh localhost | |
| # Hadoop | |
| cd | |
| mkdir app | |
| cd app | |
| cp ../hadoop-workshop/app-deliverable/hadoop-1.2.1.tar.gz . | |
| tar -xzvf hadoop-1.2.1.tar.gz | |
| cd hadoop-1.2.1 | |
| cd conf | |
| 1) core-site.xml | |
| fs.default.name - hdfs://localhost:10011 | |
| hadoop.tmp.dir - /home/ubuntu/hadoop/tmp | |
| 2) hdfs-site.xml | |
| dfs.replication - 1 | |
| dfs.name.dir - /home/ubuntu/hadoop/name | |
| dfs.data.dir - /home/ubuntu/hadoop/data | |
| 3) mapred-site.xml | |
| mapred.job.tracker - localhost:10012 | |
| mapred.tasktracker.map.tasks.maximum - 4 | |
| mapred.tasktracker.reduce.tasks.maximum - 2 | |
| 4) hadoop-env.sh | |
| export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/ | |
| export HADOOP_HOME=/home/ubuntu/app/hadoop-1.2.1 | |
| export HADOOP_PID_DIR=/home/ubuntu/hadoop/pids | |
| cd .. | |
| ./bin/hadoop namenode -format | |
| ./bin/hadoop-daemon.sh start namenode | |
| tailf logs/hadoop-ubuntu-namenode-*.log | |
| ./bin/hadoop-daemon.sh start datanode | |
| tailf logs/hadoop-ubuntu-datanode-*.log | |
| ./bin/hadoop-daemon.sh start jobtracker | |
| tailf logs/hadoop-ubuntu-jobtracker-*.log | |
| ./bin/hadoop-daemon.sh start tasktracker | |
| tailf logs/hadoop-ubuntu-tasktracker-*.log | |
| ./bin/hadoop-daemon.sh start secondarynamenode | |
| tailf logs/hadoop-ubuntu-secondarynamenode-*.log | |
| $ jps | |
| 13353 JobTracker | |
| 13579 TaskTracker | |
| 12940 DataNode | |
| 13179 SecondaryNameNode | |
| 12708 NameNode | |
| 13650 Jps | |
| # access in browser | |
| localhost:15070 | |
| localhost:15030 | |
| # Add environment variable for hadoop | |
| vi /home/ubuntu/.bashrc | |
| export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/ | |
| export HADOOP_HOME=/home/ubuntu/app/hadoop-1.2.1 | |
| source ~/.bashrc | |
| # Additional commands | |
| $HADOOP_HOME/bin/start-dfs.sh | |
| $HADOOP_HOME/bin/start-mapred.sh | |
| $HADOOP_HOME/bin/start-all.sh | |
| $HADOOP_HOME/bin/hadoop-daemon.sh start namenode | |
| jps | |
| $HADOOP_HOME/bin/hadoop-daemon.sh start datanode | |
| jps | |
| $HADOOP_HOME/bin/hadoop-daemon.sh start jobtracker | |
| jps | |
| $HADOOP_HOME/bin/hadoop-daemon.sh start tasktracker | |
| ########################## OUTPUT ################################## | |
| # ./bin/hadoop namenode -format | |
| ubuntu@selvapc:~/app/hadoop-1.2.1$ ./bin/hadoop namenode -format [173/1902] | |
| Warning: $HADOOP_HOME is deprecated. | |
| 17/01/05 17:02:43 INFO namenode.NameNode: STARTUP_MSG: | |
| /************************************************************ | |
| STARTUP_MSG: Starting NameNode | |
| STARTUP_MSG: host = selvapc/127.0.1.1 | |
| STARTUP_MSG: args = [-format] | |
| STARTUP_MSG: version = 1.2.1 | |
| STARTUP_MSG: build = https://svn.apache.org/repos/asf/hadoop/common/branches/branch-1.2 -r 1503152; com | |
| piled by 'mattf' on Mon Jul 22 15:23:09 PDT 2013 | |
| STARTUP_MSG: java = 1.8.0_111 | |
| ************************************************************/ | |
| 17/01/05 17:02:43 INFO util.GSet: Computing capacity for map BlocksMap | |
| 17/01/05 17:02:43 INFO util.GSet: VM type = 64-bit | |
| 17/01/05 17:02:43 INFO util.GSet: 2.0% max memory = 1013645312 | |
| 17/01/05 17:02:43 INFO util.GSet: capacity = 2^21 = 2097152 entries | |
| 17/01/05 17:02:43 INFO util.GSet: recommended=2097152, actual=2097152 | |
| 17/01/05 17:02:44 INFO namenode.FSNamesystem: fsOwner=ubuntu | |
| 17/01/05 17:02:44 INFO namenode.FSNamesystem: supergroup=supergroup | |
| 17/01/05 17:02:44 INFO namenode.FSNamesystem: isPermissionEnabled=true | |
| 17/01/05 17:02:44 INFO namenode.FSNamesystem: dfs.block.invalidate.limit=100 | |
| 17/01/05 17:02:44 INFO namenode.FSNamesystem: isAccessTokenEnabled=false accessKeyUpdateInterval=0 min(s) | |
| , accessTokenLifetime=0 min(s) | |
| 17/01/05 17:02:44 INFO namenode.FSEditLog: dfs.namenode.edits.toleration.length = 0 | |
| 17/01/05 17:02:44 INFO namenode.NameNode: Caching file names occuring more than 10 times | |
| 17/01/05 17:02:44 INFO common.Storage: Image file /home/ubuntu/hadoop/name/current/fsimage of size 112 by | |
| tes saved in 0 seconds. | |
| 17/01/05 17:02:44 INFO namenode.FSEditLog: closing edit log: position=4, editlog=/home/ubuntu/hadoop/name | |
| /current/edits | |
| 17/01/05 17:02:44 INFO namenode.FSEditLog: close success: truncate to 4, editlog=/home/ubuntu/hadoop/name | |
| /current/edits | |
| 17/01/05 17:02:44 INFO common.Storage: Storage directory /home/ubuntu/hadoop/name has been successfully f | |
| ormatted. | |
| 17/01/05 17:02:44 INFO namenode.NameNode: SHUTDOWN_MSG: | |
| /************************************************************ | |
| SHUTDOWN_MSG: Shutting down NameNode at selvapc/127.0.1.1 | |
| ************************************************************/ | |
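| # format, start, format: re-formatting the namenode after the datanode has already stored data leaves the datanode with a stale namespaceID, and it then fails to start with: | |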
| 2014-09-17 04:37:46,975 ERROR org.apache.hadoop.hdfs.server.datanode.DataNode: java.io.IOException: Incompatible namespaceIDs in /home/selva/hadoop/tmp/dfs/data: namenode namespaceID = 1949108614; datanode namespaceID = 870281182 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| cd | |
| mkdir handson | |
| cd handson | |
| cp ../hadoop-workshop/handson/hadoop/WordCount.java . | |
| mkdir wordcount_classes | |
| javac -classpath ${HADOOP_HOME}/hadoop-core-1.2.1.jar -d wordcount_classes WordCount.java | |
| find wordcount_classes/ | |
| jar -cvf wordcount.jar -C wordcount_classes/ . | |
| cp ../hadoop-workshop/handson/hadoop/input.txt . | |
| $HADOOP_HOME/bin/hadoop fs -copyFromLocal input.txt /home/ubuntu/wordcount/inputdata/input.txt | |
| $HADOOP_HOME/bin/hadoop fs -ls /home/ubuntu/wordcount/inputdata | |
| $HADOOP_HOME/bin/hadoop fs -cat /home/ubuntu/wordcount/inputdata/input.txt | |
| $HADOOP_HOME/bin/hadoop jar wordcount.jar org.apache.hadoop.examples.WordCount /home/ubuntu/wordcount/inputdata /home/ubuntu/wordcount/outputdata | |
| ############ Users Count for a Web Application ############# | |
| cd | |
| cd handson | |
| cp ../hadoop-workshop/handson/hadoop/IPCount.java . | |
| mkdir ipcount_classes | |
| javac -classpath ${HADOOP_HOME}/hadoop-core-1.2.1.jar -d ipcount_classes IPCount.java | |
| find ipcount_classes/ | |
| jar -cvf ipcount.jar -C ipcount_classes/ . | |
| cp ../hadoop-workshop/handson/hadoop/access_log . | |
| $HADOOP_HOME/bin/hadoop fs -copyFromLocal access_log /home/ubuntu/ipcount/inputdata/access_log | |
| $HADOOP_HOME/bin/hadoop fs -ls /home/ubuntu/ipcount/inputdata | |
| $HADOOP_HOME/bin/hadoop fs -cat /home/ubuntu/ipcount/inputdata/access_log | |
| $HADOOP_HOME/bin/hadoop jar ipcount.jar org.apache.hadoop.examples.IPCount /home/ubuntu/ipcount/inputdata /home/ubuntu/ipcount/outputdata |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package org.apache.hadoop.examples; | |
| import java.io.IOException; | |
| import java.util.StringTokenizer; | |
| import org.apache.hadoop.conf.Configuration; | |
| import org.apache.hadoop.fs.Path; | |
| import org.apache.hadoop.io.IntWritable; | |
| import org.apache.hadoop.io.Text; | |
| import org.apache.hadoop.mapreduce.Job; | |
| import org.apache.hadoop.mapreduce.Mapper; | |
| import org.apache.hadoop.mapreduce.Reducer; | |
| import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; | |
| import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; | |
| import org.apache.hadoop.util.GenericOptionsParser; | |
| public class WordCount { | |
| public static class TokenizerMapper | |
| extends Mapper<Object, Text, Text, IntWritable>{ | |
| private final static IntWritable one = new IntWritable(1); | |
| private Text word = new Text(); | |
| public void map(Object key, Text value, Context context | |
| ) throws IOException, InterruptedException { | |
| StringTokenizer itr = new StringTokenizer(value.toString()); | |
| while (itr.hasMoreTokens()) { | |
| word.set(itr.nextToken()); | |
| context.write(word, one); | |
| } | |
| } | |
| } | |
| public static class IntSumReducer | |
| extends Reducer<Text,IntWritable,Text,IntWritable> { | |
| private IntWritable result = new IntWritable(); | |
| public void reduce(Text key, Iterable<IntWritable> values, | |
| Context context | |
| ) throws IOException, InterruptedException { | |
| int sum = 0; | |
| for (IntWritable val : values) { | |
| sum += val.get(); | |
| } | |
| result.set(sum); | |
| context.write(key, result); | |
| } | |
| } | |
| public static void main(String[] args) throws Exception { | |
| Configuration conf = new Configuration(); | |
| Job job = new Job(conf, "word count"); | |
| job.setJarByClass(WordCount.class); | |
| job.setMapperClass(TokenizerMapper.class); | |
| job.setCombinerClass(IntSumReducer.class); | |
| job.setReducerClass(IntSumReducer.class); | |
| job.setOutputKeyClass(Text.class); | |
| job.setOutputValueClass(IntWritable.class); | |
| FileInputFormat.addInputPath(job, new Path(args[0])); | |
| FileOutputFormat.setOutputPath(job, new Path(args[1])); | |
| System.exit(job.waitForCompletion(true) ? 0 : 1); | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| one two three two three four five five | |
| five one two three four one one two | |
| one one two five five five four |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| * IP address | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~$ ifconfig | |
| wlan0 Link encap:Ethernet HWaddr 00:1e:2a:37:55:03 | |
| inet addr:10.1.15.103 Bcast:10.1.15.255 Mask:255.255.255.0 | |
| inet6 addr: fe80::21e:2aff:fe37:5503/64 Scope:Link | |
| * ssh connection | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~$ ssh ubuntu@10.1.15.122 | |
| - Master | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/.ssh$ scp id_rsa.pub 10.1.15.122:/home/ubuntu/ | |
| - Slave | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/.ssh$ scp id_rsa.pub 10.1.15.103:/home/ubuntu/ | |
| - Master | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~$ cat id_rsa.pub >> .ssh/authorized_keys | |
| - Slave | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~$ cat id_rsa.pub >> .ssh/authorized_keys | |
| * masters change | |
| - Master & slave | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~$ cd $HADOOP_HOME | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1$ cd conf | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1/conf$ vi masters | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1/conf$ cat masters | |
| 10.1.15.103 | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1$ cat conf/slaves | |
| 10.1.15.103 | |
| 10.1.15.122 | |
| * localhost to ipaddress changes | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1/conf$ grep -iR "localhost" . | |
| ./masters:localhost | |
| ./core-site.xml: <value>hdfs://localhost:10011</value> | |
| ./mapred-site.xml: <value>localhost:10012</value> | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1/conf$ cat masters | |
| localhost | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1/conf$ vi masters | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1/conf$ cat masters | |
| 10.1.15.103 | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1/conf$ cat slaves | |
| 10.1.15.103 | |
| 10.1.15.122 | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1/conf$ vi core-site.xml | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1/conf$ vi mapred-site.xml | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1/conf$ | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1/conf$ | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1/conf$ grep -iR "localhost" . | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1/conf$ | |
| * Start node | |
| - Master | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1$ ./bin/hadoop-daemon.sh start namenode | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1$ ./bin/hadoop-daemon.sh start jobtracker | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1$ ./bin/hadoop-daemon.sh start datanode | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1$ ./bin/hadoop-daemon.sh start tasktracker | |
| - Slave | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1$ ./bin/hadoop-daemon.sh start datanode | |
| ubuntu@ubuntu-HP-Pro-3330-MT:~/app/hadoop-1.2.1$ ./bin/hadoop-daemon.sh start tasktracker | |
| 2014-09-17 04:37:46,975 ERROR org.apache.hadoop.hdfs.server.datanode.DataNode: java.io.IOException: Incompatible namespaceIDs in /home/selva/hadoop/tmp/dfs/data: namenode namespaceID = 1949108614; datanode namespaceID = 870281182 | |
| /home/student/hadoop/tmp/dfs/data/current/VERSION |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC/tBn4Sx/+0TgPNxrwEFzmwTYIcsNchA1aqxpI0eTWwmPBxquzrujo7bU84oF6oCBfUozgV2aZyDkVKGxQAyDHhAMlPoXe7oDHLvv9nfdU94VDWPzrG/E0/y+uQk6MryeFQjBA/bwHlVNgE/aSfBsmdznKU32pKmV1CXnHyohCFhWvJPtHrt29CUt2jvK0DT+f7oaaE8ipZHQlHNozQCHCwvUtHaUCSNBtXj4KUd1ObtVa4SSPJ8RcVLSzY1FYt+GbJL2z8xxkIFFWNdVkbrrfTnU/YX2FzMtp0ydwy5cPrJMLoZdsJlaVbU9mq2A1SUiuteOhqEgCqbsRehL+dc7x student@student |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| https://drive.google.com/file/d/0Bxu-zrDMylMqYTdycUJSSElkWEE/view?usp=sharing | |
| https://drive.google.com/file/d/0Bxu-zrDMylMqQmdWMElkTGJkT3M/view?usp=sharing |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment