Tags: hdfs, mapreduce

This post walks through a series of operations on a Hadoop cluster: creating directories and uploading files with the HDFS shell, checking filesystem status, and running the bundled MapReduce examples (WordCount and pi estimation). It also covers file permissions, ownership changes, and the HDFS trash mechanism.
[root@master ~]# hdfs dfs -ls /
Found 4 items
drwxr-xr-x   - root supergroup          0 2022-03-21 18:41 /testHDFSshell1
drwxr-xr-x   - root supergroup          0 2022-03-21 18:46 /testHDFSshell2
drwxr-xr-x   - root supergroup          0 2022-03-21 18:46 /testHDFSshell3
drwxr-xr-x   - root supergroup          0 2022-03-21 18:46 /testHDFSshell4
[root@master ~]# hadoop fs -ls /
Found 4 items
drwxr-xr-x   - root supergroup          0 2022-03-21 18:41 /testHDFSshell1
drwxr-xr-x   - root supergroup          0 2022-03-21 18:46 /testHDFSshell2
drwxr-xr-x   - root supergroup          0 2022-03-21 18:46 /testHDFSshell3
drwxr-xr-x   - root supergroup          0 2022-03-21 18:46 /testHDFSshell4
[root@master ~]# hdfs dfs -ls hdfs://master:9000/
Found 5 items
drwxr-xr-x   - root supergroup          0 2022-03-22 10:07 hdfs://master:9000/data
drwxr-xr-x   - root supergroup          0 2022-03-21 18:41 hdfs://master:9000/testHDFSshell1
drwxr-xr-x   - root supergroup          0 2022-03-21 18:46 hdfs://master:9000/testHDFSshell2
drwxr-xr-x   - root supergroup          0 2022-03-21 18:46 hdfs://master:9000/testHDFSshell3
drwxr-xr-x   - root supergroup          0 2022-03-21 18:46 hdfs://master:9000/testHDFSshell4
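Note: hdfs dfs and hadoop fs are interchangeable for HDFS paths, and a bare path such as / is resolved against the default filesystem, so all three listings above address the same namespace (the extra /data directory in the third listing simply appeared between sessions). To confirm what a bare path resolves to, getconf prints the configured default (a quick check, not part of the original session):

    hdfs getconf -confKey fs.defaultFS    # prints hdfs://master:9000 on this cluster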
[root@master ~]# cd $HADOOP_HOME
[root@master hadoop-2.7.6]# pwd
/usr/local/soft/hadoop-2.7.6
[root@master hadoop-2.7.6]# echo $HADOOP_HOME 
/usr/local/soft/hadoop-2.7.6
[root@master hadoop-2.7.6]# cd etc/hadoop/
[root@master hadoop]# ls
capacity-scheduler.xml      kms-env.sh
configuration.xsl           kms-log4j.properties
container-executor.cfg      kms-site.xml
core-site.xml               log4j.properties
core-site.xml.0             mapred-env.cmd
hadoop-env.cmd              mapred-env.sh
hadoop-env.sh               mapred-queues.xml.template
hadoop-env.sh.0             mapred-site.xml
hadoop-metrics2.properties  mapred-site.xml.template
hadoop-metrics.properties   slaves
hadoop-policy.xml           slaves.0
hdfs-site.xml               ssl-client.xml.example
hdfs-site.xml.0             ssl-server.xml.example
httpfs-env.sh               yarn-env.cmd
httpfs-log4j.properties     yarn-env.sh
httpfs-signature.secret     yarn-site.xml
httpfs-site.xml             yarn-site.xml.0
kms-acls.xml
[root@master hadoop]# vim core-site.xml
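The trash log lines later in this session (Deletion interval = 1440 minutes) suggest this edit enabled the HDFS trash in core-site.xml. A minimal sketch of the relevant property, assuming that is what was added here:

    <property>
        <name>fs.trash.interval</name>
        <value>1440</value>    <!-- minutes a deleted file survives in .Trash -->
    </property>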
[root@master hadoop]# python
Python 2.7.5 (default, Oct 14 2020, 14:45:30) 
[GCC 4.8.5 20150623 (Red Hat 4.8.5-44)] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import this
The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!
>>> exit()
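The interpreter session above just produces the text for the next step: import this prints the Zen of Python, which is then pasted into theZenOfPython.txt via vim below. A non-interactive equivalent (an alternative, not what the session ran) would be:

    python -c "import this" > theZenOfPython.txt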
[root@master hadoop]# cd /usr/local/soft/
[root@master soft]# ls
0??  data          jdk1.8.0_171  redis        shell    show
A??  hadoop-2.7.6  packages      redis-6.2.6  shell01  test.txt
[root@master soft]# cd data/
[root@master data]# pwd
/usr/local/soft/data
[root@master data]# ls
new_db.sql  score.sql  student.sql
[root@master data]# touch theZenOfPython.txt
[root@master data]# vim theZenOfPython.txt 
[root@master data]# hdfs dfs -mkdir /data
[root@master data]# hdfs dfs -mkdir /data/shell
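The two mkdir calls only work in this order because /data must exist before /data/shell. With -p (used later for /data/wc/input) the whole parent chain is created in one call:

    hdfs dfs -mkdir -p /data/shell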
[root@master data]# hdfs dfs -put theZenOfPython.txt /data/shell/
[root@master data]# hdfs dfs -ls /data/shell/
Found 1 items
-rw-r--r--   1 root supergroup       1122 2022-03-22 16:57 /data/shell/theZenOfPython.txt
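In -ls output the second column (1 here) is the replication factor, followed by owner, group, size in bytes, modification time, and path. The same metadata can be queried field by field with -stat; a sketch using format specifiers that should work on this version:

    hdfs dfs -stat "replication=%r size=%b" /data/shell/theZenOfPython.txt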
[root@master data]# hdfs dfs -cat /data/shell/theZenOfPython.txt
[root@master data]# hdfs dfs -mv /data/shell/theZenOfPython.txt  /data/shell/theZen.txt
[root@master data]# hdfs dfs -ls /data/shell
Found 1 items
-rw-r--r--   1 root supergroup       1122 2022-03-22 16:57 /data/shell/theZen.txt
[root@master data]# hdfs dfs -vim  /data/shell/theZenOfPython.txt  /data/shell/theZen.txt
-vim: Unknown command
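The failed -vim shows that HDFS files cannot be edited in place; the shell only supports whole-file operations. The usual workaround is a round trip: copy the file to local disk, edit it, and push it back with -f to overwrite:

    hdfs dfs -get /data/shell/theZen.txt .
    vim theZen.txt
    hdfs dfs -put -f theZen.txt /data/shell/theZen.txt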
[root@master data]# hdfs dfs -cp /data/shell/theZen.txt  /data/shell/theZenCopy.txt
[root@master data]# hdfs dfs -ls /data/shell/
Found 2 items
-rw-r--r--   1 root supergroup       1122 2022-03-22 16:57 /data/shell/theZen.txt
-rw-r--r--   1 root supergroup       1122 2022-03-22 17:07 /data/shell/theZenCopy.txt
[root@master data]# ls
new_db.sql  score.sql  student.sql  theZenOfPython.txt
[root@master data]# ls -l theZenOfPython.txt 
-rw-r--r--. 1 root root 1122 Mar 22 16:54 theZenOfPython.txt
[root@master data]# chmod u+x theZenOfPython.txt 
[root@master data]# chmod g+x theZenOfPython.txt 
[root@master data]# chmod o+x theZenOfPython.txt 
[root@master data]# ls -l theZenOfPython.txt 
-rwxr-xr-x. 1 root root 1122 Mar 22 16:54 theZenOfPython.txt
[root@master data]# chmod 777 theZenOfPython.txt 
[root@master data]# ls -l theZenOfPython.txt 
-rwxrwxrwx. 1 root root 1122 Mar 22 16:54 theZenOfPython.txt
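The octal form encodes each class as read=4, write=2, execute=1, summed per digit for user, group, and other, so 777 means rwx for everyone. The three incremental +x commands above could therefore have been a single call:

    chmod a+x theZenOfPython.txt    # same effect as u+x, g+x and o+x combined
    chmod 755 theZenOfPython.txt    # rwxr-xr-x: full for owner, read+execute for others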
[root@master data]# hdfs dfs -chmod 777  /data/shell/theZen.txt
[root@master data]# su test
[test@master data]$ exit
exit
[root@master data]# ls
new_db.sql  score.sql  student.sql  theZenOfPython.txt
[root@master data]# ll theZenOfPython.txt 
-rwxrwxrwx. 1 root root 1122 Mar 22 16:54 theZenOfPython.txt
[root@master data]# chown test theZenOfPython.txt 
[root@master data]# ll theZenOfPython.txt 
-rwxrwxrwx. 1 test root 1122 Mar 22 16:54 theZenOfPython.txt
[root@master data]# chown test:test theZenOfPython.txt 
[root@master data]# ll theZenOfPython.txt 
-rwxrwxrwx. 1 test test 1122 Mar 22 16:54 theZenOfPython.txt
[root@master data]# hdfs dfs -chown hdfs:hadoop /data/shell/theZen.txt
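hdfs dfs -chown mirrors the Linux user:group syntax, though in HDFS only the superuser may change ownership. Both -chmod and -chown accept -R to apply a change recursively; for example, re-owning the whole directory would look like:

    hdfs dfs -chown -R hdfs:hadoop /data/shell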
[root@master data]# df -h
Filesystem               Size  Used Avail Use% Mounted on
devtmpfs                 1.9G     0  1.9G    0% /dev
tmpfs                    1.9G     0  1.9G    0% /dev/shm
tmpfs                    1.9G   13M  1.9G    1% /run
tmpfs                    1.9G     0  1.9G    0% /sys/fs/cgroup
/dev/mapper/centos-root   47G  7.1G   40G   16% /
/dev/sda1               1014M  185M  830M   19% /boot
tmpfs                    378M     0  378M    0% /run/user/0
[root@master data]# hdfs dfs -df -h
Filesystem            Size  Used  Available  Use%
hdfs://master:9000  93.9 G  40 K     83.0 G    0%
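Note the difference: the local df -h reports this node's disks, while hdfs dfs -df -h reports the aggregate capacity of the whole cluster (93.9 GB across all DataNodes). For a per-DataNode breakdown, the admin report is the usual tool:

    hdfs dfsadmin -report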
[root@master data]# hdfs dfs -rm /data/shell/theZenCopy.txt
22/03/22 17:46:39 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 1440 minutes, Emptier interval = 0 minutes.
22/03/22 17:46:39 INFO fs.TrashPolicyDefault: Moved: 'hdfs://master:9000/data/shell/theZenCopy.txt' to trash at: hdfs://master:9000/user/root/.Trash/Current/data/shell/theZenCopy.txt
Moved: 'hdfs://master:9000/data/shell/theZenCopy.txt' to trash at: hdfs://master:9000/user/root/.Trash/Current
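Because fs.trash.interval is 1440 minutes, a plain -rm only moves the file into the per-user trash, so it stays recoverable for a day; the -skipTrash deletion below bypasses this safety net entirely. A sketch of restoring the file, assuming the trash path printed above, plus forcing an immediate purge:

    hdfs dfs -mv /user/root/.Trash/Current/data/shell/theZenCopy.txt /data/shell/
    hdfs dfs -expunge    # checkpoint and delete expired trash now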
[root@master data]# hdfs dfs -cp /data/shell/theZen.txt /data/shell/theZenCopy1.txt
[root@master data]# hdfs dfs -rm -skipTrash /data/shell/theZenCopy1.txt
Deleted /data/shell/theZenCopy1.txt
[root@master data]# hdfs dfs -rm -r /testHDFSshell1
22/03/22 19:15:59 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 1440 minutes, Emptier interval = 0 minutes.
22/03/22 19:15:59 INFO fs.TrashPolicyDefault: Moved: 'hdfs://master:9000/testHDFSshell1' to trash at: hdfs://master:9000/user/root/.Trash/Current/testHDFSshell1
Moved: 'hdfs://master:9000/testHDFSshell1' to trash at: hdfs://master:9000/user/root/.Trash/Current
[root@master data]# hdfs dfs -rm -r /testHDFSshell2
22/03/22 19:16:38 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 1440 minutes, Emptier interval = 0 minutes.
22/03/22 19:16:38 INFO fs.TrashPolicyDefault: Moved: 'hdfs://master:9000/testHDFSshell2' to trash at: hdfs://master:9000/user/root/.Trash/Current/testHDFSshell2
Moved: 'hdfs://master:9000/testHDFSshell2' to trash at: hdfs://master:9000/user/root/.Trash/Current
[root@master data]# hdfs dfs -rm -r /testHDFSshell3
22/03/22 19:16:47 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 1440 minutes, Emptier interval = 0 minutes.
22/03/22 19:16:47 INFO fs.TrashPolicyDefault: Moved: 'hdfs://master:9000/testHDFSshell3' to trash at: hdfs://master:9000/user/root/.Trash/Current/testHDFSshell3
Moved: 'hdfs://master:9000/testHDFSshell3' to trash at: hdfs://master:9000/user/root/.Trash/Current
[root@master data]# hdfs dfs -rm -r /testHDFSshell4
22/03/22 19:16:51 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 1440 minutes, Emptier interval = 0 minutes.
22/03/22 19:16:51 INFO fs.TrashPolicyDefault: Moved: 'hdfs://master:9000/testHDFSshell4' to trash at: hdfs://master:9000/user/root/.Trash/Current/testHDFSshell4
Moved: 'hdfs://master:9000/testHDFSshell4' to trash at: hdfs://master:9000/user/root/.Trash/Current
[root@master data]# hdfs dfs -get /data/shell/theZen.txt ./
[root@master data]# ls
new_db.sql  student.sql         theZen.txt
score.sql   theZenOfPython.txt
[root@master data]# cd ..
[root@master soft]# ls
0??  data          jdk1.8.0_171  redis        shell    show
A??  hadoop-2.7.6  packages      redis-6.2.6  shell01  test.txt
[root@master soft]# cd hadoop-2.7.6/
[root@master hadoop-2.7.6]# ls
bin  include  libexec      logs        README.txt  share
etc  lib      LICENSE.txt  NOTICE.txt  sbin        tmp
[root@master hadoop-2.7.6]# pwd
/usr/local/soft/hadoop-2.7.6
[root@master hadoop-2.7.6]# cd share/hadoop/mapreduce/
[root@master mapreduce]# ls
hadoop-mapreduce-client-app-2.7.6.jar
hadoop-mapreduce-client-common-2.7.6.jar
hadoop-mapreduce-client-core-2.7.6.jar
hadoop-mapreduce-client-hs-2.7.6.jar
hadoop-mapreduce-client-hs-plugins-2.7.6.jar
hadoop-mapreduce-client-jobclient-2.7.6.jar
hadoop-mapreduce-client-jobclient-2.7.6-tests.jar
hadoop-mapreduce-client-shuffle-2.7.6.jar
hadoop-mapreduce-examples-2.7.6.jar
lib
lib-examples
sources
[root@master mapreduce]# cd /usr/local/soft/data
[root@master data]# ls
new_db.sql  student.sql         theZen.txt
score.sql   theZenOfPython.txt
[root@master data]# vim wordcount
[root@master data]# hdfs dfs -mkdir -p /data/wc/input
[root@master data]# hdfs dfs -put wordcount /data/wc/input/
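The next command runs from the examples directory, so the session evidently changed back to /usr/local/soft/hadoop-2.7.6/share/hadoop/mapreduce between these two steps. Before submitting, it is worth sanity-checking the input and remembering that the output directory must not already exist, or the job fails:

    hdfs dfs -cat /data/wc/input/wordcount
    hdfs dfs -ls /data/wc/output    # should report 'No such file or directory' before the run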
[root@master mapreduce]# hadoop jar hadoop-mapreduce-examples-2.7.6.jar wordcount /data/wc/input/ /data/wc/output
22/03/22 19:34:21 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.49.110:8032
22/03/22 19:34:22 INFO input.FileInputFormat: Total input paths to process : 1
22/03/22 19:34:22 INFO mapreduce.JobSubmitter: number of splits:1
22/03/22 19:34:23 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1647858149677_0001
22/03/22 19:34:23 INFO impl.YarnClientImpl: Submitted application application_1647858149677_0001
22/03/22 19:34:23 INFO mapreduce.Job: The url to track the job: http://master:8088/proxy/application_1647858149677_0001/
22/03/22 19:34:23 INFO mapreduce.Job: Running job: job_1647858149677_0001
22/03/22 19:34:34 INFO mapreduce.Job: Job job_1647858149677_0001 running in uber mode : false
22/03/22 19:34:34 INFO mapreduce.Job:  map 0% reduce 0%
22/03/22 19:34:42 INFO mapreduce.Job:  map 100% reduce 0%
22/03/22 19:34:49 INFO mapreduce.Job:  map 100% reduce 100%
22/03/22 19:34:49 INFO mapreduce.Job: Job job_1647858149677_0001 completed successfully
22/03/22 19:34:50 INFO mapreduce.Job: Counters: 49
	File System Counters
		FILE: Number of bytes read=115
		FILE: Number of bytes written=245637
		FILE: Number of read operations=0
		FILE: Number of large read operations=0
		FILE: Number of write operations=0
		HDFS: Number of bytes read=197
		HDFS: Number of bytes written=73
		HDFS: Number of read operations=6
		HDFS: Number of large read operations=0
		HDFS: Number of write operations=2
	Job Counters 
		Launched map tasks=1
		Launched reduce tasks=1
		Data-local map tasks=1
		Total time spent by all maps in occupied slots (ms)=5995
		Total time spent by all reduces in occupied slots (ms)=3830
		Total time spent by all map tasks (ms)=5995
		Total time spent by all reduce tasks (ms)=3830
		Total vcore-milliseconds taken by all map tasks=5995
		Total vcore-milliseconds taken by all reduce tasks=3830
		Total megabyte-milliseconds taken by all map tasks=6138880
		Total megabyte-milliseconds taken by all reduce tasks=3921920
	Map-Reduce Framework
		Map input records=9
		Map output records=15
		Map output bytes=149
		Map output materialized bytes=115
		Input split bytes=107
		Combine input records=15
		Combine output records=9
		Reduce input groups=9
		Reduce shuffle bytes=115
		Reduce input records=9
		Reduce output records=9
		Spilled Records=18
		Shuffled Maps =1
		Failed Shuffles=0
		Merged Map outputs=1
		GC time elapsed (ms)=178
		CPU time spent (ms)=1500
		Physical memory (bytes) snapshot=300212224
		Virtual memory (bytes) snapshot=4160380928
		Total committed heap usage (bytes)=138780672
	Shuffle Errors
		BAD_ID=0
		CONNECTION=0
		IO_ERROR=0
		WRONG_LENGTH=0
		WRONG_MAP=0
		WRONG_REDUCE=0
	File Input Format Counters 
		Bytes Read=90
	File Output Format Counters 
		Bytes Written=73
[root@master mapreduce]# hdfs dfs -cat /data/wc/output/*
c	1
flink	2
hadoop	1
hive	1
java	4
mysql	2
python	2
redis	1
springboot	1
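The wildcard in -cat reads everything under the output directory; a plain listing would typically show an empty _SUCCESS marker plus one part file per reduce task (a single part-r-00000 here, since the job ran one reducer):

    hdfs dfs -ls /data/wc/output/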

[root@master mapreduce]# hadoop jar hadoop-mapreduce-examples-2.7.6.jar pi 10 1000
Number of Maps  = 10
Samples per Map = 1000
Wrote input for Map #0
Wrote input for Map #1
Wrote input for Map #2
Wrote input for Map #3
Wrote input for Map #4
Wrote input for Map #5
Wrote input for Map #6
Wrote input for Map #7
Wrote input for Map #8
Wrote input for Map #9
Starting Job
22/03/22 19:51:56 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.49.110:8032
22/03/22 19:51:57 INFO input.FileInputFormat: Total input paths to process : 10
22/03/22 19:51:57 INFO mapreduce.JobSubmitter: number of splits:10
22/03/22 19:51:57 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1647858149677_0002
22/03/22 19:51:58 INFO impl.YarnClientImpl: Submitted application application_1647858149677_0002
22/03/22 19:51:58 INFO mapreduce.Job: The url to track the job: http://master:8088/proxy/application_1647858149677_0002/
22/03/22 19:51:58 INFO mapreduce.Job: Running job: job_1647858149677_0002
22/03/22 19:52:05 INFO mapreduce.Job: Job job_1647858149677_0002 running in uber mode : false
22/03/22 19:52:05 INFO mapreduce.Job:  map 0% reduce 0%
22/03/22 19:52:22 INFO mapreduce.Job:  map 10% reduce 0%
22/03/22 19:52:23 INFO mapreduce.Job:  map 30% reduce 0%
22/03/22 19:52:24 INFO mapreduce.Job:  map 40% reduce 0%
22/03/22 19:52:35 INFO mapreduce.Job:  map 50% reduce 0%
22/03/22 19:52:36 INFO mapreduce.Job:  map 100% reduce 13%
22/03/22 19:52:38 INFO mapreduce.Job:  map 100% reduce 100%
22/03/22 19:52:38 INFO mapreduce.Job: Job job_1647858149677_0002 completed successfully
22/03/22 19:52:38 INFO mapreduce.Job: Counters: 50
	File System Counters
		FILE: Number of bytes read=226
		FILE: Number of bytes written=1353957
		FILE: Number of read operations=0
		FILE: Number of large read operations=0
		FILE: Number of write operations=0
		HDFS: Number of bytes read=2610
		HDFS: Number of bytes written=215
		HDFS: Number of read operations=43
		HDFS: Number of large read operations=0
		HDFS: Number of write operations=3
	Job Counters 
		Launched map tasks=10
		Launched reduce tasks=1
		Data-local map tasks=9
		Rack-local map tasks=1
		Total time spent by all maps in occupied slots (ms)=231390
		Total time spent by all reduces in occupied slots (ms)=11380
		Total time spent by all map tasks (ms)=231390
		Total time spent by all reduce tasks (ms)=11380
		Total vcore-milliseconds taken by all map tasks=231390
		Total vcore-milliseconds taken by all reduce tasks=11380
		Total megabyte-milliseconds taken by all map tasks=236943360
		Total megabyte-milliseconds taken by all reduce tasks=11653120
	Map-Reduce Framework
		Map input records=10
		Map output records=20
		Map output bytes=180
		Map output materialized bytes=280
		Input split bytes=1430
		Combine input records=0
		Combine output records=0
		Reduce input groups=2
		Reduce shuffle bytes=280
		Reduce input records=20
		Reduce output records=0
		Spilled Records=40
		Shuffled Maps =10
		Failed Shuffles=0
		Merged Map outputs=10
		GC time elapsed (ms)=4702
		CPU time spent (ms)=8800
		Physical memory (bytes) snapshot=1392967680
		Virtual memory (bytes) snapshot=22847873024
		Total committed heap usage (bytes)=1232994304
	Shuffle Errors
		BAD_ID=0
		CONNECTION=0
		IO_ERROR=0
		WRONG_LENGTH=0
		WRONG_MAP=0
		WRONG_REDUCE=0
	File Input Format Counters 
		Bytes Read=1180
	File Output Format Counters 
		Bytes Written=97
Job Finished in 41.604 seconds
Estimated value of Pi is 3.14080000000000000000
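The pi example is a quasi-Monte Carlo estimator: each of the 10 map tasks scatters 1000 sample points over a unit square and counts how many land inside the inscribed quarter circle, and the single reducer combines the counts into the 3.1408 estimate. With only 10,000 total samples the result is coarse; more samples per map should tighten it, e.g.:

    hadoop jar hadoop-mapreduce-examples-2.7.6.jar pi 10 1000000

(The session then returns to /usr/local/soft/data before the cd .. below.)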
[root@master data]# cd ..
[root@master soft]# cd hadoop-2.7.6/
[root@master hadoop-2.7.6]# ls
bin  include  libexec      logs        README.txt  share
etc  lib      LICENSE.txt  NOTICE.txt  sbin        tmp
[root@master hadoop-2.7.6]# pwd
/usr/local/soft/hadoop-2.7.6
[root@master hadoop-2.7.6]# cd tmp/
[root@master tmp]# ls
dfs
[root@master tmp]# cd dfs/
[root@master dfs]# ls
name  namesecondary
[root@master dfs]# cd name
[root@master name]# ls
current  in_use.lock
[root@master name]# cd current/
[root@master current]# ls
edits_0000000000000000001-0000000000000000001
edits_0000000000000000002-0000000000000000002
edits_0000000000000000003-0000000000000000004
edits_0000000000000000005-0000000000000000010
edits_0000000000000000011-0000000000000000014
edits_0000000000000000015-0000000000000000022
edits_0000000000000000023-0000000000000000040
edits_inprogress_0000000000000000041
fsimage_0000000000000000022
fsimage_0000000000000000022.md5
fsimage_0000000000000000040
fsimage_0000000000000000040.md5
seen_txid
VERSION
[root@master current]# cat fsimage_0000000000000000022.md5
efa22663f2af0ba4788507831370cc67 *fsimage_0000000000000000022
[root@master current]# md5sum fsimage_0000000000000000022
efa22663f2af0ba4788507831370cc67  fsimage_0000000000000000022
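The matching digests confirm the checkpoint is intact. In this directory, edits_* files are the NameNode's transaction journal, fsimage_* files are periodic snapshots (merged with the help of the namesecondary directory seen earlier), seen_txid records the last transaction id, and VERSION identifies the namespace. Both formats can be dumped to readable XML with the offline viewers; a sketch, assuming the filenames listed above:

    hdfs oiv -p XML -i fsimage_0000000000000000040 -o fsimage.xml    # offline image viewer
    hdfs oev -i edits_0000000000000000023-0000000000000000040 -o edits.xml    # offline edits viewer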
