赞
踩
./hdfs dfs -help                                      # 获取命令帮助
hdfs dfs -copyFromLocal /home/hadoop/mk.txt /test/    # 上传文件
hdfs dfs -copyToLocal                                 # 下载文件
hdfs dfs -get                                         # 下载文件
hdfs dfs -put                                         # 上传文件
jps                                                   # 检查hadoop是否已经启动
from hdfs3 import HDFileSystem
# Connect to the local HDFS namenode via the hdfs3 client.
# NOTE(review): hdfs3 is a third-party package; the connection assumes a
# namenode listening on localhost:9000 (the fs.defaultFS port).
host = 'localhost'
port = 9000
# Use the variables defined above instead of repeating the literals,
# so changing the target cluster requires editing only one place.
client = HDFileSystem(host=host, port=port)
$ ipython Python 3.7.3 (default, Mar 28 2019, 10:38:38) [MSC v.1915 32 bit (Intel)] Type 'copyright', 'credits' or 'license' for more information IPython 7.4.0 -- An enhanced Interactive Python. Type '?' for help. In [1]: from functools import reduce In [2]: l = ['a', 'bb', 'ccc'] In [3]: l_count = map(len, l) In [4]: l_count Out[4]: <map at 0x4a06b70> In [5]: l_sum = reduce(lambda x, y: x+y, l_count) In [6]: l_sum Out[6]: 6 In [7]:
- 使用python脚本演示
# hdfs_map.py
# Mapper for a Hadoop Streaming word count: emits one "<word>\t1" record
# per whitespace-separated token read from stdin.
import sys


def read_input(file):
    """Yield the list of whitespace-separated tokens for each line of *file*."""
    for line in file:
        yield line.split()


def main():
    """Read lines from stdin and print a "<word>\t1" record for every token."""
    data = read_input(sys.stdin)
    for words in data:
        for word in words:
            # Tab-separated key/value pair, as Hadoop Streaming expects.
            print('%s%s%d' % (word, '\t', 1))


if __name__ == '__main__':
    main()
# hdfs_reduce.py
# Reducer for a Hadoop Streaming word count: sums the per-word counts
# emitted by hdfs_map.py and prints "<word>\t<total>" records.
import sys
from operator import itemgetter
from itertools import groupby


def read_mapper_output(file, separator='\t'):
    """Yield [word, count] pairs parsed from each mapper-output line.

    The split limit of 1 keeps any extra separators inside the value part.
    """
    for line in file:
        yield line.rstrip().split(separator, 1)


def main():
    """Aggregate sorted "<word>\t<count>" records from stdin and print totals.

    NOTE(review): groupby() only groups adjacent equal keys, so this relies
    on Hadoop Streaming delivering the reducer input sorted by key.
    """
    data = read_mapper_output(sys.stdin)
    for word, group in groupby(data, itemgetter(0)):
        # Renamed the inner loop variable so it no longer shadows `word`.
        total_count = sum(int(count) for _word, count in group)
        print('%s%s%d' % (word, '\t', total_count))


if __name__ == '__main__':
    main()
将两个python脚本上传到hadoop框架中运行
# Submit the streaming word-count job: ship the two Python scripts with the
# job, count words in /test/mk.txt, and write results to /tmp/wordcounttest.
# Fixes: every option line needs a trailing backslash; inline comments after
# a backslash break the continuation; the option is -reducer, not -reduce;
# the -files list is comma-separated with no spaces.
/usr/local/hadoop/bin/hadoop \
    jar /usr/local/hadoop/share/hadoop/tools/lib/hadoop-streaming-2.7.2.jar \
    -files "hdfs_map.py,hdfs_reduce.py" \
    -input /test/mk.txt \
    -output /tmp/wordcounttest \
    -mapper "python3 hdfs_map.py" \
    -reducer "python3 hdfs_reduce.py"
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。