from hdfs.client import Client
from conf.settings import DFS
# For the Python HDFS client API, see the official docs:
# https://hdfscli.readthedocs.io/en/latest/api.html
# client = Client(url, root=None, proxy=None, timeout=None, session=None)
# client = Client("http://hadoop:50070")
# client = InsecureClient("http://120.78.186.82:50070", user='ann');
class hdfsOperator(object):
    """Thin wrapper around an ``hdfs.client.Client`` for common HDFS operations."""

    def __init__(self, client):
        # client: an hdfs.client.Client (or InsecureClient) instance,
        # e.g. Client("http://hadoop:50070")
        self.client = client

    # Read an HDFS text file and return its lines as a list.
    def read_hdfs_file(self, filename):
        """Read *filename* from HDFS and return a list of its lines.

        Each line is decoded as UTF-8 and stripped of surrounding
        whitespace (including the trailing newline).

        :param filename: HDFS path of the file to read.
        :return: list of stripped line strings.
        """
        lines = []
        # delimiter='\n' makes the reader yield the file line by line.
        with self.client.read(filename, encoding='utf-8', delimiter='\n') as reader:
            for line in reader:
                lines.append(line.strip())
        return lines
# --- residue from the original blog page, kept as comments so the file parses ---
# Python操作HDFS封装类,拷贝过去直接用
# 最新推荐文章于 2024-03-19 15:01:36 发布
# 本文介绍了一种Python封装类,用于便捷地操作Hadoop分布式文件系统(HDFS)。通过这个类,开发者可以轻松实现HDFS上的文件拷贝、读写等操作,提升工作效率。
# 摘要由CSDN通过智能技术生成