目录
1. 读取一个excel文件路径,返回读取的内容
import pandas as pd
# Load an Excel workbook into a DataFrame and print it.
# Widen the column display first so long cell text is not truncated when printed.
pd.set_option('max_colwidth', 500)
filepath = r'C:\Users\zhf\Desktop\华为杯\原型系统V10\输入文件\输入文件夹\数据集500.xlsx'
data = pd.read_excel(io=filepath)  # adjust the sheet argument if another sheet is needed
print(data)
2. 读取一个excel文件路径,返回读取的内容为列表
import pandas as pd
import numpy as np
# Load an Excel workbook and print its rows as a nested Python list.
pd.set_option('max_colwidth', 500)
filepath = r'C:\Users\zhf\Desktop\华为杯\原型系统V10\输入文件\输入文件夹\数据集500.xlsx'
data = pd.read_excel(io=filepath)  # adjust the sheet argument if another sheet is needed
# Convert the frame to an ndarray, then to plain nested lists (one inner list per row).
rows = data.to_numpy().tolist()
print(rows)
3. 读取excel文件的一列,返回读取的内容为列表
import pandas as pd
import numpy as np
# Read one column of an Excel sheet and print it as a flat Python list.
filepath = r'C:\Users\zhf\Desktop\华为杯\原型系统V10\中间文件\临时文件\tu.xlsx'
column = 0  # zero-based position of the column to extract
tudata = pd.read_excel(filepath)
# Selecting a single integer position yields a Series, so the values come out
# flat and no per-row unwrapping loop is needed.
tu = tudata.iloc[:, column].tolist()
print(tu)  # a flat list of the column's values
结果为:
['3link15_disk5_errorcode', 'powerController_powerCycle_test_109', 'Power_Type_check']
4. 读取一个txt文件路径,返回读取的内容为列表
import pandas as pd
import numpy as np
import codecs
# Read a UTF-8 text file and print, for every line, its whitespace-separated tokens.
filepath = r'C:\Users\zhf\Desktop\华为杯\原型系统V10\词典文件\停用词.txt'
# The context manager closes the handle even if decoding fails part-way through.
with codecs.open(filepath, "r", "utf-8") as file:
    # One inner list per line; split() with no argument splits on any whitespace.
    stopwords = [line.strip().split() for line in file]
print(stopwords)
5. 读入一个路径的txt文件,分割里面的内容读取
import pandas as pd
import numpy as np
import codecs
# Read a UTF-8 text file and split every line on the "$&@&$" field separator.
filepath = r'C:\Users\zhf\Desktop\华为杯\原型系统V10\中间文件\数据集202006\xxxxx209\无tu的数据集\_xxxxx209_固态坏了_.txt'
# The original never closed the file; the context manager releases it reliably.
with codecs.open(filepath, "r", "utf-8") as file:
    # One inner list of fields per line.
    tulist = [line.strip().split("$&@&$") for line in file]
print(tulist)
6. 读入一个路径的txt文件,返回为一个字符串
import pandas as pd
import numpy as np
import codecs
# Read a UTF-8 text file and collapse it into one space-separated string.
filepath = r'C:\Users\zhf\Desktop\华为杯\原型系统V10\中间文件\数据集202006\xxxxx209\无tu的数据集\_xxxxx209_固态坏了_.txt'
# The original never closed the file; the context manager releases it reliably.
with codecs.open(filepath, "r", "utf-8") as file:
    notulist = [line.strip() for line in file]  # prefer list comprehensions
# Every stripped line is followed by one space (including the last line), built
# in a single join rather than by repeated string concatenation.
one_str = "".join(line + " " for line in notulist)
# Print the assembled string; the original printed the intermediate list instead.
print(one_str)
7. 读入一个路径的txt文件,返回其样本长度
import pandas as pd
import numpy as np
import codecs
# Read a UTF-8 text file and print the length of its decoded content.
filepath = r'C:\Users\zhf\Desktop\华为杯\原型系统V10\中间文件\数据集201908\xxxxx209\无tu的数据集\_xxxxx209_来料错误_.txt'
# The context manager closes the handle even if read() raises.
with codecs.open(filepath, "r", "utf-8") as file:
    text = file.read()
print(len(text))  # length of the decoded text in characters, not bytes