文章目录
python读取csv文件
import csv
csvfile = open('E:/tests/2022-01/data-text.csv', 'rb') #rb 以只读和二进制的方式打开文件
reader = csv.reader(csvfile)
for row in reader:
print (row)
报错 Error: iterator should return strings, not bytes (did you open the file in text mode?)
解决办法:将文件打开类型设置为rt
或者r
import csv
csvfile = open('E:/tests/2022-01/data-text.csv', 'r')
reader = csv.reader(csvfile)
for row in reader:
print (row)
读取数据记录展示为字典形式
import csv
csvfile = open('E:/tests/2022-01/data-text.csv', 'r')
reader = csv.DictReader(csvfile)
for row in reader:
print (row)
python解析excel文件
处理excel文件的主要库:xlrd(读取)、xlwt(写入)、xlutils(高级操作)
1.查看工作表中的sheet名
import xlrd
book = xlrd.open_workbook(file_path)
for sheet in book.sheets():
print (sheet.name)
2.查看工作表指定sheet的内容
import xlrd
book = xlrd.open_workbook(file_path)
sheet = book.sheet_by_name('sheet1')
print (sheet)
print(sheet.nrows) #返回sheet表中的记录数
#遍历行数,返回每一行的内容(扩展:返回任意行的数据)
for i in range(sheet.nrows):
print (sheet.row_values(i))
#遍历行数,返回指定范围的数据(执行结果返回第14-19行数据)
count = 0
for i in range(sheet.nrows):
if count < 20:
if i >= 14:
row = sheet.row_values(i)
print (i, row )
count += 1
其中:
代码 | 解释 |
---|---|
sheet.name | 返回sheet名 |
sheet.nrows | 返回sheet中数据总行数 |
sheet.row_values(i) | 返回第i行数据(此时每行数据以列表形式展示) |
3.查看sheet中每个元素
#嵌套for循环,将每行元素以单个形式展示
for i in range(sheet.nrows):
print (sheet.row_values(i))
for cell in row:
print (cell)
4.将提取信息以字典形式展示
- 设置country作为data字段的键
import xlrd
book = xlrd.open_workbook('E:/tests/2022-01/SOWC 2014 Stat Tables_Table 9.xlsx')
sheet = book.sheet_by_name('Table 9 ')
#print (sheet)
count = 0
data = {}
for i in range(sheet.nrows):
if count < 10:
if i >= 14:
row = sheet.row_values(i)
country = row[1]
data[country] = {} #将国家设为data的键,对应的值设为另一个字典
count += 1
print(data)
#本次执行结果展示行数为14-23行数据,i<14时count+=1不执行,此时count=0
- 设置country键对应值(以字典形式存储)
import xlrd
book = xlrd.open_workbook('E:/tests/2022-01/SOWC 2014 Stat Tables_Table 9.xlsx')
sheet = book.sheet_by_name('Table 9 ')
data = {}
for i in range(14, sheet.nrows):
row = sheet.row_values(i)
country = row[1]
data[country] = {'child_labor': {'total': [],'male': [],'female': []},
'child_marriage': {'married_by_15': [],'married_by_18': []}
}
print (data['Afghanistan'])
#data[country]具体数据用索引获得
data[country] = {'child_labor': {'total': [row[4], row[5]],'male': [row[6], row[7]],'female': [row[8], row[9]]},
'child_marriage': {'married_by_15': [row[10], row[11]],'married_by_18': [row[12], row[13]]}
}
#pprint按输出数据的结构展示
import pprint
pprint.pprint(data)
- 上述代码整理
import xlrd
book = xlrd.open_workbook('E:/tests/2022-01/SOWC 2014 Stat Tables_Table 9.xlsx')
sheet = book.sheet_by_name('Table 9 ')
data = {}
for i in range(14, sheet.nrows):
row = sheet.row_values(i)
country = row[1]
data[country] = {'child_labor': {'total': [row[4], row[5]],'male': [row[6], row[7]],'female': [row[8], row[9]]},
'child_marriage': {'married_by_15': [row[10], row[11]],'married_by_18': [row[12], row[13]]}
}
if country == 'Zimbabwe':
break
import pprint
pprint.pprint(data)
python 判断excel文件是否存在
Filepath = 'E:/文件路径/'
for i in range(1,8):
Filename = Filepath + '%d'%(i)+'.xlsx' #excel文件名
if os.path.exists(Filename) == False:
writer = pd.ExcelWriter(Filename)
writer.save()
writer.close()