从 test.xlsx 中读入数据，按限制条件筛选过滤后，再写入 test2.xls 中。
import xlrd
import xlwt
# Copy the first sheet of an Excel workbook into a new .xls file, keeping the
# header row and only those data rows whose blog-content cell passes the
# length and '#' filters below.
#
# NOTE(review): xlrd 2.0+ only reads legacy .xls files; opening an .xlsx
# workbook requires xlrd < 2.0 (or a different reader) -- confirm which
# version this script is expected to run against.
SOURCE_PATH = 'test.xlsx'
OUTPUT_PATH = 'test2.xls'
CONTENT_COL = 3        # zero-based column holding the blog post text
MAX_CONTENT_LEN = 15   # rows whose content is longer than this are dropped

data = xlrd.open_workbook(SOURCE_PATH)
sheet1 = data.sheet_by_index(0)
num_rows = sheet1.nrows

workbook = xlwt.Workbook()
sheet2 = workbook.add_sheet('Sheet1', cell_overwrite_ok=True)

# Copy the header row unchanged; skipped when the input sheet has no rows,
# because asking for row 0 of an empty sheet would raise.
if num_rows:
    for col, title in enumerate(sheet1.row_values(0)):
        sheet2.write(0, col, title)

# Next output row to fill; row 0 is reserved for the header.
out_row = 1
for row in range(1, num_rows):
    # Blog post text. Non-text cells (numbers, dates, booleans) are returned
    # by xlrd as float/int, so coerce to str to keep the checks below from
    # raising TypeError. Only the filter sees the coerced value; the cell
    # written to the output keeps its original type.
    # NOTE(review): assumes the sheet has at least CONTENT_COL + 1 columns.
    blog_content = str(sheet1.cell_value(row, CONTENT_COL))

    # Length filter: drop rows whose content exceeds the limit.
    if len(blog_content) > MAX_CONTENT_LEN:
        continue

    # Drop rows whose content contains a '#' character.
    if '#' in blog_content:
        continue

    # Row passed both filters: copy every cell to the next output row.
    for col, cell in enumerate(sheet1.row_values(row)):
        sheet2.write(out_row, col, cell)
    out_row += 1

workbook.save(OUTPUT_PATH)
print('创建xls完成')
参考资料:
https://www.cnblogs.com/crazymagic/articles/9752287.html
https://www.cnblogs.com/tynam/p/11204895.html
https://blog.csdn.net/jcwang710448116/article/details/82424816
https://www.cnblogs.com/z-bear/p/9455136.html
https://zhuanlan.zhihu.com/p/93421672
https://www.jianshu.com/p/1e994a894586