# Excel handling with openpyxl.
# Only the .xlsx format is supported; legacy .xls files are not.
import openpyxl
# coding=utf-8
import requests, re, pymongo, json, threading, queue
from bs4 import BeautifulSoup
# MongoDB handles: server on the loopback address -> "alexa" database
# -> "total_excel" collection.
client = pymongo.MongoClient(host='127.0.0.1', port=27017)
db = client['alexa']
collection = db['total_excel']
# Open the spreadsheet (openpyxl reads .xlsx only) and pick the sheet to process.
workbook = openpyxl.load_workbook('20200516.xlsx')
# Look the sheet up by title via indexing; Workbook.get_sheet_by_name() is
# deprecated in openpyxl in favour of workbook[title].
worksheet = workbook['have_email_uv']
# Cell values of the second spreadsheet row (1-based row 2, i.e. index 1 of
# worksheet.rows). Fetching the row directly avoids materialising every row of
# the sheet just to read one of them.
# NOTE(review): despite its name this is row 2, not row 3; it is not used by the
# code visible here - confirm whether it is still needed.
row3 = [item.value for item in worksheet[2]]
# Walk spreadsheet rows 3..664 (the original 0-based indices 2..663), clean the
# url and email columns, and print the cleaned email for each row.

# 1-based bounds of the rows to process, inclusive.
FIRST_DATA_ROW = 3
LAST_DATA_ROW = 664

# Both patterns are loop-invariant, so compile them once up front.
# Removes everything from the first '/' to the end of the line.
# NOTE(review): presumably urls are stored without a scheme, so this leaves the
# host name - confirm against the sheet contents.
URL_PATH_RE = re.compile('/.*')
# Removes CJK ideographs (U+4E00-U+9FA5) and a few literal fragments that look
# like mis-decoded text in the email column.
EMAIL_NOISE_RE = re.compile('[\u4e00-\u9fa5]|䣺| º|å|º|º')

# A single iter_rows() pass replaces rebuilding list(worksheet.rows) on every
# iteration. Limiting to the first five columns lets the row be unpacked
# directly; cells that do not exist come back as None.
for url, title, pv, uv, email in worksheet.iter_rows(
        min_row=FIRST_DATA_ROW, max_row=LAST_DATA_ROW,
        min_col=1, max_col=5, values_only=True):
    if url is None:
        # Empty row or missing url cell: there is nothing to key a record on,
        # and re.sub() would raise TypeError on None.
        continue
    url = URL_PATH_RE.sub('', url)
    # A missing email is treated as an empty string rather than crashing.
    email2 = '' if email is None else EMAIL_NOISE_RE.sub('', email)
    print('email2',email2)
    # Upsert into MongoDB is currently disabled; kept for reference.
    # collection.update({'url': url},
    #                   {'$set': {'title': title, 'email': email2, 'pv': pv, 'uv': uv
    #
    #                             }}, upsert=True)