from selenium import webdriver
import requests
import re
import xlwt
import time
class getSku():
    """Interactive scraper that collects SKU ids into an Excel workbook.

    Constructing an instance runs the whole session: a Chrome window is
    opened on https://www.jd.com/, the user navigates it by hand, and each
    time the user confirms on the console the browser's current URL is
    fetched page by page and scanned for SKU ids.  Every confirmation
    produces one named column; the columns are written to ``sku包.xls``
    when the user enters ``1``.
    """

    # Pages 1..PAGE_COUNT are requested for every SKU package.
    PAGE_COUNT = 7
    # At most this many SKU ids are kept per package.
    MAX_SKUS = 200
    # Pause before each page request, in seconds.
    REQUEST_DELAY = 0.5
    # Per-request timeout in seconds; without one a stalled connection
    # would block the session indefinitely.
    REQUEST_TIMEOUT = 10
    # Captures the digits that follow ``li data-sku="`` in the page source.
    SKU_PATTERN = re.compile(r'li data-sku="(\d+)')

    def __init__(self):
        """Run the interactive scraping session from start to finish."""
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
        self.getUrl()
        try:
            print('*************************************************************************')
            sheetName = input('操作结束,输入sku包名字:')
            sku_sheet = []
            name_sheet = [sheetName]
            while True:
                # The user navigates in the browser; scrape wherever it is now.
                self.url = self.browser.current_url
                print('抓取中,请稍后。。。')
                sku_sheet.append(self._collect_skus(self.url))
                inputName = input('抓取完成,如需继续请在操作后输入sku包名称,否则输入 1:')
                if inputName == '1':
                    break
                name_sheet.append(inputName)
            self.createExcel(name_sheet, sku_sheet)
        finally:
            # Always close the browser, even when the session is aborted
            # by an error or Ctrl+C.
            self.browser.quit()
        print('生成结束!')
        # Wait for Enter before returning (presumably so a console window
        # stays open long enough to read the final message).
        input()

    def getUrl(self):
        """Open a maximized Chrome window on the JD home page.

        The driver is stored on ``self.browser``.  If loading the page or
        maximizing fails, the browser is closed before the error propagates.
        """
        self.browser = webdriver.Chrome()
        try:
            self.browser.get('https://www.jd.com/')
            self.browser.maximize_window()
        except Exception:
            self.browser.quit()
            raise

    @classmethod
    def _extract_skus(cls, html):
        """Return every SKU id (as a string of digits) found in *html*."""
        return cls.SKU_PATTERN.findall(html)

    def _collect_skus(self, url):
        """Fetch pages 1..PAGE_COUNT of *url* and return the SKU ids found.

        Args:
            url: Address to request; the page number is sent as the
                ``page`` query parameter.

        Returns:
            A list of at most ``MAX_SKUS`` id strings, in the order found.
            If a request fails, the ids gathered so far are returned.
        """
        skus = []
        try:
            for page in range(1, self.PAGE_COUNT + 1):
                time.sleep(self.REQUEST_DELAY)
                response = requests.get(
                    url,
                    headers=self.headers,
                    params={'page': str(page)},
                    timeout=self.REQUEST_TIMEOUT,
                )
                skus.extend(self._extract_skus(response.text))
        except requests.RequestException as exc:
            # Best effort: keep whatever was scraped before the failure,
            # but tell the user instead of failing silently.
            print('抓取出错,已保留已抓取的数据:%s' % exc)
        return skus[:self.MAX_SKUS]

    def createExcel(self, name_sheet, sku_sheet):
        """Write the SKU packages to ``sku包.xls``, one column per package.

        Args:
            name_sheet: Column titles; ``name_sheet[i]`` heads column ``i``.
            sku_sheet: List of SKU id lists; ``sku_sheet[i]`` fills column
                ``i`` from the second row down, each id stored as an int.

        Raises:
            IndexError: If ``name_sheet`` has fewer entries than ``sku_sheet``.
        """
        workbook = xlwt.Workbook(encoding='utf-8')
        worksheet = workbook.add_sheet('Sheet1')
        for col, skus in enumerate(sku_sheet):
            worksheet.write(0, col, name_sheet[col])
            # Row 0 holds the title, so the ids start at row 1.
            for row, sku in enumerate(skus, start=1):
                worksheet.write(row, col, int(sku))
        workbook.save('sku包.xls')
def main():
    """Run one interactive SKU scraping session.

    Constructing ``getSku`` performs the whole workflow, so the instance
    is not kept.
    """
    getSku()
# Start the scraper only when this file is executed as a script, so that
# importing the module does not open a browser.
if __name__ == "__main__":
    main()