# Douban crawler: scrape books by tag using urllib and BeautifulSoup, then export the results to Excel.
import time
import urllib
import urllib.parse
import numpy as np
from bs4 import BeautifulSoup
import random
from openpyxl import Workbook
# Pool of HTTP request headers: a list of single-key dicts, each carrying a
# different browser User-Agent string (Firefox, Chrome, MSIE).
# NOTE(review): presumably one entry is chosen per request to vary the client
# signature; the code that consumes this list is outside this view — confirm.
hds = [
    {'User-Agent': agent}
    for agent in (
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6',
        'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.12 Safari/535.11',
        'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)',
    )
]
def bookListByTags(tags):
return_list = []
for tag in tags:
page = 0