因为制作 B2B 网站,需要将企业信息数据入库,所以把目光锁定在企查查的数据上。废话不多说,开干!
# -*- coding: utf-8 -*-
import requests
import lxml
import sys
from bs4 import BeautifulSoup
import xlwt
import time
import urllib
def craw(url,key_word,x):
User_Agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0'
# if x == 0:
# re = 'http://www.qichacha.com/search?key='+key_word
# else:
# re = 'https://www.qichacha.com/search?key={}#p:{}&'.format(key_word,x-1)
re = r'https://www.qichacha.com/search?key='+key_word
headers = {
'Host':'www.qichacha.com',
'Connection': 'keep-alive',
'Accept':r'text/html, */*; q=0.01',
'X-Requested-With': 'XMLHttpRequest',
'User-Agent':r'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
'Referer': re,
'Accept-Encoding':'gzip, deflate, br',
'Accept-Language':'