中国邮政编码
http://www.yb21.cn
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import json
from lxml import etree
from multiprocessing import Manager, cpu_count, Pool
import requests
from urllib.parse import urljoin
import pandas as pd
from datetime import datetime
import time
# Request headers sent with every HTTP call in this module; the User-Agent
# presents the client as a desktop Chrome browser.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/70.0.3538.110 Safari/537.36"
    ),
}
class PostSpider(object):
url = "http://www.yb21.cn"
def index_page(self, url_queue, timeout=30):
    """Fetch the site's front page and enqueue every link found on it.

    Requests ``self.url`` with the module-level ``headers``, decodes the
    response as GBK, collects the ``href`` attribute of every ``<a>``
    element and puts each one on ``url_queue`` as an absolute URL
    resolved against ``self.url``.

    Args:
        url_queue: Object exposing ``put(item)``; receives one absolute
            URL per link found on the page.
        timeout: Seconds to wait for the server before ``requests``
            raises a timeout error. Pass ``None`` to wait indefinitely.
    """
    # Without a timeout, requests can block forever on a stalled server,
    # which would hang the worker running this method.
    res = requests.get(self.url, headers=headers, timeout=timeout)
    # Override the detected charset: the page is decoded as GBK.
    res.encoding = "gbk"
    html = etree.HTML(res.text)
    for href in html.xpath("//a/@href"):
        # hrefs may be relative; resolve them against the site root.
        url_queue.put(urljoin(self.url, href))
def spider(self, url_queue,