import requests
from xml.parsers.expat import ParserCreate
# XML (Extensible Markup Language) is a format used for data transfer.
class DefaultSaxHandler(object):
    """Expat callback holder that collects ``(title, href)`` pairs.

    The three methods match the signatures expat expects for
    ``StartElementHandler``, ``EndElementHandler`` and
    ``CharacterDataHandler``.
    """

    def __init__(self, provinces):
        """Store the list that collected entries are appended to.

        Args:
            provinces: Caller-owned list; it is mutated in place.
        """
        self.provinces = provinces

    def start_element(self, name, attrs):
        """Record the ``title``/``href`` attributes of a non-``map`` element.

        Args:
            name: Tag name of the element being opened.
            attrs: Mapping of attribute names to values for that element.
        """
        if name == 'map':
            # The enclosing <map> element itself carries no entry.
            return
        title = attrs.get('title')
        href = attrs.get('href')
        # Elements missing either attribute cannot form an entry; skip them
        # instead of raising KeyError from inside the parser callback.
        if title is None or href is None:
            return
        self.provinces.append((title, href))

    def end_element(self, name):
        """Ignore closing tags; nothing needs to be finalized."""
        pass

    def char_data(self, text):
        """Ignore character data; only attributes are of interest."""
        pass
def get_province_entry(url):
    """Download a page and return the entries of its ``map_86`` image map.

    The response body is decoded as GB2312, the
    ``<map name="map_86" id="map_86"> ... </map>`` fragment is cut out, and
    the fragment is parsed with expat using ``DefaultSaxHandler`` as the
    callback object.

    Args:
        url: Address of the page that contains the image map.

    Returns:
        The list filled in by ``DefaultSaxHandler`` while parsing.

    Raises:
        ValueError: If the opening or closing map tag is not found.
        UnicodeDecodeError: If the body is not valid GB2312.
        requests.RequestException: If the download fails or times out.
        xml.parsers.expat.ExpatError: If the fragment is not well-formed.
    """
    map_open = '<map name="map_86" id="map_86">'
    map_close = '</map>'

    # GB2312 is a Simplified Chinese character encoding; decode the raw
    # bytes with it explicitly. The timeout prevents hanging indefinitely
    # on a stalled connection.
    content = requests.get(url, timeout=10).content.decode('gb2312')

    start = content.find(map_open)
    # Look for the closing tag only after the opening tag, so an earlier
    # unrelated </map> on the page cannot produce an inverted slice.
    end = content.find(map_close, start) if start != -1 else -1
    if end == -1:
        raise ValueError('map_86 image map not found in page: %s' % url)
    fragment = content[start:end + len(map_close)].strip()

    provinces = []
    # The handler appends to `provinces` as elements are encountered.
    handler = DefaultSaxHandler(provinces)
    parser = ParserCreate()
    parser.StartElementHandler = handler.start_element
    parser.EndElementHandler = handler.end_element
    parser.CharacterDataHandler = handler.char_data
    # Mark the input as final so a truncated fragment is reported as an
    # error instead of being silently accepted.
    parser.Parse(fragment, True)
    return provinces
# Fetch the entries from the page at the URL below and print the resulting
# list. NOTE: these statements are not under a __main__ guard, so they run
# (and perform a network request) whenever this module is executed or imported.
provinces = get_province_entry('http://www.ip138.com/post')
print(provinces)
# Sample output of the script above:
# [('新疆', '/83/'), ('西藏', '/85/'), ('青海', '/81/'), ('甘肃', '/73/'),
#  ('四川', '/61/'), ('云南', '/65/'), ('宁夏', '/75/'), ('内蒙古', '/01/'),
#  ('黑龙江', '/15/'), ('吉林', '/13/'), ('辽宁', '/11/'), ('河北', '/50/'),
#  ('北京', '/10/'), ('天津', '/30/'), ('陕西', '/71/'), ('山西', '/03/'),
#  ('山东', '/25/'), ('河南', '/45/'), ('重庆', '/40/'), ('湖北', '/43/'),
#  ('安徽', '/23/'), ('江苏', '/21/'), ('上海', '/20/'), ('贵州', '/55/'),
#  ('广西', '/53/'), ('湖南', '/41/'), ('江西', '/33/'), ('浙江', '/31/'),
#  ('福建', '/35/'), ('广东', '/51/'), ('海南', '/57/'), ('台湾', '/taiwang/'),
#  ('澳门', '/aomen/'), ('香港', '/xianggang/')]
import requests
import threading
def display_info(code):
    """Fetch and print the raw response text for one stock code.

    Args:
        code: Stock identifier appended to the query URL, e.g. ``'sh600001'``.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    url = 'http://hq.sinajs.cn/list=' + code
    # Without a timeout a stalled connection would block the calling thread
    # forever, and with it anything waiting for that thread to finish.
    response = requests.get(url, timeout=10).text
    print(response)
def single_thread(codes):
    """Sequentially display the quote for every stock code in *codes*.

    Args:
        codes: Iterable of stock code strings; surrounding whitespace is
            stripped and entries that are blank after stripping are skipped.
    """
    for raw_code in codes:
        code = raw_code.strip()
        if not code:
            # A blank code would issue a request with an empty query.
            continue
        display_info(code)
def multi_thread(tasks):
    """Process each batch of stock codes on its own thread and wait for all.

    Args:
        tasks: Iterable of code lists. Every non-empty list is passed to
            ``single_thread`` on a dedicated thread.
    """
    # Empty batches are dropped: a thread for them would exit immediately.
    threads = [
        threading.Thread(target=single_thread, args=(codes,))
        for codes in tasks
        if codes
    ]
    # Start every thread before joining any, so the batches run concurrently.
    for worker in threads:
        worker.start()
    # Block until all batches have finished.
    for worker in threads:
        worker.join()
if __name__ == '__main__':
    codes = ['sh600001', 'sh600002', 'sh600003', 'sh600004', 'sh600005', 'sh600006']
    thread_count = 4
    # Split the codes into one batch per thread. Stride slicing assigns every
    # code to exactly one batch and keeps batch sizes within one of each
    # other. The previous contiguous slicing started the last batch at
    # thread_len*4 instead of thread_len*3, so one code was never queried.
    tasks = [codes[offset::thread_count] for offset in range(thread_count)]
    multi_thread(tasks)