爬虫(1)——获取邮编,获取股票(实例)

import requests
from xml.parsers.expat import ParserCreate 
#xml- extensible markup language 用来数据传输

class DefaultSaxHandler(object):
    """Expat callback holder that collects ``(title, href)`` pairs.

    Every start tag other than ``<map>`` that carries both a ``title`` and an
    ``href`` attribute is appended to ``provinces`` as ``(title, href)``.
    """

    def __init__(self, provinces):
        # Caller-owned list; matches are appended to it in document order.
        self.provinces = provinces

    def start_element(self, name, attrs):
        """Record the element's ``title``/``href`` pair unless it is ``<map>``.

        Args:
            name: Tag name reported by the parser.
            attrs: Mapping of attribute names to values for this tag.
        """
        if name == 'map':
            return
        title = attrs.get('title')
        href = attrs.get('href')
        # Elements missing either attribute are skipped rather than raising
        # KeyError and aborting the whole parse.
        if title is None or href is None:
            return
        self.provinces.append((title, href))

    def end_element(self, name):
        """Ignore end tags; nothing needs to be recorded for them."""

    def char_data(self, text):
        """Ignore character data; only attributes are of interest."""

def get_province_entry(url):
    """Download a page and return the links found in its ``map_86`` image map.

    The response body is decoded as GB2312 (a Simplified Chinese encoding),
    the ``<map name="map_86" id="map_86"> ... </map>`` fragment is cut out and
    fed to an expat parser whose callbacks are a ``DefaultSaxHandler``.

    Args:
        url: Address of the page to download.

    Returns:
        A list of ``(title, href)`` tuples in document order. The list is
        empty when the page contains no complete ``map_86`` fragment.

    Raises:
        requests.RequestException: The download failed or timed out.
        UnicodeDecodeError: The body is not valid GB2312.
        xml.parsers.expat.ExpatError: The fragment is not well-formed XML.
    """
    map_open = '<map name="map_86" id="map_86">'
    map_close = '</map>'

    # A timeout keeps a stalled server from blocking the caller forever.
    page = requests.get(url, timeout=10).content.decode('gb2312')

    start = page.find(map_open)
    if start == -1:
        return []
    # Look for the closing tag only after the opening one, so an earlier
    # unrelated </map> cannot produce an inverted slice.
    end = page.find(map_close, start)
    if end == -1:
        return []
    fragment = page[start:end + len(map_close)].strip()

    provinces = []
    # The handler appends its matches directly to ``provinces``.
    handler = DefaultSaxHandler(provinces)
    parser = ParserCreate()
    parser.StartElementHandler = handler.start_element
    parser.EndElementHandler = handler.end_element
    parser.CharacterDataHandler = handler.char_data

    # The whole fragment is supplied in one call, so mark it as final to let
    # the parser report a truncated document.
    parser.Parse(fragment, True)
    return provinces

# Fetch the province links from the ip138 postcode page and print them.
# NOTE: this sits at module top level with no __main__ guard, so it runs
# (and performs the HTTP request) whenever the module is executed or imported.
provinces = get_province_entry('http://www.ip138.com/post')
print(provinces)

# Output: [('新疆', '/83/'), ('西藏', '/85/'), ('青海', '/81/'), ('甘肃', '/73/'), ('四川', '/61/'), ('云南', '/65/'), ('宁夏', '/75/'), ('内蒙古', '/01/'), ('黑龙江', '/15/'), ('吉林', '/13/'), ('辽宁', '/11/'), ('河北', '/50/'), ('北京', '/10/'), ('天津', '/30/'), ('陕西', '/71/'), ('山西', '/03/'), ('山东', '/25/'), ('河南', '/45/'), ('重庆', '/40/'), ('湖北', '/43/'), ('安徽', '/23/'), ('江苏', '/21/'), ('上海', '/20/'), ('贵州', '/55/'), ('广西', '/53/'), ('湖南', '/41/'), ('江西', '/33/'), ('浙江', '/31/'), ('福建', '/35/'), ('广东', '/51/'), ('海南', '/57/'), ('台湾', '/taiwang/'), ('澳门', '/aomen/'), ('香港', '/xianggang/')]

import requests
import threading

def display_info(code):
    """Request the quote for one stock code and print the response text.

    Args:
        code: Stock identifier appended to the ``hq.sinajs.cn/list=`` URL,
            e.g. ``'sh600001'``.

    Raises:
        requests.RequestException: The request failed or timed out.
    """
    url = 'http://hq.sinajs.cn/list=' + code
    # Without a timeout a stalled connection would block this worker thread,
    # and therefore the join() waiting on it, indefinitely.
    response = requests.get(url, timeout=10).text
    print(response)

def single_thread(codes):
    """Display the quote for every code in ``codes``, one after another.

    Surrounding whitespace is stripped from each code before it is passed to
    ``display_info``.
    """
    for symbol in (raw.strip() for raw in codes):
        display_info(symbol)
def multi_thread(tasks):
    """Run ``single_thread`` on each batch in ``tasks`` in its own thread.

    All threads are created first, then all are started, and the call returns
    only after every thread has finished.
    """
    workers = []
    for batch in tasks:
        workers.append(threading.Thread(target=single_thread, args=(batch,)))

    # Start every worker before waiting on any of them.
    for worker in workers:
        worker.start()

    # Block until all workers are done.
    for worker in workers:
        worker.join()


if __name__ == '__main__':
    codes = ['sh600001', 'sh600002', 'sh600003', 'sh600004', 'sh600005', 'sh600006']
    # Floor division keeps the chunk size an int, as slice bounds require.
    thread_len = len(codes) // 4

    # Split the codes into four consecutive batches, one per thread.
    t1 = codes[0:thread_len]
    t2 = codes[thread_len:thread_len * 2]
    t3 = codes[thread_len * 2:thread_len * 3]
    # The last batch starts exactly where t3 ends and absorbs the remainder;
    # starting at thread_len * 4 would silently drop a whole chunk.
    t4 = codes[thread_len * 3:]

    multi_thread([t1, t2, t3, t4])
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值