牛仔网股评

import requests,os,re,random,time
from bs4 import BeautifulSoup
def header():
    """Return a randomly selected ``{"User-Agent": ...}`` request-header dict.

    Every call rebuilds the full candidate list.  Templates that contain
    ``%d`` carry two placeholders, each filled with an independent random
    integer between 1 and 100 (inclusive); the remaining strings are used
    verbatim.  One candidate is then picked with ``random.choice``.
    """
    templates = (
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4%d2; .NET CLR 2.0.5%d27)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.5%d7; Media Center PC 5.0; .NET CLR 3.0.0%d6)",
        "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4%d; .NET CLR 2.0.5%d7)",
        "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.3%d9; .NET CLR 3.0.30729; .NET CLR 2.0.5%d7; Media Center PC 6.0)",
        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.5567; .NET CLR 3.5.3%d; .NET CLR 3.0.30729; .NET CLR 1.0.3%d; .NET CLR 1.1.4322)",
        "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.5%d7; InfoPath.2; .NET CLR 3.0.0%d6.30)",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/4%d.3) Arora/0.3 (Change: 2%d c9dfb30)",
        "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/4%d.3) Arora/0.%d",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1%d.2pre) Gecko/20%d15 K-Ninja/2.8",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20%d705 Firefox/3%d Kapiko/3.0",
        "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
        "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1%d.8-1.fc10 Kazehakase/0%d6",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.%d3.56 Safari/5%d1",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1%d6.7 Safari/5%d.20",
        "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
        "Mozilla/5.0 (Android; Mobile; rv:27.0) Gecko/27.0 Firefox/27.0",
        "BlackBerry9700/5.0.0.862 Profile/MIDP-2.1 Configuration/CLDC-1.1 VendorID/331 UNTRUSTED/1.0 3gpp-gba",
        "Mozilla/5.0 (BlackBerry; U; BlackBerry 9930; en-US) AppleWebKit/534.11+ (KHTML, like Gecko) Version/7.0.0.241 Mobile Safari/534.11+",
        "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; zh-TW) AppleWebKit/534.8+ (KHTML, like Gecko) Version/6.0.0.448 Mobile Safari/534.8+",
        "Mozilla/5.0 (Linux; U; Android 4.0.4; en-gb; GT-I9300 Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30",
        "Mozilla/5.0 (Linux; U; Android 2.2; en-us; SCH-I800 Build/FROYO) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
        "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)",
        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0; Touch)",
        "Mozilla/5.0(iPad; U; CPU iPhone OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B314 Safari/531.21.10",
        "Mozilla/5.0 (iPhone; CPU iPhone OS 5_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9B179 Safari/7534.48.3",
        "Opera/9.80 (J2ME/MIDP; Opera Mini/9.80 (J2ME/22.478; U; en) Presto/2.5.25 Version/10.54",
        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_3; en-us; Silk/1.1.0-80) AppleWebKit/533.16 (KHTML, like Gecko) Version/5.0 Safari/533.16 Silk-Accelerated=true",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.13+ (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
        "Mozilla/5.0 (Linux; Android 4.4.4; Nexus 5 Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.117 Mobile Safari/537.36",
        "Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166  Safari/535.19",
        "Mozilla/5.0 (MeeGo; NokiaN9) AppleWebKit/534.13 (KHTML, like Gecko) NokiaBrowser/8.5.0 Mobile Safari/534.13",
        "Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/12.0.024; Profile/MIDP-2.1 Configuration/CLDC-1.1; en-us) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.12344",
        "Mozilla/5.0 (X11; U; Linux armv7l; no-NO; rv:1.9.2.3pre) Gecko/20100723 Firefox/3.5 Maemo Browser 1.7.4.8 RX-51 N900",
        "Mozilla/5.0 (PlayBook; U; RIM Tablet OS 2.0.1; en-US) AppleWebKit/535.8+ (KHTML, like Gecko) Version/7.2.0.1 Safari/535.8+",
        "Mozilla/5.0 (PLAYSTATION 3 4.60) AppleWebKit/531.22.8 (KHTML, like Gecko)",
        "Mozilla/5.0 (PlayStation Vita 3.12) AppleWebKit/536.26 (KHTML, like Gecko) Silk/3.2",
        "Mozilla/5.0 (Linux; Android 4.4.2; en-us; SAMSUNG SCH-I545 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Version/1.5 Chrome/28.0.1500.94 Mobile Safari/537.36",
        "Mozilla/5.0 (Linux; Android 4.1.2; GT-I8190 Build/JZO54K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.117 Mobile Safari/537.36",
        "Mozilla/5.0 (Linux; Android 4.4.2; en-gb; SAMSUNG SM-G900F Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Version/1.6 Chrome/28.0.1500.94 Mobile Safari/537.36",
        "Mozilla/5.0 (SAMSUNG; SAMSUNG-GT-S8530/S8530DDLC2; U; Bada/2.0; en-us) AppleWebKit/534.20 (KHTML, like Gecko) Dolfin/3.0 Mobile WVGA SMM-MMS/1.2.0 OPN-B",
        "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:15.0) Gecko/20100101 Firefox/15.0.1",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; SAMSUNG; SGH-i917)",
        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; ARM; Trident/6.0)",
        "Mozilla/5.0 (iPhone; U; CPU iPhone OS) (compatible; Googlebot-Mobile/2.1; http://www.google.com/bot.html)",
        "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
        "DoCoMo/2.0 N905i(c100;TB;W24H16) (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)",
        "SAMSUNG-SGH-I617/UCHJ1 Mozilla/4.0 (compatible; MSIE 6.0; Windows CE; IEMobile 7.11)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows CE; IEMobile 8.12; MSIEMobile 6.0) 320x240; VZW; UTStar-XV6175.1; Windows Mobile 6.5 Standard;",
        "Opera/9.80 (Android 2.3.3; Linux; Opera Mobi/ADR-1202011015; U; en) Presto/2.9.201 Version/11.50",
        "Opera/9.80 (BREW; Opera Mini/5.0/27.2370; U; en) Presto/2.8.119 240X320 Samsung SCH-U380",
        "Mozilla/5.0 (Windows; U; Win 9x 4.90; en-GB; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1",
        "Mozilla/5.0 (X11; U; SunOS sun4u; en-US; rv:1.6) Gecko/20040503",
    )

    candidates = []
    for template in templates:
        if "%d" in template:
            # Draw the two numbers in placeholder order so the random stream
            # is consumed template by template, left to right.
            first = random.randint(1, 100)
            second = random.randint(1, 100)
            agent = template % (first, second)
        else:
            agent = template
        candidates.append({"User-Agent": agent})

    return random.choice(candidates)


def parser(url,name):
    """Download every article listed on ``url`` into ``d://牛仔//<name>//``.

    The listing page is fetched and each ``h2`` element with class
    ``f24 fb`` (article link) is paired with the matching ``p`` element with
    class ``list-info gray01`` (info line).  The text of the article's
    ``div.autoline`` is written to ``<info>,<title>.txt`` after characters
    that are illegal in Windows file names have been stripped.  Articles
    whose target file already exists are skipped without being fetched.

    When ``url`` is a bare author URL
    (``http://pinglun.9666.cn/author/<id>/``), the article count is read from
    the ``h3.pb20`` element and the remaining result pages (10 articles per
    page) are scraped recursively.  Paged URLs carry a query string, so they
    do not trigger further pagination.

    Args:
        url: Listing page to scrape.
        name: Author directory name below ``d://牛仔//``; the directory must
            already exist because files are opened without creating parents.

    Returns:
        None.  This is a best-effort crawl: failures are reported with
        ``print`` and never raised, and a failure on one article does not
        stop the remaining articles on the page.
    """
    timeout = 30  # seconds; without it a stalled connection blocks forever
    try:
        data = requests.get(url, headers=header(), timeout=timeout).content.decode('utf-8')
        bs1 = BeautifulSoup(data, 'lxml')
        urls = bs1.find_all("h2", "f24 fb")
        infos = bs1.find_all("p", "list-info gray01")
        # zip() tolerates a length mismatch between headings and info lines.
        for heading, info_tag in zip(urls, infos):
            try:
                info = info_tag.text
                title = info + ',' + heading.a.get_text()
                title = re.sub(r'\/|\\|\*|\>|\<|\?|\:|\"|\|', '', title)
                # Test the exact path that is written below; the previous
                # check looked at a different path and never matched.
                target = 'd://牛仔//%s//%s.txt' % (name, title)
                if os.path.isfile(target):
                    continue
                link = heading.a.get('href')
                cdata = requests.get(link, headers=header(), timeout=timeout).content.decode('utf-8')
                bs2 = BeautifulSoup(cdata, 'lxml')
                a = bs2.find("div", "autoline").text
                print(info)
                # NOTE: the platform default encoding is kept; characters it
                # cannot represent are replaced rather than raising.
                with open(target, 'w', errors='replace') as f:
                    f.write(a)
            except Exception as exc:
                # Keep going with the next article instead of dropping the page.
                print('article skipped on %s: %r' % (url, exc))
        if re.match(r'^http://pinglun\.9666\.cn/author/\w+/$', url):
            wnum = bs1.find("h3", "pb20")
            num = int(wnum.em.text)
            # Page 1 is the page just processed; ceil(num / 10) pages exist in
            # total, so the last partial page must be included as well.
            page_count = (num + 9) // 10
            urlc = [url + '?pager.offset=%d0&pageNo=%d&pageSize=10' % (i, i + 1) for i in range(1, page_count)]
            print(urlc)
            for page_url in urlc:
                parser(page_url, name)
    except Exception as exc:
        # Exception (not a bare except) so Ctrl-C can still stop the crawl.
        print('parser failed for %s: %r' % (url, exc))


def Consumer():
    """Yield ``(name, author_url)`` for each broadcaster not yet downloaded.

    The broadcaster index at ``http://live.9666.cn/getBroadcastListByAZ/`` is
    fetched and every ``li`` inside its ``div.list`` is visited.  The link's
    ``title`` attribute is the broadcaster name and its ``href`` the
    broadcaster page.  A broadcaster whose directory ``d://牛仔//<name>``
    already exists is treated as done and skipped before any request is made
    for it.  Otherwise the broadcaster page is fetched, the first
    ``http://pinglun.9666.cn/author/.../`` URL found in it is extracted, the
    directory is created, and the pair is yielded.

    The directory is created only after the author URL has been found, so a
    broadcaster without one is reported, skipped, and retried on the next
    run instead of being marked as done.

    Yields:
        tuple: ``(name, author_url)``.

    Raises:
        Network and parsing errors are not caught here and propagate to the
        code driving the generator.
    """
    root_url='http://live.9666.cn/getBroadcastListByAZ/'
    timeout = 30  # seconds; without it a stalled connection blocks forever
    data = requests.get(root_url, headers=header(), timeout=timeout).content.decode('utf-8')
    bs = BeautifulSoup(data, 'lxml')
    listing = bs.find("div", "list")
    for item in listing.find_all("li"):
        anchor = item.a
        if anchor is None:
            # List entry without a link: nothing to follow.
            continue
        name = anchor.get("title")
        slink = anchor.get("href")
        t = 'd://牛仔//%s' % name
        if os.path.isdir(t):
            # Already handled on an earlier run; skip the page fetch too.
            continue
        page = requests.get(slink, headers=header(), timeout=timeout).content.decode('utf-8')
        found = re.search('http://pinglun.9666.cn/author/.*?/', page)
        if found is None:
            print('no author url found for %s (%s)' % (name, slink))
            continue
        # makedirs also creates the d://牛仔 root on a first run.
        os.makedirs(t, exist_ok=True)
        yield name, found.group()


def produce(c):
    """Drive the generator ``c`` and scrape each author it yields.

    At most 1000 items are processed.  Before each item the function prints
    a start marker and sleeps a random 1-12 seconds to throttle requests,
    then pulls the next ``(name, start_url)`` pair from ``c`` and hands it to
    ``parser``.

    The generator is advanced with ``next`` only.  The previous priming
    ``c.send(None)`` call consumed the first yielded pair and discarded it,
    so the first author was never scraped.  Exhaustion of the generator ends
    the loop normally instead of raising.

    Args:
        c: Generator yielding ``(name, start_url)`` tuples.

    Returns:
        None.  ``c`` is always closed on exit.
    """
    try:
        for _ in range(1000):
            print('开始')
            time.sleep(random.randint(1,12))
            try:
                name, start_url = next(c)
            except StopIteration:
                # No more authors to process.
                break
            print(start_url, name)
            parser(start_url, name)
    finally:
        c.close()


# Script entry: build the author generator and drive it.  This runs at
# import time, exactly as in the original script.
c = Consumer()
produce(c)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值