python3_股票信息爬取

股票信息爬取

交易信息爬取

程序结构设计

步骤1:从东方财富网获得股票代码 http://quote.eastmoney.com/stock_list.html
步骤2:根据股票代码列表,从百度股票网获取股票信息 http://gupiao.baidu.com/stock/
步骤3:股票信息存入数据库

技术路线:

requests –> BeautifulSoup / re –> MySQL (pymysql)

结构框架

import requests
import re
from bs4 import BeautifulSoup
import pymysql

def gethtmltext(url):	# fetch the raw HTML text of *url*
    # NOTE(review): skeleton stub — `r` is undefined here; the runnable
    # implementation appears later in the file.
    return r.text

def getstocklist(url,lst):	# parse the listing page, append stock codes (e.g. sh000001) to lst
    # Skeleton stub. Fixed typo: the original returned the undefined name
    # `lit` instead of the `lst` parameter. The runnable implementation
    # appears later in the file and fills `lst` in place.
    return lst

def getstockinfo(html):		# parse a stock detail page, return (column names, row values)
    # NOTE(review): skeleton stub — `a` and `b` are undefined here; the
    # runnable implementation appears later in the file.
    return a,b
    
def savemysql(table,data_attrs,data_values):  # save one row to the database (skeleton)
    # Fixed: the Chinese label ("保存数据库" / "save to database") sat bare
    # after the colon, making this a one-line def followed by an indented
    # suite (SyntaxError); the body also mixed a tab with spaces (TabError).
    # NOTE(review): cursor/sql/conn are undefined in this stub; see the
    # runnable implementation later in the file.
    cursor.execute(sql)
    conn.commit()

def main():
    # NOTE(review): skeleton driver — url/lis/start_url/lt are undefined at
    # these points (and url is used before it is assigned); the runnable
    # version appears later in the file.
    getstocklist(url,lis)
    url=start_url+lt+'.html'
    html = gethtmltext(url)
    col_name,row_value=getstockinfo(html)
    savemysql(table='stock_info',data_attrs=col_name,data_values=row_value)

main()

获取股票信息

import requests
import re
from bs4 import BeautifulSoup
import pymysql

def gethtmltext(url):
    """Fetch *url* and return its decoded HTML text, or '' on any request error.

    Callers treat '' as "page unavailable" and skip the entry, so this is
    deliberately best-effort.
    """
    try:
        # timeout keeps one dead host from hanging the whole crawl forever
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        # let requests sniff the real encoding from the body (pages here
        # are a mix of GBK and UTF-8)
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # narrow except: only network/HTTP failures, not programming errors
        return ''

def getstocklist(url, lst):
    """Parse the Eastmoney listing page at *url* and append every stock code
    found in anchor hrefs (e.g. 'sh600000', 'sz000001') to *lst* in place.

    Returns None; *lst* is the output.
    """
    html = gethtmltext(url)
    soup = BeautifulSoup(html, 'html.parser')
    # hoisted out of the loop; codes look like sh/sz followed by 6 digits
    code_re = re.compile(r's[hz]\d{6}')
    for anchor in soup.find_all('a'):
        # replaces the old bare `except: continue` (which hid KeyError for
        # missing href and IndexError for no regex match) with explicit checks
        href = anchor.attrs.get('href')
        if not href:
            continue
        match = code_re.search(href)
        if match:
            lst.append(match.group())

def getstockinfo(html):
    """Parse one Baidu stock detail page.

    Returns (col_name, row_value): two comma-separated strings, each ending
    with a trailing ',' (callers strip it with [:-1]). col_name holds column
    names; row_value holds the matching single-quoted values.

    Raises AttributeError/IndexError if the page lacks the expected
    'stock-bets' markup — callers pre-filter with a page-size check.
    """
    soup = BeautifulSoup(html, 'html.parser')
    div = soup.find('div', attrs={'class': 'stock-bets'})
    stock_name = div.find_all(attrs={'class': 'bets-name'})[0].text.split()[0]
    stock_no = re.findall(r's[hz]\d{6}', div.a.attrs['href'])[0]
    cols = ['股票名称', '股票代码']
    vals = ["'" + str(stock_name) + "'", "'" + str(stock_no) + "'"]
    # Bug fix: the old loop appended to col_name BEFORE str.strip() could
    # raise on a None .string, leaving one more column than value. Check
    # first, then append both — and zip() replaces the index loop, which
    # could also IndexError when dt/dd counts differed.
    for name_tag, value_tag in zip(div.find_all('dt'), div.find_all('dd')):
        text = value_tag.string
        if text is None:
            # nested markup: .string is None — skip the pair entirely
            continue
        cols.append(name_tag.text)
        vals.append("'" + text.strip() + "'")
    # join instead of quadratic += concatenation; keep the trailing comma
    return ','.join(cols) + ',', ','.join(vals) + ','

def savemysql(table, data_attrs, data_values):
    """Insert one row into MySQL table *table*.

    data_attrs / data_values are the comma-terminated strings produced by
    getstockinfo(); the trailing comma is stripped here with [:-1].

    NOTE(review): table name, columns and values are spliced into the SQL
    by %-formatting — acceptable only because they come from our own
    parser, never reuse this with untrusted input.
    """
    # build sql BEFORE the try so the error printer can never hit an
    # unbound `sql` (the old code built it inside the try it reported on)
    sql = "insert into %s(%s) values(%s)" % (table, data_attrs[:-1], data_values[:-1])
    conn = pymysql.connect(host='localhost', user='root', passwd='xxxx',
                           charset='utf8', database='gethtml')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql)
            conn.commit()
        except pymysql.MySQLError:
            # narrow except; same best-effort logging as before
            print("insert into %s error:" % table, sql)
        finally:
            cursor.close()
    finally:
        # connection is released even if cursor() or close() raises
        conn.close()


def create_table(table_name, url):
    """One-off helper: fetch a sample stock page from *url* and CREATE TABLE
    *table_name* with one varchar(20) column per parsed field.

    Meant to be run once before the first crawl (see the commented call in
    main()).
    """
    # fetch/parse before opening the connection, so a dead page does not
    # hold a DB connection open
    html = gethtmltext(url)
    col_name, _ = getstockinfo(html)   # row values unused here (was `row_name`)
    # 'a,b,' -> 'a varchar(20) ,b varchar(20) ' ([:-1] drops the last comma)
    sql = "create table %s(%s) charset='utf8mb4'" % (
        table_name, re.sub(r'\,', ' varchar(20) ,', col_name)[:-1])
    conn = pymysql.connect(host='localhost', user='root', passwd='xxxx',
                           charset='utf8', database='gethtml')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql)
            conn.commit()
        except pymysql.MySQLError:
            # narrow except instead of bare; same diagnostics as before
            print(sql)
            print('create table error')
        finally:
            cursor.close()
    finally:
        # connection released even when cursor() raises (old code leaked it)
        conn.close()


def main():
    """Crawl all stock codes from the Eastmoney listing page, then fetch each
    stock's detail page from Baidu and store the parsed fields into MySQL."""
    print('开始爬取数据:stock_info from baidu.com/stock')
    url = 'http://quote.eastmoney.com/stock_list.html'
    start_url = 'http://gupiao.baidu.com/stock/'
    lis = []
    getstocklist(url, lis)
    # run once if the table does not exist yet:
    # create_table('stock_info','https://gupiao.baidu.com/stock/sz300687.html')
    count = 1
    total = len(lis)           # hoisted: len() was recomputed every iteration
    for lt in lis:
        html = gethtmltext(start_url + lt + '.html')
        # error pages / redirects come back tiny; skip them
        if len(html) < 10000:
            continue
        print('\r当前速度:%s/%s' % (count, total), end='')
        col_name, row_value = getstockinfo(html)
        savemysql(table='stock_info', data_attrs=col_name, data_values=row_value)
        count = count + 1


# guard added: the old unconditional main() fired the whole crawl on import
if __name__ == '__main__':
    main()

代码还需要进一步优化
再接再厉

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值