python爬取上海期货交易所数据

最新推荐文章于 2024-04-01 14:44:30 发布

runner668

最新推荐文章于 2024-04-01 14:44:30 发布

阅读量2w

点赞数 3

分类专栏： python

本文链接：https://blog.csdn.net/runner668/article/details/80622288

版权

一：爬虫的常规方法

爬取网页表格的常用套路是按 table（表格）→ tr（行）→ th/td（表头/数据单元格）的层级逐层解析。

'''
Created on Feb 28, 2017


@author: hcq908
'''
import csv
import os
# import re
from urllib.request import urlopen
from bs4 import BeautifulSoup


if __name__ == '__main__':
    iCntTable = 0;
    html = urlopen("https://en.wikipedia.org/wiki/Comparison_of_text_editors")
    #html = urlopen("http://www.shfe.com.cn/bourseService/businessdata/summaryinquiry/index.html?paramid=trading_daily")
    bsObj = BeautifulSoup(html, "html.parser")
    oTables = bsObj.find_all("table")#选定第一个表格
    for table in oTables:
        iCntTable  =iCntTable + 1;
        print('处理第%d个表格 \n'%iCntTable)
        #获取表格名称
        #sTitleTag = table.find('caption');#标题只有一个，注意有的没有标题等
        #print(sTitleTag)
    #     sMatchText = re.compile(r'<[^>]+>', re.S)
    #     sTextRemain = sMatchText.sub('', sTitleTag)
        sTitleName=  chr(iCntTable)+'.csv';
        
        #路径不存在时需要新建
        sDir = './files';
        if not os.path.exists(s