老鱼 Python Data Analysis, Part 7: Saving Scraped Data to a CSV File

Working with CSV files in Python is very convenient; the core logic takes just three lines of code.

1. Appending data to a file

import csv

# Append rows to the file; the with statement closes it
# automatically, so no explicit close() is needed
with open("d:\\data.csv", "a", newline="") as cf:
    w = csv.writer(cf)
    w.writerow([1001, "北京"])
    w.writerow([1002, "上海"])
    w.writerow([1003, "广州"])

Result (contents of d:\data.csv):

1001,北京
1002,上海
1003,广州
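If you want named columns, the standard library's csv.DictWriter can also emit a header row. A minimal sketch (the column names id and city are made up for illustration):

import csv

# DictWriter maps column names to values and writes an optional header row
with open("d:\\data.csv", "w", newline="") as cf:
    w = csv.DictWriter(cf, fieldnames=["id", "city"])
    w.writeheader()
    w.writerow({"id": 1001, "city": "北京"})
    w.writerow({"id": 1002, "city": "上海"})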

2. Reading data from a file

import csv

# Read the rows back from the file
with open("d:\\data.csv", "r") as cf:
    d = csv.reader(cf)
    for row in d:
        print(row)

Result (every field comes back as a string):

['1001', '北京']
['1002', '上海']
['1003', '广州']
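If the file has a header row, csv.DictReader returns each row as a dictionary keyed by the column names. A minimal sketch, assuming the header written by the DictWriter example above:

import csv

# DictReader uses the first row of the file as the field names by default
with open("d:\\data.csv", "r", newline="") as cf:
    for row in csv.DictReader(cf):
        print(row["id"], row["city"])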

3. If you run the append code a second time, every row is written again and the file ends up with duplicates.

So it is best to check whether the file already exists and, if it does, delete it first:

import csv
import os

# If the file already exists, delete it so we start fresh
if os.path.exists("d:\\data.csv"):
    os.remove("d:\\data.csv")
    
# Write the data to the file
with open("d:\\data.csv", "a", newline="") as cf:
    w = csv.writer(cf)
    w.writerow([1001, "北京"])
    w.writerow([1002, "上海"])
    w.writerow([1003, "广州"])

# Read the data back from the file
with open("d:\\data.csv", "r") as cf:
    d = csv.reader(cf)
    for row in d:
        print(row)
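A simpler alternative: opening the file in "w" mode truncates it, so the existence check and os.remove become unnecessary. A minimal sketch:

import csv

# "w" mode truncates an existing file on open, so no os.remove is needed
with open("d:\\data.csv", "w", newline="") as cf:
    w = csv.writer(cf)
    w.writerows([[1001, "北京"], [1002, "上海"], [1003, "广州"]])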

OK, now that we know how to work with CSV files, here is the complete code for saving dynamic data scraped from a web page to a CSV file. (It relies on a small FuPanData data class imported from a custom Items module; a sketch of that class is shown after the script.)

# Install selenium first, then the matching ChromeDriver
from selenium import webdriver
from bs4 import BeautifulSoup
import time
import csv
from Items import FuPanData  # custom data class holding one day's review data
import os

def saveFuPan():
    try:
        browser = webdriver.Chrome()
        browser.get("https://xuangubao.cn/dingpan")
        page = browser.page_source
        soup = BeautifulSoup(page,"lxml")

        # Get the date (the commented-out lines read it from the page; we use today's date instead)
        # today = soup.find("div", attrs={"class": "ban-chart-date-container"}).find_all("p")
        # print(today[0].get_text().strip(), today[1].get_text().strip(), "日")
        date = time.strftime('%Y-%m-%d',time.localtime(time.time()))
        print(date)
        # Query the advance/decline statistics
        spans = soup.find("div", attrs={"class":"ban-chart"}).find_all("span")
        up = spans[3].get_text().strip()
        down = spans[4].get_text().strip()
        limitUp = spans[6].get_text().strip()
        limitDown = spans[7].get_text().strip()
        bomb = spans[8].get_text().strip()
        print("涨:",up)
        print("跌:",down)
        print("涨停:",limitUp)
        print("跌停:",limitDown)
        print("炸板率:", bomb)

        # Create the review-data object for today
        todayData = FuPanData(date, up, down,limitUp, limitDown, bomb, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)


        # Handle the consecutive limit-up ("连板") stocks
        listCount = []  # board-level label for each stock
        guList = soup.find("table", attrs={"class": "table hit-pool__table"}).find_all("tr")

        # If today's stock list was already saved, delete it
        if os.path.exists("d:\\" + date + ".csv"):
            os.remove("d:\\" + date + ".csv")

        # Loop over the consecutive limit-up stocks (skip the header row)
        for gu in guList[1:]:
            tds = gu.find_all("td")
            guName = tds[1].find_all("span")[0].get_text().strip()
            guCode = tds[1].find_all("a")[0].get_text().strip()[-6:]
            # print(guName, "(", guCode, ")", ":", tds[12].get_text().strip())
            listCount.append(tds[12].get_text().strip())  # record this stock's board-level label
            # Save the stock to the CSV file, skipping first boards ("首板")
            if tds[12].get_text().strip()!="首板":
                with open("d:\\"+date+".csv", "a", newline="") as apdFile:
                    w = csv.writer(apdFile)
                    w.writerow([guName,guCode,tds[12].get_text().strip()])

        # Tally how many stocks sit at each board level
        banAttrs = {"首板": "ban1", "2连板": "ban2", "3连板": "ban3", "4连板": "ban4",
                    "5连板": "ban5", "6连板": "ban6", "7连板": "ban7", "8连板": "ban8",
                    "9连板": "ban9", "10连板": "ban10"}
        for i in set(listCount):
            print("{0}:{1}".format(i, listCount.count(i)))
            # any label beyond 10 boards falls back to ban10s
            setattr(todayData, banAttrs.get(i, "ban10s"), listCount.count(i))


        # Check whether today's data was already saved; state=True means it was
        # (this read assumes d:\dapanData.csv already exists with at least one line)
        with open("d:\\dapanData.csv", "r") as csvfile:
            line = csvfile.readlines()[-1]
            d = line.split(",")[0]
            state = False
            # If there is historical data, check whether the last row is already today's.
            # Accept "-" or "/" as the date separator, in case the file was edited in
            # Excel, which rewrites dates with "/"
            if len(d.strip()) > 0:
                last = d.replace("/", "-").split("-")
                state = int(date.split("-")[0]) == int(last[0]) and \
                        int(date.split("-")[1]) == int(last[1]) and \
                        int(date.split("-")[2]) == int(last[2])
            # Append today's row to the CSV file
            if not state:
                dapanData = [todayData.date, todayData.up, todayData.down, todayData.limitUp, todayData.limitDown, todayData.bomb,
                             todayData.ban1, todayData.ban2, todayData.ban3, todayData.ban4, todayData.ban5, todayData.ban6,
                             todayData.ban7, todayData.ban8, todayData.ban9, todayData.ban10, todayData.ban10s]
                # print(dapanData)
                with open("d:\\dapanData.csv", "a", newline="") as apdFile:
                    w = csv.writer(apdFile)
                    w.writerow(dapanData)
    except Exception as e:
        print("An error occurred")
        print("str(e):\t", str(e))
    else:
        print("Today's data has been saved")
    finally:
        # Close the browser whether or not an error occurred
        browser.close()


if __name__ == "__main__":
    saveFuPan()
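The FuPanData class imported from Items above is the author's own and is not shown in this post. Here is a minimal sketch consistent with how it is used in the script (field names inferred from the attribute accesses; the real class may differ):

class FuPanData:
    """One day's market-review data (sketch inferred from usage)."""
    def __init__(self, date, up, down, limitUp, limitDown, bomb,
                 ban1, ban2, ban3, ban4, ban5, ban6,
                 ban7, ban8, ban9, ban10, ban10s):
        self.date = date            # trading date
        self.up = up                # number of advancing stocks
        self.down = down            # number of declining stocks
        self.limitUp = limitUp      # limit-up count
        self.limitDown = limitDown  # limit-down count
        self.bomb = bomb            # broken-board ("炸板") rate
        # counts of stocks at each consecutive limit-up level
        self.ban1, self.ban2, self.ban3, self.ban4, self.ban5 = ban1, ban2, ban3, ban4, ban5
        self.ban6, self.ban7, self.ban8, self.ban9, self.ban10 = ban6, ban7, ban8, ban9, ban10
        self.ban10s = ban10s        # more than 10 consecutive boards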

Result: the console prints the date, the advance/decline counts, and the tally for each board level, and the rows are written to d:\dapanData.csv and d:\<date>.csv.

The data is saved perfectly. Thanks for reading, and feel free to leave a comment!
