Xueqiu (雪球网)

import requests
import json
import pymysql
# escape_string lives under pymysql.converters in current PyMySQL releases
from pymysql.converters import escape_string

class mysql_conn(object):
    # Constructor: open the MySQL connection and create a cursor
    def __init__(self):
        self.db = pymysql.connect(host='127.0.0.1', user='root', password='123456',
                                  port=3306, database='py11_mysql')
        self.cursor = self.db.cursor()

    # Run a write statement (INSERT/UPDATE/DELETE) and commit it
    def execute_modify_mysql(self, sql):
        self.cursor.execute(sql)
        self.db.commit()

    # Destructor: close the cursor and connection when the object is garbage-collected
    def __del__(self):
        self.cursor.close()
        self.db.close()
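The crawler below inserts into a `xueqiu` table that the post never defines. Here is a minimal sketch of a schema that would accept the data, created through the helper class above; the column names come from the INSERT statement, while the types and sizes are assumptions:

```python
# Hypothetical schema for the target table; the original post does not show it.
my = mysql_conn()
my.execute_modify_mysql('''
    CREATE TABLE IF NOT EXISTS xueqiu (
        id INT AUTO_INCREMENT PRIMARY KEY,
        uid BIGINT,
        title VARCHAR(255),
        description TEXT,
        target VARCHAR(255)
    ) DEFAULT CHARSET=utf8mb4
''')
```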





def xueqiu(pages):
    # max_id is the pagination cursor; -1 requests the newest items
    max_id = '-1'
    # Spoofed headers; the Cookie comes from a logged-in browser session
    headers = {
        'Cookie': 'aliyungf_tc=AQAAAOOwalcrOAkAufryckXjyKnpJi7J; xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; u=401534335935345; device_id=e6cadb56338d7e4620a72bf0a6cd197e',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36',
    }
    my = mysql_conn()
    for _ in range(pages):
        # Timeline API URL for the current page
        url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=' + max_id + '&count=10&category=105'

        # Send the request with the spoofed headers
        response = requests.get(url, headers=headers)
        # Parse the JSON body into a dict
        res_json = response.json()

        # 'list' holds the articles; 'next_max_id' is the cursor for the next page
        res_list = res_json['list']
        max_id = str(res_json['next_max_id'])

        # Each item in res_list is a dict whose 'data' field is itself a JSON string
        for list_item_dict in res_list:
            data_dict = json.loads(list_item_dict['data'])
            data = {}
            data['id'] = data_dict['id']
            # Escape quotes before interpolating the values into the SQL string
            data['title'] = escape_string(data_dict['title'])
            data['description'] = escape_string(data_dict['description'])
            data['target'] = escape_string(data_dict['target'])
            # Build and run the INSERT statement
            my.execute_modify_mysql(
                'insert into xueqiu(uid, title, description, target) '
                'values ("{id}", "{title}", "{description}", "{target}")'.format(**data))

xueqiu(3)
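A side note: escaping values by hand and splicing them into the SQL string works, but pymysql can bind parameters itself and handle all quoting. Here is a minimal sketch of the same insert written with parameter binding (the helper name `insert_item` is mine, not from the post):

```python
def insert_item(my, item):
    """Insert one article via parameter binding instead of manual escaping.

    `my` is a mysql_conn instance; `item` is the dict parsed from the
    article's 'data' field. This helper is a sketch, not from the post.
    """
    sql = ('INSERT INTO xueqiu (uid, title, description, target) '
           'VALUES (%s, %s, %s, %s)')
    # pymysql quotes and escapes each bound value itself, so embedded
    # quotes in titles or descriptions cannot break the statement
    my.cursor.execute(sql, (item['id'], item['title'],
                            item['description'], item['target']))
    my.db.commit()
```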
Here is another way to scrape Xueqiu's Shanghai/Shenzhen, Hong Kong, and US stock quotes with a Python crawler:

1. Install the requests and beautifulsoup4 libraries:

```shell
pip install requests
pip install beautifulsoup4
```

2. Import the required libraries:

```python
import requests
from bs4 import BeautifulSoup
```

3. Build the request headers and URL:

```python
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
url = 'https://xueqiu.com/hq#exchange=CN&firstName=1&secondName=1_0'
```

4. Send the request and parse the HTML:

```python
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')
```

5. Extract the data for each market:

```python
# Shanghai/Shenzhen stocks
hs_stocks = soup.select('#table_wrapper-table > div > table > tbody > tr')
for stock in hs_stocks:
    name = stock.select('td')[0].text
    price = stock.select('td')[1].text
    print(name, price)

# Hong Kong stocks
hk_stocks = soup.select('#table_wrapper-table-HK > div > table > tbody > tr')
for stock in hk_stocks:
    name = stock.select('td')[0].text
    price = stock.select('td')[1].text
    print(name, price)

# US stocks
us_stocks = soup.select('#table_wrapper-table-US > div > table > tbody > tr')
for stock in us_stocks:
    name = stock.select('td')[0].text
    price = stock.select('td')[1].text
    print(name, price)
```

6. Full script: the snippets from steps 2 through 5, concatenated in order, form the complete crawler.

One caveat: the `#table_wrapper-table...` selectors only find the quote tables if they are present in the raw HTML. The `#exchange=...` fragment in the URL is never sent to the server, which suggests the tables are filled in by JavaScript, in which case `select()` returns empty lists; see the Selenium sketch after this list.
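If the tables are indeed rendered client-side, one workaround is to let a real browser execute the page's JavaScript before parsing. Here is a minimal sketch using Selenium (the CSS selectors are carried over from above and remain assumptions):

```python
import time

from bs4 import BeautifulSoup
from selenium import webdriver

driver = webdriver.Chrome()
driver.get('https://xueqiu.com/hq#exchange=CN&firstName=1&secondName=1_0')
time.sleep(3)  # crude wait for the JavaScript-built table; tune as needed
# page_source holds the DOM after scripts have run
soup = BeautifulSoup(driver.page_source, 'html.parser')
driver.quit()

# Same (assumed) selector as above, now against the rendered DOM
for stock in soup.select('#table_wrapper-table > div > table > tbody > tr'):
    cells = stock.select('td')
    if len(cells) >= 2:
        print(cells[0].text, cells[1].text)
```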