1. Create the index
PUT fear_greed_stock
{
  "mappings": {
    "dynamic": "strict",
    "properties": {
      "c_date": {
        "type": "date",
        "format": "yyyy-MM-dd"
      },
      "val": {
        "type": "float"
      },
      "tag": {
        "type": "keyword"
      },
      "type": {
        "type": "keyword"
      },
      "previous_close": {
        "type": "float"
      },
      "previous_1_week": {
        "type": "float"
      },
      "previous_1_month": {
        "type": "float"
      },
      "previous_1_year": {
        "type": "float"
      },
      "metric_value": {
        "type": "float"
      }
    }
  }
}
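With the mapping in place, a quick smoke test confirms documents land with the expected fields. This is a minimal sketch, reusing the es_client helper from utils.esutils that the script below imports, and assuming an 8.x elasticsearch-py client (older clients take a body= dict instead of keyword arguments); it fetches the most recent "overall" reading.

from utils.esutils import es_client  # project helper from this post; assumed to return an Elasticsearch client

# Pull the latest overall Fear & Greed record, newest first.
resp = es_client.search(
    index="fear_greed_stock",
    query={"term": {"type": "overall"}},
    sort=[{"c_date": {"order": "desc"}}],
    size=1,
)
for hit in resp["hits"]["hits"]:
    print(hit["_source"]["c_date"], hit["_source"]["val"], hit["_source"]["tag"])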
2. Core Python code
import requests
import json
from datetime import datetime
from elasticsearch import helpers
import time
import logging
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from utils.datautils import get_unique_id
from utils.esutils import es_client

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
INDEX_NAME = "fear_greed_stock"
def parse_timestamp(timestamp):
    """Parse a timestamp into 'YYYY-MM-DD'; accepts both ISO strings and millisecond epochs."""
    try:
        if isinstance(timestamp, str):
            return datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S+00:00").strftime("%Y-%m-%d")
        else:
            return datetime.fromtimestamp(int(timestamp) / 1000).strftime("%Y-%m-%d")
    except Exception as e:
        logger.error(f"Failed to parse timestamp: {timestamp}, error: {e}")
        return None
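A quick check of both accepted input shapes (the values here are illustrative, not real API output):

print(parse_timestamp("2024-05-01T00:00:00+00:00"))  # -> '2024-05-01'
print(parse_timestamp(1714521600000))                # -> '2024-05-01' on a UTC host; fromtimestamp() uses local time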
def process_fear_greed_data(data, es_client, index_name=INDEX_NAME):
    """Parse the Fear and Greed Index payload and bulk-index it into Elasticsearch."""
    actions = []
    processed_count = 0
    try:
        # Overall index reading.
        fear_greed = data.get('fear_and_greed', {})
        if not fear_greed:
            logger.warning("No fear_and_greed data found")
            return processed_count
        score = fear_greed.get('score')
        rating = fear_greed.get('rating')
        timestamp = fear_greed.get('timestamp')
        previous_close = fear_greed.get('previous_close')
        previous_1_week = fear_greed.get('previous_1_week')
        previous_1_month = fear_greed.get('previous_1_month')
        previous_1_year = fear_greed.get('previous_1_year')

        date = parse_timestamp(timestamp)
        if not date:
            logger.warning("Invalid timestamp, skipping record")
            return processed_count

        info = {
            "c_date": date,
            "val": score,
            "tag": rating,
            "previous_close": previous_close,
            "previous_1_week": previous_1_week,
            "previous_1_month": previous_1_month,
            "previous_1_year": previous_1_year,
            "type": "overall"
        }
        action = {
            "_op_type": "index",
            "_index": index_name,
            "_id": get_unique_id(f"{date}_overall"),
            "_source": info
        }
        actions.append(action)
        processed_count += 1

        # Sub-indicator readings.
        sub_metrics = [
            "market_momentum_sp500", "market_momentum_sp125", "stock_price_strength",
            "stock_price_breadth", "put_call_options", "market_volatility_vix",
            "market_volatility_vix_50", "junk_bond_demand", "safe_haven_demand"
        ]
        for metric in sub_metrics:
            metric_data = data.get(metric, {})
            metric_timestamp = metric_data.get('timestamp')
            metric_score = metric_data.get('score')
            metric_rating = metric_data.get('rating')
            metric_value = metric_data.get('data', [{}])[0].get('y') if metric_data.get('data') else None

            metric_date = parse_timestamp(metric_timestamp)
            if not metric_date:
                logger.warning(f"Invalid timestamp for {metric}, skipping")
                continue

            metric_info = {
                "c_date": metric_date,
                "val": metric_score,
                "tag": metric_rating,
                "metric_value": metric_value,
                "type": metric
            }
            action = {
                "_op_type": "index",
                "_index": index_name,
                "_id": get_unique_id(f"{metric_date}_{metric}"),
                "_source": metric_info
            }
            actions.append(action)
            processed_count += 1

            # Flush in batches to keep bulk requests bounded.
            if len(actions) >= 5000:
                try:
                    helpers.bulk(es_client, actions)
                    logger.info(f"Processed {processed_count} records")
                    actions.clear()
                    time.sleep(0.5)
                except Exception as e:
                    logger.error(f"Bulk indexing failed: {e}")
                    time.sleep(2)

        # Flush whatever is left.
        if actions:
            try:
                helpers.bulk(es_client, actions)
                logger.info(f"Processed {processed_count} records")
            except Exception as e:
                logger.error(f"Failed to index remaining records: {e}")
    except Exception as e:
        logger.error(f"Data processing failed: {e}")
    return processed_count
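For reference, the parser above only touches a handful of fields. A minimal synthetic payload in the shape it expects looks like this (all values are made up for illustration; calling it for real requires a running Elasticsearch behind es_client):

sample = {
    "fear_and_greed": {
        "score": 55.0, "rating": "greed",
        "timestamp": "2024-05-01T00:00:00+00:00",
        "previous_close": 54.0, "previous_1_week": 50.0,
        "previous_1_month": 45.0, "previous_1_year": 60.0,
    },
    "market_momentum_sp500": {
        "score": 70.0, "rating": "extreme greed",
        "timestamp": "2024-05-01T00:00:00+00:00",
        "data": [{"x": 1714521600000, "y": 5100.0}],
    },
    # ...the remaining sub-metrics follow the same shape
}
process_fear_greed_data(sample, es_client)  # -> 2 (one overall record + one sub-metric; absent metrics are skipped)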
def do():
    session = requests.Session()
    response = None
    try:
        date = datetime.now().strftime("%Y-%m-%d")
        url = f"https://production.dataviz.cnn.io/index/fearandgreed/graphdata/{date}"
        headers = {
            "Accept": "application/json",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        proxies = {
            "http": "http://127.0.0.1:7890",
            "https": "http://127.0.0.1:7890"
        }
        # Retry transient failures, including 418s from bot detection.
        retries = Retry(total=3, backoff_factor=1, status_forcelist=[418, 429, 500, 502, 503, 504])
        session.mount("https://", HTTPAdapter(max_retries=retries))

        response = session.get(url, headers=headers, proxies=proxies, timeout=10)
        response.raise_for_status()
        data = response.json()
        processed_count = process_fear_greed_data(data, es_client, INDEX_NAME)
        logger.info(f"Processed {processed_count} records in total")
    except requests.exceptions.HTTPError as e:
        if response is not None and response.status_code == 418:
            logger.error("HTTP 418: the server flagged the request as a bot; switch proxies or add a delay before retrying")
        else:
            logger.error(f"Request failed: {e}")
    except requests.exceptions.RequestException as e:
        logger.error(f"Error while making the request: {e}")
    finally:
        session.close()
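Finally, a plain entry point makes the collector runnable as a one-shot script; scheduling (cron, APScheduler, and the like) can sit on top of this as needed.

if __name__ == "__main__":
    do()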