import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Plot a quarterly comparison of how many articles two news sources published.
# Each input workbook must contain a '时间' (publication time) column.

# Load the two article tables.
a_data = pd.read_excel("A.xlsx")
b_data = pd.read_excel("B.xlsx")

# Make sure the publication-time column is a real datetime dtype so the
# `.dt` accessor below works.
a_data['时间'] = pd.to_datetime(a_data['时间'])
b_data['时间'] = pd.to_datetime(b_data['时间'])

# Count articles per calendar quarter for each source.
a_article_count = a_data.groupby(a_data['时间'].dt.to_period('Q')).size()
b_article_count = b_data.groupby(b_data['时间'].dt.to_period('Q')).size()

# X positions: source A is drawn at the first month of each quarter, source B
# at the last month (quarter start + 2 months). Each series uses its OWN
# quarterly index so x and y always have the same length, even when the two
# sources do not cover the same quarters.
# NOTE(review): the 2-month shift presumably keeps the two series' markers and
# labels from overlapping -- confirm this is the intended presentation.
a_dates = a_article_count.index.to_timestamp()
b_dates = b_article_count.index.to_timestamp() + pd.DateOffset(months=2)

# Draw the comparison chart.
plt.figure(figsize=(12, 6))
plt.plot(a_dates, a_article_count.values, marker='o', label='A News')
plt.plot(b_dates, b_article_count.values, marker='o', label='B News')
plt.xlabel("Quarter")
plt.ylabel("Number of Articles")
plt.title("Comparison of Article Publications between A News and B News")
plt.legend()

# Label every data point with its article count, 10 points above the marker.
for dates, counts in ((a_dates, a_article_count.values),
                      (b_dates, b_article_count.values)):
    for date, count in zip(dates, counts):
        plt.annotate(str(count), (date, count), textcoords="offset points",
                     xytext=(0, 10), ha='center')

# One major tick per year, labelled as "<year>-<abbreviated month>".
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.YearLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%b'))
plt.xticks(rotation=45)

plt.tight_layout()
plt.show()
两个新闻源发布新闻时间折线图对比
最新推荐文章于 2024-06-10 22:18:41 发布