# Build a weekly feature matrix X: one row per weekly anchor date, one column
# per keyword tag plus the two title-sentiment columns; each cell holds the sum
# of that column over the rows falling in the 7-day window starting at the anchor.
guardian_df['date'] = pd.to_datetime(guardian_df['date'])

# Every column from the 5th up to (but not including) the last two is treated
# as a keyword tag.
# NOTE(review): presumably the last two columns are 'positive_title' and
# 'negative_title' -- confirm against the frame's actual layout.
keyword_tags = guardian_df.columns[4:-2]
feature_columns = list(keyword_tags) + ['positive_title', 'negative_title']

# weekday() is 0 for Monday .. 6 for Sunday, so this pushes the range end to
# the Sunday on or after the latest date; 'W-Sun' then yields every Sunday
# between the earliest date and that end point.
last_date = guardian_df['date'].max()
weekly_date_ranges = pd.date_range(
    guardian_df['date'].min(),
    last_date + timedelta(days=6 - last_date.weekday()),
    freq='W-Sun',
)
X = pd.DataFrame(index=weekly_date_ranges, columns=feature_columns)

# NOTE(review): each anchor is a Sunday and is used as the *start* of an
# inclusive Sunday..Saturday window. Rows dated before the first Sunday in the
# range therefore belong to no window -- confirm this is the intended binning.
for week_start in weekly_date_ranges:
    week_end = week_start + timedelta(days=6)
    week_data = guardian_df[
        (guardian_df['date'] >= week_start) & (guardian_df['date'] <= week_end)
    ]
    weekly_frequencies = week_data[feature_columns].sum()
    X.loc[week_start] = weekly_frequencies

# Reorder the columns by their total over all weeks, largest total first.
X_ord = X[X.sum().sort_values(ascending=False).index]
print(X_ord.tail(2))
q4_1....第一版
最新推荐文章于 2024-10-04 20:08:50 发布