前一篇,说了如何采集微博热搜数据并存储,这篇就讲讲如何将采集到的热搜数据视频化。新年新气象,更新一波。PS:这号鸽到都年更了(`・ω・´)
基本想法是:根据热度数值确定椭圆的大小和颜色,把同一时刻的热搜词条画在一张图里,再把不同时刻的热搜图片连成视频,这样就可以直观地看到热搜的变化趋势,最后添加BGM就完成了。
Talk is cheap, show me the code
# -*- coding: utf-8 -*-
import pandas as pd
import random
from tqdm import tqdm
import datetime
import time
import math
import numpy as np
import pygame #采用pygame绘制图片
import sys
from pygame.locals import *
import cv2
from PIL import ImageGrab #截屏获取图片形成视频
import moviepy.editor as mpe
import My_Email as my
class WeiboHotnessVideo:
def __init__(self, date):
    '''
    Configure the renderer for one day and immediately produce the video.

    Construction is not passive: after the settings are stored, the silent
    video is rendered (``hotness_video``) and the background music is
    mixed in (``add_BGM``).

    Parameters
    ----------
    date : str
        Day to render, formatted as ``YYYYMMDD`` (e.g. ``20200202``).

    Returns
    -------
    None.
    '''
    year, month, day = int(date[:4]), int(date[4:6]), int(date[-2:])
    following_day = datetime.date(year, month, day) + datetime.timedelta(days=1)

    self.folder_path = r'XXXXXXXXXXXX\weibo_hotness'
    self.date = date  # the day being rendered
    self.next_date = following_day.strftime('%Y%m%d')  # the day after, same format
    self.starttime = '0430'  # HHMM at which a "day" of data starts and ends
    self.frame_number = 10  # frames between two data columns
    self.fps = 60  # pygame frame rate
    self.video_fps = 60  # frame rate of the recorded video
    self.background_color = (255, 255, 255)  # video background colour

    self.hotness_video()  # draw the data and record it
    self.add_BGM()  # add the soundtrack
def hotness_video(self):
    '''
    Draw the interpolated hot-search data frame by frame and record an AVI.

    Every column returned by ``hotness_data_settle`` becomes one video
    frame: each topic with a non-zero hotness is drawn as an ellipse whose
    size and colour depend on the hotness, with the topic text inside it.
    A 1.5 second title sequence is recorded first. Frames are captured
    with ``ImageGrab.grab()`` (a grab of the screen, not of the pygame
    surface), so the pygame window is created at the size of that grab.

    Side effects: ``self.folder_path`` is redirected to the video folder,
    ``self.screen_width`` / ``self.screen_height`` / ``self.screen_ratio``
    are set, and ``<folder>\\VIDEO\\<date>.avi`` is written.

    Returns
    -------
    None.
    '''
    all_frame_data = self.hotness_data_settle()  # rows: topics, columns: frame timestamps
    # From here on the font, title image and output all live in the video folder.
    self.folder_path = r'D:\weibo_hotness_video'
    font_path = self.folder_path + r'\STXINGKA.TTF'

    # Screen-recording setup: the video has the size of a screen grab.
    self.screen_width, self.screen_height = ImageGrab.grab().size
    self.screen_ratio = self.screen_height / self.screen_width
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # Output is <folder>\VIDEO\<date>.avi at self.video_fps frames per second.
    video = cv2.VideoWriter(self.folder_path + r'\VIDEO\%s.avi' % self.date,
                            fourcc, self.video_fps,
                            (self.screen_width, self.screen_height))

    fonts = {}

    def get_font(size):
        # Loading the TTF is expensive; keep one Font object per size
        # instead of re-reading the file for every ellipse of every frame.
        if size not in fonts:
            fonts[size] = pygame.font.Font(font_path, size)
        return fonts[size]

    def record_frame():
        # Grab the screen and append it to the video (OpenCV expects BGR).
        shot = ImageGrab.grab()
        video.write(cv2.cvtColor(np.array(shot), cv2.COLOR_RGB2BGR))

    try:
        pygame.init()
        FPSClock = pygame.time.Clock()
        screen = pygame.display.set_mode((self.screen_width, self.screen_height))

        # ---- title sequence: the caption is revealed left to right ----
        movie_start_text = '%s年%s月%s日微博热搜动态' % (self.date[:4], self.date[4:6], self.date[-2:])
        background = pygame.image.load(self.folder_path + r'\movie_start.jpg')
        title_image = get_font(120).render(movie_start_text, True, (255, 153, 0))
        title_width, title_height = title_image.get_size()
        title_x = 0.5 * (self.screen_width - title_width)
        title_y = 0.5 * (self.screen_height - title_height)
        for i in range(0, int(1.5 * self.video_fps)):
            screen.blit(background, (0, 0))
            if i < self.video_fps:
                # During the first second only a growing slice is visible.
                visible_area = pygame.Rect(0, 0, title_width * i / self.video_fps, title_height)
            else:
                visible_area = pygame.Rect(0, 0, title_width, title_height)
            screen.blit(title_image, (title_x, title_y), visible_area)
            pygame.display.update()
            record_frame()
            FPSClock.tick(self.fps)

        # ---- main loop: one iteration per data column / video frame ----
        previous_frame_data = None  # ellipse geometry of the previous frame
        print('开始循环跌打绘制每帧数据!!!')
        for frame in tqdm(all_frame_data.columns):
            for event in pygame.event.get():
                if event.type == QUIT:
                    # The finally block below releases the writer and pygame.
                    sys.exit()
            screen.fill(self.background_color)
            # Ascending sort: the hottest topic is drawn last, i.e. on top.
            current_frame_data = all_frame_data[[frame]].sort_values(by=frame, ascending=True)
            # Column label is YYYYMMDDHHMMSS; drop the seconds for display.
            stamp = current_frame_data.columns[0][:-2]
            frame_time = '%s/%s/%s %s:%s' % (stamp[:4], stamp[4:6], stamp[6:8], stamp[8:10], stamp[-2:])
            # Geometry of this frame; rows stay NaN until the topic is placed.
            temp_index = current_frame_data[current_frame_data[frame] > 0].index
            temp_previous_frame_data = pd.DataFrame(index=temp_index,
                                                    columns=['pos_x', 'pos_y', 'size_x', 'size_y'])
            # Default for a frame without any ellipse; otherwise the colour of
            # the last (hottest) ellipse decides the timestamp colours below.
            text_color = (0, 0, 0)
            for text, hotness_number in zip(current_frame_data.index, current_frame_data.iloc[:, 0]):
                if hotness_number == 0:
                    continue
                text_color = (0, 0, 0)
                # Fill colour and width of the ellipse's bounding box by hotness band.
                if hotness_number < 1E4:
                    color = (0, 128, 0)
                    size_x = 40
                elif hotness_number < 2E5:
                    R_color = int(round(((hotness_number - 1E4) / 1.9E5) * 128))
                    color = (R_color, 128, 0)
                    size_x = round(math.sqrt(hotness_number / 25000000) * self.screen_width, 2)
                elif hotness_number < 1E6:
                    R_color = 127 + int(round(((hotness_number - 2E5) / 8E5) * 128))
                    color = (R_color, 128, 0)
                    size_x = round(math.sqrt(hotness_number / 25000000) * self.screen_width, 2)
                elif hotness_number < 5E6:
                    G_color = 128 - int(round(((hotness_number - 1E6) / 4E6) * 128))
                    color = (255, G_color, 0)
                    size_x = round(math.sqrt(hotness_number / 25000000) * self.screen_width, 2)
                elif hotness_number < 1.5E7:
                    R_color = 255 - int(round(((hotness_number - 5E6) / 1E7) * 128))
                    color = (R_color, 0, 0)
                    size_x = round(math.sqrt(hotness_number / 25000000) * self.screen_width, 2)
                elif hotness_number < 2E8:
                    R_color = 128 - int(round(((hotness_number - 1.5E7) / 1.85E8) * 128))
                    color = (R_color, 0, 0)
                    text_color = (255, 255, 255)  # dark fill needs white text
                    size_x = round((0.775 + math.sqrt(hotness_number / 2E8)) * self.screen_width, 2)
                else:
                    color = (0, 0, 0)
                    text_color = (255, 255, 255)
                    size_x = round(2 * self.screen_width, 2)
                size_y = round(size_x * self.screen_ratio, 2)

                # Position: keep last frame's centre if the topic was already
                # on screen, otherwise look for a free random spot.
                if previous_frame_data is None:
                    pos_x, pos_y = self.calculate_localtion(temp_previous_frame_data, 0.8, size_x, size_y)
                elif text in previous_frame_data.index:
                    pos_x = previous_frame_data.loc[text]['pos_x']
                    pos_y = previous_frame_data.loc[text]['pos_y']
                else:
                    pos_x, pos_y = self.calculate_localtion(previous_frame_data, 0.8, size_x, size_y)
                temp_previous_frame_data.loc[text] = [pos_x, pos_y, size_x, size_y]

                # Filled ellipse plus a 3 px black outline.
                bounding_box = [pos_x - 0.5 * size_x, pos_y - 0.5 * size_y, size_x, size_y]
                pygame.draw.ellipse(screen, color, bounding_box)
                pygame.draw.ellipse(screen, (0, 0, 0), bounding_box, 3)

                # Topics of 7+ characters are split into top/centre/bottom lines.
                if len(text) < 7:
                    text_top = None
                    text_center = text
                    text_bottom = None
                else:
                    split = int(math.ceil(len(text) * 3 / 7))  # characters on the centre line
                    first = (len(text) - split) // 2  # characters on the top line
                    text_top = text[:first]
                    text_center = text[first:first + split]
                    text_bottom = text[first + split:]
                # Font size from the ellipse width and the longest (centre) line.
                if text_top is None:
                    text_size = int(round(size_x / len(text)))
                elif len(text) < 9:
                    text_size = int(round(0.6 * size_x / len(text_center)))
                elif len(text) < 11:
                    text_size = int(round(0.7 * size_x / len(text_center)))
                else:
                    text_size = int(round(size_x / len(text_center)))
                my_font = get_font(text_size)

                if text_top is not None:
                    textImage = my_font.render(text_top, True, text_color)
                    text_size_x, text_size_y = textImage.get_size()
                    screen.blit(textImage, (pos_x - 0.5 * text_size_x, pos_y - 1.3 * text_size_y))
                if text_bottom is not None:
                    textImage = my_font.render(text_bottom, True, text_color)
                    text_size_x, text_size_y = textImage.get_size()
                    screen.blit(textImage, (pos_x - 0.5 * text_size_x, pos_y + 0.3 * text_size_y))
                textImage = my_font.render(text_center, True, text_color)
                text_size_x, text_size_y = textImage.get_size()
                screen.blit(textImage, (pos_x - 0.5 * text_size_x, pos_y - 0.5 * text_size_y))

            # Timestamp in the bottom-left corner, with a contrasting background.
            if text_color == (0, 0, 0):
                bcolor = (255, 255, 255)
            else:
                bcolor = (0, 0, 0)
            textImage = get_font(40).render(frame_time, True, text_color, bcolor)
            screen.blit(textImage, (4, self.screen_height - 44))

            previous_frame_data = temp_previous_frame_data
            pygame.display.update()
            record_frame()
            FPSClock.tick(self.fps)
    finally:
        # Always finalise the file, even when the window is closed or an
        # error occurs; otherwise the AVI is left without a valid index.
        video.release()
        pygame.quit()
def hotness_data_settle(self):
    '''
    Load two days of hot-search samples and interpolate them to 30 s frames.

    The window runs from ``self.starttime`` on ``self.date`` to
    ``self.starttime`` on ``self.next_date``. Each CSV row is read as
    alternating (topic text, hotness) values. Between two consecutive
    samples one frame is generated per 30 seconds:

    * topic present in both samples: linear interpolation;
    * topic only in the earlier sample: linear fade to 0 over the first
      half of the segment, then 0;
    * topic only in the later sample: 0 for the first half, then a linear
      ramp towards its hotness.

    Returns
    -------
    DataFrame
        Index: topic text. Columns: frame timestamps as ``YYYYMMDDHHMMSS``
        strings. Missing values are filled with 0.

    Raises
    ------
    SystemExit
        If either CSV file cannot be read or parsed.
    '''
    try:
        data_current = pd.read_csv(self.folder_path + r'\data\%s.csv' % self.date, index_col=0)
        data_next = pd.read_csv(self.folder_path + r'\data\%s.csv' % self.next_date, index_col=0)
    except (OSError, ValueError) as err:  # missing file, empty file or parse error
        print(err)
        sys.exit(1)  # non-zero status: the run did not succeed

    # Keep only the samples inside the window.
    start = int(self.starttime)
    data_current = data_current.loc[data_current.index >= start]
    data_next = data_next.loc[data_next.index <= start]
    # Index is an integer HHMM; rebuild it as a YYYYMMDDHHMM string.
    data_current.index = [self.date + str(x).zfill(4) for x in data_current.index]
    data_next.index = [self.next_date + str(x).zfill(4) for x in data_next.index]
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    all_data = pd.concat([data_current, data_next])

    segments = []
    print('开始进行数据整理合并!!!')
    for i in tqdm(range(1, len(all_data))):
        data_early = list(all_data.iloc[i - 1])  # earlier sample
        data_latter = list(all_data.iloc[i])  # later sample
        time_early = datetime.datetime.strptime(str(all_data.index[i - 1]), '%Y%m%d%H%M')
        time_latter = datetime.datetime.strptime(str(all_data.index[i]), '%Y%m%d%H%M')
        steps = int((time_latter - time_early).total_seconds() / 30)
        if steps <= 0:
            # Duplicate or out-of-order timestamps yield no frames (and
            # would otherwise divide by zero below).
            continue
        time_list = [(time_early + datetime.timedelta(seconds=30 * x)).strftime('%Y%m%d%H%M%S')
                     for x in range(steps)]
        middle_point = int(0.5 * steps) + 1  # frames in the "first half"
        pair_count = min(50, len(data_early) // 2)  # at most 50 (text, hotness) pairs

        rows = {}  # topic text -> one value per frame of this segment
        for j in range(pair_count):
            text_early = data_early[2 * j]
            hotness_early = data_early[2 * j + 1]
            if text_early in data_latter:
                # Present in both samples: interpolate linearly.
                hotness_latter = data_latter[data_latter.index(text_early) + 1]
                frame_increment = (hotness_latter - hotness_early) / steps
                rows[text_early] = [hotness_early + x * frame_increment for x in range(steps)]
            else:
                # Gone in the later sample: fade out (e.g. 10, 8, 6, 4, 2, 0),
                # starting from this topic's own hotness.
                frame_increment = -2 * hotness_early / steps
                list_head = [hotness_early + x * frame_increment for x in range(middle_point)]
                rows[text_early] = list_head + [0] * (steps - middle_point)
        for j in range(pair_count):
            text_latter = data_latter[2 * j]
            if text_latter in rows:
                continue
            # New in the later sample: stay at 0, then ramp up.
            hotness_latter = data_latter[2 * j + 1]
            frame_increment = 2 * hotness_latter / steps
            list_tail = [x * frame_increment for x in range(1, steps - middle_point + 1)]
            rows[text_latter] = [0] * middle_point + list_tail
        segments.append(pd.DataFrame.from_dict(rows, orient='index', columns=time_list))

    if not segments:
        return pd.DataFrame()
    # One concat at the end instead of re-copying the growing table per segment.
    all_frame_data = pd.concat(segments, axis=1, join='outer', sort=False)
    all_frame_data.fillna(0, inplace=True)
    return all_frame_data
def calculate_localtion(self, data, space_level, size_x, size_y):
    '''
    Pick a random centre for a new ellipse, avoiding already placed ones.

    Up to 10 random candidates inside the central 80 % of the screen are
    tried. A candidate is rejected when, for some placed ellipse, both the
    horizontal and the vertical centre distance are below
    ``space_level`` times the sum of the two sizes on that axis. Rows
    whose values are NaN never reject a candidate.

    Parameters
    ----------
    data : DataFrame
        Placed ellipses, with columns ``pos_x``, ``pos_y``, ``size_x``
        and ``size_y``.
    space_level : float
        Spacing factor applied to the summed sizes.
    size_x : float
        Width of the new ellipse's bounding box.
    size_y : float
        Height of the new ellipse's bounding box.

    Returns
    -------
    pos_x : float
        x coordinate of the new centre.
    pos_y : float
        y coordinate of the new centre.
        If all 10 candidates are rejected, the last one is returned anyway.
    '''
    # Read the geometry once instead of four label lookups per row per attempt.
    if data.empty:
        placed = []
    else:
        placed = list(zip(data['pos_x'], data['pos_y'], data['size_x'], data['size_y']))

    def too_close(x, y):
        return any(
            abs(x - other_x) < space_level * abs(other_w + size_x)
            and abs(y - other_y) < space_level * abs(other_h + size_y)
            for other_x, other_y, other_w, other_h in placed
        )

    for _ in range(10):
        pos_x = round(random.uniform(self.screen_width * 0.1, self.screen_width * 0.9), 2)
        pos_y = round(random.uniform(self.screen_height * 0.1, self.screen_height * 0.9), 2)
        if not too_close(pos_x, pos_y):
            break
    return pos_x, pos_y
def add_BGM(self):
    '''
    Add the background music to the video written by ``hotness_video``.

    Reads ``<folder>\\VIDEO\\<date>.avi``, attaches the BGM track cut to
    the video's duration and writes ``<folder>\\VIDEO\\<date>X.mp4``.

    Returns
    -------
    None.
    '''
    # Raw strings: '\V', '\B' and '\%' are not valid escape sequences.
    video_path = self.folder_path + r'\VIDEO\%s.avi' % self.date
    BGM_path = self.folder_path + r'\BGM\专题片纪录片常用流行音乐-大气恢弘有气质-公司介绍(Corp_爱给网_aigei_com.mp3'
    export_video_path = self.folder_path + r'\VIDEO\%sX.mp4' % self.date

    video = mpe.VideoFileClip(video_path)
    try:
        soundtrack = mpe.AudioFileClip(BGM_path)
        try:
            # Cut the music at the end of the video, then attach it.
            audio_clip = soundtrack.set_end(video.duration)
            video.set_audio(audio_clip).write_videofile(export_video_path)
        finally:
            soundtrack.close()  # release the audio reader
    finally:
        video.close()  # release the video reader
if __name__ == '__main__':
    # Render the video for the previous calendar day (local time).
    one_day = datetime.timedelta(days=1)
    yesterday = (datetime.datetime.today() - one_day).strftime('%Y%m%d')
    video = WeiboHotnessVideo(yesterday)
这样就完成了所有的工作,生成并保存了微博热搜数据随时间的变化趋势视频。
2021年12月30日微博热搜
好的,本期内容就是这样,感谢大家的关注,再见!!!
