python人物关系可视化百年孤独_利用python对《乘风破浪的姐姐》可视化

最新推荐文章于 2021-05-26 16:01:47 发布

weixin_39955149

最新推荐文章于 2021-05-26 16:01:47 发布

阅读量1.1k

点赞数

文章标签： python人物关系可视化百年孤独

本文链接：https://blog.csdn.net/weixin_39955149/article/details/111721555

版权

import os

import jieba

import pandas as pd

from bs4 import BeautifulSoup

import matplotlib.pyplot as plt

from pyecharts.charts import Page, Sankey, WordCloud, Radar

from pyecharts.components import Image

from pyecharts.options import ComponentTitleOpts

from collections import Counter

from pyecharts.globals import SymbolType

from pyecharts import options as opts

from pyecharts.options.global_options import ThemeType

from pyecharts import options as opts

from collections import Counter

import random

# Show every column when a DataFrame is printed.
pd.set_option('display.max_columns', None)

# Show every row when a DataFrame is printed.
pd.set_option('display.max_rows', None)

# Show up to 100 characters per cell value (the default is 50).
# The fully qualified key is used so the lookup does not depend on pandas'
# pattern matching of abbreviated option names.
pd.set_option('display.max_colwidth', 100)

def get_cut_words(content_series):
    """Segment danmu text with jieba and drop stop words and short tokens.

    All strings in ``content_series`` are joined with '。', segmented with
    ``jieba.lcut`` and then filtered: a token is kept when it has at least
    two characters and is not a stop word.

    The function reads the module-level global ``my_words_list``; it must be
    assigned before the first call. Every entry of that list, plus a fixed
    set of show-specific keywords, is registered with jieba so that it is
    kept as a single token.

    Args:
        content_series: pandas Series of strings (it is accessed through its
            ``.str`` accessor).

    Returns:
        list of str: the surviving tokens, in segmentation order.

    Raises:
        FileNotFoundError: if ``data/stopwords.txt`` does not exist.
        NameError: if ``my_words_list`` has not been defined yet.
    """
    # Load the stop-word table. A set gives O(1) membership checks in the
    # filter below instead of a linear scan per token.
    with open("data/stopwords.txt", 'r', encoding='utf-8') as f:
        stop_words = {line.strip() for line in f}

    # Register custom keywords so jieba does not split them.
    my_words = ['杜华', '辣鸡', '导演组', '节目组', '不公平', '黄圣依', '无杜华版']
    for word in my_words:
        jieba.add_word(word)

    # Register the names taken from the module-level global as well.
    for word in my_words_list:
        jieba.add_word(word)

    # Custom stop words on top of the ones read from the file.
    stop_words.update(['第一期', '一堆', '三个', '真的', '哈哈哈', '哈哈哈哈', '啊啊啊'])

    # Segment the concatenated text in jieba's precise (non-full) mode.
    word_num = jieba.lcut(content_series.str.cat(sep='。'), cut_all=False)

    # Keep tokens of length >= 2 that are not stop words.
    return [token for token in word_num if len(token) >= 2 and token not in stop_words]

def show_all():
    """Render the dashboard page for the current contestant and restyle it.

    Reads the module-level globals ``image1``, ``wc``, ``radar`` and ``name``
    (all assigned by the main loop before this function is called). The three
    components are added to a pyecharts ``Page`` that is rendered to
    ``data/sister/<name>.html``. The generated file is then re-opened, the
    first four ``<div>`` elements get inline styles, the body gets a dark
    background, a title ``<div>`` is inserted at the top of the body, and the
    file is overwritten with the modified markup.

    Raises:
        IndexError: if the rendered page contains fewer than four ``<div>``
            elements.
    """
    page = Page()
    page.add(image1, wc, radar)

    out_html = 'data/sister/%s.html' % name
    page.render(out_html)

    # Post-process the rendered file in place to adjust the layout.
    with open(os.path.join(os.path.abspath("."), out_html), 'r+', encoding="utf8") as html:
        html_bf = BeautifulSoup(html, "lxml")
        divs = html_bf.find_all("div")
        print(len(divs))

        divs[0]["style"] = "align=\"center\";margin:0 auto;text-align:center;"
        # Adjust chart size, position and border.
        divs[1]["style"] = "width:550px;height:350px;position:absolute;top:120px;left:700px;border-style:solid;border-color:#444444;border-width:0px;"
        # Adjust chart size, position and border.
        divs[2]["style"] = "width:700px;height:700px;position:absolute;top:120px;left:20px;border-style:solid;border-color:#444444;border-width:0px;"
        # Adjust chart size, position and border.
        divs[3]["style"] = "width:600px;height:400px;position:absolute;top:300px;left:1250px;border-style:solid;border-color:#444444;border-width:0px;"

        # Change the page background colour.
        body = html_bf.find("body")
        body["style"] = "background-color:#333333;"

        # Prepend a title.
        # NOTE(review): the original title markup was lost (the literal was
        # truncated to a lone opening quote, which is a syntax error). This is
        # a minimal reconstruction -- confirm the intended wording and styling.
        div_title = (
            '<div align="center">'
            '<span style="font-size:32px;color:#FFFFFF"><b>乘风破浪的姐姐</b></span>'
            '</div>'
        )
        body.insert(0, BeautifulSoup(div_title, "lxml").div)

        # Overwrite the file with the modified markup; the ``with`` block
        # closes the file, so no explicit close() is needed.
        html_new = str(html_bf)
        html.seek(0, 0)
        html.truncate()
        html.write(html_new)

# Per-contestant table (tab-separated); the columns used below include
# 'names', '总分' and the four score columns shown in the radar chart.
df = pd.read_csv('data/sister_data.csv', encoding='utf-8', sep='\t')

# Danmu data: eight tab-separated files, concatenated into one frame.
# DataFrame.append was removed in pandas 2.0, so the parts are collected and
# joined with a single pd.concat (the original row indices are kept).
df_all = pd.concat(
    [
        pd.read_csv('sister/sister/danmu_info_%d.csv' % i, encoding='utf-8', sep='\t')
        for i in range(1, 9)
    ]
)

# Order contestants by total score, highest first.
df.sort_values('总分', ascending=False, inplace=True)

# Nicknames, one entry per row in the sorted order above; alternatives for
# the same contestant are separated by '|'.
df['昵称'] = ['蓝盈莹|盈莹', '黄龄', '丹妮', '孟佳', '梦辰',
            '可唯', '宁静|静静子|静姐', '霏霏', '希怡', '袁咏琳',
            '圣依|依依子', '金晨', '阿朵', '含韵', '白冰',
            '钟丽缇', '茜|茜茜子', '张萌|萌萌子', '婧汐', '丁当',
            '许飞', '刘芸|芸芸子', '吴昕|昕昕子|昕姐|昕昕', '伊能静', '松伶',
            '丽坤', '张雨绮|雨绮|绮绮子', '海陆', '金莎', '王智']

print(df.head(5))
print(df.columns)

# Every nickname of every contestant as a flat list. get_cut_words() reads
# this module-level name, so it must exist before the first call. It does not
# change between iterations, so it is built once instead of once per contestant.
my_words_list = df.昵称.str.cat(sep='。').replace('|', '。').split('。')

# Build one dashboard page (photo + danmu word cloud + score radar) per
# contestant. show_all() reads `name`, `image1`, `wc` and `radar` as globals.
for name in df.names.tolist():
    # Contestant photo.
    image1 = Image()
    img_src = "../img/%s.jpg" % name  # html path quirk: relative to the rendered html file
    image1.add(
        src=img_src,
        style_opts={"width": "345px", "height": "584px", "style": "margin-top: 15px"},
    )
    image1.set_global_opts(
        title_opts=ComponentTitleOpts(
            title_style={"style": "color: white; font-size: 18px; font-weight:bold;"},
            subtitle_style={"style": "color: white;font-size: 12px;"},
        )
    )

    # Radar chart of the four score columns (each axis runs from 0 to 25).
    value = df.loc[df.names == name, ["个人特质", "声乐表现力", "成团潜力", "舞台表现力"]].values[0]
    data = [{"value": [float(i) for i in value], "name": "分数"}]
    c_schema = [
        {"name": "个人特质", "max": 25, "min": 0},
        {"name": "声乐表现力", "max": 25, "min": 0},
        {"name": "成团潜力", "max": 25, "min": 0},
        {"name": "舞台表现力", "max": 25, "min": 0},
    ]
    # The closing parentheses of the nested option objects were missing in
    # the original text; they are restored here.
    radar = (
        Radar()
        .set_colors(["#4587E7"])
        .add_schema(
            schema=c_schema,
            shape="circle",
            center=["50%", "50%"],
            radius="80%",
            angleaxis_opts=opts.AngleAxisOpts(
                min_=0,
                max_=360,
                is_clockwise=False,
                interval=5,
                axistick_opts=opts.AxisTickOpts(is_show=False),
                axislabel_opts=opts.LabelOpts(is_show=False),
                axisline_opts=opts.AxisLineOpts(is_show=False),
                splitline_opts=opts.SplitLineOpts(is_show=False),
            ),
            radiusaxis_opts=opts.RadiusAxisOpts(
                min_=0,
                max_=25,
                interval=5,
                splitarea_opts=opts.SplitAreaOpts(
                    is_show=True, areastyle_opts=opts.AreaStyleOpts(opacity=1)
                ),
            ),
            polar_opts=opts.PolarOpts(),
            splitarea_opt=opts.SplitAreaOpts(is_show=False),
            splitline_opt=opts.SplitLineOpts(is_show=False),
        )
        .add(
            series_name="分数",
            data=data,
            color="#f9713c",
            areastyle_opts=opts.AreaStyleOpts(opacity=0.1),
            linestyle_opts=opts.LineStyleOpts(width=1),
        )
    )

    # Danmu word cloud.
    tmp = df[df.names == name]
    # The '|'-separated nicknames double as a regex alternation pattern.
    nickname_pattern = tmp.昵称.values[0]
    print(nickname_pattern)

    # na=False treats missing danmu text as "no match"; without it a NaN in
    # the content column makes the boolean mask invalid.
    mentions = df_all.content[df_all.content.str.contains(nickname_pattern, na=False)]
    text1 = get_cut_words(content_series=mentions)

    # The 200 most frequent tokens as (word, count) pairs, most frequent first.
    choices_number = 200
    count_list = Counter(text1).most_common(choices_number)

    wc = (
        WordCloud()
        .add(
            series_name="弹幕词云",
            data_pair=count_list,
            word_size_range=[20, 100],
            textstyle_opts=opts.TextStyleOpts(font_family="cursive"),
            shape=SymbolType.DIAMOND,
        )
        .set_global_opts(
            tooltip_opts=opts.TooltipOpts(is_show=True),
        )
    )

    show_all()

weixin_39955149

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python人物关系可视化百年孤独_利用python对《乘风破浪的姐姐》可视化

import os; import jieba; import pandas as pd; from bs4 import BeautifulSoup; import matplotlib.pyplot as plt; from pyecharts.charts import Page, Sankey, WordCloud, Radar; from pyecharts.components import Image; fro...
复制链接

扫一扫