Visualizing "Sisters Who Make Waves" (《乘风破浪的姐姐》) with Python

import os
import random
from collections import Counter

import jieba
import pandas as pd
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup

from pyecharts import options as opts
from pyecharts.charts import Page, Sankey, WordCloud, Radar
from pyecharts.components import Image
from pyecharts.globals import SymbolType
from pyecharts.options import ComponentTitleOpts
from pyecharts.options.global_options import ThemeType

# Show all columns when printing DataFrames
pd.set_option('display.max_columns', None)
# Show all rows
pd.set_option('display.max_rows', None)
# Show up to 100 characters per cell (default is 50)
pd.set_option('display.max_colwidth', 100)

def get_cut_words(content_series):
    # Load the stop-word list
    stop_words = []
    with open("data/stopwords.txt", 'r', encoding='utf-8') as f:
        for line in f.readlines():
            stop_words.append(line.strip())

    # Register custom keywords so jieba keeps them as single tokens
    my_words = ['杜华', '辣鸡', '导演组', '节目组', '不公平', '黄圣依', '无杜华版']
    for i in my_words:
        jieba.add_word(i)

    # my_words_list (the sisters' nicknames) is a global defined before this function is called
    for j in my_words_list:
        jieba.add_word(j)

    # Custom stop words
    my_stop_words = ['第一期', '一堆', '三个', '真的', '哈哈哈', '哈哈哈哈', '啊啊啊']
    stop_words.extend(my_stop_words)

    # Concatenate the Series into one string and segment it in exact mode
    word_num = jieba.lcut(content_series.str.cat(sep='。'), cut_all=False)

    # Keep tokens of at least 2 characters that are not stop words
    word_num_selected = [i for i in word_num if i not in stop_words and len(i) >= 2]
    return word_num_selected
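The core of get_cut_words is jieba's segment-then-filter pattern. A minimal, self-contained sketch of that pattern for quick experimentation (the sample danmaku lines and stop words below are made up for illustration):

import jieba
import pandas as pd

danmu = pd.Series(['宁静太飒了', '导演组不公平', '姐姐们加油'])  # hypothetical sample danmaku
stop_words = {'太', '了', '们', '加油'}                          # hypothetical stop words

# Concatenate the Series into one string, segment it in exact mode,
# then drop stop words and single-character tokens
words = jieba.lcut(danmu.str.cat(sep='。'), cut_all=False)
selected = [w for w in words if w not in stop_words and len(w) >= 2]
print(selected)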

def show_all():
    page = Page()
    page.add(
        image1,
        wc,
        radar,
    )
    out_html = 'data/sister/%s.html' % name
    page.render(out_html)

    # Post-process the rendered HTML to adjust the layout
    with open(os.path.join(os.path.abspath("."), out_html), 'r+', encoding="utf8") as html:
        html_bf = BeautifulSoup(html, "lxml")
        divs = html_bf.find_all("div")
        print(len(divs))
        divs[0]["style"] = "align=\"center\";margin:0 auto;text-align:center;"
        # Resize, absolutely position and (optionally) border each chart container
        divs[1]["style"] = "width:550px;height:350px;position:absolute;top:120px;left:700px;border-style:solid;border-color:#444444;border-width:0px;"
        divs[2]["style"] = "width:700px;height:700px;position:absolute;top:120px;left:20px;border-style:solid;border-color:#444444;border-width:0px;"
        divs[3]["style"] = "width:600px;height:400px;position:absolute;top:300px;left:1250px;border-style:solid;border-color:#444444;border-width:0px;"
        # Page background colour
        body = html_bf.find("body")
        body["style"] = "background-color:#333333;"
        # Prepend a title <div>; the original HTML string was stripped when the post
        # was published, so the markup below only reconstructs the idea
        div_title = '<div align="center" style="color:#FFFFFF;font-size:32px;">%s</div>' % name
        body.insert(0, BeautifulSoup(div_title, "lxml").div)
        # Overwrite the file with the modified HTML
        html_new = str(html_bf)
        html.seek(0, 0)
        html.truncate()
        html.write(html_new)
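show_all arranges the three charts by hand-editing the HTML that Page renders. pyecharts also ships built-in page layouts; a rough alternative sketch, reusing the image1, wc and radar objects from the script (SimplePageLayout simply flows the charts, so it will not reproduce the exact dashboard layout above, and the output path is hypothetical):

page = Page(layout=Page.SimplePageLayout)
page.add(image1, wc, radar)
page.render('data/sister/%s_simple_layout.html' % name)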

# Scoring data (one row per sister)
df = pd.read_csv('data/sister_data.csv', encoding='utf-8', sep='\t')

# Danmaku (bullet-comment) data for episodes 1-8
df_all = pd.DataFrame()
for i in range(1, 9):
    tmp = pd.read_csv('sister/sister/danmu_info_%d.csv' % i, encoding='utf-8', sep='\t')
    df_all = pd.concat([df_all, tmp], ignore_index=True)

# print(df['names'].tolist())

# Sort by total score, highest first
df.sort_values('总分', ascending=False, inplace=True)

# Nicknames used in the danmaku, '|'-separated per sister
df['昵称'] = ['蓝盈莹|盈莹', '黄龄', '丹妮', '孟佳', '梦辰',
             '可唯', '宁静|静静子|静姐', '霏霏', '希怡', '袁咏琳',
             '圣依|依依子', '金晨', '阿朵', '含韵', '白冰',
             '钟丽缇', '茜|茜茜子', '张萌|萌萌子', '婧汐', '丁当',
             '许飞', '刘芸|芸芸子', '吴昕|昕昕子|昕姐|昕昕', '伊能静', '松伶',
             '丽坤', '张雨绮|雨绮|绮绮子', '海陆', '金莎', '王智']

print(df.head(5))
print(df.columns)

# Build one dashboard page per sister
for name in df.names.tolist():
    # Portrait image
    image1 = Image()
    img_src = "../img/%s.jpg" % name  # path is relative to the rendered HTML file
    image1.add(
        src=img_src,
        style_opts={"width": "345px", "height": "584px", "style": "margin-top: 15px"},
    )
    image1.set_global_opts(
        title_opts=ComponentTitleOpts(
            title_style={"style": "color: white; font-size: 18px; font-weight:bold;"},
            subtitle_style={"style": "color: white;font-size: 12px;"},
        )
    )

    # Radar chart of the four scoring dimensions
    value = df[["个人特质", "声乐表现力", "成团潜力", "舞台表现力"]][df.names == name].values[0]
    data = [{"value": [float(i) for i in value], "name": "分数"}]
    c_schema = [
        {"name": "个人特质", "max": 25, "min": 0},
        {"name": "声乐表现力", "max": 25, "min": 0},
        {"name": "成团潜力", "max": 25, "min": 0},
        {"name": "舞台表现力", "max": 25, "min": 0},
    ]
    radar = (
        Radar()
        .set_colors(["#4587E7"])
        .add_schema(
            schema=c_schema,
            shape="circle",
            center=["50%", "50%"],
            radius="80%",
            angleaxis_opts=opts.AngleAxisOpts(
                min_=0,
                max_=360,
                is_clockwise=False,
                interval=5,
                axistick_opts=opts.AxisTickOpts(is_show=False),
                axislabel_opts=opts.LabelOpts(is_show=False),
                axisline_opts=opts.AxisLineOpts(is_show=False),
                splitline_opts=opts.SplitLineOpts(is_show=False),
            ),
            radiusaxis_opts=opts.RadiusAxisOpts(
                min_=0,
                max_=25,
                interval=5,
                splitarea_opts=opts.SplitAreaOpts(
                    is_show=True, areastyle_opts=opts.AreaStyleOpts(opacity=1)
                ),
            ),
            polar_opts=opts.PolarOpts(),
            splitarea_opt=opts.SplitAreaOpts(is_show=False),
            splitline_opt=opts.SplitLineOpts(is_show=False),
        )
        .add(
            series_name="分数",
            data=data,
            color="#f9713c",
            areastyle_opts=opts.AreaStyleOpts(opacity=0.1),
            linestyle_opts=opts.LineStyleOpts(width=1),
        )
    )

    # Danmaku word cloud for this sister
    tmp = df[df.names == name]
    # All nicknames, registered as extra jieba keywords inside get_cut_words
    my_words_list = df.昵称.str.cat(sep='。').replace('|', '。').split('。')
    print(tmp.昵称.values[0])
    # Danmaku lines mentioning any of this sister's nicknames
    # (the '|' separators act as regex alternation in str.contains)
    text1 = get_cut_words(content_series=df_all.content[df_all.content.str.contains(tmp.昵称.values[0])])
    wordCount_dict = Counter(text1)
    # Keep the 200 most frequent tokens
    choices_number = 200
    count_list = sorted(wordCount_dict.items(), key=lambda x: x[1], reverse=True)
    count_list = count_list[:choices_number]
    keyword_list = [k[0] for k in count_list]
    value_list = [k[1] for k in count_list]
    wc = (
        WordCloud()
        .add(
            series_name="弹幕词云",
            data_pair=count_list,
            word_size_range=[20, 100],
            textstyle_opts=opts.TextStyleOpts(font_family="cursive"),
            shape=SymbolType.DIAMOND,
        )
        .set_global_opts(
            tooltip_opts=opts.TooltipOpts(is_show=True),
        )
    )

    # Render the portrait, word cloud and radar chart onto one dashboard page
    show_all()
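A side note on the top-200 selection above: collections.Counter already provides most_common, which is equivalent to sorting the items by count and slicing. A tiny sketch with a made-up token list:

from collections import Counter

tokens = ['宁静', '宁静', '姐姐', '舞台', '姐姐', '宁静']  # hypothetical token list
print(Counter(tokens).most_common(2))                      # [('宁静', 3), ('姐姐', 2)]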
