python爬虫数据可视化豆瓣评分top250_豆瓣电影top250爬虫系列(三)--- python+Echarts数据可视化...

前两篇我们分别爬取了电影数据,也将爬取到的数据存到了数据库;

接下来我们要对现有的数据进行分析,以获得一些有效信息;

我这里只是进行了简单的可视化分析,运用Echarts插件生成各种图表;

python连接mysql数据库查询电影信息,并生成json数据,存储到本地文件里,以供前端js读取生成可视化图表:

查询电影类型数量并返回json数据,其后写入文件里面

# Genre labels looked up in the `type` column of the `movie` table.
# The order is significant: it is the order in which the genres appear on
# the chart axes and in the exported JSON files.
typeNameList = [
    '剧情', '喜剧', '动作', '爱情', '科幻', '悬疑', '惊悚',
    '恐怖', '犯罪', '同性', '音乐', '歌舞', '传记', '历史',
    '战争', '西部', '奇幻', '冒险', '灾难', '武侠', '情色',
]

def getMovieTypeJson():
    """Count how many movies mention each genre of ``typeNameList``.

    One ``count`` query per genre is sent through ``getJsonData``. The
    match is a substring test on the ``type`` column, so a movie tagged
    with several genres is counted once for each of them.

    Returns:
        dict: ``{'typeNameList': [...], 'typeNumList': [...]}``; the two
        lists are aligned by index.
    """
    typeNumList = []
    for typeName in typeNameList:
        # typeName comes from the module-level constant list, not from user
        # input, so formatting it into the statement is acceptable here.
        sql = r"select count(type) from movie where type like '%{}%'".format(typeName)
        dataM = getJsonData(sql)
        # The count is recovered from the textual form of the result.
        # Presumably getJsonData returns nested rows such as ((12,),) --
        # confirm against its definition. Brackets are stripped as well so
        # a list-shaped result like [(12,)] parses too.
        typeNumList.append(int(str(dataM).strip('[](), ')))
    return {'typeNameList': typeNameList, 'typeNumList': typeNumList}

def writeTypeJsonFile(path):
    """Write the genre statistics from ``getMovieTypeJson`` to *path* as JSON.

    Args:
        path: Destination file; it is created or overwritten.
    """
    # json.dump escapes non-ASCII characters by default, so the payload is
    # plain ASCII; the encoding is pinned anyway so the file never depends
    # on the platform's default code page.
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(getMovieTypeJson(), f)


# Run the export
writeTypeJsonFile(r'C:\Users\Administrator\Desktop\books\movieType.txt')

对应前端页面:

// Horizontal bar chart: number of Douban Top 250 movies per genre.
var dom = document.getElementById("container");
var myChart = echarts.init(dom);
var app = {};
var option = null;
var typeNameList = [];
var typeNumList = [];

app.title = '豆瓣top250电影类型统计 - 条形图';

myChart.showLoading();

// The chart is rendered from inside the callback, so the request can stay
// asynchronous instead of switching jQuery into page-blocking synchronous
// mode for every later request.
$.getJSON("../Desktop/books/movieType.txt", function (data) {
    typeNameList = data.typeNameList;
    typeNumList = data.typeNumList;

    option = {
        title: {
            text: '豆瓣top250电影类型统计',
            subtext: '数据来自豆瓣'
        },
        tooltip: {
            trigger: 'axis',
            axisPointer: {
                type: 'shadow'
            }
        },
        legend: {
            // Only names that match a series belong here; the former
            // '2012年' entry had no series behind it.
            data: ['电影类型数量']
        },
        grid: {
            left: '3%',
            right: '4%',
            bottom: '3%',
            containLabel: true
        },
        xAxis: {
            type: 'value',
            boundaryGap: [0, 0.01]
        },
        yAxis: {
            type: 'category',
            data: typeNameList
        },
        series: [
            {
                name: '电影类型数量',
                type: 'bar',
                data: typeNumList
            }
        ]
    };

    myChart.hideLoading();
    myChart.setOption(option, true);
});

生成图表结果:

08a8de677887?utm_campaign=maleskine&utm_content=note&utm_medium=seo_notes&utm_source=recommendation

type.jpg

按照type --> age --> country --> score --> movieLength --> title的顺序进行循环

def getMovieTreeJson():
    """Build the genre > year > country > score > length > title tree as JSON text.

    For every genre in ``typeNameList`` the database is walked level by
    level (one query per level, through ``getJsonData`` / ``getPureList``).
    The tree is assembled from dicts and lists and serialized once at the
    end, so quotes or backslashes inside titles and notes are escaped
    correctly and a level without children yields ``"children": []``
    instead of malformed text.

    Returns:
        str: JSON text shaped like
        ``{"types": [{"name": ..., "children": [...]}, ...]}``. Leaves are
        ``{"name": <title>, "value": <note>}``.
    """
    import json  # local import keeps this block self-contained

    def _node(name):
        # Inner tree node in the {"name", "children"} shape used by the tree chart.
        return {"name": "{}".format(name), "children": []}

    # NOTE(review): every statement below is built with str.format because
    # getJsonData, as used here, only receives a finished SQL string. The
    # interpolated values come back from the database itself; one that
    # contains a single quote would break the statement. Consider
    # parameterized queries if getJsonData can be extended.
    types = []
    for typeName in typeNameList:
        typeNode = _node(typeName)
        types.append(typeNode)

        sql = (r"select distinct age from movie "
               r"where type like '%{}%' order by age desc").format(typeName)
        for age in getPureList(getJsonData(sql)):
            ageNode = _node(age)
            typeNode["children"].append(ageNode)

            sql = (r"select distinct country from movie "
                   r"where age = '{}' and type like '%{}%'").format(age, typeName)
            seenCountries = set()
            for country in getPureList(getJsonData(sql)):
                # Only the first space-separated token of the country
                # column is used, and each such token once per year.
                countryName = country.split(" ")[0]
                if countryName in seenCountries:
                    continue
                seenCountries.add(countryName)
                countryNode = _node(countryName)
                ageNode["children"].append(countryNode)

                sql = (r"select distinct score from movie "
                       r"where age = '{}' and type like '%{}%' and country like '{}%' "
                       r"order by score desc").format(age, typeName, countryName)
                for score in getPureList(getJsonData(sql)):
                    scoreNode = _node("分数{}".format(score))
                    countryNode["children"].append(scoreNode)

                    # score is pinned by the WHERE clause in the next two
                    # statements, so the former "order by score desc" was a
                    # no-op there and has been dropped.
                    sql = (r"select distinct movieLength from movie "
                           r"where age = '{}' and type like '%{}%' and country like '{}%' "
                           r"and score = '{}'").format(age, typeName, countryName, score)
                    for movieLength in getPureList(getJsonData(sql)):
                        lengthNode = _node("时长{}".format(movieLength))
                        scoreNode["children"].append(lengthNode)

                        sql = (r"select title, note from movie "
                               r"where age = '{}' and type like '%{}%' and country like '{}%' "
                               r"and score = '{}' and movieLength = '{}'").format(
                                   age, typeName, countryName, score, movieLength)
                        for title, note in getJsonData(sql):
                            lengthNode["children"].append(
                                {"name": "{}".format(title), "value": "{}".format(note)})

    # ensure_ascii=False keeps the text as readable as the hand-built string
    # was; the caller decides how the text is finally written.
    return json.dumps({"types": types}, ensure_ascii=False)

def writeTreeJsonFile(path):
    """Write the movie tree produced by ``getMovieTreeJson`` to *path*.

    ``getMovieTreeJson`` already returns JSON text and ``json.dump``
    encodes that text once more, so the file holds a JSON string literal
    that wraps the tree. The page reading this file unwraps that extra
    layer, which is why the format is kept unchanged here.

    Args:
        path: Destination file; it is created or overwritten.
    """
    # json.dump emits ASCII only by default; the encoding is pinned so the
    # result never depends on the platform's default code page.
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(getMovieTreeJson(), f)


writeTreeJsonFile(r'C:\Users\Administrator\Desktop\books\movieTreeJson.txt')

对应html页面

// One radial tree chart per genre, drawn into #container-0, #container-1, ...
var myChart;
var dataEnd = {};

// Data file: ../Desktop/books/movieTreeJson.txt
// (presumably modelled on http://echarts.baidu.com/examples/data/asset/data/flare.json)
$.get('../Desktop/books/movieTreeJson.txt', function (data) {
    // The exporter runs json.dump on text that is already JSON, so the
    // payload is a JSON string wrapping the real document. Decoding until
    // an object comes out handles that wrapper, and also a plain JSON
    // file, without hand-written un-escaping.
    var parsed = data;
    while (typeof parsed === 'string') {
        parsed = JSON.parse(parsed);
    }
    dataEnd = parsed;

    // Draw one chart per exported genre instead of assuming exactly 21.
    for (var i = 0; i < dataEnd.types.length; i++) {
        initEcharts("container-" + i, i);
    }
});

/**
 * Render the tree of one genre.
 *
 * @param {string} name  id of the DOM element that hosts the chart
 * @param {number} index position of the genre inside dataEnd.types
 */
function initEcharts(name, index) {
    var dom = document.getElementById(name);
    if (!dom) {
        // No container for this genre on the page: nothing to draw.
        return;
    }
    myChart = echarts.init(dom);

    var option = {
        tooltip: {
            trigger: 'item',
            triggerOn: 'mousemove'
        },
        series: [
            {
                type: 'tree',
                // Use the parameter, not the caller's loop variable.
                data: [dataEnd.types[index]],
                top: '18%',
                bottom: '14%',
                layout: 'radial',
                symbol: 'emptyCircle',
                symbolSize: 7,
                initialTreeDepth: 3,
                animationDurationUpdate: 750
            }
        ]
    };

    myChart.setOption(option, true);
}

图表结果是21种电影类型,这里只贴出其中一张示例

08a8de677887?utm_campaign=maleskine&utm_content=note&utm_medium=seo_notes&utm_source=recommendation

tree.png

查询年代得分:

def getAgeScoreJson():
    """Collect the rounded average score of every genre for each release year.

    Returns:
        dict: with these keys
            ``'ages'``     -- ``['Growth', <year>, <year>, ...]``; the first
                              entry is a placeholder, real years start at
                              index 1 (the chart page starts reading there).
            ``'ageNames'`` -- always an empty list (nothing fills it here).
            ``'<year>'``   -- one list per year holding the average score of
                              each genre, aligned with ``'names'``; 0 when
                              the year has no movie of that genre.
            ``'names'``    -- ``typeNameList``.
    """
    ageScoreMap = {}
    ageScoreMap['ages'] = ['Growth']
    ageScoreMap['ageNames'] = []

    sql = r'select DISTINCT age from movie ORDER BY age desc'
    ageList = getPureList(getJsonData(sql))

    for age in ageList:
        avgScoreList = []
        for typeName in typeNameList:
            sql = r"select avg(score) from movie where age = '{}' and type like '%{}%'".format(age, typeName)
            # The average is recovered from the textual form of the
            # one-element result list (brackets and quotes stripped).
            avgScore = str(getPureList(getJsonData(sql))).strip("['").strip("']")
            # 'None' means avg() found no rows; '' means the result list was
            # empty. Both count as "no score" rather than crashing float().
            if avgScore in ('None', ''):
                avgScore = 0
            # NOTE(review): round() without digits yields whole numbers, so
            # close averages collapse to the same bar -- confirm this is the
            # intended precision.
            avgScoreList.append(round(float(avgScore)))
        ageScoreMap[str(age)] = avgScoreList
        ageScoreMap['ages'].append(str(age))

    ageScoreMap['names'] = typeNameList
    return ageScoreMap

def writeAgeScoreJsonFile(path):
    """Write the per-year genre averages from ``getAgeScoreJson`` to *path* as JSON.

    Args:
        path: Destination file; it is created or overwritten.
    """
    # json.dump emits ASCII only by default; the encoding is pinned so the
    # result never depends on the platform's default code page.
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(getAgeScoreJson(), f)


writeAgeScoreJsonFile(r'C:\Users\Administrator\Desktop\books\movieAgeScoreJson.txt')

前端页面:

// Grouped bar chart: average score per genre, one series per release year.
var dom = document.getElementById("container");
var myChart = echarts.init(dom);
var app = {};
var option = null;

myChart.showLoading();

$.get('../Desktop/books/movieAgeScoreJson.txt', function (result) {
    // A .txt response normally arrives as text; parse only in that case so
    // an already-decoded object is not passed to JSON.parse.
    if (typeof result === 'string') {
        result = JSON.parse(result);
    }

    // result.ages[0] is the 'Growth' placeholder written by the exporter;
    // the real years start at index 1.
    var ages = result.ages.slice(1);

    var series = [];
    // Stop at length - 1: the former "<=" bound pushed one extra series
    // whose name and data were undefined.
    for (var i = 1; i < result.ages.length; i++) {
        series.push({
            name: result.ages[i],
            type: 'bar',
            data: result[result.ages[i]]
        });
    }

    myChart.hideLoading();

    option = {
        tooltip: {
            trigger: 'axis',
            axisPointer: {
                type: 'shadow',
                label: {
                    show: true
                }
            }
        },
        toolbox: {
            show: true,
            feature: {
                mark: {show: true},
                dataView: {show: true, readOnly: false},
                magicType: {show: true, type: ['line', 'bar']},
                restore: {show: true},
                saveAsImage: {show: true}
            }
        },
        calculable: true,
        legend: {
            // Legend entries must match series names, so the placeholder
            // is left out.
            data: ages,
            itemGap: 5
        },
        grid: {
            top: '12%',
            left: '1%',
            right: '10%',
            containLabel: true
        },
        xAxis: [
            {
                type: 'category',
                data: result.names
            }
        ],
        yAxis: [
            {
                type: 'value',
                name: 'average score',
                axisLabel: {
                    formatter: function (a) {
                        return a;
                    }
                }
            }
        ],
        dataZoom: [
            {
                show: true,
                start: 94,
                end: 100
            },
            {
                type: 'inside',
                start: 94,
                end: 100
            },
            {
                show: true,
                yAxisIndex: 0,
                filterMode: 'empty',
                width: 30,
                height: '80%',
                showDataShadow: false,
                left: '93%'
            }
        ],
        series: series
    };

    // The chart is rendered here, once the data is available. The former
    // setOption call after $.get ran before this callback, while option
    // was still null, and therefore never did anything.
    myChart.setOption(option, true);
});

生成图表结果:

08a8de677887?utm_campaign=maleskine&utm_content=note&utm_medium=seo_notes&utm_source=recommendation

scores.png

08a8de677887?utm_campaign=maleskine&utm_content=note&utm_medium=seo_notes&utm_source=recommendation

subscore.png

tips:

其实还可以生成词云图、折线图等各种其他形式图表;

本文只对电影表进行了分析,并没有对演员表、评论表、获奖表分析;

以后有时间再扩展;

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值