头歌大数据答案(自用)

第一关

# 命令行
start-all.sh
nohup hive --service metastore &
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.functions._
object cleandata {
  def main(args: Array[String]): Unit = {
    //创建spark对象
    val spark = SparkSession.builder().appName("HiveSupport").master("local[*]")
    .config("spark.sql.warehouse.dir", "hdfs://127.0.0.1:9000/opt/hive/warehouse")
    .config("hive.metastore.uris", "thrift://127.0.0.1:9083")
    .config("dfs.client.use.datanode.hostname", "true")
    .enableHiveSupport().getOrCreate()
    //############# Begin ############
     //创建hive数据库daobidata
    spark.sql("create database daobidata")
    //创建hive数据表
    spark.sql("use daobidata")
    //创建diedata表
    spark.sql("create table if not exists diedata(bianh int,com_name string," +
      "com_addr string,cat string,se_cat string,com_des string,born_data string," +
      "death_data string,live_days int,financing string,total_money int,death_reason string,"+
      "invest_name string,ceo_name string,ceo_des string"+
      ")row format delimited fields terminated by ',';")
    //将本地datadie.csv文件导入至hive数据库diedata表中
    spark.sql("load data local inpath '/data/workspace/myshixun/data/datadie.csv' into table diedata;")
   
    //进入diedata表进行清洗操作,删除为空的数据,根据倒闭原因切分出最主要原因,根据成立时间切分出,企业成立的年份,根据倒闭时间切分出,企业倒闭的年份
    val c1 = spark.table("diedata").na.drop("any").distinct()
      .withColumn("death_reason",split(col("death_reason")," ")(0))
      .withColumn("bornyear",split(col("born_data"),"/")(0))
      .withColumn("deathyear",split(col("death_data"),"/")(0))
      
    c1.createOrReplaceTempView("c1")
    //创建die_data表
    spark.sql("create table if not exists die_data(bianh int,com_name string," +
      "com_addr string,cat string,se_cat string,com_des string,born_data string," +
      "death_data string,live_days int,financing string,total_money int,death_reason string,"+
      "invest_name string,ceo_name string,ceo_des string,bornyear string,deathyear string"+
      ")row format delimited fields terminated by ',';")
    //将清洗完的数据导入至die_data表中
    spark.sql("insert overwrite table die_data select * from c1")
   //############# End ##############
    spark.stop()
  }
}

第二关

import org.apache.spark.sql.{SaveMode, SparkSession}
object citydiedata {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("SparkCleanJob")
      .master("local[*]").getOrCreate()
    //************* Begin **************
    //读取数据,用逗号分隔,第一行不做为数据,做为标题
    val df1 = spark.read.option("delimiter", ",").option("header",true).csv("/data/workspace/myshixun/die_data.csv")
    df1.createOrReplaceTempView("df1")
    //使用spark SQL语句,根据城市统计企业倒闭top5
    val df=spark.sql("select df1.com_addr as com_addr,count(df1.com_addr) as saddr from df1 group by df1.com_addr order by saddr desc limit 5")
      .repartition(1)
      .write
      //连接数据库
      .format("jdbc")
      .option("url", "jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8")
      .option("driver","com.mysql.jdbc.Driver")
      //保存至数据库的数据表名
      .option("dbtable", "addr")
      //用户名
      .option("user", "root")
      //连接数据库的密码
      .option("password", "123123")
      //不破坏数据表结构,在后添加
      .mode(SaveMode.Append)
      .save()
    //************ End ***********
    spark.stop()
  }
}   

import org.apache.spark.sql.{SaveMode, SparkSession}
object industrydata {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("SparkCleanJob")
      .master("local[*]").getOrCreate()
     //########## Begin ############
    //读取数据,用逗号分隔,第一行不做为数据,做为标题
    val df1 = spark.read.option("delimiter", ",").option("header",true).csv("/data/workspace/myshixun/die_data.csv")
    df1.createOrReplaceTempView("df1")
    //使用spark SQL语句,根据行业统计企业倒闭top10
    val df=spark.sql("select df1.cat as industry,count(df1.cat) as catindustry from df1 group by df1.cat order by catindustry desc limit 10 ")
      .repartition(1)
      .write
      //连接数据库
      .format("jdbc")
      .option("url", "jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8")
      .option("driver","com.mysql.jdbc.Driver")
      //数据表名
      .option("dbtable", "industry")
      .option("user", "root")
      .option("password", "123123")
      //不破坏数据表结构,在后添加
      .mode(SaveMode.Append)
      .save()
    //############ End ###########
    spark.stop()
  }
}  

import org.apache.spark.sql.{SaveMode, SparkSession}
object closedown {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("SparkCleanJob")
      .master("local[*]").getOrCreate()
    //############ Begin ###########
    //读取数据,用逗号分隔,第一行不做为数据,做为标题
    val df1 = spark.read.option("delimiter", ",").option("header",true).csv("/data/workspace/myshixun/die_data.csv")
    df1.createOrReplaceTempView("df1")
    //使用spark SQL语句,根据倒闭原因字段,找到企业倒闭的主要原因,统计主要原因的个数
    val df=spark.sql("select df1.death_reason as death_reason,count(df1.death_reason) as dreason from df1 group by df1.death_reason order by dreason desc")
      .repartition(1)
      .write
      //连接数据库
      .format("jdbc")
      //数据库名
      .option("url", "jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8")
      .option("driver","com.mysql.jdbc.Driver")
      //数据表名
      .option("dbtable", "cldown")
      .option("user", "root")
      .option("password", "123123")
      //不破坏表结构,在后面添加
      .mode(SaveMode.Append)
      .save()
    //############ End ###########
    spark.stop()
  }
}

import org.apache.spark.sql.{SaveMode, SparkSession}
object comfinanc {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("SparkCleanJob")
      .master("local[*]").getOrCreate()
      //############ Begin ###########
    //读取数据,用逗号分隔,去除表头,第一行不做为数据,做为标题
    val df1 = spark.read.option("delimiter", ",").option("header",true).csv("/data/workspace/myshixun/die_data.csv")
    df1.createOrReplaceTempView("df1")
    //使用spark SQL语句,根据行业细分领域字段,统计企业倒闭分布情况top20
    val df=spark.sql("select df1.se_cat as se_cat,count(df1.se_cat) as countsecat from df1 group by df1.se_cat order by countsecat desc limit 10")
      .repartition(1)
      .write
      //连接数据库
      .format("jdbc")
      .option("url", "jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8")
      .option("driver","com.mysql.jdbc.Driver")
      //数据表名
      .option("dbtable", "secat")
      .option("user", "root")
      .option("password", "123123")
      //不破坏表结构,在后面添加
      .mode(SaveMode.Append)
      .save()
    //使用spark SQL语句,统计倒闭企业融资情况
    val d1=spark.sql("select df1.financing as financing,count(df1.financing) as countfinanc from df1 group by df1.financing order by countfinanc desc")
      .repartition(1)
      .write
      //连接数据库
      .format("jdbc")
      .option("url", "jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8")
      .option("driver","com.mysql.jdbc.Driver")
      //数据表名
      .option("dbtable", "financing")
      .option("user", "root")
      .option("password", "123123")
      //不破坏表结构,在后面添加
      .mode(SaveMode.Append)
      .save()
        //########## End #########
    spark.stop()
  }
}

import org.apache.spark.sql.{SaveMode, SparkSession}
object yeardata {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("SparkCleanJob")
      .master("local[*]").getOrCreate()
    //############ Begin ###########
    //读取数据,用逗号分隔,第一行不做为数据,做为标题
    val df1 = spark.read.option("delimiter", ",").option("header",true).csv("/data/workspace/myshixun/die_data.csv")
    df1.createOrReplaceTempView("df1")
    //根据企业成立时间字段,统计每年有多少成立的企业
    val d1=spark.sql("select df1.bornyear as bornyear,count(df1.bornyear) as byear from df1 group by df1.bornyear order by bornyear desc limit 10")
    .repartition(1)
        .write
        //连接数据库
        .format("jdbc")
        .option("url", "jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8")
        .option("driver","com.mysql.jdbc.Driver")
        //数据表名
        .option("dbtable", "bornyear")
        .option("user", "root")
        .option("password", "123123")
        //不破坏表结构,在后面添加
        .mode(SaveMode.Append)
        .save()
    //根据企业倒闭年份字段,统计企业每个年份倒闭的数量
    val d2=spark.sql("select df1.deathyear as deathyear,count(df1.deathyear) as dyear from df1 group by df1.deathyear order by deathyear desc limit 10")
      .repartition(1)
          .write
          //连接数据库
          .format("jdbc")
          //数据库名
          .option("url", "jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8")
          .option("driver","com.mysql.jdbc.Driver")
          //数据表名
          .option("dbtable", "deathyear")
          .option("user", "root")
          .option("password", "123123")
          //不破坏表结构,在后面添加
          .mode(SaveMode.Append)
          .save()
    //############# End ############
    spark.stop()
  }
}

第三关

from app import db
class diedata(db.Model):
    __tablename__ = "addr"
    #**************** Begin ************#
    ID = db.Column(db.Integer, primary_key=True)  ##序号 主键
    com_addr = db.Column(db.String(255))  ##城市
    saddr = db.Column(db.Integer)  ##统计企业倒闭数量
    #************* End *************#
from flask import render_template
from app.views import index
from app import db
from app.model.models import diedata
@index.route("/city")
def index1():
    selectdata = db.session.query(diedata.com_addr).all()
    selectdata1 = db.session.query(diedata.saddr).all()
    list1 =[]
    list2=[]
    #********** Begin **********#
    #获取城市倒闭企业top5的数据
    for k in selectdata:
        data = {
            "com_addr": k.com_addr,
        }
        list1.append(data)
    for i in selectdata1:
        list2.append(i[0])
    return render_template("test3.html", com_addr=list1, saddr=list2)
    #*********** End ***********#
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>城市倒闭企业统计情况</title>
    <script type="text/javascript" src="../static/js/echarts-all-3.js" ></script>
</head>
<body>
<!--准备一个DOM容器-->
    <div id="main" style="width: 1500px;height: 650px;"></div>
    <script>
    var myChart = echarts.init(document.getElementById('main'));
    //*********** Begin ***************
    com_addr=[]
        {% for a in com_addr %}
            com_addr.push('{{ a.com_addr }}');
        {% endfor %}
        var saddr={{saddr|tojson}};
    option = {
       title:{
                text:'城市倒闭企业top5展示图',
                left:'center'
            },
       legend: {
            data: ['城市倒闭企业个数'], //这里设置柱状图上面的方块,名称跟series里的name保持一致
            align: 'right', //图例显示的位置:靠左,靠右还是居中的设置.不设置则居中
            right: 10,
        },
      xAxis: {
        type: 'category',
        data: com_addr
      },
      yAxis: {
        type: 'value',
        name: '倒闭个数',
                axisLabel: {
                    formatter: '{value} 个'
                }
      },
      series: [
        {
          data: saddr,
          type: 'bar',
          name: '城市倒闭企业个数',
          itemStyle: {
                        normal: {
                        color:'blue',
                        lineStyle:{
                            color:'blue'
                        },
                        label : {show: true}
                      }
                    }
        }
      ]
    };
    myChart.setOption(option);
    //************ End ***************
    </script>
</body>
</html>

from app import db
class diedata(db.Model):
    __tablename__ = "industrydata"
    #************* Begin ************
    
    ID = db.Column(db.Integer, primary_key=True)  ##序号 主键
    industry = db.Column(db.String(255))  ##行业名
    catindustry = db.Column(db.Integer)  ##行业倒闭数
    #************* End ************
from flask import render_template
from app.views import index
from app import db
from app.model.models import diedata
@index.route("/industry")
def index1():
    #************* Begin ************
    selectdata = db.session.query(diedata.industry).all()
    selectdata1 = db.session.query(diedata.catindustry).all()
    list1 =[]
    list2=[]
    for k in selectdata:
        data = {
            "industry": k.industry,
        }
        list1.append(data)
    for i in selectdata1:
        list2.append(i[0])
    return render_template("test3.html", industry=list1, catindustry=list2)
    #************* End *************
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>城市倒闭企业统计情况</title>
    <script type="text/javascript" src="../static/js/echarts-all-3.js" ></script>
</head>
<body>
<!--准备一个DOM容器-->
    <div id="main" style="width: 1500px;height: 650px;"></div>
    <script>
    var myChart = echarts.init(document.getElementById('main'));
    //************* Begin ************
    industry=[]
        {% for a in industry %}
            industry.push('{{ a.industry }}');
        {% endfor %}
        var catindustry={{catindustry|tojson}};
    option = {
        title:{
                text:'行业企业倒闭top10折线图',
                left:'center'
            },
        legend: {
            data: ['行业企业倒闭数'], //这里设置柱状图上面的方块,名称跟series里的name保持一致
            align: 'right', //图例显示的位置:靠左,靠右还是居中的设置.不设置则居中
            right: 10,
        },
          xAxis: {
            type: 'category',
            name: '行业分类',
                axisLabel: {
                    formatter: '{value}'
                },
            data: industry
          },
          yAxis: {
            type: 'value',
            name: '行业企业倒闭数',
                axisLabel: {
                    formatter: '{value} 个'
                }
          },
          series: [
            {
              name:'行业企业倒闭数',
              data: catindustry,
              type: 'line',
              smooth: true,
              label:{
                    show:true
                },
              itemStyle: {
                        normal: {
                        color:'green',
                        lineStyle:{
                            color:'green'
                        },
                        label : {show: true}
                      }
                    }
            }
          ]
        };
    myChart.setOption(option);
    //************* End ************
    </script>
</body>
</html>

from app import db
class diedata(db.Model):
    __tablename__ = "closedown"
    ############ Begin ###########
    ID = db.Column(db.Integer, primary_key=True)  ##序号 主键
    death_reason = db.Column(db.String(255))  ##倒闭原因
    dreason = db.Column(db.Integer)  ##倒闭原因统计
    ############ End ###########
from flask import render_template
from app.views import index
from app import db
from app.model.models import diedata
@index.route("/deathreason")
def index1():
    selectdata = db.session.query(diedata.death_reason,diedata.dreason).all()
    list1 =[]
    ############# Begin ############
    for k in selectdata:
        data = {
            "name": k.death_reason,
            "value":k.dreason
        }
        list1.append(data)
    return render_template("test3.html", datas=list1)
    ############# End ############
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>企业倒闭的原因</title>
    <script type="text/javascript" src="../static/js/echarts-all-3.js" ></script>
</head>
<body>
<!--准备一个DOM容器-->
    <div id="main" style="width: 1500px;height: 650px;"></div>
    <script>
    var myChart = echarts.init(document.getElementById('main'));
    //########### Begin #############
    var datas={{datas|tojson}};
        option = {
            title: {
                text: '企业倒闭原因结果统计图',
                left: 'center'
              },
              legend: {
                top: 'bottom',
                data:datas
              },
               tooltip: {
                trigger: 'item',
                formatter: '{b} : {c} ({d}%)'
              },
              toolbox: {
                show: true
              },
              series: [
                {
                  type: 'pie',
                  radius: [50, 250],
                  center: ['50%', '50%'],
                  roseType: 'area',
                  itemStyle: {
                    borderRadius: 8
                  },
                  data:datas
                }
              ]
            };
    myChart.setOption(option);
    //########### End #############
    </script>
</body>
</html>

from app import db
class diedata(db.Model):
    __tablename__ = "secat"
    ############## Begin ###########
    ID = db.Column(db.Integer, primary_key=True)  ##序号 主键
    se_cat = db.Column(db.String(255))  ##细分领域
    countsecat = db.Column(db.Integer)  ##细分领域企业倒闭数
    ############## End ############
class diedata1(db.Model):
    __tablename__ = "financing"
    ############## Begin ###########
    ID = db.Column(db.Integer, primary_key=True)  ##序号 主键
    financing = db.Column(db.String(255))  ##融资名
    countfinanc = db.Column(db.Integer)  ##融资个数
    ############## End ############
from flask import render_template
from app.views import index
from app import db
from app.model.models import diedata
from app.model.models import diedata1
@index.route("/fincat")
def index1():
    selectdata = db.session.query(diedata.se_cat).all()
    selectdata1 =db.session.query(diedata.countsecat).all()
    selectdata2=db.session.query(diedata1.financing).all()
    selectdata3=db.session.query(diedata1.countfinanc).all()
    list1 =[]
    list2 = []
    list3 = []
    list4 = []
    ############## Begin ###########
    for i in selectdata:
        data = {
            "se_cat": i.se_cat,
        }
        list1.append(data)
    for j in selectdata1:
        list2.append(j[0])
    for x in selectdata2:
        data = {
            "financing": x.financing,
        }
        list3.append(data)
    for y in selectdata3:
        list4.append(y[0])
    return render_template("test3.html", se_cat=list1,countsecat=list2,financing=list3,countfinanc=list4)
    ############## End ###########
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>企业融资以及细分领域倒闭企业数据</title>
    <script type="text/javascript" src="../static/js/echarts-all-3.js" ></script>
</head>
<body>
<!--准备一个DOM容器-->
    <div id="main" style="width: 1500px;height: 650px;"></div>
    <script>
    var myChart = echarts.init(document.getElementById('main'));
    //############## Begin ###########
    se_cat=[]
        {% for a in se_cat %}
            se_cat.push('{{ a.se_cat }}');
        {% endfor %}
        var countsecat={{countsecat|tojson}};
        financing=[]
            {% for b in financing %}
            financing.push('{{ b.financing }}');
        {% endfor %}
        var countfinanc={{countfinanc|tojson}};
        option = {
          title: [
            {
              left: 'center',
              text: '细分领域企业倒闭数'
            },
            {
              top: '55%',
              left: 'center',
              text: '企业融资情况'
            }
          ],
          tooltip: {
            trigger: 'axis'
          },
          legend: {
                  data: ['细分领域', '融资'],
                  left: 10
                },
          xAxis: [
            {
              data: se_cat
            },
            {
              data: financing,
              gridIndex: 1
            }
          ],
          yAxis: [
            {},
            {
              gridIndex: 1
            }
          ],
          grid: [
            {
              bottom: '60%'
            },
            {
              top: '60%'
            }
          ],
          series: [
            {
              name:'细分领域',
              type: 'bar',
              showSymbol: true,
              data: countsecat,
              label:{
                    show:true
                },
              itemStyle: {
                        normal: {
                        color:'red',
                        lineStyle:{
                            color:'red'
                        },
                        label : {show: true}
                      }
                    }
            },
            {
              name:'融资',
              type: 'line',
              showSymbol: true,
              data: countfinanc,
              xAxisIndex: 1,
              yAxisIndex: 1,
              label:{
                    show:true
                },
              itemStyle: {
                        normal: {
                        color:'green',
                        lineStyle:{
                            color:'green'
                        },
                        label : {show: true}
                      }
                    }
            }
          ]
        };
    myChart.setOption(option);
    //############## End ###########
    </script>
</body>
</html>

from app import db
class diedata(db.Model):
    __tablename__ = "bornyear"
    ########### Begin ##########
    ID = db.Column(db.Integer, primary_key=True)  ##序号 主键
    bornyear = db.Column(db.String(255))  ##成立年份
    byear = db.Column(db.Integer)  ##计数
    ########### End ##########
class diedata1(db.Model):
    __tablename__ = "deathyear"
    ########### Begin ##########
    ID = db.Column(db.Integer, primary_key=True)  ##序号 主键
    deathyear = db.Column(db.String(255))  ##倒闭年份
    dyear = db.Column(db.Integer)  ##计数
    ########### End ##########
from flask import render_template
from app.views import index
from app import db
from app.model.models import diedata
from app.model.models import diedata1
@index.route("/ydata")
def index1():
    ########### Begin ##########
    selectdata = db.session.query(diedata.bornyear,diedata.byear).all()
    selectdata1 =db.session.query(diedata1.deathyear,diedata1.dyear).all()
    list1 =[]
    list2 = []
    list3 = []
    list4 = []
    for x in selectdata:
        list1.append(str(x[0])+'年')
        list2.append(x[1])
    for j in selectdata1:
        list3.append(str(j[0])+'年')
        list4.append(j[1])
    ############ End ############
    return render_template("test3.html", bornyear=list1,byear=list2,deathyear=list3,dyear=list4)
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>企业成立年份和倒闭年份</title>
    <script type="text/javascript" src="../static/js/echarts-all-3.js" ></script>
</head>
<body>
<!--准备一个DOM容器-->
    <div id="main" style="width: 1500px;height: 650px;"></div>
    <script>
    //########### Begin ###########
    var myChart = echarts.init(document.getElementById('main'));
        var bornyear={{bornyear|tojson}};
        var byear={{byear|tojson}};
        var deathyear={{deathyear|tojson}};
        var dyear={{dyear|tojson}};
        option = {
          title: [
            {
              left: 'center',
              text: '企业成立年份柱状图'
            },
            {
              top: '55%',
              left: 'center',
              text: '企业倒闭年份柱状图'
            }
          ],
          tooltip: {
            trigger: 'axis'
          },
          legend: {
                  data: ['成立年份', '倒闭年份'],
                  left: 10
                },
          xAxis: [
            {
              data: bornyear
            },
            {
              data: deathyear,
              gridIndex: 1
            }
          ],
          yAxis: [
            {},
            {
              gridIndex: 1
            }
          ],
          grid: [
            {
              bottom: '60%'
            },
            {
              top: '60%'
            }
          ],
          series: [
            {
              name:'成立年份',
              type: 'bar',
              showSymbol: true,
              data: byear,
              label:{
                    show:true
                },
              itemStyle: {
                        normal: {
                        color:'red',
                        lineStyle:{
                            color:'red'
                        },
                        label : {show: true}
                      }
                    }
            },
            {
              name:'倒闭年份',
              type: 'bar',
              showSymbol: true,
              data: dyear,
              xAxisIndex: 1,
              yAxisIndex: 1,
              label:{
                    show:true
                },
              itemStyle: {
                        normal: {
                        color:'green',
                        lineStyle:{
                            color:'green'
                        },
                        label : {show: true}
                      }
                    }
            }
          ]
        };
    myChart.setOption(option);
    //########### End ###########
    </script>
</body>
</html>
  • 12
    点赞
  • 23
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值