大数据开发基础实验四

大数据开发基础实验四

一级目录

# Open an interactive bash shell inside the container named hbase-master.
docker exec -it hbase-master bash

# Copy the analysis jar and the sample MEDLINE XML from /root into two
# destinations: home/ inside container c5f727be11c8, and the root directory
# of hbase-master.
# NOTE(review): c5f727be11c8 looks like a container ID from the author's
# machine -- presumably it must be replaced with your own ID; confirm.
docker cp /root/ch07-graph-2.0.0-jar-with-dependencies.jar c5f727be11c8:home/
docker cp /root/medsamp2016a.xml c5f727be11c8:home/
docker cp /root/medsamp2016a.xml hbase-master:/
docker cp /root/ch07-graph-2.0.0-jar-with-dependencies.jar hbase-master:/

# Run the start-hadoop-images.sh script from the hadoop-docker-centos7 directory.
# NOTE(review): presumably this starts the containers used above, in which
# case it has to run before the docker exec / docker cp commands -- confirm.
cd ./hadoop-docker-centos7/
./start-hadoop-images.sh

# Open a bash shell inside the container named mysql, then start the MySQL
# client as root; -p makes the client prompt for the password.
docker exec -it mysql bash
mysql -u root -p
# NOTE(review): the next line is not a command. Presumably it is the password
# typed at the prompt (the same value is used in the GRANT statement and in
# the pymysql.connect call later in this document) -- confirm.
hadoop

-- Give 'root' connecting from any host ('%') all privileges on every database,
-- with password 'hadoop' and the right to grant privileges to others.
-- The trailing ';' is required: without it the mysql client keeps reading
-- input and merges this statement with the lines that follow it.
-- NOTE(review): GRANT ... IDENTIFIED BY is rejected by MySQL 8.0; confirm the
-- server version before reusing this statement.
grant all privileges on *.* to 'root'@'%' identified by 'hadoop' with grant option;

创建数据库
create DATABASE graph;

# Leave the current interactive session.
# NOTE(review): the transcript above entered both a container shell and the
# mysql client; a single exit closes only the innermost one -- confirm in
# which shell the following hadoop / spark-shell commands are meant to run.
exit
# Create the /medline directory and upload the sample XML into it; the Scala
# code later reads it back as hdfs:///medline.
hadoop fs -mkdir /medline
hadoop fs -put medsamp2016a.xml /medline
# Start spark-shell with the jar passed via --jars; the Scala lines that
# follow are typed into that shell.
spark-shell --jars ch07-graph-2.0.0-jar-with-dependencies.jar
import edu.umd.cloud9.collection.XMLInputFormat
import org.apache.spark.sql.{Dataset, SparkSession, Row}
import org.apache.hadoop.io.{Text, LongWritable}
import org.apache.hadoop.conf.Configuration
/**
 * Loads the XML found at `path` and returns each matched record as one string.
 *
 * Records are delimited through XMLInputFormat's start/end tag settings:
 * a record begins at "<MedlineCitation " (the literal includes a trailing
 * space) and ends at "</MedlineCitation>".
 *
 * @param spark session whose SparkContext performs the read and whose
 *              implicits supply the String encoder needed by toDS()
 * @param path  input location handed to newAPIHadoopFile
 * @return a Dataset[String] holding the text of each record
 */
def loadMedline(spark: SparkSession, path: String): Dataset[String] = {
  import spark.implicits._
  val hadoopConf = new Configuration()
  hadoopConf.set(XMLInputFormat.START_TAG_KEY, "<MedlineCitation ")
  hadoopConf.set(XMLInputFormat.END_TAG_KEY, "</MedlineCitation>")
  val records = spark.sparkContext.newAPIHadoopFile(
    path, classOf[XMLInputFormat], classOf[LongWritable], classOf[Text], hadoopConf)
  // Each element is a (LongWritable, Text) pair; only the Text value is kept.
  val xmlStrings = records.map { case (_, xml) => xml.toString }
  xmlStrings.toDS()
}
// Obtain a SparkSession through the builder's getOrCreate().
val spark = SparkSession.builder().getOrCreate()
// Bring the session's implicits into scope for the statements typed after this.
import spark.implicits._
// Read the records under hdfs:///medline (the directory filled by the
// `hadoop fs -put` step) as a Dataset of raw XML strings.
val medlineRaw: Dataset[String] = loadMedline(spark, "hdfs:///medline")
from flask import Flask,render_template
from mysql import Mysql

app = Flask(__name__)
@app.route('/')
def getdata():
    """Render ``echarts.html`` with the result rows of the four ``Mysql`` queries.

    A new ``Mysql`` object (and therefore a new database connection) is
    created for every request. The connection is closed again before the
    template is rendered so that requests do not pile up open connections.

    Returns:
        The rendered ``echarts.html`` template, which receives the query
        results as ``items1`` .. ``items4``.
    """
    db = Mysql()
    try:
        items1 = db.getItems1()
        items2 = db.getItems2()
        items3 = db.getItems3()
        items4 = db.getItems4()
    finally:
        # Nothing else closes this per-request connection. getattr() is used
        # because Mysql.__init__ may leave `conn` unset when connecting fails.
        conn = getattr(db, 'conn', None)
        if conn is not None:
            conn.close()
    return render_template('echarts.html', items1 = items1, items2 = items2, items3 = items3, items4 = items4)

if __name__ == '__main__':
    app.run(debug = True) # with debug=True, the place where an error occurred is reported back when one happens

import pymysql
class Mysql(object):
    """Query helper that holds one PyMySQL connection to the ``graph`` database.

    Attributes:
        conn: the open PyMySQL connection, or ``None`` when connecting failed
            or after ``close()``.
        cursor: the cursor used for every query, or ``None`` under the same
            conditions.

    NOTE(review): host and credentials are hard-coded in ``__init__``;
    consider moving them to configuration.
    """

    def __init__(self):
        """Try to connect; on failure report the reason and stay unconnected."""
        # Define both attributes first so that they exist even when the
        # connection attempt fails part-way.
        self.conn = None
        self.cursor = None
        try:
            self.conn = pymysql.connect(host='192.168.235.131',user='root',password='hadoop',database='graph',port=3306, charset="utf8")
            self.cursor = self.conn.cursor()  # cursor through which the SQL statements are executed
            print("连接数据库成功")
        except Exception as exc:
            # Best-effort as before (the constructor does not raise), but the
            # cause is now printed, and KeyboardInterrupt/SystemExit are no
            # longer swallowed as they were by the bare ``except:``.
            print("连接失败", exc)

    def close(self):
        """Close the cursor and the connection if they are open; safe to call twice."""
        if self.cursor is not None:
            self.cursor.close()
            self.cursor = None
        if self.conn is not None:
            self.conn.close()
            self.conn = None

    def _fetch_all(self, sql):
        """Execute ``sql``, print the fetched rows and return them.

        Raises:
            RuntimeError: if there is no usable cursor (the connection failed
                or was closed).
        """
        if self.cursor is None:
            raise RuntimeError("Mysql: no database connection, cannot execute: " + sql)
        self.cursor.execute(sql)
        rows = self.cursor.fetchall()  # all result rows
        print(rows)
        return rows

    def getItems1(self):
        """Return the 10 ``topicDist`` rows with the highest ``cnt`` (frequency of the main topics)."""
        return self._fetch_all("select * from topicDist order by cnt desc limit 10")

    def getItems2(self):
        """Return the 10 ``cooccurs`` rows with the highest ``cnt`` (co-occurrence pair table)."""
        return self._fetch_all("select * from cooccurs order by cnt desc limit 10")

    def getItems3(self):
        """Return the 10 ``name_degree`` rows with the highest ``degree`` (vertices of the initial graph)."""
        return self._fetch_all("select * from name_degree order by degree desc limit 10")

    def getItems4(self):
        """Return the 10 ``filter_degree`` rows with the highest ``degree`` (vertices of the filtered graph)."""
        return self._fetch_all("select * from filter_degree order by degree desc limit 10")



# Copy the ml-100k data set into home/ of container c5f727be11c8.
docker cp /tmp/test_data/ml-100k c5f727be11c8:home/
# Copy the sy4py directory into three different containers.
# NOTE(review): the destination path after the colon is empty in the next
# three commands, and the source path differs between the first one
# (/tmp/sy4/sy4py) and the others (/tmp/sy4py) -- confirm which is intended.
docker cp /tmp/sy4/sy4py c5f727be11c8:
docker cp /tmp/sy4py 2455efdf7063:

docker cp /tmp/sy4py e3a363163252:
# Edit the two Python files in echarts_view (presumably the Mysql helper
# class and the Flask app shown earlier in this document).
cd echarts_view
vim mysql.py
vim app.py
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值