大数据开发基础实验四
一级目录
docker exec -it hbase-master bash
docker cp /root/ch07-graph-2.0.0-jar-with-dependencies.jar c5f727be11c8:home/
docker cp /root/medsamp2016a.xml c5f727be11c8:home/
docker cp /root/medsamp2016a.xml hbase-master:/
docker cp /root/ch07-graph-2.0.0-jar-with-dependencies.jar hbase-master:/
cd ./hadoop-docker-centos7/
./start-hadoop-images.sh
docker exec -it mysql bash
mysql -u root -p
hadoop（此处不是命令，而是在 mysql 提示输入密码时键入的 root 密码）
grant all privileges on *.* to 'root'@'%' identified by 'hadoop' with grant option;
创建数据库
create DATABASE graph;
exit
hadoop fs -mkdir /medline
hadoop fs -put medsamp2016a.xml /medline
spark-shell --jars ch07-graph-2.0.0-jar-with-dependencies.jar
import edu.umd.cloud9.collection.XMLInputFormat
import org.apache.spark.sql.{Dataset, SparkSession, Row}
import org.apache.hadoop.io.{Text, LongWritable}
import org.apache.hadoop.conf.Configuration
/** Reads the input at `path` and returns each MedlineCitation element as one string.
  *
  * The input is split by `XMLInputFormat`, configured with the start tag
  * `"<MedlineCitation "` and the end tag `"</MedlineCitation>"`.
  *
  * @param spark session that supplies the SparkContext and the implicit encoders
  * @param path  location handed to `newAPIHadoopFile`
  * @return a Dataset holding the string form of every record value
  */
def loadMedline(spark: SparkSession, path: String): Dataset[String] = {
  import spark.implicits._
  val hadoopConf = new Configuration()
  // The trailing space in the start tag is part of the configured value.
  hadoopConf.set(XMLInputFormat.START_TAG_KEY, "<MedlineCitation ")
  hadoopConf.set(XMLInputFormat.END_TAG_KEY, "</MedlineCitation>")
  val records = spark.sparkContext.newAPIHadoopFile(
    path,
    classOf[XMLInputFormat],
    classOf[LongWritable],
    classOf[Text],
    hadoopConf
  )
  // Drop the key of each (key, value) pair and keep the value as a String.
  records.map { case (_, xml) => xml.toString }.toDS()
}
// Obtain a SparkSession through the builder's getOrCreate().
val spark = SparkSession.builder().getOrCreate()
// Bring the session's implicits into scope for the statements that follow.
import spark.implicits._
// Load the records found under hdfs:///medline (the directory created and filled
// by the `hadoop fs -mkdir` / `hadoop fs -put` commands earlier in these notes).
val medlineRaw: Dataset[String] = loadMedline(spark, "hdfs:///medline")
from flask import Flask,render_template
from mysql import Mysql
# Flask application object; the route below is registered on it and the
# __main__ guard at the end of this script runs it.
app = Flask(__name__)
@app.route('/')
def getdata():
    """Render ``echarts.html`` with the four result sets read from MySQL.

    A new ``Mysql`` helper (and therefore a new database connection) is
    created for every request. The four ``getItemsN`` queries are run and
    their results are passed to the template as ``items1`` .. ``items4``.

    Returns:
        The rendered ``echarts.html`` template.
    """
    db = Mysql()
    try:
        items1 = db.getItems1()
        items2 = db.getItems2()
        items3 = db.getItems3()
        items4 = db.getItems4()
    finally:
        # Release the per-request connection; without this every page view
        # leaves one MySQL connection open. `conn` may be missing or None
        # when the connect attempt inside Mysql() failed, so look it up
        # defensively instead of assuming it exists.
        conn = getattr(db, "conn", None)
        if conn is not None:
            conn.close()
    return render_template(
        'echarts.html',
        items1=items1,
        items2=items2,
        items3=items3,
        items4=items4,
    )
if __name__ == '__main__':
    # debug=True: when an error occurs, the response shows where it happened.
    # NOTE(review): debug mode is meant for local development only.
    app.run(debug=True)
import pymysql
class Mysql(object):
    """Thin helper around one PyMySQL connection used by the dashboard.

    The constructor tries to connect and prints the outcome. The
    ``getItemsN`` methods each run one fixed top-10 query, print the rows
    and return them exactly as ``cursor.fetchall()`` delivers them.
    """

    def __init__(self, host='192.168.235.131', user='root', password='hadoop',
                 database='graph', port=3306, charset="utf8"):
        """Connect to MySQL and create a cursor.

        All parameters are forwarded to ``pymysql.connect``; the defaults
        are the values this class was originally hard-wired to, so
        ``Mysql()`` behaves as before.

        A failed connection is reported on stdout and does not raise.
        ``conn`` and ``cursor`` are then ``None`` and every query method
        raises ``RuntimeError``.
        """
        # Define both attributes up front so a failed connect leaves the
        # object in a known state instead of missing attributes.
        self.conn = None
        self.cursor = None
        try:
            self.conn = pymysql.connect(host=host, user=user, password=password,
                                        database=database, port=port,
                                        charset=charset)
            # Cursor through which the SQL statements below are executed.
            self.cursor = self.conn.cursor()
            print("连接数据库成功")
        except pymysql.MySQLError as exc:
            # Only database errors are treated as "connection failed";
            # the cause is printed too so the failure can be diagnosed.
            print("连接失败", exc)

    def close(self):
        """Close the cursor and the connection; safe to call repeatedly."""
        if self.cursor is not None:
            self.cursor.close()
            self.cursor = None
        if self.conn is not None:
            self.conn.close()
            self.conn = None

    def _fetch_all(self, sql):
        """Execute ``sql``, print every returned row and return them.

        Raises:
            RuntimeError: if no cursor is available (connect failed or
                ``close()`` was already called).
        """
        if self.cursor is None:
            raise RuntimeError("database connection is not available")
        self.cursor.execute(sql)
        rows = self.cursor.fetchall()  # all result rows
        print(rows)
        return rows

    def getItems1(self):
        """Return the 10 ``topicDist`` rows with the highest ``cnt`` (topic frequencies)."""
        return self._fetch_all("select * from topicDist order by cnt desc limit 10")

    def getItems2(self):
        """Return the 10 ``cooccurs`` rows with the highest ``cnt`` (co-occurring pairs)."""
        return self._fetch_all("select * from cooccurs order by cnt desc limit 10")

    def getItems3(self):
        """Return the 10 ``name_degree`` rows with the highest ``degree`` (initial graph)."""
        return self._fetch_all("select * from name_degree order by degree desc limit 10")

    def getItems4(self):
        """Return the 10 ``filter_degree`` rows with the highest ``degree`` (filtered graph)."""
        return self._fetch_all("select * from filter_degree order by degree desc limit 10")
docker cp /tmp/test_data/ml-100k c5f727be11c8:home/
docker cp /tmp/sy4/sy4py c5f727be11c8:
docker cp /tmp/sy4py 2455efdf7063:
docker cp /tmp/sy4py e3a363163252:
cd echarts_view
vim mysql.py
vim app.py