文件中数据格式:1^A1002^A1522718982.606^time_local=03/Apr/2018:09:29:42 +0800&http_referer=-&http_user_agent=Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36&-=master1^A/do?t=99b61873a98742a3a29a4a6d64bc043f&en=pv&ct=1521163377&v=1&pl=pc
源代码:
import org.apache.spark._

/**
 * Small Spark job that reads a text log file, splits every line into fields
 * and prints the fields of each record.
 *
 * A line is split on the literal marker `^A/do?` or, where that does not
 * match, on the literal marker `^A`.
 *
 * NOTE(review): the pattern matches the two visible characters `^` and `A`.
 * If the real files use the control character U+0001 (often rendered as
 * `^A`), the pattern will not match -- confirm against actual input.
 */
object LogTransfer {

  /**
   * Runs the job on the file named by the first command-line argument.
   *
   * @param args command-line arguments; `args(0)` is the path of the log file
   * @throws IllegalArgumentException if no path argument was supplied
   */
  def transfer(args: Array[String]): Unit = {
    // Validate before creating the SparkContext so that a missing argument
    // cannot leave a started context behind.
    val inputPath = args.headOption.getOrElse(
      throw new IllegalArgumentException("Usage: LogTransfer <log-file-path>")
    )

    val sparkConf = new SparkConf().setAppName("LogTransfer").setMaster("local")
    val sc = new SparkContext(sparkConf)
    try {
      // Alternation order matters: the longer marker `^A/do?` is tried first,
      // so the request prefix is consumed together with its separator.
      val fieldSeparator = """\^A/do\?|\^A"""
      val logLines = sc.textFile(inputPath).map(_.split(fieldSeparator))

      // logLines.first.foreach(println)

      // Why `foreach` works here and `map` does not: `map` is a lazy
      // transformation -- it only describes a new RDD and nothing runs until
      // an action is invoked on it. `foreach` is an action, so it triggers
      // the job and the println calls execute.
      // The output shows up on this console because the master is "local";
      // on a cluster the println calls would run on the executors instead.
      logLines.foreach { fields =>
        println("---------------------------------")
        fields.foreach(println)
        // business-data processing goes here ...
      }
    } finally {
      // Always release the context, even if reading or processing failed.
      sc.stop()
    }
  }

  /**
   * Program entry point.
   *
   * @param args command-line arguments; the first one is the input file path
   */
  def main(args: Array[String]): Unit = {
    transfer(args)
  }
}