日期处理格式备份: object GetPostTime { val month = "[A-Z][a-zA-Z]{1,9}" val year = "\\d{4}" val twoNum = "\\d{1,2}" val pattern1 = s"($twoNum $month $year)".r //1 April 2018 val pattern2 = s"(${twoNum} $month at $twoNum:$twoNum)".r //1 April at 00:04 val pattern3 = s"($twoNum $month)".r //10 May val pattern4 = s"(${twoNum}月${twoNum}日 ${twoNum}:${twoNum})".r //2月27日 12:59 val pattern5 = s"($month ${twoNum} *at ${twoNum}:${twoNum} [apAP][Mm])".r //June 28 at 12:26 AM val pattern6 = s"今天 (${twoNum}):(${twoNum})".r //今天 07:19 val pattern7 = s"(昨天|Yesterday|Yesterday at) (${twoNum}):(${twoNum})".r //昨天 07:19 val pattern8 = s"(${twoNum}) (分钟|mins|min)".r //8 分钟 val pattern9 = s"(${twoNum}) (hrs|hr|小时|h)".r //8 hrs 8 hr 8 小时 val pattern10 = "(刚刚|Just now)".r //刚刚 val pattern11 = s"(\\d\\.\\d{3,10}E9)".r //刚刚 val pattern12 = s"(${twoNum}) *月".r val pattern13 = s"(${twoNum} $month $year at ${twoNum}:${twoNum})".r val pattern14 = s"($month ${twoNum}, $year at ${twoNum}:${twoNum} [apAP][Mm])".r val pattern15 = s"($year-${twoNum}-${twoNum} ${twoNum}:${twoNum}:${twoNum})".r val pattern16 = s"(${year}年${twoNum}月${twoNum}日)".r //2014年2月8日 val pattern17 = s"(${year}年${twoNum}月)".r //2014年3月 val pattern18 = s"($month $twoNum, $year)".r //October 24, 2018 val pattern19 = s"Yesterday at ($twoNum):($twoNum) ([apAP][Mm])".r //Yesterday at 11:24 AM //val pattern20 =s"Yesterday at ($twoNum):($twoNum)".r//Yesterday at 11:26 val pattern20 = s"($month $year)".r //February 2015 val pattern21 = s"(${twoNum}月${twoNum}日)".r //1月9日 val pattern22 = s"(昨天|Yesterday)".r //昨天 val pattern23 = s"($year)年*".r //2018/2018年 val pattern24 = s"($month $twoNum)".r //April 10 val pattern25 = s"($month)".r //April def getTime(env: String, inputPath: String, outputPath: String): Unit = { val ss = SCFactory.getSs(env, "ExtractPostTime") deleteDir(outputPath, env, ss.sparkContext) ss.read.json(s"${inputPath}/fb_timeline_store_data") .select("uptime", "public_timestamp", "public_date") .orderBy("public_date") 
.coalesce(1).write.option("header", "true") .csv(s"${outputPath}/postTime") ss.read.json(s"${inputPath}/fb_content_comment_data") .select("uptime", "comment_time").orderBy("comment_time"
Scala 处理各类时间格式汇总
最新推荐文章于 2023-04-11 15:49:16 发布