library(rjson)
library(rvest)
library(tcltk)
library(RMySQL)
library(DBI)
library(sqldf)
#76,6504469694752227853 1, 6475547135747031309
# Crawl driver: call jinriMfun() once per selected position of `jinrimobile`
# and collect the returned one-row data frames in `jinriM`, updating a Tk
# progress bar along the way.
# NOTE: `jinriMfun` and `jinrimobile` are not defined above this point in the
# file, so both must already exist in the session when this section runs.
pb <- tkProgressBar("进度", "已完成 %", 0, 100) # open the progress bar (title, label, min, max)
start <- Sys.time() # start time, used for the elapsed-time report at the end
# length(jinrimobile)
a <- 7000:9000 # positions of `jinrimobile` to process in this run
n_total <- length(a)
jinriM <- NULL
# Collect rows in a preallocated list and bind once after the loop; calling
# rbind() on every iteration copies the whole accumulated table each time.
rows <- vector("list", n_total)
for (k in seq_along(a)) {
  i <- a[k]
  # Progress is derived from the loop position k, not from the index value i:
  # i starts at 7000, so i * 100 / length(a) is always far above the bar's
  # maximum of 100.
  pct <- k * 100 / n_total
  info <- sprintf("已完成 %d%%", round(pct)) # percentage text for the bar
  setTkProgressBar(pb, pct, sprintf("进度 (%s)", info), info)
  rows[[k]] <- jinriMfun(i, jinrimobile) # main work: fetch one article
}
jinriM <- do.call(rbind, rows)
# Store the crawl results in MySQL table `jinriMM`, then close the progress
# bar and show the elapsed time.
# NOTE(review): the database credentials are hard-coded below; they should be
# moved out of the source file (e.g. into environment variables) and rotated.
conn <- dbConnect(MySQL(), dbname = "test", username="qizhaoyu_bj", password="pHPV*p9aQYiLa$DT", host="117.78.61.202", port=3311)
# Skip the insert when nothing was collected: an empty value list would
# produce an invalid statement.
if (!is.null(jinriM) && nrow(jinriM) > 0) {
  # Titles come from downloaded pages and may contain quotes or backslashes,
  # so every value is escaped before it is spliced into the statement.
  # NA is written as the text "NA", which is what sprintf("%s", NA) yields.
  escaped <- lapply(
    list(jinriM$id.i., jinriM$purl, jinriM$title, jinriM$labol),
    function(column) {
      text <- as.character(column)
      text[is.na(text)] <- "NA"
      dbEscapeStrings(conn, text)
    }
  )
  tuples <- sprintf(
    "('%s','%s','%s','%s')",
    escaped[[1]], escaped[[2]], escaped[[3]], escaped[[4]]
  )
  strSQL <- paste0('insert into jinriMM values', paste(tuples, collapse = ','))
  # Write to the database, then clear the result so its resources are freed.
  invisible(dbClearResult(dbSendQuery(conn, strSQL)))
}
# Close the connection; it is not used again after the insert.
invisible(dbDisconnect(conn))
# Other output options
# write.csv(sohufinalM,"sohuqi.csv")
# sohufinalM
close(pb) # close the progress bar
end <- Sys.time()
end - start # show the running time
library(rjson)
library(rvest)
# Fetch the title of one Toutiao mobile article.
#
# Builds the article info URL from id[i], downloads it, parses the response as
# JSON and returns a one-row data frame describing the outcome.
#
# Arguments:
#   i   position in `id` of the article to fetch
#   id  vector of article ids
#
# Returns a one-row data frame with columns `id.i.`, `purl`, `title`, `labol`:
#   labol == "jinri_M"  title was found; `title` holds jsondata$data$title
#   labol == "error2"   response parsed but has no data$title; title is
#                       "title_NULL"
#   labol == "error"    download failed, response was empty, or the JSON
#                       could not be parsed; title is NA
jinriMfun <- function(i, id) {
  purl <- paste0(
    "https://m.toutiao.com/i", id[i],
    "/info/?_signature=RrQf6BARHOK0VFeGYQy-3ka0H.&i=", id[i]
  )

  # A failed download must not abort the caller's loop, so a connection error
  # is turned into an empty response and reported through the "error" row.
  web <- tryCatch(readLines(purl, warn = FALSE), error = function(e) NULL)
  fetch_failed <- length(web) == 0

  # readLines() returns one element per line; join them back into a single
  # string so the whole document is parsed, not just its first line.
  jsondata <- if (fetch_failed) {
    NULL
  } else {
    try(fromJSON(paste(web, collapse = "\n")), silent = TRUE)
  }

  if (fetch_failed || inherits(jsondata, "try-error")) {
    title <- NA
    labol <- "error"
  } else if (!is.list(jsondata) || !is.list(jsondata$data) ||
             is.null(jsondata$data$title)) {
    # TODO: also detect "该新闻已删除" (article deleted) titles here.
    # The is.list() guards keep `$` from failing on atomic parse results.
    title <- "title_NULL"
    labol <- "error2"
  } else {
    title <- jsondata$data$title
    labol <- "jinri_M"
  }

  # Column name `id.i.` is what data.frame(id[i], ...) generated originally;
  # callers read the column by that name.
  data.frame(id.i. = id[i], purl = purl, title = title, labol = labol)
}
library(RMySQL)
library(DBI)
library(sqldf)
# (host="117.78.61.202", user="qizhaoyu_bj",
# password="pHPV*p9aQYiLa$DT", db="test", port=3311)
# Load the crawl input: read table "news_jinri_mobile" from MySQL and keep its
# first column in `jinrimobile` (the vector passed as `id` to jinriMfun()).
conn <- dbConnect(MySQL(), dbname = "test", username="111111111111", password="2222222222", host="1232334202", port=3331)
# Release the connection even when the read fails, so a failed run does not
# leave an open connection behind.
jinriMF <- tryCatch(
  dbReadTable(conn, "news_jinri_mobile"),
  finally = dbDisconnect(conn)
)
jinrimobile <- jinriMF[, 1]
# sohusqlMFF
print("sql conn is OK")
#source(paste(getwd(),"GARPFunc.R",sep="/"),encoding="utf-8")
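# Note: still not sure how best to handle Toutiao (今日头条). It is a headache
# right now: the crawl occasionally just stops partway through a run.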