# 数据来源:中国裁判文书网 http://wenshu.court.gov.cn/Assets/js/Lawyee.CPWSW.DictData.js
library("RCurl")
library("js")
library("dplyr")
# Download the dictionary script, tokenize it, and build `case_info`: a data
# frame with one row per (id, parentId, name) triple found in the token stream.
dict_url <- "http://wenshu.court.gov.cn/Assets/js/Lawyee.CPWSW.DictData.js"
txt <- getURL(dict_url, .encoding = "UTF-8")
txt <- uglify_reformat(txt, beautify = TRUE) # pretty-print the JavaScript
txt <- esprima_tokenize(txt, range = FALSE, loc = FALSE, comment = FALSE)

# Drop punctuation tokens, then skip the first three remaining tokens.
# A logical mask is used instead of `4:nrow(.)`, which counts backwards (and
# yields NA rows) when fewer than four tokens are left.
txt <- txt[which(txt$type != "Punctuator"), , drop = FALSE]
txt <- txt[seq_len(nrow(txt)) > 3L, , drop = FALSE]

# Remove the `key` property tokens together with their "1".."4" values.
# Only the token that directly follows a `key` token is removed: dropping every
# "1".."4" token would also delete id/parentId/name values that happen to be
# "1".."4" and shift the columns extracted below out of alignment.
key_values <- c('"1"', '"2"', '"3"', '"4"')
is_key <- txt$value %in% "key"
follows_key <- seq_len(nrow(txt)) %in% (which(is_key) + 1L)
is_key_value <- follows_key & txt$value %in% key_values
txt <- txt[!(is_key | is_key_value), , drop = FALSE]

# Renumber the rows 1..n after filtering.
rownames(txt) <- NULL

# For each property name, take the token immediately after it (its value) and
# strip the double quotes that the tokenizer keeps around string literals.
fields <- c("id", "parentId", "name")
columns <- lapply(fields, function(field) {
  value_pos <- which(txt$value == field) + 1L
  gsub("\"", "", txt$value[value_pos], fixed = TRUE)
})
names(columns) <- fields

# data.frame() silently recycles shorter columns when lengths divide evenly,
# so fail loudly if the three properties were not found equally often.
if (length(unique(lengths(columns))) != 1L) {
  stop("id, parentId and name were matched a different number of times",
       call. = FALSE)
}

case_info <- data.frame(
  id = columns$id,
  parentId = columns$parentId,
  name = columns$name,
  stringsAsFactors = FALSE
)
# 爬取结果:数据框 case_info(列:id、parentId、name)