week1

最新推荐文章于 2024-08-22 10:50:42 发布

porco_bosso

最新推荐文章于 2024-08-22 10:50:42 发布

阅读量856

点赞数

分类专栏： Getting and Cleaning Data 文章标签：下载文件 读取Excel 读取xml 读取json

本文链接：https://blog.csdn.net/KeepYourNoteBook/article/details/23157409

版权

Getting and Cleaning Data 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

目录操作

# Report the current working directory, then move one level up.
getwd()
setwd("../")

# Create the directory named by `directoryname` only when nothing exists
# at that path yet.
if (!file.exists(directoryname)) dir.create(directoryname)

下载文件

# Download the file at `fileurl` into ./camara.csv.
# Plain http URLs need no `method`; for https on macOS pass method = "curl".
download.file(url = fileurl, destfile = "./camara.csv", method = "curl")

读取文件

# Read the comma-separated file, using its first row as column names, and
# keep the result in `camara` so it can be inspected below.
# (The argument is `header`; R is case-sensitive, so `HEAD` is rejected.)
camara <- read.table("camara.csv", sep = ",", header = TRUE)
# Shortcut with the same separator/header settings:
# camara <- read.csv("camara.csv")
head(camara)

读取Excel

# Before running, use setwd("") to move to the directory holding this file.
# Create the data directory if it does not exist yet.
if (!file.exists("data")) {
  dir.create("data")
}
mfile <- "./data/cameras.xlsx"

# Download the workbook only when there is no local copy.
if (!file.exists(mfile)) {
  fileUrl <- "https://data.baltimorecity.gov/api/views/dz54-2aru/rows.xlsx?accessType=DOWNLOAD"
  # See help(download.file). The former setInternet2(use = TRUE) call was
  # dropped: it only existed on Windows builds and is no longer supported
  # by current R releases, so it stopped the script everywhere else.
  # mode = "wb" writes the file in binary mode, which an .xlsx needs.
  download.file(fileUrl, destfile = mfile, mode = "wb")
  dateDownloaded <- date()
}


library(xlsx)  # Excel package

# Read the first worksheet of the workbook, with a header row.
cameraData <- read.xlsx(mfile, sheetIndex = 1, header = TRUE)
head(cameraData)

# Read only part of the sheet: restrict the columns and rows via
# colIndex / rowIndex.
colIndex <- 2:3
rowIndex <- 1:4
cameraDataSubset <- read.xlsx(
  mfile,
  sheetIndex = 1,
  colIndex = colIndex,
  rowIndex = rowIndex
)
cameraDataSubset

#########################################################
#1.写excel，使用write.xlsx函数                           #
#2.read.xlsx2读取文件时较read.xlsx快，但在读取一部分时不稳定#
#3.大量读写excel，使用XLConnect包                        #
########################################################

读取xml

library(XML)
fileUrl <- "http://www.w3schools.com/xml/simple.xml"
# Parse the XML document with xmlTreeParse. The argument name is spelled
# out in full (useInternalNodes) rather than relying on partial matching.
# NOTE(review): the parser fetches the URL itself; if the site now serves
# https only, download the file first and parse the local copy -- confirm.
doc <- xmlTreeParse(fileUrl, useInternalNodes = TRUE)
rootNode <- xmlRoot(doc)

# Name of the root element and names of its children.
print(xmlName(rootNode))
print(names(rootNode))

# First child of the root, then that child's first child.
print(rootNode[[1]])
print(rootNode[[1]][[1]])

# xmlSApply applies xmlValue to every child of the first node, extracting
# the text held under it.
print(xmlSApply(rootNode[[1]], xmlValue))

##################################################
fileUrl <- "http://espn.go.com/nfl/team/_/name/bal/baltimore-ravens"
# Parse HTML with htmlTreeParse. The argument name is spelled out in full
# (useInternalNodes) rather than relying on partial matching.
# NOTE(review): the page layout and URL scheme may have changed since this
# was written; verify the URL and the class names used below.
doc <- htmlTreeParse(fileUrl, useInternalNodes = TRUE)

# XPath quick reference:
#   /node                     top-level node
#   //node                    node at any level
#   node[@attr-name]          node that has the attribute
#   node[@attr-name='bob']    node whose attribute equals 'bob'
scores <- xpathSApply(doc, "//li[@class='score']", xmlValue)
teams <- xpathSApply(doc, "//li[@class='team-name']", xmlValue)
scores

读取JSON

library(jsonlite)

# Read JSON from a URL into an R object.
jsonData <- fromJSON("https://api.github.com/users/jtleek/repos")
names(jsonData)
names(jsonData$owner)
jsonData$owner$login

# Convert a data frame to JSON.
# TRUE/FALSE are written out because T and F are ordinary, reassignable
# variables.
# NOTE(review): this assignment masks the built-in `iris` dataset for the
# rest of the session; the name is kept so later code sees the same object.
iris <- data.frame(foo = 1:4, bar = c(TRUE, TRUE, FALSE, FALSE))
myjson <- toJSON(iris, pretty = TRUE)
cat(myjson)