DATA MINING-Shmueli
1:bar chart
# Base graphics: one bar per element of y, labelled with the values of x.
barplot(y, names.arg = x,xlab = "x", ylab = "y")
# or
library(ggplot2)
# stat = "identity" takes the bar heights directly from the y aesthetic.
ggplot(data.frame) + geom_bar(aes(x = x, y = y), stat = "identity")
If the y variable is a column in the dataset, use stat = "identity".
If the y variable is not a column in the dataset (the bars show counts), use stat = "count" (named "bin" in old ggplot2 versions).
2: scatter plot
# Base graphics: scatter plot of y against x.
plot(y ~ x, xlab = "x", ylab = "y")
# or
library(ggplot2)
# Replace "color" with an actual colour name. alpha is the point opacity,
# from 0 (transparent) to 1 (opaque); the template originally left it blank.
ggplot(housing.df) +
  geom_point(aes(x = x, y = y), colour = "color", alpha = 0.7)
3:histogram
# Base graphics: histogram of the values in data.
hist(data, xlab = "data")
# or
library(ggplot2)
# binwidth = n sets the width of each bin.
ggplot(data.frame) + geom_histogram(aes(x = data), binwidth = n)
4:boxplot
# Side-by-side boxplots of y for each level of x. In the formula the response
# goes on the left of `~` and the grouping variable on the right, so the axis
# labels below and the ggplot2 version describe the same plot.
boxplot(y ~ x, xlab = "x", ylab = "y")
# or
ggplot(data.frame) + geom_boxplot(aes(x = as.factor(x), y = y)) + xlab("x")
par(mfcol = c(r, c))
boxplot(...)
boxplot(...)
# ... one boxplot() call per panel, r x c calls in total (mfcol fills the grid column by column)
5:heatmap
# Correlation heatmap without cell values.
heatmap(cor(data.frame), Rowv = NA, Colv = NA)
# Correlation heatmap with the rounded correlations printed in each cell.
library(gplots) # provides heatmap.2()
heatmap.2(cor(data.frame), Rowv = FALSE, Colv = FALSE, dendrogram = "none",
          cellnote = round(cor(data.frame), n_decimal),
          notecol = "black", key = FALSE, trace = "none", margins = c(10, 10))
# or
library(ggplot2)
library(reshape) # to generate input for the plot
# Correlation matrix rounded to n decimals.
cor.mat <- round(cor(data.frame),n)
# Long format: one row per matrix cell.
# NOTE(review): the X1/X2 column names used below are presumably the ones
# reshape::melt() produces for a matrix (reshape2 would give Var1/Var2) - confirm.
melted.cor.mat <- melt(cor.mat)
# Tiles coloured by the correlation, with the value printed on each tile.
ggplot(melted.cor.mat, aes(x = X1, y = X2, fill = value)) +
geom_tile() +
geom_text(aes(x = X1, y = X2, label = value))
# Missing values: is.na() flags the missing cells and multiplying by 1 turns
# the logical result into 0/1 so that heatmap() can draw it.
heatmap(1 * is.na(data.frame), Rowv = NA, Colv = NA)
7: color-coding points by a condition
# add color
par(xpd=TRUE) # allow legend to be displayed outside of plot area
# Points are drawn black where `condition` is TRUE and gray otherwise.
plot(y~ x, ylab = "NOX", xlab = "LSTAT",col = ifelse(condition, "black", "gray"))
# The legend entries list the colours in the same order as ifelse() above.
# `大小` is a placeholder ("size") for the legend text size passed to cex.
legend("topleft", inset=c(x, y),legend = c("A_condition", "B_condition"), col = c("black", "gray"), pch = 1, cex = 大小 )
# or
library(ggplot2)
# `需要颜色区分的变量` is a placeholder for the variable that determines the
# point colour.
ggplot(housing.df, aes(y = column_name, x = column_name, colour= 需要颜色区分的变量)) + geom_point(alpha = 0.6)
# alpha: colour intensity (opacity) of the points
# Subset by a condition: values of `column` in the rows where column_2 == sth.
data.frame$column[data.frame$column_2 == sth]
# simple plot: scatter plot matrix of the selected columns
plot(housing.df[, c(1, 3, 12, 13)])
# or, with GGally
library(GGally) # provides ggpairs()
ggpairs(housing.df[, c(1, 3, 12, 13)])
8: fit curve
# tslm: fit linear models to time series
library(forecast) # provides tslm()
# quadratic trend
data.lm <- tslm(data ~ trend + I(trend^2))
# or: linear trend plus seasonal terms, to inspect the seasonal pattern
data.lm <- tslm(data ~ trend + season)
# plot the series and overlay the fitted values
plot(data, xlab = "", ylab = "", ylim = c())
lines(data.lm$fitted, lwd = 2)
9: Visualizing Networked Data
# Install igraph only when it is missing, instead of on every run.
if (!requireNamespace("igraph", quietly = TRUE)) {
  install.packages("igraph")
}
library(igraph)
ebay.df <- read.csv("eBayNetwork.csv")
# transform node ids to factors, so that as.matrix() yields a character
# matrix and the ids become vertex names
ebay.df[, 1] <- as.factor(ebay.df[, 1])
ebay.df[, 2] <- as.factor(ebay.df[, 2])
graph.edges <- as.matrix(ebay.df[, 1:2])
# graph_from_edgelist() is the current name of the deprecated graph.edgelist()
g <- graph_from_edgelist(graph.edges, directed = FALSE)
# flag the vertices whose name appears in the second edge-list column
isBuyer <- V(g)$name %in% graph.edges[, 2]
plot(g, vertex.label = NA,
     vertex.color = ifelse(isBuyer, "gray", "black"),
     vertex.size = ifelse(isBuyer, 7, 10))
10:Visualizing Hierarchical Data: Treemaps
library(treemap)
# 0/1 indicator: 1 when the seller feedback is below zero
tree.df$negative.feedback <- as.numeric(tree.df$Seller.Feedback < 0)
# Rectangles are nested by Category > Sub.Category > Brand, sized by High.Bid
# and shaded by the mean of the negative-feedback indicator.
treemap(
  tree.df,
  index = c("Category", "Sub.Category", "Brand"),
  vSize = "High.Bid",
  vColor = "negative.feedback",
  fun.aggregate = "mean",
  align.labels = list(
    c("left", "top"),
    c("right", "bottom"),
    c("center", "center")
  ),
  palette = rev(gray.colors(3)),
  type = "manual",
  title = ""
)
11:Visualizing Geographical Data: Map Charts
library(ggmap)
# NOTE(review): recent ggmap versions presumably need a registered map API
# key before get_map() can download tiles - confirm for your setup.
Map <- get_map("Denver, CO", zoom = 3)
# Overlay the points on the map. alpha is the point opacity (0 to 1); the
# template originally left it blank.
ggmap(Map) +
  geom_point(aes(x = longitude, y = latitude), data = data.frame,
             alpha = 0.4, colour = "red", size = 0.5)
library(mosaic)
# World map with countries matched on key_column and filled by score.
mWorldMap(df, key = "key_column", fill = "score") + coord_map()
# If the fill legend is not continuous, append a continuous fill scale:
#   + scale_fill_continuous(name = "Happiness")