我想扩展此处给出的示例
How to plot a contour line showing where 95% of values fall within,in R and in ggplot2
获取具有三个维度(x,y和z)的数据,而不是绘制轮廓线,我想获取x,y和z值的限制。
这是上一篇文章的示例。
library(ggplot2)
set.seed(1001)
d <- data.frame(x=rnorm(1000),y=rnorm(1000))
kd <- ks::kde(d,compute.cont=TRUE)
contour_95 <- with(kd,contourLines(x=eval.points[[1]],y=eval.points[[2]],z=estimate,levels=cont["5%"])[[1]])
contour_95 <- data.frame(contour_95)
ggplot(data=d,aes(x,y)) +
geom_point() +
geom_path(aes(x,y),data=contour_95) +
theme_bw()
然后,可以像这样获得轮廓的极限:
range(contour_95$x)
range(contour_95$y)
我很想知道如何在指定的百分位数处获得3-D轮廓的x,y和z范围。
ks:kde可以处理更大的尺寸,但不能使用轮廓线()。
这就是我尝试过的...
set.seed(1001)
d <- data.frame(x=rnorm(1000),y=rnorm(1000),y=rnorm(1000))
kd <- ks::kde(d,compute.cont=TRUE)
#what kd$estimates are > 95th percentile?
#make function that can extract from 3d array
multi.which <- function(A){
if ( is.vector(A) ) return(which(A))
d <- dim(A)
T <- which(A) - 1
nd <- length(d)
t( sapply(T,function(t){
I <- integer(nd)
I[1] <- t %% d[1]
sapply(2:nd,function(j){
I[j] <<- (t %/% prod(d[1:(j-1)])) %% d[j]
})
I
}) + 1 )
}
#extract those estimates that have >density than 95th percentile
ests <- multi.which(kd$estimate > kd$cont["5%"])
#make into a long dataframe with column number in the second column and row number in first column
col1=rep(1,nrow(ests))
col2=rep(2,nrow(ests))
col3=rep(3,nrow(ests))
rows=c(ests[,1],ests[,2],3])
cols=c(col1,col2,col3)
index=cbind(rows,cols)#this is the index so we can extract the coordinates in multi-D space
car::some(index)
#get coordinates with this function
fExtract <- function(dat,indexDat){
dat[as.matrix(indexDat)]
}
#pull three coordinates (x,y,z) from eval.points into 3 columns
eval.pts <- cbind(kd$eval.points[[1]],kd$eval.points[[2]],kd$eval.points[[3]])
v <- fExtract(eval.pts,index) #one long vector
#re-create the three columns of x,y and z coordinates of points at higher density than 95th percentile
x1 <- v[1:nrow(ests)]
y1 <- v[(nrow(ests)+1):(2*nrow(ests))]
z1 <- v[(2*nrow(ests)+1):(3*nrow(ests))]
#the three coordinates.
fin <- cbind(x1,y1,z1)
#get range of each dimension
range(x1)
range(y1)
range(z1)
但是我不确定这是正确的。