假设除以lorem开头的列之外,其他任何列中都没有NA,您可以执行以下操作
lapply(df.list,function(df) {
df[is.na(df)] <- 0
df$mean <- apply(df[,grep("lorem",names(df))],1,mean)
return (df)
})
# [[1]]
# foo lorem1968 lorem1969 mean
# 1 1 6 0 3.0
# 2 2 0 17 8.5
# 3 3 0 0 0.0
# 4 4 8 19 13.5
# 5 5 0 20 10.0
#
# [[2]]
# ipsum lorem1969 lorem1970 mean
# 1 11 0 22 11.0
# 2 12 17 0 8.5
# 3 13 0 24 12.0
# 4 14 19 0 9.5
# 5 15 20 0 10.0
在@akrun回答之后,您可以使用rowMeans
代替apply(df[,mean)
,即
lapply(df.list,function(df) {
df[is.na(df)] <- 0
df$mean <- rowMeans(df[,names(df))])
return (df)
})
,
使用dplyr
,tidyr
和purrr
,您可以执行以下操作:
map(df.list,~ select_at(.x,vars(contains("lorem"))) %>%
mutate_all(~ replace_na(.,0)) %>%
mutate(avg = rowMeans(.)))
[[1]]
lorem1968 lorem1969 avg
1 6 0 3.0
2 0 17 8.5
3 0 0 0.0
4 8 19 13.5
5 0 20 10.0
[[2]]
lorem1969 lorem1970 avg
1 0 22 11.0
2 17 0 8.5
3 0 24 12.0
4 19 0 9.5
5 20 0 10.0
如果您实际上还想保留其他列:
map(df.list,~ mutate_at(.x,vars(contains("lorem")),~ replace_na(.,0)) %>%
mutate(avg = rowMeans(select(.,starts_with("lorem")))))
,
我们可以使用base R
。用list
遍历lapply
,使用grep
查找与“ lorem”匹配的列名的索引,后跟一个或多个数字,replace
{{1 }}中的那些列为0,并NA
中的原始数据集transform
通过获取这些“ lorem”列中的list
来创建新列“ avg”
mean
,
这是一种data.table方法,它依赖于data.table
调用中的按引用更新lapply()
。
library(data.table)
lapply(df.list,setDT)
lapply(df.list,function(dt) {
cols <- grep('^lorem',names(dt))
setnafill(dt,fill = 0L,cols = cols)
dt[,mean_lorem := rowMeans(.SD),.SDcols = cols]
})
#> [[1]]
#> foo lorem1968 lorem1969 mean_lorem
#> 1: 1 6 0 3.0
#> 2: 2 0 17 8.5
#> 3: 3 0 0 0.0
#> 4: 4 8 19 13.5
#> 5: 5 0 20 10.0
#>
#> [[2]]
#> ipsum lorem1969 lorem1970 mean_lorem
#> 1: 11 0 22 11.0
#> 2: 12 17 0 8.5
#> 3: 13 0 24 12.0
#> 4: 14 19 0 9.5
#> 5: 15 20 0 10.0
,
另一种选择是使用rowSums
来节省一些将NA转换为0的时间:
lapply(df.list,function(x) {
i1 <- grep("^lorem\\d+$",names(x))
transform(x,avg = rowSums(x[i1],na.rm=TRUE) / ncol(x[i1]))
})
计时代码:
set.seed(0L)
ndf <- 1e4
nr <- 1e4
nc <- 2
df.list <- replicate(ndf,data.frame(id=1:nr,matrix(sample(c(1,NA_real_),nr*nc,TRUE),ncol=nc)),simplify=FALSE)
mtd0 <- function() {
lapply(df.list,function(x) {
i1 <- grep("^X\\d+$",names(x))
x[i1] <- replace(x[i1],is.na(x[i1]),0)
transform(x,avg = rowMeans(x[i1],na.rm = TRUE))
})
}
mtd2 <- function() {
lapply(df.list,names(x))
transform(x,na.rm=TRUE) / ncol(x[i1]))
})
}
bench::mark(mtd0(),mtd2(),check=FALSE)
时间:
# A tibble: 2 x 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result memory time gc
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm> <list> <list> <list> <list>
1 mtd0() 35.51s 35.51s 0.0282 7.83GB 0.422 1 15 35.51s <list [10,000]> <df[,3] [151,107 x 3]> <bch:tm> <tibble [1 x 3]>
2 mtd2() 8.91s 8.91s 0.112 2.98GB 1.12 1 10 8.91s <list [10,3] [30,314 x 3]> <bch:tm> <tibble [1 x 3]>
,
您可以尝试这样的事情:
foo <- 1:5
lorem1968 <- c(6,NA,8,NA)
lorem1969 <- c(NA,17,19,20)
df1 <- data.frame(foo,lorem1968,lorem1969)
ipsum <- 11:15
lorem1970 <- c(22,24,NA)
df2 <- data.frame(ipsum,lorem1969,lorem1970)
df.list <- list(df1,df2)
#Create function
replace_f <- function(x)
{
#Replace NA by 0
x[is.na(x)] <- 0
#Compute mean
#Variable selection
index <- which(grepl("lorem",names(x)))
x$Avg <- apply(x[,index],mean)
return(x)
}
df.list2 <- lapply(df.list,replace_f)
df.list2
[[1]]
foo lorem1968 lorem1969 Avg
1 1 6 0 3.0
2 2 0 17 8.5
3 3 0 0 0.0
4 4 8 19 13.5
5 5 0 20 10.0
[[2]]
ipsum lorem1969 lorem1970 Avg
1 11 0 22 11.0
2 12 17 0 8.5
3 13 0 24 12.0
4 14 19 0 9.5
5 15 20 0 10.0
本文链接:https://www.f2er.com/3131461.html