花费的时间比预期的要长
library(tidyverse)
# Example data: one row per school, with its village and founding year.
# All three columns must have the same length (six schools); the values
# below reproduce the printed result that follows.
df_input <- data.frame(
  school_id = c(1, 2, 3, 4, 5, 6),
  village = c("A", "B", "B", "C", "D", "D"),
  year_est = c(2002, 2002, 2004, 2001, 2004, 2004)
)

# Cumulative number of schools established per village, for every year
# in the observed range.
df_input %>%
  # Count the schools founded in each village/year pair.
  group_by(village, year_est) %>%
  summarise(school_est = n(), .groups = "drop") %>%
  # Add the missing village/year combinations with a count of zero so that
  # every village has a row for each year between the earliest and latest.
  complete(
    nesting(village),
    year_est = seq(min(year_est), max(year_est), 1),
    fill = list(school_est = 0)
  ) %>%
  # Running total within each village.
  group_by(village) %>%
  mutate(school_est = cumsum(school_est)) %>%
  ungroup()
#> # A tibble: 16 x 3
#> village year_est school_est
#> <chr> <dbl> <dbl>
#> 1 A 2001 0
#> 2 A 2002 1
#> 3 A 2003 1
#> 4 A 2004 1
#> 5 B 2001 0
#> 6 B 2002 1
#> 7 B 2003 1
#> 8 B 2004 2
#> 9 C 2001 1
#> 10 C 2002 1
#> 11 C 2003 1
#> 12 C 2004 1
#> 13 D 2001 0
#> 14 D 2002 0
#> 15 D 2003 0
#> 16 D 2004 2
由 reprex package (v2.0.0) 于 2021 年 6 月 28 日创建
,
我们可以使用 tidyr 的 `complete()`：
library(dplyr)
library(tidyr)
# Tally schools per village and founding year, pad the result with zero
# counts for every year in the observed range, then accumulate the counts
# separately for each village.
df_input %>%
  count(
    village,
    year = year_est,
    name = "school_est"
  ) %>%
  complete(
    village,
    year = min(year):max(year),
    fill = list(school_est = 0)
  ) %>%
  mutate(
    # ave() applies cumsum within each village and keeps the row order.
    school_est = ave(school_est, village, FUN = cumsum)
  )
输出：
# A tibble: 16 x 3
village year school_est
<chr> <dbl> <dbl>
1 A 2001 0
2 A 2002 1
3 A 2003 1
4 A 2004 1
5 B 2001 0
6 B 2002 1
7 B 2003 1
8 B 2004 2
9 C 2001 1
10 C 2002 1
11 C 2003 1
12 C 2004 1
13 D 2001 0
14 D 2002 0
15 D 2003 0
16 D 2004 2
或者使用 base R
# Base R: cumulative number of schools per village for every year in the
# observed range.
out <- transform(
  as.data.frame(
    table(
      # Turn year_est into a factor whose levels span every year, so that
      # table() also reports years in which no school was established.
      # [-1] drops the first column (school_id) before tabulating.
      transform(
        df_input,
        year_est = factor(year_est, levels = min(year_est):max(year_est))
      )[-1]
    )
  ),
  # Group by village: without a grouping variable ave() applies cumsum to
  # the whole Freq column, accumulating across villages instead of
  # restarting the running total for each one.
  Freq = ave(Freq, village, FUN = cumsum)
)
# Show the rows village by village (years stay in ascending order).
out[order(out$village), ]
village year_est Freq
1 A 2001 0
5 A 2002 1
9 A 2003 1
13 A 2004 1
2 B 2001 0
6 B 2002 1
10 B 2003 1
14 B 2004 2
3 C 2001 1
7 C 2002 1
11 C 2003 1
15 C 2004 1
4 D 2001 0
8 D 2002 0
12 D 2003 0
16 D 2004 2
本文链接:https://www.f2er.com/181946.html