我们首先可以从startdate
获得小时,每小时获得count
行数,type
得到小时。 complete
缺少小时数,并用0填充其计数,然后使用pivot_wider
获取宽格式的数据。
library(dplyr)
library(tidyr)
df %>%
mutate(hr = lubridate::hour(startdate)) %>%
count(hr,type) %>%
complete(type,hr = seq(0,max(hr)),fill = list(n = 0)) %>%
pivot_wider(names_from = type,values_from = n)
# A tibble: 4 x 3
# hr version1 version2
# <int> <dbl> <dbl>
#1 0 0 0
#2 1 0 0
#3 2 2 1
#4 3 1 1
,
开始日期变量出了点问题。因此,我使用软件包lubridate
对其进行了设置
library(dplyr)
library(tidyr)
type = c('version1','version1','version2','version2')
startdate = lubridate::ymd_hms(c('2017-11-1T02:11:02.000','2018-3-25T02:13:02.000','2019-3-14T03:45:02.000','2017-3-14T02:55:02.000','2018-3-14T03:45:02.000'))
tibble(type = type,startdate = startdate) %>%
count(type,hour = lubridate::hour(startdate)) %>%
spread(type,n)
# A tibble: 2 x 3
hour version1 version2
<int> <int> <int>
1 2 2 1
2 3 1 1
,
Base R解决方案:
# Extract the hour and store it as a vector:
df$hour <- gsub(".* ","",trunc(df$startdate,units = "hours"))
# Count the number of observations of each type in each hour:
df$type_hour_cnt <- with(df,ave(paste(type,hour,sep = " - "),paste(type,FUN = seq_along))
# Reshape dataframe:
df <- as.data.frame(as.matrix(xtabs(type_hour_cnt ~ hour + type,df,sparse = T)))
# Extract rownames and store them as "hour" vector and then delete row.names:
df <- data.frame(cbind(hour = row.names(df),df),row.names = NULL)
本文链接:https://www.f2er.com/3118503.html