如何创建一个表来计算R中两个单独表中的值之间的百分比变化?

我有两个数据表,每个数据表分别代表一年中的每月利差和明年的相同。

我想根据我的索引(“类别名称”)计算值的百分比变化,因此每个行值都代表从第一年到下一年的百分比变化。

参考下面的第一年表:

`Category Name`   Oct   Nov   Dec   Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep  Average `Percent Total`
   <fct>           <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>           <dbl>
 1 Salty snacks    36340 43950 34246 26758 36151 51390 35299 43337 35251 32449 46309 33534 37918.             39.4 
 2 Candy           23003 23814 19271 14269 21902 30986 22054 27269 22651 20789 30090 21632 23144.             24.0 
 3 Nutrition Bar   10563 12182  9657  7029 10740 15392 10926 14955 12370 10914 16130 11945 11900.             12.4 
 4 Cookies         10751 11679  9051  6752  9416 14467 10993 14364 12207 11066 14862  9656 11272              11.7 
 5 Nuts/Trailmix    4018  4371  3512  2676  3935  5586  3820  5182  4555  3869  5672  3938  4261.              4.43
 6 pastry           3531  4328  3666  2853  3906  5396  3521  4318  3571  3520  4830  4026  3956.              4.11
 7 Meat Sticks      2131  2669  1919  1575  2156  3021  2088  2630  2369  2043  2759  2091  2288.              2.38
 8 Other            1500  2191  2133  1650  2151  2696  1196  1169   760   751   954   747  1492.              1.55
 9 Crackers            1     0     0     0     0     0     0     0    13    39    86    23    13.5             0.01
10 Food                0     0     0     0     0     2     7     3    15    25    30     9     7.58            0.01

这是下一个年份表,具有相同的布局/参数:

`Category Name`   Oct   Nov   Dec   Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep Average `Percent Total`
   <fct>           <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>           <dbl>
 1 Salty snacks    35562 38043 37697 33079 31942 35862 35646 43987 33551 34916 46172 35511 36831.            39.1 
 2 Candy           22722 21071 20496 17663 19470 21834 21924 26846 21046 22482 29402 22477 22286.            23.6 
 3 Nutrition Bar   12339 12975 12073 10668 11813 12999 12897 17062 12982 13506 17234 13033 13298.            14.1 
 4 Cookies         10042  9712  9489  7820  8472  9936  9756 11867  9307  9525 12123  9544  9799.            10.4 
 5 pastry           5322  5243  5185  4518  4546  4868  4819  5792  4275  4484  5547  4486  4924.             5.23
 6 Nuts/Trailmix    4236  4279  4034  3656  3733  4364  4171  5514  4209  4486  5595  4409  4390.             4.66
 7 Meat Sticks      2067  2195  2024  2085  2068  2236  2342  2814  2337  2311  3028  2428  2328.             2.47
 8 Crackers           12     4     2     1     0    49   185   459   355   550   839   705   263.             0.28
 9 Other             433   177    89    38    28    17     9     4     2     0     1     1    66.6            0.07
10 Food                4     0     5     2     0     0    11    46    48    65   121    77    31.6            0.03

我正在尝试创建一个新表,该表显示每个类别(咸味小吃,糖果等)每个月的变化百分比。我的计划是以此为基础,并从中创建一个格式表。谢谢!

bierlaopo 回答:如何创建一个表来计算R中两个单独表中的值之间的百分比变化?

在基数R中,您可以基于df2排列df1中的行,然后执行计算。

df2 <- df2[match(df1$Category_Name,df2$Category_Name),]
cbind(df1[1],round((df2[2:13] - df1[2:13])/df1[2:13] * 100,2))

或者使用dplyrtidyr可以将数据转换为长格式,将它们结合起来执行计算,然后再次将数据转换为宽格式。

library(dplyr)
library(tidyr)

df1 %>%
  select(-Average,-Percent_Total) %>%
  pivot_longer(cols = -Category_Name) %>%
  left_join(df2 %>%
  select(-Average,-Percent_Total) %>%
  pivot_longer(cols = -Category_Name),by = c("Category_Name","name")) %>%
  mutate(change = (value.y - value.x)/value.x * 100) %>%
  select(-value.x,-value.y) %>%
  mutate(change = na_if(change,Inf)) %>%
  pivot_wider(names_from = name,values_from = change)


# A tibble: 10 x 13
#   Category_Name     Oct    Nov    Dec   Jan    Feb     Mar     Apr     May     Jun     Jul     Aug     Sep
#   <fct>           <dbl>  <dbl>  <dbl> <dbl>  <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
# 1 Salty_Snacks    -2.14 -13.4   10.1   23.6 -11.6   -30.2    0.983    1.50   -4.82    7.60  -0.296    5.90
# 2 Candy           -1.22 -11.5    6.36  23.8 -11.1   -29.5   -0.589   -1.55   -7.09    8.14  -2.29     3.91
# 3 Nutrition_Bar   16.8    6.51  25.0   51.8   9.99  -15.5   18.0     14.1     4.95   23.7    6.84     9.11
# 4 Cookies         -6.59 -16.8    4.84  15.8 -10.0   -31.3  -11.3    -17.4   -23.8   -13.9  -18.4     -1.16
# 5 Nuts/Trailmix    5.43  -2.10  14.9   36.6  -5.13  -21.9    9.19     6.41   -7.60   15.9   -1.36    12.0 
# 6 Pastry          50.7   21.1   41.4   58.4  16.4    -9.79  36.9     34.1    19.7    27.4   14.8     11.4 
# 7 Meat_Sticks     -3.00 -17.8    5.47  32.4  -4.08  -26.0   12.2      7.00   -1.35   13.1    9.75    16.1 
# 8 Other          -71.1  -91.9  -95.8  -97.7 -98.7   -99.4  -99.2    -99.7   -99.7  -100    -99.9    -99.9 
# 9 Crackers      1100     NA     NA     NA   NaN      NA     NA       NA    2631.   1310.   876.    2965.  
#10 Food            NA    NaN     NA     NA   NaN    -100     57.1   1433.    220.    160    303.     756.  

数据

df1 <- structure(list(Category_Name = structure(c(10L,1L,6L,2L,7L,9L,5L,8L,3L,4L),.Label = c("Candy","Cookies","Crackers","Food","Meat_Sticks","Nutrition_Bar","Nuts/Trailmix","Other","Pastry","Salty_Snacks"),class = "factor"),Oct = c(36340L,23003L,10563L,10751L,4018L,3531L,2131L,1500L,0L),Nov = c(43950L,23814L,12182L,11679L,4371L,4328L,2669L,2191L,0L,Dec = c(34246L,19271L,9657L,9051L,3512L,3666L,1919L,2133L,Jan = c(26758L,14269L,7029L,6752L,2676L,2853L,1575L,1650L,Feb = c(36151L,21902L,10740L,9416L,3935L,3906L,2156L,2151L,0L
),Mar = c(51390L,30986L,15392L,14467L,5586L,5396L,3021L,2696L,2L),Apr = c(35299L,22054L,10926L,10993L,3820L,3521L,2088L,1196L,7L),May = c(43337L,27269L,14955L,14364L,5182L,4318L,2630L,1169L,3L),Jun = c(35251L,22651L,12370L,12207L,4555L,3571L,2369L,760L,13L,15L
),Jul = c(32449L,20789L,10914L,11066L,3869L,3520L,2043L,751L,39L,25L),Aug = c(46309L,30090L,16130L,14862L,5672L,4830L,2759L,954L,86L,30L),Sep = c(33534L,21632L,11945L,9656L,3938L,4026L,2091L,747L,23L,9L),Average = c(37918,23144,11900,11272,4261,3956,2288,1492,13.5,7.58),Percent_Total = c(39.4,24,12.4,11.7,4.43,4.11,2.38,1.55,0.01,0.01)),class = "data.frame",row.names = c("1","2","3","4","5","6","7","8","9","10"))

df2 <- structure(list(Category_Name = structure(c(10L,Oct = c(35562L,22722L,12339L,10042L,5322L,4236L,2067L,12L,433L,Nov = c(38043L,21071L,12975L,9712L,5243L,4279L,2195L,4L,177L,Dec = c(37697L,20496L,12073L,9489L,5185L,4034L,2024L,89L,5L),Jan = c(33079L,17663L,10668L,7820L,4518L,3656L,2085L,38L,Feb = c(31942L,19470L,11813L,8472L,4546L,3733L,2068L,28L,Mar = c(35862L,21834L,12999L,9936L,4868L,4364L,2236L,49L,17L,Apr = c(35646L,21924L,12897L,9756L,4819L,4171L,2342L,185L,11L),May = c(43987L,26846L,17062L,11867L,5792L,5514L,2814L,459L,46L),Jun = c(33551L,21046L,12982L,9307L,4275L,4209L,2337L,355L,48L
),Jul = c(34916L,22482L,13506L,9525L,4484L,4486L,2311L,550L,65L),Aug = c(46172L,29402L,17234L,12123L,5547L,5595L,3028L,839L,121L),Sep = c(35511L,22477L,13033L,9544L,4409L,2428L,705L,77L),Average = c(36831,22286,13298,9799,4924,4390,2328,263,66.6,31.6),Percent_Total = c(39.1,23.6,14.1,10.4,5.23,4.66,2.47,0.28,0.07,0.03)),"10"))
本文链接:https://www.f2er.com/2937278.html

大家都在问