我有一小份 PCA 值数据,其中第一列是标签。我想为前两个主成分生成成对图(pairs plot):针对每一对类别组合(此处共包含 3 个类别)绘制一张 PC1 对 PC2 的散点图。下面附上我数据集的一部分(CSV 文本)。我尝试过使用 ggpairs,但不知道如何把这些数据整理成它所需的格式,部分原因是它不知道如何忽略 NA。
# Single scatter plot of PC1 against PC2 for all rows of `irdf`, coloured by
# Category; stat_ellipse() adds a data ellipse for each colour group.
p1 <- ggplot(
  data = irdf,
  mapping = aes(x = PC1, y = PC2, colour = Category)
) +
  geom_point() +
  stat_ellipse()
这段代码生成的是一张把所有类别的数据叠加在一起的散点图。
CSV:
,Category,PC1,PC2,PC3,PC4
120,A,-0.004010778,-0.001078139,0.002560689,-0.002477077
121,0.00137385,-0.003251322,-0.0012312,0.002884072
122,0.002884072
123,0.002884072
124,0.002884072
125,-0.005530058,0.000206478,-0.003651934,0.000106734
126,0.000106734
127,0.000106734
128,0.000106734
129,C,0.05459126,0.004348095,0.01402585,-0.001921671
130,0.005382812,0.004915268,-0.001480362,0.000944579
131,0.002448355,0.012424871,-0.008416275,0.000361269
132,-0.000936612,0.003991578,-0.001986253,0.001497678
133,-0.003822607,0.024979002,-0.009948887,-0.0015799
196,-0.020830672,0.015967276,-0.003338178,0.004129992
197,0.003977212,0.002084443,-2.15E-05,0.002564937
198,0.008799044,-0.009520658,-0.007143308,0.00033937
199,-0.003273719,-0.005096245,-0.001200353,0.00038518
200,-0.021442262,0.022713865,-0.011194163,0.000664158
201,-0.005399877,0.004867002,0.001075927,0.000930286
202,-0.001082562,-0.00174273,0.0030125,0.002118374
203,0.004913886,-0.006648075,-0.004697749,0.000364076
204,0.00141842,0.000320859,0.006396322,0.005044266
205,0.002011682,-0.00444237,0.000991321,0.00328382
206,0.003169989,-0.003892362,-0.001063221,-0.000253309
207,0.002139646,-0.006559916,-0.001683837,0.000612287
208,J,-0.001921671
209,0.003098164,-0.005939099,-0.00471109,-0.000507244
210,0.004762399,-0.005833026,-0.003249797,-0.000238942
211,-0.001493766,5.86E-05,0.002030541,0.001575403
212,-0.001501058,0.000437217,0.003820577,0.001316014
213,-0.002753525,0.000915653,0.001490557,0.00172809
214,-0.001670349,0.001603777,0.003935583,0.001931291
215,-0.001667739,0.000914704,0.001519535,0.000950852
216,0.002700166,-0.002849805,-0.005260336,-0.000976289
217,-0.002990762,-0.001229752,0.001240857,0.001728149
218,-0.001466045,-0.002434853,0.00041103,0.000900405
219,-0.002735407,-0.001136517,0.003169908,0.002208108
220,-0.001163076,-0.000313097,-0.001645696,0.000254576
221,0.001285879,-0.004654998,-0.001721707,0.000129652
更新
我确实设法解决了自己的问题,但感觉写法很丑陋。诀窍是把所有可能的类别配对所对应的散点数据都填充到一个 tibble 中,如下所示。如果有不那么丑陋的方法,请告诉我。
# Build one panel of data for every ordered (row, column) pair of cells so
# that facet_grid() can lay the pairwise scatter plots out as a matrix.

# Take the cell labels from the same data frame that is filtered below. The
# original looped over `subset$Cell`, but `subset` is not defined in this
# snippet (it resolves to the base function) while every filter used `irdf`.
cell_labels <- as.character(unique(irdf$Cell))
cell_labels <- cell_labels[!is.na(cell_labels)]

# Every (Cell_r, Cell_c) combination, including the diagonal.
cell_pairs <- expand.grid(
  Cell_c = cell_labels,
  Cell_r = cell_labels,
  stringsAsFactors = FALSE
)

# One data frame per panel: the rows belonging to either cell of the pair,
# tagged with the facet row/column they are drawn in. Selecting with %in%
# keeps each row once, so diagonal panels (Cell_r == Cell_c) no longer hold
# every point twice, which would distort the ellipse fitted by stat_ellipse().
joined_subset_full <- lapply(
  seq_len(nrow(cell_pairs)),
  function(k) {
    row_cell <- cell_pairs$Cell_r[[k]]
    col_cell <- cell_pairs$Cell_c[[k]]
    panel <- irdf[irdf$Cell %in% c(row_cell, col_cell), , drop = FALSE]
    panel$Cell_r <- row_cell
    panel$Cell_c <- col_cell
    panel
  }
)
joined_subset_facet_tibble <- bind_rows(joined_subset_full)

# Grid of scatter plots: facet rows follow Cell_r, facet columns follow Cell_c.
p1 <- ggplot(
  joined_subset_facet_tibble,
  aes(`Comp 1`, `Comp 2`, colour = Cell)
) +
  geom_point() +
  stat_ellipse() +
  facet_grid(Cell_r ~ Cell_c)
p1