我正在尝试用 for 循环从网站上抓取一些文本,但循环并没有依次处理向量中的下一个元素。如果有任何建议,我将不胜感激。谢谢!
library(rvest)
library(xml2)
# Question code: attempts to scrape one text per country into data$Text.
ID <- c(1:2)
Land <- c('Afghanistan','Ägypten')
# URL path segment for each country, listed in the same order as Land.
url <- c('afghanistan','aegypten')
# Placeholder that data.frame() recycles to one NA per row.
Text <- (NA)
data <- data.frame(ID,Land,Text)
# i takes the path segments themselves (strings), not row indices.
for(i in url) {
# nam is already the full page URL, e.g. ".../afghanistan".
nam <- paste("https://www.reporter-ohne-grenzen.de",i,sep = "/")
# Creates a variable whose name is the URL string and whose value is i;
# nothing below reads it.
assign(nam,i)
# NOTE(review): nam already ends with i, so paste0(nam, i) appends the
# segment a second time (".../afghanistanafghanistan").
webpage <- read_html(paste0(nam,i))
# NOTE(review): the target is the whole Text column (no row index), so each
# iteration overwrites every row; i is also reassigned to the scraped text.
data$Text <- i <- webpage %>% html_nodes('div.text') %>% .[[1]] %>% html_text()
}
嗯,我不确定是否把问题说清楚了。下面是我想要得到的数据输出的示例。
library(rvest)
library(xml2)
# Desired result, written out by hand for two countries: one row per country
# with the text of the first 'div.text' node of its page in data$Text.
ID <- c(1:2)
# Second entry is the country name (the original had the variable Text here).
Land <- c('Afghanistan','Ägypten')
# Build the data frame in this snippet so the final assignment has a target;
# the single NA is recycled to one value per row.
Text <- NA
data <- data.frame(ID,Land,Text)
afghanistan <- 'https://www.reporter-ohne-grenzen.de/afghanistan'
afghanistan <- read_html(afghanistan)
afghanistan <- html_nodes(afghanistan,'div.text')
# Keep only the text of the first matching node.
afghanistan <- html_text(afghanistan)[[1]]
aegypten <- 'https://www.reporter-ohne-grenzen.de/aegypten'
aegypten <- read_html(aegypten)
aegypten <- html_nodes(aegypten,'div.text')
aegypten <- html_text(aegypten)[[1]]
# desired data output
data$Text <- c(afghanistan,aegypten)
我不想为 180 个国家/地区逐一重复下面这几行代码:
# Download the Egypt page, select its 'div.text' nodes, and keep the text of
# the first one.
aegypten <- html_text(
  html_nodes(
    read_html('https://www.reporter-ohne-grenzen.de/aegypten'),
    'div.text'
  )
)[[1]]
以下是解决方法:
library(rvest)
library(xml2)
# Scrape the first 'div.text' block of every country page into data$Text.
# Land holds the display names and Url the matching path segments; both must
# be in the same order as ID.
ID <- c(1:4)
Land <- c('Afghanistan','Ägypten','Deutschland','Italien')
Url <- c('afghanistan','aegypten','deutschland','italien')
stopifnot(length(Land) == length(ID), length(Url) == length(ID))
# Character NA so the column already has the type the loop writes into it.
Text <- NA_character_
# Keep Land in the result so every text can be matched to its country;
# stringsAsFactors = FALSE keeps Text a character column on R < 4.0 as well.
data <- data.frame(ID, Land, Text, stringsAsFactors = FALSE)
website <- 'https://www.reporter-ohne-grenzen.de'
# Iterate over positions in Url so the row index never depends on ID values.
for (i in seq_along(Url)) {
  html_url <- paste(website, Url[i], sep = '/')
  # A failed download or a page without 'div.text' leaves NA in that row and
  # raises a warning instead of aborting the remaining countries.
  data$Text[i] <- tryCatch(
    {
      nodes <- html_nodes(read_html(html_url), 'div.text')
      if (length(nodes) > 0) html_text(nodes)[[1]] else NA_character_
    },
    error = function(e) {
      warning(
        "Could not scrape ", html_url, ": ", conditionMessage(e),
        call. = FALSE
      )
      NA_character_
    }
  )
}