# R语言实现,后续添加豆瓣评分与电影简介
#' Export the film schedule as a calendar-style CSV
#'
#' Downloads the schedule page, reads the dates, start times, film titles,
#' venues and ticket details from it, stamps every row with the date of the
#' section it belongs to, and writes the result to a CSV file.
#'
#' @param url Address of the schedule page to scrape.
#' @param year Year appended to the month and day read from the page.
#' @param output_path File the CSV is written to.
#' @return The table that was written (a tibble with columns `Subject`,
#'   `Start_time`, `Location`, `Description`, `Start_Date`, `End_Date`),
#'   invisibly.
generateFilmCalendar <- function(
    url = "https://www.cfa.org.cn/tabid/562/Default.aspx",
    year = 2018,
    output_path = "C:/Users/Administrator/Desktop/小西天影片排片.csv") {
  # Load packages ----
  library(rvest)
  library(tidyverse)

  # Rows whose title equals this label mark the start of a new date section;
  # they are used to assign dates and are dropped from the output.
  header_label <- "影片"

  # Load web page ----
  page <- read_html(url)

  # Trimmed text of every node matching a CSS selector.
  node_text <- function(selector) {
    page %>%
      html_nodes(selector) %>%
      html_text() %>%
      str_trim()
  }

  # Film date and start time ----
  # Strip the "月" suffix before trimming so no stray whitespace is left
  # inside the assembled date string.
  month <- page %>%
    html_nodes(".fa_month") %>%
    html_text() %>%
    str_replace_all("月", "") %>%
    str_trim()
  day <- node_text(".fa_date")
  # One day/month/year string per date section, in page order.
  section_dates <- str_c(day, "/", month, "/", year)
  start_time <- node_text(".fysj")

  # Film name and location ----
  subject <- node_text(".yp")
  location <- node_text(".zt")

  # Description ----
  language <- node_text(".yz")
  caption <- node_text(".zm")
  price <- node_text(".pj")
  ticketing <- node_text(".gp")
  description <- str_c(
    language, "\n",
    caption, "\n",
    ticketing, "\n",
    price
  )

  # Assemble the table ----
  calendar <- data.frame(
    Subject = subject,
    Start_time = start_time,
    Location = location,
    Description = description,
    stringsAsFactors = FALSE
  ) %>%
    as_tibble()

  # For each row, find the last header row at or above it; the k-th header
  # row corresponds to the k-th section date. Rows above the first header
  # (index 0) keep an empty date. findInterval() also copes with a page that
  # has no header rows at all, where an index loop over 1:0 would fail.
  header_rows <- which(calendar$Subject == header_label)
  section_index <- findInterval(seq_len(nrow(calendar)), header_rows)
  in_section <- section_index > 0
  start_date <- rep("", nrow(calendar))
  start_date[in_section] <- section_dates[section_index[in_section]]

  # NOTE(review): distinct() keeps only the first row per film title, so a
  # film screened on several dates appears once -- confirm this is intended.
  calendar <- calendar %>%
    mutate(Start_Date = start_date, End_Date = Start_Date) %>%
    distinct(Subject, .keep_all = TRUE) %>%
    filter(Subject != header_label)

  # Output the calendar file ----
  # NOTE(review): Google Calendar's CSV import documents headers such as
  # "Start Date" and "Start Time"; verify that these underscore names (and
  # the row-name column write.csv adds) import as expected.
  write.csv(calendar, output_path)

  invisible(calendar)
}
# Run the export immediately when this script is executed or sourced.
generateFilmCalendar()