# The scraped output still needs a visual check: some cities are not in the
# database, so inspect rows where the name is NA.

# Build the chinavitae.com events-page URL for one person id and one year.
cv_events_url <- function(id, year) {
  sprintf(
    "http://www.chinavitae.com/vip/index.php?mode=events&type=cv&id=%d&filter_year=%d",
    id, year
  )
}

# Person ids used in the site's query string: the `framexi*` frames come from
# id 303 and the `framehu*` frames from id 19.
xi_id <- 303L
hu_id <- 19L

# One scrape per year for id 303, newest first.
framexi19 <- cv_scrapeR(cv_events_url(xi_id, 2019L))
framexi18 <- cv_scrapeR(cv_events_url(xi_id, 2018L))
framexi17 <- cv_scrapeR(cv_events_url(xi_id, 2017L))
framexi16 <- cv_scrapeR(cv_events_url(xi_id, 2016L))
framexi15 <- cv_scrapeR(cv_events_url(xi_id, 2015L))
framexi14 <- cv_scrapeR(cv_events_url(xi_id, 2014L))
framexi13 <- cv_scrapeR(cv_events_url(xi_id, 2013L))

# One scrape per year for id 19, newest first.
framehu13 <- cv_scrapeR(cv_events_url(hu_id, 2013L))
framehu12 <- cv_scrapeR(cv_events_url(hu_id, 2012L))
framehu11 <- cv_scrapeR(cv_events_url(hu_id, 2011L))
framehu10 <- cv_scrapeR(cv_events_url(hu_id, 2010L))
framehu09 <- cv_scrapeR(cv_events_url(hu_id, 2009L))
framehu08 <- cv_scrapeR(cv_events_url(hu_id, 2008L))
framehu07 <- cv_scrapeR(cv_events_url(hu_id, 2007L))
framehu06 <- cv_scrapeR(cv_events_url(hu_id, 2006L))
framehu05 <- cv_scrapeR(cv_events_url(hu_id, 2005L))
framehu04 <- cv_scrapeR(cv_events_url(hu_id, 2004L))
framehu03 <- cv_scrapeR(cv_events_url(hu_id, 2003L))


## DOMESTIC scraper to vomit out province names for me, then create a year column
## then count (i) number of days spent in province per year

# Stack the yearly frames into one data frame per leader (newest year first)
# and fill missing `country.etc` values with "China".
combined_xi <- rbind(
  framexi19, framexi18, framexi17, framexi16,
  framexi15, framexi14, framexi13
)
combined_xi$country.etc[is.na(combined_xi$country.etc)] <- "China"

combined_hu <- rbind(
  framehu13, framehu12, framehu11, framehu10, framehu09, framehu08,
  framehu07, framehu06, framehu05, framehu04, framehu03
)
combined_hu$country.etc[is.na(combined_hu$country.etc)] <- "China"

# Show the combined Xi table with the DataTables export buttons.
datatable(
  combined_xi,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "excel", "pdf", "print")
  )
)
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
# Xi's international trips: rows whose event text mentions "travelled"
# (case-insensitive) and whose country is not China.
combined_xiint <- combined_xi %>%
  filter(
    str_detect(tolower(event), "travelled"),
    country.etc != "China"
  ) %>%
  select(date, name, country.etc)

# Number of distinct dates per country. Each date is counted once: when a
# date has several rows, only the first one is kept, so that day is credited
# to a single country.
xi_countrycount <- combined_xiint %>%
  distinct(date, .keep_all = TRUE) %>%
  group_by(country.etc) %>%
  tally()

datatable(
  xi_countrycount,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "excel", "pdf", "print")
  )
)
# Hu's international trips: rows whose event text mentions "travelled"
# (case-insensitive) and whose country is not China.
combined_huint <- combined_hu %>%
  filter(
    str_detect(tolower(event), "travelled"),
    country.etc != "China"
  ) %>%
  select(date, name, country.etc)

# Number of distinct dates per country. Each date is counted once: when a
# date has several rows, only the first one is kept, so that day is credited
# to a single country.
hu_countrycount <- combined_huint %>%
  distinct(date, .keep_all = TRUE) %>%
  group_by(country.etc) %>%
  tally()

datatable(
  hu_countrycount,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "excel", "pdf", "print")
  )
)

Interesting stories to tell here: (1) rates of visits over time; (2) country differences: Russia, the US, Japan, and the focus and intensity of Xi’s BRICS visits.

Analyse distribution across years and months

# Add character `year` ("YYYY") and `month` ("MM") columns derived from the
# `date` column, which is parsed as month/day/year.
#
# When the source date carries only a two-digit year, `%Y` parses it as a
# year below 100 (e.g. 13). Those years are moved into the 2000s numerically.
# The earlier text substitution of "00" -> "20" was unanchored, so it also
# rewrote genuine four-digit years such as 2003 into "2203", and it did
# nothing on platforms that do not zero-pad years below 1000.
#
# trips: data frame with a `date` column.
# Returns `trips` with `year` and `month` appended; unparseable dates give NA.
add_year_month <- function(trips) {
  parsed <- as.POSIXct(trips$date, format = "%m/%d/%Y")

  year_num <- as.integer(format(parsed, "%Y"))
  two_digit <- !is.na(year_num) & year_num < 100L
  year_num[two_digit] <- year_num[two_digit] + 2000L

  trips$year <- as.character(year_num)
  trips$month <- format(parsed, "%m")
  trips
}

combined_xiint <- add_year_month(combined_xiint)
combined_huint <- add_year_month(combined_huint)

Xi’s travel patterns over years

# Number of distinct travel dates per year for Xi: collapse to one row per
# date, then count rows within each year.
xi_travel_year1 <- combined_xiint %>%
  distinct(date, .keep_all = TRUE) %>%
  group_by(year) %>%
  tally()

datatable(
  xi_travel_year1,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "excel", "pdf", "print")
  )
)

Hu’s travel patterns over years

# Number of distinct travel dates per year for Hu: collapse to one row per
# date, then count rows within each year.
hu_travel_year1 <- combined_huint %>%
  distinct(date, .keep_all = TRUE) %>%
  group_by(year) %>%
  tally()

datatable(
  hu_travel_year1,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "excel", "pdf", "print")
  )
)

`