President: In Out Rate & Radius

1. Radius

The radius can be expressed in cm or inches (whichever is better). It reflects the number of visits per 10,000,000 people (the people-per-visit ratio is inverted so that more visits give a larger circle radius). The bigger the radius, the more visits per 10 million people.

# First attempt at pulling the population table straight from the page.
# The content container's id is "mw-content-text" (the spelling the second
# attempt further down uses); the previous "mw-context-text" is a misspelling
# that the selector cannot match.
citydata <- read_html("https://en.wikipedia.org/wiki/List_of_Chinese_administrative_divisions_by_population")
data <- citydata %>%
  html_nodes(xpath = "//*[@id='mw-content-text']/div/table[3]") %>%
  html_table()


# Fetch the page, collect every table under the content div and keep the
# third one by position.
url <- "https://en.wikipedia.org/wiki/List_of_Chinese_administrative_divisions_by_population"
citydata <- read_html(url)
pagename_head <- '//*[@id="mw-content-text"]/div/table'
table_head <- html_nodes(citydata, xpath = pagename_head)[3]
a <- html_table(table_head)

# Only the first two columns are kept: the division name and the column that
# is later renamed to "population".
provincepopulation <- as.data.frame(a)[, c(1, 2)]

# Replace every punctuation character in the division names with a space.
provincepopulation$Administrative.Division <- str_replace_all(
  provincepopulation$Administrative.Division,
  "[[:punct:]]",
  " "
)

# Print the cleaned names for visual inspection (result is not stored).
str_extract(provincepopulation$Administrative.Division, "[^_]+")

##  [1] "China"                            
##  [2] "Guangdong"                        
##  [3] "Shandong"                         
##  [4] "Henan"                            
##  [5] "Sichuan"                          
##  [6] "Jiangsu"                          
##  [7] "Hebei"                            
##  [8] "Hunan"                            
##  [9] "Anhui"                            
## [10] "Hubei"                            
## [11] "Zhejiang"                         
## [12] "Guangxi FN 12 "                   
## [13] "Yunnan"                           
## [14] "Jiangxi"                          
## [15] "Liaoning"                         
## [16] "Fujian"                           
## [17] "Shaanxi"                          
## [18] "Heilongjiang"                     
## [19] "Shanxi"                           
## [20] "Guizhou"                          
## [21] "Chongqing FN 19 "                 
## [22] "Jilin"                            
## [23] "Gansu"                            
## [24] "Inner Mongolia FN 12 "            
## [25] "Xinjiang FN 12 "                  
## [26] "Shanghai< FN 19 "                 
## [27] "Beijing FN 19 "                   
## [28] "Tianjin"                          
## [29] "Hainan"                           
## [30] "Hong Kong FN 22 "                 
## [31] "Ningxia FN 12 "                   
## [32] "Qinghai"                          
## [33] "Tibet"                            
## [34] "Macau FN 23 "                     
## [35] "Republic of China  Taiwan  FN 39 "

# Cut each division name down to its leading run of word characters, then map
# the truncated first words ("Republic", "Hong", "Inner") to the province
# labels used for joining.
provincepopulation$Administrative.Division <-
  provincepopulation$Administrative.Division %>%
  str_extract('\\w*') %>%
  str_replace_all("Republic", "Taiwan") %>%
  str_replace_all("Hong", "Hong Kong") %>%
  str_replace_all("Inner", "Inner Mongolia")

# Drop the first row; the listing printed above shows it is the "China" total.
provincepopulation <- provincepopulation[-1, ]

# Lower-case the names.
# NOTE(review): as.data.frame() stores a one-column data frame inside this
# column. The join code that follows calls names<- and cbind() on it, so it
# appears to rely on that shape; it is therefore left unchanged here.
provincepopulation$Administrative.Division <- as.data.frame(tolower(provincepopulation$Administrative.Division))

# Per-province visit counts for Xi (columns: province, n).
xi_provincecount <- read_csv("xi_provincecount.csv")

## Parsed with column specification:
## cols(
##   province = col_character(),
##   n = col_double()
## )

# Relabel "uygur" as "xinjiang", the spelling used in the population table.
xi_provincecount$province <- str_replace_all(
  xi_provincecount$province, "uygur", "xinjiang"
)
names(provincepopulation)[1:2] <- c("province", "population")

# Build the lookup that is joined to the visit counts: the province column
# (renamed to "province") bound side by side with the population column.
replacement <- provincepopulation$province
names(replacement) <- "province"
combining <- cbind(replacement, provincepopulation$population)
xi_radii <- inner_join(xi_provincecount, combining)

## Joining, by = "province"

## Warning: Column `province` joining character vector and factor, coercing
## into character vector

# The third column is the population; strip the thousands separators before
# converting it to numeric.
names(xi_radii)[3] <- "population"
xi_radii$population <- as.numeric(gsub(",", "", xi_radii$population))

# radii is the inverse of (people per visit, in units of 10 million people),
# i.e. visits per 10 million people.
xi_radii <- mutate(xi_radii, radii = (1 / ((population / n) / 10000000)))

Datatable for Xi radius

# Interactive table of the Xi radii with copy/export buttons.
datatable(
  xi_radii,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "excel", "pdf", "print")
  )
)

Same for Hu Jintao

# Per-province visit counts for Hu (columns: province, n).
hu_provincecount <- read_csv("hu_provincecount.csv")

## Parsed with column specification:
## cols(
##   province = col_character(),
##   n = col_double()
## )

# Relabel "uygur" as "xinjiang", the spelling used in the population table.
hu_provincecount$province <- str_replace_all(
  hu_provincecount$province, "uygur", "xinjiang"
)
names(provincepopulation)[1:2] <- c("province", "population")

# Build the lookup that is joined to the visit counts: the province column
# (renamed to "province") bound side by side with the population column.
replacement <- provincepopulation$province
names(replacement) <- "province"
combining <- cbind(replacement, provincepopulation$population)
hu_radii <- inner_join(hu_provincecount, combining)

## Joining, by = "province"

## Warning: Column `province` joining character vector and factor, coercing
## into character vector

# The third column is the population; strip the thousands separators before
# converting it to numeric.
names(hu_radii)[3] <- "population"
hu_radii$population <- as.numeric(gsub(",", "", hu_radii$population))

# radii is the inverse of (people per visit, in units of 10 million people),
# i.e. visits per 10 million people.
hu_radii <- mutate(hu_radii, radii = (1 / ((population / n) / 10000000)))

Data table for Hu radius

# Interactive table of the Hu radii with copy/export buttons.
datatable(
  hu_radii,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "excel", "pdf", "print")
  )
)

2. In/Out travel ratio

travel history scraper

#' Scrape one events page and attach city and country matches.
#'
#' Reads the text of every node with class "link12", separates the rows into
#' dates and event descriptions, searches each event for a city name and a
#' country name taken from `world.cities`, and joins the city details on.
#'
#' @param webpage Location handed to `read_html()`.
#' @return Invisibly, a data frame with `date`, `event`, `name` (first city
#'   matched in the event), `visitorvisitor` (first country matched in the
#'   event) and the columns of the filtered `world.cities` table joined by
#'   `name`. Events with no city match have `NA` in the joined columns.
cv_scrapeR <- function(webpage) {
  data("world.cities")

  cv_table <- read_html(webpage) %>%
    html_nodes(xpath = "//*[@class='link12']") %>%
    html_text()
  cv_frame <- as.data.frame(cv_table)

  # Rows with more than five non-word runs are kept as event text; rows with
  # more than five digits are kept as dates. vapply() pins the count to an
  # integer even when the page yields no rows.
  nonword_runs <- vapply(gregexpr("\\W+", cv_frame$cv_table), length, integer(1))
  digit_hits <- vapply(gregexpr("\\d", cv_frame$cv_table), length, integer(1))
  event_rows <- as.data.frame(cv_frame[nonword_runs > 5, ])
  date_rows <- as.data.frame(cv_frame[digit_hits > 5, ])
  travel <- cbind.fill(date_rows, event_rows)
  colnames(travel)[1:2] <- c("date", "event")

  # Only cities with names longer than four characters and a population
  # above 55000 are used for matching.
  big_cities <- world.cities %>%
    filter(nchar(name) > 4) %>%
    filter(pop > 55000)

  # world.cities has one row per city, so country names (and some city names)
  # repeat. unique() keeps the first occurrence of each in its original order,
  # so the alternation matches exactly as before with a far shorter pattern.
  cities_str <- paste(unique(big_cities$name), collapse = "|")
  country_str <- paste(unique(world.cities$country.etc), collapse = "|")
  citieslist <- as.data.frame(str_match(travel$event, cities_str))
  countrylist <- as.data.frame(str_extract(travel$event, country_str))

  travel <- cbind(travel, citieslist, countrylist)
  colnames(travel)[3:4] <- c("name", "visitorvisitor")
  travel$name <- as.character(travel$name)
  travel$name[travel$name == "China"] <- "Beijing"

  # NOTE(review): if a city name occurs more than once in the filtered city
  # table, this join repeats the event row once per occurrence - confirm that
  # downstream de-duplication covers it.
  # Returned invisibly so a bare call does not print the whole frame.
  invisible(left_join(travel, big_cities, by = "name"))
}

## biodata scraper
#' Scrape the table(s) under the "dataPanel" element of a page.
#'
#' @param webpage Location handed to `read_html()`.
#' @return Invisibly, the first two columns of the parsed table data.
biodata_scrapeR <- function(webpage) {
  panel_nodes <- html_nodes(
    read_html(webpage),
    xpath = "//*[@id='dataPanel']/table"
  )
  panel_frame <- as.data.frame(html_table(panel_nodes))
  invisible(panel_frame[, c(1, 2)])
}

# Top-level walk-through of the same steps cv_scrapeR() performs, run on the
# id=303 events page for filter_year=2018. Every intermediate object is left
# in the workspace.
cv<-read_html("http://www.chinavitae.com/vip/index.php?mode=events&type=cv&id=303&filter_year=2018")
# Text of every node with class "link12".
cv_table<-cv%>%
  html_nodes(xpath="//*[@class='link12']")%>%
  html_text()
cv_frame_2018<-as.data.frame(cv_table)
# Rows with more than five non-word runs are kept as event descriptions ...
cv_frame_2018a<-as.data.frame(cv_frame_2018[sapply(gregexpr("\\W+", cv_frame_2018$cv_table), length) >5,])
# ... and rows with more than five digits are kept as dates.
cv_frame_2018b<-as.data.frame(cv_frame_2018[sapply(gregexpr("\\d", cv_frame_2018$cv_table), length) >5,])
# Dates first, events second; the two columns are renamed right after.
cv_frame_2018xi<-cbind.fill(cv_frame_2018b,cv_frame_2018a)
colnames(cv_frame_2018xi)[1:2] <- c("date", "event")

# Detect city names in the event text, then join to world.cities to pick up
# country, latitude and longitude.
# Only cities with names longer than four characters and population above
# 55000 are used for matching.
world.cities1<-world.cities%>%
  filter(nchar(name)>4)%>%
  filter(pop>55000)
cities<-as.list(world.cities1$name)
# One regular expression with every city name as an alternative.
cities_str<-paste(unlist(cities), collapse='|')
citieslist<-as.data.frame(str_match(cv_frame_2018xi$event,cities_str))
# Same approach for country names, taken from the unfiltered table.
country<-as.list(world.cities$country.etc)
country_str<-paste(unlist(country), collapse='|')
countrylist<-as.data.frame(str_extract(cv_frame_2018xi$event,country_str))
xi2018travel<-cbind(cv_frame_2018xi,citieslist,countrylist)
# Column 3 is the matched city, column 4 the matched country.
colnames(xi2018travel)[3:4] <- c("name", "visitorvisitor")
xi2018travel$name<-as.character(xi2018travel$name)
# A city match of "China" is relabelled "Beijing" before the join.
xi2018travel$name[xi2018travel$name=="China"]<-"Beijing"
# Left join keeps events without a city match (joined columns are NA).
combinedframe<-left_join(xi2018travel,world.cities1,by="name")

# One cv_scrapeR() call per year. The id=303 pages fill the framexi* objects
# (2019 back to 2013) and the id=19 pages fill the framehu* objects (2013 back
# to 2003); the two-digit suffix of each object matches its filter_year.
# The results still need visual inspection: some cities are not in the city
# database, so check the rows where name is NA.
framexi19<-cv_scrapeR("http://www.chinavitae.com/vip/index.php?mode=events&type=cv&id=303&filter_year=2019")
framexi18<-cv_scrapeR("http://www.chinavitae.com/vip/index.php?mode=events&type=cv&id=303&filter_year=2018")
framexi17<-cv_scrapeR("http://www.chinavitae.com/vip/index.php?mode=events&type=cv&id=303&filter_year=2017")
framexi16<-cv_scrapeR("http://www.chinavitae.com/vip/index.php?mode=events&type=cv&id=303&filter_year=2016")
framexi15<-cv_scrapeR("http://www.chinavitae.com/vip/index.php?mode=events&type=cv&id=303&filter_year=2015")
framexi14<-cv_scrapeR("http://www.chinavitae.com/vip/index.php?mode=events&type=cv&id=303&filter_year=2014")
framexi13<-cv_scrapeR("http://www.chinavitae.com/vip/index.php?mode=events&type=cv&id=303&filter_year=2013")
framehu13<-cv_scrapeR("http://www.chinavitae.com/vip/index.php?mode=events&type=cv&id=19&filter_year=2013")
framehu12<-cv_scrapeR("http://www.chinavitae.com/vip/index.php?mode=events&type=cv&id=19&filter_year=2012")
framehu11<-cv_scrapeR("http://www.chinavitae.com/vip/index.php?mode=events&type=cv&id=19&filter_year=2011")
framehu10<-cv_scrapeR("http://www.chinavitae.com/vip/index.php?mode=events&type=cv&id=19&filter_year=2010")
framehu09<-cv_scrapeR("http://www.chinavitae.com/vip/index.php?mode=events&type=cv&id=19&filter_year=2009")
framehu08<-cv_scrapeR("http://www.chinavitae.com/vip/index.php?mode=events&type=cv&id=19&filter_year=2008")
framehu07<-cv_scrapeR("http://www.chinavitae.com/vip/index.php?mode=events&type=cv&id=19&filter_year=2007")
framehu06<-cv_scrapeR("http://www.chinavitae.com/vip/index.php?mode=events&type=cv&id=19&filter_year=2006")
framehu05<-cv_scrapeR("http://www.chinavitae.com/vip/index.php?mode=events&type=cv&id=19&filter_year=2005")
framehu04<-cv_scrapeR("http://www.chinavitae.com/vip/index.php?mode=events&type=cv&id=19&filter_year=2004")
framehu03<-cv_scrapeR("http://www.chinavitae.com/vip/index.php?mode=events&type=cv&id=19&filter_year=2003")


# Stack the yearly Xi frames; events with no joined country are labelled
# "China".
combined_xi <- do.call(
  rbind,
  list(framexi19, framexi18, framexi17, framexi16, framexi15, framexi14, framexi13)
)
combined_xi$country.etc[is.na(combined_xi$country.etc)] <- "China"

# Same for the yearly Hu frames.
combined_hu <- do.call(
  rbind,
  list(
    framehu13, framehu12, framehu11, framehu10, framehu09, framehu08,
    framehu07, framehu06, framehu05, framehu04, framehu03
  )
)
combined_hu$country.etc[is.na(combined_hu$country.etc)] <- "China"

Xi and Hu combined overseas trips

Data frames containing all information

# Overseas trips: events whose text mentions "travelled" (case-insensitive)
# and whose joined country is not China.
xi_overseas_details <- filter(
  combined_xi,
  str_detect(tolower(event), "travelled"),
  country.etc != "China"
)

hu_overseas_details <- filter(
  combined_hu,
  str_detect(tolower(event), "travelled"),
  country.etc != "China"
)

# Interactive tables of the trip details with copy/export buttons.
datatable(
  xi_overseas_details,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "excel", "pdf", "print")
  )
)

datatable(
  hu_overseas_details,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "excel", "pdf", "print")
  )
)

## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html

# Xi's overseas trips, read from a saved CSV.
xi_overseas <- read_csv("combined_xiint.csv")

## Parsed with column specification:
## cols(
##   date = col_character(),
##   name = col_character(),
##   country.etc = col_character(),
##   year = col_double(),
##   month = col_character()
## )

# Keep date, country and year, with one row per distinct date (the first row
# seen for each date is the one retained).
xi_overseastally <- distinct(
  select(xi_overseas, date, country.etc, year),
  date,
  .keep_all = TRUE
)

# Hu's overseas trips, read from a saved CSV.
hu_overseas <- read_csv("combined_huint.csv")

## Parsed with column specification:
## cols(
##   date = col_character(),
##   name = col_character(),
##   country.etc = col_character(),
##   year = col_double(),
##   month = col_character()
## )

# Keep date, country and year, with one row per distinct date (the first row
# seen for each date is the one retained).
hu_overseastally <- distinct(
  select(hu_overseas, date, country.etc, year),
  date,
  .keep_all = TRUE
)

Xi and Hu overseas visitors to China

# Events whose text contains "was in beijing municipality " (after
# lower-casing); of those, keep the rows where a country name was matched.
xi_visitors <- filter(
  combined_xi,
  str_detect(tolower(event), "was in beijing municipality ")
)
xi_nonavisitors <- select(
  drop_na(xi_visitors, visitorvisitor),
  date, visitorvisitor
)

# Derive the year from the date string.
# NOTE(review): the "00" -> "20" substitution suggests the dates carry
# two-digit years that "%Y" turns into e.g. "0018" - confirm the date format;
# "%y" may be the intended conversion.
xi_nonavisitors$year <- format(
  as.POSIXct(xi_nonavisitors$date, format = "%m/%d/%Y"),
  "%Y"
)
xi_nonavisitors$year <- str_replace_all(xi_nonavisitors$year, "00", "20")

### Days spent hosting overseas guests
xi_travelyears1 <- tally(group_by(xi_nonavisitors, year))

# Events whose text contains "was in beijing municipality " (after
# lower-casing); of those, keep the rows where a country name was matched.
hu_visitors <- filter(
  combined_hu,
  str_detect(tolower(event), "was in beijing municipality ")
)
hu_nonavisitors <- select(
  drop_na(hu_visitors, visitorvisitor),
  date, visitorvisitor
)

# Derive the year from the date string.
# NOTE(review): the "00" -> "20" substitution suggests the dates carry
# two-digit years that "%Y" turns into e.g. "0008" - confirm the date format;
# "%y" may be the intended conversion.
hu_nonavisitors$year <- format(
  as.POSIXct(hu_nonavisitors$date, format = "%m/%d/%Y"),
  "%Y"
)
hu_nonavisitors$year <- str_replace_all(hu_nonavisitors$year, "00", "20")

### Days spent hosting overseas guests
hu_travelyears1 <- tally(group_by(hu_nonavisitors, year))

# Rename the tally column (second column) in both yearly tables.
names(hu_travelyears1)[2] <- "visited_days"
names(xi_travelyears1)[2] <- "visited_days"

In Out Join

# Yearly outbound counts for Hu (columns: year, n).
hu_travelout <- read_csv("hutravelout.csv")

## Parsed with column specification:
## cols(
##   year = col_double(),
##   n = col_double()
## )

# Yearly outbound counts for Xi (columns: year, n).
xi_travelout <- read_csv("xitravelout.csv")

## Parsed with column specification:
## cols(
##   year = col_double(),
##   n = col_double()
## )

# Rename the count column so it does not clash with the hosting tallies.
names(hu_travelout)[2] <- "visiting"
names(xi_travelout)[2] <- "visiting"

# The hosting tallies hold year as text; convert it so it matches the numeric
# year column read from the CSVs.
hu_travelyears1$year <- as.numeric(hu_travelyears1$year)
xi_travelyears1$year <- as.numeric(xi_travelyears1$year)

# Keep only the years present in both the hosting and outbound tables.
hu_inout <- inner_join(hu_travelyears1, hu_travelout)

## Joining, by = "year"

xi_inout <- inner_join(xi_travelyears1, xi_travelout)

## Joining, by = "year"

Datatable for Hu: days in China hosting guests (who visited him) and his visits to others abroad

# Interactive table of Hu's yearly hosting and outbound figures.
datatable(
  hu_inout,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "excel", "pdf", "print")
  )
)

Datatable for Xi: days in China hosting guests (who visited him) and his visits to others abroad

# Interactive table of Xi's yearly hosting and outbound figures.
datatable(
  xi_inout,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("copy", "csv", "excel", "pdf", "print")
  )
)