# Attach the packages used in this exercise; pacman::p_load() installs any
# that are missing before loading them.
pacman::p_load(tidyverse, sf, httr, tmap)
In-class Exercise 4
Overview
Getting Started
Geocoding using SLA API
#url<-"https://www.onemap.gov.sg/api/common/elastic/search"
#csv<-read_csv("In-class_Ex4/aspatial/Generalinformationofschools.csv")
#postcodes<-csv$`postal_code`
#found<-data.frame()
#not_found<-data.frame()
#for(postcode in postcodes){
#query<-list('searchVal'=postcode,'returnGeom'='Y','getAddrDetails'='Y','pageNum'='1')
# res<- GET(url,query=query)
#
# if((content(res)$found)!=0){
# found<-rbind(found,data.frame(content(res))[4:13])
# } else{
# not_found = rbind(not_found, data.frame(postcode))
# }
#}
#merged = merge(csv, found, by.x = 'postal_code', by.y = 'results.POSTAL', all = TRUE)
#write.csv(merged, file = "In-class_Ex4/aspatial/schools.csv")
#write.csv(not_found, file = "In-class_Ex4/aspatial/not_found.csv")
Converting an aspatial data into a simple feature tibble data.frame
Importing and tidying schools data
# Import the geocoded schools table, give the coordinate columns short names,
# and keep only the fields needed to build point geometry.
schools <- read_csv("In-class_Ex4/aspatial/schools.csv") %>%
  rename(latitude = "results.LATITUDE",
         longitude = "results.LONGITUDE") %>%
  select(postal_code, school_name, latitude, longitude)
New names:
Rows: 350 Columns: 41
── Column specification
──────────────────────────────────────────────────────── Delimiter: "," chr
(35): school_name, url_address, address, telephone_no, telephone_no_2, f... dbl
(6): ...1, postal_code, results.X, results.Y, results.LATITUDE, results...
ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
Specify the column types or set `show_col_types = FALSE` to quiet this message.
• `` -> `...1`
# Build point geometry from the longitude/latitude columns (x first, then y),
# declared as WGS 84 (EPSG:4326), then reproject to EPSG:3414.
schools_sf <- st_as_sf(schools,
                       coords = c("longitude", "latitude"),
                       crs = 4326) %>%
  st_transform(crs = 3414)

# Persist the projected point layer for later use.
write_rds(schools_sf, "In-class_Ex4/rds/schools.rds")
Plotting a point simple feature layer
To ensure that schools sf tibble data.frame has been projected and converted correctly, you can plot the schools point data for visual inspection.
First, let us import MPSZ-2019 shapefile into R environment and save it as an sf tibble data.frame called mpsz.
# Read the MPSZ-2019 shapefile layer and reproject it to EPSG:3414 so it
# shares a coordinate system with the point layers.
mpsz <- st_read(dsn = "In-class_Ex4/geospatial/",
                layer = "MPSZ-2019") %>%
  st_transform(crs = 3414)
Reading layer `MPSZ-2019' from data source
`D:\y1zaoWang\ISSS624\In-class_Ex4\geospatial' using driver `ESRI Shapefile'
Simple feature collection with 332 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS: WGS 84
Using the steps you learned in previous exercises, create a point symbol map showing the location of schools with OSM as the background map.
# Ask tmap to check shapes for validity and fix them where needed.
tmap_options(check.and.fix = TRUE)

# Planning subzone polygons underneath, school points on top.
tm_shape(mpsz) + tm_polygons() +
  tm_shape(schools_sf) + tm_dots()
Warning: The shape mpsz is invalid. See sf::st_is_valid
Performing point-in-polygon count process
Next, we will count the number of schools located inside the planning subzones.
# st_intersects() returns, for each subzone polygon, the indices of the school
# points that intersect it; lengths() turns that into a per-subzone count.
mpsz$`SCHOOL_COUNT` <- lengths(
  st_intersects(
    mpsz, schools_sf))
It is always a good practice to examine the summary statistics of the derived variable.
# Five-number summary plus mean of the per-subzone school counts.
summary(mpsz[["SCHOOL_COUNT"]])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.000 0.000 0.000 1.054 2.000 12.000
Data Integration and Final Touch-up
# Read the Business point layer from the geospatial folder.
business_sf <- st_read(dsn = "In-class_Ex4/geospatial",
                       layer = "Business")
Reading layer `Business' from data source
`D:\y1zaoWang\ISSS624\In-class_Ex4\geospatial' using driver `ESRI Shapefile'
Simple feature collection with 6550 features and 3 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 3669.148 ymin: 25408.41 xmax: 47034.83 ymax: 50148.54
Projected CRS: SVY21 / Singapore TM
# Ask tmap to check shapes for validity and fix them where needed.
tmap_options(check.and.fix = TRUE)

# Planning subzone polygons underneath, business points on top.
tm_shape(mpsz) + tm_polygons() +
  tm_shape(business_sf) + tm_dots()
Warning: The shape mpsz is invalid. See sf::st_is_valid
# Count the business points intersecting each subzone polygon: st_intersects()
# lists the matching point indices per polygon and lengths() counts them.
mpsz$`BUSINESS_COUNT` <- lengths(
  st_intersects(
    mpsz, business_sf))

# Inspect the distribution of the derived counts.
summary(mpsz$BUSINESS_COUNT)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.00 0.00 2.00 19.73 13.00 307.00
# Load the previously prepared flow data and print it for inspection.
flow_data <- read_rds("In-class_Ex4/rds0/flow_data_tidy.rds")
flow_data
Simple feature collection with 14734 features and 12 fields
Geometry type: LINESTRING
Dimension: XY
Bounding box: xmin: 5105.594 ymin: 25813.33 xmax: 49483.22 ymax: 49552.79
Projected CRS: SVY21 / Singapore TM
First 10 features:
ORIGIN_SZ DESTIN_SZ MORNING_PEAK dist ORIGIN_AGE7_12 ORIGIN_AGE13_24
1 AMSZ01 AMSZ01 1998 50.0000 310 710
2 AMSZ01 AMSZ02 8289 810.4491 310 710
3 AMSZ01 AMSZ03 8971 1360.9294 310 710
4 AMSZ01 AMSZ04 2252 840.4432 310 710
5 AMSZ01 AMSZ05 6136 1076.7916 310 710
6 AMSZ01 AMSZ06 2148 805.2979 310 710
7 AMSZ01 AMSZ07 1620 1798.7526 310 710
8 AMSZ01 AMSZ08 1925 2576.0199 310 710
9 AMSZ01 AMSZ09 1773 1204.2846 310 710
10 AMSZ01 AMSZ10 63 1417.8035 310 710
ORIGIN_AGE25_64 DESTIN_AGE7_12 DESTIN_AGE13_24 DESTIN_AGE25_64 SCHOOL_COUNT
1 2780 310.00 710.00 2780.00 0.99
2 2780 1140.00 2770.00 15700.00 2.00
3 2780 1010.00 2650.00 14240.00 2.00
4 2780 980.00 2000.00 11320.00 1.00
5 2780 810.00 1920.00 9650.00 3.00
6 2780 1050.00 2390.00 12460.00 2.00
7 2780 420.00 1120.00 3620.00 0.99
8 2780 390.00 1150.00 4350.00 0.99
9 2780 1190.00 3260.00 13350.00 3.00
10 2780 0.99 0.99 0.99 1.00
RETAIL_COUNT geometry
1 1.00 LINESTRING (29501.77 39419....
2 0.99 LINESTRING (29501.77 39419....
3 6.00 LINESTRING (29501.77 39419....
4 0.99 LINESTRING (29501.77 39419....
5 0.99 LINESTRING (29501.77 39419....
6 0.99 LINESTRING (29501.77 39419....
7 1.00 LINESTRING (29501.77 39419....
8 117.00 LINESTRING (29501.77 39419....
9 0.99 LINESTRING (29501.77 39419....
10 20.00 LINESTRING (29501.77 39419....
# Reduce mpsz to a plain attribute table holding the subzone code and the two
# derived counts; the geometry is dropped because only attributes are joined.
mpsz_tidy <- mpsz %>%
  st_drop_geometry() %>%
  select(SUBZONE_C, SCHOOL_COUNT, BUSINESS_COUNT)
Now, we will append SCHOOL_COUNT and BUSINESS_COUNT fields from mpsz_tidy data.frame into flow_data sf tibble data.frame by using the code chunk below.
# Attach the destination subzone's counts to every flow, then give the trip
# and distance columns upper-case names.
# NOTE: flow_data already carries a SCHOOL_COUNT column, so the join output
# holds SCHOOL_COUNT.x (existing) and SCHOOL_COUNT.y (from mpsz_tidy).
flow_data <- flow_data %>%
  left_join(mpsz_tidy,
            by = c("DESTIN_SZ" = "SUBZONE_C")) %>%
  rename(TRIPS = MORNING_PEAK,
         DIST = dist)

# Review the value ranges of every column after the join.
summary(flow_data)
ORIGIN_SZ DESTIN_SZ TRIPS DIST
Length:14734 Length:14734 Min. : 1 Min. : 50
Class :character Class :character 1st Qu.: 14 1st Qu.: 3346
Mode :character Mode :character Median : 76 Median : 6067
Mean : 1021 Mean : 6880
3rd Qu.: 426 3rd Qu.: 9729
Max. :232187 Max. :26136
ORIGIN_AGE7_12 ORIGIN_AGE13_24 ORIGIN_AGE25_64 DESTIN_AGE7_12
Min. : 0.99 Min. : 0.99 Min. : 0.99 Min. : 0.99
1st Qu.: 240.00 1st Qu.: 440.00 1st Qu.: 2200.00 1st Qu.: 240.00
Median : 700.00 Median : 1350.00 Median : 6810.00 Median : 720.00
Mean :1031.86 Mean : 2268.84 Mean :10487.62 Mean :1033.40
3rd Qu.:1480.00 3rd Qu.: 3260.00 3rd Qu.:15770.00 3rd Qu.:1500.00
Max. :6340.00 Max. :16380.00 Max. :74610.00 Max. :6340.00
DESTIN_AGE13_24 DESTIN_AGE25_64 SCHOOL_COUNT.x RETAIL_COUNT
Min. : 0.99 Min. : 0.99 Min. : 0.990 Min. : 0.99
1st Qu.: 460.00 1st Qu.: 2200.00 1st Qu.: 0.990 1st Qu.: 0.99
Median : 1420.00 Median : 7030.00 Median : 1.000 Median : 3.00
Mean : 2290.35 Mean :10574.46 Mean : 1.987 Mean : 16.47
3rd Qu.: 3260.00 3rd Qu.:15830.00 3rd Qu.: 2.000 3rd Qu.: 12.00
Max. :16380.00 Max. :74610.00 Max. :12.000 Max. :307.00
SCHOOL_COUNT.y BUSINESS_COUNT geometry
Min. : 0.000 Min. : 0.00 LINESTRING :14734
1st Qu.: 0.000 1st Qu.: 0.00 epsg:3414 : 0
Median : 1.000 Median : 3.00 +proj=tmer...: 0
Mean : 1.583 Mean : 16.17
3rd Qu.: 2.000 3rd Qu.: 12.00
Max. :12.000 Max. :307.00
The print report above reveals that the newly joined variables SCHOOL_COUNT.y and BUSINESS_COUNT contain 0 values, whereas ORIGIN_AGE7_12, ORIGIN_AGE13_24, ORIGIN_AGE25_64, DESTIN_AGE7_12, DESTIN_AGE13_24 and DESTIN_AGE25_64 already have a minimum of 0.99.
In view of this, the code chunk below is used to replace the zero values in BUSINESS_COUNT with 0.99.
# Replace zero business counts with 0.99 and leave all other values unchanged.
flow_data$BUSINESS_COUNT <- ifelse(
  flow_data$BUSINESS_COUNT == 0,
  0.99, flow_data$BUSINESS_COUNT)

# Confirm the new minimum of BUSINESS_COUNT.
summary(flow_data)
ORIGIN_SZ DESTIN_SZ TRIPS DIST
Length:14734 Length:14734 Min. : 1 Min. : 50
Class :character Class :character 1st Qu.: 14 1st Qu.: 3346
Mode :character Mode :character Median : 76 Median : 6067
Mean : 1021 Mean : 6880
3rd Qu.: 426 3rd Qu.: 9729
Max. :232187 Max. :26136
ORIGIN_AGE7_12 ORIGIN_AGE13_24 ORIGIN_AGE25_64 DESTIN_AGE7_12
Min. : 0.99 Min. : 0.99 Min. : 0.99 Min. : 0.99
1st Qu.: 240.00 1st Qu.: 440.00 1st Qu.: 2200.00 1st Qu.: 240.00
Median : 700.00 Median : 1350.00 Median : 6810.00 Median : 720.00
Mean :1031.86 Mean : 2268.84 Mean :10487.62 Mean :1033.40
3rd Qu.:1480.00 3rd Qu.: 3260.00 3rd Qu.:15770.00 3rd Qu.:1500.00
Max. :6340.00 Max. :16380.00 Max. :74610.00 Max. :6340.00
DESTIN_AGE13_24 DESTIN_AGE25_64 SCHOOL_COUNT.x RETAIL_COUNT
Min. : 0.99 Min. : 0.99 Min. : 0.990 Min. : 0.99
1st Qu.: 460.00 1st Qu.: 2200.00 1st Qu.: 0.990 1st Qu.: 0.99
Median : 1420.00 Median : 7030.00 Median : 1.000 Median : 3.00
Mean : 2290.35 Mean :10574.46 Mean : 1.987 Mean : 16.47
3rd Qu.: 3260.00 3rd Qu.:15830.00 3rd Qu.: 2.000 3rd Qu.: 12.00
Max. :16380.00 Max. :74610.00 Max. :12.000 Max. :307.00
SCHOOL_COUNT.y BUSINESS_COUNT geometry
Min. : 0.000 Min. : 0.99 LINESTRING :14734
1st Qu.: 0.000 1st Qu.: 0.99 epsg:3414 : 0
Median : 1.000 Median : 3.00 +proj=tmer...: 0
Mean : 1.583 Mean : 16.47
3rd Qu.: 2.000 3rd Qu.: 12.00
Max. :12.000 Max. :307.00
Notice that the 0 values in BUSINESS_COUNT have been replaced by 0.99; SCHOOL_COUNT.y is not modified by the code chunk above and still has a minimum of 0.
Before we move on to calibrate the Spatial Interaction Models, let us save flow_data sf tibble data.frame into an rds file. Call the file flow_data_tidy.
# Save the tidied flow data as an rds file.
flow_data_path <- "In-class_Ex4/rds/flow_data_tidy.rds"
write_rds(flow_data, flow_data_path)