In-class Exercise 4

Author

WYZ

Overview

Getting Started

pacman::p_load(tidyverse, sf, httr, tmap)

Geocoding using SLA API

# NOTE(review): this geocoding chunk is deliberately left commented out; the
# chunks further down read its saved output (schools.csv) instead. Presumably
# this avoids calling the OneMap API on every render -- confirm.
#
# url <- "https://www.onemap.gov.sg/api/common/elastic/search"

# csv <- read_csv("In-class_Ex4/aspatial/Generalinformationofschools.csv")
# postcodes <- csv$`postal_code`

# found <- data.frame()
# not_found <- data.frame()

# for (postcode in postcodes) {
#   # One search request per postal code. A stray `#` inside this call used to
#   # cut off the last two parameters and the closing parenthesis.
#   query <- list('searchVal' = postcode, 'returnGeom' = 'Y',
#                 'getAddrDetails' = 'Y', 'pageNum' = '1')
#   res <- GET(url, query = query)
#
#   if ((content(res)$found) != 0) {
#     found <- rbind(found, data.frame(content(res))[4:13])
#   } else {
#     # Accumulate every unmatched postcode; a plain assignment here would keep
#     # only the last one seen.
#     not_found <- rbind(not_found, data.frame(postcode))
#   }
# }
# merged = merge(csv, found, by.x = 'postal_code', by.y = 'results.POSTAL', all = TRUE)
# write.csv(merged, file = "In-class_Ex4/aspatial/schools.csv")
# write.csv(not_found, file = "In-class_Ex4/aspatial/not_found.csv")

Converting aspatial data into a simple feature tibble data.frame

Importing and tidying schools data

# Read the geocoded school list and keep only the four fields needed below;
# the two coordinate columns are renamed while they are selected.
schools <- read_csv("In-class_Ex4/aspatial/schools.csv") %>%
  select(
    postal_code,
    school_name,
    latitude = "results.LATITUDE",
    longitude = "results.LONGITUDE"
  )
New names:
Rows: 350 Columns: 41
── Column specification
──────────────────────────────────────────────────────── Delimiter: "," chr
(35): school_name, url_address, address, telephone_no, telephone_no_2, f... dbl
(6): ...1, postal_code, results.X, results.Y, results.LATITUDE, results...
ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
Specify the column types or set `show_col_types = FALSE` to quiet this message.
• `` -> `...1`
# Turn the longitude/latitude columns into point geometry (declared as
# EPSG:4326), then reproject to EPSG:3414 before saving the result.
schools_sf <- schools %>%
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326) %>%
  st_transform(crs = 3414)
write_rds(schools_sf, "In-class_Ex4/rds/schools.rds")

Plotting a point simple feature layer

To ensure that schools sf tibble data.frame has been projected and converted correctly, you can plot the schools point data for visual inspection.

First, let us import MPSZ-2019 shapefile into R environment and save it as an sf tibble data.frame called mpsz.

# Import the MPSZ-2019 subzone polygons and reproject them to EPSG:3414
# (the read log printed below reports the raw layer as WGS 84).
mpsz <- st_transform(
  st_read(dsn = "In-class_Ex4/geospatial/", layer = "MPSZ-2019"),
  crs = 3414
)
Reading layer `MPSZ-2019' from data source 
  `D:\y1zaoWang\ISSS624\In-class_Ex4\geospatial' using driver `ESRI Shapefile'
Simple feature collection with 332 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS:  WGS 84

Using the steps you learned in previous exercises, create a point symbol map showing the location of schools with OSM as the background map.

# Turn on tmap's check.and.fix option before plotting (the warning printed
# below reports that mpsz contains invalid geometry).
tmap_options(check.and.fix = TRUE)
# Layer order matters: subzone polygons are drawn first, school points on top.
tm_shape(mpsz) +
  tm_polygons() +
tm_shape(schools_sf) +
  tm_dots()
Warning: The shape mpsz is invalid. See sf::st_is_valid

Performing point-in-polygon count process

Next, we will count the number of schools located inside the planning subzones.

# For each subzone polygon, count how many school points intersect it.
mpsz$SCHOOL_COUNT <- mpsz %>%
  st_intersects(schools_sf) %>%
  lengths()

It is always a good practice to examine the summary statistics of the derived variable.

# Six-number summary of the per-subzone school counts.
summary(mpsz$SCHOOL_COUNT)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.000   0.000   0.000   1.054   2.000  12.000 

Data Integration and Final Touch-up

# Import the business point layer. No st_transform() is applied here: the read
# log printed below already reports SVY21 / Singapore TM.
business_sf <- st_read(
  dsn = "In-class_Ex4/geospatial",
  layer = "Business"
)
Reading layer `Business' from data source 
  `D:\y1zaoWang\ISSS624\In-class_Ex4\geospatial' using driver `ESRI Shapefile'
Simple feature collection with 6550 features and 3 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 3669.148 ymin: 25408.41 xmax: 47034.83 ymax: 50148.54
Projected CRS: SVY21 / Singapore TM
# check.and.fix was already switched on for the schools map earlier in this
# document; setting it again here is redundant.
tmap_options(check.and.fix = TRUE)
# Subzone polygons first, business points overlaid on top.
tm_shape(mpsz) +
  tm_polygons() +
tm_shape(business_sf) +
  tm_dots()
Warning: The shape mpsz is invalid. See sf::st_is_valid

# Same point-in-polygon count as for schools, this time with business points,
# followed by a quick look at the distribution.
mpsz$BUSINESS_COUNT <- mpsz %>%
  st_intersects(business_sf) %>%
  lengths()
summary(mpsz$BUSINESS_COUNT)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.00    0.00    2.00   19.73   13.00  307.00 
# Load the previously prepared flow data. It is read from rds0/, whereas the
# result is written to rds/ at the end of this document.
# NOTE(review): the printout below shows this file already carries
# SCHOOL_COUNT and RETAIL_COUNT columns -- confirm it is the intended input.
flow_data <- read_rds("In-class_Ex4/rds0/flow_data_tidy.rds")
# Print the object to inspect its columns and geometry.
flow_data
Simple feature collection with 14734 features and 12 fields
Geometry type: LINESTRING
Dimension:     XY
Bounding box:  xmin: 5105.594 ymin: 25813.33 xmax: 49483.22 ymax: 49552.79
Projected CRS: SVY21 / Singapore TM
First 10 features:
   ORIGIN_SZ DESTIN_SZ MORNING_PEAK      dist ORIGIN_AGE7_12 ORIGIN_AGE13_24
1     AMSZ01    AMSZ01         1998   50.0000            310             710
2     AMSZ01    AMSZ02         8289  810.4491            310             710
3     AMSZ01    AMSZ03         8971 1360.9294            310             710
4     AMSZ01    AMSZ04         2252  840.4432            310             710
5     AMSZ01    AMSZ05         6136 1076.7916            310             710
6     AMSZ01    AMSZ06         2148  805.2979            310             710
7     AMSZ01    AMSZ07         1620 1798.7526            310             710
8     AMSZ01    AMSZ08         1925 2576.0199            310             710
9     AMSZ01    AMSZ09         1773 1204.2846            310             710
10    AMSZ01    AMSZ10           63 1417.8035            310             710
   ORIGIN_AGE25_64 DESTIN_AGE7_12 DESTIN_AGE13_24 DESTIN_AGE25_64 SCHOOL_COUNT
1             2780         310.00          710.00         2780.00         0.99
2             2780        1140.00         2770.00        15700.00         2.00
3             2780        1010.00         2650.00        14240.00         2.00
4             2780         980.00         2000.00        11320.00         1.00
5             2780         810.00         1920.00         9650.00         3.00
6             2780        1050.00         2390.00        12460.00         2.00
7             2780         420.00         1120.00         3620.00         0.99
8             2780         390.00         1150.00         4350.00         0.99
9             2780        1190.00         3260.00        13350.00         3.00
10            2780           0.99            0.99            0.99         1.00
   RETAIL_COUNT                       geometry
1          1.00 LINESTRING (29501.77 39419....
2          0.99 LINESTRING (29501.77 39419....
3          6.00 LINESTRING (29501.77 39419....
4          0.99 LINESTRING (29501.77 39419....
5          0.99 LINESTRING (29501.77 39419....
6          0.99 LINESTRING (29501.77 39419....
7          1.00 LINESTRING (29501.77 39419....
8        117.00 LINESTRING (29501.77 39419....
9          0.99 LINESTRING (29501.77 39419....
10        20.00 LINESTRING (29501.77 39419....
# Plain (non-spatial) lookup table: subzone code plus the two derived counts.
mpsz_tidy <- select(
  st_drop_geometry(mpsz),
  SUBZONE_C, SCHOOL_COUNT, BUSINESS_COUNT
)

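Now, we will append SCHOOL_COUNT and BUSINESS_COUNT fields from mpsz_tidy data.frame into flow_data sf tibble data.frame by using the code chunk below. Because flow_data already contains a SCHOOL_COUNT column, the existing column appears as SCHOOL_COUNT.x and the newly joined one as SCHOOL_COUNT.y in the result.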

# Attach the destination subzone's counts: DESTIN_SZ is matched to SUBZONE_C.
# NOTE(review): flow_data already has a SCHOOL_COUNT column, so the join yields
# SCHOOL_COUNT.x (existing) and SCHOOL_COUNT.y (from mpsz_tidy) -- confirm
# which of the two downstream code expects.
flow_data <- flow_data %>%
  left_join(mpsz_tidy,
            by = c("DESTIN_SZ" = "SUBZONE_C")) %>%
  # Shorter names for the trip-count and distance columns.
  rename(TRIPS = MORNING_PEAK,
         DIST = dist)
# Inspect every column after the join.
summary(flow_data)
  ORIGIN_SZ          DESTIN_SZ             TRIPS             DIST      
 Length:14734       Length:14734       Min.   :     1   Min.   :   50  
 Class :character   Class :character   1st Qu.:    14   1st Qu.: 3346  
 Mode  :character   Mode  :character   Median :    76   Median : 6067  
                                       Mean   :  1021   Mean   : 6880  
                                       3rd Qu.:   426   3rd Qu.: 9729  
                                       Max.   :232187   Max.   :26136  
 ORIGIN_AGE7_12    ORIGIN_AGE13_24    ORIGIN_AGE25_64    DESTIN_AGE7_12   
 Min.   :   0.99   Min.   :    0.99   Min.   :    0.99   Min.   :   0.99  
 1st Qu.: 240.00   1st Qu.:  440.00   1st Qu.: 2200.00   1st Qu.: 240.00  
 Median : 700.00   Median : 1350.00   Median : 6810.00   Median : 720.00  
 Mean   :1031.86   Mean   : 2268.84   Mean   :10487.62   Mean   :1033.40  
 3rd Qu.:1480.00   3rd Qu.: 3260.00   3rd Qu.:15770.00   3rd Qu.:1500.00  
 Max.   :6340.00   Max.   :16380.00   Max.   :74610.00   Max.   :6340.00  
 DESTIN_AGE13_24    DESTIN_AGE25_64    SCHOOL_COUNT.x    RETAIL_COUNT   
 Min.   :    0.99   Min.   :    0.99   Min.   : 0.990   Min.   :  0.99  
 1st Qu.:  460.00   1st Qu.: 2200.00   1st Qu.: 0.990   1st Qu.:  0.99  
 Median : 1420.00   Median : 7030.00   Median : 1.000   Median :  3.00  
 Mean   : 2290.35   Mean   :10574.46   Mean   : 1.987   Mean   : 16.47  
 3rd Qu.: 3260.00   3rd Qu.:15830.00   3rd Qu.: 2.000   3rd Qu.: 12.00  
 Max.   :16380.00   Max.   :74610.00   Max.   :12.000   Max.   :307.00  
 SCHOOL_COUNT.y   BUSINESS_COUNT            geometry    
 Min.   : 0.000   Min.   :  0.00   LINESTRING   :14734  
 1st Qu.: 0.000   1st Qu.:  0.00   epsg:3414    :    0  
 Median : 1.000   Median :  3.00   +proj=tmer...:    0  
 Mean   : 1.583   Mean   : 16.17                        
 3rd Qu.: 2.000   3rd Qu.: 12.00                        
 Max.   :12.000   Max.   :307.00                        

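The print report above reveals that the newly joined variables SCHOOL_COUNT.y and BUSINESS_COUNT contain 0 values, whereas ORIGIN_AGE7_12, ORIGIN_AGE13_24, ORIGIN_AGE25_64, DESTIN_AGE7_12, DESTIN_AGE13_24, DESTIN_AGE25_64 and SCHOOL_COUNT.x already have a minimum of 0.99.

In view of this, the code chunk below will be used to replace the zero values in BUSINESS_COUNT with 0.99.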

# Swap exact-zero BUSINESS_COUNT entries for 0.99; every other value is kept
# as it is. Only this one column is touched.
flow_data$BUSINESS_COUNT <- with(
  flow_data,
  ifelse(BUSINESS_COUNT == 0, 0.99, BUSINESS_COUNT)
)
summary(flow_data)
  ORIGIN_SZ          DESTIN_SZ             TRIPS             DIST      
 Length:14734       Length:14734       Min.   :     1   Min.   :   50  
 Class :character   Class :character   1st Qu.:    14   1st Qu.: 3346  
 Mode  :character   Mode  :character   Median :    76   Median : 6067  
                                       Mean   :  1021   Mean   : 6880  
                                       3rd Qu.:   426   3rd Qu.: 9729  
                                       Max.   :232187   Max.   :26136  
 ORIGIN_AGE7_12    ORIGIN_AGE13_24    ORIGIN_AGE25_64    DESTIN_AGE7_12   
 Min.   :   0.99   Min.   :    0.99   Min.   :    0.99   Min.   :   0.99  
 1st Qu.: 240.00   1st Qu.:  440.00   1st Qu.: 2200.00   1st Qu.: 240.00  
 Median : 700.00   Median : 1350.00   Median : 6810.00   Median : 720.00  
 Mean   :1031.86   Mean   : 2268.84   Mean   :10487.62   Mean   :1033.40  
 3rd Qu.:1480.00   3rd Qu.: 3260.00   3rd Qu.:15770.00   3rd Qu.:1500.00  
 Max.   :6340.00   Max.   :16380.00   Max.   :74610.00   Max.   :6340.00  
 DESTIN_AGE13_24    DESTIN_AGE25_64    SCHOOL_COUNT.x    RETAIL_COUNT   
 Min.   :    0.99   Min.   :    0.99   Min.   : 0.990   Min.   :  0.99  
 1st Qu.:  460.00   1st Qu.: 2200.00   1st Qu.: 0.990   1st Qu.:  0.99  
 Median : 1420.00   Median : 7030.00   Median : 1.000   Median :  3.00  
 Mean   : 2290.35   Mean   :10574.46   Mean   : 1.987   Mean   : 16.47  
 3rd Qu.: 3260.00   3rd Qu.:15830.00   3rd Qu.: 2.000   3rd Qu.: 12.00  
 Max.   :16380.00   Max.   :74610.00   Max.   :12.000   Max.   :307.00  
 SCHOOL_COUNT.y   BUSINESS_COUNT            geometry    
 Min.   : 0.000   Min.   :  0.99   LINESTRING   :14734  
 1st Qu.: 0.000   1st Qu.:  0.99   epsg:3414    :    0  
 Median : 1.000   Median :  3.00   +proj=tmer...:    0  
 Mean   : 1.583   Mean   : 16.47                        
 3rd Qu.: 2.000   3rd Qu.: 12.00                        
 Max.   :12.000   Max.   :307.00                        

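Notice that the 0 values in BUSINESS_COUNT have been replaced by 0.99. SCHOOL_COUNT.y was not modified and still has a minimum of 0, while SCHOOL_COUNT.x already has a minimum of 0.99.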

Before we move on to calibrate the Spatial Interaction Models, let us save flow_data sf tibble data.frame into an rds file. Call the file flow_data_tidy.

# Persist the updated flow data. This goes to rds/, not the rds0/ folder the
# input was read from, so the input file is not overwritten.
write_rds(flow_data,
          "In-class_Ex4/rds/flow_data_tidy.rds")