Preparing HDB data

Author

Dr. Kam Tin Seong

pacman::p_load(tidyverse, sf, httr, jsonlite, rvest, tmap)
resale <- read_csv("data/HDB/rawdata/resale.csv") %>%
  filter(month >= "2023-01" & month <= "2024-09")
Rows: 192234 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (8): month, town, flat_type, block, street_name, storey_range, flat_mode...
dbl (3): floor_area_sqm, lease_commence_date, resale_price

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
resale_tidy <- resale %>%
  mutate(address = paste(block,street_name)) %>%
  mutate(remaining_lease_yr = as.integer(
    str_sub(remaining_lease, 0, 2)))%>%
  mutate(remaining_lease_mth = as.integer(
    str_sub(remaining_lease, 9, 11)))
resale_selected <- resale_tidy %>%
  filter(month == "2024-09")
add_list <- sort(unique(resale_selected$address))
get_coords <- function(add_list){
  
  # Create a data frame to store all retrieved coordinates
  postal_coords <- data.frame()
    
  for (i in add_list){
    #print(i)

    r <- GET('https://www.onemap.gov.sg/api/common/elastic/search?',
           query=list(searchVal=i,
                     returnGeom='Y',
                     getAddrDetails='Y'))
    data <- fromJSON(rawToChar(r$content))
    found <- data$found
    res <- data$results
    
    # Create a new data frame for each address
    new_row <- data.frame()
    
    # If single result, append 
    if (found == 1){
      postal <- res$POSTAL 
      lat <- res$LATITUDE
      lng <- res$LONGITUDE
      new_row <- data.frame(address= i, 
                            postal = postal, 
                            latitude = lat, 
                            longitude = lng)
    }
    
    # If multiple results, drop NIL and append top 1
    else if (found > 1){
      # Remove those with NIL as postal
      res_sub <- res[res$POSTAL != "NIL", ]
      
      # Set as NA first if no Postal
      if (nrow(res_sub) == 0) {
          new_row <- data.frame(address= i, 
                                postal = NA, 
                                latitude = NA, 
                                longitude = NA)
      }
      
      else{
        top1 <- head(res_sub, n = 1)
        postal <- top1$POSTAL 
        lat <- top1$LATITUDE
        lng <- top1$LONGITUDE
        new_row <- data.frame(address= i, 
                              postal = postal, 
                              latitude = lat, 
                              longitude = lng)
      }
    }

    else {
      new_row <- data.frame(address= i, 
                            postal = NA, 
                            latitude = NA, 
                            longitude = NA)
    }
    
    # Add the row
    postal_coords <- rbind(postal_coords, new_row)
  }
  return(postal_coords)
}
coords <- get_coords(add_list)
write_rds(coords, "data/HDB/rds/coords.rds")

28 Oct u2024

Eldercare in shaprefile format - in svy21 projection but 2009

eldercare <- st_read(dsn = "data/HDB/rawdata",
                      layer = "ELDERCARE") %>%
  st_transform(crs = 3414)
Reading layer `ELDERCARE' from data source 
  `C:\zjho008\ISSS626-GAA\In-class_Ex\In-class_Ex08\data\HDB\rawdata' 
  using driver `ESRI Shapefile'
Simple feature collection with 133 features and 18 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 14481.92 ymin: 28218.43 xmax: 41665.14 ymax: 46804.9
Projected CRS: SVY21

Buffering

st_buffer() of sf package is used to create a buffer of 1km around each eldercare feature.

buffer_1km <- st_buffer(eldercare,
                        dist = 1000) # 1000metres = 1km

import kml file - in wgs84 since retrieved from internet

CHAS <- st_read("data/HDB/rawdata/CHASClinics.kml") %>%
  st_transform(crs = 3414)
Reading layer `MOH_CHAS_CLINICS' from data source 
  `C:\zjho008\ISSS626-GAA\In-class_Ex\In-class_Ex08\data\HDB\rawdata\CHASClinics.kml' 
  using driver `KML'
Simple feature collection with 1193 features and 2 fields
Geometry type: POINT
Dimension:     XYZ
Bounding box:  xmin: 103.5818 ymin: 1.016264 xmax: 103.9903 ymax: 1.456037
z_range:       zmin: 0 zmax: 0
Geodetic CRS:  WGS 84
CHAS #if spatstats then need to drop. but sf is okay.
Simple feature collection with 1193 features and 2 fields
Geometry type: POINT
Dimension:     XYZ
Bounding box:  xmin: 2.127126e-08 ymin: -5.585825e-07 xmax: 45475.65 ymax: 48626.7
z_range:       zmin: 0 zmax: 0
Projected CRS: SVY21 / Singapore TM
First 10 features:
     Name
1   kml_1
2   kml_2
3   kml_3
4   kml_4
5   kml_5
6   kml_6
7   kml_7
8   kml_8
9   kml_9
10 kml_10
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 Description
1                               <center><table><tr><th colspan='2' align='center'><em>Attributes</em></th></tr><tr bgcolor="#E3E3F3"> <th>HCI_CODE</th> <td>15M0211</td> </tr><tr bgcolor=""> <th>HCI_NAME</th> <td>Acumed Medical Group</td> </tr><tr bgcolor="#E3E3F3"> <th>LICENCE_TYPE</th> <td>MC</td> </tr><tr bgcolor=""> <th>HCI_TEL</th> <td>68615755</td> </tr><tr bgcolor="#E3E3F3"> <th>POSTAL_CD</th> <td>629117</td> </tr><tr bgcolor=""> <th>ADDR_TYPE</th> <td>B</td> </tr><tr bgcolor="#E3E3F3"> <th>BLK_HSE_NO</th> <td>1</td> </tr><tr bgcolor=""> <th>FLOOR_NO</th> <td>01</td> </tr><tr bgcolor="#E3E3F3"> <th>UNIT_NO</th> <td>23</td> </tr><tr bgcolor=""> <th>STREET_NAME</th> <td>JOO KOON CIRCLE</td> </tr><tr bgcolor="#E3E3F3"> <th>BUILDING_NAME</th> <td>FAIRPRICE HUB</td> </tr><tr bgcolor=""> <th>CLINIC_PROGRAMME_CODE</th> <td>CDMP,CHAS</td> </tr><tr bgcolor="#E3E3F3"> <th>X_COORDINATE</th> <td>10760.19030171</td> </tr><tr bgcolor=""> <th>Y_COORDINATE</th> <td>34206.12386783</td> </tr><tr bgcolor="#E3E3F3"> <th>INC_CRC</th> <td>781AF3FC7E4D0301</td> </tr><tr bgcolor=""> <th>FMEL_UPD_D</th> <td>20210926121846</td> </tr></table></center>
2  <center><table><tr><th colspan='2' align='center'><em>Attributes</em></th></tr><tr bgcolor="#E3E3F3"> <th>HCI_CODE</th> <td>16M0004</td> </tr><tr bgcolor=""> <th>HCI_NAME</th> <td>Acumed Medical Group</td> </tr><tr bgcolor="#E3E3F3"> <th>LICENCE_TYPE</th> <td>MC</td> </tr><tr bgcolor=""> <th>HCI_TEL</th> <td>65701390</td> </tr><tr bgcolor="#E3E3F3"> <th>POSTAL_CD</th> <td>637370</td> </tr><tr bgcolor=""> <th>ADDR_TYPE</th> <td>B</td> </tr><tr bgcolor="#E3E3F3"> <th>BLK_HSE_NO</th> <td>128</td> </tr><tr bgcolor=""> <th>FLOOR_NO</th> <td>01</td> </tr><tr bgcolor="#E3E3F3"> <th>UNIT_NO</th> <td>03</td> </tr><tr bgcolor=""> <th>STREET_NAME</th> <td>TUAS SOUTH AVENUE 3</td> </tr><tr bgcolor="#E3E3F3"> <th>BUILDING_NAME</th> <td>JTC BIOMED ONE @ TUAS BIOMEDICAL PARK</td> </tr><tr bgcolor=""> <th>CLINIC_PROGRAMME_CODE</th> <td>CDMP,CHAS</td> </tr><tr bgcolor="#E3E3F3"> <th>X_COORDINATE</th> <td>4985.53397365</td> </tr><tr bgcolor=""> <th>Y_COORDINATE</th> <td>30167.98804538</td> </tr><tr bgcolor="#E3E3F3"> <th>INC_CRC</th> <td>4132D1A643933D75</td> </tr><tr bgcolor=""> <th>FMEL_UPD_D</th> <td>20210926121846</td> </tr></table></center>
3                                   <center><table><tr><th colspan='2' align='center'><em>Attributes</em></th></tr><tr bgcolor="#E3E3F3"> <th>HCI_CODE</th> <td>21M0229</td> </tr><tr bgcolor=""> <th>HCI_NAME</th> <td>Advantage Medical Clinic</td> </tr><tr bgcolor="#E3E3F3"> <th>LICENCE_TYPE</th> <td>MC</td> </tr><tr bgcolor=""> <th>HCI_TEL</th> <td>87840093</td> </tr><tr bgcolor="#E3E3F3"> <th>POSTAL_CD</th> <td>268802</td> </tr><tr bgcolor=""> <th>ADDR_TYPE</th> <td>B</td> </tr><tr bgcolor="#E3E3F3"> <th>BLK_HSE_NO</th> <td>1</td> </tr><tr bgcolor=""> <th>FLOOR_NO</th> <td>03</td> </tr><tr bgcolor="#E3E3F3"> <th>UNIT_NO</th> <td>12</td> </tr><tr bgcolor=""> <th>STREET_NAME</th> <td>FIFTH AVENUE</td> </tr><tr bgcolor="#E3E3F3"> <th>BUILDING_NAME</th> <td>GUTHRIE HOUSE</td> </tr><tr bgcolor=""> <th>CLINIC_PROGRAMME_CODE</th> <td>CHAS</td> </tr><tr bgcolor="#E3E3F3"> <th>X_COORDINATE</th> <td>23812.58956922</td> </tr><tr bgcolor=""> <th>Y_COORDINATE</th> <td>34780.57533007</td> </tr><tr bgcolor="#E3E3F3"> <th>INC_CRC</th> <td>1429766D7056AA8D</td> </tr><tr bgcolor=""> <th>FMEL_UPD_D</th> <td>20210926121846</td> </tr></table></center>
4                          <center><table><tr><th colspan='2' align='center'><em>Attributes</em></th></tr><tr bgcolor="#E3E3F3"> <th>HCI_CODE</th> <td>19M0160</td> </tr><tr bgcolor=""> <th>HCI_NAME</th> <td>Affinity Medical Clinic</td> </tr><tr bgcolor="#E3E3F3"> <th>LICENCE_TYPE</th> <td>MC</td> </tr><tr bgcolor=""> <th>HCI_TEL</th> <td>62808080</td> </tr><tr bgcolor="#E3E3F3"> <th>POSTAL_CD</th> <td>550253</td> </tr><tr bgcolor=""> <th>ADDR_TYPE</th> <td>A</td> </tr><tr bgcolor="#E3E3F3"> <th>BLK_HSE_NO</th> <td>253</td> </tr><tr bgcolor=""> <th>FLOOR_NO</th> <td>01</td> </tr><tr bgcolor="#E3E3F3"> <th>UNIT_NO</th> <td>187</td> </tr><tr bgcolor=""> <th>STREET_NAME</th> <td>SERANGOON CENTRAL DRIVE</td> </tr><tr bgcolor="#E3E3F3"> <th>BUILDING_NAME</th> <td></td> </tr><tr bgcolor=""> <th>CLINIC_PROGRAMME_CODE</th> <td>CDMP,CHAS,ISP</td> </tr><tr bgcolor="#E3E3F3"> <th>X_COORDINATE</th> <td>32176.99089858</td> </tr><tr bgcolor=""> <th>Y_COORDINATE</th> <td>37348.34529089</td> </tr><tr bgcolor="#E3E3F3"> <th>INC_CRC</th> <td>25E32F8453AEABFD</td> </tr><tr bgcolor=""> <th>FMEL_UPD_D</th> <td>20210926121846</td> </tr></table></center>
5        <center><table><tr><th colspan='2' align='center'><em>Attributes</em></th></tr><tr bgcolor="#E3E3F3"> <th>HCI_CODE</th> <td>9404401</td> </tr><tr bgcolor=""> <th>HCI_NAME</th> <td>Allergy, Arthritis & Rheumatism Clinic</td> </tr><tr bgcolor="#E3E3F3"> <th>LICENCE_TYPE</th> <td>MD</td> </tr><tr bgcolor=""> <th>HCI_TEL</th> <td>64749438</td> </tr><tr bgcolor="#E3E3F3"> <th>POSTAL_CD</th> <td>217562</td> </tr><tr bgcolor=""> <th>ADDR_TYPE</th> <td>B</td> </tr><tr bgcolor="#E3E3F3"> <th>BLK_HSE_NO</th> <td>1</td> </tr><tr bgcolor=""> <th>FLOOR_NO</th> <td>14</td> </tr><tr bgcolor="#E3E3F3"> <th>UNIT_NO</th> <td>13</td> </tr><tr bgcolor=""> <th>STREET_NAME</th> <td>FARRER PARK STATION ROAD</td> </tr><tr bgcolor="#E3E3F3"> <th>BUILDING_NAME</th> <td>CONNEXION</td> </tr><tr bgcolor=""> <th>CLINIC_PROGRAMME_CODE</th> <td>CDMP,CHAS</td> </tr><tr bgcolor="#E3E3F3"> <th>X_COORDINATE</th> <td>30293.80179285</td> </tr><tr bgcolor=""> <th>Y_COORDINATE</th> <td>32752.14975386</td> </tr><tr bgcolor="#E3E3F3"> <th>INC_CRC</th> <td>E04B63460D9DF892</td> </tr><tr bgcolor=""> <th>FMEL_UPD_D</th> <td>20210926121846</td> </tr></table></center>
6                                        <center><table><tr><th colspan='2' align='center'><em>Attributes</em></th></tr><tr bgcolor="#E3E3F3"> <th>HCI_CODE</th> <td>17M0228</td> </tr><tr bgcolor=""> <th>HCI_NAME</th> <td>Allhealth Family Clinic</td> </tr><tr bgcolor="#E3E3F3"> <th>LICENCE_TYPE</th> <td>MC</td> </tr><tr bgcolor=""> <th>HCI_TEL</th> <td>67026467</td> </tr><tr bgcolor="#E3E3F3"> <th>POSTAL_CD</th> <td>523872</td> </tr><tr bgcolor=""> <th>ADDR_TYPE</th> <td>A</td> </tr><tr bgcolor="#E3E3F3"> <th>BLK_HSE_NO</th> <td>872C</td> </tr><tr bgcolor=""> <th>FLOOR_NO</th> <td>01</td> </tr><tr bgcolor="#E3E3F3"> <th>UNIT_NO</th> <td>01</td> </tr><tr bgcolor=""> <th>STREET_NAME</th> <td>TAMPINES STREET 86</td> </tr><tr bgcolor="#E3E3F3"> <th>BUILDING_NAME</th> <td></td> </tr><tr bgcolor=""> <th>CLINIC_PROGRAMME_CODE</th> <td>CHAS</td> </tr><tr bgcolor="#E3E3F3"> <th>X_COORDINATE</th> <td>38940.52504647</td> </tr><tr bgcolor=""> <th>Y_COORDINATE</th> <td>37479.79536068</td> </tr><tr bgcolor="#E3E3F3"> <th>INC_CRC</th> <td>5B0B49E22956F1B3</td> </tr><tr bgcolor=""> <th>FMEL_UPD_D</th> <td>20210926121846</td> </tr></table></center>
7                                          <center><table><tr><th colspan='2' align='center'><em>Attributes</em></th></tr><tr bgcolor="#E3E3F3"> <th>HCI_CODE</th> <td>19M0341</td> </tr><tr bgcolor=""> <th>HCI_NAME</th> <td>Ally Health</td> </tr><tr bgcolor="#E3E3F3"> <th>LICENCE_TYPE</th> <td>MC</td> </tr><tr bgcolor=""> <th>HCI_TEL</th> <td>66977700</td> </tr><tr bgcolor="#E3E3F3"> <th>POSTAL_CD</th> <td>140462</td> </tr><tr bgcolor=""> <th>ADDR_TYPE</th> <td>A</td> </tr><tr bgcolor="#E3E3F3"> <th>BLK_HSE_NO</th> <td>46-2</td> </tr><tr bgcolor=""> <th>FLOOR_NO</th> <td>01</td> </tr><tr bgcolor="#E3E3F3"> <th>UNIT_NO</th> <td>372</td> </tr><tr bgcolor=""> <th>STREET_NAME</th> <td>COMMONWEALTH DRIVE</td> </tr><tr bgcolor="#E3E3F3"> <th>BUILDING_NAME</th> <td></td> </tr><tr bgcolor=""> <th>CLINIC_PROGRAMME_CODE</th> <td>CDMP,CHAS,ISP</td> </tr><tr bgcolor="#E3E3F3"> <th>X_COORDINATE</th> <td>24094.68699586</td> </tr><tr bgcolor=""> <th>Y_COORDINATE</th> <td>31368.73817961</td> </tr><tr bgcolor="#E3E3F3"> <th>INC_CRC</th> <td>C5F9B14203C0367F</td> </tr><tr bgcolor=""> <th>FMEL_UPD_D</th> <td>20210926121846</td> </tr></table></center>
8       <center><table><tr><th colspan='2' align='center'><em>Attributes</em></th></tr><tr bgcolor="#E3E3F3"> <th>HCI_CODE</th> <td>16M0043</td> </tr><tr bgcolor=""> <th>HCI_NAME</th> <td>Ang Mo Kio Family Medicine Clinic</td> </tr><tr bgcolor="#E3E3F3"> <th>LICENCE_TYPE</th> <td>MC</td> </tr><tr bgcolor=""> <th>HCI_TEL</th> <td>65541133</td> </tr><tr bgcolor="#E3E3F3"> <th>POSTAL_CD</th> <td>569841</td> </tr><tr bgcolor=""> <th>ADDR_TYPE</th> <td></td> </tr><tr bgcolor="#E3E3F3"> <th>BLK_HSE_NO</th> <td>4190</td> </tr><tr bgcolor=""> <th>FLOOR_NO</th> <td>03</td> </tr><tr bgcolor="#E3E3F3"> <th>UNIT_NO</th> <td>01</td> </tr><tr bgcolor=""> <th>STREET_NAME</th> <td>ANG MO KIO AVENUE 6</td> </tr><tr bgcolor="#E3E3F3"> <th>BUILDING_NAME</th> <td>BROADWAY PLAZA</td> </tr><tr bgcolor=""> <th>CLINIC_PROGRAMME_CODE</th> <td>CDMP,CHAS,ISP</td> </tr><tr bgcolor="#E3E3F3"> <th>X_COORDINATE</th> <td>29405.66118025</td> </tr><tr bgcolor=""> <th>Y_COORDINATE</th> <td>39327.37454678</td> </tr><tr bgcolor="#E3E3F3"> <th>INC_CRC</th> <td>4A712102C8E400A3</td> </tr><tr bgcolor=""> <th>FMEL_UPD_D</th> <td>20210926121846</td> </tr></table></center>
9                            <center><table><tr><th colspan='2' align='center'><em>Attributes</em></th></tr><tr bgcolor="#E3E3F3"> <th>HCI_CODE</th> <td>16C0397</td> </tr><tr bgcolor=""> <th>HCI_NAME</th> <td>Anteh Dispensary Pte Ltd</td> </tr><tr bgcolor="#E3E3F3"> <th>LICENCE_TYPE</th> <td>MC</td> </tr><tr bgcolor=""> <th>HCI_TEL</th> <td>67440040</td> </tr><tr bgcolor="#E3E3F3"> <th>POSTAL_CD</th> <td>398664</td> </tr><tr bgcolor=""> <th>ADDR_TYPE</th> <td></td> </tr><tr bgcolor="#E3E3F3"> <th>BLK_HSE_NO</th> <td>1</td> </tr><tr bgcolor=""> <th>FLOOR_NO</th> <td>01</td> </tr><tr bgcolor="#E3E3F3"> <th>UNIT_NO</th> <td>02</td> </tr><tr bgcolor=""> <th>STREET_NAME</th> <td>LORONG 22 GEYLANG</td> </tr><tr bgcolor="#E3E3F3"> <th>BUILDING_NAME</th> <td>GRANDVIEW SUITES</td> </tr><tr bgcolor=""> <th>CLINIC_PROGRAMME_CODE</th> <td>CHAS</td> </tr><tr bgcolor="#E3E3F3"> <th>X_COORDINATE</th> <td>33468.48699051</td> </tr><tr bgcolor=""> <th>Y_COORDINATE</th> <td>32785.67660058</td> </tr><tr bgcolor="#E3E3F3"> <th>INC_CRC</th> <td>7E06EFB3781F029E</td> </tr><tr bgcolor=""> <th>FMEL_UPD_D</th> <td>20210926121846</td> </tr></table></center>
10        <center><table><tr><th colspan='2' align='center'><em>Attributes</em></th></tr><tr bgcolor="#E3E3F3"> <th>HCI_CODE</th> <td>20M0227</td> </tr><tr bgcolor=""> <th>HCI_NAME</th> <td>Appletree Medical Centre</td> </tr><tr bgcolor="#E3E3F3"> <th>LICENCE_TYPE</th> <td>MC</td> </tr><tr bgcolor=""> <th>HCI_TEL</th> <td>64548361</td> </tr><tr bgcolor="#E3E3F3"> <th>POSTAL_CD</th> <td>560416</td> </tr><tr bgcolor=""> <th>ADDR_TYPE</th> <td>A</td> </tr><tr bgcolor="#E3E3F3"> <th>BLK_HSE_NO</th> <td>416</td> </tr><tr bgcolor=""> <th>FLOOR_NO</th> <td>01</td> </tr><tr bgcolor="#E3E3F3"> <th>UNIT_NO</th> <td>997</td> </tr><tr bgcolor=""> <th>STREET_NAME</th> <td>ANG MO KIO AVENUE 10</td> </tr><tr bgcolor="#E3E3F3"> <th>BUILDING_NAME</th> <td>TECK GHEE HEARTLANDS</td> </tr><tr bgcolor=""> <th>CLINIC_PROGRAMME_CODE</th> <td>CDMP,CHAS,ISP</td> </tr><tr bgcolor="#E3E3F3"> <th>X_COORDINATE</th> <td>30424.4244873</td> </tr><tr bgcolor=""> <th>Y_COORDINATE</th> <td>38509.61595566</td> </tr><tr bgcolor="#E3E3F3"> <th>INC_CRC</th> <td>68380F0447C3CEAD</td> </tr><tr bgcolor=""> <th>FMEL_UPD_D</th> <td>20210926121846</td> </tr></table></center>
                        geometry
1  POINT Z (10760.19 34206.12 0)
2  POINT Z (4985.534 30167.99 0)
3  POINT Z (23812.59 34780.58 0)
4  POINT Z (32176.99 37348.35 0)
5   POINT Z (30293.8 32752.15 0)
6   POINT Z (38940.53 37479.8 0)
7  POINT Z (24094.69 31368.74 0)
8  POINT Z (29405.66 39327.37 0)
9  POINT Z (33468.49 32785.68 0)
10 POINT Z (30424.42 38509.62 0)

Visualising Polygon then point (otherwise point will cover over)

tmap_mode("view")
tmap mode set to interactive viewing
tm_shape(buffer_1km) + # 1km circle then counting the dots - CHAS clinics
  tm_polygons() +
tm_shape(CHAS) +
  tm_dots()

Counting Points (point in polygon count - a boundary is required done by creating the buffer)

buffer_1km$pts_count <- lengths( # pts_count is a new field created
  st_intersects(buffer_1km, CHAS))

Note one dot outside of SG - to fix or remove if possible.

Data Preparation

Proximity - data from data.gov.sg. they are in kml file - are in wgs84 and have to do st_transform.

Retail site - shopping mall list - use the httr.

Overlap is workable if building hedonic model. for geospatial then no even so if using gwr. - same blocks of buildings will have multiple transactions.

There is also the function of spatstat jitter. Look at in class ex 8 for spatial jitter