Section 18 Appendix: Setting up the PostGIS database
Setting up a PostGIS database often involves importing data held on the file system as shapefiles.
For example, Natural England provides open access (after registering as a user) to many shapefiles, such as the priority habitat inventory for the south of England.
The priority habitats shapefile for the south of England occupies over 3GB when downloaded and unzipped.
The following instructions are only directly relevant to the BU server. However, the concepts can be implemented on any system running PostGIS.
18.1 Creating a database
Code to construct the database has been wrapped up into some higher-level functions.
knitr::opts_chunk$set(echo = TRUE, eval = FALSE)
A fresh database can be created from the command line by an authorised PostGIS superuser. As the superuser can drop any table at any time, or even delete the whole database, we do not usually log on as the superuser. Details for reproducibility are in the R package for the course.
# library(giscourse)
# giscourse::make_db("gis_course2")
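The body of make_db() is not reproduced here. A minimal sketch of the steps such a function presumably wraps, assuming the docker superuser credentials and the postgis host used elsewhere in this appendix, is to create an empty database and enable the PostGIS extension (or, as in the restore section below, to create the database from the template_postgis template).
# Sketch only: create a database and enable PostGIS (the real make_db() may differ)
# system("PGPASSWORD=docker createdb -h postgis -U docker gis_course2")
# system("PGPASSWORD=docker psql -h postgis -U docker -d gis_course2 -c 'CREATE EXTENSION postgis'")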
18.2 Connecting to the server
Queries can be run in the database directly from R. The initial database does not have the plr extension, so this needs to be created.
# conn<-sconnect(db="gis_course2")
# dbSendQuery(conn, "create extension plr")
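sconnect() is a convenience wrapper from the giscourse package. A connection can also be opened directly through DBI; a sketch, assuming the docker user and the postgis host used in the rest of this appendix:
library(DBI)
library(RPostgreSQL)
# Manual equivalent of sconnect(): the credentials shown are the course Docker defaults
conn <- dbConnect(RPostgreSQL::PostgreSQL(), host = "postgis", dbname = "gis_course2",
                  user = "docker", password = "docker")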
18.3 Loading the data files
The Natural England data for the course consists of four large shapefiles. There are many ways to place them into the database. We could load them into R using st_read and then load them into the database using st_write. If the files were on a laptop or PC connected to the university network, but not on the same file system as PostGIS, they could be loaded through the QGIS data manager (figure coming up showing how).
The fastest way of loading a whole directory is to use the command line. The shp2pgsql loader is fast and efficient.
A cheatsheet on how to use shp2pgsql is provided here.
http://www.bostongis.com/pgsql2shp_shp2pgsql_quickguide.bqg
However, typing out commands for each file would get tedious. The R function sprintf can be used to build a helper that makes this simpler.
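The add_shp() helper used below is part of the course package (its definition is printed further down by the bare add_shp line). A minimal sketch of such a function, built with sprintf as just described and assuming the British National Grid SRID (27700) and the docker credentials used elsewhere in this appendix:
# Sketch only: build a shp2pgsql | psql command string for one shapefile
add_shp_sketch <- function(flnm, tabnm = gsub("\\.shp$", "", tolower(flnm)),
                           pth = "big_data/shapefiles", db = "gis_course2") {
  sprintf("export PGPASSWORD='docker'; shp2pgsql -I -s 27700 %s/%s %s | psql -h postgis -U docker -d %s",
          pth, flnm, tabnm, db)
}
# e.g. system(add_shp_sketch(flnms[1], tabnm = "ancient_woodland")) once flnms is defined below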
flnms<-dir("big_data/shapefiles",pattern="shp")
flnms
This is completely BU-specific and assumes the use of the Docker containers. However, the same containers can be pulled and built on another system.
add_shp
db<-"gis_course2"
system(add_shp(flnm=flnms[1],tabnm="ancient_woodland",db=db))
system(add_shp(flnm=flnms[2],tabnm="counties",db=db))
system(add_shp(flnm=flnms[3],tabnm="local_reserves",db=db))
system(add_shp(flnm=flnms[4],tabnm="ph_v2_1",db=db))
system(add_shp(flnm=flnms[5],tabnm="sssi",db=db))
path<-"big_data/shapefiles/geology"
flnms<-dir(path,pattern="shp")
system(add_shp(flnm=flnms[1],pth=path,db=db))
system(add_shp(flnm=flnms[3],pth=path,db=db))
path<-"big_data/admin"
flnms<-dir(path,pattern="shp")
flnms
system(add_shp(flnm=flnms[1],pth=path,tabnm= "counties",db=db))
system(add_shp(flnm=flnms[2],pth=path,tabnm= "local_authorities",db=db))
system(add_shp(flnm=flnms[3],pth=path,tabnm= "parishes",db=db))
18.4 Loading OpenStreetMap data
A large amount of OpenStreetMap data can be loaded directly into the server using the osm2pgsql system command. The data file can be downloaded from here.
http://download.geofabrik.de/europe/great-britain/england.html
system("export PGPASSWORD='docker';osm2pgsql --slim --host postgis --user docker -d gis_course -C 12000 --hstore --prefix dorset -E 27700 big_data/dorset-latest.osm.pbf")
18.5 Loading directly
Smaller data sources can easily be added to the database directly from R. For example, the Natural Earth administrative districts are a lightweight data set consisting of fairly simple polygons.
library(dplyr)
library(rnaturalearth)
library(sf)
library(mapview)
nat_earth_countries<-st_as_sf(rnaturalearth::ne_countries(scale = 10))
write_sf(nat_earth_countries,conn,"nat_earth_countries_hi")
nat_earth_countries<-st_as_sf(rnaturalearth::ne_countries())
write_sf(nat_earth_countries,conn,"nat_earth_countries")
nat_earth_adm1<-ne_states(returnclass = "sf")
write_sf(nat_earth_adm1,conn,overwrite=TRUE)
query<- "CREATE INDEX adm_ind ON nat_earth_adm1 USING GIST (geometry) "
dbSendQuery(conn,query)
nat_earth_adm1 %>% filter(admin =="United Kingdom") %>% st_transform(27700)->nat_earth_uk
write_sf(nat_earth_uk,conn) # Writes to the database
dbListTables(conn)
query<- "drop table if exists calc_grass;create table calc_grass as select * from ph_v2_1 WHERE main_habit ILIKE '%calc%grass%'"
dbSendQuery(conn,query)
query<- "CREATE INDEX cg_ind ON calc_grass USING GIST (geom) "
dbSendQuery(conn,query)
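A quick sanity check (not part of the original workflow) is to pull a sample of the new table back into R and look at it:
calc_grass_check <- st_read(conn, query = "select * from calc_grass limit 100")
mapview(calc_grass_check)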
18.6 National grid graticules
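The Ordnance Survey national grid graticule shapefiles are loaded in bulk by wrapping add_shp in a one-line function and applying it to every shapefile in the directory with lapply.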
path<-"big_data/os_grid/OSGB_Grids-master/Shapefile"
flnms<-dir(path,pattern="shp")
flnms
f<-function(x) system(add_shp(flnm=x,pth=path,db=db))
lapply(flnms,f)
18.7 Lidar data
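The lidar digital surface model (DSM) and digital terrain model (DTM) tiles are supplied as ESRI ASCII grids and are merged into single raster tables (dsm2m and dtm2m) with the merge_rasters() helper, presumably from the course package.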
setwd("big_data/lidar")
db<-"gis_course2"
fls<-dir(pattern="DSM.*.asc")
merge_rasters(fls,dbn=db,tabnm="dsm2m")
fls<-dir(pattern="DTM.*.asc")
merge_rasters(fls,dbn=db,tabnm = "dtm2m")
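merge_rasters() is not reproduced here. One standard way to achieve the same result is the raster2pgsql loader that ships with PostGIS; a sketch, assuming the same docker credentials and the 27700 SRID used for the vector layers:
# Sketch only: tile all the DSM grids and load them into a single raster table
cmd <- sprintf("export PGPASSWORD='docker'; raster2pgsql -s 27700 -I -C -t 100x100 DSM*.asc dsm2m | psql -h postgis -U docker -d %s", db)
# system(cmd)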
18.8 Moving tables from other databases into the gis_course database
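Individual tables (or whole schemas) can be moved between databases by piping pg_dump straight into psql: -t dumps a single table, -n a whole schema, and the stream is restored directly into the target database without writing an intermediate file.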
# system("PGPASSWORD=docker pg_dump -h postgis -U docker -t gbif gis_course | PGPASSWORD=docker psql -h postgis -U docker -d gis_course2")
# system("PGPASSWORD=!SECRET! pg_dump -h postgis -U !SECRET! -t sssi_units_england bu | PGPASSWORD=!SECRET! psql -h postgis -U !SECRET! -d gis_course")
# system("PGPASSWORD=!SECRET! pg_dump -h postgis -U !SECRET! -n nat_england bu_data | PGPASSWORD=!SECRET! psql -h postgis -U !SECRET! -d gis_course")
18.9 Ordnance Survey road links and waterways
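The Ordnance Survey open road links are supplied as many tiled shapefiles. The first tile is read with st_read and written to a roads table, the remaining tiles are appended in a loop, and a spatial index is added once all the tiles are in place. The watercourse links are then loaded and indexed in the same way.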
library(sf)
library(mapview)
setwd("big_data/os_grid/data")
dir()
fls<-dir(pattern="RoadLink\\.shp$")
d<-st_read(fls[1])
st_write(d,conn,"roads")
for(i in 2:length(fls)){
d<-st_read(fls[i])
st_write(d,conn,"roads",append=TRUE)
}
query<- "CREATE INDEX rd_ind ON roads USING GIST (geometry) "
dbSendQuery(conn,query)
d<-st_read("WatercourseLink.shp")
st_write(d,conn,"waterways")
query<- "CREATE INDEX wt_ind ON waterways USING GIST (geometry) "
dbSendQuery(conn,query)
query<-"drop table if exists dorset_roads;create table dorset_roads as select r.* from roads r, dorset d where st_intersects(d.geometry,r.geometry); select * from dorset_roads"
d<-st_read(conn,query=query)
mapview(d)
# fls<-dir(pattern="Water.*.shp")
# d<-st_read(fls[1])
# st_write(d,conn,"watercourse")
# for(i in 2:50){
# d<-st_read(fls[i])
# st_write(d,conn,"watercourse",append=TRUE)
# }
# conn <- dbConnect("PostgreSQL", host = "postgis",
# dbname = "gis_course" ,user = "secret", password = "secret")
#
#
library(giscourse)
cn<-sconnect()
dbSendQuery(cn,"GRANT ALL PRIVILEGES on ALL TABLES IN SCHEMA PUBLIC TO gis_course")
#
#
#
# dbSendQuery(conn,"ALTER ROLE gis_course SET statement_timeout = 30000")
# dbSendQuery(conn,"update pg_attribute set attname=lower(attname);")
# dbSendQuery(conn,"GRANT ALL PRIVILEGES on ALL TABLES IN SCHEMA nat_england TO gis_course")
#
# dbDisconnect(conn)
18.9.1 Save and restore
Last run on April 21, when the course material was complete up to distribution modelling.
system("PGPASSWORD=docker pg_dump -h postgis -U docker gis_course | gzip > gis_course_backup.gz")
18.9.2 Restore
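The dump is restored by creating an empty database from the template_postgis template and then streaming the decompressed file through psql.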
# system("PGPASSWORD=docker createdb -U docker -h postgis --template=template_postgis gis_course_backup")
#
system("gunzip -c gis_course_backup.gz |PGPASSWORD=docker psql -h postgis -U docker gis_course_backup")
# system("gunzip < gis_course_backup.gz |PGPASSWORD=docker psql -h postgis -U docker gis_course")