Source code for vtra.preprocess.convert_hazard_data

"""Pre-process hazard data

Purpose
-------

Convert GeoTiff raster hazard datasets to shapefiles based on masking and selecting values from
    - Single-band raster files
    - Multi-band (3-bands) raster files

Input data requirements
-----------------------

1. Correct paths to all hazard datasets
2. Single-band GeoTiff hazard raster files with:
    - values - between 0 and 1000
    - raster grid geometry
    - projection systems: Default assumed = EPSG:32648

3. Multi-band GeoTiff hazard raster files with:
    - 3-bands
    - values - in each band between 0 and 255
    - raster grid geometry
    - projection systems: Default assumed = EPSG:32648

Results
-------

1. Shapefiles whose names show the hazard models and their selected range of values
    - ID - equal to 1
    - geometry - Shapely Polygon outline of selected hazard

"""

import os
import subprocess
import json
import sys


from vtra.utils import load_config

import fiona
import fiona.crs
import rasterio
import numpy as np
import pandas as pd

[docs]def glofris_data_details(file_name,root_dir): """Read names of GLOFRIS files and create attributes Parameters - file_name - String name of GeoTff file - root_dir - String path to directory of file Outputs df - Pandas DataFrame written to csv file with columns: - file_name - String - hazard_type - String - year - Integer: 2016 or 2030 - climate_scenario - String: RCP4.5 or RCP8.5 or none - probability - Float: 1/(return period) - banded - Boolean: True or False - bands - Integer """ for root, dirs, files in os.walk(root_dir): for file in files: if file.endswith(".tif") or file.endswith(".tiff"): fname = file.split('.tif') fname = fname[0] print (fname) if '2030' in fname: year = 2030 else: year = 2016 if 'rcp4p5' in fname: sc = 'rcp 4.5' elif 'rcp8p5' in fname: sc = 'rcp 8.5' else: sc = 'none' f_all.append((fname,'flooding',year,sc,1.0/float(fname[-5:]),'FALSE','none')) df = pd.DataFrame(f_all,columns = ['file_name', 'hazard_type', 'year', 'climate_scenario', 'probability','banded', 'bands']) df.to_csv(os.path.join(root_dir,'glofris_files.csv'),index = False)
[docs]def raster_rewrite(in_raster,out_raster,nodata): """Rewrite a raster to reproject and change no data value Parameters - in_raster - String name of input GeoTff file path - out_raster - String name of output GeoTff file path - nodata - Float value of data that is treated as no data Outputs Reproject and replace raster with nodata = -1 """ with rasterio.open(in_raster) as dataset: data_array = dataset.read() data_array[np.where(np.isnan(data_array))] = nodata with rasterio.open(out_raster, 'w', driver='GTIff', height=data_array.shape[1], # numpy of rows width=data_array.shape[2], # number of columns count=dataset.count, # number of bands dtype=data_array.dtype, # this must match the dtype of our array crs=dataset.crs, transform=dataset.transform) as out_data: out_data.write(data_array) # optional second parameter is the band number to write to out_data.nodata = -1 # set the raster's nodata value os.remove(in_raster) os.rename(out_raster,in_raster)
[docs]def raster_projections_and_databands(file_path): """Extract projection, data bands numbers and valuees from raster Parameters - file_path - String name of input GeoTff file path Outputs - counts - Number of bans in raster - crs - Projection system of raster - data_vals - Numpy array of raster values """ with rasterio.open(file_path) as dataset: counts = dataset.count if dataset.crs: crs = dataset.crs.to_string() else: crs = 'invalid/unknown' data_array = dataset.read() if dataset.count > 1: data_list = [] for i in range(0,dataset.count): data_list.append(data_array[i].reshape(dataset.height*dataset.width).tolist()) data_vals = list(set(list(zip(*data_list)))) else: data_vals = list(set(data_array.reshape(dataset.count*dataset.height*dataset.width).tolist())) if all(isinstance(x, int) for x in data_vals) is False: data_vals = [] return counts,crs, data_vals
[docs]def convert_geotiff_to_vector_with_threshold(from_threshold,to_threshold, infile, infile_epsg,tmpfile_1, tmpfile_2, outfile): """Convert GeoTiff raster file to Shapefile with geometries based on raster threshold ranges Parameters - from_threshold - Float value of lower bound of GeoTiff threshold value to be selected - to_threshold - Float value of upper bound of GeoTiff threshold value to be selected - infile - String name of input GeoTff file path - infile_epsg - Integer value of EPSG Projection number of raster - tmpfile_1 - Stirng name of tmp file 1 - tmpfile_2 - Stirng name of tmp file 2 - outfile - Stirng name of output shapefile Outputs Shapefile with Polygon geometries of rasters based on raster threshold ranges """ args = [ "gdal_calc.py", '-A', infile, '--outfile={}'.format(tmpfile_1), '--calc=logical_and(A>={0}, A<{1})'.format(from_threshold,to_threshold), '--type=Byte', '--NoDataValue=0', '--co=SPARSE_OK=YES', '--co=NBITS=1', '--quiet', '--co=COMPRESS=LZW' ] subprocess.run(args) subprocess.run([ "gdal_edit.py", '-a_srs', 'EPSG:{}'.format(infile_epsg), tmpfile_1 ]) subprocess.run([ "gdal_polygonize.py", tmpfile_1, '-q', '-f', 'ESRI Shapefile', tmpfile_2 ]) subprocess.run([ "ogr2ogr", '-a_srs', 'EPSG:{}'.format(infile_epsg), '-t_srs', 'EPSG:4326', outfile, tmpfile_2 ]) subprocess.run(["rm", tmpfile_1]) subprocess.run(["rm", tmpfile_2]) subprocess.run(["rm", tmpfile_2.replace('shp', 'shx')]) subprocess.run(["rm", tmpfile_2.replace('shp', 'dbf')]) subprocess.run(["rm", tmpfile_2.replace('shp', 'prj')])
[docs]def convert_geotiff_to_vector_with_multibands(band_colors, infile, infile_epsg,tmpfile_1, tmpfile_2, outfile): """Convert multi-band GeoTiff raster file to Shapefile with geometries based on raster band color values Parameters - band_colors - Tuple with 3-values each corresponding to the values in raster bands - infile - String name of input GeoTff file path - infile_epsg - Integer value of EPSG Projection number of raster - tmpfile_1 - Stirng name of tmp file 1 - tmpfile_2 - Stirng name of tmp file 2 - outfile - Stirng name of output shapefile Outputs Shapefile with Polygon geometries of rasters based on raster band values """ args = [ "gdal_calc.py", '-A', infile, '--A_band=1', '-B', infile, '--B_band=2', '-C', infile, '--C_band=3', '--outfile={}'.format(tmpfile_1), '--type=Byte', '--NoDataValue=0', '--calc=logical_and(A=={0}, B=={1},C=={2})'.format(band_colors[0],band_colors[1],band_colors[2]), '--co=SPARSE_OK=YES', '--co=NBITS=1', '--quiet', '--co=COMPRESS=LZW' ] subprocess.run(args) subprocess.run([ "gdal_edit.py", '-a_srs', 'EPSG:{}'.format(infile_epsg), tmpfile_1 ]) subprocess.run([ "gdal_polygonize.py", tmpfile_1, '-q', '-f', 'ESRI Shapefile', tmpfile_2 ]) subprocess.run([ "ogr2ogr", '-a_srs', 'EPSG:{}'.format(infile_epsg), '-t_srs', 'EPSG:4326', outfile, tmpfile_2 ]) subprocess.run(["rm", tmpfile_1]) subprocess.run(["rm", tmpfile_2]) subprocess.run(["rm", tmpfile_2.replace('shp', 'shx')]) subprocess.run(["rm", tmpfile_2.replace('shp', 'dbf')]) subprocess.run(["rm", tmpfile_2.replace('shp', 'prj')])
[docs]def convert(threshold, infile, tmpfile_1, outfile): """Convert GeoTiff raster file to Shapefile with geometries based on raster threshold less that 999 Parameters - threshold - Float value of lower bound of GeoTiff threshold value to be selected - infile - String name of input GeoTff file path - tmpfile_1 - Stirng name of tmp file 1 - outfile - Stirng name of output shapefile Outputs Shapefile with Polygon geometries of rasters based on raster values above a threshold """ args = [ "gdal_calc.py", '-A', infile, '--outfile={}'.format(tmpfile_1), '--calc=logical_and(A>={}, A<999)'.format(threshold), '--type=Byte', '--NoDataValue=0', '--co=SPARSE_OK=YES', '--co=NBITS=1', '--co=COMPRESS=LZW' ] subprocess.run(args) subprocess.run([ "gdal_polygonize.py", tmpfile_1, '-q', '-f', 'ESRI Shapefile', outfile ])
[docs]def main(): """Process hazard data 1. Specify the paths from where to read and write: - Input data - Hazard data 2. Supply input data and parameters - Thresholds of flood hazards - Values of bands to be selected - Color code of multi-band rasters - Specific file names that might require some specific operations """ data_path = load_config()['paths']['data'] root_dir = os.path.join(data_path,'Hazard_data') thresholds = [1,2,3,4,999] band_vals_1 = [3,4] band_vals_2 = [4,5] color_codes_1 = [(255,190,190),(245,0,0),(255,0,0)] color_codes_2 = [(255,170,0),(255,128,0)] specific_files = ['LSZ_NgheAn_to_PhuYen.tif'] f_all = [] for root, dirs, files in os.walk(root_dir): for file in files: if file.endswith(".tif") or file.endswith(".tiff"): band_nums, crs, unique_data_values = raster_projections_and_databands(os.path.join(root, file)) print (file,crs, unique_data_values) if 'epsg' in crs: crs_split = crs.split(':') s_crs = [int(c) for c in crs_split if c.isdigit() is True][0] else: s_crs = 32648 if not unique_data_values: # threshold based datasets for t in range(len(thresholds)-1): thr_1 = thresholds[t] thr_2 = thresholds[t+1] in_file = os.path.join(root,file) tmp_1 = os.path.join(root,file.split(".tif")[0] + '_mask.tiff') tmp_2 = os.path.join(root,file.split(".tif")[0] + '_mask.shp') out_file = os.path.join(root,file.split(".tif")[0] + '_{0}m-{1}m_threshold.shp'.format(thr_1,thr_2)) convert_geotiff_to_vector_with_threshold(thr_1,thr_2,in_file,s_crs,tmp_1, tmp_2, out_file) elif band_nums == 1: # code value based dataset if file in specific_files: code_vals = band_vals_1 else: code_vals = band_vals_2 for c in code_vals: in_file = os.path.join(root,file) tmp_1 = os.path.join(root,file.split(".tif")[0] + '_mask.tiff') tmp_2 = os.path.join(root,file.split(".tif")[0] + '_mask.shp') out_file = os.path.join(root,file.split(".tif")[0] + '_{}_band.shp'.format(c)) convert_geotiff_to_vector_with_threshold(c,c+1,in_file,s_crs,tmp_1, tmp_2, out_file) if band_nums == 3: # multi-band color datasets # remove nodata values from the bands in the raster raster_rewrite(os.path.join(root, file),os.path.join(root, 'test.tif'),0) for dv in unique_data_values: if dv in color_codes_1: thr = 5 bc = dv in_file = os.path.join(root,file) tmp_1 = os.path.join(root,file.split(".tif")[0] + '_mask.tiff') tmp_2 = os.path.join(root,file.split(".tif")[0] + '_mask.shp') out_file = os.path.join(root,file.split(".tif")[0] + '_{}_band.shp'.format(thr)) convert_geotiff_to_vector_with_multibands(bc,in_file,s_crs,tmp_1, tmp_2, out_file) elif dv in color_codes_2: thr = 4 bc = dv in_file = os.path.join(root,file) tmp_1 = os.path.join(root,file.split(".tif")[0] + '_mask.tiff') tmp_2 = os.path.join(root,file.split(".tif")[0] + '_mask.shp') out_file = os.path.join(root,file.split(".tif")[0] + '_{}_band.shp'.format(thr)) convert_geotiff_to_vector_with_multibands(bc,in_file,s_crs,tmp_1, tmp_2, out_file)
if __name__ == "__main__": main()