Katherine Peck
This page is a supplement to a poster presented in the "Remote Sensing, Part I: LiDAR and Satellite Imagery" session at the 88th Annual Meeting of the Society for American Archaeology, Portland, OR, 2023. Code references are included as comments throughout. This project was originally developed in Dr. Xi Gong's GEOG 528 (Advanced Programming for GIS) at the University of New Mexico.
This tool utilizes a number of Python modules, including rasterio
(for raster import/export), geopandas
(for building geodataframes), and matplotlib
(for creating maps and figures). This tool also utilizes scikit-image
(skimage
), a module which provides image processing functions in Python and scikit-learn
(sklearn
) a machine learning module.
%matplotlib inline
from decimal import Decimal
#Building geodataframes
import geopandas as gpd
#Creating shapes
from shapely.geometry import shape, box
#GPD dependency for spatial analysis
import rtree
#Plotting maps
import matplotlib.pyplot as plt
#For manual legends and scalebar
import matplotlib.patches as mpatches
import matplotlib.lines as mlines
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import matplotlib.font_manager as fm
#Read in and save new rasters, converting raster to vector, reproject rasters
import rasterio
from rasterio import features
from rasterio.plot import reshape_as_image
from rasterio.warp import calculate_default_transform, reproject, Resampling
#For calculating stretch numbers for contrast adjustment
import numpy as np
#Gaussian Filter
from scipy import ndimage as ndi
#Import functions needed for image processing
from skimage import io, feature, exposure, img_as_float
from skimage.filters import sato, threshold_otsu
from skimage.color import rgb2gray
from skimage.restoration import denoise_bilateral
from skimage.morphology import remove_small_holes
#Import clustering function for unsupervised classification
from sklearn.cluster import KMeans
#Copy and manage files
import shutil
#Set and access directories
import os
Users set tool variables at the beginning. Variables include filepaths for saving shapefiles, rasters, and maps and thresholds for specific functions. Each is described in the code comments.
#Set all variables - these are the function parameters. See README.txt for more information on parameters.
#Unprocessed geotiff of archaeological features
unprocessedRaster = 'TestRasters\\Safford_IMG1.tif'
#How much Gaussian blur during image processing?
blurValue = 30
#What sigma value do you want to use for bilateral denoising?
denoiseSigma = 0.18
#Where will the figure of the image processing steps be saved? (optional)
imageProcessFP = "FinalMaps\\"
#How many clusters for kmeans? (2 recommended)
setClusters = 2
#Where should the clustered rasters be saved (note that filepath is defined as raw string)?
clusterFP = r'ResultRasters\\'
#At what threshold do you want to remove noise from the binary image?
removeThresh = 1500
#Filepath for shapefile of manually mapped archaeological features in vicinity of test raster
manualFeatures = 'FeatureShapefiles\\Safford.shp'
#Filepath for binary raster (before shape conversion)
binaryRasterFP = 'ResultRasters\\'
#EPSG of a projected coordinate system in meters for final mapping
UTM_EPSG = "EPSG:32612"
#Where should the final map result be saved?
final_img_FP = 'FinalMaps\\'
#Where will the detected shapefiles be saved?
outSHP = 'DetectedShapefiles\\'
This tool is composed of a series of functions, defined and described below. This makes the tool more flexible, as each tool could be run on its own or adapted to a different purpose.
def importRaster(rasterFilepath):
"""Takes raster (as filepath of .tif) and reads all bands,
returns reshaped numpy array into image for processing in scikit-image.
See rasterio documentation: https://rasterio.readthedocs.io/en/latest/topics/image_processing.html"""
img = rasterio.open(rasterFilepath).read()
reshaped = reshape_as_image(img)
return reshaped
def processImage(original, blurValue, denoiseSigma):
"""Processes reshaped raster
Original is a numpy array of your raster image
blurValue is the value for the Gaussian blur
denoiseSigma is the value for bilateral denoise
"""
#Convert to grayscale
try:
grayscale = rgb2gray(original)
except Exception as e:
print(e)
return
#Contrast enhancement
#Code reference:
#https://scikit-image.org/docs/dev/user_guide/transforming_image_data.html?highlight=contrast#contrast-and-exposure
v_min, v_max = np.percentile(grayscale, (0.2,99.8))
if blurValue == int or float:
hi_contrast = exposure.rescale_intensity(grayscale, in_range=(v_min, v_max))
contrast_blur = ndi.gaussian_filter(hi_contrast, blurValue)
else:
print("blurValue must be a number!")
return
#Bilateral denoising
#https://scikit-image.org/docs/stable/api/skimage.restoration.html#skimage.restoration.denoise_bilateral
#https://scikit-image.org/docs/stable/auto_examples/filters/plot_denoise.html#bilateral-filter
#Bilateral denoising used because it is "edge-preserving"
#A larger sigma_color value "results in averaging of pixels with larger radiometric differences"
denoised = denoise_bilateral(contrast_blur, sigma_color = denoiseSigma)
#Sato filtering to detect ridges
#Line-enhancement filter originally developed for detecting "curvilinear structures" like blood vessels in med. images
#See Sato et al. 1997
#See also discussion in scikit-image documentation:
#"ridge filters...detect ridge structures where the intensity changes perpendicular but not along the structure."
#https://scikit-image.org/docs/stable/auto_examples/edges/plot_ridge_filter.html
sato_blur = sato(denoised)
return sato_blur
def reshapeKmeans(processedImg):
"""Reshapes numpy array after processing to 1D array in processImage().
Necessary for unsupervised classification.
Code reference: https://towardsdatascience.com/sentinel-2-image-clustering-in-python-58f7f2c8a7f6"""
kmeans_reshape = processedImg.reshape(processedImg.shape[0] * processedImg.shape[1], 1)
return kmeans_reshape
#Not necessary for analysis process
#This function is the same as above, but also exports a figure showing the image process steps
def showImageProcess(original, blurValue, denoiseSigma, imageProcessFP):
"""Alternate function to process reshaped raster
As with processImage above:
Original is a numpy array of your raster image
blurValue is the value for the Gaussian blur
denoiseSigma is the value for bilateral denoise
In addition, this function exports a figure showing the process results
"""
original = importRaster(original)
#Convert to grayscale
try:
grayscale = rgb2gray(original)
except Exception as e:
print(e)
return
#Contrast enhancement
#Code reference:
#https://scikit-image.org/docs/dev/user_guide/transforming_image_data.html?highlight=contrast#contrast-and-exposure
v_min, v_max = np.percentile(grayscale, (0.2,99.8))
if blurValue == int or float:
hi_contrast = exposure.rescale_intensity(grayscale, in_range=(v_min, v_max))
contrast_blur = ndi.gaussian_filter(hi_contrast, blurValue)
else:
print("blurValue must be a number!")
return
#Bilateral denoising
#https://scikit-image.org/docs/stable/api/skimage.restoration.html#skimage.restoration.denoise_bilateral
#https://scikit-image.org/docs/stable/auto_examples/filters/plot_denoise.html#bilateral-filter
#Bilateral denoising used because it is "edge-preserving"
#A larger sigma_color value "results in averaging of pixels with larger radiometric differences"
denoised = denoise_bilateral(contrast_blur, sigma_color = denoiseSigma)
#Sato filtering to detect ridges
#Line-enhancement filter originally developed for detecting "curvilinear structures" like blood vessels in med. images
#See Sato et al. 1997
#See also discussion in scikit-image documentation:
#"ridge filters...detect ridge structures where the intensity changes perpendicular but not along the structure."
#https://scikit-image.org/docs/stable/auto_examples/edges/plot_ridge_filter.html
sato_blur = sato(denoised)
fig, axes = plt.subplots(2, 3, figsize=(80, 40))
ax = axes.ravel()
axes[0,0].imshow(original, cmap=plt.cm.gray)
axes[0,0].set_title('Original image', fontsize = 80)
axes[0,0].axis('off')
axes[0,1].imshow(grayscale, cmap=plt.cm.gray)
axes[0,1].set_title('Grayscale', fontsize = 80)
axes[0,1].axis('off')
axes[0,2].imshow(hi_contrast, cmap=plt.cm.gray)
axes[0,2].set_title('Contrast Adjustment', fontsize = 80)
axes[0,2].axis('off')
axes[1,0].imshow(contrast_blur, cmap=plt.cm.gray)
axes[1,0].set_title('Gaussian Blur', fontsize = 80)
axes[1,0].axis('off')
axes[1,1].imshow(denoised, cmap=plt.cm.gray)
axes[1,1].set_title('Bilateral Denoising', fontsize = 80)
axes[1,1].axis('off')
axes[1,2].imshow(sato_blur, cmap=plt.cm.gray)
axes[1,2].set_title('Sato Filter', fontsize = 80)
axes[1,2].axis('off')
plt.savefig(imageProcessFP + "imageProcess.png", format = 'png')
plt.tight_layout()
return sato_blur
def detectFeats(setClusters, unprocessedRaster, clusterFP):
"""Applies Kmeans to processed raster.
setClusters = number of clusters for final classification
unprocessed raster = filepath of original geotiff
clusterFP = Folder to save clustered rasters"""
toProcess = importRaster(unprocessedRaster)
print("Raster import complete")
filter_blur = processImage(toProcess, blurValue, denoiseSigma)
print("Image processing complete")
kmeans = KMeans(n_clusters=setClusters).fit(reshapeKmeans(filter_blur))
print("Processed image reshape complete")
#Kmeans Clustering
#https://scikit-learn.org/stable/modules/clustering.html#k-means
#K means is a form of unsupervised classification
#Separates image by n clusters depending on pixel values
#Clustering script adapted from
#https://medium.com/@sidakmenyadik/k-means-clustering-for-the-image-with-scikit-image-mri-scan-python-part-1-c2d2a319276
clustered = kmeans.cluster_centers_[kmeans.labels_]
labels = kmeans.labels_
ogRaster = rasterio.open(unprocessedRaster)
transform = ogRaster.transform
print("Kmeans parameters specified")
for n in range(setClusters):
image_cluster = []
for i in range(len(labels)):
if(labels[i]) == n:
image_cluster.append(float(clustered[i]))
else:
image_cluster.append(1)
reshape_clustered = np.array(image_cluster).reshape(filter_blur.shape)
#Save each kmeans result as a georeferenced geotiff using the transform from the original raster
new_tif = rasterio.open(clusterFP + "clustered" + str(n) + '.tif', 'w', driver = 'GTiff',
height = reshape_clustered.shape[0], width = reshape_clustered.shape[1],
count = 3, dtype = reshape_clustered.dtype,
crs = '+proj=latlong', transform = transform)
new_tif.write(reshape_clustered, 1)
print(clusterFP + "clustered" + str(n) + '.tif' + " saved!")
new_tif.close()
imgList = []
clusterList = []
clusterpath = clusterFP[0:]
print("Searching " + clusterpath + "...")
#Find all tifs in filepath
for tif in os.listdir(clusterpath):
#Read in tifs and reshape
tifname = clusterpath + tif
image = reshape_as_image((rasterio.open(tifname)).read())
#Add the filepath of each image to imgList
imgList.append(str(clusterpath + tif))
#Find average of numpy array
#Appent to clusterList
clusterList.append(np.average(image))
maxIndex = clusterList.index(max(clusterList))
#Return image with the highest average
print("Image chosen")
return rasterio.open(imgList[maxIndex])
def cleanNoise(kmeansraster, binaryRasterFP, removeThresh):
"""kmeansraster = classified raster of archaeological features
removeThresh = threshold (size for small hole removal)"""
raster = reshape_as_image(kmeansraster.read())
gray = rgb2gray(raster)
#Otsu threshold algorithm to create binary image
#Script:
#https://scikit-image.org/docs/dev/auto_examples/segmentation/plot_thresholding.html
#We need to convert this raster to a binary image later to remove small objects
#We want the features to be black for this step and for the masking to vectorize
#Therefore, we're selecting the raster with the higher average value
#non-feature area has higher value in the array
thresh = threshold_otsu(gray)
binaryThresh = gray > thresh
arr = binaryThresh > 0
#Remove small objects to remove noise from final result
#Documentation:
#https://scikit-image.org/docs/dev/api/skimage.morphology.html#skimage.morphology.remove_small_objects
soRemoved = remove_small_holes(arr, area_threshold = removeThresh)
#Rasterio can't write bool datatype rasters (i.e. binary rasters like this one) by default
#passing nbits = 1 (from GDAL, which rasterio is built on) tells rasterio that this is a binary datatype
#See https://gis.stackexchange.com/questions/338410/rasterio-invalid-dtype-bool
profile = kmeansraster.profile
with rasterio.open(binaryRasterFP + 'finalFeatures.tif', 'w', nbits = 1, **profile) as dst:
dst.write(soRemoved, 1)
print(binaryRasterFP + "finalFeatures.tif saved")
return binaryRasterFP + 'finalFeatures.tif'
def vectorize(rasterFP, unprocessedRaster, outSHP):
"""With rasterFP (processed binary image), extract shapes via masking
Save results in same CRS as unprocessedRaster (the Geotiff input)"""
#Vectorize Raster
#See Rasterio documentation https://rasterio.readthedocs.io/en/latest/topics/features.html
transform = rasterio.open(unprocessedRaster).transform
with rasterio.open(rasterFP) as src:
black = np.float32(src.read())
#The mask for this raster will be Band 1 values < 255 (everything that is not a feature)
mask = black < 255
print("Mask created")
#Use feature.shapes to extract masks
#Returns GeoJSON-like dictionary
shapeDict = features.shapes(black, mask=mask, transform = transform)
print("Features in dictionary")
#Convert from the GeoJSON-like objects to a geodataframe
#Reference: https://gis.stackexchange.com/questions/379412/how-to-create-a-geopandas-geodataframe-from-rasterio-features
feats = []
geoms = []
#Build shapely polygons, append geometry to lists
for key, value in shapeDict:
feats.append(value)
geoms.append(shape(key))
ogRaster = rasterio.open(unprocessedRaster)
#set CRS to original raster's CRS
crs = ogRaster.crs
#Pass features and geometry lists to new geodataframe with original raster's CRS
features_gdf = gpd.GeoDataFrame({'feats': feats, 'geometry': geoms }, crs = crs)
print("Initial GDF built")
#Select only features == 0 (the archaeological features themselves)
archfeats = features_gdf[features_gdf.feats==0]
print("GeoDataFrame cleaned")
#Save features as shapefile
archfeats.to_file(outSHP + "DetectedFeatures.shp")
print("DetectedFeatures.shp saved")
return archfeats
def assessAccuracy(manualFeatures, archFeats, unprocessedRaster, UTM_EPSG, final_img_FP):
"""manualFeatures = shapefile of previously mapped features for testing
archFeats = detected features
unprocessedRaster = original raster
UTM_EPSG = EPSG coordinates of a system in meters, in format 'EPSG:####''
final_img_FP = filepath for final map"""
#Read in shapefile of mapped features to test model accuracy
featsToClip = gpd.GeoDataFrame.from_file(manualFeatures)
print("Manual features read in")
#Turn original raster's bounding box into a geodataframe
#https://gis.stackexchange.com/questions/352445/make-shapefile-from-raster-bounds-in-python
OGRaster = rasterio.open(unprocessedRaster)
bounds = OGRaster.bounds
geoms = box(*bounds)
boundingBox = gpd.GeoDataFrame({"id":1,"geometry":[geoms]})
boundingBox = boundingBox.set_crs(crs = featsToClip.crs)
print("BoundingBox calculated")
#Clip features to bounding box of original raster
if featsToClip.crs == boundingBox.crs:
clippedFeats = gpd.clip(featsToClip, boundingBox)
clippedFeats = clippedFeats[~clippedFeats.is_empty]
else:
print("Not in same CRS!", featsToClip.crs, boundingBox.crs)
return
clippedFeats = clippedFeats.to_crs(UTM_EPSG)
print("Clipped Features clipped, projected")
lengthsGeo = []
for feats in clippedFeats['geometry']:
lengthsGeo.append(feats.length)
lenLin = sum(lengthsGeo)
#Set CRS of archFeats == CRS of clippedFeats (UTM)
archfeats = archFeats.to_crs(clippedFeats.crs)
print("Detected Features projected")
#Clip clippedFeats by polygon file of shapes captured by auto detection = detectedFeats
if clippedFeats.crs == archfeats.crs:
detectedFeats = gpd.clip(clippedFeats, archfeats)
detectedFeats = detectedFeats[~detectedFeats.is_empty]
else:
print("Not in same CRS!", featsToClip.crs, boundingBox.crs)
return
#Calculate length of all features in new clipped file = lenDetected
lengthsDet = []
for feats in detectedFeats['geometry']:
lengthsDet.append(feats.length)
lenDetected = sum(lengthsDet)
#lenDetected/lenLine * 100 = percentSuccess
#percentSuccess = Decimal(lenDetected/lenLin * 100).quantize(Decimal("1.00"))
percentSuccess = Decimal(lenDetected/lenLin * 100).quantize(Decimal("1.00"))
print("Model detected approximately " + str(percentSuccess) + "% of all features")
#Export final map
#Using original raster as basemap, so has to be reprojected to the same UTM CRS
dst_crs = clippedFeats.crs
newRaster = os.path.dirname(unprocessedRaster) + "\\TestRaster_Proj.tif"
#Copy raster to preserve CRS of original raster
shutil.copy(unprocessedRaster, newRaster)
print("Original raster copied")
if OGRaster.crs != clippedFeats.crs:
#Script reference:
#https://stackoverflow.com/questions/60288953/how-to-change-the-crs-of-a-raster-with-rasterio
with rasterio.open(unprocessedRaster) as src:
transform, width, height = calculate_default_transform(src.crs, dst_crs, src.width, src.height, *src.bounds)
kwargs = src.meta.copy()
kwargs.update({
'crs': dst_crs,
'transform': transform,
'width': width,
'height': height
})
with rasterio.open(newRaster, 'w', **kwargs) as dst:
for i in range(1, src.count + 1):
reproject(
source=rasterio.band(src, i),
destination=rasterio.band(dst, i),
src_transform=src.transform,
src_crs=src.crs,
dst_transform=transform,
dst_crs=dst_crs,
resampling=Resampling.nearest)
projImage = rasterio.open(newRaster)
print("Original raster reprojected")
else:
projImage = rasterio.open(unprocessedRaster)
fontprops = fm.FontProperties(size=18)
fig, ax = plt.subplots(1, 2, figsize = (20,20))
rasterio.plot.show(projImage, ax=ax[0])
clippedFeats.plot(ax = ax[0], color = 'white', alpha = .25)
rasterio.plot.show(projImage, ax=ax[1])
clippedFeats.plot(ax = ax[1], color = 'black')
archfeats.plot(ax = ax[1], color='orange', alpha = .8)
detectedFeats.plot(ax = ax[1], color='fuchsia')
#Add AnchoredSizeBar artist as scalebar
#https://stackoverflow.com/questions/39786714/how-to-insert-scale-bar-in-a-map-in-matplotlib
#Because projection is in meters, pixel size is also in meters
scalebar = AnchoredSizeBar(ax[0].transData,
50, '50 m', 'lower right',
pad=0.1,
color='white',
frameon=False,
size_vertical=1,
fontproperties=fontprops)
ax[0].add_artist(scalebar)
ax[0].set_axis_off()
ax[1].set_axis_off()
#Set custom legend
#See Documentation:
#https://matplotlib.org/2.0.2/users/legend_guide.html
autofeats = mpatches.Patch(color='orange', label='Automatically Detected from Image')
black_line = mlines.Line2D([], [], color='black', label='Manually Mapped Features')
yell_line = mlines.Line2D([], [], color='fuchsia', label='Successfully Captured')
plt.legend(handles=[autofeats, black_line, yell_line],
loc = 'lower center',
bbox_to_anchor=(.73, .32),
bbox_transform=fig.transFigure, ncol=3)
ax[0].set_title("Original Image with Features Highlighted", fontsize = 22)
ax[1].set_title("Automatic Detection - " + str(percentSuccess) + "% of Feature Length Detected", fontsize = 22)
plt.tight_layout()
plt.savefig(final_img_FP + "finalDetection.png", format = 'png')
Once the functions are defined, the tool can be run using four lines of code. showImageProcess()
is optional but, when run, will display and save an image showing the image processing steps for the input raster.
#Image processing
#Image processing is included in the detectFeats function
#Here, the standalone image procesing function is run to display an example of the process results before detection
showImageProcess(unprocessedRaster, blurValue, denoiseSigma, imageProcessFP)
array([[0.0010234 , 0.00121824, 0.00100072, ..., 0.00171207, 0.00171631, 0.00171844], [0.00101694, 0.00121064, 0.00099933, ..., 0.00255935, 0.00256567, 0.00256886], [0.00100596, 0.00119771, 0.00099678, ..., 0.00338498, 0.0033933 , 0.00339749], ..., [0.00768267, 0.00767426, 0.00765845, ..., 0.00272158, 0.00259343, 0.00230446], [0.00587602, 0.00586955, 0.00588355, ..., 0.00301353, 0.00312208, 0.00306531], [0.00395019, 0.00446678, 0.00585088, ..., 0.00311852, 0.00400908, 0.0035413 ]])