%matplotlib inline
from decimal import Decimal
#Building geodataframes
import geopandas as gpd
#Creating shapes
from shapely.geometry import shape, box
#GPD dependency for spatial analysis
import rtree
#Plotting maps
import matplotlib.pyplot as plt
#For manual legends and scalebar
import matplotlib.patches as mpatches
import matplotlib.lines as mlines
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import matplotlib.font_manager as fm
#Read in and save new rasters, converting raster to vector, reproject rasters
import rasterio
from rasterio import features
from rasterio.plot import reshape_as_image
from rasterio.warp import calculate_default_transform, reproject, Resampling
#For calculating stretch numbers for contrast adjustment
import numpy as np
#Gaussian Filter
from scipy import ndimage as ndi
#Import functions needed for image processing
from skimage import io, feature, exposure, img_as_float
from skimage.filters import sato, threshold_otsu
from skimage.color import rgb2gray
from skimage.restoration import denoise_bilateral
from skimage.morphology import remove_small_holes
#Import clustering function for unsupervised classification
from sklearn.cluster import KMeans
#Copy and manage files
import shutil
#Set and access directories
import os


#Set all variables - these are the function parameters. See README.txt for more information on parameters.

#Unprocessed geotiff of archaeological features
unprocessedRaster = 'TestRasters\\Safford_IMG1.tif'
#How much Gaussian blur during image processing?
blurValue = 30
#What sigma value do you want to use for bilateral denoising?
denoiseSigma = 0.18
#Where will the figure of the image processing steps be saved? (optional)
imageProcessFP = "FinalMaps\\"
#How many clusters for kmeans? (2 recommended)
setClusters = 2
#Where should the clustered rasters be saved (note that filepath is defined as raw string)?
clusterFP = r'ResultRasters\\'
#At what threshold do you want to remove noise from the binary image?
removeThresh = 1500
#Filepath for shapefile of manually mapped archaeological features in vicinity of test raster
manualFeatures = 'FeatureShapefiles\\Safford.shp'
#Filepath for binary raster (before shape conversion)
binaryRasterFP = 'ResultRasters\\'
#EPSG of a projected coordinate system in meters for final mapping
UTM_EPSG = "EPSG:32612"
#Where should the final map result be saved?
final_img_FP = 'FinalMaps\\'
#Where will the detected shapefiles be saved?
outSHP = 'DetectedShapefiles\\'


def importRaster(rasterFilepath):
    """Takes raster (as filepath of .tif) and reads all bands,
    returns reshaped numpy array into image for processing in scikit-image.
    See rasterio documentation: https://rasterio.readthedocs.io/en/latest/topics/image_processing.html"""
    img = rasterio.open(rasterFilepath).read()
    reshaped = reshape_as_image(img)
    return reshaped


def processImage(original, blurValue, denoiseSigma):
    """Processes reshaped raster
    Original is a numpy array of your raster image
    blurValue is the value for the Gaussian blur
    denoiseSigma is the value for bilateral denoise
    """
    #Convert to grayscale
    try:
        grayscale = rgb2gray(original)
    except Exception as e:
        print(e)
        return
    #Contrast enhancement
    #Code reference:
    #https://scikit-image.org/docs/dev/user_guide/transforming_image_data.html?highlight=contrast#contrast-and-exposure
    v_min, v_max = np.percentile(grayscale, (0.2,99.8))
    if blurValue == int or float:
        hi_contrast = exposure.rescale_intensity(grayscale, in_range=(v_min, v_max))
        contrast_blur = ndi.gaussian_filter(hi_contrast, blurValue)
    else: 
        print("blurValue must be a number!")
        return
    #Bilateral denoising
    #https://scikit-image.org/docs/stable/api/skimage.restoration.html#skimage.restoration.denoise_bilateral
    #https://scikit-image.org/docs/stable/auto_examples/filters/plot_denoise.html#bilateral-filter
    #Bilateral denoising used because it is "edge-preserving"
    #A larger sigma_color value "results in averaging of pixels with larger radiometric differences"
    denoised = denoise_bilateral(contrast_blur, sigma_color = denoiseSigma)
    #Sato filtering to detect ridges
    #Line-enhancement filter originally developed for detecting "curvilinear structures" like blood vessels in med. images
    #See Sato et al. 1997
    #See also discussion in scikit-image documentation:
    #"ridge filters...detect ridge structures where the intensity changes perpendicular but not along the structure."
    #https://scikit-image.org/docs/stable/auto_examples/edges/plot_ridge_filter.html
    sato_blur = sato(denoised)
    return sato_blur


def reshapeKmeans(processedImg):
    """Reshapes numpy array after processing to 1D array in processImage().
    Necessary for unsupervised classification.
    Code reference: https://towardsdatascience.com/sentinel-2-image-clustering-in-python-58f7f2c8a7f6"""
    kmeans_reshape = processedImg.reshape(processedImg.shape[0] * processedImg.shape[1], 1)
    return kmeans_reshape


#Not necessary for analysis process
#This function is the same as above, but also exports a figure showing the image process steps
def showImageProcess(original, blurValue, denoiseSigma, imageProcessFP):
    """Alternate function to process reshaped raster
    As with processImage above:
    Original is a numpy array of your raster image
    blurValue is the value for the Gaussian blur
    denoiseSigma is the value for bilateral denoise
    In addition, this function exports a figure showing the process results
    """
    original = importRaster(original)
    #Convert to grayscale
    try:
        grayscale = rgb2gray(original)
    except Exception as e:
        print(e)
        return
    #Contrast enhancement
    #Code reference:
    #https://scikit-image.org/docs/dev/user_guide/transforming_image_data.html?highlight=contrast#contrast-and-exposure
    v_min, v_max = np.percentile(grayscale, (0.2,99.8))
    if blurValue == int or float:
        hi_contrast = exposure.rescale_intensity(grayscale, in_range=(v_min, v_max))
        contrast_blur = ndi.gaussian_filter(hi_contrast, blurValue)
    else: 
        print("blurValue must be a number!")
        return
    #Bilateral denoising
    #https://scikit-image.org/docs/stable/api/skimage.restoration.html#skimage.restoration.denoise_bilateral
    #https://scikit-image.org/docs/stable/auto_examples/filters/plot_denoise.html#bilateral-filter
    #Bilateral denoising used because it is "edge-preserving"
    #A larger sigma_color value "results in averaging of pixels with larger radiometric differences"
    denoised = denoise_bilateral(contrast_blur, sigma_color = denoiseSigma)
    #Sato filtering to detect ridges
    #Line-enhancement filter originally developed for detecting "curvilinear structures" like blood vessels in med. images
    #See Sato et al. 1997
    #See also discussion in scikit-image documentation:
    #"ridge filters...detect ridge structures where the intensity changes perpendicular but not along the structure."
    #https://scikit-image.org/docs/stable/auto_examples/edges/plot_ridge_filter.html
    sato_blur = sato(denoised)
    fig, axes = plt.subplots(2, 3, figsize=(80, 40))
    ax = axes.ravel()
    axes[0,0].imshow(original, cmap=plt.cm.gray)
    axes[0,0].set_title('Original image', fontsize = 80)
    axes[0,0].axis('off')
    axes[0,1].imshow(grayscale, cmap=plt.cm.gray)
    axes[0,1].set_title('Grayscale', fontsize = 80)
    axes[0,1].axis('off')
    axes[0,2].imshow(hi_contrast, cmap=plt.cm.gray)
    axes[0,2].set_title('Contrast Adjustment', fontsize = 80)
    axes[0,2].axis('off')
    axes[1,0].imshow(contrast_blur, cmap=plt.cm.gray)
    axes[1,0].set_title('Gaussian Blur', fontsize = 80)
    axes[1,0].axis('off')
    axes[1,1].imshow(denoised, cmap=plt.cm.gray)
    axes[1,1].set_title('Bilateral Denoising', fontsize = 80)
    axes[1,1].axis('off')
    axes[1,2].imshow(sato_blur, cmap=plt.cm.gray)
    axes[1,2].set_title('Sato Filter', fontsize = 80)
    axes[1,2].axis('off')
    plt.savefig(imageProcessFP + "imageProcess.png", format = 'png')
    plt.tight_layout()
    return sato_blur


def detectFeats(setClusters, unprocessedRaster, clusterFP):
    """Applies Kmeans to processed raster.
    setClusters = number of clusters for final classification
    unprocessed raster = filepath of original geotiff
    clusterFP = Folder to save clustered rasters"""
    toProcess = importRaster(unprocessedRaster)
    print("Raster import complete")
    filter_blur = processImage(toProcess, blurValue, denoiseSigma)
    print("Image processing complete")
    kmeans = KMeans(n_clusters=setClusters).fit(reshapeKmeans(filter_blur))
    print("Processed image reshape complete")
    #Kmeans Clustering
    #https://scikit-learn.org/stable/modules/clustering.html#k-means
    #K means is a form of unsupervised classification
    #Separates image by n clusters depending on pixel values
    #Clustering script adapted from
    #https://medium.com/@sidakmenyadik/k-means-clustering-for-the-image-with-scikit-image-mri-scan-python-part-1-c2d2a319276
    clustered = kmeans.cluster_centers_[kmeans.labels_]
    labels = kmeans.labels_
    ogRaster = rasterio.open(unprocessedRaster)
    transform = ogRaster.transform
    print("Kmeans parameters specified")
    for n in range(setClusters):
        image_cluster = []
        for i in range(len(labels)):
            if(labels[i]) == n:
                image_cluster.append(float(clustered[i]))
            else:
                image_cluster.append(1)
        reshape_clustered = np.array(image_cluster).reshape(filter_blur.shape)
        #Save each kmeans result as a georeferenced geotiff using the transform from the original raster
        new_tif = rasterio.open(clusterFP + "clustered" + str(n) + '.tif', 'w', driver = 'GTiff',
                               height = reshape_clustered.shape[0], width = reshape_clustered.shape[1],
                               count = 3, dtype = reshape_clustered.dtype,
                               crs = '+proj=latlong', transform = transform)
        new_tif.write(reshape_clustered, 1)
        print(clusterFP + "clustered" + str(n) + '.tif' + " saved!")
        new_tif.close()
    imgList = []
    clusterList = []
    clusterpath = clusterFP[0:]
    print("Searching " + clusterpath + "...")
    #Find all tifs in filepath
    for tif in os.listdir(clusterpath):
        #Read in tifs and reshape
        tifname = clusterpath + tif
        image = reshape_as_image((rasterio.open(tifname)).read())
        #Add the filepath of each image to imgList
        imgList.append(str(clusterpath + tif))
        #Find average of numpy array
        #Appent to clusterList
        clusterList.append(np.average(image))
    maxIndex = clusterList.index(max(clusterList))
    #Return image with the highest average
    print("Image chosen")
    return rasterio.open(imgList[maxIndex])


def cleanNoise(kmeansraster, binaryRasterFP, removeThresh):
    """kmeansraster = classified raster of archaeological features
    removeThresh = threshold (size for small hole removal)"""
    raster = reshape_as_image(kmeansraster.read())
    gray = rgb2gray(raster)
    #Otsu threshold algorithm to create binary image
    #Script:
    #https://scikit-image.org/docs/dev/auto_examples/segmentation/plot_thresholding.html
    #We need to convert this raster to a binary image later to remove small objects
    #We want the features to be black for this step and for the masking to vectorize
    #Therefore, we're selecting the raster with the higher average value
    #non-feature area has higher value in the array
    thresh = threshold_otsu(gray)
    binaryThresh = gray > thresh
    arr = binaryThresh > 0
    #Remove small objects to remove noise from final result
    #Documentation:
    #https://scikit-image.org/docs/dev/api/skimage.morphology.html#skimage.morphology.remove_small_objects
    soRemoved = remove_small_holes(arr, area_threshold = removeThresh)
    #Rasterio can't write bool datatype rasters (i.e. binary rasters like this one) by default
    #passing nbits = 1 (from GDAL, which rasterio is built on) tells rasterio that this is a binary datatype
    #See https://gis.stackexchange.com/questions/338410/rasterio-invalid-dtype-bool
    profile = kmeansraster.profile
    with rasterio.open(binaryRasterFP + 'finalFeatures.tif', 'w', nbits = 1, **profile) as dst:
        dst.write(soRemoved, 1)
    print(binaryRasterFP + "finalFeatures.tif saved")
    return binaryRasterFP + 'finalFeatures.tif'


def vectorize(rasterFP, unprocessedRaster, outSHP):
    """With rasterFP (processed binary image), extract shapes via masking
    Save results in same CRS as unprocessedRaster (the Geotiff input)"""
    #Vectorize Raster
    #See Rasterio documentation https://rasterio.readthedocs.io/en/latest/topics/features.html
    transform = rasterio.open(unprocessedRaster).transform
    with rasterio.open(rasterFP) as src:
        black = np.float32(src.read())
    #The mask for this raster will be Band 1 values < 255 (everything that is not a feature)
    mask = black < 255
    print("Mask created")
    #Use feature.shapes to extract masks
    #Returns GeoJSON-like dictionary
    shapeDict = features.shapes(black, mask=mask, transform = transform)
    print("Features in dictionary")
    #Convert from the GeoJSON-like objects to a geodataframe
    #Reference: https://gis.stackexchange.com/questions/379412/how-to-create-a-geopandas-geodataframe-from-rasterio-features
    feats = []
    geoms = []
    #Build shapely polygons, append geometry to lists
    for key, value in shapeDict:
        feats.append(value)
        geoms.append(shape(key))
    ogRaster = rasterio.open(unprocessedRaster)
    #set CRS to original raster's CRS        
    crs = ogRaster.crs
    #Pass features and geometry lists to new geodataframe with original raster's CRS
    features_gdf = gpd.GeoDataFrame({'feats': feats, 'geometry': geoms }, crs = crs)
    print("Initial GDF built")
    #Select only features == 0 (the archaeological features themselves)
    archfeats = features_gdf[features_gdf.feats==0]
    print("GeoDataFrame cleaned")
    #Save features as shapefile
    archfeats.to_file(outSHP + "DetectedFeatures.shp")
    print("DetectedFeatures.shp saved")
    return archfeats


def assessAccuracy(manualFeatures, archFeats, unprocessedRaster, UTM_EPSG, final_img_FP):
    """manualFeatures = shapefile of previously mapped features for testing
    archFeats = detected features
    unprocessedRaster = original raster
    UTM_EPSG = EPSG coordinates of a system in meters, in format 'EPSG:####''
    final_img_FP = filepath for final map"""
    #Read in shapefile of mapped features to test model accuracy
    featsToClip = gpd.GeoDataFrame.from_file(manualFeatures)
    print("Manual features read in")
    #Turn original raster's bounding box into a geodataframe
    #https://gis.stackexchange.com/questions/352445/make-shapefile-from-raster-bounds-in-python
    OGRaster = rasterio.open(unprocessedRaster)
    bounds = OGRaster.bounds
    geoms = box(*bounds)
    boundingBox = gpd.GeoDataFrame({"id":1,"geometry":[geoms]})
    boundingBox = boundingBox.set_crs(crs = featsToClip.crs)
    print("BoundingBox calculated")
    #Clip features to bounding box of original raster
    if featsToClip.crs == boundingBox.crs:
        clippedFeats = gpd.clip(featsToClip, boundingBox)
        clippedFeats = clippedFeats[~clippedFeats.is_empty]
    else:
        print("Not in same CRS!", featsToClip.crs, boundingBox.crs)
        return
    clippedFeats = clippedFeats.to_crs(UTM_EPSG)
    print("Clipped Features clipped, projected")
    lengthsGeo = []
    for feats in clippedFeats['geometry']:
        lengthsGeo.append(feats.length)
    lenLin = sum(lengthsGeo)
    #Set CRS of archFeats == CRS of clippedFeats (UTM)
    archfeats = archFeats.to_crs(clippedFeats.crs)
    print("Detected Features projected")
    #Clip clippedFeats by polygon file of shapes captured by auto detection = detectedFeats
    if clippedFeats.crs == archfeats.crs:
        detectedFeats = gpd.clip(clippedFeats, archfeats)
        detectedFeats = detectedFeats[~detectedFeats.is_empty]
    else:
        print("Not in same CRS!", featsToClip.crs, boundingBox.crs)
        return
    #Calculate length of all features in new clipped file = lenDetected
    lengthsDet = []
    for feats in detectedFeats['geometry']:
        lengthsDet.append(feats.length)
    lenDetected = sum(lengthsDet)
    #lenDetected/lenLine * 100 = percentSuccess
    #percentSuccess = Decimal(lenDetected/lenLin * 100).quantize(Decimal("1.00"))
    percentSuccess = Decimal(lenDetected/lenLin * 100).quantize(Decimal("1.00"))
    print("Model detected approximately " + str(percentSuccess) + "% of all features")
    #Export final map
    #Using original raster as basemap, so has to be reprojected to the same UTM CRS
    dst_crs = clippedFeats.crs
    newRaster = os.path.dirname(unprocessedRaster) + "\\TestRaster_Proj.tif"
    #Copy raster to preserve CRS of original raster
    shutil.copy(unprocessedRaster, newRaster)
    print("Original raster copied")
    if OGRaster.crs != clippedFeats.crs:
        #Script reference:
        #https://stackoverflow.com/questions/60288953/how-to-change-the-crs-of-a-raster-with-rasterio
        with rasterio.open(unprocessedRaster) as src:
            transform, width, height = calculate_default_transform(src.crs, dst_crs, src.width, src.height, *src.bounds)
            kwargs = src.meta.copy()
            kwargs.update({
                'crs': dst_crs,
                'transform': transform,
                'width': width,
                'height': height
            })
           
            with rasterio.open(newRaster, 'w', **kwargs) as dst:
                for i in range(1, src.count + 1):
                    reproject(
                        source=rasterio.band(src, i),
                        destination=rasterio.band(dst, i),
                        src_transform=src.transform,
                        src_crs=src.crs,
                        dst_transform=transform,
                        dst_crs=dst_crs,
                        resampling=Resampling.nearest)
        projImage = rasterio.open(newRaster)
        print("Original raster reprojected")
    else:
        projImage = rasterio.open(unprocessedRaster)
    fontprops = fm.FontProperties(size=18)
    fig, ax = plt.subplots(1, 2, figsize = (20,20))
    rasterio.plot.show(projImage, ax=ax[0])
    clippedFeats.plot(ax = ax[0], color = 'white', alpha = .25)
    rasterio.plot.show(projImage, ax=ax[1])
    clippedFeats.plot(ax = ax[1], color = 'black')
    archfeats.plot(ax = ax[1], color='orange', alpha = .8)
    detectedFeats.plot(ax = ax[1], color='fuchsia')
    #Add AnchoredSizeBar artist as scalebar
    #https://stackoverflow.com/questions/39786714/how-to-insert-scale-bar-in-a-map-in-matplotlib
    #Because projection is in meters, pixel size is also in meters
    scalebar = AnchoredSizeBar(ax[0].transData,
                               50, '50 m', 'lower right', 
                               pad=0.1,
                               color='white',
                               frameon=False,
                               size_vertical=1,
                               fontproperties=fontprops)
    ax[0].add_artist(scalebar)
    ax[0].set_axis_off()
    ax[1].set_axis_off()
    #Set custom legend
    #See Documentation:
    #https://matplotlib.org/2.0.2/users/legend_guide.html
    autofeats = mpatches.Patch(color='orange', label='Automatically Detected from Image')
    black_line = mlines.Line2D([], [], color='black', label='Manually Mapped Features')
    yell_line = mlines.Line2D([], [], color='fuchsia', label='Successfully Captured')
    plt.legend(handles=[autofeats, black_line, yell_line],
              loc = 'lower center',
               bbox_to_anchor=(.73, .32),
              bbox_transform=fig.transFigure, ncol=3)
    ax[0].set_title("Original Image with Features Highlighted", fontsize = 22)
    ax[1].set_title("Automatic Detection - " + str(percentSuccess) + "% of Feature Length Detected", fontsize = 22)
    plt.tight_layout()
    plt.savefig(final_img_FP + "finalDetection.png", format = 'png')


#Image processing
#Image processing is included in the detectFeats function
#Here, the standalone image procesing function is run to display an example of the process results before detection
showImageProcess(unprocessedRaster, blurValue, denoiseSigma, imageProcessFP)

array([[0.0010234 , 0.00121824, 0.00100072, ..., 0.00171207, 0.00171631,
        0.00171844],
       [0.00101694, 0.00121064, 0.00099933, ..., 0.00255935, 0.00256567,
        0.00256886],
       [0.00100596, 0.00119771, 0.00099678, ..., 0.00338498, 0.0033933 ,
        0.00339749],
       ...,
       [0.00768267, 0.00767426, 0.00765845, ..., 0.00272158, 0.00259343,
        0.00230446],
       [0.00587602, 0.00586955, 0.00588355, ..., 0.00301353, 0.00312208,
        0.00306531],
       [0.00395019, 0.00446678, 0.00585088, ..., 0.00311852, 0.00400908,
        0.0035413 ]])


#Detect features
#Use clusters, raster path, and filepath for final cluster raster from variables set at beginning
detectedFeatures = detectFeats(setClusters, unprocessedRaster, clusterFP)

Raster import complete
Image processing complete
Processed image reshape complete
Kmeans parameters specified
ResultRasters\\clustered0.tif saved!
ResultRasters\\clustered1.tif saved!
Searching ResultRasters\\...
Image chosen


#Remove small "holes" i.e. small objects from final
#Use detectedFeatures raster as input, filepath for binary raster, and threshold from variables set at beginning
cleaned = cleanNoise(detectedFeatures, binaryRasterFP, removeThresh)

ResultRasters\finalFeatures.tif saved


#Vectorize raster created above
#input of raster created above, original unprocessedRaster (for crs info and profile), and output filepath from above vars
archFeats = vectorize(cleaned, unprocessedRaster, outSHP)

Mask created
Features in dictionary
Initial GDF built
GeoDataFrame cleaned

C:\Users\kathe\miniconda3\envs\528_py38\lib\site-packages\geopandas\io\file.py:362: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
  pd.Int64Index,

DetectedFeatures.shp saved


#Assess accuracy and visualize data
#Input shapefile of features mapped manually, vectorized features (above), desired projection in meters, and map filepath
assessAccuracy(manualFeatures, archFeats, unprocessedRaster, UTM_EPSG, final_img_FP)

Manual features read in
BoundingBox calculated
Clipped Features clipped, projected
Detected Features projected
Model detected approximately 61.69% of all features
Original raster copied
Original raster reprojected

Home

Automating Archaeological Feature Detection: Unsupervised Classification and Feature Extraction from Satellite Imagery¶

Import Modules¶

Set Variables¶

Defining functions¶

Analysis Demo¶