folium_choropleth_map_functions.py

folium_choropleth_map_functions.py#

# Functions for generating Folium-based choropleth maps

import folium
import geopandas
import numpy as np
import time
import os
from branca.utilities import color_brewer
# color_brewer source code: 
# https://github.com/python-visualization/branca/blob/main/branca/
# utilities.py

from selenium import webdriver

import branca.colormap as cm
# From https://python-visualization.github.io/folium/latest/
# advanced_guide/colormaps.html#StepColormap
from branca.colormap import StepColormap # From Folium's features.py
# source code: 
# https://github.com/python-visualization/folium/blob/main/
# folium/features.py

def cptt(
    starting_lat, starting_lon, gdf, 
    data_col, boundary_name_col,
    data_col_alias, boundary_name_alias,
    zoom_start=6, 
    bin_type='linear', 
    font_size='16px',
    bin_count=6, custom_threshold_list=[], 
    color_scheme='RdYlBu',
    tooltip_variable_list=[], tooltip_alias_list=[],
    save_html=True, save_screenshot=True,
    driver_window_width=3000,
    driver_window_height=1688,
    map_filename='map', html_map_folder='',
    png_map_folder='',
    geometry_col='geometry',
    tiles='OpenStreetMap', choropleth_opacity=0.6,
    add_boundary_labels=False, boundary_label_lon_shift=10,
    boundary_label_lat_shift=10, boundary_label_col='',
    round_boundary_labels=False,
    boundary_label_round_val=0,
    delete_html_file=False): 
    
    '''This function creates a chorolpeth map with a set of tooltips
    via folium.GeoJson(). (cptt stands for 'chorolpeth and tooltip.' 
    Creating these two items together saves
    storage space versus building them individually.
    The function also adds data labels to regions upon request.

    Note: Much of this function is based on the sample code found at 
    https://python-visualization.github.io/folium/latest/user_guide/
    geojson/geojson_popup_and_tooltip.html
    and https://python-visualization.github.io/folium/latest/user_guide/
    geojson/geojson.html .
    
    starting_lat and starting_lon: the initial latitude and
    longitude, respectively, to pass to folium.Map(). For maps
    of the contiguous 48 US states, consider using a starting latitude 
    of 38 and a starting longitude of -95.
    
    gdf: A GeoDataFrame containing both boundary outlines and statistical
    data to visualize. Note that, in order to reduce the size of the 
    resulting map, gdf will be condensed to include only those columns 
    necessary for creating the map and adding in tooltips.

    data_col: the column within gdf containing data to visualize.
    
    boundary_name_col: the column within gdf that displays names of
    the boundaries being visualized.

    data_col_alias and boundary_name_alias: The labels to use for
    data_col and county_boundary_name_col, respectively, within the 
    map's tooltips.

    zoom_start: The starting zoom setting to pass to folium.Map().

    bin_type: Set to 'linear' (the default argument) to create 
    equally spaced bins; 'percentile' to base choropleth colors on 
    percentiles (resulting in roughly equal numbers of results per bin);
    or 'custom' to pass in a list of custom bins. (The custom option can 
    be particularly useful when you wish to use the same set of bins for 
    multiple maps.)

    font_size: The size to use for data labels and colorbar fonts. Note 
    that increasing this size may cause colorbar labels to get cut off.

    bin_count: The number of separate colors to show within the map. This
    parameter will be ignored when bin_type is set to 'custom'.

    custom_threshold_list: A list of custom bin ranges to use for the map.
    Will only get applied when bin_type is set to 'custom'.
        
    color_scheme: The color scheme to use within the map (e.g. 'RdYlBu'). 
    Options can be found on https://colorbrewer2.org/ . 
    In order to reverse
    a scheme, add '_r' to the end (e.g. 'RdYlBu_r'). Source:
    https://github.com/python-visualization/branca/blob/main/branca/
    utilities.py

    tooltip_variable_list: A list of variables 
    (other than boundary_name_col
    and data_col, which will get added in automatically) 
    to display within the tooltip.

    tooltip_alias_list: The aliases to use for the variables within
    tooltip_variable_list.

    save_html: set to True to save this map as an HTML file.
    
    save_screenshot: set to True in order to generate a screenshot of 
    the map, then save it as a PNG file. In order for this screenshot 
    to get created, save_html must also be set to True.

    driver_window_width and driver_window_height: the default width and 
    height, respectively, of the Selenium driver window that will be 
    called to generate a screenshot. The default settings work well 
    for maps of the contiguous United States.

    map_filename: The name to use when saving the map. The script will add
    the correct extension (e.g. 'html') to this map, so leave that portion
    out of the argument.

    html_map_folder and png_map_folder: The folders 
    in which to store HTML and PNG versions of maps, respectively. 
    Note that, if you're generating screenshots, html_map_folder needs 
    to be an absolute path (so that it can get interpreted correctly by 
    the Selenium-driven browser). For instance, if your html_map_folder 
    is titled 'maps' and stored within your directory, pass 
    os.getcwd()+'/maps' as your html_map_folder argument.

    geometry_col: The column in gdf that stores shape boundary data.

    Tiles: the tile provider to use for your map. Note that different tile 
    services use different licenses for their tiles. If you don't want 
    to use any tiles, enter None (no quotes) as your argument 
    for this parameter.

    choropleth_opacity: a float, ranging from 0 to 1, that determines
    how opaque to make the colors within the choropleth map. If tiles 
    is set to None, consider setting choropleth_opacity to 1.

    add_boundary_labels: Set to True to label each boundary within 
    the map.
    
    boundary_label_lon_shift and boundary_label_lat_shift: Integers that 
    specify how far west and north to shift boundary labels so that they 
    appear more centered. (These values will get passed to the 
    icon_anchor parameter of the DivIcons.) Increases to the 
    font_size argument should likely be followed by increases to 
    boundary_label_lon_shift and_or boundary_label_lat_shift.

    boundary_label_col: The column to use as a source for boundary labels.
    You may choose to set this to be the same as data_col or boundary_name
    col, but you're not limited to those options.

    round_boundary_labels: set to True to round boundary labels by
    boundary_label_round_val (see below).

    boundary_label_round_val: An integer specifying the amount by which to 
    round the boundary labels. Set to 0 to round to whole numbers, to 1 to
    round to tenths, and so forth.

    delete_html_file: set to True in order to delete the HTML file saved 
    by this function. (This can be useful when you are only calling this 
    function only to create a screenshot of a map. Once the screenshot 
    has been saved, the original HTML file would no longer need to be 
    retained.) Note that, if save_html is set to False, the program 
    will *not* attempt to delete the HTML file, as it's likely that one 
    doesn't exist to begin with.
    '''


    # Creating a condensed version of gdf that can serve as the basis for 
    # the map: (Condensing the DataFrame prevents unnecessary columns
    # from getting included in the HTML output and thus increasing
    # the map's file size.)

    gdf_condensed = gdf.copy()[
    [boundary_name_col, data_col, geometry_col] + tooltip_variable_list]

    # You can uncomment the following two print statements to compare
    # the sizes of gdf and gdf_condensed.
    
    # print(gdf.memory_usage(deep=True).sum() / 1000000)
    # See https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.
    # memory_usage.html
    
    # print(gdf_condensed.memory_usage(deep=True).sum() / 1000000)

    # Removing any NaN data_col entries from our dataset so that they
    # won't interfere with our mapping code:
    # {data_col} is surrounded by ` characters to make this code 
    # compatible with column names that contain spaces.
    gdf_condensed.query(f"`{data_col}`.isna() == False", inplace=True)

    # Creating a blank map as the starting point for our choropleth:
    m = folium.Map([starting_lat, starting_lon], 
                   zoom_start=zoom_start, tiles=tiles,
                  zoom_control=False, font_size=font_size)
    # For more on the zoom_control variable, see
    # https://github.com/python-visualization/folium/blob/main/docs/user_guide/map.md .
    # This variable is set to False by default so that it won't show up 
    # within screenshots of maps.
   
    # Adding labels to each boundary:
    # (Note: this code will work better for larger shapes, such as US 
    # states, and likely less well for smaller boundaries like counties 
    # or zip codes.)
    # Adding these labels prior to creating our tooltips ensures that
    # the former won't block the latter when the user is interacting with
    # the map.

    # Calculating reference points for adding labels to each boundary:
    
    # The following code applies GeoPandas' representative_point() method
    # to calculate central points for each boundary that always lie within
    # that boundary. It then uses a list comprehension to create lists of
    # x and y coordinates that can be fed into folium mapping code.
    # (The POINT items returned by representative_point() don't appear
    # to work as well with Folium.)

    if add_boundary_labels == True:
        gdf_condensed['boundary_label_reference_points'] = [
            [coord.y, coord.x] for coord in 
         gdf_condensed['geometry'].representative_point()]
        
        # Documentation for representative_point(): 
        # https://geopandas.org/en/stable/docs/reference/api/geopandas.
        # GeoSeries.representative_point.html#geopandas.GeoSeries.
        # representative_point
        # x and y are attributes of Geoseries objects:
        # https://geopandas.org/en/stable/docs/reference/geoseries.html
    
        # Using these reference points to add the values stored
        # in boundary_label_col as boundary labels:
        for i in range(len(gdf_condensed)):
            boundary_label = gdf_condensed.iloc[i][
            boundary_label_col].copy()
            if round_boundary_labels == True:
                boundary_label = boundary_label.round(
                    boundary_label_round_val)
            folium.Marker(location=gdf_condensed.iloc[i][
                          'boundary_label_reference_points'],
                         icon=folium.features.DivIcon(
                             f"<b>{boundary_label}</b>",
                         icon_anchor=(boundary_label_lon_shift, 
                                        boundary_label_lat_shift))
                         ).add_to(m)
        # Part of the folium.Marker() call above is based on an example 
        # by StackOverflow user 'r-beginners',
        # who pointed out that you could pass a DivIcon to the 'icon' 
        # parameter 
        # within a Marker in order to add text labels to shapes. 
        # Source: https://stackoverflow.com/a/72588910/13097194
        
        # The use of icon_anchor to adjust the labels' locations
        # comes from the DivIcon documentation at
        # https://python-visualization.github.io/folium/latest/
        # reference.html#folium.features.DivIcon .
    
    # Creating the tooltips:

    tooltip_field_list = [
        boundary_name_col, data_col] + tooltip_variable_list

    alias_list = [boundary_name_alias, 
                  data_col_alias] + tooltip_alias_list
    
    # print(tooltip_field_list, alias_list)
    
    tooltip = folium.GeoJsonTooltip(
        fields= tooltip_field_list,
        aliases=alias_list,
        localize=True,
        sticky=False,
        labels=True,
        style="""
            background-color: #FFFFFF;
            border: 1px;
            border-radius: 1px;
            box-shadow: 1px;
        """,
        max_width=800
    )
    
    # Creating a set of colors to use for the choropleth map:
    if bin_type == 'custom': # In this case, bin_count will be overwritten
        # by the length of custom_threshold_list minus 1. (The number of
        # bins will always be one less than the number of thresholds, as 
        # two thresholds are needed to establish the boundaries for 
        # one bin.
        # For instance, if you have three thresholds (0, 1, and 2),
        # two bins can be created from this list: 0 to 1 and 1 to 2.)
        
        bin_count = len(custom_threshold_list) - 1
    color_range = color_brewer(color_scheme, n=bin_count)
    
    # Based on Choropleth() definition within
    # https://github.com/python-visualization/folium/blob/main/folium/
    # features.py
    
    # To reverse the set of colors passed to color_scheme, add '_r' 
    # to the end of the string (e.g. 'RdYlBu_r'). Source: 
    # https://github.com/python-visualization/branca/blob/main/branca
    # /utilities.py

    # Determining which colors to apply to each result:
    
    if bin_type == 'linear': # Equally-spaced bins will be used. 
        # The number of bins will be derived from the number of 
        # colors in color_range.
        stepped_cm = StepColormap(
            colors=color_range, 
            vmin=gdf_condensed[data_col].min(),
            vmax=gdf_condensed[data_col].max())
        # Based on: 
        # https://python-visualization.github.io/branca/colormap.html#
        # branca.colormap.StepColormap

    else: # In this case, a different approach to creating the 
        # StepColorMap
        # will be used that better accommodates non-equally-spaced bins.
        
        if bin_type == 'percentile': # In this case, 
            # percentile-based bins will
            # be used.
            bin_thresholds = list(gdf_condensed[data_col].quantile(
            np.linspace(0, 1, bin_count+1))) 
            # For np.linspace() documentation, see:
            # https://numpy.org/doc/stable/reference/generated/
            # numpy.linspace.html
    
        elif bin_type == 'custom': # This condition allows for a set of 
            # custom bins to be passed in.
            bin_thresholds = custom_threshold_list.copy()

        else:
            raise ValueError("bin_type must be set to 'linear', \
'percentile, or 'custom'.")
        
        # The following approach works for both 'percentile' and 
        # 'custom' bin_type conditions.
        stepped_cm = StepColormap(
            colors=color_range, 
            vmin=bin_thresholds[0], vmax=bin_thresholds[-1],
            index=bin_thresholds)
        # This code is based on the self.color_scale initialization 
        # within Folium's Choropleth() source code (available at
        # https://github.com/python-visualization/folium/blob/main/
        # folium/features.py )
    
    # The following code will both assign colors from StepColorMap
    # to each region *and* add in tooltips. This approach allows the
    # colors and tooltips to reference the same set of outlines,
    # thus allowing for a smaller file size.
    
    g = folium.GeoJson(
        gdf_condensed,
        style_function=lambda x: {
            "fillColor": stepped_cm(
                x["properties"][data_col]),
            "fillOpacity": choropleth_opacity,
            "weight":1,
            "color":"black"
        },
        tooltip=tooltip
    ).add_to(m)
    # The Folium.GeoJSON overview at
    # https://python-visualization.github.io/folium/latest/
    # user_guide/geojson/geojson.html
    # contributed to this code as well.
    # Note that we need to add ["properties"] in between x and 
    # [data_col], likely because gdf_condensed
    # is being interpreted as a GeoJSON object. I based this off of the
    # "if "e" in feature["properties"]["name"].lower()" line within
    # the above link.

    # Adding the color legend for the choropleth to the map:
    stepped_cm.add_to(m)
    # Based on:
    # https://python-visualization.github.io/folium/latest/user_guide/
    # geojson/geojson.html

    if save_html == True:
        m.save(f"{html_map_folder}/{map_filename}.html")

    # Generating a screenshot of the map:
    if save_screenshot == True:
        print("Generating screenshot.")
        options = webdriver.ChromeOptions()
        # Source: https://www.selenium.dev/documentation/webdriver/
        # browsers/chrome/
        options.add_argument(f'--window-size={driver_window_width},\
{driver_window_height}') # I found that this window
        # size, along with a starting zoom of 6 within our mapping code,
        # created a relatively detailed map of the contiguous 48 US 
        # states. 
        # If you'd like to create an even more detailed map, 
        # consider setting your starting zoom to 7 and your window size 
        # to 6000,3375.
        options.add_argument('--headless') # In my experience, this 
        # addition  (which prevents the Selenium-driven browser from 
        # displaying on your computer) was necessary for allowing 4K 
        # screenshots to get saved
        # as 3840x2160-pixel images. Without this line, the 
        # screenshots would  get rendered with a resolution of 
        # 3814x1868 pixels.
        # Source of the above two lines:  
        # https://www.selenium.dev/documentation/webdriver/
        # browsers/chrome/
        # and
        # https://github.com/GoogleChrome/chrome-launcher/blob/
        # main/docs/chrome-flags-for-tools.md
        
        # I learned about the necessity of using headless mode 
        # *somewhere* on  StackOverflow. Many answers to the question
        # linked below regarding generating screenshots reference it 
        # as an important step, for instance.
        # https://stackoverflow.com/questions/41721734/take-screenshot
        # -of-full-page-with-selenium-python-with-chromedriver/57338909
        
        # Launching the Selenium driver:
        driver = webdriver.Chrome(options=options) 
        # Source: https://www.selenium.dev/documentation/webdriver/
        # browsers/chrome/
        
        # Navigating to our map:
        # Note: In order to get the following code to work within
        # Linux, I needed to precede the local path with 'file://' as 
        # noted by GitHub user lukeis here: 
        # https://github.com/seleniumhq/selenium-google-code-issue-
        # archive/issues/3997#issuecomment-192014472
        driver.get(f"file://{html_map_folder}/{map_filename}.html")
        # Source: https://www.selenium.dev/documentation/
        time.sleep(3) # Helps ensure that the browser has enough 
        # time to download
        # map contents from the tile provider. This time might need to be
        # increased if a slow internet connection is in use. Conversely,
        # if no tiles are being incorporated into the map, 
        # there may not be any need to call
        # time.sleep().
        # Taking our screenshot and then saving it as a PNG image:
        driver.get_screenshot_as_file(
            f"{png_map_folder}/{map_filename}.png")
        # Source: 
        # https://selenium-python.readthedocs.io/api.html#selenium.
        # webdriver.remote.webdriver.WebDriver.get_screenshot_as_file
        
        # Exiting out of the webdriver:
        driver.quit()
        # Source: https://www.selenium.dev/documentation/
    
    if (delete_html_file == True) & (save_html == True):
        os.remove(f"{html_map_folder}/{map_filename}.html")
        print("Removed HTML copy of map.")
    
    return m


def create_map_and_screenshot(
    starting_lat, starting_lon, gdf, 
    data_col, boundary_name_col,
    data_col_alias, boundary_name_alias,
    html_zoom_start=5,
    screenshot_zoom_start=6, 
    font_size='16px',
    screenshot_font_size='16px',
    bin_type='linear', bin_count=6, 
    custom_threshold_list=[],
    color_scheme='RdYlBu',
    tooltip_variable_list=[], tooltip_alias_list=[],
    map_filename='map', html_map_folder='',
    png_map_folder='',
    geometry_col='geometry',
    tiles='OpenStreetMap', choropleth_opacity=0.6,
    add_boundary_labels=False, boundary_label_lon_shift=10,
    boundary_label_lat_shift=10, boundary_label_col='',
    round_boundary_labels=False,
    boundary_label_round_val=0): 
    
    '''
    This function calls cptt() twice in order to create separate PNG
    and HTML versions of a map. This approach allows separate zoom levels
    to be passed to each map, which can prevent one or both maps from 
    displaying a non-ideal zoom level.
    
    html_zoom_start and screenshot_zoom_start: the zoom settings to use
    for the HTML and PNG maps, respectively.

    font_size and screenshot_font_size: the font size settings to use
    for the HTML and PNG maps, respectively.
    
    For information on other variables, consult the documentation within 
    cptt().
    '''

    # Creating an HTML map optimized for generating a screenshot;
    # creating the screenshot; and then deleting the HTML copy of the map
    # (as we only needed it in order to create the screenshot):
    cptt(
    starting_lat=starting_lat, 
        starting_lon=starting_lon, gdf=gdf, 
    data_col=data_col, boundary_name_col=boundary_name_col,
    data_col_alias=data_col_alias, 
        boundary_name_alias=boundary_name_alias,
    zoom_start=screenshot_zoom_start, 
    bin_type=bin_type, 
    bin_count=bin_count, 
    font_size=screenshot_font_size,
    custom_threshold_list=custom_threshold_list, 
    color_scheme=color_scheme,
    tooltip_variable_list=tooltip_variable_list, 
        tooltip_alias_list=tooltip_alias_list,
    save_html=True, save_screenshot=True,
    map_filename=map_filename, html_map_folder=html_map_folder,
    png_map_folder=png_map_folder,
    geometry_col=geometry_col,
    tiles=tiles, choropleth_opacity=choropleth_opacity,
    add_boundary_labels=add_boundary_labels, 
        boundary_label_lon_shift=boundary_label_lon_shift,
    boundary_label_lat_shift=boundary_label_lat_shift, 
        boundary_label_col=boundary_label_col,
    round_boundary_labels=round_boundary_labels,
    boundary_label_round_val=boundary_label_round_val,
    delete_html_file=True)

    
    # Creating a copy of the map optimized for interactive viewing:
    # (This HTML file will get retained, whereas that created in order to 
    # produce the screenshot in the earlier cptt() call got deleted.)
    # Note that this code was called *after* the screenshot generation
    # code so that the latter's HTML map doesn't overwrite this one.
    
    m = cptt(
    starting_lat=starting_lat, 
        starting_lon=starting_lon,gdf=gdf, 
    data_col=data_col, boundary_name_col=boundary_name_col,
    data_col_alias=data_col_alias, 
        boundary_name_alias=boundary_name_alias,
    zoom_start=html_zoom_start, 
    font_size=font_size,
    bin_type=bin_type, 
    bin_count=bin_count, 
    custom_threshold_list=custom_threshold_list, 
    color_scheme=color_scheme,
    tooltip_variable_list=tooltip_variable_list, 
        tooltip_alias_list=tooltip_alias_list,
    save_html=True, save_screenshot=False,
    map_filename=map_filename, html_map_folder=html_map_folder,
    png_map_folder=png_map_folder,
    geometry_col=geometry_col,
    tiles=tiles, choropleth_opacity=choropleth_opacity,
    add_boundary_labels=add_boundary_labels, 
        boundary_label_lon_shift=boundary_label_lon_shift,
    boundary_label_lat_shift=boundary_label_lat_shift, 
        boundary_label_col=boundary_label_col,
    round_boundary_labels=round_boundary_labels,
    boundary_label_round_val=boundary_label_round_val,
    delete_html_file=False) 

    # Returning the map:
    return m