Spatio-Temporal Accessibility of Pharmacy Care in Vermont, USA¶

Alternative Study Versions¶

Authors¶

  • Sam Roubin, sroubin@middlebury.edu, https://orcid.org/0009-0005-5490-3744, Middlebury College
  • Joseph Holler*, josephh@middlebury.edu, https://orcid.org/0000-0002-2381-2699, Middlebury College
  • Peter Kedron, peterkedron@ucsb.edu, https://orcid.org/0000-0002-1093-3416, University of California, Santa Barbara

* Corresponding author

Materials and procedure¶

Computational environment¶

Similar to Kang et al. (2020), this study was run using CyberGIS-Jupyter. It uses an updated software environment relative to the reproduction study: Python Jupyter notebooks in the CyberGISX environment available at https://cybergisxhub.cigi.illinois.edu/. In particular, we use the Python 3-0.9.0 kernel running Python 3.8.12, pandas 1.3.5, geopandas 0.10.2, networkx 2.6.3, and osmnx 1.1.2.

In [1]:
### Import modules
import numpy as np
import pandas as pd
import geopandas as gpd
import seaborn as sns
import networkx as nx
import osmnx as ox
import re
from shapely.geometry import Point, LineString, Polygon
import matplotlib.pyplot as plt
import os
import time
import warnings
import IPython
import requests
from IPython.display import display, clear_output
from shapely.ops import nearest_points   #for pharmacy_setting function
from scipy.stats import kruskal
from matplotlib import colors
from matplotlib.lines import Line2D
from matplotlib.animation import FuncAnimation
from matplotlib.colors import Normalize
from matplotlib.cm import ScalarMappable
import matplotlib.patheffects as path_effects
from tabulate import tabulate
from matplotlib.colors import ListedColormap
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
from matplotlib.font_manager import FontProperties
from matplotlib.patches import Patch
import imageio
import math
!pip install scikit-posthocs -q
import scikit_posthocs as sp

warnings.filterwarnings("ignore")
print('\n'.join(f'{m.__name__}=={m.__version__}' for m in globals().values() if getattr(m, '__version__', None)))
numpy==1.22.0
pandas==1.3.5
geopandas==0.10.2
seaborn==0.11.2
networkx==2.6.3
osmnx==1.1.2
re==2.2.1
IPython==8.3.0
requests==2.27.1
imageio==2.13.5
scikit_posthocs==0.8.1

Check Directories¶

In [56]:
# Set working directory
if os.path.basename(os.getcwd()) == 'code':
    os.chdir('../../')
os.getcwd()
Out[56]:
'/home/jovyan/work/OR-VT-Pharmacy'

Study parameters¶

Distance: This study was first conducted using travel-time thresholds of 10, 20, and 30 minutes.

Friction of distance is modeled with a Gaussian function and a beta coefficient. The study first used a beta coefficient of 262 and assigned a discrete weight to each distance band according to the band's minimum travel time.
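Concretely, the band weights follow the Gaussian decay implemented in the dist_weight function below; in our shorthand, for a band discretized at travel time d (in minutes):

w(d) = exp(-d^2 / beta), rounded to two decimals

With beta = 262 and band travel times of 0, 10, and 20 minutes under the "min" discretization, this yields weights of 1, 0.68, and 0.22, matching the study parameters printed below.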

Time: This study was first conducted with data on 192 pharmacies through December 2023. Since then, three community pharmacies have opened and eleven have closed in Vermont; the closures include four Rite Aid locations slated for closure or sale later this year. We have not tracked pharmacy openings and closings outside of the state. Below, use a status year of 2023 to generate results as of December 2023, or 2025 to generate results as of June 2025.

Pharmacy Technicians: This study began with the working assumption that one pharmacy technician adds the equivalent of 0.5 pharmacists' services to a community pharmacy, coded pt5. For example, with techFTE = 0.5, a pharmacy staffed by one pharmacist and two technicians counts as 1 + 0.5 * 2 = 2.0 pharmacist-equivalents. This variable can be adjusted with an alternative FTE.

Demand Population: This study began with the total population as the demand population. Alternative subsets of the population can be substituted, e.g. those aged 65 and above.

Geography Level: The study initially used county subdivisions for population data. Alternatively, change to census blocks for a finer-grained analysis.

In [66]:
# Set a unique ID for the set of results
setID_file = "./data/derived/public/result_sets/setID.txt"

with open(setID_file, 'r') as f:
    last_id = int(f.read().strip())
setID = str(last_id + 1).zfill(3)
with open(setID_file, 'w') as f:
    f.write(setID)

# beta coefficient
beta = 262
# discretization preference: min or mean
discretize = "min"
# define three distances in minutes in ascending order 
dist_mins = [10, 20, 30]

# calculate distance thresholds in seconds and distance weights
distances = [i * 60 for i in dist_mins]

def dist_weight(dist, beta):
    result = math.exp(-(dist ** 2) / beta)
    return round(result, 2)

# calculate distance weights
if discretize == "min":
    weights = [1]
    for d in dist_mins[:-1]:
        weights.append(dist_weight(d, beta))
elif discretize == "mean":
    weights = []
    for ind in range(len(dist_mins)):
        if ind == 0:
            weights.append(1)
        else:
            dist = (dist_mins[ind-1] + dist_mins[ind]) / 2
            weights.append(dist_weight(dist, beta))
            
# set status year to "2023" or "2025" for pharmacies as of December 2023 or June 2025
# or to 'closed' to visualize closed pharmacies
temporal_extent = "2023"

# set the techFTE to the pharmacist full-time equivalent value of a pharmacy technician
techFTE = 0.5
tech_ratio = "pt" + str(int(techFTE * 10))

# switch from total_pop to other population variables, e.g. elderly_pop
demand_population = "total_pop"

# switch from county_subdivision to block to calculate at the block level
geog_level = "county_subdivision"

# save figures? Switch to True to save figures
figsave = True
def make_fig_file(fignum):
    fig_folder = "./results/figures/set_" + setID
    os.makedirs(fig_folder, exist_ok=True)
    return fig_folder + "/figure" + str(fignum) + ".jpg"

setDesc = "Result Set ID: " + setID + "\n" + \
          "Beta: " + str(beta) + "\n" + \
          "Discretization: " + discretize + " travel time" + "\n" + \
          "Distances: " + str(distances) + " (seconds) " + str(dist_mins) + " (minutes)" + "\n" + \
          "Weights: " + str(weights) + "\n" + \
          "Time series: " + temporal_extent + "\n" + \
          "Pharmacy technician value: " + tech_ratio + " (" + str(techFTE) + ")" + "\n" + \
          "Demand population: " + demand_population + "\n" + \
          "Geographic level of aggregation: " + geog_level

setDesc_file = "./data/derived/public/result_sets/results_" + setID + ".txt"

with open(setDesc_file, 'w') as f:
    f.write(setDesc)

print(setDesc)
Result Set ID: 001
Beta: 262
Discretization: min travel time
Distances: [600, 1200, 1800] (seconds) [10, 20, 30] (minutes)
Weights: [1, 0.68, 0.22]
Time series: 2023
Pharmacy technician value: pt5 (0.5)
Demand population: total_pop
Geographic level of aggregation: county_subdivision

Load Pharmacy Data¶

In [5]:
pharmacies = gpd.read_file('./data/raw/public/pharmacy/pharmacies_df.gpkg')
pharm_staffing = pd.read_csv('./data/raw/private/staffing_interpolated.csv')
pharmacies_df = pd.merge(pharmacies, pharm_staffing, on='pharmid', how = 'left')

Load Population Data¶

First, load county subdivisions and their NECTA classifications.
Second, load census blocks.

In [6]:
# Read in population data by town
population_df = gpd.read_file('./data/derived/public/population/county_subdivisions.gpkg')

# Read in metropolitan / micropolitan classifications (NECTAS)
nectas_df =  gpd.read_file('./data/raw/public/population/nectas.csv')
nectas_df['GEOID'] = nectas_df['fips_state'] + nectas_df['fips_county'] + nectas_df['fips_subdivision']

# Join NECTAS classifications to population data with subdivision and county FIPS
pop_df = pd.merge(population_df, nectas_df[['GEOID', 'necta']], on=['GEOID'], how = 'left')

# Save as geopackage into public/derived
if False:
    pop_df.to_file('./data/derived/public/population/county_subdivisions_necta.gpkg', driver='GPKG')

# pop_df.head()
In [7]:
# All geometries for which we have obtained population data
pop_df.plot()
Out[7]:
<AxesSubplot:>
In [8]:
# Read in population data by block
blocks25 = gpd.read_file('./data/derived/public/population/blocks25.gpkg')
blocks33 = gpd.read_file('./data/derived/public/population/blocks33.gpkg')
blocks36 = gpd.read_file('./data/derived/public/population/blocks36.gpkg')
blocks50 = gpd.read_file('./data/derived/public/population/blocks50.gpkg')
blocks_df = pd.concat([blocks25, blocks33, blocks36, blocks50])
blocks_df.plot()
Out[8]:
<AxesSubplot:>
In [9]:
# Reproject to VT state plane
pop_df.to_crs("EPSG:6589", inplace = True)
blocks_df.to_crs("EPSG:6589", inplace = True)

Load the Road Network¶

Load a processed road network.
Then, create a geodataframe of its nodes and construct a point geography for each node from its x and y coordinates.
Finally, drop four nodes at the ends of one-way roads that cause routing errors.
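Candidate dead-end nodes like these can also be surfaced programmatically. The snippet below is a minimal sketch of our own (not part of the original workflow), assuming G is the directed MultiDiGraph loaded in the next cell:

# nodes that can be entered but not exited (or vice versa) can trap the router
dead_ends = [n for n in G.nodes if G.out_degree(n) == 0 or G.in_degree(n) == 0]
print(len(dead_ends), "candidate dead-end nodes")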

In [10]:
roads_processed_path = "./data/derived/private/roads_processed.graphml"
# load the processed network graph
if os.path.exists(roads_processed_path):
    print("Loading road network from", roads_processed_path, "Please wait...", flush=True)
    G = ox.load_graphml(roads_processed_path)
    print("Data loaded.") 
    
else:
    print("Error: could not load the road network from file.")
Loading road network from ./data/derived/private/roads_processed.graphml Please wait...
Data loaded.
In [11]:
nodes = ox.graph_to_gdfs(G, nodes=True, edges=False)

# Create point geometries for each node in the graph, to make constructing catchment area polygons easier
for node, data in G.nodes(data=True):
    data['geometry']=Point(data['x'], data['y'])

# Drop four dead-end nodes for parking lot entrances
nodes.drop([205007938, 5976921845, 204567135, 61755372], inplace = True)

Find the nearest node to each pharmacy

In [12]:
nodes_osmid = gpd.GeoDataFrame(nodes[["geometry"]]).reset_index().set_crs(epsg=4326, inplace=True)
# print(nodes_osmid.head())

pharmacies_osm = gpd.sjoin_nearest(pharmacies_df, nodes_osmid, distance_col="distances")

#rename column from osmid to nearest_osm, so that it works with other code
pharmacies_osm = pharmacies_osm.rename(columns={"osmid": "nearest_osm"})

Analysis¶

Calculate catchment areas¶

In [13]:
# initialize catchment list, composed of 3 empty geodataframes for 3 distance bands
catchments = []
for distance in distances:
    catchments.append(gpd.GeoDataFrame())
    
# initialize results geodataframe to store final catchment areas
results = gpd.GeoDataFrame(columns = ["geometry","pharmid","weight"], crs = "EPSG:4326", geometry = "geometry")

rweights = weights[::-1]
rdistances = distances[::-1]
print(rweights)
print(rdistances)

# calculate catchment areas
print("Working on pharmacy:")
for ind in pharmacies_osm.index:
    print(str(ind + 1) + "/" + str(len(pharmacies_osm)), end = '\r')
    
    # create dictionary of nearest nodes
    nearest_nodes = nx.single_source_dijkstra_path_length(G, pharmacies_osm['nearest_osm'][ind], rdistances[0], "travel_time") # compute travel times out to the largest threshold; the 10- and 20-minute bands are extracted from this

    # convert to a dataframe and join to nodes on the index (OSMID) to combine geographies with time (z)
    nearest_points = pd.DataFrame.from_dict(nearest_nodes, orient='index').rename(columns = {0: 'z' } )
    points = pd.merge(nodes, nearest_points, left_index=True, right_index=True, how='inner')

    polygons = []
    for d in range(len(rdistances)):
        points = points.query("z <= " + str(rdistances[d]))
        convex_hull = gpd.GeoDataFrame(gpd.GeoSeries(points.unary_union.convex_hull))
        convex_hull = convex_hull.rename(columns={0:'geometry'}).set_geometry('geometry')
        convex_hull["weight"] = rweights[d]
        polygons.append(convex_hull)
        if d > 0:
            polygons[d-1] = gpd.overlay(polygons[d-1], polygons[d], how = "difference")
    
    polygonsgdf = pd.concat(polygons)
    polygonsgdf.set_crs(crs="EPSG:4326", inplace=True)    
    polygonsgdf["pharmid"] = pharmacies_osm['pharmid'][ind]
    results = pd.concat([results, polygonsgdf])
[0.22, 0.68, 1]
[1800, 1200, 600]
Working on pharmacy:
195/195
In [14]:
print(len(pharmacies_osm), "pharmacies and", len(results), "polygons for", len(results) / len(pharmacies_osm), "polygons per pharmacy") 
195 pharmacies and 585 polygons for 3.0 polygons per pharmacy
In [15]:
catchment_results_file = "./data/derived/public/catchments.gpkg"
# set to True to save the file
if False:
    results.to_file(catchment_results_file)
    
# set to True to load the catchments file
if False:
    results = gpd.read_file(catchment_results_file)

The interactive map below shows the three catchment areas for each pharmacy in our study area. We check for completeness by counting the catchment polygons and dividing by the total number of pharmacies, expecting exactly three polygons per pharmacy.
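A stricter variant of this completeness check would assert the expected ratio rather than printing it, for example (our addition, using the objects defined above):

# fail loudly if any pharmacy is missing one of its three distance bands
assert len(results) == 3 * len(pharmacies_osm), "expected 3 catchment polygons per pharmacy"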

In [16]:
results.explore("weight", categorical=True, cmap='GnBu', style_kwds={'fillOpacity': 0.2})
# try results.explore("weight")
Out[16]:
Make this Notebook Trusted to load map: File -> Trust Notebook

Service-to-population ratio calculations¶

To begin calculating service-to-population ratios, town areas must first be computed, since towns are the final geographic unit in which we display results. This code computes the areas where catchments overlap the town geometries (fragments), estimates the population within each fragment from the proportion of the town it covers, and then applies the fragment's distance weight, yielding an estimate of the population that may use services at a given pharmacy location.
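In shorthand of our own, for the fragment formed by town t and a catchment band of pharmacy p:

pop_value = pop(t) * (frag_area / src_area) * weight

where pop(t) is the town's demand population, frag_area / src_area is the proportion of the town covered by the fragment, and weight is the Gaussian distance weight of the band.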

In [17]:
# Reproject catchment results to VT State Plane
results.to_crs("EPSG:6589", inplace = True)
In [18]:
%%time
# Calculate population areas and overlay with catchments
# (fragments are where each individual catchment area overlaps with a different population area)
if geog_level == 'block':
    blocks_df['src_area'] = blocks_df.geometry.area
    fragments = gpd.overlay(blocks_df, results, how = 'intersection')
else:
    pop_df['src_area'] = pop_df.geometry.area
    fragments = gpd.overlay(pop_df, results, how = 'intersection')

# Calculate fragment areas
fragments['frag_area'] = fragments.geometry.area

# Calculate area ratios to see how much of a town is covered by the given fragment --> used to estimate the population in that fragment
fragments['area_ratio']= fragments['frag_area'] / fragments['src_area']

# Calculate fragment value by multiplying area_ratio by distance weight
fragments['value'] = fragments['weight'] * fragments['area_ratio']

# Calculate population served in each fragment
fragments['pop_value'] = fragments[demand_population]*fragments['value']
CPU times: user 2.45 s, sys: 21.4 ms, total: 2.47 s
Wall time: 2.53 s

Next, a value for the total population served by each pharmacy is calculated by aggregating all the fragments within a given pharmacy catchment. Service-to-population ratios will ultimately be aggregated into Vermont municipalities.

In [19]:
# Sum population served per pharmacy 
sum_values = fragments[["pharmid","pop_value"]]
sum_values = sum_values.groupby(by = ['pharmid']).sum('pop_value')
sum_values.rename(columns={'pop_value': 'total_value'}, inplace = True) 
sum_values.head()

# Join summed values by pharmacy to pharmacies_df
pharm_pop = sum_values.merge(pharmacies_df, on='pharmid', how = 'left')
pd.set_option('display.max_columns', None)

# pop_join.head()

Select the pharmacy data for the chosen temporal extent.

In [20]:
pop_join = pharm_pop.copy().drop(columns=['geometry'])
if temporal_extent == "2023":
    pop_join = pop_join.query("status != 'opened'")
elif temporal_extent == "2025":
    pop_join = pop_join.query("status != 'closed'")
elif temporal_extent == "closed":
    pop_join = pop_join.query("status == 'closed'")
print(len(pop_join), "pharmacy locations for", temporal_extent)
192 pharmacy locations for 2023

Calculate the service level for each pharmacy for the chosen full-time equivalent of pharmacy technicians.

In [21]:
# Calculate number of staff that work at each pharmacy
# Weekday
pop_join['week_staff'] = pop_join['week_pharm'] + techFTE * pop_join['week_tech']

#Saturday
pop_join['sat_staff'] = pop_join['sat_pharm'] + techFTE * pop_join['sat_tech']

# Sunday
pop_join['sun_staff'] = pop_join['sun_pharm'] + techFTE * pop_join['sun_tech']

Step One: Service to Population Ratios for Service Points¶

Service-to-population ratios can be calculated for each pharmacy by dividing its service (staffing) value by its population value (total_value). An individual service-to-population ratio is derived for each hour on weekdays, Saturdays, and Sundays, as well as for each day type overall. The whole-day ratio represents a point in the day when all pharmacies are open (all pharmacists and technicians are working), i.e. between the time the last pharmacy opens and the first pharmacy closes.
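In the same shorthand, the whole-day ratio for pharmacy p is:

sp_ratio(p) = staff(p) / total_value(p)

where staff(p) = pharmacists + techFTE * technicians on duty for that day, and total_value(p) is the weighted population summed over all fragments in p's catchment. Hourly ratios use the same formula but are set to missing for hours when the pharmacy is closed.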

In [22]:
# Calculate service to pop ratio for each pharmacy for Weekdays, Saturdays, and Sundays between 7am-11pm
def calculate_sp_ratios(pop_join, day_type):
    pop_join[f'sp_ratio_{day_type}'] = pop_join[f'{day_type}_staff'] / pop_join['total_value']
    
    for hour in range(7, 24):
        pop_join[f'sp_ratio_{hour}{day_type}'] = np.where((pop_join[f'{day_type}_open'] <= hour) & (pop_join[f'{day_type}_close'] >= hour), 
                                                       pop_join[f'{day_type}_staff'] / pop_join['total_value'], 
                                                       np.nan)
# Weekday
calculate_sp_ratios(pop_join, 'week')

# Saturday
calculate_sp_ratios(pop_join, 'sat')

# Sunday
calculate_sp_ratios(pop_join, 'sun')

# Only display this data for testing. It reveals confidential information at the pharmacy level.
# pop_join.head()

Step Two: Local Service Accessibility¶

Since our final result will be represented in county subdivisions, we do not need to recalculate network distances and catchments. Instead, we can join our service-to-population ratio data back to the fragments created above.

In [23]:
# Join to the fragments df
frag_join = fragments.merge(pop_join, on = 'pharmid', how = 'inner')

# Only display this data for testing. It reveals confidential information at the pharmacy level.
# frag_join.head()
print(len(frag_join), "features")
10473 features

First, each fragment's service-to-population ratios must be multiplied once again by the combined area and distance weight (the value column computed above).

In [24]:
# Calculate weighted service-to-population ratio for each slice of time (weights still correspond to distance from the pharmacy)
frag_join['sp_weighted_w'] =  frag_join['sp_ratio_week'] * frag_join['value'] 
frag_join['sp_weighted_s'] =  frag_join['sp_ratio_sat'] * frag_join['value']  
frag_join['sp_weighted_su'] =  frag_join['sp_ratio_sun'] * frag_join['value'] 

def calculate_weighted_sp_ratios(frag_join, day_type):
    for hour in range(7, 24):
        frag_join[f'sp_weighted_{hour}{day_type}'] = frag_join[f'sp_ratio_{hour}{day_type}'] * frag_join['value']

# Weekday
calculate_weighted_sp_ratios(frag_join, 'week')

# Saturday
calculate_weighted_sp_ratios(frag_join, 'sat')

# Sunday
calculate_weighted_sp_ratios(frag_join, 'sun')

# Make all NaN values 0, since these values represent 0 access
frag_join.fillna(0, inplace = True)

# frag_join.head()

Next, all of these ratios for the fragments are summed within a county subdivision (GEOID) to create an accessibility measure for each town and each hour of the week.
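In the same shorthand, the raw accessibility of town t for a given day or hour is:

access_raw(t) = sum over pharmacies p of [ sp_ratio(p) * weight * (frag_area / src_area) ]

so each pharmacy's ratio flows back to the town discounted by distance decay and by the share of the town inside that distance band. The next step rescales this sum to staff per 10,000 residents.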

In [25]:
# Sum weighted service to population ratio by town
# use sdGEOID if geographic data was from blocks
# sdGEOID is the county subdivision GEOID joined by intersecting the block's point_on_surface
if geog_level == 'block':
    # Select ID and weighted service to population levels from fragments
    accessibility_blocks = frag_join.loc[:, ['sdGEOID', 'GEOID']].join(frag_join.loc[:,'sp_weighted_w':])
    # Sum weighted service to population ratio by block
    accessibility_blocks = accessibility_blocks.groupby('GEOID').sum()
    # Join county subdivision sdGEOID and block population by block GEOID
    accessibility_blocks = accessibility_blocks.merge(blocks_df[['GEOID','sdGEOID',demand_population]], on='GEOID', how='inner')

    # Find population-weighted average accessibility score by town
    const_columns = ['GEOID', 'sdGEOID', demand_population]
    popweighted_acc = accessibility_blocks.drop(columns=const_columns).mul(accessibility_blocks[demand_population], axis=0)
    accessibility_blocks = accessibility_blocks[const_columns].join(popweighted_acc)
    
    # replace block GEOID with county subdivision GEOID
    accessibility_blocks = accessibility_blocks.drop(['GEOID',demand_population], axis = 1).rename(columns={'sdGEOID': 'GEOID'})
    
    # group by county subdivision and calculate population-weighted average accessibility
    accessibility_towns = accessibility_blocks.groupby('GEOID').sum().reset_index()
    const_columns = ['GEOID', demand_population]
    accessibility_towns = accessibility_towns.merge(pop_df[const_columns], on='GEOID', how="left")
    popweighted_acc = accessibility_towns.drop(columns=const_columns).div(accessibility_towns[demand_population], axis=0)
    accessibility_towns = accessibility_towns[const_columns].join(popweighted_acc)

else:
    # Select ID and weighted service to population levels from fragments
    accessibility_towns = frag_join[['GEOID']].join(frag_join.loc[:,'sp_weighted_w':])
    # Sum weighted service to population ratio by town
    accessibility_towns = accessibility_towns.groupby('GEOID').sum().reset_index()
    
# accessibility_towns

Accessibility measures are rescaled to an estimate of pharmacist-equivalent staff (one pharmacist or two pharmacy technicians) per 10,000 people, so that they are interpretable numbers comparable to regional statistics and service-to-population targets. For example, an access value of 4 corresponds to roughly four pharmacist-equivalent FTEs per 10,000 residents within the weighted catchment.

In [26]:
#Rescale (multiply by large constant)
#Full Days
accessibility_towns['access_w'] = accessibility_towns['sp_weighted_w']*10000
accessibility_towns['access_s'] = accessibility_towns['sp_weighted_s']*10000
accessibility_towns['access_su'] = accessibility_towns['sp_weighted_su']*10000

def calculate_accessibility_times(accessibility_towns, day_type):
    for hour in range(7, 24):
        accessibility_towns[f'access_{hour}{day_type}'] = accessibility_towns[f'sp_weighted_{hour}{day_type}'] * 10000

# Weekday times
calculate_accessibility_times(accessibility_towns, 'week')

# Saturday times
calculate_accessibility_times(accessibility_towns, 'sat')

# Sunday times
calculate_accessibility_times(accessibility_towns, 'sun')

Combine the results to geographic data and save the outputs.

In [87]:
result_path = "./data/derived/public/result_sets/results_" + setID + ".gpkg"

# Select access columns from results
accessibility_map = accessibility_towns[['GEOID']].join(accessibility_towns[[col for col in accessibility_towns.columns if col.startswith('access')]])

# Select Vermont from all towns
vt_df = pop_df.loc[pop_df['GEOID'].str.startswith('50'), ['GEOID', 'NAME', 'necta', 'total_pop', 'elderly_pop', 'minority_pop', 'geometry']]

# Join access results to Vermont towns
accessibility_map = vt_df.merge(accessibility_map, on="GEOID", how="inner")
                      
if True:
    accessibility_map.to_file(result_path)
    
# accessibility_map.head()

This output displays an interactive map of the accessibility scores for each day and hour by county subdivision. The results appear accurate.

In [28]:
# set up mapping_df for results section.
# this code could be replaced with a function to load other results.
mapping_df = accessibility_map.copy()
# Map accessibility by Vermont town.
mapping_df.explore("access_w", tooltip=["access_w", "access_s", "access_su"], cmap="Greens", scheme='Quantiles', k=6, legend_kwds={'scale':False})
Out[28]:
Make this Notebook Trusted to load map: File -> Trust Notebook

Results¶

Study Area¶

First, we calculate population density of each county subdivision and report the number of county subdivisions per NECTA category.

In [67]:
# Prepare pharmacies for mapping
pharmacies_df.to_crs("EPSG:6589", inplace = True) # Match CRS for Vermont
pharmacies_map = pharmacies_df[pharmacies_df['state'] == 'VT']

# Replace NaN values in 'necta' column with 'Rural'. All towns that are not metropolitan or micropolitan are rural. 
mapping_df['necta'].fillna('Rural', inplace=True)

# Replace 'Metropolitan NECTA' and 'Micropolitan NECTA' with 'Metropolitan' and 'Micropolitan', respectively
mapping_df['necta'] = mapping_df['necta'].replace({'Metropolitan NECTA': 'Metropolitan', 'Micropolitan NECTA': 'Micropolitan'})

necta_summary = mapping_df[['necta', 'GEOID']].groupby("necta").count()
necta_summary.rename(columns={"GEOID": "N"}, inplace=True)
print(necta_summary)
                N
necta            
Metropolitan   35
Micropolitan   48
Rural         172

Select county subdivisions in Vermont and calculate population density. Select pharmacies in Vermont and transform them into the Vermont State Plane coordinate system.

In [68]:
# Calculate population density by town
mapping_df.to_crs("EPSG:6589", inplace = True)
mapping_df['town_area_km2'] = mapping_df.geometry.area / 10**6
mapping_df['pop_density'] = mapping_df['total_pop']/mapping_df['town_area_km2']
mapping_df['pop_density'] = mapping_df['pop_density'].replace([np.inf, -np.inf], 0) #make inf values zero since they arise from total_pop = 0
In [69]:
pharmacies_map
Out[69]:
pharmid pharmacy_name type address state lat lon week_open week_close sat_open sat_close sun_open sun_close source status status_date geometry week_pharm week_tech sat_pharm sat_tech sun_pharm sun_tech
0 VT99 Walgreen Eastern Co., Inc. d/b/a Walgreens #18325 chain 30 Shelburne Shopping Park, Shelburne, VT VT 44.379262 -73.224989 9.0 19.0 -1.0 -1 -1.0 -1 None open None POINT (442225.108 209036.698) 1 1 0 0 0 0
1 VT98 WALGREENS #19795 chain 133 N Main St Ste 23, Saint Albans, VT VT 44.814168 -73.082604 9.0 19.0 9.0 18 9.0 17 None open None POINT (453916.912 257272.823) 1 2 1 1 1 1
2 VT97 WALGREENS #17747 chain 201 Route 7 S, Milton, VT VT 44.682189 -73.112042 9.0 19.0 9.0 18 -1.0 -1 None open None POINT (451478.117 242624.092) 1 1 1 1 0 0
3 VT96 Walgreens #19346 chain 221 Main St, Enosburg Falls, VT VT 44.905772 -72.806612 9.0 19.0 9.0 18 -1.0 -1 None open None POINT (475785.886 267333.003) 1 2 1 1 0 0
4 VT95 Walgeens #17183 chain 1184 Prim Rd Ste 2, Colchester, VT VT 44.537734 -73.247286 9.0 20.0 9.0 18 9.0 17 None open None POINT (440609.107 226661.610) 1 2 1 1 1 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
175 VT108 Lakeside Pharmacy independent 242 Pearl St Burlington VT 05401 VT 44.480716 -73.209015 8.0 19.0 9.0 16 -1.0 -1 None open None POINT (443595.750 220298.761) 1 4 1 1 0 0
176 VT1 The Pharmacy Inc independent 205 North St, Bennington, VT VT 42.881931 -73.196657 9.0 18.0 9.0 15 -1.0 -1 https://www.benningtonbanner.com/local-news/lo... closed 2025-03-26 POINT (443087.194 42661.391) 3 5 1 2 0 0
192 VTnew1 Mountainside Pharmacy Inc independent 45 Old Farm Rd VT NaN NaN 9.0 18.0 9.0 16 -1.0 -1 https://www.mountainsidepharmacy.org/ opened 2024-02-01 POINT (484374.640 216918.738) 2 4 1 2 1 2
193 VTnew2 Fall Mountain Pharmacy independent 78 Atkinson St VT NaN NaN 8.5 18.0 9.0 17 -1.0 -1 https://www.fmprx.com/ opened 2024-01-30 POINT (504041.010 70412.263) 2 4 1 2 1 2
194 VTnew3 Shaws Pharmacy #2527 chain 213 Main St. Ludlow VT VT NaN NaN 9.0 20.0 9.0 17 10.0 17 https://vermontjournal.com/featured-articles/s... opened 2024-06-11 POINT (483536.407 99652.821) 1 2 1 1 1 1

120 rows × 23 columns

Figure 1: Mapping town types, population density, and pharmacy locations in VT¶

In [70]:
# Custom color map for pop density
base_cmap = plt.get_cmap('Blues')
num_colors = 256

custom_colors = []
for i in range(num_colors):
    color = colors.rgb_to_hsv(base_cmap(i)[:3])  # Convert RGB to HSV
    color[1] *= 0.7  # Reduce saturation by 30%
    custom_colors.append(colors.hsv_to_rgb(color))  # Convert back to RGB

custom_cmap = colors.LinearSegmentedColormap.from_list("custom_blues", custom_colors, N=num_colors)

# Map Population Density
fig1, ax = plt.subplots(figsize=(12.3, 10.8))
mapping_df.plot(column='pop_density', scheme = 'jenkscaspallforced',k=5, cmap=custom_cmap, legend = False, ax=ax)

metropolitan_df = mapping_df[mapping_df['necta'] == 'Metropolitan']
micropolitan_df = mapping_df[mapping_df['necta'] == 'Micropolitan']

metropolitan_boundary = metropolitan_df.dissolve(by='necta')['geometry'].boundary
micropolitan_boundary = micropolitan_df.dissolve(by='necta')['geometry'].boundary

# Plot only the merged exterior boundaries of the 'Metropolitan NECTA' group
metropolitan_boundary.plot(ax=ax, color='black', linewidth=1.5)

# Plot only the merged exterior boundaries of the 'Micropolitan NECTA' group
micropolitan_boundary.plot(ax=ax, color='black', linestyle = 'dashed', linewidth=1.5)

vt_boundary = mapping_df.dissolve().boundary
vt_boundary.plot(ax=ax, color='black', linewidth=.75)

# Legend
legend_elements = [
    Line2D([0], [0], marker='o', color='w', markerfacecolor='white', markeredgecolor='black', markersize=8, label='Open'),
    Line2D([0], [0], marker='o', color='w', markerfacecolor='greenyellow', markeredgecolor='black', markersize=8, label='Newly Opened'),
    Line2D([0], [0], marker='o', color='w', markerfacecolor='red', markeredgecolor='black', markersize=8, label='Closed'),
    Line2D([0], [0], color='black', lw=2, label='Metropolitan'),
    Line2D([0], [0], color='black', lw=2, linestyle= 'dashed', label='Micropolitan')
]

pd_colorbar = plt.cm.ScalarMappable(cmap=custom_cmap, norm=plt.Normalize(vmin=0, vmax=2046.6))
cbar = plt.colorbar(pd_colorbar, shrink = .38, pad=.001, label = 'Population Density (person/km^2)', location="bottom")
ax.legend(handles=legend_elements, loc='lower right', fontsize=10, bbox_to_anchor=(.95,.2), title="Pharmacy")

pharmacies_map[pharmacies_map['status'] == 'open'].plot(ax=ax, marker='o', facecolor='white', edgecolor='black', markersize=50, label='Pharmacy')
pharmacies_map[pharmacies_map['status'] == 'closed'].plot(ax=ax, marker='o', facecolor='red', edgecolor='black', markersize=50, label='Pharmacy (Closed)')
pharmacies_map[pharmacies_map['status'] == 'opened'].plot(ax=ax, marker='o', facecolor='greenyellow', edgecolor='black', markersize=50, label='Pharmacy (New)')

plt.text(0.22, 0.71, 'Burlington', transform=ax.transAxes, size=11, weight="bold", color='white',
         path_effects=[path_effects.Stroke(linewidth=2, foreground='black'), path_effects.Normal()])

#fig.patch.set_edgecolor('black') #Border
#fig.patch.set_linewidth(2)

plt.axis('off')
plt.show()

#Export figure to results/figures
if figsave:
    fig1.savefig(make_fig_file(1), dpi=300)

Figure 1. Vermont study area with population density and pharmacy locations across the state. Metropolitan and micropolitan towns are outlined.

Hypothesis 1 - Spatial Dimension¶

Calculate mean access by NECTA type.

In [71]:
# Weekday accessibility by classification table
means_by_metro = mapping_df.groupby('necta').mean()
weekdaymean_by_metro = means_by_metro[['access_w']]

weekdaymean_by_metro.columns = ['Mean Access']
table_1 = tabulate(weekdaymean_by_metro, headers='keys', tablefmt='simple_grid')
print(table_1)
necta           Mean Access
------------  -------------
Metropolitan        4.19849
Micropolitan        4.2507
Rural               3.01231

Figure 2: Weekday accessibility¶

In [72]:
# set global maximum for mapping accessibility scores
if demand_population == "total_pop":
    maxacc = 8
else:
    maxacc = 25

# Map of weekday pharmacy accessibility
fig2, ax = plt.subplots(figsize=(12, 12), facecolor = 'white')
mapping_df.plot(column='access_w', cmap='Greens', legend = False, ax=ax, vmax=maxacc)
mapping_df.dissolve().boundary.plot(ax=ax, color='black', linewidth=1)

# Dissolve the geometries for each group to merge adjacent polygons 
metropolitan_df = mapping_df[mapping_df['necta'] == 'Metropolitan']
micropolitan_df = mapping_df[mapping_df['necta'] == 'Micropolitan']

metropolitan_boundary = metropolitan_df.dissolve(by='necta')['geometry'].boundary
micropolitan_boundary = micropolitan_df.dissolve(by='necta')['geometry'].boundary

# Plot exterior boundaries of the 'Metropolitan NECTA' group
metropolitan_boundary.plot(ax=ax, color='black', linewidth=1.2)

# Plot exterior boundaries of the 'Micropolitan NECTA' group
micropolitan_boundary.plot(ax=ax, color='black',linestyle = "dashed", linewidth=1.5)

legend_elements = [
    Line2D([0], [0], color='black', lw=2, label='Metropolitan'),
    Line2D([0], [0], color='black', linestyle = 'dashed',lw=2, label='Micropolitan')
]
ax.legend(handles=legend_elements, loc='lower right', fontsize=12, bbox_to_anchor=(1,.2))
colorbar = plt.cm.ScalarMappable(cmap='Greens', norm=plt.Normalize(vmin=0, vmax=maxacc))

#fig.patch.set_edgecolor('black') #Border
#fig.patch.set_linewidth(2)  
#plt.subplots_adjust(right = .9, top = .9)

# Colorbar settings
cbar = plt.colorbar(colorbar, shrink = .5, pad=.02, label = 'Accessibility', location="bottom")

if demand_population == "total_pop":
    cbar.set_ticks([0, 2, 4, 6, 8])
else:
    cbar.set_ticks([0, 5, 10, 15, 20, 25])

    
plt.axis('off')
plt.show()

# Save figure
if figsave:
    fig2.savefig(make_fig_file(2), dpi=300)

Figure 2. Spatial accessibility during conventional weekday business hours, representing a time period when all pharmacies are operational (maximum accessibility).

Statistical Significance¶

In [73]:
# Check for normal distribution in weekday metro, micro, rural
access_w_metro = mapping_df[mapping_df['necta'] == 'Metropolitan']['access_w']
access_w_micro = mapping_df[mapping_df['necta'] == 'Micropolitan']['access_w']
access_w_rural = mapping_df[mapping_df['necta'] == 'Rural']['access_w']

plt.hist(access_w_rural, bins=10)
plt.hist(access_w_metro, bins=10)
plt.hist(access_w_micro, bins=10)
print("Not normal distribution. Cannot use ANOVA test. Use Kruskal-Wallis instead.")
Not normal distribution. Cannot use ANOVA test. Use Kruskal-Wallis instead.
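A formal normality test could complement this visual check. The following is a minimal sketch of our own using SciPy's Shapiro-Wilk test (not part of the original analysis):

from scipy.stats import shapiro
# small p-values indicate departure from normality
for name, series in [('metro', access_w_metro), ('micro', access_w_micro), ('rural', access_w_rural)]:
    stat, p = shapiro(series)
    print(name, "W =", round(stat, 3), "p =", format(p, ".2g"))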
In [74]:
# Kruskal Wallis Test for significant difference of means between necta classification during conventional business hours
h_statistic_1, p_value_1 = kruskal(access_w_metro, access_w_micro, access_w_rural)
                 
print("Kruskal-Wallis H Statistic:", h_statistic_1)
print("P-value:", p_value_1)

alpha = 0.05
if p_value_1 < alpha:
    print("Reject the null hypothesis. There is a significant difference in mean access during conventional weekday \nbusiness hours between metropolitan, micropolitan, and rural towns.")
else:
    print("Fail to reject the null hypothesis. There is no significant difference in mean access between groups.")
Kruskal-Wallis H Statistic: 27.690600217921656
P-value: 9.706497843353757e-07
Reject the null hypothesis. There is a significant difference in mean access during conventional weekday 
business hours between metropolitan, micropolitan, and rural towns.
In [75]:
dunn_data = np.concatenate([access_w_metro, access_w_micro, access_w_rural])
dunn_groups = ['access_w_metro']*len(access_w_metro) + ['access_w_micro']*len(access_w_micro) + ['access_w_rural']*len(access_w_rural)
dunn_df = pd.DataFrame({'value': dunn_data, 'group': dunn_groups})

# Perform Dunn's test with p-value adjustment (e.g., 'holm')
dunn_results = sp.posthoc_dunn(dunn_df, val_col='value', group_col='group', p_adjust='holm')
print("\nDunn's Post Hoc Test Results (p-values):\n", dunn_results)
Dunn's Post Hoc Test Results (p-values):
                 access_w_metro  access_w_micro  access_w_rural
access_w_metro        1.000000        0.647035        0.001077
access_w_micro        0.647035        1.000000        0.000016
access_w_rural        0.001077        0.000016        1.000000
In [76]:
# Create the scatter plot
import matplotlib.patches as mpatches

nectas = {'Rural': '#ccebc5',
          'Micropolitan': '#7bccc4', 
          'Metropolitan': '#0868ac'
          }

color_list = [nectas[group] for group in mapping_df['necta']]

legend_handles = []
for key, value in nectas.items():
    patch = mpatches.Patch(color=value, label=key)
    legend_handles.append(patch)

mapping_df.plot.scatter('pop_density', 'access_w', c=color_list, alpha=0.7)

# Add labels and title
plt.xlabel("Population Density")
plt.xscale('log')
plt.ylabel("Weekday Access")
plt.legend(handles=legend_handles)
Out[76]:
<matplotlib.legend.Legend at 0x7fb9e21e4580>

Scatterplot of population density and weekday access, illustrating the relationship between NECTA classification, the population density of county subdivisions, and spatial accessibility.

Hypothesis 2 - Temporal Dimension¶

Calculate mean access by type of day, and then map accessibility by county subdivision for each type of day.

In [77]:
# Mean accessibility by day table
mean_access_day = mapping_df[['access_w', 'access_s', 'access_su']].mean()
mean_access_day_df = mean_access_day.to_frame().rename(columns={0: 'Mean Access'})
mean_access_day_df.index = ['Weekday', 'Saturday', 'Sunday']
table_2 = tabulate(mean_access_day_df, headers='keys', tablefmt='simple_grid')
print(table_2)
            Mean Access
--------  -------------
Weekday         3.40823
Saturday        2.02426
Sunday          1.43059

Figure 3: Accessibility variation by day of the week¶

In [78]:
# Map accessibility by day of the week
mapping_df1 = mapping_df
#print(mapping_df1[['access_w', 'access_s', 'access_su']].min().min())  # Min accessibility value
#print(mapping_df1[['access_w', 'access_s', 'access_su']].max().max())  # Max accessibility value

fig3, axs = plt.subplots(1, 3, figsize=(22.5, 10), facecolor = 'white')
plt.subplots_adjust(wspace=-.4)

for i, column in enumerate(['access_w', 'access_s', 'access_su']):
    ax = axs[i]
    mapping_df1.plot(column=column, cmap='Greens', linewidth=0.2, ax=ax, edgecolor='0.8', legend=False,
                vmin=0, vmax=maxacc)
    mapping_df1.dissolve().boundary.plot(ax=ax, color='black', linewidth=1)
    
    # Plot only the merged exterior boundaries of the 'Metropolitan NECTA' group
    metropolitan_boundary.plot(ax=ax, color='black', linewidth=.9)

    # Plot only the merged exterior boundaries of the 'Micropolitan NECTA' group
    micropolitan_boundary.plot(ax=ax, color='black', linestyle = 'dashed', linewidth=.9)
    
    axs[0].set_title('a) Weekday', fontsize=14)
    axs[1].set_title('b) Saturday', fontsize=14)
    axs[2].set_title('c) Sunday', fontsize=14)
    ax.axis('off')

cbar = plt.colorbar(plt.cm.ScalarMappable(norm=plt.Normalize(vmin=0, vmax=maxacc), cmap='Greens'), ax=axs,  # color scale capped at maxacc for visualization
                    orientation='horizontal', pad=.07, shrink=.5)
cbar.set_label('Accessibility')

#fig.patch.set_edgecolor('black') # Figure Border
#fig.patch.set_linewidth(2)       # Figure Border
plt.subplots_adjust(right=1)

#cbar.ax.set_position([0.26, 0.05, .5, 0.05])
cbar.ax.set_position([0.45, 0.05, .2, 0.05])

ax.legend(handles=legend_elements, loc='lower right', fontsize=12, bbox_to_anchor=(.9,-.129))

plt.show()

#Save Figure
if figsave:
    fig3.savefig(make_fig_file(3), dpi=300)

Figure 3. Spatial accessibility across days of the week. Each map represents the maximum accessibility on each day when all pharmacies that plan to open that day are operational.

Figure 4: Accessibility by time of day¶

In [79]:
linegraph_df = mapping_df.copy()  

# Weekday
access_columns_w = ['access_7week','access_8week','access_9week','access_10week','access_11week','access_12week', 'access_13week','access_14week','access_15week','access_16week','access_17week','access_18week','access_19week', 'access_20week', 'access_21week', 'access_22week']
values_w = linegraph_df[access_columns_w].mean()  # Calculate the mean value for each access column

# Saturday
access_columns_s = ['access_7sat','access_8sat','access_9sat','access_10sat','access_11sat','access_12sat','access_13sat','access_14sat','access_15sat','access_16sat','access_17sat','access_18sat','access_19sat', 'access_20sat', 'access_21sat', 'access_22sat']
values_s = linegraph_df[access_columns_s].mean()  # Calculate the mean value for each access column

# Sunday
access_columns_su = ['access_7sun','access_8sun','access_9sun','access_10sun', 'access_11sun','access_12sun','access_13sun','access_14sun', 'access_15sun', 'access_16sun', 'access_17sun', 'access_18sun', 'access_19sun', 'access_20sun', 'access_21sun', 'access_22sun']
values_su = linegraph_df[access_columns_su].mean()  # Calculate the mean value for each access column

# X-axis values for all three days
common_x_values = ['7am', '8am', '9am', '10am', '11am','12pm','1pm', '2pm', '3pm', '4pm', '5pm', '6pm', '7pm', '8pm', '9pm', '10pm']

# Plot together
fig4, ax = plt.subplots(figsize=(10, 6))
plt.plot(common_x_values, values_w, label='Weekday', color='black', linestyle='-')
plt.plot(common_x_values, values_s, label='Saturday', color='black', linestyle='-.')
plt.plot(common_x_values, values_su, label='Sunday', color='black', linestyle=':')
#plt.title('Access by Time of Day')
#plt.xlabel('Time of Day')
plt.ylabel('Mean Accessibility')
plt.xticks() 
plt.yticks()
plt.legend()
plt.show()

# save figure
if figsave:
    fig4.savefig(make_fig_file(4), dpi=300)

Figure 4. Mean accessibility throughout hours of the day, broken down by days of the week.

Figure 5: Accessibility at extreme hours¶

In [80]:
fig5, ax = plt.subplots(figsize=(10, 10), facecolor = "white")
mapping_df.plot(column='access_23week', cmap='Greens', ax=ax, linewidth=0.2, edgecolor= '.8', vmax=.5)
mapping_df.dissolve().boundary.plot(ax=ax, color='black', linewidth=1)

cb = plt.cm.ScalarMappable(cmap='Greens', norm=plt.Normalize(vmin=0, vmax=.5))
#cbar = plt.colorbar(cb, shrink = .5, orientation = 'horizontal', pad=0, label = 'Accessibility')

metropolitan_boundary.plot(ax=ax, color='black', linewidth=.8)
micropolitan_boundary.plot(ax=ax, color='black',linestyle = "dashed", linewidth=.8)

#fig.patch.set_edgecolor('black') #Border
#fig.patch.set_linewidth(2)  
plt.subplots_adjust(right = .5, top = .9)

# Colorbar and Legend
cbar = plt.colorbar(cb, shrink = .5, orientation = 'horizontal', pad=0, label = 'Accessibility')
ax.legend(handles=legend_elements, loc='lower right', fontsize=12, bbox_to_anchor=(.92,.135))

plt.axis('off')
plt.tight_layout()
plt.show()

# Save Figure
if figsave:
    fig5.savefig(make_fig_file(5), dpi=300)

Figure 5. Late night accessibility. Represents access between 10 pm and 7 am.

Statistical Significance¶

In [81]:
access_w_test = pd.Series(mapping_df['access_w'])
access_s_test = pd.Series(mapping_df['access_s'])
access_su_test = pd.Series(mapping_df['access_su'])

# Check for distribution between days of week. 
plt.hist(access_w_test, bins=10) 
plt.hist(access_s_test , bins=10) 
plt.hist(access_su_test, bins=10) 
print("Distribution is not normal. Cannot use ANOVA test. Use Kruskal-Wallis instead.")
Distribution is not normal. Cannot use ANOVA test. Use Kruskal-Wallis instead.
In [82]:
# Run Kruskal-Wallis Test
h_statistic_2, p_value_2 = kruskal(access_w_test, access_s_test, access_su_test)
                 
print("Kruskal-Wallis H Statistic:", h_statistic_2)
print("P-value:", p_value_2)

alpha = 0.05
if p_value_2 < alpha:
    print("Reject the null hypothesis. There is a significant difference in mean access between weekdays, Saturdays, and Sundays.")
else:
    print("Fail to reject the null hypothesis. There is no significant difference in mean access between days.")
                    
Kruskal-Wallis H Statistic: 202.67482244348162
P-value: 9.76610740072804e-45
Reject the null hypothesis. There is a significant difference in mean access between weekdays, Saturdays, and Sundays.
In [83]:
dunn_data = np.concatenate([access_w_test, access_s_test, access_su_test])
dunn_groups = ['access_w_test']*len(access_w_test) + ['access_s_test']*len(access_s_test) + ['access_su_test']*len(access_su_test)
dunn_df = pd.DataFrame({'value': dunn_data, 'group': dunn_groups})

# Perform Dunn's test with p-value adjustment (e.g., 'holm')
dunn_results = sp.posthoc_dunn(dunn_df, val_col='value', group_col='group', p_adjust='holm')
print("\nDunn's Post Hoc Test Results (p-values):\n", dunn_results)
Dunn's Post Hoc Test Results (p-values):
                 access_s_test  access_su_test  access_w_test
access_s_test    1.000000e+00    1.463181e-08   4.593845e-17
access_su_test   1.463181e-08    1.000000e+00   6.143013e-45
access_w_test    4.593845e-17    6.143013e-45   1.000000e+00

Hypothesis 3 - Spatio-Temporal Dynamics¶

Calculate mean access by NECTA classification and type of day. Simplify the NECTA classification into rural and urban categories by merging micropolitan and metropolitan into "urban". Then compare each pair of groups for each type of day using the normalized percent difference:
(Urban - Rural) / (Urban + Rural) * 100
(Metro - Micro) / (Metro + Micro) * 100
(Metro - Rural) / (Metro + Rural) * 100
(Micro - Rural) / (Micro + Rural) * 100

In [84]:
# Accessibility by Day and metropolitan/micropolitan Table
means_by_metro = mapping_df.groupby('necta').mean()[['access_w','access_s','access_su']]
means_by_metro = means_by_metro.rename(columns={"access_w": "Weekday", 
                                                "access_s": "Saturday",
                                                "access_su": "Sunday"})
means_by_metro = means_by_metro.transpose()
means_by_metro = means_by_metro.rename(columns={"Metropolitan": "Metro", 
                                                "Micropolitan": "Micro"})

MetroN = necta_summary.at['Metropolitan', 'N']
MicroN = necta_summary.at['Micropolitan', 'N']

means_by_metro['Urban'] = (means_by_metro['Metro'] * MetroN + means_by_metro['Micro'] * MicroN) / (MetroN + MicroN)
means_by_metro['Urban_Rural_dif'] = means_by_metro['Urban'] - means_by_metro['Rural']

def pctdiff(df, col1, col2):
    newcol = col1 + "_" + col2 + "_pct"
    df[newcol] = (df[col1] - df[col2])/(df[col1] + df[col2]) * 100
    return df

means_by_metro = round(pctdiff(means_by_metro, "Urban", "Rural"), 2)
means_by_metro = round(pctdiff(means_by_metro, "Metro", "Micro"), 2)
means_by_metro = round(pctdiff(means_by_metro, "Metro", "Rural"), 2)
means_by_metro = round(pctdiff(means_by_metro, "Micro", "Rural"), 2)

means_by_metro

# print(tabulate(means_by_metro, tablefmt = 'fancy_grid', headers=["","N","Weekday Mean Access", "Saturday Mean Access", "Sunday Mean Access"]))
#means_by_metro
Out[84]:
necta      Metro  Micro  Rural  Urban  Urban_Rural_dif  Urban_Rural_pct  Metro_Micro_pct  Metro_Rural_pct  Micro_Rural_pct
Weekday     4.20   4.25   3.01   4.23             1.22            16.80            -0.59            16.50            17.08
Saturday    2.45   2.65   1.76   2.56             0.80            18.51            -3.92            16.39            20.18
Sunday      1.70   2.14   1.18   1.95             0.77            24.69           -11.46            18.06            28.92

Table 1. Mean accessibility on weekdays, Saturdays, and Sundays by NECTA class, with an urban mean (metropolitan and micropolitan combined, weighted by number of towns) compared against rural towns. Percent differences show how accessibility gaps shift through the week.

Figure 7: Time of day and NECTA classification¶

In [85]:
access_columns = ['access_7week','access_8week','access_9week','access_10week','access_11week','access_12week',
                  'access_13week','access_14week','access_15week','access_16week','access_17week','access_18week',
                  'access_19week', 'access_20week', 'access_21week', 'access_22week']
linegraph_df2 = mapping_df.groupby('necta')[access_columns].mean()
linegraph_df2 = linegraph_df2.T

access_columns2 = ['access_7sat','access_8sat','access_9sat','access_10sat','access_11sat','access_12sat',
                   'access_13sat','access_14sat','access_15sat','access_16sat','access_17sat','access_18sat',
                   'access_19sat', 'access_20sat', 'access_21sat', 'access_22sat']
linegraph_df3 = mapping_df.groupby('necta')[access_columns2].mean()
linegraph_df3 = linegraph_df3.T

access_columns3 = ['access_7sun','access_8sun','access_9sun','access_10sun', 'access_11sun','access_12sun',
                   'access_13sun','access_14sun', 'access_15sun', 'access_16sun', 'access_17sun', 'access_18sun', 
                   'access_19sun', 'access_20sun', 'access_21sun', 'access_22sun']
linegraph_df4 = mapping_df.groupby('necta')[access_columns3].mean()
linegraph_df4 = linegraph_df4.T

fig7, axs = plt.subplots(3, 1, figsize=(10, 12), sharey = True)  

custom_ticks = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
time_labels = ['7am','8am', '9am','10am','11am','12pm','1pm','2pm', '3pm','4pm','5pm','6pm', '7pm','8pm','9pm','10pm']


# Weekday 
linegraph_df2.plot(time_labels, kind='line', color= ['#8AB5DB','#6787A3','#3B4E5E'] , ax=axs[0], legend = None)  
axs[0].set_title('Weekday')
axs[0].set_xlabel('')
axs[0].set_ylabel('Mean Access Value')
axs[0].set_xticks(custom_ticks)  
axs[0].set_xticklabels(time_labels, rotation= 45) 
axs[0].grid(axis='y', linestyle='-', linewidth=0.08, color='black')

# Saturday
linegraph_df3.plot(time_labels, kind='line', color = ['#8AB5DB','#6787A3','#3B4E5E'] , ax=axs[1], legend = None) 
axs[1].set_title('Saturday')
axs[1].set_xlabel('')
axs[1].set_ylabel('Mean Access Value')
axs[1].set_xticks(custom_ticks) 
axs[1].set_xticklabels(time_labels, rotation = 45) 
axs[1].grid(axis='y', linestyle='-', linewidth=0.08, color='black')

# Sunday
linegraph_df4.plot(time_labels, kind='line', color = ['#8AB5DB','#6787A3','#3B4E5E'], ax=axs[2], legend= None)
axs[2].set_title('Sunday')
axs[2].set_xlabel('')
axs[2].set_ylabel('Mean Access Value')
axs[2].set_xticks(custom_ticks)  
axs[2].set_xticklabels(time_labels, rotation = 45) 
axs[2].grid(axis='y', linestyle='-', linewidth=0.08, color='black')

handles, labels = axs[0].get_legend_handles_labels()
fig7.legend(handles, ['Metropolitan','Micropolitan','Rural'], loc='lower center', ncol=3, fontsize=12, title='', title_fontsize='large', bbox_to_anchor=(0.5, -0.04))

#fig.patch.set_edgecolor('black') #Border
#fig.patch.set_linewidth(2)  

#plt.suptitle('Mean Access by Time of Day', x=0.51, y=1.0, fontsize=16, ha='center')
plt.tight_layout()
plt.show()

# Save Figure
if figsave:
    fig7.savefig(make_fig_file(7), dpi=300)

Figure 7. Mean Accessibility across hours of the day for weekdays, Saturdays, and Sundays by NECTA classification.

In [86]:
# WEEKDAY
mapping_df['necta'].fillna('Rural', inplace=True)

access_columns = ['access_7week','access_8week','access_9week','access_10week','access_11week',
                  'access_12week', 'access_13week','access_14week','access_15week','access_16week',
                  'access_17week','access_18week','access_19week', 'access_20week', 'access_21week', 
                  'access_22week']

hourly_diff_df = mapping_df.groupby('necta')[access_columns].mean() # Mean access by hour of the day after grouping by NECTA
hourly_diff_df = hourly_diff_df.T
hourly_diff_df['Non-rural'] = (hourly_diff_df['Metropolitan'] * 35 + hourly_diff_df['Micropolitan'] * 48) / (35 + 48) # weighted average of metro (N=35) and micro (N=48) means
hourly_diff_df['%_diff'] = ((hourly_diff_df['Rural'] - hourly_diff_df['Non-rural']) / hourly_diff_df['Non-rural']) * 100 #Percent diff between rural and non-rural
hourly_diff_df['%_diff']['access_22week']=0 #Make this value 0. Both accessibility values too small to take meaningful % difference. Both values virtually 0 access
hourly_diff_df['%_diff'] = hourly_diff_df['%_diff'].abs() #Take absolute value for plotting
hourly_diff_df

# SATURDAY
access_columns2 = ['access_7sat','access_8sat','access_9sat','access_10sat','access_11sat','access_12sat',
                   'access_13sat','access_14sat','access_15sat','access_16sat','access_17sat','access_18sat',
                   'access_19sat', 'access_20sat', 'access_21sat', 'access_22sat']

hourly_diff_df2 = mapping_df.groupby('necta')[access_columns2].mean() # Mean access by hour of the day after grouping by NECTA
hourly_diff_df2 = hourly_diff_df2.T
hourly_diff_df2['Non-rural'] = (hourly_diff_df2['Metropolitan'] * 35 + hourly_diff_df2['Micropolitan'] * 48) / (35 + 48) # weighted average of metro (N=35) and micro (N=48) means
hourly_diff_df2['%_diff'] = ((hourly_diff_df2['Rural'] - hourly_diff_df2['Non-rural']) / hourly_diff_df2['Non-rural']) * 100 #Percent diff between rural and non-rural
hourly_diff_df2['%_diff']['access_22sat']=0 #Make this value 0. Both accessibility values too small to take meaningful % difference. Both values virtually 0 access
hourly_diff_df2['%_diff'] = hourly_diff_df2['%_diff'].abs() #Take absolute value for plotting
hourly_diff_df2

# SUN
access_columns3 = ['access_7sun','access_8sun','access_9sun','access_10sun', 'access_11sun','access_12sun',
                   'access_13sun','access_14sun', 'access_15sun', 'access_16sun', 'access_17sun', 'access_18sun', 
                   'access_19sun', 'access_20sun', 'access_21sun', 'access_22sun']

hourly_diff_df3 = mapping_df.groupby('necta')[access_columns3].mean() # Mean access by hour of the day after grouping by NECTA
hourly_diff_df3 = hourly_diff_df3.T
hourly_diff_df3['Non-rural'] = (hourly_diff_df3['Metropolitan'] * 35 + hourly_diff_df3['Micropolitan'] * 48) / (35 + 48) # weighted average of metro (N=35) and micro (N=48) means
hourly_diff_df3['%_diff'] = ((hourly_diff_df3['Rural'] - hourly_diff_df3['Non-rural']) / hourly_diff_df3['Non-rural']) * 100 #Percent diff between rural and non-rural
hourly_diff_df3['%_diff']['access_22sun']=0 #Make this value 0. Both accessibility values too small to take meaningful % difference. Both values virtually 0 access
hourly_diff_df3['%_diff'] = hourly_diff_df3['%_diff'].abs() #Take absolute value for plotting
hourly_diff_df3

common_x_values = ['7am', '8am', '9am', '10am', '11am','12pm','1pm', '2pm', '3pm', '4pm', '5pm', '6pm', '7pm', '8pm', '9pm', '10pm']

# PLOT TOGETHER
fig8 = plt.figure(figsize=(10, 6))
plt.plot(common_x_values, hourly_diff_df['%_diff'], label='Weekday', color = 'black',linestyle='-')
plt.plot(common_x_values, hourly_diff_df2['%_diff'], label='Saturday', color = 'black', linestyle='-.')
plt.plot(common_x_values, hourly_diff_df3['%_diff'], label='Sunday', color = 'black', linestyle=':')

plt.ylabel('Urban-Rural Percent Change in Access')
plt.grid(axis='y', linestyle='-', linewidth=0.08, color='black')
plt.xticks(rotation=45)
plt.legend()
plt.tight_layout()
plt.show()

# Save Figure
if figsave:
    fig8.savefig(make_fig_file(8), dpi=300)

Figure 8. Percent difference in mean accessibility scores across hours of the day between rural and urban areas by day of the week.