Source code for aequilibrae.transit.functions.breaking_links_for_stop_access

import logging
from typing import List

import geopandas as gpd
import numpy as np
import pandas as pd
import shapely
from shapely import Point
from shapely.ops import substring



[docs]
def split_links_at_stops(stops: gpd.GeoDataFrame, links: gpd.GeoDataFrame, tolerance: float = 50) -> List[
    gpd.GeoDataFrame]:
    """
    Breaks links at the closest points for the nodes (transit stops) nearby, within a certain tolerance.
    New nodes are created at split points with IDs continuing from the maximum of the existing `a_node` and `b_node` values.

    Args:
        stops: GeoDataFrame of points
        links: GeoDataFrame of linestrings. Must contain "a_node" and "b_node" columns.
        tolerance: Search radius.

    Returns:
        List[GeoDataFrame]: A list containing two GeoDataFrames:
            - broken_links: The provided links where some have been split at stop locations.
              Contains updated "a_node", "b_node", and "geometry". Original link attributes are
              preserved (duplicated for splits).
            - new_nodes: Point geometries representing the new nodes created at split locations,
              with IDs starting from `start_node_id`.
    """

    start_node_id = max(links.a_node.max(), links.b_node.max(), stops.stop_id.max()) + 1

    # --- Step 0: Make sure we are operating in metres  ---
    for df in [stops, links]:
        assert df.crs.axis_info[0].unit_name.lower() in ["metre", "meter"], \
            "Both GeoDataFrames must be in a CRS with metre/meter units."

    # --- Step 1: Matching Points to Lines (Same as before) ---
    # Buffer and intersection
    buffered_points = stops.copy()
    buffered_points["orig_geometry"] = buffered_points.geometry
    buffered_points = buffered_points.set_geometry(buffered_points.geometry.buffer(tolerance))

    matches_within = gpd.sjoin(buffered_points, links, how="inner", predicate="intersects")
    matches_within = matches_within.set_geometry("orig_geometry").drop(columns=["geometry"]).rename(
        columns={"orig_geometry": "geometry"})

    # Handle orphans
    matched_indices = matches_within.index.unique()
    orphans_mask = ~stops.index.isin(matched_indices)

    df_result = matches_within[["geometry", "index_right"]].copy()

    if orphans_mask.any():
        orphans = stops.loc[orphans_mask]
        nearest_orphans = gpd.sjoin_nearest(orphans, links, how="left", distance_col="dist_to_line")
        nearest_orphans = nearest_orphans[~nearest_orphans.index.duplicated(keep="first")]
        df_result = pd.concat([df_result, nearest_orphans[["geometry", "index_right"]]])

    # Merge line info
    # We need geometry and node IDs
    df_result = df_result.merge(
        links[["geometry", "a_node", "b_node"]],
        left_on="index_right",
        right_index=True,
        suffixes=("", "_line")
    )

    # Calculate distance along line (linear reference)
    line_geoms = df_result["geometry_line"].values
    point_geoms = df_result["geometry"].values

    # Use vectorized project if available, otherwise apply
    dist_along = shapely.line_locate_point(line_geoms, point_geoms)

    df_result["dist_along"] = dist_along
    df_result["line_length"] = df_result.geometry_line.length

    # --- Step 2: Prepare Splits ---

    # Filter out splits that are effectively at the start or end of the link
    # 1 metre is effectively "right there"
    epsilon = 1
    valid_splits = df_result[
        (df_result["dist_along"] > epsilon) &
        (df_result["dist_along"] < (df_result["line_length"] - epsilon))
    ].copy()

    if valid_splits.empty:
        logging.debug("No valid splits found within tolerance? The map-matching will likely fail.")
        return [links.copy(), gpd.GeoDataFrame([], columns=["node_id", "geometry"], crs=links.crs)]

    # Identify unique split locations per link
    # A point maps to (link_id, distance).
    # Multiple points might map to the same location (approx).
    # We round distance to avoid segments shorter than 1 metre
    valid_splits["splt_lc"] = valid_splits["dist_along"].round(0)

    # Get unique split nodes needed: (link_id, rounded_distance)
    unique_splits = valid_splits[["index_right", "splt_lc"]].drop_duplicates().sort_values(["index_right", "splt_lc"])

    # Assign new Node IDs
    # We create a mapping: (link_id, splt_lc) -> new_node_id
    num_new_nodes = len(unique_splits)
    new_node_ids = np.arange(start_node_id, start_node_id + num_new_nodes)
    unique_splits["new_node_id"] = new_node_ids

    # --- Step 3: Perform Methodical Splitting ---

    # We need to process ONLY the links that have splits.
    links_to_split_indices = unique_splits["index_right"].unique()

    # Separate links that are untouched
    untouched_links = links.drop(links_to_split_indices)

    links_to_split = links.loc[links_to_split_indices]

    new_segments = []

    # Iterate over links that need splitting
    # (Grouping by link to handle multiple splits per link)
    grouped_splits = unique_splits.groupby("index_right")

    new_points = []

    for link_idx, group_df in grouped_splits:
        original_link = links_to_split.loc[link_idx]
        geo = original_link.geometry
        original_a = original_link["a_node"]
        original_b = original_link["b_node"]

        # Sort splits by distance
        splits = group_df.sort_values("splt_lc")

        # Define chain of nodes: [Original_A, New_1, New_2, ..., Original_B]
        nodes = [original_a] + splits["new_node_id"].tolist() + [original_b]

        # Define cut points: [0.0, d1, d2, ..., Length]
        cuts = [0.0] + splits["splt_lc"].tolist() + [geo.length]

        # Create segments
        for i in range(len(cuts) - 1):
            start_dist = cuts[i]
            end_dist = cuts[i + 1]

            # Extract geometry substring
            seg_geo = substring(geo, start_dist, end_dist)

            # Create a copy of the original row data
            new_row = original_link.copy()
            new_row["geometry"] = seg_geo
            new_row["a_node"] = nodes[i]
            new_row["b_node"] = nodes[i + 1]
            new_segments.append(new_row)

            # Record the new points, as we will need them to create the connectors
            # For the last segment, we don"t need to add a point, as we already have the original b_node
            if nodes[i + 1] != original_b:
                new_points.append([nodes[i + 1], Point(tuple(seg_geo.coords[-1]))])

    # Combine everything
    new_segments_gdf = gpd.GeoDataFrame(new_segments, crs=links.crs)

    broken_links = pd.concat([untouched_links, new_segments_gdf], ignore_index=True)

    broken_links = broken_links.assign(original_id=broken_links.link_id)
    broken_links = broken_links.assign(link_id=np.zeros(broken_links.shape[0]) + 1)
    broken_links["distance"] = broken_links.geometry.length

    new_points_gdf = gpd.GeoDataFrame(new_points, columns=["node_id", "geometry"], crs=links.crs)

    return [broken_links, new_points_gdf]