# wgu_repo/Data Structures and Algorithms II — C950/Task 2 - Python files + Screenshots/Python files and screenshots/data_loader.py

# Student ID: 012498637
# Student Name: Zakaria Benmoulay
# Course: C950 - Data Structures and Algorithms II

import csv
from package import Package
from hash_table import ChainingHashTable


def load_packages(filepath):
    """Load every record of the packages CSV into a ChainingHashTable.

    Each CSV row (read with ``csv.DictReader``) becomes one ``Package``;
    the values are passed through exactly as read from the file. Each
    package is inserted under the ``package_id`` attribute of the
    constructed ``Package``.

    Args:
        filepath: Path to the packages CSV file. The header must provide
            the columns PackageID, Address, City, State, Zip, Deadline,
            Weight and Notes.

    Returns:
        The populated ChainingHashTable.

    Raises:
        KeyError: If a required column is missing from a row.
    """
    packages = ChainingHashTable()

    # Package keyword argument -> CSV column that supplies its value.
    column_for = (
        ("package_id", "PackageID"),
        ("address", "Address"),
        ("city", "City"),
        ("state", "State"),
        ("zip_code", "Zip"),
        ("deadline", "Deadline"),
        ("weight", "Weight"),
        ("notes", "Notes"),
    )

    with open(filepath, newline="", encoding="utf-8") as csv_file:
        for record in csv.DictReader(csv_file):
            kwargs = {arg: record[column] for arg, column in column_for}
            package = Package(**kwargs)
            # Key on the attribute of the built object, not the raw cell,
            # so whatever Package stores as its ID is what gets used.
            packages.insert(package.package_id, package)

    return packages


def load_distances(filepath):
    """Read the distances CSV and build a symmetric 2-D distance matrix.

    The first row is a header whose first cell is a label and whose
    remaining cells name the locations. Every following row holds one
    location's distances: a label column followed by one cell per
    location. Empty cells are skipped, and each value that is present is
    mirrored across the diagonal, so a file holding only the lower
    triangle yields a full symmetric matrix.

    Blank lines and rows whose cells are all empty (e.g. ``,,,`` filler
    rows) are ignored so they cannot shift the row index of the records
    that follow them.

    Args:
        filepath: Path to the distances CSV file.

    Returns:
        A tuple ``(address_list, matrix)`` where ``address_list`` holds
        one address per header column and ``matrix[i][j]`` is the
        distance between ``address_list[i]`` and ``address_list[j]``
        (0.0 where the file gives no value).

    Raises:
        StopIteration: If the file has no header row.
        ValueError: If a non-empty cell is not a valid number.
        IndexError: If the file has more data rows, or a row has more
            non-empty cells, than there are header columns.
    """
    address_list = []

    with open(filepath, newline="", encoding="utf-8") as f:
        reader = csv.reader(f)

        # First row contains the column headers (location name + address)
        header = next(reader)

        # Header cells may contain a location name followed by the street
        # address, separated by a newline or a double space; keep the last
        # non-empty part. An entirely empty cell yields "".
        for cell in header[1:]:  # skip the first label column
            parts = [p.strip() for p in cell.replace("\n", "  ").split("  ") if p.strip()]
            address_list.append(parts[-1] if parts else cell.strip())

        # csv.reader yields [] for blank lines; drop those and all-empty
        # filler rows, then strip the leading label column from the rest.
        raw_rows = [
            row[1:]
            for row in reader
            if any(cell.strip() for cell in row)
        ]

    m = len(address_list)

    # Create an m×m matrix filled with 0.0
    matrix = [[0.0] * m for _ in range(m)]

    for i, row in enumerate(raw_rows):
        for j, cell in enumerate(row):
            if not cell.strip():
                continue  # no value given for this pair
            dist = float(cell)
            # Mirror the value so lookups work in either direction.
            matrix[i][j] = dist
            matrix[j][i] = dist

    return address_list, matrix


def get_distance(address_list, distance_matrix, addr_a, addr_b):
    """Return the distance between two addresses from the distance matrix.

    Addresses are compared case-insensitively with surrounding whitespace
    removed. An exact match is preferred; failing that, the first table
    entry that contains the query, or is contained in it, is used.
    Blank strings never take part in the substring pass, because an empty
    string is a substring of every string and would otherwise match
    anything.

    Args:
        address_list: Addresses in the same order as the rows/columns of
            ``distance_matrix``.
        distance_matrix: 2-D sequence indexed as ``[row][column]``.
        addr_a: First address to look up.
        addr_b: Second address to look up.

    Returns:
        ``distance_matrix[i][j]`` where ``i`` and ``j`` are the indices
        matched for ``addr_a`` and ``addr_b``.

    Raises:
        ValueError: If either address cannot be matched.
    """
    # Normalise the table once instead of on every comparison.
    normalized = [a.strip().lower() for a in address_list]

    def find_index(addr):
        addr_clean = addr.strip().lower()

        # Pass 1: exact match
        for idx, known in enumerate(normalized):
            if known == addr_clean:
                return idx

        # Pass 2: substring match in either direction (handles minor
        # formatting differences). Skipped entirely for a blank query and
        # for blank table entries, which would match unconditionally.
        if addr_clean:
            for idx, known in enumerate(normalized):
                if known and (addr_clean in known or known in addr_clean):
                    return idx

        raise ValueError(f"Address not found in distance table: '{addr}'")

    i = find_index(addr_a)
    j = find_index(addr_b)
    return distance_matrix[i][j]