Ryanhub - file viewer
filename: scripts/sync_indexes.py
branch: main
back to repo
#!/usr/bin/env python3
"""Regenerate CSV indexes from markdown frontmatter."""

from __future__ import annotations

import sys
from pathlib import Path

# Put this script's own directory first on the import path; presumably this is
# what lets the `common` import below resolve when the script is run directly
# (confirm that `common` lives alongside this file).
sys.path.insert(0, str(Path(__file__).resolve().parent))

from common import (  # noqa: E402
    COMPANIES_DIR,
    CONTACTS_DIR,
    DATA_DIR,
    LEADS_DIR,
    LEAD_CSV_FIELDS,
    CONTACT_CSV_FIELDS,
    COMPANY_CSV_FIELDS,
    ensure_interactions_csv,
    list_entity_files,
    load_interactions,
    parse_frontmatter,
    write_csv,
)


def lead_row(meta: dict) -> dict:
    """Build one lead index row from a lead's frontmatter metadata.

    Each column is looked up in ``meta``. A missing key falls back to an
    empty string, except ``remote``, which falls back to ``False``. Keys in
    ``meta`` that are not listed columns are ignored, and values are passed
    through unchanged.

    Args:
        meta: Mapping of frontmatter keys to values for a single lead.

    Returns:
        A new dict whose keys appear in the column order listed below.
    """
    columns = (
        "id",
        "title",
        "company",
        "company_id",
        "url",
        "source",
        "location",
        "remote",
        "employment_type",
        "status",
        "match",
        "priority",
        "date_found",
        "date_updated",
        "apply_by",
    )
    # "remote" is the only column whose fallback is a boolean rather than "".
    return {
        column: meta.get(column, False if column == "remote" else "")
        for column in columns
    }


def contact_row(meta: dict) -> dict:
    """Build one contact index row from a contact's frontmatter metadata.

    Each column is looked up in ``meta``; a missing key falls back to an
    empty string. Keys in ``meta`` that are not listed columns are ignored,
    and values are passed through unchanged.

    Args:
        meta: Mapping of frontmatter keys to values for a single contact.

    Returns:
        A new dict whose keys appear in the column order listed below.
    """
    columns = (
        "id",
        "name",
        "company",
        "company_id",
        "role",
        "linkedin_url",
        "source",
        "confidence",
        "status",
        "date_found",
        "date_updated",
        "last_contacted",
        "next_followup",
    )
    return {column: meta.get(column, "") for column in columns}


def company_row(meta: dict) -> dict:
    """Build one company index row from a company's frontmatter metadata.

    Each column is looked up in ``meta``; a missing key falls back to an
    empty string. Keys in ``meta`` that are not listed columns are ignored,
    and values are passed through unchanged.

    Args:
        meta: Mapping of frontmatter keys to values for a single company.

    Returns:
        A new dict whose keys appear in the column order listed below.
    """
    row = {}
    for column in (
        "id",
        "name",
        "website",
        "careers_url",
        "domain",
        "status",
        "priority",
        "date_added",
        "date_updated",
    ):
        row[column] = meta.get(column, "")
    return row


def main() -> None:
    """Rebuild the lead, contact, and company CSV indexes and print counts.

    All three entity directories are read before any CSV is written, so a
    failure while reading leaves the existing CSV files untouched. After the
    three indexes are written, ``ensure_interactions_csv`` is called and the
    interactions are loaded only to report how many there are.
    """

    def collect(directory, to_row):
        # Each parse result is unpacked as (metadata, second item); only the
        # metadata is used to build the row.
        return [
            to_row(meta)
            for meta, _ in map(parse_frontmatter, list_entity_files(directory))
        ]

    # Phase 1: read every entity file.
    leads = collect(LEADS_DIR, lead_row)
    contacts = collect(CONTACTS_DIR, contact_row)
    companies = collect(COMPANIES_DIR, company_row)

    # Phase 2: write the indexes in the order leads, contacts, companies.
    outputs = (
        ("leads.csv", LEAD_CSV_FIELDS, leads),
        ("contacts.csv", CONTACT_CSV_FIELDS, contacts),
        ("companies.csv", COMPANY_CSV_FIELDS, companies),
    )
    for filename, fields, rows in outputs:
        write_csv(DATA_DIR / filename, fields, rows)
    ensure_interactions_csv()

    # Phase 3: report counts.
    interactions = load_interactions()
    print(f"Leads: {len(leads)}")
    print(f"Contacts: {len(contacts)}")
    print(f"Companies: {len(companies)}")
    print(f"Interactions: {len(interactions)}")


# Run the sync only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()