Ryanhub - file viewer
filename: scripts/report.py
branch: main
back to repo
#!/usr/bin/env python3
"""Print human-readable funnel and follow-up reports."""

from __future__ import annotations

import argparse
import re
import sys
from datetime import date, timedelta
from pathlib import Path

sys.path.insert(0, str(Path(__file__).resolve().parent))

from common import (  # noqa: E402
    COMPANIES_DIR,
    CONTACTS_DIR,
    LEADS_DIR,
    contact_has_interaction_proof,
    lead_has_interaction_proof,
    lead_has_output_proof,
    list_entity_files,
    load_interactions,
    parse_frontmatter,
)

# Names of the per-company score dimensions. company_scores() accepts an
# integer from 1 to 5 for each one, so the maximum total is
# len(SCORE_FIELDS) * 5.
SCORE_FIELDS = [
    "technical_ambition",
    "research_energy",
    "builder_appreciation",
    "novelty",
    "talent_density",
    "learning_upside",
    "outreach_surface",
    "role_plausibility",
]

# Searched (case-insensitively) in a contact's ``role`` field by
# company_has_founder_surface().
FOUNDER_ROLES = re.compile(
    r"\b(founder|co-founder|cofounder|ceo|cto|chief|president|research\s*lead|pi)\b",
    re.I,
)
# Funding keywords searched for in a company's ``stage`` field.
FUNDING_STAGE = re.compile(r"\b(seed|series\s*[a-d]|stealth|pre-seed|funded|raised)\b", re.I)
# Funding keywords searched for in a company's ``notes`` field.
FUNDING_NOTES = re.compile(r"\b(seed|series\s*[a-d]|funding|raised|stealth|launch)\b", re.I)
# Company statuses the company report sections consider; companies with any
# other status are skipped by those sections.
ACTIVE_COMPANY_STATUSES = {"target", "watching", "in_conversation"}
# Lead statuses that exclude a lead from strong_not_drafted().
DRAFT_DONE_STATUSES = {
    "drafted",
    "messaged",
    "applied",
    "followed_up",
    "interviewing",
    "closed",
    "archived",
}
# Lead statuses at which a lead with output proof is no longer listed by
# drafted_not_applied().
APPLIED_DONE_STATUSES = {"applied", "followed_up", "interviewing", "closed", "archived"}


def parse_date(value: str) -> date | None:
    """Parse an ISO-format date, returning ``None`` for blank or invalid input.

    The input is coerced with ``str()`` and stripped before parsing.
    """
    text = str(value).strip()
    if text:
        try:
            return date.fromisoformat(text)
        except ValueError:
            pass
    return None


def load_leads() -> list[dict]:
    """Load every lead file found under ``LEADS_DIR``.

    Each returned frontmatter dict is augmented with ``_path`` (the source
    file) and ``_body`` (the text returned alongside the frontmatter).
    """
    loaded: list[dict] = []
    for lead_path in list_entity_files(LEADS_DIR):
        front, text = parse_frontmatter(lead_path)
        front["_path"], front["_body"] = lead_path, text
        loaded.append(front)
    return loaded


def load_contacts() -> list[dict]:
    """Load every contact file found under ``CONTACTS_DIR``.

    Each returned frontmatter dict is augmented with ``_path`` (the source
    file); the body text is discarded.
    """
    loaded: list[dict] = []
    for contact_path in list_entity_files(CONTACTS_DIR):
        front = parse_frontmatter(contact_path)[0]
        front["_path"] = contact_path
        loaded.append(front)
    return loaded


def load_companies() -> list[dict]:
    """Load every company file found under ``COMPANIES_DIR``.

    Each returned frontmatter dict is augmented with ``_path`` (the source
    file) and ``_body`` (the text returned alongside the frontmatter).
    """
    loaded: list[dict] = []
    for company_path in list_entity_files(COMPANIES_DIR):
        front, text = parse_frontmatter(company_path)
        front["_path"], front["_body"] = company_path, text
        loaded.append(front)
    return loaded


def parse_scores_from_text(text: str, fields: list[str] | None = None) -> dict[str, int]:
    """Extract ``field: N`` / ``field = N`` scores embedded in free text.

    For each field name, the first case-insensitive occurrence of the name
    followed by ``:`` or ``=`` and an integer is considered. The value is
    kept only when it lies in the range 1-5.

    The whole run of digits is captured, so ``novelty: 10`` is rejected as
    out of range rather than being read as a score of 1.

    Args:
        text: Free text to scan; an empty value yields no scores.
        fields: Field names to look for. Defaults to ``SCORE_FIELDS``.

    Returns:
        Mapping of field name to its in-range score.
    """
    scores: dict[str, int] = {}
    if not text:
        return scores
    wanted = SCORE_FIELDS if fields is None else fields
    for field in wanted:
        match = re.search(rf"{re.escape(field)}\s*[:=]\s*(\d+)", text, re.I)
        if match is None:
            continue
        value = int(match.group(1))
        if 1 <= value <= 5:
            scores[field] = value
    return scores


def company_scores(meta: dict) -> tuple[int, int, dict[str, int]]:
    """Return ``(total, max_score, scores)`` for a company record.

    Scores parsed from the ``notes`` text form the baseline. A frontmatter
    field with the same name overrides the parsed value when it converts to
    an integer in the range 1-5; blank, missing, non-numeric, and
    out-of-range values are ignored.
    """
    collected = parse_scores_from_text(str(meta.get("notes", "")))
    for name in SCORE_FIELDS:
        candidate = meta.get(name)
        if candidate in (None, ""):
            continue
        try:
            number = int(candidate)
        except (TypeError, ValueError):
            continue
        else:
            if 1 <= number <= 5:
                collected[name] = number
    return sum(collected.values()), 5 * len(SCORE_FIELDS), collected


def company_is_researched(meta: dict) -> bool:
    """Return True when the company record shows evidence of research.

    A company counts as researched when either:

    * its ``notes`` field is longer than 40 characters after stripping, or
    * its ``_body`` contains a ``# Why target`` heading followed by
      non-blank text before the next ``#``.
    """
    notes = str(meta.get("notes", "")).strip()
    if len(notes) > 40:
        return True
    body = str(meta.get("_body", ""))
    # Inspect the section only when the heading itself is present. A bare
    # "Why target" mention elsewhere in the body must not make unrelated
    # leading text count as the section.
    _, heading, remainder = body.partition("# Why target")
    if not heading:
        return False
    return bool(remainder.split("#", 1)[0].strip())


def company_has_founder_surface(meta: dict, contacts: list[dict]) -> bool:
    """Return True when there is a way to reach the company's founders.

    Any one of the following qualifies:

    * a non-blank ``x_url``, ``github_url`` or ``linkedin_url`` on the company,
    * an ``outreach_surface`` score of 4 or more,
    * a contact whose ``company_id`` equals the company's ``id`` and whose
      ``role`` matches ``FOUNDER_ROLES``.
    """
    # ``or ""`` keeps a None value from being stringified into the truthy
    # text "None", which would wrongly count as a URL.
    if any(str(meta.get(key) or "").strip() for key in ("x_url", "github_url", "linkedin_url")):
        return True
    _, _, scores = company_scores(meta)
    if scores.get("outreach_surface", 0) >= 4:
        return True
    company_id = str(meta.get("id", ""))
    return any(
        str(contact.get("company_id", "")) == company_id
        and FOUNDER_ROLES.search(str(contact.get("role", ""))) is not None
        for contact in contacts
    )


def company_recently_funded(meta: dict, today: date, days: int = 60) -> bool:
    """Return True when the company looks recently funded.

    A funding keyword must appear in ``stage`` (``FUNDING_STAGE``) or in
    ``notes`` (``FUNDING_NOTES``). Given that, the company qualifies when its
    ``date_added`` (or, failing that, ``date_updated``) is at most ``days``
    days before ``today``. A funding keyword in ``stage`` qualifies on its
    own, whatever the dates are.
    """
    stage_hit = FUNDING_STAGE.search(str(meta.get("stage", "")))
    notes_text = str(meta.get("notes", ""))
    if not stage_hit and not FUNDING_NOTES.search(notes_text):
        return False
    reference = parse_date(str(meta.get("date_added", "")))
    if not reference:
        reference = parse_date(str(meta.get("date_updated", "")))
    if reference and (today - reference).days <= days:
        return True
    return bool(stage_hit)


def format_lead(meta: dict) -> str:
    """Render a lead as ``id — title @ company [status]``.

    ``id`` is required; the other fields fall back to ``?`` when absent.
    """
    lead_id = meta["id"]
    title = meta.get("title", "?")
    company = meta.get("company", "?")
    status = meta.get("status", "?")
    return f"{lead_id} — {title} @ {company} [{status}]"


def format_contact(meta: dict) -> str:
    """Render a contact as ``id — name — role at company — company_id``.

    ``id`` is required. Missing ``name``, ``role`` and ``company`` show as
    ``?``, as does a missing or empty ``company_id``.
    """
    company_ref = meta.get("company_id") or "?"
    parts = (
        meta["id"],
        meta.get("name", "?"),
        f"{meta.get('role', '?')} at {meta.get('company', '?')}",
        company_ref,
    )
    return " — ".join(str(part) for part in parts)


def format_contact_followup(meta: dict) -> str:
    """Render a contact's follow-up line.

    A missing or empty ``last_contacted`` shows as ``unknown`` and a missing
    or empty ``next_followup`` shows as ``no date``. ``id`` is required.
    """
    next_date = meta.get("next_followup") or "no date"
    last_date = meta.get("last_contacted") or "unknown"
    contact_id = meta["id"]
    return f"{contact_id} — messaged {last_date} — next_followup {next_date}"


def format_company(meta: dict, tag: str) -> str:
    """Render a company as ``id — name — score — tag``.

    The score is shown as ``total/max`` when at least one score is known and
    as ``unscored`` otherwise. ``id`` is required; a missing ``name`` shows
    as ``?``.
    """
    total, ceiling, breakdown = company_scores(meta)
    if breakdown:
        summary = f"{total}/{ceiling}"
    else:
        summary = "unscored"
    return f"{meta['id']} — {meta.get('name', '?')} — {summary} — {tag}"


def numbered(items: list[str], limit: int | None = None) -> list[str]:
    """Prefix each item with its 1-based position (``"1. item"``).

    When ``limit`` is given, only the first ``limit`` items are returned.
    """
    shown = items[:limit] if limit is not None else items
    lines = []
    for position, text in enumerate(shown, start=1):
        lines.append(f"{position}. {text}")
    return lines


def print_section(title: str, items: list[str], numbered_output: bool = False) -> None:
    """Print a titled list followed by a blank line.

    Items are printed indented, either as ``- item`` bullets or, when
    ``numbered_output`` is true, as ``N. item`` lines. An empty list prints
    ``(none)``.
    """
    print(title + ":")
    if not items:
        print("  (none)")
    elif numbered_output:
        for entry in numbered(items):
            print(f"  {entry}")
    else:
        for entry in items:
            print(f"  - {entry}")
    print()


def top_target_companies(companies: list[dict], contacts: list[dict]) -> list[str]:
    """List active, scored companies from highest to lowest total score.

    Ties are ordered by company ``name``. Each line is tagged:

    * ``contact needed`` - no contact references the company's ``id``,
    * ``founder reachable`` - contacts exist and a founder surface is found,
    * ``watching`` - contacts exist but no founder surface is found.
    """
    ids_with_contacts = {str(person.get("company_id", "")) for person in contacts}
    candidates = []
    for company in companies:
        if company.get("status") not in ACTIVE_COMPANY_STATUSES:
            continue
        total, _, scores = company_scores(company)
        if not scores:
            continue
        if str(company.get("id", "")) not in ids_with_contacts:
            label = "contact needed"
        elif company_has_founder_surface(company, contacts):
            label = "founder reachable"
        else:
            label = "watching"
        candidates.append((total, company, label))
    ordered = sorted(candidates, key=lambda row: (-row[0], row[1].get("name", "")))
    return [format_company(company, label) for _, company, label in ordered]


def companies_needing_contacts(companies: list[dict], contacts: list[dict]) -> list[str]:
    """List active, scored companies that no contact references.

    The result is the sorted formatted lines, each tagged ``contact needed``.
    """
    ids_with_contacts = {str(person.get("company_id", "")) for person in contacts}
    missing = []
    for company in companies:
        if company.get("status") not in ACTIVE_COMPANY_STATUSES:
            continue
        total, _, scores = company_scores(company)
        # NOTE(review): with no scores the total is 0, so this test reduces
        # to "skip unscored companies" and the 24 threshold never excludes a
        # scored company. Confirm whether a minimum score was intended.
        if not scores and total < 24:
            continue
        if str(company.get("id", "")) not in ids_with_contacts:
            missing.append(format_company(company, "contact needed"))
    missing.sort()
    return missing


def contacts_needing_first_message(contacts: list[dict]) -> list[str]:
    """List contacts that have not been messaged yet.

    A contact is listed when its status is ``found``, ``researched`` or
    ``drafted`` and no ``message`` or ``email`` interaction proof exists for
    its ``id``. The result is sorted.
    """
    already_handled = {"messaged", "replied", "followed_up", "dead", "archived"}
    awaiting = {"found", "researched", "drafted"}
    pending = []
    for contact in contacts:
        status = contact.get("status")
        if status in already_handled:
            continue
        if contact_has_interaction_proof(str(contact.get("id", "")), {"message", "email"}):
            continue
        if status in awaiting:
            pending.append(format_contact(contact))
    pending.sort()
    return pending


def recently_funded_not_researched(companies: list[dict], today: date) -> list[str]:
    """List active companies that look recently funded but are not researched.

    The result is the sorted formatted lines, each tagged ``needs research``.
    """
    flagged = [
        format_company(company, "needs research")
        for company in companies
        if company.get("status") in ACTIVE_COMPANY_STATUSES
        and company_recently_funded(company, today)
        and not company_is_researched(company)
    ]
    return sorted(flagged)


def high_fit_no_open_role(companies: list[dict], leads: list[dict]) -> list[str]:
    """List active companies scoring at least 28 that have no tracked open role.

    A role counts as tracked when a lead references the company's ``id``, is
    not ``closed`` or ``archived``, and has a ``strong`` or ``moderate``
    match. The result is sorted, each line tagged ``no open role tracked``.
    """
    finished = {"closed", "archived"}
    good_match = {"strong", "moderate"}
    flagged = []
    for company in companies:
        if company.get("status") not in ACTIVE_COMPANY_STATUSES:
            continue
        total = company_scores(company)[0]
        if total < 28:
            continue
        company_id = str(company.get("id", ""))
        has_open_role = any(
            str(lead.get("company_id", "")) == company_id
            and lead.get("status") not in finished
            and lead.get("match") in good_match
            for lead in leads
        )
        if not has_open_role:
            flagged.append(format_company(company, "no open role tracked"))
    return sorted(flagged)


def founder_outreach_available(companies: list[dict], contacts: list[dict]) -> list[str]:
    """List active companies that have a founder surface.

    Researched companies are tagged ``founder outreach available``; the
    others are tagged ``founder reachable — needs research``. The result is
    sorted.
    """
    reachable = []
    for company in companies:
        if company.get("status") not in ACTIVE_COMPANY_STATUSES:
            continue
        if not company_has_founder_surface(company, contacts):
            continue
        if company_is_researched(company):
            label = "founder outreach available"
        else:
            label = "founder reachable — needs research"
        reachable.append(format_company(company, label))
    reachable.sort()
    return reachable


def strong_not_drafted(leads: list[dict]) -> list[str]:
    """List strong-match leads that still need a draft.

    A lead is listed when its ``match`` is ``strong``, its status is not in
    ``DRAFT_DONE_STATUSES``, and it has no output proof. The result is sorted.
    """
    pending = [
        format_lead(lead)
        for lead in leads
        if lead.get("match") == "strong"
        and lead.get("status") not in DRAFT_DONE_STATUSES
        and not lead_has_output_proof(lead)
    ]
    return sorted(pending)


def leads_needing_contact(leads: list[dict]) -> list[str]:
    """List leads that still need a contact to be found.

    A lead is listed when its status is ``contact_needed`` or ``researched``,
    or when it is ``found`` with a ``strong``/``moderate`` match and has no
    entries in ``contacts``. The result is sorted.
    """
    flagged = []
    for lead in leads:
        status = lead.get("status")
        if status in {"contact_needed", "researched"}:
            needs_contact = True
        elif status == "found" and lead.get("match") in {"strong", "moderate"}:
            needs_contact = not (lead.get("contacts") or [])
        else:
            needs_contact = False
        if needs_contact:
            flagged.append(format_lead(lead))
    return sorted(flagged)


def drafted_not_applied(leads: list[dict]) -> list[str]:
    """List leads that have a draft but no application yet.

    A lead is listed when its status is ``drafted``, or when it has output
    proof and its status is not in ``APPLIED_DONE_STATUSES``. The result is
    sorted.
    """
    pending = []
    for lead in leads:
        state = lead.get("status")
        if state != "drafted":
            if not lead_has_output_proof(lead):
                continue
            if state in APPLIED_DONE_STATUSES:
                continue
        pending.append(format_lead(lead))
    pending.sort()
    return pending


def messages_needing_followup(contacts: list[dict], today: date) -> list[str]:
    """List contacts whose ``next_followup`` date is today or earlier.

    Contacts without a parseable ``next_followup`` are skipped. The result is
    sorted.
    """
    due = []
    for contact in contacts:
        when = parse_date(str(contact.get("next_followup", "")))
        if not when or when > today:
            continue
        due.append(format_contact_followup(contact))
    return sorted(due)


def applications_needing_followup(leads: list[dict], today: date) -> list[str]:
    """List ``applied`` leads last updated at least 7 days before ``today``.

    Leads without a parseable ``date_updated`` are skipped. The result is
    sorted.
    """
    waiting = []
    for lead in leads:
        if lead.get("status") != "applied":
            continue
        last_update = parse_date(str(lead.get("date_updated", "")))
        if not last_update:
            continue
        age_days = (today - last_update).days
        if age_days >= 7:
            waiting.append(format_lead(lead))
    waiting.sort()
    return waiting


def stale_leads(leads: list[dict], today: date, days: int = 14) -> list[str]:
    """List open leads that are stale or poorly matched.

    ``closed`` and ``archived`` leads are ignored. Of the rest, a lead is
    listed when its match is ``weak`` or ``bad`` (whatever its age), or when
    its ``date_updated`` (falling back to ``date_found``) is more than
    ``days`` days before ``today``. The result is de-duplicated and sorted.
    """
    threshold = today - timedelta(days=days)
    stale: set[str] = set()
    for lead in leads:
        if lead.get("status") in {"closed", "archived"}:
            continue
        if lead.get("match") in {"weak", "bad"}:
            stale.add(format_lead(lead))
            continue
        last_touch = parse_date(str(lead.get("date_updated", "")))
        if not last_touch:
            last_touch = parse_date(str(lead.get("date_found", "")))
        if last_touch and last_touch < threshold:
            stale.add(format_lead(lead))
    return sorted(stale)


def companies_report(companies: list[dict], contacts: list[dict], leads: list[dict], today: date) -> None:
    """Print the company-focused view: a banner followed by five sections."""
    print("=== Companies ===\n")

    ranked = top_target_companies(companies, contacts)
    print_section("Top target companies", ranked, numbered_output=True)

    uncontacted = companies_needing_contacts(companies, contacts)
    print_section("Companies needing contacts", uncontacted)

    unresearched = recently_funded_not_researched(companies, today)
    print_section("Recently funded companies not researched", unresearched)

    no_role = high_fit_no_open_role(companies, leads)
    print_section("High-fit companies with no open role", no_role)

    reachable = founder_outreach_available(companies, contacts)
    print_section("Companies with founder outreach available", reachable)


def contacts_report(contacts: list[dict]) -> None:
    """Print the contact-focused view: a banner and one numbered section."""
    print("=== Contacts ===\n")
    unmessaged = contacts_needing_first_message(contacts)
    print_section("Contacts needing first message", unmessaged, numbered_output=True)


def followups_report(leads: list[dict], contacts: list[dict], today: date) -> None:
    """Print the follow-up view: due contact follow-ups, then applications."""
    print("=== Follow-ups ===\n")
    due_contacts = messages_needing_followup(contacts, today)
    print_section("Follow-ups due", due_contacts, numbered_output=True)
    due_applications = applications_needing_followup(leads, today)
    print_section("Applications needing follow-up", due_applications)


def strong_report(leads: list[dict]) -> None:
    """Print the strong-leads view: a banner and one numbered section."""
    print("=== Strong leads ===\n")
    undrafted = strong_not_drafted(leads)
    print_section("Strong leads not drafted", undrafted, numbered_output=True)


def today_report(leads: list[dict], contacts: list[dict], companies: list[dict], today: date) -> None:
    """Print the compact daily view.

    Follow-ups due are shown in full. The other sections are truncated: top
    companies and contacts to 3 entries, lead sections to 5.
    """
    print(f"=== Today ({today.isoformat()}) ===\n")

    due = messages_needing_followup(contacts, today)
    print_section("Follow-ups due", due, numbered_output=True)

    top_companies = top_target_companies(companies, contacts)[:3]
    print_section("Top target companies", top_companies, numbered_output=True)

    unmessaged = contacts_needing_first_message(contacts)[:3]
    print_section("Contacts needing first message", unmessaged, numbered_output=True)

    undrafted = strong_not_drafted(leads)[:5]
    print_section("Strong leads not drafted", undrafted, numbered_output=True)

    unapplied = drafted_not_applied(leads)[:5]
    print_section("Drafted but not applied", unapplied)


def full_report(leads: list[dict], contacts: list[dict], companies: list[dict], today: date) -> None:
    """Print the complete report: a banner followed by all twelve sections."""
    print("=== Job hunt report ===\n")

    ranked = top_target_companies(companies, contacts)
    print_section("Top target companies", ranked, numbered_output=True)

    uncontacted = companies_needing_contacts(companies, contacts)
    print_section("Companies needing contacts", uncontacted)

    unmessaged = contacts_needing_first_message(contacts)
    print_section("Contacts needing first message", unmessaged, numbered_output=True)

    unresearched = recently_funded_not_researched(companies, today)
    print_section("Recently funded companies not researched", unresearched)

    no_role = high_fit_no_open_role(companies, leads)
    print_section("High-fit companies with no open role", no_role)

    reachable = founder_outreach_available(companies, contacts)
    print_section("Companies with founder outreach available", reachable)

    undrafted = strong_not_drafted(leads)
    print_section("Strong leads not drafted", undrafted, numbered_output=True)

    contactless = leads_needing_contact(leads)
    print_section("Leads needing contact research", contactless)

    unapplied = drafted_not_applied(leads)
    print_section("Drafted but not applied", unapplied)

    due_messages = messages_needing_followup(contacts, today)
    print_section("Messages needing follow-up", due_messages)

    due_applications = applications_needing_followup(leads, today)
    print_section("Applications needing follow-up", due_applications)

    stale = stale_leads(leads, today)
    print_section("Stale leads older than 14 days", stale)


def main() -> None:
    """Parse the view flags, load all entities, and print the chosen report.

    When several view flags are given, the first one in this order wins:
    ``--today``, ``--companies``, ``--contacts``, ``--followups``,
    ``--strong``. With no flag the full report is printed.
    """
    parser = argparse.ArgumentParser(description="Print job hunt reports.")
    for flag, help_text in (
        ("--today", "Compact today view"),
        ("--companies", "Company-focused view"),
        ("--contacts", "Contact-focused view"),
        ("--followups", "Follow-up focused view"),
        ("--strong", "Strong leads view"),
    ):
        parser.add_argument(flag, action="store_true", help=help_text)
    args = parser.parse_args()

    today = date.today()
    leads = load_leads()
    contacts = load_contacts()
    companies = load_companies()
    # The result is unused here; presumably the call is kept for a side
    # effect inside `common` - TODO confirm.
    load_interactions()

    # Insertion order is the flag priority order.
    views = {
        "today": lambda: today_report(leads, contacts, companies, today),
        "companies": lambda: companies_report(companies, contacts, leads, today),
        "contacts": lambda: contacts_report(contacts),
        "followups": lambda: followups_report(leads, contacts, today),
        "strong": lambda: strong_report(leads),
    }
    for name, render in views.items():
        if getattr(args, name):
            render()
            return
    full_report(leads, contacts, companies, today)


if __name__ == "__main__":
    main()