#!/usr/bin/env python3
"""Process CommBank transactions with categorization."""

import json
import re
import hashlib
from datetime import datetime, timedelta
from collections import defaultdict

# Load merchant categories
with open('/Users/kimhansen/Desktop/03 Workspace/ceo-agents/chl-effectiveness/.claude/context/merchant-categories.json', 'r') as f:
    categories_config = json.load(f)

mappings = categories_config['mappings']
exclude_from_spending = categories_config['exclude_from_spending']
variable_budget_categories = categories_config['variable_budget_categories']

def categorize_transaction(description):
    """Match description against patterns and return category."""
    for mapping in mappings:
        pattern = mapping['pattern']
        try:
            if re.search(pattern, description, re.IGNORECASE):
                return mapping['category'], mapping.get('notes', '')
        except re.error:
            continue
    return 'other', 'Uncategorized'

def parse_csv_line(line):
    """Parse a CommBank CSV line."""
    # Format: Date,Amount,Description,Balance
    parts = []
    current = ''
    in_quotes = False

    for char in line:
        if char == '"':
            in_quotes = not in_quotes
        elif char == ',' and not in_quotes:
            parts.append(current.strip().strip('"'))
            current = ''
        else:
            current += char
    parts.append(current.strip().strip('"'))

    if len(parts) >= 3:
        date_str = parts[0]
        amount_str = parts[1].replace('+', '').replace(',', '')
        description = parts[2]

        try:
            amount = float(amount_str)
            date = datetime.strptime(date_str, '%d/%m/%Y')
            return {
                'date': date,
                'amount': amount,
                'description': description
            }
        except (ValueError, IndexError):
            return None
    return None

def generate_hash(date, description, amount):
    """Generate unique hash for deduplication."""
    key = f"{date.strftime('%Y-%m-%d')}|{description}|{amount}"
    return hashlib.md5(key.encode()).hexdigest()[:12]

# Read and parse transactions
transactions = []
csv_path = '/Users/kimhansen/Desktop/03 Workspace/ceo-agents/chl-effectiveness/.claude/data/transactions/imports/NetBank Accounts Data.csv'

with open(csv_path, 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue

        tx = parse_csv_line(line)
        if tx:
            category, notes = categorize_transaction(tx['description'])
            tx['category'] = category
            tx['category_notes'] = notes
            tx['hash'] = generate_hash(tx['date'], tx['description'], tx['amount'])
            transactions.append(tx)

# Sort by date descending
transactions.sort(key=lambda x: x['date'], reverse=True)

# Calculate date ranges
today = datetime.now()
week_start = today - timedelta(days=7)
month_start = today - timedelta(days=30)

# Separate by timeframe
this_week = [t for t in transactions if t['date'] >= week_start]
this_month = [t for t in transactions if t['date'] >= month_start]
all_transactions = transactions

# Aggregate by category
def aggregate_spending(txs):
    """Aggregate spending by category, excluding non-spending categories."""
    by_category = defaultdict(lambda: {'total': 0, 'count': 0, 'transactions': []})

    for tx in txs:
        if tx['amount'] < 0:  # Only expenses
            cat = tx['category']
            if cat not in exclude_from_spending:
                by_category[cat]['total'] += abs(tx['amount'])
                by_category[cat]['count'] += 1
                by_category[cat]['transactions'].append(tx)

    return by_category

# Weekly budget reference
weekly_budgets = {
    'groceries': 150,
    'dining_out': 116,
    'transport': 70,
    'sports_fitness': 50,
    'personal': 50,
    'entertainment': 20,
    'retail': 30,
    'other': 50,
}

# Print report
print("=" * 80)
print("TRANSACTION CATEGORIZATION REPORT")
print("=" * 80)
print(f"Total transactions: {len(transactions)}")
print(f"Date range: {transactions[-1]['date'].strftime('%Y-%m-%d')} to {transactions[0]['date'].strftime('%Y-%m-%d')}")
print()

# Category summary for all transactions
print("-" * 80)
print("CATEGORY BREAKDOWN (ALL TRANSACTIONS)")
print("-" * 80)

all_by_category = aggregate_spending(all_transactions)
total_spending = sum(c['total'] for c in all_by_category.values())

for cat in sorted(all_by_category.keys(), key=lambda x: all_by_category[x]['total'], reverse=True):
    data = all_by_category[cat]
    pct = (data['total'] / total_spending * 100) if total_spending > 0 else 0
    print(f"{cat:20} {data['count']:3} txs  ${data['total']:>8.2f}  ({pct:5.1f}%)")

print("-" * 80)
print(f"{'TOTAL SPENDING':20}       ${total_spending:>8.2f}")
print()

# This week summary
print("-" * 80)
print("THIS WEEK (Last 7 days)")
print("-" * 80)

week_by_category = aggregate_spending(this_week)
week_total = sum(c['total'] for c in week_by_category.values())

print(f"{'Category':20} {'Spent':>10} {'Budget':>10} {'Status':>12}")
print("-" * 60)

for cat in variable_budget_categories:
    spent = week_by_category.get(cat, {'total': 0})['total']
    budget = weekly_budgets.get(cat, 0)
    if budget > 0:
        variance = spent - budget
        status = f"{'OVER' if variance > 0 else 'UNDER'} ${abs(variance):.0f}"
    else:
        status = "-"
    print(f"{cat:20} ${spent:>8.2f} ${budget:>8.2f} {status:>12}")

print("-" * 60)
week_budget = sum(weekly_budgets.values())
week_variance = week_total - week_budget
print(f"{'TOTAL':20} ${week_total:>8.2f} ${week_budget:>8.2f} {'OVER' if week_variance > 0 else 'UNDER'} ${abs(week_variance):.0f}")
print()

# Uber Eats spotlight
print("-" * 80)
print("UBER EATS ANALYSIS")
print("-" * 80)

uber_eats = [t for t in all_transactions if 'UBER' in t['description'].upper() and 'EATS' in t['description'].upper()]
uber_total = sum(abs(t['amount']) for t in uber_eats)
uber_count = len(uber_eats)

print(f"Total Uber Eats orders: {uber_count}")
print(f"Total Uber Eats spend: ${uber_total:.2f}")
print(f"Average order: ${uber_total/uber_count:.2f}" if uber_count > 0 else "No orders")
print()

# Week-by-week Uber Eats
uber_by_week = defaultdict(lambda: {'count': 0, 'total': 0})
for t in uber_eats:
    week_key = t['date'].strftime('%Y-W%W')
    uber_by_week[week_key]['count'] += 1
    uber_by_week[week_key]['total'] += abs(t['amount'])

print("Uber Eats by week:")
for week in sorted(uber_by_week.keys(), reverse=True)[:6]:
    data = uber_by_week[week]
    print(f"  {week}: {data['count']} orders, ${data['total']:.2f}")
print()

# Uncategorized transactions
print("-" * 80)
print("TRANSACTIONS CATEGORIZED AS 'OTHER' (Review needed)")
print("-" * 80)

other_txs = [t for t in transactions if t['category'] == 'other' and t['amount'] < 0]
if other_txs:
    for t in other_txs[:20]:
        print(f"  {t['date'].strftime('%Y-%m-%d')} ${abs(t['amount']):>8.2f}  {t['description'][:60]}")
else:
    print("  None - all transactions categorized!")
print()

# Monthly spending trend
print("-" * 80)
print("MONTHLY SPENDING TREND")
print("-" * 80)

by_month = defaultdict(lambda: {'total': 0, 'count': 0})
for t in transactions:
    if t['amount'] < 0 and t['category'] not in exclude_from_spending:
        month_key = t['date'].strftime('%Y-%m')
        by_month[month_key]['total'] += abs(t['amount'])
        by_month[month_key]['count'] += 1

for month in sorted(by_month.keys(), reverse=True):
    data = by_month[month]
    print(f"  {month}: ${data['total']:>8.2f} ({data['count']} transactions)")

print()
print("=" * 80)
