#!/usr/bin/env python3
"""Save processed transactions to transactions-log.json."""

import csv
import hashlib
import json
import math
import re
from collections import defaultdict
from datetime import datetime

# Load merchant categories
# JSON text is UTF-8 by specification; naming the encoding keeps non-ASCII
# merchant patterns decoding the same way regardless of the platform locale.
with open('/Users/kimhansen/Desktop/03 Workspace/ceo-agents/chl-effectiveness/.claude/context/merchant-categories.json', 'r', encoding='utf-8') as f:
    categories_config = json.load(f)

# Rules consulted in order by categorize_transaction(); each entry is read
# for 'pattern', 'category' and an optional 'notes' key.
mappings = categories_config['mappings']

def categorize_transaction(description):
    """Return ``(category, notes)`` for the first rule matching *description*.

    Rules come from the module-level ``mappings`` and are tried in order with
    a case-insensitive regex search. A rule whose pattern is not a valid
    regular expression is skipped. When nothing matches, the result is
    ``('other', 'Uncategorized')``.
    """
    for rule in mappings:
        try:
            matched = re.search(rule['pattern'], description, re.IGNORECASE)
        except re.error:
            # Invalid pattern in the config: ignore this rule, keep looking.
            continue
        if matched:
            return rule['category'], rule.get('notes', '')
    return 'other', 'Uncategorized'

def parse_csv_line(line):
    """Parse one line of a CommBank CSV export into a transaction dict.

    The first three comma-separated fields are read as date (DD/MM/YYYY),
    amount and description; any further fields are ignored. Surrounding
    whitespace is removed from every field.

    Args:
        line: A single CSV record.

    Returns:
        A dict with 'date' (datetime), 'amount' (float) and 'description'
        (str), or None when the line has fewer than three fields, is not
        parseable as CSV, or has a date or amount that cannot be parsed.
    """
    try:
        # csv.reader handles quoted fields, including doubled quotes ("")
        # that stand for a literal quote character inside a quoted field.
        # skipinitialspace lets a quoted field follow ", " as well as ",".
        row = next(csv.reader([line], skipinitialspace=True), [])
    except csv.Error:
        return None

    parts = [field.strip() for field in row]
    if len(parts) < 3:
        return None

    date_str, amount_str, description = parts[:3]
    # Amounts may carry an explicit '+' sign and thousands separators.
    amount_str = amount_str.replace('+', '').replace(',', '')

    try:
        amount = float(amount_str)
        date = datetime.strptime(date_str, '%d/%m/%Y')
    except ValueError:
        return None

    # float() also accepts 'nan' and 'inf'; neither is a usable amount and
    # both would be written out as invalid JSON.
    if not math.isfinite(amount):
        return None

    return {
        'date': date,
        'amount': amount,
        'description': description
    }

def generate_hash(date, description, amount):
    """Return a 12-character hex id derived from date, description and amount.

    The id is the first 12 hex digits of the MD5 digest of
    ``YYYY-MM-DD|description|amount``. Two transactions that agree on all
    three values therefore receive the same id.
    """
    day = date.strftime('%Y-%m-%d')
    fingerprint = "{}|{}|{}".format(day, description, amount)
    digest = hashlib.md5(fingerprint.encode()).hexdigest()
    return digest[:12]

# Read and parse transactions
transactions = []
csv_path = '/Users/kimhansen/Desktop/03 Workspace/ceo-agents/chl-effectiveness/.claude/data/transactions/imports/NetBank Accounts Data.csv'

# 'utf-8-sig' reads plain UTF-8 and also drops a leading byte-order mark.
# str.strip() does not remove a BOM, so one left on the first line would make
# its date unparseable and that row would be skipped without notice.
with open(csv_path, 'r', encoding='utf-8-sig') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue

        tx = parse_csv_line(line)
        # parse_csv_line returns None for lines it cannot interpret; skip them.
        if not tx:
            continue

        category, notes = categorize_transaction(tx['description'])
        tx_hash = generate_hash(tx['date'], tx['description'], tx['amount'])
        transactions.append({
            'id': tx_hash,
            # Stored as an ISO date string so the log is JSON-serializable.
            'date': tx['date'].strftime('%Y-%m-%d'),
            'description': tx['description'],
            'amount': tx['amount'],
            'category': category,
            'category_notes': notes,
            'source_file': 'NetBank Accounts Data.csv'
        })

# Newest first. Dates are 'YYYY-MM-DD' strings, so string order is date order.
transactions.sort(key=lambda entry: entry['date'], reverse=True)

# With the descending sort, the last element is the earliest date and the
# first element is the latest.
if transactions:
    date_range = {
        'start': transactions[-1]['date'],
        'end': transactions[0]['date'],
    }
else:
    date_range = {'start': None, 'end': None}

# Assemble the structure written to the log file; 'by_category' is filled in
# after the per-category totals are computed.
log = {
    'transactions': transactions,
    'processed_hashes': [entry['id'] for entry in transactions],
    'last_import': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'processed_files': ['NetBank Accounts Data.csv'],
    'summary': {
        'total_transactions': len(transactions),
        'date_range': date_range,
        'by_category': {},
    },
}

# Per-category spending: only negative amounts are counted, and totals are
# accumulated as positive values.
by_cat = defaultdict(lambda: {'count': 0, 'total': 0})
for tx in transactions:
    if not tx['amount'] < 0:
        continue
    bucket = by_cat[tx['category']]
    bucket['count'] += 1
    bucket['total'] += abs(tx['amount'])

# Largest total first; totals are rounded to two decimals only after ranking.
ranked = sorted(by_cat.items(), key=lambda item: item[1]['total'], reverse=True)
log['summary']['by_category'] = {
    name: {'count': stats['count'], 'total': round(stats['total'], 2)}
    for name, stats in ranked
}

# Write the log as indented JSON, replacing any existing file.
output_path = '/Users/kimhansen/Desktop/03 Workspace/ceo-agents/chl-effectiveness/.claude/data/transactions/transactions-log.json'
with open(output_path, 'w') as f:
    json.dump(log, f, indent=2)

# Console report: how many transactions were saved, then up to ten
# categories in the order stored in the summary.
print(f"Saved {len(transactions)} transactions to {output_path}")
print("\nTop categories by spending:")
top_categories = list(log['summary']['by_category'].items())[:10]
for cat, data in top_categories:
    print(f"  {cat}: {data['count']} txs, ${data['total']:,.2f}")
