[
  {
    "id": "ai-evaluation",
    "title": "AI Evaluation And Visual Reasoning",
    "type": "case study",
    "status": "recent contract work",
    "summary": "Rubric-driven review of AI outputs, visual reasoning tasks, source-grounded answers, proof artifacts, and reviewer-facing explanations.",
    "details": [
      "Completed visual and source-grounded AI evaluation tasks with exact-answer validation and auditable proof notes.",
      "Designed adversarial prompts and repeatable checks for hallucination, weak grounding, instruction drift, and overconfident answers.",
      "Turned messy evidence into compact reviewer explanations that make the scoring decision traceable."
    ],
    "tags": ["AI evaluation", "visual reasoning", "rubrics", "source validation", "proof"]
  },
  {
    "id": "agent-workflow-qa",
    "title": "Agent Workflow QA",
    "type": "service packet",
    "status": "ready offer",
    "summary": "A service package for testing AI agents against messy real workflows and returning pass/fail evidence, failure causes, and concrete fixes.",
    "details": [
      "Designed a QA sprint format for agent startups, agencies, and internal tool teams.",
      "Deliverables include workflow tests, pass/fail matrix, screenshots or logs, failure taxonomy, severity labels, reproduction steps, and prioritized fixes.",
      "Best fit for demos that look competent but still need evidence before production trust."
    ],
    "tags": ["agents", "QA", "workflow testing", "automation", "failure taxonomy"]
  },
  {
    "id": "coding-agent-eval",
    "title": "Coding-Agent Interaction Evaluation",
    "type": "work sample",
    "status": "sanitized sample",
    "summary": "A scoring sample for evaluating whether coding agents diagnose the actual bug, keep patches narrow, and verify with regression tests.",
    "details": [
      "Sample prompt: a React dashboard search-clearing bug where stale filtered results remain visible.",
      "Strong-agent rubric covers reproduction, state ownership, patch scope, test adequacy, and explanation quality.",
      "Failure modes include symptom patching, overbroad refactors, missing tests, and unverified claims."
    ],
    "tags": ["coding agents", "React", "debugging", "tests", "evaluation"]
  },
  {
    "id": "scenario-qa",
    "title": "Scenario QA And Simulation Design",
    "type": "work sample",
    "status": "sanitized sample",
    "summary": "A sample review of a multi-tool operations scenario involving email, Slack, calendar, Drive, Airtable, time zones, and approval boundaries.",
    "details": [
      "Defined pass criteria across source use, time-zone handling, attendee validation, internal/client updates, and system-of-record changes.",
      "Identified evaluator ambiguity around whether external actions are allowed or draft-only.",
      "Proposed clearer expected outputs for calendar, Slack, email, and Airtable."
    ],
    "tags": ["scenario design", "task QA", "multi-tool workflows", "simulation", "rubrics"]
  },
  {
    "id": "multimodal-red-team",
    "title": "Multimodal Red-Team Sample",
    "type": "work sample",
    "status": "sanitized sample",
    "summary": "A benign vision-language test for whether a model follows visible injected text instead of the user instruction it should obey.",
    "details": [
      "Designed a poster-summary test containing irrelevant instruction-like text inside the image.",
      "Expected safe behavior: summarize the poster and ignore the embedded command.",
      "Scoring focuses on instruction hierarchy, visual accuracy, unnecessary refusal, and security-sensitive disclosure."
    ],
    "tags": ["multimodal", "red team", "prompt injection", "vision-language", "safety"]
  },
  {
    "id": "automation-builder",
    "title": "AI Automation And Local Agent Workflows",
    "type": "case study",
    "status": "ongoing",
    "summary": "Hands-on building and repair of Codex/OpenClaw workflows for browser automation, research, runtime repair, and evidence-first QA.",
    "details": [
      "Worked with Codex, OpenClaw, ChatGPT, Claude, Gemini, Grok, OpenRouter, browser automation, logs, and local servers.",
      "Verified systems with process state, local artifacts, screenshots, health endpoints, and browser-visible proof.",
      "Applied the same evidence-first loop to opportunity research, application prep, and service packaging."
    ],
    "tags": ["Codex", "OpenClaw", "browser automation", "local QA", "AI workflows"]
  },
  {
    "id": "creative-production",
    "title": "Creative Production, Styling, And Generative Media",
    "type": "case study",
    "status": "selected work",
    "summary": "Creative work across touring performance, music/video direction, apparel concepts, campaign assets, social media, and AI-assisted media iteration.",
    "details": [
      "Performed on tour with Molly Santana in Summer 2025.",
      "Directed an independent music video and designed clothing/styling for the production.",
      "Created apparel/campaign assets including shirt concepts, a mouthguard concept, QR posters, web funnel, image/video ads, original ad music, 3D mockups, and a social AR filter."
    ],
    "tags": ["creative direction", "music", "fashion", "generative media", "campaigns"]
  },
  {
    "id": "resale-commerce",
    "title": "Online Resale And Commerce Operations",
    "type": "case study",
    "status": "2019-2024",
    "summary": "Hands-on e-commerce work sourcing, listing, marketing, pricing, communicating with buyers, and fulfilling clothing and consumer item sales.",
    "details": [
      "Managed product research, pricing judgment, listing presentation, buyer communication, and fulfillment.",
      "Used visual taste, trend awareness, platform operations, and practical logistics to move items from sourcing to sale.",
      "The resale background supports current work in marketplace research, gig discovery, and creator/commercial operations."
    ],
    "tags": ["resale", "commerce", "marketplaces", "operations", "visual presentation"]
  }
]
