Gmail Detective Forensic Collection
Forensically collect Gmail data with 100+ targeted searches, full body analysis, longevity-first scoring, behavioral metrics, and relationship graph construction
Source Code
import fs from "fs";
import path from "path";
// CLI contract: exactly three positional output paths, in this order.
const [analysisOutputPath, relationshipsOutputPath, writingSamplesOutputPath] =
  process.argv.slice(2);

// Abort with a usage hint unless every output path was supplied.
if (![analysisOutputPath, relationshipsOutputPath, writingSamplesOutputPath].every(Boolean)) {
  console.error(
    "Usage: gmail.detective.collect <analysisOutputPath> <relationshipsOutputPath> <writingSamplesOutputPath>"
  );
  process.exit(1);
}
// Collection limits - intentionally more aggressive than Gmail Discovery.
const INBOX_MAX_MESSAGES = 200;
const SENT_MAX_MESSAGES = 150;
// Per-query cap for discovery searches (raised for forensic depth).
const DISCOVERY_MAX_PER_QUERY = 25;
// A full year gives better timeline/relationship analysis.
const LOOKBACK_DAYS = 365;
// Start of the lookback window: LOOKBACK_DAYS whole days (86,400,000 ms each) before now.
const lookbackDate = new Date(Date.now() - LOOKBACK_DAYS * 86_400_000);
// Render a Date as an en-US short date, e.g. "Jan 15, 2024".
const formatDate = (d) => {
  const shortDateOptions = { month: "short", day: "numeric", year: "numeric" };
  return d.toLocaleDateString("en-US", shortDateOptions);
};
/**
 * Render how long ago `date` was as a compact label.
 *
 * Produces "just now", "<n>m ago", "<n>h ago", "<n>d ago", "<n>w ago",
 * "<n>mo ago" or "<n>y ago"; a missing or invalid Date yields "unknown".
 * Months are approximated as 30 days and years as 365 days.
 *
 * @param {Date} date - Moment to describe relative to the current time.
 * @returns {string} Relative-time label.
 */
function formatTimeAgo(date) {
  if (!date || Number.isNaN(date.getTime())) return "unknown";

  const elapsedSeconds = Math.floor((Date.now() - date.getTime()) / 1000);
  // Anything under a minute (including timestamps in the future) is "just now".
  if (elapsedSeconds < 60) return "just now";

  const elapsedMinutes = Math.floor(elapsedSeconds / 60);
  const elapsedHours = Math.floor(elapsedMinutes / 60);
  const elapsedDays = Math.floor(elapsedHours / 24);

  // Ordered from finest to coarsest unit; the first applicable rung wins.
  const ladder = [
    { applies: elapsedMinutes < 60, label: `${elapsedMinutes}m ago` },
    { applies: elapsedHours < 24, label: `${elapsedHours}h ago` },
    { applies: elapsedDays < 7, label: `${elapsedDays}d ago` },
    { applies: elapsedDays < 30, label: `${Math.floor(elapsedDays / 7)}w ago` },
    { applies: elapsedDays < 365, label: `${Math.floor(elapsedDays / 30)}mo ago` },
  ];
  const rung = ladder.find((candidate) => candidate.applies);
  return rung ? rung.label : `${Math.floor(elapsedDays / 365)}y ago`;
}
// =============================================================================
// DISCOVERY QUERIES - 100+ targeted searches organized by tier
//
// Each entry is a plain object { tier, category, query }:
//   tier     - number 1-8 matching the section banners below
//   category - label copied onto the per-query result object
//   query    - search string handed to fetchMessageIds() as the Gmail `q` filter
//
// NOTE: unlike the inbox/sent listings, these queries carry no `newer_than:`
// restriction, so discovery results are not limited to LOOKBACK_DAYS.
// =============================================================================
const DISCOVERY_QUERIES = [
// ==========================================================================
// TIER 1: HIGH-CONFIDENCE PERSONAL FACTS
// User wrote about themselves OR verification emails prove ownership
// ==========================================================================
// Family - from user's own sent emails
{
tier: 1,
category: "children",
query: 'in:sent ("my son" OR "my daughter" OR "my kids" OR "the kids" OR "our kids")',
},
{
tier: 1,
category: "partner",
query:
'in:sent ("my husband" OR "my wife" OR "my partner" OR "my boyfriend" OR "my girlfriend" OR "my fiancé" OR "my fiancee") -"business partner" -"design partner"',
},
{
tier: 1,
category: "pets",
query: 'in:sent ("my dog" OR "my cat" OR "our dog" OR "our cat" OR "my pet" OR "our pet")',
},
{
tier: 1,
category: "siblings",
query: 'in:sent ("my brother" OR "my sister" OR "my sibling")',
},
{
tier: 1,
category: "parents",
query: 'in:sent ("my mom" OR "my dad" OR "my mother" OR "my father" OR "my parents")',
},
{
tier: 1,
category: "in_laws",
query:
'in:sent ("my mother-in-law" OR "my father-in-law" OR "my in-laws" OR "my brother-in-law" OR "my sister-in-law")',
},
// Contact info - user sharing their own details
{
tier: 1,
category: "phone_numbers",
query:
'in:sent ("my number is" OR "my cell is" OR "call me at" OR "text me at" OR "reach me at" OR "my phone")',
},
{
tier: 1,
category: "birthday",
query: 'to:me subject:"happy birthday"',
},
// Location - shipping/delivery proves address
{
tier: 1,
category: "location_shipping",
query:
'from:(amazon OR fedex OR ups OR usps OR dhl) ("delivered to" OR "shipping address" OR "shipped to")',
},
{
tier: 1,
category: "location_utility",
query:
'from:(pg&e OR "pacific gas" OR edison OR "national grid" OR xfinity OR comcast OR spectrum OR "at&t") (account OR service OR bill)',
},
// Messaging platforms - verification proves account
{ tier: 1, category: "whatsapp", query: 'from:whatsapp ("verification" OR "code")' },
{ tier: 1, category: "signal_app", query: 'from:signal ("verification" OR "code")' },
{ tier: 1, category: "telegram", query: 'from:telegram ("verification" OR "code")' },
{
tier: 1,
category: "slack",
query: 'from:slack ("verification code" OR "sign in code" OR "confirmation")',
},
{ tier: 1, category: "discord_verify", query: 'from:discord ("verify" OR "confirmation")' },
// ==========================================================================
// TIER 2: TOOL USAGE - HIGH SIGNAL FOR WORKFLOW
// Shows what the user actively works with
// ==========================================================================
// SaaS and subscriptions
{
tier: 2,
category: "saas_trials",
query: 'subject:("trial started" OR "trial ending" OR "days left" OR "trial expires")',
},
{
tier: 2,
category: "receipts_stripe",
query: 'from:stripe subject:("receipt" OR "payment" OR "invoice")',
},
{
tier: 2,
category: "receipts_paypal",
query: 'from:paypal subject:("receipt" OR "payment" OR "invoice")',
},
{
tier: 2,
category: "subscriptions",
query:
'subject:("subscription confirmed" OR "you subscribed" OR "thanks for subscribing" OR "welcome to")',
},
// Project management tools
{
tier: 2,
category: "linear",
query: 'from:linear subject:("assigned" OR "mentioned" OR "commented")',
},
{
tier: 2,
category: "jira",
query: 'from:jira subject:("assigned" OR "mentioned" OR "commented")',
},
{
tier: 2,
category: "asana",
query: 'from:asana subject:("assigned" OR "mentioned" OR "task")',
},
{
tier: 2,
category: "notion",
query: 'from:notion subject:("mentioned" OR "commented" OR "invited")',
},
{ tier: 2, category: "monday", query: 'from:monday subject:("assigned" OR "update")' },
{ tier: 2, category: "trello", query: 'from:trello subject:("added" OR "mentioned")' },
{ tier: 2, category: "clickup", query: 'from:clickup subject:("assigned" OR "mentioned")' },
// Security tools
{
tier: 2,
category: "password_mgr_1password",
query: 'from:1password subject:("security" OR "new device" OR "sign in")',
},
{
tier: 2,
category: "password_mgr_lastpass",
query: 'from:lastpass subject:("security" OR "new device")',
},
{
tier: 2,
category: "password_mgr_bitwarden",
query: 'from:bitwarden subject:("security" OR "verification")',
},
// Video/meeting platforms
{
tier: 2,
category: "zoom",
query: 'from:zoom ("meeting" OR "webinar" OR "recording")',
},
{
tier: 2,
category: "calendly",
query: 'from:calendly ("scheduled" OR "confirmed" OR "canceled")',
},
{ tier: 2, category: "cal_com", query: 'from:cal.com ("scheduled" OR "confirmed")' },
// Design tools
{ tier: 2, category: "figma", query: 'from:figma ("commented" OR "invited" OR "shared")' },
{ tier: 2, category: "canva", query: 'from:canva ("design" OR "team" OR "shared")' },
{ tier: 2, category: "miro", query: 'from:miro ("invited" OR "board" OR "commented")' },
// ==========================================================================
// TIER 3: PROFESSIONAL INTELLIGENCE
// Career signals, education, credentials
// ==========================================================================
// Recruiting and job search
{
tier: 3,
category: "recruiters",
query:
'subject:("opportunity" OR "role" OR "position") ("recruiter" OR "recruiting" OR "talent")',
},
{
tier: 3,
category: "job_applications",
query:
'subject:("application received" OR "application submitted" OR "thank you for applying")',
},
{
tier: 3,
category: "job_offers",
query: 'subject:("offer letter" OR "job offer" OR "offer of employment")',
},
{
tier: 3,
category: "interview_scheduled",
query: 'subject:("interview" OR "phone screen" OR "technical interview") ("scheduled" OR "confirmed")',
},
// Contracts and signatures
{
tier: 3,
category: "docusign",
query: 'from:docusign ("complete" OR "signed" OR "review")',
},
{
tier: 3,
category: "hellosign",
query: 'from:hellosign ("complete" OR "signed" OR "review")',
},
{
tier: 3,
category: "adobe_sign",
query: 'from:adobe subject:("sign" OR "agreement" OR "contract")',
},
// Conferences and events
{
tier: 3,
category: "conferences",
query:
'subject:("ticket confirmation" OR "registration confirmed" OR "event registration") -meeting -calendar',
},
{
tier: 3,
category: "conference_speaking",
query: 'subject:("speaker" OR "presentation" OR "talk") ("accepted" OR "confirmed")',
},
// Education and certifications
{
tier: 3,
category: "coursera",
query: 'from:coursera ("enrolled" OR "completed" OR "certificate")',
},
{
tier: 3,
category: "udemy",
query: 'from:udemy ("enrolled" OR "completed" OR "certificate")',
},
{
tier: 3,
category: "linkedin_learning",
query: 'from:linkedin subject:("learning" OR "course" OR "certificate")',
},
{
tier: 3,
category: "certifications",
query: 'subject:("certificate" OR "certification" OR "completed course" OR "course complete")',
},
{
tier: 3,
category: "university_edu",
query: 'from:.edu ("enrollment" OR "registration" OR "transcript" OR "degree")',
},
{
tier: 3,
category: "student_loans",
query:
'from:(navient OR nelnet OR "great lakes" OR mohela OR fedloan) ("payment" OR "statement")',
},
// Professional newsletters
{ tier: 3, category: "substack", query: 'from:substack ("subscribed" OR "new post")' },
{
tier: 3,
category: "newsletters",
query: 'subject:("you subscribed" OR "newsletter") -unsubscribe',
},
// Social platforms
{ tier: 3, category: "linkedin", query: "from:linkedin" },
{ tier: 3, category: "twitter", query: "from:twitter OR from:x.com" },
{ tier: 3, category: "instagram", query: "from:instagram" },
{ tier: 3, category: "github", query: "from:github" },
{ tier: 3, category: "gitlab", query: "from:gitlab" },
{ tier: 3, category: "stack_overflow", query: "from:stackoverflow" },
// ==========================================================================
// TIER 4: INFRASTRUCTURE SIGNALS (Tech Users)
// Shows side projects, technical interests
// ==========================================================================
// Hosting and deployment
{
tier: 4,
category: "vercel",
query: 'from:vercel subject:("deployed" OR "deployment" OR "build")',
},
{
tier: 4,
category: "netlify",
query: 'from:netlify subject:("deployed" OR "deployment" OR "build")',
},
{
tier: 4,
category: "heroku",
query: 'from:heroku subject:("deployed" OR "deployment" OR "build")',
},
{ tier: 4, category: "railway", query: 'from:railway ("deployment" OR "build")' },
{ tier: 4, category: "render", query: 'from:render ("deployment" OR "build")' },
{
tier: 4,
category: "aws",
query: 'from:amazon subject:("AWS" OR "web services") ("account" OR "billing")',
},
{ tier: 4, category: "gcp", query: 'from:google subject:("cloud" OR "GCP") ("account" OR "billing")' },
{ tier: 4, category: "digitalocean", query: 'from:digitalocean ("droplet" OR "account")' },
// Domain registrars
{
tier: 4,
category: "namecheap",
query: 'from:namecheap subject:("renewal" OR "registered" OR "expiring")',
},
{
tier: 4,
category: "godaddy",
query: 'from:godaddy subject:("renewal" OR "registered" OR "expiring")',
},
{
tier: 4,
category: "cloudflare",
query: 'from:cloudflare subject:("domain" OR "DNS" OR "SSL")',
},
{
tier: 4,
category: "porkbun",
query: 'from:porkbun subject:("renewal" OR "registered")',
},
// Cloud storage patterns
{
tier: 4,
category: "cloud_sharing",
query: 'in:sent ("dropbox.com/s/" OR "drive.google.com/file" OR "notion.so/")',
},
// Developer tools
{ tier: 4, category: "npm", query: 'from:npm ("published" OR "security" OR "package")' },
{ tier: 4, category: "docker", query: 'from:docker ("account" OR "hub" OR "image")' },
// ==========================================================================
// TIER 5: FINANCIAL FORENSICS
// Banking, investments, crypto, fintech
// ==========================================================================
// Traditional banking
{
tier: 5,
category: "banking_chase",
query: 'from:chase subject:(statement OR alert OR payment)',
},
{
tier: 5,
category: "banking_bofa",
query: 'from:bankofamerica subject:(statement OR alert)',
},
{
tier: 5,
category: "banking_wells",
query: 'from:wellsfargo subject:(statement OR alert)',
},
{ tier: 5, category: "banking_citi", query: 'from:citi subject:(statement OR alert)' },
{
tier: 5,
category: "banking_capital_one",
query: 'from:capitalone subject:(statement OR alert)',
},
// Investments
{ tier: 5, category: "fidelity", query: 'from:fidelity ("statement" OR "account")' },
{ tier: 5, category: "schwab", query: 'from:schwab ("statement" OR "account")' },
{ tier: 5, category: "vanguard", query: 'from:vanguard ("statement" OR "account")' },
{ tier: 5, category: "robinhood", query: 'from:robinhood ("statement" OR "trade")' },
{ tier: 5, category: "etrade", query: 'from:etrade ("statement" OR "trade")' },
{ tier: 5, category: "td_ameritrade", query: 'from:tdameritrade ("statement" OR "trade")' },
// Crypto exchanges
{ tier: 5, category: "coinbase", query: 'from:coinbase ("transaction" OR "account")' },
{ tier: 5, category: "binance", query: 'from:binance ("transaction" OR "account")' },
{ tier: 5, category: "kraken", query: 'from:kraken ("transaction" OR "account")' },
{ tier: 5, category: "gemini", query: 'from:gemini ("transaction" OR "account")' },
{ tier: 5, category: "crypto_com", query: 'from:crypto.com ("transaction" OR "account")' },
// Fintech
{ tier: 5, category: "venmo", query: 'from:venmo ("paid" OR "received" OR "request")' },
{ tier: 5, category: "cashapp", query: 'from:cash.app ("paid" OR "received")' },
{ tier: 5, category: "zelle", query: 'subject:zelle ("sent" OR "received" OR "payment")' },
{ tier: 5, category: "wise", query: 'from:wise ("transfer" OR "payment")' },
{ tier: 5, category: "revolut", query: 'from:revolut ("payment" OR "transfer")' },
// Tax software
{ tier: 5, category: "turbotax", query: 'from:turbotax ("return" OR "refund" OR "filed")' },
{ tier: 5, category: "hr_block", query: 'from:hrblock ("return" OR "refund" OR "filed")' },
// Invoicing
{
tier: 5,
category: "freshbooks",
query: 'from:freshbooks ("invoice" OR "payment" OR "client")',
},
{
tier: 5,
category: "quickbooks",
query: 'from:quickbooks ("invoice" OR "payment" OR "client")',
},
{ tier: 5, category: "wave", query: 'from:waveapps ("invoice" OR "payment")' },
// ==========================================================================
// TIER 6: LIFESTYLE SIGNALS
// Shopping, travel, health, entertainment
// ==========================================================================
// E-commerce
{ tier: 6, category: "amazon_orders", query: 'from:amazon "your order"' },
{ tier: 6, category: "ebay", query: 'from:ebay ("order" OR "bid" OR "won")' },
{ tier: 6, category: "etsy", query: 'from:etsy ("order" OR "purchase")' },
{ tier: 6, category: "target", query: 'from:target ("order" OR "pickup")' },
{ tier: 6, category: "walmart", query: 'from:walmart ("order" OR "pickup")' },
{ tier: 6, category: "costco", query: 'from:costco ("order" OR "membership")' },
// Food delivery
{ tier: 6, category: "doordash", query: 'from:doordash ("order" OR "delivery")' },
{ tier: 6, category: "ubereats", query: 'from:uber subject:("eats" OR "order" OR "delivery")' },
{ tier: 6, category: "grubhub", query: 'from:grubhub ("order" OR "delivery")' },
{ tier: 6, category: "instacart", query: 'from:instacart ("order" OR "delivery")' },
{ tier: 6, category: "postmates", query: 'from:postmates ("order" OR "delivery")' },
// Travel
{
tier: 6,
category: "travel_flights",
query:
'subject:("flight confirmation" OR "itinerary" OR "boarding pass") (from:airline OR from:united OR from:delta OR from:american OR from:southwest OR from:jetblue)',
},
{
tier: 6,
category: "travel_hotels",
query:
'subject:("reservation confirmed" OR "booking confirmed") (from:marriott OR from:hilton OR from:hyatt OR from:ihg OR from:booking.com OR from:hotels.com)',
},
{
tier: 6,
category: "airbnb",
query: 'from:airbnb ("reservation" OR "booking" OR "trip")',
},
{ tier: 6, category: "expedia", query: 'from:expedia ("itinerary" OR "booking")' },
{ tier: 6, category: "kayak", query: 'from:kayak ("booking" OR "confirmation")' },
// Health and fitness
{
tier: 6,
category: "health_appointments",
query:
'subject:("appointment confirmed" OR "appointment reminder" OR "your visit") (from:doctor OR from:health OR from:medical OR from:hospital)',
},
{
tier: 6,
category: "pharmacy",
query: 'from:(cvs OR walgreens OR "rite aid") ("prescription" OR "refill")',
},
{ tier: 6, category: "peloton", query: 'from:peloton ("workout" OR "class" OR "membership")' },
{ tier: 6, category: "strava", query: 'from:strava ("activity" OR "kudos")' },
{
tier: 6,
category: "gym_membership",
query:
'("gym" OR "fitness" OR "membership") from:(equinox OR "planet fitness" OR "24 hour" OR orangetheory OR crossfit)',
},
// Pets
{ tier: 6, category: "pet_vet", query: '("veterinary" OR "vet" OR "animal hospital") ("appointment" OR "reminder")' },
{ tier: 6, category: "pet_insurance", query: 'from:(petplan OR trupanion OR "healthy paws" OR embrace) ("policy" OR "claim")' },
{ tier: 6, category: "pet_supplies", query: 'from:(chewy OR petco OR petsmart) ("order" OR "shipment")' },
// Entertainment
{ tier: 6, category: "spotify", query: "from:spotify" },
{ tier: 6, category: "netflix", query: "from:netflix" },
{ tier: 6, category: "hulu", query: "from:hulu" },
{ tier: 6, category: "disney_plus", query: "from:disneyplus" },
{ tier: 6, category: "hbo_max", query: "from:hbomax OR from:max.com" },
{ tier: 6, category: "youtube", query: 'from:youtube ("membership" OR "premium")' },
{ tier: 6, category: "discord", query: 'from:discord subject:("joined" OR "invited" OR "server")' },
{ tier: 6, category: "twitch", query: 'from:twitch ("subscription" OR "follow")' },
{
tier: 6,
category: "gaming",
query: 'from:(steam OR playstation OR xbox OR nintendo OR "epic games") ("purchase" OR "receipt")',
},
// Automotive
{
tier: 6,
category: "car_insurance",
query:
'from:(geico OR "state farm" OR progressive OR allstate OR usaa) ("policy" OR "premium" OR "renewal")',
},
{
tier: 6,
category: "car_purchase",
query: 'subject:("vehicle" OR "car") ("purchase" OR "lease" OR "financing")',
},
{
tier: 6,
category: "car_service",
query: '("oil change" OR "service appointment" OR "maintenance") ("scheduled" OR "reminder")',
},
// Real estate and home
{
tier: 6,
category: "real_estate",
query: 'from:(zillow OR redfin OR realtor.com OR trulia) ("listing" OR "home" OR "saved")',
},
{
tier: 6,
category: "mortgage",
query: 'subject:("mortgage" OR "loan") ("statement" OR "payment" OR "application")',
},
{
tier: 6,
category: "home_insurance",
query: '("homeowner" OR "renters" OR "property") ("insurance" OR "policy")',
},
{
tier: 6,
category: "moving",
query: 'from:(uhaul OR pods OR "two men" OR movingcompany) ("reservation" OR "booking")',
},
// ==========================================================================
// TIER 7: LEGAL AND OFFICIAL
// Government, legal, insurance
// ==========================================================================
{ tier: 7, category: "government", query: "from:.gov" },
{
tier: 7,
category: "irs",
query: 'from:irs.gov OR subject:("IRS" OR "tax return" OR "refund status")',
},
{ tier: 7, category: "dmv", query: '("DMV" OR "motor vehicle") ("registration" OR "renewal" OR "license")' },
{
tier: 7,
category: "legal_correspondence",
query: 'from:("law firm" OR "attorney" OR "lawyer" OR esq) -spam',
},
{
tier: 7,
category: "insurance_claims",
query: 'subject:("claim" OR "claim number") ("insurance" OR "filed")',
},
{
tier: 7,
category: "jury_duty",
query: '("jury duty" OR "jury service" OR "summons")',
},
// ==========================================================================
// TIER 8: LIFE EVENTS (for timeline reconstruction)
// Major milestones detectable in email
// ==========================================================================
{
tier: 8,
category: "wedding",
query: '("wedding registry" OR "bridal shower" OR "save the date" OR "wedding invitation")',
},
{
tier: 8,
category: "baby",
query: '("baby registry" OR "baby shower" OR "expecting" OR "due date" OR "newborn")',
},
{
tier: 8,
category: "graduation",
query: 'subject:("graduation" OR "commencement" OR "diploma" OR "degree")',
},
{
tier: 8,
category: "new_job",
query: 'subject:("first day" OR "onboarding" OR "welcome to the team" OR "new employee")',
},
{
tier: 8,
category: "relocation",
query:
'("change of address" OR "new address" OR "we moved" OR "forwarding mail")',
},
];
// Startup banner: echo the effective collection settings, one line each.
for (const bannerLine of [
  "Gmail Detective: Forensic inbox analysis starting...",
  ` Lookback: ${LOOKBACK_DAYS} days (${formatDate(lookbackDate)} - ${formatDate(new Date())})`,
  ` Inbox: up to ${INBOX_MAX_MESSAGES} messages`,
  ` Sent: up to ${SENT_MAX_MESSAGES} messages`,
  ` Discovery queries: ${DISCOVERY_QUERIES.length}`,
]) {
  console.log(bannerLine);
}
/**
 * List the IDs of messages matching a Gmail search query, following
 * pagination until `maxResults` IDs are collected or the results run out.
 *
 * @param {string} query - Gmail search string; omitted from the request when falsy.
 * @param {number} maxResults - Upper bound on the number of IDs returned.
 * @returns {Promise<string[]>} Message IDs in the order the API returned them.
 * @throws {Error} On a non-OK HTTP status or a response body that is not valid JSON.
 */
async function fetchMessageIds(query, maxResults) {
  const collected = [];
  let nextToken = null;

  while (collected.length < maxResults) {
    const stillNeeded = maxResults - collected.length;

    // Request at most 100 IDs per page, and never more than are still needed.
    const endpoint = new URL("https://gmail.googleapis.com/gmail/v1/users/me/messages");
    endpoint.searchParams.set("maxResults", String(Math.min(stillNeeded, 100)));
    if (query) endpoint.searchParams.set("q", query);
    if (nextToken) endpoint.searchParams.set("pageToken", nextToken);

    const response = await fetch(endpoint.toString(), {
      headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" },
    });
    // Read the body as text first so it can be quoted in either error message.
    const rawBody = await response.text();
    if (!response.ok) {
      throw new Error(`Gmail API failed: ${response.status} - ${rawBody}`);
    }

    let page;
    try {
      page = JSON.parse(rawBody);
    } catch {
      throw new Error(`Gmail API returned invalid JSON: ${rawBody.slice(0, 200)}`);
    }

    const pageMessages = page.messages;
    if (!pageMessages || pageMessages.length === 0) break;

    const pageIds = pageMessages.map((message) => message.id).slice(0, stillNeeded);
    for (const id of pageIds) collected.push(id);

    nextToken = page.nextPageToken;
    if (!nextToken) break;
  }

  return collected;
}
/**
 * Download message resources for a list of IDs, 25 requests at a time.
 *
 * Messages whose request returns a non-OK status, or whose body cannot be
 * parsed as JSON, are dropped from the result rather than raising.
 *
 * @param {string[]} messageIds - Gmail message IDs to fetch.
 * @param {string} [format="metadata"] - Value for the `format` query parameter;
 *   "metadata" additionally requests the Subject/From/To/Date/Cc headers.
 * @returns {Promise<object[]>} Successfully fetched messages, in input order.
 */
async function fetchMessages(messageIds, format = "metadata") {
  const CONCURRENCY = 25;
  const total = messageIds.length;
  const loaded = [];

  // Fetch one message; resolves to null on any per-message failure.
  const loadOne = async (id) => {
    const base = `https://gmail.googleapis.com/gmail/v1/users/me/messages/${id}?format=${format}`;
    const target =
      format === "metadata"
        ? `${base}&metadataHeaders=Subject&metadataHeaders=From&metadataHeaders=To&metadataHeaders=Date&metadataHeaders=Cc`
        : base;
    const response = await fetch(target, {
      headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" },
    });
    if (!response.ok) return null;
    try {
      return await response.json();
    } catch {
      return null;
    }
  };

  let offset = 0;
  while (offset < total) {
    const windowEnd = offset + CONCURRENCY;
    const settled = await Promise.all(
      messageIds.slice(offset, windowEnd).map((id) => loadOne(id))
    );
    for (const message of settled) {
      if (message) loaded.push(message);
    }
    // Report progress every 100 requested messages and once more at the end.
    if (windowEnd % 100 === 0 || windowEnd >= total) {
      console.log(` Fetched ${Math.min(windowEnd, total)}/${total}...`);
    }
    offset = windowEnd;
  }

  return loaded;
}
/**
 * Pull the address out of an address header value.
 *
 * Prefers the text inside the first <...> pair; otherwise the whole header
 * (trimmed) is treated as the address. The result is always lower-cased.
 *
 * @param {string} header - Header value such as `Jane <jane@example.com>`.
 * @returns {string} Lower-cased address, or "unknown" for an empty header.
 */
function extractEmail(header) {
  if (!header) return "unknown";
  const bracketed = /<([^>]+)>/.exec(header);
  if (bracketed !== null) {
    return bracketed[1].toLowerCase();
  }
  return header.toLowerCase().trim();
}
/**
 * Pull a display name out of an address header value.
 *
 * When text precedes the first "<", that text is the name (trimmed, with
 * double quotes removed). Otherwise the part before the first "@" is used.
 *
 * @param {string} header - Header value such as `"Jane Doe" <jane@example.com>`.
 * @returns {string} Display name, or "Unknown" for an empty header.
 */
function extractName(header) {
  if (!header) return "Unknown";
  const bracketAt = header.indexOf("<");
  // A name exists only if at least one character precedes the bracket.
  if (bracketAt > 0) {
    return header.slice(0, bracketAt).trim().replaceAll('"', "");
  }
  const [localPart] = header.split("@");
  return localPart;
}
/**
 * Look up a header on a Gmail message by name, ignoring case.
 *
 * @param {object} msg - Message whose `payload.headers` is a list of { name, value }.
 * @param {string} name - Header name to find.
 * @returns {string} Value of the first matching header, or "" when none matches.
 */
function getHeader(msg, name) {
  for (const entry of msg.payload?.headers ?? []) {
    if (entry.name.toLowerCase() === name.toLowerCase()) {
      return entry.value;
    }
  }
  return "";
}
/**
 * Extract the plain-text body from a Gmail message payload.
 *
 * Resolution order:
 *   1. If the payload itself carries `body.data`, decode and return it
 *      (whatever its MIME type).
 *   2. Otherwise walk `payload.parts` depth-first, in order, and return the
 *      first `text/plain` part that carries data.
 *
 * The walk descends to any depth, so bodies wrapped in several multipart
 * containers (e.g. mixed > related > alternative) are found as well.
 *
 * @param {object} payload - Gmail message payload (`body`, `mimeType`, `parts`).
 * @returns {string} Decoded UTF-8 text, or "" when there is no plain-text body.
 */
function extractBodyText(payload) {
  // Decode one body blob; null signals "could not decode".
  // Node's "base64" decoder also accepts the URL-safe alphabet Gmail uses.
  const decode = (data) => {
    try {
      return Buffer.from(data, "base64").toString("utf-8");
    } catch {
      return null;
    }
  };

  // Depth-first search for the first decodable text/plain part; null if none.
  const findPlainText = (parts) => {
    for (const part of parts ?? []) {
      if (part.mimeType === "text/plain" && part.body?.data) {
        const text = decode(part.body.data);
        if (text !== null) return text;
        continue; // undecodable part: move on to its siblings
      }
      const nested = findPlainText(part.parts);
      if (nested !== null) return nested;
    }
    return null;
  };

  if (!payload) return "";
  if (payload.body?.data) return decode(payload.body.data) ?? "";
  return findPlainText(payload.parts) ?? "";
}
/**
 * LONGEVITY-FIRST CONTACT SCORING
 * Values relationship depth and consistency over recency.
 *
 * Point budget (max 100):
 *   consistency      0-30  5 per distinct month with contact
 *   historicalPeak   0-20  2 per email in the busiest month
 *   relationshipAge  0-15  from the span between first and last contact
 *   recency          0-15  bonus for recent activity (no penalty for dormancy)
 *   depth            0-20  10 if bidirectional, plus up to 10 for thread depth
 * Addresses that look automated (noreply, newsletter, ...) lose 100 points;
 * that penalty is applied to `score` only, not to `breakdown`.
 *
 * @param {object} contact - Aggregated stats. Reads `email`, `uniqueMonths`,
 *   `peakMonthEmails`, `spanDays`, `daysSinceLastContact`, `bidirectional`,
 *   `avgMessagesPerThread` and `totalInteractions`; missing numbers count as 0,
 *   except a missing `daysSinceLastContact`, which is treated as 999 days.
 * @returns {{score: number, status: string, confidence: string, breakdown: object}}
 *   `status` is "active", "dormant" or "dormant-important";
 *   `confidence` is "low", "medium" or "high".
 */
function scoreContact(contact) {
  let score = 0;
  const breakdown = {
    consistency: 0,
    historicalPeak: 0,
    relationshipAge: 0,
    recency: 0,
    depth: 0,
  };

  // 1. CONSISTENCY (0-30 pts) - spread over time beats bursts
  const uniqueMonths = contact.uniqueMonths || 0;
  breakdown.consistency = Math.min(30, uniqueMonths * 5);
  score += breakdown.consistency;

  // 2. HISTORICAL PEAK (0-20 pts) - max engagement in any month
  const peakEmails = contact.peakMonthEmails || 0;
  breakdown.historicalPeak = Math.min(20, peakEmails * 2);
  score += breakdown.historicalPeak;

  // 3. RELATIONSHIP AGE (0-15 pts) - how long known?
  const yearsKnown = (contact.spanDays || 0) / 365;
  breakdown.relationshipAge =
    yearsKnown >= 2 ? 15 : yearsKnown >= 1 ? 10 : yearsKnown >= 0.5 ? 5 : 0;
  score += breakdown.relationshipAge;

  // 4. RECENCY (0-15 pts) - bonus for active, NOT penalty for dormant.
  // 0 is a real value ("contacted today") and must not fall through to the
  // 999-day default that stands in for "no data".
  const rawDaysSince = contact.daysSinceLastContact;
  const daysSince = rawDaysSince === 0 ? 0 : rawDaysSince || 999;
  breakdown.recency =
    daysSince <= 30 ? 15 : daysSince <= 90 ? 10 : daysSince <= 180 ? 5 : 0;
  score += breakdown.recency;

  // 5. DEPTH (0-20 pts) - back-and-forth conversation
  if (contact.bidirectional) {
    breakdown.depth += 10;
    score += 10;
  }
  const threadDepthBonus = Math.min(10, (contact.avgMessagesPerThread || 0) * 2);
  breakdown.depth += threadDepthBonus;
  score += threadDepthBonus;

  // Penalize obvious noise (a contact without an address is simply not matched).
  const email = (contact.email ?? "").toLowerCase();
  const noisePatterns = [
    "noreply",
    "no-reply",
    "notifications",
    "mailer-daemon",
    "postmaster",
    "donotreply",
    "automated",
    "newsletter",
    "marketing",
    "promo",
  ];
  if (noisePatterns.some((p) => email.includes(p))) {
    score -= 100;
  }

  // Derive status: silent for 180+ days is dormant; a high score keeps it "important".
  let status = "active";
  if (daysSince > 180 && score >= 50) {
    status = "dormant-important";
  } else if (daysSince > 180) {
    status = "dormant";
  }

  // Confidence tier: based on interaction volume and, for "high", time span.
  let confidence = "low";
  if (contact.totalInteractions >= 10 && contact.spanDays > 30) {
    confidence = "high";
  } else if (contact.totalInteractions >= 5) {
    confidence = "medium";
  }

  return { score, status, confidence, breakdown };
}
/**
 * Compute behavioral metrics from email timestamps and thread IDs.
 *
 * Timing metrics (hour/day histograms, peak hours, night-owl flag, weekly
 * rate, busiest/quietest day) come from the `Date` header of sent messages
 * only, bucketed with `getHours()`/`getDay()`, i.e. in this process's local
 * time zone. Thread metrics use `threadId` across sent and inbox messages.
 *
 * When no sent message has a parseable date, `peakHours` stays empty and
 * `busiestDay`/`quietestDay` stay "" instead of reporting zero-count buckets.
 *
 * @param {object[]} sentMessages - Messages sent by the user.
 * @param {object[]} inboxMessages - Messages received by the user.
 * @returns {object} Metrics:
 *   - hourDistribution: number[24], dayDistribution: number[7] (0 = Sunday)
 *   - peakHours: up to 3 "H:00" labels, busiest first, only hours with mail
 *   - nightOwl: true when 22:00-03:59 has more sent mail than 06:00-11:59
 *   - emailsPerWeek: sent count per week over the dated span (1 decimal)
 *   - busiestDay / quietestDay: weekday names
 *   - avgThreadDepth: mean messages per thread (1 decimal)
 *   - startsConversations: share of sent threads absent from the inbox set
 *   - tendsToReply: 1 - startsConversations
 */
function computeBehavioralMetrics(sentMessages, inboxMessages) {
  const metrics = {
    hourDistribution: new Array(24).fill(0),
    dayDistribution: new Array(7).fill(0),
    peakHours: [],
    nightOwl: false,
    emailsPerWeek: 0,
    busiestDay: "",
    quietestDay: "",
    avgThreadDepth: 0,
    startsConversations: 0,
    tendsToReply: 0,
  };

  // Analyze sent email timing; messages without a parseable Date are skipped.
  const sentDates = [];
  for (const msg of sentMessages) {
    const dateStr = getHeader(msg, "Date");
    if (!dateStr) continue;
    const date = new Date(dateStr);
    if (Number.isNaN(date.getTime())) continue;
    sentDates.push(date);
    metrics.hourDistribution[date.getHours()]++;
    metrics.dayDistribution[date.getDay()]++;
  }

  // Peak hours: top 3 by volume, never an hour in which nothing was sent.
  metrics.peakHours = metrics.hourDistribution
    .map((count, hour) => ({ hour, count }))
    .filter(({ count }) => count > 0)
    .sort((a, b) => b.count - a.count)
    .slice(0, 3)
    .map(({ hour }) => `${hour}:00`);

  // Night owl detection (more emails 10pm-4am than 6am-noon)
  const sentBetween = (fromHour, toHour) =>
    metrics.hourDistribution.slice(fromHour, toHour).reduce((a, b) => a + b, 0);
  const nightEmails = sentBetween(22, 24) + sentBetween(0, 4);
  const morningEmails = sentBetween(6, 12);
  metrics.nightOwl = nightEmails > morningEmails;

  // Emails per week over the observed span (floored at one week).
  if (sentDates.length >= 2) {
    const timestamps = sentDates.map((d) => d.getTime());
    const spanMs = Math.max(...timestamps) - Math.min(...timestamps);
    const weeks = Math.max(1, spanMs / (7 * 24 * 60 * 60 * 1000));
    metrics.emailsPerWeek = Math.round((sentMessages.length / weeks) * 10) / 10;
  }

  // Busiest/quietest day - only meaningful once at least one dated message exists.
  if (sentDates.length > 0) {
    const dayNames = ["Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"];
    const maxDay = metrics.dayDistribution.indexOf(Math.max(...metrics.dayDistribution));
    const minDay = metrics.dayDistribution.indexOf(Math.min(...metrics.dayDistribution));
    metrics.busiestDay = dayNames[maxDay];
    metrics.quietestDay = dayNames[minDay];
  }

  // Thread analysis: messages per thread across both mailboxes.
  const threadCounts = new Map();
  for (const msg of [...sentMessages, ...inboxMessages]) {
    const threadId = msg.threadId;
    if (!threadId) continue;
    threadCounts.set(threadId, (threadCounts.get(threadId) || 0) + 1);
  }
  if (threadCounts.size > 0) {
    const depths = [...threadCounts.values()];
    metrics.avgThreadDepth = Math.round((depths.reduce((a, b) => a + b, 0) / depths.length) * 10) / 10;
  }

  // Conversation initiation ratio. Messages without a threadId are ignored,
  // matching the thread-depth pass above.
  const threadIdsOf = (messages) =>
    new Set(messages.map((m) => m.threadId).filter(Boolean));
  const sentThreads = threadIdsOf(sentMessages);
  const inboxThreads = threadIdsOf(inboxMessages);
  const userInitiated = [...sentThreads].filter((t) => !inboxThreads.has(t)).length;
  if (sentThreads.size > 0) {
    metrics.startsConversations = Math.round((userInitiated / sentThreads.size) * 100) / 100;
    metrics.tendsToReply = Math.round((1 - metrics.startsConversations) * 100) / 100;
  }

  return metrics;
}
/**
 * Build a relationship graph: domain clusters, bridge contacts and score tiers.
 *
 * - clusters: non-freemail domains shared by 2+ contacts (up to 10 members listed).
 * - bridges: addresses that appear on messages together with people from 2+
 *   clustered domains (their own domain counts). Addresses found in the From
 *   header of sent messages are taken to be the user's own and are excluded,
 *   since the user appears on every message.
 * - innerCircle: the 5 highest-scoring contacts.
 * - dormantImportant: up to 10 contacts with status "dormant-important".
 * - regularContacts: up to 20 "active" contacts ranked below the inner circle.
 * - occasionalContacts: contacts ranked 26th-50th by score.
 * Contacts with a negative `signalScore` are left out of every tier.
 * `introductions` is initialised empty and not populated here.
 *
 * @param {object[]} contacts - Scored contacts; reads `name`, `email`,
 *   `signalScore`, `status` and `lastContactDate`.
 * @param {object[]} sentMessages - Sent messages (From/To/Cc headers are read).
 * @param {object[]} inboxMessages - Received messages (From/To/Cc headers are read).
 * @returns {object} Graph with `clusters`, `bridges`, `introductions`,
 *   `innerCircle`, `dormantImportant`, `regularContacts`, `occasionalContacts`.
 */
function buildRelationshipGraph(contacts, sentMessages, inboxMessages) {
  const graph = {
    clusters: [],
    bridges: [],
    introductions: [],
    innerCircle: [],
    dormantImportant: [],
    regularContacts: [],
    occasionalContacts: [],
  };

  // Domain-based clustering; personal mail providers say nothing about affiliation.
  const freemailDomains = new Set([
    "gmail.com",
    "yahoo.com",
    "hotmail.com",
    "outlook.com",
    "icloud.com",
    "me.com",
  ]);
  const domainGroups = new Map();
  for (const contact of contacts) {
    const domain = contact.email.split("@")[1];
    if (!domain || freemailDomains.has(domain)) continue;
    if (!domainGroups.has(domain)) {
      domainGroups.set(domain, []);
    }
    domainGroups.get(domain).push(contact);
  }

  // Create clusters from domains with 2+ contacts
  for (const [domain, members] of domainGroups) {
    if (members.length < 2) continue;
    graph.clusters.push({
      name: domain.split(".")[0],
      type: "domain",
      commonDomain: domain,
      memberCount: members.length,
      members: members.slice(0, 10).map((c) => ({
        name: c.name,
        email: c.email,
        score: c.signalScore,
      })),
    });
  }

  // Lookups built once instead of scanning arrays inside the loops below.
  const clusterDomains = new Set(graph.clusters.map((c) => c.commonDomain));
  const contactByEmail = new Map();
  for (const contact of contacts) {
    if (!contactByEmail.has(contact.email)) contactByEmail.set(contact.email, contact);
  }

  // The user's own addresses: whoever appears as sender of the sent mail.
  const ownAddresses = new Set(
    sentMessages.map((msg) => extractEmail(getHeader(msg, "From")))
  );

  // Find bridge contacts: for every address, collect the clustered domains
  // present on the messages that address took part in.
  const connectedDomains = new Map();
  for (const msg of [...sentMessages, ...inboxMessages]) {
    const participants = new Set(
      [getHeader(msg, "To"), getHeader(msg, "Cc"), getHeader(msg, "From")]
        .filter(Boolean)
        .join(",")
        .split(",")
        .map((r) => extractEmail(r.trim()))
        .filter((address) => address.includes("@") && !ownAddresses.has(address))
    );

    const domainsOnMessage = new Set();
    for (const address of participants) {
      const domain = address.split("@")[1];
      if (domain && clusterDomains.has(domain)) domainsOnMessage.add(domain);
    }
    if (domainsOnMessage.size === 0) continue;

    for (const address of participants) {
      if (!connectedDomains.has(address)) {
        connectedDomains.set(address, new Set());
      }
      const linked = connectedDomains.get(address);
      for (const domain of domainsOnMessage) linked.add(domain);
    }
  }
  for (const [email, domains] of connectedDomains) {
    if (domains.size < 2) continue;
    graph.bridges.push({
      contact: contactByEmail.get(email)?.name || email,
      email,
      connects: [...domains].map((d) => d.split(".")[0]),
    });
  }

  // Tier contacts by score and status
  const sortedContacts = [...contacts]
    .filter((c) => c.signalScore >= 0)
    .sort((a, b) => b.signalScore - a.signalScore);
  const INNER_CIRCLE_SIZE = 5;

  graph.innerCircle = sortedContacts.slice(0, INNER_CIRCLE_SIZE).map((c) => ({
    name: c.name,
    email: c.email,
    score: c.signalScore,
    status: c.status,
  }));
  graph.dormantImportant = sortedContacts
    .filter((c) => c.status === "dormant-important")
    .slice(0, 10)
    .map((c) => ({
      name: c.name,
      email: c.email,
      score: c.signalScore,
      lastContact: c.lastContactDate,
    }));
  // Skip the inner circle by overall rank first, THEN keep active contacts.
  // Filtering before skipping would drop active contacts whenever the top 5
  // contains dormant ones.
  graph.regularContacts = sortedContacts
    .slice(INNER_CIRCLE_SIZE)
    .filter((c) => c.status === "active")
    .slice(0, 20)
    .map((c) => ({
      name: c.name,
      email: c.email,
      score: c.signalScore,
    }));
  graph.occasionalContacts = sortedContacts.slice(25, 50).map((c) => ({
    name: c.name,
    email: c.email,
    score: c.signalScore,
  }));

  return graph;
}
// =============================================================================
// MAIN - collection pipeline
// =============================================================================
// Runs six phases in order: list message IDs, fetch message details, run the
// discovery searches, aggregate per-contact statistics, compute behavioral
// metrics, and build the relationship graph. Three JSON files are then written
// (analysis, relationships, writing samples) and a one-line machine-readable
// JSON summary is printed to stdout as the final log line.
// Any failure is logged and rethrown so the process terminates with an error.
try {
  /**
   * Split an address-list header value (To/Cc) into individual address strings.
   *
   * Only commas outside double-quoted display names act as separators, so
   * `"Doe, John" <jd@example.com>` stays a single entry. Backslash escapes
   * inside a quoted string are honored. If the quotes are unbalanced (malformed
   * header) the value is split on every comma instead.
   *
   * @param {string} headerValue - Raw header value.
   * @returns {string[]} Untrimmed address strings (may include empty entries).
   */
  const splitAddressList = (headerValue) => {
    const text = String(headerValue);
    const parts = [];
    let current = "";
    let inQuotes = false;
    for (let i = 0; i < text.length; i++) {
      const ch = text[i];
      if (inQuotes && ch === "\\" && i + 1 < text.length) {
        // Keep the escaped character verbatim; it must not toggle quote state
        current += ch + text[i + 1];
        i++;
        continue;
      }
      if (ch === '"') {
        inQuotes = !inQuotes;
      } else if (ch === "," && !inQuotes) {
        parts.push(current);
        current = "";
        continue;
      }
      current += ch;
    }
    parts.push(current);
    return inQuotes ? text.split(",") : parts;
  };

  /**
   * Record one message against a contact in the given map, creating the
   * contact entry on first sight.
   *
   * @param {Map<string, object>} contactMap - Accumulator keyed by email address.
   * @param {string} email - Contact email (must be truthy).
   * @param {string} name - Display name used when the entry is first created.
   * @param {Date|null} msgDate - Valid message date, or null when unknown.
   * @param {string|undefined} threadId - Thread the message belongs to, if any.
   * @returns {object} The (possibly new) contact entry.
   */
  const recordInteraction = (contactMap, email, name, msgDate, threadId) => {
    let entry = contactMap.get(email);
    if (!entry) {
      entry = {
        name,
        count: 0,
        dates: [],
        threadIds: new Set(),
        monthlyEmails: new Map(),
      };
      contactMap.set(email, entry);
    }
    entry.count++;
    if (msgDate) {
      entry.dates.push(msgDate);
      const monthKey = msgDate.toISOString().slice(0, 7); // "YYYY-MM"
      entry.monthlyEmails.set(monthKey, (entry.monthlyEmails.get(monthKey) || 0) + 1);
    }
    if (threadId) entry.threadIds.add(threadId);
    return entry;
  };

  /**
   * Parse a Date header value, returning null when it is missing or invalid.
   *
   * @param {string|undefined} value - Raw Date header.
   * @returns {Date|null}
   */
  const parseHeaderDate = (value) => {
    if (!value) return null;
    const parsed = new Date(value);
    return Number.isNaN(parsed.getTime()) ? null : parsed;
  };

  // Phase 1: Fetch inbox and sent message IDs
  console.log("\nPhase 1: Listing message IDs...");
  const [inboxIds, sentIds] = await Promise.all([
    fetchMessageIds(
      `newer_than:${LOOKBACK_DAYS}d -in:sent -category:promotions -category:updates -category:social -category:forums`,
      INBOX_MAX_MESSAGES
    ),
    fetchMessageIds(`in:sent newer_than:${LOOKBACK_DAYS}d`, SENT_MAX_MESSAGES),
  ]);
  console.log(` Inbox: ${inboxIds.length} messages`);
  console.log(` Sent: ${sentIds.length} messages`);

  // Nothing to analyze: report a structured failure and stop
  if (inboxIds.length === 0 && sentIds.length === 0) {
    console.error("\n✗ No messages found in the lookback period.");
    console.log(JSON.stringify({ success: false, error: "no_messages_found" }));
    process.exit(1);
  }

  // Phase 2: Fetch message details
  console.log("\nPhase 2: Fetching message details...");
  const [inboxDetails, sentDetails] = await Promise.all([
    fetchMessages(inboxIds, "metadata"),
    fetchMessages(sentIds, "full"), // Full for body analysis
  ]);
  console.log(` Inbox: ${inboxDetails.length} fetched`);
  console.log(` Sent: ${sentDetails.length} fetched`);

  // Phase 3: Discovery searches with FULL BODY fetching
  // Each query is isolated in its own try/catch so one failing search yields
  // an empty result (with an `error` field) instead of aborting the whole run.
  console.log("\nPhase 3: Running discovery searches (full body)...");
  const discoveryResults = await Promise.all(
    DISCOVERY_QUERIES.map(async ({ tier, category, query }) => {
      try {
        const ids = await fetchMessageIds(query, DISCOVERY_MAX_PER_QUERY);
        if (ids.length === 0) {
          return { tier, category, query, count: 0, emails: [] };
        }
        // Fetch FULL bodies for discovery - this enables inference to read content
        const messages = await fetchMessages(ids, "full");
        return {
          tier,
          category,
          query,
          count: messages.length,
          emails: messages.map((m) => {
            const dateStr = getHeader(m, "Date");
            // A message without an extractable body must not fail the category
            const bodyText = extractBodyText(m.payload) || "";
            return {
              id: m.id,
              threadId: m.threadId,
              from: getHeader(m, "From"),
              to: getHeader(m, "To"),
              subject: getHeader(m, "Subject"),
              snippet: m.snippet,
              // Include full body for inference to analyze
              body: bodyText.slice(0, 5000), // Cap at 5k chars per email
              timeAgo: formatTimeAgo(new Date(dateStr)),
              date: dateStr,
            };
          }),
        };
      } catch (err) {
        console.log(` Warning: ${category} search failed: ${err.message}`);
        return { tier, category, query, count: 0, emails: [], error: err.message };
      }
    })
  );
  const discoveryWithResults = discoveryResults.filter((r) => r.count > 0);
  console.log(` Discovery: ${discoveryWithResults.length}/${DISCOVERY_QUERIES.length} categories found`);

  // Log discovery hits by tier
  const tierCounts = {};
  for (const r of discoveryWithResults) {
    tierCounts[r.tier] = (tierCounts[r.tier] || 0) + 1;
  }
  for (const [tier, count] of Object.entries(tierCounts).sort((a, b) => a[0] - b[0])) {
    console.log(` Tier ${tier}: ${count} categories`);
  }

  // Phase 4: Process contacts with enhanced metrics
  console.log("\nPhase 4: Analyzing contacts...");
  const contactsReceived = new Map();
  const contactsSent = new Map();
  const labelCounts = {};
  const dateVolume = {};

  // Process inbox
  for (const msg of inboxDetails) {
    const from = getHeader(msg, "From");
    const senderEmail = extractEmail(from);
    const senderName = extractName(from);
    const msgDate = parseHeaderDate(getHeader(msg, "Date"));

    // Skip contact tracking when no sender address could be extracted, so no
    // phantom contact keyed by an empty/null email is created. Labels and
    // volume below are still counted for the message.
    if (senderEmail) {
      recordInteraction(contactsReceived, senderEmail, senderName, msgDate, msg.threadId);
    }

    // Track labels
    for (const label of msg.labelIds || []) {
      labelCounts[label] = (labelCounts[label] || 0) + 1;
    }

    // Track volume by date (UTC calendar day)
    if (msgDate) {
      const dateKey = msgDate.toISOString().split("T")[0];
      dateVolume[dateKey] = (dateVolume[dateKey] || 0) + 1;
    }
  }

  // Process sent
  const writingSamples = [];
  for (const msg of sentDetails) {
    const to = getHeader(msg, "To");
    const cc = getHeader(msg, "Cc");
    const date = getHeader(msg, "Date");
    const subject = getHeader(msg, "Subject");
    const msgDate = parseHeaderDate(date);

    // Quote-aware split so display names like "Last, First" stay intact
    const recipients = [to, cc]
      .filter(Boolean)
      .flatMap(splitAddressList)
      .map((r) => r.trim())
      .filter(Boolean);

    for (const recipient of recipients) {
      const email = extractEmail(recipient);
      if (!email) continue; // unparsable recipient: nothing to key the contact on
      const name = extractName(recipient);
      const contact = recordInteraction(contactsSent, email, name, msgDate, msg.threadId);
      // Prefer the most recently seen real display name over a placeholder
      if (name && name !== "Unknown") contact.name = name;
    }

    // Extract writing samples (only bodies with at least 50 non-padding chars)
    const bodyText = extractBodyText(msg.payload);
    if (bodyText && bodyText.trim().length >= 50) {
      writingSamples.push({
        text: bodyText,
        metadata: {
          id: msg.id,
          date: date || new Date().toISOString(),
          subject: subject || "",
        },
      });
    }
  }

  // Merge contacts with enhanced metrics
  // Received-side entries are copied first; sent-side data is then folded in.
  const allContacts = new Map();
  for (const [email, data] of contactsReceived) {
    allContacts.set(email, {
      email,
      name: data.name,
      receivedFrom: data.count,
      sentTo: 0,
      dates: [...data.dates],
      threadIds: new Set(data.threadIds),
      monthlyEmails: new Map(data.monthlyEmails),
    });
  }
  for (const [email, data] of contactsSent) {
    if (!allContacts.has(email)) {
      allContacts.set(email, {
        email,
        name: data.name,
        receivedFrom: 0,
        sentTo: 0,
        dates: [],
        threadIds: new Set(),
        monthlyEmails: new Map(),
      });
    }
    const contact = allContacts.get(email);
    contact.sentTo = data.count;
    contact.dates.push(...data.dates);
    for (const t of data.threadIds) contact.threadIds.add(t);
    for (const [month, count] of data.monthlyEmails) {
      contact.monthlyEmails.set(month, (contact.monthlyEmails.get(month) || 0) + count);
    }
    if (data.name && data.name !== "Unknown") contact.name = data.name;
  }

  // Build final contacts with scoring
  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  const contacts = [...allContacts.values()]
    .map((c) => {
      const totalInteractions = c.receivedFrom + c.sentTo;
      const bidirectional = c.receivedFrom > 0 && c.sentTo > 0;
      const sortedDates = [...c.dates].sort((a, b) => a.getTime() - b.getTime());
      const firstContactDate = sortedDates.length > 0 ? sortedDates[0] : null;
      const lastContactDate = sortedDates.length > 0 ? sortedDates[sortedDates.length - 1] : null;
      const spanDays =
        firstContactDate && lastContactDate
          ? Math.floor((lastContactDate.getTime() - firstContactDate.getTime()) / MS_PER_DAY)
          : 0;
      // 999 is the sentinel used when no dated message exists for the contact
      const daysSinceLastContact = lastContactDate
        ? Math.floor((Date.now() - lastContactDate.getTime()) / MS_PER_DAY)
        : 999;
      const uniqueThreads = c.threadIds.size;
      const avgMessagesPerThread = uniqueThreads > 0 ? totalInteractions / uniqueThreads : totalInteractions;

      // Compute consistency metrics
      const uniqueMonths = c.monthlyEmails.size;
      const monthlyVolumes = [...c.monthlyEmails.values()];
      const peakMonthEmails = monthlyVolumes.length > 0 ? Math.max(...monthlyVolumes) : 0;

      return {
        email: c.email,
        name: c.name,
        receivedFrom: c.receivedFrom,
        sentTo: c.sentTo,
        totalInteractions,
        bidirectional,
        firstContactDate: firstContactDate?.toISOString() || null,
        lastContactDate: lastContactDate?.toISOString() || null,
        spanDays,
        daysSinceLastContact,
        uniqueThreads,
        avgMessagesPerThread: Math.round(avgMessagesPerThread * 10) / 10,
        uniqueMonths,
        peakMonthEmails,
      };
    })
    .map((c) => {
      const { score, status, confidence, breakdown } = scoreContact(c);
      return { ...c, signalScore: score, status, confidence, breakdown };
    })
    // Negative scores are dropped; ties on score fall back to interaction volume
    .filter((c) => c.signalScore >= 0)
    .sort((a, b) => b.signalScore - a.signalScore || b.totalInteractions - a.totalInteractions);

  const highConfidenceCount = contacts.filter((c) => c.confidence === "high").length;
  console.log(` Contacts analyzed: ${contacts.length}`);
  console.log(` High-confidence: ${highConfidenceCount}`);
  console.log(` Dormant-important: ${contacts.filter((c) => c.status === "dormant-important").length}`);

  // Phase 5: Compute behavioral metrics
  console.log("\nPhase 5: Computing behavioral metrics...");
  const behavioral = computeBehavioralMetrics(sentDetails, inboxDetails);
  console.log(` Peak hours: ${behavioral.peakHours.join(", ")}`);
  console.log(` Emails/week: ${behavioral.emailsPerWeek}`);
  console.log(` Night owl: ${behavioral.nightOwl}`);

  // Phase 6: Build relationship graph
  console.log("\nPhase 6: Building relationship graph...");
  const relationshipGraph = buildRelationshipGraph(contacts, sentDetails, inboxDetails);
  console.log(` Clusters: ${relationshipGraph.clusters.length}`);
  console.log(` Bridges: ${relationshipGraph.bridges.length}`);
  console.log(` Inner circle: ${relationshipGraph.innerCircle.length}`);
  console.log(` Dormant-important: ${relationshipGraph.dormantImportant.length}`);

  // Ensure output directories exist
  for (const outputPath of [analysisOutputPath, relationshipsOutputPath, writingSamplesOutputPath]) {
    const dir = path.dirname(outputPath);
    if (dir && dir !== ".") fs.mkdirSync(dir, { recursive: true });
  }

  // Write main analysis data
  const analysisData = {
    collectedAt: new Date().toISOString(),
    period: {
      start: formatDate(lookbackDate),
      end: formatDate(new Date()),
      days: LOOKBACK_DAYS,
    },
    summary: {
      inboxMessages: inboxDetails.length,
      sentMessages: sentDetails.length,
      uniqueContacts: contacts.length,
      bidirectionalContacts: contacts.filter((c) => c.bidirectional).length,
      highConfidenceContacts: highConfidenceCount,
      discoveryCategories: discoveryWithResults.length,
      totalDiscoveryEmails: discoveryWithResults.reduce((sum, r) => sum + r.count, 0),
    },
    behavioral,
    contacts: contacts.slice(0, 100), // Top 100
    labels: Object.entries(labelCounts)
      .sort((a, b) => b[1] - a[1])
      .map(([label, count]) => ({ label, count })),
    volumeByDate: Object.entries(dateVolume)
      .sort((a, b) => a[0].localeCompare(b[0]))
      .map(([date, count]) => ({ date, count })),
    // Discovery results with FULL BODY for inference to analyze
    discovery: Object.fromEntries(
      discoveryWithResults.map((r) => [
        r.category,
        { tier: r.tier, query: r.query, count: r.count, emails: r.emails },
      ])
    ),
  };
  const analysisJson = JSON.stringify(analysisData, null, 2);
  fs.writeFileSync(analysisOutputPath, analysisJson);
  console.log(`\n✓ Analysis written to: ${analysisOutputPath} (${(Buffer.byteLength(analysisJson) / 1024).toFixed(1)}KB)`);

  // Write relationship graph
  const relationshipsJson = JSON.stringify(relationshipGraph, null, 2);
  fs.writeFileSync(relationshipsOutputPath, relationshipsJson);
  console.log(`✓ Relationships written to: ${relationshipsOutputPath} (${(Buffer.byteLength(relationshipsJson) / 1024).toFixed(1)}KB)`);

  // Write writing samples
  const writingSamplesData = {
    source: "gmail-detective",
    analyzedAt: new Date().toISOString(),
    context: {
      timePeriod: `${LOOKBACK_DAYS}d`,
      sampleCount: writingSamples.length,
      minLength: 50,
    },
    samples: writingSamples,
  };
  const samplesJson = JSON.stringify(writingSamplesData, null, 2);
  fs.writeFileSync(writingSamplesOutputPath, samplesJson);
  console.log(`✓ Writing samples written to: ${writingSamplesOutputPath} (${(Buffer.byteLength(samplesJson) / 1024).toFixed(1)}KB)`);

  // Final summary
  console.log("\n✓ Gmail Detective forensic collection complete");
  console.log(` Period: ${analysisData.period.start} - ${analysisData.period.end}`);
  console.log(` Messages: ${inboxDetails.length} inbox, ${sentDetails.length} sent`);
  console.log(` Contacts: ${contacts.length} (${highConfidenceCount} high-confidence)`);
  console.log(` Discovery: ${discoveryWithResults.length} categories with ${analysisData.summary.totalDiscoveryEmails} emails`);
  console.log(` Writing samples: ${writingSamples.length}`);
  if (contacts.length > 0) {
    console.log("\n Top contacts (longevity-first scoring):");
    contacts.slice(0, 5).forEach((c) => {
      // ↔ two-way, ← only received from, → only sent to
      const direction = c.bidirectional ? "↔" : c.receivedFrom > 0 ? "←" : "→";
      console.log(` ${direction} ${c.name || c.email}: score=${c.signalScore}, status=${c.status}`);
    });
  }

  // Machine-readable result emitted as the last stdout line
  console.log(
    JSON.stringify({
      success: true,
      analysisOutputPath,
      relationshipsOutputPath,
      writingSamplesOutputPath,
      inboxCount: inboxDetails.length,
      sentCount: sentDetails.length,
      contactCount: contacts.length,
      writingSampleCount: writingSamples.length,
      discoveryCategories: discoveryWithResults.length,
      discoveryHits: Object.keys(analysisData.discovery),
    })
  );
} catch (error) {
  // Log a short message, then rethrow the original error to keep its stack
  console.error("Failed:", error.message);
  throw error;
}