👾 Code
Fetch and Aggregate Sent Emails
Fetch sent Gmail messages and aggregate their patterns in one step. Outputs the aggregated profile data directly — no intermediate files — for fast profile building.
Source Code
// CLI arguments: [timeRange, maxResults]. Defaults: last 30 days, 500 messages.
const [timeRange = "30d", maxResults = "500"] = process.argv.slice(2);
// Always pass a radix to parseInt; fall back to the documented default when the
// argument is not numeric (NaN would otherwise end up in the API URL).
const parsedMaxResults = Number.parseInt(maxResults, 10);
const maxResultsNum = Number.isNaN(parsedMaxResults) ? 500 : parsedMaxResults;
const BATCH_SIZE = 50; // Smaller batches to avoid timeouts
// Convert time range to Gmail query format
const timeRangeMap = {
  "30d": "newer_than:30d",
  "90d": "newer_than:90d",
  "1y": "newer_than:1y",
  "all": "" // empty string => no date restriction in the search query
};
// Use ?? (not ||): "all" maps to "", which is falsy, so || would silently
// turn "all" into the 30-day fallback and make the empty-query branch below dead.
const timeQuery = timeRangeMap[timeRange] ?? timeRangeMap["30d"];
const searchQuery = timeQuery ? `in:sent ${timeQuery}` : "in:sent";
console.log(`Fetching sent messages with query: ${searchQuery}`);
console.log(`Maximum results: ${maxResultsNum}`);
try {
// Step 1: Fetch list of sent message IDs
// NOTE(review): the token is a placeholder — presumably the runtime injects a
// real OAuth2 access token before execution; verify against the caller.
// NOTE(review): Gmail's messages.list caps maxResults at 500 per page; larger
// values are silently truncated and this code does not paginate — confirm.
const listRes = await fetch(
`https://gmail.googleapis.com/gmail/v1/users/me/messages?maxResults=${maxResultsNum}&q=${encodeURIComponent(searchQuery)}`,
{
headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" }
}
);
// Log the API error body before aborting so failures are diagnosable.
if (!listRes.ok) {
console.error(`Gmail API list failed: ${listRes.status}`);
console.error(await listRes.text());
throw new Error(`Failed to list sent messages: ${listRes.status}`);
}
const listData = await listRes.json();
// An empty result set is not an error: emit a JSON marker object and exit
// cleanly so downstream consumers can detect the "no data" case.
if (!listData.messages || listData.messages.length === 0) {
console.log("No sent messages found in specified time range");
console.log(JSON.stringify({
error: "No sent messages found",
timeRange: timeRange
}, null, 2));
process.exit(0);
}
console.log(`Found ${listData.messages.length} message(s), fetching in batches of ${BATCH_SIZE}...`);
// Accumulates successfully fetched message summaries across all batches.
const messages = [];
const messageIds = listData.messages.map(m => m.id);
// Step 2: Fetch messages in batches with parallel processing
// Each batch issues up to BATCH_SIZE parallel requests; format=metadata keeps
// payloads small (selected headers + snippet, no message body).
for (let i = 0; i < messageIds.length; i += BATCH_SIZE) {
const batch = messageIds.slice(i, i + BATCH_SIZE);
console.log(`Fetching batch ${Math.floor(i / BATCH_SIZE) + 1} (${batch.length} messages)...`);
const batchResults = await Promise.all(
batch.map(async (msgId) => {
try {
const detailRes = await fetch(
`https://gmail.googleapis.com/gmail/v1/users/me/messages/${msgId}?format=metadata&metadataHeaders=Subject&metadataHeaders=To&metadataHeaders=Cc&metadataHeaders=Date`,
{
headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" }
}
);
// A single failed message yields null rather than failing the whole batch.
if (!detailRes.ok) {
console.error(`Failed to fetch message ${msgId}: ${detailRes.status}`);
return null;
}
const detail = await detailRes.json();
// Case-insensitive header lookup; returns "" when the header is absent.
const getHeader = (name) => {
const header = detail.payload.headers.find(h => h.name.toLowerCase() === name.toLowerCase());
return header ? header.value : "";
};
// Normalized summary consumed by the aggregation steps below.
return {
id: detail.id,
threadId: detail.threadId,
subject: getHeader("Subject"),
to: getHeader("To"),
cc: getHeader("Cc"),
snippet: detail.snippet,
date: getHeader("Date"),
internalDate: detail.internalDate,
labelIds: detail.labelIds || []
};
} catch (error) {
console.error(`Error fetching message ${msgId}:`, error.message);
return null;
}
})
);
// Drop the per-message failures (nulls) before accumulating.
const validMessages = batchResults.filter(m => m !== null);
messages.push(...validMessages);
console.log(`✓ Batch complete: ${validMessages.length}/${batch.length} messages fetched (total: ${messages.length}/${messageIds.length})`);
// Early stop if we have enough data for pattern analysis (300+ messages)
if (messages.length >= 300 && i + BATCH_SIZE < messageIds.length) {
console.log(`✓ Collected ${messages.length} messages - sufficient for pattern analysis, stopping early`);
break;
}
}
console.log(`✓ Fetched ${messages.length} sent messages successfully`);
// Step 3: Filter cold/marketing emails for personality analysis
const coldEmailPatterns = [
  /unsubscribe/i,
  /opt.out/i,
  /click here/i,
  /view in browser/i,
  /follow us on/i,
  /\[BULK\]/i,
  /\[MARKETING\]/i,
  /newsletter/i,
  /promotional/i
];
// A message counts as "cold" when its subject or snippet matches any of the
// marketing markers above.
function isColdEmail(message) {
  const haystack = `${message.subject || ''} ${message.snippet || ''}`.toLowerCase();
  return coldEmailPatterns.some((marker) => marker.test(haystack));
}
const personalEmails = [];
for (const message of messages) {
  if (!isColdEmail(message)) {
    personalEmails.push(message);
  }
}
const coldEmailCount = messages.length - personalEmails.length;
if (coldEmailCount > 0) {
  console.log(`Filtered out ${coldEmailCount} cold/marketing emails (${(coldEmailCount / messages.length * 100).toFixed(1)}%), analyzing ${personalEmails.length} personal emails`);
}
// If every message looked like marketing, analyze the full set rather than nothing.
const analysisEmails = personalEmails.length > 0 ? personalEmails : messages;
console.log(`Aggregating ${analysisEmails.length} emails...`);
// Step 4: Aggregate patterns
// --- Time range covered by the analyzed messages ---
// Named `timeRangeStats` deliberately: the script already declares a `timeRange`
// CLI argument at the top, and re-declaring that identifier with `const` here
// is a SyntaxError that prevented the whole script from parsing.
const dates = analysisEmails.map(e => Number.parseInt(e.internalDate, 10)).filter(d => !Number.isNaN(d));
const timeRangeStats = {
  start: dates.length > 0 ? new Date(Math.min(...dates)).toISOString() : null,
  end: dates.length > 0 ? new Date(Math.max(...dates)).toISOString() : null,
  totalCount: messages.length,
  personalCount: analysisEmails.length,
  coldEmailsFiltered: coldEmailCount
};
// --- Recipient analysis: who receives mail, and from which domains ---
const recipientCounts = {};
const domainCounts = {};
let internalCount = 0;
let externalCount = 0;
analysisEmails.forEach(email => {
  const recipients = [];
  if (email.to) recipients.push(...email.to.split(',').map(r => r.trim()));
  if (email.cc) recipients.push(...email.cc.split(',').map(r => r.trim()));
  recipients.forEach(recipient => {
    // Accept both "Name <addr>" and bare "addr" recipient forms.
    const emailMatch = recipient.match(/<([^>]+)>/) || [null, recipient];
    const emailAddr = emailMatch[1] || recipient;
    recipientCounts[emailAddr] = (recipientCounts[emailAddr] || 0) + 1;
    const domain = emailAddr.split('@')[1] || 'unknown';
    domainCounts[domain] = (domainCounts[domain] || 0) + 1;
    // Heuristic internal/external split by domain substring — presumably these
    // markers match the organization's domains; verify against real data.
    if (domain.includes('company') || domain.includes('corp') || domain.includes('.internal')) {
      internalCount++;
    } else {
      externalCount++;
    }
  });
});
const topRecipients = Object.entries(recipientCounts)
  .sort((a, b) => b[1] - a[1])
  .slice(0, 10)
  .map(([email, count]) => ({ email, count }));
const topDomains = Object.entries(domainCounts)
  .sort((a, b) => b[1] - a[1])
  .slice(0, 10)
  .map(([domain, count]) => ({ domain, count }));
const recipientStats = {
  topRecipients,
  topDomains,
  uniqueRecipients: Object.keys(recipientCounts).length,
  internalVsExternal: {
    internal: internalCount,
    external: externalCount,
    ratio: internalCount > 0 ? (externalCount / internalCount).toFixed(2) : 'N/A'
  }
};
// --- Topic clustering: keyword frequency over subjects, thread depth counts ---
const subjectWords = {};
const threadCounts = {};
analysisEmails.forEach(email => {
  threadCounts[email.threadId] = (threadCounts[email.threadId] || 0) + 1;
  const subject = email.subject || '';
  // Strip reply/forward prefixes, then keep words longer than 3 chars that are
  // not common stop words.
  const words = subject.toLowerCase()
    .replace(/re:|fwd:|fw:/gi, '')
    .split(/\s+/)
    .filter(w => w.length > 3 && !['the', 'and', 'for', 'with', 'from'].includes(w));
  words.forEach(word => {
    subjectWords[word] = (subjectWords[word] || 0) + 1;
  });
});
const topTopics = Object.entries(subjectWords)
  .sort((a, b) => b[1] - a[1])
  .slice(0, 15)
  .map(([word, count]) => ({ keyword: word, count }));
// --- Time pattern analysis (local timezone of the machine running this) ---
let morningCount = 0;
let afternoonCount = 0;
let eveningCount = 0;
let nightCount = 0;
let weekendCount = 0;
analysisEmails.forEach(email => {
  const date = new Date(Number.parseInt(email.internalDate, 10));
  const hour = date.getHours();
  const day = date.getDay();   // 0 = Sunday, 6 = Saturday
  if (day === 0 || day === 6) weekendCount++;
  if (hour >= 6 && hour < 12) morningCount++;
  else if (hour >= 12 && hour < 18) afternoonCount++;
  else if (hour >= 18 && hour < 22) eveningCount++;
  else nightCount++;
});
const timePatterns = {
  timeBuckets: {
    morning: morningCount,
    afternoon: afternoonCount,
    evening: eveningCount,
    night: nightCount
  },
  weekendRatio: (weekendCount / analysisEmails.length * 100).toFixed(1) + '%'
};
// --- Length patterns (based on snippet length as a proxy for email length) ---
const lengths = analysisEmails.map(e => (e.snippet || '').length);
const avgLength = lengths.reduce((a, b) => a + b, 0) / lengths.length;
const lengthBuckets = {
  brief: lengths.filter(l => l < 100).length,
  standard: lengths.filter(l => l >= 100 && l < 200).length,
  detailed: lengths.filter(l => l >= 200 && l < 300).length,
  long: lengths.filter(l => l >= 300).length
};
const lengthPatterns = {
  avgSnippetLength: Math.round(avgLength),
  distribution: lengthBuckets
};
// --- Phrase analysis: common two-word openers and frequent words in snippets ---
const openers = {};
const commonWords = {};
analysisEmails.forEach(email => {
  const snippet = email.snippet || '';
  const words = snippet.toLowerCase().split(/\s+/);
  if (words.length >= 2) {
    const opener = words.slice(0, 2).join(' ');
    openers[opener] = (openers[opener] || 0) + 1;
  }
  words.forEach(word => {
    if (word.length > 3) {
      commonWords[word] = (commonWords[word] || 0) + 1;
    }
  });
});
const topOpeners = Object.entries(openers)
  .sort((a, b) => b[1] - a[1])
  .slice(0, 10)
  .map(([phrase, count]) => ({ phrase, count }));
const topWords = Object.entries(commonWords)
  .sort((a, b) => b[1] - a[1])
  .slice(0, 20)
  .map(([word, count]) => ({ word, count }));
const phraseAnalysis = {
  topOpeners,
  topWords
};
// --- Communication stats: thread counts and depths ---
const threadDepths = Object.values(threadCounts);
const avgThreadDepth = threadDepths.reduce((a, b) => a + b, 0) / threadDepths.length;
const communicationStats = {
  totalThreads: Object.keys(threadCounts).length,
  avgThreadDepth: avgThreadDepth.toFixed(1),
  singleEmailThreads: threadDepths.filter(d => d === 1).length,
  multiEmailThreads: threadDepths.filter(d => d > 1).length
};
// --- Project signals: keyword matches in subject or snippet ---
const projectKeywords = ['project', 'meeting', 'deadline', 'review', 'deliverable', 'presentation', 'report'];
const projectEmails = analysisEmails.filter(e =>
  projectKeywords.some(kw => (e.subject || '').toLowerCase().includes(kw) || (e.snippet || '').toLowerCase().includes(kw))
);
const projectSignals = {
  projectRelatedCount: projectEmails.length,
  projectRatio: (projectEmails.length / analysisEmails.length * 100).toFixed(1) + '%',
  keywordMatches: projectKeywords.map(kw => ({
    keyword: kw,
    count: analysisEmails.filter(e => (e.subject || '').toLowerCase().includes(kw) || (e.snippet || '').toLowerCase().includes(kw)).length
  })).filter(m => m.count > 0)
};
// Step 5: Output aggregated data
const aggregates = {
  timeRange: timeRangeStats, // keep the original `timeRange` key in the output JSON
  recipientStats,
  topics: topTopics,
  timePatterns,
  lengthPatterns,
  phraseAnalysis,
  communicationStats,
  projectSignals
};
console.log(`✓ Aggregation complete`);
console.log(JSON.stringify(aggregates, null, 2));
} catch (error) {
console.error("Error in fetch and aggregate:", error.message);
throw error;
}