👾 Code

Fetch and Aggregate Sent Emails

Fetch sent Gmail messages and aggregate patterns in one step. Outputs aggregated profile data directly without intermediate files for fast profile building.

Source Code


              const [timeRange = "30d", maxResults = "500"] = process.argv.slice(2);

// CLI usage: <script> [timeRange] [maxResults]
//   timeRange  - one of the keys of timeRangeMap below (default "30d")
//   maxResults - how many sent messages to list (default 500)

// Largest page size the Gmail messages.list endpoint accepts.
const GMAIL_MAX_PAGE_SIZE = 500;
const DEFAULT_MAX_RESULTS = 500;
const DEFAULT_TIME_RANGE = "30d";
const BATCH_SIZE = 50; // Smaller batches to avoid timeouts

// Convert time range to Gmail query format
const timeRangeMap = {
  "30d": "newer_than:30d",
  "90d": "newer_than:90d",
  "1y": "newer_than:1y",
  "all": ""
};

/**
 * Turn the raw maxResults CLI argument into a usable page size.
 *
 * @param {string} raw - Argument text as received from the command line.
 * @returns {number} A whole number between 1 and GMAIL_MAX_PAGE_SIZE;
 *   DEFAULT_MAX_RESULTS when the text is not a positive integer.
 */
function parseMaxResults(raw) {
  const parsed = Number.parseInt(raw, 10);
  if (!Number.isInteger(parsed) || parsed < 1) {
    return DEFAULT_MAX_RESULTS;
  }
  return Math.min(parsed, GMAIL_MAX_PAGE_SIZE);
}

/**
 * Look up the Gmail query fragment for a time-range key.
 *
 * Uses an own-property check instead of `||` so that "all" (mapped to the
 * empty string) is honoured rather than being replaced by the default, and
 * so inherited keys such as "constructor" are not treated as valid ranges.
 *
 * @param {string} range - Time-range key, e.g. "30d", "1y" or "all".
 * @returns {string} Query fragment; empty string means "no date restriction".
 */
function resolveTimeQuery(range) {
  return Object.hasOwn(timeRangeMap, range)
    ? timeRangeMap[range]
    : timeRangeMap[DEFAULT_TIME_RANGE];
}

/**
 * Build the full Gmail search query for sent mail in the given time range.
 *
 * @param {string} range - Time-range key, e.g. "30d", "1y" or "all".
 * @returns {string} Query such as "in:sent newer_than:30d" or "in:sent".
 */
function buildSearchQuery(range) {
  const fragment = resolveTimeQuery(range);
  return fragment ? `in:sent ${fragment}` : "in:sent";
}

if (!Object.hasOwn(timeRangeMap, timeRange)) {
  // Report on stderr so a typo in the range does not silently change the query.
  console.error(`Unknown time range "${timeRange}", falling back to ${DEFAULT_TIME_RANGE}`);
}

const maxResultsNum = parseMaxResults(maxResults);
const timeQuery = resolveTimeQuery(timeRange);
const searchQuery = buildSearchQuery(timeRange);

console.log(`Fetching sent messages with query: ${searchQuery}`);
console.log(`Maximum results: ${maxResultsNum}`);

/**
 * Split a To/Cc header value into individual recipient strings.
 *
 * Commas inside double-quoted display names (e.g. `"Doe, John" <j@x.com>`)
 * do not separate recipients, so the value is scanned character by character
 * instead of being split on every comma.
 *
 * @param {string} headerValue - Raw header text.
 * @returns {string[]} Trimmed, non-empty recipient strings.
 */
function splitAddressList(headerValue) {
  const parts = [];
  let current = "";
  let inQuotes = false;

  for (let i = 0; i < headerValue.length; i++) {
    const ch = headerValue[i];

    if (ch === "\\" && inQuotes) {
      // Backslash escapes the next character inside a quoted string.
      current += ch + (headerValue[i + 1] ?? "");
      i++;
      continue;
    }
    if (ch === '"') {
      inQuotes = !inQuotes;
    }
    if (ch === "," && !inQuotes) {
      parts.push(current);
      current = "";
      continue;
    }
    current += ch;
  }
  parts.push(current);

  return parts.map((part) => part.trim()).filter((part) => part.length > 0);
}

/**
 * Extract the bare address from a recipient string.
 *
 * @param {string} recipient - Either `Name <addr>` or a bare address.
 * @returns {string} The address, trimmed and lower-cased so that case
 *   variants of the same address are counted together.
 */
function extractAddress(recipient) {
  const bracketed = recipient.match(/<([^>]+)>/);
  return (bracketed ? bracketed[1] : recipient).trim().toLowerCase();
}

/**
 * Increment the counter stored under `key`.
 *
 * A Map is used rather than a plain object so keys that collide with
 * Object.prototype members (e.g. the word "constructor") count correctly.
 *
 * @param {Map<string, number>} counts - Counter map, mutated in place.
 * @param {string} key - Counter name.
 */
function bumpCount(counts, key) {
  counts.set(key, (counts.get(key) ?? 0) + 1);
}

/**
 * Return the `limit` highest-count entries, highest first.
 *
 * @param {Map<string, number>} counts - Counter map.
 * @param {number} limit - Maximum number of entries to return.
 * @returns {Array<[string, number]>} `[key, count]` pairs.
 */
function topEntries(counts, limit) {
  return [...counts.entries()].sort((a, b) => b[1] - a[1]).slice(0, limit);
}

/**
 * Format `part / total` as a percentage with one decimal place.
 *
 * @param {number} part - Numerator.
 * @param {number} total - Denominator; zero yields "0.0%" instead of "NaN%".
 * @returns {string} e.g. "12.5%".
 */
function formatPercent(part, total) {
  return (total > 0 ? (part / total * 100).toFixed(1) : "0.0") + '%';
}

// Fetch sent-message metadata from Gmail, drop bulk/marketing mail, aggregate
// recipient, topic, timing, length, phrase, thread and project statistics, and
// print the result as JSON on stdout. Relies on `searchQuery`, `maxResultsNum`,
// `BATCH_SIZE` and `timeRange` defined earlier in the script.
try {
  // Once this many messages have been fetched the remaining batches are skipped.
  const EARLY_STOP_THRESHOLD = 300;
  const authHeaders = { Authorization: "Bearer PLACEHOLDER_TOKEN" };

  // Step 1: Fetch list of sent message IDs
  const listRes = await fetch(
    `https://gmail.googleapis.com/gmail/v1/users/me/messages?maxResults=${maxResultsNum}&q=${encodeURIComponent(searchQuery)}`,
    { headers: authHeaders }
  );

  if (!listRes.ok) {
    console.error(`Gmail API list failed: ${listRes.status}`);
    console.error(await listRes.text());
    throw new Error(`Failed to list sent messages: ${listRes.status}`);
  }

  const listData = await listRes.json();

  if (!listData.messages || listData.messages.length === 0) {
    console.log("No sent messages found in specified time range");
    // `timeRange` here is the CLI argument; the aggregated statistics further
    // down use a differently named binding so this read is not shadowed.
    console.log(JSON.stringify({
      error: "No sent messages found",
      timeRange: timeRange
    }, null, 2));
    process.exit(0);
  }

  console.log(`Found ${listData.messages.length} message(s), fetching in batches of ${BATCH_SIZE}...`);

  const messages = [];
  const messageIds = listData.messages.map((m) => m.id);

  // Step 2: Fetch messages in batches; requests within a batch run in parallel
  for (let i = 0; i < messageIds.length; i += BATCH_SIZE) {
    const batch = messageIds.slice(i, i + BATCH_SIZE);
    console.log(`Fetching batch ${Math.floor(i / BATCH_SIZE) + 1} (${batch.length} messages)...`);

    const batchResults = await Promise.all(
      batch.map(async (msgId) => {
        // A single failed message must not abort the batch: log it and return null.
        try {
          const detailRes = await fetch(
            `https://gmail.googleapis.com/gmail/v1/users/me/messages/${encodeURIComponent(msgId)}?format=metadata&metadataHeaders=Subject&metadataHeaders=To&metadataHeaders=Cc&metadataHeaders=Date`,
            { headers: authHeaders }
          );

          if (!detailRes.ok) {
            console.error(`Failed to fetch message ${msgId}: ${detailRes.status}`);
            return null;
          }

          const detail = await detailRes.json();
          // Tolerate a response without a header list instead of dropping the message.
          const headers = detail.payload?.headers ?? [];

          const getHeader = (name) => {
            const wanted = name.toLowerCase();
            const header = headers.find((h) => h.name.toLowerCase() === wanted);
            return header ? header.value : "";
          };

          return {
            id: detail.id,
            threadId: detail.threadId,
            subject: getHeader("Subject"),
            to: getHeader("To"),
            cc: getHeader("Cc"),
            snippet: detail.snippet,
            date: getHeader("Date"),
            internalDate: detail.internalDate,
            labelIds: detail.labelIds || []
          };
        } catch (error) {
          console.error(`Error fetching message ${msgId}:`, error.message);
          return null;
        }
      })
    );

    const validMessages = batchResults.filter((m) => m !== null);
    messages.push(...validMessages);

    console.log(`✓ Batch complete: ${validMessages.length}/${batch.length} messages fetched (total: ${messages.length}/${messageIds.length})`);

    // Early stop if we have enough data for pattern analysis (300+ messages)
    if (messages.length >= EARLY_STOP_THRESHOLD && i + BATCH_SIZE < messageIds.length) {
      console.log(`✓ Collected ${messages.length} messages - sufficient for pattern analysis, stopping early`);
      break;
    }
  }

  console.log(`✓ Fetched ${messages.length} sent messages successfully`);

  if (messages.length === 0) {
    // Every detail request failed; the aggregates below will be all zeros.
    console.error("No message details could be fetched; aggregates will be empty");
  }

  // Step 3: Filter cold/marketing emails for personality analysis
  const coldEmailPatterns = [
    /unsubscribe/i,
    /opt.out/i,
    /click here/i,
    /view in browser/i,
    /follow us on/i,
    /\[BULK\]/i,
    /\[MARKETING\]/i,
    /newsletter/i,
    /promotional/i
  ];

  const isColdEmail = (email) => {
    const text = `${email.subject || ''} ${email.snippet || ''}`.toLowerCase();
    return coldEmailPatterns.some((pattern) => pattern.test(text));
  };

  const personalEmails = messages.filter((e) => !isColdEmail(e));
  const coldEmailCount = messages.length - personalEmails.length;

  if (coldEmailCount > 0) {
    console.log(`Filtered out ${coldEmailCount} cold/marketing emails (${(coldEmailCount / messages.length * 100).toFixed(1)}%), analyzing ${personalEmails.length} personal emails`);
  }

  // If the filter removed everything, analyse the unfiltered set instead.
  const analysisEmails = personalEmails.length > 0 ? personalEmails : messages;
  const analysisCount = analysisEmails.length;

  console.log(`Aggregating ${analysisCount} emails...`);

  // Step 4: Aggregate patterns

  // Time range analysis (internalDate is parsed as an integer millisecond timestamp)
  const dates = analysisEmails
    .map((e) => Number.parseInt(e.internalDate, 10))
    .filter((d) => !Number.isNaN(d));
  const timeRangeStats = {
    start: dates.length > 0 ? new Date(Math.min(...dates)).toISOString() : null,
    end: dates.length > 0 ? new Date(Math.max(...dates)).toISOString() : null,
    totalCount: messages.length,
    personalCount: analysisCount,
    coldEmailsFiltered: coldEmailCount
  };

  // Recipient analysis
  const recipientCounts = new Map();
  const domainCounts = new Map();
  let internalCount = 0;
  let externalCount = 0;

  for (const email of analysisEmails) {
    const recipients = [
      ...splitAddressList(email.to || ''),
      ...splitAddressList(email.cc || '')
    ];

    for (const recipient of recipients) {
      const emailAddr = extractAddress(recipient);
      bumpCount(recipientCounts, emailAddr);

      const domain = emailAddr.split('@')[1] || 'unknown';
      bumpCount(domainCounts, domain);

      // Heuristic: a domain counts as internal purely by substring match.
      if (domain.includes('company') || domain.includes('corp') || domain.includes('.internal')) {
        internalCount++;
      } else {
        externalCount++;
      }
    }
  }

  const topRecipients = topEntries(recipientCounts, 10)
    .map(([email, count]) => ({ email, count }));

  const topDomains = topEntries(domainCounts, 10)
    .map(([domain, count]) => ({ domain, count }));

  const recipientStats = {
    topRecipients,
    topDomains,
    uniqueRecipients: recipientCounts.size,
    internalVsExternal: {
      internal: internalCount,
      external: externalCount,
      ratio: internalCount > 0 ? (externalCount / internalCount).toFixed(2) : 'N/A'
    }
  };

  // Topic clustering
  const subjectStopWords = new Set(['the', 'and', 'for', 'with', 'from']);
  const subjectWords = new Map();
  const threadCounts = new Map();

  for (const email of analysisEmails) {
    bumpCount(threadCounts, email.threadId);

    const words = (email.subject || '').toLowerCase()
      // Word boundary keeps prefixes like "re:" from being cut out of longer
      // words such as "software:".
      .replace(/\b(?:re|fwd|fw):/g, '')
      .split(/\s+/)
      .filter((w) => w.length > 3 && !subjectStopWords.has(w));

    for (const word of words) {
      bumpCount(subjectWords, word);
    }
  }

  const topTopics = topEntries(subjectWords, 15)
    .map(([word, count]) => ({ keyword: word, count }));

  // Time pattern analysis (hours and weekdays use the local timezone of this process)
  let morningCount = 0;
  let afternoonCount = 0;
  let eveningCount = 0;
  let nightCount = 0;
  let weekendCount = 0;

  for (const email of analysisEmails) {
    const timestamp = Number.parseInt(email.internalDate, 10);
    if (Number.isNaN(timestamp)) {
      // Without a usable timestamp the message cannot be bucketed.
      continue;
    }

    const date = new Date(timestamp);
    const hour = date.getHours();
    const day = date.getDay();

    if (day === 0 || day === 6) weekendCount++;

    if (hour >= 6 && hour < 12) morningCount++;
    else if (hour >= 12 && hour < 18) afternoonCount++;
    else if (hour >= 18 && hour < 22) eveningCount++;
    else nightCount++;
  }

  const timePatterns = {
    timeBuckets: {
      morning: morningCount,
      afternoon: afternoonCount,
      evening: eveningCount,
      night: nightCount
    },
    weekendRatio: formatPercent(weekendCount, analysisCount)
  };

  // Length patterns (based on the snippet text, not the full message body)
  const lengths = analysisEmails.map((e) => (e.snippet || '').length);
  const totalLength = lengths.reduce((a, b) => a + b, 0);
  const avgLength = lengths.length > 0 ? totalLength / lengths.length : 0;

  const lengthBuckets = {
    brief: lengths.filter((l) => l < 100).length,
    standard: lengths.filter((l) => l >= 100 && l < 200).length,
    detailed: lengths.filter((l) => l >= 200 && l < 300).length,
    long: lengths.filter((l) => l >= 300).length
  };

  const lengthPatterns = {
    avgSnippetLength: Math.round(avgLength),
    distribution: lengthBuckets
  };

  // Phrase analysis
  const openers = new Map();
  const commonWords = new Map();

  for (const email of analysisEmails) {
    const words = (email.snippet || '').toLowerCase().split(/\s+/);

    // The first two words of the snippet are treated as the opener.
    if (words.length >= 2) {
      bumpCount(openers, words.slice(0, 2).join(' '));
    }

    for (const word of words) {
      if (word.length > 3) {
        bumpCount(commonWords, word);
      }
    }
  }

  const phraseAnalysis = {
    topOpeners: topEntries(openers, 10).map(([phrase, count]) => ({ phrase, count })),
    topWords: topEntries(commonWords, 20).map(([word, count]) => ({ word, count }))
  };

  // Communication stats
  const threadDepths = [...threadCounts.values()];
  const avgThreadDepth = threadDepths.length > 0
    ? threadDepths.reduce((a, b) => a + b, 0) / threadDepths.length
    : 0;

  const communicationStats = {
    totalThreads: threadCounts.size,
    avgThreadDepth: avgThreadDepth.toFixed(1),
    singleEmailThreads: threadDepths.filter((d) => d === 1).length,
    multiEmailThreads: threadDepths.filter((d) => d > 1).length
  };

  // Project signals
  const projectKeywords = ['project', 'meeting', 'deadline', 'review', 'deliverable', 'presentation', 'report'];
  // Lower-case each email's searchable text once instead of once per keyword.
  const searchableTexts = analysisEmails.map((e) => ({
    subject: (e.subject || '').toLowerCase(),
    snippet: (e.snippet || '').toLowerCase()
  }));
  const mentions = (text, kw) => text.subject.includes(kw) || text.snippet.includes(kw);

  const projectRelatedCount = searchableTexts
    .filter((text) => projectKeywords.some((kw) => mentions(text, kw)))
    .length;

  const projectSignals = {
    projectRelatedCount,
    projectRatio: formatPercent(projectRelatedCount, analysisCount),
    keywordMatches: projectKeywords
      .map((kw) => ({
        keyword: kw,
        count: searchableTexts.filter((text) => mentions(text, kw)).length
      }))
      .filter((m) => m.count > 0)
  };

  // Step 5: Output aggregated data
  const aggregates = {
    timeRange: timeRangeStats,
    recipientStats,
    topics: topTopics,
    timePatterns,
    lengthPatterns,
    phraseAnalysis,
    communicationStats,
    projectSignals
  };

  console.log(`✓ Aggregation complete`);
  console.log(JSON.stringify(aggregates, null, 2));

} catch (error) {
  console.error("Error in fetch and aggregate:", error.message);
  throw error;
}