code icon Code

Fetch Gmail Messages

Fetch Gmail message metadata (selected headers and snippet, not full bodies) using batched parallel requests for speed and reliability.

Source Code

import fs from "fs";
import path from "path";

// Positional CLI arguments: [query] [maxResults] [outputPath]. Each one falls
// back to its default when omitted.
const [query = "", maxResults = "50", outputPath = "session/emails.json"] =
  process.argv.slice(2);

/**
 * Convert the raw `maxResults` argument into a usable message count.
 *
 * @param {string} raw - Value as received on the command line.
 * @param {number} [fallback=50] - Used when `raw` is not a positive integer.
 * @param {number} [cap=500] - Upper bound applied to the parsed value.
 * @returns {number} A positive count no greater than `cap`.
 */
function parseMaxResults(raw, fallback = 50, cap = 500) {
  const parsed = Number.parseInt(raw, 10);
  // Zero, negative and unparsable input all mean "no usable limit given";
  // a negative limit would otherwise make the listing loop a silent no-op.
  if (Number.isNaN(parsed) || parsed <= 0) return fallback;
  return Math.min(parsed, cap);
}

const maxResultsNum = parseMaxResults(maxResults);

console.log(
  `Fetching up to ${maxResultsNum} messages${
    query ? ` matching: ${query}` : ""
  }`
);

/**
 * Fetch metadata for each message ID using batched parallel requests.
 *
 * Every message is requested with `format=metadata`, restricted to the
 * Subject, From, To, Date and Cc headers. Batches are processed one after
 * another; the requests inside a batch run concurrently.
 *
 * This is best-effort: a message whose request returns a non-OK HTTP status
 * is left out of the result. Such failures are counted and reported once on
 * stderr so that missing messages are not silent. A rejection from `fetch`
 * itself (e.g. a network error) is not caught and propagates to the caller.
 *
 * @param {string[]} messageIds - Gmail message IDs to look up.
 * @returns {Promise<object[]>} Parsed JSON bodies of the successful
 *   responses, in the same relative order as `messageIds`.
 */
async function fetchMessages(messageIds) {
  const CONCURRENCY = 25;
  const METADATA_HEADERS = ["Subject", "From", "To", "Date", "Cc"];
  const results = [];
  const failedStatuses = [];

  console.log(`Fetching ${messageIds.length} messages...`);

  for (let i = 0; i < messageIds.length; i += CONCURRENCY) {
    const batch = messageIds.slice(i, i + CONCURRENCY);
    const fetched = await Promise.all(
      batch.map(async (id) => {
        const url = new URL(
          `https://gmail.googleapis.com/gmail/v1/users/me/messages/${encodeURIComponent(
            id
          )}`
        );
        url.searchParams.set("format", "metadata");
        // The API takes one `metadataHeaders` parameter per requested header.
        for (const header of METADATA_HEADERS) {
          url.searchParams.append("metadataHeaders", header);
        }

        const res = await fetch(url.toString(), {
          headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" },
        });
        if (!res.ok) {
          // Remember the status so the omission can be reported below.
          failedStatuses.push(res.status);
          return null;
        }
        return res.json();
      })
    );
    // Drop the null placeholders left by failed requests.
    results.push(...fetched.filter(Boolean));

    console.log(
      `  Fetched ${Math.min(i + CONCURRENCY, messageIds.length)}/${
        messageIds.length
      }...`
    );
  }

  if (failedStatuses.length > 0) {
    const statuses = [...new Set(failedStatuses)].join(", ");
    console.error(
      `  Warning: ${failedStatuses.length} of ${messageIds.length} messages could not be fetched (HTTP ${statuses})`
    );
  }

  return results;
}

// Main script. Lists matching message IDs, fetches their metadata, writes the
// full result set to `outputPath`, and prints a compact JSON summary as the
// final stdout line. Any failure is logged to stderr and rethrown.
try {
  const messages = [];
  let pageToken = null;
  const allMessageIds = [];

  // Phase 1: List all message IDs (fast, just IDs)
  console.log("Phase 1: Listing message IDs...");
  while (allMessageIds.length < maxResultsNum) {
    const remaining = maxResultsNum - allMessageIds.length;
    // This script requests at most 100 IDs per page.
    const pageSize = Math.min(remaining, 100);

    const url = new URL(
      "https://gmail.googleapis.com/gmail/v1/users/me/messages"
    );
    url.searchParams.set("maxResults", pageSize.toString());
    if (query) url.searchParams.set("q", query);
    if (pageToken) url.searchParams.set("pageToken", pageToken);

    const listRes = await fetch(url.toString(), {
      headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" },
    });

    if (!listRes.ok) {
      // A listing failure is fatal: without IDs there is nothing to fetch.
      const errorText = await listRes.text();
      console.error(`Gmail API error: ${listRes.status}`);
      console.error(errorText);
      throw new Error(`Gmail API failed: ${listRes.status}`);
    }

    const listData = await listRes.json();

    if (!listData.messages || listData.messages.length === 0) {
      console.log("No more messages found");
      break;
    }

    // Guard against a page returning more entries than were asked for.
    const newIds = listData.messages.map((m) => m.id).slice(0, remaining);
    allMessageIds.push(...newIds);
    console.log(`  Found ${allMessageIds.length} messages so far...`);

    pageToken = listData.nextPageToken;
    if (!pageToken) break;
  }

  if (allMessageIds.length === 0) {
    // NOTE(review): this path exits without writing `outputPath`, so a file
    // left by an earlier run stays in place — confirm that is intended.
    console.log("No messages found matching query");
    console.log(JSON.stringify({ count: 0, messages: [] }));
    process.exit(0);
  }

  // Phase 2: Fetch all message details
  console.log(
    `\nPhase 2: Fetching details for ${allMessageIds.length} messages...`
  );
  const details = await fetchMessages(allMessageIds);

  // Transform to our format
  for (const detail of details) {
    // Header names are matched case-insensitively; a missing header yields "".
    const getHeader = (name) => {
      const header = detail.payload?.headers?.find(
        (h) => h.name.toLowerCase() === name.toLowerCase()
      );
      return header ? header.value : "";
    };

    messages.push({
      id: detail.id,
      threadId: detail.threadId,
      subject: getHeader("Subject"),
      from: getHeader("From"),
      to: getHeader("To"),
      cc: getHeader("Cc"),
      date: getHeader("Date"),
      snippet: detail.snippet,
      labelIds: detail.labelIds || [],
      sizeEstimate: detail.sizeEstimate,
    });
  }

  // Compute summary stats
  // A Map (not a plain object) keeps the tally correct even when a sender
  // string collides with an Object.prototype key such as "constructor".
  const senderCounts = new Map();
  const dates = [];
  for (const msg of messages) {
    const sender = extractEmail(msg.from);
    senderCounts.set(sender, (senderCounts.get(sender) ?? 0) + 1);
    if (msg.date) {
      const parsed = new Date(msg.date);
      // Skip Date headers that do not parse to a valid timestamp.
      if (!Number.isNaN(parsed.getTime())) dates.push(parsed);
    }
  }

  // Five most frequent senders, highest count first.
  const topSenders = [...senderCounts.entries()]
    .sort((a, b) => b[1] - a[1])
    .slice(0, 5)
    .map(([email, count]) => ({ email, count }));

  dates.sort((a, b) => a - b);
  const dateRange =
    dates.length > 0
      ? {
          oldest: dates[0].toISOString(),
          newest: dates[dates.length - 1].toISOString(),
        }
      : null;

  // Ensure output directory exists
  const dir = path.dirname(outputPath);
  if (dir && dir !== ".") {
    fs.mkdirSync(dir, { recursive: true });
  }

  // Write full results to file
  const output = {
    query: query || null,
    fetchedAt: new Date().toISOString(),
    count: messages.length,
    dateRange,
    messages,
  };

  fs.writeFileSync(outputPath, JSON.stringify(output, null, 2));

  // Log summary for agent context
  console.log(`\n✓ Fetched ${messages.length} messages`);
  console.log(`  Written to: ${outputPath}`);
  if (dateRange) {
    console.log(
      `  Date range: ${dateRange.oldest.split("T")[0]} to ${
        dateRange.newest.split("T")[0]
      }`
    );
  }
  console.log(`  Top senders:`);
  for (const s of topSenders) {
    console.log(`    - ${s.email}: ${s.count} messages`);
  }

  // Return minimal summary object
  console.log(
    JSON.stringify({
      success: true,
      outputPath,
      count: messages.length,
      dateRange,
      topSenders,
    })
  );
} catch (error) {
  console.error("Error fetching messages:", error.message);
  throw error;
}

/**
 * Derive a normalized sender key from a raw From header value.
 *
 * When the header contains an angle-bracketed part (`Name <addr>`), the text
 * inside the first pair of brackets is used; otherwise the whole header is
 * used. The chosen text is trimmed and lower-cased.
 *
 * @param {string} fromHeader - Raw From header value; may be empty or absent.
 * @returns {string} The normalized address, or "unknown" when the header is
 *   missing, not a string, or blank after trimming.
 */
function extractEmail(fromHeader) {
  if (typeof fromHeader !== "string") return "unknown";
  const match = fromHeader.match(/<([^>]+)>/);
  // Trim in both branches so "< a@b.com >" and "a@b.com" share one key.
  const address = (match ? match[1] : fromHeader).trim().toLowerCase();
  return address || "unknown";
}