Code

Fetch arXiv Paper

Fetch paper metadata (including the PDF link) from arXiv, given an arXiv URL or a paper ID

Source Code

import { arXiv } from "arxiv-api";
import fs from "fs";
import path from "path";

/**
 * Normalize user input to a bare arXiv identifier without a version suffix.
 *
 * Recognized forms (surrounding whitespace is ignored, matching is case-insensitive):
 *   - abs/pdf URLs, e.g. https://arxiv.org/abs/2401.12345 or arxiv.org/pdf/2401.12345v2.pdf
 *   - old-style URLs, e.g. https://arxiv.org/abs/hep-th/9901001
 *   - bare IDs, optionally prefixed with "arXiv:", e.g. 2401.12345v3 or math.GT/0309136
 *
 * @param {string} input - An arXiv URL or paper ID.
 * @returns {string} The extracted ID, or the trimmed input when no known form matches.
 */
const extractPaperId = (input) => {
  const text = input.trim();

  // New-style IDs are digits.digits; old-style IDs are archive[.SC]/NNNNNNN.
  // Neither capture includes a trailing "vN" or ".pdf", so those are dropped.
  const urlMatch = text.match(
    /arxiv\.org\/(?:abs|pdf)\/(\d+\.\d+|[a-z-]+(?:\.[a-z]{2})?\/\d{7})/i,
  );
  if (urlMatch) return urlMatch[1];

  // Bare identifier at the start of the input, with an optional "arXiv:" label.
  const idMatch = text.match(
    /^(?:arxiv:)?(\d+\.\d+|[a-z-]+(?:\.[a-z]{2})?\/\d{7})/i,
  );
  if (idMatch) return idMatch[1];

  // Unknown shape: hand it back unchanged (apart from trimming) and let the lookup decide.
  return text;
};

/**
 * Serialize `payload` as pretty-printed JSON into ./session/paper-content.json,
 * creating the session directory first when it does not exist.
 *
 * @param {object} payload - The data to persist.
 * @returns {string} The path of the file that was written.
 */
const saveSessionJson = (payload) => {
  const sessionDir = "./session";
  if (!fs.existsSync(sessionDir)) {
    fs.mkdirSync(sessionDir, { recursive: true });
  }

  const target = path.join(sessionDir, "paper-content.json");
  fs.writeFileSync(target, JSON.stringify(payload, null, 2));
  return target;
};

// Script entry point: the first command-line argument is an arXiv URL or a paper ID.
const paper_id = process.argv[2];

if (!paper_id) {
  console.error("Error: paper_id is required");
  process.exit(1);
}

const paperId = extractPaperId(paper_id);
console.log(`Fetching paper: ${paperId}`);

try {
  // NOTE(review): the ID is passed as a generic search term (`name`) and only
  // the first hit is requested — confirm this always yields the requested paper.
  const searchRequest = {
    searchQueryParams: [{ include: [{ name: paperId }] }],
    start: 0,
    maxResults: 1,
  };
  const hits = await arXiv.search(searchRequest);

  const nothingFound = !hits || hits.length === 0;
  if (nothingFound) {
    console.error(`No paper found with ID: ${paperId}`);
    process.exit(1);
  }

  const entry = hits[0];

  // Assumes each author is an object with a `name` field and that `categories`
  // is an array — TODO confirm against the arxiv-api result shape.
  const metadata = {
    id: paperId,
    title: entry.title,
    authors: entry.authors.map((author) => author.name),
    abstract: entry.summary,
    published: entry.published,
    updated: entry.updated,
    categories: entry.categories,
    pdfUrl: entry.pdf,
    arxivUrl: `https://arxiv.org/abs/${paperId}`,
  };

  // Persist the metadata to the session file, then print a short summary.
  const writtenTo = saveSessionJson(metadata);

  console.log(`Paper: "${metadata.title}"`);
  console.log(`Authors: ${metadata.authors.join(", ")}`);
  console.log(`Categories: ${metadata.categories.join(", ")}`);
  console.log(`Wrote paper metadata to ${writtenTo}`);
} catch (err) {
  // Any failure above (network, unexpected result shape, file system) lands here.
  console.error(`Failed to fetch paper: ${err.message}`);
  process.exit(1);
}