Fetch arXiv Paper
Fetch paper metadata (title, authors, abstract, dates, categories, and PDF link) from arXiv given a URL or paper ID, and save it to a session JSON file
Source Code
import { arXiv } from "arxiv-api";
import fs from "fs";
import path from "path";
/**
 * Normalize user input (an arXiv URL or a bare identifier) to an arXiv paper ID.
 *
 * Recognized forms:
 *   - abs/pdf URLs with a new-style ID, e.g. https://arxiv.org/abs/2401.12345
 *   - abs/pdf URLs with an old-style ID, e.g. https://arxiv.org/abs/hep-th/9901001
 *   - bare new-style IDs, e.g. 2401.12345
 * For these forms anything after the numeric part (a version suffix such as
 * "v2", a ".pdf" extension) is dropped. Any other input is returned with
 * surrounding whitespace removed but otherwise unchanged.
 *
 * @param {string} input - URL or identifier as typed by the user.
 * @returns {string} The extracted ID, or the trimmed input when no pattern matches.
 */
const extractPaperId = (input) => {
  const text = input.trim();

  // Host names are case-insensitive, hence the `i` flag. The second
  // alternative covers old-style IDs: archive[.SUBJECT]/YYMMNNN.
  const urlMatch = text.match(
    /arxiv\.org\/(?:abs|pdf)\/(\d+\.\d+|[a-z-]+(?:\.[a-z]{2})?\/\d{7})/i
  );
  if (urlMatch) return urlMatch[1];

  // Bare new-style IDs like 2401.12345 (optionally followed by a version).
  const idMatch = text.match(/^(\d+\.\d+)/);
  if (idMatch) return idMatch[1];

  return text;
};
// Command-line entry point: the first positional argument is an arXiv URL or
// paper ID. Metadata for the matching paper is written to
// ./session/paper-content.json and a short summary is printed.
const [paper_id] = process.argv.slice(2);
if (!paper_id) {
  console.error("Error: paper_id is required");
  process.exit(1);
}

const paperId = extractPaperId(paper_id);
console.log(`Fetching paper: ${paperId}`);

try {
  // Request at most one result for the normalized ID.
  const papers = await arXiv.search({
    searchQueryParams: [{ include: [{ name: paperId }] }],
    start: 0,
    maxResults: 1,
  });

  if (!papers || papers.length === 0) {
    console.error(`No paper found with ID: ${paperId}`);
    process.exit(1);
  }

  const paper = papers[0];

  // NOTE(review): the field names read from `paper` assume the response shape
  // of the arxiv-api client (author objects with `name`, a `pdf` link) —
  // confirm against the library. Missing lists degrade to empty arrays so a
  // sparse record cannot crash the script after the file has been written.
  const authors = Array.isArray(paper.authors)
    ? paper.authors.map((a) => a.name)
    : [];
  const categories = Array.isArray(paper.categories) ? paper.categories : [];

  const result = {
    id: paperId,
    title: paper.title,
    authors,
    abstract: paper.summary,
    published: paper.published,
    updated: paper.updated,
    categories,
    pdfUrl: paper.pdf,
    arxivUrl: `https://arxiv.org/abs/${paperId}`,
  };

  // Write to session file. A recursive mkdir is a no-op when the directory
  // already exists, so no separate existence check is needed.
  const sessionDir = "./session";
  fs.mkdirSync(sessionDir, { recursive: true });
  const outputPath = path.join(sessionDir, "paper-content.json");
  fs.writeFileSync(outputPath, JSON.stringify(result, null, 2));

  console.log(`Paper: "${result.title}"`);
  console.log(`Authors: ${result.authors.join(", ")}`);
  console.log(`Categories: ${result.categories.join(", ")}`);
  console.log(`Wrote paper metadata to ${outputPath}`);
} catch (error) {
  // Thrown values are not guaranteed to be Error instances; fall back to their
  // string form rather than printing "undefined".
  const reason = error?.message ?? String(error);
  console.error(`Failed to fetch paper: ${reason}`);
  process.exit(1);
}