The Contents API enables you to extract clean, structured content from web pages with optional AI-powered processing, including summarization and structured data extraction.

Basic Usage

import { Valyu } from "valyu-js";

// Construct the client with no arguments.
const valyu = new Valyu();

// Request extracted content for a single page; no options are passed,
// so no AI summary is requested.
const response = await valyu.contents([
  "https://en.wikipedia.org/wiki/Machine_learning"
]);

console.log(`Processed ${response.urls_processed} of ${response.urls_requested} URLs`);
response.results?.forEach(result => {
  // `content` is typed `string | object`, so narrow before using string
  // methods; object content is serialized so a preview can still be shown.
  const text = typeof result.content === "string"
    ? result.content
    : JSON.stringify(result.content);

  console.log(`Title: ${result.title}`);
  console.log(`Content length: ${result.length} characters`);
  console.log(`Content preview: ${text.substring(0, 200)}...`);
});

Parameters

URLs (Required)

| Parameter | Type | Description |
| --- | --- | --- |
| urls | string[] | Array of URLs to process (maximum 10 URLs) |

Options (Optional)

| Parameter | Type | Description | Default |
| --- | --- | --- | --- |
| summary | boolean \| string \| object | AI processing configuration: false (none), true (auto), string (custom instructions), or object (JSON schema) | false |
| extractEffort | "normal" \| "high" \| "auto" | Processing effort level for content extraction | "normal" |
| responseLength | string \| number | Content length per URL: "short" (25k), "medium" (50k), "large" (100k), "max", or a custom number of characters | "short" |

Response Format

/** Shape of the value returned by `valyu.contents(...)`. */
interface ContentsResponse {
  // The examples treat `success === false` as a whole-request failure and
  // report `error` as the reason.
  success: boolean;
  error?: string | null;
  // NOTE(review): presumably a transaction identifier for the request — confirm.
  tx_id?: string;
  // URL counters used by the examples to report progress and partial failures.
  urls_requested?: number;
  urls_processed?: number;
  urls_failed?: number;
  // Extracted results, each described by ContentResult.
  results?: ContentResult[];
  // NOTE(review): presumably aggregate cost (USD) and character totals for the
  // whole request — confirm against the API reference.
  total_cost_dollars?: number;
  total_characters?: number;
}

/** A single entry of `ContentsResponse.results`. */
interface ContentResult {
  url: string;
  title: string;
  // Narrow with `typeof` before calling string methods or reading properties.
  content: string | object; // string for raw content, object for structured
  // Printed as a character count by the examples.
  length: number;
  source: string;
  // When truthy, the examples treat `content` as the summary output.
  summary_success?: boolean;
  data_type?: string;
  image_url?: Record<string, string>;
  citation?: string;
}

Parameter Examples

Basic Content Extraction

Extract clean content without AI processing:
// Fetch both pages without passing any options (no summary requested).
const response = await valyu.contents([
  "https://www.python.org",
  "https://nodejs.org"
]);

// Print each page's title next to its extracted length.
for (const page of response.results ?? []) {
  console.log(`${page.title}: ${page.length} characters`);
}

AI Summary (Boolean)

Get automatic AI summaries of the extracted content:
// `summary: true` requests an automatic AI summary of the page.
const response = await valyu.contents(
  ["https://en.wikipedia.org/wiki/Artificial_intelligence"],
  { summary: true, responseLength: "medium" }
);

// Only print when the first result actually carries content.
const firstContent = response.results?.[0]?.content;
if (firstContent) {
  console.log("AI Summary:", firstContent);
}

Custom Summary Instructions

Provide specific instructions for AI summarization:
// Page to summarize.
const newsUrls = ["https://techcrunch.com/latest-ai-news"];

// A string `summary` is used as custom instructions for the AI summarization.
const response = await valyu.contents(newsUrls, {
  summary: "Summarize the main AI trends mentioned in exactly 3 bullet points",
  responseLength: "medium",
  extractEffort: "high"
});

Structured Data Extraction

Extract specific data points using JSON schema:
// Page to extract structured data from.
const companyUrls = ["https://www.openai.com"];

// An object `summary` is a JSON schema describing the fields to extract.
const response = await valyu.contents(companyUrls, {
  extractEffort: "high",
  responseLength: "large",
  summary: {
    type: "object",
    properties: {
      company_name: {
        type: "string",
        description: "The name of the company"
      },
      industry: {
        type: "string",
        enum: ["tech", "finance", "healthcare", "retail", "other"],
        description: "Primary industry sector"
      },
      key_products: {
        type: "array",
        items: { type: "string" },
        maxItems: 5,
        description: "Main products or services"
      },
      founded_year: {
        type: "number",
        description: "Year the company was founded"
      }
    },
    required: ["company_name", "industry"]
  }
});

// Only print when the first result actually carries content.
const extracted = response.results?.[0]?.content;
if (extracted) {
  console.log("Extracted data:", extracted);
}

Response Length Control

Control the amount of content extracted per URL:
// Academic sources to process in a single request.
const paperUrls = [
  "https://arxiv.org/abs/2301.00001",
  "https://www.nature.com/articles/nature12373",
  "https://www.science.org/doi/10.1126/science.1234567"
];

const response = await valyu.contents(paperUrls, {
  // "large" keeps more content per URL, which suits academic papers.
  responseLength: "large",
  summary: "Extract the main research findings and methodology",
  extractEffort: "high"
});

Extract Effort Levels

Control the extraction quality and processing intensity:
// "normal" is the default effort level and the fast option.
const normalResponse = await valyu.contents(urls, { extractEffort: "normal" });

// "high" gives enhanced quality for complex layouts and JS-heavy pages.
const highQualityResponse = await valyu.contents(urls, { extractEffort: "high" });

// "auto" selects the effort level automatically.
const autoResponse = await valyu.contents(urls, { extractEffort: "auto" });

Response Length Options

Control content length with predefined or custom limits:
// Named presets.
// "short": 25k characters
const shortResponse = await valyu.contents(urls, { responseLength: "short" });

// "medium": 50k characters
const mediumResponse = await valyu.contents(urls, { responseLength: "medium" });

// "large": 100k characters
const largeResponse = await valyu.contents(urls, { responseLength: "large" });

// "max": no limit
const fullResponse = await valyu.contents(urls, { responseLength: "max" });

// A number sets a custom character limit.
const customResponse = await valyu.contents(urls, { responseLength: 15000 });

Use Case Examples

Research Paper Analysis

Build an AI-powered academic research assistant that extracts and analyzes research papers:
/**
 * Extracts a structured analysis of a research paper from its URL.
 *
 * Sends the URL to `valyu.contents` with a JSON schema as `summary`, prints the
 * extracted fields to the console, and returns the extracted object.
 *
 * @param paperUrl - URL of the paper to analyze.
 * @returns The structured analysis object, or `null` when the request failed,
 *   produced no content, or returned content that is not a structured object.
 */
async function analyzeResearchPaper(paperUrl: string) {
  const response = await valyu.contents([paperUrl], {
    summary: {
      type: "object",
      properties: {
        title: { type: "string" },
        authors: {
          type: "array",
          items: { type: "string" }
        },
        abstract: { type: "string" },
        key_contributions: {
          type: "array",
          items: { type: "string" },
          maxItems: 5,
          description: "Main contributions of the research"
        },
        methodology: {
          type: "string",
          description: "Research methodology and approach"
        },
        results_summary: {
          type: "string",
          description: "Summary of key findings and results"
        },
        implications: {
          type: "string",
          description: "Broader implications and significance"
        },
        citations_count: { type: "number" },
        publication_date: { type: "string" }
      },
      required: ["title", "abstract", "key_contributions", "methodology"]
    },
    responseLength: "max",
    extractEffort: "high"
  });

  const content = response.success ? response.results?.[0]?.content : undefined;

  // `content` can be a raw string instead of a structured object. Treating a
  // string as the analysis would print "undefined" for every field and hand
  // the caller text where an object is expected, so bail out instead.
  if (!content || typeof content !== "object") {
    return null;
  }

  // The result type only says `object`; the fields read below are the ones
  // requested by the schema above.
  const analysis = content as any;

  console.log("=== Research Paper Analysis ===");
  console.log(`Title: ${analysis.title}`);
  console.log(`Authors: ${analysis.authors?.join(", ")}`);
  console.log(`\nAbstract: ${analysis.abstract}`);

  console.log("\nKey Contributions:");
  analysis.key_contributions?.forEach((contrib: string, i: number) => {
    console.log(`${i + 1}. ${contrib}`);
  });

  console.log(`\nMethodology: ${analysis.methodology}`);
  console.log(`\nResults: ${analysis.results_summary}`);
  console.log(`\nImplications: ${analysis.implications}`);

  return analysis;
}

// Example call: analyze a single arXiv paper.
const examplePaperUrl = "https://arxiv.org/abs/2024.01234";
const paperAnalysis = await analyzeResearchPaper(examplePaperUrl);

E-commerce Product Intelligence

Create a product research tool that extracts comprehensive product data:
/**
 * Extracts structured product data from the given product pages.
 *
 * Sends the URLs to `valyu.contents` with a JSON schema as `summary`, prints
 * the extracted products and comparison summary, and returns the extracted
 * object.
 *
 * NOTE(review): only the first entry of `results` is inspected. If the API
 * returns one result per URL, data for the remaining URLs is ignored — confirm
 * the intended behavior for multi-URL requests.
 *
 * @param productUrls - Product page URLs to process.
 * @returns The structured analysis object, or `null` when the request failed,
 *   produced no content, or returned content that is not a structured object.
 */
async function analyzeProducts(productUrls: string[]) {
  const response = await valyu.contents(productUrls, {
    summary: {
      type: "object",
      properties: {
        products: {
          type: "array",
          items: {
            type: "object",
            properties: {
              product_name: { type: "string" },
              brand: { type: "string" },
              price: { type: "string" },
              original_price: { type: "string" },
              discount_percentage: { type: "string" },
              description: { type: "string" },
              key_features: {
                type: "array",
                items: { type: "string" },
                maxItems: 8
              },
              specifications: {
                type: "object",
                description: "Technical specifications"
              },
              customer_rating: { type: "number" },
              review_count: { type: "number" },
              availability: {
                type: "string",
                enum: ["in_stock", "out_of_stock", "limited", "pre_order"]
              },
              shipping_info: { type: "string" },
              warranty_info: { type: "string" }
            },
            required: ["product_name", "price", "description"]
          }
        },
        comparison_summary: {
          type: "string",
          description: "Overall comparison of the products"
        }
      }
    },
    extractEffort: "high",
    responseLength: "large"
  });

  const content = response.success ? response.results?.[0]?.content : undefined;

  // `content` can be a raw string instead of a structured object. Treating a
  // string as the analysis would silently print nothing useful and hand the
  // caller text where an object is expected, so bail out instead.
  if (!content || typeof content !== "object") {
    return null;
  }

  // The result type only says `object`; the fields read below are the ones
  // requested by the schema above.
  const analysis = content as any;

  console.log("=== Product Analysis ===");
  analysis.products?.forEach((product: any, i: number) => {
    console.log(`\n${i + 1}. ${product.product_name}`);
    console.log(`   Brand: ${product.brand}`);
    console.log(`   Price: ${product.price}`);
    console.log(`   Rating: ${product.customer_rating}/5 (${product.review_count} reviews)`);
    console.log(`   Availability: ${product.availability}`);

    if (product.key_features?.length > 0) {
      console.log("   Key Features:");
      product.key_features.forEach((feature: string) => {
        console.log(`     • ${feature}`);
      });
    }
  });

  console.log(`\n=== Comparison Summary ===`);
  console.log(analysis.comparison_summary);

  return analysis;
}

// Example call: compare product pages from three retailers.
const productPages = [
  "https://amazon.com/product1",
  "https://bestbuy.com/product2",
  "https://target.com/product3"
];
const productComparison = await analyzeProducts(productPages);

Technical Documentation Processor

Build a documentation analysis tool that extracts API information and technical details:
/**
 * Extracts API information and technical details from documentation pages.
 *
 * Sends the URLs to `valyu.contents` with a JSON schema as `summary`, prints
 * the overview, authentication, endpoints and rate limits, and returns the
 * extracted object.
 *
 * NOTE(review): only the first entry of `results` is inspected. If the API
 * returns one result per URL, data for the remaining URLs is ignored — confirm
 * the intended behavior for multi-URL requests.
 *
 * @param docUrls - Documentation page URLs to process.
 * @returns The structured documentation object, or `null` when the request
 *   failed, produced no content, or returned content that is not a structured
 *   object.
 */
async function processDocumentation(docUrls: string[]) {
  const response = await valyu.contents(docUrls, {
    summary: {
      type: "object",
      properties: {
        documentation_overview: {
          type: "string",
          description: "Overview of what the documentation covers"
        },
        api_endpoints: {
          type: "array",
          items: {
            type: "object",
            properties: {
              method: { type: "string" },
              path: { type: "string" },
              description: { type: "string" },
              parameters: {
                type: "array",
                items: {
                  type: "object",
                  properties: {
                    name: { type: "string" },
                    type: { type: "string" },
                    required: { type: "boolean" },
                    description: { type: "string" }
                  }
                }
              },
              response_format: { type: "string" }
            }
          }
        },
        authentication: {
          type: "object",
          properties: {
            method: { type: "string" },
            description: { type: "string" },
            example: { type: "string" }
          }
        },
        rate_limits: { type: "string" },
        code_examples: {
          type: "array",
          items: {
            type: "object",
            properties: {
              language: { type: "string" },
              example: { type: "string" },
              description: { type: "string" }
            }
          }
        },
        common_errors: {
          type: "array",
          items: { type: "string" }
        }
      },
      required: ["documentation_overview", "api_endpoints", "authentication"]
    },
    extractEffort: "high",
    responseLength: "large"
  });

  const content = response.success ? response.results?.[0]?.content : undefined;

  // `content` can be a raw string instead of a structured object. Treating a
  // string as the extracted docs would print "undefined" for every field and
  // hand the caller text where an object is expected, so bail out instead.
  if (!content || typeof content !== "object") {
    return null;
  }

  // The result type only says `object`; the fields read below are the ones
  // requested by the schema above.
  const docs = content as any;

  console.log("=== API Documentation Analysis ===");
  console.log(`\nOverview: ${docs.documentation_overview}`);

  console.log("\n=== Authentication ===");
  console.log(`Method: ${docs.authentication?.method}`);
  console.log(`Description: ${docs.authentication?.description}`);

  console.log("\n=== API Endpoints ===");
  docs.api_endpoints?.forEach((endpoint: any, i: number) => {
    console.log(`\n${i + 1}. ${endpoint.method} ${endpoint.path}`);
    console.log(`   Description: ${endpoint.description}`);

    if (endpoint.parameters?.length > 0) {
      console.log("   Parameters:");
      endpoint.parameters.forEach((param: any) => {
        const required = param.required ? "(required)" : "(optional)";
        console.log(`     • ${param.name} (${param.type}) ${required}: ${param.description}`);
      });
    }
  });

  if (docs.rate_limits) {
    console.log(`\n=== Rate Limits ===`);
    console.log(docs.rate_limits);
  }

  return docs;
}

// Example call: process an API reference and a developer guide together.
const documentationPages = [
  "https://docs.example.com/api-reference",
  "https://developers.service.com/guide"
];
const apiDocs = await processDocumentation(documentationPages);

Error Handling

const response = await valyu.contents(urls, options);

if (!response.success) {
  // Whole-request failure: report it and skip result processing.
  // Branching is used instead of a bare `return`, which is only valid inside
  // a function body and would be a syntax error at module top level.
  console.error("Contents extraction failed:", response.error);
} else {
  // Check for partial failures
  if (response.urls_failed && response.urls_failed > 0) {
    console.warn(`${response.urls_failed} of ${response.urls_requested} URLs failed`);
  }

  // Process successful results
  response.results?.forEach((result, index) => {
    console.log(`Result ${index + 1}:`);
    console.log(`  Title: ${result.title}`);
    console.log(`  URL: ${result.url}`);
    console.log(`  Length: ${result.length} characters`);

    if (result.summary_success) {
      // Structured summaries are objects; interpolating one directly would
      // print "[object Object]", so serialize non-string content first.
      const summary = typeof result.content === "string"
        ? result.content
        : JSON.stringify(result.content, null, 2);
      console.log(`  Summary: ${summary}`);
    }
  });
}