Basic Usage
Copy
from valyu import Valyu

# Instantiate a client with default settings.
valyu = Valyu()

# Extract clean content from a single URL (no AI processing).
response = valyu.contents([
    "https://en.wikipedia.org/wiki/Machine_learning"
])

print(f"Processed {response.urls_processed} of {response.urls_requested} URLs")

if response.results:
    for result in response.results:
        print(f"Title: {result.title}")
        print(f"Content length: {result.length} characters")
        print(f"Content preview: {result.content[:200]}...")
Parameters
URLs (Required)
Parameter | Type | Description |
---|---|---|
urls | List[str] | Array of URLs to process (maximum 10 URLs) |
Options (Optional)
Parameter | Type | Description | Default |
---|---|---|---|
summary | bool | str | dict | AI processing configuration: False (none), True (auto), string (custom), or JSON schema | False |
extract_effort | "normal" | "high" | "auto" | Processing effort level for content extraction | "normal" |
response_length | str | int | Content length per URL: "short" (25k), "medium" (50k), "large" (100k), "max", or custom | "short" |
Response Format
Copy
class ContentsResponse:
success: bool
error: Optional[str]
tx_id: str
urls_requested: int
urls_processed: int
urls_failed: int
results: List[ContentsResult]
total_cost_dollars: float
total_characters: int
class ContentsResult:
url: str
title: str
content: Union[str, dict] # string for raw content, dict for structured
length: int
source: str
summary_success: Optional[bool]
data_type: Optional[str]
image_url: Optional[Dict[str, str]]
citation: Optional[str]
Parameter Examples
Basic Content Extraction
Extract clean content without AI processing:
# Fetch clean page content for several URLs in one call.
response = valyu.contents([
    "https://www.python.org",
    "https://nodejs.org"
])

if response.results:
    for result in response.results:
        print(f"{result.title}: {result.length} characters")
AI Summary (Boolean)
Get automatic AI summaries of the extracted content:
# summary=True asks for an automatic AI summary; the summary text is
# delivered in the result's `content`.
response = valyu.contents(
    ["https://en.wikipedia.org/wiki/Artificial_intelligence"],
    summary=True,
    response_length="medium",
)

if response.results and response.results[0].content:
    print("AI Summary:", response.results[0].content)
Custom Summary Instructions
Provide specific instructions for AI summarization:
# A string `summary` value steers the AI with a custom instruction.
response = valyu.contents(
    ["https://techcrunch.com/latest-ai-news"],
    summary="Summarize the main AI trends mentioned in exactly 3 bullet points",
    response_length="medium",
    extract_effort="high",
)
Structured Data Extraction
Extract specific data points using JSON schema:
# Passing a JSON schema as `summary` requests structured extraction; the
# resulting dict comes back in the result's `content`.
response = valyu.contents(
    ["https://www.openai.com"],
    extract_effort="high",
    response_length="large",
    summary={
        "type": "object",
        "properties": {
            "company_name": {
                "type": "string",
                "description": "The name of the company"
            },
            "industry": {
                "type": "string",
                "enum": ["tech", "finance", "healthcare", "retail", "other"],
                "description": "Primary industry sector"
            },
            "key_products": {
                "type": "array",
                "items": {"type": "string"},
                "maxItems": 5,
                "description": "Main products or services"
            },
            "founded_year": {
                "type": "number",
                "description": "Year the company was founded"
            }
        },
        "required": ["company_name", "industry"]
    },
)

if response.results and response.results[0].content:
    print("Extracted data:", response.results[0].content)
Response Length Control
Control the amount of content extracted per URL:
# Dense sources such as academic papers benefit from a larger length cap.
response = valyu.contents(
    [
        "https://arxiv.org/abs/2301.00001",
        "https://www.nature.com/articles/nature12373",
        "https://www.science.org/doi/10.1126/science.1234567",
    ],
    response_length="large",  # More content for academic papers
    summary="Extract the main research findings and methodology",
    extract_effort="high",
)
Extract Effort Levels
Control the extraction quality and processing intensity:
# "normal" (the default) is the fastest option.
normal_response = valyu.contents(urls, extract_effort="normal")

# "high" trades speed for quality on complex layouts and JS-heavy pages.
high_quality_response = valyu.contents(urls, extract_effort="high")

# "auto" lets the service pick the effort level per page.
auto_response = valyu.contents(urls, extract_effort="auto")
Response Length Options
Control content length with predefined or custom limits:
# Named presets
short_response = valyu.contents(urls, response_length="short")   # 25k characters
medium_response = valyu.contents(urls, response_length="medium")  # 50k characters
large_response = valyu.contents(urls, response_length="large")   # 100k characters
full_response = valyu.contents(urls, response_length="max")      # No limit

# An integer sets an exact character cap.
custom_response = valyu.contents(urls, response_length=15000)    # Custom character limit
Use Case Examples
Research Paper Analysis
Build an AI-powered academic research assistant that extracts and analyzes research papers:
def analyze_research_paper(paper_url: str):
    """Extract a structured analysis of a single research paper.

    Args:
        paper_url: URL of the paper (e.g. an arXiv abstract page).

    Returns:
        The extracted analysis dict on success, otherwise None.
    """
    response = valyu.contents(
        [paper_url],
        summary={
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "authors": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "abstract": {"type": "string"},
                "key_contributions": {
                    "type": "array",
                    "items": {"type": "string"},
                    "maxItems": 5,
                    "description": "Main contributions of the research"
                },
                "methodology": {
                    "type": "string",
                    "description": "Research methodology and approach"
                },
                "results_summary": {
                    "type": "string",
                    "description": "Summary of key findings and results"
                },
                "implications": {
                    "type": "string",
                    "description": "Broader implications and significance"
                },
                "citations_count": {"type": "number"},
                "publication_date": {"type": "string"}
            },
            "required": ["title", "abstract", "key_contributions", "methodology"]
        },
        response_length="max",
        extract_effort="high")
    # FIX: structured (JSON-schema) output is delivered in `content`
    # (ContentsResult declares no `summary` attribute); the original read
    # `results[0].summary`, which does not exist on the result model.
    if response.success and response.results and response.results[0].content:
        analysis = response.results[0].content
        print("=== Research Paper Analysis ===")
        print(f"Title: {analysis['title']}")
        print(f"Authors: {', '.join(analysis.get('authors', []))}")
        print(f"\nAbstract: {analysis['abstract']}")
        print("\nKey Contributions:")
        for i, contrib in enumerate(analysis.get('key_contributions', []), 1):
            print(f"{i}. {contrib}")
        print(f"\nMethodology: {analysis['methodology']}")
        # Only keys in the schema's `required` list are guaranteed present.
        print(f"\nResults: {analysis.get('results_summary', 'N/A')}")
        print(f"\nImplications: {analysis.get('implications', 'N/A')}")
        return analysis
    return None


# Usage
paper_analysis = analyze_research_paper(
    "https://arxiv.org/abs/2024.01234"
)
E-commerce Product Intelligence
Create a product research tool that extracts comprehensive product data:
def analyze_products(product_urls: List[str]):
    """Extract and compare structured product data from e-commerce pages.

    Args:
        product_urls: Product page URLs to process (max 10 per call).

    Returns:
        The extracted product-comparison dict on success, otherwise None.
    """
    response = valyu.contents(
        product_urls,
        summary={
            "type": "object",
            "properties": {
                "products": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "product_name": {"type": "string"},
                            "brand": {"type": "string"},
                            "price": {"type": "string"},
                            "original_price": {"type": "string"},
                            "discount_percentage": {"type": "string"},
                            "description": {"type": "string"},
                            "key_features": {
                                "type": "array",
                                "items": {"type": "string"},
                                "maxItems": 8
                            },
                            "specifications": {
                                "type": "object",
                                "description": "Technical specifications"
                            },
                            "customer_rating": {"type": "number"},
                            "review_count": {"type": "number"},
                            "availability": {
                                "type": "string",
                                "enum": ["in_stock", "out_of_stock", "limited", "pre_order"]
                            },
                            "shipping_info": {"type": "string"},
                            "warranty_info": {"type": "string"}
                        },
                        "required": ["product_name", "price", "description"]
                    }
                },
                "comparison_summary": {
                    "type": "string",
                    "description": "Overall comparison of the products"
                }
            }
        },
        extract_effort="high",
        response_length="large")
    if response.success and response.results and response.results[0].content:
        analysis = response.results[0].content
        print("=== Product Analysis ===")
        for i, product in enumerate(analysis.get('products', []), 1):
            # FIX: only product_name / price / description are in the
            # schema's `required` list, so guard every other field with
            # .get() instead of risking KeyError on sparse pages.
            print(f"\n{i}. {product['product_name']}")
            print(f" Brand: {product.get('brand', 'N/A')}")
            print(f" Price: {product['price']}")
            print(f" Rating: {product.get('customer_rating', 'N/A')}/5 ({product.get('review_count', 0)} reviews)")
            print(f" Availability: {product.get('availability', 'unknown')}")
            if product.get('key_features'):
                print(" Key Features:")
                for feature in product['key_features']:
                    print(f" • {feature}")
        # comparison_summary is optional in the schema as well.
        if analysis.get('comparison_summary'):
            print("\n=== Comparison Summary ===")
            print(analysis['comparison_summary'])
        return analysis
    return None


# Usage
product_comparison = analyze_products([
    "https://amazon.com/product1",
    "https://bestbuy.com/product2",
    "https://target.com/product3"
])
Technical Documentation Processor
Build a documentation analysis tool that extracts API information and technical details:
def process_documentation(doc_urls: List[str]):
    """Extract API reference details from technical documentation pages.

    Args:
        doc_urls: Documentation page URLs to process (max 10 per call).

    Returns:
        The extracted documentation dict on success, otherwise None.
    """
    response = valyu.contents(
        doc_urls,
        summary={
            "type": "object",
            "properties": {
                "documentation_overview": {
                    "type": "string",
                    "description": "Overview of what the documentation covers"
                },
                "api_endpoints": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "method": {"type": "string"},
                            "path": {"type": "string"},
                            "description": {"type": "string"},
                            "parameters": {
                                "type": "array",
                                "items": {
                                    "type": "object",
                                    "properties": {
                                        "name": {"type": "string"},
                                        "type": {"type": "string"},
                                        "required": {"type": "boolean"},
                                        "description": {"type": "string"}
                                    }
                                }
                            },
                            "response_format": {"type": "string"}
                        }
                    }
                },
                "authentication": {
                    "type": "object",
                    "properties": {
                        "method": {"type": "string"},
                        "description": {"type": "string"},
                        "example": {"type": "string"}
                    }
                },
                "rate_limits": {"type": "string"},
                "code_examples": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "language": {"type": "string"},
                            "example": {"type": "string"},
                            "description": {"type": "string"}
                        }
                    }
                },
                "common_errors": {
                    "type": "array",
                    "items": {"type": "string"}
                }
            },
            "required": ["documentation_overview", "api_endpoints", "authentication"]
        },
        extract_effort="high",
        response_length="large")
    if response.success and response.results and response.results[0].content:
        docs = response.results[0].content
        print("=== API Documentation Analysis ===")
        print(f"\nOverview: {docs['documentation_overview']}")
        print("\n=== Authentication ===")
        auth = docs.get('authentication', {})
        print(f"Method: {auth.get('method')}")
        print(f"Description: {auth.get('description')}")
        print("\n=== API Endpoints ===")
        for i, endpoint in enumerate(docs.get('api_endpoints', []), 1):
            # FIX: endpoint/parameter objects declare no `required` keys in
            # the schema, so index with .get() to avoid KeyError when the
            # extractor omits a field.
            print(f"\n{i}. {endpoint.get('method', '?')} {endpoint.get('path', '?')}")
            print(f" Description: {endpoint.get('description', '')}")
            if endpoint.get('parameters'):
                print(" Parameters:")
                for param in endpoint['parameters']:
                    required = "(required)" if param.get('required') else "(optional)"
                    print(f" • {param.get('name')} ({param.get('type')}) {required}: {param.get('description', '')}")
        if docs.get('rate_limits'):
            print("\n=== Rate Limits ===")
            print(docs['rate_limits'])
        return docs
    return None


# Usage
api_docs = process_documentation([
    "https://docs.example.com/api-reference",
    "https://developers.service.com/guide"
])
Error Handling
Copy
response = valyu.contents(urls, **options)

if not response.success:
    # Whole-request failure: `error` carries the reason.
    print("Contents extraction failed:", response.error)
else:
    # Check for partial failures
    if response.urls_failed and response.urls_failed > 0:
        print(f"{response.urls_failed} of {response.urls_requested} URLs failed")
    # Process successful results
    if response.results:
        for index, result in enumerate(response.results):
            print(f"Result {index + 1}:")
            print(f" Title: {result.title}")
            print(f" URL: {result.url}")
            print(f" Length: {result.length} characters")
            if result.summary_success:
                print(f" Summary: {result.content}")