Skip to content

feat: add infinite scroll #37

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions scrapegraph-py/examples/async/smartscraper_infinite_scroll.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""
Example of using SmartScraper with infinite scrolling in asynchronous mode.
This example demonstrates how to scrape content from multiple webpages concurrently using infinite scrolling.
"""

import asyncio
from scrapegraph_py import AsyncClient
from scrapegraph_py.logger import sgai_logger

# Set up logging
sgai_logger.set_logging(level="INFO")

async def scrape_with_infinite_scroll(client: AsyncClient, url: str, prompt: str, max_pages: int = 10):
    """Run a single infinite-scroll smartscraper request and return its response.

    Args:
        client: An open AsyncClient to issue the request through.
        url: The page to scrape.
        prompt: Natural-language extraction instructions.
        max_pages: Upper bound on pages loaded while scrolling (default 10).
    """
    # Gather the request arguments once, then await the API call directly.
    request_kwargs = {
        "website_url": url,
        "user_prompt": prompt,
        "infinite_scrolling": True,
        "max_pages": max_pages,
    }
    return await client.smartscraper(**request_kwargs)

async def main():
    """Run the infinite-scroll examples: three concurrent scrapes, then one static scrape."""
    # Initialize the async client with your API key; the async context manager
    # guarantees the underlying session is closed even if a request fails.
    async with AsyncClient(api_key="your-api-key-here") as sgai_client:
        # Example 1: Scrape multiple pages concurrently.
        # Each (url, prompt, max_pages) triple becomes one scraping task.
        jobs = [
            ("https://example.com/products", "Extract all product names and prices", 20),
            ("https://example.com/articles", "Extract all article titles and authors", 15),
            ("https://example.com/news", "Extract all news headlines and dates", 10),
        ]
        tasks = [
            scrape_with_infinite_scroll(sgai_client, url, prompt, max_pages=pages)
            for url, prompt, pages in jobs
        ]

        # Wait for all scraping tasks to complete
        results = await asyncio.gather(*tasks)

        # Process and print results
        for i, result in enumerate(results, 1):
            print(f"\nExample {i} Results:")
            print(f"Request ID: {result['request_id']}")
            print(f"Result: {result['result']}")

        # Example 2: Single page without infinite scrolling
        response = await sgai_client.smartscraper(
            website_url="https://example.com/static-page",
            user_prompt="Extract the main heading and first paragraph",
            infinite_scrolling=False
        )
        # Bug fix: this is Example 2, but the label previously said "Example 4".
        print("\nExample 2 - Without infinite scrolling:")
        print(f"Request ID: {response['request_id']}")
        print(f"Result: {response['result']}")

# Script entry point: run the async demo under the asyncio event loop.
if __name__ == "__main__":
    asyncio.run(main())
33 changes: 33 additions & 0 deletions scrapegraph-py/examples/sync/smartscraper_infinite_scroll.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""
Example of using SmartScraper with infinite scrolling in synchronous mode.
This example demonstrates how to scrape content from a webpage that requires scrolling to load more content.
"""

from scrapegraph_py import Client
from scrapegraph_py.logger import sgai_logger
import time

# Set up logging
sgai_logger.set_logging(level="INFO")

def main():
    """Demonstrate SmartScraper with infinite scrolling in synchronous mode."""
    # Initialize the client with your API key
    sgai_client = Client(api_key="your-api-key-here")

    try:
        # Scroll through up to 10 pages of the listing before extracting.
        response1 = sgai_client.smartscraper(
            website_url="https://www.ycombinator.com/companies",
            user_prompt="Extract all the companies and their info",
            infinite_scrolling=True,
            max_pages=10,
        )

        print("\nExample 1 - Basic infinite scrolling:")
        print(f"Request ID: {response1['request_id']}")
        print(f"Result: {response1['result']}")
    finally:
        # Bug fix: close the client even when the request raises, so the
        # underlying HTTP session is never leaked.
        sgai_client.close()

# Script entry point: run the synchronous demo.
if __name__ == "__main__":
    main()
6 changes: 6 additions & 0 deletions scrapegraph-py/scrapegraph_py/async_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ async def smartscraper(
website_html: Optional[str] = None,
headers: Optional[dict[str, str]] = None,
output_schema: Optional[BaseModel] = None,
infinite_scrolling: bool = False,
max_pages: int = 10,
):
"""Send a smartscraper request"""
logger.info("🔍 Starting smartscraper request")
Expand All @@ -184,13 +186,17 @@ async def smartscraper(
if headers:
logger.debug("🔧 Using custom headers")
logger.debug(f"📝 Prompt: {user_prompt}")
if infinite_scrolling:
logger.debug(f"🔄 Infinite scrolling enabled with max_pages={max_pages}")

request = SmartScraperRequest(
website_url=website_url,
website_html=website_html,
headers=headers,
user_prompt=user_prompt,
output_schema=output_schema,
infinite_scrolling=infinite_scrolling,
max_pages=max_pages,
)
logger.debug("✅ Request validation passed")

Expand Down
6 changes: 6 additions & 0 deletions scrapegraph-py/scrapegraph_py/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ def smartscraper(
website_html: Optional[str] = None,
headers: Optional[dict[str, str]] = None,
output_schema: Optional[BaseModel] = None,
infinite_scrolling: bool = False,
max_pages: int = 10,
):
"""Send a smartscraper request"""
logger.info("🔍 Starting smartscraper request")
Expand All @@ -192,13 +194,17 @@ def smartscraper(
if headers:
logger.debug("🔧 Using custom headers")
logger.debug(f"📝 Prompt: {user_prompt}")
if infinite_scrolling:
logger.debug(f"🔄 Infinite scrolling enabled with max_pages={max_pages}")

request = SmartScraperRequest(
website_url=website_url,
website_html=website_html,
headers=headers,
user_prompt=user_prompt,
output_schema=output_schema,
infinite_scrolling=infinite_scrolling,
max_pages=max_pages,
)
logger.debug("✅ Request validation passed")

Expand Down
10 changes: 9 additions & 1 deletion scrapegraph-py/scrapegraph_py/models/smartscraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from uuid import UUID

from bs4 import BeautifulSoup
from pydantic import BaseModel, Field, model_validator
from pydantic import BaseModel, Field, model_validator, conint


class SmartScraperRequest(BaseModel):
Expand All @@ -28,6 +28,14 @@ class SmartScraperRequest(BaseModel):
},
description="Optional headers to send with the request, including cookies and user agent",
)
infinite_scrolling: bool = Field(
default=False,
description="Enable infinite scrolling to load more content dynamically",
)
max_pages: conint(ge=1, le=1000) = Field(
default=10,
description="Maximum number of pages to scroll when infinite_scrolling is enabled",
)
output_schema: Optional[Type[BaseModel]] = None

@model_validator(mode="after")
Expand Down
2 changes: 1 addition & 1 deletion scrapegraph-py/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.