Unlike screenshots saved locally, files downloaded during browser automation are stored in Browserbase’s cloud storage. Retrieve them using the Browserbase API. A common use case for headless browsers is downloading files from web pages. Browserbase syncs every downloaded file to cloud storage and appends a Unix timestamp to avoid naming conflicts (e.g., sample.pdf becomes sample-1719265797164.pdf).
Perform the download action in your automation script
Node.js
Python
import { chromium } from "playwright-core";
import { Browserbase } from "@browserbasehq/sdk";

// Creates a Browserbase session, triggers a download in the remote browser,
// and prints the session ID so the file can be retrieved later.
(async () => {
  const bb = new Browserbase({ apiKey: process.env.BROWSERBASE_API_KEY! });
  const session = await bb.sessions.create();
  const browser = await chromium.connectOverCDP(session.connectUrl);

  // Everything after connecting runs inside try/finally so the browser
  // connection is released even when navigation or the download fails.
  try {
    const defaultContext = browser.contexts()[0];
    const page = defaultContext.pages()[0];

    // Required to avoid playwright overriding location
    const client = await defaultContext.newCDPSession(page);
    await client.send("Browser.setDownloadBehavior", {
      behavior: "allow",
      downloadPath: "downloads",
      eventsEnabled: true,
    });

    await page.goto("https://browser-tests-alpha.vercel.app/api/download-test");

    // Start listening for the download before clicking so the event
    // cannot be missed.
    const [download] = await Promise.all([
      page.waitForEvent("download"),
      page.locator("#download").click(),
    ]);

    const downloadError = await download.failure();
    if (downloadError !== null) {
      console.log("Error happened on download:", downloadError);
      throw new Error(downloadError);
    }

    // Store the session ID to retrieve downloads later
    console.log("Download completed. Session ID:", session.id);

    await page.close();
  } finally {
    await browser.close();
  }
})().catch((error) => console.error(error.message));
import os

from browserbase import Browserbase
from playwright.sync_api import sync_playwright

# Creates a Browserbase session, triggers a download in the remote browser,
# and prints the session ID so the file can be retrieved later.
bb = Browserbase(api_key=os.environ["BROWSERBASE_API_KEY"])
session = bb.sessions.create()
print(f"Session ID: {session.id}")

with sync_playwright() as playwright:
    # Use the session ID to connect to the browser
    browser = playwright.chromium.connect_over_cdp(session.connect_url)

    # Everything after connecting runs inside try/finally so the browser
    # connection is released even when navigation or the download fails.
    try:
        # Required to avoid playwright overriding the location
        cdp_session = browser.new_browser_cdp_session()
        cdp_session.send(
            "Browser.setDownloadBehavior",
            {
                "behavior": "allow",
                "downloadPath": "downloads",
                "eventsEnabled": True,
            },
        )

        context = browser.contexts[0]
        page = context.pages[0]

        # Head to the test downloads page
        page.goto("https://browser-tests-alpha.vercel.app/api/download-test")

        # Click the download button and wait for the download to start, so the
        # page is not closed while the file is still being transferred.
        print("Downloading file to the remote browser...")
        with page.expect_download() as download_info:
            page.get_by_role("link", name="Download File").click()

        # failure() waits for the download to finish and returns an error
        # string, or None on success.
        download_error = download_info.value.failure()
        if download_error is not None:
            raise RuntimeError(f"Error happened on download: {download_error}")

        # Clean up
        page.close()
    finally:
        browser.close()

print("Download completed. Use the session ID to retrieve the files.")
Critical: setDownloadBehavior Configuration
When using Playwright or Puppeteer, you must call Browser.setDownloadBehavior via CDP to ensure downloads are synced to Browserbase’s storage. Pay special attention to the downloadPath parameter—it must be set to "downloads" exactly as shown in the examples above. Common misconfiguration issues:
Using an absolute path (e.g., /tmp/downloads) instead of "downloads"
Omitting the setDownloadBehavior call entirely
Setting behavior to something other than "allow"
Without proper configuration, your downloads won’t be available for retrieval.
Opening a PDF URL in a browser session also triggers a download to Browserbase’s cloud storage. To view the PDF instead of downloading it, configure your browser settings as shown here.
After triggering downloads, retrieve them using the Downloads API. The API provides granular access to individual downloaded files — you can list, filter, retrieve, and delete downloads. Filenames don’t include the timestamp suffix Browserbase adds during storage.
Files sync in real time — large downloads may not be immediately available
through the /downloads endpoint. The code below includes retry logic to
handle this.
List all downloads for a session with optional filtering by filename, MIME type, file size, and creation time.
Node.js
Python
Node
import { writeFileSync } from "node:fs";
import { basename } from "node:path";

const API_KEY = process.env.BROWSERBASE_API_KEY!;

/** Fields of a download list entry that this script reads. */
interface DownloadEntry {
  id: string;
  filename: string;
  size: number;
}

/**
 * Lists the downloads of a session and writes each file into the current
 * working directory, polling every 2 seconds while the list is empty.
 *
 * @param sessionId - Session ID printed by the browser automation script.
 * @param retryForSeconds - How long to keep polling while no download is listed.
 * @throws If a request fails or returns a non-2xx status, or if no download
 *   is listed before the retry window elapses.
 */
async function saveDownloadsOnDisk(
  sessionId: string,
  retryForSeconds: number
): Promise<void> {
  const endTime = Date.now() + retryForSeconds * 1000;

  while (Date.now() < endTime) {
    try {
      // List individual downloads for the session
      const listResponse = await fetch(
        `https://api.browserbase.com/v1/downloads?sessionId=${encodeURIComponent(sessionId)}`,
        { headers: { "x-bb-api-key": API_KEY } }
      );
      // Surface auth/validation errors right away instead of polling until
      // the misleading "No downloads found" timeout.
      if (!listResponse.ok) {
        throw new Error(
          `Listing downloads failed: ${listResponse.status} ${listResponse.statusText}`
        );
      }

      const { downloads, total } = (await listResponse.json()) as {
        downloads: DownloadEntry[];
        total: number;
      };

      if (total > 0) {
        console.log(`Found ${total} download(s)`);

        for (const download of downloads) {
          // Download each file individually
          const fileResponse = await fetch(
            `https://api.browserbase.com/v1/downloads/${download.id}`,
            {
              headers: {
                "x-bb-api-key": API_KEY,
                Accept: "application/octet-stream",
              },
            }
          );
          // Without this check an error body would be saved as the file.
          if (!fileResponse.ok) {
            throw new Error(
              `Fetching ${download.filename} failed: ${fileResponse.status} ${fileResponse.statusText}`
            );
          }

          // The filename comes from the server; keep only its last path
          // segment so it cannot write outside the working directory.
          const localName = basename(download.filename);
          const buffer = Buffer.from(await fileResponse.arrayBuffer());
          writeFileSync(localName, buffer);
          console.log(`Saved: ${localName} (${download.size} bytes)`);
        }
        return;
      }
    } catch (e) {
      console.error("Error fetching downloads:", e);
      throw e;
    }

    // Wait 2 seconds before retrying
    await new Promise((resolve) => setTimeout(resolve, 2000));
  }

  throw new Error("No downloads found within the retry period");
}

(async () => {
  // Use the session ID from your browser automation to retrieve downloads
  const sessionId = "your-session-id";
  await saveDownloadsOnDisk(sessionId, 20); // wait up to 20s
  console.log("Downloads complete");
})().catch((error) => {
  console.error("Download failed:", error);
});
Python
import os
import time

import requests

API_KEY = os.environ["BROWSERBASE_API_KEY"]


def save_downloads_on_disk(session_id: str, retry_seconds: int = 20):
    """
    List and download individual files from a session.
    Retries for the specified number of seconds if no downloads are found.

    Files are written to the current working directory. A request that fails
    or returns an error status is re-raised immediately rather than retried.

    :param session_id: The session ID from your browser automation
    :param retry_seconds: How long to retry if no downloads are found
    :raises TimeoutError: If no downloads are listed within the retry period
    """
    end_time = time.time() + retry_seconds

    while time.time() < end_time:
        try:
            # List individual downloads for the session
            list_response = requests.get(
                "https://api.browserbase.com/v1/downloads",
                params={"sessionId": session_id},
                headers={"x-bb-api-key": API_KEY},
                timeout=30,  # requests waits forever without an explicit timeout
            )
            # Surface auth/validation errors right away instead of polling
            # until the misleading "No downloads found" timeout.
            list_response.raise_for_status()
            data = list_response.json()

            if data["total"] > 0:
                print(f"Found {data['total']} download(s)")

                for download in data["downloads"]:
                    # Download each file individually
                    file_response = requests.get(
                        f"https://api.browserbase.com/v1/downloads/{download['id']}",
                        headers={
                            "x-bb-api-key": API_KEY,
                            "Accept": "application/octet-stream",
                        },
                        timeout=30,
                    )
                    # Without this check an error body would be saved as the file.
                    file_response.raise_for_status()

                    # The filename comes from the server; keep only its last
                    # path segment so it cannot write outside the working
                    # directory.
                    local_name = os.path.basename(download["filename"])
                    with open(local_name, "wb") as f:
                        f.write(file_response.content)
                    print(f"Saved: {local_name} ({download['size']} bytes)")

                return
        except Exception as e:
            print(f"Error fetching downloads: {e}")
            raise

        time.sleep(2)  # Wait 2 seconds before retrying

    raise TimeoutError("No downloads found within the retry period")


# Use the session ID from your browser automation to retrieve downloads
session_id = "your-session-id"
try:
    save_downloads_on_disk(session_id)
    print("Downloads complete")
except Exception as e:
    print(f"Failed to retrieve downloads: {e}")
Retrieve metadata or file content for a specific download. Use Accept: application/json for metadata, or Accept: application/octet-stream to download the file (default if no Accept header is provided).