Introduction
Features
Guides
Integrations
- Get Started with Integrations
- AgentKit
- Braintrust
- Browser Use
- CrewAI
- Langchain
- OpenAI CUA
- Stripe
- Val Town
- Vercel
Downloads
Triggering and retrieving downloaded files
Unlike screenshots and PDFs which are saved locally, files downloaded during browser automation are stored in Browserbase’s cloud storage. These files must be retrieved using our API.
A typical use case for headless browsers is downloading files from web pages. Our browsers are configured to sync any file you download to our storage infrastructure. To avoid naming conflicts when downloading multiple files, we append a Unix timestamp (in milliseconds) to the end of the file name (e.g., `sample.pdf` will become `sample-1719265797164.pdf`).
Triggering Downloads
First, trigger a download in your browser automation:
- Create a browser session and get the session ID
- Connect to the session using your preferred framework
- Configure your library’s downloads location
- Perform the download action in your automation script
import { chromium } from "playwright-core";
import { Browserbase } from "@browserbasehq/sdk";
/**
 * Triggers a file download inside a remote Browserbase session.
 *
 * Creates a session, attaches Playwright to it over CDP, allows downloads,
 * clicks the download link on the test page and waits for the download to
 * finish. The session ID is logged so the files can be retrieved afterwards.
 *
 * The browser connection is closed in a `finally` block so that a failed
 * navigation, click or download does not leave the connection open.
 */
(async () => {
  const bb = new Browserbase({ apiKey: process.env.BROWSERBASE_API_KEY! });
  const session = await bb.sessions.create({
    projectId: process.env.BROWSERBASE_PROJECT_ID!,
  });

  const browser = await chromium.connectOverCDP(session.connectUrl);
  try {
    const defaultContext = browser.contexts()[0];
    const page = defaultContext.pages()[0];

    // Required to avoid playwright overriding location
    const client = await defaultContext.newCDPSession(page);
    await client.send("Browser.setDownloadBehavior", {
      behavior: "allow",
      downloadPath: "downloads",
      eventsEnabled: true,
    });

    await page.goto("https://browser-tests-alpha.vercel.app/api/download-test");

    // Start listening for the download before clicking so the event is not missed.
    const [download] = await Promise.all([
      page.waitForEvent("download"),
      page.locator("#download").click(),
    ]);

    // A non-null value is the failure reason reported for the download.
    const downloadError = await download.failure();
    if (downloadError !== null) {
      console.log("Error happened on download:", downloadError);
      throw new Error(downloadError);
    }

    // Store the session ID to retrieve downloads later
    console.log("Download completed. Session ID:", session.id);
    await page.close();
  } finally {
    // Always release the connection, including when a step above threw.
    await browser.close();
  }
})().catch((error) => console.error(error.message));
import { chromium } from "playwright-core";
import { Browserbase } from "@browserbasehq/sdk";
/**
 * Triggers a file download inside a remote Browserbase session.
 *
 * Creates a session, attaches Playwright to it over CDP, allows downloads,
 * clicks the download link on the test page and waits for the download to
 * finish. The session ID is logged so the files can be retrieved afterwards.
 *
 * The browser connection is closed in a `finally` block so that a failed
 * navigation, click or download does not leave the connection open.
 */
(async () => {
  const bb = new Browserbase({ apiKey: process.env.BROWSERBASE_API_KEY! });
  const session = await bb.sessions.create({
    projectId: process.env.BROWSERBASE_PROJECT_ID!,
  });

  const browser = await chromium.connectOverCDP(session.connectUrl);
  try {
    const defaultContext = browser.contexts()[0];
    const page = defaultContext.pages()[0];

    // Required to avoid playwright overriding location
    const client = await defaultContext.newCDPSession(page);
    await client.send("Browser.setDownloadBehavior", {
      behavior: "allow",
      downloadPath: "downloads",
      eventsEnabled: true,
    });

    await page.goto("https://browser-tests-alpha.vercel.app/api/download-test");

    // Start listening for the download before clicking so the event is not missed.
    const [download] = await Promise.all([
      page.waitForEvent("download"),
      page.locator("#download").click(),
    ]);

    // A non-null value is the failure reason reported for the download.
    const downloadError = await download.failure();
    if (downloadError !== null) {
      console.log("Error happened on download:", downloadError);
      throw new Error(downloadError);
    }

    // Store the session ID to retrieve downloads later
    console.log("Download completed. Session ID:", session.id);
    await page.close();
  } finally {
    // Always release the connection, including when a step above threw.
    await browser.close();
  }
})().catch((error) => console.error(error.message));
from playwright.sync_api import sync_playwright
from browserbase import Browserbase
import os
# Create the remote session up front; its ID is printed so the downloaded
# files can be looked up afterwards.
bb = Browserbase(api_key=os.environ["BROWSERBASE_API_KEY"])
session = bb.sessions.create(project_id=os.environ["BROWSERBASE_PROJECT_ID"])
print(f"Session ID: {session.id}")

# Parameters for the CDP "Browser.setDownloadBehavior" command sent below.
download_behavior = {
    "behavior": "allow",
    "downloadPath": "downloads",
    "eventsEnabled": True,
}

with sync_playwright() as playwright:
    # Attach to the remote browser through the session's connect URL
    browser = playwright.chromium.connect_over_cdp(session.connect_url)

    # Required to avoid playwright overriding the location
    browser.new_browser_cdp_session().send(
        "Browser.setDownloadBehavior", download_behavior
    )

    # Reuse the first page of the first existing context
    page = browser.contexts[0].pages[0]

    # Head to the test downloads page
    page.goto("https://browser-tests-alpha.vercel.app/api/download-test")

    # Click the download button
    print("Downloading file to the remote browser...")
    page.get_by_role("link", name="Download File").click()

    # Clean up
    page.close()
    browser.close()

print("Download completed. Use the session ID to retrieve the files.")
Retrieving Downloaded Files
After triggering downloads in your browser session, you can retrieve them using the Session Downloads API. The files are returned as a ZIP archive.
We sync the files in real-time; the size of your downloads might affect their immediate availability through the `/downloads` endpoint. The code below includes retry logic to handle this case.
import { writeFileSync } from "node:fs";
import { Browserbase } from "@browserbasehq/sdk";
/**
 * Polls the Session Downloads API until the session's ZIP archive is
 * non-empty, then writes it to `downloads.zip` in the current directory.
 *
 * Requests are issued one at a time, two seconds apart, so a slow response
 * can never overlap with the next attempt. At least one attempt is always made.
 *
 * @param sessionId - ID of the browser session whose downloads to fetch.
 * @param retryForSeconds - How long to keep retrying. Despite its name, this
 *   value is a duration in **milliseconds** (e.g. `20000` for 20 s).
 * @returns A promise that resolves once `downloads.zip` has been written.
 * @throws Error if no non-empty archive is available before the retry window
 *   elapses. Errors from the API call or the file write are propagated as-is.
 */
async function saveDownloadsOnDisk(sessionId: string, retryForSeconds: number): Promise<void> {
  const pollIntervalMs = 2000;
  // One client is enough for every attempt.
  const bb = new Browserbase({ apiKey: process.env.BROWSERBASE_API_KEY! });
  const deadline = Date.now() + retryForSeconds;

  do {
    // Wait before each attempt (including the first) to give the sync time to land.
    await new Promise<void>((resolve) => setTimeout(resolve, pollIntervalMs));

    const response = await bb.sessions.downloads.list(sessionId);
    const downloadBuffer = await response.arrayBuffer();
    if (downloadBuffer.byteLength > 0) {
      writeFileSync("downloads.zip", Buffer.from(downloadBuffer));
      return;
    }
  } while (Date.now() < deadline);

  // Surface the timeout instead of leaving the caller waiting forever.
  throw new Error(
    `No downloads available for session ${sessionId} after ${retryForSeconds}ms`,
  );
}
// Entry point: fetch the archive for a finished session and report the outcome.
(async () => {
  try {
    // Use the session ID from your browser automation to retrieve downloads
    const sessionId = "your-session-id";
    const retryWindow = 20000; // wait up to 20s
    await saveDownloadsOnDisk(sessionId, retryWindow);
    console.log("Downloaded files are in downloads.zip");
  } catch (error) {
    console.error('Download failed:', error);
  }
})();
import { writeFileSync } from "node:fs";
import { Browserbase } from "@browserbasehq/sdk";
/**
 * Polls the Session Downloads API until the session's ZIP archive is
 * non-empty, then writes it to `downloads.zip` in the current directory.
 *
 * Requests are issued one at a time, two seconds apart, so a slow response
 * can never overlap with the next attempt. At least one attempt is always made.
 *
 * @param sessionId - ID of the browser session whose downloads to fetch.
 * @param retryForSeconds - How long to keep retrying. Despite its name, this
 *   value is a duration in **milliseconds** (e.g. `20000` for 20 s).
 * @returns A promise that resolves once `downloads.zip` has been written.
 * @throws Error if no non-empty archive is available before the retry window
 *   elapses. Errors from the API call or the file write are propagated as-is.
 */
async function saveDownloadsOnDisk(sessionId: string, retryForSeconds: number): Promise<void> {
  const pollIntervalMs = 2000;
  // One client is enough for every attempt.
  const bb = new Browserbase({ apiKey: process.env.BROWSERBASE_API_KEY! });
  const deadline = Date.now() + retryForSeconds;

  do {
    // Wait before each attempt (including the first) to give the sync time to land.
    await new Promise<void>((resolve) => setTimeout(resolve, pollIntervalMs));

    const response = await bb.sessions.downloads.list(sessionId);
    const downloadBuffer = await response.arrayBuffer();
    if (downloadBuffer.byteLength > 0) {
      writeFileSync("downloads.zip", Buffer.from(downloadBuffer));
      return;
    }
  } while (Date.now() < deadline);

  // Surface the timeout instead of leaving the caller waiting forever.
  throw new Error(
    `No downloads available for session ${sessionId} after ${retryForSeconds}ms`,
  );
}
// Entry point: fetch the archive for a finished session and report the outcome.
(async () => {
  try {
    // Use the session ID from your browser automation to retrieve downloads
    const sessionId = "your-session-id";
    const retryWindow = 20000; // wait up to 20s
    await saveDownloadsOnDisk(sessionId, retryWindow);
    console.log("Downloaded files are in downloads.zip");
  } catch (error) {
    console.error('Download failed:', error);
  }
})();
import os
import time
from browserbase import Browserbase
def get_zipped_downloads(session_id: str, retry_seconds: int = 20) -> bytes:
    """
    Fetch the ZIP archive of files downloaded during a browser session.

    The downloads endpoint is queried repeatedly, pausing two seconds after
    every unsuccessful attempt, until it answers with HTTP 200 or the retry
    window runs out. Exceptions raised while fetching are printed and treated
    as an unsuccessful attempt.

    :param session_id: The session ID from your browser automation
    :param retry_seconds: How long to keep retrying, in seconds
    :returns: the raw bytes of the zipped archive
    :raises TimeoutError: if no successful response arrives within the window
    """
    deadline = time.time() + retry_seconds
    client = Browserbase(api_key=os.environ["BROWSERBASE_API_KEY"])

    while True:
        if time.time() >= deadline:
            raise TimeoutError("No downloads found within the retry period")

        try:
            response = client.sessions.downloads.list(session_id)
            succeeded = bool(response) and response.status_code == 200
            if succeeded:
                # Hand back the body bytes of the successful response
                return response.read()
        except Exception as exc:
            print(f"Error fetching downloads: {exc}")

        # Back off for 2 seconds before the next attempt
        time.sleep(2)
# Plug in the session ID printed by your browser automation run.
session_id = "your-session-id"

try:
    archive_bytes = get_zipped_downloads(session_id)
    # Persist the archive next to the script in binary mode.
    with open("downloads.zip", "wb") as zip_file:
        zip_file.write(archive_bytes)
    print("Downloaded files are in downloads.zip")
except Exception as err:
    # Covers both retrieval failures and problems writing the file.
    print(f"Failed to retrieve downloads: {err}")
Session Downloads API
Learn more about the available params and response fields
Was this page helpful?