Getting Started with LLM Sandbox¶
This guide will help you get up and running with LLM Sandbox in just a few minutes.
Prerequisites¶
Before you begin, ensure you have:
- Python 3.10 or higher installed
- A container runtime (at least one of the following):
    - Docker Desktop or Docker Engine
    - Kubernetes cluster (local or remote)
    - Podman
Installation¶
Basic Installation¶
Install the core package:

pip install llm-sandbox
Backend-Specific Installation¶
Install with support for specific backends:
# Docker backend (most common)
pip install 'llm-sandbox[docker]'
# Kubernetes backend
pip install 'llm-sandbox[k8s]'
# Podman backend
pip install 'llm-sandbox[podman]'
# All backends
pip install 'llm-sandbox[docker,k8s,podman]'
Development Installation¶
For contributing or development:
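The exact commands depend on the repository layout; a typical editable-install flow, assuming the source lives at github.com/vndee/llm-sandbox (the namespace used by the project's container images) and that a dev extra exists:

git clone https://github.com/vndee/llm-sandbox.git
cd llm-sandbox
pip install -e '.[dev]'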
Quick Start¶
Your First Sandbox Session¶
Let's run a simple Python snippet in a sandbox:
from llm_sandbox import SandboxSession

# Create and use a sandbox session
with SandboxSession(lang="python") as session:
    result = session.run("""
print("Hello from LLM Sandbox!")
print("I'm running in a secure container.")
""")
    print(result.stdout)
Output:

Hello from LLM Sandbox!
I'm running in a secure container.
Installing Libraries¶
Install and use Python packages dynamically:
from llm_sandbox import SandboxSession

with SandboxSession(lang="python") as session:
    # Run code with numpy
    result = session.run("""
import numpy as np

# Create an array
arr = np.array([1, 2, 3, 4, 5])
print(f"Array: {arr}")
print(f"Mean: {np.mean(arr)}")
print(f"Sum: {np.sum(arr)}")
""", libraries=["numpy"])
    print(result.stdout)
Output:

Array: [1 2 3 4 5]
Mean: 3.0
Sum: 15
Working with Different Languages¶
LLM Sandbox supports multiple programming languages:
JavaScript Example¶
with SandboxSession(lang="javascript") as session:
result = session.run("""
const greeting = "Hello from Node.js!";
console.log(greeting);
// Using a library
const axios = require('axios');
console.log("Axios loaded successfully!");
""", libraries=["axios"])
print(result.stdout)
Output:

Hello from Node.js!
Axios loaded successfully!
Java Example¶
with SandboxSession(lang="java") as session:
result = session.run("""
public class HelloWorld {
public static void main(String[] args) {
System.out.println("Hello from Java!");
// Print Java version
String version = System.getProperty("java.version");
System.out.println("Java version: " + version);
}
}
""")
print(result.stdout)
Output:

Hello from Java!
Java version: (depends on the JDK in the container image)
C++ Example¶
with SandboxSession(lang="cpp") as session:
result = session.run("""
#include <iostream>
int main() {
std::cout << "Hello from C++!" << std::endl;
return 0;
}
""")
print(result.stdout)
Output:

Hello from C++!
Go Example¶
with SandboxSession(lang="go") as session:
result = session.run("""
package main
import "fmt"
func main() {
fmt.Println("Hello from Go!")
}
""")
print(result.stdout)
Output:

Hello from Go!
Capturing Plots and Visualizations¶
LLM Sandbox can automatically capture plots generated by your code when you use the ArtifactSandboxSession class. Plot capture is currently supported for Python and R, with more languages planned.
Basic Plot Capture¶
# ruff: noqa: T201
import base64
from pathlib import Path

from llm_sandbox import ArtifactSandboxSession

code = """
import matplotlib.pyplot as plt
import numpy as np

plt.style.use('default')

# Generate data
x = np.linspace(0, 10, 100)
y1 = np.sin(x) + np.random.normal(0, 0.1, 100)
y2 = np.cos(x) + np.random.normal(0, 0.1, 100)

# Create plot
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes[0, 0].plot(x, y1, 'b-', alpha=0.7)
axes[0, 0].set_title('Sine Wave')

axes[0, 1].scatter(x[::5], y2[::5], c='red', alpha=0.6)
axes[0, 1].set_title('Cosine Scatter')

axes[1, 0].hist(y1, bins=20, alpha=0.7, color='green')
axes[1, 0].set_title('Sine Distribution')

axes[1, 1].bar(range(10), np.random.rand(10), alpha=0.7)
axes[1, 1].set_title('Random Bar Chart')

plt.tight_layout()
plt.show()

print('Plot generated successfully!')
"""

# Create a sandbox session
with ArtifactSandboxSession(lang="python", verbose=True) as session:
    # Run Python code safely
    result = session.run(code)
    print(result.stdout)  # Output: Plot generated successfully!

    for plot in result.plots:
        with Path("docs/assets/example.png").open("wb") as f:
            f.write(base64.b64decode(plot.content_base64))
Output:

Plot generated successfully!

(The captured 2x2 figure is saved to docs/assets/example.png.)
Plot Accumulation and Clearing¶
By default, plots accumulate across multiple code executions within the same session, allowing you to build a gallery of visualizations. You can manually clear plots when needed or automatically clear them before each run:
from llm_sandbox import ArtifactSandboxSession, SandboxBackend

with ArtifactSandboxSession(
    lang="python",
    backend=SandboxBackend.DOCKER,
    enable_plotting=True
) as session:
    # First run - generates one plot
    result1 = session.run("""
import matplotlib.pyplot as plt
plt.plot([1, 2, 3])
plt.show()
""")
    print(f"Total plots: {len(result1.plots)}")  # Output: 1

    # Second run - plots accumulate, so the session now holds two
    result2 = session.run("""
import matplotlib.pyplot as plt
plt.plot([4, 5, 6])
plt.show()
""")
    print(f"Total plots: {len(result2.plots)}")  # Output: 2

    # Manually clear plots and reset the counter
    session.clear_plots()

    # Third run - starts fresh from plot 1
    result3 = session.run("""
import matplotlib.pyplot as plt
plt.plot([7, 8, 9])
plt.show()
""")
    print(f"Total plots: {len(result3.plots)}")  # Output: 1

    # Or automatically clear before running
    result4 = session.run("""
import matplotlib.pyplot as plt
plt.plot([10, 11, 12])
plt.show()
""", clear_plots=True)  # Clears plots before execution
    print(f"Total plots: {len(result4.plots)}")  # Output: 1
R Language Support¶
Plot capture also works seamlessly with R, supporting multiple plotting libraries:
from llm_sandbox import ArtifactSandboxSession, SandboxBackend

with ArtifactSandboxSession(
    lang="r",
    backend=SandboxBackend.DOCKER,
    enable_plotting=True
) as session:
    result = session.run("""
# Base R plots
plot(1:10, main='Line Plot')
hist(rnorm(100), main='Histogram')

# ggplot2 (if installed)
library(ggplot2)
ggplot(mtcars, aes(x=wt, y=mpg)) +
    geom_point() +
    labs(title='MPG vs Weight')
""")

    # All plots are captured automatically
    print(f"Captured {len(result.plots)} plots")
    for plot in result.plots:
        print(f"  Format: {plot.format}")
Supported Plotting Libraries:
- Python: matplotlib, seaborn, plotly, bokeh
- R: base R graphics, ggplot2, plotly, lattice
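The capture flow is the same for these libraries. As a minimal sketch for seaborn (this example is not from the official docs; it assumes the default matplotlib backend, since seaborn renders through matplotlib):

from llm_sandbox import ArtifactSandboxSession

with ArtifactSandboxSession(lang="python") as session:
    result = session.run("""
import seaborn as sns
import matplotlib.pyplot as plt

# seaborn draws on matplotlib, so plt.show() triggers the same capture
sns.histplot([1, 2, 2, 3, 3, 3, 4])
plt.title('Seaborn Histogram')
plt.show()
""", libraries=["seaborn"])
    print(f"Captured {len(result.plots)} plots")  # expected: 1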
For complete examples, see the Examples section of the documentation.
Using Different Backends¶
Docker Backend (Default)¶
from llm_sandbox import SandboxSession, SandboxBackend

with SandboxSession(
    backend=SandboxBackend.DOCKER,
    lang="python"
) as session:
    result = session.run("print('Running on Docker!')")
    print(result.stdout)
Kubernetes Backend¶
with SandboxSession(
    backend=SandboxBackend.KUBERNETES,
    lang="python",
    kube_namespace="default"
) as session:
    result = session.run("print('Running on Kubernetes!')")
    print(result.stdout)
Important: Custom Pod Manifests
When using custom pod manifests with Kubernetes, ensure your manifest includes these required configurations:
# Required pod manifest structure
pod_manifest = {
    "apiVersion": "v1",
    "kind": "Pod",
    "metadata": {
        "name": "your-pod-name",  # Will be overridden with a unique name
        "namespace": "default",
    },
    "spec": {
        "containers": [
            {
                "name": "my-container",  # Can be any valid container name
                "image": "your-image:latest",
                "tty": True,  # REQUIRED: keeps the container alive
                "securityContext": {  # REQUIRED: for proper permissions
                    "runAsUser": 0,
                    "runAsGroup": 0,
                },
                # Your other container settings...
            }
        ],
        "securityContext": {  # REQUIRED: pod-level security context
            "runAsUser": 0,
            "runAsGroup": 0,
        },
    },
}
with SandboxSession(
    backend=SandboxBackend.KUBERNETES,
    lang="python",
    pod_manifest=pod_manifest
) as session:
    result = session.run("print('Custom manifest working!')")
⚠️ Critical Requirements:
- "tty": True is essential for keeping the container alive
- Both pod-level and container-level securityContext are required for proper permissions
- The container name can be any valid name (it is no longer restricted to "sandbox-container")
- Missing any of these will cause connection or permission errors
Podman Backend¶
from podman import PodmanClient

from llm_sandbox import SandboxSession, SandboxBackend

client = PodmanClient(base_url="unix:///run/podman/podman.sock")

with SandboxSession(
    backend=SandboxBackend.PODMAN,
    client=client,
    lang="python"
) as session:
    result = session.run("print('Running on Podman!')")
    print(result.stdout)
Working with Files¶
Copy files to and from the sandbox:
with SandboxSession(lang="python") as session:
# Copy file to sandbox
session.copy_to_runtime("local_data.csv", "/sandbox/data.csv")
# Process the file
result = session.run("""
import pandas as pd
# Read the CSV file
df = pd.read_csv('/sandbox/data.csv')
print(f"Shape: {df.shape}")
print(f"Columns: {list(df.columns)}")
print(f"First 5 rows:\n{df.head()}")
# Save processed data
df.to_csv('/sandbox/processed.csv', index=False)
""", libraries=["pandas"])
# Copy file back from sandbox
session.copy_from_runtime("/sandbox/processed.csv", "processed_data.csv")
Setting Resource Limits¶
Control resource usage with runtime configurations:
with SandboxSession(
    lang="python",
    runtime_configs={
        "cpu_count": 2,       # Limit to 2 CPU cores
        "mem_limit": "512m",  # Limit memory to 512MB
        "timeout": 30,        # 30 second timeout
    }
) as session:
    result = session.run("""
# This will run with limited resources
import multiprocessing
print(f"Available CPUs: {multiprocessing.cpu_count()}")
""")
    print(result.stdout)
Basic Security Policies¶
Implement basic security checks:
from llm_sandbox import SandboxSession
from llm_sandbox.security import SecurityPolicy, SecurityPattern, SecurityIssueSeverity

# Create a security policy
policy = SecurityPolicy(
    severity_threshold=SecurityIssueSeverity.MEDIUM,
    patterns=[
        SecurityPattern(
            pattern=r"os\.system",
            description="System command execution",
            severity=SecurityIssueSeverity.HIGH
        ),
        SecurityPattern(
            pattern=r"eval\s*\(",
            description="Dynamic code evaluation",
            severity=SecurityIssueSeverity.MEDIUM
        )
    ]
)

with SandboxSession(lang="python", security_policy=policy) as session:
    # Check if code is safe before running
    code = "print('This is safe code')"
    is_safe, violations = session.is_safe(code)

    if is_safe:
        result = session.run(code)
        print(result.stdout)
    else:
        print("Code failed security check:")
        for v in violations:
            print(f"  - {v.description}")
Common Use Cases¶
1. LLM Code Execution¶
Execute code generated by an LLM safely:
Example with LangChain:
# ruff: noqa: E501
# Reference: https://python.langchain.com/docs/how_to/custom_tools/
import logging

from langchain import hub
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI

from llm_sandbox import SandboxSession

logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)


@tool
def run_code(lang: str, code: str, libraries: list | None = None) -> str:
    """Run code in a sandboxed environment.

    :param lang: The language of the code, must be one of ['python', 'java', 'javascript', 'cpp', 'go', 'ruby'].
    :param code: The code to run.
    :param libraries: The libraries to use, it is optional.
    :return: The output of the code.
    """
    with SandboxSession(lang=lang, verbose=False) as session:
        return session.run(code, libraries).stdout


if __name__ == "__main__":
    llm = ChatOpenAI(model="gpt-4.1-nano", temperature=0)
    prompt = hub.pull("hwchase17/openai-functions-agent")
    tools = [run_code]
    agent = create_tool_calling_agent(llm, tools, prompt)
    agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

    output = agent_executor.invoke({
        "input": "Write python code to calculate Pi number by Monte Carlo method then run it."
    })
    logger.info("Agent: %s", output)

    output = agent_executor.invoke({"input": "Write python code to calculate the factorial of a number then run it."})
    logger.info("Agent: %s", output)

    output = agent_executor.invoke({"input": "Write python code to calculate the Fibonacci sequence then run it."})
    logger.info("Agent: %s", output)

    output = agent_executor.invoke({"input": "Calculate the sum of the first 10000 numbers."})
    logger.info("Agent: %s", output)
Example with LangGraph:
import logging

from langchain_core.tools import tool
from langgraph.prebuilt import create_react_agent

from llm_sandbox import SandboxSession

logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)


@tool
def run_code(lang: str, code: str, libraries: list | None = None) -> str:
    """Run code in a sandboxed environment.

    :param lang: The language of the code, must be one of ['python', 'java', 'javascript', 'cpp', 'go', 'ruby'].
    :param code: The code to run.
    :param libraries: The libraries to use, it is optional.
    :return: The output of the code.
    """
    with SandboxSession(lang=lang, verbose=False) as session:
        return session.run(code, libraries).stdout


if __name__ == "__main__":
    agent = create_react_agent(model="openai:gpt-4.1-nano", tools=[run_code])

    logger.info(
        "Agent: %s",
        agent.invoke({
            "messages": [
                {
                    "role": "user",
                    "content": "Write python code to calculate Pi number by Monte Carlo method then run it.",
                }
            ]
        }),
    )
    logger.info(
        "Agent: %s",
        agent.invoke({
            "messages": [
                {"role": "user", "content": "Write python code to calculate the factorial of a number then run it."}
            ]
        }),
    )
    logger.info(
        "Agent: %s",
        agent.invoke({
            "messages": [
                {"role": "user", "content": "Write python code to calculate the Fibonacci sequence then run it."}
            ]
        }),
    )
    logger.info(
        "Agent: %s",
        agent.invoke({"messages": [{"role": "user", "content": "Calculate the sum of the first 10000 numbers."}]}),
    )
Example with LlamaIndex:
# ruff: noqa: E501
# Reference: https://docs.llamaindex.ai/en/stable/module_guides/deploying/agents/tools/
import logging

import nest_asyncio
from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.tools import FunctionTool
from llama_index.llms.openai import OpenAI

from llm_sandbox import SandboxSession

nest_asyncio.apply()

logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)


def run_code(lang: str, code: str, libraries: list | None = None) -> str:
    """Run code in a sandboxed environment.

    :param lang: The language of the code, must be one of ['python', 'java', 'javascript', 'cpp', 'go', 'ruby'].
    :param code: The code to run.
    :param libraries: The libraries to use, it is optional.
    :return: The output of the code.
    """
    with SandboxSession(lang=lang, verbose=False) as session:
        return session.run(code, libraries).stdout


if __name__ == "__main__":
    llm = OpenAI(model="gpt-4.1-nano", temperature=0)
    code_execution_tool = FunctionTool.from_defaults(fn=run_code)

    agent_worker = FunctionCallingAgentWorker.from_tools(
        [code_execution_tool],
        llm=llm,
        verbose=True,
        allow_parallel_tool_calls=False,
    )
    agent = agent_worker.as_agent()

    response = agent.chat("Write python code to calculate Pi number by Monte Carlo method then run it.")
    logger.info(response)

    response = agent.chat("Write python code to calculate the factorial of a number then run it.")
    logger.info(response)

    response = agent.chat("Write python code to calculate the Fibonacci sequence then run it.")
    logger.info(response)

    response = agent.chat("Calculate the sum of the first 10000 numbers.")
    logger.info(response)
2. Data Analysis Pipeline¶
Run data analysis safely:
with SandboxSession(lang="python") as session:
result = session.run("""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Generate sample data
np.random.seed(42)
data = {
'sales': np.random.randint(100, 1000, 50),
'customers': np.random.randint(10, 100, 50),
'profit_margin': np.random.uniform(0.1, 0.5, 50)
}
df = pd.DataFrame(data)
# Analysis
print("Data Summary:")
print(df.describe())
print(f"\nTotal Sales: ${df['sales'].sum():,}")
print(f"Average Profit Margin: {df['profit_margin'].mean():.2%}")
# Visualization
plt.figure(figsize=(12, 4))
plt.subplot(1, 3, 1)
plt.hist(df['sales'], bins=15, edgecolor='black')
plt.title('Sales Distribution')
plt.subplot(1, 3, 2)
plt.scatter(df['customers'], df['sales'])
plt.xlabel('Customers')
plt.ylabel('Sales')
plt.title('Sales vs Customers')
plt.subplot(1, 3, 3)
plt.boxplot(df['profit_margin'])
plt.title('Profit Margin Distribution')
plt.tight_layout()
plt.show()
""", libraries=["pandas", "numpy", "matplotlib"])
print(result.stdout)
3. Testing User-Submitted Code¶
Safely test code submitted by users:
def test_user_code(code: str, test_cases: list):
    """Test user code against test cases."""
    with SandboxSession(lang="python") as session:
        # Inject test framework
        full_code = f"""
{code}

# Run test cases
test_results = []
"""
        for i, test in enumerate(test_cases):
            full_code += f"""
try:
    result = {test['call']}
    expected = {test['expected']}
    passed = result == expected
    test_results.append({{
        'test': {i},
        'passed': passed,
        'expected': expected,
        'actual': result
    }})
except Exception as e:
    test_results.append({{
        'test': {i},
        'passed': False,
        'error': str(e)
    }})
"""
        full_code += """
# Print results
for result in test_results:
    print(result)
"""
        result = session.run(full_code)
        return result.stdout


# Example usage
user_code = """
def add(a, b):
    return a + b

def multiply(a, b):
    return a * b
"""

test_cases = [
    {"call": "add(2, 3)", "expected": 5},
    {"call": "add(-1, 1)", "expected": 0},
    {"call": "multiply(3, 4)", "expected": 12},
    {"call": "multiply(0, 5)", "expected": 0},
]

print(test_user_code(user_code, test_cases))
Best Practices¶
1. Always Use Context Managers¶
Always use the with statement to ensure proper cleanup:
# Good ✓
with SandboxSession(lang="python") as session:
    result = session.run("print('Hello')")

# Avoid ✗
session = SandboxSession(lang="python")
session.open()
result = session.run("print('Hello')")
session.close()  # Easy to forget!
2. Handle Errors Gracefully¶
with SandboxSession(lang="python") as session:
try:
result = session.run(code)
if result.exit_code != 0:
print(f"Error: {result.stderr}")
else:
print(f"Output: {result.stdout}")
except Exception as e:
print(f"Sandbox error: {e}")
3. Use Security Policies¶
Always define a security policy and check code with session.is_safe() before running it, exactly as shown in the Basic Security Policies section above; the same policy and check apply unchanged here.
4. Use Your Own Pre-built Images¶
You can use your own pre-built images, with dependencies and environment already set up, by passing the image parameter when creating a sandbox session. This is useful for code that requires specific dependencies or environment variables, and since the image is pre-built, code runs faster because nothing needs to be built or installed at session start.
from llm_sandbox import SandboxSession

with SandboxSession(
    lang="python",
    image="ghcr.io/vndee/sandbox-python-311-bullseye"
) as session:
    result = session.run("print('Hello from my custom image!')")
    print(result.stdout)
5. Skip Environment Setup for Production¶
For production deployments or when using pre-configured images, skip automatic environment setup for faster container startup:
from llm_sandbox import SandboxSession

# Skip environment setup when using custom images
with SandboxSession(
    lang="python",
    image="my-registry.com/python-ml:latest",  # Pre-configured image
    skip_environment_setup=True  # Skip pip upgrades and venv creation
) as session:
    result = session.run("import numpy; print('Ready!')")
When to use skip_environment_setup=True:
- Production deployments where startup time is critical
- Custom images with pre-installed packages
- CI/CD pipelines and batch processing
- Air-gapped environments without external package access
See the Configuration Guide for detailed information.
Troubleshooting¶
Container Runtime Not Found¶
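If session creation fails with a connection error, first confirm the runtime is actually running and reachable from the same shell. These are the standard runtime CLIs, not llm-sandbox commands:

docker info           # Docker Desktop / Engine
podman info           # Podman
kubectl cluster-info  # Kubernetes

On Linux, also make sure your user can access the Docker socket (for example, by being in the docker group).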
Permission Errors¶
# Run as non-root user
with SandboxSession(
    lang="python",
    runtime_configs={"user": "1000:1000"},
    workdir="/tmp/sandbox"
) as session:
    pass
Import Errors¶
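Import errors usually mean the package is missing inside the container, not on your host. Pass it through the libraries parameter of run() as shown earlier, or use a pre-built image that already includes it. A minimal check (requests is just an example package):

from llm_sandbox import SandboxSession

with SandboxSession(lang="python") as session:
    # Install the library inside the container, then import it
    result = session.run(
        "import requests; print(requests.__version__)",
        libraries=["requests"],
    )
    print(result.stdout)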
Next Steps¶
- Learn about Configuration Options
- Explore Security Policies
- Understand Container Backends
- Check out more Examples
- Read the API Reference