Getting Started with LLM Sandbox¶
This guide will help you get up and running with LLM Sandbox in just a few minutes.
Prerequisites¶
Before you begin, ensure you have:
- Python 3.10 or higher installed
- Container runtime (at least one of the following):
  - Docker Desktop or Docker Engine
  - Kubernetes cluster (local or remote)
  - Podman
Installation¶
Basic Installation¶
Install the core package:

    pip install llm-sandbox
Backend-Specific Installation¶
Install with support for specific backends:
# Docker backend (most common)
pip install 'llm-sandbox[docker]'
# Kubernetes backend
pip install 'llm-sandbox[k8s]'
# Podman backend
pip install 'llm-sandbox[podman]'
# All backends
pip install 'llm-sandbox[docker,k8s,podman]'
Development Installation¶
For contributing or development:
Quick Start¶
Your First Sandbox Session¶
Let's run some simple Python code in a sandbox:
from llm_sandbox import SandboxSession

# Create and use a sandbox session; the `with` block ensures proper cleanup
with SandboxSession(lang="python") as session:
    # The triple-quoted string is the program that runs inside the sandbox
    result = session.run("""
print("Hello from LLM Sandbox!")
print("I'm running in a secure container.")
""")
    print(result.stdout)
Output:
Installing Libraries¶
Install and use Python packages dynamically:
from llm_sandbox import SandboxSession

with SandboxSession(lang="python") as session:
    # Run code with numpy; `libraries` names the packages the snippet needs
    result = session.run("""
import numpy as np
# Create an array
arr = np.array([1, 2, 3, 4, 5])
print(f"Array: {arr}")
print(f"Mean: {np.mean(arr)}")
print(f"Sum: {np.sum(arr)}")
""", libraries=["numpy"])
    print(result.stdout)
Output:
Working with Different Languages¶
LLM Sandbox supports multiple programming languages:
JavaScript Example¶
# Run a Node.js snippet; `libraries` names the package the snippet requires
with SandboxSession(lang="javascript") as session:
    result = session.run("""
const greeting = "Hello from Node.js!";
console.log(greeting);
// Using a library
const axios = require('axios');
console.log("Axios loaded successfully!");
""", libraries=["axios"])
    print(result.stdout)
Output:
Java Example¶
# Run a complete Java class inside the sandbox
with SandboxSession(lang="java") as session:
    result = session.run("""
public class HelloWorld {
    public static void main(String[] args) {
        System.out.println("Hello from Java!");
        // Print Java version
        String version = System.getProperty("java.version");
        System.out.println("Java version: " + version);
    }
}
""")
    print(result.stdout)
Output:
C++ Example¶
# Run a C++ program inside the sandbox
with SandboxSession(lang="cpp") as session:
    result = session.run("""
#include <iostream>
int main() {
    std::cout << "Hello from C++!" << std::endl;
    return 0;
}
""")
    print(result.stdout)
Output:
Go Example¶
# Run a Go program inside the sandbox
with SandboxSession(lang="go") as session:
    result = session.run("""
package main
import "fmt"
func main() {
    fmt.Println("Hello from Go!")
}
""")
    print(result.stdout)
Output:
Capturing Plots and Visualizations¶
LLM Sandbox can automatically capture plots generated by your code when you use the `ArtifactSandboxSession` class. Plot capture is currently supported only for Python code; support for other languages is planned.
# ruff: noqa: T201
import base64
from pathlib import Path

from llm_sandbox import ArtifactSandboxSession

# Program executed inside the sandbox: draws a 2x2 grid of charts on one figure
code = """
import matplotlib.pyplot as plt
import numpy as np
plt.style.use('default')
# Generate data
x = np.linspace(0, 10, 100)
y1 = np.sin(x) + np.random.normal(0, 0.1, 100)
y2 = np.cos(x) + np.random.normal(0, 0.1, 100)
# Create plot
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes[0, 0].plot(x, y1, 'b-', alpha=0.7)
axes[0, 0].set_title('Sine Wave')
axes[0, 1].scatter(x[::5], y2[::5], c='red', alpha=0.6)
axes[0, 1].set_title('Cosine Scatter')
axes[1, 0].hist(y1, bins=20, alpha=0.7, color='green')
axes[1, 0].set_title('Sine Distribution')
axes[1, 1].bar(range(10), np.random.rand(10), alpha=0.7)
axes[1, 1].set_title('Random Bar Chart')
plt.tight_layout()
plt.show()
print('Plot generated successfully!')
"""

# Create a sandbox session
with ArtifactSandboxSession(lang="python", verbose=True) as session:
    # Run Python code safely
    result = session.run(code)
    print(result.stdout)  # Output: Plot generated successfully!

    # Make sure the destination exists before writing any image
    output_dir = Path("docs/assets")
    output_dir.mkdir(parents=True, exist_ok=True)

    # One file per captured plot: the first keeps the original name, later ones
    # get a numeric suffix so they do not overwrite each other.
    for index, plot in enumerate(result.plots):
        file_name = "example.png" if index == 0 else f"example_{index}.png"
        (output_dir / file_name).write_bytes(base64.b64decode(plot.content_base64))
Output:
Using Different Backends¶
Docker Backend (Default)¶
from llm_sandbox import SandboxSession, SandboxBackend

# Select the Docker backend explicitly
with SandboxSession(
    backend=SandboxBackend.DOCKER,
    lang="python",
) as session:
    result = session.run("print('Running on Docker!')")
    print(result.stdout)
Kubernetes Backend¶
from llm_sandbox import SandboxSession, SandboxBackend

# Select the Kubernetes backend and the namespace passed as `kube_namespace`
with SandboxSession(
    backend=SandboxBackend.KUBERNETES,
    lang="python",
    kube_namespace="default",
) as session:
    result = session.run("print('Running on Kubernetes!')")
    print(result.stdout)
Podman Backend¶
from podman import PodmanClient

from llm_sandbox import SandboxSession, SandboxBackend

# Connect to the Podman socket and hand the client to the session
client = PodmanClient(base_url="unix:///run/podman/podman.sock")

with SandboxSession(
    backend=SandboxBackend.PODMAN,
    client=client,
    lang="python",
) as session:
    result = session.run("print('Running on Podman!')")
    print(result.stdout)
Working with Files¶
Copy files to and from the sandbox:
with SandboxSession(lang="python") as session:
    # Copy file to sandbox
    session.copy_to_runtime("local_data.csv", "/sandbox/data.csv")

    # Process the file.
    # The program is a raw string so the "\n" below reaches the sandbox as an
    # escape sequence. In a normal string it would become a real line break
    # inside the f-string and the sandboxed code would fail with a SyntaxError.
    result = session.run(r"""
import pandas as pd
# Read the CSV file
df = pd.read_csv('/sandbox/data.csv')
print(f"Shape: {df.shape}")
print(f"Columns: {list(df.columns)}")
print(f"First 5 rows:\n{df.head()}")
# Save processed data
df.to_csv('/sandbox/processed.csv', index=False)
""", libraries=["pandas"])

    # Copy file back from sandbox
    session.copy_from_runtime("/sandbox/processed.csv", "processed_data.csv")
Setting Resource Limits¶
Control resource usage with runtime configurations:
with SandboxSession(
    lang="python",
    runtime_configs={
        "cpu_count": 2,       # Limit to 2 CPU cores
        "mem_limit": "512m",  # Limit memory to 512MB
        "timeout": 30,        # 30 second timeout
    },
) as session:
    result = session.run("""
# This will run with limited resources
import multiprocessing
print(f"Available CPUs: {multiprocessing.cpu_count()}")
""")
    print(result.stdout)
Basic Security Policies¶
Implement basic security checks:
from llm_sandbox import SandboxSession
from llm_sandbox.security import SecurityPolicy, SecurityPattern, SecurityIssueSeverity

# Create a security policy
policy = SecurityPolicy(
    severity_threshold=SecurityIssueSeverity.MEDIUM,
    patterns=[
        SecurityPattern(
            pattern=r"os\.system",
            description="System command execution",
            severity=SecurityIssueSeverity.HIGH,
        ),
        SecurityPattern(
            pattern=r"eval\s*\(",
            description="Dynamic code evaluation",
            severity=SecurityIssueSeverity.MEDIUM,
        ),
    ],
)

with SandboxSession(lang="python", security_policy=policy) as session:
    # Check if code is safe before running
    code = "print('This is safe code')"
    is_safe, violations = session.is_safe(code)

    if is_safe:
        result = session.run(code)
        print(result.stdout)
    else:
        # Report each violation instead of executing the code
        print("Code failed security check:")
        for v in violations:
            print(f" - {v.description}")
Common Use Cases¶
1. LLM Code Execution¶
Execute code generated by an LLM safely:
Example with LangChain:
# ruff: noqa: E501
# Reference: https://python.langchain.com/docs/how_to/custom_tools/
import logging

from langchain import hub
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI

from llm_sandbox import SandboxSession

logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)


@tool
def run_code(lang: str, code: str, libraries: list | None = None) -> str:
    """Run code in a sandboxed environment.

    :param lang: The language of the code, must be one of ['python', 'java', 'javascript', 'cpp', 'go', 'ruby'].
    :param code: The code to run.
    :param libraries: The libraries to use, it is optional.
    :return: The output of the code.
    """
    # A fresh session per call; only the captured stdout goes back to the agent
    with SandboxSession(lang=lang, verbose=False) as session:
        return session.run(code, libraries).stdout


if __name__ == "__main__":
    llm = ChatOpenAI(model="gpt-4.1-nano", temperature=0)
    prompt = hub.pull("hwchase17/openai-functions-agent")
    tools = [run_code]

    agent = create_tool_calling_agent(llm, tools, prompt)  # type: ignore[arg-type]
    agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)  # type: ignore[arg-type]

    output = agent_executor.invoke({
        "input": "Write python code to calculate Pi number by Monte Carlo method then run it."
    })
    logger.info("Agent: %s", output)

    output = agent_executor.invoke({"input": "Write python code to calculate the factorial of a number then run it."})
    logger.info("Agent: %s", output)

    output = agent_executor.invoke({"input": "Write python code to calculate the Fibonacci sequence then run it."})
    logger.info("Agent: %s", output)

    output = agent_executor.invoke({"input": "Calculate the sum of the first 10000 numbers."})
    logger.info("Agent: %s", output)
Example with LangGraph:
import logging

from langchain_core.tools import tool
from langgraph.prebuilt import create_react_agent

from llm_sandbox import SandboxSession

logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)


@tool
def run_code(lang: str, code: str, libraries: list | None = None) -> str:
    """Run code in a sandboxed environment.

    :param lang: The language of the code, must be one of ['python', 'java', 'javascript', 'cpp', 'go', 'ruby'].
    :param code: The code to run.
    :param libraries: The libraries to use, it is optional.
    :return: The output of the code.
    """
    # A fresh session per call; only the captured stdout goes back to the agent
    with SandboxSession(lang=lang, verbose=False) as session:
        return session.run(code, libraries).stdout


if __name__ == "__main__":
    agent = create_react_agent(model="openai:gpt-4.1-nano", tools=[run_code])

    logger.info(
        "Agent: %s",
        agent.invoke({
            "messages": [
                {
                    "role": "user",
                    "content": "Write python code to calculate Pi number by Monte Carlo method then run it.",
                }
            ]
        }),
    )
    logger.info(
        "Agent: %s",
        agent.invoke({
            "messages": [
                {"role": "user", "content": "Write python code to calculate the factorial of a number then run it."}
            ]
        }),
    )
    logger.info(
        "Agent: %s",
        agent.invoke({
            "messages": [
                {"role": "user", "content": "Write python code to calculate the Fibonacci sequence then run it."}
            ]
        }),
    )
    logger.info(
        "Agent: %s",
        agent.invoke({"messages": [{"role": "user", "content": "Calculate the sum of the first 10000 numbers."}]}),
    )
Example with LlamaIndex:
# ruff: noqa: E501
# Reference: https://docs.llamaindex.ai/en/stable/module_guides/deploying/agents/tools/
import logging

import nest_asyncio
from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.tools import FunctionTool
from llama_index.llms.openai import OpenAI

from llm_sandbox import SandboxSession

nest_asyncio.apply()

logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)


def run_code(lang: str, code: str, libraries: list | None = None) -> str:
    """Run code in a sandboxed environment.

    :param lang: The language of the code, must be one of ['python', 'java', 'javascript', 'cpp', 'go', 'ruby'].
    :param code: The code to run.
    :param libraries: The libraries to use, it is optional.
    :return: The output of the code.
    """
    # A fresh session per call; only the captured stdout goes back to the agent
    with SandboxSession(lang=lang, verbose=False) as session:
        return session.run(code, libraries).stdout


if __name__ == "__main__":
    llm = OpenAI(model="gpt-4.1-nano", temperature=0)
    code_execution_tool = FunctionTool.from_defaults(fn=run_code)

    agent_worker = FunctionCallingAgentWorker.from_tools(
        [code_execution_tool],
        llm=llm,
        verbose=True,
        allow_parallel_tool_calls=False,
    )
    agent = agent_worker.as_agent()

    response = agent.chat("Write python code to calculate Pi number by Monte Carlo method then run it.")
    logger.info(response)

    response = agent.chat("Write python code to calculate the factorial of a number then run it.")
    logger.info(response)

    response = agent.chat("Write python code to calculate the Fibonacci sequence then run it.")
    logger.info(response)

    response = agent.chat("Calculate the sum of the first 10000 numbers.")
    logger.info(response)
2. Data Analysis Pipeline¶
Run data analysis safely:
with SandboxSession(lang="python") as session:
    # The program is a raw string so the "\n" in the "Total Sales" line reaches
    # the sandbox as an escape sequence. In a normal string it would become a
    # real line break inside the f-string and raise a SyntaxError in the sandbox.
    result = session.run(r"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Generate sample data
np.random.seed(42)
data = {
    'sales': np.random.randint(100, 1000, 50),
    'customers': np.random.randint(10, 100, 50),
    'profit_margin': np.random.uniform(0.1, 0.5, 50)
}
df = pd.DataFrame(data)
# Analysis
print("Data Summary:")
print(df.describe())
print(f"\nTotal Sales: ${df['sales'].sum():,}")
print(f"Average Profit Margin: {df['profit_margin'].mean():.2%}")
# Visualization
plt.figure(figsize=(12, 4))
plt.subplot(1, 3, 1)
plt.hist(df['sales'], bins=15, edgecolor='black')
plt.title('Sales Distribution')
plt.subplot(1, 3, 2)
plt.scatter(df['customers'], df['sales'])
plt.xlabel('Customers')
plt.ylabel('Sales')
plt.title('Sales vs Customers')
plt.subplot(1, 3, 3)
plt.boxplot(df['profit_margin'])
plt.title('Profit Margin Distribution')
plt.tight_layout()
plt.show()
""", libraries=["pandas", "numpy", "matplotlib"])
    print(result.stdout)
3. Testing User-Submitted Code¶
Safely test code submitted by users:
def _build_test_script(code: str, test_cases: list) -> str:
    """Return ``code`` followed by a harness that runs each test case and prints its outcome."""
    sections = [
        code,
        "# Run test cases",
        "test_results = []",
    ]
    for i, test in enumerate(test_cases):
        # test['call'] is spliced in verbatim as the expression to evaluate.
        # test['expected'] goes through repr() so strings and other literals
        # stay valid source; str() would turn "abc" into the bare name abc.
        sections.append(
            "try:\n"
            f"    result = {test['call']}\n"
            f"    expected = {test['expected']!r}\n"
            "    passed = result == expected\n"
            "    test_results.append({\n"
            f"        'test': {i},\n"
            "        'passed': passed,\n"
            "        'expected': expected,\n"
            "        'actual': result\n"
            "    })\n"
            "except Exception as e:\n"
            "    test_results.append({\n"
            f"        'test': {i},\n"
            "        'passed': False,\n"
            "        'error': str(e)\n"
            "    })"
        )
    sections.extend([
        "# Print results",
        "for result in test_results:",
        "    print(result)",
    ])
    return "\n".join(sections) + "\n"


def test_user_code(code: str, test_cases: list):
    """Test user code against test cases.

    :param code: Source code defining the functions under test.
    :param test_cases: Dicts with a ``call`` key (expression to evaluate, as a
        string) and an ``expected`` key (value the call should return).
    :return: The sandbox's stdout: one printed result dict per test case.
    """
    # Build the script first, then run the user code and harness together in the sandbox
    full_code = _build_test_script(code, test_cases)
    with SandboxSession(lang="python") as session:
        result = session.run(full_code)
        return result.stdout


# Example usage
user_code = """
def add(a, b):
    return a + b

def multiply(a, b):
    return a * b
"""

test_cases = [
    {"call": "add(2, 3)", "expected": 5},
    {"call": "add(-1, 1)", "expected": 0},
    {"call": "multiply(3, 4)", "expected": 12},
    {"call": "multiply(0, 5)", "expected": 0},
]

print(test_user_code(user_code, test_cases))
Best Practices¶
1. Always Use Context Managers¶
Always use the `with` statement to ensure proper cleanup:
# Good ✓
with SandboxSession(lang="python") as session:
    result = session.run("print('Hello')")

# Avoid ✗
session = SandboxSession(lang="python")
session.open()
result = session.run("print('Hello')")
session.close()  # Easy to forget!
2. Handle Errors Gracefully¶
with SandboxSession(lang="python") as session:
    try:
        result = session.run(code)
        # A non-zero exit code means the executed code itself failed
        if result.exit_code != 0:
            print(f"Error: {result.stderr}")
        else:
            print(f"Output: {result.stdout}")
    except Exception as e:
        # Anything raised by the session rather than reported via exit_code
        print(f"Sandbox error: {e}")
3. Use Security Policies¶
Always use security policies and check if the code is safe before running it:
from llm_sandbox import SandboxSession
from llm_sandbox.security import SecurityPolicy, SecurityPattern, SecurityIssueSeverity

# Create a security policy
policy = SecurityPolicy(
    severity_threshold=SecurityIssueSeverity.MEDIUM,
    patterns=[
        SecurityPattern(
            pattern=r"os\.system",
            description="System command execution",
            severity=SecurityIssueSeverity.HIGH,
        ),
        SecurityPattern(
            pattern=r"eval\s*\(",
            description="Dynamic code evaluation",
            severity=SecurityIssueSeverity.MEDIUM,
        ),
    ],
)

with SandboxSession(lang="python", security_policy=policy) as session:
    # Check if code is safe before running
    code = "print('This is safe code')"
    is_safe, violations = session.is_safe(code)

    if is_safe:
        result = session.run(code)
        print(result.stdout)
    else:
        # Report each violation instead of executing the code
        print("Code failed security check:")
        for v in violations:
            print(f" - {v.description}")
4. Use Your Own Pre-built Images¶
You can use your own pre-built images, with the appropriate dependencies and environment already set up, by specifying the `image` parameter when creating a sandbox session. This is useful for running code that requires specific dependencies or environment variables. Because the image is pre-built, code runs faster: there is no need to build the image or install dependencies first.
from llm_sandbox import SandboxSession

# Point the session at a pre-built image via the `image` parameter
with SandboxSession(
    lang="python",
    image="ghcr.io/vndee/sandbox-python-311-bullseye",
) as session:
    result = session.run("print('Hello from my custom image!')")
    print(result.stdout)
Troubleshooting¶
Container Runtime Not Found¶
Permission Errors¶
# Run as non-root user
with SandboxSession(
    lang="python",
    runtime_configs={"user": "1000:1000"},
    workdir="/tmp/sandbox",
) as session:
    pass
Import Errors¶
Next Steps¶
- Learn about Configuration Options
- Explore Security Policies
- Understand Container Backends
- Check out more Examples
- Read the API Reference