LLM Framework Integrations¶
LLM Sandbox seamlessly integrates with popular LLM frameworks to provide secure code execution capabilities. This guide covers integration patterns and examples.
LangChain Integration¶
Basic Tool Integration¶
from langchain.tools import tool
from langchain.agents import initialize_agent, AgentType
from langchain.llms import OpenAI
from llm_sandbox import SandboxSession
@tool
def execute_code(code: str, language: str = "python") -> str:
    """
    Execute code in a secure sandbox environment.

    Args:
        code: The code to execute
        language: Programming language (python, javascript, java, cpp, go, ruby)

    Returns:
        The execution output
    """
    # NOTE: the docstring doubles as the tool description shown to the LLM.
    with SandboxSession(lang=language, verbose=False) as session:
        outcome = session.run(code)
        # Surface failures to the agent as text instead of raising.
        if outcome.exit_code != 0:
            return f"Error: {outcome.stderr}"
        return outcome.stdout
# Build a zero-shot ReAct agent that can call the sandbox tool.
llm = OpenAI(temperature=0)
tools = [execute_code]
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

# Ask the agent to solve a task; it decides when to invoke the tool.
response = agent.run("Write and execute Python code to calculate the factorial of 10")
print(response)
Advanced Tool with Libraries¶
from langchain.tools import StructuredTool
from pydantic import BaseModel, Field
from typing import List, Optional
class CodeExecutionInput(BaseModel):
    """Input for code execution tool"""

    # Field descriptions are surfaced to the LLM, so keep them precise.
    code: str = Field(description="The code to execute")
    language: str = Field(default="python", description="Programming language")
    libraries: Optional[List[str]] = Field(default=None, description="Libraries to install before execution")
def execute_code_with_libs(
    code: str,
    language: str = "python",
    libraries: Optional[List[str]] = None,
) -> str:
    """Execute code with optional library installation"""
    try:
        with SandboxSession(lang=language) as session:
            run = session.run(code, libraries=libraries)
            return f"Exit code: {run.exit_code}\n{run.stdout}"
    except Exception as e:
        # Report any failure (including session startup) as text for the agent.
        return f"Execution error: {str(e)}"
# Expose the function as a structured (multi-argument) tool.
code_tool = StructuredTool.from_function(
    func=execute_code_with_libs,
    name="CodeExecutor",
    description="Execute code with optional library installation",
    args_schema=CodeExecutionInput,
)

# A structured-chat agent type is required for multi-argument tools.
agent = initialize_agent(
    [code_tool],
    llm,
    agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)
response = agent.run(
    "Use numpy to create a 5x5 matrix of random numbers and calculate its determinant"
)
Chain with Code Execution¶
from langchain.chains import LLMChain, SequentialChain
from langchain.prompts import PromptTemplate
# Chain 1: generate code from a task description.
CODE_GENERATION_TEMPLATE = """Write Python code to accomplish this task: {task}
Provide only the code, no explanations."""

code_prompt = PromptTemplate(input_variables=["task"], template=CODE_GENERATION_TEMPLATE)
code_chain = LLMChain(llm=llm, prompt=code_prompt, output_key="code")
# Chain 2: Execute code
def execute_code_chain(inputs: dict) -> dict:
    """TransformChain hook: run the generated code, return stdout/stderr."""
    with SandboxSession(lang="python") as session:
        run = session.run(inputs["code"])
        return {"output": run.stdout, "error": run.stderr}
# Chain 2: wrap the executor as a TransformChain and combine both chains.
from langchain.chains import TransformChain

execute_chain = TransformChain(
    input_variables=["code"],
    output_variables=["output", "error"],
    transform=execute_code_chain,
)
overall_chain = SequentialChain(
    chains=[code_chain, execute_chain],
    input_variables=["task"],
    output_variables=["code", "output", "error"],
    verbose=True,
)

# Run end to end: task -> code -> execution output.
result = overall_chain({"task": "Generate the first 20 Fibonacci numbers"})
print(f"Generated code:\n{result['code']}")
print(f"\nOutput:\n{result['output']}")
Memory and State Management¶
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationChain
class CodeExecutionMemory:
    """Custom memory for code execution history.

    Holds one persistent sandbox session plus a log of every execution
    (code, stdout, stderr, exit code) so later turns can build on state
    created by earlier ones.
    """

    def __init__(self):
        self.executions = []  # chronological list of execution records
        self.session = None   # lazily opened SandboxSession

    def start_session(self, **kwargs):
        """Open a persistent sandbox session; kwargs go to SandboxSession."""
        self.session = SandboxSession(**kwargs)
        self.session.open()

    def execute(self, code: str, libraries: Optional[List[str]] = None):
        """Run ``code`` in the session (opening one on demand) and log the result.

        Args:
            code: Source code to run.
            libraries: Optional packages to install first.
                (Fixed annotation: the default is None, so the type must be
                Optional; passed by keyword for consistency with the other
                examples in this file.)

        Returns:
            The sandbox run result.
        """
        if not self.session:
            self.start_session(lang="python")
        result = self.session.run(code, libraries=libraries)
        self.executions.append({
            "code": code,
            "output": result.stdout,
            "error": result.stderr,
            "exit_code": result.exit_code,
        })
        return result

    def close_session(self):
        """Close the session if open; safe to call repeatedly."""
        if self.session:
            self.session.close()
            self.session = None
# Pair LangChain conversation memory with a persistent sandbox session.
memory = ConversationSummaryMemory(llm=llm)
code_memory = CodeExecutionMemory()
conversation = ConversationChain(llm=llm, memory=memory, verbose=True)

# Interactive code development: the second run can read `data` because
# both executions share one open sandbox session.
code_memory.start_session(lang="python")
result1 = code_memory.execute("data = [1, 2, 3, 4, 5]")
result2 = code_memory.execute("print(f'Sum: {sum(data)}')")
code_memory.close_session()
LangGraph Integration¶
Stateful Code Execution Workflow¶
from typing import TypedDict, Annotated, Sequence
from langgraph.graph import Graph, END
from langgraph.prebuilt import ToolExecutor, ToolInvocation
import operator
class CodeState(TypedDict):
    """State for code execution workflow"""

    task: str        # what the user asked for
    code: str        # latest generated/fixed code
    output: str      # stdout of the last run
    error: str       # stderr of the last run ("" when clean)
    iterations: int  # number of fix attempts so far
    # messages accumulate across nodes (operator.add merges updates)
    messages: Annotated[Sequence[str], operator.add]
# Define nodes
def generate_code(state: CodeState) -> CodeState:
    """Generate code based on task"""
    # Ask the LLM for an implementation of the requested task.
    task = state["task"]
    code = llm.predict(f"Write Python code for: {task}")
    return {
        "code": code,
        "messages": [f"Generated code for task: {task}"],
    }
def execute_code(state: CodeState) -> CodeState:
    """Execute the generated code"""
    with SandboxSession(lang="python") as session:
        run = session.run(state["code"])
        # Record stdout/stderr so check_output can route the workflow.
        return {
            "output": run.stdout,
            "error": run.stderr,
            "messages": [f"Executed code with exit code: {run.exit_code}"],
        }
def check_output(state: "CodeState") -> str:
    """Route the workflow after an execution attempt.

    Returns:
        "fix_code" when the run produced an error and fix attempts remain
        (capped at 3), otherwise "end".
    """
    # Bug fix: the original returned "fix_code" whenever an error was
    # present, checking the iteration cap only on the error-free path
    # (where both branches returned "end" anyway). A persistent error
    # therefore looped execute -> fix forever. Check the cap before
    # retrying so the workflow always terminates.
    if state["error"] and state["iterations"] < 3:
        return "fix_code"
    return "end"
def fix_code(state: CodeState) -> CodeState:
    """Fix code based on error"""
    attempt = state["iterations"] + 1
    # Give the LLM both the failing code and the error it produced.
    prompt = f"""Fix this Python code that has an error:
Code:
{state['code']}
Error:
{state['error']}
"""
    return {
        "code": llm.predict(prompt),
        "iterations": attempt,
        "messages": [f"Attempted to fix code, iteration {attempt}"],
    }
# Assemble the workflow: generate -> execute -> (fix -> execute)* -> END
workflow = Graph()
workflow.add_node("generate", generate_code)
workflow.add_node("execute", execute_code)
workflow.add_node("fix", fix_code)

workflow.add_edge("generate", "execute")
# After each execution, check_output decides whether to retry or stop.
workflow.add_conditional_edges(
    "execute",
    check_output,
    {"fix_code": "fix", "end": END},
)
workflow.add_edge("fix", "execute")
workflow.set_entry_point("generate")

app = workflow.compile()

# Run the workflow from a fresh state.
initial_state = {
    "task": "Calculate the prime numbers between 1 and 50",
    "code": "",
    "output": "",
    "error": "",
    "iterations": 0,
    "messages": [],
}
result = app.invoke(initial_state)
print(f"Final output: {result['output']}")
Tool-Based Graph¶
from langgraph.prebuilt import create_react_agent
# Create code execution tool
@tool
def run_python_code(code: str, libraries: Optional[List[str]] = None) -> str:
    """Run Python code with optional libraries"""
    # Fixed annotation: the default is None, so the type must be Optional.
    with SandboxSession(lang="python") as session:
        result = session.run(code, libraries=libraries)
        # Return stdout on success, a tagged error string otherwise.
        return result.stdout if result.exit_code == 0 else f"Error: {result.stderr}"
@tool
def run_data_analysis(csv_data: str, analysis_type: str = "summary") -> str:
    """Run data analysis on CSV data"""
    # Build a small pandas program. csv_data is interpolated verbatim, so it
    # must not contain a ''' sequence; the program runs inside the sandbox.
    code = f"""
import pandas as pd
import io
data = '''{csv_data}'''
df = pd.read_csv(io.StringIO(data))
if "{analysis_type}" == "summary":
    print(df.describe())
elif "{analysis_type}" == "correlation":
    print(df.corr())
else:
    print(df.head())
"""
    with SandboxSession(lang="python") as session:
        result = session.run(code, libraries=["pandas"])
        return result.stdout
# Prebuilt ReAct agent wired to both sandbox tools.
tools = [run_python_code, run_data_analysis]
agent = create_react_agent(llm, tools)

# Drive the agent through the messages interface.
response = agent.invoke({
    "messages": [
        ("user", "Generate sample sales data and analyze it")
    ]
})
LlamaIndex Integration¶
Function Tool Integration¶
from llama_index.core.tools import FunctionTool
from llama_index.core.agent import ReActAgent
from llama_index.llms.openai import OpenAI
def execute_code_with_context(
    code: str,
    language: str = "python",
    context: dict = None,
) -> str:
    """
    Execute code with optional context variables.

    Args:
        code: Code to execute
        language: Programming language
        context: Dictionary of variables to inject

    Returns:
        Execution output
    """
    full_code = code
    # Context injection only works for Python: each entry becomes a
    # `name = repr(value)` assignment prepended to the user code.
    if context and language == "python":
        bindings = "\n".join(
            f"{key} = {repr(value)}" for key, value in context.items()
        )
        full_code = f"{bindings}\n\n{code}"
    with SandboxSession(lang=language) as session:
        return session.run(full_code).stdout
# Wrap the function as a LlamaIndex tool.
code_tool = FunctionTool.from_defaults(
    fn=execute_code_with_context,
    name="code_executor",
    description="Execute code in a sandboxed environment with optional context",
)

# ReAct agent backed by GPT-4.
llm = OpenAI(model="gpt-4", temperature=0)
agent = ReActAgent.from_tools([code_tool], llm=llm, verbose=True)

response = agent.chat(
    "Calculate the compound interest for $10,000 at 5% annual rate over 10 years"
)
print(response)
Query Engine Integration¶
from llama_index.core import VectorStoreIndex, Document
from llama_index.core.query_engine import CustomQueryEngine
from llama_index.core.response_synthesizers import BaseSynthesizer
class CodeExecutionQueryEngine(CustomQueryEngine):
    """Query engine that executes code to answer questions"""

    # NOTE(review): CustomQueryEngine is a Pydantic model in recent
    # llama_index releases -- confirm that overriding __init__ with plain
    # attribute assignment works on the installed version.
    def __init__(self, llm, security_policy=None):
        self.llm = llm
        self.security_policy = security_policy

    def custom_query(self, query_str: str) -> str:
        # Ask the LLM for a program whose stdout answers the question.
        code_prompt = f"""
Write Python code to answer this question: {query_str}
The code should print the answer clearly.
"""
        generated = self.llm.complete(code_prompt).text
        with SandboxSession(
            lang="python",
            security_policy=self.security_policy,
        ) as session:
            # Static safety scan before running anything.
            if self.security_policy:
                is_safe, violations = session.is_safe(generated)
                if not is_safe:
                    return f"Code failed security check: {violations}"
            run = session.run(generated)
            if run.exit_code != 0:
                return f"Execution error: {run.stderr}"
            return run.stdout
# Build the engine with a data-science security policy.
from llm_sandbox.security import get_security_policy

query_engine = CodeExecutionQueryEngine(
    llm=llm,
    security_policy=get_security_policy("data_science"),
)
response = query_engine.query(
    "What is the correlation between height and weight in a generated dataset of 100 people?"
)
print(response)
OpenAI Function Calling¶
Direct Integration¶
import openai
import json
from typing import List, Dict
def create_code_execution_function() -> Dict:
    """Create OpenAI function specification for code execution"""
    # JSON-schema parameter block, built separately for readability.
    parameters = {
        "type": "object",
        "properties": {
            "code": {
                "type": "string",
                "description": "The code to execute",
            },
            "language": {
                "type": "string",
                "enum": ["python", "javascript", "java", "cpp", "go", "ruby"],
                "description": "Programming language",
            },
            "libraries": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Libraries to install",
            },
        },
        "required": ["code", "language"],
    }
    return {
        "name": "execute_code",
        "description": "Execute code in a secure sandbox environment",
        "parameters": parameters,
    }
def handle_function_call(function_call) -> str:
    """Handle the function call from OpenAI"""
    args = json.loads(function_call.arguments)
    with SandboxSession(lang=args["language"], verbose=False) as session:
        run = session.run(args["code"], libraries=args.get("libraries"))
        # Return a JSON payload the model can read back in the next turn.
        return json.dumps({
            "stdout": run.stdout,
            "stderr": run.stderr,
            "exit_code": run.exit_code,
        })
# Drive one function-calling round trip with the OpenAI client.
client = openai.OpenAI()
messages = [
    {"role": "user", "content": "Calculate the first 10 prime numbers"}
]
response = client.chat.completions.create(
    model="gpt-4",
    messages=messages,
    functions=[create_code_execution_function()],
    function_call="auto",
)

# If the model requested execution, run the code and feed the result back.
if response.choices[0].message.function_call:
    function_result = handle_function_call(response.choices[0].message.function_call)
    messages.append(response.choices[0].message)
    messages.append({
        "role": "function",
        "name": "execute_code",
        "content": function_result,
    })
    # Second completion lets the model summarize the execution result.
    final_response = client.chat.completions.create(
        model="gpt-4",
        messages=messages,
    )
    print(final_response.choices[0].message.content)
Custom Framework Integration¶
Generic Integration Pattern¶
"""Async execution example for LLM Sandbox."""
import asyncio
from typing import Any, Protocol
from llm_sandbox import SandboxSession
class CodeExecutor(Protocol):
"""Protocol for code execution integration."""
def execute(
self, code: str, language: str = "python", libraries: list[str] | None = None, **kwargs: Any
) -> dict[str, Any]:
"""Execute code and return results."""
...
class SandboxCodeExecutor:
"""Sandbox implementation of CodeExecutor."""
def __init__(self, default_security_policy: Any = None) -> None:
"""Initialize the executor with optional security policy."""
self.default_security_policy = default_security_policy
def execute(
self,
code: str,
language: str = "python",
libraries: list[str] | None = None,
security_policy: Any = None,
**kwargs: Any,
) -> dict[str, Any]:
"""Execute code in sandbox.
Returns:
Dictionary with stdout, stderr, exit_code, and plots
"""
policy = security_policy or self.default_security_policy
try:
with SandboxSession(lang=language, security_policy=policy, **kwargs) as session:
result = session.run(code, libraries)
return {
"success": result.exit_code == 0,
"stdout": result.stdout,
"stderr": result.stderr,
"exit_code": result.exit_code,
"plots": getattr(result, "plots", []),
}
except Exception as e: # noqa: BLE001
return {"success": False, "error": str(e), "exit_code": -1}
async def execute_async(self, code: str, **kwargs: Any) -> dict[str, Any]:
"""Async wrapper for execution."""
loop = asyncio.get_event_loop()
return await loop.run_in_executor(None, lambda: self.execute(code, **kwargs))
async def main() -> None:
    """Demonstrate both sync and async execution."""
    executor = SandboxCodeExecutor(default_security_policy=None)

    # Synchronous call runs inline on the event-loop thread.
    result = executor.execute("print('Hello, World!')", language="python")
    print(f"Sync result: {result['stdout']}")  # noqa: T201

    # Async call offloads to a worker thread so the loop is not blocked.
    result = await executor.execute_async("print('Hello, Async!')", language="python")
    print(f"Async result: {result['stdout']}")  # noqa: T201


if __name__ == "__main__":
    asyncio.run(main())
Middleware Pattern¶
from typing import Callable
import functools
import time
class CodeExecutionMiddleware:
    """Middleware for code execution with logging, caching, etc."""

    def __init__(self):
        self.cache = {}          # cache_key -> result dict
        self.execution_log = []  # one record per call that reached the wrapped func

    def with_logging(self, func: Callable) -> Callable:
        """Log all executions"""

        @functools.wraps(func)
        def wrapper(code: str, **kwargs):
            started = time.time()
            outcome = func(code, **kwargs)
            self.execution_log.append({
                "timestamp": time.time(),
                "code": code,
                "language": kwargs.get("language", "python"),
                "duration": time.time() - started,
                "success": outcome.get("success", False),
            })
            return outcome

        return wrapper

    def with_caching(self, func: Callable) -> Callable:
        """Cache execution results"""

        @functools.wraps(func)
        def wrapper(code: str, **kwargs):
            # Key includes kwargs repr, so argument order matters for hits.
            cache_key = f"{code}:{kwargs}"
            if cache_key not in self.cache:
                self.cache[cache_key] = func(code, **kwargs)
            return self.cache[cache_key]

        return wrapper

    def with_retry(self, max_attempts: int = 3) -> Callable:
        """Retry on failure"""

        def decorator(func: Callable) -> Callable:
            @functools.wraps(func)
            def wrapper(code: str, **kwargs):
                result = None
                for attempt in range(max_attempts):
                    result = func(code, **kwargs)
                    if result.get("success"):
                        break
                    if attempt < max_attempts - 1:
                        time.sleep(1)  # Wait before retry
                return result

            return wrapper

        return decorator
# Compose the middleware layers around the raw executor.
middleware = CodeExecutionMiddleware()
executor = SandboxCodeExecutor()

# Order matters: logging sees cache hits; retry sits closest to the call.
execute_with_features = middleware.with_logging(
    middleware.with_caching(
        middleware.with_retry(max_attempts=2)(executor.execute)
    )
)

result = execute_with_features(
    "print('Hello with middleware!')",
    language="python",
)
Integration Best Practices¶
1. Error Handling¶
class RobustCodeExecutor:
    """Robust code executor with comprehensive error handling"""

    def execute_safely(self, code: str, **kwargs):
        """Validate, security-check, run, and report failures as dicts."""
        try:
            # Pre-execution validation: reject blank input early.
            if not code or not code.strip():
                return {"error": "Empty code provided"}
            with SandboxSession(**kwargs) as session:
                # Static security scan before running anything.
                is_safe, violations = session.is_safe(code)
                if not is_safe:
                    return {
                        "error": "Security violation",
                        "violations": [v.description for v in violations],
                    }
                result = session.run(code)
                # Non-zero exit code is reported, not raised.
                if result.exit_code != 0:
                    return {
                        "error": "Execution failed",
                        "stderr": result.stderr,
                        "exit_code": result.exit_code,
                    }
                return {
                    "success": True,
                    "output": result.stdout,
                }
        except TimeoutError:
            return {"error": "Execution timeout"}
        except MemoryError:
            return {"error": "Memory limit exceeded"}
        except Exception as e:
            return {"error": f"Unexpected error: {str(e)}"}
2. Resource Management¶
from contextlib import contextmanager
import threading
import queue
class ResourceManagedExecutor:
    """Executor with resource management"""

    def __init__(self, max_concurrent=5):
        # Cap concurrent sandbox sessions across threads.
        self.semaphore = threading.Semaphore(max_concurrent)
        self.execution_queue = queue.Queue()

    @contextmanager
    def acquire_resources(self):
        """Acquire execution resources"""
        self.semaphore.acquire()
        try:
            yield
        finally:
            # Always release, even if the body raised.
            self.semaphore.release()

    def execute(self, code: str, **kwargs):
        """Execute with resource management"""
        with self.acquire_resources():
            # Force conservative per-container limits regardless of caller input.
            runtime_configs = kwargs.get("runtime_configs", {})
            runtime_configs.update({"cpu_count": 1, "mem_limit": "256m"})
            kwargs["runtime_configs"] = runtime_configs
            with SandboxSession(**kwargs) as session:
                return session.run(code)
3. Monitoring and Metrics¶
import time
from dataclasses import dataclass
from typing import List
@dataclass
class ExecutionMetrics:
    """Metrics for code execution"""

    timestamp: float       # epoch time when the execution started
    duration: float        # wall-clock seconds taken
    language: str          # sandbox language used
    success: bool          # True when the run exited cleanly
    code_length: int       # characters of source submitted
    memory_used: int = 0   # bytes used (0 when not measured)
    cpu_time: float = 0.0  # CPU seconds (0.0 when not measured)
class MonitoredExecutor:
    """Executor with monitoring capabilities"""

    def __init__(self):
        self.metrics: "List[ExecutionMetrics]" = []

    def _record(self, started: float, language: str, success: bool, code_length: int) -> None:
        """Append one ExecutionMetrics entry for a finished attempt."""
        self.metrics.append(ExecutionMetrics(
            timestamp=started,
            duration=time.time() - started,
            language=language,
            success=success,
            code_length=code_length,
        ))

    def execute_with_monitoring(self, code: str, **kwargs):
        """Execute code with monitoring"""
        started = time.time()
        language = kwargs.get("lang", "python")
        try:
            with SandboxSession(**kwargs) as session:
                result = session.run(code)
                # Success is defined by the sandbox exit code.
                self._record(started, language, result.exit_code == 0, len(code))
                return result
        except Exception:
            # Record the failed attempt, then propagate the original error.
            self._record(started, language, False, len(code))
            raise

    def get_statistics(self):
        """Get execution statistics"""
        if not self.metrics:
            return {}
        total = len(self.metrics)
        succeeded = sum(1 for m in self.metrics if m.success)
        total_duration = sum(m.duration for m in self.metrics)
        return {
            "total_executions": total,
            "success_rate": succeeded / total,
            "average_duration": total_duration / total,
            "languages": list({m.language for m in self.metrics}),
        }
Next Steps¶
- See practical Examples
- Learn about Security Policies
- Explore Backend Options
- Read the API Reference