File Management
The FileManager provides tools for working with files generated by sim2l simulations.
Overview
The FileManager helps you:
Retrieve files from run databases
Export files to the filesystem
Organize files in folder hierarchies
Search files across simulations
Run batch operations on multiple files
Basic Usage
Get Files from a Run
from sim2l.database import FileManager
fm = FileManager()
# Get all files from a specific execution
files = fm.get_run_files("exec-2024-001")
for file in files:
print(f"{file['name']}: {file['size']} bytes")
print(f" Category: {file['category']}")
print(f" Content-Type: {file['content_type']}")
print(f" Created: {file['date_created']}")
Export Files
# Export a single file
success = fm.export_run_file(
execution_id="exec-2024-001",
file_name="output.dat",
output_path="/tmp/output.dat"
)
if success:
print("File exported successfully")
Batch Export
import os
# Get all files
files = fm.get_run_files("exec-2024-001")
# Create output directory
output_dir = "/tmp/sim2l_exports"
os.makedirs(output_dir, exist_ok=True)
# Export all files
for file in files:
output_path = os.path.join(output_dir, file['name'])
fm.export_run_file("exec-2024-001", file['name'], output_path)
print(f"Exported: {file['name']}")
Get Files for a Simulation
from sim2l.database import FileManager, get_session_manager
# Setup with remote catalog
session = get_session_manager().create_anonymous_session()
fm = FileManager(
cache_url="http://localhost:8001",
session_id=session.session_id
)
# Get all files for a simulation across all runs
files = fm.get_simulation_files("thermal_sim", "1.0.0")
print(f"Found {len(files)} files")
for file in files:
print(f" {file['execution_id']}: {file['name']}")
File Organization
Creating Folders
# Create a folder
folder = fm.create_folder(
name="simulation_outputs",
creator="user123",
metadata={'project': 'materials_research'}
)
print(f"Created folder: {folder['id']}")
Creating Files
# Create a file entry
file = fm.create_file(
name="result.csv",
size=2048,
uri="/data/result.csv",
creator="user123",
parent_id=folder['id'],
metadata={
'simulation': 'thermal_sim',
'version': '1.0.0'
}
)
Listing Folder Contents
# List folder contents
contents = fm.list_folder(folder['id'])
for item in contents:
if item.get('is_folder'):
print(f"[DIR] {item['name']}")
else:
print(f"[FILE] {item['name']} ({item['size']} bytes)")
Moving Files
# Move a file to a different folder
fm.move_file(file['id'], new_parent_id=another_folder['id'])
Deleting Files
# Delete a file
fm.delete_file(file['id'])
# Delete a folder and all contents recursively
fm.delete_file(folder['id'], recursive=True)
Common Workflows
Workflow 1: Export Simulation Results
from sim2l import configure, load
from sim2l.database import FileManager
import os
# Run simulation
configure(use_run_database=True)
sim = load("thermal_sim")
result = sim.run(temperature=350)
# Export all results
fm = FileManager()
files = fm.get_run_files(result.execution_id)
output_dir = f"/data/results/{result.execution_id}"
os.makedirs(output_dir, exist_ok=True)
for file in files:
if file['category'] == 'output':
output_path = os.path.join(output_dir, file['name'])
fm.export_run_file(result.execution_id, file['name'], output_path)
Workflow 2: Filter by File Type
# Get all files
files = fm.get_run_files("exec-2024-001")
# Filter for CSV files
csv_files = [f for f in files if f['name'].endswith('.csv')]
# Filter by category
log_files = [f for f in files if f['category'] == 'log']
output_files = [f for f in files if f['category'] == 'output']
# Export only outputs
for file in output_files:
fm.export_run_file("exec-2024-001", file['name'], f"/tmp/{file['name']}")
Workflow 3: Organize by Simulation
from sim2l.database import FileManager
fm = FileManager()
# Create main folder
main_folder = fm.create_folder(
name="thermal_sim_results",
creator="system"
)
# Create subfolders for each run
execution_ids = ["exec-001", "exec-002", "exec-003"]
for exec_id in execution_ids:
# Create folder for this run
run_folder = fm.create_folder(
name=f"run_{exec_id}",
creator="system",
parent_id=main_folder['id']
)
# Get files for this run
files = fm.get_run_files(exec_id)
# Register files in folder
for file in files:
fm.create_file(
name=file['name'],
size=file['size'],
uri=file['uri'],
creator="system",
parent_id=run_folder['id'],
metadata=file['metadata']
)
Workflow 4: Compare Results
import pandas as pd
import tempfile
import os
def compare_results(exec_id_1, exec_id_2, file_name):
"""Compare a file from two different runs."""
fm = FileManager()
# Export both files to temp directory
with tempfile.TemporaryDirectory() as tmpdir:
path1 = os.path.join(tmpdir, f"run1_{file_name}")
path2 = os.path.join(tmpdir, f"run2_{file_name}")
fm.export_run_file(exec_id_1, file_name, path1)
fm.export_run_file(exec_id_2, file_name, path2)
# Compare (example with CSV)
if file_name.endswith('.csv'):
df1 = pd.read_csv(path1)
df2 = pd.read_csv(path2)
diff = df1.compare(df2)
return diff
# Compare temperature outputs
diff = compare_results("exec-001", "exec-002", "temperature.csv")
print(diff)
Advanced Features
File Metadata
Each file includes rich metadata:
files = fm.get_run_files("exec-2024-001")
file = files[0]
print(f"ID: {file['id']}")
print(f"Name: {file['name']}")
print(f"Execution: {file['execution_id']}")
print(f"Category: {file['category']}") # output, log, artifact, etc.
print(f"Content-Type: {file['content_type']}") # MIME type
print(f"Size: {file['size']} bytes")
print(f"URI: {file['uri']}") # Original path
print(f"Created: {file['date_created']}")
print(f"Metadata: {file['metadata']}") # Custom metadata
Context Manager
with FileManager() as fm:
files = fm.get_run_files("exec-2024-001")
for file in files:
fm.export_run_file(
"exec-2024-001",
file['name'],
f"/tmp/{file['name']}"
)
# Automatic cleanup
Integration with Services
from sim2l.database import FileManager, get_session_manager
# Connect to cache service for file metadata storage
session = get_session_manager().create_anonymous_session()
fm = FileManager(
cache_url="http://localhost:8001",
session_id=session.session_id
)
# File metadata is now stored in the distributed cache
# and shared across the team
Best Practices
Export Important Files: Don’t rely on run databases indefinitely. Export important results to permanent storage.
Use Meaningful Names: Organize exported files with clear directory structures:
output_dir = f"/data/{simulation_name}/{version}/{date}/{execution_id}"
Filter Before Exporting: Only export what you need:
files = fm.get_run_files(execution_id)
important_files = [
    f for f in files
    if f['category'] == 'output' and not f['name'].startswith('temp_')
]
Add Metadata: Include context when creating file entries:
fm.create_file(
    name="result.csv",
    size=2048,
    uri="/data/result.csv",
    creator="user123",
    metadata={
        'project': 'materials_research',
        'experiment': 'thermal_stress_01',
        'date': '2024-01-15'
    }
)
Handle Missing Files: Always check whether any files were returned:
files = fm.get_run_files(execution_id)
if not files:
    print(f"No files found for {execution_id}")
else:
    # Process files
    pass
API Reference
FileManager Class
Key Methods
File Retrieval:
get_run_files(execution_id) - Get all files from a run
get_simulation_files(simulation_name, version) - Get files for a simulation
get_file(file_id) - Get specific file metadata
File Export:
export_run_file(execution_id, file_name, output_path) - Export file to filesystem
File Operations:
create_file(name, size, uri, creator, ...) - Create file entry
update_file(file_id, **updates) - Update file metadata
delete_file(file_id, recursive=False) - Delete file
move_file(file_id, new_parent_id) - Move file to folder
Folder Operations:
create_folder(name, creator, ...) - Create folder
list_folder(folder_id) - List folder contents
See Also
Database Services - Complete database architecture
Database API - API documentation
Examples - More code examples