File Management

The FileManager provides tools for working with files generated by sim2l simulations.

Overview

The FileManager helps you:

  • Retrieve files from run databases

  • Export files to the filesystem

  • Organize files in folder hierarchies

  • Search files across simulations

  • Perform batch operations on multiple files

Basic Usage

Get Files from a Run

from sim2l.database import FileManager

fm = FileManager()

# Get all files from a specific execution
files = fm.get_run_files("exec-2024-001")

for file in files:
    print(f"{file['name']}: {file['size']} bytes")
    print(f"  Category: {file['category']}")
    print(f"  Content-Type: {file['content_type']}")
    print(f"  Created: {file['date_created']}")

Export Files

# Export a single file
success = fm.export_run_file(
    execution_id="exec-2024-001",
    file_name="output.dat",
    output_path="/tmp/output.dat"
)

if success:
    print("File exported successfully")

Batch Export

import os

# Get all files
files = fm.get_run_files("exec-2024-001")

# Create output directory
output_dir = "/tmp/sim2l_exports"
os.makedirs(output_dir, exist_ok=True)

# Export all files
for file in files:
    output_path = os.path.join(output_dir, file['name'])
    fm.export_run_file("exec-2024-001", file['name'], output_path)
    print(f"Exported: {file['name']}")

Get Files for a Simulation

from sim2l.database import FileManager, get_session_manager

# Setup with remote catalog
session = get_session_manager().create_anonymous_session()
fm = FileManager(
    cache_url="http://localhost:8001",
    session_id=session.session_id
)

# Get all files for a simulation across all runs
files = fm.get_simulation_files("thermal_sim", "1.0.0")

print(f"Found {len(files)} files")
for file in files:
    print(f"  {file['execution_id']}: {file['name']}")

File Organization

Creating Folders

# Create a folder
folder = fm.create_folder(
    name="simulation_outputs",
    creator="user123",
    metadata={'project': 'materials_research'}
)

print(f"Created folder: {folder['id']}")

Creating Files

# Create a file entry
file = fm.create_file(
    name="result.csv",
    size=2048,
    uri="/data/result.csv",
    creator="user123",
    parent_id=folder['id'],
    metadata={
        'simulation': 'thermal_sim',
        'version': '1.0.0'
    }
)

Listing Folder Contents

# List folder contents
contents = fm.list_folder(folder['id'])

for item in contents:
    if item.get('is_folder'):
        print(f"[DIR] {item['name']}")
    else:
        print(f"[FILE] {item['name']} ({item['size']} bytes)")

Moving Files

# Move a file to a different folder
# NOTE: `another_folder` is not defined in this snippet; presumably it is a
# folder dict returned by an earlier fm.create_folder(...) call — confirm.
fm.move_file(file['id'], new_parent_id=another_folder['id'])

Deleting Files

# Delete a file
fm.delete_file(file['id'])

# Delete a folder and all contents recursively
fm.delete_file(folder['id'], recursive=True)

Common Workflows

Workflow 1: Export Simulation Results

from sim2l import configure, load
from sim2l.database import FileManager
import os

# Run simulation
configure(use_run_database=True)
sim = load("thermal_sim")
result = sim.run(temperature=350)

# Export all results
fm = FileManager()
files = fm.get_run_files(result.execution_id)

output_dir = f"/data/results/{result.execution_id}"
os.makedirs(output_dir, exist_ok=True)

for file in files:
    if file['category'] == 'output':
        output_path = os.path.join(output_dir, file['name'])
        fm.export_run_file(result.execution_id, file['name'], output_path)

Workflow 2: Filter by File Type

# Get all files
files = fm.get_run_files("exec-2024-001")

# Filter for CSV files
csv_files = [f for f in files if f['name'].endswith('.csv')]

# Filter by category
log_files = [f for f in files if f['category'] == 'log']
output_files = [f for f in files if f['category'] == 'output']

# Export only outputs
for file in output_files:
    fm.export_run_file("exec-2024-001", file['name'], f"/tmp/{file['name']}")

Workflow 3: Organize by Simulation

from sim2l.database import FileManager

fm = FileManager()

# Create main folder
main_folder = fm.create_folder(
    name="thermal_sim_results",
    creator="system"
)

# Create subfolders for each run
execution_ids = ["exec-001", "exec-002", "exec-003"]

for exec_id in execution_ids:
    # Create folder for this run
    run_folder = fm.create_folder(
        name=f"run_{exec_id}",
        creator="system",
        parent_id=main_folder['id']
    )

    # Get files for this run
    files = fm.get_run_files(exec_id)

    # Register files in folder
    for file in files:
        fm.create_file(
            name=file['name'],
            size=file['size'],
            uri=file['uri'],
            creator="system",
            parent_id=run_folder['id'],
            metadata=file['metadata']
        )

Workflow 4: Compare Results

import pandas as pd
import tempfile
import os

def compare_results(exec_id_1, exec_id_2, file_name):
    """Compare a file from two different runs.

    Both copies are exported into a temporary directory that is removed
    before the function returns. Only CSV files are compared.

    Args:
        exec_id_1: Execution ID of the first run.
        exec_id_2: Execution ID of the second run.
        file_name: Name of the file to export from both runs.

    Returns:
        The result of ``df1.compare(df2)`` for ``.csv`` files, or ``None``
        for any other file type.

    Raises:
        FileNotFoundError: If a CSV file could not be exported from
            either run.
        ValueError: Raised by ``DataFrame.compare`` when the two files do
            not have identical row/column labels.
    """

    fm = FileManager()

    # Export both files to temp directory
    with tempfile.TemporaryDirectory() as tmpdir:
        path1 = os.path.join(tmpdir, f"run1_{file_name}")
        path2 = os.path.join(tmpdir, f"run2_{file_name}")

        exported1 = fm.export_run_file(exec_id_1, file_name, path1)
        exported2 = fm.export_run_file(exec_id_2, file_name, path2)

        # Only CSV comparison is implemented; make the fall-through explicit.
        if not file_name.endswith('.csv'):
            return None

        # export_run_file reports failure through its return value, so check
        # it here instead of letting read_csv fail on a missing temp path.
        for exported, exec_id in ((exported1, exec_id_1), (exported2, exec_id_2)):
            if not exported:
                raise FileNotFoundError(
                    f"Could not export {file_name!r} from run {exec_id!r}"
                )

        df1 = pd.read_csv(path1)
        df2 = pd.read_csv(path2)

        # The frames are fully loaded, so the result outlives the temp dir.
        return df1.compare(df2)

# Compare temperature outputs
diff = compare_results("exec-001", "exec-002", "temperature.csv")
print(diff)

Advanced Features

File Metadata

Each file includes rich metadata:

files = fm.get_run_files("exec-2024-001")
file = files[0]

print(f"ID: {file['id']}")
print(f"Name: {file['name']}")
print(f"Execution: {file['execution_id']}")
print(f"Category: {file['category']}")  # output, log, artifact, etc.
print(f"Content-Type: {file['content_type']}")  # MIME type
print(f"Size: {file['size']} bytes")
print(f"URI: {file['uri']}")  # Original path
print(f"Created: {file['date_created']}")
print(f"Metadata: {file['metadata']}")  # Custom metadata

Context Manager

with FileManager() as fm:
    files = fm.get_run_files("exec-2024-001")
    for file in files:
        fm.export_run_file(
            "exec-2024-001",
            file['name'],
            f"/tmp/{file['name']}"
        )
# Automatic cleanup

Integration with Services

from sim2l.database import FileManager, get_session_manager

# Connect to cache service for file metadata storage
session = get_session_manager().create_anonymous_session()
fm = FileManager(
    cache_url="http://localhost:8001",
    session_id=session.session_id
)

# File metadata is now stored in distributed cache
# and shared across team

Best Practices

  1. Export Important Files: Don’t rely on run databases indefinitely. Export important results to permanent storage.

  2. Use Meaningful Names: Organize exported files with clear directory structures:

    output_dir = f"/data/{simulation_name}/{version}/{date}/{execution_id}"
    
  3. Filter Before Exporting: Only export what you need:

    files = fm.get_run_files(execution_id)
    important_files = [
        f for f in files
        if f['category'] == 'output' and not f['name'].startswith('temp_')
    ]
    
  4. Add Metadata: Include context when creating file entries:

    fm.create_file(
        name="result.csv",
        size=2048,
        uri="/data/result.csv",
        creator="user123",
        metadata={
            'project': 'materials_research',
            'experiment': 'thermal_stress_01',
            'date': '2024-01-15'
        }
    )
    
  5. Handle Missing Files: Always check whether any files were returned before processing them:

    files = fm.get_run_files(execution_id)
    if not files:
        print(f"No files found for {execution_id}")
    else:
        # Process files
        pass
    

API Reference

FileManager Class

Key Methods

File Retrieval:

  • get_run_files(execution_id) - Get all files from a run

  • get_simulation_files(simulation_name, version) - Get files for a simulation across all runs

  • get_file(file_id) - Get specific file metadata

File Export:

  • export_run_file(execution_id, file_name, output_path) - Export file to filesystem

File Operations:

  • create_file(name, size, uri, creator, ...) - Create file entry

  • update_file(file_id, **updates) - Update file metadata

  • delete_file(file_id, recursive=False) - Delete a file, or a folder and all its contents when recursive=True

  • move_file(file_id, new_parent_id) - Move file to folder

Folder Operations:

  • create_folder(name, creator, ...) - Create folder

  • list_folder(folder_id) - List folder contents

See Also