Source code for palletdatagenerator.utils

"""Utility functions for PalletDataGenerator."""

import json
import logging
import random
import sys
from pathlib import Path
from typing import Any, Optional

# Setup logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)



[docs]
def setup_logging(level: str = "DEBUG", log_file: str = "output.log") -> None:
    """Setup logging configuration.

    Args:
        level: Logging level ('DEBUG', 'INFO', 'WARNING', 'ERROR')
        log_file: Optional log file path
    """
    log_level = getattr(logging, level.upper(), logging.INFO)

    # Create formatter
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )

    # Setup console handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setFormatter(formatter)

    # Setup file handler if specified
    handlers = [console_handler]
    if log_file:
        file_handler = logging.FileHandler(log_file)
        file_handler.setFormatter(formatter)
        handlers.append(file_handler)

    # Configure root logger
    logging.basicConfig(
        level=log_level,
        handlers=handlers,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )




[docs]
def ensure_directory(path: str) -> Path:
    """Ensure directory exists, create if necessary.

    Args:
        path: Directory path to create

    Returns:
        Path object for the directory
    """
    path_obj = Path(path)
    path_obj.mkdir(parents=True, exist_ok=True)
    return path_obj




[docs]
def load_config(config_path: str) -> dict[str, Any]:
    """Load configuration from JSON or YAML file.

    Args:
        config_path: Path to configuration file

    Returns:
        Configuration dictionary

    Raises:
        FileNotFoundError: If config file doesn't exist
        ValueError: If config file format is invalid
    """
    config_path = Path(config_path)

    if not config_path.exists():
        raise FileNotFoundError(f"Configuration file not found: {config_path}")

    try:
        with open(config_path) as f:
            if config_path.suffix.lower() in [".yaml", ".yml"]:
                try:
                    import yaml

                    return yaml.safe_load(f)
                except ImportError as import_err:
                    raise ValueError(
                        "PyYAML required for YAML config files"
                    ) from import_err
            else:
                return json.load(f)
    except (json.JSONDecodeError, yaml.YAMLError) as e:
        raise ValueError(f"Invalid configuration file format: {e}") from e




[docs]
def save_config(config: dict[str, Any], config_path: str) -> None:
    """Save configuration to JSON file.

    Args:
        config: Configuration dictionary to save
        config_path: Path to save configuration file
    """
    config_path = Path(config_path)
    config_path.parent.mkdir(parents=True, exist_ok=True)

    with open(config_path, "w") as f:
        json.dump(config, f, indent=2, default=str)




[docs]
def set_random_seed(seed: int) -> None:
    """Set random seed for reproducible results.

    Args:
        seed: Random seed value
    """
    random.seed(seed)

    try:
        import numpy as np

        np.random.seed(seed)
    except ImportError:
        pass

    # Set Blender random seed if available
    try:
        import bpy

        bpy.context.scene.frame_set(seed % 1000)
    except ImportError:
        pass




[docs]
def validate_blender_environment() -> bool:
    """Validate that Blender environment is available and properly configured.

    Returns:
        True if Blender environment is valid, False otherwise
    """
    try:
        import bpy
        from bpy_extras.object_utils import world_to_camera_view
        from mathutils import Euler, Matrix, Vector

        # Check if we have a scene
        if not bpy.context.scene:
            logger.error("No active Blender scene found")
            return False

        # Check for Cycles addon
        if "cycles" not in bpy.context.preferences.addons:
            logger.warning("Cycles addon not found, some features may be limited")

        logger.info("Blender environment validation successful")
        return True

    except ImportError as e:
        logger.error(f"Blender Python API not available: {e}")
        return False
    except Exception as e:
        logger.error(f"Blender environment validation failed: {e}")
        return False




[docs]
def get_blender_version() -> tuple[int, int, int] | None:
    """Get Blender version information.

    Returns:
        Tuple of (major, minor, patch) version numbers or None if not available
    """
    try:
        import bpy

        return bpy.app.version
    except ImportError:
        return None




[docs]
def format_file_size(size_bytes: int) -> str:
    """Format file size in human-readable format.

    Args:
        size_bytes: Size in bytes

    Returns:
        Formatted size string (e.g., "1.5 MB")
    """
    if size_bytes == 0:
        return "0 B"

    size_names = ["B", "KB", "MB", "GB", "TB"]
    i = 0
    while size_bytes >= 1024 and i < len(size_names) - 1:
        size_bytes /= 1024.0
        i += 1

    # Format as integer if it's a whole number, otherwise with one decimal
    if size_bytes == int(size_bytes):
        return f"{int(size_bytes)} {size_names[i]}"
    return f"{size_bytes:.1f} {size_names[i]}"




[docs]
def get_system_info() -> dict[str, Any]:
    """Get system information for debugging and logging.

    Returns:
        Dictionary with system information
    """
    import platform

    info = {
        "platform": platform.platform(),
        "python_version": platform.python_version(),
        "architecture": platform.architecture(),
        "processor": platform.processor(),
    }

    # Add Blender info if available
    blender_version = get_blender_version()
    if blender_version:
        info["blender_version"] = ".".join(map(str, blender_version))

    # Add GPU info if available
    try:
        import bpy

        if "cycles" in bpy.context.preferences.addons:
            cycles_prefs = bpy.context.preferences.addons["cycles"].preferences
            devices = []
            for device in cycles_prefs.devices:
                devices.append(
                    {"name": device.name, "type": device.type, "use": device.use}
                )
            info["gpu_devices"] = devices
    except (ImportError, AttributeError):
        pass

    return info




[docs]
def create_dataset_manifest(dataset_info: dict[str, Any], output_path: str) -> None:
    """Create a comprehensive dataset manifest file.

    Args:
        dataset_info: Dictionary containing dataset information
        output_path: Path to save the manifest file
    """
    import datetime

    manifest = {
        "dataset_info": dataset_info,
        "generation_timestamp": datetime.datetime.now().isoformat(),
        "generator_version": "0.1.0",  # Would be dynamically set
        "system_info": get_system_info(),
        "file_structure": {
            "images": "RGB rendered images",
            "depth": "Depth maps (16-bit PNG)",
            "normals": "Surface normal maps",
            "index": "Object index maps for segmentation",
            "yolo_labels": "YOLO format annotations",
            "voc_xml": "PASCAL VOC format annotations",
            "annotations.json": "COCO format annotations",
        },
    }

    # Add file counts and sizes
    output_dir = Path(output_path).parent
    if output_dir.exists():
        file_stats = {}
        for subdir in ["images", "depth", "normals", "index", "yolo_labels", "voc_xml"]:
            subdir_path = output_dir / subdir
            if subdir_path.exists():
                files = list(subdir_path.glob("*"))
                total_size = sum(f.stat().st_size for f in files if f.is_file())
                file_stats[subdir] = {
                    "file_count": len([f for f in files if f.is_file()]),
                    "total_size": format_file_size(total_size),
                }
        manifest["file_statistics"] = file_stats

    # Save manifest
    with open(output_path, "w") as f:
        json.dump(manifest, f, indent=2, default=str)

    logger.info(f"Dataset manifest created: {output_path}")




[docs]
def verify_dataset_integrity(dataset_dir: str) -> dict[str, Any]:
    """Verify dataset integrity by checking file consistency.

    Args:
        dataset_dir: Path to dataset directory

    Returns:
        Dictionary with verification results
    """
    dataset_path = Path(dataset_dir)
    results = {"valid": True, "issues": [], "warnings": [], "statistics": {}}

    if not dataset_path.exists():
        results["valid"] = False
        results["issues"].append(f"Dataset directory does not exist: {dataset_dir}")
        return results

    # Check for required subdirectories
    required_dirs = ["images"]
    optional_dirs = ["depth", "normals", "index", "yolo_labels", "voc_xml"]

    for req_dir in required_dirs:
        if not (dataset_path / req_dir).exists():
            results["valid"] = False
            results["issues"].append(f"Required directory missing: {req_dir}")

    # Count files in each directory
    file_counts = {}
    for dir_name in required_dirs + optional_dirs:
        dir_path = dataset_path / dir_name
        if dir_path.exists():
            files = list(dir_path.glob("*"))
            file_counts[dir_name] = len([f for f in files if f.is_file()])

    results["statistics"]["file_counts"] = file_counts

    # Check for consistency between directories
    if "images" in file_counts:
        image_count = file_counts["images"]
        for dir_name, count in file_counts.items():
            if dir_name != "images" and count != image_count:
                results["warnings"].append(
                    f"File count mismatch: {dir_name} has {count} files, "
                    f"but images has {image_count} files"
                )

    # Check for annotation files
    annotation_files = {
        "coco": dataset_path / "annotations.json",
        "manifest": dataset_path / "dataset_manifest.json",
    }

    for ann_type, ann_path in annotation_files.items():
        if ann_path.exists():
            results["statistics"][f"{ann_type}_annotation"] = True
        else:
            results["warnings"].append(f"Missing {ann_type} annotation file")

    return results




[docs]
class ProgressTracker:
    """Progress tracking utility for long-running operations."""


[docs]
    def __init__(self, total: int, description: str = "Processing"):
        """Initialize progress tracker.

        Args:
            total: Total number of items to process
            description: Description of the operation
        """
        self.total = total
        self.current = 0
        self.description = description
        self.start_time = None



[docs]
    def start(self) -> None:
        """Start progress tracking."""
        import time

        self.start_time = time.time()
        logger.info(f"Starting {self.description}: 0/{self.total}")



[docs]
    def update(self, increment: int = 1) -> None:
        """Update progress.

        Args:
            increment: Number of items processed since last update
        """
        import time

        self.current += increment

        if self.start_time:
            elapsed = time.time() - self.start_time
            if elapsed > 0:
                rate = self.current / elapsed
                eta = (self.total - self.current) / rate if rate > 0 else 0

                logger.info(
                    f"{self.description}: {self.current}/{self.total} "
                    f"({self.current/self.total*100:.1f}%) "
                    f"- ETA: {eta:.1f}s"
                )



[docs]
    def finish(self) -> None:
        """Finish progress tracking."""
        import time

        if self.start_time:
            elapsed = time.time() - self.start_time
            logger.info(
                f"{self.description} completed: {self.total} items in {elapsed:.1f}s"
            )