Image Classification
Transformers
PyTorch
English
sybil
medical
cancer
ct-scan
risk-prediction
healthcare
vision
Instructions to use Lab-Rasool/sybil with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Lab-Rasool/sybil with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-classification", model="Lab-Rasool/sybil") pipe("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/parrots.png")# Load model directly from transformers import AutoModel model = AutoModel.from_pretrained("Lab-Rasool/sybil", dtype="auto") - Notebooks
- Google Colab
- Kaggle
| """Image processor for Sybil CT scan preprocessing""" | |
| import cv2 | |
| import numpy as np | |
| import torch | |
| from typing import Dict, List, Optional, Union, Tuple | |
| from transformers.image_processing_utils import BaseImageProcessor, BatchFeature | |
| from transformers.utils import TensorType | |
| import pydicom | |
| from PIL import Image | |
| import torchio as tio | |
| def order_slices(dicoms: List) -> List: | |
| """Order DICOM slices by their position""" | |
| # Sort by ImagePositionPatient if available | |
| try: | |
| dicoms = sorted(dicoms, key=lambda x: float(x.ImagePositionPatient[2])) | |
| except (AttributeError, TypeError): | |
| # Fall back to InstanceNumber if ImagePositionPatient not available | |
| try: | |
| dicoms = sorted(dicoms, key=lambda x: int(x.InstanceNumber)) | |
| except (AttributeError, TypeError): | |
| pass # Keep original order if neither attribute is available | |
| return dicoms | |
| class SybilImageProcessor(BaseImageProcessor): | |
| """ | |
| Constructs a Sybil image processor for preprocessing CT scans. | |
| Args: | |
| voxel_spacing (`List[float]`, *optional*, defaults to `[0.703125, 0.703125, 2.5]`): | |
| Target voxel spacing for resampling (row, column, slice thickness). | |
| img_size (`List[int]`, *optional*, defaults to `[512, 512]`): | |
| Target image size after resizing. | |
| num_images (`int`, *optional*, defaults to `208`): | |
| Number of slices to use from the CT scan. | |
| windowing (`Dict[str, float]`, *optional*): | |
| Windowing parameters for CT scan visualization. | |
| Default uses lung window: center=-600, width=1500. | |
| normalize (`bool`, *optional*, defaults to `True`): | |
| Whether to normalize pixel values to [0, 1]. | |
| **kwargs: | |
| Additional keyword arguments passed to the parent class. | |
| """ | |
| model_input_names = ["pixel_values"] | |
| def __init__( | |
| self, | |
| voxel_spacing: List[float] = None, | |
| img_size: List[int] = None, | |
| num_images: int = 208, | |
| windowing: Dict[str, float] = None, | |
| normalize: bool = True, | |
| **kwargs | |
| ): | |
| super().__init__(**kwargs) | |
| self.voxel_spacing = voxel_spacing if voxel_spacing is not None else [0.703125, 0.703125, 2.5] | |
| self.img_size = img_size if img_size is not None else [512, 512] | |
| self.num_images = num_images | |
| # Default lung window settings | |
| self.windowing = windowing if windowing is not None else { | |
| "center": -600, | |
| "width": 1500 | |
| } | |
| self.normalize = normalize | |
| # TorchIO transforms for standardization | |
| self.resample_transform = tio.transforms.Resample(target=self.voxel_spacing) | |
| # Note: Original Sybil uses 200 depth, 256x256 images | |
| self.default_depth = 200 | |
| self.default_size = [256, 256] | |
| # TorchIO uses (H, W, D) ordering for target_shape, matching original Sybil | |
| self.padding_transform = tio.transforms.CropOrPad( | |
| target_shape=tuple(self.default_size + [self.default_depth]), # (256, 256, 200) | |
| padding_mode=0 | |
| ) | |
| def load_dicom_series(self, paths: List[str]) -> Tuple[np.ndarray, Dict]: | |
| """ | |
| Load a series of DICOM files. | |
| Args: | |
| paths: List of paths to DICOM files. | |
| Returns: | |
| Tuple of (volume array, metadata dict) | |
| """ | |
| dicoms = [] | |
| for path in paths: | |
| try: | |
| dcm = pydicom.dcmread(path, stop_before_pixels=False) | |
| dicoms.append(dcm) | |
| except Exception as e: | |
| print(f"Error reading DICOM file {path}: {e}") | |
| continue | |
| if not dicoms: | |
| raise ValueError("No valid DICOM files found") | |
| # Order slices by position | |
| dicoms = order_slices(dicoms) | |
| # Extract pixel arrays | |
| volume = np.stack([dcm.pixel_array.astype(np.float32) for dcm in dicoms]) | |
| # Extract metadata | |
| metadata = { | |
| "slice_thickness": float(dicoms[0].SliceThickness) if hasattr(dicoms[0], 'SliceThickness') else None, | |
| "pixel_spacing": list(map(float, dicoms[0].PixelSpacing)) if hasattr(dicoms[0], 'PixelSpacing') else None, | |
| "manufacturer": str(dicoms[0].Manufacturer) if hasattr(dicoms[0], 'Manufacturer') else None, | |
| "num_slices": len(dicoms) | |
| } | |
| # Apply rescale if present | |
| if hasattr(dicoms[0], 'RescaleSlope') and hasattr(dicoms[0], 'RescaleIntercept'): | |
| slope = float(dicoms[0].RescaleSlope) | |
| intercept = float(dicoms[0].RescaleIntercept) | |
| volume = volume * slope + intercept | |
| return volume, metadata | |
| def load_png_series(self, paths: List[str]) -> np.ndarray: | |
| """ | |
| Load a series of PNG files. | |
| Args: | |
| paths: List of paths to PNG files (must be in anatomical order). | |
| Returns: | |
| 3D volume array | |
| """ | |
| images = [] | |
| for path in paths: | |
| img = Image.open(path).convert('L') # Convert to grayscale | |
| images.append(np.array(img, dtype=np.float32)) | |
| return np.stack(images) | |
| def resize_slices(self, volume: np.ndarray, target_size: List[int] = None) -> np.ndarray: | |
| """ | |
| Resize each slice in the volume to target size using OpenCV bilinear interpolation. | |
| This exactly matches the original Sybil's per-slice 2D resize operation. | |
| Args: | |
| volume: 3D volume array (D, H, W). | |
| target_size: Target size [H, W]. Defaults to [256, 256]. | |
| Returns: | |
| Resized volume. | |
| """ | |
| if target_size is None: | |
| target_size = self.default_size # [256, 256] | |
| # Resize each slice using OpenCV (matching original Sybil exactly) | |
| resized_slices = [] | |
| for i in range(volume.shape[0]): | |
| slice_2d = volume[i] # Shape: (H, W) | |
| # cv2.resize expects dsize=(width, height), not (height, width)! | |
| resized = cv2.resize( | |
| slice_2d, | |
| dsize=(target_size[1], target_size[0]), # (W, H) | |
| interpolation=cv2.INTER_LINEAR | |
| ) | |
| resized_slices.append(resized) | |
| # Stack back into volume | |
| return np.stack(resized_slices, axis=0) | |
| def apply_windowing(self, volume: np.ndarray) -> np.ndarray: | |
| """ | |
| Apply DICOM-standard windowing to CT scan, matching the original Sybil implementation. | |
| This implements the same windowing as the original Sybil: | |
| - Uses DICOM standard formula with center-0.5 and width-1 adjustments | |
| - Outputs to 16-bit range [0, 65535] then divides by 256 for 8-bit parity | |
| - Results in [0, 255] range that will be normalized later | |
| Args: | |
| volume: 3D CT volume in Hounsfield Units. | |
| Returns: | |
| Windowed volume in [0, 255] range. | |
| """ | |
| center = self.windowing["center"] # -600 | |
| width = self.windowing["width"] # 1500 | |
| # DICOM standard windowing formula (matching original Sybil) | |
| bit_size = 16 | |
| y_min = 0 | |
| y_max = 2 ** bit_size - 1 # 65535 | |
| y_range = y_max - y_min | |
| # DICOM standard adjustments | |
| c = center - 0.5 # -600.5 | |
| w = width - 1 # 1499 | |
| # Calculate window boundaries | |
| lower_bound = c - w / 2 # -1350 | |
| upper_bound = c + w / 2 # 149.5 | |
| # Apply windowing with three regions | |
| below = volume <= lower_bound | |
| above = volume > upper_bound | |
| between = np.logical_and(~below, ~above) | |
| # Create output array | |
| windowed = np.zeros_like(volume, dtype=np.float32) | |
| # Apply windowing | |
| windowed[below] = y_min # Values <= -1350 -> 0 | |
| windowed[above] = y_max # Values > 149.5 -> 65535 | |
| if between.any(): | |
| # Linear interpolation for values in window | |
| windowed[between] = ((volume[between] - c) / w + 0.5) * y_range + y_min | |
| # Divide by 256 for 8-bit parity (matching original Sybil) | |
| # This gives range [0, 255] instead of [0, 65535] | |
| windowed = windowed // 256 | |
| return windowed | |
| def resample_volume( | |
| self, | |
| volume: torch.Tensor, | |
| original_spacing: Optional[List[float]] = None | |
| ) -> torch.Tensor: | |
| """ | |
| Resample volume to target voxel spacing. | |
| Uses affine matrix approach matching original Sybil exactly. | |
| Args: | |
| volume: 3D or 4D volume tensor (D, H, W) or (C, D, H, W). | |
| original_spacing: Original voxel spacing [H_spacing, W_spacing, D_spacing]. | |
| Returns: | |
| Resampled volume with same number of dimensions. | |
| """ | |
| # Handle both 3D (D, H, W) and 4D (C, D, H, W) volumes | |
| if len(volume.shape) == 3: | |
| # Single channel: (D, H, W) -> (1, D, H, W) | |
| volume_4d = volume.unsqueeze(0) | |
| squeeze_output = True | |
| elif len(volume.shape) == 4: | |
| # Multi-channel: (C, D, H, W) - already has channel dim | |
| volume_4d = volume | |
| squeeze_output = False | |
| else: | |
| raise ValueError(f"Expected 3D or 4D volume, got shape {volume.shape}") | |
| # Permute to TorchIO format: (C, D, H, W) -> (C, H, W, D) | |
| volume_tio = volume_4d.permute(0, 2, 3, 1) | |
| # Create affine matrix like original Sybil | |
| # Original uses torch.diag(voxel_spacing) where voxel_spacing has 4 elements | |
| if original_spacing is not None: | |
| # Add 1.0 as 4th element like original Sybil | |
| voxel_spacing_4d = torch.tensor(original_spacing + [1.0], dtype=torch.float32) | |
| affine = torch.diag(voxel_spacing_4d) | |
| else: | |
| affine = None | |
| # Create TorchIO subject with affine (not spacing!) | |
| subject = tio.Subject( | |
| image=tio.ScalarImage(tensor=volume_tio, affine=affine) | |
| ) | |
| # Apply resampling | |
| resampled = self.resample_transform(subject) | |
| # Permute back: (C, H, W, D) -> (C, D, H, W) | |
| result = resampled['image'].data.permute(0, 3, 1, 2) | |
| # Return with original number of dimensions | |
| if squeeze_output: | |
| return result.squeeze(0) | |
| else: | |
| return result | |
| def pad_or_crop_volume(self, volume: torch.Tensor) -> torch.Tensor: | |
| """ | |
| Pad or crop volume to target shape. | |
| Args: | |
| volume: 3D or 4D volume tensor (D, H, W) or (C, D, H, W). | |
| Returns: | |
| Padded/cropped volume with same number of dimensions. | |
| """ | |
| # Handle both 3D (D, H, W) and 4D (C, D, H, W) volumes | |
| if len(volume.shape) == 3: | |
| # Single channel: (D, H, W) -> (1, D, H, W) | |
| volume_4d = volume.unsqueeze(0) | |
| squeeze_output = True | |
| elif len(volume.shape) == 4: | |
| # Multi-channel: (C, D, H, W) - already has channel dim | |
| volume_4d = volume | |
| squeeze_output = False | |
| else: | |
| raise ValueError(f"Expected 3D or 4D volume, got shape {volume.shape}") | |
| # Permute to TorchIO format: (C, D, H, W) -> (C, H, W, D) | |
| volume_tio = volume_4d.permute(0, 2, 3, 1) | |
| # Create TorchIO subject | |
| subject = tio.Subject( | |
| image=tio.ScalarImage(tensor=volume_tio) | |
| ) | |
| # Apply padding/cropping | |
| transformed = self.padding_transform(subject) | |
| # Permute back: (C, H, W, D) -> (C, D, H, W) | |
| result = transformed['image'].data.permute(0, 3, 1, 2) | |
| # Return with original number of dimensions | |
| if squeeze_output: | |
| return result.squeeze(0) | |
| else: | |
| return result | |
| def preprocess( | |
| self, | |
| images: Union[List[str], np.ndarray, torch.Tensor], | |
| file_type: str = "dicom", | |
| voxel_spacing: Optional[List[float]] = None, | |
| return_tensors: Optional[Union[str, TensorType]] = None, | |
| **kwargs | |
| ) -> BatchFeature: | |
| """ | |
| Preprocess CT scan images. | |
| Args: | |
| images: Either list of file paths or numpy/torch array of images. | |
| file_type: Type of input files ("dicom" or "png"). | |
| voxel_spacing: Original voxel spacing (required for PNG files). | |
| return_tensors: The type of tensors to return. | |
| Returns: | |
| BatchFeature with preprocessed images. | |
| """ | |
| # Load images if paths are provided | |
| if isinstance(images, list) and isinstance(images[0], str): | |
| if file_type == "dicom": | |
| volume, metadata = self.load_dicom_series(images) | |
| if voxel_spacing is None and metadata["pixel_spacing"]: | |
| voxel_spacing = metadata["pixel_spacing"] + [metadata["slice_thickness"]] | |
| elif file_type == "png": | |
| if voxel_spacing is None: | |
| raise ValueError("voxel_spacing must be provided for PNG files") | |
| volume = self.load_png_series(images) | |
| else: | |
| raise ValueError(f"Unknown file type: {file_type}") | |
| elif isinstance(images, (np.ndarray, torch.Tensor)): | |
| volume = images | |
| else: | |
| raise ValueError("Images must be file paths, numpy array, or torch tensor") | |
| # Ensure volume is numpy array for initial processing | |
| if isinstance(volume, torch.Tensor): | |
| volume_np = volume.numpy() | |
| else: | |
| volume_np = volume | |
| # Apply windowing | |
| volume_np = self.apply_windowing(volume_np) | |
| # Resize each slice to 256x256 (matching original Sybil's per-slice resize) | |
| volume_np = self.resize_slices(volume_np, target_size=self.default_size) | |
| # NOTE: Original Sybil uses the ORIGINAL voxel spacing from DICOM metadata | |
| # even after resizing slices. This is physically incorrect (spacing should be | |
| # adjusted for the resize factor), but we match the original behavior here. | |
| # The voxel_spacing remains unchanged from DICOM metadata. | |
| # Convert to torch tensor for remaining operations | |
| volume = torch.from_numpy(volume_np).float() | |
| # Apply normalization BEFORE resampling (to match original Sybil) | |
| # Original Sybil normalizes each slice before assembly and 3D resampling | |
| # This ensures 3D interpolation happens on normalized values, not [0, 255] values | |
| # These values come from the original Sybil implementation's computed mean/std | |
| # on 8-bit windowed images [0, 255] | |
| img_mean = 128.1722 | |
| img_std = 87.1849 | |
| volume = (volume - img_mean) / img_std | |
| # Replicate to 3 channels BEFORE resampling (to match original Sybil) | |
| # Original Sybil replicates channels per-slice, then assembles 3-channel volume | |
| # Shape: (D, H, W) -> (3, D, H, W) | |
| volume = volume.unsqueeze(0).repeat(3, 1, 1, 1) # Now (3, D, H, W) | |
| # Resample if spacing is provided (3D resampling for voxel spacing adjustment) | |
| # This happens on 3-channel volume, matching original Sybil | |
| if voxel_spacing is not None: | |
| volume = self.resample_volume(volume, voxel_spacing) | |
| # Pad or crop to target shape (on 3-channel volume) | |
| volume = self.pad_or_crop_volume(volume) | |
| # Add batch dimension to match original Sybil output shape [1, C, D, H, W] | |
| volume = volume.unsqueeze(0) # Now (1, 3, D, H, W) | |
| # Prepare output | |
| data = {"pixel_values": volume} | |
| # Convert to requested tensor type | |
| if return_tensors == "pt": | |
| return BatchFeature(data=data, tensor_type=TensorType.PYTORCH) | |
| elif return_tensors == "np": | |
| data = {k: v.numpy() for k, v in data.items()} | |
| return BatchFeature(data=data, tensor_type=TensorType.NUMPY) | |
| else: | |
| return BatchFeature(data=data) | |
| def __call__( | |
| self, | |
| images: Union[List[str], List[List[str]], np.ndarray, torch.Tensor], | |
| **kwargs | |
| ) -> BatchFeature: | |
| """ | |
| Main method to prepare images for the model. | |
| Args: | |
| images: Images to preprocess. Can be: | |
| - List of file paths for a single series | |
| - List of lists of file paths for multiple series | |
| - Numpy array or torch tensor | |
| Returns: | |
| BatchFeature with preprocessed images ready for model input. | |
| """ | |
| # Handle batch processing | |
| if isinstance(images, list) and images and isinstance(images[0], list): | |
| # Multiple series | |
| batch_volumes = [] | |
| for series_paths in images: | |
| result = self.preprocess(series_paths, **kwargs) | |
| batch_volumes.append(result["pixel_values"]) | |
| # Stack into batch (B, C, D, H, W) | |
| pixel_values = torch.stack(batch_volumes) | |
| return BatchFeature(data={"pixel_values": pixel_values}) | |
| else: | |
| # Single series | |
| return self.preprocess(images, **kwargs) |