# Source code for GenomeUtils.downloaders.downloader

#!/usr/bin/env python
"""
Filename: GenomeUtils/downloaders/downloader.py
Author: Arash Ayat
Copyright: 2025, Alexander Schliep
Version: 0.1.1
Description: This file defines the base Downloader class for handling file downloads.
License: LGPL-3.0-or-later
"""

from abc import ABC
import logging
from pathlib import Path
import shutil
import tempfile
from typing import Optional, Set

import requests


class Downloader(ABC):
    """Abstract base class for all downloaders.

    Downloaded files are stored in ``download_dir``. When no directory is
    supplied, a fresh temporary directory is created and later removed as a
    whole by ``cleanup``; otherwise ``cleanup`` removes only the files this
    instance downloaded.
    """

    def __init__(self, download_dir: Optional[Path] = None) -> None:
        """
        Initializes the Downloader.

        Args:
            download_dir: Directory for storing downloaded files.
                          If None, uses a temporary directory.
        """
        self._is_temp_cache = download_dir is None
        if download_dir is None:
            self.download_dir = Path(tempfile.mkdtemp())
        else:
            # Wrapping in Path() keeps Path inputs equal to what was passed
            # and additionally lets a plain string path work with .mkdir().
            self.download_dir = Path(download_dir)
        self.download_dir.mkdir(parents=True, exist_ok=True)
        self.logger = logging.getLogger(self.__class__.__name__)
        # Files written by this instance; consulted by cleanup().
        self._created_files: Set[Path] = set()

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(download_dir={self.download_dir})"

    def download_file(self, url: str, filename: Optional[str] = None, force: bool = False) -> Path:
        """
        Download a single file from a URL and saves it in the cache directory.

        Args:
            url: The URL of the file to download.
            filename: The name of the file to be saved in the cache directory.
                      If None, the last path segment of the URL is used.
            force: If True, redownload the file even if it exists. Defaults to False.

        Returns:
            The path to the downloaded file.

        Raises:
            ValueError: If no filename is given and none can be derived from
                the URL (e.g. the URL path ends with '/').
            requests.HTTPError: If the server answers with an error status.
        """
        if filename is None:
            # Drop the fragment and query string *before* taking the last
            # path segment, so a '/' inside the query string cannot be
            # mistaken for a path separator.
            url_path = url.split('#', 1)[0].split('?', 1)[0]
            filename = url_path.rsplit('/', 1)[-1]
        if not filename:
            # An empty name would resolve to the download directory itself,
            # which always exists and would be returned as a "cached file".
            raise ValueError(f"Cannot determine a filename for URL '{url}'.")

        destination_path = self.download_dir / filename

        if not force and destination_path.exists():
            self.logger.info("File '%s' already exists in cache. Skipping download.", filename)
            return destination_path

        self.logger.info("Downloading '%s'...", filename)
        # Stream into a sibling ".part" file and rename it into place only
        # after the transfer succeeded. Otherwise an interrupted download
        # would leave a truncated file that later calls treat as a cache hit.
        partial_path = destination_path.parent / (destination_path.name + ".part")
        try:
            with requests.get(url, stream=True) as response:
                response.raise_for_status()
                with open(partial_path, 'wb') as out_file:
                    # NOTE(review): response.raw is copied as-is, i.e. without
                    # undoing any HTTP Content-Encoding - confirm this is the
                    # intended behaviour for the servers in use.
                    shutil.copyfileobj(response.raw, out_file)
            partial_path.replace(destination_path)
        finally:
            # After a successful rename the partial file is already gone;
            # after a failure this removes the incomplete data.
            try:
                partial_path.unlink()
            except FileNotFoundError:
                pass

        self._created_files.add(destination_path)
        return destination_path

    def cleanup(self) -> None:
        """
        Clean up created files.

        For a temporary cache the whole directory is removed; otherwise only
        the files downloaded by this instance are deleted.
        """
        if self._is_temp_cache:
            if self.download_dir.exists():
                shutil.rmtree(self.download_dir)
        else:
            for path in self._created_files:
                # EAFP: the file may already have been removed by someone else.
                try:
                    path.unlink()
                except FileNotFoundError:
                    pass
        # Nothing recorded here exists any more, so forget the stale entries.
        self._created_files.clear()