From c676cd2eb57587ac78b5a0820378c038fcee7b0e Mon Sep 17 00:00:00 2001
From: Bigsk
Date: Mon, 14 Oct 2024 18:13:18 +0800
Subject: [PATCH] basic download function

---
 richka/__init__.py    |   3 +-
 richka/__main__.py    |   1 +
 richka/__version__.py |   2 +-
 richka/config.py      |  34 ++++++++++++-
 richka/core.py        | 110 +++++++++++++++++++++++++++++++++++++++++++
 setup.py              |   2 +-
 6 files changed, 148 insertions(+), 4 deletions(-)

diff --git a/richka/__init__.py b/richka/__init__.py
index d085c3a..7b0bb48 100644
--- a/richka/__init__.py
+++ b/richka/__init__.py
@@ -1 +1,2 @@
-from .config import *
\ No newline at end of file
+from .config import *
+from .core import *
\ No newline at end of file
diff --git a/richka/__main__.py b/richka/__main__.py
index e69de29..8b13789 100644
--- a/richka/__main__.py
+++ b/richka/__main__.py
@@ -0,0 +1 @@
+
diff --git a/richka/__version__.py b/richka/__version__.py
index 8a6a77f..a68acbb 100644
--- a/richka/__version__.py
+++ b/richka/__version__.py
@@ -5,4 +5,4 @@
 __author__ = "Ian Xia"
 __author_email__ = "xia@ghink.net"
 __license__ = "MIT"
-__copyright__ = "Copyright Ian Xia"
\ No newline at end of file
+__copyright__ = "Copyright Ian Xia"
diff --git a/richka/config.py b/richka/config.py
index cb5a0d9..eaeaebf 100644
--- a/richka/config.py
+++ b/richka/config.py
@@ -1,13 +1,45 @@
 import richka
 
+import logging
+
 __VERSION = ("Alpha", 0, 0, 1)
 USER_AGENT = f"Richka{__VERSION[0]}/{__VERSION[1]}.{__VERSION[2]}.{__VERSION[3]}"
 HEADERS = {"user-agent": USER_AGENT}
+COROUTINE_LIMIT = 10
+SLICE_THRESHOLD = 10 # MiB
+
+logger = logging.getLogger("Richka Engine")
 
 def set_user_agent(user_agent: str) -> None:
+    """
+    Set Public User Agent for HTTP Requests
+    :param user_agent: String
+    :return:
+    """
     richka.USER_AGENT = user_agent
     richka.HEADERS["user-agent"] = user_agent
 
 def set_headers(headers: dict) -> None:
+    """
+    Set Public Headers for HTTP Requests
+    :param headers: Dictionary
+    :return:
+    """
     for key, value in headers.items():
-        richka.HEADERS[key.lower()] = value
\ No newline at end of file
+        richka.HEADERS[key.lower()] = value
+
+def set_coroutine_limit(coroutine_limit: int) -> None:
+    """
+    Set Coroutine Limit for HTTP Requests
+    :param coroutine_limit: Integer
+    :return:
+    """
+    richka.COROUTINE_LIMIT = coroutine_limit
+
+def set_slice_threshold(slice_threshold: int) -> None:
+    """
+    Set Slice Threshold for HTTP Requests
+    :param slice_threshold: Integer
+    :return:
+    """
+    richka.SLICE_THRESHOLD = slice_threshold
diff --git a/richka/core.py b/richka/core.py
index e69de29..391d8bc 100644
--- a/richka/core.py
+++ b/richka/core.py
@@ -0,0 +1,110 @@
+import time
+import asyncio
+
+import richka
+
+import aiohttp
+
+async def __download_range(session: aiohttp.ClientSession, url: str, start: int, end: int, destination: str) -> None:
+    """
+    Download one byte range and write it at the same offset of the destination file
+    :param session: aiohttp.ClientSession
+    :param url: String
+    :param start: Integer, first byte of the range (inclusive)
+    :param end: Integer, last byte of the range (inclusive)
+    :param destination: String, path of an already created file
+    :return:
+    :raises aiohttp.ClientResponseError: if the answer is not 206 Partial Content
+    """
+    richka.logger.info(f'Downloading part {start}-{end} of {url} to {destination}.')
+
+    headers = {**richka.HEADERS, **{'range': f'bytes={start}-{end}'}}
+
+    async with session.get(url, headers=headers) as response:
+        # Anything but 206 means the body is not the requested slice (a 200
+        # carries the whole file), so writing it at this offset would corrupt
+        # the destination.
+        if response.status != 206:
+            raise aiohttp.ClientResponseError(
+                response.request_info, response.history, status=response.status,
+                message=f'Expected 206 Partial Content for range {start}-{end}', headers=response.headers)
+        content = await response.read()
+        with open(destination, 'r+b') as f:
+            f.seek(start)
+            f.write(content)
+
+    richka.logger.info(f'Downloaded part {start}-{end} of {destination}.')
+
+async def __download_single(session: aiohttp.ClientSession, url: str, destination: str) -> None:
+    """
+    Download a whole file with a single request
+    :param session: aiohttp.ClientSession
+    :param url: String
+    :param destination: String, path of an already created file
+    :return:
+    :raises aiohttp.ClientResponseError: if the server answers with an error status
+    """
+    richka.logger.info(f'Downloading {url} to {destination}.')
+
+    async with session.get(url, headers=richka.HEADERS) as response:
+        # Do not store an HTTP error page as if it were the file
+        response.raise_for_status()
+        content = await response.read()
+        with open(destination, 'r+b') as f:
+            f.write(content)
+
+    richka.logger.info(f'Downloaded {url} to {destination}.')
+
+async def download(url: str, destination: str) -> float:
+    """
+    Download a file, slicing it into concurrent range requests when it is larger than SLICE_THRESHOLD
+    :param url: String
+    :param destination: String, path of the file to write
+    :return: Float, seconds spent on the transfer
+    :raises aiohttp.ClientResponseError: if the server answers with an error or ignores a range request
+    """
+    async with aiohttp.ClientSession() as session:
+        # Get file size; send the public headers and follow redirects so the
+        # size belongs to the resource that GET will actually fetch
+        async with session.head(url, headers=richka.HEADERS, allow_redirects=True) as response:
+            file_size = int(response.headers.get('Content-Length', 0))
+
+        if not file_size or file_size / pow(1024, 2) <= richka.SLICE_THRESHOLD:
+            if not file_size:
+                richka.logger.info(f'Failed to get file size, directly downloading {url}.')
+            else:
+                richka.logger.info(f"Downloading {url} ({file_size}) to {destination} with single mode.")
+
+            # Create an empty file
+            with open(destination, 'wb') as f:
+                f.truncate(file_size)
+
+            # Start task
+            start_time = time.time()
+            await __download_single(session, url, destination)
+            end_time = time.time()
+            return end_time - start_time
+
+        richka.logger.info(f'Downloading {url} ({file_size}) to {destination} with slicing mode.')
+
+        # Calc slice size; never more parts than bytes and always at least one
+        parts = max(1, min(richka.COROUTINE_LIMIT, file_size))
+        part_size = file_size // parts
+
+        # Create an empty file
+        with open(destination, 'wb') as f:
+            f.truncate(file_size)
+
+        # Create coroutine tasks
+        tasks = []
+        for i in range(parts):
+            start = i * part_size
+            end = (start + part_size - 1) if i < parts - 1 else (file_size - 1)
+            task = __download_range(session, url, start, end, destination)
+            tasks.append(task)
+
+        # Start all task
+        start_time = time.time()
+        await asyncio.gather(*tasks)
+        end_time = time.time()
+        return end_time - start_time
diff --git a/setup.py b/setup.py
index 181ca3b..683e8d4 100644
--- a/setup.py
+++ b/setup.py
@@ -39,4 +39,4 @@
       },
       package_data={'': ['README.md']},
       include_package_data=True,
-      zip_safe=False)
\ No newline at end of file
+      zip_safe=False)