From cdc2073460ba6a7354a34e72fb53dbd1373f4e19 Mon Sep 17 00:00:00 2001 From: Bigsk Date: Mon, 14 Oct 2024 22:33:58 +0800 Subject: [PATCH] single file download done --- README.md | 41 ++++++++++++++++ requirements.txt | 1 + richka/__init__.py | 3 +- richka/config.py | 27 +++++++++-- richka/controller.py | 88 ++++++++++++++++++++++++++++++++++ richka/core.py | 111 ++++++++++++++++++++++++++++++++----------- setup.py | 9 +++- 7 files changed, 246 insertions(+), 34 deletions(-) create mode 100644 requirements.txt create mode 100644 richka/controller.py diff --git a/README.md b/README.md index 432c2f6..b3147d6 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,43 @@ # Richka - Python Async Download Engine +#### Richka (From Ukrainian: Рiчка) means river, stands for the download speed of Richka Engine + +## Usage + +`import richka` and run script in your code, for example: + +``` +import richka + +import asyncio +import time +import threading + +# Create task controller +controller = richka.Controller() + +def download(): + global controller + + # Create download task + time_used, file_size = asyncio.run(richka.download("https://mirrors.tuna.tsinghua.edu.cn/videolan-ftp/vlc-iOS/3.6.4/VLC-iOS.ipa", "VLC-iOS.ipa", controller)) + + # Result + print("Time used:", time_used) + print(f"Speed: {file_size / time_used / pow(1024, 2)}MiB/s") + +def main(): + global controller + + # Progress monitor + while controller.status: + if controller.status == 1: + print(f"Download Progress: {round(controller.progress, 2)}% \r", end="") + time.sleep(0.1) + +if __name__ == "__main__": + threading.Thread(target=download).start() + main() + +``` +Then you'll get a file from Internet :D. \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..b7e0e83 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +aiohttp~=3.8.5 \ No newline at end of file diff --git a/richka/__init__.py b/richka/__init__.py index 7b0bb48..27ce982 100644 --- a/richka/__init__.py +++ b/richka/__init__.py @@ -1,2 +1,3 @@ from .config import * -from .core import * \ No newline at end of file +from .core import * +from .controller import * diff --git a/richka/config.py b/richka/config.py index eaeaebf..3ff4a18 100644 --- a/richka/config.py +++ b/richka/config.py @@ -7,13 +7,16 @@ HEADERS = {"user-agent": USER_AGENT} COROUTINE_LIMIT = 10 SLICE_THRESHOLD = 10 # MiB +TIMEOUT = 30 +RETRY_TIMES = 5 +CHUNK_SIZE = 102400 logger = logging.getLogger("Richka Engine") def set_user_agent(user_agent: str) -> None: """ Set Public User Agent for HTTP Requests - :param user_agent: String + :param user_agent: String User-Agent you want to set. :return: """ richka.USER_AGENT = user_agent @@ -22,7 +25,7 @@ def set_user_agent(user_agent: str) -> None: def set_headers(headers: dict) -> None: """ Set Public Headers for HTTP Requests - :param headers: Dictionary + :param headers: Dictionary Headers you want to set. :return: """ for key, value in headers.items(): @@ -31,7 +34,7 @@ def set_headers(headers: dict) -> None: def set_coroutine_limit(coroutine_limit: int) -> None: """ Set Coroutine Limit for HTTP Requests - :param coroutine_limit: Integer + :param coroutine_limit: Integer Coroutine number limit. :return: """ richka.COROUTINE_LIMIT = coroutine_limit @@ -39,7 +42,23 @@ def set_coroutine_limit(coroutine_limit: int) -> None: def set_slice_threshold(slice_threshold: int) -> None: """ Set Slice Threshold for HTTP Requests - :param slice_threshold: Integer + :param slice_threshold: Integer Slice threshold to enable coroutine download. :return: """ richka.SLICE_THRESHOLD = slice_threshold + +def set_timeout(timeout: int) -> None: + """ + Set Timeout for HTTP Requests + :param timeout: Integer Timeout time in seconds. + :return: + """ + richka.TIMEOUT = timeout + +def set_retry_times(retry_times: int) -> None: + """ + Set Retry Times for HTTP Requests + :param retry_times: Integer Allowed retry times. + :return: + """ + richka.RETRY_TIMES = retry_times diff --git a/richka/controller.py b/richka/controller.py new file mode 100644 index 0000000..c11c6af --- /dev/null +++ b/richka/controller.py @@ -0,0 +1,88 @@ +import asyncio + +class Controller: + def __init__(self): + self.__paused = False + self.__total_size = 0 + self.__downloaded_size = 0 + self.__downloaded_size_slice = {} + self.__lock = asyncio.Lock() # For async safe + + @property + def total_size(self) -> int: + """ + Get the total size of the file. + :return: Integer Size of the file. + """ + return self.__total_size + + @total_size.setter + def total_size(self, size: int) -> None: + """ + Set the total size of the file. + :param size: Integer Size of the file. + :return: None + """ + if not self.__total_size: + self.__total_size = size + + async def update_progress(self, downloaded_chunk_size: int, chunk_id: str = None) -> None: + """ + Update the progress of the download. Do not operate this! + :param downloaded_chunk_size: Integer Downloaded Size of the file. + :param chunk_id: String Chunk ID of the part. + :return: None + """ + async with self.__lock: + if chunk_id is None and self.__downloaded_size_slice == {}: + self.__downloaded_size = downloaded_chunk_size + else: + self.__downloaded_size_slice[chunk_id] = downloaded_chunk_size + self.__downloaded_size = sum(self.__downloaded_size_slice.values()) + + @property + def paused(self) -> bool: + """ + Get the paused state of the downloader. + :return: Boolean State of the downloader. + """ + return self.__paused + + def pause(self) -> None: + """ + Pause the downloader. + :return: None + """ + self.__paused = True + + def unpause(self) -> None: + """ + Unpause the downloader. + :return: None + """ + self.__paused = False + + @property + def status(self) -> int: + """ + Get the status of the downloader. + :return: Integer Status of the downloader. -1: Haven't Started -2: Paused 0: Done 1: Downloading + """ + if self.__downloaded_size == 0: + return -1 # Haven't started + elif self.__paused: + return -2 # Paused + elif self.__downloaded_size / self.__total_size == 1: + return 0 # Done + else: + return 1 # Downloading + + @property + def progress(self) -> float: + """ + Get the progress of the downloader. + :return: Float Progress of the downloader. + """ + if not self.__total_size: + return -1 + return self.__downloaded_size / self.__total_size * 100 diff --git a/richka/core.py b/richka/core.py index 391d8bc..e9a3036 100644 --- a/richka/core.py +++ b/richka/core.py @@ -1,44 +1,97 @@ import time import asyncio -import richka - import aiohttp -async def __download_range(session: aiohttp.ClientSession, url: str, start: int, end: int, destination: str) -> None: +import richka +from .controller import Controller + +async def __download_range(session: aiohttp.ClientSession, url: str, start: int, end: int, destination: str, controller: Controller = None) -> None: richka.logger.info(f'Downloading part {start}-{end} of {url} to {destination}.') headers = {**richka.HEADERS, **{'range': f'bytes={start}-{end}'}} - - async with session.get(url, headers=headers) as response: - content = await response.read() - with open(destination, 'r+b') as f: - f.seek(start) - f.write(content) - - richka.logger.info(f'Downloaded part {start}-{end} of {destination}.') - -async def __download_single(session: aiohttp.ClientSession, url: str, destination: str) -> None: + retry_times = richka.RETRY_TIMES + + while retry_times > 0: + try: + async with session.get(url, headers=headers, timeout=aiohttp.ClientTimeout(sock_read=richka.TIMEOUT, sock_connect=richka.TIMEOUT)) as response: + with open(destination, 'r+b') as f: + f.seek(start) + # Read stream + length = 0 + async for chunk in response.content.iter_chunked(richka.CHUNK_SIZE): + while controller.paused: + await asyncio.sleep(1) + # noinspection PyTypeChecker + f.write(chunk) + # noinspection PyTypeChecker + length += len(chunk) + # Update tracker + if controller is not None: + await controller.update_progress(length, chunk_id=f"{start}-{end}") + break + except (aiohttp.ClientError, asyncio.TimeoutError): + retry_times -= 1 + richka.logger.info(f'Download part {start}-{end} of {url} to {destination} failed for {richka.RETRY_TIMES - retry_times} times, retrying...') + await asyncio.sleep(1) + + if retry_times > 0: + richka.logger.info(f'Downloaded part {start}-{end} of {url} to {destination}.') + else: + raise TimeoutError(f'Download part {start}-{end} of {url} to {destination} timed out.') + +async def __download_single(session: aiohttp.ClientSession, url: str, destination: str, controller: Controller = None) -> None: richka.logger.info(f'Downloading {url} to {destination}.') - async with session.get(url, headers=richka.HEADERS) as response: - content = await response.read() - with open(destination, 'r+b') as f: - f.write(content) - - richka.logger.info(f'Downloaded {url} to {destination}.') - -async def download(url: str, destination: str) -> float: + retry_times = richka.RETRY_TIMES\ + + while retry_times > 0: + try: + async with session.get(url, headers=richka.HEADERS, timeout=aiohttp.ClientTimeout(sock_read=richka.TIMEOUT, sock_connect=richka.TIMEOUT)) as response: + with open(destination, 'r+b') as f: + # Read stream + length = 0 + async for chunk in response.content.iter_chunked(richka.CHUNK_SIZE): + while controller.paused: + await asyncio.sleep(1) + # noinspection PyTypeChecker + f.write(chunk) + # noinspection PyTypeChecker + length += len(chunk) + # Update tracker + if controller is not None: + await controller.update_progress(length) + break + except (aiohttp.ClientError, asyncio.TimeoutError): + retry_times -= 1 + richka.logger.info(f'Download {url} to {destination} failed for {richka.RETRY_TIMES - retry_times} times, retrying...') + await asyncio.sleep(1) + + if retry_times > 0: + richka.logger.info(f'Downloaded {url} to {destination}.') + else: + raise TimeoutError(f'Download {url} to {destination} timed out.') + +async def download(url: str, destination: str, controller: Controller = None) -> tuple[float, int]: + """ + Download a single file. + :param url: String Source URL. + :param destination: Destination Path. + :param controller: Download Controller. + :return: [Float, Integer] [Time Used, File Size] + """ async with aiohttp.ClientSession() as session: # Get file size async with session.head(url) as response: file_size = int(response.headers.get('Content-Length', 0)) - if not file_size or file_size / pow(1024, 2) <= 10: + if not file_size or file_size / pow(1024, 2) <= richka.SLICE_THRESHOLD: if not file_size: richka.logger.info(f'Failed to get file size, directly downloading {url}.') else: - richka.logger.info(f"Downloading {url} ({file_size}) to {destination} with signle mode.") + richka.logger.info(f"Downloading {url} ({file_size}) to {destination} with single mode.") + if controller is not None: + controller.total_size = file_size # Create an empty file with open(destination, 'wb') as f: @@ -46,11 +99,14 @@ async def download(url: str, destination: str) -> float: # Start task start_time = time.time() - await __download_single(session, url, destination) + await __download_single(session, url, destination, controller) end_time = time.time() - return end_time - start_time + richka.logger.info(f"Downloaded {url} ({file_size}) to {destination} with single mode.") + return end_time - start_time, file_size richka.logger.info(f'Downloading {url} ({file_size}) to {destination} with slicing mode.') + if controller is not None: + controller.total_size = file_size # Calc slice size part_size = file_size // richka.COROUTINE_LIMIT @@ -64,11 +120,12 @@ async def download(url: str, destination: str) -> float: for i in range(richka.COROUTINE_LIMIT): start = i * part_size end = (start + part_size - 1) if i < richka.COROUTINE_LIMIT - 1 else (file_size - 1) - task = __download_range(session, url, start, end, destination) + task = __download_range(session, url, start, end, destination, controller) tasks.append(task) # Start all task start_time = time.time() await asyncio.gather(*tasks) end_time = time.time() - return end_time - start_time \ No newline at end of file + richka.logger.info(f'Downloaded {url} ({file_size}) to {destination} with slicing mode.') + return end_time - start_time, file_size diff --git a/setup.py b/setup.py index 683e8d4..8ef2aa1 100644 --- a/setup.py +++ b/setup.py @@ -21,17 +21,22 @@ version=about["__version__"], description=about["__description__"], packages=find_packages(), + install_requires=[ + "aiohttp", + ], url=about["__url__"], license=about["__license__"], author=about["__author__"], author_email=about["__author_email__"], long_description_content_type="text/markdown", long_description=readme, - install_requires=[ - ], + python_requires='>=3.9', classifiers=[ 'License :: OSI Approved :: MIT License', 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', 'Programming Language :: Python :: 3 :: Only', ], entry_points={