Skip to content

Commit

Permalink
single file download done
Browse files Browse the repository at this point in the history
  • Loading branch information
bigsk05 committed Oct 14, 2024
1 parent c676cd2 commit cdc2073
Show file tree
Hide file tree
Showing 7 changed files with 246 additions and 34 deletions.
41 changes: 41 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,43 @@
# Richka - Python Async Download Engine

#### Richka (from Ukrainian: Річка) means "river"; the name stands for the download speed of the Richka Engine.

## Usage

Add `import richka` to your code and start a download, for example:

```
import richka
import asyncio
import time
import threading

# Create task controller (shared by the download thread and the monitor loop)
controller = richka.Controller()


def download():
    # Create download task; asyncio.run gives this thread its own event loop
    time_used, file_size = asyncio.run(richka.download("https://mirrors.tuna.tsinghua.edu.cn/videolan-ftp/vlc-iOS/3.6.4/VLC-iOS.ipa", "VLC-iOS.ipa", controller))
    # Result
    print("Time used:", time_used)
    print(f"Speed: {file_size / time_used / pow(1024, 2)}MiB/s")


def main():
    # Progress monitor: status is 0 once the download is done, which ends the loop
    while controller.status:
        # Only print while data is flowing (1 = Downloading)
        if controller.status == 1:
            print(f"Download Progress: {round(controller.progress, 2)}% \r", end="")
        time.sleep(0.1)


if __name__ == "__main__":
    # Download in a background thread, monitor progress in the main thread
    threading.Thread(target=download).start()
    main()
```
Then you'll get the file from the Internet :D.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
aiohttp~=3.8.5
3 changes: 2 additions & 1 deletion richka/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .config import *
from .core import *
from .core import *
from .controller import *
27 changes: 23 additions & 4 deletions richka/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@
HEADERS = {"user-agent": USER_AGENT}
COROUTINE_LIMIT = 10
SLICE_THRESHOLD = 10 # MiB
TIMEOUT = 30
RETRY_TIMES = 5
CHUNK_SIZE = 102400

logger = logging.getLogger("Richka Engine")

def set_user_agent(user_agent: str) -> None:
"""
Set Public User Agent for HTTP Requests
:param user_agent: String
:param user_agent: String User-Agent you want to set.
:return:
"""
richka.USER_AGENT = user_agent
Expand All @@ -22,7 +25,7 @@ def set_user_agent(user_agent: str) -> None:
def set_headers(headers: dict) -> None:
"""
Set Public Headers for HTTP Requests
:param headers: Dictionary
:param headers: Dictionary Headers you want to set.
:return:
"""
for key, value in headers.items():
Expand All @@ -31,15 +34,31 @@ def set_headers(headers: dict) -> None:
def set_coroutine_limit(coroutine_limit: int) -> None:
    """
    Set Coroutine Limit for HTTP Requests
    :param coroutine_limit: Integer Coroutine number limit. Must be at least 1.
    :return:
    :raises ValueError: If coroutine_limit is smaller than 1.
    """
    # The slicing download divides the file size by this value and starts one
    # range request per unit, so zero or a negative number can never work.
    if coroutine_limit < 1:
        raise ValueError(f"coroutine_limit must be at least 1, got {coroutine_limit}.")
    richka.COROUTINE_LIMIT = coroutine_limit

def set_slice_threshold(slice_threshold: int) -> None:
    """
    Set Slice Threshold for HTTP Requests
    :param slice_threshold: Integer Slice threshold in MiB. Files no larger than
        this are downloaded with a single request; larger files are split into
        slices that are downloaded by concurrent coroutines.
    :return:
    """
    richka.SLICE_THRESHOLD = slice_threshold

def set_timeout(timeout: int) -> None:
    """
    Set Timeout for HTTP Requests
    :param timeout: Integer Timeout time in seconds. The download helpers pass it
        to aiohttp as both the socket-connect and the socket-read timeout.
    :return:
    """
    richka.TIMEOUT = timeout

def set_retry_times(retry_times: int) -> None:
    """
    Set Retry Times for HTTP Requests
    :param retry_times: Integer Allowed retry times. The download helpers use it
        as the total number of attempts, so a value below 1 means no request is
        made and the download fails immediately.
    :return:
    """
    richka.RETRY_TIMES = retry_times
88 changes: 88 additions & 0 deletions richka/controller.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import asyncio

class Controller:
    """
    Progress tracker and pause switch for one download.

    The download coroutines report received bytes through ``update_progress``;
    other code can read ``status`` / ``progress`` and call ``pause`` /
    ``unpause`` to steer the download.
    """

    def __init__(self):
        self.__paused = False
        self.__total_size = 0
        self.__downloaded_size = 0
        # Bytes received so far per slice, keyed by chunk ID
        self.__downloaded_size_slice = {}
        self.__lock = asyncio.Lock()  # For async safe

    @property
    def total_size(self) -> int:
        """
        Get the total size of the file.
        :return: Integer Size of the file. 0 while the size is unknown.
        """
        return self.__total_size

    @total_size.setter
    def total_size(self, size: int) -> None:
        """
        Set the total size of the file. Only the first non-zero value is kept.
        :param size: Integer Size of the file.
        :return: None
        """
        if not self.__total_size:
            self.__total_size = size

    async def update_progress(self, downloaded_chunk_size: int, chunk_id: str = None) -> None:
        """
        Update the progress of the download. Do not operate this!
        :param downloaded_chunk_size: Integer Bytes received so far, either for
            the whole file (no chunk_id) or for the slice named by chunk_id.
        :param chunk_id: String Chunk ID of the part.
        :return: None
        """
        async with self.__lock:
            if chunk_id is None and not self.__downloaded_size_slice:
                # Single-request download: the value already is the overall total.
                self.__downloaded_size = downloaded_chunk_size
            else:
                # Sliced download: remember the latest count of this slice and
                # recompute the overall total from all slices.
                self.__downloaded_size_slice[chunk_id] = downloaded_chunk_size
                self.__downloaded_size = sum(self.__downloaded_size_slice.values())

    @property
    def paused(self) -> bool:
        """
        Get the paused state of the downloader.
        :return: Boolean State of the downloader.
        """
        return self.__paused

    def pause(self) -> None:
        """
        Pause the downloader.
        :return: None
        """
        self.__paused = True

    def unpause(self) -> None:
        """
        Unpause the downloader.
        :return: None
        """
        self.__paused = False

    @property
    def status(self) -> int:
        """
        Get the status of the downloader.
        :return: Integer Status of the downloader. -1: Haven't Started -2: Paused 0: Done 1: Downloading
            While the total size is unknown (0), completion cannot be detected
            and a running download is always reported as 1.
        """
        if self.__downloaded_size == 0:
            return -1  # Haven't started
        if self.__paused:
            return -2  # Paused
        # Guard against an unknown total size (would divide by zero) and treat
        # "at least as many bytes as announced" as finished.
        if self.__total_size and self.__downloaded_size >= self.__total_size:
            return 0  # Done
        return 1  # Downloading

    @property
    def progress(self) -> float:
        """
        Get the progress of the downloader.
        :return: Float Progress of the downloader in percent, or -1 while the
            total size is unknown.
        """
        if not self.__total_size:
            return -1
        return self.__downloaded_size / self.__total_size * 100
111 changes: 84 additions & 27 deletions richka/core.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,112 @@
import time
import asyncio

import richka

import aiohttp

async def __download_range(session: aiohttp.ClientSession, url: str, start: int, end: int, destination: str) -> None:
import richka
from .controller import Controller

async def __download_range(session: aiohttp.ClientSession, url: str, start: int, end: int, destination: str, controller: Controller = None) -> None:
    """
    Download one byte range of a file into its place in the destination file.
    :param session: aiohttp session used for the request.
    :param url: String Source URL.
    :param start: Integer First byte of the range (also the write offset in the file).
    :param end: Integer Last byte of the range (inclusive).
    :param destination: Destination Path. The file must already exist.
    :param controller: Download Controller, or None to download without
        progress tracking and pause support.
    :return: None
    :raises TimeoutError: If every attempt failed.
    """
    richka.logger.info(f'Downloading part {start}-{end} of {url} to {destination}.')

    headers = {**richka.HEADERS, 'range': f'bytes={start}-{end}'}
    timeout = aiohttp.ClientTimeout(sock_read=richka.TIMEOUT, sock_connect=richka.TIMEOUT)

    retry_times = richka.RETRY_TIMES
    last_error = None

    while retry_times > 0:
        try:
            async with session.get(url, headers=headers, timeout=timeout) as response:
                # Treat HTTP error statuses as failed attempts instead of
                # writing the error body into the file.
                response.raise_for_status()
                with open(destination, 'r+b') as f:
                    f.seek(start)
                    # Read stream
                    length = 0
                    async for chunk in response.content.iter_chunked(richka.CHUNK_SIZE):
                        # Pausing is only possible when a controller was supplied
                        while controller is not None and controller.paused:
                            await asyncio.sleep(1)
                        # noinspection PyTypeChecker
                        f.write(chunk)
                        # noinspection PyTypeChecker
                        length += len(chunk)
                        # Update tracker
                        if controller is not None:
                            await controller.update_progress(length, chunk_id=f"{start}-{end}")
            break
        except (aiohttp.ClientError, asyncio.TimeoutError) as error:
            last_error = error
            retry_times -= 1
            richka.logger.info(f'Download part {start}-{end} of {url} to {destination} failed for {richka.RETRY_TIMES - retry_times} times, retrying...')
            await asyncio.sleep(1)

    if retry_times > 0:
        richka.logger.info(f'Downloaded part {start}-{end} of {url} to {destination}.')
    else:
        # Keep the underlying network error attached for debugging
        raise TimeoutError(f'Download part {start}-{end} of {url} to {destination} timed out.') from last_error

async def __download_single(session: aiohttp.ClientSession, url: str, destination: str, controller: Controller = None) -> None:
    """
    Download a whole file with one request and stream it into the destination file.
    :param session: aiohttp session used for the request.
    :param url: String Source URL.
    :param destination: Destination Path. The file must already exist.
    :param controller: Download Controller, or None to download without
        progress tracking and pause support.
    :return: None
    :raises TimeoutError: If every attempt failed.
    """
    richka.logger.info(f'Downloading {url} to {destination}.')

    timeout = aiohttp.ClientTimeout(sock_read=richka.TIMEOUT, sock_connect=richka.TIMEOUT)

    retry_times = richka.RETRY_TIMES
    last_error = None

    while retry_times > 0:
        try:
            async with session.get(url, headers=richka.HEADERS, timeout=timeout) as response:
                # Treat HTTP error statuses as failed attempts instead of
                # writing the error body into the file.
                response.raise_for_status()
                with open(destination, 'r+b') as f:
                    # Read stream
                    length = 0
                    async for chunk in response.content.iter_chunked(richka.CHUNK_SIZE):
                        # Pausing is only possible when a controller was supplied
                        while controller is not None and controller.paused:
                            await asyncio.sleep(1)
                        # noinspection PyTypeChecker
                        f.write(chunk)
                        # noinspection PyTypeChecker
                        length += len(chunk)
                        # Update tracker
                        if controller is not None:
                            await controller.update_progress(length)
            break
        except (aiohttp.ClientError, asyncio.TimeoutError) as error:
            last_error = error
            retry_times -= 1
            richka.logger.info(f'Download {url} to {destination} failed for {richka.RETRY_TIMES - retry_times} times, retrying...')
            await asyncio.sleep(1)

    if retry_times > 0:
        richka.logger.info(f'Downloaded {url} to {destination}.')
    else:
        # Keep the underlying network error attached for debugging
        raise TimeoutError(f'Download {url} to {destination} timed out.') from last_error

async def download(url: str, destination: str, controller: Controller = None) -> tuple[float, int]:
"""
Download a single file.
:param url: String Source URL.
:param destination: Destination Path.
:param controller: Download Controller.
:return: [Float, Integer] [Time Used, File Size]
"""
async with aiohttp.ClientSession() as session:
# Get file size
async with session.head(url) as response:
file_size = int(response.headers.get('Content-Length', 0))

if not file_size or file_size / pow(1024, 2) <= 10:
if not file_size or file_size / pow(1024, 2) <= richka.SLICE_THRESHOLD:
if not file_size:
richka.logger.info(f'Failed to get file size, directly downloading {url}.')
else:
richka.logger.info(f"Downloading {url} ({file_size}) to {destination} with signle mode.")
richka.logger.info(f"Downloading {url} ({file_size}) to {destination} with single mode.")
if controller is not None:
controller.total_size = file_size

# Create an empty file
with open(destination, 'wb') as f:
f.truncate(file_size)

# Start task
start_time = time.time()
await __download_single(session, url, destination)
await __download_single(session, url, destination, controller)
end_time = time.time()
return end_time - start_time
richka.logger.info(f"Downloaded {url} ({file_size}) to {destination} with single mode.")
return end_time - start_time, file_size

richka.logger.info(f'Downloading {url} ({file_size}) to {destination} with slicing mode.')
if controller is not None:
controller.total_size = file_size

# Calc slice size
part_size = file_size // richka.COROUTINE_LIMIT
Expand All @@ -64,11 +120,12 @@ async def download(url: str, destination: str) -> float:
for i in range(richka.COROUTINE_LIMIT):
start = i * part_size
end = (start + part_size - 1) if i < richka.COROUTINE_LIMIT - 1 else (file_size - 1)
task = __download_range(session, url, start, end, destination)
task = __download_range(session, url, start, end, destination, controller)
tasks.append(task)

# Start all task
start_time = time.time()
await asyncio.gather(*tasks)
end_time = time.time()
return end_time - start_time
richka.logger.info(f'Downloaded {url} ({file_size}) to {destination} with slicing mode.')
return end_time - start_time, file_size
9 changes: 7 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,22 @@
version=about["__version__"],
description=about["__description__"],
packages=find_packages(),
install_requires=[
"aiohttp",
],
url=about["__url__"],
license=about["__license__"],
author=about["__author__"],
author_email=about["__author_email__"],
long_description_content_type="text/markdown",
long_description=readme,
install_requires=[
],
python_requires='>=3.9',
classifiers=[
'License :: OSI Approved :: MIT License',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: 3.12',
'Programming Language :: Python :: 3 :: Only',
],
entry_points={
Expand Down

0 comments on commit cdc2073

Please sign in to comment.