Skip to content

Commit cdc2073

Browse files
committed
single file download done
1 parent c676cd2 commit cdc2073

File tree

7 files changed

+246
-34
lines changed

7 files changed

+246
-34
lines changed

README.md

+41
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,43 @@
11
# Richka - Python Async Download Engine
22

3+
#### Richka (from Ukrainian: Рiчка, meaning "river") — the name stands for the download speed of the Richka Engine
4+
5+
## Usage
6+
7+
Import `richka` and call it from your own code, for example:
8+
9+
```
10+
import richka
11+
12+
import asyncio
13+
import time
14+
import threading
15+
16+
# Create task controller
17+
controller = richka.Controller()
18+
19+
def download():
20+
global controller
21+
22+
# Create download task
23+
time_used, file_size = asyncio.run(richka.download("https://mirrors.tuna.tsinghua.edu.cn/videolan-ftp/vlc-iOS/3.6.4/VLC-iOS.ipa", "VLC-iOS.ipa", controller))
24+
25+
# Result
26+
print("Time used:", time_used)
27+
print(f"Speed: {file_size / time_used / pow(1024, 2)}MiB/s")
28+
29+
def main():
30+
global controller
31+
32+
# Progress monitor
33+
while controller.status:
34+
if controller.status == 1:
35+
print(f"Download Progress: {round(controller.progress, 2)}% \r", end="")
36+
time.sleep(0.1)
37+
38+
if __name__ == "__main__":
39+
threading.Thread(target=download).start()
40+
main()
41+
42+
```
43+
Then you'll get the file from the Internet :D.

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
aiohttp~=3.8.5

richka/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
from .config import *
2-
from .core import *
2+
from .core import *
3+
from .controller import *

richka/config.py

+23-4
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,16 @@
77
HEADERS = {"user-agent": USER_AGENT}
88
COROUTINE_LIMIT = 10
99
SLICE_THRESHOLD = 10 # MiB
10+
TIMEOUT = 30
11+
RETRY_TIMES = 5
12+
CHUNK_SIZE = 102400
1013

1114
logger = logging.getLogger("Richka Engine")
1215

1316
def set_user_agent(user_agent: str) -> None:
1417
"""
1518
Set Public User Agent for HTTP Requests
16-
:param user_agent: String
19+
:param user_agent: String User-Agent you want to set.
1720
:return:
1821
"""
1922
richka.USER_AGENT = user_agent
@@ -22,7 +25,7 @@ def set_user_agent(user_agent: str) -> None:
2225
def set_headers(headers: dict) -> None:
2326
"""
2427
Set Public Headers for HTTP Requests
25-
:param headers: Dictionary
28+
:param headers: Dictionary Headers you want to set.
2629
:return:
2730
"""
2831
for key, value in headers.items():
@@ -31,15 +34,31 @@ def set_headers(headers: dict) -> None:
3134
def set_coroutine_limit(coroutine_limit: int) -> None:
    """
    Configure how many parts a sliced download is split into.
    :param coroutine_limit: Integer Number of range-download coroutines used in slicing mode.
    :return: None
    """
    richka.COROUTINE_LIMIT = coroutine_limit
3841

3942
def set_slice_threshold(slice_threshold: int) -> None:
    """
    Configure the file size above which a download is split into slices.
    :param slice_threshold: Integer Size limit in MiB; files at or below it are fetched in single mode.
    :return: None
    """
    richka.SLICE_THRESHOLD = slice_threshold
49+
50+
def set_timeout(timeout: int) -> None:
    """
    Configure the timeout applied to HTTP requests.
    :param timeout: Integer Timeout in seconds.
    :return: None
    """
    richka.TIMEOUT = timeout
57+
58+
def set_retry_times(retry_times: int) -> None:
    """
    Configure how many times a failing HTTP request may be attempted.
    :param retry_times: Integer Allowed number of attempts before the download gives up.
    :return: None
    """
    richka.RETRY_TIMES = retry_times

richka/controller.py

+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import asyncio
2+
3+
class Controller:
4+
def __init__(self):
5+
self.__paused = False
6+
self.__total_size = 0
7+
self.__downloaded_size = 0
8+
self.__downloaded_size_slice = {}
9+
self.__lock = asyncio.Lock() # For async safe
10+
11+
@property
12+
def total_size(self) -> int:
13+
"""
14+
Get the total size of the file.
15+
:return: Integer Size of the file.
16+
"""
17+
return self.__total_size
18+
19+
@total_size.setter
20+
def total_size(self, size: int) -> None:
21+
"""
22+
Set the total size of the file.
23+
:param size: Integer Size of the file.
24+
:return: None
25+
"""
26+
if not self.__total_size:
27+
self.__total_size = size
28+
29+
async def update_progress(self, downloaded_chunk_size: int, chunk_id: str = None) -> None:
30+
"""
31+
Update the progress of the download. Do not operate this!
32+
:param downloaded_chunk_size: Integer Downloaded Size of the file.
33+
:param chunk_id: String Chunk ID of the part.
34+
:return: None
35+
"""
36+
async with self.__lock:
37+
if chunk_id is None and self.__downloaded_size_slice == {}:
38+
self.__downloaded_size = downloaded_chunk_size
39+
else:
40+
self.__downloaded_size_slice[chunk_id] = downloaded_chunk_size
41+
self.__downloaded_size = sum(self.__downloaded_size_slice.values())
42+
43+
@property
44+
def paused(self) -> bool:
45+
"""
46+
Get the paused state of the downloader.
47+
:return: Boolean State of the downloader.
48+
"""
49+
return self.__paused
50+
51+
def pause(self) -> None:
52+
"""
53+
Pause the downloader.
54+
:return: None
55+
"""
56+
self.__paused = True
57+
58+
def unpause(self) -> None:
59+
"""
60+
Unpause the downloader.
61+
:return: None
62+
"""
63+
self.__paused = False
64+
65+
@property
66+
def status(self) -> int:
67+
"""
68+
Get the status of the downloader.
69+
:return: Integer Status of the downloader. -1: Haven't Started -2: Paused 0: Done 1: Downloading
70+
"""
71+
if self.__downloaded_size == 0:
72+
return -1 # Haven't started
73+
elif self.__paused:
74+
return -2 # Paused
75+
elif self.__downloaded_size / self.__total_size == 1:
76+
return 0 # Done
77+
else:
78+
return 1 # Downloading
79+
80+
@property
81+
def progress(self) -> float:
82+
"""
83+
Get the progress of the downloader.
84+
:return: Float Progress of the downloader.
85+
"""
86+
if not self.__total_size:
87+
return -1
88+
return self.__downloaded_size / self.__total_size * 100

richka/core.py

+84-27
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,112 @@
11
import time
22
import asyncio
33

4-
import richka
5-
64
import aiohttp
75

8-
async def __download_range(session: aiohttp.ClientSession, url: str, start: int, end: int, destination: str) -> None:
6+
import richka
7+
from .controller import Controller
8+
9+
async def __download_range(session: aiohttp.ClientSession, url: str, start: int, end: int, destination: str, controller: Controller = None) -> None:
    """
    Download one byte range of a file and write it into place in the destination file.
    :param session: aiohttp.ClientSession Session used for the request.
    :param url: String Source URL.
    :param start: Integer First byte of the range (also the write offset in the file).
    :param end: Integer Last byte of the range (inclusive).
    :param destination: String Path of an already existing file to write into.
    :param controller: Download Controller, or None to download without progress/pause support.
    :return: None
    :raises TimeoutError: When every allowed attempt failed.
    """
    richka.logger.info(f'Downloading part {start}-{end} of {url} to {destination}.')

    headers = {**richka.HEADERS, 'range': f'bytes={start}-{end}'}
    retry_times = richka.RETRY_TIMES

    while retry_times > 0:
        try:
            async with session.get(url, headers=headers, timeout=aiohttp.ClientTimeout(sock_read=richka.TIMEOUT, sock_connect=richka.TIMEOUT)) as response:
                with open(destination, 'r+b') as f:
                    f.seek(start)
                    # Read stream
                    length = 0
                    async for chunk in response.content.iter_chunked(richka.CHUNK_SIZE):
                        # The controller is optional: only honour pause requests when one was given.
                        while controller is not None and controller.paused:
                            await asyncio.sleep(1)
                        # noinspection PyTypeChecker
                        f.write(chunk)
                        # noinspection PyTypeChecker
                        length += len(chunk)
                        # Update tracker
                        if controller is not None:
                            await controller.update_progress(length, chunk_id=f"{start}-{end}")
            break
        except (aiohttp.ClientError, asyncio.TimeoutError):
            retry_times -= 1
            richka.logger.info(f'Download part {start}-{end} of {url} to {destination} failed for {richka.RETRY_TIMES - retry_times} times, retrying...')
            await asyncio.sleep(1)

    # retry_times only reaches 0 when every attempt raised.
    if retry_times > 0:
        richka.logger.info(f'Downloaded part {start}-{end} of {url} to {destination}.')
    else:
        raise TimeoutError(f'Download part {start}-{end} of {url} to {destination} timed out.')
42+
43+
async def __download_single(session: aiohttp.ClientSession, url: str, destination: str, controller: Controller = None) -> None:
    """
    Download a whole file with a single request and write it to the destination file.
    :param session: aiohttp.ClientSession Session used for the request.
    :param url: String Source URL.
    :param destination: String Path of an already existing file to write into.
    :param controller: Download Controller, or None to download without progress/pause support.
    :return: None
    :raises TimeoutError: When every allowed attempt failed.
    """
    richka.logger.info(f'Downloading {url} to {destination}.')

    retry_times = richka.RETRY_TIMES

    while retry_times > 0:
        try:
            async with session.get(url, headers=richka.HEADERS, timeout=aiohttp.ClientTimeout(sock_read=richka.TIMEOUT, sock_connect=richka.TIMEOUT)) as response:
                with open(destination, 'r+b') as f:
                    # Read stream
                    length = 0
                    async for chunk in response.content.iter_chunked(richka.CHUNK_SIZE):
                        # The controller is optional: only honour pause requests when one was given.
                        while controller is not None and controller.paused:
                            await asyncio.sleep(1)
                        # noinspection PyTypeChecker
                        f.write(chunk)
                        # noinspection PyTypeChecker
                        length += len(chunk)
                        # Update tracker
                        if controller is not None:
                            await controller.update_progress(length)
            break
        except (aiohttp.ClientError, asyncio.TimeoutError):
            retry_times -= 1
            richka.logger.info(f'Download {url} to {destination} failed for {richka.RETRY_TIMES - retry_times} times, retrying...')
            await asyncio.sleep(1)

    # retry_times only reaches 0 when every attempt raised.
    if retry_times > 0:
        richka.logger.info(f'Downloaded {url} to {destination}.')
    else:
        raise TimeoutError(f'Download {url} to {destination} timed out.')
74+
75+
async def download(url: str, destination: str, controller: Controller = None) -> tuple[float, int]:
76+
"""
77+
Download a single file.
78+
:param url: String Source URL.
79+
:param destination: Destination Path.
80+
:param controller: Download Controller.
81+
:return: [Float, Integer] [Time Used, File Size]
82+
"""
3283
async with aiohttp.ClientSession() as session:
3384
# Get file size
3485
async with session.head(url) as response:
3586
file_size = int(response.headers.get('Content-Length', 0))
3687

37-
if not file_size or file_size / pow(1024, 2) <= 10:
88+
if not file_size or file_size / pow(1024, 2) <= richka.SLICE_THRESHOLD:
3889
if not file_size:
3990
richka.logger.info(f'Failed to get file size, directly downloading {url}.')
4091
else:
41-
richka.logger.info(f"Downloading {url} ({file_size}) to {destination} with signle mode.")
92+
richka.logger.info(f"Downloading {url} ({file_size}) to {destination} with single mode.")
93+
if controller is not None:
94+
controller.total_size = file_size
4295

4396
# Create an empty file
4497
with open(destination, 'wb') as f:
4598
f.truncate(file_size)
4699

47100
# Start task
48101
start_time = time.time()
49-
await __download_single(session, url, destination)
102+
await __download_single(session, url, destination, controller)
50103
end_time = time.time()
51-
return end_time - start_time
104+
richka.logger.info(f"Downloaded {url} ({file_size}) to {destination} with single mode.")
105+
return end_time - start_time, file_size
52106

53107
richka.logger.info(f'Downloading {url} ({file_size}) to {destination} with slicing mode.')
108+
if controller is not None:
109+
controller.total_size = file_size
54110

55111
# Calc slice size
56112
part_size = file_size // richka.COROUTINE_LIMIT
@@ -64,11 +120,12 @@ async def download(url: str, destination: str) -> float:
64120
for i in range(richka.COROUTINE_LIMIT):
65121
start = i * part_size
66122
end = (start + part_size - 1) if i < richka.COROUTINE_LIMIT - 1 else (file_size - 1)
67-
task = __download_range(session, url, start, end, destination)
123+
task = __download_range(session, url, start, end, destination, controller)
68124
tasks.append(task)
69125

70126
# Start all task
71127
start_time = time.time()
72128
await asyncio.gather(*tasks)
73129
end_time = time.time()
74-
return end_time - start_time
130+
richka.logger.info(f'Downloaded {url} ({file_size}) to {destination} with slicing mode.')
131+
return end_time - start_time, file_size

setup.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,22 @@
2121
version=about["__version__"],
2222
description=about["__description__"],
2323
packages=find_packages(),
24+
install_requires=[
25+
"aiohttp",
26+
],
2427
url=about["__url__"],
2528
license=about["__license__"],
2629
author=about["__author__"],
2730
author_email=about["__author_email__"],
2831
long_description_content_type="text/markdown",
2932
long_description=readme,
30-
install_requires=[
31-
],
33+
python_requires='>=3.9',
3234
classifiers=[
3335
'License :: OSI Approved :: MIT License',
3436
'Programming Language :: Python :: 3.9',
37+
'Programming Language :: Python :: 3.10',
38+
'Programming Language :: Python :: 3.11',
39+
'Programming Language :: Python :: 3.12',
3540
'Programming Language :: Python :: 3 :: Only',
3641
],
3742
entry_points={

0 commit comments

Comments
 (0)