Skip to content

Commit c676cd2

Browse files
committed
basic download function
1 parent b96a458 commit c676cd2

File tree

6 files changed

+112
-4
lines changed

6 files changed

+112
-4
lines changed

richka/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
from .config import *
1+
from .config import *
2+
from .core import *

richka/__main__.py

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+

richka/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@
55
__author__ = "Ian Xia"
66
__author_email__ = "xia@ghink.net"
77
__license__ = "MIT"
8-
__copyright__ = "Copyright Ian Xia"
8+
__copyright__ = "Copyright Ian Xia"

richka/config.py

+33-1
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,45 @@
11
import richka
22

3+
import logging
4+
35
# Version tuple; its four items are interpolated into the default User-Agent.
__VERSION = ("Alpha", 0, 0, 1)

# Default User-Agent string and the shared header mapping that carries it.
USER_AGENT = "Richka{}/{}.{}.{}".format(*__VERSION)
HEADERS = {"user-agent": USER_AGENT}

# Default number of byte-range parts a sliced download is split into.
COROUTINE_LIMIT = 10
# Default slice threshold, expressed in MiB.
SLICE_THRESHOLD = 10

# Logger shared by the whole package.
logger = logging.getLogger("Richka Engine")
612

713
def set_user_agent(user_agent: str) -> None:
    """
    Replace the User-Agent used for HTTP requests.

    Rebinds ``richka.USER_AGENT`` and stores the same value under the
    ``"user-agent"`` key of ``richka.HEADERS``.

    :param user_agent: New User-Agent string
    :return: None
    """
    # Chained assignment binds left to right: the attribute first, then the header entry.
    richka.USER_AGENT = richka.HEADERS["user-agent"] = user_agent
1021

1122
def set_headers(headers: dict) -> None:
    """
    Merge additional headers into the shared HTTP request headers.

    Every header name is lower-cased before it is stored in
    ``richka.HEADERS``, so an entry with the same lower-cased name is
    overwritten.

    :param headers: Dictionary mapping header names to header values
    :return: None
    """
    richka.HEADERS.update((name.lower(), content) for name, content in headers.items())
30+
31+
def set_coroutine_limit(coroutine_limit: int) -> None:
    """
    Set the number of parts a sliced download is split into.

    The value is stored in ``richka.COROUTINE_LIMIT``. The sliced download
    divides the file size by this number and starts one range request per
    part, so values below 1 are rejected instead of failing later.

    :param coroutine_limit: Integer, at least 1
    :return: None
    :raises ValueError: If coroutine_limit is smaller than 1
    """
    if coroutine_limit < 1:
        raise ValueError(f"coroutine_limit must be at least 1, got {coroutine_limit!r}")
    richka.COROUTINE_LIMIT = coroutine_limit
38+
39+
def set_slice_threshold(slice_threshold: int) -> None:
    """
    Set the slice threshold for HTTP downloads.

    The value is stored in ``richka.SLICE_THRESHOLD`` and is expressed in
    MiB, like the default. A negative size has no meaning and is rejected;
    0 is accepted.

    :param slice_threshold: Integer, size in MiB, 0 or greater
    :return: None
    :raises ValueError: If slice_threshold is negative
    """
    if slice_threshold < 0:
        raise ValueError(f"slice_threshold must not be negative, got {slice_threshold!r}")
    richka.SLICE_THRESHOLD = slice_threshold

richka/core.py

+74
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import time
2+
import asyncio
3+
4+
import richka
5+
6+
import aiohttp
7+
8+
async def __download_range(session: aiohttp.ClientSession, url: str, start: int, end: int, destination: str) -> None:
    """
    Download one byte range of a remote file into the destination file.

    The request carries the shared ``richka.HEADERS`` plus a ``range``
    header for ``start``-``end``. The received bytes are written at offset
    ``start`` of ``destination``, which is opened with ``'r+b'`` and must
    therefore already exist.

    :param session: aiohttp session used to send the request
    :param url: Address of the remote file
    :param start: Offset of the first requested byte (inclusive)
    :param end: Offset of the last requested byte (inclusive)
    :param destination: Path of the existing file that receives the bytes
    :return: None
    :raises aiohttp.ClientResponseError: If the server answers with an error
        status, or with any status other than 206 Partial Content
    """
    richka.logger.info(f'Downloading part {start}-{end} of {url} to {destination}.')

    headers = {**richka.HEADERS, 'range': f'bytes={start}-{end}'}

    async with session.get(url, headers=headers) as response:
        response.raise_for_status()
        # A server that ignores the range header answers 200 with the whole
        # file; writing that body at offset ``start`` would corrupt the output.
        if response.status != 206:
            raise aiohttp.ClientResponseError(
                response.request_info,
                response.history,
                status=response.status,
                message=f'Expected 206 Partial Content for bytes {start}-{end}, got {response.status}.',
                headers=response.headers,
            )
        content = await response.read()

    with open(destination, 'r+b') as f:
        f.seek(start)
        f.write(content)

    richka.logger.info(f'Downloaded part {start}-{end} of {destination}.')
20+
21+
async def __download_single(session: aiohttp.ClientSession, url: str, destination: str) -> None:
    """
    Download a whole remote file with a single request.

    The body is written from the beginning of ``destination``, which is
    opened with ``'r+b'`` and must therefore already exist. The file is then
    cut to the number of bytes written, so a file that was pre-sized larger
    than the body does not keep trailing filler bytes.

    :param session: aiohttp session used to send the request
    :param url: Address of the remote file
    :param destination: Path of the existing file that receives the body
    :return: None
    :raises aiohttp.ClientResponseError: If the server answers with an error status
    """
    richka.logger.info(f'Downloading {url} to {destination}.')

    async with session.get(url, headers=richka.HEADERS) as response:
        # Do not store an HTTP error page as if it were the requested file.
        response.raise_for_status()
        content = await response.read()

    with open(destination, 'r+b') as f:
        f.write(content)
        # Drop anything left over beyond the bytes just written.
        f.truncate()

    richka.logger.info(f'Downloaded {url} to {destination}.')
30+
31+
async def download(url: str, destination: str) -> float:
    """
    Download a file, splitting it into parallel range requests when it is large.

    A HEAD request reads ``Content-Length``. The file is fetched with a
    single request when the size is unknown, when the size in MiB does not
    exceed ``richka.SLICE_THRESHOLD``, or when the server answers
    ``Accept-Ranges: none``. Otherwise the file is pre-sized and fetched in
    up to ``richka.COROUTINE_LIMIT`` concurrent byte ranges.

    :param url: Address of the remote file
    :param destination: Path of the local file; it is created or overwritten
    :return: Seconds spent in the transfer step
    """
    async with aiohttp.ClientSession() as session:
        # Get file size. aiohttp does not follow redirects for HEAD unless
        # asked to, and the probe should carry the same headers as the GETs.
        async with session.head(url, headers=richka.HEADERS, allow_redirects=True) as response:
            if response.status < 400:
                file_size = int(response.headers.get('Content-Length', 0))
                ranges_refused = response.headers.get('Accept-Ranges', '').strip().lower() == 'none'
            else:
                # The headers of an error response do not describe the file.
                file_size = 0
                ranges_refused = False

        sliced = (
            bool(file_size)
            and not ranges_refused
            and file_size / pow(1024, 2) > richka.SLICE_THRESHOLD
        )

        if not sliced:
            if not file_size:
                richka.logger.info(f'Failed to get file size, directly downloading {url}.')
            else:
                richka.logger.info(f"Downloading {url} ({file_size}) to {destination} with single mode.")

            # Create an empty file; the single request writes the whole body,
            # so there is nothing to pre-size.
            with open(destination, 'wb'):
                pass

            # Start task
            start_time = time.perf_counter()
            await __download_single(session, url, destination)
            return time.perf_counter() - start_time

        richka.logger.info(f'Downloading {url} ({file_size}) to {destination} with slicing mode.')

        # Calc slice size. Use at least one part and never more parts than
        # bytes, so the division is defined and every range is non-empty.
        parts = max(1, min(richka.COROUTINE_LIMIT, file_size))
        part_size = file_size // parts

        # Create an empty file of the final size so every part can seek to its offset
        with open(destination, 'wb') as f:
            f.truncate(file_size)

        # Create coroutine tasks; the last part also takes the division remainder
        tasks = [
            __download_range(
                session,
                url,
                index * part_size,
                (index + 1) * part_size - 1 if index < parts - 1 else file_size - 1,
                destination,
            )
            for index in range(parts)
        ]

        # Start all tasks
        start_time = time.perf_counter()
        await asyncio.gather(*tasks)
        return time.perf_counter() - start_time

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,4 @@
3939
},
4040
package_data={'': ['README.md']},
4141
include_package_data=True,
42-
zip_safe=False)
42+
zip_safe=False)

0 commit comments

Comments
 (0)