Merge pull request #4 from hbmartin/regex-refactor
Browse files
- pytube/__main__.py +2 -1
- pytube/cipher.py +48 -36
- pytube/exceptions.py +25 -17
- pytube/extract.py +6 -1
- pytube/helpers.py +12 -47
- pytube/request.py +2 -6
- pytube/streams.py +4 -5
- tests/test_captions.py +41 -0
- tests/test_cipher.py +26 -0
- tests/test_exceptions.py +12 -4
- tests/test_extract.py +20 -0
- tests/test_helpers.py +2 -3
- tests/test_main.py +24 -0
- tests/test_streams.py +24 -13
pytube/__main__.py
CHANGED
@@ -175,7 +175,8 @@ class YouTube:
|
|
175 |
or '<img class="icon meh" src="/yts/img' # noqa: W503
|
176 |
not in self.watch_html # noqa: W503
|
177 |
):
|
178 |
-
raise VideoUnavailable(
|
|
|
179 |
self.embed_html = request.get(url=self.embed_url)
|
180 |
self.age_restricted = extract.is_age_restricted(self.watch_html)
|
181 |
self.vid_info_url = extract.video_info_url(
|
|
|
175 |
or '<img class="icon meh" src="/yts/img' # noqa: W503
|
176 |
not in self.watch_html # noqa: W503
|
177 |
):
|
178 |
+
raise VideoUnavailable(video_id=self.video_id)
|
179 |
+
|
180 |
self.embed_html = request.get(url=self.embed_url)
|
181 |
self.age_restricted = extract.is_age_restricted(self.watch_html)
|
182 |
self.vid_info_url = extract.video_info_url(
|
pytube/cipher.py
CHANGED
@@ -14,28 +14,27 @@ signature and decoding it.
|
|
14 |
|
15 |
"""
|
16 |
|
17 |
-
import logging
|
18 |
-
import pprint
|
19 |
import re
|
20 |
from itertools import chain
|
|
|
21 |
|
22 |
from pytube.exceptions import RegexMatchError
|
23 |
-
from pytube.helpers import regex_search
|
24 |
|
|
|
25 |
|
26 |
-
logger = logging.getLogger(__name__)
|
27 |
|
28 |
-
|
29 |
-
def get_initial_function_name(js):
|
30 |
"""Extract the name of the function responsible for computing the signature.
|
31 |
|
32 |
:param str js:
|
33 |
The contents of the base.js asset file.
|
34 |
-
|
|
|
|
|
35 |
"""
|
36 |
-
# c&&d.set("signature", EE(c));
|
37 |
|
38 |
-
|
39 |
r"\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
|
40 |
r"\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
|
41 |
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # noqa: E501
|
@@ -50,10 +49,19 @@ def get_initial_function_name(js):
|
|
50 |
]
|
51 |
|
52 |
logger.debug("finding initial function name")
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
|
56 |
-
def get_transform_plan(js):
|
57 |
"""Extract the "transform plan".
|
58 |
|
59 |
The "transform plan" is the functions that the ciphered signature is
|
@@ -80,7 +88,7 @@ def get_transform_plan(js):
|
|
80 |
return regex_search(pattern, js, group=1).split(";")
|
81 |
|
82 |
|
83 |
-
def get_transform_object(js, var):
|
84 |
"""Extract the "transform object".
|
85 |
|
86 |
The "transform object" contains the function definitions referenced in the
|
@@ -104,14 +112,15 @@ def get_transform_object(js, var):
|
|
104 |
"""
|
105 |
pattern = r"var %s={(.*?)};" % re.escape(var)
|
106 |
logger.debug("getting transform object")
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
|
|
112 |
|
113 |
|
114 |
-
def get_transform_map(js, var):
|
115 |
"""Build a transform function lookup.
|
116 |
|
117 |
Build a lookup table of obfuscated JavaScript function names to the
|
@@ -189,7 +198,7 @@ def swap(arr, b):
|
|
189 |
return list(chain([arr[r]], arr[1:r], [arr[0]], arr[r + 1 :]))
|
190 |
|
191 |
|
192 |
-
def map_functions(js_func):
|
193 |
"""For a given JavaScript transform function, return the Python equivalent.
|
194 |
|
195 |
:param str js_func:
|
@@ -213,12 +222,10 @@ def map_functions(js_func):
|
|
213 |
for pattern, fn in mapper:
|
214 |
if re.search(pattern, js_func):
|
215 |
return fn
|
216 |
-
raise RegexMatchError(
|
217 |
-
"could not find python equivalent function for: ", js_func,
|
218 |
-
)
|
219 |
|
220 |
|
221 |
-
def parse_function(js_func):
|
222 |
"""Parse the Javascript transform function.
|
223 |
|
224 |
Break a JavaScript transform function down into a two element ``tuple``
|
@@ -237,7 +244,13 @@ def parse_function(js_func):
|
|
237 |
|
238 |
"""
|
239 |
logger.debug("parsing transform function")
|
240 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
|
242 |
|
243 |
def get_signature(js: str, ciphered_signature: str) -> str:
|
@@ -255,24 +268,23 @@ def get_signature(js: str, ciphered_signature: str) -> str:
|
|
255 |
|
256 |
"""
|
257 |
transform_plan = get_transform_plan(js)
|
258 |
-
# DE.AJ(a,15) => DE, AJ(a,15)
|
259 |
var, _ = transform_plan[0].split(".")
|
260 |
transform_map = get_transform_map(js, var)
|
261 |
signature = [s for s in ciphered_signature]
|
262 |
|
263 |
for js_func in transform_plan:
|
264 |
name, argument = parse_function(js_func)
|
265 |
-
signature = transform_map[name](signature,
|
266 |
logger.debug(
|
267 |
-
"applied transform function\n
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
),
|
277 |
)
|
|
|
278 |
return "".join(signature)
|
|
|
14 |
|
15 |
"""
|
16 |
|
|
|
|
|
17 |
import re
|
18 |
from itertools import chain
|
19 |
+
from typing import List, Tuple, Dict, Callable
|
20 |
|
21 |
from pytube.exceptions import RegexMatchError
|
22 |
+
from pytube.helpers import regex_search, create_logger
|
23 |
|
24 |
+
logger = create_logger()
|
25 |
|
|
|
26 |
|
27 |
+
def get_initial_function_name(js: str) -> str:
|
|
|
28 |
"""Extract the name of the function responsible for computing the signature.
|
29 |
|
30 |
:param str js:
|
31 |
The contents of the base.js asset file.
|
32 |
+
:rtype: str
|
33 |
+
:returns:
|
34 |
+
Function name from regex match
|
35 |
"""
|
|
|
36 |
|
37 |
+
function_patterns = [
|
38 |
r"\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
|
39 |
r"\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
|
40 |
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # noqa: E501
|
|
|
49 |
]
|
50 |
|
51 |
logger.debug("finding initial function name")
|
52 |
+
for pattern in function_patterns:
|
53 |
+
regex = re.compile(pattern)
|
54 |
+
results = regex.search(js)
|
55 |
+
if results:
|
56 |
+
logger.debug(
|
57 |
+
"finished regex search, matched: {pattern}".format(pattern=pattern)
|
58 |
+
)
|
59 |
+
return results.group(1)
|
60 |
+
|
61 |
+
raise RegexMatchError(caller="get_initial_function_name", pattern="multiple")
|
62 |
|
63 |
|
64 |
+
def get_transform_plan(js: str) -> List[str]:
|
65 |
"""Extract the "transform plan".
|
66 |
|
67 |
The "transform plan" is the functions that the ciphered signature is
|
|
|
88 |
return regex_search(pattern, js, group=1).split(";")
|
89 |
|
90 |
|
91 |
+
def get_transform_object(js: str, var: str) -> List[str]:
|
92 |
"""Extract the "transform object".
|
93 |
|
94 |
The "transform object" contains the function definitions referenced in the
|
|
|
112 |
"""
|
113 |
pattern = r"var %s={(.*?)};" % re.escape(var)
|
114 |
logger.debug("getting transform object")
|
115 |
+
regex = re.compile(pattern, flags=re.DOTALL)
|
116 |
+
results = regex.search(js)
|
117 |
+
if not results:
|
118 |
+
raise RegexMatchError(caller="get_transform_object", pattern=pattern)
|
119 |
+
|
120 |
+
return results.group(1).replace("\n", " ").split(", ")
|
121 |
|
122 |
|
123 |
+
def get_transform_map(js: str, var: str) -> Dict:
|
124 |
"""Build a transform function lookup.
|
125 |
|
126 |
Build a lookup table of obfuscated JavaScript function names to the
|
|
|
198 |
return list(chain([arr[r]], arr[1:r], [arr[0]], arr[r + 1 :]))
|
199 |
|
200 |
|
201 |
+
def map_functions(js_func: str) -> Callable:
|
202 |
"""For a given JavaScript transform function, return the Python equivalent.
|
203 |
|
204 |
:param str js_func:
|
|
|
222 |
for pattern, fn in mapper:
|
223 |
if re.search(pattern, js_func):
|
224 |
return fn
|
225 |
+
raise RegexMatchError(caller="map_functions", pattern="multiple")
|
|
|
|
|
226 |
|
227 |
|
228 |
+
def parse_function(js_func: str) -> Tuple[str, int]:
|
229 |
"""Parse the Javascript transform function.
|
230 |
|
231 |
Break a JavaScript transform function down into a two element ``tuple``
|
|
|
244 |
|
245 |
"""
|
246 |
logger.debug("parsing transform function")
|
247 |
+
pattern = r"\w+\.(\w+)\(\w,(\d+)\)"
|
248 |
+
regex = re.compile(pattern)
|
249 |
+
results = regex.search(js_func)
|
250 |
+
if not results:
|
251 |
+
raise RegexMatchError(caller="parse_function", pattern=pattern)
|
252 |
+
fn_name, fn_arg = results.groups()
|
253 |
+
return fn_name, int(fn_arg)
|
254 |
|
255 |
|
256 |
def get_signature(js: str, ciphered_signature: str) -> str:
|
|
|
268 |
|
269 |
"""
|
270 |
transform_plan = get_transform_plan(js)
|
|
|
271 |
var, _ = transform_plan[0].split(".")
|
272 |
transform_map = get_transform_map(js, var)
|
273 |
signature = [s for s in ciphered_signature]
|
274 |
|
275 |
for js_func in transform_plan:
|
276 |
name, argument = parse_function(js_func)
|
277 |
+
signature = transform_map[name](signature, argument)
|
278 |
logger.debug(
|
279 |
+
"applied transform function\n"
|
280 |
+
"output: %s\n"
|
281 |
+
"js_function: %s\n"
|
282 |
+
"argument: %d\n"
|
283 |
+
"function: %s",
|
284 |
+
"".join(signature),
|
285 |
+
name,
|
286 |
+
argument,
|
287 |
+
transform_map[name],
|
|
|
288 |
)
|
289 |
+
|
290 |
return "".join(signature)
|
pytube/exceptions.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
"""Library specific exception definitions."""
|
3 |
-
import
|
4 |
|
5 |
|
6 |
class PytubeError(Exception):
|
@@ -15,26 +15,25 @@ class PytubeError(Exception):
|
|
15 |
class ExtractError(PytubeError):
|
16 |
"""Data extraction based exception."""
|
17 |
|
18 |
-
def __init__(self, msg: str, video_id: str = "unknown id"):
|
19 |
-
"""Construct an instance of a :class:`ExtractError <ExtractError>`.
|
20 |
-
|
21 |
-
:param str msg:
|
22 |
-
User defined error message.
|
23 |
-
:param str video_id:
|
24 |
-
A YouTube video identifier.
|
25 |
-
"""
|
26 |
-
if video_id is not None:
|
27 |
-
msg = "{video_id}: {msg}".format(video_id=video_id, msg=msg)
|
28 |
-
|
29 |
-
super(ExtractError, self).__init__(msg)
|
30 |
-
|
31 |
-
self.exc_info = sys.exc_info()
|
32 |
-
self.video_id = video_id
|
33 |
-
|
34 |
|
35 |
class RegexMatchError(ExtractError):
|
36 |
"""Regex pattern did not return any matches."""
|
37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
class LiveStreamError(ExtractError):
|
40 |
"""Video is a live stream."""
|
@@ -43,6 +42,15 @@ class LiveStreamError(ExtractError):
|
|
43 |
class VideoUnavailable(PytubeError):
|
44 |
"""Video is unavailable."""
|
45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
class HTMLParseError(PytubeError):
|
48 |
"""HTML could not be parsed"""
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
"""Library specific exception definitions."""
|
3 |
+
from typing import Union, Pattern
|
4 |
|
5 |
|
6 |
class PytubeError(Exception):
|
|
|
15 |
class ExtractError(PytubeError):
|
16 |
"""Data extraction based exception."""
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
class RegexMatchError(ExtractError):
    """Regex pattern did not return any matches."""

    def __init__(self, caller: str, pattern: Union[str, Pattern]):
        """Build the error message and remember what failed to match.

        :param str caller:
            Calling function
        :param str pattern:
            Pattern that failed to match
        """
        # Compose the human-readable message first so the base-class call
        # stays a one-liner.
        message = "{caller}: could not find match for {pattern}".format(
            caller=caller, pattern=pattern
        )
        super().__init__(message)

        # Keep the raw inputs available for programmatic inspection.
        self.pattern = pattern
        self.caller = caller
|
36 |
+
|
37 |
|
38 |
class LiveStreamError(ExtractError):
|
39 |
"""Video is a live stream."""
|
|
|
42 |
class VideoUnavailable(PytubeError):
    """Video is unavailable."""

    def __init__(self, video_id: str):
        """Build the error message and remember the affected video.

        :param str video_id:
            A YouTube video identifier.
        """
        message = "{video_id} is unavailable".format(video_id=video_id)
        super().__init__(message)

        # Exposed so callers can tell which video triggered the error.
        self.video_id = video_id
|
53 |
+
|
54 |
|
55 |
class HTMLParseError(PytubeError):
|
56 |
"""HTML could not be parsed"""
|
pytube/extract.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
"""This module contains all non-cipher related data extraction logic."""
|
3 |
import json
|
|
|
4 |
from collections import OrderedDict
|
5 |
|
6 |
from html.parser import HTMLParser
|
@@ -170,7 +171,11 @@ def mime_type_codec(mime_type_codec: str) -> Tuple[str, List[str]]:
|
|
170 |
|
171 |
"""
|
172 |
pattern = r"(\w+\/\w+)\;\scodecs=\"([a-zA-Z-0-9.,\s]*)\""
|
173 |
-
|
|
|
|
|
|
|
|
|
174 |
return mime_type, [c.strip() for c in codecs.split(",")]
|
175 |
|
176 |
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
"""This module contains all non-cipher related data extraction logic."""
|
3 |
import json
|
4 |
+
import re
|
5 |
from collections import OrderedDict
|
6 |
|
7 |
from html.parser import HTMLParser
|
|
|
171 |
|
172 |
"""
|
173 |
pattern = r"(\w+\/\w+)\;\scodecs=\"([a-zA-Z-0-9.,\s]*)\""
|
174 |
+
regex = re.compile(pattern)
|
175 |
+
results = regex.search(mime_type_codec)
|
176 |
+
if not results:
|
177 |
+
raise RegexMatchError(caller="mime_type_codec", pattern=pattern)
|
178 |
+
mime_type, codecs = results.groups()
|
179 |
return mime_type, [c.strip() for c in codecs.split(",")]
|
180 |
|
181 |
|
pytube/helpers.py
CHANGED
@@ -7,69 +7,34 @@ import re
|
|
7 |
|
8 |
from pytube.exceptions import RegexMatchError
|
9 |
|
10 |
-
|
11 |
logger = logging.getLogger(__name__)
|
12 |
|
13 |
|
14 |
-
def regex_search(pattern, string
|
15 |
"""Shortcut method to search a string for a given pattern.
|
16 |
|
17 |
:param str pattern:
|
18 |
A regular expression pattern.
|
19 |
:param str string:
|
20 |
A target string to search.
|
21 |
-
:param bool groups:
|
22 |
-
Should the return value be ``.groups()``.
|
23 |
:param int group:
|
24 |
Index of group to return.
|
25 |
-
:param int flags:
|
26 |
-
Expression behavior modifiers.
|
27 |
:rtype:
|
28 |
str or tuple
|
29 |
:returns:
|
30 |
Substring pattern matches.
|
31 |
"""
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
pprint.pformat(
|
44 |
-
{"pattern": p, "results": results.group(0),}, indent=2,
|
45 |
-
),
|
46 |
-
)
|
47 |
-
if groups:
|
48 |
-
return results.groups()
|
49 |
-
elif group is not None:
|
50 |
-
return results.group(group)
|
51 |
-
else:
|
52 |
-
return results
|
53 |
-
else:
|
54 |
-
regex = re.compile(pattern, flags)
|
55 |
-
results = regex.search(string)
|
56 |
-
if not results:
|
57 |
-
raise RegexMatchError(
|
58 |
-
"regex pattern ({pattern}) had zero matches".format(pattern=pattern),
|
59 |
-
)
|
60 |
-
else:
|
61 |
-
logger.debug(
|
62 |
-
"finished regex search: %s",
|
63 |
-
pprint.pformat(
|
64 |
-
{"pattern": pattern, "results": results.group(0),}, indent=2,
|
65 |
-
),
|
66 |
-
)
|
67 |
-
if groups:
|
68 |
-
return results.groups()
|
69 |
-
elif group is not None:
|
70 |
-
return results.group(group)
|
71 |
-
else:
|
72 |
-
return results
|
73 |
|
74 |
|
75 |
def safe_filename(s: str, max_length: int = 255) -> str:
|
|
|
7 |
|
8 |
from pytube.exceptions import RegexMatchError
|
9 |
|
|
|
10 |
logger = logging.getLogger(__name__)
|
11 |
|
12 |
|
13 |
+
def regex_search(pattern: str, string: str, group: int) -> str:
    """Shortcut method to search a string for a given pattern.

    :param str pattern:
        A regular expression pattern.
    :param str string:
        A target string to search.
    :param int group:
        Index of group to return.
    :rtype: str
    :returns:
        The substring captured by the requested group.
    :raises RegexMatchError:
        If ``pattern`` does not match anywhere in ``string``.
    """
    regex = re.compile(pattern)
    results = regex.search(string)
    if not results:
        raise RegexMatchError(caller="regex_search", pattern=pattern)

    # pformat is comparatively expensive; only build the payload when the
    # DEBUG record will actually be emitted.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(
            "finished regex search: %s",
            pprint.pformat(
                {"pattern": pattern, "results": results.group(0)}, indent=2,
            ),
        )

    return results.group(group)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
|
40 |
def safe_filename(s: str, max_length: int = 255) -> str:
|
pytube/request.py
CHANGED
@@ -3,12 +3,8 @@
|
|
3 |
from urllib.request import Request
|
4 |
from urllib.request import urlopen
|
5 |
|
6 |
-
# 403 forbidden fix
|
7 |
|
8 |
-
|
9 |
-
def get(
|
10 |
-
url, headers=False, streaming=False, chunk_size=8 * 1024,
|
11 |
-
):
|
12 |
"""Send an http GET request.
|
13 |
|
14 |
:param str url:
|
@@ -18,7 +14,7 @@ def get(
|
|
18 |
:param bool streaming:
|
19 |
Returns the response body in chunks via a generator.
|
20 |
:param int chunk_size:
|
21 |
-
The size in bytes of each chunk.
|
22 |
"""
|
23 |
|
24 |
req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
|
|
|
3 |
from urllib.request import Request
|
4 |
from urllib.request import urlopen
|
5 |
|
|
|
6 |
|
7 |
+
def get(url, headers=False, streaming=False, chunk_size=8192):
|
|
|
|
|
|
|
8 |
"""Send an http GET request.
|
9 |
|
10 |
:param str url:
|
|
|
14 |
:param bool streaming:
|
15 |
Returns the response body in chunks via a generator.
|
16 |
:param int chunk_size:
|
17 |
+
The size in bytes of each chunk. Defaults to 8192 (8 * 1024).
|
18 |
"""
|
19 |
|
20 |
req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
|
pytube/streams.py
CHANGED
@@ -236,21 +236,20 @@ class Stream:
|
|
236 |
prefix=safe_filename(filename_prefix), filename=filename,
|
237 |
)
|
238 |
|
239 |
-
|
240 |
-
fp = os.path.join(output_path, filename)
|
241 |
bytes_remaining = self.filesize
|
242 |
logger.debug(
|
243 |
-
"downloading (%s total bytes) file to %s", self.filesize,
|
244 |
)
|
245 |
|
246 |
-
with open(
|
247 |
for chunk in request.get(self.url, streaming=True):
|
248 |
# reduce the (bytes) remainder by the length of the chunk.
|
249 |
bytes_remaining -= len(chunk)
|
250 |
# send to the on_progress callback.
|
251 |
self.on_progress(chunk, fh, bytes_remaining)
|
252 |
self.on_complete(fh)
|
253 |
-
return
|
254 |
|
255 |
def stream_to_buffer(self) -> io.BytesIO:
|
256 |
"""Write the media stream to buffer
|
|
|
236 |
prefix=safe_filename(filename_prefix), filename=filename,
|
237 |
)
|
238 |
|
239 |
+
file_path = os.path.join(output_path, filename)
|
|
|
240 |
bytes_remaining = self.filesize
|
241 |
logger.debug(
|
242 |
+
"downloading (%s total bytes) file to %s", self.filesize, file_path,
|
243 |
)
|
244 |
|
245 |
+
with open(file_path, "wb") as fh:
|
246 |
for chunk in request.get(self.url, streaming=True):
|
247 |
# reduce the (bytes) remainder by the length of the chunk.
|
248 |
bytes_remaining -= len(chunk)
|
249 |
# send to the on_progress callback.
|
250 |
self.on_progress(chunk, fh, bytes_remaining)
|
251 |
self.on_complete(fh)
|
252 |
+
return file_path
|
253 |
|
254 |
def stream_to_buffer(self) -> io.BytesIO:
|
255 |
"""Write the media stream to buffer
|
tests/test_captions.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pytube import Caption, CaptionQuery
|
2 |
+
|
3 |
+
|
4 |
+
def test_float_to_srt_time_format():
|
5 |
+
caption1 = Caption(
|
6 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
7 |
+
)
|
8 |
+
assert caption1.float_to_srt_time_format(3.89) == "00:00:03,890"
|
9 |
+
|
10 |
+
|
11 |
+
def test_caption_query_all():
|
12 |
+
caption1 = Caption(
|
13 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
14 |
+
)
|
15 |
+
caption2 = Caption(
|
16 |
+
{"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr"}
|
17 |
+
)
|
18 |
+
caption_query = CaptionQuery(captions=[caption1, caption2])
|
19 |
+
assert caption_query.captions == [caption1, caption2]
|
20 |
+
|
21 |
+
|
22 |
+
def test_caption_query_get_by_language_code_when_exists():
|
23 |
+
caption1 = Caption(
|
24 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
25 |
+
)
|
26 |
+
caption2 = Caption(
|
27 |
+
{"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr"}
|
28 |
+
)
|
29 |
+
caption_query = CaptionQuery(captions=[caption1, caption2])
|
30 |
+
assert caption_query.get_by_language_code("en") == caption1
|
31 |
+
|
32 |
+
|
33 |
+
def test_caption_query_get_by_language_code_when_not_exists():
|
34 |
+
caption1 = Caption(
|
35 |
+
{"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
|
36 |
+
)
|
37 |
+
caption2 = Caption(
|
38 |
+
{"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr"}
|
39 |
+
)
|
40 |
+
caption_query = CaptionQuery(captions=[caption1, caption2])
|
41 |
+
assert caption_query.get_by_language_code("hello") is None
|
tests/test_cipher.py
CHANGED
@@ -8,3 +8,29 @@ from pytube.exceptions import RegexMatchError
|
|
8 |
def test_map_functions():
|
9 |
with pytest.raises(RegexMatchError):
|
10 |
cipher.map_functions("asdf")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
def test_map_functions():
|
9 |
with pytest.raises(RegexMatchError):
|
10 |
cipher.map_functions("asdf")
|
11 |
+
|
12 |
+
|
13 |
+
def test_get_initial_function_name_with_no_match_should_error():
|
14 |
+
with pytest.raises(RegexMatchError):
|
15 |
+
cipher.get_initial_function_name("asdf")
|
16 |
+
|
17 |
+
|
18 |
+
def test_get_transform_object_with_no_match_should_error():
|
19 |
+
with pytest.raises(RegexMatchError):
|
20 |
+
cipher.get_transform_object("asdf", var="lt")
|
21 |
+
|
22 |
+
|
23 |
+
def test_parse_function_with_match():
|
24 |
+
fn_name, fn_arg = cipher.parse_function("DE.AJ(a,15)")
|
25 |
+
assert fn_name == "AJ"
|
26 |
+
assert fn_arg == 15
|
27 |
+
|
28 |
+
|
29 |
+
def test_parse_function_with_no_match_should_error():
|
30 |
+
with pytest.raises(RegexMatchError):
|
31 |
+
cipher.parse_function("asdf")
|
32 |
+
|
33 |
+
|
34 |
+
def test_reverse():
|
35 |
+
reversed_array = cipher.reverse([1, 2, 3, 4], None)
|
36 |
+
assert reversed_array == [4, 3, 2, 1]
|
tests/test_exceptions.py
CHANGED
@@ -1,9 +1,17 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
-
from pytube.exceptions import
|
3 |
|
4 |
|
5 |
-
def
|
6 |
try:
|
7 |
-
raise
|
8 |
-
except
|
9 |
assert e.video_id == "YLnZklYFe7E"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
from pytube.exceptions import VideoUnavailable, RegexMatchError
|
3 |
|
4 |
|
5 |
+
def test_video_unavailable():
|
6 |
try:
|
7 |
+
raise VideoUnavailable(video_id="YLnZklYFe7E")
|
8 |
+
except VideoUnavailable as e:
|
9 |
assert e.video_id == "YLnZklYFe7E"
|
10 |
+
assert str(e) == "YLnZklYFe7E is unavailable"
|
11 |
+
|
12 |
+
|
13 |
+
def test_regex_match_error():
|
14 |
+
try:
|
15 |
+
raise RegexMatchError(caller="hello", pattern="*")
|
16 |
+
except RegexMatchError as e:
|
17 |
+
assert str(e) == "hello: could not find match for *"
|
tests/test_extract.py
CHANGED
@@ -1,5 +1,9 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
"""Unit tests for the :mod:`extract <extract>` module."""
|
|
|
|
|
|
|
|
|
3 |
from pytube import extract
|
4 |
|
5 |
|
@@ -61,3 +65,19 @@ def test_get_vid_desc(cipher_signature):
|
|
61 |
"http://weibo.com/psyoppa"
|
62 |
)
|
63 |
assert extract.get_vid_descr(cipher_signature.watch_html) == expected
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
"""Unit tests for the :mod:`extract <extract>` module."""
|
3 |
+
import pytest
|
4 |
+
|
5 |
+
from pytube.exceptions import RegexMatchError
|
6 |
+
|
7 |
from pytube import extract
|
8 |
|
9 |
|
|
|
65 |
"http://weibo.com/psyoppa"
|
66 |
)
|
67 |
assert extract.get_vid_descr(cipher_signature.watch_html) == expected
|
68 |
+
|
69 |
+
|
70 |
+
def test_eurl():
|
71 |
+
url = extract.eurl("videoid")
|
72 |
+
assert url == "https://youtube.googleapis.com/v/videoid"
|
73 |
+
|
74 |
+
|
75 |
+
def test_mime_type_codec():
|
76 |
+
mime_type, mime_subtype = extract.mime_type_codec('audio/webm; codecs="opus"')
|
77 |
+
assert mime_type == "audio/webm"
|
78 |
+
assert mime_subtype == ["opus"]
|
79 |
+
|
80 |
+
|
81 |
+
def test_mime_type_codec_with_no_match_should_error():
|
82 |
+
with pytest.raises(RegexMatchError):
|
83 |
+
extract.mime_type_codec("audio/webm")
|
tests/test_helpers.py
CHANGED
@@ -7,12 +7,11 @@ from pytube.exceptions import RegexMatchError
|
|
7 |
|
8 |
def test_regex_search_no_match():
|
9 |
with pytest.raises(RegexMatchError):
|
10 |
-
helpers.regex_search("^a$", "",
|
11 |
|
12 |
|
13 |
def test_regex_search():
|
14 |
-
|
15 |
-
assert helpers.regex_search("^a$", "a") is not None
|
16 |
|
17 |
|
18 |
def test_safe_filename():
|
|
|
7 |
|
8 |
def test_regex_search_no_match():
|
9 |
with pytest.raises(RegexMatchError):
|
10 |
+
helpers.regex_search("^a$", "", group=0)
|
11 |
|
12 |
|
13 |
def test_regex_search():
|
14 |
+
assert helpers.regex_search("^a$", "a", group=0) == "a"
|
|
|
15 |
|
16 |
|
17 |
def test_safe_filename():
|
tests/test_main.py
CHANGED
@@ -1,7 +1,10 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
from unittest import mock
|
3 |
|
|
|
|
|
4 |
from pytube import YouTube
|
|
|
5 |
|
6 |
|
7 |
@mock.patch("pytube.__main__.YouTube")
|
@@ -10,3 +13,24 @@ def test_prefetch_deferred(MockYouTube):
|
|
10 |
instance.prefetch_descramble.return_value = None
|
11 |
YouTube("https://www.youtube.com/watch?v=9bZkp7q19f0", True)
|
12 |
assert not instance.prefetch_descramble.called
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
from unittest import mock
|
3 |
|
4 |
+
import pytest
|
5 |
+
|
6 |
from pytube import YouTube
|
7 |
+
from pytube.exceptions import VideoUnavailable
|
8 |
|
9 |
|
10 |
@mock.patch("pytube.__main__.YouTube")
|
|
|
13 |
instance.prefetch_descramble.return_value = None
|
14 |
YouTube("https://www.youtube.com/watch?v=9bZkp7q19f0", True)
|
15 |
assert not instance.prefetch_descramble.called
|
16 |
+
|
17 |
+
|
18 |
+
@mock.patch("urllib.request.install_opener")
|
19 |
+
def test_install_proxy(opener):
|
20 |
+
proxies = {"http": "http://www.example.com:3128/"}
|
21 |
+
YouTube(
|
22 |
+
"https://www.youtube.com/watch?v=9bZkp7q19f0",
|
23 |
+
defer_prefetch_init=True,
|
24 |
+
proxies=proxies,
|
25 |
+
)
|
26 |
+
opener.assert_called()
|
27 |
+
|
28 |
+
|
29 |
+
@mock.patch("pytube.request.get")
|
30 |
+
def test_video_unavailable(get):
|
31 |
+
get.return_value = None
|
32 |
+
youtube = YouTube(
|
33 |
+
"https://www.youtube.com/watch?v=9bZkp7q19f0", defer_prefetch_init=True
|
34 |
+
)
|
35 |
+
with pytest.raises(VideoUnavailable):
|
36 |
+
youtube.prefetch()
|
tests/test_streams.py
CHANGED
@@ -3,8 +3,6 @@ import random
|
|
3 |
|
4 |
from unittest import mock
|
5 |
|
6 |
-
import pytest
|
7 |
-
|
8 |
from pytube import request
|
9 |
from pytube import Stream
|
10 |
|
@@ -107,40 +105,53 @@ def test_author(cipher_signature):
|
|
107 |
assert cipher_signature.author == expected
|
108 |
|
109 |
|
110 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
def test_repr_for_audio_streams(cipher_signature):
|
112 |
stream = str(cipher_signature.streams.filter(only_audio=True).first())
|
113 |
expected = (
|
114 |
-
'<Stream: itag="140" mime_type="audio/mp4" abr="128kbps" '
|
|
|
115 |
)
|
116 |
assert stream == expected
|
117 |
|
118 |
|
119 |
-
@pytest.mark.skip
|
120 |
def test_repr_for_video_streams(cipher_signature):
|
121 |
stream = str(cipher_signature.streams.filter(only_video=True).first())
|
122 |
expected = (
|
123 |
-
'<Stream: itag="137" mime_type="video/mp4" res="1080p" '
|
124 |
-
'
|
125 |
)
|
126 |
assert stream == expected
|
127 |
|
128 |
|
129 |
-
@pytest.mark.skip
|
130 |
def test_repr_for_progressive_streams(cipher_signature):
|
131 |
stream = str(cipher_signature.streams.filter(progressive=True).first())
|
132 |
expected = (
|
133 |
-
'<Stream: itag="18" mime_type="video/mp4" res="360p" '
|
134 |
-
'
|
135 |
)
|
136 |
assert stream == expected
|
137 |
|
138 |
|
139 |
-
@pytest.mark.skip
|
140 |
def test_repr_for_adaptive_streams(cipher_signature):
|
141 |
stream = str(cipher_signature.streams.filter(adaptive=True).first())
|
142 |
expected = (
|
143 |
-
'<Stream: itag="137" mime_type="video/mp4" res="1080p" '
|
144 |
-
'
|
145 |
)
|
146 |
assert stream == expected
|
|
|
3 |
|
4 |
from unittest import mock
|
5 |
|
|
|
|
|
6 |
from pytube import request
|
7 |
from pytube import Stream
|
8 |
|
|
|
105 |
assert cipher_signature.author == expected
|
106 |
|
107 |
|
108 |
+
def test_thumbnail_when_in_details(cipher_signature):
|
109 |
+
expected = "some url"
|
110 |
+
cipher_signature.player_config_args = {
|
111 |
+
"player_response": {
|
112 |
+
"videoDetails": {"thumbnail": {"thumbnails": [{"url": expected}]}}
|
113 |
+
}
|
114 |
+
}
|
115 |
+
assert cipher_signature.thumbnail_url == expected
|
116 |
+
|
117 |
+
|
118 |
+
def test_thumbnail_when_not_in_details(cipher_signature):
|
119 |
+
expected = "https://img.youtube.com/vi/9bZkp7q19f0/maxresdefault.jpg"
|
120 |
+
cipher_signature.player_config_args = {}
|
121 |
+
assert cipher_signature.thumbnail_url == expected
|
122 |
+
|
123 |
+
|
124 |
def test_repr_for_audio_streams(cipher_signature):
|
125 |
stream = str(cipher_signature.streams.filter(only_audio=True).first())
|
126 |
expected = (
|
127 |
+
'<Stream: itag="140" mime_type="audio/mp4" abr="128kbps" '
|
128 |
+
'acodec="mp4a.40.2" progressive="False" type="audio">'
|
129 |
)
|
130 |
assert stream == expected
|
131 |
|
132 |
|
|
|
133 |
def test_repr_for_video_streams(cipher_signature):
|
134 |
stream = str(cipher_signature.streams.filter(only_video=True).first())
|
135 |
expected = (
|
136 |
+
'<Stream: itag="137" mime_type="video/mp4" res="1080p" fps="30fps" '
|
137 |
+
'vcodec="avc1.640028" progressive="False" type="video">'
|
138 |
)
|
139 |
assert stream == expected
|
140 |
|
141 |
|
|
|
142 |
def test_repr_for_progressive_streams(cipher_signature):
|
143 |
stream = str(cipher_signature.streams.filter(progressive=True).first())
|
144 |
expected = (
|
145 |
+
'<Stream: itag="18" mime_type="video/mp4" res="360p" fps="30fps" '
|
146 |
+
'vcodec="avc1.42001E" acodec="mp4a.40.2" progressive="True" type="video">'
|
147 |
)
|
148 |
assert stream == expected
|
149 |
|
150 |
|
|
|
151 |
def test_repr_for_adaptive_streams(cipher_signature):
|
152 |
stream = str(cipher_signature.streams.filter(adaptive=True).first())
|
153 |
expected = (
|
154 |
+
'<Stream: itag="137" mime_type="video/mp4" res="1080p" fps="30fps" '
|
155 |
+
'vcodec="avc1.640028" progressive="False" type="video">'
|
156 |
)
|
157 |
assert stream == expected
|