hbmartin committed on
Commit
fd4bf4d
·
unverified ·
2 Parent(s): 6d844dd 0bbb369

Merge pull request #4 from hbmartin/regex-refactor

Browse files
pytube/__main__.py CHANGED
@@ -175,7 +175,8 @@ class YouTube:
175
  or '<img class="icon meh" src="/yts/img' # noqa: W503
176
  not in self.watch_html # noqa: W503
177
  ):
178
- raise VideoUnavailable("This video is unavailable.")
 
179
  self.embed_html = request.get(url=self.embed_url)
180
  self.age_restricted = extract.is_age_restricted(self.watch_html)
181
  self.vid_info_url = extract.video_info_url(
 
175
  or '<img class="icon meh" src="/yts/img' # noqa: W503
176
  not in self.watch_html # noqa: W503
177
  ):
178
+ raise VideoUnavailable(video_id=self.video_id)
179
+
180
  self.embed_html = request.get(url=self.embed_url)
181
  self.age_restricted = extract.is_age_restricted(self.watch_html)
182
  self.vid_info_url = extract.video_info_url(
pytube/cipher.py CHANGED
@@ -14,28 +14,27 @@ signature and decoding it.
14
 
15
  """
16
 
17
- import logging
18
- import pprint
19
  import re
20
  from itertools import chain
 
21
 
22
  from pytube.exceptions import RegexMatchError
23
- from pytube.helpers import regex_search
24
 
 
25
 
26
- logger = logging.getLogger(__name__)
27
 
28
-
29
- def get_initial_function_name(js):
30
  """Extract the name of the function responsible for computing the signature.
31
 
32
  :param str js:
33
  The contents of the base.js asset file.
34
-
 
 
35
  """
36
- # c&&d.set("signature", EE(c));
37
 
38
- pattern = [
39
  r"\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
40
  r"\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
41
  r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # noqa: E501
@@ -50,10 +49,19 @@ def get_initial_function_name(js):
50
  ]
51
 
52
  logger.debug("finding initial function name")
53
- return regex_search(pattern, js, group=1)
 
 
 
 
 
 
 
 
 
54
 
55
 
56
- def get_transform_plan(js):
57
  """Extract the "transform plan".
58
 
59
  The "transform plan" is the functions that the ciphered signature is
@@ -80,7 +88,7 @@ def get_transform_plan(js):
80
  return regex_search(pattern, js, group=1).split(";")
81
 
82
 
83
- def get_transform_object(js, var):
84
  """Extract the "transform object".
85
 
86
  The "transform object" contains the function definitions referenced in the
@@ -104,14 +112,15 @@ def get_transform_object(js, var):
104
  """
105
  pattern = r"var %s={(.*?)};" % re.escape(var)
106
  logger.debug("getting transform object")
107
- return (
108
- regex_search(pattern, js, group=1, flags=re.DOTALL)
109
- .replace("\n", " ")
110
- .split(", ")
111
- )
 
112
 
113
 
114
- def get_transform_map(js, var):
115
  """Build a transform function lookup.
116
 
117
  Build a lookup table of obfuscated JavaScript function names to the
@@ -189,7 +198,7 @@ def swap(arr, b):
189
  return list(chain([arr[r]], arr[1:r], [arr[0]], arr[r + 1 :]))
190
 
191
 
192
- def map_functions(js_func):
193
  """For a given JavaScript transform function, return the Python equivalent.
194
 
195
  :param str js_func:
@@ -213,12 +222,10 @@ def map_functions(js_func):
213
  for pattern, fn in mapper:
214
  if re.search(pattern, js_func):
215
  return fn
216
- raise RegexMatchError(
217
- "could not find python equivalent function for: ", js_func,
218
- )
219
 
220
 
221
- def parse_function(js_func):
222
  """Parse the Javascript transform function.
223
 
224
  Break a JavaScript transform function down into a two element ``tuple``
@@ -237,7 +244,13 @@ def parse_function(js_func):
237
 
238
  """
239
  logger.debug("parsing transform function")
240
- return regex_search(r"\w+\.(\w+)\(\w,(\d+)\)", js_func, groups=True)
 
 
 
 
 
 
241
 
242
 
243
  def get_signature(js: str, ciphered_signature: str) -> str:
@@ -255,24 +268,23 @@ def get_signature(js: str, ciphered_signature: str) -> str:
255
 
256
  """
257
  transform_plan = get_transform_plan(js)
258
- # DE.AJ(a,15) => DE, AJ(a,15)
259
  var, _ = transform_plan[0].split(".")
260
  transform_map = get_transform_map(js, var)
261
  signature = [s for s in ciphered_signature]
262
 
263
  for js_func in transform_plan:
264
  name, argument = parse_function(js_func)
265
- signature = transform_map[name](signature, int(argument))
266
  logger.debug(
267
- "applied transform function\n%s",
268
- pprint.pformat(
269
- {
270
- "output": "".join(signature),
271
- "js_function": name,
272
- "argument": int(argument),
273
- "function": transform_map[name],
274
- },
275
- indent=2,
276
- ),
277
  )
 
278
  return "".join(signature)
 
14
 
15
  """
16
 
 
 
17
  import re
18
  from itertools import chain
19
+ from typing import List, Tuple, Dict, Callable
20
 
21
  from pytube.exceptions import RegexMatchError
22
+ from pytube.helpers import regex_search, create_logger
23
 
24
+ logger = create_logger()
25
 
 
26
 
27
+ def get_initial_function_name(js: str) -> str:
 
28
  """Extract the name of the function responsible for computing the signature.
29
 
30
  :param str js:
31
  The contents of the base.js asset file.
32
+ :rtype: str
33
+ :returns:
34
+ Function name from regex match
35
  """
 
36
 
37
+ function_patterns = [
38
  r"\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
39
  r"\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
40
  r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # noqa: E501
 
49
  ]
50
 
51
  logger.debug("finding initial function name")
52
+ for pattern in function_patterns:
53
+ regex = re.compile(pattern)
54
+ results = regex.search(js)
55
+ if results:
56
+ logger.debug(
57
+ "finished regex search, matched: {pattern}".format(pattern=pattern)
58
+ )
59
+ return results.group(1)
60
+
61
+ raise RegexMatchError(caller="get_initial_function_name", pattern="multiple")
62
 
63
 
64
+ def get_transform_plan(js: str) -> List[str]:
65
  """Extract the "transform plan".
66
 
67
  The "transform plan" is the functions that the ciphered signature is
 
88
  return regex_search(pattern, js, group=1).split(";")
89
 
90
 
91
+ def get_transform_object(js: str, var: str) -> List[str]:
92
  """Extract the "transform object".
93
 
94
  The "transform object" contains the function definitions referenced in the
 
112
  """
113
  pattern = r"var %s={(.*?)};" % re.escape(var)
114
  logger.debug("getting transform object")
115
+ regex = re.compile(pattern, flags=re.DOTALL)
116
+ results = regex.search(js)
117
+ if not results:
118
+ raise RegexMatchError(caller="get_transform_object", pattern=pattern)
119
+
120
+ return results.group(1).replace("\n", " ").split(", ")
121
 
122
 
123
+ def get_transform_map(js: str, var: str) -> Dict:
124
  """Build a transform function lookup.
125
 
126
  Build a lookup table of obfuscated JavaScript function names to the
 
198
  return list(chain([arr[r]], arr[1:r], [arr[0]], arr[r + 1 :]))
199
 
200
 
201
+ def map_functions(js_func: str) -> Callable:
202
  """For a given JavaScript transform function, return the Python equivalent.
203
 
204
  :param str js_func:
 
222
  for pattern, fn in mapper:
223
  if re.search(pattern, js_func):
224
  return fn
225
+ raise RegexMatchError(caller="map_functions", pattern="multiple")
 
 
226
 
227
 
228
+ def parse_function(js_func: str) -> Tuple[str, int]:
229
  """Parse the Javascript transform function.
230
 
231
  Break a JavaScript transform function down into a two element ``tuple``
 
244
 
245
  """
246
  logger.debug("parsing transform function")
247
+ pattern = r"\w+\.(\w+)\(\w,(\d+)\)"
248
+ regex = re.compile(pattern)
249
+ results = regex.search(js_func)
250
+ if not results:
251
+ raise RegexMatchError(caller="parse_function", pattern=pattern)
252
+ fn_name, fn_arg = results.groups()
253
+ return fn_name, int(fn_arg)
254
 
255
 
256
  def get_signature(js: str, ciphered_signature: str) -> str:
 
268
 
269
  """
270
  transform_plan = get_transform_plan(js)
 
271
  var, _ = transform_plan[0].split(".")
272
  transform_map = get_transform_map(js, var)
273
  signature = [s for s in ciphered_signature]
274
 
275
  for js_func in transform_plan:
276
  name, argument = parse_function(js_func)
277
+ signature = transform_map[name](signature, argument)
278
  logger.debug(
279
+ "applied transform function\n"
280
+ "output: %s\n"
281
+ "js_function: %s\n"
282
+ "argument: %d\n"
283
+ "function: %s",
284
+ "".join(signature),
285
+ name,
286
+ argument,
287
+ transform_map[name],
 
288
  )
289
+
290
  return "".join(signature)
pytube/exceptions.py CHANGED
@@ -1,6 +1,6 @@
1
  # -*- coding: utf-8 -*-
2
  """Library specific exception definitions."""
3
- import sys
4
 
5
 
6
  class PytubeError(Exception):
@@ -15,26 +15,25 @@ class PytubeError(Exception):
15
  class ExtractError(PytubeError):
16
  """Data extraction based exception."""
17
 
18
- def __init__(self, msg: str, video_id: str = "unknown id"):
19
- """Construct an instance of a :class:`ExtractError <ExtractError>`.
20
-
21
- :param str msg:
22
- User defined error message.
23
- :param str video_id:
24
- A YouTube video identifier.
25
- """
26
- if video_id is not None:
27
- msg = "{video_id}: {msg}".format(video_id=video_id, msg=msg)
28
-
29
- super(ExtractError, self).__init__(msg)
30
-
31
- self.exc_info = sys.exc_info()
32
- self.video_id = video_id
33
-
34
 
35
  class RegexMatchError(ExtractError):
36
  """Regex pattern did not return any matches."""
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  class LiveStreamError(ExtractError):
40
  """Video is a live stream."""
@@ -43,6 +42,15 @@ class LiveStreamError(ExtractError):
43
  class VideoUnavailable(PytubeError):
44
  """Video is unavailable."""
45
 
 
 
 
 
 
 
 
 
 
46
 
47
  class HTMLParseError(PytubeError):
48
  """HTML could not be parsed"""
 
1
  # -*- coding: utf-8 -*-
2
  """Library specific exception definitions."""
3
+ from typing import Union, Pattern
4
 
5
 
6
  class PytubeError(Exception):
 
15
  class ExtractError(PytubeError):
16
  """Data extraction based exception."""
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  class RegexMatchError(ExtractError):
20
  """Regex pattern did not return any matches."""
21
 
22
+ def __init__(self, caller: str, pattern: Union[str, Pattern]):
23
+ """
24
+ :param str caller:
25
+ Calling function
26
+ :param str pattern:
27
+ Pattern that failed to match
28
+ """
29
+ super().__init__(
30
+ "{caller}: could not find match for {pattern}".format(
31
+ caller=caller, pattern=pattern
32
+ )
33
+ )
34
+ self.caller = caller
35
+ self.pattern = pattern
36
+
37
 
38
  class LiveStreamError(ExtractError):
39
  """Video is a live stream."""
 
42
  class VideoUnavailable(PytubeError):
43
  """Video is unavailable."""
44
 
45
+ def __init__(self, video_id: str):
46
+ """
47
+ :param str video_id:
48
+ A YouTube video identifier.
49
+ """
50
+ super().__init__("{video_id} is unavailable".format(video_id=video_id))
51
+
52
+ self.video_id = video_id
53
+
54
 
55
  class HTMLParseError(PytubeError):
56
  """HTML could not be parsed"""
pytube/extract.py CHANGED
@@ -1,6 +1,7 @@
1
  # -*- coding: utf-8 -*-
2
  """This module contains all non-cipher related data extraction logic."""
3
  import json
 
4
  from collections import OrderedDict
5
 
6
  from html.parser import HTMLParser
@@ -170,7 +171,11 @@ def mime_type_codec(mime_type_codec: str) -> Tuple[str, List[str]]:
170
 
171
  """
172
  pattern = r"(\w+\/\w+)\;\scodecs=\"([a-zA-Z-0-9.,\s]*)\""
173
- mime_type, codecs = regex_search(pattern, mime_type_codec, groups=True)
 
 
 
 
174
  return mime_type, [c.strip() for c in codecs.split(",")]
175
 
176
 
 
1
  # -*- coding: utf-8 -*-
2
  """This module contains all non-cipher related data extraction logic."""
3
  import json
4
+ import re
5
  from collections import OrderedDict
6
 
7
  from html.parser import HTMLParser
 
171
 
172
  """
173
  pattern = r"(\w+\/\w+)\;\scodecs=\"([a-zA-Z-0-9.,\s]*)\""
174
+ regex = re.compile(pattern)
175
+ results = regex.search(mime_type_codec)
176
+ if not results:
177
+ raise RegexMatchError(caller="mime_type_codec", pattern=pattern)
178
+ mime_type, codecs = results.groups()
179
  return mime_type, [c.strip() for c in codecs.split(",")]
180
 
181
 
pytube/helpers.py CHANGED
@@ -7,69 +7,34 @@ import re
7
 
8
  from pytube.exceptions import RegexMatchError
9
 
10
-
11
  logger = logging.getLogger(__name__)
12
 
13
 
14
- def regex_search(pattern, string, groups=False, group=None, flags=0):
15
  """Shortcut method to search a string for a given pattern.
16
 
17
  :param str pattern:
18
  A regular expression pattern.
19
  :param str string:
20
  A target string to search.
21
- :param bool groups:
22
- Should the return value be ``.groups()``.
23
  :param int group:
24
  Index of group to return.
25
- :param int flags:
26
- Expression behavior modifiers.
27
  :rtype:
28
  str or tuple
29
  :returns:
30
  Substring pattern matches.
31
  """
32
- if type(pattern) == list:
33
- for p in pattern:
34
- regex = re.compile(p, flags)
35
- results = regex.search(string)
36
- if not results:
37
- raise RegexMatchError(
38
- "regex pattern ({pattern}) had zero matches".format(pattern=p),
39
- )
40
- else:
41
- logger.debug(
42
- "finished regex search: %s",
43
- pprint.pformat(
44
- {"pattern": p, "results": results.group(0),}, indent=2,
45
- ),
46
- )
47
- if groups:
48
- return results.groups()
49
- elif group is not None:
50
- return results.group(group)
51
- else:
52
- return results
53
- else:
54
- regex = re.compile(pattern, flags)
55
- results = regex.search(string)
56
- if not results:
57
- raise RegexMatchError(
58
- "regex pattern ({pattern}) had zero matches".format(pattern=pattern),
59
- )
60
- else:
61
- logger.debug(
62
- "finished regex search: %s",
63
- pprint.pformat(
64
- {"pattern": pattern, "results": results.group(0),}, indent=2,
65
- ),
66
- )
67
- if groups:
68
- return results.groups()
69
- elif group is not None:
70
- return results.group(group)
71
- else:
72
- return results
73
 
74
 
75
  def safe_filename(s: str, max_length: int = 255) -> str:
 
7
 
8
  from pytube.exceptions import RegexMatchError
9
 
 
10
  logger = logging.getLogger(__name__)
11
 
12
 
13
+ def regex_search(pattern: str, string: str, group: int) -> str:
14
  """Shortcut method to search a string for a given pattern.
15
 
16
  :param str pattern:
17
  A regular expression pattern.
18
  :param str string:
19
  A target string to search.
 
 
20
  :param int group:
21
  Index of group to return.
 
 
22
  :rtype:
23
  str or tuple
24
  :returns:
25
  Substring pattern matches.
26
  """
27
+ regex = re.compile(pattern)
28
+ results = regex.search(string)
29
+ if not results:
30
+ raise RegexMatchError(caller="regex_search", pattern=pattern)
31
+
32
+ logger.debug(
33
+ "finished regex search: %s",
34
+ pprint.pformat({"pattern": pattern, "results": results.group(0),}, indent=2,),
35
+ )
36
+
37
+ return results.group(group)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
 
40
  def safe_filename(s: str, max_length: int = 255) -> str:
pytube/request.py CHANGED
@@ -3,12 +3,8 @@
3
  from urllib.request import Request
4
  from urllib.request import urlopen
5
 
6
- # 403 forbidden fix
7
 
8
-
9
- def get(
10
- url, headers=False, streaming=False, chunk_size=8 * 1024,
11
- ):
12
  """Send an http GET request.
13
 
14
  :param str url:
@@ -18,7 +14,7 @@ def get(
18
  :param bool streaming:
19
  Returns the response body in chunks via a generator.
20
  :param int chunk_size:
21
- The size in bytes of each chunk.
22
  """
23
 
24
  req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
 
3
  from urllib.request import Request
4
  from urllib.request import urlopen
5
 
 
6
 
7
+ def get(url, headers=False, streaming=False, chunk_size=8192):
 
 
 
8
  """Send an http GET request.
9
 
10
  :param str url:
 
14
  :param bool streaming:
15
  Returns the response body in chunks via a generator.
16
  :param int chunk_size:
17
+ The size in bytes of each chunk. Defaults to 8*1024
18
  """
19
 
20
  req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
pytube/streams.py CHANGED
@@ -236,21 +236,20 @@ class Stream:
236
  prefix=safe_filename(filename_prefix), filename=filename,
237
  )
238
 
239
- # file path
240
- fp = os.path.join(output_path, filename)
241
  bytes_remaining = self.filesize
242
  logger.debug(
243
- "downloading (%s total bytes) file to %s", self.filesize, fp,
244
  )
245
 
246
- with open(fp, "wb") as fh:
247
  for chunk in request.get(self.url, streaming=True):
248
  # reduce the (bytes) remainder by the length of the chunk.
249
  bytes_remaining -= len(chunk)
250
  # send to the on_progress callback.
251
  self.on_progress(chunk, fh, bytes_remaining)
252
  self.on_complete(fh)
253
- return fp
254
 
255
  def stream_to_buffer(self) -> io.BytesIO:
256
  """Write the media stream to buffer
 
236
  prefix=safe_filename(filename_prefix), filename=filename,
237
  )
238
 
239
+ file_path = os.path.join(output_path, filename)
 
240
  bytes_remaining = self.filesize
241
  logger.debug(
242
+ "downloading (%s total bytes) file to %s", self.filesize, file_path,
243
  )
244
 
245
+ with open(file_path, "wb") as fh:
246
  for chunk in request.get(self.url, streaming=True):
247
  # reduce the (bytes) remainder by the length of the chunk.
248
  bytes_remaining -= len(chunk)
249
  # send to the on_progress callback.
250
  self.on_progress(chunk, fh, bytes_remaining)
251
  self.on_complete(fh)
252
+ return file_path
253
 
254
  def stream_to_buffer(self) -> io.BytesIO:
255
  """Write the media stream to buffer
tests/test_captions.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pytube import Caption, CaptionQuery
2
+
3
+
4
+ def test_float_to_srt_time_format():
5
+ caption1 = Caption(
6
+ {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
7
+ )
8
+ assert caption1.float_to_srt_time_format(3.89) == "00:00:03,890"
9
+
10
+
11
+ def test_caption_query_all():
12
+ caption1 = Caption(
13
+ {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
14
+ )
15
+ caption2 = Caption(
16
+ {"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr"}
17
+ )
18
+ caption_query = CaptionQuery(captions=[caption1, caption2])
19
+ assert caption_query.captions == [caption1, caption2]
20
+
21
+
22
+ def test_caption_query_get_by_language_code_when_exists():
23
+ caption1 = Caption(
24
+ {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
25
+ )
26
+ caption2 = Caption(
27
+ {"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr"}
28
+ )
29
+ caption_query = CaptionQuery(captions=[caption1, caption2])
30
+ assert caption_query.get_by_language_code("en") == caption1
31
+
32
+
33
+ def test_caption_query_get_by_language_code_when_not_exists():
34
+ caption1 = Caption(
35
+ {"url": "url1", "name": {"simpleText": "name1"}, "languageCode": "en"}
36
+ )
37
+ caption2 = Caption(
38
+ {"url": "url2", "name": {"simpleText": "name2"}, "languageCode": "fr"}
39
+ )
40
+ caption_query = CaptionQuery(captions=[caption1, caption2])
41
+ assert caption_query.get_by_language_code("hello") is None
tests/test_cipher.py CHANGED
@@ -8,3 +8,29 @@ from pytube.exceptions import RegexMatchError
8
  def test_map_functions():
9
  with pytest.raises(RegexMatchError):
10
  cipher.map_functions("asdf")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  def test_map_functions():
9
  with pytest.raises(RegexMatchError):
10
  cipher.map_functions("asdf")
11
+
12
+
13
+ def test_get_initial_function_name_with_no_match_should_error():
14
+ with pytest.raises(RegexMatchError):
15
+ cipher.get_initial_function_name("asdf")
16
+
17
+
18
+ def test_get_transform_object_with_no_match_should_error():
19
+ with pytest.raises(RegexMatchError):
20
+ cipher.get_transform_object("asdf", var="lt")
21
+
22
+
23
+ def test_parse_function_with_match():
24
+ fn_name, fn_arg = cipher.parse_function("DE.AJ(a,15)")
25
+ assert fn_name == "AJ"
26
+ assert fn_arg == 15
27
+
28
+
29
+ def test_parse_function_with_no_match_should_error():
30
+ with pytest.raises(RegexMatchError):
31
+ cipher.parse_function("asdf")
32
+
33
+
34
+ def test_reverse():
35
+ reversed_array = cipher.reverse([1, 2, 3, 4], None)
36
+ assert reversed_array == [4, 3, 2, 1]
tests/test_exceptions.py CHANGED
@@ -1,9 +1,17 @@
1
  # -*- coding: utf-8 -*-
2
- from pytube.exceptions import ExtractError
3
 
4
 
5
- def test_is_expected():
6
  try:
7
- raise ExtractError("ppfff", video_id="YLnZklYFe7E")
8
- except ExtractError as e:
9
  assert e.video_id == "YLnZklYFe7E"
 
 
 
 
 
 
 
 
 
1
  # -*- coding: utf-8 -*-
2
+ from pytube.exceptions import VideoUnavailable, RegexMatchError
3
 
4
 
5
+ def test_video_unavailable():
6
  try:
7
+ raise VideoUnavailable(video_id="YLnZklYFe7E")
8
+ except VideoUnavailable as e:
9
  assert e.video_id == "YLnZklYFe7E"
10
+ assert str(e) == "YLnZklYFe7E is unavailable"
11
+
12
+
13
+ def test_regex_match_error():
14
+ try:
15
+ raise RegexMatchError(caller="hello", pattern="*")
16
+ except RegexMatchError as e:
17
+ assert str(e) == "hello: could not find match for *"
tests/test_extract.py CHANGED
@@ -1,5 +1,9 @@
1
  # -*- coding: utf-8 -*-
2
  """Unit tests for the :module:`extract <extract>` module."""
 
 
 
 
3
  from pytube import extract
4
 
5
 
@@ -61,3 +65,19 @@ def test_get_vid_desc(cipher_signature):
61
  "http://weibo.com/psyoppa"
62
  )
63
  assert extract.get_vid_descr(cipher_signature.watch_html) == expected
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # -*- coding: utf-8 -*-
2
  """Unit tests for the :module:`extract <extract>` module."""
3
+ import pytest
4
+
5
+ from pytube.exceptions import RegexMatchError
6
+
7
  from pytube import extract
8
 
9
 
 
65
  "http://weibo.com/psyoppa"
66
  )
67
  assert extract.get_vid_descr(cipher_signature.watch_html) == expected
68
+
69
+
70
+ def test_eurl():
71
+ url = extract.eurl("videoid")
72
+ assert url == "https://youtube.googleapis.com/v/videoid"
73
+
74
+
75
+ def test_mime_type_codec():
76
+ mime_type, mime_subtype = extract.mime_type_codec('audio/webm; codecs="opus"')
77
+ assert mime_type == "audio/webm"
78
+ assert mime_subtype == ["opus"]
79
+
80
+
81
+ def test_mime_type_codec_with_no_match_should_error():
82
+ with pytest.raises(RegexMatchError):
83
+ extract.mime_type_codec("audio/webm")
tests/test_helpers.py CHANGED
@@ -7,12 +7,11 @@ from pytube.exceptions import RegexMatchError
7
 
8
  def test_regex_search_no_match():
9
  with pytest.raises(RegexMatchError):
10
- helpers.regex_search("^a$", "", groups=True)
11
 
12
 
13
  def test_regex_search():
14
- # TODO(nficano): should check isinstance
15
- assert helpers.regex_search("^a$", "a") is not None
16
 
17
 
18
  def test_safe_filename():
 
7
 
8
  def test_regex_search_no_match():
9
  with pytest.raises(RegexMatchError):
10
+ helpers.regex_search("^a$", "", group=0)
11
 
12
 
13
  def test_regex_search():
14
+ assert helpers.regex_search("^a$", "a", group=0) == "a"
 
15
 
16
 
17
  def test_safe_filename():
tests/test_main.py CHANGED
@@ -1,7 +1,10 @@
1
  # -*- coding: utf-8 -*-
2
  from unittest import mock
3
 
 
 
4
  from pytube import YouTube
 
5
 
6
 
7
  @mock.patch("pytube.__main__.YouTube")
@@ -10,3 +13,24 @@ def test_prefetch_deferred(MockYouTube):
10
  instance.prefetch_descramble.return_value = None
11
  YouTube("https://www.youtube.com/watch?v=9bZkp7q19f0", True)
12
  assert not instance.prefetch_descramble.called
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # -*- coding: utf-8 -*-
2
  from unittest import mock
3
 
4
+ import pytest
5
+
6
  from pytube import YouTube
7
+ from pytube.exceptions import VideoUnavailable
8
 
9
 
10
  @mock.patch("pytube.__main__.YouTube")
 
13
  instance.prefetch_descramble.return_value = None
14
  YouTube("https://www.youtube.com/watch?v=9bZkp7q19f0", True)
15
  assert not instance.prefetch_descramble.called
16
+
17
+
18
+ @mock.patch("urllib.request.install_opener")
19
+ def test_install_proxy(opener):
20
+ proxies = {"http": "http://www.example.com:3128/"}
21
+ YouTube(
22
+ "https://www.youtube.com/watch?v=9bZkp7q19f0",
23
+ defer_prefetch_init=True,
24
+ proxies=proxies,
25
+ )
26
+ opener.assert_called()
27
+
28
+
29
+ @mock.patch("pytube.request.get")
30
+ def test_video_unavailable(get):
31
+ get.return_value = None
32
+ youtube = YouTube(
33
+ "https://www.youtube.com/watch?v=9bZkp7q19f0", defer_prefetch_init=True
34
+ )
35
+ with pytest.raises(VideoUnavailable):
36
+ youtube.prefetch()
tests/test_streams.py CHANGED
@@ -3,8 +3,6 @@ import random
3
 
4
  from unittest import mock
5
 
6
- import pytest
7
-
8
  from pytube import request
9
  from pytube import Stream
10
 
@@ -107,40 +105,53 @@ def test_author(cipher_signature):
107
  assert cipher_signature.author == expected
108
 
109
 
110
- @pytest.mark.skip
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  def test_repr_for_audio_streams(cipher_signature):
112
  stream = str(cipher_signature.streams.filter(only_audio=True).first())
113
  expected = (
114
- '<Stream: itag="140" mime_type="audio/mp4" abr="128kbps" ' 'acodec="mp4a.40.2">'
 
115
  )
116
  assert stream == expected
117
 
118
 
119
- @pytest.mark.skip
120
  def test_repr_for_video_streams(cipher_signature):
121
  stream = str(cipher_signature.streams.filter(only_video=True).first())
122
  expected = (
123
- '<Stream: itag="137" mime_type="video/mp4" res="1080p" '
124
- 'fps="30fps" vcodec="avc1.640028">'
125
  )
126
  assert stream == expected
127
 
128
 
129
- @pytest.mark.skip
130
  def test_repr_for_progressive_streams(cipher_signature):
131
  stream = str(cipher_signature.streams.filter(progressive=True).first())
132
  expected = (
133
- '<Stream: itag="18" mime_type="video/mp4" res="360p" '
134
- 'fps="30fps" vcodec="avc1.42001E" acodec="mp4a.40.2">'
135
  )
136
  assert stream == expected
137
 
138
 
139
- @pytest.mark.skip
140
  def test_repr_for_adaptive_streams(cipher_signature):
141
  stream = str(cipher_signature.streams.filter(adaptive=True).first())
142
  expected = (
143
- '<Stream: itag="137" mime_type="video/mp4" res="1080p" '
144
- 'fps="30fps" vcodec="avc1.640028">'
145
  )
146
  assert stream == expected
 
3
 
4
  from unittest import mock
5
 
 
 
6
  from pytube import request
7
  from pytube import Stream
8
 
 
105
  assert cipher_signature.author == expected
106
 
107
 
108
+ def test_thumbnail_when_in_details(cipher_signature):
109
+ expected = "some url"
110
+ cipher_signature.player_config_args = {
111
+ "player_response": {
112
+ "videoDetails": {"thumbnail": {"thumbnails": [{"url": expected}]}}
113
+ }
114
+ }
115
+ assert cipher_signature.thumbnail_url == expected
116
+
117
+
118
+ def test_thumbnail_when_not_in_details(cipher_signature):
119
+ expected = "https://img.youtube.com/vi/9bZkp7q19f0/maxresdefault.jpg"
120
+ cipher_signature.player_config_args = {}
121
+ assert cipher_signature.thumbnail_url == expected
122
+
123
+
124
  def test_repr_for_audio_streams(cipher_signature):
125
  stream = str(cipher_signature.streams.filter(only_audio=True).first())
126
  expected = (
127
+ '<Stream: itag="140" mime_type="audio/mp4" abr="128kbps" '
128
+ 'acodec="mp4a.40.2" progressive="False" type="audio">'
129
  )
130
  assert stream == expected
131
 
132
 
 
133
  def test_repr_for_video_streams(cipher_signature):
134
  stream = str(cipher_signature.streams.filter(only_video=True).first())
135
  expected = (
136
+ '<Stream: itag="137" mime_type="video/mp4" res="1080p" fps="30fps" '
137
+ 'vcodec="avc1.640028" progressive="False" type="video">'
138
  )
139
  assert stream == expected
140
 
141
 
 
142
  def test_repr_for_progressive_streams(cipher_signature):
143
  stream = str(cipher_signature.streams.filter(progressive=True).first())
144
  expected = (
145
+ '<Stream: itag="18" mime_type="video/mp4" res="360p" fps="30fps" '
146
+ 'vcodec="avc1.42001E" acodec="mp4a.40.2" progressive="True" type="video">'
147
  )
148
  assert stream == expected
149
 
150
 
 
151
  def test_repr_for_adaptive_streams(cipher_signature):
152
  stream = str(cipher_signature.streams.filter(adaptive=True).first())
153
  expected = (
154
+ '<Stream: itag="137" mime_type="video/mp4" res="1080p" fps="30fps" '
155
+ 'vcodec="avc1.640028" progressive="False" type="video">'
156
  )
157
  assert stream == expected