Skip to content

Commit 4fd850a

Browse files
committed
blacken
1 parent e811e6c commit 4fd850a

File tree

4 files changed

+103
-92
lines changed

4 files changed

+103
-92
lines changed

docs/conf.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,21 @@
1-
21
import os
32
import sys
4-
sys.path.insert(0, os.path.abspath('../'))
5-
from scrapetube import __version__ as release
63

7-
8-
project = 'Scrapetube'
9-
copyright = '2021, Cheskel Twersky'
10-
author = 'Cheskel Twersky'
4+
sys.path.insert(0, os.path.abspath("../"))
5+
from scrapetube import __version__ as release
116

127

8+
project = "Scrapetube"
9+
copyright = "2021, Cheskel Twersky"
10+
author = "Cheskel Twersky"
1311

14-
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.coverage', 'sphinx.ext.napoleon']
1512

16-
templates_path = ['_templates']
13+
extensions = ["sphinx.ext.autodoc", "sphinx.ext.coverage", "sphinx.ext.napoleon"]
1714

15+
templates_path = ["_templates"]
1816

19-
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
2017

18+
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
2119

2220

23-
html_theme = 'sphinx_rtd_theme'
21+
html_theme = "sphinx_rtd_theme"

scrapetube/scrapetube.py

Lines changed: 66 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ def get_channel(
1111
channel_url: str = None,
1212
limit: int = None,
1313
sleep: int = 1,
14-
sort_by: Literal['newest', 'oldest', 'popular'] = 'newest'
15-
) -> Generator[dict, None, None]:
14+
sort_by: Literal["newest", "oldest", "popular"] = "newest",
15+
) -> Generator[dict, None, None]:
1616

1717
"""Get videos for a channel.
1818
@@ -36,22 +36,20 @@ def get_channel(
3636
Defaults to ``"newest"``.
3737
"""
3838

39-
sort_by_map = {
40-
'newest': 'dd',
41-
'oldest': 'da',
42-
'popular': 'p'
43-
}
44-
url = '{url}/videos?view=0&sort={sort_by}&flow=grid'.format(
45-
url= channel_url or f'https://www.youtube.com/channel/{channel_id}',
46-
sort_by= sort_by_map[sort_by]
47-
)
48-
api_endpoint = 'https://www.youtube.com/youtubei/v1/browse'
49-
videos = get_videos(url, api_endpoint, 'gridVideoRenderer', limit, sleep)
39+
sort_by_map = {"newest": "dd", "oldest": "da", "popular": "p"}
40+
url = "{url}/videos?view=0&sort={sort_by}&flow=grid".format(
41+
url=channel_url or f"https://www.youtube.com/channel/{channel_id}",
42+
sort_by=sort_by_map[sort_by],
43+
)
44+
api_endpoint = "https://www.youtube.com/youtubei/v1/browse"
45+
videos = get_videos(url, api_endpoint, "gridVideoRenderer", limit, sleep)
5046
for video in videos:
5147
yield video
5248

5349

54-
def get_playlist(playlist_id: str, limit: int = None, sleep: int = 1) -> Generator[dict, None, None]:
50+
def get_playlist(
51+
playlist_id: str, limit: int = None, sleep: int = 1
52+
) -> Generator[dict, None, None]:
5553
"""Get videos for a playlist.
5654
5755
Parameters:
@@ -63,9 +61,9 @@ def get_playlist(playlist_id: str, limit: int = None, sleep: int = 1) -> Generat
6361
Seconds to sleep between API calls to youtube, in order to prevent getting blocked. Defaults to ``1``.
6462
"""
6563

66-
url = f'https://www.youtube.com/playlist?list={playlist_id}'
67-
api_endpoint = 'https://www.youtube.com/youtubei/v1/browse'
68-
videos = get_videos(url, api_endpoint, 'playlistVideoRenderer', limit, sleep)
64+
url = f"https://www.youtube.com/playlist?list={playlist_id}"
65+
api_endpoint = "https://www.youtube.com/youtubei/v1/browse"
66+
videos = get_videos(url, api_endpoint, "playlistVideoRenderer", limit, sleep)
6967
for video in videos:
7068
yield video
7169

@@ -74,9 +72,9 @@ def get_search(
7472
query: str,
7573
limit: int = None,
7674
sleep: int = 1,
77-
sort_by: Literal['relevance', 'upload_date', 'view_count', 'rating'] = 'relevance',
78-
results_type: Literal['video', 'channel', 'playlist', 'movie'] = 'video'
79-
) -> Generator[dict, None, None]:
75+
sort_by: Literal["relevance", "upload_date", "view_count", "rating"] = "relevance",
76+
results_type: Literal["video", "channel", "playlist", "movie"] = "video",
77+
) -> Generator[dict, None, None]:
8078

8179
"""Search youtube and get videos.
8280
@@ -101,43 +99,51 @@ def get_search(
10199
"""
102100

103101
sort_by_map = {
104-
'relevance': 'A',
105-
'upload_date': 'I',
106-
'view_count': 'M',
107-
'rating': 'E'
102+
"relevance": "A",
103+
"upload_date": "I",
104+
"view_count": "M",
105+
"rating": "E",
108106
}
109107

110108
results_type_map = {
111-
'video': ['B', 'videoRenderer'],
112-
'channel': ['C', 'channelRenderer'],
113-
'playlist': ['D', 'playlistRenderer'],
114-
'movie': ['E', 'videoRenderer']
109+
"video": ["B", "videoRenderer"],
110+
"channel": ["C", "channelRenderer"],
111+
"playlist": ["D", "playlistRenderer"],
112+
"movie": ["E", "videoRenderer"],
115113
}
116114

117-
param_string = f'CA{sort_by_map[sort_by]}SAhA{results_type_map[results_type][0]}'
118-
url = f'https://www.youtube.com/results?search_query={query}&sp={param_string}'
119-
api_endpoint = 'https://www.youtube.com/youtubei/v1/search'
120-
videos = get_videos(url, api_endpoint, results_type_map[results_type][1], limit, sleep)
115+
param_string = f"CA{sort_by_map[sort_by]}SAhA{results_type_map[results_type][0]}"
116+
url = f"https://www.youtube.com/results?search_query={query}&sp={param_string}"
117+
api_endpoint = "https://www.youtube.com/youtubei/v1/search"
118+
videos = get_videos(
119+
url, api_endpoint, results_type_map[results_type][1], limit, sleep
120+
)
121121
for video in videos:
122122
yield video
123123

124124

125-
def get_videos(url: str, api_endpoint: str, selector: str, limit: int, sleep: int) -> Generator[dict, None, None]:
125+
def get_videos(
126+
url: str, api_endpoint: str, selector: str, limit: int, sleep: int
127+
) -> Generator[dict, None, None]:
126128
session = requests.Session()
127-
session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36'
129+
session.headers[
130+
"User-Agent"
131+
] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36"
128132
is_first = True
129133
quit = False
130134
count = 0
131135
while True:
132136
if is_first:
133137
html = get_initial_data(session, url)
134-
client = json.loads(get_json_from_html(
135-
html, 'INNERTUBE_CONTEXT', 2, '"}},') + '"}}')['client']
136-
api_key = get_json_from_html(html, 'innertubeApiKey', 3)
137-
session.headers['X-YouTube-Client-Name'] = '1'
138-
session.headers['X-YouTube-Client-Version'] = client['clientVersion']
139-
data = json.loads(get_json_from_html(
140-
html, 'var ytInitialData = ', 0, '};') + '}')
138+
client = json.loads(
139+
get_json_from_html(html, "INNERTUBE_CONTEXT", 2, '"}},') + '"}}'
140+
)["client"]
141+
api_key = get_json_from_html(html, "innertubeApiKey", 3)
142+
session.headers["X-YouTube-Client-Name"] = "1"
143+
session.headers["X-YouTube-Client-Version"] = client["clientVersion"]
144+
data = json.loads(
145+
get_json_from_html(html, "var ytInitialData = ", 0, "};") + "}"
146+
)
141147
next_data = get_next_data(data)
142148
is_first = False
143149
else:
@@ -161,40 +167,46 @@ def get_videos(url: str, api_endpoint: str, selector: str, limit: int, sleep: in
161167

162168
session.close()
163169

170+
164171
def get_initial_data(session: requests.Session, url: str) -> str:
165172
response = session.get(url)
166-
if 'uxe=' in response.request.url:
167-
session.cookies.set('CONSENT', 'YES+cb', domain='.youtube.com')
173+
if "uxe=" in response.request.url:
174+
session.cookies.set("CONSENT", "YES+cb", domain=".youtube.com")
168175
response = session.get(url)
169176

170177
html = response.text
171178
return html
172179

173180

174-
def get_ajax_data(session: requests.Session, api_endpoint: str, api_key: str, next_data: dict, client: dict) -> dict:
181+
def get_ajax_data(
182+
session: requests.Session,
183+
api_endpoint: str,
184+
api_key: str,
185+
next_data: dict,
186+
client: dict,
187+
) -> dict:
175188
data = {
176-
"context": {
177-
'clickTracking': next_data['click_params'],
178-
'client': client
179-
},
180-
'continuation': next_data['token']
189+
"context": {"clickTracking": next_data["click_params"], "client": client},
190+
"continuation": next_data["token"],
181191
}
182-
response = session.post(api_endpoint, params={'key': api_key}, json=data)
192+
response = session.post(api_endpoint, params={"key": api_key}, json=data)
183193
return response.json()
184194

185195

186196
def get_json_from_html(html: str, key: str, num_chars: int = 2, stop: str = '"') -> str:
187197
pos_begin = html.find(key) + len(key) + num_chars
188198
pos_end = html.find(stop, pos_begin)
189-
return html[pos_begin: pos_end]
199+
return html[pos_begin:pos_end]
190200

191201

192202
def get_next_data(data: dict) -> dict:
193-
raw_next_data = next(search_dict(data, 'continuationEndpoint'), None)
203+
raw_next_data = next(search_dict(data, "continuationEndpoint"), None)
194204
if not raw_next_data:
195205
return None
196-
next_data = {'token': raw_next_data['continuationCommand']['token'], 'click_params': {
197-
"clickTrackingParams": raw_next_data['clickTrackingParams']}}
206+
next_data = {
207+
"token": raw_next_data["continuationCommand"]["token"],
208+
"click_params": {"clickTrackingParams": raw_next_data["clickTrackingParams"]},
209+
}
198210

199211
return next_data
200212

setup.py

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,35 @@
1-
from setuptools import setup
21
import re
32

3+
from setuptools import setup
4+
45
with open("scrapetube/__init__.py", encoding="utf-8") as f:
56
version = re.findall(r"__version__ = \"(.+)\"", f.read())[0]
67

7-
with open('README.md', encoding='utf-8') as f:
8+
with open("README.md", encoding="utf-8") as f:
89
readme = f.read()
910

10-
with open('requirements.txt', encoding='utf-8') as f:
11+
with open("requirements.txt", encoding="utf-8") as f:
1112
requirements = [r.strip() for r in f]
1213

1314
setup(
14-
name = 'scrapetube',
15-
version = version,
16-
packages = ['scrapetube'],
17-
include_package_data = True,
18-
url = 'https://github.com/dermasmid/scrapetube',
19-
license = 'MIT',
20-
long_description = readme,
21-
long_description_content_type = 'text/markdown',
22-
author = 'Cheskel Twersky',
23-
author_email = 'twerskycheskel@gmail.com',
24-
description = 'Scrape youtube without the official youtube api and without selenium.',
25-
keywords = 'youtube python channel videos search playlist list get',
26-
classifiers = [
15+
name="scrapetube",
16+
version=version,
17+
packages=["scrapetube"],
18+
include_package_data=True,
19+
url="https://github.com/dermasmid/scrapetube",
20+
license="MIT",
21+
long_description=readme,
22+
long_description_content_type="text/markdown",
23+
author="Cheskel Twersky",
24+
author_email="twerskycheskel@gmail.com",
25+
description="Scrape youtube without the official youtube api and without selenium.",
26+
keywords="youtube python channel videos search playlist list get",
27+
classifiers=[
2728
"Programming Language :: Python :: 3",
2829
"License :: OSI Approved :: MIT License",
2930
"Operating System :: OS Independent",
3031
],
31-
project_urls={
32-
'Documentation': 'https://scrapetube.readthedocs.io/en/latest/'
33-
},
34-
install_requires = requirements,
35-
python_requires = '>=3.6',
32+
project_urls={"Documentation": "https://scrapetube.readthedocs.io/en/latest/"},
33+
install_requires=requirements,
34+
python_requires=">=3.6",
3635
)

tests/test.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,19 @@
22
import os
33
import platform
44

5-
if platform.system() == 'Windows':
6-
separator = '\\'
5+
if platform.system() == "Windows":
6+
separator = "\\"
77
else:
8-
separator = '/'
8+
separator = "/"
99

10-
sys.path.insert(0, '/'.join(os.path.dirname(os.path.realpath(__file__)).split(separator)[:-1]))
10+
sys.path.insert(
11+
0, "/".join(os.path.dirname(os.path.realpath(__file__)).split(separator)[:-1])
12+
)
1113

1214
import scrapetube
1315

1416

15-
videos = scrapetube.get_channel("UC9-y-6csu5WGm29I7JiwpnA", sort_by='popular')
17+
videos = scrapetube.get_channel("UC9-y-6csu5WGm29I7JiwpnA", sort_by="popular")
1618

1719
for video in videos:
18-
print(video['videoId'])
20+
print(video["videoId"])

0 commit comments

Comments
 (0)