@@ -11,8 +11,8 @@ def get_channel(
11
11
channel_url : str = None ,
12
12
limit : int = None ,
13
13
sleep : int = 1 ,
14
- sort_by : Literal [' newest' , ' oldest' , ' popular' ] = ' newest'
15
- ) -> Generator [dict , None , None ]:
14
+ sort_by : Literal [" newest" , " oldest" , " popular" ] = " newest" ,
15
+ ) -> Generator [dict , None , None ]:
16
16
17
17
"""Get videos for a channel.
18
18
@@ -36,22 +36,20 @@ def get_channel(
36
36
Defaults to ``"newest"``.
37
37
"""
38
38
39
- sort_by_map = {
40
- 'newest' : 'dd' ,
41
- 'oldest' : 'da' ,
42
- 'popular' : 'p'
43
- }
44
- url = '{url}/videos?view=0&sort={sort_by}&flow=grid' .format (
45
- url = channel_url or f'https://www.youtube.com/channel/{ channel_id } ' ,
46
- sort_by = sort_by_map [sort_by ]
47
- )
48
- api_endpoint = 'https://www.youtube.com/youtubei/v1/browse'
49
- videos = get_videos (url , api_endpoint , 'gridVideoRenderer' , limit , sleep )
39
+ sort_by_map = {"newest" : "dd" , "oldest" : "da" , "popular" : "p" }
40
+ url = "{url}/videos?view=0&sort={sort_by}&flow=grid" .format (
41
+ url = channel_url or f"https://www.youtube.com/channel/{ channel_id } " ,
42
+ sort_by = sort_by_map [sort_by ],
43
+ )
44
+ api_endpoint = "https://www.youtube.com/youtubei/v1/browse"
45
+ videos = get_videos (url , api_endpoint , "gridVideoRenderer" , limit , sleep )
50
46
for video in videos :
51
47
yield video
52
48
53
49
54
- def get_playlist (playlist_id : str , limit : int = None , sleep : int = 1 ) -> Generator [dict , None , None ]:
50
+ def get_playlist (
51
+ playlist_id : str , limit : int = None , sleep : int = 1
52
+ ) -> Generator [dict , None , None ]:
55
53
"""Get videos for a playlist.
56
54
57
55
Parameters:
@@ -63,9 +61,9 @@ def get_playlist(playlist_id: str, limit: int = None, sleep: int = 1) -> Generat
63
61
Seconds to sleep between API calls to youtube, in order to prevent getting blocked. Defaults to ``1``.
64
62
"""
65
63
66
- url = f' https://www.youtube.com/playlist?list={ playlist_id } '
67
- api_endpoint = ' https://www.youtube.com/youtubei/v1/browse'
68
- videos = get_videos (url , api_endpoint , ' playlistVideoRenderer' , limit , sleep )
64
+ url = f" https://www.youtube.com/playlist?list={ playlist_id } "
65
+ api_endpoint = " https://www.youtube.com/youtubei/v1/browse"
66
+ videos = get_videos (url , api_endpoint , " playlistVideoRenderer" , limit , sleep )
69
67
for video in videos :
70
68
yield video
71
69
@@ -74,9 +72,9 @@ def get_search(
74
72
query : str ,
75
73
limit : int = None ,
76
74
sleep : int = 1 ,
77
- sort_by : Literal [' relevance' , ' upload_date' , ' view_count' , ' rating' ] = ' relevance' ,
78
- results_type : Literal [' video' , ' channel' , ' playlist' , ' movie' ] = ' video'
79
- ) -> Generator [dict , None , None ]:
75
+ sort_by : Literal [" relevance" , " upload_date" , " view_count" , " rating" ] = " relevance" ,
76
+ results_type : Literal [" video" , " channel" , " playlist" , " movie" ] = " video" ,
77
+ ) -> Generator [dict , None , None ]:
80
78
81
79
"""Search youtube and get videos.
82
80
@@ -101,43 +99,51 @@ def get_search(
101
99
"""
102
100
103
101
sort_by_map = {
104
- ' relevance' : 'A' ,
105
- ' upload_date' : 'I' ,
106
- ' view_count' : 'M' ,
107
- ' rating' : 'E'
102
+ " relevance" : "A" ,
103
+ " upload_date" : "I" ,
104
+ " view_count" : "M" ,
105
+ " rating" : "E" ,
108
106
}
109
107
110
108
results_type_map = {
111
- ' video' : ['B' , ' videoRenderer' ],
112
- ' channel' : ['C' , ' channelRenderer' ],
113
- ' playlist' : ['D' , ' playlistRenderer' ],
114
- ' movie' : ['E' , ' videoRenderer' ]
109
+ " video" : ["B" , " videoRenderer" ],
110
+ " channel" : ["C" , " channelRenderer" ],
111
+ " playlist" : ["D" , " playlistRenderer" ],
112
+ " movie" : ["E" , " videoRenderer" ],
115
113
}
116
114
117
- param_string = f'CA{ sort_by_map [sort_by ]} SAhA{ results_type_map [results_type ][0 ]} '
118
- url = f'https://www.youtube.com/results?search_query={ query } &sp={ param_string } '
119
- api_endpoint = 'https://www.youtube.com/youtubei/v1/search'
120
- videos = get_videos (url , api_endpoint , results_type_map [results_type ][1 ], limit , sleep )
115
+ param_string = f"CA{ sort_by_map [sort_by ]} SAhA{ results_type_map [results_type ][0 ]} "
116
+ url = f"https://www.youtube.com/results?search_query={ query } &sp={ param_string } "
117
+ api_endpoint = "https://www.youtube.com/youtubei/v1/search"
118
+ videos = get_videos (
119
+ url , api_endpoint , results_type_map [results_type ][1 ], limit , sleep
120
+ )
121
121
for video in videos :
122
122
yield video
123
123
124
124
125
- def get_videos (url : str , api_endpoint : str , selector : str , limit : int , sleep : int ) -> Generator [dict , None , None ]:
125
+ def get_videos (
126
+ url : str , api_endpoint : str , selector : str , limit : int , sleep : int
127
+ ) -> Generator [dict , None , None ]:
126
128
session = requests .Session ()
127
- session .headers ['User-Agent' ] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36'
129
+ session .headers [
130
+ "User-Agent"
131
+ ] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36"
128
132
is_first = True
129
133
quit = False
130
134
count = 0
131
135
while True :
132
136
if is_first :
133
137
html = get_initial_data (session , url )
134
- client = json .loads (get_json_from_html (
135
- html , 'INNERTUBE_CONTEXT' , 2 , '"}},' ) + '"}}' )['client' ]
136
- api_key = get_json_from_html (html , 'innertubeApiKey' , 3 )
137
- session .headers ['X-YouTube-Client-Name' ] = '1'
138
- session .headers ['X-YouTube-Client-Version' ] = client ['clientVersion' ]
139
- data = json .loads (get_json_from_html (
140
- html , 'var ytInitialData = ' , 0 , '};' ) + '}' )
138
+ client = json .loads (
139
+ get_json_from_html (html , "INNERTUBE_CONTEXT" , 2 , '"}},' ) + '"}}'
140
+ )["client" ]
141
+ api_key = get_json_from_html (html , "innertubeApiKey" , 3 )
142
+ session .headers ["X-YouTube-Client-Name" ] = "1"
143
+ session .headers ["X-YouTube-Client-Version" ] = client ["clientVersion" ]
144
+ data = json .loads (
145
+ get_json_from_html (html , "var ytInitialData = " , 0 , "};" ) + "}"
146
+ )
141
147
next_data = get_next_data (data )
142
148
is_first = False
143
149
else :
@@ -161,40 +167,46 @@ def get_videos(url: str, api_endpoint: str, selector: str, limit: int, sleep: in
161
167
162
168
session .close ()
163
169
170
+
164
171
def get_initial_data(session: requests.Session, url: str) -> str:
    """Fetch ``url`` with ``session`` and return the response body as text.

    When the URL of the request that produced the response contains
    ``uxe=``, a ``CONSENT`` cookie with value ``YES+cb`` is stored on the
    session for ``.youtube.com`` and the page is requested once more.

    Parameters:
        session:
            Session used for the request(s); its cookie jar may be modified.
        url:
            Address of the page to download.

    Returns:
        The ``text`` of the last response received.
    """
    response = session.get(url)
    # NOTE(review): "uxe=" presumably marks a redirect to YouTube's cookie
    # consent page -- confirm. The final request URL is inspected (not `url`)
    # so that redirects are taken into account.
    if "uxe=" in response.request.url:
        session.cookies.set("CONSENT", "YES+cb", domain=".youtube.com")
        response = session.get(url)

    return response.text
172
179
173
180
174
def get_ajax_data(
    session: requests.Session,
    api_endpoint: str,
    api_key: str,
    next_data: dict,
    client: dict,
) -> dict:
    """POST a continuation request to ``api_endpoint`` and return its JSON.

    Parameters:
        session:
            Session used to send the request.
        api_endpoint:
            URL the request is posted to.
        api_key:
            Value sent as the ``key`` query parameter.
        next_data:
            Mapping providing ``"token"`` (sent as ``continuation``) and
            ``"click_params"`` (sent as ``context.clickTracking``).
        client:
            Mapping sent unchanged as ``context.client``.

    Returns:
        The decoded JSON body of the response.
    """
    data = {
        "context": {"clickTracking": next_data["click_params"], "client": client},
        "continuation": next_data["token"],
    }
    response = session.post(api_endpoint, params={"key": api_key}, json=data)
    return response.json()
184
194
185
195
186
196
def get_json_from_html(html: str, key: str, num_chars: int = 2, stop: str = '"') -> str:
    """Return the text that follows ``key`` in ``html``, up to ``stop``.

    Parameters:
        html:
            Text to search.
        key:
            Marker to locate; only its first occurrence is used.
        num_chars:
            Number of characters to skip after the end of ``key`` before the
            extracted text begins. Defaults to ``2``.
        stop:
            Terminator searched for after the start position; it is not
            included in the result. Defaults to ``'"'``.

    Returns:
        The text between the start position and the first ``stop`` after it,
        or everything from the start position to the end of ``html`` when
        ``stop`` does not occur.

    Raises:
        ValueError: If ``key`` does not occur in ``html``.
    """
    key_pos = html.find(key)
    if key_pos == -1:
        # Without this check the -1 from find() would be used as an offset
        # and an unrelated slice from the start of the page would be returned.
        raise ValueError(f"key {key!r} not found in html")

    pos_begin = key_pos + len(key) + num_chars
    pos_end = html.find(stop, pos_begin)
    if pos_end == -1:
        # A -1 end index would silently drop the final character.
        return html[pos_begin:]
    return html[pos_begin:pos_end]
190
200
191
201
192
202
def get_next_data(data: dict) -> dict:
    """Extract the continuation token and click-tracking params from ``data``.

    The first item produced by ``search_dict(data, "continuationEndpoint")``
    is used; ``search_dict`` is defined elsewhere in this module.

    Parameters:
        data:
            Parsed response data to search.

    Returns:
        A dict with ``"token"`` (taken from ``continuationCommand.token``)
        and ``"click_params"`` (a dict wrapping ``clickTrackingParams``), or
        ``None`` when no continuation endpoint is found.
    """
    endpoint = next(search_dict(data, "continuationEndpoint"), None)
    if not endpoint:
        return None

    return {
        "token": endpoint["continuationCommand"]["token"],
        "click_params": {"clickTrackingParams": endpoint["clickTrackingParams"]},
    }
200
212
0 commit comments