diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..7c4f227
Binary files /dev/null and b/.DS_Store differ
diff --git a/youtube_api_cmd.py b/Archive/youtube_api_cmd.py
similarity index 100%
rename from youtube_api_cmd.py
rename to Archive/youtube_api_cmd.py
diff --git a/README.md b/README.md
index 2d92b6b..bb143fb 100644
--- a/README.md
+++ b/README.md
@@ -1,48 +1,2 @@
 # Python YouTube API
+Code courtesy of @srcecde. I modified the code to accept a search term, a maximum number of results, and the number of pages to crawl. The script is run from the terminal and writes its output to a CSV file.
-A basic Python YouTube v3 API to fetch data from YouTube using public API-Key without OAuth
-
-It fetch comments, perform search and return videos, channels and playlist in categorized form.
-
-You are required to get the API key from Google API console in order to use this script
-
-## How to use
-
-Pass --c after file name for calling Video Comment function
-Pass --s after file name for calling Search by Keyword
-Pass --sc after file name for calling Search videos by YouTube ChannelId
-It is mandatory to pass any of the above argument after file name
-
-## Video Comments
-
-## Search by Keyword
-
-## Search Videos by YouTube ChannelId
-
-## YouTube API v3
diff --git a/getVideo.py b/getVideo.py
new file mode 100644
index 0000000..b74e21c
--- /dev/null
+++ b/getVideo.py
@@ -0,0 +1,90 @@
+import argparse
+import csv
+import json
+from unidecode import unidecode
+from urllib2 import urlopen
+from urllib import urlencode
+
+DEVELOPER_KEY = ""
+YOUTUBE_SEARCH_URL = "https://www.googleapis.com/youtube/v3/search"
+YOUTUBE_VIDEO_URL = "https://www.googleapis.com/youtube/v3/videos"
+
+
+def openURL(url, parms):
+    """Fetch url with the query parameters in parms and return the response body."""
+    f = urlopen(url + "?" + urlencode(parms))
+    data = f.read()
+    f.close()
+    return data.decode("utf-8")
+
+
+def write_page(search_response, csvWriter):
+    """Write one page of search results (title, id and statistics) to the CSV."""
+    for search_result in search_response.get("items", []):
+        if search_result["id"]["kind"] != "youtube#video":
+            continue
+        title = unidecode(search_result["snippet"]["title"])
+        videoId = search_result["id"]["videoId"]
+
+        # Fetch the statistics for this video with a second API call.
+        video_parms = {"id": videoId, "part": "statistics", "key": DEVELOPER_KEY}
+        video_response = json.loads(openURL(YOUTUBE_VIDEO_URL, video_parms))
+
+        for video_result in video_response.get("items", []):
+            stats = video_result["statistics"]
+            # Counters are absent when the uploader disables them; default to 0.
+            csvWriter.writerow([title,
+                                videoId,
+                                stats.get("viewCount", 0),
+                                stats.get("likeCount", 0),
+                                stats.get("dislikeCount", 0),
+                                stats.get("commentCount", 0),
+                                stats.get("favoriteCount", 0)])
+
+
+def youtube_search(options):
+    parms = {
+        "q": options.q,  # The search term.
+        "part": "id,snippet",
+        "maxResults": options.max_results,
+        "key": DEVELOPER_KEY,
+        "type": "video"
+    }
+
+    # Write the results to a CSV file.
+    csvFile = open("video_result.csv", "w")
+    csvWriter = csv.writer(csvFile)
+    csvWriter.writerow(["title", "videoId", "viewCount", "likeCount",
+                        "dislikeCount", "commentCount", "favoriteCount"])
+
+    # Fetch the first page, then follow nextPageToken through the remaining pages.
+    page_count = 0
+    nextPageToken = None
+    while page_count <= options.page_num:
+        if nextPageToken:
+            # The API parameter is "pageToken" (not "PageToken").
+            parms["pageToken"] = nextPageToken
+        search_response = json.loads(openURL(YOUTUBE_SEARCH_URL, parms))
+        write_page(search_response, csvWriter)
+        nextPageToken = search_response.get("nextPageToken")
+        if not nextPageToken:
+            break  # No further pages available.
+        page_count += 1
+
+    csvFile.close()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Search on YouTube")
+
+    # Parse the search term.
+    parser.add_argument("--q", help="Search term", default="Google")
+
+    # Parse maximum results per page (the search API caps this at 50).
+    parser.add_argument("--max-results", help="Max results per page", type=int, default=50)
+
+    # Parse the number of pages to be crawled.
+    parser.add_argument("--page-num", help="Number of pages to be pulled", type=int, default=20)
+
+    args = parser.parse_args()
+    youtube_search(args)
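For reference, a typical invocation of the new script might look like the following, assuming DEVELOPER_KEY in getVideo.py has been filled in with a key from the Google API console (the flag names come from the argparse definitions above; the query string is only an illustration):

    python getVideo.py --q "python tutorial" --max-results 50 --page-num 5

Each run overwrites video_result.csv, writing a header row followed by one row of title, videoId, viewCount, likeCount, dislikeCount, commentCount and favoriteCount per video returned.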