Add files via upload #2

Open · wants to merge 5 commits into base: master
Binary file added .DS_Store
48 changes: 1 addition & 47 deletions README.md
@@ -1,48 +1,2 @@
<h3>Python YouTube API</h3>
Code courtesy of @srcecde. I modified it to accept a search term, a maximum result count, and the number of pages to crawl. The script runs from the terminal and writes a CSV file.

A basic Python client for the YouTube v3 API that fetches data from YouTube using a public API key, without OAuth

It fetches comments, performs searches, and returns videos, channels, and playlists in categorized form.

You must obtain an API key from the Google API console in order to use this script.

<h3>How to use</h3>

<i>Pass --c after the file name to call the Video Comments function</i>
<i>Pass --s after the file name to call Search by Keyword</i>
<i>Pass --sc after the file name to call Search Videos by YouTube ChannelId</i>
<br><b>You must pass one of the above arguments after the file name</b>

<h2>Video Comments</h2>
<ul>
<li>python youtube_api_cmd.py --max --videourl --key</li>
<li>--max parameter defines the maximum number of results you want (max limit = 100, default = 20)</li>
<li>--videourl parameter defines the YouTube URL</li>
<li>--key parameter defines your developer API key</li>
<li>The --videourl and --key parameters are mandatory; the --max parameter is optional</li>
</ul>

<h2>Search by Keyword</h2>
<ul>
<li>python youtube_api_cmd.py --search --max --key</li>
<li>--max parameter defines the maximum number of results you want (max limit = 100, default = 20)</li>
<li>--search parameter supplies the keyword</li>
<li>--r parameter defines the region (country), e.g. --r=IN (the value should be a country code)</li>
<li>--key parameter defines your developer API key <i><b>Mandatory</b></i></li>
<li>It returns videos, channels, and playlists in their respective categories</li>
</ul>

<h2>Search Videos by YouTube ChannelId</h2>
<ul>
<li>python youtube_api_cmd.py --channelid --max --key</li>
<li>--max parameter defines the maximum number of results you want (max limit = 100, default = 20)</li>
<li>--channelid parameter defines the channel ID <i><b>Mandatory</b></i></li>
<li>--key parameter defines your developer API key <i><b>Mandatory</b></i></li>
<li>It lists videos from the specified YouTube ChannelId</li>
</ul>

<h3>YouTube API v3</h3>
<ul>
<li><a href="https://developers.google.com/youtube/v3/">YouTube API v3 Docs</a></li>
<li><a href="http://code.google.com/apis/console">Obtain API Key</a></li>
</ul>
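For readers new to the API: the key obtained from the console is passed as an ordinary query parameter on every request. A minimal sketch of building a request URL (the key value here is a placeholder, not a working credential, and the query values are illustrative):

```python
from urllib.parse import urlencode

# Placeholder key: obtain a real one from the Google API console linked above.
API_KEY = "YOUR_API_KEY"

# Build a v3 search request URL; "part" and "q" are standard search parameters.
params = {"part": "snippet", "q": "news", "key": API_KEY}
request_url = "https://www.googleapis.com/youtube/v3/search?" + urlencode(params)
print(request_url)
```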
131 changes: 131 additions & 0 deletions getVideo.py
@@ -0,0 +1,131 @@
import argparse
import csv
import json
# Python 3 equivalents of the Python 2 urllib2/urllib imports.
from urllib.request import urlopen
from urllib.parse import urlencode

from unidecode import unidecode

DEVELOPER_KEY = ""
YOUTUBE_SEARCH_URL = "https://www.googleapis.com/youtube/v3/search"
YOUTUBE_VIDEO_URL = "https://www.googleapis.com/youtube/v3/videos"


def openURL(url, parms):
    """Return the decoded response body for url with parms as the query string."""
    f = urlopen(url + "?" + urlencode(parms))
    data = f.read()
    f.close()
    return data.decode("utf-8")
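The helper simply appends a URL-encoded query string to the endpoint. For instance, with parameters like those youtube_search builds (the values here are illustrative):

```python
from urllib.parse import urlencode

parms = {"q": "Google", "part": "id,snippet", "maxResults": 2}
url = "https://www.googleapis.com/youtube/v3/search" + "?" + urlencode(parms)
# The comma in "id,snippet" is percent-encoded as %2C.
print(url)
```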

def youtube_search(options):
    parms = {
        "q": options.q,  # The search query.
        "part": "id,snippet",
        "maxResults": options.max_results,
        "key": DEVELOPER_KEY,
        "type": "video",
    }

    match_result = openURL(YOUTUBE_SEARCH_URL, parms)
    search_response = json.loads(match_result)

    # Token for fetching the next page of results.
    nextPageToken = search_response.get("nextPageToken")

    # Begin writing the data to a CSV file.
    csvFile = open("video_result.csv", "w", newline="")
    csvWriter = csv.writer(csvFile)
    csvWriter.writerow(["title", "videoId", "viewCount", "likeCount",
                        "dislikeCount", "commentCount", "favoriteCount"])

    for search_result in search_response.get("items", []):
        if search_result["id"]["kind"] == "youtube#video":
            title = unidecode(search_result["snippet"]["title"])
            videoId = search_result["id"]["videoId"]

            video_parms = {"id": videoId, "part": "statistics", "key": DEVELOPER_KEY}
            video_match_result = openURL(YOUTUBE_VIDEO_URL, video_parms)
            video_response = json.loads(video_match_result)

            for video_result in video_response.get("items", []):
                stats = video_result["statistics"]
                viewCount = stats["viewCount"]
                # Statistics disabled by the uploader are absent; default them to 0.
                likeCount = stats.get("likeCount", 0)
                dislikeCount = stats.get("dislikeCount", 0)
                commentCount = stats.get("commentCount", 0)
                favoriteCount = stats.get("favoriteCount", 0)

                csvWriter.writerow([title, videoId, viewCount, likeCount,
                                    dislikeCount, commentCount, favoriteCount])

    page_count = 0

    # Fetch and parse the remaining pages, stopping when no token is left.
    while page_count < options.page_num and nextPageToken:
        parms["pageToken"] = nextPageToken  # The API expects "pageToken", not "PageToken".
        match_result = openURL(YOUTUBE_SEARCH_URL, parms)
        search_response = json.loads(match_result)
        nextPageToken = search_response.get("nextPageToken")

        for search_result in search_response.get("items", []):
            if search_result["id"]["kind"] == "youtube#video":
                title = unidecode(search_result["snippet"]["title"])
                videoId = search_result["id"]["videoId"]

                video_parms = {"id": videoId, "part": "statistics", "key": DEVELOPER_KEY}
                video_match_result = openURL(YOUTUBE_VIDEO_URL, video_parms)
                video_response = json.loads(video_match_result)

                for video_result in video_response.get("items", []):
                    stats = video_result["statistics"]
                    viewCount = stats["viewCount"]
                    # Missing statistics default to 0, as on the first page.
                    likeCount = stats.get("likeCount", 0)
                    dislikeCount = stats.get("dislikeCount", 0)
                    commentCount = stats.get("commentCount", 0)
                    favoriteCount = stats.get("favoriteCount", 0)

                    csvWriter.writerow([title, videoId, viewCount, likeCount,
                                        dislikeCount, commentCount, favoriteCount])

        page_count += 1

    csvFile.close()
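The pagination pattern in youtube_search (request a page, read nextPageToken, request again until the token runs out or the page limit is hit) can be exercised without the network by substituting canned responses. The crawl helper and the page data below are illustrative stand-ins, not part of the API:

```python
def crawl(fetch_page, page_limit):
    # Mirror the loop above: fetch the first page, then follow tokens.
    response = fetch_page(None)          # The first request has no pageToken.
    items = list(response["items"])
    token = response.get("nextPageToken")
    page_count = 0
    while page_count < page_limit and token:
        response = fetch_page(token)
        items += response["items"]
        token = response.get("nextPageToken")
        page_count += 1
    return items

# Canned pages standing in for API responses.
pages = {
    None: {"items": ["a", "b"], "nextPageToken": "T1"},
    "T1": {"items": ["c"]},  # No nextPageToken: this is the last page.
}
print(crawl(pages.get, page_limit=20))  # ['a', 'b', 'c']
```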

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Search on YouTube")

    # The search term.
    parser.add_argument("--q", help="Search term", default="Google")

    # Maximum results per page.
    parser.add_argument("--max-results", help="Max results", type=int, default=50)

    # Number of additional pages to crawl.
    parser.add_argument("--page-num", help="Number of pages to be pulled", type=int, default=20)

    args = parser.parse_args()
    youtube_search(args)
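Note that argparse turns each --flag-name into an attribute with underscores, which is why youtube_search reads options.max_results and options.page_num. A quick check with an illustrative argument list:

```python
import argparse

parser = argparse.ArgumentParser(description="Search on YouTube")
parser.add_argument("--q", default="Google")
parser.add_argument("--max-results", type=int, default=50)
parser.add_argument("--page-num", type=int, default=20)

# Dashes in flag names become underscores in the parsed attribute names.
args = parser.parse_args(["--q", "python tutorial", "--max-results", "10"])
print(args.q, args.max_results, args.page_num)  # python tutorial 10 20
```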