"""Search YouTube for videos and write per-video statistics to a CSV file.

Adapted from https://github.com/srcecde/python-youtube-api (code courtesy of
@srcecde) to accept a search term, a per-page result count and a page limit
on the command line.

The API key is read from the ``YOUTUBE_API_KEY`` environment variable so that
it never has to be committed together with the source.
"""
import argparse
import csv
import json
import os
import sys
import unicodedata

try:  # Python 3
    from urllib.parse import urlencode
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlencode
    from urllib2 import urlopen

try:
    from unidecode import unidecode
except ImportError:
    def unidecode(text):
        """Fold *text* to ASCII when the ``unidecode`` package is missing.

        Accented characters are decomposed and anything without an ASCII
        form is dropped; this is cruder than the real transliteration.
        """
        folded = unicodedata.normalize("NFKD", text)
        return folded.encode("ascii", "ignore").decode("ascii")


# Never hard-code the key: a key committed to a repository is public.
DEVELOPER_KEY = os.environ.get("YOUTUBE_API_KEY", "")
YOUTUBE_SEARCH_URL = "https://www.googleapis.com/youtube/v3/search"
YOUTUBE_VIDEO_URL = "https://www.googleapis.com/youtube/v3/videos"

CSV_HEADER = [
    "title",
    "videoId",
    "viewCount",
    "likeCount",
    "dislikeCount",
    "commentCount",
    "favoriteCount",
]
# The statistics columns are every header column after title and videoId.
STAT_FIELDS = CSV_HEADER[2:]


def openURL(url, parms):
    """Issue a GET request and return the response body as text.

    Args:
        url: Endpoint URL without a query string.
        parms: Mapping of query parameters; it is URL-encoded and appended.

    Returns:
        The response body decoded as UTF-8.
    """
    response = urlopen(url + "?" + urlencode(parms))
    try:
        return response.read().decode("utf-8")
    finally:
        # Python 2 responses are not context managers, so close explicitly.
        response.close()


def open_csv(path):
    """Open *path* for writing in the mode the ``csv`` module expects."""
    if sys.version_info[0] >= 3:
        # newline="" stops the text layer from altering csv's line endings.
        return open(path, "w", newline="", encoding="utf-8")
    return open(path, "wb")


def extract_videos(search_response):
    """Return ``[(videoId, ascii_title), ...]`` for the videos in one page.

    Items of any other kind (channels, playlists) are skipped.
    """
    return [
        (item["id"]["videoId"], unidecode(item["snippet"]["title"]))
        for item in search_response.get("items", [])
        if item["id"]["kind"] == "youtube#video"
    ]


def build_row(title, video_id, statistics):
    """Build one CSV row; counters absent from *statistics* become 0."""
    return [title, video_id] + [statistics.get(name, 0) for name in STAT_FIELDS]


def fetch_statistics(video_ids, fetch):
    """Fetch statistics for *video_ids* with a single videos.list request.

    Args:
        video_ids: Video ids taken from one search page.
        fetch: Callable ``fetch(url, parms)`` returning the JSON text.

    Returns:
        Dict mapping each video id the API returned to its statistics dict.
    """
    if not video_ids:
        return {}
    video_parms = {
        # videos.list accepts a comma-separated id list, so one request
        # replaces one request per video.
        "id": ",".join(video_ids),
        "part": "statistics",
        "key": DEVELOPER_KEY,
    }
    video_response = json.loads(fetch(YOUTUBE_VIDEO_URL, video_parms))
    return {
        item["id"]: item.get("statistics", {})
        for item in video_response.get("items", [])
    }


def youtube_search(options, output_path="video_result.csv", fetch=None):
    """Run a video search and write one CSV row per video found.

    Args:
        options: Object with the attributes ``q`` (search term),
            ``max_results`` (results per page) and ``page_num`` (number of
            result pages to fetch; numeric strings are accepted).
        output_path: Destination CSV file; it is overwritten.
        fetch: Callable ``fetch(url, parms)`` returning the JSON response
            text. Defaults to :func:`openURL`.

    At most ``page_num`` pages are requested, and paging stops early when the
    API returns no ``nextPageToken``. Videos for which the API returns no
    statistics entry are not written.
    """
    if fetch is None:
        fetch = openURL
    page_limit = int(options.page_num)

    parms = {
        "q": options.q,  # Specify the query.
        "part": "id,snippet",
        "maxResults": options.max_results,
        "key": DEVELOPER_KEY,
        "type": "video",
    }

    with open_csv(output_path) as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(CSV_HEADER)

        for _page in range(page_limit):
            search_response = json.loads(fetch(YOUTUBE_SEARCH_URL, parms))

            videos = extract_videos(search_response)
            statistics = fetch_statistics([vid for vid, _title in videos], fetch)
            for video_id, title in videos:
                if video_id in statistics:
                    csv_writer.writerow(
                        build_row(title, video_id, statistics[video_id])
                    )

            next_page_token = search_response.get("nextPageToken")
            if not next_page_token:
                break  # Last page reached.
            # The parameter name is case-sensitive: "pageToken".
            parms["pageToken"] = next_page_token


def main():
    """Parse the command line and run the search."""
    parser = argparse.ArgumentParser(description="Search on YouTube")

    # Parse the search term.
    parser.add_argument("--q", help="Search term", default="Google")

    # Parse maximum results.
    parser.add_argument("--max-results", help="Max results", type=int, default=50)

    # Parse number of pages to be crawled.
    parser.add_argument(
        "--page-num", help="Number of pages to be pulled", type=int, default=20
    )

    args = parser.parse_args()
    if not DEVELOPER_KEY:
        parser.error("no API key: set the YOUTUBE_API_KEY environment variable")
    youtube_search(args)


if __name__ == "__main__":
    main()
+

Python YouTube API

A basic Python YouTube v3 API to fetch data from YouTube using public API-Key without OAuth From 976303fd755c3165db70191ffa6bba6d033295a8 Mon Sep 17 00:00:00 2001 From: RayLee Date: Sun, 3 Dec 2017 03:49:19 -0800 Subject: [PATCH 3/5] Update README.md --- README.md | 52 +--------------------------------------------------- 1 file changed, 1 insertion(+), 51 deletions(-) diff --git a/README.md b/README.md index b150f22..bb143fb 100644 --- a/README.md +++ b/README.md @@ -1,52 +1,2 @@ -This repo is forked from https://github.com/srcecde/python-youtube-api. Code courtesy to @srcecde. I modified to suit my need. +Code courtesy of @srcecde. I modified the code to accept a search term, a maximum number of results and the number of pages to be crawled. The script is run from a terminal and outputs a CSV file. -
- -

Python YouTube API

- -A basic Python YouTube v3 API to fetch data from YouTube using public API-Key without OAuth - -It fetch comments, perform search and return videos, channels and playlist in categorized form. - -You are required to get the API key from Google API console in order to use this script - -

How to use

- -Pass --c after file name for calling Video Comment function -Pass --s after file name for calling Search by Keyword -Pass --sc after file name for calling Search videos by YouTube ChannelId -
It is mandatory to pass any of the above argument after file name - -

Video Comments

-
    -
  • python youtube_api_cmd.py --max --videourl --key
  • -
  • --max parameter for defining the maximum result you want (maxlimit = 100, default=20)
  • -
  • --videourl parameter for defining the youtube URL
  • -
  • --key parameter for defining your developer API key
  • -
  • --videourl and --key parameter is mandatory. --max parameter is optional
  • -
- -

Search by Keyword

-
    -
  • python youtube_api_cmd.py --search --max --key
  • -
  • --max parameter for defining the maximum result you want (maxlimit = 100, default=20)
  • -
  • --search parameter for giving the keyword
  • -
  • --r parameter for defining region (Country) For ex. --r=IN (Parameter should be a country code)
  • -
  • --key parameter for defining your developer API key Mandatory
  • -
  • It will return Videos, Channel and Playlist in the respective category
  • -
- -

Search Videos by YouTube ChannelId

-
    -
  • python youtube_api_cmd.py --channelid --max --key
  • -
  • --max parameter for defining the maximum result you want (maxlimit = 100, default=20)
  • -
  • --channelid parameter for defining channel id Mandatory
  • -
  • --key parameter for defining your developer API key Mandatory
  • -
  • It will list of Videos from the defined YouTube ChannelId
  • -
- -

YouTube API v3

- From ad43a0c6c338c9de3d01ea5375e90fb5ecea8562 Mon Sep 17 00:00:00 2001 From: RayL Date: Sun, 3 Dec 2017 03:50:22 -0800 Subject: [PATCH 4/5] Created archive folder --- .DS_Store | Bin 0 -> 6148 bytes youtube_api_cmd.py => Archive/youtube_api_cmd.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 .DS_Store rename youtube_api_cmd.py => Archive/youtube_api_cmd.py (100%) diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..7c4f227747dc9076c92e2d6b5eedc71bc636bb5e GIT binary patch literal 6148 zcmeH~&q@O^5XNV;V8Kg|9`g!)gILx>&pmk)cEyU8mSR2UCHiVUkKdOWW!ElvQbZ<@ zeEBnzWPh+pMnrUR=;k6b5t$-Wm6egi?$Mz$H=aPX&haR(a{v4=U$xtjiT+|2_kJPQ zvX@rY(%b)L*W2zFi_Nxay2DE=^0qua-4f1;MfZ)xZcR#qReB3XH3$TOAP@wCz>g8Y zoy|18G>tL{1c4y%Nr2}=LZ)WdVp@+56g32Z@)=zd>Wn4mlU%cFF~taynk&>?)k6$5 zcl0N>%dW-L+(kY3P+$2Yc+t8#=1oN~$GbKm-yt*6uCRiH`rr|O2G5bJI%wmGb=YOsV#- Date: Tue, 5 Dec 2017 00:00:39 -0800 Subject: [PATCH 5/5] Update getVideo.py --- getVideo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/getVideo.py b/getVideo.py index 86d4e60..b74e21c 100644 --- a/getVideo.py +++ b/getVideo.py @@ -5,7 +5,7 @@ from urllib import urlencode import json -DEVELOPER_KEY = "AIzaSyCfkJCXWirc5Wk8dV1sb3Rjv7eoZKdDNnU" +DEVELOPER_KEY = "" YOUTUBE_SEARCH_URL = "https://www.googleapis.com/youtube/v3/search" YOUTUBE_VIDEO_URL = "https://www.googleapis.com/youtube/v3/videos"