Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions PatreonDownloader.App/Models/CommandLineOptions.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using CommandLine;
using PatreonDownloader.App.Enums;
using System;
using UniversalDownloaderPlatform.Common.Enums;

namespace PatreonDownloader.App.Models
Expand Down Expand Up @@ -64,5 +65,12 @@ class CommandLineOptions

[Option("proxy-server-address", Required = false, HelpText = "The address of proxy server to use in the following format: [<proxy-scheme>://]<proxy-host>[:<proxy-port>]. Supported protocols: http(s), socks4, socks4a, socks5.")]
public string ProxyServerAddress { get; set; }

[Option("published-after", Required = false, HelpText = "Ignore post published before this date.")]
public DateTime? PublishedAfter { get; set; }

[Option("published-before", Required = false, HelpText = "Ignore post published after this date.")]
public DateTime? PublishedBefore { get; set; }

}
}
10 changes: 6 additions & 4 deletions PatreonDownloader.App/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ private static async Task RunPatreonDownloader(CommandLineOptions commandLineOpt
_universalDownloader.CrawlerMessage += UniversalDownloaderOnCrawlerMessage;
_universalDownloader.FileDownloaded += UniversalDownloaderOnFileDownloaded;

PatreonDownloaderSettings settings = await InitializeSettings(commandLineOptions);
PatreonDownloaderSettings settings = InitializeSettings(commandLineOptions);
await _universalDownloader.Download(commandLineOptions.Url, settings);

_universalDownloader.StatusChanged -= UniversalDownloaderOnStatusChanged;
Expand All @@ -137,7 +137,7 @@ private static async Task RunPatreonDownloader(CommandLineOptions commandLineOpt
_universalDownloader = null;
}

private static async Task<PatreonDownloaderSettings> InitializeSettings(CommandLineOptions commandLineOptions)
private static PatreonDownloaderSettings InitializeSettings(CommandLineOptions commandLineOptions)
{
if (!string.IsNullOrWhiteSpace(commandLineOptions.ProxyServerAddress) &&
!Uri.TryCreate(commandLineOptions.ProxyServerAddress, UriKind.Absolute, out _))
Expand All @@ -163,7 +163,9 @@ private static async Task<PatreonDownloaderSettings> InitializeSettings(CommandL
MaxFilenameLength = commandLineOptions.MaxFilenameLength,
FallbackToContentTypeFilenames = commandLineOptions.FilenamesFallbackToContentType,
ProxyServerAddress = commandLineOptions.ProxyServerAddress,
IsUseLegacyFilenaming = commandLineOptions.IsUseLegacyFilenaming
IsUseLegacyFilenaming = commandLineOptions.IsUseLegacyFilenaming,
PublishedAfter = commandLineOptions.PublishedAfter,
PublishedBefore = commandLineOptions.PublishedBefore
};

if (settings.IsUseLegacyFilenaming && (settings.FileExistsAction == FileExistsAction.BackupIfDifferent || settings.FileExistsAction == FileExistsAction.ReplaceIfDifferent))
Expand Down Expand Up @@ -192,7 +194,7 @@ private static void UniversalDownloaderOnCrawlerMessage(object sender, CrawlerMe

private static void UniversalDownloaderOnNewCrawledUrl(object sender, NewCrawledUrlEventArgs e)
{
_logger.Info($" + {((PatreonCrawledUrl) e.CrawledUrl).UrlTypeAsFriendlyString}: {e.CrawledUrl.Url}");
_logger.Info($" +{((PatreonCrawledUrl)e.CrawledUrl).UrlTypeAsFriendlyString}: {e.CrawledUrl.Url}");
}

private static void UniversalDownloaderOnPostCrawlEnd(object sender, PostCrawlEventArgs e)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
using System;
using System.Collections.Generic;
using System.Text;
using UniversalDownloaderPlatform.Common.Enums;
using UniversalDownloaderPlatform.Common.Helpers;
using UniversalDownloaderPlatform.Common.Interfaces.Models;
using UniversalDownloaderPlatform.DefaultImplementations.Models;
using UniversalDownloaderPlatform.PuppeteerEngine.Interfaces;

Expand Down Expand Up @@ -54,6 +49,8 @@ public record PatreonDownloaderSettings : UniversalDownloaderPlatformSettings, I
public string CaptchaCookieRetrievalAddress { get { return "https://www.patreon.com/home"; } }
public Uri RemoteBrowserAddress { get; init; }
public bool IsHeadlessBrowser { get; init; }
public DateTime? PublishedAfter { get; init; }
public DateTime? PublishedBefore { get; init; }

public PatreonDownloaderSettings()
{
Expand Down
4 changes: 2 additions & 2 deletions PatreonDownloader.Implementation/PatreonDefaultPlugin.cs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ public Task BeforeStart(IUniversalDownloaderPlatformSettings settings)
return Task.CompletedTask;
}

public async Task<List<string>> ExtractSupportedUrls(string htmlContents)
public Task<List<string>> ExtractSupportedUrls(string htmlContents)
{
List<string> retList = new List<string>();
HtmlDocument doc = new HtmlDocument();
Expand Down Expand Up @@ -115,7 +115,7 @@ public async Task<List<string>> ExtractSupportedUrls(string htmlContents)
}
}

return retList;
return Task.FromResult(retList);
}

private bool IsAllowedUrl(string url)
Expand Down
60 changes: 44 additions & 16 deletions PatreonDownloader.Implementation/PatreonPageCrawler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
using PatreonDownloader.Implementation.Enums;
using PatreonDownloader.Implementation.Models;
using PatreonDownloader.Implementation.Models.JSONObjects.Posts;
using UniversalDownloaderPlatform.Common.Enums;
using UniversalDownloaderPlatform.Common.Events;
using UniversalDownloaderPlatform.Common.Interfaces;
using UniversalDownloaderPlatform.Common.Interfaces.Models;
Expand All @@ -25,7 +24,7 @@ internal sealed class PatreonPageCrawler : IPageCrawler
private PatreonDownloaderSettings _patreonDownloaderSettings;

public event EventHandler<PostCrawlEventArgs> PostCrawlStart;
public event EventHandler<PostCrawlEventArgs> PostCrawlEnd;
public event EventHandler<PostCrawlEventArgs> PostCrawlEnd;
public event EventHandler<NewCrawledUrlEventArgs> NewCrawledUrl;
public event EventHandler<CrawlerMessageEventArgs> CrawlerMessage;

Expand Down Expand Up @@ -103,7 +102,7 @@ await File.WriteAllTextAsync(Path.Combine(_patreonDownloaderSettings.DownloadDir
private async Task<ParsingResult> ParsePage(string json)
{
List<PatreonCrawledUrl> crawledUrls = new List<PatreonCrawledUrl>();
List<string> skippedIncludesList = new List<string>(); //List for all included data which current account doesn't have access to
List<string> skippedIncludesList = new List<string>(); //List for all included data which current account doesn't have access to or is filtered out

Root jsonRoot = JsonConvert.DeserializeObject<Root>(json);

Expand All @@ -126,18 +125,37 @@ private async Task<ParsingResult> ParsePage(string json)
{
_logger.Warn($"[{jsonEntry.Id}] Current user cannot view this post");

string[] skippedAttachments = jsonEntry.Relationships.AttachmentsMedia?.Data.Select(x => x.Id).ToArray() ?? new string[0];
string[] skippedMedia = jsonEntry.Relationships.Images?.Data.Select(x => x.Id).ToArray() ?? new string[0];
_logger.Debug($"[{jsonEntry.Id}] Adding {skippedAttachments.Length} attachments and {skippedMedia.Length} media items to skipped list");

skippedIncludesList.AddRange(skippedAttachments);
skippedIncludesList.AddRange(skippedMedia);
AddSkippedIncludesFromPost(jsonEntry, skippedIncludesList);

OnPostCrawlEnd(new PostCrawlEventArgs(jsonEntry.Id, false, "Current user cannot view this post"));
OnCrawlerMessage(new CrawlerMessageEventArgs(CrawlerMessageType.Warning, "Current user cannot view this post", jsonEntry.Id));
continue;
}

if (_patreonDownloaderSettings.PublishedAfter != null && jsonEntry.Attributes.PublishedAt < _patreonDownloaderSettings.PublishedAfter)
{
string msg = $" -Not crawling because published at {jsonEntry.Attributes.PublishedAt}";
_logger.Info(msg);

AddSkippedIncludesFromPost(jsonEntry, skippedIncludesList);

OnPostCrawlEnd(new PostCrawlEventArgs(jsonEntry.Id, false, msg));
OnCrawlerMessage(new CrawlerMessageEventArgs(CrawlerMessageType.Info, msg, jsonEntry.Id));
continue;
}

if (_patreonDownloaderSettings.PublishedBefore != null && jsonEntry.Attributes.PublishedAt > _patreonDownloaderSettings.PublishedBefore)
{
string msg = $" -Not crawling because published at {jsonEntry.Attributes.PublishedAt}";
_logger.Info(msg);

AddSkippedIncludesFromPost(jsonEntry, skippedIncludesList);

OnPostCrawlEnd(new PostCrawlEventArgs(jsonEntry.Id, false, msg));
OnCrawlerMessage(new CrawlerMessageEventArgs(CrawlerMessageType.Info, msg, jsonEntry.Id));
continue;
}

PatreonCrawledUrl entry = new PatreonCrawledUrl
{
PostId = jsonEntry.Id,
Expand All @@ -146,13 +164,13 @@ private async Task<ParsingResult> ParsePage(string json)
};

string additionalFilesSaveDirectory = _patreonDownloaderSettings.DownloadDirectory;
if (_patreonDownloaderSettings.IsUseSubDirectories &&
(_patreonDownloaderSettings.SaveDescriptions ||
if (_patreonDownloaderSettings.IsUseSubDirectories &&
(_patreonDownloaderSettings.SaveDescriptions ||
(jsonEntry.Attributes.Embed != null && _patreonDownloaderSettings.SaveEmbeds)
)
)
{
additionalFilesSaveDirectory = Path.Combine(_patreonDownloaderSettings.DownloadDirectory,
additionalFilesSaveDirectory = Path.Combine(_patreonDownloaderSettings.DownloadDirectory,
PostSubdirectoryHelper.CreateNameFromPattern(entry, _patreonDownloaderSettings.SubDirectoryPattern, _patreonDownloaderSettings.MaxSubdirectoryNameLength));
if (!Directory.Exists(additionalFilesSaveDirectory))
Directory.CreateDirectory(additionalFilesSaveDirectory);
Expand Down Expand Up @@ -353,10 +371,10 @@ await File.WriteAllTextAsync(
_logger.Debug($"[{jsonEntry.Id}] Verification: Started");
if (jsonEntry.Type != "attachment" && jsonEntry.Type != "media")
{
if (jsonEntry.Type != "user" &&
jsonEntry.Type != "campaign" &&
jsonEntry.Type != "access-rule" &&
jsonEntry.Type != "reward" &&
if (jsonEntry.Type != "user" &&
jsonEntry.Type != "campaign" &&
jsonEntry.Type != "access-rule" &&
jsonEntry.Type != "reward" &&
jsonEntry.Type != "poll_choice" &&
jsonEntry.Type != "poll_response")
{
Expand Down Expand Up @@ -400,6 +418,16 @@ await File.WriteAllTextAsync(
return new ParsingResult {CrawledUrls = crawledUrls, NextPage = jsonRoot.Links?.Next};
}

/// <summary>
/// Collects the attachment and media ids referenced by a post and appends them to the
/// skipped-includes list (used for included data the current account cannot access or
/// that was filtered out — see the list's declaration comment).
/// </summary>
/// <param name="jsonEntry">Post entry whose attachment/media relationship ids should be recorded as skipped.</param>
/// <param name="skippedIncludesList">Accumulator of skipped include ids; mutated in place.</param>
private void AddSkippedIncludesFromPost(RootData jsonEntry, List<string> skippedIncludesList)
{
    // Relationships may legitimately have no attachments or images; fall back to an empty set.
    var relationships = jsonEntry.Relationships;
    string[] skippedAttachments = relationships.AttachmentsMedia?.Data.Select(entry => entry.Id).ToArray() ?? Array.Empty<string>();
    string[] skippedMedia = relationships.Images?.Data.Select(entry => entry.Id).ToArray() ?? Array.Empty<string>();

    _logger.Debug($"[{jsonEntry.Id}] Adding {skippedAttachments.Length} attachments and {skippedMedia.Length} media items to skipped list");

    skippedIncludesList.AddRange(skippedAttachments);
    skippedIncludesList.AddRange(skippedMedia);
}

private void OnPostCrawlStart(PostCrawlEventArgs e)
{
EventHandler<PostCrawlEventArgs> handler = PostCrawlStart;
Expand Down