Skip to content

Commit 11f0900

Browse files
Merge pull request #10 from gabriel-batistuta/dir_to_html
2 parents e3cc693 + 1592700 commit 11f0900

File tree

2 files changed

+25
-9
lines changed

2 files changed

+25
-9
lines changed

pypdf2htmlEX/core.py

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import subprocess
22
import os
33
from os.path import basename
4+
from typing import Union
45

56
class PDF():
67
def __init__(self, pdf_file_path:str, drm=None):
@@ -26,40 +27,55 @@ def add_extra_options(args, options):
2627
if self.drm is True:
2728
options = ["pdf2htmlEX", f"{self.file_path}", "--no-drm", "1"]
2829
options = add_extra_options(args, options)
30+
print(options)
31+
for i in options:
32+
print(i, end=" ")
2933
subprocess.call(options)
3034
else:
3135
options = ["pdf2htmlEX", f"{self.file_path}"]
3236
options = add_extra_options(args, options)
3337
subprocess.call(options)
3438

def to_html(self, dest_dir=None, pdf_filename=None, new_file_name=None):
    '''
    Converts the PDF file to HTML and optionally renames the generated file.

    Parameters:
    dest_dir (str, optional): Directory passed to pdf2htmlEX as --dest-dir. Defaults to None, in which case no --dest-dir option is passed.
    pdf_filename (str, optional): File name of the source PDF, used to work out the name of the generated HTML file. Defaults to None, which uses the base name of this object's file_path.
    new_file_name (str, optional): Name to give the generated HTML file. Defaults to None, which keeps the generated name.
    '''

    if dest_dir:
        self.__make_html(["--dest-dir", dest_dir])
    else:
        self.__make_html()

    if not new_file_name:
        return

    # pdf2htmlEX documents "." as the default --dest-dir, so look there
    # instead of building a "None/..." path when no dest_dir was given.
    out_dir = dest_dir if dest_dir else os.curdir

    source_name = pdf_filename or os.path.basename(self.file_path)
    # splitext swaps only the final extension; str.replace would rewrite
    # every ".pdf" occurrence inside the name.
    html_name = os.path.splitext(source_name)[0] + ".html"

    generated_path = os.path.join(out_dir, html_name)
    try:
        os.rename(generated_path, os.path.join(out_dir, new_file_name))
    except OSError as e:
        # Best effort: the conversion itself already ran, so report the
        # failed rename without aborting the caller.
        print(f"Error renaming {generated_path}: {e}")
4858

def dir_to_html(dir_path, dest_dir=None, new_file_name=None):
    '''
    Converts all PDF files in a directory to HTML.

    Parameters:
    dir_path (str): The path to the directory containing PDF files.
    dest_dir (str, optional): The path to the destination directory to save the HTML files. Defaults to None, which passes no destination directory to the converter.
    new_file_name (str, optional): Base name for the HTML files; a trailing ".html" is ignored. Files are numbered from 1 in sorted file-name order, ex: name_1.html, name_2.html. Only applied when dest_dir is given. Defaults to None, which keeps the names produced by the converter.
    '''

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # sequential numbering reproducible between runs.
    pdf_files = [
        os.path.join(dir_path, file)
        for file in sorted(os.listdir(dir_path))
        if file.endswith(".pdf")
    ]

    # Strip only a trailing ".html" so the numbered suffix is appended to
    # the bare name.
    stem = new_file_name
    if stem and stem.endswith(".html"):
        stem = stem[:-len(".html")]

    for i, pdf_file in enumerate(pdf_files, start=1):
        pdf = PDF(pdf_file, drm=True)
        if not dest_dir:
            pdf.to_html()
        elif new_file_name:
            pdf.to_html(
                dest_dir=dest_dir,
                pdf_filename=basename(pdf_file),
                new_file_name=f"{stem}_{i}.html",
            )
        else:
            pdf.to_html(dest_dir=dest_dir)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
readme = file.read()
55

66
setup(name='pypdf2htmlex',
7-
version='1.6',
7+
version='1.8',
88
license='MIT License',
99
author='Gabriel Batistuta',
1010
long_description=readme,

0 commit comments

Comments
 (0)