11import subprocess
22import os
33from os .path import basename
4+ from typing import Union
45
56class PDF ():
67 def __init__ (self , pdf_file_path :str , drm = None ):
@@ -26,40 +27,55 @@ def add_extra_options(args, options):
2627 if self .drm is True :
2728 options = ["pdf2htmlEX" , f"{ self .file_path } " , "--no-drm" , "1" ]
2829 options = add_extra_options (args , options )
30+ print (options )
31+ for i in options :
32+ print (i , end = " " )
2933 subprocess .call (options )
3034 else :
3135 options = ["pdf2htmlEX" , f"{ self .file_path } " ]
3236 options = add_extra_options (args , options )
3337 subprocess .call (options )
3438
def to_html(self, dest_dir=None, pdf_filename=None, new_file_name=None):
    '''
    Converts the PDF file to HTML and optionally renames the generated file.

    Parameters:
    dest_dir (str, optional): Directory handed to the converter as --dest-dir. When falsy, no --dest-dir is passed and the converter's own default location is used.
    pdf_filename (str, optional): File name of the source PDF. Used to locate the generated HTML file so it can be renamed.
    new_file_name (str, optional): New name for the generated HTML file. The rename only happens when both pdf_filename and new_file_name are given.
    '''

    if dest_dir:
        self.__make_html(["--dest-dir", dest_dir])
    else:
        self.__make_html()

    # Renaming is opt-in: both names are required to know source and target.
    if not (pdf_filename and new_file_name):
        return

    # NOTE(review): assumes the converter writes "<pdf stem>.html" and, when
    # no --dest-dir is given, writes into the current directory - confirm
    # against the installed pdf2htmlEX version.
    out_dir = dest_dir or os.curdir

    # Swap only the trailing extension; a plain str.replace would also
    # rewrite ".pdf" occurring in the middle of the file name.
    stem, _ = os.path.splitext(os.path.basename(pdf_filename))
    source = os.path.join(out_dir, f"{stem}.html")
    target = os.path.join(out_dir, new_file_name)

    try:
        os.rename(source, target)
    except OSError as e:
        # Best effort: a failed rename leaves the converted file under its
        # original name, so report it instead of aborting the caller.
        print(f"Error renaming {source}: {e}")
4858
def dir_to_html(dir_path, dest_dir=None, new_file_name=None):
    '''
    Converts all PDF files in a directory to HTML.

    Parameters:
    dir_path (str): The path to the directory containing PDF files. Only entries whose name ends with ".pdf" are converted.
    dest_dir (str, optional): The path to the destination directory to save the HTML files. When falsy, to_html() is called without a destination.
    new_file_name (str, optional): Base name for the generated HTML files. Files are numbered sequentially starting at 1, ex: name_1.html, name_2.html. Any ".html" in the given name is removed before numbering. Only used when dest_dir is given; otherwise the files keep the name produced by the conversion.
    '''

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # sequential numbering of renamed files reproducible between runs.
    pdf_files = [
        os.path.join(dir_path, file)
        for file in sorted(os.listdir(dir_path))
        if file.endswith(".pdf")
    ]

    base_name = None
    if dest_dir and new_file_name:
        # Removing ".html" is a no-op when it is absent, so a single
        # expression covers names given with or without the extension.
        base_name = new_file_name.replace(".html", "")

    for i, pdf_file in enumerate(pdf_files, start=1):
        pdf = PDF(pdf_file, drm=True)
        if not dest_dir:
            pdf.to_html()
        elif base_name is not None:
            pdf.to_html(
                dest_dir=dest_dir,
                pdf_filename=basename(pdf_file),
                new_file_name=f"{base_name}_{i}.html",
            )
        else:
            pdf.to_html(dest_dir=dest_dir)
0 commit comments