I want to convert all the .doc files from a particular folder to .docx file.
I tried using the following code,
import subprocess
import os
for filename in os.listdir(os.getcwd()):
if filename.endswith('.doc'):
print filename
subprocess.call(['soffice', '--headless', '--convert-to', 'docx', filename])
But it gives me an error: OSError: [Errno 2] No such file or directory
I prefer to use the glob
module for tasks like that. Put this in a file doc2docx.py
. To make it executable, set chmod +x
. And optionally put that file in your $PATH
as well, to make it available "everywhere".
#!/usr/bin/env python
import glob
import subprocess
for doc in glob.iglob("*.doc"):
subprocess.call(['soffice', '--headless', '--convert-to', 'docx', doc])
Though ideally you'd leave the expansion to the shell itself, and call doc2docx.py
with the files as arguments, like doc2docx.py *.doc
:
#!/usr/bin/env python
import subprocess
import sys
if len(sys.argv) < 2:
sys.stderr.write("SYNOPSIS: %s file1 [file2] ...\n"%sys.argv[0])
for doc in sys.argv[1:]:
subprocess.call(['soffice', '--headless', '--convert-to', 'docx', doc])
As requested by @pyd, to output to a target directory myoutputdir
use:
#!/usr/bin/env python
import subprocess
import sys
if len(sys.argv) < 2:
sys.stderr.write("SYNOPSIS: %s file1 [file2] ...\n"%sys.argv[0])
for doc in sys.argv[1:]:
subprocess.call(['soffice', '--headless', '--convert-to', 'docx', '--outdir', 'myoutputdir', doc])
Here is a solution that worked for me. The other solutions proposed did not work on my Windows 10 machine using Python 3.
from glob import glob
import re
import os
import win32com.client as win32
from win32com.client import constants
# Create list of paths to .doc files
paths = glob('C:\\path\\to\\doc\\files\\**\\*.doc', recursive=True)
def save_as_docx(path):
# Opening MS Word
word = win32.gencache.EnsureDispatch('Word.Application')
doc = word.Documents.Open(path)
doc.Activate ()
# Rename path with .docx
new_file_abs = os.path.abspath(path)
new_file_abs = re.sub(r'\.\w+$', '.docx', new_file_abs)
# Save and Close
word.ActiveDocument.SaveAs(
new_file_abs, FileFormat=constants.wdFormatXMLDocument
)
doc.Close(False)
for path in paths:
save_as_docx(path)
Use os.path.join
to specify the correct directory.
import os, subprocess
main_dir = os.path.join('/', 'Users', 'username', 'Desktop', 'foldername')
for filename in os.listdir(main_dir):
if filename.endswith('.doc'):
print filename
subprocess.call(['soffice', '--headless', '--convert-to', 'docx', filename])
If you don't like to rely on sub-process calls, here is the version with COM client. It is useful if you are targeting windows users without LibreOffice installed.
#!/usr/bin/env python
import glob
import win32com.client
word = win32com.client.Dispatch("Word.Application")
word.visible = 0
for i, doc in enumerate(glob.iglob("*.doc")):
in_file = os.path.abspath(doc)
wb = word.Documents.Open(in_file)
out_file = os.path.abspath("out{}.docx".format(i))
wb.SaveAs2(out_file, FileFormat=16) # file format for docx
wb.Close()
word.Quit()
来源:https://stackoverflow.com/questions/38468442/multiple-doc-to-docx-file-conversion-using-python