forked from archive/andrewferrier_email2pdf
Experiment with using pdftotext.
This commit is contained in:
parent
0da30113f3
commit
d43781f247
2 changed files with 7 additions and 12 deletions
|
@ -30,4 +30,5 @@ COPY . /tmp/email2pdf/
|
|||
COPY docker/email2pdf/getmail /etc/cron.d/
|
||||
WORKDIR /tmp/email2pdf
|
||||
RUN make builddeb_real && sh -c 'ls -1 /tmp/email2pdf/*.deb | xargs -L 1 gdebi -n' && cp /tmp/email2pdf/*.deb /tmp
|
||||
RUN apt-get install -y xpdf-utils
|
||||
RUN apt-get clean && rm -rf /var/lib/apt/lists/* /var/tmp/*
|
||||
|
|
|
@ -369,18 +369,12 @@ class Email2PDFTestCase(unittest.TestCase):
|
|||
return None
|
||||
|
||||
def getPDFText(self, filename):
    """Return the text of the PDF at ``filename``, extracted with pdftotext.

    The external ``pdftotext`` tool writes its output to a temporary file,
    which is then read back and decoded as UTF-8.

    Returns the extracted text as a ``str``, or ``None`` when ``pdftotext``
    exits with a non-zero status (the PDF could not be converted).
    """
    with tempfile.NamedTemporaryFile() as temporaryFile:
        # Request UTF-8 explicitly: the output is decoded as UTF-8 below,
        # and xpdf's pdftotext otherwise defaults to Latin1.
        options = ['pdftotext', '-enc', 'UTF-8', filename, temporaryFile.name]
        p = Popen(options)
        if p.wait() != 0:
            # A failed conversion must not look like an empty document.
            return None
        # Re-open by name: pdftotext wrote to the path, not to our handle.
        with open(temporaryFile.name, 'rb') as f:
            return str(f.read(), 'utf-8')
|
||||
|
||||
def touch(self, fname):
    """Create ``fname`` as an empty file, truncating it if it already exists."""
    with open(fname, 'w'):
        pass
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue