mirror of
https://github.com/andrewferrier/email2pdf.git
synced 2025-03-18 05:52:59 +00:00
Refactor handle_message_body into 'plain' and 'html'.
This commit is contained in:
parent
4c7e417b73
commit
3ebe10c49c
1 changed files with 53 additions and 36 deletions
89
email2pdf
89
email2pdf
|
@ -299,52 +299,69 @@ def handle_message_body(input_email):
|
|||
|
||||
part = find_part_by_content_type(input_email, "text/html")
|
||||
if part is None:
|
||||
part = find_part_by_content_type(input_email, "text/plain")
|
||||
assert part is not None
|
||||
payload = handle_plain_message_body(input_email)
|
||||
else:
|
||||
(payload, cid_parts_used) = handle_html_message_body(input_email, part)
|
||||
|
||||
if part['Content-Transfer-Encoding'] == '8bit':
|
||||
payload = part.get_payload(decode=False)
|
||||
assert isinstance(payload, str)
|
||||
logger.info("Email is pre-decoded because Content-Transfer-Encoding is 8bit")
|
||||
return (payload, cid_parts_used)
|
||||
|
||||
|
||||
def handle_plain_message_body(input_email):
|
||||
logger = logging.getLogger("email2pdf")
|
||||
|
||||
part = find_part_by_content_type(input_email, "text/plain")
|
||||
|
||||
if part['Content-Transfer-Encoding'] == '8bit':
|
||||
payload = part.get_payload(decode=False)
|
||||
assert isinstance(payload, str)
|
||||
logger.info("Email is pre-decoded because Content-Transfer-Encoding is 8bit")
|
||||
else:
|
||||
payload = part.get_payload(decode=True)
|
||||
assert isinstance(payload, bytes)
|
||||
charset = part.get_content_charset()
|
||||
if not charset:
|
||||
charset = 'utf-8'
|
||||
logger.info("Determined email is plain text, defaulting to charset utf-8")
|
||||
else:
|
||||
payload = part.get_payload(decode=True)
|
||||
assert isinstance(payload, bytes)
|
||||
charset = part.get_content_charset()
|
||||
if not charset:
|
||||
charset = 'utf-8'
|
||||
logger.info("Determined email is plain text, defaulting to charset utf-8")
|
||||
else:
|
||||
logger.info("Determined email is plain text with charset " + str(charset))
|
||||
logger.info("Determined email is plain text with charset " + str(charset))
|
||||
|
||||
if isinstance(payload, bytes):
|
||||
payload = str(payload, charset)
|
||||
|
||||
payload = html.escape(payload)
|
||||
payload = "<html><body><pre>\n" + payload + "\n</pre></body></html>"
|
||||
else:
|
||||
payload = part.get_payload(decode=True)
|
||||
charset = part.get_content_charset()
|
||||
if not charset:
|
||||
charset = 'utf-8'
|
||||
logger.info("Determined email is HTML with charset " + str(charset))
|
||||
|
||||
def cid_replace(cid_parts_used, matchobj):
|
||||
logger.debug("Looking for image for cid " + matchobj.group(1))
|
||||
image_part = find_part_by_content_id(input_email, matchobj.group(1))
|
||||
if image_part is not None:
|
||||
assert image_part['Content-Transfer-Encoding'] == 'base64'
|
||||
image_base64 = image_part.get_payload(decode=False)
|
||||
image_base64 = re.sub("[\r\n\t]", "", image_base64)
|
||||
image_decoded = image_part.get_payload(decode=True)
|
||||
mime_type = get_mime_type(image_decoded)
|
||||
cid_parts_used.add(image_part)
|
||||
return "data:" + mime_type + ";base64," + image_base64
|
||||
else:
|
||||
logger.warning("Could not find image cid " + matchobj.group(1) + " in email content.")
|
||||
return "broken"
|
||||
return payload
|
||||
|
||||
payload = re.sub(r'cid:([\w_@.-]+)', functools.partial(cid_replace, cid_parts_used),
|
||||
str(payload, charset))
|
||||
|
||||
def handle_html_message_body(input_email, part):
    """Decode the HTML body part and inline any ``cid:`` image references.

    Args:
        input_email: The whole parsed email; searched (via
            ``find_part_by_content_id``) for the parts that ``cid:``
            references in the HTML point at.
        part: The ``text/html`` message part whose payload is rendered.

    Returns:
        A ``(payload, cid_parts_used)`` tuple: ``payload`` is the decoded
        HTML as ``str`` with every resolvable ``cid:...`` reference replaced
        by a ``data:`` URI (unresolvable ones become the literal text
        ``broken``), and ``cid_parts_used`` is the set of message parts that
        were inlined this way.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import base64

    logger = logging.getLogger("email2pdf")

    cid_parts_used = set()

    payload = part.get_payload(decode=True)
    charset = part.get_content_charset()
    if not charset:
        # No charset declared on the part: fall back to UTF-8.
        charset = 'utf-8'
    logger.info("Determined email is HTML with charset " + str(charset))

    def cid_replace(cid_parts_used, matchobj):
        """Return the replacement text for one ``cid:<id>`` match."""
        cid = matchobj.group(1)
        logger.debug("Looking for image for cid " + cid)
        image_part = find_part_by_content_id(input_email, cid)

        if image_part is None:
            logger.warning("Could not find image cid " + cid + " in email content.")
            return "broken"

        image_decoded = image_part.get_payload(decode=True)

        # Encoding names are case-insensitive in MIME, and the header may be
        # absent altogether, so normalise before comparing.
        encoding = str(image_part['Content-Transfer-Encoding'] or '').strip().lower()
        if encoding == 'base64':
            # Already base64 on the wire: reuse it, minus the line folding.
            image_base64 = re.sub("[\r\n\t]", "", image_part.get_payload(decode=False))
        else:
            # Any other transfer encoding (8bit, binary, quoted-printable,
            # none): build the base64 text from the decoded bytes rather
            # than aborting the whole conversion.
            image_base64 = base64.b64encode(image_decoded).decode('ascii')

        mime_type = get_mime_type(image_decoded)
        cid_parts_used.add(image_part)
        return "data:" + mime_type + ";base64," + image_base64

    payload = re.sub(r'cid:([\w_@.-]+)',
                     functools.partial(cid_replace, cid_parts_used),
                     str(payload, charset))

    return (payload, cid_parts_used)
|
||||
|
||||
|
|
Loading…
Reference in a new issue