1
0
Fork 1
mirror of https://github.com/andrewferrier/email2pdf.git synced 2025-03-18 05:52:59 +00:00

Refactor handle_message_body into 'plain' and 'html'.

This commit is contained in:
Andrew Ferrier 2015-10-28 09:35:53 +00:00
parent 4c7e417b73
commit 3ebe10c49c

View file

@ -299,52 +299,69 @@ def handle_message_body(input_email):
part = find_part_by_content_type(input_email, "text/html")
if part is None:
part = find_part_by_content_type(input_email, "text/plain")
assert part is not None
payload = handle_plain_message_body(input_email)
else:
(payload, cid_parts_used) = handle_html_message_body(input_email, part)
if part['Content-Transfer-Encoding'] == '8bit':
payload = part.get_payload(decode=False)
assert isinstance(payload, str)
logger.info("Email is pre-decoded because Content-Transfer-Encoding is 8bit")
return (payload, cid_parts_used)
def handle_plain_message_body(input_email):
logger = logging.getLogger("email2pdf")
part = find_part_by_content_type(input_email, "text/plain")
if part['Content-Transfer-Encoding'] == '8bit':
payload = part.get_payload(decode=False)
assert isinstance(payload, str)
logger.info("Email is pre-decoded because Content-Transfer-Encoding is 8bit")
else:
payload = part.get_payload(decode=True)
assert isinstance(payload, bytes)
charset = part.get_content_charset()
if not charset:
charset = 'utf-8'
logger.info("Determined email is plain text, defaulting to charset utf-8")
else:
payload = part.get_payload(decode=True)
assert isinstance(payload, bytes)
charset = part.get_content_charset()
if not charset:
charset = 'utf-8'
logger.info("Determined email is plain text, defaulting to charset utf-8")
else:
logger.info("Determined email is plain text with charset " + str(charset))
logger.info("Determined email is plain text with charset " + str(charset))
if isinstance(payload, bytes):
payload = str(payload, charset)
payload = html.escape(payload)
payload = "<html><body><pre>\n" + payload + "\n</pre></body></html>"
else:
payload = part.get_payload(decode=True)
charset = part.get_content_charset()
if not charset:
charset = 'utf-8'
logger.info("Determined email is HTML with charset " + str(charset))
def cid_replace(cid_parts_used, matchobj):
logger.debug("Looking for image for cid " + matchobj.group(1))
image_part = find_part_by_content_id(input_email, matchobj.group(1))
if image_part is not None:
assert image_part['Content-Transfer-Encoding'] == 'base64'
image_base64 = image_part.get_payload(decode=False)
image_base64 = re.sub("[\r\n\t]", "", image_base64)
image_decoded = image_part.get_payload(decode=True)
mime_type = get_mime_type(image_decoded)
cid_parts_used.add(image_part)
return "data:" + mime_type + ";base64," + image_base64
else:
logger.warning("Could not find image cid " + matchobj.group(1) + " in email content.")
return "broken"
return payload
payload = re.sub(r'cid:([\w_@.-]+)', functools.partial(cid_replace, cid_parts_used),
str(payload, charset))
def handle_html_message_body(input_email, part):
    """Decode an HTML message part and inline the images it references by ``cid:``.

    Args:
        input_email: The message handed to ``find_part_by_content_id`` when
            resolving each ``cid:`` reference found in the HTML.
        part: The message part whose payload holds the HTML body.

    Returns:
        A ``(payload, cid_parts_used)`` tuple: the decoded HTML text in which
        every resolvable ``cid:`` reference has been replaced by a ``data:``
        URI, and the set of image parts that were inlined that way.

    Raises:
        AssertionError: If a referenced image part is not base64-encoded.
    """
    logger = logging.getLogger("email2pdf")

    cid_parts_used = set()

    payload = part.get_payload(decode=True)
    charset = part.get_content_charset()
    if not charset:
        # No charset declared on the part: fall back to UTF-8.
        charset = 'utf-8'
    logger.info("Determined email is HTML with charset " + str(charset))

    def cid_replace(cid_parts_used, matchobj):
        """Return the replacement text for one ``cid:`` match."""
        content_id = matchobj.group(1)
        logger.debug("Looking for image for cid " + content_id)
        image_part = find_part_by_content_id(input_email, content_id)

        if image_part is None:
            logger.warning("Could not find image cid " + content_id + " in email content.")
            return "broken"

        # MIME transfer-encoding names are case-insensitive, so "Base64" or
        # "BASE64" must be accepted as well. A missing header stringifies to
        # "None" and still fails the check, as it did with the exact
        # comparison.
        transfer_encoding = str(image_part['Content-Transfer-Encoding']).strip().lower()
        assert transfer_encoding == 'base64', \
            "Inline image must be base64-encoded, got " + transfer_encoding

        # The still-encoded payload is reused verbatim inside the data URI;
        # only the line folding and stray tabs have to be removed.
        image_base64 = image_part.get_payload(decode=False)
        image_base64 = re.sub("[\r\n\t]", "", image_base64)

        # The decoded bytes are only passed to get_mime_type to pick the
        # MIME type for the URI.
        image_decoded = image_part.get_payload(decode=True)
        mime_type = get_mime_type(image_decoded)

        cid_parts_used.add(image_part)
        return "data:" + mime_type + ";base64," + image_base64

    payload = re.sub(r'cid:([\w_@.-]+)', functools.partial(cid_replace, cid_parts_used),
                     str(payload, charset))

    return (payload, cid_parts_used)