This commit is contained in:
2025-12-05 10:55:37 +01:00
commit 396e290394
1423 changed files with 825479 additions and 0 deletions

View File

153
src/paperless_mail/admin.py Normal file
View File

@@ -0,0 +1,153 @@
from django import forms
from django.contrib import admin
from django.utils.translation import gettext_lazy as _
from guardian.admin import GuardedModelAdmin
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
from paperless_mail.models import ProcessedMail
class MailAccountAdminForm(forms.ModelForm):
    """Model form used by the Django admin to edit a ``MailAccount``.

    The password field is rendered with a ``PasswordInput`` widget.
    """

    class Meta:
        """Form metadata: target model, widget overrides and editable fields."""

        model = MailAccount
        widgets = {
            "password": forms.PasswordInput(),
        }
        # Every field is listed exactly once.  "imap_port" is part of the
        # list because MailAccountAdmin.fieldsets displays it; the previous
        # list repeated "username" in its place.
        fields = [
            "name",
            "imap_server",
            "imap_port",
            "imap_security",
            "username",
            "password",
            "is_token",
            "character_set",
        ]
class MailAccountAdmin(GuardedModelAdmin):
    """Admin page for mail accounts, built on guardian's ``GuardedModelAdmin``."""

    # Custom form so that the password uses a password widget.
    form = MailAccountAdminForm

    # Columns of the change list.
    list_display = ("name", "imap_server", "username")

    # Edit page layout: connection, authentication, advanced settings.
    fieldsets = [
        (
            None,
            {"fields": ["name", "imap_server", "imap_port"]},
        ),
        (
            _("Authentication"),
            {"fields": ["imap_security", "username", "password", "is_token"]},
        ),
        (
            _("Advanced settings"),
            {"fields": ["character_set"]},
        ),
    ]
class MailRuleAdmin(GuardedModelAdmin):
    """Admin page for mail rules, built on guardian's ``GuardedModelAdmin``."""

    # --- change list -------------------------------------------------------
    list_display = ("order", "name", "account", "folder", "action")
    list_display_links = ("name",)
    list_editable = ("order",)
    list_filter = ("account",)
    # Column-header sorting is disabled; rows are always shown by "order".
    sortable_by = []
    ordering = ["order"]

    # --- widgets -----------------------------------------------------------
    radio_fields = {
        "attachment_type": admin.VERTICAL,
        "action": admin.VERTICAL,
        "assign_title_from": admin.VERTICAL,
        "assign_correspondent_from": admin.VERTICAL,
    }
    raw_id_fields = ("assign_correspondent", "assign_document_type")
    filter_horizontal = ("assign_tags",)

    # --- edit page layout --------------------------------------------------
    fieldsets = (
        (
            None,
            {"fields": ("name", "order", "account", "enabled", "folder")},
        ),
        (
            _("Filter"),
            {
                "description": _(
                    "Paperless will only process mails that match ALL of the "
                    "filters given below.",
                ),
                "fields": (
                    "filter_from",
                    "filter_to",
                    "filter_subject",
                    "filter_body",
                    "filter_attachment_filename_include",
                    "filter_attachment_filename_exclude",
                    "maximum_age",
                    "consumption_scope",
                    "attachment_type",
                ),
            },
        ),
        (
            _("Actions"),
            {
                "description": _(
                    "The action applied to the mail. This action is only "
                    "performed when the mail body or attachments were "
                    "consumed from the mail.",
                ),
                "fields": ("action", "action_parameter"),
            },
        ),
        (
            _("Metadata"),
            {
                "description": _(
                    "Assign metadata to documents consumed from this rule "
                    "automatically. If you do not assign tags, types or "
                    "correspondents here, paperless will still process all "
                    "matching rules that you have defined.",
                ),
                "fields": (
                    "assign_title_from",
                    "assign_tags",
                    "assign_document_type",
                    "assign_correspondent_from",
                    "assign_correspondent",
                ),
            },
        ),
    )
class ProcessedMailAdmin(admin.ModelAdmin):
    """Admin page listing processed mails; every listed field is read-only."""

    class Meta:
        model = ProcessedMail
        fields = "__all__"

    # Change list: newest processed mails first, filterable by status/rule.
    list_display = ("subject", "status", "processed", "received", "rule")
    list_display_links = ["subject"]
    list_filter = ("status", "rule")
    ordering = ["-processed"]

    # Fields shown on the detail page without an editable widget.
    readonly_fields = [
        "owner",
        "rule",
        "folder",
        "uid",
        "subject",
        "received",
        "processed",
        "status",
        "error",
    ]
# Expose the three mail models in the Django admin with their admin classes.
admin.site.register(MailAccount, admin_class=MailAccountAdmin)
admin.site.register(MailRule, admin_class=MailRuleAdmin)
admin.site.register(ProcessedMail, admin_class=ProcessedMailAdmin)

View File

@@ -0,0 +1,18 @@
from django.apps import AppConfig
from django.conf import settings
from django.utils.translation import gettext_lazy as _
from paperless_mail.signals import mail_consumer_declaration
class PaperlessMailConfig(AppConfig):
    """Django application configuration for ``paperless_mail``."""

    name = "paperless_mail"
    verbose_name = _("Paperless mail")

    def ready(self):
        """Hook the mail consumer declaration into the document signal.

        The receiver is only connected when ``settings.TIKA_ENABLED`` is
        truthy.
        """
        # Imported inside the method rather than at module level.
        from documents.signals import document_consumer_declaration

        tika_enabled = settings.TIKA_ENABLED
        if tika_enabled:
            document_consumer_declaration.connect(mail_consumer_declaration)

        super().ready()

View File

@@ -0,0 +1,12 @@
from django_filters import FilterSet
from paperless_mail.models import ProcessedMail
class ProcessedMailFilterSet(FilterSet):
    """Filter set for ``ProcessedMail``: exact match on rule and on status."""

    class Meta:
        model = ProcessedMail
        # Lookup expressions allowed per field.
        fields = {"rule": ["exact"], "status": ["exact"]}

970
src/paperless_mail/mail.py Normal file
View File

@@ -0,0 +1,970 @@
import datetime
import itertools
import logging
import ssl
import tempfile
import traceback
from datetime import date
from datetime import timedelta
from fnmatch import fnmatch
from pathlib import Path
from typing import TYPE_CHECKING
import magic
import pathvalidate
from celery import chord
from celery import shared_task
from celery.canvas import Signature
from django.conf import settings
from django.db import DatabaseError
from django.utils import timezone
from django.utils.timezone import is_naive
from django.utils.timezone import make_aware
from imap_tools import AND
from imap_tools import NOT
from imap_tools import MailAttachment
from imap_tools import MailBox
from imap_tools import MailboxFolderSelectError
from imap_tools import MailBoxUnencrypted
from imap_tools import MailMessage
from imap_tools import MailMessageFlags
from imap_tools import errors
from imap_tools.mailbox import MailBoxStartTls
from imap_tools.query import LogicOperator
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
from documents.loggers import LoggingMixin
from documents.models import Correspondent
from documents.parsers import is_mime_type_supported
from documents.tasks import consume_file
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
from paperless_mail.models import ProcessedMail
from paperless_mail.oauth import PaperlessMailOAuth2Manager
from paperless_mail.preprocessor import MailMessageDecryptor
from paperless_mail.preprocessor import MailMessagePreprocessor
# Apple Mail represents a coloured flag as the general IMAP "\Flagged" flag
# combined with zero or more "$MailFlagBit<n>" keywords.  The flags below were
# observed with imaplib => conn.fetch(b"<message_id>", "FLAGS"):
#
#   no flag - (FLAGS (\\Seen $NotJunk NotJunk))
#   red     - (FLAGS (\\Flagged \\Seen $NotJunk NotJunk))
#   orange  - (FLAGS (\\Flagged \\Seen $NotJunk NotJunk $MailFlagBit0))
#   yellow  - (FLAGS (\\Flagged \\Seen $NotJunk NotJunk $MailFlagBit1))
#   blue    - (FLAGS (\\Flagged \\Seen $NotJunk NotJunk $MailFlagBit2))
#   green   - (FLAGS (\\Flagged \\Seen $NotJunk NotJunk $MailFlagBit0 $MailFlagBit1))
#   violet  - (FLAGS (\\Flagged \\Seen $NotJunk NotJunk $MailFlagBit0 $MailFlagBit2))
#   grey    - (FLAGS (\\Flagged \\Seen $NotJunk NotJunk $MailFlagBit1 $MailFlagBit2))
#
# Maps a colour name to the extra keywords that select it ("red" needs none
# beyond "\Flagged").
APPLE_MAIL_TAG_COLORS = {
    "red": [],
    "orange": [
        "$MailFlagBit0",
    ],
    "yellow": [
        "$MailFlagBit1",
    ],
    "blue": [
        "$MailFlagBit2",
    ],
    "green": [
        "$MailFlagBit0",
        "$MailFlagBit1",
    ],
    "violet": [
        "$MailFlagBit0",
        "$MailFlagBit2",
    ],
    "grey": [
        "$MailFlagBit1",
        "$MailFlagBit2",
    ],
}
class MailError(Exception):
    """Error raised by this module for mail-related failures.

    Used for invalid tag colors, failed logins and folder/fetch problems.
    """
class BaseMailAction:
    """
    Common interface of all mail actions.

    An action is run on a mail once the documents taken from it have been
    consumed; it makes the processing visible to the user in their mail
    client. An action may also contribute search criteria so that mails it
    has already been applied to are not analyzed again (for example, the
    "mark as read" action excludes read mails).
    """

    def get_criteria(self) -> dict | LogicOperator:
        """
        Return the filtering criteria/query contributed by this action.

        The base implementation contributes no restriction.
        """
        no_restrictions: dict = {}
        return no_restrictions

    def post_consume(
        self,
        M: MailBox,
        message_uid: str,
        parameter: str,
    ):  # pragma: no cover
        """
        Apply the action to the mail with the given uid in mailbox ``M``.

        Subclasses must override this method.
        """
        raise NotImplementedError
class DeleteMailAction(BaseMailAction):
    """
    Mail action: delete the mail once it has been processed.
    """

    def post_consume(self, M: MailBox, message_uid: str, parameter: str):
        """Delete ``message_uid`` from the mailbox; ``parameter`` is unused."""
        M.delete(message_uid)
class MarkReadMailAction(BaseMailAction):
    """
    Mail action: mark the mail as read once it has been processed.
    """

    def get_criteria(self):
        """Only unread mails are candidates for this action."""
        return {"seen": False}

    def post_consume(self, M: MailBox, message_uid: str, parameter: str):
        """Set the SEEN flag on ``message_uid``; ``parameter`` is unused."""
        seen_flags = [MailMessageFlags.SEEN]
        M.flag(message_uid, seen_flags, value=True)
class MoveMailAction(BaseMailAction):
    """
    A mail action that moves mails to a different folder after processing.
    """

    def post_consume(self, M: MailBox, message_uid: str, parameter: str):
        """
        Move the mail to another folder.

        :param M: mailbox the mail currently lives in
        :param message_uid: uid of the mail to move
        :param parameter: destination folder passed to ``M.move``
        """
        M.move(message_uid, parameter)
class FlagMailAction(BaseMailAction):
    """
    Mail action: flag the mail as important ("star") once it has been
    processed.
    """

    def get_criteria(self):
        """Only mails that are not flagged yet are candidates."""
        return {"flagged": False}

    def post_consume(self, M: MailBox, message_uid: str, parameter: str):
        """Set the FLAGGED flag on ``message_uid``; ``parameter`` is unused."""
        flagged = [MailMessageFlags.FLAGGED]
        M.flag(message_uid, flagged, value=True)
class TagMailAction(BaseMailAction):
    """
    A mail action that tags mails after processing.

    Two kinds of tag are supported:

    * ``apple:<color>`` - an Apple Mail color flag. ``self.color`` holds the
      lower-cased color name and ``self.keyword`` is ``None``.
    * anything else - a plain keyword (or a Gmail label when the server
      supports them). ``self.keyword`` holds the value and ``self.color`` is
      ``None``.
    """

    def __init__(self, parameter: str, *, supports_gmail_labels: bool):
        """
        :param parameter: the rule's action parameter (tag or ``apple:<color>``)
        :param supports_gmail_labels: whether the server announced Gmail
            label support
        :raises MailError: if an ``apple:`` parameter names an unknown color
        """
        # The custom tag should look like "apple:<color>"
        if parameter and "apple:" in parameter.lower():
            # Split on the first colon only, so additional colons lead to the
            # "not a valid color" error instead of an unpacking ValueError.
            _, _, color = parameter.partition(":")
            # Store the normalized (lower-case) name: it is used later as a
            # key into APPLE_MAIL_TAG_COLORS, whose keys are lower-case.
            color = color.strip().lower()
            if color not in APPLE_MAIL_TAG_COLORS:
                raise MailError("Not a valid AppleMail tag color.")
            self.color = color
            self.keyword = None
        else:
            self.keyword = parameter
            self.color = None
        self.supports_gmail_labels = supports_gmail_labels

    def get_criteria(self):
        """
        Return the criteria that exclude mails which already carry the tag.

        :raises ValueError: if neither a color nor a keyword is configured
        """
        # AppleMail: We only need to check if mails are \Flagged
        if self.color:
            return {"flagged": False}
        elif self.keyword:
            if self.supports_gmail_labels:
                return AND(NOT(gmail_label=self.keyword), no_keyword=self.keyword)
            else:
                return {"no_keyword": self.keyword}
        else:  # pragma: no cover
            raise ValueError("This should never happen.")

    def post_consume(self, M: MailBox, message_uid: str, parameter: str):
        """
        Apply the configured tag to the mail.

        The color case is checked first, mirroring ``get_criteria``: an Apple
        Mail color has no keyword, so it must never be sent as a Gmail label.

        :raises MailError: if neither a color nor a keyword is configured
        """
        # AppleMail
        if self.color:
            # Remove all existing $MailFlagBits
            M.flag(
                message_uid,
                set(itertools.chain.from_iterable(APPLE_MAIL_TAG_COLORS.values())),
                value=False,
            )

            # Set new $MailFlagBits
            M.flag(message_uid, APPLE_MAIL_TAG_COLORS[self.color], value=True)

            # Set the general \Flagged
            # This defaults to the "red" flag in AppleMail and
            # "stars" in Thunderbird or GMail
            M.flag(message_uid, [MailMessageFlags.FLAGGED], value=True)

        elif self.keyword:
            if self.supports_gmail_labels:
                M.client.uid("STORE", message_uid, "+X-GM-LABELS", self.keyword)
            else:
                M.flag(message_uid, [self.keyword], value=True)

        else:
            raise MailError("No keyword specified.")
def mailbox_login(mailbox: MailBox, account: MailAccount):
    """
    Authenticate ``mailbox`` with the credentials stored on ``account``.

    Token accounts use XOAUTH2. Password accounts use the regular login when
    the password is pure ASCII and the UTF-8 login otherwise.

    :raises MailError: if authentication fails for any reason; the original
        exception is chained as the cause
    """
    log = logging.getLogger("paperless_mail")

    try:
        if account.is_token:
            mailbox.xoauth2(account.username, account.password)
        else:
            # Probe whether the password survives ASCII encoding.
            password_is_ascii = True
            try:
                account.password.encode("ascii")
            except UnicodeEncodeError:
                password_is_ascii = False

            if not password_is_ascii:
                log.debug("Falling back to AUTH=PLAIN")
                mailbox.login_utf8(account.username, account.password)
            else:
                mailbox.login(account.username, account.password)

    except Exception as e:
        log.error(
            f"Error while authenticating account {account}: {e}",
            exc_info=False,
        )
        raise MailError(
            f"Error while authenticating account {account}",
        ) from e
@shared_task
def apply_mail_action(
    result: list[str],
    rule_id: int,
    message_uid: str,
    message_subject: str,
    message_date: datetime.datetime,
):
    """
    Shared task: run the mail action of a rule on one mail.

    Whatever the outcome, a ProcessedMail row is written (status "SUCCESS" or
    "FAILED" with the traceback) so that the mail is not picked up again.
    Failures are re-raised after being recorded.
    """
    rule = MailRule.objects.get(pk=rule_id)
    account = MailAccount.objects.get(pk=rule.account.pk)

    # Ensure the date is properly timezone aware
    if is_naive(message_date):
        message_date = make_aware(message_date)

    def record_outcome(status: str, **extra) -> None:
        # Single place that writes the ProcessedMail row for this mail.
        ProcessedMail.objects.create(
            owner=rule.owner,
            rule=rule,
            folder=rule.folder,
            uid=message_uid,
            subject=message_subject,
            received=message_date,
            status=status,
            **extra,
        )

    try:
        with get_mailbox(
            server=account.imap_server,
            port=account.imap_port,
            security=account.imap_security,
        ) as M:
            # Need to know the support for the possible tagging
            supports_gmail_labels = "X-GM-EXT-1" in M.client.capabilities

            mailbox_login(M, account)
            M.folder.set(rule.folder)

            action = get_rule_action(
                rule,
                supports_gmail_labels=supports_gmail_labels,
            )
            try:
                action.post_consume(M, message_uid, rule.action_parameter)
            except errors.ImapToolsError:
                logging.getLogger("paperless_mail").exception(
                    "Error while processing mail action during post_consume",
                )
                raise

        record_outcome("SUCCESS")

    except Exception:
        record_outcome("FAILED", error=traceback.format_exc())
        raise
@shared_task
def error_callback(
    request,
    exc,
    tb,
    rule_id: int,
    message_uid: str,
    message_subject: str,
    message_date: datetime.datetime,
):
    """
    A shared task that is called whenever something goes wrong during
    consumption of a file. See queue_consumption_tasks.

    Records the mail as "FAILED" in ProcessedMail, together with a
    description of the error that was handed to this callback.

    :param request: request of the failed task (unused)
    :param exc: the error reported by the failed task
    :param tb: traceback reported by the failed task
    """
    rule = MailRule.objects.get(pk=rule_id)

    # No exception is being handled while this callback runs, so
    # traceback.format_exc() would only ever yield "NoneType: None".
    # Build the error text from the arguments instead.
    if isinstance(tb, str) and tb.strip():
        # presumably already a formatted traceback - verify against the
        # Celery version in use
        error_text = tb
    elif isinstance(exc, BaseException):
        error_text = "".join(
            traceback.format_exception(type(exc), exc, exc.__traceback__),
        )
    else:
        error_text = str(exc)

    ProcessedMail.objects.create(
        rule=rule,
        folder=rule.folder,
        uid=message_uid,
        subject=message_subject,
        received=make_aware(message_date) if is_naive(message_date) else message_date,
        status="FAILED",
        error=error_text,
    )
def queue_consumption_tasks(
    *,
    consume_tasks: list[Signature],
    rule: MailRule,
    message: MailMessage,
):
    """
    Hand a list of consume_file signatures over to celery as a chord.

    The chord body applies the rule's mail action; on error the
    error_callback task is invoked instead. Both receive the same
    identifying details of the mail.
    """
    mail_details = {
        "rule_id": rule.pk,
        "message_uid": message.uid,
        "message_subject": message.subject,
        "message_date": message.date,
    }

    on_success = apply_mail_action.s(**mail_details)
    on_failure = error_callback.s(**mail_details)

    workflow = chord(header=consume_tasks, body=on_success).on_error(on_failure)
    workflow.delay()
def get_rule_action(rule: MailRule, *, supports_gmail_labels: bool) -> BaseMailAction:
    """
    Build the BaseMailAction that corresponds to ``rule.action``.

    ``supports_gmail_labels`` is only relevant for the TAG action.
    """
    selected = rule.action
    actions = MailRule.MailAction

    if selected == actions.FLAG:
        return FlagMailAction()
    if selected == actions.DELETE:
        return DeleteMailAction()
    if selected == actions.MOVE:
        return MoveMailAction()
    if selected == actions.MARK_READ:
        return MarkReadMailAction()
    if selected == actions.TAG:
        return TagMailAction(
            rule.action_parameter,
            supports_gmail_labels=supports_gmail_labels,
        )

    raise NotImplementedError("Unknown action.")  # pragma: no cover
def make_criterias(rule: MailRule, *, supports_gmail_labels: bool):
    """
    Build the search criteria passed to MailBox.fetch for ``rule``.

    Combines the rule's own filters (age, from, to, subject, body) with the
    criteria contributed by the rule's mail action. Returns the string "ALL"
    when there is nothing to filter on.
    """
    oldest_allowed = date.today() - timedelta(days=rule.maximum_age)

    search_terms = {}
    if rule.maximum_age > 0:
        search_terms["date_gte"] = oldest_allowed

    # Optional text filters: only non-empty values become criteria.
    optional_filters = (
        ("from_", rule.filter_from),
        ("to", rule.filter_to),
        ("subject", rule.filter_subject),
        ("body", rule.filter_body),
    )
    for key, value in optional_filters:
        if value:
            search_terms[key] = value

    action_query = get_rule_action(
        rule,
        supports_gmail_labels=supports_gmail_labels,
    ).get_criteria()

    if not isinstance(action_query, dict):
        # The action returned a ready-made query object.
        return AND(action_query, **search_terms)

    if action_query or search_terms:
        return AND(**action_query, **search_terms)
    return "ALL"
def get_mailbox(server, port, security) -> MailBox:
    """
    Create the mailbox object matching the account's security setting.

    Encrypted variants share one default SSL context, extended with
    ``settings.EMAIL_CERTIFICATE_FILE`` when that setting is not None.
    """
    tls_context = ssl.create_default_context()
    if settings.EMAIL_CERTIFICATE_FILE is not None:  # pragma: no cover
        tls_context.load_verify_locations(cafile=settings.EMAIL_CERTIFICATE_FILE)

    modes = MailAccount.ImapSecurity
    if security == modes.NONE:
        return MailBoxUnencrypted(server, port)
    if security == modes.STARTTLS:
        return MailBoxStartTls(server, port, ssl_context=tls_context)
    if security == modes.SSL:
        return MailBox(server, port, ssl_context=tls_context)

    raise NotImplementedError("Unknown IMAP security")  # pragma: no cover
class MailAccountHandler(LoggingMixin):
"""
The main class that handles mail accounts.
* processes all rules for a given mail account
* for each mail rule, fetches relevant mails, and queues documents from
matching mails for consumption
* marks processed mails in the database, so that they won't be processed
again
* runs mail actions on the mail server, when consumption is completed
"""
# Logger name for this handler; presumably consumed by LoggingMixin
# (defined outside this file) - TODO confirm.
logging_name = "paperless_mail"
# Preprocessor classes that _init_preprocessors tries to instantiate; each
# one is run on every message by _preprocess_message.
_message_preprocessor_types: list[type[MailMessagePreprocessor]] = [
MailMessageDecryptor,
]
def __init__(self) -> None:
    """Initialise the base class, start a logging group and set up the
    message preprocessors."""
    super().__init__()

    self.renew_logging_group()
    self._init_preprocessors()
def _init_preprocessors(self):
    """Reset the preprocessor list and try to add one instance per
    configured preprocessor type."""
    self._message_preprocessors: list[MailMessagePreprocessor] = []

    for candidate_type in self._message_preprocessor_types:
        self._init_preprocessor(candidate_type)
def _init_preprocessor(self, preprocessor_type):
    """
    Instantiate one preprocessor type and register the instance.

    Types that report they cannot run are skipped; instantiation errors are
    logged as warnings and otherwise ignored.
    """
    if not preprocessor_type.able_to_run():
        self.log.debug(f"Skipping mail preprocessor {preprocessor_type.NAME}")
        return

    try:
        self._message_preprocessors.append(preprocessor_type())
    except Exception as e:
        self.log.warning(
            f"Error while initializing preprocessor {preprocessor_type.NAME}: {e}",
        )
def _correspondent_from_name(self, name: str) -> Correspondent | None:
    """
    Fetch the correspondent called ``name``, creating it if necessary.

    A newly created correspondent uses ``name`` as its match string.
    Returns None (after logging) when the database reports an error.
    """
    try:
        correspondent, _created = Correspondent.objects.get_or_create(
            name=name,
            defaults={
                "match": name,
            },
        )
    except DatabaseError as e:
        self.log.error(f"Error while retrieving correspondent {name}: {e}")
        return None
    return correspondent
def _get_title(
    self,
    message: MailMessage,
    att: MailAttachment,
    rule: MailRule,
) -> str | None:
    """
    Pick the document title according to ``rule.assign_title_from``:
    the mail subject, the attachment file name without suffix, or None.
    """
    source = rule.assign_title_from
    sources = MailRule.TitleSource

    if source == sources.FROM_SUBJECT:
        return message.subject
    if source == sources.FROM_FILENAME:
        return Path(att.filename).stem
    if source == sources.NONE:
        return None

    raise NotImplementedError(
        "Unknown title selector.",
    )  # pragma: no cover
def _get_correspondent(
    self,
    message: MailMessage,
    rule: MailRule,
) -> Correspondent | None:
    """
    Pick the correspondent according to ``rule.assign_correspondent_from``:
    none, the sender address, the sender name (falling back to the address
    when the name is missing or empty), or the rule's fixed correspondent.
    """
    source = rule.assign_correspondent_from
    sources = MailRule.CorrespondentSource

    if source == sources.FROM_NOTHING:
        return None

    if source == sources.FROM_EMAIL:
        return self._correspondent_from_name(message.from_)

    if source == sources.FROM_NAME:
        sender = message.from_values
        has_name = sender is not None and len(sender.name) > 0
        if not has_name:
            return self._correspondent_from_name(message.from_)
        return self._correspondent_from_name(sender.name)

    if source == sources.FROM_CUSTOM:
        return rule.assign_correspondent

    raise NotImplementedError(
        "Unknown correspondent selector",
    )  # pragma: no cover
def handle_mail_account(self, account: MailAccount):
    """
    Process every enabled rule of one mail account.

    Returns the number of files queued for consumption. MailError is
    propagated to the caller; any other error while talking to the mailbox
    is logged and the count gathered so far is returned. Errors inside a
    single rule are logged and do not stop the remaining rules.
    """
    self.renew_logging_group()
    self.log.debug(f"Processing mail account {account}")

    total_processed_files = 0
    try:
        with get_mailbox(
            account.imap_server,
            account.imap_port,
            account.imap_security,
        ) as M:
            # Refresh an expired OAuth token before logging in; give up on
            # this account when the refresh does not succeed.
            token_expired = (
                account.is_token
                and account.expiration is not None
                and account.expiration < timezone.now()
            )
            if token_expired:
                oauth_manager = PaperlessMailOAuth2Manager()
                if not oauth_manager.refresh_account_oauth_token(account):
                    return total_processed_files
                account.refresh_from_db()

            capabilities = M.client.capabilities
            supports_gmail_labels = "X-GM-EXT-1" in capabilities
            supports_auth_plain = "AUTH=PLAIN" in capabilities
            self.log.debug(f"GMAIL Label Support: {supports_gmail_labels}")
            self.log.debug(f"AUTH=PLAIN Support: {supports_auth_plain}")

            mailbox_login(M, account)

            self.log.debug(
                f"Account {account}: Processing {account.rules.count()} rule(s)",
            )

            for rule in account.rules.order_by("order"):
                if rule.enabled:
                    try:
                        total_processed_files += self._handle_mail_rule(
                            M,
                            rule,
                            supports_gmail_labels=supports_gmail_labels,
                        )
                    except Exception as e:
                        self.log.exception(
                            f"Rule {rule}: Error while processing rule: {e}",
                        )
                else:
                    self.log.debug(f"Rule {rule}: Skipping disabled rule")

    except MailError:
        raise
    except Exception as e:
        self.log.error(
            f"Error while retrieving mailbox {account}: {e}",
            exc_info=False,
        )

    return total_processed_files
def _preprocess_message(self, message: MailMessage):
    """Pass the message through every registered preprocessor in order and
    return the final result."""
    current = message
    for step in self._message_preprocessors:
        current = step.run(current)
    return current
def _handle_mail_rule(
    self,
    M: MailBox,
    rule: MailRule,
    *,
    supports_gmail_labels: bool,
):
    """
    Fetch the mails matching ``rule`` and process each unprocessed one.

    Returns the number of files queued for consumption.

    :raises MailError: if a required folder cannot be selected or the fetch
        call fails
    """
    # In case of MOVE, make sure also the destination exists. The rule's own
    # folder is selected last so that it stays the active one.
    if rule.action == MailRule.MailAction.MOVE:
        folders = [rule.action_parameter, rule.folder]
    else:
        folders = [rule.folder]

    try:
        for folder in folders:
            self.log.debug(f"Rule {rule}: Selecting folder {folder}")
            M.folder.set(folder)
    except MailboxFolderSelectError as err:
        self.log.error(
            f"Unable to access folder {folder}, attempting folder listing",
        )
        # Best effort: list what does exist to help the user fix the rule.
        try:
            for folder_info in M.folder.list():
                self.log.info(f"Located folder: {folder_info.name}")
        except Exception as e:
            self.log.error(
                "Exception during folder listing, unable to provide list folders: "
                + str(e),
            )

        raise MailError(
            f"Rule {rule}: Folder {folder} "
            f"does not exist in account {rule.account}",
        ) from err

    criterias = make_criterias(rule, supports_gmail_labels=supports_gmail_labels)

    self.log.debug(
        f"Rule {rule}: Searching folder with criteria {criterias}",
    )

    # NOTE(review): if M.fetch is lazy, errors may only surface while
    # iterating below rather than here - confirm against imap_tools.
    try:
        messages = M.fetch(
            criteria=criterias,
            mark_seen=False,
            charset=rule.account.character_set,
            bulk=True,
        )
    except Exception as err:
        raise MailError(
            f"Rule {rule}: Error while fetching folder {rule.folder}",
        ) from err

    total_processed_files = 0
    mails_processed = 0

    for message in messages:
        if TYPE_CHECKING:
            assert isinstance(message, MailMessage)

        already_processed = ProcessedMail.objects.filter(
            rule=rule,
            uid=message.uid,
            folder=rule.folder,
        ).exists()
        if already_processed:
            self.log.debug(
                f"Skipping mail '{message.uid}' subject '{message.subject}' from '{message.from_}', already processed.",
            )
            continue

        try:
            processed_files = self._handle_message(message, rule)
            total_processed_files += processed_files
            mails_processed += 1
        except Exception as e:
            self.log.exception(
                f"Rule {rule}: Error while processing mail {message.uid}: {e}",
            )

    self.log.debug(f"Rule {rule}: Processed {mails_processed} matching mail(s)")

    return total_processed_files
def _handle_message(self, message, rule: MailRule) -> int:
    """
    Preprocess one mail and queue its .eml and/or attachments for
    consumption, depending on ``rule.consumption_scope``.

    Returns the number of elements handed to the consumer.
    """
    message = self._preprocess_message(message)

    scopes = MailRule.ConsumptionScope
    scope = rule.consumption_scope

    # Skip Message handling when only attachments are to be processed but
    # message doesn't have any.
    if not message.attachments and scope == scopes.ATTACHMENTS_ONLY:
        return 0

    self.log.debug(
        f"Rule {rule}: "
        f"Processing mail {message.subject} from {message.from_} with "
        f"{len(message.attachments)} attachment(s)",
    )

    tag_ids: list[int] = [tag.id for tag in rule.assign_tags.all()]
    doc_type = rule.assign_document_type

    wants_eml = scope == scopes.EML_ONLY or scope == scopes.EVERYTHING
    wants_attachments = (
        scope == scopes.ATTACHMENTS_ONLY or scope == scopes.EVERYTHING
    )

    processed_elements = 0
    if wants_eml:
        processed_elements += self._process_eml(
            message,
            rule,
            tag_ids,
            doc_type,
        )
    if wants_attachments:
        processed_elements += self._process_attachments(
            message,
            rule,
            tag_ids,
            doc_type,
        )

    return processed_elements
def filename_inclusion_matches(
self,
filter_attachment_filename_include: str | None,
filename: str,
) -> bool:
if filter_attachment_filename_include:
filter_attachment_filename_inclusions = (
filter_attachment_filename_include.split(",")
)
# Force the filename and pattern to the lowercase
# as this is system dependent otherwise
filename = filename.lower()
for filename_include in filter_attachment_filename_inclusions:
if filename_include and fnmatch(filename, filename_include.lower()):
return True
return False
return True
def filename_exclusion_matches(
self,
filter_attachment_filename_exclude: str | None,
filename: str,
) -> bool:
if filter_attachment_filename_exclude:
filter_attachment_filename_exclusions = (
filter_attachment_filename_exclude.split(",")
)
# Force the filename and pattern to the lowercase
# as this is system dependent otherwise
filename = filename.lower()
for filename_exclude in filter_attachment_filename_exclusions:
if filename_exclude and fnmatch(filename, filename_exclude.lower()):
return True
return False
def _process_attachments(
    self,
    message: MailMessage,
    rule: MailRule,
    tag_ids,
    doc_type,
):
    """
    Queue every acceptable attachment of ``message`` for consumption.

    An attachment is skipped when its content disposition, file name or
    detected mime type is rejected by the rule or unsupported. When nothing
    is queued, the mail is recorded as processed without consumption (unless
    a ProcessedMail row already exists).

    Returns the number of attachments queued.
    """
    consume_tasks = []
    processed_attachments = 0

    attachments_only = MailRule.AttachmentProcessing.ATTACHMENTS_ONLY

    for att in message.attachments:
        is_inline = att.content_disposition != "attachment"
        if is_inline and rule.attachment_type == attachments_only:
            self.log.debug(
                f"Rule {rule}: "
                f"Skipping attachment {att.filename} "
                f"with content disposition {att.content_disposition}",
            )
            continue

        if not self.filename_inclusion_matches(
            rule.filter_attachment_filename_include,
            att.filename,
        ):
            self.log.debug(
                f"Rule {rule}: "
                f"Skipping attachment {att.filename} "
                f"does not match pattern {rule.filter_attachment_filename_include}",
            )
            continue

        if self.filename_exclusion_matches(
            rule.filter_attachment_filename_exclude,
            att.filename,
        ):
            self.log.debug(
                f"Rule {rule}: "
                f"Skipping attachment {att.filename} "
                f"does match pattern {rule.filter_attachment_filename_exclude}",
            )
            continue

        correspondent = self._get_correspondent(message, rule)
        title = self._get_title(message, att, rule)

        # don't trust the content type of the attachment. Could be
        # generic application/octet-stream.
        mime_type = magic.from_buffer(att.payload, mime=True)

        if not is_mime_type_supported(mime_type):
            self.log.debug(
                f"Rule {rule}: "
                f"Skipping attachment {att.filename} "
                f"since guessed mime type {mime_type} is not supported "
                f"by paperless",
            )
            continue

        self.log.info(
            f"Rule {rule}: "
            f"Consuming attachment {att.filename} from mail "
            f"{message.subject} from {message.from_}",
        )

        # Each attachment gets its own scratch directory so that the file
        # can keep its (sanitized) original name.
        settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
        temp_dir = Path(
            tempfile.mkdtemp(
                prefix="paperless-mail-",
                dir=settings.SCRATCH_DIR,
            ),
        )

        attachment_name = pathvalidate.sanitize_filename(att.filename)
        if attachment_name:
            temp_filename = temp_dir / attachment_name
        else:  # pragma: no cover
            # Some cases may have no name (generally inline)
            temp_filename = temp_dir / "no-name-attachment"
        temp_filename.write_bytes(att.payload)

        if rule.assign_owner_from_rule and rule.owner:
            owner_id = rule.owner.id
        else:
            owner_id = None

        input_doc = ConsumableDocument(
            source=DocumentSource.MailFetch,
            original_file=temp_filename,
            mailrule_id=rule.pk,
        )
        doc_overrides = DocumentMetadataOverrides(
            title=title,
            filename=attachment_name,
            correspondent_id=correspondent.id if correspondent else None,
            document_type_id=doc_type.id if doc_type else None,
            tag_ids=tag_ids,
            owner_id=owner_id,
        )

        consume_tasks.append(consume_file.s(input_doc, doc_overrides))
        processed_attachments += 1

    if len(consume_tasks) > 0:
        queue_consumption_tasks(
            consume_tasks=consume_tasks,
            rule=rule,
            message=message,
        )
        return processed_attachments

    # No files to consume, just mark as processed if it wasn't by .eml processing
    already_recorded = ProcessedMail.objects.filter(
        rule=rule,
        uid=message.uid,
        folder=rule.folder,
    ).exists()
    if not already_recorded:
        ProcessedMail.objects.create(
            rule=rule,
            folder=rule.folder,
            uid=message.uid,
            subject=message.subject,
            received=make_aware(message.date)
            if is_naive(message.date)
            else message.date,
            status="PROCESSED_WO_CONSUMPTION",
        )

    return processed_attachments
def _process_eml(
    self,
    message: MailMessage,
    rule: MailRule,
    tag_ids,
    doc_type,
):
    """
    Write the whole mail to an .eml file in the scratch directory and queue
    it for consumption.

    :param message: the mail to store; its header order is modified in place
    :param rule: the rule that matched the mail
    :param tag_ids: ids of the tags to assign to the document
    :param doc_type: document type to assign, or None
    :return: the number of elements queued (always 1)
    """
    settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)

    # Move "From"-header to beginning of file
    # TODO: This ugly workaround is needed because the parser is
    #   chosen only by the mime_type detected via magic
    #   (see documents/consumer.py "mime_type = magic.from_file")
    #   Unfortunately magic sometimes fails to detect the mime
    #   type of .eml files correctly as message/rfc822 and instead
    #   detects text/plain.
    #   This also effects direct file consumption of .eml files
    #   which are not treated with this workaround.
    from_element = None
    for i, header in enumerate(message.obj._headers):
        if header[0] == "From":
            from_element = i
    if from_element is not None:
        new_headers = [message.obj._headers.pop(from_element)]
        new_headers += message.obj._headers
        message.obj._headers = new_headers

    # NamedTemporaryFile owns the file handle and closes it when the block
    # ends, so no OS-level descriptor is leaked (mkstemp returned an open
    # descriptor that was never closed). delete=False keeps the file on disk
    # for the consumer.
    with tempfile.NamedTemporaryFile(
        prefix="paperless-mail-",
        dir=settings.SCRATCH_DIR,
        suffix=".eml",
        delete=False,
    ) as f:
        f.write(message.obj.as_bytes())
        temp_filename = Path(f.name)

    correspondent = self._get_correspondent(message, rule)

    self.log.info(
        f"Rule {rule}: "
        f"Consuming eml from mail "
        f"{message.subject} from {message.from_}",
    )

    input_doc = ConsumableDocument(
        source=DocumentSource.MailFetch,
        original_file=temp_filename,
        mailrule_id=rule.pk,
    )
    doc_overrides = DocumentMetadataOverrides(
        title=message.subject,
        filename=pathvalidate.sanitize_filename(f"{message.subject}.eml"),
        correspondent_id=correspondent.id if correspondent else None,
        document_type_id=doc_type.id if doc_type else None,
        tag_ids=tag_ids,
        # Same ownership rule as for attachments: the rule's owner is only
        # assigned when the rule asks for it.
        owner_id=(
            rule.owner.id
            if (rule.assign_owner_from_rule and rule.owner)
            else None
        ),
    )

    consume_task = consume_file.s(
        input_doc,
        doc_overrides,
    )

    queue_consumption_tasks(
        consume_tasks=[consume_task],
        rule=rule,
        message=message,
    )

    processed_elements = 1
    return processed_elements

View File

@@ -0,0 +1,10 @@
from django.core.management.base import BaseCommand
from paperless_mail import tasks
class Command(BaseCommand):
    """Management command that runs the mail account processing once."""

    help = "Manually triggers a fetching and processing of all mail accounts"

    def handle(self, *args, **options):
        """Run ``tasks.process_mail_accounts``; arguments are ignored."""
        tasks.process_mail_accounts()

View File

@@ -0,0 +1,154 @@
# Generated by Django 3.1.3 on 2020-11-15 22:54
import django.db.models.deletion
from django.db import migrations
from django.db import models
# Initial migration of the paperless_mail app: creates the MailAccount and
# MailRule tables. Generated code - it must stay in sync with the recorded
# migration history, so only comments are added here.
class Migration(migrations.Migration):
initial = True
# Requires the documents app at this migration, because MailRule has foreign
# keys to documents.correspondent, documents.documenttype and documents.tag.
dependencies = [
("documents", "1002_auto_20201111_1105"),
]
operations = [
# MailAccount: connection settings and credentials of one IMAP account.
migrations.CreateModel(
name="MailAccount",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("name", models.CharField(max_length=256, unique=True)),
("imap_server", models.CharField(max_length=256)),
("imap_port", models.IntegerField(blank=True, null=True)),
(
"imap_security",
models.PositiveIntegerField(
choices=[
(1, "No encryption"),
(2, "Use SSL"),
(3, "Use STARTTLS"),
],
default=2,
),
),
("username", models.CharField(max_length=256)),
("password", models.CharField(max_length=256)),
],
),
# MailRule: filters, action and metadata assignment for mails of one account.
migrations.CreateModel(
name="MailRule",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("name", models.CharField(max_length=256)),
("folder", models.CharField(default="INBOX", max_length=256)),
# Optional text filters.
(
"filter_from",
models.CharField(blank=True, max_length=256, null=True),
),
(
"filter_subject",
models.CharField(blank=True, max_length=256, null=True),
),
(
"filter_body",
models.CharField(blank=True, max_length=256, null=True),
),
("maximum_age", models.PositiveIntegerField(default=30)),
# What to do with the mail; the default (3) is "mark as read".
(
"action",
models.PositiveIntegerField(
choices=[
(1, "Delete"),
(2, "Move to specified folder"),
(3, "Mark as read, don't process read mails"),
(4, "Flag the mail, don't process flagged mails"),
],
default=3,
help_text="The action applied to the mail. This action is only performed when documents were consumed from the mail. Mails without attachments will remain entirely untouched.",
),
),
(
"action_parameter",
models.CharField(
blank=True,
help_text="Additional parameter for the action selected above, i.e., the target folder of the move to folder action.",
max_length=256,
null=True,
),
),
# Where the title and correspondent of consumed documents come from.
(
"assign_title_from",
models.PositiveIntegerField(
choices=[
(1, "Use subject as title"),
(2, "Use attachment filename as title"),
],
default=1,
),
),
(
"assign_correspondent_from",
models.PositiveIntegerField(
choices=[
(1, "Do not assign a correspondent"),
(2, "Use mail address"),
(3, "Use name (or mail address if not available)"),
(4, "Use correspondent selected below"),
],
default=1,
),
),
# Deleting an account deletes its rules (CASCADE).
(
"account",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="rules",
to="paperless_mail.mailaccount",
),
),
# Optional references into the documents app; they are set to NULL when
# the referenced object is deleted.
(
"assign_correspondent",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="documents.correspondent",
),
),
(
"assign_document_type",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="documents.documenttype",
),
),
(
"assign_tag",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="documents.tag",
),
),
],
),
]

View File

@@ -0,0 +1,477 @@
# Generated by Django 4.2.13 on 2024-06-28 17:46
import django.db.migrations.operations.special
import django.db.models.deletion
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
# Squashed migration for the paperless_mail app: it stands in for the nine
# migrations listed in `replaces` (0001_initial through
# 0009_mailrule_assign_tags) and replays their operations in a single file.
replaces = [
("paperless_mail", "0001_initial"),
("paperless_mail", "0002_auto_20201117_1334"),
("paperless_mail", "0003_auto_20201118_1940"),
("paperless_mail", "0004_mailrule_order"),
("paperless_mail", "0005_help_texts"),
("paperless_mail", "0006_auto_20210101_2340"),
("paperless_mail", "0007_auto_20210106_0138"),
("paperless_mail", "0008_auto_20210516_0940"),
("paperless_mail", "0009_mailrule_assign_tags"),
]
# Migrations of the documents app that must be applied first; MailRule below
# holds foreign keys to documents.correspondent, documents.documenttype and
# documents.tag.
dependencies = [
("documents", "1002_auto_20201111_1105"),
("documents", "1011_auto_20210101_2340"),
]
operations = [
# Initial schema: the MailAccount table.
migrations.CreateModel(
name="MailAccount",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("name", models.CharField(max_length=256, unique=True)),
("imap_server", models.CharField(max_length=256)),
("imap_port", models.IntegerField(blank=True, null=True)),
(
"imap_security",
models.PositiveIntegerField(
choices=[
(1, "No encryption"),
(2, "Use SSL"),
(3, "Use STARTTLS"),
],
default=2,
),
),
("username", models.CharField(max_length=256)),
("password", models.CharField(max_length=256)),
],
),
# Initial schema: the MailRule table. Each rule belongs to one MailAccount
# (deleted with it via CASCADE) and may point at a correspondent, document
# type and tag, each of which is set to NULL when the target is deleted.
migrations.CreateModel(
name="MailRule",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("name", models.CharField(max_length=256)),
("folder", models.CharField(default="INBOX", max_length=256)),
(
"filter_from",
models.CharField(blank=True, max_length=256, null=True),
),
(
"filter_subject",
models.CharField(blank=True, max_length=256, null=True),
),
(
"filter_body",
models.CharField(blank=True, max_length=256, null=True),
),
("maximum_age", models.PositiveIntegerField(default=30)),
(
"action",
models.PositiveIntegerField(
choices=[
(1, "Delete"),
(2, "Move to specified folder"),
(3, "Mark as read, don't process read mails"),
(4, "Flag the mail, don't process flagged mails"),
],
default=3,
help_text="The action applied to the mail. This action is only performed when documents were consumed from the mail. Mails without attachments will remain entirely untouched.",
),
),
(
"action_parameter",
models.CharField(
blank=True,
help_text="Additional parameter for the action selected above, i.e., the target folder of the move to folder action.",
max_length=256,
null=True,
),
),
(
"assign_title_from",
models.PositiveIntegerField(
choices=[
(1, "Use subject as title"),
(2, "Use attachment filename as title"),
],
default=1,
),
),
(
"assign_correspondent_from",
models.PositiveIntegerField(
choices=[
(1, "Do not assign a correspondent"),
(2, "Use mail address"),
(3, "Use name (or mail address if not available)"),
(4, "Use correspondent selected below"),
],
default=1,
),
),
(
"account",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="rules",
to="paperless_mail.mailaccount",
),
),
(
"assign_correspondent",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="documents.correspondent",
),
),
(
"assign_document_type",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="documents.documenttype",
),
),
(
"assign_tag",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="documents.tag",
),
),
],
),
# Forward and reverse callables are both the built-in no-op, so this step
# performs no work in either direction.
migrations.RunPython(
code=django.db.migrations.operations.special.RunPython.noop,
reverse_code=django.db.migrations.operations.special.RunPython.noop,
),
# Add help text to imap_port and make rule names unique.
migrations.AlterField(
model_name="mailaccount",
name="imap_port",
field=models.IntegerField(
blank=True,
help_text="This is usually 143 for unencrypted and STARTTLS connections, and 993 for SSL connections.",
null=True,
),
),
migrations.AlterField(
model_name="mailrule",
name="name",
field=models.CharField(max_length=256, unique=True),
),
# Integer "order" column on rules, defaulting to 0.
migrations.AddField(
model_name="mailrule",
name="order",
field=models.IntegerField(default=0),
),
# Re-list the action choices in a different order (the integer/label pairs
# are unchanged) without the help text given at creation, and add help text
# to maximum_age.
migrations.AlterField(
model_name="mailrule",
name="action",
field=models.PositiveIntegerField(
choices=[
(3, "Mark as read, don't process read mails"),
(4, "Flag the mail, don't process flagged mails"),
(2, "Move to specified folder"),
(1, "Delete"),
],
default=3,
),
),
migrations.AlterField(
model_name="mailrule",
name="maximum_age",
field=models.PositiveIntegerField(
default=30,
help_text="Specified in days.",
),
),
# Human-readable singular/plural names for both models.
migrations.AlterModelOptions(
name="mailaccount",
options={
"verbose_name": "mail account",
"verbose_name_plural": "mail accounts",
},
),
migrations.AlterModelOptions(
name="mailrule",
options={"verbose_name": "mail rule", "verbose_name_plural": "mail rules"},
),
# The AlterField operations from here down to the "order" field attach a
# verbose_name to each existing field; the remaining field options repeat
# the values already established above.
migrations.AlterField(
model_name="mailaccount",
name="imap_port",
field=models.IntegerField(
blank=True,
help_text="This is usually 143 for unencrypted and STARTTLS connections, and 993 for SSL connections.",
null=True,
verbose_name="IMAP port",
),
),
migrations.AlterField(
model_name="mailaccount",
name="imap_security",
field=models.PositiveIntegerField(
choices=[(1, "No encryption"), (2, "Use SSL"), (3, "Use STARTTLS")],
default=2,
verbose_name="IMAP security",
),
),
migrations.AlterField(
model_name="mailaccount",
name="imap_server",
field=models.CharField(max_length=256, verbose_name="IMAP server"),
),
migrations.AlterField(
model_name="mailaccount",
name="name",
field=models.CharField(max_length=256, unique=True, verbose_name="name"),
),
migrations.AlterField(
model_name="mailaccount",
name="password",
field=models.CharField(max_length=256, verbose_name="password"),
),
migrations.AlterField(
model_name="mailaccount",
name="username",
field=models.CharField(max_length=256, verbose_name="username"),
),
migrations.AlterField(
model_name="mailrule",
name="account",
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="rules",
to="paperless_mail.mailaccount",
verbose_name="account",
),
),
migrations.AlterField(
model_name="mailrule",
name="action",
field=models.PositiveIntegerField(
choices=[
(3, "Mark as read, don't process read mails"),
(4, "Flag the mail, don't process flagged mails"),
(2, "Move to specified folder"),
(1, "Delete"),
],
default=3,
verbose_name="action",
),
),
migrations.AlterField(
model_name="mailrule",
name="action_parameter",
field=models.CharField(
blank=True,
help_text="Additional parameter for the action selected above, i.e., the target folder of the move to folder action.",
max_length=256,
null=True,
verbose_name="action parameter",
),
),
migrations.AlterField(
model_name="mailrule",
name="assign_correspondent",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="documents.correspondent",
verbose_name="assign this correspondent",
),
),
migrations.AlterField(
model_name="mailrule",
name="assign_correspondent_from",
field=models.PositiveIntegerField(
choices=[
(1, "Do not assign a correspondent"),
(2, "Use mail address"),
(3, "Use name (or mail address if not available)"),
(4, "Use correspondent selected below"),
],
default=1,
verbose_name="assign correspondent from",
),
),
migrations.AlterField(
model_name="mailrule",
name="assign_document_type",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="documents.documenttype",
verbose_name="assign this document type",
),
),
migrations.AlterField(
model_name="mailrule",
name="assign_tag",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="documents.tag",
verbose_name="assign this tag",
),
),
migrations.AlterField(
model_name="mailrule",
name="assign_title_from",
field=models.PositiveIntegerField(
choices=[
(1, "Use subject as title"),
(2, "Use attachment filename as title"),
],
default=1,
verbose_name="assign title from",
),
),
migrations.AlterField(
model_name="mailrule",
name="filter_body",
field=models.CharField(
blank=True,
max_length=256,
null=True,
verbose_name="filter body",
),
),
migrations.AlterField(
model_name="mailrule",
name="filter_from",
field=models.CharField(
blank=True,
max_length=256,
null=True,
verbose_name="filter from",
),
),
migrations.AlterField(
model_name="mailrule",
name="filter_subject",
field=models.CharField(
blank=True,
max_length=256,
null=True,
verbose_name="filter subject",
),
),
migrations.AlterField(
model_name="mailrule",
name="folder",
field=models.CharField(
default="INBOX",
max_length=256,
verbose_name="folder",
),
),
migrations.AlterField(
model_name="mailrule",
name="maximum_age",
field=models.PositiveIntegerField(
default=30,
help_text="Specified in days.",
verbose_name="maximum age",
),
),
migrations.AlterField(
model_name="mailrule",
name="name",
field=models.CharField(max_length=256, unique=True, verbose_name="name"),
),
migrations.AlterField(
model_name="mailrule",
name="order",
field=models.IntegerField(default=0, verbose_name="order"),
),
# New rule fields: attachment_type (default 1) and the optional
# filter_attachment_filename pattern.
migrations.AddField(
model_name="mailrule",
name="attachment_type",
field=models.PositiveIntegerField(
choices=[
(1, "Only process attachments."),
(2, "Process all files, including 'inline' attachments."),
],
default=1,
help_text="Inline attachments include embedded images, so it's best to combine this option with a filename filter.",
verbose_name="attachment type",
),
),
migrations.AddField(
model_name="mailrule",
name="filter_attachment_filename",
field=models.CharField(
blank=True,
help_text="Only consume documents which entirely match this filename if specified. Wildcards such as *.pdf or *invoice* are allowed. Case insensitive.",
max_length=256,
null=True,
verbose_name="filter attachment filename",
),
),
# Per-account character set (default "UTF-8"), followed by help-text updates
# on action_parameter and folder stating that subfolders are separated by
# dots.
migrations.AddField(
model_name="mailaccount",
name="character_set",
field=models.CharField(
default="UTF-8",
help_text="The character set to use when communicating with the mail server, such as 'UTF-8' or 'US-ASCII'.",
max_length=256,
verbose_name="character set",
),
),
migrations.AlterField(
model_name="mailrule",
name="action_parameter",
field=models.CharField(
blank=True,
help_text="Additional parameter for the action selected above, i.e., the target folder of the move to folder action. Subfolders must be separated by dots.",
max_length=256,
null=True,
verbose_name="action parameter",
),
),
migrations.AlterField(
model_name="mailrule",
name="folder",
field=models.CharField(
default="INBOX",
help_text="Subfolders must be separated by dots.",
max_length=256,
verbose_name="folder",
),
),
# Many-to-many assign_tags relation to documents.tag, added next to the
# existing single assign_tag foreign key (which this migration keeps).
migrations.AddField(
model_name="mailrule",
name="assign_tags",
field=models.ManyToManyField(
blank=True,
related_name="mail_rules_multi",
to="documents.tag",
verbose_name="assign this tag",
),
),
]

View File

@@ -0,0 +1,12 @@
# Generated by Django 3.1.3 on 2020-11-17 13:34
from django.db import migrations
from django.db.migrations import RunPython
class Migration(migrations.Migration):
# Placeholder step in the paperless_mail history: it follows 0001_initial and
# changes nothing.
dependencies = [
("paperless_mail", "0001_initial"),
]
# Both the forward and the reverse callable are the built-in no-op.
operations = [RunPython(migrations.RunPython.noop, migrations.RunPython.noop)]

View File

@@ -0,0 +1,27 @@
# Generated by Django 3.1.3 on 2020-11-18 19:40
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
# Adds help text to MailAccount.imap_port and makes MailRule.name unique.
dependencies = [
("paperless_mail", "0002_auto_20201117_1334"),
]
operations = [
# imap_port stays optional (blank/null allowed); only the help text is new.
migrations.AlterField(
model_name="mailaccount",
name="imap_port",
field=models.IntegerField(
blank=True,
help_text="This is usually 143 for unencrypted and STARTTLS connections, and 993 for SSL connections.",
null=True,
),
),
# Rule names gain a field-level uniqueness requirement.
migrations.AlterField(
model_name="mailrule",
name="name",
field=models.CharField(max_length=256, unique=True),
),
]

View File

@@ -0,0 +1,18 @@
# Generated by Django 3.1.3 on 2020-11-21 21:51
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
# Adds the integer "order" column to MailRule.
dependencies = [
("paperless_mail", "0003_auto_20201118_1940"),
]
operations = [
# The column is created with a default of 0.
migrations.AddField(
model_name="mailrule",
name="order",
field=models.IntegerField(default=0),
),
]

View File

@@ -0,0 +1,34 @@
# Generated by Django 3.1.3 on 2020-11-22 10:36
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
# Redefines MailRule.action and adds help text to MailRule.maximum_age.
dependencies = [
("paperless_mail", "0004_mailrule_order"),
]
operations = [
# The four integer/label pairs are listed in a new order; the default
# remains 3 and no help_text is set on the field here.
migrations.AlterField(
model_name="mailrule",
name="action",
field=models.PositiveIntegerField(
choices=[
(3, "Mark as read, don't process read mails"),
(4, "Flag the mail, don't process flagged mails"),
(2, "Move to specified folder"),
(1, "Delete"),
],
default=3,
),
),
# The help text states the unit of maximum_age (days).
migrations.AlterField(
model_name="mailrule",
name="maximum_age",
field=models.PositiveIntegerField(
default=30,
help_text="Specified in days.",
),
),
]

View File

@@ -0,0 +1,217 @@
# Generated by Django 3.1.4 on 2021-01-01 23:40
import django.db.models.deletion
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
# Sets human-readable model names on MailAccount and MailRule and attaches a
# verbose_name to each of their fields.
dependencies = [
("documents", "1011_auto_20210101_2340"),
("paperless_mail", "0005_help_texts"),
]
operations = [
# Singular/plural display names for both models.
migrations.AlterModelOptions(
name="mailaccount",
options={
"verbose_name": "mail account",
"verbose_name_plural": "mail accounts",
},
),
migrations.AlterModelOptions(
name="mailrule",
options={"verbose_name": "mail rule", "verbose_name_plural": "mail rules"},
),
# MailAccount fields: each definition below adds a verbose_name.
migrations.AlterField(
model_name="mailaccount",
name="imap_port",
field=models.IntegerField(
blank=True,
help_text="This is usually 143 for unencrypted and STARTTLS connections, and 993 for SSL connections.",
null=True,
verbose_name="IMAP port",
),
),
migrations.AlterField(
model_name="mailaccount",
name="imap_security",
field=models.PositiveIntegerField(
choices=[(1, "No encryption"), (2, "Use SSL"), (3, "Use STARTTLS")],
default=2,
verbose_name="IMAP security",
),
),
migrations.AlterField(
model_name="mailaccount",
name="imap_server",
field=models.CharField(max_length=256, verbose_name="IMAP server"),
),
migrations.AlterField(
model_name="mailaccount",
name="name",
field=models.CharField(max_length=256, unique=True, verbose_name="name"),
),
migrations.AlterField(
model_name="mailaccount",
name="password",
field=models.CharField(max_length=256, verbose_name="password"),
),
migrations.AlterField(
model_name="mailaccount",
name="username",
field=models.CharField(max_length=256, verbose_name="username"),
),
# MailRule fields: each definition below adds a verbose_name.
migrations.AlterField(
model_name="mailrule",
name="account",
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="rules",
to="paperless_mail.mailaccount",
verbose_name="account",
),
),
migrations.AlterField(
model_name="mailrule",
name="action",
field=models.PositiveIntegerField(
choices=[
(3, "Mark as read, don't process read mails"),
(4, "Flag the mail, don't process flagged mails"),
(2, "Move to specified folder"),
(1, "Delete"),
],
default=3,
verbose_name="action",
),
),
migrations.AlterField(
model_name="mailrule",
name="action_parameter",
field=models.CharField(
blank=True,
help_text="Additional parameter for the action selected above, i.e., the target folder of the move to folder action.",
max_length=256,
null=True,
verbose_name="action parameter",
),
),
migrations.AlterField(
model_name="mailrule",
name="assign_correspondent",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="documents.correspondent",
verbose_name="assign this correspondent",
),
),
migrations.AlterField(
model_name="mailrule",
name="assign_correspondent_from",
field=models.PositiveIntegerField(
choices=[
(1, "Do not assign a correspondent"),
(2, "Use mail address"),
(3, "Use name (or mail address if not available)"),
(4, "Use correspondent selected below"),
],
default=1,
verbose_name="assign correspondent from",
),
),
migrations.AlterField(
model_name="mailrule",
name="assign_document_type",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="documents.documenttype",
verbose_name="assign this document type",
),
),
migrations.AlterField(
model_name="mailrule",
name="assign_tag",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="documents.tag",
verbose_name="assign this tag",
),
),
migrations.AlterField(
model_name="mailrule",
name="assign_title_from",
field=models.PositiveIntegerField(
choices=[
(1, "Use subject as title"),
(2, "Use attachment filename as title"),
],
default=1,
verbose_name="assign title from",
),
),
migrations.AlterField(
model_name="mailrule",
name="filter_body",
field=models.CharField(
blank=True,
max_length=256,
null=True,
verbose_name="filter body",
),
),
migrations.AlterField(
model_name="mailrule",
name="filter_from",
field=models.CharField(
blank=True,
max_length=256,
null=True,
verbose_name="filter from",
),
),
migrations.AlterField(
model_name="mailrule",
name="filter_subject",
field=models.CharField(
blank=True,
max_length=256,
null=True,
verbose_name="filter subject",
),
),
migrations.AlterField(
model_name="mailrule",
name="folder",
field=models.CharField(
default="INBOX",
max_length=256,
verbose_name="folder",
),
),
migrations.AlterField(
model_name="mailrule",
name="maximum_age",
field=models.PositiveIntegerField(
default=30,
help_text="Specified in days.",
verbose_name="maximum age",
),
),
migrations.AlterField(
model_name="mailrule",
name="name",
field=models.CharField(max_length=256, unique=True, verbose_name="name"),
),
migrations.AlterField(
model_name="mailrule",
name="order",
field=models.IntegerField(default=0, verbose_name="order"),
),
]

View File

@@ -0,0 +1,37 @@
# Generated by Django 3.1.5 on 2021-01-06 01:38
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
# Adds two MailRule fields: attachment_type and filter_attachment_filename.
dependencies = [
("paperless_mail", "0006_auto_20210101_2340"),
]
operations = [
# Choice between attachments only (1, the default) and all files including
# inline attachments (2).
migrations.AddField(
model_name="mailrule",
name="attachment_type",
field=models.PositiveIntegerField(
choices=[
(1, "Only process attachments."),
(2, "Process all files, including 'inline' attachments."),
],
default=1,
help_text="Inline attachments include embedded images, so it's best to combine this option with a filename filter.",
verbose_name="attachment type",
),
),
# Optional filename pattern; the column accepts blank and NULL.
migrations.AddField(
model_name="mailrule",
name="filter_attachment_filename",
field=models.CharField(
blank=True,
help_text="Only consume documents which entirely match this filename if specified. Wildcards such as *.pdf or *invoice* are allowed. Case insensitive.",
max_length=256,
null=True,
verbose_name="filter attachment filename",
),
),
]

View File

@@ -0,0 +1,44 @@
# Generated by Django 3.2.3 on 2021-05-16 09:40
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
# Adds MailAccount.character_set and updates the help text of two MailRule
# fields to mention dot-separated subfolders.
dependencies = [
("paperless_mail", "0007_auto_20210106_0138"),
]
operations = [
# New column with a default of "UTF-8".
migrations.AddField(
model_name="mailaccount",
name="character_set",
field=models.CharField(
default="UTF-8",
help_text="The character set to use when communicating with the mail server, such as 'UTF-8' or 'US-ASCII'.",
max_length=256,
verbose_name="character set",
),
),
# Help text now ends with the sentence about dot-separated subfolders.
migrations.AlterField(
model_name="mailrule",
name="action_parameter",
field=models.CharField(
blank=True,
help_text="Additional parameter for the action selected above, i.e., the target folder of the move to folder action. Subfolders must be separated by dots.",
max_length=256,
null=True,
verbose_name="action parameter",
),
),
# folder gains a help text; default and max_length are as before.
migrations.AlterField(
model_name="mailrule",
name="folder",
field=models.CharField(
default="INBOX",
help_text="Subfolders must be separated by dots.",
max_length=256,
verbose_name="folder",
),
),
]

View File

@@ -0,0 +1,37 @@
# Generated by Django 4.0.3 on 2022-03-28 17:40
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
# Redefines MailRule.action and rewrites the MailRule.folder help text.
# This migration depends on 0008_auto_20210516_0940.
dependencies = [
("paperless_mail", "0008_auto_20210516_0940"),
]
operations = [
# NOTE(review): the integers are paired with different labels here than in
# the earlier definitions of this field (1 is "Mark as read" and 4 is
# "Delete", where 1 was "Delete" before), while default stays 3 and so now
# labels "Move to specified folder". Migration 0014_alter_mailrule_action
# defines the field with the earlier pairing again. Confirm before relying
# on the labels recorded at this point in the history.
migrations.AlterField(
model_name="mailrule",
name="action",
field=models.PositiveIntegerField(
choices=[
(1, "Mark as read, don't process read mails"),
(2, "Flag the mail, don't process flagged mails"),
(3, "Move to specified folder"),
(4, "Delete"),
],
default=3,
verbose_name="action",
),
),
# The help text no longer claims dots are the only subfolder delimiter.
migrations.AlterField(
model_name="mailrule",
name="folder",
field=models.CharField(
default="INBOX",
help_text="Subfolders must be separated by a delimiter, often a dot ('.') or slash ('/'), but it varies by mail server.",
max_length=256,
verbose_name="folder",
),
),
]

View File

@@ -0,0 +1,23 @@
# Generated by Django 3.2.12 on 2022-03-11 15:00
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
# Adds the many-to-many MailRule.assign_tags relation to documents.Tag.
# This migration depends on 0008_auto_20210516_0940.
dependencies = [
("paperless_mail", "0008_auto_20210516_0940"),
]
operations = [
# The relation is optional (blank=True) and uses the reverse name
# "mail_rules_multi".
migrations.AddField(
model_name="mailrule",
name="assign_tags",
field=models.ManyToManyField(
blank=True,
related_name="mail_rules_multi",
to="documents.Tag",
verbose_name="assign this tag",
),
),
]

View File

@@ -0,0 +1,39 @@
# Generated by Django 3.2.12 on 2022-03-11 15:02
from django.db import migrations
def migrate_tag_to_tags(apps, schema_editor):
    """Forward data migration: copy ``assign_tag`` into ``assign_tags``.

    Hand-written data migration, see
    https://docs.djangoproject.com/en/3.2/topics/migrations/#data-migrations

    Every mail rule with a truthy ``assign_tag`` gets that tag added to its
    ``assign_tags`` set and is then saved. Rules without a tag are left
    untouched.

    Args:
        apps: Registry used to look up the ``MailRule`` model of the
            ``paperless_mail`` app.
        schema_editor: Not used by this function.
    """
    rule_model = apps.get_model("paperless_mail", "MailRule")
    for rule in rule_model.objects.all():
        legacy_tag = rule.assign_tag
        if not legacy_tag:
            # Nothing to carry over for this rule.
            continue
        rule.assign_tags.add(legacy_tag)
        rule.save()
def migrate_tags_to_tag(apps, schema_editor):
    """Reverse data migration: copy a lone ``assign_tags`` entry to ``assign_tag``.

    Hand-written data migration, see
    https://docs.djangoproject.com/en/3.2/topics/migrations/#data-migrations

    A mail rule whose ``assign_tags`` set holds exactly one tag gets that tag
    written to ``assign_tag`` and is saved. Rules with zero or several tags
    are skipped, because no single tag can represent them.

    Args:
        apps: Registry used to look up the ``MailRule`` model of the
            ``paperless_mail`` app.
        schema_editor: Not used by this function.
    """
    rule_model = apps.get_model("paperless_mail", "MailRule")
    for rule in rule_model.objects.all():
        current_tags = list(rule.assign_tags.all())
        if len(current_tags) != 1:
            # Zero or several tags: nothing is written back.
            continue
        rule.assign_tag = current_tags[0]
        rule.save()
class Migration(migrations.Migration):
# Data migration that runs after the assign_tags field has been added.
dependencies = [
("paperless_mail", "0009_mailrule_assign_tags"),
]
operations = [
# Forward: migrate_tag_to_tags; reverse: migrate_tags_to_tag.
migrations.RunPython(migrate_tag_to_tags, migrate_tags_to_tag),
]

View File

@@ -0,0 +1,16 @@
# Generated by Django 3.2.12 on 2022-03-11 15:18
from django.db import migrations
class Migration(migrations.Migration):
# Removes the MailRule.assign_tag field; it depends on the
# 0010_auto_20220311_1602 data migration.
dependencies = [
("paperless_mail", "0010_auto_20220311_1602"),
]
operations = [
migrations.RemoveField(
model_name="mailrule",
name="assign_tag",
),
]

View File

@@ -0,0 +1,321 @@
# Generated by Django 4.2.13 on 2024-06-28 17:47
import django.db.models.deletion
import django.utils.timezone
from django.conf import settings
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
# Squashed migration for the paperless_mail app: it stands in for the fifteen
# migrations listed in `replaces` and replays their operations in one file.
replaces = [
("paperless_mail", "0011_remove_mailrule_assign_tag"),
("paperless_mail", "0012_alter_mailrule_assign_tags"),
("paperless_mail", "0009_alter_mailrule_action_alter_mailrule_folder"),
("paperless_mail", "0013_merge_20220412_1051"),
("paperless_mail", "0014_alter_mailrule_action"),
("paperless_mail", "0015_alter_mailrule_action"),
("paperless_mail", "0016_mailrule_consumption_scope"),
("paperless_mail", "0017_mailaccount_owner_mailrule_owner"),
("paperless_mail", "0018_processedmail"),
("paperless_mail", "0019_mailrule_filter_to"),
("paperless_mail", "0020_mailaccount_is_token"),
("paperless_mail", "0021_alter_mailaccount_password"),
("paperless_mail", "0022_mailrule_assign_owner_from_rule_and_more"),
("paperless_mail", "0023_remove_mailrule_filter_attachment_filename_and_more"),
("paperless_mail", "0024_alter_mailrule_name_and_more"),
]
# Requires the configured user model (the owner foreign keys below point at
# settings.AUTH_USER_MODEL) and the 0010_auto_20220311_1602 migration.
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
("paperless_mail", "0010_auto_20220311_1602"),
]
operations = [
# Drop the single-tag foreign key from MailRule.
migrations.RemoveField(
model_name="mailrule",
name="assign_tag",
),
# Redefine assign_tags without a related_name.
migrations.AlterField(
model_name="mailrule",
name="assign_tags",
field=models.ManyToManyField(
blank=True,
to="documents.tag",
verbose_name="assign this tag",
),
),
# The action field is altered three times in this file. This first
# definition pairs 1 with "Mark as read" and 4 with "Delete"; the next two
# definitions further down supersede it.
migrations.AlterField(
model_name="mailrule",
name="action",
field=models.PositiveIntegerField(
choices=[
(1, "Mark as read, don't process read mails"),
(2, "Flag the mail, don't process flagged mails"),
(3, "Move to specified folder"),
(4, "Delete"),
],
default=3,
verbose_name="action",
),
),
# The folder help text mentions that the subfolder delimiter varies by mail
# server.
migrations.AlterField(
model_name="mailrule",
name="folder",
field=models.CharField(
default="INBOX",
help_text="Subfolders must be separated by a delimiter, often a dot ('.') or slash ('/'), but it varies by mail server.",
max_length=256,
verbose_name="folder",
),
),
# Second action definition: 1 is "Delete" and 3 is "Mark as read".
migrations.AlterField(
model_name="mailrule",
name="action",
field=models.PositiveIntegerField(
choices=[
(1, "Delete"),
(2, "Move to specified folder"),
(3, "Mark as read, don't process read mails"),
(4, "Flag the mail, don't process flagged mails"),
],
default=3,
verbose_name="action",
),
),
# Third and last action definition in this file: same as the second plus
# choice 5 (tag the mail).
migrations.AlterField(
model_name="mailrule",
name="action",
field=models.PositiveIntegerField(
choices=[
(1, "Delete"),
(2, "Move to specified folder"),
(3, "Mark as read, don't process read mails"),
(4, "Flag the mail, don't process flagged mails"),
(5, "Tag the mail with specified tag, don't process tagged mails"),
],
default=3,
verbose_name="action",
),
),
# New consumption_scope field with three choices; the default is 1.
migrations.AddField(
model_name="mailrule",
name="consumption_scope",
field=models.PositiveIntegerField(
choices=[
(1, "Only process attachments."),
(
2,
"Process full Mail (with embedded attachments in file) as .eml",
),
(
3,
"Process full Mail (with embedded attachments in file) as .eml + process attachments as separate documents",
),
],
default=1,
verbose_name="consumption scope",
),
),
# Optional owner on accounts and rules; the reference is set to NULL when
# the user row is deleted.
migrations.AddField(
model_name="mailaccount",
name="owner",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
migrations.AddField(
model_name="mailrule",
name="owner",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
# ProcessedMail table. Every field except id and owner is declared
# editable=False, and rows are deleted together with their rule (CASCADE).
migrations.CreateModel(
name="ProcessedMail",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"folder",
models.CharField(
editable=False,
max_length=256,
verbose_name="folder",
),
),
(
"uid",
models.CharField(
editable=False,
max_length=256,
verbose_name="uid",
),
),
(
"subject",
models.CharField(
editable=False,
max_length=256,
verbose_name="subject",
),
),
(
"received",
models.DateTimeField(editable=False, verbose_name="received"),
),
(
"processed",
models.DateTimeField(
default=django.utils.timezone.now,
editable=False,
verbose_name="processed",
),
),
(
"status",
models.CharField(
editable=False,
max_length=256,
verbose_name="status",
),
),
(
"error",
models.TextField(
blank=True,
editable=False,
null=True,
verbose_name="error",
),
),
(
"owner",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to=settings.AUTH_USER_MODEL,
verbose_name="owner",
),
),
(
"rule",
models.ForeignKey(
editable=False,
on_delete=django.db.models.deletion.CASCADE,
to="paperless_mail.mailrule",
),
),
],
options={
"abstract": False,
},
),
# Optional filter_to field on rules.
migrations.AddField(
model_name="mailrule",
name="filter_to",
field=models.CharField(
blank=True,
max_length=256,
null=True,
verbose_name="filter to",
),
),
# Boolean is_token flag on accounts (default False), and a password column
# of up to 2048 characters.
migrations.AddField(
model_name="mailaccount",
name="is_token",
field=models.BooleanField(
default=False,
verbose_name="Is token authentication",
),
),
migrations.AlterField(
model_name="mailaccount",
name="password",
field=models.CharField(max_length=2048, verbose_name="password"),
),
# Boolean assign_owner_from_rule flag (default True), and a third
# assign_title_from choice.
migrations.AddField(
model_name="mailrule",
name="assign_owner_from_rule",
field=models.BooleanField(
default=True,
verbose_name="Assign the rule owner to documents",
),
),
migrations.AlterField(
model_name="mailrule",
name="assign_title_from",
field=models.PositiveIntegerField(
choices=[
(1, "Use subject as title"),
(2, "Use attachment filename as title"),
(3, "Do not assign title from rule"),
],
default=1,
verbose_name="assign title from",
),
),
# The filename filter is renamed to the "_include" variant, an "_exclude"
# counterpart is added, and the renamed field gets a new verbose_name.
migrations.RenameField(
model_name="mailrule",
old_name="filter_attachment_filename",
new_name="filter_attachment_filename_include",
),
migrations.AddField(
model_name="mailrule",
name="filter_attachment_filename_exclude",
field=models.CharField(
blank=True,
help_text="Do not consume documents which entirely match this filename if specified. Wildcards such as *.pdf or *invoice* are allowed. Case insensitive.",
max_length=256,
null=True,
verbose_name="filter attachment filename exclusive",
),
),
migrations.AlterField(
model_name="mailrule",
name="filter_attachment_filename_include",
field=models.CharField(
blank=True,
help_text="Only consume documents which entirely match this filename if specified. Wildcards such as *.pdf or *invoice* are allowed. Case insensitive.",
max_length=256,
null=True,
verbose_name="filter attachment filename inclusive",
),
),
# Rule names are redefined without unique=True; the two constraints that
# follow cover (name, owner) pairs and names of rules that have no owner.
migrations.AlterField(
model_name="mailrule",
name="name",
field=models.CharField(max_length=256, verbose_name="name"),
),
migrations.AddConstraint(
model_name="mailrule",
constraint=models.UniqueConstraint(
fields=("name", "owner"),
name="paperless_mail_mailrule_unique_name_owner",
),
),
migrations.AddConstraint(
model_name="mailrule",
constraint=models.UniqueConstraint(
condition=models.Q(("owner__isnull", True)),
fields=("name",),
name="paperless_mail_mailrule_name_unique",
),
),
]

View File

@@ -0,0 +1,22 @@
# Generated by Django 3.2.12 on 2022-03-11 16:21
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
# Redefines MailRule.assign_tags; it depends on the migration that removed
# assign_tag.
dependencies = [
("paperless_mail", "0011_remove_mailrule_assign_tag"),
]
operations = [
# This definition carries no related_name.
migrations.AlterField(
model_name="mailrule",
name="assign_tags",
field=models.ManyToManyField(
blank=True,
to="documents.Tag",
verbose_name="assign this tag",
),
),
]

View File

@@ -0,0 +1,12 @@
# Generated by Django 4.0.4 on 2022-04-12 08:51
from django.db import migrations
class Migration(migrations.Migration):
# Merge point: it depends on two separate migrations and performs no
# operations of its own.
dependencies = [
("paperless_mail", "0009_alter_mailrule_action_alter_mailrule_folder"),
("paperless_mail", "0012_alter_mailrule_assign_tags"),
]
operations = []

View File

@@ -0,0 +1,27 @@
# Generated by Django 4.0.4 on 2022-04-18 22:57
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
# Redefines the MailRule.action choices after the merge migration.
dependencies = [
("paperless_mail", "0013_merge_20220412_1051"),
]
operations = [
# 1 is "Delete" and 3 (the default) is "Mark as read" in this definition.
migrations.AlterField(
model_name="mailrule",
name="action",
field=models.PositiveIntegerField(
choices=[
(1, "Delete"),
(2, "Move to specified folder"),
(3, "Mark as read, don't process read mails"),
(4, "Flag the mail, don't process flagged mails"),
],
default=3,
verbose_name="action",
),
),
]

View File

@@ -0,0 +1,28 @@
# Generated by Django 4.0.4 on 2022-05-29 13:21
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
# Adds a fifth choice to MailRule.action.
dependencies = [
("paperless_mail", "0014_alter_mailrule_action"),
]
operations = [
# Choices 1-4 match the preceding migration; 5 (tag the mail) is new.
migrations.AlterField(
model_name="mailrule",
name="action",
field=models.PositiveIntegerField(
choices=[
(1, "Delete"),
(2, "Move to specified folder"),
(3, "Mark as read, don't process read mails"),
(4, "Flag the mail, don't process flagged mails"),
(5, "Tag the mail with specified tag, don't process tagged mails"),
],
default=3,
verbose_name="action",
),
),
]

View File

@@ -0,0 +1,32 @@
# Generated by Django 4.0.4 on 2022-07-11 22:02
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
# Adds the MailRule.consumption_scope field.
dependencies = [
("paperless_mail", "0015_alter_mailrule_action"),
]
operations = [
# Three choices: attachments only (1, the default), the full mail as .eml
# (2), or the full mail as .eml plus attachments as separate documents (3).
migrations.AddField(
model_name="mailrule",
name="consumption_scope",
field=models.PositiveIntegerField(
choices=[
(1, "Only process attachments."),
(
2,
"Process full Mail (with embedded attachments in file) as .eml",
),
(
3,
"Process full Mail (with embedded attachments in file) as .eml + process attachments as separate documents",
),
],
default=1,
verbose_name="consumption scope",
),
),
]

View File

@@ -0,0 +1,38 @@
# Generated by Django 4.1.3 on 2022-12-06 04:48
import django.db.models.deletion
from django.conf import settings
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """Add a nullable ``owner`` foreign key to mail accounts and mail rules."""

    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ("paperless_mail", "0016_mailrule_consumption_scope"),
    ]

    # Both models get an identically configured column; a fresh field
    # instance is created for each operation.
    operations = [
        migrations.AddField(
            model_name=target_model,
            name="owner",
            field=models.ForeignKey(
                to=settings.AUTH_USER_MODEL,
                on_delete=django.db.models.deletion.SET_NULL,
                null=True,
                blank=True,
                verbose_name="owner",
            ),
        )
        for target_model in ("mailaccount", "mailrule")
    ]

View File

@@ -0,0 +1,105 @@
# Generated by Django 4.1.5 on 2023-03-03 18:38
import django.db.models.deletion
import django.utils.timezone
from django.conf import settings
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """Create the ``ProcessedMail`` model."""

    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ("paperless_mail", "0017_mailaccount_owner_mailrule_owner"),
    ]

    operations = [
        migrations.CreateModel(
            name="ProcessedMail",
            options={"abstract": False},
            fields=[
                (
                    "id",
                    models.AutoField(
                        verbose_name="ID",
                        primary_key=True,
                        auto_created=True,
                        serialize=False,
                    ),
                ),
                # folder, uid and subject are identically shaped, read-only
                # character columns; the declaration order is kept.
                *[
                    (
                        column,
                        models.CharField(
                            verbose_name=column,
                            max_length=256,
                            editable=False,
                        ),
                    )
                    for column in ("folder", "uid", "subject")
                ],
                (
                    "received",
                    models.DateTimeField(verbose_name="received", editable=False),
                ),
                (
                    "processed",
                    models.DateTimeField(
                        verbose_name="processed",
                        default=django.utils.timezone.now,
                        editable=False,
                    ),
                ),
                (
                    "status",
                    models.CharField(
                        verbose_name="status",
                        max_length=256,
                        editable=False,
                    ),
                ),
                (
                    "error",
                    models.TextField(
                        verbose_name="error",
                        null=True,
                        blank=True,
                        editable=False,
                    ),
                ),
                (
                    "owner",
                    models.ForeignKey(
                        to=settings.AUTH_USER_MODEL,
                        on_delete=django.db.models.deletion.SET_NULL,
                        null=True,
                        blank=True,
                        verbose_name="owner",
                    ),
                ),
                (
                    "rule",
                    models.ForeignKey(
                        to="paperless_mail.mailrule",
                        on_delete=django.db.models.deletion.CASCADE,
                        editable=False,
                    ),
                ),
            ],
        ),
    ]

View File

@@ -0,0 +1,23 @@
# Generated by Django 4.1.7 on 2023-03-11 21:08
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """Add the optional ``filter_to`` column to ``MailRule``."""

    dependencies = [("paperless_mail", "0018_processedmail")]

    operations = [
        migrations.AddField(
            "mailrule",
            "filter_to",
            models.CharField(
                verbose_name="filter to",
                max_length=256,
                null=True,
                blank=True,
            ),
        ),
    ]

View File

@@ -0,0 +1,21 @@
# Generated by Django 4.1.7 on 2023-03-22 17:51
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """Add the ``is_token`` boolean (default ``False``) to ``MailAccount``."""

    dependencies = [("paperless_mail", "0019_mailrule_filter_to")]

    operations = [
        migrations.AddField(
            "mailaccount",
            "is_token",
            models.BooleanField(
                verbose_name="Is token authentication",
                default=False,
            ),
        ),
    ]

View File

@@ -0,0 +1,18 @@
# Generated by Django 4.1.7 on 2023-04-20 15:03
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """Alter ``MailAccount.password`` to a 2048 character ``CharField``."""

    dependencies = [("paperless_mail", "0020_mailaccount_is_token")]

    operations = [
        migrations.AlterField(
            "mailaccount",
            "password",
            models.CharField(verbose_name="password", max_length=2048),
        ),
    ]

View File

@@ -0,0 +1,34 @@
# Generated by Django 4.1.11 on 2023-09-18 18:50
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """
    Add ``MailRule.assign_owner_from_rule`` and alter the choices of
    ``MailRule.assign_title_from``.
    """

    dependencies = [("paperless_mail", "0021_alter_mailaccount_password")]

    # Choices recorded for ``assign_title_from`` by this migration.
    _TITLE_CHOICES = [
        (1, "Use subject as title"),
        (2, "Use attachment filename as title"),
        (3, "Do not assign title from rule"),
    ]

    operations = [
        migrations.AddField(
            model_name="mailrule",
            name="assign_owner_from_rule",
            field=models.BooleanField(
                verbose_name="Assign the rule owner to documents",
                default=True,
            ),
        ),
        migrations.AlterField(
            model_name="mailrule",
            name="assign_title_from",
            field=models.PositiveIntegerField(
                verbose_name="assign title from",
                default=1,
                choices=_TITLE_CHOICES,
            ),
        ),
    ]

View File

@@ -0,0 +1,40 @@
# Generated by Django 4.2.7 on 2023-12-04 03:06
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """
    Rename ``filter_attachment_filename`` to
    ``filter_attachment_filename_include`` and add the matching
    ``filter_attachment_filename_exclude`` column on ``MailRule``.
    """

    dependencies = [
        ("paperless_mail", "0022_mailrule_assign_owner_from_rule_and_more"),
    ]

    operations = [
        migrations.RenameField(
            "mailrule",
            "filter_attachment_filename",
            "filter_attachment_filename_include",
        ),
        migrations.AddField(
            model_name="mailrule",
            name="filter_attachment_filename_exclude",
            field=models.CharField(
                verbose_name="filter attachment filename exclusive",
                max_length=256,
                null=True,
                blank=True,
                help_text=(
                    "Do not consume documents which entirely match this "
                    "filename if specified. Wildcards such as *.pdf or "
                    "*invoice* are allowed. Case insensitive."
                ),
            ),
        ),
        migrations.AlterField(
            model_name="mailrule",
            name="filter_attachment_filename_include",
            field=models.CharField(
                verbose_name="filter attachment filename inclusive",
                max_length=256,
                null=True,
                blank=True,
                help_text=(
                    "Only consume documents which entirely match this "
                    "filename if specified. Wildcards such as *.pdf or "
                    "*invoice* are allowed. Case insensitive."
                ),
            ),
        ),
    ]

View File

@@ -0,0 +1,33 @@
# Generated by Django 4.2.11 on 2024-06-05 16:51
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """
    Alter ``MailRule.name`` and add two unique constraints: one on
    (name, owner) and one on name alone for rows without an owner.
    """

    dependencies = [
        ("paperless_mail", "0023_remove_mailrule_filter_attachment_filename_and_more"),
    ]

    operations = [
        migrations.AlterField(
            "mailrule",
            "name",
            models.CharField(verbose_name="name", max_length=256),
        ),
        migrations.AddConstraint(
            "mailrule",
            models.UniqueConstraint(
                name="paperless_mail_mailrule_unique_name_owner",
                fields=("name", "owner"),
            ),
        ),
        migrations.AddConstraint(
            "mailrule",
            models.UniqueConstraint(
                name="paperless_mail_mailrule_name_unique",
                fields=("name",),
                condition=models.Q(owner__isnull=True),
            ),
        ),
    ]

View File

@@ -0,0 +1,52 @@
# Generated by Django 4.2.13 on 2024-07-09 16:39
import django.db.models.deletion
from django.conf import settings
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """
    Alter the ``owner`` foreign key of the mail account, mail rule and
    processed mail models so that it declares ``default=None``.
    """

    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ("paperless_mail", "0024_alter_mailrule_name_and_more"),
    ]

    # The three models receive the same field definition; a fresh field
    # instance is created for each operation.
    operations = [
        migrations.AlterField(
            model_name=target_model,
            name="owner",
            field=models.ForeignKey(
                to=settings.AUTH_USER_MODEL,
                on_delete=django.db.models.deletion.SET_NULL,
                default=None,
                null=True,
                blank=True,
                verbose_name="owner",
            ),
        )
        for target_model in ("mailaccount", "mailrule", "processedmail")
    ]

View File

@@ -0,0 +1,21 @@
# Generated by Django 5.1.1 on 2024-09-30 15:17
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """Add the ``enabled`` boolean (default ``True``) to ``MailRule``."""

    dependencies = [
        (
            "paperless_mail",
            "0025_alter_mailaccount_owner_alter_mailrule_owner_and_more",
        ),
    ]

    operations = [
        migrations.AddField(
            "mailrule",
            "enabled",
            models.BooleanField(verbose_name="enabled", default=True),
        ),
    ]

View File

@@ -0,0 +1,48 @@
# Generated by Django 5.1.1 on 2024-10-05 17:12
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """
    Alter ``MailAccount.password`` to 3072 characters and add the
    ``expiration``, ``account_type`` and ``refresh_token`` columns.
    """

    dependencies = [("paperless_mail", "0026_mailrule_enabled")]

    # Choices recorded for the new ``account_type`` column.
    _ACCOUNT_TYPE_CHOICES = [(1, "IMAP"), (2, "Gmail OAuth"), (3, "Outlook OAuth")]

    operations = [
        migrations.AlterField(
            model_name="mailaccount",
            name="password",
            field=models.CharField(verbose_name="password", max_length=3072),
        ),
        migrations.AddField(
            model_name="mailaccount",
            name="expiration",
            field=models.DateTimeField(
                verbose_name="expiration",
                null=True,
                blank=True,
                help_text="The expiration date of the refresh token. ",
            ),
        ),
        migrations.AddField(
            model_name="mailaccount",
            name="account_type",
            field=models.PositiveIntegerField(
                verbose_name="account type",
                default=1,
                choices=_ACCOUNT_TYPE_CHOICES,
            ),
        ),
        migrations.AddField(
            model_name="mailaccount",
            name="refresh_token",
            field=models.CharField(
                verbose_name="refresh token",
                max_length=3072,
                null=True,
                blank=True,
                help_text=(
                    "The refresh token to use for token authentication "
                    "e.g. with oauth2."
                ),
            ),
        ),
    ]

View File

@@ -0,0 +1,31 @@
# Generated by Django 5.1.1 on 2024-10-30 04:31
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """Alter ``MailAccount.password`` and ``refresh_token`` to ``TextField``."""

    dependencies = [
        (
            "paperless_mail",
            "0027_mailaccount_expiration_mailaccount_account_type_and_more",
        ),
    ]

    operations = [
        migrations.AlterField(
            "mailaccount",
            "password",
            models.TextField(verbose_name="password"),
        ),
        migrations.AlterField(
            "mailaccount",
            "refresh_token",
            models.TextField(
                verbose_name="refresh token",
                null=True,
                blank=True,
                help_text=(
                    "The refresh token to use for token authentication "
                    "e.g. with oauth2."
                ),
            ),
        ),
    ]

View File

@@ -0,0 +1,28 @@
# Generated by Django 5.1.3 on 2024-11-24 12:39
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """Add the ``pdf_layout`` column to ``MailRule`` (default 0)."""

    dependencies = [("paperless_mail", "0028_alter_mailaccount_password_and_more")]

    # Choices recorded for the new ``pdf_layout`` column.
    _LAYOUT_CHOICES = [
        (0, "System default"),
        (1, "Text, then HTML"),
        (2, "HTML, then text"),
        (3, "HTML only"),
        (4, "Text only"),
    ]

    operations = [
        migrations.AddField(
            model_name="mailrule",
            name="pdf_layout",
            field=models.PositiveIntegerField(
                verbose_name="pdf layout",
                default=0,
                choices=_LAYOUT_CHOICES,
            ),
        ),
    ]

View File

@@ -0,0 +1,367 @@
from django.db import models
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
import documents.models as document_models
class MailAccount(document_models.ModelWithOwner):
    """
    A mail account: server address and port, security mode, credentials
    and the refresh token / expiration used by the OAuth account types.
    """

    class ImapSecurity(models.IntegerChoices):
        """Transport security options for the IMAP connection."""

        NONE = (1, _("No encryption"))
        SSL = (2, _("Use SSL"))
        STARTTLS = (3, _("Use STARTTLS"))

    class MailAccountType(models.IntegerChoices):
        """Kinds of account that can be configured."""

        IMAP = (1, _("IMAP"))
        GMAIL_OAUTH = (2, _("Gmail OAuth"))
        OUTLOOK_OAUTH = (3, _("Outlook OAuth"))

    class Meta:
        verbose_name = _("mail account")
        verbose_name_plural = _("mail accounts")

    # Field declaration order is kept unchanged.
    name = models.CharField(verbose_name=_("name"), unique=True, max_length=256)

    imap_server = models.CharField(verbose_name=_("IMAP server"), max_length=256)

    imap_port = models.IntegerField(
        verbose_name=_("IMAP port"),
        null=True,
        blank=True,
        help_text=_(
            "This is usually 143 for unencrypted and STARTTLS "
            "connections, and 993 for SSL connections.",
        ),
    )

    imap_security = models.PositiveIntegerField(
        verbose_name=_("IMAP security"),
        default=ImapSecurity.SSL,
        choices=ImapSecurity.choices,
    )

    username = models.CharField(verbose_name=_("username"), max_length=256)

    password = models.TextField(verbose_name=_("password"))

    is_token = models.BooleanField(
        verbose_name=_("Is token authentication"),
        default=False,
    )

    character_set = models.CharField(
        verbose_name=_("character set"),
        default="UTF-8",
        max_length=256,
        help_text=_(
            "The character set to use when communicating with the "
            "mail server, such as 'UTF-8' or 'US-ASCII'.",
        ),
    )

    account_type = models.PositiveIntegerField(
        verbose_name=_("account type"),
        default=MailAccountType.IMAP,
        choices=MailAccountType.choices,
    )

    refresh_token = models.TextField(
        verbose_name=_("refresh token"),
        null=True,
        blank=True,
        help_text=_(
            "The refresh token to use for token authentication e.g. with oauth2.",
        ),
    )

    expiration = models.DateTimeField(
        verbose_name=_("expiration"),
        null=True,
        blank=True,
        help_text=_(
            "The expiration date of the refresh token. ",
        ),
    )

    def __str__(self):
        """Return the account name."""
        return self.name
class MailRule(document_models.ModelWithOwner):
    """
    A rule attached to a mail account: which folder and filters to apply,
    what to consume, which action to take on the mail and which metadata
    to assign.
    """

    class ConsumptionScope(models.IntegerChoices):
        """Which parts of a mail are consumed."""

        ATTACHMENTS_ONLY = (1, _("Only process attachments."))
        EML_ONLY = (
            2,
            _("Process full Mail (with embedded attachments in file) as .eml"),
        )
        EVERYTHING = (
            3,
            _(
                "Process full Mail (with embedded attachments in file) as .eml "
                "+ process attachments as separate documents",
            ),
        )

    class AttachmentProcessing(models.IntegerChoices):
        """Whether inline attachments are processed too."""

        ATTACHMENTS_ONLY = (1, _("Only process attachments."))
        EVERYTHING = (2, _("Process all files, including 'inline' attachments."))

    class PdfLayout(models.IntegerChoices):
        """Layout options for the generated PDF."""

        DEFAULT = (0, _("System default"))
        TEXT_HTML = (1, _("Text, then HTML"))
        HTML_TEXT = (2, _("HTML, then text"))
        HTML_ONLY = (3, _("HTML only"))
        TEXT_ONLY = (4, _("Text only"))

    class MailAction(models.IntegerChoices):
        """Actions that can be selected for a rule."""

        DELETE = (1, _("Delete"))
        MOVE = (2, _("Move to specified folder"))
        MARK_READ = (3, _("Mark as read, don't process read mails"))
        FLAG = (4, _("Flag the mail, don't process flagged mails"))
        TAG = (5, _("Tag the mail with specified tag, don't process tagged mails"))

    class TitleSource(models.IntegerChoices):
        """Where the document title is taken from."""

        FROM_SUBJECT = (1, _("Use subject as title"))
        FROM_FILENAME = (2, _("Use attachment filename as title"))
        NONE = (3, _("Do not assign title from rule"))

    class CorrespondentSource(models.IntegerChoices):
        """Where the document correspondent is taken from."""

        FROM_NOTHING = (1, _("Do not assign a correspondent"))
        FROM_EMAIL = (2, _("Use mail address"))
        FROM_NAME = (3, _("Use name (or mail address if not available)"))
        FROM_CUSTOM = (4, _("Use correspondent selected below"))

    class Meta:
        verbose_name = _("mail rule")
        verbose_name_plural = _("mail rules")
        constraints = [
            # A rule name is unique per owner ...
            models.UniqueConstraint(
                name="%(app_label)s_%(class)s_unique_name_owner",
                fields=["name", "owner"],
            ),
            # ... and unique among the rules that have no owner.
            models.UniqueConstraint(
                name="%(app_label)s_%(class)s_name_unique",
                fields=["name"],
                condition=models.Q(owner__isnull=True),
            ),
        ]

    # Field declaration order is kept unchanged.
    name = models.CharField(verbose_name=_("name"), max_length=256)

    order = models.IntegerField(verbose_name=_("order"), default=0)

    account = models.ForeignKey(
        to=MailAccount,
        on_delete=models.CASCADE,
        related_name="rules",
        verbose_name=_("account"),
    )

    enabled = models.BooleanField(verbose_name=_("enabled"), default=True)

    folder = models.CharField(
        verbose_name=_("folder"),
        max_length=256,
        default="INBOX",
        help_text=_(
            "Subfolders must be separated by a delimiter, often a dot ('.') or"
            " slash ('/'), but it varies by mail server.",
        ),
    )

    filter_from = models.CharField(
        verbose_name=_("filter from"),
        null=True,
        blank=True,
        max_length=256,
    )

    filter_to = models.CharField(
        verbose_name=_("filter to"),
        null=True,
        blank=True,
        max_length=256,
    )

    filter_subject = models.CharField(
        verbose_name=_("filter subject"),
        null=True,
        blank=True,
        max_length=256,
    )

    filter_body = models.CharField(
        verbose_name=_("filter body"),
        null=True,
        blank=True,
        max_length=256,
    )

    filter_attachment_filename_include = models.CharField(
        verbose_name=_("filter attachment filename inclusive"),
        null=True,
        blank=True,
        max_length=256,
        help_text=_(
            "Only consume documents which entirely match this "
            "filename if specified. Wildcards such as *.pdf or "
            "*invoice* are allowed. Case insensitive.",
        ),
    )

    filter_attachment_filename_exclude = models.CharField(
        verbose_name=_("filter attachment filename exclusive"),
        null=True,
        blank=True,
        max_length=256,
        help_text=_(
            "Do not consume documents which entirely match this "
            "filename if specified. Wildcards such as *.pdf or "
            "*invoice* are allowed. Case insensitive.",
        ),
    )

    maximum_age = models.PositiveIntegerField(
        verbose_name=_("maximum age"),
        help_text=_("Specified in days."),
        default=30,
    )

    attachment_type = models.PositiveIntegerField(
        verbose_name=_("attachment type"),
        default=AttachmentProcessing.ATTACHMENTS_ONLY,
        choices=AttachmentProcessing.choices,
        help_text=_(
            "Inline attachments include embedded images, so it's best "
            "to combine this option with a filename filter.",
        ),
    )

    consumption_scope = models.PositiveIntegerField(
        verbose_name=_("consumption scope"),
        default=ConsumptionScope.ATTACHMENTS_ONLY,
        choices=ConsumptionScope.choices,
    )

    pdf_layout = models.PositiveIntegerField(
        verbose_name=_("pdf layout"),
        default=PdfLayout.DEFAULT,
        choices=PdfLayout.choices,
    )

    action = models.PositiveIntegerField(
        verbose_name=_("action"),
        default=MailAction.MARK_READ,
        choices=MailAction.choices,
    )

    action_parameter = models.CharField(
        verbose_name=_("action parameter"),
        null=True,
        blank=True,
        max_length=256,
        help_text=_(
            "Additional parameter for the action selected above, "
            "i.e., "
            "the target folder of the move to folder action. "
            "Subfolders must be separated by dots.",
        ),
    )

    assign_title_from = models.PositiveIntegerField(
        verbose_name=_("assign title from"),
        default=TitleSource.FROM_SUBJECT,
        choices=TitleSource.choices,
    )

    assign_tags = models.ManyToManyField(
        to=document_models.Tag,
        verbose_name=_("assign this tag"),
        blank=True,
    )

    assign_document_type = models.ForeignKey(
        to=document_models.DocumentType,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        verbose_name=_("assign this document type"),
    )

    assign_correspondent_from = models.PositiveIntegerField(
        verbose_name=_("assign correspondent from"),
        default=CorrespondentSource.FROM_NOTHING,
        choices=CorrespondentSource.choices,
    )

    assign_correspondent = models.ForeignKey(
        to=document_models.Correspondent,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        verbose_name=_("assign this correspondent"),
    )

    assign_owner_from_rule = models.BooleanField(
        verbose_name=_("Assign the rule owner to documents"),
        default=True,
    )

    def __str__(self):
        """Return ``<account name>.<rule name>``."""
        account_name = self.account.name
        return f"{account_name}.{self.name}"
class ProcessedMail(document_models.ModelWithOwner):
    """
    Record of one handled mail: the rule that matched, where the mail was
    found (folder, uid), its subject and received date, when it was
    processed and the resulting status / error text.

    All fields are declared non-editable. ``null`` and ``blank`` are left at
    their Django defaults (``False``) unless stated otherwise.
    """

    rule = models.ForeignKey(
        to=MailRule,
        on_delete=models.CASCADE,
        editable=False,
    )

    folder = models.CharField(
        verbose_name=_("folder"),
        max_length=256,
        editable=False,
    )

    uid = models.CharField(
        verbose_name=_("uid"),
        max_length=256,
        editable=False,
    )

    subject = models.CharField(
        verbose_name=_("subject"),
        max_length=256,
        editable=False,
    )

    received = models.DateTimeField(
        verbose_name=_("received"),
        editable=False,
    )

    # Defaults to the current time when the row is created
    processed = models.DateTimeField(
        verbose_name=_("processed"),
        default=timezone.now,
        editable=False,
    )

    status = models.CharField(
        verbose_name=_("status"),
        max_length=256,
        editable=False,
    )

    error = models.TextField(
        verbose_name=_("error"),
        null=True,
        blank=True,
        editable=False,
    )

118
src/paperless_mail/oauth.py Normal file
View File

@@ -0,0 +1,118 @@
import asyncio
import logging
import secrets
from datetime import timedelta
from django.conf import settings
from django.utils import timezone
from httpx_oauth.clients.google import GoogleOAuth2
from httpx_oauth.clients.microsoft import MicrosoftGraphOAuth2
from httpx_oauth.oauth2 import OAuth2Token
from httpx_oauth.oauth2 import RefreshTokenError
from paperless_mail.models import MailAccount
class PaperlessMailOAuth2Manager:
    """
    Helper around the Gmail and Outlook OAuth2 clients used for mail accounts.

    The clients are created lazily from the Django settings on first access.
    Every coroutine of the underlying clients is driven to completion with
    ``asyncio.run`` so the public methods are plain synchronous calls.
    """

    def __init__(self, state: str | None = None):
        """
        :param state: the OAuth ``state`` value to use; a random URL-safe
            token is generated when omitted.
        """
        self._gmail_client = None
        self._outlook_client = None
        self.state = state if state is not None else secrets.token_urlsafe(32)

    @property
    def gmail_client(self) -> GoogleOAuth2:
        """The Google OAuth2 client, built on first access."""
        if self._gmail_client is None:
            self._gmail_client = GoogleOAuth2(
                settings.GMAIL_OAUTH_CLIENT_ID,
                settings.GMAIL_OAUTH_CLIENT_SECRET,
            )
        return self._gmail_client

    @property
    def outlook_client(self) -> MicrosoftGraphOAuth2:
        """The Microsoft Graph OAuth2 client, built on first access."""
        if self._outlook_client is None:
            self._outlook_client = MicrosoftGraphOAuth2(
                settings.OUTLOOK_OAUTH_CLIENT_ID,
                settings.OUTLOOK_OAUTH_CLIENT_SECRET,
            )
        return self._outlook_client

    @property
    def oauth_callback_url(self) -> str:
        """
        URL passed to the provider as ``redirect_uri``.

        ``OAUTH_CALLBACK_BASE_URL`` is preferred; ``PAPERLESS_URL`` is used
        when it is ``None``.
        """
        base_url = (
            settings.OAUTH_CALLBACK_BASE_URL
            if settings.OAUTH_CALLBACK_BASE_URL is not None
            else settings.PAPERLESS_URL
        )
        return f"{base_url}{settings.BASE_URL}api/oauth/callback/"

    @property
    def oauth_redirect_url(self) -> str:
        """Mail page URL, e.g. "http://localhost:4200/mail" or "/mail"."""
        prefix = "http://localhost:4200/" if settings.DEBUG else settings.BASE_URL
        return f"{prefix}mail"

    def get_gmail_authorization_url(self) -> str:
        """Return the Gmail authorization URL for the current state."""
        return asyncio.run(
            self.gmail_client.get_authorization_url(
                redirect_uri=self.oauth_callback_url,
                scope=["https://mail.google.com/"],
                extras_params={"prompt": "consent", "access_type": "offline"},
                state=self.state,
            ),
        )

    def get_outlook_authorization_url(self) -> str:
        """Return the Outlook authorization URL for the current state."""
        return asyncio.run(
            self.outlook_client.get_authorization_url(
                redirect_uri=self.oauth_callback_url,
                scope=[
                    "offline_access",
                    "https://outlook.office.com/IMAP.AccessAsUser.All",
                ],
                state=self.state,
            ),
        )

    def get_gmail_access_token(self, code: str) -> OAuth2Token:
        """Exchange the authorization ``code`` for a Gmail token."""
        return asyncio.run(
            self.gmail_client.get_access_token(
                code=code,
                redirect_uri=self.oauth_callback_url,
            ),
        )

    def get_outlook_access_token(self, code: str) -> OAuth2Token:
        """Exchange the authorization ``code`` for an Outlook token."""
        return asyncio.run(
            self.outlook_client.get_access_token(
                code=code,
                redirect_uri=self.oauth_callback_url,
            ),
        )

    def refresh_account_oauth_token(self, account: MailAccount) -> bool:
        """
        Refreshes the oauth token for the given mail account.

        On success the new access token is stored in ``account.password``,
        ``account.expiration`` is updated and the account is saved.

        :return: ``True`` when the token was refreshed, ``False`` when the
            refresh failed or the account is not an OAuth account.
        """
        logger = logging.getLogger("paperless_mail")
        logger.debug(f"Attempting to refresh oauth token for account {account}")

        if account.account_type == MailAccount.MailAccountType.GMAIL_OAUTH:
            client = self.gmail_client
        elif account.account_type == MailAccount.MailAccountType.OUTLOOK_OAUTH:
            client = self.outlook_client
        else:
            # Without this guard the token result would never be assigned
            # and its use below would raise UnboundLocalError.
            logger.error(
                f"Failed to refresh oauth token for account {account}: "
                "account is not an OAuth account",
            )
            return False

        try:
            result: OAuth2Token = asyncio.run(
                client.refresh_token(
                    refresh_token=account.refresh_token,
                ),
            )
            account.password = result["access_token"]
            account.expiration = timezone.now() + timedelta(
                seconds=result["expires_in"],
            )
            account.save()
            logger.debug(f"Successfully refreshed oauth token for account {account}")
            return True
        except RefreshTokenError as e:
            logger.error(f"Failed to refresh oauth token for account {account}: {e}")
            return False

    def validate_state(self, state: str) -> bool:
        """
        Check the ``state`` value returned by the provider.

        The check is skipped entirely when ``settings.DEBUG`` is enabled.
        Otherwise the value must be non-empty and equal to ``self.state``;
        the comparison is done in constant time.
        """
        return settings.DEBUG or (
            len(state) > 0
            and secrets.compare_digest(
                state.encode("utf-8"),
                self.state.encode("utf-8"),
            )
        )

View File

@@ -0,0 +1,478 @@
import re
from html import escape
from pathlib import Path
from bleach import clean
from bleach import linkify
from django.conf import settings
from django.utils.timezone import is_naive
from django.utils.timezone import make_aware
from gotenberg_client import GotenbergClient
from gotenberg_client.constants import A4
from gotenberg_client.options import Measurement
from gotenberg_client.options import MeasurementUnitType
from gotenberg_client.options import PageMarginsType
from gotenberg_client.options import PdfAFormat
from humanize import naturalsize
from imap_tools import MailAttachment
from imap_tools import MailMessage
from tika_client import TikaClient
from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import make_thumbnail_from_pdf
from paperless.models import OutputTypeChoices
from paperless_mail.models import MailRule
class MailDocumentParser(DocumentParser):
"""
This parser uses imap_tools to parse .eml files, generates pdf using
Gotenberg and sends the html part to a Tika server for text extraction.
"""
logging_name = "paperless.parsing.mail"
def _settings_to_gotenberg_pdfa(self) -> PdfAFormat | None:
"""
Converts our requested PDF/A output into the Gotenberg API
format
"""
if settings.OCR_OUTPUT_TYPE in {
OutputTypeChoices.PDF_A,
OutputTypeChoices.PDF_A2,
}:
return PdfAFormat.A2b
elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A1: # pragma: no cover
self.log.warning(
"Gotenberg does not support PDF/A-1a, choosing PDF/A-2b instead",
)
return PdfAFormat.A2b
elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A3: # pragma: no cover
return PdfAFormat.A3b
return None
def get_thumbnail(
self,
document_path: Path,
mime_type: str,
file_name=None,
) -> Path:
if not self.archive_path:
self.archive_path = self.generate_pdf(
self.parse_file_to_message(document_path),
)
return make_thumbnail_from_pdf(
self.archive_path,
self.tempdir,
self.logging_group,
)
def extract_metadata(self, document_path: Path, mime_type: str):
result = []
try:
mail = self.parse_file_to_message(document_path)
except ParseError as e:
self.log.warning(
f"Error while fetching document metadata for {document_path}: {e}",
)
return result
for key, value in mail.headers.items():
value = ", ".join(i for i in value)
try:
value.encode("utf-8")
except UnicodeEncodeError as e: # pragma: no cover
self.log.debug(f"Skipping header {key}: {e}")
continue
result.append(
{
"namespace": "",
"prefix": "header",
"key": key,
"value": value,
},
)
result.append(
{
"namespace": "",
"prefix": "",
"key": "attachments",
"value": ", ".join(
f"{attachment.filename}"
f"({naturalsize(attachment.size, binary=True, format='%.2f')})"
for attachment in mail.attachments
),
},
)
result.append(
{
"namespace": "",
"prefix": "",
"key": "date",
"value": mail.date.strftime("%Y-%m-%d %H:%M:%S %Z"),
},
)
result.sort(key=lambda item: (item["prefix"], item["key"]))
return result
def parse(
self,
document_path: Path,
mime_type: str,
file_name=None,
mailrule_id: int | None = None,
):
"""
Parses the given .eml into formatted text, based on the decoded email.
"""
def strip_text(text: str):
"""
Reduces the spacing of the given text string
"""
text = re.sub(r"\s+", " ", text)
text = re.sub(r"(\n *)+", "\n", text)
return text.strip()
def build_formatted_text(mail_message: MailMessage) -> str:
"""
Constructs a formatted string, based on the given email. Basically tries
to get most of the email content, included front matter, into a nice string
"""
fmt_text = f"Subject: {mail_message.subject}\n\n"
fmt_text += f"From: {mail_message.from_values.full}\n\n"
to_list = [address.full for address in mail_message.to_values]
fmt_text += f"To: {', '.join(to_list)}\n\n"
if mail_message.cc_values:
fmt_text += (
f"CC: {', '.join(address.full for address in mail.cc_values)}\n\n"
)
if mail_message.bcc_values:
fmt_text += (
f"BCC: {', '.join(address.full for address in mail.bcc_values)}\n\n"
)
if mail_message.attachments:
att = []
for a in mail.attachments:
attachment_size = naturalsize(a.size, binary=True, format="%.2f")
att.append(
f"{a.filename} ({attachment_size})",
)
fmt_text += f"Attachments: {', '.join(att)}\n\n"
if mail.html:
fmt_text += "HTML content: " + strip_text(self.tika_parse(mail.html))
fmt_text += f"\n\n{strip_text(mail.text)}"
return fmt_text
self.log.debug(f"Parsing file {document_path.name} into an email")
mail = self.parse_file_to_message(document_path)
self.log.debug("Building formatted text from email")
self.text = build_formatted_text(mail)
if is_naive(mail.date):
self.date = make_aware(mail.date)
else:
self.date = mail.date
self.log.debug("Creating a PDF from the email")
if mailrule_id:
rule = MailRule.objects.get(pk=mailrule_id)
self.archive_path = self.generate_pdf(mail, rule.pdf_layout)
else:
self.archive_path = self.generate_pdf(mail)
@staticmethod
def parse_file_to_message(filepath: Path) -> MailMessage:
"""
Parses the given .eml file into a MailMessage object
"""
try:
with filepath.open("rb") as eml:
parsed = MailMessage.from_bytes(eml.read())
if parsed.from_values is None:
raise ParseError(
f"Could not parse {filepath}: Missing 'from'",
)
except Exception as err:
raise ParseError(
f"Could not parse {filepath}: {err}",
) from err
return parsed
def tika_parse(self, html: str):
self.log.info("Sending content to Tika server")
try:
with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client:
parsed = client.tika.as_text.from_buffer(html, "text/html")
if parsed.content is not None:
return parsed.content.strip()
return ""
except Exception as err:
raise ParseError(
f"Could not parse content with tika server at "
f"{settings.TIKA_ENDPOINT}: {err}",
) from err
def generate_pdf(
self,
mail_message: MailMessage,
pdf_layout: MailRule.PdfLayout | None = None,
) -> Path:
archive_path = Path(self.tempdir) / "merged.pdf"
mail_pdf_file = self.generate_pdf_from_mail(mail_message)
pdf_layout = (
pdf_layout or settings.EMAIL_PARSE_DEFAULT_LAYOUT
) # EMAIL_PARSE_DEFAULT_LAYOUT is a MailRule.PdfLayout
# If no HTML content, create the PDF from the message
# Otherwise, create 2 PDFs and merge them with Gotenberg
if not mail_message.html:
archive_path.write_bytes(mail_pdf_file.read_bytes())
else:
pdf_of_html_content = self.generate_pdf_from_html(
mail_message.html,
mail_message.attachments,
)
self.log.debug("Merging email text and HTML content into single PDF")
with (
GotenbergClient(
host=settings.TIKA_GOTENBERG_ENDPOINT,
timeout=settings.CELERY_TASK_TIME_LIMIT,
) as client,
client.merge.merge() as route,
):
# Configure requested PDF/A formatting, if any
pdf_a_format = self._settings_to_gotenberg_pdfa()
if pdf_a_format is not None:
route.pdf_format(pdf_a_format)
match pdf_layout:
case MailRule.PdfLayout.HTML_TEXT:
route.merge([pdf_of_html_content, mail_pdf_file])
case MailRule.PdfLayout.HTML_ONLY:
route.merge([pdf_of_html_content])
case MailRule.PdfLayout.TEXT_ONLY:
route.merge([mail_pdf_file])
case MailRule.PdfLayout.TEXT_HTML | _:
route.merge([mail_pdf_file, pdf_of_html_content])
try:
response = route.run()
archive_path.write_bytes(response.content)
except Exception as err:
raise ParseError(
f"Error while merging email HTML into PDF: {err}",
) from err
return archive_path
def mail_to_html(self, mail: MailMessage) -> Path:
"""
Converts the given email into an HTML file, formatted
based on the given template
"""
def clean_html(text: str) -> str:
"""
Attempts to clean, escape and linkify the given HTML string
"""
if isinstance(text, list):
text = "\n".join([str(e) for e in text])
if not isinstance(text, str):
text = str(text)
text = escape(text)
text = clean(text)
text = linkify(text, parse_email=True)
text = text.replace("\n", "<br>")
return text
data = {}
data["subject"] = clean_html(mail.subject)
if data["subject"]:
data["subject_label"] = "Subject"
data["from"] = clean_html(mail.from_values.full)
if data["from"]:
data["from_label"] = "From"
data["to"] = clean_html(", ".join(address.full for address in mail.to_values))
if data["to"]:
data["to_label"] = "To"
data["cc"] = clean_html(", ".join(address.full for address in mail.cc_values))
if data["cc"]:
data["cc_label"] = "CC"
data["bcc"] = clean_html(", ".join(address.full for address in mail.bcc_values))
if data["bcc"]:
data["bcc_label"] = "BCC"
att = []
for a in mail.attachments:
att.append(
f"{a.filename} ({naturalsize(a.size, binary=True, format='%.2f')})",
)
data["attachments"] = clean_html(", ".join(att))
if data["attachments"]:
data["attachments_label"] = "Attachments"
data["date"] = clean_html(mail.date.astimezone().strftime("%Y-%m-%d %H:%M"))
data["content"] = clean_html(mail.text.strip())
from django.template.loader import render_to_string
html_file = Path(self.tempdir) / "email_as_html.html"
html_file.write_text(render_to_string("email_msg_template.html", context=data))
return html_file
def generate_pdf_from_mail(self, mail: MailMessage) -> Path:
"""
Creates a PDF based on the given email, using the email's values in a
an HTML template
"""
self.log.info("Converting mail to PDF")
css_file = Path(__file__).parent / "templates" / "output.css"
email_html_file = self.mail_to_html(mail)
with (
GotenbergClient(
host=settings.TIKA_GOTENBERG_ENDPOINT,
timeout=settings.CELERY_TASK_TIME_LIMIT,
) as client,
client.chromium.html_to_pdf() as route,
):
# Configure requested PDF/A formatting, if any
pdf_a_format = self._settings_to_gotenberg_pdfa()
if pdf_a_format is not None:
route.pdf_format(pdf_a_format)
try:
response = (
route.index(email_html_file)
.resource(css_file)
.margins(
PageMarginsType(
top=Measurement(0.1, MeasurementUnitType.Inches),
bottom=Measurement(0.1, MeasurementUnitType.Inches),
left=Measurement(0.1, MeasurementUnitType.Inches),
right=Measurement(0.1, MeasurementUnitType.Inches),
),
)
.size(A4)
.scale(1.0)
.run()
)
except Exception as err:
raise ParseError(
f"Error while converting email to PDF: {err}",
) from err
email_as_pdf_file = Path(self.tempdir) / "email_as_pdf.pdf"
email_as_pdf_file.write_bytes(response.content)
return email_as_pdf_file
def generate_pdf_from_html(
    self,
    orig_html: str,
    attachments: list[MailAttachment],
) -> Path:
    """
    Generates a PDF file based on the HTML and attachments of the email

    Opening and closing script tags in the HTML are replaced by hidden
    div tags before rendering. Every attachment that carries a content id
    is handed to Gotenberg as a resource and its "cid:<content id>"
    reference in the HTML is rewritten to the name of that resource file.

    Returns the path of the PDF written to the parser's temporary
    directory. Raises ParseError if running the conversion fails.
    """

    def clean_html_script(text: str) -> str:
        # Replace "<script" / "</script" (in any letter case) so former
        # script elements end up as hidden div elements
        compiled_open = re.compile(re.escape("<script"), re.IGNORECASE)
        text = compiled_open.sub("<div hidden ", text)

        compiled_close = re.compile(re.escape("</script"), re.IGNORECASE)
        text = compiled_close.sub("</div", text)
        return text

    self.log.info("Converting message html to PDF")

    tempdir = Path(self.tempdir)

    html_clean = clean_html_script(orig_html)

    with (
        GotenbergClient(
            host=settings.TIKA_GOTENBERG_ENDPOINT,
            timeout=settings.CELERY_TASK_TIME_LIMIT,
        ) as client,
        client.chromium.html_to_pdf() as route,
    ):
        # Configure requested PDF/A formatting, if any
        pdf_a_format = self._settings_to_gotenberg_pdfa()
        if pdf_a_format is not None:
            route.pdf_format(pdf_a_format)

        # Add attachments as resources, cleaning the filename and replacing
        # it in the index file for inclusion
        for attachment in attachments:
            # Without a content id the reference would be the bare "cid:"
            # prefix. Replacing that would strip the colon from every
            # remaining "cid:" reference in the document, so references of
            # attachments handled later could no longer be matched, and
            # all such attachments would share one temp file. They cannot
            # be referenced from the HTML anyway, so skip them.
            if not attachment.content_id:
                continue

            # Clean the attachment name to be valid
            name_cid = f"cid:{attachment.content_id}"
            name_clean = "".join(e for e in name_cid if e.isalnum())

            # Write attachment payload to a temp file
            temp_file = tempdir / name_clean
            temp_file.write_bytes(attachment.payload)

            route.resource(temp_file)

            # Replace as needed the name with the clean name
            html_clean = html_clean.replace(name_cid, name_clean)

        # Store the cleaned up HTML version only once all references have
        # been rewritten
        html_clean_file = tempdir / "index.html"
        html_clean_file.write_text(html_clean)

        # This is our index file, the main page basically
        route.index(html_clean_file)

        # Set page size, margins
        route.margins(
            PageMarginsType(
                top=Measurement(0.1, MeasurementUnitType.Inches),
                bottom=Measurement(0.1, MeasurementUnitType.Inches),
                left=Measurement(0.1, MeasurementUnitType.Inches),
                right=Measurement(0.1, MeasurementUnitType.Inches),
            ),
        ).size(A4).scale(1.0)

        try:
            response = route.run()
        except Exception as err:
            raise ParseError(
                f"Error while converting document to PDF: {err}",
            ) from err

        html_pdf = tempdir / "html.pdf"
        html_pdf.write_bytes(response.content)
        return html_pdf
def get_settings(self):
    """
    Additional settings are not implemented by this parser yet, so there
    is nothing to return
    """
    return None

View File

@@ -0,0 +1,103 @@
import abc
from email import message_from_bytes
from email import policy
from email.message import Message
from pathlib import Path
from django.conf import settings
from gnupg import GPG
from imap_tools import MailMessage
from documents.loggers import LoggingMixin
class MailMessagePreprocessor(abc.ABC):
    """
    Interface for preprocessors which alter a message before it is handled
    in MailAccountHandler
    """

    # Name of the preprocessor, overridden by concrete implementations
    NAME: str = "MailMessagePreprocessor"

    @staticmethod
    @abc.abstractmethod
    def able_to_run() -> bool:
        """
        Tells whether the conditions for running the preprocessor are met.
        When this returns False, run(message) will not be called
        """

    @abc.abstractmethod
    def run(self, message: MailMessage) -> MailMessage:
        """
        Carries out the actual preprocessing and returns the resulting message
        """
class MailMessageDecryptor(MailMessagePreprocessor, LoggingMixin):
    """
    Preprocessor which replaces a "multipart/encrypted" message with its
    GPG decrypted counterpart and leaves every other message untouched
    """

    logging_name = "paperless_mail_message_decryptor"

    NAME = "MailMessageDecryptor"

    def __init__(self):
        super().__init__()
        self.renew_logging_group()
        self._gpg = GPG(gnupghome=settings.EMAIL_GNUPG_HOME)

    @staticmethod
    def able_to_run() -> bool:
        """
        The decryptor runs when it is enabled in the settings and the
        configured GnuPG home is either unset or an existing directory
        """
        if not settings.EMAIL_ENABLE_GPG_DECRYPTOR:
            return False
        if settings.EMAIL_GNUPG_HOME is not None:
            return Path(settings.EMAIL_GNUPG_HOME).is_dir()
        return True

    def run(self, message: MailMessage) -> MailMessage:
        """
        Returns a new, decrypted MailMessage for encrypted input and the
        given message itself otherwise. Raises an Exception when the
        decryption is reported as not ok.
        """
        if not hasattr(message, "obj"):
            self.log.debug("Message does not have 'obj' attribute")
            return message

        if message.obj.get_content_type() != "multipart/encrypted":
            self.log.debug("Message not encrypted. Keep unchanged")
            return message

        self.log.debug("Message is encrypted.")
        encrypted = self._to_email_message(message)
        result = self._gpg.decrypt(encrypted.as_string())

        if result.ok:
            self.log.debug("Message decrypted successfully.")
            decrypted = self._build_decrypted_message(result, encrypted)
            # Rebuild a MailMessage from the decrypted bytes, keeping the UID
            uid_part = f"UID {message.uid}".encode()
            return MailMessage([(uid_part, decrypted.as_bytes())])

        self.log.debug(
            f"Message decryption failed with status message {result.status}",
        )
        raise Exception(
            f"Decryption failed: {result.status}, {result.stderr}",
        )

    @staticmethod
    def _to_email_message(message: MailMessage) -> Message:
        """
        Re-parses the raw bytes of the message with the default email policy
        """
        return message_from_bytes(message.obj.as_bytes(), policy=policy.default)

    @staticmethod
    def _build_decrypted_message(decrypted_raw_message, email_message):
        """
        Parses the decrypted payload and copies over every header of the
        original message which the decrypted message has no value for
        """
        decrypted_message = message_from_bytes(
            decrypted_raw_message.data,
            policy=policy.default,
        )
        for header, value in email_message.items():
            if decrypted_message.get(header):
                continue
            decrypted_message.add_header(header, value)
        return decrypted_message

View File

@@ -0,0 +1,150 @@
from rest_framework import serializers
from documents.serialisers import CorrespondentField
from documents.serialisers import DocumentTypeField
from documents.serialisers import OwnedObjectSerializer
from documents.serialisers import TagsField
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
from paperless_mail.models import ProcessedMail
class ObfuscatedPasswordField(serializers.CharField):
    """
    Represents the password as a string of asterisks instead of sending it
    in the clear
    """

    def to_representation(self, value) -> str:
        # At least ten asterisks, one per character for longer values
        mask_length = max(10, len(value))
        return mask_length * "*"

    def to_internal_value(self, data):
        # Incoming data is passed through unchanged
        return data
class MailAccountSerializer(OwnedObjectSerializer):
    """
    Serializer for mail accounts; the password is only ever represented in
    obfuscated form
    """

    password = ObfuscatedPasswordField()

    class Meta:
        model = MailAccount
        fields = [
            "id",
            "name",
            "imap_server",
            "imap_port",
            "imap_security",
            "username",
            "password",
            "character_set",
            "is_token",
            "owner",
            "user_can_change",
            "permissions",
            "set_permissions",
            "account_type",
            "expiration",
        ]

    def update(self, instance, validated_data):
        """
        Updates the account. A submitted password made up of nothing but
        asterisks (or an empty one) is removed from the data before it is
        handed to the parent update.
        """
        if "password" in validated_data:
            submitted = validated_data.get("password")
            if submitted.strip("*") == "":
                validated_data.pop("password")
        super().update(instance, validated_data)
        return instance
class AccountField(serializers.PrimaryKeyRelatedField):
    """
    Primary key field which may refer to any mail account
    """

    def get_queryset(self):
        # All accounts, highest id first
        every_account = MailAccount.objects.all()
        return every_account.order_by("-id")
class MailRuleSerializer(OwnedObjectSerializer):
    """
    Serializer for mail rules, validating the action parameter and the
    maximum mail age
    """

    account = AccountField(required=True)
    action_parameter = serializers.CharField(
        allow_null=True,
        required=False,
        default="",
    )
    assign_correspondent = CorrespondentField(allow_null=True, required=False)
    assign_tags = TagsField(many=True, allow_null=True, required=False)
    assign_document_type = DocumentTypeField(allow_null=True, required=False)
    order = serializers.IntegerField(required=False)

    class Meta:
        model = MailRule
        fields = [
            "id",
            "name",
            "account",
            "enabled",
            "folder",
            "filter_from",
            "filter_to",
            "filter_subject",
            "filter_body",
            "filter_attachment_filename_include",
            "filter_attachment_filename_exclude",
            "maximum_age",
            "action",
            "action_parameter",
            "assign_title_from",
            "assign_tags",
            "assign_correspondent_from",
            "assign_correspondent",
            "assign_document_type",
            "assign_owner_from_rule",
            "order",
            "attachment_type",
            "consumption_scope",
            "pdf_layout",
            "owner",
            "user_can_change",
            "permissions",
            "set_permissions",
        ]

    def update(self, instance, validated_data):
        # Delegate to the parent and hand back the instance that was passed in
        super().update(instance, validated_data)
        return instance

    def create(self, validated_data):
        """
        Creates the rule without its tags first and sets the tags on the
        created rule afterwards, if any were given
        """
        tags = validated_data.pop("assign_tags", [])
        created_rule = super().create(validated_data)
        if tags:
            created_rule.assign_tags.set(tags)
        return created_rule

    def validate(self, attrs):
        """
        Rejects tag and move actions which come without an action parameter
        """
        parameter_required = attrs.get("action") in (
            MailRule.MailAction.TAG,
            MailRule.MailAction.MOVE,
        )
        if parameter_required and not attrs.get("action_parameter"):
            raise serializers.ValidationError("An action parameter is required.")
        return attrs

    def validate_maximum_age(self, value):
        """
        Rejects a maximum age above 36500 (~100 years)
        """
        upper_limit = 36500  # ~100 years
        if value > upper_limit:
            raise serializers.ValidationError("Maximum mail age is unreasonably large.")
        return value
class ProcessedMailSerializer(OwnedObjectSerializer):
    """
    Serializer exposing the recorded fields of a processed mail
    """

    class Meta:
        model = ProcessedMail
        # Fields of the processed mail which are exposed
        fields = [
            "id",
            "owner",
            "rule",
            "folder",
            "uid",
            "subject",
            "received",
            "processed",
            "status",
            "error",
        ]

View File

@@ -0,0 +1,14 @@
def get_parser(*args, **kwargs):
    """
    Builds a MailDocumentParser, forwarding all given arguments to it
    """
    # The parser module is imported at call time, not at module import
    from paperless_mail.parsers import MailDocumentParser

    return MailDocumentParser(*args, **kwargs)


def mail_consumer_declaration(sender, **kwargs):
    """
    Returns the declaration of the mail parser: the parser factory, its
    weight and the mime types it handles together with their file extension
    """
    supported_mime_types = {"message/rfc822": ".eml"}
    declaration = {
        "parser": get_parser,
        "weight": 20,
        "mime_types": supported_mime_types,
    }
    return declaration

View File

@@ -0,0 +1,33 @@
import logging
from celery import shared_task
from paperless_mail.mail import MailAccountHandler
from paperless_mail.mail import MailError
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
logger = logging.getLogger("paperless.mail.tasks")
@shared_task
def process_mail_accounts(account_ids: list[int] | None = None) -> str:
    """
    Handles the mail of the accounts with the given ids, or of all accounts
    when no ids are given (None or an empty list).

    Accounts without any enabled rule are skipped. A MailError raised for
    one account is logged and the remaining accounts are still handled.
    Returns a message stating how many documents were added.
    """
    if account_ids:
        accounts = MailAccount.objects.filter(pk__in=account_ids)
    else:
        accounts = MailAccount.objects.all()

    total_new_documents = 0
    for account in accounts:
        enabled_rules = MailRule.objects.filter(account=account, enabled=True)
        if enabled_rules.exists():
            try:
                total_new_documents += MailAccountHandler().handle_mail_account(
                    account,
                )
            except MailError:
                logger.exception(f"Error while processing mail account {account}")
        else:
            logger.info(f"No rules enabled for account {account}. Skipping.")

    if total_new_documents > 0:
        return f"Added {total_new_documents} document(s)."
    return "No new documents were added."

View File

@@ -0,0 +1,48 @@
{% autoescape off %}
<!doctype html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link href="output.css" rel="stylesheet">
</head>
<body class="bg-white w-screen flex flex-col items-center">
<div class="container max-w-4xl">
<!-- Header -->
<div class="grid gap-x-2 bg-slate-200 p-4">
<div class="col-start-9 col-span-4 row-start-1 text-right">{{ date }}</div>
<div class="col-start-1 row-start-1 text-slate-400 text-right">{{ from_label }}</div>
<div class="col-start-2 col-span-7 row-start-1">{{ from }}</div>
<div class="col-start-1 row-start-2 text-slate-400 text-right">{{ subject_label }}</div>
<div class=" col-start-2 col-span-10 row-start-2 font-bold">{{ subject }}</div>
<div class="col-start-1 row-start-3 text-slate-400 text-right">{{ to_label }}</div>
<div class="col-start-2 col-span-10 row-start-3 text-sm my-0.5">{{ to }}</div>
<div class="col-start-1 row-start-4 text-slate-400 text-right">{{ cc_label }}</div>
<div class="col-start-2 col-span-10 row-start-4 text-sm my-0.5">{{ cc }}</div>
<div class="col-start-1 row-start-5 text-slate-400 text-right">{{ bcc_label }}</div>
<div class="col-start-2 col-span-10 row-start-5" text-sm my-0.5>{{ bcc }}</div>
<div class="col-start-1 row-start-6 text-slate-400 text-right">{{ attachments_label }}</div>
<div class="col-start-2 col-span-10 row-start-6">{{ attachments }}</div>
</div>
<!-- Separator-->
<div class="border-t border-solid border-b w-full h-[1px] box-content border-black mb-5 bg-slate-200"></div>
<!-- Content-->
<div class="w-full break-words">{{ content }}</div>
</div>
</body>
</html>
{% endautoescape %}

View File

@@ -0,0 +1,3 @@
@tailwind base;
@tailwind components;
@tailwind utilities;

View File

@@ -0,0 +1,706 @@
/*
! tailwindcss v3.0.24 | MIT License | https://tailwindcss.com
*/
/*
1. Prevent padding and border from affecting element width. (https://github.com/mozdevs/cssremedy/issues/4)
2. Allow adding a border to an element by just adding a border-width. (https://github.com/tailwindcss/tailwindcss/pull/116)
*/
*,
::before,
::after {
box-sizing: border-box;
/* 1 */
border-width: 0;
/* 2 */
border-style: solid;
/* 2 */
border-color: #e5e7eb;
/* 2 */
}
::before,
::after {
--tw-content: '';
}
/*
1. Use a consistent sensible line-height in all browsers.
2. Prevent adjustments of font size after orientation changes in iOS.
3. Use a more readable tab size.
4. Use the user's configured `sans` font-family by default.
*/
html {
line-height: 1.5;
/* 1 */
-webkit-text-size-adjust: 100%;
/* 2 */
-moz-tab-size: 4;
/* 3 */
-o-tab-size: 4;
tab-size: 4;
/* 3 */
font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji";
/* 4 */
}
/*
1. Remove the margin in all browsers.
2. Inherit line-height from `html` so users can set them as a class directly on the `html` element.
*/
body {
margin: 0;
/* 1 */
line-height: inherit;
/* 2 */
}
/*
1. Add the correct height in Firefox.
2. Correct the inheritance of border color in Firefox. (https://bugzilla.mozilla.org/show_bug.cgi?id=190655)
3. Ensure horizontal rules are visible by default.
*/
hr {
height: 0;
/* 1 */
color: inherit;
/* 2 */
border-top-width: 1px;
/* 3 */
}
/*
Add the correct text decoration in Chrome, Edge, and Safari.
*/
abbr:where([title]) {
-webkit-text-decoration: underline dotted;
text-decoration: underline dotted;
}
/*
Remove the default font size and weight for headings.
*/
h1,
h2,
h3,
h4,
h5,
h6 {
font-size: inherit;
font-weight: inherit;
}
/*
Reset links to optimize for opt-in styling instead of opt-out.
*/
a {
color: inherit;
text-decoration: inherit;
}
/*
Add the correct font weight in Edge and Safari.
*/
b,
strong {
font-weight: bolder;
}
/*
1. Use the user's configured `mono` font family by default.
2. Correct the odd `em` font sizing in all browsers.
*/
code,
kbd,
samp,
pre {
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
/* 1 */
font-size: 1em;
/* 2 */
}
/*
Add the correct font size in all browsers.
*/
small {
font-size: 80%;
}
/*
Prevent `sub` and `sup` elements from affecting the line height in all browsers.
*/
sub,
sup {
font-size: 75%;
line-height: 0;
position: relative;
vertical-align: baseline;
}
sub {
bottom: -0.25em;
}
sup {
top: -0.5em;
}
/*
1. Remove text indentation from table contents in Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=999088, https://bugs.webkit.org/show_bug.cgi?id=201297)
2. Correct table border color inheritance in all Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=935729, https://bugs.webkit.org/show_bug.cgi?id=195016)
3. Remove gaps between table borders by default.
*/
table {
text-indent: 0;
/* 1 */
border-color: inherit;
/* 2 */
border-collapse: collapse;
/* 3 */
}
/*
1. Change the font styles in all browsers.
2. Remove the margin in Firefox and Safari.
3. Remove default padding in all browsers.
*/
button,
input,
optgroup,
select,
textarea {
font-family: inherit;
/* 1 */
font-size: 100%;
/* 1 */
line-height: inherit;
/* 1 */
color: inherit;
/* 1 */
margin: 0;
/* 2 */
padding: 0;
/* 3 */
}
/*
Remove the inheritance of text transform in Edge and Firefox.
*/
button,
select {
text-transform: none;
}
/*
1. Correct the inability to style clickable types in iOS and Safari.
2. Remove default button styles.
*/
button,
[type='button'],
[type='reset'],
[type='submit'] {
-webkit-appearance: button;
/* 1 */
background-color: transparent;
/* 2 */
background-image: none;
/* 2 */
}
/*
Use the modern Firefox focus style for all focusable elements.
*/
:-moz-focusring {
outline: auto;
}
/*
Remove the additional `:invalid` styles in Firefox. (https://github.com/mozilla/gecko-dev/blob/2f9eacd9d3d995c937b4251a5557d95d494c9be1/layout/style/res/forms.css#L728-L737)
*/
:-moz-ui-invalid {
box-shadow: none;
}
/*
Add the correct vertical alignment in Chrome and Firefox.
*/
progress {
vertical-align: baseline;
}
/*
Correct the cursor style of increment and decrement buttons in Safari.
*/
::-webkit-inner-spin-button,
::-webkit-outer-spin-button {
height: auto;
}
/*
1. Correct the odd appearance in Chrome and Safari.
2. Correct the outline style in Safari.
*/
[type='search'] {
-webkit-appearance: textfield;
/* 1 */
outline-offset: -2px;
/* 2 */
}
/*
Remove the inner padding in Chrome and Safari on macOS.
*/
::-webkit-search-decoration {
-webkit-appearance: none;
}
/*
1. Correct the inability to style clickable types in iOS and Safari.
2. Change font properties to `inherit` in Safari.
*/
::-webkit-file-upload-button {
-webkit-appearance: button;
/* 1 */
font: inherit;
/* 2 */
}
/*
Add the correct display in Chrome and Safari.
*/
summary {
display: list-item;
}
/*
Removes the default spacing and border for appropriate elements.
*/
blockquote,
dl,
dd,
h1,
h2,
h3,
h4,
h5,
h6,
hr,
figure,
p,
pre {
margin: 0;
}
fieldset {
margin: 0;
padding: 0;
}
legend {
padding: 0;
}
ol,
ul,
menu {
list-style: none;
margin: 0;
padding: 0;
}
/*
Prevent resizing textareas horizontally by default.
*/
textarea {
resize: vertical;
}
/*
1. Reset the default placeholder opacity in Firefox. (https://github.com/tailwindlabs/tailwindcss/issues/3300)
2. Set the default placeholder color to the user's configured gray 400 color.
*/
input::-moz-placeholder, textarea::-moz-placeholder {
opacity: 1;
/* 1 */
color: #9ca3af;
/* 2 */
}
input:-ms-input-placeholder, textarea:-ms-input-placeholder {
opacity: 1;
/* 1 */
color: #9ca3af;
/* 2 */
}
input::placeholder,
textarea::placeholder {
opacity: 1;
/* 1 */
color: #9ca3af;
/* 2 */
}
/*
Set the default cursor for buttons.
*/
button,
[role="button"] {
cursor: pointer;
}
/*
Make sure disabled buttons don't get the pointer cursor.
*/
:disabled {
cursor: default;
}
/*
1. Make replaced elements `display: block` by default. (https://github.com/mozdevs/cssremedy/issues/14)
2. Add `vertical-align: middle` to align replaced elements more sensibly by default. (https://github.com/jensimmons/cssremedy/issues/14#issuecomment-634934210)
This can trigger a poorly considered lint error in some tools but is included by design.
*/
img,
svg,
video,
canvas,
audio,
iframe,
embed,
object {
display: block;
/* 1 */
vertical-align: middle;
/* 2 */
}
/*
Constrain images and videos to the parent width and preserve their intrinsic aspect ratio. (https://github.com/mozdevs/cssremedy/issues/14)
*/
img,
video {
max-width: 100%;
height: auto;
}
/*
Ensure the default browser behavior of the `hidden` attribute.
*/
[hidden] {
display: none;
}
*, ::before, ::after {
--tw-translate-x: 0;
--tw-translate-y: 0;
--tw-rotate: 0;
--tw-skew-x: 0;
--tw-skew-y: 0;
--tw-scale-x: 1;
--tw-scale-y: 1;
--tw-pan-x: ;
--tw-pan-y: ;
--tw-pinch-zoom: ;
--tw-scroll-snap-strictness: proximity;
--tw-ordinal: ;
--tw-slashed-zero: ;
--tw-numeric-figure: ;
--tw-numeric-spacing: ;
--tw-numeric-fraction: ;
--tw-ring-inset: ;
--tw-ring-offset-width: 0px;
--tw-ring-offset-color: #fff;
--tw-ring-color: rgb(59 130 246 / 0.5);
--tw-ring-offset-shadow: 0 0 #0000;
--tw-ring-shadow: 0 0 #0000;
--tw-shadow: 0 0 #0000;
--tw-shadow-colored: 0 0 #0000;
--tw-blur: ;
--tw-brightness: ;
--tw-contrast: ;
--tw-grayscale: ;
--tw-hue-rotate: ;
--tw-invert: ;
--tw-saturate: ;
--tw-sepia: ;
--tw-drop-shadow: ;
--tw-backdrop-blur: ;
--tw-backdrop-brightness: ;
--tw-backdrop-contrast: ;
--tw-backdrop-grayscale: ;
--tw-backdrop-hue-rotate: ;
--tw-backdrop-invert: ;
--tw-backdrop-opacity: ;
--tw-backdrop-saturate: ;
--tw-backdrop-sepia: ;
}
.container {
width: 100%;
}
@media (min-width: 640px) {
.container {
max-width: 640px;
}
}
@media (min-width: 768px) {
.container {
max-width: 768px;
}
}
@media (min-width: 1024px) {
.container {
max-width: 1024px;
}
}
@media (min-width: 1280px) {
.container {
max-width: 1280px;
}
}
@media (min-width: 1536px) {
.container {
max-width: 1536px;
}
}
.col-span-2 {
grid-column: span 2 / span 2;
}
.col-span-8 {
grid-column: span 8 / span 8;
}
.col-span-10 {
grid-column: span 10 / span 10;
}
.col-span-3 {
grid-column: span 3 / span 3;
}
.col-span-4 {
grid-column: span 4 / span 4;
}
.col-span-7 {
grid-column: span 7 / span 7;
}
.col-start-11 {
grid-column-start: 11;
}
.col-start-1 {
grid-column-start: 1;
}
.col-start-2 {
grid-column-start: 2;
}
.col-start-10 {
grid-column-start: 10;
}
.col-start-9 {
grid-column-start: 9;
}
.row-start-1 {
grid-row-start: 1;
}
.row-start-2 {
grid-row-start: 2;
}
.row-start-3 {
grid-row-start: 3;
}
.row-start-4 {
grid-row-start: 4;
}
.row-start-5 {
grid-row-start: 5;
}
.row-start-6 {
grid-row-start: 6;
}
.my-1 {
margin-top: 0.25rem;
margin-bottom: 0.25rem;
}
.my-0\.5 {
margin-top: 0.125rem;
margin-bottom: 0.125rem;
}
.my-0 {
margin-top: 0px;
margin-bottom: 0px;
}
.mb-5 {
margin-bottom: 1.25rem;
}
.box-content {
box-sizing: content-box;
}
.flex {
display: flex;
}
.grid {
display: grid;
}
.h-\[1px\] {
height: 1px;
}
.w-screen {
width: 100vw;
}
.w-full {
width: 100%;
}
.max-w-4xl {
max-width: 56rem;
}
.grid-cols-12 {
grid-template-columns: repeat(12, minmax(0, 1fr));
}
.grid-rows-5 {
grid-template-rows: repeat(5, minmax(0, 1fr));
}
.flex-col {
flex-direction: column;
}
.items-center {
align-items: center;
}
.gap-x-2 {
-moz-column-gap: 0.5rem;
column-gap: 0.5rem;
}
.whitespace-pre-line {
white-space: pre-line;
}
.break-words {
overflow-wrap: break-word;
}
.border-t {
border-top-width: 1px;
}
.border-b {
border-bottom-width: 1px;
}
.border-solid {
border-style: solid;
}
.border-black {
--tw-border-opacity: 1;
border-color: rgb(0 0 0 / var(--tw-border-opacity));
}
.bg-white {
--tw-bg-opacity: 1;
background-color: rgb(255 255 255 / var(--tw-bg-opacity));
}
.bg-slate-200 {
--tw-bg-opacity: 1;
background-color: rgb(226 232 240 / var(--tw-bg-opacity));
}
.p-4 {
padding: 1rem;
}
.text-right {
text-align: right;
}
.text-sm {
font-size: 0.875rem;
line-height: 1.25rem;
}
.font-bold {
font-weight: 700;
}
.text-slate-400 {
--tw-text-opacity: 1;
color: rgb(148 163 184 / var(--tw-text-opacity));
}
.text-blue-600 {
--tw-text-opacity: 1;
color: rgb(37 99 235 / var(--tw-text-opacity));
}
.underline {
-webkit-text-decoration-line: underline;
text-decoration-line: underline;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,5 @@
{
"devDependencies": {
"tailwindcss": "^3.4.4"
}
}

View File

@@ -0,0 +1,7 @@
// Tailwind CSS configuration
module.exports = {
// Files Tailwind looks at for class names: the HTML files next to this config
content: ['./*.html'],
// No additions to the default theme
theme: {
extend: {},
},
// No plugins are used
plugins: [],
}

View File

View File

@@ -0,0 +1,89 @@
import os
from collections.abc import Generator
from pathlib import Path
import pytest
from paperless_mail.mail import MailAccountHandler
from paperless_mail.models import MailAccount
from paperless_mail.parsers import MailDocumentParser
@pytest.fixture(scope="session")
def sample_dir() -> Path:
    """Resolved path of the "samples" directory next to this file"""
    here = Path(__file__).parent
    return (here / "samples").resolve()


@pytest.fixture(scope="session")
def broken_email_file(sample_dir: Path) -> Path:
    """Path of the sample broken.eml"""
    return sample_dir.joinpath("broken.eml")


@pytest.fixture(scope="session")
def simple_txt_email_file(sample_dir: Path) -> Path:
    """Path of the sample simple_text.eml"""
    return sample_dir.joinpath("simple_text.eml")


@pytest.fixture(scope="session")
def simple_txt_email_pdf_file(sample_dir: Path) -> Path:
    """Path of the sample simple_text.eml.pdf"""
    return sample_dir.joinpath("simple_text.eml.pdf")


@pytest.fixture(scope="session")
def simple_txt_email_thumbnail_file(sample_dir: Path) -> Path:
    """Path of the sample simple_text.eml.pdf.webp"""
    return sample_dir.joinpath("simple_text.eml.pdf.webp")


@pytest.fixture(scope="session")
def html_email_file(sample_dir: Path) -> Path:
    """Path of the sample html.eml"""
    return sample_dir.joinpath("html.eml")


@pytest.fixture(scope="session")
def html_email_pdf_file(sample_dir: Path) -> Path:
    """Path of the sample html.eml.pdf"""
    return sample_dir.joinpath("html.eml.pdf")


@pytest.fixture(scope="session")
def html_email_thumbnail_file(sample_dir: Path) -> Path:
    """Path of the sample html.eml.pdf.webp"""
    return sample_dir.joinpath("html.eml.pdf.webp")


@pytest.fixture(scope="session")
def html_email_html_file(sample_dir: Path) -> Path:
    """Path of the sample html.eml.html"""
    return sample_dir.joinpath("html.eml.html")


@pytest.fixture(scope="session")
def merged_pdf_first(sample_dir: Path) -> Path:
    """Path of the sample first.pdf"""
    return sample_dir.joinpath("first.pdf")


@pytest.fixture(scope="session")
def merged_pdf_second(sample_dir: Path) -> Path:
    """Path of the sample second.pdf"""
    return sample_dir.joinpath("second.pdf")
@pytest.fixture()
def mail_parser() -> MailDocumentParser:
    """A fresh MailDocumentParser without a logging group for each test"""
    parser = MailDocumentParser(logging_group=None)
    return parser
@pytest.fixture()
def live_mail_account() -> Generator[MailAccount, None, None]:
    """
    Yields a mail account built from the PAPERLESS_MAIL_TEST_HOST,
    PAPERLESS_MAIL_TEST_USER and PAPERLESS_MAIL_TEST_PASSWD environment
    variables and deletes it again once the test is done
    """
    # Create the account before entering the try block: if the creation
    # fails (e.g. a missing environment variable) there is nothing to clean
    # up, and the cleanup must not hide the original error by failing on an
    # unbound name
    account = MailAccount.objects.create(
        name="test",
        imap_server=os.environ["PAPERLESS_MAIL_TEST_HOST"],
        username=os.environ["PAPERLESS_MAIL_TEST_USER"],
        password=os.environ["PAPERLESS_MAIL_TEST_PASSWD"],
        imap_port=993,
    )
    try:
        yield account
    finally:
        account.delete()
@pytest.fixture()
def mail_account_handler() -> MailAccountHandler:
    """A fresh MailAccountHandler for each test"""
    handler = MailAccountHandler()
    return handler

View File

@@ -0,0 +1 @@
This is not a valid eml.

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,45 @@
<!doctype html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link href="output.css" rel="stylesheet">
</head>
<body class="bg-white w-screen flex flex-col items-center">
<div class="container max-w-4xl">
<!-- Header -->
<div class="grid gap-x-2 bg-slate-200 p-4">
<div class="col-start-9 col-span-4 row-start-1 text-right">2022-10-15 09:23</div>
<div class="col-start-1 row-start-1 text-slate-400 text-right">From</div>
<div class="col-start-2 col-span-7 row-start-1">Name &lt;<a href="mailto:someone@example.de">someone@example.de</a>&gt;</div>
<div class="col-start-1 row-start-2 text-slate-400 text-right">Subject</div>
<div class=" col-start-2 col-span-10 row-start-2 font-bold">HTML Message</div>
<div class="col-start-1 row-start-3 text-slate-400 text-right">To</div>
<div class="col-start-2 col-span-10 row-start-3 text-sm my-0.5"><a href="mailto:someone@example.de">someone@example.de</a></div>
<div class="col-start-1 row-start-4 text-slate-400 text-right"></div>
<div class="col-start-2 col-span-10 row-start-4 text-sm my-0.5"></div>
<div class="col-start-1 row-start-5 text-slate-400 text-right"></div>
<div class="col-start-2 col-span-10 row-start-5" text-sm my-0.5></div>
<div class="col-start-1 row-start-6 text-slate-400 text-right">Attachments</div>
<div class="col-start-2 col-span-10 row-start-6">IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (600.24 KiB)</div>
</div>
<!-- Separator-->
<div class="border-t border-solid border-b w-full h-[1px] box-content border-black mb-5 bg-slate-200"></div>
<!-- Content-->
<div class="w-full break-words">Some Text<br><br>and an embedded image.</div>
</div>
</body>
</html>

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.0 KiB

View File

@@ -0,0 +1,19 @@
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
</head>
<body>
<p>Some Text</p>
<p>
<img src="cid:part1.pNdUSz0s.D3NqVtPg@example.de" alt="Has to be rewritten to work..">
<img src="https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png" alt="This image should not be shown.">
</p>
<p>and an embedded image.<br>
</p>
<p id="changeme">Paragraph unchanged.</p>
<scRipt>
document.getElementById("changeme").innerHTML = "Paragraph changed via Java Script.";
</script>
</body>
</html>

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.9 KiB

Binary file not shown.

View File

@@ -0,0 +1,25 @@
Return-Path: <mail@someserver.de>
Delivered-To: mail@someserver.de
Received: from mail.someserver.org ([::1])
by e1acdba3bd07 with LMTP
id KBKZGD2YR2NTCgQAjubtDA
(envelope-from <mail@someserver.de>)
for <mail@someserver.de>; Wed, 10 Oct 2022 11:40:46 +0200
Received: from [127.0.0.1] (localhost [127.0.0.1]) by localhost (Mailerdaemon) with ESMTPSA id 2BC9064C1616
for <some@one.de>; Wed, 12 Oct 2022 21:40:46 +0200 (CEST)
Message-ID: <6e99e34d-e20a-80c4-ea61-d8234b612be9@someserver.de>
Date: Wed, 12 Oct 2022 21:40:43 +0200
MIME-Version: 1.0
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101
Thunderbird/102.3.1
Content-Language: en-US
To: some@one.de
Cc: asdasd@æsdasd.de, asdadasdasdasda.asdasd@æsdasd.de
Bcc: fdf@fvf.de
From: Some One <mail@someserver.de>
Content-Type: text/plain; charset=UTF-8; format=flowed
Content-Transfer-Encoding: 7bit
X-Last-TLS-Session-Version: TLSv1.3
Subject: Simple Text Mail
This is just a simple Text Mail.

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.2 KiB

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,61 @@
import os
import warnings
import pytest
from paperless_mail.mail import MailAccountHandler
from paperless_mail.mail import MailError
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
# Only run if the environment is set up
# and the configured host is not empty (forks, I think)
@pytest.mark.skipif(
    not os.environ.get("PAPERLESS_MAIL_TEST_HOST"),
    reason="Live server testing not enabled",
)
@pytest.mark.django_db()
class TestMailLiveServer:
    """
    Handles the live mail account with a single rule per test. A MailError
    fails the test, any other exception is only reported as a warning.
    """

    def _handle_account_with_action(
        self,
        handler: MailAccountHandler,
        account: MailAccount,
        action,
    ) -> None:
        # Create a rule with the given action, handle the account and
        # remove the rule again
        try:
            rule = MailRule.objects.create(
                name="testrule",
                account=account,
                action=action,
            )
            handler.handle_mail_account(account)
            rule.delete()
        except MailError as e:
            pytest.fail(f"Failure: {e}")
        except Exception as e:
            warnings.warn(f"Unhandled exception: {e}")

    def test_process_non_gmail_server_flag(
        self,
        mail_account_handler: MailAccountHandler,
        live_mail_account: MailAccount,
    ):
        self._handle_account_with_action(
            mail_account_handler,
            live_mail_account,
            MailRule.MailAction.FLAG,
        )

    def test_process_non_gmail_server_tag(
        self,
        mail_account_handler: MailAccountHandler,
        live_mail_account: MailAccount,
    ):
        self._handle_account_with_action(
            mail_account_handler,
            live_mail_account,
            MailRule.MailAction.TAG,
        )

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,375 @@
from datetime import timedelta
from unittest import mock
from django.conf import settings
from django.contrib.auth.models import Permission
from django.contrib.auth.models import User
from django.test import TestCase
from django.test import override_settings
from django.utils import timezone
from httpx_oauth.oauth2 import GetAccessTokenError
from httpx_oauth.oauth2 import RefreshTokenError
from rest_framework import status
from paperless_mail.mail import MailAccountHandler
from paperless_mail.models import MailAccount
from paperless_mail.oauth import PaperlessMailOAuth2Manager
class TestMailOAuth(
TestCase,
):
def setUp(self) -> None:
    """
    Creates and logs in a user with the add_mailaccount permission,
    creates a MailAccountHandler and configures the OAuth settings used
    by the tests
    """
    self.user = User.objects.create_user("testuser")
    self.user.user_permissions.add(
        *Permission.objects.filter(
            codename__in=[
                "add_mailaccount",
            ],
        ),
    )
    self.user.save()
    self.client.force_login(self.user)
    self.mail_account_handler = MailAccountHandler()
    # Mock settings. They are overridden rather than assigned on the
    # global settings object so the previous values are restored after
    # each test and cannot leak into other tests
    oauth_settings = override_settings(
        OAUTH_CALLBACK_BASE_URL="http://localhost:8000",
        GMAIL_OAUTH_CLIENT_ID="test_gmail_client_id",
        GMAIL_OAUTH_CLIENT_SECRET="test_gmail_client_secret",
        OUTLOOK_OAUTH_CLIENT_ID="test_outlook_client_id",
        OUTLOOK_OAUTH_CLIENT_SECRET="test_outlook_client_secret",
    )
    oauth_settings.enable()
    self.addCleanup(oauth_settings.disable)
    super().setUp()
def test_generate_paths(self):
    """
    GIVEN:
        - Overridden settings for the OAuth callback base URL, the
          paperless URL, the base URL and the debug flag
    WHEN:
        - oauth_callback_url and oauth_redirect_url of the manager are read
    THEN:
        - Correct URLs are generated
    """
    manager = PaperlessMailOAuth2Manager()

    # Callback URL
    plain_callback = "http://paperless.example.com/api/oauth/callback/"
    prefixed_callback = "http://paperless.example.com/paperless/api/oauth/callback/"

    with override_settings(OAUTH_CALLBACK_BASE_URL="http://paperless.example.com"):
        self.assertEqual(manager.oauth_callback_url, plain_callback)

    with override_settings(
        OAUTH_CALLBACK_BASE_URL=None,
        PAPERLESS_URL="http://paperless.example.com",
    ):
        self.assertEqual(manager.oauth_callback_url, plain_callback)

    with override_settings(
        OAUTH_CALLBACK_BASE_URL=None,
        PAPERLESS_URL="http://paperless.example.com",
        BASE_URL="/paperless/",
    ):
        self.assertEqual(manager.oauth_callback_url, prefixed_callback)

    # Redirect URL
    with override_settings(DEBUG=True):
        self.assertEqual(manager.oauth_redirect_url, "http://localhost:4200/mail")

    with override_settings(DEBUG=False):
        self.assertEqual(manager.oauth_redirect_url, "/mail")
@mock.patch(
    "paperless_mail.oauth.PaperlessMailOAuth2Manager.get_gmail_access_token",
)
@mock.patch(
    "paperless_mail.oauth.PaperlessMailOAuth2Manager.get_outlook_access_token",
)
def test_oauth_callback_view_success(
    self,
    mock_get_outlook_access_token,
    mock_get_gmail_access_token,
):
    """
    GIVEN:
        - Mocked settings for Gmail and Outlook OAuth client IDs and secrets
        - Mocked access token retrieval for Gmail and Outlook
    WHEN:
        - OAuth callback is called with a code and scope
        - OAuth callback is called with a code and no scope
    THEN:
        - Gmail mail account is created
        - Outlook mail account is created
    """
    # Both providers hand out the same kind of token response
    for token_mock in (mock_get_gmail_access_token, mock_get_outlook_access_token):
        token_mock.return_value = {
            "access_token": "test_access_token",
            "refresh_token": "test_refresh_token",
            "expires_in": 3600,
        }

    # Store the state the callbacks below are called with in the session
    session = self.client.session
    session.update({"oauth_state": "test_state"})
    session.save()

    # Test Google OAuth callback
    gmail_response = self.client.get(
        "/api/oauth/callback/?code=test_code&scope=https://mail.google.com/&state=test_state",
    )
    self.assertEqual(gmail_response.status_code, status.HTTP_302_FOUND)
    self.assertIn("oauth_success=1", gmail_response.url)
    mock_get_gmail_access_token.assert_called_once()
    gmail_accounts = MailAccount.objects.filter(imap_server="imap.gmail.com")
    self.assertTrue(gmail_accounts.exists())

    # Test Outlook OAuth callback
    outlook_response = self.client.get(
        "/api/oauth/callback/?code=test_code&state=test_state",
    )
    self.assertEqual(outlook_response.status_code, status.HTTP_302_FOUND)
    self.assertIn("oauth_success=1", outlook_response.url)
    outlook_accounts = MailAccount.objects.filter(
        imap_server="outlook.office365.com",
    )
    self.assertTrue(outlook_accounts.exists())
@mock.patch("httpx_oauth.oauth2.BaseOAuth2.get_access_token")
def test_oauth_callback_view_fails(self, mock_get_access_token):
"""
GIVEN:
- Mocked settings for Gmail and Outlook OAuth client IDs and secrets
WHEN:
- OAuth callback is called and get access token returns an error
THEN:
- No mail account is created
- Error is logged
"""
mock_get_access_token.side_effect = GetAccessTokenError("test_error")
session = self.client.session
session.update(
{
"oauth_state": "test_state",
},
)
session.save()
with self.assertLogs("paperless_mail", level="ERROR") as cm:
# Test Google OAuth callback
response = self.client.get(
"/api/oauth/callback/?code=test_code&scope=https://mail.google.com/&state=test_state",
)
self.assertEqual(response.status_code, status.HTTP_302_FOUND)
self.assertIn("oauth_success=0", response.url)
self.assertFalse(
MailAccount.objects.filter(imap_server="imap.gmail.com").exists(),
)
# Test Outlook OAuth callback
response = self.client.get(
"/api/oauth/callback/?code=test_code&state=test_state",
)
self.assertEqual(response.status_code, status.HTTP_302_FOUND)
self.assertIn("oauth_success=0", response.url)
self.assertFalse(
MailAccount.objects.filter(
imap_server="outlook.office365.com",
).exists(),
)
self.assertIn("Error getting access token: test_error", cm.output[0])
def test_oauth_callback_view_insufficient_permissions(self):
"""
GIVEN:
- Mocked settings for Gmail and Outlook OAuth client IDs and secrets
- User without add_mailaccount permission
WHEN:
- OAuth callback is called
THEN:
- 400 bad request returned, no mail accounts are created
"""
self.user.user_permissions.remove(
*Permission.objects.filter(
codename__in=[
"add_mailaccount",
],
),
)
self.user.save()
response = self.client.get(
"/api/oauth/callback/?code=test_code&scope=https://mail.google.com/",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertFalse(
MailAccount.objects.filter(imap_server="imap.gmail.com").exists(),
)
self.assertFalse(
MailAccount.objects.filter(imap_server="outlook.office365.com").exists(),
)
def test_oauth_callback_view_no_code(self):
"""
GIVEN:
- Mocked settings for Gmail and Outlook OAuth client IDs and secrets
WHEN:
- OAuth callback is called without a code
THEN:
- 400 bad request returned, no mail accounts are created
"""
response = self.client.get(
"/api/oauth/callback/",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertFalse(
MailAccount.objects.filter(imap_server="imap.gmail.com").exists(),
)
self.assertFalse(
MailAccount.objects.filter(imap_server="outlook.office365.com").exists(),
)
def test_oauth_callback_view_invalid_state(self):
"""
GIVEN:
- Mocked settings for Gmail and Outlook OAuth client IDs and secrets
WHEN:
- OAuth callback is called with an invalid state
THEN:
- 400 bad request returned, no mail accounts are created
"""
response = self.client.get(
"/api/oauth/callback/?code=test_code&state=invalid_state",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertFalse(
MailAccount.objects.filter(imap_server="imap.gmail.com").exists(),
)
self.assertFalse(
MailAccount.objects.filter(imap_server="outlook.office365.com").exists(),
)
@mock.patch("paperless_mail.mail.get_mailbox")
@mock.patch(
"httpx_oauth.oauth2.BaseOAuth2.refresh_token",
)
def test_refresh_token_on_handle_mail_account(
self,
mock_refresh_token,
mock_get_mailbox,
):
"""
GIVEN:
- Mail account with refresh token and expiration
WHEN:
- handle_mail_account is called
THEN:
- Refresh token is called
"""
mock_mailbox = mock.MagicMock()
mock_get_mailbox.return_value.__enter__.return_value = mock_mailbox
mail_account = MailAccount.objects.create(
name="Test Gmail Mail Account",
username="test_username",
imap_security=MailAccount.ImapSecurity.SSL,
imap_port=993,
account_type=MailAccount.MailAccountType.GMAIL_OAUTH,
is_token=True,
refresh_token="test_refresh_token",
expiration=timezone.now() - timedelta(days=1),
)
mock_refresh_token.return_value = {
"access_token": "test_access_token",
"refresh_token": "test_refresh_token",
"expires_in": 3600,
}
self.mail_account_handler.handle_mail_account(mail_account)
mock_refresh_token.assert_called_once()
mock_refresh_token.reset_mock()
mock_refresh_token.return_value = {
"access_token": "test_access_token",
"refresh_token": "test_refresh",
"expires_in": 3600,
}
outlook_mail_account = MailAccount.objects.create(
name="Test Outlook Mail Account",
username="test_username",
imap_security=MailAccount.ImapSecurity.SSL,
imap_port=993,
account_type=MailAccount.MailAccountType.OUTLOOK_OAUTH,
is_token=True,
refresh_token="test_refresh_token",
expiration=timezone.now() - timedelta(days=1),
)
self.mail_account_handler.handle_mail_account(outlook_mail_account)
mock_refresh_token.assert_called_once()
@mock.patch("paperless_mail.mail.get_mailbox")
@mock.patch(
"httpx_oauth.oauth2.BaseOAuth2.refresh_token",
)
def test_refresh_token_on_handle_mail_account_fails(
self,
mock_refresh_token,
mock_get_mailbox,
):
"""
GIVEN:
- Mail account with refresh token and expiration
WHEN:
- handle_mail_account is called
- Refresh token is called but fails
THEN:
- Error is logged
- 0 processed mails is returned
"""
mock_mailbox = mock.MagicMock()
mock_get_mailbox.return_value.__enter__.return_value = mock_mailbox
mail_account = MailAccount.objects.create(
name="Test Gmail Mail Account",
username="test_username",
imap_security=MailAccount.ImapSecurity.SSL,
imap_port=993,
account_type=MailAccount.MailAccountType.GMAIL_OAUTH,
is_token=True,
refresh_token="test_refresh_token",
expiration=timezone.now() - timedelta(days=1),
)
mock_refresh_token.side_effect = RefreshTokenError("test_error")
with self.assertLogs("paperless_mail", level="ERROR") as cm:
# returns 0 processed mails
self.assertEqual(
self.mail_account_handler.handle_mail_account(mail_account),
0,
)
mock_refresh_token.assert_called_once()
self.assertIn(
f"Failed to refresh oauth token for account {mail_account}: test_error",
cm.output[0],
)

View File

@@ -0,0 +1,729 @@
import datetime
import logging
from pathlib import Path
from unittest import mock
import httpx
import pytest
from django.test.html import parse_html
from pytest_django.fixtures import SettingsWrapper
from pytest_httpx import HTTPXMock
from pytest_mock import MockerFixture
from documents.parsers import ParseError
from paperless_mail.parsers import MailDocumentParser
class TestEmailFileParsing:
    """
    Tests around reading a file and parsing it into a
    MailMessage
    """

    def test_parse_error_missing_file(
        self,
        mail_parser: MailDocumentParser,
        sample_dir: Path,
    ):
        """
        GIVEN:
            - Fresh parser
        WHEN:
            - A nonexistent file should be parsed
        THEN:
            - An Exception is thrown
        """
        # Check if exception is raised when parsing fails.
        test_file = sample_dir / "doesntexist.eml"
        assert not test_file.exists()
        # Use the real e-mail MIME type so the error can only come from the
        # missing file, not from a misspelled type.
        with pytest.raises(ParseError):
            mail_parser.parse(test_file, "message/rfc822")

    def test_parse_error_invalid_email(
        self,
        mail_parser: MailDocumentParser,
        broken_email_file: Path,
    ):
        """
        GIVEN:
            - Fresh parser
        WHEN:
            - A faulty file should be parsed
        THEN:
            - An Exception is thrown
        """
        # Check if exception is raised when the mail is faulty.
        with pytest.raises(ParseError):
            mail_parser.parse(broken_email_file, "message/rfc822")

    def test_parse_simple_text_email_file(
        self,
        mail_parser: MailDocumentParser,
        simple_txt_email_file: Path,
    ):
        """
        GIVEN:
            - Fresh parser
        WHEN:
            - A .eml file should be parsed
        THEN:
            - The content of the mail should be available in the parse result.
        """
        # Parse Test file and check relevant content
        parsed_msg = mail_parser.parse_file_to_message(simple_txt_email_file)
        assert parsed_msg.date.year == 2022
        assert parsed_msg.date.month == 10
        assert parsed_msg.date.day == 12
        assert parsed_msg.date.hour == 21
        assert parsed_msg.date.minute == 40
        assert parsed_msg.date.second == 43
        assert parsed_msg.date.tzname() == "UTC+02:00"
        assert parsed_msg.from_ == "mail@someserver.de"
        assert parsed_msg.subject == "Simple Text Mail"
        assert parsed_msg.text == "This is just a simple Text Mail.\n"
        assert parsed_msg.to == ("some@one.de",)
class TestEmailMetadataExtraction:
    """
    Tests extraction of metadata from an email
    """

    def test_extract_metadata_fail(
        self,
        caplog: pytest.LogCaptureFixture,
        mail_parser: MailDocumentParser,
    ):
        """
        GIVEN:
            - Fresh start
        WHEN:
            - Metadata extraction is triggered for nonexistent file
        THEN:
            - A log warning should be generated
        """
        # A failed extraction yields no metadata and exactly one warning
        extracted = mail_parser.extract_metadata("na", "message/rfc822")
        assert extracted == []
        assert len(caplog.records) == 1
        warning = caplog.records[0]
        assert warning.levelno == logging.WARNING
        assert warning.name == "paperless.parsing.mail"
        assert "Error while fetching document metadata for na" in warning.message

    def test_extract_metadata(
        self,
        mail_parser: MailDocumentParser,
        simple_txt_email_file: Path,
    ):
        """
        GIVEN:
            - Fresh start
        WHEN:
            - Metadata extraction is triggered
        THEN:
            - metadata is returned
        """
        metadata = mail_parser.extract_metadata(simple_txt_email_file, "message/rfc822")

        # Entries reported without a prefix
        unprefixed = {
            "attachments": "",
            "date": "2022-10-12 21:40:43 UTC+02:00",
        }
        # Entries reported with the "header" prefix
        headers = {
            "content-language": "en-US",
            "content-type": "text/plain; charset=UTF-8; format=flowed",
            "date": "Wed, 12 Oct 2022 21:40:43 +0200",
            "delivered-to": "mail@someserver.de",
            "from": "Some One <mail@someserver.de>",
            "message-id": "<6e99e34d-e20a-80c4-ea61-d8234b612be9@someserver.de>",
            "mime-version": "1.0",
            "received": "from mail.someserver.org ([::1])\n\tby e1acdba3bd07 with LMTP\n\tid KBKZGD2YR2NTCgQAjubtDA\n\t(envelope-from <mail@someserver.de>)\n\tfor <mail@someserver.de>; Wed, 10 Oct 2022 11:40:46 +0200, from [127.0.0.1] (localhost [127.0.0.1]) by localhost (Mailerdaemon) with ESMTPSA id 2BC9064C1616\n\tfor <some@one.de>; Wed, 12 Oct 2022 21:40:46 +0200 (CEST)",
            "return-path": "<mail@someserver.de>",
            "subject": "Simple Text Mail",
            "to": "some@one.de",
            "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101\n Thunderbird/102.3.1",
            "x-last-tls-session-version": "TLSv1.3",
        }

        # Validate Metadata parsing returns the expected results
        for prefix, entries in (("", unprefixed), ("header", headers)):
            for key, value in entries.items():
                assert {
                    "namespace": "",
                    "prefix": prefix,
                    "key": key,
                    "value": value,
                } in metadata
class TestEmailThumbnailGenerate:
    """
    Tests the correct generation of a thumbnail for an email
    """

    def test_get_thumbnail(
        self,
        mocker: MockerFixture,
        mail_parser: MailDocumentParser,
        simple_txt_email_file: Path,
    ):
        """
        GIVEN:
            - An E-Mail was parsed
        WHEN:
            - The Thumbnail is requested
        THEN:
            - The parser should call the functions which generate the thumbnail
        """
        expected_thumbnail = "Passing the return value through.."
        fake_pdf = "Mocked return value.."
        thumbnail_mock = mocker.patch(
            "paperless_mail.parsers.make_thumbnail_from_pdf",
            return_value=expected_thumbnail,
        )
        pdf_mock = mocker.patch(
            "paperless_mail.parsers.MailDocumentParser.generate_pdf",
            return_value=fake_pdf,
        )

        result = mail_parser.get_thumbnail(simple_txt_email_file, "message/rfc822")

        # The generated PDF must be handed on to the thumbnail helper unchanged
        pdf_mock.assert_called_once()
        thumbnail_mock.assert_called_once_with(
            fake_pdf,
            mail_parser.tempdir,
            None,
        )
        assert expected_thumbnail == result
class TestTikaHtmlParse:
    """
    Tests the Tika based conversion of HTML into plain text
    """

    # Minimal HTML document shared by the tests below
    _SAMPLE_HTML = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>'

    def test_tika_parse_unsuccessful(
        self,
        httpx_mock: HTTPXMock,
        mail_parser: MailDocumentParser,
    ):
        """
        GIVEN:
            - Fresh start
        WHEN:
            - tika parsing fails
        THEN:
            - the parser should return an empty string
        """
        # The mocked reply carries no "X-TIKA:content" entry
        tika_reply = {"Content-Type": "text/html", "X-TIKA:Parsed-By": []}
        httpx_mock.add_response(json=tika_reply)
        assert mail_parser.tika_parse("None") == ""

    def test_tika_parse(self, httpx_mock: HTTPXMock, mail_parser: MailDocumentParser):
        """
        GIVEN:
            - Fresh start
        WHEN:
            - tika parsing is called
        THEN:
            - a web request to tika shall be done and the reply is returned
        """
        expected_text = "Some Text"
        httpx_mock.add_response(
            json={
                "Content-Type": "text/html",
                "X-TIKA:Parsed-By": [],
                "X-TIKA:content": expected_text,
            },
        )
        parsed = mail_parser.tika_parse(self._SAMPLE_HTML)
        assert expected_text == parsed.strip()
        requested_url = str(httpx_mock.get_request().url)
        assert "http://localhost:9998" in requested_url

    def test_tika_parse_exception(
        self,
        httpx_mock: HTTPXMock,
        mail_parser: MailDocumentParser,
    ):
        """
        GIVEN:
            - Fresh start
        WHEN:
            - tika parsing is called and an exception is thrown on the request
        THEN:
            - a ParseError Exception is thrown
        """
        httpx_mock.add_response(status_code=httpx.codes.INTERNAL_SERVER_ERROR)
        with pytest.raises(ParseError):
            mail_parser.tika_parse(self._SAMPLE_HTML)

    def test_tika_parse_unreachable(
        self,
        settings: SettingsWrapper,
        mail_parser: MailDocumentParser,
    ):
        """
        GIVEN:
            - Fresh start
        WHEN:
            - tika parsing is called but tika is not available
        THEN:
            - a ParseError Exception is thrown
        """
        # Check if exception is raised when Tika cannot be reached.
        settings.TIKA_ENDPOINT = "http://does-not-exist:9998"
        with pytest.raises(ParseError):
            mail_parser.tika_parse(self._SAMPLE_HTML)
class TestParser:
    """
    Tests parsing of .eml files and generation of the archive PDF, with Tika
    and Gotenberg replaced by mocked HTTP responses
    """

    @staticmethod
    def _queue_tika_text_response(httpx_mock: HTTPXMock) -> None:
        """Register the mocked Tika text-extraction reply."""
        httpx_mock.add_response(
            url="http://localhost:9998/tika/text",
            method="PUT",
            json={
                "Content-Type": "text/html",
                "X-TIKA:Parsed-By": [],
                "X-TIKA:content": "This is some Tika HTML text",
            },
        )

    @staticmethod
    def _queue_html_to_pdf_response(httpx_mock: HTTPXMock, pdf_file: Path) -> None:
        """Register a mocked HTML to PDF conversion reply returning *pdf_file*."""
        httpx_mock.add_response(
            url="http://localhost:3000/forms/chromium/convert/html",
            method="POST",
            content=pdf_file.read_bytes(),
        )

    def test_parse_eml_simple(
        self,
        mocker: MockerFixture,
        mail_parser: MailDocumentParser,
        simple_txt_email_file: Path,
    ):
        """
        GIVEN:
            - Fresh start
        WHEN:
            - parsing is done with non html mail
        THEN:
            - parsed information is available
        """
        generate_pdf_mock = mocker.patch(
            "paperless_mail.parsers.MailDocumentParser.generate_pdf",
        )
        expected_text = (
            "Subject: Simple Text Mail\n\n"
            "From: Some One <mail@someserver.de>\n\n"
            "To: some@one.de\n\n"
            "CC: asdasd@æsdasd.de, asdadasdasdasda.asdasd@æsdasd.de\n\n"
            "BCC: fdf@fvf.de\n\n"
            "\n\nThis is just a simple Text Mail."
        )
        expected_date = datetime.datetime(
            2022,
            10,
            12,
            21,
            40,
            43,
            tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)),
        )

        mail_parser.parse(simple_txt_email_file, "message/rfc822")

        # Validate parsing returns the expected results
        assert expected_text == mail_parser.text
        assert expected_date == mail_parser.date
        # Only check that an archive was requested here; generate_pdf() has
        # its own, more detailed tests.
        generate_pdf_mock.assert_called()

    def test_parse_eml_html(
        self,
        mocker: MockerFixture,
        httpx_mock: HTTPXMock,
        mail_parser: MailDocumentParser,
        html_email_file: Path,
    ):
        """
        GIVEN:
            - Fresh start
        WHEN:
            - parsing is done with html mail
        THEN:
            - Tika is called, parsed information from non html parts is available
        """
        generate_pdf_mock = mocker.patch(
            "paperless_mail.parsers.MailDocumentParser.generate_pdf",
        )
        expected_text = (
            "Subject: HTML Message\n\n"
            "From: Name <someone@example.de>\n\n"
            "To: someone@example.de\n\n"
            "Attachments: IntM6gnXFm00FEV5.png (6.89 KiB), 600+kbfile.txt (600.24 KiB)\n\n"
            "HTML content: tika return\n\n"
            "Some Text and an embedded image."
        )
        expected_date = datetime.datetime(
            2022,
            10,
            15,
            11,
            23,
            19,
            tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)),
        )
        httpx_mock.add_response(
            json={
                "Content-Type": "text/html",
                "X-TIKA:Parsed-By": [],
                "X-TIKA:content": "tika return",
            },
        )

        mail_parser.parse(html_email_file, "message/rfc822")

        # Validate parsing returns the expected results
        generate_pdf_mock.assert_called_once()
        assert expected_text == mail_parser.text
        assert expected_date == mail_parser.date

    def test_generate_pdf_parse_error(
        self,
        httpx_mock: HTTPXMock,
        mail_parser: MailDocumentParser,
        simple_txt_email_file: Path,
    ):
        """
        GIVEN:
            - Fresh start
        WHEN:
            - pdf generation is requested but gotenberg fails
        THEN:
            - a ParseError Exception is thrown
        """
        httpx_mock.add_response(status_code=httpx.codes.INTERNAL_SERVER_ERROR)
        with pytest.raises(ParseError):
            mail_parser.parse(simple_txt_email_file, "message/rfc822")

    def test_generate_pdf_simple_email(
        self,
        httpx_mock: HTTPXMock,
        mail_parser: MailDocumentParser,
        simple_txt_email_file: Path,
        simple_txt_email_pdf_file: Path,
    ):
        """
        GIVEN:
            - Simple text email with no HTML content
        WHEN:
            - Email is parsed
        THEN:
            - Gotenberg is called to generate a PDF from HTML
            - Archive file is generated
        """
        self._queue_html_to_pdf_response(httpx_mock, simple_txt_email_pdf_file)
        mail_parser.parse(simple_txt_email_file, "message/rfc822")
        assert mail_parser.archive_path is not None

    @pytest.mark.httpx_mock(can_send_already_matched_responses=True)
    def test_generate_pdf_html_email(
        self,
        httpx_mock: HTTPXMock,
        mail_parser: MailDocumentParser,
        html_email_file: Path,
        html_email_pdf_file: Path,
    ):
        """
        GIVEN:
            - email with HTML content
        WHEN:
            - Email is parsed
        THEN:
            - Gotenberg is called to generate a PDF from HTML
            - Gotenberg is used to merge the two PDFs
            - Archive file is generated
        """
        self._queue_tika_text_response(httpx_mock)
        self._queue_html_to_pdf_response(httpx_mock, html_email_pdf_file)
        httpx_mock.add_response(
            url="http://localhost:3000/forms/pdfengines/merge",
            method="POST",
            content=b"Pretend merged PDF content",
        )
        mail_parser.parse(html_email_file, "message/rfc822")
        assert mail_parser.archive_path is not None

    def test_generate_pdf_html_email_html_to_pdf_failure(
        self,
        httpx_mock: HTTPXMock,
        mail_parser: MailDocumentParser,
        html_email_file: Path,
        html_email_pdf_file: Path,
    ):
        """
        GIVEN:
            - email with HTML content
        WHEN:
            - Email is parsed
            - Conversion of email HTML content to PDF fails
        THEN:
            - ParseError is raised
        """
        self._queue_tika_text_response(httpx_mock)
        # First conversion request succeeds, the second one fails
        self._queue_html_to_pdf_response(httpx_mock, html_email_pdf_file)
        httpx_mock.add_response(
            url="http://localhost:3000/forms/chromium/convert/html",
            method="POST",
            status_code=httpx.codes.INTERNAL_SERVER_ERROR,
        )
        with pytest.raises(ParseError):
            mail_parser.parse(html_email_file, "message/rfc822")

    @pytest.mark.httpx_mock(can_send_already_matched_responses=True)
    def test_generate_pdf_html_email_merge_failure(
        self,
        httpx_mock: HTTPXMock,
        mail_parser: MailDocumentParser,
        html_email_file: Path,
        html_email_pdf_file: Path,
    ):
        """
        GIVEN:
            - email with HTML content
        WHEN:
            - Email is parsed
            - Merging of PDFs fails
        THEN:
            - ParseError is raised
        """
        self._queue_tika_text_response(httpx_mock)
        self._queue_html_to_pdf_response(httpx_mock, html_email_pdf_file)
        httpx_mock.add_response(
            url="http://localhost:3000/forms/pdfengines/merge",
            method="POST",
            status_code=httpx.codes.INTERNAL_SERVER_ERROR,
        )
        with pytest.raises(ParseError):
            mail_parser.parse(html_email_file, "message/rfc822")

    def test_mail_to_html(
        self,
        mail_parser: MailDocumentParser,
        html_email_file: Path,
        html_email_html_file: Path,
    ):
        """
        GIVEN:
            - Email message with HTML content
        WHEN:
            - Email is parsed
        THEN:
            - Resulting HTML is as expected
        """
        message = mail_parser.parse_file_to_message(html_email_file)
        rendered_file = mail_parser.mail_to_html(message)
        expected_html = parse_html(html_email_html_file.read_text())
        actual_html = parse_html(rendered_file.read_text())
        assert expected_html == actual_html

    def test_generate_pdf_from_mail(
        self,
        httpx_mock: HTTPXMock,
        mail_parser: MailDocumentParser,
        html_email_file: Path,
    ):
        """
        GIVEN:
            - Email message with HTML content
        WHEN:
            - Email is parsed
        THEN:
            - Gotenberg is used to convert HTML to PDF
        """
        httpx_mock.add_response(content=b"Content")
        message = mail_parser.parse_file_to_message(html_email_file)
        pdf_file = mail_parser.generate_pdf_from_mail(message)
        assert pdf_file.read_bytes() == b"Content"
        sent_request = httpx_mock.get_request()
        assert str(sent_request.url) == "http://localhost:3000/forms/chromium/convert/html"

    @pytest.mark.httpx_mock(can_send_already_matched_responses=True)
    @mock.patch("gotenberg_client._merge.routes.SyncMergePdfsRoute.merge")
    @mock.patch("paperless_mail.models.MailRule.objects.get")
    def test_generate_pdf_layout_options(
        self,
        mock_mailrule_get: mock.Mock,
        mock_merge_route: mock.Mock,
        httpx_mock: HTTPXMock,
        mail_parser: MailDocumentParser,
        html_email_file: Path,
        html_email_pdf_file: Path,
    ):
        """
        GIVEN:
            - Email message
        WHEN:
            - Email is parsed with different layout options
        THEN:
            - Gotenberg is called with the correct layout option
        """
        self._queue_tika_text_response(httpx_mock)
        self._queue_html_to_pdf_response(httpx_mock, html_email_pdf_file)
        httpx_mock.add_response(
            url="http://localhost:3000/forms/pdfengines/merge",
            method="POST",
            content=b"Pretend merged PDF content",
        )

        def check_layout(layout_option, expected_calls, expected_pdf_names):
            # Parse with a mail rule using the given layout, then inspect the
            # files handed to the (mocked) merge route by the latest call.
            mock_mailrule_get.return_value = mock.Mock(pdf_layout=layout_option)
            mail_parser.parse(
                document_path=html_email_file,
                mime_type="message/rfc822",
                mailrule_id=1,
            )
            positional, _ = mock_merge_route.call_args
            merged_files = positional[0]
            assert len(merged_files) == expected_calls
            for position, pdf_name in enumerate(expected_pdf_names):
                assert merged_files[position].name == pdf_name

        layout_cases = (
            # 1 = MailRule.PdfLayout.TEXT_HTML
            (1, 2, ["email_as_pdf.pdf", "html.pdf"]),
            # 2 = MailRule.PdfLayout.HTML_TEXT
            (2, 2, ["html.pdf", "email_as_pdf.pdf"]),
            # 3 = MailRule.PdfLayout.HTML_ONLY
            (3, 1, ["html.pdf"]),
            # 4 = MailRule.PdfLayout.TEXT_ONLY
            (4, 1, ["email_as_pdf.pdf"]),
        )
        for layout_option, expected_calls, expected_pdf_names in layout_cases:
            check_layout(layout_option, expected_calls, expected_pdf_names)

View File

@@ -0,0 +1,233 @@
import os
import shutil
import subprocess
import tempfile
from pathlib import Path
import httpx
import pytest
from imagehash import average_hash
from PIL import Image
from pytest_mock import MockerFixture
from documents.tests.utils import util_call_with_backoff
from paperless_mail.parsers import MailDocumentParser
def extract_text(pdf_path: Path) -> str:
    """
    Using pdftotext from poppler, extracts the text of a PDF into a file,
    then reads the file contents and returns it

    Raises:
        FileNotFoundError: if the pdftotext executable is not on PATH
        subprocess.CalledProcessError: if pdftotext exits with an error
    """
    pdftotext = shutil.which("pdftotext")
    if pdftotext is None:
        # Fail with a clear message instead of passing None to subprocess
        raise FileNotFoundError("pdftotext executable not found on PATH")
    # pdftotext is told to write UTF-8, so the file has to be read back as
    # UTF-8 as well instead of with the locale's default encoding.
    with tempfile.NamedTemporaryFile(
        mode="w+",
        encoding="utf-8",
    ) as tmp:
        subprocess.run(
            [
                pdftotext,
                "-q",
                "-layout",
                "-enc",
                "UTF-8",
                str(pdf_path),
                tmp.name,
            ],
            check=True,
        )
        return tmp.read()
class MailAttachmentMock:
    """
    Minimal stand-in for a mail attachment, always reported as a PNG image
    """

    def __init__(self, payload, content_id):
        # The content type is fixed; only payload and id vary per instance
        self.content_type = "image/png"
        self.payload, self.content_id = payload, content_id
@pytest.mark.skipif(
    "PAPERLESS_CI_TEST" not in os.environ,
    reason="No Gotenberg/Tika servers to test with",
)
class TestUrlCanary:
    """
    Verify certain URLs are still available so testing is valid still
    """

    def test_online_image_exception_on_not_available(self):
        """
        GIVEN:
            - Fresh start
        WHEN:
            - nonexistent image is requested
        THEN:
            - An exception shall be thrown

        A public image is used in the html sample file. We have no control
        whether this image stays online forever, so here we check if we can
        detect if it is not available anymore.
        """
        missing_image_url = (
            "https://upload.wikimedia.org/wikipedia/en/f/f7/nonexistent.png"
        )
        response = httpx.get(missing_image_url)
        with pytest.raises(httpx.HTTPStatusError) as raised:
            response.raise_for_status()
        assert raised.value.response.status_code == httpx.codes.NOT_FOUND

    def test_is_online_image_still_available(self):
        """
        GIVEN:
            - Fresh start
        WHEN:
            - A public image used in the html sample file is requested
        THEN:
            - No exception shall be thrown

        A public image is used in the html sample file. We have no control
        whether this image stays online forever, so here we check if it is
        still there.
        """
        # Now check the URL used in samples/sample.html
        sample_image_url = "https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png"
        httpx.get(sample_image_url).raise_for_status()
@pytest.mark.skipif(
    "PAPERLESS_CI_TEST" not in os.environ,
    reason="No Gotenberg/Tika servers to test with",
)
class TestParserLive:
    """
    Tests of the mail parser which talk to real Tika and Gotenberg servers
    instead of mocked HTTP responses
    """

    @staticmethod
    def imagehash(file, hash_size=18):
        """Return the average hash of the image *file* as a string."""
        # Open via context manager so the image file handle is closed again
        with Image.open(file) as image:
            return f"{average_hash(image, hash_size)}"

    def test_get_thumbnail(
        self,
        mocker: MockerFixture,
        mail_parser: MailDocumentParser,
        simple_txt_email_file: Path,
        simple_txt_email_pdf_file: Path,
        simple_txt_email_thumbnail_file: Path,
    ):
        """
        GIVEN:
            - Fresh start
        WHEN:
            - The Thumbnail is requested
        THEN:
            - The returned thumbnail image file is as expected
        """
        mock_generate_pdf = mocker.patch(
            "paperless_mail.parsers.MailDocumentParser.generate_pdf",
        )
        mock_generate_pdf.return_value = simple_txt_email_pdf_file
        thumb = mail_parser.get_thumbnail(simple_txt_email_file, "message/rfc822")
        assert thumb.exists()
        assert thumb.is_file()
        assert self.imagehash(thumb) == self.imagehash(
            simple_txt_email_thumbnail_file,
        ), (
            f"Created Thumbnail {thumb} differs from expected file {simple_txt_email_thumbnail_file}"
        )

    def test_tika_parse_successful(self, mail_parser: MailDocumentParser):
        """
        GIVEN:
            - Fresh start
        WHEN:
            - tika parsing is called
        THEN:
            - a web request to tika shall be done and the reply is returned
        """
        html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>'
        expected_text = "Some Text"
        # Check successful parsing
        parsed = mail_parser.tika_parse(html)
        assert expected_text == parsed.strip()

    def test_generate_pdf_gotenberg_merging(
        self,
        mocker: MockerFixture,
        mail_parser: MailDocumentParser,
        html_email_file: Path,
        merged_pdf_first: Path,
        merged_pdf_second: Path,
    ):
        """
        GIVEN:
            - Intermediary pdfs to be merged
        WHEN:
            - pdf generation is requested with html file requiring merging of pdfs
        THEN:
            - gotenberg is called to merge files and the resulting file is returned
        """
        mock_generate_pdf_from_html = mocker.patch(
            "paperless_mail.parsers.MailDocumentParser.generate_pdf_from_html",
        )
        mock_generate_pdf_from_mail = mocker.patch(
            "paperless_mail.parsers.MailDocumentParser.generate_pdf_from_mail",
        )
        # Only the two intermediate PDFs are mocked; generate_pdf itself runs
        mock_generate_pdf_from_mail.return_value = merged_pdf_first
        mock_generate_pdf_from_html.return_value = merged_pdf_second
        msg = mail_parser.parse_file_to_message(html_email_file)
        _, pdf_path = util_call_with_backoff(
            mail_parser.generate_pdf,
            [msg],
        )
        assert pdf_path.exists()
        assert pdf_path.is_file()
        extracted = extract_text(pdf_path)
        expected = (
            "first PDF to be merged.\n\x0csecond PDF to be merged.\n\x0c"
        )
        assert expected == extracted

    def test_generate_pdf_from_mail(
        self,
        mail_parser: MailDocumentParser,
        html_email_file: Path,
        html_email_pdf_file: Path,
        html_email_thumbnail_file: Path,
    ):
        """
        GIVEN:
            - Fresh start
        WHEN:
            - pdf generation from simple eml file is requested
        THEN:
            - Gotenberg is called and the resulting file is returned and look as expected.
        """
        util_call_with_backoff(mail_parser.parse, [html_email_file, "message/rfc822"])
        # Check the archive PDF
        archive_path = mail_parser.get_archive_path()
        archive_text = extract_text(archive_path)
        expected_archive_text = extract_text(html_email_pdf_file)
        # Archive includes the HTML content, so use in
        assert expected_archive_text in archive_text
        # Check the thumbnail
        generated_thumbnail = mail_parser.get_thumbnail(
            html_email_file,
            "message/rfc822",
        )
        generated_thumbnail_hash = self.imagehash(generated_thumbnail)
        # The created pdf is not reproducible. But the converted image should always look the same.
        expected_hash = self.imagehash(html_email_thumbnail_file)
        assert generated_thumbnail_hash == expected_hash, (
            f"PDF looks different. Check if {generated_thumbnail} looks weird."
        )

View File

@@ -0,0 +1,228 @@
import email
import email.contentmanager
import tempfile
from email.message import Message
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from unittest import mock
import gnupg
from django.test import override_settings
from imap_tools import MailMessage
from paperless_mail.mail import MailAccountHandler
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
from paperless_mail.preprocessor import MailMessageDecryptor
from paperless_mail.tests.test_mail import TestMail
from paperless_mail.tests.test_mail import _AttachmentDef
class MessageEncryptor:
    """
    Test helper which creates a throwaway GPG key in a temporary GnuPG home
    and wraps mail messages into a multipart/encrypted message encrypted
    for that key.
    """

    def __init__(self):
        # NOTE: mkdtemp() leaves removal of the directory to the caller; it is
        # not cleaned up here.
        self.gpg_home = tempfile.mkdtemp()
        self.gpg = gnupg.GPG(gnupghome=self.gpg_home)
        self._testUser = "testuser@example.com"
        # Generate a new key
        input_data = self.gpg.gen_key_input(
            name_email=self._testUser,
            passphrase=None,
            key_type="RSA",
            key_length=2048,
            expire_date=0,
            no_protection=True,
        )
        self.gpg.gen_key(input_data)

    @staticmethod
    def get_email_body_without_headers(email_message: Message) -> bytes:
        """
        Returns the serialized message with the From, To and Subject headers
        removed. All other headers and the body are kept, and the passed
        message itself is not modified.
        """
        # Work on a re-parsed copy so the caller's message stays untouched
        message_copy = email.message_from_bytes(email_message.as_bytes())
        for header_name in ("from", "to", "subject"):
            # Deleting by name is case-insensitive, removes every occurrence
            # and does not fail if the header is absent
            del message_copy[header_name]
        return message_copy.as_bytes()

    def encrypt(self, message):
        """
        Encrypts the content of *message* for the test key and returns a new
        MailMessage of type multipart/encrypted, keeping the original From,
        To and Subject headers as well as the UID.

        Raises:
            RuntimeError: if GPG reports that the encryption failed
        """
        original_email: email.message.Message = message.obj
        encrypted_data = self.gpg.encrypt(
            self.get_email_body_without_headers(original_email),
            self._testUser,
            armor=True,
        )
        if not encrypted_data.ok:
            raise RuntimeError(f"Encryption failed: {encrypted_data.stderr}")
        encrypted_email_content = encrypted_data.data
        new_email = MIMEMultipart("encrypted", protocol="application/pgp-encrypted")
        new_email["From"] = original_email["From"]
        new_email["To"] = original_email["To"]
        new_email["Subject"] = original_email["Subject"]
        # Add the control part
        control_part = MIMEApplication(_data=b"", _subtype="pgp-encrypted")
        control_part.set_payload("Version: 1")
        new_email.attach(control_part)
        # Add the encrypted data part
        encrypted_part = MIMEApplication(_data=b"", _subtype="octet-stream")
        encrypted_part.set_payload(encrypted_email_content.decode("ascii"))
        encrypted_part.add_header(
            "Content-Disposition",
            'attachment; filename="encrypted.asc"',
        )
        new_email.attach(encrypted_part)
        encrypted_message: MailMessage = MailMessage(
            [(f"UID {message.uid}".encode(), new_email.as_bytes())],
        )
        return encrypted_message
class TestMailMessageGpgDecryptor(TestMail):
    """
    Tests for the GPG mail preprocessor, using a freshly generated key pair
    per test to encrypt the messages which are then decrypted again.
    """

    def setUp(self):
        """
        Creates the encryptor (and its temporary GPG home) and runs the parent
        setup while the decryptor is enabled and pointed at that home.
        """
        import shutil

        self.messageEncryptor = MessageEncryptor()
        # The GPG home is created with mkdtemp and would otherwise be leaked
        # by every single test. Registered before the parent setup, so it is
        # removed after everything the parent registers has been cleaned up.
        self.addCleanup(
            shutil.rmtree,
            self.messageEncryptor.gpg_home,
            ignore_errors=True,
        )
        with override_settings(
            EMAIL_GNUPG_HOME=self.messageEncryptor.gpg_home,
            EMAIL_ENABLE_GPG_DECRYPTOR=True,
        ):
            super().setUp()

    def test_preprocessor_is_able_to_run(self):
        """The decryptor can run when enabled with a valid GPG home."""
        with override_settings(
            EMAIL_GNUPG_HOME=self.messageEncryptor.gpg_home,
            EMAIL_ENABLE_GPG_DECRYPTOR=True,
        ):
            self.assertTrue(MailMessageDecryptor.able_to_run())

    def test_preprocessor_is_able_to_run2(self):
        """The decryptor can run when enabled without an explicit GPG home."""
        with override_settings(
            EMAIL_GNUPG_HOME=None,
            EMAIL_ENABLE_GPG_DECRYPTOR=True,
        ):
            self.assertTrue(MailMessageDecryptor.able_to_run())

    def test_is_not_able_to_run_disabled(self):
        """The decryptor cannot run when it is disabled in the settings."""
        with override_settings(
            EMAIL_ENABLE_GPG_DECRYPTOR=False,
        ):
            self.assertFalse(MailMessageDecryptor.able_to_run())

    def test_is_not_able_to_run_bogus_path(self):
        """The decryptor cannot run when the GPG home is not a usable path."""
        with override_settings(
            EMAIL_ENABLE_GPG_DECRYPTOR=True,
            EMAIL_GNUPG_HOME="_)@# notapath &%#$",
        ):
            self.assertFalse(MailMessageDecryptor.able_to_run())

    def test_fails_at_initialization(self):
        """No preprocessor is registered when creating the GPG wrapper fails."""
        with (
            mock.patch("gnupg.GPG.__init__") as mock_run,
            override_settings(
                EMAIL_ENABLE_GPG_DECRYPTOR=True,
            ),
        ):
            # An exception instance as side effect is raised on every call
            mock_run.side_effect = OSError("Cannot find 'gpg' binary")

            handler = MailAccountHandler()
            self.assertEqual(len(handler._message_preprocessors), 0)

    def test_decrypt_fails(self):
        """Decrypting with a GPG home lacking the private key raises."""
        import shutil

        encrypted_message, _ = self.create_encrypted_unencrypted_message_pair()
        empty_gpg_home = tempfile.mkdtemp()
        # Do not leak the second, empty GPG home either
        self.addCleanup(shutil.rmtree, empty_gpg_home, ignore_errors=True)
        with override_settings(
            EMAIL_ENABLE_GPG_DECRYPTOR=True,
            EMAIL_GNUPG_HOME=empty_gpg_home,
        ):
            message_decryptor = MailMessageDecryptor()
            self.assertRaises(Exception, message_decryptor.run, encrypted_message)

    def test_decrypt_encrypted_mail(self):
        """
        Creates a mail with attachments. Then encrypts it with a new key.
        Verifies that this encrypted message can be decrypted with attachments intact.
        """
        encrypted_message, message = self.create_encrypted_unencrypted_message_pair()
        headers = message.headers
        text = message.text

        # Before decryption only the encrypted payload is visible
        self.assertEqual(len(encrypted_message.attachments), 1)
        self.assertEqual(encrypted_message.attachments[0].filename, "encrypted.asc")
        self.assertEqual(encrypted_message.text, "")

        with override_settings(
            EMAIL_ENABLE_GPG_DECRYPTOR=True,
            EMAIL_GNUPG_HOME=self.messageEncryptor.gpg_home,
        ):
            message_decryptor = MailMessageDecryptor()
            self.assertTrue(message_decryptor.able_to_run())
            decrypted_message = message_decryptor.run(encrypted_message)

        self.assertEqual(len(decrypted_message.attachments), 2)
        self.assertEqual(decrypted_message.attachments[0].filename, "f1.pdf")
        self.assertEqual(decrypted_message.attachments[1].filename, "f2.pdf")
        self.assertEqual(decrypted_message.headers, headers)
        self.assertEqual(decrypted_message.text, text)
        self.assertEqual(decrypted_message.uid, message.uid)

    def create_encrypted_unencrypted_message_pair(self):
        """
        Builds a message with two attachments and returns the tuple
        (encrypted message, original message).
        """
        message = self.mailMocker.messageBuilder.create_message(
            body="Test message with 2 attachments",
            attachments=[
                _AttachmentDef(
                    filename="f1.pdf",
                    disposition="inline",
                ),
                _AttachmentDef(filename="f2.pdf"),
            ],
        )
        encrypted_message = self.messageEncryptor.encrypt(message)
        return encrypted_message, message

    def test_handle_encrypted_message(self):
        """
        Handling an encrypted message with a rule consuming everything queues
        the mail itself as .eml plus both of its attachments.
        """
        message = self.mailMocker.messageBuilder.create_message(
            subject="the message title",
            from_="Myself",
            attachments=2,
            body="Test mail",
        )
        encrypted_message = self.messageEncryptor.encrypt(message)

        account = MailAccount.objects.create()
        rule = MailRule(
            assign_title_from=MailRule.TitleSource.FROM_FILENAME,
            consumption_scope=MailRule.ConsumptionScope.EVERYTHING,
            account=account,
        )
        rule.save()

        result = self.mail_account_handler._handle_message(encrypted_message, rule)

        self.assertEqual(result, 3)
        self.mailMocker._queue_consumption_tasks_mock.assert_called()
        self.mailMocker.assert_queue_consumption_tasks_call_args(
            [
                [
                    {
                        "override_title": message.subject,
                        "override_filename": f"{message.subject}.eml",
                    },
                ],
                [
                    {"override_title": "file_0", "override_filename": "file_0.pdf"},
                    {"override_title": "file_1", "override_filename": "file_1.pdf"},
                ],
            ],
        )

260
src/paperless_mail/views.py Normal file
View File

@@ -0,0 +1,260 @@
import datetime
import logging
from datetime import timedelta
from django.http import HttpResponseBadRequest
from django.http import HttpResponseForbidden
from django.http import HttpResponseRedirect
from django.utils import timezone
from django_filters.rest_framework import DjangoFilterBackend
from drf_spectacular.types import OpenApiTypes
from drf_spectacular.utils import extend_schema
from drf_spectacular.utils import extend_schema_view
from drf_spectacular.utils import inline_serializer
from httpx_oauth.oauth2 import GetAccessTokenError
from rest_framework import serializers
from rest_framework.decorators import action
from rest_framework.filters import OrderingFilter
from rest_framework.generics import GenericAPIView
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from rest_framework.viewsets import ModelViewSet
from rest_framework.viewsets import ReadOnlyModelViewSet
from documents.filters import ObjectOwnedOrGrantedPermissionsFilter
from documents.permissions import PaperlessObjectPermissions
from documents.permissions import has_perms_owner_aware
from documents.views import PassUserMixin
from paperless.views import StandardPagination
from paperless_mail.filters import ProcessedMailFilterSet
from paperless_mail.mail import MailError
from paperless_mail.mail import get_mailbox
from paperless_mail.mail import mailbox_login
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
from paperless_mail.models import ProcessedMail
from paperless_mail.oauth import PaperlessMailOAuth2Manager
from paperless_mail.serialisers import MailAccountSerializer
from paperless_mail.serialisers import MailRuleSerializer
from paperless_mail.serialisers import ProcessedMailSerializer
from paperless_mail.tasks import process_mail_accounts
@extend_schema_view(
    test=extend_schema(
        operation_id="mail_account_test",
        request=MailAccountSerializer,
        description="Test a mail account",
        responses={
            200: inline_serializer(
                name="MailAccountTestResponse",
                fields={"success": serializers.BooleanField()},
            ),
            400: OpenApiTypes.STR,
        },
    ),
    process=extend_schema(
        operation_id="mail_account_process",
        description="Manually process the selected mail account for new messages.",
        responses={
            200: inline_serializer(
                name="MailAccountProcessResponse",
                fields={"result": serializers.CharField(default="OK")},
            ),
            404: None,
        },
    ),
)
class MailAccountViewSet(ModelViewSet, PassUserMixin):
    # API endpoint for mail accounts, with two extra actions:
    #   * test: try to log in with the posted (possibly unsaved) account data
    #   * process: queue processing of one stored account
    # NOTE: documented with comments on purpose, docstrings on views may be
    # picked up as descriptions by the API schema generation.
    model = MailAccount

    queryset = MailAccount.objects.all().order_by("pk")
    serializer_class = MailAccountSerializer
    pagination_class = StandardPagination
    permission_classes = (IsAuthenticated, PaperlessObjectPermissions)
    filter_backends = (ObjectOwnedOrGrantedPermissionsFilter,)

    def get_permissions(self):
        if self.action == "test":
            # Test action does not require object level permissions
            self.permission_classes = (IsAuthenticated,)
        return super().get_permissions()

    @action(methods=["post"], detail=False)
    def test(self, request):
        # Responds with {"success": True} when the login works, with a
        # 400 response when it does not and with a 403 response when the
        # stored credentials of an account are requested by a user who is
        # not allowed to change that account.
        logger = logging.getLogger("paperless_mail")

        # The serializer needs a name, the test account is never saved
        request.data["name"] = datetime.datetime.now().isoformat()
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        # Stays None unless the stored credentials of an account are used
        existing_account = None
        submitted_password = serializer.validated_data.get("password") or ""
        # "id" may be missing entirely when a new account is tested
        account_id = request.data.get("id")

        # account exists, use the password from there instead of *** and refresh_token / expiration
        if len(submitted_password.replace("*", "")) == 0 and account_id is not None:
            try:
                existing_account = MailAccount.objects.get(pk=account_id)
            except (MailAccount.DoesNotExist, ValueError, TypeError):
                return HttpResponseBadRequest("Mail account does not exist")
            # This action skips the object level permission class, so the
            # check has to happen here before stored secrets are reused
            if not has_perms_owner_aware(
                request.user,
                "change_mailaccount",
                existing_account,
            ):
                return HttpResponseForbidden("Insufficient permissions")
            serializer.validated_data["password"] = existing_account.password
            serializer.validated_data["account_type"] = existing_account.account_type
            serializer.validated_data["refresh_token"] = existing_account.refresh_token
            serializer.validated_data["expiration"] = existing_account.expiration

        account = MailAccount(**serializer.validated_data)

        with get_mailbox(
            account.imap_server,
            account.imap_port,
            account.imap_security,
        ) as M:
            try:
                if (
                    account.is_token
                    and account.expiration is not None
                    and account.expiration < timezone.now()
                ):
                    # Only a stored account has a refresh token which can be
                    # used, expired data of an unsaved account cannot be fixed
                    if existing_account is None:
                        raise MailError("Unable to refresh oauth token")
                    oauth_manager = PaperlessMailOAuth2Manager()
                    if oauth_manager.refresh_account_oauth_token(existing_account):
                        # User is not changing password and token needs to be refreshed
                        existing_account.refresh_from_db()
                        account.password = existing_account.password
                    else:
                        raise MailError("Unable to refresh oauth token")

                mailbox_login(M, account)
                return Response({"success": True})
            except MailError as e:
                logger.error(
                    f"Mail account {account} test failed: {e}",
                )
                return HttpResponseBadRequest("Unable to connect to server")

    @action(methods=["post"], detail=True)
    def process(self, request, pk=None):
        # get_object applies the permission checks of the viewset, the
        # processing itself is queued as a task
        account = self.get_object()
        process_mail_accounts.delay([account.pk])

        return Response({"result": "OK"})
class ProcessedMailViewSet(ReadOnlyModelViewSet, PassUserMixin):
    # Read-only API endpoint for processed mails, newest first, with an
    # additional action to delete several of them at once.
    # NOTE: documented with comments on purpose, docstrings on views may be
    # picked up as descriptions by the API schema generation.
    permission_classes = (IsAuthenticated, PaperlessObjectPermissions)
    serializer_class = ProcessedMailSerializer
    pagination_class = StandardPagination
    filter_backends = (
        DjangoFilterBackend,
        OrderingFilter,
        ObjectOwnedOrGrantedPermissionsFilter,
    )
    filterset_class = ProcessedMailFilterSet

    queryset = ProcessedMail.objects.all().order_by("-processed")

    @action(methods=["post"], detail=False)
    def bulk_delete(self, request):
        # Expects {"mail_ids": [int, ...]}. Responds with 400 for malformed
        # input and with 403, without deleting anything, as soon as the user
        # lacks the delete permission for one of the matching mails.
        mail_ids = request.data.get("mail_ids", [])
        if not isinstance(mail_ids, list) or not all(
            isinstance(i, int) for i in mail_ids
        ):
            return HttpResponseBadRequest("mail_ids must be a list of integers")

        # Evaluate once, the same objects are checked and then deleted
        mails = list(ProcessedMail.objects.filter(id__in=mail_ids))

        # Check everything up front, otherwise a request could be rejected
        # after a part of the mails has already been deleted
        if not all(
            has_perms_owner_aware(request.user, "delete_processedmail", mail)
            for mail in mails
        ):
            return HttpResponseForbidden("Insufficient permissions")

        for mail in mails:
            mail.delete()

        # The requested ids are echoed back unchanged, including ids which
        # did not match any processed mail
        return Response({"result": "OK", "deleted_mail_ids": mail_ids})
class MailRuleViewSet(ModelViewSet, PassUserMixin):
    # API endpoint for mail rules, returned sorted by their "order" field.

    # What is served and how it is serialised
    model = MailRule
    serializer_class = MailRuleSerializer
    queryset = MailRule.objects.all().order_by("order")

    # Who may access which rules
    permission_classes = (IsAuthenticated, PaperlessObjectPermissions)
    filter_backends = (ObjectOwnedOrGrantedPermissionsFilter,)

    pagination_class = StandardPagination
@extend_schema_view(
    get=extend_schema(
        description="Callback view for OAuth2 authentication",
        responses={200: None},
    ),
)
class OauthCallbackView(GenericAPIView):
    # Receives the redirect of the OAuth provider, exchanges the code for
    # tokens and stores them as a mail account.
    permission_classes = (IsAuthenticated,)

    def get(self, request, format=None):
        # Responds with 400 when the user may not add mail accounts or when
        # the request is invalid (no code, wrong state, unknown scope).
        # Otherwise redirects to the oauth redirect url, with oauth_success=1
        # and the account id on success or oauth_success=0 when the access
        # token could not be retrieved.
        if not (
            request.user and request.user.has_perms(["paperless_mail.add_mailaccount"])
        ):
            return HttpResponseBadRequest(
                "You do not have permission to add mail accounts",
            )

        logger = logging.getLogger("paperless_mail")
        code = request.query_params.get("code")
        # Gmail passes scope as a query param, Outlook does not
        scope = request.query_params.get("scope")

        if code is None:
            logger.error(
                f"Invalid oauth callback request, code: {code}, scope: {scope}",
            )
            return HttpResponseBadRequest("Invalid request, see logs for more detail")

        oauth_manager = PaperlessMailOAuth2Manager(
            state=request.session.get("oauth_state"),
        )

        state = request.query_params.get("state", "")
        if not oauth_manager.validate_state(state):
            logger.error(
                f"Invalid oauth callback request received state: {state}, expected: {oauth_manager.state}",
            )
            return HttpResponseBadRequest("Invalid request, see logs for more detail")

        try:
            if scope is not None and "google" in scope:
                # Google
                account_type = MailAccount.MailAccountType.GMAIL_OAUTH
                imap_server = "imap.gmail.com"
                defaults = {
                    "name": f"Gmail OAuth {timezone.now()}",
                    "username": "",
                    "imap_security": MailAccount.ImapSecurity.SSL,
                    "imap_port": 993,
                    "account_type": account_type,
                }
                result = oauth_manager.get_gmail_access_token(code)

            elif scope is None:
                # Outlook
                account_type = MailAccount.MailAccountType.OUTLOOK_OAUTH
                imap_server = "outlook.office365.com"
                defaults = {
                    "name": f"Outlook OAuth {timezone.now()}",
                    "username": "",
                    "imap_security": MailAccount.ImapSecurity.SSL,
                    "imap_port": 993,
                    "account_type": account_type,
                }
                result = oauth_manager.get_outlook_access_token(code)

            else:
                # A scope which matches neither provider. Without this branch
                # the code below would fail on names that were never assigned.
                logger.error(
                    f"Invalid oauth callback request, code: {code}, scope: {scope}",
                )
                return HttpResponseBadRequest(
                    "Invalid request, see logs for more detail",
                )

            access_token = result["access_token"]
            refresh_token = result["refresh_token"]
            expires_in = result["expires_in"]
            account, _ = MailAccount.objects.update_or_create(
                password=access_token,
                is_token=True,
                imap_server=imap_server,
                refresh_token=refresh_token,
                expiration=timezone.now() + timedelta(seconds=expires_in),
                defaults=defaults,
            )
            return HttpResponseRedirect(
                f"{oauth_manager.oauth_redirect_url}?oauth_success=1&account_id={account.pk}",
            )
        except GetAccessTokenError as e:
            logger.error(f"Error getting access token: {e}")
            return HttpResponseRedirect(
                f"{oauth_manager.oauth_redirect_url}?oauth_success=0",
            )