Created
December 5, 2025 15:16
-
-
Save matthiask/3e4ffd93c562b885cc89d6a9806f7c4c to your computer and use it in GitHub Desktop.
Prose editor module
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
| from functools import partial | |
| import bs4 | |
| from content_editor.admin import ContentEditorInline | |
| from django.db import models | |
| from django.utils.translation import gettext_lazy as _ | |
| from django_prose_editor.fields import ( | |
| ProseEditorField, | |
| create_sanitizer, | |
| ) | |
| from feincms3.utils import is_first_party_link | |
| from smartypants import smartypants | |
def convert_html_quotes(text):
    """
    Apply typographic punctuation to an HTML string and turn curly double
    quotes into guillemets.

    Args:
        text: The HTML string to process

    Returns:
        The output of ``smartypants`` with left/right double quotation marks
        replaced by « and »
    """
    text = smartypants(text)
    # smartypants writes numeric character references (&#8220; / &#8221;)
    # unless it is configured for Unicode output, so the literal characters
    # alone would miss the quotes it has just produced. Handle both spellings.
    for curly, guillemet in (
        ("“", "«"),
        ("&#8220;", "«"),
        ("”", "»"),
        ("&#8221;", "»"),
    ):
        text = text.replace(curly, guillemet)
    return text
def convert_text_quotes(text):
    """
    Replace straight double quotes in plain text with guillemets.

    Quotes are paired purely by position: the 1st, 3rd, 5th, ... quote
    becomes « and the 2nd, 4th, 6th, ... becomes ». An unpaired trailing
    quote therefore ends up as an opening «.

    Args:
        text: The plain text to process; ``None`` and ``""`` are returned
            unchanged

    Returns:
        The text with every ``"`` replaced by « or »
    """
    if not text:
        # Callers hand over cleaned field values, which may be None (for
        # example an empty nullable field); there is nothing to convert.
        return text

    head, *rest = text.split('"')
    pieces = [head]
    for position, part in enumerate(rest):
        # Even positions open a quotation, odd positions close it.
        pieces.append("«" if position % 2 == 0 else "»")
        pieces.append(part)
    return "".join(pieces)
def soup_transform(html):
    """
    Post-process an HTML fragment with BeautifulSoup.

    Links which have no ``target`` and are not first-party according to
    ``is_first_party_link`` get ``target="_blank"`` and ``rel="noopener"``.
    Afterwards, ``<br>`` tags at the very end of the last top-level node are
    removed.

    Args:
        html: The HTML string to process

    Returns:
        The transformed document serialized back to a string
    """
    soup = bs4.BeautifulSoup(html, "html.parser")

    for link in soup.select("a[href]"):
        if link.get("target") or is_first_party_link(link["href"]):
            continue
        link["target"] = "_blank"
        link["rel"] = "noopener"

    try:
        last_node = list(soup.children)[-1]
        # Copy the children so that extracting nodes below does not disturb
        # the sequence we are iterating over.
        trailing = list(last_node.contents)
    except (AttributeError, IndexError):
        # AttributeError happens when we process a string without tags
        # IndexError happens when we have an empty document
        return str(soup)

    # Walk backwards and stop at the first node which is not a <br>.
    for node in reversed(trailing):
        if not (isinstance(node, bs4.element.Tag) and node.name == "br"):
            break
        node.extract()
    return str(soup)
# HTML5 void elements that might have self-closing slashes
_VOID_ELEMENTS = (
    "area",
    "base",
    "br",
    "col",
    "embed",
    "hr",
    "img",
    "input",
    "link",
    "meta",
    "param",
    "source",
    "track",
    "wbr",
)

# The lookahead requires the tag name to end right after the void element
# name. Without it "<metadata />", "<colgroup/>" or "<basefont/>" would be
# treated as "<meta", "<col" and "<base" followed by attribute text.
_SELF_CLOSED_VOID_RE = re.compile(
    r"<(%s)(?=[\s/>])([^>]*?)\s*/>" % "|".join(_VOID_ELEMENTS)
)


def remove_slash_from_void_elements(html: str) -> str:
    """
    Remove self-closing slashes from HTML void elements to ensure consistent HTML5 output.

    Only exact void element names are rewritten; other elements whose name
    merely starts with a void element name are left untouched.

    Args:
        html: The HTML string to process

    Returns:
        HTML with consistent void element format (no self-closing slashes).
        Falsy input (``None`` or ``""``) is returned unchanged.
    """
    if not html:
        return html
    # Keep the tag name and its attributes, drop the whitespace and slash
    # in front of the closing bracket.
    return _SELF_CLOSED_VOID_RE.sub(r"<\1\2>", html)
# Prose editor field preset used by ``RichText`` in this module: headings,
# inline formatting, links, a restricted blockquote, lists, horizontal rules
# and the menu extension.
# NOTE(review): unlike the other presets in this module, this sanitize chain
# does not contain ``remove_slash_from_void_elements`` and the extensions do
# not set "History" or "Typographic" - confirm that this is intentional.
CMSProseEditorField = partial(
    ProseEditorField,
    extensions={
        "HardBreak": True,
        "Heading": True,
        "Bold": True,
        "Italic": True,
        "Subscript": True,
        "Superscript": True,
        "HorizontalRule": True,
        # Presumably hides the target option in the editor's link dialog -
        # TODO confirm. ``soup_transform`` sets target="_blank" on links
        # which are not first-party.
        "Link": {"enableTarget": False},
        "RestrictedBlockquote": True,
        "BulletList": True,
        "OrderedList": True,
        "ListItem": True,
        "Menu": True,
    },
    # ``create_sanitizer`` is passed uncalled; presumably the field builds the
    # sanitizer from ``extensions`` itself. The order in which the callables
    # run is decided by ProseEditorField - verify against its documentation
    # before reordering.
    sanitize=[convert_html_quotes, soup_transform, create_sanitizer],
)
# Prose editor field preset restricted to inline formatting: hard breaks,
# bold, italic, sub-/superscript and links. No headings, lists or
# blockquotes are enabled.
RestrictedProseEditorField = partial(
    ProseEditorField,
    extensions={
        "History": False,
        "HardBreak": True,
        "Bold": True,
        "Italic": True,
        "Subscript": True,
        "Superscript": True,
        # Presumably hides the target option in the editor's link dialog -
        # TODO confirm. ``soup_transform`` sets target="_blank" on links
        # which are not first-party.
        "Link": {"enableTarget": False},
        "Typographic": True,
    },
    # ``create_sanitizer`` is passed uncalled; presumably the field builds the
    # sanitizer from ``extensions`` itself. The order in which the callables
    # run is decided by ProseEditorField - verify against its documentation
    # before reordering.
    sanitize=[
        remove_slash_from_void_elements,
        convert_html_quotes,
        soup_transform,
        create_sanitizer,
    ],
)
# Minimal prose editor field preset: apart from the "Typographic" extension
# only hard breaks are enabled, so no inline formatting or links.
NewlineProseEditorField = partial(
    ProseEditorField,
    extensions={
        "History": False,
        "HardBreak": True,
        "Typographic": True,
    },
    # Same sanitize chain as the other restricted presets in this module.
    # The order in which the callables run is decided by ProseEditorField -
    # verify against its documentation before reordering.
    sanitize=[
        remove_slash_from_void_elements,
        convert_html_quotes,
        soup_transform,
        create_sanitizer,
    ],
)
# Prose editor field preset with inline formatting, links, blockquotes,
# horizontal rules and bullet/ordered lists. No headings are enabled.
ProseWithListsEditorField = partial(
    ProseEditorField,
    extensions={
        "History": False,
        "HardBreak": True,
        "Blockquote": True,
        "Bold": True,
        "Italic": True,
        "Subscript": True,
        "Superscript": True,
        "HorizontalRule": True,
        # Presumably hides the target option in the editor's link dialog -
        # TODO confirm. ``soup_transform`` sets target="_blank" on links
        # which are not first-party.
        "Link": {"enableTarget": False},
        "BulletList": True,
        "OrderedList": True,
        "ListItem": True,
        "Typographic": True,
    },
    # ``create_sanitizer`` is passed uncalled; presumably the field builds the
    # sanitizer from ``extensions`` itself. The order in which the callables
    # run is decided by ProseEditorField - verify against its documentation
    # before reordering.
    sanitize=[
        remove_slash_from_void_elements,
        convert_html_quotes,
        soup_transform,
        create_sanitizer,
    ],
)
class RichText(models.Model):
    """Abstract base model with a single rich text field named ``text``."""

    # The positional "" is handed to ProseEditorField unchanged; presumably
    # it is the field's verbose name - TODO confirm.
    text = CMSProseEditorField("")

    class Meta:
        abstract = True
        verbose_name = _("rich text")
        verbose_name_plural = _("rich texts")

    def __str__(self):
        # ``get_text_excerpt`` is not defined in this class; presumably the
        # prose editor field adds it for the ``text`` attribute - verify.
        return self.get_text_excerpt()
class RichTextInline(ContentEditorInline):
    """Content editor inline for rich text; only overrides the icon."""

    # Presumably the name of the icon shown for this inline in the content
    # editor - TODO confirm which icon set "notes" refers to.
    icon = "notes"
class AutoQuoteCharField(models.CharField):
    """
    ``CharField`` which runs the cleaned value through
    ``convert_text_quotes``.
    """

    def clean(self, value, model_instance):
        """Clean the value as usual, then convert its double quotes."""
        cleaned = super().clean(value, model_instance)
        return convert_text_quotes(cleaned)

    def deconstruct(self):
        """
        Deconstruct the field, but report the stock Django ``CharField``
        import path instead of this subclass.
        """
        parts = super().deconstruct()
        # parts is (name, path, args, kwargs); only the path is swapped.
        return (parts[0], "django.db.models.CharField", parts[2], parts[3])
class AutoQuoteTextField(models.TextField):
    """
    ``TextField`` which runs the cleaned value through
    ``convert_text_quotes``.
    """

    def clean(self, value, model_instance):
        """Clean the value as usual, then convert its double quotes."""
        cleaned = super().clean(value, model_instance)
        return convert_text_quotes(cleaned)

    def deconstruct(self):
        """
        Deconstruct the field, but report the stock Django ``TextField``
        import path instead of this subclass.
        """
        parts = super().deconstruct()
        # parts is (name, path, args, kwargs); only the path is swapped.
        return (parts[0], "django.db.models.TextField", parts[2], parts[3])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment