Skip to content

Instantly share code, notes, and snippets.

@anotherjames
Last active January 13, 2025 14:32
Show Gist options
  • Select an option

  • Save anotherjames/b30bc805b80e970afc5de1eafd70f48e to your computer and use it in GitHub Desktop.

Select an option

Save anotherjames/b30bc805b80e970afc5de1eafd70f48e to your computer and use it in GitHub Desktop.
Drupal form element validation handler that checks a value is mostly Latin characters.
<?php
use Drupal\Component\Utility\Html;
use Drupal\Core\Form\FormStateInterface;
use Drupal\filter\FilterPluginCollection;
/**
 * Implements hook_form_alter().
 *
 * Attaches the Latin-characters element validator to the 'message' element of
 * the 'get_in_touch' webform.
 */
function latinvalidation_form_alter(&$form, FormStateInterface $form_state, $form_id) {
  // Only the main contact webform is targeted; every other form is untouched.
  if (empty($form['#webform_id']) || $form['#webform_id'] !== 'get_in_touch') {
    return;
  }

  // Only attach the handler when the element really exists at this location.
  // Appending blindly would auto-create a stray 'message' entry (holding
  // nothing but '#element_validate') if the element is renamed or nested.
  if (isset($form['elements']['message']) && is_array($form['elements']['message'])) {
    $form['elements']['message']['#element_validate'][] = 'latinvalidation_latin_chars_element_validate';
  }
}
/**
 * Element validation handler that checks a value is mostly Latin characters.
 *
 * Links ([url=...] markup, HTML tags and plain-text URLs), whitespace and
 * characters from the Unicode 'Common' and 'Inherited' scripts are ignored.
 * An error is set on the element when more than 20% of the remaining
 * characters are not Latin, or when nothing countable remains. An empty value
 * is not validated at all.
 *
 * @param array $element
 *   The form element being validated; its '#value' is inspected.
 * @param \Drupal\Core\Form\FormStateInterface $form_state
 *   The current form state, on which any validation error is set.
 */
function latinvalidation_latin_chars_element_validate($element, FormStateInterface $form_state) {
  // Nothing submitted: leave the element alone.
  if (empty($element['#value'])) {
    return;
  }

  $meaningful_text = _latinvalidation_meaningful_text($element['#value']);
  $nonlatin_proportion = _latinvalidation_nonlatin_proportion($meaningful_text);

  // The message consisted only of links, whitespace and/or punctuation.
  if ($nonlatin_proportion === NULL) {
    $form_state->setError($element, t('Please include your message for us. You may prefer to <a href="/contact">contact us via phone or email</a>.'));
    return;
  }

  // We picked a fixed proportion of 20%, but this could be configurable.
  if ($nonlatin_proportion > 0.2) {
    $form_state->setError($element, t('We may have trouble trying to understand your message due to the language and/or symbols used in it. Please use plain English instead. You may prefer to <a href="/contact">contact us via phone or email</a>.'));
  }
}

/**
 * Reduces a submitted value to the text worth measuring.
 *
 * Strips [url=...] markup, HTML tags, links (including plain-text URLs, when
 * core's URL filter is available) and whitespace.
 *
 * @param string $value
 *   The raw submitted value.
 *
 * @return string
 *   The remaining text; may be an empty string.
 */
function _latinvalidation_meaningful_text($value) {
  // We've received a lot of submissions with a common pattern using the
  // format [url=http...]...[/url]. Convert those to links before HTML is
  // stripped.
  $text = str_replace('[/url]', '</a>', $value);
  $text = preg_replace('/\[url=([^]]+)]/', '<a href="$1">', $text);

  // Ignore HTML tags (since tags are usually links).
  $text = strip_tags($text);

  // Convert any URLs in plain text to links, using core's URL filter, then
  // filter out links entirely.
  $plugin_id = 'filter_url';
  $filter_manager = \Drupal::service('plugin.manager.filter');
  if ($filter_manager->getDefinition($plugin_id, FALSE)) {
    // Use a filter plugin collection, just like
    // \Drupal\filter\Entity\FilterFormat::filters().
    $collection = new FilterPluginCollection($filter_manager, [
      $plugin_id => [],
    ]);
    /** @var \Drupal\filter\Plugin\Filter\FilterUrl $filter */
    $filter = $collection->get($plugin_id);
    $text = $filter->prepare($text, '');
    $text = $filter->process($text, '')->getProcessedText();

    $dom = Html::load($text);
    $xpath = new \DOMXPath($dom);
    /** @var \DOMElement $link */
    foreach ($xpath->query('//a') as $link) {
      $link->parentNode->removeChild($link);
    }
    $text = Html::decodeEntities(Html::serialize($dom));
  }

  // Ignore whitespace. The cast turns a NULL (regex failure) into an empty
  // string, which the caller reports as a missing message.
  return (string) preg_replace('/\s/', '', $text);
}

/**
 * Calculates the proportion of countable characters that are not Latin.
 *
 * Characters in the 'Common' script (punctuation, digits, symbols, most
 * emoji) and the 'Inherited' script (combining marks) are not counted at all.
 * Ignoring 'Inherited' matters for decomposed text: 'e' followed by a
 * combining acute accent should count as one Latin letter, not as one Latin
 * plus one non-Latin character.
 *
 * Counting is per code point, not per grapheme, so multi-code-point symbols
 * can weigh more than one character. That is accepted, since the result is
 * only compared against a vague threshold.
 *
 * @param string $text
 *   The text to measure.
 *
 * @return float|null
 *   The non-Latin proportion between 0 and 1, or NULL when the text contains
 *   no countable characters.
 */
function _latinvalidation_nonlatin_proportion($text) {
  if ($text === '') {
    return NULL;
  }

  $latin = preg_match_all('/\p{Latin}/u', $text);
  $nonlatin = preg_match_all('/[^\p{Common}\p{Inherited}\p{Latin}]/u', $text);

  // preg_match_all() returns FALSE on failure (e.g. malformed UTF-8). The
  // text cannot be judged in that case, so do not block the submission.
  if ($latin === FALSE || $nonlatin === FALSE) {
    return 0.0;
  }

  $total = $latin + $nonlatin;
  return $total ? $nonlatin / $total : NULL;
}
@anotherjames
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment