Skip to content

Instantly share code, notes, and snippets.

@anotherjames
Created December 22, 2023 15:14
Show Gist options
  • Select an option

  • Save anotherjames/3182672d16d20b8aa133981c4499bb61 to your computer and use it in GitHub Desktop.

Select an option

Save anotherjames/3182672d16d20b8aa133981c4499bb61 to your computer and use it in GitHub Desktop.
Scan HTML in Drupal field values for elements & attributes used with text formats
<?php
namespace Drupal\my_module\Commands;
use Drupal\Component\Utility\Html;
use Drupal\Core\Entity\Sql\SqlEntityStorageInterface;
use Drush\Commands\DrushCommands;
/**
 * Drush commands for my module.
 *
 * This is built to work with Drush 10. It needs registering as a service in a
 * drush.services.yml file.
 */
class MyModuleCommands extends DrushCommands {

  /**
   * Scan for tags + attributes used in formatted text fields.
   *
   * Only elements that carry at least one attribute are reported.
   *
   * @param string $format
   *   The ID of a text format to check values for.
   *
   * @command my-module:scan-html
   *
   * @usage drush my-module:scan-html filtered_html
   *   Scan values using the 'filtered_html' text format in fields.
   */
  public function scanHtml($format) {
    // @TODO Inject dependencies properly.
    $container = \Drupal::getContainer();
    $db = $container->get('database');
    $entity_type_manager = $container->get('entity_type.manager');
    $field_manager = $container->get('entity_field.manager');
    // Hardcoded array of known field types to use text formats. Could be more?
    $field_types = ['text_long', 'text_with_summary', 'text'];
    // Map of tag name => [attribute name => attribute name], built up across
    // every scanned value. Keying by attribute name de-duplicates for free.
    $report = [];
    $io = $this->io();
    $schema = $db->schema();

    foreach ($field_types as $field_type) {
      $type_map = $field_manager->getFieldMapByFieldType($field_type);
      foreach ($type_map as $entity_type => $fields) {
        $entity_storage = $entity_type_manager->getStorage($entity_type);
        // Field values are read with direct SQL queries, so only storages
        // exposing a table mapping can be scanned.
        if (!($entity_storage instanceof SqlEntityStorageInterface)) {
          continue;
        }

        $table_mapping = $entity_storage->getTableMapping();
        $fields_count = count($fields);
        $io->text(dt('Checking @count @entity_type @type field(s)', [
          '@count' => $fields_count,
          '@entity_type' => $entity_type,
          '@type' => $field_type,
        ]));
        $io->progressStart($fields_count);
        $rows_count = 0;
        $attributes_count = 0;

        foreach ($fields as $field_name => $field_info) {
          $io->progressAdvance();
          $field_table = $table_mapping->getFieldTableName($field_name);
          $columns = $table_mapping->getColumnNames($field_name);
          // Only the 'value' column is read; other columns of the field
          // (e.g. a summary) are not scanned.
          $query = $db->select($field_table, 'ft')
            ->condition($columns['format'], $format);
          // Not every field table has a 'deleted' column; where it exists,
          // restrict the scan to rows that are not flagged as deleted.
          if ($schema->fieldExists($field_table, 'deleted')) {
            $query->condition('deleted', 0);
          }
          $query->addField('ft', $columns['value'], 'value');

          foreach ($query->execute() as $row) {
            $pairs = $this->extractAttributes($row->value);
            if (!$pairs) {
              continue;
            }
            $rows_count++;
            $attributes_count += count($pairs);
            foreach ($pairs as [$tag, $attribute]) {
              $report[$tag][$attribute] = $attribute;
            }
          }
        }

        // Finish the progress bar before printing the summary so the summary
        // text is not interleaved with a redraw of the bar.
        $io->progressFinish();
        if ($rows_count) {
          $io->text(dt(' ... found @rows rows containing @attributes attributes.', [
            '@rows' => $rows_count,
            '@attributes' => $attributes_count,
          ]));
        }
        else {
          $io->text(dt(' ... found 0 rows containing attributes.'));
        }
      }
    }

    if (!$report) {
      // Nothing to list; say so rather than rendering an empty definition
      // list.
      $io->text(dt('No attributes were found in values using the @format text format.', [
        '@format' => $format,
      ]));
      return TRUE;
    }

    $output = [];
    ksort($report);
    foreach ($report as $tag => $attributes) {
      sort($attributes);
      $output[] = [$tag => implode(', ', $attributes)];
    }
    // Each entry is a single-item [tag => attributes] array, which
    // definitionList() takes as one argument per definition.
    $io->definitionList(...$output);
    return TRUE;
  }

  /**
   * Lists every attribute occurrence on elements of an HTML fragment.
   *
   * @param string|null $html
   *   A stored field value. NULL and the empty string yield no results.
   *
   * @return array
   *   One [tag name, attribute local name] pair per attribute occurrence, in
   *   the order the XPath query returned them. Empty when no element in the
   *   fragment carries an attribute.
   */
  private function extractAttributes($html) {
    // Skip NULL/empty values up front: there is nothing to parse, and passing
    // NULL into string handling raises deprecations on PHP 8.1+.
    if ($html === NULL || $html === '') {
      return [];
    }

    $xpath = new \DOMXPath(Html::load((string) $html));
    $pairs = [];
    // Select every element under <body> that has at least one attribute.
    foreach ($xpath->query('/html/body//*[@*]') as $element) {
      if ($element instanceof \DOMElement) {
        foreach ($element->attributes as $attribute) {
          $pairs[] = [$element->tagName, $attribute->localName];
        }
      }
    }
    return $pairs;
  }

}
@anotherjames
Copy link
Author

This ran in just a few seconds on my local database :)

Screenshot 2023-12-22 at 15 23 17

@anotherjames
Copy link
Author

This only covers content fields at the moment, so it wouldn't cover anything stored in configuration. Presumably the config schema could help indicate which parts of configuration use formatted text, though there will also be parts of config that are schema-less (commonly from custom modules) and/or don't actually indicate that text filters are used on them.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment