Created
December 22, 2023 15:14
-
-
Save anotherjames/3182672d16d20b8aa133981c4499bb61 to your computer and use it in GitHub Desktop.
Scan HTML in Drupal field values for elements & attributes used with text formats
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
| namespace Drupal\my_module\Commands; | |
| use Drupal\Component\Utility\Html; | |
| use Drupal\Core\Entity\Sql\SqlEntityStorageInterface; | |
| use Drush\Commands\DrushCommands; | |
/**
 * Drush commands for my module.
 *
 * This is built to work with Drush 10. It needs registering as a service in a
 * drush.services.yml file.
 */
class MyModuleCommands extends DrushCommands {

  /**
   * Scan for tags + attributes used in formatted text fields.
   *
   * Only elements that carry at least one attribute are reported; the main
   * 'value' column of each field is scanned.
   *
   * @param string $format
   *   The ID of a text format to check values for.
   *
   * @command my-module:scan-html
   *
   * @usage drush my-module:scan-html filtered_html
   *   Scan values using the 'filtered_html' text format in fields.
   */
  public function scanHtml($format) {
    // @todo Inject dependencies properly.
    $container = \Drupal::getContainer();
    $db = $container->get('database');
    $entity_type_manager = $container->get('entity_type.manager');
    $field_manager = $container->get('entity_field.manager');
    $schema = $db->schema();
    $io = $this->io();

    // Hardcoded array of known field types to use text formats. Could be more?
    $field_types = ['text_long', 'text_with_summary', 'text'];

    // Map of tag name => [attribute name => attribute name]. Keying by the
    // attribute name de-duplicates attributes seen more than once.
    $report = [];

    foreach ($field_types as $field_type) {
      $type_map = $field_manager->getFieldMapByFieldType($field_type);
      foreach ($type_map as $entity_type => $fields) {
        $entity_storage = $entity_type_manager->getStorage($entity_type);
        // Values are read straight from the database, so only SQL-backed
        // entity storage can be scanned.
        if (!$entity_storage instanceof SqlEntityStorageInterface) {
          continue;
        }

        $table_mapping = $entity_storage->getTableMapping();
        $fields_count = count($fields);
        $io->text(dt('Checking @count @entity_type @type field(s)', [
          '@count' => $fields_count,
          '@entity_type' => $entity_type,
          '@type' => $field_type,
        ]));
        $io->progressStart($fields_count);

        // Totals are tracked per entity type + field type combination.
        $rows_count = 0;
        $attributes_count = 0;

        foreach (array_keys($fields) as $field_name) {
          $io->progressAdvance();

          try {
            $field_table = $table_mapping->getFieldTableName($field_name);
          }
          catch (\Drupal\Core\Entity\Sql\SqlContentEntityStorageException $e) {
            // The field has no table to query, so skip it rather than abort
            // the whole scan.
            continue;
          }

          $columns = $table_mapping->getColumnNames($field_name);
          if (!isset($columns['value'], $columns['format'])) {
            // Without both columns there is nothing to filter on or to read.
            continue;
          }

          $query = $db->select($field_table, 'ft')
            ->condition($columns['format'], $format);
          // Ignore rows flagged as deleted when the table has such a column.
          if ($schema->fieldExists($field_table, 'deleted')) {
            $query->condition('deleted', 0);
          }
          $query->addField('ft', $columns['value'], 'value');

          foreach ($query->execute() as $row) {
            // Cast so that a NULL column value is parsed as empty markup.
            $pairs = $this->findAttributes((string) $row->value);
            if (!$pairs) {
              continue;
            }
            $rows_count++;
            $attributes_count += count($pairs);
            foreach ($pairs as [$tag, $attribute]) {
              $report[$tag][$attribute] = $attribute;
            }
          }
        }

        // Finish the progress bar first so that the totals are printed after
        // the completed bar instead of in the middle of its output.
        $io->progressFinish();
        if ($rows_count) {
          $io->text(dt(' ... found @rows rows containing @attributes attributes.', [
            '@rows' => $rows_count,
            '@attributes' => $attributes_count,
          ]));
        }
        else {
          $io->text(dt(' ... found 0 rows containing attributes.'));
        }
      }
    }

    if (!$report) {
      // Say so explicitly rather than asking for a definition list with no
      // entries.
      $io->text(dt('No attributes found in values using the @format text format.', [
        '@format' => $format,
      ]));
      return TRUE;
    }

    // One single-entry list per tag: tag name => comma-separated attributes,
    // with tags and attributes both in alphabetical order.
    ksort($report);
    $output = [];
    foreach ($report as $tag => $attributes) {
      sort($attributes);
      $output[] = [$tag => implode(', ', $attributes)];
    }
    $io->definitionList(...$output);

    return TRUE;
  }

  /**
   * Lists every attribute occurrence found in a piece of HTML markup.
   *
   * @param string $markup
   *   The HTML fragment to inspect.
   *
   * @return array
   *   A list of [tag name, attribute name] pairs, one per attribute
   *   occurrence, in document order. Empty when no element has attributes.
   */
  private function findAttributes(string $markup): array {
    $pairs = [];
    $xpath = new \DOMXPath(Html::load($markup));
    /** @var \DOMNodeList $elements */
    $elements = $xpath->query('/html/body//*[@*]');
    foreach ($elements as $element) {
      if ($element instanceof \DOMElement) {
        foreach ($element->attributes as $attribute) {
          $pairs[] = [$element->tagName, $attribute->localName];
        }
      }
    }
    return $pairs;
  }

}
Author
Author
This only covers content fields at the moment, so it wouldn't cover things in configuration. Presumably the config schema could help indicate which parts of configuration use formatted text, though there will also be parts of config that are schema-less (commonly from custom modules) and/or don't actually indicate that text filters are used on them.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This ran in just a few seconds on my local database :)