Skip to content

Instantly share code, notes, and snippets.

@web-elite
Created November 5, 2025 08:08
Show Gist options
  • Select an option

  • Save web-elite/46538caf736a3ae86ed1ddc98bddc884 to your computer and use it in GitHub Desktop.

Select an option

Save web-elite/46538caf736a3ae86ed1ddc98bddc884 to your computer and use it in GitHub Desktop.
WordPress XML Export Cleaner Script - اسکریپت پاکسازی فایل اکسپورت وردپرس
<?php
/**
 * Strip non-essential post meta and Elementor markup from a WordPress export file.
 *
 * For every <item> under <channel>, each wp:postmeta entry whose wp:meta_key is
 * not in the whitelist below is removed, and a non-empty content:encoded element
 * is replaced by the result of clean_html_content(). The cleaned document is then
 * written to $outputFile.
 *
 * @param string $inputFile  Path of the XML export to read.
 * @param string $outputFile Path the cleaned XML is written to.
 * @return bool True when the input was parsed and the output was written;
 *              false when the input could not be loaded as XML or saving failed.
 */
function clean_wordpress_export_xml($inputFile, $outputFile) {
    // Meta keys that survive the cleanup; every other wp:postmeta entry is dropped.
    // Built once here instead of once per item.
    $essential_meta = [
        '_edit_last',
        '_wp_page_template',
        '_thumbnail_id',
        '_wp_old_slug',
        'rank_math_primary_category',
        'rank_math_description',
        'rank_math_focus_keyword',
        'rank_math_news_sitemap_robots',
        'rank_math_seo_score',
        'rank_math_title',
        'rmp_vote_count',
        'rmp_rating_val_sum',
        'rmp_avg_rating',
    ];

    // Collect libxml errors internally so a missing or malformed file is reported
    // through the return value; the previous libxml setting is restored right away.
    $previous_setting = libxml_use_internal_errors(true);
    $xml = simplexml_load_file($inputFile);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_setting);

    if ($xml === false) {
        return false;
    }

    foreach ($xml->channel->item as $item) {
        // Gather first, delete afterwards: removing nodes while iterating the
        // SimpleXML collection would disturb the iteration.
        $postmetas_to_remove = [];
        foreach ($item->children('wp', true)->postmeta as $postmeta) {
            $meta_key = (string)$postmeta->children('wp', true)->meta_key;
            // The whitelist holds no _woodmart*/_elementor* keys, so this single
            // strict membership test also removes those plugin entries.
            if (!in_array($meta_key, $essential_meta, true)) {
                $postmetas_to_remove[] = $postmeta;
            }
        }

        foreach ($postmetas_to_remove as $postmeta) {
            // SimpleXML cannot delete a specific sibling directly; go through DOM.
            $dom = dom_import_simplexml($postmeta);
            if ($dom && $dom->parentNode) {
                $dom->parentNode->removeChild($dom);
            }
        }

        // Reduce the post body to plain, standard HTML.
        $raw_content = (string)$item->children('content', true)->encoded;
        if ($raw_content !== '') {
            $item->children('content', true)->encoded = clean_html_content($raw_content);
        }
    }

    // asXML() reports whether the file was written; pass that on to the caller.
    return $xml->asXML($outputFile) === true;
}
/**
 * Run preg_replace() but keep the input when PCRE fails.
 *
 * preg_replace() returns null on errors such as hitting the backtrack limit;
 * without this guard a single failure would silently erase the whole content.
 *
 * @param string $pattern     PCRE pattern.
 * @param string $replacement Replacement string.
 * @param string $subject     Text to process.
 * @return string The replaced text, or $subject unchanged on a PCRE error.
 */
function _clean_html_preg_replace($pattern, $replacement, $subject) {
    $result = preg_replace($pattern, $replacement, $subject);
    return $result === null ? $subject : $result;
}

/**
 * Remove Elementor wrapper <div> tags (opening and matching closing tag) while
 * keeping everything between them.
 *
 * A <div> counts as a wrapper when its opening tag carries data-elementor-type,
 * data-elementor-id, or a class attribute containing "elementor". Opening and
 * closing tags are paired with a stack, so nested wrappers are all removed and
 * ordinary <div>s keep their own closing tag.
 *
 * @param string $html Markup to process.
 * @return string Markup without the wrapper tags.
 */
function _clean_html_unwrap_elementor_divs($html) {
    if (!preg_match_all('/<div\b[^>]*>|<\/div\s*>/i', $html, $matches, PREG_OFFSET_CAPTURE)) {
        return $html;
    }

    $open_divs = []; // one flag per currently open <div>: true = Elementor wrapper
    $cuts = [];      // [byte offset, byte length] of tags to delete, in ascending order

    foreach ($matches[0] as $match) {
        list($tag, $offset) = $match;

        if ($tag[1] === '/') {
            // A closing tag belongs to the most recently opened <div>;
            // stray closing tags with nothing open are left untouched.
            if ($open_divs && array_pop($open_divs)) {
                $cuts[] = [$offset, strlen($tag)];
            }
            continue;
        }

        $is_wrapper = strpos($tag, 'data-elementor-type') !== false
            || strpos($tag, 'data-elementor-id') !== false
            || preg_match('/class="[^"]*elementor[^"]*"/', $tag) === 1;

        $open_divs[] = $is_wrapper;
        if ($is_wrapper) {
            $cuts[] = [$offset, strlen($tag)];
        }
    }

    // Rebuild the string, skipping every tag marked for deletion.
    $result = '';
    $position = 0;
    foreach ($cuts as $cut) {
        $result .= substr($html, $position, $cut[0] - $position);
        $position = $cut[0] + $cut[1];
    }

    return $result . substr($html, $position);
}

/**
 * Reduce post HTML to a small set of standard tags, removing Elementor markup.
 *
 * Steps, in order: drop <script>/<style> blocks, unwrap Elementor wrapper divs,
 * strip Elementor data-* attributes, remove empty divs/spans, strip remaining
 * Elementor class attributes, strip all tags outside the allowed list, and
 * normalise whitespace (runs of spaces/tabs become one space, runs of blank
 * lines become a single blank line).
 *
 * @param string $content Raw HTML of a post body.
 * @return string Cleaned, trimmed HTML.
 */
function clean_html_content($content) {
    $content = (string)$content;
    if ($content === '') {
        return '';
    }

    // Remove script and style blocks together with their bodies. This covers the
    // Elementor JSON <script> as well; strip_tags() below would only remove the
    // tags themselves and leave the JS/CSS source behind as visible text.
    $content = _clean_html_preg_replace('/<(script|style)\b[^>]*>.*?<\/\1\s*>/is', '', $content);

    // Unwrap Elementor containers and widget wrappers. This must run before the
    // data-* attributes are stripped, because those attributes identify wrappers.
    $content = _clean_html_unwrap_elementor_divs($content);

    // Remove Elementor-specific attributes left on any other element.
    $content = _clean_html_preg_replace('/\s*data-(id|widget_type|settings|element_type|elementor-type|elementor-id)="[^"]*"/', '', $content);
    $content = _clean_html_preg_replace('/\s*data-sourcepos="[^"]*"/', '', $content);
    $content = _clean_html_preg_replace('/\s*data-test-id="[^"]*"/', '', $content);

    // Remove empty divs and spans.
    $content = _clean_html_preg_replace('/<div\b[^>]*><\/div>/', '', $content);
    $content = _clean_html_preg_replace('/<span\b[^>]*><\/span>/', '', $content);

    // Remove Elementor-specific class attributes from non-div elements.
    $content = _clean_html_preg_replace('/\s*class="[^"]*elementor[^"]*"/', '', $content);

    // Keep only standard HTML tags.
    $allowed_tags = '<h1><h2><h3><h4><h5><h6><p><div><span><strong><em><b><i><u><a><ul><ol><li><img><br><hr><table><tr><td><th><thead><tbody>';
    $content = strip_tags($content, $allowed_tags);

    // Collapse horizontal whitespace only, so line breaks survive, then squeeze
    // two or more consecutive line breaks (optionally space-padded) into one
    // blank line.
    $content = _clean_html_preg_replace('/[^\S\r\n]+/', ' ', $content);
    $content = _clean_html_preg_replace('/(?: ?(?:\r\n|\r|\n)){2,}/', "\n\n", $content);

    return trim($content);
}
// Example run: clean the export named below and report the outcome.
// The two messages are Persian for "XML file cleaned successfully. Output file: "
// and "Error while processing the file".
$inputFile = 'WordPress.2025-11-04-POST.xml';
$outputFile = 'cleaned_sample.xml';
echo clean_wordpress_export_xml($inputFile, $outputFile)
    ? "فایل XML با موفقیت پاکسازی شد. فایل خروجی: " . $outputFile
    : "خطا در پردازش فایل";
?>
@web-elite
Copy link
Author

WordPress XML Export Cleaner Script

English Description

This PHP script is designed to clean WordPress export (XML) files. Its main purpose is to remove unnecessary plugin metadata while preserving only the core content and essential SEO data.

Key Features:

Preserved Metadata:

  • Core WordPress metadata (_edit_last, _thumbnail_id, etc.)
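  • Yoast SEO metadata (_yoast_wpseo_*) — note: no _yoast_wpseo_* key is in the script's $essential_meta whitelist as published, so add the keys you need to that list before running
  • Rank Math metadata (only the whitelisted rank_math_* keys: primary category, description, focus keyword, news sitemap robots, SEO score, title)
  • Rating data (rmp_vote_count, rmp_rating_val_sum, rmp_avg_rating)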

Removed Metadata:

  • All Elementor metadata (_elementor_*)
  • All Woodmart metadata (_woodmart_*)
  • Other unnecessary plugin metadata

Content Cleaning:

  • Removal of Elementor JSON data
  • Removal of Elementor-specific attributes
  • Removal of Elementor containers while preserving internal content
  • Preservation of only standard HTML tags

Usage:

$inputFile = 'sample.xml';
$outputFile = 'cleaned_sample.xml';
clean_wordpress_export_xml($inputFile, $outputFile);

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment