Created
November 5, 2025 08:08
-
-
Save web-elite/46538caf736a3ae86ed1ddc98bddc884 to your computer and use it in GitHub Desktop.
WordPress XML Export Cleaner Script - اسکریپت پاکسازی فایل اکسپورت وردپرس
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
/**
 * Strip non-essential post meta and page-builder markup from a WordPress
 * XML export.
 *
 * For every <item> in the export this removes each <wp:postmeta> entry whose
 * key is not on the whitelist below, and passes the <content:encoded> body
 * through clean_html_content(). The result is written to $outputFile.
 *
 * @param string $inputFile  Path of the WordPress XML export to read.
 * @param string $outputFile Path the cleaned XML is written to.
 *
 * @return bool True when the export was parsed and saved; false when the
 *              input could not be parsed or the output could not be written.
 */
function clean_wordpress_export_xml($inputFile, $outputFile) {
    // Meta keys that survive the cleanup. Everything else is dropped, which
    // already covers the _woodmart* and _elementor* keys.
    $essential_meta = [
        '_edit_last',
        '_wp_page_template',
        '_thumbnail_id',
        '_wp_old_slug',
        'rank_math_primary_category',
        'rank_math_description',
        'rank_math_focus_keyword',
        'rank_math_news_sitemap_robots',
        'rank_math_seo_score',
        'rank_math_title',
        'rmp_vote_count',
        'rmp_rating_val_sum',
        'rmp_avg_rating',
    ];

    // Collect libxml parse errors instead of emitting warnings, so a broken
    // file is reported through the return value rather than a fatal error.
    $previous = libxml_use_internal_errors(true);
    $xml = simplexml_load_file($inputFile);
    if (!$previous) {
        // Only discard the error buffer when the caller was not collecting.
        libxml_clear_errors();
    }
    libxml_use_internal_errors($previous);

    if ($xml === false) {
        return false;
    }

    // A document without <channel> has nothing to clean.
    $items = isset($xml->channel) ? $xml->channel->item : [];

    foreach ($items as $item) {
        // Collect first, remove afterwards: deleting nodes while iterating
        // the SimpleXML list would disturb the iteration.
        $postmetas_to_remove = [];
        foreach ($item->children('wp', true)->postmeta as $postmeta) {
            $meta_key = (string) $postmeta->children('wp', true)->meta_key;
            if (!in_array($meta_key, $essential_meta, true)) {
                $postmetas_to_remove[] = $postmeta;
            }
        }

        foreach ($postmetas_to_remove as $postmeta) {
            $dom = dom_import_simplexml($postmeta);
            if ($dom && $dom->parentNode) {
                $dom->parentNode->removeChild($dom);
            }
        }

        // Clean the post body only when there is one.
        $content = (string) $item->children('content', true)->encoded;
        if ($content !== '') {
            $item->children('content', true)->encoded = clean_html_content($content);
        }
    }

    // asXML() reports whether the file could be written.
    return (bool) $xml->asXML($outputFile);
}
/**
 * Reduce post HTML to plain markup without Elementor wrappers or attributes.
 *
 * Applies a fixed sequence of regular-expression rewrites, strips every tag
 * that is not on the allow-list, then normalises whitespace: runs of spaces
 * and tabs become one space, and runs of line breaks are capped at one blank
 * line so paragraph breaks survive.
 *
 * @param string $content Raw post HTML.
 *
 * @return string Cleaned HTML with leading and trailing whitespace trimmed.
 */
function clean_html_content($content) {
    // Apply pattern => replacement pairs in order. preg_replace() returns
    // null on a PCRE failure (e.g. backtrack limit on a very large post); in
    // that case the rule is skipped rather than wiping the whole text.
    $apply_rules = static function ($text, array $rules) {
        foreach ($rules as $pattern => $replacement) {
            $result = preg_replace($pattern, $replacement, $text);
            if ($result !== null) {
                $text = $result;
            }
        }
        return $text;
    };

    $content = $apply_rules((string) $content, [
        // Remove Elementor JSON data completely
        '/<script[^>]*data-elementor-type[^>]*>.*?<\/script>/s' => '',
        // Remove Elementor-specific attributes
        '/\s*data-(id|widget_type|settings|element_type|elementor-type|elementor-id)="[^"]*"/' => '',
        '/\s*data-sourcepos="[^"]*"/' => '',
        '/\s*data-test-id="[^"]*"/' => '',
        // Unwrap Elementor container divs but keep their content. The rule
        // above has already stripped double-quoted data-elementor-type/-id
        // attributes, so these two only fire for other quoting forms.
        '/<div[^>]*data-elementor-type[^>]*>(.*?)<\/div>/s' => '$1',
        '/<div[^>]*data-elementor-id[^>]*>(.*?)<\/div>/s' => '$1',
        // Unwrap Elementor widget wrappers
        '/<div[^>]*class="[^"]*elementor[^"]*"[^>]*>(.*?)<\/div>/s' => '$1',
        // Remove empty divs and spans
        '/<div[^>]*><\/div>/' => '',
        '/<span[^>]*><\/span>/' => '',
        // Remove Elementor-specific classes
        '/\s*class="[^"]*elementor[^"]*"/' => '',
    ]);

    // Keep only standard HTML tags
    $allowed_tags = '<h1><h2><h3><h4><h5><h6><p><div><span><strong><em><b><i><u><a><ul><ol><li><img><br><hr><table><tr><td><th><thead><tbody>';
    $content = strip_tags($content, $allowed_tags);

    // Normalise whitespace without flattening line breaks. Only ASCII
    // whitespace is matched, so multi-byte text is never touched.
    $content = $apply_rules($content, [
        '/\r\n?/' => "\n",           // unify line endings
        '/[ \t\f\x0B]+/' => ' ',     // collapse horizontal whitespace
        '/ ?\n ?/' => "\n",          // drop spaces hugging a line break
        '/\n{3,}/' => "\n\n",        // at most one blank line in a row
    ]);

    return trim($content);
}
// Example run: clean a sample export and print the outcome (messages are in Persian).
$inputFile = 'WordPress.2025-11-04-POST.xml';
$outputFile = 'cleaned_sample.xml';

echo clean_wordpress_export_xml($inputFile, $outputFile)
    ? "فایل XML با موفقیت پاکسازی شد. فایل خروجی: " . $outputFile
    : "خطا در پردازش فایل";
| ?> |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
WordPress XML Export Cleaner Script
English Description
This PHP script is designed to clean WordPress export (XML) files. Its main purpose is to remove unnecessary plugin metadata while preserving only the core content and essential SEO data.
Key Features:
Preserved Metadata:
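- Core WordPress keys (`_edit_last`, `_thumbnail_id`, etc.)
- Yoast SEO keys (`_yoast_wpseo_*`) — note: the whitelist in the script above does not currently contain any `_yoast_wpseo_*` key, so these are removed unless it is extended
- Rank Math keys (`rank_math_*`)
- Rating keys (`rmp_*`)

Removed Metadata:
- Elementor keys (`_elementor_*`)
- WoodMart keys (`_woodmart_*`)

Content Cleaning: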
Usage: