Last active
August 29, 2015 14:05
-
-
Save morficus/990ee8b461b140f25687 to your computer and use it in GitHub Desktop.
Adds an alt attribute to every img tag that doesn't already have one, in all HTML files inside the current directory (recursively)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from bs4 import BeautifulSoup | |
| import glob | |
| import os | |
# Adds an ``alt`` attribute to every <img> tag that lacks one, in every
# ``*.html`` file found under the current directory (recursively).
#
# Relies on the ``BeautifulSoup``, ``glob`` and ``os`` imports at the top of
# the file. Written so that it runs unchanged on Python 2 and Python 3.


def derive_alt_text(src):
    """Return the alt text for an image: its ``src`` minus the final extension.

    Only the last extension is removed, so relative prefixes such as ``./``
    or ``../`` and dots inside directory or host names are left intact
    (``'../img/a.png'`` -> ``'../img/a'``).
    """
    return os.path.splitext(src)[0]


def add_missing_alts(images):
    """Set ``alt`` on each tag in *images* that has none; return the count changed.

    Tags that already carry an ``alt`` attribute are left untouched. Tags
    with no (or an empty) ``src`` are skipped because there is nothing to
    derive the alt text from.
    """
    changed = 0
    for img in images:
        if img.has_attr('alt'):
            continue
        src = img.get('src')
        if not src:
            continue
        img['alt'] = derive_alt_text(src)
        changed += 1
    return changed


def process_file(html_path):
    """Add missing alt attributes in one HTML file; return the number of tags changed.

    The file is rewritten (prettified, UTF-8 encoded) only when at least one
    tag in *this* file was changed.
    """
    # Read raw bytes so BeautifulSoup does the encoding detection itself.
    with open(html_path, 'rb') as source:
        soup = BeautifulSoup(source, 'html.parser')

    images = soup.find_all('img')
    if not images:
        return 0

    print('Replacing content in: ' + html_path)
    changed = add_missing_alts(images)
    if changed:
        # Render before opening for write, so a rendering error cannot
        # leave behind a truncated file.
        rendered = soup.prettify().encode('utf-8').strip()
        with open(html_path, 'wb') as target:
            target.write(rendered)
    print('---------------')
    return changed


def main(start_dir='.'):
    """Walk *start_dir*, process every ``*.html`` file and print a summary.

    Prints the number of modified files followed by the number of changed
    tags, and returns ``(total_files, modified_files, total_changed_tags)``.
    """
    total_files = 0
    modified_files = 0
    total_changed_tags = 0

    for root, _sub_folders, _files in os.walk(start_dir):
        for html_path in glob.glob(os.path.join(root, '*.html')):
            total_files += 1
            changed = process_file(html_path)
            if changed:
                modified_files += 1
                total_changed_tags += changed

    print('All done, now get out of here.')
    print(modified_files)
    print(total_changed_tags)
    return total_files, modified_files, total_changed_tags


if __name__ == '__main__':
    main()
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
You will need BeautifulSoup installed in order for this to work.
The current implementation uses Python's built-in HTML parser (`html.parser`), so no additional parser should be necessary.