Skip to content

Instantly share code, notes, and snippets.

@jvzammit
Last active December 2, 2015 12:03
Show Gist options
  • Select an option

  • Save jvzammit/73b625c6f88495cda036 to your computer and use it in GitHub Desktop.

Select an option

Save jvzammit/73b625c6f88495cda036 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import bs4
import csv
from django.core.management import setup_environ
from ca import settings
setup_environ(settings)
from buyers_guide.models import (
BuyersGuide,
GuideFeature,
GuideReview)
# Start of the MS Office conditional-comment block found in pasted content;
# main() reports only field values that contain this marker.
OPENING_TAG = '<!--[if gte mso 9]><xml>'
# Marker that ends the pasted fragment.
CLOSING_TAG = '<!--EndFragment-->'
# Module-level dict; main() adds one (empty list) entry per model class name.
res = {}
# Maps each model class to the names of the fields that main() scans.
model_fields = {
    BuyersGuide:
        'description author_note legal_note more_info references'.split(),
    GuideFeature: ['description'],
    GuideReview: ['description'],
}
def get_string_value(field_value):
    """Strip pasted MS Office markup from *field_value* and return plain text.

    The span running from ``OPENING_TAG`` through ``<!--StartFragment-->``
    and the ``CLOSING_TAG`` marker are cut out of the string; the remaining
    HTML is then reduced to its text content.

    Each marker is searched for only after the previous one, so markers that
    appear out of order cannot produce overlapping slices.  When any of the
    three markers is missing, the slicing step is skipped and the whole value
    is handed to BeautifulSoup instead of raising ``ValueError``.

    :param field_value: text of a model field (expected to contain HTML).
    :returns: the text content, whitespace-stripped and UTF-8 encoded.
    """
    start_marker = '<!--StartFragment-->'

    header_start = field_value.find(OPENING_TAG)
    fragment_start = -1
    fragment_end = -1
    if header_start != -1:
        fragment_start = field_value.find(start_marker, header_start)
    if fragment_start != -1:
        # Move past the marker itself so it is dropped with the header.
        fragment_start += len(start_marker)
        fragment_end = field_value.find(CLOSING_TAG, fragment_start)

    if fragment_end == -1:
        # Incomplete marker set: nothing can be sliced reliably.
        cleaned_str = field_value
    else:
        # concatenate
        # start of string -> start of opening tag
        # end of opening tag -> start of closing tag
        # end of closing tag -> end of string
        cleaned_str = (
            field_value[:header_start] +
            field_value[fragment_start:fragment_end] +
            field_value[fragment_end + len(CLOSING_TAG):])

    # remove any remaining p tags
    # NOTE(review): no parser is named, so BeautifulSoup picks whichever one
    # is installed; consider pinning one for reproducible output.
    return bs4.BeautifulSoup(cleaned_str).get_text().strip().encode('utf-8')
def get_url_affected(model_class, instance):
    """Return the public URL of the page that *instance* appears on.

    For ``GuideFeature`` and ``GuideReview`` objects the URL of the related
    guide is used, or ``'N/A'`` when the object has no guide.  Any other
    model class uses the instance's own absolute URL.
    """
    site_root = 'http://www.consumeraffairs.com'
    if model_class not in (GuideFeature, GuideReview):
        return site_root + instance.get_absolute_url()
    if not instance.guide:
        return 'N/A'
    return site_root + instance.guide.get_absolute_url()
def get_admin_url(model_class, instance):
    """Return the buyers-guide admin URL to open for *instance*.

    ``GuideFeature`` and ``GuideReview`` objects resolve to the admin page of
    their related guide, or ``'N/A'`` when they have none.  Any other model
    class resolves to the admin page for the instance's own primary key.
    """
    admin_template = (
        'https://www.consumeraffairs.com/admin/buyers_guide/buyersguide/{}/')
    if model_class not in (GuideFeature, GuideReview):
        return admin_template.format(instance.pk)
    if not instance.guide:
        return 'N/A'
    return admin_template.format(instance.guide.pk)
def main():
    """Write a CSV report of every scanned field that contains Office markup.

    For each model/field pair in ``model_fields`` all objects of the model
    are loaded, and one row is written to
    ``fixed_buyersguide_contents_revised.csv`` for every field value that
    contains ``OPENING_TAG``.  Empty or ``None`` values are skipped.
    """
    headers = [
        'item_type', 'item_pk', 'item_field', 'current_value', 'cleaned_value',
        'url_affected', 'admin_url']
    # The Python 2 csv module expects a binary-mode file; in text mode an
    # extra '\r' is written at the end of every row on Windows.
    with open('fixed_buyersguide_contents_revised.csv', 'wb') as the_file:
        writer = csv.DictWriter(the_file, headers)
        writer.writeheader()
        for model_class, fields_list in model_fields.iteritems():
            # Kept as-is: registers the model name in the module-level
            # ``res`` dict; nothing in this function reads it back.
            res[model_class.__name__] = []
            for field_name in fields_list:
                for obj in model_class.objects.all():
                    field_value = getattr(obj, field_name)
                    # A None/empty value cannot hold the marker, and
                    # ``in None`` would raise TypeError.
                    if not field_value or OPENING_TAG not in field_value:
                        continue
                    writer.writerow({
                        'item_type': model_class.__name__,
                        'item_pk': obj.pk,
                        'item_field': field_name,
                        # Left blank; the raw field_value is not exported.
                        'current_value': '',
                        'cleaned_value': get_string_value(field_value),
                        'url_affected': get_url_affected(model_class, obj),
                        'admin_url': get_admin_url(model_class, obj),
                    })
# Run the export only when this file is executed as a script, then print a
# completion message.
if __name__ == "__main__":
    main()
    print 'DONE'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment