Skip to content

Instantly share code, notes, and snippets.

@kehanlu
Last active November 14, 2017 18:07
Show Gist options
  • Select an option

  • Save kehanlu/1719b78df0b95a4013478d32ae0fda73 to your computer and use it in GitHub Desktop.

Select an option

Save kehanlu/1719b78df0b95a4013478d32ae0fda73 to your computer and use it in GitHub Desktop.
今日星座運勢爬蟲

今日星座運勢

因為網頁的資料比較亂,所以在 parser.py 用 BS4 抓完之後,再個別整理成 dictionary。

連 /astro/update_data 重整資料進資料庫(update_or_create)。

只要資料庫裡的日期比現在的時間舊超過一天,開啟首頁時就會自動重新抓資料。

<!DOCTYPE html>
<html lang="zh-Hant">
<head>
  <meta charset="UTF-8">
  <title>今日星座運勢</title>
  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-beta.2/css/bootstrap.min.css" integrity="sha384-PsH8R72JQ3SOdhVi3uxftmaW6Vc51MKb0q5P2rRUpPvrszuE4W1povHYgTpBfshb" crossorigin="anonymous">
  <style>
    p{
      padding: 0 30px 0 30px;
    }
    h2{
      color:#00ACC1;
    }
    h5{
      font-size: 18px;
      color:#ef9a9a;
    }
  </style>
</head>
<body>
  {# Context: "Astros" is the list of Astro rows supplied by the index view. #}
  {# The "last updated" label shows the date stored on the first row. #}
  最後更新:{{Astros.0.date}}
  <a href="/astro/update_data"><button type="button" class="btn btn-primary">update now!</button></a>
  {# One card per sign; each section shows its own description, advice and prescription. #}
  {% for as in Astros%}
  <div class="row">
    <div class="col-2"></div>
    <div class="col-8">
      <div class="alert alert-secondary" role="alert">
        <h2>【{{as.title}}】</h2>
        <br>
        <h5>愛情運勢</h5>
        <p>{{as.love_description}}</p>
        <p>{{as.love_advice}}</p>
        <p>{{as.love_prescription}}</p>
        <h5>工作運勢</h5>
        <p>{{as.work_description}}</p>
        <p>{{as.work_advice}}</p>
        <p>{{as.work_prescription}}</p>
        <h5>金錢運勢</h5>
        <p>{{as.money_description}}</p>
        <p>{{as.money_advice}}</p>
        <p>{{as.money_prescription}}</p>
        <h5>整體運勢</h5>
        <p>{{as.full_description}}</p>
        <p>{{as.full_advice}}</p>
        <p>{{as.full_prescription}}</p>
      </div>
    </div>
    <div class="col-2"></div>
  </div>
  {% endfor %}
</body>
</html>
from django.db import models
class Astro(models.Model):
    """One stored horoscope entry per sign, filled in by the update view.

    Each of the four categories (love, work, money, full) has three text
    columns: ``<category>_description``, ``<category>_advice`` and
    ``<category>_prescription``.
    """

    # Short sign label; the scraper stores the first three characters of
    # the page title here.
    title = models.CharField(max_length=20)
    # Date text as scraped from the page. It is kept as a string; the index
    # view parses it with the format '%Y/%m/%d'.
    date = models.CharField(max_length=20)
    # Sign number used in the source URL and as the lookup key for
    # update_or_create (the update view iterates 1..12).
    astro_id = models.IntegerField()

    # Love fortune.
    love_description = models.CharField(max_length=200)
    love_advice = models.CharField(max_length=200)
    love_prescription = models.CharField(max_length=200)

    # Work fortune.
    work_description = models.CharField(max_length=200)
    work_advice = models.CharField(max_length=200)
    work_prescription = models.CharField(max_length=200)

    # Money fortune.
    money_description = models.CharField(max_length=200)
    money_advice = models.CharField(max_length=200)
    money_prescription = models.CharField(max_length=200)

    # Overall fortune.
    full_description = models.CharField(max_length=200)
    full_advice = models.CharField(max_length=200)
    full_prescription = models.CharField(max_length=200)

    def __str__(self) -> str:
        """Return the sign label so instances are readable when printed."""
        return self.title
from bs4 import BeautifulSoup
from pprint import pprint
import requests
def _fortune_section(description_node, extra_nodes):
    """Build one category dict from its description node and advice nodes.

    ``extra_nodes`` must hold at least two elements: the advice node first,
    the prescription node second.
    """
    return {
        'description': description_node.text,
        'advice': extra_nodes[0].text,
        'prescription': extra_nodes[1].text,
    }


def get_data(astro, timeout=10):
    """Scrape today's fortune page for one sign and return it as a dict.

    Args:
        astro: Sign number placed in the URL path (the caller in this file
            passes 1..12).
        timeout: Seconds to wait for the remote server before giving up.
            Without a timeout a stalled server would block the caller
            forever.

    Returns:
        A dict with the keys ``title``, ``astro_id`` and ``date`` plus the
        four categories ``love``, ``work``, ``money`` and ``full``. Each
        category is a dict with ``description``, ``advice`` and
        ``prescription`` strings.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        AttributeError, IndexError: If the page does not contain the
            expected elements (for example after a layout change).
    """
    response = requests.get(
        'http://www.starq.com/fortune/free/' + str(astro) + '/today/',
        timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Query each CSS class once and index into the result, rather than
    # re-running the same search for every field.
    descriptions = soup.find_all('td', 'g12_text')
    love_cells = soup.find_all('td', 'in_topic_text01')
    work_cells = soup.find_all('td', 'y12_text')
    money_cells = soup.find_all('td', 'o12_text')
    full_cells = soup.find_all('td', 'p12_text')
    # The work description is the only one held in a <span>, not a <td>.
    work_description = soup.find_all('span', 'g12_text')[0]

    return {
        'title': soup.title.text[0:3],
        'astro_id': astro,
        'date': soup.find('td', 'g12b_text').text,
        'love': _fortune_section(descriptions[0], love_cells),
        'work': _fortune_section(work_description, work_cells),
        'money': _fortune_section(descriptions[1], money_cells),
        'full': _fortune_section(descriptions[2], full_cells),
    }
from django.conf.urls import url
from . import views
# URL routes of the astro app.
# NOTE(review): the views redirect to '/astro/...', so this module is
# presumably included under the 'astro/' prefix — confirm in the project URLconf.
urlpatterns = [
    # Listing page showing every stored sign.
    url(
        r'^index$',
        views.index,
        name='index',
    ),
    # Re-scrapes all signs and writes them to the database.
    url(
        r'^update_data$',
        views.update_data,
        name='update_data',
    ),
]
from django.shortcuts import render, redirect
from .models import Astro
from .parser import get_data
import datetime
def index(request):
    """Render the horoscope page, refreshing the data first when needed.

    Redirects to the update view when the database has no row for sign 1
    (e.g. on first run) or when that row's date is more than one day
    behind the current time. Otherwise renders ``astro/index.html`` with
    all rows ordered by ``astro_id`` under the context key ``Astros``.

    Raises:
        ValueError: If the stored date text does not match '%Y/%m/%d'.
    """
    # Sign 1 is used as the freshness marker for the whole data set.
    # filter().first() returns None on an empty table instead of raising
    # DoesNotExist, so the very first visit triggers a scrape, not a crash.
    marker = Astro.objects.filter(astro_id=1).first()
    if marker is None:
        return redirect('/astro/update_data')

    # Fetch new data when the stored date is more than one day old.
    # NOTE(review): the update view always redirects back here, so if the
    # scraped date is still more than a day old the two views redirect to
    # each other.
    stored_date = datetime.datetime.strptime(marker.date, '%Y/%m/%d')
    if datetime.datetime.today() - stored_date > datetime.timedelta(days=1):
        return redirect('/astro/update_data')

    # Rows from the database, in sign order.
    context = {
        'Astros': Astro.objects.order_by('astro_id'),
    }
    return render(request, 'astro/index.html', context)
def update_data(request):
    """Re-scrape all twelve signs, store them, and go back to the index.

    All pages are fetched before anything is written, and the writes run
    in one transaction. If any fetch or write fails, the database keeps
    its previous, consistent contents. This matters because the index
    view judges freshness by sign 1 only, so a half-finished update would
    otherwise look up to date and never be repaired.

    Returns:
        A redirect to ``/astro/index``.

    Raises:
        Whatever ``get_data`` raises on network or parsing failure; in
        that case no rows are modified.
    """
    # Local import: the name is needed only here and is not among this
    # module's top-level imports.
    from django.db import transaction

    # Fetch every sign first so a scraping error aborts before any write.
    scraped = [get_data(astro) for astro in range(1, 13)]

    with transaction.atomic():
        for data in scraped:
            defaults = {
                'title': data['title'],
                'astro_id': data['astro_id'],
                'date': data['date'],
            }
            # Flatten the nested category dicts into the model's
            # '<category>_<field>' column names.
            for category in ('love', 'work', 'money', 'full'):
                for field in ('description', 'advice', 'prescription'):
                    defaults[category + '_' + field] = data[category][field]

            _, created = Astro.objects.update_or_create(
                defaults=defaults,
                astro_id=data['astro_id'],
            )
            # Debug output: True when a new row was inserted, False when
            # an existing row was updated.
            print(created)
    return redirect('/astro/index')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment