Chapter 06

Django View Model ForeignKey スクレイピング

hello_yogurt
hello_yogurt
2022.07.24に更新

backend/myapp/views.py

backend/myapp/views.py
from rest_framework import generics, viewsets
from myapp.models import Item, LineAccessToken, CloudwatchEvent
from myapp.serializers import UserSerializer, ItemSerializer, LineAccessTokenSerializer, CloudwatchEventSerializer
from django.shortcuts import render, redirect
from django.http import JsonResponse, HttpResponse, HttpResponseServerError
from django.views import View
from django.views.generic import TemplateView
from django.contrib.auth.models import User
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import json
import requests
import sys
import os
import re
import time
import datetime
import pytz
import shutil
from django.views.decorators.csrf import ensure_csrf_cookie, csrf_exempt

class Index(TemplateView):
    """Render ``myapp/index.html`` with one user exposed to the template as ``message``."""

    template_name = "myapp/index.html"

    def get_context_data(self, **kwargs):
        """Build the template context.

        ``message`` is the second user by ascending id, or ``None`` when fewer
        than two users exist.
        """
        context = super().get_context_data(**kwargs)
        # An explicit ordering makes "the second user" deterministic, and
        # fetching at most two rows avoids the IndexError the bare ``[1]``
        # lookup raised on a database with fewer than two users.
        first_two = list(User.objects.order_by("id")[:2])
        context["message"] = first_two[1] if len(first_two) > 1 else None
        return context

@ensure_csrf_cookie
def token_get(request):
    """Answer GET with an empty JSON object so the client can pick up the CSRF cookie.

    The cookie itself is attached by the ``ensure_csrf_cookie`` decorator.
    Any other HTTP method is answered with 405.
    """
    if request.method != 'GET':
        # The original fell off the end of the function here and returned
        # None, which Django rejects with a ValueError (HTTP 500).
        from django.http import HttpResponseNotAllowed
        return HttpResponseNotAllowed(['GET'])

    return JsonResponse({})

@csrf_exempt
def test_post(request):
    """Register or refresh an item from form data, scrape it, and notify via LINE.

    Expects the POST form fields ``username``, ``item_name`` and ``item_url``.
    The item is looked up by name (and created under the user when missing),
    its page is scraped, the result and a Tokyo-time timestamp are stored,
    and a LINE message is sent with the user's stored access token.

    Returns:
        The submitted form data as JSON; 405 for non-POST requests; 500 when
        ``scraping`` yields no result.
    """
    if request.method != 'POST':
        # Previously the view returned None for other methods (HTTP 500).
        from django.http import HttpResponseNotAllowed
        return HttpResponseNotAllowed(['POST'])

    post = request.POST
    username = post['username']
    item_name = post['item_name']
    item_url = post['item_url']

    user = User.objects.get(username=username)

    # NOTE(review): the lookup is by name only, so an item with the same name
    # that belongs to another user would be updated — confirm this is intended.
    item = Item.objects.filter(item_name=item_name).first()
    if item is None:
        # Keep the created instance; indexing the already-evaluated (empty)
        # queryset afterwards raised IndexError.
        item = user.item_set.create(
            item_name=item_name,
            item_url=item_url,
            stock=0,
            scraping_at='0',
        )
        item_stock_pre = 0
    else:
        # Capture the stored status before touching the row. The original
        # reset stock to 0 first, so the "previous" value was always 0.
        item_stock_pre = item.stock
        item.item_name = item_name
        item.item_url = item_url
        item.save()

    scraped = scraping(item.item_url)
    if scraped is None:
        # scraping() returns None on the error path it handles itself.
        return HttpResponseServerError('scraping failed')

    stock_flag, item_stock = scraped
    # scraping() reports the flag as the string '1' or '0'; compare as int.
    stock_result = int(stock_flag)

    now = datetime.datetime.now(pytz.timezone('Asia/Tokyo'))
    item.stock = stock_result
    item.scraping_at = now.strftime('%Y-%m-%d %H:%M:%S')
    # Persist before notifying so a LINE failure cannot discard the result.
    item.save()

    token_row = LineAccessToken.objects.filter(username=user.id).first()
    if token_row is not None:
        access_token = token_row.line_access_token
        if stock_result == item_stock_pre:
            line.same(item, item_stock, access_token)
        elif stock_result == 1:
            line.stock_availability(item, access_token)
        else:
            line.stock_not_availability(item, item_stock, access_token)

    return JsonResponse(post)

class line():
    """LINE Notify helpers that report an item's stock status.

    All methods are called on the class itself and take the ``Item`` instance
    whose ``id`` and ``item_name`` go into the message.
    """

    NOTIFY_URL = "https://notify-api.line.me/api/notify"

    @staticmethod
    def _notify(message, access_token):
        """POST ``message`` to LINE Notify with ``access_token`` as bearer token."""
        headers = {'Authorization': 'Bearer ' + access_token}
        payload = {'message': message}
        # A timeout keeps a stalled endpoint from blocking the request forever.
        requests.post(line.NOTIFY_URL, headers=headers, params=payload, timeout=10)

    @staticmethod
    def stock_availability(item, access_token):
        """Notify that ``item`` has become available."""
        # Callers pass a single Item, so use it directly; ``item[0]`` raised
        # TypeError because model instances are not subscriptable.
        message = 'id:'+str(item.id)+'、商品名:「'+item.item_name+'」が「在庫あり」になりました'
        line._notify(message, access_token)

    @staticmethod
    def stock_not_availability(item, item_stock, access_token):
        """Notify that ``item`` went out of stock, including the scraped status text."""
        # The original backslash continuation embedded the source indentation
        # in the message; a newline separates the two parts instead.
        message = ('id:'+str(item.id)+'、商品名:「'+item.item_name+'」が「在庫なし」になりました。'
                   '\n[出荷状況]'+item_stock)
        line._notify(message, access_token)

    @staticmethod
    def same(item, item_stock, access_token):
        """Notify that the stock status of ``item`` is unchanged."""
        # Adds the closing bracket that was missing after the item name.
        message = ('id:'+str(item.id)+'、商品名:「'+item.item_name+'」の在庫状況は前回と同じです'
                   '\n[出荷状況]'+item_stock)
        line._notify(message, access_token)


class UserListAPIView(generics.ListAPIView):
    """Read-only list endpoint for all users, highest id first."""

    serializer_class = UserSerializer
    queryset = User.objects.order_by("-id")

class UserDetailAPIview(generics.ListAPIView):
    """List endpoint returning the users whose username matches the view kwarg."""

    serializer_class = UserSerializer

    def get_queryset(self):
        """Return users filtered by ``self.kwargs['username']``, highest id first."""
        wanted_name = self.kwargs['username']
        matches = User.objects.filter(username=wanted_name)
        return matches.order_by("-id")


class ItemListCreateAPIView(generics.ListCreateAPIView):
    """List/create endpoint for items, highest id first."""

    serializer_class = ItemSerializer
    queryset = Item.objects.order_by("-id")


class ItemDetailAPIview(generics.ListAPIView):
    """List endpoint returning the items that belong to one user."""

    serializer_class = ItemSerializer

    def get_queryset(self):
        """Return items filtered on the ``username`` foreign key, highest id first.

        The filter value is taken from ``self.kwargs['username']``.
        """
        owner = self.kwargs['username']
        owned_items = Item.objects.filter(username=owner)
        return owned_items.order_by("-id")



class Crud(View):
    """JSON endpoints that create/refresh, update and delete ``Item`` rows."""

    def get(self, request):
        """Return a fixed placeholder body."""
        return HttpResponse('result')

    def post(self, request):
        """Create or refresh an item by name, scrape its page and store the result.

        The JSON body must carry ``username``, ``item_name`` and ``item_url``.
        Responds with the parsed request body, or with 500 when ``scraping``
        yields no result.
        """
        post = json.loads(request.body.decode("utf-8", "ignore"))

        username = post['username']
        item_name = post['item_name']
        item_url = post['item_url']

        user = User.objects.get(username=username)

        # Work on one model instance throughout. The original indexed the
        # queryset again after ``if not items`` had cached the empty result,
        # so a freshly created item raised IndexError.
        item = Item.objects.filter(item_name=item_name).first()
        if item is None:
            item = user.item_set.create(
                item_name=item_name,
                item_url=item_url,
                stock=0,
                scraping_at='0',
            )
        else:
            item.item_name = item_name
            item.item_url = item_url
            item.stock = 0
            item.save()

        scraped = scraping(item.item_url)
        if scraped is None:
            # scraping() returns None on the error path it handles itself.
            return HttpResponseServerError('scraping failed')

        # The first element is the stock flag, reported as '1' or '0'.
        item.stock = int(scraped[0])
        now = datetime.datetime.now(pytz.timezone('Asia/Tokyo'))
        item.scraping_at = now.strftime('%Y-%m-%d %H:%M:%S')
        item.save()

        return JsonResponse(post)

    def put(self, request):
        """Overwrite name, URL and stock of the item whose ``id`` is in the JSON body.

        Responds with the parsed request body, or 404 when no such item exists.
        """
        put = json.loads(request.body.decode("utf-8", "ignore"))

        put_object = Item.objects.filter(id=put['id']).first()
        if put_object is None:
            # ``filter(...)[0]`` raised IndexError (HTTP 500) for unknown ids.
            return JsonResponse({'error': 'item not found'}, status=404)

        put_object.item_name = put['item_name']
        put_object.item_url = put['item_url']
        put_object.stock = put['stock']
        put_object.save()

        return JsonResponse(put)

    def delete(self, request):
        """Delete the item whose ``id`` is in the JSON body.

        Responds with the parsed request body, or 404 when no such item exists.
        """
        delete = json.loads(request.body.decode("utf-8", "ignore"))

        delete_object = Item.objects.filter(id=delete['id']).first()
        if delete_object is None:
            return JsonResponse({'error': 'item not found'}, status=404)

        delete_object.delete()

        return JsonResponse(delete)


class LineSetting(View):
    """Store a user's LINE Notify access token and send a confirmation message."""

    def get(self, request):
        """Return a fixed placeholder body."""
        return HttpResponse('result')

    def post(self, request):
        """Save the token from the JSON body (``id``, ``line_token``) and confirm via LINE.

        An existing token row for the user is updated; otherwise one is created.
        """
        post = json.loads(request.body.decode("utf-8", "ignore"))

        user_id = post['id']
        line_token = post['line_token']

        user = User.objects.get(id=user_id)

        # Fetch a single row instead of indexing the queryset repeatedly:
        # after ``if not queryset`` cached the empty result, the later ``[0]``
        # lookups raised IndexError whenever the token had just been created.
        token_row = LineAccessToken.objects.filter(username=user_id).first()
        if token_row is None:
            user.lineaccesstoken_set.create(
                line_access_token=line_token,
            )
        else:
            token_row.line_access_token = line_token
            token_row.save()

        # The access token is a credential, so it is no longer printed.
        url = "https://notify-api.line.me/api/notify"
        headers = {'Authorization': 'Bearer ' + line_token}
        payload = {'message': "Line通知設定が完了しました"}
        # A timeout keeps a stalled endpoint from blocking the request forever.
        requests.post(url, headers=headers, params=payload, timeout=10)

        return HttpResponse('result')


class LineAccessTokenViewSet(viewsets.ModelViewSet):
    """Model viewset over all LINE access tokens, highest id first."""

    serializer_class = LineAccessTokenSerializer
    queryset = LineAccessToken.objects.order_by("-id")

class CloudwatchEventViewSet(viewsets.ModelViewSet):
    """Model viewset over all CloudWatch event rows, highest id first."""

    serializer_class = CloudwatchEventSerializer
    queryset = CloudwatchEvent.objects.order_by("-id")


class CloudwatchEventListCreateAPIView(generics.ListCreateAPIView):
    """List/create endpoint for CloudWatch event rows, highest id first."""

    serializer_class = CloudwatchEventSerializer
    queryset = CloudwatchEvent.objects.order_by("-id")


class CloudwatchEventDetailAPIview(generics.ListAPIView):
    """List endpoint returning the CloudWatch event rows of one item."""

    serializer_class = CloudwatchEventSerializer

    def get_queryset(self):
        """Return rows filtered on ``item`` by ``self.kwargs['item']``, highest id first."""
        item_key = self.kwargs['item']
        events = CloudwatchEvent.objects.filter(item=item_key)
        return events.order_by("-id")

class CloudwatchEventView(View):
    """Store an item's schedule settings and forward them to the Lambda endpoint."""

    def get(self, request):
        """Return a fixed placeholder body."""
        return HttpResponse('result')

    def post(self, request):
        """Create or update the item's ``CloudwatchEvent`` row and POST it onward.

        The JSON body must carry ``item_id``, ``cloudwatch_event_name``,
        ``minute``, ``hour``, ``day``, ``weekday`` and ``state``.
        """
        post = json.loads(request.body.decode("utf-8", "ignore"))

        item_id = post['item_id']
        schedule = {
            "cloudwatch_event_name": post['cloudwatch_event_name'],
            "minute": post['minute'],
            "hour": post['hour'],
            "day": post['day'],
            "weekday": post['weekday'],
            "state": post['state'],
        }

        item = Item.objects.get(id=item_id)

        # Fetch a single row instead of indexing the queryset repeatedly:
        # after ``if not queryset`` cached the empty result, the later ``[0]``
        # lookups raised IndexError whenever the row had just been created.
        event = CloudwatchEvent.objects.filter(item=item_id).first()
        if event is None:
            item.cloudwatchevent_set.create(**schedule)
        else:
            for field_name, value in schedule.items():
                setattr(event, field_name, value)
            event.save()

        lambda_cloudwatch_event_endpoint = "https://nfs4jnw0a0.execute-api.ap-northeast-1.amazonaws.com/default/test"

        # Presumably the receiving side reads these keys with a mapping
        # template of the following shape — verify against the deployed API:
        # {
        #   "item_id"               : $input.json("$.item_id"),
        #   "item_name"             : $input.json("$.item_name"),
        #   "item_url"              : $input.json("$.item_url"),
        #   "username"              : $input.json("$.username"),
        #   "cloudwatch_event_name" : $input.json("$.cloudwatch_event_name"),
        #   "minute"                : $input.json("$.minute"),
        #   "hour"                  : $input.json("$.hour"),
        #   "day"                   : $input.json("$.day"),
        #   "weekday"               : $input.json("$.weekday"),
        #   "state"                 : $input.json("$.state")
        # }
        payload = {
            "item_id": item_id,
            "item_name": item.item_name,
            "item_url": item.item_url,
            "username": item.username.username,
            **schedule,
        }

        response = requests.post(
            lambda_cloudwatch_event_endpoint,
            data=json.dumps(payload),
            # A timeout keeps a stalled endpoint from blocking the request forever.
            timeout=30,
        )

        # The reply is only logged; a non-JSON body must not turn an already
        # saved schedule into a 500.
        try:
            print(json.dumps(response.json()))
        except ValueError:
            print(response.text)

        return HttpResponse('result')



class CreateUser(View):
    """Create a Django user from a JSON body carrying ``username`` and ``password``."""

    def get(self, request):
        """Return a fixed placeholder body."""
        return HttpResponse('result')

    def post(self, request):
        """Create the user, or answer 409 when the username is already taken."""
        post = json.loads(request.body.decode("utf-8", "ignore"))

        # The request body is not printed any more: it contains the
        # plaintext password.
        username = post["username"]
        password = post["password"]

        if User.objects.filter(username=username).exists():
            # Previously a duplicate surfaced as an unhandled database error.
            return JsonResponse({'error': 'username already exists'}, status=409)

        # No e-mail address is collected, so None is passed for it.
        User.objects.create_user(username, None, password)

        return HttpResponse('result')



# スクレイピング

def scraping(url):
    """Open ``url`` in headless Chrome and read the stock status from the page.

    The text of the first ``<span>`` inside the element with id
    ``availability`` is matched against the known "in stock" phrases.

    Returns:
        A ``(flag, item_stock)`` tuple: ``flag`` is the string ``'1'`` when
        the text matches and ``'0'`` otherwise; ``item_stock`` is the
        extracted text.

    Raises:
        ValueError: if the page has no ``availability`` element, or that
            element contains no ``<span>``.
    """
    started = time.time()

    driver_path = "./bin/chromedriver_latest"

    options = Options()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # ``options=`` replaces the deprecated ``chrome_options=`` keyword.
    # NOTE(review): ``executable_path`` is deprecated in Selenium 4 as well;
    # switch to a Service object when the Selenium version is upgraded.
    driver = webdriver.Chrome(executable_path=driver_path, options=options)

    try:
        driver.get(url)
        # Fixed pause before reading the DOM; presumably lets the page finish
        # rendering.
        time.sleep(5)
        html = driver.page_source.encode('utf-8')
    finally:
        # Always shut Chrome down. The original only cleaned up on
        # ZeroDivisionError, so any real failure leaked a browser process.
        driver.quit()

    soup = BeautifulSoup(html, 'lxml')
    availability = soup.find(id="availability")
    span = availability.find('span') if availability is not None else None
    if span is None:
        raise ValueError('availability text not found on page: ' + str(url))

    item_stock = span.get_text()

    stock_pattern_words = r'(在庫あり|\d+(~|-|~|ー)\d+(日|週間|か月)以内に発送|残り\d+点)'
    flag = '1' if re.search(stock_pattern_words, item_stock) else '0'

    print(f"経過時間:{time.time() - started:.1f}")

    return flag, item_stock

backend/myapp/models.py

backend/myapp/models.py
from django.db import models
from django.contrib.auth.models import User

class Item(models.Model):
    """A product page whose stock status is tracked for a user."""
    # Display name of the product; also what __str__ returns.
    item_name = models.CharField(max_length=500)
    # Address of the product page that gets scraped.
    item_url = models.URLField(max_length=500)
    # Stock flag; the views store 0 or the flag returned by scraping() here.
    stock = models.IntegerField()
    # Date the row was created (set automatically on insert).
    created_at = models.DateField(auto_now_add=True)
    # Time of the last scrape as text; the views write '%Y-%m-%d %H:%M:%S'
    # in Asia/Tokyo time, or '0' before the first scrape.
    scraping_at = models.CharField(max_length=500)
    # Owning user. Despite the name this is a ForeignKey to User, not a string.
    username = models.ForeignKey(User, on_delete=models.CASCADE)

    def __str__(self):
        """Return the item name."""
        return self.item_name

class LineAccessToken(models.Model):
    """A LINE Notify access token stored for a user."""
    # Token sent as the bearer credential when posting to LINE Notify.
    line_access_token = models.CharField(max_length=500)
    # Owning user. Despite the name this is a ForeignKey to User, not a string.
    username = models.ForeignKey(User, on_delete=models.CASCADE)

class CloudwatchEvent(models.Model):
    """Schedule settings stored for one item and forwarded to the Lambda endpoint."""
    # All schedule parts are kept as free-form text exactly as posted.
    cloudwatch_event_name = models.CharField(max_length=500)
    minute                = models.CharField(max_length=500)
    hour                  = models.CharField(max_length=500)
    day                   = models.CharField(max_length=500)
    weekday               = models.CharField(max_length=500)
    state                 = models.CharField(max_length=500)
    # Item this schedule belongs to; deleted together with the item.
    item                  = models.ForeignKey(Item, on_delete=models.CASCADE)


ForeignKeyについて

【Django】ForeignKeyの基本から応用まで実例付で解説

https://itc.tokyo/django/foreignkey/

Djangoで関連テーブルにレコードを追加する方法

https://intellectual-curiosity.tokyo/2019/07/23/djangoで関連テーブルにレコードを追加する方法/amp/

ForeignKeyで紐づいた子モデルにレコードを追加する場合は、親オブジェクトの逆参照マネージャを使い、
user_ob_id.item_set.create(...)
のように呼び出す。

class Article(models.Model):
    # A ForeignKey to Reporter makes the reverse manager `article_set`
    # available on every Reporter instance.
    reporter = models.ForeignKey(Reporter, on_delete=models.CASCADE)
↓
reporter.article_setが利用できる
↓
reporter.article_set.create()
class Crud(View):
    # Excerpt of the view shown in full in the views.py listing; only the part
    # that demonstrates creating a row through the reverse manager is repeated.
    def get(self, request):
        """Return a fixed placeholder body."""

        return HttpResponse('result')

    def post(self, request):
        """Create the item under the user, or update the existing item of that name."""

        post = json.loads(request.body.decode("utf-8", "ignore"))

        # The id is not read from the request; it is derived from the user below.
        username    = post['username']
        item_name   = post['item_name']
        item_url    = post['item_url']
        stock       = 0
        scraping_at = '0'

        user = User.objects.get(username=username)

        id = user.id

        # Re-fetches the same user by primary key.
        user_ob_id = User.objects.get(id=id)

        print("user_ob_id", user_ob_id)

        items = Item.objects.filter(item_name=item_name)
        print("items出力", items)
        if not items:
            # No item with this name yet: create it through the reverse
            # manager so the ForeignKey to the user is filled in.
            user_ob_id.item_set.create(
                item_name   = item_name,
                item_url    = item_url,
                stock       = stock,
                scraping_at = scraping_at,
            )
        else:
            # An item with this name exists: overwrite its fields and save.
            items[0].item_name = item_name
            items[0].item_url  = item_url
            items[0].stock     = stock
            items[0].save()

スクレイピング

1. binフォルダにchromedriverを置く(実行環境のOSに合ったものを選び、Chrome本体とバージョンを合わせる)
2. optionsは以下の3つを指定するだけで動いた

# Fragment of scraping(): ``dir`` is the chromedriver path assigned there.
# All lines sit at one indentation level so the snippet parses on its own.
options = Options()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

driver = webdriver.Chrome(executable_path=dir, chrome_options=options)
def scraping(url):
    """Open ``url`` in headless Chrome and read the stock status from the page.

    The text of the first ``<span>`` inside the element with id
    ``availability`` is matched against the known "in stock" phrases.

    Returns:
        A ``(flag, item_stock)`` tuple: ``flag`` is the string ``'1'`` when
        the text matches and ``'0'`` otherwise; ``item_stock`` is the
        extracted text.

    Raises:
        ValueError: if the page has no ``availability`` element, or that
            element contains no ``<span>``.
    """
    started = time.time()

    driver_path = "./bin/chromedriver_latest"

    options = Options()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # ``options=`` replaces the deprecated ``chrome_options=`` keyword.
    # NOTE(review): ``executable_path`` is deprecated in Selenium 4 as well;
    # switch to a Service object when the Selenium version is upgraded.
    driver = webdriver.Chrome(executable_path=driver_path, options=options)

    try:
        driver.get(url)
        # Fixed pause before reading the DOM; presumably lets the page finish
        # rendering.
        time.sleep(5)
        html = driver.page_source.encode('utf-8')
    finally:
        # Always shut Chrome down. The original only cleaned up on
        # ZeroDivisionError, so any real failure leaked a browser process.
        driver.quit()

    soup = BeautifulSoup(html, 'lxml')
    availability = soup.find(id="availability")
    span = availability.find('span') if availability is not None else None
    if span is None:
        raise ValueError('availability text not found on page: ' + str(url))

    item_stock = span.get_text()

    stock_pattern_words = r'(在庫あり|\d+(~|-|~|ー)\d+(日|週間|か月)以内に発送|残り\d+点)'
    flag = '1' if re.search(stock_pattern_words, item_stock) else '0'

    print(f"経過時間:{time.time() - started:.1f}")

    return flag, item_stock