Browse Source

first commit

master
stephen.yu 10 months ago
commit
5063429ab8
  1. 8
      .idea/.gitignore
  2. 14
      .idea/RecipeCrawler.iml
  3. 6
      .idea/inspectionProfiles/profiles_settings.xml
  4. 7
      .idea/misc.xml
  5. 6
      .idea/vcs.xml
  6. 0
      README.md
  7. 0
      Recipe/__init__.py
  8. 3
      Recipe/admin.py
  9. 6
      Recipe/apps.py
  10. 57
      Recipe/migrations/0001_initial.py
  11. 18
      Recipe/migrations/0002_dish_insoluble_fiber.py
  12. 158
      Recipe/migrations/0003_alter_dish_ca_alter_dish_calories_and_more.py
  13. 0
      Recipe/migrations/__init__.py
  14. 45
      Recipe/models.py
  15. 3
      Recipe/tests.py
  16. 336
      Recipe/views.py
  17. 0
      RecipeCrawler/__init__.py
  18. 16
      RecipeCrawler/asgi.py
  19. 125
      RecipeCrawler/settings.py
  20. 24
      RecipeCrawler/urls.py
  21. 16
      RecipeCrawler/wsgi.py
  22. 23
      manage.py
  23. 24
      recipe.py

8
.idea/.gitignore vendored

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

14
.idea/RecipeCrawler.iml

@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<module version="4">
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
<component name="TemplatesService">
<option name="TEMPLATE_FOLDERS">
<list>
<option value="$MODULE_DIR$/templates" />
</list>
</option>
</component>
</module>

6
.idea/inspectionProfiles/profiles_settings.xml

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

7
.idea/misc.xml

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Python 3.11 (RecipeCrawler)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (RecipeCrawler)" project-jdk-type="Python SDK" />
</project>

6
.idea/vcs.xml

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

0
Recipe/__init__.py

3
Recipe/admin.py

@ -0,0 +1,3 @@
from django.contrib import admin
# Register your models here.

6
Recipe/apps.py

@ -0,0 +1,6 @@
from django.apps import AppConfig
class RecipeConfig(AppConfig):
    """Application configuration for the ``Recipe`` Django app."""

    # Name under which the app is registered.
    name = 'Recipe'
    # Field class used for primary keys that models do not declare explicitly.
    default_auto_field = 'django.db.models.BigAutoField'

57
Recipe/migrations/0001_initial.py

@ -0,0 +1,57 @@
# Generated by Django 5.0.2 on 2024-03-05 08:14
from django.db import migrations, models
class Migration(migrations.Migration):
    """Initial schema for the ``Recipe`` app: create the ``Dish`` model."""

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name='Dish',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(max_length=255, unique=True)),
                ('image', models.TextField()),
                ('likes', models.IntegerField(default=0)),
                ('tags', models.TextField()),
                ('indications', models.TextField()),
                # The nutrient columns all share one definition, so they are
                # generated here; the tuple order is the column order.
                *[
                    (column, models.CharField(max_length=255, unique=True))
                    for column in (
                        'Calories',
                        'Salt',
                        'Protein',
                        'Total_fat',
                        'Total_Carbohydrate',
                        'Total_sugar',
                        'Dietary_fiber',
                        'Soluble_fiber',
                        'K',
                        'Ca',
                        'Mg',
                        'P',
                        'Fe',
                        'Zn',
                        'I',
                        'Cholesterol',
                        'Vitamin_B1',
                        'Vitamin_B2',
                        'Vitamin_C',
                        'Vitamin_B6',
                        'Vitamin_B12',
                        'Folate',
                        'Vitamin_A',
                        'Vitamin_D',
                        'Vitamin_K',
                        'Vitamin_E',
                        'Saturated_fatty_acid',
                        'Monounsaturated_fatty_acid',
                        'Polyunsaturated_fatty_acid',
                    )
                ],
                ('Ingredients', models.TextField()),
                ('Steps', models.JSONField()),
                ('Step_images_Base64', models.JSONField()),
            ],
        ),
    ]

18
Recipe/migrations/0002_dish_insoluble_fiber.py

@ -0,0 +1,18 @@
# Generated by Django 5.0.2 on 2024-03-06 06:06
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the optional ``Insoluble_fiber`` text column to ``Dish``."""

    dependencies = [('Recipe', '0001_initial')]

    operations = [
        migrations.AddField(
            model_name='dish',
            name='Insoluble_fiber',
            field=models.CharField(null=True, blank=True, max_length=255),
        ),
    ]

158
Recipe/migrations/0003_alter_dish_ca_alter_dish_calories_and_more.py

@ -0,0 +1,158 @@
# Generated by Django 5.0.2 on 2024-03-06 06:23
from django.db import migrations, models
class Migration(migrations.Migration):
    """Redefine the nutrient columns of ``Dish`` as optional text fields.

    Each listed column becomes ``CharField(max_length=255)`` with
    ``blank=True`` and ``null=True``; no ``unique`` option is set.
    """

    dependencies = [('Recipe', '0002_dish_insoluble_fiber')]

    # One AlterField per column, emitted in the order of the tuple below.
    operations = [
        migrations.AlterField(
            model_name='dish',
            name=column,
            field=models.CharField(blank=True, max_length=255, null=True),
        )
        for column in (
            'Ca',
            'Calories',
            'Cholesterol',
            'Dietary_fiber',
            'Fe',
            'Folate',
            'I',
            'K',
            'Mg',
            'Monounsaturated_fatty_acid',
            'P',
            'Polyunsaturated_fatty_acid',
            'Protein',
            'Salt',
            'Saturated_fatty_acid',
            'Soluble_fiber',
            'Total_Carbohydrate',
            'Total_fat',
            'Total_sugar',
            'Vitamin_A',
            'Vitamin_B1',
            'Vitamin_B12',
            'Vitamin_B2',
            'Vitamin_B6',
            'Vitamin_C',
            'Vitamin_D',
            'Vitamin_E',
            'Vitamin_K',
            'Zn',
        )
    ]

0
Recipe/migrations/__init__.py

45
Recipe/models.py

@ -0,0 +1,45 @@
from django.db import models
from django.contrib.postgres.fields import JSONField
# Create your models here.
def _nutrient_column():
    """Return a new optional 255-character text column for one nutrient value."""
    return models.CharField(max_length=255, unique=False, blank=True, null=True)


class Dish(models.Model):
    """A recipe: basic metadata, nutrient values, ingredients and steps."""

    name = models.CharField(max_length=255, unique=True)  # assumes every dish name is unique
    image = models.TextField()  # Base64 encoding of the dish picture
    likes = models.IntegerField(default=0)
    tags = models.TextField()  # can hold a tag list, e.g. as a comma-separated string
    indications = models.TextField()

    # Nutrient values are kept as text; every one of them may be empty.
    Calories = _nutrient_column()
    Salt = _nutrient_column()
    Protein = _nutrient_column()
    Total_fat = _nutrient_column()
    Total_Carbohydrate = _nutrient_column()
    Total_sugar = _nutrient_column()
    Dietary_fiber = _nutrient_column()
    Soluble_fiber = _nutrient_column()
    Insoluble_fiber = _nutrient_column()
    K = _nutrient_column()
    Ca = _nutrient_column()
    Mg = _nutrient_column()
    P = _nutrient_column()
    Fe = _nutrient_column()
    Zn = _nutrient_column()
    I = _nutrient_column()
    Cholesterol = _nutrient_column()
    Vitamin_B1 = _nutrient_column()
    Vitamin_B2 = _nutrient_column()
    Vitamin_C = _nutrient_column()
    Vitamin_B6 = _nutrient_column()
    Vitamin_B12 = _nutrient_column()
    Folate = _nutrient_column()
    Vitamin_A = _nutrient_column()
    Vitamin_D = _nutrient_column()
    Vitamin_K = _nutrient_column()
    Vitamin_E = _nutrient_column()
    Saturated_fatty_acid = _nutrient_column()
    Monounsaturated_fatty_acid = _nutrient_column()
    Polyunsaturated_fatty_acid = _nutrient_column()

    Ingredients = models.TextField()
    Steps = models.JSONField()
    Step_images_Base64 = models.JSONField()

3
Recipe/tests.py

@ -0,0 +1,3 @@
from django.test import TestCase
# Create your tests here.

336
Recipe/views.py

@ -0,0 +1,336 @@
import asyncio
import base64
import pandas as pd
from pandas import DataFrame
from playwright.async_api import Playwright, async_playwright, Error
from asgiref.sync import sync_to_async
import time
from django.http import HttpResponseBadRequest, HttpResponse
from django.views import View
from Recipe.models import Dish
class RecipeCrawlerView(View):
    """View that runs the recipe crawler when it receives a GET request."""

    def get(self, request):
        """Call ``sync_main()`` and then answer with the plain text ``Success!``."""
        sync_main()
        reply = HttpResponse("Success!", content_type="text/plain")
        return reply
# Synchronous wrapper so the asynchronous crawler can be started from sync code.
def sync_main():
    """Run the ``main()`` coroutine to completion via ``asyncio.run``."""
    crawl = main()
    asyncio.run(crawl)
async def save_to_db(dish_data):
    """Create or update the ``Dish`` whose name is ``dish_data['name']``.

    ``dish_data`` is passed as the ``defaults`` of ``update_or_create``; a
    line saying whether the dish was added or updated is printed afterwards.
    """
    # The ORM call is synchronous, so it is wrapped with sync_to_async
    # before being awaited.
    upsert = sync_to_async(Dish.objects.update_or_create)
    dish, created = await upsert(name=dish_data['name'], defaults=dish_data)
    if created:
        action = "added"
    else:
        action = "updated"
    print(f"Dish '{dish.name}' was {action}.")
# Download an image and return it as Base64 text.
async def fetch_image_as_base64(page, image_url):
    """Fetch ``image_url`` through ``page.request`` and Base64-encode the body.

    Returns the encoded string when the response is OK. Returns ``None`` when
    the response is not OK or when any exception is raised; the exception is
    printed rather than propagated.
    """
    print("Fetching image from URL:", image_url)
    try:
        reply = await page.request.get(image_url)
        if not reply.ok:
            return None
        raw = await reply.body()
        return base64.b64encode(raw).decode()
    except Exception as e:
        print(f"Error fetching image: {e}")
    return None
# CSS selector of the recipe links on a list page.
_RECIPE_LINK_SELECTOR = 'a.p-recipe-list-item__title-link'

# One entry per nutrition-table cell class:
# (CSS selector of the cells, {label printed on the page: Dish field name}).
_NUTRITION_GROUPS = (
    ('.c-nutrition-table__cell--1', {
        'エネルギー': 'Calories',
        '食塩相当量': 'Salt',
        'たんぱく質': 'Protein',
        '脂質': 'Total_fat',
        '炭水化物': 'Total_Carbohydrate',
        '糖質': 'Total_sugar',
        '食物繊維': 'Dietary_fiber',
        '水溶性食物繊維': 'Soluble_fiber',
        '不溶性食物繊維': 'Insoluble_fiber',
        'カリウム': 'K',
    }),
    ('.c-nutrition-table__cell--2', {
        'カルシウム': 'Ca',
        'マグネシウム': 'Mg',
        'リン': 'P',
        # The original label here was an empty string, which matched every
        # cell and made str.split raise ValueError ("empty separator").
        '鉄': 'Fe',
        '亜鉛': 'Zn',
        'ヨウ素': 'I',
        'コレステロール': 'Cholesterol',
        'ビタミンB1': 'Vitamin_B1',
        'ビタミンB2': 'Vitamin_B2',
        'ビタミンC': 'Vitamin_C',
    }),
    ('.c-nutrition-table__cell--3', {
        'ビタミンB6': 'Vitamin_B6',
        'ビタミンB12': 'Vitamin_B12',
        '葉酸': 'Folate',
        'ビタミンA': 'Vitamin_A',
        'ビタミンD': 'Vitamin_D',
        'ビタミンK': 'Vitamin_K',
        'ビタミンE': 'Vitamin_E',
        '飽和脂肪酸': 'Saturated_fatty_acid',
        '一価不飽和脂肪酸': 'Monounsaturated_fatty_acid',
        '多価不飽和脂肪酸': 'Polyunsaturated_fatty_acid',
    }),
)


def _clean(text):
    """Strip surrounding whitespace, then remove embedded newlines."""
    return text.strip().replace('\n', '')


def _parse_like_count(raw):
    """Turn the like-counter text into an int; anything non-numeric becomes 0."""
    cleaned = _clean(raw or '').replace(',', '')
    return int(cleaned) if cleaned.isdecimal() else 0


def _extract_nutrients(cell_texts, label_to_field):
    """Map nutrition cell texts to ``{Dish field name: value text}``.

    A label only claims a cell when no longer label found in the same cell
    contains it. Without that rule '食物繊維' would also match the
    '水溶性食物繊維' cell (and '飽和脂肪酸' the '一価不飽和脂肪酸' cell) and
    overwrite the value already read for the shorter label.
    """
    found = {}
    for text in cell_texts:
        present = [label for label in label_to_field if label in text]
        for label in present:
            if any(label != other and label in other for other in present):
                continue  # a more specific label owns this cell
            found[label_to_field[label]] = _clean(text.split(label)[1])
    return found


async def _log_in(page):
    """Open the recipe list and sign in through the e-mail login form."""
    import os

    # NOTE(security): credentials should not live in source control. They can
    # be supplied through OISHI_KENKO_EMAIL / OISHI_KENKO_PASSWORD; the
    # literals below remain only as defaults so existing setups keep working.
    email = os.environ.get("OISHI_KENKO_EMAIL", "asd851117005545@gmail.com")
    password = os.environ.get("OISHI_KENKO_PASSWORD", "a22897051")

    await page.goto("https://oishi-kenko.com/recipes")
    await page.get_by_role("link", name="ログイン").click()
    await page.get_by_role("link", name="メールアドレス でログイン").click()
    email_box = page.locator("#secure_account_credential_email")
    await email_box.click()
    await email_box.fill(email)
    await email_box.press("Tab")
    await page.locator("#secure_account_credential_password").fill(password)
    await page.get_by_role("button", name="ログイン").click()


async def _scrape_recipe(page):
    """Read the recipe detail page currently shown and return the Dish data dict."""
    print("------菜名-----")
    dishname = await page.text_content('.p-recipe-detail__title')
    print(dishname)

    # Main picture: the element's src attribute is downloaded and Base64-encoded.
    image_url = await page.locator('.p-recipe-detail__photo-image--pc-only').get_attribute('src')
    dish_image = None
    if image_url:
        dish_image = await fetch_image_as_base64(page, image_url)

    like_text = await page.text_content('.c-button-circle__top-text')
    tags = await page.locator('.c-button-round-tag__link').all_text_contents()
    indications = await page.locator('.c-recipes-relevant-dietary-concerns__text').all_text_contents()

    # Every nutrient defaults to '' and is filled in when its label is found.
    nutrients = {
        field: ''
        for _, label_to_field in _NUTRITION_GROUPS
        for field in label_to_field.values()
    }
    for selector, label_to_field in _NUTRITION_GROUPS:
        cell_texts = await page.locator(selector).all_text_contents()
        nutrients.update(_extract_nutrients(cell_texts, label_to_field))

    ingredients = await page.locator('.p-recipe-ingredient-list__item').all_text_contents()

    print('------作法步驟-----')
    steps = await page.locator('.p-recipe-step__item').all_text_contents()
    step_image_urls = await page.locator('.p-recipe-step__item-image').evaluate_all(
        "elements => elements.map(e => e.getAttribute('src'))"
    )
    step_images = []
    for step_image_url in step_image_urls:
        encoded = await fetch_image_as_base64(page, step_image_url)
        if encoded:  # images that could not be downloaded are left out
            step_images.append(encoded)

    return {
        'name': _clean(dishname),
        # Dish.image is a non-null TextField, so a missing picture is stored as ''.
        'image': dish_image or '',
        'likes': _parse_like_count(like_text),
        'tags': ", ".join(tag.strip() for tag in tags),
        'indications': ", ".join(indication.strip() for indication in indications),
        **nutrients,
        'Ingredients': ", ".join(_clean(ingredient) for ingredient in ingredients),
        'Steps': [_clean(step) for step in steps],
        'Step_images_Base64': step_images,
    }


async def _return_to_list(page, list_url):
    """Go back one history entry unless the list page is already displayed."""
    if page.url != list_url:
        await page.go_back()


async def run(playwright: "Playwright", max_retries: int = 3):
    """Log in, walk every page of the recipe list and store each recipe.

    Args:
        playwright: Started Playwright driver used to launch Chromium.
        max_retries: Attempts made per recipe link before it is skipped.

    Each recipe link on the current list page is opened, scraped and passed
    to ``save_to_db``. An exception during one recipe is printed and the link
    is retried; after ``max_retries`` failures the link is skipped. The crawl
    stops when clicking the "next page" link raises a Playwright ``Error``.
    The browser context and browser are closed even if the crawl aborts.
    """
    browser = await playwright.chromium.launch(headless=False)
    context = None
    try:
        context = await browser.new_context()
        page = await context.new_page()
        await _log_in(page)
        await page.goto("https://oishi-kenko.com/recipes")

        while True:
            # Remember where the list lives so we only navigate back when a
            # click actually left it (avoids going back twice after a failure).
            list_url = page.url
            link_count = await page.locator(_RECIPE_LINK_SELECTOR).count()

            for index in range(link_count):
                for _attempt in range(max_retries):
                    try:
                        await page.locator(_RECIPE_LINK_SELECTOR).nth(index).click()
                        await page.wait_for_load_state('networkidle')
                        dish_data = await _scrape_recipe(page)
                        await save_to_db(dish_data)
                        break
                    except Exception as e:  # best effort: one bad recipe must not stop the crawl
                        print(f"遇到错误:{e},尝试返回并重试")
                        await _return_to_list(page, list_url)
                else:
                    print("重试次数超限,跳过当前链接")
                # Return to the list page via browser history instead of
                # reloading the initial URL.
                await _return_to_list(page, list_url)

            try:
                await page.click('span.next a[rel="next"]')
                await page.wait_for_load_state('networkidle')
            except Error:
                # No "next page" link: the last list page has been processed.
                break
    finally:
        # ---------------------
        if context is not None:
            await context.close()
        await browser.close()
async def main() -> None:
    """Run the crawler inside an ``async_playwright()`` context."""
    async with async_playwright() as driver:
        await run(driver)
# asyncio.run(main())
# async def simple_test():
# print("Simple async test")
#
# asyncio.run(simple_test())

0
RecipeCrawler/__init__.py

16
RecipeCrawler/asgi.py

@ -0,0 +1,16 @@
"""
ASGI config for RecipeCrawler project.
It exposes the ASGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/5.0/howto/deployment/asgi/
"""
import os
from django.core.asgi import get_asgi_application
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'RecipeCrawler.settings')
application = get_asgi_application()

125
RecipeCrawler/settings.py

@ -0,0 +1,125 @@
"""
Django settings for RecipeCrawler project.
Generated by 'django-admin startproject' using Django 5.0.2.
For more information on this file, see
https://docs.djangoproject.com/en/5.0/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/5.0/ref/settings/
"""
from pathlib import Path
# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent
# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/5.0/howto/deployment/checklist/
import os

# SECURITY WARNING: keep the secret key used in production secret!
# Set DJANGO_SECRET_KEY in the environment for any real deployment; the
# committed value below is only a development fallback.
SECRET_KEY = os.environ.get(
    'DJANGO_SECRET_KEY',
    'django-insecure-89j!e^jyf!ak#t!2oxzwbk^%fmhljxi%w*epobnrz^k-*&+!wr',
)
# SECURITY WARNING: don't run with debug turned on in production!
# DJANGO_DEBUG accepts 1/true/yes/on (case-insensitive); unset keeps debug on.
DEBUG = os.environ.get('DJANGO_DEBUG', 'True').strip().lower() in ('1', 'true', 'yes', 'on')
# Comma-separated host names in DJANGO_ALLOWED_HOSTS; unset keeps the empty list.
ALLOWED_HOSTS = [
    host.strip()
    for host in os.environ.get('DJANGO_ALLOWED_HOSTS', '').split(',')
    if host.strip()
]
# Application definition
INSTALLED_APPS = [
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'Recipe',
]
MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
ROOT_URLCONF = 'RecipeCrawler.urls'
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': [BASE_DIR / 'templates']
,
'APP_DIRS': True,
'OPTIONS': {
'context_processors': [
'django.template.context_processors.debug',
'django.template.context_processors.request',
'django.contrib.auth.context_processors.auth',
'django.contrib.messages.context_processors.messages',
],
},
},
]
WSGI_APPLICATION = 'RecipeCrawler.wsgi.application'
# Database
# https://docs.djangoproject.com/en/5.0/ref/settings/#databases
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.sqlite3',
'NAME': BASE_DIR / 'recipe_db.sqlite3',
}
}
# Password validation
# https://docs.djangoproject.com/en/5.0/ref/settings/#auth-password-validators
AUTH_PASSWORD_VALIDATORS = [
{
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
},
]
# Internationalization
# https://docs.djangoproject.com/en/5.0/topics/i18n/
LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'UTC'
USE_I18N = True
USE_TZ = True
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/5.0/howto/static-files/
STATIC_URL = 'static/'
# Default primary key field type
# https://docs.djangoproject.com/en/5.0/ref/settings/#default-auto-field
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'

24
RecipeCrawler/urls.py

@ -0,0 +1,24 @@
"""
URL configuration for RecipeCrawler project.
The `urlpatterns` list routes URLs to views. For more information please see:
https://docs.djangoproject.com/en/5.0/topics/http/urls/
Examples:
Function views
1. Add an import: from my_app import views
2. Add a URL to urlpatterns: path('', views.home, name='home')
Class-based views
1. Add an import: from other_app.views import Home
2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
Including another URLconf
1. Import the include() function: from django.urls import include, path
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""
from django.contrib import admin
from django.urls import path
from Recipe import views as recipe_views
urlpatterns = [
path('start/', recipe_views.RecipeCrawlerView.as_view()),
path('admin/', admin.site.urls),
]

16
RecipeCrawler/wsgi.py

@ -0,0 +1,16 @@
"""
WSGI config for RecipeCrawler project.
It exposes the WSGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/5.0/howto/deployment/wsgi/
"""
import os
from django.core.wsgi import get_wsgi_application
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'RecipeCrawler.settings')
application = get_wsgi_application()

23
manage.py

@ -0,0 +1,23 @@
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys
import django
def main():
    """Run administrative tasks."""
    # Point Django at the project settings unless the caller already chose some.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'RecipeCrawler.settings')
    try:
        from django.core.management import execute_from_command_line
    except ImportError as exc:
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from exc
    else:
        execute_from_command_line(sys.argv)
if __name__ == '__main__':
main()

24
recipe.py

@ -0,0 +1,24 @@
from playwright.sync_api import Playwright, sync_playwright, expect
def run(playwright: Playwright) -> None:
    """Replay a short browsing session on the recipe site in a visible Chromium.

    Opens the recipe list, opens the dish named below, checks the article
    text with ``expect`` and follows two dietary-concern links, then closes
    the page, the context and the browser.
    """
    dish_title = "ねぎたっぷり 塩牛丼"

    browser = playwright.chromium.launch(headless=False)
    context = browser.new_context()
    page = context.new_page()
    page.goto("https://oishi-kenko.com/recipes")

    page.get_by_text(dish_title).click()
    heading = page.get_by_role("heading", name=dish_title)
    heading.click()
    heading.click(button="right")

    article = page.get_by_role("article")
    expect(article).to_contain_text(dish_title)
    article.locator("li").filter(has_text="糖尿病").locator("div").click()
    expect(article).to_contain_text("糖尿病")
    page.get_by_role("link", name="高血圧", exact=True).click()
    page.close()

    # ---------------------
    context.close()
    browser.close()
with sync_playwright() as playwright:
run(playwright)
Loading…
Cancel
Save