first commit
This commit is contained in:
0
Recipe/__init__.py
Normal file
0
Recipe/__init__.py
Normal file
3
Recipe/admin.py
Normal file
3
Recipe/admin.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from django.contrib import admin
|
||||
|
||||
# Register your models here.
|
||||
6
Recipe/apps.py
Normal file
6
Recipe/apps.py
Normal file
@@ -0,0 +1,6 @@
|
||||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class RecipeConfig(AppConfig):
    """Django application configuration for the ``Recipe`` app."""

    # Name under which the application is registered.
    name = "Recipe"
    # Primary-key field class used by models that do not declare their own.
    default_auto_field = "django.db.models.BigAutoField"
|
||||
57
Recipe/migrations/0001_initial.py
Normal file
57
Recipe/migrations/0001_initial.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# Generated by Django 5.0.2 on 2024-03-05 08:14
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Initial migration: create the ``Dish`` model.

    Every nutrition column is created as a unique, 255-character string
    column; the other columns hold the dish metadata, ingredients and steps.
    """

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name="Dish",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("name", models.CharField(max_length=255, unique=True)),
                ("image", models.TextField()),
                ("likes", models.IntegerField(default=0)),
                ("tags", models.TextField()),
                ("indications", models.TextField()),
                # The nutrition columns all share one definition. A separate
                # field instance is built for each column, and the column
                # order is kept exactly as generated.
                *[
                    (column, models.CharField(max_length=255, unique=True))
                    for column in (
                        "Calories",
                        "Salt",
                        "Protein",
                        "Total_fat",
                        "Total_Carbohydrate",
                        "Total_sugar",
                        "Dietary_fiber",
                        "Soluble_fiber",
                        "K",
                        "Ca",
                        "Mg",
                        "P",
                        "Fe",
                        "Zn",
                        "I",
                        "Cholesterol",
                        "Vitamin_B1",
                        "Vitamin_B2",
                        "Vitamin_C",
                        "Vitamin_B6",
                        "Vitamin_B12",
                        "Folate",
                        "Vitamin_A",
                        "Vitamin_D",
                        "Vitamin_K",
                        "Vitamin_E",
                        "Saturated_fatty_acid",
                        "Monounsaturated_fatty_acid",
                        "Polyunsaturated_fatty_acid",
                    )
                ],
                ("Ingredients", models.TextField()),
                ("Steps", models.JSONField()),
                ("Step_images_Base64", models.JSONField()),
            ],
        ),
    ]
|
||||
18
Recipe/migrations/0002_dish_insoluble_fiber.py
Normal file
18
Recipe/migrations/0002_dish_insoluble_fiber.py
Normal file
@@ -0,0 +1,18 @@
|
||||
# Generated by Django 5.0.2 on 2024-03-06 06:06
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add the optional ``Insoluble_fiber`` text column to ``Dish``."""

    dependencies = [("Recipe", "0001_initial")]

    operations = [
        migrations.AddField(
            model_name="dish",
            name="Insoluble_fiber",
            # Nullable and blank-able: a value does not have to be supplied.
            field=models.CharField(null=True, blank=True, max_length=255),
        ),
    ]
|
||||
@@ -0,0 +1,158 @@
|
||||
# Generated by Django 5.0.2 on 2024-03-06 06:23
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Relax the nutrition columns of ``Dish``.

    Each listed column is altered to a 255-character string column that
    allows NULL and blank values and no longer carries ``unique=True``.
    """

    dependencies = [("Recipe", "0002_dish_insoluble_fiber")]

    # One AlterField per column, in the same order as the generated file.
    # A new CharField instance is created for every operation.
    operations = [
        migrations.AlterField(
            model_name="dish",
            name=column,
            field=models.CharField(blank=True, max_length=255, null=True),
        )
        for column in (
            "Ca",
            "Calories",
            "Cholesterol",
            "Dietary_fiber",
            "Fe",
            "Folate",
            "I",
            "K",
            "Mg",
            "Monounsaturated_fatty_acid",
            "P",
            "Polyunsaturated_fatty_acid",
            "Protein",
            "Salt",
            "Saturated_fatty_acid",
            "Soluble_fiber",
            "Total_Carbohydrate",
            "Total_fat",
            "Total_sugar",
            "Vitamin_A",
            "Vitamin_B1",
            "Vitamin_B12",
            "Vitamin_B2",
            "Vitamin_B6",
            "Vitamin_C",
            "Vitamin_D",
            "Vitamin_E",
            "Vitamin_K",
            "Zn",
        )
    ]
|
||||
0
Recipe/migrations/__init__.py
Normal file
0
Recipe/migrations/__init__.py
Normal file
45
Recipe/models.py
Normal file
45
Recipe/models.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from django.db import models
|
||||
from django.contrib.postgres.fields import JSONField
|
||||
|
||||
|
||||
|
||||
# Create your models here.
|
||||
def _nutrient_field():
    """Build the column definition shared by every nutrition value.

    Each call returns a new optional (NULL / blank allowed) 255-character
    string field, so every attribute gets its own field instance.
    """
    return models.CharField(max_length=255, blank=True, null=True)


class Dish(models.Model):
    """A recipe with its metadata, nutrition values, ingredients and steps.

    Nutrition values are kept as free-form text rather than as numbers.
    """

    # Dish name; assumed to be unique per dish.
    name = models.CharField(unique=True, max_length=255)
    # Base64-encoded picture of the dish.
    image = models.TextField()
    likes = models.IntegerField(default=0)
    # Tag list stored as text, e.g. a comma-separated string.
    tags = models.TextField()
    indications = models.TextField()

    # --- nutrition values -------------------------------------------------
    Calories = _nutrient_field()
    Salt = _nutrient_field()
    Protein = _nutrient_field()
    Total_fat = _nutrient_field()
    Total_Carbohydrate = _nutrient_field()
    Total_sugar = _nutrient_field()
    Dietary_fiber = _nutrient_field()
    Soluble_fiber = _nutrient_field()
    Insoluble_fiber = _nutrient_field()
    K = _nutrient_field()
    Ca = _nutrient_field()
    Mg = _nutrient_field()
    P = _nutrient_field()
    Fe = _nutrient_field()
    Zn = _nutrient_field()
    I = _nutrient_field()
    Cholesterol = _nutrient_field()
    Vitamin_B1 = _nutrient_field()
    Vitamin_B2 = _nutrient_field()
    Vitamin_C = _nutrient_field()
    Vitamin_B6 = _nutrient_field()
    Vitamin_B12 = _nutrient_field()
    Folate = _nutrient_field()
    Vitamin_A = _nutrient_field()
    Vitamin_D = _nutrient_field()
    Vitamin_K = _nutrient_field()
    Vitamin_E = _nutrient_field()
    Saturated_fatty_acid = _nutrient_field()
    Monounsaturated_fatty_acid = _nutrient_field()
    Polyunsaturated_fatty_acid = _nutrient_field()

    # --- preparation ------------------------------------------------------
    Ingredients = models.TextField()
    Steps = models.JSONField()
    Step_images_Base64 = models.JSONField()
|
||||
3
Recipe/tests.py
Normal file
3
Recipe/tests.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from django.test import TestCase
|
||||
|
||||
# Create your tests here.
|
||||
336
Recipe/views.py
Normal file
336
Recipe/views.py
Normal file
@@ -0,0 +1,336 @@
|
||||
import asyncio
|
||||
import base64
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
from playwright.async_api import Playwright, async_playwright, Error
|
||||
from asgiref.sync import sync_to_async
|
||||
import time
|
||||
from django.http import HttpResponseBadRequest, HttpResponse
|
||||
from django.views import View
|
||||
from Recipe.models import Dish
|
||||
|
||||
|
||||
class RecipeCrawlerView(View):
    """View that runs the recipe crawler when it receives a GET request."""

    def get(self, request):
        """Run the crawl to completion, then answer with a plain-text ``Success!``."""
        sync_main()
        response = HttpResponse("Success!", content_type="text/plain")
        return response
|
||||
|
||||
|
||||
# Synchronous wrapper that runs the asynchronous crawler, so it can be called from synchronous Django code.
|
||||
def sync_main():
    """Run the asynchronous crawler from synchronous code.

    The ``main`` coroutine is executed to completion with ``asyncio.run``,
    so the call blocks until the crawl has finished.
    """
    crawl = main()
    asyncio.run(crawl)
|
||||
|
||||
|
||||
async def save_to_db(dish_data):
    """Create or update the ``Dish`` row identified by ``dish_data['name']``.

    Args:
        dish_data: Mapping of ``Dish`` field names to values; it must contain
            a ``'name'`` entry, which is used as the lookup key. The whole
            mapping is passed as the ``defaults`` for the row.
    """
    # The ORM call is synchronous, so it is wrapped to be awaitable here.
    upsert = sync_to_async(Dish.objects.update_or_create)
    dish, created = await upsert(name=dish_data['name'], defaults=dish_data)

    if created:
        action = "added"
    else:
        action = "updated"
    print(f"Dish '{dish.name}' was {action}.")
|
||||
|
||||
|
||||
# Download an image and convert it to Base64.
|
||||
async def fetch_image_as_base64(page, image_url):
    """Download an image via the page's request API and Base64-encode it.

    Args:
        page: Object whose ``request.get(url)`` can be awaited and yields a
            response exposing ``ok``, ``status`` and an awaitable ``body()``.
        image_url: Address of the image. A missing or empty value is skipped
            without issuing a request.

    Returns:
        The image content as Base64 text, or ``None`` when no URL was given,
        the server answered with a non-OK status, or the download raised.
    """
    if not image_url:
        # Nothing to download (e.g. an element without a ``src`` attribute).
        print("No image URL given; skipping download.")
        return None

    print("Fetching image from URL:", image_url)
    try:
        response = await page.request.get(image_url)
        if not response.ok:
            # Report the failure instead of silently returning nothing.
            print(f"Error fetching image: HTTP {response.status} for {image_url}")
            return None
        image_data = await response.body()
    except Exception as e:
        # Best effort: a failed image must not abort the caller.
        print(f"Error fetching image: {e}")
        return None

    return base64.b64encode(image_data).decode()
|
||||
|
||||
|
||||
# Recipe listing page; also the page the crawler returns to after logging in.
_RECIPE_LIST_URL = "https://oishi-kenko.com/recipes"
_RECIPE_LINK_SELECTOR = 'a.p-recipe-list-item__title-link'
_NEXT_PAGE_SELECTOR = 'span.next a[rel="next"]'

# The nutrition table is read from these three cell classes.
_NUTRITION_CELL_SELECTORS = (
    '.c-nutrition-table__cell--1',
    '.c-nutrition-table__cell--2',
    '.c-nutrition-table__cell--3',
)

# Label printed on the site -> Dish field that stores the value.
_NUTRITION_LABELS = {
    'エネルギー': 'Calories',
    '食塩相当量': 'Salt',
    'たんぱく質': 'Protein',
    '脂質': 'Total_fat',
    '炭水化物': 'Total_Carbohydrate',
    '糖質': 'Total_sugar',
    '食物繊維': 'Dietary_fiber',
    '水溶性食物繊維': 'Soluble_fiber',
    '不溶性食物繊維': 'Insoluble_fiber',
    'カリウム': 'K',
    'カルシウム': 'Ca',
    'マグネシウム': 'Mg',
    'リン': 'P',
    '鉄': 'Fe',
    '亜鉛': 'Zn',
    'ヨウ素': 'I',
    'コレステロール': 'Cholesterol',
    'ビタミンB1': 'Vitamin_B1',
    'ビタミンB2': 'Vitamin_B2',
    'ビタミンC': 'Vitamin_C',
    'ビタミンB6': 'Vitamin_B6',
    'ビタミンB12': 'Vitamin_B12',
    '葉酸': 'Folate',
    'ビタミンA': 'Vitamin_A',
    'ビタミンD': 'Vitamin_D',
    'ビタミンK': 'Vitamin_K',
    'ビタミンE': 'Vitamin_E',
    '飽和脂肪酸': 'Saturated_fatty_acid',
    '一価不飽和脂肪酸': 'Monounsaturated_fatty_acid',
    '多価不飽和脂肪酸': 'Polyunsaturated_fatty_acid',
}

# Longest labels first, so that a cell labelled '水溶性食物繊維' is not
# mistaken for the shorter '食物繊維' it contains (same for the fatty acids
# and for 'ビタミンB1' / 'ビタミンB12').
_NUTRITION_LABELS_LONGEST_FIRST = sorted(_NUTRITION_LABELS, key=len, reverse=True)


def _crawler_credentials():
    """Read the site login from the environment instead of the source file."""
    import os

    email = os.environ.get("OISHI_KENKO_EMAIL")
    password = os.environ.get("OISHI_KENKO_PASSWORD")
    if not email or not password:
        raise RuntimeError(
            "Set the OISHI_KENKO_EMAIL and OISHI_KENKO_PASSWORD environment "
            "variables before running the recipe crawler."
        )
    return email, password


def _parse_like_count(text):
    """Turn the like-counter text into an int; missing or non-numeric text gives 0."""
    # NOTE(review): abbreviated counters (e.g. "1.2k") would be misread here;
    # confirm the site always prints plain digits.
    digits = "".join(ch for ch in (text or "") if ch.isdecimal())
    return int(digits) if digits else 0


def _parse_nutrition(cells):
    """Map nutrition-table cell texts to Dish field values ('' when absent).

    Each cell is attributed to the single longest label it contains.
    Assumes one nutrient per cell -- TODO confirm against the live markup.
    """
    values = {field: '' for field in _NUTRITION_LABELS.values()}
    for cell in cells:
        for label in _NUTRITION_LABELS_LONGEST_FIRST:
            if label in cell:
                field = _NUTRITION_LABELS[label]
                values[field] = cell.split(label)[1].strip().replace('\n', '')
                break
    return values


async def _login(page, email, password):
    """Sign in through the e-mail login form and open the recipe listing."""
    await page.goto(_RECIPE_LIST_URL)
    await page.get_by_role("link", name="ログイン").click()
    await page.get_by_role("link", name="メールアドレス でログイン").click()
    email_input = page.locator("#secure_account_credential_email")
    await email_input.click()
    await email_input.fill(email)
    await email_input.press("Tab")
    await page.locator("#secure_account_credential_password").fill(password)
    await page.get_by_role("button", name="ログイン").click()
    await page.goto(_RECIPE_LIST_URL)


async def _scrape_recipe(page):
    """Collect every Dish field from the recipe detail page currently shown."""
    print("------菜名-----")
    dishname = await page.text_content('.p-recipe-detail__title')
    print(dishname)
    dish_data = {'name': dishname.strip().replace('\n', '')}

    # Main picture: download it only when the element carries a URL.
    image_url = await page.locator('.p-recipe-detail__photo-image--pc-only').get_attribute('src')
    cover_image = await fetch_image_as_base64(page, image_url) if image_url else None
    # The image column does not accept NULL, so a failed download is stored
    # as an empty string rather than making the whole dish fail to save.
    dish_data['image'] = cover_image or ''

    like_text = await page.text_content('.c-button-circle__top-text')
    dish_data['likes'] = _parse_like_count(like_text)

    tags = await page.locator('.c-button-round-tag__link').all_text_contents()
    dish_data['tags'] = ", ".join(tag.strip() for tag in tags)

    indications = await page.locator('.c-recipes-relevant-dietary-concerns__text').all_text_contents()
    dish_data['indications'] = ", ".join(item.strip() for item in indications)

    nutrition_cells = []
    for selector in _NUTRITION_CELL_SELECTORS:
        nutrition_cells.extend(await page.locator(selector).all_text_contents())
    dish_data.update(_parse_nutrition(nutrition_cells))

    ingredients = await page.locator('.p-recipe-ingredient-list__item').all_text_contents()
    dish_data['Ingredients'] = ", ".join(
        item.strip().replace('\n', '') for item in ingredients
    )

    print('------作法步驟-----')
    steps = await page.locator('.p-recipe-step__item').all_text_contents()
    dish_data['Steps'] = [step.strip().replace('\n', '') for step in steps]

    # Step pictures: read every src attribute, then download each image.
    step_image_urls = await page.locator('.p-recipe-step__item-image').evaluate_all(
        "elements => elements.map(e => e.getAttribute('src'))"
    )
    step_images = []
    for step_image_url in step_image_urls:
        encoded = await fetch_image_as_base64(page, step_image_url)
        if encoded:
            step_images.append(encoded)
    dish_data['Step_images_Base64'] = step_images

    return dish_data


async def _return_to_listing(page, listing_url):
    """Bring the page back to the listing it was opened from.

    Going back is skipped when the page never left the listing (for example
    when the click itself failed), so a retry cannot walk off the listing.
    """
    if page.url == listing_url:
        return
    await page.go_back()
    if page.url != listing_url:
        # History did not lead back to the listing; load it directly.
        await page.goto(listing_url)


async def run(playwright: Playwright, *, headless: bool = False, max_retries: int = 3):
    """Log in, walk every page of the recipe listing and store each recipe.

    Args:
        playwright: Started Playwright instance used to launch Chromium.
        headless: Whether to launch the browser without a window. Defaults
            to ``False``, i.e. a visible browser window.
        max_retries: Number of attempts made for one recipe before it is
            skipped.

    Raises:
        RuntimeError: If the login e-mail or password is not provided via the
            ``OISHI_KENKO_EMAIL`` / ``OISHI_KENKO_PASSWORD`` environment
            variables.
    """
    # Fail before launching a browser if the login is not configured.
    email, password = _crawler_credentials()

    browser = await playwright.chromium.launch(headless=headless)
    context = None
    try:
        context = await browser.new_context()
        page = await context.new_page()
        await _login(page, email, password)

        while True:
            listing_url = page.url
            link_count = await page.locator(_RECIPE_LINK_SELECTOR).count()

            # Links are addressed by index because the locator has to be
            # resolved again every time the listing is shown.
            for i in range(link_count):
                for _attempt in range(max_retries):
                    try:
                        await page.locator(_RECIPE_LINK_SELECTOR).nth(i).click()
                        await page.wait_for_load_state('networkidle')
                        await save_to_db(await _scrape_recipe(page))
                        break
                    except Exception as e:
                        # Best effort per recipe: report, return and retry.
                        print(f"遇到错误:{e},尝试返回并重试")
                        await _return_to_listing(page, listing_url)
                else:
                    print("重试次数超限,跳过当前链接")

                await _return_to_listing(page, listing_url)

            try:
                await page.click(_NEXT_PAGE_SELECTOR)
                await page.wait_for_load_state('networkidle')
            except Error:
                # No "next page" link could be clicked: the crawl is done.
                break
    finally:
        # Release the browser even when the crawl aborts with an exception.
        if context is not None:
            await context.close()
        await browser.close()
|
||||
|
||||
|
||||
async def main() -> None:
    """Start Playwright, run the crawler once, and stop Playwright on exit."""
    async with async_playwright() as pw:
        await run(pw)
|
||||
|
||||
# asyncio.run(main())
|
||||
# async def simple_test():
|
||||
# print("Simple async test")
|
||||
#
|
||||
# asyncio.run(simple_test())
|
||||
Reference in New Issue
Block a user