stephen.yu
10 months ago
commit
5063429ab8
23 changed files with 895 additions and 0 deletions
@ -0,0 +1,8 @@
@@ -0,0 +1,8 @@
|
||||
# Default ignored files |
||||
/shelf/ |
||||
/workspace.xml |
||||
# Editor-based HTTP Client requests |
||||
/httpRequests/ |
||||
# Datasource local storage ignored files |
||||
/dataSources/ |
||||
/dataSources.local.xml |
@ -0,0 +1,14 @@
@@ -0,0 +1,14 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?> |
||||
<module version="4"> |
||||
<component name="PyDocumentationSettings"> |
||||
<option name="format" value="PLAIN" /> |
||||
<option name="myDocStringFormat" value="Plain" /> |
||||
</component> |
||||
<component name="TemplatesService"> |
||||
<option name="TEMPLATE_FOLDERS"> |
||||
<list> |
||||
<option value="$MODULE_DIR$/templates" /> |
||||
</list> |
||||
</option> |
||||
</component> |
||||
</module> |
@ -0,0 +1,6 @@
@@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager"> |
||||
<settings> |
||||
<option name="USE_PROJECT_PROFILE" value="false" /> |
||||
<version value="1.0" /> |
||||
</settings> |
||||
</component> |
@ -0,0 +1,7 @@
@@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?> |
||||
<project version="4"> |
||||
<component name="Black"> |
||||
<option name="sdkName" value="Python 3.11 (RecipeCrawler)" /> |
||||
</component> |
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (RecipeCrawler)" project-jdk-type="Python SDK" /> |
||||
</project> |
@ -0,0 +1,6 @@
@@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?> |
||||
<project version="4"> |
||||
<component name="VcsDirectoryMappings"> |
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" /> |
||||
</component> |
||||
</project> |
@ -0,0 +1,3 @@
@@ -0,0 +1,3 @@
|
||||
from django.contrib import admin |
||||
|
||||
# Register your models here. |
@ -0,0 +1,6 @@
@@ -0,0 +1,6 @@
|
||||
from django.apps import AppConfig |
||||
|
||||
|
||||
class RecipeConfig(AppConfig):
    """Django application configuration for the ``Recipe`` app."""

    # Dotted label Django uses to locate this application.
    name = "Recipe"
    # Primary-key type used for models that do not declare one explicitly.
    default_auto_field = "django.db.models.BigAutoField"
@ -0,0 +1,57 @@
@@ -0,0 +1,57 @@
|
||||
# Generated by Django 5.0.2 on 2024-03-05 08:14 |
||||
|
||||
from django.db import migrations, models |
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Initial schema: create the ``Dish`` table."""

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name='Dish',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(max_length=255, unique=True)),
                ('image', models.TextField()),
                ('likes', models.IntegerField(default=0)),
                ('tags', models.TextField()),
                ('indications', models.TextField()),
                # Every nutrition value is declared with the same unique
                # 255-character text column; a fresh field instance is built
                # per name, in the original column order.
                *(
                    (column, models.CharField(max_length=255, unique=True))
                    for column in (
                        'Calories',
                        'Salt',
                        'Protein',
                        'Total_fat',
                        'Total_Carbohydrate',
                        'Total_sugar',
                        'Dietary_fiber',
                        'Soluble_fiber',
                        'K',
                        'Ca',
                        'Mg',
                        'P',
                        'Fe',
                        'Zn',
                        'I',
                        'Cholesterol',
                        'Vitamin_B1',
                        'Vitamin_B2',
                        'Vitamin_C',
                        'Vitamin_B6',
                        'Vitamin_B12',
                        'Folate',
                        'Vitamin_A',
                        'Vitamin_D',
                        'Vitamin_K',
                        'Vitamin_E',
                        'Saturated_fatty_acid',
                        'Monounsaturated_fatty_acid',
                        'Polyunsaturated_fatty_acid',
                    )
                ),
                ('Ingredients', models.TextField()),
                ('Steps', models.JSONField()),
                ('Step_images_Base64', models.JSONField()),
            ],
        ),
    ]
@ -0,0 +1,18 @@
@@ -0,0 +1,18 @@
|
||||
# Generated by Django 5.0.2 on 2024-03-06 06:06 |
||||
|
||||
from django.db import migrations, models |
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add the optional ``Insoluble_fiber`` text column to ``Dish``."""

    dependencies = [('Recipe', '0001_initial')]

    operations = [
        migrations.AddField(
            field=models.CharField(null=True, blank=True, max_length=255),
            name='Insoluble_fiber',
            model_name='dish',
        ),
    ]
@ -0,0 +1,158 @@
@@ -0,0 +1,158 @@
|
||||
# Generated by Django 5.0.2 on 2024-03-06 06:23 |
||||
|
||||
from django.db import migrations, models |
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Relax every nutrition column on ``Dish`` to an optional, non-unique text field."""

    dependencies = [('Recipe', '0002_dish_insoluble_fiber')]

    # One AlterField per column, emitted in the same order as the generated
    # migration; each operation receives its own field instance.
    operations = [
        migrations.AlterField(
            model_name='dish',
            name=column,
            field=models.CharField(blank=True, max_length=255, null=True),
        )
        for column in (
            'Ca',
            'Calories',
            'Cholesterol',
            'Dietary_fiber',
            'Fe',
            'Folate',
            'I',
            'K',
            'Mg',
            'Monounsaturated_fatty_acid',
            'P',
            'Polyunsaturated_fatty_acid',
            'Protein',
            'Salt',
            'Saturated_fatty_acid',
            'Soluble_fiber',
            'Total_Carbohydrate',
            'Total_fat',
            'Total_sugar',
            'Vitamin_A',
            'Vitamin_B1',
            'Vitamin_B12',
            'Vitamin_B2',
            'Vitamin_B6',
            'Vitamin_C',
            'Vitamin_D',
            'Vitamin_E',
            'Vitamin_K',
            'Zn',
        )
    ]
@ -0,0 +1,45 @@
@@ -0,0 +1,45 @@
|
||||
from django.db import models |
||||
from django.contrib.postgres.fields import JSONField |
||||
|
||||
|
||||
|
||||
# Create your models here. |
||||
def _nutrient_field():
    """Build the optional 255-character text column shared by all nutrition values."""
    return models.CharField(max_length=255, unique=False, blank=True, null=True)


class Dish(models.Model):
    """One crawled recipe: identity, pictures, nutrition values and preparation steps."""

    name = models.CharField(max_length=255, unique=True)  # dish names are assumed to be unique
    image = models.TextField()  # Base64-encoded picture
    likes = models.IntegerField(default=0)
    tags = models.TextField()  # tag list, e.g. a comma-separated string
    indications = models.TextField()

    # Nutrition values, stored as free text exactly as scraped.
    Calories = _nutrient_field()
    Salt = _nutrient_field()
    Protein = _nutrient_field()
    Total_fat = _nutrient_field()
    Total_Carbohydrate = _nutrient_field()
    Total_sugar = _nutrient_field()
    Dietary_fiber = _nutrient_field()
    Soluble_fiber = _nutrient_field()
    Insoluble_fiber = _nutrient_field()
    K = _nutrient_field()
    Ca = _nutrient_field()
    Mg = _nutrient_field()
    P = _nutrient_field()
    Fe = _nutrient_field()
    Zn = _nutrient_field()
    I = _nutrient_field()
    Cholesterol = _nutrient_field()
    Vitamin_B1 = _nutrient_field()
    Vitamin_B2 = _nutrient_field()
    Vitamin_C = _nutrient_field()
    Vitamin_B6 = _nutrient_field()
    Vitamin_B12 = _nutrient_field()
    Folate = _nutrient_field()
    Vitamin_A = _nutrient_field()
    Vitamin_D = _nutrient_field()
    Vitamin_K = _nutrient_field()
    Vitamin_E = _nutrient_field()
    Saturated_fatty_acid = _nutrient_field()
    Monounsaturated_fatty_acid = _nutrient_field()
    Polyunsaturated_fatty_acid = _nutrient_field()

    Ingredients = models.TextField()
    Steps = models.JSONField()
    Step_images_Base64 = models.JSONField()
@ -0,0 +1,3 @@
@@ -0,0 +1,3 @@
|
||||
from django.test import TestCase |
||||
|
||||
# Create your tests here. |
@ -0,0 +1,336 @@
@@ -0,0 +1,336 @@
|
||||
import asyncio |
||||
import base64 |
||||
import pandas as pd |
||||
from pandas import DataFrame |
||||
from playwright.async_api import Playwright, async_playwright, Error |
||||
from asgiref.sync import sync_to_async |
||||
import time |
||||
from django.http import HttpResponseBadRequest, HttpResponse |
||||
from django.views import View |
||||
from Recipe.models import Dish |
||||
|
||||
|
||||
class RecipeCrawlerView(View):
    """Endpoint that runs the recipe crawler when it receives a GET request."""

    def get(self, request):
        """Run the whole crawl synchronously, then answer with a plain-text acknowledgement."""
        sync_main()
        response = HttpResponse("Success!", content_type="text/plain")
        return response
||||
|
||||
|
||||
# Synchronous wrapper so the asynchronous crawler can be started from a
# regular (synchronous) Django view.
def sync_main():
    """Run the ``main`` coroutine to completion via ``asyncio.run`` and block until it ends."""
    crawl = main()
    asyncio.run(crawl)
||||
|
||||
|
||||
async def save_to_db(dish_data):
    """Insert or update the ``Dish`` row whose name equals ``dish_data['name']``.

    The ORM call is synchronous, so it is wrapped with ``sync_to_async`` to be
    awaitable from the crawler coroutine. The whole ``dish_data`` mapping is
    passed as the ``defaults`` for the row. Prints whether the row was added
    or updated.
    """
    upsert = sync_to_async(Dish.objects.update_or_create)
    dish, created = await upsert(name=dish_data['name'], defaults=dish_data)
    if created:
        action = "added"
    else:
        action = "updated"
    print(f"Dish '{dish.name}' was {action}.")
||||
|
||||
|
||||
# Download an image and convert it to Base64.
async def fetch_image_as_base64(page, image_url):
    """Fetch ``image_url`` through ``page.request`` and return it Base64-encoded.

    Args:
        page: Object exposing an awaitable ``request.get(url)`` whose result
            has an ``ok`` attribute and an awaitable ``body()``.
        image_url: Address of the image. ``None`` or an empty string is
            accepted and treated as "no image".

    Returns:
        The image bytes encoded as a Base64 ``str``, or ``None`` when no URL
        was given, the response was not OK, or the download raised.
    """
    if not image_url:
        # Elements without a ``src`` attribute yield None; there is nothing to
        # download, so do not issue a request that can only fail.
        print("No image URL given; skipping download")
        return None

    print("Fetching image from URL:", image_url)
    try:
        response = await page.request.get(image_url)
        if not response.ok:
            # Previously this case returned None without any trace in the log.
            print(f"Error fetching image: non-OK response for {image_url}")
            return None
        image_data = await response.body()
    except Exception as e:
        # Deliberately best-effort: a broken image must not abort the crawl.
        print(f"Error fetching image: {e}")
        return None
    return base64.b64encode(image_data).decode()
||||
|
||||
|
||||
# CSS selector of the recipe links on a listing page.
_RECIPE_LINK_SELECTOR = 'a.p-recipe-list-item__title-link'

# For each nutrition-table column: its CSS selector and the mapping from the
# label shown on the page to the ``Dish`` field that stores the value.
_NUTRITION_COLUMNS = (
    ('.c-nutrition-table__cell--1', {
        'エネルギー': 'Calories',
        '食塩相当量': 'Salt',
        'たんぱく質': 'Protein',
        '脂質': 'Total_fat',
        '炭水化物': 'Total_Carbohydrate',
        '糖質': 'Total_sugar',
        '食物繊維': 'Dietary_fiber',
        '水溶性食物繊維': 'Soluble_fiber',
        '不溶性食物繊維': 'Insoluble_fiber',
        'カリウム': 'K',
    }),
    ('.c-nutrition-table__cell--2', {
        'カルシウム': 'Ca',
        'マグネシウム': 'Mg',
        'リン': 'P',
        '鉄': 'Fe',
        '亜鉛': 'Zn',
        'ヨウ素': 'I',
        'コレステロール': 'Cholesterol',
        'ビタミンB1': 'Vitamin_B1',
        'ビタミンB2': 'Vitamin_B2',
        'ビタミンC': 'Vitamin_C',
    }),
    ('.c-nutrition-table__cell--3', {
        'ビタミンB6': 'Vitamin_B6',
        'ビタミンB12': 'Vitamin_B12',
        '葉酸': 'Folate',
        'ビタミンA': 'Vitamin_A',
        'ビタミンD': 'Vitamin_D',
        'ビタミンK': 'Vitamin_K',
        'ビタミンE': 'Vitamin_E',
        '飽和脂肪酸': 'Saturated_fatty_acid',
        '一価不飽和脂肪酸': 'Monounsaturated_fatty_acid',
        '多価不飽和脂肪酸': 'Polyunsaturated_fatty_acid',
    }),
)

# Every known label, used to detect a short label that is merely a substring
# of a longer one present in the same cell.
_ALL_NUTRITION_LABELS = tuple(
    label for _selector, labels in _NUTRITION_COLUMNS for label in labels
)


def _extract_nutrition(cell_texts, labels):
    """Map the cell texts of one nutrition column to ``{field name: value text}``.

    Every field in ``labels`` is present in the result and defaults to ``''``.
    A label is ignored for a cell when a longer known label containing it is
    also in that cell: '食物繊維' is a substring of '水溶性食物繊維', and
    '飽和脂肪酸' of '一価不飽和脂肪酸', so plain substring matching let the
    longer rows overwrite the shorter fields.
    """
    values = {field: '' for field in labels.values()}
    for text in cell_texts:
        for label, field in labels.items():
            if label not in text:
                continue
            shadowed = any(
                other != label and label in other and other in text
                for other in _ALL_NUTRITION_LABELS
            )
            if shadowed:
                continue
            values[field] = text.split(label)[1].strip().replace('\n', '')
    return values


async def _back_to_listing(page, listing_url):
    """Bring ``page`` back to ``listing_url`` unless it is already there."""
    if page.url == listing_url:
        return
    await page.go_back()
    if page.url != listing_url:
        # History did not lead back to the listing; load it directly.
        await page.goto(listing_url)


async def _scrape_dish(page):
    """Collect the ``Dish`` fields from the recipe detail page open in ``page``."""
    print("------菜名-----")
    dishname = await page.text_content('.p-recipe-detail__title')
    print(dishname)

    # Main photo: read the URL from the element's ``src`` attribute.
    image_url = await page.locator('.p-recipe-detail__photo-image--pc-only').get_attribute('src')
    dish_image = await fetch_image_as_base64(page, image_url) if image_url else None

    like_text = await page.text_content('.c-button-circle__top-text')
    tags = await page.locator('.c-button-round-tag__link').all_text_contents()
    indications = await page.locator('.c-recipes-relevant-dietary-concerns__text').all_text_contents()

    nutrition = {}
    for selector, labels in _NUTRITION_COLUMNS:
        cell_texts = await page.locator(selector).all_text_contents()
        nutrition.update(_extract_nutrition(cell_texts, labels))

    ingredients = await page.locator('.p-recipe-ingredient-list__item').all_text_contents()

    print('------作法步驟-----')
    steps = await page.locator('.p-recipe-step__item').all_text_contents()

    # Step photos: collect every ``src`` and keep the ones that download.
    step_image_urls = await page.locator('.p-recipe-step__item-image').evaluate_all(
        "elements => elements.map(e => e.getAttribute('src'))"
    )
    step_images = []
    for url in step_image_urls:
        if not url:
            continue
        encoded = await fetch_image_as_base64(page, url)
        if encoded:
            step_images.append(encoded)

    return {
        'name': dishname.strip().replace('\n', ''),
        # ``Dish.image`` is a non-null text column; store '' when the photo is
        # unavailable so a missing picture does not discard the whole recipe.
        'image': dish_image or '',
        'likes': like_text.strip().replace('\n', ''),
        'tags': ", ".join(tag.strip() for tag in tags),
        'indications': ", ".join(item.strip() for item in indications),
        **nutrition,
        'Ingredients': ", ".join(item.strip().replace('\n', '') for item in ingredients),
        'Steps': [step.strip().replace('\n', '') for step in steps],
        'Step_images_Base64': step_images,
    }


async def run(playwright: Playwright):
    """Log in to oishi-kenko.com and crawl every recipe on every listing page.

    Each recipe link on a listing page is opened, scraped and stored with
    ``save_to_db``; a failing recipe is retried up to ``max_retries`` times
    and then skipped. The crawl stops when the "next page" link cannot be
    clicked. The browser is closed even when the crawl raises.

    Args:
        playwright: A started async Playwright instance.
    """
    import os  # local import: only this function reads the environment

    # NOTE(review): these credentials were hard-coded in the source. They are
    # kept as fallbacks for backward compatibility; rotate them and provide
    # OISHI_KENKO_EMAIL / OISHI_KENKO_PASSWORD instead.
    email = os.environ.get('OISHI_KENKO_EMAIL', 'asd851117005545@gmail.com')
    password = os.environ.get('OISHI_KENKO_PASSWORD', 'a22897051')

    max_retries = 3

    browser = await playwright.chromium.launch(headless=False)
    try:
        context = await browser.new_context()
        page = await context.new_page()

        # Log in through the e-mail form, then return to the recipe listing.
        await page.goto("https://oishi-kenko.com/recipes")
        await page.get_by_role("link", name="ログイン").click()
        await page.get_by_role("link", name="メールアドレス でログイン").click()
        await page.locator("#secure_account_credential_email").click()
        await page.locator("#secure_account_credential_email").fill(email)
        await page.locator("#secure_account_credential_email").press("Tab")
        await page.locator("#secure_account_credential_password").fill(password)
        await page.get_by_role("button", name="ログイン").click()
        await page.goto("https://oishi-kenko.com/recipes")

        while True:
            # Remember the listing so we only navigate back when we left it.
            listing_url = page.url
            link_count = await page.locator(_RECIPE_LINK_SELECTOR).count()

            for i in range(link_count):
                for _attempt in range(max_retries):
                    try:
                        await page.locator(_RECIPE_LINK_SELECTOR).nth(i).click()
                        await page.wait_for_load_state('networkidle')
                        await save_to_db(await _scrape_dish(page))
                        break
                    except Exception as e:
                        # Best-effort crawl: report, go back to the listing, retry.
                        print(f"遇到错误:{e},尝试返回并重试")
                        await _back_to_listing(page, listing_url)
                else:
                    print("重试次数超限,跳过当前链接")
                # Return to the listing without reloading the initial URL.
                await _back_to_listing(page, listing_url)

            try:
                await page.click('span.next a[rel="next"]')
                await page.wait_for_load_state('networkidle')
            except Error:
                # No "next page" link: the crawl is complete.
                break
    finally:
        # Closing the browser also closes the context created from it.
        await browser.close()
||||
|
||||
|
||||
async def main() -> None:
    """Start Playwright, run the crawl with it, and stop Playwright when the crawl ends."""
    async with async_playwright() as pw:
        await run(pw)
||||
|
||||
# asyncio.run(main()) |
||||
# async def simple_test(): |
||||
# print("Simple async test") |
||||
# |
||||
# asyncio.run(simple_test()) |
@ -0,0 +1,16 @@
@@ -0,0 +1,16 @@
|
||||
""" |
||||
ASGI config for RecipeCrawler project. |
||||
|
||||
It exposes the ASGI callable as a module-level variable named ``application``. |
||||
|
||||
For more information on this file, see |
||||
https://docs.djangoproject.com/en/5.0/howto/deployment/asgi/ |
||||
""" |
||||
|
||||
import os |
||||
|
||||
from django.core.asgi import get_asgi_application |
||||
|
||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'RecipeCrawler.settings') |
||||
|
||||
application = get_asgi_application() |
@ -0,0 +1,125 @@
@@ -0,0 +1,125 @@
|
||||
""" |
||||
Django settings for RecipeCrawler project. |
||||
|
||||
Generated by 'django-admin startproject' using Django 5.0.2. |
||||
|
||||
For more information on this file, see |
||||
https://docs.djangoproject.com/en/5.0/topics/settings/ |
||||
|
||||
For the full list of settings and their values, see |
||||
https://docs.djangoproject.com/en/5.0/ref/settings/ |
||||
""" |
||||
|
||||
from pathlib import Path |
||||
|
||||
# Build paths inside the project like this: BASE_DIR / 'subdir'. |
||||
BASE_DIR = Path(__file__).resolve().parent.parent |
||||
|
||||
|
||||
# Quick-start development settings - unsuitable for production |
||||
# See https://docs.djangoproject.com/en/5.0/howto/deployment/checklist/ |
||||
|
||||
# SECURITY WARNING: keep the secret key used in production secret! |
||||
SECRET_KEY = 'django-insecure-89j!e^jyf!ak#t!2oxzwbk^%fmhljxi%w*epobnrz^k-*&+!wr' |
||||
|
||||
# SECURITY WARNING: don't run with debug turned on in production! |
||||
DEBUG = True |
||||
|
||||
ALLOWED_HOSTS = [] |
||||
|
||||
|
||||
# Application definition |
||||
|
||||
INSTALLED_APPS = [ |
||||
'django.contrib.admin', |
||||
'django.contrib.auth', |
||||
'django.contrib.contenttypes', |
||||
'django.contrib.sessions', |
||||
'django.contrib.messages', |
||||
'django.contrib.staticfiles', |
||||
'Recipe', |
||||
] |
||||
|
||||
MIDDLEWARE = [ |
||||
'django.middleware.security.SecurityMiddleware', |
||||
'django.contrib.sessions.middleware.SessionMiddleware', |
||||
'django.middleware.common.CommonMiddleware', |
||||
'django.middleware.csrf.CsrfViewMiddleware', |
||||
'django.contrib.auth.middleware.AuthenticationMiddleware', |
||||
'django.contrib.messages.middleware.MessageMiddleware', |
||||
'django.middleware.clickjacking.XFrameOptionsMiddleware', |
||||
] |
||||
|
||||
ROOT_URLCONF = 'RecipeCrawler.urls' |
||||
|
||||
TEMPLATES = [ |
||||
{ |
||||
'BACKEND': 'django.template.backends.django.DjangoTemplates', |
||||
'DIRS': [BASE_DIR / 'templates'] |
||||
, |
||||
'APP_DIRS': True, |
||||
'OPTIONS': { |
||||
'context_processors': [ |
||||
'django.template.context_processors.debug', |
||||
'django.template.context_processors.request', |
||||
'django.contrib.auth.context_processors.auth', |
||||
'django.contrib.messages.context_processors.messages', |
||||
], |
||||
}, |
||||
}, |
||||
] |
||||
|
||||
WSGI_APPLICATION = 'RecipeCrawler.wsgi.application' |
||||
|
||||
|
||||
# Database |
||||
# https://docs.djangoproject.com/en/5.0/ref/settings/#databases |
||||
|
||||
DATABASES = { |
||||
'default': { |
||||
'ENGINE': 'django.db.backends.sqlite3', |
||||
'NAME': BASE_DIR / 'recipe_db.sqlite3', |
||||
} |
||||
} |
||||
|
||||
|
||||
# Password validation |
||||
# https://docs.djangoproject.com/en/5.0/ref/settings/#auth-password-validators |
||||
|
||||
AUTH_PASSWORD_VALIDATORS = [ |
||||
{ |
||||
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', |
||||
}, |
||||
{ |
||||
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', |
||||
}, |
||||
{ |
||||
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', |
||||
}, |
||||
{ |
||||
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', |
||||
}, |
||||
] |
||||
|
||||
|
||||
# Internationalization |
||||
# https://docs.djangoproject.com/en/5.0/topics/i18n/ |
||||
|
||||
LANGUAGE_CODE = 'en-us' |
||||
|
||||
TIME_ZONE = 'UTC' |
||||
|
||||
USE_I18N = True |
||||
|
||||
USE_TZ = True |
||||
|
||||
|
||||
# Static files (CSS, JavaScript, Images) |
||||
# https://docs.djangoproject.com/en/5.0/howto/static-files/ |
||||
|
||||
STATIC_URL = 'static/' |
||||
|
||||
# Default primary key field type |
||||
# https://docs.djangoproject.com/en/5.0/ref/settings/#default-auto-field |
||||
|
||||
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' |
@ -0,0 +1,24 @@
@@ -0,0 +1,24 @@
|
||||
""" |
||||
URL configuration for RecipeCrawler project. |
||||
|
||||
The `urlpatterns` list routes URLs to views. For more information please see: |
||||
https://docs.djangoproject.com/en/5.0/topics/http/urls/ |
||||
Examples: |
||||
Function views |
||||
1. Add an import: from my_app import views |
||||
2. Add a URL to urlpatterns: path('', views.home, name='home') |
||||
Class-based views |
||||
1. Add an import: from other_app.views import Home |
||||
2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') |
||||
Including another URLconf |
||||
1. Import the include() function: from django.urls import include, path |
||||
2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) |
||||
""" |
||||
from django.contrib import admin |
||||
from django.urls import path |
||||
from Recipe import views as recipe_views |
||||
|
||||
urlpatterns = [ |
||||
path('start/', recipe_views.RecipeCrawlerView.as_view()), |
||||
path('admin/', admin.site.urls), |
||||
] |
@ -0,0 +1,16 @@
@@ -0,0 +1,16 @@
|
||||
""" |
||||
WSGI config for RecipeCrawler project. |
||||
|
||||
It exposes the WSGI callable as a module-level variable named ``application``. |
||||
|
||||
For more information on this file, see |
||||
https://docs.djangoproject.com/en/5.0/howto/deployment/wsgi/ |
||||
""" |
||||
|
||||
import os |
||||
|
||||
from django.core.wsgi import get_wsgi_application |
||||
|
||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'RecipeCrawler.settings') |
||||
|
||||
application = get_wsgi_application() |
@ -0,0 +1,23 @@
@@ -0,0 +1,23 @@
|
||||
#!/usr/bin/env python |
||||
"""Django's command-line utility for administrative tasks.""" |
||||
import os |
||||
import sys |
||||
import django |
||||
|
||||
|
||||
def main():
    """Select the project settings module and hand ``sys.argv`` to Django's CLI."""
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'RecipeCrawler.settings')
    try:
        from django.core.management import execute_from_command_line as dispatch
    except ImportError as exc:
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from exc
    dispatch(sys.argv)


if __name__ == '__main__':
    main()
@ -0,0 +1,24 @@
@@ -0,0 +1,24 @@
|
||||
from playwright.sync_api import Playwright, sync_playwright, expect |
||||
|
||||
|
||||
def run(playwright: Playwright) -> None:
    """Replay a recorded browser session against the recipe listing.

    Opens one recipe, checks that the article shows the dish name and the
    '糖尿病' entry, follows the '高血圧' link, then closes the page, context
    and browser.
    """
    dish_name = "ねぎたっぷり 塩牛丼"

    browser = playwright.chromium.launch(headless=False)
    context = browser.new_context()
    page = context.new_page()
    page.goto("https://oishi-kenko.com/recipes")

    page.get_by_text(dish_name).click()
    heading = page.get_by_role("heading", name=dish_name)
    heading.click()
    heading.click(button="right")

    article = page.get_by_role("article")
    expect(article).to_contain_text(dish_name)
    article.locator("li").filter(has_text="糖尿病").locator("div").click()
    expect(article).to_contain_text("糖尿病")
    page.get_by_role("link", name="高血圧", exact=True).click()
    page.close()

    # ---------------------
    context.close()
    browser.close()


# Runs on import as well as on direct execution: there is no __main__ guard.
with sync_playwright() as playwright:
    run(playwright)
Loading…
Reference in new issue