third commit
This commit is contained in:
103
Recipe/views.py
103
Recipe/views.py
@@ -22,22 +22,22 @@ def sync_main():
|
|||||||
|
|
||||||
async def save_to_db(dDish_data):
    """Insert or update the Dish record matching ``dDish_data['name']``.

    The ``name`` entry is used as the lookup key and the whole
    ``dDish_data`` mapping is passed as ``defaults``. Once the call
    returns, a one-line status message is printed saying whether the
    dish was added or updated.
    """
    # Dish.objects.update_or_create is wrapped with sync_to_async so the
    # call can be awaited from inside this coroutine.
    fnUpsert = sync_to_async(Dish.objects.update_or_create)
    oDish, bCreated = await fnUpsert(
        name=dDish_data['name'],
        defaults=dDish_data,
    )

    if bCreated:
        sAction = "added"
    else:
        sAction = "updated"
    print(f"Dish '{oDish.name}' was {sAction}.")
|
||||||
|
|
||||||
|
|
||||||
# 將圖片轉為base64
|
# 將圖片轉為base64
|
||||||
async def fetch_image_as_base64(page, image_url):
|
async def fetch_image_as_base64(oPage, sImage_url):
|
||||||
print("Fetching image from URL:", image_url)
|
print("Fetching image from URL:", sImage_url)
|
||||||
try:
|
try:
|
||||||
response = await page.request.get(image_url)
|
oResponse = await oPage.request.get(sImage_url)
|
||||||
if response.ok:
|
if oResponse.ok:
|
||||||
image_data = await response.body()
|
bytImage_data = await oResponse.body()
|
||||||
sImage_base64 = base64.b64encode(image_data).decode()
|
sImage_base64 = base64.b64encode(bytImage_data).decode()
|
||||||
return sImage_base64
|
return sImage_base64
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error fetching image: {e}")
|
print(f"Error fetching image: {e}")
|
||||||
@@ -45,27 +45,27 @@ async def fetch_image_as_base64(page, image_url):
|
|||||||
|
|
||||||
|
|
||||||
async def run(playwright: Playwright):
|
async def run(playwright: Playwright):
|
||||||
browser = await playwright.chromium.launch(headless=False)
|
oBrowser = await playwright.chromium.launch(headless=False)
|
||||||
context = await browser.new_context()
|
oContext = await oBrowser.new_context()
|
||||||
page = await context.new_page()
|
oPage = await oContext.new_page()
|
||||||
await page.goto("https://oishi-kenko.com/recipes")
|
await oPage.goto("https://oishi-kenko.com/recipes")
|
||||||
await page.get_by_role("link", name="ログイン").click()
|
await oPage.get_by_role("link", name="ログイン").click()
|
||||||
await page.get_by_role("link", name="メールアドレス でログイン").click()
|
await oPage.get_by_role("link", name="メールアドレス でログイン").click()
|
||||||
await page.locator("#secure_account_credential_email").click()
|
await oPage.locator("#secure_account_credential_email").click()
|
||||||
await page.locator("#secure_account_credential_email").fill("asd851117005545@gmail.com")
|
await oPage.locator("#secure_account_credential_email").fill("asd851117005545@gmail.com")
|
||||||
await page.locator("#secure_account_credential_email").press("Tab")
|
await oPage.locator("#secure_account_credential_email").press("Tab")
|
||||||
await page.locator("#secure_account_credential_password").fill("a22897051")
|
await oPage.locator("#secure_account_credential_password").fill("a22897051")
|
||||||
await page.get_by_role("button", name="ログイン").click()
|
await oPage.get_by_role("button", name="ログイン").click()
|
||||||
await page.goto("https://oishi-kenko.com/recipes")
|
await oPage.goto("https://oishi-kenko.com/recipes")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
# 定位到所有匹配連結
|
# 定位到所有匹配連結
|
||||||
links = page.locator('a.p-recipe-list-item__title-link')
|
oLinks = oPage.locator('a.p-recipe-list-item__title-link')
|
||||||
|
|
||||||
# 獲取連結數量
|
# 獲取連結數量
|
||||||
iLink_count = await links.count()
|
iLink_count = await oLinks.count()
|
||||||
|
|
||||||
# 點擊每個連結
|
# 點擊每個連結
|
||||||
for i in range(iLink_count):
|
for i in range(iLink_count):
|
||||||
@@ -74,47 +74,47 @@ async def run(playwright: Playwright):
|
|||||||
|
|
||||||
# 使用 nth(i) 定位第 i 個元素,並點擊
|
# 使用 nth(i) 定位第 i 個元素,並點擊
|
||||||
try:
|
try:
|
||||||
await page.locator('a.p-recipe-list-item__title-link').nth(i).click()
|
await oPage.locator('a.p-recipe-list-item__title-link').nth(i).click()
|
||||||
|
|
||||||
# 等待頁面
|
# 等待頁面
|
||||||
await page.wait_for_load_state('networkidle')
|
await oPage.wait_for_load_state('networkidle')
|
||||||
# await asyncio.sleep(5)
|
# await asyncio.sleep(5)
|
||||||
|
|
||||||
print("------菜名-----")
|
print("------菜名-----")
|
||||||
# 輸出名稱
|
# 輸出名稱
|
||||||
sDishname = await page.text_content('.p-recipe-detail__title')
|
sDishname = await oPage.text_content('.p-recipe-detail__title')
|
||||||
print(sDishname)
|
print(sDishname)
|
||||||
sDishname_clean = sDishname.strip().replace('\n', '')
|
sDishname_clean = sDishname.strip().replace('\n', '')
|
||||||
|
|
||||||
# 菜名圖片
|
# 菜名圖片
|
||||||
image_element = page.locator('.p-recipe-detail__photo-image--pc-only')
|
oImage_element = oPage.locator('.p-recipe-detail__photo-image--pc-only')
|
||||||
|
|
||||||
# 从元素的 'src' 属性中获取图片的 URL
|
# 从元素的 'src' 属性中获取图片的 URL
|
||||||
sImage_url = await image_element.get_attribute('src')
|
sImage_url = await oImage_element.get_attribute('src')
|
||||||
|
|
||||||
# 确保获取到的 URL 不为空
|
# 确保获取到的 URL 不为空
|
||||||
if sImage_url:
|
if sImage_url:
|
||||||
# 获取图片的 Base64 编码
|
# 获取图片的 Base64 编码
|
||||||
sDish_image_base64 = await fetch_image_as_base64(page, sImage_url)
|
sDish_image_base64 = await fetch_image_as_base64(oPage, sImage_url)
|
||||||
else:
|
else:
|
||||||
sDish_image_base64 = None
|
sDish_image_base64 = None
|
||||||
|
|
||||||
# print("------按讚數-----")
|
# print("------按讚數-----")
|
||||||
# 輸出按讚數
|
# 輸出按讚數
|
||||||
sLike_count = await page.text_content('.c-button-circle__top-text')
|
sLike_count = await oPage.text_content('.c-button-circle__top-text')
|
||||||
# print(Likes_count)
|
# print(Likes_count)
|
||||||
sLikes_count_clean = sLike_count.strip().replace('\n', '')
|
sLikes_count_clean = sLike_count.strip().replace('\n', '')
|
||||||
|
|
||||||
# print("------標籤-----")
|
# print("------標籤-----")
|
||||||
# 輸出標籤
|
# 輸出標籤
|
||||||
lTags = await page.locator('.c-button-round-tag__link').all_text_contents()
|
lTags = await oPage.locator('.c-button-round-tag__link').all_text_contents()
|
||||||
# for tag in tags:
|
# for tag in tags:
|
||||||
# print(tag)
|
# print(tag)
|
||||||
lTags_clean = [sTag.strip() for sTag in lTags]
|
lTags_clean = [sTag.strip() for sTag in lTags]
|
||||||
|
|
||||||
# print("------適應症-----")
|
# print("------適應症-----")
|
||||||
# 輸出適應症
|
# 輸出適應症
|
||||||
lIndications = await page.locator('.c-recipes-relevant-dietary-concerns__text').all_text_contents()
|
lIndications = await oPage.locator('.c-recipes-relevant-dietary-concerns__text').all_text_contents()
|
||||||
# for Indication in Indications:
|
# for Indication in Indications:
|
||||||
# print(Indication)
|
# print(Indication)
|
||||||
lIndications_clean = [sIndication.strip() for sIndication in lIndications]
|
lIndications_clean = [sIndication.strip() for sIndication in lIndications]
|
||||||
@@ -154,7 +154,7 @@ async def run(playwright: Playwright):
|
|||||||
sMonounsaturated_fatty_acid = ''
|
sMonounsaturated_fatty_acid = ''
|
||||||
sPolyunsaturated_fatty_acid = ''
|
sPolyunsaturated_fatty_acid = ''
|
||||||
|
|
||||||
lNutritions1 = await page.locator('.c-nutrition-table__cell--1').all_text_contents()
|
lNutritions1 = await oPage.locator('.c-nutrition-table__cell--1').all_text_contents()
|
||||||
# for nutrition in nutritions1:
|
# for nutrition in nutritions1:
|
||||||
# print(nutrition)
|
# print(nutrition)
|
||||||
# nutritions_clean = [nutrition.strip().replace('\n', '') for nutrition in nutritions]
|
# nutritions_clean = [nutrition.strip().replace('\n', '') for nutrition in nutritions]
|
||||||
@@ -180,7 +180,7 @@ async def run(playwright: Playwright):
|
|||||||
if 'カリウム' in sNutrition1:
|
if 'カリウム' in sNutrition1:
|
||||||
sPotassium = sNutrition1.split('カリウム')[1].strip().replace('\n', '')
|
sPotassium = sNutrition1.split('カリウム')[1].strip().replace('\n', '')
|
||||||
|
|
||||||
lNutritions2 = await page.locator('.c-nutrition-table__cell--2').all_text_contents()
|
lNutritions2 = await oPage.locator('.c-nutrition-table__cell--2').all_text_contents()
|
||||||
|
|
||||||
for sNutrition2 in lNutritions2:
|
for sNutrition2 in lNutritions2:
|
||||||
if 'カルシウム' in sNutrition2:
|
if 'カルシウム' in sNutrition2:
|
||||||
@@ -204,7 +204,7 @@ async def run(playwright: Playwright):
|
|||||||
if 'ビタミンC' in sNutrition2:
|
if 'ビタミンC' in sNutrition2:
|
||||||
sVitamin_C = sNutrition2.split('ビタミンC')[1].strip().replace('\n', '')
|
sVitamin_C = sNutrition2.split('ビタミンC')[1].strip().replace('\n', '')
|
||||||
|
|
||||||
lNutritions3 = await page.locator('.c-nutrition-table__cell--3').all_text_contents()
|
lNutritions3 = await oPage.locator('.c-nutrition-table__cell--3').all_text_contents()
|
||||||
|
|
||||||
for sNutrition3 in lNutritions3:
|
for sNutrition3 in lNutritions3:
|
||||||
if 'ビタミンB6' in sNutrition3:
|
if 'ビタミンB6' in sNutrition3:
|
||||||
@@ -229,7 +229,7 @@ async def run(playwright: Playwright):
|
|||||||
sPolyunsaturated_fatty_acid = sNutrition3.split('多価不飽和脂肪酸')[1].strip().replace('\n', '')
|
sPolyunsaturated_fatty_acid = sNutrition3.split('多価不飽和脂肪酸')[1].strip().replace('\n', '')
|
||||||
|
|
||||||
# 食料
|
# 食料
|
||||||
lIngredients = await page.locator('.p-recipe-ingredient-list__item').all_text_contents()
|
lIngredients = await oPage.locator('.p-recipe-ingredient-list__item').all_text_contents()
|
||||||
# for Ingredient in Ingredients:
|
# for Ingredient in Ingredients:
|
||||||
# print(Ingredient)
|
# print(Ingredient)
|
||||||
lIngredients_clean = [sIngredient.strip().replace('\n', '') for sIngredient in lIngredients]
|
lIngredients_clean = [sIngredient.strip().replace('\n', '') for sIngredient in lIngredients]
|
||||||
@@ -237,13 +237,13 @@ async def run(playwright: Playwright):
|
|||||||
print('------作法步驟-----')
|
print('------作法步驟-----')
|
||||||
|
|
||||||
# 作法
|
# 作法
|
||||||
lSteps = await page.locator('.p-recipe-step__item').all_text_contents()
|
lSteps = await oPage.locator('.p-recipe-step__item').all_text_contents()
|
||||||
# for Step in Steps:
|
# for Step in Steps:
|
||||||
# print(Step)
|
# print(Step)
|
||||||
lSteps_clean = [sStep.strip().replace('\n', '') for sStep in lSteps]
|
lSteps_clean = [sStep.strip().replace('\n', '') for sStep in lSteps]
|
||||||
|
|
||||||
# 定位到所有步骤的图片元素
|
# 定位到所有步骤的图片元素
|
||||||
Image_elements = page.locator('.p-recipe-step__item-image')
|
Image_elements = oPage.locator('.p-recipe-step__item-image')
|
||||||
|
|
||||||
# 获取所有图片元素的 src 属性(即图片的 URL)
|
# 获取所有图片元素的 src 属性(即图片的 URL)
|
||||||
lImage_urls = await Image_elements.evaluate_all("elements => elements.map(e => e.getAttribute('src'))")
|
lImage_urls = await Image_elements.evaluate_all("elements => elements.map(e => e.getAttribute('src'))")
|
||||||
@@ -252,7 +252,7 @@ async def run(playwright: Playwright):
|
|||||||
lStep_images_base64 = []
|
lStep_images_base64 = []
|
||||||
for sImage_url in lImage_urls:
|
for sImage_url in lImage_urls:
|
||||||
# 直接使用图片的 URL 下载图片并转换为 Base64
|
# 直接使用图片的 URL 下载图片并转换为 Base64
|
||||||
sImage_base64 = await fetch_image_as_base64(page, sImage_url)
|
sImage_base64 = await fetch_image_as_base64(oPage, sImage_url)
|
||||||
if sImage_base64:
|
if sImage_base64:
|
||||||
lStep_images_base64.append(sImage_base64)
|
lStep_images_base64.append(sImage_base64)
|
||||||
|
|
||||||
@@ -302,23 +302,28 @@ async def run(playwright: Playwright):
|
|||||||
break
|
break
|
||||||
except Exception as e: # 捕获可能发生的异常
|
except Exception as e: # 捕获可能发生的异常
|
||||||
print(f"遇到错误:{e},尝试返回并重试")
|
print(f"遇到错误:{e},尝试返回并重试")
|
||||||
await page.go_back() # 返回前一页
|
await oPage.go_back() # 返回前一页
|
||||||
iRetry_count += 1 # 重试计数器加1
|
iRetry_count += 1 # 重试计数器加1
|
||||||
if iRetry_count >= 3:
|
if iRetry_count >= 3:
|
||||||
print("重试次数超限,跳过当前链接")
|
print("重试次数超限,跳过当前链接")
|
||||||
break # 跳出循环,处理下一个链接
|
break # 跳出循环,处理下一个链接
|
||||||
# 使用浏览器的后退功能返回列表页,这样不需要重新加载初始URL
|
# 使用浏览器的后退功能返回列表页,这样不需要重新加载初始URL
|
||||||
await page.go_back()
|
await oPage.go_back()
|
||||||
|
bNext_page_button_exists = await oPage.is_visible('span.next a[rel="next"]')
|
||||||
try:
|
if bNext_page_button_exists:
|
||||||
await page.click('span.next a[rel="next"]')
|
try:
|
||||||
await page.wait_for_load_state('networkidle')
|
await oPage.click('span.next a[rel="next"]')
|
||||||
except Error:
|
await oPage.wait_for_load_state('networkidle')
|
||||||
# 如果“下一頁”不存在,break
|
except Exception as e:
|
||||||
|
print(f"訪問時錯誤:{e},嘗試重新加載")
|
||||||
|
# 點擊“下一頁”失敗時不中止,回到迴圈開頭重試
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
print("已達最後一頁")
|
||||||
break
|
break
|
||||||
# ---------------------
|
# ---------------------
|
||||||
await context.close()
|
await oContext.close()
|
||||||
await browser.close()
|
await oBrowser.close()
|
||||||
|
|
||||||
|
|
||||||
async def main() -> None:
|
async def main() -> None:
|
||||||
|
|||||||
Reference in New Issue
Block a user