From 172b67a716d4fb81128b22e6bba5b365973374a3 Mon Sep 17 00:00:00 2001 From: "stephen.yu" Date: Fri, 15 Mar 2024 10:32:44 +0800 Subject: [PATCH] third commit --- Recipe/views.py | 103 +++++++++++++++++++++++++----------------------- 1 file changed, 54 insertions(+), 49 deletions(-) diff --git a/Recipe/views.py b/Recipe/views.py index 5266294..b76b304 100644 --- a/Recipe/views.py +++ b/Recipe/views.py @@ -22,22 +22,22 @@ def sync_main(): async def save_to_db(dDish_data): # 将异步ORM操作转换为同步,以适应Django ORM - dish, created = await sync_to_async(Dish.objects.update_or_create)( + oDish, bCreated = await sync_to_async(Dish.objects.update_or_create)( name= dDish_data['name'], defaults= dDish_data ) - sAction = "added" if created else "updated" - print(f"Dish '{dish.name}' was {sAction}.") + sAction = "added" if bCreated else "updated" + print(f"Dish '{oDish.name}' was {sAction}.") # 將圖片轉為base64 -async def fetch_image_as_base64(page, image_url): - print("Fetching image from URL:", image_url) +async def fetch_image_as_base64(oPage, sImage_url): + print("Fetching image from URL:", sImage_url) try: - response = await page.request.get(image_url) - if response.ok: - image_data = await response.body() - sImage_base64 = base64.b64encode(image_data).decode() + oResponse = await oPage.request.get(sImage_url) + if oResponse.ok: + bytImage_data = await oResponse.body() + sImage_base64 = base64.b64encode(bytImage_data).decode() return sImage_base64 except Exception as e: print(f"Error fetching image: {e}") @@ -45,27 +45,27 @@ async def fetch_image_as_base64(page, image_url): async def run(playwright: Playwright): - browser = await playwright.chromium.launch(headless=False) - context = await browser.new_context() - page = await context.new_page() - await page.goto("https://oishi-kenko.com/recipes") - await page.get_by_role("link", name="ログイン").click() - await page.get_by_role("link", name="メールアドレス でログイン").click() - await page.locator("#secure_account_credential_email").click() - await page.locator("#secure_account_credential_email").fill("asd851117005545@gmail.com") - await page.locator("#secure_account_credential_email").press("Tab") - await page.locator("#secure_account_credential_password").fill("a22897051") - await page.get_by_role("button", name="ログイン").click() - await page.goto("https://oishi-kenko.com/recipes") + oBrowser = await playwright.chromium.launch(headless=False) + oContext = await oBrowser.new_context() + oPage = await oContext.new_page() + await oPage.goto("https://oishi-kenko.com/recipes") + await oPage.get_by_role("link", name="ログイン").click() + await oPage.get_by_role("link", name="メールアドレス でログイン").click() + await oPage.locator("#secure_account_credential_email").click() + await oPage.locator("#secure_account_credential_email").fill("asd851117005545@gmail.com") + await oPage.locator("#secure_account_credential_email").press("Tab") + await oPage.locator("#secure_account_credential_password").fill("a22897051") + await oPage.get_by_role("button", name="ログイン").click() + await oPage.goto("https://oishi-kenko.com/recipes") while True: # 訂位到所有匹配連結 - links = page.locator('a.p-recipe-list-item__title-link') + oLinks = oPage.locator('a.p-recipe-list-item__title-link') # 獲取連結數量 - iLink_count = await links.count() + iLink_count = await oLinks.count() # 點擊每個連結 for i in range(iLink_count): @@ -74,47 +74,47 @@ async def run(playwright: Playwright): # 使用 nth(i) 定位第 i 個元素,並點擊 try: - await page.locator('a.p-recipe-list-item__title-link').nth(i).click() + await oPage.locator('a.p-recipe-list-item__title-link').nth(i).click() # 等待頁面 - await page.wait_for_load_state('networkidle') + await oPage.wait_for_load_state('networkidle') # await asyncio.sleep(5) print("------菜名-----") # 輸出名稱 - sDishname = await page.text_content('.p-recipe-detail__title') + sDishname = await oPage.text_content('.p-recipe-detail__title') print(sDishname) sDishname_clean = sDishname.strip().replace('\n', '') # 菜名圖片 - image_element = page.locator('.p-recipe-detail__photo-image--pc-only') + oImage_element = oPage.locator('.p-recipe-detail__photo-image--pc-only') # 从元素的 'src' 属性中获取图片的 URL - sImage_url = await image_element.get_attribute('src') + sImage_url = await oImage_element.get_attribute('src') # 确保获取到的 URL 不为空 if sImage_url: # 获取图片的 Base64 编码 - sDish_image_base64 = await fetch_image_as_base64(page, sImage_url) + sDish_image_base64 = await fetch_image_as_base64(oPage, sImage_url) else: sDish_image_base64 = None # print("------按讚數-----") # 輸出按讚數 - sLike_count = await page.text_content('.c-button-circle__top-text') + sLike_count = await oPage.text_content('.c-button-circle__top-text') # print(Likes_count) sLikes_count_clean = sLike_count.strip().replace('\n', '') # print("------標籤-----") # 輸出標籤 - lTags = await page.locator('.c-button-round-tag__link').all_text_contents() + lTags = await oPage.locator('.c-button-round-tag__link').all_text_contents() # for tag in tags: # print(tag) lTags_clean = [sTag.strip() for sTag in lTags] # print("------適應症-----") # 輸出適應症 - lIndications = await page.locator('.c-recipes-relevant-dietary-concerns__text').all_text_contents() + lIndications = await oPage.locator('.c-recipes-relevant-dietary-concerns__text').all_text_contents() # for Indication in Indications: # print(Indication) lIndications_clean = [sIndication.strip() for sIndication in lIndications] @@ -154,7 +154,7 @@ async def run(playwright: Playwright): sMonounsaturated_fatty_acid = '' sPolyunsaturated_fatty_acid = '' - lNutritions1 = await page.locator('.c-nutrition-table__cell--1').all_text_contents() + lNutritions1 = await oPage.locator('.c-nutrition-table__cell--1').all_text_contents() # for nutrition in nutritions1: # print(nutrition) # nutritions_clean = [nutrition.strip().replace('\n', '') for nutrition in nutritions] @@ -180,7 +180,7 @@ async def run(playwright: Playwright): if 'カリウム' in sNutrition1: sPotassium = sNutrition1.split('カリウム')[1].strip().replace('\n', '') - lNutritions2 = await page.locator('.c-nutrition-table__cell--2').all_text_contents() + lNutritions2 = await oPage.locator('.c-nutrition-table__cell--2').all_text_contents() for sNutrition2 in lNutritions2: if 'カルシウム' in sNutrition2: @@ -204,7 +204,7 @@ async def run(playwright: Playwright): if 'ビタミンC' in sNutrition2: sVitamin_C = sNutrition2.split('ビタミンC')[1].strip().replace('\n', '') - lNutritions3 = await page.locator('.c-nutrition-table__cell--3').all_text_contents() + lNutritions3 = await oPage.locator('.c-nutrition-table__cell--3').all_text_contents() for sNutrition3 in lNutritions3: if 'ビタミンB6' in sNutrition3: @@ -229,7 +229,7 @@ async def run(playwright: Playwright): sPolyunsaturated_fatty_acid = sNutrition3.split('多価不飽和脂肪酸')[1].strip().replace('\n', '') # 食料 - lIngredients = await page.locator('.p-recipe-ingredient-list__item').all_text_contents() + lIngredients = await oPage.locator('.p-recipe-ingredient-list__item').all_text_contents() # for Ingredient in Ingredients: # print(Ingredient) lIngredients_clean = [sIngredient.strip().replace('\n', '') for sIngredient in lIngredients] @@ -237,13 +237,13 @@ async def run(playwright: Playwright): print('------作法步驟-----') # 作法 - lSteps = await page.locator('.p-recipe-step__item').all_text_contents() + lSteps = await oPage.locator('.p-recipe-step__item').all_text_contents() # for Step in Steps: # print(Step) lSteps_clean = [sStep.strip().replace('\n', '') for sStep in lSteps] # 定位到所有步骤的图片元素 - Image_elements = page.locator('.p-recipe-step__item-image') + Image_elements = oPage.locator('.p-recipe-step__item-image') # 获取所有图片元素的 src 属性(即图片的 URL) lImage_urls = await Image_elements.evaluate_all("elements => elements.map(e => e.getAttribute('src'))") @@ -252,7 +252,7 @@ async def run(playwright: Playwright): lStep_images_base64 = [] for sImage_url in lImage_urls: # 直接使用图片的 URL 下载图片并转换为 Base64 - sImage_base64 = await fetch_image_as_base64(page, sImage_url) + sImage_base64 = await fetch_image_as_base64(oPage, sImage_url) if sImage_base64: lStep_images_base64.append(sImage_base64) @@ -302,23 +302,28 @@ async def run(playwright: Playwright): break except Exception as e: # 捕获可能发生的异常 print(f"遇到错误:{e},尝试返回并重试") - await page.go_back() # 返回前一页 + await oPage.go_back() # 返回前一页 iRetry_count += 1 # 重试计数器加1 if iRetry_count >= 3: print("重试次数超限,跳过当前链接") break # 跳出循环,处理下一个链接 # 使用浏览器的后退功能返回列表页,这样不需要重新加载初始URL - await page.go_back() - - try: - await page.click('span.next a[rel="next"]') - await page.wait_for_load_state('networkidle') - except Error: - # 如果“下一頁”不存在,break + await oPage.go_back() + bNext_page_button_exists = await oPage.is_visible('span.next a[rel="next"]') + if bNext_page_button_exists: + try: + await oPage.click('span.next a[rel="next"]') + await oPage.wait_for_load_state('networkidle') + except Exception as e: + print(f"訪問時錯誤:{e},嘗試重新加載") + # 如果“下一頁”不存在,break + continue + else: + print("已達最後一頁") break # --------------------- - await context.close() - await browser.close() + await oContext.close() + await oBrowser.close() async def main() -> None: