批量提取ASIN标题
Python实例
批量提取ASIN标题

批量提取ASIN标题



import os
import re
import shutil
from bs4 import BeautifulSoup
def extract_data_from_file(filepath: str, success_folder: str):
    """Extract the ASIN and the title from one saved HTML page, then archive it.

    The ASIN is taken from the first ``"currentAsin": "..."`` pair found in the
    raw text; if there is none, from an ``asin: "..."`` entry inside a
    ``var opts = {...}`` object. The title is the stripped text of the first
    ``<span>`` nested under ``<h1 id="title">``. After parsing, the file is
    moved into ``success_folder`` (created if it does not exist yet).

    Args:
        filepath: Path of the HTML file to parse.
        success_folder: Directory the file is moved into once parsed.

    Returns:
        A tuple ``(file name without extension, ASIN, title)``. When the ASIN
        or the title cannot be found, the corresponding item is a placeholder
        message instead.

    Raises:
        OSError: If the file cannot be read or moved.
    """
    # errors="ignore" drops undecodable bytes rather than failing on pages
    # that are not valid UTF-8.
    with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
        file_content = f.read()

    # Primary ASIN source first, then the "var opts" fallback; both patterns
    # capture the ASIN in group 1.
    asin_match = re.search(r'"currentAsin"\s*:\s*"([^"]+)"', file_content)
    if not asin_match:
        asin_match = re.search(
            r'var opts\s*=\s*{[^}]*asin\s*:\s*"([^"]+)"', file_content
        )
    asin_value = asin_match.group(1) if asin_match else "未找到 currentAsin"

    soup = BeautifulSoup(file_content, "html.parser")
    span_element = soup.select_one("h1#title span")
    if span_element:
        span_value = span_element.get_text(strip=True)
    else:
        span_value = "未找到 span 属性值"

    file_name_without_extension = os.path.splitext(os.path.basename(filepath))[0]

    # NOTE(review): single quotes are backslash-escaped here; the csv module
    # does not need this, so it is presumably for a downstream consumer --
    # confirm before removing.
    span_value = span_value.replace("'", "\\'")

    # Make sure the destination exists so the function also works when it is
    # called on its own; without this the move fails on a missing folder.
    os.makedirs(success_folder, exist_ok=True)
    success_path = os.path.join(success_folder, os.path.basename(filepath))
    shutil.move(filepath, success_path)

    return file_name_without_extension, asin_value, span_value
def extract_data_from_folder(folder_path: str, success_folder: str):
    """Run ``extract_data_from_file`` on every regular file in a folder.

    ``success_folder`` is created first when it does not exist. Entries of
    ``folder_path`` that are not regular files (e.g. sub-directories) are
    skipped. A file whose processing raises is reported on stdout and left
    out of the result; the remaining files are still processed.

    Args:
        folder_path: Directory whose files are parsed.
        success_folder: Directory handed to ``extract_data_from_file`` as the
            destination for processed files.

    Returns:
        A list with one ``extract_data_from_file`` result per file that was
        processed without raising.
    """
    if not os.path.exists(success_folder):
        os.makedirs(success_folder)

    collected = []
    for filename in os.listdir(folder_path):
        candidate = os.path.join(folder_path, filename)
        if not os.path.isfile(candidate):
            continue
        try:
            row = extract_data_from_file(candidate, success_folder)
        except Exception as e:
            # Best effort: one bad file must not abort the whole batch.
            print(f"无法解析文件: {filename}, 错误: {e}")
        else:
            collected.append(row)
    return collected
def save_to_csv(results, output_folder, output_csv="result.csv"):
    """Write the extracted rows to a CSV file and print where it was saved.

    The file starts with a header row (file name, ASIN, title) followed by
    one line per item of ``results``. An existing file with the same name is
    overwritten.

    Args:
        results: Iterable of rows; each row is a sequence of field values.
        output_folder: Directory for the CSV file. It is created, including
            missing parents, when it does not exist. An empty string means
            the current working directory.
        output_csv: Name of the CSV file inside ``output_folder``.

    Raises:
        OSError: If the folder cannot be created or the file cannot be
            written.
    """
    import csv

    # An empty folder name means "current directory"; os.makedirs("") would
    # raise, so only create the folder when one was actually given.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)
    output_file_path = os.path.join(output_folder, output_csv)

    # newline="" leaves line endings to the csv module; utf-8-sig prepends a
    # BOM (presumably so spreadsheet tools detect the encoding).
    with open(output_file_path, "w", newline="", encoding="utf-8-sig") as f:
        writer = csv.writer(f)
        writer.writerow(["文件名", "ASIN", "标题"])
        writer.writerows(results)
    print(f"已保存到: {output_file_path}")
if __name__ == "__main__":
    # Script entry point: parse every saved page in the input folder, then
    # write the collected rows to a CSV file in the output folder.
    folder_path = r"G:\bulkasin"
    success_folder = r"G:\bulkasin\success"
    output_folder = r"G:\bulkasin\output"

    save_to_csv(
        extract_data_from_folder(folder_path, success_folder),
        output_folder,
        "result.csv",
    )











重要的事情说三遍!!!
这是网站预览,如果预览不正常,有可能需要外网才能访问,或网站已经不存在,或对方服务器拒绝预览访问。
这是网站预览,如果预览不正常,有可能需要外网才能访问,或网站已经不存在,或对方服务器拒绝预览访问。
这是网站预览,如果预览不正常,有可能需要外网才能访问,或网站已经不存在,或对方服务器拒绝预览访问。

(网址均来源于网络,如有侵权,请联系删除)












1. 链接直达将跳转网址: https://www.qaqlinks.com/sites/594.html

2. 网址收录时间:2026-02-10 09:31:47

3. 这是第“594”个收录网址。

如对此链接有任何疑问,可以添加以上微信,或在此链接底部留言,看到后会马上回复。

QaQlinks 跨境电商导航网站 声明:网站上的服务均为第三方提供,与 QAQlinks 网站无关。请用户注意甄别第三方服务质量,避免上当受骗。

(文章来源于网络,如有侵权,请联系删除)




相关导航