import os
import re
import shutil
from bs4 import BeautifulSoup
def extract_data_from_file(filepath: str, success_folder: str):
    """Pull the ASIN and product title out of a saved Amazon HTML page.

    The page is read with errors ignored, searched for the ASIN (first via
    the embedded ``"currentAsin"`` JSON key, then via the legacy
    ``var opts = {... asin: "..."}`` script block), and the title is taken
    from the ``h1#title span`` element. The file is then archived into
    ``success_folder`` so it is not processed again.

    Returns a ``(file name without extension, asin, title)`` tuple; Chinese
    placeholder strings are returned for values that cannot be found.
    """
    with open(filepath, "r", encoding="utf-8", errors="ignore") as page:
        html = page.read()

    # ASIN lookup: primary JSON key, then the older inline-script fallback.
    asin = "未找到 currentAsin"
    primary = re.search(r'"currentAsin"\s*:\s*"([^"]+)"', html)
    if primary:
        asin = primary.group(1)
    else:
        fallback = re.search(r'var opts\s*=\s*{[^}]*asin\s*:\s*"([^"]+)"', html)
        if fallback:
            asin = fallback.group(1)

    # Title lookup via the product-title heading.
    node = BeautifulSoup(html, "html.parser").select_one("h1#title span")
    title = node.get_text(strip=True) if node else "未找到 span 属性值"

    # Escape single quotes so the title is safe to embed in single-quoted
    # downstream contexts.
    title = title.replace("'", "\\'")

    base_name = os.path.basename(filepath)
    stem = os.path.splitext(base_name)[0]

    # Archive the processed file so the next run skips it.
    shutil.move(filepath, os.path.join(success_folder, base_name))
    return stem, asin, title
def extract_data_from_folder(folder_path: str, success_folder: str):
    """Parse every regular file in *folder_path* and collect the results.

    Successfully parsed files are moved into *success_folder* by
    ``extract_data_from_file``; files that raise are reported on stdout and
    skipped, so one bad page does not abort the whole batch.

    Parameters
    ----------
    folder_path : str
        Directory containing the HTML files to process.
    success_folder : str
        Directory processed files are moved into (created if missing).

    Returns
    -------
    list[tuple[str, str, str]]
        One ``(file name, ASIN, title)`` tuple per successfully parsed file.
    """
    results = []
    # Ensure the archive folder exists before any file is moved into it.
    os.makedirs(success_folder, exist_ok=True)
    for filename in os.listdir(folder_path):
        filepath = os.path.join(folder_path, filename)
        if not os.path.isfile(filepath):
            # Skip sub-directories (e.g. the success folder itself when it
            # lives inside folder_path).
            continue
        try:
            results.append(extract_data_from_file(filepath, success_folder))
        except Exception as e:
            # BUG FIX: the message previously printed a garbled literal
            # placeholder instead of the offending file's name.
            print(f"无法解析文件: {filename}, 错误: {e}")
    return results
def save_to_csv(results, output_folder, output_csv="result.csv"):
    """Write extraction results to a CSV file inside *output_folder*.

    The folder is created if it does not exist. The file is written with a
    UTF-8 BOM (``utf-8-sig``) so Excel detects the encoding, with a Chinese
    header row followed by one row per result tuple.
    """
    import csv

    os.makedirs(output_folder, exist_ok=True)
    destination = os.path.join(output_folder, output_csv)

    with open(destination, "w", newline="", encoding="utf-8-sig") as out:
        sheet = csv.writer(out)
        sheet.writerow(["文件名", "ASIN", "标题"])
        sheet.writerows(results)

    print(f"已保存到: {destination}")
if __name__ == "__main__":
    # Script entry point: parse every saved HTML page under the source
    # directory, archive processed files, and export the results as CSV.
    source_dir = r"G:\bulkasin"
    archive_dir = r"G:\bulkasin\success"
    export_dir = r"G:\bulkasin\output"

    extracted = extract_data_from_folder(source_dir, archive_dir)
    save_to_csv(extracted, export_dir, "result.csv")
# NOTE(review): the lines below are scraper residue — boilerplate from the
# web page this script was copied from (a QaQlinks link-directory preview),
# not part of the program. Commented out so the file is valid Python; kept
# for provenance.
#
# 重要的事情说三遍!!!
# 这是网站预览,如果预览不正常,有可能需要外网才能访问,或网站已经不存在,或对方服务器拒绝预览访问。
# 这是网站预览,如果预览不正常,有可能需要外网才能访问,或网站已经不存在,或对方服务器拒绝预览访问。
# 这是网站预览,如果预览不正常,有可能需要外网才能访问,或网站已经不存在,或对方服务器拒绝预览访问。
# (网址均来源于网络,如有侵权,请联系删除)
# 1. 链接直达将跳转网址: https://www.qaqlinks.com/sites/594.html
# 2. 网址收录时间:2026-02-10 09:31:47
# 3. 这是第“594”个收录网址。
# 如对此链接有任何疑问,可以添加以上微信,或此链接底部留言。看到会马上回复。
# QaQlinks 跨境电商导航网站 声明:网站上的服务均为第三方提供,与 QAQlinks 网站无关。请用户注意甄别第三方服务质量,避免上当受骗。
# (文章来源于网络,如有侵权,请联系删除)
#
# 使用XPath表达式提取需要的数据,将数据保存到桌面Excel文件