proginn2130409762 2022年07月28日
99阅读

作品详情

import time
import re
import csv
import jieba
import wordcloud
# import faker
from faker import Faker
fx = Faker()
from selenium import webdriver


# Scrape review texts from a JD.com product page into `li`, a list of
# {'京东评论': text} rows (the row shape csv.DictWriter is given later on).
#
# Locators use the generic find_element(s)('xpath', ...) form rather than the
# find_element_by_* shortcuts, which are not available in current Selenium
# releases. The browser is always closed, even if scraping fails part-way.
drive = webdriver.Chrome()
li = []
try:
    drive.get('https://item.jd.com/100026667910.html')
    drive.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    time.sleep(2)
    drive.maximize_window()
    time.sleep(2)
    # Fifth tab of the "ETab" strip -- presumably the reviews tab; confirm
    # against the live page.
    drive.find_element('xpath', '//div[@class="ETab"]/div/ul/li[5]').click()
    time.sleep(2)

    tm = 2   # index of the pager link that is clicked to reach the next page
    com = 0  # number of reviews collected so far
    for t in range(1000):
        # Scroll to the bottom twice with a pause in between -- presumably to
        # give lazily loaded content time to render.
        drive.execute_script('window.scrollTo(0,document.body.scrollHeight)')
        time.sleep(4)
        drive.execute_script('window.scrollTo(0,document.body.scrollHeight)')

        # Then jump to a fixed offset (2426 px) above the bottom of the
        # document; presumably this is where the review list and pager sit.
        aa = drive.find_element('xpath', './html')
        fd = aa.size['height'] - 2426
        drive.execute_script(f'window.scrollTo(0,{fd})')
        time.sleep(2)

        # At most ten review containers are read per page.
        tm1 = drive.find_elements('xpath', '//div[@id="comment-0"]/div')[:10]
        for i in tm1:
            what = i.find_element('xpath', './/div[2]/p').text
            # Flatten multi-line reviews so each one stays on a single line.
            li.append({'京东评论': what.replace('\n', '')})
            com += 1
            print(f'已爬取{com}条数据')
        print(f'第{t+1}页爬完')

        # Stop once the target is reached. `>=` (not `!=`) so that a page
        # with fewer than ten reviews cannot step over the threshold.
        if com >= 1000:
            break

        # The pager link index advances up to 6 and then stays there.
        if tm < 6:
            tm += 1
        nxt = drive.find_elements('xpath', f'//div[@class="ui-page"]/a[{tm}]')
        if not nxt:
            # No further page link: keep what has been collected so far.
            break
        nxt[0].click()
        time.sleep(2)
finally:
    drive.quit()

# Save the collected reviews to jd.csv: one header row, then one row per
# entry of `li`.
header = ['京东评论']
with open('jd.csv', mode='w', newline='', encoding='utf-8') as out_file:
    review_writer = csv.DictWriter(out_file, fieldnames=header)
    review_writer.writeheader()
    for row in li:
        review_writer.writerow(row)

# Read jd.csv back in as raw text and render its words as a word-cloud image.
with open("jd.csv", encoding="utf-8") as csv_text:
    raw = csv_text.read()

tokens = jieba.lcut(raw)    # jieba segmentation -> list of tokens
joined = ' '.join(tokens)   # one space-separated string for WordCloud
print(joined)

# NOTE(review): the complete file text is handed to WordCloud as `stopwords`.
# That parameter is documented as a collection of words, so a plain string
# presumably ends up being treated character by character -- confirm. An
# earlier, disabled version used the explicit list
# ["& hellip", "n", "&%", 'vcontent'] instead.
stopword_source = raw

cloud = wordcloud.WordCloud(
    font_path="msyh.ttc",  # local font file; an absolute path also works
    width=1000,
    height=700,
    background_color='white',
    max_words=100,
    stopwords=stopword_source,
)
cloud.generate(joined)            # feed the text to the word cloud
cloud.to_file("京东好评爬取.png")  # write the rendered image
声明:本文仅代表作者观点,不代表本站立场。如果侵犯到您的合法权益,请联系我们删除侵权资源!如果遇到资源链接失效,请您通过评论或工单的方式通知管理员。未经允许,不得转载,本站所有资源文章禁止商业使用运营!
下载安装【程序员客栈】APP
实时对接需求、及时收发消息、丰富的开放项目需求、随时随地查看项目状态

评论