赞
踩
如果你有一个文本文件,那么以下这段代码可以帮助你实现LDA主题模型。
- import jieba
-
- # from nltk.corpus import stopwords
- import pyLDAvis.gensim_models
- import wordcloud
- from gensim.models.coherencemodel import CoherenceModel
- from gensim.models.ldamodel import LdaModel
- from gensim.corpora.dictionary import Dictionary
-
# Load the corpus from temp.txt. Each line is echoed as it is read, its line
# break is removed, and the lines are joined with no separator so the whole
# file becomes one continuous string.
loaded_lines = []
with open("temp.txt", "r", encoding="utf-8") as f:
    for ann in f:  # iterate the file lazily instead of f.readlines()
        ann = ann.strip("\n")  # drop the line break
        print(ann)
        loaded_lines.append(ann)
# Join once at the end rather than growing a string with += inside the loop.
raw_text = "".join(loaded_lines)

# Segment the text with jieba (cut_all=True selects jieba's full mode).
# jieba.cut returns a generator; it is materialized into a list so the tokens
# can be iterated more than once by the steps that follow.
text_h = list(jieba.cut(raw_text, cut_all=True))
# Punctuation tokens to discard after segmentation.
# Both ASCII and full-width forms are listed: Chinese text normally uses the
# full-width marks, and the repeated ASCII entries previously in this list were
# presumably full-width characters that lost their width.
interpunctuations = [
    # ASCII punctuation
    ",",
    ".",
    ":",
    ";",
    "?",
    "!",
    "(",
    ")",
    "[",
    "]",
    '"',
    "&",
    "*",
    "@",
    "#",
    "$",
    "%",
    # Full-width / CJK punctuation
    ",",
    "。",
    ":",
    ";",
    "?",
    "!",
    "(",
    ")",
    "【",
    "】",
    "、",
    "“",
    "”",
    "‘",
    "’",
    # Multi-character token, and the empty string (empty tokens are dropped)
    "。”",
    "",
]

# A frozenset gives O(1) membership tests; the list above stays available
# under its original name.
_punctuation_tokens = frozenset(interpunctuations)

# Keep a token only if, ignoring surrounding whitespace, it is not punctuation.
# Whitespace-only tokens strip down to "" and are therefore removed as well.
text_h = [word for word in text_h if word.strip() not in _punctuation_tokens]

# Space-separated string of the remaining tokens.
s = " ".join(text_h)
-
-
# WordCloud's ``stopwords`` argument expects a collection of words. Passing the
# text string ``s`` itself only "worked" because iterating a str yields its
# characters, which turned every character of the text into a stop word and so
# hid all single-character tokens. Build that set explicitly instead.
single_char_tokens = {token for token in s.split() if len(token) == 1}

wc = wordcloud.WordCloud(
    font_path="msyh.ttc",  # font file used to render the words
    width=1000,
    height=700,
    background_color="white",
    max_words=100,
    stopwords=single_char_tokens,
)

wc.generate(s)  # build the cloud from the space-separated tokens
wc.to_file("XXXXX.png")  # save the rendered image
-
-
# Build the gensim dictionary (token -> integer id). The whole token list is
# passed as a single document.
# NOTE(review): with only one document LDA has no cross-document variation to
# learn topics from; consider one token list per input line — confirm how
# temp.txt is laid out before changing this.
documents = [text_h]
dictionary = Dictionary(documents)

# Convert each document into its bag-of-words sparse vector; the list of these
# vectors is the corpus.
corpus = [dictionary.doc2bow(words) for words in documents]

# Fit the LDA model; num_topics sets the number of topics.
lda = LdaModel(
    corpus=corpus,
    id2word=dictionary,
    num_topics=2,
    random_state=123,
    iterations=50,
)

# UMass coherence. Evaluate the model so the score is actually reported
# instead of being constructed and then left unused.
ldaCM = CoherenceModel(
    model=lda,
    corpus=corpus,
    dictionary=dictionary,
    coherence="u_mass",
)
print("u_mass coherence:", ldaCM.get_coherence())

# Print every topic, showing 15 words per topic.
for topic in lda.print_topics(num_words=15):
    print(topic)

# Optional: visualize the LDA model with pyLDAvis and save it as a local HTML
# file. Uncomment the two lines below to enable it.
# plot = pyLDAvis.gensim_models.prepare(lda, corpus, dictionary)
# pyLDAvis.save_html(plot, "./XXXXX.html")

运行过后,你可以得到一张词云图片(XXXXX.png)以及打印出的各主题关键词;取消代码末尾 pyLDAvis 相关两行的注释后,还可以得到一个html文件,如下所示。
大家可以根据自己不同的需求进行自定义修改,模型主体是不变的。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。