LDA主题模型Python实现_lda主题模型python代码

作者：Cpp五条 | 2024-04-04 13:15:02

踩

lda主题模型python代码

如果你有一个 UTF-8 编码的文本文件（代码中默认读取当前目录下的 temp.txt），那么以下这段代码可以帮助你实现LDA主题模型。


import jieba
 
# from nltk.corpus import stopwords
import pyLDAvis.gensim_models
import wordcloud
from gensim.models.coherencemodel import CoherenceModel
from gensim.models.ldamodel import LdaModel
from gensim.corpora.dictionary import Dictionary
 
# Read the input file line by line, echo every line to stdout, and glue the
# lines together (no separator) into one string that is then segmented.
_lines = []
with open("temp.txt", "r", encoding="utf-8") as f:
    for ann in f:  # iterate the file directly; no need to load it via readlines()
        ann = ann.strip("\n")  # drop the line break
        print(ann)
        _lines.append(ann)
# Join once instead of growing the string with += inside the loop.
text_h = "".join(_lines)

# Segment with jieba in full mode (cut_all=True). jieba.cut returns a lazy,
# one-shot generator, so materialise it into a list: text_h can then be
# iterated more than once and supports len()/indexing.
text_h = list(jieba.cut(text_h, cut_all=True))
# Tokens that are discarded after segmentation: full-width (CJK) and ASCII
# punctuation marks, plus the empty string so that empty tokens are dropped
# as well. Each entry is listed exactly once.
interpunctuations = [
    "，",
    "。",
    "：",
    "；",
    "？",
    "（",
    "）",
    "【",
    "】",
    "&",
    "！",
    "、",
    "*",
    "@",
    "#",
    "$",
    "%",
    ".",
    ",",
    ":",
    ";",
    "!",
    '"',
    "“",
    "”",
    "[",
    "]",
    "‘",
    "’",
    "。”",
    "",
]
# Drop punctuation and empty tokens. The lookup set is built once so every
# membership test is a hash lookup rather than a scan of the list.
_punctuation = frozenset(interpunctuations)
text_h = [word for word in text_h if word not in _punctuation]
# English stop-word filtering via NLTK is left disabled (its import is
# commented out at the top of the script):
# text_h = [word for word in text_h if word not in stopwords.words("english")]
 
# Join the tokens with spaces into the text that WordCloud will count.
s = " ".join(text_h)

# WordCloud documents `stopwords` as a set of words. The script used to pass
# the whole text string `s` here; a string is consumed character by
# character, so in effect only one-character tokens were excluded from the
# cloud. Build that set explicitly: same result, documented argument type.
# NOTE(review): if no filtering was intended, pass an empty set instead.
single_char_stopwords = set(s)

wc = wordcloud.WordCloud(
    # A font with CJK glyphs is required to render Chinese text; this file
    # name must be resolvable on the machine running the script
    # (presumably Microsoft YaHei on Windows - confirm for other platforms).
    font_path="msyh.ttc",
    width=1000,
    height=700,
    background_color="white",
    max_words=100,
    stopwords=single_char_stopwords,
)

wc.generate(s)  # count the tokens and lay out the cloud
wc.to_file("XXXXX.png")  # save the rendered image
 
 
# Build the vocabulary. The whole token list is supplied as ONE document.
dictionary = Dictionary([text_h])
# Map the tokens to a sparse bag-of-words vector; the corpus therefore holds
# a single vector.
# NOTE(review): LDA models per-document topic mixtures, so a one-document
# corpus gives it very little to separate topics with - consider splitting
# the input into several documents (e.g. one per line or paragraph).
corpus = [dictionary.doc2bow(text_h)]
# Train the LDA model; num_topics sets the number of topics.
lda = LdaModel(
    corpus=corpus, id2word=dictionary, num_topics=2, random_state=123, iterations=50
)
# U_Mass coherence of the trained model.
ldaCM = CoherenceModel(
    model=lda, corpus=corpus, dictionary=dictionary, coherence="u_mass"
)

# Print every topic with its 15 highest-weighted words.
for topic in lda.print_topics(num_words=15):
    print(topic)

# The coherence model used to be constructed but never evaluated; report the
# score so the quality of the topics can actually be judged.
print("u_mass coherence:", ldaCM.get_coherence())

# Optional: interactive pyLDAvis visualisation saved as a local HTML file.
# Uncomment both lines to enable it.
# plot = pyLDAvis.gensim_models.prepare(lda, corpus, dictionary)
# pyLDAvis.save_html(plot, "./XXXXX.html")

运行过后，程序会保存词云图片（XXXXX.png）并在控制台打印每个主题的关键词；如果取消代码末尾 pyLDAvis 两行的注释，还可以得到一个可交互的 html 文件，如下所示。

大家可以根据自己不同的需求进行自定义修改，模型主体是不变的。

声明：本文内容由网友自发贡献，不代表【wpsshop博客】立场，版权归原作者所有，本站不承担相应法律责任。如您发现有侵权的内容，请联系我们。转载请注明出处：https://www.wpsshop.cn/w/Cpp五条/article/detail/358834