赞
踩
from transformers import AutoTokenizer, AutoModel

# Load the int4 ChatGLM-6B checkpoint from a local directory and move it to the
# GPU in half precision. NOTE(review): trust_remote_code=True lets transformers
# run Python code shipped with the checkpoint -- only use trusted model folders.
tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
# Alternative ways of loading the model, kept for reference:
# tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)
# model = AutoModel.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
# model = AutoModel.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)
# kernel_file = "./models/chatglm-6b-int4/quantization_kernels.so"
# model = model.quantize(bits=4, kernel_file=kernel_file)
model = model.quantize(bits=4)
model = model.eval()

# Characters escaped inside fenced code blocks so that they are shown literally
# instead of being interpreted as Markdown/HTML. Applied in a single pass; this
# is equivalent to replacing them one after another because no replacement text
# contains a character that is itself a key of this table.
_CODE_ESCAPES = str.maketrans({
    "`": "\\`",
    "<": "&lt;",
    ">": "&gt;",
    " ": "&nbsp;",
    "*": "&ast;",
    "_": "&lowbar;",
    "-": "&#45;",
    ".": "&#46;",
    "!": "&#33;",
    "(": "&#40;",
    ")": "&#41;",
    "$": "&#36;",
})


def parse_text(text):
    """Convert Markdown-like chat text into an HTML fragment.

    Empty lines are dropped. A line containing a triple backtick toggles a
    code block: an opening fence becomes ``<pre><code class="language-X">``
    (``X`` is whatever follows the last backtick on that line) and a closing
    fence becomes ``<br></code></pre>``. Every other line except the very
    first one is prefixed with ``<br>``; lines inside a code block are
    additionally escaped with ``_CODE_ESCAPES``.

    Args:
        text: Raw text, e.g. a user prompt or a model response.

    Returns:
        The converted lines concatenated into a single string.
    """
    lines = [line for line in text.split("\n") if line != ""]
    fence_count = 0
    for i, line in enumerate(lines):
        if "```" in line:
            fence_count += 1
            if fence_count % 2 == 1:
                # Odd fence: a code block starts here.
                language = line.split('`')[-1]
                lines[i] = f'<pre><code class="language-{language}">'
            else:
                lines[i] = '<br></code></pre>'
        elif i > 0:
            # The first line is intentionally left untouched (no <br>, no
            # escaping), matching the original behaviour.
            if fence_count % 2 == 1:
                line = line.translate(_CODE_ESCAPES)
            lines[i] = "<br>" + line
    return "".join(lines)


def predict(input, chatbot, max_length, top_p, temperature, history):
    """Stream a chat completion, updating the chatbot transcript in place.

    Appends a new ``(prompt, "")`` pair to ``chatbot`` and, for every partial
    response produced by ``model.stream_chat``, replaces that pair with the
    HTML-formatted prompt and the HTML-formatted partial response.

    Args:
        input: The user's prompt text.
        chatbot: List of ``(prompt_html, response_html)`` tuples; mutated.
        max_length: Forwarded to ``model.stream_chat``.
        top_p: Forwarded to ``model.stream_chat``.
        temperature: Forwarded to ``model.stream_chat``.
        history: Conversation history passed to ``model.stream_chat``.

    Yields:
        ``(chatbot, history)`` after each partial response, where ``history``
        is the value returned by ``model.stream_chat`` for that step.
    """
    chatbot.append((parse_text(input), ""))
    for response, history in model.stream_chat(tokenizer, input, history, max_length=max_length, top_p=top_p,
                                               temperature=temperature):
        chatbot[-1] = (parse_text(input), parse_text(response))
        yield chatbot, history


def text2ver_search(file_name, search_text, limit=1):
    """Semantic search over the lines of one text file using text2vec.

    Each non-blank line of the file is embedded with the
    ``shibing624/text2vec-base-chinese`` sentence model (on CPU) and compared
    with the embedding of ``search_text`` using cosine distance.

    Args:
        file_name: Path of a UTF-8 text file; one candidate sentence per line.
        search_text: The query text.
        limit: Maximum number of matches to return.

    Returns:
        A list of ``(search_text, matched_sentence)`` tuples, one per match.
    """
    # Imported lazily so the heavy dependencies are only loaded when needed.
    from docarray import Document, DocumentArray
    from text2vec import SentenceModel, EncoderType
    from tqdm import tqdm

    with open(file_name, encoding='utf-8') as f:
        txt = f.read()
    document = Document(text=txt)
    # Split the file on newlines; blank lines are skipped.
    document_array = DocumentArray(
        Document(text=s.strip()) for s in document.text.split('\n') if s.strip())

    # Named `encoder` so it does not shadow the module-level chat `model`.
    encoder = SentenceModel("shibing624/text2vec-base-chinese",
                            encoder_type=EncoderType.FIRST_LAST_AVG, device='cpu')
    feature_vec = encoder.encode
    for doc in tqdm(document_array):
        doc.embedding = feature_vec(doc.text)

    text = Document(text=search_text)  # the text to match against the file
    text.embedding = feature_vec(text.text)
    # Find the sentences most similar to the query text.
    querys = text.match(document_array, limit=limit, exclude_self=True, metric='cos', use_scipy=True)
    querys_text = querys.matches[:, 'text']
    return [(search_text, query_text) for query_text in querys_text]


file_name = './data/test.txt'
search_text = '安心的老婆是谁?'
querys_list = text2ver_search(file_name, search_text, 1)
print("querys_list:", querys_list)

response_new = ''
# The retrieved (question, sentence) pairs are used as the chat history so the
# model can see the matched sentence as context.
history = querys_list
for chatbot, history in predict(search_text, chatbot=[], max_length=10000, top_p=0.5, temperature=0.5,
                                history=history):
    response_old = response_new
    response_new = chatbot[0][1]
    if response_new.startswith(response_old):
        # Print only the newly generated tail. Slicing the prefix avoids
        # str.replace removing later repetitions of the earlier text.
        new_single = response_new[len(response_old):]
    else:
        # The formatted text was rewritten rather than extended (e.g. a code
        # fence just closed); fall back to the original behaviour.
        new_single = response_new.replace(response_old, '')
    print(new_single, end='')
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。