赞
踩
爬虫系列更新-第二篇文章——《Python爬虫系列-有道批量翻译英文单词-注音标版》
之前发布计算机英文单词时研究了下,怎么把一个含有大量英文单词的txt文件翻译成如下格式:
如上图,左边图片是需要翻译的txt文本,右边图片是翻译后的txt文本。
下面是程序运行时的实际界面效果。
Python代码参考了CSDN上这个作者的帖子。他的分析博文很牛,但是没有批量翻译功能,所以我在他的代码基础上添加了批量翻译、输出中文译文并写入国际音标的功能。全部代码如下:
- import hashlib
- import base64
- import requests
- import json
- import time
-
- from urllib.parse import urlencode
- from Crypto.Cipher import AES
- from Crypto.Util.Padding import unpad, pad
-
-
class AESCipher(object):
    """AES-CBC encrypt/decrypt helper with a fixed key and IV.

    ``key`` and ``iv`` are the MD5 digests of two hard-coded secret strings.
    Ciphertext is exchanged as URL-safe base64 (``-`` and ``_`` in place of
    ``+`` and ``/``).
    """

    # The raw secrets are hashed with MD5 and only the digests are kept.
    key = hashlib.md5(
        b'ydsecret://query/key/B*RGygVywfNBwpmBaZg*WT7SIOUP2T0C9WHMZN39j^DAdaZhAnxvGcCY6VYFwnHl'
    ).digest()
    iv = hashlib.md5(
        b'ydsecret://query/iv/C@lZe2YzHtZ2CYgaXKSVfsb7Y4QWHjITPPZ0nQp87fBeJ!Iv6v^6fvi2WN@bYpJ4'
    ).digest()

    @staticmethod
    def decrypt(data):
        """Decrypt URL-safe base64 ciphertext and return the plaintext as str.

        :param data: base64 text (``-``/``_`` alphabet) of the AES-CBC ciphertext
        :return: the decoded plaintext with padding removed
        """
        aes = AES.new(AESCipher.key, AES.MODE_CBC, iv=AESCipher.iv)
        raw_ciphertext = base64.urlsafe_b64decode(data)
        padded_plaintext = aes.decrypt(raw_ciphertext)
        return unpad(padded_plaintext, AES.block_size).decode()

    @staticmethod
    def encrypt(plaintext: str):
        """Encrypt ``plaintext`` and return URL-safe base64 bytes.

        :param plaintext: text to encrypt
        :return: base64-encoded ciphertext (bytes, ``-``/``_`` alphabet)
        """
        aes = AES.new(AESCipher.key, AES.MODE_CBC, iv=AESCipher.iv)
        block_aligned = pad(plaintext.encode(), AES.block_size)
        return base64.urlsafe_b64encode(aes.encrypt(block_aligned))
-
-
def get_form_data(sentence, from_lang, to_lang):
    """Build the form fields for a web-translate request.

    :param sentence: text to translate
    :param from_lang: source language code
    :param to_lang: target language code
    :return: dict of form fields, including ``sign`` (the MD5 hex digest of
        the url-encoded client, timestamp, product and secret key) and the
        ``mysticTime`` timestamp that the signature was computed over
    """
    secret_key = 'fsdsogkndfokasodnaso'
    client = 'fanyideskweb'
    product = 'webfanyi'

    # NOTE(review): time.time() is float seconds; confirm the server does not
    # expect an integer millisecond timestamp here.
    timestamp = time.time()

    # urlencode serialises in insertion order, so the order of these keys is
    # part of the signature.
    signed_fields = {
        'client': client,
        'mysticTime': timestamp,
        'product': product,
        'key': secret_key,
    }
    signature = hashlib.md5(urlencode(signed_fields).encode('utf-8')).hexdigest()

    return {
        'i': sentence,
        'from': from_lang,
        'to': to_lang,
        'domain': 0,
        'dictResult': 'true',
        'keyid': product,
        'sign': signature,
        'client': client,
        'product': product,
        'appVersion': '1.0.0',
        'vendor': 'web',
        'pointParam': 'client,mysticTime,product',
        'mysticTime': timestamp,
        'keyfrom': 'fanyi.web',
    }
-
-
def translate(sentence, from_lang='auto', to_lang='', timeout=10):
    """Translate one word via the Youdao web endpoint and format the result.

    The response body is AES-decrypted with ``AESCipher`` and parsed as JSON.
    The UK and US phonetics are read from ``dictResult.ec.word`` and the
    translation from ``translateResult[0][0].tgt``.

    :param sentence: text to translate; surrounding whitespace (including a
        trailing newline from a file line) is ignored
    :param from_lang: source language code
    :param to_lang: target language code
    :param timeout: seconds to wait for the HTTP response before giving up
    :return: a string of the form ``英:[..] 美:[..] 译:[..]``, or ``0`` when the
        input is blank, the request fails, or the response lacks the
        expected fields (a failure message is printed in the latter cases)
    """
    sentence = (sentence or '').strip()
    if not sentence:
        # Nothing to translate: do not spend a network round trip on it.
        return 0

    # Request parameters of the Youdao translation web page.
    url = 'https://dict.youdao.com/webtranslate'
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
        'referer': 'https://fanyi.youdao.com/',
        'cookie': 'OUTFOX_SEARCH_USER_ID=-805044645@10.112.57.88; OUTFOX_SEARCH_USER_ID_NCOO=818822109.5585971;'
    }

    try:
        params = get_form_data(sentence, from_lang, to_lang)
        # Without a timeout a stalled connection would block the batch forever.
        res = requests.post(url, headers=headers, data=params, timeout=timeout)
        res.raise_for_status()

        # The response body is AES-encrypted; decrypt before parsing.
        ret = json.loads(AESCipher.decrypt(res.text))

        word = ret["dictResult"]["ec"]["word"]
        tgt = ret['translateResult'][0][0]['tgt']
        # Plain concatenation is deliberate: a missing (non-str) phonetic
        # raises TypeError and is reported as a failure instead of being
        # rendered as the text "None".
        return "英:[" + word["ukphone"] + "] " + "美:[" + word["usphone"] + "]" + " 译:[" + tgt + "]"
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
        # ValueError also covers base64, padding, decoding and JSON errors.
        print('翻译失败:', e)
        return 0
-
def main():
    """Translate every non-blank line of a text file.

    Prompts for the input path, calls ``translate`` on each line, and writes
    the successfully translated lines to ``<input name>_已翻译.txt`` next to
    the input file. Lines whose translation fails are left out of the output.
    """
    # Local import so this block does not depend on the file-level imports.
    import os

    # The backslash is escaped: a bare "\1" is an octal escape and would print
    # a control character instead of the example path.
    path = input("请输入你要翻译的txt文档路径(E:\\1.txt): ").strip().strip('"\'')

    translated = []
    # The input is read with the platform default encoding, as before.
    with open(path, 'r') as f:
        for line in f:
            word = line.strip()
            if not word:
                continue  # skip blank lines instead of sending them
            print(word)
            result = translate(word)
            if result:
                translated.append(word + " " + result + "\n")

    out = "".join(translated)

    # splitext handles any extension length (or none), unlike path[:-4].
    out_path = os.path.splitext(path)[0] + "_已翻译.txt"
    with open(out_path, 'w', encoding='utf-8') as fout:
        fout.write(out)

    print(out)
    print("已完成!")


if __name__ == '__main__':
    main()
-
调用方法如下:
把代码保存到.py文件中,运行.py文件,输入需要翻译的txt文本路径地址,如下图所示:
然后翻译后的txt,也会出现在之前的文本文件目录里,如下图:
上图中"1.txt"就是输入的英文文档,"1_已翻译.txt"就是翻译后的文档。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。