当前位置:   article > 正文

周末没有怎么出门,终于解决了word不能另存为UTF8格式txt的问题_文本文档另存utf8没有用

文本文档另存utf8没有用

 

之前使用 OLE 自动化的方法(DocToTxt),通过“另存为”处理 Word 文档时,保存出来的 txt 是 ANSI 编码,韩文等字符会变成“?”,造成数据丢失。

上网查询(用的百度),绞尽脑汁,黔驴技穷,山穷水尽,一筹莫展,崩溃了

 

上午的时候,实在没有办法了,考虑用c#去实现,然后在c里调用c#的程序(DLL)

 

今天下午继续看文档 http://msdn.microsoft.com/en-us/library/Aa155776#offaut_creautclicplus

有点灵感,对msword.cpp进行了修改

  // Reads the property with DISPID 0x0 from the wrapped dispatch object and
  // hands it back as a raw VARIANT instead of copying it into a string buffer.
  // `result` is not touched by this implementation; the parameter remains only
  // so the signature stays the same for existing callers.
  VARIANT Selection::GetText(char* result)
  {
    VARIANT textValue;
    InvokeHelper(0x0, DISPATCH_PROPERTYGET, VT_VARIANT, &textValue, NULL);
    return textValue;
  }

调试了下,发现可以显示韩文了,终于有了突破,这种方法可以解决问题了,这时是3,4点钟

 

继续看文档,搜wdFormatUnicodeText

接着,想到了用Google去搜,这个时候,有了重大突破,

还是Google好啊,一招制敌,泪流满面,相见恨晚。

 

唉,竟然忘记了:搜技术问题一定要用Google。教训啊,走了这么多的弯路。

 

Google上面有老外的提问和解答,找到了点线索

 http://stackoverflow.com/questions/11736327/save-every-page-of-a-word-document-into-a-txt-file-utf-8-using-vba


nDoc.SaveAs Encoding:=msoEncodingUTF8, FileFormat:=wdFormatUnicodeText, ...

知道了要用Encoding这个参数

又从http://msdn.microsoft.com/en-us/library/microsoft.office.tools.word.document.saveas(v=vs.80).aspx?cs-save-lang=1&cs-lang=csharp#code-snippet-1

看到了saveas的参数

public virtual void SaveAs ([OptionalAttribute] ref Object FileName,[OptionalAttribute] ref Object FileFormat,[OptionalAttribute] ref Object LockComments,[OptionalAttribute] ref Object Password,[OptionalAttribute] ref Object AddToRecentFiles,[OptionalAttribute] ref Object WritePassword,[OptionalAttribute] ref Object ReadOnlyRecommended,[OptionalAttribute] ref Object EmbedTrueTypeFonts,[OptionalAttribute] ref Object SaveNativePictureFormat,[OptionalAttribute] ref Object SaveFormsData,[OptionalAttribute] ref Object SaveAsAOCELetter,[OptionalAttribute] ref Object Encoding,[OptionalAttribute] ref Object InsertLineBreaks,[OptionalAttribute] ref Object AllowSubstitutions,[OptionalAttribute] ref Object LineEnding,[OptionalAttribute] ref Object AddBiDiMarks)

 

知道了在程序中需要再添加一个参数(Encoding);看文档的时候知道了 DISPPARAMS 里的参数要按相反的顺序填写(第一个参数放在数组末尾,最后一个参数放在下标 0),

再搜Const msoEncodingUTF8 = 65001 ,知道了这个值是65001

 

然后修改程序

  // modified by Sunday 2013-4-14
  // Positional arguments for Document.SaveAs. The array is filled in reverse:
  // index 11 is the first parameter (FileName) and index 0 is the twelfth
  // (Encoding), which is the parameter that makes Word write UTF-8.
  // Relies on bstrSaveFile, m_bstrEmptyString and vFalse from the enclosing
  // function; the BSTRs are only borrowed, so do not VariantClear these slots.
  const long kWdFormatUnicodeText = 7;   // FileFormat (wdFormatText = 2 gives ANSI)
  const long kMsoEncodingUTF8 = 65001;   // Encoding
  VARIANT vArgsSaveAs[12];
  for (int i = 0; i < 12; ++i)
    ::VariantInit(&vArgsSaveAs[i]);
  DISPPARAMS dpSaveAs;
  dpSaveAs.rgvarg = vArgsSaveAs;
  dpSaveAs.rgdispidNamedArgs = NULL;
  dpSaveAs.cArgs = 12;
  dpSaveAs.cNamedArgs = 0;
  vArgsSaveAs[11].vt = VT_BSTR;
  vArgsSaveAs[11].bstrVal = bstrSaveFile;        // FileName
  vArgsSaveAs[10].vt = VT_I4;
  vArgsSaveAs[10].lVal = kWdFormatUnicodeText;   // FileFormat
  vArgsSaveAs[9] = vFalse;                       // LockComments
  vArgsSaveAs[8].vt = VT_BSTR;
  vArgsSaveAs[8].bstrVal = m_bstrEmptyString;    // Password
  vArgsSaveAs[7].vt = VT_BOOL;
  vArgsSaveAs[7].boolVal = VARIANT_TRUE;         // AddToRecentFiles (VARIANT_BOOL true is -1, not TRUE)
  vArgsSaveAs[6].vt = VT_BSTR;
  vArgsSaveAs[6].bstrVal = m_bstrEmptyString;    // WritePassword
  vArgsSaveAs[5] = vFalse;                       // ReadOnlyRecommended
  vArgsSaveAs[4] = vFalse;                       // EmbedTrueTypeFonts
  vArgsSaveAs[3] = vFalse;                       // SaveNativePictureFormat
  vArgsSaveAs[2] = vFalse;                       // SaveFormsData
  vArgsSaveAs[1] = vFalse;                       // SaveAsAOCELetter
  vArgsSaveAs[0].vt = VT_I4;
  vArgsSaveAs[0].lVal = kMsoEncodingUTF8;        // Encoding


 

调试,激动人心的时刻

打开记事本,内容为

  1. Today is 2012-11-12
  2. 왜 한국 사람测1试
  3. A
  4. B
  5. C
  6. ffffffffffffffff顶顶顶顶顶顶顶顶kkkkkkkkkkk
  7. ffffffffffffffff顶顶顶顶顶顶顶顶kkkkkkkkkkk
  8. ffffff顶顶顶顶顶顶顶顶顶顶顶顶顶顶fffffffffffffffff
  9. gggggggggg4-13


终于解决了,

这是人类的一小步,是我的一大步

这时是5点多,打开窗户,伸出头去,吹吹风,春风,看看外面的世界,

这2天都没有怎么出门,极度烦闷,

Ag 5.18  Au 295 ,事已至此,不能惊慌

 

完整代码

  // Builds a VT_BSTR argument that merely borrows `text`. The caller keeps
  // ownership of the string, so the VARIANT must not be passed to VariantClear.
  static VARIANT BorrowedBstrArg(BSTR text)
  {
    VARIANT arg;
    ::VariantInit(&arg);
    arg.vt = VT_BSTR;
    arg.bstrVal = text;
    return arg;
  }

  // Builds a VT_BOOL argument using the automation truth values
  // (VARIANT_TRUE is -1; the Win32 TRUE macro is 1 and is not a valid VARIANT_BOOL).
  static VARIANT BoolArg(bool value)
  {
    VARIANT arg;
    ::VariantInit(&arg);
    arg.vt = VT_BOOL;
    arg.boolVal = value ? VARIANT_TRUE : VARIANT_FALSE;
    return arg;
  }

  // Builds a VT_I4 argument.
  static VARIANT LongArg(long value)
  {
    VARIANT arg;
    ::VariantInit(&arg);
    arg.vt = VT_I4;
    arg.lVal = value;
    return arg;
  }

  // Looks up `name` on `target` and invokes it in one step. Returns the first
  // failing HRESULT (name lookup or the call itself).
  static HRESULT InvokeByName(IDispatch* target, const OLECHAR* name, WORD flags,
                              DISPPARAMS* params, VARIANT* result, LCID invokeLocale)
  {
    // GetIDsOfNames takes a non-const string array but does not modify it.
    LPOLESTR memberName = const_cast<LPOLESTR>(name);
    DISPID dispid = DISPID_UNKNOWN;
    HRESULT hr = target->GetIDsOfNames(IID_NULL, &memberName, 1,
                                       LOCALE_USER_DEFAULT, &dispid);
    if (FAILED(hr))
      return hr;
    return target->Invoke(dispid, IID_NULL, invokeLocale, flags,
                          params, result, NULL, NULL);
  }

  // Reads an object-valued property. On success *ppObject holds a reference the
  // caller must Release(); on failure *ppObject is NULL.
  static HRESULT GetObjectProperty(IDispatch* target, const OLECHAR* name,
                                   IDispatch** ppObject)
  {
    *ppObject = NULL;
    DISPPARAMS noArgs = {NULL, NULL, 0, 0};
    VARIANT value;
    ::VariantInit(&value);
    HRESULT hr = InvokeByName(target, name, DISPATCH_PROPERTYGET, &noArgs,
                              &value, LOCALE_USER_DEFAULT);
    if (FAILED(hr))
      return hr;
    if (value.vt != VT_DISPATCH || value.pdispVal == NULL) {
      ::VariantClear(&value);
      return E_UNEXPECTED;
    }
    *ppObject = value.pdispVal;  // reference is handed over to the caller
    return S_OK;
  }

  // Starts Word and returns its IDispatch. Tries the Word 2007 ProgID the
  // original code used first, then the version-independent ProgID.
  static HRESULT CreateWordApplication(IDispatch** ppApp)
  {
    *ppApp = NULL;
    CLSID clsid;
    HRESULT hr = ::CLSIDFromProgID(L"Word.Application.12", &clsid);
    if (FAILED(hr))
      hr = ::CLSIDFromProgID(L"Word.Application", &clsid);
    if (FAILED(hr))
      return hr;
    return ::CoCreateInstance(clsid, NULL, CLSCTX_SERVER, IID_IDispatch,
                              reinterpret_cast<void**>(ppApp));
  }

  // Calls Documents.Open(FileName, ConfirmConversions, ReadOnly,
  // AddToRecentFiles, PasswordDocument, PasswordTemplate).
  static HRESULT OpenDocument(IDispatch* pDocs, BSTR bstrFile, BSTR bstrEmpty)
  {
    // IDispatch::Invoke expects positional arguments in reverse order.
    VARIANT args[6];
    args[5] = BorrowedBstrArg(bstrFile);    // FileName
    args[4] = BoolArg(false);               // ConfirmConversions
    args[3] = BoolArg(true);                // ReadOnly
    args[2] = BoolArg(false);               // AddToRecentFiles
    args[1] = BorrowedBstrArg(bstrEmpty);   // PasswordDocument
    args[0] = BorrowedBstrArg(bstrEmpty);   // PasswordTemplate
    DISPPARAMS params = {args, NULL, 6, 0};
    return InvokeByName(pDocs, OLESTR("Open"), DISPATCH_METHOD, &params, NULL,
                        LOCALE_USER_DEFAULT);
  }

  // Calls Document.SaveAs with FileFormat = wdFormatUnicodeText and
  // Encoding = msoEncodingUTF8 so the text file is written as UTF-8 rather than
  // ANSI (which turns Korean and other non-ANSI characters into '?').
  static HRESULT SaveAsUtf8Text(IDispatch* pDoc, BSTR bstrFile, BSTR bstrEmpty)
  {
    const long kWdFormatUnicodeText = 7;  // wdFormatText (2) would give ANSI
    const long kMsoEncodingUTF8 = 65001;

    // Reverse order: index 11 is the first parameter, index 0 the twelfth.
    VARIANT args[12];
    args[11] = BorrowedBstrArg(bstrFile);       // FileName
    args[10] = LongArg(kWdFormatUnicodeText);   // FileFormat
    args[9] = BoolArg(false);                   // LockComments
    args[8] = BorrowedBstrArg(bstrEmpty);       // Password
    args[7] = BoolArg(true);                    // AddToRecentFiles
    args[6] = BorrowedBstrArg(bstrEmpty);       // WritePassword
    args[5] = BoolArg(false);                   // ReadOnlyRecommended
    args[4] = BoolArg(false);                   // EmbedTrueTypeFonts
    args[3] = BoolArg(false);                   // SaveNativePictureFormat
    args[2] = BoolArg(false);                   // SaveFormsData
    args[1] = BoolArg(false);                   // SaveAsAOCELetter
    args[0] = LongArg(kMsoEncodingUTF8);        // Encoding
    DISPPARAMS params = {args, NULL, 12, 0};
    // The original code invoked SaveAs with LOCALE_SYSTEM_DEFAULT; kept as is.
    return InvokeByName(pDoc, OLESTR("SaveAs"), DISPATCH_METHOD, &params, NULL,
                        LOCALE_SYSTEM_DEFAULT);
  }

  // Writes a one-line diagnostic for the automation step that failed.
  static void ReportFailure(const char* step, HRESULT hr)
  {
    std::cerr << "DocToTxt: " << step << " failed (hr=0x" << std::hex
              << static_cast<unsigned long>(hr) << std::dec << ")" << std::endl;
  }

  // Converts a Word document to a UTF-8 text file by automating Word through
  // IDispatch: start Word, open the document read-only, SaveAs text with
  // Encoding = 65001, then quit Word.
  //
  // bstrOpenFile  path of the document to open.
  // bstrSaveFile  path of the text file to write.
  //
  // Ownership: this function frees BOTH BSTR arguments with SysFreeString
  // before returning (on success and on failure), so callers must pass
  // freshly allocated strings and must not use or free them afterwards.
  //
  // Errors: the function returns void, so a failing step is reported on
  // std::cerr and the remaining steps are skipped. Word is still asked to
  // quit and every acquired COM reference is released.
  void DocToTxt(BSTR bstrOpenFile, BSTR bstrSaveFile)
  {
    // Only balance CoInitialize with CoUninitialize when it actually succeeded
    // (S_OK or S_FALSE). On RPC_E_CHANGED_MODE the thread already has COM
    // initialised in another mode and we can simply keep going.
    const bool comInitialized = SUCCEEDED(::CoInitialize(NULL));

    BSTR bstrEmpty = ::SysAllocString(OLESTR(""));
    IDispatch* pApp = NULL;
    IDispatch* pDocs = NULL;
    IDispatch* pActiveDoc = NULL;

    const char* step = "creating Word.Application";
    HRESULT hr = CreateWordApplication(&pApp);
    if (SUCCEEDED(hr)) {
      step = "getting Application.Documents";
      hr = GetObjectProperty(pApp, OLESTR("Documents"), &pDocs);
    }
    if (SUCCEEDED(hr)) {
      step = "Documents.Open";
      hr = OpenDocument(pDocs, bstrOpenFile, bstrEmpty);
    }
    if (SUCCEEDED(hr)) {
      step = "getting Application.ActiveDocument";
      hr = GetObjectProperty(pApp, OLESTR("ActiveDocument"), &pActiveDoc);
    }
    if (SUCCEEDED(hr)) {
      step = "Document.SaveAs";
      hr = SaveAsUtf8Text(pActiveDoc, bstrSaveFile, bstrEmpty);
    }
    if (FAILED(hr))
      ReportFailure(step, hr);

    // Clean-up: runs on every path so no Word process or reference is leaked.
    if (pActiveDoc != NULL)
      pActiveDoc->Release();
    if (pDocs != NULL)
      pDocs->Release();
    if (pApp != NULL) {
      DISPPARAMS noArgs = {NULL, NULL, 0, 0};
      // Best effort: there is nothing useful to do if Quit itself fails.
      InvokeByName(pApp, OLESTR("Quit"), DISPATCH_METHOD, &noArgs, NULL,
                   LOCALE_USER_DEFAULT);
      pApp->Release();
    }
    ::SysFreeString(bstrOpenFile);
    ::SysFreeString(bstrSaveFile);
    ::SysFreeString(bstrEmpty);
    if (comInitialized)
      ::CoUninitialize();
  }
  // Entry point: converts one hard-coded sample document to text.
  // Both BSTRs are allocated here and handed to DocToTxt, which frees them.
  int main(int argc, char* argv[])
  {
    BSTR inputPath = ::SysAllocString(OLESTR("D:\\code\\data\\c2.docx"));
    BSTR outputPath = ::SysAllocString(OLESTR("D:\\to.txt"));
    DocToTxt(inputPath, outputPath);
    return 0;
  }


 

 

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/我家自动化/article/detail/391445
推荐阅读
相关标签
  

闽ICP备14008679号