Skip to content

Commit 1c95f5d

Browse files
author
lixungeng
committed
add C# demo and encoding check.
1 parent ec76d84 commit 1c95f5d

File tree

7 files changed

+161
-5
lines changed

7 files changed

+161
-5
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,6 @@
1313

1414
.vs/
1515
/demo/
16+
Properties/
17+
bin/
18+
obj/

c_sharp/Program.cs

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
using System;
2+
using System.Runtime.InteropServices;
3+
using System.Text;
4+
5+
/// <summary>
/// Minimal console demo that calls the native <c>wechat_ocr</c> export of wcocr.dll
/// and prints the OCR result delivered through a callback.
/// </summary>
class Program
{
    // Note: copy the generated 64-bit dll into c_sharp\bin\x64\Debug\net8.0
    // before running, otherwise the DllImport below cannot be resolved.

    /// <summary>
    /// Native OCR entry point.
    /// </summary>
    /// <param name="ocr_exe">Path of the OCR executable (passed as a wide string).</param>
    /// <param name="wechat_dir">WeChat installation directory (passed as a wide string).</param>
    /// <param name="imgfn">
    /// Image file name. Marshalled as UTF-8 so that non-ANSI characters survive the
    /// transition; the native side checks for UTF-8 and only falls back to an ANSI
    /// conversion when the bytes are not valid UTF-8.
    /// </param>
    /// <param name="set_res">Callback that receives a pointer to the result text.</param>
    /// <returns>The success flag reported by the native function.</returns>
    [DllImport("wcocr.dll", CallingConvention = CallingConvention.Cdecl)]
    // Without this attribute the runtime treats the return value as a 4-byte Win32 BOOL.
    // NOTE(review): assumes the export returns a 1-byte C/C++ bool - confirm against the header.
    [return: MarshalAs(UnmanagedType.I1)]
    public static extern bool wechat_ocr(
        [MarshalAs(UnmanagedType.LPWStr)] string ocr_exe,
        [MarshalAs(UnmanagedType.LPWStr)] string wechat_dir,
        [MarshalAs(UnmanagedType.LPUTF8Str)] string imgfn,
        SetResultDelegate set_res);

    /// <summary>
    /// Callback signature handed to the native code; declared Cdecl to match the import.
    /// </summary>
    /// <param name="result">Pointer to a NUL-terminated byte string, decoded as UTF-8 by this demo.</param>
    [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
    public delegate void SetResultDelegate(IntPtr result);

    /// <summary>
    /// Receives the native result pointer and keeps the decoded text.
    /// </summary>
    public class StringResult
    {
        /// <summary>Last decoded result; empty until <see cref="SetResult"/> is called.</summary>
        public string result_ = "";

        /// <summary>
        /// Decodes the NUL-terminated UTF-8 bytes at <paramref name="dt"/> and stores them.
        /// A null pointer is stored as an empty string instead of faulting.
        /// </summary>
        /// <param name="dt">Pointer supplied by the native callback.</param>
        public void SetResult(IntPtr dt)
        {
            // PtrToStringUTF8 scans to the terminating NUL and returns null for IntPtr.Zero.
            result_ = Marshal.PtrToStringUTF8(dt) ?? "";
        }

        /// <summary>Returns the most recently stored result text.</summary>
        public string GetResult()
        {
            return result_;
        }
    }

    /// <summary>
    /// Entry point. Expects exactly three arguments: ocr_exe, wechat_dir and imgfn.
    /// </summary>
    static void Main(string[] args)
    {
        if (args.Length != 3)
        {
            Console.WriteLine("Usage: wcocr.exe ocr_exe wechat_dir imgfn");
            return;
        }

        StringResult stringResult = new StringResult();
        SetResultDelegate setRes = new SetResultDelegate(stringResult.SetResult);
        bool success = wechat_ocr(args[0], args[1], args[2], setRes);

        // Keep the delegate reachable until the native call has returned so the
        // function pointer handed to native code cannot be collected early.
        GC.KeepAlive(setRes);

        Console.WriteLine($"OCR Success: {success} res:{stringResult.GetResult()}");
    }
}

c_sharp/test_cs.csproj

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net8.0</TargetFramework>
6+
<ImplicitUsings>enable</ImplicitUsings>
7+
<Nullable>enable</Nullable>
8+
<Platforms>AnyCPU;x64</Platforms>
9+
</PropertyGroup>
10+
11+
</Project>

c_sharp/test_cs.sln

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
2+
Microsoft Visual Studio Solution File, Format Version 12.00
3+
# Visual Studio Version 17
4+
VisualStudioVersion = 17.9.34728.123
5+
MinimumVisualStudioVersion = 10.0.40219.1
6+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "test_cs", "test_cs.csproj", "{0F79C844-AF8C-44CA-9D3C-27F1E47E4805}"
7+
EndProject
8+
Global
9+
GlobalSection(SolutionConfigurationPlatforms) = preSolution
10+
Debug|Any CPU = Debug|Any CPU
11+
Debug|x64 = Debug|x64
12+
Release|Any CPU = Release|Any CPU
13+
Release|x64 = Release|x64
14+
EndGlobalSection
15+
GlobalSection(ProjectConfigurationPlatforms) = postSolution
16+
{0F79C844-AF8C-44CA-9D3C-27F1E47E4805}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
17+
{0F79C844-AF8C-44CA-9D3C-27F1E47E4805}.Debug|Any CPU.Build.0 = Debug|Any CPU
18+
{0F79C844-AF8C-44CA-9D3C-27F1E47E4805}.Debug|x64.ActiveCfg = Debug|x64
19+
{0F79C844-AF8C-44CA-9D3C-27F1E47E4805}.Debug|x64.Build.0 = Debug|x64
20+
{0F79C844-AF8C-44CA-9D3C-27F1E47E4805}.Release|Any CPU.ActiveCfg = Release|Any CPU
21+
{0F79C844-AF8C-44CA-9D3C-27F1E47E4805}.Release|Any CPU.Build.0 = Release|Any CPU
22+
{0F79C844-AF8C-44CA-9D3C-27F1E47E4805}.Release|x64.ActiveCfg = Release|x64
23+
{0F79C844-AF8C-44CA-9D3C-27F1E47E4805}.Release|x64.Build.0 = Release|x64
24+
EndGlobalSection
25+
GlobalSection(SolutionProperties) = preSolution
26+
HideSolutionNode = FALSE
27+
EndGlobalSection
28+
GlobalSection(ExtensibilityGlobals) = postSolution
29+
SolutionGuid = {61ED6E08-2B59-4CDB-8534-BC52560BC05E}
30+
EndGlobalSection
31+
EndGlobal

pb/ocr_protobuf.proto

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ message OcrRespond {
4848
message OcrRequest {
4949
int32 unknow = 1; //必定为0
5050
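// Verified several times: in Tencent's proto file this task_id really is 64-bit. At run time, however, the upper 32 bits are discarded, and an error occurs if the 32nd bit is 1.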
51-
// 也就是协议上是有64位的uint64,实际上只有31位。必须是>0的整形数字,范围是[12147483647]
51+
// 也就是协议上是有64位的uint64,实际上只有31位。必须是>0的整形数字,范围是[1,2147483647]
5252
uint64 task_id = 2;
5353

5454
message OcrInputBuffer {

src/main.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ HRESULT DllRegisterServer(void)
7575
return E_FAIL;
7676
}
7777
wechat_ocr.doOCR(getenv("TEST_PNG"), nullptr);
78-
// MessageBoxW(NULL, L"注册成功", L"提示", MB_OK);
7978
wechat_ocr.wait_done(-1);
8079
return S_OK;
8180
}

src/wechatocr.cpp

Lines changed: 66 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,55 @@
33
#include "ocr_protobuf.pb.h"
44
#include "mmmojo.h"
55

6+
namespace {

// Returns true when the `len` bytes starting at `sin` form well-formed UTF-8.
//
// Pure ASCII (and an empty buffer) counts as UTF-8. Beyond checking the
// lead/continuation byte pattern, the second byte of every multi-byte sequence
// is range-checked so that ill-formed input is rejected:
//   - overlong encodings (lead bytes C0/C1, E0 80..9F, F0 80..8F),
//   - UTF-16 surrogates (ED A0..BF),
//   - code points above U+10FFFF (F4 90.., lead bytes F5..FF).
// Several of those byte pairs are legal double-byte sequences in ANSI code
// pages such as GBK, so accepting them would misclassify such text as UTF-8.
//
// sin: pointer to the bytes to inspect (not required to be NUL-terminated).
// len: number of bytes to inspect.
bool is_text_utf8(const char* sin, size_t len) {
    const unsigned char* s = (const unsigned char*)sin;
    const unsigned char* end = s + len;
    while (s < end) {
        const unsigned char lead = *s;
        if (lead < 0x80) {
            // Single-byte (ASCII) code point.
            ++s;
            continue;
        }

        size_t trailing;           // number of continuation bytes required
        unsigned char lo = 0x80;   // allowed range of the first continuation byte
        unsigned char hi = 0xBF;
        if (lead >= 0xC2 && lead <= 0xDF) {
            trailing = 1;
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            trailing = 2;
            if (lead == 0xE0)
                lo = 0xA0;         // below this the 3-byte form would be overlong
            else if (lead == 0xED)
                hi = 0x9F;         // above this lies the surrogate range
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            trailing = 3;
            if (lead == 0xF0)
                lo = 0x90;         // below this the 4-byte form would be overlong
            else if (lead == 0xF4)
                hi = 0x8F;         // above this exceeds U+10FFFF
        } else {
            // Stray continuation byte (80..BF), overlong lead (C0/C1) or F5..FF.
            return false;
        }

        // The sequence must fit entirely inside the buffer.
        if ((size_t)(end - s) <= trailing)
            return false;
        if (s[1] < lo || s[1] > hi)
            return false;
        for (size_t i = 2; i <= trailing; ++i) {
            if ((s[i] & 0xC0) != 0x80)
                return false;
        }
        s += trailing + 1;
    }
    return true;
}

}
35+
636
CWeChatOCR::CWeChatOCR(LPCWSTR exe, LPCWSTR wcdir)
737
{
838
m_exe = exe;
939
m_wcdir = wcdir;
40+
DWORD attr1 = GetFileAttributesW(exe);
41+
if (attr1 == INVALID_FILE_ATTRIBUTES || (attr1 & FILE_ATTRIBUTE_DIRECTORY) != 0)
42+
{
43+
// 传入的ocr.exe路径无效
44+
m_state = STATE_INVALID;
45+
return;
46+
}
47+
DWORD attr2 = GetFileAttributesW(wcdir);
48+
if (attr2 == INVALID_FILE_ATTRIBUTES || (attr2 & FILE_ATTRIBUTE_DIRECTORY) == 0)
49+
{
50+
// 传入的微信目录无效
51+
m_state = STATE_INVALID;
52+
return;
53+
}
54+
1055
if (Init(m_wcdir.c_str()))
1156
{
1257
m_args["user-lib-dir"] = m_wcdir;
@@ -22,17 +67,35 @@ CWeChatOCR::CWeChatOCR(LPCWSTR exe, LPCWSTR wcdir)
2267
}
2368

2469
#define OCR_MAX_TASK_ID INT_MAX
25-
bool CWeChatOCR::doOCR(crefstr imgpath, result_t* res)
70+
bool CWeChatOCR::doOCR(crefstr imgpath0, result_t* res)
2671
{
2772
if (m_state == STATE_INVALID || (m_state == STATE_PENDING && !wait_connection(2000)))
2873
return false;
2974

75+
// 为了更好的中文支持,这里检查imgpath是否是utf8编码的,如果是GBK,需要转换为utf8编码。
76+
// TODO: 其实不应该写在这里,应该是调用者确保传入的imgpath是utf8编码的。
77+
string tmp;
78+
const string * imgpath = &imgpath0;
79+
if (!is_text_utf8(imgpath0.c_str(), imgpath0.length())) {
80+
wstring wimgpath;
81+
wimgpath.resize(imgpath0.size() + 10);
82+
int len = MultiByteToWideChar(CP_ACP, 0, imgpath0.c_str(), (int)imgpath0.length(), &wimgpath[0], (int)wimgpath.size());
83+
if (len > 0) {
84+
tmp.resize(len * 3 + 10);
85+
len = WideCharToMultiByte(CP_UTF8, 0, wimgpath.c_str(), len, &tmp[0], (int)tmp.size(), 0, 0);
86+
if (len > 0) {
87+
tmp.resize(len);
88+
imgpath = &tmp;
89+
}
90+
}
91+
}
92+
3093
int found_id = -1;
3194
m_mutex.lock();
3295
// TODO: task_id本身可以是任何正整形数,但是这里要不要限制同时并发任务数呢?
3396
for (uint32_t i = 1; i <= OCR_MAX_TASK_ID; ++i)
3497
{
35-
if (m_idpath.insert(std::make_pair(i, std::pair<string,result_t*>(imgpath,res))).second)
98+
if (m_idpath.insert(std::make_pair(i, std::pair<string,result_t*>(*imgpath,res))).second)
3699
{
37100
found_id = i;
38101
break;
@@ -47,7 +110,7 @@ bool CWeChatOCR::doOCR(crefstr imgpath, result_t* res)
47110
ocr_request.set_unknow(0);
48111
ocr_request.set_task_id(found_id);
49112
auto pp = new ocr_protobuf::OcrRequest::OcrInputBuffer;
50-
pp->set_pic_path(imgpath);
113+
pp->set_pic_path(*imgpath);
51114
ocr_request.set_allocated_input(pp);
52115

53116
std::string data_;

0 commit comments

Comments
 (0)