Skip to content

Commit 4926813

Browse files
author
Fcitx Bot
committed
Merge remote-tracking branch 'origin/master' into fcitx
2 parents fe619ed + 1a2f57b commit 4926813

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+1265
-826
lines changed

src/converter/BUILD.bazel

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,12 +57,15 @@ mozc_cc_library(
5757
"//rewriter:__pkg__",
5858
],
5959
deps = [
60+
":attribute",
61+
":inner_segment",
6062
":lattice",
6163
"//base:number_util",
6264
"//base:util",
6365
"//base:vlog",
6466
"//base/container:freelist",
6567
"//base/strings:assign",
68+
"@com_google_absl//absl/base:core_headers",
6669
"@com_google_absl//absl/log",
6770
"@com_google_absl//absl/log:check",
6871
"@com_google_absl//absl/strings",
@@ -90,13 +93,59 @@ mozc_cc_test(
9093
size = "small",
9194
srcs = ["candidate_test.cc"],
9295
deps = [
96+
":inner_segment",
9397
":segments",
9498
"//base:number_util",
9599
"//testing:gunit_main",
96100
"@com_google_absl//absl/strings",
97101
],
98102
)
99103

104+
# Header-only target exporting attribute.h (no srcs and no deps are declared).
mozc_cc_library(
105+
name = "attribute",
106+
hdrs = ["attribute.h"],
107+
visibility = [
108+
"//engine:__pkg__",
109+
"//prediction:__pkg__",
110+
"//rewriter:__pkg__",
111+
],
112+
)
113+
114+
# Header-only target exporting inner_segment.h; it depends only on Abseil.
mozc_cc_library(
115+
name = "inner_segment",
116+
hdrs = [
117+
"inner_segment.h",
118+
],
119+
visibility = [
120+
"//engine:__pkg__",
121+
"//prediction:__pkg__",
122+
"//request:__pkg__",
123+
"//rewriter:__pkg__",
124+
],
125+
deps = [
126+
"@com_google_absl//absl/container:fixed_array",
127+
"@com_google_absl//absl/log:check",
128+
"@com_google_absl//absl/strings",
129+
"@com_google_absl//absl/strings:str_format",
130+
"@com_google_absl//absl/types:span",
131+
],
132+
)
133+
134+
# Test target built from inner_segment_test.cc against the :inner_segment library.
mozc_cc_test(
135+
name = "inner_segment_test",
136+
srcs = [
137+
"inner_segment_test.cc",
138+
],
139+
deps = [
140+
":inner_segment",
141+
"//testing:gunit_main",
142+
"@com_google_absl//absl/log",
143+
"@com_google_absl//absl/log:check",
144+
"@com_google_absl//absl/strings",
145+
"@com_google_absl//absl/types:span",
146+
],
147+
)
148+
100149
mozc_cc_library(
101150
name = "segments_matchers",
102151
testonly = 1,
@@ -212,6 +261,7 @@ mozc_cc_library(
212261
deps = [
213262
":candidate_filter",
214263
":connector",
264+
":inner_segment",
215265
":lattice",
216266
":node",
217267
":segmenter",
@@ -412,6 +462,7 @@ mozc_cc_test(
412462
],
413463
deps = [
414464
":immutable_converter_no_factory",
465+
":inner_segment",
415466
":lattice",
416467
":node",
417468
":segments",
@@ -464,6 +515,7 @@ mozc_cc_library(
464515
":converter_interface",
465516
":history_reconstructor",
466517
":immutable_converter_interface",
518+
":inner_segment",
467519
":reverse_converter",
468520
":segments",
469521
"//base:util",
@@ -498,6 +550,7 @@ mozc_cc_test(
498550
":converter_interface",
499551
":immutable_converter_interface",
500552
":immutable_converter_no_factory",
553+
":inner_segment",
501554
":segments",
502555
":segments_matchers",
503556
"//base:util",

src/converter/attribute.h

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
// Copyright 2010-2021, Google Inc.
2+
// All rights reserved.
3+
//
4+
// Redistribution and use in source and binary forms, with or without
5+
// modification, are permitted provided that the following conditions are
6+
// met:
7+
//
8+
// * Redistributions of source code must retain the above copyright
9+
// notice, this list of conditions and the following disclaimer.
10+
// * Redistributions in binary form must reproduce the above
11+
// copyright notice, this list of conditions and the following disclaimer
12+
// in the documentation and/or other materials provided with the
13+
// distribution.
14+
// * Neither the name of Google Inc. nor the names of its
15+
// contributors may be used to endorse or promote products derived from
16+
// this software without specific prior written permission.
17+
//
18+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29+
30+
#ifndef MOZC_CONVERTER_ATTRIBUTE_H_
31+
#define MOZC_CONVERTER_ATTRIBUTE_H_
32+
33+
namespace mozc {
34+
namespace converter {
35+
36+
// Attribute wraps an unscoped enum of candidate attribute bit flags. The enum
37+
// is left unscoped so its values convert implicitly to integer types for
38+
// bitwise operations, as an alternative to an enum class with explicit casts.
39+
class Attribute {
40+
public:
41+
// TODO(taku): Can use unnamed enum after full migration.
42+
enum Attribute_ {
43+
DEFAULT_ATTRIBUTE = 0,
44+
// This was the best candidate before learning.
45+
BEST_CANDIDATE = 1 << 0,
46+
// This candidate was reranked by the user.
47+
RERANKED = 1 << 1,
48+
// Don't save it in history.
49+
NO_HISTORY_LEARNING = 1 << 2,
50+
// Don't save it in suggestion.
51+
NO_SUGGEST_LEARNING = 1 << 3,
52+
// Both learning bits combined: NO_HISTORY_LEARNING | NO_SUGGEST_LEARNING.
53+
NO_LEARNING = (1 << 2 | 1 << 3),
54+
// Learn it with left/right context.
55+
CONTEXT_SENSITIVE = 1 << 4,
56+
// Has "did you mean".
57+
SPELLING_CORRECTION = 1 << 5,
58+
// No need to have full/half width expansion.
59+
NO_VARIANTS_EXPANSION = 1 << 6,
60+
// No need to have extra descriptions.
61+
NO_EXTRA_DESCRIPTION = 1 << 7,
62+
// Was generated by real-time conversion.
63+
REALTIME_CONVERSION = 1 << 8,
64+
// Contains tokens in user dictionary.
65+
USER_DICTIONARY = 1 << 9,
66+
// Command candidate, e.g., incognito mode.
67+
COMMAND_CANDIDATE = 1 << 10,
68+
// Key characters are consumed partially.
69+
// Consumed size is |consumed_key_size|.
70+
// If not set, all the key characters are consumed.
71+
PARTIALLY_KEY_CONSUMED = 1 << 11,
72+
// Typing correction candidate.
73+
// - Special description should be shown when the candidate is created
74+
// by a dictionary predictor.
75+
// - No description should be shown when the candidate is loaded from
76+
// history.
77+
// - Otherwise the following unexpected behavior can be observed:
78+
// 1. Type "やんしょん" and submit "マンション" (annotated with "補正").
79+
// 2. Type "まんしょん".
80+
// 3. "マンション" (annotated with "補正") is shown as a candidate
81+
// regardless of a user's correct typing.
82+
TYPING_CORRECTION = 1 << 12,
83+
// Auto partial suggestion candidate.
84+
// - Special description should be shown when the candidate is created
85+
// by a dictionary predictor.
86+
// - No description should be shown when the candidate is loaded from
87+
// history.
88+
AUTO_PARTIAL_SUGGESTION = 1 << 13,
89+
// Predicted from user prediction history.
90+
USER_HISTORY_PREDICTION = 1 << 14,
91+
// Contains suffix dictionary.
92+
SUFFIX_DICTIONARY = 1 << 15,
93+
// Disables modification and removal in rewriters.
94+
NO_MODIFICATION = 1 << 16,
95+
// Candidate which is reranked by user segment history rewriter.
96+
USER_SEGMENT_HISTORY_REWRITER = 1 << 17,
97+
// Keys are expanded in the dictionary lookup. Usually
98+
// Kana-modifiers are expanded.
99+
KEY_EXPANDED_IN_DICTIONARY = 1 << 18,
100+
};
101+
};
102+
} // namespace converter
103+
} // namespace mozc
104+
105+
#endif // MOZC_CONVERTER_ATTRIBUTE_H_

src/converter/candidate.cc

Lines changed: 5 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include "absl/strings/string_view.h"
4545
#include "absl/types/span.h"
4646
#include "base/number_util.h"
47+
#include "converter/inner_segment.h"
4748

4849
#ifdef MOZC_CANDIDATE_DEBUG
4950
#include "absl/strings/str_cat.h"
@@ -85,54 +86,6 @@ void Candidate::Dlog(absl::string_view filename, int line,
8586
}
8687
#endif // MOZC_CANDIDATE_DEBUG
8788

88-
bool Candidate::IsValid() const {
89-
if (inner_segment_boundary.empty()) {
90-
return true;
91-
}
92-
// The sums of the lengths of key, value components must coincide with those
93-
// of key, value, respectively.
94-
size_t sum_key_len = 0, sum_value_len = 0;
95-
for (InnerSegmentIterator iter(this); !iter.Done(); iter.Next()) {
96-
sum_key_len += iter.GetKey().size();
97-
sum_value_len += iter.GetValue().size();
98-
}
99-
return sum_key_len == key.size() && sum_value_len == value.size();
100-
}
101-
102-
bool Candidate::EncodeLengths(size_t key_len, size_t value_len,
103-
size_t content_key_len, size_t content_value_len,
104-
uint32_t *result) {
105-
if (key_len > std::numeric_limits<uint8_t>::max() ||
106-
value_len > std::numeric_limits<uint8_t>::max() ||
107-
content_key_len > std::numeric_limits<uint8_t>::max() ||
108-
content_value_len > std::numeric_limits<uint8_t>::max()) {
109-
return false;
110-
}
111-
*result = (static_cast<uint32_t>(key_len) << 24) |
112-
(static_cast<uint32_t>(value_len) << 16) |
113-
(static_cast<uint32_t>(content_key_len) << 8) |
114-
static_cast<uint32_t>(content_value_len);
115-
return true;
116-
}
117-
118-
std::tuple<size_t, size_t, size_t, size_t> Candidate::DecodeLengths(
119-
uint32_t encoded) {
120-
return std::make_tuple(encoded >> 24, (encoded >> 16) & 0xff,
121-
(encoded >> 8) & 0xff, (encoded & 0xff));
122-
}
123-
124-
bool Candidate::PushBackInnerSegmentBoundary(size_t key_len, size_t value_len,
125-
size_t content_key_len,
126-
size_t content_value_len) {
127-
uint32_t encoded;
128-
if (EncodeLengths(key_len, value_len, content_key_len, content_value_len,
129-
&encoded)) {
130-
inner_segment_boundary.push_back(encoded);
131-
return true;
132-
}
133-
return false;
134-
}
135-
13689
std::string Candidate::DebugString() const {
13790
std::stringstream os;
13891
os << "(key=" << key << " ckey=" << content_key << " val=" << value
@@ -151,70 +104,15 @@ std::string Candidate::DebugString() const {
151104
}
152105
if (!inner_segment_boundary.empty()) {
153106
os << " segbdd=";
154-
for (size_t i = 0; i < inner_segment_boundary.size(); ++i) {
155-
const uint32_t encoded_lengths = inner_segment_boundary[i];
156-
const auto [key_len, value_len, content_key_len, content_value_len] =
157-
DecodeLengths(encoded_lengths);
158-
os << absl::StreamFormat("<%d,%d,%d,%d>", key_len, value_len,
159-
content_key_len, content_value_len);
107+
for (const auto &iter : inner_segments()) {
108+
os << absl::StreamFormat(
109+
"<%d,%d,%d,%d>", iter.GetKey().size(), iter.GetValue().size(),
110+
iter.GetContentKey().size(), iter.GetContentValue().size());
160111
}
161112
}
162113
os << ")" << std::endl;
163114
return os.str();
164115
}
165116

166-
void Candidate::InnerSegmentIterator::Next() {
167-
DCHECK_LT(index_, inner_segment_boundary_.size());
168-
const uint32_t encoded_lengths = inner_segment_boundary_[index_++];
169-
key_offset_ += encoded_lengths >> 24;
170-
value_offset_ += (encoded_lengths >> 16) & 0xff;
171-
}
172-
173-
absl::string_view Candidate::InnerSegmentIterator::GetKey() const {
174-
DCHECK_LT(index_, inner_segment_boundary_.size());
175-
const uint32_t encoded_lengths = inner_segment_boundary_[index_];
176-
return absl::string_view(key_offset_, encoded_lengths >> 24);
177-
}
178-
179-
absl::string_view Candidate::InnerSegmentIterator::GetValue() const {
180-
DCHECK_LT(index_, inner_segment_boundary_.size());
181-
const uint32_t encoded_lengths = inner_segment_boundary_[index_];
182-
return absl::string_view(value_offset_, (encoded_lengths >> 16) & 0xff);
183-
}
184-
185-
absl::string_view Candidate::InnerSegmentIterator::GetContentKey() const {
186-
DCHECK_LT(index_, inner_segment_boundary_.size());
187-
const uint32_t encoded_lengths = inner_segment_boundary_[index_];
188-
return absl::string_view(key_offset_, (encoded_lengths >> 8) & 0xff);
189-
}
190-
191-
absl::string_view Candidate::InnerSegmentIterator::GetContentValue() const {
192-
DCHECK_LT(index_, inner_segment_boundary_.size());
193-
const uint32_t encoded_lengths = inner_segment_boundary_[index_];
194-
return absl::string_view(value_offset_, encoded_lengths & 0xff);
195-
}
196-
197-
absl::string_view Candidate::InnerSegmentIterator::GetFunctionalKey() const {
198-
DCHECK_LT(index_, inner_segment_boundary_.size());
199-
const uint32_t encoded_lengths = inner_segment_boundary_[index_];
200-
const int key_len = encoded_lengths >> 24;
201-
const int content_key_len = (encoded_lengths >> 8) & 0xff;
202-
if (const int key_size = key_len - content_key_len; key_size > 0) {
203-
return absl::string_view(key_offset_ + content_key_len, key_size);
204-
}
205-
return absl::string_view();
206-
}
207-
208-
absl::string_view Candidate::InnerSegmentIterator::GetFunctionalValue() const {
209-
DCHECK_LT(index_, inner_segment_boundary_.size());
210-
const uint32_t encoded_lengths = inner_segment_boundary_[index_];
211-
const int value_len = (encoded_lengths >> 16) & 0xff;
212-
const int content_value_len = encoded_lengths & 0xff;
213-
if (const int value_size = value_len - content_value_len; value_size > 0) {
214-
return absl::string_view(value_offset_ + content_value_len, value_size);
215-
}
216-
return absl::string_view();
217-
}
218-
219117
} // namespace converter
220118
} // namespace mozc

0 commit comments

Comments
 (0)