Skip to content

Commit

Permalink
Update DateRewriter::RewriteEra() (AD to Era)
Browse files Browse the repository at this point in the history
This patch updates the AD to Era conversion (`RewriteEra()`) to be more consistent with the behavior of the Era to AD conversion (`RewriteAd()`).

In particular:
1. It checks whether the segment `key` matches `kNenKey`, instead of checking whether `candidate(0).value` is `kNenValue`.
2. It accepts both cases: the first segment ending with `kNenKey`, and the next segment starting with `kNenKey`.

PiperOrigin-RevId: 647248914
  • Loading branch information
kojiishi authored and hiroyuki-komatsu committed Jun 27, 2024
1 parent 9fca486 commit 6ceada3
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 49 deletions.
41 changes: 19 additions & 22 deletions src/rewriter/date_rewriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1009,36 +1009,30 @@ bool DateRewriter::RewriteDate(Segment *segment,
}

size_t DateRewriter::RewriteEra(Segments::range segments_range) {
if (segments_range.size() < 2) {
return 0;
}
Segment &current_segment = segments_range.front();
const Segment &next_segment = segments_range[1];
if (current_segment.candidates_size() <= 0 ||
next_segment.candidates_size() <= 0) {
LOG(ERROR) << "Candidate size is 0";
return 0;
}

const std::string &current_key = current_segment.key();
const std::string &next_value = next_segment.candidate(0).value;

if (next_value != "") {
// Rewrite:
// * If the first segment ends with the `kNenKey`, or
// * If the second segment starts with the `kNenKey`.
Segment &segment = segments_range.front();
absl::string_view key = segment.key();
const bool has_suffix = absl::EndsWith(key, kNenKey);
if (has_suffix) {
key.remove_suffix(kNenKey.size());
} else if (segments_range.size() < 2 ||
!absl::StartsWith(segments_range[1].key(), kNenKey)) {
return 0;
}

if (Util::GetScriptType(current_key) != Util::NUMBER) {
if (Util::GetScriptType(key) != Util::NUMBER) {
return 0;
}

const size_t len = Util::CharsLen(current_key);
const size_t len = Util::CharsLen(key);
if (len < 3 || len > 4) {
LOG(WARNING) << "Too long year";
return 0;
}

std::string year_str =
japanese_util::FullWidthAsciiToHalfWidthAscii(current_key);
std::string year_str = japanese_util::FullWidthAsciiToHalfWidthAscii(key);

uint32_t year = 0;
if (!absl::SimpleAtoi(year_str, &year)) {
Expand All @@ -1051,20 +1045,23 @@ size_t DateRewriter::RewriteEra(Segments::range segments_range) {
}

constexpr absl::string_view kDescription = "和暦";
const Segment::Candidate &base_cand = current_segment.candidate(0);
const Segment::Candidate &base_cand = segment.candidate(0);
std::vector<std::unique_ptr<Segment::Candidate>> candidates;
candidates.reserve(results.size());
for (std::string &value : results) {
if (has_suffix) {
value.append(kNenValue);
}
std::unique_ptr<Segment::Candidate> candidate =
CreateCandidate(base_cand, std::move(value), std::string(kDescription));
candidate->attributes &= ~Segment::Candidate::NO_VARIANTS_EXPANSION;
candidates.push_back(std::move(candidate));
}

constexpr int kInsertPosition = 2;
current_segment.insert_candidates(kInsertPosition, std::move(candidates));
segment.insert_candidates(kInsertPosition, std::move(candidates));

return 2; // Consumed 2 segments.
return has_suffix ? 1 : 2;
}

bool DateRewriter::RewriteAd(Segments::range segments_range) {
Expand Down
41 changes: 14 additions & 27 deletions src/rewriter/date_rewriter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -981,33 +981,6 @@ TEST_F(DateRewriterTest, MobileEnvironmentTest) {
}
}

TEST_F(DateRewriterTest, RewriteYearTest) {
DateRewriter rewriter;
Segments segments;
const ConversionRequest request;
InitSegment("2010", "2010", &segments);
AppendSegment("nenn", "", &segments);
EXPECT_TRUE(rewriter.Rewrite(request, &segments));
ASSERT_EQ(segments.segments_size(), 2);
EXPECT_THAT(segments.segment(0), ContainsCandidate(ValueIs("平成22")));
EXPECT_THAT(segments.segment(1), HasSingleCandidate(ValueIs("")));
}

// This test treats the situation that if UserHistoryRewriter or other like
// Rewriter moves up a candidate which is actually a number but can not be
// converted integer easily.
TEST_F(DateRewriterTest, RelationWithUserHistoryRewriterTest) {
DateRewriter rewriter;
Segments segments;
const ConversionRequest request;
InitSegment("2011", "二千十一", &segments);
AppendSegment("nenn", "", &segments);
EXPECT_TRUE(rewriter.Rewrite(request, &segments));
ASSERT_EQ(segments.segments_size(), 2);
EXPECT_THAT(segments.segment(0), ContainsCandidate(ValueIs("平成23")));
EXPECT_THAT(segments.segment(1), HasSingleCandidate(ValueIs("")));
}

TEST_F(DateRewriterTest, ConsecutiveDigitsInsertPositionTest) {
commands::Request request;
const config::Config config;
Expand Down Expand Up @@ -1218,9 +1191,11 @@ INSTANTIATE_TEST_SUITE_P(
Values(
// One segment, the most basic case.
RewriteAdData{{{"へいせい23ねん", "平成23年"}}, 0, "2011年"},
RewriteAdData{{{"2011ねん", "2011年"}}, 0, "平成23年"},
// The `value` should be ignored when rewriting.
RewriteAdData{{{"へいせい23ねん", "兵勢23年"}}, 0, "2011年"},
RewriteAdData{{{"へいせい23ねん", "兵勢23念"}}, 0, "2011年"},
RewriteAdData{{{"2011ねん", "2011念"}}, 0, "平成23年"},
// Invalid era name.
RewriteAdData{{{"ああ23ねん", "ああ23年"}}, 0, ""},
// One segment, with preceding and following segments.
Expand All @@ -1229,15 +1204,27 @@ INSTANTIATE_TEST_SUITE_P(
{"です", "です"}},
1,
"2011年"},
RewriteAdData{
{{"きょうは", "今日は"}, {"2011ねん", "2011年"}, {"です", "です"}},
1,
"平成23年"},
// The "年" suffix in the following segment. They don't need resizing,
// and the result shouldn't contain the "年" suffix.
RewriteAdData{{{"へいせい23", "平成23"}, {"ねん", ""}}, 0, "2011"},
RewriteAdData{{{"2011", "2011"}, {"ねん", ""}}, 0, "平成23"},
RewriteAdData{{{"2011", "二千十一"}, {"ねん", ""}}, 0, "平成23"},
RewriteAdData{{{"きょうは", "今日は"},
{"へいせい23", "平成23"},
{"ねん", ""},
{"です", "です"}},
1,
"2011"},
RewriteAdData{{{"きょうは", "今日は"},
{"2011", "2011"},
{"ねん", ""},
{"です", "です"}},
1,
"平成23"},
// Multiple segments.
RewriteAdData{{{"へいせい", "平成"}, {"23ねん", "23年"}},
0,
Expand Down

0 comments on commit 6ceada3

Please sign in to comment.