Skip to content

Commit

Permalink
tmp
Browse files (browse the repository at this point in the history)
  • Loading branch information
xmlyqing00 committed Jun 26, 2019
1 parent 4f7a029 commit a5be006
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 29 deletions.
6 changes: 3 additions & 3 deletions benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

test_n=3
nums=(20 30 40 60)
samples=(100 300 1000 5000)
samples=(200 600 2000 8000)

method_n=1
metrics=(0 2)
Expand Down Expand Up @@ -36,7 +36,7 @@ fi

if [ "$1" == "noise" ]; then
echo "Run noise dataset."
test_files=("doc19_noise5")
test_files=("doc16_noise20" "doc18_noise20")
fi

if [ "$1" == "doc" ] || [ "$1" == "noise" ]; then
Expand Down Expand Up @@ -81,7 +81,7 @@ done

for i in $(seq 0 ${method_n}); do
for j in $(seq 0 ${test_n}); do
echo "Test args:" ${i} ${j}
echo "Test args:" ${test_name} ${nums[j]} ${metrics[i]} ${comps[i]} ${samples[j]}
./bin/release/solver \
-t $1 \
-n ${nums[j]} \
Expand Down
42 changes: 25 additions & 17 deletions calc_scores.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,48 +4,56 @@

folder_name = 'data/scores/'
scores_list = os.listdir(folder_name)
scores_list.sort(key = lambda x: (len(x), x))
nums = [20, 30, 40, 60]
method_names = ['P-Greedy', 'P-GCOM', 'W-Greedy', 'W-GCOM', 'P-GA']
method_names = ['P-Greedy', 'C-Greedy', 'W-Greedy', 'W-GCOM', 'P-GA']
method_colors = ['teal', 'olive', 'aqua', 'orchid', 'orange']
order = [4, 0, 1, 2, 3]
# order = [0, 1, 2, 3]
score_files = ['doc0', 'doc3', 'doc7', 'doc10', 'doc12', 'doc13', 'doc14', 'doc15', 'doc16', 'doc17', 'doc18', 'doc20', 'doc21', 'doc23', 'doc24']
# order = [4, 0, 1, 2, 3]
order = [0, 1, 2, 3]

adjust_flag = True
test_cnt = 0
test_n = len(nums)
method_n = len(method_names)
# method_n = 4
# method_n = len(method_names)
method_n = 4
data = np.zeros(method_n * test_n)

our_data = None
for file_name in scores_list:

if (file_name == 'doc11.txt' or file_name == 'doc19.txt'):
if file_name[:-4] not in score_files:
continue

print(folder_name + file_name)
test_cnt += 1

score_file = open(folder_name + file_name, 'r')
for i in range(method_n):
x = score_file.readline()
scores = x.split()
for i in range(2):
scores = score_file.readline().split()
if (len(scores) == 0):
continue

for j in range(test_n):
data[i * test_n + j] += float(scores[j])

if i == 3:
if our_data is None:
our_data = np.array(scores)
else:
our_data = np.vstack([our_data, np.array(scores)])

scores = score_file.readline().split()
next_scores = score_file.readline().split()
scores = scores + next_scores
print(scores)
our_score = []
for j in range(test_n):
data[2 * test_n + j] += float(scores[j*2])
data[3 * test_n + j] += float(scores[j*2+1])
our_score.append(float(scores[j*2+1]))
if our_data is None:
our_data = np.array(our_score)
else:
our_data = np.vstack([our_data, np.array(our_score)])

score_file.close()

data /= test_cnt
# print(data)
print(data)
print(our_data)

for j in range(test_n):
Expand Down
4 changes: 2 additions & 2 deletions include/stripes_solver.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,10 @@ class StripesSolver {
// Tesseract
const string tesseract_model_path {"data/tesseract_model/"};
// tesseract::TessBaseAPI * ocr;
const double word_conf_thres {70};
const double word_conf_thres {70}; // 70
double lambda0 = 0.3;
double lambda1 = 0.5;
double filter_rate = 0.3;
double filter_rate = 0.5; // 0.3

// Compatibility
// const int symbols_n = 64;
Expand Down
15 changes: 8 additions & 7 deletions src/solver/stripes_solver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,8 +370,9 @@ cv::Mat StripesSolver::word_detection( const cv::Mat & img,
if (ocr_iter != 0) {
do {
const float conf = ocr_iter->Confidence(tesseract_level);
if (conf < word_conf_thres) continue;
const string word = ocr_iter->GetUTF8Text(tesseract_level);
if (word.length() < 3 || conf < word_conf_thres || !ocr_iter->WordIsFromDictionary()) continue;
if (word.length() < 3 || !ocr_iter->WordIsFromDictionary()) continue;

// Boundary cross constraint
int x0, y0, x1, y1;
Expand All @@ -385,10 +386,10 @@ cv::Mat StripesSolver::word_detection( const cv::Mat & img,
seq_words[vector<int>(seq.begin()+seq_path_st, seq.begin()+seq_path_ed)]++;
}
#ifdef DEBUG
// cv::rectangle(img_bbox, bbox, color_blue);
// printf("word: '%s'; \tconf: %.2f; \tDict: %d; \tBoundingBox: %d,%d,%d,%d;\n",
// word.c_str(), conf, ocr_iter->WordIsFromDictionary(), x0, y0, x1, y1);
// cout << endl;
// cv::rectangle(img_bbox, bbox, color_blue);
printf("word: '%s'; \tconf: %.2f; \tDict: %d; \tBoundingBox: %d,%d,%d,%d;\n",
word.c_str(), conf, ocr_iter->WordIsFromDictionary(), x0, y0, x1, y1);
cout << endl;
#endif


Expand Down Expand Up @@ -779,7 +780,6 @@ void StripesSolver::compute_bigraph_w(vector< vector<int> > & fragments, vector<
for (int j = 0; j < fragments.size(); j++) {

if (i == j) continue;

const int bias = real_flag ? 3 : 1;

int seam_x = frag_imgs[i].cols;
Expand All @@ -795,8 +795,9 @@ void StripesSolver::compute_bigraph_w(vector< vector<int> > & fragments, vector<
if (ocr_iter != 0) {
do {
const float conf = ocr_iter->Confidence(tesseract::RIL_WORD);
if (conf < word_conf_thres) continue;
const string word = ocr_iter->GetUTF8Text(tesseract::RIL_WORD);
if (word.length() < 3 || conf < word_conf_thres || !ocr_iter->WordIsFromDictionary()) continue;
if (word.length() < 3 || !ocr_iter->WordIsFromDictionary()) continue;

// Boundary cross constraint
int x0, y0, x1, y1;
Expand Down

0 comments on commit a5be006

Please sign in to comment.