diff --git a/benchmark.sh b/benchmark.sh index 8baf75a..c236422 100755 --- a/benchmark.sh +++ b/benchmark.sh @@ -2,7 +2,7 @@ test_n=3 nums=(20 30 40 60) -samples=(100 300 1000 5000) +samples=(200 600 2000 8000) method_n=1 metrics=(0 2) @@ -36,7 +36,7 @@ fi if [ "$1" == "noise" ]; then echo "Run noise dataset." - test_files=("doc19_noise5") + test_files=("doc16_noise20" "doc18_noise20") fi if [ "$1" == "doc" ] || [ "$1" == "noise" ]; then @@ -81,7 +81,7 @@ done for i in $(seq 0 ${method_n}); do for j in $(seq 0 ${test_n}); do - echo "Test args:" ${i} ${j} + echo "Test args:" ${test_name} ${nums[j]} ${metrics[i]} ${comps[i]} ${samples[j]} ./bin/release/solver \ -t $1 \ -n ${nums[j]} \ diff --git a/calc_scores.py b/calc_scores.py index 60c28fd..7c454c9 100644 --- a/calc_scores.py +++ b/calc_scores.py @@ -4,48 +4,56 @@ folder_name = 'data/scores/' scores_list = os.listdir(folder_name) +scores_list.sort(key = lambda x: (len(x), x)) nums = [20, 30, 40, 60] -method_names = ['P-Greedy', 'P-GCOM', 'W-Greedy', 'W-GCOM', 'P-GA'] +method_names = ['P-Greedy', 'C-Greedy', 'W-Greedy', 'W-GCOM', 'P-GA'] method_colors = ['teal', 'olive', 'aqua', 'orchid', 'orange'] -order = [4, 0, 1, 2, 3] -# order = [0, 1, 2, 3] +score_files = ['doc0', 'doc3', 'doc7', 'doc10', 'doc12', 'doc13', 'doc14', 'doc15', 'doc16', 'doc17', 'doc18', 'doc20', 'doc21', 'doc23', 'doc24'] +# order = [4, 0, 1, 2, 3] +order = [0, 1, 2, 3] adjust_flag = True test_cnt = 0 test_n = len(nums) -method_n = len(method_names) -# method_n = 4 +# method_n = len(method_names) +method_n = 4 data = np.zeros(method_n * test_n) our_data = None for file_name in scores_list: - - if (file_name == 'doc11.txt' or file_name == 'doc19.txt'): + if file_name[:-4] not in score_files: continue print(folder_name + file_name) test_cnt += 1 score_file = open(folder_name + file_name, 'r') - for i in range(method_n): - x = score_file.readline() - scores = x.split() + for i in range(2): + scores = score_file.readline().split() if (len(scores) == 0): continue for j in range(test_n): data[i * test_n + j] += float(scores[j]) - - if i == 3: - if our_data is None: - our_data = np.array(scores) - else: - our_data = np.vstack([our_data, np.array(scores)]) + + scores = score_file.readline().split() + next_scores = score_file.readline().split() + scores = scores + next_scores + print(scores) + our_score = [] + for j in range(test_n): + data[2 * test_n + j] += float(scores[j*2]) + data[3 * test_n + j] += float(scores[j*2+1]) + our_score.append(float(scores[j*2+1])) + if our_data is None: + our_data = np.array(our_score) + else: + our_data = np.vstack([our_data, np.array(our_score)]) score_file.close() data /= test_cnt -# print(data) +print(data) print(our_data) for j in range(test_n): diff --git a/include/stripes_solver.h b/include/stripes_solver.h index 1eed732..3e5bba2 100644 --- a/include/stripes_solver.h +++ b/include/stripes_solver.h @@ -90,10 +90,10 @@ class StripesSolver { // Tesseract const string tesseract_model_path {"data/tesseract_model/"}; // tesseract::TessBaseAPI * ocr; - const double word_conf_thres {70}; + const double word_conf_thres {70}; // 70 double lambda0 = 0.3; double lambda1 = 0.5; - double filter_rate = 0.3; + double filter_rate = 0.5; // 0.3 // Compatibility // const int symbols_n = 64; diff --git a/src/solver/stripes_solver.cpp b/src/solver/stripes_solver.cpp index 8161663..24cf458 100644 --- a/src/solver/stripes_solver.cpp +++ b/src/solver/stripes_solver.cpp @@ -370,8 +370,9 @@ cv::Mat StripesSolver::word_detection( const cv::Mat & img, if (ocr_iter != 0) { do { const float conf = ocr_iter->Confidence(tesseract_level); + if (conf < word_conf_thres) continue; const string word = ocr_iter->GetUTF8Text(tesseract_level); - if (word.length() < 3 || conf < word_conf_thres || !ocr_iter->WordIsFromDictionary()) continue; + if (word.length() < 3 || !ocr_iter->WordIsFromDictionary()) continue; // Boundary cross constraint int x0, y0, x1, y1; @@ -385,10 +386,10 @@ cv::Mat StripesSolver::word_detection( const cv::Mat & img, seq_words[vector(seq.begin()+seq_path_st, seq.begin()+seq_path_ed)]++; } #ifdef DEBUG - // cv::rectangle(img_bbox, bbox, color_blue); - // printf("word: '%s'; \tconf: %.2f; \tDict: %d; \tBoundingBox: %d,%d,%d,%d;\n", - // word.c_str(), conf, ocr_iter->WordIsFromDictionary(), x0, y0, x1, y1); - // cout << endl; + // cv::rectangle(img_bbox, bbox, color_blue); + printf("word: '%s'; \tconf: %.2f; \tDict: %d; \tBoundingBox: %d,%d,%d,%d;\n", + word.c_str(), conf, ocr_iter->WordIsFromDictionary(), x0, y0, x1, y1); + cout << endl; #endif @@ -779,7 +780,6 @@ void StripesSolver::compute_bigraph_w(vector< vector > & fragments, vector< for (int j = 0; j < fragments.size(); j++) { if (i == j) continue; - const int bias = real_flag ? 3 : 1; int seam_x = frag_imgs[i].cols; @@ -795,8 +795,9 @@ void StripesSolver::compute_bigraph_w(vector< vector > & fragments, vector< if (ocr_iter != 0) { do { const float conf = ocr_iter->Confidence(tesseract::RIL_WORD); + if (conf < word_conf_thres) continue; const string word = ocr_iter->GetUTF8Text(tesseract::RIL_WORD); - if (word.length() < 3 || conf < word_conf_thres || !ocr_iter->WordIsFromDictionary()) continue; + if (word.length() < 3 || !ocr_iter->WordIsFromDictionary()) continue; // Boundary cross constraint int x0, y0, x1, y1;