tmp

xmlyqing00 · Jun 26, 2019 · a5be006
1 parent 4f7a029
commit a5be006
Show file tree

Hide file tree

Showing 4 changed files with 38 additions and 29 deletions.
diff --git a/benchmark.sh b/benchmark.sh
@@ -2,7 +2,7 @@
 
 test_n=3
 nums=(20 30 40 60)
-samples=(100 300 1000 5000)
+samples=(200 600 2000 8000)
 
 method_n=1
 metrics=(0 2)
@@ -36,7 +36,7 @@ fi
 
 if [ "$1" == "noise" ]; then
     echo "Run noise dataset."
-    test_files=("doc19_noise5")
+    test_files=("doc16_noise20" "doc18_noise20")
 fi
 
 if [ "$1" == "doc" ] || [ "$1" == "noise" ]; then
@@ -81,7 +81,7 @@ done
 
 for i in $(seq 0 ${method_n}); do
     for j in $(seq 0 ${test_n}); do
-        echo "Test args:" ${i} ${j}
+        echo "Test args:" ${test_name} ${nums[j]} ${metrics[i]} ${comps[i]} ${samples[j]}
         ./bin/release/solver \
             -t $1 \
             -n ${nums[j]} \

diff --git a/calc_scores.py b/calc_scores.py
@@ -4,48 +4,56 @@
 
 folder_name = 'data/scores/'
 scores_list = os.listdir(folder_name)
+scores_list.sort(key = lambda x: (len(x), x))
 nums = [20, 30, 40, 60]
-method_names = ['P-Greedy', 'P-GCOM', 'W-Greedy', 'W-GCOM', 'P-GA']
+method_names = ['P-Greedy', 'C-Greedy', 'W-Greedy', 'W-GCOM', 'P-GA']
 method_colors = ['teal', 'olive', 'aqua', 'orchid', 'orange']
-order = [4, 0, 1, 2, 3]
-# order = [0, 1, 2, 3]
+score_files = ['doc0', 'doc3', 'doc7', 'doc10', 'doc12', 'doc13', 'doc14', 'doc15', 'doc16', 'doc17', 'doc18', 'doc20', 'doc21', 'doc23', 'doc24']
+# order = [4, 0, 1, 2, 3]
+order = [0, 1, 2, 3]
 
 adjust_flag = True
 test_cnt = 0
 test_n = len(nums)
-method_n = len(method_names)
-# method_n = 4
+# method_n = len(method_names)
+method_n = 4
 data = np.zeros(method_n * test_n)
 
 our_data = None
 for file_name in scores_list:
-
-    if (file_name == 'doc11.txt' or file_name == 'doc19.txt'):
+    if file_name[:-4] not in score_files:
         continue
 
     print(folder_name + file_name)
     test_cnt += 1
 
     score_file = open(folder_name + file_name, 'r')
-    for i in range(method_n):
-        x = score_file.readline()
-        scores = x.split()
+    for i in range(2):
+        scores = score_file.readline().split()
         if (len(scores) == 0):
             continue
 
         for j in range(test_n):
             data[i * test_n + j] += float(scores[j])
-
-        if i == 3:
-            if our_data is None:
-                our_data = np.array(scores)
-            else:
-                our_data = np.vstack([our_data, np.array(scores)])
+
+    scores = score_file.readline().split()
+    next_scores = score_file.readline().split()
+    scores = scores + next_scores
+    print(scores)
+    our_score = []
+    for j in range(test_n):
+        data[2 * test_n + j] += float(scores[j*2])
+        data[3 * test_n + j] += float(scores[j*2+1])
+        our_score.append(float(scores[j*2+1]))
+    if our_data is None:
+        our_data = np.array(our_score)
+    else:
+        our_data = np.vstack([our_data, np.array(our_score)])
 
     score_file.close()
 
 data /= test_cnt
-# print(data)
+print(data)
 print(our_data)
 
 for j in range(test_n):

diff --git a/include/stripes_solver.h b/include/stripes_solver.h
@@ -90,10 +90,10 @@ class StripesSolver {
     // Tesseract
     const string tesseract_model_path {"data/tesseract_model/"};
     // tesseract::TessBaseAPI * ocr;
-    const double word_conf_thres {70};
+    const double word_conf_thres {70}; // 70
     double lambda0 = 0.3;
     double lambda1 = 0.5;
-    double filter_rate = 0.3;
+    double filter_rate = 0.5; // 0.3
 
     // Compatibility 
     // const int symbols_n = 64;

diff --git a/src/solver/stripes_solver.cpp b/src/solver/stripes_solver.cpp
@@ -370,8 +370,9 @@ cv::Mat StripesSolver::word_detection(  const cv::Mat & img,
     if (ocr_iter != 0) {
         do {
             const float conf = ocr_iter->Confidence(tesseract_level);
+            if (conf < word_conf_thres) continue;
             const string word = ocr_iter->GetUTF8Text(tesseract_level);
-            if (word.length() < 3 || conf < word_conf_thres || !ocr_iter->WordIsFromDictionary()) continue;
+            if (word.length() < 3 || !ocr_iter->WordIsFromDictionary()) continue;
 
             // Boundary cross constraint
             int x0, y0, x1, y1;
@@ -385,10 +386,10 @@ cv::Mat StripesSolver::word_detection(  const cv::Mat & img,
                 seq_words[vector<int>(seq.begin()+seq_path_st, seq.begin()+seq_path_ed)]++;
             }
 #ifdef DEBUG
-                // cv::rectangle(img_bbox, bbox, color_blue);
-                // printf("word: '%s';  \tconf: %.2f; \tDict: %d; \tBoundingBox: %d,%d,%d,%d;\n",
-                //         word.c_str(), conf, ocr_iter->WordIsFromDictionary(), x0, y0, x1, y1);
-                // cout << endl;
+            // cv::rectangle(img_bbox, bbox, color_blue);
+            printf("word: '%s';  \tconf: %.2f; \tDict: %d; \tBoundingBox: %d,%d,%d,%d;\n",
+                    word.c_str(), conf, ocr_iter->WordIsFromDictionary(), x0, y0, x1, y1);
+            cout << endl;
 #endif
 
 
@@ -779,7 +780,6 @@ void StripesSolver::compute_bigraph_w(vector< vector<int> > & fragments, vector<
         for (int j = 0; j < fragments.size(); j++) {
 
             if (i == j) continue;
-
             const int bias = real_flag ? 3 : 1;
 
             int seam_x = frag_imgs[i].cols;
@@ -795,8 +795,9 @@ void StripesSolver::compute_bigraph_w(vector< vector<int> > & fragments, vector<
             if (ocr_iter != 0) {
                 do {
                     const float conf = ocr_iter->Confidence(tesseract::RIL_WORD);
+                    if (conf < word_conf_thres) continue;
                     const string word = ocr_iter->GetUTF8Text(tesseract::RIL_WORD);
-                    if (word.length() < 3 || conf < word_conf_thres || !ocr_iter->WordIsFromDictionary()) continue;
+                    if (word.length() < 3 || !ocr_iter->WordIsFromDictionary()) continue;
 
                     // Boundary cross constraint
                     int x0, y0, x1, y1;