diff --git a/CMakeLists.txt b/CMakeLists.txt
index 861e03f69..7966ed55b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,7 +3,13 @@
 ########################################################################
 #
 # Project-wide settings
-CMAKE_MINIMUM_REQUIRED(VERSION 3.1.0)
+CMAKE_MINIMUM_REQUIRED(VERSION 3.8.0)
+if(POLICY CMP0011)
+	cmake_policy(SET CMP0011 NEW)
+endif()
+if(POLICY CMP0074)
+	cmake_policy(SET CMP0074 NEW)
+endif()
 
 # Name of the project.
 #
@@ -13,8 +19,8 @@ CMAKE_MINIMUM_REQUIRED(VERSION 3.1.0)
 PROJECT(OpenMVS)
 
 set(OpenMVS_MAJOR_VERSION 1)
-set(OpenMVS_MINOR_VERSION 0)
-set(OpenMVS_PATCH_VERSION 1)
+set(OpenMVS_MINOR_VERSION 1)
+set(OpenMVS_PATCH_VERSION 0)
 set(OpenMVS_VERSION ${OpenMVS_MAJOR_VERSION}.${OpenMVS_MINOR_VERSION}.${OpenMVS_PATCH_VERSION})
 
 # Find dependencies:
@@ -27,7 +33,6 @@ endif()
 SET(COTIRE_INTDIR "cotire")
 
 # Define helper functions and macros.
-cmake_policy(SET CMP0011 OLD)
 INCLUDE(build/Utils.cmake)
 if(ENABLE_PRECOMPILED_HEADERS)
 	INCLUDE(build/Cotire.cmake)
@@ -75,6 +80,9 @@ if(OpenMVS_USE_OPENMP)
 endif()
 
 if(OpenMVS_USE_OPENGL)
+	if(POLICY CMP0072)
+		cmake_policy(SET CMP0072 NEW)
+	endif()
 	FIND_PACKAGE(OpenGL)
 	if(OPENGL_FOUND)
 		INCLUDE_DIRECTORIES(${OPENGL_INCLUDE_DIR})
diff --git a/MvgMvsPipeline.py b/MvgMvsPipeline.py
new file mode 100644
index 000000000..f4914bf86
--- /dev/null
+++ b/MvgMvsPipeline.py
@@ -0,0 +1,358 @@
+#!/usr/bin/python3
+# -*- encoding: utf-8 -*-
+#
+# Created by @FlachyJoe
+"""
+This script is for an easy use of OpenMVG and OpenMVS
+
+usage: MvgMvs_Pipeline.py [-h] [--steps STEPS [STEPS ...]] [--preset PRESET]
+                          [--0 0 [0 ...]] [--1 1 [1 ...]] [--2 2 [2 ...]]
+                          [--3 3 [3 ...]] [--4 4 [4 ...]] [--5 5 [5 ...]]
+                          [--6 6 [6 ...]] [--7 7 [7 ...]] [--8 8 [8 ...]]
+                          [--9 9 [9 ...]] [--10 10 [10 ...]] [--11 11 [11 ...]]
+                          [--12 12 [12 ...]] [--13 13 [13 ...]]
+                          [--14 14 [14 ...]] [--15 15 [15 ...]]
+                          input_dir output_dir
+
+Photogrammetry reconstruction with these steps:
+    0. Intrinsics analysis             openMVG_main_SfMInit_ImageListing
+    1. Compute features                openMVG_main_ComputeFeatures
+    2. Compute matches                 openMVG_main_ComputeMatches
+    3. Incremental reconstruction      openMVG_main_IncrementalSfM
+    4. Global reconstruction           openMVG_main_GlobalSfM
+    5. Colorize Structure              openMVG_main_ComputeSfM_DataColor
+    6. Structure from Known Poses      openMVG_main_ComputeStructureFromKnownPoses
+    7. Colorized robust triangulation  openMVG_main_ComputeSfM_DataColor
+    8. Control Points Registration     ui_openMVG_control_points_registration
+    9. Export to openMVS               openMVG_main_openMVG2openMVS
+    10. Densify point-cloud            DensifyPointCloud
+    11. Reconstruct the mesh           ReconstructMesh
+    12. Refine the mesh                RefineMesh
+    13. Texture the mesh               TextureMesh
+    14. Estimate disparity-maps        DensifyPointCloud
+    15. Fuse disparity-maps            DensifyPointCloud
+
+positional arguments:
+  input_dir                 the directory wich contains the pictures set.
+  output_dir                the directory wich will contain the resulting files.
+
+optional arguments:
+  -h, --help                show this help message and exit
+  --steps STEPS [STEPS ...] steps to process
+  --preset PRESET           steps list preset in
+                            SEQUENTIAL = [0, 1, 2, 3, 9, 10, 11, 12, 13]
+                            GLOBAL = [0, 1, 2, 4, 9, 10, 11, 12, 13]
+                            MVG_SEQ = [0, 1, 2, 3, 5, 6, 7]
+                            MVG_GLOBAL = [0, 1, 2, 4, 5, 6, 7]
+                            MVS_SGM = [14, 15]
+                            default : SEQUENTIAL
+
+Passthrough:
+  Option to be passed to command lines (remove - in front of option names)
+  e.g. --1 p ULTRA to use the ULTRA preset in openMVG_main_ComputeFeatures
+"""
+
+import os
+import subprocess
+import sys
+import argparse
+
+DEBUG = False
+
+# add current directory to PATH
+if sys.platform.startswith('win'):
+    path_delim = ';'
+else:
+    path_delim = ':'
+os.environ['PATH'] += path_delim + os.getcwd()
+
+
+def whereis(afile):
+    """
+        return directory in which afile is, None if not found. Look in PATH
+    """
+    if sys.platform.startswith('win'):
+        cmd = "where"
+    else:
+        cmd = "which"
+    try:
+        ret = subprocess.run([cmd, afile], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True)
+        return os.path.split(ret.stdout.decode())[0]
+    except subprocess.CalledProcessError:
+        return None
+
+def find(afile):
+    """
+        As whereis look only for executable on linux, this find look for all file type
+    """
+    for d in os.environ['PATH'].split(path_delim):
+        if os.path.isfile(os.path.join(d, afile)):
+            return d
+    return None
+
+# Try to find openMVG and openMVS binaries in PATH
+OPENMVG_BIN = whereis("openMVG_main_SfMInit_ImageListing")
+OPENMVS_BIN = whereis("ReconstructMesh")
+
+# Try to find openMVG camera sensor database
+CAMERA_SENSOR_DB_FILE = "sensor_width_camera_database.txt"
+CAMERA_SENSOR_DB_DIRECTORY = find(CAMERA_SENSOR_DB_FILE)
+
+# Ask user for openMVG and openMVS directories if not found
+if not OPENMVG_BIN:
+    OPENMVG_BIN = input("openMVG binary folder?\n")
+if not OPENMVS_BIN:
+    OPENMVS_BIN = input("openMVS binary folder?\n")
+if not CAMERA_SENSOR_DB_DIRECTORY:
+    CAMERA_SENSOR_DB_DIRECTORY = input("openMVG camera database (%s) folder?\n" % CAMERA_SENSOR_DB_FILE)
+
+
+PRESET = {'SEQUENTIAL': [0, 1, 2, 3, 9, 10, 11, 12, 13],
+          'GLOBAL': [0, 1, 2, 4, 9, 10, 11, 12, 13],
+          'MVG_SEQ': [0, 1, 2, 3, 5, 6, 7],
+          'MVG_GLOBAL': [0, 1, 2, 4, 5, 6, 7],
+          'MVS_SGM': [14, 15]}
+
+PRESET_DEFAULT = 'SEQUENTIAL'
+
+
+# HELPERS for terminal colors
+BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8)
+NO_EFFECT, BOLD, UNDERLINE, BLINK, INVERSE, HIDDEN = (0, 1, 4, 5, 7, 8)
+
+# from Python cookbook, #475186
+def has_colours(stream):
+    '''
+        Return stream colours capability
+    '''
+    if not hasattr(stream, "isatty"):
+        return False
+    if not stream.isatty():
+        return False  # auto color only on TTYs
+    try:
+        import curses
+        curses.setupterm()
+        return curses.tigetnum("colors") > 2
+    except Exception:
+        # guess false in case of error
+        return False
+
+HAS_COLOURS = has_colours(sys.stdout)
+
+
+def printout(text, colour=WHITE, background=BLACK, effect=NO_EFFECT):
+    """
+        print() with colour
+    """
+    if HAS_COLOURS:
+        seq = "\x1b[%d;%d;%dm" % (effect, 30+colour, 40+background) + text + "\x1b[0m"
+        sys.stdout.write(seq+'\r\n')
+    else:
+        sys.stdout.write(text+'\r\n')
+
+
+# OBJECTS to store config and data in
+class ConfContainer:
+    """
+        Container for all the config variables
+    """
+    def __init__(self):
+        pass
+
+
+class AStep:
+    def __init__(self, info, cmd, opt):
+        self.info = info
+        self.cmd = cmd
+        self.opt = opt
+
+
+class StepsStore:
+    def __init__(self):
+        self.steps_data = [
+            ["Intrinsics analysis",          # 0
+             os.path.join(OPENMVG_BIN, "openMVG_main_SfMInit_ImageListing"),
+             ["-i", "%input_dir%", "-o", "%matches_dir%", "-d", "%camera_file_params%"]],
+            ["Compute features",             # 1
+             os.path.join(OPENMVG_BIN, "openMVG_main_ComputeFeatures"),
+             ["-i", "%matches_dir%/sfm_data.json", "-o", "%matches_dir%", "-m", "SIFT", "-n", "4"]],
+            ["Compute matches",              # 2
+             os.path.join(OPENMVG_BIN, "openMVG_main_ComputeMatches"),
+             ["-i", "%matches_dir%/sfm_data.json", "-o", "%matches_dir%", "-n", "HNSWL2", "-r", ".8"]],
+            ["Incremental reconstruction",   # 3
+             os.path.join(OPENMVG_BIN, "openMVG_main_IncrementalSfM"),
+             ["-i", "%matches_dir%/sfm_data.json", "-m", "%matches_dir%", "-o", "%reconstruction_dir%"]],
+            ["Global reconstruction",        # 4
+             os.path.join(OPENMVG_BIN, "openMVG_main_GlobalSfM"),
+             ["-i", "%matches_dir%/sfm_data.json", "-m", "%matches_dir%", "-o", "%reconstruction_dir%"]],
+            ["Colorize Structure",           # 5
+             os.path.join(OPENMVG_BIN, "openMVG_main_ComputeSfM_DataColor"),
+             ["-i", "%reconstruction_dir%/sfm_data.bin", "-o", "%reconstruction_dir%/colorized.ply"]],
+            ["Structure from Known Poses",   # 6
+             os.path.join(OPENMVG_BIN, "openMVG_main_ComputeStructureFromKnownPoses"),
+             ["-i", "%reconstruction_dir%/sfm_data.bin", "-m", "%matches_dir%", "-f", "%matches_dir%/matches.f.bin", "-o", "%reconstruction_dir%/robust.bin"]],
+            ["Colorized robust triangulation",  # 7
+             os.path.join(OPENMVG_BIN, "openMVG_main_ComputeSfM_DataColor"),
+             ["-i", "%reconstruction_dir%/robust.bin", "-o", "%reconstruction_dir%/robust_colorized.ply"]],
+            ["Control Points Registration",  # 8
+             os.path.join(OPENMVG_BIN, "ui_openMVG_control_points_registration"),
+             ["-i", "%reconstruction_dir%/sfm_data.bin"]],
+            ["Export to openMVS",            # 9
+             os.path.join(OPENMVG_BIN, "openMVG_main_openMVG2openMVS"),
+             ["-i", "%reconstruction_dir%/sfm_data.bin", "-o", "%mvs_dir%/scene.mvs", "-d", "%mvs_dir%/images"]],
+            ["Densify point cloud",          # 10
+             os.path.join(OPENMVS_BIN, "DensifyPointCloud"),
+             ["scene.mvs", "--dense-config-file", "Densify.ini", "--resolution-level", "1", "-w", "%mvs_dir%"]],
+            ["Reconstruct the mesh",         # 11
+             os.path.join(OPENMVS_BIN, "ReconstructMesh"),
+             ["scene_dense.mvs", "-w", "%mvs_dir%"]],
+            ["Refine the mesh",              # 12
+             os.path.join(OPENMVS_BIN, "RefineMesh"),
+             ["scene_dense_mesh.mvs", "--scales", "2", "-w", "%mvs_dir%"]],
+            ["Texture the mesh",             # 13
+             os.path.join(OPENMVS_BIN, "TextureMesh"),
+             ["scene_dense_mesh_refine.mvs", "--decimate", "0.5", "-w", "%mvs_dir%"]],
+            ["Estimate disparity-maps",      # 14
+             os.path.join(OPENMVS_BIN, "DensifyPointCloud"),
+             ["scene.mvs", "--dense-config-file", "Densify.ini", "--fusion-mode", "-1", "-w", "%mvs_dir%"]],
+            ["Fuse disparity-maps",          # 15
+             os.path.join(OPENMVS_BIN, "DensifyPointCloud"),
+             ["scene.mvs", "--dense-config-file", "Densify.ini", "--fusion-mode", "-2", "-w", "%mvs_dir%"]]
+            ]
+
+    def __getitem__(self, indice):
+        return AStep(*self.steps_data[indice])
+
+    def length(self):
+        return len(self.steps_data)
+
+    def apply_conf(self, conf):
+        """ replace each %var% per conf.var value in steps data """
+        for s in self.steps_data:
+            o2 = []
+            for o in s[2]:
+                co = o.replace("%input_dir%", conf.input_dir)
+                co = co.replace("%output_dir%", conf.output_dir)
+                co = co.replace("%matches_dir%", conf.matches_dir)
+                co = co.replace("%reconstruction_dir%", conf.reconstruction_dir)
+                co = co.replace("%mvs_dir%", conf.mvs_dir)
+                co = co.replace("%camera_file_params%", conf.camera_file_params)
+                o2.append(co)
+            s[2] = o2
+
+
+CONF = ConfContainer()
+STEPS = StepsStore()
+
+# ARGS
+parser = argparse.ArgumentParser(
+    formatter_class=argparse.RawTextHelpFormatter,
+    description="Photogrammetry reconstruction with these steps: \r\n" +
+    "\r\n".join(("\t%i. %s\t %s" % (t, STEPS[t].info, STEPS[t].cmd) for t in range(STEPS.length())))
+    )
+parser.add_argument('input_dir',
+                    help="the directory wich contains the pictures set.")
+parser.add_argument('output_dir',
+                    help="the directory wich will contain the resulting files.")
+parser.add_argument('--steps',
+                    type=int,
+                    nargs="+",
+                    help="steps to process")
+parser.add_argument('--preset',
+                    help="steps list preset in \r\n" +
+                    " \r\n".join([k + " = " + str(PRESET[k]) for k in PRESET]) +
+                    " \r\ndefault : " + PRESET_DEFAULT)
+
+group = parser.add_argument_group('Passthrough', description="Option to be passed to command lines (remove - in front of option names)\r\ne.g. --1 p ULTRA to use the ULTRA preset in openMVG_main_ComputeFeatures")
+for n in range(STEPS.length()):
+    group.add_argument('--'+str(n), nargs='+')
+
+parser.parse_args(namespace=CONF)  # store args in the ConfContainer
+
+# FOLDERS
+
+def mkdir_ine(dirname):
+    """Create the folder if not presents"""
+    if not os.path.exists(dirname):
+        os.mkdir(dirname)
+
+# Absolute path for input and ouput dirs
+CONF.input_dir = os.path.abspath(CONF.input_dir)
+CONF.output_dir = os.path.abspath(CONF.output_dir)
+
+if not os.path.exists(CONF.input_dir):
+    sys.exit("%s: path not found" % CONF.input_dir)
+
+CONF.reconstruction_dir = os.path.join(CONF.output_dir, "sfm")
+CONF.matches_dir = os.path.join(CONF.reconstruction_dir, "matches")
+CONF.mvs_dir = os.path.join(CONF.output_dir, "mvs")
+CONF.camera_file_params = os.path.join(CAMERA_SENSOR_DB_DIRECTORY, CAMERA_SENSOR_DB_FILE)
+
+mkdir_ine(CONF.output_dir)
+mkdir_ine(CONF.reconstruction_dir)
+mkdir_ine(CONF.matches_dir)
+mkdir_ine(CONF.mvs_dir)
+
+# Update directories in steps commandlines
+STEPS.apply_conf(CONF)
+
+# PRESET
+if CONF.steps and CONF.preset:
+    sys.exit("Steps and preset arguments can't be set together.")
+elif CONF.preset:
+    try:
+        CONF.steps = PRESET[CONF.preset]
+    except KeyError:
+        sys.exit("Unkown preset %s, choose %s" % (CONF.preset, ' or '.join([s for s in PRESET])))
+elif not CONF.steps:
+    CONF.steps = PRESET[PRESET_DEFAULT]
+
+# WALK
+print("# Using input dir:  %s" % CONF.input_dir)
+print("#      output dir:  %s" % CONF.output_dir)
+print("# Steps:  %s" % str(CONF.steps))
+
+if 2 in CONF.steps:    # ComputeMatches
+    if 4 in CONF.steps:  # GlobalReconstruction
+        # Set the geometric_model of ComputeMatches to Essential
+        STEPS[2].opt.extend(["-g", "e"])
+
+for cstep in CONF.steps:
+    printout("#%i. %s" % (cstep, STEPS[cstep].info), effect=INVERSE)
+
+    # Retrieve "passthrough" commandline options
+    opt = getattr(CONF, str(cstep))
+    if opt:
+        # add - sign to short options and -- to long ones
+        for o in range(0, len(opt), 2):
+            if len(opt[o]) > 1:
+                opt[o] = '-' + opt[o]
+            opt[o] = '-' + opt[o]
+    else:
+        opt = []
+
+    # Remove STEPS[cstep].opt options now defined in opt
+    for anOpt in STEPS[cstep].opt:
+        if anOpt in opt:
+            idx = STEPS[cstep].opt.index(anOpt)
+            if DEBUG:
+                print('#\tRemove ' + str(anOpt) + ' from defaults options at id ' + str(idx))
+            del STEPS[cstep].opt[idx:idx+2]
+
+    # create a commandline for the current step
+    cmdline = [STEPS[cstep].cmd] + STEPS[cstep].opt + opt
+    print('Cmd: ' + ' '.join(cmdline))
+
+    if not DEBUG:
+        # Launch the current step
+        try:
+            pStep = subprocess.Popen(cmdline)
+            pStep.wait()
+            if pStep.returncode != 0:
+                break
+        except KeyboardInterrupt:
+            sys.exit('\r\nProcess canceled by user, all files remains')
+    else:
+        print('\t'.join(cmdline))
+
+printout("# Pipeline end #", effect=INVERSE)
diff --git a/apps/DensifyPointCloud/DensifyPointCloud.cpp b/apps/DensifyPointCloud/DensifyPointCloud.cpp
index aeaa18cc7..9d225289c 100644
--- a/apps/DensifyPointCloud/DensifyPointCloud.cpp
+++ b/apps/DensifyPointCloud/DensifyPointCloud.cpp
@@ -50,6 +50,7 @@ String strMeshFileName;
 String strDenseConfigFileName;
 float fSampleMesh;
 int thFilterPointCloud;
+int nFusionMode;
 int nArchiveType;
 int nProcessPriority;
 unsigned nMaxThreads;
@@ -107,6 +108,7 @@ bool Initialize(size_t argc, LPCTSTR* argv)
 		("estimate-normals", boost::program_options::value(&nEstimateNormals)->default_value(0), "estimate the normals for the dense point-cloud")
 		("sample-mesh", boost::program_options::value(&OPT::fSampleMesh)->default_value(0.f), "uniformly samples points on a mesh (0 - disabled, <0 - number of points, >0 - sample density per square unit)")
 		("filter-point-cloud", boost::program_options::value(&OPT::thFilterPointCloud)->default_value(0), "filter dense point-cloud based on visibility (0 - disabled)")
+		("fusion-mode", boost::program_options::value(&OPT::nFusionMode)->default_value(0), "depth map fusion mode (-2 - fuse disparity-maps, -1 - export disparity-maps only, 0 - depth-maps & fusion, 1 - export depth-maps only)")
 		;
 
 	// hidden options, allowed both on command line and
@@ -256,8 +258,13 @@ int main(int argc, LPCTSTR* argv)
 	}
 	if ((ARCHIVE_TYPE)OPT::nArchiveType != ARCHIVE_MVS) {
 		TD_TIMER_START();
-		if (!scene.DenseReconstruction())
-			return EXIT_FAILURE;
+		if (!scene.DenseReconstruction(OPT::nFusionMode)) {
+			if (ABS(OPT::nFusionMode) != 1)
+				return EXIT_FAILURE;
+			VERBOSE("Depth-maps estimated (%s)", TD_TIMER_GET_FMT().c_str());
+			Finalize();
+			return EXIT_SUCCESS;
+		}
 		VERBOSE("Densifying point-cloud completed: %u points (%s)", scene.pointcloud.GetSize(), TD_TIMER_GET_FMT().c_str());
 	}
 
diff --git a/apps/TextureMesh/TextureMesh.cpp b/apps/TextureMesh/TextureMesh.cpp
index 9e610a8ba..23f1421d1 100644
--- a/apps/TextureMesh/TextureMesh.cpp
+++ b/apps/TextureMesh/TextureMesh.cpp
@@ -47,6 +47,8 @@ namespace OPT {
 String strInputFileName;
 String strOutputFileName;
 String strMeshFileName;
+float fDecimateMesh;
+unsigned nCloseHoles;
 unsigned nResolutionLevel;
 unsigned nMinResolution;
 float fOutlierThreshold;
@@ -98,6 +100,8 @@ bool Initialize(size_t argc, LPCTSTR* argv)
 	config.add_options()
 		("input-file,i", boost::program_options::value<std::string>(&OPT::strInputFileName), "input filename containing camera poses and image list")
 		("output-file,o", boost::program_options::value<std::string>(&OPT::strOutputFileName), "output filename for storing the mesh")
+		("decimate", boost::program_options::value<float>(&OPT::fDecimateMesh)->default_value(1.f), "decimation factor in range [0..1] to be applied to the input surface before refinement (0 - auto, 1 - disabled)")
+		("close-holes", boost::program_options::value<unsigned>(&OPT::nCloseHoles)->default_value(30), "try to close small holes in the input surface (0 - disabled)")
 		("resolution-level", boost::program_options::value<unsigned>(&OPT::nResolutionLevel)->default_value(0), "how many times to scale down the images before mesh refinement")
 		("min-resolution", boost::program_options::value<unsigned>(&OPT::nMinResolution)->default_value(640), "do not scale images lower than this resolution")
 		("outlier-threshold", boost::program_options::value<float>(&OPT::fOutlierThreshold)->default_value(6e-2f), "threshold used to find and remove outlier face textures (0 - disabled)")
@@ -226,6 +230,17 @@ int main(int argc, LPCTSTR* argv)
 	}
 
 	{
+	// decimate to the desired resolution
+	if (OPT::fDecimateMesh < 1.f) {
+		ASSERT(OPT::fDecimateMesh > 0.f);
+		scene.mesh.Clean(OPT::fDecimateMesh, 0.f, false, OPT::nCloseHoles, 0u, false);
+		scene.mesh.Clean(1.f, 0.f, false, 0, 0u, true); // extra cleaning to remove non-manifold problems created by closing holes
+		#if TD_VERBOSE != TD_VERBOSE_OFF
+		if (VERBOSITY_LEVEL > 3)
+			scene.mesh.Save(baseFileName +_T("_decim")+OPT::strExportType);
+		#endif
+	}
+
 	// compute mesh texture
 	TD_TIMER_START();
 	if (!scene.TextureMesh(OPT::nResolutionLevel, OPT::nMinResolution, OPT::fOutlierThreshold, OPT::fRatioDataSmoothness, OPT::bGlobalSeamLeveling, OPT::bLocalSeamLeveling, OPT::nTextureSizeMultiple, OPT::nRectPackingHeuristic, Pixel8U(OPT::nColEmpty)))
diff --git a/build/Utils.cmake b/build/Utils.cmake
index c828a28cf..f41c9d89f 100644
--- a/build/Utils.cmake
+++ b/build/Utils.cmake
@@ -493,12 +493,17 @@ macro(optimize_default_compiler_settings)
 		add_extra_compiler_option(-Wno-switch)
 		add_extra_compiler_option(-Wno-switch-enum)
 		add_extra_compiler_option(-Wno-switch-default)
+		add_extra_compiler_option(-Wno-implicit-fallthrough)
 		add_extra_compiler_option(-Wno-comment)
 		add_extra_compiler_option(-Wno-narrowing)
 		add_extra_compiler_option(-Wno-attributes)
+		add_extra_compiler_option(-Wno-ignored-attributes)
+		add_extra_compiler_option(-Wno-maybe-uninitialized)
 		add_extra_compiler_option(-Wno-enum-compare)
 		add_extra_compiler_option(-Wno-misleading-indentation)
 		add_extra_compiler_option(-Wno-missing-field-initializers)
+		add_extra_compiler_option(-Wno-unused-result)
+		add_extra_compiler_option(-Wno-unused-function)
 		add_extra_compiler_option(-Wno-unused-parameter)
 		add_extra_compiler_option(-Wno-delete-incomplete)
 		add_extra_compiler_option(-Wno-unnamed-type-template-args)
diff --git a/libs/Common/EventQueue.cpp b/libs/Common/EventQueue.cpp
index dd090e63a..82f835ba0 100644
--- a/libs/Common/EventQueue.cpp
+++ b/libs/Common/EventQueue.cpp
@@ -86,3 +86,16 @@ uint_t EventQueue::GetSize() const
 	return m_events.GetSize();
 }
 /*----------------------------------------------------------------*/
+
+
+
+/*-----------------------------------------------------------*
+* EventThreadPool class implementation                      *
+*-----------------------------------------------------------*/
+
+void EventThreadPool::stop()
+{
+	ThreadPool::stop();
+	EventQueue::Clear();
+}
+/*----------------------------------------------------------------*/
diff --git a/libs/Common/EventQueue.h b/libs/Common/EventQueue.h
index 31206cb72..b698785c3 100644
--- a/libs/Common/EventQueue.h
+++ b/libs/Common/EventQueue.h
@@ -71,6 +71,20 @@ class GENERAL_API EventQueue
 };
 /*----------------------------------------------------------------*/
 
+
+// basic event and thread pool
+class GENERAL_API EventThreadPool : public ThreadPool, public EventQueue
+{
+public:
+	inline EventThreadPool() {}
+	inline EventThreadPool(size_type nThreads) : ThreadPool(nThreads) {}
+	inline EventThreadPool(size_type nThreads, Thread::FncStart pfnStarter, void* pData=NULL) : ThreadPool(nThreads, pfnStarter, pData) {}
+	inline ~EventThreadPool() {}
+
+	void stop(); //stop threads, reset locks state and empty event queue
+};
+/*----------------------------------------------------------------*/
+
 } // namespace SEACAVE
 
 #endif // __SEACAVE_EVENTQUEUE_H__
diff --git a/libs/Common/File.h b/libs/Common/File.h
index 674aed571..f369bb5df 100644
--- a/libs/Common/File.h
+++ b/libs/Common/File.h
@@ -13,10 +13,66 @@
 
 #include "Streams.h"
 
+// Under both Windows and Unix, the stat function is used for classification
+
+// Under Gnu/Linux, the following classifications are defined
+// source: Gnu/Linux man page for stat(2) http://linux.die.net/man/2/stat
+//   S_IFMT 	0170000	bitmask for the file type bitfields
+//   S_IFSOCK 	0140000	socket (Note this overlaps with S_IFDIR)
+//   S_IFLNK 	0120000	symbolic link
+//   S_IFREG 	0100000	regular file
+//   S_IFBLK 	0060000	block device
+//   S_IFDIR 	0040000	directory
+//   S_IFCHR 	0020000	character device
+//   S_IFIFO 	0010000	FIFO
+// There are also some Posix-standard macros:
+//   S_ISREG(m)        is it a regular file? 
+//   S_ISDIR(m)        directory? 
+//   S_ISCHR(m)        character device? 
+//   S_ISBLK(m)        block device? 
+//   S_ISFIFO(m)       FIFO (named pipe)? 
+//   S_ISLNK(m)        symbolic link? (Not in POSIX.1-1996.) 
+//   S_ISSOCK(m)       socket? (Not in POSIX.1-1996.)
+// Under Windows, the following are defined:
+// source: Header file sys/stat.h distributed with Visual Studio 10
+//   _S_IFMT  (S_IFMT)   0xF000 file type mask
+//   _S_IFREG (S_IFREG)  0x8000 regular
+//   _S_IFDIR (S_IFDIR)  0x4000 directory
+//   _S_IFCHR (S_IFCHR)  0x2000 character special
+//   _S_IFIFO            0x1000 pipe
+
 #ifdef _MSC_VER
 #include <io.h>
+// file type tests are not defined for some reason on Windows despite them providing the stat() function!
+#define F_OK 0
+#define X_OK 1
+#define W_OK 2
+#define R_OK 4
+// Posix-style macros for Windows
+#ifndef S_ISREG
+#define S_ISREG(mode)  ((mode & _S_IFMT) == _S_IFREG)
+#endif
+#ifndef S_ISDIR
+#define S_ISDIR(mode)  ((mode & _S_IFMT) == _S_IFDIR)
+#endif
+#ifndef S_ISCHR
+#define S_ISCHR(mode)  ((mode & _S_IFMT) == _S_IFCHR)
+#endif
+#ifndef S_ISBLK
+#define S_ISBLK(mode)  (false)
+#endif
+#ifndef S_ISFIFO
+#define S_ISFIFO(mode) ((mode & _S_IFMT) == _S_IFIFO)
+#endif
+#ifndef S_ISLNK
+#define S_ISLNK(mode)  (false)
+#endif
+#ifndef S_ISSOCK
+#define S_ISSOCK(mode) (false)
+#endif
 #else
 #include <unistd.h>
+#include <dirent.h>
 #define _taccess access
 #endif
 
@@ -282,7 +338,7 @@ class GENERAL_API File : public IOStream {
 		size_f_t totalSize = 0;
 		String strPath(_strPath);
 		Util::ensureFolderSlash(strPath);
-		//Find all the files in this folder.
+		// Find all the files in this folder.
 		hFind = FindFirstFile((strPath + strMask).c_str(), &fd);
 		if (hFind != INVALID_HANDLE_VALUE)
 		{
@@ -304,7 +360,7 @@ class GENERAL_API File : public IOStream {
 			while (FindNextFile(hFind, &fd));
 			FindClose(hFind);
 		}
-		//Process the subfolders also...
+		// Process the subfolders also...
 		if (!bProcessSubdir)
 			return totalSize;
 		hFind = FindFirstFile((strPath + '*').c_str(), &fd);
@@ -318,7 +374,7 @@ class GENERAL_API File : public IOStream {
 					continue;
 				if (!_tcscmp(fd.cFileName, _T("..")))
 					continue;
-				// Processe all subfolders recursively
+				// Process all subfolders recursively
 				totalSize += findFiles(strPath + fd.cFileName + PATH_SEPARATOR, strMask, true, arrFiles);
 			}
 			while (FindNextFile(hFind, &fd));
@@ -490,7 +546,35 @@ class GENERAL_API File : public IOStream {
 
 #endif // _MSC_VER
 
-	static bool access(LPCTSTR aFileName, int mode=CA_EXIST) { return ::_taccess(aFileName, mode) == 0; }
+	// test for whether there's something (i.e. folder or file) with this name
+	// and what access mode is supported
+	static bool isPresent(LPCTSTR path) {
+		struct stat buf;
+		return stat(path, &buf) == 0;
+	}
+	static bool access(LPCTSTR path, int mode=CA_EXIST) {
+		return ::_taccess(path, mode) == 0;
+	}
+	// test for whether there's something present and its a folder
+	static bool isFolder(LPCTSTR path) {
+		struct stat buf;
+		if (!(stat(path, &buf) == 0))
+			return false;
+		// If the object is present, see if it is a directory
+		// this is the Posix-approved way of testing
+		return S_ISDIR(buf.st_mode);
+	}
+	// test for whether there's something present and its a file
+	// a file can be a regular file, a symbolic link, a FIFO or a socket, but not a device
+	static bool isFile(LPCTSTR path) {
+		struct stat buf;
+		if (!(stat(path, &buf) == 0))
+			return false;
+		// If the object is present, see if it is a file or file-like object
+		// Note that devices are neither folders nor files
+		// this is the Posix-approved way of testing
+		return S_ISREG(buf.st_mode) || S_ISLNK(buf.st_mode) || S_ISSOCK(buf.st_mode) || S_ISFIFO(buf.st_mode);
+	}
 
 	template <class VECTOR>
 	inline size_t write(const VECTOR& arr) {
diff --git a/libs/Common/List.h b/libs/Common/List.h
index 794b0a2a4..7b714308c 100644
--- a/libs/Common/List.h
+++ b/libs/Common/List.h
@@ -20,28 +20,24 @@
 
 // cList index type
 #ifdef _SUPPORT_CPP11
-#ifdef _MSC_VER
-#define ARR2IDX(arr) std::remove_reference<decltype(arr)>::type::IDX
-#else
-#define ARR2IDX(arr) typename std::remove_reference<decltype(arr)>::type::IDX
-#endif
+#define ARR2IDX(arr) typename std::remove_reference<decltype(arr)>::type::size_type
 #else
 #define ARR2IDX(arr) IDX
 #endif
 
 // cList iterator by index
 #ifndef FOREACH
-#define FOREACH(var, arr) for (ARR2IDX(arr) var=0, var##Size=(arr).GetSize(); var<var##Size; ++var)
+#define FOREACH(var, arr) for (ARR2IDX(arr) var=0, var##Size=(arr).size(); var<var##Size; ++var)
 #endif
 #ifndef RFOREACH
-#define RFOREACH(var, arr) for (ARR2IDX(arr) var=(arr).GetSize(); var-->0; )
+#define RFOREACH(var, arr) for (ARR2IDX(arr) var=(arr).size(); var-->0; )
 #endif
 // cList iterator by pointer
 #ifndef FOREACHPTR
-#define FOREACHPTR(var, arr) for (auto var=(arr).Begin(), var##End=(arr).End(); var!=var##End; ++var)
+#define FOREACHPTR(var, arr) for (auto var=(arr).begin(), var##End=(arr).end(); var!=var##End; ++var)
 #endif
 #ifndef RFOREACHPTR
-#define RFOREACHPTR(var, arr) for (auto var=(arr).End(), var##Begin=(arr).Begin(); var--!=var##Begin; )
+#define RFOREACHPTR(var, arr) for (auto var=(arr).end(), var##Begin=(arr).begin(); var--!=var##Begin; )
 #endif
 
 // raw data array iterator by index
@@ -627,6 +623,41 @@ class cList
 		return std::accumulate(Begin(), End(), TYPE(0)) / _size;
 	}
 
+	inline ArgType	GetMax() const {
+		return *std::max_element(Begin(), End());
+	}
+	template <typename Functor>
+	inline ArgType	GetMax(const Functor& functor) const {
+		return *std::max_element(Begin(), End(), functor);
+	}
+	inline IDX	GetMaxIdx() const {
+		return static_cast<IDX>(std::max_element(Begin(), End()) - Begin());
+	}
+	template <typename Functor>
+	inline IDX	GetMaxIdx(const Functor& functor) const {
+		return static_cast<IDX>(std::max_element(Begin(), End(), functor) - Begin());
+	}
+	#ifdef _SUPPORT_CPP11
+	inline std::pair<ArgType,ArgType>	GetMinMax() const {
+		const auto minmax(std::minmax_element(Begin(), End()));
+		return std::pair<ArgType,ArgType>(*minmax.first, *minmax.second);
+	}
+	template <typename Functor>
+	inline std::pair<ArgType,ArgType>	GetMinMax(const Functor& functor) const {
+		const auto minmax(std::minmax_element(Begin(), End(), functor));
+		return std::pair<ArgType,ArgType>(*minmax.first, *minmax.second);
+	}
+	inline std::pair<IDX,IDX>	GetMinMaxIdx() const {
+		const auto minmax(std::minmax_element(Begin(), End()));
+		return std::make_pair(static_cast<IDX>(minmax.first-Begin()), static_cast<IDX>(minmax.second-Begin()));
+	}
+	template <typename Functor>
+	inline std::pair<IDX,IDX>	GetMinMaxIdx(const Functor& functor) const {
+		const auto minmax(std::minmax_element(Begin(), End(), functor));
+		return std::make_pair(static_cast<IDX>(minmax.first-Begin()), static_cast<IDX>(minmax.second-Begin()));
+	}
+	#endif
+
 	inline TYPE&	PartialSort(IDX index)
 	{
 		TYPE* const nth(Begin()+index);
@@ -1294,6 +1325,7 @@ class cList
 
 #if _USE_VECTORINTERFACE != 0
 public:
+	typedef IDX size_type;
 	typedef Type value_type;
 	typedef value_type* iterator;
 	typedef const value_type* const_iterator;
@@ -1302,11 +1334,11 @@ class cList
 	typedef std::vector<Type> VectorType;
 	inline cList(const VectorType& rList) { CopyOf(&rList[0], rList.size()); }
 	#ifdef _SUPPORT_CPP11
-	inline cList(std::initializer_list<Type> l) : _size(0), _vectorSize((IDX)l.size()), _vector(NULL) { ASSERT(l.size()<NO_INDEX); if (_vectorSize == 0) return; _vector = (Type*) operator new[] (_vectorSize*sizeof(Type)); const Type* first(l.begin()); do new(_vector + _size++) Type(*first++); while (first!=l.end()); }
+	inline cList(std::initializer_list<Type> l) : _size(0), _vectorSize((size_type)l.size()), _vector(NULL) { ASSERT(l.size()<NO_INDEX); if (_vectorSize == 0) return; _vector = (Type*) operator new[] (_vectorSize*sizeof(Type)); const Type* first(l.begin()); do new(_vector + _size++) Type(*first++); while (first!=l.end()); }
 	#endif
 	inline bool empty() const { return IsEmpty(); }
-	inline IDX size() const { return GetSize(); }
-	inline IDX capacity() const { return GetCapacity(); }
+	inline size_type size() const { return GetSize(); }
+	inline size_type capacity() const { return GetCapacity(); }
 	inline void clear() { Empty(); }
 	inline void insert(const_iterator it, const_reference elem) { InsertAt(it-this->_vector, elem); }
 	#ifdef _SUPPORT_CPP11
@@ -1316,8 +1348,8 @@ class cList
 	#endif
 	inline void push_back(const_reference elem) { Insert(elem); }
 	inline void pop_back() { RemoveLast(); }
-	inline void reserve(IDX newSize) { Reserve(newSize); }
-	inline void resize(IDX newSize) { Resize(newSize); }
+	inline void reserve(size_type newSize) { Reserve(newSize); }
+	inline void resize(size_type newSize) { Resize(newSize); }
 	inline void erase(const_iterator it) { RemoveAtMove(it-this->_vector); }
 	inline const_iterator cdata() const { return GetData(); }
 	inline const_iterator cbegin() const { return Begin(); }
diff --git a/libs/Common/Thread.h b/libs/Common/Thread.h
index d05bdae2e..9ccacb37d 100644
--- a/libs/Common/Thread.h
+++ b/libs/Common/Thread.h
@@ -39,7 +39,7 @@ namespace SEACAVE {
  * basic thread control
  **************************************************************************************/
 
-class Thread
+class GENERAL_API Thread
 {
 public:
 	#ifdef  _ENVIRONMENT64
@@ -252,6 +252,86 @@ class Thread
 	DWORD threadId;
 	#endif
 };
+/*----------------------------------------------------------------*/
+
+
+
+/**************************************************************************************
+ * ThreadPool
+ * --------------
+ * basic thread pool
+ **************************************************************************************/
+
+class GENERAL_API ThreadPool
+{
+public:
+	typedef uint32_t size_type;
+	typedef Thread value_type;
+	typedef value_type* iterator;
+	typedef const value_type* const_iterator;
+	typedef value_type& reference;
+	typedef const value_type& const_reference;
+	typedef CLISTDEFIDX(Thread,size_type) Threads;
+
+public:
+	inline ThreadPool() {}
+	inline ThreadPool(size_type nThreads) : _threads(nThreads>0?nThreads:Thread::hardwareConcurrency()) {}
+	inline ThreadPool(size_type nThreads, Thread::FncStart pfnStarter, void* pData=NULL) : _threads(nThreads>0?nThreads:Thread::hardwareConcurrency()) { start(pfnStarter, pData); }
+	inline ~ThreadPool() { join(); }
+
+	#ifdef _SUPPORT_CPP11
+	inline ThreadPool(ThreadPool&& rhs) : _threads(std::forward<Threads>(rhs._threads)) {
+	}
+
+	inline ThreadPool& operator=(ThreadPool&& rhs) {
+		_threads.Swap(rhs._threads);
+		return *this;
+	}
+	#endif
+
+	// wait for all running threads to finish and resize threads array
+	void resize(size_type nThreads) {
+		join();
+		_threads.resize(nThreads);
+	}
+	// start all threads with the given function
+	bool start(Thread::FncStart pfnStarter, void* pData=NULL) {
+		for (Thread& thread: _threads)
+			if (!thread.start(pfnStarter, pData))
+				return false;
+		return true;
+	}
+	// wait for all running threads to finish
+	void join() {
+		for (Thread& thread: _threads)
+			thread.join();
+	}
+	// stop all threads
+	void stop() {
+		for (Thread& thread: _threads)
+			thread.stop();
+	}
+	// wait for all running threads to finish and release threads array
+	void Release() {
+		join();
+		_threads.Release();
+	}
+
+	inline bool empty() const { return _threads.empty(); }
+	inline size_type size() const { return _threads.size(); }
+	inline const_iterator cbegin() const { return _threads.cbegin(); }
+	inline const_iterator cend() const { return _threads.cend(); }
+	inline const_iterator begin() const { return _threads.begin(); }
+	inline const_iterator end() const { return _threads.end(); }
+	inline iterator begin() { return _threads.begin(); }
+	inline iterator end() { return _threads.end(); }
+	inline const_reference operator[](size_type index) const { return _threads[index]; }
+	inline reference operator[](size_type index) { return _threads[index]; }
+
+protected:
+	Threads _threads;
+};
+/*----------------------------------------------------------------*/
 
 
 
@@ -261,7 +341,7 @@ class Thread
  * basic process control
  **************************************************************************************/
 
-class Process
+class GENERAL_API Process
 {
 public:
 	enum Priority {
diff --git a/libs/Common/Types.h b/libs/Common/Types.h
index 57f59b918..bcac65c1b 100644
--- a/libs/Common/Types.h
+++ b/libs/Common/Types.h
@@ -380,9 +380,9 @@ typedef TAliasCast<double,int32_t> CastD2I;
 
 #include "Strings.h"
 #include "AutoPtr.h"
+#include "List.h"
 #include "Thread.h"
 #include "SharedPtr.h"
-#include "List.h"
 #include "Queue.h"
 #include "Hash.h"
 #include "Timer.h"
@@ -605,6 +605,16 @@ namespace SEACAVE {
 
 // F U N C T I O N S ///////////////////////////////////////////////
 
+template<typename T>
+struct MakeIdentity { using type = T; };
+template<typename T>
+using MakeSigned = typename std::conditional<std::is_integral<T>::value,std::make_signed<T>,SEACAVE::MakeIdentity<T>>::type;
+
+template<typename T1, typename T2>
+constexpr T1 Cast(const T2& v) {
+	return static_cast<T1>(v);
+}
+
 template<typename T>
 constexpr T& NEGATE(T& a) {
 	return (a = -a);
@@ -808,6 +818,82 @@ FORCEINLINE double CBRT(const double& x) {
 /*----------------------------------------------------------------*/
 
 
+#if defined(__GNUC__)
+
+FORCEINLINE int PopCnt(uint32_t bb) {
+	return __builtin_popcount(bb);
+}
+FORCEINLINE int PopCnt(uint64_t bb) {
+	return __builtin_popcountll(bb);
+}
+FORCEINLINE int PopCnt15(uint64_t bb) {
+	return __builtin_popcountll(bb);
+}
+FORCEINLINE int PopCntSparse(uint64_t bb) {
+	return __builtin_popcountll(bb);
+}
+
+#elif defined(_USE_SSE) && defined(_M_AMD64) // 64 bit windows
+
+FORCEINLINE int PopCnt(uint32_t bb) {
+	return (int)_mm_popcnt_u32(bb);
+}
+FORCEINLINE int PopCnt(uint64_t bb) {
+	return (int)_mm_popcnt_u64(bb);
+}
+FORCEINLINE int PopCnt15(uint64_t bb) {
+	return (int)_mm_popcnt_u64(bb);
+}
+FORCEINLINE int PopCntSparse(uint64_t bb) {
+	return (int)_mm_popcnt_u64(bb);
+}
+
+#else
+
+// general purpose population count
+template<typename T>
+constexpr int PopCnt(T bb)
+{
+	STATIC_ASSERT(std::is_integral<T>::value && std::is_unsigned<T>::value);
+	return std::bitset<sizeof(T)*8>(bb).count();
+}
+template<>
+inline int PopCnt(uint64_t bb) {
+	const uint64_t k1 = (uint64_t)0x5555555555555555;
+	const uint64_t k2 = (uint64_t)0x3333333333333333;
+	const uint64_t k3 = (uint64_t)0x0F0F0F0F0F0F0F0F;
+	const uint64_t k4 = (uint64_t)0x0101010101010101;
+	bb -= (bb >> 1) & k1;
+	bb = (bb & k2) + ((bb >> 2) & k2);
+	bb = (bb + (bb >> 4)) & k3;
+	return (bb * k4) >> 56;
+}
+// faster version assuming not more than 15 bits set, used in mobility
+// eval, posted on CCC forum by Marco Costalba of Stockfish team
+inline int PopCnt15(uint64_t bb) {
+	unsigned w = unsigned(bb >> 32), v = unsigned(bb);
+	v -= (v >> 1) & 0x55555555; // 0-2 in 2 bits
+	w -= (w >> 1) & 0x55555555;
+	v = ((v >> 2) & 0x33333333) + (v & 0x33333333); // 0-4 in 4 bits
+	w = ((w >> 2) & 0x33333333) + (w & 0x33333333);
+	v += w; // 0-8 in 4 bits
+	v *= 0x11111111;
+	return int(v >> 28);
+}
+// version faster on sparsely populated bitboards
+inline int PopCntSparse(uint64_t bb) {
+	int count = 0;
+	while (bb) {
+		count++;
+		bb &= bb - 1;
+	}
+	return count;
+}
+
+#endif
+/*----------------------------------------------------------------*/
+
+
 #ifdef _FAST_FLOAT2INT
 // fast float to int conversion
 // (xs routines at stereopsis: http://www.stereopsis.com/sree/fpu2006.html by Sree Kotay)
@@ -1354,6 +1440,7 @@ class TMatrix : public cv::Matx<TYPE,m,n>
 	#endif
 
 	using Base::val;
+	using Base::channels;
 
 	enum { elems = m*n };
 
@@ -1371,6 +1458,37 @@ class TMatrix : public cv::Matx<TYPE,m,n>
 	inline TMatrix(const EMat& rhs) { operator EMat& () = rhs; }
 	#endif
 
+	TMatrix(TYPE v0); //!< 1x1 matrix
+	TMatrix(TYPE v0, TYPE v1); //!< 1x2 or 2x1 matrix
+	TMatrix(TYPE v0, TYPE v1, TYPE v2); //!< 1x3 or 3x1 matrix
+	TMatrix(TYPE v0, TYPE v1, TYPE v2, TYPE v3); //!< 1x4, 2x2 or 4x1 matrix
+	TMatrix(TYPE v0, TYPE v1, TYPE v2, TYPE v3, TYPE v4); //!< 1x5 or 5x1 matrix
+	TMatrix(TYPE v0, TYPE v1, TYPE v2, TYPE v3, TYPE v4, TYPE v5); //!< 1x6, 2x3, 3x2 or 6x1 matrix
+	TMatrix(TYPE v0, TYPE v1, TYPE v2, TYPE v3, TYPE v4, TYPE v5, TYPE v6); //!< 1x7 or 7x1 matrix
+	TMatrix(TYPE v0, TYPE v1, TYPE v2, TYPE v3, TYPE v4, TYPE v5, TYPE v6, TYPE v7); //!< 1x8, 2x4, 4x2 or 8x1 matrix
+	TMatrix(TYPE v0, TYPE v1, TYPE v2, TYPE v3, TYPE v4, TYPE v5, TYPE v6, TYPE v7, TYPE v8); //!< 1x9, 3x3 or 9x1 matrix
+	TMatrix(TYPE v0, TYPE v1, TYPE v2, TYPE v3, TYPE v4, TYPE v5, TYPE v6, TYPE v7, TYPE v8, TYPE v9); //!< 1x10, 2x5 or 5x2 or 10x1 matrix
+	TMatrix(TYPE v0, TYPE v1, TYPE v2, TYPE v3,
+		TYPE v4, TYPE v5, TYPE v6, TYPE v7,
+		TYPE v8, TYPE v9, TYPE v10, TYPE v11); //!< 1x12, 2x6, 3x4, 4x3, 6x2 or 12x1 matrix
+	TMatrix(TYPE v0, TYPE v1, TYPE v2, TYPE v3,
+		TYPE v4, TYPE v5, TYPE v6, TYPE v7,
+		TYPE v8, TYPE v9, TYPE v10, TYPE v11,
+		TYPE v12, TYPE v13); //!< 1x14, 2x7, 7x2 or 14x1 matrix
+	TMatrix(TYPE v0, TYPE v1, TYPE v2, TYPE v3,
+		TYPE v4, TYPE v5, TYPE v6, TYPE v7,
+		TYPE v8, TYPE v9, TYPE v10, TYPE v11,
+		TYPE v12, TYPE v13, TYPE v14, TYPE v15); //!< 1x16, 4x4 or 16x1 matrix
+	explicit TMatrix(const TYPE* vals); //!< initialize from a plain array
+
+	TMatrix(const TMatrix<TYPE,m,n>& a, const TMatrix<TYPE,m,n>& b, cv::Matx_AddOp) : Base(a, b, cv::Matx_AddOp()) {}
+	TMatrix(const TMatrix<TYPE,m,n>& a, const TMatrix<TYPE,m,n>& b, cv::Matx_SubOp) : Base(a, b, cv::Matx_SubOp()) {}
+	template<typename TYPE2> TMatrix(const TMatrix<TYPE,m,n>& a, TYPE2 alpha, cv::Matx_ScaleOp) : Base(a, alpha, cv::Matx_ScaleOp()) {}
+	TMatrix(const TMatrix<TYPE,m,n>& a, const TMatrix<TYPE,m,n>& b, cv::Matx_MulOp) : Base(a, b, cv::Matx_MulOp()) {}
+	TMatrix(const TMatrix<TYPE,m,n>& a, const TMatrix<TYPE,m,n>& b, cv::Matx_DivOp) : Base(a, b, cv::Matx_DivOp()) {}
+	template<int l> TMatrix(const TMatrix<TYPE,m,l>& a, const TMatrix<TYPE,l,n>& b, cv::Matx_MatMulOp) : Base(a, b, cv::Matx_MatMulOp()) {}
+	TMatrix(const TMatrix<TYPE,n,m>& a, cv::Matx_TOp) : Base(a, cv::Matx_TOp()) {}
+
 	template <typename T> inline TMatrix& operator = (const cv::Matx<T,m,n>& rhs) { Base::operator = (rhs); return *this; }
 	inline TMatrix& operator = (const cv::Mat& rhs) { Base::operator = (rhs); return *this; }
 	#ifdef _USE_EIGEN
@@ -1822,6 +1940,10 @@ struct TPixel {
 	template<typename T> inline TPixel operator-(T v) const { return TPixel((TYPE)(r-v), (TYPE)(g-v), (TYPE)(b-v)); }
 	template<typename T> inline TPixel& operator-=(T v) { return (*this = operator-(v)); }
 	inline uint32_t toDWORD() const { return RGBA((uint8_t)r, (uint8_t)g, (uint8_t)b, (uint8_t)0); }
+	// tools
+	template <typename VT>
+	static TPixel colorRamp(VT v, VT vmin, VT vmax);
+	static TPixel gray2color(ALT v);
 	#ifdef _USE_BOOST
 	// serialize
 	template <class Archive>
@@ -2008,14 +2130,14 @@ class TImage : public TDMatrix<TYPE>
 	template <typename T>
 	TYPE sampleSafe(const TPoint2<T>& pt) const;
 
-	template <typename T>
-	bool sample(TYPE& v, const TPoint2<T>& pt, bool (STCALL *fncCond)(const TYPE&)) const;
-	template <typename T>
-	bool sampleSafe(TYPE& v, const TPoint2<T>& pt, bool (STCALL *fncCond)(const TYPE&)) const;
-	template <typename T>
-	TYPE sample(const TPoint2<T>& pt, bool (STCALL *fncCond)(const TYPE&), const TYPE& dv) const;
-	template <typename T>
-	TYPE sampleSafe(const TPoint2<T>& pt, bool (STCALL *fncCond)(const TYPE&), const TYPE& dv) const;
+	template <typename T, typename TV, typename Functor>
+	bool sample(TV& v, const TPoint2<T>& pt, const Functor& functor) const;
+	template <typename T, typename TV, typename Functor>
+	bool sampleSafe(TV& v, const TPoint2<T>& pt, const Functor& functor) const;
+	template <typename T, typename Functor>
+	TYPE sample(const TPoint2<T>& pt, const Functor& functor, const TYPE& dv) const;
+	template <typename T, typename Functor>
+	TYPE sampleSafe(const TPoint2<T>& pt, const Functor& functor, const TYPE& dv) const;
 
 	template <typename SAMPLER, typename INTERTYPE>
 	INTERTYPE sample(const SAMPLER& sampler, const TPoint2<typename SAMPLER::Type>& pt) const;
@@ -2023,6 +2145,8 @@ class TImage : public TDMatrix<TYPE>
 	template <typename T>
 	void toGray(TImage<T>& out, int code, bool bNormalize=false, bool bSRGB=false) const;
 
+	static cv::Size computeResize(const cv::Size& size, REAL scale);
+	static cv::Size computeResize(const cv::Size& size, REAL scale, unsigned resizes);
 	unsigned computeMaxResolution(unsigned& level, unsigned minImageSize=320, unsigned maxImageSize=INT_MAX) const;
 	static unsigned computeMaxResolution(unsigned width, unsigned height, unsigned& level, unsigned minImageSize=320, unsigned maxImageSize=INT_MAX);
 
@@ -2239,6 +2363,7 @@ struct TAccumulator {
 
 	inline TAccumulator() : value(0), weight(0) {}
 	inline TAccumulator(const Type& v, const WeightType& w) : value(v), weight(w) {}
+	inline bool IsEmpty() const { return weight <= 0; }
 	// adds the given weighted value to the internal value
 	inline void Add(const Type& v, const WeightType& w) {
 		value += v*w;
diff --git a/libs/Common/Types.inl b/libs/Common/Types.inl
index aa66a4714..090f896dc 100644
--- a/libs/Common/Types.inl
+++ b/libs/Common/Types.inl
@@ -1404,6 +1404,52 @@ inline TColor<TYPE>& operator*=(TColor<TYPE>& pt0, const TColor<TYPE>& pt1) {
 	return pt0;
 }
 
+// TMatrix operators
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n> operator + (const TMatrix<TYPE,m,n>& m1, const TMatrix<TYPE,m,n>& m2) {
+	return TMatrix<TYPE,m,n>(m1, m2, cv::Matx_AddOp());
+}
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n> operator + (const TMatrix<TYPE,m,n>& m1, const cv::Matx<TYPE,m,n>& m2) {
+	return cv::Matx<TYPE,m,n>(m1, m2, cv::Matx_AddOp());
+}
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n> operator + (const cv::Matx<TYPE,m,n>& m1, const TMatrix<TYPE,m,n>& m2) {
+	return TMatrix<TYPE,m,n>(m1, m2, cv::Matx_AddOp());
+}
+
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n> operator - (const TMatrix<TYPE,m,n>& m1, const TMatrix<TYPE,m,n>& m2) {
+	return TMatrix<TYPE,m,n>(m1, m2, cv::Matx_SubOp());
+}
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n> operator - (const TMatrix<TYPE,m,n>& m1, const cv::Matx<TYPE,m,n>& m2) {
+	return TMatrix<TYPE,m,n>(m1, m2, cv::Matx_SubOp());
+}
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n> operator - (const cv::Matx<TYPE,m,n>& m1, const TMatrix<TYPE,m,n>& m2) {
+	return TMatrix<TYPE,m,n>(m1, m2, cv::Matx_SubOp());
+}
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n> operator - (const TMatrix<TYPE,m,n>& M) {
+	return TMatrix<TYPE,m,n>(M, TYPE(-1), cv::Matx_ScaleOp());
+}
+
+template <typename TYPE, int m, int l, int n>
+inline TMatrix<TYPE,m,n> operator * (const TMatrix<TYPE,m,l>& m1, const TMatrix<TYPE,l,n>& m2) {
+	return TMatrix<TYPE,m,n>(m1, m2, cv::Matx_MatMulOp());
+}
+
+template <typename TYPE, int m, int n, typename TYPE2>
+inline TMatrix<TYPE,m,n> operator / (const TMatrix<TYPE,m,n>& mat, TYPE2 v) {
+	typedef typename std::conditional<std::is_floating_point<TYPE2>::value,TYPE2,REAL>::type real_t;
+	return TMatrix<TYPE,m,n>(mat, real_t(1)/v, cv::Matx_ScaleOp());
+}
+template <typename TYPE, int m, int n, typename TYPE2>
+inline TMatrix<TYPE,m,n>& operator /= (TMatrix<TYPE,m,n>& mat, TYPE2 v) {
+	return mat = mat/v;
+}
+
 // TImage operators
 template <typename TFROM, typename TTO>
 inline TImage<TTO> cvtImage(const TImage<TFROM>& image) {
@@ -1471,6 +1517,7 @@ DEFINE_CVDATADEPTH(SEACAVE::cuint32_t)
 // define specialized cv:DataType<>
 DEFINE_CVDATATYPE(SEACAVE::hfloat)
 DEFINE_CVDATATYPE(SEACAVE::cuint32_t)
+DEFINE_GENERIC_CVDATATYPE(uint64_t,double)
 /*----------------------------------------------------------------*/
 DEFINE_CVDATATYPE(SEACAVE::Point2i)
 DEFINE_CVDATATYPE(SEACAVE::Point2hf)
@@ -1690,6 +1737,133 @@ TPoint3<TYPE> TPoint3<TYPE>::RotateAngleAxis(const TPoint3& pt, const TPoint3& a
 
 // C L A S S  //////////////////////////////////////////////////////
 
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n>::TMatrix(TYPE v0)
+{
+	STATIC_ASSERT(channels >= 1);
+	val[0] = v0;
+	for (int i = 1; i < channels; i++)
+		val[i] = TYPE(0);
+}
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n>::TMatrix(TYPE v0, TYPE v1)
+{
+	STATIC_ASSERT(channels >= 2);
+	val[0] = v0; val[1] = v1;
+	for (int i = 2; i < channels; i++)
+		val[i] = TYPE(0);
+}
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n>::TMatrix(TYPE v0, TYPE v1, TYPE v2)
+{
+	STATIC_ASSERT(channels >= 3);
+	val[0] = v0; val[1] = v1; val[2] = v2;
+	for (int i = 3; i < channels; i++)
+		val[i] = TYPE(0);
+}
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n>::TMatrix(TYPE v0, TYPE v1, TYPE v2, TYPE v3)
+{
+	STATIC_ASSERT(channels >= 4);
+	val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+	for (int i = 4; i < channels; i++)
+		val[i] = TYPE(0);
+}
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n>::TMatrix(TYPE v0, TYPE v1, TYPE v2, TYPE v3, TYPE v4)
+{
+	STATIC_ASSERT(channels >= 5);
+	val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3; val[4] = v4;
+	for (int i = 5; i < channels; i++)
+		val[i] = TYPE(0);
+}
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n>::TMatrix(TYPE v0, TYPE v1, TYPE v2, TYPE v3, TYPE v4, TYPE v5)
+{
+	STATIC_ASSERT(channels >= 6);
+	val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+	val[4] = v4; val[5] = v5;
+	for (int i = 6; i < channels; i++)
+		val[i] = TYPE(0);
+}
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n>::TMatrix(TYPE v0, TYPE v1, TYPE v2, TYPE v3, TYPE v4, TYPE v5, TYPE v6)
+{
+	STATIC_ASSERT(channels >= 7);
+	val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+	val[4] = v4; val[5] = v5; val[6] = v6;
+	for (int i = 7; i < channels; i++)
+		val[i] = TYPE(0);
+}
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n>::TMatrix(TYPE v0, TYPE v1, TYPE v2, TYPE v3, TYPE v4, TYPE v5, TYPE v6, TYPE v7)
+{
+	STATIC_ASSERT(channels >= 8);
+	val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+	val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
+	for (int i = 8; i < channels; i++)
+		val[i] = TYPE(0);
+}
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n>::TMatrix(TYPE v0, TYPE v1, TYPE v2, TYPE v3, TYPE v4, TYPE v5, TYPE v6, TYPE v7, TYPE v8)
+{
+	STATIC_ASSERT(channels >= 9);
+	val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+	val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
+	val[8] = v8;
+	for (int i = 9; i < channels; i++)
+		val[i] = TYPE(0);
+}
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n>::TMatrix(TYPE v0, TYPE v1, TYPE v2, TYPE v3, TYPE v4, TYPE v5, TYPE v6, TYPE v7, TYPE v8, TYPE v9)
+{
+	STATIC_ASSERT(channels >= 10);
+	val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+	val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
+	val[8] = v8; val[9] = v9;
+	for (int i = 10; i < channels; i++)
+		val[i] = TYPE(0);
+}
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n>::TMatrix(TYPE v0, TYPE v1, TYPE v2, TYPE v3, TYPE v4, TYPE v5, TYPE v6, TYPE v7, TYPE v8, TYPE v9, TYPE v10, TYPE v11)
+{
+	STATIC_ASSERT(channels >= 12);
+	val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+	val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
+	val[8] = v8; val[9] = v9; val[10] = v10; val[11] = v11;
+	for (int i = 12; i < channels; i++)
+		val[i] = TYPE(0);
+}
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n>::TMatrix(TYPE v0, TYPE v1, TYPE v2, TYPE v3, TYPE v4, TYPE v5, TYPE v6, TYPE v7, TYPE v8, TYPE v9, TYPE v10, TYPE v11, TYPE v12, TYPE v13)
+{
+	STATIC_ASSERT(channels == 14);
+	val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+	val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
+	val[8] = v8; val[9] = v9; val[10] = v10; val[11] = v11;
+	val[12] = v12; val[13] = v13;
+	for (int i = 14; i < channels; i++)
+		val[i] = TYPE(0);
+}
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n>::TMatrix(TYPE v0, TYPE v1, TYPE v2, TYPE v3, TYPE v4, TYPE v5, TYPE v6, TYPE v7, TYPE v8, TYPE v9, TYPE v10, TYPE v11, TYPE v12, TYPE v13, TYPE v14, TYPE v15)
+{
+	STATIC_ASSERT(channels >= 16);
+	val[0] = v0; val[1] = v1; val[2] = v2; val[3] = v3;
+	val[4] = v4; val[5] = v5; val[6] = v6; val[7] = v7;
+	val[8] = v8; val[9] = v9; val[10] = v10; val[11] = v11;
+	val[12] = v12; val[13] = v13; val[14] = v14; val[15] = v15;
+	for (int i = 16; i < channels; i++)
+		val[i] = TYPE(0);
+}
+template <typename TYPE, int m, int n>
+inline TMatrix<TYPE,m,n>::TMatrix(const TYPE* values)
+{
+	for (int i = 0; i < channels; i++)
+		val[i] = values[i];
+}
+
+
 template <typename TYPE, int m, int n>
 inline bool TMatrix<TYPE,m,n>::IsEqual(const Base& rhs) const
 {
@@ -1990,6 +2164,67 @@ void TDVector<TYPE>::getKroneckerProduct(const TDVector<TYPE>& arg, TDVector<TYP
 /*----------------------------------------------------------------*/
 
 
+// C L A S S  //////////////////////////////////////////////////////
+
+// Color ramp code from "Colour Ramping for Data Visualisation"
+// (see http://paulbourke.net/texture_colour/colourspace)
+template <typename TYPE/*PixelType*/>
+template <typename VT/*ValueType*/>
+TPixel<TYPE> TPixel<TYPE>::colorRamp(VT v, VT vmin, VT vmax)
+{
+	if (v < vmin)
+		v = vmin;
+	if (v > vmax)
+		v = vmax;
+	const TYPE dv((TYPE)(vmax - vmin));
+	TPixel<TYPE> c(1,1,1); // white
+	if (v < vmin + (VT)(TYPE(0.25) * dv)) {
+		c.r = TYPE(0);
+		c.g = TYPE(4) * (v - vmin) / dv;
+	} else if (v < vmin + (VT)(TYPE(0.5) * dv)) {
+		c.r = TYPE(0);
+		c.b = TYPE(1) + TYPE(4) * (vmin + TYPE(0.25) * dv - v) / dv;
+	} else if (v < vmin + (VT)(TYPE(0.75) * dv)) {
+		c.r = TYPE(4) * (v - vmin - TYPE(0.5) * dv) / dv;
+		c.b = TYPE(0);
+	} else {
+		c.g = TYPE(1) + TYPE(4) * (vmin + TYPE(0.75) * dv - v) / dv;
+		c.b = TYPE(0);
+	}
+	return c;
+}
+
+// Gray values are expected in the range [0, 1] and converted to RGB values.
+template <typename TYPE>
+TPixel<TYPE> TPixel<TYPE>::gray2color(ALT gray)
+{
+	ASSERT(ALT(0) <= gray && gray <= ALT(1));
+	// Jet colormap inspired by Matlab.
+	auto const Interpolate = [](ALT val, ALT y0, ALT x0, ALT y1, ALT x1) -> ALT {
+		return (val - x0) * (y1 - y0) / (x1 - x0) + y0;
+	};
+	auto const  Base = [&Interpolate](ALT val) -> ALT {
+		if (val <= ALT(0.125)) {
+			return ALT(0);
+		} else if (val <= ALT(0.375)) {
+			return Interpolate(ALT(2) * val - ALT(1), ALT(0), ALT(-0.75), ALT(1), ALT(-0.25));
+		} else if (val <= ALT(0.625)) {
+			return ALT(1);
+		} else if (val <= ALT(0.87)) {
+			return Interpolate(ALT(2) * val - ALT(1), ALT(1), ALT(0.25), ALT(0), ALT(0.75));
+		} else {
+			return ALT(0);
+		}
+	};
+	return TPixel<ALT>(
+		Base(gray + ALT(0.25)),
+		Base(gray),
+		Base(gray - ALT(0.25))
+	).template cast<TYPE>();
+}
+/*----------------------------------------------------------------*/
+
+
 // C L A S S  //////////////////////////////////////////////////////
 
 // Find a pixel inside the image
@@ -2037,71 +2272,71 @@ TYPE TImage<TYPE>::sampleSafe(const TPoint2<T>& pt) const
 
 // sample by bilinear interpolation, using only pixels that meet the user condition
 template <typename TYPE>
-template <typename T>
-bool TImage<TYPE>::sample(TYPE& v, const TPoint2<T>& pt, bool (STCALL *fncCond)(const TYPE&)) const
+template <typename T, typename TV, typename Functor>
+bool TImage<TYPE>::sample(TV& v, const TPoint2<T>& pt, const Functor& functor) const
 {
 	const int lx((int)pt.x);
 	const int ly((int)pt.y);
 	const T x(pt.x-lx), x1(T(1)-x);
 	const T y(pt.y-ly), y1(T(1)-y);
-	const TYPE& x0y0(BaseBase::operator()(ly  , lx  )); const bool b00(fncCond(x0y0));
-	const TYPE& x1y0(BaseBase::operator()(ly  , lx+1)); const bool b10(fncCond(x1y0));
-	const TYPE& x0y1(BaseBase::operator()(ly+1, lx  )); const bool b01(fncCond(x0y1));
-	const TYPE& x1y1(BaseBase::operator()(ly+1, lx+1)); const bool b11(fncCond(x1y1));
+	const TYPE& x0y0(BaseBase::operator()(ly  , lx  )); const bool b00(functor(x0y0));
+	const TYPE& x1y0(BaseBase::operator()(ly  , lx+1)); const bool b10(functor(x1y0));
+	const TYPE& x0y1(BaseBase::operator()(ly+1, lx  )); const bool b01(functor(x0y1));
+	const TYPE& x1y1(BaseBase::operator()(ly+1, lx+1)); const bool b11(functor(x1y1));
 	if (!b00 && !b10 && !b01 && !b11)
 		return false;
-	v = y1*(x1*(b00 ? x0y0 : (b10 ? x1y0 : (b01 ? x0y1 : x1y1))) + x*(b10 ? x1y0 : (b00 ? x0y0 : (b11 ? x1y1 : x0y1)))) +
-		y *(x1*(b01 ? x0y1 : (b11 ? x1y1 : (b00 ? x0y0 : x1y0))) + x*(b11 ? x1y1 : (b01 ? x0y1 : (b10 ? x1y0 : x0y0))));
+	v = TV(y1*(x1*Cast<T>(b00 ? x0y0 : (b10 ? x1y0 : (b01 ? x0y1 : x1y1))) + x*Cast<T>(b10 ? x1y0 : (b00 ? x0y0 : (b11 ? x1y1 : x0y1)))) +
+		   y *(x1*Cast<T>(b01 ? x0y1 : (b11 ? x1y1 : (b00 ? x0y0 : x1y0))) + x*Cast<T>(b11 ? x1y1 : (b01 ? x0y1 : (b10 ? x1y0 : x0y0)))));
 	return true;
 }
 template <typename TYPE>
-template <typename T>
-bool TImage<TYPE>::sampleSafe(TYPE& v, const TPoint2<T>& pt, bool (STCALL *fncCond)(const TYPE&)) const
+template <typename T, typename TV, typename Functor>
+bool TImage<TYPE>::sampleSafe(TV& v, const TPoint2<T>& pt, const Functor& functor) const
 {
 	const int lx((int)pt.x);
 	const int ly((int)pt.y);
 	const T x(pt.x-lx), x1(T(1)-x);
 	const T y(pt.y-ly), y1(T(1)-y);
-	const TYPE& x0y0(getPixel(ly  , lx  )); const bool b00(fncCond(x0y0));
-	const TYPE& x1y0(getPixel(ly  , lx+1)); const bool b10(fncCond(x1y0));
-	const TYPE& x0y1(getPixel(ly+1, lx  )); const bool b01(fncCond(x0y1));
-	const TYPE& x1y1(getPixel(ly+1, lx+1)); const bool b11(fncCond(x1y1));
+	const TYPE& x0y0(getPixel(ly  , lx  )); const bool b00(functor(x0y0));
+	const TYPE& x1y0(getPixel(ly  , lx+1)); const bool b10(functor(x1y0));
+	const TYPE& x0y1(getPixel(ly+1, lx  )); const bool b01(functor(x0y1));
+	const TYPE& x1y1(getPixel(ly+1, lx+1)); const bool b11(functor(x1y1));
 	if (!b00 && !b10 && !b01 && !b11)
 		return false;
-	v = y1*(x1*(b00 ? x0y0 : (b10 ? x1y0 : (b01 ? x0y1 : x1y1))) + x*(b10 ? x1y0 : (b00 ? x0y0 : (b11 ? x1y1 : x0y1)))) +
-		y *(x1*(b01 ? x0y1 : (b11 ? x1y1 : (b00 ? x0y0 : x1y0))) + x*(b11 ? x1y1 : (b01 ? x0y1 : (b10 ? x1y0 : x0y0))));
+	v = TV(y1*(x1*Cast<T>(b00 ? x0y0 : (b10 ? x1y0 : (b01 ? x0y1 : x1y1))) + x*Cast<T>(b10 ? x1y0 : (b00 ? x0y0 : (b11 ? x1y1 : x0y1)))) +
+		   y *(x1*Cast<T>(b01 ? x0y1 : (b11 ? x1y1 : (b00 ? x0y0 : x1y0))) + x*Cast<T>(b11 ? x1y1 : (b01 ? x0y1 : (b10 ? x1y0 : x0y0)))));
 	return true;
 }
 // same as above, but using default value if the condition is not met
 template <typename TYPE>
-template <typename T>
-TYPE TImage<TYPE>::sample(const TPoint2<T>& pt, bool (STCALL *fncCond)(const TYPE&), const TYPE& dv) const
+template <typename T, typename Functor>
+TYPE TImage<TYPE>::sample(const TPoint2<T>& pt, const Functor& functor, const TYPE& dv) const
 {
 	const int lx((int)pt.x);
 	const int ly((int)pt.y);
 	const T x(pt.x-lx), x1(T(1)-x);
 	const T y(pt.y-ly), y1(T(1)-y);
-	const TYPE& x0y0(BaseBase::operator()(ly  , lx  )); const bool b00(fncCond(x0y0));
-	const TYPE& x1y0(BaseBase::operator()(ly  , lx+1)); const bool b10(fncCond(x1y0));
-	const TYPE& x0y1(BaseBase::operator()(ly+1, lx  )); const bool b01(fncCond(x0y1));
-	const TYPE& x1y1(BaseBase::operator()(ly+1, lx+1)); const bool b11(fncCond(x1y1));
-	return y1*(x1*(b00 ? x0y0 : dv) + x*(b10 ? x1y0 : dv)) +
-		    y*(x1*(b01 ? x0y1 : dv) + x*(b11 ? x1y1 : dv));
+	const TYPE& x0y0(BaseBase::operator()(ly  , lx  )); const bool b00(functor(x0y0));
+	const TYPE& x1y0(BaseBase::operator()(ly  , lx+1)); const bool b10(functor(x1y0));
+	const TYPE& x0y1(BaseBase::operator()(ly+1, lx  )); const bool b01(functor(x0y1));
+	const TYPE& x1y1(BaseBase::operator()(ly+1, lx+1)); const bool b11(functor(x1y1));
+	return TYPE(y1*(x1*Cast<T>(b00 ? x0y0 : dv) + x*Cast<T>(b10 ? x1y0 : dv)) +
+				y *(x1*Cast<T>(b01 ? x0y1 : dv) + x*Cast<T>(b11 ? x1y1 : dv)));
 }
 template <typename TYPE>
-template <typename T>
-TYPE TImage<TYPE>::sampleSafe(const TPoint2<T>& pt, bool (STCALL *fncCond)(const TYPE&), const TYPE& dv) const
+template <typename T, typename Functor>
+TYPE TImage<TYPE>::sampleSafe(const TPoint2<T>& pt, const Functor& functor, const TYPE& dv) const
 {
 	const int lx((int)pt.x);
 	const int ly((int)pt.y);
 	const T x(pt.x-lx), x1(T(1)-x);
 	const T y(pt.y-ly), y1(T(1)-y);
-	const TYPE& x0y0(getPixel(ly  , lx  )); const bool b00(fncCond(x0y0));
-	const TYPE& x1y0(getPixel(ly  , lx+1)); const bool b10(fncCond(x1y0));
-	const TYPE& x0y1(getPixel(ly+1, lx  )); const bool b01(fncCond(x0y1));
-	const TYPE& x1y1(getPixel(ly+1, lx+1)); const bool b11(fncCond(x1y1));
-	return y1*(x1*(b00 ? x0y0 : dv) + x*(b10 ? x1y0 : dv)) +
-			y*(x1*(b01 ? x0y1 : dv) + x*(b11 ? x1y1 : dv));
+	const TYPE& x0y0(getPixel(ly  , lx  )); const bool b00(functor(x0y0));
+	const TYPE& x1y0(getPixel(ly  , lx+1)); const bool b10(functor(x1y0));
+	const TYPE& x0y1(getPixel(ly+1, lx  )); const bool b01(functor(x0y1));
+	const TYPE& x1y1(getPixel(ly+1, lx+1)); const bool b11(functor(x1y1));
+	return TYPE(y1*(x1*Cast<T>(b00 ? x0y0 : dv) + x*Cast<T>(b10 ? x1y0 : dv)) +
+				y *(x1*Cast<T>(b01 ? x0y1 : dv) + x*Cast<T>(b11 ? x1y1 : dv)));
 }
 
 // same as above, sample image at a specified position, but using the given sampler
@@ -2119,10 +2354,11 @@ template <typename T>
 void TImage<TYPE>::toGray(TImage<T>& out, int code, bool bNormalize, bool bSRGB) const
 {
 	#if 1
+	typedef typename RealType<T,float>::type Real;
 	ASSERT(code==cv::COLOR_RGB2GRAY || code==cv::COLOR_RGBA2GRAY || code==cv::COLOR_BGR2GRAY || code==cv::COLOR_BGRA2GRAY);
-	static const T coeffsRGB[] = {T(0.299), T(0.587), T(0.114)};
-	static const T coeffsBGR[] = {T(0.114), T(0.587), T(0.299)};
-	const float* coeffs;
+	static const Real coeffsRGB[] = {Real(0.299), Real(0.587), Real(0.114)};
+	static const Real coeffsBGR[] = {Real(0.114), Real(0.587), Real(0.299)};
+	const Real* coeffs;
 	switch (code) {
 	case cv::COLOR_BGR2GRAY:
 	case cv::COLOR_BGRA2GRAY:
@@ -2135,7 +2371,7 @@ void TImage<TYPE>::toGray(TImage<T>& out, int code, bool bNormalize, bool bSRGB)
 	default:
 		ASSERT("Unsupported image format" == NULL);
 	}
-	const T &cb(coeffs[0]), &cg(coeffs[1]), &cr(coeffs[2]);
+	const Real &cb(coeffs[0]), &cg(coeffs[1]), &cr(coeffs[2]);
 	if (out.rows!=rows || out.cols!=cols)
 		out.create(rows, cols);
 	ASSERT(cv::Mat::isContinuous());
@@ -2146,17 +2382,17 @@ void TImage<TYPE>::toGray(TImage<T>& out, int code, bool bNormalize, bool bSRGB)
 	typedef typename cv::DataType<TYPE>::channel_type ST;
 	if (bSRGB) {
 		if (bNormalize) {
-			typedef typename CONVERT::NormsRGB2RGB_t<ST,T> ColConv;
+			typedef typename CONVERT::NormsRGB2RGB_t<ST,Real> ColConv;
 			for (const ST* src=cv::Mat::template ptr<ST>(); dst!=dstEnd; src+=scn)
 				*dst++ = T(cb*ColConv(src[0]) + cg*ColConv(src[1]) + cr*ColConv(src[2]));
 		} else {
-			typedef typename CONVERT::NormsRGB2RGBUnNorm_t<ST,T> ColConv;
+			typedef typename CONVERT::NormsRGB2RGBUnNorm_t<ST,Real> ColConv;
 			for (const ST* src=cv::Mat::template ptr<ST>(); dst!=dstEnd; src+=scn)
 				*dst++ = T(cb*ColConv(src[0]) + cg*ColConv(src[1]) + cr*ColConv(src[2]));
 		}
 	} else {
 		if (bNormalize) {
-			typedef typename CONVERT::NormRGB_t<ST,T> ColConv;
+			typedef typename CONVERT::NormRGB_t<ST,Real> ColConv;
 			for (const ST* src=cv::Mat::template ptr<ST>(); dst!=dstEnd; src+=scn)
 				*dst++ = T(cb*ColConv(src[0]) + cg*ColConv(src[1]) + cr*ColConv(src[2]));
 		} else {
@@ -2175,6 +2411,32 @@ void TImage<TYPE>::toGray(TImage<T>& out, int code, bool bNormalize, bool bSRGB)
 /*----------------------------------------------------------------*/
 
 
+// compute scaled size such that the biggest dimension is scaled as desired
+// and the smaller one maintains the aspect ratio as best as it can
+template <typename TYPE>
+cv::Size TImage<TYPE>::computeResize(const cv::Size& size, REAL scale)
+{
+	cv::Size scaledSize;
+	if (size.width > size.height) {
+		scaledSize.width = ROUND2INT(REAL(size.width)*scale);
+		scaledSize.height = ROUND2INT(REAL(size.height)*scaledSize.width/REAL(size.width));
+	} else {
+		scaledSize.height = ROUND2INT(REAL(size.height)*scale);
+		scaledSize.width = ROUND2INT(REAL(size.width)*scaledSize.height/REAL(size.height));
+	}
+	return scaledSize;
+}
+// compute the final scaled size by performing successive resizes
+// with the given scale value
+template <typename TYPE>
+cv::Size TImage<TYPE>::computeResize(const cv::Size& size, REAL scale, unsigned resizes)
+{
+	cv::Size scaledSize(size);
+	while (resizes-- > 0)
+		scaledSize = computeResize(scaledSize, scale);
+	return scaledSize;
+}
+
 // compute image scale for a given max and min resolution
 template <typename TYPE>
 unsigned TImage<TYPE>::computeMaxResolution(unsigned width, unsigned height, unsigned& level, unsigned minImageSize, unsigned maxImageSize)
diff --git a/libs/Common/Util.inl b/libs/Common/Util.inl
index f64a1704e..d605b9173 100644
--- a/libs/Common/Util.inl
+++ b/libs/Common/Util.inl
@@ -886,8 +886,9 @@ inline std::pair<TYPEW,TYPEW> ComputeX84Threshold(const TYPE* const values, size
 	std::nth_element(data.Begin(), mid, data.End());
 	const TYPEW median(*mid);
 	// threshold = 5.2 * MEDIAN(ABS(values-median));
-	FOREACHPTR(pVal, data)
-		*pVal = ABS((*pVal)-median);
+	using TYPEI = typename MakeSigned<TYPE>::type;
+	for (TYPE& val: data)
+		val = TYPE(ABS(TYPEI(val)-TYPEI(median)));
 	std::nth_element(data.Begin(), mid, data.End());
 	return std::make_pair(median, mul*TYPEW(*mid));
 } // ComputeX84Threshold
diff --git a/libs/IO/OBJ.cpp b/libs/IO/OBJ.cpp
index 2d8bd4fda..bf82ee43d 100644
--- a/libs/IO/OBJ.cpp
+++ b/libs/IO/OBJ.cpp
@@ -160,12 +160,17 @@ bool ObjModel::Save(const String& fileName, unsigned precision, bool texLossless
 	for (size_t i = 0; i < groups.size(); ++i) {
 		out << "usemtl " << groups[i].material_name << "\n";
 		for (size_t j = 0; j < groups[i].faces.size(); ++j) {
-			Face const & face =  groups[i].faces[j];
+			const Face& face =  groups[i].faces[j];
 			out << "f";
 			for (size_t k = 0; k < 3; ++k) {
-				out << " " << face.vertices[k]  + OBJ_INDEX_OFFSET
-					<< "/" << face.texcoords[k]  + OBJ_INDEX_OFFSET
-					<< "/" << face.normals[k]  + OBJ_INDEX_OFFSET;
+				out << " " << face.vertices[k]  + OBJ_INDEX_OFFSET;
+				if (!texcoords.empty()) {
+					out << "/" << face.texcoords[k]  + OBJ_INDEX_OFFSET;
+					if (!normals.empty())
+						out << "/" << face.normals[k]  + OBJ_INDEX_OFFSET;
+				} else
+				if (!normals.empty())
+					out << "//" << face.normals[k]  + OBJ_INDEX_OFFSET;
 			}
 			out << "\n";
 		}
@@ -233,6 +238,8 @@ bool ObjModel::Load(const String& fileName)
 			}
 			if (in.fail())
 				return false;
+			if (groups.empty())
+				AddGroup("");
 			groups.back().faces.push_back(f);
 		} else
 		if (keyword == "mtllib") {
diff --git a/libs/MVS/Camera.cpp b/libs/MVS/Camera.cpp
index e8d7d5e0c..cdcdbc154 100644
--- a/libs/MVS/Camera.cpp
+++ b/libs/MVS/Camera.cpp
@@ -187,3 +187,210 @@ void MVS::AssembleProjectionMatrix(const RMatrix& R, const CMatrix& C, PMatrix&
 	eT = ((const Matrix3x3::EMat)R) * (-((const Point3::EVec)C)); //3x1
 } // AssembleProjectionMatrix
 /*----------------------------------------------------------------*/
+
+
+
+namespace MVS {
+
+namespace RECTIFY {
+	
+// compute the ROIs for the two images based on the corresponding points
+void GetImagePairROI(const Point3fArr& points1, const Point3fArr& points2, const Matrix3x3& K1, const Matrix3x3& K2, const Matrix3x3& R1, const Matrix3x3& R2, const Matrix3x3& invK1, const Matrix3x3& invK2, AABB2f& roi1h, AABB2f& roi2h)
+{
+	ASSERT(!points1.empty() && points1.size() && points2.size());
+
+	// compute rectification homography (from original to rectified image)
+	const Matrix3x3 H1(K1 * R1 * invK1);
+	const Matrix3x3 H2(K2 * R2 * invK2);
+
+	// determine the ROIs in rectified images
+	roi1h.Reset(); roi2h.Reset();
+	FOREACH(i, points1) {
+		Point2f xh;
+		const Point3f& x1 = points1[i];
+		ProjectVertex_3x3_2_2(H1.val, x1.ptr(), xh.ptr());
+		roi1h.InsertFull(xh);
+		const Point3f& x2 = points2[i];
+		ProjectVertex_3x3_2_2(H2.val, x2.ptr(), xh.ptr());
+		roi2h.InsertFull(xh);
+	}
+}
+
+void SetCameraMatricesROI(const AABB2f& roi1h, const AABB2f& roi2h, cv::Size& size1, cv::Size& size2, Matrix3x3& K1, Matrix3x3& K2)
+{
+	// set the new image sizes such that they are equal and contain the entire ROI
+	const Point2f size1h(roi1h.GetSize());
+	const Point2f size2h(roi2h.GetSize());
+	const int maxSize(MAXF(size1.width+size2.width, size1.height+size2.height)/2);
+	size1.width = size2.width = MINF(ROUND2INT(MAXF(size1h.x,size2h.x)), maxSize);
+	size1.height = size2.height = MINF(ROUND2INT(MAXF(size1h.y,size2h.y)), maxSize);
+
+	// set the new camera matrices such that the ROI is centered
+	const Point2f center1h(roi1h.GetCenter());
+	const Point2f center2h(roi2h.GetCenter());
+	K1(0,2) += size1.width /2-center1h[0];
+	K1(1,2) += size1.height/2-center1h[1];
+	K2(0,2) += size2.width /2-center2h[0];
+	K2(1,2) += size2.height/2-center2h[1];
+}
+
+} // namespace RECTIFY
+
+} // namespace MVS
+
+// Compute stereo rectification homographies that transform two images,
+// such that corresponding pixels in one image lie on the same scan-line in the other image;
+// note: this function assumes that the two cameras are already undistorted
+REAL Camera::StereoRectify(const cv::Size& size1, const Camera& camera1, const cv::Size& size2, const Camera& camera2, Matrix3x3& R1, Matrix3x3& R2, Matrix3x3& K1, Matrix3x3& K2)
+{
+	// compute relative pose
+	RMatrix poseR;
+	CMatrix poseC;
+	ComputeRelativePose(camera1.R, camera1.C, camera2.R, camera2.C, poseR, poseC);
+
+	// compute the average rotation between the first and the second camera
+	const Point3 r(poseR.GetRotationAxisAngle());
+	reinterpret_cast<RMatrix&>(R2).SetFromAxisAngle(r*REAL(-0.5));
+	R1 = R2.t();
+
+	// compute the translation, such that it coincides with the X-axis
+	int idx(0);
+	Point3 t(R2 * (poseR*(-poseC)));
+	#if 0
+	const Point3 unitX(t.x<0?-1:1, 0, 0);
+	const Point3 axis(t.cross(unitX));
+	const REAL normAxis(norm(axis));
+	if (normAxis > EPSILONTOLERANCE<REAL>()) {
+		ASSERT(t.dot(unitX) >= 0);
+		const REAL angle(ACOS(ComputeAngle(t.ptr(), unitX.ptr())));
+		const RMatrix Rx(axis*angle/normAxis);
+	#else
+	const REAL nt(norm(t));
+	//idx = (ABS(t.x) > ABS(t.y) ? 0 : 1);
+	Point3 axis(Point3(0,0,1).cross(t));
+	const REAL na(norm(axis));
+	if (na < EPSILONTOLERANCE<REAL>())
+		return 0;
+	{
+		const Point3 w(normalized(t.cross(axis)));
+		RMatrix Rx;
+		for (int i = 0; i < 3; ++i) {
+			Rx(idx,i) = -t[i]/nt;
+			Rx(idx^1,i) = -axis[i]/na;
+			Rx(2,i) = w[i]*(1-2*idx); // if idx == 1 -> opposite direction
+		}
+	#endif
+		R1 = Rx * R1;
+		R2 = Rx * R2;
+		t = R2 * (poseR*(-poseC));
+	}
+	ASSERT(ISEQUAL(-t.x, norm(camera2.C-camera1.C)) && ISZERO(t.y) && ISZERO(t.z));
+
+	// determine the intrinsic calibration matrix;
+	// vertical focal length must be the same for both images to keep the epipolar constraint
+	K1 = Matrix3x3::IDENTITY;
+	K1(0,0) = (camera1.K(0,0)+camera2.K(0,0))/2;
+	K1(1,1) = (camera1.K(1,1)+camera2.K(1,1))/2;
+	K2 = K1;
+	// set ROI to contain the whole transformed image
+	for (int k = 0; k < 2; k++) {
+		const Matrix3x3 H(k == 0 ? K1*R1*camera1.GetInvK() : K2*R2*camera2.GetInvK());
+		Point2f ptMin(std::numeric_limits<float>::max());
+		for (int i = 0; i < 4; i++) {
+			const Point2f pt(
+				(float)((i%2)*size1.width),
+				(float)((i<2?0:1)*size1.height)
+			);
+			Point2f ptH;
+			ProjectVertex_3x3_2_2(H.val, pt.ptr(), ptH.ptr());
+			ptMin.x = MINF(ptMin.x,ptH.x);
+			ptMin.y = MINF(ptMin.y,ptH.y);
+		}
+		Matrix3x3& KNew(k == 0 ? K1 : K2);
+		KNew(0,2) = -ptMin.x;
+		KNew(1,2) = -ptMin.y;
+	}
+	K1(idx^1,2) = K2(idx^1,2) = MAXF(K1(idx^1,2), K2(idx^1,2));
+	return t.x;
+} // StereoRectify
+
+// see: "A compact algorithm for rectification of stereo pairs", A. Fusiello, E. Trucco, and A. Verri, 2000
+REAL Camera::StereoRectifyFusiello(const cv::Size& size1, const Camera& camera1, const cv::Size& size2, const Camera& camera2, Matrix3x3& R1, Matrix3x3& R2, Matrix3x3& K1, Matrix3x3& K2)
+{
+	// compute relative pose
+	RMatrix poseR;
+	CMatrix poseC;
+	ComputeRelativePose(camera1.R, camera1.C, camera2.R, camera2.C, poseR, poseC);
+
+	// new x axis (baseline, from C1 to C2)
+	const Point3 v1(camera2.C-camera1.C);
+	// new y axes (orthogonal to old z and new x)
+	const Point3 v2(camera1.Direction().cross(v1));
+	// new z axes (no choice, orthogonal to baseline and y)
+	const Point3 v3(v1.cross(v2));
+
+	// new extrinsic (translation unchanged)
+	RMatrix R;
+	R.SetFromRowVectors(normalized(v1), normalized(v2), normalized(v3));
+
+	// new intrinsic (arbitrary)
+	K1 = camera1.K; K1(0,1) = 0;
+	K2 = camera2.K; K2(0,1) = 0;
+	K1(1,1) = K2(1,1) = (camera1.K(1,1)+camera2.K(1,1))/2;
+
+	// new rotations
+	R1 = R*camera1.R.t();
+	R2 = R*camera2.R.t();
+
+	#if 0
+	// new projection matrices
+	PMatrix P1, P2;
+	AssembleProjectionMatrix(K1, R, camera1.C, P1);
+	AssembleProjectionMatrix(K2, R, camera2.C, P2);
+
+	// rectifying image transformation
+	#if 0
+	const Matrix3x3 H1((PMatrix::EMat(P1).leftCols<3>()*(PMatrix::EMat(camera1.P).leftCols<3>().inverse())).eval());
+	const Matrix3x3 H2((PMatrix::EMat(P2).leftCols<3>()*(PMatrix::EMat(camera2.P).leftCols<3>().inverse())).eval());
+	#else
+	const Matrix3x3 H1(K1*R*camera1.R.t()*camera1.GetInvK());
+	const Matrix3x3 H2(K2*R*camera2.R.t()*camera2.GetInvK());
+	#endif
+	#endif
+
+	const Point3 t(R2 * (poseR*(-poseC)));
+	ASSERT(ISEQUAL(-t.x, norm(v1)) && ISZERO(t.y) && ISZERO(t.z));
+	return t.x;
+} // StereoRectifyFusiello
+
+// adjust rectified camera matrices such that the entire area common to both source images is contained in the rectified images;
+// as long as the focal-length on x and the skewness are equal in both cameras
+// the point-scale is also equal in both images;
+// note however that the surface-scale can still be different (most likely is)
+// as it depends on its (unknown) normal too;
+//  - points1 and points2: contain the pairs of corresponding pairs of image projections and their depth
+//  - size1 and size2: input the size of the source images, output the size of the rectified images (rectified image sizes are equal)
+void Camera::SetStereoRectificationROI(const Point3fArr& points1, cv::Size& size1, const Camera& camera1, const Point3fArr& points2, cv::Size& size2, const Camera& camera2, const Matrix3x3& R1, const Matrix3x3& R2, Matrix3x3& K1, Matrix3x3& K2)
+{
+	ASSERT(!points1.empty() && points1.size() && points2.size());
+
+	#if 1
+	// ignore skewness
+	K1(0,1) = K2(0,1) = 0;
+	#else
+	// set same skewness
+	K1(0,1) = K2(0,1) = (K1(0,1)+K2(0,1))/2;
+	#endif
+
+	// set same focal-length on x too
+	K1(0,0) = K2(0,0) = (K1(0,0)+K2(0,0))/2;
+	ASSERT(ISEQUAL(K1(1,1), K2(1,1)));
+
+	// determine the ROIs in rectified images
+	AABB2f roi1h, roi2h;
+	RECTIFY::GetImagePairROI(points1, points2, K1, K2, R1, R2, camera1.GetInvK(), camera2.GetInvK(), roi1h, roi2h);
+
+	// set the new camera matrices such that the ROI is centered
+	RECTIFY::SetCameraMatricesROI(roi1h, roi2h, size1, size2, K1, K2);
+} // SetStereoRectificationROI
+/*----------------------------------------------------------------*/
diff --git a/libs/MVS/Camera.h b/libs/MVS/Camera.h
index aa9aebb6d..ee22d8a80 100644
--- a/libs/MVS/Camera.h
+++ b/libs/MVS/Camera.h
@@ -232,6 +232,10 @@ class MVS_API Camera : public CameraIntern
 	REAL DistanceSq(const Point3& X) const; // compute the distance from the camera to the given 3D point
 	inline REAL Distance(const Point3& X) const { return SQRT(DistanceSq(X)); }
 
+	static REAL StereoRectify(const cv::Size& size1, const Camera& camera1, const cv::Size& size2, const Camera& camera2, Matrix3x3& R1, Matrix3x3& R2, Matrix3x3& K1, Matrix3x3& K2);
+	static REAL StereoRectifyFusiello(const cv::Size& size1, const Camera& camera1, const cv::Size& size2, const Camera& camera2, Matrix3x3& R1, Matrix3x3& R2, Matrix3x3& K1, Matrix3x3& K2);
+	static void SetStereoRectificationROI(const Point3fArr& points1, cv::Size& size1, const Camera& camera1, const Point3fArr& points2, cv::Size& size2, const Camera& camera2, const Matrix3x3& R1, const Matrix3x3& R2, Matrix3x3& K1, Matrix3x3& K2);
+
 	// project 3D point by the camera
 	template <typename TYPE>
 	inline TPoint3<TYPE> ProjectPointRT3(const TPoint3<TYPE>& X) const {
@@ -337,6 +341,11 @@ class MVS_API Camera : public CameraIntern
 	inline TPoint2<TYPE> TransformPointW2I(const TPoint3<TYPE>& X) const {
 		return TransformPointC2I(TransformPointW2C(X));
 	}
+	template <typename TYPE>
+	inline TPoint3<TYPE> TransformPointW2I3(const TPoint3<TYPE>& X) const {
+		const TPoint3<TYPE> camX(TransformPointW2C(X));
+		return TPoint3<TYPE>(TransformPointC2I(camX), camX.z);
+	}
 
 	// check if the given point (or its projection) is inside the camera view
 	template <typename TYPE>
diff --git a/libs/MVS/DepthMap.cpp b/libs/MVS/DepthMap.cpp
index 0e4b12d54..0b3e6008d 100644
--- a/libs/MVS/DepthMap.cpp
+++ b/libs/MVS/DepthMap.cpp
@@ -32,6 +32,10 @@
 #include "Common.h"
 #include "DepthMap.h"
 #include "../Common/AutoEstimator.h"
+// CGAL: depth-map initialization
+#include <CGAL/Simple_cartesian.h>
+#include <CGAL/Delaunay_triangulation_2.h>
+#include <CGAL/Projection_traits_xy_3.h>
 // CGAL: estimate normals
 #include <CGAL/Simple_cartesian.h>
 #include <CGAL/property_map.h>
@@ -908,6 +912,216 @@ DepthEstimator::PixelEstimate DepthEstimator::PerturbEstimate(const PixelEstimat
 
 // S T R U C T S ///////////////////////////////////////////////////
 
+namespace CGAL {
+typedef CGAL::Simple_cartesian<double> kernel_t;
+typedef CGAL::Projection_traits_xy_3<kernel_t> Geometry;
+typedef CGAL::Delaunay_triangulation_2<Geometry> Delaunay;
+typedef CGAL::Delaunay::Face_circulator FaceCirculator;
+typedef CGAL::Delaunay::Face_handle FaceHandle;
+typedef CGAL::Delaunay::Vertex_circulator VertexCirculator;
+typedef CGAL::Delaunay::Vertex_handle VertexHandle;
+typedef kernel_t::Point_3 Point;
+}
+
+// triangulate in-view points, generating a 2D mesh
+// return also the estimated depth boundaries (min and max depth)
+std::pair<float,float> TriangulatePointsDelaunay(const DepthData::ViewData& image, const PointCloud& pointcloud, const IndexArr& points, CGAL::Delaunay& delaunay)
+{
+	ASSERT(sizeof(Point3) == sizeof(X3D));
+	ASSERT(sizeof(Point3) == sizeof(CGAL::Point));
+	std::pair<float,float> depthBounds(FLT_MAX, 0.f);
+	for (uint32_t idx: points) {
+		const Point3f pt(image.camera.ProjectPointP3(pointcloud.points[idx]));
+		delaunay.insert(CGAL::Point(pt.x/pt.z, pt.y/pt.z, pt.z));
+		if (depthBounds.first > pt.z)
+			depthBounds.first = pt.z;
+		if (depthBounds.second < pt.z)
+			depthBounds.second = pt.z;
+	}
+	// if full size depth-map requested
+	if (OPTDENSE::bAddCorners) {
+		typedef TIndexScore<float,float> DepthDist;
+		typedef CLISTDEF0(DepthDist) DepthDistArr;
+		typedef Eigen::Map< Eigen::VectorXf, Eigen::Unaligned, Eigen::InnerStride<2> > FloatMap;
+		// add the four image corners at the average depth
+		ASSERT(image.pImageData->IsValid() && ISINSIDE(image.pImageData->avgDepth, depthBounds.first, depthBounds.second));
+		const CGAL::VertexHandle vcorners[] = {
+			delaunay.insert(CGAL::Point(0, 0, image.pImageData->avgDepth)),
+			delaunay.insert(CGAL::Point(image.image.width(), 0, image.pImageData->avgDepth)),
+			delaunay.insert(CGAL::Point(0, image.image.height(), image.pImageData->avgDepth)),
+			delaunay.insert(CGAL::Point(image.image.width(), image.image.height(), image.pImageData->avgDepth))
+		};
+		// compute average depth from the closest 3 directly connected faces,
+		// weighted by the distance
+		const size_t numPoints = 3;
+		for (int i=0; i<4; ++i) {
+			const CGAL::VertexHandle vcorner = vcorners[i];
+			CGAL::FaceCirculator cfc(delaunay.incident_faces(vcorner));
+			if (cfc == 0)
+				continue; // normally this should never happen
+			const CGAL::FaceCirculator done(cfc);
+			Point3d& poszA = reinterpret_cast<Point3d&>(vcorner->point());
+			const Point2d& posA = reinterpret_cast<const Point2d&>(poszA);
+			const Ray3d rayA(Point3d::ZERO, normalized(image.camera.TransformPointI2C(poszA)));
+			DepthDistArr depths(0, numPoints);
+			do {
+				CGAL::FaceHandle fc(cfc->neighbor(cfc->index(vcorner)));
+				if (fc == delaunay.infinite_face())
+					continue;
+				for (int j=0; j<4; ++j)
+					if (fc->has_vertex(vcorners[j]))
+						goto Continue;
+				// compute the depth as the intersection of the corner ray with
+				// the plane defined by the face's vertices
+				{
+				const Point3d& poszB0 = reinterpret_cast<const Point3d&>(fc->vertex(0)->point());
+				const Point3d& poszB1 = reinterpret_cast<const Point3d&>(fc->vertex(1)->point());
+				const Point3d& poszB2 = reinterpret_cast<const Point3d&>(fc->vertex(2)->point());
+				const Planed planeB(
+					image.camera.TransformPointI2C(poszB0),
+					image.camera.TransformPointI2C(poszB1),
+					image.camera.TransformPointI2C(poszB2)
+				);
+				const Point3d poszB(rayA.Intersects(planeB));
+				if (poszB.z <= 0)
+					continue;
+				const Point2d posB((
+					reinterpret_cast<const Point2d&>(poszB0)+
+					reinterpret_cast<const Point2d&>(poszB1)+
+					reinterpret_cast<const Point2d&>(poszB2))/3.f
+				);
+				const double dist(norm(posB-posA));
+				depths.StoreTop<numPoints>(DepthDist(CLAMP((float)poszB.z,depthBounds.first,depthBounds.second), INVERT((float)dist)));
+				}
+				Continue:;
+			} while (++cfc != done);
+			if (depths.size() != numPoints)
+				continue; // normally this should never happen
+			FloatMap vecDists(&depths[0].score, numPoints);
+			vecDists *= 1.f/vecDists.sum();
+			FloatMap vecDepths(&depths[0].idx, numPoints);
+			poszA.z = vecDepths.dot(vecDists);
+		}
+	}
+	return depthBounds;
+}
+
+// roughly estimate depth and normal maps by triangulating the sparse point cloud
+// and interpolating normal and depth for all pixels
+bool MVS::TriangulatePoints2DepthMap(
+	const DepthData::ViewData& image, const PointCloud& pointcloud, const IndexArr& points,
+	DepthMap& depthMap, NormalMap& normalMap, Depth& dMin, Depth& dMax)
+{
+	ASSERT(image.pImageData != NULL);
+
+	// triangulate in-view points
+	CGAL::Delaunay delaunay;
+	const std::pair<float,float> thDepth(TriangulatePointsDelaunay(image, pointcloud, points, delaunay));
+	dMin = thDepth.first;
+	dMax = thDepth.second;
+
+	// create rough depth-map by interpolating inside triangles
+	const Camera& camera = image.camera;
+	depthMap.create(image.image.size());
+	normalMap.create(image.image.size());
+	if (!OPTDENSE::bAddCorners) {
+		depthMap.memset(0);
+		normalMap.memset(0);
+	}
+	struct RasterDepthDataPlaneData {
+		const Camera& P;
+		DepthMap& depthMap;
+		NormalMap& normalMap;
+		Point3f normal;
+		Point3f normalPlane;
+		inline void operator()(const ImageRef& pt) {
+			if (!depthMap.isInside(pt))
+				return;
+			const Depth z(INVERT(normalPlane.dot(P.TransformPointI2C(Point2f(pt)))));
+			if (z <= 0) // due to numerical instability
+				return;
+			depthMap(pt) = z;
+			normalMap(pt) = normal;
+		}
+	};
+	RasterDepthDataPlaneData data = {camera, depthMap, normalMap};
+	for (CGAL::Delaunay::Face_iterator it=delaunay.faces_begin(); it!=delaunay.faces_end(); ++it) {
+		const CGAL::Delaunay::Face& face = *it;
+		const Point3f i0(reinterpret_cast<const Point3d&>(face.vertex(0)->point()));
+		const Point3f i1(reinterpret_cast<const Point3d&>(face.vertex(1)->point()));
+		const Point3f i2(reinterpret_cast<const Point3d&>(face.vertex(2)->point()));
+		// compute the plane defined by the 3 points
+		const Point3f c0(camera.TransformPointI2C(i0));
+		const Point3f c1(camera.TransformPointI2C(i1));
+		const Point3f c2(camera.TransformPointI2C(i2));
+		const Point3f edge1(c1-c0);
+		const Point3f edge2(c2-c0);
+		data.normal = normalized(edge2.cross(edge1));
+		data.normalPlane = data.normal * INVERT(data.normal.dot(c0));
+		// draw triangle and for each pixel compute depth as the ray intersection with the plane
+		Image8U::RasterizeTriangle(
+			reinterpret_cast<const Point2f&>(i2),
+			reinterpret_cast<const Point2f&>(i1),
+			reinterpret_cast<const Point2f&>(i0), data);
+	}
+	return true;
+} // TriangulatePoints2DepthMap
+// same as above, but does not estimate the normal-map
+bool MVS::TriangulatePoints2DepthMap(
+	const DepthData::ViewData& image, const PointCloud& pointcloud, const IndexArr& points,
+	DepthMap& depthMap, Depth& dMin, Depth& dMax)
+{
+	ASSERT(image.pImageData != NULL);
+
+	// triangulate in-view points
+	CGAL::Delaunay delaunay;
+	const std::pair<float,float> thDepth(TriangulatePointsDelaunay(image, pointcloud, points, delaunay));
+	dMin = thDepth.first;
+	dMax = thDepth.second;
+
+	// create rough depth-map by interpolating inside triangles
+	const Camera& camera = image.camera;
+	depthMap.create(image.image.size());
+	if (!OPTDENSE::bAddCorners)
+		depthMap.memset(0);
+	struct RasterDepthDataPlaneData {
+		const Camera& P;
+		DepthMap& depthMap;
+		Point3f normalPlane;
+		inline void operator()(const ImageRef& pt) {
+			if (!depthMap.isInside(pt))
+				return;
+			const Depth z((Depth)INVERT(normalPlane.dot(P.TransformPointI2C(Point2f(pt)))));
+			if (z <= 0) // due to numerical instability
+				return;
+			depthMap(pt) = z;
+		}
+	};
+	RasterDepthDataPlaneData data = {camera, depthMap};
+	for (CGAL::Delaunay::Face_iterator it=delaunay.faces_begin(); it!=delaunay.faces_end(); ++it) {
+		const CGAL::Delaunay::Face& face = *it;
+		const Point3f i0(reinterpret_cast<const Point3d&>(face.vertex(0)->point()));
+		const Point3f i1(reinterpret_cast<const Point3d&>(face.vertex(1)->point()));
+		const Point3f i2(reinterpret_cast<const Point3d&>(face.vertex(2)->point()));
+		// compute the plane defined by the 3 points
+		const Point3f c0(camera.TransformPointI2C(i0));
+		const Point3f c1(camera.TransformPointI2C(i1));
+		const Point3f c2(camera.TransformPointI2C(i2));
+		const Point3f edge1(c1-c0);
+		const Point3f edge2(c2-c0);
+		const Normal normal(normalized(edge2.cross(edge1)));
+		data.normalPlane = normal * INVERT(normal.dot(c0));
+		// draw triangle and for each pixel compute depth as the ray intersection with the plane
+		Image8U::RasterizeTriangle(
+			reinterpret_cast<const Point2f&>(i2),
+			reinterpret_cast<const Point2f&>(i1),
+			reinterpret_cast<const Point2f&>(i0), data);
+	}
+	return true;
+} // TriangulatePoints2DepthMap
+/*----------------------------------------------------------------*/
+
+
 namespace MVS {
 
 class PlaneSolverAdaptor
@@ -1131,6 +1345,100 @@ void MVS::EstimatePointNormals(const ImageArr& images, PointCloud& pointcloud, i
 } // EstimatePointNormals
 /*----------------------------------------------------------------*/
 
+bool MVS::EstimateNormalMap(const Matrix3x3f& K, const DepthMap& depthMap, NormalMap& normalMap)
+{
+	normalMap.create(depthMap.size());
+	struct Tool {
+		static bool IsDepthValid(Depth d, Depth nd) {
+			return nd > 0 && IsDepthSimilar(d, nd, Depth(0.03f));
+		}
+		// computes depth gradient (first derivative) at current pixel
+		static bool DepthGradient(const DepthMap& depthMap, const ImageRef& ir, Point3f& ws) {
+			float& w  = ws[0];
+			float& wx = ws[1];
+			float& wy = ws[2];
+			w = depthMap(ir);
+			if (w <= 0)
+				return false;
+			// loop over neighborhood and finding least squares plane,
+			// the coefficients of which give gradient of depth
+			int whxx(0), whxy(0), whyy(0);
+			float wgx(0), wgy(0);
+			const int Radius(1);
+			int n(0);
+			for (int y = -Radius; y <= Radius; ++y) {
+				for (int x = -Radius; x <= Radius; ++x) {
+					if (x == 0 && y == 0)
+						continue;
+					const ImageRef pt(ir.x+x, ir.y+y);
+					if (!depthMap.isInside(pt))
+						continue;
+					const float wi(depthMap(pt));
+					if (!IsDepthValid(w, wi))
+						continue;
+					whxx += x*x; whxy += x*y; whyy += y*y;
+					wgx += (wi - w)*x; wgy += (wi - w)*y;
+					++n;
+				}
+			}
+			if (n < 3)
+				return false;
+			// solve 2x2 system, generated from depth gradient
+			const int det(whxx*whyy - whxy*whxy);
+			if (det == 0)
+				return false;
+			const float invDet(1.f/float(det));
+			wx = (float( whyy)*wgx - float(whxy)*wgy)*invDet;
+			wy = (float(-whxy)*wgx + float(whxx)*wgy)*invDet;
+			return true;
+		}
+		// computes normal to the surface given the depth and its gradient
+		static Normal ComputeNormal(const Matrix3x3f& K, int x, int y, Depth d, Depth dx, Depth dy) {
+			ASSERT(ISZERO(K(0,1)));
+			return normalized(Normal(
+				K(0,0)*dx,
+				K(1,1)*dy,
+				(K(0,2)-float(x))*dx+(K(1,2)-float(y))*dy-d
+			));
+		}
+	};
+	for (int r=0; r<normalMap.rows; ++r) {
+		for (int c=0; c<normalMap.cols; ++c) {
+			#if 0
+			const Depth d(depthMap(r,c));
+			if (d <= 0) {
+				normalMap(r,c) = Normal::ZERO;
+				continue;
+			}
+			Depth dl, du;
+			if (depthMap.isInside(ImageRef(c-1,r-1)) && Tool::IsDepthValid(d, dl=depthMap(r,c-1)) &&  Tool::IsDepthValid(d, du=depthMap(r-1,c)))
+				normalMap(r,c) = Tool::ComputeNormal(K, c, r, d, du-d, dl-d);
+			else
+			if (depthMap.isInside(ImageRef(c+1,r-1)) && Tool::IsDepthValid(d, dl=depthMap(r,c+1)) &&  Tool::IsDepthValid(d, du=depthMap(r-1,c)))
+				normalMap(r,c) = Tool::ComputeNormal(K, c, r, d, du-d, d-dl);
+			else
+			if (depthMap.isInside(ImageRef(c+1,r+1)) && Tool::IsDepthValid(d, dl=depthMap(r,c+1)) &&  Tool::IsDepthValid(d, du=depthMap(r+1,c)))
+				normalMap(r,c) = Tool::ComputeNormal(K, c, r, d, d-du, d-dl);
+			else
+			if (depthMap.isInside(ImageRef(c-1,r+1)) && Tool::IsDepthValid(d, dl=depthMap(r,c-1)) &&  Tool::IsDepthValid(d, du=depthMap(r+1,c)))
+				normalMap(r,c) = Tool::ComputeNormal(K, c, r, d, d-du, dl-d);
+			else
+				normalMap(r,c) = Normal(0,0,-1);
+			#else
+			// calculates depth gradient at x
+			Normal& n = normalMap(r,c);
+			if (Tool::DepthGradient(depthMap, ImageRef(c,r), n))
+				n = Tool::ComputeNormal(K, c, r, n.x, n.y, n.z);
+			else
+				n = Normal::ZERO;
+			#endif
+			ASSERT(normalMap(r,c).dot(K.inv()*Point3f(float(c),float(r),1.f)) <= 0);
+		}
+	}
+	return true;
+} // EstimateNormalMap
+/*----------------------------------------------------------------*/
+
 
 // save the depth map in our .dmap file format
 bool MVS::SaveDepthMap(const String& fileName, const DepthMap& depthMap)
diff --git a/libs/MVS/DepthMap.h b/libs/MVS/DepthMap.h
index d8f00409b..2e71cc48e 100644
--- a/libs/MVS/DepthMap.h
+++ b/libs/MVS/DepthMap.h
@@ -440,6 +440,13 @@ struct MVS_API DepthEstimator {
 
 
 // Tools
+bool TriangulatePoints2DepthMap(
+	const DepthData::ViewData& image, const PointCloud& pointcloud, const IndexArr& points,
+	DepthMap& depthMap, NormalMap& normalMap, Depth& dMin, Depth& dMax);
+bool TriangulatePoints2DepthMap(
+	const DepthData::ViewData& image, const PointCloud& pointcloud, const IndexArr& points,
+	DepthMap& depthMap, Depth& dMin, Depth& dMax);
+
 MVS_API unsigned EstimatePlane(const Point3Arr&, Plane&, double& maxThreshold, bool arrInliers[]=NULL, size_t maxIters=0);
 MVS_API unsigned EstimatePlaneLockFirstPoint(const Point3Arr&, Plane&, double& maxThreshold, bool arrInliers[]=NULL, size_t maxIters=0);
 MVS_API unsigned EstimatePlaneTh(const Point3Arr&, Plane&, double maxThreshold, bool arrInliers[]=NULL, size_t maxIters=0);
@@ -448,6 +455,8 @@ MVS_API unsigned EstimatePlaneThLockFirstPoint(const Point3Arr&, Plane&, double
 MVS_API void EstimatePointColors(const ImageArr& images, PointCloud& pointcloud);
 MVS_API void EstimatePointNormals(const ImageArr& images, PointCloud& pointcloud, int numNeighbors=16/*K-nearest neighbors*/);
 
+MVS_API bool EstimateNormalMap(const Matrix3x3f& K, const DepthMap&, NormalMap&);
+
 MVS_API bool SaveDepthMap(const String& fileName, const DepthMap& depthMap);
 MVS_API bool LoadDepthMap(const String& fileName, DepthMap& depthMap);
 MVS_API bool SaveNormalMap(const String& fileName, const NormalMap& normalMap);
diff --git a/libs/MVS/Image.cpp b/libs/MVS/Image.cpp
index c5454b665..0b5bdcf12 100644
--- a/libs/MVS/Image.cpp
+++ b/libs/MVS/Image.cpp
@@ -174,6 +174,22 @@ unsigned Image::RecomputeMaxResolution(unsigned& level, unsigned minImageSize, u
 /*----------------------------------------------------------------*/
 
 
+// resize image at desired scale
+Image Image::GetImage(const PlatformArr& platforms, double scale, bool bUseImage) const
+{
+	Image scaledImage(*this);
+	const cv::Size scaledSize(Image8U::computeResize(GetSize(), scale));
+	scaledImage.width = (unsigned)scaledSize.width;
+	scaledImage.height = (unsigned)scaledSize.height;
+	scaledImage.camera = GetCamera(platforms, scaledImage.GetSize());
+	if (!image.empty()) {
+		if (bUseImage)
+			cv::resize(image, scaledImage.image, scaledSize, 0, 0, scale>1?cv::INTER_CUBIC:cv::INTER_AREA);
+		else
+			scaledImage.image.release();
+	}
+	return scaledImage;
+} // GetImage
 // compute the camera extrinsics from the platform pose and the relative camera pose to the platform
 Camera Image::GetCamera(const PlatformArr& platforms, const Image8U::Size& resolution) const
 {
@@ -209,3 +225,216 @@ REAL Image::ComputeFOV(int dir) const
 	return 0;
 } // ComputeFOV
 /*----------------------------------------------------------------*/
+
+
+// stereo-rectify the image pair
+//  - input leftPoints and rightPoints contains the pairs of corresponding image projections and their depth (optional)
+//  - H converts a pixel from original to rectified left-image
+//  - Q converts [x' y' disparity 1] in rectified coordinates to [x*z y*z z 1]*w in original image coordinates
+//    where disparity=x1-x2
+bool Image::StereoRectifyImages(const Image& image1, const Image& image2, const Point3fArr& points1, const Point3fArr& points2, Image8U3& rectifiedImage1, Image8U3& rectifiedImage2, Image8U& mask1, Image8U& mask2, Matrix3x3& H, Matrix4x4& Q)
+{
+	ASSERT(image1.IsValid() && image2.IsValid());
+	ASSERT(image1.GetSize() == image1.image.size() && image2.GetSize() == image2.image.size());
+	ASSERT(points1.size() && points2.size());
+
+	#if 0
+	{	// display projection pairs
+		std::vector<Point2f> matches1, matches2;
+		FOREACH(i, points1) {
+			matches1.emplace_back(reinterpret_cast<const Point2f&>(points1[i]));
+			matches2.emplace_back(reinterpret_cast<const Point2f&>(points2[i]));
+		}
+		RECTIFY::DrawMatches(const_cast<Image8U3&>(image1.image), const_cast<Image8U3&>(image2.image), matches1, matches2);
+	}
+	#endif
+
+	// compute rectification
+	Matrix3x3 K1, K2, R1, R2;
+	#if 0
+	const REAL t(Camera::StereoRectify(image1.GetSize(), image1.camera, image2.GetSize(), image2.camera, R1, R2, K1, K2));
+	#elif 1
+	const REAL t(Camera::StereoRectifyFusiello(image1.GetSize(), image1.camera, image2.GetSize(), image2.camera, R1, R2, K1, K2));
+	#else
+	Pose pose;
+	ComputeRelativePose(image1.camera.R, image1.camera.C, image2.camera.R, image2.camera.C, pose.R, pose.C);
+	cv::Mat P1, P2;
+	cv::stereoRectify(image1.camera.K, cv::noArray(), image2.camera.K, cv::noArray(), image1.GetSize(), pose.R, Vec3(pose.GetTranslation()), R1, R2, P1, P2, Q, 0/*cv::CALIB_ZERO_DISPARITY*/, -1);
+	K1 = P1(cv::Rect(0,0,3,3));
+	K2 = P2(cv::Rect(0,0,3,3));
+	const Point3 _t(R2 * pose.GetTranslation());
+	ASSERT((ISZERO(_t.x) || ISZERO(_t.y)) && ISZERO(_t.z));
+	const REAL t(ISZERO(_t.x)?_t.y:_t.x);
+	#if 0
+	cv::Mat map1, map2;
+	cv::initUndistortRectifyMap(image1.camera.K, cv::noArray(), R1, K1, image1.GetSize(), CV_16SC2, map1, map2);
+	cv::remap(image1.image, rectifiedImage1, map1, map2, cv::INTER_CUBIC);
+	cv::initUndistortRectifyMap(image2.camera.K, cv::noArray(), R2, K2, image1.GetSize(), CV_16SC2, map1, map2);
+	cv::remap(image2.image, rectifiedImage2, map1, map2, cv::INTER_CUBIC);
+	return;
+	#endif
+	#endif
+	if (ISZERO(t))
+		return false;
+
+	// adjust rectified camera matrices such that the entire area common to both source images is contained in the rectified images
+	cv::Size size1(image1.GetSize()), size2(image2.GetSize());
+	if (!points1.empty())
+		Camera::SetStereoRectificationROI(points1, size1, image1.camera, points2, size2, image2.camera, R1, R2, K1, K2);
+	ASSERT(size1 == size2);
+
+	// compute rectification homography (from original to rectified image)
+	const Matrix3x3 H1(K1 * R1 * image1.camera.GetInvK()); H = H1;
+	const Matrix3x3 H2(K2 * R2 * image2.camera.GetInvK());
+
+	#if 0
+	{	// display epipolar lines before and after rectification
+		Pose pose;
+		ComputeRelativePose(image1.camera.R, image1.camera.C, image2.camera.R, image2.camera.C, pose.R, pose.C);
+		const Matrix3x3 F(CreateF(pose.R, pose.C, image1.camera.K, image2.camera.K));
+		std::vector<Point2f> matches1, matches2;
+		#if 1
+		FOREACH(i, points1) {
+			matches1.emplace_back(reinterpret_cast<const Point2f&>(points1[i]));
+			matches2.emplace_back(reinterpret_cast<const Point2f&>(points2[i]));
+		}
+		#endif
+		RECTIFY::DrawRectifiedImages(image1.image.clone(), image2.image.clone(), F, H1, H2, matches1, matches2);
+	}
+	#endif
+
+	// rectify images (apply homographies)
+	rectifiedImage1.create(size1);
+	cv::warpPerspective(image1.image, rectifiedImage1, H1, rectifiedImage1.size());
+	rectifiedImage2.create(size2);
+	cv::warpPerspective(image2.image, rectifiedImage2, H2, rectifiedImage2.size());
+
+	// mark valid regions covered by the rectified images
+	struct Compute {
+		static void Mask(Image8U& mask, const cv::Size& sizeh, const cv::Size& size, const Matrix3x3& H) {
+			mask.create(sizeh);
+			mask.memset(0);
+			std::vector<Point2f> corners(4);
+			corners[0] = Point2f(0,0);
+			corners[1] = Point2f((float)size.width,0);
+			corners[2] = Point2f((float)size.width,(float)size.height);
+			corners[3] = Point2f(0,(float)size.height);
+			cv::perspectiveTransform(corners, corners, H);
+			std::vector<std::vector<Point2i>> contours(1);
+			for (int i=0; i<4; ++i)
+				contours.front().emplace_back(ROUND2INT(corners[i]));
+			cv::drawContours(mask, contours, 0, cv::Scalar(255), cv::FILLED);
+		}
+	};
+	Compute::Mask(mask1, size1, image1.GetSize(), H1);
+	Compute::Mask(mask2, size2, image2.GetSize(), H2);
+
+	// from the formula that relates disparity to depth as z=B*f/d where B=-t and d=x_l-x_r
+	// and the formula that converts the image projection from right to left x_r=K1*K2.inv()*x_l
+	// compute the inverse projection matrix that transforms image coordinates in image 1 and its
+	// corresponding disparity value to the 3D point in camera 1 coordinates as:
+	ASSERT(ISEQUAL(K1(1,1),K2(1,1)));
+	Q = Matrix4x4::ZERO;
+	//   Q * [x, y, disparity, 1] = [X, Y, Z, 1] * w
+	ASSERT(ISEQUAL(K1(0,0),K2(0,0)) && ISZERO(K1(0,1)) && ISZERO(K2(0,1)));
+	Q(0,0) = Q(1,1) = REAL(1);
+	Q(0,3) = -K1(0,2);
+	Q(1,3) = -K1(1,2);
+	Q(2,3) =  K1(0,0);
+	Q(3,2) = -REAL(1)/t;
+	Q(3,3) =  (K1(0,2)-K2(0,2))/t;
+
+	// compute Q that converts disparity from rectified to depth in original image
+	Matrix4x4 P(Matrix4x4::IDENTITY);
+	cv::Mat(image1.camera.K*R1.t()).copyTo(cv::Mat(4,4,cv::DataType<Matrix4x4::Type>::type,P.val)(cv::Rect(0,0,3,3)));
+	Q = P*Q;
+	return true;
+}
+
+// adjust H and Q such that they correspond to rectified images scaled by the given factor
+void Image::ScaleStereoRectification(Matrix3x3& H, Matrix4x4& Q, REAL scale)
+{
+	{
+		Matrix3x3 S(Matrix3x3::IDENTITY);
+		S(0,0) = S(1,1) = scale;
+		H = S*H*S.inv();
+	}
+	{
+		Matrix4x4 Sinv(Matrix4x4::IDENTITY);
+		Sinv(0,0) = Sinv(1,1) = Sinv(2,2) = REAL(1)/scale;
+		Matrix4x4 S(Matrix4x4::IDENTITY);
+		S(0,0) = S(1,1) = scale*scale;
+		S(2,2) = S(3,3) = scale;
+		Q = S*Q*Sinv;
+	}
+}
+
+
+// note: disparity has to be inverted as the formula is z=B*f/d where d=x_l-x_r
+//       while we store d as the value that fulfills x_l+d=x_r
+//
+// converts the given disparity at the rectified image coordinates to
+// the depth corresponding to the un-rectified image coordinates
+template <typename TYPE>
+float TDisparity2Depth(const Matrix4x4& Q, const TPoint2<TYPE>& u, float d)
+{
+	const REAL w(Q(3,0)*u.x + Q(3,1)*u.y - Q(3,2)*d + Q(3,3));
+	if (ISZERO(w))
+		return 0;
+	const REAL z(Q(2,0)*u.x + Q(2,1)*u.y - Q(2,2)*d + Q(2,3));
+	const float depth((float)(z/w));
+	return depth < ZEROTOLERANCE<float>() ? float(0) : depth;
+}
+float Image::Disparity2Depth(const Matrix4x4& Q, const ImageRef& u, float d)
+{
+	return TDisparity2Depth(Q, u, d);
+}
+float Image::Disparity2Depth(const Matrix4x4& Q, const Point2f& u, float d)
+{
+	return TDisparity2Depth(Q, u, d);
+}
+// same as above, but converts also the coordinates from rectified to original image
+template <typename TYPE>
+float TDisparity2Depth(const Matrix4x4& Q, const TPoint2<TYPE>& u, float d, Point2f& pt)
+{
+	const REAL w(Q(3,0)*u.x + Q(3,1)*u.y - Q(3,2)*d + Q(3,3));
+	if (ISZERO(w))
+		return 0;
+	const REAL z((Q(2,0)*u.x + Q(2,1)*u.y - Q(2,2)*d + Q(2,3))/w);
+	if (z < ZEROTOLERANCE<REAL>())
+		return 0;
+	const REAL nrm(REAL(1)/(w*z));
+	pt.x = (float)((Q(0,0)*u.x + Q(0,1)*u.y - Q(0,2)*d + Q(0,3))*nrm);
+	pt.y = (float)((Q(1,0)*u.x + Q(1,1)*u.y - Q(1,2)*d + Q(1,3))*nrm);
+	return (float)z;
+}
+float Image::Disparity2Depth(const Matrix4x4& Q, const ImageRef& u, float d, Point2f& pt)
+{
+	return TDisparity2Depth(Q, u, d, pt);
+}
+float Image::Disparity2Depth(const Matrix4x4& Q, const Point2f& u, float d, Point2f& pt)
+{
+	return TDisparity2Depth(Q, u, d, pt);
+}
+// converts the given disparity at the rectified image coordinates into
+// the distance from the camera position to the corresponding 3D point;
+// note: the distance is the same in both rectified and un-rectified cameras
+//  - K is the camera matrix of the un-rectified image
+float Image::Disparity2Distance(const Matrix3x3& K, const Matrix4x4& Q, const Point2f& u, float d)
+{
+	Point2f pt;
+	const float depth(Disparity2Depth(Q, u, d, pt));
+	return (float)(SQRT(SQUARE(pt.x-K(0,2))+SQUARE(pt.y-K(1,2))+SQUARE(K(0,0)))*depth/K(0,0));
+}
+// converts the given depth at the un-rectified image coordinates to
+// the disparity corresponding to the rectified image coordinates
+bool Image::Depth2Disparity(const Matrix4x4& Q, const Point2f& u, float d, float& disparity)
+{
+	const REAL w((Q(3,0)*u.x + Q(3,1)*u.y + Q(3,2))*d + Q(3,3));
+	if (ISZERO(w))
+		return false;
+	const REAL z((Q(2,0)*u.x + Q(2,1)*u.y + Q(2,2))*d + Q(2,3));
+	disparity = -(float)(z/w);
+	return true;
+}
+/*----------------------------------------------------------------*/
diff --git a/libs/MVS/Image.h b/libs/MVS/Image.h
index 627ec5cee..85a4f5eaf 100644
--- a/libs/MVS/Image.h
+++ b/libs/MVS/Image.h
@@ -108,10 +108,20 @@ class MVS_API Image
 	float ResizeImage(unsigned nMaxResolution=0);
 	unsigned RecomputeMaxResolution(unsigned& level, unsigned minImageSize, unsigned maxImageSize=INT_MAX) const;
 
+	Image GetImage(const PlatformArr& platforms, double scale, bool bUseImage=true) const;
 	Camera GetCamera(const PlatformArr& platforms, const Image8U::Size& resolution) const;
 	void UpdateCamera(const PlatformArr& platforms);
 	REAL ComputeFOV(int dir) const;
 
+	static bool StereoRectifyImages(const Image& image1, const Image& image2, const Point3fArr& points1, const Point3fArr& points2, Image8U3& rectifiedImage1, Image8U3& rectifiedImage2, Image8U& mask1, Image8U& mask2, Matrix3x3& H, Matrix4x4& Q);
+	static void ScaleStereoRectification(Matrix3x3& H, Matrix4x4& Q, REAL scale);
+	static float Disparity2Depth(const Matrix4x4& Q, const ImageRef& u, float d);
+	static float Disparity2Depth(const Matrix4x4& Q, const Point2f& u, float d);
+	static float Disparity2Depth(const Matrix4x4& Q, const ImageRef& u, float d, Point2f& pt);
+	static float Disparity2Depth(const Matrix4x4& Q, const Point2f& u, float d, Point2f& pt);
+	static float Disparity2Distance(const Matrix3x3& K, const Matrix4x4& Q, const Point2f& u, float d);
+	static bool Depth2Disparity(const Matrix4x4& Q, const Point2f& u, float d, float& disparity);
+
 	float GetNormalizationScale() const {
 		ASSERT(width > 0 && height > 0);
 		return camera.GetNormalizationScale(width, height);
diff --git a/libs/MVS/Interface.h b/libs/MVS/Interface.h
index 97d96240e..ea47cfcf9 100644
--- a/libs/MVS/Interface.h
+++ b/libs/MVS/Interface.h
@@ -12,7 +12,7 @@
 // D E F I N E S ///////////////////////////////////////////////////
 
 #define MVSI_PROJECT_ID "MVSI" // identifies the project stream
-#define MVSI_PROJECT_VER ((uint32_t)4) // identifies the version of a project stream
+#define MVSI_PROJECT_VER ((uint32_t)5) // identifies the version of a project stream
 
 // set a default namespace name if none given
 #ifndef _INTERFACE_NAMESPACE
@@ -453,6 +453,7 @@ struct Interface
 	// structure describing an image
 	struct Image {
 		std::string name; // image file name
+		std::string maskName; // segmentation file name (optional)
 		uint32_t platformID; // ID of the associated platform
 		uint32_t cameraID; // ID of the associated camera on the associated platform
 		uint32_t poseID; // ID of the pose of the associated platform
@@ -465,6 +466,9 @@ struct Interface
 		template <class Archive>
 		void serialize(Archive& ar, const unsigned int version) {
 			ar & name;
+			if (version > 4) {
+				ar & maskName;
+			}
 			ar & platformID;
 			ar & cameraID;
 			ar & poseID;
diff --git a/libs/MVS/Mesh.cpp b/libs/MVS/Mesh.cpp
index 48ee5a1f9..f50997438 100644
--- a/libs/MVS/Mesh.cpp
+++ b/libs/MVS/Mesh.cpp
@@ -2852,9 +2852,10 @@ void Mesh::Subdivide(const AreaArr& maxAreas, uint32_t maxArea)
 	FacetCountMap mapFaces; mapFaces.reserve(12*3);
 	vertices.Reserve(vertices.GetSize()*2);
 	faces.Reserve(faces.GetSize()*3);
+	const uint32_t maxAreaTh(2*maxArea);
 	FOREACH(f, maxAreas) {
 		const AreaArr::Type area(maxAreas[f]);
-		if (area <= maxArea)
+		if (area <= maxAreaTh)
 			continue;
 		// split face in four triangles
 		// by adding a new vertex at the middle of each edge
@@ -2892,7 +2893,7 @@ void Mesh::Subdivide(const AreaArr& maxAreas, uint32_t maxArea)
 			ASSERT(fc.second.count <= 2 || (fc.second.count == 3 && fc.first == f));
 			if (fc.second.count != 2)
 				continue;
-			if (fc.first < f && maxAreas[fc.first] > maxArea) {
+			if (fc.first < f && maxAreas[fc.first] > maxAreaTh) {
 				// already fully split, nothing to do
 				ASSERT(mapSplits[fc.first].idxVert[SplitFace::FindSharedEdge(faces[fc.first], face)] == newface[SplitFace::FindSharedEdge(face, faces[fc.first])]);
 				continue;
diff --git a/libs/MVS/Scene.cpp b/libs/MVS/Scene.cpp
index 3b2fd8631..ff8cf696d 100644
--- a/libs/MVS/Scene.cpp
+++ b/libs/MVS/Scene.cpp
@@ -192,7 +192,7 @@ bool Scene::LoadInterface(const String & fileName)
 	return true;
 } // LoadInterface
 
-bool Scene::SaveInterface(const String & fileName) const
+bool Scene::SaveInterface(const String & fileName, int version) const
 {
 	TD_TIMER_STARTD();
 	Interface obj;
@@ -264,7 +264,7 @@ bool Scene::SaveInterface(const String & fileName) const
 	}
 
 	// serialize out the current state
-	if (!ARCHIVE::SerializeSave(obj, fileName))
+	if (!ARCHIVE::SerializeSave(obj, fileName, version>=0?uint32_t(version):MVSI_PROJECT_VER))
 		return false;
 
 	DEBUG_EXTRA("Scene saved to interface format (%s):\n"
diff --git a/libs/MVS/Scene.h b/libs/MVS/Scene.h
index 7956525b5..055969ca7 100644
--- a/libs/MVS/Scene.h
+++ b/libs/MVS/Scene.h
@@ -35,9 +35,8 @@
 
 // I N C L U D E S /////////////////////////////////////////////////
 
-#include "DepthMap.h"
-#include "Mesh.h"
 #include "SceneDensify.h"
+#include "Mesh.h"
 
 
 // D E F I N E S ///////////////////////////////////////////////////
@@ -46,7 +45,7 @@
 // S T R U C T S ///////////////////////////////////////////////////
 
 namespace MVS {
-	
+
 // Forward declarations
 struct MVS_API DenseDepthMapData;
 
@@ -69,7 +68,7 @@ class MVS_API Scene
 	bool IsEmpty() const;
 
 	bool LoadInterface(const String& fileName);
-	bool SaveInterface(const String& fileName) const;
+	bool SaveInterface(const String& fileName, int version=-1) const;
 
 	bool Import(const String& fileName);
 
@@ -82,7 +81,7 @@ class MVS_API Scene
 	bool ExportCamerasMLP(const String& fileName, const String& fileNameScene) const;
 
 	// Dense reconstruction
-	bool DenseReconstruction();
+	bool DenseReconstruction(int nFusionMode=0);
 	bool ComputeDepthMaps(DenseDepthMapData& data);
 	void DenseReconstructionEstimate(void*);
 	void DenseReconstructionFilter(void*);
diff --git a/libs/MVS/SceneDensify.cpp b/libs/MVS/SceneDensify.cpp
index 82473bd05..1e97e8806 100644
--- a/libs/MVS/SceneDensify.cpp
+++ b/libs/MVS/SceneDensify.cpp
@@ -31,12 +31,9 @@
 
 #include "Common.h"
 #include "Scene.h"
+#include "SceneDensify.h"
 // MRF: view selection
 #include "../Math/TRWS/MRFEnergy.h"
-// CGAL: depth-map initialization
-#include <CGAL/Simple_cartesian.h>
-#include <CGAL/Delaunay_triangulation_2.h>
-#include <CGAL/Projection_traits_xy_3.h>
 
 using namespace MVS;
 
@@ -364,99 +361,6 @@ bool DepthMapsData::InitViews(DepthData& depthData, IIndex idxNeighbor, IIndex n
 } // InitViews
 /*----------------------------------------------------------------*/
 
-namespace CGAL {
-typedef CGAL::Simple_cartesian<double> kernel_t;
-typedef CGAL::Projection_traits_xy_3<kernel_t> Geometry;
-typedef CGAL::Delaunay_triangulation_2<Geometry> Delaunay;
-typedef CGAL::Delaunay::Face_circulator FaceCirculator;
-typedef CGAL::Delaunay::Face_handle FaceHandle;
-typedef CGAL::Delaunay::Vertex_circulator VertexCirculator;
-typedef CGAL::Delaunay::Vertex_handle VertexHandle;
-typedef kernel_t::Point_3 Point;
-}
-
-// triangulate in-view points, generating a 2D mesh
-// return also the estimated depth boundaries (min and max depth)
-std::pair<float,float> TriangulatePointsDelaunay(CGAL::Delaunay& delaunay, const Scene& scene, const DepthData::ViewData& image, const IndexArr& points)
-{
-	ASSERT(sizeof(Point3) == sizeof(X3D));
-	ASSERT(sizeof(Point3) == sizeof(CGAL::Point));
-	std::pair<float,float> depthBounds(FLT_MAX, 0.f);
-	for (uint32_t idx: points) {
-		const Point3f pt(image.camera.ProjectPointP3(scene.pointcloud.points[idx]));
-		delaunay.insert(CGAL::Point(pt.x/pt.z, pt.y/pt.z, pt.z));
-		if (depthBounds.first > pt.z)
-			depthBounds.first = pt.z;
-		if (depthBounds.second < pt.z)
-			depthBounds.second = pt.z;
-	}
-	// if full size depth-map requested
-	if (OPTDENSE::bAddCorners) {
-		typedef TIndexScore<float,float> DepthDist;
-		typedef CLISTDEF0(DepthDist) DepthDistArr;
-		typedef Eigen::Map< Eigen::VectorXf, Eigen::Unaligned, Eigen::InnerStride<2> > FloatMap;
-		// add the four image corners at the average depth
-		const CGAL::VertexHandle vcorners[] = {
-			delaunay.insert(CGAL::Point(0, 0, image.pImageData->avgDepth)),
-			delaunay.insert(CGAL::Point(image.image.width(), 0, image.pImageData->avgDepth)),
-			delaunay.insert(CGAL::Point(0, image.image.height(), image.pImageData->avgDepth)),
-			delaunay.insert(CGAL::Point(image.image.width(), image.image.height(), image.pImageData->avgDepth))
-		};
-		// compute average depth from the closest 3 directly connected faces,
-		// weighted by the distance
-		const size_t numPoints = 3;
-		for (int i=0; i<4; ++i) {
-			const CGAL::VertexHandle vcorner = vcorners[i];
-			CGAL::FaceCirculator cfc(delaunay.incident_faces(vcorner));
-			if (cfc == 0)
-				continue; // normally this should never happen
-			const CGAL::FaceCirculator done(cfc);
-			Point3d& poszA = reinterpret_cast<Point3d&>(vcorner->point());
-			const Point2d& posA = reinterpret_cast<const Point2d&>(poszA);
-			const Ray3d rayA(Point3d::ZERO, normalized(image.camera.TransformPointI2C(poszA)));
-			DepthDistArr depths(0, numPoints);
-			do {
-				CGAL::FaceHandle fc(cfc->neighbor(cfc->index(vcorner)));
-				if (fc == delaunay.infinite_face())
-					continue;
-				for (int j=0; j<4; ++j)
-					if (fc->has_vertex(vcorners[j]))
-						goto Continue;
-				// compute the depth as the intersection of the corner ray with
-				// the plane defined by the face's vertices
-				{
-				const Point3d& poszB0 = reinterpret_cast<const Point3d&>(fc->vertex(0)->point());
-				const Point3d& poszB1 = reinterpret_cast<const Point3d&>(fc->vertex(1)->point());
-				const Point3d& poszB2 = reinterpret_cast<const Point3d&>(fc->vertex(2)->point());
-				const Planed planeB(
-					image.camera.TransformPointI2C(poszB0),
-					image.camera.TransformPointI2C(poszB1),
-					image.camera.TransformPointI2C(poszB2)
-				);
-				const Point3d poszB(rayA.Intersects(planeB));
-				if (poszB.z <= 0)
-					continue;
-				const Point2d posB((
-					reinterpret_cast<const Point2d&>(poszB0)+
-					reinterpret_cast<const Point2d&>(poszB1)+
-					reinterpret_cast<const Point2d&>(poszB2))/3.f
-				);
-				const double dist(norm(posB-posA));
-				depths.StoreTop<numPoints>(DepthDist(CLAMP((float)poszB.z,depthBounds.first,depthBounds.second), INVERT((float)dist)));
-				}
-				Continue:;
-			} while (++cfc != done);
-			if (depths.GetSize() != numPoints)
-				continue; // normally this should never happen
-			FloatMap vecDists(&depths[0].score, numPoints);
-			vecDists *= 1.f/vecDists.sum();
-			FloatMap vecDepths(&depths[0].idx, numPoints);
-			poszA.z = vecDepths.dot(vecDists);
-		}
-	}
-	return depthBounds;
-}
-
 // roughly estimate depth and normal maps by triangulating the sparse point cloud
 // and interpolating normal and depth for all pixels
 bool DepthMapsData::InitDepthMap(DepthData& depthData)
@@ -464,58 +368,21 @@ bool DepthMapsData::InitDepthMap(DepthData& depthData)
 	TD_TIMER_STARTD();
 
 	ASSERT(depthData.images.GetSize() > 1 && !depthData.points.IsEmpty());
-	const DepthData::ViewData& image(depthData.images.First());
-	ASSERT(!image.image.empty());
-
-	// triangulate in-view points
-	CGAL::Delaunay delaunay;
-	const std::pair<float,float> thDepth(TriangulatePointsDelaunay(delaunay, scene, image, depthData.points));
-	depthData.dMin = thDepth.first*0.9f;
-	depthData.dMax = thDepth.second*1.1f;
-
-	// create rough depth-map by interpolating inside triangles
-	const Camera& camera = image.camera;
-	depthData.depthMap.create(image.image.size());
-	depthData.normalMap.create(image.image.size());
-	if (!OPTDENSE::bAddCorners) {
-		depthData.depthMap.setTo(Depth(0));
-		depthData.normalMap.setTo(0.f);
-	}
-	struct RasterDepthDataPlaneData {
-		const Camera& P;
-		DepthMap& depthMap;
-		NormalMap& normalMap;
-		Point3f normal;
-		Point3f normalPlane;
-		inline void operator()(const ImageRef& pt) {
-			if (!depthMap.isInside(pt))
-				return;
-			const Depth z(INVERT(normalPlane.dot(P.TransformPointI2C(Point2f(pt)))));
-			if (z <= 0) // due to numerical instability
-				return;
-			depthMap(pt) = z;
-			normalMap(pt) = normal;
-		}
-	};
-	RasterDepthDataPlaneData data = {camera, depthData.depthMap, depthData.normalMap};
-	for (CGAL::Delaunay::Face_iterator it=delaunay.faces_begin(); it!=delaunay.faces_end(); ++it) {
-		const CGAL::Delaunay::Face& face = *it;
-		const Point3f i0(reinterpret_cast<const Point3d&>(face.vertex(0)->point()));
-		const Point3f i1(reinterpret_cast<const Point3d&>(face.vertex(1)->point()));
-		const Point3f i2(reinterpret_cast<const Point3d&>(face.vertex(2)->point()));
-		// compute the plane defined by the 3 points
-		const Point3f c0(camera.TransformPointI2C(i0));
-		const Point3f c1(camera.TransformPointI2C(i1));
-		const Point3f c2(camera.TransformPointI2C(i2));
-		const Point3f edge1(c1-c0);
-		const Point3f edge2(c2-c0);
-		data.normal = normalized(edge2.cross(edge1));
-		data.normalPlane = data.normal * INVERT(data.normal.dot(c0));
-		// draw triangle and for each pixel compute depth as the ray intersection with the plane
-		Image8U::RasterizeTriangle(reinterpret_cast<const Point2f&>(i2), reinterpret_cast<const Point2f&>(i1), reinterpret_cast<const Point2f&>(i0), data);
+	const DepthData::ViewData& image(depthData.GetView());
+	TriangulatePoints2DepthMap(image, scene.pointcloud, depthData.points, depthData.depthMap, depthData.normalMap, depthData.dMin, depthData.dMax);
+	depthData.dMin *= 0.9f;
+	depthData.dMax *= 1.1f;
+
+	#if TD_VERBOSE != TD_VERBOSE_OFF
+	// save rough depth map as image
+	if (g_nVerbosityLevel > 4) {
+		ExportDepthMap(ComposeDepthFilePath(image.GetID(), "init.png"), depthData.depthMap);
+		ExportNormalMap(ComposeDepthFilePath(image.GetID(), "init.normal.png"), depthData.normalMap);
+		ExportPointCloud(ComposeDepthFilePath(image.GetID(), "init.ply"), *depthData.images.First().pImageData, depthData.depthMap, depthData.normalMap);
 	}
+	#endif
 
-	DEBUG_ULTIMATE("Depth-map %3u roughly estimated from %u sparse points: %dx%d (%s)", &depthData-arrDepthData.Begin(), depthData.points.GetSize(), image.image.width(), image.image.height(), TD_TIMER_GET_FMT().c_str());
+	DEBUG_ULTIMATE("Depth-map %3u roughly estimated from %u sparse points: %dx%d (%s)", image.GetID(), depthData.points.size(), image.image.width(), image.image.height(), TD_TIMER_GET_FMT().c_str());
 	return true;
 } // InitDepthMap
 /*----------------------------------------------------------------*/
@@ -668,14 +535,6 @@ bool DepthMapsData::EstimateDepthMap(IIndex idxImage)
 	} else {
 		// compute rough estimates using the sparse point-cloud
 		InitDepthMap(depthData);
-		#if TD_VERBOSE != TD_VERBOSE_OFF
-		// save rough depth map as image
-		if (g_nVerbosityLevel > 4) {
-			ExportDepthMap(ComposeDepthFilePath(image.GetID(), "init.png"), depthData.depthMap);
-			ExportNormalMap(ComposeDepthFilePath(image.GetID(), "init.normal.png"), depthData.normalMap);
-			ExportPointCloud(ComposeDepthFilePath(image.GetID(), "init.ply"), *depthData.images.First().pImageData, depthData.depthMap, depthData.normalMap);
-		}
-		#endif
 	}
 
 	// init integral images and index to image-ref map for the reference data
@@ -1316,6 +1175,7 @@ void DepthMapsData::FuseDepthMaps(PointCloud& pointcloud, bool bEstimateColor, b
 	// find best connected images
 	IndexScoreArr connections(0, scene.images.GetSize());
 	size_t nPointsEstimate(0);
+	bool bNormalMap(true);
 	FOREACH(i, scene.images) {
 		DepthData& depthData = arrDepthData[i];
 		if (!depthData.IsValid())
@@ -1327,6 +1187,8 @@ void DepthMapsData::FuseDepthMaps(PointCloud& pointcloud, bool bEstimateColor, b
 		connection.idx = i;
 		connection.score = (float)scene.images[i].neighbors.GetSize();
 		nPointsEstimate += ROUND2INT(depthData.depthMap.area()*(0.5f/*valid*/*0.3f/*new*/));
+		if (depthData.normalMap.empty())
+			bNormalMap = false;
 	}
 	connections.Sort();
 
@@ -1339,6 +1201,8 @@ void DepthMapsData::FuseDepthMaps(PointCloud& pointcloud, bool bEstimateColor, b
 	typedef cList<DepthIndex> DepthIndexArr;
 	DepthIndexArr arrDepthIdx(scene.images.GetSize());
 	ProjsArr projs(0, nPointsEstimate);
+	if (bEstimateNormal && !bNormalMap)
+		bEstimateNormal = false;
 	pointcloud.points.Reserve(nPointsEstimate);
 	pointcloud.pointViews.Reserve(nPointsEstimate);
 	pointcloud.pointWeights.Reserve(nPointsEstimate);
@@ -1394,7 +1258,7 @@ void DepthMapsData::FuseDepthMaps(PointCloud& pointcloud, bool bEstimateColor, b
 				REAL confidence(weights.emplace_back(Conf2Weight(depthData.confMap(x),depth)));
 				ProjArr& pointProjs = projs.AddEmpty();
 				pointProjs.Insert(Proj(x));
-				const PointCloud::Normal normal(imageData.camera.R.t()*Cast<REAL>(depthData.normalMap(x)));
+				const PointCloud::Normal normal(bNormalMap ? Cast<Normal::Type>(imageData.camera.R.t()*Cast<REAL>(depthData.normalMap(x))) : Normal(0,0,-1));
 				ASSERT(ISEQUAL(norm(normal), 1.f));
 				// check the projection in the neighbor depth-maps
 				Point3 X(point*confidence);
@@ -1422,7 +1286,7 @@ void DepthMapsData::FuseDepthMaps(PointCloud& pointcloud, bool bEstimateColor, b
 						continue;
 					if (IsDepthSimilar(pt.z, depthB, OPTDENSE::fDepthDiffThreshold)) {
 						// check if normals agree
-						const PointCloud::Normal normalB(imageDataB.camera.R.t()*Cast<REAL>(depthDataB.normalMap(xB)));
+						const PointCloud::Normal normalB(bNormalMap ? Cast<Normal::Type>(imageDataB.camera.R.t()*Cast<REAL>(depthDataB.normalMap(xB))) : Normal(0,0,-1));
 						ASSERT(ISEQUAL(norm(normalB), 1.f));
 						if (normal.dot(normalB) > normalError) {
 							// add view to the 3D point
@@ -1520,19 +1384,48 @@ void DepthMapsData::FuseDepthMaps(PointCloud& pointcloud, bool bEstimateColor, b
 
 
 
+// S T R U C T S ///////////////////////////////////////////////////
+
+DenseDepthMapData::DenseDepthMapData(Scene& _scene, int _nFusionMode)
+	: scene(_scene), depthMaps(_scene), idxImage(0), sem(1), nFusionMode(_nFusionMode)
+{
+	if (nFusionMode < 0) {
+		STEREO::SemiGlobalMatcher::CreateThreads(scene.nMaxThreads);
+		if (nFusionMode == -1)
+			OPTDENSE::nOptimize &= ~OPTDENSE::OPTIMIZE;
+	}
+}
+DenseDepthMapData::~DenseDepthMapData()
+{
+	if (nFusionMode < 0)
+		STEREO::SemiGlobalMatcher::DestroyThreads();
+}
+
+void DenseDepthMapData::SignalCompleteDepthmapFilter()
+{
+	ASSERT(idxImage > 0);
+	if (Thread::safeDec(idxImage) == 0)
+		sem.Signal((unsigned)images.GetSize()*2);
+}
+/*----------------------------------------------------------------*/
+
+
+
 // S T R U C T S ///////////////////////////////////////////////////
 
 static void* DenseReconstructionEstimateTmp(void*);
 static void* DenseReconstructionFilterTmp(void*);
 
-bool Scene::DenseReconstruction()
+bool Scene::DenseReconstruction(int nFusionMode)
 {
-	DenseDepthMapData data(*this);
-	
+	DenseDepthMapData data(*this, nFusionMode);
+
 	// estimate depth-maps
 	if (!ComputeDepthMaps(data))
 		return false;
-	
+	if (ABS(nFusionMode) == 1)
+		return true;
+
 	// fuse all depth-maps
 	pointcloud.Release();
 	data.depthMaps.FuseDepthMaps(pointcloud, OPTDENSE::nEstimateColors == 2, OPTDENSE::nEstimateNormals == 2);
@@ -1761,7 +1654,7 @@ void Scene::DenseReconstructionEstimate(void* pData)
 				break;
 			}
 			// try to load already compute depth-map for this image
-			if (File::access(ComposeDepthFilePath(depthData.GetView().GetID(), "dmap"))) {
+			if (data.nFusionMode >= 0 && File::access(ComposeDepthFilePath(depthData.GetView().GetID(), "dmap"))) {
 				if (OPTDENSE::nOptimize & OPTDENSE::OPTIMIZE) {
 					if (!depthData.Load(ComposeDepthFilePath(depthData.GetView().GetID(), "dmap"))) {
 						VERBOSE("error: invalid depth-map '%s'", ComposeDepthFilePath(depthData.GetView().GetID(), "dmap").c_str());
@@ -1784,7 +1677,23 @@ void Scene::DenseReconstructionEstimate(void* pData)
 			data.events.AddEvent(new EVTProcessImage((uint32_t)Thread::safeInc(data.idxImage)));
 			// extract depth map
 			data.sem.Wait();
-			data.depthMaps.EstimateDepthMap(data.images[evtImage.idxImage]);
+			if (data.nFusionMode >= 0) {
+				// extract depth-map using Patch-Match algorithm
+				data.depthMaps.EstimateDepthMap(data.images[evtImage.idxImage]);
+			} else {
+				// extract disparity-maps using SGM algorithm
+				if (data.nFusionMode == -1) {
+					data.sgm.Match(*this, data.images[evtImage.idxImage], OPTDENSE::nNumViews);
+				} else {
+					// fuse existing disparity-maps
+					const IIndex idx(data.images[evtImage.idxImage]);
+					DepthData& depthData(data.depthMaps.arrDepthData[idx]);
+					data.sgm.Fuse(*this, data.images[evtImage.idxImage], OPTDENSE::nNumViews, 2, depthData.depthMap, depthData.confMap);
+					if (OPTDENSE::nEstimateNormals == 2)
+						EstimateNormalMap(depthData.images.front().camera.K, depthData.depthMap, depthData.normalMap);
+					depthData.dMin = ZEROTOLERANCE<float>(); depthData.dMax = FLT_MAX;
+				}
+			}
 			data.sem.Signal();
 			if (OPTDENSE::nOptimize & OPTDENSE::OPTIMIZE) {
 				// optimize depth-map
@@ -1838,7 +1747,8 @@ void Scene::DenseReconstructionEstimate(void* pData)
 			}
 			#endif
 			// save compute depth-map for this image
-			depthData.Save(ComposeDepthFilePath(depthData.GetView().GetID(), "dmap"));
+			if (!depthData.depthMap.empty())
+				depthData.Save(ComposeDepthFilePath(depthData.GetView().GetID(), "dmap"));
 			depthData.ReleaseImages();
 			depthData.Release();
 			data.progress->operator++();
diff --git a/libs/MVS/SceneDensify.h b/libs/MVS/SceneDensify.h
index 6d7d0efe0..2f21142ab 100644
--- a/libs/MVS/SceneDensify.h
+++ b/libs/MVS/SceneDensify.h
@@ -35,6 +35,8 @@
 
 // I N C L U D E S /////////////////////////////////////////////////
 
+#include "SemiGlobalMatcher.h"
+
 
 // S T R U C T S ///////////////////////////////////////////////////
 
@@ -68,9 +70,9 @@ class MVS_API DepthMapsData
 	static void* STCALL EndDepthMapTmp(void*);
 
 public:
-  Scene& scene;
+	Scene& scene;
 
-  DepthDataArr arrDepthData;
+	DepthDataArr arrDepthData;
 
 	// used internally to estimate the depth-maps
 	Image8U::Size prevDepthMapSize; // remember the size of the last estimated depth-map
@@ -89,15 +91,13 @@ struct MVS_API DenseDepthMapData {
 	SEACAVE::EventQueue events; // internal events queue (processed by the working threads)
 	Semaphore sem;
 	CAutoPtr<Util::Progress> progress;
+	int nFusionMode;
+	STEREO::SemiGlobalMatcher sgm;
 
-	DenseDepthMapData(Scene& _scene)
-		: scene(_scene), depthMaps(_scene), idxImage(0), sem(1) {}
+	DenseDepthMapData(Scene& _scene, int _nFusionMode=0);
+	~DenseDepthMapData();
 
-	void SignalCompleteDepthmapFilter() {
-		ASSERT(idxImage > 0);
-		if (Thread::safeDec(idxImage) == 0)
-			sem.Signal((unsigned)images.GetSize()*2);
-	}
+	void SignalCompleteDepthmapFilter();
 };
 /*----------------------------------------------------------------*/
 
diff --git a/libs/MVS/SemiGlobalMatcher.cpp b/libs/MVS/SemiGlobalMatcher.cpp
new file mode 100644
index 000000000..6549acbda
--- /dev/null
+++ b/libs/MVS/SemiGlobalMatcher.cpp
@@ -0,0 +1,2361 @@
+/*
+* SemiGlobalMatcher.cpp
+*
+* Copyright (c) 2014-2015 SEACAVE
+*
+* Author(s):
+*
+*      cDc <cdc.seacave@gmail.com>
+*
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*
+*
+* Additional Terms:
+*
+*      You are required to preserve legal notices and author attributions in
+*      that material or in the Appropriate Legal Notices displayed by works
+*      containing it.
+*/
+
+#include "Common.h"
+#include "SemiGlobalMatcher.h"
+#include "Scene.h"
+
+using namespace MVS;
+
+using namespace STEREO;
+
+
+// D E F I N E S ///////////////////////////////////////////////////
+
+// uncomment to enable OpenCV filter demo
+//#define _USE_FILTER_DEMO
+
+
+// S T R U C T S ///////////////////////////////////////////////////
+
+#ifdef _USE_FILTER_DEMO
+#include "opencv2/ximgproc/disparity_filter.hpp"
+
+const cv::String keys =
+"{help h usage ? |        | print this message                                                }"
+"{GT             |None    | optional ground-truth disparity (MPI-Sintel or Middlebury format) }"
+"{dst_path       |None    | optional path to save the resulting filtered disparity map        }"
+"{dst_raw_path   |None    | optional path to save raw disparity map before filtering          }"
+"{algorithm      |bm      | stereo matching method (bm or sgbm)                               }"
+"{filter         |wls_conf| used post-filtering (wls_conf or wls_no_conf)                     }"
+"{no-display     |        | don't display results                                             }"
+"{no-downscale   |        | force stereo matching on full-sized views to improve quality      }"
+"{dst_conf_path  |None    | optional path to save the confidence map used in filtering        }"
+"{vis_mult       |1.0     | coefficient used to scale disparity map visualizations            }"
+"{max_disparity  |160     | parameter of stereo matching                                      }"
+"{window_size    |-1      | parameter of stereo matching                                      }"
+"{wls_lambda     |8000.0  | parameter of post-filtering                                       }"
+"{wls_sigma      |1.5     | parameter of post-filtering                                       }"
+;
+
+cv::Rect computeROI(cv::Size2i src_sz, cv::Ptr<cv::StereoMatcher> matcher_instance)
+{
+	int min_disparity = matcher_instance->getMinDisparity();
+	int num_disparities = matcher_instance->getNumDisparities();
+	int block_size = matcher_instance->getBlockSize();
+
+	int bs2 = block_size/2;
+	int minD = min_disparity, maxD = min_disparity + num_disparities - 1;
+
+	int xmin = maxD + bs2;
+	int xmax = src_sz.width + minD - bs2;
+	int ymin = bs2;
+	int ymax = src_sz.height - bs2;
+
+	return cv::Rect(xmin, ymin, xmax - xmin, ymax - ymin);
+}
+
+int disparityFiltering(cv::Mat left, cv::Mat right, int argc, const LPCSTR* argv)
+{
+	cv::CommandLineParser parser(argc,argv,keys);
+	parser.about("Disparity Filtering Demo");
+	if(parser.has("help"))
+	{
+		parser.printMessage();
+		return 0;
+	}
+
+	cv::String GT_path = parser.get<cv::String>("GT");
+	cv::String dst_path = parser.get<cv::String>("dst_path");
+	cv::String dst_raw_path = parser.get<cv::String>("dst_raw_path");
+	cv::String dst_conf_path = parser.get<cv::String>("dst_conf_path");
+	cv::String algo = parser.get<cv::String>("algorithm");
+	cv::String filter = parser.get<cv::String>("filter");
+	bool no_display = parser.has("no-display");
+	bool no_downscale = parser.has("no-downscale");
+	int max_disp = parser.get<int>("max_disparity");
+	double lambda = parser.get<double>("wls_lambda");
+	double sigma  = parser.get<double>("wls_sigma");
+	double vis_mult = parser.get<double>("vis_mult");
+
+	int wsize;
+	if(parser.get<int>("window_size")>=0) //user provided window_size value
+		wsize = parser.get<int>("window_size");
+	else
+	{
+		if(algo=="sgbm")
+			wsize = 3; //default window size for SGBM
+		else if(!no_downscale && algo=="bm" && filter=="wls_conf")
+			wsize = 7; //default window size for BM on downscaled views (downscaling is performed only for wls_conf)
+		else
+			wsize = 15; //default window size for BM on full-sized views
+	}
+	if(!parser.check())
+	{
+		parser.printErrors();
+		return -1;
+	}
+
+	cv::Mat GT_disp;
+	bool noGT(cv::ximgproc::readGT(GT_path,GT_disp)!=0);
+
+	cv::Mat left_for_matcher, right_for_matcher;
+	cv::Mat left_disp,right_disp;
+	cv::Mat filtered_disp;
+	cv::Mat conf_map(left.rows,left.cols,CV_8U);
+	conf_map = cv::Scalar(255);
+	cv::Rect ROI;
+	cv::Ptr<cv::ximgproc::DisparityWLSFilter> wls_filter;
+	double matching_time, filtering_time;
+	if(max_disp<=0 || max_disp%16!=0)
+	{
+		std::cout<<"Incorrect max_disparity value: it should be positive and divisible by 16";
+		return -1;
+	}
+	if(wsize<=0 || wsize%2!=1)
+	{
+		std::cout<<"Incorrect window_size value: it should be positive and odd";
+		return -1;
+	}
+	if(filter=="wls_conf") // filtering with confidence (significantly better quality than wls_no_conf)
+	{
+		if(!no_downscale)
+		{
+			// downscale the views to speed-up the matching stage, as we will need to compute both left
+			// and right disparity maps for confidence map computation
+			//! [downscale]
+			max_disp/=2;
+			if(max_disp%16!=0)
+				max_disp += 16-(max_disp%16);
+			resize(left ,left_for_matcher ,cv::Size(),0.5,0.5, cv::INTER_LINEAR_EXACT);
+			resize(right,right_for_matcher,cv::Size(),0.5,0.5, cv::INTER_LINEAR_EXACT);
+			//! [downscale]
+		}
+		else
+		{
+			left_for_matcher  = left.clone();
+			right_for_matcher = right.clone();
+		}
+
+		if(algo=="bm")
+		{
+			//! [matching]
+			cv::Ptr<cv::StereoBM> left_matcher = cv::StereoBM::create(max_disp,wsize);
+			wls_filter = cv::ximgproc::createDisparityWLSFilter(left_matcher);
+			cv::Ptr<cv::StereoMatcher> right_matcher = cv::ximgproc::createRightMatcher(left_matcher);
+
+			cvtColor(left_for_matcher,  left_for_matcher,  cv::COLOR_BGR2GRAY);
+			cvtColor(right_for_matcher, right_for_matcher, cv::COLOR_BGR2GRAY);
+
+			matching_time = (double)cv::getTickCount();
+			left_matcher-> compute(left_for_matcher, right_for_matcher,left_disp);
+			right_matcher->compute(right_for_matcher,left_for_matcher, right_disp);
+			matching_time = ((double)cv::getTickCount() - matching_time)/cv::getTickFrequency();
+			//! [matching]
+		}
+		else if(algo=="sgbm")
+		{
+			cv::Ptr<cv::StereoSGBM> left_matcher  = cv::StereoSGBM::create(-max_disp/2+1,max_disp,wsize);
+			left_matcher->setP1(24*wsize*wsize);
+			left_matcher->setP2(96*wsize*wsize);
+			left_matcher->setPreFilterCap(63);
+			left_matcher->setMode(cv::StereoSGBM::MODE_SGBM_3WAY);
+			wls_filter = cv::ximgproc::createDisparityWLSFilter(left_matcher);
+			cv::Ptr<cv::StereoMatcher> right_matcher = cv::ximgproc::createRightMatcher(left_matcher);
+
+			matching_time = (double)cv::getTickCount();
+			left_matcher-> compute(left_for_matcher, right_for_matcher,left_disp);
+			right_matcher->compute(right_for_matcher,left_for_matcher, right_disp);
+			matching_time = ((double)cv::getTickCount() - matching_time)/cv::getTickFrequency();
+		}
+		else
+		{
+			std::cout<<"Unsupported algorithm";
+			return -1;
+		}
+
+		//! [filtering]
+		wls_filter->setLambda(lambda);
+		wls_filter->setSigmaColor(sigma);
+		filtering_time = (double)cv::getTickCount();
+		wls_filter->filter(left_disp,left,filtered_disp,right_disp);
+		filtering_time = ((double)cv::getTickCount() - filtering_time)/cv::getTickFrequency();
+		//! [filtering]
+		conf_map = wls_filter->getConfidenceMap();
+
+		// Get the ROI that was used in the last filter call:
+		ROI = wls_filter->getROI();
+		if(!no_downscale)
+		{
+			// upscale raw disparity and ROI back for a proper comparison:
+			resize(left_disp,left_disp,cv::Size(),2.0,2.0,cv::INTER_LINEAR_EXACT);
+			left_disp = left_disp*2.0;
+			ROI = cv::Rect(ROI.x*2,ROI.y*2,ROI.width*2,ROI.height*2);
+		}
+	}
+	else if(filter=="wls_no_conf")
+	{
+		/* There is no convenience function for the case of filtering with no confidence, so we
+		will need to set the ROI and matcher parameters manually */
+
+		left_for_matcher  = left.clone();
+		right_for_matcher = right.clone();
+
+		if(algo=="bm")
+		{
+			cv::Ptr<cv::StereoBM> matcher  = cv::StereoBM::create(max_disp,wsize);
+			matcher->setTextureThreshold(0);
+			matcher->setUniquenessRatio(0);
+			cvtColor(left_for_matcher,  left_for_matcher, cv::COLOR_BGR2GRAY);
+			cvtColor(right_for_matcher, right_for_matcher, cv::COLOR_BGR2GRAY);
+			ROI = computeROI(left_for_matcher.size(),matcher);
+			wls_filter = cv::ximgproc::createDisparityWLSFilterGeneric(false);
+			wls_filter->setDepthDiscontinuityRadius((int)ceil(0.33*wsize));
+
+			matching_time = (double)cv::getTickCount();
+			matcher->compute(left_for_matcher,right_for_matcher,left_disp);
+			matching_time = ((double)cv::getTickCount() - matching_time)/cv::getTickFrequency();
+		}
+		else if(algo=="sgbm")
+		{
+			cv::Ptr<cv::StereoSGBM> matcher  = cv::StereoSGBM::create(0,max_disp,wsize);
+			matcher->setUniquenessRatio(0);
+			matcher->setDisp12MaxDiff(1000000);
+			matcher->setSpeckleWindowSize(0);
+			matcher->setP1(24*wsize*wsize);
+			matcher->setP2(96*wsize*wsize);
+			matcher->setMode(cv::StereoSGBM::MODE_SGBM_3WAY);
+			ROI = computeROI(left_for_matcher.size(),matcher);
+			wls_filter = cv::ximgproc::createDisparityWLSFilterGeneric(false);
+			wls_filter->setDepthDiscontinuityRadius((int)ceil(0.5*wsize));
+
+			matching_time = (double)cv::getTickCount();
+			matcher->compute(left_for_matcher,right_for_matcher,left_disp);
+			matching_time = ((double)cv::getTickCount() - matching_time)/cv::getTickFrequency();
+		}
+		else
+		{
+			std::cout<<"Unsupported algorithm";
+			return -1;
+		}
+
+		wls_filter->setLambda(lambda);
+		wls_filter->setSigmaColor(sigma);
+		filtering_time = (double)cv::getTickCount();
+		wls_filter->filter(left_disp,left,filtered_disp,cv::Mat(),ROI);
+		filtering_time = ((double)cv::getTickCount() - filtering_time)/cv::getTickFrequency();
+	}
+	else
+	{
+		std::cout<<"Unsupported filter";
+		return -1;
+	}
+
+	//collect and print all the stats:
+	std::cout.precision(2);
+	std::cout<<"Matching time:  "<<matching_time<<"s"<<std::endl;
+	std::cout<<"Filtering time: "<<filtering_time<<"s"<<std::endl;
+	std::cout<<std::endl;
+
+	double MSE_before,percent_bad_before,MSE_after,percent_bad_after;
+	if(!noGT)
+	{
+		MSE_before = cv::ximgproc::computeMSE(GT_disp,left_disp,ROI);
+		percent_bad_before = cv::ximgproc::computeBadPixelPercent(GT_disp,left_disp,ROI);
+		MSE_after = cv::ximgproc::computeMSE(GT_disp,filtered_disp,ROI);
+		percent_bad_after = cv::ximgproc::computeBadPixelPercent(GT_disp,filtered_disp,ROI);
+
+		std::cout.precision(5);
+		std::cout<<"MSE before filtering: "<<MSE_before<<std::endl;
+		std::cout<<"MSE after filtering:  "<<MSE_after<<std::endl;
+		std::cout<<std::endl;
+		std::cout.precision(3);
+		std::cout<<"Percent of bad pixels before filtering: "<<percent_bad_before<<std::endl;
+		std::cout<<"Percent of bad pixels after filtering:  "<<percent_bad_after<<std::endl;
+	}
+
+	if(dst_path!="None")
+	{
+		cv::Mat filtered_disp_vis;
+		cv::ximgproc::getDisparityVis(filtered_disp,filtered_disp_vis,vis_mult);
+		cv::imwrite(dst_path,filtered_disp_vis);
+	}
+	if(dst_raw_path!="None")
+	{
+		cv::Mat raw_disp_vis;
+		cv::ximgproc::getDisparityVis(left_disp,raw_disp_vis,vis_mult);
+		cv::imwrite(dst_raw_path,raw_disp_vis);
+	}
+	if(dst_conf_path!="None")
+	{
+		cv::imwrite(dst_conf_path,conf_map);
+	}
+
+	if(!no_display)
+	{
+		cv::namedWindow("left", cv::WINDOW_AUTOSIZE);
+		cv::imshow("left", left);
+		cv::namedWindow("right", cv::WINDOW_AUTOSIZE);
+		cv::imshow("right", right);
+
+		if(!noGT)
+		{
+			cv::Mat GT_disp_vis;
+			cv::ximgproc::getDisparityVis(GT_disp,GT_disp_vis,vis_mult);
+			cv::namedWindow("ground-truth disparity", cv::WINDOW_AUTOSIZE);
+			cv::imshow("ground-truth disparity", GT_disp_vis);
+		}
+
+		//! [visualization]
+		cv::Mat raw_disp_vis;
+		cv::ximgproc::getDisparityVis(left_disp,raw_disp_vis,vis_mult);
+		cv::namedWindow("raw disparity", cv::WINDOW_AUTOSIZE);
+		cv::imshow("raw disparity", raw_disp_vis);
+		cv::Mat filtered_disp_vis;
+		cv::ximgproc::getDisparityVis(filtered_disp,filtered_disp_vis,vis_mult);
+		cv::namedWindow("filtered disparity", cv::WINDOW_AUTOSIZE);
+		cv::imshow("filtered disparity", filtered_disp_vis);
+		cv::waitKey();
+		//! [visualization]
+	}
+
+	return 0;
+}
+#endif
+/*----------------------------------------------------------------*/
+
+
+// apply a gradient like filter
+void PrefilterXSobel(const cv::Mat& src, cv::Mat& dst, int ftzero=31)
+{
+	const int OFS = 256 * 4, TABSZ = OFS * 2 + 256;
+	uint8_t tab[TABSZ];
+	for (int x = 0; x < TABSZ; x++)
+		tab[x] = (uint8_t)(x - OFS < -ftzero ? 0 : x - OFS > ftzero ? ftzero * 2 : x - OFS + ftzero);
+	uint8_t val0 = tab[0 + OFS];
+
+	#ifdef _USE_SSE
+	volatile bool useSIMD = cv::checkHardwareSupport(CV_CPU_SSE2);
+	#endif
+
+	ASSERT(src.type() == CV_8U);
+	const cv::Size size(src.size());
+	dst.create(size, src.type());
+	int y;
+	for (y = 0; y < size.height - 1; y += 2) {
+		const uint8_t* srow1 = src.ptr<uint8_t>(y);
+		const uint8_t* srow0 = y > 0 ? srow1 - src.step : size.height > 1 ? srow1 + src.step : srow1;
+		const uint8_t* srow2 = y < size.height - 1 ? srow1 + src.step : size.height > 1 ? srow1 - src.step : srow1;
+		const uint8_t* srow3 = y < size.height - 2 ? srow1 + src.step * 2 : srow1;
+		uint8_t* dptr0 = dst.ptr<uint8_t>(y);
+		uint8_t* dptr1 = dptr0 + dst.step;
+
+		dptr0[0] = dptr0[size.width - 1] = dptr1[0] = dptr1[size.width - 1] = val0;
+		int x = 1;
+
+		#ifdef _USE_SSE
+		if (useSIMD) {
+			__m128i z = _mm_setzero_si128(), ftz = _mm_set1_epi16((short)ftzero);
+			__m128i ftz2 = _mm_set1_epi8(cv::saturate_cast<uint8_t>(ftzero * 2));
+			for (; x <= size.width - 9; x += 8) {
+				__m128i c0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow0 + x - 1)), z);
+				__m128i c1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow1 + x - 1)), z);
+				__m128i d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow0 + x + 1)), z);
+				__m128i d1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow1 + x + 1)), z);
+
+				d0 = _mm_sub_epi16(d0, c0);
+				d1 = _mm_sub_epi16(d1, c1);
+
+				__m128i c2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow2 + x - 1)), z);
+				__m128i c3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow3 + x - 1)), z);
+				__m128i d2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow2 + x + 1)), z);
+				__m128i d3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow3 + x + 1)), z);
+
+				d2 = _mm_sub_epi16(d2, c2);
+				d3 = _mm_sub_epi16(d3, c3);
+
+				__m128i v0 = _mm_add_epi16(d0, _mm_add_epi16(d2, _mm_add_epi16(d1, d1)));
+				__m128i v1 = _mm_add_epi16(d1, _mm_add_epi16(d3, _mm_add_epi16(d2, d2)));
+				v0 = _mm_packus_epi16(_mm_add_epi16(v0, ftz), _mm_add_epi16(v1, ftz));
+				v0 = _mm_min_epu8(v0, ftz2);
+
+				_mm_storel_epi64((__m128i*)(dptr0 + x), v0);
+				_mm_storel_epi64((__m128i*)(dptr1 + x), _mm_unpackhi_epi64(v0, v0));
+			}
+		}
+		#endif
+
+		for (; x < size.width - 1; x++) {
+			int d0 = srow0[x + 1] - srow0[x - 1], d1 = srow1[x + 1] - srow1[x - 1],
+				d2 = srow2[x + 1] - srow2[x - 1], d3 = srow3[x + 1] - srow3[x - 1];
+			int v0 = tab[d0 + d1 * 2 + d2 + OFS];
+			int v1 = tab[d1 + d2 * 2 + d3 + OFS];
+			dptr0[x] = (uint8_t)v0;
+			dptr1[x] = (uint8_t)v1;
+		}
+	}
+
+	for (; y < size.height; y++) {
+		uint8_t* dptr = dst.ptr<uint8_t>(y);
+		for (int x = 0; x < size.width; x++)
+			dptr[x] = val0;
+	}
+}
+/*----------------------------------------------------------------*/
+
+
+
+// S T R U C T S ///////////////////////////////////////////////////
+
+enum EVENT_TYPE {
+	EVT_JOB = 0,
+	EVT_CLOSE,
+};
+
+class EVTClose : public Event
+{
+public:
+	EVTClose() : Event(EVT_CLOSE) {}
+};
+class EVTPixelProcess : public Event
+{
+public:
+	typedef std::function<void (int,int,int)> FncPixel;
+	const cv::Size size;
+	volatile Thread::safe_t& idxPixel;
+	const FncPixel fncPixel;
+	bool Run(void*) override {
+		const int numPixels(size.area());
+		int idx;
+		while ((idx=(int)Thread::safeInc(idxPixel)) < numPixels)
+			fncPixel(idx, idx/size.width, idx%size.width);
+		return true;
+	}
+	EVTPixelProcess(cv::Size s, volatile Thread::safe_t& idx, FncPixel f) : Event(EVT_JOB), size(s), idxPixel(idx), fncPixel(f) {}
+};
+class EVTPixelAccumInc : public Event
+{
+public:
+	typedef std::function<void (int)> FncPixel;
+	const int numPixels;
+	volatile Thread::safe_t& idxPixel;
+	const FncPixel fncPixel;
+	bool Run(void*) override {
+		int idx;
+		while ((idx=(int)Thread::safeInc(idxPixel)) < numPixels)
+			fncPixel(idx);
+		return true;
+	}
+	EVTPixelAccumInc(int s, volatile Thread::safe_t& idx, FncPixel f) : Event(EVT_JOB), numPixels(s), idxPixel(idx), fncPixel(f) {}
+};
+class EVTPixelAccumDec : public Event
+{
+public:
+	typedef std::function<void (int)> FncPixel;
+	volatile Thread::safe_t& idxPixel;
+	const FncPixel fncPixel;
+	bool Run(void*) override {
+		int idx;
+		while ((idx=(int)Thread::safeDec(idxPixel)) >= 0)
+			fncPixel(idx);
+		return true;
+	}
+	EVTPixelAccumDec(volatile Thread::safe_t& idx, FncPixel f) : Event(EVT_JOB), idxPixel(idx), fncPixel(f) {}
+};
+/*----------------------------------------------------------------*/
+
+
+
+// S T R U C T S ///////////////////////////////////////////////////
+
+// - P1 and P2s are algorithm constants very similar to those from the original SGM algorithm;
+//   they are set to defaults according to the patch size
+// - alpha & beta form the final P2 as P2*(1+alpha*e^(-DI^2/(2*beta^2)))
+//   where DI is the difference in image intensity I(x)-I(x_prev) in [0,255] range
+// - subpixelSteps represents how much sub-pixel accuracy is searched/stored;
+//   if 1 no sub-pixel precision, if for example 4 a 0.25 sub-pixel accuracy is stored;
+//   the stored value is quantized and represented as integer: val=(float)valStored/subpixelSteps
+SemiGlobalMatcher::SemiGlobalMatcher(SgmSubpixelMode _subpixelMode, Disparity _subpixelSteps, AccumCost _P1, AccumCost P2, float P2alpha, float P2beta)
+	:
+	subpixelMode(_subpixelMode),
+	subpixelSteps(_subpixelSteps),
+	P1(_P1), P2s(GenerateP2s(P2, P2alpha, P2beta))
+{
+}
+
+SemiGlobalMatcher::~SemiGlobalMatcher()
+{
+}
+
+CLISTDEF0IDX(SemiGlobalMatcher::AccumCost,int) SemiGlobalMatcher::GenerateP2s(AccumCost P2, float P2alpha, float P2beta)
+{
+	CLISTDEF0IDX(AccumCost,int) P2s(256);
+	FOREACH(i, P2s)
+		P2s[i] = (AccumCost)ROUND2INT(P2*(1.f+P2alpha*EXP(-SQUARE((float)i)/(2.f*SQUARE(P2beta)))));
+	return P2s;
+}
+
+
+// Compute SGM stereo for this image and each of the neighbor views:
+//  - minResolution is the resolution of the top of the pyramid for tSGM;
+//    can be 0 to force the standard SGM algorithm
+void SemiGlobalMatcher::Match(const Scene& scene, IIndex idxImage, IIndex numNeighbors, unsigned minResolution)
+{
+	const Image& leftImage = scene.images[idxImage];
+	const float fMinScore(MAXF(leftImage.neighbors.front().score*(OPTDENSE::fViewMinScoreRatio*0.1f), OPTDENSE::fViewMinScore));
+	FOREACH(idxNeighbor, leftImage.neighbors) {
+		const ViewScore& neighbor = leftImage.neighbors[idxNeighbor];
+		// exclude neighbors that over the limit or too small score
+		ASSERT(scene.images[neighbor.idx.ID].IsValid());
+		if ((numNeighbors && idxNeighbor >= numNeighbors) ||
+			(neighbor.score < fMinScore))
+			break;
+		// check if the disparity-map was already estimated for the same image pairs
+		const Image& rightImage = scene.images[neighbor.idx.ID];
+		const String pairName(MAKE_PATH(String::FormatString("%04u_%04u", leftImage.ID, rightImage.ID)));
+		if (File::isPresent((pairName+".dimap").c_str()) || File::isPresent(MAKE_PATH(String::FormatString("%04u_%04u.dimap", rightImage.ID, leftImage.ID))))
+			continue;
+		TD_TIMER_STARTD();
+		IndexArr points;
+		Matrix3x3 H; Matrix4x4 Q;
+		ViewData leftData, rightData;
+		MaskMap leftMaskMap, rightMaskMap; {
+		// fetch pairs of corresponding image points
+		//TODO: use precomputed points from SelectViews()
+		Point3fArr leftPoints, rightPoints;
+		FOREACH(idxPoint, scene.pointcloud.points) {
+			const PointCloud::ViewArr& views = scene.pointcloud.pointViews[idxPoint];
+			if (views.FindFirst(idxImage) != PointCloud::ViewArr::NO_INDEX) {
+				points.push_back((uint32_t)idxPoint);
+				if (views.FindFirst(neighbor.idx.ID) != PointCloud::ViewArr::NO_INDEX) {
+					const Point3 X(scene.pointcloud.points[idxPoint]);
+					leftPoints.emplace_back(leftImage.camera.TransformPointW2I3(X));
+					rightPoints.emplace_back(rightImage.camera.TransformPointW2I3(X));
+				}
+			}
+		}
+		// stereo-rectify image pair
+		if (!Image::StereoRectifyImages(leftImage, rightImage, leftPoints, rightPoints, leftData.imageColor, rightData.imageColor, leftMaskMap, rightMaskMap, H, Q))
+			continue;
+		ASSERT(leftData.imageColor.size() == rightData.imageColor.size());
+		}
+		#ifdef _USE_FILTER_DEMO
+		// run openCV implementation
+		const LPCSTR argv[] = {"disparityFiltering", "-algorithm=sgbm", "-max_disparity=160", "-no-downscale"};
+		disparityFiltering(leftData.imageColor, rightData.imageColor, (int)SizeOfArray(argv), argv);
+		#endif
+		// color to gray conversion
+		#if SGM_SIMILARITY == SGM_SIMILARITY_CENSUS
+		leftData.imageColor.toGray(leftData.imageGray, cv::COLOR_BGR2GRAY, false, true);
+		rightData.imageColor.toGray(rightData.imageGray, cv::COLOR_BGR2GRAY, false, true);
+		#else
+		leftData.imageColor.toGray(leftData.imageGray, cv::COLOR_BGR2GRAY, true, true);
+		rightData.imageColor.toGray(rightData.imageGray, cv::COLOR_BGR2GRAY, true, true);
+		#endif
+		// compute scale used for the disparity estimation
+		REAL scale(1);
+		if (minResolution) {
+			unsigned resolutionLevel(8);
+			Image8U::computeMaxResolution(leftData.imageGray.width(), leftData.imageGray.height(), resolutionLevel, minResolution);
+			scale = REAL(1)/MAXF(2,POWI(2,resolutionLevel));
+		}
+		const bool tSGM(!ISEQUAL(scale, REAL(1)));
+		DisparityMap leftDisparityMap, rightDisparityMap; AccumCostMap costMap;
+		do {
+			#if 0
+			// export the intermediate disparity-maps
+			if (!leftDisparityMap.empty()) {
+				const REAL _scale(scale*0.5);
+				Matrix3x3 _H(H); Matrix4x4 _Q(Q);
+				Image::ScaleStereoRectification(_H, _Q, _scale);
+				ExportDisparityDataRawFull(String::FormatString("%s_%d.dimap", pairName.c_str(), log2i(ROUND2INT(REAL(1)/_scale))), leftDisparityMap, costMap, Image8U::computeResize(leftImage.GetSize(), _scale), H, _Q, 1);
+			}
+			#endif
+			// initialize
+			const ViewData leftDataLevel(leftData.GetImage(scale));
+			const ViewData rightDataLevel(rightData.GetImage(scale));
+			const cv::Size size(leftDataLevel.imageGray.size());
+			const cv::Size sizeValid(size.width-2*halfWindowSizeX, size.height-2*halfWindowSizeY);
+			const bool bFirstLevel(leftDisparityMap.empty());
+			if (bFirstLevel) {
+				// initialize the disparity-map with a rough estimate based on the sparse point-cloud
+				//TODO: remove DepthData::ViewData dependency
+				Image leftImageLevel(leftImage.GetImage(scene.platforms, scale*0.5, false));
+				DepthData::ViewData image;
+				image.pImageData = &leftImageLevel; // used only for avgDepth
+				image.image.create(leftImageLevel.GetSize());
+				image.camera = leftImageLevel.camera;
+				DepthMap depthMap;
+				Depth dMin, dMax;
+				TriangulatePoints2DepthMap(image, scene.pointcloud, points, depthMap, dMin, dMax);
+				points.Release();
+				Matrix3x3 H2(H); Matrix4x4 Q2(Q);
+				Image::ScaleStereoRectification(H2, Q2, scale*0.5);
+				const cv::Size sizeHalf(Image8U::computeResize(size, 0.5));
+				const cv::Size sizeValidHalf(sizeHalf.width-2*halfWindowSizeX, sizeHalf.height-2*halfWindowSizeY);
+				leftDisparityMap.create(sizeValidHalf);
+				Depth2DisparityMap(depthMap, H2.inv(), Q2.inv(), 1, leftDisparityMap);
+				// resize masks
+				cv::resize(leftMaskMap, leftMaskMap, size, 0, 0, cv::INTER_NEAREST);
+				cv::resize(rightMaskMap, rightMaskMap, size, 0, 0, cv::INTER_NEAREST);
+				const cv::Rect ROI(halfWindowSizeX,halfWindowSizeY, sizeValid.width,sizeValid.height);
+				leftMaskMap(ROI).copyTo(leftMaskMap);
+				rightMaskMap(ROI).copyTo(rightMaskMap);
+			} else {
+				// upscale masks
+				UpscaleMask(leftMaskMap, sizeValid);
+				UpscaleMask(rightMaskMap, sizeValid);
+			}
+			// estimate right-left disparity-map
+			Index numCosts;
+			if (tSGM) {
+				// upscale the disparity-map from the previous level
+				FlipDirection(leftDisparityMap, rightDisparityMap);
+				numCosts = Disparity2RangeMap(rightDisparityMap, rightMaskMap, bFirstLevel?11:5, bFirstLevel?33:7);
+			} else {
+				// extract global min and max disparities
+				Range range{std::numeric_limits<Disparity>::max(), std::numeric_limits<Disparity>::min()};
+				ASSERT(leftDisparityMap.isContinuous());
+				const Disparity* pd = leftDisparityMap.ptr<const Disparity>();
+				const Disparity* const pde = pd+leftDisparityMap.area();
+				do {
+					const Disparity d(*pd);
+					if (range.minDisp > d)
+						range.minDisp = d;
+					if (range.maxDisp < d)
+						range.maxDisp = d;
+				} while (++pd < pde);
+				// set disparity search range to the global min/max range
+				const Disparity numDisp(range.numDisp()+16);
+				const Disparity disp(range.minDisp+range.maxDisp);
+				range.minDisp = disp-numDisp;
+				range.maxDisp = disp+numDisp;
+				maxNumDisp = range.numDisp();
+				numCosts = 0;
+				imagePixels.resize(sizeValid.area());
+				for (PixelData& pixel: imagePixels) {
+					pixel.range = range;
+					pixel.idx = numCosts;
+					numCosts += maxNumDisp;
+				}
+			}
+			imageCosts.resize(numCosts);
+			imageAccumCosts.resize(numCosts);
+			Match(rightDataLevel, leftDataLevel, rightDisparityMap, costMap);
+			// estimate left-right disparity-map
+			if (tSGM) {
+				numCosts = Disparity2RangeMap(leftDisparityMap, leftMaskMap, bFirstLevel?11:5, bFirstLevel?33:7);
+				imageCosts.resize(numCosts);
+				imageAccumCosts.resize(numCosts);
+			} else {
+				for (PixelData& pixel: imagePixels) {
+					const Disparity maxDisp(-pixel.range.minDisp);
+					pixel.range.minDisp = -pixel.range.maxDisp;
+					pixel.range.maxDisp = maxDisp;
+				}
+			}
+			Match(leftDataLevel, rightDataLevel, leftDisparityMap, costMap);
+			// check disparity-map cross-consistency
+			#if 0
+			if (ISEQUAL(scale, REAL(1))) {
+				cv::Ptr<cv::ximgproc::DisparityWLSFilter> filter = cv::ximgproc::createDisparityWLSFilterGeneric(true);
+				const cv::Rect rcValid(halfWindowSizeX,halfWindowSizeY, sizeValid.width,sizeValid.height);
+				Image32F leftDisparityMap32F, rightDisparityMap32F, filtered32F;
+				leftDisparityMap.convertTo(leftDisparityMap32F, CV_32F, 16);
+				rightDisparityMap.convertTo(rightDisparityMap32F, CV_32F, 16);
+				filter->filter(leftDisparityMap32F, leftData.imageColor(rcValid), filtered32F, rightDisparityMap32F);
+				filtered32F.convertTo(leftDisparityMap, CV_16S, 1.0/16, 0.5);
+			} else
+			#endif
+			if (bFirstLevel) {
+				// perform a rigorous filtering of the estimated disparity maps in order to
+				// estimate the common region or interest and set the validity masks
+				ConsistencyCrossCheck(leftDisparityMap, rightDisparityMap);
+				ConsistencyCrossCheck(rightDisparityMap, leftDisparityMap);
+				cv::filterSpeckles(leftDisparityMap, NO_DISP, OPTDENSE::nSpeckleSize, 5);
+				cv::filterSpeckles(rightDisparityMap, NO_DISP, OPTDENSE::nSpeckleSize, 5);
+				ExtractMask(leftDisparityMap, leftMaskMap);
+				ExtractMask(rightDisparityMap, rightMaskMap);
+			} else {
+				// simply run a left-right consistency check
+				ConsistencyCrossCheck(leftDisparityMap, rightDisparityMap);
+			}
+		} while ((scale*=2) < REAL(1)+ZEROTOLERANCE<REAL>());
+		#if 0
+		// remove speckles
+		if (OPTDENSE::nSpeckleSize > 0)
+			cv::filterSpeckles(leftDisparityMap, NO_DISP, OPTDENSE::nSpeckleSize, 5);
+		#endif
+		// sub-pixel disparity-map estimation
+		RefineDisparityMap(leftDisparityMap);
+		#if 1
+		// export disparity-map for the left image
+		DEBUG_EXTRA("Disparity-map for images %3u and %3u: %dx%d (%s)", leftImage.ID, rightImage.ID,
+			leftImage.width, leftImage.height, TD_TIMER_GET_FMT().c_str());
+		ExportPointCloud(pairName+".ply", leftImage, leftDisparityMap, Q, subpixelSteps);
+		ExportDisparityMap(pairName+".png", leftDisparityMap);
+		ExportDisparityDataRawFull(pairName+".dimap", leftDisparityMap, costMap, leftImage.GetSize(), H, Q, subpixelSteps);
+		#else
+		// convert disparity-map to final depth-map for the left image
+		DepthMap depthMap(leftImage.image.size()); ConfidenceMap confMap;
+		Disparity2DepthMap(leftDisparityMap, costMap, H, Q, subpixelSteps, depthMap, confMap);
+		DEBUG_EXTRA("Depth-map for images %3u and %3u: %dx%d (%s)", leftImage.ID, rightImage.ID,
+			depthMap.width(), depthMap.height(), TD_TIMER_GET_FMT().c_str());
+		ExportDepthMap(pairName+".png", depthMap);
+		MVS::ExportPointCloud(pairName+".ply", leftImage, depthMap, NormalMap());
+		ExportDepthDataRaw(pairName+".dmap", leftImage.name, IIndexArr{leftImage.ID,rightImage.ID}, leftImage.GetSize(), leftImage.camera.K, leftImage.camera.R, leftImage.camera.C, 0, FLT_MAX, depthMap, NormalMap(), confMap);
+		#endif
+	}
+}
+
+void SemiGlobalMatcher::Fuse(const Scene& scene, IIndex idxImage, IIndex numNeighbors, unsigned minViews, DepthMap& depthMap, ConfidenceMap& confMap)
+{
+	TD_TIMER_STARTD();
+	const Image& leftImage = scene.images[idxImage];
+	const float fMinScore(MAXF(leftImage.neighbors.front().score*(OPTDENSE::fViewMinScoreRatio*0.1f), OPTDENSE::fViewMinScore));
+	struct PairData {
+		DepthMap depthMap;
+		DepthRangeMap depthRangeMap;
+		ConfidenceMap confMap;
+		PairData(const cv::Size& size) : depthMap(size) {}
+	};
+	// load and put in same space all disparity-maps containing this view
+	CLISTDEFIDX(PairData,IIndex) pairs;
+	pairs.reserve(numNeighbors);
+	FOREACH(idxNeighbor, leftImage.neighbors) {
+		const ViewScore& neighbor = leftImage.neighbors[idxNeighbor];
+		// exclude neighbors that over the limit or too small score
+		ASSERT(scene.images[neighbor.idx.ID].IsValid());
+		if ((numNeighbors && idxNeighbor >= numNeighbors) ||
+			(neighbor.score < fMinScore))
+			break;
+		// check if the disparity-map was estimated for this images pair
+		const Image& rightImage = scene.images[neighbor.idx.ID];
+		Disparity subpixelSteps;
+		cv::Size imageSize; Matrix3x3 H; Matrix4x4 Q;
+		DisparityMap disparityMap; AccumCostMap costMap;
+		if (!ImportDisparityDataRawFull(MAKE_PATH(String::FormatString("%04u_%04u.dimap", leftImage.ID, rightImage.ID)), disparityMap, costMap, imageSize, H, Q, subpixelSteps)) {
+			if (!ImportDisparityDataRawFull(MAKE_PATH(String::FormatString("%04u_%04u.dimap", rightImage.ID, leftImage.ID)), disparityMap, costMap, imageSize, H, Q, subpixelSteps)) {
+				DEBUG("warning: no disparity-data file found for image pair (%u,%u)", leftImage.ID, rightImage.ID);
+				continue;
+			}
+			// adapt Q to project the 3D point from right into the left-image
+			RMatrix poseR;
+			CMatrix poseC;
+			ComputeRelativePose(rightImage.camera.R, rightImage.camera.C, leftImage.camera.R, leftImage.camera.C, poseR, poseC);
+			Matrix4x4 P(Matrix4x4::IDENTITY);
+			AssembleProjectionMatrix(leftImage.camera.K, poseR, poseC, reinterpret_cast<Matrix3x4&>(P));
+			Matrix4x4 invK(Matrix4x4::IDENTITY);
+			cv::Mat(rightImage.camera.GetInvK()).copyTo(cv::Mat(4,4,cv::DataType<Matrix4x4::Type>::type,invK.val)(cv::Rect(0,0,3,3)));
+			Q = P*invK*Q;
+		}
+		// convert disparity-map to final depth-map for the left image
+		PairData& pair = pairs.emplace_back(leftImage.image.size());
+		if (!ProjectDisparity2DepthMap(disparityMap, costMap, Q, subpixelSteps, pair.depthMap, pair.depthRangeMap, pair.confMap)) {
+			pairs.RemoveLast();
+			continue;
+		}
+		#if TD_VERBOSE != TD_VERBOSE_OFF
+		// save pair depth-map
+		if (VERBOSITY_LEVEL > 3) {
+			const String pairName(MAKE_PATH(String::FormatString("depth%04u_%04u", leftImage.ID, rightImage.ID)));
+			ExportDepthMap(pairName+".png", pair.depthMap);
+			MVS::ExportPointCloud(pairName+".ply", leftImage, pair.depthMap, NormalMap());
+		}
+		#endif
+	}
+	// fuse available depth-maps such that for each pixel set its depth as the average of the largest cluster of agreeing depths;
+	// pixel depths values agree if their trust regions overlap
+	depthMap.create(leftImage.image.size()); confMap.create(depthMap.size());
+	for (int r=0; r<depthMap.rows; ++r) {
+		for (int c=0; c<depthMap.cols; ++c) {
+			struct Cluster {
+				IIndexArr views;
+				DepthRange range;
+			};
+			CLISTDEFIDX(Cluster,IIndex) clusters(0, pairs.size());
+			FOREACH(p, pairs) {
+				const PairData& pair = pairs[p];
+				const Depth depth(pair.depthMap(r,c));
+				if (depth <= 0)
+					continue;
+				const DepthRange& range(pair.depthRangeMap(r,c));
+				unsigned numClusters(0);
+				for (Cluster& cluster: clusters) {
+					if (!ISINSIDE(depth, cluster.range.x, cluster.range.y))
+						continue;
+					cluster.views.push_back(p);
+					if (cluster.range.x < range.x)
+						cluster.range.x = range.x;
+					if (cluster.range.y > range.y)
+						cluster.range.y = range.y;
+					++numClusters;
+				}
+				if (numClusters == 0)
+					clusters.emplace_back(Cluster{IIndexArr{p}, range});
+			}
+			if (clusters.empty()) {
+				depthMap(r,c) = Depth(0);
+				confMap(r,c) = float(0);
+				continue;
+			}
+			const Cluster& cluster = clusters.GetMax([](const Cluster& i, const Cluster& j) { return i.views.size() < j.views.size(); });
+			if (cluster.views.size() < minViews) {
+				depthMap(r,c) = Depth(0);
+				confMap(r,c) = float(0);
+				continue;
+			}
+			Depth& depth = depthMap(r,c); depth = 0;
+			float& conf = confMap(r,c); conf = 0;
+			unsigned numDepths(0);
+			for (IIndex p: cluster.views) {
+				const PairData& pair = pairs[p];
+				depth += pair.depthMap(r,c);
+				conf += pair.confMap(r,c);
+				++numDepths;
+			}
+			depth /= numDepths;
+			conf /= numDepths;
+		}
+	}
+	DEBUG_EXTRA("Depth-map for image %3u fused: %dx%d (%s)", leftImage.ID,
+		depthMap.width(), depthMap.height(), TD_TIMER_GET_FMT().c_str());
+	#if TD_VERBOSE != TD_VERBOSE_OFF
+	// save depth-map
+	if (VERBOSITY_LEVEL > 2) {
+		const String fileName(MAKE_PATH(String::FormatString("depth%04u", leftImage.ID)));
+		ExportDepthMap(fileName+".png", depthMap);
+		MVS::ExportPointCloud(fileName+".ply", leftImage, depthMap, NormalMap());
+	}
+	#endif
+}
+
+
+// Compute SGM stereo on the images
+void SemiGlobalMatcher::Match(const ViewData& leftImage, const ViewData& rightImage, DisparityMap& disparityMap, AccumCostMap& costMap)
+{
+	const cv::Size size(leftImage.imageGray.size());
+	const cv::Size sizeValid(size.width-2*halfWindowSizeX, size.height-2*halfWindowSizeY);
+	ASSERT(leftImage.imageColor.size() == size);
+
+	#if 0
+	// display search info (average disparity and range)
+	DisplayState(sizeValid);
+	#endif
+
+	// compute costs
+	{
+	ASSERT(!imageCosts.empty());
+	const float eps(1e-3f); // used suppress the effect of noise in untextured regions
+	auto pixel = [&](int idx, int r, int c) {
+		// ignore pixel if not valid
+		const PixelData& pixel = imagePixels[idx];
+		if (!pixel.range.isValid())
+			return;
+		#if SGM_SIMILARITY == SGM_SIMILARITY_CENSUS
+		struct Compute {
+			static inline int HammingDistance(uint64_t c1, uint64_t c2) {
+				return PopCnt(c1 ^ c2);
+			}
+		};
+		// compute pixel cost
+		Cost* costs = imageCosts.data()+pixel.idx;
+		const Census lc(leftImage.imageCensus(r,c));
+		for (int d=pixel.range.minDisp; d<pixel.range.maxDisp; ++d) {
+			const ImageRef x(c+d,r);
+			if (!rightImage.imageCensus.isInside(x)) {
+				*costs++ = 255;
+				continue;
+			}
+			const Census rc(rightImage.imageCensus(x));
+			*costs++ = Compute::HammingDistance(lc, rc)*4;
+		}
+		#else
+		struct Compute {
+			static float NormL1Sq(const Pixel8U& a, const Pixel8U& b) {
+				// |(a-b)| L1 norm of (a-b) when types are byte
+				return float(
+					SQUARE(unsigned(a[0]<b[0] ? b[0]-a[0] : a[0]-b[0])) +
+					SQUARE(unsigned(a[1]<b[1] ? b[1]-a[1] : a[1]-b[1])) +
+					SQUARE(unsigned(a[2]<b[2] ? b[2]-a[2] : a[2]-b[2])));
+			}
+			static float WeightColor(const Image8U3& image, const Pixel8U& center, const ImageRef& x) {
+				// color weight [0..1]
+				static const float sigmaColor(-1.f/(2.f*SQUARE(0.3f*255)));
+				return Compute::NormL1Sq(image(x), center) * sigmaColor;
+			}
+			static float WeightSpatial(int x, int y) {
+				// spatial weight [0..1]
+				static const float sigmaSpatial(-1.f/(2.f*SQUARE(0.4f*MAXF<int>(windowSizeX,windowSizeY))));
+				return float(SQUARE(x) + SQUARE(y)) * sigmaSpatial;
+			}
+		};
+		const ImageRef u(c+halfWindowSizeX,r+halfWindowSizeY);
+		// initialize pixel patch weights
+		WeightedPatch w;
+		w.normSq0 = 0;
+		w.sumWeights = 0;
+		int n = 0;
+		const Pixel8U& colCenter = leftImage.imageColor(u);
+		for (int i=-halfWindowSizeY; i<=halfWindowSizeY; ++i) {
+			for (int j=-halfWindowSizeX; j<=halfWindowSizeX; ++j) {
+				const ImageRef x(u.x+j,u.y+i);
+				WeightedPatch::Pixel& pw = w.weights[n++];
+				w.normSq0 +=
+					(pw.tempWeight = leftImage.imageGray(x)) *
+					(pw.weight = EXP(Compute::WeightColor(leftImage.imageColor, colCenter, x)+Compute::WeightSpatial(j,i)));
+				w.sumWeights += pw.weight;
+			}
+		}
+		ASSERT(n == numTexels);
+		const float tm(w.normSq0/w.sumWeights);
+		w.normSq0 = 0;
+		n = 0;
+		do {
+			WeightedPatch::Pixel& pw = w.weights[n];
+			const float t(pw.tempWeight - tm);
+			w.normSq0 += (pw.tempWeight = pw.weight * t) * t;
+		} while (++n < numTexels);
+		// compute pixel cost
+		Cost* costs = imageCosts.data()+pixel.idx;
+		for (int d=pixel.range.minDisp; d<pixel.range.maxDisp; ++d) {
+			float sum(0), sumSq(0), nom(0);
+			for (int i=-halfWindowSizeY, n=0; i<=halfWindowSizeY; ++i) {
+				for (int j=-halfWindowSizeX; j<=halfWindowSizeX; ++j) {
+					const ImageRef x(u.x+j+d,u.y+i);
+					if (!rightImage.imageGray.isInside(x)) {
+						*costs++ = 255;
+						goto NEXT_COST;
+					}
+					const float f(rightImage.imageGray(x));
+					const WeightedPatch::Pixel& pw = w.weights[n++];
+					const float fw(f*pw.weight);
+					sum += fw;
+					sumSq += f*fw;
+					nom += f*pw.tempWeight;
+				}
+			}
+			{
+			const float normSq1(sumSq-SQUARE(sum)/w.sumWeights);
+			const float ncc(nom/SQRT(w.normSq0*normSq1+eps));
+			*costs++ = (ncc <= 0 ? Cost(255) : (Cost)ROUND2INT((1.f-MINF(ncc,1.f))*255.f));
+			}
+			NEXT_COST:;
+		}
+		#endif
+	};
+	ASSERT(threads.IsEmpty());
+	if (!threads.empty()) {
+		volatile Thread::safe_t idxPixel(-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelProcess(sizeValid, idxPixel, pixel));
+		WaitThreadWorkers(threads.size());
+	} else
+	for (int r=0; r<sizeValid.height; ++r)
+		for (int c=0; c<sizeValid.width; ++c)
+			pixel(r*sizeValid.width+c, r, c);
+	}
+
+	// accumulate costs
+	{
+	ASSERT(!imageAccumCosts.empty());
+	imageAccumCosts.Memset(0);
+	#if SGM_SIMILARITY == SGM_SIMILARITY_CENSUS
+	const ImageGray::Type Igray(127);
+	#else
+	const ImageGray::Type Igray(0.5f);
+	#endif
+	struct LineData {
+		AccumCost* L;
+		Range R;
+		~LineData() { delete[] L; }
+		AccumCost operator[] (int i) const { return L[i]; }
+		AccumCost& operator[] (int i) { return L[i]; }
+	};
+	auto pixelAccum = [&](const Cost* costs, const LineData& Lp, LineData& Ls, AccumCost* accums, ImageGray::Type DI) {
+		struct Compute {
+			static inline void MINS(AccumCost& m, AccumCost v) { if (m > v) m = v; }
+		};
+		ASSERT(Ls.R.isValid());
+		#if SGM_SIMILARITY == SGM_SIMILARITY_CENSUS
+		const AccumCost P2(P2s[DI]);
+		#else
+		const AccumCost P2(P2s[ABS(ROUND2INT(255.f*DI))]);
+		#endif
+		const Disparity minDisp(MAXF(Lp.R.minDisp, Ls.R.minDisp));
+		const Disparity maxDisp(MINF(Lp.R.maxDisp, Ls.R.maxDisp));
+		if (minDisp >= maxDisp) {
+			// the disparity ranges for the two pixels do not intersect;
+			// fill all accumulated costs with L(d)=C(d)+P2
+			const Disparity numDisp(Ls.R.numDisp());
+			for (int idxDisp=0; idxDisp<numDisp; ++idxDisp)
+				accums[idxDisp] += (Ls[idxDisp] = costs[idxDisp]+P2);
+		} else {
+			// accumulate cost as L(d)=C(d)+min(Lp(d)+V(d,dp))-min(Lp)
+			// where V(d,dp) is:
+			//  0  if d=dp
+			//  P1 if |d-dp|=1
+			//  P2 if |d-dp|>1
+			AccumCost minLp(std::numeric_limits<AccumCost>::max());
+			for (const AccumCost *L=Lp.L+(minDisp-Lp.R.minDisp), *endL=L+(maxDisp-minDisp); L<endL; ++L)
+				Compute::MINS(minLp, *L);
+			for (Disparity d=Ls.R.minDisp; d<Ls.R.maxDisp; ++d) {
+				const int idxDisp(d-Ls.R.minDisp);
+				AccumCost& L = Ls[idxDisp];
+				L = std::numeric_limits<AccumCost>::max();
+				for (Disparity dp=minDisp; dp<maxDisp; ++dp) {
+					const int idxDispp(dp-Lp.R.minDisp);
+					if (dp == d)
+						Compute::MINS(L, Lp[idxDispp]);
+					else if (dp == d-1 || dp == d+1)
+						Compute::MINS(L, Lp[idxDispp]+P1);
+					else
+						Compute::MINS(L, Lp[idxDispp]+P2);
+				}
+				accums[idxDisp] += (L = costs[idxDisp]+L-minLp);
+			}
+		}
+	};
+	ASSERT(threads.IsEmpty());
+	if (!threads.empty()) {
+		struct AccumLines {
+			LineData linesBuffer[2];
+			LineData* lines[2];
+			AccumLines(Disparity maxNumDisp) {
+				for (LineData& line: linesBuffer) {
+					line.L = new AccumCost[maxNumDisp];
+					memset(line.L, 0, sizeof(AccumCost)*maxNumDisp);
+					line.R.minDisp = line.R.maxDisp = 0;
+				}
+				lines[0] = linesBuffer+0;
+				lines[1] = linesBuffer+1;
+			}
+			void NextLine() { std::swap(lines[0], lines[1]); }
+			const LineData& operator() (int i) const { return *lines[i]; }
+			LineData& operator() (int i) { return *lines[i]; }
+		};
+		#define ACCUM_PIXELS(cond) \
+			AccumLines lines(maxNumDisp); \
+			ImageGray::Type Ip(Igray); \
+			do { \
+				const int idx(u.y*sizeValid.width+u.x); \
+				const PixelData& pixel = imagePixels[idx]; \
+				if (!pixel.range.isValid()) \
+					continue; \
+				const Cost* costs = imageCosts.cdata()+pixel.idx; \
+				AccumCost* accums = imageAccumCosts.data()+pixel.idx; \
+				const LineData& Lp = lines(0); \
+				LineData& Ls = lines(1); \
+				Ls.R = pixel.range; \
+				const ImageGray::Type I(leftImage.imageGray(u)); \
+				pixelAccum(costs, Lp, Ls, accums, I-Ip); \
+				Ip = I; \
+				lines.NextLine(); \
+			} while (cond)
+		{ // width-down
+		auto pixels = [&](int x) {
+			ImageRef u(x,0);
+			ACCUM_PIXELS(++u.y < sizeValid.height);
+		};
+		volatile Thread::safe_t idxPixel(-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelAccumInc(sizeValid.width, idxPixel, pixels));
+		WaitThreadWorkers(threads.size());
+		}
+		{ // height-right
+		auto pixels = [&](int y) {
+			ImageRef u(0,y);
+			ACCUM_PIXELS(++u.x < sizeValid.width);
+		};
+		volatile Thread::safe_t idxPixel(-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelAccumInc(sizeValid.height, idxPixel, pixels));
+		WaitThreadWorkers(threads.size());
+		}
+		{ // width-up
+		auto pixels = [&](int x) {
+			ImageRef u(x,sizeValid.height-1);
+			ACCUM_PIXELS(--u.y >= 0);
+		};
+		volatile Thread::safe_t idxPixel(-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelAccumInc(sizeValid.width, idxPixel, pixels));
+		WaitThreadWorkers(threads.size());
+		}
+		{ // height-left
+		auto pixels = [&](int y) {
+			ImageRef u(sizeValid.width-1,y);
+			ACCUM_PIXELS(--u.x >= 0);
+		};
+		volatile Thread::safe_t idxPixel(-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelAccumInc(sizeValid.height, idxPixel, pixels));
+		WaitThreadWorkers(threads.size());
+		}
+		if (numDirs == 4) {
+		{ // width-right-down
+		auto pixels = [&](int x) {
+			ImageRef u(x,0);
+			ACCUM_PIXELS(++u.x < sizeValid.width && ++u.y < sizeValid.height);
+		};
+		volatile Thread::safe_t idxPixel(-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelAccumInc(sizeValid.width, idxPixel, pixels));
+		}
+		{ // height-right-down
+		auto pixels = [&](int y) {
+			ImageRef u(0,y);
+			ACCUM_PIXELS(++u.x < sizeValid.width && ++u.y < sizeValid.height);
+		};
+		volatile Thread::safe_t idxPixel(0);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelAccumInc(sizeValid.height, idxPixel, pixels));
+		}
+		WaitThreadWorkers(threads.size()*2);
+		{ // width-left-down
+		auto pixels = [&](int x) {
+			ImageRef u(x,0);
+			ACCUM_PIXELS(--u.x >= 0  && ++u.y < sizeValid.height);
+		};
+		volatile Thread::safe_t idxPixel(-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelAccumInc(sizeValid.width-1, idxPixel, pixels));
+		}
+		{ // height-left-down
+		auto pixels = [&](int y) {
+			ImageRef u(sizeValid.width-1,y);
+			ACCUM_PIXELS(--u.x >= 0 && ++u.y < sizeValid.height);
+		};
+		volatile Thread::safe_t idxPixel(-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelAccumInc(sizeValid.height, idxPixel, pixels));
+		}
+		WaitThreadWorkers(threads.size()*2);
+		{ // width-right-up
+		auto pixels = [&](int x) {
+			ImageRef u(x,sizeValid.height-1);
+			ACCUM_PIXELS(++u.x < sizeValid.width && --u.y >= 0);
+		};
+		volatile Thread::safe_t idxPixel(0);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelAccumInc(sizeValid.width, idxPixel, pixels));
+		}
+		{ // height-right-up
+		auto pixels = [&](int y) {
+			ImageRef u(0,y);
+			ACCUM_PIXELS(++u.x < sizeValid.width && --u.y >= 0);
+		};
+		volatile Thread::safe_t idxPixel(sizeValid.height);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelAccumDec(idxPixel, pixels));
+		}
+		WaitThreadWorkers(threads.size()*2);
+		{ // width-left-up
+		auto pixels = [&](int x) {
+			ImageRef u(x,sizeValid.height-1);
+			ACCUM_PIXELS(--u.x >= 0 && --u.y >= 0);
+		};
+		volatile Thread::safe_t idxPixel(sizeValid.width);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelAccumDec(idxPixel, pixels));
+		}
+		{ // height-left-up
+		auto pixels = [&](int y) {
+			ImageRef u(sizeValid.width-1,y);
+			ACCUM_PIXELS(--u.x >= 0 && --u.y >= 0);
+		};
+		volatile Thread::safe_t idxPixel(sizeValid.height-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelAccumDec(idxPixel, pixels));
+		}
+		WaitThreadWorkers(threads.size()*2);
+		}
+		#undef ACCUM_PIXELS
+	} else {
+	const ImageRef dirs[] = {{-1,0}, {0,-1}, {-1,-1}, {1,-1}};
+	struct AccumLines {
+		const Disparity maxNumDisp;
+		LineData* linesBuffer;
+		LineData* lines[numDirs][2];
+		AccumLines(Disparity _maxNumDisp) : maxNumDisp(_maxNumDisp), linesBuffer(NULL) {}
+		~AccumLines() { delete[] linesBuffer; }
+		void Init(int w) {
+			const int linewidth(w+2);
+			const int buffersize(2*linewidth*numDirs);
+			if (linesBuffer == NULL) {
+				linesBuffer = new LineData[buffersize];
+				for (int i=0; i<buffersize; ++i)
+					linesBuffer[i].L = new AccumCost[maxNumDisp];
+				LineData* line(linesBuffer-linewidth+1);
+				for (int idxDir=0; idxDir<numDirs; ++idxDir) {
+					lines[idxDir][0] = (line+=linewidth);
+					lines[idxDir][1] = (line+=linewidth);
+				}
+			}
+			for (int i=0; i<buffersize; ++i) {
+				LineData& line = linesBuffer[i]; 
+				memset(line.L, 0, sizeof(AccumCost)*maxNumDisp);
+				line.R.minDisp = line.R.maxDisp = 0;
+			}
+		}
+		void NextLine() {
+			for (int idxDir=0; idxDir<numDirs; ++idxDir)
+				std::swap(lines[idxDir][0], lines[idxDir][1]);
+		}
+		const LineData& operator() (int idxDir, int r, int c) const { return lines[idxDir][r][c]; }
+		LineData& operator() (int idxDir, int r, int c) { return lines[idxDir][r][c]; }
+	};
+	AccumLines lines(maxNumDisp);
+	#define ACCUM_PIXELS(dx, dy, _x) \
+		const int idx(r*sizeValid.width+c); \
+		const PixelData& pixel = imagePixels[idx]; \
+		if (!pixel.range.isValid()) \
+			continue; \
+		const Cost* costs = imageCosts.cdata()+pixel.idx; \
+		AccumCost* accums = imageAccumCosts.data()+pixel.idx; \
+		for (int idxDir=0; idxDir<numDirs; ++idxDir) { \
+			const ImageRef& dir = dirs[idxDir]; \
+			const LineData& Lp = lines(idxDir,1+dir.y,_x+dir.x); \
+			LineData& Ls = lines(idxDir,1,_x); \
+			Ls.R = pixel.range; \
+			const ImageRef xp(c+dx, r+dy); \
+			const ImageGray::Type DI(leftImage.imageGray(r,c)-(leftImage.imageGray.isInside(xp)?leftImage.imageGray(xp):Igray)); \
+			pixelAccum(costs, Lp, Ls, accums, DI); \
+		}
+	lines.Init(sizeValid.width);
+	for (int r=0; r<sizeValid.height; ++r) {
+		for (int c=0; c<sizeValid.width; ++c) {
+			ACCUM_PIXELS(dir.x, dir.y, c);
+		}
+		lines.NextLine();
+	}
+	lines.Init(sizeValid.width);
+	for (int r=sizeValid.height; --r>=0; ) {
+		for (int c=sizeValid.width; --c>=0; ) {
+			ACCUM_PIXELS(-dir.x, -dir.y, sizeValid.width-1-c);
+		}
+		lines.NextLine();
+	}
+	#undef ACCUM_PIXELS
+	}
+	}
+
+	// select best disparity and cost
+	{
+	disparityMap.create(sizeValid);
+	costMap.create(sizeValid);
+	auto pixel = [&](int idx) {
+		const PixelData& pixel = imagePixels[idx];
+		if (pixel.range.isValid()) {
+			const AccumCost* accums = imageAccumCosts.cdata()+pixel.idx;
+			const AccumCost* bestAccum = accums;
+			for (const AccumCost *accum=accums+1, *accumEnd=accums+pixel.range.numDisp(); accum<accumEnd; ++accum) {
+				if (*bestAccum > *accum)
+					bestAccum = accum;
+			}
+			disparityMap(idx) = pixel.range.minDisp+(Disparity)(bestAccum-accums);
+			costMap(idx) = *bestAccum;
+		} else {
+			disparityMap(idx) = pixel.range.minDisp;
+			costMap(idx) = NO_ACCUMCOST;
+		}
+	};
+	ASSERT(threads.IsEmpty());
+	if (!threads.empty()) {
+		volatile Thread::safe_t idxPixel(-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelAccumInc(sizeValid.area(), idxPixel, pixel));
+		WaitThreadWorkers(threads.size());
+	} else
+	for (int r=0; r<sizeValid.height; ++r)
+		for (int c=0; c<sizeValid.width; ++c)
+			pixel(r*sizeValid.width+c);
+	}
+}
+
+#if SGM_SIMILARITY == SGM_SIMILARITY_CENSUS
+// Compute the census bit-mask for all the pixels of the image
+void SemiGlobalMatcher::CensusTransform(const Image8U& imageGray, CensusMap& imageCensus)
+{
+	ASSERT(!imageGray.empty());
+	const cv::Size size(imageGray.size());
+	const cv::Size sizeValid(size.width-2*halfWindowSizeX, size.height-2*halfWindowSizeY);
+	imageCensus.create(sizeValid);
+
+	#if 0
+	// pre-process input image to easy CENSUS task
+	ImageGray image;
+	PrefilterXSobel(imageGray, image);
+	#else
+	const Image8U& image = imageGray;
+	#endif
+
+	auto pixel = [&](int, int r, int c) {
+		const ImageRef u(c+halfWindowSizeX, r+halfWindowSizeY);
+		const uint8_t g(image(u));
+		Census& cs = imageCensus(r,c);
+		cs = 0;
+		for (int i=-halfWindowSizeY; i<=halfWindowSizeY; ++i) {
+			for (int j=-halfWindowSizeX; j<=halfWindowSizeX; ++j) {
+				cs <<= 1;
+				if (g <= image(u.y+i, u.x+j))
+					cs += 1;
+			}
+		}
+	};
+	ASSERT(threads.IsEmpty());
+	if (!threads.empty()) {
+		volatile Thread::safe_t idxPixel(-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelProcess(imageCensus.size(), idxPixel, pixel));
+		WaitThreadWorkers(threads.size());
+	} else
+	for (int r=0; r<imageCensus.rows; ++r)
+		for (int c=0; c<imageCensus.cols; ++c)
+			pixel(-1, r, c);
+}
+#endif
+
+// Compute search range from the given disparity-map and setup pixel-map at twice the scale;
+// the validity mask-map is considered as well and upscaled in the same time;
+// return the total size of the disparities searched
+SemiGlobalMatcher::Index SemiGlobalMatcher::Disparity2RangeMap(const DisparityMap& disparityMap, const MaskMap& maskMap, Disparity minNumDisp, Disparity minNumDispInvalid)
+{
+	ASSERT(!disparityMap.empty() && disparityMap.width()<maskMap.width() && disparityMap.height()<maskMap.height());
+	const cv::Size size2x(maskMap.size());
+	imagePixels.resize(size2x.area());
+	Index numCosts(0);
+	maxNumDisp = 0;
+	CLISTDEF0IDX(Disparity,Disparity) disps(31*31);
+	for (int r=0; r<disparityMap.rows; ++r) {
+		const int r2(r == 0 ? 0 : r*2+halfWindowSizeY);
+		ASSERT(r2 < size2x.height);
+		const int offset(r2*size2x.width);
+		int c2e(halfWindowSizeX);
+		const Mask* pm(maskMap.ptr<const Mask>(r*2+halfWindowSizeY, halfWindowSizeX));
+		for (int c=0, c2=0; c<disparityMap.cols; ++c, pm+=2) {
+			Disparity numDisp; Range range;
+			if (*pm == INVALID) {
+				// set empty range
+				range = Range{NO_DISP,NO_DISP};
+				numDisp = 0;
+			} else {
+				// set range based on the estimates around this location
+				const bool bInvalid(disparityMap(r,c) == NO_DISP);
+				// search range around 41x41 or 7x7 window
+				disps.Empty();
+				const int hw(bInvalid ? 20 : 3);
+				for (int i=-hw; i<=hw; ++i) {
+					for (int j=-hw; j<=hw; ++j) {
+						const ImageRef u(c+j,r+i);
+						if (disparityMap.isInside(u)) {
+							const Disparity d(disparityMap(u));
+							if (d != NO_DISP)
+								disps.push_back(d);
+						}
+					}
+				}
+				// set search range
+				if (disps.size() < 3) {
+					range.maxDisp = MINF((Disparity)(disparityMap.width()*2/3), minNumDispInvalid);
+					range.minDisp = -range.maxDisp;
+					numDisp = range.numDisp();
+				} else {
+					const Disparity disp(disps.GetMedian()*2);
+					const auto minmax(disps.GetMinMax());
+					numDisp = (minmax.second-minmax.first)*2;
+					if (numDisp < minNumDisp) {
+						numDisp = minNumDisp;
+						range.minDisp = disp-numDisp/2;
+						range.maxDisp = disp+(numDisp+1)/2;
+					} else {
+						const Disparity maxNumDisp(bInvalid ? 64 : 32);
+						if (numDisp > maxNumDisp) {
+							range.minDisp = disp-(maxNumDisp*(disp-minmax.first*2)+1)/numDisp;
+							range.maxDisp = disp+(maxNumDisp*(minmax.second*2+1-disp)+1)/numDisp;
+							numDisp = range.numDisp();
+						} else {
+							range.minDisp = disp-numDisp/2;
+							range.maxDisp = disp+(numDisp+1)/2;
+						}
+					}
+				}
+				ASSERT(range.numDisp() == numDisp);
+				if (maxNumDisp < numDisp)
+					maxNumDisp = numDisp;
+			}
+			c2e += 2;
+			do {
+				ASSERT(c2 < size2x.width);
+				PixelData& pixel = imagePixels[offset+c2];
+				pixel.range = range;
+				pixel.idx = numCosts;
+				numCosts += numDisp;
+			} while (++c2 < c2e);
+		}
+		ASSERT(c2e < size2x.width);
+		do {
+			const PixelData& pixel = imagePixels[offset+c2e-1];
+			PixelData& _pixel = imagePixels[offset+c2e];
+			_pixel.range = pixel.range;
+			_pixel.idx = numCosts;
+			numCosts += pixel.range.numDisp();
+		} while (++c2e < size2x.width);
+		const int _offsete((r+1 == disparityMap.rows ? size2x.height : r*2+halfWindowSizeY+2)*size2x.width);
+		for (int _offset=offset+size2x.width; _offset<_offsete; _offset+=size2x.width) {
+			for (int c2=0; c2<size2x.width; ++c2) {
+				const PixelData& pixel = imagePixels[offset+c2];
+				PixelData& _pixel = imagePixels[_offset+c2];
+				_pixel.range = pixel.range;
+				_pixel.idx = numCosts;
+				numCosts += pixel.range.numDisp();
+			}
+		}
+	}
+	return numCosts;
+}
+
+// Check for consistency between a left-to-right and right-to-left pair of stereo results;
+// the results are expected to be opposite in sign but equal in magnitude;
+// the valid disparities are returned in the left map
+void SemiGlobalMatcher::ConsistencyCrossCheck(DisparityMap& l2r, const DisparityMap& r2l, Disparity thCross)
+{
+	ASSERT(thCross >= 0);
+	ASSERT(!l2r.empty() && !r2l.empty());
+	ASSERT(l2r.height() == r2l.height());
+
+	auto pixel = [&](int, int r, int c) {
+		Disparity& ld = l2r(r,c);
+		if (ld == NO_DISP)
+			return;
+		// compute the corresponding disparity pixel according to the disparity value
+		const ImageRef v(c+ld,r);
+		// check image bounds
+		if (v.x < 0 || v.x >= r2l.width()) {
+			ld = NO_DISP;
+			return;
+		}
+		// check right disparity is valid
+		const Disparity rd = r2l(v);
+		if (r2l(v) == NO_DISP) {
+			ld = NO_DISP;
+			return;
+		}
+		// check disparity consistency:
+		//   since the left and right disparities are opposite in sign,
+		//   we determine their similarity by *summing* them, rather
+		//   than differencing them as you might expect
+		if (ABS(ld + rd) > thCross)
+			ld = NO_DISP;
+	};
+	ASSERT(threads.IsEmpty());
+	if (!threads.empty()) {
+		volatile Thread::safe_t idxPixel(-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelProcess(l2r.size(), idxPixel, pixel));
+		WaitThreadWorkers(threads.size());
+	} else
+	for (int r=0; r<l2r.rows; ++r)
+		for (int c=0; c<l2r.cols; ++c)
+			pixel(-1, r, c);
+}
+
+// Discard disparities that have a hight similarity score
+void SemiGlobalMatcher::FilterByCost(DisparityMap& disparityMap, const AccumCostMap& costMap, AccumCost th)
+{
+	ASSERT(th > 0);
+	ASSERT(!disparityMap.empty() && disparityMap.size() == costMap.size());
+
+	auto pixel = [&](int, int r, int c) {
+		Disparity& d = disparityMap(r,c);
+		if (d == NO_DISP)
+			return;
+		if (costMap(r,c) > th)
+			d = NO_DISP;
+	};
+	ASSERT(threads.IsEmpty());
+	if (!threads.empty()) {
+		volatile Thread::safe_t idxPixel(-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelProcess(disparityMap.size(), idxPixel, pixel));
+		WaitThreadWorkers(threads.size());
+	} else
+	for (int r=0; r<disparityMap.rows; ++r)
+		for (int c=0; c<disparityMap.cols; ++c)
+			pixel(-1, r, c);
+}
+
+// Mark empty regions on the border of the disparity-map as invalid;
+//  thValid is the number of valid disparities encountered to consider the valid region starts
+void SemiGlobalMatcher::ExtractMask(const DisparityMap& disparityMap, MaskMap& maskMap, int thValid)
+{
+	ASSERT(!disparityMap.empty());
+	ASSERT(maskMap.empty() || disparityMap.size() == maskMap.size());
+	if (disparityMap.size() != maskMap.size()) {
+		maskMap.create(disparityMap.size());
+		maskMap.setTo(VALID);
+	}
+
+	#define MASK_PIXEL() \
+		Mask& m = maskMap(r,c); \
+		if (m == INVALID) \
+			continue; \
+		m = INVALID; \
+		if (disparityMap(r,c) == NO_DISP) \
+			continue; \
+		if (++numValid >= thValid) \
+			break
+
+	// left-right direction
+	{
+	auto pixel = [&](int r) {
+		int numValid(0);
+		for (int c=0; c<disparityMap.cols; ++c) {
+			MASK_PIXEL();
+		}
+	};
+	ASSERT(threads.IsEmpty());
+	if (!threads.empty()) {
+		volatile Thread::safe_t idxPixel(-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelAccumInc(disparityMap.height(), idxPixel, pixel));
+		WaitThreadWorkers(threads.size());
+	} else
+	for (int r=0; r<disparityMap.rows; ++r)
+		pixel(r);
+	}
+
+	// right-left direction
+	{
+	auto pixel = [&](int r) {
+		int numValid(0);
+		for (int c=disparityMap.cols; --c>=0; ) {
+			MASK_PIXEL();
+		}
+	};
+	ASSERT(threads.IsEmpty());
+	if (!threads.empty()) {
+		volatile Thread::safe_t idxPixel(-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelAccumInc(disparityMap.height(), idxPixel, pixel));
+		WaitThreadWorkers(threads.size());
+	} else
+	for (int r=0; r<disparityMap.rows; ++r)
+		pixel(r);
+	}
+
+	#undef MASK_PIXEL
+
+	#if 0
+	#define MASK_PIXEL() \
+		Mask& m = maskMap(r,c); \
+		if (m == INVALID) \
+			continue; \
+		if (disparityMap(r,c) != NO_DISP) \
+			break; \
+		m = INVALID
+
+	// top-bottom direction
+	{
+	auto pixel = [&](int c) {
+		for (int r=0; r<disparityMap.rows; ++r) {
+			MASK_PIXEL();
+		}
+	};
+	ASSERT(threads.IsEmpty());
+	if (!threads.empty()) {
+		volatile Thread::safe_t idxPixel(-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelAccumInc(disparityMap.width(), idxPixel, pixel));
+		WaitThreadWorkers(threads.size());
+	} else
+	for (int c=0; c<disparityMap.cols; ++c)
+		pixel(c);
+	}
+
+	// bottom-top direction
+	{
+	auto pixel = [&](int c) {
+		for (int r=disparityMap.rows; --r>=0; ) {
+			MASK_PIXEL();
+		}
+	};
+	ASSERT(threads.IsEmpty());
+	if (!threads.empty()) {
+		volatile Thread::safe_t idxPixel(-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelAccumInc(disparityMap.width(), idxPixel, pixel));
+		WaitThreadWorkers(threads.size());
+	} else
+	for (int c=0; c<disparityMap.cols; ++c)
+		pixel(c);
+	}
+
+	#undef MASK_PIXEL
+	#endif
+}
+
+// Translate disparity-map between left-to-right and right-to-left stereo pair
+// by translating to the x coordinated dictated by the disparity and negating the sign;
+// the sub-pixel steps is assumed to be one and
+// the input disparity-map has been cross-checked for consistency
+void SemiGlobalMatcher::FlipDirection(const DisparityMap& l2r, DisparityMap& r2l)
+{
+	ASSERT(!l2r.empty());
+	ASSERT(r2l.empty() || l2r.height() == r2l.height());
+	if (r2l.empty())
+		r2l.create(l2r.size());
+	r2l.setTo(NO_DISP);
+
+	auto pixel = [&](int r) {
+		for (int c=0; c<l2r.cols; ++c) {
+			const Disparity d = l2r(r,c);
+			if (d == NO_DISP)
+				continue;
+			// compute the corresponding disparity pixel according to the disparity value and set right disparity value
+			for (int x=MAXF(c+d-1,0), xe=MINF(c+d+2,r2l.width()); x<xe; ++x)
+				r2l(r,x) = -d;
+		}
+	};
+	ASSERT(threads.IsEmpty());
+	if (!threads.empty()) {
+		volatile Thread::safe_t idxPixel(-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelAccumInc(l2r.rows, idxPixel, pixel));
+		WaitThreadWorkers(threads.size());
+	} else
+	for (int r=0; r<l2r.rows; ++r)
+		pixel(r);
+}
+
+// Translate disparity-map between left-to-right and right-to-left stereo pair
+// by translating to the x coordinated dictated by the disparity and negating the sign;
+// the input disparity-map has been cross-checked for consistency
+void SemiGlobalMatcher::UpscaleMask(MaskMap& maskMap, const cv::Size& size2x)
+{
+	ASSERT(!maskMap.empty());
+	MaskMap maskMap2x(size2x, INVALID);
+
+	auto pixel = [&](int, int r, int c) {
+		const int r2(r*2+halfWindowSizeY), c2(c*2+halfWindowSizeX);
+		const Mask m(maskMap(r,c));
+		for (int i=0; i<2; ++i) {
+			for (int j=0; j<2; ++j) {
+				const ImageRef u(c2+j,r2+i);
+				if (maskMap2x.isInside(u))
+					maskMap2x(u) = m;
+			}
+		}
+	};
+	ASSERT(threads.IsEmpty());
+	if (!threads.empty()) {
+		volatile Thread::safe_t idxPixel(-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelProcess(maskMap.size(), idxPixel, pixel));
+		WaitThreadWorkers(threads.size());
+	} else
+	for (int r=0; r<maskMap.rows; ++r)
+		for (int c=0; c<maskMap.cols; ++c)
+			pixel(-1, r, c);
+
+	cv::swap(maskMap, maskMap2x);
+}
+
+// Sub-pixel disparity-map refinement based on the accumulated-cost volume
+void SemiGlobalMatcher::RefineDisparityMap(DisparityMap& disparityMap) const
+{
+	ASSERT(!disparityMap.empty());
+	if (subpixelSteps <= 1)
+		return;
+	if (subpixelMode == SUBPIXEL_NA) {
+		// simply multiply disparity to the sub-pixel steps
+		auto pixel = [&](int, int r, int c) {
+			Disparity& d = disparityMap(r,c);
+			if (d == NO_DISP)
+				return;
+			ASSERT((int)d*subpixelSteps > (int)std::numeric_limits<Disparity>::min());
+			ASSERT((int)d*subpixelSteps < (int)std::numeric_limits<Disparity>::max());
+			d *= subpixelSteps;
+		};
+		ASSERT(threads.IsEmpty());
+		if (!threads.empty()) {
+			volatile Thread::safe_t idxPixel(-1);
+			FOREACH(i, threads)
+				threads.AddEvent(new EVTPixelProcess(disparityMap.size(), idxPixel, pixel));
+			WaitThreadWorkers(threads.size());
+		} else
+		for (int r=0; r<disparityMap.rows; ++r)
+			for (int c=0; c<disparityMap.cols; ++c)
+				pixel(-1, r, c);
+		return;
+	}
+	// proposed subpixelMode algorithms
+	typedef float real;
+	struct Fit {
+		static real linear(real x) {
+			return x/real(2);
+		}
+		static real poly4(real x) {
+			return (x*x*x*x + x)/real(4);
+		}
+		static real parabola(real x) {
+			return x/(x+real(1));
+		}
+		static real sine(real x) {
+			return real(0.5) * (SIN((x-real(1))*real(HALF_PI)) + real(1));
+		}
+		static real cosine(real x) {
+			return (real(1) - COS(x*(real)(PI/3.0)));
+		}
+		static real lcBlend(real x) {
+			const real factor(real(1.195) - COS(x*(real)(PI/2.3)));
+			return cosine(x)*factor + linear(x)*(real(1)-factor);
+		}
+		// subpixelMode interpolation when only two values are available
+		// returns fraction of distance from the primary to the other value
+		static real semisubpixel(AccumCost primary, AccumCost other) {
+			return real(0.5)*(static_cast<real>(primary) / static_cast<real>(other));
+		}
+		// compute the offset to be added to the integer disparity to get the final result
+		static real subpixelMode(AccumCost prev, AccumCost center, AccumCost next, SgmSubpixelMode subpixelMode) {
+			ASSERT(prev != NO_ACCUMCOST && center != NO_ACCUMCOST && next != NO_ACCUMCOST);
+			// use a lower quality two value interpolation if only two values are available
+			if (prev == center)
+				return center == next ? real(0) : semisubpixel(center, next);
+			if (center == next)
+				return prev == center ? real(0) : -semisubpixel(center, prev);
+			// pick which direction to interpolate in
+			const AccumCost ld(prev-center);
+			const AccumCost rd(next-center);
+			real x, mult;
+			if (ld < rd) {
+				x = static_cast<real>(ld) / static_cast<real>(rd);
+				mult = real(1);
+			} else {
+				x = static_cast<real>(rd) / static_cast<real>(ld);
+				mult = real(-1);
+			}
+			// use the selected subpixelMode function
+			real value(0);
+			switch (subpixelMode) {
+			case SUBPIXEL_LINEAR:   value = linear(x); break;
+			case SUBPIXEL_POLY4:    value = poly4(x); break;
+			case SUBPIXEL_PARABOLA: value = parabola(x); break;
+			case SUBPIXEL_SINE:     value = sine(x); break;
+			case SUBPIXEL_COSINE:   value = cosine(x); break;
+			case SUBPIXEL_LC_BLEND: value = lcBlend(x); break;
+			};
+			// complete computation
+			return (value - real(0.5))*mult;
+		}
+	};
+	// estimate sub-pixel disparity based on the cost values
+	auto pixel = [&](int idx) {
+		const PixelData& pixel = imagePixels[idx];
+		if (pixel.range.numDisp() < 2)
+			return;
+		Disparity& d = disparityMap(idx);
+		if (d == NO_DISP)
+			return;
+		const AccumCost* accums = imageAccumCosts.cdata()+pixel.idx;
+		const int idxDisp(d-pixel.range.minDisp);
+		real disparity((real)d);
+		if (d == pixel.range.minDisp)
+			disparity += Fit::semisubpixel(accums[idxDisp], accums[idxDisp+1]);
+		else if (d+1 == pixel.range.maxDisp)
+			disparity -= Fit::semisubpixel(accums[idxDisp], accums[idxDisp-1]);
+		else
+			disparity += Fit::subpixelMode(accums[idxDisp-1], accums[idxDisp], accums[idxDisp+1], subpixelMode);
+		ASSERT(ROUND2INT(disparity*subpixelSteps) > (int)std::numeric_limits<Disparity>::min());
+		ASSERT(ROUND2INT(disparity*subpixelSteps) < (int)std::numeric_limits<Disparity>::max());
+		d = (Disparity)ROUND2INT(disparity*subpixelSteps);
+	};
+	ASSERT(threads.IsEmpty());
+	if (!threads.empty()) {
+		volatile Thread::safe_t idxPixel(-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelAccumInc(disparityMap.size().area(), idxPixel, pixel));
+		WaitThreadWorkers(threads.size());
+	} else
+	for (int r=0; r<disparityMap.rows; ++r)
+		for (int c=0; c<disparityMap.cols; ++c)
+			pixel(r*disparityMap.cols+c);
+}
+
+
+// extract disparity and range from the pixel-map
+void SemiGlobalMatcher::DisplayState(const cv::Size& size) const
+{
+	Image8U disparity(size);
+	Image8U range(size);
+	for (int idx=0; idx<size.area(); ++idx) {
+		const PixelData& pixel = imagePixels[idx];
+		disparity(idx) = (uint8_t)(CLAMP(pixel.range.avgDisp(), Disparity(-128), Disparity(127))+Disparity(128));
+		range(idx) = (uint8_t)CLAMP(pixel.range.numDisp(), Disparity(0), Disparity(255));
+	}
+
+	disparity.Show("Disparity", -1, false);
+	range.Show("Range", -1, false);
+
+	char c = 'a';
+	while (std::tolower(c) != 'q')
+		c = cv::waitKey();
+
+	cv::destroyAllWindows();
+}
+
+// Compute the disparity-map for the rectified image from the given depth-map of the un-rectified image;
+// the disparity map needs to be already constructed at the desired size (valid size, excluding the border)
+void SemiGlobalMatcher::Depth2DisparityMap(const DepthMap& depthMap, const Matrix3x3& invH, const Matrix4x4& invQ, Disparity subpixelSteps, DisparityMap& disparityMap)
+{
+	auto pixel = [&](int, int r, int c) {
+		const ImageRef x(c+halfWindowSizeX,r+halfWindowSizeY); Point2f u;
+		ProjectVertex_3x3_2_2(invH.val, x.ptr(), u.ptr());
+		float depth, disparity;
+		if (!depthMap.sampleSafe(depth, u, [](Depth d) { return d > 0; }) || !Image::Depth2Disparity(invQ, u, depth, disparity))
+			disparityMap(r,c) = NO_DISP;
+		else
+			disparityMap(r,c) = (Disparity)ROUND2INT(disparity*subpixelSteps);
+	};
+	ASSERT(threads.IsEmpty());
+	if (!threads.empty()) {
+		volatile Thread::safe_t idxPixel(-1);
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTPixelProcess(disparityMap.size(), idxPixel, pixel));
+		WaitThreadWorkers(threads.size());
+	} else
+	for (int r=0; r<disparityMap.rows; ++r)
+		for (int c=0; c<disparityMap.cols; ++c)
+			pixel(-1, r, c);
+}
+
+// Compute the depth-map for the un-rectified image from the given disparity-map of the rectified image
+void SemiGlobalMatcher::Disparity2DepthMap(const DisparityMap& disparityMap, const AccumCostMap& costMap, const Matrix3x3& H, const Matrix4x4& Q, Disparity subpixelSteps, DepthMap& depthMap, ConfidenceMap& confMap)
+{
+	ASSERT(costMap.empty() || costMap.size() == disparityMap.size());
+	ASSERT(!depthMap.empty());
+	ASSERT(confMap.empty() || confMap.size() == depthMap.size());
+
+	if (!costMap.empty()) {
+		confMap.create(depthMap.size());
+		auto pixel = [&](int, int r, int c) {
+			const ImageRef x(c,r); Point2f u;
+			ProjectVertex_3x3_2_2(H.val, x.ptr(), u.ptr());
+			u.x -= halfWindowSizeX; u.y -= halfWindowSizeY;
+			float disparity;
+			if (!disparityMap.sampleSafe(disparity, u, [](Disparity d) { return d != NO_DISP; })) {
+				depthMap(x) = 0;
+				confMap(x) = 0;
+				return;
+			}
+			float cost;
+			costMap.sampleSafe(cost, u, [](AccumCost c) { return c != NO_ACCUMCOST; });
+			depthMap(x) = Image::Disparity2Depth(Q, u, disparity/subpixelSteps);
+			confMap(x) = 1.f/(cost+1);
+		};
+		ASSERT(threads.IsEmpty());
+		if (!threads.empty()) {
+			volatile Thread::safe_t idxPixel(-1);
+			FOREACH(i, threads)
+				threads.AddEvent(new EVTPixelProcess(depthMap.size(), idxPixel, pixel));
+			WaitThreadWorkers(threads.size());
+		} else
+		for (int r=0; r<depthMap.rows; ++r)
+			for (int c=0; c<depthMap.cols; ++c)
+				pixel(-1, r, c);
+	} else {
+		auto pixel = [&](int, int r, int c) {
+			const ImageRef x(c,r); Point2f u;
+			ProjectVertex_3x3_2_2(H.val, x.ptr(), u.ptr());
+			u.x -= halfWindowSizeX; u.y -= halfWindowSizeY;
+			float disparity;
+			if (!disparityMap.sampleSafe(disparity, u, [](Disparity d) { return d != NO_DISP; }))
+				depthMap(x) = 0;
+			else
+				depthMap(x) = Image::Disparity2Depth(Q, u, disparity/subpixelSteps);
+		};
+		ASSERT(threads.IsEmpty());
+		if (!threads.empty()) {
+			volatile Thread::safe_t idxPixel(-1);
+			FOREACH(i, threads)
+				threads.AddEvent(new EVTPixelProcess(depthMap.size(), idxPixel, pixel));
+			WaitThreadWorkers(threads.size());
+		} else
+		for (int r=0; r<depthMap.rows; ++r)
+			for (int c=0; c<depthMap.cols; ++c)
+				pixel(-1, r, c);
+	}
+}
+
+// Compute the depth-map for the un-rectified image from the given disparity-map of the rectified image
+// by projecting the point-cloud to the image and setting the pixel depth as the average of the closest 4 points;
+// return false if the disparity map is completely empty
+bool SemiGlobalMatcher::ProjectDisparity2DepthMap(const DisparityMap& disparityMap, const AccumCostMap& costMap, const Matrix4x4& Q, Disparity subpixelSteps, DepthMap& depthMap, DepthRangeMap& depthRangeMap, ConfidenceMap& confMap)
+{
+	ASSERT(costMap.empty() || costMap.size() == disparityMap.size());
+	ASSERT(!depthMap.empty());
+	ASSERT(confMap.empty() || confMap.size() == depthMap.size());
+
+	// store 4 depths per pixel, the closest one from each side, ordered as:
+	//  - left, right, top, bottom (x pointing right and y pointing down)
+	// with a small overlap border
+	const float overlapBorder(0.5f+0.25f);
+	struct DepthData {
+		Image32F distMap;
+		DepthMap depthMap;
+		DepthRangeMap depthRangeMap;
+		ConfidenceMap confMap;
+		void Init(const cv::Size& size) {
+			distMap.create(size);
+			depthMap.create(size);
+			depthRangeMap.create(size);
+			confMap.create(size);
+			depthMap.memset(0);
+		}
+	} depthDatas[4];
+	for (int i=0; i<4; ++i)
+		depthDatas[i].Init(depthMap.size());
+	for (int r=0; r<disparityMap.rows; ++r) {
+		for (int c=0; c<disparityMap.cols; ++c) {
+			const Disparity disparityInt(disparityMap(r,c));
+			if (disparityInt == NO_DISP)
+				continue;
+			const float disparity((float)disparityInt/subpixelSteps);
+			Point2f u, v;
+			const ImageRef dx(c+halfWindowSizeX,r+halfWindowSizeY);
+			const Depth depth(Image::Disparity2Depth(Q, dx, disparity, u));
+			if (depth <= 0)
+				continue;
+			const Disparity disparityCenter((Disparity)FLOOR2INT(disparity));
+			const DepthRange depthRange(
+				Image::Disparity2Depth(Q, dx, (float)(disparityCenter-1)),
+				Image::Disparity2Depth(Q, dx, (float)(disparityCenter+1))
+			);
+			ASSERT(ISINSIDE(depth, depthRange.x, depthRange.y));
+			const float cost(costMap.empty()?0.f:1.f/(costMap(r,c)+1));
+			const ImageRef x(FLOOR2INT(u));
+			u.x -= 0.5f; u.y -= 0.5f;
+			for (int i=-1; i<=1; ++i) {
+				for (int j=-1; j<=1; ++j) {
+					const ImageRef nx(x.x+j,x.y+i);
+					if (!depthMap.isInside(nx))
+						continue;
+					const Point2f dist((float)nx.x-u.x,(float)nx.y-u.y);
+					const Point2f absDist(ABS(dist));
+					if (absDist.x > overlapBorder || absDist.y > overlapBorder)
+						continue;
+					const int idx((dist.x<0?1:0)+(dist.y<0?2:0));
+					DepthData& depthData = depthDatas[idx];
+					const float deistSq(normSq(dist));
+					float& ndeistSq = depthData.distMap(nx);
+					Depth& ndepth = depthData.depthMap(nx);
+					if (ndepth > 0 && ndeistSq <= deistSq)
+						continue;
+					ndeistSq = deistSq;
+					ndepth = depth;
+					depthData.depthRangeMap(nx) = depthRange;
+					depthData.confMap(nx) = cost;
+				}
+			}
+		}
+	}
+	typedef TAccumulator<Vec4f> ValueAccumulator;
+	depthRangeMap.create(depthMap.size());
+	confMap.create(depthMap.size());
+	const float thDist(SQUARE(0.75f));
+	const Depth thDepth(0.02f);
+	unsigned numDepths(0);
+	for (int r=0; r<depthMap.rows; ++r) {
+		for (int c=0; c<depthMap.cols; ++c) {
+			float distCenter(std::numeric_limits<float>::max());
+			Depth depthCenter;
+			for (int i=0; i<4; ++i) {
+				const DepthData& depthData = depthDatas[i];
+				const Depth depth(depthData.depthMap(r,c));
+				if (depth <= 0)
+					continue;
+				const float dist(depthData.distMap(r,c));
+				if (distCenter > dist) {
+					distCenter = dist;
+					depthCenter = depth;
+				}
+			}
+			if (distCenter > thDist) {
+				depthMap(r,c) = Depth(0);
+				continue;
+			}
+			ValueAccumulator acc(Vec4f::ZERO, 0.f);
+			for (int i=0; i<4; ++i) {
+				const DepthData& depthData = depthDatas[i];
+				const Depth depth(depthData.depthMap(r,c));
+				if (depth <= 0 || !IsDepthSimilar(depthCenter, depth, thDepth))
+					continue;
+				const DepthRange& depthRange(depthData.depthRangeMap(r,c));
+				acc.Add(Vec4f((float)depth,depthRange.x,depthRange.y,depthData.confMap(r,c)), SQRT(depthData.distMap(r,c)));
+			}
+			ASSERT(!acc.IsEmpty());
+			const Vec4f value(acc.Normalized());
+			depthMap(r,c) = value(0);
+			depthRangeMap(r,c) = DepthRange(value(1),value(2));
+			confMap(r,c) = value(3);
+			++numDepths;
+		}
+	}
+	if (costMap.empty())
+		confMap.release();
+	return numDepths > 0;
+}
+
+
+EventThreadPool SemiGlobalMatcher::threads;
+Semaphore SemiGlobalMatcher::sem;
+
+// start worker threads
+void SemiGlobalMatcher::CreateThreads(unsigned nMaxThreads)
+{
+	ASSERT(nMaxThreads > 0);
+	ASSERT(threads.IsEmpty() && threads.empty());
+	if (nMaxThreads > 1) {
+		threads.resize(nMaxThreads);
+		threads.start(ThreadWorker);
+	}
+}
+// destroy worker threads
+void SemiGlobalMatcher::DestroyThreads()
+{
+	ASSERT(threads.IsEmpty());
+	if (!threads.empty()) {
+		FOREACH(i, threads)
+			threads.AddEvent(new EVTClose());
+		threads.Release();
+	}
+}
+
+void* SemiGlobalMatcher::ThreadWorker(void*) {
+	while (true) {
+		CAutoPtr<Event> evt(threads.GetEvent());
+		switch (evt->GetID()) {
+		case EVT_JOB:
+			evt->Run();
+			break;
+		case EVT_CLOSE:
+			return NULL;
+		default:
+			ASSERT("Should not happen!" == NULL);
+		}
+		sem.Signal();
+	}
+	return NULL;
+}
+void SemiGlobalMatcher::WaitThreadWorkers(unsigned nJobs)
+{
+	while (nJobs-- > 0)
+		sem.Wait();
+	ASSERT(threads.IsEmpty());
+}
+/*----------------------------------------------------------------*/
+
+
+
+// S T R U C T S ///////////////////////////////////////////////////
+
+bool SemiGlobalMatcher::ExportDisparityDataRaw(const String& fileName, const DisparityMap& disparityMap, const AccumCostMap& costMap, const cv::Size& imageSize, const Matrix3x3& H, const Matrix4x4& Q, Disparity subpixelSteps)
+{
+	ASSERT(!disparityMap.empty());
+	ASSERT(costMap.empty() || disparityMap.size() == costMap.size());
+
+	FILE *f = fopen(fileName, "wb");
+	if (f == NULL)
+		return false;
+
+	// write info
+	fwrite(&imageSize.width, sizeof(int), 2, f);
+	fwrite(H.val, sizeof(REAL), 9, f);
+	fwrite(Q.val, sizeof(REAL), 16, f);
+	fwrite(&subpixelSteps, sizeof(Disparity), 1, f);
+
+	// write resolution
+	fwrite(&disparityMap.cols, sizeof(int), 1, f);
+	fwrite(&disparityMap.rows, sizeof(int), 1, f);
+
+	// write disparity-map
+	fwrite(disparityMap.getData(), sizeof(Disparity), disparityMap.area(), f);
+
+	// write cost-map
+	if (!costMap.empty())
+		fwrite(costMap.getData(), sizeof(AccumCost), costMap.area(), f);
+
+	fclose(f);
+	return true;
+} // ExportDisparityDataRaw
+// same as above, but exports also the empty border
+bool SemiGlobalMatcher::ExportDisparityDataRawFull(const String& fileName, const DisparityMap& disparityMap, const AccumCostMap& costMap, const cv::Size& imageSize, const Matrix3x3& H, const Matrix4x4& Q, Disparity subpixelSteps)
+{
+	ASSERT(!disparityMap.empty());
+	ASSERT(costMap.empty() || disparityMap.size() == costMap.size());
+
+	const cv::Size size(disparityMap.width()+2*halfWindowSizeX,disparityMap.height()+2*halfWindowSizeY);
+	const cv::Rect ROI(halfWindowSizeX,halfWindowSizeY, disparityMap.width(),disparityMap.height());
+	DisparityMap disparityMapFull(size, NO_DISP);
+	disparityMap.copyTo(disparityMapFull(ROI));
+	if (costMap.empty())
+		return ExportDisparityDataRaw(fileName, disparityMapFull, costMap, imageSize, H, Q, subpixelSteps);
+	AccumCostMap costMapFull(size, NO_ACCUMCOST);
+	costMap.copyTo(costMapFull(ROI));
+	return ExportDisparityDataRaw(fileName, disparityMapFull, costMapFull, imageSize, H, Q, subpixelSteps);
+} // ExportDisparityDataRawFull
+
+bool SemiGlobalMatcher::ImportDisparityDataRaw(const String& fileName, DisparityMap& disparityMap, AccumCostMap& costMap, cv::Size& imageSize, Matrix3x3& H, Matrix4x4& Q, Disparity& subpixelSteps)
+{
+	FILE *f = fopen(fileName, "rb");
+	if (f == NULL)
+		return false;
+
+	// read info
+	fread(&imageSize.width, sizeof(int), 2, f);
+	fread(H.val, sizeof(REAL), 9, f);
+	fread(Q.val, sizeof(REAL), 16, f);
+	fread(&subpixelSteps, sizeof(Disparity), 1, f);
+	ASSERT(imageSize.width > 0 && imageSize.height > 0);
+
+	// read resolution
+	int w, h;
+	fread(&w, sizeof(int), 1, f);
+	fread(&h, sizeof(int), 1, f);
+	ASSERT(w > 0 && h > 0);
+
+	// read disparity-map
+	disparityMap.create(h,w);
+	fread(disparityMap.getData(), sizeof(Disparity), w*h, f);
+
+	// read cost-map
+	if (fgetc(f) != EOF) {
+		fseek(f, -1, SEEK_CUR);
+		costMap.create(h,w);
+		fread(costMap.getData(), sizeof(AccumCost), w*h, f);
+	}
+
+	fclose(f);
+	return true;
+} // ImportDisparityDataRaw
+// same as above, but imports also the empty border
+bool SemiGlobalMatcher::ImportDisparityDataRawFull(const String& fileName, DisparityMap& disparityMap, AccumCostMap& costMap, cv::Size& imageSize, Matrix3x3& H, Matrix4x4& Q, Disparity& subpixelSteps)
+{
+	if (!ImportDisparityDataRaw(fileName, disparityMap, costMap, imageSize, H, Q, subpixelSteps))
+		return false;
+	const cv::Size sizeValid(disparityMap.width()-2*halfWindowSizeX,disparityMap.height()-2*halfWindowSizeY);
+	const cv::Rect ROI(halfWindowSizeX,halfWindowSizeY, sizeValid.width,sizeValid.height);
+	disparityMap = disparityMap(ROI).clone();
+	if (!costMap.empty())
+		costMap = costMap(ROI).clone();
+	return true;
+} // ImportDisparityDataRawFull
+
+
+// export disparity-map as an image (red - maximum disparity, blue - minimum disparity)
+Image8U3 SemiGlobalMatcher::DisparityMap2Image(const DisparityMap& disparityMap, Disparity minDisparity, Disparity maxDisparity)
+{
+	ASSERT(!disparityMap.empty());
+	// find min and max values
+	if (minDisparity == NO_DISP || maxDisparity == NO_DISP) {
+		CLISTDEF0(Disparity) disparities(0, disparityMap.area());
+		for (int i=disparityMap.area(); i-- > 0; ) {
+			const Disparity disparity = disparityMap[i];
+			if (disparity != NO_DISP)
+				disparities.emplace_back(disparity);
+		}
+		if (!disparities.empty()) {
+			const std::pair<float,float> th(ComputeX84Threshold<Disparity,float>(disparities.data(), disparities.size(), 5.2f));
+			minDisparity = (Disparity)ROUND2INT(th.first-th.second);
+			maxDisparity = (Disparity)ROUND2INT(th.first+th.second);
+		}
+		DEBUG_ULTIMATE("\tdisparity range: [%d, %d]", minDisparity, maxDisparity);
+	}
+	const float sclDepth(1.f/(float)(maxDisparity - minDisparity));
+	// create color image
+	Image8U3 img(cv::Size(disparityMap.width()+2*halfWindowSizeX,disparityMap.height()+2*halfWindowSizeY), Pixel8U::BLACK);
+	for (int r=0; r<disparityMap.rows; ++r) {
+		for (int c=0; c<disparityMap.cols; ++c) {
+			const Disparity disparity(disparityMap(r,c));
+			if (disparity == NO_DISP)
+				continue;
+			img(r+halfWindowSizeY,c+halfWindowSizeX) = Pixel8U::gray2color(CLAMP((float)(maxDisparity-disparity)*sclDepth, 0.f, 1.f));
+		}
+	}
+	return img;
+} // DisparityMap2Image
+bool SemiGlobalMatcher::ExportDisparityMap(const String& fileName, const DisparityMap& disparityMap, Disparity minDisparity, Disparity maxDisparity)
+{
+	if (disparityMap.empty())
+		return false;
+	return DisparityMap2Image(disparityMap, minDisparity, maxDisparity).Save(fileName);
+} // ExportDisparityMap
+
+
+// export point cloud
+bool SemiGlobalMatcher::ExportPointCloud(const String& fileName, const Image& imageData, const DisparityMap& disparityMap, const Matrix4x4& Q, Disparity subpixelSteps)
+{
+	ASSERT(!disparityMap.empty());
+
+	// vertex definition
+	struct Vertex {
+		float x,y,z;
+		uint8_t r,g,b;
+	};
+	// list of property information for a vertex
+	static PLY::PlyProperty vert_props[] = {
+		{"x", PLY::Float32, PLY::Float32, offsetof(Vertex,x), 0, 0, 0, 0},
+		{"y", PLY::Float32, PLY::Float32, offsetof(Vertex,y), 0, 0, 0, 0},
+		{"z", PLY::Float32, PLY::Float32, offsetof(Vertex,z), 0, 0, 0, 0},
+		{"red", PLY::Uint8, PLY::Uint8, offsetof(Vertex,r), 0, 0, 0, 0},
+		{"green", PLY::Uint8, PLY::Uint8, offsetof(Vertex,g), 0, 0, 0, 0},
+		{"blue", PLY::Uint8, PLY::Uint8, offsetof(Vertex,b), 0, 0, 0, 0},
+	};
+	// list of the kinds of elements in the PLY
+	static const char* elem_names[] = {
+		"vertex"
+	};
+
+	// create PLY object
+	ASSERT(!fileName.empty());
+	Util::ensureFolder(fileName);
+	const size_t memBufferSize(disparityMap.area()*(8*3/*pos*/+3*3/*color*/+7/*space*/+2/*eol*/) + 2048/*extra size*/);
+	PLY ply;
+	if (!ply.write(fileName, 1, elem_names, PLY::BINARY_LE, memBufferSize))
+		return false;
+
+	// describe what properties go into the vertex elements
+	ply.describe_property("vertex", 6, vert_props);
+
+	// export the array of 3D points
+	Vertex vertex;
+	for (int r=0; r<disparityMap.rows; ++r) {
+		for (int c=0; c<disparityMap.cols; ++c) {
+			const Disparity& disparity = disparityMap(r,c);
+			if (disparity == NO_DISP)
+				continue;
+			Point2f u;
+			Depth depth(Image::Disparity2Depth(Q, ImageRef(c+halfWindowSizeX,r+halfWindowSizeY), (float)disparity/subpixelSteps, u));
+			if (depth <= 0)
+				continue;
+			if (!imageData.image.isInsideWithBorder<float,1>(u))
+				continue;
+			const Point3f X(imageData.camera.TransformPointI2W(Point3(u,depth)));
+			vertex.x = X.x; vertex.y = X.y; vertex.z = X.z;
+			const Pixel8U C(imageData.image.empty() ? Pixel8U::WHITE : imageData.image.sample(u));
+			vertex.r = C.r; vertex.g = C.g; vertex.b = C.b;
+			ply.put_element(&vertex);
+		}
+	}
+	if (ply.get_current_element_count() == 0)
+		return false;
+
+	// write to file
+	return ply.header_complete();
+} // ExportPointCloud
+
+// imports a DIMAP file and converts it to point-cloud
+bool SemiGlobalMatcher::ImportPointCloud(const String& fileName, const ImageArr& images, PointCloud& pointcloud)
+{
+	// load disparity-map
+	Disparity subpixelSteps;
+	cv::Size imageSize; Matrix3x3 H; Matrix4x4 Q;
+	DisparityMap disparityMap; AccumCostMap costMap;
+	if (!ImportDisparityDataRawFull(fileName, disparityMap, costMap, imageSize, H, Q, subpixelSteps))
+		return false;
+	// parse image index from the file name
+	const String name(Util::getFileName(fileName));
+	IIndex idxImage(NO_ID), idxImagePair(NO_ID);
+	if (sscanf(name, "%u_%u", &idxImage, &idxImagePair) != 2 || idxImage == NO_ID || idxImagePair == NO_ID)
+		return false;
+	const Image& imageData = images[idxImage];
+	ASSERT(imageData.image.size() == imageSize);
+	// import the array of 3D points
+	for (int r=0; r<disparityMap.rows; ++r) {
+		for (int c=0; c<disparityMap.cols; ++c) {
+			const Disparity& disparity = disparityMap(r,c);
+			if (disparity == NO_DISP)
+				continue;
+			Point2f u;
+			Depth depth(Image::Disparity2Depth(Q, ImageRef(c+halfWindowSizeX,r+halfWindowSizeY), (float)disparity/subpixelSteps, u));
+			if (depth <= 0)
+				continue;
+			if (!imageData.image.isInsideWithBorder<float,1>(u))
+				continue;
+			pointcloud.points.emplace_back(Cast<PointCloud::Point::Type>(imageData.camera.TransformPointI2W(Point3(u,depth))));
+			pointcloud.colors.emplace_back(imageData.image.empty() ? Pixel8U::WHITE : imageData.image.sample(u));
+		}
+	}
+	return true;
+} // ImportPointCloud
+/*----------------------------------------------------------------*/
+
+
+bool MVS::STEREO::ExportCamerasEngin(const Scene& scene, const String& fileName)
+{
+	ASSERT(!scene.IsEmpty());
+
+	File f(fileName, File::WRITE, File::CREATE | File::TRUNCATE);
+	if (!f.isOpen())
+		return false;
+
+	// write header
+	f.print("n_cameras %u\n", scene.images.size());
+	f.print("n_points %u\n", 0);
+
+	// write cameras
+	for (const Image& image: scene.images) {
+		if (!image.IsValid())
+			continue;
+		const Point3 t(image.camera.GetT());
+		f.print("%u %u %u %s "
+			"%g %g %g %g "
+			"%g %g %g %g %g %g %g %g %g "
+			"%g %g %g "
+			"%g %g "
+			"%u",
+			image.ID, image.width, image.height, image.name.c_str(),
+			image.camera.K(0,0), image.camera.K(1,1), image.camera.K(0,2), image.camera.K(1,2),
+			image.camera.R(0,0), image.camera.R(0,1), image.camera.R(0,2),
+			image.camera.R(1,0), image.camera.R(1,1), image.camera.R(1,2),
+			image.camera.R(2,0), image.camera.R(2,1), image.camera.R(2,2),
+			t.x, t.y, t.z,
+			0, 0,
+			image.neighbors.size()
+		);
+		for (const auto& neighbor: image.neighbors)
+			f.print(" %u", neighbor.idx.ID);
+		f.print("\n");
+	}
+
+	return true;
+} // ExportCamerasEngin
+/*----------------------------------------------------------------*/
diff --git a/libs/MVS/SemiGlobalMatcher.h b/libs/MVS/SemiGlobalMatcher.h
new file mode 100644
index 000000000..740840db4
--- /dev/null
+++ b/libs/MVS/SemiGlobalMatcher.h
@@ -0,0 +1,215 @@
+/*
+* SemiGlobalMatcher.h
+*
+* Copyright (c) 2014-2015 SEACAVE
+*
+* Author(s):
+*
+*      cDc <cdc.seacave@gmail.com>
+*
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*
+*
+* Additional Terms:
+*
+*      You are required to preserve legal notices and author attributions in
+*      that material or in the Appropriate Legal Notices displayed by works
+*      containing it.
+*/
+
+#ifndef _MVS_SEMIGLOBALMATCHER_H_
+#define _MVS_SEMIGLOBALMATCHER_H_
+
+
+// I N C L U D E S /////////////////////////////////////////////////
+
+#include "DepthMap.h"
+
+
+// D E F I N E S ///////////////////////////////////////////////////
+
+// similarity method
+#define SGM_SIMILARITY_WZNCC 1
+#define SGM_SIMILARITY_CENSUS 2
+#define SGM_SIMILARITY SGM_SIMILARITY_WZNCC
+
+
+// S T R U C T S ///////////////////////////////////////////////////
+
+namespace MVS {
+
+class Scene;
+
+namespace STEREO {
+
+// An implementation of the popular Semi-Global Matching (SGM) algorithm.
+class MVS_API SemiGlobalMatcher
+{
+public:
+	typedef uint8_t  Mask;      // used to mark the valid region to be estimated
+	typedef int16_t  Disparity; // contains the allowable disparity range
+	typedef uint8_t  Cost;      // used to describe a single disparity cost
+	typedef uint16_t AccumCost; // used to accumulate CostType values
+	typedef uint64_t Index; // index pointing to pixel costs/accumulated-costs
+
+	enum : Mask      { INVALID = 0, VALID = DECLARE_NO_INDEX(Mask) }; // invalid/valid mask value
+	enum : Disparity { NO_DISP = DECLARE_NO_INDEX(Disparity) }; // invalid disparity value
+	enum : AccumCost { NO_ACCUMCOST = DECLARE_NO_INDEX(AccumCost) }; // invalid accumulated cost value
+	enum : Index     { NO_INDEX = DECLARE_NO_INDEX(Index) }; // invalid index value
+
+	struct Range {
+		Disparity minDisp;
+		Disparity maxDisp;
+		Disparity avgDisp() const { return (minDisp+maxDisp)>>1; }
+		Disparity numDisp() const { return maxDisp-minDisp; }
+		Disparity isValid() const { return minDisp<maxDisp; }
+	};
+	struct PixelData {
+		Index idx; // index to pixel costs/accumulated-costs data
+		Range range; // first and last disparity evaluated for this pixel
+	};
+
+	typedef TImage<Mask> MaskMap; // image of accumulated disparity cost
+	typedef TImage<Disparity> DisparityMap; // disparity image type
+	typedef TImage<AccumCost> AccumCostMap; // image of accumulated disparity cost
+	typedef CLISTDEF0IDX(PixelData,Index) PixelMap; // map of pixel data
+	typedef CLISTDEF0IDX(Cost,Index) CostsMap; // map of pre-computed disparity costs
+	typedef CLISTDEF0IDX(AccumCost,Index) AccumCostsMap; // map of accumulated disparity costs
+
+	enum : int { numDirs = 4 }; // 2 or 4 directions accumulated per pixel pass
+	#if SGM_SIMILARITY == SGM_SIMILARITY_CENSUS
+	enum : int { halfWindowSizeX = 3, halfWindowSizeY = 4 }; // patch kernel size
+	#else
+	enum : int { halfWindowSizeX = 3, halfWindowSizeY = 3 }; // patch kernel size
+	#endif
+	enum : int { windowSizeX = halfWindowSizeX*2+1, windowSizeY = halfWindowSizeY*2+1, numTexels = windowSizeX*windowSizeY }; // patch kernel info
+
+	#if SGM_SIMILARITY == SGM_SIMILARITY_CENSUS
+	typedef uint64_t Census; // used to store Census transform
+	typedef TImage<Census> CensusMap; // image of Census transforms
+	typedef Image8U ImageGray; // used to store intensities image
+	STATIC_ASSERT(sizeof(Census)*8 >= numTexels);
+	#else
+	typedef WeightedPatchFix<numTexels> WeightedPatch; // pre-computed patch weights
+	typedef Image32F ImageGray; // used to store normalized float intensities image
+	#endif
+
+
+	// Available sub-pixel options
+	enum SgmSubpixelMode {
+		SUBPIXEL_NA = 0, // no sub-pixel processing
+		SUBPIXEL_LINEAR,
+		SUBPIXEL_POLY4,
+		SUBPIXEL_PARABOLA,
+		SUBPIXEL_SINE,
+		SUBPIXEL_COSINE,
+		SUBPIXEL_LC_BLEND // probably the best option
+	};
+
+	struct ViewData {
+		Image8U3 imageColor; // color image
+		ImageGray imageGray; // intensity image
+		#if SGM_SIMILARITY == SGM_SIMILARITY_CENSUS
+		CensusMap imageCensus; // image of Census transforms (valid per scale)
+		#endif
+		ViewData GetImage(REAL scale) const {
+			if (ISEQUAL(scale, REAL(1))) {
+				#if SGM_SIMILARITY == SGM_SIMILARITY_CENSUS
+				CensusTransform(imageGray, const_cast<CensusMap&>(imageCensus));
+				#endif
+				return *this;
+			}
+			ASSERT(scale < 1);
+			ViewData scaledView;
+			cv::resize(imageColor, scaledView.imageColor, cv::Size(), scale, scale, cv::INTER_AREA);
+			cv::resize(imageGray, scaledView.imageGray, cv::Size(), scale, scale, cv::INTER_AREA);
+			#if SGM_SIMILARITY == SGM_SIMILARITY_CENSUS
+			CensusTransform(scaledView.imageGray, scaledView.imageCensus);
+			#endif
+			return scaledView;
+		}
+	};
+
+	typedef MVS_API Point2f DepthRange;
+	typedef MVS_API TImage<DepthRange> DepthRangeMap;
+
+public:
+	SemiGlobalMatcher(SgmSubpixelMode subpixelMode=SUBPIXEL_LC_BLEND, Disparity subpixelSteps=4, AccumCost P1=3, AccumCost P2=4, float P2alpha=14, float P2beta=38);
+	~SemiGlobalMatcher();
+
+	void Match(const Scene& scene, IIndex idxImage, IIndex numNeighbors, unsigned minResolution=320);
+	void Fuse(const Scene& scene, IIndex idxImage, IIndex numNeighbors, unsigned minViews, DepthMap& depthMap, ConfidenceMap& confMap);
+
+	static void CreateThreads(unsigned nMaxThreads=1);
+	static void DestroyThreads();
+	static void* ThreadWorker(void*);
+	static void WaitThreadWorkers(unsigned nJobs);
+
+	static bool ExportDisparityDataRaw(const String& fileName, const DisparityMap&, const AccumCostMap&, const cv::Size& imageSize, const Matrix3x3& H, const Matrix4x4& Q, Disparity subpixelSteps);
+	static bool ExportDisparityDataRawFull(const String& fileName, const DisparityMap&, const AccumCostMap&, const cv::Size& imageSize, const Matrix3x3& H, const Matrix4x4& Q, Disparity subpixelSteps);
+	static bool ImportDisparityDataRaw(const String& fileName, DisparityMap&, AccumCostMap&, cv::Size& imageSize, Matrix3x3& H, Matrix4x4& Q, Disparity& subpixelSteps);
+	static bool ImportDisparityDataRawFull(const String& fileName, DisparityMap&, AccumCostMap&, cv::Size& imageSize, Matrix3x3& H, Matrix4x4& Q, Disparity& subpixelSteps);
+	static Image8U3 DisparityMap2Image(const DisparityMap&, Disparity minDisparity=NO_DISP, Disparity maxDisparity=NO_DISP);
+	static bool ExportDisparityMap(const String& fileName, const DisparityMap&, Disparity minDisparity=NO_DISP, Disparity maxDisparity=NO_DISP);
+	static bool ExportPointCloud(const String& fileName, const Image&, const DisparityMap&, const Matrix4x4& Q, Disparity subpixelSteps);
+	static bool ImportPointCloud(const String& fileName, const ImageArr& images, PointCloud&);
+
+protected:
+	void Match(const ViewData& leftImage, const ViewData& rightImage, DisparityMap& disparityMap, AccumCostMap& costMap);
+	Index Disparity2RangeMap(const DisparityMap& disparityMap, const MaskMap& maskMap, Disparity minNumDisp=3, Disparity minNumDispInvalid=16);
+	#if SGM_SIMILARITY == SGM_SIMILARITY_CENSUS
+	static void CensusTransform(const Image8U& imageGray, CensusMap& imageCensus);
+	#endif
+	static void ConsistencyCrossCheck(DisparityMap& l2r, const DisparityMap& r2l, Disparity thCross=1);
+	static void FilterByCost(DisparityMap&, const AccumCostMap&, AccumCost th);
+	static void ExtractMask(const DisparityMap&, MaskMap&, int thValid=3);
+	static void FlipDirection(const DisparityMap& l2r, DisparityMap& r2l);
+	static void UpscaleMask(MaskMap& maskMap, const cv::Size& size2x);
+	void RefineDisparityMap(DisparityMap& disparityMap) const;
+	void DisplayState(const cv::Size& size) const;
+
+	static CLISTDEF0IDX(AccumCost,int) GenerateP2s(AccumCost P2, float P2alpha, float P2beta);
+	static void Depth2DisparityMap(const DepthMap&, const Matrix3x3& invH, const Matrix4x4& invQ, Disparity subpixelSteps, DisparityMap&);
+	static void Disparity2DepthMap(const DisparityMap&, const AccumCostMap&, const Matrix3x3& H, const Matrix4x4& Q, Disparity subpixelSteps, DepthMap&, ConfidenceMap&);
+	static bool ProjectDisparity2DepthMap(const DisparityMap&, const AccumCostMap&, const Matrix4x4& Q, Disparity subpixelSteps, DepthMap&, DepthRangeMap&, ConfidenceMap&);
+
+protected:
+	// parameters
+	SgmSubpixelMode subpixelMode;
+	Disparity subpixelSteps;
+	AccumCost P1;
+	CLISTDEF0IDX(AccumCost,int) P2s;
+
+	// main memory buffers that must be allocated
+	PixelMap imagePixels;
+	CostsMap imageCosts;
+	AccumCostsMap imageAccumCosts;
+	Disparity maxNumDisp; // maximum number of disparities per-pixel
+
+	// multi-threading
+	static EventThreadPool threads; // worker threads
+	static Semaphore sem; // signal job end
+};
+/*----------------------------------------------------------------*/
+
+
+MVS_API bool ExportCamerasEngin(const Scene&, const String& fileName);
+/*----------------------------------------------------------------*/
+
+} // namespace STEREO
+
+} // namespace MVS
+
+#endif // _MVS_SEMIGLOBALMATCHER_H_