aosp12/toolchain/benchmark/panorama.diff

diff --git a/benchmark/Android.mk b/benchmark/Android.mk
new file mode 100755
index 00000000..d48e0d8e
--- /dev/null
+++ b/benchmark/Android.mk
@@ -0,0 +1,53 @@
+local_target_dir := $(TARGET_OUT_DATA)/local/tmp
+
+LOCAL_PATH:= $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_C_INCLUDES := \
+    $(LOCAL_PATH)/feature_mos/src \
+    $(LOCAL_PATH)/feature_stab/src \
+    $(LOCAL_PATH)/feature_stab/db_vlvm
+
+LOCAL_SRC_FILES := benchmark.cpp \
+	feature_mos/src/mosaic/ImageUtils.cpp \
+    feature_mos/src/mosaic/Mosaic.cpp \
+    feature_mos/src/mosaic/AlignFeatures.cpp \
+    feature_mos/src/mosaic/Blend.cpp \
+    feature_mos/src/mosaic/Pyramid.cpp \
+    feature_mos/src/mosaic/trsMatrix.cpp \
+    feature_mos/src/mosaic/Delaunay.cpp \
+    feature_mos/src/mosaic_renderer/Renderer.cpp \
+    feature_mos/src/mosaic_renderer/WarpRenderer.cpp \
+    feature_mos/src/mosaic_renderer/SurfaceTextureRenderer.cpp \
+    feature_mos/src/mosaic_renderer/YVURenderer.cpp \
+    feature_mos/src/mosaic_renderer/FrameBuffer.cpp \
+    feature_stab/db_vlvm/db_rob_image_homography.cpp \
+    feature_stab/db_vlvm/db_feature_detection.cpp \
+    feature_stab/db_vlvm/db_image_homography.cpp \
+    feature_stab/db_vlvm/db_framestitching.cpp \
+    feature_stab/db_vlvm/db_feature_matching.cpp \
+    feature_stab/db_vlvm/db_utilities.cpp \
+    feature_stab/db_vlvm/db_utilities_camera.cpp \
+    feature_stab/db_vlvm/db_utilities_indexing.cpp \
+    feature_stab/db_vlvm/db_utilities_linalg.cpp \
+    feature_stab/db_vlvm/db_utilities_poly.cpp \
+    feature_stab/src/dbreg/dbstabsmooth.cpp \
+    feature_stab/src/dbreg/dbreg.cpp \
+    feature_stab/src/dbreg/vp_motionmodel.c
+
+LOCAL_CFLAGS := -O3 -DNDEBUG -Wno-unused-parameter -Wno-maybe-uninitialized
+LOCAL_CFLAGS += $(CFLAGS_FOR_BENCH_SUITE)
+LOCAL_LDFLAGS := $(LDFLAGS_FOR_BENCH_SUITE)
+LOCAL_CPPFLAGS := -std=c++98
+LOCAL_MODULE_TAGS := tests
+LOCAL_MODULE := panorama_bench
+LOCAL_MODULE_STEM_32 := panorama_bench
+LOCAL_MODULE_STEM_64 := panorama_bench64
+LOCAL_MULTILIB := both
+LOCAL_MODULE_PATH := $(local_target_dir)
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
+LOCAL_FORCE_STATIC_EXECUTABLE := true
+LOCAL_STATIC_LIBRARIES := libc libm
+
+include $(BUILD_EXECUTABLE)
diff --git a/benchmark/NOTICE b/benchmark/NOTICE
new file mode 100644
index 00000000..7317ae2a
--- /dev/null
+++ b/benchmark/NOTICE
@@ -0,0 +1,190 @@
+
+   Copyright (c) 2005-2011, The Android Open Source Project
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp
new file mode 100755
index 00000000..2a6440f4
--- /dev/null
+++ b/benchmark/benchmark.cpp
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "mosaic/Mosaic.h"
+#include "mosaic/ImageUtils.h"
+
+#define MAX_FRAMES 200
+#define KERNEL_ITERATIONS 10
+
+const int blendingType = Blend::BLEND_TYPE_HORZ;
+const int stripType = Blend::STRIP_TYPE_WIDE;
+
+ImageType yvuFrames[MAX_FRAMES];
+
+int loadImages(const char* basename, int &width, int &height)
+{
+    char filename[512];
+    struct stat filestat;
+    int i;
+
+    for (i = 0; i < MAX_FRAMES; i++) {
+        sprintf(filename, "%s_%03d.ppm", basename, i + 1);
+        if (stat(filename, &filestat) != 0) break;
+        ImageType rgbFrame = ImageUtils::readBinaryPPM(filename, width, height);
+        yvuFrames[i] = ImageUtils::allocateImage(width, height,
+                                ImageUtils::IMAGE_TYPE_NUM_CHANNELS);
+        ImageUtils::rgb2yvu(yvuFrames[i], rgbFrame, width, height);
+        ImageUtils::freeImage(rgbFrame);
+    }
+    return i;
+}
+
+int main(int argc, char **argv)
+{
+    struct timespec t1, t2, t3;
+
+    int width, height;
+    float totalElapsedTime = 0;
+
+    const char *basename;
+    const char *filename;
+
+    if (argc != 3) {
+        printf("Usage: %s input_dir output_filename\n", argv[0]);
+        return 0;
+    } else {
+        basename = argv[1];
+        filename = argv[2];
+    }
+
+    // Load the images outside the computational kernel
+    int totalFrames = loadImages(basename, width, height);
+
+    if (totalFrames == 0) {
+        printf("Image files not found. Make sure %s exists.\n",
+               basename);
+        return 1;
+    }
+
+    printf("%d frames loaded\n", totalFrames);
+
+
+    // Interesting stuff is here
+    for (int iteration = 0; iteration < KERNEL_ITERATIONS; iteration++)  {
+        Mosaic mosaic;
+
+        mosaic.initialize(blendingType, stripType, width, height, -1, false, 0);
+
+        clock_gettime(CLOCK_MONOTONIC, &t1);
+        for (int i = 0; i < totalFrames; i++) {
+            mosaic.addFrame(yvuFrames[i]);
+        }
+        clock_gettime(CLOCK_MONOTONIC, &t2);
+
+        float progress = 0.0;
+        bool cancelComputation = false;
+
+        mosaic.createMosaic(progress, cancelComputation);
+
+        int mosaicWidth, mosaicHeight;
+        ImageType resultYVU = mosaic.getMosaic(mosaicWidth, mosaicHeight);
+
+        ImageType imageRGB = ImageUtils::allocateImage(
+            mosaicWidth, mosaicHeight, ImageUtils::IMAGE_TYPE_NUM_CHANNELS);
+
+        clock_gettime(CLOCK_MONOTONIC, &t3);
+
+        float elapsedTime =
+            (t3.tv_sec - t1.tv_sec) + (t3.tv_nsec - t1.tv_nsec)/1e9;
+        float addImageTime =
+            (t2.tv_sec - t1.tv_sec) + (t2.tv_nsec - t1.tv_nsec)/1e9;
+        float stitchImageTime =
+            (t3.tv_sec - t2.tv_sec) + (t3.tv_nsec - t2.tv_nsec)/1e9;
+
+        totalElapsedTime += elapsedTime;
+
+        printf("Iteration %d: %dx%d moasic created: "
+               "%.2f seconds (%.2f + %.2f)\n",
+               iteration, mosaicWidth, mosaicHeight,
+               elapsedTime, addImageTime, stitchImageTime);
+
+        // Write the output only once for correctness check
+        if (iteration == 0) {
+            ImageUtils::yvu2rgb(imageRGB, resultYVU, mosaicWidth,
+                                mosaicHeight);
+            ImageUtils::writeBinaryPPM(imageRGB, filename, mosaicWidth,
+                                       mosaicHeight);
+        }
+    }
+    printf("Total elapsed time: %.2f seconds\n", totalElapsedTime);
+
+    return 0;
+}
diff --git a/benchmark/feature_mos/doc/Readme.txt b/benchmark/feature_mos/doc/Readme.txt
new file mode 100644
index 00000000..83ce165e
--- /dev/null
+++ b/benchmark/feature_mos/doc/Readme.txt
@@ -0,0 +1,3 @@
+To generate the html docs, execute
+doxygen feature_mos_API_doxyfile
+
diff --git a/benchmark/feature_mos/doc/feature_mos_API_doxyfile b/benchmark/feature_mos/doc/feature_mos_API_doxyfile
new file mode 100755
index 00000000..dca8c8c0
--- /dev/null
+++ b/benchmark/feature_mos/doc/feature_mos_API_doxyfile
@@ -0,0 +1,1557 @@
+# Doxyfile 1.6.1
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+#       TAG = value [value, ...]
+# For lists items can also be appended using:
+#       TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# http://www.gnu.org/software/libiconv for the list of possible encodings.
+
+DOXYFILE_ENCODING      = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
+PROJECT_NAME           =
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER         =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       = .
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of
+# source files, where putting all generated files in the same directory would
+# otherwise cause performance problems for the file system.
+
+CREATE_SUBDIRS         = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
+# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
+# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
+# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
+# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak,
+# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
+
+OUTPUT_LANGUAGE        = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF           = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is
+# used as the annotated text. Otherwise, the brief description is used as-is.
+# If left blank, the following values are used ("$name" is automatically
+# replaced with the name of the entity): "The $name class" "The $name widget"
+# "The $name file" "is" "provides" "specifies" "contains"
+# "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF       = "The $name class" \
+                         "The $name widget" \
+                         "The $name file" \
+                         is \
+                         provides \
+                         specifies \
+                         contains \
+                         represents \
+                         a \
+                         an \
+                         the
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+
+INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before files name in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES        = YES
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip.
+
+STRIP_FROM_PATH        = /Users/dimitri/doxygen/mail/1.5.7/doxywizard/
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH    =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful is your file systems
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like regular Qt-style comments
+# (thus requiring an explicit @brief command for a brief description.)
+
+JAVADOC_AUTOBRIEF      = NO
+
+# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
+# interpret the first line (until the first dot) of a Qt-style
+# comment as the brief description. If set to NO, the comments
+# will behave just like regular Qt-style comments (thus requiring
+# an explicit \brief command for a brief description.)
+
+QT_AUTOBRIEF           = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
+INHERIT_DOCS           = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
+# a new page for each member. If set to NO, the documentation of a member will
+# be part of the file/class/namespace that contains it.
+
+SEPARATE_MEMBER_PAGES  = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE               = 8
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
+# sources only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C  = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
+# sources only. Doxygen will then generate output that is more tailored for
+# Java. For instance, namespaces will be presented as packages, qualified
+# scopes will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA   = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources only. Doxygen will then generate output that is more tailored for
+# Fortran.
+
+OPTIMIZE_FOR_FORTRAN   = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for
+# VHDL.
+
+OPTIMIZE_OUTPUT_VHDL   = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it parses.
+# With this tag you can assign which parser to use for a given extension.
+# Doxygen has a built-in mapping, but you can override or extend it using this tag.
+# The format is ext=language, where ext is a file extension, and language is one of
+# the parsers supported by doxygen: IDL, Java, Javascript, C#, C, C++, D, PHP,
+# Objective-C, Python, Fortran, VHDL, C, C++. For instance to make doxygen treat
+# .inc files as Fortran files (default is PHP), and .f files as C (default is Fortran),
+# use: inc=Fortran f=C. Note that for custom extensions you also need to set
+# FILE_PATTERNS otherwise the files are not read by doxygen.
+
+EXTENSION_MAPPING      =
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should
+# set this tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
+# func(std::string) {}). This also make the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+
+BUILTIN_STL_SUPPORT    = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+
+CPP_CLI_SUPPORT        = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
+# Doxygen will parse them like normal C++ but will assume all classes use public
+# instead of private inheritance when no explicit protection keyword is present.
+
+SIP_SUPPORT            = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate getter
+# and setter methods for a property. Setting this option to YES (the default)
+# will make doxygen to replace the get and set methods by a property in the
+# documentation. This will only work if the methods are indeed getting or
+# setting a simple type. If this is not the case, or you want to show the
+# methods anyway, you should set this option to NO.
+
+IDL_PROPERTY_SUPPORT   = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC   = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
+SUBGROUPING            = YES
+
+# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
+# is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically
+# be useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+
+TYPEDEF_HIDES_STRUCT   = NO
+
+# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to
+# determine which symbols to keep in memory and which to flush to disk.
+# When the cache is full, less often used symbols will be written to disk.
+# For small to medium size projects (<1000 input files) the default value is
+# probably good enough. For larger projects a too small cache size can cause
+# doxygen to be busy swapping symbols to and from disk most of the time
+# causing a significant performance penality.
+# If the system has enough physical memory increasing the cache will improve the
+# performance by keeping more symbols in memory. Note that the value works on
+# a logarithmic scale so increasing the size by one will rougly double the
+# memory usage. The cache size is given by this formula:
+# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
+# corresponding to a cache size of 2^16 = 65536 symbols
+
+SYMBOL_CACHE_SIZE      = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL            = NO
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE        = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC         = NO
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES  = YES
+
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS  = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base
+# name of the file that contains the anonymous namespace. By default
+# anonymous namespace are hidden.
+
+EXTRACT_ANON_NSPACES   = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS     = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES     = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS          = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+
+CASE_SENSE_NAMES       = NO
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES       = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES     = YES
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS       = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS        = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen
+# will sort the (brief and detailed) documentation of class members so that
+# constructors and destructors are listed first. If set to NO (the default)
+# the constructors will appear in the respective orders defined by
+# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS.
+# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO
+# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
+# hierarchy of group names into alphabetical order. If set to NO (the default)
+# the group names will appear in their defined order.
+
+SORT_GROUP_NAMES       = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME     = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or define consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and defines in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES        = YES
+
+# If the sources in your project are distributed over multiple directories
+# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
+# in the documentation. The default is NO.
+
+SHOW_DIRECTORIES       = NO
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
+# This will remove the Files entry from the Quick Index and from the
+# Folder Tree View (if specified). The default is YES.
+
+SHOW_FILES             = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
+# Namespaces page.  This will remove the Namespaces entry from the Quick Index
+# and from the Folder Tree View (if specified). The default is YES.
+
+SHOW_NAMESPACES        = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command <command> <input-file>, where <command> is the value of
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output
+# is used as the file version. See the manual for examples.
+
+FILE_VERSION_FILTER    =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by
+# doxygen. The layout file controls the global structure of the generated output files
+# in an output format independent way. The create the layout file that represents
+# doxygen's defaults, run doxygen with the -l option. You can optionally specify a
+# file name after the option, if omitted DoxygenLayout.xml will be used as the name
+# of the layout file.
+
+LAYOUT_FILE            =
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET                  = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS               = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED   = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR      = YES
+
+# This WARN_NO_PARAMDOC option can be abled to get warnings for
+# functions that are documented, but have no documentation for their parameters
+# or return value. If set to NO (the default) doxygen will only warn about
+# wrong or incomplete parameter documentation, but not about the absence of
+# documentation.
+
+WARN_NO_PARAMDOC       = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text. Optionally the format may contain
+# $version, which will be replaced by the version of the file (if it could
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE           =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  = ../src/mosaic/Mosaic.h
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
+# also the default input encoding. Doxygen uses libiconv (or the iconv built
+# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
+# the list of possible encodings.
+
+INPUT_ENCODING         = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          = *.c \
+                         *.cc \
+                         *.cxx \
+                         *.cpp \
+                         *.c++ \
+                         *.d \
+                         *.java \
+                         *.ii \
+                         *.ixx \
+                         *.ipp \
+                         *.i++ \
+                         *.inl \
+                         *.h \
+                         *.hh \
+                         *.hxx \
+                         *.hpp \
+                         *.h++ \
+                         *.idl \
+                         *.odl \
+                         *.cs \
+                         *.php \
+                         *.php3 \
+                         *.inc \
+                         *.m \
+                         *.mm \
+                         *.dox \
+                         *.py \
+                         *.f90 \
+                         *.f \
+                         *.vhd \
+                         *.vhdl
+
+# The RECURSIVE tag can be used to turn specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE              = NO
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE                =
+
+# The EXCLUDE_SYMLINKS tag can be used select whether or not files or
+# directories that are symbolic links (a Unix filesystem feature) are excluded
+# from the input.
+
+EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS       = *
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output.  If FILTER_PATTERNS is specified, this tag will be
+# ignored.
+
+INPUT_FILTER           =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis.  Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match.  The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
+# is applied to all files.
+
+FILTER_PATTERNS        =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES    = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER         = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION    = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
+# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
+# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
+# link to the source code.  Otherwise they will link to the documentation.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code
+# will point to the HTML generated by the htags(1) tool instead of doxygen
+# built-in source browser. The htags tool is part of GNU's global source
+# tagging system (see http://www.gnu.org/software/global/global.html). You
+# will need version 4.8.6 or higher.
+
+USE_HTAGS              = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS       = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX     = NO
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX    = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX          =
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML          = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT            = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header.
+
+HTML_HEADER            =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER            =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet. Note that doxygen will try to copy
+# the style sheet file to the HTML output directory, so don't put your own
+# stylesheet in the HTML output directory as well, or it will be erased!
+
+HTML_STYLESHEET        =
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS     = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded. For this to work a browser that supports
+# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox
+# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
+
+HTML_DYNAMIC_SECTIONS  = NO
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files
+# will be generated that can be used as input for Apple's Xcode 3
+# integrated development environment, introduced with OSX 10.5 (Leopard).
+# To create a documentation set, doxygen will generate a Makefile in the
+# HTML output directory. Running make will produce the docset in that
+# directory and running "make install" will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
+# it at startup.
+# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information.
+
+GENERATE_DOCSET        = NO
+
+# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
+# feed. A documentation feed provides an umbrella under which multiple
+# documentation sets from a single provider (such as a company or product suite)
+# can be grouped.
+
+DOCSET_FEEDNAME        = "Doxygen generated docs"
+
+# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
+# should uniquely identify the documentation set bundle. This should be a
+# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
+# will append .docset to the name.
+
+DOCSET_BUNDLE_ID       = org.doxygen.Project
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP      = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE               =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION           =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI           = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
+# is used to encode HtmlHelp index (hhk), content (hhc) and project file
+# content.
+
+CHM_INDEX_ENCODING     =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND             = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER
+# are set, an additional index file will be generated that can be used as input for
+# Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated
+# HTML documentation.
+
+GENERATE_QHP           = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
+# be used to specify the file name of the resulting .qch file.
+# The path specified is relative to the HTML output folder.
+
+QCH_FILE               =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#namespace
+
+QHP_NAMESPACE          =
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#virtual-folders
+
+QHP_VIRTUAL_FOLDER     = doc
+
+# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add.
+# For more information please see
+# http://doc.trolltech.com/qthelpproject.html#custom-filters
+
+QHP_CUST_FILTER_NAME   =
+
+# The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the custom filter to add.For more information please see
+# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">Qt Help Project / Custom Filters</a>.
+
+QHP_CUST_FILTER_ATTRS  =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's
+# filter section matches.
+# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">Qt Help Project / Filter Attributes</a>.
+
+QHP_SECT_FILTER_ATTRS  =
+
+# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
+# be used to specify the location of Qt's qhelpgenerator.
+# If non-empty doxygen will try to run qhelpgenerator on the generated
+# .qhp file.
+
+QHG_LOCATION           =
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
+# top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it.
+
+DISABLE_INDEX          = NO
+
+# This tag can be used to set the number of enum values (range [1..20])
+# that doxygen will group on one line in the generated HTML documentation.
+
+ENUM_VALUES_PER_LINE   = 4
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information.
+# If the tag value is set to YES, a side panel will be generated
+# containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
+# Windows users are probably better off using the HTML help feature.
+
+GENERATE_TREEVIEW      = NO
+
+# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
+# and Class Hierarchy pages using a tree view instead of an ordered list.
+
+USE_INLINE_TREES       = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH         = 250
+
+# Use this tag to change the font size of Latex formulas included
+# as images in the HTML documentation. The default is 10. Note that
+# when you change the font size after a successful doxygen run you need
+# to manually remove any form_*.png images from the HTML output directory
+# to force them to be regenerated.
+
+FORMULA_FONTSIZE       = 10
+
+# When the SEARCHENGINE tag is enable doxygen will generate a search box
+# for the HTML output. The underlying search engine uses javascript
+# and DHTML and should work on any modern browser. Note that when using
+# HTML help (GENERATE_HTMLHELP) or Qt help (GENERATE_QHP)
+# there is already a search function so this one should typically
+# be disabled.
+
+SEARCHENGINE           = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX         = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT           = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+
+LATEX_CMD_NAME         = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME     = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, a4wide, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE             = a4wide
+
+# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES         =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER           =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS         = YES
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX           = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode.
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE        = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES     = NO
+
+# If LATEX_SOURCE_CODE is set to YES then doxygen will include
+# source code with syntax highlighting in the LaTeX output.
+# Note that which sources are shown also depends on other settings
+# such as SOURCE_BROWSER.
+
+LATEX_SOURCE_CODE      = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS         = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE    =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE    =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION          = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML           = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT             = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA             =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD                =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING     = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader.  This is useful
+# if you want to understand what is going on.  On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION        = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF     = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files
+# in the INCLUDE_PATH (see below) will be search if a #include is found.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS  =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all function-like macros that are alone
+# on a line, have an all uppercase name, and do not end with a semicolon. Such
+# function macros are typically used for boiler-plate code, and will confuse
+# the parser if not removed.
+
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles.
+# Optionally an initial location of the external documentation
+# can be added for each tagfile. The format of a tag file without
+# this location is as follows:
+#   TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+#   TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths or
+# URLs. If a location is present for each tag, the installdox tool
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES               =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE       =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS        = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH              = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base
+# or super classes. Setting the tag to NO turns the diagrams off. Note that
+# this option is superseded by the HAVE_DOT option below. This is only a
+# fallback. It is recommended to install and use dot, since it yields more
+# powerful graphs.
+
+CLASS_DIAGRAMS         = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH            =
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT               = NO
+
+# By default doxygen will write a font called FreeSans.ttf to the output
+# directory and reference it in all dot files that doxygen generates. This
+# font does not include all possible unicode characters however, so when you need
+# these (or just want a differently looking font) you can specify the font name
+# using DOT_FONTNAME. You need need to make sure dot is able to find the font,
+# which can be done by putting it in a standard location or by setting the
+# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory
+# containing the font.
+
+DOT_FONTNAME           = FreeSans
+
+# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
+# The default size is 10pt.
+
+DOT_FONTSIZE           = 10
+
+# By default doxygen will tell dot to use the output directory to look for the
+# FreeSans.ttf font (which doxygen will put there itself). If you specify a
+# different font using DOT_FONTNAME you can set the path where dot
+# can find it using this tag.
+
+DOT_FONTPATH           =
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# the CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH    = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for groups, showing the direct groups dependencies
+
+GROUP_GRAPHS           = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK               = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS     = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH          = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH      = YES
+
+# If the CALL_GRAPH and HAVE_DOT options are set to YES then
+# doxygen will generate a call dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable call graphs
+# for selected functions only using the \callgraph command.
+
+CALL_GRAPH             = NO
+
+# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
+# doxygen will generate a caller dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable caller
+# graphs for selected functions only using the \callergraph command.
+
+CALLER_GRAPH           = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY    = YES
+
+# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
+# then doxygen will show the dependencies a directory has on other directories
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
+DIRECTORY_GRAPH        = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are png, jpg, or gif
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT       = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
+DOT_PATH               =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
+# nodes that will be shown in the graph. If the number of nodes in a graph
+# becomes larger than this value, doxygen will truncate the graph, which is
+# visualized by representing a node as a red box. Note that doxygen if the
+# number of direct children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
+# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+
+DOT_GRAPH_MAX_NODES    = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes
+# that lay further from the root node will be omitted. Note that setting this
+# option to 1 or 2 may greatly reduce the computation time needed for large
+# code bases. Also note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+
+MAX_DOT_GRAPH_DEPTH    = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not
+# seem to support this out of the box. Warning: Depending on the platform used,
+# enabling this option may lead to badly anti-aliased labels on the edges of
+# a graph (i.e. they become hard to read).
+
+DOT_TRANSPARENT        = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10)
+# support this, this feature is disabled by default.
+
+DOT_MULTI_TARGETS      = NO
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP            = YES
diff --git a/benchmark/feature_mos/src/mosaic/AlignFeatures.cpp b/benchmark/feature_mos/src/mosaic/AlignFeatures.cpp
new file mode 100644
index 00000000..703a5ea5
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/AlignFeatures.cpp
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+///////////////////////////////////////////////////
+// AlignFeatures.cpp
+// S.O. # :
+// Author(s): zkira, mbansal, bsouthall, narodits
+// $Id: AlignFeatures.cpp,v 1.20 2011/06/17 13:35:47 mbansal Exp $
+
+#include <stdio.h>
+#include <string.h>
+
+#include "trsMatrix.h"
+#include "MatrixUtils.h"
+#include "AlignFeatures.h"
+#include "Log.h"
+
+#define LOG_TAG "AlignFeatures"
+
+const double Align::DEFAULT_MAX_DISPARITY = 0.1;
+
+Align::Align()
+{
+  width = height = 0;
+  frame_number = 0;
+  num_frames_captured = 0;
+  reference_frame_index = 0;
+  db_Identity3x3(Hcurr);
+  db_Identity3x3(Hprev);
+}
+
+Align::~Align()
+{
+  // Free gray-scale image
+  if (imageGray != ImageUtils::IMAGE_TYPE_NOIMAGE)
+    ImageUtils::freeImage(imageGray);
+}
+
+char* Align::getRegProfileString()
+{
+  return reg.profile_string;
+}
+
+int Align::initialize(int width, int height, bool _quarter_res, float _thresh_still)
+{
+  int    nr_corners = DEFAULT_NR_CORNERS;
+  double max_disparity = DEFAULT_MAX_DISPARITY;
+  int    motion_model_type = DEFAULT_MOTION_MODEL;
+  int nrsamples = DB_DEFAULT_NR_SAMPLES;
+  double scale = DB_POINT_STANDARDDEV;
+  int chunk_size = DB_DEFAULT_CHUNK_SIZE;
+  int nrhorz = width/48;  // Empirically determined number of horizontal
+  int nrvert = height/60; // and vertical buckets for harris corner detection.
+  bool linear_polish = false;
+  unsigned int reference_update_period = DEFAULT_REFERENCE_UPDATE_PERIOD;
+
+  const bool DEFAULT_USE_SMALLER_MATCHING_WINDOW = false;
+  bool   use_smaller_matching_window = DEFAULT_USE_SMALLER_MATCHING_WINDOW;
+
+  quarter_res = _quarter_res;
+  thresh_still = _thresh_still;
+
+  frame_number = 0;
+  num_frames_captured = 0;
+  reference_frame_index = 0;
+  db_Identity3x3(Hcurr);
+  db_Identity3x3(Hprev);
+
+  if (!reg.Initialized())
+  {
+    reg.Init(width, height, motion_model_type, 20, linear_polish, quarter_res,
+            scale, reference_update_period, false, 0, nrsamples, chunk_size,
+            nr_corners, max_disparity, use_smaller_matching_window,
+            nrhorz, nrvert);
+  }
+  this->width = width;
+  this->height = height;
+
+  imageGray = ImageUtils::allocateImage(width, height, 1);
+
+  if (reg.Initialized())
+    return ALIGN_RET_OK;
+  else
+    return ALIGN_RET_ERROR;
+}
+
+int Align::addFrameRGB(ImageType imageRGB)
+{
+  ImageUtils::rgb2gray(imageGray, imageRGB, width, height);
+  return addFrame(imageGray);
+}
+
+int Align::addFrame(ImageType imageGray_)
+{
+  int ret_code = ALIGN_RET_OK;
+
+ // Obtain a vector of pointers to rows in image and pass in to dbreg
+  ImageType *m_rows = ImageUtils::imageTypeToRowPointers(imageGray_, width, height);
+
+  if (frame_number == 0)
+  {
+      reg.AddFrame(m_rows, Hcurr, true);    // Force this to be a reference frame
+      int num_corner_ref = reg.GetNrRefCorners();
+
+      if (num_corner_ref < MIN_NR_REF_CORNERS)
+      {
+          return ALIGN_RET_LOW_TEXTURE;
+      }
+  }
+  else
+  {
+      reg.AddFrame(m_rows, Hcurr, false);
+  }
+
+  // Average translation per frame =
+  //    [Translation from Frame0 to Frame(n-1)] / [(n-1)]
+  average_tx_per_frame = (num_frames_captured < 2) ? 0.0 :
+        Hprev[2] / (num_frames_captured - 1);
+
+  // Increment the captured frame counter if we already have a reference frame
+  num_frames_captured++;
+
+  if (frame_number != 0)
+  {
+    int num_inliers = reg.GetNrInliers();
+
+    if(num_inliers < MIN_NR_INLIERS)
+    {
+        ret_code = ALIGN_RET_FEW_INLIERS;
+
+        Hcurr[0] = 1.0;
+        Hcurr[1] = 0.0;
+        // Set this as the average per frame translation taking into acccount
+        // the separation of the current frame from the reference frame...
+        Hcurr[2] = -average_tx_per_frame *
+                (num_frames_captured - reference_frame_index);
+        Hcurr[3] = 0.0;
+        Hcurr[4] = 1.0;
+        Hcurr[5] = 0.0;
+        Hcurr[6] = 0.0;
+        Hcurr[7] = 0.0;
+        Hcurr[8] = 1.0;
+    }
+
+    if(fabs(Hcurr[2])<thresh_still && fabs(Hcurr[5])<thresh_still)  // Still camera
+    {
+        return ALIGN_RET_ERROR;
+    }
+
+    // compute the homography:
+    double Hinv33[3][3];
+    double Hprev33[3][3];
+    double Hcurr33[3][3];
+
+    // Invert and multiple with previous transformation
+    Matrix33::convert9to33(Hcurr33, Hcurr);
+    Matrix33::convert9to33(Hprev33, Hprev);
+    normProjMat33d(Hcurr33);
+
+    inv33d(Hcurr33, Hinv33);
+
+    mult33d(Hcurr33, Hprev33, Hinv33);
+    normProjMat33d(Hcurr33);
+    Matrix9::convert33to9(Hprev, Hcurr33);
+    // Since we have already factored the current transformation
+    // into Hprev, we can reset the Hcurr to identity
+    db_Identity3x3(Hcurr);
+
+    // Update the reference frame to be the current frame
+    reg.UpdateReference(m_rows,quarter_res,false);
+
+    // Update the reference frame index
+    reference_frame_index = num_frames_captured;
+  }
+
+  frame_number++;
+
+  return ret_code;
+}
+
+// Get current transformation
+int Align::getLastTRS(double trs[3][3])
+{
+  if (frame_number < 1)
+  {
+    trs[0][0] = 1.0;
+    trs[0][1] = 0.0;
+    trs[0][2] = 0.0;
+    trs[1][0] = 0.0;
+    trs[1][1] = 1.0;
+    trs[1][2] = 0.0;
+    trs[2][0] = 0.0;
+    trs[2][1] = 0.0;
+    trs[2][2] = 1.0;
+    return ALIGN_RET_ERROR;
+  }
+
+  // Note that the logic here handles the case, where a frame is not used for
+  // mosaicing but is captured and used in the preview-rendering.
+  // For these frames, we don't set Hcurr to identity in AddFrame() and the
+  // logic here appends their transformation to Hprev to render them with the
+  // correct transformation. For the frames we do use for mosaicing, we already
+  // append their Hcurr to Hprev in AddFrame() and then set Hcurr to identity.
+
+  double Hinv33[3][3];
+  double Hprev33[3][3];
+  double Hcurr33[3][3];
+
+  Matrix33::convert9to33(Hcurr33, Hcurr);
+  normProjMat33d(Hcurr33);
+  inv33d(Hcurr33, Hinv33);
+
+  Matrix33::convert9to33(Hprev33, Hprev);
+
+  mult33d(trs, Hprev33, Hinv33);
+  normProjMat33d(trs);
+
+  return ALIGN_RET_OK;
+}
+
diff --git a/benchmark/feature_mos/src/mosaic/AlignFeatures.h b/benchmark/feature_mos/src/mosaic/AlignFeatures.h
new file mode 100644
index 00000000..9999f575
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/AlignFeatures.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+///////////////////////////////////////////////////
+// Align.h
+// S.O. # :
+// Author(s): zkira
+// $Id: AlignFeatures.h,v 1.13 2011/06/17 13:35:47 mbansal Exp $
+
+#ifndef ALIGN_H
+#define ALIGN_H
+
+#include "dbreg/dbreg.h"
+#include <db_utilities_camera.h>
+
+#include "ImageUtils.h"
+#include "MatrixUtils.h"
+
+class Align {
+
+public:
+  // Types of alignment possible
+  static const int ALIGN_TYPE_PAN    = 1;
+
+  // Return codes
+  static const int ALIGN_RET_LOW_TEXTURE  = -2;
+  static const int ALIGN_RET_ERROR        = -1;
+  static const int ALIGN_RET_OK           = 0;
+  static const int ALIGN_RET_FEW_INLIERS  = 1;
+
+  ///// Settings for feature-based alignment
+  // Number of features to use from corner detection
+  static const int DEFAULT_NR_CORNERS=750;
+  static const double DEFAULT_MAX_DISPARITY;//0.4;
+  // Type of homography to model
+  static const int DEFAULT_MOTION_MODEL=DB_HOMOGRAPHY_TYPE_R_T;
+// static const int DEFAULT_MOTION_MODEL=DB_HOMOGRAPHY_TYPE_PROJECTIVE;
+//  static const int DEFAULT_MOTION_MODEL=DB_HOMOGRAPHY_TYPE_AFFINE;
+  static const unsigned int DEFAULT_REFERENCE_UPDATE_PERIOD=1500; //  Manual reference frame update so set this to a large number
+
+  static const int MIN_NR_REF_CORNERS = 25;
+  static const int MIN_NR_INLIERS = 10;
+
+  Align();
+  ~Align();
+
+  // Initialization of structures, etc.
+  int initialize(int width, int height, bool quarter_res, float thresh_still);
+
+  // Add a frame.  Note: The alignment computation is performed
+  // in this function
+  int addFrameRGB(ImageType image);
+  int addFrame(ImageType image);
+
+  // Obtain the TRS matrix from the last two frames
+  int getLastTRS(double trs[3][3]);
+  char* getRegProfileString();
+
+protected:
+
+  db_FrameToReferenceRegistration reg;
+
+  int frame_number;
+
+  double Hcurr[9];   // Homography from the alignment reference to the frame-t
+  double Hprev[9];   // Homography from frame-0 to the frame-(t-1)
+
+  int reference_frame_index; // Index of the reference frame from all captured frames
+  int num_frames_captured; // Total number of frames captured (different from frame_number)
+  double average_tx_per_frame; // Average pixel translation per captured frame
+
+  int width,height;
+
+  bool quarter_res;     // Whether to process at quarter resolution
+  float thresh_still;   // Translation threshold in pixels to detect still camera
+  ImageType imageGray;
+};
+
+
+#endif
diff --git a/benchmark/feature_mos/src/mosaic/Blend.cpp b/benchmark/feature_mos/src/mosaic/Blend.cpp
new file mode 100644
index 00000000..625196c6
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/Blend.cpp
@@ -0,0 +1,1385 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+///////////////////////////////////////////////////
+// Blend.cpp
+// $Id: Blend.cpp,v 1.22 2011/06/24 04:22:14 mbansal Exp $
+
+#include <string.h>
+
+#include "Interp.h"
+#include "Blend.h"
+
+#include "Geometry.h"
+#include "trsMatrix.h"
+
+const float Blend::LIMIT_SIZE_MULTIPLIER = 50.f * 2.0f;
+const float Blend::LIMIT_HEIGHT_MULTIPLIER = 2.5f;
+
+Blend::Blend()
+{
+  m_wb.blendingType = BLEND_TYPE_NONE;
+}
+
+Blend::~Blend()
+{
+    if (m_pFrameVPyr) free(m_pFrameVPyr);
+    if (m_pFrameUPyr) free(m_pFrameUPyr);
+    if (m_pFrameYPyr) free(m_pFrameYPyr);
+}
+
+int Blend::initialize(int blendingType, int stripType, int frame_width, int frame_height)
+{
+    this->width = frame_width;
+    this->height = frame_height;
+    this->m_wb.blendingType = blendingType;
+    this->m_wb.stripType = stripType;
+
+    m_wb.blendRange = m_wb.blendRangeUV = BLEND_RANGE_DEFAULT;
+    m_wb.nlevs = m_wb.blendRange;
+    m_wb.nlevsC = m_wb.blendRangeUV;
+
+    if (m_wb.nlevs <= 0) m_wb.nlevs = 1; // Need levels for YUV processing
+    if (m_wb.nlevsC > m_wb.nlevs) m_wb.nlevsC = m_wb.nlevs;
+
+    m_wb.roundoffOverlap = 1.5;
+
+    m_pFrameYPyr = NULL;
+    m_pFrameUPyr = NULL;
+    m_pFrameVPyr = NULL;
+
+    m_pFrameYPyr = PyramidShort::allocatePyramidPacked(m_wb.nlevs, (unsigned short) width, (unsigned short) height, BORDER);
+    m_pFrameUPyr = PyramidShort::allocatePyramidPacked(m_wb.nlevsC, (unsigned short) (width), (unsigned short) (height), BORDER);
+    m_pFrameVPyr = PyramidShort::allocatePyramidPacked(m_wb.nlevsC, (unsigned short) (width), (unsigned short) (height), BORDER);
+
+    if (!m_pFrameYPyr || !m_pFrameUPyr || !m_pFrameVPyr)
+    {
+        return BLEND_RET_ERROR_MEMORY;
+    }
+
+    return BLEND_RET_OK;
+}
+
+inline double max(double a, double b) { return a > b ? a : b; }
+inline double min(double a, double b) { return a < b ? a : b; }
+
+void Blend::AlignToMiddleFrame(MosaicFrame **frames, int frames_size)
+{
+    // Unwarp this frame and Warp the others to match
+    MosaicFrame *mb = NULL;
+    MosaicFrame *ref = frames[int(frames_size/2)];    // Middle frame
+
+    double invtrs[3][3];
+    inv33d(ref->trs, invtrs);
+
+    for(int mfit = 0; mfit < frames_size; mfit++)
+    {
+        mb = frames[mfit];
+        double temp[3][3];
+        mult33d(temp, invtrs, mb->trs);
+        memcpy(mb->trs, temp, sizeof(temp));
+        normProjMat33d(mb->trs);
+    }
+}
+
+int Blend::runBlend(MosaicFrame **oframes, MosaicFrame **rframes,
+        int frames_size,
+        ImageType &imageMosaicYVU, int &mosaicWidth, int &mosaicHeight,
+        float &progress, bool &cancelComputation)
+{
+    int ret;
+    int numCenters;
+
+    MosaicFrame **frames;
+
+    // For THIN strip mode, accept all frames for blending
+    if (m_wb.stripType == STRIP_TYPE_THIN)
+    {
+        frames = oframes;
+    }
+    else // For WIDE strip mode, first select the relevant frames to blend.
+    {
+        SelectRelevantFrames(oframes, frames_size, rframes, frames_size);
+        frames = rframes;
+    }
+
+    ComputeBlendParameters(frames, frames_size, true);
+    numCenters = frames_size;
+
+    if (numCenters == 0)
+    {
+        return BLEND_RET_ERROR;
+    }
+
+    if (!(m_AllSites = m_Triangulator.allocMemory(numCenters)))
+    {
+        return BLEND_RET_ERROR_MEMORY;
+    }
+
+    // Bounding rectangle (real numbers) of the final mosaic computed by projecting
+    // each input frame into the mosaic coordinate system.
+    BlendRect global_rect;
+
+    global_rect.lft = global_rect.bot = 2e30; // min values
+    global_rect.rgt = global_rect.top = -2e30; // max values
+    MosaicFrame *mb = NULL;
+
+    double x0, y0, x1, y1, x2, y2, x3, y3;
+
+    // Corners of the left-most and right-most frames respectively in the
+    // mosaic coordinate system.
+    double xLeftCorners[2] = {2e30, 2e30};
+    double xRightCorners[2] = {-2e30, -2e30};
+
+    // Corners of the top-most and bottom-most frames respectively in the
+    // mosaic coordinate system.
+    double yTopCorners[2] = {2e30, 2e30};
+    double yBottomCorners[2] = {-2e30, -2e30};
+
+
+    // Determine the extents of the final mosaic
+    CSite *csite = m_AllSites ;
+    for(int mfit = 0; mfit < frames_size; mfit++)
+    {
+        mb = frames[mfit];
+
+        // Compute clipping for this frame's rect
+        FrameToMosaicRect(mb->width, mb->height, mb->trs, mb->brect);
+        // Clip global rect using this frame's rect
+        ClipRect(mb->brect, global_rect);
+
+        // Calculate the corner points
+        FrameToMosaic(mb->trs, 0.0,             0.0,            x0, y0);
+        FrameToMosaic(mb->trs, 0.0,             mb->height-1.0, x1, y1);
+        FrameToMosaic(mb->trs, mb->width-1.0,   mb->height-1.0, x2, y2);
+        FrameToMosaic(mb->trs, mb->width-1.0,   0.0,            x3, y3);
+
+        if(x0 < xLeftCorners[0] || x1 < xLeftCorners[1])    // If either of the left corners is lower
+        {
+            xLeftCorners[0] = x0;
+            xLeftCorners[1] = x1;
+        }
+
+        if(x3 > xRightCorners[0] || x2 > xRightCorners[1])    // If either of the right corners is higher
+        {
+            xRightCorners[0] = x3;
+            xRightCorners[1] = x2;
+        }
+
+        if(y0 < yTopCorners[0] || y3 < yTopCorners[1])    // If either of the top corners is lower
+        {
+            yTopCorners[0] = y0;
+            yTopCorners[1] = y3;
+        }
+
+        if(y1 > yBottomCorners[0] || y2 > yBottomCorners[1])    // If either of the bottom corners is higher
+        {
+            yBottomCorners[0] = y1;
+            yBottomCorners[1] = y2;
+        }
+
+
+        // Compute the centroid of the warped region
+        FindQuadCentroid(x0, y0, x1, y1, x2, y2, x3, y3, csite->getVCenter().x, csite->getVCenter().y);
+
+        csite->setMb(mb);
+        csite++;
+    }
+
+    // Get origin and sizes
+
+    // Bounding rectangle (int numbers) of the final mosaic computed by projecting
+    // each input frame into the mosaic coordinate system.
+    MosaicRect fullRect;
+
+    fullRect.left = (int) floor(global_rect.lft); // min-x
+    fullRect.top = (int) floor(global_rect.bot);  // min-y
+    fullRect.right = (int) ceil(global_rect.rgt); // max-x
+    fullRect.bottom = (int) ceil(global_rect.top);// max-y
+    Mwidth = (unsigned short) (fullRect.right - fullRect.left + 1);
+    Mheight = (unsigned short) (fullRect.bottom - fullRect.top + 1);
+
+    int xLeftMost, xRightMost;
+    int yTopMost, yBottomMost;
+
+    // Rounding up, so that we don't include the gray border.
+    xLeftMost = max(0, max(xLeftCorners[0], xLeftCorners[1]) - fullRect.left + 1);
+    xRightMost = min(Mwidth - 1, min(xRightCorners[0], xRightCorners[1]) - fullRect.left - 1);
+
+    yTopMost = max(0, max(yTopCorners[0], yTopCorners[1]) - fullRect.top + 1);
+    yBottomMost = min(Mheight - 1, min(yBottomCorners[0], yBottomCorners[1]) - fullRect.top - 1);
+
+    if (xRightMost <= xLeftMost || yBottomMost <= yTopMost)
+    {
+        return BLEND_RET_ERROR;
+    }
+
+    // Make sure image width is multiple of 4
+    Mwidth = (unsigned short) ((Mwidth + 3) & ~3);
+    Mheight = (unsigned short) ((Mheight + 3) & ~3);    // Round up.
+
+    ret = MosaicSizeCheck(LIMIT_SIZE_MULTIPLIER, LIMIT_HEIGHT_MULTIPLIER);
+    if (ret != BLEND_RET_OK)
+    {
+       return ret;
+    }
+
+    YUVinfo *imgMos = YUVinfo::allocateImage(Mwidth, Mheight);
+    if (imgMos == NULL)
+    {
+        return BLEND_RET_ERROR_MEMORY;
+    }
+
+    // Set the Y image to 255 so we can distinguish when frame idx are written to it
+    memset(imgMos->Y.ptr[0], 255, (imgMos->Y.width * imgMos->Y.height));
+    // Set the v and u images to black
+    memset(imgMos->V.ptr[0], 128, (imgMos->V.width * imgMos->V.height) << 1);
+
+    // Do the triangulation.  It returns a sorted list of edges
+    SEdgeVector *edge;
+    int n = m_Triangulator.triangulate(&edge, numCenters, width, height);
+    m_Triangulator.linkNeighbors(edge, n, numCenters);
+
+    // Bounding rectangle that determines the positioning of the rectangle that is
+    // cropped out of the computed mosaic to get rid of the gray borders.
+    MosaicRect cropping_rect;
+
+    if (m_wb.horizontal)
+    {
+        cropping_rect.left = xLeftMost;
+        cropping_rect.right = xRightMost;
+    }
+    else
+    {
+        cropping_rect.top = yTopMost;
+        cropping_rect.bottom = yBottomMost;
+    }
+
+    // Do merging and blending :
+    ret = DoMergeAndBlend(frames, numCenters, width, height, *imgMos, fullRect,
+            cropping_rect, progress, cancelComputation);
+
+    if (m_wb.blendingType == BLEND_TYPE_HORZ)
+        CropFinalMosaic(*imgMos, cropping_rect);
+
+
+    m_Triangulator.freeMemory();    // note: can be called even if delaunay_alloc() wasn't successful
+
+    imageMosaicYVU = imgMos->Y.ptr[0];
+
+
+    if (m_wb.blendingType == BLEND_TYPE_HORZ)
+    {
+        mosaicWidth = cropping_rect.right - cropping_rect.left + 1;
+        mosaicHeight = cropping_rect.bottom - cropping_rect.top + 1;
+    }
+    else
+    {
+        mosaicWidth = Mwidth;
+        mosaicHeight = Mheight;
+    }
+
+    return ret;
+}
+
+int Blend::MosaicSizeCheck(float sizeMultiplier, float heightMultiplier) {
+   if (Mwidth < width || Mheight < height) {
+        return BLEND_RET_ERROR;
+    }
+
+   if ((Mwidth * Mheight) > (width * height * sizeMultiplier)) {
+         return BLEND_RET_ERROR;
+   }
+
+   // We won't do blending for the cases where users swing the device too much
+   // in the secondary direction. We use a short side to determine the
+   // secondary direction because users may hold the device in landsape
+   // or portrait.
+   int shortSide = min(Mwidth, Mheight);
+   if (shortSide > height * heightMultiplier) {
+       return BLEND_RET_ERROR;
+   }
+
+   return BLEND_RET_OK;
+}
+
+int Blend::FillFramePyramid(MosaicFrame *mb)
+{
+    ImageType mbY, mbU, mbV;
+    // Lay this image, centered into the temporary buffer
+    mbY = mb->image;
+    mbU = mb->getU();
+    mbV = mb->getV();
+
+    int h, w;
+
+    for(h=0; h<height; h++)
+    {
+        ImageTypeShort yptr = m_pFrameYPyr->ptr[h];
+        ImageTypeShort uptr = m_pFrameUPyr->ptr[h];
+        ImageTypeShort vptr = m_pFrameVPyr->ptr[h];
+
+        for(w=0; w<width; w++)
+        {
+            yptr[w] = (short) ((*(mbY++)) << 3);
+            uptr[w] = (short) ((*(mbU++)) << 3);
+            vptr[w] = (short) ((*(mbV++)) << 3);
+        }
+    }
+
+    // Spread the image through the border
+    PyramidShort::BorderSpread(m_pFrameYPyr, BORDER, BORDER, BORDER, BORDER);
+    PyramidShort::BorderSpread(m_pFrameUPyr, BORDER, BORDER, BORDER, BORDER);
+    PyramidShort::BorderSpread(m_pFrameVPyr, BORDER, BORDER, BORDER, BORDER);
+
+    // Generate Laplacian pyramids
+    if (!PyramidShort::BorderReduce(m_pFrameYPyr, m_wb.nlevs) || !PyramidShort::BorderExpand(m_pFrameYPyr, m_wb.nlevs, -1) ||
+            !PyramidShort::BorderReduce(m_pFrameUPyr, m_wb.nlevsC) || !PyramidShort::BorderExpand(m_pFrameUPyr, m_wb.nlevsC, -1) ||
+            !PyramidShort::BorderReduce(m_pFrameVPyr, m_wb.nlevsC) || !PyramidShort::BorderExpand(m_pFrameVPyr, m_wb.nlevsC, -1))
+    {
+        return BLEND_RET_ERROR;
+    }
+    else
+    {
+        return BLEND_RET_OK;
+    }
+}
+
+int Blend::DoMergeAndBlend(MosaicFrame **frames, int nsite,
+             int width, int height, YUVinfo &imgMos, MosaicRect &rect,
+             MosaicRect &cropping_rect, float &progress, bool &cancelComputation)
+{
+    m_pMosaicYPyr = NULL;
+    m_pMosaicUPyr = NULL;
+    m_pMosaicVPyr = NULL;
+
+    m_pMosaicYPyr = PyramidShort::allocatePyramidPacked(m_wb.nlevs,(unsigned short)rect.Width(),(unsigned short)rect.Height(),BORDER);
+    m_pMosaicUPyr = PyramidShort::allocatePyramidPacked(m_wb.nlevsC,(unsigned short)rect.Width(),(unsigned short)rect.Height(),BORDER);
+    m_pMosaicVPyr = PyramidShort::allocatePyramidPacked(m_wb.nlevsC,(unsigned short)rect.Width(),(unsigned short)rect.Height(),BORDER);
+    if (!m_pMosaicYPyr || !m_pMosaicUPyr || !m_pMosaicVPyr)
+    {
+      return BLEND_RET_ERROR_MEMORY;
+    }
+
+    MosaicFrame *mb;
+
+    CSite *esite = m_AllSites + nsite;
+    int site_idx;
+
+    // First go through each frame and for each mosaic pixel determine which frame it should come from
+    site_idx = 0;
+    for(CSite *csite = m_AllSites; csite < esite; csite++)
+    {
+        if(cancelComputation)
+        {
+            if (m_pMosaicVPyr) free(m_pMosaicVPyr);
+            if (m_pMosaicUPyr) free(m_pMosaicUPyr);
+            if (m_pMosaicYPyr) free(m_pMosaicYPyr);
+            return BLEND_RET_CANCELLED;
+        }
+
+        mb = csite->getMb();
+
+        mb->vcrect = mb->brect;
+        ClipBlendRect(csite, mb->vcrect);
+
+        ComputeMask(csite, mb->vcrect, mb->brect, rect, imgMos, site_idx);
+
+        site_idx++;
+    }
+
+    ////////// imgMos.Y, imgMos.V, imgMos.U are used as follows //////////////
+    ////////////////////// THIN STRIP MODE ///////////////////////////////////
+
+    // imgMos.Y is used to store the index of the image from which each pixel
+    // in the output mosaic can be read out for the thin-strip mode. Thus,
+    // there is no special handling for pixels around the seam. Also, imgMos.Y
+    // is set to 255 wherever we can't get its value from any input image e.g.
+    // in the gray border areas. imgMos.V and imgMos.U are set to 128 for the
+    // thin-strip mode.
+
+    ////////////////////// WIDE STRIP MODE ///////////////////////////////////
+
+    // imgMos.Y is used the same way as the thin-strip mode.
+    // imgMos.V is used to store the index of the neighboring image which
+    // should contribute to the color of an output pixel in a band around
+    // the seam. Thus, in this band, we will crossfade between the color values
+    // from the image index imgMos.Y and image index imgMos.V. imgMos.U is
+    // used to store the weight (multiplied by 100) that each image will
+    // contribute to the blending process. Thus, we start at 99% contribution
+    // from the first image, then go to 50% contribution from each image at
+    // the seam. Then, the contribution from the second image goes up to 99%.
+
+    // For WIDE mode, set the pixel masks to guide the blender to cross-fade
+    // between the images on either side of each seam:
+    if (m_wb.stripType == STRIP_TYPE_WIDE)
+    {
+        if(m_wb.horizontal)
+        {
+            // Set the number of pixels around the seam to cross-fade between
+            // the two component images,
+            int tw = STRIP_CROSS_FADE_WIDTH_PXLS;
+
+            // Proceed with the image index calculation for cross-fading
+            // only if the cross-fading width is larger than 0
+            if (tw > 0)
+            {
+                for(int y = 0; y < imgMos.Y.height; y++)
+                {
+                    // Since we compare two adjecant pixels to determine
+                    // whether there is a seam, the termination condition of x
+                    // is set to imgMos.Y.width - tw, so that x+1 below
+                    // won't exceed the imgMos' boundary.
+                    for(int x = tw; x < imgMos.Y.width - tw; )
+                    {
+                        // Determine where the seam is...
+                        if (imgMos.Y.ptr[y][x] != imgMos.Y.ptr[y][x+1] &&
+                                imgMos.Y.ptr[y][x] != 255 &&
+                                imgMos.Y.ptr[y][x+1] != 255)
+                        {
+                            // Find the image indices on both sides of the seam
+                            unsigned char idx1 = imgMos.Y.ptr[y][x];
+                            unsigned char idx2 = imgMos.Y.ptr[y][x+1];
+
+                            for (int o = tw; o >= 0; o--)
+                            {
+                                // Set the image index to use for cross-fading
+                                imgMos.V.ptr[y][x - o] = idx2;
+                                // Set the intensity weights to use for cross-fading
+                                imgMos.U.ptr[y][x - o] = 50 + (99 - 50) * o / tw;
+                            }
+
+                            for (int o = 1; o <= tw; o++)
+                            {
+                                // Set the image index to use for cross-fading
+                                imgMos.V.ptr[y][x + o] = idx1;
+                                // Set the intensity weights to use for cross-fading
+                                imgMos.U.ptr[y][x + o] = imgMos.U.ptr[y][x - o];
+                            }
+
+                            x += (tw + 1);
+                        }
+                        else
+                        {
+                            x++;
+                        }
+                    }
+                }
+            }
+        }
+        else
+        {
+            // Set the number of pixels around the seam to cross-fade between
+            // the two component images,
+            int tw = STRIP_CROSS_FADE_WIDTH_PXLS;
+
+            // Proceed with the image index calculation for cross-fading
+            // only if the cross-fading width is larger than 0
+            if (tw > 0)
+            {
+                for(int x = 0; x < imgMos.Y.width; x++)
+                {
+                    // Since we compare two adjecant pixels to determine
+                    // whether there is a seam, the termination condition of y
+                    // is set to imgMos.Y.height - tw, so that y+1 below
+                    // won't exceed the imgMos' boundary.
+                    for(int y = tw; y < imgMos.Y.height - tw; )
+                    {
+                        // Determine where the seam is...
+                        if (imgMos.Y.ptr[y][x] != imgMos.Y.ptr[y+1][x] &&
+                                imgMos.Y.ptr[y][x] != 255 &&
+                                imgMos.Y.ptr[y+1][x] != 255)
+                        {
+                            // Find the image indices on both sides of the seam
+                            unsigned char idx1 = imgMos.Y.ptr[y][x];
+                            unsigned char idx2 = imgMos.Y.ptr[y+1][x];
+
+                            for (int o = tw; o >= 0; o--)
+                            {
+                                // Set the image index to use for cross-fading
+                                imgMos.V.ptr[y - o][x] = idx2;
+                                // Set the intensity weights to use for cross-fading
+                                imgMos.U.ptr[y - o][x] = 50 + (99 - 50) * o / tw;
+                            }
+
+                            for (int o = 1; o <= tw; o++)
+                            {
+                                // Set the image index to use for cross-fading
+                                imgMos.V.ptr[y + o][x] = idx1;
+                                // Set the intensity weights to use for cross-fading
+                                imgMos.U.ptr[y + o][x] = imgMos.U.ptr[y - o][x];
+                            }
+
+                            y += (tw + 1);
+                        }
+                        else
+                        {
+                            y++;
+                        }
+                    }
+                }
+            }
+        }
+
+    }
+
+    // Now perform the actual blending using the frame assignment determined above
+    site_idx = 0;
+    for(CSite *csite = m_AllSites; csite < esite; csite++)
+    {
+        if(cancelComputation)
+        {
+            if (m_pMosaicVPyr) free(m_pMosaicVPyr);
+            if (m_pMosaicUPyr) free(m_pMosaicUPyr);
+            if (m_pMosaicYPyr) free(m_pMosaicYPyr);
+            return BLEND_RET_CANCELLED;
+        }
+
+        mb = csite->getMb();
+
+
+        if(FillFramePyramid(mb)!=BLEND_RET_OK)
+            return BLEND_RET_ERROR;
+
+        ProcessPyramidForThisFrame(csite, mb->vcrect, mb->brect, rect, imgMos, mb->trs, site_idx);
+
+        progress += TIME_PERCENT_BLEND/nsite;
+
+        site_idx++;
+    }
+
+
+    // Blend
+    PerformFinalBlending(imgMos, cropping_rect);
+
+    if (cropping_rect.Width() <= 0 || cropping_rect.Height() <= 0)
+    {
+        return BLEND_RET_ERROR;
+    }
+
+    if (m_pMosaicVPyr) free(m_pMosaicVPyr);
+    if (m_pMosaicUPyr) free(m_pMosaicUPyr);
+    if (m_pMosaicYPyr) free(m_pMosaicYPyr);
+
+    progress += TIME_PERCENT_FINAL;
+
+    return BLEND_RET_OK;
+}
+
+void Blend::CropFinalMosaic(YUVinfo &imgMos, MosaicRect &cropping_rect)
+{
+    int i, j, k;
+    ImageType yimg;
+    ImageType uimg;
+    ImageType vimg;
+
+
+    yimg = imgMos.Y.ptr[0];
+    uimg = imgMos.U.ptr[0];
+    vimg = imgMos.V.ptr[0];
+
+    k = 0;
+    for (j = cropping_rect.top; j <= cropping_rect.bottom; j++)
+    {
+        for (i = cropping_rect.left; i <= cropping_rect.right; i++)
+        {
+            yimg[k] = yimg[j*imgMos.Y.width+i];
+            k++;
+        }
+    }
+    for (j = cropping_rect.top; j <= cropping_rect.bottom; j++)
+    {
+       for (i = cropping_rect.left; i <= cropping_rect.right; i++)
+        {
+            yimg[k] = vimg[j*imgMos.Y.width+i];
+            k++;
+        }
+    }
+    for (j = cropping_rect.top; j <= cropping_rect.bottom; j++)
+    {
+       for (i = cropping_rect.left; i <= cropping_rect.right; i++)
+        {
+            yimg[k] = uimg[j*imgMos.Y.width+i];
+            k++;
+        }
+    }
+}
+
+int Blend::PerformFinalBlending(YUVinfo &imgMos, MosaicRect &cropping_rect)
+{
+    if (!PyramidShort::BorderExpand(m_pMosaicYPyr, m_wb.nlevs, 1) || !PyramidShort::BorderExpand(m_pMosaicUPyr, m_wb.nlevsC, 1) ||
+        !PyramidShort::BorderExpand(m_pMosaicVPyr, m_wb.nlevsC, 1))
+    {
+      return BLEND_RET_ERROR;
+    }
+
+    ImageTypeShort myimg;
+    ImageTypeShort muimg;
+    ImageTypeShort mvimg;
+    ImageType yimg;
+    ImageType uimg;
+    ImageType vimg;
+
+    // 2D boolean array that contains true wherever the mosaic image data is
+    // invalid (i.e. in the gray border).
+    bool **b = new bool*[imgMos.Y.height];
+
+    for(int j=0; j<imgMos.Y.height; j++)
+    {
+        b[j] = new bool[imgMos.Y.width];
+    }
+
+    // Copy the resulting image into the full image using the mask
+    int i, j;
+
+    yimg = imgMos.Y.ptr[0];
+    uimg = imgMos.U.ptr[0];
+    vimg = imgMos.V.ptr[0];
+
+    for (j = 0; j < imgMos.Y.height; j++)
+    {
+        myimg = m_pMosaicYPyr->ptr[j];
+        muimg = m_pMosaicUPyr->ptr[j];
+        mvimg = m_pMosaicVPyr->ptr[j];
+
+        for (i = 0; i<imgMos.Y.width; i++)
+        {
+            // A final mask was set up previously,
+            // if the value is zero skip it, otherwise replace it.
+            if (*yimg <255)
+            {
+                short value = (short) ((*myimg) >> 3);
+                if (value < 0) value = 0;
+                else if (value > 255) value = 255;
+                *yimg = (unsigned char) value;
+
+                value = (short) ((*muimg) >> 3);
+                if (value < 0) value = 0;
+                else if (value > 255) value = 255;
+                *uimg = (unsigned char) value;
+
+                value = (short) ((*mvimg) >> 3);
+                if (value < 0) value = 0;
+                else if (value > 255) value = 255;
+                *vimg = (unsigned char) value;
+
+                b[j][i] = false;
+
+            }
+            else
+            {   // set border color in here
+                *yimg = (unsigned char) 96;
+                *uimg = (unsigned char) 128;
+                *vimg = (unsigned char) 128;
+
+                b[j][i] = true;
+            }
+
+            yimg++;
+            uimg++;
+            vimg++;
+            myimg++;
+            muimg++;
+            mvimg++;
+        }
+    }
+
+    if(m_wb.horizontal)
+    {
+        //Scan through each row and increment top if the row contains any gray
+        for (j = 0; j < imgMos.Y.height; j++)
+        {
+            for (i = cropping_rect.left; i < cropping_rect.right; i++)
+            {
+                if (b[j][i])
+                {
+                    break; // to next row
+                }
+            }
+
+            if (i == cropping_rect.right)   //no gray pixel in this row!
+            {
+                cropping_rect.top = j;
+                break;
+            }
+        }
+
+        //Scan through each row and decrement bottom if the row contains any gray
+        for (j = imgMos.Y.height-1; j >= 0; j--)
+        {
+            for (i = cropping_rect.left; i < cropping_rect.right; i++)
+            {
+                if (b[j][i])
+                {
+                    break; // to next row
+                }
+            }
+
+            if (i == cropping_rect.right)   //no gray pixel in this row!
+            {
+                cropping_rect.bottom = j;
+                break;
+            }
+        }
+    }
+    else // Vertical Mosaic
+    {
+        //Scan through each column and increment left if the column contains any gray
+        for (i = 0; i < imgMos.Y.width; i++)
+        {
+            for (j = cropping_rect.top; j < cropping_rect.bottom; j++)
+            {
+                if (b[j][i])
+                {
+                    break; // to next column
+                }
+            }
+
+            if (j == cropping_rect.bottom)   //no gray pixel in this column!
+            {
+                cropping_rect.left = i;
+                break;
+            }
+        }
+
+        //Scan through each column and decrement right if the column contains any gray
+        for (i = imgMos.Y.width-1; i >= 0; i--)
+        {
+            for (j = cropping_rect.top; j < cropping_rect.bottom; j++)
+            {
+                if (b[j][i])
+                {
+                    break; // to next column
+                }
+            }
+
+            if (j == cropping_rect.bottom)   //no gray pixel in this column!
+            {
+                cropping_rect.right = i;
+                break;
+            }
+        }
+
+    }
+
+    RoundingCroppingSizeToMultipleOf8(cropping_rect);
+
+    for(int j=0; j<imgMos.Y.height; j++)
+    {
+        delete b[j];
+    }
+
+    delete[] b;
+
+    return BLEND_RET_OK;
+}
+
+void Blend::RoundingCroppingSizeToMultipleOf8(MosaicRect &rect) {
+    int height = rect.bottom - rect.top + 1;
+    int residue = height & 7;
+    rect.bottom -= residue;
+
+    int width = rect.right - rect.left + 1;
+    residue = width & 7;
+    rect.right -= residue;
+}
+
+void Blend::ComputeMask(CSite *csite, BlendRect &vcrect, BlendRect &brect, MosaicRect &rect, YUVinfo &imgMos, int site_idx)
+{
+    PyramidShort *dptr = m_pMosaicYPyr;
+
+    int l = (int) ((vcrect.lft - rect.left));
+    int b = (int) ((vcrect.bot - rect.top));
+    int r = (int) ((vcrect.rgt - rect.left));
+    int t = (int) ((vcrect.top - rect.top));
+
+    if (vcrect.lft == brect.lft)
+        l = (l <= 0) ? -BORDER : l - BORDER;
+    else if (l < -BORDER)
+        l = -BORDER;
+
+    if (vcrect.bot == brect.bot)
+        b = (b <= 0) ? -BORDER : b - BORDER;
+    else if (b < -BORDER)
+        b = -BORDER;
+
+    if (vcrect.rgt == brect.rgt)
+        r = (r >= dptr->width) ? dptr->width + BORDER - 1 : r + BORDER;
+    else if (r >= dptr->width + BORDER)
+        r = dptr->width + BORDER - 1;
+
+    if (vcrect.top == brect.top)
+        t = (t >= dptr->height) ? dptr->height + BORDER - 1 : t + BORDER;
+    else if (t >= dptr->height + BORDER)
+        t = dptr->height + BORDER - 1;
+
+    // Walk the Region of interest and populate the pyramid
+    for (int j = b; j <= t; j++)
+    {
+        int jj = j;
+        double sj = jj + rect.top;
+
+        for (int i = l; i <= r; i++)
+        {
+            int ii = i;
+            // project point and then triangulate to neighbors
+            double si = ii + rect.left;
+
+            double dself = hypotSq(csite->getVCenter().x - si, csite->getVCenter().y - sj);
+            int inMask = ((unsigned) ii < imgMos.Y.width &&
+                    (unsigned) jj < imgMos.Y.height) ? 1 : 0;
+
+            if(!inMask)
+                continue;
+
+            // scan the neighbors to see if this is a valid position
+            SEdgeVector *ce;
+            int ecnt;
+            for (ce = csite->getNeighbor(), ecnt = csite->getNumNeighbors(); ecnt--; ce++)
+            {
+                double d1 = hypotSq(m_AllSites[ce->second].getVCenter().x - si,
+                        m_AllSites[ce->second].getVCenter().y - sj);
+                if (d1 < dself)
+                {
+                    break;
+                }
+            }
+
+            if (ecnt >= 0) continue;
+
+            imgMos.Y.ptr[jj][ii] = (unsigned char)site_idx;
+        }
+    }
+}
+
+void Blend::ProcessPyramidForThisFrame(CSite *csite, BlendRect &vcrect, BlendRect &brect, MosaicRect &rect, YUVinfo &imgMos, double trs[3][3], int site_idx)
+{
+    // Put the Region of interest (for all levels) into m_pMosaicYPyr
+    double inv_trs[3][3];
+    inv33d(trs, inv_trs);
+
+    // Process each pyramid level
+    PyramidShort *sptr = m_pFrameYPyr;
+    PyramidShort *suptr = m_pFrameUPyr;
+    PyramidShort *svptr = m_pFrameVPyr;
+
+    PyramidShort *dptr = m_pMosaicYPyr;
+    PyramidShort *duptr = m_pMosaicUPyr;
+    PyramidShort *dvptr = m_pMosaicVPyr;
+
+    int dscale = 0; // distance scale for the current level
+    int nC = m_wb.nlevsC;
+    for (int n = m_wb.nlevs; n--; dscale++, dptr++, sptr++, dvptr++, duptr++, svptr++, suptr++, nC--)
+    {
+        int l = (int) ((vcrect.lft - rect.left) / (1 << dscale));
+        int b = (int) ((vcrect.bot - rect.top) / (1 << dscale));
+        int r = (int) ((vcrect.rgt - rect.left) / (1 << dscale) + .5);
+        int t = (int) ((vcrect.top - rect.top) / (1 << dscale) + .5);
+
+        if (vcrect.lft == brect.lft)
+            l = (l <= 0) ? -BORDER : l - BORDER;
+        else if (l < -BORDER)
+            l = -BORDER;
+
+        if (vcrect.bot == brect.bot)
+            b = (b <= 0) ? -BORDER : b - BORDER;
+        else if (b < -BORDER)
+            b = -BORDER;
+
+        if (vcrect.rgt == brect.rgt)
+            r = (r >= dptr->width) ? dptr->width + BORDER - 1 : r + BORDER;
+        else if (r >= dptr->width + BORDER)
+            r = dptr->width + BORDER - 1;
+
+        if (vcrect.top == brect.top)
+            t = (t >= dptr->height) ? dptr->height + BORDER - 1 : t + BORDER;
+        else if (t >= dptr->height + BORDER)
+            t = dptr->height + BORDER - 1;
+
+        // Walk the Region of interest and populate the pyramid
+        for (int j = b; j <= t; j++)
+        {
+            int jj = (j << dscale);
+            double sj = jj + rect.top;
+
+            for (int i = l; i <= r; i++)
+            {
+                int ii = (i << dscale);
+                // project point and then triangulate to neighbors
+                double si = ii + rect.left;
+
+                int inMask = ((unsigned) ii < imgMos.Y.width &&
+                        (unsigned) jj < imgMos.Y.height) ? 1 : 0;
+
+                if(inMask && imgMos.Y.ptr[jj][ii] != site_idx &&
+                        imgMos.V.ptr[jj][ii] != site_idx &&
+                        imgMos.Y.ptr[jj][ii] != 255)
+                    continue;
+
+                // Setup weights for cross-fading
+                // Weight of the intensity already in the output pixel
+                double wt0 = 0.0;
+                // Weight of the intensity from the input pixel (current frame)
+                double wt1 = 1.0;
+
+                if (m_wb.stripType == STRIP_TYPE_WIDE)
+                {
+                    if(inMask && imgMos.Y.ptr[jj][ii] != 255)
+                    {
+                        // If not on a seam OR pyramid level exceeds
+                        // maximum level for cross-fading.
+                        if((imgMos.V.ptr[jj][ii] == 128) ||
+                            (dscale > STRIP_CROSS_FADE_MAX_PYR_LEVEL))
+                        {
+                            wt0 = 0.0;
+                            wt1 = 1.0;
+                        }
+                        else
+                        {
+                            wt0 = 1.0;
+                            wt1 = ((imgMos.Y.ptr[jj][ii] == site_idx) ?
+                                    (double)imgMos.U.ptr[jj][ii] / 100.0 :
+                                    1.0 - (double)imgMos.U.ptr[jj][ii] / 100.0);
+                        }
+                    }
+                }
+
+                // Project this mosaic point into the original frame coordinate space
+                double xx, yy;
+
+                MosaicToFrame(inv_trs, si, sj, xx, yy);
+
+                if (xx < 0.0 || yy < 0.0 || xx > width - 1.0 || yy > height - 1.0)
+                {
+                    if(inMask)
+                    {
+                        imgMos.Y.ptr[jj][ii] = 255;
+                        wt0 = 0.0f;
+                        wt1 = 1.0f;
+                    }
+                }
+
+                xx /= (1 << dscale);
+                yy /= (1 << dscale);
+
+
+                int x1 = (xx >= 0.0) ? (int) xx : (int) floor(xx);
+                int y1 = (yy >= 0.0) ? (int) yy : (int) floor(yy);
+
+                // Final destination in extended pyramid
+#ifndef LINEAR_INTERP
+                if(inSegment(x1, sptr->width, BORDER-1) &&
+                        inSegment(y1, sptr->height, BORDER-1))
+                {
+                    double xfrac = xx - x1;
+                    double yfrac = yy - y1;
+                    dptr->ptr[j][i] = (short) (wt0 * dptr->ptr[j][i] + .5 +
+                            wt1 * ciCalc(sptr, x1, y1, xfrac, yfrac));
+                    if (dvptr >= m_pMosaicVPyr && nC > 0)
+                    {
+                        duptr->ptr[j][i] = (short) (wt0 * duptr->ptr[j][i] + .5 +
+                                wt1 * ciCalc(suptr, x1, y1, xfrac, yfrac));
+                        dvptr->ptr[j][i] = (short) (wt0 * dvptr->ptr[j][i] + .5 +
+                                wt1 * ciCalc(svptr, x1, y1, xfrac, yfrac));
+                    }
+                }
+#else
+                if(inSegment(x1, sptr->width, BORDER) && inSegment(y1, sptr->height, BORDER))
+                {
+                    int x2 = x1 + 1;
+                    int y2 = y1 + 1;
+                    double xfrac = xx - x1;
+                    double yfrac = yy - y1;
+                    double y1val = sptr->ptr[y1][x1] +
+                        (sptr->ptr[y1][x2] - sptr->ptr[y1][x1]) * xfrac;
+                    double y2val = sptr->ptr[y2][x1] +
+                        (sptr->ptr[y2][x2] - sptr->ptr[y2][x1]) * xfrac;
+                    dptr->ptr[j][i] = (short) (y1val + yfrac * (y2val - y1val));
+
+                    if (dvptr >= m_pMosaicVPyr && nC > 0)
+                    {
+                        y1val = suptr->ptr[y1][x1] +
+                            (suptr->ptr[y1][x2] - suptr->ptr[y1][x1]) * xfrac;
+                        y2val = suptr->ptr[y2][x1] +
+                            (suptr->ptr[y2][x2] - suptr->ptr[y2][x1]) * xfrac;
+
+                        duptr->ptr[j][i] = (short) (y1val + yfrac * (y2val - y1val));
+
+                        y1val = svptr->ptr[y1][x1] +
+                            (svptr->ptr[y1][x2] - svptr->ptr[y1][x1]) * xfrac;
+                        y2val = svptr->ptr[y2][x1] +
+                            (svptr->ptr[y2][x2] - svptr->ptr[y2][x1]) * xfrac;
+
+                        dvptr->ptr[j][i] = (short) (y1val + yfrac * (y2val - y1val));
+                    }
+                }
+#endif
+                else
+                {
+                    clipToSegment(x1, sptr->width, BORDER);
+                    clipToSegment(y1, sptr->height, BORDER);
+
+                    dptr->ptr[j][i] = (short) (wt0 * dptr->ptr[j][i] + 0.5 +
+                            wt1 * sptr->ptr[y1][x1] );
+                    if (dvptr >= m_pMosaicVPyr && nC > 0)
+                    {
+                        dvptr->ptr[j][i] = (short) (wt0 * dvptr->ptr[j][i] +
+                                0.5 + wt1 * svptr->ptr[y1][x1] );
+                        duptr->ptr[j][i] = (short) (wt0 * duptr->ptr[j][i] +
+                                0.5 + wt1 * suptr->ptr[y1][x1] );
+                    }
+                }
+            }
+        }
+    }
+}
+
+void Blend::MosaicToFrame(double trs[3][3], double x, double y, double &wx, double &wy)
+{
+    double X, Y, z;
+    if (m_wb.theta == 0.0)
+    {
+        X = x;
+        Y = y;
+    }
+    else if (m_wb.horizontal)
+    {
+        double alpha = x * m_wb.direction / m_wb.width;
+        double length = (y - alpha * m_wb.correction) * m_wb.direction + m_wb.radius;
+        double deltaTheta = m_wb.theta * alpha;
+        double sinTheta = sin(deltaTheta);
+        double cosTheta = sqrt(1.0 - sinTheta * sinTheta) * m_wb.direction;
+        X = length * sinTheta + m_wb.x;
+        Y = length * cosTheta + m_wb.y;
+    }
+    else
+    {
+        double alpha = y * m_wb.direction / m_wb.width;
+        double length = (x - alpha * m_wb.correction) * m_wb.direction + m_wb.radius;
+        double deltaTheta = m_wb.theta * alpha;
+        double sinTheta = sin(deltaTheta);
+        double cosTheta = sqrt(1.0 - sinTheta * sinTheta) * m_wb.direction;
+        Y = length * sinTheta + m_wb.y;
+        X = length * cosTheta + m_wb.x;
+    }
+    z = ProjZ(trs, X, Y, 1.0);
+    wx = ProjX(trs, X, Y, z, 1.0);
+    wy = ProjY(trs, X, Y, z, 1.0);
+}
+
+void Blend::FrameToMosaic(double trs[3][3], double x, double y, double &wx, double &wy)
+{
+    // Project into the intermediate Mosaic coordinate system
+    double z = ProjZ(trs, x, y, 1.0);
+    double X = ProjX(trs, x, y, z, 1.0);
+    double Y = ProjY(trs, x, y, z, 1.0);
+
+    if (m_wb.theta == 0.0)
+    {
+        // No rotation, then this is all we need to do.
+        wx = X;
+        wy = Y;
+    }
+    else if (m_wb.horizontal)
+    {
+        double deltaX = X - m_wb.x;
+        double deltaY = Y - m_wb.y;
+        double length = sqrt(deltaX * deltaX + deltaY * deltaY);
+        double deltaTheta = asin(deltaX / length);
+        double alpha = deltaTheta / m_wb.theta;
+        wx = alpha * m_wb.width * m_wb.direction;
+        wy = (length - m_wb.radius) * m_wb.direction + alpha * m_wb.correction;
+    }
+    else
+    {
+        double deltaX = X - m_wb.x;
+        double deltaY = Y - m_wb.y;
+        double length = sqrt(deltaX * deltaX + deltaY * deltaY);
+        double deltaTheta = asin(deltaY / length);
+        double alpha = deltaTheta / m_wb.theta;
+        wy = alpha * m_wb.width * m_wb.direction;
+        wx = (length - m_wb.radius) * m_wb.direction + alpha * m_wb.correction;
+    }
+}
+
+
+
+// Clip the region of interest as small as possible by using the Voronoi edges of
+// the neighbors
+void Blend::ClipBlendRect(CSite *csite, BlendRect &brect)
+{
+      SEdgeVector *ce;
+      int ecnt;
+      for (ce = csite->getNeighbor(), ecnt = csite->getNumNeighbors(); ecnt--; ce++)
+      {
+        // calculate the Voronoi bisector intersection
+        const double epsilon = 1e-5;
+        double dx = (m_AllSites[ce->second].getVCenter().x - m_AllSites[ce->first].getVCenter().x);
+        double dy = (m_AllSites[ce->second].getVCenter().y - m_AllSites[ce->first].getVCenter().y);
+        double xmid = m_AllSites[ce->first].getVCenter().x + dx/2.0;
+        double ymid = m_AllSites[ce->first].getVCenter().y + dy/2.0;
+        double inter;
+
+        if (dx > epsilon)
+        {
+          // neighbor is on right
+          if ((inter = m_wb.roundoffOverlap + xmid - dy * (((dy >= 0.0) ? brect.bot : brect.top) - ymid) / dx) < brect.rgt)
+            brect.rgt = inter;
+        }
+        else if (dx < -epsilon)
+        {
+          // neighbor is on left
+          if ((inter = -m_wb.roundoffOverlap + xmid - dy * (((dy >= 0.0) ? brect.bot : brect.top) - ymid) / dx) > brect.lft)
+            brect.lft = inter;
+        }
+        if (dy > epsilon)
+        {
+          // neighbor is above
+          if ((inter = m_wb.roundoffOverlap + ymid - dx * (((dx >= 0.0) ? brect.lft : brect.rgt) - xmid) / dy) < brect.top)
+            brect.top = inter;
+        }
+        else if (dy < -epsilon)
+        {
+          // neighbor is below
+          if ((inter = -m_wb.roundoffOverlap + ymid - dx * (((dx >= 0.0) ? brect.lft : brect.rgt) - xmid) / dy) > brect.bot)
+            brect.bot = inter;
+        }
+      }
+}
+
+void Blend::FrameToMosaicRect(int width, int height, double trs[3][3], BlendRect &brect)
+{
+    // We need to walk the perimeter since the borders can be bent.
+    brect.lft = brect.bot = 2e30;
+    brect.rgt = brect.top = -2e30;
+    double xpos, ypos;
+    double lasty = height - 1.0;
+    double lastx = width - 1.0;
+    int i;
+
+    for (i = width; i--;)
+    {
+
+        FrameToMosaic(trs, (double) i, 0.0, xpos, ypos);
+        ClipRect(xpos, ypos, brect);
+        FrameToMosaic(trs, (double) i, lasty, xpos, ypos);
+        ClipRect(xpos, ypos, brect);
+    }
+    for (i = height; i--;)
+    {
+        FrameToMosaic(trs, 0.0, (double) i, xpos, ypos);
+        ClipRect(xpos, ypos, brect);
+        FrameToMosaic(trs, lastx, (double) i, xpos, ypos);
+        ClipRect(xpos, ypos, brect);
+    }
+}
+
+void Blend::SelectRelevantFrames(MosaicFrame **frames, int frames_size,
+        MosaicFrame **relevant_frames, int &relevant_frames_size)
+{
+    MosaicFrame *first = frames[0];
+    MosaicFrame *last = frames[frames_size-1];
+    MosaicFrame *mb;
+
+    double midX = last->width / 2.0;
+    double midY = last->height / 2.0;
+    double z = ProjZ(first->trs, midX, midY, 1.0);
+    double firstX, firstY;
+    double prevX = firstX = ProjX(first->trs, midX, midY, z, 1.0);
+    double prevY = firstY = ProjY(first->trs, midX, midY, z, 1.0);
+
+    relevant_frames[0] = first; // Add first frame by default
+    relevant_frames_size = 1;
+
+    for (int i = 0; i < frames_size - 1; i++)
+    {
+        mb = frames[i];
+        double currX, currY;
+        z = ProjZ(mb->trs, midX, midY, 1.0);
+        currX = ProjX(mb->trs, midX, midY, z, 1.0);
+        currY = ProjY(mb->trs, midX, midY, z, 1.0);
+        double deltaX = currX - prevX;
+        double deltaY = currY - prevY;
+
+        if (fabs(deltaX) > STRIP_SEPARATION_THRESHOLD_PXLS ||
+                fabs(deltaY) > STRIP_SEPARATION_THRESHOLD_PXLS)
+        {
+            relevant_frames[relevant_frames_size] = mb;
+            relevant_frames_size++;
+
+            prevX = currX;
+            prevY = currY;
+        }
+    }
+
+    // Add last frame by default
+    relevant_frames[relevant_frames_size] = last;
+    relevant_frames_size++;
+}
+
+void Blend::ComputeBlendParameters(MosaicFrame **frames, int frames_size, int is360)
+{
+    // For FULL and PAN modes, we do not unwarp the mosaic into a rectangular coordinate system
+    // and so we set the theta to 0 and return.
+    if (m_wb.blendingType != BLEND_TYPE_CYLPAN && m_wb.blendingType != BLEND_TYPE_HORZ)
+    {
+        m_wb.theta = 0.0;
+        return;
+    }
+
+    MosaicFrame *first = frames[0];
+    MosaicFrame *last = frames[frames_size-1];
+    MosaicFrame *mb;
+
+    double lxpos = last->trs[0][2], lypos = last->trs[1][2];
+    double fxpos = first->trs[0][2], fypos = first->trs[1][2];
+
+    // Calculate warp to produce proper stitching.
+    // get x, y displacement
+    double midX = last->width / 2.0;
+    double midY = last->height / 2.0;
+    double z = ProjZ(first->trs, midX, midY, 1.0);
+    double firstX, firstY;
+    double prevX = firstX = ProjX(first->trs, midX, midY, z, 1.0);
+    double prevY = firstY = ProjY(first->trs, midX, midY, z, 1.0);
+
+    double arcLength, lastTheta;
+    m_wb.theta = lastTheta = arcLength = 0.0;
+
+    // Step through all the frames to compute the total arc-length of the cone
+    // swept while capturing the mosaic (in the original conical coordinate system).
+    for (int i = 0; i < frames_size; i++)
+    {
+        mb = frames[i];
+        double currX, currY;
+        z = ProjZ(mb->trs, midX, midY, 1.0);
+        currX = ProjX(mb->trs, midX, midY, z, 1.0);
+        currY = ProjY(mb->trs, midX, midY, z, 1.0);
+        double deltaX = currX - prevX;
+        double deltaY = currY - prevY;
+
+        // The arcLength is computed by summing the lengths of the chords
+        // connecting the pairwise projected image centers of the input image frames.
+        arcLength += sqrt(deltaY * deltaY + deltaX * deltaX);
+
+        if (!is360)
+        {
+            double thisTheta = asin(mb->trs[1][0]);
+            m_wb.theta += thisTheta - lastTheta;
+            lastTheta = thisTheta;
+        }
+
+        prevX = currX;
+        prevY = currY;
+    }
+
+    // Stretch this to end at the proper alignment i.e. the width of the
+    // rectangle is determined by the arcLength computed above and the cone
+    // sector angle is determined using the rotation of the last frame.
+    m_wb.width = arcLength;
+    if (is360) m_wb.theta = asin(last->trs[1][0]);
+
+    // If there is no rotation, we're done.
+    if (m_wb.theta != 0.0)
+    {
+        double dx = prevX - firstX;
+        double dy = prevY - firstY;
+
+        // If the mosaic was captured by sweeping horizontally
+        if (abs(lxpos - fxpos) > abs(lypos - fypos))
+        {
+            m_wb.horizontal = 1;
+            // Calculate radius position to make ends exactly the same Y offset
+            double radiusTheta = dx / cos(3.14159 / 2.0 - m_wb.theta);
+            m_wb.radius = dy + radiusTheta * cos(m_wb.theta);
+            if (m_wb.radius < 0.0) m_wb.radius = -m_wb.radius;
+        }
+        else
+        {
+            m_wb.horizontal = 0;
+            // Calculate radius position to make ends exactly the same Y offset
+            double radiusTheta = dy / cos(3.14159 / 2.0 - m_wb.theta);
+            m_wb.radius = dx + radiusTheta * cos(m_wb.theta);
+            if (m_wb.radius < 0.0) m_wb.radius = -m_wb.radius;
+        }
+
+        // Determine major direction
+        if (m_wb.horizontal)
+        {
+            // Horizontal strip
+            // m_wb.x,y record the origin of the rectangle coordinate system.
+            if (is360) m_wb.x = firstX;
+            else
+            {
+                if (lxpos - fxpos < 0)
+                {
+                    m_wb.x = firstX + midX;
+                    z = ProjZ(last->trs, 0.0, midY, 1.0);
+                    prevX = ProjX(last->trs, 0.0, midY, z, 1.0);
+                    prevY = ProjY(last->trs, 0.0, midY, z, 1.0);
+                }
+                else
+                {
+                    m_wb.x = firstX - midX;
+                    z = ProjZ(last->trs, last->width - 1.0, midY, 1.0);
+                    prevX = ProjX(last->trs, last->width - 1.0, midY, z, 1.0);
+                    prevY = ProjY(last->trs, last->width - 1.0, midY, z, 1.0);
+                }
+            }
+            dy = prevY - firstY;
+            if (dy < 0.0) m_wb.direction = 1.0;
+            else m_wb.direction = -1.0;
+            m_wb.y = firstY - m_wb.radius * m_wb.direction;
+            if (dy * m_wb.theta > 0.0) m_wb.width = -m_wb.width;
+        }
+        else
+        {
+            // Vertical strip
+            if (is360) m_wb.y = firstY;
+            else
+            {
+                if (lypos - fypos < 0)
+                {
+                    m_wb.x = firstY + midY;
+                    z = ProjZ(last->trs, midX, 0.0, 1.0);
+                    prevX = ProjX(last->trs, midX, 0.0, z, 1.0);
+                    prevY = ProjY(last->trs, midX, 0.0, z, 1.0);
+                }
+                else
+                {
+                    m_wb.x = firstX - midX;
+                    z = ProjZ(last->trs, midX, last->height - 1.0, 1.0);
+                    prevX = ProjX(last->trs, midX, last->height - 1.0, z, 1.0);
+                    prevY = ProjY(last->trs, midX, last->height - 1.0, z, 1.0);
+                }
+            }
+            dx = prevX - firstX;
+            if (dx < 0.0) m_wb.direction = 1.0;
+            else m_wb.direction = -1.0;
+            m_wb.x = firstX - m_wb.radius * m_wb.direction;
+            if (dx * m_wb.theta > 0.0) m_wb.width = -m_wb.width;
+        }
+
+        // Calculate the correct correction factor
+        double deltaX = prevX - m_wb.x;
+        double deltaY = prevY - m_wb.y;
+        double length = sqrt(deltaX * deltaX + deltaY * deltaY);
+        double deltaTheta = (m_wb.horizontal) ? deltaX : deltaY;
+        deltaTheta = asin(deltaTheta / length);
+        m_wb.correction = ((m_wb.radius - length) * m_wb.direction) /
+            (deltaTheta / m_wb.theta);
+    }
+}
diff --git a/benchmark/feature_mos/src/mosaic/Blend.h b/benchmark/feature_mos/src/mosaic/Blend.h
new file mode 100644
index 00000000..175eacd4
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/Blend.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+///////////////////////////////////////////////////
+// Blend.h
+// $Id: Blend.h,v 1.23 2011/06/24 04:22:14 mbansal Exp $
+
+#ifndef BLEND_H
+#define BLEND_H
+
+#include "MosaicTypes.h"
+#include "Pyramid.h"
+#include "Delaunay.h"
+
+#define BLEND_RANGE_DEFAULT 6
+#define BORDER 8
+
+// Percent of total mosaicing time spent on each of the following operations
+const float TIME_PERCENT_ALIGN = 20.0;
+const float TIME_PERCENT_BLEND = 75.0;
+const float TIME_PERCENT_FINAL = 5.0;
+
+// This threshold determines the minimum separation between the image centers
+// of the input image frames for them to be accepted for blending in the
+// STRIP_TYPE_WIDE mode.
+const float STRIP_SEPARATION_THRESHOLD_PXLS = 10;
+
+// This threshold determines the number of pixels on either side of the strip
+// to cross-fade using the images contributing to each seam.
+const float STRIP_CROSS_FADE_WIDTH_PXLS = 2;
+// This specifies the maximum pyramid level to which cross-fading is applied.
+// The original image resolution is Level-0, half of that size is Level-1 and
+// so on. BLEND_RANGE_DEFAULT specifies the number of pyramid levels used by
+// the blending algorithm.
+const int STRIP_CROSS_FADE_MAX_PYR_LEVEL = 2;
+
+/**
+ *  Class for pyramid blending a mosaic.
+ */
+class Blend {
+
+public:
+
+  static const int BLEND_TYPE_NONE    = -1;
+  static const int BLEND_TYPE_FULL    = 0;
+  static const int BLEND_TYPE_PAN     = 1;
+  static const int BLEND_TYPE_CYLPAN  = 2;
+  static const int BLEND_TYPE_HORZ   = 3;
+
+  static const int STRIP_TYPE_THIN      = 0;
+  static const int STRIP_TYPE_WIDE      = 1;
+
+  static const int BLEND_RET_ERROR        = -1;
+  static const int BLEND_RET_OK           = 0;
+  static const int BLEND_RET_ERROR_MEMORY = 1;
+  static const int BLEND_RET_CANCELLED    = -2;
+
+  Blend();
+  ~Blend();
+
+  int initialize(int blendingType, int stripType, int frame_width, int frame_height);
+
+  int runBlend(MosaicFrame **frames, MosaicFrame **rframes, int frames_size, ImageType &imageMosaicYVU,
+        int &mosaicWidth, int &mosaicHeight, float &progress, bool &cancelComputation);
+
+protected:
+
+  PyramidShort *m_pFrameYPyr;
+  PyramidShort *m_pFrameUPyr;
+  PyramidShort *m_pFrameVPyr;
+
+  PyramidShort *m_pMosaicYPyr;
+  PyramidShort *m_pMosaicUPyr;
+  PyramidShort *m_pMosaicVPyr;
+
+  CDelaunay m_Triangulator;
+  CSite *m_AllSites;
+
+  BlendParams m_wb;
+
+  // Height and width of individual frames
+  int width, height;
+
+   // Height and width of mosaic
+  unsigned short Mwidth, Mheight;
+
+  // Helper functions
+  void FrameToMosaic(double trs[3][3], double x, double y, double &wx, double &wy);
+  void MosaicToFrame(double trs[3][3], double x, double y, double &wx, double &wy);
+  void FrameToMosaicRect(int width, int height, double trs[3][3], BlendRect &brect);
+  void ClipBlendRect(CSite *csite, BlendRect &brect);
+  void AlignToMiddleFrame(MosaicFrame **frames, int frames_size);
+
+  int  DoMergeAndBlend(MosaicFrame **frames, int nsite,  int width, int height, YUVinfo &imgMos, MosaicRect &rect, MosaicRect &cropping_rect, float &progress, bool &cancelComputation);
+  void ComputeMask(CSite *csite, BlendRect &vcrect, BlendRect &brect, MosaicRect &rect, YUVinfo &imgMos, int site_idx);
+  void ProcessPyramidForThisFrame(CSite *csite, BlendRect &vcrect, BlendRect &brect, MosaicRect &rect, YUVinfo &imgMos, double trs[3][3], int site_idx);
+
+  int  FillFramePyramid(MosaicFrame *mb);
+
+  // TODO: need to add documentation about the parameters
+  void ComputeBlendParameters(MosaicFrame **frames, int frames_size, int is360);
+  void SelectRelevantFrames(MosaicFrame **frames, int frames_size,
+        MosaicFrame **relevant_frames, int &relevant_frames_size);
+
+  int  PerformFinalBlending(YUVinfo &imgMos, MosaicRect &cropping_rect);
+  void CropFinalMosaic(YUVinfo &imgMos, MosaicRect &cropping_rect);
+
+private:
+   static const float LIMIT_SIZE_MULTIPLIER;
+   static const float LIMIT_HEIGHT_MULTIPLIER;
+   int MosaicSizeCheck(float sizeMultiplier, float heightMultiplier);
+   void RoundingCroppingSizeToMultipleOf8(MosaicRect& rect);
+};
+
+#endif
diff --git a/benchmark/feature_mos/src/mosaic/CSite.h b/benchmark/feature_mos/src/mosaic/CSite.h
new file mode 100644
index 00000000..928c1734
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/CSite.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+///////////////////////////////////////////////////
+// CSite.h
+// $Id: CSite.h,v 1.3 2011/06/17 13:35:47 mbansal Exp $
+
+#ifndef TRIDEL_H
+#define TRIDEL_H
+
+#include "MosaicTypes.h"
+
+typedef struct
+{
+    short first;
+    short second;
+} SEdgeVector;
+
+typedef struct
+{
+    double x;
+    double y;
+} SVec2d;
+
+class CSite
+{
+private:
+    MosaicFrame *mosaicFrame;
+    SEdgeVector *neighbor;
+    int numNeighbors;
+    SVec2d voronoiCenter;
+
+public:
+    CSite();
+    ~CSite();
+
+    inline MosaicFrame* getMb() { return mosaicFrame; }
+    inline SEdgeVector* getNeighbor() { return neighbor; }
+    inline int getNumNeighbors() { return numNeighbors; }
+    inline SVec2d& getVCenter() { return voronoiCenter; }
+    inline double X() { return voronoiCenter.x; }
+    inline double Y() { return voronoiCenter.y; }
+
+    inline void incrNumNeighbors() { numNeighbors++; }
+    inline void setNumNeighbors(int num) { numNeighbors = num; }
+    inline void setNeighbor(SEdgeVector *nb) { neighbor = nb; }
+    inline void setMb(MosaicFrame *mb) { mosaicFrame = mb; }
+};
+
+#endif
diff --git a/benchmark/feature_mos/src/mosaic/Delaunay.cpp b/benchmark/feature_mos/src/mosaic/Delaunay.cpp
new file mode 100644
index 00000000..e8015dfd
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/Delaunay.cpp
@@ -0,0 +1,631 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Delaunay.cpp
+// $Id: Delaunay.cpp,v 1.10 2011/06/17 13:35:48 mbansal Exp $
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include "Delaunay.h"
+
+#define QQ 9   // Optimal value as determined by testing
+#define DM 38  // 2^(1+DM/2) element sort capability. DM=38 for >10^6 elements
+#define NYL -1
+#define valid(l) ccw(orig(basel), dest(l), dest(basel))
+
+
+CDelaunay::CDelaunay()
+{
+}
+
+CDelaunay::~CDelaunay()
+{
+}
+
+// Allocate storage, construct triangulation, compute voronoi corners
+int CDelaunay::triangulate(SEdgeVector **edges, int n_sites, int width, int height)
+{
+  EdgePointer cep;
+
+  deleteAllEdges();
+  buildTriangulation(n_sites);
+  cep = consolidateEdges();
+  *edges = ev;
+
+  // Note: construction_list will change ev
+  return constructList(cep, width, height);
+}
+
+// builds delaunay triangulation
+void CDelaunay::buildTriangulation(int size)
+{
+  int i, rows;
+  EdgePointer lefte, righte;
+
+  rows = (int)( 0.5 + sqrt( (double) size / log( (double) size )));
+
+  // Sort the pointers by  x-coordinate of site
+  for ( i=0 ; i < size ; i++ ) {
+    sp[i] = (SitePointer) i;
+  }
+
+  spsortx( sp, 0, size-1 );
+  build( 0, size-1, &lefte, &righte, rows );
+  oneBndryEdge = lefte;
+}
+
+// Recursive Delaunay Triangulation Procedure
+// Contains modifications for axis-switching division.
+void CDelaunay::build(int lo, int hi, EdgePointer *le, EdgePointer *re, int rows)
+{
+  EdgePointer a, b, c, ldo, rdi, ldi, rdo, maxx, minx;
+  int split, lowrows;
+  int low, high;
+  SitePointer s1, s2, s3;
+  low = lo;
+  high = hi;
+
+  if ( low < (high-2) ) {
+    // more than three elements; do recursion
+    minx = sp[low];
+    maxx = sp[high];
+    if (rows == 1) {    // time to switch axis of division
+      spsorty( sp, low, high);
+      rows = 65536;
+    }
+    lowrows = rows/2;
+    split = low - 1 + (int)
+      (0.5 + ((double)(high-low+1) * ((double)lowrows / (double)rows)));
+    build( low, split, &ldo, &ldi, lowrows );
+    build( split+1, high, &rdi, &rdo, (rows-lowrows) );
+    doMerge(&ldo, ldi, rdi, &rdo);
+    while (orig(ldo) != minx) {
+      ldo = rprev(ldo);
+    }
+    while (orig(rdo) != maxx) {
+      rdo = (SitePointer) lprev(rdo);
+    }
+    *le = ldo;
+    *re = rdo;
+  }
+  else if (low >= (high - 1)) { // two or one points
+    a = makeEdge(sp[low], sp[high]);
+    *le = a;
+    *re = (EdgePointer) sym(a);
+  } else { // three points
+    // 3 cases: triangles of 2 orientations, and 3 points on a line
+    a = makeEdge((s1 = sp[low]), (s2 = sp[low+1]));
+    b = makeEdge(s2, (s3 = sp[high]));
+    splice((EdgePointer) sym(a), b);
+    if (ccw(s1, s3, s2)) {
+      c = connectLeft(b, a);
+      *le = (EdgePointer) sym(c);
+      *re = c;
+    } else {
+      *le = a;
+      *re = (EdgePointer) sym(b);
+      if (ccw(s1, s2, s3)) {
+        // not colinear
+        c = connectLeft(b, a);
+      }
+    }
+  }
+}
+
+// Quad-edge manipulation primitives
+EdgePointer CDelaunay::makeEdge(SitePointer origin, SitePointer destination)
+{
+  EdgePointer temp, ans;
+  temp = allocEdge();
+  ans = temp;
+
+  onext(temp) = ans;
+  orig(temp) = origin;
+  onext(++temp) = (EdgePointer) (ans + 3);
+  onext(++temp) = (EdgePointer) (ans + 2);
+  orig(temp) = destination;
+  onext(++temp) = (EdgePointer) (ans + 1);
+
+  return(ans);
+}
+
+void CDelaunay::splice(EdgePointer a, EdgePointer b)
+{
+  EdgePointer alpha, beta, temp;
+  alpha = (EdgePointer) rot(onext(a));
+  beta = (EdgePointer) rot(onext(b));
+  temp = onext(alpha);
+  onext(alpha) = onext(beta);
+  onext(beta) = temp;
+  temp = onext(a);
+  onext(a) = onext(b);
+  onext(b) = temp;
+}
+
+EdgePointer CDelaunay::connectLeft(EdgePointer a, EdgePointer b)
+{
+  EdgePointer ans;
+  ans = makeEdge(dest(a), orig(b));
+  splice(ans, (EdgePointer) lnext(a));
+  splice((EdgePointer) sym(ans), b);
+  return(ans);
+}
+
+EdgePointer CDelaunay::connectRight(EdgePointer a, EdgePointer b)
+{
+  EdgePointer ans;
+  ans = makeEdge(dest(a), orig(b));
+  splice(ans, (EdgePointer) sym(a));
+  splice((EdgePointer) sym(ans), (EdgePointer) oprev(b));
+  return(ans);
+}
+
+// disconnects e from the rest of the structure and destroys it
+void CDelaunay::deleteEdge(EdgePointer e)
+{
+  splice(e, (EdgePointer) oprev(e));
+  splice((EdgePointer) sym(e), (EdgePointer) oprev(sym(e)));
+  freeEdge(e);
+}
+
+//
+// Overall storage allocation
+//
+
+// Quad-edge storage allocation
+CSite *CDelaunay::allocMemory(int n)
+{
+  unsigned int size;
+
+  size = ((sizeof(CSite) + sizeof(SitePointer)) * n +
+          (sizeof(SitePointer) + sizeof(EdgePointer)) * 12
+          ) * n;
+  if (!(sa = (CSite*) malloc(size))) {
+    return NULL;
+  }
+  sp = (SitePointer *) (sa + n);
+  ev = (SEdgeVector *) (org = sp + n);
+  next = (EdgePointer *) (org + 12 * n);
+  ei = (struct EDGE_INFO *) (next + 12 * n);
+  return sa;
+}
+
+void CDelaunay::freeMemory()
+{
+  if (sa) {
+    free(sa);
+    sa = (CSite*)NULL;
+  }
+}
+
+//
+// Edge storage management
+//
+
+void CDelaunay::deleteAllEdges()
+{
+    nextEdge = 0;
+    availEdge = NYL;
+}
+
+EdgePointer CDelaunay::allocEdge()
+{
+  EdgePointer ans;
+
+  if (availEdge == NYL) {
+    ans = nextEdge, nextEdge += 4;
+  } else {
+    ans = availEdge, availEdge = onext(availEdge);
+  }
+  return(ans);
+}
+
+void CDelaunay::freeEdge(EdgePointer e)
+{
+  e ^= e & 3;
+  onext(e) = availEdge;
+  availEdge = e;
+}
+
+EdgePointer CDelaunay::consolidateEdges()
+{
+  EdgePointer e;
+  int i,j;
+
+  while (availEdge != NYL) {
+    nextEdge -= 4; e = availEdge; availEdge = onext(availEdge);
+
+    if (e==nextEdge) {
+      continue; // the one deleted was the last one anyway
+    }
+    if ((oneBndryEdge&~3) == nextEdge) {
+      oneBndryEdge = (EdgePointer) (e | (oneBndryEdge&3));
+    }
+    for (i=0,j=3; i<4; i++,j=rot(j)) {
+      onext(e+i) = onext(nextEdge+i);
+      onext(rot(onext(e+i))) = (EdgePointer) (e+j);
+    }
+  }
+  return nextEdge;
+}
+
+//
+// Sorting Routines
+//
+
+int CDelaunay::xcmpsp(int i, int j)
+{
+  double d = sa[(i>=0)?sp[i]:sp1].X() - sa[(j>=0)?sp[j]:sp1].X();
+  if ( d > 0. ) {
+    return 1;
+  }
+  if ( d < 0. ) {
+    return -1;
+  }
+  d = sa[(i>=0)?sp[i]:sp1].Y() - sa[(j>=0)?sp[j]:sp1].Y();
+  if ( d > 0. ) {
+    return 1;
+  }
+  if ( d < 0. ) {
+    return -1;
+  }
+  return 0;
+}
+
+int CDelaunay::ycmpsp(int i, int j)
+{
+  double d = sa[(i>=0)?sp[i]:sp1].Y() - sa[(j>=0)?sp[j]:sp1].Y();
+  if ( d > 0. ) {
+    return 1;
+  }
+  if ( d < 0. ) {
+    return -1;
+  }
+  d = sa[(i>=0)?sp[i]:sp1].X() - sa[(j>=0)?sp[j]:sp1].X();
+  if ( d > 0. ) {
+    return 1;
+  }
+  if ( d < 0. ) {
+    return -1;
+  }
+  return 0;
+}
+
+int CDelaunay::cmpev(int i, int j)
+{
+  return (ev[i].first - ev[j].first);
+}
+
+void CDelaunay::swapsp(int i, int j)
+{
+  int t;
+  t = (i>=0) ? sp[i] : sp1;
+
+  if (i>=0) {
+    sp[i] = (j>=0)?sp[j]:sp1;
+  } else {
+    sp1 = (j>=0)?sp[j]:sp1;
+  }
+
+  if (j>=0) {
+    sp[j] = (SitePointer) t;
+  } else {
+    sp1 = (SitePointer) t;
+  }
+}
+
+void CDelaunay::swapev(int i, int j)
+{
+  SEdgeVector temp;
+
+  temp = ev[i];
+  ev[i] = ev[j];
+  ev[j] = temp;
+}
+
+void CDelaunay::copysp(int i, int j)
+{
+  if (j>=0) {
+    sp[j] = (i>=0)?sp[i]:sp1;
+  } else {
+    sp1 = (i>=0)?sp[i]:sp1;
+  }
+}
+
+void CDelaunay::copyev(int i, int j)
+{
+  ev[j] = ev[i];
+}
+
+void CDelaunay::spsortx(SitePointer *sp_in, int low, int high)
+{
+  sp = sp_in;
+  rcssort(low,high,-1,&CDelaunay::xcmpsp,&CDelaunay::swapsp,&CDelaunay::copysp);
+}
+
+void CDelaunay::spsorty(SitePointer *sp_in, int low, int high )
+{
+  sp = sp_in;
+  rcssort(low,high,-1,&CDelaunay::ycmpsp,&CDelaunay::swapsp,&CDelaunay::copysp);
+}
+
+void CDelaunay::rcssort(int lowelt, int highelt, int temp,
+                    int (CDelaunay::*comparison)(int,int),
+                    void (CDelaunay::*swap)(int,int),
+                    void (CDelaunay::*copy)(int,int))
+{
+  int m,sij,si,sj,sL,sk;
+  int stack[DM];
+
+  if (highelt-lowelt<=1) {
+    return;
+  }
+  if (highelt-lowelt>QQ) {
+    m = 0;
+    si = lowelt; sj = highelt;
+    for (;;) { // partition [si,sj] about median-of-3.
+      sij = (sj+si) >> 1;
+
+      // Now to sort elements si,sij,sj into order & set temp=their median
+      if ( (this->*comparison)( si,sij ) > 0 ) {
+        (this->*swap)( si,sij );
+      }
+      if ( (this->*comparison)( sij,sj ) > 0 ) {
+        (this->*swap)( sj,sij );
+        if ( (this->*comparison)( si,sij ) > 0 ) {
+          (this->*swap)( si,sij );
+        }
+      }
+      (this->*copy)( sij,temp );
+
+      // Now to partition into elements <=temp, >=temp, and ==temp.
+      sk = si; sL = sj;
+      do {
+        do {
+          sL--;
+        } while( (this->*comparison)( sL,temp ) > 0 );
+        do {
+          sk++;
+        } while( (this->*comparison)( temp,sk ) > 0 );
+        if ( sk < sL ) {
+          (this->*swap)( sL,sk );
+        }
+      } while(sk <= sL);
+
+      // Now to recurse on shorter partition, store longer partition on stack
+      if ( sL-si > sj-sk ) {
+        if ( sL-si < QQ ) {
+          if( m==0 ) {
+            break;  // empty stack && both partitions < QQ so break
+          } else {
+            sj = stack[--m];
+            si = stack[--m];
+          }
+        }
+        else {
+          if ( sj-sk < QQ ) {
+            sj = sL;
+          } else {
+            stack[m++] = si;
+            stack[m++] = sL;
+            si = sk;
+          }
+        }
+      }
+      else {
+        if ( sj-sk < QQ ) {
+          if ( m==0 ) {
+            break; // empty stack && both partitions < QQ so break
+          } else {
+            sj = stack[--m];
+            si = stack[--m];
+          }
+        }
+        else {
+          if ( sL-si < QQ ) {
+            si = sk;
+          } else {
+            stack[m++] = sk;
+            stack[m++] = sj;
+            sj = sL;
+          }
+        }
+      }
+    }
+  }
+
+  // Now for 0 or Data bounded  "straight insertion" sort of [0,nels-1]; if it is
+  // known that el[-1] = -INF, then can omit the "sk>=0" test and save time.
+  for (si=lowelt; si<highelt; si++) {
+    if ( (this->*comparison)( si,si+1 ) > 0 ) {
+      (this->*copy)( si+1,temp );
+      sj = sk = si;
+      sj++;
+      do {
+        (this->*copy)( sk,sj );
+        sj = sk;
+        sk--;
+      } while ( (this->*comparison)( sk,temp ) > 0 && sk>=lowelt );
+      (this->*copy)( temp,sj );
+    }
+  }
+}
+
+//
+// Geometric primitives
+//
+
+// incircle, as in the Guibas-Stolfi paper.
+int CDelaunay::incircle(SitePointer a, SitePointer b, SitePointer c, SitePointer d)
+{
+  double adx, ady, bdx, bdy, cdx, cdy, dx, dy, nad, nbd, ncd;
+  dx = sa[d].X();
+  dy = sa[d].Y();
+  adx = sa[a].X() - dx;
+  ady = sa[a].Y() - dy;
+  bdx = sa[b].X() - dx;
+  bdy = sa[b].Y() - dy;
+  cdx = sa[c].X() - dx;
+  cdy = sa[c].Y() - dy;
+  nad = adx*adx+ady*ady;
+  nbd = bdx*bdx+bdy*bdy;
+  ncd = cdx*cdx+cdy*cdy;
+  return( (0.0 < (nad * (bdx * cdy - bdy * cdx)
+                  + nbd * (cdx * ady - cdy * adx)
+                  + ncd * (adx * bdy - ady * bdx))) ? TRUE : FALSE );
+}
+
+// TRUE iff A, B, C form a counterclockwise oriented triangle
+int CDelaunay::ccw(SitePointer a, SitePointer b, SitePointer c)
+{
+  double ax = sa[a].X();
+  double bx = sa[b].X();
+  double cx = sa[c].X();
+  double ay = sa[a].Y();
+  double by = sa[b].Y();
+  double cy = sa[c].Y();
+
+  double val = (ax - cx)*(by - cy) - (bx - cx)*(ay - cy);
+  if ( val > 0.0) {
+    return true;
+  }
+
+  return false;
+}
+
+//
+// The Merge Procedure.
+//
+
+void CDelaunay::doMerge(EdgePointer *ldo, EdgePointer ldi, EdgePointer rdi, EdgePointer *rdo)
+{
+  int rvalid, lvalid;
+  EdgePointer basel,lcand,rcand,t;
+
+  for (;;) {
+    while (ccw(orig(ldi), dest(ldi), orig(rdi))) {
+        ldi = (EdgePointer) lnext(ldi);
+    }
+    if (ccw(dest(rdi), orig(rdi), orig(ldi))) {
+        rdi = (EdgePointer)rprev(rdi);
+    } else {
+      break;
+    }
+  }
+
+  basel = connectLeft((EdgePointer) sym(rdi), ldi);
+  lcand = rprev(basel);
+  rcand = (EdgePointer) oprev(basel);
+  if (orig(basel) == orig(*rdo)) {
+    *rdo = basel;
+  }
+  if (dest(basel) == orig(*ldo)) {
+    *ldo = (EdgePointer) sym(basel);
+  }
+
+  for (;;) {
+#if 1
+    if (valid(t=onext(lcand))) {
+#else
+    t = (EdgePointer)onext(lcand);
+    if (valid(basel, t)) {
+#endif
+      while (incircle(dest(lcand), dest(t), orig(lcand), orig(basel))) {
+        deleteEdge(lcand);
+        lcand = t;
+        t = onext(lcand);
+      }
+    }
+#if 1
+    if (valid(t=(EdgePointer)oprev(rcand))) {
+#else
+    t = (EdgePointer)oprev(rcand);
+    if (valid(basel, t)) {
+#endif
+      while (incircle(dest(t), dest(rcand), orig(rcand), dest(basel))) {
+        deleteEdge(rcand);
+        rcand = t;
+        t = (EdgePointer)oprev(rcand);
+      }
+    }
+
+#if 1
+    lvalid = valid(lcand);
+    rvalid = valid(rcand);
+#else
+    lvalid = valid(basel, lcand);
+    rvalid = valid(basel, rcand);
+#endif
+    if ((! lvalid) && (! rvalid)) {
+      return;
+    }
+
+    if (!lvalid ||
+        (rvalid && incircle(dest(lcand), orig(lcand), orig(rcand), dest(rcand)))) {
+      basel = connectLeft(rcand, (EdgePointer) sym(basel));
+      rcand = (EdgePointer) lnext(sym(basel));
+    } else {
+      basel = (EdgePointer) sym(connectRight(lcand, basel));
+      lcand = rprev(basel);
+    }
+  }
+}
+
+int CDelaunay::constructList(EdgePointer last, int width, int height)
+{
+  int c, i;
+  EdgePointer curr, src, nex;
+  SEdgeVector *currv, *prevv;
+
+  c = (int) ((curr = (EdgePointer) ((last & ~3))) >> 1);
+
+  for (last -= 4; last >= 0; last -= 4) {
+    src = orig(last);
+    nex = dest(last);
+    orig(--curr) = src;
+    orig(--curr) = nex;
+    orig(--curr) = nex;
+    orig(--curr) = src;
+  }
+  rcssort(0, c - 1, -1, &CDelaunay::cmpev, &CDelaunay::swapev, &CDelaunay::copyev);
+
+  // Throw out any edges that are too far apart
+  currv = prevv = ev;
+  for (i = c; i--; currv++) {
+      if ((int) fabs(sa[currv->first].getVCenter().x - sa[currv->second].getVCenter().x) <= width &&
+          (int) fabs(sa[currv->first].getVCenter().y - sa[currv->second].getVCenter().y) <= height) {
+          *(prevv++) = *currv;
+      } else {
+        c--;
+      }
+  }
+  return c;
+}
+
+// Fill in site neighbor information
+void CDelaunay::linkNeighbors(SEdgeVector *edge, int nedge, int nsite)
+{
+  int i;
+
+  for (i = 0; i < nsite; i++) {
+    sa[i].setNeighbor(edge);
+    sa[i].setNumNeighbors(0);
+    for (; edge->first == i && nedge; edge++, nedge--) {
+      sa[i].incrNumNeighbors();
+    }
+  }
+}
diff --git a/benchmark/feature_mos/src/mosaic/Delaunay.h b/benchmark/feature_mos/src/mosaic/Delaunay.h
new file mode 100644
index 00000000..7a450b5e
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/Delaunay.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Delaunay.h
+// $Id: Delaunay.h,v 1.9 2011/06/17 13:35:48 mbansal Exp $
+
+#ifndef DELAUNAY_H
+#define DELAUNAY_H
+#include <stdio.h>
+#include <math.h>
+#include "CSite.h"
+#include "EdgePointerUtil.h"
+
+#ifndef TRUE
+#define TRUE 1==1
+#define FALSE 0==1
+#endif
+
+//******************************************************************************
+// Reference for Quad-edge data structure:
+//
+// Leonidas Guibas and Jorge Stolfi, "Primitives for the manipulation of general
+//     subdivisions and the computations of Voronoi diagrams",
+//     ACM Transactions on Graphics 4, 74-123 (1985).
+//
+//******************************************************************************
+
+//
+// Common data structures
+//
+
+typedef short SitePointer;
+typedef short TrianglePointer;
+
+class CDelaunay
+{
+private:
+  CSite *sa;
+  EdgePointer oneBndryEdge;
+  EdgePointer *next;
+  SitePointer *org;
+  struct EDGE_INFO *ei;
+  SitePointer *sp;
+  SEdgeVector *ev;
+
+  SitePointer sp1;
+  EdgePointer nextEdge;
+  EdgePointer availEdge;
+
+private:
+  void build(int lo, int hi, EdgePointer *le, EdgePointer *re, int rows);
+  void buildTriangulation(int size);
+
+  EdgePointer allocEdge();
+  void freeEdge(EdgePointer e);
+
+  EdgePointer makeEdge(SitePointer origin, SitePointer destination);
+  void deleteEdge(EdgePointer e);
+
+  void splice(EdgePointer, EdgePointer);
+  EdgePointer consolidateEdges();
+  void deleteAllEdges();
+
+  void spsortx(SitePointer *, int, int);
+  void spsorty(SitePointer *, int, int);
+
+  int cmpev(int i, int j);
+  int xcmpsp(int i, int j);
+  int ycmpsp(int i, int j);
+
+  void swapsp(int i, int j);
+  void swapev(int i, int j);
+
+  void copysp(int i, int j);
+  void copyev(int i, int j);
+
+  void rcssort(int lowelt, int highelt, int temp,
+                 int (CDelaunay::*comparison)(int,int),
+                 void (CDelaunay::*swap)(int,int),
+                 void (CDelaunay::*copy)(int,int));
+
+  void doMerge(EdgePointer *ldo, EdgePointer ldi, EdgePointer rdi, EdgePointer *rdo);
+  EdgePointer connectLeft(EdgePointer a, EdgePointer b);
+  EdgePointer connectRight(EdgePointer a, EdgePointer b);
+  int ccw(SitePointer a, SitePointer b, SitePointer c);
+  int incircle(SitePointer a, SitePointer b, SitePointer c, SitePointer d);
+  int constructList(EdgePointer e, int width, int height);
+
+public:
+  CDelaunay();
+  ~CDelaunay();
+
+  CSite *allocMemory(int nsite);
+  void freeMemory();
+  int triangulate(SEdgeVector **edge, int nsite, int width, int height);
+  void linkNeighbors(SEdgeVector *edge, int nedge, int nsite);
+};
+
+#define onext(a) next[a]
+#define oprev(a) rot(onext(rot(a)))
+#define lnext(a) rot(onext(rotinv(a)))
+#define lprev(a) sym(onext(a))
+#define rnext(a) rotinv(onext(rot(a)))
+#define rprev(a) onext(sym(a))
+#define dnext(a) sym(onext(sym(a)))
+#define dprev(a) rotinv(onext(rotinv(a)))
+
+#define orig(a) org[a]
+#define dest(a) orig(sym(a))
+#define left(a) orig(rotinv(a))
+#define right(a) orig(rot(a))
+
+#endif
diff --git a/benchmark/feature_mos/src/mosaic/EdgePointerUtil.h b/benchmark/feature_mos/src/mosaic/EdgePointerUtil.h
new file mode 100644
index 00000000..fad05d7e
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/EdgePointerUtil.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _EDGEPOINTERUTIL_H_
+#define _EDGEPOINTERUTIL_H_
+
+typedef short EdgePointer;
+
+inline EdgePointer sym(EdgePointer a)
+{
+  return a ^ 2;
+}
+
+inline EdgePointer rot(EdgePointer a)
+{
+  return (((a) + 1) & 3) | ((a) & ~3);
+}
+
+inline EdgePointer rotinv(EdgePointer a)
+{
+  return (((a) + 3) & 3) | ((a) & ~3);
+}
+
+#endif //_EDGEPOINTERUTIL_H_
diff --git a/benchmark/feature_mos/src/mosaic/Geometry.h b/benchmark/feature_mos/src/mosaic/Geometry.h
new file mode 100644
index 00000000..0efa0f4a
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/Geometry.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/////////////////////////////
+// Geometry.h
+// $Id: Geometry.h,v 1.2 2011/06/17 13:35:48 mbansal Exp $
+
+#pragma once
+#include "MosaicTypes.h"
+
+///////////////////////////////////////////////////////////////
+///////////////// BEG GLOBAL ROUTINES /////////////////////////
+///////////////////////////////////////////////////////////////
+
+
+inline double hypotSq(double a, double b)
+{
+    return ((a)*(a)+(b)*(b));
+}
+
+inline void ClipRect(double x, double y, BlendRect &brect)
+{
+    if (y < brect.bot) brect.bot = y;
+    if (y > brect.top) brect.top = y;
+    if (x < brect.lft) brect.lft = x;
+    if (x > brect.rgt) brect.rgt = x;
+}
+
+inline void ClipRect(BlendRect rrect, BlendRect &brect)
+{
+    if (rrect.bot < brect.bot) brect.bot = rrect.bot;
+    if (rrect.top > brect.top) brect.top = rrect.top;
+    if (rrect.lft < brect.lft) brect.lft = rrect.lft;
+    if (rrect.rgt > brect.rgt) brect.rgt = rrect.rgt;
+}
+
+// Clip x to be within [-border,width+border-1]
+inline void clipToSegment(int &x, int width, int border)
+{
+    if(x < -border)
+        x = -border;
+    else if(x >= width+border)
+        x = width + border - 1;
+}
+
+// Return true if x within [-border,width+border-1]
+inline bool inSegment(int x, int width, int border)
+{
+    return (x >= -border && x < width + border - 1);
+}
+
+inline void FindTriangleCentroid(double x0, double y0, double x1, double y1,
+                                    double x2, double y2,
+                                    double &mass, double &centX, double &centY)
+{
+    // Calculate the centroid of the triangle
+    centX = (x0 + x1 + x2) / 3.0;
+    centY = (y0 + y1 + y2) / 3.0;
+
+    // Calculate 2*Area for the triangle
+    if (y0 == y2)
+    {
+        if (x0 == x1)
+        {
+            mass = fabs((y1 - y0) * (x2 - x0)); // Special case 1a
+        }
+        else
+        {
+            mass = fabs((y1 - y0) * (x1 - x0)); // Special case 1b
+        }
+    }
+    else if (x0 == x2)
+    {
+        if (x0 == x1)
+        {
+            mass = fabs((x2 - x0) * (y2 - y0)); // Special case 2a
+        }
+        else
+        {
+            mass = fabs((x1 - x0) * (y2 - y0)); // Special case 2a
+        }
+    }
+    else if (x1 == x2)
+    {
+        mass = fabs((x1 - x0) * (y2 - y0)); // Special case 3
+    }
+    else
+    {
+        // Calculate line equation from x0,y0 to x2,y2
+        double dx = x2 - x0;
+        double dy = y2 - y0;
+        // Calculate the length of the side
+        double len1 = sqrt(dx * dx + dy * dy);
+        double m1 = dy / dx;
+        double b1 = y0 - m1 * x0;
+        // Calculate the line that goes through x1,y1 and is perpendicular to
+        // the other line
+        double m2 = 1.0 / m1;
+        double b2 = y1 - m2 * x1;
+        // Calculate the intersection of the two lines
+        if (fabs( m1 - m2 ) > 1.e-6)
+        {
+            double x = (b2 - b1) / (m1 - m2);
+            // the mass is the base * height
+            dx = x1 - x;
+            dy = y1 - m1 * x + b1;
+            mass = len1 * sqrt(dx * dx + dy * dy);
+        }
+        else
+        {
+            mass = fabs( (y1 - y0) * (x2 - x0) );
+        }
+    }
+}
+
+inline void FindQuadCentroid(double x0, double y0, double x1, double y1, double x2, double y2, double x3, double y3,
+                                     double &centX, double &centY)
+
+{
+    // To find the centroid:
+    // 1) Divide the quadrilateral into two triangles by scribing a diagonal
+    // 2) Calculate the centroid of each triangle (the intersection of the angle bisections).
+    // 3) Find the centroid of the quad by weighting each triangle centroids by their area.
+
+    // Calculate the corner points
+    double z;
+
+    // The quad is split from x0,y0 to x2,y2
+    double mass1, mass2, cent1x, cent2x, cent1y, cent2y;
+    FindTriangleCentroid(x0, y0, x1, y1, x2, y2, mass1, cent1x, cent1y);
+    FindTriangleCentroid(x0, y0, x3, y3, x2, y2, mass2, cent2x, cent2y);
+
+    // determine position of quad centroid
+    z = mass2 / (mass1 + mass2);
+    centX = cent1x + (cent2x - cent1x) * z;
+    centY = cent1y + (cent2y - cent1y) * z;
+}
+
+///////////////////////////////////////////////////////////////
+////////////////// END GLOBAL ROUTINES ////////////////////////
+///////////////////////////////////////////////////////////////
+
+
diff --git a/benchmark/feature_mos/src/mosaic/ImageUtils.cpp b/benchmark/feature_mos/src/mosaic/ImageUtils.cpp
new file mode 100644
index 00000000..28629d34
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/ImageUtils.cpp
@@ -0,0 +1,406 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+///////////////////////////////////////////////////
+// ImageUtils.cpp
+// $Id: ImageUtils.cpp,v 1.12 2011/06/17 13:35:48 mbansal Exp $
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+#include "ImageUtils.h"
+
+void ImageUtils::rgba2yvu(ImageType out, ImageType in, int width, int height)
+{
+  int r,g,b, a;
+  ImageType yimg = out;
+  ImageType vimg = yimg + width*height;
+  ImageType uimg = vimg + width*height;
+  ImageType image = in;
+
+  for (int ii = 0; ii < height; ii++) {
+    for (int ij = 0; ij < width; ij++) {
+      r = (*image++);
+      g = (*image++);
+      b = (*image++);
+      a = (*image++);
+
+      if (r < 0) r = 0;
+      if (r > 255) r = 255;
+      if (g < 0) g = 0;
+      if (g > 255) g = 255;
+      if (b < 0) b = 0;
+      if (b > 255) b = 255;
+
+      int val = (int) (REDY * r + GREENY * g + BLUEY * b) / 1000 + 16;
+      if (val < 0) val = 0;
+      if (val > 255) val = 255;
+      *(yimg) = val;
+
+      val = (int) (REDV * r - GREENV * g - BLUEV * b) / 1000 + 128;
+      if (val < 0) val = 0;
+      if (val > 255) val = 255;
+      *(vimg) = val;
+
+      val = (int) (-REDU * r - GREENU * g + BLUEU * b) / 1000 + 128;
+      if (val < 0) val = 0;
+      if (val > 255) val = 255;
+      *(uimg) = val;
+
+      yimg++;
+      uimg++;
+      vimg++;
+    }
+  }
+}
+
+
+void ImageUtils::rgb2yvu(ImageType out, ImageType in, int width, int height)
+{
+  int r,g,b;
+  ImageType yimg = out;
+  ImageType vimg = yimg + width*height;
+  ImageType uimg = vimg + width*height;
+  ImageType image = in;
+
+  for (int ii = 0; ii < height; ii++) {
+    for (int ij = 0; ij < width; ij++) {
+      r = (*image++);
+      g = (*image++);
+      b = (*image++);
+
+      if (r < 0) r = 0;
+      if (r > 255) r = 255;
+      if (g < 0) g = 0;
+      if (g > 255) g = 255;
+      if (b < 0) b = 0;
+      if (b > 255) b = 255;
+
+      int val = (int) (REDY * r + GREENY * g + BLUEY * b) / 1000 + 16;
+      if (val < 0) val = 0;
+      if (val > 255) val = 255;
+      *(yimg) = val;
+
+      val = (int) (REDV * r - GREENV * g - BLUEV * b) / 1000 + 128;
+      if (val < 0) val = 0;
+      if (val > 255) val = 255;
+      *(vimg) = val;
+
+      val = (int) (-REDU * r - GREENU * g + BLUEU * b) / 1000 + 128;
+      if (val < 0) val = 0;
+      if (val > 255) val = 255;
+      *(uimg) = val;
+
+      yimg++;
+      uimg++;
+      vimg++;
+    }
+  }
+}
+
+ImageType ImageUtils::rgb2gray(ImageType in, int width, int height)
+{
+  int r,g,b;
+  ImageType image = in;
+  ImageType out = ImageUtils::allocateImage(width, height, 1);
+  ImageType outCopy = out;
+
+  for (int ii = 0; ii < height; ii++) {
+    for (int ij = 0; ij < width; ij++) {
+      r = (*image++);
+      g = (*image++);
+      b = (*image++);
+
+      if (r < 0) r = 0;
+      if (r > 255) r = 255;
+      if (g < 0) g = 0;
+      if (g > 255) g = 255;
+      if (b < 0) b = 0;
+      if (b > 255) b = 255;
+
+      (*outCopy) = ( 0.3*r + 0.59*g + 0.11*b);
+
+      outCopy++;
+    }
+  }
+
+  return out;
+}
+
+ImageType ImageUtils::rgb2gray(ImageType out, ImageType in, int width, int height)
+{
+  int r,g,b;
+  ImageType image = in;
+  ImageType outCopy = out;
+
+  for (int ii = 0; ii < height; ii++) {
+    for (int ij = 0; ij < width; ij++) {
+      r = (*image++);
+      g = (*image++);
+      b = (*image++);
+
+      if (r < 0) r = 0;
+      if (r > 255) r = 255;
+      if (g < 0) g = 0;
+      if (g > 255) g = 255;
+      if (b < 0) b = 0;
+      if (b > 255) b = 255;
+
+      (*outCopy) = ( 0.3*r + 0.59*g + 0.11*b);
+
+      outCopy++;
+    }
+  }
+
+  return out;
+
+}
+
+ImageType *ImageUtils::imageTypeToRowPointers(ImageType in, int width, int height)
+{
+  int i;
+  int m_h = height;
+  int m_w = width;
+
+  ImageType *m_rows = new ImageType[m_h];
+
+  for (i=0;i<m_h;i++) {
+    m_rows[i] = &in[(m_w)*i];
+  }
+  return m_rows;
+}
+
+void ImageUtils::yvu2rgb(ImageType out, ImageType in, int width, int height)
+{
+  int y,v,u, r, g, b;
+  unsigned char *yimg = in;
+  unsigned char *vimg = yimg + width*height;
+  unsigned char *uimg = vimg + width*height;
+  unsigned char *image = out;
+
+  for (int i = 0; i < height; i++) {
+    for (int j = 0; j < width; j++) {
+
+      y = (*yimg);
+      v = (*vimg);
+      u = (*uimg);
+
+      if (y < 0) y = 0;
+      if (y > 255) y = 255;
+      if (u < 0) u = 0;
+      if (u > 255) u = 255;
+      if (v < 0) v = 0;
+      if (v > 255) v = 255;
+
+      b = (int) ( 1.164*(y - 16) + 2.018*(u-128));
+      g = (int) ( 1.164*(y - 16) - 0.813*(v-128) - 0.391*(u-128));
+      r = (int) ( 1.164*(y - 16) + 1.596*(v-128));
+
+      if (r < 0) r = 0;
+      if (r > 255) r = 255;
+      if (g < 0) g = 0;
+      if (g > 255) g = 255;
+      if (b < 0) b = 0;
+      if (b > 255) b = 255;
+
+      *(image++) = r;
+      *(image++) = g;
+      *(image++) = b;
+
+      yimg++;
+      uimg++;
+      vimg++;
+
+    }
+  }
+}
+
+void ImageUtils::yvu2bgr(ImageType out, ImageType in, int width, int height)
+{
+  int y,v,u, r, g, b;
+  unsigned char *yimg = in;
+  unsigned char *vimg = yimg + width*height;
+  unsigned char *uimg = vimg + width*height;
+  unsigned char *image = out;
+
+  for (int i = 0; i < height; i++) {
+    for (int j = 0; j < width; j++) {
+
+      y = (*yimg);
+      v = (*vimg);
+      u = (*uimg);
+
+      if (y < 0) y = 0;
+      if (y > 255) y = 255;
+      if (u < 0) u = 0;
+      if (u > 255) u = 255;
+      if (v < 0) v = 0;
+      if (v > 255) v = 255;
+
+      b = (int) ( 1.164*(y - 16) + 2.018*(u-128));
+      g = (int) ( 1.164*(y - 16) - 0.813*(v-128) - 0.391*(u-128));
+      r = (int) ( 1.164*(y - 16) + 1.596*(v-128));
+
+      if (r < 0) r = 0;
+      if (r > 255) r = 255;
+      if (g < 0) g = 0;
+      if (g > 255) g = 255;
+      if (b < 0) b = 0;
+      if (b > 255) b = 255;
+
+      *(image++) = b;
+      *(image++) = g;
+      *(image++) = r;
+
+      yimg++;
+      uimg++;
+      vimg++;
+
+    }
+  }
+}
+
+
+ImageType ImageUtils::readBinaryPPM(const char *filename, int &width, int &height)
+{
+
+  FILE *imgin = NULL;
+  int mval=0, format=0, eret;
+  ImageType ret = NULL;//IMAGE_TYPE_NOIMAGE;
+
+  imgin = fopen(filename, "r");
+  if (imgin == NULL) {
+    fprintf(stderr, "Error: Filename %s not found\n", filename);
+    return ret;
+  }
+
+  eret = fscanf(imgin, "P%d\n", &format);
+  if (format != 6) {
+    fprintf(stderr, "Error: readBinaryPPM only supports PPM format (P6)\n");
+    return ret;
+  }
+
+  eret = fscanf(imgin, "%d %d\n", &width, &height);
+  eret = fscanf(imgin, "%d\n", &mval);
+  ret  = allocateImage(width, height, IMAGE_TYPE_NUM_CHANNELS);
+  eret = fread(ret, sizeof(ImageTypeBase), IMAGE_TYPE_NUM_CHANNELS*width*height, imgin);
+
+  fclose(imgin);
+
+  return ret;
+
+}
+
+void ImageUtils::writeBinaryPPM(ImageType image, const char *filename, int width, int height, int numChannels)
+{
+  FILE *imgout = fopen(filename, "w");
+
+  if (imgout == NULL) {
+    fprintf(stderr, "Error: Filename %s could not be opened for writing\n", filename);
+    return;
+  }
+
+  if (numChannels == 3) {
+    fprintf(imgout, "P6\n%d %d\n255\n", width, height);
+  } else if (numChannels == 1) {
+    fprintf(imgout, "P5\n%d %d\n255\n", width, height);
+  } else {
+    fprintf(stderr, "Error: writeBinaryPPM: Unsupported number of channels\n");
+  }
+  fwrite(image, sizeof(ImageTypeBase), numChannels*width*height, imgout);
+
+  fclose(imgout);
+
+}
+
+ImageType ImageUtils::allocateImage(int width, int height, int numChannels, short int border)
+{
+  int overallocation = 256;
+ return (ImageType) calloc(width*height*numChannels+overallocation, sizeof(ImageTypeBase));
+}
+
+
+void ImageUtils::freeImage(ImageType image)
+{
+  free(image);
+}
+
+
+// allocation of one color image used for tmp buffers, etc.
+// format of contiguous memory block:
+//    YUVInfo struct (type + BimageInfo for Y,U, and V),
+//    Y row pointers
+//    U row pointers
+//    V row pointers
+//    Y image pixels
+//    U image pixels
+//    V image pixels
+YUVinfo *YUVinfo::allocateImage(unsigned short width, unsigned short height)
+{
+    unsigned short heightUV, widthUV;
+
+    widthUV = width;
+    heightUV = height;
+
+    // figure out how much space to hold all pixels...
+    int size = ((width * height * 3) + 8);
+    unsigned char *position = 0;
+
+    // VC 8 does not like calling free on yuv->Y.ptr since it is in
+    // the middle of a block.  So rearrange the memory layout so after
+    // calling mapYUVInforToImage yuv->Y.ptr points to the begginning
+    // of the calloc'ed block.
+    YUVinfo *yuv = (YUVinfo *) calloc(sizeof(YUVinfo), 1);
+    if (yuv) {
+        yuv->Y.width  = yuv->Y.pitch = width;
+        yuv->Y.height = height;
+        yuv->Y.border = yuv->U.border = yuv->V.border = (unsigned short) 0;
+        yuv->U.width  = yuv->U.pitch = yuv->V.width = yuv->V.pitch = widthUV;
+        yuv->U.height = yuv->V.height = heightUV;
+
+        unsigned char* block = (unsigned char*) calloc(
+                sizeof(unsigned char *) * (height + heightUV + heightUV) +
+                sizeof(unsigned char) * size, 1);
+
+        position = block;
+        unsigned char **y = (unsigned char **) (block + size);
+
+        /* Initialize and assign row pointers */
+        yuv->Y.ptr = y;
+        yuv->V.ptr = &y[height];
+        yuv->U.ptr = &y[height + heightUV];
+    }
+    if (size)
+        mapYUVInfoToImage(yuv, position);
+    return yuv;
+}
+
+// wrap YUVInfo row pointers around 3 contiguous image (color component) planes.
+// position = starting pixel in image.
+void YUVinfo::mapYUVInfoToImage(YUVinfo *img, unsigned char *position)
+{
+    int i;
+    for (i = 0; i < img->Y.height; i++, position += img->Y.width)
+        img->Y.ptr[i] = position;
+    for (i = 0; i < img->V.height; i++, position += img->V.width)
+        img->V.ptr[i] = position;
+    for (i = 0; i < img->U.height; i++, position += img->U.width)
+        img->U.ptr[i] = position;
+}
+
+
diff --git a/benchmark/feature_mos/src/mosaic/ImageUtils.h b/benchmark/feature_mos/src/mosaic/ImageUtils.h
new file mode 100644
index 00000000..92965ca8
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/ImageUtils.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+///////////////////////////////////////////////////
+// ImageUtils.h
+// $Id: ImageUtils.h,v 1.9 2011/05/16 15:33:06 mbansal Exp $
+
+#ifndef IMAGE_UTILS_H
+#define IMAGE_UTILS_H
+
+#include <stdlib.h>
+
+/**
+ *  Definition of basic image types
+ */
+typedef unsigned char ImageTypeBase;
+typedef ImageTypeBase *ImageType;
+
+typedef short ImageTypeShortBase;
+typedef ImageTypeShortBase *ImageTypeShort;
+
+typedef float ImageTypeFloatBase;
+typedef ImageTypeFloatBase *ImageTypeFloat;
+
+
+class ImageUtils {
+public:
+
+  /**
+   *  Default number of channels in image.
+   */
+  static const int IMAGE_TYPE_NUM_CHANNELS = 3;
+
+  /**
+   *  Definition of an empty image.
+   */
+  static const int IMAGE_TYPE_NOIMAGE = 0;
+
+  /**
+   *  Convert image from BGR (interlaced) to YVU (non-interlaced)
+   *
+   *  Arguments:
+   *    out: Resulting image (note must be preallocated before
+   *    call)
+   *    in: Input image
+   *    width: Width of input image
+   *    height: Height of input image
+   */
+  static void rgb2yvu(ImageType out, ImageType in, int width, int height);
+
+  static void rgba2yvu(ImageType out, ImageType in, int width, int height);
+
+  /**
+   *  Convert image from YVU (non-interlaced) to BGR (interlaced)
+   *
+   *  Arguments:
+   *    out: Resulting image (note must be preallocated before
+   *    call)
+   *    in: Input image
+   *    width: Width of input image
+   *    height: Height of input image
+   */
+  static void yvu2rgb(ImageType out, ImageType in, int width, int height);
+  static void yvu2bgr(ImageType out, ImageType in, int width, int height);
+
+  /**
+   *  Convert image from BGR to grayscale
+   *
+   *  Arguments:
+   *    in: Input image
+   *    width: Width of input image
+   *    height: Height of input image
+   *
+   *  Return:
+   *    Pointer to resulting image (allocation is done here, free
+   *    must be done by caller)
+   */
+  static ImageType rgb2gray(ImageType in, int width, int height);
+  static ImageType rgb2gray(ImageType out, ImageType in, int width, int height);
+
+  /**
+   *  Read a binary PPM image
+   */
+  static ImageType readBinaryPPM(const char *filename, int &width, int &height);
+
+  /**
+   *  Write a binary PPM image
+   */
+  static void writeBinaryPPM(ImageType image, const char *filename, int width, int height, int numChannels = IMAGE_TYPE_NUM_CHANNELS);
+
+  /**
+   *  Allocate space for a standard image.
+   */
+  static ImageType allocateImage(int width, int height, int numChannels, short int border = 0);
+
+  /**
+   *  Free memory of image
+   */
+  static void freeImage(ImageType image);
+
+  static ImageType *imageTypeToRowPointers(ImageType out, int width, int height);
+  /**
+   *  Get time.
+   */
+  static double getTime();
+
+protected:
+
+  /**
+  *  Constants for YVU/RGB conversion
+  */
+  static const int REDY = 257;
+  static const int REDV = 439;
+  static const int REDU = 148;
+  static const int GREENY = 504;
+  static const int GREENV = 368;
+  static const int GREENU = 291;
+  static const int BLUEY = 98;
+  static const int BLUEV = 71;
+  static const int BLUEU = 439;
+
+};
+
+/**
+ * Structure containing an image and other bookkeeping items.
+ * Used in YUVinfo to store separate YVU image planes.
+ */
+typedef struct {
+  ImageType *ptr;
+  unsigned short width;
+  unsigned short height;
+  unsigned short border;
+  unsigned short pitch;
+} BimageInfo;
+
+/**
+ * A YUV image container,
+ */
+class YUVinfo {
+public:
+  static YUVinfo *allocateImage(unsigned short width, unsigned short height);
+  static void mapYUVInfoToImage(YUVinfo *img, unsigned char *position);
+
+  /**
+  * Y Plane
+  */
+  BimageInfo Y;
+
+  /**
+  *  V (1st color) plane
+  */
+  BimageInfo V;
+
+  /**
+  *  U (1st color) plane
+  */
+  BimageInfo U;
+};
+
+#endif
diff --git a/benchmark/feature_mos/src/mosaic/Interp.h b/benchmark/feature_mos/src/mosaic/Interp.h
new file mode 100644
index 00000000..19c4a40c
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/Interp.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+///////////////////////////////////////////////////////////
+// Interp.h
+// $Id: Interp.h,v 1.2 2011/06/17 13:35:48 mbansal Exp $
+
+#ifndef INTERP_H
+#define INTERP_H
+
+#include "Pyramid.h"
+
+#define CTAPS 40
+static double ciTable[81] = {
+        1, 0.998461, 0.993938, 0.98657, 0.9765,
+        0.963867, 0.948813, 0.931477, 0.912, 0.890523,
+        0.867188, 0.842133, 0.8155, 0.78743, 0.758062,
+        0.727539, 0.696, 0.663586, 0.630437, 0.596695,
+        0.5625, 0.527992, 0.493312, 0.458602, 0.424,
+        0.389648, 0.355687, 0.322258, 0.2895, 0.257555,
+        0.226562, 0.196664, 0.168, 0.140711, 0.114937,
+        0.0908203, 0.0685, 0.0481172, 0.0298125, 0.0137266,
+        0, -0.0118828, -0.0225625, -0.0320859, -0.0405,
+        -0.0478516, -0.0541875, -0.0595547, -0.064, -0.0675703,
+        -0.0703125, -0.0722734, -0.0735, -0.0740391, -0.0739375,
+        -0.0732422, -0.072, -0.0702578, -0.0680625, -0.0654609,
+        -0.0625, -0.0592266, -0.0556875, -0.0519297, -0.048,
+        -0.0439453, -0.0398125, -0.0356484, -0.0315, -0.0274141,
+        -0.0234375, -0.0196172, -0.016, -0.0126328, -0.0095625,
+        -0.00683594, -0.0045, -0.00260156, -0.0011875, -0.000304687, 0.0
+};
+
+inline double ciCalc(PyramidShort *img, int xi, int yi, double xfrac, double yfrac)
+{
+  double tmpf[4];
+
+  // Interpolate using 16 points
+  ImageTypeShortBase *in = img->ptr[yi-1] + xi - 1;
+  int off = (int)(xfrac * CTAPS);
+
+  tmpf[0] = in[0] * ciTable[off + 40];
+  tmpf[0] += in[1] * ciTable[off];
+  tmpf[0] += in[2] * ciTable[40 - off];
+  tmpf[0] += in[3] * ciTable[80 - off];
+  in += img->pitch;
+  tmpf[1] = in[0] * ciTable[off + 40];
+  tmpf[1] += in[1] * ciTable[off];
+  tmpf[1] += in[2] * ciTable[40 - off];
+  tmpf[1] += in[3] * ciTable[80 - off];
+  in += img->pitch;
+  tmpf[2] = in[0] * ciTable[off + 40];
+  tmpf[2] += in[1] * ciTable[off];
+  tmpf[2] += in[2] * ciTable[40 - off];
+  tmpf[2] += in[3] * ciTable[80 - off];
+  in += img->pitch;
+  tmpf[3] = in[0] * ciTable[off + 40];
+  tmpf[3] += in[1] * ciTable[off];
+  tmpf[3] += in[2] * ciTable[40 - off];
+  tmpf[3] += in[3] * ciTable[80 - off];
+
+  // this is the final interpolation
+  off = (int)(yfrac * CTAPS);
+  return (ciTable[off + 40] * tmpf[0] + ciTable[off] * tmpf[1] +
+          ciTable[40 - off] * tmpf[2] + ciTable[80 - off] * tmpf[3]);
+}
+
+#endif
diff --git a/benchmark/feature_mos/src/mosaic/Log.h b/benchmark/feature_mos/src/mosaic/Log.h
new file mode 100644
index 00000000..2adfeda9
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/Log.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LOG_H_
+#define LOG_H_
+
+#include <android/log.h>
+#define LOGV(...) __android_log_print(ANDROID_LOG_SILENT, LOG_TAG, __VA_ARGS__)
+#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
+#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
+
+#endif
diff --git a/benchmark/feature_mos/src/mosaic/MatrixUtils.h b/benchmark/feature_mos/src/mosaic/MatrixUtils.h
new file mode 100644
index 00000000..a0b84d81
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/MatrixUtils.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+///////////////////////////////////////////////////
+// Matrixutils.h
+// $Id: MatrixUtils.h,v 1.5 2011/05/16 15:33:06 mbansal Exp $
+
+
+#ifndef MATRIX_UTILS_H
+#define MATRIX_UTILS_H
+
+/* Simple class for 3x3 matrix, mainly used to convert from 9x1
+ * to 3x3
+ */
+class Matrix33 {
+public:
+
+  /**
+   *  Empty constructor
+   */
+  Matrix33() {
+    initialize();
+  }
+
+  /**
+   *  Constructor with identity initialization
+   *  Arguments:
+   *     identity: Specifies wether to initialize matrix to
+   *     identity or zeros
+   */
+  Matrix33(bool identity) {
+    initialize(identity);
+  }
+
+  /**
+   *  Initialize to identity matrix
+   */
+  void initialize(bool identity = false) {
+    mat[0][1] = mat[0][2] = mat[1][0] = mat[1][2] = mat[2][0] = mat[2][1] = 0.0;
+    if (identity) {
+      mat[0][0] = mat[1][1] = mat[2][2] = 1.0;
+    } else {
+      mat[0][0] = mat[1][1] = mat[2][2] = 0.0;
+    }
+  }
+
+  /**
+   *  Conver ta 9x1 matrix to a 3x3 matrix
+   */
+  static void convert9to33(double out[3][3], double in[9]) {
+    out[0][0] = in[0];
+    out[0][1] = in[1];
+    out[0][2] = in[2];
+
+    out[1][0] = in[3];
+    out[1][1] = in[4];
+    out[1][2] = in[5];
+
+    out[2][0] = in[6];
+    out[2][1] = in[7];
+    out[2][2] = in[8];
+
+  }
+
+  /* Matrix data */
+  double mat[3][3];
+
+};
+
+/* Simple class for 9x1 matrix, mainly used to convert from 3x3
+ * to 9x1
+ */
+class Matrix9 {
+public:
+
+  /**
+   *  Empty constructor
+   */
+  Matrix9() {
+    initialize();
+  }
+
+  /**
+   *  Constructor with identity initialization
+   *  Arguments:
+   *     identity: Specifies wether to initialize matrix to
+   *     identity or zeros
+   */
+  Matrix9(bool identity) {
+    initialize(identity);
+  }
+
+  /**
+   *  Initialize to identity matrix
+   */
+  void initialize(bool identity = false) {
+    mat[1] = mat[2] = mat[3] = mat[5] = mat[6] = mat[7] = 0.0;
+    if (identity) {
+      mat[0] = mat[4] = mat[8] = 1.0;
+    } else {
+      mat[0] = mat[4] = mat[8] = 0.0;
+    }
+  }
+
+  /**
+   *  Conver ta 3x3 matrix to a 9x1 matrix
+   */
+  static void convert33to9(double out[9], double in[3][3]) {
+    out[0] = in[0][0];
+    out[1] = in[0][1];
+    out[2] = in[0][2];
+
+    out[3] = in[1][0];
+    out[4] = in[1][1];
+    out[5] = in[1][2];
+
+    out[6] = in[2][0];
+    out[7] = in[2][1];
+    out[8] = in[2][2];
+
+  }
+
+  /* Matrix data */
+  double mat[9];
+
+};
+
+#endif
diff --git a/benchmark/feature_mos/src/mosaic/Mosaic.cpp b/benchmark/feature_mos/src/mosaic/Mosaic.cpp
new file mode 100644
index 00000000..4abc6f68
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/Mosaic.cpp
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+///////////////////////////////////////////////////
+// Mosaic.pp
+// S.O. # :
+// Author(s): zkira
+// $Id: Mosaic.cpp,v 1.20 2011/06/24 04:22:14 mbansal Exp $
+
+#include <stdio.h>
+#include <string.h>
+
+#include "Mosaic.h"
+#include "trsMatrix.h"
+
+Mosaic::Mosaic()
+{
+    initialized = false;
+    imageMosaicYVU = NULL;
+    frames_size = 0;
+    max_frames = 200;
+}
+
+Mosaic::~Mosaic()
+{
+    for (int i = 0; i < frames_size; i++)
+    {
+        if (frames[i])
+            delete frames[i];
+    }
+    delete frames;
+    delete rframes;
+
+    for (int j = 0; j < owned_size; j++)
+        delete owned_frames[j];
+    delete owned_frames;
+
+    if (aligner != NULL)
+        delete aligner;
+    if (blender != NULL)
+        delete blender;
+}
+
+int Mosaic::initialize(int blendingType, int stripType, int width, int height, int nframes, bool quarter_res, float thresh_still)
+{
+    this->blendingType = blendingType;
+
+    // TODO: Review this logic if enabling FULL or PAN mode
+    if (blendingType == Blend::BLEND_TYPE_FULL ||
+            blendingType == Blend::BLEND_TYPE_PAN)
+    {
+        stripType = Blend::STRIP_TYPE_THIN;
+    }
+
+    this->stripType = stripType;
+    this->width = width;
+    this->height = height;
+
+
+    mosaicWidth = mosaicHeight = 0;
+    imageMosaicYVU = NULL;
+
+    frames = new MosaicFrame *[max_frames];
+    rframes = new MosaicFrame *[max_frames];
+
+    if(nframes>-1)
+    {
+        for(int i=0; i<nframes; i++)
+        {
+            frames[i] = new MosaicFrame(this->width,this->height,false); // Do no allocate memory for YUV data
+        }
+    }
+    else
+    {
+        for(int i=0; i<max_frames; i++)
+        {
+            frames[i] = NULL;
+        }
+    }
+
+    owned_frames = new ImageType[max_frames];
+    owned_size = 0;
+
+    aligner = new Align();
+    aligner->initialize(width, height,quarter_res,thresh_still);
+
+    if (blendingType == Blend::BLEND_TYPE_FULL ||
+            blendingType == Blend::BLEND_TYPE_PAN ||
+            blendingType == Blend::BLEND_TYPE_CYLPAN ||
+            blendingType == Blend::BLEND_TYPE_HORZ) {
+        blender = new Blend();
+        blender->initialize(blendingType, stripType, width, height);
+    } else {
+        blender = NULL;
+        return MOSAIC_RET_ERROR;
+    }
+
+    initialized = true;
+
+    return MOSAIC_RET_OK;
+}
+
+int Mosaic::addFrameRGB(ImageType imageRGB)
+{
+    ImageType imageYVU;
+    // Convert to YVU24 which is used by blending
+    imageYVU = ImageUtils::allocateImage(this->width, this->height, ImageUtils::IMAGE_TYPE_NUM_CHANNELS);
+    ImageUtils::rgb2yvu(imageYVU, imageRGB, width, height);
+
+    int existing_frames_size = frames_size;
+    int ret = addFrame(imageYVU);
+
+    if (frames_size > existing_frames_size)
+        owned_frames[owned_size++] = imageYVU;
+    else
+        ImageUtils::freeImage(imageYVU);
+
+    return ret;
+}
+
+int Mosaic::addFrame(ImageType imageYVU)
+{
+    if(frames[frames_size]==NULL)
+        frames[frames_size] = new MosaicFrame(this->width,this->height,false);
+
+    MosaicFrame *frame = frames[frames_size];
+
+    frame->image = imageYVU;
+
+    // Add frame to aligner
+    int ret = MOSAIC_RET_ERROR;
+    if (aligner != NULL)
+    {
+        // Note aligner takes in RGB images
+        int align_flag = Align::ALIGN_RET_OK;
+        align_flag = aligner->addFrame(frame->image);
+        aligner->getLastTRS(frame->trs);
+
+        if (frames_size >= max_frames)
+        {
+            return MOSAIC_RET_ERROR;
+        }
+
+        switch (align_flag)
+        {
+            case Align::ALIGN_RET_OK:
+                frames_size++;
+                ret = MOSAIC_RET_OK;
+                break;
+            case Align::ALIGN_RET_FEW_INLIERS:
+                frames_size++;
+                ret = MOSAIC_RET_FEW_INLIERS;
+                break;
+            case Align::ALIGN_RET_LOW_TEXTURE:
+                ret = MOSAIC_RET_LOW_TEXTURE;
+                break;
+            case Align::ALIGN_RET_ERROR:
+                ret = MOSAIC_RET_ERROR;
+                break;
+            default:
+                break;
+        }
+    }
+
+    return ret;
+}
+
+
+int Mosaic::createMosaic(float &progress, bool &cancelComputation)
+{
+    if (frames_size <= 0)
+    {
+        // Haven't accepted any frame in aligner. No need to do blending.
+        progress = TIME_PERCENT_ALIGN + TIME_PERCENT_BLEND
+                + TIME_PERCENT_FINAL;
+        return MOSAIC_RET_OK;
+    }
+
+    if (blendingType == Blend::BLEND_TYPE_PAN)
+    {
+
+        balanceRotations();
+
+    }
+
+    int ret = Blend::BLEND_RET_ERROR;
+
+    // Blend the mosaic (alignment has already been done)
+    if (blender != NULL)
+    {
+        ret = blender->runBlend((MosaicFrame **) frames, (MosaicFrame **) rframes,
+                frames_size, imageMosaicYVU,
+                mosaicWidth, mosaicHeight, progress, cancelComputation);
+    }
+
+    switch(ret)
+    {
+        case Blend::BLEND_RET_ERROR:
+        case Blend::BLEND_RET_ERROR_MEMORY:
+            ret = MOSAIC_RET_ERROR;
+            break;
+        case Blend::BLEND_RET_CANCELLED:
+            ret = MOSAIC_RET_CANCELLED;
+            break;
+        case Blend::BLEND_RET_OK:
+            ret = MOSAIC_RET_OK;
+    }
+    return ret;
+}
+
+ImageType Mosaic::getMosaic(int &width, int &height)
+{
+    width = mosaicWidth;
+    height = mosaicHeight;
+
+    return imageMosaicYVU;
+}
+
+
+
+int Mosaic::balanceRotations()
+{
+    // Normalize to the mean angle of rotation (Smiley face)
+    double sineAngle = 0.0;
+
+    for (int i = 0; i < frames_size; i++) sineAngle += frames[i]->trs[0][1];
+    sineAngle /= frames_size;
+    // Calculate the cosineAngle (1 - sineAngle*sineAngle) = cosineAngle*cosineAngle
+    double cosineAngle = sqrt(1.0 - sineAngle*sineAngle);
+    double m[3][3] = {
+        { cosineAngle, -sineAngle, 0 },
+        { sineAngle, cosineAngle, 0},
+        { 0, 0, 1}};
+    double tmp[3][3];
+
+    for (int i = 0; i < frames_size; i++) {
+        memcpy(tmp, frames[i]->trs, sizeof(tmp));
+        mult33d(frames[i]->trs, m, tmp);
+    }
+
+    return MOSAIC_RET_OK;
+}
diff --git a/benchmark/feature_mos/src/mosaic/Mosaic.h b/benchmark/feature_mos/src/mosaic/Mosaic.h
new file mode 100644
index 00000000..9dea6642
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/Mosaic.h
@@ -0,0 +1,226 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+///////////////////////////////////////////////////
+// Mosaic.h
+// S.O. # :
+// Author(s): zkira
+// $Id: Mosaic.h,v 1.16 2011/06/24 04:22:14 mbansal Exp $
+
+#ifndef MOSAIC_H
+#define MOSAIC_H
+
+#include "ImageUtils.h"
+#include "AlignFeatures.h"
+#include "Blend.h"
+#include "MosaicTypes.h"
+
+/*! \mainpage Mosaic
+
+    \section intro Introduction
+    The class Mosaic provides a simple interface to the panoramic mosaicing algorithm. The class allows passing in individual image frames to be stitched together, computes the alignment transformation between them, and then stitches and blends them together into a single panoramic output which can then be accessed as a single image. \
+
+    \section usage Usage
+    The class methods need to be called as outlined in the sample application which is created from the mosaic_main.cpp file in the directory src/mosaic/. A brief snapshot of the flow is given below:
+
+    \code
+    Mosaic mosaic;
+    // Define blending types to use, and the frame dimensions
+    int blendingType = Blend::BLEND_TYPE_CYLPAN;
+    int stripType = Blend::STRIP_TYPE_THIN;
+    int width = 640;
+    int height = 480;
+
+    while (<image frames are available>)
+    {
+        // Check for initialization and if not, initialize
+        if (!mosaic.isInitialized())
+        {
+          // Initialize mosaic processing
+          mosaic.initialize(blendingType, stripType, width, height, -1, false, 5.0f);
+        }
+
+        // Add to list of frames
+        mosaic.addFrameRGB(imageRGB);
+
+        // Free image
+        ImageUtils::freeImage(imageRGB);
+    }
+
+    // Create the mosaic
+    ret = mosaic.createMosaic();
+
+    // Get back the result
+    resultYVU = mosaic.getMosaic(mosaicWidth, mosaicHeight);
+
+    printf("Got mosaic of size %d,%d\n", mosaicWidth, mosaicHeight);
+
+    \endcode
+*/
+
+/*!
+ *  Main class that creates a mosaic by creating an aligner and blender.
+ */
+class Mosaic
+{
+
+public:
+
+  Mosaic();
+  ~Mosaic();
+
+   /*!
+    *   Creates the aligner and blender and initializes state.
+    *   \param blendingType Type of blending to perform
+    *   \param stripType    Type of strip to use. 0: thin, 1: wide. stripType
+    *                       is effective only when blendingType is CylPan or
+    *                       Horz. Otherwise, it is set to thin irrespective of the input.
+    *   \param width        Width of input images (note: all images must be same size)
+    *   \param height       Height of input images (note: all images must be same size)
+    *   \param nframes      Number of frames to pre-allocate; default value -1 will allocate each frame as it comes
+    *   \param quarter_res  Whether to compute alignment at quarter the input resolution (default = false)
+    *   \param thresh_still Minimum number of pixels of translation detected between the new frame and the last frame before this frame is added to be mosaiced. For the low-res processing at 320x180 resolution input, we set this to 5 pixels. To reject no frames, set this to 0.0 (default value).
+    *   \return             Return code signifying success or failure.
+    */
+  int initialize(int blendingType, int stripType, int width, int height, int nframes = -1, bool quarter_res = false, float thresh_still = 0.0);
+
+   /*!
+    *   Adds a YVU frame to the mosaic.
+    *   \param imageYVU     Pointer to a YVU image.
+    *   \return             Return code signifying success or failure.
+    */
+  int addFrame(ImageType imageYVU);
+
+   /*!
+    *   Adds a RGB frame to the mosaic.
+    *   \param imageRGB     Pointer to a RGB image.
+    *   \return             Return code signifying success or failure.
+    */
+  int addFrameRGB(ImageType imageRGB);
+
+   /*!
+    *   After adding all frames, call this function to perform the final blending.
+    *   \param progress     Variable to set the current progress in.
+    *   \return             Return code signifying success or failure.
+    */
+  int createMosaic(float &progress, bool &cancelComputation);
+
+    /*!
+    *   Obtains the resulting mosaic and its dimensions.
+    *   \param width        Width of the resulting mosaic (returned)
+    *   \param height       Height of the resulting mosaic (returned)
+    *   \return             Pointer to image.
+    */
+  ImageType getMosaic(int &width, int &height);
+
+    /*!
+    *   Provides access to the internal alignment object pointer.
+    *   \return             Pointer to the aligner object.
+    */
+  Align* getAligner() { return aligner; }
+
+    /*!
+    *   Obtain initialization state.
+    *
+    *   return              Returns true if initialized, false otherwise.
+    */
+  bool isInitialized() { return initialized; }
+
+
+  /*!
+   *  Return codes for mosaic.
+   */
+  static const int MOSAIC_RET_OK    = 1;
+  static const int MOSAIC_RET_ERROR = -1;
+  static const int MOSAIC_RET_CANCELLED = -2;
+  static const int MOSAIC_RET_LOW_TEXTURE = -3;
+  static const int MOSAIC_RET_FEW_INLIERS = 2;
+
+protected:
+
+  /**
+   * Size of image frames making up mosaic
+   */
+  int width, height;
+
+  /**
+   * Size of actual mosaic
+   */
+  int mosaicWidth, mosaicHeight;
+
+  /**
+   * Bounding box to crop the mosaic when the gray border is not desired.
+   */
+  MosaicRect mosaicCroppingRect;
+
+  ImageType imageMosaicYVU;
+
+  /**
+   * Collection of frames that will make up mosaic.
+   */
+  MosaicFrame **frames;
+
+  /**
+    * Subset of frames that are considered as relevant.
+    */
+  MosaicFrame **rframes;
+
+  int frames_size;
+  int max_frames;
+
+  /**
+    * Implicitly created frames, should be freed by Mosaic.
+    */
+  ImageType *owned_frames;
+  int owned_size;
+
+  /**
+   * Initialization state.
+   */
+  bool initialized;
+
+  /**
+   *  Type of blending to perform.
+   */
+  int blendingType;
+
+  /**
+    * Type of strip to use. 0: thin (default), 1: wide
+    */
+  int stripType;
+
+  /**
+   *  Pointer to aligner.
+   */
+  Align *aligner;
+
+  /**
+   *  Pointer to blender.
+   */
+  Blend *blender;
+
+  /**
+   *  Modifies TRS matrices so that rotations are balanced
+   *  about center of mosaic
+   *
+   * Side effect: TRS matrices of all mosaic frames
+   *              are modified
+   */
+  int balanceRotations();
+
+};
+
+#endif
diff --git a/benchmark/feature_mos/src/mosaic/MosaicTypes.h b/benchmark/feature_mos/src/mosaic/MosaicTypes.h
new file mode 100644
index 00000000..395ec458
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/MosaicTypes.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+///////////////////////////////////////////////////
+// MosaicTypes.h
+// S.O. # :
+// Author(s): zkira
+// $Id: MosaicTypes.h,v 1.15 2011/06/17 13:35:48 mbansal Exp $
+
+
+#ifndef MOSAIC_TYPES_H
+#define MOSAIC_TYPES_H
+
+#include "ImageUtils.h"
+
+/**
+ *  Definition of rectangle in a mosaic.
+ */
+class MosaicRect
+{
+    public:
+        MosaicRect()
+        {
+            left = right = top = bottom = 0.0;
+        }
+
+        inline int Width()
+        {
+            return right - left;
+        }
+
+        inline int Height()
+        {
+            return bottom - top;
+        }
+
+        /**
+         *  Bounds of the rectangle
+         */
+        int left, right, top, bottom;
+};
+
+class BlendRect
+{
+    public:
+    double lft, rgt, top, bot;
+};
+
+/**
+ *  A frame making up the mosaic.
+ *  Note: Currently assumes a YVU image
+ *  containing separate Y,V, and U planes
+ *  in contiguous memory (in that order).
+ */
+class MosaicFrame {
+public:
+  ImageType image;
+  double trs[3][3];
+  int width, height;
+  BlendRect brect;  // This frame warped to the Mosaic coordinate system
+  BlendRect vcrect; // brect clipped using the voronoi neighbors
+  bool internal_allocation;
+
+  MosaicFrame() { };
+  MosaicFrame(int _width, int _height, bool allocate=true)
+  {
+    width = _width;
+    height = _height;
+    internal_allocation = allocate;
+    if(internal_allocation)
+        image = ImageUtils::allocateImage(width, height, ImageUtils::IMAGE_TYPE_NUM_CHANNELS);
+  }
+
+
+  ~MosaicFrame()
+  {
+    if(internal_allocation)
+        if (image)
+        free(image);
+  }
+
+  /**
+  *  Get the V plane of the image.
+  */
+  inline ImageType getV()
+  {
+    return (image + (width*height));
+  }
+
+  /**
+  *  Get the U plane of the image.
+  */
+  inline ImageType getU()
+  {
+    return (image + (width*height*2));
+  }
+
+  /**
+  *  Get a pixel from the V plane of the image.
+  */
+  inline int getV(int y, int x)
+  {
+    ImageType U = image + (width*height);
+    return U[y*width+x];
+  }
+
+  /**
+  *  Get a pixel from the U plane of the image.
+  */
+  inline int getU(int y, int x)
+  {
+    ImageType U = image + (width*height*2);
+    return U[y*width+x];
+  }
+
+};
+
+/**
+ *  Structure for describing a warp.
+ */
+typedef struct {
+  int horizontal;
+  double theta;
+  double x;
+  double y;
+  double width;
+  double radius;
+  double direction;
+  double correction;
+  int blendRange;
+  int blendRangeUV;
+  int nlevs;
+  int nlevsC;
+  int blendingType;
+  int stripType;
+  // Add an overlap to prevent a gap between pictures due to roundoffs
+  double roundoffOverlap;// 1.5
+
+} BlendParams;
+
+#endif
diff --git a/benchmark/feature_mos/src/mosaic/Pyramid.cpp b/benchmark/feature_mos/src/mosaic/Pyramid.cpp
new file mode 100644
index 00000000..b022d73d
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/Pyramid.cpp
@@ -0,0 +1,270 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// pyramid.cpp
+
+#include <stdio.h>
+#include <string.h>
+
+#include "Pyramid.h"
+
+// We allocate the entire pyramid into one contiguous storage. This makes
+// cleanup easier than fragmented stuff. In addition, we added a "pitch"
+// field, so pointer manipulation is much simpler when it would be faster.
+PyramidShort *PyramidShort::allocatePyramidPacked(real levels,
+        real width, real height, real border)
+{
+    real border2 = (real) (border << 1);
+    int lines, size = calcStorage(width, height, border2, levels, &lines);
+
+    PyramidShort *img = (PyramidShort *) calloc(sizeof(PyramidShort) * levels
+            + sizeof(short *) * lines +
+            + sizeof(short) * size, 1);
+
+    if (img) {
+        PyramidShort *curr, *last;
+        ImageTypeShort *y = (ImageTypeShort *) &img[levels];
+        ImageTypeShort position = (ImageTypeShort) &y[lines];
+        for (last = (curr = img) + levels; curr < last; curr++) {
+            curr->width = width;
+            curr->height = height;
+            curr->border = border;
+            curr->pitch = (real) (width + border2);
+            curr->ptr = y + border;
+
+            // Assign row pointers
+            for (int j = height + border2; j--; y++, position += curr->pitch) {
+                *y = position + border;
+            }
+
+            width >>= 1;
+            height >>= 1;
+        }
+    }
+
+    return img;
+}
+
+// Allocate an image of type short
+PyramidShort *PyramidShort::allocateImage(real width, real height, real border)
+{
+    real border2 = (real) (border << 1);
+    PyramidShort *img = (PyramidShort *)
+        calloc(sizeof(PyramidShort) + sizeof(short *) * (height + border2) +
+                sizeof(short) * (width + border2) * (height + border2), 1);
+
+    if (img) {
+        short **y = (short **) &img[1];
+        short *position = (short *) &y[height + border2];
+        img->width = width;
+        img->height = height;
+        img->border = border;
+        img->pitch = (real) (width + border2);
+        img->ptr = y + border;
+        position += border; // Move position down to origin of real image
+
+        // Assign row pointers
+        for (int j = height + border2; j--; y++, position += img->pitch) {
+            *y = position;
+        }
+    }
+
+    return img;
+}
+
+// Free the images
+void PyramidShort::freeImage(PyramidShort *image)
+{
+    if (image != NULL)
+        free(image);
+}
+
+// Calculate amount of storage needed taking into account the borders, etc.
+unsigned int PyramidShort::calcStorage(real width, real height, real border2,   int levels, int *lines)
+{
+    int size;
+
+    *lines = size = 0;
+
+    while(levels--) {
+        size += (width + border2) * (height + border2);
+        *lines += height + border2;
+        width >>= 1;
+        height >>= 1;
+    }
+
+    return size;
+}
+
+void PyramidShort::BorderSpread(PyramidShort *pyr, int left, int right,
+        int top, int bot)
+{
+    int off, off2, height, h, w;
+    ImageTypeShort base;
+
+    if (left || right) {
+        off = pyr->border - left;
+        off2 = pyr->width + off + pyr->border - right - 1;
+        h = pyr->border - top;
+        height = pyr->height + (h << 1);
+        base = pyr->ptr[-h] - off;
+
+        // spread in X
+        for (h = height; h--; base += pyr->pitch) {
+            for (w = left; w--;)
+                base[-1 - w] = base[0];
+            for (w = right; w--;)
+                base[off2 + w + 1] = base[off2];
+        }
+    }
+
+    if (top || bot) {
+        // spread in Y
+        base = pyr->ptr[top - pyr->border] - pyr->border;
+        for (h = top; h--; base -= pyr->pitch) {
+            memcpy(base - pyr->pitch, base, pyr->pitch * sizeof(short));
+        }
+
+        base = pyr->ptr[pyr->height + pyr->border - bot] - pyr->border;
+        for (h = bot; h--; base += pyr->pitch) {
+            memcpy(base, base - pyr->pitch, pyr->pitch * sizeof(short));
+        }
+    }
+}
+
+void PyramidShort::BorderExpandOdd(PyramidShort *in, PyramidShort *out, PyramidShort *scr,
+        int mode)
+{
+    int i,j;
+    int off = in->border / 2;
+
+    // Vertical Filter
+    for (j = -off; j < in->height + off; j++) {
+        int j2 = j * 2;
+        int limit = scr->width + scr->border;
+        for (i = -scr->border; i < limit; i++) {
+            int t1 = in->ptr[j][i];
+            int t2 = in->ptr[j+1][i];
+            scr->ptr[j2][i] = (short)
+                ((6 * t1 + (in->ptr[j-1][i] + t2) + 4) >> 3);
+            scr->ptr[j2+1][i] = (short)((t1 + t2 + 1) >> 1);
+        }
+    }
+
+    BorderSpread(scr, 0, 0, 3, 3);
+
+    // Horizontal Filter
+    int limit = out->height + out->border;
+    for (j = -out->border; j < limit; j++) {
+        for (i = -off; i < scr->width + off; i++) {
+            int i2 = i * 2;
+            int t1 = scr->ptr[j][i];
+            int t2 = scr->ptr[j][i+1];
+            out->ptr[j][i2] = (short) (out->ptr[j][i2] +
+                    (mode * ((6 * t1 +
+                              scr->ptr[j][i-1] + t2 + 4) >> 3)));
+            out->ptr[j][i2+1] = (short) (out->ptr[j][i2+1] +
+                    (mode * ((t1 + t2 + 1) >> 1)));
+        }
+    }
+
+}
+
+int PyramidShort::BorderExpand(PyramidShort *pyr, int nlev, int mode)
+{
+    PyramidShort *tpyr = pyr + nlev - 1;
+    PyramidShort *scr = allocateImage(pyr[1].width, pyr[0].height, pyr->border);
+    if (scr == NULL) return 0;
+
+    if (mode > 0) {
+        // Expand and add (reconstruct from Laplacian)
+        for (; tpyr > pyr; tpyr--) {
+            scr->width = tpyr[0].width;
+            scr->height = tpyr[-1].height;
+            BorderExpandOdd(tpyr, tpyr - 1, scr, 1);
+        }
+    }
+    else if (mode < 0) {
+        // Expand and subtract (build Laplacian)
+        while ((pyr++) < tpyr) {
+            scr->width = pyr[0].width;
+            scr->height = pyr[-1].height;
+            BorderExpandOdd(pyr, pyr - 1, scr, -1);
+        }
+    }
+
+    freeImage(scr);
+    return 1;
+}
+
+void PyramidShort::BorderReduceOdd(PyramidShort *in, PyramidShort *out, PyramidShort *scr)
+{
+    ImageTypeShortBase *s, *ns, *ls, *p, *np;
+
+    int off = scr->border - 2;
+    s = scr->ptr[-scr->border] - (off >> 1);
+    ns = s + scr->pitch;
+    ls = scr->ptr[scr->height + scr->border - 1] + scr->pitch - (off >> 1);
+    int width = scr->width + scr->border;
+    p = in->ptr[-scr->border] - off;
+    np = p + in->pitch;
+
+    // treat it as if the whole thing were the image
+    for (; s < ls; s = ns, ns += scr->pitch, p = np, np += in->pitch) {
+        for (int w = width; w--; s++, p += 2) {
+            *s = (short)((((int) p[-2]) + ((int) p[2]) + 8 +    // 1
+                        ((((int) p[-1]) + ((int) p[1])) << 2) + // 4
+                        ((int) *p) * 6) >> 4);          // 6
+        }
+    }
+
+    BorderSpread(scr, 5, 4 + ((in->width ^ 1) & 1), 0, 0); //
+
+    s = out->ptr[-(off >> 1)] - out->border;
+    ns = s + out->pitch;
+    ls = s + out->pitch * (out->height + off);
+    p = scr->ptr[-off] - out->border;
+    int pitch = scr->pitch;
+    int pitch2 = pitch << 1;
+    np = p + pitch2;
+    for (; s < ls; s = ns, ns += out->pitch, p = np, np += pitch2) {
+        for (int w = out->pitch; w--; s++, p++) {
+            *s = (short)((((int) p[-pitch2]) + ((int) p[pitch2]) + 8 + // 1
+                        ((((int) p[-pitch]) + ((int) p[pitch])) << 2) + // 4
+                        ((int) *p) * 6) >> 4);              // 6
+        }
+    }
+    BorderSpread(out, 0, 0, 5, 5);
+
+}
+
+int PyramidShort::BorderReduce(PyramidShort *pyr, int nlev)
+{
+    PyramidShort *scr = allocateImage(pyr[1].width, pyr[0].height, pyr->border);
+    if (scr == NULL)
+        return 0;
+
+    BorderSpread(pyr, pyr->border, pyr->border, pyr->border, pyr->border);
+    while (--nlev) {
+        BorderReduceOdd(pyr, pyr + 1, scr);
+        pyr++;
+        scr->width = pyr[1].width;
+        scr->height = pyr[0].height;
+    }
+
+    freeImage(scr);
+    return 1;
+}
diff --git a/benchmark/feature_mos/src/mosaic/Pyramid.h b/benchmark/feature_mos/src/mosaic/Pyramid.h
new file mode 100644
index 00000000..c5fe9071
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/Pyramid.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Pyramid.h
+
+#ifndef PYRAMID_H
+#define PYRAMID_H
+
+#include "ImageUtils.h"
+
+typedef unsigned short int real;
+
+//  Structure containing a packed pyramid of type ImageTypeShort.  Used for pyramid
+//  blending, among other things.
+
+class PyramidShort
+{
+
+public:
+
+  ImageTypeShort *ptr;              // Pointer containing the image
+  real width, height;               // Width and height of input images
+  real numChannels;                 // Number of channels in input images
+  real border;                      // border size
+  real pitch;                       // Pitch.  Used for moving through image efficiently.
+
+  static PyramidShort *allocatePyramidPacked(real width, real height, real levels, real border = 0);
+  static PyramidShort *allocateImage(real width, real height, real border);
+  static void createPyramid(ImageType image, PyramidShort *pyramid, int last = 3 );
+  static void freeImage(PyramidShort *image);
+
+  static unsigned int calcStorage(real width, real height, real border2, int levels, int *lines);
+
+  static void BorderSpread(PyramidShort *pyr, int left, int right, int top, int bot);
+  static void BorderExpandOdd(PyramidShort *in, PyramidShort *out, PyramidShort *scr, int mode);
+  static int BorderExpand(PyramidShort *pyr, int nlev, int mode);
+  static int BorderReduce(PyramidShort *pyr, int nlev);
+  static void BorderReduceOdd(PyramidShort *in, PyramidShort *out, PyramidShort *scr);
+};
+
+#endif
diff --git a/benchmark/feature_mos/src/mosaic/trsMatrix.cpp b/benchmark/feature_mos/src/mosaic/trsMatrix.cpp
new file mode 100644
index 00000000..4c04e6c3
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/trsMatrix.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// trsMatrix.cpp
+// $Id: trsMatrix.cpp,v 1.9 2011/06/17 13:35:48 mbansal Exp $
+
+#include "stdio.h"
+#include <math.h>
+#include "trsMatrix.h"
+
+void mult33d(double a[3][3], double b[3][3], double c[3][3])
+{
+    a[0][0] = b[0][0]*c[0][0] + b[0][1]*c[1][0] + b[0][2]*c[2][0];
+    a[0][1] = b[0][0]*c[0][1] + b[0][1]*c[1][1] + b[0][2]*c[2][1];
+    a[0][2] = b[0][0]*c[0][2] + b[0][1]*c[1][2] + b[0][2]*c[2][2];
+    a[1][0] = b[1][0]*c[0][0] + b[1][1]*c[1][0] + b[1][2]*c[2][0];
+    a[1][1] = b[1][0]*c[0][1] + b[1][1]*c[1][1] + b[1][2]*c[2][1];
+    a[1][2] = b[1][0]*c[0][2] + b[1][1]*c[1][2] + b[1][2]*c[2][2];
+    a[2][0] = b[2][0]*c[0][0] + b[2][1]*c[1][0] + b[2][2]*c[2][0];
+    a[2][1] = b[2][0]*c[0][1] + b[2][1]*c[1][1] + b[2][2]*c[2][1];
+    a[2][2] = b[2][0]*c[0][2] + b[2][1]*c[1][2] + b[2][2]*c[2][2];
+}
+
+
+// normProjMat33d
+// m = input matrix
+// return: result if successful
+int normProjMat33d(double m[3][3])
+{
+
+    if(m[2][2] == 0.0)
+        {
+        return 0;
+}
+
+    m[0][0] /= m[2][2];
+    m[0][1] /= m[2][2];
+    m[0][2] /= m[2][2];
+    m[1][0] /= m[2][2];
+    m[1][1] /= m[2][2];
+    m[1][2] /= m[2][2];
+    m[2][0] /= m[2][2];
+    m[2][1] /= m[2][2];
+    m[2][2] = 1.0;
+
+    return 1;
+}
+
+// det33d
+// m = input matrix
+// returns: determinant
+double det33d(const double m[3][3])
+{
+    double result;
+
+    result  = m[0][0] * (m[1][1] * m[2][2] - m[1][2] * m[2][1]);
+    result += m[0][1] * (m[1][2] * m[2][0] - m[1][0] * m[2][2]);
+    result += m[0][2] * (m[1][0] * m[2][1] - m[1][1] * m[2][0]);
+
+    return result;
+}
+
+// inv33d
+//
+void inv33d(const double m[3][3], double out[3][3])
+{
+    double det = det33d(m);
+
+    out[0][0] = (m[1][1]*m[2][2] - m[1][2]*m[2][1]) / det;
+    out[1][0] = (m[1][2]*m[2][0] - m[1][0]*m[2][2]) / det;
+    out[2][0] = (m[1][0]*m[2][1] - m[1][1]*m[2][0]) / det;
+
+    out[0][1] = (m[0][2]*m[2][1] - m[0][1]*m[2][2]) / det;
+    out[1][1] = (m[0][0]*m[2][2] - m[0][2]*m[2][0]) / det;
+    out[2][1] = (m[0][1]*m[2][0] - m[0][0]*m[2][1]) / det;
+
+    out[0][2] = (m[0][1]*m[1][2] - m[0][2]*m[1][1]) / det;
+    out[1][2] = (m[0][2]*m[1][0] - m[0][0]*m[1][2]) / det;
+    out[2][2] = (m[0][0]*m[1][1] - m[0][1]*m[1][0]) / det;
+}
diff --git a/benchmark/feature_mos/src/mosaic/trsMatrix.h b/benchmark/feature_mos/src/mosaic/trsMatrix.h
new file mode 100644
index 00000000..054cc333
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic/trsMatrix.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+///////////////////////////////////////////////////
+// trsMatrix.h
+// $Id: trsMatrix.h,v 1.8 2011/06/17 13:35:48 mbansal Exp $
+
+#ifndef TRSMATRIX_H_
+#define TRSMATRIX_H_
+
+
+// Calculate the determinant of a matrix
+double det33d(const double m[3][3]);
+
+// Invert a matrix
+void inv33d(const double m[3][3], double out[3][3]);
+
+// Multiply a = b * c
+void mult33d(double a[3][3], double b[3][3], double c[3][3]);
+
+// Normalize matrix so matrix[2][2] is '1'
+int normProjMat33d(double m[3][3]);
+
+inline double ProjZ(double trs[3][3], double x, double y, double f)
+{
+    return ((trs)[2][0]*(x) + (trs)[2][1]*(y) + (trs)[2][2]*(f));
+}
+
+inline double ProjX(double trs[3][3], double x, double y, double z, double f)
+{
+    return (((trs)[0][0]*(x) + (trs)[0][1]*(y) + (trs)[0][2]*(f)) / (z));
+}
+
+inline double ProjY(double trs[3][3], double x, double y, double z, double f)
+{
+    return (((trs)[1][0]*(x) + (trs)[1][1]*(y) + (trs)[1][2]*(f)) / (z));
+}
+
+
+#endif
diff --git a/benchmark/feature_mos/src/mosaic_renderer/FrameBuffer.cpp b/benchmark/feature_mos/src/mosaic_renderer/FrameBuffer.cpp
new file mode 100755
index 00000000..a956f23b
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic_renderer/FrameBuffer.cpp
@@ -0,0 +1,98 @@
+#include "FrameBuffer.h"
+
+FrameBuffer::FrameBuffer()
+{
+    Reset();
+}
+
+FrameBuffer::~FrameBuffer() {
+}
+
+void FrameBuffer::Reset() {
+    mFrameBufferName = -1;
+    mTextureName = -1;
+    mWidth = 0;
+    mHeight = 0;
+    mFormat = -1;
+}
+
+bool FrameBuffer::InitializeGLContext() {
+    Reset();
+    return CreateBuffers();
+}
+
+bool FrameBuffer::Init(int width, int height, GLenum format) {
+    if (mFrameBufferName == (GLuint)-1) {
+        if (!CreateBuffers()) {
+            return false;
+        }
+    }
+    glBindFramebuffer(GL_FRAMEBUFFER, mFrameBufferName);
+    glBindTexture(GL_TEXTURE_2D, mTextureName);
+
+    glTexImage2D(GL_TEXTURE_2D,
+                 0,
+                 format,
+                 width,
+                 height,
+                 0,
+                 format,
+                 GL_UNSIGNED_BYTE,
+                 NULL);
+    if (!checkGlError("bind/teximage")) {
+        return false;
+    }
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+    // This is necessary to work with user-generated frame buffers with
+    // dimensions that are NOT powers of 2.
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+
+    // Attach texture to frame buffer.
+    glFramebufferTexture2D(GL_FRAMEBUFFER,
+                           GL_COLOR_ATTACHMENT0,
+                           GL_TEXTURE_2D,
+                           mTextureName,
+                           0);
+    checkFramebufferStatus("FrameBuffer.cpp");
+    checkGlError("framebuffertexture2d");
+
+    if (!checkGlError("texture setup")) {
+        return false;
+    }
+    mWidth = width;
+    mHeight = height;
+    mFormat = format;
+    glBindFramebuffer(GL_FRAMEBUFFER, 0);
+    return true;
+}
+
+bool FrameBuffer::CreateBuffers() {
+    glGenFramebuffers(1, &mFrameBufferName);
+    glGenTextures(1, &mTextureName);
+    if (!checkGlError("texture generation")) {
+        return false;
+    }
+    return true;
+}
+
+GLuint FrameBuffer::GetTextureName() const {
+    return mTextureName;
+}
+
+GLuint FrameBuffer::GetFrameBufferName() const {
+    return mFrameBufferName;
+}
+
+GLenum FrameBuffer::GetFormat() const {
+    return mFormat;
+}
+
+int FrameBuffer::GetWidth() const {
+    return mWidth;
+}
+
+int FrameBuffer::GetHeight() const {
+    return mHeight;
+}
diff --git a/benchmark/feature_mos/src/mosaic_renderer/FrameBuffer.h b/benchmark/feature_mos/src/mosaic_renderer/FrameBuffer.h
new file mode 100755
index 00000000..314b1262
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic_renderer/FrameBuffer.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include <EGL/egl.h>
+#include <GLES2/gl2.h>
+#include <GLES2/gl2ext.h>
+
+#define checkGlError(op)  checkGLErrorDetail(__FILE__, __LINE__, (op))
+
+extern bool checkGLErrorDetail(const char* file, int line, const char* op);
+extern void checkFramebufferStatus(const char* name);
+
+class FrameBuffer {
+  public:
+    FrameBuffer();
+    virtual ~FrameBuffer();
+
+    bool InitializeGLContext();
+    bool Init(int width, int height, GLenum format);
+    GLuint GetTextureName() const;
+    GLuint GetFrameBufferName() const;
+    GLenum GetFormat() const;
+
+    int GetWidth() const;
+    int GetHeight() const;
+
+ private:
+    void Reset();
+    bool CreateBuffers();
+    GLuint mFrameBufferName;
+    GLuint mTextureName;
+    int mWidth;
+    int mHeight;
+    GLenum mFormat;
+};
diff --git a/benchmark/feature_mos/src/mosaic_renderer/Renderer.cpp b/benchmark/feature_mos/src/mosaic_renderer/Renderer.cpp
new file mode 100755
index 00000000..b9938eb6
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic_renderer/Renderer.cpp
@@ -0,0 +1,226 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Renderer.h"
+
+#include "mosaic/Log.h"
+#define LOG_TAG "Renderer"
+
+#include <GLES2/gl2ext.h>
+
+Renderer::Renderer()
+      : mGlProgram(0),
+        mInputTextureName(-1),
+        mInputTextureWidth(0),
+        mInputTextureHeight(0),
+        mSurfaceWidth(0),
+        mSurfaceHeight(0)
+{
+    InitializeGLContext();
+}
+
+Renderer::~Renderer() {
+}
+
+GLuint Renderer::loadShader(GLenum shaderType, const char* pSource) {
+    GLuint shader = glCreateShader(shaderType);
+    if (shader) {
+        glShaderSource(shader, 1, &pSource, NULL);
+        glCompileShader(shader);
+        GLint compiled = 0;
+        glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled);
+        if (!compiled) {
+            GLint infoLen = 0;
+            glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &infoLen);
+            if (infoLen) {
+                char* buf = (char*) malloc(infoLen);
+                if (buf) {
+                    glGetShaderInfoLog(shader, infoLen, NULL, buf);
+                    LOGE("Could not compile shader %d:\n%s\n",
+                            shaderType, buf);
+                    free(buf);
+                }
+                glDeleteShader(shader);
+                shader = 0;
+            }
+        }
+    }
+    return shader;
+}
+
+GLuint Renderer::createProgram(const char* pVertexSource, const char* pFragmentSource)
+{
+    GLuint vertexShader = loadShader(GL_VERTEX_SHADER, pVertexSource);
+    if (!vertexShader)
+    {
+        return 0;
+    }
+
+    GLuint pixelShader = loadShader(GL_FRAGMENT_SHADER, pFragmentSource);
+    if (!pixelShader)
+    {
+        return 0;
+    }
+
+    GLuint program = glCreateProgram();
+    if (program)
+    {
+        glAttachShader(program, vertexShader);
+        checkGlError("glAttachShader");
+        glAttachShader(program, pixelShader);
+        checkGlError("glAttachShader");
+
+        glLinkProgram(program);
+        GLint linkStatus = GL_FALSE;
+        glGetProgramiv(program, GL_LINK_STATUS, &linkStatus);
+
+        LOGI("Program Linked (%d)!", program);
+
+        if (linkStatus != GL_TRUE)
+        {
+            GLint bufLength = 0;
+            glGetProgramiv(program, GL_INFO_LOG_LENGTH, &bufLength);
+            if (bufLength)
+            {
+                char* buf = (char*) malloc(bufLength);
+                if (buf)
+                {
+                    glGetProgramInfoLog(program, bufLength, NULL, buf);
+                    LOGE("Could not link program:\n%s\n", buf);
+                    free(buf);
+                }
+            }
+            glDeleteProgram(program);
+            program = 0;
+        }
+    }
+    return program;
+}
+
+// Set this renderer to use the default frame-buffer (screen) and
+// set the viewport size to be the given width and height (pixels).
+bool Renderer::SetupGraphics(int width, int height)
+{
+    bool succeeded = false;
+    do {
+        if (mGlProgram == 0)
+        {
+            if (!InitializeGLProgram())
+            {
+              break;
+            }
+        }
+        glUseProgram(mGlProgram);
+        if (!checkGlError("glUseProgram")) break;
+
+        glBindFramebuffer(GL_FRAMEBUFFER, 0);
+
+        mFrameBuffer = NULL;
+        mSurfaceWidth = width;
+        mSurfaceHeight = height;
+
+        glViewport(0, 0, mSurfaceWidth, mSurfaceHeight);
+        if (!checkGlError("glViewport")) break;
+        succeeded = true;
+    } while (false);
+
+    return succeeded;
+}
+
+
+// Set this renderer to use the specified FBO and
+// set the viewport size to be the width and height of this FBO.
+bool Renderer::SetupGraphics(FrameBuffer* buffer)
+{
+    bool succeeded = false;
+    do {
+        if (mGlProgram == 0)
+        {
+            if (!InitializeGLProgram())
+            {
+              break;
+            }
+        }
+        glUseProgram(mGlProgram);
+        if (!checkGlError("glUseProgram")) break;
+
+        glBindFramebuffer(GL_FRAMEBUFFER, buffer->GetFrameBufferName());
+
+        mFrameBuffer = buffer;
+        mSurfaceWidth = mFrameBuffer->GetWidth();
+        mSurfaceHeight = mFrameBuffer->GetHeight();
+
+        glViewport(0, 0, mSurfaceWidth, mSurfaceHeight);
+        if (!checkGlError("glViewport")) break;
+        succeeded = true;
+    } while (false);
+
+    return succeeded;
+}
+
+bool Renderer::Clear(float r, float g, float b, float a)
+{
+    bool succeeded = false;
+    do {
+        bool rt = (mFrameBuffer == NULL)?
+                SetupGraphics(mSurfaceWidth, mSurfaceHeight) :
+                SetupGraphics(mFrameBuffer);
+
+        if(!rt)
+            break;
+
+        glClearColor(r, g, b, a);
+        glClear(GL_COLOR_BUFFER_BIT);
+
+        succeeded = true;
+    } while (false);
+    return succeeded;
+
+}
+
+void Renderer::InitializeGLContext()
+{
+    if(mFrameBuffer != NULL)
+    {
+        delete mFrameBuffer;
+        mFrameBuffer = NULL;
+    }
+
+    mInputTextureName = -1;
+    mInputTextureType = GL_TEXTURE_2D;
+    mGlProgram = 0;
+}
+
+int Renderer::GetTextureName()
+{
+    return mInputTextureName;
+}
+
+void Renderer::SetInputTextureName(GLuint textureName)
+{
+    mInputTextureName = textureName;
+}
+
+void Renderer::SetInputTextureType(GLenum textureType)
+{
+    mInputTextureType = textureType;
+}
+
+void Renderer::SetInputTextureDimensions(int width, int height)
+{
+    mInputTextureWidth = width;
+    mInputTextureHeight = height;
+}
diff --git a/benchmark/feature_mos/src/mosaic_renderer/Renderer.h b/benchmark/feature_mos/src/mosaic_renderer/Renderer.h
new file mode 100755
index 00000000..a43e8028
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic_renderer/Renderer.h
@@ -0,0 +1,65 @@
+#pragma once
+
+#include "FrameBuffer.h"
+
+#include <GLES2/gl2.h>
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+class Renderer {
+  public:
+    Renderer();
+    virtual ~Renderer();
+
+    // Initialize OpenGL resources
+    // @return true if successful
+    virtual bool InitializeGLProgram() = 0;
+
+    bool SetupGraphics(FrameBuffer* buffer);
+    bool SetupGraphics(int width, int height);
+
+    bool Clear(float r, float g, float b, float a);
+
+    int GetTextureName();
+    void SetInputTextureName(GLuint textureName);
+    void SetInputTextureDimensions(int width, int height);
+    void SetInputTextureType(GLenum textureType);
+
+    void InitializeGLContext();
+
+  protected:
+
+    GLuint loadShader(GLenum shaderType, const char* pSource);
+    GLuint createProgram(const char*, const char* );
+
+    int SurfaceWidth() const { return mSurfaceWidth; }
+    int SurfaceHeight() const { return mSurfaceHeight; }
+
+    // Source code for shaders.
+    virtual const char* VertexShaderSource() const = 0;
+    virtual const char* FragmentShaderSource() const = 0;
+
+    // Redefine this to use special texture types such as
+    // GL_TEXTURE_EXTERNAL_OES.
+    GLenum InputTextureType() const { return mInputTextureType; }
+
+    GLuint mGlProgram;
+    GLuint mInputTextureName;
+    GLenum mInputTextureType;
+    int mInputTextureWidth;
+    int mInputTextureHeight;
+
+    // Attribute locations
+    GLint  mScalingtransLoc;
+    GLint maPositionHandle;
+    GLint maTextureHandle;
+
+
+    int mSurfaceWidth;      // Width of target surface.
+    int mSurfaceHeight;     // Height of target surface.
+
+    FrameBuffer *mFrameBuffer;
+};
+
diff --git a/benchmark/feature_mos/src/mosaic_renderer/SurfaceTextureRenderer.cpp b/benchmark/feature_mos/src/mosaic_renderer/SurfaceTextureRenderer.cpp
new file mode 100755
index 00000000..36f133f8
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic_renderer/SurfaceTextureRenderer.cpp
@@ -0,0 +1,186 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SurfaceTextureRenderer.h"
+
+#include <string.h>
+
+#include <GLES2/gl2ext.h>
+const GLfloat g_vVertices[] = {
+    -1.f, -1.f, 0.0f, 1.0f,  // Position 0
+    0.0f,  0.0f,         // TexCoord 0
+     1.f, -1.f, 0.0f, 1.0f, // Position 1
+    1.0f,  0.0f,         // TexCoord 1
+    -1.f,  1.f, 0.0f, 1.0f, // Position 2
+    0.0f,  1.0f,         // TexCoord 2
+    1.f,   1.f, 0.0f, 1.0f, // Position 3
+    1.0f,  1.0f          // TexCoord 3
+};
+GLushort g_iIndices2[] = { 0, 1, 2, 3 };
+
+const int VERTEX_STRIDE = 6 * sizeof(GLfloat);
+
+SurfaceTextureRenderer::SurfaceTextureRenderer() : Renderer() {
+    memset(mSTMatrix, 0.0, 16*sizeof(float));
+    mSTMatrix[0] = 1.0f;
+    mSTMatrix[5] = 1.0f;
+    mSTMatrix[10] = 1.0f;
+    mSTMatrix[15] = 1.0f;
+}
+
+SurfaceTextureRenderer::~SurfaceTextureRenderer() {
+}
+
+void SurfaceTextureRenderer::SetViewportMatrix(int w, int h, int W, int H)
+{
+    for(int i=0; i<16; i++)
+    {
+        mViewportMatrix[i] = 0.0f;
+    }
+
+    mViewportMatrix[0] = float(w)/float(W);
+    mViewportMatrix[5] = float(h)/float(H);
+    mViewportMatrix[10] = 1.0f;
+    mViewportMatrix[12] = -1.0f + float(w)/float(W);
+    mViewportMatrix[13] = -1.0f + float(h)/float(H);
+    mViewportMatrix[15] = 1.0f;
+}
+
+void SurfaceTextureRenderer::SetScalingMatrix(float xscale, float yscale)
+{
+    for(int i=0; i<16; i++)
+    {
+        mScalingMatrix[i] = 0.0f;
+    }
+
+    mScalingMatrix[0] = xscale;
+    mScalingMatrix[5] = yscale;
+    mScalingMatrix[10] = 1.0f;
+    mScalingMatrix[15] = 1.0f;
+}
+
+void SurfaceTextureRenderer::SetSTMatrix(float *stmat)
+{
+    memcpy(mSTMatrix, stmat, 16*sizeof(float));
+}
+
+
+bool SurfaceTextureRenderer::InitializeGLProgram()
+{
+    bool succeeded = false;
+    do {
+        GLuint glProgram;
+        glProgram = createProgram(VertexShaderSource(),
+                FragmentShaderSource());
+        if (!glProgram) {
+            break;
+        }
+
+        glUseProgram(glProgram);
+        if (!checkGlError("glUseProgram")) break;
+
+        maPositionHandle = glGetAttribLocation(glProgram, "aPosition");
+        checkGlError("glGetAttribLocation aPosition");
+        maTextureHandle = glGetAttribLocation(glProgram, "aTextureCoord");
+        checkGlError("glGetAttribLocation aTextureCoord");
+        muSTMatrixHandle = glGetUniformLocation(glProgram, "uSTMatrix");
+        checkGlError("glGetUniformLocation uSTMatrix");
+        mScalingtransLoc = glGetUniformLocation(glProgram, "u_scalingtrans");
+
+        glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+        mGlProgram = glProgram;
+        succeeded = true;
+    } while (false);
+
+    if (!succeeded && (mGlProgram != 0))
+    {
+        glDeleteProgram(mGlProgram);
+        checkGlError("glDeleteProgram");
+        mGlProgram = 0;
+    }
+    return succeeded;
+}
+
+bool SurfaceTextureRenderer::DrawTexture(GLfloat *affine)
+{
+    bool succeeded = false;
+    do {
+        bool rt = (mFrameBuffer == NULL)?
+            SetupGraphics(mSurfaceWidth, mSurfaceHeight) :
+            SetupGraphics(mFrameBuffer);
+
+        if(!rt)
+            break;
+
+        glDisable(GL_BLEND);
+
+        glActiveTexture(GL_TEXTURE0);
+        if (!checkGlError("glActiveTexture")) break;
+
+        const GLenum texture_type = InputTextureType();
+        glBindTexture(texture_type, mInputTextureName);
+        if (!checkGlError("glBindTexture")) break;
+
+        glUniformMatrix4fv(mScalingtransLoc, 1, GL_FALSE, mScalingMatrix);
+        glUniformMatrix4fv(muSTMatrixHandle, 1, GL_FALSE, mSTMatrix);
+
+        // Load the vertex position
+        glVertexAttribPointer(maPositionHandle, 4, GL_FLOAT,
+                GL_FALSE, VERTEX_STRIDE, g_vVertices);
+        glEnableVertexAttribArray(maPositionHandle);
+        // Load the texture coordinate
+        glVertexAttribPointer(maTextureHandle, 2, GL_FLOAT,
+                GL_FALSE, VERTEX_STRIDE, &g_vVertices[4]);
+        glEnableVertexAttribArray(maTextureHandle);
+
+        // And, finally, execute the GL draw command.
+        glDrawElements(GL_TRIANGLE_STRIP, 4, GL_UNSIGNED_SHORT, g_iIndices2);
+
+        glBindFramebuffer(GL_FRAMEBUFFER, 0);
+        succeeded = true;
+    } while (false);
+    return succeeded;
+}
+
+const char* SurfaceTextureRenderer::VertexShaderSource() const
+{
+    static const char gVertexShader[] =
+        "uniform mat4 uSTMatrix;\n"
+        "uniform mat4 u_scalingtrans;  \n"
+        "attribute vec4 aPosition;\n"
+        "attribute vec4 aTextureCoord;\n"
+        "varying vec2 vTextureNormCoord;\n"
+        "void main() {\n"
+        "  gl_Position = u_scalingtrans * aPosition;\n"
+        "  vTextureNormCoord = (uSTMatrix * aTextureCoord).xy;\n"
+        "}\n";
+
+    return gVertexShader;
+}
+
+const char* SurfaceTextureRenderer::FragmentShaderSource() const
+{
+    static const char gFragmentShader[] =
+        "#extension GL_OES_EGL_image_external : require\n"
+        "precision mediump float;\n"
+        "varying vec2 vTextureNormCoord;\n"
+        "uniform samplerExternalOES sTexture;\n"
+        "void main() {\n"
+        "  gl_FragColor = texture2D(sTexture, vTextureNormCoord);\n"
+        "}\n";
+
+    return gFragmentShader;
+}
diff --git a/benchmark/feature_mos/src/mosaic_renderer/SurfaceTextureRenderer.h b/benchmark/feature_mos/src/mosaic_renderer/SurfaceTextureRenderer.h
new file mode 100755
index 00000000..ea2b81ad
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic_renderer/SurfaceTextureRenderer.h
@@ -0,0 +1,44 @@
+#pragma once
+
+#include "FrameBuffer.h"
+#include "Renderer.h"
+
+#include <GLES2/gl2.h>
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+class SurfaceTextureRenderer: public Renderer {
+  public:
+    SurfaceTextureRenderer();
+    virtual ~SurfaceTextureRenderer();
+
+    // Initialize OpenGL resources
+    // @return true if successful
+    bool InitializeGLProgram();
+
+    bool DrawTexture(GLfloat *affine);
+
+    void SetViewportMatrix(int w, int h, int W, int H);
+    void SetScalingMatrix(float xscale, float yscale);
+    void SetSTMatrix(float *stmat);
+
+ private:
+    // Source code for shaders.
+    const char* VertexShaderSource() const;
+    const char* FragmentShaderSource() const;
+
+    // Attribute locations
+    GLint  mScalingtransLoc;
+    GLint muSTMatrixHandle;
+    GLint maPositionHandle;
+    GLint maTextureHandle;
+
+    GLfloat mViewportMatrix[16];
+    GLfloat mScalingMatrix[16];
+
+    GLfloat mSTMatrix[16];
+
+};
+
diff --git a/benchmark/feature_mos/src/mosaic_renderer/WarpRenderer.cpp b/benchmark/feature_mos/src/mosaic_renderer/WarpRenderer.cpp
new file mode 100755
index 00000000..af6779a3
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic_renderer/WarpRenderer.cpp
@@ -0,0 +1,190 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "WarpRenderer.h"
+
+#include <GLES2/gl2ext.h>
+
+const GLfloat g_vVertices[] = {
+    -1.f, 1.f, 0.0f, 1.0f,  // Position 0
+    0.0f,  1.0f,         // TexCoord 0
+     1.f, 1.f, 0.0f, 1.0f, // Position 1
+    1.0f,  1.0f,         // TexCoord 1
+    -1.f, -1.f, 0.0f, 1.0f, // Position 2
+    0.0f,  0.0f,         // TexCoord 2
+    1.f,  -1.f, 0.0f, 1.0f, // Position 3
+    1.0f,  0.0f          // TexCoord 3
+};
+
+const int VERTEX_STRIDE = 6 * sizeof(GLfloat);
+
+GLushort g_iIndices[] = { 0, 1, 2, 3 };
+
+WarpRenderer::WarpRenderer() : Renderer()
+{
+}
+
+WarpRenderer::~WarpRenderer() {
+}
+
+void WarpRenderer::SetViewportMatrix(int w, int h, int W, int H)
+{
+    for(int i=0; i<16; i++)
+    {
+        mViewportMatrix[i] = 0.0f;
+    }
+
+    mViewportMatrix[0] = float(w)/float(W);
+    mViewportMatrix[5] = float(h)/float(H);
+    mViewportMatrix[10] = 1.0f;
+    mViewportMatrix[12] = -1.0f + float(w)/float(W);
+    mViewportMatrix[13] = -1.0f + float(h)/float(H);
+    mViewportMatrix[15] = 1.0f;
+}
+
+void WarpRenderer::SetScalingMatrix(float xscale, float yscale)
+{
+    for(int i=0; i<16; i++)
+    {
+        mScalingMatrix[i] = 0.0f;
+    }
+
+    mScalingMatrix[0] = xscale;
+    mScalingMatrix[5] = yscale;
+    mScalingMatrix[10] = 1.0f;
+    mScalingMatrix[15] = 1.0f;
+}
+
+bool WarpRenderer::InitializeGLProgram()
+{
+    bool succeeded = false;
+    do {
+        GLuint glProgram;
+        glProgram = createProgram(VertexShaderSource(),
+                FragmentShaderSource());
+        if (!glProgram) {
+            break;
+        }
+
+        glUseProgram(glProgram);
+        if (!checkGlError("glUseProgram")) break;
+
+        // Get attribute locations
+        mPositionLoc     = glGetAttribLocation(glProgram, "a_position");
+        mAffinetransLoc  = glGetUniformLocation(glProgram, "u_affinetrans");
+        mViewporttransLoc = glGetUniformLocation(glProgram, "u_viewporttrans");
+        mScalingtransLoc = glGetUniformLocation(glProgram, "u_scalingtrans");
+        mTexCoordLoc     = glGetAttribLocation(glProgram, "a_texCoord");
+
+        // Get sampler location
+        mSamplerLoc      = glGetUniformLocation(glProgram, "s_texture");
+
+        mGlProgram = glProgram;
+        succeeded = true;
+    } while (false);
+
+    if (!succeeded && (mGlProgram != 0))
+    {
+        glDeleteProgram(mGlProgram);
+        checkGlError("glDeleteProgram");
+        mGlProgram = 0;
+    }
+    return succeeded;
+}
+
+bool WarpRenderer::DrawTexture(GLfloat *affine)
+{
+    bool succeeded = false;
+    do {
+        bool rt = (mFrameBuffer == NULL)?
+                SetupGraphics(mSurfaceWidth, mSurfaceHeight) :
+                SetupGraphics(mFrameBuffer);
+
+        if(!rt)
+            break;
+
+        glDisable(GL_BLEND);
+
+        glActiveTexture(GL_TEXTURE0);
+        if (!checkGlError("glActiveTexture")) break;
+
+        const GLenum texture_type = InputTextureType();
+        glBindTexture(texture_type, mInputTextureName);
+        if (!checkGlError("glBindTexture")) break;
+
+        // Set the sampler texture unit to 0
+        glUniform1i(mSamplerLoc, 0);
+
+        // Load the vertex position
+        glVertexAttribPointer(mPositionLoc, 4, GL_FLOAT,
+                GL_FALSE, VERTEX_STRIDE, g_vVertices);
+
+        // Load the texture coordinate
+        glVertexAttribPointer(mTexCoordLoc, 2, GL_FLOAT,
+                GL_FALSE, VERTEX_STRIDE, &g_vVertices[4]);
+
+        glEnableVertexAttribArray(mPositionLoc);
+        glEnableVertexAttribArray(mTexCoordLoc);
+
+        // pass matrix information to the vertex shader
+        glUniformMatrix4fv(mAffinetransLoc, 1, GL_FALSE, affine);
+        glUniformMatrix4fv(mViewporttransLoc, 1, GL_FALSE, mViewportMatrix);
+        glUniformMatrix4fv(mScalingtransLoc, 1, GL_FALSE, mScalingMatrix);
+
+        // And, finally, execute the GL draw command.
+        glDrawElements(GL_TRIANGLE_STRIP, 4, GL_UNSIGNED_SHORT, g_iIndices);
+
+        checkGlError("glDrawElements");
+
+        glBindFramebuffer(GL_FRAMEBUFFER, 0);
+        succeeded = true;
+    } while (false);
+    return succeeded;
+}
+
+const char* WarpRenderer::VertexShaderSource() const
+{
+    static const char gVertexShader[] =
+        "uniform mat4 u_affinetrans;  \n"
+        "uniform mat4 u_viewporttrans;  \n"
+        "uniform mat4 u_scalingtrans;  \n"
+        "attribute vec4 a_position;   \n"
+        "attribute vec2 a_texCoord;   \n"
+        "varying vec2 v_texCoord;     \n"
+        "void main()                  \n"
+        "{                            \n"
+        "   gl_Position = u_scalingtrans * u_viewporttrans * u_affinetrans * a_position; \n"
+        "   v_texCoord = a_texCoord;  \n"
+        "}                            \n";
+
+    return gVertexShader;
+}
+
+const char* WarpRenderer::FragmentShaderSource() const
+{
+    static const char gFragmentShader[] =
+        "precision mediump float;                            \n"
+        "varying vec2 v_texCoord;                            \n"
+        "uniform sampler2D s_texture;                        \n"
+        "void main()                                         \n"
+        "{                                                   \n"
+        "  vec4 color;                                       \n"
+        "  color = texture2D(s_texture, v_texCoord);       \n"
+        "  gl_FragColor = color;                             \n"
+        "}                                                   \n";
+
+    return gFragmentShader;
+}
diff --git a/benchmark/feature_mos/src/mosaic_renderer/WarpRenderer.h b/benchmark/feature_mos/src/mosaic_renderer/WarpRenderer.h
new file mode 100755
index 00000000..aa3f50c0
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic_renderer/WarpRenderer.h
@@ -0,0 +1,44 @@
+#pragma once
+
+#include "FrameBuffer.h"
+#include "Renderer.h"
+
+#include <GLES2/gl2.h>
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+class WarpRenderer: public Renderer {
+  public:
+    WarpRenderer();
+    virtual ~WarpRenderer();
+
+    // Initialize OpenGL resources
+    // @return true if successful
+    bool InitializeGLProgram();
+
+    void SetViewportMatrix(int w, int h, int W, int H);
+    void SetScalingMatrix(float xscale, float yscale);
+
+    bool DrawTexture(GLfloat *affine);
+
+ private:
+    // Source code for shaders.
+    const char* VertexShaderSource() const;
+    const char* FragmentShaderSource() const;
+
+    // Attribute locations
+    GLint  mPositionLoc;
+    GLint  mAffinetransLoc;
+    GLint  mViewporttransLoc;
+    GLint  mScalingtransLoc;
+    GLint  mTexCoordLoc;
+
+    GLfloat mViewportMatrix[16];
+    GLfloat mScalingMatrix[16];
+
+    // Sampler location
+    GLint mSamplerLoc;
+};
+
diff --git a/benchmark/feature_mos/src/mosaic_renderer/YVURenderer.cpp b/benchmark/feature_mos/src/mosaic_renderer/YVURenderer.cpp
new file mode 100755
index 00000000..f7dcf6f6
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic_renderer/YVURenderer.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "YVURenderer.h"
+
+#include <GLES2/gl2ext.h>
+
+const GLfloat g_vVertices[] = {
+    -1.f, 1.f, 0.0f, 1.0f,  // Position 0
+    0.0f,  1.0f,         // TexCoord 0
+     1.f, 1.f, 0.0f, 1.0f, // Position 1
+    1.0f,  1.0f,         // TexCoord 1
+    -1.f, -1.f, 0.0f, 1.0f, // Position 2
+    0.0f,  0.0f,         // TexCoord 2
+    1.f,  -1.f, 0.0f, 1.0f, // Position 3
+    1.0f,  0.0f          // TexCoord 3
+};
+
+const int VERTEX_STRIDE = 6 * sizeof(GLfloat);
+
+GLushort g_iIndices3[] = { 0, 1, 2, 3 };
+
+YVURenderer::YVURenderer() : Renderer()
+                   {
+}
+
+YVURenderer::~YVURenderer() {
+}
+
+bool YVURenderer::InitializeGLProgram()
+{
+    bool succeeded = false;
+    do {
+        GLuint glProgram;
+        glProgram = createProgram(VertexShaderSource(),
+                FragmentShaderSource());
+        if (!glProgram) {
+            break;
+        }
+
+        glUseProgram(glProgram);
+        if (!checkGlError("glUseProgram")) break;
+
+        // Get attribute locations
+        mPositionLoc     = glGetAttribLocation(glProgram, "a_Position");
+        mTexCoordLoc     = glGetAttribLocation(glProgram, "a_texCoord");
+
+        // Get sampler location
+        mSamplerLoc      = glGetUniformLocation(glProgram, "s_texture");
+
+        mGlProgram = glProgram;
+        succeeded = true;
+    } while (false);
+
+    if (!succeeded && (mGlProgram != 0))
+    {
+        glDeleteProgram(mGlProgram);
+        checkGlError("glDeleteProgram");
+        mGlProgram = 0;
+    }
+    return succeeded;
+}
+
+bool YVURenderer::DrawTexture()
+{
+    bool succeeded = false;
+    do {
+        bool rt = (mFrameBuffer == NULL)?
+                SetupGraphics(mSurfaceWidth, mSurfaceHeight) :
+                SetupGraphics(mFrameBuffer);
+
+        if(!rt)
+            break;
+
+        glDisable(GL_BLEND);
+
+        glActiveTexture(GL_TEXTURE0);
+        if (!checkGlError("glActiveTexture")) break;
+
+        const GLenum texture_type = InputTextureType();
+        glBindTexture(texture_type, mInputTextureName);
+        if (!checkGlError("glBindTexture")) break;
+
+        // Set the sampler texture unit to 0
+        glUniform1i(mSamplerLoc, 0);
+
+        // Load the vertex position
+        glVertexAttribPointer(mPositionLoc, 4, GL_FLOAT,
+                GL_FALSE, VERTEX_STRIDE, g_vVertices);
+
+        // Load the texture coordinate
+        glVertexAttribPointer(mTexCoordLoc, 2, GL_FLOAT,
+                GL_FALSE, VERTEX_STRIDE, &g_vVertices[4]);
+
+        glEnableVertexAttribArray(mPositionLoc);
+        glEnableVertexAttribArray(mTexCoordLoc);
+
+        // And, finally, execute the GL draw command.
+        glDrawElements(GL_TRIANGLE_STRIP, 4, GL_UNSIGNED_SHORT, g_iIndices3);
+
+        checkGlError("glDrawElements");
+
+        glBindFramebuffer(GL_FRAMEBUFFER, 0);
+        succeeded = true;
+    } while (false);
+    return succeeded;
+}
+
+const char* YVURenderer::VertexShaderSource() const
+{
+    // All this really does is copy the coordinates into
+    // variables for the fragment shader to pick up.
+    static const char gVertexShader[] =
+        "attribute vec4 a_Position;\n"
+        "attribute vec2 a_texCoord;\n"
+        "varying vec2 v_texCoord;\n"
+        "void main() {\n"
+        "  gl_Position = a_Position;\n"
+        "  v_texCoord = a_texCoord;\n"
+        "}\n";
+
+    return gVertexShader;
+}
+
+const char* YVURenderer::FragmentShaderSource() const
+{
+    static const char gFragmentShader[] =
+        "precision mediump float;\n"
+        "uniform sampler2D s_texture;\n"
+        "const vec4 coeff_y = vec4(0.257, 0.594, 0.098, 0.063);\n"
+        "const vec4 coeff_v = vec4(0.439, -0.368, -0.071, 0.500);\n"
+        "const vec4 coeff_u = vec4(-0.148, -0.291, 0.439, 0.500);\n"
+        "varying vec2 v_texCoord;\n"
+        "void main() {\n"
+        "  vec4 p;\n"
+        "  p = texture2D(s_texture, v_texCoord);\n"
+        "  gl_FragColor[0] = dot(p, coeff_y);\n"
+        "  p = texture2D(s_texture, v_texCoord);\n"
+        "  gl_FragColor[1] = dot(p, coeff_v);\n"
+        "  p = texture2D(s_texture, v_texCoord);\n"
+        "  gl_FragColor[2] = dot(p, coeff_u);\n"
+        "  p = texture2D(s_texture, v_texCoord);\n"
+        "  gl_FragColor[3] = dot(p, coeff_y);\n"
+        "}\n";
+
+    return gFragmentShader;
+}
diff --git a/benchmark/feature_mos/src/mosaic_renderer/YVURenderer.h b/benchmark/feature_mos/src/mosaic_renderer/YVURenderer.h
new file mode 100755
index 00000000..d14a4b99
--- /dev/null
+++ b/benchmark/feature_mos/src/mosaic_renderer/YVURenderer.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include "FrameBuffer.h"
+#include "Renderer.h"
+
+#include <GLES2/gl2.h>
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+class YVURenderer: public Renderer {
+  public:
+    YVURenderer();
+    virtual ~YVURenderer();
+
+    // Initialize OpenGL resources
+    // @return true if successful
+    bool InitializeGLProgram();
+
+    bool DrawTexture();
+
+ private:
+    // Source code for shaders.
+    const char* VertexShaderSource() const;
+    const char* FragmentShaderSource() const;
+
+    // Attribute locations
+    GLint  mPositionLoc;
+    GLint  mTexCoordLoc;
+
+    // Sampler location
+    GLint mSamplerLoc;
+};
+
diff --git a/benchmark/feature_mos_jni.cpp b/benchmark/feature_mos_jni.cpp
new file mode 100644
index 00000000..e40b75a0
--- /dev/null
+++ b/benchmark/feature_mos_jni.cpp
@@ -0,0 +1,669 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+*
+ */
+#include <string.h>
+#include <jni.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <db_utilities_camera.h>
+
+#include "mosaic/AlignFeatures.h"
+#include "mosaic/Blend.h"
+#include "mosaic/Mosaic.h"
+#include "mosaic/Log.h"
+#define LOG_TAG "FEATURE_MOS_JNI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "mosaic_renderer_jni.h"
+
+char buffer[1024];
+
+const int MAX_FRAMES = 100;
+
+static double mTx;
+
+int tWidth[NR];
+int tHeight[NR];
+
+ImageType tImage[NR][MAX_FRAMES];// = {{ImageUtils::IMAGE_TYPE_NOIMAGE}}; // YVU24 format image
+Mosaic *mosaic[NR] = {NULL,NULL};
+ImageType resultYVU = ImageUtils::IMAGE_TYPE_NOIMAGE;
+ImageType resultBGR = ImageUtils::IMAGE_TYPE_NOIMAGE;
+float gTRS[11]; // 9 elements of the transformation, 1 for frame-number, 1 for alignment error code.
+// Variables to keep track of the mosaic computation progress for both LR & HR.
+float gProgress[NR];
+// Variables to be able to cancel the mosaic computation when the GUI says so.
+bool gCancelComputation[NR];
+
+int c;
+int width=0, height=0;
+int mosaicWidth=0, mosaicHeight=0;
+
+//int blendingType = Blend::BLEND_TYPE_FULL;
+//int blendingType = Blend::BLEND_TYPE_CYLPAN;
+int blendingType = Blend::BLEND_TYPE_HORZ;
+int stripType = Blend::STRIP_TYPE_THIN;
+bool high_res = false;
+bool quarter_res[NR] = {false,false};
+float thresh_still[NR] = {5.0f,0.0f};
+
+/* return current time in milliseconds*/
+
+#ifndef now_ms
+static double
+now_ms(void)
+{
+    //struct timespec res;
+    struct timeval res;
+    //clock_gettime(CLOCK_REALTIME, &res);
+    gettimeofday(&res, NULL);
+    return 1000.0*res.tv_sec + (double)res.tv_usec/1e3;
+}
+#endif
+
+
+static int frame_number_HR = 0;
+static int frame_number_LR = 0;
+
+int Init(int mID, int nmax)
+{
+        double  t0, t1, time_c;
+
+        if(mosaic[mID]!=NULL)
+        {
+                delete mosaic[mID];
+                mosaic[mID] = NULL;
+        }
+
+        mosaic[mID] = new Mosaic();
+
+        t0 = now_ms();
+
+        // When processing higher than 720x480 video, process low-res at
+        // quarter resolution
+        if(tWidth[LR]>180)
+            quarter_res[LR] = true;
+
+
+        // Check for initialization and if not, initialize
+        if (!mosaic[mID]->isInitialized())
+        {
+                mosaic[mID]->initialize(blendingType, stripType, tWidth[mID], tHeight[mID],
+                        nmax, quarter_res[mID], thresh_still[mID]);
+        }
+
+        t1 = now_ms();
+        time_c = t1 - t0;
+        LOGV("Init[%d]: %g ms [%d frames]",mID,time_c,nmax);
+        return 1;
+}
+
+void GenerateQuarterResImagePlanar(ImageType im, int input_w, int input_h,
+        ImageType &out)
+{
+    ImageType imp;
+    ImageType outp;
+
+    int count = 0;
+
+    for (int j = 0; j < input_h; j += H2L_FACTOR)
+    {
+        imp = im + j * input_w;
+        outp = out + (j / H2L_FACTOR) * (input_w / H2L_FACTOR);
+
+        for (int i = 0; i < input_w; i += H2L_FACTOR)
+        {
+            *outp++ = *(imp + i);
+            count++;
+        }
+    }
+
+    for (int j = input_h; j < 2 * input_h; j += H2L_FACTOR)
+    {
+        imp = im + j * input_w;
+        outp = out + (j / H2L_FACTOR) * (input_w / H2L_FACTOR);
+
+        for (int i = 0; i < input_w; i += H2L_FACTOR)
+        {
+            *outp++ = *(imp + i);
+            count++;
+        }
+    }
+
+    for (int j = 2 * input_h; j < 3 * input_h; j += H2L_FACTOR)
+    {
+        imp = im + j * input_w;
+        outp = out + (j / H2L_FACTOR) * (input_w / H2L_FACTOR);
+
+        for (int i = 0; i < input_w; i += H2L_FACTOR)
+        {
+            *outp++ = *(imp + i);
+            count++;
+        }
+    }
+}
+
+int AddFrame(int mID, int k, float* trs1d)
+{
+    double  t0, t1, time_c;
+    double trs[3][3];
+
+    int ret_code = mosaic[mID]->addFrame(tImage[mID][k]);
+
+    mosaic[mID]->getAligner()->getLastTRS(trs);
+
+    if(trs1d!=NULL)
+    {
+
+        trs1d[0] = trs[0][0];
+        trs1d[1] = trs[0][1];
+        trs1d[2] = trs[0][2];
+        trs1d[3] = trs[1][0];
+        trs1d[4] = trs[1][1];
+        trs1d[5] = trs[1][2];
+        trs1d[6] = trs[2][0];
+        trs1d[7] = trs[2][1];
+        trs1d[8] = trs[2][2];
+    }
+
+    return ret_code;
+}
+
+int Finalize(int mID)
+{
+    double  t0, t1, time_c;
+
+    t0 = now_ms();
+    // Create the mosaic
+    int ret = mosaic[mID]->createMosaic(gProgress[mID], gCancelComputation[mID]);
+    t1 = now_ms();
+    time_c = t1 - t0;
+    LOGV("CreateMosaic: %g ms",time_c);
+
+    // Get back the result
+    resultYVU = mosaic[mID]->getMosaic(mosaicWidth, mosaicHeight);
+
+    return ret;
+}
+
+void YUV420toYVU24(ImageType yvu24, ImageType yuv420sp, int width, int height)
+{
+    int frameSize = width * height;
+
+    ImageType oyp = yvu24;
+    ImageType ovp = yvu24+frameSize;
+    ImageType oup = yvu24+frameSize+frameSize;
+
+    for (int j = 0, yp = 0; j < height; j++)
+    {
+        unsigned char u = 0, v = 0;
+        int uvp = frameSize + (j >> 1) * width;
+        for (int i = 0; i < width; i++, yp++)
+        {
+            *oyp++ = yuv420sp[yp];
+            //int y = (0xff & (int)yuv420sp[yp]) -16;
+            //yvu24p[yp] = (y<0)?0:y;
+
+            if ((i & 1) == 0)
+            {
+                v = yuv420sp[uvp++];
+                u = yuv420sp[uvp++];
+            }
+
+            *ovp++ = v;
+            *oup++ = u;
+        }
+    }
+}
+
+void YUV420toYVU24_NEW(ImageType yvu24, ImageType yuv420sp, int width,
+        int height)
+{
+    int frameSize = width * height;
+
+    ImageType oyp = yvu24;
+    ImageType ovp = yvu24 + frameSize;
+    ImageType oup = yvu24 + frameSize + frameSize;
+
+    memcpy(yvu24, yuv420sp, frameSize * sizeof(unsigned char));
+
+    for (int j = 0; j < height; j += 2)
+    {
+        unsigned char u = 0, v = 0;
+        int uvp = frameSize + (j >> 1) * width;
+        ovp = yvu24 + frameSize + j * width;
+        oup = ovp + frameSize;
+
+        ImageType iuvp = yuv420sp + uvp;
+
+        for (int i = 0; i < width; i += 2)
+        {
+            v = *iuvp++;
+            u = *iuvp++;
+
+            *ovp++ = v;
+            *oup++ = u;
+
+            *ovp++ = v;
+            *oup++ = u;
+
+        }
+        memcpy(ovp, ovp - width, width * sizeof(unsigned char));
+        memcpy(oup, oup - width, width * sizeof(unsigned char));
+    }
+}
+
+
+JNIEXPORT void JNICALL Java_com_android_camera_panorama_Mosaic_allocateMosaicMemory(
+        JNIEnv* env, jobject thiz, jint width, jint height)
+{
+    tWidth[HR] = width;
+    tHeight[HR] = height;
+    tWidth[LR] = int(width / H2L_FACTOR);
+    tHeight[LR] = int(height / H2L_FACTOR);
+
+    for(int i=0; i<MAX_FRAMES; i++)
+    {
+            tImage[LR][i] = ImageUtils::allocateImage(tWidth[LR], tHeight[LR],
+                    ImageUtils::IMAGE_TYPE_NUM_CHANNELS);
+            tImage[HR][i] = ImageUtils::allocateImage(tWidth[HR], tHeight[HR],
+                    ImageUtils::IMAGE_TYPE_NUM_CHANNELS);
+    }
+
+    AllocateTextureMemory(tWidth[HR], tHeight[HR], tWidth[LR], tHeight[LR]);
+}
+
+JNIEXPORT void JNICALL Java_com_android_camera_panorama_Mosaic_freeMosaicMemory(
+        JNIEnv* env, jobject thiz)
+{
+    for(int i = 0; i < MAX_FRAMES; i++)
+    {
+        ImageUtils::freeImage(tImage[LR][i]);
+        ImageUtils::freeImage(tImage[HR][i]);
+    }
+
+    FreeTextureMemory();
+}
+
+
+void decodeYUV444SP(unsigned char* rgb, unsigned char* yuv420sp, int width,
+        int height)
+{
+    int frameSize = width * height;
+
+    for (int j = 0, yp = 0; j < height; j++)
+    {
+        int vp = frameSize + j * width, u = 0, v = 0;
+        int up = vp + frameSize;
+
+        for (int i = 0; i < width; i++, yp++, vp++, up++)
+        {
+            int y = (0xff & ((int) yuv420sp[yp])) - 16;
+            if (y < 0) y = 0;
+
+            v = (0xff & yuv420sp[vp]) - 128;
+            u = (0xff & yuv420sp[up]) - 128;
+
+            int y1192 = 1192 * y;
+            int r = (y1192 + 1634 * v);
+            int g = (y1192 - 833 * v - 400 * u);
+            int b = (y1192 + 2066 * u);
+
+            if (r < 0) r = 0; else if (r > 262143) r = 262143;
+            if (g < 0) g = 0; else if (g > 262143) g = 262143;
+            if (b < 0) b = 0; else if (b > 262143) b = 262143;
+
+            //rgb[yp] = 0xff000000 | ((r << 6) & 0xff0000) | ((g >> 2) & 0xff00) | ((b >> 10) & 0xff);
+            int p = j*width*3+i*3;
+            rgb[p+0] = (r<<6 & 0xFF0000)>>16;
+            rgb[p+1] = (g>>2 & 0xFF00)>>8;
+            rgb[p+2] =  b>>10 & 0xFF;
+        }
+    }
+}
+
+static int count = 0;
+
+void ConvertYVUAiToPlanarYVU(unsigned char *planar, unsigned char *in, int width,
+        int height)
+{
+    int planeSize = width * height;
+    unsigned char* Yptr = planar;
+    unsigned char* Vptr = planar + planeSize;
+    unsigned char* Uptr = Vptr + planeSize;
+
+    for (int i = 0; i < planeSize; i++)
+    {
+        *Yptr++ = *in++;
+        *Vptr++ = *in++;
+        *Uptr++ = *in++;
+        in++;   // Alpha
+    }
+}
+
+JNIEXPORT jfloatArray JNICALL Java_com_android_camera_panorama_Mosaic_setSourceImageFromGPU(
+        JNIEnv* env, jobject thiz)
+{
+    double  t0, t1, time_c;
+    t0 = now_ms();
+    int ret_code;
+
+    if(frame_number_HR<MAX_FRAMES && frame_number_LR<MAX_FRAMES)
+    {
+        double last_tx = mTx;
+
+        sem_wait(&gPreviewImage_semaphore);
+        ConvertYVUAiToPlanarYVU(tImage[LR][frame_number_LR], gPreviewImage[LR],
+                tWidth[LR], tHeight[LR]);
+
+        sem_post(&gPreviewImage_semaphore);
+
+        ret_code = AddFrame(LR, frame_number_LR, gTRS);
+
+        if(ret_code == Mosaic::MOSAIC_RET_OK || ret_code == Mosaic::MOSAIC_RET_FEW_INLIERS)
+        {
+            // Copy into HR buffer only if this is a valid frame
+            sem_wait(&gPreviewImage_semaphore);
+            ConvertYVUAiToPlanarYVU(tImage[HR][frame_number_HR], gPreviewImage[HR],
+                    tWidth[HR], tHeight[HR]);
+            sem_post(&gPreviewImage_semaphore);
+
+            frame_number_LR++;
+            frame_number_HR++;
+        }
+    }
+    else
+    {
+        gTRS[1] = gTRS[2] = gTRS[3] = gTRS[5] = gTRS[6] = gTRS[7] = 0.0f;
+        gTRS[0] = gTRS[4] = gTRS[8] = 1.0f;
+    }
+
+    UpdateWarpTransformation(gTRS);
+
+    gTRS[9] = frame_number_HR;
+    gTRS[10] = ret_code;
+
+    jfloatArray bytes = env->NewFloatArray(11);
+    if(bytes != 0)
+    {
+        env->SetFloatArrayRegion(bytes, 0, 11, (jfloat*) gTRS);
+    }
+    return bytes;
+}
+
+
+
+JNIEXPORT jfloatArray JNICALL Java_com_android_camera_panorama_Mosaic_setSourceImage(
+        JNIEnv* env, jobject thiz, jbyteArray photo_data)
+{
+    double  t0, t1, time_c;
+    t0 = now_ms();
+
+    int ret_code;
+
+    if(frame_number_HR<MAX_FRAMES && frame_number_LR<MAX_FRAMES)
+    {
+        jbyte *pixels = env->GetByteArrayElements(photo_data, 0);
+
+        YUV420toYVU24_NEW(tImage[HR][frame_number_HR], (ImageType)pixels,
+                tWidth[HR], tHeight[HR]);
+
+        env->ReleaseByteArrayElements(photo_data, pixels, 0);
+
+        double last_tx = mTx;
+
+        t0 = now_ms();
+        GenerateQuarterResImagePlanar(tImage[HR][frame_number_HR], tWidth[HR],
+                tHeight[HR], tImage[LR][frame_number_LR]);
+
+
+        sem_wait(&gPreviewImage_semaphore);
+        decodeYUV444SP(gPreviewImage[LR], tImage[LR][frame_number_LR],
+                gPreviewImageWidth[LR], gPreviewImageHeight[LR]);
+        sem_post(&gPreviewImage_semaphore);
+
+        ret_code = AddFrame(LR, frame_number_LR, gTRS);
+
+        if(ret_code == Mosaic::MOSAIC_RET_OK || ret_code == Mosaic::MOSAIC_RET_FEW_INLIERS)
+        {
+            frame_number_LR++;
+            frame_number_HR++;
+        }
+
+    }
+    else
+    {
+        gTRS[1] = gTRS[2] = gTRS[3] = gTRS[5] = gTRS[6] = gTRS[7] = 0.0f;
+        gTRS[0] = gTRS[4] = gTRS[8] = 1.0f;
+    }
+
+    UpdateWarpTransformation(gTRS);
+
+    gTRS[9] = frame_number_HR;
+    gTRS[10] = ret_code;
+
+    jfloatArray bytes = env->NewFloatArray(11);
+    if(bytes != 0)
+    {
+        env->SetFloatArrayRegion(bytes, 0, 11, (jfloat*) gTRS);
+    }
+    return bytes;
+}
+
+JNIEXPORT void JNICALL Java_com_android_camera_panorama_Mosaic_setBlendingType(
+        JNIEnv* env, jobject thiz, jint type)
+{
+    blendingType = int(type);
+}
+
+JNIEXPORT void JNICALL Java_com_android_camera_panorama_Mosaic_setStripType(
+        JNIEnv* env, jobject thiz, jint type)
+{
+    stripType = int(type);
+}
+
+JNIEXPORT void JNICALL Java_com_android_camera_panorama_Mosaic_reset(
+        JNIEnv* env, jobject thiz)
+{
+    frame_number_HR = 0;
+    frame_number_LR = 0;
+
+    gProgress[LR] = 0.0;
+    gProgress[HR] = 0.0;
+
+    gCancelComputation[LR] = false;
+    gCancelComputation[HR] = false;
+
+    Init(LR,MAX_FRAMES);
+}
+
+JNIEXPORT jint JNICALL Java_com_android_camera_panorama_Mosaic_reportProgress(
+        JNIEnv* env, jobject thiz, jboolean hires, jboolean cancel_computation)
+{
+    if(bool(hires))
+        gCancelComputation[HR] = cancel_computation;
+    else
+        gCancelComputation[LR] = cancel_computation;
+
+    if(bool(hires))
+        return (jint) gProgress[HR];
+    else
+        return (jint) gProgress[LR];
+}
+
+JNIEXPORT jint JNICALL Java_com_android_camera_panorama_Mosaic_createMosaic(
+        JNIEnv* env, jobject thiz, jboolean value)
+{
+    high_res = bool(value);
+
+    int ret;
+
+    if(high_res)
+    {
+        LOGV("createMosaic() - High-Res Mode");
+        double  t0, t1, time_c;
+
+        gProgress[HR] = 0.0;
+        t0 = now_ms();
+
+        Init(HR, frame_number_HR);
+
+        for(int k = 0; k < frame_number_HR; k++)
+        {
+            if (gCancelComputation[HR])
+                break;
+            AddFrame(HR, k, NULL);
+            gProgress[HR] += TIME_PERCENT_ALIGN/frame_number_HR;
+        }
+
+        if (gCancelComputation[HR])
+        {
+            ret = Mosaic::MOSAIC_RET_CANCELLED;
+        }
+        else
+        {
+            gProgress[HR] = TIME_PERCENT_ALIGN;
+
+            t1 = now_ms();
+            time_c = t1 - t0;
+            LOGV("AlignAll - %d frames [HR]: %g ms", frame_number_HR, time_c);
+
+            ret = Finalize(HR);
+
+            gProgress[HR] = 100.0;
+        }
+
+        high_res = false;
+    }
+    else
+    {
+        LOGV("createMosaic() - Low-Res Mode");
+        gProgress[LR] = TIME_PERCENT_ALIGN;
+
+        ret = Finalize(LR);
+
+        gProgress[LR] = 100.0;
+    }
+
+    return (jint) ret;
+}
+
+JNIEXPORT jintArray JNICALL Java_com_android_camera_panorama_Mosaic_getFinalMosaic(
+        JNIEnv* env, jobject thiz)
+{
+    int y,x;
+    int width = mosaicWidth;
+    int height = mosaicHeight;
+    int imageSize = width * height;
+
+    // Convert back to RGB24
+    resultBGR = ImageUtils::allocateImage(mosaicWidth, mosaicHeight,
+            ImageUtils::IMAGE_TYPE_NUM_CHANNELS);
+    ImageUtils::yvu2bgr(resultBGR, resultYVU, mosaicWidth, mosaicHeight);
+
+    LOGV("MosBytes: %d, W = %d, H = %d", imageSize, width, height);
+
+    int* image = new int[imageSize];
+    int* dims = new int[2];
+
+    for(y=0; y<height; y++)
+    {
+        for(x=0; x<width; x++)
+        {
+            image[y*width+x] = (0xFF<<24) | (resultBGR[y*width*3+x*3+2]<<16)|
+                    (resultBGR[y*width*3+x*3+1]<<8)| (resultBGR[y*width*3+x*3]);
+        }
+    }
+
+    dims[0] = width;
+    dims[1] = height;
+
+    ImageUtils::freeImage(resultBGR);
+
+    jintArray bytes = env->NewIntArray(imageSize+2);
+    if (bytes == 0) {
+        LOGE("Error in creating the image.");
+        delete[] image;
+        return 0;
+    }
+    env->SetIntArrayRegion(bytes, 0, imageSize, (jint*) image);
+    env->SetIntArrayRegion(bytes, imageSize, 2, (jint*) dims);
+    delete[] image;
+    delete[] dims;
+    return bytes;
+}
+
+JNIEXPORT jbyteArray JNICALL Java_com_android_camera_panorama_Mosaic_getFinalMosaicNV21(
+        JNIEnv* env, jobject thiz)
+{
+    int y,x;
+    int width;
+    int height;
+
+    width = mosaicWidth;
+    height = mosaicHeight;
+
+    int imageSize = 1.5*width * height;
+
+    // Convert YVU to NV21 format in-place
+    ImageType V = resultYVU+mosaicWidth*mosaicHeight;
+    ImageType U = V+mosaicWidth*mosaicHeight;
+    for(int j=0; j<mosaicHeight/2; j++)
+    {
+        for(int i=0; i<mosaicWidth; i+=2)
+        {
+            V[j*mosaicWidth+i] = V[(2*j)*mosaicWidth+i];        // V
+            V[j*mosaicWidth+i+1] = U[(2*j)*mosaicWidth+i];        // U
+        }
+    }
+
+    LOGV("MosBytes: %d, W = %d, H = %d", imageSize, width, height);
+
+    unsigned char* dims = new unsigned char[8];
+
+    dims[0] = (unsigned char)(width >> 24);
+    dims[1] = (unsigned char)(width >> 16);
+    dims[2] = (unsigned char)(width >> 8);
+    dims[3] = (unsigned char)width;
+
+    dims[4] = (unsigned char)(height >> 24);
+    dims[5] = (unsigned char)(height >> 16);
+    dims[6] = (unsigned char)(height >> 8);
+    dims[7] = (unsigned char)height;
+
+    jbyteArray bytes = env->NewByteArray(imageSize+8);
+    if (bytes == 0) {
+        LOGE("Error in creating the image.");
+        ImageUtils::freeImage(resultYVU);
+        return 0;
+    }
+    env->SetByteArrayRegion(bytes, 0, imageSize, (jbyte*) resultYVU);
+    env->SetByteArrayRegion(bytes, imageSize, 8, (jbyte*) dims);
+    delete[] dims;
+    ImageUtils::freeImage(resultYVU);
+    return bytes;
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/benchmark/feature_stab/db_vlvm/db_bundle.h b/benchmark/feature_stab/db_vlvm/db_bundle.h
new file mode 100644
index 00000000..e4fb8db2
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_bundle.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_bundle.h,v 1.2 2011/06/17 14:03:30 mbansal Exp $ */
+
+#ifndef DB_BUNDLE_H
+#define DB_BUNDLE_H
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+/*!
+ * \defgroup LMBundle (LM) Bundle adjustment utilities (a.k.a. Levenberg-Marquardt algorithm)
+ */
+/*\{*/
+
+#include "db_utilities.h"
+
+/*!
+Solve for update dx such that diagmult(1+lambda,transpose(J)%J)%dx= -Jtf
+using only upper half of JtJ, destroying lower half below diagonal in the process
+dimension is n and d should point to n allocated doubles of scratch memory
+*/
+inline void db_Compute_dx(double *dx,double **JtJ,double *min_Jtf,double lambda,double *d,int n)
+{
+    int i;
+    double opl;
+
+    opl=1.0+lambda;
+    for(i=0;i<n;i++) d[i]=JtJ[i][i]*opl;
+
+    db_CholeskyDecompSeparateDiagonal(JtJ,d,n);
+    db_CholeskyBacksub(dx,JtJ,d,n,min_Jtf);
+}
+
+/*!
+Solve for update dx such that diagmult(1+lambda,transpose(J)%J)%dx= -Jtf
+using only upper half of JtJ, destroying lower half below diagonal in the process
+*/
+inline void db_Compute_dx_3x3(double dx[3],double JtJ[9],const double min_Jtf[3],double lambda)
+{
+    double d[3],opl;
+
+    opl=1.0+lambda;
+    d[0]=JtJ[0]*opl;
+    d[1]=JtJ[4]*opl;
+    d[2]=JtJ[8]*opl;
+    db_CholeskyDecomp3x3SeparateDiagonal(JtJ,d);
+    db_CholeskyBacksub3x3(dx,JtJ,d,min_Jtf);
+}
+
+/*\}*/
+
+#endif /* DB_BUNDLE_H */
diff --git a/benchmark/feature_stab/db_vlvm/db_feature_detection.cpp b/benchmark/feature_stab/db_vlvm/db_feature_detection.cpp
new file mode 100644
index 00000000..28cb4a78
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_feature_detection.cpp
@@ -0,0 +1,1770 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*$Id: db_feature_detection.cpp,v 1.4 2011/06/17 14:03:30 mbansal Exp $*/
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+
+#include "db_utilities.h"
+#include "db_feature_detection.h"
+#ifdef _VERBOSE_
+#include <iostream>
+#endif
+#include <float.h>
+
+#define DB_SUB_PIXEL
+
+#define BORDER 10 // 5
+
+float** db_AllocStrengthImage_f(float **im,int w,int h)
+{
+    int i,n,aw;
+    long c,size;
+    float **img,*aim,*p;
+
+    /*Determine number of 124 element chunks needed*/
+    n=(db_maxi(1,w-6)+123)/124;
+    /*Determine the total allocation width aw*/
+    aw=n*124+8;
+    /*Allocate*/
+    size=aw*h+16;
+    *im=new float [size];
+    /*Clean up*/
+    p=(*im);
+    for(c=0;c<size;c++) p[c]=0.0;
+    /*Get a 16 byte aligned pointer*/
+    aim=db_AlignPointer_f(*im,16);
+    /*Allocate pointer table*/
+    img=new float* [h];
+    /*Initialize the pointer table*/
+    for(i=0;i<h;i++)
+    {
+        img[i]=aim+aw*i+1;
+    }
+
+    return(img);
+}
+
+void db_FreeStrengthImage_f(float *im,float **img,int h)
+{
+    delete [] im;
+    delete [] img;
+}
+
+/*Compute derivatives Ix,Iy for a subrow of img with upper left (i,j) and width chunk_width
+Memory references occur one pixel outside the subrow*/
+inline void db_IxIyRow_f(float *Ix,float *Iy,const float * const *img,int i,int j,int chunk_width)
+{
+    int c;
+
+    for(c=0;c<chunk_width;c++)
+    {
+        Ix[c]=img[i][j+c-1]-img[i][j+c+1];
+        Iy[c]=img[i-1][j+c]-img[i+1][j+c];
+    }
+}
+
+/*Compute derivatives Ix,Iy for a subrow of img with upper left (i,j) and width 128
+Memory references occur one pixel outside the subrow*/
+inline void db_IxIyRow_u(int *dxx,const unsigned char * const *img,int i,int j,int nc)
+{
+#ifdef DB_USE_MMX
+    const unsigned char *r1,*r2,*r3;
+
+    r1=img[i-1]+j; r2=img[i]+j; r3=img[i+1]+j;
+
+    _asm
+    {
+        mov esi,16
+        mov eax,r1
+        mov ebx,r2
+        mov ecx,r3
+        mov edx,dxx
+
+        /*Get bitmask into mm7*/
+        mov       edi,7F7F7F7Fh
+        movd      mm7,edi
+        punpckldq mm7,mm7
+
+loopstart:
+        /***************dx part 1-12*********************************/
+        movq       mm0,[eax]       /*1 Get upper*/
+         pxor      mm6,mm6         /*2 Set to zero*/
+        movq       mm1,[ecx]       /*3 Get lower*/
+         psrlq     mm0,1           /*4 Shift*/
+        psrlq      mm1,1           /*5 Shift*/
+         pand      mm0,mm7         /*6 And*/
+        movq       mm2,[ebx-1]     /*13 Get left*/
+         pand      mm1,mm7         /*7 And*/
+        psubb      mm0,mm1         /*8 Subtract*/
+         pxor      mm5,mm5         /*14 Set to zero*/
+        movq       mm1,mm0         /*9 Copy*/
+         pcmpgtb   mm6,mm0         /*10 Create unpack mask*/
+        movq       mm3,[ebx+1]     /*15 Get right*/
+         punpcklbw mm0,mm6         /*11 Unpack low*/
+        punpckhbw  mm1,mm6         /*12 Unpack high*/
+        /***************dy part 13-24*********************************/
+         movq      mm4,mm0         /*25 Copy dx*/
+        psrlq      mm2,1           /*16 Shift*/
+         pmullw    mm0,mm0         /*26 Multiply dx*dx*/
+        psrlq      mm3,1           /*17 Shift*/
+         pand      mm2,mm7         /*18 And*/
+        pand       mm3,mm7         /*19 And*/
+         /*Stall*/
+        psubb      mm2,mm3         /*20 Subtract*/
+         /*Stall*/
+        movq       mm3,mm2         /*21 Copy*/
+         pcmpgtb   mm5,mm2         /*22 Create unpack mask*/
+        punpcklbw  mm2,mm5         /*23 Unpack low*/
+         /*Stall*/
+        punpckhbw  mm3,mm5         /*24 Unpack high*/
+        /***************dxx dxy dyy low part 25-49*********************************/
+         pmullw    mm4,mm2         /*27 Multiply dx*dy*/
+        pmullw     mm2,mm2         /*28 Multiply dy*dy*/
+         pxor      mm6,mm6         /*29 Set to zero*/
+        movq       mm5,mm0         /*30 Copy dx*dx*/
+         pcmpgtw   mm6,mm0         /*31 Create unpack mask for dx*dx*/
+        punpcklwd  mm0,mm6         /*32 Unpack dx*dx lows*/
+         /*Stall*/
+        punpckhwd  mm5,mm6         /*33 Unpack dx*dx highs*/
+         pxor      mm6,mm6         /*36 Set to zero*/
+        movq       [edx],mm0       /*34 Store dx*dx lows*/
+         movq      mm0,mm4         /*37 Copy dx*dy*/
+        movq       [edx+8],mm5     /*35 Store dx*dx highs*/
+         pcmpgtw   mm6,mm4         /*38 Create unpack mask for dx*dy*/
+        punpcklwd  mm4,mm6         /*39 Unpack dx*dy lows*/
+         /*Stall*/
+        punpckhwd  mm0,mm6         /*40 Unpack dx*dy highs*/
+         pxor      mm6,mm6         /*43 Set to zero*/
+        movq       [edx+512],mm4   /*41 Store dx*dy lows*/
+         movq      mm5,mm2         /*44 Copy dy*dy*/
+        movq       [edx+520],mm0   /*42 Store dx*dy highs*/
+         pcmpgtw   mm6,mm2         /*45 Create unpack mask for dy*dy*/
+        punpcklwd  mm2,mm6         /*46 Unpack dy*dy lows*/
+         movq      mm4,mm1         /*50 Copy dx*/
+        punpckhwd  mm5,mm6         /*47 Unpack dy*dy highs*/
+         pmullw    mm1,mm1         /*51 Multiply dx*dx*/
+        movq       [edx+1024],mm2  /*48 Store dy*dy lows*/
+         pmullw    mm4,mm3         /*52 Multiply dx*dy*/
+        movq       [edx+1032],mm5  /*49 Store dy*dy highs*/
+        /***************dxx dxy dyy high part 50-79*********************************/
+         pmullw    mm3,mm3         /*53 Multiply dy*dy*/
+        pxor       mm6,mm6         /*54 Set to zero*/
+         movq      mm5,mm1         /*55 Copy dx*dx*/
+        pcmpgtw    mm6,mm1         /*56 Create unpack mask for dx*dx*/
+         pxor      mm2,mm2         /*61 Set to zero*/
+        punpcklwd  mm1,mm6         /*57 Unpack dx*dx lows*/
+         movq      mm0,mm4         /*62 Copy dx*dy*/
+        punpckhwd  mm5,mm6         /*58 Unpack dx*dx highs*/
+         pcmpgtw   mm2,mm4         /*63 Create unpack mask for dx*dy*/
+        movq       [edx+16],mm1    /*59 Store dx*dx lows*/
+         punpcklwd mm4,mm2         /*64 Unpack dx*dy lows*/
+        movq       [edx+24],mm5    /*60 Store dx*dx highs*/
+         punpckhwd mm0,mm2         /*65 Unpack dx*dy highs*/
+        movq       [edx+528],mm4   /*66 Store dx*dy lows*/
+         pxor      mm6,mm6         /*68 Set to zero*/
+        movq       [edx+536],mm0   /*67 Store dx*dy highs*/
+         movq      mm5,mm3         /*69 Copy dy*dy*/
+        pcmpgtw    mm6,mm3         /*70 Create unpack mask for dy*dy*/
+         add       eax,8           /*75*/
+        punpcklwd  mm3,mm6         /*71 Unpack dy*dy lows*/
+         add       ebx,8           /*76*/
+        punpckhwd  mm5,mm6         /*72 Unpack dy*dy highs*/
+         add       ecx,8           /*77*/
+        movq       [edx+1040],mm3  /*73 Store dy*dy lows*/
+         /*Stall*/
+        movq       [edx+1048],mm5  /*74 Store dy*dy highs*/
+         /*Stall*/
+        add        edx,32          /*78*/
+         dec esi                   /*79*/
+        jnz loopstart
+
+        emms
+    }
+
+#else
+    int c;
+    int Ix,Iy;
+
+    for(c=0;c<nc;c++)
+    {
+        Ix=(img[i][j+c-1]-img[i][j+c+1])>>1;
+        Iy=(img[i-1][j+c]-img[i+1][j+c])>>1;
+        dxx[c]=Ix*Ix;
+        dxx[c+128]=Ix*Iy;
+        dxx[c+256]=Iy*Iy;
+    }
+#endif /*DB_USE_MMX*/
+}
+
+/*Filter vertically five rows of derivatives of length chunk_width into gxx,gxy,gyy*/
+inline void db_gxx_gxy_gyy_row_f(float *gxx,float *gxy,float *gyy,int chunk_width,
+                                 float *Ix0,float *Ix1,float *Ix2,float *Ix3,float *Ix4,
+                                 float *Iy0,float *Iy1,float *Iy2,float *Iy3,float *Iy4)
+{
+    int c;
+    float dx,dy;
+    float Ixx0,Ixy0,Iyy0,Ixx1,Ixy1,Iyy1,Ixx2,Ixy2,Iyy2,Ixx3,Ixy3,Iyy3,Ixx4,Ixy4,Iyy4;
+
+    for(c=0;c<chunk_width;c++)
+    {
+        dx=Ix0[c];
+        dy=Iy0[c];
+        Ixx0=dx*dx;
+        Ixy0=dx*dy;
+        Iyy0=dy*dy;
+
+        dx=Ix1[c];
+        dy=Iy1[c];
+        Ixx1=dx*dx;
+        Ixy1=dx*dy;
+        Iyy1=dy*dy;
+
+        dx=Ix2[c];
+        dy=Iy2[c];
+        Ixx2=dx*dx;
+        Ixy2=dx*dy;
+        Iyy2=dy*dy;
+
+        dx=Ix3[c];
+        dy=Iy3[c];
+        Ixx3=dx*dx;
+        Ixy3=dx*dy;
+        Iyy3=dy*dy;
+
+        dx=Ix4[c];
+        dy=Iy4[c];
+        Ixx4=dx*dx;
+        Ixy4=dx*dy;
+        Iyy4=dy*dy;
+
+        /*Filter vertically*/
+        gxx[c]=Ixx0+Ixx1*4.0f+Ixx2*6.0f+Ixx3*4.0f+Ixx4;
+        gxy[c]=Ixy0+Ixy1*4.0f+Ixy2*6.0f+Ixy3*4.0f+Ixy4;
+        gyy[c]=Iyy0+Iyy1*4.0f+Iyy2*6.0f+Iyy3*4.0f+Iyy4;
+    }
+}
+
+/*Filter vertically five rows of derivatives of length 128 into gxx,gxy,gyy*/
+inline void db_gxx_gxy_gyy_row_s(int *g,int *d0,int *d1,int *d2,int *d3,int *d4,int nc)
+{
+#ifdef DB_USE_MMX
+    int c;
+
+    _asm
+    {
+        mov c,64
+        mov eax,d0
+        mov ebx,d1
+        mov ecx,d2
+        mov edx,d3
+        mov edi,d4
+        mov esi,g
+
+loopstart:
+        /***************dxx part 1-14*********************************/
+        movq        mm0,[eax]      /*1 Get dxx0*/
+         /*Stall*/
+        movq        mm1,[ebx]      /*2 Get dxx1*/
+         /*Stall*/
+        movq        mm2,[ecx]      /*5 Get dxx2*/
+         pslld      mm1,2          /*3 Shift dxx1*/
+        movq        mm3,[edx]      /*10 Get dxx3*/
+         paddd      mm0,mm1        /*4 Accumulate dxx1*/
+        movq        mm4,[eax+512]  /*15 Get dxy0*/
+         pslld      mm2,1          /*6 Shift dxx2 1*/
+        paddd       mm0,mm2        /*7 Accumulate dxx2 1*/
+         pslld      mm2,1          /*8 Shift dxx2 2*/
+        movq        mm5,[ebx+512]  /*16 Get dxy1*/
+         paddd      mm0,mm2        /*9 Accumulate dxx2 2*/
+        pslld       mm3,2          /*11 Shift dxx3*/
+         /*Stall*/
+        paddd       mm0,mm3        /*12 Accumulate dxx3*/
+         pslld      mm5,2          /*17 Shift dxy1*/
+        paddd       mm0,[edi]      /*13 Accumulate dxx4*/
+         paddd      mm4,mm5        /*18 Accumulate dxy1*/
+        movq        mm6,[ecx+512]  /*19 Get dxy2*/
+         /*Stall*/
+        movq        [esi],mm0      /*14 Store dxx sums*/
+        /***************dxy part 15-28*********************************/
+         pslld      mm6,1          /*20 Shift dxy2 1*/
+        paddd       mm4,mm6        /*21 Accumulate dxy2 1*/
+         pslld      mm6,1          /*22 Shift dxy2 2*/
+        movq        mm0,[eax+1024] /*29 Get dyy0*/
+         paddd      mm4,mm6        /*23 Accumulate dxy2 2*/
+        movq        mm7,[edx+512]  /*24 Get dxy3*/
+         pslld      mm7,2          /*25 Shift dxy3*/
+        movq        mm1,[ebx+1024] /*30 Get dyy1*/
+         paddd      mm4,mm7        /*26 Accumulate dxy3*/
+        paddd       mm4,[edi+512]  /*27 Accumulate dxy4*/
+         pslld      mm1,2          /*31 Shift dyy1*/
+        movq        mm2,[ecx+1024] /*33 Get dyy2*/
+         paddd      mm0,mm1        /*32 Accumulate dyy1*/
+        movq        [esi+512],mm4  /*28 Store dxy sums*/
+         pslld      mm2,1          /*34 Shift dyy2 1*/
+        /***************dyy part 29-49*********************************/
+
+
+        movq        mm3,[edx+1024] /*38 Get dyy3*/
+         paddd      mm0,mm2        /*35 Accumulate dyy2 1*/
+        paddd       mm0,[edi+1024] /*41 Accumulate dyy4*/
+         pslld      mm2,1          /*36 Shift dyy2 2*/
+        paddd       mm0,mm2        /*37 Accumulate dyy2 2*/
+         pslld      mm3,2          /*39 Shift dyy3*/
+        paddd       mm0,mm3        /*40 Accumulate dyy3*/
+         add        eax,8           /*43*/
+        add         ebx,8           /*44*/
+         add        ecx,8           /*45*/
+        movq        [esi+1024],mm0 /*42 Store dyy sums*/
+         /*Stall*/
+        add         edx,8           /*46*/
+         add        edi,8           /*47*/
+        add         esi,8           /*48*/
+         dec        c               /*49*/
+        jnz         loopstart
+
+        emms
+    }
+
+#else
+    int c,dd;
+
+    for(c=0;c<nc;c++)
+    {
+        /*Filter vertically*/
+        dd=d2[c];
+        g[c]=d0[c]+(d1[c]<<2)+(dd<<2)+(dd<<1)+(d3[c]<<2)+d4[c];
+
+        dd=d2[c+128];
+        g[c+128]=d0[c+128]+(d1[c+128]<<2)+(dd<<2)+(dd<<1)+(d3[c+128]<<2)+d4[c+128];
+
+        dd=d2[c+256];
+        g[c+256]=d0[c+256]+(d1[c+256]<<2)+(dd<<2)+(dd<<1)+(d3[c+256]<<2)+d4[c+256];
+    }
+#endif /*DB_USE_MMX*/
+}
+
+/*Filter horizontally the three rows gxx,gxy,gyy into the strength subrow starting at i,j
+and with width chunk_width. gxx,gxy and gyy are assumed to be four pixels wider than chunk_width
+and starting at (i,j-2)*/
+inline void db_HarrisStrength_row_f(float **s,float *gxx,float *gxy,float *gyy,int i,int j,int chunk_width)
+{
+    float Gxx,Gxy,Gyy,det,trc;
+    int c;
+
+    for(c=0;c<chunk_width;c++)
+    {
+        Gxx=gxx[c]+gxx[c+1]*4.0f+gxx[c+2]*6.0f+gxx[c+3]*4.0f+gxx[c+4];
+        Gxy=gxy[c]+gxy[c+1]*4.0f+gxy[c+2]*6.0f+gxy[c+3]*4.0f+gxy[c+4];
+        Gyy=gyy[c]+gyy[c+1]*4.0f+gyy[c+2]*6.0f+gyy[c+3]*4.0f+gyy[c+4];
+
+        det=Gxx*Gyy-Gxy*Gxy;
+        trc=Gxx+Gyy;
+        s[i][j+c]=det-0.06f*trc*trc;
+    }
+}
+
+/*Filter g of length 128 in place with 14641. Output is shifted two steps
+and of length 124*/
+inline void db_Filter14641_128_i(int *g,int nc)
+{
+#ifdef DB_USE_MMX
+    int mask;
+
+    mask=0xFFFFFFFF;
+    _asm
+    {
+        mov esi,31
+        mov eax,g
+
+        /*Get bitmask 00000000FFFFFFFF into mm7*/
+        movd mm7,mask
+
+        /*Warming iteration one 1-16********************/
+        movq       mm6,[eax]      /*1 Load new data*/
+        paddd      mm0,mm6        /*2 Add 1* behind two steps*/
+        movq       mm2,mm6        /*3 Start with 1* in front two steps*/
+        pslld      mm6,1          /*4*/
+        paddd      mm1,mm6        /*5 Add 2* same place*/
+        pslld      mm6,1          /*6*/
+        paddd      mm1,mm6        /*7 Add 4* same place*/
+        pshufw     mm6,mm6,4Eh    /*8 Swap the two double-words using bitmask 01001110=4Eh*/
+        paddd      mm1,mm6        /*9 Add 4* swapped*/
+        movq       mm5,mm6        /*10 Copy*/
+        pand       mm6,mm7        /*11 Get low double-word only*/
+        paddd      mm2,mm6        /*12 Add 4* in front one step*/
+        pxor       mm6,mm5        /*13 Get high double-word only*/
+        paddd      mm0,mm6        /*14 Add 4* behind one step*/
+        movq       mm0,mm1        /*15 Shift along*/
+        movq       mm1,mm2        /*16 Shift along*/
+        /*Warming iteration two 17-32********************/
+        movq       mm4,[eax+8]    /*17 Load new data*/
+        paddd      mm0,mm4        /*18 Add 1* behind two steps*/
+        movq       mm2,mm4        /*19 Start with 1* in front two steps*/
+        pslld      mm4,1          /*20*/
+        paddd      mm1,mm4        /*21 Add 2* same place*/
+        pslld      mm4,1          /*22*/
+        paddd      mm1,mm4        /*23 Add 4* same place*/
+        pshufw     mm4,mm4,4Eh    /*24 Swap the two double-words using bitmask 01001110=4Eh*/
+        paddd      mm1,mm4        /*25 Add 4* swapped*/
+        movq       mm3,mm4        /*26 Copy*/
+        pand       mm4,mm7        /*27 Get low double-word only*/
+        paddd      mm2,mm4        /*28 Add 4* in front one step*/
+        pxor       mm4,mm3        /*29 Get high double-word only*/
+        paddd      mm0,mm4        /*30 Add 4* behind one step*/
+        movq       mm0,mm1        /*31 Shift along*/
+        movq       mm1,mm2        /*32 Shift along*/
+
+        /*Loop********************/
+loopstart:
+        /*First part of loop 33-47********/
+        movq        mm6,[eax+16]   /*33 Load new data*/
+         /*Stall*/
+        paddd       mm0,mm6        /*34 Add 1* behind two steps*/
+         movq       mm2,mm6        /*35 Start with 1* in front two steps*/
+        movq        mm4,[eax+24]   /*48 Load new data*/
+         pslld      mm6,1          /*36*/
+        paddd       mm1,mm6        /*37 Add 2* same place*/
+         pslld      mm6,1          /*38*/
+        paddd       mm1,mm6        /*39 Add 4* same place*/
+         pshufw     mm6,mm6,4Eh    /*40 Swap the two double-words using bitmask 01001110=4Eh*/
+        paddd       mm1,mm4        /*49 Add 1* behind two steps*/
+         movq       mm5,mm6        /*41 Copy*/
+        paddd       mm1,mm6        /*42 Add 4* swapped*/
+         pand       mm6,mm7        /*43 Get low double-word only*/
+        paddd       mm2,mm6        /*44 Add 4* in front one step*/
+         pxor       mm6,mm5        /*45 Get high double-word only*/
+        paddd       mm0,mm6        /*46 Add 4* behind one step*/
+         movq       mm6,mm4        /*50a Copy*/
+        pslld       mm4,1          /*51*/
+         /*Stall*/
+        movq        [eax],mm0      /*47 Store result two steps behind*/
+        /*Second part of loop 48-66********/
+         movq       mm0,mm6        /*50b Start with 1* in front two steps*/
+        paddd       mm2,mm4        /*52 Add 2* same place*/
+         pslld      mm4,1          /*53*/
+        paddd       mm2,mm4        /*54 Add 4* same place*/
+         pshufw     mm4,mm4,4Eh    /*55 Swap the two double-words using bitmask 01001110=4Eh*/
+        paddd       mm2,mm4        /*56 Add 4* swapped*/
+         movq       mm3,mm4        /*57 Copy*/
+        pand        mm4,mm7        /*58 Get low double-word only*/
+         /*Stall*/
+        paddd       mm0,mm4        /*59 Add 4* in front one step*/
+         pxor       mm4,mm3        /*60 Get high double-word only*/
+        paddd       mm1,mm4        /*61 Add 4* behind one step*/
+         add        eax,16         /*65*/
+        dec         esi            /*66*/
+         /*Stall*/
+        movq        [eax-8],mm1    /*62 Store result two steps behind*/
+         movq       mm1,mm0        /*63 Shift along*/
+        movq        mm0,mm2        /*64 Shift along*/
+        jnz loopstart
+
+        emms
+    }
+
+#else
+    int c;
+
+    for(c=0;c<nc-4;c++)
+    {
+        g[c]=g[c]+(g[c+1]<<2)+(g[c+2]<<2)+(g[c+2]<<1)+(g[c+3]<<2)+g[c+4];
+    }
+#endif /*DB_USE_MMX*/
+}
+
+/*Filter horizontally the three rows gxx,gxy,gyy of length 128 into the strength subrow s
+of length 124. gxx,gxy and gyy are assumed to be starting at (i,j-2) if s[i][j] is sought.
+s should be 16 byte aligned*/
+inline void db_HarrisStrength_row_s(float *s,int *gxx,int *gxy,int *gyy,int nc)
+{
+    float k;
+
+    k=0.06f;
+
+    db_Filter14641_128_i(gxx,nc);
+    db_Filter14641_128_i(gxy,nc);
+    db_Filter14641_128_i(gyy,nc);
+
+#ifdef DB_USE_SIMD
+
+
+    _asm
+    {
+        mov esi,15
+        mov eax,gxx
+        mov ebx,gxy
+        mov ecx,gyy
+        mov edx,s
+
+        /*broadcast k to all positions of xmm7*/
+        movss   xmm7,k
+        shufps  xmm7,xmm7,0
+
+        /*****Warm up 1-10**************************************/
+        cvtpi2ps  xmm0,[eax+8] /*1 Convert two integers into floating point of low double-word*/
+         /*Stall*/
+        cvtpi2ps  xmm1,[ebx+8] /*4 Convert two integers into floating point of low double-word*/
+         movlhps  xmm0,xmm0    /*2 Move them to the high double-word*/
+        cvtpi2ps  xmm2,[ecx+8] /*7 Convert two integers into floating point of low double-word*/
+         movlhps  xmm1,xmm1    /*5 Move them to the high double-word*/
+        cvtpi2ps  xmm0,[eax]   /*3 Convert two integers into floating point of low double-word*/
+         movlhps  xmm2,xmm2    /*8 Move them to the high double-word*/
+        cvtpi2ps  xmm1,[ebx]   /*6 Convert two integers into floating point of low double-word*/
+         movaps   xmm3,xmm0    /*10 Copy Cxx*/
+        cvtpi2ps  xmm2,[ecx]   /*9 Convert two integers into floating point of low double-word*/
+         /*Stall*/
+loopstart:
+        /*****First part of loop 11-18***********************/
+        mulps     xmm0,xmm2     /*11 Multiply to get Gxx*Gyy*/
+         addps    xmm2,xmm3     /*12 Add to get Gxx+Gyy*/
+        cvtpi2ps  xmm4,[eax+24] /*19 Convert two integers into floating point of low double-word*/
+         mulps    xmm1,xmm1     /*13 Multiply to get Gxy*Gxy*/
+        mulps     xmm2,xmm2     /*14 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
+         movlhps  xmm4,xmm4     /*20 Move them to the high double-word*/
+        cvtpi2ps  xmm4,[eax+16] /*21 Convert two integers into floating point of low double-word*/
+         /*Stall*/
+        subps     xmm0,xmm1     /*15 Subtract to get Gxx*Gyy-Gxy*Gxy*/
+         mulps    xmm2,xmm7     /*16 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
+        cvtpi2ps  xmm5,[ebx+24] /*22 Convert two integers into floating point of low double-word*/
+         /*Stall*/
+        movlhps   xmm5,xmm5     /*23 Move them to the high double-word*/
+         /*Stall*/
+        cvtpi2ps  xmm5,[ebx+16] /*24 Convert two integers into floating point of low double-word*/
+         subps    xmm0,xmm2     /*17 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
+        cvtpi2ps  xmm6,[ecx+24] /*25 Convert two integers into floating point of low double-word*/
+         /*Stall*/
+        movaps    [edx],xmm0    /*18 Store*/
+        /*****Second part of loop 26-40***********************/
+         movlhps  xmm6,xmm6     /*26 Move them to the high double-word*/
+        cvtpi2ps  xmm6,[ecx+16] /*27 Convert two integers into floating point of low double-word*/
+         movaps   xmm3,xmm4     /*28 Copy Cxx*/
+        mulps     xmm4,xmm6     /*29 Multiply to get Gxx*Gyy*/
+         addps    xmm6,xmm3     /*30 Add to get Gxx+Gyy*/
+        cvtpi2ps  xmm0,[eax+40] /*(1 Next) Convert two integers into floating point of low double-word*/
+         mulps    xmm5,xmm5     /*31 Multiply to get Gxy*Gxy*/
+        cvtpi2ps  xmm1,[ebx+40] /*(4 Next) Convert two integers into floating point of low double-word*/
+         mulps    xmm6,xmm6     /*32 Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
+        cvtpi2ps  xmm2,[ecx+40] /*(7 Next) Convert two integers into floating point of low double-word*/
+         movlhps  xmm0,xmm0     /*(2 Next) Move them to the high double-word*/
+        subps     xmm4,xmm5     /*33 Subtract to get Gxx*Gyy-Gxy*Gxy*/
+         movlhps  xmm1,xmm1     /*(5 Next) Move them to the high double-word*/
+        cvtpi2ps  xmm0,[eax+32] /*(3 Next)Convert two integers into floating point of low double-word*/
+         mulps    xmm6,xmm7     /*34 Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
+        cvtpi2ps  xmm1,[ebx+32] /*(6 Next) Convert two integers into floating point of low double-word*/
+         movlhps  xmm2,xmm2     /*(8 Next) Move them to the high double-word*/
+        movaps    xmm3,xmm0     /*(10 Next) Copy Cxx*/
+         add      eax,32        /*37*/
+        subps     xmm4,xmm6     /*35 Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
+         add      ebx,32        /*38*/
+        cvtpi2ps  xmm2,[ecx+32] /*(9 Next) Convert two integers into floating point of low double-word*/
+         /*Stall*/
+        movaps    [edx+16],xmm4 /*36 Store*/
+         /*Stall*/
+        add       ecx,32        /*39*/
+         add      edx,32        /*40*/
+        dec       esi           /*41*/
+        jnz loopstart
+
+        /****Cool down***************/
+        mulps    xmm0,xmm2    /*Multiply to get Gxx*Gyy*/
+        addps    xmm2,xmm3    /*Add to get Gxx+Gyy*/
+        mulps    xmm1,xmm1    /*Multiply to get Gxy*Gxy*/
+        mulps    xmm2,xmm2    /*Multiply to get (Gxx+Gyy)*(Gxx+Gyy)*/
+        subps    xmm0,xmm1    /*Subtract to get Gxx*Gyy-Gxy*Gxy*/
+        mulps    xmm2,xmm7    /*Multiply to get k*(Gxx+Gyy)*(Gxx+Gyy)*/
+        subps    xmm0,xmm2    /*Subtract to get Gxx*Gyy-Gxy*Gxy-k*(Gxx+Gyy)*(Gxx+Gyy)*/
+        movaps   [edx],xmm0   /*Store*/
+    }
+
+#else
+    float Gxx,Gxy,Gyy,det,trc;
+    int c;
+
+    //for(c=0;c<124;c++)
+    for(c=0;c<nc-4;c++)
+    {
+        Gxx=(float)gxx[c];
+        Gxy=(float)gxy[c];
+        Gyy=(float)gyy[c];
+
+        det=Gxx*Gyy-Gxy*Gxy;
+        trc=Gxx+Gyy;
+        s[c]=det-k*trc*trc;
+    }
+#endif /*DB_USE_SIMD*/
+}
+
+/*Compute the Harris corner strength of the chunk [left,top,right,bottom] of img and
+store it into the corresponding region of s. left and top have to be at least 3 and
+right and bottom have to be at most width-4,height-4*/
+inline void db_HarrisStrengthChunk_f(float **s,const float * const *img,int left,int top,int right,int bottom,
+                                      /*temp should point to at least
+                                      13*(right-left+5) of allocated memory*/
+                                      float *temp)
+{
+    float *Ix[5],*Iy[5];
+    float *gxx,*gxy,*gyy;
+    int i,chunk_width,chunk_width_p4;
+
+    chunk_width=right-left+1;
+    chunk_width_p4=chunk_width+4;
+    gxx=temp;
+    gxy=gxx+chunk_width_p4;
+    gyy=gxy+chunk_width_p4;
+    for(i=0;i<5;i++)
+    {
+        Ix[i]=gyy+chunk_width_p4+(2*i*chunk_width_p4);
+        Iy[i]=Ix[i]+chunk_width_p4;
+    }
+
+    /*Fill four rows of the wrap-around derivative buffers*/
+    for(i=top-2;i<top+2;i++) db_IxIyRow_f(Ix[i%5],Iy[i%5],img,i,left-2,chunk_width_p4);
+
+    /*For each output row*/
+    for(i=top;i<=bottom;i++)
+    {
+        /*Step the derivative buffers*/
+        db_IxIyRow_f(Ix[(i+2)%5],Iy[(i+2)%5],img,(i+2),left-2,chunk_width_p4);
+
+        /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/
+        db_gxx_gxy_gyy_row_f(gxx,gxy,gyy,chunk_width_p4,
+                                 Ix[(i-2)%5],Ix[(i-1)%5],Ix[i%5],Ix[(i+1)%5],Ix[(i+2)%5],
+                                 Iy[(i-2)%5],Iy[(i-1)%5],Iy[i%5],Iy[(i+1)%5],Iy[(i+2)%5]);
+
+        /*Filter gxx,gxy,gyy horizontally and compute corner response s*/
+        db_HarrisStrength_row_f(s,gxx,gxy,gyy,i,left,chunk_width);
+    }
+}
+
+/*Compute the Harris corner strength of the chunk [left,top,left+123,bottom] of img and
+store it into the corresponding region of s. left and top have to be at least 3 and
+right and bottom have to be at most width-4,height-4. The left of the region in s should
+be 16 byte aligned*/
+inline void db_HarrisStrengthChunk_u(float **s,const unsigned char * const *img,int left,int top,int bottom,
+                                      /*temp should point to at least
+                                      18*128 of allocated memory*/
+                                      int *temp, int nc)
+{
+    int *Ixx[5],*Ixy[5],*Iyy[5];
+    int *gxx,*gxy,*gyy;
+    int i;
+
+    gxx=temp;
+    gxy=gxx+128;
+    gyy=gxy+128;
+    for(i=0;i<5;i++)
+    {
+        Ixx[i]=gyy+(3*i+1)*128;
+        Ixy[i]=gyy+(3*i+2)*128;
+        Iyy[i]=gyy+(3*i+3)*128;
+    }
+
+    /*Fill four rows of the wrap-around derivative buffers*/
+    for(i=top-2;i<top+2;i++) db_IxIyRow_u(Ixx[i%5],img,i,left-2,nc);
+
+    /*For each output row*/
+    for(i=top;i<=bottom;i++)
+    {
+        /*Step the derivative buffers*/
+        db_IxIyRow_u(Ixx[(i+2)%5],img,(i+2),left-2,nc);
+
+        /*Filter Ix2,IxIy,Iy2 vertically into gxx,gxy,gyy*/
+        db_gxx_gxy_gyy_row_s(gxx,Ixx[(i-2)%5],Ixx[(i-1)%5],Ixx[i%5],Ixx[(i+1)%5],Ixx[(i+2)%5],nc);
+
+        /*Filter gxx,gxy,gyy horizontally and compute corner response s*/
+        db_HarrisStrength_row_s(s[i]+left,gxx,gxy,gyy,nc);
+    }
+
+}
+
+/*Compute Harris corner strength of img. Strength is returned for the region
+with (3,3) as upper left and (w-4,h-4) as lower right, positioned in the
+same place in s. In other words,image should be at least 7 pixels wide and 7 pixels high
+for a meaningful result*/
+void db_HarrisStrength_f(float **s,const float * const *img,int w,int h,
+                                    /*temp should point to at least
+                                    13*(chunk_width+4) of allocated memory*/
+                                    float *temp,
+                                    int chunk_width)
+{
+    int x,next_x,last,right;
+
+    last=w-4;
+    for(x=3;x<=last;x=next_x)
+    {
+        next_x=x+chunk_width;
+        right=next_x-1;
+        if(right>last) right=last;
+        /*Compute the Harris strength of a chunk*/
+        db_HarrisStrengthChunk_f(s,img,x,3,right,h-4,temp);
+    }
+}
+
+/*Compute Harris corner strength of img. Strength is returned for the region
+with (3,3) as upper left and (w-4,h-4) as lower right, positioned in the
+same place in s. In other words,image should be at least 7 pixels wide and 7 pixels high
+for a meaningful result.Moreover, the image should be overallocated by 256 bytes.
+s[i][3] should by 16 byte aligned for any i*/
+void db_HarrisStrength_u(float **s, const unsigned char * const *img,int w,int h,
+                                    /*temp should point to at least
+                                    18*128 of allocated memory*/
+                                    int *temp)
+{
+    int x,next_x,last;
+    int nc;
+
+    last=w-4;
+    for(x=3;x<=last;x=next_x)
+    {
+        next_x=x+124;
+
+        // mayban: to revert to the original full chunks state, change the line below to: nc = 128;
+        nc = db_mini(128,last-x+1);
+        //nc = 128;
+
+        /*Compute the Harris strength of a chunk*/
+        db_HarrisStrengthChunk_u(s,img,x,3,h-4,temp,nc);
+    }
+}
+
+inline float db_Max_128Aligned16_f(float *v)
+{
+#ifdef DB_USE_SIMD
+    float back;
+
+    _asm
+    {
+        mov eax,v
+
+        /*Chunk1*/
+        movaps xmm0,[eax]
+        movaps xmm1,[eax+16]
+        movaps xmm2,[eax+32]
+        movaps xmm3,[eax+48]
+        movaps xmm4,[eax+64]
+        movaps xmm5,[eax+80]
+        movaps xmm6,[eax+96]
+        movaps xmm7,[eax+112]
+
+        /*Chunk2*/
+        maxps xmm0,[eax+128]
+        maxps xmm1,[eax+144]
+        maxps xmm2,[eax+160]
+        maxps xmm3,[eax+176]
+        maxps xmm4,[eax+192]
+        maxps xmm5,[eax+208]
+        maxps xmm6,[eax+224]
+        maxps xmm7,[eax+240]
+
+        /*Chunk3*/
+        maxps xmm0,[eax+256]
+        maxps xmm1,[eax+272]
+        maxps xmm2,[eax+288]
+        maxps xmm3,[eax+304]
+        maxps xmm4,[eax+320]
+        maxps xmm5,[eax+336]
+        maxps xmm6,[eax+352]
+        maxps xmm7,[eax+368]
+
+        /*Chunk4*/
+        maxps xmm0,[eax+384]
+        maxps xmm1,[eax+400]
+        maxps xmm2,[eax+416]
+        maxps xmm3,[eax+432]
+        maxps xmm4,[eax+448]
+        maxps xmm5,[eax+464]
+        maxps xmm6,[eax+480]
+        maxps xmm7,[eax+496]
+
+        /*Collect*/
+        maxps   xmm0,xmm1
+        maxps   xmm2,xmm3
+        maxps   xmm4,xmm5
+        maxps   xmm6,xmm7
+        maxps   xmm0,xmm2
+        maxps   xmm4,xmm6
+        maxps   xmm0,xmm4
+        movhlps xmm1,xmm0
+        maxps   xmm0,xmm1
+        shufps  xmm1,xmm0,1
+        maxps   xmm0,xmm1
+        movss   back,xmm0
+    }
+
+    return(back);
+#else
+    float val,max_val;
+    float *p,*stop_p;
+    max_val=v[0];
+    for(p=v+1,stop_p=v+128;p!=stop_p;)
+    {
+        val= *p++;
+        if(val>max_val) max_val=val;
+    }
+    return(max_val);
+#endif /*DB_USE_SIMD*/
+}
+
+inline float db_Max_64Aligned16_f(float *v)
+{
+#ifdef DB_USE_SIMD
+    float back;
+
+    _asm
+    {
+        mov eax,v
+
+        /*Chunk1*/
+        movaps xmm0,[eax]
+        movaps xmm1,[eax+16]
+        movaps xmm2,[eax+32]
+        movaps xmm3,[eax+48]
+        movaps xmm4,[eax+64]
+        movaps xmm5,[eax+80]
+        movaps xmm6,[eax+96]
+        movaps xmm7,[eax+112]
+
+        /*Chunk2*/
+        maxps xmm0,[eax+128]
+        maxps xmm1,[eax+144]
+        maxps xmm2,[eax+160]
+        maxps xmm3,[eax+176]
+        maxps xmm4,[eax+192]
+        maxps xmm5,[eax+208]
+        maxps xmm6,[eax+224]
+        maxps xmm7,[eax+240]
+
+        /*Collect*/
+        maxps   xmm0,xmm1
+        maxps   xmm2,xmm3
+        maxps   xmm4,xmm5
+        maxps   xmm6,xmm7
+        maxps   xmm0,xmm2
+        maxps   xmm4,xmm6
+        maxps   xmm0,xmm4
+        movhlps xmm1,xmm0
+        maxps   xmm0,xmm1
+        shufps  xmm1,xmm0,1
+        maxps   xmm0,xmm1
+        movss   back,xmm0
+    }
+
+    return(back);
+#else
+    float val,max_val;
+    float *p,*stop_p;
+    max_val=v[0];
+    for(p=v+1,stop_p=v+64;p!=stop_p;)
+    {
+        val= *p++;
+        if(val>max_val) max_val=val;
+    }
+    return(max_val);
+#endif /*DB_USE_SIMD*/
+}
+
+inline float db_Max_32Aligned16_f(float *v)
+{
+#ifdef DB_USE_SIMD
+    float back;
+
+    _asm
+    {
+        mov eax,v
+
+        /*Chunk1*/
+        movaps xmm0,[eax]
+        movaps xmm1,[eax+16]
+        movaps xmm2,[eax+32]
+        movaps xmm3,[eax+48]
+        movaps xmm4,[eax+64]
+        movaps xmm5,[eax+80]
+        movaps xmm6,[eax+96]
+        movaps xmm7,[eax+112]
+
+        /*Collect*/
+        maxps   xmm0,xmm1
+        maxps   xmm2,xmm3
+        maxps   xmm4,xmm5
+        maxps   xmm6,xmm7
+        maxps   xmm0,xmm2
+        maxps   xmm4,xmm6
+        maxps   xmm0,xmm4
+        movhlps xmm1,xmm0
+        maxps   xmm0,xmm1
+        shufps  xmm1,xmm0,1
+        maxps   xmm0,xmm1
+        movss   back,xmm0
+    }
+
+    return(back);
+#else
+    float val,max_val;
+    float *p,*stop_p;
+    max_val=v[0];
+    for(p=v+1,stop_p=v+32;p!=stop_p;)
+    {
+        val= *p++;
+        if(val>max_val) max_val=val;
+    }
+    return(max_val);
+#endif /*DB_USE_SIMD*/
+}
+
+inline float db_Max_16Aligned16_f(float *v)
+{
+#ifdef DB_USE_SIMD
+    float back;
+
+    _asm
+    {
+        mov eax,v
+
+        /*Chunk1*/
+        movaps xmm0,[eax]
+        movaps xmm1,[eax+16]
+        movaps xmm2,[eax+32]
+        movaps xmm3,[eax+48]
+
+        /*Collect*/
+        maxps   xmm0,xmm1
+        maxps   xmm2,xmm3
+        maxps   xmm0,xmm2
+        movhlps xmm1,xmm0
+        maxps   xmm0,xmm1
+        shufps  xmm1,xmm0,1
+        maxps   xmm0,xmm1
+        movss   back,xmm0
+    }
+
+    return(back);
+#else
+    float val,max_val;
+    float *p,*stop_p;
+    max_val=v[0];
+    for(p=v+1,stop_p=v+16;p!=stop_p;)
+    {
+        val= *p++;
+        if(val>max_val) max_val=val;
+    }
+    return(max_val);
+#endif /*DB_USE_SIMD*/
+}
+
+inline float db_Max_8Aligned16_f(float *v)
+{
+#ifdef DB_USE_SIMD
+    float back;
+
+    _asm
+    {
+        mov eax,v
+
+        /*Chunk1*/
+        movaps xmm0,[eax]
+        movaps xmm1,[eax+16]
+
+        /*Collect*/
+        maxps   xmm0,xmm1
+        movhlps xmm1,xmm0
+        maxps   xmm0,xmm1
+        shufps  xmm1,xmm0,1
+        maxps   xmm0,xmm1
+        movss   back,xmm0
+    }
+
+    return(back);
+#else
+    float val,max_val;
+    float *p,*stop_p;
+    max_val=v[0];
+    for(p=v+1,stop_p=v+8;p!=stop_p;)
+    {
+        val= *p++;
+        if(val>max_val) max_val=val;
+    }
+    return(max_val);
+#endif /*DB_USE_SIMD*/
+}
+
+inline float db_Max_Aligned16_f(float *v,int size)
+{
+    float val,max_val;
+    float *stop_v;
+
+    max_val=v[0];
+    for(;size>=128;size-=128)
+    {
+        val=db_Max_128Aligned16_f(v);
+        v+=128;
+        if(val>max_val) max_val=val;
+    }
+    if(size&64)
+    {
+        val=db_Max_64Aligned16_f(v);
+        v+=64;
+        if(val>max_val) max_val=val;
+    }
+    if(size&32)
+    {
+        val=db_Max_32Aligned16_f(v);
+        v+=32;
+        if(val>max_val) max_val=val;
+    }
+    if(size&16)
+    {
+        val=db_Max_16Aligned16_f(v);
+        v+=16;
+        if(val>max_val) max_val=val;
+    }
+    if(size&8)
+    {
+        val=db_Max_8Aligned16_f(v);
+        v+=8;
+        if(val>max_val) max_val=val;
+    }
+    if(size&7)
+    {
+        for(stop_v=v+(size&7);v!=stop_v;)
+        {
+            val= *v++;
+            if(val>max_val) max_val=val;
+        }
+    }
+
+    return(max_val);
+}
+
+/*Find maximum value of img in the region starting at (left,top)
+and with width w and height h. img[left] should be 16 byte aligned*/
+float db_MaxImage_Aligned16_f(float **img,int left,int top,int w,int h)
+{
+    float val,max_val;
+    int i,stop_i;
+
+    if(w && h)
+    {
+        stop_i=top+h;
+        max_val=img[top][left];
+
+        for(i=top;i<stop_i;i++)
+        {
+            val=db_Max_Aligned16_f(img[i]+left,w);
+            if(val>max_val) max_val=val;
+        }
+        return(max_val);
+    }
+    return(0.0);
+}
+
+inline void db_MaxVector_128_Aligned16_f(float *m,float *v1,float *v2)
+{
+#ifdef DB_USE_SIMD
+    _asm
+    {
+        mov eax,v1
+        mov ebx,v2
+        mov ecx,m
+
+        /*Chunk1*/
+        movaps xmm0,[eax]
+        movaps xmm1,[eax+16]
+        movaps xmm2,[eax+32]
+        movaps xmm3,[eax+48]
+        movaps xmm4,[eax+64]
+        movaps xmm5,[eax+80]
+        movaps xmm6,[eax+96]
+        movaps xmm7,[eax+112]
+        maxps  xmm0,[ebx]
+        maxps  xmm1,[ebx+16]
+        maxps  xmm2,[ebx+32]
+        maxps  xmm3,[ebx+48]
+        maxps  xmm4,[ebx+64]
+        maxps  xmm5,[ebx+80]
+        maxps  xmm6,[ebx+96]
+        maxps  xmm7,[ebx+112]
+        movaps [ecx],xmm0
+        movaps [ecx+16],xmm1
+        movaps [ecx+32],xmm2
+        movaps [ecx+48],xmm3
+        movaps [ecx+64],xmm4
+        movaps [ecx+80],xmm5
+        movaps [ecx+96],xmm6
+        movaps [ecx+112],xmm7
+
+        /*Chunk2*/
+        movaps xmm0,[eax+128]
+        movaps xmm1,[eax+144]
+        movaps xmm2,[eax+160]
+        movaps xmm3,[eax+176]
+        movaps xmm4,[eax+192]
+        movaps xmm5,[eax+208]
+        movaps xmm6,[eax+224]
+        movaps xmm7,[eax+240]
+        maxps  xmm0,[ebx+128]
+        maxps  xmm1,[ebx+144]
+        maxps  xmm2,[ebx+160]
+        maxps  xmm3,[ebx+176]
+        maxps  xmm4,[ebx+192]
+        maxps  xmm5,[ebx+208]
+        maxps  xmm6,[ebx+224]
+        maxps  xmm7,[ebx+240]
+        movaps [ecx+128],xmm0
+        movaps [ecx+144],xmm1
+        movaps [ecx+160],xmm2
+        movaps [ecx+176],xmm3
+        movaps [ecx+192],xmm4
+        movaps [ecx+208],xmm5
+        movaps [ecx+224],xmm6
+        movaps [ecx+240],xmm7
+
+        /*Chunk3*/
+        movaps xmm0,[eax+256]
+        movaps xmm1,[eax+272]
+        movaps xmm2,[eax+288]
+        movaps xmm3,[eax+304]
+        movaps xmm4,[eax+320]
+        movaps xmm5,[eax+336]
+        movaps xmm6,[eax+352]
+        movaps xmm7,[eax+368]
+        maxps  xmm0,[ebx+256]
+        maxps  xmm1,[ebx+272]
+        maxps  xmm2,[ebx+288]
+        maxps  xmm3,[ebx+304]
+        maxps  xmm4,[ebx+320]
+        maxps  xmm5,[ebx+336]
+        maxps  xmm6,[ebx+352]
+        maxps  xmm7,[ebx+368]
+        movaps [ecx+256],xmm0
+        movaps [ecx+272],xmm1
+        movaps [ecx+288],xmm2
+        movaps [ecx+304],xmm3
+        movaps [ecx+320],xmm4
+        movaps [ecx+336],xmm5
+        movaps [ecx+352],xmm6
+        movaps [ecx+368],xmm7
+
+        /*Chunk4*/
+        movaps xmm0,[eax+384]
+        movaps xmm1,[eax+400]
+        movaps xmm2,[eax+416]
+        movaps xmm3,[eax+432]
+        movaps xmm4,[eax+448]
+        movaps xmm5,[eax+464]
+        movaps xmm6,[eax+480]
+        movaps xmm7,[eax+496]
+        maxps  xmm0,[ebx+384]
+        maxps  xmm1,[ebx+400]
+        maxps  xmm2,[ebx+416]
+        maxps  xmm3,[ebx+432]
+        maxps  xmm4,[ebx+448]
+        maxps  xmm5,[ebx+464]
+        maxps  xmm6,[ebx+480]
+        maxps  xmm7,[ebx+496]
+        movaps [ecx+384],xmm0
+        movaps [ecx+400],xmm1
+        movaps [ecx+416],xmm2
+        movaps [ecx+432],xmm3
+        movaps [ecx+448],xmm4
+        movaps [ecx+464],xmm5
+        movaps [ecx+480],xmm6
+        movaps [ecx+496],xmm7
+    }
+#else
+    int i;
+    float a,b;
+    for(i=0;i<128;i++)
+    {
+        a=v1[i];
+        b=v2[i];
+        if(a>=b) m[i]=a;
+        else m[i]=b;
+    }
+#endif /*DB_USE_SIMD*/
+}
+
+inline void db_MaxVector_128_SecondSourceDestAligned16_f(float *m,float *v1,float *v2)
+{
+#ifdef DB_USE_SIMD
+    _asm
+    {
+        mov eax,v1
+        mov ebx,v2
+        mov ecx,m
+
+        /*Chunk1*/
+        movups xmm0,[eax]
+        movups xmm1,[eax+16]
+        movups xmm2,[eax+32]
+        movups xmm3,[eax+48]
+        movups xmm4,[eax+64]
+        movups xmm5,[eax+80]
+        movups xmm6,[eax+96]
+        movups xmm7,[eax+112]
+        maxps  xmm0,[ebx]
+        maxps  xmm1,[ebx+16]
+        maxps  xmm2,[ebx+32]
+        maxps  xmm3,[ebx+48]
+        maxps  xmm4,[ebx+64]
+        maxps  xmm5,[ebx+80]
+        maxps  xmm6,[ebx+96]
+        maxps  xmm7,[ebx+112]
+        movaps [ecx],xmm0
+        movaps [ecx+16],xmm1
+        movaps [ecx+32],xmm2
+        movaps [ecx+48],xmm3
+        movaps [ecx+64],xmm4
+        movaps [ecx+80],xmm5
+        movaps [ecx+96],xmm6
+        movaps [ecx+112],xmm7
+
+        /*Chunk2*/
+        movups xmm0,[eax+128]
+        movups xmm1,[eax+144]
+        movups xmm2,[eax+160]
+        movups xmm3,[eax+176]
+        movups xmm4,[eax+192]
+        movups xmm5,[eax+208]
+        movups xmm6,[eax+224]
+        movups xmm7,[eax+240]
+        maxps  xmm0,[ebx+128]
+        maxps  xmm1,[ebx+144]
+        maxps  xmm2,[ebx+160]
+        maxps  xmm3,[ebx+176]
+        maxps  xmm4,[ebx+192]
+        maxps  xmm5,[ebx+208]
+        maxps  xmm6,[ebx+224]
+        maxps  xmm7,[ebx+240]
+        movaps [ecx+128],xmm0
+        movaps [ecx+144],xmm1
+        movaps [ecx+160],xmm2
+        movaps [ecx+176],xmm3
+        movaps [ecx+192],xmm4
+        movaps [ecx+208],xmm5
+        movaps [ecx+224],xmm6
+        movaps [ecx+240],xmm7
+
+        /*Chunk3*/
+        movups xmm0,[eax+256]
+        movups xmm1,[eax+272]
+        movups xmm2,[eax+288]
+        movups xmm3,[eax+304]
+        movups xmm4,[eax+320]
+        movups xmm5,[eax+336]
+        movups xmm6,[eax+352]
+        movups xmm7,[eax+368]
+        maxps  xmm0,[ebx+256]
+        maxps  xmm1,[ebx+272]
+        maxps  xmm2,[ebx+288]
+        maxps  xmm3,[ebx+304]
+        maxps  xmm4,[ebx+320]
+        maxps  xmm5,[ebx+336]
+        maxps  xmm6,[ebx+352]
+        maxps  xmm7,[ebx+368]
+        movaps [ecx+256],xmm0
+        movaps [ecx+272],xmm1
+        movaps [ecx+288],xmm2
+        movaps [ecx+304],xmm3
+        movaps [ecx+320],xmm4
+        movaps [ecx+336],xmm5
+        movaps [ecx+352],xmm6
+        movaps [ecx+368],xmm7
+
+        /*Chunk4*/
+        movups xmm0,[eax+384]
+        movups xmm1,[eax+400]
+        movups xmm2,[eax+416]
+        movups xmm3,[eax+432]
+        movups xmm4,[eax+448]
+        movups xmm5,[eax+464]
+        movups xmm6,[eax+480]
+        movups xmm7,[eax+496]
+        maxps  xmm0,[ebx+384]
+        maxps  xmm1,[ebx+400]
+        maxps  xmm2,[ebx+416]
+        maxps  xmm3,[ebx+432]
+        maxps  xmm4,[ebx+448]
+        maxps  xmm5,[ebx+464]
+        maxps  xmm6,[ebx+480]
+        maxps  xmm7,[ebx+496]
+        movaps [ecx+384],xmm0
+        movaps [ecx+400],xmm1
+        movaps [ecx+416],xmm2
+        movaps [ecx+432],xmm3
+        movaps [ecx+448],xmm4
+        movaps [ecx+464],xmm5
+        movaps [ecx+480],xmm6
+        movaps [ecx+496],xmm7
+    }
+#else
+    int i;
+    float a,b;
+    for(i=0;i<128;i++)
+    {
+        a=v1[i];
+        b=v2[i];
+        if(a>=b) m[i]=a;
+        else m[i]=b;
+    }
+#endif /*DB_USE_SIMD*/
+}
+
+/*Compute Max-suppression-filtered image for a chunk of sf starting at (left,top), of width 124 and
+stopping at bottom. The output is shifted two steps left and overwrites 128 elements for each row.
+The input s should be of width at least 128, and exist for 2 pixels outside the specified region.
+s[i][left-2] and sf[i][left-2] should be 16 byte aligned. Top must be at least 3*/
+inline void db_MaxSuppressFilterChunk_5x5_Aligned16_f(float **sf,float **s,int left,int top,int bottom,
+                                      /*temp should point to at least
+                                      6*132 floats of 16-byte-aligned allocated memory*/
+                                      float *temp)
+{
+#ifdef DB_USE_SIMD
+    int i,lm2;
+    float *two[4];
+    float *four,*five;
+
+    lm2=left-2;
+
+    /*Set pointers to pre-allocated memory*/
+    four=temp;
+    five=four+132;
+    for(i=0;i<4;i++)
+    {
+        two[i]=five+(i+1)*132;
+    }
+
+    /*Set rests of four and five to zero to avoid
+    floating point exceptions*/
+    for(i=129;i<132;i++)
+    {
+        four[i]=0.0;
+        five[i]=0.0;
+    }
+
+    /*Fill three rows of the wrap-around max buffers*/
+    for(i=top-3;i<top;i++) db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2);
+
+    /*For each output row*/
+    for(;i<=bottom;i++)
+    {
+        /*Compute max of the lowest pair of rows in the five row window*/
+        db_MaxVector_128_Aligned16_f(two[i&3],s[i+1]+lm2,s[i+2]+lm2);
+        /*Compute max of the lowest and highest pair of rows in the five row window*/
+        db_MaxVector_128_Aligned16_f(four,two[i&3],two[(i-3)&3]);
+        /*Compute max of all rows*/
+        db_MaxVector_128_Aligned16_f(five,four,two[(i-1)&3]);
+        /*Compute max of 2x5 chunks*/
+        db_MaxVector_128_SecondSourceDestAligned16_f(five,five+1,five);
+        /*Compute max of pairs of 2x5 chunks*/
+        db_MaxVector_128_SecondSourceDestAligned16_f(five,five+3,five);
+        /*Compute max of pairs of 5x5 except middle*/
+        db_MaxVector_128_SecondSourceDestAligned16_f(sf[i]+lm2,four+2,five);
+    }
+
+#else
+    int i,j,right;
+    float sv;
+
+    right=left+128;
+    for(i=top;i<=bottom;i++) for(j=left;j<right;j++)
+    {
+        sv=s[i][j];
+
+        if( sv>s[i-2][j-2] && sv>s[i-2][j-1] && sv>s[i-2][j] && sv>s[i-2][j+1] && sv>s[i-2][j+2] &&
+            sv>s[i-1][j-2] && sv>s[i-1][j-1] && sv>s[i-1][j] && sv>s[i-1][j+1] && sv>s[i-1][j+2] &&
+            sv>s[  i][j-2] && sv>s[  i][j-1] &&                 sv>s[  i][j+1] && sv>s[  i][j+2] &&
+            sv>s[i+1][j-2] && sv>s[i+1][j-1] && sv>s[i+1][j] && sv>s[i+1][j+1] && sv>s[i+1][j+2] &&
+            sv>s[i+2][j-2] && sv>s[i+2][j-1] && sv>s[i+2][j] && sv>s[i+2][j+1] && sv>s[i+2][j+2])
+        {
+            sf[i][j-2]=0.0;
+        }
+        else sf[i][j-2]=sv;
+    }
+#endif /*DB_USE_SIMD*/
+}
+
+/*Compute Max-suppression-filtered image for a chunk of sf starting at (left,top) and
+stopping at bottom. The output is shifted two steps left. The input s should exist for 2 pixels
+outside the specified region. s[i][left-2] and sf[i][left-2] should be 16 byte aligned.
+Top must be at least 3. Reading and writing from and to the input and output images is done
+as if the region had a width equal to a multiple of 124. If this is not the case, the images
+should be over-allocated and the input cleared for a sufficient region*/
+void db_MaxSuppressFilter_5x5_Aligned16_f(float **sf,float **s,int left,int top,int right,int bottom,
+                                          /*temp should point to at least
+                                          6*132 floats of 16-byte-aligned allocated memory*/
+                                          float *temp)
+{
+    int x,next_x;
+
+    for(x=left;x<=right;x=next_x)
+    {
+        next_x=x+124;
+        db_MaxSuppressFilterChunk_5x5_Aligned16_f(sf,s,x,top,bottom,temp);
+    }
+}
+
+/*Extract corners from the chunk (left,top) to (right,bottom). Store in x_temp,y_temp and s_temp
+which should point to space of at least as many positions as there are pixels in the chunk*/
+inline int db_CornersFromChunk(float **strength,int left,int top,int right,int bottom,float threshold,double *x_temp,double *y_temp,double *s_temp)
+{
+    int i,j,nr;
+    float s;
+
+    nr=0;
+    for(i=top;i<=bottom;i++) for(j=left;j<=right;j++)
+    {
+        s=strength[i][j];
+
+        if(s>=threshold &&
+            s>strength[i-2][j-2] && s>strength[i-2][j-1] && s>strength[i-2][j] && s>strength[i-2][j+1] && s>strength[i-2][j+2] &&
+            s>strength[i-1][j-2] && s>strength[i-1][j-1] && s>strength[i-1][j] && s>strength[i-1][j+1] && s>strength[i-1][j+2] &&
+            s>strength[  i][j-2] && s>strength[  i][j-1] &&                       s>strength[  i][j+1] && s>strength[  i][j+2] &&
+            s>strength[i+1][j-2] && s>strength[i+1][j-1] && s>strength[i+1][j] && s>strength[i+1][j+1] && s>strength[i+1][j+2] &&
+            s>strength[i+2][j-2] && s>strength[i+2][j-1] && s>strength[i+2][j] && s>strength[i+2][j+1] && s>strength[i+2][j+2])
+        {
+            x_temp[nr]=(double) j;
+            y_temp[nr]=(double) i;
+            s_temp[nr]=(double) s;
+            nr++;
+        }
+    }
+    return(nr);
+}
+
+
+//Sub-pixel accuracy using 2D quadratic interpolation.(YCJ)
+inline void db_SubPixel(float **strength, const double xd, const double yd, double &xs, double &ys)
+{
+    int x = (int) xd;
+    int y = (int) yd;
+
+    float fxx = strength[y][x-1] - strength[y][x] - strength[y][x] + strength[y][x+1];
+    float fyy = strength[y-1][x] - strength[y][x] - strength[y][x] + strength[y+1][x];
+    float fxy = (strength[y-1][x-1] - strength[y-1][x+1] - strength[y+1][x-1] + strength[y+1][x+1])/(float)4.0;
+
+    float denom = (fxx * fyy - fxy * fxy) * (float) 2.0;
+
+    xs = xd;
+    ys = yd;
+
+    if ( db_absf(denom) <= FLT_EPSILON )
+    {
+        return;
+    }
+    else
+    {
+        float fx = strength[y][x+1] - strength[y][x-1];
+        float fy = strength[y+1][x] - strength[y-1][x];
+
+        float dx = (fyy * fx - fxy * fy) / denom;
+        float dy = (fxx * fy - fxy * fx) / denom;
+
+        if ( db_absf(dx) > 1.0 || db_absf(dy) > 1.0 )
+        {
+            return;
+        }
+        else
+        {
+            xs -= dx;
+            ys -= dy;
+        }
+    }
+
+    return;
+}
+
+/*Extract corners from the image part from (left,top) to (right,bottom).
+Store in x and y, extracting at most satnr corners in each block of size (bw,bh).
+The pointer temp_d should point to at least 5*bw*bh positions.
+area_factor holds how many corners max to extract per 10000 pixels*/
+void db_ExtractCornersSaturated(float **strength,int left,int top,int right,int bottom,
+                                int bw,int bh,unsigned long area_factor,
+                                float threshold,double *temp_d,
+                                double *x_coord,double *y_coord,int *nr_corners)
+{
+    double *x_temp,*y_temp,*s_temp,*select_temp;
+    double loc_thresh;
+    unsigned long bwbh,area,saturation;
+    int x,next_x,last_x;
+    int y,next_y,last_y;
+    int nr,nr_points,i,stop;
+
+    bwbh=bw*bh;
+    x_temp=temp_d;
+    y_temp=x_temp+bwbh;
+    s_temp=y_temp+bwbh;
+    select_temp=s_temp+bwbh;
+
+#ifdef DB_SUB_PIXEL
+    // subpixel processing may sometimes push the corner ourside the real border
+    // increasing border size:
+    left++;
+    top++;
+    bottom--;
+    right--;
+#endif /*DB_SUB_PIXEL*/
+
+    nr_points=0;
+    for(y=top;y<=bottom;y=next_y)
+    {
+        next_y=y+bh;
+        last_y=next_y-1;
+        if(last_y>bottom) last_y=bottom;
+        for(x=left;x<=right;x=next_x)
+        {
+            next_x=x+bw;
+            last_x=next_x-1;
+            if(last_x>right) last_x=right;
+
+            area=(last_x-x+1)*(last_y-y+1);
+            saturation=(area*area_factor)/10000;
+            nr=db_CornersFromChunk(strength,x,y,last_x,last_y,threshold,x_temp,y_temp,s_temp);
+            if(nr)
+            {
+                if(((unsigned long)nr)>saturation) loc_thresh=db_LeanQuickSelect(s_temp,nr,nr-saturation,select_temp);
+                else loc_thresh=threshold;
+
+                stop=nr_points+saturation;
+                for(i=0;(i<nr)&&(nr_points<stop);i++)
+                {
+                    if(s_temp[i]>=loc_thresh)
+                    {
+                        #ifdef DB_SUB_PIXEL
+                               db_SubPixel(strength, x_temp[i], y_temp[i], x_coord[nr_points], y_coord[nr_points]);
+                        #else
+                               x_coord[nr_points]=x_temp[i];
+                               y_coord[nr_points]=y_temp[i];
+                        #endif
+
+                        nr_points++;
+                    }
+                }
+            }
+        }
+    }
+    *nr_corners=nr_points;
+}
+
+db_CornerDetector_f::db_CornerDetector_f()
+{
+    m_w=0; m_h=0;
+}
+
+db_CornerDetector_f::~db_CornerDetector_f()
+{
+    Clean();
+}
+
+void db_CornerDetector_f::Clean()
+{
+    if(m_w!=0)
+    {
+        delete [] m_temp_f;
+        delete [] m_temp_d;
+        db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h);
+    }
+    m_w=0; m_h=0;
+}
+
+unsigned long db_CornerDetector_f::Init(int im_width,int im_height,int target_nr_corners,
+                            int nr_horizontal_blocks,int nr_vertical_blocks,
+                            double absolute_threshold,double relative_threshold)
+{
+    int chunkwidth=208;
+    int block_width,block_height;
+    unsigned long area_factor;
+    int active_width,active_height;
+
+    active_width=db_maxi(1,im_width-10);
+    active_height=db_maxi(1,im_height-10);
+    block_width=db_maxi(1,active_width/nr_horizontal_blocks);
+    block_height=db_maxi(1,active_height/nr_vertical_blocks);
+
+    area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/
+        (((double)active_width)*((double)active_height)))));
+
+    return(Start(im_width,im_height,block_width,block_height,area_factor,
+        absolute_threshold,relative_threshold,chunkwidth));
+}
+
+unsigned long db_CornerDetector_f::Start(int im_width,int im_height,
+                             int block_width,int block_height,unsigned long area_factor,
+                             double absolute_threshold,double relative_threshold,int chunkwidth)
+{
+    Clean();
+
+    m_w=im_width;
+    m_h=im_height;
+    m_cw=chunkwidth;
+    m_bw=block_width;
+    m_bh=block_height;
+    m_area_factor=area_factor;
+    m_r_thresh=relative_threshold;
+    m_a_thresh=absolute_threshold;
+    m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000);
+
+    m_temp_f=new float[13*(m_cw+4)];
+    m_temp_d=new double[5*m_bw*m_bh];
+    m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h);
+
+    return(m_max_nr);
+}
+
+void db_CornerDetector_f::DetectCorners(const float * const *img,double *x_coord,double *y_coord,int *nr_corners) const
+{
+    float max_val,threshold;
+
+    db_HarrisStrength_f(m_strength,img,m_w,m_h,m_temp_f,m_cw);
+
+    if(m_r_thresh)
+    {
+        max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6);
+        threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh);
+    }
+    else threshold= (float) m_a_thresh;
+
+    db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold,
+        m_temp_d,x_coord,y_coord,nr_corners);
+}
+
+db_CornerDetector_u::db_CornerDetector_u()
+{
+    m_w=0; m_h=0;
+}
+
+db_CornerDetector_u::~db_CornerDetector_u()
+{
+    Clean();
+}
+
+db_CornerDetector_u::db_CornerDetector_u(const db_CornerDetector_u& cd)
+{
+    Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor,
+        cd.m_a_thresh, cd.m_r_thresh);
+}
+
+db_CornerDetector_u& db_CornerDetector_u::operator=(const db_CornerDetector_u& cd)
+{
+    if ( this == &cd ) return *this;
+
+    Clean();
+
+    Start(cd.m_w, cd.m_h, cd.m_bw, cd.m_bh, cd.m_area_factor,
+        cd.m_a_thresh, cd.m_r_thresh);
+
+    return *this;
+}
+
+void db_CornerDetector_u::Clean()
+{
+    if(m_w!=0)
+    {
+        delete [] m_temp_i;
+        delete [] m_temp_d;
+        db_FreeStrengthImage_f(m_strength_mem,m_strength,m_h);
+    }
+    m_w=0; m_h=0;
+}
+
+unsigned long db_CornerDetector_u::Init(int im_width,int im_height,int target_nr_corners,
+                            int nr_horizontal_blocks,int nr_vertical_blocks,
+                            double absolute_threshold,double relative_threshold)
+{
+    int block_width,block_height;
+    unsigned long area_factor;
+    int active_width,active_height;
+
+    active_width=db_maxi(1,im_width-10);
+    active_height=db_maxi(1,im_height-10);
+    block_width=db_maxi(1,active_width/nr_horizontal_blocks);
+    block_height=db_maxi(1,active_height/nr_vertical_blocks);
+
+    area_factor=db_minl(1000,db_maxl(1,(long)(10000.0*((double)target_nr_corners)/
+        (((double)active_width)*((double)active_height)))));
+
+    return(Start(im_width,im_height,block_width,block_height,area_factor,
+        16.0*absolute_threshold,relative_threshold));
+}
+
+unsigned long db_CornerDetector_u::Start(int im_width,int im_height,
+                             int block_width,int block_height,unsigned long area_factor,
+                             double absolute_threshold,double relative_threshold)
+{
+    Clean();
+
+    m_w=im_width;
+    m_h=im_height;
+    m_bw=block_width;
+    m_bh=block_height;
+    m_area_factor=area_factor;
+    m_r_thresh=relative_threshold;
+    m_a_thresh=absolute_threshold;
+    m_max_nr=db_maxl(1,1+(m_w*m_h*m_area_factor)/10000);
+
+    m_temp_i=new int[18*128];
+    m_temp_d=new double[5*m_bw*m_bh];
+    m_strength=db_AllocStrengthImage_f(&m_strength_mem,m_w,m_h);
+
+    return(m_max_nr);
+}
+
+void db_CornerDetector_u::DetectCorners(const unsigned char * const *img,double *x_coord,double *y_coord,int *nr_corners,
+                                        const unsigned char * const *msk, unsigned char fgnd) const
+{
+    float max_val,threshold;
+
+    db_HarrisStrength_u(m_strength,img,m_w,m_h,m_temp_i);
+
+
+    if(m_r_thresh)
+    {
+        max_val=db_MaxImage_Aligned16_f(m_strength,3,3,m_w-6,m_h-6);
+        threshold= (float) db_maxd(m_a_thresh,max_val*m_r_thresh);
+    }
+    else threshold= (float) m_a_thresh;
+
+    db_ExtractCornersSaturated(m_strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,threshold,
+        m_temp_d,x_coord,y_coord,nr_corners);
+
+
+    if ( msk )
+    {
+        int nr_corners_mask=0;
+
+        for ( int i = 0; i < *nr_corners; ++i)
+        {
+            int cor_x = db_roundi(*(x_coord+i));
+            int cor_y = db_roundi(*(y_coord+i));
+            if ( msk[cor_y][cor_x] == fgnd )
+            {
+                x_coord[nr_corners_mask] = x_coord[i];
+                y_coord[nr_corners_mask] = y_coord[i];
+                nr_corners_mask++;
+            }
+        }
+        *nr_corners = nr_corners_mask;
+    }
+}
+
+void db_CornerDetector_u::ExtractCorners(float ** strength, double *x_coord, double *y_coord, int *nr_corners) {
+    if ( m_w!=0 )
+        db_ExtractCornersSaturated(strength,BORDER,BORDER,m_w-BORDER-1,m_h-BORDER-1,m_bw,m_bh,m_area_factor,float(m_a_thresh),
+            m_temp_d,x_coord,y_coord,nr_corners);
+}
+
diff --git a/benchmark/feature_stab/db_vlvm/db_feature_detection.h b/benchmark/feature_stab/db_vlvm/db_feature_detection.h
new file mode 100644
index 00000000..68ffcc9a
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_feature_detection.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*$Id: db_feature_detection.h,v 1.3 2011/06/17 14:03:30 mbansal Exp $*/
+
+#ifndef DB_FEATURE_DETECTION_H
+#define DB_FEATURE_DETECTION_H
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+/*!
+ * \defgroup FeatureDetection Feature Detection
+ */
+#include "db_utilities.h"
+#include "db_utilities_constants.h"
+#include <stdlib.h> //for NULL
+
+/*!
+ * \class db_CornerDetector_f
+ * \ingroup FeatureDetection
+ * \brief Harris corner detector for float images.
+ *
+ *  This class performs Harris corner extraction on *float* images managed
+ * with functions in \ref LMImageBasicUtilities.
+ */
+class DB_API db_CornerDetector_f
+{
+public:
+    db_CornerDetector_f();
+    ~db_CornerDetector_f();
+
+    /*!
+     * Set parameters and pre-allocate memory. Return an upper bound
+     * on the number of corners detected in one frame.
+     * \param im_width      width
+     * \param im_height     height
+     * \param target_nr_corners
+     * \param nr_horizontal_blocks
+     * \param nr_vertical_blocks
+     * \param absolute_threshold
+     * \param relative_threshold
+     */
+    unsigned long Init(int im_width,int im_height,
+                            int target_nr_corners=DB_DEFAULT_TARGET_NR_CORNERS,
+                            int nr_horizontal_blocks=DB_DEFAULT_NR_FEATURE_BLOCKS,
+                            int nr_vertical_blocks=DB_DEFAULT_NR_FEATURE_BLOCKS,
+                            double absolute_threshold=DB_DEFAULT_ABS_CORNER_THRESHOLD,
+                            double relative_threshold=DB_DEFAULT_REL_CORNER_THRESHOLD);
+
+    /*!
+     * Detect the corners.
+     * x_coord and y_coord should be pre-allocated arrays of length returned by Init().
+     * \param img   row array pointer
+     * \param x_coord   corner locations
+     * \param y_coord   corner locations
+     * \param nr_corners    actual number of corners computed
+     */
+    void DetectCorners(const float * const *img,double *x_coord,double *y_coord,int *nr_corners) const;
+    void SetAbsoluteThreshold(double a_thresh) { m_a_thresh = a_thresh; };
+    void SetRelativeThreshold(double r_thresh) { m_r_thresh = r_thresh; };
+protected:
+    void Clean();
+    unsigned long Start(int im_width,int im_height,
+            int block_width,int block_height,unsigned long area_factor,
+            double absolute_threshold,double relative_threshold,int chunkwidth);
+
+    int m_w,m_h,m_cw,m_bw,m_bh;
+    /*Area factor holds the maximum number of corners to detect
+    per 10000 pixels*/
+    unsigned long m_area_factor,m_max_nr;
+    double m_a_thresh,m_r_thresh;
+    float *m_temp_f;
+    double *m_temp_d;
+    float **m_strength,*m_strength_mem;
+};
+/*!
+ * \class db_CornerDetector_u
+ * \ingroup FeatureDetection
+ * \brief Harris corner detector for byte images.
+ *
+ *  This class performs Harris corner extraction on *byte* images managed
+ * with functions in \ref LMImageBasicUtilities.
+ */
+class DB_API db_CornerDetector_u
+{
+public:
+    db_CornerDetector_u();
+    virtual ~db_CornerDetector_u();
+
+    /*!
+     Copy ctor duplicates settings.
+     Memory is not copied.
+     */
+    db_CornerDetector_u(const db_CornerDetector_u& cd);
+    /*!
+     Assignment optor duplicates settings.
+     Memory not copied.
+     */
+    db_CornerDetector_u& operator=(const db_CornerDetector_u& cd);
+
+    /*!
+     * Set parameters and pre-allocate memory. Return an upper bound
+     * on the number of corners detected in one frame
+     */
+    virtual unsigned long Init(int im_width,int im_height,
+                            int target_nr_corners=DB_DEFAULT_TARGET_NR_CORNERS,
+                            int nr_horizontal_blocks=DB_DEFAULT_NR_FEATURE_BLOCKS,
+                            int nr_vertical_blocks=DB_DEFAULT_NR_FEATURE_BLOCKS,
+                            double absolute_threshold=DB_DEFAULT_ABS_CORNER_THRESHOLD,
+                            double relative_threshold=DB_DEFAULT_REL_CORNER_THRESHOLD);
+
+    /*!
+     * Detect the corners.
+     * Observe that the image should be overallocated by at least 256 bytes
+     * at the end.
+     * x_coord and y_coord should be pre-allocated arrays of length returned by Init().
+     * Specifying image mask will restrict corner output to foreground regions.
+     * Foreground value can be specified using fgnd. By default any >0 mask value
+     * is considered to be foreground
+     * \param img   row array pointer
+     * \param x_coord   corner locations
+     * \param y_coord   corner locations
+     * \param nr_corners    actual number of corners computed
+     * \param msk       row array pointer to mask image
+     * \param fgnd      foreground value in the mask
+     */
+    virtual void DetectCorners(const unsigned char * const *img,double *x_coord,double *y_coord,int *nr_corners,
+        const unsigned char * const * msk=NULL, unsigned char fgnd=255) const;
+
+    /*!
+     Set absolute feature threshold
+     */
+    virtual void SetAbsoluteThreshold(double a_thresh) { m_a_thresh = a_thresh; };
+    /*!
+     Set relative feature threshold
+     */
+    virtual void SetRelativeThreshold(double r_thresh) { m_r_thresh = r_thresh; };
+
+    /*!
+     Extract corners from a pre-computed strength image.
+     \param strength    Harris strength image
+     \param x_coord corner locations
+     \param y_coord corner locations
+     \param nr_corners  actual number of corners computed
+     */
+    virtual void ExtractCorners(float ** strength, double *x_coord, double *y_coord, int *nr_corners);
+protected:
+    virtual void Clean();
+    /*The absolute threshold to this function should be 16.0 times
+    normal*/
+    unsigned long Start(int im_width,int im_height,
+            int block_width,int block_height,unsigned long area_factor,
+            double absolute_threshold,double relative_threshold);
+
+    int m_w,m_h,m_bw,m_bh;
+    /*Area factor holds the maximum number of corners to detect
+    per 10000 pixels*/
+    unsigned long m_area_factor,m_max_nr;
+    double m_a_thresh,m_r_thresh;
+    int *m_temp_i;
+    double *m_temp_d;
+    float **m_strength,*m_strength_mem;
+};
+
+#endif /*DB_FEATURE_DETECTION_H*/
diff --git a/benchmark/feature_stab/db_vlvm/db_feature_matching.cpp b/benchmark/feature_stab/db_vlvm/db_feature_matching.cpp
new file mode 100644
index 00000000..891cac33
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_feature_matching.cpp
@@ -0,0 +1,3409 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*$Id: db_feature_matching.cpp,v 1.4 2011/06/17 14:03:30 mbansal Exp $*/
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+
+#include "db_utilities.h"
+#include "db_feature_matching.h"
+#ifdef _VERBOSE_
+#include <iostream>
+#endif
+
+
+int AffineWarpPoint_NN_LUT_x[11][11];
+int AffineWarpPoint_NN_LUT_y[11][11];
+
+float AffineWarpPoint_BL_LUT_x[11][11];
+float AffineWarpPoint_BL_LUT_y[11][11];
+
+
+inline float db_SignedSquareNormCorr7x7_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g)
+{
+    unsigned char *pf,*pg;
+    float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
+    int xm_f,xm_g;
+
+    xm_f=x_f-3;
+    xm_g=x_g-3;
+    fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
+
+    pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    fg_corr=49.0f*fgsum-fsum*gsum;
+    den=(49.0f*f2sum-fsum*fsum)*(49.0f*g2sum-gsum*gsum);
+    if(den!=0.0)
+    {
+        if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
+        return(-fg_corr*fg_corr/den);
+    }
+    return(0.0);
+}
+
+inline float db_SignedSquareNormCorr9x9_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g)
+{
+    unsigned char *pf,*pg;
+    float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
+    int xm_f,xm_g;
+
+    xm_f=x_f-4;
+    xm_g=x_g-4;
+    fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
+
+    pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    fg_corr=81.0f*fgsum-fsum*gsum;
+    den=(81.0f*f2sum-fsum*fsum)*(81.0f*g2sum-gsum*gsum);
+    if(den!=0.0)
+    {
+        if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
+        return(-fg_corr*fg_corr/den);
+    }
+    return(0.0);
+}
+
+inline float db_SignedSquareNormCorr11x11_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g)
+{
+    unsigned char *pf,*pg;
+    float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
+    int xm_f,xm_g;
+
+    xm_f=x_f-5;
+    xm_g=x_g-5;
+    fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
+
+    pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    fg_corr=121.0f*fgsum-fsum*gsum;
+    den=(121.0f*f2sum-fsum*fsum)*(121.0f*g2sum-gsum*gsum);
+    if(den!=0.0)
+    {
+        if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
+        return(-fg_corr*fg_corr/den);
+    }
+    return(0.0);
+}
+
+inline void db_SignedSquareNormCorr11x11_Pre_u(unsigned char **f_img,int x_f,int y_f,float *sum,float *recip)
+{
+    unsigned char *pf;
+    float den;
+    int f,f2sum,fsum;
+    int xm_f;
+
+    xm_f=x_f-5;
+
+    pf=f_img[y_f-5]+xm_f;
+    f= *pf++; f2sum=f*f;  fsum=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    pf=f_img[y_f-4]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    pf=f_img[y_f-3]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    pf=f_img[y_f-2]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    pf=f_img[y_f-1]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    pf=f_img[y_f]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    pf=f_img[y_f+1]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    pf=f_img[y_f+2]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    pf=f_img[y_f+3]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    pf=f_img[y_f+4]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    pf=f_img[y_f+5]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    *sum= (float) fsum;
+    den=(121.0f*f2sum-fsum*fsum);
+    *recip=(float)(((den!=0.0)?1.0/den:0.0));
+}
+
+inline void db_SignedSquareNormCorr5x5_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip)
+{
+    float den;
+    int f2sum,fsum;
+    int xm_f=x_f-2;
+
+#ifndef DB_USE_SSE2
+    const unsigned char *pf;
+    short f;
+
+    pf=f_img[y_f-2]+xm_f;
+    f= *pf++; f2sum=f*f; fsum=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f-1]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f+1]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f+2]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+    //int xwi;
+    //int ywi;
+    //f2sum=0;
+    //fsum=0;
+    //for (int r=-5;r<=5;r++){
+    //  ywi=y_f+r;
+    //  for (int c=-5;c<=5;c++){
+    //      xwi=x_f+c;
+    //      f=f_img[ywi][xwi];
+    //      f2sum+=f*f;
+    //      fsum+=f;
+    //      (*patch++)=f;
+    //  }
+    //}
+    (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0;
+    (*patch++)=0; (*patch++)=0;
+#endif /* DB_USE_SSE2 */
+
+    *sum= (float) fsum;
+    den=(25.0f*f2sum-fsum*fsum);
+    *recip= (float)((den!=0.0)?1.0/den:0.0);
+}
+
+inline void db_SignedSquareNormCorr21x21_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip)
+{
+    float den;
+    int f2sum,fsum;
+    short f;
+
+    int xwi;
+    int ywi;
+    f2sum=0;
+    fsum=0;
+    for (int r=-10;r<=10;r++){
+        ywi=y_f+r;
+        for (int c=-10;c<=10;c++){
+            xwi=x_f+c;
+            f=f_img[ywi][xwi];
+            f2sum+=f*f;
+            fsum+=f;
+            (*patch++)=f;
+        }
+    }
+
+    for(int i=442; i<512; i++)
+        (*patch++)=0;
+
+    *sum= (float) fsum;
+    den=(441.0f*f2sum-fsum*fsum);
+    *recip= (float)((den!=0.0)?1.0/den:0.0);
+
+
+}
+
+/* Lay out the image in the patch, computing norm and
+*/
+inline void db_SignedSquareNormCorr11x11_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip)
+{
+    float den;
+    int f2sum,fsum;
+    int xm_f=x_f-5;
+
+#ifndef DB_USE_SSE2
+    const unsigned char *pf;
+    short f;
+
+    pf=f_img[y_f-5]+xm_f;
+    f= *pf++; f2sum=f*f;  fsum=f;  (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f-4]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f-3]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f-2]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f-1]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f+1]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f+2]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f+3]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f+4]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f+5]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    //int xwi;
+    //int ywi;
+    //f2sum=0;
+    //fsum=0;
+    //for (int r=-5;r<=5;r++){
+    //  ywi=y_f+r;
+    //  for (int c=-5;c<=5;c++){
+    //      xwi=x_f+c;
+    //      f=f_img[ywi][xwi];
+    //      f2sum+=f*f;
+    //      fsum+=f;
+    //      (*patch++)=f;
+    //  }
+    //}
+
+    (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0;
+    (*patch++)=0; (*patch++)=0;
+#else
+    const unsigned char *pf0 =f_img[y_f-5]+xm_f;
+    const unsigned char *pf1 =f_img[y_f-4]+xm_f;
+    const unsigned char *pf2 =f_img[y_f-3]+xm_f;
+    const unsigned char *pf3 =f_img[y_f-2]+xm_f;
+    const unsigned char *pf4 =f_img[y_f-1]+xm_f;
+    const unsigned char *pf5 =f_img[y_f  ]+xm_f;
+    const unsigned char *pf6 =f_img[y_f+1]+xm_f;
+    const unsigned char *pf7 =f_img[y_f+2]+xm_f;
+    const unsigned char *pf8 =f_img[y_f+3]+xm_f;
+    const unsigned char *pf9 =f_img[y_f+4]+xm_f;
+    const unsigned char *pf10=f_img[y_f+5]+xm_f;
+
+    /* pixel mask */
+    const unsigned char pm[16] = {
+        0xFF,0xFF,
+        0xFF,0xFF,
+        0xFF,0xFF,
+        0,0,0,0,0,
+        0,0,0,0,0};
+    const unsigned char * pm_p = pm;
+
+    _asm
+    {
+        mov         ecx,patch   /* load patch pointer */
+        mov         ebx, pm_p   /* load pixel mask pointer */
+        movdqu      xmm1,[ebx]  /* load pixel mask */
+
+        pxor        xmm5,xmm5   /* set xmm5 to 0 accumulator for sum squares */
+        pxor        xmm4,xmm4   /* set xmm4 to 0 accumulator for sum */
+        pxor        xmm0,xmm0   /* set xmm0 to 0 */
+
+        /* row 0 */
+        mov         eax,pf0     /* load image pointer */
+        movdqu      xmm7,[eax]  /* load 16 pixels */
+        movdqa      xmm6,xmm7
+
+        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
+        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
+
+        pand        xmm6,xmm1   /* mask out pixels 12-16 */
+
+        movdqa      [ecx+0*22],xmm7 /* move short values to patch */
+        movdqa      [ecx+0*22+16],xmm6  /* move short values to patch */
+
+        paddusw     xmm4,xmm7   /* accumulate sums */
+        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
+        paddd       xmm5,xmm7   /* accumulate sum squares */
+
+        paddw       xmm4,xmm6   /* accumulate sums */
+        pmaddwd     xmm6,xmm6   /* multiply 16 bit uints into 16 bit uints */
+        paddd       xmm5,xmm6   /* accumulate sum squares */
+
+        /* row 1 */
+        mov         eax,pf1     /* load image pointer */
+        movdqu      xmm7,[eax]  /* load 16 pixels */
+        movdqa      xmm6,xmm7
+
+        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
+        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
+
+        pand        xmm6,xmm1   /* mask out pixels 12-16 */
+
+        movdqu      [ecx+1*22],xmm7 /* move short values to patch */
+        movdqu      [ecx+1*22+16],xmm6  /* move short values to patch */
+
+        paddusw     xmm4,xmm7   /* accumulate sums */
+        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
+        paddd       xmm5,xmm7   /* accumulate sum squares */
+
+        paddw       xmm4,xmm6   /* accumulate sums */
+        pmaddwd     xmm6,xmm6   /* multiply 16 bit uints into 16 bit uints */
+        paddd       xmm5,xmm6   /* accumulate sum squares */
+
+        /* row 2 */
+        mov         eax,pf2     /* load image pointer */
+        movdqu      xmm7,[eax]  /* load 16 pixels */
+        movdqa      xmm6,xmm7
+
+        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
+        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
+
+        pand        xmm6,xmm1   /* mask out pixels 12-16 */
+
+        movdqu      [ecx+2*22],xmm7 /* move short values to patch */
+        movdqu      [ecx+2*22+16],xmm6  /* move short values to patch */
+
+        paddusw     xmm4,xmm7   /* accumulate sums */
+        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
+        paddd       xmm5,xmm7   /* accumulate sum squares */
+
+        paddw       xmm4,xmm6   /* accumulate sums */
+        pmaddwd     xmm6,xmm6   /* multiply 16 bit uints into 16 bit uints */
+        paddd       xmm5,xmm6   /* accumulate sum squares */
+
+        /* row 3 */
+        mov         eax,pf3     /* load image pointer */
+        movdqu      xmm7,[eax]  /* load 16 pixels */
+        movdqa      xmm6,xmm7
+
+        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
+        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
+
+        pand        xmm6,xmm1   /* mask out pixels 12-16 */
+
+        movdqu      [ecx+3*22],xmm7 /* move short values to patch */
+        movdqu      [ecx+3*22+16],xmm6  /* move short values to patch */
+
+        paddusw     xmm4,xmm7   /* accumulate sums */
+        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
+        paddd       xmm5,xmm7   /* accumulate sum squares */
+
+        paddw       xmm4,xmm6   /* accumulate sums */
+        pmaddwd     xmm6,xmm6   /* multiply 16 bit uints into 16 bit uints */
+        paddd       xmm5,xmm6   /* accumulate sum squares */
+
+        /* row 4 */
+        mov         eax,pf4     /* load image pointer */
+        movdqu      xmm7,[eax]  /* load 16 pixels */
+        movdqa      xmm6,xmm7
+
+        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
+        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
+
+        pand        xmm6,xmm1   /* mask out pixels 12-16 */
+
+        movdqu      [ecx+4*22],xmm7 /* move short values to patch */
+        movdqu      [ecx+4*22+16],xmm6  /* move short values to patch */
+
+        paddusw     xmm4,xmm7   /* accumulate sums */
+        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
+        paddd       xmm5,xmm7   /* accumulate sum squares */
+
+        paddw       xmm4,xmm6   /* accumulate sums */
+        pmaddwd     xmm6,xmm6   /* multiply 16 bit uints into 16 bit uints */
+        paddd       xmm5,xmm6   /* accumulate sum squares */
+
+        /* row 5 */
+        mov         eax,pf5     /* load image pointer */
+        movdqu      xmm7,[eax]  /* load 16 pixels */
+        movdqa      xmm6,xmm7
+
+        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
+        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
+
+        pand        xmm6,xmm1   /* mask out pixels 12-16 */
+
+        movdqu      [ecx+5*22],xmm7 /* move short values to patch */
+        movdqu      [ecx+5*22+16],xmm6  /* move short values to patch */
+
+        paddusw     xmm4,xmm7   /* accumulate sums */
+        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
+        paddd       xmm5,xmm7   /* accumulate sum squares */
+
+        paddw       xmm4,xmm6   /* accumulate sums */
+        pmaddwd     xmm6,xmm6   /* multiply 16 bit uints into 16 bit uints */
+        paddd       xmm5,xmm6   /* accumulate sum squares */
+
+        /* row 6 */
+        mov         eax,pf6     /* load image pointer */
+        movdqu      xmm7,[eax]  /* load 16 pixels */
+        movdqa      xmm6,xmm7
+
+        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
+        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
+
+        pand        xmm6,xmm1   /* mask out pixels 12-16 */
+
+        movdqu      [ecx+6*22],xmm7 /* move short values to patch */
+        movdqu      [ecx+6*22+16],xmm6  /* move short values to patch */
+
+        paddusw     xmm4,xmm7   /* accumulate sums */
+        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
+        paddd       xmm5,xmm7   /* accumulate sum squares */
+
+        paddw       xmm4,xmm6   /* accumulate sums */
+        pmaddwd     xmm6,xmm6   /* multiply 16 bit uints into 16 bit uints */
+        paddd       xmm5,xmm6   /* accumulate sum squares */
+
+        /* row 7 */
+        mov         eax,pf7     /* load image pointer */
+        movdqu      xmm7,[eax]  /* load 16 pixels */
+        movdqa      xmm6,xmm7
+
+        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
+        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
+
+        pand        xmm6,xmm1   /* mask out pixels 12-16 */
+
+        movdqu      [ecx+7*22],xmm7 /* move short values to patch */
+        movdqu      [ecx+7*22+16],xmm6  /* move short values to patch */
+
+        paddusw     xmm4,xmm7   /* accumulate sums */
+        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
+        paddd       xmm5,xmm7   /* accumulate sum squares */
+
+        paddw       xmm4,xmm6   /* accumulate sums */
+        pmaddwd     xmm6,xmm6   /* multiply 16 bit uints into 16 bit uints */
+        paddd       xmm5,xmm6   /* accumulate sum squares */
+
+        /* row 8 */
+        mov         eax,pf8     /* load image pointer */
+        movdqu      xmm7,[eax]  /* load 16 pixels */
+        movdqa      xmm6,xmm7
+
+        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
+        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
+
+        pand        xmm6,xmm1   /* mask out pixels 12-16 */
+
+        movdqa      [ecx+8*22],xmm7 /* move short values to patch */
+        movdqa      [ecx+8*22+16],xmm6  /* move short values to patch */
+
+        paddusw     xmm4,xmm7   /* accumulate sums */
+        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
+        paddd       xmm5,xmm7   /* accumulate sum squares */
+
+        paddw       xmm4,xmm6   /* accumulate sums */
+        pmaddwd     xmm6,xmm6   /* multiply 16 bit uints into 16 bit uints */
+        paddd       xmm5,xmm6   /* accumulate sum squares */
+
+        /* row 9 */
+        mov         eax,pf9     /* load image pointer */
+        movdqu      xmm7,[eax]  /* load 16 pixels */
+        movdqa      xmm6,xmm7
+
+        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
+        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
+
+        pand        xmm6,xmm1   /* mask out pixels 12-16 */
+
+        movdqu      [ecx+9*22],xmm7 /* move short values to patch */
+        movdqu      [ecx+9*22+16],xmm6  /* move short values to patch */
+
+        paddusw     xmm4,xmm7   /* accumulate sums */
+        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
+        paddd       xmm5,xmm7   /* accumulate sum squares */
+
+        paddw       xmm4,xmm6   /* accumulate sums */
+        pmaddwd     xmm6,xmm6   /* multiply 16 bit uints into 16 bit uints */
+        paddd       xmm5,xmm6   /* accumulate sum squares */
+
+        /* row 10 */
+        mov         eax,pf10    /* load image pointer */
+        movdqu      xmm7,[eax]  /* load 16 pixels */
+        movdqa      xmm6,xmm7
+
+        punpcklbw   xmm7,xmm0   /* unpack low pixels (first 8)*/
+        punpckhbw   xmm6,xmm0   /* unpack high pixels (last 8)*/
+
+        pand        xmm6,xmm1   /* mask out pixels 12-16 */
+
+        movdqu      [ecx+10*22],xmm7    /* move short values to patch */
+        movdqu      [ecx+10*22+16],xmm6 /* move short values to patch */
+
+        paddusw     xmm4,xmm7   /* accumulate sums */
+        pmaddwd     xmm7,xmm7   /* multiply 16 bit ints and add into 32 bit ints */
+        paddd       xmm5,xmm7   /* accumulate sum squares */
+
+        paddw       xmm4,xmm6   /* accumulate sums */
+        pmaddwd     xmm6,xmm6   /* multiply 16 bit ints and add into 32 bit ints */
+        paddd       xmm5,xmm6   /* accumulate sum squares */
+
+        /* add up the sum squares */
+        movhlps     xmm0,xmm5   /* high half to low half */
+        paddd       xmm5,xmm0   /* add high to low */
+        pshuflw     xmm0,xmm5, 0xE /* reshuffle */
+        paddd       xmm5,xmm0   /* add remaining */
+        movd        f2sum,xmm5
+
+        /* add up the sum */
+        movhlps     xmm0,xmm4
+        paddw       xmm4,xmm0   /* halves added */
+        pshuflw     xmm0,xmm4,0xE
+        paddw       xmm4,xmm0   /* quarters added */
+        pshuflw     xmm0,xmm4,0x1
+        paddw       xmm4,xmm0   /* eighth added */
+        movd        fsum, xmm4
+
+        emms
+    }
+
+    fsum = fsum & 0xFFFF;
+
+    patch[126] = 0;
+    patch[127] = 0;
+#endif /* DB_USE_SSE2 */
+
+    *sum= (float) fsum;
+    den=(121.0f*f2sum-fsum*fsum);
+    *recip= (float)((den!=0.0)?1.0/den:0.0);
+}
+
+void AffineWarpPointOffset(float &r_w,float &c_w,double Hinv[9],int r,int c)
+{
+    r_w=(float)(Hinv[3]*c+Hinv[4]*r);
+    c_w=(float)(Hinv[0]*c+Hinv[1]*r);
+}
+
+
+
+/*!
+Prewarp the patches with given affine transform. For a given homogeneous point "x", "H*x" is
+the warped point and for any displacement "d" in the warped image resulting in point "y", the
+corresponding point in the original image is given by "Hinv*y", which can be simplified for affine H.
+If "affine" is 1, then nearest neighbor method is used, else if it is 2, then
+bilinear method is used.
+ */
+inline void db_SignedSquareNormCorr11x11_PreAlign_AffinePatchWarp_u(short *patch,const unsigned char * const *f_img,
+                                                                    int xi,int yi,float *sum,float *recip,
+                                                                    const double Hinv[9],int affine)
+{
+    float den;
+    short f;
+    int f2sum,fsum;
+
+    f2sum=0;
+    fsum=0;
+
+    if (affine==1)
+    {
+        for (int r=0;r<11;r++){
+            for (int c=0;c<11;c++){
+                f=f_img[yi+AffineWarpPoint_NN_LUT_y[r][c]][xi+AffineWarpPoint_NN_LUT_x[r][c]];
+                f2sum+=f*f;
+                fsum+=f;
+                (*patch++)=f;
+            }
+        }
+    }
+    else if (affine==2)
+    {
+        for (int r=0;r<11;r++){
+            for (int c=0;c<11;c++){
+                f=db_BilinearInterpolation(yi+AffineWarpPoint_BL_LUT_y[r][c]
+                ,xi+AffineWarpPoint_BL_LUT_x[r][c],f_img);
+                f2sum+=f*f;
+                fsum+=f;
+                (*patch++)=f;
+            }
+        }
+    }
+
+
+
+    (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0; (*patch++)=0;
+    (*patch++)=0; (*patch++)=0;
+
+    *sum= (float) fsum;
+    den=(121.0f*f2sum-fsum*fsum);
+    *recip= (float)((den!=0.0)?1.0/den:0.0);
+}
+
+
+inline float db_SignedSquareNormCorr11x11_Post_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g,
+                                                float fsum_gsum,float f_recip_g_recip)
+{
+    unsigned char *pf,*pg;
+    int fgsum;
+    float fg_corr;
+    int xm_f,xm_g;
+
+    xm_f=x_f-5;
+    xm_g=x_g-5;
+
+    pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g;
+    fgsum=(*pf++)*(*pg++);  fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g;
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g;
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g;
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    fg_corr=121.0f*fgsum-fsum_gsum;
+    if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip);
+    return(-fg_corr*fg_corr*f_recip_g_recip);
+}
+
+float db_SignedSquareNormCorr21x21Aligned_Post_s(const short *f_patch,const short *g_patch,float fsum_gsum,float f_recip_g_recip)
+{
+    float fgsum,fg_corr;
+
+    fgsum= (float) db_ScalarProduct512_s(f_patch,g_patch);
+
+    fg_corr=441.0f*fgsum-fsum_gsum;
+    if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip);
+    return(-fg_corr*fg_corr*f_recip_g_recip);
+}
+
+
+float db_SignedSquareNormCorr11x11Aligned_Post_s(const short *f_patch,const short *g_patch,float fsum_gsum,float f_recip_g_recip)
+{
+    float fgsum,fg_corr;
+
+    fgsum= (float) db_ScalarProduct128_s(f_patch,g_patch);
+
+    fg_corr=121.0f*fgsum-fsum_gsum;
+    if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip);
+    return(-fg_corr*fg_corr*f_recip_g_recip);
+}
+
+float db_SignedSquareNormCorr5x5Aligned_Post_s(const short *f_patch,const short *g_patch,float fsum_gsum,float f_recip_g_recip)
+{
+    float fgsum,fg_corr;
+
+    fgsum= (float) db_ScalarProduct32_s(f_patch,g_patch);
+
+    fg_corr=25.0f*fgsum-fsum_gsum;
+    if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip);
+    return(-fg_corr*fg_corr*f_recip_g_recip);
+}
+
+
+inline float db_SignedSquareNormCorr15x15_u(unsigned char **f_img,unsigned char **g_img,int x_f,int y_f,int x_g,int y_g)
+{
+    unsigned char *pf,*pg;
+    float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
+    int xm_f,xm_g;
+
+    xm_f=x_f-7;
+    xm_g=x_g-7;
+    fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
+
+    pf=f_img[y_f-7]+xm_f; pg=g_img[y_g-7]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-6]+xm_f; pg=g_img[y_g-6]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+6]+xm_f; pg=g_img[y_g+6]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+7]+xm_f; pg=g_img[y_g+7]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    fg_corr=225.0f*fgsum-fsum*gsum;
+    den=(225.0f*f2sum-fsum*fsum)*(225.0f*g2sum-gsum*gsum);
+    if(den!=0.0)
+    {
+        if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
+        return(-fg_corr*fg_corr/den);
+    }
+    return(0.0);
+}
+
+inline float db_SignedSquareNormCorr7x7_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g)
+{
+    float f,g,*pf,*pg,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
+    int xm_f,xm_g;
+
+    xm_f=x_f-3;
+    xm_g=x_g-3;
+    fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
+
+    pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    fg_corr=49.0f*fgsum-fsum*gsum;
+    den=(49.0f*f2sum-fsum*fsum)*(49.0f*g2sum-gsum*gsum);
+    if(den!=0.0)
+    {
+        if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
+        return(-fg_corr*fg_corr/den);
+    }
+    return(0.0);
+}
+
+inline float db_SignedSquareNormCorr9x9_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g)
+{
+    float f,g,*pf,*pg,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
+    int xm_f,xm_g;
+
+    xm_f=x_f-4;
+    xm_g=x_g-4;
+    fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
+
+    pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    fg_corr=81.0f*fgsum-fsum*gsum;
+    den=(81.0f*f2sum-fsum*fsum)*(81.0f*g2sum-gsum*gsum);
+    if(den!=0.0)
+    {
+        if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
+        return(-fg_corr*fg_corr/den);
+    }
+    return(0.0);
+}
+
+inline float db_SignedSquareNormCorr11x11_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g)
+{
+    float *pf,*pg;
+    float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
+    int xm_f,xm_g;
+
+    xm_f=x_f-5;
+    xm_g=x_g-5;
+    fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
+
+    pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    fg_corr=121.0f*fgsum-fsum*gsum;
+    den=(121.0f*f2sum-fsum*fsum)*(121.0f*g2sum-gsum*gsum);
+    if(den!=0.0)
+    {
+        if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
+        return(-fg_corr*fg_corr/den);
+    }
+    return(0.0);
+}
+
+inline void db_SignedSquareNormCorr11x11_Pre_f(float **f_img,int x_f,int y_f,float *sum,float *recip)
+{
+    float *pf,den;
+    float f,f2sum,fsum;
+    int xm_f;
+
+    xm_f=x_f-5;
+
+    pf=f_img[y_f-5]+xm_f;
+    f= *pf++; f2sum=f*f;  fsum=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    pf=f_img[y_f-4]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    pf=f_img[y_f-3]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    pf=f_img[y_f-2]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    pf=f_img[y_f-1]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    pf=f_img[y_f]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    pf=f_img[y_f+1]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    pf=f_img[y_f+2]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    pf=f_img[y_f+3]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    pf=f_img[y_f+4]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    pf=f_img[y_f+5]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf++; f2sum+=f*f; fsum+=f;
+    f= *pf;   f2sum+=f*f; fsum+=f;
+
+    *sum=fsum;
+    den=(121.0f*f2sum-fsum*fsum);
+    *recip= (float) ((den!=0.0)?1.0/den:0.0);
+}
+
+inline void db_SignedSquareNormCorr11x11_PreAlign_f(float *patch,const float * const *f_img,int x_f,int y_f,float *sum,float *recip)
+{
+    const float *pf;
+    float den,f,f2sum,fsum;
+    int xm_f;
+
+    xm_f=x_f-5;
+
+    pf=f_img[y_f-5]+xm_f;
+    f= *pf++; f2sum=f*f;  fsum=f;  (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f-4]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f-3]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f-2]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f-1]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f+1]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f+2]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f+3]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f+4]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    pf=f_img[y_f+5]+xm_f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf++; f2sum+=f*f; fsum+=f; (*patch++)=f;
+    f= *pf;   f2sum+=f*f; fsum+=f; (*patch++)=f;
+
+    (*patch++)=0.0; (*patch++)=0.0; (*patch++)=0.0; (*patch++)=0.0; (*patch++)=0.0;
+    (*patch++)=0.0; (*patch++)=0.0;
+
+    *sum=fsum;
+    den=(121.0f*f2sum-fsum*fsum);
+    *recip= (float) ((den!=0.0)?1.0/den:0.0);
+}
+
+inline float db_SignedSquareNormCorr11x11_Post_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g,
+                                                float fsum_gsum,float f_recip_g_recip)
+{
+    float *pf,*pg;
+    float fgsum,fg_corr;
+    int xm_f,xm_g;
+
+    xm_f=x_f-5;
+    xm_g=x_g-5;
+
+    pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g;
+    fgsum=(*pf++)*(*pg++);  fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g;
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g;
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g;
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++); fgsum+=(*pf++)*(*pg++);
+    fgsum+=(*pf++)*(*pg++); fgsum+=(*pf)*(*pg);
+
+    fg_corr=121.0f*fgsum-fsum_gsum;
+    if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip);
+    return(-fg_corr*fg_corr*f_recip_g_recip);
+}
+
+inline float db_SignedSquareNormCorr11x11Aligned_Post_f(const float *f_patch,const float *g_patch,float fsum_gsum,float f_recip_g_recip)
+{
+    float fgsum,fg_corr;
+
+    fgsum=db_ScalarProduct128Aligned16_f(f_patch,g_patch);
+
+    fg_corr=121.0f*fgsum-fsum_gsum;
+    if(fg_corr>=0.0) return(fg_corr*fg_corr*f_recip_g_recip);
+    return(-fg_corr*fg_corr*f_recip_g_recip);
+}
+
+inline float db_SignedSquareNormCorr15x15_f(float **f_img,float **g_img,int x_f,int y_f,int x_g,int y_g)
+{
+    float *pf,*pg;
+    float f,g,fgsum,f2sum,g2sum,fsum,gsum,fg_corr,den;
+    int xm_f,xm_g;
+
+    xm_f=x_f-7;
+    xm_g=x_g-7;
+    fgsum=0.0; f2sum=0.0; g2sum=0.0; fsum=0.0; gsum=0.0;
+
+    pf=f_img[y_f-7]+xm_f; pg=g_img[y_g-7]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-6]+xm_f; pg=g_img[y_g-6]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-5]+xm_f; pg=g_img[y_g-5]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-4]+xm_f; pg=g_img[y_g-4]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-3]+xm_f; pg=g_img[y_g-3]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-2]+xm_f; pg=g_img[y_g-2]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f-1]+xm_f; pg=g_img[y_g-1]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f]+xm_f; pg=g_img[y_g]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+1]+xm_f; pg=g_img[y_g+1]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+2]+xm_f; pg=g_img[y_g+2]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+3]+xm_f; pg=g_img[y_g+3]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+4]+xm_f; pg=g_img[y_g+4]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+5]+xm_f; pg=g_img[y_g+5]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+6]+xm_f; pg=g_img[y_g+6]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    pf=f_img[y_f+7]+xm_f; pg=g_img[y_g+7]+xm_g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf++; g= *pg++; fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+    f= *pf;   g= *pg;   fgsum+=f*g; f2sum+=f*f; g2sum+=g*g; fsum+=f; gsum+=g;
+
+    fg_corr=225.0f*fgsum-fsum*gsum;
+    den=(225.0f*f2sum-fsum*fsum)*(225.0f*g2sum-gsum*gsum);
+    if(den!=0.0)
+    {
+        if(fg_corr>=0.0) return(fg_corr*fg_corr/den);
+        return(-fg_corr*fg_corr/den);
+    }
+    return(0.0);
+}
+
+db_Bucket_f** db_AllocBuckets_f(int nr_h,int nr_v,int bd)
+{
+    int i,j;
+    db_Bucket_f **bp,*b;
+
+    b=new db_Bucket_f [(nr_h+2)*(nr_v+2)];
+    bp=new db_Bucket_f* [(nr_v+2)];
+    bp=bp+1;
+    for(i= -1;i<=nr_v;i++)
+    {
+        bp[i]=b+1+(nr_h+2)*(i+1);
+        for(j= -1;j<=nr_h;j++)
+        {
+            bp[i][j].ptr=new db_PointInfo_f [bd];
+        }
+    }
+
+    return(bp);
+}
+
+db_Bucket_u** db_AllocBuckets_u(int nr_h,int nr_v,int bd)
+{
+    int i,j;
+    db_Bucket_u **bp,*b;
+
+    b=new db_Bucket_u [(nr_h+2)*(nr_v+2)];
+    bp=new db_Bucket_u* [(nr_v+2)];
+    bp=bp+1;
+    for(i= -1;i<=nr_v;i++)
+    {
+        bp[i]=b+1+(nr_h+2)*(i+1);
+        for(j= -1;j<=nr_h;j++)
+        {
+            bp[i][j].ptr=new db_PointInfo_u [bd];
+        }
+    }
+
+    return(bp);
+}
+
+void db_FreeBuckets_f(db_Bucket_f **bp,int nr_h,int nr_v)
+{
+    int i,j;
+
+    for(i= -1;i<=nr_v;i++) for(j= -1;j<=nr_h;j++)
+    {
+        delete [] bp[i][j].ptr;
+    }
+    delete [] (bp[-1]-1);
+    delete [] (bp-1);
+}
+
+void db_FreeBuckets_u(db_Bucket_u **bp,int nr_h,int nr_v)
+{
+    int i,j;
+
+    for(i= -1;i<=nr_v;i++) for(j= -1;j<=nr_h;j++)
+    {
+        delete [] bp[i][j].ptr;
+    }
+    delete [] (bp[-1]-1);
+    delete [] (bp-1);
+}
+
+void db_EmptyBuckets_f(db_Bucket_f **bp,int nr_h,int nr_v)
+{
+    int i,j;
+    for(i= -1;i<=nr_v;i++) for(j= -1;j<=nr_h;j++) bp[i][j].nr=0;
+}
+
+void db_EmptyBuckets_u(db_Bucket_u **bp,int nr_h,int nr_v)
+{
+    int i,j;
+    for(i= -1;i<=nr_v;i++) for(j= -1;j<=nr_h;j++) bp[i][j].nr=0;
+}
+
+float* db_FillBuckets_f(float *patch_space,const float * const *f_img,db_Bucket_f **bp,int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,int nr_corners)
+{
+    int i,xi,yi,xpos,ypos,nr;
+    db_Bucket_f *br;
+    db_PointInfo_f *pir;
+
+    db_EmptyBuckets_f(bp,nr_h,nr_v);
+    for(i=0;i<nr_corners;i++)
+    {
+        xi=(int) x[i];
+        yi=(int) y[i];
+        xpos=xi/bw;
+        ypos=yi/bh;
+        if(xpos>=0 && xpos<nr_h && ypos>=0 && ypos<nr_v)
+        {
+            br=&bp[ypos][xpos];
+            nr=br->nr;
+            if(nr<bd)
+            {
+                pir=&(br->ptr[nr]);
+                pir->x=xi;
+                pir->y=yi;
+                pir->id=i;
+                pir->pir=0;
+                pir->patch=patch_space;
+                br->nr=nr+1;
+
+                db_SignedSquareNormCorr11x11_PreAlign_f(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
+                patch_space+=128;
+            }
+        }
+    }
+    return(patch_space);
+}
+
+short* db_FillBuckets_u(short *patch_space,const unsigned char * const *f_img,db_Bucket_u **bp,int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,int nr_corners,int use_smaller_matching_window, int use_21)
+{
+    int i,xi,yi,xpos,ypos,nr;
+    db_Bucket_u *br;
+    db_PointInfo_u *pir;
+
+    db_EmptyBuckets_u(bp,nr_h,nr_v);
+    for(i=0;i<nr_corners;i++)
+    {
+        xi=(int)db_roundi(x[i]);
+        yi=(int)db_roundi(y[i]);
+        xpos=xi/bw;
+        ypos=yi/bh;
+        if(xpos>=0 && xpos<nr_h && ypos>=0 && ypos<nr_v)
+        {
+            br=&bp[ypos][xpos];
+            nr=br->nr;
+            if(nr<bd)
+            {
+                pir=&(br->ptr[nr]);
+                pir->x=xi;
+                pir->y=yi;
+                pir->id=i;
+                pir->pir=0;
+                pir->patch=patch_space;
+                br->nr=nr+1;
+
+                if(use_21)
+                {
+                    db_SignedSquareNormCorr21x21_PreAlign_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
+                    patch_space+=512;
+                }
+                else
+                {
+                if(!use_smaller_matching_window)
+                {
+                    db_SignedSquareNormCorr11x11_PreAlign_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
+                    patch_space+=128;
+                }
+                else
+                {
+                    db_SignedSquareNormCorr5x5_PreAlign_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
+                    patch_space+=32;
+                }
+                }
+            }
+        }
+    }
+    return(patch_space);
+}
+
+
+
+float* db_FillBucketsPrewarped_f(float *patch_space,const float *const *f_img,db_Bucket_f **bp,int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,int nr_corners,const double H[9])
+{
+    int i,xi,yi,xpos,ypos,nr,wxi,wyi;
+    db_Bucket_f *br;
+    db_PointInfo_f *pir;
+    double xd[2],wx[2];
+
+    db_EmptyBuckets_f(bp,nr_h,nr_v);
+    for(i=0;i<nr_corners;i++)
+    {
+        xd[0]=x[i];
+        xd[1]=y[i];
+        xi=(int) xd[0];
+        yi=(int) xd[1];
+        db_ImageHomographyInhomogenous(wx,H,xd);
+        wxi=(int) wx[0];
+        wyi=(int) wx[1];
+
+        xpos=((wxi+bw)/bw)-1;
+        ypos=((wyi+bh)/bh)-1;
+        if(xpos>= -1 && xpos<=nr_h && ypos>= -1 && ypos<=nr_v)
+        {
+            br=&bp[ypos][xpos];
+            nr=br->nr;
+            if(nr<bd)
+            {
+                pir=&(br->ptr[nr]);
+                pir->x=wxi;
+                pir->y=wyi;
+                pir->id=i;
+                pir->pir=0;
+                pir->patch=patch_space;
+                br->nr=nr+1;
+
+                db_SignedSquareNormCorr11x11_PreAlign_f(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
+                patch_space+=128;
+            }
+        }
+    }
+    return(patch_space);
+}
+
+short* db_FillBucketsPrewarped_u(short *patch_space,const unsigned char * const *f_img,db_Bucket_u **bp,
+                                 int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,
+                                 int nr_corners,const double H[9])
+{
+    int i,xi,yi,xpos,ypos,nr,wxi,wyi;
+    db_Bucket_u *br;
+    db_PointInfo_u *pir;
+    double xd[2],wx[2];
+
+    db_EmptyBuckets_u(bp,nr_h,nr_v);
+    for(i=0;i<nr_corners;i++)
+    {
+        xd[0]=x[i];
+        xd[1]=y[i];
+        xi=(int) db_roundi(xd[0]);
+        yi=(int) db_roundi(xd[1]);
+        db_ImageHomographyInhomogenous(wx,H,xd);
+        wxi=(int) wx[0];
+        wyi=(int) wx[1];
+
+        xpos=((wxi+bw)/bw)-1;
+        ypos=((wyi+bh)/bh)-1;
+        if(xpos>= -1 && xpos<=nr_h && ypos>= -1 && ypos<=nr_v)
+        {
+            br=&bp[ypos][xpos];
+            nr=br->nr;
+            if(nr<bd)
+            {
+                pir=&(br->ptr[nr]);
+                pir->x=wxi;
+                pir->y=wyi;
+                pir->id=i;
+                pir->pir=0;
+                pir->patch=patch_space;
+                br->nr=nr+1;
+
+                db_SignedSquareNormCorr11x11_PreAlign_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip));
+                patch_space+=128;
+            }
+        }
+    }
+    return(patch_space);
+}
+
+
+
+short* db_FillBucketsPrewarpedAffine_u(short *patch_space,const unsigned char * const *f_img,db_Bucket_u **bp,
+                                 int bw,int bh,int nr_h,int nr_v,int bd,const double *x,const double *y,
+                                 int nr_corners,const double H[9],const double Hinv[9],const int warpboundsp[4],
+                                 int affine)
+{
+    int i,xi,yi,xpos,ypos,nr,wxi,wyi;
+    db_Bucket_u *br;
+    db_PointInfo_u *pir;
+    double xd[2],wx[2];
+
+    db_EmptyBuckets_u(bp,nr_h,nr_v);
+    for(i=0;i<nr_corners;i++)
+    {
+        xd[0]=x[i];
+        xd[1]=y[i];
+        xi=(int) db_roundi(xd[0]);
+        yi=(int) db_roundi(xd[1]);
+        db_ImageHomographyInhomogenous(wx,H,xd);
+        wxi=(int) wx[0];
+        wyi=(int) wx[1];
+
+        xpos=((wxi+bw)/bw)-1;
+        ypos=((wyi+bh)/bh)-1;
+
+
+        if (xpos>= -1 && xpos<=nr_h && ypos>= -1 && ypos<=nr_v)
+        {
+            if( xi>warpboundsp[0] && xi<warpboundsp[1] && yi>warpboundsp[2] && yi<warpboundsp[3])
+            {
+
+                br=&bp[ypos][xpos];
+                nr=br->nr;
+                if(nr<bd)
+                {
+                    pir=&(br->ptr[nr]);
+                    pir->x=wxi;
+                    pir->y=wyi;
+                    pir->id=i;
+                    pir->pir=0;
+                    pir->patch=patch_space;
+                    br->nr=nr+1;
+
+                    db_SignedSquareNormCorr11x11_PreAlign_AffinePatchWarp_u(patch_space,f_img,xi,yi,&(pir->sum),&(pir->recip),Hinv,affine);
+                    patch_space+=128;
+                }
+            }
+        }
+    }
+    return(patch_space);
+}
+
+
+
+inline void db_MatchPointPair_f(db_PointInfo_f *pir_l,db_PointInfo_f *pir_r,
+                            unsigned long kA,unsigned long kB)
+{
+    int x_l,y_l,x_r,y_r,xm,ym;
+    double score;
+
+    x_l=pir_l->x;
+    y_l=pir_l->y;
+    x_r=pir_r->x;
+    y_r=pir_r->y;
+    xm=x_l-x_r;
+    ym=y_l-y_r;
+    /*Check if disparity is within the maximum disparity
+    with the formula xm^2*256+ym^2*kA<kB
+    where kA=256*w^2/h^2
+    and   kB=256*max_disp^2*w^2*/
+    if(((xm*xm)<<8)+ym*ym*kA<kB)
+    {
+        /*Correlate*/
+        score=db_SignedSquareNormCorr11x11Aligned_Post_f(pir_l->patch,pir_r->patch,
+            (pir_l->sum)*(pir_r->sum),
+            (pir_l->recip)*(pir_r->recip));
+
+        if((!(pir_l->pir)) || (score>pir_l->s))
+        {
+            /*Update left corner*/
+            pir_l->s=score;
+            pir_l->pir=pir_r;
+        }
+        if((!(pir_r->pir)) || (score>pir_r->s))
+        {
+            /*Update right corner*/
+            pir_r->s=score;
+            pir_r->pir=pir_l;
+        }
+    }
+}
+
+inline void db_MatchPointPair_u(db_PointInfo_u *pir_l,db_PointInfo_u *pir_r,
+                            unsigned long kA,unsigned long kB, unsigned int rect_window,bool use_smaller_matching_window, int use_21)
+{
+    int xm,ym;
+    double score;
+    bool compute_score;
+
+
+    if( rect_window )
+        compute_score = ((unsigned)db_absi(pir_l->x - pir_r->x)<kA && (unsigned)db_absi(pir_l->y - pir_r->y)<kB);
+    else
+    {   /*Check if disparity is within the maximum disparity
+        with the formula xm^2*256+ym^2*kA<kB
+        where kA=256*w^2/h^2
+        and   kB=256*max_disp^2*w^2*/
+        xm= pir_l->x - pir_r->x;
+        ym= pir_l->y - pir_r->y;
+        compute_score = ((xm*xm)<<8)+ym*ym*kA < kB;
+    }
+
+    if ( compute_score )
+    {
+        if(use_21)
+        {
+            score=db_SignedSquareNormCorr21x21Aligned_Post_s(pir_l->patch,pir_r->patch,
+                (pir_l->sum)*(pir_r->sum),
+                (pir_l->recip)*(pir_r->recip));
+        }
+        else
+        {
+        /*Correlate*/
+        if(!use_smaller_matching_window)
+        {
+            score=db_SignedSquareNormCorr11x11Aligned_Post_s(pir_l->patch,pir_r->patch,
+                (pir_l->sum)*(pir_r->sum),
+                (pir_l->recip)*(pir_r->recip));
+        }
+        else
+        {
+            score=db_SignedSquareNormCorr5x5Aligned_Post_s(pir_l->patch,pir_r->patch,
+                (pir_l->sum)*(pir_r->sum),
+                (pir_l->recip)*(pir_r->recip));
+        }
+        }
+
+        if((!(pir_l->pir)) || (score>pir_l->s))
+        {
+            /*Update left corner*/
+            pir_l->s=score;
+            pir_l->pir=pir_r;
+        }
+        if((!(pir_r->pir)) || (score>pir_r->s))
+        {
+            /*Update right corner*/
+            pir_r->s=score;
+            pir_r->pir=pir_l;
+        }
+    }
+}
+
+inline void db_MatchPointAgainstBucket_f(db_PointInfo_f *pir_l,db_Bucket_f *b_r,
+                                       unsigned long kA,unsigned long kB)
+{
+    int p_r,nr;
+    db_PointInfo_f *pir_r;
+
+    nr=b_r->nr;
+    pir_r=b_r->ptr;
+    for(p_r=0;p_r<nr;p_r++) db_MatchPointPair_f(pir_l,pir_r+p_r,kA,kB);
+}
+
+inline void db_MatchPointAgainstBucket_u(db_PointInfo_u *pir_l,db_Bucket_u *b_r,
+                                       unsigned long kA,unsigned long kB,int rect_window, bool use_smaller_matching_window, int use_21)
+{
+    int p_r,nr;
+    db_PointInfo_u *pir_r;
+
+    nr=b_r->nr;
+    pir_r=b_r->ptr;
+
+    for(p_r=0;p_r<nr;p_r++) db_MatchPointPair_u(pir_l,pir_r+p_r,kA,kB, rect_window, use_smaller_matching_window, use_21);
+
+}
+
+void db_MatchBuckets_f(db_Bucket_f **bp_l,db_Bucket_f **bp_r,int nr_h,int nr_v,
+                     unsigned long kA,unsigned long kB)
+{
+    int i,j,k,a,b,br_nr;
+    db_Bucket_f *br;
+    db_PointInfo_f *pir_l;
+
+    /*For all buckets*/
+    for(i=0;i<nr_v;i++) for(j=0;j<nr_h;j++)
+    {
+        br=&bp_l[i][j];
+        br_nr=br->nr;
+        /*For all points in bucket*/
+        for(k=0;k<br_nr;k++)
+        {
+            pir_l=br->ptr+k;
+            for(a=i-1;a<=i+1;a++)
+            {
+                for(b=j-1;b<=j+1;b++)
+                {
+                    db_MatchPointAgainstBucket_f(pir_l,&bp_r[a][b],kA,kB);
+                }
+            }
+        }
+    }
+}
+
+void db_MatchBuckets_u(db_Bucket_u **bp_l,db_Bucket_u **bp_r,int nr_h,int nr_v,
+                     unsigned long kA,unsigned long kB,int rect_window,bool use_smaller_matching_window, int use_21)
+{
+    int i,j,k,a,b,br_nr;
+    db_Bucket_u *br;
+    db_PointInfo_u *pir_l;
+
+    /*For all buckets*/
+    for(i=0;i<nr_v;i++) for(j=0;j<nr_h;j++)
+    {
+        br=&bp_l[i][j];
+        br_nr=br->nr;
+        /*For all points in bucket*/
+        for(k=0;k<br_nr;k++)
+        {
+            pir_l=br->ptr+k;
+            for(a=i-1;a<=i+1;a++)
+            {
+                for(b=j-1;b<=j+1;b++)
+                {
+                    db_MatchPointAgainstBucket_u(pir_l,&bp_r[a][b],kA,kB,rect_window,use_smaller_matching_window, use_21);
+                }
+            }
+        }
+    }
+}
+
+void db_CollectMatches_f(db_Bucket_f **bp_l,int nr_h,int nr_v,unsigned long target,int *id_l,int *id_r,int *nr_matches)
+{
+    int i,j,k,br_nr;
+    unsigned long count;
+    db_Bucket_f *br;
+    db_PointInfo_f *pir,*pir2;
+
+    count=0;
+    /*For all buckets*/
+    for(i=0;i<nr_v;i++) for(j=0;j<nr_h;j++)
+    {
+        br=&bp_l[i][j];
+        br_nr=br->nr;
+        /*For all points in bucket*/
+        for(k=0;k<br_nr;k++)
+        {
+            pir=br->ptr+k;
+            pir2=pir->pir;
+            if(pir2)
+            {
+                /*This point has a best match*/
+                if((pir2->pir)==pir)
+                {
+                    /*We have a mutually consistent match*/
+                    if(count<target)
+                    {
+                        id_l[count]=pir->id;
+                        id_r[count]=pir2->id;
+                        count++;
+                    }
+                }
+            }
+        }
+    }
+    *nr_matches=count;
+}
+
+void db_CollectMatches_u(db_Bucket_u **bp_l,int nr_h,int nr_v,unsigned long target,int *id_l,int *id_r,int *nr_matches)
+{
+    int i,j,k,br_nr;
+    unsigned long count;
+    db_Bucket_u *br;
+    db_PointInfo_u *pir,*pir2;
+
+    count=0;
+    /*For all buckets*/
+    for(i=0;i<nr_v;i++) for(j=0;j<nr_h;j++)
+    {
+        br=&bp_l[i][j];
+        br_nr=br->nr;
+        /*For all points in bucket*/
+        for(k=0;k<br_nr;k++)
+        {
+            pir=br->ptr+k;
+            pir2=pir->pir;
+            if(pir2)
+            {
+                /*This point has a best match*/
+                if((pir2->pir)==pir)
+                {
+                    /*We have a mutually consistent match*/
+                    if(count<target)
+                    {
+                        id_l[count]=pir->id;
+                        id_r[count]=pir2->id;
+                        count++;
+                    }
+                }
+            }
+        }
+    }
+    *nr_matches=count;
+}
+
+db_Matcher_f::db_Matcher_f()
+{
+    m_w=0; m_h=0;
+}
+
+db_Matcher_f::~db_Matcher_f()
+{
+    Clean();
+}
+
+void db_Matcher_f::Clean()
+{
+    if(m_w)
+    {
+        /*Free buckets*/
+        db_FreeBuckets_f(m_bp_l,m_nr_h,m_nr_v);
+        db_FreeBuckets_f(m_bp_r,m_nr_h,m_nr_v);
+        /*Free space for patch layouts*/
+        delete [] m_patch_space;
+    }
+    m_w=0; m_h=0;
+}
+
+unsigned long db_Matcher_f::Init(int im_width,int im_height,double max_disparity,int target_nr_corners)
+{
+    Clean();
+    m_w=im_width;
+    m_h=im_height;
+    m_bw=db_maxi(1,(int) (max_disparity*((double)im_width)));
+    m_bh=db_maxi(1,(int) (max_disparity*((double)im_height)));
+    m_nr_h=1+(im_width-1)/m_bw;
+    m_nr_v=1+(im_height-1)/m_bh;
+    m_bd=db_maxi(1,(int)(((double)target_nr_corners)*
+        max_disparity*max_disparity));
+    m_target=target_nr_corners;
+    m_kA=(long)(256.0*((double)(m_w*m_w))/((double)(m_h*m_h)));
+    m_kB=(long)(256.0*max_disparity*max_disparity*((double)(m_w*m_w)));
+
+    /*Alloc bucket structure*/
+    m_bp_l=db_AllocBuckets_f(m_nr_h,m_nr_v,m_bd);
+    m_bp_r=db_AllocBuckets_f(m_nr_h,m_nr_v,m_bd);
+
+    /*Alloc 16byte-aligned space for patch layouts*/
+    m_patch_space=new float [2*(m_nr_h+2)*(m_nr_v+2)*m_bd*128+16];
+    m_aligned_patch_space=db_AlignPointer_f(m_patch_space,16);
+
+    return(m_target);
+}
+
+void db_Matcher_f::Match(const float * const *l_img,const float * const *r_img,
+        const double *x_l,const double *y_l,int nr_l,const double *x_r,const double *y_r,int nr_r,
+        int *id_l,int *id_r,int *nr_matches,const double H[9])
+{
+    float *ps;
+
+    /*Insert the corners into bucket structure*/
+    ps=db_FillBuckets_f(m_aligned_patch_space,l_img,m_bp_l,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_l,y_l,nr_l);
+    if(H==0) db_FillBuckets_f(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_r,y_r,nr_r);
+    else db_FillBucketsPrewarped_f(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_r,y_r,nr_r,H);
+
+    /*Compute all the necessary match scores*/
+    db_MatchBuckets_f(m_bp_l,m_bp_r,m_nr_h,m_nr_v,m_kA,m_kB);
+
+    /*Collect the correspondences*/
+    db_CollectMatches_f(m_bp_l,m_nr_h,m_nr_v,m_target,id_l,id_r,nr_matches);
+}
+
+db_Matcher_u::db_Matcher_u()
+{
+    m_w=0; m_h=0;
+    m_rect_window = 0;
+    m_bw=m_bh=m_nr_h=m_nr_v=m_bd=m_target=0;
+    m_bp_l=m_bp_r=0;
+    m_patch_space=m_aligned_patch_space=0;
+}
+
+db_Matcher_u::db_Matcher_u(const db_Matcher_u& cm)
+{
+    Init(cm.m_w, cm.m_h, cm.m_max_disparity, cm.m_target, cm.m_max_disparity_v);
+}
+
+db_Matcher_u& db_Matcher_u::operator= (const db_Matcher_u& cm)
+{
+    if ( this == &cm ) return *this;
+    Init(cm.m_w, cm.m_h, cm.m_max_disparity, cm.m_target, cm.m_max_disparity_v);
+    return *this;
+}
+
+
+db_Matcher_u::~db_Matcher_u()
+{
+    Clean();
+}
+
+void db_Matcher_u::Clean()
+{
+    if(m_w)
+    {
+        /*Free buckets*/
+        db_FreeBuckets_u(m_bp_l,m_nr_h,m_nr_v);
+        db_FreeBuckets_u(m_bp_r,m_nr_h,m_nr_v);
+        /*Free space for patch layouts*/
+        delete [] m_patch_space;
+    }
+    m_w=0; m_h=0;
+}
+
+
+unsigned long db_Matcher_u::Init(int im_width,int im_height,double max_disparity,int target_nr_corners,
+                                 double max_disparity_v, bool use_smaller_matching_window, int use_21)
+{
+    Clean();
+    m_w=im_width;
+    m_h=im_height;
+    m_max_disparity=max_disparity;
+    m_max_disparity_v=max_disparity_v;
+
+    if ( max_disparity_v != DB_DEFAULT_NO_DISPARITY )
+    {
+        m_rect_window = 1;
+
+        m_bw=db_maxi(1,(int)(max_disparity*((double)im_width)));
+        m_bh=db_maxi(1,(int)(max_disparity_v*((double)im_height)));
+
+        m_bd=db_maxi(1,(int)(((double)target_nr_corners)*max_disparity*max_disparity_v));
+
+        m_kA=(int)(max_disparity*m_w);
+        m_kB=(int)(max_disparity_v*m_h);
+
+    } else
+    {
+        m_bw=(int)db_maxi(1,(int)(max_disparity*((double)im_width)));
+        m_bh=(int)db_maxi(1,(int)(max_disparity*((double)im_height)));
+
+        m_bd=db_maxi(1,(int)(((double)target_nr_corners)*max_disparity*max_disparity));
+
+        m_kA=(long)(256.0*((double)(m_w*m_w))/((double)(m_h*m_h)));
+        m_kB=(long)(256.0*max_disparity*max_disparity*((double)(m_w*m_w)));
+    }
+
+    m_nr_h=1+(im_width-1)/m_bw;
+    m_nr_v=1+(im_height-1)/m_bh;
+
+    m_target=target_nr_corners;
+
+    /*Alloc bucket structure*/
+    m_bp_l=db_AllocBuckets_u(m_nr_h,m_nr_v,m_bd);
+    m_bp_r=db_AllocBuckets_u(m_nr_h,m_nr_v,m_bd);
+
+    m_use_smaller_matching_window = use_smaller_matching_window;
+    m_use_21 = use_21;
+
+    if(m_use_21)
+    {
+        /*Alloc 64byte-aligned space for patch layouts*/
+        m_patch_space=new short [2*(m_nr_h+2)*(m_nr_v+2)*m_bd*512+64];
+        m_aligned_patch_space=db_AlignPointer_s(m_patch_space,64);
+    }
+    else
+    {
+    if(!m_use_smaller_matching_window)
+    {
+        /*Alloc 16byte-aligned space for patch layouts*/
+        m_patch_space=new short [2*(m_nr_h+2)*(m_nr_v+2)*m_bd*128+16];
+        m_aligned_patch_space=db_AlignPointer_s(m_patch_space,16);
+    }
+    else
+    {
+        /*Alloc 4byte-aligned space for patch layouts*/
+        m_patch_space=new short [2*(m_nr_h+2)*(m_nr_v+2)*m_bd*32+4];
+        m_aligned_patch_space=db_AlignPointer_s(m_patch_space,4);
+    }
+    }
+
+    return(m_target);
+}
+
+void db_Matcher_u::Match(const unsigned char * const *l_img,const unsigned char * const *r_img,
+        const double *x_l,const double *y_l,int nr_l,const double *x_r,const double *y_r,int nr_r,
+        int *id_l,int *id_r,int *nr_matches,const double H[9],int affine)
+{
+    short *ps;
+
+    /*Insert the corners into bucket structure*/
+    ps=db_FillBuckets_u(m_aligned_patch_space,l_img,m_bp_l,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_l,y_l,nr_l,m_use_smaller_matching_window,m_use_21);
+    if(H==0)
+        db_FillBuckets_u(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_r,y_r,nr_r,m_use_smaller_matching_window,m_use_21);
+    else
+    {
+        if (affine)
+        {
+            double Hinv[9];
+            db_InvertAffineTransform(Hinv,H);
+            float r_w, c_w;
+            float stretch_x[2];
+            float stretch_y[2];
+            AffineWarpPointOffset(r_w,c_w,Hinv, 5,5);
+            stretch_x[0]=db_absf(c_w);stretch_y[0]=db_absf(r_w);
+            AffineWarpPointOffset(r_w,c_w,Hinv, 5,-5);
+            stretch_x[1]=db_absf(c_w);stretch_y[1]=db_absf(r_w);
+            int max_stretxh_x=(int) (db_maxd(stretch_x[0],stretch_x[1]));
+            int max_stretxh_y=(int) (db_maxd(stretch_y[0],stretch_y[1]));
+            int warpbounds[4]={max_stretxh_x,m_w-1-max_stretxh_x,max_stretxh_y,m_h-1-max_stretxh_y};
+
+            for (int r=-5;r<=5;r++){
+                for (int c=-5;c<=5;c++){
+                    AffineWarpPointOffset(r_w,c_w,Hinv,r,c);
+                    AffineWarpPoint_BL_LUT_y[r+5][c+5]=r_w;
+                    AffineWarpPoint_BL_LUT_x[r+5][c+5]=c_w;
+
+                    AffineWarpPoint_NN_LUT_y[r+5][c+5]=db_roundi(r_w);
+                    AffineWarpPoint_NN_LUT_x[r+5][c+5]=db_roundi(c_w);
+
+                }
+            }
+
+            db_FillBucketsPrewarpedAffine_u(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,
+                x_r,y_r,nr_r,H,Hinv,warpbounds,affine);
+        }
+        else
+            db_FillBucketsPrewarped_u(ps,r_img,m_bp_r,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,x_r,y_r,nr_r,H);
+    }
+
+
+    /*Compute all the necessary match scores*/
+    db_MatchBuckets_u(m_bp_l,m_bp_r,m_nr_h,m_nr_v,m_kA,m_kB, m_rect_window,m_use_smaller_matching_window,m_use_21);
+
+    /*Collect the correspondences*/
+    db_CollectMatches_u(m_bp_l,m_nr_h,m_nr_v,m_target,id_l,id_r,nr_matches);
+}
+
+int db_Matcher_u::IsAllocated()
+{
+    return (int)(m_w != 0);
+}
diff --git a/benchmark/feature_stab/db_vlvm/db_feature_matching.h b/benchmark/feature_stab/db_vlvm/db_feature_matching.h
new file mode 100644
index 00000000..6c056b9a
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_feature_matching.h
@@ -0,0 +1,260 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*$Id: db_feature_matching.h,v 1.3 2011/06/17 14:03:30 mbansal Exp $*/
+
+#ifndef DB_FEATURE_MATCHING_H
+#define DB_FEATURE_MATCHING_H
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+/*!
+ * \defgroup FeatureMatching Feature Matching
+ */
+#include "db_utilities.h"
+#include "db_utilities_constants.h"
+
+DB_API void db_SignedSquareNormCorr21x21_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip);
+DB_API void db_SignedSquareNormCorr11x11_PreAlign_u(short *patch,const unsigned char * const *f_img,int x_f,int y_f,float *sum,float *recip);
+float db_SignedSquareNormCorr21x21Aligned_Post_s(const short *f_patch,const short *g_patch,float fsum_gsum,float f_recip_g_recip);
+float db_SignedSquareNormCorr11x11Aligned_Post_s(const short *f_patch,const short *g_patch,float fsum_gsum,float f_recip_g_recip);
+
+class db_PointInfo_f
+{
+public:
+    /*Coordinates of point*/
+    int x;
+    int y;
+    /*Id nr of point*/
+    int id;
+    /*Best match score*/
+    double s;
+    /*Best match candidate*/
+    db_PointInfo_f *pir;
+    /*Precomputed coefficients
+    of image patch*/
+    float sum;
+    float recip;
+    /*Pointer to patch layout*/
+    const float *patch;
+};
+
+class db_Bucket_f
+{
+public:
+    db_PointInfo_f *ptr;
+    int nr;
+};
+
+class db_PointInfo_u
+{
+public:
+    /*Coordinates of point*/
+    int x;
+    int y;
+    /*Id nr of point*/
+    int id;
+    /*Best match score*/
+    double s;
+    /*Best match candidate*/
+    db_PointInfo_u *pir;
+    /*Precomputed coefficients
+    of image patch*/
+    float sum;
+    float recip;
+    /*Pointer to patch layout*/
+    const short *patch;
+};
+
+class db_Bucket_u
+{
+public:
+    db_PointInfo_u *ptr;
+    int nr;
+};
+/*!
+ * \class db_Matcher_f
+ * \ingroup FeatureMatching
+ * \brief Feature matcher for float images.
+ *
+ * Normalized correlation feature matcher for <b>float</b> images.
+ * Correlation window size is constant and set to 11x11.
+ * See \ref FeatureDetection to detect Harris corners.
+ * Images are managed with functions in \ref LMImageBasicUtilities.
+ */
+class DB_API db_Matcher_f
+{
+public:
+    db_Matcher_f();
+    ~db_Matcher_f();
+
+    /*!
+     * Set parameters and pre-allocate memory. Return an upper bound
+     * on the number of matches.
+     * \param im_width          width
+     * \param im_height         height
+     * \param max_disparity     maximum distance (as fraction of image size) between matches
+     * \param target_nr_corners maximum number of matches
+     * \return maximum number of matches
+     */
+    unsigned long Init(int im_width,int im_height,
+        double max_disparity=DB_DEFAULT_MAX_DISPARITY,
+        int target_nr_corners=DB_DEFAULT_TARGET_NR_CORNERS);
+
+    /*!
+     * Match two sets of features.
+     * If the prewarp H is not NULL it will be applied to the features
+     * in the right image before matching.
+     * Parameters id_l and id_r must point to arrays of size target_nr_corners
+     * (returned by Init()).
+     * The results of matching are in id_l and id_r.
+     * Interpretaqtion of results: if id_l[i] = m and id_r[i] = n,
+     * feature at (x_l[m],y_l[m]) matched to (x_r[n],y_r[n]).
+     * \param l_img     left image
+     * \param r_img     right image
+     * \param x_l       left x coordinates of features
+     * \param y_l       left y coordinates of features
+     * \param nr_l      number of features in left image
+     * \param x_r       right x coordinates of features
+     * \param y_r       right y coordinates of features
+     * \param nr_r      number of features in right image
+     * \param id_l      indices of left features that matched
+     * \param id_r      indices of right features that matched
+     * \param nr_matches    number of features actually matched
+     * \param H         image homography (prewarp) to be applied to right image features
+     */
+    void Match(const float * const *l_img,const float * const *r_img,
+        const double *x_l,const double *y_l,int nr_l,const double *x_r,const double *y_r,int nr_r,
+        int *id_l,int *id_r,int *nr_matches,const double H[9]=0);
+
+protected:
+    void Clean();
+
+    int m_w,m_h,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,m_target;
+    unsigned long m_kA,m_kB;
+    db_Bucket_f **m_bp_l;
+    db_Bucket_f **m_bp_r;
+    float *m_patch_space,*m_aligned_patch_space;
+};
+/*!
+ * \class db_Matcher_u
+ * \ingroup FeatureMatching
+ * \brief Feature matcher for byte images.
+ *
+ * Normalized correlation feature matcher for <b>byte</b> images.
+ * Correlation window size is constant and set to 11x11.
+ * See \ref FeatureDetection to detect Harris corners.
+ * Images are managed with functions in \ref LMImageBasicUtilities.
+ *
+ * If the prewarp matrix H is supplied, the feature coordinates are warped by H before being placed in
+ * appropriate buckets. If H is an affine transform and the "affine" parameter is set to 1 or 2,
+ * then the correlation patches themselves are warped before being placed in the patch space.
+ */
+class DB_API db_Matcher_u
+{
+public:
+    db_Matcher_u();
+
+    int GetPatchSize(){return 11;};
+
+    virtual ~db_Matcher_u();
+
+    /*!
+     Copy ctor duplicates settings.
+     Memory not copied.
+     */
+    db_Matcher_u(const db_Matcher_u& cm);
+
+    /*!
+     Assignment optor duplicates settings
+     Memory not copied.
+     */
+    db_Matcher_u& operator= (const db_Matcher_u& cm);
+
+    /*!
+     * Set parameters and pre-allocate memory. Return an upper bound
+     * on the number of matches.
+     * If max_disparity_v is DB_DEFAULT_NO_DISPARITY, look for matches
+     * in a ellipse around a feature of radius max_disparity*im_width by max_disparity*im_height.
+     * If max_disparity_v is specified, use a rectangle max_disparity*im_width by max_disparity_v*im_height.
+     * \param im_width          width
+     * \param im_height         height
+     * \param max_disparity     maximum distance (as fraction of image size) between matches
+     * \param target_nr_corners maximum number of matches
+     * \param max_disparity_v   maximum vertical disparity (distance between matches)
+     * \param use_smaller_matching_window   if set to true, uses a correlation window of 5x5 instead of the default 11x11
+     * \return maximum number of matches
+     */
+    virtual unsigned long Init(int im_width,int im_height,
+        double max_disparity=DB_DEFAULT_MAX_DISPARITY,
+        int target_nr_corners=DB_DEFAULT_TARGET_NR_CORNERS,
+        double max_disparity_v=DB_DEFAULT_NO_DISPARITY,
+        bool use_smaller_matching_window=false, int use_21=0);
+
+    /*!
+     * Match two sets of features.
+     * If the prewarp H is not NULL it will be applied to the features
+     * in the right image before matching.
+     * Parameters id_l and id_r must point to arrays of size target_nr_corners
+     * (returned by Init()).
+     * The results of matching are in id_l and id_r.
+     * Interpretaqtion of results: if id_l[i] = m and id_r[i] = n,
+     * feature at (x_l[m],y_l[m]) matched to (x_r[n],y_r[n]).
+     * \param l_img     left image
+     * \param r_img     right image
+     * \param x_l       left x coordinates of features
+     * \param y_l       left y coordinates of features
+     * \param nr_l      number of features in left image
+     * \param x_r       right x coordinates of features
+     * \param y_r       right y coordinates of features
+     * \param nr_r      number of features in right image
+     * \param id_l      indices of left features that matched
+     * \param id_r      indices of right features that matched
+     * \param nr_matches    number of features actually matched
+     * \param H         image homography (prewarp) to be applied to right image features
+     * \param affine    prewarp the 11x11 patches by given affine transform. 0 means no warping,
+                        1 means nearest neighbor, 2 means bilinear warping.
+     */
+    virtual void Match(const unsigned char * const *l_img,const unsigned char * const *r_img,
+        const double *x_l,const double *y_l,int nr_l,const double *x_r,const double *y_r,int nr_r,
+        int *id_l,int *id_r,int *nr_matches,const double H[9]=0,int affine=0);
+
+    /*!
+     * Checks if Init() was called.
+     * \return 1 if Init() was called, 0 otherwise.
+     */
+    int IsAllocated();
+
+protected:
+    virtual void Clean();
+
+
+    int m_w,m_h,m_bw,m_bh,m_nr_h,m_nr_v,m_bd,m_target;
+    unsigned long m_kA,m_kB;
+    db_Bucket_u **m_bp_l;
+    db_Bucket_u **m_bp_r;
+    short *m_patch_space,*m_aligned_patch_space;
+
+    double m_max_disparity, m_max_disparity_v;
+    int m_rect_window;
+    bool m_use_smaller_matching_window;
+    int m_use_21;
+};
+
+
+
+#endif /*DB_FEATURE_MATCHING_H*/
diff --git a/benchmark/feature_stab/db_vlvm/db_framestitching.cpp b/benchmark/feature_stab/db_vlvm/db_framestitching.cpp
new file mode 100644
index 00000000..b574f7a0
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_framestitching.cpp
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_framestitching.cpp,v 1.2 2011/06/17 14:03:30 mbansal Exp $ */
+
+#include "db_utilities.h"
+#include "db_framestitching.h"
+
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+
+inline void db_RotationFromMOuterProductSum(double R[9],double *score,double M[9])
+{
+    double N[16],q[4],lambda[4],lambda_max;
+    double y[4];
+    int nr_roots;
+
+    N[0]=   M[0]+M[4]+M[8];
+    N[5]=   M[0]-M[4]-M[8];
+    N[10]= -M[0]+M[4]-M[8];
+    N[15]= -M[0]-M[4]+M[8];
+    N[1] =N[4] =M[5]-M[7];
+    N[2] =N[8] =M[6]-M[2];
+    N[3] =N[12]=M[1]-M[3];
+    N[6] =N[9] =M[1]+M[3];
+    N[7] =N[13]=M[6]+M[2];
+    N[11]=N[14]=M[5]+M[7];
+
+    /*get the quaternion representing the rotation
+    by finding the eigenvector corresponding to the most
+    positive eigenvalue. Force eigenvalue solutions, since the matrix
+    is symmetric and solutions might otherwise be lost
+    when the data is planar*/
+    db_RealEigenvalues4x4(lambda,&nr_roots,N,1);
+    if(nr_roots)
+    {
+        lambda_max=lambda[0];
+        if(nr_roots>=2)
+        {
+            if(lambda[1]>lambda_max) lambda_max=lambda[1];
+            if(nr_roots>=3)
+            {
+                if(lambda[2]>lambda_max) lambda_max=lambda[2];
+                {
+                    if(nr_roots>=4) if(lambda[3]>lambda_max) lambda_max=lambda[3];
+                }
+            }
+        }
+    }
+    else lambda_max=1.0;
+    db_EigenVector4x4(q,lambda_max,N);
+
+    /*Compute the rotation matrix*/
+    db_QuaternionToRotation(R,q);
+
+    if(score)
+    {
+        /*Compute score=transpose(q)*N*q */
+        db_Multiply4x4_4x1(y,N,q);
+        *score=db_ScalarProduct4(q,y);
+    }
+}
+
+void db_StitchSimilarity3DRaw(double *scale,double R[9],double t[3],
+                            double **Xp,double **X,int nr_points,int orientation_preserving,
+                            int allow_scaling,int allow_rotation,int allow_translation)
+{
+    int i;
+    double c[3],cp[3],r[3],rp[3],M[9],s,sp,sc;
+    double Rr[9],score_p,score_r;
+    double *temp,*temp_p;
+
+    if(allow_translation)
+    {
+        db_PointCentroid3D(c,X,nr_points);
+        db_PointCentroid3D(cp,Xp,nr_points);
+    }
+    else
+    {
+        db_Zero3(c);
+        db_Zero3(cp);
+    }
+
+    db_Zero9(M);
+    s=sp=0;
+    for(i=0;i<nr_points;i++)
+    {
+        temp=   *X++;
+        temp_p= *Xp++;
+        r[0]=(*temp++)-c[0];
+        r[1]=(*temp++)-c[1];
+        r[2]=(*temp++)-c[2];
+        rp[0]=(*temp_p++)-cp[0];
+        rp[1]=(*temp_p++)-cp[1];
+        rp[2]=(*temp_p++)-cp[2];
+
+        M[0]+=r[0]*rp[0];
+        M[1]+=r[0]*rp[1];
+        M[2]+=r[0]*rp[2];
+        M[3]+=r[1]*rp[0];
+        M[4]+=r[1]*rp[1];
+        M[5]+=r[1]*rp[2];
+        M[6]+=r[2]*rp[0];
+        M[7]+=r[2]*rp[1];
+        M[8]+=r[2]*rp[2];
+
+        s+=db_sqr(r[0])+db_sqr(r[1])+db_sqr(r[2]);
+        sp+=db_sqr(rp[0])+db_sqr(rp[1])+db_sqr(rp[2]);
+    }
+
+    /*Compute scale*/
+    if(allow_scaling) sc=sqrt(db_SafeDivision(sp,s));
+    else sc=1.0;
+    *scale=sc;
+
+    /*Compute rotation*/
+    if(allow_rotation)
+    {
+        if(orientation_preserving)
+        {
+            db_RotationFromMOuterProductSum(R,0,M);
+        }
+        else
+        {
+            /*Try preserving*/
+            db_RotationFromMOuterProductSum(R,&score_p,M);
+            /*Try reversing*/
+            M[6]= -M[6];
+            M[7]= -M[7];
+            M[8]= -M[8];
+            db_RotationFromMOuterProductSum(Rr,&score_r,M);
+            if(score_r>score_p)
+            {
+                /*Reverse is better*/
+                R[0]=Rr[0]; R[1]=Rr[1]; R[2]= -Rr[2];
+                R[3]=Rr[3]; R[4]=Rr[4]; R[5]= -Rr[5];
+                R[6]=Rr[6]; R[7]=Rr[7]; R[8]= -Rr[8];
+            }
+        }
+    }
+    else db_Identity3x3(R);
+
+    /*Compute translation*/
+    if(allow_translation)
+    {
+        t[0]=cp[0]-sc*(R[0]*c[0]+R[1]*c[1]+R[2]*c[2]);
+        t[1]=cp[1]-sc*(R[3]*c[0]+R[4]*c[1]+R[5]*c[2]);
+        t[2]=cp[2]-sc*(R[6]*c[0]+R[7]*c[1]+R[8]*c[2]);
+    }
+    else db_Zero3(t);
+}
+
+
diff --git a/benchmark/feature_stab/db_vlvm/db_framestitching.h b/benchmark/feature_stab/db_vlvm/db_framestitching.h
new file mode 100644
index 00000000..5fef5f37
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_framestitching.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_framestitching.h,v 1.2 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_FRAMESTITCHING_H
+#define DB_FRAMESTITCHING_H
+/*!
+ * \defgroup FrameStitching Frame Stitching (2D and 3D homography estimation)
+ */
+/*\{*/
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+/*!
+ * \defgroup LMFrameStitching (LM) Frame Stitching (2D and 3D homography estimation)
+ */
+/*\{*/
+
+/*!
+Find scale, rotation and translation of the similarity that
+takes the nr_points inhomogenous 3D points X to Xp
+(left to right according to Horn), i.e. for the homogenous equivalents
+Xp and X we would have
+\code
+    Xp~
+    [sR t]*X
+    [0  1]
+\endcode
+If orientation_preserving is true, R is restricted such that det(R)>0.
+allow_scaling, allow_rotation and allow_translation allow s,R and t
+to differ from 1,Identity and 0
+
+Full similarity takes the following on 550MHz:
+\code
+4.5 microseconds with       3 points
+4.7 microseconds with       4 points
+5.0 microseconds with       5 points
+5.2 microseconds with       6 points
+5.8 microseconds with      10 points
+20  microseconds with     100 points
+205 microseconds with    1000 points
+2.9 milliseconds with   10000 points
+50  milliseconds with  100000 points
+0.5 seconds      with 1000000 points
+\endcode
+Without orientation_preserving:
+\code
+4 points is minimal for (s,R,t) (R,t)
+3 points is minimal for (s,R) (R)
+2 points is minimal for (s,t)
+1 point is minimal for  (s) (t)
+\endcode
+With orientation_preserving:
+\code
+3 points is minimal for (s,R,t) (R,t)
+2 points is minimal for (s,R) (s,t) (R)
+1 point is minimal for  (s) (t)
+\endcode
+
+\param scale                    scale
+\param R                        rotation
+\param t                        translation
+\param Xp                       inhomogenouse 3D points in first coordinate system
+\param X                        inhomogenouse 3D points in second coordinate system
+\param nr_points                number of points
+\param orientation_preserving   if true, R is restricted such that det(R)>0.
+\param allow_scaling            estimate scale
+\param allow_rotation           estimate rotation
+\param allow_translation        estimate translation
+*/
+DB_API void db_StitchSimilarity3DRaw(double *scale,double R[9],double t[3],
+                            double **Xp,double **X,int nr_points,int orientation_preserving=1,
+                            int allow_scaling=1,int allow_rotation=1,int allow_translation=1);
+
+
+/*\}*/
+
+#endif /* DB_FRAMESTITCHING_H */
diff --git a/benchmark/feature_stab/db_vlvm/db_image_homography.cpp b/benchmark/feature_stab/db_vlvm/db_image_homography.cpp
new file mode 100644
index 00000000..aaad7f85
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_image_homography.cpp
@@ -0,0 +1,332 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_image_homography.cpp,v 1.2 2011/06/17 14:03:31 mbansal Exp $ */
+
+#include "db_utilities.h"
+#include "db_image_homography.h"
+#include "db_framestitching.h"
+#include "db_metrics.h"
+
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+
+/*Compute the linear constraint on H obtained by requiring that the
+ratio between coordinate i_num and i_den of xp is equal to the ratio
+between coordinate i_num and i_den of Hx. i_zero should be set to
+the coordinate not equal to i_num or i_den. No normalization is used*/
+inline void db_SProjImagePointPointConstraint(double c[9],int i_num,int i_den,int i_zero,
+                           double xp[3],double x[3])
+{
+    db_MultiplyScalarCopy3(c+3*i_den,x,  xp[i_num]);
+    db_MultiplyScalarCopy3(c+3*i_num,x, -xp[i_den]);
+    db_Zero3(c+3*i_zero);
+}
+
+/*Compute two constraints on H generated by the correspondence (Xp,X),
+assuming that Xp ~= H*X. No normalization is used*/
+inline void db_SProjImagePointPointConstraints(double c1[9],double c2[9],double xp[3],double x[3])
+{
+    int ma_ind;
+
+    /*Find index of coordinate of Xp with largest absolute value*/
+    ma_ind=db_MaxAbsIndex3(xp);
+
+    /*Generate 2 constraints,
+    each constraint is generated by considering the ratio between a
+    coordinate and the largest absolute value coordinate*/
+    switch(ma_ind)
+    {
+    case 0:
+        db_SProjImagePointPointConstraint(c1,1,0,2,xp,x);
+        db_SProjImagePointPointConstraint(c2,2,0,1,xp,x);
+        break;
+    case 1:
+        db_SProjImagePointPointConstraint(c1,0,1,2,xp,x);
+        db_SProjImagePointPointConstraint(c2,2,1,0,xp,x);
+        break;
+    default:
+        db_SProjImagePointPointConstraint(c1,0,2,1,xp,x);
+        db_SProjImagePointPointConstraint(c2,1,2,0,xp,x);
+    }
+}
+
+inline void db_SAffineImagePointPointConstraints(double c1[7],double c2[7],double xp[3],double x[3])
+{
+    double ct1[9],ct2[9];
+
+    db_SProjImagePointPointConstraints(ct1,ct2,xp,x);
+    db_Copy6(c1,ct1); c1[6]=ct1[8];
+    db_Copy6(c2,ct2); c2[6]=ct2[8];
+}
+
+void db_StitchProjective2D_4Points(double H[9],
+                                      double x1[3],double x2[3],double x3[3],double x4[3],
+                                      double xp1[3],double xp2[3],double xp3[3],double xp4[3])
+{
+    double c[72];
+
+    /*Collect the constraints*/
+    db_SProjImagePointPointConstraints(c   ,c+9 ,xp1,x1);
+    db_SProjImagePointPointConstraints(c+18,c+27,xp2,x2);
+    db_SProjImagePointPointConstraints(c+36,c+45,xp3,x3);
+    db_SProjImagePointPointConstraints(c+54,c+63,xp4,x4);
+    /*Solve for the nullvector*/
+    db_NullVector8x9Destructive(H,c);
+}
+
+void db_StitchAffine2D_3Points(double H[9],
+                                      double x1[3],double x2[3],double x3[3],
+                                      double xp1[3],double xp2[3],double xp3[3])
+{
+    double c[42];
+
+    /*Collect the constraints*/
+    db_SAffineImagePointPointConstraints(c   ,c+7 ,xp1,x1);
+    db_SAffineImagePointPointConstraints(c+14,c+21,xp2,x2);
+    db_SAffineImagePointPointConstraints(c+28,c+35,xp3,x3);
+    /*Solve for the nullvector*/
+    db_NullVector6x7Destructive(H,c);
+    db_MultiplyScalar6(H,db_SafeReciprocal(H[6]));
+    H[6]=H[7]=0; H[8]=1.0;
+}
+
+/*Compute up to three solutions for the focal length given two point correspondences
+generated by a rotation with a common unknown focal length. No specific normalization
+of the input points is required. If signed_disambiguation is true, the points are
+required to be in front of the camera*/
+inline void db_CommonFocalLengthFromRotation_2Point(double fsol[3],int *nr_sols,double x1[3],double x2[3],double xp1[3],double xp2[3],int signed_disambiguation=1)
+{
+    double m,ax,ay,apx,apy,bx,by,bpx,bpy;
+    double p1[2],p2[2],p3[2],p4[2],p5[2],p6[2];
+    double p7[3],p8[4],p9[5],p10[3],p11[4];
+    double roots[3];
+    int nr_roots,i,j;
+
+    /*Solve for focal length using the equation
+    <a,b>^2*<ap,ap><bp,bp>=<ap,bp>^2*<a,a><b,b>
+    where a and ap are the homogenous vectors in the first image
+    after focal length scaling and b,bp are the vectors in the
+    second image*/
+
+    /*Normalize homogenous coordinates so that last coordinate is one*/
+    m=db_SafeReciprocal(x1[2]);
+    ax=x1[0]*m;
+    ay=x1[1]*m;
+    m=db_SafeReciprocal(xp1[2]);
+    apx=xp1[0]*m;
+    apy=xp1[1]*m;
+    m=db_SafeReciprocal(x2[2]);
+    bx=x2[0]*m;
+    by=x2[1]*m;
+    m=db_SafeReciprocal(xp2[2]);
+    bpx=xp2[0]*m;
+    bpy=xp2[1]*m;
+
+    /*Compute cubic in l=1/(f^2)
+    by dividing out the root l=0 from the equation
+    (l(ax*bx+ay*by)+1)^2*(l(apx^2+apy^2)+1)*(l(bpx^2+bpy^2)+1)=
+    (l(apx*bpx+apy*bpy)+1)^2*(l(ax^2+ay^2)+1)*(l(bx^2+by^2)+1)*/
+    p1[1]=ax*bx+ay*by;
+    p2[1]=db_sqr(apx)+db_sqr(apy);
+    p3[1]=db_sqr(bpx)+db_sqr(bpy);
+    p4[1]=apx*bpx+apy*bpy;
+    p5[1]=db_sqr(ax)+db_sqr(ay);
+    p6[1]=db_sqr(bx)+db_sqr(by);
+    p1[0]=p2[0]=p3[0]=p4[0]=p5[0]=p6[0]=1;
+
+    db_MultiplyPoly1_1(p7,p1,p1);
+    db_MultiplyPoly1_2(p8,p2,p7);
+    db_MultiplyPoly1_3(p9,p3,p8);
+
+    db_MultiplyPoly1_1(p10,p4,p4);
+    db_MultiplyPoly1_2(p11,p5,p10);
+    db_SubtractPolyProduct1_3(p9,p6,p11);
+    /*Cubic starts at p9[1]*/
+    db_SolveCubic(roots,&nr_roots,p9[4],p9[3],p9[2],p9[1]);
+
+    for(j=0,i=0;i<nr_roots;i++)
+    {
+        if(roots[i]>0)
+        {
+            if((!signed_disambiguation) || (db_PolyEval1(p1,roots[i])*db_PolyEval1(p4,roots[i])>0))
+            {
+                fsol[j++]=db_SafeSqrtReciprocal(roots[i]);
+            }
+        }
+    }
+    *nr_sols=j;
+}
+
+int db_StitchRotationCommonFocalLength_3Points(double H[9],double x1[3],double x2[3],double x3[3],double xp1[3],double xp2[3],double xp3[3],double *f,int signed_disambiguation)
+{
+    double fsol[3];
+    int nr_sols,i,best_sol,done;
+    double cost,best_cost;
+    double m,hyp[27],x1_temp[3],x2_temp[3],xp1_temp[3],xp2_temp[3];
+    double *hyp_point,ft;
+    double y[2];
+
+    db_CommonFocalLengthFromRotation_2Point(fsol,&nr_sols,x1,x2,xp1,xp2,signed_disambiguation);
+    if(nr_sols)
+    {
+        db_DeHomogenizeImagePoint(y,xp3);
+        done=0;
+        for(i=0;i<nr_sols;i++)
+        {
+            ft=fsol[i];
+            m=db_SafeReciprocal(ft);
+            x1_temp[0]=x1[0]*m;
+            x1_temp[1]=x1[1]*m;
+            x1_temp[2]=x1[2];
+            x2_temp[0]=x2[0]*m;
+            x2_temp[1]=x2[1]*m;
+            x2_temp[2]=x2[2];
+            xp1_temp[0]=xp1[0]*m;
+            xp1_temp[1]=xp1[1]*m;
+            xp1_temp[2]=xp1[2];
+            xp2_temp[0]=xp2[0]*m;
+            xp2_temp[1]=xp2[1]*m;
+            xp2_temp[2]=xp2[2];
+
+            hyp_point=hyp+9*i;
+            db_StitchCameraRotation_2Points(hyp_point,x1_temp,x2_temp,xp1_temp,xp2_temp);
+            hyp_point[2]*=ft;
+            hyp_point[5]*=ft;
+            hyp_point[6]*=m;
+            hyp_point[7]*=m;
+            cost=db_SquaredReprojectionErrorHomography(y,hyp_point,x3);
+
+            if(!done || cost<best_cost)
+            {
+                done=1;
+                best_cost=cost;
+                best_sol=i;
+            }
+        }
+
+        if(f) *f=fsol[best_sol];
+        db_Copy9(H,hyp+9*best_sol);
+        return(1);
+    }
+    else
+    {
+        db_Identity3x3(H);
+        if(f) *f=1.0;
+        return(0);
+    }
+}
+
+void db_StitchSimilarity2DRaw(double *scale,double R[4],double t[2],
+                            double **Xp,double **X,int nr_points,int orientation_preserving,
+                            int allow_scaling,int allow_rotation,int allow_translation)
+{
+    int i;
+    double c[2],cp[2],r[2],rp[2],M[4],s,sp,sc;
+    double *temp,*temp_p;
+    double Aacc,Bacc,Aacc2,Bacc2,divisor,divisor2,m,Am,Bm;
+
+    if(allow_translation)
+    {
+        db_PointCentroid2D(c,X,nr_points);
+        db_PointCentroid2D(cp,Xp,nr_points);
+    }
+    else
+    {
+        db_Zero2(c);
+        db_Zero2(cp);
+    }
+
+    db_Zero4(M);
+    s=sp=0;
+    for(i=0;i<nr_points;i++)
+    {
+        temp=   *X++;
+        temp_p= *Xp++;
+        r[0]=(*temp++)-c[0];
+        r[1]=(*temp++)-c[1];
+        rp[0]=(*temp_p++)-cp[0];
+        rp[1]=(*temp_p++)-cp[1];
+
+        M[0]+=r[0]*rp[0];
+        M[1]+=r[0]*rp[1];
+        M[2]+=r[1]*rp[0];
+        M[3]+=r[1]*rp[1];
+
+        s+=db_sqr(r[0])+db_sqr(r[1]);
+        sp+=db_sqr(rp[0])+db_sqr(rp[1]);
+    }
+
+    /*Compute scale*/
+    if(allow_scaling) sc=sqrt(db_SafeDivision(sp,s));
+    else sc=1.0;
+    *scale=sc;
+
+    /*Compute rotation*/
+    if(allow_rotation)
+    {
+        /*orientation preserving*/
+        Aacc=M[0]+M[3];
+        Bacc=M[2]-M[1];
+        /*orientation reversing*/
+        Aacc2=M[0]-M[3];
+        Bacc2=M[2]+M[1];
+        if(Aacc!=0.0 || Bacc!=0.0)
+        {
+            divisor=sqrt(Aacc*Aacc+Bacc*Bacc);
+            m=db_SafeReciprocal(divisor);
+            Am=Aacc*m;
+            Bm=Bacc*m;
+            R[0]=  Am;
+            R[1]=  Bm;
+            R[2]= -Bm;
+            R[3]=  Am;
+        }
+        else
+        {
+            db_Identity2x2(R);
+            divisor=0.0;
+        }
+        if(!orientation_preserving && (Aacc2!=0.0 || Bacc2!=0.0))
+        {
+            divisor2=sqrt(Aacc2*Aacc2+Bacc2*Bacc2);
+            if(divisor2>divisor)
+            {
+                m=db_SafeReciprocal(divisor2);
+                Am=Aacc2*m;
+                Bm=Bacc2*m;
+                R[0]=  Am;
+                R[1]=  Bm;
+                R[2]=  Bm;
+                R[3]= -Am;
+            }
+        }
+    }
+    else db_Identity2x2(R);
+
+    /*Compute translation*/
+    if(allow_translation)
+    {
+        t[0]=cp[0]-sc*(R[0]*c[0]+R[1]*c[1]);
+        t[1]=cp[1]-sc*(R[2]*c[0]+R[3]*c[1]);
+    }
+    else db_Zero2(t);
+}
+
+
diff --git a/benchmark/feature_stab/db_vlvm/db_image_homography.h b/benchmark/feature_stab/db_vlvm/db_image_homography.h
new file mode 100644
index 00000000..165447dd
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_image_homography.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_image_homography.h,v 1.2 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_IMAGE_HOMOGRAPHY
+#define DB_IMAGE_HOMOGRAPHY
+
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+
+#include "db_framestitching.h"
+/*!
+ * \defgroup LMImageHomography (LM) Image Homography Estimation (feature based)
+ */
+/*\{*/
+/*!
+Solve for projective H such that xp~Hx. Prior normalization is not necessary,
+although desirable for numerical conditioning
+\param H    image projective (out)
+\param x1   image 1 point 1
+\param x2   image 1 point 2
+\param x3   image 1 point 3
+\param x4   image 1 point 4
+\param xp1  image 2 point 1
+\param xp2  image 2 point 2
+\param xp3  image 2 point 3
+\param xp4  image 2 point 4
+*/
+DB_API void db_StitchProjective2D_4Points(double H[9],
+                                      double x1[3],double x2[3],double x3[3],double x4[3],
+                                      double xp1[3],double xp2[3],double xp3[3],double xp4[3]);
+
+/*!
+Solve for affine H such that xp~Hx. Prior normalization is not necessary,
+although desirable for numerical conditioning
+\param H    image projective (out)
+\param x1   image 1 point 1
+\param x2   image 1 point 2
+\param x3   image 1 point 3
+\param xp1  image 2 point 1
+\param xp2  image 2 point 2
+\param xp3  image 2 point 3
+*/
+DB_API void db_StitchAffine2D_3Points(double H[9],
+                                      double x1[3],double x2[3],double x3[3],
+                                      double xp1[3],double xp2[3],double xp3[3]);
+
+/*!
+Solve for rotation R such that xp~Rx.
+Image points have to be of unit norm for the least squares to be meaningful.
+\param R    image rotation (out)
+\param x1   image 1 point 1
+\param x2   image 1 point 2
+\param xp1  image 2 point 1
+\param xp2  image 2 point 2
+*/
+inline void db_StitchCameraRotation_2Points(double R[9],
+                                            /*Image points have to be of unit norm
+                                            for the least squares to be meaningful*/
+                                            double x1[3],double x2[3],
+                                            double xp1[3],double xp2[3])
+{
+    double* x[2];
+    double* xp[2];
+    double scale,t[3];
+
+    x[0]=x1;
+    x[1]=x2;
+    xp[0]=xp1;
+    xp[1]=xp2;
+    db_StitchSimilarity3DRaw(&scale,R,t,xp,x,2,1,0,1,0);
+}
+
+/*!
+Solve for a homography H generated by a rotation R with a common unknown focal length f, i.e.
+H=diag(f,f,1)*R*diag(1/f,1/f,1) such that xp~Hx.
+If signed_disambiguation is true, the points are
+required to be in front of the camera. No specific normalization of the homogenous points
+is required, although it could be desirable to keep x1,x2,xp1 and xp2 of reasonable magnitude.
+If a solution is obtained the function returns 1, otherwise 0. If the focal length is desired
+a valid pointer should be passed in f
+*/
+DB_API int db_StitchRotationCommonFocalLength_3Points(double H[9],double x1[3],double x2[3],double x3[3],
+                                                      double xp1[3],double xp2[3],double xp3[3],double *f=0,int signed_disambiguation=1);
+
+/*!
+Find scale, rotation and translation of the similarity that
+takes the nr_points inhomogenous 2D points X to Xp,
+i.e. for the homogenous equivalents
+Xp and X we would have
+\code
+Xp~
+[sR t]*X
+[0  1]
+\endcode
+If orientation_preserving is true, R is restricted such that det(R)>0.
+allow_scaling, allow_rotation and allow_translation allow s,R and t
+to differ from 1,Identity and 0
+
+Full similarity takes the following on 550MHz:
+\code
+0.9 microseconds with       2 points
+1.0 microseconds with       3 points
+1.1 microseconds with       4 points
+1.3 microseconds with       5 points
+1.4 microseconds with       6 points
+1.7 microseconds with      10 points
+9   microseconds with     100 points
+130 microseconds with    1000 points
+1.3 milliseconds with   10000 points
+35  milliseconds with  100000 points
+350 milliseconds with 1000000 points
+\endcode
+
+Without orientation_preserving:
+\code
+3 points is minimal for (s,R,t) (R,t)
+2 points is minimal for (s,t) (s,R) (R)
+1 point is minimal for  (s) (t)
+\endcode
+
+With orientation_preserving:
+\code
+2 points is minimal for (s,R,t) (R,t) (s,t)
+1 point is minimal for (s,R) (R) (s) (t)
+\endcode
+\param scale        (out)
+\param R            2D rotation (out)
+\param t            2D translation (out)
+\param Xp           (nr_points x 2) pointer to array of image points
+\param X            (nr_points x 2 ) pointer to array of image points
+\param nr_points    number of points
+\param orientation_preserving
+\param allow_scaling    compute scale (if 0, scale=1)
+\param allow_rotation   compute rotation (if 0, R=[I])
+\param allow_translation compute translation (if 0 t = [0,0]')
+*/
+DB_API void db_StitchSimilarity2DRaw(double *scale,double R[4],double t[2],
+                            double **Xp,double **X,int nr_points,int orientation_preserving=1,
+                            int allow_scaling=1,int allow_rotation=1,int allow_translation=1);
+/*!
+See db_StitchRotationCommonFocalLength_3Points().
+\param H            Image similarity transformation (out)
+\param Xp           (nr_points x 2) pointer to array of image points
+\param X            (nr_points x 2) pointer to array of image points
+\param nr_points    number of points
+\param orientation_preserving
+\param allow_scaling    compute scale (if 0, scale=1)
+\param allow_rotation   compute rotation (if 0, R=[I])
+\param allow_translation compute translation (if 0 t = [0,0]')
+*/
+inline void db_StitchSimilarity2D(double H[9],double **Xp,double **X,int nr_points,int orientation_preserving=1,
+                                  int allow_scaling=1,int allow_rotation=1,int allow_translation=1)
+{
+    double s,R[4],t[2];
+
+    db_StitchSimilarity2DRaw(&s,R,t,Xp,X,nr_points,orientation_preserving,
+        allow_scaling,allow_rotation,allow_translation);
+
+    H[0]=s*R[0]; H[1]=s*R[1]; H[2]=t[0];
+    H[3]=s*R[2]; H[4]=s*R[3]; H[5]=t[1];
+    db_Zero2(H+6);
+    H[8]=1.0;
+}
+/*\}*/
+#endif /* DB_IMAGE_HOMOGRAPHY */
diff --git a/benchmark/feature_stab/db_vlvm/db_metrics.h b/benchmark/feature_stab/db_vlvm/db_metrics.h
new file mode 100644
index 00000000..6b95458f
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_metrics.h
@@ -0,0 +1,408 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_metrics.h,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_METRICS
+#define DB_METRICS
+
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+
+#include "db_utilities.h"
+/*!
+ * \defgroup LMMetrics (LM) Metrics
+ */
+/*\{*/
+
+
+
+
+/*!
+Compute function value fp and Jacobian J of robustifier given input value f*/
+inline void db_CauchyDerivative(double J[4],double fp[2],const double f[2],double one_over_scale2)
+{
+    double x2,y2,r,r2,r2s,one_over_r2,fu,r_fu,one_over_r_fu;
+    double one_plus_r2s,half_dfu_dx,half_dfu_dy,coeff,coeff2,coeff3;
+    int at_zero;
+
+    /*The robustifier takes the input (x,y) and makes a new
+    vector (xp,yp) where
+    xp=sqrt(log(1+(x^2+y^2)*one_over_scale2))*x/sqrt(x^2+y^2)
+    yp=sqrt(log(1+(x^2+y^2)*one_over_scale2))*y/sqrt(x^2+y^2)
+    The new vector has the property
+    xp^2+yp^2=log(1+(x^2+y^2)*one_over_scale2)
+    i.e. when it is square-summed it gives the robust
+    reprojection error
+    Define
+    r2=(x^2+y^2) and
+    r2s=r2*one_over_scale2
+    fu=log(1+r2s)/r2
+    then
+    xp=sqrt(fu)*x
+    yp=sqrt(fu)*y
+    and
+    d(r2)/dx=2x
+    d(r2)/dy=2y
+    and
+    dfu/dx=d(r2)/dx*(r2s/(1+r2s)-log(1+r2s))/(r2*r2)
+    dfu/dy=d(r2)/dy*(r2s/(1+r2s)-log(1+r2s))/(r2*r2)
+    and
+    d(xp)/dx=1/(2sqrt(fu))*(dfu/dx)*x+sqrt(fu)
+    d(xp)/dy=1/(2sqrt(fu))*(dfu/dy)*x
+    d(yp)/dx=1/(2sqrt(fu))*(dfu/dx)*y
+    d(yp)/dy=1/(2sqrt(fu))*(dfu/dy)*y+sqrt(fu)
+    */
+
+    x2=db_sqr(f[0]);
+    y2=db_sqr(f[1]);
+    r2=x2+y2;
+    r=sqrt(r2);
+
+    if(r2<=0.0) at_zero=1;
+    else
+    {
+        one_over_r2=1.0/r2;
+        r2s=r2*one_over_scale2;
+        one_plus_r2s=1.0+r2s;
+        fu=log(one_plus_r2s)*one_over_r2;
+        r_fu=sqrt(fu);
+        if(r_fu<=0.0) at_zero=1;
+        else
+        {
+            one_over_r_fu=1.0/r_fu;
+            fp[0]=r_fu*f[0];
+            fp[1]=r_fu*f[1];
+            /*r2s is always >= 0*/
+            coeff=(r2s/one_plus_r2s*one_over_r2-fu)*one_over_r2;
+            half_dfu_dx=f[0]*coeff;
+            half_dfu_dy=f[1]*coeff;
+            coeff2=one_over_r_fu*half_dfu_dx;
+            coeff3=one_over_r_fu*half_dfu_dy;
+
+            J[0]=coeff2*f[0]+r_fu;
+            J[1]=coeff3*f[0];
+            J[2]=coeff2*f[1];
+            J[3]=coeff3*f[1]+r_fu;
+            at_zero=0;
+        }
+    }
+    if(at_zero)
+    {
+        /*Close to zero the robustifying mapping
+        becomes identity*sqrt(one_over_scale2)*/
+        fp[0]=0.0;
+        fp[1]=0.0;
+        J[0]=sqrt(one_over_scale2);
+        J[1]=0.0;
+        J[2]=0.0;
+        J[3]=J[0];
+    }
+}
+
+inline double db_SquaredReprojectionErrorHomography(const double y[2],const double H[9],const double x[3])
+{
+    double x0,x1,x2,mult;
+    double sd;
+
+    x0=H[0]*x[0]+H[1]*x[1]+H[2]*x[2];
+    x1=H[3]*x[0]+H[4]*x[1]+H[5]*x[2];
+    x2=H[6]*x[0]+H[7]*x[1]+H[8]*x[2];
+    mult=1.0/((x2!=0.0)?x2:1.0);
+    sd=db_sqr((y[0]-x0*mult))+db_sqr((y[1]-x1*mult));
+
+    return(sd);
+}
+
+inline double db_SquaredInhomogenousHomographyError(const double y[2],const double H[9],const double x[2])
+{
+    double x0,x1,x2,mult;
+    double sd;
+
+    x0=H[0]*x[0]+H[1]*x[1]+H[2];
+    x1=H[3]*x[0]+H[4]*x[1]+H[5];
+    x2=H[6]*x[0]+H[7]*x[1]+H[8];
+    mult=1.0/((x2!=0.0)?x2:1.0);
+    sd=db_sqr((y[0]-x0*mult))+db_sqr((y[1]-x1*mult));
+
+    return(sd);
+}
+
+/*!
+Return a constant divided by likelihood of a Cauchy distributed
+reprojection error given the image point y, homography H, image point
+point x and the squared scale coefficient one_over_scale2=1.0/(scale*scale)
+where scale is the half width at half maximum (hWahM) of the
+Cauchy distribution*/
+inline double db_ExpCauchyInhomogenousHomographyError(const double y[2],const double H[9],const double x[2],
+                                                      double one_over_scale2)
+{
+    double sd;
+    sd=db_SquaredInhomogenousHomographyError(y,H,x);
+    return(1.0+sd*one_over_scale2);
+}
+
+/*!
+Compute residual vector f between image point y and homography Hx of
+image point x. Also compute Jacobian of f with respect
+to an update dx of H*/
+inline void db_DerivativeInhomHomographyError(double Jf_dx[18],double f[2],const double y[2],const double H[9],
+                                              const double x[2])
+{
+    double xh,yh,zh,mult,mult2,xh_mult2,yh_mult2;
+    /*The Jacobian of the inhomogenous coordinates with respect to
+    the homogenous is
+    [1/zh  0  -xh/(zh*zh)]
+    [ 0  1/zh -yh/(zh*zh)]
+    The Jacobian of the homogenous coordinates with respect to dH is
+    [x0 x1 1  0  0 0  0  0 0]
+    [ 0  0 0 x0 x1 1  0  0 0]
+    [ 0  0 0  0  0 0 x0 x1 1]
+    The output Jacobian is minus their product, i.e.
+    [-x0/zh -x1/zh -1/zh    0      0     0    x0*xh/(zh*zh) x1*xh/(zh*zh) xh/(zh*zh)]
+    [   0      0     0   -x0/zh -x1/zh -1/zh  x0*yh/(zh*zh) x1*yh/(zh*zh) yh/(zh*zh)]*/
+
+    /*Compute warped point, which is the same as
+    homogenous coordinates of reprojection*/
+    xh=H[0]*x[0]+H[1]*x[1]+H[2];
+    yh=H[3]*x[0]+H[4]*x[1]+H[5];
+    zh=H[6]*x[0]+H[7]*x[1]+H[8];
+    mult=1.0/((zh!=0.0)?zh:1.0);
+    /*Compute inhomogenous residual*/
+    f[0]=y[0]-xh*mult;
+    f[1]=y[1]-yh*mult;
+    /*Compute Jacobian*/
+    mult2=mult*mult;
+    xh_mult2=xh*mult2;
+    yh_mult2=yh*mult2;
+    Jf_dx[0]= -x[0]*mult;
+    Jf_dx[1]= -x[1]*mult;
+    Jf_dx[2]= -mult;
+    Jf_dx[3]=0;
+    Jf_dx[4]=0;
+    Jf_dx[5]=0;
+    Jf_dx[6]=x[0]*xh_mult2;
+    Jf_dx[7]=x[1]*xh_mult2;
+    Jf_dx[8]=xh_mult2;
+    Jf_dx[9]=0;
+    Jf_dx[10]=0;
+    Jf_dx[11]=0;
+    Jf_dx[12]=Jf_dx[0];
+    Jf_dx[13]=Jf_dx[1];
+    Jf_dx[14]=Jf_dx[2];
+    Jf_dx[15]=x[0]*yh_mult2;
+    Jf_dx[16]=x[1]*yh_mult2;
+    Jf_dx[17]=yh_mult2;
+}
+
+/*!
+Compute robust residual vector f between image point y and homography Hx of
+image point x. Also compute Jacobian of f with respect
+to an update dH of H*/
+inline void db_DerivativeCauchyInhomHomographyReprojection(double Jf_dx[18],double f[2],const double y[2],const double H[9],
+                                                           const double x[2],double one_over_scale2)
+{
+    double Jf_dx_loc[18],f_loc[2];
+    double J[4],J0,J1,J2,J3;
+
+    /*Compute reprojection Jacobian*/
+    db_DerivativeInhomHomographyError(Jf_dx_loc,f_loc,y,H,x);
+    /*Compute robustifier Jacobian*/
+    db_CauchyDerivative(J,f,f_loc,one_over_scale2);
+
+    /*Multiply the robustifier Jacobian with
+    the reprojection Jacobian*/
+    J0=J[0];J1=J[1];J2=J[2];J3=J[3];
+    Jf_dx[0]=J0*Jf_dx_loc[0];
+    Jf_dx[1]=J0*Jf_dx_loc[1];
+    Jf_dx[2]=J0*Jf_dx_loc[2];
+    Jf_dx[3]=                J1*Jf_dx_loc[12];
+    Jf_dx[4]=                J1*Jf_dx_loc[13];
+    Jf_dx[5]=                J1*Jf_dx_loc[14];
+    Jf_dx[6]=J0*Jf_dx_loc[6]+J1*Jf_dx_loc[15];
+    Jf_dx[7]=J0*Jf_dx_loc[7]+J1*Jf_dx_loc[16];
+    Jf_dx[8]=J0*Jf_dx_loc[8]+J1*Jf_dx_loc[17];
+    Jf_dx[9]= J2*Jf_dx_loc[0];
+    Jf_dx[10]=J2*Jf_dx_loc[1];
+    Jf_dx[11]=J2*Jf_dx_loc[2];
+    Jf_dx[12]=                J3*Jf_dx_loc[12];
+    Jf_dx[13]=                J3*Jf_dx_loc[13];
+    Jf_dx[14]=                J3*Jf_dx_loc[14];
+    Jf_dx[15]=J2*Jf_dx_loc[6]+J3*Jf_dx_loc[15];
+    Jf_dx[16]=J2*Jf_dx_loc[7]+J3*Jf_dx_loc[16];
+    Jf_dx[17]=J2*Jf_dx_loc[8]+J3*Jf_dx_loc[17];
+}
+/*!
+Compute residual vector f between image point y and rotation of
+image point x by R. Also compute Jacobian of f with respect
+to an update dx of R*/
+inline void db_DerivativeInhomRotationReprojection(double Jf_dx[6],double f[2],const double y[2],const double R[9],
+                                                   const double x[2])
+{
+    double xh,yh,zh,mult,mult2,xh_mult2,yh_mult2;
+    /*The Jacobian of the inhomogenous coordinates with respect to
+    the homogenous is
+    [1/zh  0  -xh/(zh*zh)]
+    [ 0  1/zh -yh/(zh*zh)]
+    The Jacobian at zero of the homogenous coordinates with respect to
+    [sin(phi) sin(ohm) sin(kap)] is
+    [-rx2   0   rx1 ]
+    [  0   rx2 -rx0 ]
+    [ rx0 -rx1   0  ]
+    The output Jacobian is minus their product, i.e.
+    [1+xh*xh/(zh*zh) -xh*yh/(zh*zh)   -yh/zh]
+    [xh*yh/(zh*zh)   -1-yh*yh/(zh*zh)  xh/zh]*/
+
+    /*Compute rotated point, which is the same as
+    homogenous coordinates of reprojection*/
+    xh=R[0]*x[0]+R[1]*x[1]+R[2];
+    yh=R[3]*x[0]+R[4]*x[1]+R[5];
+    zh=R[6]*x[0]+R[7]*x[1]+R[8];
+    mult=1.0/((zh!=0.0)?zh:1.0);
+    /*Compute inhomogenous residual*/
+    f[0]=y[0]-xh*mult;
+    f[1]=y[1]-yh*mult;
+    /*Compute Jacobian*/
+    mult2=mult*mult;
+    xh_mult2=xh*mult2;
+    yh_mult2=yh*mult2;
+    Jf_dx[0]= 1.0+xh*xh_mult2;
+    Jf_dx[1]= -yh*xh_mult2;
+    Jf_dx[2]= -yh*mult;
+    Jf_dx[3]= -Jf_dx[1];
+    Jf_dx[4]= -1-yh*yh_mult2;
+    Jf_dx[5]= xh*mult;
+}
+
+/*!
+Compute robust residual vector f between image point y and rotation of
+image point x by R. Also compute Jacobian of f with respect
+to an update dx of R*/
+inline void db_DerivativeCauchyInhomRotationReprojection(double Jf_dx[6],double f[2],const double y[2],const double R[9],
+                                                         const double x[2],double one_over_scale2)
+{
+    double Jf_dx_loc[6],f_loc[2];
+    double J[4],J0,J1,J2,J3;
+
+    /*Compute reprojection Jacobian*/
+    db_DerivativeInhomRotationReprojection(Jf_dx_loc,f_loc,y,R,x);
+    /*Compute robustifier Jacobian*/
+    db_CauchyDerivative(J,f,f_loc,one_over_scale2);
+
+    /*Multiply the robustifier Jacobian with
+    the reprojection Jacobian*/
+    J0=J[0];J1=J[1];J2=J[2];J3=J[3];
+    Jf_dx[0]=J0*Jf_dx_loc[0]+J1*Jf_dx_loc[3];
+    Jf_dx[1]=J0*Jf_dx_loc[1]+J1*Jf_dx_loc[4];
+    Jf_dx[2]=J0*Jf_dx_loc[2]+J1*Jf_dx_loc[5];
+    Jf_dx[3]=J2*Jf_dx_loc[0]+J3*Jf_dx_loc[3];
+    Jf_dx[4]=J2*Jf_dx_loc[1]+J3*Jf_dx_loc[4];
+    Jf_dx[5]=J2*Jf_dx_loc[2]+J3*Jf_dx_loc[5];
+}
+
+
+
+/*!
+// remove the outliers whose projection error is larger than pre-defined
+*/
+inline int db_RemoveOutliers_Homography(const double H[9], double *x_i,double *xp_i, double *wp,double *im, double *im_p, double *im_r, double *im_raw,double *im_raw_p,int point_count,double scale, double thresh=DB_OUTLIER_THRESHOLD)
+{
+    double temp_valueE, t2;
+    int c;
+    int k1=0;
+    int k2=0;
+    int k3=0;
+    int numinliers=0;
+    int ind1;
+    int ind2;
+    int ind3;
+    int isinlier;
+
+    // experimentally determined
+    t2=1.0/(thresh*thresh*thresh*thresh);
+
+    // count the inliers
+    for(c=0;c<point_count;c++)
+    {
+        ind1=c<<1;
+        ind2=c<<2;
+        ind3=3*c;
+
+        temp_valueE=db_SquaredInhomogenousHomographyError(im_p+ind3,H,im+ind3);
+
+        isinlier=((temp_valueE<=t2)?1:0);
+
+        // if it is inlier, then copy the 3d and 2d correspondences
+        if (isinlier)
+        {
+            numinliers++;
+
+            x_i[k1]=x_i[ind1];
+            x_i[k1+1]=x_i[ind1+1];
+
+            xp_i[k1]=xp_i[ind1];
+            xp_i[k1+1]=xp_i[ind1+1];
+
+            k1=k1+2;
+
+            // original normalized pixel coordinates
+            im[k3]=im[ind3];
+            im[k3+1]=im[ind3+1];
+            im[k3+2]=im[ind3+2];
+
+            im_r[k3]=im_r[ind3];
+            im_r[k3+1]=im_r[ind3+1];
+            im_r[k3+2]=im_r[ind3+2];
+
+            im_p[k3]=im_p[ind3];
+            im_p[k3+1]=im_p[ind3+1];
+            im_p[k3+2]=im_p[ind3+2];
+
+            // left and right raw pixel coordinates
+            im_raw[k3] = im_raw[ind3];
+            im_raw[k3+1] = im_raw[ind3+1];
+            im_raw[k3+2] = im_raw[ind3+2]; // the index
+
+            im_raw_p[k3] = im_raw_p[ind3];
+            im_raw_p[k3+1] = im_raw_p[ind3+1];
+            im_raw_p[k3+2] = im_raw_p[ind3+2]; // the index
+
+            k3=k3+3;
+
+            // 3D coordinates
+            wp[k2]=wp[ind2];
+            wp[k2+1]=wp[ind2+1];
+            wp[k2+2]=wp[ind2+2];
+            wp[k2+3]=wp[ind2+3];
+
+            k2=k2+4;
+
+        }
+    }
+
+    return numinliers;
+}
+
+
+
+
+
+/*\}*/
+
+#endif /* DB_METRICS */
diff --git a/benchmark/feature_stab/db_vlvm/db_rob_image_homography.cpp b/benchmark/feature_stab/db_vlvm/db_rob_image_homography.cpp
new file mode 100644
index 00000000..82dec0cb
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_rob_image_homography.cpp
@@ -0,0 +1,1082 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_rob_image_homography.cpp,v 1.2 2011/06/17 14:03:31 mbansal Exp $ */
+
+#include "db_utilities.h"
+#include "db_rob_image_homography.h"
+#include "db_bundle.h"
+
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+
+#include "db_image_homography.h"
+
+#ifdef _VERBOSE_
+#include <iostream>
+using namespace std;
+#endif /*VERBOSE*/
+
+inline double db_RobImageHomography_Cost(double H[9],int point_count,double *x_i,double *xp_i,double one_over_scale2)
+{
+    int c;
+    double back,acc,*x_i_temp,*xp_i_temp;
+
+    for(back=0.0,c=0;c<point_count;)
+    {
+        /*Take log of product of ten reprojection
+        errors to reduce nr of expensive log operations*/
+        if(c+9<point_count)
+        {
+            x_i_temp=x_i+(c<<1);
+            xp_i_temp=xp_i+(c<<1);
+
+            acc=db_ExpCauchyInhomogenousHomographyError(xp_i_temp,H,x_i_temp,one_over_scale2);
+            acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+2,H,x_i_temp+2,one_over_scale2);
+            acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+4,H,x_i_temp+4,one_over_scale2);
+            acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+6,H,x_i_temp+6,one_over_scale2);
+            acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+8,H,x_i_temp+8,one_over_scale2);
+            acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+10,H,x_i_temp+10,one_over_scale2);
+            acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+12,H,x_i_temp+12,one_over_scale2);
+            acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+14,H,x_i_temp+14,one_over_scale2);
+            acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+16,H,x_i_temp+16,one_over_scale2);
+            acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+18,H,x_i_temp+18,one_over_scale2);
+            c+=10;
+        }
+        else
+        {
+            for(acc=1.0;c<point_count;c++)
+            {
+                acc*=db_ExpCauchyInhomogenousHomographyError(xp_i+(c<<1),H,x_i+(c<<1),one_over_scale2);
+            }
+        }
+        back+=log(acc);
+    }
+    return(back);
+}
+
+inline double db_RobImageHomography_Statistics(double H[9],int point_count,double *x_i,double *xp_i,double one_over_scale2,db_Statistics *stat,double thresh=DB_OUTLIER_THRESHOLD)
+{
+    int c,i;
+    double t2,frac;
+
+    t2=thresh*thresh;
+    for(i=0,c=0;c<point_count;c++)
+    {
+        i+=(db_SquaredInhomogenousHomographyError(xp_i+(c<<1),H,x_i+(c<<1))*one_over_scale2<=t2)?1:0;
+    }
+    frac=((double)i)/((double)(db_maxi(point_count,1)));
+
+#ifdef _VERBOSE_
+    std::cout << "Inlier Percentage RobImageHomography: " << frac*100.0 << "% out of " << point_count << " constraints" << std::endl;
+#endif /*_VERBOSE_*/
+
+    if(stat)
+    {
+        stat->nr_points=point_count;
+        stat->one_over_scale2=one_over_scale2;
+        stat->nr_inliers=i;
+        stat->inlier_fraction=frac;
+
+        stat->cost=db_RobImageHomography_Cost(H,point_count,x_i,xp_i,one_over_scale2);
+        stat->model_dimension=0;
+        /*stat->nr_parameters=;*/
+
+        stat->lambda1=log(4.0);
+        stat->lambda2=log(4.0*((double)db_maxi(1,stat->nr_points)));
+        stat->lambda3=10.0;
+        stat->gric=stat->cost+stat->lambda1*stat->model_dimension*((double)stat->nr_points)+stat->lambda2*((double)stat->nr_parameters);
+        stat->inlier_evidence=((double)stat->nr_inliers)-stat->lambda3*((double)stat->nr_parameters);
+    }
+
+    return(frac);
+}
+
+/*Compute min_Jtf and upper right of JtJ. Return cost.*/
+inline double db_RobImageHomography_Jacobians(double JtJ[81],double min_Jtf[9],double H[9],int point_count,double *x_i,double *xp_i,double one_over_scale2)
+{
+    double back,Jf_dx[18],f[2],temp,temp2;
+    int i;
+
+    db_Zero(JtJ,81);
+    db_Zero(min_Jtf,9);
+    for(back=0.0,i=0;i<point_count;i++)
+    {
+        /*Compute reprojection error vector and its Jacobian
+        for this point*/
+        db_DerivativeCauchyInhomHomographyReprojection(Jf_dx,f,xp_i+(i<<1),H,x_i+(i<<1),one_over_scale2);
+        /*Perform
+        min_Jtf-=Jf_dx*f[0] and
+        min_Jtf-=(Jf_dx+9)*f[1] to accumulate -Jt%f*/
+        db_RowOperation9(min_Jtf,Jf_dx,f[0]);
+        db_RowOperation9(min_Jtf,Jf_dx+9,f[1]);
+        /*Accumulate upper right of JtJ with outer product*/
+        temp=Jf_dx[0]; temp2=Jf_dx[9];
+        JtJ[0]+=temp*Jf_dx[0]+temp2*Jf_dx[9];
+        JtJ[1]+=temp*Jf_dx[1]+temp2*Jf_dx[10];
+        JtJ[2]+=temp*Jf_dx[2]+temp2*Jf_dx[11];
+        JtJ[3]+=temp*Jf_dx[3]+temp2*Jf_dx[12];
+        JtJ[4]+=temp*Jf_dx[4]+temp2*Jf_dx[13];
+        JtJ[5]+=temp*Jf_dx[5]+temp2*Jf_dx[14];
+        JtJ[6]+=temp*Jf_dx[6]+temp2*Jf_dx[15];
+        JtJ[7]+=temp*Jf_dx[7]+temp2*Jf_dx[16];
+        JtJ[8]+=temp*Jf_dx[8]+temp2*Jf_dx[17];
+        temp=Jf_dx[1]; temp2=Jf_dx[10];
+        JtJ[10]+=temp*Jf_dx[1]+temp2*Jf_dx[10];
+        JtJ[11]+=temp*Jf_dx[2]+temp2*Jf_dx[11];
+        JtJ[12]+=temp*Jf_dx[3]+temp2*Jf_dx[12];
+        JtJ[13]+=temp*Jf_dx[4]+temp2*Jf_dx[13];
+        JtJ[14]+=temp*Jf_dx[5]+temp2*Jf_dx[14];
+        JtJ[15]+=temp*Jf_dx[6]+temp2*Jf_dx[15];
+        JtJ[16]+=temp*Jf_dx[7]+temp2*Jf_dx[16];
+        JtJ[17]+=temp*Jf_dx[8]+temp2*Jf_dx[17];
+        temp=Jf_dx[2]; temp2=Jf_dx[11];
+        JtJ[20]+=temp*Jf_dx[2]+temp2*Jf_dx[11];
+        JtJ[21]+=temp*Jf_dx[3]+temp2*Jf_dx[12];
+        JtJ[22]+=temp*Jf_dx[4]+temp2*Jf_dx[13];
+        JtJ[23]+=temp*Jf_dx[5]+temp2*Jf_dx[14];
+        JtJ[24]+=temp*Jf_dx[6]+temp2*Jf_dx[15];
+        JtJ[25]+=temp*Jf_dx[7]+temp2*Jf_dx[16];
+        JtJ[26]+=temp*Jf_dx[8]+temp2*Jf_dx[17];
+        temp=Jf_dx[3]; temp2=Jf_dx[12];
+        JtJ[30]+=temp*Jf_dx[3]+temp2*Jf_dx[12];
+        JtJ[31]+=temp*Jf_dx[4]+temp2*Jf_dx[13];
+        JtJ[32]+=temp*Jf_dx[5]+temp2*Jf_dx[14];
+        JtJ[33]+=temp*Jf_dx[6]+temp2*Jf_dx[15];
+        JtJ[34]+=temp*Jf_dx[7]+temp2*Jf_dx[16];
+        JtJ[35]+=temp*Jf_dx[8]+temp2*Jf_dx[17];
+        temp=Jf_dx[4]; temp2=Jf_dx[13];
+        JtJ[40]+=temp*Jf_dx[4]+temp2*Jf_dx[13];
+        JtJ[41]+=temp*Jf_dx[5]+temp2*Jf_dx[14];
+        JtJ[42]+=temp*Jf_dx[6]+temp2*Jf_dx[15];
+        JtJ[43]+=temp*Jf_dx[7]+temp2*Jf_dx[16];
+        JtJ[44]+=temp*Jf_dx[8]+temp2*Jf_dx[17];
+        temp=Jf_dx[5]; temp2=Jf_dx[14];
+        JtJ[50]+=temp*Jf_dx[5]+temp2*Jf_dx[14];
+        JtJ[51]+=temp*Jf_dx[6]+temp2*Jf_dx[15];
+        JtJ[52]+=temp*Jf_dx[7]+temp2*Jf_dx[16];
+        JtJ[53]+=temp*Jf_dx[8]+temp2*Jf_dx[17];
+        temp=Jf_dx[6]; temp2=Jf_dx[15];
+        JtJ[60]+=temp*Jf_dx[6]+temp2*Jf_dx[15];
+        JtJ[61]+=temp*Jf_dx[7]+temp2*Jf_dx[16];
+        JtJ[62]+=temp*Jf_dx[8]+temp2*Jf_dx[17];
+        temp=Jf_dx[7]; temp2=Jf_dx[16];
+        JtJ[70]+=temp*Jf_dx[7]+temp2*Jf_dx[16];
+        JtJ[71]+=temp*Jf_dx[8]+temp2*Jf_dx[17];
+        temp=Jf_dx[8]; temp2=Jf_dx[17];
+        JtJ[80]+=temp*Jf_dx[8]+temp2*Jf_dx[17];
+
+        /*Add square-sum to cost*/
+        back+=db_sqr(f[0])+db_sqr(f[1]);
+    }
+
+    return(back);
+}
+
+/*Compute min_Jtf and upper right of JtJ. Return cost*/
+inline double db_RobCamRotation_Jacobians(double JtJ[9],double min_Jtf[3],double H[9],int point_count,double *x_i,double *xp_i,double one_over_scale2)
+{
+    double back,Jf_dx[6],f[2];
+    int i,j;
+
+    db_Zero(JtJ,9);
+    db_Zero(min_Jtf,3);
+    for(back=0.0,i=0;i<point_count;i++)
+    {
+        /*Compute reprojection error vector and its Jacobian
+        for this point*/
+        j=(i<<1);
+        db_DerivativeCauchyInhomRotationReprojection(Jf_dx,f,xp_i+j,H,x_i+j,one_over_scale2);
+        /*Perform
+        min_Jtf-=Jf_dx*f[0] and
+        min_Jtf-=(Jf_dx+3)*f[1] to accumulate -Jt%f*/
+        db_RowOperation3(min_Jtf,Jf_dx,f[0]);
+        db_RowOperation3(min_Jtf,Jf_dx+3,f[1]);
+        /*Accumulate upper right of JtJ with outer product*/
+        JtJ[0]+=Jf_dx[0]*Jf_dx[0]+Jf_dx[3]*Jf_dx[3];
+        JtJ[1]+=Jf_dx[0]*Jf_dx[1]+Jf_dx[3]*Jf_dx[4];
+        JtJ[2]+=Jf_dx[0]*Jf_dx[2]+Jf_dx[3]*Jf_dx[5];
+        JtJ[4]+=Jf_dx[1]*Jf_dx[1]+Jf_dx[4]*Jf_dx[4];
+        JtJ[5]+=Jf_dx[1]*Jf_dx[2]+Jf_dx[4]*Jf_dx[5];
+        JtJ[8]+=Jf_dx[2]*Jf_dx[2]+Jf_dx[5]*Jf_dx[5];
+
+        /*Add square-sum to cost*/
+        back+=db_sqr(f[0])+db_sqr(f[1]);
+    }
+
+    return(back);
+}
+
+void db_RobCamRotation_Polish(double H[9],int point_count,double *x_i,double *xp_i,double one_over_scale2,
+                               int max_iterations,double improvement_requirement)
+{
+    int i,update,stop;
+    double lambda,cost,current_cost;
+    double JtJ[9],min_Jtf[3],dx[3],H_p_dx[9];
+
+    lambda=0.001;
+    for(update=1,stop=0,i=0;(stop<2) && (i<max_iterations);i++)
+    {
+        /*if first time since improvement, compute Jacobian and residual*/
+        if(update)
+        {
+            current_cost=db_RobCamRotation_Jacobians(JtJ,min_Jtf,H,point_count,x_i,xp_i,one_over_scale2);
+            update=0;
+        }
+
+#ifdef _VERBOSE_
+        /*std::cout << "Cost:" << current_cost << " ";*/
+#endif /*_VERBOSE_*/
+
+        /*Come up with a hypothesis dx
+        based on the current lambda*/
+        db_Compute_dx_3x3(dx,JtJ,min_Jtf,lambda);
+
+        /*Compute Cost(x+dx)*/
+        db_UpdateRotation(H_p_dx,H,dx);
+        cost=db_RobImageHomography_Cost(H_p_dx,point_count,x_i,xp_i,one_over_scale2);
+
+        /*Is there an improvement?*/
+        if(cost<current_cost)
+        {
+            /*improvement*/
+            if(current_cost-cost<current_cost*improvement_requirement) stop++;
+            else stop=0;
+            lambda*=0.1;
+            /*Move to the hypothesised position x+dx*/
+            current_cost=cost;
+            db_Copy9(H,H_p_dx);
+            db_OrthonormalizeRotation(H);
+            update=1;
+
+#ifdef _VERBOSE_
+        std::cout << "Step" << i << "Imp,Lambda=" << lambda << "Cost:" << current_cost << std::endl;
+#endif /*_VERBOSE_*/
+        }
+        else
+        {
+            /*no improvement*/
+            lambda*=10.0;
+            stop=0;
+        }
+    }
+}
+
+inline void db_RobImageHomographyFetchJacobian(double **JtJ_ref,double *min_Jtf,double **JtJ_temp_ref,double *min_Jtf_temp,int n,int *fetch_vector)
+{
+    int i,j,t;
+    double *t1,*t2;
+
+    for(i=0;i<n;i++)
+    {
+        t=fetch_vector[i];
+        min_Jtf[i]=min_Jtf_temp[t];
+        t1=JtJ_ref[i];
+        t2=JtJ_temp_ref[t];
+        for(j=i;j<n;j++)
+        {
+            t1[j]=t2[fetch_vector[j]];
+        }
+    }
+}
+
+inline void db_RobImageHomographyMultiplyJacobian(double **JtJ_ref,double *min_Jtf,double **JtJ_temp_ref,double *min_Jtf_temp,double **JE_dx_ref,int n)
+{
+    double JtJ_JE[72],*JtJ_JE_ref[9];
+
+    db_SetupMatrixRefs(JtJ_JE_ref,9,8,JtJ_JE);
+
+    db_SymmetricExtendUpperToLower(JtJ_temp_ref,9,9);
+    db_MultiplyMatricesAB(JtJ_JE_ref,JtJ_temp_ref,JE_dx_ref,9,9,n);
+    db_UpperMultiplyMatricesAtB(JtJ_ref,JE_dx_ref,JtJ_JE_ref,n,9,n);
+    db_MultiplyMatrixVectorAtb(min_Jtf,JE_dx_ref,min_Jtf_temp,n,9);
+}
+
+inline void db_RobImageHomographyJH_Js(double **JE_dx_ref,int j,double H[9])
+{
+    /*Update of upper 2x2 is multiplication by
+    [s 0][ cos(theta) sin(theta)]
+    [0 s][-sin(theta) cos(theta)]*/
+    JE_dx_ref[0][j]=H[0];
+    JE_dx_ref[1][j]=H[1];
+    JE_dx_ref[2][j]=0;
+    JE_dx_ref[3][j]=H[2];
+    JE_dx_ref[4][j]=H[3];
+    JE_dx_ref[5][j]=0;
+    JE_dx_ref[6][j]=0;
+    JE_dx_ref[7][j]=0;
+    JE_dx_ref[8][j]=0;
+}
+
+inline void db_RobImageHomographyJH_JR(double **JE_dx_ref,int j,double H[9])
+{
+    /*Update of upper 2x2 is multiplication by
+    [s 0][ cos(theta) sin(theta)]
+    [0 s][-sin(theta) cos(theta)]*/
+    JE_dx_ref[0][j]=  H[3];
+    JE_dx_ref[1][j]=  H[4];
+    JE_dx_ref[2][j]=0;
+    JE_dx_ref[3][j]= -H[0];
+    JE_dx_ref[4][j]= -H[1];
+    JE_dx_ref[5][j]=0;
+    JE_dx_ref[6][j]=0;
+    JE_dx_ref[7][j]=0;
+    JE_dx_ref[8][j]=0;
+}
+
+inline void db_RobImageHomographyJH_Jt(double **JE_dx_ref,int j,int k,double H[9])
+{
+    JE_dx_ref[0][j]=0;
+    JE_dx_ref[1][j]=0;
+    JE_dx_ref[2][j]=1.0;
+    JE_dx_ref[3][j]=0;
+    JE_dx_ref[4][j]=0;
+    JE_dx_ref[5][j]=0;
+    JE_dx_ref[6][j]=0;
+    JE_dx_ref[7][j]=0;
+    JE_dx_ref[8][j]=0;
+
+    JE_dx_ref[0][k]=0;
+    JE_dx_ref[1][k]=0;
+    JE_dx_ref[2][k]=0;
+    JE_dx_ref[3][k]=0;
+    JE_dx_ref[4][k]=0;
+    JE_dx_ref[5][k]=1.0;
+    JE_dx_ref[6][k]=0;
+    JE_dx_ref[7][k]=0;
+    JE_dx_ref[8][k]=0;
+}
+
+inline void db_RobImageHomographyJH_dRotFocal(double **JE_dx_ref,int j,int k,int l,int m,double H[9])
+{
+    double f,fi,fi2;
+    double R[9],J[9];
+
+    /*Updated matrix is diag(f+df,f+df)*dR*R*diag(1/(f+df),1/(f+df),1)*/
+    f=db_FocalAndRotFromCamRotFocalHomography(R,H);
+    fi=db_SafeReciprocal(f);
+    fi2=db_sqr(fi);
+    db_JacobianOfRotatedPointStride(J,R,3);
+    JE_dx_ref[0][j]=   J[0];
+    JE_dx_ref[1][j]=   J[1];
+    JE_dx_ref[2][j]=f* J[2];
+    JE_dx_ref[3][j]=   J[3];
+    JE_dx_ref[4][j]=   J[4];
+    JE_dx_ref[5][j]=f* J[5];
+    JE_dx_ref[6][j]=fi*J[6];
+    JE_dx_ref[7][j]=fi*J[7];
+    JE_dx_ref[8][j]=   J[8];
+    db_JacobianOfRotatedPointStride(J,R+1,3);
+    JE_dx_ref[0][k]=   J[0];
+    JE_dx_ref[1][k]=   J[1];
+    JE_dx_ref[2][k]=f* J[2];
+    JE_dx_ref[3][k]=   J[3];
+    JE_dx_ref[4][k]=   J[4];
+    JE_dx_ref[5][k]=f* J[5];
+    JE_dx_ref[6][k]=fi*J[6];
+    JE_dx_ref[7][k]=fi*J[7];
+    JE_dx_ref[8][k]=   J[8];
+    db_JacobianOfRotatedPointStride(J,R+2,3);
+    JE_dx_ref[0][l]=   J[0];
+    JE_dx_ref[1][l]=   J[1];
+    JE_dx_ref[2][l]=f* J[2];
+    JE_dx_ref[3][l]=   J[3];
+    JE_dx_ref[4][l]=   J[4];
+    JE_dx_ref[5][l]=f* J[5];
+    JE_dx_ref[6][l]=fi*J[6];
+    JE_dx_ref[7][l]=fi*J[7];
+    JE_dx_ref[8][l]=   J[8];
+
+    JE_dx_ref[0][m]=0;
+    JE_dx_ref[1][m]=0;
+    JE_dx_ref[2][m]=H[2];
+    JE_dx_ref[3][m]=0;
+    JE_dx_ref[4][m]=0;
+    JE_dx_ref[5][m]=H[5];
+    JE_dx_ref[6][m]= -fi2*H[6];
+    JE_dx_ref[7][m]= -fi2*H[7];
+    JE_dx_ref[8][m]=0;
+}
+
+inline double db_RobImageHomography_Jacobians_Generic(double *JtJ_ref[8],double min_Jtf[8],int *num_param,int *frozen_coord,double H[9],int point_count,double *x_i,double *xp_i,int homography_type,double one_over_scale2)
+{
+    double back;
+    int i,j,fetch_vector[8],n;
+    double JtJ_temp[81],min_Jtf_temp[9],JE_dx[72];
+    double *JE_dx_ref[9],*JtJ_temp_ref[9];
+
+    /*Compute cost and JtJ,min_Jtf with respect to H*/
+    back=db_RobImageHomography_Jacobians(JtJ_temp,min_Jtf_temp,H,point_count,x_i,xp_i,one_over_scale2);
+
+    /*Compute JtJ,min_Jtf with respect to the right parameters
+    The formulas are
+    JtJ=transpose(JE_dx)*JtJ*JE_dx and
+    min_Jtf=transpose(JE_dx)*min_Jtf,
+    where the 9xN matrix JE_dx is the Jacobian of H with respect
+    to the update*/
+    db_SetupMatrixRefs(JtJ_temp_ref,9,9,JtJ_temp);
+    db_SetupMatrixRefs(JE_dx_ref,9,8,JE_dx);
+    switch(homography_type)
+    {
+        case DB_HOMOGRAPHY_TYPE_SIMILARITY:
+        case DB_HOMOGRAPHY_TYPE_SIMILARITY_U:
+            n=4;
+            db_RobImageHomographyJH_Js(JE_dx_ref,0,H);
+            db_RobImageHomographyJH_JR(JE_dx_ref,1,H);
+            db_RobImageHomographyJH_Jt(JE_dx_ref,2,3,H);
+            db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n);
+            break;
+        case DB_HOMOGRAPHY_TYPE_ROTATION:
+        case DB_HOMOGRAPHY_TYPE_ROTATION_U:
+            n=1;
+            db_RobImageHomographyJH_JR(JE_dx_ref,0,H);
+            db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n);
+            break;
+        case DB_HOMOGRAPHY_TYPE_SCALING:
+            n=1;
+            db_RobImageHomographyJH_Js(JE_dx_ref,0,H);
+            db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n);
+            break;
+        case DB_HOMOGRAPHY_TYPE_S_T:
+            n=3;
+            db_RobImageHomographyJH_Js(JE_dx_ref,0,H);
+            db_RobImageHomographyJH_Jt(JE_dx_ref,1,2,H);
+            db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n);
+            break;
+        case DB_HOMOGRAPHY_TYPE_R_T:
+            n=3;
+            db_RobImageHomographyJH_JR(JE_dx_ref,0,H);
+            db_RobImageHomographyJH_Jt(JE_dx_ref,1,2,H);
+            db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n);
+            break;
+        case DB_HOMOGRAPHY_TYPE_R_S:
+            n=2;
+            db_RobImageHomographyJH_Js(JE_dx_ref,0,H);
+            db_RobImageHomographyJH_JR(JE_dx_ref,1,H);
+            db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n);
+            break;
+
+        case DB_HOMOGRAPHY_TYPE_TRANSLATION:
+            n=2;
+            fetch_vector[0]=2;
+            fetch_vector[1]=5;
+            db_RobImageHomographyFetchJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,n,fetch_vector);
+            break;
+        case DB_HOMOGRAPHY_TYPE_AFFINE:
+            n=6;
+            fetch_vector[0]=0;
+            fetch_vector[1]=1;
+            fetch_vector[2]=2;
+            fetch_vector[3]=3;
+            fetch_vector[4]=4;
+            fetch_vector[5]=5;
+            db_RobImageHomographyFetchJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,n,fetch_vector);
+            break;
+        case DB_HOMOGRAPHY_TYPE_PROJECTIVE:
+            n=8;
+            *frozen_coord=db_MaxAbsIndex9(H);
+            for(j=0,i=0;i<9;i++) if(i!=(*frozen_coord))
+            {
+                fetch_vector[j]=i;
+                j++;
+            }
+            db_RobImageHomographyFetchJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,n,fetch_vector);
+            break;
+        case DB_HOMOGRAPHY_TYPE_CAMROTATION_F:
+        case DB_HOMOGRAPHY_TYPE_CAMROTATION_F_UD:
+            n=4;
+            db_RobImageHomographyJH_dRotFocal(JE_dx_ref,0,1,2,3,H);
+            db_RobImageHomographyMultiplyJacobian(JtJ_ref,min_Jtf,JtJ_temp_ref,min_Jtf_temp,JE_dx_ref,n);
+            break;
+    }
+    *num_param=n;
+
+    return(back);
+}
+
+inline void db_ImageHomographyUpdateGeneric(double H_p_dx[9],double H[9],double *dx,int homography_type,int frozen_coord)
+{
+    switch(homography_type)
+    {
+        case DB_HOMOGRAPHY_TYPE_SIMILARITY:
+        case DB_HOMOGRAPHY_TYPE_SIMILARITY_U:
+            db_Copy9(H_p_dx,H);
+            db_MultiplyScaleOntoImageHomography(H,1.0+dx[0]);
+            db_MultiplyRotationOntoImageHomography(H,dx[1]);
+            H_p_dx[2]+=dx[2];
+            H_p_dx[5]+=dx[3];
+            break;
+        case DB_HOMOGRAPHY_TYPE_ROTATION:
+        case DB_HOMOGRAPHY_TYPE_ROTATION_U:
+            db_MultiplyRotationOntoImageHomography(H,dx[0]);
+            break;
+        case DB_HOMOGRAPHY_TYPE_SCALING:
+            db_MultiplyScaleOntoImageHomography(H,1.0+dx[0]);
+            break;
+        case DB_HOMOGRAPHY_TYPE_S_T:
+            db_Copy9(H_p_dx,H);
+            db_MultiplyScaleOntoImageHomography(H,1.0+dx[0]);
+            H_p_dx[2]+=dx[1];
+            H_p_dx[5]+=dx[2];
+            break;
+        case DB_HOMOGRAPHY_TYPE_R_T:
+            db_Copy9(H_p_dx,H);
+            db_MultiplyRotationOntoImageHomography(H,dx[0]);
+            H_p_dx[2]+=dx[1];
+            H_p_dx[5]+=dx[2];
+            break;
+        case DB_HOMOGRAPHY_TYPE_R_S:
+            db_Copy9(H_p_dx,H);
+            db_MultiplyScaleOntoImageHomography(H,1.0+dx[0]);
+            db_MultiplyRotationOntoImageHomography(H,dx[1]);
+            break;
+        case DB_HOMOGRAPHY_TYPE_TRANSLATION:
+            db_Copy9(H_p_dx,H);
+            H_p_dx[2]+=dx[0];
+            H_p_dx[5]+=dx[1];
+            break;
+        case DB_HOMOGRAPHY_TYPE_AFFINE:
+            db_UpdateImageHomographyAffine(H_p_dx,H,dx);
+            break;
+        case DB_HOMOGRAPHY_TYPE_PROJECTIVE:
+            db_UpdateImageHomographyProjective(H_p_dx,H,dx,frozen_coord);
+            break;
+        case DB_HOMOGRAPHY_TYPE_CAMROTATION_F:
+        case DB_HOMOGRAPHY_TYPE_CAMROTATION_F_UD:
+            db_UpdateRotFocalHomography(H_p_dx,H,dx);
+            break;
+    }
+}
+
+void db_RobCamRotation_Polish_Generic(double H[9],int point_count,int homography_type,double *x_i,double *xp_i,double one_over_scale2,
+                               int max_iterations,double improvement_requirement)
+{
+    int i,update,stop,n;
+    int frozen_coord = 0;
+    double lambda,cost,current_cost;
+    double JtJ[72],min_Jtf[9],dx[8],H_p_dx[9];
+    double *JtJ_ref[9],d[8];
+
+    lambda=0.001;
+    for(update=1,stop=0,i=0;(stop<2) && (i<max_iterations);i++)
+    {
+        /*if first time since improvement, compute Jacobian and residual*/
+        if(update)
+        {
+            db_SetupMatrixRefs(JtJ_ref,9,8,JtJ);
+            current_cost=db_RobImageHomography_Jacobians_Generic(JtJ_ref,min_Jtf,&n,&frozen_coord,H,point_count,x_i,xp_i,homography_type,one_over_scale2);
+            update=0;
+        }
+
+#ifdef _VERBOSE_
+        /*std::cout << "Cost:" << current_cost << " ";*/
+#endif /*_VERBOSE_*/
+
+        /*Come up with a hypothesis dx
+        based on the current lambda*/
+        db_Compute_dx(dx,JtJ_ref,min_Jtf,lambda,d,n);
+
+        /*Compute Cost(x+dx)*/
+        db_ImageHomographyUpdateGeneric(H_p_dx,H,dx,homography_type,frozen_coord);
+        cost=db_RobImageHomography_Cost(H_p_dx,point_count,x_i,xp_i,one_over_scale2);
+
+        /*Is there an improvement?*/
+        if(cost<current_cost)
+        {
+            /*improvement*/
+            if(current_cost-cost<current_cost*improvement_requirement) stop++;
+            else stop=0;
+            lambda*=0.1;
+            /*Move to the hypothesised position x+dx*/
+            current_cost=cost;
+            db_Copy9(H,H_p_dx);
+            update=1;
+
+#ifdef _VERBOSE_
+        std::cout << "Step" << i << "Imp,Lambda=" << lambda << "Cost:" << current_cost << std::endl;
+#endif /*_VERBOSE_*/
+        }
+        else
+        {
+            /*no improvement*/
+            lambda*=10.0;
+            stop=0;
+        }
+    }
+}
+void db_RobImageHomography(
+                              /*Best homography*/
+                              double H[9],
+                              /*2DPoint to 2DPoint constraints
+                              Points are assumed to be given in
+                              homogenous coordinates*/
+                              double *im, double *im_p,
+                              /*Nr of points in total*/
+                              int nr_points,
+                              /*Calibration matrices
+                              used to normalize the points*/
+                              double K[9],
+                              double Kp[9],
+                              /*Pre-allocated space temp_d
+                              should point to at least
+                              12*nr_samples+10*nr_points
+                              allocated positions*/
+                              double *temp_d,
+                              /*Pre-allocated space temp_i
+                              should point to at least
+                              max(nr_samples,nr_points)
+                              allocated positions*/
+                              int *temp_i,
+                              int homography_type,
+                              db_Statistics *stat,
+                              int max_iterations,
+                              int max_points,
+                              double scale,
+                              int nr_samples,
+                              int chunk_size,
+                              /////////////////////////////////////////////
+                              // regular use: set outlierremoveflagE =0;
+                              // flag for the outlier removal
+                              int outlierremoveflagE,
+                              // if flag is 1, then the following variables
+                              // need the input
+                              //////////////////////////////////////
+                              // 3D coordinates
+                              double *wp,
+                              // its corresponding stereo pair's points
+                              double *im_r,
+                              // raw image coordinates
+                              double *im_raw, double *im_raw_p,
+                              // final matches
+                              int *finalNumE)
+{
+    /*Random seed*/
+    int r_seed;
+
+    int point_count_new;
+    /*Counters*/
+    int i,j,c,point_count,hyp_count;
+    int last_hyp,new_last_hyp,last_corr;
+    int pos,point_pos,last_point;
+    /*Accumulator*/
+    double acc;
+    /*Hypothesis pointer*/
+    double *hyp_point;
+    /*Random sample*/
+    int s[4];
+    /*Pivot for hypothesis pruning*/
+    double pivot;
+    /*Best hypothesis position*/
+    int best_pos;
+    /*Best score*/
+    double lowest_cost;
+    /*One over the squared scale of
+    Cauchy distribution*/
+    double one_over_scale2;
+    /*temporary pointers*/
+    double *x_i_temp,*xp_i_temp;
+    /*Temporary space for inverse calibration matrices*/
+    double K_inv[9];
+    double Kp_inv[9];
+    /*Temporary space for homography*/
+    double H_temp[9],H_temp2[9];
+    /*Pointers to homogenous coordinates*/
+    double *x_h_point,*xp_h_point;
+    /*Array of pointers to inhomogenous coordinates*/
+    double *X[3],*Xp[3];
+    /*Similarity parameters*/
+    int orientation_preserving,allow_scaling,allow_rotation,allow_translation,sample_size;
+
+    /*Homogenous coordinates of image points in first image*/
+    double *x_h;
+    /*Homogenous coordinates of image points in second image*/
+    double *xp_h;
+    /*Inhomogenous coordinates of image points in first image*/
+    double *x_i;
+    /*Inhomogenous coordinates of image points in second image*/
+    double *xp_i;
+    /*Homography hypotheses*/
+    double *hyp_H_array;
+    /*Cost array*/
+    double *hyp_cost_array;
+    /*Permutation of the hypotheses*/
+    int *hyp_perm;
+    /*Sample of the points*/
+    int *point_perm;
+    /*Temporary space for quick-select
+    2*nr_samples*/
+    double *temp_select;
+
+    /*Get inverse calibration matrices*/
+    db_InvertCalibrationMatrix(K_inv,K);
+    db_InvertCalibrationMatrix(Kp_inv,Kp);
+    /*Compute scale coefficient*/
+    one_over_scale2=1.0/(scale*scale);
+    /*Initialize random seed*/
+    r_seed=12345;
+    /*Set pointers to pre-allocated space*/
+    hyp_cost_array=temp_d;
+    hyp_H_array=temp_d+nr_samples;
+    temp_select=temp_d+10*nr_samples;
+    x_h=temp_d+12*nr_samples;
+    xp_h=temp_d+12*nr_samples+3*nr_points;
+    x_i=temp_d+12*nr_samples+6*nr_points;
+    xp_i=temp_d+12*nr_samples+8*nr_points;
+    hyp_perm=temp_i;
+    point_perm=temp_i;
+
+    /*Prepare a randomly permuted subset of size
+    point_count from the input points*/
+
+    point_count=db_mini(nr_points,(int)(chunk_size*log((double)nr_samples)/DB_LN2));
+
+    point_count_new = point_count;
+
+    for(i=0;i<nr_points;i++) point_perm[i]=i;
+
+    for(last_point=nr_points-1,i=0;i<point_count;i++,last_point--)
+    {
+        pos=db_RandomInt(r_seed,last_point);
+        point_pos=point_perm[pos];
+        point_perm[pos]=point_perm[last_point];
+
+        /*Normalize image points with calibration
+        matrices and move them to x_h and xp_h*/
+        c=3*point_pos;
+        j=3*i;
+        x_h_point=x_h+j;
+        xp_h_point=xp_h+j;
+        db_Multiply3x3_3x1(x_h_point,K_inv,im+c);
+        db_Multiply3x3_3x1(xp_h_point,Kp_inv,im_p+c);
+
+        db_HomogenousNormalize3(x_h_point);
+        db_HomogenousNormalize3(xp_h_point);
+
+        /*Dehomogenize image points and move them
+        to x_i and xp_i*/
+        c=(i<<1);
+        db_DeHomogenizeImagePoint(x_i+c,x_h_point); // 2-dimension
+        db_DeHomogenizeImagePoint(xp_i+c,xp_h_point); //2-dimension
+    }
+
+
+    /*Generate Hypotheses*/
+    hyp_count=0;
+    switch(homography_type)
+    {
+    case DB_HOMOGRAPHY_TYPE_SIMILARITY:
+    case DB_HOMOGRAPHY_TYPE_SIMILARITY_U:
+    case DB_HOMOGRAPHY_TYPE_TRANSLATION:
+    case DB_HOMOGRAPHY_TYPE_ROTATION:
+    case DB_HOMOGRAPHY_TYPE_ROTATION_U:
+    case DB_HOMOGRAPHY_TYPE_SCALING:
+    case DB_HOMOGRAPHY_TYPE_S_T:
+    case DB_HOMOGRAPHY_TYPE_R_T:
+    case DB_HOMOGRAPHY_TYPE_R_S:
+
+        switch(homography_type)
+        {
+        case DB_HOMOGRAPHY_TYPE_SIMILARITY:
+            orientation_preserving=1;
+            allow_scaling=1;
+            allow_rotation=1;
+            allow_translation=1;
+            sample_size=2;
+            break;
+        case DB_HOMOGRAPHY_TYPE_SIMILARITY_U:
+            orientation_preserving=0;
+            allow_scaling=1;
+            allow_rotation=1;
+            allow_translation=1;
+            sample_size=3;
+            break;
+        case DB_HOMOGRAPHY_TYPE_TRANSLATION:
+            orientation_preserving=1;
+            allow_scaling=0;
+            allow_rotation=0;
+            allow_translation=1;
+            sample_size=1;
+            break;
+        case DB_HOMOGRAPHY_TYPE_ROTATION:
+            orientation_preserving=1;
+            allow_scaling=0;
+            allow_rotation=1;
+            allow_translation=0;
+            sample_size=1;
+            break;
+        case DB_HOMOGRAPHY_TYPE_ROTATION_U:
+            orientation_preserving=0;
+            allow_scaling=0;
+            allow_rotation=1;
+            allow_translation=0;
+            sample_size=2;
+            break;
+        case DB_HOMOGRAPHY_TYPE_SCALING:
+            orientation_preserving=1;
+            allow_scaling=1;
+            allow_rotation=0;
+            allow_translation=0;
+            sample_size=1;
+            break;
+        case DB_HOMOGRAPHY_TYPE_S_T:
+            orientation_preserving=1;
+            allow_scaling=1;
+            allow_rotation=0;
+            allow_translation=1;
+            sample_size=2;
+            break;
+        case DB_HOMOGRAPHY_TYPE_R_T:
+            orientation_preserving=1;
+            allow_scaling=0;
+            allow_rotation=1;
+            allow_translation=1;
+            sample_size=2;
+            break;
+        case DB_HOMOGRAPHY_TYPE_R_S:
+            orientation_preserving=1;
+            allow_scaling=1;
+            allow_rotation=0;
+            allow_translation=0;
+            sample_size=1;
+            break;
+        }
+
+        if(point_count>=sample_size) for(i=0;i<nr_samples;i++)
+        {
+            db_RandomSample(s,3,point_count,r_seed);
+            X[0]= &x_i[s[0]<<1];
+            X[1]= &x_i[s[1]<<1];
+            X[2]= &x_i[s[2]<<1];
+            Xp[0]= &xp_i[s[0]<<1];
+            Xp[1]= &xp_i[s[1]<<1];
+            Xp[2]= &xp_i[s[2]<<1];
+            db_StitchSimilarity2D(&hyp_H_array[9*hyp_count],Xp,X,sample_size,orientation_preserving,
+                                  allow_scaling,allow_rotation,allow_translation);
+            hyp_count++;
+        }
+        break;
+
+    case DB_HOMOGRAPHY_TYPE_CAMROTATION:
+        if(point_count>=2) for(i=0;i<nr_samples;i++)
+        {
+            db_RandomSample(s,2,point_count,r_seed);
+            db_StitchCameraRotation_2Points(&hyp_H_array[9*hyp_count],
+                                      &x_h[3*s[0]],&x_h[3*s[1]],
+                                      &xp_h[3*s[0]],&xp_h[3*s[1]]);
+            hyp_count++;
+        }
+        break;
+
+    case DB_HOMOGRAPHY_TYPE_CAMROTATION_F:
+        if(point_count>=3) for(i=0;i<nr_samples;i++)
+        {
+            db_RandomSample(s,3,point_count,r_seed);
+            hyp_count+=db_StitchRotationCommonFocalLength_3Points(&hyp_H_array[9*hyp_count],
+                                      &x_h[3*s[0]],&x_h[3*s[1]],&x_h[3*s[2]],
+                                      &xp_h[3*s[0]],&xp_h[3*s[1]],&xp_h[3*s[2]]);
+        }
+        break;
+
+    case DB_HOMOGRAPHY_TYPE_CAMROTATION_F_UD:
+        if(point_count>=3) for(i=0;i<nr_samples;i++)
+        {
+            db_RandomSample(s,3,point_count,r_seed);
+            hyp_count+=db_StitchRotationCommonFocalLength_3Points(&hyp_H_array[9*hyp_count],
+                                      &x_h[3*s[0]],&x_h[3*s[1]],&x_h[3*s[2]],
+                                      &xp_h[3*s[0]],&xp_h[3*s[1]],&xp_h[3*s[2]],NULL,0);
+        }
+        break;
+
+    case DB_HOMOGRAPHY_TYPE_AFFINE:
+        if(point_count>=3) for(i=0;i<nr_samples;i++)
+        {
+            db_RandomSample(s,3,point_count,r_seed);
+            db_StitchAffine2D_3Points(&hyp_H_array[9*hyp_count],
+                                      &x_h[3*s[0]],&x_h[3*s[1]],&x_h[3*s[2]],
+                                      &xp_h[3*s[0]],&xp_h[3*s[1]],&xp_h[3*s[2]]);
+            hyp_count++;
+        }
+        break;
+
+    case DB_HOMOGRAPHY_TYPE_PROJECTIVE:
+    default:
+        if(point_count>=4) for(i=0;i<nr_samples;i++)
+        {
+            db_RandomSample(s,4,point_count,r_seed);
+            db_StitchProjective2D_4Points(&hyp_H_array[9*hyp_count],
+                                      &x_h[3*s[0]],&x_h[3*s[1]],&x_h[3*s[2]],&x_h[3*s[3]],
+                                      &xp_h[3*s[0]],&xp_h[3*s[1]],&xp_h[3*s[2]],&xp_h[3*s[3]]);
+            hyp_count++;
+        }
+    }
+
+    if(hyp_count)
+    {
+        /*Count cost in chunks and decimate hypotheses
+        until only one remains or the correspondences are
+        exhausted*/
+        for(i=0;i<hyp_count;i++)
+        {
+            hyp_perm[i]=i;
+            hyp_cost_array[i]=0.0;
+        }
+        for(i=0,last_hyp=hyp_count-1;(last_hyp>0) && (i<point_count);i+=chunk_size)
+        {
+            /*Update cost with the next chunk*/
+            last_corr=db_mini(i+chunk_size-1,point_count-1);
+            for(j=0;j<=last_hyp;j++)
+            {
+                hyp_point=hyp_H_array+9*hyp_perm[j];
+                for(c=i;c<=last_corr;)
+                {
+                    /*Take log of product of ten reprojection
+                    errors to reduce nr of expensive log operations*/
+                    if(c+9<=last_corr)
+                    {
+                        x_i_temp=x_i+(c<<1);
+                        xp_i_temp=xp_i+(c<<1);
+
+                        acc=db_ExpCauchyInhomogenousHomographyError(xp_i_temp,hyp_point,x_i_temp,one_over_scale2);
+                        acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+2,hyp_point,x_i_temp+2,one_over_scale2);
+                        acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+4,hyp_point,x_i_temp+4,one_over_scale2);
+                        acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+6,hyp_point,x_i_temp+6,one_over_scale2);
+                        acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+8,hyp_point,x_i_temp+8,one_over_scale2);
+                        acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+10,hyp_point,x_i_temp+10,one_over_scale2);
+                        acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+12,hyp_point,x_i_temp+12,one_over_scale2);
+                        acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+14,hyp_point,x_i_temp+14,one_over_scale2);
+                        acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+16,hyp_point,x_i_temp+16,one_over_scale2);
+                        acc*=db_ExpCauchyInhomogenousHomographyError(xp_i_temp+18,hyp_point,x_i_temp+18,one_over_scale2);
+                        c+=10;
+                    }
+                    else
+                    {
+                        for(acc=1.0;c<=last_corr;c++)
+                        {
+                            acc*=db_ExpCauchyInhomogenousHomographyError(xp_i+(c<<1),hyp_point,x_i+(c<<1),one_over_scale2);
+                        }
+                    }
+                    hyp_cost_array[j]+=log(acc);
+                }
+            }
+            if (chunk_size<point_count){
+                /*Prune out half of the hypotheses*/
+                new_last_hyp=(last_hyp+1)/2-1;
+                pivot=db_LeanQuickSelect(hyp_cost_array,last_hyp+1,new_last_hyp,temp_select);
+                for(j=0,c=0;(j<=last_hyp) && (c<=new_last_hyp);j++)
+                {
+                    if(hyp_cost_array[j]<=pivot)
+                    {
+                        hyp_cost_array[c]=hyp_cost_array[j];
+                        hyp_perm[c]=hyp_perm[j];
+                        c++;
+                    }
+                }
+                last_hyp=new_last_hyp;
+            }
+        }
+        /*Find the best hypothesis*/
+        lowest_cost=hyp_cost_array[0];
+        best_pos=0;
+        for(j=1;j<=last_hyp;j++)
+        {
+            if(hyp_cost_array[j]<lowest_cost)
+            {
+                lowest_cost=hyp_cost_array[j];
+                best_pos=j;
+            }
+        }
+
+        /*Move the best hypothesis*/
+        db_Copy9(H_temp,hyp_H_array+9*hyp_perm[best_pos]);
+
+        // outlier removal
+        if (outlierremoveflagE) // no polishment needed
+        {
+            point_count_new = db_RemoveOutliers_Homography(H_temp,x_i,xp_i,wp,im,im_p,im_r,im_raw,im_raw_p,point_count,one_over_scale2);
+        }
+        else
+        {
+            /*Polish*/
+            switch(homography_type)
+            {
+            case DB_HOMOGRAPHY_TYPE_SIMILARITY:
+            case DB_HOMOGRAPHY_TYPE_SIMILARITY_U:
+            case DB_HOMOGRAPHY_TYPE_TRANSLATION:
+            case DB_HOMOGRAPHY_TYPE_ROTATION:
+            case DB_HOMOGRAPHY_TYPE_ROTATION_U:
+            case DB_HOMOGRAPHY_TYPE_SCALING:
+            case DB_HOMOGRAPHY_TYPE_S_T:
+            case DB_HOMOGRAPHY_TYPE_R_T:
+            case DB_HOMOGRAPHY_TYPE_R_S:
+            case DB_HOMOGRAPHY_TYPE_AFFINE:
+            case DB_HOMOGRAPHY_TYPE_PROJECTIVE:
+            case DB_HOMOGRAPHY_TYPE_CAMROTATION_F:
+            case DB_HOMOGRAPHY_TYPE_CAMROTATION_F_UD:
+                db_RobCamRotation_Polish_Generic(H_temp,db_mini(point_count,max_points),homography_type,x_i,xp_i,one_over_scale2,max_iterations);
+                break;
+            case DB_HOMOGRAPHY_TYPE_CAMROTATION:
+                db_RobCamRotation_Polish(H_temp,db_mini(point_count,max_points),x_i,xp_i,one_over_scale2,max_iterations);
+                break;
+            }
+
+        }
+
+    }
+    else db_Identity3x3(H_temp);
+
+    switch(homography_type)
+    {
+    case DB_HOMOGRAPHY_TYPE_PROJECTIVE:
+        if(stat) stat->nr_parameters=8;
+        break;
+    case DB_HOMOGRAPHY_TYPE_AFFINE:
+        if(stat) stat->nr_parameters=6;
+        break;
+    case DB_HOMOGRAPHY_TYPE_SIMILARITY:
+    case DB_HOMOGRAPHY_TYPE_SIMILARITY_U:
+    case DB_HOMOGRAPHY_TYPE_CAMROTATION_F:
+    case DB_HOMOGRAPHY_TYPE_CAMROTATION_F_UD:
+        if(stat) stat->nr_parameters=4;
+        break;
+    case DB_HOMOGRAPHY_TYPE_CAMROTATION:
+        if(stat) stat->nr_parameters=3;
+        break;
+    case DB_HOMOGRAPHY_TYPE_TRANSLATION:
+    case DB_HOMOGRAPHY_TYPE_S_T:
+    case DB_HOMOGRAPHY_TYPE_R_T:
+    case DB_HOMOGRAPHY_TYPE_R_S:
+        if(stat) stat->nr_parameters=2;
+        break;
+    case DB_HOMOGRAPHY_TYPE_ROTATION:
+    case DB_HOMOGRAPHY_TYPE_ROTATION_U:
+    case DB_HOMOGRAPHY_TYPE_SCALING:
+        if(stat) stat->nr_parameters=1;
+        break;
+    }
+
+    db_RobImageHomography_Statistics(H_temp,db_mini(point_count,max_points),x_i,xp_i,one_over_scale2,stat);
+
+    /*Put on the calibration matrices*/
+    db_Multiply3x3_3x3(H_temp2,H_temp,K_inv);
+    db_Multiply3x3_3x3(H,Kp,H_temp2);
+
+    if (finalNumE)
+        *finalNumE = point_count_new;
+
+}
diff --git a/benchmark/feature_stab/db_vlvm/db_rob_image_homography.h b/benchmark/feature_stab/db_vlvm/db_rob_image_homography.h
new file mode 100644
index 00000000..59cde7da
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_rob_image_homography.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_rob_image_homography.h,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_ROB_IMAGE_HOMOGRAPHY
+#define DB_ROB_IMAGE_HOMOGRAPHY
+
+#include "db_utilities.h"
+#include "db_robust.h"
+#include "db_metrics.h"
+
+#include <stdlib.h> // for NULL
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+/*!
+ * \defgroup LMRobImageHomography (LM) Robust Image Homography
+ */
+/*\{*/
+
+#define DB_HOMOGRAPHY_TYPE_DEFAULT           0
+#define DB_HOMOGRAPHY_TYPE_PROJECTIVE        0
+#define DB_HOMOGRAPHY_TYPE_AFFINE            1
+#define DB_HOMOGRAPHY_TYPE_SIMILARITY        2
+#define DB_HOMOGRAPHY_TYPE_SIMILARITY_U      3
+#define DB_HOMOGRAPHY_TYPE_TRANSLATION       4
+#define DB_HOMOGRAPHY_TYPE_ROTATION          5
+#define DB_HOMOGRAPHY_TYPE_ROTATION_U        6
+#define DB_HOMOGRAPHY_TYPE_SCALING           7
+#define DB_HOMOGRAPHY_TYPE_S_T               8
+#define DB_HOMOGRAPHY_TYPE_R_T               9
+#define DB_HOMOGRAPHY_TYPE_R_S              10
+#define DB_HOMOGRAPHY_TYPE_CAMROTATION      11
+#define DB_HOMOGRAPHY_TYPE_CAMROTATION_F    12
+#define DB_HOMOGRAPHY_TYPE_CAMROTATION_F_UD 13
+
+/*!
+Solve for homography H such that xp~Hx
+\param H    best homography
+
+2D point to 2D point constraints:
+
+\param im           first image points
+\param im_p         second image points
+\param nr_points    number of points
+
+Calibration matrices:
+
+\param K    first camera
+\param Kp   second camera
+
+ Temporary space:
+
+ \param temp_d      pre-allocated space of size 12*nr_samples+10*nr_points doubles
+ \param temp_i      pre-allocated space of size max(nr_samples,nr_points) ints
+
+ Statistics for this estimation
+
+ \param stat        NULL - do not compute
+
+ \param homography_type see DB_HOMOGRAPHY_TYPE_* definitions above
+
+ Estimation parameters:
+
+ \param max_iterations  max number of polishing steps
+ \param max_points      only use this many points
+ \param scale           Cauchy scale coefficient (see db_ExpCauchyReprojectionError() )
+ \param nr_samples      number of times to compute a hypothesis
+ \param chunk_size      size of cost chunks
+*/
+DB_API void db_RobImageHomography(
+                              /*Best homography*/
+                              double H[9],
+                              /*2DPoint to 2DPoint constraints
+                              Points are assumed to be given in
+                              homogenous coordinates*/
+                              double *im,double *im_p,
+                              /*Nr of points in total*/
+                              int nr_points,
+                              /*Calibration matrices
+                              used to normalize the points*/
+                              double K[9],
+                              double Kp[9],
+                              /*Pre-allocated space temp_d
+                              should point to at least
+                              12*nr_samples+10*nr_points
+                              allocated positions*/
+                              double *temp_d,
+                              /*Pre-allocated space temp_i
+                              should point to at least
+                              max(nr_samples,nr_points)
+                              allocated positions*/
+                              int *temp_i,
+                              int homography_type=DB_HOMOGRAPHY_TYPE_DEFAULT,
+                              db_Statistics *stat=NULL,
+                              int max_iterations=DB_DEFAULT_MAX_ITERATIONS,
+                              int max_points=DB_DEFAULT_MAX_POINTS,
+                              double scale=DB_POINT_STANDARDDEV,
+                              int nr_samples=DB_DEFAULT_NR_SAMPLES,
+                              int chunk_size=DB_DEFAULT_CHUNK_SIZE,
+                              ///////////////////////////////////////////////////
+                              // flag for the outlier removal
+                              int outlierremoveflagE = 0,
+                              // if flag is 1, then the following variables
+                              // need to input
+                              ///////////////////////////////////////////////////
+                              // 3D coordinates
+                              double *wp=NULL,
+                              // its corresponding stereo pair's points
+                              double *im_r=NULL,
+                              // raw image coordinates
+                              double *im_raw=NULL, double *im_raw_p=NULL,
+                              // final matches
+                              int *final_NumE=0);
+
+DB_API double db_RobImageHomography_Cost(double H[9],int point_count,double *x_i,
+                                                double *xp_i,double one_over_scale2);
+
+
+DB_API void db_RobCamRotation_Polish(double H[9],int point_count,double *x_i,
+                                     double *xp_i, double one_over_scale2,
+                                     int max_iterations=DB_DEFAULT_MAX_ITERATIONS,
+                                     double improvement_requirement=DB_DEFAULT_IMP_REQ);
+
+
+DB_API void db_RobCamRotation_Polish_Generic(double H[9],int point_count,int homography_type,
+                                             double *x_i,double *xp_i,double one_over_scale2,
+                                             int max_iterations=DB_DEFAULT_MAX_ITERATIONS,
+                                             double improvement_requirement=DB_DEFAULT_IMP_REQ);
+
+
+#endif /* DB_ROB_IMAGE_HOMOGRAPHY */
diff --git a/benchmark/feature_stab/db_vlvm/db_robust.h b/benchmark/feature_stab/db_vlvm/db_robust.h
new file mode 100644
index 00000000..be0794c6
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_robust.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_robust.h,v 1.4 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_ROBUST
+#define DB_ROBUST
+
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+/*!
+ * \defgroup LMRobust (LM) Robust Estimation
+ */
+
+/*!
+    \struct     db_Statistics
+    \ingroup    LMRobust
+    \brief      (LnM) Sampling problem statistics
+    \date       Mon Sep 10 10:28:08 EDT 2007
+    \par        Copyright: 2007 Sarnoff Corporation.  All Rights Reserved
+ */
+ struct db_stat_struct
+ {
+     int nr_points;
+     int nr_inliers;
+     double inlier_fraction;
+     double cost;
+     double one_over_scale2;
+     double lambda1;
+     double lambda2;
+     double lambda3;
+     int nr_parameters;
+     int model_dimension;
+     double gric;
+     double inlier_evidence;
+     double posestd[6];
+     double rotationvecCov[9];
+     double translationvecCov[9];
+     int posecov_inliercount;
+     int posecovready;
+     double median_reprojection_error;
+ };
+ typedef db_stat_struct db_Statistics;
+
+#endif /* DB_ROBUST */
diff --git a/benchmark/feature_stab/db_vlvm/db_utilities.cpp b/benchmark/feature_stab/db_vlvm/db_utilities.cpp
new file mode 100644
index 00000000..ce2093b0
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_utilities.cpp
@@ -0,0 +1,176 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities.cpp,v 1.4 2011/06/17 14:03:31 mbansal Exp $ */
+
+#include "db_utilities.h"
+#include <string.h>
+#include <stdio.h>
+
+float** db_SetupImageReferences_f(float *im,int w,int h)
+{
+    int i;
+    float **img;
+    assert(im);
+    img=new float* [h];
+    for(i=0;i<h;i++)
+    {
+        img[i]=im+w*i;
+    }
+    return(img);
+}
+
+unsigned char** db_SetupImageReferences_u(unsigned char *im,int w,int h)
+{
+    int i;
+    unsigned char **img;
+
+    assert(im);
+
+    img=new unsigned char* [h];
+    for(i=0;i<h;i++)
+    {
+        img[i]=im+w*i;
+    }
+    return(img);
+}
+float** db_AllocImage_f(int w,int h,int over_allocation)
+{
+    float **img,*im;
+
+    im=new float [w*h+over_allocation];
+    img=db_SetupImageReferences_f(im,w,h);
+
+    return(img);
+}
+
+unsigned char** db_AllocImage_u(int w,int h,int over_allocation)
+{
+    unsigned char **img,*im;
+
+    im=new unsigned char [w*h+over_allocation];
+    img=db_SetupImageReferences_u(im,w,h);
+
+    return(img);
+}
+
+void db_FreeImage_f(float **img,int h)
+{
+    delete [] (img[0]);
+    delete [] img;
+}
+
+void db_FreeImage_u(unsigned char **img,int h)
+{
+    delete [] (img[0]);
+    delete [] img;
+}
+
+// ----------------------------------------------------------------------------------------------------------- ;
+//
+// copy image (source to destination)
+// ---> must be a 2D image array with the same image size
+// ---> the size of the input and output images must be same
+//
+// ------------------------------------------------------------------------------------------------------------ ;
+void db_CopyImage_u(unsigned char **d,const unsigned char * const *s, int w, int h, int over_allocation)
+{
+    int i;
+
+    for (i=0;i<h;i++)
+    {
+        memcpy(d[i],s[i],w*sizeof(unsigned char));
+    }
+
+    memcpy(&d[h],&d[h],over_allocation);
+
+}
+
+inline void db_WarpImageLutFast_u(const unsigned char * const * src, unsigned char ** dst, int w, int h,
+                                  const float * const * lut_x, const float * const * lut_y)
+{
+    assert(src && dst);
+    int xd=0, yd=0;
+
+    for ( int i = 0; i < w; ++i )
+        for ( int j = 0; j < h; ++j )
+        {
+            //xd = static_cast<unsigned int>(lut_x[j][i]);
+            //yd = static_cast<unsigned int>(lut_y[j][i]);
+            xd = (unsigned int)(lut_x[j][i]);
+            yd = (unsigned int)(lut_y[j][i]);
+            if ( xd >= w || yd >= h ||
+                 xd < 0 || yd < 0)
+                dst[j][i] = 0;
+            else
+                dst[j][i] = src[yd][xd];
+        }
+}
+
+inline void db_WarpImageLutBilinear_u(const unsigned char * const * src, unsigned char ** dst, int w, int h,
+                                      const float * const * lut_x,const float * const* lut_y)
+{
+    assert(src && dst);
+    double xd=0.0, yd=0.0;
+
+    for ( int i = 0; i < w; ++i )
+        for ( int j = 0; j < h; ++j )
+        {
+            xd = static_cast<double>(lut_x[j][i]);
+            yd = static_cast<double>(lut_y[j][i]);
+            if ( xd > w   || yd > h ||
+                 xd < 0.0 || yd < 0.0)
+                dst[j][i] = 0;
+            else
+                dst[j][i] = db_BilinearInterpolation(yd, xd, src);
+        }
+}
+
+
+void db_WarpImageLut_u(const unsigned char * const * src, unsigned char ** dst, int w, int h,
+                       const float * const * lut_x,const float * const * lut_y, int type)
+{
+    switch (type)
+    {
+    case DB_WARP_FAST:
+        db_WarpImageLutFast_u(src,dst,w,h,lut_x,lut_y);
+        break;
+    case DB_WARP_BILINEAR:
+        db_WarpImageLutBilinear_u(src,dst,w,h,lut_x,lut_y);
+        break;
+    default:
+        break;
+    }
+}
+
+
+void db_PrintDoubleVector(double *a,long size)
+{
+    printf("[ ");
+    for(long i=0;i<size;i++) printf("%lf ",a[i]);
+    printf("]");
+}
+
+void db_PrintDoubleMatrix(double *a,long rows,long cols)
+{
+    printf("[\n");
+    for(long i=0;i<rows;i++)
+    {
+        for(long j=0;j<cols;j++) printf("%lf ",a[i*cols+j]);
+        printf("\n");
+    }
+    printf("]");
+}
diff --git a/benchmark/feature_stab/db_vlvm/db_utilities.h b/benchmark/feature_stab/db_vlvm/db_utilities.h
new file mode 100644
index 00000000..fa9c8774
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_utilities.h
@@ -0,0 +1,571 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities.h,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_UTILITIES_H
+#define DB_UTILITIES_H
+
+
+#ifdef _WIN32
+#pragma warning(disable: 4275)
+#pragma warning(disable: 4251)
+#pragma warning(disable: 4786)
+#pragma warning(disable: 4800)
+#pragma warning(disable: 4018) /* signed-unsigned mismatch */
+#endif /* _WIN32 */
+
+#ifdef _WIN32
+    #ifdef DBDYNAMIC_EXPORTS
+        #define DB_API __declspec(dllexport)
+    #else
+        #ifdef DBDYNAMIC_IMPORTS
+            #define DB_API __declspec(dllimport)
+        #else
+            #define DB_API
+        #endif
+    #endif
+#else
+    #define DB_API
+#endif /* _WIN32 */
+
+#ifdef _VERBOSE_
+#include <iostream>
+#endif
+
+#include <math.h>
+
+#include <assert.h>
+#include "db_utilities_constants.h"
+/*!
+ * \defgroup LMBasicUtilities (LM) Utility Functions (basic math, linear algebra and array manipulations)
+ */
+/*\{*/
+
+/*!
+ * Round double into int using fld and fistp instructions.
+ */
+inline int db_roundi (double x) {
+#ifdef WIN32_ASM
+    int n;
+    __asm
+    {
+        fld x;
+        fistp n;
+    }
+    return n;
+#else
+    return static_cast<int>(floor(x+0.5));
+#endif
+}
+
+/*!
+ * Square a double.
+ */
+inline double db_sqr(double a)
+{
+    return(a*a);
+}
+
+/*!
+ * Square a long.
+ */
+inline long db_sqr(long a)
+{
+    return(a*a);
+}
+
+/*!
+ * Square an int.
+ */
+inline long db_sqr(int a)
+{
+    return(a*a);
+}
+
+/*!
+ * Maximum of two doubles.
+ */
+inline double db_maxd(double a,double b)
+{
+    if(b>a) return(b);
+    else return(a);
+}
+/*!
+ * Minumum of two doubles.
+ */
+inline double db_mind(double a,double b)
+{
+    if(b<a) return(b);
+    else return(a);
+}
+
+
+/*!
+ * Maximum of two ints.
+ */
+inline int db_maxi(int a,int b)
+{
+    if(b>a) return(b);
+    else return(a);
+}
+
+/*!
+ * Minimum of two numbers.
+ */
+inline int db_mini(int a,int b)
+{
+    if(b<a) return(b);
+    else return(a);
+}
+/*!
+ * Maximum of two numbers.
+ */
+inline long db_maxl(long a,long b)
+{
+    if(b>a) return(b);
+    else return(a);
+}
+
+/*!
+ * Minimum of two numbers.
+ */
+inline long db_minl(long a,long b)
+{
+    if(b<a) return(b);
+    else return(a);
+}
+
+/*!
+ * Sign of a number.
+ * \return -1.0 if negative, 1.0 if positive.
+ */
+inline double db_sign(double x)
+{
+    if(x>=0.0) return(1.0);
+    else return(-1.0);
+}
+/*!
+ * Absolute value.
+ */
+inline int db_absi(int a)
+{
+    if(a<0) return(-a);
+    else return(a);
+}
+/*!
+ * Absolute value.
+ */
+inline float db_absf(float a)
+{
+    if(a<0) return(-a);
+    else return(a);
+}
+
+/*!
+ * Absolute value.
+ */
+inline double db_absd(double a)
+{
+    if(a<0) return(-a);
+    else return(a);
+}
+
+/*!
+ * Reciprocal (1/a). Prevents divide by 0.
+ * \return 1/a if a != 0. 1.0 otherwise.
+ */
+inline double db_SafeReciprocal(double a)
+{
+    return((a!=0.0)?(1.0/a):1.0);
+}
+
+/*!
+ * Division. Prevents divide by 0.
+ * \return a/b if b!=0. a otherwise.
+ */
+inline double db_SafeDivision(double a,double b)
+{
+    return((b!=0.0)?(a/b):a);
+}
+
+/*!
+ * Square root. Prevents imaginary output.
+ * \return sqrt(a) if a > 0.0. 0.0 otherewise.
+ */
+inline double db_SafeSqrt(double a)
+{
+    return((a>=0.0)?(sqrt(a)):0.0);
+}
+
+/*!
+ * Square root of a reciprocal. Prevents divide by 0 and imaginary output.
+ * \return sqrt(1/a) if a > 0.0. 1.0 otherewise.
+ */
+inline double db_SafeSqrtReciprocal(double a)
+{
+    return((a>0.0)?(sqrt(1.0/a)):1.0);
+}
+/*!
+ * Cube root.
+ */
+inline double db_CubRoot(double x)
+{
+    if(x>=0.0) return(pow(x,1.0/3.0));
+    else return(-pow(-x,1.0/3.0));
+}
+/*!
+ * Sum of squares of elements of x.
+ */
+inline double db_SquareSum3(const double x[3])
+{
+    return(db_sqr(x[0])+db_sqr(x[1])+db_sqr(x[2]));
+}
+/*!
+ * Sum of squares of elements of x.
+ */
+inline double db_SquareSum7(double x[7])
+{
+    return(db_sqr(x[0])+db_sqr(x[1])+db_sqr(x[2])+
+        db_sqr(x[3])+db_sqr(x[4])+db_sqr(x[5])+
+        db_sqr(x[6]));
+}
+/*!
+ * Sum of squares of elements of x.
+ */
+inline double db_SquareSum9(double x[9])
+{
+    return(db_sqr(x[0])+db_sqr(x[1])+db_sqr(x[2])+
+        db_sqr(x[3])+db_sqr(x[4])+db_sqr(x[5])+
+        db_sqr(x[6])+db_sqr(x[7])+db_sqr(x[8]));
+}
+/*!
+ * Copy a vector.
+ * \param xd destination
+ * \param xs source
+ */
+void inline db_Copy3(double xd[3],const double xs[3])
+{
+    xd[0]=xs[0];xd[1]=xs[1];xd[2]=xs[2];
+}
+/*!
+ * Copy a vector.
+ * \param xd destination
+ * \param xs source
+ */
+void inline db_Copy6(double xd[6],const double xs[6])
+{
+    xd[0]=xs[0];xd[1]=xs[1];xd[2]=xs[2];
+    xd[3]=xs[3];xd[4]=xs[4];xd[5]=xs[5];
+}
+/*!
+ * Copy a vector.
+ * \param xd destination
+ * \param xs source
+ */
+void inline db_Copy9(double xd[9],const double xs[9])
+{
+    xd[0]=xs[0];xd[1]=xs[1];xd[2]=xs[2];
+    xd[3]=xs[3];xd[4]=xs[4];xd[5]=xs[5];
+    xd[6]=xs[6];xd[7]=xs[7];xd[8]=xs[8];
+}
+
+/*!
+ * Scalar product: Transpose(A)*B.
+ */
+inline double db_ScalarProduct4(const double A[4],const double B[4])
+{
+    return(A[0]*B[0]+A[1]*B[1]+A[2]*B[2]+A[3]*B[3]);
+}
+/*!
+ * Scalar product: Transpose(A)*B.
+ */
+inline double db_ScalarProduct7(const double A[7],const double B[7])
+{
+    return(A[0]*B[0]+A[1]*B[1]+A[2]*B[2]+
+        A[3]*B[3]+A[4]*B[4]+A[5]*B[5]+
+        A[6]*B[6]);
+}
+/*!
+ * Scalar product: Transpose(A)*B.
+ */
+inline double db_ScalarProduct9(const double A[9],const double B[9])
+{
+    return(A[0]*B[0]+A[1]*B[1]+A[2]*B[2]+
+        A[3]*B[3]+A[4]*B[4]+A[5]*B[5]+
+        A[6]*B[6]+A[7]*B[7]+A[8]*B[8]);
+}
+/*!
+ * Vector addition: S=A+B.
+ */
+inline void db_AddVectors6(double S[6],const double A[6],const double B[6])
+{
+    S[0]=A[0]+B[0]; S[1]=A[1]+B[1]; S[2]=A[2]+B[2]; S[3]=A[3]+B[3]; S[4]=A[4]+B[4];
+    S[5]=A[5]+B[5];
+}
+/*!
+ * Multiplication: C(3x1)=A(3x3)*B(3x1).
+ */
+inline void db_Multiply3x3_3x1(double y[3],const double A[9],const double x[3])
+{
+    y[0]=A[0]*x[0]+A[1]*x[1]+A[2]*x[2];
+    y[1]=A[3]*x[0]+A[4]*x[1]+A[5]*x[2];
+    y[2]=A[6]*x[0]+A[7]*x[1]+A[8]*x[2];
+}
+inline void db_Multiply3x3_3x3(double C[9], const double A[9],const double B[9])
+{
+    C[0]=A[0]*B[0]+A[1]*B[3]+A[2]*B[6];
+    C[1]=A[0]*B[1]+A[1]*B[4]+A[2]*B[7];
+    C[2]=A[0]*B[2]+A[1]*B[5]+A[2]*B[8];
+
+    C[3]=A[3]*B[0]+A[4]*B[3]+A[5]*B[6];
+    C[4]=A[3]*B[1]+A[4]*B[4]+A[5]*B[7];
+    C[5]=A[3]*B[2]+A[4]*B[5]+A[5]*B[8];
+
+    C[6]=A[6]*B[0]+A[7]*B[3]+A[8]*B[6];
+    C[7]=A[6]*B[1]+A[7]*B[4]+A[8]*B[7];
+    C[8]=A[6]*B[2]+A[7]*B[5]+A[8]*B[8];
+}
+/*!
+ * Multiplication: C(4x1)=A(4x4)*B(4x1).
+ */
+inline void db_Multiply4x4_4x1(double y[4],const double A[16],const double x[4])
+{
+    y[0]=A[0]*x[0]+A[1]*x[1]+A[2]*x[2]+A[3]*x[3];
+    y[1]=A[4]*x[0]+A[5]*x[1]+A[6]*x[2]+A[7]*x[3];
+    y[2]=A[8]*x[0]+A[9]*x[1]+A[10]*x[2]+A[11]*x[3];
+    y[3]=A[12]*x[0]+A[13]*x[1]+A[14]*x[2]+A[15]*x[3];
+}
+/*!
+ * Scalar multiplication in place: A(3)=mult*A(3).
+ */
+inline void db_MultiplyScalar3(double *A,double mult)
+{
+    (*A++) *= mult; (*A++) *= mult; (*A++) *= mult;
+}
+
+/*!
+ * Scalar multiplication: A(3)=mult*B(3).
+ */
+inline void db_MultiplyScalarCopy3(double *A,const double *B,double mult)
+{
+    (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult;
+}
+
+/*!
+ * Scalar multiplication: A(4)=mult*B(4).
+ */
+inline void db_MultiplyScalarCopy4(double *A,const double *B,double mult)
+{
+    (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult;
+}
+/*!
+ * Scalar multiplication: A(7)=mult*B(7).
+ */
+inline void db_MultiplyScalarCopy7(double *A,const double *B,double mult)
+{
+    (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult;
+    (*A++)=(*B++)*mult; (*A++)=(*B++)*mult;
+}
+/*!
+ * Scalar multiplication: A(9)=mult*B(9).
+ */
+inline void db_MultiplyScalarCopy9(double *A,const double *B,double mult)
+{
+    (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult;
+    (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult; (*A++)=(*B++)*mult;
+}
+
+/*!
+ * \defgroup LMImageBasicUtilities (LM) Basic Image Utility Functions
+
+ Images in db are simply 2D arrays of unsigned char or float types.
+ Only the very basic operations are supported: allocation/deallocation,
+copying, simple pyramid construction and LUT warping. These images are used
+by db_CornerDetector_u and db_Matcher_u. The db_Image class is an attempt
+to wrap these images. It has not been tested well.
+
+ */
+/*\{*/
+/*!
+ * Given a float image array, allocates and returns the set of row poiners.
+ * \param im    image pointer
+ * \param w     image width
+ * \param h     image height
+ */
+DB_API float** db_SetupImageReferences_f(float *im,int w,int h);
+/*!
+ * Allocate a float image.
+ * Note: for feature detection images must be overallocated by 256 bytes.
+ * \param w                 width
+ * \param h                 height
+ * \param over_allocation   allocate this many extra bytes at the end
+ * \return row array pointer
+ */
+DB_API float** db_AllocImage_f(int w,int h,int over_allocation=256);
+/*!
+ * Free a float image
+ * \param img   row array pointer
+ * \param h     image height (number of rows)
+ */
+DB_API void db_FreeImage_f(float **img,int h);
+/*!
+ * Given an unsigned char image array, allocates and returns the set of row poiners.
+ * \param im    image pointer
+ * \param w     image width
+ * \param h     image height
+ */
+DB_API unsigned char** db_SetupImageReferences_u(unsigned char *im,int w,int h);
+/*!
+ * Allocate an unsigned char image.
+ * Note: for feature detection images must be overallocated by 256 bytes.
+ * \param w                 width
+ * \param h                 height
+ * \param over_allocation   allocate this many extra bytes at the end
+ * \return row array pointer
+ */
+DB_API unsigned char** db_AllocImage_u(int w,int h,int over_allocation=256);
+/*!
+ * Free an unsigned char image
+ * \param img   row array pointer
+ * \param h     image height (number of rows)
+ */
+DB_API void db_FreeImage_u(unsigned char **img,int h);
+
+/*!
+ Copy an image from s to d. Both s and d must be pre-allocated at of the same size.
+ Copy is done row by row.
+ \param s   source
+ \param d   destination
+ \param w   width
+ \param h   height
+ \param over_allocation copy this many bytes after the end of the last line
+ */
+DB_API void db_CopyImage_u(unsigned char **d,const unsigned char * const *s,int w,int h,int over_allocation=0);
+
+DB_API inline unsigned char db_BilinearInterpolation(double y, double x, const unsigned char * const * v)
+{
+    int floor_x=(int) x;
+    int floor_y=(int) y;
+
+    int ceil_x=floor_x+1;
+    int ceil_y=floor_y+1;
+
+    unsigned char f00 = v[floor_y][floor_x];
+    unsigned char f01 = v[floor_y][ceil_x];
+    unsigned char f10 = v[ceil_y][floor_x];
+    unsigned char f11 = v[ceil_y][ceil_x];
+
+    double xl = x-floor_x;
+    double yl = y-floor_y;
+
+    return (unsigned char)(f00*(1-yl)*(1-xl) + f10*yl*(1-xl) + f01*(1-yl)*xl + f11*yl*xl);
+}
+/*\}*/
+/*!
+ * \ingroup LMRotation
+ * Compute an incremental rotation matrix using the update dx=[sin(phi) sin(ohm) sin(kap)]
+ */
+inline void db_IncrementalRotationMatrix(double R[9],const double dx[3])
+{
+    double sp,so,sk,om_sp2,om_so2,om_sk2,cp,co,ck,sp_so,cp_so;
+
+    /*Store sines*/
+    sp=dx[0]; so=dx[1]; sk=dx[2];
+    om_sp2=1.0-sp*sp;
+    om_so2=1.0-so*so;
+    om_sk2=1.0-sk*sk;
+    /*Compute cosines*/
+    cp=(om_sp2>=0.0)?sqrt(om_sp2):1.0;
+    co=(om_so2>=0.0)?sqrt(om_so2):1.0;
+    ck=(om_sk2>=0.0)?sqrt(om_sk2):1.0;
+    /*Compute matrix*/
+    sp_so=sp*so;
+    cp_so=cp*so;
+    R[0]=sp_so*sk+cp*ck; R[1]=co*sk; R[2]=cp_so*sk-sp*ck;
+    R[3]=sp_so*ck-cp*sk; R[4]=co*ck; R[5]=cp_so*ck+sp*sk;
+    R[6]=sp*co;          R[7]= -so;  R[8]=cp*co;
+}
+/*!
+ * Zero out 2 vector in place.
+ */
+void inline db_Zero2(double x[2])
+{
+    x[0]=x[1]=0;
+}
+/*!
+ * Zero out 3 vector in place.
+ */
+void inline db_Zero3(double x[3])
+{
+    x[0]=x[1]=x[2]=0;
+}
+/*!
+ * Zero out 4 vector in place.
+ */
+void inline db_Zero4(double x[4])
+{
+    x[0]=x[1]=x[2]=x[3]=0;
+}
+/*!
+ * Zero out 9 vector in place.
+ */
+void inline db_Zero9(double x[9])
+{
+    x[0]=x[1]=x[2]=x[3]=x[4]=x[5]=x[6]=x[7]=x[8]=0;
+}
+
+#define DB_WARP_FAST        0
+#define DB_WARP_BILINEAR    1
+
+/*!
+ * Perform a look-up table warp.
+ * The LUTs must be float images of the same size as source image.
+ * The source value x_s is determined from destination (x_d,y_d) through lut_x
+ * and y_s is determined from lut_y:
+   \code
+   x_s = lut_x[y_d][x_d];
+   y_s = lut_y[y_d][x_d];
+   \endcode
+
+ * \param src   source image
+ * \param dst   destination image
+ * \param w     width
+ * \param h     height
+ * \param lut_x LUT for x
+ * \param lut_y LUT for y
+ * \param type  warp type (DB_WARP_FAST or DB_WARP_BILINEAR)
+ */
+DB_API void db_WarpImageLut_u(const unsigned char * const * src,unsigned char ** dst, int w, int h,
+                               const float * const * lut_x, const float * const * lut_y, int type=DB_WARP_BILINEAR);
+
+DB_API void db_PrintDoubleVector(double *a,long size);
+DB_API void db_PrintDoubleMatrix(double *a,long rows,long cols);
+
+#include "db_utilities_constants.h"
+#include "db_utilities_algebra.h"
+#include "db_utilities_indexing.h"
+#include "db_utilities_linalg.h"
+#include "db_utilities_poly.h"
+#include "db_utilities_geometry.h"
+#include "db_utilities_random.h"
+#include "db_utilities_rotation.h"
+#include "db_utilities_camera.h"
+
+#define DB_INVALID (-1)
+
+
+#endif /* DB_UTILITIES_H */
diff --git a/benchmark/feature_stab/db_vlvm/db_utilities_algebra.h b/benchmark/feature_stab/db_vlvm/db_utilities_algebra.h
new file mode 100644
index 00000000..2aedd74d
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_utilities_algebra.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_algebra.h,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_UTILITIES_ALGEBRA
+#define DB_UTILITIES_ALGEBRA
+
+#include "db_utilities.h"
+
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+/*!
+ * \defgroup LMAlgebra (LM) Algebra utilities
+ */
+/*\{*/
+
+inline void db_HomogenousNormalize3(double *x)
+{
+    db_MultiplyScalar3(x,db_SafeSqrtReciprocal(db_SquareSum3(x)));
+}
+
+/*\}*/
+
+#endif /* DB_UTILITIES_ALGEBRA */
diff --git a/benchmark/feature_stab/db_vlvm/db_utilities_camera.cpp b/benchmark/feature_stab/db_vlvm/db_utilities_camera.cpp
new file mode 100644
index 00000000..dceba9b6
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_utilities_camera.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_camera.cpp,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */
+
+#include "db_utilities_camera.h"
+#include "db_utilities.h"
+#include <assert.h>
+
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+
+void db_Approx3DCalMat(double K[9],double Kinv[9],int im_width,int im_height,double f_correction,int field)
+{
+    double iw,ih,av_size,field_fact;
+
+    if(field) field_fact=2.0;
+    else field_fact=1.0;
+
+    iw=(double)im_width;
+    ih=(double)(im_height*field_fact);
+    av_size=(iw+ih)/2.0;
+    K[0]=f_correction*av_size;
+    K[1]=0;
+    K[2]=iw/2.0;
+    K[3]=0;
+    K[4]=f_correction*av_size/field_fact;
+    K[5]=ih/2.0/field_fact;
+    K[6]=0;
+    K[7]=0;
+    K[8]=1;
+
+    db_InvertCalibrationMatrix(Kinv,K);
+}
diff --git a/benchmark/feature_stab/db_vlvm/db_utilities_camera.h b/benchmark/feature_stab/db_vlvm/db_utilities_camera.h
new file mode 100644
index 00000000..26ba4420
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_utilities_camera.h
@@ -0,0 +1,332 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_camera.h,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_UTILITIES_CAMERA
+#define DB_UTILITIES_CAMERA
+
+#include "db_utilities.h"
+
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+/*!
+ * \defgroup LMCamera (LM) Camera Utilities
+ */
+/*\{*/
+
+#include "db_utilities.h"
+
+#define DB_RADDISTMODE_BOUGEUT  4
+#define DB_RADDISTMODE_2NDORDER 5
+#define DB_RADDISTMODE_IDENTITY 6
+
+/*!
+Give reasonable guess of the calibration matrix for normalization purposes.
+Use real K matrix when doing real geometry.
+focal length = (w+h)/2.0*f_correction.
+\param K            calibration matrix (out)
+\param Kinv         inverse of K (out)
+\param im_width     image width
+\param im_height    image height
+\param f_correction focal length correction factor
+\param field        set to 1 if this is a field image (fy = fx/2)
+\return K(3x3) intrinsic calibration matrix
+*/
+DB_API void db_Approx3DCalMat(double K[9],double Kinv[9],int im_width,int im_height,double f_correction=1.0,int field=0);
+
+/*!
+ Make a 2x2 identity matrix
+ */
+void inline db_Identity2x2(double A[4])
+{
+    A[0]=1;A[1]=0;
+    A[2]=0;A[3]=1;
+}
+/*!
+ Make a 3x3 identity matrix
+ */
+void inline db_Identity3x3(double A[9])
+{
+    A[0]=1;A[1]=0;A[2]=0;
+    A[3]=0;A[4]=1;A[5]=0;
+    A[6]=0;A[7]=0;A[8]=1;
+}
+/*!
+ Invert intrinsic calibration matrix K(3x3)
+ If fx or fy is 0, I is returned.
+ */
+void inline db_InvertCalibrationMatrix(double Kinv[9],const double K[9])
+{
+    double a,b,c,d,e,f,ainv,dinv,adinv;
+
+    a=K[0];b=K[1];c=K[2];d=K[4];e=K[5];f=K[8];
+    if((a==0.0)||(d==0.0)) db_Identity3x3(Kinv);
+    else
+    {
+        Kinv[3]=0.0;
+        Kinv[6]=0.0;
+        Kinv[7]=0.0;
+        Kinv[8]=1.0;
+
+        ainv=1.0/a;
+        dinv=1.0/d;
+        adinv=ainv*dinv;
+        Kinv[0]=f*ainv;
+        Kinv[1]= -b*f*adinv;
+        Kinv[2]=(b*e-c*d)*adinv;
+        Kinv[4]=f*dinv;
+        Kinv[5]= -e*dinv;
+    }
+}
+/*!
+ De-homogenize image point: xd(1:2) = xs(1:2)/xs(3).
+ If xs(3) is 0, xd will become 0
+ \param xd  destination point
+ \param xs  source point
+ */
+void inline db_DeHomogenizeImagePoint(double xd[2],const double xs[3])
+{
+    double temp,div;
+
+    temp=xs[2];
+    if(temp!=0)
+    {
+        div=1.0/temp;
+        xd[0]=xs[0]*div;xd[1]=xs[1]*div;
+    }
+    else
+    {
+        xd[0]=0.0;xd[1]=0.0;
+    }
+}
+
+
+/*!
+ Orthonormalize 3D rotation R
+ */
+inline void db_OrthonormalizeRotation(double R[9])
+{
+    double s,mult;
+    /*Normalize first vector*/
+    s=db_sqr(R[0])+db_sqr(R[1])+db_sqr(R[2]);
+    mult=sqrt(1.0/(s?s:1));
+    R[0]*=mult; R[1]*=mult; R[2]*=mult;
+    /*Subtract scalar product from second vector*/
+    s=R[0]*R[3]+R[1]*R[4]+R[2]*R[5];
+    R[3]-=s*R[0]; R[4]-=s*R[1]; R[5]-=s*R[2];
+    /*Normalize second vector*/
+    s=db_sqr(R[3])+db_sqr(R[4])+db_sqr(R[5]);
+    mult=sqrt(1.0/(s?s:1));
+    R[3]*=mult; R[4]*=mult; R[5]*=mult;
+    /*Get third vector by vector product*/
+    R[6]=R[1]*R[5]-R[4]*R[2];
+    R[7]=R[2]*R[3]-R[5]*R[0];
+    R[8]=R[0]*R[4]-R[3]*R[1];
+}
+/*!
+Update a rotation with the update dx=[sin(phi) sin(ohm) sin(kap)]
+*/
+inline void db_UpdateRotation(double R_p_dx[9],double R[9],const double dx[3])
+{
+    double R_temp[9];
+    /*Update rotation*/
+    db_IncrementalRotationMatrix(R_temp,dx);
+    db_Multiply3x3_3x3(R_p_dx,R_temp,R);
+}
+/*!
+ Compute xp = Hx for inhomogenous image points.
+ */
+inline void db_ImageHomographyInhomogenous(double xp[2],const double H[9],const double x[2])
+{
+    double x3,m;
+
+    x3=H[6]*x[0]+H[7]*x[1]+H[8];
+    if(x3!=0.0)
+    {
+        m=1.0/x3;
+        xp[0]=m*(H[0]*x[0]+H[1]*x[1]+H[2]);
+        xp[1]=m*(H[3]*x[0]+H[4]*x[1]+H[5]);
+    }
+    else
+    {
+        xp[0]=xp[1]=0.0;
+    }
+}
+inline double db_FocalFromCamRotFocalHomography(const double H[9])
+{
+    double k1,k2;
+
+    k1=db_sqr(H[2])+db_sqr(H[5]);
+    k2=db_sqr(H[6])+db_sqr(H[7]);
+    if(k1>=k2)
+    {
+        return(db_SafeSqrt(db_SafeDivision(k1,1.0-db_sqr(H[8]))));
+    }
+    else
+    {
+        return(db_SafeSqrt(db_SafeDivision(1.0-db_sqr(H[8]),k2)));
+    }
+}
+
+inline double db_FocalAndRotFromCamRotFocalHomography(double R[9],const double H[9])
+{
+    double back,fi;
+
+    back=db_FocalFromCamRotFocalHomography(H);
+    fi=db_SafeReciprocal(back);
+    R[0]=H[0];      R[1]=H[1];      R[2]=fi*H[2];
+    R[3]=H[3];      R[4]=H[4];      R[5]=fi*H[5];
+    R[6]=back*H[6]; R[7]=back*H[7]; R[8]=H[8];
+    return(back);
+}
+/*!
+Compute Jacobian at zero of three coordinates dR*x with
+respect to the update dR([sin(phi) sin(ohm) sin(kap)]) given x.
+
+The Jacobian at zero of the homogenous coordinates with respect to
+    [sin(phi) sin(ohm) sin(kap)] is
+\code
+    [-rx2   0   rx1 ]
+    [  0   rx2 -rx0 ]
+    [ rx0 -rx1   0  ].
+\endcode
+
+*/
+inline void db_JacobianOfRotatedPointStride(double J[9],const double x[3],int stride)
+{
+    /*The Jacobian at zero of the homogenous coordinates with respect to
+    [sin(phi) sin(ohm) sin(kap)] is
+    [-rx2   0   rx1 ]
+    [  0   rx2 -rx0 ]
+    [ rx0 -rx1   0  ]*/
+
+    J[0]= -x[stride<<1];
+    J[1]=0;
+    J[2]=  x[stride];
+    J[3]=0;
+    J[4]=  x[stride<<1];
+    J[5]= -x[0];
+    J[6]=  x[0];
+    J[7]= -x[stride];
+    J[8]=0;
+}
+/*!
+ Invert an affine (if possible)
+ \param Hinv    inverted matrix
+ \param H       input matrix
+ \return true if success and false if matrix is ill-conditioned (det < 1e-7)
+ */
+inline bool db_InvertAffineTransform(double Hinv[9],const double H[9])
+{
+    double det=H[0]*H[4]-H[3]*H[1];
+    if (det<1e-7)
+    {
+        db_Copy9(Hinv,H);
+        return false;
+    }
+    else
+    {
+        Hinv[0]=H[4]/det;
+        Hinv[1]=-H[1]/det;
+        Hinv[3]=-H[3]/det;
+        Hinv[4]=H[0]/det;
+        Hinv[2]= -Hinv[0]*H[2]-Hinv[1]*H[5];
+        Hinv[5]= -Hinv[3]*H[2]-Hinv[4]*H[5];
+    }
+    return true;
+}
+
+/*!
+Update of upper 2x2 is multiplication by
+\code
+[s 0][ cos(theta) sin(theta)]
+[0 s][-sin(theta) cos(theta)]
+\endcode
+*/
+inline void db_MultiplyScaleOntoImageHomography(double H[9],double s)
+{
+
+    H[0]*=s;
+    H[1]*=s;
+    H[3]*=s;
+    H[4]*=s;
+}
+/*!
+Update of upper 2x2 is multiplication by
+\code
+[s 0][ cos(theta) sin(theta)]
+[0 s][-sin(theta) cos(theta)]
+\endcode
+*/
+inline void db_MultiplyRotationOntoImageHomography(double H[9],double theta)
+{
+    double c,s,H0,H1;
+
+
+    c=cos(theta);
+    s=db_SafeSqrt(1.0-db_sqr(c));
+    H0=  c*H[0]+s*H[3];
+    H[3]= -s*H[0]+c*H[3];
+    H[0]=H0;
+    H1=c*H[1]+s*H[4];
+    H[4]= -s*H[1]+c*H[4];
+    H[1]=H1;
+}
+
+inline void db_UpdateImageHomographyAffine(double H_p_dx[9],const double H[9],const double dx[6])
+{
+    db_AddVectors6(H_p_dx,H,dx);
+    db_Copy3(H_p_dx+6,H+6);
+}
+
+inline void db_UpdateImageHomographyProjective(double H_p_dx[9],const double H[9],const double dx[8],int frozen_coord)
+{
+    int i,j;
+
+    for(j=0,i=0;i<9;i++)
+    {
+        if(i!=frozen_coord)
+        {
+            H_p_dx[i]=H[i]+dx[j];
+            j++;
+        }
+        else H_p_dx[i]=H[i];
+    }
+}
+
+inline void db_UpdateRotFocalHomography(double H_p_dx[9],const double H[9],const double dx[4])
+{
+    double f,fp,fpi;
+    double R[9],dR[9];
+
+    /*Updated matrix is diag(f+df,f+df)*dR*R*diag(1/(f+df),1/(f+df),1)*/
+    f=db_FocalAndRotFromCamRotFocalHomography(R,H);
+    db_IncrementalRotationMatrix(dR,dx);
+    db_Multiply3x3_3x3(H_p_dx,dR,R);
+    fp=f+dx[3];
+    fpi=db_SafeReciprocal(fp);
+    H_p_dx[2]*=fp;
+    H_p_dx[5]*=fp;
+    H_p_dx[6]*=fpi;
+    H_p_dx[7]*=fpi;
+}
+
+/*\}*/
+#endif /* DB_UTILITIES_CAMERA */
diff --git a/benchmark/feature_stab/db_vlvm/db_utilities_constants.h b/benchmark/feature_stab/db_vlvm/db_utilities_constants.h
new file mode 100644
index 00000000..07565efd
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_utilities_constants.h
@@ -0,0 +1,208 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_constants.h,v 1.2 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_UTILITIES_CONSTANTS
+#define DB_UTILITIES_CONSTANTS
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+
+/****************Constants********************/
+#define DB_E             2.7182818284590452354
+#define DB_LOG2E         1.4426950408889634074
+#define DB_LOG10E        0.43429448190325182765
+#define DB_LN2           0.69314718055994530942
+#define DB_LN10          2.30258509299404568402
+#define DB_PI            3.1415926535897932384626433832795
+#define DB_PI_2          1.57079632679489661923
+#define DB_PI_4          0.78539816339744830962
+#define DB_1_PI          0.31830988618379067154
+#define DB_2_PI          0.63661977236758134308
+#define DB_SQRTPI        1.7724538509055160272981674833411
+#define DB_SQRT_2PI      2.506628274631000502415765284811
+#define DB_2_SQRTPI      1.12837916709551257390
+#define DB_SQRT2         1.41421356237309504880
+#define DB_SQRT3         1.7320508075688772935274463415059
+#define DB_SQRT1_2       0.70710678118654752440
+#define DB_EPS           2.220446049250313e-016 /* for 32 bit double */
+
+/****************Default Parameters********************/
+/*Preemptive ransac parameters*/
+#define DB_DEFAULT_NR_SAMPLES 500
+#define DB_DEFAULT_CHUNK_SIZE 100
+#define DB_DEFAULT_GROUP_SIZE 10
+
+/*Optimisation parameters*/
+#define DB_DEFAULT_MAX_POINTS 1000
+#define DB_DEFAULT_MAX_ITERATIONS 25
+#define DB_DEFAULT_IMP_REQ 0.001
+
+/*Feature standard deviation parameters*/
+#define DB_POINT_STANDARDDEV (1.0/(826.0)) /*1 pixel for CIF (fraction of (image width+image height)/2)*/
+#define DB_OUTLIER_THRESHOLD 3.0 /*In number of DB_POINT_STANDARDDEV's*/
+#define DB_WORST_CASE 50.0 /*In number of DB_POINT_STANDARDDEV's*/
+
+/*Front-end parameters*/
+#define DB_DEFAULT_TARGET_NR_CORNERS 5000
+#define DB_DEFAULT_NR_FEATURE_BLOCKS 10
+#define DB_DEFAULT_ABS_CORNER_THRESHOLD 50000000.0
+#define DB_DEFAULT_REL_CORNER_THRESHOLD 0.00005
+#define DB_DEFAULT_MAX_DISPARITY 0.1
+#define DB_DEFAULT_NO_DISPARITY -1.0
+#define DB_DEFAULT_MAX_TRACK_LENGTH 300
+
+#define DB_DEFAULT_MAX_NR_CAMERAS 1000
+
+#define DB_DEFAULT_TRIPLE_STEP 2
+#define DB_DEFAULT_DOUBLE_STEP 2
+#define DB_DEFAULT_SINGLE_STEP 1
+#define DB_DEFAULT_NR_SINGLES 10
+#define DB_DEFAULT_NR_DOUBLES 1
+#define DB_DEFAULT_NR_TRIPLES 1
+
+#define DB_DEFAULT_TRIFOCAL_FOUR_STEPS 40
+
+#define DB_DEFAULT_EPIPOLAR_ERROR 1 /*in pixels*/
+
+////////////////////////// DOXYGEN /////////////////////
+
+/*!
+ * \def DB_DEFAULT_GROUP_SIZE
+ * \ingroup LMRobust
+ * \brief Default group size for db_PreemptiveRansac class.
+ * Group size is the number of observation costs multiplied together
+ * before a log of the product is added to the total cost.
+*/
+
+/*!
+ * \def DB_DEFAULT_TARGET_NR_CORNERS
+ * \ingroup FeatureDetection
+ * \brief Default target number of corners
+*/
+/*!
+ * \def DB_DEFAULT_NR_FEATURE_BLOCKS
+ * \ingroup FeatureDetection
+ * \brief Default number of regions (horizontal or vertical) that are considered separately
+ * for feature detection. The greater the number, the more uniform the distribution of
+ * detected features.
+*/
+/*!
+ * \def DB_DEFAULT_ABS_CORNER_THRESHOLD
+ * \ingroup FeatureDetection
+ * \brief Absolute feature strength threshold.
+*/
+/*!
+ * \def DB_DEFAULT_REL_CORNER_THRESHOLD
+ * \ingroup FeatureDetection
+ * \brief Relative feature strength threshold.
+*/
+/*!
+ * \def DB_DEFAULT_MAX_DISPARITY
+ * \ingroup FeatureMatching
+ * \brief Maximum disparity (as fraction of image size) allowed in feature matching
+*/
+ /*!
+ * \def DB_DEFAULT_NO_DISPARITY
+ * \ingroup FeatureMatching
+ * \brief Indicates that vertical disparity is the same as horizontal disparity.
+*/
+///////////////////////////////////////////////////////////////////////////////////
+ /*!
+ * \def DB_E
+ * \ingroup LMBasicUtilities
+ * \brief e
+*/
+ /*!
+ * \def DB_LOG2E
+ * \ingroup LMBasicUtilities
+ * \brief log2(e)
+*/
+ /*!
+ * \def DB_LOG10E
+ * \ingroup LMBasicUtilities
+ * \brief log10(e)
+*/
+ /*!
+ * \def DB_LOG10E
+ * \ingroup LMBasicUtilities
+ * \brief log10(e)
+*/
+/*!
+ * \def DB_LN2
+ * \ingroup LMBasicUtilities
+ * \brief ln(2)
+*/
+/*!
+ * \def DB_LN10
+ * \ingroup LMBasicUtilities
+ * \brief ln(10)
+*/
+/*!
+ * \def DB_PI
+ * \ingroup LMBasicUtilities
+ * \brief Pi
+*/
+/*!
+ * \def DB_PI_2
+ * \ingroup LMBasicUtilities
+ * \brief Pi/2
+*/
+/*!
+ * \def DB_PI_4
+ * \ingroup LMBasicUtilities
+ * \brief Pi/4
+*/
+/*!
+ * \def DB_1_PI
+ * \ingroup LMBasicUtilities
+ * \brief 1/Pi
+*/
+/*!
+ * \def DB_2_PI
+ * \ingroup LMBasicUtilities
+ * \brief 2/Pi
+*/
+/*!
+ * \def DB_SQRTPI
+ * \ingroup LMBasicUtilities
+ * \brief sqrt(Pi)
+*/
+/*!
+ * \def DB_SQRT_2PI
+ * \ingroup LMBasicUtilities
+ * \brief sqrt(2*Pi)
+*/
+/*!
+ * \def DB_SQRT2
+ * \ingroup LMBasicUtilities
+ * \brief sqrt(2)
+*/
+/*!
+ * \def DB_SQRT3
+ * \ingroup LMBasicUtilities
+ * \brief sqrt(3)
+*/
+/*!
+ * \def DB_SQRT1_2
+ * \ingroup LMBasicUtilities
+ * \brief sqrt(1/2)
+*/
+#endif /* DB_UTILITIES_CONSTANTS */
+
+
diff --git a/benchmark/feature_stab/db_vlvm/db_utilities_geometry.h b/benchmark/feature_stab/db_vlvm/db_utilities_geometry.h
new file mode 100644
index 00000000..f2155846
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_utilities_geometry.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_geometry.h,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_UTILITIES_GEOMETRY_H
+#define DB_UTILITIES_GEOMETRY_H
+
+#include "db_utilities.h"
+
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+/*! Get the inhomogenous 2D-point centroid of nr_point inhomogenous
+points in X*/
+inline void db_PointCentroid2D(double c[2],const double *X,int nr_points)
+{
+    int i;
+    double cx,cy,m;
+
+    cx=0;cy=0;
+    for(i=0;i<nr_points;i++)
+    {
+        cx+= *X++;
+        cy+= *X++;
+    }
+    if(nr_points)
+    {
+        m=1.0/((double)nr_points);
+        c[0]=cx*m;
+        c[1]=cy*m;
+    }
+    else c[0]=c[1]=0;
+}
+
+inline void db_PointCentroid2D(double c[2],const double * const *X,int nr_points)
+{
+    int i;
+    double cx,cy,m;
+    const double *temp;
+
+    cx=0;cy=0;
+    for(i=0;i<nr_points;i++)
+    {
+        temp= *X++;
+        cx+=temp[0];
+        cy+=temp[1];
+    }
+    if(nr_points)
+    {
+        m=1.0/((double)nr_points);
+        c[0]=cx*m;
+        c[1]=cy*m;
+    }
+    else c[0]=c[1]=0;
+}
+
+/*! Get the inhomogenous 3D-point centroid of nr_point inhomogenous
+points in X*/
+inline void db_PointCentroid3D(double c[3],const double *X,int nr_points)
+{
+    int i;
+    double cx,cy,cz,m;
+
+    cx=0;cy=0;cz=0;
+    for(i=0;i<nr_points;i++)
+    {
+        cx+= *X++;
+        cy+= *X++;
+        cz+= *X++;
+    }
+    if(nr_points)
+    {
+        m=1.0/((double)nr_points);
+        c[0]=cx*m;
+        c[1]=cy*m;
+        c[2]=cz*m;
+    }
+    else c[0]=c[1]=c[2]=0;
+}
+
+inline void db_PointCentroid3D(double c[3],const double * const *X,int nr_points)
+{
+    int i;
+    double cx,cy,cz,m;
+    const double *temp;
+
+    cx=0;cy=0;cz=0;
+    for(i=0;i<nr_points;i++)
+    {
+        temp= *X++;
+        cx+=temp[0];
+        cy+=temp[1];
+        cz+=temp[2];
+    }
+    if(nr_points)
+    {
+        m=1.0/((double)nr_points);
+        c[0]=cx*m;
+        c[1]=cy*m;
+        c[2]=cz*m;
+    }
+    else c[0]=c[1]=c[2]=0;
+}
+
+#endif /* DB_UTILITIES_GEOMETRY_H */
diff --git a/benchmark/feature_stab/db_vlvm/db_utilities_indexing.cpp b/benchmark/feature_stab/db_vlvm/db_utilities_indexing.cpp
new file mode 100644
index 00000000..30ce03aa
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_utilities_indexing.cpp
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_indexing.cpp,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */
+
+#include "db_utilities_indexing.h"
+#include "db_utilities.h"
+
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+
+void db_Zero(double *d,long nr)
+{
+    long i;
+    for(i=0;i<nr;i++) d[i]=0.0;
+}
+
+/*This routine breaks number in source into values smaller and larger than
+a pivot element. Values equal to the pivot are ignored*/
+void db_LeanPartitionOnPivot(double pivot,double *dest,const double *source,long first,long last,long *first_equal,long *last_equal)
+{
+    double temp;
+    const double *s_point;
+    const double *s_top;
+    double *d_bottom;
+    double *d_top;
+
+    s_point=source+first;
+    s_top=source+last;
+    d_bottom=dest+first;
+    d_top=dest+last;
+
+    for(;s_point<=s_top;)
+    {
+        temp= *(s_point++);
+        if(temp<pivot) *(d_bottom++)=temp;
+        else if(temp>pivot) *(d_top--)=temp;
+    }
+    *first_equal=d_bottom-dest;
+    *last_equal=d_top-dest;
+}
+
+double db_LeanQuickSelect(const double *s,long nr_elements,long pos,double *temp)
+{
+  long first=0;
+  long last=nr_elements-1;
+  double pivot;
+  long first_equal,last_equal;
+  double *tempA;
+  double *tempB;
+  double *tempC;
+  const double *source;
+  double *dest;
+
+  tempA=temp;
+  tempB=temp+nr_elements;
+  source=s;
+  dest=tempA;
+
+  for(;last-first>2;)
+  {
+      pivot=db_TripleMedian(source[first],source[last],source[(first+last)/2]);
+      db_LeanPartitionOnPivot(pivot,dest,source,first,last,&first_equal,&last_equal);
+
+      if(first_equal>pos) last=first_equal-1;
+      else if(last_equal<pos) first=last_equal+1;
+      else
+      {
+        return(pivot);
+      }
+
+      /*Swap pointers*/
+      tempC=tempA;
+      tempA=tempB;
+      tempB=tempC;
+      source=tempB;
+      dest=tempA;
+  }
+  pivot=db_TripleMedian(source[first],source[last],source[(first+last)/2]);
+
+  return(pivot);
+}
+
+float* db_AlignPointer_f(float *p,unsigned long nr_bytes)
+{
+    float *ap;
+    unsigned long m;
+
+    m=((unsigned long)p)%nr_bytes;
+    if(m) ap=(float*) (((unsigned long)p)-m+nr_bytes);
+    else ap=p;
+    return(ap);
+}
+
+short* db_AlignPointer_s(short *p,unsigned long nr_bytes)
+{
+    short *ap;
+    unsigned long m;
+
+    m=((unsigned long)p)%nr_bytes;
+    if(m) ap=(short*) (((unsigned long)p)-m+nr_bytes);
+    else ap=p;
+    return(ap);
+}
diff --git a/benchmark/feature_stab/db_vlvm/db_utilities_indexing.h b/benchmark/feature_stab/db_vlvm/db_utilities_indexing.h
new file mode 100644
index 00000000..01eeb9ea
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_utilities_indexing.h
@@ -0,0 +1,270 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_indexing.h,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_UTILITIES_INDEXING
+#define DB_UTILITIES_INDEXING
+
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+
+#include "db_utilities.h"
+
+/*!
+ * \defgroup LMIndexing (LM) Indexing Utilities (Order Statistics, Matrix Operations)
+ */
+/*\{*/
+
+inline void db_SetupMatrixRefs(double **ar,long rows,long cols,double *a)
+{
+    long i;
+    for(i=0;i<rows;i++) ar[i]=&a[i*cols];
+}
+
+inline void db_SymmetricExtendUpperToLower(double **A,int rows,int cols)
+{
+    int i,j;
+    for(i=1;i<rows;i++) for(j=0;j<i;j++) A[i][j]=A[j][i];
+}
+
+void inline db_MultiplyMatrixVectorAtb(double *c,const double * const *At,const double *b,int arows,int acols)
+{
+    int i,j;
+    double acc;
+
+    for(i=0;i<arows;i++)
+    {
+        acc=0;
+        for(j=0;j<acols;j++) acc+=At[j][i]*b[j];
+        c[i]=acc;
+    }
+}
+
+inline void db_MultiplyMatricesAB(double **C,const double * const *A,const double * const *B,int arows,int acols,int bcols)
+{
+    int i,j,k;
+    double acc;
+
+    for(i=0;i<arows;i++) for(j=0;j<bcols;j++)
+    {
+        acc=0;
+        for(k=0;k<acols;k++) acc+=A[i][k]*B[k][j];
+        C[i][j]=acc;
+    }
+}
+
+inline void db_UpperMultiplyMatricesAtB(double **Cu,const double * const *At,const double * const *B,int arows,int acols,int bcols)
+{
+    int i,j,k;
+    double acc;
+
+    for(i=0;i<arows;i++) for(j=i;j<bcols;j++)
+    {
+        acc=0;
+        for(k=0;k<acols;k++) acc+=At[k][i]*B[k][j];
+        Cu[i][j]=acc;
+    }
+}
+
+DB_API void db_Zero(double *d,long nr);
+
+inline int db_MaxIndex2(double s[2])
+{
+    if(s[0]>=s[1]) return(0);
+    return(1);
+}
+
+inline int db_MaxIndex3(const double s[3])
+{
+    double best;
+    int pos;
+
+    best=s[0];pos=0;
+    if(s[1]>best){best=s[1];pos=1;}
+    if(s[2]>best){best=s[2];pos=2;}
+    return(pos);
+}
+
+inline int db_MaxIndex4(const double s[4])
+{
+    double best;
+    int pos;
+
+    best=s[0];pos=0;
+    if(s[1]>best){best=s[1];pos=1;}
+    if(s[2]>best){best=s[2];pos=2;}
+    if(s[3]>best){best=s[3];pos=3;}
+    return(pos);
+}
+
+inline int db_MaxIndex5(const double s[5])
+{
+    double best;
+    int pos;
+
+    best=s[0];pos=0;
+    if(s[1]>best){best=s[1];pos=1;}
+    if(s[2]>best){best=s[2];pos=2;}
+    if(s[3]>best){best=s[3];pos=3;}
+    if(s[4]>best){best=s[4];pos=4;}
+    return(pos);
+}
+
+inline int db_MaxIndex6(const double s[6])
+{
+    double best;
+    int pos;
+
+    best=s[0];pos=0;
+    if(s[1]>best){best=s[1];pos=1;}
+    if(s[2]>best){best=s[2];pos=2;}
+    if(s[3]>best){best=s[3];pos=3;}
+    if(s[4]>best){best=s[4];pos=4;}
+    if(s[5]>best){best=s[5];pos=5;}
+    return(pos);
+}
+
+inline int db_MaxIndex7(const double s[7])
+{
+    double best;
+    int pos;
+
+    best=s[0];pos=0;
+    if(s[1]>best){best=s[1];pos=1;}
+    if(s[2]>best){best=s[2];pos=2;}
+    if(s[3]>best){best=s[3];pos=3;}
+    if(s[4]>best){best=s[4];pos=4;}
+    if(s[5]>best){best=s[5];pos=5;}
+    if(s[6]>best){best=s[6];pos=6;}
+    return(pos);
+}
+
+inline int db_MinIndex7(const double s[7])
+{
+    double best;
+    int pos;
+
+    best=s[0];pos=0;
+    if(s[1]<best){best=s[1];pos=1;}
+    if(s[2]<best){best=s[2];pos=2;}
+    if(s[3]<best){best=s[3];pos=3;}
+    if(s[4]<best){best=s[4];pos=4;}
+    if(s[5]<best){best=s[5];pos=5;}
+    if(s[6]<best){best=s[6];pos=6;}
+    return(pos);
+}
+
+inline int db_MinIndex9(const double s[9])
+{
+    double best;
+    int pos;
+
+    best=s[0];pos=0;
+    if(s[1]<best){best=s[1];pos=1;}
+    if(s[2]<best){best=s[2];pos=2;}
+    if(s[3]<best){best=s[3];pos=3;}
+    if(s[4]<best){best=s[4];pos=4;}
+    if(s[5]<best){best=s[5];pos=5;}
+    if(s[6]<best){best=s[6];pos=6;}
+    if(s[7]<best){best=s[7];pos=7;}
+    if(s[8]<best){best=s[8];pos=8;}
+    return(pos);
+}
+
+inline int db_MaxAbsIndex3(const double *s)
+{
+    double t,best;
+    int pos;
+
+    best=fabs(s[0]);pos=0;
+    t=fabs(s[1]);if(t>best){best=t;pos=1;}
+    t=fabs(s[2]);if(t>best){pos=2;}
+    return(pos);
+}
+
+inline int db_MaxAbsIndex9(const double *s)
+{
+    double t,best;
+    int pos;
+
+    best=fabs(s[0]);pos=0;
+    t=fabs(s[1]);if(t>best){best=t;pos=1;}
+    t=fabs(s[2]);if(t>best){best=t;pos=2;}
+    t=fabs(s[3]);if(t>best){best=t;pos=3;}
+    t=fabs(s[4]);if(t>best){best=t;pos=4;}
+    t=fabs(s[5]);if(t>best){best=t;pos=5;}
+    t=fabs(s[6]);if(t>best){best=t;pos=6;}
+    t=fabs(s[7]);if(t>best){best=t;pos=7;}
+    t=fabs(s[8]);if(t>best){best=t;pos=8;}
+    return(pos);
+}
+
+
+/*!
+Select ordinal pos (zero based) out of nr_elements in s.
+temp should point to alloced memory of at least nr_elements*2
+Optimized runtimes on 450MHz:
+\code
+  30 with   3 microsecs
+ 100 with  11 microsecs
+ 300 with  30 microsecs
+ 500 with  40 microsecs
+1000 with 100 microsecs
+5000 with 540 microsecs
+\endcode
+so the expected runtime is around
+(nr_elements/10) microseconds
+The total quickselect cost of splitting 500 hypotheses recursively
+is thus around 100 microseconds
+
+Does the same operation as std::nth_element().
+*/
+DB_API double db_LeanQuickSelect(const double *s,long nr_elements,long pos,double *temp);
+
+/*!
+ Median of 3 doubles
+ */
+inline double db_TripleMedian(double a,double b,double c)
+{
+    if(a>b)
+    {
+        if(c>a) return(a);
+        else if(c>b) return(c);
+        else return(b);
+    }
+    else
+    {
+        if(c>b) return(b);
+        else if(c>a) return(c);
+        else return(a);
+    }
+}
+
+/*!
+Align float pointer to nr_bytes by moving forward
+*/
+DB_API float* db_AlignPointer_f(float *p,unsigned long nr_bytes);
+
+/*!
+Align short pointer to nr_bytes by moving forward
+*/
+DB_API short* db_AlignPointer_s(short *p,unsigned long nr_bytes);
+
+#endif /* DB_UTILITIES_INDEXING */
diff --git a/benchmark/feature_stab/db_vlvm/db_utilities_linalg.cpp b/benchmark/feature_stab/db_vlvm/db_utilities_linalg.cpp
new file mode 100644
index 00000000..8f68b303
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_utilities_linalg.cpp
@@ -0,0 +1,376 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_linalg.cpp,v 1.3 2011/06/17 14:03:31 mbansal Exp $ */
+
+#include "db_utilities_linalg.h"
+#include "db_utilities.h"
+
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+
+/*Cholesky-factorize symmetric positive definite 6 x 6 matrix A. Upper
+part of A is used from the input. The Cholesky factor is output as
+subdiagonal part in A and diagonal in d, which is 6-dimensional*/
+void db_CholeskyDecomp6x6(double A[36],double d[6])
+{
+    double s,temp;
+
+    /*[50 mult 35 add 6sqrt=85flops 6func]*/
+    /*i=0*/
+    s=A[0];
+    d[0]=((s>0.0)?sqrt(s):1.0);
+    temp=db_SafeReciprocal(d[0]);
+    A[6]=A[1]*temp;
+    A[12]=A[2]*temp;
+    A[18]=A[3]*temp;
+    A[24]=A[4]*temp;
+    A[30]=A[5]*temp;
+    /*i=1*/
+    s=A[7]-A[6]*A[6];
+    d[1]=((s>0.0)?sqrt(s):1.0);
+    temp=db_SafeReciprocal(d[1]);
+    A[13]=(A[8]-A[6]*A[12])*temp;
+    A[19]=(A[9]-A[6]*A[18])*temp;
+    A[25]=(A[10]-A[6]*A[24])*temp;
+    A[31]=(A[11]-A[6]*A[30])*temp;
+    /*i=2*/
+    s=A[14]-A[12]*A[12]-A[13]*A[13];
+    d[2]=((s>0.0)?sqrt(s):1.0);
+    temp=db_SafeReciprocal(d[2]);
+    A[20]=(A[15]-A[12]*A[18]-A[13]*A[19])*temp;
+    A[26]=(A[16]-A[12]*A[24]-A[13]*A[25])*temp;
+    A[32]=(A[17]-A[12]*A[30]-A[13]*A[31])*temp;
+    /*i=3*/
+    s=A[21]-A[18]*A[18]-A[19]*A[19]-A[20]*A[20];
+    d[3]=((s>0.0)?sqrt(s):1.0);
+    temp=db_SafeReciprocal(d[3]);
+    A[27]=(A[22]-A[18]*A[24]-A[19]*A[25]-A[20]*A[26])*temp;
+    A[33]=(A[23]-A[18]*A[30]-A[19]*A[31]-A[20]*A[32])*temp;
+    /*i=4*/
+    s=A[28]-A[24]*A[24]-A[25]*A[25]-A[26]*A[26]-A[27]*A[27];
+    d[4]=((s>0.0)?sqrt(s):1.0);
+    temp=db_SafeReciprocal(d[4]);
+    A[34]=(A[29]-A[24]*A[30]-A[25]*A[31]-A[26]*A[32]-A[27]*A[33])*temp;
+    /*i=5*/
+    s=A[35]-A[30]*A[30]-A[31]*A[31]-A[32]*A[32]-A[33]*A[33]-A[34]*A[34];
+    d[5]=((s>0.0)?sqrt(s):1.0);
+}
+
+/*Cholesky-factorize symmetric positive definite n x n matrix A.Part
+above diagonal of A is used from the input, diagonal of A is assumed to
+be stored in d. The Cholesky factor is output as
+subdiagonal part in A and diagonal in d, which is n-dimensional*/
+void db_CholeskyDecompSeparateDiagonal(double **A,double *d,int n)
+{
+    int i,j,k;
+    double s;
+    double temp = 0.0;
+
+    for(i=0;i<n;i++) for(j=i;j<n;j++)
+    {
+        if(i==j) s=d[i];
+        else s=A[i][j];
+        for(k=i-1;k>=0;k--) s-=A[i][k]*A[j][k];
+        if(i==j)
+        {
+            d[i]=((s>0.0)?sqrt(s):1.0);
+            temp=db_SafeReciprocal(d[i]);
+        }
+        else A[j][i]=s*temp;
+    }
+}
+
+/*Backsubstitute L%transpose(L)*x=b for x given the Cholesky decomposition
+of an n x n matrix and the right hand side b. The vector b is unchanged*/
+void db_CholeskyBacksub(double *x,const double * const *A,const double *d,int n,const double *b)
+{
+    int i,k;
+    double s;
+
+    for(i=0;i<n;i++)
+    {
+        for(s=b[i],k=i-1;k>=0;k--) s-=A[i][k]*x[k];
+        x[i]=db_SafeDivision(s,d[i]);
+    }
+    for(i=n-1;i>=0;i--)
+    {
+        for(s=x[i],k=i+1;k<n;k++) s-=A[k][i]*x[k];
+        x[i]=db_SafeDivision(s,d[i]);
+    }
+}
+
+/*Cholesky-factorize symmetric positive definite 3 x 3 matrix A. Part
+above diagonal of A is used from the input, diagonal of A is assumed to
+be stored in d. The Cholesky factor is output as subdiagonal part in A
+and diagonal in d, which is 3-dimensional*/
+void db_CholeskyDecomp3x3SeparateDiagonal(double A[9],double d[3])
+{
+    double s,temp;
+
+    /*i=0*/
+    s=d[0];
+    d[0]=((s>0.0)?sqrt(s):1.0);
+    temp=db_SafeReciprocal(d[0]);
+    A[3]=A[1]*temp;
+    A[6]=A[2]*temp;
+    /*i=1*/
+    s=d[1]-A[3]*A[3];
+    d[1]=((s>0.0)?sqrt(s):1.0);
+    temp=db_SafeReciprocal(d[1]);
+    A[7]=(A[5]-A[3]*A[6])*temp;
+    /*i=2*/
+    s=d[2]-A[6]*A[6]-A[7]*A[7];
+    d[2]=((s>0.0)?sqrt(s):1.0);
+}
+
+/*Backsubstitute L%transpose(L)*x=b for x given the Cholesky decomposition
+of a 3 x 3 matrix and the right hand side b. The vector b is unchanged*/
+void db_CholeskyBacksub3x3(double x[3],const double A[9],const double d[3],const double b[3])
+{
+    /*[42 mult 30 add=72flops]*/
+    x[0]=db_SafeDivision(b[0],d[0]);
+    x[1]=db_SafeDivision((b[1]-A[3]*x[0]),d[1]);
+    x[2]=db_SafeDivision((b[2]-A[6]*x[0]-A[7]*x[1]),d[2]);
+    x[2]=db_SafeDivision(x[2],d[2]);
+    x[1]=db_SafeDivision((x[1]-A[7]*x[2]),d[1]);
+    x[0]=db_SafeDivision((x[0]-A[6]*x[2]-A[3]*x[1]),d[0]);
+}
+
+/*Backsubstitute L%transpose(L)*x=b for x given the Cholesky decomposition
+of a 6 x 6 matrix and the right hand side b. The vector b is unchanged*/
+void db_CholeskyBacksub6x6(double x[6],const double A[36],const double d[6],const double b[6])
+{
+    /*[42 mult 30 add=72flops]*/
+    x[0]=db_SafeDivision(b[0],d[0]);
+    x[1]=db_SafeDivision((b[1]-A[6]*x[0]),d[1]);
+    x[2]=db_SafeDivision((b[2]-A[12]*x[0]-A[13]*x[1]),d[2]);
+    x[3]=db_SafeDivision((b[3]-A[18]*x[0]-A[19]*x[1]-A[20]*x[2]),d[3]);
+    x[4]=db_SafeDivision((b[4]-A[24]*x[0]-A[25]*x[1]-A[26]*x[2]-A[27]*x[3]),d[4]);
+    x[5]=db_SafeDivision((b[5]-A[30]*x[0]-A[31]*x[1]-A[32]*x[2]-A[33]*x[3]-A[34]*x[4]),d[5]);
+    x[5]=db_SafeDivision(x[5],d[5]);
+    x[4]=db_SafeDivision((x[4]-A[34]*x[5]),d[4]);
+    x[3]=db_SafeDivision((x[3]-A[33]*x[5]-A[27]*x[4]),d[3]);
+    x[2]=db_SafeDivision((x[2]-A[32]*x[5]-A[26]*x[4]-A[20]*x[3]),d[2]);
+    x[1]=db_SafeDivision((x[1]-A[31]*x[5]-A[25]*x[4]-A[19]*x[3]-A[13]*x[2]),d[1]);
+    x[0]=db_SafeDivision((x[0]-A[30]*x[5]-A[24]*x[4]-A[18]*x[3]-A[12]*x[2]-A[6]*x[1]),d[0]);
+}
+
+
+void db_Orthogonalize6x7(double A[42],int orthonormalize)
+{
+    int i;
+    double ss[6];
+
+    /*Compute square sums of rows*/
+    ss[0]=db_SquareSum7(A);
+    ss[1]=db_SquareSum7(A+7);
+    ss[2]=db_SquareSum7(A+14);
+    ss[3]=db_SquareSum7(A+21);
+    ss[4]=db_SquareSum7(A+28);
+    ss[5]=db_SquareSum7(A+35);
+
+    ss[1]-=db_OrthogonalizePair7(A+7 ,A,ss[0]);
+    ss[2]-=db_OrthogonalizePair7(A+14,A,ss[0]);
+    ss[3]-=db_OrthogonalizePair7(A+21,A,ss[0]);
+    ss[4]-=db_OrthogonalizePair7(A+28,A,ss[0]);
+    ss[5]-=db_OrthogonalizePair7(A+35,A,ss[0]);
+
+    /*Pivot on largest ss (could also be done on ss/(original_ss))*/
+    i=db_MaxIndex5(ss+1);
+    db_OrthogonalizationSwap7(A+7,i,ss+1);
+
+    ss[2]-=db_OrthogonalizePair7(A+14,A+7,ss[1]);
+    ss[3]-=db_OrthogonalizePair7(A+21,A+7,ss[1]);
+    ss[4]-=db_OrthogonalizePair7(A+28,A+7,ss[1]);
+    ss[5]-=db_OrthogonalizePair7(A+35,A+7,ss[1]);
+
+    i=db_MaxIndex4(ss+2);
+    db_OrthogonalizationSwap7(A+14,i,ss+2);
+
+    ss[3]-=db_OrthogonalizePair7(A+21,A+14,ss[2]);
+    ss[4]-=db_OrthogonalizePair7(A+28,A+14,ss[2]);
+    ss[5]-=db_OrthogonalizePair7(A+35,A+14,ss[2]);
+
+    i=db_MaxIndex3(ss+3);
+    db_OrthogonalizationSwap7(A+21,i,ss+3);
+
+    ss[4]-=db_OrthogonalizePair7(A+28,A+21,ss[3]);
+    ss[5]-=db_OrthogonalizePair7(A+35,A+21,ss[3]);
+
+    i=db_MaxIndex2(ss+4);
+    db_OrthogonalizationSwap7(A+28,i,ss+4);
+
+    ss[5]-=db_OrthogonalizePair7(A+35,A+28,ss[4]);
+
+    if(orthonormalize)
+    {
+        db_MultiplyScalar7(A   ,db_SafeSqrtReciprocal(ss[0]));
+        db_MultiplyScalar7(A+7 ,db_SafeSqrtReciprocal(ss[1]));
+        db_MultiplyScalar7(A+14,db_SafeSqrtReciprocal(ss[2]));
+        db_MultiplyScalar7(A+21,db_SafeSqrtReciprocal(ss[3]));
+        db_MultiplyScalar7(A+28,db_SafeSqrtReciprocal(ss[4]));
+        db_MultiplyScalar7(A+35,db_SafeSqrtReciprocal(ss[5]));
+    }
+}
+
+void db_Orthogonalize8x9(double A[72],int orthonormalize)
+{
+    int i;
+    double ss[8];
+
+    /*Compute square sums of rows*/
+    ss[0]=db_SquareSum9(A);
+    ss[1]=db_SquareSum9(A+9);
+    ss[2]=db_SquareSum9(A+18);
+    ss[3]=db_SquareSum9(A+27);
+    ss[4]=db_SquareSum9(A+36);
+    ss[5]=db_SquareSum9(A+45);
+    ss[6]=db_SquareSum9(A+54);
+    ss[7]=db_SquareSum9(A+63);
+
+    ss[1]-=db_OrthogonalizePair9(A+9 ,A,ss[0]);
+    ss[2]-=db_OrthogonalizePair9(A+18,A,ss[0]);
+    ss[3]-=db_OrthogonalizePair9(A+27,A,ss[0]);
+    ss[4]-=db_OrthogonalizePair9(A+36,A,ss[0]);
+    ss[5]-=db_OrthogonalizePair9(A+45,A,ss[0]);
+    ss[6]-=db_OrthogonalizePair9(A+54,A,ss[0]);
+    ss[7]-=db_OrthogonalizePair9(A+63,A,ss[0]);
+
+    /*Pivot on largest ss (could also be done on ss/(original_ss))*/
+    i=db_MaxIndex7(ss+1);
+    db_OrthogonalizationSwap9(A+9,i,ss+1);
+
+    ss[2]-=db_OrthogonalizePair9(A+18,A+9,ss[1]);
+    ss[3]-=db_OrthogonalizePair9(A+27,A+9,ss[1]);
+    ss[4]-=db_OrthogonalizePair9(A+36,A+9,ss[1]);
+    ss[5]-=db_OrthogonalizePair9(A+45,A+9,ss[1]);
+    ss[6]-=db_OrthogonalizePair9(A+54,A+9,ss[1]);
+    ss[7]-=db_OrthogonalizePair9(A+63,A+9,ss[1]);
+
+    i=db_MaxIndex6(ss+2);
+    db_OrthogonalizationSwap9(A+18,i,ss+2);
+
+    ss[3]-=db_OrthogonalizePair9(A+27,A+18,ss[2]);
+    ss[4]-=db_OrthogonalizePair9(A+36,A+18,ss[2]);
+    ss[5]-=db_OrthogonalizePair9(A+45,A+18,ss[2]);
+    ss[6]-=db_OrthogonalizePair9(A+54,A+18,ss[2]);
+    ss[7]-=db_OrthogonalizePair9(A+63,A+18,ss[2]);
+
+    i=db_MaxIndex5(ss+3);
+    db_OrthogonalizationSwap9(A+27,i,ss+3);
+
+    ss[4]-=db_OrthogonalizePair9(A+36,A+27,ss[3]);
+    ss[5]-=db_OrthogonalizePair9(A+45,A+27,ss[3]);
+    ss[6]-=db_OrthogonalizePair9(A+54,A+27,ss[3]);
+    ss[7]-=db_OrthogonalizePair9(A+63,A+27,ss[3]);
+
+    i=db_MaxIndex4(ss+4);
+    db_OrthogonalizationSwap9(A+36,i,ss+4);
+
+    ss[5]-=db_OrthogonalizePair9(A+45,A+36,ss[4]);
+    ss[6]-=db_OrthogonalizePair9(A+54,A+36,ss[4]);
+    ss[7]-=db_OrthogonalizePair9(A+63,A+36,ss[4]);
+
+    i=db_MaxIndex3(ss+5);
+    db_OrthogonalizationSwap9(A+45,i,ss+5);
+
+    ss[6]-=db_OrthogonalizePair9(A+54,A+45,ss[5]);
+    ss[7]-=db_OrthogonalizePair9(A+63,A+45,ss[5]);
+
+    i=db_MaxIndex2(ss+6);
+    db_OrthogonalizationSwap9(A+54,i,ss+6);
+
+    ss[7]-=db_OrthogonalizePair9(A+63,A+54,ss[6]);
+
+    if(orthonormalize)
+    {
+        db_MultiplyScalar9(A   ,db_SafeSqrtReciprocal(ss[0]));
+        db_MultiplyScalar9(A+9 ,db_SafeSqrtReciprocal(ss[1]));
+        db_MultiplyScalar9(A+18,db_SafeSqrtReciprocal(ss[2]));
+        db_MultiplyScalar9(A+27,db_SafeSqrtReciprocal(ss[3]));
+        db_MultiplyScalar9(A+36,db_SafeSqrtReciprocal(ss[4]));
+        db_MultiplyScalar9(A+45,db_SafeSqrtReciprocal(ss[5]));
+        db_MultiplyScalar9(A+54,db_SafeSqrtReciprocal(ss[6]));
+        db_MultiplyScalar9(A+63,db_SafeSqrtReciprocal(ss[7]));
+    }
+}
+
+void db_NullVectorOrthonormal6x7(double x[7],const double A[42])
+{
+    int i;
+    double omss[7];
+    const double *B;
+
+    /*Pivot by choosing row of the identity matrix
+    (the one corresponding to column of A with smallest square sum)*/
+    omss[0]=db_SquareSum6Stride7(A);
+    omss[1]=db_SquareSum6Stride7(A+1);
+    omss[2]=db_SquareSum6Stride7(A+2);
+    omss[3]=db_SquareSum6Stride7(A+3);
+    omss[4]=db_SquareSum6Stride7(A+4);
+    omss[5]=db_SquareSum6Stride7(A+5);
+    omss[6]=db_SquareSum6Stride7(A+6);
+    i=db_MinIndex7(omss);
+    /*orthogonalize that row against all previous rows
+    and normalize it*/
+    B=A+i;
+    db_MultiplyScalarCopy7(x,A,-B[0]);
+    db_RowOperation7(x,A+7 ,B[7]);
+    db_RowOperation7(x,A+14,B[14]);
+    db_RowOperation7(x,A+21,B[21]);
+    db_RowOperation7(x,A+28,B[28]);
+    db_RowOperation7(x,A+35,B[35]);
+    x[i]+=1.0;
+    db_MultiplyScalar7(x,db_SafeSqrtReciprocal(1.0-omss[i]));
+}
+
+void db_NullVectorOrthonormal8x9(double x[9],const double A[72])
+{
+    int i;
+    double omss[9];
+    const double *B;
+
+    /*Pivot by choosing row of the identity matrix
+    (the one corresponding to column of A with smallest square sum)*/
+    omss[0]=db_SquareSum8Stride9(A);
+    omss[1]=db_SquareSum8Stride9(A+1);
+    omss[2]=db_SquareSum8Stride9(A+2);
+    omss[3]=db_SquareSum8Stride9(A+3);
+    omss[4]=db_SquareSum8Stride9(A+4);
+    omss[5]=db_SquareSum8Stride9(A+5);
+    omss[6]=db_SquareSum8Stride9(A+6);
+    omss[7]=db_SquareSum8Stride9(A+7);
+    omss[8]=db_SquareSum8Stride9(A+8);
+    i=db_MinIndex9(omss);
+    /*orthogonalize that row against all previous rows
+    and normalize it*/
+    B=A+i;
+    db_MultiplyScalarCopy9(x,A,-B[0]);
+    db_RowOperation9(x,A+9 ,B[9]);
+    db_RowOperation9(x,A+18,B[18]);
+    db_RowOperation9(x,A+27,B[27]);
+    db_RowOperation9(x,A+36,B[36]);
+    db_RowOperation9(x,A+45,B[45]);
+    db_RowOperation9(x,A+54,B[54]);
+    db_RowOperation9(x,A+63,B[63]);
+    x[i]+=1.0;
+    db_MultiplyScalar9(x,db_SafeSqrtReciprocal(1.0-omss[i]));
+}
+
diff --git a/benchmark/feature_stab/db_vlvm/db_utilities_linalg.h b/benchmark/feature_stab/db_vlvm/db_utilities_linalg.h
new file mode 100644
index 00000000..1f63d4e5
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_utilities_linalg.h
@@ -0,0 +1,802 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_linalg.h,v 1.5 2011/06/17 14:03:31 mbansal Exp $ */
+
+#ifndef DB_UTILITIES_LINALG
+#define DB_UTILITIES_LINALG
+
+#include "db_utilities.h"
+
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+/*!
+ * \defgroup LMLinAlg (LM) Linear Algebra Utilities (QR factorization, orthogonal basis, etc.)
+ */
+
+/*!
+ \ingroup LMBasicUtilities
+ */
+inline void db_MultiplyScalar6(double A[6],double mult)
+{
+    (*A++) *= mult; (*A++) *= mult; (*A++) *= mult; (*A++) *= mult; (*A++) *= mult;
+    (*A++) *= mult;
+}
+/*!
+ \ingroup LMBasicUtilities
+ */
+inline void db_MultiplyScalar7(double A[7],double mult)
+{
+    (*A++) *= mult; (*A++) *= mult; (*A++) *= mult; (*A++) *= mult; (*A++) *= mult;
+    (*A++) *= mult; (*A++) *= mult;
+}
+/*!
+ \ingroup LMBasicUtilities
+ */
+inline void db_MultiplyScalar9(double A[9],double mult)
+{
+    (*A++) *= mult; (*A++) *= mult; (*A++) *= mult; (*A++) *= mult; (*A++) *= mult;
+    (*A++) *= mult; (*A++) *= mult; (*A++) *= mult; (*A++) *= mult;
+}
+
+/*!
+ \ingroup LMBasicUtilities
+ */
+inline double db_SquareSum6Stride7(const double *x)
+{
+    return(db_sqr(x[0])+db_sqr(x[7])+db_sqr(x[14])+
+        db_sqr(x[21])+db_sqr(x[28])+db_sqr(x[35]));
+}
+
+/*!
+ \ingroup LMBasicUtilities
+ */
+inline double db_SquareSum8Stride9(const double *x)
+{
+    return(db_sqr(x[0])+db_sqr(x[9])+db_sqr(x[18])+
+        db_sqr(x[27])+db_sqr(x[36])+db_sqr(x[45])+
+        db_sqr(x[54])+db_sqr(x[63]));
+}
+
+/*!
+ \ingroup LMLinAlg
+ Cholesky-factorize symmetric positive definite 6 x 6 matrix A. Upper
+part of A is used from the input. The Cholesky factor is output as
+subdiagonal part in A and diagonal in d, which is 6-dimensional
+1.9 microseconds on 450MHz*/
+DB_API void db_CholeskyDecomp6x6(double A[36],double d[6]);
+
+/*!
+ \ingroup LMLinAlg
+ Backsubstitute L%transpose(L)*x=b for x given the Cholesky decomposition
+of a 6 x 6 matrix and the right hand side b. The vector b is unchanged
+1.3 microseconds on 450MHz*/
+DB_API void db_CholeskyBacksub6x6(double x[6],const double A[36],const double d[6],const double b[6]);
+
+/*!
+ \ingroup LMLinAlg
+ Cholesky-factorize symmetric positive definite n x n matrix A.Part
+above diagonal of A is used from the input, diagonal of A is assumed to
+be stored in d. The Cholesky factor is output as
+subdiagonal part in A and diagonal in d, which is n-dimensional*/
+DB_API void db_CholeskyDecompSeparateDiagonal(double **A,double *d,int n);
+
+/*!
+ \ingroup LMLinAlg
+ Backsubstitute L%transpose(L)*x=b for x given the Cholesky decomposition
+of an n x n matrix and the right hand side b. The vector b is unchanged*/
+DB_API void db_CholeskyBacksub(double *x,const double * const *A,const double *d,int n,const double *b);
+
+/*!
+ \ingroup LMLinAlg
+ Cholesky-factorize symmetric positive definite 3 x 3 matrix A. Part
+above diagonal of A is used from the input, diagonal of A is assumed to
+be stored in d. The Cholesky factor is output as subdiagonal part in A
+and diagonal in d, which is 3-dimensional*/
+DB_API void db_CholeskyDecomp3x3SeparateDiagonal(double A[9],double d[3]);
+
+/*!
+ \ingroup LMLinAlg
+ Backsubstitute L%transpose(L)*x=b for x given the Cholesky decomposition
+of a 3 x 3 matrix and the right hand side b. The vector b is unchanged*/
+DB_API void db_CholeskyBacksub3x3(double x[3],const double A[9],const double d[3],const double b[3]);
+
+/*!
+ \ingroup LMLinAlg
+ perform A-=B*mult*/
+inline void db_RowOperation3(double A[3],const double B[3],double mult)
+{
+    *A++ -= mult*(*B++); *A++ -= mult*(*B++); *A++ -= mult*(*B++);
+}
+
+/*!
+ \ingroup LMLinAlg
+ */
+inline void db_RowOperation7(double A[7],const double B[7],double mult)
+{
+    *A++ -= mult*(*B++); *A++ -= mult*(*B++); *A++ -= mult*(*B++); *A++ -= mult*(*B++); *A++ -= mult*(*B++);
+    *A++ -= mult*(*B++); *A++ -= mult*(*B++);
+}
+
+/*!
+ \ingroup LMLinAlg
+ */
+inline void db_RowOperation9(double A[9],const double B[9],double mult)
+{
+    *A++ -= mult*(*B++); *A++ -= mult*(*B++); *A++ -= mult*(*B++); *A++ -= mult*(*B++); *A++ -= mult*(*B++);
+    *A++ -= mult*(*B++); *A++ -= mult*(*B++); *A++ -= mult*(*B++); *A++ -= mult*(*B++);
+}
+
+/*!
+ \ingroup LMBasicUtilities
+ Swap values of A[7] and B[7]
+ */
+inline void db_Swap7(double A[7],double B[7])
+{
+    double temp;
+    temp= *A; *A++ = *B; *B++ =temp;    temp= *A; *A++ = *B; *B++ =temp;    temp= *A; *A++ = *B; *B++ =temp;
+    temp= *A; *A++ = *B; *B++ =temp;    temp= *A; *A++ = *B; *B++ =temp;    temp= *A; *A++ = *B; *B++ =temp;
+    temp= *A; *A++ = *B; *B++ =temp;
+}
+
+/*!
+ \ingroup LMBasicUtilities
+ Swap values of A[9] and B[9]
+ */
+inline void db_Swap9(double A[9],double B[9])
+{
+    double temp;
+    temp= *A; *A++ = *B; *B++ =temp;    temp= *A; *A++ = *B; *B++ =temp;    temp= *A; *A++ = *B; *B++ =temp;
+    temp= *A; *A++ = *B; *B++ =temp;    temp= *A; *A++ = *B; *B++ =temp;    temp= *A; *A++ = *B; *B++ =temp;
+    temp= *A; *A++ = *B; *B++ =temp;    temp= *A; *A++ = *B; *B++ =temp;    temp= *A; *A++ = *B; *B++ =temp;
+}
+
+
+/*!
+ \ingroup LMLinAlg
+ */
+DB_API void db_Orthogonalize6x7(double A[42],int orthonormalize=0);
+
+/*!
+ \ingroup LMLinAlg
+ */
+DB_API void db_Orthogonalize8x9(double A[72],int orthonormalize=0);
+
+/*!
+ \ingroup LMLinAlg
+ */
+inline double db_OrthogonalizePair7(double *x,const double *v,double ssv)
+{
+    double m,sp,sp_m;
+
+    m=db_SafeReciprocal(ssv);
+    sp=db_ScalarProduct7(x,v);
+    sp_m=sp*m;
+    db_RowOperation7(x,v,sp_m);
+    return(sp*sp_m);
+}
+
+/*!
+ \ingroup LMLinAlg
+ */
+inline double db_OrthogonalizePair9(double *x,const double *v,double ssv)
+{
+    double m,sp,sp_m;
+
+    m=db_SafeReciprocal(ssv);
+    sp=db_ScalarProduct9(x,v);
+    sp_m=sp*m;
+    db_RowOperation9(x,v,sp_m);
+    return(sp*sp_m);
+}
+
+/*!
+ \ingroup LMLinAlg
+ */
+inline void db_OrthogonalizationSwap7(double *A,int i,double *ss)
+{
+    double temp;
+
+    db_Swap7(A,A+7*i);
+    temp=ss[0]; ss[0]=ss[i]; ss[i]=temp;
+}
+/*!
+ \ingroup LMLinAlg
+ */
+inline void db_OrthogonalizationSwap9(double *A,int i,double *ss)
+{
+    double temp;
+
+    db_Swap9(A,A+9*i);
+    temp=ss[0]; ss[0]=ss[i]; ss[i]=temp;
+}
+
+/*!
+ \ingroup LMLinAlg
+ */
+DB_API void db_NullVectorOrthonormal6x7(double x[7],const double A[42]);
+/*!
+ \ingroup LMLinAlg
+ */
+DB_API void db_NullVectorOrthonormal8x9(double x[9],const double A[72]);
+
+/*!
+ \ingroup LMLinAlg
+ */
+inline void db_NullVector6x7Destructive(double x[7],double A[42])
+{
+    db_Orthogonalize6x7(A,1);
+    db_NullVectorOrthonormal6x7(x,A);
+}
+
+/*!
+ \ingroup LMLinAlg
+ */
+inline void db_NullVector8x9Destructive(double x[9],double A[72])
+{
+    db_Orthogonalize8x9(A,1);
+    db_NullVectorOrthonormal8x9(x,A);
+}
+
+inline int db_ScalarProduct512_s(const short *f,const short *g)
+{
+#ifndef DB_USE_MMX
+    int back;
+    back=0;
+    for(int i=1; i<=512; i++)
+        back+=(*f++)*(*g++);
+
+    return(back);
+#endif
+}
+
+
+inline int db_ScalarProduct32_s(const short *f,const short *g)
+{
+#ifndef DB_USE_MMX
+    int back;
+    back=0;
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+
+    return(back);
+#endif
+}
+
+/*!
+ \ingroup LMLinAlg
+ Scalar product of 128-vectors (short)
+  Compile-time control: MMX, SSE2 or standard C
+ */
+inline int db_ScalarProduct128_s(const short *f,const short *g)
+{
+#ifndef DB_USE_MMX
+    int back;
+    back=0;
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+
+    return(back);
+#else
+#ifdef DB_USE_SSE2
+    int back;
+
+    _asm
+    {
+        mov eax,f
+        mov ecx,g
+        /*First iteration************************************/
+        movdqa     xmm0,[eax]
+         pxor    xmm7,xmm7      /*Set xmm7 to zero*/
+        pmaddwd  xmm0,[ecx]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movdqa     xmm2,[eax+16]
+         paddd   xmm7,xmm0
+        pmaddwd  xmm2,[ecx+16]
+         /*Stall*/
+        movdqa     xmm1,[eax+32]
+         paddd   xmm7,xmm2
+        pmaddwd  xmm1,[ecx+32]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movdqa     xmm0,[eax+48]
+         paddd   xmm7,xmm1
+        pmaddwd  xmm0,[ecx+48]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movdqa     xmm2,[eax+64]
+         paddd   xmm7,xmm0
+        pmaddwd  xmm2,[ecx+64]
+         /*Stall*/
+        movdqa     xmm1,[eax+80]
+         paddd   xmm7,xmm2
+        pmaddwd  xmm1,[ecx+80]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movdqa     xmm0,[eax+96]
+         paddd   xmm7,xmm1
+        pmaddwd  xmm0,[ecx+96]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movdqa     xmm2,[eax+112]
+         paddd   xmm7,xmm0
+        pmaddwd  xmm2,[ecx+112]
+         /*Stall*/
+        movdqa     xmm1,[eax+128]
+         paddd   xmm7,xmm2
+        pmaddwd  xmm1,[ecx+128]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movdqa     xmm0,[eax+144]
+         paddd   xmm7,xmm1
+        pmaddwd  xmm0,[ecx+144]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movdqa     xmm2,[eax+160]
+         paddd   xmm7,xmm0
+        pmaddwd  xmm2,[ecx+160]
+         /*Stall*/
+        movdqa     xmm1,[eax+176]
+         paddd   xmm7,xmm2
+        pmaddwd  xmm1,[ecx+176]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movdqa     xmm0,[eax+192]
+         paddd   xmm7,xmm1
+        pmaddwd  xmm0,[ecx+192]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movdqa     xmm2,[eax+208]
+         paddd   xmm7,xmm0
+        pmaddwd  xmm2,[ecx+208]
+         /*Stall*/
+        movdqa     xmm1,[eax+224]
+         paddd   xmm7,xmm2
+        pmaddwd  xmm1,[ecx+224]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movdqa     xmm0,[eax+240]
+         paddd   xmm7,xmm1
+        pmaddwd  xmm0,[ecx+240]
+         /*Stall*/
+        /*Rest iteration************************************/
+        paddd    xmm7,xmm0
+
+        /* add up the sum squares */
+        movhlps     xmm0,xmm7   /* high half to low half */
+        paddd       xmm7,xmm0   /* add high to low */
+        pshuflw     xmm0,xmm7, 0xE /* reshuffle */
+        paddd       xmm7,xmm0   /* add remaining */
+        movd        back,xmm7
+
+        emms
+    }
+
+    return(back);
+#else
+    int back;
+
+    _asm
+    {
+        mov eax,f
+        mov ecx,g
+        /*First iteration************************************/
+        movq     mm0,[eax]
+         pxor    mm7,mm7      /*Set mm7 to zero*/
+        pmaddwd  mm0,[ecx]
+         /*Stall*/
+        movq     mm1,[eax+8]
+         /*Stall*/
+        pmaddwd  mm1,[ecx+8]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movq     mm2,[eax+16]
+         paddd   mm7,mm0
+        pmaddwd  mm2,[ecx+16]
+         /*Stall*/
+        movq     mm0,[eax+24]
+         paddd   mm7,mm1
+        pmaddwd  mm0,[ecx+24]
+         /*Stall*/
+        movq     mm1,[eax+32]
+         paddd   mm7,mm2
+        pmaddwd  mm1,[ecx+32]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movq     mm2,[eax+40]
+         paddd   mm7,mm0
+        pmaddwd  mm2,[ecx+40]
+         /*Stall*/
+        movq     mm0,[eax+48]
+         paddd   mm7,mm1
+        pmaddwd  mm0,[ecx+48]
+         /*Stall*/
+        movq     mm1,[eax+56]
+         paddd   mm7,mm2
+        pmaddwd  mm1,[ecx+56]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movq     mm2,[eax+64]
+         paddd   mm7,mm0
+        pmaddwd  mm2,[ecx+64]
+         /*Stall*/
+        movq     mm0,[eax+72]
+         paddd   mm7,mm1
+        pmaddwd  mm0,[ecx+72]
+         /*Stall*/
+        movq     mm1,[eax+80]
+         paddd   mm7,mm2
+        pmaddwd  mm1,[ecx+80]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movq     mm2,[eax+88]
+         paddd   mm7,mm0
+        pmaddwd  mm2,[ecx+88]
+         /*Stall*/
+        movq     mm0,[eax+96]
+         paddd   mm7,mm1
+        pmaddwd  mm0,[ecx+96]
+         /*Stall*/
+        movq     mm1,[eax+104]
+         paddd   mm7,mm2
+        pmaddwd  mm1,[ecx+104]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movq     mm2,[eax+112]
+         paddd   mm7,mm0
+        pmaddwd  mm2,[ecx+112]
+         /*Stall*/
+        movq     mm0,[eax+120]
+         paddd   mm7,mm1
+        pmaddwd  mm0,[ecx+120]
+         /*Stall*/
+        movq     mm1,[eax+128]
+         paddd   mm7,mm2
+        pmaddwd  mm1,[ecx+128]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movq     mm2,[eax+136]
+         paddd   mm7,mm0
+        pmaddwd  mm2,[ecx+136]
+         /*Stall*/
+        movq     mm0,[eax+144]
+         paddd   mm7,mm1
+        pmaddwd  mm0,[ecx+144]
+         /*Stall*/
+        movq     mm1,[eax+152]
+         paddd   mm7,mm2
+        pmaddwd  mm1,[ecx+152]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movq     mm2,[eax+160]
+         paddd   mm7,mm0
+        pmaddwd  mm2,[ecx+160]
+         /*Stall*/
+        movq     mm0,[eax+168]
+         paddd   mm7,mm1
+        pmaddwd  mm0,[ecx+168]
+         /*Stall*/
+        movq     mm1,[eax+176]
+         paddd   mm7,mm2
+        pmaddwd  mm1,[ecx+176]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movq     mm2,[eax+184]
+         paddd   mm7,mm0
+        pmaddwd  mm2,[ecx+184]
+         /*Stall*/
+        movq     mm0,[eax+192]
+         paddd   mm7,mm1
+        pmaddwd  mm0,[ecx+192]
+         /*Stall*/
+        movq     mm1,[eax+200]
+         paddd   mm7,mm2
+        pmaddwd  mm1,[ecx+200]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movq     mm2,[eax+208]
+         paddd   mm7,mm0
+        pmaddwd  mm2,[ecx+208]
+         /*Stall*/
+        movq     mm0,[eax+216]
+         paddd   mm7,mm1
+        pmaddwd  mm0,[ecx+216]
+         /*Stall*/
+        movq     mm1,[eax+224]
+         paddd   mm7,mm2
+        pmaddwd  mm1,[ecx+224]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movq     mm2,[eax+232]
+         paddd   mm7,mm0
+        pmaddwd  mm2,[ecx+232]
+         /*Stall*/
+        movq     mm0,[eax+240]
+         paddd   mm7,mm1
+        pmaddwd  mm0,[ecx+240]
+         /*Stall*/
+        movq     mm1,[eax+248]
+         paddd   mm7,mm2
+        pmaddwd  mm1,[ecx+248]
+         /*Stall*/
+        /*Rest iteration************************************/
+        paddd    mm7,mm0
+         /*Stall*/
+        /*Stall*/
+         /*Stall*/
+        paddd    mm7,mm1
+         /*Stall*/
+        movq     mm0,mm7
+         psrlq   mm7,32
+        paddd    mm0,mm7
+         /*Stall*/
+        /*Stall*/
+         /*Stall*/
+        movd     back,mm0
+        emms
+    }
+
+    return(back);
+#endif
+#endif /*DB_USE_MMX*/
+}
+
+/*!
+ \ingroup LMLinAlg
+ Scalar product of 16 byte aligned 128-vectors (float).
+  Compile-time control: SIMD (SSE) or standard C.
+ */
+inline float db_ScalarProduct128Aligned16_f(const float *f,const float *g)
+{
+#ifndef DB_USE_SIMD
+    float back;
+    back=0.0;
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+    back+=(*f++)*(*g++); back+=(*f++)*(*g++); back+=(*f++)*(*g++);
+
+    return(back);
+#else
+    float back;
+
+    _asm
+    {
+        mov eax,f
+        mov ecx,g
+        /*First iteration************************************/
+        movaps     xmm0,[eax]
+         xorps      xmm7,xmm7       /*Set mm7 to zero*/
+        mulps      xmm0,[ecx]
+         /*Stall*/
+        movaps     xmm1,[eax+16]
+         /*Stall*/
+        mulps      xmm1,[ecx+16]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movaps     xmm2,[eax+32]
+         addps      xmm7,xmm0
+        mulps      xmm2,[ecx+32]
+         /*Stall*/
+        movaps     xmm0,[eax+48]
+         addps      xmm7,xmm1
+        mulps      xmm0,[ecx+48]
+         /*Stall*/
+        movaps     xmm1,[eax+64]
+         addps      xmm7,xmm2
+        mulps      xmm1,[ecx+64]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movaps     xmm2,[eax+80]
+         addps      xmm7,xmm0
+        mulps      xmm2,[ecx+80]
+         /*Stall*/
+        movaps     xmm0,[eax+96]
+         addps      xmm7,xmm1
+        mulps      xmm0,[ecx+96]
+         /*Stall*/
+        movaps     xmm1,[eax+112]
+         addps      xmm7,xmm2
+        mulps      xmm1,[ecx+112]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movaps     xmm2,[eax+128]
+         addps      xmm7,xmm0
+        mulps      xmm2,[ecx+128]
+         /*Stall*/
+        movaps     xmm0,[eax+144]
+         addps      xmm7,xmm1
+        mulps      xmm0,[ecx+144]
+         /*Stall*/
+        movaps     xmm1,[eax+160]
+         addps      xmm7,xmm2
+        mulps      xmm1,[ecx+160]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movaps     xmm2,[eax+176]
+         addps      xmm7,xmm0
+        mulps      xmm2,[ecx+176]
+         /*Stall*/
+        movaps     xmm0,[eax+192]
+         addps      xmm7,xmm1
+        mulps      xmm0,[ecx+192]
+         /*Stall*/
+        movaps     xmm1,[eax+208]
+         addps      xmm7,xmm2
+        mulps      xmm1,[ecx+208]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movaps     xmm2,[eax+224]
+         addps      xmm7,xmm0
+        mulps      xmm2,[ecx+224]
+         /*Stall*/
+        movaps     xmm0,[eax+240]
+         addps      xmm7,xmm1
+        mulps      xmm0,[ecx+240]
+         /*Stall*/
+        movaps     xmm1,[eax+256]
+         addps      xmm7,xmm2
+        mulps      xmm1,[ecx+256]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movaps     xmm2,[eax+272]
+         addps      xmm7,xmm0
+        mulps      xmm2,[ecx+272]
+         /*Stall*/
+        movaps     xmm0,[eax+288]
+         addps      xmm7,xmm1
+        mulps      xmm0,[ecx+288]
+         /*Stall*/
+        movaps     xmm1,[eax+304]
+         addps      xmm7,xmm2
+        mulps      xmm1,[ecx+304]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movaps     xmm2,[eax+320]
+         addps      xmm7,xmm0
+        mulps      xmm2,[ecx+320]
+         /*Stall*/
+        movaps     xmm0,[eax+336]
+         addps      xmm7,xmm1
+        mulps      xmm0,[ecx+336]
+         /*Stall*/
+        movaps     xmm1,[eax+352]
+         addps      xmm7,xmm2
+        mulps      xmm1,[ecx+352]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movaps     xmm2,[eax+368]
+         addps      xmm7,xmm0
+        mulps      xmm2,[ecx+368]
+         /*Stall*/
+        movaps     xmm0,[eax+384]
+         addps      xmm7,xmm1
+        mulps      xmm0,[ecx+384]
+         /*Stall*/
+        movaps     xmm1,[eax+400]
+         addps      xmm7,xmm2
+        mulps      xmm1,[ecx+400]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movaps     xmm2,[eax+416]
+         addps      xmm7,xmm0
+        mulps      xmm2,[ecx+416]
+         /*Stall*/
+        movaps     xmm0,[eax+432]
+         addps      xmm7,xmm1
+        mulps      xmm0,[ecx+432]
+         /*Stall*/
+        movaps     xmm1,[eax+448]
+         addps      xmm7,xmm2
+        mulps      xmm1,[ecx+448]
+         /*Stall*/
+        /*Standard iteration************************************/
+        movaps     xmm2,[eax+464]
+         addps      xmm7,xmm0
+        mulps      xmm2,[ecx+464]
+         /*Stall*/
+        movaps     xmm0,[eax+480]
+         addps      xmm7,xmm1
+        mulps      xmm0,[ecx+480]
+         /*Stall*/
+        movaps     xmm1,[eax+496]
+         addps      xmm7,xmm2
+        mulps      xmm1,[ecx+496]
+         /*Stall*/
+        /*Rest iteration************************************/
+        addps      xmm7,xmm0
+         /*Stall*/
+        addps      xmm7,xmm1
+         /*Stall*/
+        movaps xmm6,xmm7
+         /*Stall*/
+        shufps xmm6,xmm6,4Eh
+         /*Stall*/
+        addps  xmm7,xmm6
+         /*Stall*/
+        movaps xmm6,xmm7
+         /*Stall*/
+        shufps xmm6,xmm6,11h
+         /*Stall*/
+        addps  xmm7,xmm6
+         /*Stall*/
+        movss  back,xmm7
+    }
+
+    return(back);
+#endif /*DB_USE_SIMD*/
+}
+
+#endif /* DB_UTILITIES_LINALG */
diff --git a/benchmark/feature_stab/db_vlvm/db_utilities_poly.cpp b/benchmark/feature_stab/db_vlvm/db_utilities_poly.cpp
new file mode 100644
index 00000000..013ac726
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_utilities_poly.cpp
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_poly.cpp,v 1.2 2010/09/03 12:00:10 bsouthall Exp $ */
+
+#include "db_utilities_poly.h"
+#include "db_utilities.h"
+
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+
+void db_SolveCubic(double *roots,int *nr_roots,double a,double b,double c,double d)
+{
+    double bp,bp2,cp,dp,q,r,srq;
+    double r2_min_q3,theta,bp_through3,theta_through3;
+    double cos_theta_through3,sin_theta_through3,min2_cos_theta_plu,min2_cos_theta_min;
+    double si_r_srq,A;
+
+    /*For nondegenerate cubics with three roots
+    [24 mult 9 add 2sqrt 1acos 1cos=33flops 4func]
+    For nondegenerate cubics with one root
+    [16 mult 6 add 1sqrt 1qbrt=24flops 3func]*/
+
+    if(a==0.0) db_SolveQuadratic(roots,nr_roots,b,c,d);
+    else
+    {
+        bp=b/a;
+        bp2=bp*bp;
+        cp=c/a;
+        dp=d/a;
+
+        q=(bp2-3.0*cp)/9.0;
+        r=(2.0*bp2*bp-9.0*bp*cp+27.0*dp)/54.0;
+        r2_min_q3=r*r-q*q*q;
+        if(r2_min_q3<0.0)
+        {
+            *nr_roots=3;
+            /*q has to be > 0*/
+            srq=sqrt(q);
+            theta=acos(db_maxd(-1.0,db_mind(1.0,r/(q*srq))));
+            bp_through3=bp/3.0;
+            theta_through3=theta/3.0;
+            cos_theta_through3=cos(theta_through3);
+            sin_theta_through3=sqrt(db_maxd(0.0,1.0-cos_theta_through3*cos_theta_through3));
+
+            /*cos(theta_through3+2*pi/3)=cos_theta_through3*cos(2*pi/3)-sin_theta_through3*sin(2*pi/3)
+            = -0.5*cos_theta_through3-sqrt(3)/2.0*sin_theta_through3
+            = -0.5*(cos_theta_through3+sqrt(3)*sin_theta_through3)*/
+            min2_cos_theta_plu=cos_theta_through3+DB_SQRT3*sin_theta_through3;
+            min2_cos_theta_min=cos_theta_through3-DB_SQRT3*sin_theta_through3;
+
+            roots[0]= -2.0*srq*cos_theta_through3-bp_through3;
+            roots[1]=srq*min2_cos_theta_plu-bp_through3;
+            roots[2]=srq*min2_cos_theta_min-bp_through3;
+        }
+        else if(r2_min_q3>0.0)
+        {
+            *nr_roots=1;
+            A= -db_sign(r)*db_CubRoot(db_absd(r)+sqrt(r2_min_q3));
+            bp_through3=bp/3.0;
+            if(A!=0.0) roots[0]=A+q/A-bp_through3;
+            else roots[0]= -bp_through3;
+        }
+        else
+        {
+            *nr_roots=2;
+            bp_through3=bp/3.0;
+            /*q has to be >= 0*/
+            si_r_srq=db_sign(r)*sqrt(q);
+            /*Single root*/
+            roots[0]= -2.0*si_r_srq-bp_through3;
+            /*Double root*/
+            roots[1]=si_r_srq-bp_through3;
+        }
+    }
+}
+
+void db_SolveQuartic(double *roots,int *nr_roots,double a,double b,double c,double d,double e)
+{
+    /*Normalized coefficients*/
+    double c0,c1,c2,c3;
+    /*Temporary coefficients*/
+    double c3through2,c3through4,c3c3through4_min_c2,min4_c0;
+    double lz,ms,ns,mn,m,n,lz_through2;
+    /*Cubic polynomial roots, nr of roots and coefficients*/
+    double c_roots[3];
+    int nr_c_roots;
+    double k0,k1;
+    /*nr additional roots from second quadratic*/
+    int addroots;
+
+    /*For nondegenerate quartics
+    [16mult 11add 2sqrt 1cubic 2quadratic=74flops 8funcs]*/
+
+    if(a==0.0) db_SolveCubic(roots,nr_roots,b,c,d,e);
+    else if(e==0.0)
+    {
+        db_SolveCubic(roots,nr_roots,a,b,c,d);
+        roots[*nr_roots]=0.0;
+        *nr_roots+=1;
+    }
+    else
+    {
+        /*Compute normalized coefficients*/
+        c3=b/a;
+        c2=c/a;
+        c1=d/a;
+        c0=e/a;
+        /*Compute temporary coefficients*/
+        c3through2=c3/2.0;
+        c3through4=c3/4.0;
+        c3c3through4_min_c2=c3*c3through4-c2;
+        min4_c0= -4.0*c0;
+        /*Compute coefficients of cubic*/
+        k0=min4_c0*c3c3through4_min_c2-c1*c1;
+        k1=c1*c3+min4_c0;
+        /*k2= -c2*/
+        /*k3=1.0*/
+
+        /*Solve it for roots*/
+        db_SolveCubic(c_roots,&nr_c_roots,1.0,-c2,k1,k0);
+
+        if(nr_c_roots>0)
+        {
+            lz=c_roots[0];
+            lz_through2=lz/2.0;
+            ms=lz+c3c3through4_min_c2;
+            ns=lz_through2*lz_through2-c0;
+            mn=lz*c3through4-c1/2.0;
+
+            if((ms>=0.0)&&(ns>=0.0))
+            {
+                m=sqrt(ms);
+                n=sqrt(ns)*db_sign(mn);
+
+                db_SolveQuadratic(roots,nr_roots,
+                    1.0,c3through2+m,lz_through2+n);
+
+                db_SolveQuadratic(&roots[*nr_roots],&addroots,
+                    1.0,c3through2-m,lz_through2-n);
+
+                *nr_roots+=addroots;
+            }
+            else *nr_roots=0;
+        }
+        else *nr_roots=0;
+    }
+}
+
+void db_SolveQuarticForced(double *roots,int *nr_roots,double a,double b,double c,double d,double e)
+{
+    /*Normalized coefficients*/
+    double c0,c1,c2,c3;
+    /*Temporary coefficients*/
+    double c3through2,c3through4,c3c3through4_min_c2,min4_c0;
+    double lz,ms,ns,mn,m,n,lz_through2;
+    /*Cubic polynomial roots, nr of roots and coefficients*/
+    double c_roots[3];
+    int nr_c_roots;
+    double k0,k1;
+    /*nr additional roots from second quadratic*/
+    int addroots;
+
+    /*For nondegenerate quartics
+    [16mult 11add 2sqrt 1cubic 2quadratic=74flops 8funcs]*/
+
+    if(a==0.0) db_SolveCubic(roots,nr_roots,b,c,d,e);
+    else if(e==0.0)
+    {
+        db_SolveCubic(roots,nr_roots,a,b,c,d);
+        roots[*nr_roots]=0.0;
+        *nr_roots+=1;
+    }
+    else
+    {
+        /*Compute normalized coefficients*/
+        c3=b/a;
+        c2=c/a;
+        c1=d/a;
+        c0=e/a;
+        /*Compute temporary coefficients*/
+        c3through2=c3/2.0;
+        c3through4=c3/4.0;
+        c3c3through4_min_c2=c3*c3through4-c2;
+        min4_c0= -4.0*c0;
+        /*Compute coefficients of cubic*/
+        k0=min4_c0*c3c3through4_min_c2-c1*c1;
+        k1=c1*c3+min4_c0;
+        /*k2= -c2*/
+        /*k3=1.0*/
+
+        /*Solve it for roots*/
+        db_SolveCubic(c_roots,&nr_c_roots,1.0,-c2,k1,k0);
+
+        if(nr_c_roots>0)
+        {
+            lz=c_roots[0];
+            lz_through2=lz/2.0;
+            ms=lz+c3c3through4_min_c2;
+            ns=lz_through2*lz_through2-c0;
+            mn=lz*c3through4-c1/2.0;
+
+            if(ms<0.0) ms=0.0;
+            if(ns<0.0) ns=0.0;
+
+            m=sqrt(ms);
+            n=sqrt(ns)*db_sign(mn);
+
+            db_SolveQuadratic(roots,nr_roots,
+                1.0,c3through2+m,lz_through2+n);
+
+            db_SolveQuadratic(&roots[*nr_roots],&addroots,
+                1.0,c3through2-m,lz_through2-n);
+
+            *nr_roots+=addroots;
+        }
+        else *nr_roots=0;
+    }
+}
diff --git a/benchmark/feature_stab/db_vlvm/db_utilities_poly.h b/benchmark/feature_stab/db_vlvm/db_utilities_poly.h
new file mode 100644
index 00000000..1f878907
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_utilities_poly.h
@@ -0,0 +1,383 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_poly.h,v 1.2 2010/09/03 12:00:11 bsouthall Exp $ */
+
+#ifndef DB_UTILITIES_POLY
+#define DB_UTILITIES_POLY
+
+#include "db_utilities.h"
+
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+/*!
+ * \defgroup LMPolynomial (LM) Polynomial utilities (solvers, arithmetic, evaluation, etc.)
+ */
+/*\{*/
+
+/*!
+In debug mode closed form quadratic solving takes on the order of 15 microseconds
+while eig of the companion matrix takes about 1.1 milliseconds
+Speed-optimized code in release mode solves a quadratic in 0.3 microseconds on 450MHz
+*/
+inline void db_SolveQuadratic(double *roots,int *nr_roots,double a,double b,double c)
+{
+    double rs,srs,q;
+
+    /*For non-degenerate quadratics
+    [5 mult 2 add 1 sqrt=7flops 1func]*/
+    if(a==0.0)
+    {
+        if(b==0.0) *nr_roots=0;
+        else
+        {
+            roots[0]= -c/b;
+            *nr_roots=1;
+        }
+    }
+    else
+    {
+        rs=b*b-4.0*a*c;
+        if(rs>=0.0)
+        {
+            *nr_roots=2;
+            srs=sqrt(rs);
+            q= -0.5*(b+db_sign(b)*srs);
+            roots[0]=q/a;
+            /*If b is zero db_sign(b) returns 1,
+            so q is only zero when b=0 and c=0*/
+            if(q==0.0) *nr_roots=1;
+            else roots[1]=c/q;
+        }
+        else *nr_roots=0;
+    }
+}
+
+/*!
+In debug mode closed form cubic solving takes on the order of 45 microseconds
+while eig of the companion matrix takes about 1.3 milliseconds
+Speed-optimized code in release mode solves a cubic in 1.5 microseconds on 450MHz
+For a non-degenerate cubic with two roots, the first root is the single root and
+the second root is the double root
+*/
+DB_API void db_SolveCubic(double *roots,int *nr_roots,double a,double b,double c,double d);
+/*!
+In debug mode closed form quartic solving takes on the order of 0.1 milliseconds
+while eig of the companion matrix takes about 1.5 milliseconds
+Speed-optimized code in release mode solves a quartic in 2.6 microseconds on 450MHz*/
+DB_API void db_SolveQuartic(double *roots,int *nr_roots,double a,double b,double c,double d,double e);
+/*!
+Quartic solving where a solution is forced when splitting into quadratics, which
+can be good if the quartic is sometimes in fact a quadratic, such as in absolute orientation
+when the data is planar*/
+DB_API void db_SolveQuarticForced(double *roots,int *nr_roots,double a,double b,double c,double d,double e);
+
+inline double db_PolyEval1(const double p[2],double x)
+{
+    return(p[0]+x*p[1]);
+}
+
+inline void db_MultiplyPoly1_1(double *d,const double *a,const double *b)
+{
+    double a0,a1;
+    double b0,b1;
+    a0=a[0];a1=a[1];
+    b0=b[0];b1=b[1];
+
+    d[0]=a0*b0;
+    d[1]=a0*b1+a1*b0;
+    d[2]=      a1*b1;
+}
+
+inline void db_MultiplyPoly0_2(double *d,const double *a,const double *b)
+{
+    double a0;
+    double b0,b1,b2;
+    a0=a[0];
+    b0=b[0];b1=b[1];b2=b[2];
+
+    d[0]=a0*b0;
+    d[1]=a0*b1;
+    d[2]=a0*b2;
+}
+
+inline void db_MultiplyPoly1_2(double *d,const double *a,const double *b)
+{
+    double a0,a1;
+    double b0,b1,b2;
+    a0=a[0];a1=a[1];
+    b0=b[0];b1=b[1];b2=b[2];
+
+    d[0]=a0*b0;
+    d[1]=a0*b1+a1*b0;
+    d[2]=a0*b2+a1*b1;
+    d[3]=      a1*b2;
+}
+
+
+inline void db_MultiplyPoly1_3(double *d,const double *a,const double *b)
+{
+    double a0,a1;
+    double b0,b1,b2,b3;
+    a0=a[0];a1=a[1];
+    b0=b[0];b1=b[1];b2=b[2];b3=b[3];
+
+    d[0]=a0*b0;
+    d[1]=a0*b1+a1*b0;
+    d[2]=a0*b2+a1*b1;
+    d[3]=a0*b3+a1*b2;
+    d[4]=      a1*b3;
+}
+/*!
+Multiply d=a*b where a is one degree and b is two degree*/
+inline void db_AddPolyProduct0_1(double *d,const double *a,const double *b)
+{
+    double a0;
+    double b0,b1;
+    a0=a[0];
+    b0=b[0];b1=b[1];
+
+    d[0]+=a0*b0;
+    d[1]+=a0*b1;
+}
+inline void db_AddPolyProduct0_2(double *d,const double *a,const double *b)
+{
+    double a0;
+    double b0,b1,b2;
+    a0=a[0];
+    b0=b[0];b1=b[1];b2=b[2];
+
+    d[0]+=a0*b0;
+    d[1]+=a0*b1;
+    d[2]+=a0*b2;
+}
+/*!
+Multiply d=a*b where a is one degree and b is two degree*/
+inline void db_SubtractPolyProduct0_0(double *d,const double *a,const double *b)
+{
+    double a0;
+    double b0;
+    a0=a[0];
+    b0=b[0];
+
+    d[0]-=a0*b0;
+}
+
+inline void db_SubtractPolyProduct0_1(double *d,const double *a,const double *b)
+{
+    double a0;
+    double b0,b1;
+    a0=a[0];
+    b0=b[0];b1=b[1];
+
+    d[0]-=a0*b0;
+    d[1]-=a0*b1;
+}
+
+inline void db_SubtractPolyProduct0_2(double *d,const double *a,const double *b)
+{
+    double a0;
+    double b0,b1,b2;
+    a0=a[0];
+    b0=b[0];b1=b[1];b2=b[2];
+
+    d[0]-=a0*b0;
+    d[1]-=a0*b1;
+    d[2]-=a0*b2;
+}
+
+inline void db_SubtractPolyProduct1_3(double *d,const double *a,const double *b)
+{
+    double a0,a1;
+    double b0,b1,b2,b3;
+    a0=a[0];a1=a[1];
+    b0=b[0];b1=b[1];b2=b[2];b3=b[3];
+
+    d[0]-=a0*b0;
+    d[1]-=a0*b1+a1*b0;
+    d[2]-=a0*b2+a1*b1;
+    d[3]-=a0*b3+a1*b2;
+    d[4]-=      a1*b3;
+}
+
+inline void    db_CharacteristicPolynomial4x4(double p[5],const double A[16])
+{
+    /*All two by two determinants of the first two rows*/
+    double two01[3],two02[3],two03[3],two12[3],two13[3],two23[3];
+    /*Polynomials representing third and fourth row of A*/
+    double P0[2],P1[2],P2[2],P3[2];
+    double P4[2],P5[2],P6[2],P7[2];
+    /*All three by three determinants of the first three rows*/
+    double neg_three0[4],neg_three1[4],three2[4],three3[4];
+
+    /*Compute 2x2 determinants*/
+    two01[0]=A[0]*A[5]-A[1]*A[4];
+    two01[1]= -(A[0]+A[5]);
+    two01[2]=1.0;
+
+    two02[0]=A[0]*A[6]-A[2]*A[4];
+    two02[1]= -A[6];
+
+    two03[0]=A[0]*A[7]-A[3]*A[4];
+    two03[1]= -A[7];
+
+    two12[0]=A[1]*A[6]-A[2]*A[5];
+    two12[1]=A[2];
+
+    two13[0]=A[1]*A[7]-A[3]*A[5];
+    two13[1]=A[3];
+
+    two23[0]=A[2]*A[7]-A[3]*A[6];
+
+    P0[0]=A[8];
+    P1[0]=A[9];
+    P2[0]=A[10];P2[1]= -1.0;
+    P3[0]=A[11];
+
+    P4[0]=A[12];
+    P5[0]=A[13];
+    P6[0]=A[14];
+    P7[0]=A[15];P7[1]= -1.0;
+
+    /*Compute 3x3 determinants.Note that the highest
+    degree polynomial goes first and the smaller ones
+    are added or subtracted from it*/
+    db_MultiplyPoly1_1(       neg_three0,P2,two13);
+    db_SubtractPolyProduct0_0(neg_three0,P1,two23);
+    db_SubtractPolyProduct0_1(neg_three0,P3,two12);
+
+    db_MultiplyPoly1_1(       neg_three1,P2,two03);
+    db_SubtractPolyProduct0_1(neg_three1,P3,two02);
+    db_SubtractPolyProduct0_0(neg_three1,P0,two23);
+
+    db_MultiplyPoly0_2(       three2,P3,two01);
+    db_AddPolyProduct0_1(     three2,P0,two13);
+    db_SubtractPolyProduct0_1(three2,P1,two03);
+
+    db_MultiplyPoly1_2(       three3,P2,two01);
+    db_AddPolyProduct0_1(     three3,P0,two12);
+    db_SubtractPolyProduct0_1(three3,P1,two02);
+
+    /*Compute 4x4 determinants*/
+    db_MultiplyPoly1_3(       p,P7,three3);
+    db_AddPolyProduct0_2(     p,P4,neg_three0);
+    db_SubtractPolyProduct0_2(p,P5,neg_three1);
+    db_SubtractPolyProduct0_2(p,P6,three2);
+}
+
+inline void db_RealEigenvalues4x4(double lambda[4],int *nr_roots,const double A[16],int forced=0)
+{
+    double p[5];
+
+    db_CharacteristicPolynomial4x4(p,A);
+    if(forced) db_SolveQuarticForced(lambda,nr_roots,p[4],p[3],p[2],p[1],p[0]);
+     else db_SolveQuartic(lambda,nr_roots,p[4],p[3],p[2],p[1],p[0]);
+}
+
+/*!
+Compute the unit norm eigenvector v of the matrix A corresponding
+to the eigenvalue lambda
+[96mult 60add 1sqrt=156flops 1sqrt]*/
+inline void db_EigenVector4x4(double v[4],double lambda,const double A[16])
+{
+    double a0,a5,a10,a15;
+    double d01,d02,d03,d12,d13,d23;
+    double e01,e02,e03,e12,e13,e23;
+    double C[16],n0,n1,n2,n3,m;
+
+    /*Compute diagonal
+    [4add=4flops]*/
+    a0=A[0]-lambda;
+    a5=A[5]-lambda;
+    a10=A[10]-lambda;
+    a15=A[15]-lambda;
+
+    /*Compute 2x2 determinants of rows 1,2 and 3,4
+    [24mult 12add=36flops]*/
+    d01=a0*a5    -A[1]*A[4];
+    d02=a0*A[6]  -A[2]*A[4];
+    d03=a0*A[7]  -A[3]*A[4];
+    d12=A[1]*A[6]-A[2]*a5;
+    d13=A[1]*A[7]-A[3]*a5;
+    d23=A[2]*A[7]-A[3]*A[6];
+
+    e01=A[8]*A[13]-A[9] *A[12];
+    e02=A[8]*A[14]-a10  *A[12];
+    e03=A[8]*a15  -A[11]*A[12];
+    e12=A[9]*A[14]-a10  *A[13];
+    e13=A[9]*a15  -A[11]*A[13];
+    e23=a10 *a15  -A[11]*A[14];
+
+    /*Compute matrix of cofactors
+    [48mult 32 add=80flops*/
+    C[0]=  (a5  *e23-A[6]*e13+A[7]*e12);
+    C[1]= -(A[4]*e23-A[6]*e03+A[7]*e02);
+    C[2]=  (A[4]*e13-a5  *e03+A[7]*e01);
+    C[3]= -(A[4]*e12-a5  *e02+A[6]*e01);
+
+    C[4]= -(A[1]*e23-A[2]*e13+A[3]*e12);
+    C[5]=  (a0  *e23-A[2]*e03+A[3]*e02);
+    C[6]= -(a0  *e13-A[1]*e03+A[3]*e01);
+    C[7]=  (a0  *e12-A[1]*e02+A[2]*e01);
+
+    C[8]=   (A[13]*d23-A[14]*d13+a15  *d12);
+    C[9]=  -(A[12]*d23-A[14]*d03+a15  *d02);
+    C[10]=  (A[12]*d13-A[13]*d03+a15  *d01);
+    C[11]= -(A[12]*d12-A[13]*d02+A[14]*d01);
+
+    C[12]= -(A[9]*d23-a10 *d13+A[11]*d12);
+    C[13]=  (A[8]*d23-a10 *d03+A[11]*d02);
+    C[14]= -(A[8]*d13-A[9]*d03+A[11]*d01);
+    C[15]=  (A[8]*d12-A[9]*d02+a10  *d01);
+
+    /*Compute square sums of rows
+    [16mult 12add=28flops*/
+    n0=db_sqr(C[0]) +db_sqr(C[1]) +db_sqr(C[2]) +db_sqr(C[3]);
+    n1=db_sqr(C[4]) +db_sqr(C[5]) +db_sqr(C[6]) +db_sqr(C[7]);
+    n2=db_sqr(C[8]) +db_sqr(C[9]) +db_sqr(C[10])+db_sqr(C[11]);
+    n3=db_sqr(C[12])+db_sqr(C[13])+db_sqr(C[14])+db_sqr(C[15]);
+
+    /*Take the largest norm row and normalize
+    [4mult 1 sqrt=4flops 1sqrt]*/
+    if(n0>=n1 && n0>=n2 && n0>=n3)
+    {
+        m=db_SafeReciprocal(sqrt(n0));
+        db_MultiplyScalarCopy4(v,C,m);
+    }
+    else if(n1>=n2 && n1>=n3)
+    {
+        m=db_SafeReciprocal(sqrt(n1));
+        db_MultiplyScalarCopy4(v,&(C[4]),m);
+    }
+    else if(n2>=n3)
+    {
+        m=db_SafeReciprocal(sqrt(n2));
+        db_MultiplyScalarCopy4(v,&(C[8]),m);
+    }
+    else
+    {
+        m=db_SafeReciprocal(sqrt(n3));
+        db_MultiplyScalarCopy4(v,&(C[12]),m);
+    }
+}
+
+
+
+/*\}*/
+#endif /* DB_UTILITIES_POLY */
diff --git a/benchmark/feature_stab/db_vlvm/db_utilities_random.h b/benchmark/feature_stab/db_vlvm/db_utilities_random.h
new file mode 100644
index 00000000..ef24039c
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_utilities_random.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_random.h,v 1.1 2010/08/19 18:09:20 bsouthall Exp $ */
+
+#ifndef DB_UTILITIES_RANDOM
+#define DB_UTILITIES_RANDOM
+
+#include "db_utilities.h"
+
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+/*!
+ * \defgroup LMRandom (LM) Random numbers, random sampling
+ */
+/*\{*/
+/*!
+ Random Number generator. Initialize with non-zero
+integer value r. A double between zero and one is
+returned.
+\param r    seed
+\return random double
+*/
+inline double db_QuickRandomDouble(int &r)
+{
+    int c;
+    c=r/127773;
+    r=16807*(r-c*127773)-2836*c;
+    if(r<0) r+=2147483647;
+    return((1.0/((double)2147483647))*r);
+    //return (((double)rand())/(double)RAND_MAX);
+}
+
+/*!
+Random Number generator. Initialize with non-zero
+integer value r. An int between and including 0 and max
+ \param r    seed
+ \param max    upped limit
+ \return random int
+*/
+inline int db_RandomInt(int &r,int max)
+{
+    double dtemp;
+    int itemp;
+    dtemp=db_QuickRandomDouble(r)*(max+1);
+    itemp=(int) dtemp;
+    if(itemp<=0) return(0);
+    if(itemp>=max) return(max);
+    return(itemp);
+}
+
+/*!
+ Generate a random sample indexing into [0..pool_size-1].
+ \param s            sample (out) pre-allocated array of size sample_size
+ \param sample_size    size of sample
+ \param pool_size    upper limit on item index
+ \param r_seed        random number generator seed
+ */
+inline void db_RandomSample(int *s,int sample_size,int pool_size,int &r_seed)
+{
+    int temp,temp2,i,j;
+
+    for(i=0;i<sample_size;i++)
+    {
+        temp=db_RandomInt(r_seed,pool_size-1-i);
+
+        for(j=0;j<i;j++)
+        {
+            if(s[j]<=temp) temp++;
+            else
+            {
+                /*swap*/
+                temp2=temp;
+                temp=s[j];
+                s[j]=temp2;
+            }
+        }
+        s[i]=temp;
+    }
+}
+/*\}*/
+#endif /* DB_UTILITIES_RANDOM */
diff --git a/benchmark/feature_stab/db_vlvm/db_utilities_rotation.h b/benchmark/feature_stab/db_vlvm/db_utilities_rotation.h
new file mode 100644
index 00000000..7f5f937b
--- /dev/null
+++ b/benchmark/feature_stab/db_vlvm/db_utilities_rotation.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id: db_utilities_rotation.h,v 1.2 2010/09/03 12:00:11 bsouthall Exp $ */
+
+#ifndef DB_UTILITIES_ROTATION
+#define DB_UTILITIES_ROTATION
+
+#include "db_utilities.h"
+
+
+
+/*****************************************************************
+*    Lean and mean begins here                                   *
+*****************************************************************/
+/*!
+ * \defgroup LMRotation (LM) Rotation Utilities (quaternions, orthonormal)
+ */
+/*\{*/
+/*!
+ Takes a unit quaternion and gives its corresponding rotation matrix.
+ \param R rotation matrix (out)
+ \param q quaternion
+ */
+inline void db_QuaternionToRotation(double R[9],const double q[4])
+{
+    double q0q0,q0qx,q0qy,q0qz,qxqx,qxqy,qxqz,qyqy,qyqz,qzqz;
+
+    q0q0=q[0]*q[0];
+    q0qx=q[0]*q[1];
+    q0qy=q[0]*q[2];
+    q0qz=q[0]*q[3];
+    qxqx=q[1]*q[1];
+    qxqy=q[1]*q[2];
+    qxqz=q[1]*q[3];
+    qyqy=q[2]*q[2];
+    qyqz=q[2]*q[3];
+    qzqz=q[3]*q[3];
+
+    R[0]=q0q0+qxqx-qyqy-qzqz; R[1]=2.0*(qxqy-q0qz);     R[2]=2.0*(qxqz+q0qy);
+    R[3]=2.0*(qxqy+q0qz);     R[4]=q0q0-qxqx+qyqy-qzqz; R[5]=2.0*(qyqz-q0qx);
+    R[6]=2.0*(qxqz-q0qy);     R[7]=2.0*(qyqz+q0qx);     R[8]=q0q0-qxqx-qyqy+qzqz;
+}
+
+/*\}*/
+#endif /* DB_UTILITIES_ROTATION */
diff --git a/benchmark/feature_stab/doc/Readme.txt b/benchmark/feature_stab/doc/Readme.txt
new file mode 100644
index 00000000..fcd7c38c
--- /dev/null
+++ b/benchmark/feature_stab/doc/Readme.txt
@@ -0,0 +1,3 @@
+To generate the html docs, execute
+doxygen dbreg_API_doxyfile
+
diff --git a/benchmark/feature_stab/doc/dbreg_API_doxyfile b/benchmark/feature_stab/doc/dbreg_API_doxyfile
new file mode 100755
index 00000000..dc61a9c0
--- /dev/null
+++ b/benchmark/feature_stab/doc/dbreg_API_doxyfile
@@ -0,0 +1,1557 @@
+# Doxyfile 1.6.1
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+#       TAG = value [value, ...]
+# For lists items can also be appended using:
+#       TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# http://www.gnu.org/software/libiconv for the list of possible encodings.
+
+DOXYFILE_ENCODING      = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
+PROJECT_NAME           =
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER         =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       = .
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of
+# source files, where putting all generated files in the same directory would
+# otherwise cause performance problems for the file system.
+
+CREATE_SUBDIRS         = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
+# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
+# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
+# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
+# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak,
+# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
+
+OUTPUT_LANGUAGE        = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF           = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is
+# used as the annotated text. Otherwise, the brief description is used as-is.
+# If left blank, the following values are used ("$name" is automatically
+# replaced with the name of the entity): "The $name class" "The $name widget"
+# "The $name file" "is" "provides" "specifies" "contains"
+# "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF       = "The $name class" \
+                         "The $name widget" \
+                         "The $name file" \
+                         is \
+                         provides \
+                         specifies \
+                         contains \
+                         represents \
+                         a \
+                         an \
+                         the
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+
+INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before files name in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES        = YES
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip.
+
+STRIP_FROM_PATH        = /Users/dimitri/doxygen/mail/1.5.7/doxywizard/
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH    =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful is your file systems
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like regular Qt-style comments
+# (thus requiring an explicit @brief command for a brief description.)
+
+JAVADOC_AUTOBRIEF      = NO
+
+# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
+# interpret the first line (until the first dot) of a Qt-style
+# comment as the brief description. If set to NO, the comments
+# will behave just like regular Qt-style comments (thus requiring
+# an explicit \brief command for a brief description.)
+
+QT_AUTOBRIEF           = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
+INHERIT_DOCS           = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
+# a new page for each member. If set to NO, the documentation of a member will
+# be part of the file/class/namespace that contains it.
+
+SEPARATE_MEMBER_PAGES  = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE               = 8
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
+# sources only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C  = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
+# sources only. Doxygen will then generate output that is more tailored for
+# Java. For instance, namespaces will be presented as packages, qualified
+# scopes will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA   = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources only. Doxygen will then generate output that is more tailored for
+# Fortran.
+
+OPTIMIZE_FOR_FORTRAN   = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for
+# VHDL.
+
+OPTIMIZE_OUTPUT_VHDL   = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it parses.
+# With this tag you can assign which parser to use for a given extension.
+# Doxygen has a built-in mapping, but you can override or extend it using this tag.
+# The format is ext=language, where ext is a file extension, and language is one of
+# the parsers supported by doxygen: IDL, Java, Javascript, C#, C, C++, D, PHP,
+# Objective-C, Python, Fortran, VHDL, C, C++. For instance to make doxygen treat
+# .inc files as Fortran files (default is PHP), and .f files as C (default is Fortran),
+# use: inc=Fortran f=C. Note that for custom extensions you also need to set
+# FILE_PATTERNS otherwise the files are not read by doxygen.
+
+EXTENSION_MAPPING      =
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should
+# set this tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
+# func(std::string) {}). This also make the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+
+BUILTIN_STL_SUPPORT    = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+
+CPP_CLI_SUPPORT        = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
+# Doxygen will parse them like normal C++ but will assume all classes use public
+# instead of private inheritance when no explicit protection keyword is present.
+
+SIP_SUPPORT            = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate getter
+# and setter methods for a property. Setting this option to YES (the default)
+# will make doxygen to replace the get and set methods by a property in the
+# documentation. This will only work if the methods are indeed getting or
+# setting a simple type. If this is not the case, or you want to show the
+# methods anyway, you should set this option to NO.
+
+IDL_PROPERTY_SUPPORT   = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC   = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
+SUBGROUPING            = YES
+
+# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
+# is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically
+# be useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+
+TYPEDEF_HIDES_STRUCT   = NO
+
+# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to
+# determine which symbols to keep in memory and which to flush to disk.
+# When the cache is full, less often used symbols will be written to disk.
+# For small to medium size projects (<1000 input files) the default value is
+# probably good enough. For larger projects a too small cache size can cause
+# doxygen to be busy swapping symbols to and from disk most of the time
+# causing a significant performance penality.
+# If the system has enough physical memory increasing the cache will improve the
+# performance by keeping more symbols in memory. Note that the value works on
+# a logarithmic scale so increasing the size by one will rougly double the
+# memory usage. The cache size is given by this formula:
+# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
+# corresponding to a cache size of 2^16 = 65536 symbols
+
+SYMBOL_CACHE_SIZE      = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL            = NO
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE        = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC         = NO
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES  = YES
+
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS  = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base
+# name of the file that contains the anonymous namespace. By default
+# anonymous namespace are hidden.
+
+EXTRACT_ANON_NSPACES   = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS     = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES     = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS          = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+
+CASE_SENSE_NAMES       = NO
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES       = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES     = YES
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS       = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS        = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen
+# will sort the (brief and detailed) documentation of class members so that
+# constructors and destructors are listed first. If set to NO (the default)
+# the constructors will appear in the respective orders defined by
+# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS.
+# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO
+# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
+# hierarchy of group names into alphabetical order. If set to NO (the default)
+# the group names will appear in their defined order.
+
+SORT_GROUP_NAMES       = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME     = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or define consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and defines in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES        = YES
+
+# If the sources in your project are distributed over multiple directories
+# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
+# in the documentation. The default is NO.
+
+SHOW_DIRECTORIES       = NO
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
+# This will remove the Files entry from the Quick Index and from the
+# Folder Tree View (if specified). The default is YES.
+
+SHOW_FILES             = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
+# Namespaces page.  This will remove the Namespaces entry from the Quick Index
+# and from the Folder Tree View (if specified). The default is YES.
+
+SHOW_NAMESPACES        = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command <command> <input-file>, where <command> is the value of
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output
+# is used as the file version. See the manual for examples.
+
+FILE_VERSION_FILTER    =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by
+# doxygen. The layout file controls the global structure of the generated output files
+# in an output format independent way. The create the layout file that represents
+# doxygen's defaults, run doxygen with the -l option. You can optionally specify a
+# file name after the option, if omitted DoxygenLayout.xml will be used as the name
+# of the layout file.
+
+LAYOUT_FILE            =
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET                  = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS               = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED   = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR      = YES
+
+# This WARN_NO_PARAMDOC option can be abled to get warnings for
+# functions that are documented, but have no documentation for their parameters
+# or return value. If set to NO (the default) doxygen will only warn about
+# wrong or incomplete parameter documentation, but not about the absence of
+# documentation.
+
+WARN_NO_PARAMDOC       = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text. Optionally the format may contain
+# $version, which will be replaced by the version of the file (if it could
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE           =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  = ../src/dbreg/dbreg.h
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
+# also the default input encoding. Doxygen uses libiconv (or the iconv built
+# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
+# the list of possible encodings.
+
+INPUT_ENCODING         = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          = *.c \
+                         *.cc \
+                         *.cxx \
+                         *.cpp \
+                         *.c++ \
+                         *.d \
+                         *.java \
+                         *.ii \
+                         *.ixx \
+                         *.ipp \
+                         *.i++ \
+                         *.inl \
+                         *.h \
+                         *.hh \
+                         *.hxx \
+                         *.hpp \
+                         *.h++ \
+                         *.idl \
+                         *.odl \
+                         *.cs \
+                         *.php \
+                         *.php3 \
+                         *.inc \
+                         *.m \
+                         *.mm \
+                         *.dox \
+                         *.py \
+                         *.f90 \
+                         *.f \
+                         *.vhd \
+                         *.vhdl
+
+# The RECURSIVE tag can be used to turn specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE              = NO
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE                =
+
+# The EXCLUDE_SYMLINKS tag can be used select whether or not files or
+# directories that are symbolic links (a Unix filesystem feature) are excluded
+# from the input.
+
+EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS       = *
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output.  If FILTER_PATTERNS is specified, this tag will be
+# ignored.
+
+INPUT_FILTER           =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis.  Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match.  The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
+# is applied to all files.
+
+FILTER_PATTERNS        =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES    = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER         = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION    = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
+# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
+# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
+# link to the source code.  Otherwise they will link to the documentation.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code
+# will point to the HTML generated by the htags(1) tool instead of doxygen
+# built-in source browser. The htags tool is part of GNU's global source
+# tagging system (see http://www.gnu.org/software/global/global.html). You
+# will need version 4.8.6 or higher.
+
+USE_HTAGS              = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS       = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX     = NO
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX    = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX          =
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML          = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT            = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header.
+
+HTML_HEADER            =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER            =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet. Note that doxygen will try to copy
+# the style sheet file to the HTML output directory, so don't put your own
+# stylesheet in the HTML output directory as well, or it will be erased!
+
+HTML_STYLESHEET        =
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS     = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded. For this to work a browser that supports
+# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox
+# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
+
+HTML_DYNAMIC_SECTIONS  = NO
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files
+# will be generated that can be used as input for Apple's Xcode 3
+# integrated development environment, introduced with OSX 10.5 (Leopard).
+# To create a documentation set, doxygen will generate a Makefile in the
+# HTML output directory. Running make will produce the docset in that
+# directory and running "make install" will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
+# it at startup.
+# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information.
+
+GENERATE_DOCSET        = NO
+
+# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
+# feed. A documentation feed provides an umbrella under which multiple
+# documentation sets from a single provider (such as a company or product suite)
+# can be grouped.
+
+DOCSET_FEEDNAME        = "Doxygen generated docs"
+
+# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
+# should uniquely identify the documentation set bundle. This should be a
+# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
+# will append .docset to the name.
+
+DOCSET_BUNDLE_ID       = org.doxygen.Project
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP      = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE               =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION           =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI           = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
+# is used to encode HtmlHelp index (hhk), content (hhc) and project file
+# content.
+
+CHM_INDEX_ENCODING     =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND             = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER
+# are set, an additional index file will be generated that can be used as input for
+# Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated
+# HTML documentation.
+
+GENERATE_QHP           = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
+# be used to specify the file name of the resulting .qch file.
+# The path specified is relative to the HTML output folder.
+
+QCH_FILE               =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#namespace
+
+QHP_NAMESPACE          =
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#virtual-folders
+
+QHP_VIRTUAL_FOLDER     = doc
+
+# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add.
+# For more information please see
+# http://doc.trolltech.com/qthelpproject.html#custom-filters
+
+QHP_CUST_FILTER_NAME   =
+
+# The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the custom filter to add.For more information please see
+# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">Qt Help Project / Custom Filters</a>.
+
+QHP_CUST_FILTER_ATTRS  =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's
+# filter section matches.
+# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">Qt Help Project / Filter Attributes</a>.
+
+QHP_SECT_FILTER_ATTRS  =
+
+# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
+# be used to specify the location of Qt's qhelpgenerator.
+# If non-empty doxygen will try to run qhelpgenerator on the generated
+# .qhp file.
+
+QHG_LOCATION           =
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
+# top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it.
+
+DISABLE_INDEX          = NO
+
+# This tag can be used to set the number of enum values (range [1..20])
+# that doxygen will group on one line in the generated HTML documentation.
+
+ENUM_VALUES_PER_LINE   = 4
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information.
+# If the tag value is set to YES, a side panel will be generated
+# containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
+# Windows users are probably better off using the HTML help feature.
+
+GENERATE_TREEVIEW      = NO
+
+# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
+# and Class Hierarchy pages using a tree view instead of an ordered list.
+
+USE_INLINE_TREES       = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH         = 250
+
+# Use this tag to change the font size of Latex formulas included
+# as images in the HTML documentation. The default is 10. Note that
+# when you change the font size after a successful doxygen run you need
+# to manually remove any form_*.png images from the HTML output directory
+# to force them to be regenerated.
+
+FORMULA_FONTSIZE       = 10
+
+# When the SEARCHENGINE tag is enable doxygen will generate a search box
+# for the HTML output. The underlying search engine uses javascript
+# and DHTML and should work on any modern browser. Note that when using
+# HTML help (GENERATE_HTMLHELP) or Qt help (GENERATE_QHP)
+# there is already a search function so this one should typically
+# be disabled.
+
+SEARCHENGINE           = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX         = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT           = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+
+LATEX_CMD_NAME         = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME     = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, a4wide, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE             = a4wide
+
+# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES         =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER           =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS         = YES
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX           = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode.
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE        = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES     = NO
+
+# If LATEX_SOURCE_CODE is set to YES then doxygen will include
+# source code with syntax highlighting in the LaTeX output.
+# Note that which sources are shown also depends on other settings
+# such as SOURCE_BROWSER.
+
+LATEX_SOURCE_CODE      = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS         = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE    =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE    =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION          = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML           = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT             = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA             =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD                =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING     = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader.  This is useful
+# if you want to understand what is going on.  On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION        = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF     = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files
+# in the INCLUDE_PATH (see below) will be search if a #include is found.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS  =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all function-like macros that are alone
+# on a line, have an all uppercase name, and do not end with a semicolon. Such
+# function macros are typically used for boiler-plate code, and will confuse
+# the parser if not removed.
+
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles.
+# Optionally an initial location of the external documentation
+# can be added for each tagfile. The format of a tag file without
+# this location is as follows:
+#   TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+#   TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths or
+# URLs. If a location is present for each tag, the installdox tool
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES               =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE       =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS        = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH              = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base
+# or super classes. Setting the tag to NO turns the diagrams off. Note that
+# this option is superseded by the HAVE_DOT option below. This is only a
+# fallback. It is recommended to install and use dot, since it yields more
+# powerful graphs.
+
+CLASS_DIAGRAMS         = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH            =
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT               = NO
+
+# By default doxygen will write a font called FreeSans.ttf to the output
+# directory and reference it in all dot files that doxygen generates. This
+# font does not include all possible unicode characters however, so when you need
+# these (or just want a differently looking font) you can specify the font name
+# using DOT_FONTNAME. You need need to make sure dot is able to find the font,
+# which can be done by putting it in a standard location or by setting the
+# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory
+# containing the font.
+
+DOT_FONTNAME           = FreeSans
+
+# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
+# The default size is 10pt.
+
+DOT_FONTSIZE           = 10
+
+# By default doxygen will tell dot to use the output directory to look for the
+# FreeSans.ttf font (which doxygen will put there itself). If you specify a
+# different font using DOT_FONTNAME you can set the path where dot
+# can find it using this tag.
+
+DOT_FONTPATH           =
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# the CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH    = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for groups, showing the direct groups dependencies
+
+GROUP_GRAPHS           = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK               = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS     = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH          = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH      = YES
+
+# If the CALL_GRAPH and HAVE_DOT options are set to YES then
+# doxygen will generate a call dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable call graphs
+# for selected functions only using the \callgraph command.
+
+CALL_GRAPH             = NO
+
+# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
+# doxygen will generate a caller dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable caller
+# graphs for selected functions only using the \callergraph command.
+
+CALLER_GRAPH           = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY    = YES
+
+# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
+# then doxygen will show the dependencies a directory has on other directories
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
+DIRECTORY_GRAPH        = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are png, jpg, or gif
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT       = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
+DOT_PATH               =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
+# nodes that will be shown in the graph. If the number of nodes in a graph
+# becomes larger than this value, doxygen will truncate the graph, which is
+# visualized by representing a node as a red box. Note that doxygen if the
+# number of direct children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
+# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+
+DOT_GRAPH_MAX_NODES    = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes
+# that lay further from the root node will be omitted. Note that setting this
+# option to 1 or 2 may greatly reduce the computation time needed for large
+# code bases. Also note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+
+MAX_DOT_GRAPH_DEPTH    = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not
+# seem to support this out of the box. Warning: Depending on the platform used,
+# enabling this option may lead to badly anti-aliased labels on the edges of
+# a graph (i.e. they become hard to read).
+
+DOT_TRANSPARENT        = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10)
+# support this, this feature is disabled by default.
+
+DOT_MULTI_TARGETS      = NO
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP            = YES
diff --git a/benchmark/feature_stab/src/dbreg/dbreg.cpp b/benchmark/feature_stab/src/dbreg/dbreg.cpp
new file mode 100644
index 00000000..986e255e
--- /dev/null
+++ b/benchmark/feature_stab/src/dbreg/dbreg.cpp
@@ -0,0 +1,793 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// $Id: dbreg.cpp,v 1.31 2011/06/17 14:04:32 mbansal Exp $
+#include "dbreg.h"
+#include <string.h>
+#include <stdio.h>
+
+
+#if PROFILE
+#endif
+
+//#include <iostream>
+
+db_FrameToReferenceRegistration::db_FrameToReferenceRegistration() :
+  m_initialized(false),m_nr_matches(0),m_over_allocation(256),m_nr_bins(20),m_max_cost_pix(30), m_quarter_resolution(false)
+{
+  m_reference_image = NULL;
+  m_aligned_ins_image = NULL;
+
+  m_quarter_res_image = NULL;
+  m_horz_smooth_subsample_image = NULL;
+
+  m_x_corners_ref = NULL;
+  m_y_corners_ref = NULL;
+
+  m_x_corners_ins = NULL;
+  m_y_corners_ins = NULL;
+
+  m_match_index_ref = NULL;
+  m_match_index_ins = NULL;
+
+  m_inlier_indices = NULL;
+
+  m_num_inlier_indices = 0;
+
+  m_temp_double = NULL;
+  m_temp_int = NULL;
+
+  m_corners_ref = NULL;
+  m_corners_ins = NULL;
+
+  m_sq_cost = NULL;
+  m_cost_histogram = NULL;
+
+  profile_string = NULL;
+
+  db_Identity3x3(m_K);
+  db_Identity3x3(m_H_ref_to_ins);
+  db_Identity3x3(m_H_dref_to_ref);
+
+  m_sq_cost_computed = false;
+  m_reference_set = false;
+
+  m_reference_update_period = 0;
+  m_nr_frames_processed = 0;
+
+  return;
+}
+
+db_FrameToReferenceRegistration::~db_FrameToReferenceRegistration()
+{
+  Clean();
+}
+
+void db_FrameToReferenceRegistration::Clean()
+{
+  if ( m_reference_image )
+    db_FreeImage_u(m_reference_image,m_im_height);
+
+  if ( m_aligned_ins_image )
+    db_FreeImage_u(m_aligned_ins_image,m_im_height);
+
+  if ( m_quarter_res_image )
+  {
+    db_FreeImage_u(m_quarter_res_image, m_im_height);
+  }
+
+  if ( m_horz_smooth_subsample_image )
+  {
+    db_FreeImage_u(m_horz_smooth_subsample_image, m_im_height*2);
+  }
+
+  delete [] m_x_corners_ref;
+  delete [] m_y_corners_ref;
+
+  delete [] m_x_corners_ins;
+  delete [] m_y_corners_ins;
+
+  delete [] m_match_index_ref;
+  delete [] m_match_index_ins;
+
+  delete [] m_temp_double;
+  delete [] m_temp_int;
+
+  delete [] m_corners_ref;
+  delete [] m_corners_ins;
+
+  delete [] m_sq_cost;
+  delete [] m_cost_histogram;
+
+  delete [] m_inlier_indices;
+
+  if(profile_string)
+    delete [] profile_string;
+
+  m_reference_image = NULL;
+  m_aligned_ins_image = NULL;
+
+  m_quarter_res_image = NULL;
+  m_horz_smooth_subsample_image = NULL;
+
+  m_x_corners_ref = NULL;
+  m_y_corners_ref = NULL;
+
+  m_x_corners_ins = NULL;
+  m_y_corners_ins = NULL;
+
+  m_match_index_ref = NULL;
+  m_match_index_ins = NULL;
+
+  m_inlier_indices = NULL;
+
+  m_temp_double = NULL;
+  m_temp_int = NULL;
+
+  m_corners_ref = NULL;
+  m_corners_ins = NULL;
+
+  m_sq_cost = NULL;
+  m_cost_histogram = NULL;
+}
+
+void db_FrameToReferenceRegistration::Init(int width, int height,
+                       int    homography_type,
+                       int    max_iterations,
+                       bool   linear_polish,
+                       bool   quarter_resolution,
+                       double scale,
+                       unsigned int reference_update_period,
+                       bool   do_motion_smoothing,
+                       double motion_smoothing_gain,
+                       int    nr_samples,
+                       int    chunk_size,
+                       int    cd_target_nr_corners,
+                       double cm_max_disparity,
+                           bool   cm_use_smaller_matching_window,
+                       int    cd_nr_horz_blocks,
+                       int    cd_nr_vert_blocks
+                       )
+{
+  Clean();
+
+  m_reference_update_period = reference_update_period;
+  m_nr_frames_processed = 0;
+
+  m_do_motion_smoothing = do_motion_smoothing;
+  m_motion_smoothing_gain = motion_smoothing_gain;
+
+  m_stab_smoother.setSmoothingFactor(m_motion_smoothing_gain);
+
+  m_quarter_resolution = quarter_resolution;
+
+  profile_string = new char[10240];
+
+  if (m_quarter_resolution == true)
+  {
+    width = width/2;
+    height = height/2;
+
+    m_horz_smooth_subsample_image = db_AllocImage_u(width,height*2,m_over_allocation);
+    m_quarter_res_image = db_AllocImage_u(width,height,m_over_allocation);
+  }
+
+  m_im_width = width;
+  m_im_height = height;
+
+  double temp[9];
+  db_Approx3DCalMat(m_K,temp,m_im_width,m_im_height);
+
+  m_homography_type = homography_type;
+  m_max_iterations = max_iterations;
+  m_scale = 2/(m_K[0]+m_K[4]);
+  m_nr_samples = nr_samples;
+  m_chunk_size = chunk_size;
+
+  double outlier_t1 = 5.0;
+
+  m_outlier_t2 = outlier_t1*outlier_t1;//*m_scale*m_scale;
+
+  m_current_is_reference = false;
+
+  m_linear_polish = linear_polish;
+
+  m_reference_image = db_AllocImage_u(m_im_width,m_im_height,m_over_allocation);
+  m_aligned_ins_image = db_AllocImage_u(m_im_width,m_im_height,m_over_allocation);
+
+  // initialize feature detection and matching:
+  //m_max_nr_corners = m_cd.Init(m_im_width,m_im_height,cd_target_nr_corners,cd_nr_horz_blocks,cd_nr_vert_blocks,0.0,0.0);
+  m_max_nr_corners = m_cd.Init(m_im_width,m_im_height,cd_target_nr_corners,cd_nr_horz_blocks,cd_nr_vert_blocks,DB_DEFAULT_ABS_CORNER_THRESHOLD/500.0,0.0);
+
+    int use_21 = 0;
+  m_max_nr_matches = m_cm.Init(m_im_width,m_im_height,cm_max_disparity,m_max_nr_corners,DB_DEFAULT_NO_DISPARITY,cm_use_smaller_matching_window,use_21);
+
+  // allocate space for corner feature locations for reference and inspection images:
+  m_x_corners_ref = new double [m_max_nr_corners];
+  m_y_corners_ref = new double [m_max_nr_corners];
+
+  m_x_corners_ins = new double [m_max_nr_corners];
+  m_y_corners_ins = new double [m_max_nr_corners];
+
+  // allocate space for match indices:
+  m_match_index_ref = new int [m_max_nr_matches];
+  m_match_index_ins = new int [m_max_nr_matches];
+
+  m_temp_double = new double [12*DB_DEFAULT_NR_SAMPLES+10*m_max_nr_matches];
+  m_temp_int = new int [db_maxi(DB_DEFAULT_NR_SAMPLES,m_max_nr_matches)];
+
+  // allocate space for homogenous image points:
+  m_corners_ref = new double [3*m_max_nr_corners];
+  m_corners_ins = new double [3*m_max_nr_corners];
+
+  // allocate cost array and histogram:
+  m_sq_cost = new double [m_max_nr_matches];
+  m_cost_histogram = new int [m_nr_bins];
+
+  // reserve array:
+  //m_inlier_indices.reserve(m_max_nr_matches);
+  m_inlier_indices = new int[m_max_nr_matches];
+
+  m_initialized = true;
+
+  m_max_inlier_count = 0;
+}
+
+
+#define MB 0
+// Save the reference image, detect features and update the dref-to-ref transformation
+int db_FrameToReferenceRegistration::UpdateReference(const unsigned char * const * im, bool subsample, bool detect_corners)
+{
+  double temp[9];
+  db_Multiply3x3_3x3(temp,m_H_dref_to_ref,m_H_ref_to_ins);
+  db_Copy9(m_H_dref_to_ref,temp);
+
+  const unsigned char * const * imptr = im;
+
+  if (m_quarter_resolution && subsample)
+  {
+    GenerateQuarterResImage(im);
+    imptr = m_quarter_res_image;
+  }
+
+  // save the reference image, detect features and quit
+  db_CopyImage_u(m_reference_image,imptr,m_im_width,m_im_height,m_over_allocation);
+
+  if(detect_corners)
+  {
+    #if MB
+    m_cd.DetectCorners(imptr, m_x_corners_ref,m_y_corners_ref,&m_nr_corners_ref);
+    int nr = 0;
+    for(int k=0; k<m_nr_corners_ref; k++)
+    {
+        if(m_x_corners_ref[k]>m_im_width/3)
+        {
+            m_x_corners_ref[nr] = m_x_corners_ref[k];
+            m_y_corners_ref[nr] = m_y_corners_ref[k];
+            nr++;
+        }
+
+    }
+    m_nr_corners_ref = nr;
+    #else
+    m_cd.DetectCorners(imptr, m_x_corners_ref,m_y_corners_ref,&m_nr_corners_ref);
+    #endif
+  }
+  else
+  {
+    m_nr_corners_ref = m_nr_corners_ins;
+
+    for(int k=0; k<m_nr_corners_ins; k++)
+    {
+        m_x_corners_ref[k] = m_x_corners_ins[k];
+        m_y_corners_ref[k] = m_y_corners_ins[k];
+    }
+
+  }
+
+  db_Identity3x3(m_H_ref_to_ins);
+
+  m_max_inlier_count = 0;   // Reset to 0 as no inliers seen until now
+  m_sq_cost_computed = false;
+  m_reference_set = true;
+  m_current_is_reference = true;
+  return 1;
+}
+
+void db_FrameToReferenceRegistration::Get_H_dref_to_ref(double H[9])
+{
+  db_Copy9(H,m_H_dref_to_ref);
+}
+
+void db_FrameToReferenceRegistration::Get_H_dref_to_ins(double H[9])
+{
+  db_Multiply3x3_3x3(H,m_H_dref_to_ref,m_H_ref_to_ins);
+}
+
+void db_FrameToReferenceRegistration::Set_H_dref_to_ins(double H[9])
+{
+    double H_ins_to_ref[9];
+
+    db_Identity3x3(H_ins_to_ref);   // Ensure it has proper values
+    db_InvertAffineTransform(H_ins_to_ref,m_H_ref_to_ins);  // Invert to get ins to ref
+    db_Multiply3x3_3x3(m_H_dref_to_ref,H,H_ins_to_ref); // Update dref to ref using the input H from dref to ins
+}
+
+
+void db_FrameToReferenceRegistration::ResetDisplayReference()
+{
+  db_Identity3x3(m_H_dref_to_ref);
+}
+
+bool db_FrameToReferenceRegistration::NeedReferenceUpdate()
+{
+  // If less than 50% of the starting number of inliers left, then its time to update the reference.
+  if(m_max_inlier_count>0 && float(m_num_inlier_indices)/float(m_max_inlier_count)<0.5)
+    return true;
+  else
+    return false;
+}
+
+int db_FrameToReferenceRegistration::AddFrame(const unsigned char * const * im, double H[9],bool force_reference,bool prewarp)
+{
+  m_current_is_reference = false;
+  if(!m_reference_set || force_reference)
+    {
+      db_Identity3x3(m_H_ref_to_ins);
+      db_Copy9(H,m_H_ref_to_ins);
+
+      UpdateReference(im,true,true);
+      return 0;
+    }
+
+  const unsigned char * const * imptr = im;
+
+  if (m_quarter_resolution)
+  {
+    if (m_quarter_res_image)
+    {
+      GenerateQuarterResImage(im);
+    }
+
+    imptr = (const unsigned char * const* )m_quarter_res_image;
+  }
+
+  double H_last[9];
+  db_Copy9(H_last,m_H_ref_to_ins);
+  db_Identity3x3(m_H_ref_to_ins);
+
+  m_sq_cost_computed = false;
+
+  // detect corners on inspection image and match to reference image features:s
+
+  // @jke - Adding code to time the functions.  TODO: Remove after test
+#if PROFILE
+  double iTimer1, iTimer2;
+  char str[255];
+  strcpy(profile_string,"\n");
+  sprintf(str,"[%dx%d] %p\n",m_im_width,m_im_height,im);
+  strcat(profile_string, str);
+#endif
+
+  // @jke - Adding code to time the functions.  TODO: Remove after test
+#if PROFILE
+  iTimer1 = now_ms();
+#endif
+  m_cd.DetectCorners(imptr, m_x_corners_ins,m_y_corners_ins,&m_nr_corners_ins);
+  // @jke - Adding code to time the functions.  TODO: Remove after test
+# if PROFILE
+  iTimer2 = now_ms();
+  double elapsedTimeCorner = iTimer2 - iTimer1;
+  sprintf(str,"Corner Detection [%d corners] = %g ms\n",m_nr_corners_ins, elapsedTimeCorner);
+  strcat(profile_string, str);
+#endif
+
+  // @jke - Adding code to time the functions.  TODO: Remove after test
+#if PROFILE
+  iTimer1 = now_ms();
+#endif
+    if(prewarp)
+  m_cm.Match(m_reference_image,imptr,m_x_corners_ref,m_y_corners_ref,m_nr_corners_ref,
+         m_x_corners_ins,m_y_corners_ins,m_nr_corners_ins,
+         m_match_index_ref,m_match_index_ins,&m_nr_matches,H,0);
+    else
+  m_cm.Match(m_reference_image,imptr,m_x_corners_ref,m_y_corners_ref,m_nr_corners_ref,
+         m_x_corners_ins,m_y_corners_ins,m_nr_corners_ins,
+         m_match_index_ref,m_match_index_ins,&m_nr_matches);
+  // @jke - Adding code to time the functions.  TODO: Remove after test
+# if PROFILE
+  iTimer2 = now_ms();
+  double elapsedTimeMatch = iTimer2 - iTimer1;
+  sprintf(str,"Matching [%d] = %g ms\n",m_nr_matches,elapsedTimeMatch);
+  strcat(profile_string, str);
+#endif
+
+
+  // copy out matching features:
+  for ( int i = 0; i < m_nr_matches; ++i )
+    {
+      int offset = 3*i;
+      m_corners_ref[offset  ] = m_x_corners_ref[m_match_index_ref[i]];
+      m_corners_ref[offset+1] = m_y_corners_ref[m_match_index_ref[i]];
+      m_corners_ref[offset+2] = 1.0;
+
+      m_corners_ins[offset  ] = m_x_corners_ins[m_match_index_ins[i]];
+      m_corners_ins[offset+1] = m_y_corners_ins[m_match_index_ins[i]];
+      m_corners_ins[offset+2] = 1.0;
+    }
+
+  // @jke - Adding code to time the functions.  TODO: Remove after test
+#if PROFILE
+  iTimer1 = now_ms();
+#endif
+  // perform the alignment:
+  db_RobImageHomography(m_H_ref_to_ins, m_corners_ref, m_corners_ins, m_nr_matches, m_K, m_K, m_temp_double, m_temp_int,
+            m_homography_type,NULL,m_max_iterations,m_max_nr_matches,m_scale,
+            m_nr_samples, m_chunk_size);
+  // @jke - Adding code to time the functions.  TODO: Remove after test
+# if PROFILE
+  iTimer2 = now_ms();
+  double elapsedTimeHomography = iTimer2 - iTimer1;
+  sprintf(str,"Homography = %g ms\n",elapsedTimeHomography);
+  strcat(profile_string, str);
+#endif
+
+
+  SetOutlierThreshold();
+
+  // Compute the inliers for the db compute m_H_ref_to_ins
+  ComputeInliers(m_H_ref_to_ins);
+
+  // Update the max inlier count
+  m_max_inlier_count = (m_max_inlier_count > m_num_inlier_indices)?m_max_inlier_count:m_num_inlier_indices;
+
+  // Fit a least-squares model to just the inliers and put it in m_H_ref_to_ins
+  if(m_linear_polish)
+    Polish(m_inlier_indices, m_num_inlier_indices);
+
+  if (m_quarter_resolution)
+  {
+    m_H_ref_to_ins[2] *= 2.0;
+    m_H_ref_to_ins[5] *= 2.0;
+  }
+
+#if PROFILE
+  sprintf(str,"#Inliers = %d \n",m_num_inlier_indices);
+  strcat(profile_string, str);
+#endif
+/*
+  ///// CHECK IF CURRENT TRANSFORMATION GOOD OR BAD ////
+  ///// IF BAD, then update reference to the last correctly aligned inspection frame;
+  if(m_num_inlier_indices<5)//0.9*m_nr_matches || m_nr_matches < 20)
+  {
+    db_Copy9(m_H_ref_to_ins,H_last);
+    UpdateReference(imptr,false);
+//  UpdateReference(m_aligned_ins_image,false);
+  }
+  else
+  {
+  ///// IF GOOD, then update the last correctly aligned inspection frame to be this;
+  //db_CopyImage_u(m_aligned_ins_image,imptr,m_im_width,m_im_height,m_over_allocation);
+*/
+  if(m_do_motion_smoothing)
+    SmoothMotion();
+
+   // Disable debug printing
+   // db_PrintDoubleMatrix(m_H_ref_to_ins,3,3);
+
+  db_Copy9(H, m_H_ref_to_ins);
+
+  m_nr_frames_processed++;
+{
+  if ( (m_nr_frames_processed % m_reference_update_period) == 0 )
+  {
+    //UpdateReference(imptr,false, false);
+
+    #if MB
+    UpdateReference(imptr,false, true);
+    #else
+    UpdateReference(imptr,false, false);
+    #endif
+  }
+
+
+  }
+
+
+
+  return 1;
+}
+
+//void db_FrameToReferenceRegistration::ComputeInliers(double H[9],std::vector<int> &inlier_indices)
+void db_FrameToReferenceRegistration::ComputeInliers(double H[9])
+{
+  double totnummatches = m_nr_matches;
+  int inliercount=0;
+
+  m_num_inlier_indices = 0;
+//  inlier_indices.clear();
+
+  for(int c=0; c < totnummatches; c++ )
+    {
+      if (m_sq_cost[c] <= m_outlier_t2)
+    {
+      m_inlier_indices[inliercount] = c;
+      inliercount++;
+    }
+    }
+
+  m_num_inlier_indices = inliercount;
+}
+
+//void db_FrameToReferenceRegistration::Polish(std::vector<int> &inlier_indices)
+void db_FrameToReferenceRegistration::Polish(int *inlier_indices, int &num_inlier_indices)
+{
+  db_Zero(m_polish_C,36);
+  db_Zero(m_polish_D,6);
+  for (int i=0;i<num_inlier_indices;i++)
+    {
+      int j = 3*inlier_indices[i];
+      m_polish_C[0]+=m_corners_ref[j]*m_corners_ref[j];
+      m_polish_C[1]+=m_corners_ref[j]*m_corners_ref[j+1];
+      m_polish_C[2]+=m_corners_ref[j];
+      m_polish_C[7]+=m_corners_ref[j+1]*m_corners_ref[j+1];
+      m_polish_C[8]+=m_corners_ref[j+1];
+      m_polish_C[14]+=1;
+      m_polish_D[0]+=m_corners_ref[j]*m_corners_ins[j];
+      m_polish_D[1]+=m_corners_ref[j+1]*m_corners_ins[j];
+      m_polish_D[2]+=m_corners_ins[j];
+      m_polish_D[3]+=m_corners_ref[j]*m_corners_ins[j+1];
+      m_polish_D[4]+=m_corners_ref[j+1]*m_corners_ins[j+1];
+      m_polish_D[5]+=m_corners_ins[j+1];
+    }
+
+  double a=db_maxd(m_polish_C[0],m_polish_C[7]);
+  m_polish_C[0]/=a; m_polish_C[1]/=a;   m_polish_C[2]/=a;
+  m_polish_C[7]/=a; m_polish_C[8]/=a; m_polish_C[14]/=a;
+
+  m_polish_D[0]/=a; m_polish_D[1]/=a;   m_polish_D[2]/=a;
+  m_polish_D[3]/=a; m_polish_D[4]/=a;   m_polish_D[5]/=a;
+
+
+  m_polish_C[6]=m_polish_C[1];
+  m_polish_C[12]=m_polish_C[2];
+  m_polish_C[13]=m_polish_C[8];
+
+  m_polish_C[21]=m_polish_C[0]; m_polish_C[22]=m_polish_C[1]; m_polish_C[23]=m_polish_C[2];
+  m_polish_C[28]=m_polish_C[7]; m_polish_C[29]=m_polish_C[8];
+  m_polish_C[35]=m_polish_C[14];
+
+
+  double d[6];
+  db_CholeskyDecomp6x6(m_polish_C,d);
+  db_CholeskyBacksub6x6(m_H_ref_to_ins,m_polish_C,d,m_polish_D);
+}
+
+void db_FrameToReferenceRegistration::EstimateSecondaryModel(double H[9])
+{
+  /*      if ( m_current_is_reference )
+      {
+      db_Identity3x3(H);
+      return;
+      }
+  */
+
+  // select the outliers of the current model:
+  SelectOutliers();
+
+  // perform the alignment:
+  db_RobImageHomography(m_H_ref_to_ins, m_corners_ref, m_corners_ins, m_nr_matches, m_K, m_K, m_temp_double, m_temp_int,
+            m_homography_type,NULL,m_max_iterations,m_max_nr_matches,m_scale,
+            m_nr_samples, m_chunk_size);
+
+  db_Copy9(H,m_H_ref_to_ins);
+}
+
+void db_FrameToReferenceRegistration::ComputeCostArray()
+{
+  if ( m_sq_cost_computed ) return;
+
+  for( int c=0, k=0 ;c < m_nr_matches; c++, k=k+3)
+    {
+      m_sq_cost[c] = SquaredInhomogenousHomographyError(m_corners_ins+k,m_H_ref_to_ins,m_corners_ref+k);
+    }
+
+  m_sq_cost_computed = true;
+}
+
+void db_FrameToReferenceRegistration::SelectOutliers()
+{
+  int nr_outliers=0;
+
+  ComputeCostArray();
+
+  for(int c=0, k=0 ;c<m_nr_matches;c++,k=k+3)
+    {
+      if (m_sq_cost[c] > m_outlier_t2)
+    {
+      int offset = 3*nr_outliers++;
+      db_Copy3(m_corners_ref+offset,m_corners_ref+k);
+      db_Copy3(m_corners_ins+offset,m_corners_ins+k);
+    }
+    }
+
+  m_nr_matches = nr_outliers;
+}
+
+void db_FrameToReferenceRegistration::ComputeCostHistogram()
+{
+  ComputeCostArray();
+
+  for ( int b = 0; b < m_nr_bins; ++b )
+    m_cost_histogram[b] = 0;
+
+  for(int c = 0; c < m_nr_matches; c++)
+    {
+      double error = db_SafeSqrt(m_sq_cost[c]);
+      int bin = (int)(error/m_max_cost_pix*m_nr_bins);
+      if ( bin < m_nr_bins )
+    m_cost_histogram[bin]++;
+      else
+    m_cost_histogram[m_nr_bins-1]++;
+    }
+
+/*
+  for ( int i = 0; i < m_nr_bins; ++i )
+    std::cout << m_cost_histogram[i] << " ";
+  std::cout << std::endl;
+*/
+}
+
+void db_FrameToReferenceRegistration::SetOutlierThreshold()
+{
+  ComputeCostHistogram();
+
+  int i = 0, last=0;
+  for (; i < m_nr_bins-1; ++i )
+    {
+      if ( last > m_cost_histogram[i] )
+    break;
+      last = m_cost_histogram[i];
+    }
+
+  //std::cout << "I " <<  i << std::endl;
+
+  int max = m_cost_histogram[i];
+
+  for (; i < m_nr_bins-1; ++i )
+    {
+      if ( m_cost_histogram[i] < (int)(0.1*max) )
+    //if ( last < m_cost_histogram[i] )
+    break;
+      last = m_cost_histogram[i];
+    }
+  //std::cout << "J " <<  i << std::endl;
+
+  m_outlier_t2 = db_sqr(i*m_max_cost_pix/m_nr_bins);
+
+  //std::cout << "m_outlier_t2 " <<  m_outlier_t2 << std::endl;
+}
+
+void db_FrameToReferenceRegistration::SmoothMotion(void)
+{
+  VP_MOTION inmot,outmot;
+
+  double H[9];
+
+  Get_H_dref_to_ins(H);
+
+      MXX(inmot) = H[0];
+    MXY(inmot) = H[1];
+    MXZ(inmot) = H[2];
+    MXW(inmot) = 0.0;
+
+    MYX(inmot) = H[3];
+    MYY(inmot) = H[4];
+    MYZ(inmot) = H[5];
+    MYW(inmot) = 0.0;
+
+    MZX(inmot) = H[6];
+    MZY(inmot) = H[7];
+    MZZ(inmot) = H[8];
+    MZW(inmot) = 0.0;
+
+    MWX(inmot) = 0.0;
+    MWY(inmot) = 0.0;
+    MWZ(inmot) = 0.0;
+    MWW(inmot) = 1.0;
+
+    inmot.type = VP_MOTION_AFFINE;
+
+    int w = m_im_width;
+    int h = m_im_height;
+
+    if(m_quarter_resolution)
+    {
+    w = w*2;
+    h = h*2;
+    }
+
+#if 0
+    m_stab_smoother.smoothMotionAdaptive(w,h,&inmot,&outmot);
+#else
+    m_stab_smoother.smoothMotion(&inmot,&outmot);
+#endif
+
+    H[0] = MXX(outmot);
+    H[1] = MXY(outmot);
+    H[2] = MXZ(outmot);
+
+    H[3] = MYX(outmot);
+    H[4] = MYY(outmot);
+    H[5] = MYZ(outmot);
+
+    H[6] = MZX(outmot);
+    H[7] = MZY(outmot);
+    H[8] = MZZ(outmot);
+
+    Set_H_dref_to_ins(H);
+}
+
+void db_FrameToReferenceRegistration::GenerateQuarterResImage(const unsigned char* const* im)
+{
+  int input_h = m_im_height*2;
+  int input_w = m_im_width*2;
+
+  for (int j = 0; j < input_h; j++)
+  {
+    const unsigned char* in_row_ptr = im[j];
+    unsigned char* out_row_ptr = m_horz_smooth_subsample_image[j]+1;
+
+    for (int i = 2; i < input_w-2; i += 2)
+    {
+      int smooth_val = (
+            6*in_row_ptr[i] +
+            ((in_row_ptr[i-1]+in_row_ptr[i+1])<<2) +
+            in_row_ptr[i-2]+in_row_ptr[i+2]
+            ) >> 4;
+      *out_row_ptr++ = (unsigned char) smooth_val;
+
+      if ( (smooth_val < 0) || (smooth_val > 255))
+      {
+        return;
+      }
+
+    }
+  }
+
+  for (int j = 2; j < input_h-2; j+=2)
+  {
+
+    unsigned char* in_row_ptr = m_horz_smooth_subsample_image[j];
+    unsigned char* out_row_ptr = m_quarter_res_image[j/2];
+
+    for (int i = 1; i < m_im_width-1; i++)
+    {
+      int smooth_val = (
+            6*in_row_ptr[i] +
+            ((in_row_ptr[i-m_im_width]+in_row_ptr[i+m_im_width]) << 2)+
+            in_row_ptr[i-2*m_im_width]+in_row_ptr[i+2*m_im_width]
+            ) >> 4;
+      *out_row_ptr++ = (unsigned char)smooth_val;
+
+      if ( (smooth_val < 0) || (smooth_val > 255))
+      {
+        return;
+      }
+
+    }
+  }
+}
diff --git a/benchmark/feature_stab/src/dbreg/dbreg.h b/benchmark/feature_stab/src/dbreg/dbreg.h
new file mode 100644
index 00000000..4eb24448
--- /dev/null
+++ b/benchmark/feature_stab/src/dbreg/dbreg.h
@@ -0,0 +1,581 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#pragma once
+
+#ifdef _WIN32
+#ifdef DBREG_EXPORTS
+#define DBREG_API __declspec(dllexport)
+#else
+#define DBREG_API __declspec(dllimport)
+#endif
+#else
+#define DBREG_API
+#endif
+
+// @jke - the next few lines are for extracting timing data.  TODO: Remove after test
+#define PROFILE 0
+
+#include "dbstabsmooth.h"
+
+#include <db_feature_detection.h>
+#include <db_feature_matching.h>
+#include <db_rob_image_homography.h>
+
+#if PROFILE
+    #include <sys/time.h>
+#endif
+
+/*! \mainpage db_FrameToReferenceRegistration
+
+ \section intro Introduction
+
+ db_FrameToReferenceRegistration provides a simple interface to a set of sophisticated algorithms for stabilizing
+ video sequences.  As its name suggests, the class is used to compute parameters that will allow us to warp incoming video
+ frames and register them with respect to a so-called <i>reference</i> frame.  The reference frame is simply the first
+ frame of a sequence; the registration process is that of estimating the parameters of a warp that can be applied to
+ subsequent frames to make those frames align with the reference.  A video made up of these warped frames will be more
+ stable than the input video.
+
+ For more technical information on the internal structure of the algorithms used within the db_FrameToRegistration class,
+ please follow this <a href="../Sarnoff image registration.docx">link</a>.
+
+ \section usage Usage
+ In addition to the class constructor, there are two main functions of db_FrameToReferenceRegistration that are of
+ interest to the programmer.  db_FrameToReferenceRegistration::Init(...) is used to initialize the parameters of the
+ registration algorithm. db_FrameToReferenceRegistration::AddFrame(...) is the method by which each new video frame
+ is introduced to the registration algorithm, and produces the estimated registration warp parameters.
+
+ The following example illustrates how the major methods of the class db_FrameToReferenceRegistration can be used together
+ to calculate the registration parameters for an image sequence.  In the example, the calls to the methods of
+ db_FrameToReferenceRegistration match those found in the API, but supporting code should be considered pseudo-code.
+ For a more complete example, please consult the source code for dbregtest.
+
+
+    \code
+    // feature-based image registration class:
+    db_FrameToReferenceRegistration reg;
+
+    // Image data
+    const unsigned char * const * image_storage;
+
+    // The 3x3 frame to reference registration parameters
+    double frame_to_ref_homography[9];
+
+    // a counter to count the number of frames processed.
+    unsigned long frame_counter;
+    // ...
+
+    // main loop - keep going while there are images to process.
+    while (ImagesAreAvailable)
+    {
+        // Call functions to place latest data into image_storage
+        // ...
+
+        // if the registration object is not yet initialized, then do so
+        // The arguments to this function are explained in the accompanying
+        // html API documentation
+        if (!reg.Initialized())
+        {
+            reg.Init(w,h,motion_model_type,25,linear_polish,quarter_resolution,
+                   DB_POINT_STANDARDDEV,reference_update_period,
+                   do_motion_smoothing,motion_smoothing_gain,
+                   DB_DEFAULT_NR_SAMPLES,DB_DEFAULT_CHUNK_SIZE,
+                   nr_corners,max_disparity);
+        }
+
+        // Present the new image data to the registration algorithm,
+        // with the result being stored in the frame_to_ref_homography
+        // variable.
+        reg.AddFrame(image_storage,frame_to_ref_homography);
+
+        // frame_to_ref_homography now contains the stabilizing transform
+        // use this to warp the latest image for display, etc.
+
+        // if this is the first frame, we need to tell the registration
+        // class to store the image as its reference.  Otherwise, AddFrame
+        // takes care of that.
+        if (frame_counter == 0)
+        {
+            reg.UpdateReference(image_storage);
+        }
+
+        // increment the frame counter
+        frame_counter++;
+    }
+
+    \endcode
+
+ */
+
+/*!
+ * Performs feature-based frame to reference image registration.
+ */
+class DBREG_API db_FrameToReferenceRegistration
+{
+public:
+    db_FrameToReferenceRegistration(void);
+    ~db_FrameToReferenceRegistration();
+
+    /*!
+     * Set parameters and allocate memory. Note: The default values of these parameters have been set to the values used for the android implementation (i.e. the demo APK).
+     * \param width         image width
+     * \param height        image height
+     * \param homography_type see definitions in \ref LMRobImageHomography
+     * \param max_iterations    max number of polishing steps
+     * \param linear_polish     whether to perform a linear polishing step after RANSAC
+     * \param quarter_resolution    whether to process input images at quarter resolution (for computational efficiency)
+     * \param scale         Cauchy scale coefficient (see db_ExpCauchyReprojectionError() )
+     * \param reference_update_period   how often to update the alignment reference (in units of number of frames)
+     * \param do_motion_smoothing   whether to perform display reference smoothing
+     * \param motion_smoothing_gain weight factor to reflect how fast the display reference must follow the current frame if motion smoothing is enabled
+     * \param nr_samples        number of times to compute a hypothesis
+     * \param chunk_size        size of cost chunks
+     * \param cd_target_nr_corners  target number of corners for corner detector
+     * \param cm_max_disparity      maximum disparity search range for corner matcher (in units of ratio of image width)
+     * \param cm_use_smaller_matching_window    if set to true, uses a correlation window of 5x5 instead of the default 11x11
+     * \param cd_nr_horz_blocks     the number of horizontal blocks for the corner detector to partition the image
+     * \param cd_nr_vert_blocks     the number of vertical blocks for the corner detector to partition the image
+    */
+    void Init(int width, int height,
+          int       homography_type = DB_HOMOGRAPHY_TYPE_DEFAULT,
+          int       max_iterations = DB_DEFAULT_MAX_ITERATIONS,
+          bool      linear_polish = false,
+          bool   quarter_resolution = true,
+          double  scale = DB_POINT_STANDARDDEV,
+          unsigned int reference_update_period = 3,
+          bool   do_motion_smoothing = false,
+          double motion_smoothing_gain = 0.75,
+          int   nr_samples = DB_DEFAULT_NR_SAMPLES,
+          int   chunk_size = DB_DEFAULT_CHUNK_SIZE,
+          int    cd_target_nr_corners = 500,
+          double cm_max_disparity = 0.2,
+          bool   cm_use_smaller_matching_window = false,
+          int    cd_nr_horz_blocks = 5,
+          int    cd_nr_vert_blocks = 5);
+
+    /*!
+     * Reset the transformation type that is being use to perform alignment. Use this to change the alignment type at run time.
+     * \param homography_type   the type of transformation to use for performing alignment (see definitions in \ref LMRobImageHomography)
+    */
+    void ResetHomographyType(int homography_type) { m_homography_type = homography_type; }
+
+    /*!
+     * Enable/Disable motion smoothing. Use this to turn motion smoothing on/off at run time.
+     * \param enable    flag indicating whether to turn the motion smoothing on or off.
+    */
+    void ResetSmoothing(bool enable) { m_do_motion_smoothing = enable; }
+
+    /*!
+     * Align an inspection image to an existing reference image, update the reference image if due and perform motion smoothing if enabled.
+     * \param im                new inspection image
+     * \param H             computed transformation from reference to inspection coordinate frame. Identity is returned if no reference frame was set.
+     * \param force_reference   make this the new reference image
+     */
+    int AddFrame(const unsigned char * const * im, double H[9], bool force_reference=false, bool prewarp=false);
+
+    /*!
+     * Returns true if Init() was run.
+     */
+    bool Initialized() const { return m_initialized; }
+
+    /*!
+     * Returns true if the current frame is being used as the alignment reference.
+    */
+    bool IsCurrentReference() const { return m_current_is_reference; }
+
+    /*!
+     * Returns true if we need to call UpdateReference now.
+     */
+    bool NeedReferenceUpdate();
+
+    /*!
+     * Returns the pointer reference to the alignment reference image data
+    */
+    unsigned char ** GetReferenceImage() { return m_reference_image; }
+
+    /*!
+     * Returns the pointer reference to the double array containing the homogeneous coordinates for the matched reference image corners.
+    */
+    double * GetRefCorners() { return m_corners_ref; }
+    /*!
+     * Returns the pointer reference to the double array containing the homogeneous coordinates for the matched inspection image corners.
+    */
+    double * GetInsCorners() { return m_corners_ins; }
+    /*!
+     * Returns the number of correspondences between the reference and inspection images.
+    */
+    int GetNrMatches() { return m_nr_matches; }
+
+    /*!
+     * Returns the number of corners detected in the current reference image.
+    */
+    int GetNrRefCorners() { return m_nr_corners_ref; }
+
+    /*!
+     * Returns the pointer to an array of indices that were found to be RANSAC inliers from the matched corner lists.
+    */
+    int* GetInliers() { return m_inlier_indices; }
+
+    /*!
+     * Returns the number of inliers from the RANSAC matching step.
+    */
+    int  GetNrInliers() { return m_num_inlier_indices; }
+
+    //std::vector<int>& GetInliers();
+    //void Polish(std::vector<int> &inlier_indices);
+
+    /*!
+     * Perform a linear polishing step by re-estimating the alignment transformation using the RANSAC inliers.
+     * \param inlier_indices    pointer to an array of indices that were found to be RANSAC inliers from the matched corner lists.
+     * \param num_inlier_indices    number of inliers i.e. the length of the array passed as the first argument.
+    */
+    void Polish(int *inlier_indices, int &num_inlier_indices);
+
+    /*!
+     * Reset the motion smoothing parameters to their initial values.
+    */
+    void ResetMotionSmoothingParameters() { m_stab_smoother.Init(); }
+
+    /*!
+     * Update the alignment reference image to the specified image.
+     * \param im    pointer to the image data to be used as the new alignment reference.
+     * \param subsample boolean flag to control whether the function should internally subsample the provided image to the size provided in the Init() function.
+    */
+    int UpdateReference(const unsigned char * const * im, bool subsample = true, bool detect_corners = true);
+
+    /*!
+     * Returns the transformation from the display reference to the alignment reference frame
+    */
+    void Get_H_dref_to_ref(double H[9]);
+    /*!
+     * Returns the transformation from the display reference to the inspection reference frame
+    */
+    void Get_H_dref_to_ins(double H[9]);
+    /*!
+     * Set the transformation from the display reference to the inspection reference frame
+     * \param H the transformation to set
+    */
+    void Set_H_dref_to_ins(double H[9]);
+
+    /*!
+     * Reset the display reference to the current frame.
+    */
+    void ResetDisplayReference();
+
+    /*!
+     * Estimate a secondary motion model starting from the specified transformation.
+     * \param H the primary motion model to start from
+    */
+    void EstimateSecondaryModel(double H[9]);
+
+    /*!
+     *
+    */
+    void SelectOutliers();
+
+    char *profile_string;
+
+protected:
+    void Clean();
+    void GenerateQuarterResImage(const unsigned char* const * im);
+
+    int     m_im_width;
+    int     m_im_height;
+
+    // RANSAC and refinement parameters:
+    int m_homography_type;
+    int     m_max_iterations;
+    double  m_scale;
+    int     m_nr_samples;
+    int     m_chunk_size;
+    double  m_outlier_t2;
+
+    // Whether to fit a linear model to just the inliers at the end
+    bool   m_linear_polish;
+    double m_polish_C[36];
+    double m_polish_D[6];
+
+    // local state
+    bool m_current_is_reference;
+    bool m_initialized;
+
+    // inspection to reference homography:
+    double m_H_ref_to_ins[9];
+    double m_H_dref_to_ref[9];
+
+    // feature extraction and matching:
+    db_CornerDetector_u m_cd;
+    db_Matcher_u        m_cm;
+
+    // length of corner arrays:
+    unsigned long m_max_nr_corners;
+
+    // corner locations of reference image features:
+    double * m_x_corners_ref;
+    double * m_y_corners_ref;
+    int  m_nr_corners_ref;
+
+    // corner locations of inspection image features:
+    double * m_x_corners_ins;
+    double * m_y_corners_ins;
+    int      m_nr_corners_ins;
+
+    // length of match index arrays:
+    unsigned long m_max_nr_matches;
+
+    // match indices:
+    int * m_match_index_ref;
+    int * m_match_index_ins;
+    int   m_nr_matches;
+
+    // pointer to internal copy of the reference image:
+    unsigned char ** m_reference_image;
+
+    // pointer to internal copy of last aligned inspection image:
+    unsigned char ** m_aligned_ins_image;
+
+    // pointer to quarter resolution image, if used.
+    unsigned char** m_quarter_res_image;
+
+    // temporary storage for the quarter resolution image processing
+    unsigned char** m_horz_smooth_subsample_image;
+
+    // temporary space for homography computation:
+    double * m_temp_double;
+    int * m_temp_int;
+
+    // homogenous image point arrays:
+    double * m_corners_ref;
+    double * m_corners_ins;
+
+    // Indices of the points within the match lists
+    int * m_inlier_indices;
+    int m_num_inlier_indices;
+
+    //void ComputeInliers(double H[9], std::vector<int> &inlier_indices);
+    void ComputeInliers(double H[9]);
+
+    // cost arrays:
+    void ComputeCostArray();
+    bool m_sq_cost_computed;
+    double * m_sq_cost;
+
+    // cost histogram:
+    void ComputeCostHistogram();
+    int *m_cost_histogram;
+
+    void SetOutlierThreshold();
+
+    // utility function for smoothing the motion parameters.
+    void SmoothMotion(void);
+
+private:
+    double m_K[9];
+    const int m_over_allocation;
+
+    bool m_reference_set;
+
+    // Maximum number of inliers seen until now w.r.t the current reference frame
+    int m_max_inlier_count;
+
+    // Number of cost histogram bins:
+    int m_nr_bins;
+    // All costs above this threshold get put into the last bin:
+    int m_max_cost_pix;
+
+    // whether to quarter the image resolution for processing, or not
+    bool m_quarter_resolution;
+
+    // the period (in number of frames) for reference update.
+    unsigned int m_reference_update_period;
+
+    // the number of frames processed so far.
+    unsigned int m_nr_frames_processed;
+
+    // smoother for motion transformations
+    db_StabilizationSmoother m_stab_smoother;
+
+    // boolean to control whether motion smoothing occurs (or not)
+    bool m_do_motion_smoothing;
+
+    // double to set the gain for motion smoothing
+    double m_motion_smoothing_gain;
+};
+/*!
+ Create look-up tables to undistort images. Only Bougeut (Matlab toolkit)
+ is currently supported. Can be used with db_WarpImageLut_u().
+ \code
+    xd = H*xs;
+    xd = xd/xd(3);
+ \endcode
+ \param lut_x   pre-allocated float image
+ \param lut_y   pre-allocated float image
+ \param w       width
+ \param h       height
+ \param H       image homography from source to destination
+ */
+inline void db_GenerateHomographyLut(float ** lut_x,float ** lut_y,int w,int h,const double H[9])
+{
+    assert(lut_x && lut_y);
+    double x[3] = {0.0,0.0,1.0};
+    double xb[3];
+
+/*
+    double xl[3];
+
+    // Determine the output coordinate system ROI
+    double Hinv[9];
+    db_InvertAffineTransform(Hinv,H);
+    db_Multiply3x3_3x1(xl, Hinv, x);
+    xl[0] = db_SafeDivision(xl[0],xl[2]);
+    xl[1] = db_SafeDivision(xl[1],xl[2]);
+*/
+
+    for ( int i = 0; i < w; ++i )
+        for ( int j = 0; j < h; ++j )
+        {
+            x[0] = double(i);
+            x[1] = double(j);
+            db_Multiply3x3_3x1(xb, H, x);
+            xb[0] = db_SafeDivision(xb[0],xb[2]);
+            xb[1] = db_SafeDivision(xb[1],xb[2]);
+
+            lut_x[j][i] = float(xb[0]);
+            lut_y[j][i] = float(xb[1]);
+        }
+}
+
+/*!
+ * Perform a look-up table warp for packed RGB ([rgbrgbrgb...]) images.
+ * The LUTs must be float images of the same size as source image.
+ * The source value x_s is determined from destination (x_d,y_d) through lut_x
+ * and y_s is determined from lut_y:
+   \code
+   x_s = lut_x[y_d][x_d];
+   y_s = lut_y[y_d][x_d];
+   \endcode
+
+ * \param src   source image (w*3 by h)
+ * \param dst   destination image (w*3 by h)
+ * \param w     width
+ * \param h     height
+ * \param lut_x LUT for x
+ * \param lut_y LUT for y
+ */
+inline void db_WarpImageLutFast_rgb(const unsigned char * const * src, unsigned char ** dst, int w, int h,
+                                  const float * const * lut_x, const float * const * lut_y)
+{
+    assert(src && dst);
+    int xd=0, yd=0;
+
+    for ( int i = 0; i < w; ++i )
+        for ( int j = 0; j < h; ++j )
+        {
+            xd = static_cast<unsigned int>(lut_x[j][i]);
+            yd = static_cast<unsigned int>(lut_y[j][i]);
+            if ( xd >= w || yd >= h ||
+                 xd < 0 || yd < 0)
+            {
+                dst[j][3*i  ] = 0;
+                dst[j][3*i+1] = 0;
+                dst[j][3*i+2] = 0;
+            }
+            else
+            {
+                dst[j][3*i  ] = src[yd][3*xd  ];
+                dst[j][3*i+1] = src[yd][3*xd+1];
+                dst[j][3*i+2] = src[yd][3*xd+2];
+            }
+        }
+}
+
+inline unsigned char db_BilinearInterpolationRGB(double y, double x, const unsigned char * const * v, int offset)
+{
+         int floor_x=(int) x;
+         int floor_y=(int) y;
+
+         int ceil_x=floor_x+1;
+         int ceil_y=floor_y+1;
+
+         unsigned char f00 = v[floor_y][3*floor_x+offset];
+         unsigned char f01 = v[floor_y][3*ceil_x+offset];
+         unsigned char f10 = v[ceil_y][3*floor_x+offset];
+         unsigned char f11 = v[ceil_y][3*ceil_x+offset];
+
+         double xl = x-floor_x;
+         double yl = y-floor_y;
+
+         return (unsigned char)(f00*(1-yl)*(1-xl) + f10*yl*(1-xl) + f01*(1-yl)*xl + f11*yl*xl);
+}
+
+inline void db_WarpImageLutBilinear_rgb(const unsigned char * const * src, unsigned char ** dst, int w, int h,
+                                  const float * const * lut_x, const float * const * lut_y)
+{
+    assert(src && dst);
+    double xd=0.0, yd=0.0;
+
+    for ( int i = 0; i < w; ++i )
+        for ( int j = 0; j < h; ++j )
+        {
+            xd = static_cast<double>(lut_x[j][i]);
+            yd = static_cast<double>(lut_y[j][i]);
+            if ( xd > w-2 || yd > h-2 ||
+                 xd < 0.0 || yd < 0.0)
+            {
+                dst[j][3*i  ] = 0;
+                dst[j][3*i+1] = 0;
+                dst[j][3*i+2] = 0;
+            }
+            else
+            {
+                dst[j][3*i  ] = db_BilinearInterpolationRGB(yd,xd,src,0);
+                dst[j][3*i+1] = db_BilinearInterpolationRGB(yd,xd,src,1);
+                dst[j][3*i+2] = db_BilinearInterpolationRGB(yd,xd,src,2);
+            }
+        }
+}
+
+inline double SquaredInhomogenousHomographyError(double y[3],double H[9],double x[3]){
+    double x0,x1,x2,mult;
+    double sd;
+
+    x0=H[0]*x[0]+H[1]*x[1]+H[2];
+    x1=H[3]*x[0]+H[4]*x[1]+H[5];
+    x2=H[6]*x[0]+H[7]*x[1]+H[8];
+    mult=1.0/((x2!=0.0)?x2:1.0);
+    sd=(y[0]-x0*mult)*(y[0]-x0*mult)+(y[1]-x1*mult)*(y[1]-x1*mult);
+
+    return(sd);
+}
+
+
+// functions related to profiling
+#if PROFILE
+
+/* return current time in milliseconds */
+static double
+now_ms(void)
+{
+    //struct timespec res;
+    struct timeval res;
+    //clock_gettime(CLOCK_REALTIME, &res);
+    gettimeofday(&res, NULL);
+    return 1000.0*res.tv_sec + (double)res.tv_usec/1e3;
+}
+
+#endif
diff --git a/benchmark/feature_stab/src/dbreg/dbstabsmooth.cpp b/benchmark/feature_stab/src/dbreg/dbstabsmooth.cpp
new file mode 100644
index 00000000..03608c69
--- /dev/null
+++ b/benchmark/feature_stab/src/dbreg/dbstabsmooth.cpp
@@ -0,0 +1,301 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdlib.h>
+#include "dbstabsmooth.h"
+
+///// TODO TODO ////////// Replace this with the actual definition from Jayan's reply /////////////
+#define vp_copy_motion_no_id vp_copy_motion
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+db_StabilizationSmoother::db_StabilizationSmoother()
+{
+    Init();
+}
+
+void db_StabilizationSmoother::Init()
+{
+    f_smoothOn = true;
+    f_smoothReset = false;
+    f_smoothFactor = 1.0f;
+    f_minDampingFactor = 0.2f;
+    f_zoom = 1.0f;
+    VP_MOTION_ID(f_motLF);
+    VP_MOTION_ID(f_imotLF);
+    f_hsize = 0;
+    f_vsize = 0;
+
+    VP_MOTION_ID(f_disp_mot);
+    VP_MOTION_ID(f_src_mot);
+    VP_MOTION_ID(f_diff_avg);
+
+    for( int i = 0; i < MOTION_ARRAY-1; i++) {
+        VP_MOTION_ID(f_hist_mot_speed[i]);
+        VP_MOTION_ID(f_hist_mot[i]);
+        VP_MOTION_ID(f_hist_diff_mot[i]);
+    }
+    VP_MOTION_ID(f_hist_mot[MOTION_ARRAY-1]);
+
+}
+
+db_StabilizationSmoother::~db_StabilizationSmoother()
+{}
+
+
+bool db_StabilizationSmoother::smoothMotion(VP_MOTION *inmot, VP_MOTION *outmot)
+{
+    VP_MOTION_ID(f_motLF);
+    VP_MOTION_ID(f_imotLF);
+    f_motLF.insid = inmot->refid;
+    f_motLF.refid = inmot->insid;
+
+    if(f_smoothOn) {
+        if(!f_smoothReset) {
+            MXX(f_motLF) = (VP_PAR) (f_smoothFactor*(double) MXX(f_motLF) + (1.0-f_smoothFactor)* (double) MXX(*inmot));
+            MXY(f_motLF) = (VP_PAR) (f_smoothFactor*(double) MXY(f_motLF) + (1.0-f_smoothFactor)* (double) MXY(*inmot));
+            MXZ(f_motLF) = (VP_PAR) (f_smoothFactor*(double) MXZ(f_motLF) + (1.0-f_smoothFactor)* (double) MXZ(*inmot));
+            MXW(f_motLF) = (VP_PAR) (f_smoothFactor*(double) MXW(f_motLF) + (1.0-f_smoothFactor)* (double) MXW(*inmot));
+
+            MYX(f_motLF) = (VP_PAR) (f_smoothFactor*(double) MYX(f_motLF) + (1.0-f_smoothFactor)* (double) MYX(*inmot));
+            MYY(f_motLF) = (VP_PAR) (f_smoothFactor*(double) MYY(f_motLF) + (1.0-f_smoothFactor)* (double) MYY(*inmot));
+            MYZ(f_motLF) = (VP_PAR) (f_smoothFactor*(double) MYZ(f_motLF) + (1.0-f_smoothFactor)* (double) MYZ(*inmot));
+            MYW(f_motLF) = (VP_PAR) (f_smoothFactor*(double) MYW(f_motLF) + (1.0-f_smoothFactor)* (double) MYW(*inmot));
+
+            MZX(f_motLF) = (VP_PAR) (f_smoothFactor*(double) MZX(f_motLF) + (1.0-f_smoothFactor)* (double) MZX(*inmot));
+            MZY(f_motLF) = (VP_PAR) (f_smoothFactor*(double) MZY(f_motLF) + (1.0-f_smoothFactor)* (double) MZY(*inmot));
+            MZZ(f_motLF) = (VP_PAR) (f_smoothFactor*(double) MZZ(f_motLF) + (1.0-f_smoothFactor)* (double) MZZ(*inmot));
+            MZW(f_motLF) = (VP_PAR) (f_smoothFactor*(double) MZW(f_motLF) + (1.0-f_smoothFactor)* (double) MZW(*inmot));
+
+            MWX(f_motLF) = (VP_PAR) (f_smoothFactor*(double) MWX(f_motLF) + (1.0-f_smoothFactor)* (double) MWX(*inmot));
+            MWY(f_motLF) = (VP_PAR) (f_smoothFactor*(double) MWY(f_motLF) + (1.0-f_smoothFactor)* (double) MWY(*inmot));
+            MWZ(f_motLF) = (VP_PAR) (f_smoothFactor*(double) MWZ(f_motLF) + (1.0-f_smoothFactor)* (double) MWZ(*inmot));
+            MWW(f_motLF) = (VP_PAR) (f_smoothFactor*(double) MWW(f_motLF) + (1.0-f_smoothFactor)* (double) MWW(*inmot));
+        }
+        else
+            vp_copy_motion_no_id(inmot, &f_motLF); // f_smoothFactor = 0.0
+
+        // Only allow LF motion to be compensated. Remove HF motion from
+        // the output transformation
+        if(!vp_invert_motion(&f_motLF, &f_imotLF))
+            return false;
+
+        if(!vp_cascade_motion(&f_imotLF, inmot, outmot))
+            return false;
+    }
+    else {
+        vp_copy_motion_no_id(inmot, outmot);
+    }
+
+    return true;
+}
+
+bool db_StabilizationSmoother::smoothMotionAdaptive(/*VP_BIMG *bimg,*/int hsize, int vsize, VP_MOTION *inmot, VP_MOTION *outmot)
+{
+    VP_MOTION tmpMotion, testMotion;
+    VP_PAR p1x, p2x, p3x, p4x;
+    VP_PAR p1y, p2y, p3y, p4y;
+    double smoothFactor;
+    double minSmoothFactor = f_minDampingFactor;
+
+//  int hsize = bimg->w;
+//  int vsize = bimg->h;
+    double border_factor = 0.01;//0.2;
+    double border_x = border_factor * hsize;
+    double border_y = border_factor * vsize;
+
+    VP_MOTION_ID(f_motLF);
+    VP_MOTION_ID(f_imotLF);
+    VP_MOTION_ID(testMotion);
+    VP_MOTION_ID(tmpMotion);
+
+    if (f_smoothOn) {
+        VP_MOTION identityMotion;
+        VP_MOTION_ID(identityMotion); // initialize the motion
+        vp_copy_motion(inmot/*in*/, &testMotion/*out*/);
+        VP_PAR delta = vp_motion_cornerdiff(&testMotion, &identityMotion, 0, 0,(int)hsize, (int)vsize);
+
+        smoothFactor = 0.99 - 0.0015 * delta;
+
+        if(smoothFactor < minSmoothFactor)
+            smoothFactor = minSmoothFactor;
+
+        // Find the amount of motion that must be compensated so that no "border" pixels are seen in the stable video
+        for (; smoothFactor >= minSmoothFactor; smoothFactor -= 0.01) {
+            // Compute the smoothed motion
+            if(!smoothMotion(inmot, &tmpMotion, smoothFactor))
+                break;
+
+            // TmpMotion, or Qsi where s is the smoothed display reference and i is the
+            // current image, tells us how points in the S co-ordinate system map to
+            // points in the I CS.  We would like to check whether the four corners of the
+            // warped and smoothed display reference lies entirely within the I co-ordinate
+            // system.  If yes, then the amount of smoothing is sufficient so that NO
+            // border pixels are seen at the output.  We test for f_smoothFactor terms
+            // between 0.9 and 1.0, in steps of 0.01, and between 0.5 ands 0.9 in steps of 0.1
+
+            (void) vp_zoom_motion2d(&tmpMotion, &testMotion, 1, hsize, vsize, (double)f_zoom); // needs to return bool
+
+            VP_WARP_POINT_2D(0, 0, testMotion, p1x, p1y);
+            VP_WARP_POINT_2D(hsize - 1, 0, testMotion, p2x, p2y);
+            VP_WARP_POINT_2D(hsize - 1, vsize - 1, testMotion, p3x, p3y);
+            VP_WARP_POINT_2D(0, vsize - 1, testMotion, p4x, p4y);
+
+            if (!is_point_in_rect((double)p1x,(double)p1y,-border_x,-border_y,(double)(hsize+2.0*border_x),(double)(vsize+2.0*border_y))) {
+                continue;
+            }
+            if (!is_point_in_rect((double)p2x, (double)p2y,-border_x,-border_y,(double)(hsize+2.0*border_x),(double)(vsize+2.0*border_y))) {
+                continue;
+            }
+            if (!is_point_in_rect((double)p3x,(double)p3y,-border_x,-border_y,(double)(hsize+2.0*border_x),(double)(vsize+2.0*border_y))) {
+                continue;
+            }
+            if (!is_point_in_rect((double)p4x, (double)p4y,-border_x,-border_y,(double)(hsize+2.0*border_x),(double)(vsize+2.0*border_y))) {
+                continue;
+            }
+
+            // If we get here, then all the points are in the rectangle.
+            // Therefore, break out of this loop
+            break;
+        }
+
+        // if we get here and f_smoothFactor <= fMinDampingFactor, reset the stab reference
+        if (smoothFactor < f_minDampingFactor)
+            smoothFactor = f_minDampingFactor;
+
+        // use the smoothed motion for stabilization
+        vp_copy_motion_no_id(&tmpMotion/*in*/, outmot/*out*/);
+    }
+    else
+    {
+        vp_copy_motion_no_id(inmot, outmot);
+    }
+
+    return true;
+}
+
+bool db_StabilizationSmoother::smoothMotion(VP_MOTION *inmot, VP_MOTION *outmot, double smooth_factor)
+{
+    f_motLF.insid = inmot->refid;
+    f_motLF.refid = inmot->insid;
+
+    if(f_smoothOn) {
+        if(!f_smoothReset) {
+            MXX(f_motLF) = (VP_PAR) (smooth_factor*(double) MXX(f_motLF) + (1.0-smooth_factor)* (double) MXX(*inmot));
+            MXY(f_motLF) = (VP_PAR) (smooth_factor*(double) MXY(f_motLF) + (1.0-smooth_factor)* (double) MXY(*inmot));
+            MXZ(f_motLF) = (VP_PAR) (smooth_factor*(double) MXZ(f_motLF) + (1.0-smooth_factor)* (double) MXZ(*inmot));
+            MXW(f_motLF) = (VP_PAR) (smooth_factor*(double) MXW(f_motLF) + (1.0-smooth_factor)* (double) MXW(*inmot));
+
+            MYX(f_motLF) = (VP_PAR) (smooth_factor*(double) MYX(f_motLF) + (1.0-smooth_factor)* (double) MYX(*inmot));
+            MYY(f_motLF) = (VP_PAR) (smooth_factor*(double) MYY(f_motLF) + (1.0-smooth_factor)* (double) MYY(*inmot));
+            MYZ(f_motLF) = (VP_PAR) (smooth_factor*(double) MYZ(f_motLF) + (1.0-smooth_factor)* (double) MYZ(*inmot));
+            MYW(f_motLF) = (VP_PAR) (smooth_factor*(double) MYW(f_motLF) + (1.0-smooth_factor)* (double) MYW(*inmot));
+
+            MZX(f_motLF) = (VP_PAR) (smooth_factor*(double) MZX(f_motLF) + (1.0-smooth_factor)* (double) MZX(*inmot));
+            MZY(f_motLF) = (VP_PAR) (smooth_factor*(double) MZY(f_motLF) + (1.0-smooth_factor)* (double) MZY(*inmot));
+            MZZ(f_motLF) = (VP_PAR) (smooth_factor*(double) MZZ(f_motLF) + (1.0-smooth_factor)* (double) MZZ(*inmot));
+            MZW(f_motLF) = (VP_PAR) (smooth_factor*(double) MZW(f_motLF) + (1.0-smooth_factor)* (double) MZW(*inmot));
+
+            MWX(f_motLF) = (VP_PAR) (smooth_factor*(double) MWX(f_motLF) + (1.0-smooth_factor)* (double) MWX(*inmot));
+            MWY(f_motLF) = (VP_PAR) (smooth_factor*(double) MWY(f_motLF) + (1.0-smooth_factor)* (double) MWY(*inmot));
+            MWZ(f_motLF) = (VP_PAR) (smooth_factor*(double) MWZ(f_motLF) + (1.0-smooth_factor)* (double) MWZ(*inmot));
+            MWW(f_motLF) = (VP_PAR) (smooth_factor*(double) MWW(f_motLF) + (1.0-smooth_factor)* (double) MWW(*inmot));
+        }
+        else
+            vp_copy_motion_no_id(inmot, &f_motLF); // smooth_factor = 0.0
+
+        // Only allow LF motion to be compensated. Remove HF motion from
+        // the output transformation
+        if(!vp_invert_motion(&f_motLF, &f_imotLF))
+            return false;
+
+        if(!vp_cascade_motion(&f_imotLF, inmot, outmot))
+            return false;
+    }
+    else {
+        vp_copy_motion_no_id(inmot, outmot);
+    }
+
+    return true;
+}
+
+//! Overloaded smoother function that takes in user-specidied smoothing factor
+bool
+db_StabilizationSmoother::smoothMotion1(VP_MOTION *inmot, VP_MOTION *outmot, VP_MOTION *motLF, VP_MOTION *imotLF, double factor)
+{
+
+    if(!f_smoothOn) {
+        vp_copy_motion(inmot, outmot);
+        return true;
+    }
+    else {
+        if(!f_smoothReset) {
+            MXX(*motLF) = (VP_PAR) (factor*(double) MXX(*motLF) + (1.0-factor)* (double) MXX(*inmot));
+            MXY(*motLF) = (VP_PAR) (factor*(double) MXY(*motLF) + (1.0-factor)* (double) MXY(*inmot));
+            MXZ(*motLF) = (VP_PAR) (factor*(double) MXZ(*motLF) + (1.0-factor)* (double) MXZ(*inmot));
+            MXW(*motLF) = (VP_PAR) (factor*(double) MXW(*motLF) + (1.0-factor)* (double) MXW(*inmot));
+
+            MYX(*motLF) = (VP_PAR) (factor*(double) MYX(*motLF) + (1.0-factor)* (double) MYX(*inmot));
+            MYY(*motLF) = (VP_PAR) (factor*(double) MYY(*motLF) + (1.0-factor)* (double) MYY(*inmot));
+            MYZ(*motLF) = (VP_PAR) (factor*(double) MYZ(*motLF) + (1.0-factor)* (double) MYZ(*inmot));
+            MYW(*motLF) = (VP_PAR) (factor*(double) MYW(*motLF) + (1.0-factor)* (double) MYW(*inmot));
+
+            MZX(*motLF) = (VP_PAR) (factor*(double) MZX(*motLF) + (1.0-factor)* (double) MZX(*inmot));
+            MZY(*motLF) = (VP_PAR) (factor*(double) MZY(*motLF) + (1.0-factor)* (double) MZY(*inmot));
+            MZZ(*motLF) = (VP_PAR) (factor*(double) MZZ(*motLF) + (1.0-factor)* (double) MZZ(*inmot));
+            MZW(*motLF) = (VP_PAR) (factor*(double) MZW(*motLF) + (1.0-factor)* (double) MZW(*inmot));
+
+            MWX(*motLF) = (VP_PAR) (factor*(double) MWX(*motLF) + (1.0-factor)* (double) MWX(*inmot));
+            MWY(*motLF) = (VP_PAR) (factor*(double) MWY(*motLF) + (1.0-factor)* (double) MWY(*inmot));
+            MWZ(*motLF) = (VP_PAR) (factor*(double) MWZ(*motLF) + (1.0-factor)* (double) MWZ(*inmot));
+            MWW(*motLF) = (VP_PAR) (factor*(double) MWW(*motLF) + (1.0-factor)* (double) MWW(*inmot));
+        }
+        else {
+            vp_copy_motion(inmot, motLF);
+        }
+        // Only allow LF motion to be compensated. Remove HF motion from the output transformation
+        if(!vp_invert_motion(motLF, imotLF)) {
+#if DEBUG_PRINT
+            printfOS("Invert failed \n");
+#endif
+            return false;
+        }
+        if(!vp_cascade_motion(imotLF, inmot, outmot)) {
+#if DEBUG_PRINT
+            printfOS("cascade failed \n");
+#endif
+            return false;
+        }
+    }
+    return true;
+}
+
+
+
+
+bool db_StabilizationSmoother::is_point_in_rect(double px, double py, double rx, double ry, double w, double h)
+{
+    if (px < rx)
+        return(false);
+    if (px >= rx + w)
+        return(false);
+    if (py < ry)
+        return(false);
+    if (py >= ry + h)
+        return(false);
+
+    return(true);
+}
+
diff --git a/benchmark/feature_stab/src/dbreg/dbstabsmooth.h b/benchmark/feature_stab/src/dbreg/dbstabsmooth.h
new file mode 100644
index 00000000..f03546ef
--- /dev/null
+++ b/benchmark/feature_stab/src/dbreg/dbstabsmooth.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+
+#ifdef _WIN32
+#ifdef DBREG_EXPORTS
+#define DBREG_API __declspec(dllexport)
+#else
+#define DBREG_API __declspec(dllimport)
+#endif
+#else
+#define DBREG_API
+#endif
+
+extern "C" {
+#include "vp_motionmodel.h"
+}
+
+#define MOTION_ARRAY 5
+
+
+/*!
+ * Performs smoothing on the motion estimate from feature_stab.
+ */
+class DBREG_API db_StabilizationSmoother
+{
+public:
+    db_StabilizationSmoother();
+    ~db_StabilizationSmoother();
+
+    /*!
+     * Initialize parameters for stab-smoother.
+    */
+    void Init();
+
+    //! Smothing type
+    typedef enum {
+        SimpleSmooth = 0, //!< simple smooth
+        AdaptSmooth  = 1, //!< adaptive smooth
+        PanSmooth    = 2  //!< pan motion smooth
+    } SmoothType;
+
+    /*!
+     * Smooth-motion is to do a weight-average between the current affine and
+     * motLF. The way to change the affine is only for the display purpose.
+     * It removes the high frequency motion and keep the low frequency motion
+     * to the display. IIR implmentation.
+     * \param inmot input motion parameters
+     * \param outmot smoothed output motion parameters
+    */
+    bool smoothMotion(VP_MOTION *inmot, VP_MOTION *outmot);
+
+    /*!
+     * The adaptive smoothing version of the above fixed smoothing function.
+     * \param hsize width of the image being aligned
+     * \param vsize height of the image being aligned
+     * \param inmot input motion parameters
+     * \param outmot    smoothed output motion parameters
+    */
+    bool smoothMotionAdaptive(/*VP_BIMG *bimg,*/int hsize, int vsize, VP_MOTION *inmot, VP_MOTION *outmot);
+    bool smoothPanMotion_1(VP_MOTION *inmot, VP_MOTION *outmot);
+    bool smoothPanMotion_2(VP_MOTION *inmot, VP_MOTION *outmot);
+
+    /*!
+    * Set the smoothing factor for the stab-smoother.
+    * \param factor the factor value to set
+    */
+    inline void setSmoothingFactor(float factor) { f_smoothFactor = factor; }
+
+    /*!
+     * Reset smoothing
+    */
+    inline void resetSmoothing(bool flag) { f_smoothReset = flag; }
+    /*!
+     * Set the zoom factor value.
+     * \param zoom  the value to set to
+    */
+    inline void setZoomFactor(float zoom) { f_zoom = zoom; }
+    /*!
+     * Set the minimum damping factor value.
+     * \param factor    the value to set to
+    */
+    inline void setminDampingFactor(float factor) { f_minDampingFactor = factor; }
+
+    /*!
+     * Returns the current smoothing factor.
+    */
+    inline float getSmoothingFactor(void) { return f_smoothFactor; }
+    /*!
+     * Returns the current zoom factor.
+    */
+    inline float getZoomFactor(void) { return f_zoom; }
+    /*!
+     * Returns the current minimum damping factor.
+    */
+    inline float getminDampingFactor(void) { return f_minDampingFactor; }
+    /*!
+     * Returns the current state of the smoothing reset flag.
+    */
+    inline bool  getSmoothReset(void) { return f_smoothReset; }
+    /*!
+     * Returns the current low frequency motion parameters.
+    */
+    inline VP_MOTION getMotLF(void) { return f_motLF; }
+    /*!
+     * Returns the inverse of the current low frequency motion parameters.
+    */
+    inline VP_MOTION getImotLF(void) { return f_imotLF; }
+    /*!
+     * Set the dimensions of the alignment image.
+     * \param hsize width of the image
+     * \param vsize height of the image
+    */
+    inline void setSize(int hsize, int vsize) { f_hsize = hsize; f_vsize = vsize; }
+
+protected:
+
+    bool smoothMotion(VP_MOTION *inmot, VP_MOTION *outmot, double smooth_factor);
+    bool smoothMotion1(VP_MOTION *inmot, VP_MOTION *outmot, VP_MOTION *motLF, VP_MOTION *imotLF, double smooth_factor);
+    void iterativeSmooth(VP_MOTION *input, VP_MOTION *output, double border_factor);
+    bool is_point_in_rect(double px, double py, double rx, double ry, double w, double h);
+
+
+private:
+    int f_hsize;
+    int f_vsize;
+    bool f_smoothOn;
+    bool f_smoothReset;
+    float f_smoothFactor;
+    float f_minDampingFactor;
+    float f_zoom;
+    VP_MOTION f_motLF;
+    VP_MOTION f_imotLF;
+    VP_MOTION f_hist_mot[MOTION_ARRAY];
+    VP_MOTION f_hist_mot_speed[MOTION_ARRAY-1];
+    VP_MOTION f_hist_diff_mot[MOTION_ARRAY-1];
+    VP_MOTION f_disp_mot;
+    VP_MOTION f_src_mot;
+    VP_MOTION f_diff_avg;
+
+};
+
diff --git a/benchmark/feature_stab/src/dbreg/targetver.h b/benchmark/feature_stab/src/dbreg/targetver.h
new file mode 100644
index 00000000..3ca3e879
--- /dev/null
+++ b/benchmark/feature_stab/src/dbreg/targetver.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+// The following macros define the minimum required platform.  The minimum required platform
+// is the earliest version of Windows, Internet Explorer etc. that has the necessary features to run
+// your application.  The macros work by enabling all features available on platform versions up to and
+// including the version specified.
+
+// Modify the following defines if you have to target a platform prior to the ones specified below.
+// Refer to MSDN for the latest info on corresponding values for different platforms.
+#ifndef WINVER                          // Specifies that the minimum required platform is Windows Vista.
+#define WINVER 0x0600           // Change this to the appropriate value to target other versions of Windows.
+#endif
+
+#ifndef _WIN32_WINNT            // Specifies that the minimum required platform is Windows Vista.
+#define _WIN32_WINNT 0x0600     // Change this to the appropriate value to target other versions of Windows.
+#endif
+
+#ifndef _WIN32_WINDOWS          // Specifies that the minimum required platform is Windows 98.
+#define _WIN32_WINDOWS 0x0410 // Change this to the appropriate value to target Windows Me or later.
+#endif
+
+#ifndef _WIN32_IE                       // Specifies that the minimum required platform is Internet Explorer 7.0.
+#define _WIN32_IE 0x0700        // Change this to the appropriate value to target other versions of IE.
+#endif
diff --git a/benchmark/feature_stab/src/dbreg/vp_motionmodel.c b/benchmark/feature_stab/src/dbreg/vp_motionmodel.c
new file mode 100644
index 00000000..1f6af15b
--- /dev/null
+++ b/benchmark/feature_stab/src/dbreg/vp_motionmodel.c
@@ -0,0 +1,377 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+#sourcefile  vpmotion/vp_motionmodel.c
+#category    motion-model
+*
+* Copyright 1998 Sarnoff Corporation
+* All Rights Reserved
+*
+* Modification History
+*      Date: 02/14/98
+*      Author: supuns
+*      Shop Order: 17xxx
+*              @(#) $Id: vp_motionmodel.c,v 1.4 2011/06/17 14:04:33 mbansal Exp $
+*/
+
+/*
+* ===================================================================
+* Include Files
+*/
+
+#include <string.h> /* memmove */
+#include <math.h>
+#include "vp_motionmodel.h"
+
+/* Static Functions */
+static
+double Det3(double m[3][3])
+{
+  double result;
+
+  result =
+    m[0][0]*m[1][1]*m[2][2] + m[0][1]*m[1][2]*m[2][0] +
+    m[0][2]*m[1][0]*m[2][1] - m[0][2]*m[1][1]*m[2][0] -
+    m[0][0]*m[1][2]*m[2][1] - m[0][1]*m[1][0]*m[2][2];
+
+  return(result);
+}
+
+typedef double MATRIX[4][4];
+
+static
+double Det4(MATRIX m)
+{
+    /* ==> This is a poor implementation of determinant.
+       Writing the formula out in closed form is unnecessarily complicated
+       and mistakes are easy to make. */
+  double result;
+
+  result=
+    m[0][3] *m[1][2] *m[2][1] *m[3][0] - m[0][2] *m[1][3] *m[2][1] *m[3][0] - m[0][3] *m[1][1] *m[2][2] *m[3][0] +
+    m[0][1] *m[1][3] *m[2][2] *m[3][0] + m[0][2] *m[1][1] *m[2][3] *m[3][0] - m[0][1] *m[1][2] *m[2][3] *m[3][0] - m[0][3] *m[1][2] *m[2][0] *m[3][1] +
+    m[0][2] *m[1][3] *m[2][0] *m[3][1] + m[0][3] *m[1][0] *m[2][2] *m[3][1] - m[0][0] *m[1][3] *m[2][2] *m[3][1] - m[0][2] *m[1][0] *m[2][3] *m[3][1] +
+    m[0][0] *m[1][2] *m[2][3] *m[3][1] + m[0][3] *m[1][1] *m[2][0] *m[3][2] - m[0][1] *m[1][3] *m[2][0] *m[3][2] - m[0][3] *m[1][0] *m[2][1] *m[3][2] +
+    m[0][0] *m[1][3] *m[2][1] *m[3][2] + m[0][1] *m[1][0] *m[2][3] *m[3][2] - m[0][0] *m[1][1] *m[2][3] *m[3][2] - m[0][2] *m[1][1] *m[2][0] *m[3][3] +
+    m[0][1] *m[1][2] *m[2][0] *m[3][3] + m[0][2] *m[1][0] *m[2][1] *m[3][3] - m[0][0] *m[1][2] *m[2][1] *m[3][3] - m[0][1] *m[1][0] *m[2][2] *m[3][3] +
+    m[0][0] *m[1][1] *m[2][2] *m[3][3];
+  /*
+    m[0][0]*m[1][1]*m[2][2]*m[3][3]-m[0][1]*m[1][0]*m[2][2]*m[3][3]+
+    m[0][1]*m[1][2]*m[2][0]*m[3][3]-m[0][2]*m[1][1]*m[2][0]*m[3][3]+
+    m[0][2]*m[1][0]*m[2][1]*m[3][3]-m[0][0]*m[1][2]*m[2][1]*m[3][3]+
+    m[0][0]*m[1][2]*m[2][3]*m[3][1]-m[0][2]*m[1][0]*m[2][3]*m[3][1]+
+    m[0][2]*m[1][3]*m[2][0]*m[3][1]-m[0][3]*m[1][2]*m[2][0]*m[3][1]+
+    m[0][3]*m[1][0]*m[2][2]*m[3][1]-m[0][0]*m[1][3]*m[2][2]*m[3][1]+
+    m[0][0]*m[1][3]*m[2][1]*m[3][2]-m[0][3]*m[1][0]*m[2][3]*m[3][2]+
+    m[0][1]*m[1][0]*m[2][3]*m[3][2]-m[0][0]*m[1][1]*m[2][0]*m[3][2]+
+    m[0][3]*m[1][1]*m[2][0]*m[3][2]-m[0][1]*m[1][3]*m[2][1]*m[3][2]+
+    m[0][1]*m[1][3]*m[2][2]*m[3][0]-m[0][3]*m[1][1]*m[2][2]*m[3][0]+
+    m[0][2]*m[1][1]*m[2][3]*m[3][0]-m[0][1]*m[1][2]*m[2][3]*m[3][0]+
+    m[0][3]*m[1][2]*m[2][1]*m[3][0]-m[0][2]*m[1][3]*m[2][1]*m[3][0];
+    */
+  return(result);
+}
+
+static
+int inv4Mat(const VP_MOTION* in, VP_MOTION* out)
+{
+    /* ==> This is a poor implementation of inversion.  The determinant
+       method is O(N^4), i.e. unnecessarily slow, and not numerically accurate.
+       The real complexity of inversion is O(N^3), and is best done using
+       LU decomposition. */
+
+  MATRIX inmat,outmat;
+  int i, j, k, l, m, n,ntemp;
+  double mat[3][3], indet, temp;
+
+  /* check for non-empty structures structure */
+  if (((VP_MOTION *) NULL == in) || ((VP_MOTION *) NULL == out)) {
+    return 1;
+  }
+
+  for(k=0,i=0;i<4;i++)
+    for(j=0;j<4;j++,k++)
+      inmat[i][j]=(double)in->par[k];
+
+  indet = Det4(inmat);
+  if (indet==0) return(-1);
+
+  for (i=0;i<4;i++) {
+    for (j=0;j<4;j++) {
+      m = 0;
+      for (k=0;k<4;k++) {
+    if (i != k) {
+      n = 0;
+      for (l=0;l<4;l++)
+        if (j != l) {
+          mat[m][n] = inmat[k][l];
+          n++;
+        }
+      m++;
+    }
+      }
+
+      temp = -1.;
+      ntemp = (i +j ) %2;
+      if( ntemp == 0)  temp = 1.;
+
+      outmat[j][i] = temp * Det3(mat)/indet;
+    }
+  }
+
+  for(k=0,i=0;i<4;i++)
+    for(j=0;j<4;j++,k++)
+      out->par[k]=(VP_PAR)outmat[i][j]; /*lint !e771*/
+
+  return(0);
+}
+
+/*
+* ===================================================================
+* Public Functions
+#htmlstart
+*/
+
+/*
+ * ===================================================================
+#fn vp_invert_motion
+#ft invert a motion
+#fd DEFINITION
+       Bool
+       vp_invert_motion(const VP_MOTION* in,VP_MOTION* out)
+#fd PURPOSE
+       This inverts the motion given in 'in'.
+       All motion models upto VP_MOTION_SEMI_PROJ_3D are supported.
+       It is assumed that the all 16 parameters are properly
+       initialized although you may not be using them. You could
+       use the VP_KEEP_ macro's defined in vp_motionmodel.h to set
+       the un-initialized parameters. This uses a 4x4 matrix invertion
+       function internally.
+       It is SAFE to pass the same pointer as both the 'in' and 'out'
+       parameters.
+#fd INPUTS
+       in  - input motion
+#fd OUTPUTS
+       out - output inverted motion. If singular matrix uninitialized.
+             if MWW(in) is non-zero it is also normalized.
+#fd RETURNS
+       FALSE - matrix is singular or motion model not supported
+       TRUE  - otherwise
+#fd SIDE EFFECTS
+       None
+#endfn
+*/
+
+int vp_invert_motion(const VP_MOTION* in,VP_MOTION* out)
+{
+  int refid;
+
+  /* check for non-empty structures structure */
+  if (((VP_MOTION *) NULL == in) || ((VP_MOTION *) NULL == out)) {
+    return FALSE;
+  }
+
+  if (in->type>VP_MOTION_SEMI_PROJ_3D) {
+    return FALSE;
+  }
+
+  if (inv4Mat(in,out)<0)
+    return FALSE;
+
+  /*VP_NORMALIZE(*out);*/
+  out->type = in->type;
+  refid=in->refid;
+  out->refid=in->insid;
+  out->insid=refid;
+  return TRUE;
+}
+
+/*
+* ===================================================================
+#fn vp_cascade_motion
+#ft Cascade two motion transforms
+#fd DEFINITION
+      Bool
+      vp_cascade_motion(const VP_MOTION* InAB,const VP_MOTION* InBC,VP_MOTION* OutAC)
+#fd PURPOSE
+      Given Motion Transforms A->B and B->C, this function will
+      generate a New Motion that describes the transformation
+      from A->C.
+      More specifically, OutAC = InBC * InAC.
+      This function works ok if InAB,InBC and OutAC are the same pointer.
+#fd INPUTS
+      InAB - First Motion Transform
+      InBC - Second Motion Tranform
+#fd OUTPUTS
+      OutAC - Cascaded Motion
+#fd RETURNS
+      FALSE - motion model not supported
+      TRUE  - otherwise
+#fd SIDE EFFECTS
+      None
+#endfn
+*/
+
+int vp_cascade_motion(const VP_MOTION* InA, const VP_MOTION* InB,VP_MOTION* Out)
+{
+    /* ==> This is a poor implementation of matrix multiplication.
+       Writing the formula out in closed form is unnecessarily complicated
+       and mistakes are easy to make. */
+  VP_PAR mxx,mxy,mxz,mxw;
+  VP_PAR myx,myy,myz,myw;
+  VP_PAR mzx,mzy,mzz,mzw;
+  VP_PAR mwx,mwy,mwz,mww;
+
+  /* check for non-empty structures structure */
+  if (((VP_MOTION *) NULL == InA) || ((VP_MOTION *) NULL == InB) ||
+      ((VP_MOTION *) NULL == Out)) {
+    return FALSE;
+  }
+
+  if (InA->type>VP_MOTION_PROJ_3D) {
+    return FALSE;
+  }
+
+  if (InB->type>VP_MOTION_PROJ_3D) {
+    return FALSE;
+  }
+
+  mxx = MXX(*InB)*MXX(*InA)+MXY(*InB)*MYX(*InA)+MXZ(*InB)*MZX(*InA)+MXW(*InB)*MWX(*InA);
+  mxy = MXX(*InB)*MXY(*InA)+MXY(*InB)*MYY(*InA)+MXZ(*InB)*MZY(*InA)+MXW(*InB)*MWY(*InA);
+  mxz = MXX(*InB)*MXZ(*InA)+MXY(*InB)*MYZ(*InA)+MXZ(*InB)*MZZ(*InA)+MXW(*InB)*MWZ(*InA);
+  mxw = MXX(*InB)*MXW(*InA)+MXY(*InB)*MYW(*InA)+MXZ(*InB)*MZW(*InA)+MXW(*InB)*MWW(*InA);
+  myx = MYX(*InB)*MXX(*InA)+MYY(*InB)*MYX(*InA)+MYZ(*InB)*MZX(*InA)+MYW(*InB)*MWX(*InA);
+  myy = MYX(*InB)*MXY(*InA)+MYY(*InB)*MYY(*InA)+MYZ(*InB)*MZY(*InA)+MYW(*InB)*MWY(*InA);
+  myz = MYX(*InB)*MXZ(*InA)+MYY(*InB)*MYZ(*InA)+MYZ(*InB)*MZZ(*InA)+MYW(*InB)*MWZ(*InA);
+  myw = MYX(*InB)*MXW(*InA)+MYY(*InB)*MYW(*InA)+MYZ(*InB)*MZW(*InA)+MYW(*InB)*MWW(*InA);
+  mzx = MZX(*InB)*MXX(*InA)+MZY(*InB)*MYX(*InA)+MZZ(*InB)*MZX(*InA)+MZW(*InB)*MWX(*InA);
+  mzy = MZX(*InB)*MXY(*InA)+MZY(*InB)*MYY(*InA)+MZZ(*InB)*MZY(*InA)+MZW(*InB)*MWY(*InA);
+  mzz = MZX(*InB)*MXZ(*InA)+MZY(*InB)*MYZ(*InA)+MZZ(*InB)*MZZ(*InA)+MZW(*InB)*MWZ(*InA);
+  mzw = MZX(*InB)*MXW(*InA)+MZY(*InB)*MYW(*InA)+MZZ(*InB)*MZW(*InA)+MZW(*InB)*MWW(*InA);
+  mwx = MWX(*InB)*MXX(*InA)+MWY(*InB)*MYX(*InA)+MWZ(*InB)*MZX(*InA)+MWW(*InB)*MWX(*InA);
+  mwy = MWX(*InB)*MXY(*InA)+MWY(*InB)*MYY(*InA)+MWZ(*InB)*MZY(*InA)+MWW(*InB)*MWY(*InA);
+  mwz = MWX(*InB)*MXZ(*InA)+MWY(*InB)*MYZ(*InA)+MWZ(*InB)*MZZ(*InA)+MWW(*InB)*MWZ(*InA);
+  mww = MWX(*InB)*MXW(*InA)+MWY(*InB)*MYW(*InA)+MWZ(*InB)*MZW(*InA)+MWW(*InB)*MWW(*InA);
+
+  MXX(*Out)=mxx; MXY(*Out)=mxy; MXZ(*Out)=mxz; MXW(*Out)=mxw;
+  MYX(*Out)=myx; MYY(*Out)=myy; MYZ(*Out)=myz; MYW(*Out)=myw;
+  MZX(*Out)=mzx; MZY(*Out)=mzy; MZZ(*Out)=mzz; MZW(*Out)=mzw;
+  MWX(*Out)=mwx; MWY(*Out)=mwy; MWZ(*Out)=mwz; MWW(*Out)=mww;
+  /* VP_NORMALIZE(*Out); */
+  Out->type= (InA->type > InB->type) ? InA->type : InB->type;
+  Out->refid=InA->refid;
+  Out->insid=InB->insid;
+
+  return TRUE;
+}
+
+/*
+* ===================================================================
+#fn vp_copy_motion
+#ft Copies the source motion to the destination motion.
+#fd DEFINITION
+    void
+    vp_copy_motion  (const VP_MOTION *src, VP_MOTION *dst)
+#fd PURPOSE
+    Copies the source motion to the destination motion.
+        It is OK if src == dst.
+    NOTE THAT THE SOURCE IS THE FIRST ARGUMENT.
+    This is different from some of the other VP
+    copy functions.
+#fd INPUTS
+    src is the source motion
+    dst is the destination motion
+#fd RETURNS
+    void
+#endfn
+*/
+void vp_copy_motion  (const VP_MOTION *src, VP_MOTION *dst)
+{
+  /* Use memmove rather than memcpy because it handles overlapping memory
+     OK. */
+  memmove(dst, src, sizeof(VP_MOTION));
+  return;
+} /* vp_copy_motion() */
+
+#define VP_SQR(x)   ( (x)*(x) )
+double vp_motion_cornerdiff(const VP_MOTION *mot_a, const VP_MOTION *mot_b,
+                     int xo, int yo, int w, int h)
+{
+  double ax1, ay1, ax2, ay2, ax3, ay3, ax4, ay4;
+  double bx1, by1, bx2, by2, bx3, by3, bx4, by4;
+  double err;
+
+  /*lint -e639 -e632 -e633 */
+  VP_WARP_POINT_2D(xo, yo,         *mot_a, ax1, ay1);
+  VP_WARP_POINT_2D(xo+w-1, yo,     *mot_a, ax2, ay2);
+  VP_WARP_POINT_2D(xo+w-1, yo+h-1, *mot_a, ax3, ay3);
+  VP_WARP_POINT_2D(xo, yo+h-1,     *mot_a, ax4, ay4);
+  VP_WARP_POINT_2D(xo, yo,         *mot_b, bx1, by1);
+  VP_WARP_POINT_2D(xo+w-1, yo,     *mot_b, bx2, by2);
+  VP_WARP_POINT_2D(xo+w-1, yo+h-1, *mot_b, bx3, by3);
+  VP_WARP_POINT_2D(xo, yo+h-1,     *mot_b, bx4, by4);
+  /*lint +e639 +e632 +e633 */
+
+  err = 0;
+  err += (VP_SQR(ax1 - bx1) + VP_SQR(ay1 - by1));
+  err += (VP_SQR(ax2 - bx2) + VP_SQR(ay2 - by2));
+  err += (VP_SQR(ax3 - bx3) + VP_SQR(ay3 - by3));
+  err += (VP_SQR(ax4 - bx4) + VP_SQR(ay4 - by4));
+
+  return(sqrt(err));
+}
+
+int vp_zoom_motion2d(VP_MOTION* in, VP_MOTION* out,
+                 int n, int w, int h, double zoom)
+{
+  int ii;
+  VP_PAR inv_zoom;
+  VP_PAR cx, cy;
+  VP_MOTION R2r,R2f;
+  VP_MOTION *res;
+
+  /* check for non-empty structures structure */
+  if (((VP_MOTION *) NULL == in)||(zoom <= 0.0)||(w <= 0)||(h <= 0)) {
+    return FALSE;
+  }
+
+  /* ==> Not sure why the special case of out=NULL is necessary.  Why couldn't
+     the caller just pass the same pointer for both in and out? */
+  res = ((VP_MOTION *) NULL == out)?in:out;
+
+  cx = (VP_PAR) (w/2.0);
+  cy = (VP_PAR) (h/2.0);
+
+  VP_MOTION_ID(R2r);
+  inv_zoom = (VP_PAR)(1.0/zoom);
+  MXX(R2r) = inv_zoom;
+  MYY(R2r) = inv_zoom;
+  MXW(R2r)=cx*(((VP_PAR)1.0) - inv_zoom);
+  MYW(R2r)=cy*(((VP_PAR)1.0) - inv_zoom);
+
+  VP_KEEP_AFFINE_2D(R2r);
+
+  for(ii=0;ii<n;ii++) {
+    (void) vp_cascade_motion(&R2r,in+ii,&R2f);
+    res[ii]=R2f;
+  }
+
+  return TRUE;
+} /* vp_zoom_motion2d() */
+
+/* =================================================================== */
+/* end vp_motionmodel.c */
diff --git a/benchmark/feature_stab/src/dbreg/vp_motionmodel.h b/benchmark/feature_stab/src/dbreg/vp_motionmodel.h
new file mode 100644
index 00000000..a63ac001
--- /dev/null
+++ b/benchmark/feature_stab/src/dbreg/vp_motionmodel.h
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+#sourcefile  vp_motionmodel.h
+#category    warp
+#description general motion model for tranlation/affine/projective
+#title       motion-model
+#parentlink  hindex.html
+*
+* Copyright 1998 Sarnoff Corporation
+* All Rights Reserved
+*
+* Modification History
+*      Date: 02/13/98
+*      Author: supuns
+*      Shop Order: 15491 001
+*              @(#) $Id: vp_motionmodel.h,v 1.4 2011/06/17 14:04:33 mbansal Exp $
+*/
+
+#ifndef VP_MOTIONMODEL_H
+#define VP_MOTIONMODEL_H
+#include <stdio.h>
+
+#define         FALSE           0
+#define         TRUE            1
+
+#if 0 /* Moved mottomat.c and mattomot_d.c from vpmotion.h to vpcompat.h
+     in order to remove otherwise unnecessary dependency of vpmotion,
+     vpwarp, and newvpio on vpmath */
+#ifndef VPMATH_H
+#include "vpmath.h"
+#endif
+#endif
+
+#if 0
+#ifndef VP_WARP_H
+#include "vp_warp.h"
+#endif
+#endif
+/*
+
+#htmlstart
+# ===================================================================
+#h 1 Introduction
+
+  This defines a motion model that can describe translation,
+  affine, and projective projective 3d and 3d view transforms.
+
+  The main structure VP_MOTION contains a 16 parameter array (That
+  can be considered as elements of a 4x4 matrix) and a type field
+  which can be one of VP_MOTION_NONE,VP_MOTION_TRANSLATION,
+  VP_MOTION_AFFINE, VP_MOTION_PROJECTIVE,VP_MOTION_PROJ_3D or
+  VP_MOTION_VIEW_3D. (These are defined using enums with gaps of 10
+  so that subsets of these motions that are still consistant can be
+  added in between. Motion models that are inconsistant with this set
+  should be added at the end so the routines can hadle them
+  independently.
+
+  The transformation VP_MOTION_NONE,VP_MOTION_TRANSLATION,
+  VP_MOTION_AFFINE, VP_MOTION_PROJECTIVE, VP_MOTION_PROJ_3D and
+  VP_MOTION_SEMI_PROJ_3D would map a point P={x,y,z,w} to a new point
+  P'={x',y',z',w'} using a motion model M such that P'= M.par * P.
+  Where M.par is thought of as  elements of a 4x4 matrix ordered row
+  by row. The interpretation of all models except VP_MOTION_SEMI_PROJ_3D
+  is taken to be mapping of a 3d point P"={x",y",z"} which is obtained
+  from the normalization {x'/w',y'/w',z'/w'}. In the VP_MOTION_SEMI_PROJ_3D
+  the mapping to a point P"={x",y",z"} is obtained from the normalization
+  {x'/w',y'/w',z'}. All these motion models have the property that they
+  can be inverted using 4x4 matrices. Except for the VP_MOTION_SEMI_PROJ_3D all
+  other types can also be cascaded using 4x4 matrices.
+
+  Specific macros and functions have been provided to handle 2d instances
+  of these functions. As the parameter interpretations can change when adding
+  new motion models it is HIGHLY RECOMMENDED that you use the macros MXX,MXY..
+  ect. to interpret each motion component.
+#pre
+*/
+
+/*
+#endpre
+# ===================================================================
+#h 1 Typedef and Struct Declarations
+#pre
+*/
+
+#define VP_MAX_MOTION_PAR 16
+
+typedef double VP_PAR;
+typedef VP_PAR VP_TRS[VP_MAX_MOTION_PAR];
+
+/* Do not add any motion models before VP_MOTION_PROJECTIVE */
+/* The order is assumed in vp functions */
+enum VP_MOTION_MODEL {
+  VP_MOTION_NONE=0,
+  VP_MOTION_TRANSLATION=10,
+  VP_MOTION_SCALE=11,
+  VP_MOTION_ROTATE=12,
+  VP_MOTION_X_SHEAR=13,
+  VP_MOTION_Y_SHEAR=14,
+  VP_MOTION_SIMILARITY=15,
+  VP_MOTION_AFFINE=20,
+  VP_MOTION_PROJECTIVE=30,
+  VP_MOTION_PROJ_3D=40,
+  VP_MOTION_SEMI_PROJ_3D=80,
+  VP_SIMILARITY=100,
+  VP_VFE_AFFINE=120
+};
+
+#define VP_REFID -1   /* Default ID used for reference frame */
+
+typedef struct {
+  VP_TRS par;            /* Contains the motion paramerers.
+                For the standard motion types this is
+                represented as 16 number that refer
+                to a 4x4 matrix */
+  enum VP_MOTION_MODEL type;
+  int refid;            /* Reference frame ( takes a point in refid frame
+               and moves it by the par to get a point in insid
+               frame ) */
+  int insid;            /* Inspection frame */
+} VP_MOTION;
+
+//typedef VP_LIST VP_MOTION_LIST;
+/*
+#endpre
+# ===================================================================
+#h 1 Constant Declarations
+*/
+
+/* Macros related to the 4x4 matrix parameters */
+#define MXX(m) (m).par[0]
+#define MXY(m) (m).par[1]
+#define MXZ(m) (m).par[2]
+#define MXW(m) (m).par[3]
+#define MYX(m) (m).par[4]
+#define MYY(m) (m).par[5]
+#define MYZ(m) (m).par[6]
+#define MYW(m) (m).par[7]
+#define MZX(m) (m).par[8]
+#define MZY(m) (m).par[9]
+#define MZZ(m) (m).par[10]
+#define MZW(m) (m).par[11]
+#define MWX(m) (m).par[12]
+#define MWY(m) (m).par[13]
+#define MWZ(m) (m).par[14]
+#define MWW(m) (m).par[15]
+
+/* The do {...} while (0) technique creates a statement that can be used legally
+   in an if-else statement.  See "Swallowing the semicolon",
+   http://gcc.gnu.org/onlinedocs/gcc-2.95.3/cpp_1.html#SEC23 */
+/* Initialize the Motion to be Identity */
+#define VP_MOTION_ID(m) do {\
+  MXX(m)=MYY(m)=MZZ(m)=MWW(m)=(VP_PAR)1.0; \
+  MXY(m)=MXZ(m)=MXW(m)=(VP_PAR)0.0; \
+  MYX(m)=MYZ(m)=MYW(m)=(VP_PAR)0.0; \
+  MZX(m)=MZY(m)=MZW(m)=(VP_PAR)0.0; \
+  MWX(m)=MWY(m)=MWZ(m)=(VP_PAR)0.0; \
+(m).type = VP_MOTION_TRANSLATION; } while (0)
+
+/* Initialize without altering the translation components */
+#define VP_KEEP_TRANSLATION_3D(m) do {\
+  MXX(m)=MYY(m)=MZZ(m)=MWW(m)=(VP_PAR)1.0; \
+  MXY(m)=MXZ(m)=(VP_PAR)0.0; \
+  MYX(m)=MYZ(m)=(VP_PAR)0.0; \
+  MZX(m)=MZY(m)=(VP_PAR)0.0; \
+  MWX(m)=MWY(m)=MWZ(m)=(VP_PAR)0.0; \
+  (m).type = VP_MOTION_PROJ_3D; } while (0)
+
+/* Initialize without altering the 2d translation components */
+#define VP_KEEP_TRANSLATION_2D(m) do {\
+  VP_KEEP_TRANSLATION_3D(m); MZW(m)=(VP_PAR)0.0; (m).type= VP_MOTION_TRANSLATION;} while (0)
+
+/* Initialize without altering the affine & translation components */
+#define VP_KEEP_AFFINE_3D(m) do {\
+  MWX(m)=MWY(m)=MWZ(m)=(VP_PAR)0.0; MWW(m)=(VP_PAR)1.0; \
+  (m).type = VP_MOTION_PROJ_3D; } while (0)
+
+/* Initialize without altering the 2d affine & translation components */
+#define VP_KEEP_AFFINE_2D(m) do {\
+  VP_KEEP_AFFINE_3D(m); \
+  MXZ(m)=MYZ(m)=(VP_PAR)0.0; MZZ(m)=(VP_PAR)1.0; \
+  MZX(m)=MZY(m)=MZW(m)=(VP_PAR)0.0; \
+  (m).type = VP_MOTION_AFFINE; } while (0)
+
+/* Initialize without altering the 2d projective parameters */
+#define VP_KEEP_PROJECTIVE_2D(m) do {\
+  MXZ(m)=MYZ(m)=(VP_PAR)0.0; MZZ(m)=(VP_PAR)1.0; \
+  MZX(m)=MZY(m)=MZW(m)=MWZ(m)=(VP_PAR)0.0; \
+  (m).type = VP_MOTION_PROJECTIVE; } while (0)
+
+/* Warp a 2d point (assuming the z component is zero) */
+#define VP_WARP_POINT_2D(inx,iny,m,outx,outy) do {\
+  VP_PAR vpTmpWarpPnt___= MWX(m)*(inx)+MWY(m)*(iny)+MWW(m); \
+  outx = (MXX(m)*((VP_PAR)inx)+MXY(m)*((VP_PAR)iny)+MXW(m))/vpTmpWarpPnt___; \
+  outy = (MYX(m)*((VP_PAR)inx)+MYY(m)*((VP_PAR)iny)+MYW(m))/vpTmpWarpPnt___; } while (0)
+
+/* Warp a 3d point */
+#define VP_WARP_POINT_3D(inx,iny,inz,m,outx,outy,outz) do {\
+  VP_PAR vpTmpWarpPnt___= MWX(m)*(inx)+MWY(m)*(iny)+MWZ(m)*((VP_PAR)inz)+MWW(m); \
+  outx = (MXX(m)*((VP_PAR)inx)+MXY(m)*((VP_PAR)iny)+MXZ(m)*((VP_PAR)inz)+MXW(m))/vpTmpWarpPnt___; \
+  outy = (MYX(m)*((VP_PAR)inx)+MYY(m)*((VP_PAR)iny)+MYZ(m)*((VP_PAR)inz)+MYW(m))/vpTmpWarpPnt___; \
+  outz = MZX(m)*((VP_PAR)inx)+MZY(m)*((VP_PAR)iny)+MZZ(m)*((VP_PAR)inz)+MZW(m); \
+  if ((m).type==VP_MOTION_PROJ_3D) outz/=vpTmpWarpPnt___; } while (0)
+
+/* Projections of each component */
+#define VP_PROJW_3D(m,x,y,z,f)   ( MWX(m)*(x)+MWY(m)*(y)+MWZ(m)*(z)+MWW(m) )
+#define VP_PROJX_3D(m,x,y,z,f,w) ((MXX(m)*(x)+MXY(m)*(y)+MXZ(m)*(z)+MXW(m))/(w))
+#define VP_PROJY_3D(m,x,y,z,f,w) ((MYX(m)*(x)+MYY(m)*(y)+MYZ(m)*(z)+MYW(m))/(w))
+#define VP_PROJZ_3D(m,x,y,z,f,w) ((MZX(m)*(x)+MZY(m)*(y)+MZZ(m)*(z)+MZW(m))/(w))
+
+/* Scale Down a matrix by Sfactor */
+#define VP_SCALEDOWN(m,Sfactor) do { \
+  MXW(m) /= (VP_PAR)Sfactor; MWX(m) *= (VP_PAR)Sfactor; \
+  MYW(m) /= (VP_PAR)Sfactor; MWY(m) *= (VP_PAR)Sfactor; \
+  MZW(m) /= (VP_PAR)Sfactor; MWZ(m) *= (VP_PAR)Sfactor; } while (0)
+
+/* Scale Up a matrix by Sfactor */
+#define VP_SCALEUP(m,Sfactor) do { \
+  MXW(m) *= (VP_PAR)Sfactor; MWX(m) /= (VP_PAR)Sfactor; \
+  MYW(m) *= (VP_PAR)Sfactor; MWY(m) /= (VP_PAR)Sfactor; \
+  MZW(m) *= (VP_PAR)Sfactor; MWZ(m) /= (VP_PAR)Sfactor; } while (0)
+
+/* Normalize the transformation matrix so that MWW is 1 */
+#define VP_NORMALIZE(m) if (MWW(m)!=(VP_PAR)0.0) do { \
+  MXX(m)/=MWW(m); MXY(m)/=MWW(m); MXZ(m)/=MWW(m); MXW(m)/= MWW(m); \
+  MYX(m)/=MWW(m); MYY(m)/=MWW(m); MYZ(m)/=MWW(m); MYW(m)/= MWW(m); \
+  MZX(m)/=MWW(m); MZY(m)/=MWW(m); MZZ(m)/=MWW(m); MZW(m)/= MWW(m); \
+  MWX(m)/=MWW(m); MWY(m)/=MWW(m); MWZ(m)/=MWW(m); MWW(m) = (VP_PAR)1.0; } while (0)
+
+#define VP_PRINT_TRANS(msg,b) do { \
+  fprintf(stderr, \
+      "%s\n%f %f %f %f\n%f %f %f %f\n%f %f %f %f\n%f %f %f %f\n", \
+      msg, \
+      MXX(b),MXY(b),MXZ(b),MXW(b),  \
+      MYX(b),MYY(b),MYZ(b),MYW(b),  \
+      MZX(b),MZY(b),MZZ(b),MZW(b),  \
+      MWX(b),MWY(b),MWZ(b),MWW(b)); \
+} while (0)
+
+/* w' projection given a point x,y,0,f */
+#define VP_PROJZ(m,x,y,f) ( \
+    MWX(m)*((VP_PAR)x)+MWY(m)*((VP_PAR)y)+MWW(m)*((VP_PAR)f))
+
+/* X Projection given a point x,y,0,f and w' */
+#define VP_PROJX(m,x,y,w,f) (\
+   (MXX(m)*((VP_PAR)x)+MXY(m)*((VP_PAR)y)+MXW(m)*((VP_PAR)f))/((VP_PAR)w))
+
+/* Y Projection given a point x,y,0,f and the w' */
+#define VP_PROJY(m,x,y,w,f) (\
+  (MYX(m)*((VP_PAR)x)+MYY(m)*((VP_PAR)y)+MYW(m)*((VP_PAR)f))/((VP_PAR)w))
+
+/* Set the reference id for a motion */
+#define VP_SET_REFID(m,id) do { (m).refid=id; } while (0)
+
+/* Set the inspection id for a motion */
+#define VP_SET_INSID(m,id) do { (m).insid=id; } while (0)
+
+void vp_copy_motion  (const VP_MOTION *src, VP_MOTION *dst);
+int vp_invert_motion(const VP_MOTION* in,VP_MOTION* out);
+int vp_cascade_motion(const VP_MOTION* InAB, const VP_MOTION* InBC,VP_MOTION* OutAC);
+int vp_zoom_motion2d(VP_MOTION* in, VP_MOTION* out,
+              int n, int w, int h, double zoom);
+double vp_motion_cornerdiff(const VP_MOTION *mot_a, const VP_MOTION *mot_b,
+                     int xo, int yo, int w, int h);
+
+#endif /* VP_MOTIONMODEL_H */
+/* =================================================================== */
+/* end vp_motionmodel.h */
diff --git a/benchmark/feature_stab/src/dbregtest/PgmImage.cpp b/benchmark/feature_stab/src/dbregtest/PgmImage.cpp
new file mode 100644
index 00000000..0891cfda
--- /dev/null
+++ b/benchmark/feature_stab/src/dbregtest/PgmImage.cpp
@@ -0,0 +1,260 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PgmImage.h"
+#include <cassert>
+
+using namespace std;
+
+PgmImage::PgmImage(std::string filename) :
+m_w(0),m_h(0),m_colors(255),m_format(PGM_BINARY_GRAYMAP),m_over_allocation(256)
+{
+    if ( !ReadPGM(filename) )
+        return;
+}
+
+PgmImage::PgmImage(int w, int h, int format) :
+m_colors(255),m_w(w),m_h(h),m_format(format),m_over_allocation(256)
+{
+    SetFormat(format);
+}
+
+PgmImage::PgmImage(unsigned char *data, int w, int h) :
+m_colors(255),m_w(w),m_h(h),m_format(PGM_BINARY_GRAYMAP),m_over_allocation(256)
+{
+    SetData(data);
+}
+
+PgmImage::PgmImage(std::vector<unsigned char> &data, int w, int h) :
+m_colors(255),m_w(w),m_h(h),m_format(PGM_BINARY_GRAYMAP),m_over_allocation(256)
+{
+    if ( data.size() == w*h )
+        SetData(&data[0]);
+    else
+        //throw (std::exception("Size of data is not w*h."));
+        throw (std::exception());
+}
+
+PgmImage::PgmImage(const PgmImage &im) :
+m_colors(255),m_w(0),m_h(0),m_format(PGM_BINARY_GRAYMAP),m_over_allocation(256)
+{
+    DeepCopy(im, *this);
+}
+
+PgmImage& PgmImage::operator= (const PgmImage &im)
+{
+    if (this == &im) return *this;
+    DeepCopy(im, *this);
+    return *this;
+}
+
+void PgmImage::DeepCopy(const PgmImage& src, PgmImage& dst)
+{
+    dst.m_data = src.m_data;
+
+    // PGM data
+    dst.m_w = src.m_w;
+    dst.m_h = src.m_h;
+    dst.m_format = src.m_format;
+    dst.m_colors = src.m_colors;
+
+    dst.m_comment = src.m_comment;
+    SetupRowPointers();
+}
+
+PgmImage::~PgmImage()
+{
+
+}
+
+void PgmImage::SetFormat(int format)
+{
+    m_format = format;
+
+    switch (format)
+    {
+    case PGM_BINARY_GRAYMAP:
+        m_data.resize(m_w*m_h+m_over_allocation);
+        break;
+    case PGM_BINARY_PIXMAP:
+        m_data.resize(m_w*m_h*3+m_over_allocation);
+        break;
+    default:
+        return;
+        break;
+    }
+    SetupRowPointers();
+}
+
+void PgmImage::SetData(const unsigned char * data)
+{
+    m_data.resize(m_w*m_h+m_over_allocation);
+    memcpy(&m_data[0],data,m_w*m_h);
+    SetupRowPointers();
+}
+
+bool PgmImage::ReadPGM(const std::string filename)
+{
+    ifstream in(filename.c_str(),std::ios::in | std::ios::binary);
+    if ( !in.is_open() )
+        return false;
+
+    // read the header:
+    string format_header,size_header,colors_header;
+
+    getline(in,format_header);
+    stringstream s;
+    s << format_header;
+
+    s >> format_header >> m_w >> m_h >> m_colors;
+    s.clear();
+
+    if ( m_w == 0 )
+    {
+        while ( in.peek() == '#' )
+            getline(in,m_comment);
+
+        getline(in,size_header);
+
+        while ( in.peek() == '#' )
+            getline(in,m_comment);
+
+            m_colors = 0;
+
+        // parse header
+        s << size_header;
+        s >> m_w >> m_h >> m_colors;
+        s.clear();
+
+        if ( m_colors == 0 )
+        {
+            getline(in,colors_header);
+            s << colors_header;
+            s >> m_colors;
+        }
+    }
+
+    if ( format_header == "P5" )
+        m_format = PGM_BINARY_GRAYMAP;
+    else if (format_header == "P6" )
+        m_format = PGM_BINARY_PIXMAP;
+    else
+        m_format = PGM_FORMAT_INVALID;
+
+    switch(m_format)
+    {
+    case(PGM_BINARY_GRAYMAP):
+        m_data.resize(m_w*m_h+m_over_allocation);
+        in.read((char *)(&m_data[0]),m_data.size());
+        break;
+    case(PGM_BINARY_PIXMAP):
+        m_data.resize(m_w*m_h*3+m_over_allocation);
+        in.read((char *)(&m_data[0]),m_data.size());
+        break;
+    default:
+        return false;
+        break;
+    }
+    in.close();
+
+    SetupRowPointers();
+
+    return true;
+}
+
+bool PgmImage::WritePGM(const std::string filename, const std::string comment)
+{
+    string format_header;
+
+    switch(m_format)
+    {
+    case PGM_BINARY_GRAYMAP:
+        format_header = "P5\n";
+        break;
+    case PGM_BINARY_PIXMAP:
+        format_header = "P6\n";
+        break;
+    default:
+        return false;
+        break;
+    }
+
+    ofstream out(filename.c_str(),std::ios::out |ios::binary);
+    out << format_header << "# " << comment << '\n' << m_w << " " << m_h << '\n' << m_colors << '\n';
+
+    out.write((char *)(&m_data[0]), m_data.size());
+
+    out.close();
+
+    return true;
+}
+
+void PgmImage::SetupRowPointers()
+{
+    int i;
+    m_rows.resize(m_h);
+
+    switch (m_format)
+    {
+    case PGM_BINARY_GRAYMAP:
+        for(i=0;i<m_h;i++)
+        {
+            m_rows[i]=&m_data[m_w*i];
+        }
+        break;
+    case PGM_BINARY_PIXMAP:
+        for(i=0;i<m_h;i++)
+        {
+            m_rows[i]=&m_data[(m_w*3)*i];
+        }
+        break;
+    }
+}
+
+void PgmImage::ConvertToGray()
+{
+    if ( m_format != PGM_BINARY_PIXMAP ) return;
+
+    // Y = 0.3*R + 0.59*G + 0.11*B;
+    for ( int i = 0; i < m_w*m_h; ++i )
+        m_data[i] = (unsigned char)(0.3*m_data[3*i]+0.59*m_data[3*i+1]+0.11*m_data[3*i+2]);
+
+    m_data.resize(m_w*m_h+m_over_allocation);
+    m_format = PGM_BINARY_GRAYMAP;
+
+    SetupRowPointers();
+}
+
+std::ostream& operator<< (std::ostream& o, const PgmImage& im)
+{
+    o << "PGM Image Info:\n";
+    o << "Size: " << im.m_w << " x " << im.m_h << "\n";
+    o << "Comment: " << im.m_comment << "\n";
+    switch (im.m_format)
+    {
+    case PgmImage::PGM_BINARY_PIXMAP:
+        o << "Format: RGB binary pixmap";
+        break;
+    case PgmImage::PGM_BINARY_GRAYMAP:
+        o << "Format: PPM binary graymap";
+        break;
+    default:
+        o << "Format: Invalid";
+        break;
+    }
+    o << endl;
+    return o;
+}
diff --git a/benchmark/feature_stab/src/dbregtest/PgmImage.h b/benchmark/feature_stab/src/dbregtest/PgmImage.h
new file mode 100644
index 00000000..d4d1eebe
--- /dev/null
+++ b/benchmark/feature_stab/src/dbregtest/PgmImage.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <memory.h>
+
+/*!
+ * Simple class to manipulate PGM/PPM images. Not suitable for heavy lifting.
+ */
+class PgmImage
+{
+    friend std::ostream& operator<< (std::ostream& o, const PgmImage& im);
+public:
+    enum {PGM_BINARY_GRAYMAP,PGM_BINARY_PIXMAP,PGM_FORMAT_INVALID};
+    /*!
+    * Constructor from a PGM file name.
+    */
+    PgmImage(std::string filename);
+    /*!
+    * Constructor to allocate an image of given size and type.
+    */
+    PgmImage(int w, int h, int format = PGM_BINARY_GRAYMAP);
+    /*!
+    * Constructor to allocate an image of given size and copy the data in.
+    */
+    PgmImage(unsigned char *data, int w, int h);
+    /*!
+    * Constructor to allocate an image of given size and copy the data in.
+    */
+    PgmImage(std::vector<unsigned char> &data, int w, int h);
+
+    PgmImage(const PgmImage &im);
+
+    PgmImage& operator= (const PgmImage &im);
+    ~PgmImage();
+
+    int GetHeight() const { return m_h; }
+    int GetWidth() const { return m_w; }
+
+    //! Copy pixels from data pointer
+    void SetData(const unsigned char * data);
+
+    //! Get a data pointer to unaligned memory area
+    unsigned char * GetDataPointer() { if ( m_data.size() > 0 ) return &m_data[0]; else return NULL; }
+    unsigned char ** GetRowPointers() { if ( m_rows.size() == m_h ) return &m_rows[0]; else return NULL; }
+
+    //! Read a PGM file from disk
+    bool ReadPGM(const std::string filename);
+    //! Write a PGM file to disk
+    bool WritePGM(const std::string filename, const std::string comment="");
+
+    //! Get image format (returns PGM_BINARY_GRAYMAP, PGM_BINARY_PIXMAP or PGM_FORMAT_INVALID)
+    int GetFormat() const { return m_format; }
+
+    //! Set image format (returns PGM_BINARY_GRAYMAP, PGM_BINARY_PIXMAP). Image data becomes invalid.
+    void SetFormat(int format);
+
+    //! If the image is PGM_BINARY_PIXMAP, convert it to PGM_BINARY_GRAYMAP via Y = 0.3*R + 0.59*G + 0.11*B.
+    void ConvertToGray();
+protected:
+    // Generic functions:
+    void DeepCopy(const PgmImage& src, PgmImage& dst);
+    void SetupRowPointers();
+
+    // PGM data
+    int m_w;
+    int m_h;
+    int m_format;
+    int m_colors;
+    int m_over_allocation;
+    std::vector<unsigned char> m_data;
+    std::string m_comment;
+
+    std::vector<unsigned char *> m_rows;
+};
+
+std::ostream& operator<< (std::ostream& o, const PgmImage& im);
diff --git a/benchmark/feature_stab/src/dbregtest/dbregtest.cpp b/benchmark/feature_stab/src/dbregtest/dbregtest.cpp
new file mode 100644
index 00000000..50873621
--- /dev/null
+++ b/benchmark/feature_stab/src/dbregtest/dbregtest.cpp
@@ -0,0 +1,399 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// $Id: dbregtest.cpp,v 1.24 2011/06/17 14:04:33 mbansal Exp $
+#include "stdafx.h"
+#include "PgmImage.h"
+#include "../dbreg/dbreg.h"
+#include "../dbreg/dbstabsmooth.h"
+#include <db_utilities_camera.h>
+
+#include <iostream>
+#include <iomanip>
+
+#if PROFILE
+    #include <sys/time.h>
+#endif
+
+
+using namespace std;
+
+const int DEFAULT_NR_CORNERS=500;
+const double DEFAULT_MAX_DISPARITY=0.2;
+const int DEFAULT_MOTION_MODEL=DB_HOMOGRAPHY_TYPE_AFFINE;
+//const int DEFAULT_MOTION_MODEL=DB_HOMOGRAPHY_TYPE_R_T;
+//const int DEFAULT_MOTION_MODEL=DB_HOMOGRAPHY_TYPE_TRANSLATION;
+const bool DEFAULT_QUARTER_RESOLUTION=false;
+const unsigned int DEFAULT_REFERENCE_UPDATE_PERIOD=3;
+const bool DEFAULT_DO_MOTION_SMOOTHING = false;
+const double DEFAULT_MOTION_SMOOTHING_GAIN = 0.75;
+const bool DEFAULT_LINEAR_POLISH = false;
+const int DEFAULT_MAX_ITERATIONS = 10;
+
+void usage(string name) {
+
+  const char *helpmsg[] = {
+    "Function: point-based frame to reference registration.",
+    "  -m [rt,a,p]  : motion model, rt = rotation+translation, a = affine (default = affine).",
+    "  -c <int>   : number of corners (default 1000).",
+    "  -d <double>: search disparity as portion of image size (default 0.1).",
+    "  -q         : quarter the image resolution (i.e. half of each dimension) (default on)",
+    "  -r <int>   : the period (in nr of frames) for reference frame updates (default = 5)",
+    "  -s <0/1>   : motion smoothing (1 activates motion smoothing, 0 turns it off - default value = 1)",
+    "  -g <double>: motion smoothing gain, only used if smoothing is on (default value =0.75)",
+    NULL
+  };
+
+  cerr << "Usage: " << name << " [options] image_list.txt" << endl;
+
+  const char **p = helpmsg;
+
+  while (*p)
+  {
+    cerr << *p++ << endl;
+  }
+}
+
+void parse_cmd_line(stringstream& cmdline,
+            const int argc,
+            const string& progname,
+            string& image_list_file_name,
+            int& nr_corners,
+            double& max_disparity,
+            int& motion_model_type,
+            bool& quarter_resolution,
+            unsigned int& reference_update_period,
+            bool& do_motion_smoothing,
+            double& motion_smoothing_gain
+            );
+
+int main(int argc, char* argv[])
+{
+  int    nr_corners = DEFAULT_NR_CORNERS;
+  double max_disparity = DEFAULT_MAX_DISPARITY;
+  int    motion_model_type = DEFAULT_MOTION_MODEL;
+  bool   quarter_resolution = DEFAULT_QUARTER_RESOLUTION;
+
+  unsigned int reference_update_period = DEFAULT_REFERENCE_UPDATE_PERIOD;
+
+  bool   do_motion_smoothing = DEFAULT_DO_MOTION_SMOOTHING;
+  double motion_smoothing_gain = DEFAULT_MOTION_SMOOTHING_GAIN;
+  const bool DEFAULT_USE_SMALLER_MATCHING_WINDOW = true;
+
+  int default_nr_samples = DB_DEFAULT_NR_SAMPLES/5;
+
+  bool   use_smaller_matching_window = DEFAULT_USE_SMALLER_MATCHING_WINDOW;
+
+
+  bool   linear_polish = DEFAULT_LINEAR_POLISH;
+
+  if (argc < 2) {
+    usage(argv[0]);
+    exit(1);
+  }
+
+  stringstream cmdline;
+  string progname(argv[0]);
+  string image_list_file_name;
+
+#if PROFILE
+  timeval ts1, ts2, ts3, ts4;
+#endif
+
+  // put the options and image list file name into the cmdline stringstream
+  for (int c = 1; c < argc; c++)
+  {
+    cmdline << argv[c] << " ";
+  }
+
+  parse_cmd_line(cmdline, argc, progname, image_list_file_name, nr_corners, max_disparity, motion_model_type,quarter_resolution,reference_update_period,do_motion_smoothing,motion_smoothing_gain);
+
+  ifstream in(image_list_file_name.c_str(),ios::in);
+
+  if ( !in.is_open() )
+  {
+    cerr << "Could not open file " << image_list_file_name << ".  Exiting" << endl;
+
+    return false;
+  }
+
+  // feature-based image registration class:
+  db_FrameToReferenceRegistration reg;
+//  db_StabilizationSmoother stab_smoother;
+
+  // input file name:
+  string file_name;
+
+  // look-up tables for image warping:
+  float ** lut_x = NULL, **lut_y = NULL;
+
+  // if the images are color, the input is saved in color_ref:
+  PgmImage color_ref(0,0);
+
+  // image width, height:
+  int w,h;
+
+  int frame_number = 0;
+
+  while ( !in.eof() )
+  {
+    getline(in,file_name);
+
+    PgmImage ref(file_name);
+
+    if ( ref.GetDataPointer() == NULL )
+    {
+      cerr << "Could not open image" << file_name << ". Exiting." << endl;
+      return -1;
+    }
+
+    cout << ref << endl;
+
+    // color format:
+    int format = ref.GetFormat();
+
+    // is the input image color?:
+    bool color = format == PgmImage::PGM_BINARY_PIXMAP;
+
+    w = ref.GetWidth();
+    h = ref.GetHeight();
+
+    if ( !reg.Initialized() )
+    {
+      reg.Init(w,h,motion_model_type,DEFAULT_MAX_ITERATIONS,linear_polish,quarter_resolution,DB_POINT_STANDARDDEV,reference_update_period,do_motion_smoothing,motion_smoothing_gain,default_nr_samples,DB_DEFAULT_CHUNK_SIZE,nr_corners,max_disparity,use_smaller_matching_window);
+      lut_x = db_AllocImage_f(w,h);
+      lut_y = db_AllocImage_f(w,h);
+
+    }
+
+    if ( color )
+    {
+      // save the color image:
+      color_ref = ref;
+    }
+
+    // make a grayscale image:
+    ref.ConvertToGray();
+
+    // compute the homography:
+    double H[9],Hinv[9];
+    db_Identity3x3(Hinv);
+    db_Identity3x3(H);
+
+    bool force_reference = false;
+
+#if PROFILE
+    gettimeofday(&ts1, NULL);
+#endif
+
+    reg.AddFrame(ref.GetRowPointers(),H,false,false);
+    cout << reg.profile_string << std::endl;
+
+#if PROFILE
+    gettimeofday(&ts2, NULL);
+
+    double elapsedTime = (ts2.tv_sec - ts1.tv_sec)*1000.0; // sec to ms
+    elapsedTime += (ts2.tv_usec - ts1.tv_usec)/1000.0; // us to ms
+    cout <<"\nelapsedTime for Reg<< "<<elapsedTime<<" ms >>>>>>>>>>>>>\n";
+#endif
+
+    if (frame_number == 0)
+    {
+      reg.UpdateReference(ref.GetRowPointers());
+    }
+
+
+    //std::vector<int> &inlier_indices = reg.GetInliers();
+    int *inlier_indices = reg.GetInliers();
+    int num_inlier_indices = reg.GetNrInliers();
+    printf("[%d] #Inliers = %d\n",frame_number,num_inlier_indices);
+
+    reg.Get_H_dref_to_ins(H);
+
+    db_GenerateHomographyLut(lut_x,lut_y,w,h,H);
+
+    // create a new image and warp:
+    PgmImage warped(w,h,format);
+
+#if PROFILE
+    gettimeofday(&ts3, NULL);
+#endif
+
+    if ( color )
+      db_WarpImageLutBilinear_rgb(color_ref.GetRowPointers(),warped.GetRowPointers(),w,h,lut_x,lut_y);
+    else
+      db_WarpImageLut_u(ref.GetRowPointers(),warped.GetRowPointers(),w,h,lut_x,lut_y,DB_WARP_FAST);
+
+#if PROFILE
+    gettimeofday(&ts4, NULL);
+    elapsedTime = (ts4.tv_sec - ts3.tv_sec)*1000.0; // sec to ms
+    elapsedTime += (ts4.tv_usec - ts3.tv_usec)/1000.0;     // us to ms
+    cout <<"\nelapsedTime for Warp <<"<<elapsedTime<<" ms >>>>>>>>>>>>>\n";
+#endif
+
+    // write aligned image: name is aligned_<corresponding input file name>
+    stringstream s;
+    s << "aligned_" << file_name;
+    warped.WritePGM(s.str());
+
+    /*
+    // Get the reference and inspection corners to write to file
+    double *ref_corners = reg.GetRefCorners();
+    double *ins_corners = reg.GetInsCorners();
+
+    // get the image file name (without extension), so we
+    // can generate the corresponding filenames for matches
+    // and inliers
+    string file_name_root(file_name.substr(0,file_name.rfind(".")));
+
+    // write matches to file
+    s.str(string(""));
+    s << "Matches_" << file_name_root << ".txt";
+
+    ofstream  match_file(s.str().c_str());
+
+    for (int i = 0; i < reg.GetNrMatches(); i++)
+    {
+      match_file << ref_corners[3*i] << " " << ref_corners[3*i+1] << " " << ins_corners[3*i] << " " << ins_corners[3*i+1] << endl;
+    }
+
+    match_file.close();
+
+    // write the inlier matches to file
+    s.str(string(""));
+    s << "InlierMatches_" << file_name_root << ".txt";
+
+    ofstream inlier_match_file(s.str().c_str());
+
+    for(int i=0; i<num_inlier_indices; i++)
+    {
+      int k = inlier_indices[i];
+      inlier_match_file << ref_corners[3*k] << " "
+            << ref_corners[3*k+1] << " "
+            << ins_corners[3*k] << " "
+            << ins_corners[3*k+1] << endl;
+    }
+    inlier_match_file.close();
+    */
+
+    frame_number++;
+  }
+
+  if ( reg.Initialized() )
+  {
+    db_FreeImage_f(lut_x,h);
+    db_FreeImage_f(lut_y,h);
+  }
+
+  return 0;
+}
+
+void parse_cmd_line(stringstream& cmdline,
+            const int argc,
+            const string& progname,
+            string& image_list_file_name,
+            int& nr_corners,
+            double& max_disparity,
+            int& motion_model_type,
+            bool& quarter_resolution,
+            unsigned int& reference_update_period,
+            bool& do_motion_smoothing,
+            double& motion_smoothing_gain)
+{
+  // for counting down the parsed arguments.
+  int c = argc;
+
+  // a holder
+  string token;
+
+  while (cmdline >> token)
+  {
+    --c;
+
+    int pos = token.find("-");
+
+    if (pos == 0)
+    {
+      switch (token[1])
+      {
+      case 'm':
+    --c; cmdline >> token;
+    if (token.compare("rt") == 0)
+    {
+      motion_model_type = DB_HOMOGRAPHY_TYPE_R_T;
+    }
+    else if (token.compare("a") == 0)
+    {
+      motion_model_type = DB_HOMOGRAPHY_TYPE_AFFINE;
+    }
+    else if (token.compare("p") == 0)
+    {
+      motion_model_type = DB_HOMOGRAPHY_TYPE_PROJECTIVE;
+    }
+    else
+    {
+      usage(progname);
+      exit(1);
+    }
+    break;
+      case 'c':
+    --c; cmdline >> nr_corners;
+    break;
+      case 'd':
+    --c; cmdline >> max_disparity;
+    break;
+      case 'q':
+    quarter_resolution = true;
+    break;
+      case 'r':
+    --c; cmdline >> reference_update_period;
+    break;
+      case 's':
+    --c; cmdline >> do_motion_smoothing;
+    break;
+      case 'g':
+    --c; cmdline >> motion_smoothing_gain;
+    break;
+      default:
+    cerr << progname << "illegal option " << token << endl;
+      case 'h':
+    usage(progname);
+    exit(1);
+    break;
+      }
+    }
+    else
+    {
+      if (c != 1)
+      {
+    usage(progname);
+    exit(1);
+      }
+      else
+      {
+    --c;
+    image_list_file_name = token;
+      }
+    }
+  }
+
+  if (c != 0)
+  {
+    usage(progname);
+    exit(1);
+  }
+}
+
diff --git a/benchmark/feature_stab/src/dbregtest/stdafx.cpp b/benchmark/feature_stab/src/dbregtest/stdafx.cpp
new file mode 100644
index 00000000..0c703e2d
--- /dev/null
+++ b/benchmark/feature_stab/src/dbregtest/stdafx.cpp
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// stdafx.cpp : source file that includes just the standard includes
+// dbregtest.pch will be the pre-compiled header
+// stdafx.obj will contain the pre-compiled type information
+
+#include "stdafx.h"
+
+// TODO: reference any additional headers you need in STDAFX.H
+// and not in this file
diff --git a/benchmark/feature_stab/src/dbregtest/stdafx.h b/benchmark/feature_stab/src/dbregtest/stdafx.h
new file mode 100644
index 00000000..9bc06ea0
--- /dev/null
+++ b/benchmark/feature_stab/src/dbregtest/stdafx.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// stdafx.h : include file for standard system include files,
+// or project specific include files that are used frequently, but
+// are changed infrequently
+//
+
+#pragma once
+
+#include "targetver.h"
+
+#include <stdio.h>
+
+// TODO: reference additional headers your program requires here
diff --git a/benchmark/feature_stab/src/dbregtest/targetver.h b/benchmark/feature_stab/src/dbregtest/targetver.h
new file mode 100644
index 00000000..9272b0d6
--- /dev/null
+++ b/benchmark/feature_stab/src/dbregtest/targetver.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+// The following macros define the minimum required platform.  The minimum required platform
+// is the earliest version of Windows, Internet Explorer etc. that has the necessary features to run
+// your application.  The macros work by enabling all features available on platform versions up to and
+// including the version specified.
+
+// Modify the following defines if you have to target a platform prior to the ones specified below.
+// Refer to MSDN for the latest info on corresponding values for different platforms.
+#ifndef _WIN32_WINNT            // Specifies that the minimum required platform is Windows Vista.
+#define _WIN32_WINNT 0x0600     // Change this to the appropriate value to target other versions of Windows.
+#endif
+
diff --git a/benchmark/mosaic_renderer_jni.cpp b/benchmark/mosaic_renderer_jni.cpp
new file mode 100644
index 00000000..bb01e7fc
--- /dev/null
+++ b/benchmark/mosaic_renderer_jni.cpp
@@ -0,0 +1,762 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <GLES2/gl2.h>
+#include <GLES2/gl2ext.h>
+#include <jni.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "db_utilities_camera.h"
+#include "mosaic/ImageUtils.h"
+#include "mosaic_renderer/FrameBuffer.h"
+#include "mosaic_renderer/WarpRenderer.h"
+#include "mosaic_renderer/SurfaceTextureRenderer.h"
+#include "mosaic_renderer/YVURenderer.h"
+
+#include "mosaic/Log.h"
+#define LOG_TAG "MosaicRenderer"
+
+#include "mosaic_renderer_jni.h"
+
+// Texture handle
+GLuint gSurfaceTextureID[1];
+
+bool gWarpImage = true;
+
+// Low-Res input image frame in YUVA format for preview rendering and processing
+// and high-res YUVA input image for processing.
+unsigned char* gPreviewImage[NR];
+// Low-Res & high-res preview image width
+int gPreviewImageWidth[NR];
+// Low-Res & high-res preview image height
+int gPreviewImageHeight[NR];
+
+// Semaphore to protect simultaneous read/writes from gPreviewImage
+sem_t gPreviewImage_semaphore;
+
+// Off-screen preview FBO width (large enough to store the entire
+// preview mosaic).
+int gPreviewFBOWidth;
+// Off-screen preview FBO height (large enough to store the entire
+// preview mosaic).
+int gPreviewFBOHeight;
+
+// gK is the transformation to map the canonical {-1,1} vertex coordinate system
+// to the {0,gPreviewImageWidth[LR]} input image frame coordinate system before
+// applying the given affine transformation trs. gKm is the corresponding
+// transformation for going to the {0,gPreviewFBOWidth}.
+double gK[9];
+double gKinv[9];
+double gKm[9];
+double gKminv[9];
+
+// Shader to copy input SurfaceTexture into and RGBA FBO. The two shaders
+// render to the textures with dimensions corresponding to the low-res and
+// high-res image frames.
+SurfaceTextureRenderer gSurfTexRenderer[NR];
+// Off-screen FBOs to store the low-res and high-res RGBA copied out from
+// the SurfaceTexture by the gSurfTexRenderers.
+FrameBuffer gBufferInput[NR];
+
+// Shader to convert RGBA textures into YVU textures for processing
+YVURenderer gYVURenderer[NR];
+// Off-screen FBOs to store the low-res and high-res YVU textures for processing
+FrameBuffer gBufferInputYVU[NR];
+
+// Shader to translate the flip-flop FBO - gBuffer[1-current] -> gBuffer[current]
+WarpRenderer gWarper1;
+// Shader to add warped current frame to the flip-flop FBO - gBuffer[current]
+WarpRenderer gWarper2;
+// Off-screen FBOs (flip-flop) to store the result of gWarper1 & gWarper2
+FrameBuffer gBuffer[2];
+
+// Shader to warp and render the preview FBO to the screen
+WarpRenderer gPreview;
+
+// Index of the gBuffer FBO gWarper1 is going to write into
+int gCurrentFBOIndex = 0;
+
+// 3x3 Matrices holding the transformation of this frame (gThisH1t) and of
+// the last frame (gLastH1t) w.r.t the first frame.
+double gThisH1t[9];
+double gLastH1t[9];
+
+// Variables to represent the fixed position of the top-left corner of the
+// current frame in the previewFBO
+double gCenterOffsetX = 0.0f;
+double gCenterOffsetY = 0.0f;
+
+// X-Offset of the viewfinder (current frame) w.r.t
+// (gCenterOffsetX, gCenterOffsetY). This offset varies with time and is
+// used to pan the viewfinder across the UI layout.
+double gPanOffset = 0.0f;
+
+// Variables tracking the translation value for the current frame and the
+// last frame (both w.r.t the first frame). The difference between these
+// values is used to control the panning speed of the viewfinder display
+// on the UI screen.
+double gThisTx = 0.0f;
+double gLastTx = 0.0f;
+
+// These are the scale factors used by the gPreview shader to ensure that
+// the image frame is correctly scaled to the full UI layout height while
+// maintaining its aspect ratio
+double gUILayoutScalingX = 1.0f;
+double gUILayoutScalingY = 1.0f;
+
+// Whether the view that we will render preview FBO onto is in landscape or portrait
+// orientation.
+bool gIsLandscapeOrientation = true;
+
+// State of the viewfinder. Set to false when the viewfinder hits the UI edge.
+bool gPanViewfinder = true;
+
+// Affine transformation in GL 4x4 format (column-major) to warp the
+// last frame mosaic into the current frame coordinate system.
+GLfloat g_dAffinetransGL[16];
+double g_dAffinetrans[16];
+
+// Affine transformation in GL 4x4 format (column-major) to translate the
+// preview FBO across the screen (viewfinder panning).
+GLfloat g_dAffinetransPanGL[16];
+double g_dAffinetransPan[16];
+
+// XY translation in GL 4x4 format (column-major) to center the current
+// preview mosaic in the preview FBO
+GLfloat g_dTranslationToFBOCenterGL[16];
+double g_dTranslationToFBOCenter[16];
+
+// GL 4x4 Identity transformation
+GLfloat g_dAffinetransIdentGL[] = {
+    1., 0., 0., 0.,
+    0., 1., 0., 0.,
+    0., 0., 1., 0.,
+    0., 0., 0., 1.};
+
+// GL 4x4 Rotation transformation (column-majored): 90 degree
+GLfloat g_dAffinetransRotation90GL[] = {
+    0., 1., 0., 0.,
+    -1., 0., 0., 0.,
+    0., 0., 1., 0.,
+    0., 0., 0., 1.};
+
+// 3x3 Rotation transformation (row-majored): 90 degree
+double gRotation90[] = {
+    0., -1., 0.,
+    1., 0., 0.,
+    0., 0., 1.,};
+
+
+float g_dIdent3x3[] = {
+    1.0, 0.0, 0.0,
+    0.0, 1.0, 0.0,
+    0.0, 0.0, 1.0};
+
+const int GL_TEXTURE_EXTERNAL_OES_ENUM = 0x8D65;
+
+static void printGLString(const char *name, GLenum s) {
+    const char *v = (const char *) glGetString(s);
+    LOGI("GL %s = %s", name, v);
+}
+
+// @return false if there was an error
+bool checkGlError(const char* op) {
+    GLint error = glGetError();
+    if (error != 0) {
+        LOGE("after %s() glError (0x%x)", op, error);
+        return false;
+    }
+    return true;
+}
+
+void bindSurfaceTexture(GLuint texId)
+{
+    glBindTexture(GL_TEXTURE_EXTERNAL_OES_ENUM, texId);
+
+    // Can't do mipmapping with camera source
+    glTexParameterf(GL_TEXTURE_EXTERNAL_OES_ENUM, GL_TEXTURE_MIN_FILTER,
+            GL_LINEAR);
+    glTexParameterf(GL_TEXTURE_EXTERNAL_OES_ENUM, GL_TEXTURE_MAG_FILTER,
+            GL_LINEAR);
+    // Clamp to edge is the only option
+    glTexParameteri(GL_TEXTURE_EXTERNAL_OES_ENUM, GL_TEXTURE_WRAP_S,
+            GL_CLAMP_TO_EDGE);
+    glTexParameteri(GL_TEXTURE_EXTERNAL_OES_ENUM, GL_TEXTURE_WRAP_T,
+            GL_CLAMP_TO_EDGE);
+}
+
+void ClearPreviewImage(int mID)
+{
+    unsigned char* ptr = gPreviewImage[mID];
+    for(int j = 0, i = 0;
+            j < gPreviewImageWidth[mID] * gPreviewImageHeight[mID] * 4;
+            j += 4)
+    {
+            ptr[i++] = 0;
+            ptr[i++] = 0;
+            ptr[i++] = 0;
+            ptr[i++] = 255;
+    }
+
+}
+
+void ConvertAffine3x3toGL4x4(double *matGL44, double *mat33)
+{
+    matGL44[0] = mat33[0];
+    matGL44[1] = mat33[3];
+    matGL44[2] = 0.0;
+    matGL44[3] = mat33[6];
+
+    matGL44[4] = mat33[1];
+    matGL44[5] = mat33[4];
+    matGL44[6] = 0.0;
+    matGL44[7] = mat33[7];
+
+    matGL44[8] = 0;
+    matGL44[9] = 0;
+    matGL44[10] = 1.0;
+    matGL44[11] = 0.0;
+
+    matGL44[12] = mat33[2];
+    matGL44[13] = mat33[5];
+    matGL44[14] = 0.0;
+    matGL44[15] = mat33[8];
+}
+
+bool continuePanningFBO(double panOffset) {
+    double normalizedScreenLimitLeft = -1.0 + VIEWPORT_BORDER_FACTOR_HORZ * 2.0;
+    double normalizedScreenLimitRight = 1.0 - VIEWPORT_BORDER_FACTOR_HORZ * 2.0;
+    double normalizedXPositionOnScreenLeft;
+    double normalizedXPositionOnScreenRight;
+
+    // Compute the position of the current frame in the screen coordinate system
+    if (gIsLandscapeOrientation) {
+        normalizedXPositionOnScreenLeft = (2.0 *
+            (gCenterOffsetX + panOffset) / gPreviewFBOWidth - 1.0) *
+            gUILayoutScalingX;
+        normalizedXPositionOnScreenRight = (2.0 *
+            ((gCenterOffsetX + panOffset) + gPreviewImageWidth[HR]) /
+            gPreviewFBOWidth - 1.0) * gUILayoutScalingX;
+    } else {
+        normalizedXPositionOnScreenLeft = (2.0 *
+            (gCenterOffsetX + panOffset) / gPreviewFBOWidth - 1.0) *
+            gUILayoutScalingY;
+        normalizedXPositionOnScreenRight = (2.0 *
+            ((gCenterOffsetX + panOffset) + gPreviewImageWidth[HR]) /
+            gPreviewFBOWidth - 1.0) * gUILayoutScalingY;
+    }
+
+    // Stop the viewfinder panning if we hit the maximum border allowed for
+    // this UI layout
+    if (normalizedXPositionOnScreenRight > normalizedScreenLimitRight ||
+            normalizedXPositionOnScreenLeft < normalizedScreenLimitLeft) {
+        return false;
+    } else {
+        return true;
+    }
+}
+
+// This function computes fills the 4x4 matrices g_dAffinetrans,
+// and g_dAffinetransPan using the specified 3x3 affine
+// transformation between the first captured frame and the current frame.
+// The computed g_dAffinetrans is such that it warps the preview mosaic in
+// the last frame's coordinate system into the coordinate system of the
+// current frame. Thus, applying this transformation will create the current
+// frame mosaic but with the current frame missing. This frame will then be
+// pasted in by gWarper2 after translating it by g_dTranslationToFBOCenter.
+// The computed g_dAffinetransPan is such that it offsets the computed preview
+// mosaic horizontally to make the viewfinder pan within the UI layout.
+void UpdateWarpTransformation(float *trs)
+{
+    double H[9], Hp[9], Htemp1[9], Htemp2[9], T[9];
+
+    for(int i = 0; i < 9; i++)
+    {
+        gThisH1t[i] = trs[i];
+    }
+
+    // Alignment is done based on low-res data.
+    // To render the preview mosaic, the translation of the high-res mosaic is estimated to
+    // H2L_FACTOR x low-res-based tranlation.
+    gThisH1t[2] *= H2L_FACTOR;
+    gThisH1t[5] *= H2L_FACTOR;
+
+    db_Identity3x3(T);
+    T[2] = -gCenterOffsetX;
+    T[5] = -gCenterOffsetY;
+
+    // H = ( inv(gThisH1t) * gLastH1t ) * T
+    db_Identity3x3(Htemp1);
+    db_Identity3x3(Htemp2);
+    db_Identity3x3(H);
+    db_InvertAffineTransform(Htemp1, gThisH1t);
+    db_Multiply3x3_3x3(Htemp2, Htemp1, gLastH1t);
+    db_Multiply3x3_3x3(H, Htemp2, T);
+
+    for(int i = 0; i < 9; i++)
+    {
+        gLastH1t[i] = gThisH1t[i];
+    }
+
+    // Move the origin such that the frame is centered in the previewFBO
+    // i.e. H = inv(T) * H
+    H[2] += gCenterOffsetX;
+    H[5] += gCenterOffsetY;
+
+    // Hp = inv(Km) * H * Km
+    // Km moves the coordinate system from openGL to image pixels so
+    // that the alignment transform H can be applied to them.
+    // inv(Km) moves the coordinate system back to openGL normalized
+    // coordinates so that the shader can correctly render it.
+    db_Identity3x3(Htemp1);
+    db_Multiply3x3_3x3(Htemp1, H, gKm);
+    db_Multiply3x3_3x3(Hp, gKminv, Htemp1);
+
+    ConvertAffine3x3toGL4x4(g_dAffinetrans, Hp);
+
+    ////////////////////////////////////////////////
+    ////// Compute g_dAffinetransPan now...   //////
+    ////////////////////////////////////////////////
+
+    gThisTx = trs[2];
+
+    if(gPanViewfinder)
+    {
+        gPanOffset += (gThisTx - gLastTx) * VIEWFINDER_PAN_FACTOR_HORZ;
+    }
+
+    gLastTx = gThisTx;
+    gPanViewfinder = continuePanningFBO(gPanOffset);
+
+    db_Identity3x3(H);
+    H[2] = gPanOffset;
+
+    // Hp = inv(Km) * H * Km
+    db_Identity3x3(Htemp1);
+    db_Multiply3x3_3x3(Htemp1, H, gKm);
+    db_Multiply3x3_3x3(Hp, gKminv, Htemp1);
+
+    if (gIsLandscapeOrientation) {
+        ConvertAffine3x3toGL4x4(g_dAffinetransPan, Hp);
+    } else {
+        // rotate Hp by 90 degress.
+        db_Multiply3x3_3x3(Htemp1, gRotation90, Hp);
+        ConvertAffine3x3toGL4x4(g_dAffinetransPan, Htemp1);
+    }
+}
+
+void AllocateTextureMemory(int widthHR, int heightHR, int widthLR, int heightLR)
+{
+    gPreviewImageWidth[HR] = widthHR;
+    gPreviewImageHeight[HR] = heightHR;
+
+    gPreviewImageWidth[LR] = widthLR;
+    gPreviewImageHeight[LR] = heightLR;
+
+    sem_init(&gPreviewImage_semaphore, 0, 1);
+
+    sem_wait(&gPreviewImage_semaphore);
+    gPreviewImage[LR] = ImageUtils::allocateImage(gPreviewImageWidth[LR],
+            gPreviewImageHeight[LR], 4);
+    ClearPreviewImage(LR);
+    gPreviewImage[HR] = ImageUtils::allocateImage(gPreviewImageWidth[HR],
+            gPreviewImageHeight[HR], 4);
+    ClearPreviewImage(HR);
+    sem_post(&gPreviewImage_semaphore);
+
+    gPreviewFBOWidth = PREVIEW_FBO_WIDTH_SCALE * gPreviewImageWidth[HR];
+    gPreviewFBOHeight = PREVIEW_FBO_HEIGHT_SCALE * gPreviewImageHeight[HR];
+
+    // The origin is such that the current frame will sit with its center
+    // at the center of the previewFBO
+    gCenterOffsetX = (gPreviewFBOWidth / 2 - gPreviewImageWidth[HR] / 2);
+    gCenterOffsetY = (gPreviewFBOHeight / 2 - gPreviewImageHeight[HR] / 2);
+
+    gPanOffset = 0.0f;
+
+    db_Identity3x3(gThisH1t);
+    db_Identity3x3(gLastH1t);
+
+    gPanViewfinder = true;
+
+    int w = gPreviewImageWidth[HR];
+    int h = gPreviewImageHeight[HR];
+
+    int wm = gPreviewFBOWidth;
+    int hm = gPreviewFBOHeight;
+
+    // K is the transformation to map the canonical [-1,1] vertex coordinate
+    // system to the [0,w] image coordinate system before applying the given
+    // affine transformation trs.
+    gKm[0] = wm / 2.0 - 0.5;
+    gKm[1] = 0.0;
+    gKm[2] = wm / 2.0 - 0.5;
+    gKm[3] = 0.0;
+    gKm[4] = hm / 2.0 - 0.5;
+    gKm[5] = hm / 2.0 - 0.5;
+    gKm[6] = 0.0;
+    gKm[7] = 0.0;
+    gKm[8] = 1.0;
+
+    gK[0] = w / 2.0 - 0.5;
+    gK[1] = 0.0;
+    gK[2] = w / 2.0 - 0.5;
+    gK[3] = 0.0;
+    gK[4] = h / 2.0 - 0.5;
+    gK[5] = h / 2.0 - 0.5;
+    gK[6] = 0.0;
+    gK[7] = 0.0;
+    gK[8] = 1.0;
+
+    db_Identity3x3(gKinv);
+    db_InvertCalibrationMatrix(gKinv, gK);
+
+    db_Identity3x3(gKminv);
+    db_InvertCalibrationMatrix(gKminv, gKm);
+
+    //////////////////////////////////////////
+    ////// Compute g_Translation now... //////
+    //////////////////////////////////////////
+    double T[9], Tp[9], Ttemp[9];
+
+    db_Identity3x3(T);
+    T[2] = gCenterOffsetX;
+    T[5] = gCenterOffsetY;
+
+    // Tp = inv(K) * T * K
+    db_Identity3x3(Ttemp);
+    db_Multiply3x3_3x3(Ttemp, T, gK);
+    db_Multiply3x3_3x3(Tp, gKinv, Ttemp);
+
+    ConvertAffine3x3toGL4x4(g_dTranslationToFBOCenter, Tp);
+
+    UpdateWarpTransformation(g_dIdent3x3);
+}
+
+void FreeTextureMemory()
+{
+    sem_wait(&gPreviewImage_semaphore);
+    ImageUtils::freeImage(gPreviewImage[LR]);
+    ImageUtils::freeImage(gPreviewImage[HR]);
+    sem_post(&gPreviewImage_semaphore);
+
+    sem_destroy(&gPreviewImage_semaphore);
+}
+
+extern "C"
+{
+    JNIEXPORT jint JNICALL Java_com_android_camera_panorama_MosaicRenderer_init(
+            JNIEnv * env, jobject obj);
+    JNIEXPORT void JNICALL Java_com_android_camera_panorama_MosaicRenderer_reset(
+            JNIEnv * env, jobject obj,  jint width, jint height,
+            jboolean isLandscapeOrientation);
+    JNIEXPORT void JNICALL Java_com_android_camera_panorama_MosaicRenderer_preprocess(
+            JNIEnv * env, jobject obj, jfloatArray stMatrix);
+    JNIEXPORT void JNICALL Java_com_android_camera_panorama_MosaicRenderer_transferGPUtoCPU(
+            JNIEnv * env, jobject obj);
+    JNIEXPORT void JNICALL Java_com_android_camera_panorama_MosaicRenderer_step(
+            JNIEnv * env, jobject obj);
+    JNIEXPORT void JNICALL Java_com_android_camera_panorama_MosaicRenderer_ready(
+            JNIEnv * env, jobject obj);
+    JNIEXPORT void JNICALL Java_com_android_camera_panorama_MosaicRenderer_setWarping(
+            JNIEnv * env, jobject obj, jboolean flag);
+};
+
+JNIEXPORT jint JNICALL Java_com_android_camera_panorama_MosaicRenderer_init(
+        JNIEnv * env, jobject obj)
+{
+    gSurfTexRenderer[LR].InitializeGLProgram();
+    gSurfTexRenderer[HR].InitializeGLProgram();
+    gYVURenderer[LR].InitializeGLProgram();
+    gYVURenderer[HR].InitializeGLProgram();
+    gWarper1.InitializeGLProgram();
+    gWarper2.InitializeGLProgram();
+    gPreview.InitializeGLProgram();
+    gBuffer[0].InitializeGLContext();
+    gBuffer[1].InitializeGLContext();
+    gBufferInput[LR].InitializeGLContext();
+    gBufferInput[HR].InitializeGLContext();
+    gBufferInputYVU[LR].InitializeGLContext();
+    gBufferInputYVU[HR].InitializeGLContext();
+
+    glBindFramebuffer(GL_FRAMEBUFFER, 0);
+
+    glGenTextures(1, gSurfaceTextureID);
+    // bind the surface texture
+    bindSurfaceTexture(gSurfaceTextureID[0]);
+
+    return (jint) gSurfaceTextureID[0];
+}
+
+
+void calculateUILayoutScaling(int width, int height, bool isLandscape) {
+    if (isLandscape) {
+        //  __________        ______
+        // |__________|  =>  |______|
+        // (Preview FBO)      (View)
+        //
+        // Scale the preview FBO's height to the height of view and
+        // maintain the aspect ratio of the current frame on the screen.
+        gUILayoutScalingY = PREVIEW_FBO_HEIGHT_SCALE;
+
+        // Note that OpenGL scales a texture to view's width and height automatically.
+        // The "width / height" inverts the scaling, so as to maintain the aspect ratio
+        // of the current frame.
+        gUILayoutScalingX = ((float) (PREVIEW_FBO_WIDTH_SCALE * gPreviewImageWidth[LR])
+                / (PREVIEW_FBO_HEIGHT_SCALE * gPreviewImageHeight[LR]) *  PREVIEW_FBO_HEIGHT_SCALE)
+                / ((float) width / height);
+    } else {
+        //                     __
+        //  __________        |  |
+        // |__________|  =>   |  |
+        // (Preview FBO)      |  |
+        //                    |__|
+        //                   (View)
+        // Scale the preview FBO's height to the width of view and
+        // maintain the aspect ratio of the current frame on the screen.
+        gUILayoutScalingX = PREVIEW_FBO_HEIGHT_SCALE;
+
+        // Note that OpenGL scales a texture to view's width and height automatically.
+        // The "height / width" inverts the scaling, so as to maintain the aspect ratio
+        // of the current frame.
+        gUILayoutScalingY = ((float) (PREVIEW_FBO_WIDTH_SCALE * gPreviewImageWidth[LR])
+                / (PREVIEW_FBO_HEIGHT_SCALE * gPreviewImageHeight[LR]) *  PREVIEW_FBO_HEIGHT_SCALE)
+                / ((float) height / width);
+
+    }
+}
+
+JNIEXPORT void JNICALL Java_com_android_camera_panorama_MosaicRenderer_reset(
+        JNIEnv * env, jobject obj,  jint width, jint height, jboolean isLandscapeOrientation)
+{
+    gIsLandscapeOrientation = isLandscapeOrientation;
+    calculateUILayoutScaling(width, height, gIsLandscapeOrientation);
+
+    gBuffer[0].Init(gPreviewFBOWidth, gPreviewFBOHeight, GL_RGBA);
+    gBuffer[1].Init(gPreviewFBOWidth, gPreviewFBOHeight, GL_RGBA);
+
+    gBufferInput[LR].Init(gPreviewImageWidth[LR],
+            gPreviewImageHeight[LR], GL_RGBA);
+
+    gBufferInput[HR].Init(gPreviewImageWidth[HR],
+            gPreviewImageHeight[HR], GL_RGBA);
+
+    gBufferInputYVU[LR].Init(gPreviewImageWidth[LR],
+            gPreviewImageHeight[LR], GL_RGBA);
+
+    gBufferInputYVU[HR].Init(gPreviewImageWidth[HR],
+            gPreviewImageHeight[HR], GL_RGBA);
+
+    sem_wait(&gPreviewImage_semaphore);
+    ClearPreviewImage(LR);
+    ClearPreviewImage(HR);
+    sem_post(&gPreviewImage_semaphore);
+
+    // bind the surface texture
+    bindSurfaceTexture(gSurfaceTextureID[0]);
+
+    gSurfTexRenderer[LR].SetupGraphics(&gBufferInput[LR]);
+    gSurfTexRenderer[LR].Clear(0.0, 0.0, 0.0, 1.0);
+    gSurfTexRenderer[LR].SetViewportMatrix(1, 1, 1, 1);
+    gSurfTexRenderer[LR].SetScalingMatrix(1.0f, -1.0f);
+    gSurfTexRenderer[LR].SetInputTextureName(gSurfaceTextureID[0]);
+    gSurfTexRenderer[LR].SetInputTextureType(GL_TEXTURE_EXTERNAL_OES_ENUM);
+
+    gSurfTexRenderer[HR].SetupGraphics(&gBufferInput[HR]);
+    gSurfTexRenderer[HR].Clear(0.0, 0.0, 0.0, 1.0);
+    gSurfTexRenderer[HR].SetViewportMatrix(1, 1, 1, 1);
+    gSurfTexRenderer[HR].SetScalingMatrix(1.0f, -1.0f);
+    gSurfTexRenderer[HR].SetInputTextureName(gSurfaceTextureID[0]);
+    gSurfTexRenderer[HR].SetInputTextureType(GL_TEXTURE_EXTERNAL_OES_ENUM);
+
+    gYVURenderer[LR].SetupGraphics(&gBufferInputYVU[LR]);
+    gYVURenderer[LR].Clear(0.0, 0.0, 0.0, 1.0);
+    gYVURenderer[LR].SetInputTextureName(gBufferInput[LR].GetTextureName());
+    gYVURenderer[LR].SetInputTextureType(GL_TEXTURE_2D);
+
+    gYVURenderer[HR].SetupGraphics(&gBufferInputYVU[HR]);
+    gYVURenderer[HR].Clear(0.0, 0.0, 0.0, 1.0);
+    gYVURenderer[HR].SetInputTextureName(gBufferInput[HR].GetTextureName());
+    gYVURenderer[HR].SetInputTextureType(GL_TEXTURE_2D);
+
+    // gBuffer[1-gCurrentFBOIndex] --> gWarper1 --> gBuffer[gCurrentFBOIndex]
+    gWarper1.SetupGraphics(&gBuffer[gCurrentFBOIndex]);
+    gWarper1.Clear(0.0, 0.0, 0.0, 1.0);
+    gWarper1.SetViewportMatrix(1, 1, 1, 1);
+    gWarper1.SetScalingMatrix(1.0f, 1.0f);
+    gWarper1.SetInputTextureName(gBuffer[1 - gCurrentFBOIndex].GetTextureName());
+    gWarper1.SetInputTextureType(GL_TEXTURE_2D);
+
+    // gBufferInput[HR] --> gWarper2 --> gBuffer[gCurrentFBOIndex]
+    gWarper2.SetupGraphics(&gBuffer[gCurrentFBOIndex]);
+    gWarper2.Clear(0.0, 0.0, 0.0, 1.0);
+    gWarper2.SetViewportMatrix(gPreviewImageWidth[HR],
+            gPreviewImageHeight[HR], gBuffer[gCurrentFBOIndex].GetWidth(),
+            gBuffer[gCurrentFBOIndex].GetHeight());
+    gWarper2.SetScalingMatrix(1.0f, 1.0f);
+    gWarper2.SetInputTextureName(gBufferInput[HR].GetTextureName());
+    gWarper2.SetInputTextureType(GL_TEXTURE_2D);
+
+    gPreview.SetupGraphics(width, height);
+    gPreview.Clear(0.0, 0.0, 0.0, 1.0);
+    gPreview.SetViewportMatrix(1, 1, 1, 1);
+
+    // Scale the previewFBO so that the viewfinder window fills the layout height
+    // while maintaining the image aspect ratio
+    gPreview.SetScalingMatrix(gUILayoutScalingX, -1.0f * gUILayoutScalingY);
+    gPreview.SetInputTextureName(gBuffer[gCurrentFBOIndex].GetTextureName());
+    gPreview.SetInputTextureType(GL_TEXTURE_2D);
+}
+
+JNIEXPORT void JNICALL Java_com_android_camera_panorama_MosaicRenderer_preprocess(
+        JNIEnv * env, jobject obj, jfloatArray stMatrix)
+{
+    jfloat *stmat = env->GetFloatArrayElements(stMatrix, 0);
+
+    gSurfTexRenderer[LR].SetSTMatrix((float*) stmat);
+    gSurfTexRenderer[HR].SetSTMatrix((float*) stmat);
+
+    env->ReleaseFloatArrayElements(stMatrix, stmat, 0);
+
+    gSurfTexRenderer[LR].DrawTexture(g_dAffinetransIdentGL);
+    gSurfTexRenderer[HR].DrawTexture(g_dAffinetransIdentGL);
+}
+
+#ifndef now_ms
+#include <time.h>
+static double
+now_ms(void)
+{
+    //struct timespec res;
+    struct timeval res;
+    //clock_gettime(CLOCK_REALTIME, &res);
+    gettimeofday(&res, NULL);
+    return 1000.0*res.tv_sec + (double)res.tv_usec/1e3;
+}
+#endif
+
+
+
+JNIEXPORT void JNICALL Java_com_android_camera_panorama_MosaicRenderer_transferGPUtoCPU(
+        JNIEnv * env, jobject obj)
+{
+    double t0, t1, time_c;
+
+    gYVURenderer[LR].DrawTexture();
+    gYVURenderer[HR].DrawTexture();
+
+    sem_wait(&gPreviewImage_semaphore);
+    // Bind to the input LR FBO and read the Low-Res data from there...
+    glBindFramebuffer(GL_FRAMEBUFFER, gBufferInputYVU[LR].GetFrameBufferName());
+    t0 = now_ms();
+    glReadPixels(0,
+                 0,
+                 gBufferInput[LR].GetWidth(),
+                 gBufferInput[LR].GetHeight(),
+                 GL_RGBA,
+                 GL_UNSIGNED_BYTE,
+                 gPreviewImage[LR]);
+
+    checkGlError("glReadPixels LR");
+
+    // Bind to the input HR FBO and read the high-res data from there...
+    glBindFramebuffer(GL_FRAMEBUFFER, gBufferInputYVU[HR].GetFrameBufferName());
+    t0 = now_ms();
+    glReadPixels(0,
+                 0,
+                 gBufferInput[HR].GetWidth(),
+                 gBufferInput[HR].GetHeight(),
+                 GL_RGBA,
+                 GL_UNSIGNED_BYTE,
+                 gPreviewImage[HR]);
+
+    checkGlError("glReadPixels HR");
+
+    sem_post(&gPreviewImage_semaphore);
+}
+
+JNIEXPORT void JNICALL Java_com_android_camera_panorama_MosaicRenderer_step(
+        JNIEnv * env, jobject obj)
+{
+    if(!gWarpImage) // ViewFinder
+    {
+        gWarper2.SetupGraphics(&gBuffer[gCurrentFBOIndex]);
+        gPreview.SetInputTextureName(gBuffer[gCurrentFBOIndex].GetTextureName());
+
+        gWarper2.DrawTexture(g_dTranslationToFBOCenterGL);
+
+        if (gIsLandscapeOrientation) {
+            gPreview.DrawTexture(g_dAffinetransIdentGL);
+        } else {
+            gPreview.DrawTexture(g_dAffinetransRotation90GL);
+        }
+    }
+    else
+    {
+        gWarper1.SetupGraphics(&gBuffer[gCurrentFBOIndex]);
+        // Clear the destination so that we can paint on it afresh
+        gWarper1.Clear(0.0, 0.0, 0.0, 1.0);
+        gWarper1.SetInputTextureName(
+                gBuffer[1 - gCurrentFBOIndex].GetTextureName());
+        gWarper2.SetupGraphics(&gBuffer[gCurrentFBOIndex]);
+        gPreview.SetInputTextureName(gBuffer[gCurrentFBOIndex].GetTextureName());
+
+        gWarper1.DrawTexture(g_dAffinetransGL);
+        gWarper2.DrawTexture(g_dTranslationToFBOCenterGL);
+        gPreview.DrawTexture(g_dAffinetransPanGL);
+
+        gCurrentFBOIndex = 1 - gCurrentFBOIndex;
+    }
+}
+
+JNIEXPORT void JNICALL Java_com_android_camera_panorama_MosaicRenderer_setWarping(
+        JNIEnv * env, jobject obj, jboolean flag)
+{
+    // TODO: Review this logic
+    if(gWarpImage != (bool) flag) //switching from viewfinder to capture or vice-versa
+    {
+        // Clear gBuffer[0]
+        gWarper1.SetupGraphics(&gBuffer[0]);
+        gWarper1.Clear(0.0, 0.0, 0.0, 1.0);
+        // Clear gBuffer[1]
+        gWarper1.SetupGraphics(&gBuffer[1]);
+        gWarper1.Clear(0.0, 0.0, 0.0, 1.0);
+        // Clear the screen to black.
+        gPreview.Clear(0.0, 0.0, 0.0, 1.0);
+
+        gLastTx = 0.0f;
+        gPanOffset = 0.0f;
+        gPanViewfinder = true;
+
+        db_Identity3x3(gThisH1t);
+        db_Identity3x3(gLastH1t);
+    }
+
+    gWarpImage = (bool)flag;
+}
+
+
+JNIEXPORT void JNICALL Java_com_android_camera_panorama_MosaicRenderer_ready(
+        JNIEnv * env, jobject obj)
+{
+    for(int i=0; i<16; i++)
+    {
+        g_dAffinetransGL[i] = g_dAffinetrans[i];
+        g_dAffinetransPanGL[i] = g_dAffinetransPan[i];
+        g_dTranslationToFBOCenterGL[i] = g_dTranslationToFBOCenter[i];
+    }
+}
diff --git a/benchmark/mosaic_renderer_jni.h b/benchmark/mosaic_renderer_jni.h
new file mode 100644
index 00000000..f9b85df8
--- /dev/null
+++ b/benchmark/mosaic_renderer_jni.h
@@ -0,0 +1,35 @@
+#pragma once
+#include <semaphore.h>
+
+// The Preview FBO dimensions are determined from the high-res
+// frame dimensions (gPreviewImageWidth, gPreviewImageHeight)
+// using the scale factors below.
+const int PREVIEW_FBO_WIDTH_SCALE = 2;
+const int PREVIEW_FBO_HEIGHT_SCALE = 1;
+
+// The factor below determines the (horizontal) speed at which the viewfinder
+// will pan across the UI during capture. A value of 0.0 will keep the viewfinder
+// static in the center of the screen and 1.0f will make it pan at the
+// same speed as the device.
+const float VIEWFINDER_PAN_FACTOR_HORZ = 0.3f;
+
+// What fraction of the screen viewport width has been allocated to show the
+// arrows on the direction of motion side.
+const float VIEWPORT_BORDER_FACTOR_HORZ = 0.1f;
+
+const int LR = 0; // Low-resolution mode
+const int HR = 1; // High-resolution mode
+const int NR = 2; // Number of resolution modes
+
+const int H2L_FACTOR = 4; // Can be 2
+
+extern "C" void AllocateTextureMemory(int widthHR, int heightHR,
+        int widthLR, int heightLR);
+extern "C" void FreeTextureMemory();
+extern "C" void UpdateWarpTransformation(float *trs);
+
+extern unsigned char* gPreviewImage[NR];
+extern int gPreviewImageWidth[NR];
+extern int gPreviewImageHeight[NR];
+
+extern sem_t gPreviewImage_semaphore;