colmap调试学习（二）--feature_matching

这里主要讲解colmap中Feature Matching部分vocab_tree_matcher对应的源码。

图像matcher分如下步骤：

匹配流程的主体位于feature_matching.cc的Run()函数中。

复制代码

// Drives feature matching: sets up the matcher, creates a pair generator, and
// feeds each generated batch of image pairs to the matcher until the generator
// reports that it has finished or a stop is requested.
// NOTE: this excerpt ends before the closing brace of Run(); whatever follows
// the loop is not shown here.
void Run() override {
    LOG_HEADING1("Feature matching & geometric verification");

    // Measures the total run time; printed on the early-stop path below.
    Timer run_timer;
    run_timer.Start();

    // Bail out without matching anything if the matcher cannot be set up.
    if (!matcher_.Setup()) {
      return;
    }

    // The factory result is checked for null before it is used.
    std::unique_ptr<PairGenerator> pair_generator =
        THROW_CHECK_NOTNULL(pair_generator_factory_());

    while (!pair_generator->HasFinished()) {
      // Stop check happens once per batch, before the next batch is requested.
      if (IsStopped()) {
        run_timer.PrintMinutes();
        return;
      }
      // Per-batch timer: covers pair generation and matching of this batch.
      Timer timer;
      timer.Start();
      const std::vector<std::pair<image_t, image_t>> image_pairs =
          pair_generator->Next();
      matcher_.Match(image_pairs);
      LOG(INFO) << StringPrintf("in %.3fs", timer.ElapsedSeconds());
    }

IndexImages()函数负责处理从数据库（db）中加载的数据，主要是每张图像的keypoints和descriptors，并将它们传入visual_index_->Add()中。

复制代码

// Adds the features of every image in `image_ids` to the visual index and then
// prepares the index.
//
// For each image the keypoints and descriptors are fetched from `cache_`,
// reduced via ExtractTopScaleFeatures() when `options_.max_num_features` is
// positive and exceeded, and handed to `visual_index_->Add()`.
//
// The visual index is created lazily while processing the first image: it is
// read from `options_.vocab_tree_path` if that is non-empty, otherwise from
// the URI returned by GetVocabTreeUriForFeatureType() for the descriptor type.
void VocabTreePairGenerator::IndexImages(
    const std::vector<image_t>& image_ids) {
  retrieval::VisualIndex::IndexOptions index_options;
  // We only assign each feature to a single visual word in the indexing phase.
  // During the query phase, we check for overlap in possibly multiple nearest
  // neighbor visual words. We could do it symmetrically but experiments showed
  // only marginal improvements that do not justify the memory/compute increase.
  index_options.num_neighbors = 1;
  index_options.num_checks = options_.num_checks;
  index_options.num_threads = options_.num_threads;

  const size_t num_images = image_ids.size();
  for (size_t i = 0; i < num_images; ++i) {
    Timer timer;
    timer.Start();
    // size_t arguments require the `z` length modifier; passing them to a
    // variadic printf-style function under %d is a format/argument mismatch.
    LOG(INFO) << StringPrintf("Indexing image [%zu/%zu]", i + 1, num_images);

    // Deliberate copies: ExtractTopScaleFeatures() below takes mutable
    // pointers to both and may shrink them.
    auto keypoints = *cache_->GetKeypoints(image_ids[i]);
    auto descriptors = *cache_->GetDescriptors(image_ids[i]);

    // Lazily load the vocabulary tree once the descriptor type is known.
    if (visual_index_ == nullptr) {
      visual_index_ = retrieval::VisualIndex::Read(
          options_.vocab_tree_path.empty()
              ? GetVocabTreeUriForFeatureType(descriptors.type)
              : options_.vocab_tree_path);
    }

    if (options_.max_num_features > 0 &&
        descriptors.data.rows() > options_.max_num_features) {
      ExtractTopScaleFeatures(
          &keypoints, &descriptors, options_.max_num_features);
    }

    visual_index_->Add(
        index_options, image_ids[i], keypoints, descriptors.ToFloat());
    LOG(INFO) << StringPrintf(" in %.3fs", timer.ElapsedSeconds());
  }

  // With an empty `image_ids` and no previously loaded index there is nothing
  // to prepare; dereferencing the null index here would be undefined behavior.
  if (visual_index_ == nullptr) {
    return;
  }

  // Compute the TF-IDF weights, etc.
  visual_index_->Prepare();
}

visual_index_->Add()函数的主体在visual_index.cc文件中。该函数的主要作用是将单张图像的特征描述子添加到视觉索引中，建立图像特征与视觉单词（visual word）之间的映射关系，这就是词汇树图像检索系统的建索引过程。阅读时主要注意其中的word_ids。

复制代码

void Add(const IndexOptions& options,
           int image_id,
           const FeatureKeypoints& keypoints,
           const FeatureDescriptorsFloat& descriptors) override {
    THROW_CHECK_EQ(descriptors.data.cols(), kDescDim);
    THROW_CHECK_EQ(keypoints.size(), descriptors.data.rows());
    THROW_CHECK_EQ(descriptors.type, feature_type_)
        << "Feature type mismatch: index was built with "
        << FeatureExtractorTypeToString(feature_type_) << " but received "
        << FeatureExtractorTypeToString(descriptors.type);

    // If the image is already indexed, do nothing.
    if (IsImageIndexed(image_id)) {
      return;
    }

    image_ids_.insert(image_id);

    prepared_ = false;

    if (descriptors.data.rows() == 0) {
      return;
    }

    const WordIds word_ids = FindWordIds(descriptors.data,
                                         options.num_neighbors,
                                         options.num_checks,
                                         options.num_threads);

    for (Eigen::Index i = 0; i < descriptors.data.rows(); ++i) {
      const auto& descriptor = descriptors.data.row(i);

      typename InvertedIndexType::GeomType geometry;
      geometry.x = keypoints[i].x;
      geometry.y = keypoints[i].y;
      geometry.scale = keypoints[i].ComputeScale();
      geometry.orientation = keypoints[i].ComputeOrientation();

      for (int n = 0; n < options.num_neighbors; ++n) {
        const int word_id = word_ids(i, n);
        if (word_id != InvertedIndexType::kInvalidWordId) {
          inverted_index_.AddEntry(image_id, word_id, i, descriptor, geometry);
        }
      }
    }
  }

AddEntry()的源码如下。

复制代码

// Adds one feature to the inverted file of visual word `word_id`.
//
// The descriptor must have exactly kDescDim elements. It is cast to float and
// multiplied by `proj_matrix_`; the projected descriptor, together with the
// image id, feature index and geometry, is forwarded to the inverted file
// looked up with `inverted_files_.at(word_id)`.
template <typename kDescType, int kDescDim, int kEmbeddingDim>
void InvertedIndex<kDescType, kDescDim, kEmbeddingDim>::AddEntry(
    const int image_id,
    const int64_t word_id,
    typename DescType::Index feature_idx,
    const DescType& descriptor,
    const GeomType& geometry) {
  THROW_CHECK_EQ(descriptor.size(), kDescDim);

  // Project first, then look up the target file (same order as before).
  const ProjDescType projected_descriptor =
      proj_matrix_ * descriptor.transpose().template cast<float>();

  auto& target_file = inverted_files_.at(word_id);
  target_file.AddEntry(image_id, feature_idx, projected_descriptor, geometry);
}

// Appends one entry to this inverted file.
//
// `image_id` must be non-negative and `descriptor` must have exactly
// kEmbeddingDim elements. The descriptor is converted by
// ConvertToBinaryDescriptor() and stored in the new entry alongside the image
// id, feature index and geometry.
template <int kEmbeddingDim>
void InvertedFile<kEmbeddingDim>::AddEntry(const int image_id,
                                           typename DescType::Index feature_idx,
                                           const DescType& descriptor,
                                           const GeomType& geometry) {
  THROW_CHECK_GE(image_id, 0);
  THROW_CHECK_EQ(descriptor.size(), kEmbeddingDim);

  EntryType new_entry;
  ConvertToBinaryDescriptor(descriptor, &new_entry.descriptor);
  new_entry.geometry = geometry;
  new_entry.feature_idx = feature_idx;
  new_entry.image_id = image_id;
  entries_.push_back(new_entry);

  // Clear the kEntriesSorted bit now that an entry has been appended.
  status_ &= ~kEntriesSorted;
}

最后这些数据都被放入entries_中。entries_是一个std::vector，其中每个元素（EntryType）可以理解为集成了二进制描述子、特征几何信息（来自keypoint）以及image_id、feature_idx的结构体。

复制代码

 // Entries of this inverted file; AddEntry() appends to it with push_back().
 std::vector<EntryType> entries_;

// Inverted file parameterized by the embedding dimension kEmbeddingDim.
// NOTE: this is an excerpt; the class body continues past the Status enum and
// its remaining members are not shown here.
template <int kEmbeddingDim>
class InvertedFile {
 public:
  // Descriptor type accepted by this file (a dynamic-size float vector).
  using DescType = Eigen::VectorXf;
  // Per-feature geometry stored with every entry.
  using GeomType = FeatureGeometry;
  // Entry type, instantiated with the same kEmbeddingDim as the file.
  using EntryType = InvertedFileEntry<kEmbeddingDim>;

  // Status flags stored as bits in a uint8_t. kUsable (0x03) is the bitwise
  // OR of kHasEmbedding (0x01) and kEntriesSorted (0x02); kUnusable has no
  // bit set.
  enum Status : uint8_t {
    kUnusable = 0x00,
    kHasEmbedding = 0x01,
    kEntriesSorted = 0x02,
    kUsable = 0x03,
  };

// A single entry of an inverted file: which feature of which image it refers
// to, that feature's geometry, and an N-bit binary signature.
template <int N>
struct InvertedFileEntry {
  // Stream-based read/write of an entry; the definitions are not part of
  // this excerpt.
  void Read(std::istream* in);
  void Write(std::ostream* out) const;

  // Image this entry belongs to; -1 until assigned.
  int image_id{-1};

  // Position of the feature in the image's keypoint list; -1 until assigned.
  int feature_idx{-1};

  // Feature geometry, kept for spatial verification.
  FeatureGeometry geometry;

  // Binary Hamming-embedding signature of the feature.
  std::bitset<N> descriptor;
};