module Banzai
  module ReferenceParser
    # Base class for reference parsing classes.
    #
    # Each parser should also specify its reference type by calling
    # `self.reference_type = ...` in the body of the class. The value of this
    # method should be a symbol such as `:issue` or `:merge_request`. For
    # example:
    #
    #     class IssueParser < BaseParser
    #       self.reference_type = :issue
    #     end
    #
    # The reference type is used to determine what nodes to pass to the
    # `referenced_by` method.
    #
    # Parser classes should either implement the instance method
    # `references_relation` or overwrite `referenced_by`. The
    # `references_relation` method is supposed to return an
    # ActiveRecord::Relation used as a base relation for retrieving the objects
    # referenced in a set of HTML nodes.
    #
    # Each class can implement two additional methods:
    #
    # * `nodes_user_can_reference`: returns an Array of nodes the given user can
    #   refer to.
    # * `nodes_visible_to_user`: returns an Array of nodes that are visible to
    #   the given user.
    #
    # You only need to overwrite these methods if you want to tweak who can see
    # which references. For example, the IssueParser class defines its own
    # `nodes_visible_to_user` method so it can ensure users can only see issues
    # they have access to.
    class BaseParser
      class << self
        attr_accessor :reference_type
      end

      # Returns the attribute name containing the value for every object to be
      # parsed by the current parser.
      #
      # For example, for a parser class that returns "Animal" objects this
      # attribute would be "data-animal".
      def self.data_attribute
        @data_attribute ||= "data-#{reference_type.to_s.dasherize}"
      end

      def initialize(project = nil, current_user = nil)
        @project = project
        @current_user = current_user
      end

      # Returns all the nodes containing references that the user can refer to.
      def nodes_user_can_reference(user, nodes)
        nodes
      end

      # Returns all the nodes that are visible to the given user.
      def nodes_visible_to_user(user, nodes)
        projects = lazy { projects_for_nodes(nodes) }
        project_attr = 'data-project'

        nodes.select do |node|
          if node.has_attribute?(project_attr)
            node_id = node.attr(project_attr).to_i

            if project && project.id == node_id
              true
            else
              can?(user, :read_project, projects[node_id])
            end
          else
            true
          end
        end
      end

      # Returns an Array of objects referenced by any of the given HTML nodes.
      def referenced_by(nodes)
        ids = unique_attribute_values(nodes, self.class.data_attribute)

        references_relation.where(id: ids)
      end

      # Returns the ActiveRecord::Relation to use for querying references in the
      # DB.
      def references_relation
        raise NotImplementedError,
          "#{self.class} does not implement #{__method__}"
      end

      # Returns a Hash containing attribute values per project ID.
      #
      # The returned Hash uses the following format:
      #
      #     { project id => [value1, value2, ...] }
      #
      # nodes - An Array of HTML nodes to process.
      # attribute - The name of the attribute (as a String) for which to gather
      #             values.
      #
      # Returns a Hash.
      def gather_attributes_per_project(nodes, attribute)
        per_project = Hash.new { |hash, key| hash[key] = Set.new }

        nodes.each do |node|
          project_id = node.attr('data-project').to_i
          id = node.attr(attribute)

          per_project[project_id] << id if id
        end

        per_project
      end

      # Returns a Hash containing objects for an attribute grouped per their
      # IDs.
      #
      # The returned Hash uses the following format:
      #
      #     { id value => row }
      #
      # nodes - An Array of HTML nodes to process.
      #
      # collection - The model or ActiveRecord relation to use for retrieving
      #              rows from the database.
      #
      # attribute - The name of the attribute containing the primary key values
      #             for every row.
      #
      # Returns a Hash.
      def grouped_objects_for_nodes(nodes, collection, attribute)
        return {} if nodes.empty?

        ids = unique_attribute_values(nodes, attribute)
        rows = collection_objects_for_ids(collection, ids)

        rows.each_with_object({}) do |row, hash|
          hash[row.id] = row
        end
      end

      # Returns an Array containing all unique values of an attribute of the
      # given nodes.
      def unique_attribute_values(nodes, attribute)
        values = Set.new

        nodes.each do |node|
          if node.has_attribute?(attribute)
            values << node.attr(attribute)
          end
        end

        values.to_a
      end

      # Queries the collection for the objects with the given IDs.
      #
      # If the RequestStore module is enabled this method will only query any
      # objects that have not yet been queried. For objects that have already
      # been queried the object is returned from the cache.
      def collection_objects_for_ids(collection, ids)
        if RequestStore.active?
          cache = collection_cache[collection_cache_key(collection)]
          to_query = ids.map(&:to_i) - cache.keys

          unless to_query.empty?
            collection.where(id: to_query).each { |row| cache[row.id] = row }
          end

          cache.values
        else
          collection.where(id: ids)
        end
      end

      # Returns the cache key to use for a collection.
      def collection_cache_key(collection)
        collection.respond_to?(:model) ? collection.model : collection
      end

      # Processes the list of HTML documents and returns an Array containing all
      # the references.
      def process(documents)
        type = self.class.reference_type

        nodes = documents.flat_map do |document|
          Querying.css(document, "a[data-reference-type='#{type}'].gfm").to_a
        end

        gather_references(nodes)
      end

      # Gathers the references for the given HTML nodes.
      def gather_references(nodes)
        nodes = nodes_user_can_reference(current_user, nodes)
        nodes = nodes_visible_to_user(current_user, nodes)

        referenced_by(nodes)
      end

      # Returns a Hash containing the projects for a given list of HTML nodes.
      #
      # The returned Hash uses the following format:
      #
      #     { project ID => project }
      #
      def projects_for_nodes(nodes)
        @projects_for_nodes ||=
          grouped_objects_for_nodes(nodes, Project, 'data-project')
      end

      def can?(user, permission, subject)
        Ability.allowed?(user, permission, subject)
      end

      def find_projects_for_hash_keys(hash)
        collection_objects_for_ids(Project, hash.keys)
      end

      private

      attr_reader :current_user, :project

      def lazy(&block)
        Gitlab::Lazy.new(&block)
      end

      def collection_cache
        RequestStore[:banzai_collection_cache] ||= Hash.new do |hash, key|
          hash[key] = {}
        end
      end
    end
  end
end