Commit 79a5d768 authored by Zeger-Jan van de Weg's avatar Zeger-Jan van de Weg

Add repository languages for projects

Our friends at GitHub have shown a repository's programming languages for
a long time, and inspired by that, this commit aims to provide roughly the
same functionality.

Language detection is done through Linguist, as before; the difference is
that we now cache the result in the database. Also, Gitaly can
incrementally scan a repository. This is done through a shell out, which
creates an overhead of about 3s on each run. For now this won't be improved.

Scans are triggered by pushes to the default branch, usually `master`.
However, one exception to this rule is the charts page. If we're requesting
this expensive data anyway, we just cache it in the database.

Edge cases where there is no repository, or it's empty, are caught in the
Repository model. This makes use of Redis caching, which is probably
already loaded.

The added model is called RepositoryLanguage, which will make it harder
if/when GitLab supports multiple repositories per project. However, for
now I think this shouldn't be a concern. Also, Language could be
confused with the i18n languages, and I felt the current name was
suitable too.

Design of the Project#Show page is done with help from @dimitrieh. This
change is not visible to the end user unless detections are done.
parent f1750140
...@@ -754,6 +754,11 @@ ...@@ -754,6 +754,11 @@
} }
} }
// Language share bar: a 6px-high strip with 8px of spacing below it.
.repository-languages-bar {
height: 6px;
margin-bottom: 8px;
}
pre.light-well { pre.light-well {
border-color: $well-light-border; border-color: $well-light-border;
} }
......
# View helpers that render the language breakdown bar of a project.
module RepositoryLanguagesHelper
  # Builds the wrapper element that holds one coloured segment per language.
  #
  # languages - collection of objects responding to #name, #share and #color.
  #
  # Returns nil when `languages.none?` holds, otherwise the result of
  # content_tag for the wrapping div.
  def repository_languages_bar(languages)
    return if languages.none?

    content_tag :div, class: 'progress repository-languages-bar' do
      segments = languages.map { |language| language_progress(language) }
      safe_join(segments)
    end
  end

  # Renders a single segment: its width is the language's share (used as a
  # percentage), its background is the language colour and its title is the
  # language name.
  def language_progress(lang)
    inline_style = "width: #{lang.share}%; background-color:#{lang.color}"

    content_tag :div, nil, class: "progress-bar has-tooltip", style: inline_style, title: lang.name
  end
end
...@@ -122,6 +122,7 @@ class Namespace < ActiveRecord::Base ...@@ -122,6 +122,7 @@ class Namespace < ActiveRecord::Base
def to_param def to_param
full_path full_path
end end
alias_method :flipper_id, :to_param
def human_name def human_name
owner_name owner_name
......
# A programming language that can be attached to repositories; each record
# carries a name and a display color.
class ProgrammingLanguage < ActiveRecord::Base
# A name is mandatory.
validates :name, presence: true
# Blank colors are rejected; `color: true` delegates the format check to a
# custom validator defined elsewhere (presumably ColorValidator -- confirm).
validates :color, allow_blank: false, color: true
end
...@@ -192,6 +192,7 @@ class Project < ActiveRecord::Base ...@@ -192,6 +192,7 @@ class Project < ActiveRecord::Base
has_many :hooks, class_name: 'ProjectHook' has_many :hooks, class_name: 'ProjectHook'
has_many :protected_branches has_many :protected_branches
has_many :protected_tags has_many :protected_tags
has_many :repository_languages, -> { order "share DESC" }
has_many :project_authorizations has_many :project_authorizations
has_many :authorized_users, through: :project_authorizations, source: :user, class_name: 'User' has_many :authorized_users, through: :project_authorizations, source: :user, class_name: 'User'
......
...@@ -235,6 +235,12 @@ class Repository ...@@ -235,6 +235,12 @@ class Repository
false false
end end
# Language breakdown of the repository at its root ref.
#
# Returns an empty array when the repository is empty; otherwise whatever the
# raw repository reports for root_ref.
def languages
  if empty?
    []
  else
    raw_repository.languages(root_ref)
  end
end
# Makes sure a commit is kept around when Git garbage collection runs. # Makes sure a commit is kept around when Git garbage collection runs.
# Git GC will delete commits from the repository that are no longer in any # Git GC will delete commits from the repository that are no longer in any
# branches or tags, but we want to keep some of these commits around, for # branches or tags, but we want to keep some of these commits around, for
...@@ -432,6 +438,8 @@ class Repository ...@@ -432,6 +438,8 @@ class Repository
# Runs code after a repository has been forked/imported. # Runs code after a repository has been forked/imported.
def after_import def after_import
expire_content_cache expire_content_cache
DetectRepositoryLanguagesWorker.perform_async(project.id, project.owner.id)
end end
# Runs code after a new commit has been pushed. # Runs code after a new commit has been pushed.
......
# Join record linking a project to a programming language found in its
# repository, together with that language's share (0..100).
class RepositoryLanguage < ActiveRecord::Base
  belongs_to :project
  belongs_to :programming_language

  # Always load the associated language: #name and #color are delegated to it.
  default_scope { includes(:programming_language) }

  validates :project, presence: true
  validates :share, inclusion: { in: 0..100, message: "The share of a language is between 0 and 100" }
  # A language may appear at most once per project.
  validates :programming_language, uniqueness: { scope: :project_id }

  delegate :name, :color, to: :programming_language
end
...@@ -85,6 +85,8 @@ class GitPushService < BaseService ...@@ -85,6 +85,8 @@ class GitPushService < BaseService
types = Gitlab::FileDetector.types_in_paths(paths.to_a) types = Gitlab::FileDetector.types_in_paths(paths.to_a)
end end
DetectRepositoryLanguagesWorker.perform_async(@project.id, current_user.id)
else else
types = [] types = []
end end
......
module Projects
  # Synchronises the repository languages stored for a project with a fresh
  # detection run: languages that are no longer detected are deleted, changed
  # shares are updated and newly detected languages are inserted.
  class DetectRepositoryLanguagesService < BaseService
    attr_reader :detected_repository_languages, :programming_languages

    # Runs the detection and applies the resulting changes in one transaction.
    #
    # Returns the project's reloaded repository_languages association.
    def execute
      repository_languages = project.repository_languages
      detection = Gitlab::LanguageDetection.new(repository, repository_languages)

      matching_programming_languages = ensure_programming_languages(detection)

      RepositoryLanguage.transaction do
        project.repository_languages.where(programming_language_id: detection.deletions).delete_all

        detection.updates.each do |update|
          # update_all issues the UPDATE statement right away. An Arel update
          # manager on its own only assembles the statement and never runs it,
          # which would leave the stored shares stale.
          RepositoryLanguage
            .where(project_id: project.id)
            .where(programming_language_id: update[:programming_language_id])
            .update_all(share: update[:share])
        end

        Gitlab::Database.bulk_insert(
          RepositoryLanguage.table_name,
          detection.insertions(matching_programming_languages)
        )
      end

      project.repository_languages.reload
    end

    private

    # Makes sure a ProgrammingLanguage row exists for every detected language.
    #
    # Returns the existing rows when none are missing, otherwise the existing
    # rows plus the ones created here.
    def ensure_programming_languages(detection)
      existing_languages = ProgrammingLanguage.where(name: detection.languages)
      return existing_languages if detection.languages.size == existing_languages.size

      missing_languages = detection.languages - existing_languages.map(&:name)
      created_languages = missing_languages.map do |name|
        create_language(name, detection.language_color(name))
      end

      existing_languages + created_languages
    end

    # Finds or creates the language called `name`, using `color` on creation.
    #
    # A concurrent insert of the same name trips the unique index on the name
    # column; the retry then goes through the lookup again.
    def create_language(name, color)
      ProgrammingLanguage.transaction do
        ProgrammingLanguage.where(name: name).first_or_create(color: color)
      end
    rescue ActiveRecord::RecordNotUnique
      retry
    end
  end
end
...@@ -18,10 +18,11 @@ ...@@ -18,10 +18,11 @@
= render "home_panel" = render "home_panel"
- if can?(current_user, :download_code, @project) - if can?(current_user, :download_code, @project)
%nav.project-stats{ class: container_class } %nav.project-stats{ class: [container_class, ("limit-container-width" unless fluid_layout)] }
= render 'stat_anchor_list', anchors: @project.statistics_anchors(show_auto_devops_callout: show_auto_devops_callout) = render 'stat_anchor_list', anchors: @project.statistics_anchors(show_auto_devops_callout: show_auto_devops_callout)
= render 'stat_anchor_list', anchors: @project.statistics_buttons(show_auto_devops_callout: show_auto_devops_callout) = render 'stat_anchor_list', anchors: @project.statistics_buttons(show_auto_devops_callout: show_auto_devops_callout)
- if Feature.enabled?(:repository_languages, @project.namespace.becomes(Namespace))
= repository_languages_bar(@project.repository_languages)
%div{ class: [container_class, ("limit-container-width" unless fluid_layout)] } %div{ class: [container_class, ("limit-container-width" unless fluid_layout)] }
- if @project.archived? - if @project.archived?
......
...@@ -123,3 +123,4 @@ ...@@ -123,3 +123,4 @@
- repository_update_remote_mirror - repository_update_remote_mirror
- create_note_diff_file - create_note_diff_file
- delete_diff_files - delete_diff_files
- detect_repository_languages
# Background job that runs the repository language detection service for a
# project, guarded by an exclusive lease keyed on the project id.
class DetectRepositoryLanguagesWorker
  include ApplicationWorker
  include ExceptionBacktrace
  include ExclusiveLeaseGuard

  sidekiq_options retry: 1

  # Value returned by #lease_timeout (presumably seconds -- confirm against
  # ExclusiveLeaseGuard).
  LEASE_TIMEOUT = 300

  attr_reader :project

  # project_id - id of the project to scan.
  # user_id    - id of the user the service is run as.
  #
  # Does nothing when either record is missing or when the
  # :repository_languages feature is disabled for the project's namespace.
  def perform(project_id, user_id)
    @project = Project.find_by(id: project_id)
    user = User.find_by(id: user_id)

    return if project.nil? || user.nil?
    return if Feature.disabled?(:repository_languages, project.namespace)

    try_obtain_lease do
      detector = ::Projects::DetectRepositoryLanguagesService.new(project, user)
      detector.execute
    end
  end

  private

  def lease_timeout
    LEASE_TIMEOUT
  end

  # One lease per project.
  def lease_key
    "gitlab:detect_repository_languages:#{project.id}"
  end
end
---
title: Show repository languages for projects
merge_request: 19480
author:
type: added
...@@ -77,3 +77,4 @@ ...@@ -77,3 +77,4 @@
- [repository_remove_remote, 1] - [repository_remove_remote, 1]
- [create_note_diff_file, 1] - [create_note_diff_file, 1]
- [delete_diff_files, 1] - [delete_diff_files, 1]
- [detect_repository_languages, 1]
# Creates the programming_languages lookup table and the repository_languages
# table that stores a share per (project, programming language) pair.
class AddRepositoryLanguages < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
# NOTE(review): presumably the GitLab convention declaring that this migration
# needs no downtime -- confirm against the migration guidelines.
DOWNTIME = false
def up
create_table(:programming_languages) do |t|
t.string :name, null: false
t.string :color, null: false
t.datetime_with_timezone :created_at, null: false
end
# No surrogate id column: rows are identified by the unique
# (project_id, programming_language_id) index added below.
create_table(:repository_languages, id: false) do |t|
# Rows are removed together with their project (ON DELETE CASCADE).
t.references :project, null: false, foreign_key: { on_delete: :cascade }
# No foreign key is declared for the programming language reference.
t.references :programming_language, null: false
t.float :share, null: false
end
# Language names are unique.
add_index :programming_languages, :name, unique: true
# Each language can be stored at most once per project.
add_index :repository_languages, [:project_id, :programming_language_id],
unique: true, name: "index_repository_languages_on_project_and_languages_id"
end
# Drops repository_languages first, then programming_languages.
def down
drop_table :repository_languages
drop_table :programming_languages
end
end
...@@ -1502,6 +1502,14 @@ ActiveRecord::Schema.define(version: 20180726172057) do ...@@ -1502,6 +1502,14 @@ ActiveRecord::Schema.define(version: 20180726172057) do
add_index "personal_access_tokens", ["token"], name: "index_personal_access_tokens_on_token", unique: true, using: :btree add_index "personal_access_tokens", ["token"], name: "index_personal_access_tokens_on_token", unique: true, using: :btree
add_index "personal_access_tokens", ["user_id"], name: "index_personal_access_tokens_on_user_id", using: :btree add_index "personal_access_tokens", ["user_id"], name: "index_personal_access_tokens_on_user_id", using: :btree
create_table "programming_languages", force: :cascade do |t|
t.string "name", null: false
t.string "color", null: false
t.datetime_with_timezone "created_at", null: false
end
add_index "programming_languages", ["name"], name: "index_programming_languages_on_name", unique: true, using: :btree
create_table "project_authorizations", id: false, force: :cascade do |t| create_table "project_authorizations", id: false, force: :cascade do |t|
t.integer "user_id", null: false t.integer "user_id", null: false
t.integer "project_id", null: false t.integer "project_id", null: false
...@@ -1788,6 +1796,14 @@ ActiveRecord::Schema.define(version: 20180726172057) do ...@@ -1788,6 +1796,14 @@ ActiveRecord::Schema.define(version: 20180726172057) do
add_index "remote_mirrors", ["last_successful_update_at"], name: "index_remote_mirrors_on_last_successful_update_at", using: :btree add_index "remote_mirrors", ["last_successful_update_at"], name: "index_remote_mirrors_on_last_successful_update_at", using: :btree
add_index "remote_mirrors", ["project_id"], name: "index_remote_mirrors_on_project_id", using: :btree add_index "remote_mirrors", ["project_id"], name: "index_remote_mirrors_on_project_id", using: :btree
create_table "repository_languages", id: false, force: :cascade do |t|
t.integer "project_id", null: false
t.integer "programming_language_id", null: false
t.float "share", null: false
end
add_index "repository_languages", ["project_id", "programming_language_id"], name: "index_repository_languages_on_project_and_languages_id", unique: true, using: :btree
create_table "resource_label_events", id: :bigserial, force: :cascade do |t| create_table "resource_label_events", id: :bigserial, force: :cascade do |t|
t.integer "action", null: false t.integer "action", null: false
t.integer "issue_id" t.integer "issue_id"
...@@ -2359,6 +2375,7 @@ ActiveRecord::Schema.define(version: 20180726172057) do ...@@ -2359,6 +2375,7 @@ ActiveRecord::Schema.define(version: 20180726172057) do
add_foreign_key "push_event_payloads", "events", name: "fk_36c74129da", on_delete: :cascade add_foreign_key "push_event_payloads", "events", name: "fk_36c74129da", on_delete: :cascade
add_foreign_key "releases", "projects", name: "fk_47fe2a0596", on_delete: :cascade add_foreign_key "releases", "projects", name: "fk_47fe2a0596", on_delete: :cascade
add_foreign_key "remote_mirrors", "projects", on_delete: :cascade add_foreign_key "remote_mirrors", "projects", on_delete: :cascade
add_foreign_key "repository_languages", "projects", on_delete: :cascade
add_foreign_key "resource_label_events", "issues", on_delete: :cascade add_foreign_key "resource_label_events", "issues", on_delete: :cascade
add_foreign_key "resource_label_events", "labels", on_delete: :nullify add_foreign_key "resource_label_events", "labels", on_delete: :nullify
add_foreign_key "resource_label_events", "merge_requests", on_delete: :cascade add_foreign_key "resource_label_events", "merge_requests", on_delete: :cascade
......
...@@ -46,6 +46,10 @@ class Feature ...@@ -46,6 +46,10 @@ class Feature
get(key).enabled?(thing) get(key).enabled?(thing)
end end
# Negation of enabled?: true when `key` is not enabled for `thing`
# (`thing` defaults to nil).
def disabled?(key, thing = nil)
!enabled?(key, thing)
end
def enable(key, thing = true) def enable(key, thing = true)
get(key).enable(thing) get(key).enable(thing)
end end
......
...@@ -107,6 +107,7 @@ excluded_attributes: ...@@ -107,6 +107,7 @@ excluded_attributes:
- :storage_version - :storage_version
- :remote_mirror_available_overridden - :remote_mirror_available_overridden
- :description_html - :description_html
- :repository_languages
snippets: snippets:
- :expired_at - :expired_at
merge_request_diff: merge_request_diff:
......
module Gitlab
  # Compares a fresh language detection of a repository with the repository
  # languages that are already stored, and derives which records have to be
  # inserted, updated or deleted.
  class LanguageDetection
    # Only the first MAX_LANGUAGES detected entries are taken into account.
    MAX_LANGUAGES = 5

    # repository           - responds to #languages (array of hashes with
    #                        :label, :value and :color keys) and #project.
    # repository_languages - stored records responding to #name, #share and
    #                        #programming_language_id.
    def initialize(repository, repository_languages)
      @repository = repository
      @repository_languages = repository_languages
    end

    # Names (labels) of the detected languages.
    def languages
      detection.keys
    end

    # Colour of the detected language `name`, or nil when it was not detected.
    def language_color(name)
      detection.dig(name, :color)
    end

    # Rows for languages that were detected but are not stored yet, in the
    # structure accepted by Gitlab::Database.bulk_insert.
    #
    # programming_languages - objects responding to #name and #id, used to
    #                         resolve a language name to its id.
    def insertions(programming_languages)
      ids_by_name = programming_languages.each_with_object({}) do |language, index|
        index[language.name] = language.id
      end

      new_names = languages - previous_language_names

      new_names.map do |label|
        {
          project_id: @repository.project.id,
          share: detection[label][:value],
          programming_language_id: ids_by_name[label]
        }
      end
    end

    # Stored languages that are still detected but whose share changed; each
    # entry carries the programming language id and the new share.
    def updates
      changed = @repository_languages.select { |stored| share_changed?(stored) }

      changed.map do |stored|
        {
          programming_language_id: stored.programming_language_id,
          share: detection[stored.name][:value]
        }
      end
    end

    # Ids of the programming languages that are stored for the repository
    # but no longer occur in the detection.
    def deletions
      @repository_languages
        .reject { |stored| detection.key?(stored.name) }
        .map(&:programming_language_id)
        .compact
    end

    private

    # True when `stored` is part of the detection with a different share.
    def share_changed?(stored)
      detection.key?(stored.name) && detection[stored.name][:value] != stored.share
    end

    def previous_language_names
      @previous_language_names ||= @repository_languages.map(&:name)
    end

    # Detected entries indexed by their label, limited to MAX_LANGUAGES.
    def detection
      @detection ||= @repository.languages.first(MAX_LANGUAGES).each_with_object({}) { |entry, by_label|
        by_label[entry[:label]] = entry
      }
    end
  end
end
# Factory for programming_language records; defaults to the name 'Ruby' with
# the color '#123456'.
FactoryBot.define do
factory :programming_language do
name 'Ruby'
color '#123456'
end
end
# Factory for repository_language records: associates a project and a
# programming language and defaults the share to 98.5.
FactoryBot.define do
factory :repository_language do
project
programming_language
share 98.5
end
end
...@@ -297,6 +297,7 @@ project: ...@@ -297,6 +297,7 @@ project:
- settings - settings
- ci_cd_settings - ci_cd_settings
- import_export_upload - import_export_upload
- repository_languages
award_emoji: award_emoji:
- awardable - awardable
- user - user
......
require 'spec_helper'
# Specs for Gitlab::LanguageDetection. Repository#languages is stubbed with
# the `detection` fixture, and the stored state is given by
# `repository_languages`.
describe Gitlab::LanguageDetection do
set(:project) { create(:project, :repository) }
set(:ruby) { create(:programming_language, name: 'Ruby') }
set(:haskell) { create(:programming_language, name: 'Haskell') }
let(:repository) { project.repository }
# Six detected entries, one more than the five names expected from
# #languages below.
let(:detection) do
[{ value: 66.63, label: "Ruby", color: "#701516", highlight: "#701516" },
{ value: 12.96, label: "JavaScript", color: "#f1e05a", highlight: "#f1e05a" },
{ value: 7.9, label: "Elixir", color: "#e34c26", highlight: "#e34c26" },
{ value: 2.51, label: "CoffeeScript", color: "#244776", highlight: "#244776" },
{ value: 1.51, label: "Go", color: "#2a4776", highlight: "#244776" },
{ value: 1.1, label: "MepmepLang", color: "#2a4776", highlight: "#244776" }]
end
# Stored state: only Ruby, with a share that differs from the detected one.
let(:repository_languages) do
[RepositoryLanguage.new(share: 10, programming_language: ruby)]
end
subject { described_class.new(repository, repository_languages) }
before do
allow(repository).to receive(:languages).and_return(detection)
end
describe '#languages' do
it 'returns the language names' do
expect(subject.languages).to eq(%w[Ruby JavaScript Elixir CoffeeScript Go])
end
end
describe '#insertions' do
let(:programming_languages) { [ruby, haskell] }
# Only Haskell is detected here, and it is not stored yet.
let(:detection) do
[{ value: 10, label: haskell.name, color: haskell.color }]
end
it 'only includes new languages' do
insertions = subject.insertions(programming_languages)
expect(insertions).not_to be_empty
expect(insertions.first[:project_id]).to be(project.id)
expect(insertions.first[:programming_language_id]).to be(haskell.id)
expect(insertions.first[:share]).to be(10)
end
end
describe '#updates' do
it 'updates the share of languages' do
first_update = subject.updates.first
expect(first_update).not_to be_nil
expect(first_update[:programming_language_id]).to eq(ruby.id)
expect(first_update[:share]).to eq(66.63)
end
it 'does not include languages to be removed' do
ids = subject.updates.map { |h| h[:programming_language_id] }
expect(ids).not_to include(haskell.id)
end
# The stored share already equals the detected one, so nothing has to change.
context 'when silent writes occur' do
let(:repository_languages) do
[RepositoryLanguage.new(share: 66.63, programming_language: ruby)]
end
it "doesn't include them in the result" do
expect(subject.updates).to be_empty
end
end
end
describe '#deletions' do
# Haskell is stored but is not part of the detection fixture.
let(:repository_languages) do
[RepositoryLanguage.new(share: 10, programming_language: ruby),
RepositoryLanguage.new(share: 5, programming_language: haskell)]
end
it 'lists undetected languages' do
expect(subject.deletions).not_to be_empty
expect(subject.deletions).to include(haskell.id)
end
end
end
require 'spec_helper'
# Model spec for ProgrammingLanguage: attribute presence and color format.
describe ProgrammingLanguage do
it { is_expected.to respond_to(:name) }
it { is_expected.to respond_to(:color) }
it { is_expected.to validate_presence_of(:name) }
# A color is accepted with a leading '#' and rejected without it or with a
# non-hexadecimal digit.
it { is_expected.to allow_value("#000000").for(:color) }
it { is_expected.not_to allow_value("000000").for(:color) }
it { is_expected.not_to allow_value("#0z0000").for(:color) }
end
...@@ -69,6 +69,7 @@ describe Project do ...@@ -69,6 +69,7 @@ describe Project do
it { is_expected.to have_many(:pages_domains) } it { is_expected.to have_many(:pages_domains) }
it { is_expected.to have_many(:labels).class_name('ProjectLabel') } it { is_expected.to have_many(:labels).class_name('ProjectLabel') }
it { is_expected.to have_many(:users_star_projects) } it { is_expected.to have_many(:users_star_projects) }
it { is_expected.to have_many(:repository_languages) }
it { is_expected.to have_many(:environments) } it { is_expected.to have_many(:environments) }
it { is_expected.to have_many(:deployments) } it { is_expected.to have_many(:deployments) }
it { is_expected.to have_many(:todos) } it { is_expected.to have_many(:todos) }
......
require 'spec_helper'
# Model spec for RepositoryLanguage: associations and the bounds of `share`.
describe RepositoryLanguage do
# NOTE(review): not referenced by any example in this block.
let(:repository_language) { build(:repository_language) }
describe 'associations' do
it { is_expected.to belong_to(:project) }
it { is_expected.to belong_to(:programming_language) }
end
describe 'validations' do
# 0 and 100.0 are accepted, 100.1 is rejected.
it { is_expected.to allow_value(0).for(:share) }
it { is_expected.to allow_value(100.0).for(:share) }
it { is_expected.not_to allow_value(100.1).for(:share) }
end
end
...@@ -3,8 +3,8 @@ require 'spec_helper' ...@@ -3,8 +3,8 @@ require 'spec_helper'
describe GitPushService, services: true do describe GitPushService, services: true do
include RepoHelpers include RepoHelpers
let(:user) { create(:user) } set(:user) { create(:user) }
let(:project) { create(:project, :repository) } set(:project) { create(:project, :repository) }
let(:blankrev) { Gitlab::Git::BLANK_SHA } let(:blankrev) { Gitlab::Git::BLANK_SHA }
let(:oldrev) { sample_commit.parent_id } let(:oldrev) { sample_commit.parent_id }
let(:newrev) { sample_commit.id } let(:newrev) { sample_commit.id }
......
require 'spec_helper'
# Specs for Projects::DetectRepositoryLanguagesService#execute, run against a
# project created with the :repository trait.
describe Projects::DetectRepositoryLanguagesService, :clean_gitlab_redis_shared_state do
set(:project) { create(:project, :repository) }
subject { described_class.new(project, project.owner) }
before do
allow(Feature).to receive(:disabled?).and_return(false)
end
describe '#execute' do
context 'without previous detection' do
it 'inserts new programming languages in the database' do
subject.execute
expect(ProgrammingLanguage.exists?(name: 'Ruby')).to be(true)
expect(ProgrammingLanguage.count).to be(4)
end
it 'inserts the repository langauges' do
names = subject.execute.map(&:name)
expect(names).to eq(%w[Ruby JavaScript HTML CoffeeScript])
end
end
context 'with a previous detection' do
# Runs one real detection first, then stubs a different detection result
# for the second run.
before do
subject.execute
allow(project.repository).to receive(:languages).and_return(
[{ value: 99.63, label: "Ruby", color: "#701516", highlight: "#701516" },
{ value: 0.3, label: "D", color: "#701516", highlight: "#701516" }]
)
end
# NOTE(review): only the language names are asserted; the updated share
# values are not checked by this example.
it 'updates the repository languages' do
repository_languages = subject.execute.map(&:name)
expect(repository_languages).to eq(%w[Ruby D])
end
end
context 'when no repository exists' do
set(:project) { create(:project) }
it 'has no languages' do
expect(subject.execute).to be_empty
expect(project.repository_languages).to be_empty
end
end
end
end
require 'spec_helper'
# Specs for DetectRepositoryLanguagesWorker#perform.
describe DetectRepositoryLanguagesWorker do
set(:project) { create(:project) }
let(:user) { project.owner }
subject { described_class.new }
describe '#perform' do
# The service is replaced by a double, so only the delegation is checked.
it 'calls de DetectRepositoryLanguages service' do
service = double
allow(::Projects::DetectRepositoryLanguagesService).to receive(:new).and_return(service)
expect(service).to receive(:execute)
subject.perform(project.id, user.id)
end
# -1 is used as an id that matches no record.
context 'when invalid ids are used' do
it 'does not raise when the project could not be found' do
expect do
subject.perform(-1, user.id)
end.not_to raise_error
end
it 'does not raise when the user could not be found' do
expect do
subject.perform(project.id, -1)
end.not_to raise_error
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment