Commit 7bcf737a authored by Eduardo Bonet's avatar Eduardo Bonet

Refactoring .ipynb custom diffs

Refactors the code used to generate the cleaner Jupyter
Notebook diffs into a single place, making it easier to
improve functionality in the future.
parent fb316e7c
# frozen_string_literal: true
module CustomDiffHelper
class << self
def preprocess_before_diff(path, old_blob, new_blob)
return unless path.ends_with? '.ipynb'
transformed_diff = IpynbDiff.diff(old_blob&.data, new_blob&.data,
diff_opts: { context: 5, include_diff_info: true },
transform_options: { cell_decorator: :percent },
raise_if_invalid_notebook: true)
new_diff = strip_diff_frontmatter(transformed_diff)
transformed_for_diff(new_blob, old_blob) if new_diff
Gitlab::AppLogger.info({ message: new_diff ? 'IPYNB_DIFF_GENERATED' : 'IPYNB_DIFF_NIL' })
new_diff
rescue IpynbDiff::InvalidNotebookError => e
Gitlab::ErrorTracking.log_exception(e)
nil
end
def transformed_blob_language(blob)
'md' if transformed_for_diff?(blob)
end
def transformed_blob_data(blob)
if transformed_for_diff?(blob)
IpynbDiff.transform(blob.data,
raise_errors: true,
options: { include_metadata: false, cell_decorator: :percent })
end
end
def strip_diff_frontmatter(diff_content)
diff_content.scan(/.*\n/)[2..-1]&.join('') if diff_content.present?
end
def blobs_with_transformed_diffs
@blobs_with_transformed_diffs ||= {}
end
def transformed_for_diff?(blob)
blobs_with_transformed_diffs[blob]
end
def transformed_for_diff(*blobs)
blobs.each do |b|
blobs_with_transformed_diffs[b] = true if b
end
end
end
end
......@@ -15,19 +15,8 @@ class BlobPresenter < Gitlab::View::Presenter::Delegated
Gitlab::Highlight.highlight(
blob.path,
limited_blob_data(to: to),
language: language,
plain: plain
)
end
def highlight_transformed(plain: nil)
load_all_blob_data
Gitlab::Highlight.highlight(
blob.path,
transformed_blob_data,
language: transformed_blob_language,
CustomDiffHelper.transformed_blob_data(blob) || limited_blob_data(to: to),
language: CustomDiffHelper.transformed_blob_language(blob) || language,
plain: plain
)
end
......@@ -129,21 +118,4 @@ class BlobPresenter < Gitlab::View::Presenter::Delegated
def language
blob.language_from_gitattributes
end
def transformed_blob_language
@transformed_blob_language ||= blob.path.ends_with?('.ipynb') ? 'md' : language
end
def transformed_blob_data
@transformed_blob ||= if blob.path.ends_with?('.ipynb') && blob.transformed_for_diff
IpynbDiff.transform(blob.data,
raise_errors: true,
options: { include_metadata: false, cell_decorator: :percent })
end
@transformed_blob ||= blob.data
rescue IpynbDiff::InvalidNotebookError => e
Gitlab::ErrorTracking.log_exception(e)
blob.data
end
end
......@@ -44,7 +44,11 @@ module Gitlab
new_blob_lazy
old_blob_lazy
preprocess_before_diff(diff) if Feature.enabled?(:jupyter_clean_diffs, repository.project, default_enabled: true)
diff.diff = CustomDiffHelper.preprocess_before_diff(diff.new_path, old_blob_lazy, new_blob_lazy) || diff.diff if use_custom_diff?
end
def use_custom_diff?
Feature.enabled?(:jupyter_clean_diffs, repository.project, default_enabled: true)
end
def position(position_marker, position_type: :text)
......@@ -450,33 +454,6 @@ module Gitlab
find_renderable_viewer_class(classes)
end
def preprocess_before_diff(diff)
return unless diff.new_path.ends_with? '.ipynb'
from = old_blob_lazy&.data
to = new_blob_lazy&.data
transformed_diff = IpynbDiff.diff(from, to,
diff_opts: { context: 5, include_diff_info: true },
transform_options: { cell_decorator: :percent },
raise_if_invalid_notebook: true)
new_diff = strip_diff_frontmatter(transformed_diff)
if new_diff
diff.diff = new_diff
new_blob_lazy.transformed_for_diff = true if new_blob_lazy
old_blob_lazy.transformed_for_diff = true if old_blob_lazy
end
Gitlab::AppLogger.info({ message: new_diff ? 'IPYNB_DIFF_GENERATED' : 'IPYNB_DIFF_NIL' })
rescue IpynbDiff::InvalidNotebookError => e
Gitlab::ErrorTracking.log_exception(e)
end
def strip_diff_frontmatter(diff_content)
diff_content.scan(/.*\n/)[2..-1]&.join('') if diff_content.present?
end
def alternate_viewer_class
return unless viewer.instance_of?(DiffViewer::Renamed)
......
......@@ -153,8 +153,6 @@ module Gitlab
blob.load_all_data!
return blob.present.highlight_transformed.lines if Feature.enabled?(:jupyter_clean_diffs, @project, default_enabled: true)
blob.present.highlight.lines
end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe CustomDiffHelper do
include RepoHelpers
let(:project) { create(:project, :repository) }
let(:repository) { project.repository }
let(:ipynb_blob) { repository.blob_at('f6b7a707', 'files/ipython/markdown-table.ipynb') }
let(:blob) { repository.blob_at('HEAD', 'files/ruby/regex.rb') }
describe "#preprocess_before_diff" do
context 'for ipynb files' do
it 'transforms the diff' do
expect(CustomDiffHelper.preprocess_before_diff(ipynb_blob.path, nil, ipynb_blob)).not_to include('cells')
end
it 'adds the blob to the list of transformed blobs' do
CustomDiffHelper.preprocess_before_diff(ipynb_blob.path, nil, ipynb_blob)
expect(CustomDiffHelper.transformed_for_diff?(ipynb_blob)).to be_truthy
end
end
context 'for other files' do
it 'returns nil' do
expect(CustomDiffHelper.preprocess_before_diff(blob.path, nil, blob)).to be_nil
end
it 'does not add the blob to the list of transformed blobs' do
CustomDiffHelper.preprocess_before_diff(blob.path, nil, blob)
expect(CustomDiffHelper.transformed_for_diff?(blob)).to be_falsey
end
end
end
describe "#transformed_blob_data" do
it 'transforms blob data if file was processed' do
CustomDiffHelper.preprocess_before_diff(ipynb_blob.path, nil, ipynb_blob)
expect(CustomDiffHelper.transformed_blob_data(ipynb_blob)).not_to include('cells')
end
it 'transforms blob data if file was not processed' do
expect(CustomDiffHelper.transformed_blob_data(ipynb_blob)).to be_nil
end
end
describe "#transformed_blob_language" do
it 'is when file was preprocessed' do
CustomDiffHelper.preprocess_before_diff(ipynb_blob.path, nil, ipynb_blob)
expect(CustomDiffHelper.transformed_blob_language(ipynb_blob)).to eq('md')
end
it 'is nil for .ipynb, but blob when it was not preprocessed' do
expect(CustomDiffHelper.transformed_blob_language(ipynb_blob)).to be_nil
end
end
end
......@@ -155,27 +155,25 @@ RSpec.describe BlobPresenter do
presenter.highlight
end
end
end
describe '#highlight_transformed' do
context 'when blob is ipynb' do
let(:blob) { repository.blob_at('f6b7a707', 'files/ipython/markdown-table.ipynb') }
let(:git_blob) { blob.__getobj__ }
before do
allow(git_blob).to receive(:transformed_for_diff).and_return(true)
allow(CustomDiffHelper).to receive(:transformed_for_diff?).and_return(true)
end
it 'uses md as the transformed language' do
expect(Gitlab::Highlight).to receive(:highlight).with('files/ipython/markdown-table.ipynb', anything, plain: nil, language: 'md')
presenter.highlight_transformed
presenter.highlight
end
it 'transforms the blob' do
expect(Gitlab::Highlight).to receive(:highlight).with('files/ipython/markdown-table.ipynb', include("%%"), plain: nil, language: 'md')
presenter.highlight_transformed
presenter.highlight
end
end
......@@ -193,7 +191,7 @@ RSpec.describe BlobPresenter do
it 'does not transform the file' do
expect(Gitlab::Highlight).to receive(:highlight).with('files/ruby/regex.rb', git_blob.data, plain: nil, language: 'ruby')
presenter.highlight_transformed
presenter.highlight
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment