# -*- ruby encoding: utf-8 -*-
# Create the top-level Diff namespace only when no constant named Diff is
# defined yet; an already existing Diff is left exactly as it is.
module Diff; end unless defined? Diff
# = Diff::LCS 1.2.5
#
# Computes "intelligent" differences between two sequenced Enumerables. This
# is an implementation of the McIlroy-Hunt "diff" algorithm for Enumerable
# objects that include Diffable.
#
# Based on Mario I. Wolczko's Smalltalk version (1.2, 1993) and Ned Konz's
# Perl version (Algorithm::Diff 1.15).
#
# == Synopsis
# require 'diff/lcs'
#
# seq1 = %w(a b c e h j l m n p)
# seq2 = %w(b c d e f j k l m r s t)
#
# lcs = Diff::LCS.lcs(seq1, seq2)
# diffs = Diff::LCS.diff(seq1, seq2)
# sdiff = Diff::LCS.sdiff(seq1, seq2)
# seq = Diff::LCS.traverse_sequences(seq1, seq2, callback_obj)
# bal = Diff::LCS.traverse_balanced(seq1, seq2, callback_obj)
# seq2 == Diff::LCS.patch(seq1, diffs)
# seq2 == Diff::LCS.patch!(seq1, diffs)
# seq1 == Diff::LCS.unpatch(seq2, diffs)
# seq1 == Diff::LCS.unpatch!(seq2, diffs)
# seq2 == Diff::LCS.patch(seq1, sdiff)
# seq2 == Diff::LCS.patch!(seq1, sdiff)
# seq1 == Diff::LCS.unpatch(seq2, sdiff)
# seq1 == Diff::LCS.unpatch!(seq2, sdiff)
#
# Alternatively, objects can be extended with Diff::LCS:
#
# seq1.extend(Diff::LCS)
# lcs = seq1.lcs(seq2)
# diffs = seq1.diff(seq2)
# sdiff = seq1.sdiff(seq2)
# seq = seq1.traverse_sequences(seq2, callback_obj)
# bal = seq1.traverse_balanced(seq2, callback_obj)
# seq2 == seq1.patch(diffs)
# seq2 == seq1.patch!(diffs)
# seq1 == seq2.unpatch(diffs)
# seq1 == seq2.unpatch!(diffs)
# seq2 == seq1.patch(sdiff)
# seq2 == seq1.patch!(sdiff)
# seq1 == seq2.unpatch(sdiff)
# seq1 == seq2.unpatch!(sdiff)
#
# Default extensions are provided for Array and String objects through the
# use of 'diff/lcs/array' and 'diff/lcs/string'.
#
# == Introduction (by Mark-Jason Dominus)
#
# <em>The following text is from the Perl documentation. The only changes
# have been to make the text appear better in Rdoc</em>.
#
# I once read an article written by the authors of +diff+; they said that
# they worked very hard on the algorithm until they found the right
# one.
#
# I think what they ended up using (and I hope someone will correct me,
# because I am not very confident about this) was the `longest common
# subsequence' method. In the LCS problem, you have two sequences of items:
#
# a b c d f g h j q z
# a b c d e f g i j k r x y z
#
# and you want to find the longest sequence of items that is present in both
# original sequences in the same order. That is, you want to find a new
# sequence *S* which can be obtained from the first sequence by deleting
# some items, and from the second sequence by deleting other items. You also
# want *S* to be as long as possible. In this case *S* is:
#
# a b c d f g j z
#
# From there it's only a small step to get diff-like output:
#
# e h i k q r x y
# + - + + - + + +
#
# This module solves the LCS problem. It also includes a canned function to
# generate +diff+-like output.
#
# It might seem from the example above that the LCS of two sequences is
# always pretty obvious, but that's not always the case, especially when the
# two sequences have many repeated elements. For example, consider
#
# a x b y c z p d q
# a b c a x b y c z
#
# A naive approach might start by matching up the +a+ and +b+ that appear at
# the beginning of each sequence, like this:
#
#     a x b y c           z p d q
#     a   b   c a x b y c z
#
# This finds the common subsequence +a b c z+. But actually, the LCS is +a x
# b y c z+:
#
#           a x b y c z p d q
#     a b c a x b y c z
#
# == Author
# This version is by Austin Ziegler <austin@rubyforge.org>.
#
# It is based on the Perl Algorithm::Diff (1.15) by Ned Konz, copyright
# © 2000–2002 and the Smalltalk diff version by Mario I.
# Wolczko, copyright © 1993. Documentation includes work by
# Mark-Jason Dominus.
#
# == Licence
# Copyright © 2004–2013 Austin Ziegler
# This program is free software; you can redistribute it and/or modify it
# under the same terms as Ruby, or alternatively under the Perl Artistic
# licence.
#
# == Credits
# Much of the documentation is taken directly from the Perl Algorithm::Diff
# implementation and was written originally by Mark-Jason Dominus and later
# by Ned Konz. The basic Ruby implementation was re-ported from the
# Smalltalk implementation, available at
# ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st
#
# #sdiff and #traverse_balanced were written for the Perl version by Mike
# Schilli <m@perlmeister.com>.
#
# "The algorithm is described in <em>A Fast Algorithm for Computing Longest
# Common Subsequences</em>, CACM, vol.20, no.5, pp.350-353, May
# 1977, with a few minor improvements to improve the speed."
module Diff::LCS
  # Version string of this Diff::LCS release. It is frozen so that code
  # reading the constant cannot modify the shared String in place.
  VERSION = '1.2.5'.freeze
end
require 'diff/lcs/callbacks'
require 'diff/lcs/internals'
# Instance-level API of Diff::LCS. Every method here forwards to the
# module-level method of the same purpose on Diff::LCS, passing +self+ as the
# first sequence, so that any object extended with (or including) Diff::LCS
# can be diffed and patched directly.
module Diff::LCS
  # Returns an Array containing the longest common subsequence(s) between
  # +self+ and +other+. See Diff::LCS#LCS.
  #
  #   lcs = seq1.lcs(seq2)
  def lcs(other, &block) #:yields self[i] if there are matched subsequences:
    Diff::LCS.lcs(self, other, &block)
  end

  # Returns the difference set between +self+ and +other+. See
  # Diff::LCS#diff.
  def diff(other, callbacks = nil, &block)
    Diff::LCS.diff(self, other, callbacks, &block)
  end

  # Returns the balanced ("side-by-side") difference set between +self+ and
  # +other+. See Diff::LCS#sdiff.
  def sdiff(other, callbacks = nil, &block)
    Diff::LCS.sdiff(self, other, callbacks, &block)
  end

  # Traverses the discovered longest common subsequences between +self+ and
  # +other+. See Diff::LCS#traverse_sequences.
  #
  # +callbacks+ may be omitted (or +nil+), in which case
  # Diff::LCS::SequenceCallbacks is used.
  def traverse_sequences(other, callbacks = nil, &block)
    # The receiver must be spelled out: a bare traverse_sequences(self, ...)
    # call resolves to this very instance method, which accepts at most two
    # arguments, and therefore can never succeed.
    # NOTE(review): Diff::LCS::SequenceCallbacks is expected to be provided by
    # 'diff/lcs/callbacks' -- confirm.
    Diff::LCS.traverse_sequences(self, other,
                                 callbacks || Diff::LCS::SequenceCallbacks,
                                 &block)
  end

  # Traverses the discovered longest common subsequences between +self+ and
  # +other+ using the alternate, balanced algorithm. See
  # Diff::LCS#traverse_balanced.
  #
  # +callbacks+ may be omitted (or +nil+), in which case
  # Diff::LCS::BalancedCallbacks is used.
  def traverse_balanced(other, callbacks = nil, &block)
    # Explicit receiver for the same reason as in #traverse_sequences.
    # NOTE(review): Diff::LCS::BalancedCallbacks is expected to be provided by
    # 'diff/lcs/callbacks' -- confirm.
    Diff::LCS.traverse_balanced(self, other,
                                callbacks || Diff::LCS::BalancedCallbacks,
                                &block)
  end

  # Attempts to patch +self+ with the provided +patchset+. A new sequence
  # based on +self+ and the +patchset+ will be created. See Diff::LCS#patch.
  # Attempts to autodiscover the direction of the patch.
  def patch(patchset)
    Diff::LCS.patch(self, patchset)
  end
  # #unpatch is the same method as #patch; the direction is autodiscovered.
  alias_method :unpatch, :patch

  # Attempts to patch +self+ with the provided +patchset+. A new sequence
  # based on +self+ and the +patchset+ will be created. See Diff::LCS#patch.
  # Does no patch direction autodiscovery.
  def patch!(patchset)
    Diff::LCS.patch!(self, patchset)
  end

  # Attempts to unpatch +self+ with the provided +patchset+. A new sequence
  # based on +self+ and the +patchset+ will be created. See Diff::LCS#unpatch.
  # Does no patch direction autodiscovery.
  def unpatch!(patchset)
    Diff::LCS.unpatch!(self, patchset)
  end

  # Attempts to patch +self+ with the provided +patchset+, using #patch!. If
  # the sequence this is used on supports #replace, the value of +self+ will
  # be replaced. See Diff::LCS#patch. Does no patch direction autodiscovery.
  def patch_me(patchset)
    if respond_to? :replace
      replace(patch!(patchset))
    else
      # Without #replace the receiver cannot be updated in place, so the
      # patched copy is simply returned.
      patch!(patchset)
    end
  end

  # Attempts to unpatch +self+ with the provided +patchset+, using
  # #unpatch!. If the sequence this is used on supports #replace, the value
  # of +self+ will be replaced. See Diff::LCS#unpatch. Does no patch direction
  # autodiscovery.
  def unpatch_me(patchset)
    if respond_to? :replace
      replace(unpatch!(patchset))
    else
      # Without #replace the receiver cannot be updated in place, so the
      # unpatched copy is simply returned.
      unpatch!(patchset)
    end
  end
end
class << Diff::LCS
# Returns the longest common subsequence of +seq1+ and +seq2+ as an Array of
# elements taken from +seq1+. When +seq1+ is a String, each element is a
# one-character substring. If a block is given, every selected element is
# passed to it and the block's result is collected in place of the element.
def lcs(seq1, seq2, &block) #:yields seq1[i] for each matched:
  positions = Diff::LCS::Internals.lcs(seq1, seq2)
  from_string = seq1.kind_of?(String)
  common = []

  positions.each_with_index do |partner, index|
    # Slots that Internals.lcs left nil are skipped; presumably they mark
    # positions of seq1 without a counterpart in seq2.
    next if partner.nil?

    # Slice with an explicit length so a String always yields a substring.
    element = from_string ? seq1[index, 1] : seq1[index]
    common << (block ? block.call(element) : element)
  end

  common
end
# Make the same method available under the upper-case name +LCS+ as well.
alias_method :LCS, :lcs
# #diff computes the smallest set of additions and deletions necessary to
# turn the first sequence into the second, and returns a description of
# these changes.
#
# See Diff::LCS::DiffCallbacks for the default behaviour. An alternate
# behaviour may be implemented with Diff::LCS::ContextDiffCallbacks. If a
# Class argument is provided for +callbacks+, #diff will attempt to
# initialise it. If the +callbacks+ object (possibly initialised) responds
# to #finish, it will be called.
def diff(seq1, seq2, callbacks = nil, &block) # :yields diff changes:
  # Use the stock diff callbacks when the caller supplied none, then let the
  # shared traversal helper do the work in :diff mode.
  handler = callbacks || Diff::LCS::DiffCallbacks
  diff_traversal(:diff, seq1, seq2, handler, &block)
end
# #sdiff computes all necessary components to show two sequences and their
# minimized differences side by side, just like the Unix utility
# <em>sdiff</em> does:
#
# old < -
# same same
# before | after
# - > new
#
# See Diff::LCS::SDiffCallbacks for the default behaviour. An alternate
# behaviour may be implemented with Diff::LCS::ContextDiffCallbacks. If a
# Class argument is provided for +callbacks+, #sdiff will attempt to
# initialise it. If the +callbacks+ object (possibly initialised) responds
# to #finish, it will be called.
def sdiff(seq1, seq2, callbacks = nil, &block) #:yields diff changes:
  # Use the stock sdiff callbacks when the caller supplied none, then let the
  # shared traversal helper do the work in :sdiff mode.
  handler = callbacks || Diff::LCS::SDiffCallbacks
  diff_traversal(:sdiff, seq1, seq2, handler, &block)
end
# #traverse_sequences is the most general facility provided by this
# module; #diff and #lcs are implemented as calls to it.
#
# The arguments to #traverse_sequences are the two sequences to traverse,
# and a callback object, like this:
#
# traverse_sequences(seq1, seq2, Diff::LCS::ContextDiffCallbacks.new)
#
# == Callback Methods
#
# Optional callback methods are <em>emphasized</em>.
#
# callbacks#match:: Called when +a+ and +b+ are pointing to
# common elements in +A+ and +B+.
# callbacks#discard_a:: Called when +a+ is pointing to an
# element not in +B+.
# callbacks#discard_b:: Called when +b+ is pointing to an
# element not in +A+.
# <em>callbacks#finished_a</em>:: Called when +a+ has reached the end of
# sequence +A+.
# <em>callbacks#finished_b</em>:: Called when +b+ has reached the end of
# sequence +B+.
#
# == Algorithm
#
#       a---+
#           v
#       A = a b c e h j l m n p
#       B = b c d e f j k l m r s t
#           ^
#       b---+
#
# If there are two arrows (+a+ and +b+) pointing to elements of sequences
# +A+ and +B+, the arrows will initially point to the first elements of
# their respective sequences. #traverse_sequences will advance the arrows
# through the sequences one element at a time, calling a method on the
# user-specified callback object before each advance. It will advance the
# arrows in such a way that if there are elements <tt>A[i]</tt> and
# <tt>B[j]</tt> which are both equal and part of the longest common
# subsequence, there will be some moment during the execution of
# #traverse_sequences when arrow +a+ is pointing to <tt>A[i]</tt> and
# arrow +b+ is pointing to <tt>B[j]</tt>. When this happens,
# #traverse_sequences will call <tt>callbacks#match</tt> and then it will
# advance both arrows.
#
# Otherwise, one of the arrows is pointing to an element of its sequence
# that is not part of the longest common subsequence. #traverse_sequences
# will advance that arrow and will call <tt>callbacks#discard_a</tt> or
# <tt>callbacks#discard_b</tt>, depending on which arrow it advanced. If
# both arrows point to elements that are not part of the longest common
# subsequence, then #traverse_sequences will advance one of them and call
# the appropriate callback, but it is not specified which it will call.
#
# The methods for <tt>callbacks#match</tt>, <tt>callbacks#discard_a</tt>,
# and <tt>callbacks#discard_b</tt> are invoked with an event comprising
# the action ("=", "+", or "-", respectively), the indices +i+ and +j+,
# and the elements <tt>A[i]</tt> and <tt>B[j]</tt>. Return values are
# discarded by #traverse_sequences.
#
# === End of Sequences
#
# If arrow +a+ reaches the end of its sequence before arrow +b+ does,
# #traverse_sequences will try to call <tt>callbacks#finished_a</tt> with
# the last index and element of +A+ (<tt>A[-1]</tt>) and the current index
# and element of +B+ (<tt>B[j]</tt>). If <tt>callbacks#finished_a</tt>
# does not exist, then <tt>callbacks#discard_b</tt> will be called on each
# element of +B+ until the end of the sequence is reached (the call will
# be done with <tt>A[-1]</tt> and <tt>B[j]</tt> for each element).
#
# If +b+ reaches the end of +B+ before +a+ reaches the end of +A+,
# <tt>callbacks#finished_b</tt> will be called with the current index and
# element of +A+ (<tt>A[i]</tt>) and the last index and element of +B+
# (<tt>B[-1]</tt>). Again, if <tt>callbacks#finished_b</tt> does not exist
# on the callback object, then <tt>callbacks#discard_a</tt> will be called
# on each element of +A+ until the end of the sequence is reached
# (<tt>A[i]</tt> and <tt>B[-1]</tt>).
#
Loading ...