WebSVN – MLAB – /WebSVN/include/diff_util.php Rev 4987 and /WebSVN/include/diff_util.php Rev 4988 – Path Comparison

 /WebSVN/include/diff_util.php
 ,0 → 1,364
+<?php
+// WebSVN - Subversion repository viewing via the web using PHP
+// Copyright (C) 2004-2006 Tim Armes
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// --
+//
+// diff_util.php
+//
+// help diff_inc.php to make sensible changes from added and deleted diff lines
+// These lines are automatically paired and also inline diff is performed to show
+// insertions/deletions on one line. diff_inc.php must have all Horde_Text_Diff
+// requirements met (include_once statements).
+// Interface for diffing function
+class LineDiffInterface {
+        // similarity 1 means that strings are very close to each other
+        // 0 means totally different
+        function lineSimilarity($text1, $text2) {
+                assert(false);
+        }
+        // return array($left, $right) annotated with <ins> and <del>
+        function inlineDiff($text1, $highlighted1, $text2, $highlighted2, $highlighted) {
+                assert(false);
+        }
+}
+// Default line diffing function
+class LineDiff extends LineDiffInterface {
+        private bool $ignoreWhitespace;
+        function __construct($ignoreWhitespace) {
+                $this->ignoreWhitespace = $ignoreWhitespace;
+        }
+        // {{{ levenshtein2
+        // levenshtein edit distance, on small strings use php function
+        // on large strings approximate distance using words
+        // computed by dynamic programming
+        function levenshtein2($str1, $str2) {
+                if (strlen($str1) < 255 && strlen($str2) < 255) {
+                        return levenshtein($str1, $str2);
+                }
+                $l1 = explode(' ', $str1);
+                $l2 = explode(' ', $str2);
+                $n = count($l1);
+                $m = count($l2);
+                $d = array_fill(0, $n + 1, array_fill(0, $m + 1, 0));
+                for ($i = 1; $i < $n + 1; $i++) {
+                        $d[$i][0] = $i;
+                }
+                for ($j = 1; $j < $m + 1; $j++) {
+                        $d[0][$j] = $j;
+                }
+                for ($i = 1; $i < $n + 1; $i++) {
+                        for ($j = 1; $j < $m + 1; $j++) {
+                                $c = ($l1[$i - 1] == $l2[$j - 1]) ? 0 : strlen($l1[$i - 1]) + strlen($l2[$j - 1]);
+                                $d[$i][$j] = min($d[$i - 1][$j] + 1, $d[$i][$j - 1] + 1, $d[$i - 1][$j - 1] + $c);
+                        }
+                }
+                return $d[$n][$m];
+        }
+        // }}}
+        // {{{ lineSimilarity
+        function lineSimilarity($text1, $text2) {
+                $distance = $this->levenshtein2($text1, $text2);
+                return max(0.0, 1.0 - $distance / (strlen($text1) + strlen($text2) + 4));
+        }
+        // }}}
+        // {{{  tokenize whole line into words
+        // note that separators are returned as tokens of length 1
+        // and if $ignoreWhitespace is true, consecutive whitespaces are returned as one token
+        function tokenize($string, $highlighted, $ignoreWhitespace) {
+                $html = array('<' => '>', '&' => ';');
+                $whitespaces = array("\t","\n","\r",' ');
+                $separators = array('.','-','+','*','/','<','>','?','(',')','&','/','{','}','[',']',':',';');
+                $data = array();
+                $segment = '';
+                $segmentIsWhitespace = true;
+                $count = strlen($string);
+                for ($i = 0; $i < $count; $i++) {
+                        $c = $string[$i];
+                        if ($highlighted && array_key_exists($c, $html)) {
+                                if ($segment != '') {
+                                        $data[] = $segment;
+                                }
+                                // consider html tags and entities as a single token
+                                $endchar = $html[$c];
+                                $segment = $c;
+                                do {
+                                        $i++;
+                                        $c = $string[$i];
+                                        $segment .= $c;
+                                } while ($c != $endchar && $i < $count - 1);
+                                $data[] = $segment;
+                                $segment = '';
+                                $segmentIsWhitespace = false;
+                        } else if (in_array($c, $separators) || (!$ignoreWhitespace && in_array($c, $whitespaces))) {
+                                // if it is separator or whitespace and we do not consider consecutive whitespaces
+                                if ($segment != '') {
+                                        $data[] = $segment;
+                                }
+                                $data[] = $c;
+                                $segment = '';
+                                $segmentIsWhitespace = true;
+                        } else if (in_array($c, $whitespaces)) {
+                                // if it is whitespace and we consider consecutive whitespaces as one token
+                                if (!$segmentIsWhitespace) {
+                                        $data[] = $segment;
+                                        $segment = '';
+                                        $segmentIsWhitespace = true;
+                                }
+                                $segment .= $c;
+                        } else {
+                                // no separator or whitespace
+                                if ($segmentIsWhitespace && $segment != '') {
+                                        $data[] = $segment;
+                                        $segment = '';
+                                }
+                                $segment .= $c;
+                                $segmentIsWhitespace = false;
+                        }
+                }
+                if ($segment != '') {
+                        $data[] = $segment;
+                }
+                return $data;
+        }
+        // }}}
+        // {{{ lineDiff
+        function inlineDiff($text1, $highlighted1, $text2, $highlighted2, $highlighted) {
+                $whitespaces = array(' ', "\t", "\n", "\r");
+                $do_diff = true;
+                if ($text1 == '' || $text2 == '') {
+                        $do_diff = false;
+                }
+                if ($this->ignoreWhitespace && (str_replace($whitespaces, array(), $text1) == str_replace($whitespaces, array(), $text2))) {
+                        $do_diff = false;
+                }
+                // Exit gracefully if loading of Horde_Text_Diff failed
+                if (!class_exists('Horde_Text_Diff') || !class_exists('Horde_Text_Diff_Mapped')) {
+                        $do_diff = false;
+                }
+                // Return highlighted lines without doing inline diff
+                if (!$do_diff) {
+                        return array($highlighted1, $highlighted2);
+                }
+                $tokens1 = $this->tokenize($highlighted1, $highlighted, $this->ignoreWhitespace);
+                $tokens2 = $this->tokenize($highlighted2, $highlighted, $this->ignoreWhitespace);
+                if (!$this->ignoreWhitespace) {
+                        $diff = new Horde_Text_Diff('Native', array($tokens1, $tokens2));
+                } else {
+                        // we need to create mapped parts for MappedDiff
+                        $mapped1 = array();
+                        foreach ($tokens1 as $token) {
+                                $mapped1[] = str_replace($whitespaces, array(), $token);
+                        }
+                        $mapped2 = array();
+                        foreach ($tokens2 as $token) {
+                                $mapped2[] = str_replace($whitespaces, array(), $token);
+                        }
+                        $diff = new Horde_Text_Diff_Mapped('Native', array($tokens1, $tokens2, $mapped1, $mapped2));
+                }
+                // now, get the diff and annotate text
+                $edits = $diff->getDiff();
+                $line1 = '';
+                $line2 = '';
+                foreach ($edits as $edit) {
+                        if ($edit instanceof Horde_Text_Diff_Op_Copy) {
+                                $line1 .= implode('', $edit->orig);
+                                $line2 .= implode('', $edit->final);
+                        } else if ($edit instanceof Horde_Text_Diff_Op_Delete) {
+                                $line1 .= '<del>'.implode('', $edit->orig).'</del>';
+                        } else if ($edit instanceof Horde_Text_Diff_Op_Add) {
+                                $line2 .= '<ins>'.implode('', $edit->final).'</ins>';
+                        } else if ($edit instanceof Horde_Text_Diff_Op_Change) {
+                                $line1 .= '<del>'.implode('', $edit->orig).'</del>';
+                                $line2 .= '<ins>'.implode('', $edit->final).'</ins>';
+                        } else {
+                                assert(false);
+                        }
+                }
+                return array($line1, $line2);
+        }
+        // }}}
+}
+// Class for computing sensibly added/deleted block of lines.
+class SensibleLineChanges {
+        var $_added = array();
+        var $_deleted = array();
+        var $_lineDiff = null;
+        function __construct($lineDiff) {
+                $this->_lineDiff = $lineDiff;
+        }
+        function addDeletedLine($text, $highlighted_text, $lineno) {
+                $this->_deleted[] = array($text, $highlighted_text, $lineno);
+        }
+        function addAddedLine($text, $highlighted_text, $lineno) {
+                $this->_added[] = array($text, $highlighted_text, $lineno);
+        }
+        // this function computes simple match - first min(deleted,added) lines are marked as changed
+        // it is intended to be run instead of _computeBestMatching if the diff is too big
+        function _computeFastMatching($n, $m) {
+                $result = array();
+                $q = 0;
+                while ($q < $n && $q < $m) {
+                        $result[] = array($this->_deleted[$q], $this->_added[$q]);
+                        $q++;
+                }
+                while ($q < $n) {
+                        $result[] = array($this->_deleted[$q], null);
+                        $q++;
+                }
+                while ($q < $m) {
+                        $result[] = array(null, $this->_added[$q]);
+                        $q++;
+                }
+                return $result;
+        }
+        // {{{ _computeBestMatching
+        // dynamically compute best matching
+        // note that this is O(n*m) * O(line similarity)
+        function _computeBestMatching() {
+                $n = count($this->_deleted);
+                $m = count($this->_added);
+                // if the computation will be slow, just run fast algorithm
+                if ($n * $m > 10000) {
+                        return $this->_computeFastMatching($n, $m);
+                }
+                // dyn[$i][$j] holds best sum of similarities we can obtain if we match
+                // first $i deleted lines and first $j added lines
+                $dyn = array_fill(0, $n + 1, array_fill(0, $m + 1, 0.0));
+                // backlinks, so we can reconstruct best layout easily
+                $back = array_fill(0, $n + 1, array_fill(0, $m + 1, -1));
+                // if there is no similarity, prefer adding/deleting lines
+                $value_del = 0.1;
+                $value_add = 0.1;
+                // initialize arrays
+                for ($i = 1; $i <= $n; $i++) {
+                        $back[$i][0] = 0;
+                        $dyn[$i][0] = $value_del * $i;
+                }
+                for ($j = 1; $j <= $m; $j++) {
+                        $back[0][$j] = 1;
+                        $dyn[0][$j] = $value_add * $j;
+                }
+                // main dynamic programming
+                for ($i = 1; $i <= $n; $i++) {
+                        for ($j = 1; $j <= $m; $j++) {
+                                $best = - 1.0;
+                                $b = -1;
+                                if ($dyn[$i - 1][$j] + $value_del >= $best) {
+                                        $b = 0;
+                                        $best = $dyn[$i - 1][$j] + $value_del;
+                                }
+                                if ($dyn[$i][$j - 1] + $value_add >= $best) {
+                                        $b = 1;
+                                        $best = $dyn[$i][$j - 1] + $value_add;
+                                }
+                                $sim = $this->_lineDiff->lineSimilarity($this->_deleted[$i - 1][0], $this->_added[$j - 1][0]);
+                                if ($dyn[$i - 1][$j - 1] + $sim >= $best) {
+                                        $b = 2;
+                                        $best = $dyn[$i - 1][$j - 1] + $sim;
+                                }
+                                $back[$i][$j] = $b;
+                                $dyn[$i][$j] = $best;
+                        }
+                }
+                // compute layout for best result
+                $i = $n;
+                $j = $m;
+                $result = array();
+                while ($i + $j >= 1) {
+                        switch($back[$i][$j]) {
+                                case 2: array_push($result, array($this->_deleted[$i - 1], $this->_added[$j - 1]));
+                                        $i--;
+                                        $j--;
+                                        break;
+                                case 1: array_push($result, array(null, $this->_added[$j - 1]));
+                                        $j--;
+                                        break;
+                                case 0: array_push($result, array($this->_deleted[$i - 1], null));
+                                        $i--;
+                                        break;
+                                default:
+                                        assert(false);
+                        }
+                }
+                return array_reverse($result);
+        }
+        // }}}
+        // {{{ addChangesToListing
+        // add computed changes to the listing
+        function addChangesToListing(&$listingHelper, $highlighted) {
+                $matching = $this->_computeBestMatching();
+                foreach ($matching as $change) {
+                        if ($change[1] == null) {
+                                // deleted -- preserve original highlighted text
+                                $listingHelper->addDeletedLine($change[0][1], $change[0][2]);
+                        } else if ($change[0] == null) {
+                                // added   -- preserve original highlighted text
+                                $listingHelper->addAddedLine($change[1][1], $change[1][2]);
+                        } else {
+                                // this is fully changed line, make inline diff
+                                $diff = $this->_lineDiff->inlineDiff($change[0][0], $change[0][1], $change[1][0], $change[1][1], $highlighted);
+                                $listingHelper->addChangedLine($diff[0], $change[0][2], $diff[1], $change[1][2]);
+                        }
+                }
+                $this->clear();
+        }
+        // }}}
+        function clear() {
+                $this->_added = array();
+                $this->_deleted = array();
+        }
+}

Subversion Repositories – MLAB

/WebSVN/include/diff_util.php @ 4987 → /WebSVN/include/diff_util.php @ 4988

Compare Path:	Rev
With Path:	Rev