C++语言:
Windiff 原理初探(C++源码)
001
// Windiff 原理初探(C++源码)
002 //详细说明文章参见:http://www.2maomao.com/blog/how-windiff-works-continued-1/
003
004 // mydiff.cpp
005 //
006 #include "stdafx.h"
007 #include <BaseTsd.h>
008 #include <iostream>
009 #include <fstream>
010 #include <string>
011 #include <map>
012 #include <utility>
013 #include <vector>
014 #include <functional>
015 #include <algorithm>
016 using namespace std ;
017
// Ordering predicate for (first, second) pairs that looks at `first` only.
// Returns true when elem1's `first` is strictly smaller than elem2's;
// `second` never influences the result.
bool MyPairCompFirst(std::pair<int, int> elem1, std::pair<int, int> elem2)
{
    const int lhsKey = elem1.first;
    const int rhsKey = elem2.first;
    return lhsKey < rhsKey;
}
022
// Ordering predicate for (first, second) pairs that looks at `second` only.
// Returns true when elem1's `second` is strictly smaller than elem2's;
// `first` never influences the result.
bool MyPairCompSecond(std::pair<int, int> elem1, std::pair<int, int> elem2)
{
    const int lhsPos = elem1.second;
    const int rhsPos = elem2.second;
    return lhsPos < rhsPos;
}
027
// Binary search over pairs ordered ascending by `first`.
//
// valPairs: must be sorted so that `first` is non-decreasing; the order of
//           `second` among entries with equal `first` does not matter.
// value:    the `first` value to look for.
//
// Returns the index of the LAST (rightmost) entry whose `first` equals
// `value`, or -1 when there is none (including when valPairs is empty).
//
// Why rightmost: callers walk backwards from the returned index while
// `first` still matches in order to visit the duplicates. Returning whichever
// duplicate the bisection happened to land on would hide the duplicates to
// its right from such a walk.
int BiSearch(const std::vector<std::pair<int, int> >& valPairs, int value)
{
    int left = 0;
    int right = static_cast<int>(valPairs.size()) - 1;
    int found = -1;
    while (left <= right)
    {
        // left + (right - left) / 2 cannot overflow, unlike (left + right) / 2.
        const int mid = left + (right - left) / 2;
        if (valPairs[mid].first > value)
        {
            right = mid - 1;
        }
        else
        {
            if (valPairs[mid].first == value)
                found = mid;    // remember it, then keep looking further right
            left = mid + 1;
        }
    }
    return found;
}
046
047 int _tmain ( int argc , _TCHAR * argv [])
048 {
049 vector < int > valsNew ;
050 vector < pair < int , int >> valsOld ;
051 vector < pair < int , int >> valsOldBackup ;
052
053 if ( argc != 3 )
054 {
055 printf ( " Usage: /n " );
056 printf ( " mydiff fileNew fileOld /n " );
057 return 0 ;
058 } else
059 {
060 //read the two input file build the hash table, turn string into values for compare
061 string line ;
062 ifstream finNew ( argv [ 1 ]);
063 if ( ! finNew )
064 {
065 cout << "failed to open file:" << argv [ 1 ] << endl ;
066 exit ( 0 );
067 }
068
069 map < string , int > diffLines ;
070 char ch [ 10001 ];
071 while ( finNew . getline ( ch , 10000 ))
072 {
073 line = ch ;
074 const char * p = line . c_str ();
075 //chop off the leading and ending blank chars
076 int pos = 0 ;
077 while ( pos < ( int ) line . size () && ( line [ pos ] == ' ' || line [ pos ] == '/t' )) pos ++ ;
078 int posLeft = pos ;
079 pos = ( int ) line . size () - 1 ;
080 while ( pos >= 0 && ( line [ pos ] == ' ' || line [ pos ] == '/t' )) pos -- ;
081 string temp = line . substr ( posLeft , pos - posLeft );
082 //@@@@ if (temp.empty()) continue; //blank lines is not counted for this special case
083
084 if ( diffLines . find ( line ) == diffLines . end ())
085 diffLines [ line ] = diffLines . size (); //use size() as the unique value
086 valsNew . push_back ( diffLines [ line ]);
087 }
088 finNew . close ();
089
090 ifstream finOld ( argv [ 2 ]);
091 if ( ! finOld )
092 {
093 cout << "failed to open file:" << argv [ 2 ] << endl ;
094 exit ( 0 );
095 }
096 while ( finOld . getline ( ch , 10000 ))
097 {
098 line = ch ;
099 //chop off the leading and ending blank chars
100 int pos = 0 ;
101 while ( pos < line . size () && ( line [ pos ] == ' ' || line [ pos ] == '/t' )) pos ++ ;
102 int posLeft = pos ;
103 pos = line . size () - 1 ;
104 while ( pos >= 0 && ( line [ pos ] == ' ' || line [ pos ] == '/t' )) pos -- ;
105 string temp = line . substr ( posLeft , pos - posLeft );
106 //@@@@ if (temp.empty()) continue; //blank lines is not counted for this special case
107
108 if ( diffLines . find ( line ) == diffLines . end ())
109 diffLines [ line ] = diffLines . size (); //use size() as the unique value
110 valsOld . push_back ( make_pair ( diffLines [ line ], valsOld . size ()));
111 }
112 finOld . close ();
113 diffLines . clear ();
114 } //Here diffLines and file handles should be released
115 valsOldBackup = valsOld ;
116
117 //use the greedy method, each step we should find the most "RECENT" equal lines
118
119 //Sort the old file values, so we can use bi-search against it when we want to compare
120 sort ( valsOld . begin (), valsOld . end (), MyPairCompFirst );
121
122 int posNew = 0 ;
123
124 //Search for the next
125 vector < int > genLinesNew ;
126 vector < int > genLinesOld ;
127 int searchedOldLineCount = 0 ;
128 while ( posNew < valsNew . size () && valsOld . size () > 0 )
129 {
130 // get the next equal one
131 // Calculate the "distance", (leftHand^2 + rightHand^2)
132 int leftHand = 0 ;
133 int rightHand = 0 ;
134 INT64 minValueBar = _I64_MAX ;
135
136 bool bFound = false ;
137 int lastFoundLeftHand = 0 ;
138 int lastFoundRightHand = 0 ;
139 while ( leftHand + posNew < valsNew . size ())
140 {
141 if ( leftHand * leftHand > minValueBar ) break ; //Found! We can stop now!
142
143 int target = valsNew [ posNew + leftHand ];
144 int pos = BiSearch ( valsOld , target );
145 if ( pos < 0 )
146 {
147 leftHand ++ ;
148 continue ;
149 }
150
151 //found a match
152 //get the most "recent" match
153 int posMin = INT_MAX ;
154 int minPos = pos ;
155 while ( pos >= 0 && valsOld [ pos ]. first == target )
156 {
157 if ( valsOld [ pos ]. second < posMin )
158 {
159 posMin = valsOld [ pos ]. second ;
160 minPos = pos ;
161 }
162 pos -- ;
163 }
164 pos = minPos ;
165
166 //process the current match
167 rightHand = valsOld [ pos ]. second - searchedOldLineCount ;
168 if ( minValueBar > (( INT64 )( leftHand )) * leftHand + rightHand * rightHand )
169 {
170 bFound = true ;
171 lastFoundLeftHand = leftHand ;
172 lastFoundRightHand = rightHand ;
173 minValueBar = (( INT64 )( leftHand )) * leftHand + rightHand * rightHand ;
174 }
175 leftHand ++ ;
176 }
177
178 if ( bFound )
179 {
180 //@@@@@@@
181 leftHand = lastFoundLeftHand ;
182 rightHand = lastFoundRightHand ;
183 int left1 = posNew ;
184 int left2 = left1 + leftHand ;
185 int right1 = searchedOldLineCount ;
186 int right2 = right1 + rightHand ;
187 //@@@@Add?
188 //@@@@Delete?
189 //@@@@Change?
190 // printf("%d,%d,c,%d,%d,/n", left1, left2, right1, right2);
191 posNew += leftHand + 1 ;
192 //delete the searched old lines from the rest old lines
193 for ( int i = searchedOldLineCount ; i < searchedOldLineCount + rightHand + 1 ; i ++ )
194 {
195 int target = valsOldBackup [ i ]. first ;
196 int pos = BiSearch ( valsOld , target );
197 if ( pos < 0 )
198 {
199 printf ( "ERROR! this should not happen!, something may be wrong in previous analysis! /n " );
200 printf ( "i=[%d], searchedOldLineCount=[%d] /n " , i , searchedOldLineCount );
201 exit ( 1 );
202 }
203
204 int posMin = INT_MAX ;
205 int minPos = pos ;
206 while ( pos >= 0 && valsOld [ pos ]. first == target )
207 {
208 if ( valsOld [ pos ]. second < posMin )
209 {
210 posMin = valsOld [ pos ]. second ;
211 minPos = pos ;
212 }
213 pos -- ;
214 }
215 pos = minPos ;
216
217 valsOld . erase ( valsOld . begin () + pos );
218 }
219 searchedOldLineCount += rightHand + 1 ;
220 } else
221 {
222 //@@@@ should output the rest RightHand and LeftHand values as changed from L->R
223 printf ( "%d,%d,c,%d,%d, /n " , posNew , valsNew . size () - 1 , searchedOldLineCount , valsOldBackup . size () - 1 );
224 break ;
225 }
226 }
227 system ( "PAUSE" );
228
229 //@@@@
230 //print out in batch file format to support console
231 }
002 //详细说明文章参见:http://www.2maomao.com/blog/how-windiff-works-continued-1/
003
004 // mydiff.cpp
005 //
006 #include "stdafx.h"
007 #include <BaseTsd.h>
008 #include <iostream>
009 #include <fstream>
010 #include <string>
011 #include <map>
012 #include <utility>
013 #include <vector>
014 #include <functional>
015 #include <algorithm>
016 using namespace std ;
017
// Ordering predicate for (first, second) pairs that looks at `first` only.
// Returns true when elem1's `first` is strictly smaller than elem2's;
// `second` never influences the result.
bool MyPairCompFirst(std::pair<int, int> elem1, std::pair<int, int> elem2)
{
    const int lhsKey = elem1.first;
    const int rhsKey = elem2.first;
    return lhsKey < rhsKey;
}
022
// Ordering predicate for (first, second) pairs that looks at `second` only.
// Returns true when elem1's `second` is strictly smaller than elem2's;
// `first` never influences the result.
bool MyPairCompSecond(std::pair<int, int> elem1, std::pair<int, int> elem2)
{
    const int lhsPos = elem1.second;
    const int rhsPos = elem2.second;
    return lhsPos < rhsPos;
}
027
// Binary search over pairs ordered ascending by `first`.
//
// valPairs: must be sorted so that `first` is non-decreasing; the order of
//           `second` among entries with equal `first` does not matter.
// value:    the `first` value to look for.
//
// Returns the index of the LAST (rightmost) entry whose `first` equals
// `value`, or -1 when there is none (including when valPairs is empty).
//
// Why rightmost: callers walk backwards from the returned index while
// `first` still matches in order to visit the duplicates. Returning whichever
// duplicate the bisection happened to land on would hide the duplicates to
// its right from such a walk.
int BiSearch(const std::vector<std::pair<int, int> >& valPairs, int value)
{
    int left = 0;
    int right = static_cast<int>(valPairs.size()) - 1;
    int found = -1;
    while (left <= right)
    {
        // left + (right - left) / 2 cannot overflow, unlike (left + right) / 2.
        const int mid = left + (right - left) / 2;
        if (valPairs[mid].first > value)
        {
            right = mid - 1;
        }
        else
        {
            if (valPairs[mid].first == value)
                found = mid;    // remember it, then keep looking further right
            left = mid + 1;
        }
    }
    return found;
}
046
047 int _tmain ( int argc , _TCHAR * argv [])
048 {
049 vector < int > valsNew ;
050 vector < pair < int , int >> valsOld ;
051 vector < pair < int , int >> valsOldBackup ;
052
053 if ( argc != 3 )
054 {
055 printf ( " Usage: /n " );
056 printf ( " mydiff fileNew fileOld /n " );
057 return 0 ;
058 } else
059 {
060 //read the two input file build the hash table, turn string into values for compare
061 string line ;
062 ifstream finNew ( argv [ 1 ]);
063 if ( ! finNew )
064 {
065 cout << "failed to open file:" << argv [ 1 ] << endl ;
066 exit ( 0 );
067 }
068
069 map < string , int > diffLines ;
070 char ch [ 10001 ];
071 while ( finNew . getline ( ch , 10000 ))
072 {
073 line = ch ;
074 const char * p = line . c_str ();
075 //chop off the leading and ending blank chars
076 int pos = 0 ;
077 while ( pos < ( int ) line . size () && ( line [ pos ] == ' ' || line [ pos ] == '/t' )) pos ++ ;
078 int posLeft = pos ;
079 pos = ( int ) line . size () - 1 ;
080 while ( pos >= 0 && ( line [ pos ] == ' ' || line [ pos ] == '/t' )) pos -- ;
081 string temp = line . substr ( posLeft , pos - posLeft );
082 //@@@@ if (temp.empty()) continue; //blank lines is not counted for this special case
083
084 if ( diffLines . find ( line ) == diffLines . end ())
085 diffLines [ line ] = diffLines . size (); //use size() as the unique value
086 valsNew . push_back ( diffLines [ line ]);
087 }
088 finNew . close ();
089
090 ifstream finOld ( argv [ 2 ]);
091 if ( ! finOld )
092 {
093 cout << "failed to open file:" << argv [ 2 ] << endl ;
094 exit ( 0 );
095 }
096 while ( finOld . getline ( ch , 10000 ))
097 {
098 line = ch ;
099 //chop off the leading and ending blank chars
100 int pos = 0 ;
101 while ( pos < line . size () && ( line [ pos ] == ' ' || line [ pos ] == '/t' )) pos ++ ;
102 int posLeft = pos ;
103 pos = line . size () - 1 ;
104 while ( pos >= 0 && ( line [ pos ] == ' ' || line [ pos ] == '/t' )) pos -- ;
105 string temp = line . substr ( posLeft , pos - posLeft );
106 //@@@@ if (temp.empty()) continue; //blank lines is not counted for this special case
107
108 if ( diffLines . find ( line ) == diffLines . end ())
109 diffLines [ line ] = diffLines . size (); //use size() as the unique value
110 valsOld . push_back ( make_pair ( diffLines [ line ], valsOld . size ()));
111 }
112 finOld . close ();
113 diffLines . clear ();
114 } //Here diffLines and file handles should be released
115 valsOldBackup = valsOld ;
116
117 //use the greedy method, each step we should find the most "RECENT" equal lines
118
119 //Sort the old file values, so we can use bi-search against it when we want to compare
120 sort ( valsOld . begin (), valsOld . end (), MyPairCompFirst );
121
122 int posNew = 0 ;
123
124 //Search for the next
125 vector < int > genLinesNew ;
126 vector < int > genLinesOld ;
127 int searchedOldLineCount = 0 ;
128 while ( posNew < valsNew . size () && valsOld . size () > 0 )
129 {
130 // get the next equal one
131 // Calculate the "distance", (leftHand^2 + rightHand^2)
132 int leftHand = 0 ;
133 int rightHand = 0 ;
134 INT64 minValueBar = _I64_MAX ;
135
136 bool bFound = false ;
137 int lastFoundLeftHand = 0 ;
138 int lastFoundRightHand = 0 ;
139 while ( leftHand + posNew < valsNew . size ())
140 {
141 if ( leftHand * leftHand > minValueBar ) break ; //Found! We can stop now!
142
143 int target = valsNew [ posNew + leftHand ];
144 int pos = BiSearch ( valsOld , target );
145 if ( pos < 0 )
146 {
147 leftHand ++ ;
148 continue ;
149 }
150
151 //found a match
152 //get the most "recent" match
153 int posMin = INT_MAX ;
154 int minPos = pos ;
155 while ( pos >= 0 && valsOld [ pos ]. first == target )
156 {
157 if ( valsOld [ pos ]. second < posMin )
158 {
159 posMin = valsOld [ pos ]. second ;
160 minPos = pos ;
161 }
162 pos -- ;
163 }
164 pos = minPos ;
165
166 //process the current match
167 rightHand = valsOld [ pos ]. second - searchedOldLineCount ;
168 if ( minValueBar > (( INT64 )( leftHand )) * leftHand + rightHand * rightHand )
169 {
170 bFound = true ;
171 lastFoundLeftHand = leftHand ;
172 lastFoundRightHand = rightHand ;
173 minValueBar = (( INT64 )( leftHand )) * leftHand + rightHand * rightHand ;
174 }
175 leftHand ++ ;
176 }
177
178 if ( bFound )
179 {
180 //@@@@@@@
181 leftHand = lastFoundLeftHand ;
182 rightHand = lastFoundRightHand ;
183 int left1 = posNew ;
184 int left2 = left1 + leftHand ;
185 int right1 = searchedOldLineCount ;
186 int right2 = right1 + rightHand ;
187 //@@@@Add?
188 //@@@@Delete?
189 //@@@@Change?
190 // printf("%d,%d,c,%d,%d,/n", left1, left2, right1, right2);
191 posNew += leftHand + 1 ;
192 //delete the searched old lines from the rest old lines
193 for ( int i = searchedOldLineCount ; i < searchedOldLineCount + rightHand + 1 ; i ++ )
194 {
195 int target = valsOldBackup [ i ]. first ;
196 int pos = BiSearch ( valsOld , target );
197 if ( pos < 0 )
198 {
199 printf ( "ERROR! this should not happen!, something may be wrong in previous analysis! /n " );
200 printf ( "i=[%d], searchedOldLineCount=[%d] /n " , i , searchedOldLineCount );
201 exit ( 1 );
202 }
203
204 int posMin = INT_MAX ;
205 int minPos = pos ;
206 while ( pos >= 0 && valsOld [ pos ]. first == target )
207 {
208 if ( valsOld [ pos ]. second < posMin )
209 {
210 posMin = valsOld [ pos ]. second ;
211 minPos = pos ;
212 }
213 pos -- ;
214 }
215 pos = minPos ;
216
217 valsOld . erase ( valsOld . begin () + pos );
218 }
219 searchedOldLineCount += rightHand + 1 ;
220 } else
221 {
222 //@@@@ should output the rest RightHand and LeftHand values as changed from L->R
223 printf ( "%d,%d,c,%d,%d, /n " , posNew , valsNew . size () - 1 , searchedOldLineCount , valsOldBackup . size () - 1 );
224 break ;
225 }
226 }
227 system ( "PAUSE" );
228
229 //@@@@
230 //print out in batch file format to support console
231 }