]> ruin.nu Git - germs.git/blob - src/genealgorithms.cpp
174c6f7b0414277b006b562f2736a83077db9fb7
[germs.git] / src / genealgorithms.cpp
1 /***************************************************************************
2  *   Copyright (C) 2006 by Michael Andreen                                 *
3  *   andreen@student.chalmers.se                                           *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA          *
19  ***************************************************************************/
20
21 #include "genealgorithms.h"
22 #include "geneorder.h"
23 #include "componenttree.h"
24
25 #include <algorithm>
26 #include <set>
27 #include <stack>
28 #include <cstdlib>
29 #include <iostream>
30 using namespace std;
31
32 std::pair<int,int> longestSequences(const GeneOrder& go){
33         vector<vector<int> > v = robinsonSchensted(go);
34         return pair<int,int>(v[0].size(),v.size());
35 }
36
37 std::vector<std::vector<int> > robinsonSchensted(const GeneOrder& go){
38         vector<vector<int> > v;
39         for (GeneOrder::iterator i = go.begin(); i != go.end(); ++i){
40                 int n = abs(*i);
41                 bool added = false;
42                 for (vector<vector<int> >::iterator vs = v.begin();
43                                 vs != v.end(); ++vs){
44                         vector<int>::iterator bigger = upper_bound(vs->begin(),vs->end(),n);
45                         if ( bigger == vs->end()){
46                                 vs->push_back(n);
47                                 added = true;
48                                 break;
49                         }else{
50                                 swap(n,*bigger);
51                         }
52                 }
53                 if (!added){
54                         v.push_back(vector<int>());
55                         v.back().push_back(n);
56                 }
57         }
58         return v;
59 }
60
61 struct FindP{
62         size_t p;
63         FindP(size_t p) : p(p) {}
64         bool operator()(Interval i){
65                 return (i.first == p || i.second == p);
66         }
67 };
68
69
70 size_t countCycles(const GeneOrder& go){
71         size_t cycles = 0;
72         set<size_t> marked;
73         vector<Interval> intervals = findIntervals(go);
74         vector<Interval> points = findIntervalsAtPoints(intervals);
75         for (size_t p = 1; p < go.size(); ++p){
76                 if (marked.find(p) != marked.end())
77                         continue;
78                 Interval i = intervals[points[p].first];
79                 while (marked.find(p) == marked.end()){
80                         marked.insert(p);
81                         if (i == intervals[points[p].first])
82                                 i = intervals[points[p].second];
83                         else
84                                 i = intervals[points[p].first];
85
86                         if (p == i.first)
87                                 p = i.second;
88                         else
89                                 p = i.first;
90                 }
91                 ++cycles;
92         }
93         return cycles;
94 }
95
96 size_t inversionDistance(const GeneOrder& go){
97         size_t cycles = countCycles(go);
98
99         return go.size() - 1 - cycles;
100 }
101
102 int sign(Gene g){
103         if (g > 0)
104                 return 1;
105         if (g < 0)
106                 return -1;
107         return 0;
108 }
109
110 struct Abs{
111         Gene operator()(Gene x) const{
112                 return abs(x);
113         }
114 };
/**
 * Scans the gene order once from left to right and collects Component
 * records. One pair of stacks (Mdir/Sdir) handles "directed" candidates that
 * end at a positive gene, a mirrored pair (Mrev/Srev) handles "reversed"
 * candidates that end at a negative gene.
 *
 * NOTE(review): the structure looks like the linear-time component search of
 * Bergeron, Mixtacki and Stoye -- confirm against the project documentation.
 * NOTE(review): the sentinels pushed below (0 and go.size()-1) suggest that
 * go is expected to be framed as 0 ... go.size()-1, and go.size()-1 would
 * wrap around for an empty order -- confirm the precondition with callers.
 *
 * @param go gene order to analyse.
 * @return the components in the order in which the scan detects them.
 */
std::vector<Component> findComponents(const GeneOrder& go){
        vector<Component> components;
        // os[i] describes the pair (go[i], go[i+1]): the common sign when both
        // genes have the same sign, 0 otherwise. Entries are merged further
        // down whenever a stack entry is popped.
        vector<int> os(go.size()-1);
        for (size_t i = 0; i < os.size(); ++i)
                os[i] = (go[i]*go[i+1] > 0 ? sign(go[i]) : 0);
        // Mdir/Mrev hold gene values, seeded with the sentinels go.size()-1
        // and 0 respectively.
        stack<Gene> Mdir;
        Mdir.push(go.size()-1);
        stack<Gene> Mrev;
        Mrev.push(0);
        // Sdir/Srev hold positions of candidate component starts, both seeded
        // with position 0.
        stack<size_t> Sdir;
        Sdir.push(0);
        stack<size_t> Srev;
        Srev.push(0);
        // dir[i] / rev[i] remember the top of Mdir / Mrev after position i
        // has been processed; index 0 gets the sentinel values.
        vector<Gene> dir;
        dir.push_back(go.size()-1);
        vector<Gene> rev;
        rev.push_back(0);
        size_t s;
        // p is the unsigned version of the gene list.
        vector<Gene> p(go.list());
        transform(p.begin(),p.end(),p.begin(),Abs());
        for (size_t i = 1; i < go.size(); ++i){
                // Directed: on a descent remember the previous value,
                // otherwise drop every remembered value smaller than p[i].
                if (p[i-1] > p[i])
                        Mdir.push(p[i-1]);
                else while (Mdir.top() < p[i])
                        Mdir.pop();
                dir.push_back(Mdir.top());

                // Discard candidate starts whose value is larger than p[i] or
                // whose recorded dir value is smaller than p[i]. The entry
                // below a popped one keeps its orientation only if both
                // agree, otherwise it becomes 0.
                s = Sdir.top();
                while(p[Sdir.top()] > p[i] || dir[Sdir.top()] < p[i]){
                        Sdir.pop();
                        os[Sdir.top()] = (os[Sdir.top()] == os[s] ? os[s] : 0);
                        s = Sdir.top();
                }
                // A component s..i is reported when go[i] is positive, both
                // ends saw the same Mdir top and the value difference equals
                // the position difference. Adjacent positions get orientation 0.
                if (go[i] > 0 && dir[i] == dir[s] && static_cast<Gene>(i - s) == p[i] - p[s])
                        components.push_back(Component(p[s],p[i],(s+1 == i ? 0 : os[s]),s,i));

                // Reverse: mirror image of the directed case, with the
                // comparisons flipped.
                if (p[i-1] < p[i])
                        Mrev.push(p[i-1]);
                else while (Mrev.top() > p[i])
                        Mrev.pop();
                rev.push_back(Mrev.top());

                // Same pruning as above; "s > 0" keeps the seed position 0 on
                // the stack.
                s = Srev.top();
                while((p[s] < p[i] || rev[s] > p[i]) && s > 0){
                        Srev.pop();
                        os[Srev.top()] *= (os[Srev.top()] == os[s] ? 1 : 0);
                        s = Srev.top();
                }
                // Reversed components are reported with negated end values.
                if (go[i] < 0 && rev[i] == rev[s] && static_cast<Gene>(i - s) == p[s] - p[i])
                        components.push_back(Component(-p[s],-p[i],(s+1 == i ? 0 : os[s]),s,i));

                // Update stacks: position i becomes a candidate start on the
                // directed stack when go[i] is positive, otherwise on the
                // reversed stack.
                if (go[i] > 0)
                        Sdir.push(i);
                else
                        Srev.push(i);
        }
        return components;
}
176
177 int sign2(Gene g){
178         if (g < 0)
179                 return -1;
180         return 1;
181 }
182 /**
183  * 
184  */
185 std::vector<Interval> findIntervals(const GeneOrder& go){
186         const size_t max = go.size();
187         vector<Interval> intervals(go.size()-1,Interval(max,max,false));
188         size_t n = 0;
189         const GeneOrder::iterator end = go.end();
190         for (GeneOrder::iterator g = go.begin(); g != end; ++g, ++n){
191                         size_t i = abs(*g);
192                         if (i < max - 1){
193                                 Interval& curr = intervals[i];
194                                 curr.first = n + (*g >= 0 ? 1 : 0);
195
196                                 if (curr.second == max)
197                                         curr.oriented = *g < 0;
198                                 else
199                                         curr.oriented ^=  *g < 0;
200                         }
201                         if (i > 0){
202                                 Interval& prev = intervals[i-1];
203                                 prev.second = n + (*g < 0 ? 1 : 0);
204
205                                 if (prev.first == max)
206                                         prev.oriented = *g < 0;
207                                 else
208                                         prev.oriented ^=  *g < 0;
209                         }
210
211         }
212         return intervals;
213 }
214
215 /**
216  * 
217  */
218 std::vector<Interval> findIntervalsAtPoints(const vector<Interval>& intervals){
219         size_t max = intervals.size()+1;
220         vector<Interval> points(max,Interval(max,max,false));
221         size_t n = 0;
222         for (vector<Interval>::const_iterator i = intervals.begin(); i != intervals.end(); ++i, ++n){
223                 if (points[i->first].first == max){
224                         points[i->first].first = n;
225                 }else
226                         points[i->first].second = n;
227                         
228                 if (points[i->second].first == max){
229                         points[i->second].first = n;
230                 }else
231                         points[i->second].second = n;
232         }
233         return points;
234
235 }