]> ruin.nu Git - germs.git/blob - src/genealgorithms.cpp
67c504e77b3f9bdfad9589236e8f65f8e2b2cd5d
[germs.git] / src / genealgorithms.cpp
1 /***************************************************************************
2  *   Copyright (C) 2006 by Michael Andreen                                 *
3  *   andreen@student.chalmers.se                                           *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA          *
19  ***************************************************************************/
20
21 #include "genealgorithms.h"
22 #include "geneorder.h"
23
24 #include <algorithm>
25 #include <set>
26 #include <stack>
27 #include <cstdlib>
28 #include <iostream>
29 using namespace std;
30
31 std::pair<int,int> longestSequences(const GeneOrder& go){
32         vector<vector<int> > v = robinsonSchensted(go);
33         return pair<int,int>(v[0].size(),v.size());
34 }
35
36 std::vector<std::vector<int> > robinsonSchensted(const GeneOrder& go){
37         vector<vector<int> > v;
38         for (GeneOrder::iterator i = go.begin(); i != go.end(); ++i){
39                 int n = abs(*i);
40                 bool added = false;
41                 for (vector<vector<int> >::iterator vs = v.begin();
42                                 vs != v.end(); ++vs){
43                         vector<int>::iterator bigger = upper_bound(vs->begin(),vs->end(),n);
44                         if ( bigger == vs->end()){
45                                 vs->push_back(n);
46                                 added = true;
47                                 break;
48                         }else{
49                                 swap(n,*bigger);
50                         }
51                 }
52                 if (!added){
53                         v.push_back(vector<int>());
54                         v.back().push_back(n);
55                 }
56         }
57         return v;
58 }
59
60 struct FindP{
61         size_t p;
62         FindP(size_t p) : p(p) {}
63         bool operator()(Interval i){
64                 return (i.first == p || i.second == p);
65         }
66 };
67
68
69 size_t countCycles(const GeneOrder& go){
70         size_t cycles = 0;
71         set<size_t> marked;
72         vector<Interval> intervals = findIntervals(go);
73         vector<Interval> points = findIntervalsAtPoints(intervals);
74         for (size_t p = 1; p < go.size(); ++p){
75                 if (marked.find(p) != marked.end())
76                         continue;
77                 Interval i = intervals[points[p].first];
78                 while (marked.find(p) == marked.end()){
79                         marked.insert(p);
80                         if (i == intervals[points[p].first])
81                                 i = intervals[points[p].second];
82                         else
83                                 i = intervals[points[p].first];
84
85                         if (p == i.first)
86                                 p = i.second;
87                         else
88                                 p = i.first;
89                 }
90                 ++cycles;
91         }
92         return cycles;
93 }
94
95 size_t inversionDistance(const GeneOrder& go){
96         size_t cycles = countCycles(go);
97
98         return go.size() - 1 - cycles;
99 }
100
101 int sign(Gene g){
102         if (g > 0)
103                 return 1;
104         if (g < 0)
105                 return -1;
106         return 0;
107 }
108
109 struct Abs{
110         Gene operator()(Gene x) const{
111                 return abs(x);
112         }
113 };
114 std::vector<Component> findComponents(const GeneOrder& go){
115         vector<Component> components;
116         vector<int> os(go.size()-1);
117         for (size_t i = 0; i < os.size(); ++i)
118                 os[i] = (go[i]*go[i+1] > 0 ? sign(go[i]) : 0);
119         stack<Gene> Mdir;
120         Mdir.push(go.size()-1);
121         stack<Gene> Mrev;
122         Mrev.push(0);
123         stack<size_t> Sdir;
124         Sdir.push(0);
125         stack<size_t> Srev;
126         Srev.push(0);
127         vector<Gene> dir;
128         dir.push_back(go.size()-1);
129         vector<Gene> rev;
130         rev.push_back(0);
131         size_t s;
132         vector<Gene> p(go.list());
133         transform(p.begin(),p.end(),p.begin(),Abs());
134         for (size_t i = 1; i < go.size(); ++i){
135                 //Directed
136                 if (p[i-1] > p[i])
137                         Mdir.push(p[i-1]);
138                 else while (Mdir.top() < p[i])
139                         Mdir.pop();
140                 dir.push_back(Mdir.top());
141
142                 s = Sdir.top();
143                 while(p[Sdir.top()] > p[i] || dir[Sdir.top()] < p[i]){
144                         Sdir.pop();
145                         os[Sdir.top()] = (os[Sdir.top()] == os[s] ? os[s] : 0);
146                         s = Sdir.top();
147                 }
148                 if (go[i] > 0 && dir[i] == dir[s] && static_cast<Gene>(i - s) == p[i] - p[s])
149                         components.push_back(Component(p[s],p[i],(s+1 == i ? 0 : os[s])));
150
151                 //Reverse
152                 if (p[i-1] < p[i])
153                         Mrev.push(p[i-1]);
154                 else while (Mrev.top() > p[i])
155                         Mrev.pop();
156                 rev.push_back(Mrev.top());
157
158                 s = Srev.top();
159                 while((p[s] < p[i] || rev[s] > p[i]) && s > 0){
160                         Srev.pop();
161                         os[Srev.top()] *= (os[Srev.top()] == os[s] ? 1 : 0);
162                         s = Srev.top();
163                 }
164                 if (go[i] < 0 && rev[i] == rev[s] && static_cast<Gene>(i - s) == p[s] - p[i])
165                         components.push_back(Component(-p[s],-p[i],(s+1 == i ? 0 : os[s])));
166
167                 //Update stacks
168                 if (go[i] > 0)
169                         Sdir.push(i);
170                 else
171                         Srev.push(i);
172         }
173         return components;
174 }
175
176 /**
177  * 
178  */
179 std::vector<Interval> findIntervals(const GeneOrder& go){
180         vector<Interval> intervals(go.size()-1,Interval(go.size(),go.size()));
181         size_t n = 0;
182         for (GeneOrder::iterator g = go.begin(); g != go.end(); ++g, ++n){
183                         size_t i = abs(*g);
184                         if (i < go.size() - 1)
185                                 intervals[i].first = n + (*g >= 0 ? 1 : 0);
186                         if (i > 0)
187                                 intervals[i-1].second = n + (*g < 0 ? 1 : 0);
188         }
189         return intervals;
190 }
191
192 /**
193  * 
194  */
195 std::vector<Interval> findIntervalsAtPoints(const vector<Interval>& intervals){
196         size_t max = intervals.size()+1;
197         vector<Interval> points(max,Interval(max,max));
198         size_t n = 0;
199         for (vector<Interval>::const_iterator i = intervals.begin(); i != intervals.end(); ++i, ++n){
200                 if (points[i->first].first == max){
201                         points[i->first].first = n;
202                 }else
203                         points[i->first].second = n;
204                         
205                 if (points[i->second].first == max){
206                         points[i->second].first = n;
207                 }else
208                         points[i->second].second = n;
209         }
210         return points;
211
212 }