Cassiopee  1.0
Suffix indexer and search tool
 All Classes Functions Variables
Cassiopee.h
1 #include <iostream>
2 #include <fstream>
3 #include <list>
4 
5 #include "tree/tree.hh"
6 
7 #include "CassiopeeConfig.h"
8 
9 #include <boost/archive/text_oarchive.hpp>
10 #include <boost/archive/text_iarchive.hpp>
11 #include <boost/archive/binary_oarchive.hpp>
12 #include <boost/archive/binary_iarchive.hpp>
13 // Provide an implementation of serialize for std::list
14 #include <boost/serialization/list.hpp>
15 
16 using namespace std;
17 
18 
19 
20 
/**
 * Record describing one match.
 *
 * Carries a position and three integer counters. Judging by their names the
 * counters are presumably insertion, deletion and substitution counts --
 * confirm against the search implementation.
 */
class Match {
public:

    /**
     * Equality test between two matches.
     *
     * Two matches are equal when they have the same pos and the same
     * combined in + del total. subst is not taken into account.
     *
     * @param p match to compare against
     * @return true when position and in + del total both agree
     */
    bool operator==(const Match& p) const {
        if (pos != p.pos) {
            return false;
        }
        const int own_total = in + del;
        const int other_total = p.in + p.del;
        return own_total == other_total;
    }

    /** Presumably the number of insertions -- confirm against callers. */
    int in;

    /** Presumably the number of deletions -- confirm against callers. */
    int del;

    /** Presumably the number of substitutions; ignored by operator==. */
    int subst;

    /** Position of the match; compared by operator==. */
    long pos;

    /** Default constructor; only declared here, its definition is not part of this class body. */
    Match();
};
54 
64 class TreeNode {
65 public:
69  char c;
70 
71 
75  list<long> positions;
76 
82  long next_pos;
83 
88 
89 
95  TreeNode(char nc);
96 
103  TreeNode(char nc, long pos);
104  TreeNode();
105 
106 
107 private:
108  friend class boost::serialization::access;
109  template<class Archive>
110  void serialize(Archive & ar, const unsigned int /*version*/)
111  {
112  ar & c;
113  ar & next_pos;
114  ar & next_length;
115  ar & positions;
116  }
117 
118 };
119 
120 
121 
// NOTE(review): this guard encloses only the commented-out operator<< below.
// The Match and TreeNode definitions precede it and the remaining classes
// follow the #endif, so it does not protect this header against multiple
// inclusion. The macro name also starts with a double underscore, a form
// reserved for the implementation. Consider moving the guard to wrap the
// whole file.
122 #ifndef __CASSIOPEE_H_
123 #define __CASSIOPEE_H_
124 //inline std::ostream& operator<<(std::ostream &strm, const TreeNode &a) {
125 // return strm << "TreeNode(" << a.c << ")";
126 //}
127 #endif
128 
129 
136 public:
137 
138 
144  CassieIndexer(const char* path);
145 
146  ~CassieIndexer();
147 
151  void save();
152 
156  void load();
157 
158 
162  void graph();
163 
169  void graph(int depth);
170 
177  string getSuffix(long pos);
178 
182  void index();
183 
187  tree<TreeNode>* getTree();
188 
192  list<Match> matches;
193 
198 
202  char getCharAtSuffix(long pos);
203 
209  void filltree(long pos);
210 
214  long max_depth;
215 
216  long seq_length;
217 
221  bool index_loaded_from_file();
222 
223 private:
224  list<TreeNode> serialized_nodes;
225 
226  bool loaded_from_file;
227 
228  const char* filename;
229  ifstream seqstream;
230  tree<TreeNode> tr;
231 
232  const long MAX_SUFFIX;
233  long suffix_position;
234  char* suffix;
235 
239  long graphNode(tree<TreeNode>::iterator node, long counter, ofstream& myfile, int maxdepth);
240 
244  char* loadSuffix(long pos);
245 
246 
250  void reset_suffix();
251 
252 
253 
261  void fillTreeWithSuffix(tree<TreeNode>::iterator sib, long suffix_pos, long pos);
262  void fillTreeWithSuffix(long suffix_pos, long pos);
263 
264 
265 
266 };
267 
268 
// NOTE(review): the line that opens this class (Cassiopee.h:272 according to
// the index at the end of this listing) is absent here, so the class name is
// not shown.
273 public:
// Static helper taking two strings by value, named `in` and `out`;
// presumably input and output file paths for a FASTA conversion -- confirm
// against the implementation.
277  static void transform_fasta(const string in, const string out);
278 };
279 
/**
 * Character comparison helper with per-code lookup tables.
 *
 * The table names (K, M, R, Y, S, W, B, V, H, D, N) look like IUPAC
 * nucleotide ambiguity codes -- confirm against the definitions in the
 * implementation file.
 */
class Ambiguous {
public:
    /**
     * Compares two characters.
     * @param a first character
     * @param b second character
     * @return true when the implementation considers them equal
     */
    static bool isequal(char a, char b);

private:
    /**
     * Helper taking one character, a character array and its length;
     * presumably tests whether `a` occurs among the `len` entries of `b`
     * -- confirm.
     */
    static bool ismatchequal(char a, const char b[], int len);

    // Lookup tables, one per code; the contents are defined outside this class body.
    static const char K_MATCH[];
    static const char M_MATCH[];
    static const char R_MATCH[];
    static const char Y_MATCH[];
    static const char S_MATCH[];
    static const char W_MATCH[];
    static const char B_MATCH[];
    static const char V_MATCH[];
    static const char H_MATCH[];
    static const char D_MATCH[];
    static const char N_MATCH[];
};
315 
316 
// NOTE(review): the line that opens this definition (Cassiopee.h:317
// according to the index at the end of this listing) is absent here, so its
// name is not shown.
// Comparison functor: returns true when the first Match has a strictly
// smaller pos than the second. Both pointers are dereferenced unchecked.
318 {
319  inline bool operator() (const Match* struct1, const Match* struct2)
320  {
321  return (struct1->pos < struct2->pos);
322  }
323 };
324 
330 
331 public:
337  CassieSearch(CassieIndexer* index_ref);
338 
339  ~CassieSearch();
340 
344  void removeDuplicates();
345 
350 
354  list<Match*> matches;
355 
360  bool ambiguity;
361 
365  int nmax;
366 
374  int mode;
375 
382  void search(string suffix, bool clear);
383 
384 
391  void search(string suffix);
392 
398  void search(string suffixes[]);
399 
403  bool isequal(char a,char b);
404 
405 
414 
415  long pattern_length;
416 
420  void sort();
421 
422 private:
423 
427  static bool same_match (Match* first, Match* second)
428  { return ( *first == *second ); }
429 
430  CassieIndexer* indexer;
431 
440  void getMatchesFromNode(tree<TreeNode>::iterator sib, const int nbSubst, const int nbIn, const int nbDel);
441 
442 
443 
455  void searchAtNode(string suffix, const long suffix_pos, const tree<TreeNode>::iterator root, int nbSubst, int nbIn, int nbDel, int nbN);
456 
468  void searchAtNode(string suffix, const long suffix_pos, const tree<TreeNode>::iterator root, const tree<TreeNode>::iterator start_node, int nbSubst, int nbIn, int nbDel, int nbN);
469 
474  bool searchAtreduction(const string suffix, const tree<TreeNode>::iterator sib, long counter, long tree_reducted_pos, int nbSubst, int nbIn, int nbDel, int nbN);
475 
476 
477 };
478 
479 
480 
Definition: Cassiopee.h:317
bool do_reduction
Definition: Cassiopee.h:197
char c
Definition: Cassiopee.h:69
int max_indel
Definition: Cassiopee.h:409
Match
Definition: Cassiopee.h:24
int subst
Definition: Cassiopee.h:45
Match * match_limits
Definition: Cassiopee.h:349
int in
Definition: Cassiopee.h:37
list< Match * > matches
Definition: Cassiopee.h:354
int max_subst
Definition: Cassiopee.h:413
list< Match > matches
Definition: Cassiopee.h:192
bool ambiguity
Definition: Cassiopee.h:360
int nmax
Definition: Cassiopee.h:365
int del
Definition: Cassiopee.h:41
bool operator==(const Match &p) const
Definition: Cassiopee.h:30
CassieSearch
Definition: Cassiopee.h:329
CassieIndexer
Definition: Cassiopee.h:135
long next_length
Definition: Cassiopee.h:87
long max_depth
Definition: Cassiopee.h:214
long next_pos
Definition: Cassiopee.h:82
int mode
Definition: Cassiopee.h:374
Definition: Cassiopee.h:272
Ambiguous
Definition: Cassiopee.h:283
list< long > positions
Definition: Cassiopee.h:75
TreeNode
Definition: Cassiopee.h:64