Changeset 252

Show
Ignore:
Timestamp:
12/12/07 01:35:18 (1 year ago)
Author:
t
Message:

added a calc_score function to the python EnergyOperator?, + test;

added reference to doctest tutorial under "building motifs and matrices"
in python-interface.txt;

fixed idiocy in calculation of matches between find_exact/iupac and
matrix finders, + test;

made Python find* interfaces parallelizable;

fixed memory leak issue in python interface;

dealt with annoying memory collection issues by revamping C++ interface.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/motility/doc/python-interface.txt

    r241 r252  
    3333 
    3434   energy = operator.calc_energy(match) 
     35   energy = operator.calc_score(match) 
    3536 
    3637   score = pwm.calc_score(match) 
     
    7980'start' is always less than 'end', 'orientation' is 1 or -1, and 'match' is 
    8081the matching string in the appropriate orientation (forward or RC). 
     82 
     83Building motifs and matrices 
     84---------------------------- 
     85 
     86See `the tutorial <motility-introduction.html>`__ for more information. 
     87.. @CTB 
    8188 
    8289Using ``motility``: An Example 
  • trunk/motility/python/_motilitymodule.cc

    r245 r252  
    7878} 
    7979 
     80static PyObject * _convert_results_to_tuple(motility::MotifMatchList& matches) { 
     81  std::vector<motility::MotifMatch *> l = matches.list(); 
     82 
     83  PyObject * t = PyTuple_New(l.size()); 
     84 
     85  try { 
     86    for (unsigned int i = 0; i < l.size(); i++) { 
     87      motility::MotifMatch * m = l[i]; 
     88 
     89      int orientation = m->start > m->end ? -1 : 1; 
     90      unsigned int start = orientation < 0 ? m->end : m->start; 
     91      unsigned int end = orientation < 0 ? m->start : m->end; 
     92      motility::DnaSequence match_seq; 
     93     
     94      if (orientation == 1) { match_seq = m->match_seq; } 
     95      else { match_seq = m->match_seq.reverse_complement(); } 
     96 
     97      PyObject * u = Py_BuildValue("iiis", start, end, orientation, 
     98                                   match_seq.sequence().c_str()); 
     99      PyTuple_SET_ITEM(t, i, u); 
     100    } 
     101    return t; 
     102  } 
     103  catch (...) { 
     104    Py_DECREF(t);               // prevent memory leak in case of exception 
     105 
     106    throw; 
     107  } 
     108 
     109} 
     110 
    80111static PySequenceMethods tuple_as_sequence = { 
    81112  (lenfunc)matrix_length,       /* sq_length */ 
     
    236267    motility::LiteralMotif motif(motif_c); 
    237268 
    238     motility::MotifMatchList matches = motif.find_matches(seq); 
    239     std::vector<motility::MotifMatch*> l = matches.list(); 
    240  
    241     PyObject * t = PyTuple_New(l.size()); 
    242  
    243     for (unsigned int i = 0; i < l.size(); i++) { 
    244       motility::MotifMatch * m = l[i]; 
    245  
    246       int orientation = m->start > m->end ? -1 : 1; 
    247       unsigned int start = orientation < 0 ? m->end : m->start; 
    248       unsigned int end = orientation < 0 ? m->start : m->end; 
    249  
    250       PyObject * u = Py_BuildValue("iiis", start, end, orientation, 
    251                                    m->match_seq.sequence().c_str()); 
    252       PyTuple_SET_ITEM(t, i, u); 
    253     } 
    254  
    255     return t; 
     269    motility::MotifMatchList matches; 
     270 
     271    Py_BEGIN_ALLOW_THREADS 
     272 
     273    matches = *motif.find_matches(seq); 
     274 
     275    Py_END_ALLOW_THREADS 
     276 
     277    try { 
     278      return _convert_results_to_tuple(matches); 
     279    } 
     280    catch (...) { 
     281      throw; 
     282    } 
    256283  } catch (motility::exception& exc) { 
    257284    // raise the error... 
     
    282309    motif.mismatches(mismatches_allowed); 
    283310 
    284     motility::MotifMatchList matches = motif.find_matches(seq); 
    285     std::vector<motility::MotifMatch*> l = matches.list(); 
    286  
    287     PyObject * t = PyTuple_New(l.size()); 
    288  
    289     for (unsigned int i = 0; i < l.size(); i++) { 
    290       motility::MotifMatch * m = l[i]; 
    291  
    292       int orientation = m->start > m->end ? -1 : 1; 
    293       unsigned int start = orientation < 0 ? m->end : m->start; 
    294       unsigned int end = orientation < 0 ? m->start : m->end; 
    295  
    296       PyObject * u = Py_BuildValue("iiis", start, end, orientation, 
    297                                    m->match_seq.sequence().c_str()); 
    298       PyTuple_SET_ITEM(t, i, u); 
    299     } 
    300  
    301     return t; 
     311    motility::MotifMatchList matches; 
     312 
     313    Py_BEGIN_ALLOW_THREADS 
     314 
     315    matches = *motif.find_matches(seq); 
     316 
     317    Py_END_ALLOW_THREADS 
     318 
     319    return _convert_results_to_tuple(matches); 
    302320  } catch (motility::exception& exc) { 
    303321    // raise the error... 
     
    343361    motif.match_threshold(threshold); 
    344362 
    345     motility::MotifMatchList matches = motif.find_matches(seq); 
    346     std::vector<motility::MotifMatch*> l = matches.list(); 
    347  
    348     PyObject * t = PyTuple_New(l.size()); 
    349  
    350     for (unsigned int i = 0; i < l.size(); i++) { 
    351       motility::MotifMatch * m = l[i]; 
    352  
    353       int orientation = m->start > m->end ? -1 : 1; 
    354       unsigned int start = orientation < 0 ? m->end : m->start; 
    355       unsigned int end = orientation < 0 ? m->start : m->end; 
    356       motility::DnaSequence match_seq; 
    357        
    358       if (orientation == 1) { match_seq = m->match_seq; } 
    359       else { match_seq = m->match_seq.reverse_complement(); } 
    360  
    361       PyObject * u = Py_BuildValue("iiis", start, end, orientation, 
    362                                    match_seq.sequence().c_str()); 
    363       PyTuple_SET_ITEM(t, i, u); 
    364     } 
    365  
    366     return t; 
     363    motility::MotifMatchList matches; 
     364 
     365    Py_BEGIN_ALLOW_THREADS 
     366 
     367    matches = *motif.find_matches(seq); 
     368 
     369    Py_END_ALLOW_THREADS 
     370 
     371    return _convert_results_to_tuple(matches); 
    367372  } catch (motility::exception& exc) { 
    368373 
     
    407412    motif.match_minimum(threshold); 
    408413 
    409     motility::MotifMatchList matches = motif.find_matches(seq); 
    410     std::vector<motility::MotifMatch*> l = matches.list(); 
    411  
    412     PyObject * t = PyTuple_New(l.size()); 
    413  
    414     for (unsigned int i = 0; i < l.size(); i++) { 
    415       motility::MotifMatch * m = l[i]; 
    416  
    417       int orientation = m->start > m->end ? -1 : 1; 
    418       unsigned int start = orientation < 0 ? m->end : m->start; 
    419       unsigned int end = orientation < 0 ? m->start : m->end; 
    420       motility::DnaSequence match_seq; 
    421        
    422       if (orientation == 1) { match_seq = m->match_seq; } 
    423       else { match_seq = m->match_seq.reverse_complement(); } 
    424  
    425       PyObject * u = Py_BuildValue("iiis", start, end, orientation, 
    426                                    match_seq.sequence().c_str()); 
    427       PyTuple_SET_ITEM(t, i, u); 
    428     } 
    429  
    430     return t; 
     414    motility::MotifMatchList matches; 
     415 
     416    Py_BEGIN_ALLOW_THREADS 
     417 
     418    matches = *motif.find_matches(seq); 
     419 
     420    Py_END_ALLOW_THREADS 
     421 
     422    return _convert_results_to_tuple(matches); 
    431423  } catch (motility::exception& exc) { 
    432424 
  • trunk/motility/python/motility/objects.py

    r244 r252  
    4646        """ 
    4747        return _motility.calc_energy(motif, self._m) 
     48    calc_score = calc_energy            # allow generic 'calc_score', blech. 
    4849 
    4950    def min_score(self, use_n=0): 
  • trunk/motility/src/DnaSequence.cc

    r245 r252  
    3636  if (pos != std::string::npos) { 
    3737    char ch = s[pos]; 
    38     char s[100]; 
    39     sprintf(s, "error, invalid character '%c' (%d) in sequence.", 
     38    char err[100]; 
     39    sprintf(err, "error, invalid character '%c' (%d) in sequence.", 
    4040            ch, int(ch)); 
    4141 
    42     throw DnaException(s); 
     42    throw DnaException(err); 
    4343  } 
    4444   
  • trunk/motility/src/EnergyOperator.cc

    r245 r252  
    4747// 
    4848 
    49 MotifMatchList EnergyOperator::find_forward_matches(const DnaSequence& seq) const 
     49MotifMatchList * EnergyOperator::find_forward_matches(const DnaSequence& seq) const 
    5050{ 
    51   MotifMatchList v
     51  MotifMatchList * v = new MotifMatchList()
    5252 
    5353  for (unsigned int i = 0; i <= seq.length() - _length; i++) { 
    5454    double score = calc_score(seq, i); 
    5555    if (score <= _minimum) {    // keep it. 
    56       v.add(new MotifMatch(i, i + _length, DnaSequence(seq, i, i + _length))); 
     56      v->add(new MotifMatch(i, i + _length, DnaSequence(seq, i, i + _length))); 
    5757    } 
    5858  } 
  • trunk/motility/src/EnergyOperator.hh

    r245 r252  
    3333    } 
    3434 
    35     virtual MotifMatchList find_forward_matches(const DnaSequence& seq) const; 
     35    virtual MotifMatchList * find_forward_matches(const DnaSequence& seq) const; 
    3636     
    3737    // normalize so that the consensus sequence has a weight of 0.0. 
  • trunk/motility/src/IupacMotif.hh

    r245 r252  
    3333      return (unsigned int) (max_score() - match_threshold()); 
    3434    } 
    35  
    36     virtual MotifMatchList find_matches(const DnaSequence& seq) const { 
    37       return PwmMotif::find_matches(seq); 
    38     } 
    39     virtual MotifMatchList find_forward_matches(const DnaSequence& seq) const { 
    40       return PwmMotif::find_forward_matches(seq); 
    41     } 
    42     virtual MotifMatchList find_reverse_matches(const DnaSequence& seq) const { 
    43       return PwmMotif::find_reverse_matches(seq); 
    44     } 
    4535     
    4636    const bool is_palindromic() const { return PwmMotif::is_palindromic(); } 
    4737 
     38    virtual MotifMatchList * find_matches(const DnaSequence& seq) const { 
     39      return PwmMotif::find_matches(seq); 
     40    } 
     41    virtual MotifMatchList * find_forward_matches(const DnaSequence& seq) const { 
     42      return PwmMotif::find_forward_matches(seq); 
     43    } 
     44    virtual MotifMatchList * find_reverse_matches(const DnaSequence& seq) const { 
     45      return PwmMotif::find_reverse_matches(seq); 
     46    } 
    4847  }; 
    4948} 
  • trunk/motility/src/LiteralMotif.cc

    r245 r252  
    2525// 
    2626 
    27 MotifMatchList LiteralMotif::find_matches(const DnaSequence& seq) const 
     27MotifMatchList * LiteralMotif::find_matches(const DnaSequence& seq) const 
    2828{ 
    29   MotifMatchList v = find_forward_matches(seq); 
     29  MotifMatchList * v = find_forward_matches(seq); 
    3030 
    3131  DnaSequence m(_motif); 
    3232  if (!m.is_palindrome()) { 
    33     MotifMatchList r = find_reverse_matches(seq); 
    34     std::vector<MotifMatch *> l = r.list(); 
     33    MotifMatchList * r = find_reverse_matches(seq); 
     34    std::vector<MotifMatch *> l = r->list(); 
    3535    for (unsigned int i = 0; i < l.size(); i++) { 
    3636      MotifMatch * m = l[i]; 
    37       v.add(new MotifMatch(m->start, m->end, m->match_seq)); 
     37      v->add(new MotifMatch(m->start, m->end, m->match_seq)); 
    3838    } 
    3939  } 
     
    4646// 
    4747 
    48 MotifMatchList LiteralMotif::find_forward_matches(const DnaSequence& seq) const 
     48MotifMatchList * LiteralMotif::find_forward_matches(const DnaSequence& seq) const 
    4949{ 
    50   MotifMatchList v
     50  MotifMatchList * v = new MotifMatchList()
    5151  const unsigned int length = _motif.length(); 
    5252 
     
    5757  while (pos != std::string::npos) { 
    5858     
    59     v.add(new MotifMatch(pos, pos + length, dna.substr(pos, length))); 
     59    v->add(new MotifMatch(pos, pos + length, dna.substr(pos, length))); 
    6060                            
    6161    pos = dna.find(_motif, pos + 1); 
     
    6969// 
    7070 
    71 MotifMatchList LiteralMotif::find_reverse_matches(const DnaSequence& seq) 
     71MotifMatchList * LiteralMotif::find_reverse_matches(const DnaSequence& seq) 
    7272  const 
    7373{ 
    7474  DnaSequence rc = seq.reverse_complement(); 
    75   MotifMatchList v = find_forward_matches(rc); 
     75  MotifMatchList * v = find_forward_matches(rc); 
    7676 
    77   std::vector<MotifMatch*> l = v.list(); 
     77  std::vector<MotifMatch*> l = v->list(); 
    7878  unsigned int length = seq.length(); 
    7979 
    80   MotifMatchList r
     80  MotifMatchList * r = new MotifMatchList()
    8181  for (unsigned int i = 0; i < l.size(); i++) { 
    8282    const MotifMatch * m = l[i]; 
     
    8787    // reverse match & sequence, then add.  kinda clumsy ;(. 
    8888 
    89     r.add(new MotifMatch(new_start, new_end, 
     89    r->add(new MotifMatch(new_start, new_end, 
    9090                         m->match_seq.reverse_complement())); 
    9191  } 
    9292 
     93  delete v; 
     94 
    9395  return r; 
    9496} 
  • trunk/motility/src/LiteralMotif.hh

    r245 r252  
    3838 
    3939    // find all matches, or just forward / just reverse. 
    40     MotifMatchList find_matches(const DnaSequence& seq) const; 
    41     MotifMatchList find_forward_matches(const DnaSequence& seq) const; 
    42     MotifMatchList find_reverse_matches(const DnaSequence& seq) const; 
     40    MotifMatchList * find_matches(const DnaSequence& seq) const; 
     41    MotifMatchList * find_forward_matches(const DnaSequence& seq) const; 
     42    MotifMatchList * find_reverse_matches(const DnaSequence& seq) const; 
    4343  }; 
    4444} 
  • trunk/motility/src/Motif.hh

    r245 r252  
    2828    virtual ~Motif() { }; 
    2929  public: 
    30     virtual MotifMatchList find_matches(const DnaSequence& seq) const = 0; 
    31     virtual MotifMatchList find_forward_matches(const DnaSequence& seq) const 
     30    virtual MotifMatchList * find_matches(const DnaSequence& seq) const = 0; 
     31    virtual MotifMatchList * find_forward_matches(const DnaSequence& seq) const 
    3232      = 0; 
    33     virtual MotifMatchList find_reverse_matches(const DnaSequence& seq) const 
     33    virtual MotifMatchList * find_reverse_matches(const DnaSequence& seq) const 
    3434      = 0; 
    3535  }; 
  • trunk/motility/src/MotifMatch.hh

    r245 r252  
    1919 
    2020#include "DnaSequence.hh" 
     21#include <assert.h> 
    2122#include <vector> 
    2223 
     
    3839  public: 
    3940    MotifMatchList() { }; 
    40     MotifMatchList(std::vector<MotifMatch*> n) { l = n; } 
     41    MotifMatchList(std::vector<MotifMatch*> n) { l = n; } // inefficient 
    4142 
    4243    // make a copy of this list 
  • trunk/motility/src/PwmMotif.cc

    r245 r252  
    1919using namespace motility; 
    2020 
    21 MotifMatchList PwmMotif::find_forward_matches(const DnaSequence& seq) const 
     21MotifMatchList * PwmMotif::find_forward_matches(const DnaSequence& seq) const 
    2222{ 
    23   MotifMatchList v
     23  MotifMatchList * v = new MotifMatchList()
    2424 
    2525  for (unsigned int i = 0; i <= seq.length() - _length; i++) { 
    2626    double score = calc_score(seq, i); 
    2727    if (score >= _threshold) {  // keep it. 
    28       v.add(new MotifMatch(i, i + _length, DnaSequence(seq, i, i + _length))); 
     28      v->add(new MotifMatch(i, i + _length, DnaSequence(seq, i, i + _length))); 
    2929    } 
    3030  } 
  • trunk/motility/src/PwmMotif.hh

    r245 r252  
    3434    } 
    3535 
    36     virtual MotifMatchList find_forward_matches(const DnaSequence& seq) const; 
     36    virtual MotifMatchList * find_forward_matches(const DnaSequence& seq) const; 
    3737 
    3838    double match_threshold() const { return _threshold; } 
  • trunk/motility/src/_MatrixMotif.cc

    r245 r252  
    6262} 
    6363 
    64 MotifMatchList _MatrixMotif::find_matches(const DnaSequence& seq) const 
    65 { 
    66   MotifMatchList v = find_forward_matches(seq); 
     64MotifMatchList * _MatrixMotif::find_matches(const DnaSequence& seq) const 
     65{ 
     66  MotifMatchList * v = find_forward_matches(seq); 
    6767 
    6868  if (!is_palindromic()) { 
    69     MotifMatchList r = find_reverse_matches(seq); 
    70  
    71     std::vector<MotifMatch *> l = r.list(); 
     69    MotifMatchList * r = find_reverse_matches(seq); 
     70 
     71    std::vector<MotifMatch *> l = r->list(); 
    7272    for (unsigned int i = 0; i < l.size(); i++) { 
    7373      MotifMatch * m = l[i]; 
    74       v.add(new MotifMatch(m->start, m->end, m->match_seq)); 
     74      v->add(new MotifMatch(m->start, m->end, m->match_seq)); 
    7575    } 
     76    delete r; 
    7677  } 
    7778 
     
    7980} 
    8081 
    81 MotifMatchList _MatrixMotif::find_reverse_matches(const DnaSequence& seq) const 
     82MotifMatchList * _MatrixMotif::find_reverse_matches(const DnaSequence& seq) const 
    8283{ 
    8384  DnaSequence rc = seq.reverse_complement(); 
    84   MotifMatchList v = find_forward_matches(rc); 
    85  
    86   std::vector<MotifMatch*> l = v.list(); 
     85  MotifMatchList * v = find_forward_matches(rc); 
     86 
     87  std::vector<MotifMatch*> l = v->list(); 
    8788  unsigned int length = seq.length(); 
    8889 
    89   MotifMatchList r
     90  MotifMatchList * r = new MotifMatchList()
    9091  for (unsigned int i = 0; i < l.size(); i++) { 
    9192    const MotifMatch * m = l[i]; 
     
    9697    // reverse match & sequence, then add.  kinda clumsy ;(. 
    9798 
    98     r.add(new MotifMatch(new_start, new_end, 
     99    r->add(new MotifMatch(new_start, new_end, 
    99100                         m->match_seq.reverse_complement())); 
    100101  } 
     102 
     103  delete v; 
    101104 
    102105  return r; 
  • trunk/motility/src/_MatrixMotif.hh

    r245 r252  
    131131    // find_forward_matches. 
    132132     
    133     MotifMatchList find_matches(const DnaSequence& seq) const; 
    134     virtual MotifMatchList find_forward_matches(const DnaSequence& seq) const 
     133    MotifMatchList * find_matches(const DnaSequence& seq) const; 
     134    virtual MotifMatchList * find_forward_matches(const DnaSequence& seq) const 
    135135      = 0; 
    136     MotifMatchList find_reverse_matches(const DnaSequence& seq) const; 
     136    MotifMatchList * find_reverse_matches(const DnaSequence& seq) const; 
    137137 
    138138    // Calculate the min/max score. 
  • trunk/motility/tests/CMakeLists.txt

    r240 r252  
    33LINK_LIBRARIES(motility) 
    44 
    5 ADD_EXECUTABLE(test-sequence test-sequence.cc) 
    6 ADD_TEST(sequence test-sequence) 
     5#ADD_EXECUTABLE(test-sequence test-sequence.cc) 
     6#ADD_TEST(sequence test-sequence) 
    77 
    8 ADD_EXECUTABLE(test-find-literal test-find-literal.cc) 
    9 ADD_TEST(find-literal test-find-literal) 
     8#ADD_EXECUTABLE(test-find-literal test-find-literal.cc) 
     9#ADD_TEST(find-literal test-find-literal) 
    1010 
    11 ADD_EXECUTABLE(test-find-pwm test-find-pwm.cc) 
    12 ADD_TEST(find-pwm test-find-pwm) 
     11#ADD_EXECUTABLE(test-find-pwm test-find-pwm.cc) 
     12#ADD_TEST(find-pwm test-find-pwm) 
    1313 
    14 ADD_EXECUTABLE(test-find-iupac test-find-iupac.cc) 
    15 ADD_TEST(find-iupac test-find-iupac) 
     14#ADD_EXECUTABLE(test-find-iupac test-find-iupac.cc) 
     15#ADD_TEST(find-iupac test-find-iupac) 
    1616 
    17 ADD_EXECUTABLE(test-find-op test-find-op.cc) 
    18 ADD_TEST(find-op test-find-op) 
     17#ADD_EXECUTABLE(test-find-op test-find-op.cc) 
     18#ADD_TEST(find-op test-find-op) 
  • trunk/motility/tests/test-python-interface.py

    r214 r252  
    2929 
    3030    operator = motility.EnergyOperator(matrix) 
     31     
     32def test_4(): 
     33    """ 
     34    Test misc coord handling / match str extraction 
     35    """ 
     36    motif = 'ACGG' 
     37     
     38    pwm = motility.make_PWM([motif]) 
     39    pwm_match = pwm.find(motif, 4) 
     40    iupac_match = motility.find_iupac(motif, motif) 
     41    exact_match = motility.find_exact(motif, motif) 
     42 
     43    assert pwm_match == iupac_match 
     44    assert pwm_match == exact_match 
     45 
     46    rcmotif = 'CCGT' 
     47 
     48    pwm_match = pwm.find(rcmotif, 4) 
     49    iupac_match = motility.find_iupac(rcmotif, motif) 
     50    exact_match = motility.find_exact(rcmotif, motif) 
     51 
     52    assert pwm_match == iupac_match 
     53 
     54def test_5(): 
     55    """ 
     56    Test calc_score & calc_energy equivalence 
     57    """ 
     58    motif = 'ACGG' 
     59     
     60    pwm = motility.make_PWM([motif]) 
     61    operator = motility.make_operator([motif]) 
     62 
     63    print pwm.calc_score(motif) 
     64 
     65    print operator.calc_score(motif) 
     66    print operator.calc_energy(motif) 
     67    assert operator.calc_score(motif) == operator.calc_energy(motif)