ProteoWizard
ProteomeDataFileTest.cpp
Go to the documentation of this file.
1 //
2 // $Id$
3 //
4 //
5 // Original author: Darren Kessner <darren@proteowizard.org>
6 //
7 // Copyright 2007 Spielberg Family Center for Applied Proteomics
8 // Cedars-Sinai Medical Center, Los Angeles, California 90048
9 //
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 //
14 // http://www.apache.org/licenses/LICENSE-2.0
15 //
16 // Unless required by applicable law or agreed to in writing, software
17 // distributed under the License is distributed on an "AS IS" BASIS,
18 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 // See the License for the specific language governing permissions and
20 // limitations under the License.
21 //
22 
23 
24 #include "ProteomeDataFile.hpp"
25 #include "Diff.hpp"
26 #include "examples.hpp"
27 #include "Reader_FASTA.hpp"
31 #include <boost/iostreams/filtering_stream.hpp>
32 #include <boost/iostreams/filter/gzip.hpp>
33 #include <boost/iostreams/device/file_descriptor.hpp>
34 #include <boost/iostreams/copy.hpp>
35 
36 
37 using namespace pwiz::util;
38 using namespace pwiz::proteome;
39 using namespace pwiz::data;
40 using boost::shared_ptr;
41 
42 
// Optional sink for verbose test output. It starts out null, and every use in
// this file is guarded by `if (os_)`, so nothing is printed until it is set.
ostream* os_ = 0;


// Common prefix of the names of the temporary files created by these tests.
string filenameBase_("temp.ProteomeDataFileTest");
47 
48 
50  const DiffConfig diffConfig)
51 {
52  if (os_) *os_ << "validateReadIndexed()\n" << endl;
53 
54  string filename1 = filenameBase_ + "1.fasta";
55 
56  // create ProteomeData object in memory
57  ProteomeData tiny;
59 
60  // write to file #1 (static)
61  ProteomeDataFile::write(tiny, filename1, writeConfig);
62 
63  {
64  unit_assert(!bfs::exists(filename1 + ".index"));
65 
66  Reader_FASTA::Config config;
67  config.indexed = true;
68  Reader_FASTA reader(config);
69 
70  // read back into an ProteomeDataFile object
71  ProteomeDataFile pd1(filename1, reader);
72 
73  unit_assert(bfs::exists(filename1));
74  unit_assert(bfs::exists(filename1 + ".index"));
75 
76  // compare
77  Diff<ProteomeData, DiffConfig> diff(tiny, pd1, diffConfig);
78  if (diff && os_) *os_ << diff << endl;
79  unit_assert(!diff);
80 
81  // read back into an ProteomeDataFile object, this time should be indexed
82  ProteomeDataFile pd2(filename1, reader);
83 
84  // compare
85  diff(tiny, pd2);
86  if (diff && os_) *os_ << diff << endl;
87  unit_assert(!diff);
88 
89  // now give the gzip read a workout
90  /*bio::filtering_istream tinyGZ(bio::gzip_compressor() | bio::file_descriptor_source(filename1));
91  bio::copy(tinyGZ, bio::file_descriptor_sink(filename1 + ".gz", ios::out | ios::binary));
92 
93  ProteomeDataFile pd3(filename1 + ".gz", reader);
94 
95  // compare
96  diff(tiny, pd3);
97  if (diff && os_) *os_ << diff << endl;
98  unit_assert(!diff);*/
99  }
100 }
101 
103  const DiffConfig diffConfig)
104 {
105  if (os_) *os_ << "validateWriteRead()\n " << writeConfig << endl;
106 
107  string filename1 = filenameBase_ + "1.fasta";
108  string filename2 = filenameBase_ + "2.fasta";
109 
110  {
111  // create ProteomeData object in memory
112  ProteomeData tiny;
114 
115  // write to file #1 (static)
116  ProteomeDataFile::write(tiny, filename1, writeConfig);
117 
118  shared_ptr<Reader> reader;
119  if (writeConfig.format == ProteomeDataFile::Format_FASTA)
120  {
121  // Reader_FASTA creates the index in the read() call
122  Reader_FASTA::Config config;
123  config.indexed = writeConfig.indexed;
124  reader.reset(new Reader_FASTA(config));
125  }
126 
127  // read back into an ProteomeDataFile object
128  ProteomeDataFile pd1(filename1, *reader);
129 
130  // compare
131  Diff<ProteomeData, DiffConfig> diff(tiny, pd1, diffConfig);
132  if (diff && os_) *os_ << diff << endl;
133  unit_assert(!diff);
134 
135  // write to file #2 (member)
136  pd1.write(filename2, writeConfig);
137 
138  // read back into another ProteomeDataFile object
139  ProteomeDataFile pd2(filename2, *reader);
140 
141  // compare
142  diff(tiny, pd2);
143  if (diff && os_) *os_ << diff << endl;
144  unit_assert(!diff);
145 
146  // now give the gzip read a workout
147  bio::filtering_istream tinyGZ(bio::gzip_compressor() | bio::file_descriptor_source(filename1));
148  bio::copy(tinyGZ, bio::file_descriptor_sink(filename1+".gz", ios::out|ios::binary));
149 
150  ProteomeDataFile pd3(filename1+".gz", *reader);
151 
152  // compare
153  diff(tiny, pd3);
154  if (diff && os_) *os_ << diff << endl;
155  unit_assert(!diff);
156  }
157 
158  // remove temp files
159  bfs::remove(filename1);
160  bfs::remove(filename2);
161  bfs::remove(filename1 + ".gz");
162 
163  bool index1Exists = bfs::exists(filename1 + ".index");
164  bool index2Exists = bfs::exists(filename2 + ".index");
165  bool index3Exists = bfs::exists(filename1 + ".gz.index");
166 
167  bool indexShouldExist = writeConfig.indexed;
168  unit_assert(!indexShouldExist || index1Exists);
169  unit_assert(!indexShouldExist || index2Exists);
170  unit_assert(!indexShouldExist || index3Exists);
171 
172  if (index1Exists) bfs::remove(filename1 + ".index");
173  if (index2Exists) bfs::remove(filename2 + ".index");
174  if (index3Exists) bfs::remove(filename1 + ".gz.index");
175 }
176 
177 void test()
178 {
179  ProteomeDataFile::WriteConfig writeConfig;
180  DiffConfig diffConfig;
181 
182  // test FASTA with binary stream index
183  validateWriteRead(writeConfig, diffConfig);
184 
185  // test FASTA with pre-existing indexes
186  validateReadIndexed(writeConfig, diffConfig);
187 
188  // test FASTA with memory index
189  writeConfig.indexed = false;
190  validateWriteRead(writeConfig, diffConfig);
191 }
192 
193 
194 class TestReader : public Reader
195 {
196  public:
197 
198  TestReader() : count(0) {}
199 
200  virtual std::string identify(const std::string& uri, shared_ptr<istream> uriStreamPtr) const
201  {
202  ++count;
203 
204  if (!bal::iends_with(uri, ".fasta"))
205  return "";
206 
207  string buf;
208  getline(*uriStreamPtr, buf);
209  if (buf[0] != '>')
210  return "";
211 
212  return getType();
213  }
214 
215  virtual void read(const std::string& uri,
216  shared_ptr<istream> uriStreamPtr,
217  ProteomeData& pd) const
218  {
219  ++count;
220  }
221 
222  const char *getType() const {return "testReader";} // satisfy inheritance
223 
224  mutable int count;
225 };
226 
227 
229 {
230  // create a file
231  string filename = filenameBase_ + ".fAsTa";
232  ofstream os(filename.c_str());
233  os << ">Id Description\nSEQUENCE\n";
234  os.close();
235 
236  // open the file with our Reader
237  TestReader reader;
238  ProteomeDataFile pd(filename, reader);
239 
240  // verify that our reader got called properly
241  unit_assert(reader.count == 2);
242 
243  // remove temp file
244  boost::filesystem::remove(filename);
245 
246  if (os_) *os_ << endl;
247 }
248 
249 
250 int main(int argc, char* argv[])
251 {
252  TEST_PROLOG(argc, argv)
253 
254  try
255  {
256  if (argc>1 && !strcmp(argv[1],"-v")) os_ = &cout;
257  test();
258  testReader();
259  }
260  catch (exception& e)
261  {
262  TEST_FAILED(e.what())
263  }
264  catch (...)
265  {
266  TEST_FAILED("Caught unknown exception.")
267  }
268 
270 }
271 
pwiz::proteome::ProteomeDataFile::WriteConfig::indexed
bool indexed
Definition: ProteomeDataFile.hpp:55
test
void test()
Definition: ProteomeDataFileTest.cpp:177
pwiz::proteome::Reader_FASTA
FASTA -> ProteomeData stream serialization.
Definition: Reader_FASTA.hpp:39
pwiz::proteome::ProteomeDataFile::write
static void write(const ProteomeData &pd, const std::string &uri, const WriteConfig &config=WriteConfig(), const pwiz::util::IterationListenerRegistry *iterationListenerRegistry=0)
static write function for any ProteomeData object; iterationListenerRegistry may be used for progress...
pwiz::proteome::ProteomeDataFile::WriteConfig
configuration for write()
Definition: ProteomeDataFile.hpp:53
os_
ostream * os_
Definition: ProteomeDataFileTest.cpp:43
pwiz::proteome::DiffConfig
configuration struct for diffs
Definition: Diff.hpp:73
pwiz::data
Definition: BinaryIndexStream.hpp:31
pwiz::proteome
Definition: AminoAcid.hpp:35
pwiz::identdata::IO::write
PWIZ_API_DECL void write(minimxml::XMLWriter &writer, const CV &cv)
ProteomeDataFile.hpp
filename1
string filename1
Definition: FrequencyDataTest.cpp:50
pwiz::proteome::Reader
interface for file readers
Definition: Reader.hpp:37
pwiz::data::Diff
Calculate diffs of objects in a ProteoWizard data model hierarchy.
Definition: diff_std.hpp:143
pwiz::util
Definition: almost_equal.hpp:33
TEST_EPILOG
#define TEST_EPILOG
Definition: unit.hpp:183
Format_FASTA
Format_FASTA
Definition: ProteomeDataFile.hpp:49
TestReader::TestReader
TestReader()
Definition: ProteomeDataFileTest.cpp:198
Reader_FASTA.hpp
TestReader
Definition: MSDataFileTest.cpp:287
pwiz::proteome::ProteomeData
Definition: ProteomeData.hpp:89
examples.hpp
main
int main(int argc, char *argv[])
Definition: ProteomeDataFileTest.cpp:250
Std.hpp
diff
void diff(const string &filename1, const string &filename2)
Definition: FrequencyDataTest.cpp:40
pwiz::identdata::examples::initializeTiny
PWIZ_API_DECL void initializeTiny(IdentData &mzid)
Filesystem.hpp
pwiz::proteome::ProteomeDataFile
ProteomeData object plus file I/O.
Definition: ProteomeDataFile.hpp:40
TestReader::count
int count
Definition: MSDataFileTest.cpp:322
TEST_FAILED
#define TEST_FAILED(x)
Definition: unit.hpp:177
TEST_PROLOG
#define TEST_PROLOG(argc, argv)
Definition: unit.hpp:175
TestReader::read
virtual void read(const std::string &uri, shared_ptr< istream > uriStreamPtr, ProteomeData &pd) const
Definition: ProteomeDataFileTest.cpp:215
TestReader::getType
const char * getType() const
Definition: ProteomeDataFileTest.cpp:222
pwiz::proteome::Reader_FASTA::Config::indexed
bool indexed
read with a side-by-side index
Definition: Reader_FASTA.hpp:46
pwiz::proteome::Reader_FASTA::Config
Reader_FASTA configuration.
Definition: Reader_FASTA.hpp:44
Diff.hpp
filenameBase_
string filenameBase_
Definition: ProteomeDataFileTest.cpp:46
validateReadIndexed
void validateReadIndexed(const ProteomeDataFile::WriteConfig &writeConfig, const DiffConfig diffConfig)
Definition: ProteomeDataFileTest.cpp:49
unit.hpp
unit_assert
#define unit_assert(x)
Definition: unit.hpp:85
validateWriteRead
void validateWriteRead(const ProteomeDataFile::WriteConfig &writeConfig, const DiffConfig diffConfig)
Definition: ProteomeDataFileTest.cpp:102
pwiz::proteome::ProteomeDataFile::WriteConfig::format
Format format
Definition: ProteomeDataFile.hpp:54
TestReader::identify
virtual std::string identify(const std::string &uri, shared_ptr< istream > uriStreamPtr) const
Definition: ProteomeDataFileTest.cpp:200
pwiz::util::testReader
PWIZ_API_DECL int testReader(const pwiz::msdata::Reader &reader, const std::vector< std::string > &args, bool testAcceptOnly, bool requireUnicodeSupport, const TestPathPredicate &isPathTestable, const ReaderTestConfig &config=ReaderTestConfig())
A common test harness for vendor readers;.