Newer
Older
#ifndef LIBWCCL_WCCLFILE_H
#define LIBWCCL_WCCLFILE_H
#include <libwccl/values/bool.h>
#include <libwccl/values/match.h>
#include <libwccl/values/position.h>
#include <libwccl/values/strset.h>
#include <libwccl/values/tset.h>
#include <libwccl/wcclfileopsections.h>
#include <libwccl/ops/tagrulesequence.h>
#include <libwccl/ops/matchrulesequence.h>
#include <libwccl/exception.h>
#include <libpwrutils/pathsearch.h>
/**
* Class representing contents of parsed WCCL files
* - imported lexicons, any typed and untyped named
* operator sections, up to one tag rules section,
* and up to one match rules section.
*/
: WcclFileOpSections<UntypedOpSequence>,
WcclFileOpSections<OpSequence<StrSet> >,
WcclFileOpSections<OpSequence<TSet> >,
WcclFileOpSections<OpSequence<Bool> >,
WcclFileOpSections<OpSequence<Position> >,
WcclFileOpSections<OpSequence<Match> >
{
public:
WcclFile(const Corpus2::Tagset& tagset, const std::string& search_path);
// Untyped and typed operator sections: @X:"sectioname" ( op1; op2 )
/////////////////////
/**
* @returns All untyped sections from the WCCL file: @"name" ( anyop0; ...; anyopN )
*/
const std::vector<boost::shared_ptr<UntypedOpSequence> >& untyped_sections();
/**
* @returns All sections of given type T from the WCCL file: @T:"name" ( op<T>0; ...; op<T>N )
*/
const typename std::vector<boost::shared_ptr<OpSequence<T> > >& sections();
/**
* @returns True if the WCCL file contains untyped section of given name.
* False otherwise.
*/
bool has_untyped_section(const std::string& name) const;
/**
* @returns True if the WCCL file contains section of given name and type T.
* False otherwise.
*/
template<class T>
bool has_section(const std::string& name) const;
/**
* @returns Names of all untyped sections in this WCCL file, in the order they
* were found in the file.
*/
std::vector<std::string> untyped_section_names() const;
/**
* @returns Names of all sections of given type T from this WCCL file, in the
* order they were found in the file.
*/
template<class T>
std::vector<std::string> section_names() const;
/**
* @returns Untyped section of given name.
* @throws InvalidArgument if there is no untyped section of given name.
*/
UntypedOpSequence& get_untyped_section(const std::string& name);
/**
* @returns Untyped section of given name (const).
* @throws InvalidArgument if there is no untyped section of given name.
*/
const UntypedOpSequence& get_untyped_section(const std::string& name) const;
/**
* @returns Section of given name and type T.
* @throws InvalidArgument if there is no section of given name and type T.
*/
template<class T>
OpSequence<T>& get_section(const std::string& name);
/**
* @returns Section of given name and type T (const).
* @throws InvalidArgument if there is no section of given name and type T.
*/
template<class T>
const OpSequence<T>& get_section(const std::string& name) const;
/**
* @returns Shared pointer to an untyped section of the given name.
* @throws InvalidArgument if there is no untyped section of given name.
*/
boost::shared_ptr<UntypedOpSequence> get_untyped_section_ptr(const std::string& name);
/**
* @returns Shared pointer to an untyped section of the given name (const).
* @throws InvalidArgument if there is no untyped section of given name.
*/
boost::shared_ptr<const UntypedOpSequence> get_untyped_section_ptr(const std::string& name) const;
/**
* @returns Shared pointer to a section of the given name and type T.
* @throws InvalidArgument if there is no section of given name and type T.
*/
template<class T>
boost::shared_ptr<OpSequence<T> > get_section_ptr(const std::string& name);
/**
* @returns Shared pointer to a section of the given name and type T (const).
* @throws InvalidArgument if there is no section of given name and type T.
*/
template<class T>
boost::shared_ptr<const OpSequence<T> > get_section_ptr(const std::string& name) const;
/**
* @returns Operator from an untyped section of given name, present at given index.
* @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence).
* @throws InvalidArgument if there is no untyped section of given name or if idx is out of range.
*/
FunctionalOperator& get_untyped_op(const std::string& name, size_t idx = 0);
/**
* @returns Operator from an untyped section of given name, present at given index (const).
* @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence).
* @throws InvalidArgument if there is no untyped section of given name or if idx is out of range.
*/
const FunctionalOperator& get_untyped_op(const std::string& name, size_t idx = 0) const;
/**
* @returns Operator from a section of given name and type T, present at given index.
* @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence).
* @throws InvalidArgument if there is no section of given name and type T or if idx is out of range.
*/
template<class T>
Operator<T>& get_op(const std::string& name, size_t idx = 0);
/**
* @returns Operator from a section of given name and type T, present at given index (const).
* @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence).
* @throws InvalidArgument if there is no section of given name and type T or if idx is out of range.
*/
template<class T>
const Operator<T>& get_op(const std::string& name, size_t idx = 0) const;
/**
* @returns Shared pointer to an operator from an untyped section of given name, present at given index.
* @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence).
* @throws InvalidArgument if there is no untyped section of given name or if idx is out of range.
*/
boost::shared_ptr<FunctionalOperator> get_untyped_op_ptr(const std::string& name, size_t idx = 0);
/**
* @returns Shared pointer to an operator from an untyped section of given name, present at given index (const).
* @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence).
* @throws InvalidArgument if there is no untyped section of given name or if idx is out of range.
*/
boost::shared_ptr<const FunctionalOperator> get_untyped_op_ptr(const std::string& name, size_t idx = 0) const;
/**
* @returns Shared pointer to an operator from a section of given name and type T, present at given index.
* @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence).
* @throws InvalidArgument if there is no section of given name and type T, or if idx is out of range.
*/
template<class T>
boost::shared_ptr<Operator<T> > get_op_ptr(const std::string& name, size_t idx = 0);
/**
* @returns Shared pointer to an operator from a section of given name and type T, present at given index (const).
* @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence).
* @throws InvalidArgument if there is no section of given name and type T, or if idx is out of range.
*/
template<class T>
boost::shared_ptr<const Operator<T> > get_op_ptr(const std::string& name, size_t idx = 0) const;
/**
* @returns Vector of all name-operator pairs from untyped sections, in the order they were found in the file.
* @note Generated names of operators are derived from section name they are contained within,
* suffixed with hyphen and zero-based index. E.g. the first operator in section named "seq"
* will have name "seq-0", second operator's name will be "seq-1" etc.
*/
UntypedOpSequence::name_op_v_t gen_name_untyped_op_pairs();
/**
* @returns Vector of all name-operator pairs from untyped sections, in the order they were found in the file (const).
* @note Generated names of operators are derived from section name they are contained within,
* suffixed with hyphen and zero-based index. E.g. the first operator in section named "seq"
* will have name "seq-0", second operator's name will be "seq-1" etc.
*/
UntypedOpSequence::name_op_v_c_t gen_name_untyped_op_pairs() const;
/**
* @returns Vector of all name-operator pairs from sections of given type T, in the order they were found in the file.
* @note Generated names of operators are derived from section name they are contained within,
* suffixed with hyphen and zero-based index. E.g. the first operator in section named "seq"
* will have name "seq-0", second operator's name will be "seq-1" etc.
*/
template<class T>
typename OpSequence<T>::name_op_v_t gen_name_op_pairs();
/**
* @returns Vector of all name-operator pairs from sections of given type T, in the order they were found in the file (const).
* @note Generated names of operators are derived from section name they are contained within,
* suffixed with hyphen and zero-based index. E.g. the first operator in section named "seq"
* will have name "seq-0", second operator's name will be "seq-1" etc.
*/
template<class T>
typename OpSequence<T>::name_op_v_c_t gen_name_op_pairs() const;
/**
* @returns Vector of all name-operator pairs from all sections of the file (both typed and untyped),
* in the order they were found in the file.
* @note Generated names of operators are derived from section name they are contained within,
* suffixed with hyphen and zero-based index. E.g. the first operator in section named "seq"
* will have name "seq-0", second operator's name will be "seq-1" etc.
*/
FunctionalOpSequence::name_op_v_t gen_all_op_pairs();
/**
* @returns Vector of all name-operator pairs from all sections of the file (both typed and untyped),
* in the order they were found in the file (const).
* @note Generated names of operators are derived from section name they are contained within,
* suffixed with hyphen and zero-based index. E.g. the first operator in section named "seq"
* will have name "seq-0", second operator's name will be "seq-1" etc.
*/
FunctionalOpSequence::name_op_v_c_t gen_all_op_pairs() const;
// Lexicons, import sections: import("path", "name")
///////////////////////////
/**
* @returns True if there is an imported Lexicon of given name. False otherwise.
*/
bool has_lexicon(const std::string& name) const;
/**
* @returns True if there are any Lexicons imported.
*/
/**
* @returns Shared pointer to a Lexicon of given name (const).
* @throws InvalidArgument if there is no Lexicon of given name imported.
*/
boost::shared_ptr<const Lexicon> get_lexicon_ptr(const std::string& name) const;
/**
* @returns Lexicon of given name (const).
* @throws InvalidArgument if there is no Lexicon of given name imported.
*/
const Lexicon& get_lexicon(const std::string& name) const;
/**
* @returns Shared pointer to collection of all imported Lexicons (const).
*/
boost::shared_ptr<const Lexicons> get_lexicons_ptr() const;
/**
* @returns Collection of all imported Lexicons (const).
*/
const Lexicons& get_lexicons() const;
// Tag rules section: tag_rules ( rule1; rule2 )
/**
* @returns True if this WcclFile has tag_rules section. False otherwise.
*/
bool has_tag_rules() const;
/**
* @returns The tag rules section from this WCCL file.
* @throws WcclError if there is no tag rules section.
*/
const TagRuleSequence& get_tag_rules() const;
/**
* @returns The tag rules section from this WCCL file.
* @throws WcclError if there is no tag rules section.
*/
boost::shared_ptr<TagRuleSequence> get_tag_rules_ptr();
/**
* @returns The tag rules section from this WCCL file (const).
* @throws WcclError if there is no tag rules section.
*/
boost::shared_ptr<const TagRuleSequence> get_tag_rules_ptr() const;
// Match rules section: match_rules ( apply1; apply2 )
///////////////////////////
/**
* @returns True if this WcclFile has match_rules section. False otherwise.
*/
/**
* @returns The match rules section from this WCCL file.
* @throws WcclError if there is no match_rules section.
*/
const MatchRuleSequence& get_match_rules() const;
/**
* @returns The match rules section from this WCCL file.
* @throws WcclError if there is no match_rules section.
*/
boost::shared_ptr<MatchRuleSequence> get_match_rules_ptr();
/**
* @returns The match rules section from this WCCL file (const).
* @throws WcclError if there is no match_rules section.
*/
boost::shared_ptr<const MatchRuleSequence> get_match_rules_ptr() const;
friend std::ostream& operator<<(std::ostream& ostream, const WcclFile& wccl_file);
std::string to_string() const;
const Corpus2::Tagset& tagset() const;
const PwrNlp::PathSearcher<Wccl::FileNotFound> path() const { return path_; }
PwrNlp::PathSearcher<Wccl::FileNotFound> path() { return path_; }
///////////////////////////
// Internal parser-related methods to add sections.
///////////////////////////
void add_untyped_section(const boost::shared_ptr<UntypedOpSequence>& section);
void add_untyped_section(const boost::shared_ptr<const UntypedOpSequence>& section);
void add_untyped_section(const UntypedOpSequence& section);
template<class T>
void add_section(const boost::shared_ptr<OpSequence<T> >& section);
template<class T>
void add_section(const boost::shared_ptr<const OpSequence<T> >& section);
template<class T>
void add_section(const OpSequence<T>& section);
void import_lexicon(const boost::shared_ptr<Lexicon>& lexicon);
void set_tag_rules(const boost::shared_ptr<TagRuleSequence>& tag_rules);
void set_match_rules(const boost::shared_ptr<MatchRuleSequence>& match_rules);
private:
std::ostream& write_to(std::ostream& ostream) const;
std::vector<boost::shared_ptr<FunctionalOpSequence> > all_sections_;
boost::shared_ptr<TagRuleSequence> tag_rules_;
boost::shared_ptr<MatchRuleSequence> match_rules_;
const Corpus2::Tagset& tagset_;
PwrNlp::PathSearcher<Wccl::FileNotFound> path_;
};
} /* end ns Wccl */
//
// Implementation
//
namespace Wccl {
/**
 * Creates an empty WcclFile: allocates the (initially empty) lexicon
 * collection, binds the tagset reference and configures the path searcher
 * with the given search path, with verbose mode switched on.
 */
inline
WcclFile::WcclFile(const Corpus2::Tagset& tagset, const std::string& search_path)
	: lexicons_(boost::make_shared<Lexicons>()), tagset_(tagset),
	  path_(":")
{
	path_.set_search_path(search_path);
	path_.set_verbose(true);
}
inline
const std::vector<boost::shared_ptr<UntypedOpSequence> >& WcclFile::untyped_sections()
{
return WcclFileOpSections<UntypedOpSequence>::sections();
}
template<class T> inline
const typename std::vector<boost::shared_ptr<OpSequence<T> > >& WcclFile::sections()
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
{
return WcclFileOpSections<OpSequence<T> >::sections();
}
inline
bool WcclFile::has_untyped_section(const std::string& name) const
{
return WcclFileOpSections<UntypedOpSequence>::has_section(name);
}
template<class T> inline
bool WcclFile::has_section(const std::string &name) const
{
return WcclFileOpSections<OpSequence<T> >::has_section(name);
}
inline
std::vector<std::string> WcclFile::untyped_section_names() const
{
return WcclFileOpSections<UntypedOpSequence>::section_names();
}
template<class T> inline
std::vector<std::string> WcclFile::section_names() const
{
return WcclFileOpSections<OpSequence<T> >::section_names();
}
/** Forwards to the non-const WcclFileOpSections<UntypedOpSequence>::get_section(). */
inline UntypedOpSequence& WcclFile::get_untyped_section(const std::string& name)
{
	typedef WcclFileOpSections<UntypedOpSequence> UntypedBase;
	return UntypedBase::get_section(name);
}
/** Const overload; forwards to WcclFileOpSections<UntypedOpSequence>::get_section(). */
inline const UntypedOpSequence&
WcclFile::get_untyped_section(const std::string& name) const
{
	typedef WcclFileOpSections<UntypedOpSequence> UntypedBase;
	return UntypedBase::get_section(name);
}
/** Forwards to the non-const WcclFileOpSections<OpSequence<T> >::get_section(). */
template<class T>
inline OpSequence<T>& WcclFile::get_section(const std::string& name)
{
	typedef WcclFileOpSections<OpSequence<T> > TypedBase;
	return TypedBase::get_section(name);
}
/**
 * Const overload; forwards to
 * WcclFileOpSections<OpSequence<T> >::get_section().
 */
template<class T> inline
const OpSequence<T>& WcclFile::get_section(const std::string& name) const
{
	return WcclFileOpSections<OpSequence<T> >::get_section(name);
}
/** Forwards to the non-const WcclFileOpSections<UntypedOpSequence>::get_section_ptr(). */
inline boost::shared_ptr<UntypedOpSequence>
WcclFile::get_untyped_section_ptr(const std::string& name)
{
	typedef WcclFileOpSections<UntypedOpSequence> UntypedBase;
	return UntypedBase::get_section_ptr(name);
}
/** Const overload; forwards to WcclFileOpSections<UntypedOpSequence>::get_section_ptr(). */
inline boost::shared_ptr<const UntypedOpSequence>
WcclFile::get_untyped_section_ptr(const std::string& name) const
{
	typedef WcclFileOpSections<UntypedOpSequence> UntypedBase;
	return UntypedBase::get_section_ptr(name);
}
/** Forwards to the non-const WcclFileOpSections<OpSequence<T> >::get_section_ptr(). */
template<class T>
inline typename boost::shared_ptr<OpSequence<T> >
WcclFile::get_section_ptr(const std::string& name)
{
	typedef WcclFileOpSections<OpSequence<T> > TypedBase;
	return TypedBase::get_section_ptr(name);
}
/** Const overload; forwards to WcclFileOpSections<OpSequence<T> >::get_section_ptr(). */
template<class T>
inline typename boost::shared_ptr<const OpSequence<T> >
WcclFile::get_section_ptr(const std::string& name) const
{
	typedef WcclFileOpSections<OpSequence<T> > TypedBase;
	return TypedBase::get_section_ptr(name);
}
/** Forwards to the non-const WcclFileOpSections<UntypedOpSequence>::get_op(). */
inline FunctionalOperator&
WcclFile::get_untyped_op(const std::string &name, size_t idx)
{
	typedef WcclFileOpSections<UntypedOpSequence> UntypedBase;
	return UntypedBase::get_op(name, idx);
}
/** Const overload; forwards to WcclFileOpSections<UntypedOpSequence>::get_op(). */
inline const FunctionalOperator&
WcclFile::get_untyped_op(const std::string& name, size_t idx) const
{
	typedef WcclFileOpSections<UntypedOpSequence> UntypedBase;
	return UntypedBase::get_op(name, idx);
}
/**
 * Forwards to the non-const get_op() of the base holding sections of type T.
 */
template<class T> inline
Operator<T>& WcclFile::get_op(const std::string& name, size_t idx)
{
	// The base is parameterised by the section type OpSequence<T> (as in
	// get_op_ptr), not by Operator<T>; WcclFile has no
	// WcclFileOpSections<Operator<T> > base to call into.
	return WcclFileOpSections<OpSequence<T> >::get_op(name, idx);
}
/**
 * Const overload; forwards to get_op() of the base holding sections of type T.
 */
template<class T> inline
const Operator<T>& WcclFile::get_op(const std::string& name, size_t idx) const
{
	// The base is parameterised by the section type OpSequence<T> (as in
	// get_op_ptr), not by Operator<T>; WcclFile has no
	// WcclFileOpSections<Operator<T> > base to call into.
	return WcclFileOpSections<OpSequence<T> >::get_op(name, idx);
}
/** Forwards to the non-const WcclFileOpSections<UntypedOpSequence>::get_op_ptr(). */
inline boost::shared_ptr<FunctionalOperator>
WcclFile::get_untyped_op_ptr(const std::string& name, size_t idx)
{
	typedef WcclFileOpSections<UntypedOpSequence> UntypedBase;
	return UntypedBase::get_op_ptr(name, idx);
}
/** Const overload; forwards to WcclFileOpSections<UntypedOpSequence>::get_op_ptr(). */
inline boost::shared_ptr<const FunctionalOperator>
WcclFile::get_untyped_op_ptr(const std::string& name, size_t idx) const
{
	typedef WcclFileOpSections<UntypedOpSequence> UntypedBase;
	return UntypedBase::get_op_ptr(name, idx);
}
/** Forwards to the non-const WcclFileOpSections<OpSequence<T> >::get_op_ptr(). */
template<class T>
inline boost::shared_ptr<Operator<T> >
WcclFile::get_op_ptr(const std::string& name, size_t idx)
{
	typedef WcclFileOpSections<OpSequence<T> > TypedBase;
	return TypedBase::get_op_ptr(name, idx);
}
/** Const overload; forwards to WcclFileOpSections<OpSequence<T> >::get_op_ptr(). */
template<class T>
inline boost::shared_ptr<const Operator<T> >
WcclFile::get_op_ptr(const std::string& name, size_t idx) const
{
	typedef WcclFileOpSections<OpSequence<T> > TypedBase;
	return TypedBase::get_op_ptr(name, idx);
}
/** Forwards to the non-const WcclFileOpSections<UntypedOpSequence>::gen_name_op_pairs(). */
inline UntypedOpSequence::name_op_v_t WcclFile::gen_name_untyped_op_pairs()
{
	typedef WcclFileOpSections<UntypedOpSequence> UntypedBase;
	return UntypedBase::gen_name_op_pairs();
}
/** Const overload; forwards to WcclFileOpSections<UntypedOpSequence>::gen_name_op_pairs(). */
inline UntypedOpSequence::name_op_v_c_t
WcclFile::gen_name_untyped_op_pairs() const
{
	typedef WcclFileOpSections<UntypedOpSequence> UntypedBase;
	return UntypedBase::gen_name_op_pairs();
}
/** Forwards to the non-const WcclFileOpSections<OpSequence<T> >::gen_name_op_pairs(). */
template<class T>
inline typename OpSequence<T>::name_op_v_t WcclFile::gen_name_op_pairs()
{
	typedef WcclFileOpSections<OpSequence<T> > TypedBase;
	return TypedBase::gen_name_op_pairs();
}
/** Const overload; forwards to WcclFileOpSections<OpSequence<T> >::gen_name_op_pairs(). */
template<class T>
inline typename OpSequence<T>::name_op_v_c_t
WcclFile::gen_name_op_pairs() const
{
	typedef WcclFileOpSections<OpSequence<T> > TypedBase;
	return TypedBase::gen_name_op_pairs();
}
/** Inserts the given lexicon into the collection held in lexicons_. */
inline void WcclFile::import_lexicon(const boost::shared_ptr<Lexicon>& lexicon)
{
	Lexicons& imported = *lexicons_;
	imported.insert(lexicon);
}
inline
bool WcclFile::has_lexicon(const std::string& name) const
{
return lexicons_->has_lexicon(name);
}
/**
 * @returns True if the lexicon collection pointer is set.
 *
 * NOTE(review): in the code visible here lexicons_ is created in the
 * constructor and never reset, so this always yields true, even when nothing
 * has been imported. If the intended meaning is "at least one lexicon
 * imported", this should query the Lexicons collection for emptiness instead
 * - confirm against the Lexicons API.
 */
inline
bool WcclFile::has_lexicons() const
{
	// Explicit conversion: boost::shared_ptr's conversion to bool is explicit
	// in C++11 builds, so a bare `return lexicons_;` does not compile there.
	return static_cast<bool>(lexicons_);
}
/** Fetches the named lexicon from the collection via Lexicons::get_ptr(). */
inline boost::shared_ptr<const Lexicon>
WcclFile::get_lexicon_ptr(const std::string& name) const
{
	const Lexicons& imported = *lexicons_;
	return imported.get_ptr(name);
}
/** Fetches the named lexicon from the collection via Lexicons::get(). */
inline const Lexicon& WcclFile::get_lexicon(const std::string &name) const
{
	const Lexicons& imported = *lexicons_;
	return imported.get(name);
}
/** Hands out the lexicon collection as a shared pointer to const. */
inline boost::shared_ptr<const Lexicons> WcclFile::get_lexicons_ptr() const
{
	return this->lexicons_;
}
/** Dereferences lexicons_ and returns the collection by const reference. */
inline const Lexicons& WcclFile::get_lexicons() const
{
	const Lexicons& imported = *lexicons_;
	return imported;
}
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
/**
 * Registers the section with the untyped base first, then records it in
 * all_sections_. The pointer is shared, not cloned.
 */
inline void
WcclFile::add_untyped_section(const boost::shared_ptr<UntypedOpSequence>& section)
{
	typedef WcclFileOpSections<UntypedOpSequence> UntypedBase;
	UntypedBase::append(section);
	this->all_sections_.push_back(section);
}
/** Clones the pointed-to section and adds the clone through the non-const-pointer overload. */
inline void
WcclFile::add_untyped_section(const boost::shared_ptr<const UntypedOpSequence>& section)
{
	boost::shared_ptr<UntypedOpSequence> cloned = section->clone();
	this->add_untyped_section(cloned);
}
inline
void WcclFile::add_untyped_section(const UntypedOpSequence& section)
{
boost::shared_ptr<UntypedOpSequence> s = section.clone();
add_untyped_section(s);
}
/**
 * Registers the section with the base for type T first, then records it in
 * all_sections_. The pointer is shared, not cloned.
 */
template<class T>
inline void
WcclFile::add_section(const boost::shared_ptr<OpSequence<T> >& section)
{
	typedef WcclFileOpSections<OpSequence<T> > TypedBase;
	TypedBase::append(section);
	this->all_sections_.push_back(section);
}
/** Clones the pointed-to section and adds the clone through the non-const-pointer overload. */
template<class T>
inline void
WcclFile::add_section(const boost::shared_ptr<const OpSequence<T> >& section)
{
	boost::shared_ptr<OpSequence<T> > cloned = section->clone();
	this->add_section(cloned);
}
template<class T> inline
void WcclFile::add_section(const OpSequence<T>& section)
{
boost::shared_ptr<OpSequence<T> > s = section.clone();
add_section(s);
}
/**
 * @returns True if a tag rules section has been set (tag_rules_ is non-null).
 */
inline
bool WcclFile::has_tag_rules() const
{
	// Explicit conversion: boost::shared_ptr's conversion to bool is explicit
	// in C++11 builds, so a bare `return tag_rules_;` does not compile there.
	return static_cast<bool>(tag_rules_);
}
/** Dereferences the pointer obtained from the const get_tag_rules_ptr(). */
inline const TagRuleSequence& WcclFile::get_tag_rules() const
{
	boost::shared_ptr<const TagRuleSequence> rules = this->get_tag_rules_ptr();
	return *rules;
}
/**
 * Stores the tag rules section.
 * @throws WcclError if a tag rules section is already present.
 */
inline void
WcclFile::set_tag_rules(const boost::shared_ptr<TagRuleSequence>& tag_rules)
{
	if (!has_tag_rules()) {
		// First assignment: accept the section.
		tag_rules_ = tag_rules;
		return;
	}
	throw WcclError("Tag rules already added.");
}
inline
bool WcclFile::has_match_rules() const
{
return match_rules_;
const MatchRuleSequence& WcclFile::get_match_rules() const
return *get_match_rules_ptr();
void WcclFile::set_match_rules(const boost::shared_ptr<MatchRuleSequence>& match_rules)
if (has_match_rules()) {
throw WcclError("Tag rules already added.");
}
match_rules_ = match_rules;
/** Stream insertion for WcclFile; delegates to the private WcclFile::write_to(). */
inline std::ostream& operator <<(std::ostream& ostream, const WcclFile& wccl_file)
{
	std::ostream& out = wccl_file.write_to(ostream);
	return out;
}
/** @returns The tagset reference bound at construction. */
inline const Corpus2::Tagset& WcclFile::tagset() const
{
	return this->tagset_;
}
} /* end ns Wccl */
#endif // LIBWCCL_WCCLFILE_H