/*
Copyright (C) 2011 Adam Wardyński, Tomasz Śniatowski, Paweł Kędzia,
Adam Radziszewski, Bartosz Broda
Part of the WCCL project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE, COPYING.LESSER and COPYING files for more details.
*/
#ifndef LIBWCCL_WCCLFILE_H
#define LIBWCCL_WCCLFILE_H
#include <libwccl/values/bool.h>
#include <libwccl/values/match.h>
#include <libwccl/values/position.h>
#include <libwccl/values/strset.h>
#include <libwccl/values/tset.h>
#include <libwccl/wcclfileopsections.h>
#include <libwccl/ops/tagrulesequence.h>
#include <libwccl/ops/matchrulesequence.h>
#include <libwccl/exception.h>
#include <libpwrutils/pathsearch.h>
/**
* Class representing contents of parsed WCCL files
* - imported lexicons, any typed and untyped named
* operator sections, up to one tag rules section,
* and up to one match rules section.
*/
: WcclFileOpSections<UntypedOpSequence>,
WcclFileOpSections<OpSequence<StrSet> >,
WcclFileOpSections<OpSequence<TSet> >,
WcclFileOpSections<OpSequence<Bool> >,
WcclFileOpSections<OpSequence<Position> >,
WcclFileOpSections<OpSequence<Match> >
{
public:
WcclFile(const Corpus2::Tagset& tagset, const std::string& search_path);
// Untyped and typed operator sections: @X:"sectionname" ( op1; op2 )
/////////////////////
/**
* @returns All untyped sections from the WCCL file: @"name" ( anyop0; ...; anyopN )
*/
const std::vector<boost::shared_ptr<UntypedOpSequence> >& untyped_sections();
/**
* @returns All sections of given type T from the WCCL file: @T:"name" ( op<T>0; ...; op<T>N )
*/
const typename std::vector<boost::shared_ptr<OpSequence<T> > >& sections();
/**
* @returns True if the WCCL file contains untyped section of given name.
* False otherwise.
*/
bool has_untyped_section(const std::string& name) const;
/**
* @returns True if the WCCL file contains section of given name and type T.
* False otherwise.
*/
template<class T>
bool has_section(const std::string& name) const;
/**
* @returns Names of all untyped sections in this WCCL file, in the order they
* were found in the file.
*/
std::vector<std::string> untyped_section_names() const;
/**
* @returns Names of all sections of given type T from this WCCL file, in the
* order they were found in the file.
*/
template<class T>
std::vector<std::string> section_names() const;
/**
* @returns Untyped section of given name.
* @throws InvalidArgument if there is no untyped section of given name.
*/
UntypedOpSequence& get_untyped_section(const std::string& name);
/**
* @returns Untyped section of given name (const).
* @throws InvalidArgument if there is no untyped section of given name.
*/
const UntypedOpSequence& get_untyped_section(const std::string& name) const;
/**
* @returns Section of given name and type T.
* @throws InvalidArgument if there is no section of given name and type T.
*/
template<class T>
OpSequence<T>& get_section(const std::string& name);
/**
* @returns Section of given name and type T (const).
* @throws InvalidArgument if there is no section of given name and type T.
*/
template<class T>
const OpSequence<T>& get_section(const std::string& name) const;
/**
* @returns Shared pointer to an untyped section of the given name.
* @throws InvalidArgument if there is no untyped section of given name.
*/
boost::shared_ptr<UntypedOpSequence> get_untyped_section_ptr(const std::string& name);
/**
* @returns Shared pointer to an untyped section of the given name (const).
* @throws InvalidArgument if there is no untyped section of given name.
*/
boost::shared_ptr<const UntypedOpSequence> get_untyped_section_ptr(const std::string& name) const;
/**
* @returns Shared pointer to a section of the given name and type T.
* @throws InvalidArgument if there is no section of given name and type T.
*/
template<class T>
boost::shared_ptr<OpSequence<T> > get_section_ptr(const std::string& name);
/**
* @returns Shared pointer to a section of the given name and type T (const).
* @throws InvalidArgument if there is no section of given name and type T.
*/
template<class T>
boost::shared_ptr<const OpSequence<T> > get_section_ptr(const std::string& name) const;
/**
* @returns Operator from an untyped section of given name, present at given index.
* @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence).
* @throws InvalidArgument if there is no untyped section of given name or if idx is out of range.
*/
FunctionalOperator& get_untyped_op(const std::string& name, size_t idx = 0);
/**
* @returns Operator from an untyped section of given name, present at given index (const).
* @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence).
* @throws InvalidArgument if there is no untyped section of given name or if idx is out of range.
*/
const FunctionalOperator& get_untyped_op(const std::string& name, size_t idx = 0) const;
/**
* @returns Operator from a section of given name and type T, present at given index.
* @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence).
* @throws InvalidArgument if there is no section of given name and type T or if idx is out of range.
*/
template<class T>
Operator<T>& get_op(const std::string& name, size_t idx = 0);
/**
* @returns Operator from a section of given name and type T, present at given index (const).
* @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence).
* @throws InvalidArgument if there is no section of given name and type T or if idx is out of range.
*/
template<class T>
const Operator<T>& get_op(const std::string& name, size_t idx = 0) const;
/**
* @returns Shared pointer to an operator from an untyped section of given name, present at given index.
* @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence).
* @throws InvalidArgument if there is no untyped section of given name or if idx is out of range.
*/
boost::shared_ptr<FunctionalOperator> get_untyped_op_ptr(const std::string& name, size_t idx = 0);
/**
* @returns Shared pointer to an operator from an untyped section of given name, present at given index (const).
* @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence).
* @throws InvalidArgument if there is no untyped section of given name or if idx is out of range.
*/
boost::shared_ptr<const FunctionalOperator> get_untyped_op_ptr(const std::string& name, size_t idx = 0) const;
/**
* @returns Shared pointer to an operator from a section of given name and type T, present at given index.
* @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence).
* @throws InvalidArgument if there is no section of given name and type T, or if idx is out of range.
*/
template<class T>
boost::shared_ptr<Operator<T> > get_op_ptr(const std::string& name, size_t idx = 0);
/**
* @returns Shared pointer to an operator from a section of given name and type T, present at given index (const).
* @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence).
* @throws InvalidArgument if there is no section of given name and type T, or if idx is out of range.
*/
template<class T>
boost::shared_ptr<const Operator<T> > get_op_ptr(const std::string& name, size_t idx = 0) const;
/**
* @returns Vector of all name-operator pairs from untyped sections, in the order they were found in the file.
* @note Generated names of operators are derived from section name they are contained within,
* suffixed with hyphen and zero-based index. E.g. the first operator in section named "seq"
* will have name "seq-0", second operator's name will be "seq-1" etc.
*/
UntypedOpSequence::name_op_v_t gen_name_untyped_op_pairs();
/**
* @returns Vector of all name-operator pairs from untyped sections, in the order they were found in the file (const).
* @note Generated names of operators are derived from section name they are contained within,
* suffixed with hyphen and zero-based index. E.g. the first operator in section named "seq"
* will have name "seq-0", second operator's name will be "seq-1" etc.
*/
UntypedOpSequence::name_op_v_c_t gen_name_untyped_op_pairs() const;
/**
* @returns Vector of all name-operator pairs from sections of given type T, in the order they were found in the file.
* @note Generated names of operators are derived from section name they are contained within,
* suffixed with hyphen and zero-based index. E.g. the first operator in section named "seq"
* will have name "seq-0", second operator's name will be "seq-1" etc.
*/
template<class T>
typename OpSequence<T>::name_op_v_t gen_name_op_pairs();
/**
* @returns Vector of all name-operator pairs from sections of given type T, in the order they were found in the file (const).
* @note Generated names of operators are derived from section name they are contained within,
* suffixed with hyphen and zero-based index. E.g. the first operator in section named "seq"
* will have name "seq-0", second operator's name will be "seq-1" etc.
*/
template<class T>
typename OpSequence<T>::name_op_v_c_t gen_name_op_pairs() const;
/**
* @returns Vector of all name-operator pairs from all sections of the file (both typed and untyped),
* in the order they were found in the file.
* @note Generated names of operators are derived from section name they are contained within,
* suffixed with hyphen and zero-based index. E.g. the first operator in section named "seq"
* will have name "seq-0", second operator's name will be "seq-1" etc.
*/
FunctionalOpSequence::name_op_v_t gen_all_op_pairs();
/**
* @returns Vector of all name-operator pairs from all sections of the file (both typed and untyped),
* in the order they were found in the file (const).
* @note Generated names of operators are derived from section name they are contained within,
* suffixed with hyphen and zero-based index. E.g. the first operator in section named "seq"
* will have name "seq-0", second operator's name will be "seq-1" etc.
*/
FunctionalOpSequence::name_op_v_c_t gen_all_op_pairs() const;
// Lexicons, import sections: import("path", "name")
///////////////////////////
/**
* @returns True if there is an imported Lexicon of given name. False otherwise.
*/
bool has_lexicon(const std::string& name) const;
/**
* @returns True if there are any Lexicons imported.
*/
/**
* @returns Shared pointer to a Lexicon of given name (const).
* @throws InvalidArgument if there is no Lexicon of given name imported.
*/
boost::shared_ptr<const Lexicon> get_lexicon_ptr(const std::string& name) const;
/**
* @returns Lexicon of given name (const).
* @throws InvalidArgument if there is no Lexicon of given name imported.
*/
const Lexicon& get_lexicon(const std::string& name) const;
/**
* @returns Shared pointer to collection of all imported Lexicons (const).
*/
boost::shared_ptr<const Lexicons> get_lexicons_ptr() const;
/**
* @returns Collection of all imported Lexicons (const).
*/
const Lexicons& get_lexicons() const;
// Tag rules section: tag_rules ( rule1; rule2 )
/**
* @returns True if this WcclFile has tag_rules section. False otherwise.
*/
bool has_tag_rules() const;
/**
* @returns The tag rules section from this WCCL file.
* @throws WcclError if there is no tag rules section.
*/
const TagRuleSequence& get_tag_rules() const;
/**
* @returns Shared pointer to the tag rules section from this WCCL file.
* @throws WcclError if there is no tag rules section.
*/
boost::shared_ptr<TagRuleSequence> get_tag_rules_ptr();
/**
* @returns Shared pointer to the tag rules section from this WCCL file (const).
* @throws WcclError if there is no tag rules section.
*/
boost::shared_ptr<const TagRuleSequence> get_tag_rules_ptr() const;
// Match rules section: match_rules ( apply1; apply2 )
///////////////////////////
/**
* @returns True if this WcclFile has match_rules section. False otherwise.
*/
/**
* @returns The match rules section from this WCCL file (const).
* @throws WcclError if there is no match_rules section.
*/
const Matching::MatchRuleSequence& get_match_rules() const;
/**
* @returns Shared pointer to the match rules section from this WCCL file.
* @throws WcclError if there is no match_rules section.
*/
boost::shared_ptr<Matching::MatchRuleSequence> get_match_rules_ptr();
/**
* @returns Shared pointer to the match rules section from this WCCL file (const).
* @throws WcclError if there is no match_rules section.
*/
boost::shared_ptr<const Matching::MatchRuleSequence> get_match_rules_ptr() const;
friend std::ostream& operator<<(std::ostream& ostream, const WcclFile& wccl_file);
std::string to_string() const;
const Corpus2::Tagset& tagset() const;
/**
 * @returns Copy of the path searcher held by this WcclFile (const);
 * the result is returned by value.
 */
const PwrNlp::PathSearcher<Wccl::FileNotFound> path() const
{
	return this->path_;
}
/**
 * @returns Copy of the path searcher held by this WcclFile;
 * the result is returned by value, so changing it does not affect path_.
 */
PwrNlp::PathSearcher<Wccl::FileNotFound> path()
{
	return this->path_;
}
///////////////////////////
// Internal parser-related methods to add sections.
///////////////////////////
void add_untyped_section(const boost::shared_ptr<UntypedOpSequence>& section);
void add_untyped_section(const boost::shared_ptr<const UntypedOpSequence>& section);
void add_untyped_section(const UntypedOpSequence& section);
template<class T>
void add_section(const boost::shared_ptr<OpSequence<T> >& section);
template<class T>
void add_section(const boost::shared_ptr<const OpSequence<T> >& section);
template<class T>
void add_section(const OpSequence<T>& section);
void import_lexicon(const boost::shared_ptr<Lexicon>& lexicon);
void set_tag_rules(const boost::shared_ptr<TagRuleSequence>& tag_rules);
ilor
committed
void set_match_rules(const boost::shared_ptr<Matching::MatchRuleSequence>& match_rules);
private:
std::ostream& write_to(std::ostream& ostream) const;
std::vector<boost::shared_ptr<FunctionalOpSequence> > all_sections_;
boost::shared_ptr<TagRuleSequence> tag_rules_;
ilor
committed
boost::shared_ptr<Matching::MatchRuleSequence> match_rules_;
const Corpus2::Tagset& tagset_;
PwrNlp::PathSearcher<Wccl::FileNotFound> path_;
};
} /* end ns Wccl */
//
// Implementation
//
namespace Wccl {
/**
 * Constructs a WcclFile bound to the given tagset.
 *
 * A fresh Lexicons collection is allocated, the tagset is stored by
 * reference (so it has to outlive this object), and the path searcher is
 * constructed with ":" and then handed search_path.
 *
 * @param tagset Tagset this file refers to; kept as a reference.
 * @param search_path Value forwarded to the path searcher's set_search_path().
 */
inline
WcclFile::WcclFile(const Corpus2::Tagset& tagset, const std::string& search_path)
	: lexicons_(boost::make_shared<Lexicons>()),
	  tagset_(tagset),
	  path_(":") // NOTE(review): ":" is presumably the path separator - confirm against PathSearcher
{
	path_.set_search_path(search_path);
}
inline
const std::vector<boost::shared_ptr<UntypedOpSequence> >& WcclFile::untyped_sections()
{
return WcclFileOpSections<UntypedOpSequence>::sections();
}
template<class T> inline
const typename std::vector<boost::shared_ptr<OpSequence<T> > >& WcclFile::sections()
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
{
return WcclFileOpSections<OpSequence<T> >::sections();
}
inline
bool WcclFile::has_untyped_section(const std::string& name) const
{
return WcclFileOpSections<UntypedOpSequence>::has_section(name);
}
template<class T> inline
bool WcclFile::has_section(const std::string &name) const
{
return WcclFileOpSections<OpSequence<T> >::has_section(name);
}
inline
std::vector<std::string> WcclFile::untyped_section_names() const
{
return WcclFileOpSections<UntypedOpSequence>::section_names();
}
template<class T> inline
std::vector<std::string> WcclFile::section_names() const
{
return WcclFileOpSections<OpSequence<T> >::section_names();
}
/**
 * Forwards to the non-const get_section() of the
 * WcclFileOpSections<UntypedOpSequence> base.
 */
inline
UntypedOpSequence& WcclFile::get_untyped_section(const std::string& name)
{
	typedef WcclFileOpSections<UntypedOpSequence> untyped_base_t;
	return untyped_base_t::get_section(name);
}
/**
 * Const variant: forwards to get_section() of the
 * WcclFileOpSections<UntypedOpSequence> base on a const object.
 */
inline
const UntypedOpSequence& WcclFile::get_untyped_section(const std::string& name) const
{
	typedef WcclFileOpSections<UntypedOpSequence> untyped_base_t;
	return untyped_base_t::get_section(name);
}
/**
 * Forwards to the non-const get_section() of the
 * WcclFileOpSections<OpSequence<T> > base.
 */
template<class T> inline
OpSequence<T>& WcclFile::get_section(const std::string& name)
{
	typedef WcclFileOpSections<OpSequence<T> > typed_base_t;
	return typed_base_t::get_section(name);
}
/**
 * Const variant: forwards to get_section() of the
 * WcclFileOpSections<OpSequence<T> > base on a const object.
 */
template<class T> inline
const OpSequence<T>& WcclFile::get_section(const std::string& name) const
{
	return WcclFileOpSections<OpSequence<T> >::get_section(name);
}
/**
 * Forwards to get_section_ptr() of the
 * WcclFileOpSections<UntypedOpSequence> base.
 */
inline
boost::shared_ptr<UntypedOpSequence> WcclFile::get_untyped_section_ptr(const std::string& name)
{
	typedef WcclFileOpSections<UntypedOpSequence> untyped_base_t;
	return untyped_base_t::get_section_ptr(name);
}
/**
 * Const variant: forwards to get_section_ptr() of the
 * WcclFileOpSections<UntypedOpSequence> base on a const object.
 */
inline
boost::shared_ptr<const UntypedOpSequence> WcclFile::get_untyped_section_ptr(const std::string& name) const
{
	typedef WcclFileOpSections<UntypedOpSequence> untyped_base_t;
	return untyped_base_t::get_section_ptr(name);
}
/**
 * Forwards to get_section_ptr() of the
 * WcclFileOpSections<OpSequence<T> > base.
 */
template<class T> inline
typename boost::shared_ptr<OpSequence<T> > WcclFile::get_section_ptr(const std::string& name)
{
	typedef WcclFileOpSections<OpSequence<T> > typed_base_t;
	return typed_base_t::get_section_ptr(name);
}
/**
 * Const variant: forwards to get_section_ptr() of the
 * WcclFileOpSections<OpSequence<T> > base on a const object.
 */
template<class T> inline
typename boost::shared_ptr<const OpSequence<T> > WcclFile::get_section_ptr(const std::string& name) const
{
	typedef WcclFileOpSections<OpSequence<T> > typed_base_t;
	return typed_base_t::get_section_ptr(name);
}
/**
 * Forwards name and index to get_op() of the
 * WcclFileOpSections<UntypedOpSequence> base.
 */
inline
FunctionalOperator& WcclFile::get_untyped_op(const std::string &name, size_t idx)
{
	typedef WcclFileOpSections<UntypedOpSequence> untyped_base_t;
	return untyped_base_t::get_op(name, idx);
}
/**
 * Const variant: forwards name and index to get_op() of the
 * WcclFileOpSections<UntypedOpSequence> base on a const object.
 */
inline
const FunctionalOperator& WcclFile::get_untyped_op(const std::string& name, size_t idx) const
{
	typedef WcclFileOpSections<UntypedOpSequence> untyped_base_t;
	return untyped_base_t::get_op(name, idx);
}
/**
 * Forwards name and index to get_op() of the
 * WcclFileOpSections<OpSequence<T> > base.
 *
 * The base is keyed by the section type OpSequence<T>, not by Operator<T>:
 * WcclFile derives only from WcclFileOpSections<OpSequence<...> >
 * instantiations, which is also what get_op_ptr<T>() forwards to.
 */
template<class T> inline
Operator<T>& WcclFile::get_op(const std::string& name, size_t idx)
{
	return WcclFileOpSections<OpSequence<T> >::get_op(name, idx);
}
/**
 * Const variant: forwards name and index to get_op() of the
 * WcclFileOpSections<OpSequence<T> > base on a const object.
 *
 * The base is keyed by the section type OpSequence<T>, not by Operator<T>:
 * WcclFile derives only from WcclFileOpSections<OpSequence<...> >
 * instantiations, which is also what get_op_ptr<T>() forwards to.
 */
template<class T> inline
const Operator<T>& WcclFile::get_op(const std::string& name, size_t idx) const
{
	return WcclFileOpSections<OpSequence<T> >::get_op(name, idx);
}
/**
 * Forwards name and index to get_op_ptr() of the
 * WcclFileOpSections<UntypedOpSequence> base.
 */
inline
boost::shared_ptr<FunctionalOperator> WcclFile::get_untyped_op_ptr(const std::string& name, size_t idx)
{
	typedef WcclFileOpSections<UntypedOpSequence> untyped_base_t;
	return untyped_base_t::get_op_ptr(name, idx);
}
/**
 * Const variant: forwards name and index to get_op_ptr() of the
 * WcclFileOpSections<UntypedOpSequence> base on a const object.
 */
inline
boost::shared_ptr<const FunctionalOperator> WcclFile::get_untyped_op_ptr(const std::string& name, size_t idx) const
{
	typedef WcclFileOpSections<UntypedOpSequence> untyped_base_t;
	return untyped_base_t::get_op_ptr(name, idx);
}
/**
 * Forwards name and index to get_op_ptr() of the
 * WcclFileOpSections<OpSequence<T> > base.
 */
template<class T> inline
boost::shared_ptr<Operator<T> > WcclFile::get_op_ptr(const std::string& name, size_t idx)
{
	typedef WcclFileOpSections<OpSequence<T> > typed_base_t;
	return typed_base_t::get_op_ptr(name, idx);
}
/**
 * Const variant: forwards name and index to get_op_ptr() of the
 * WcclFileOpSections<OpSequence<T> > base on a const object.
 */
template<class T> inline
boost::shared_ptr<const Operator<T> > WcclFile::get_op_ptr(const std::string& name, size_t idx) const
{
	typedef WcclFileOpSections<OpSequence<T> > typed_base_t;
	return typed_base_t::get_op_ptr(name, idx);
}
/**
 * Forwards to gen_name_op_pairs() of the
 * WcclFileOpSections<UntypedOpSequence> base.
 */
inline
UntypedOpSequence::name_op_v_t WcclFile::gen_name_untyped_op_pairs()
{
	typedef WcclFileOpSections<UntypedOpSequence> untyped_base_t;
	return untyped_base_t::gen_name_op_pairs();
}
/**
 * Const variant: forwards to gen_name_op_pairs() of the
 * WcclFileOpSections<UntypedOpSequence> base on a const object.
 */
inline
UntypedOpSequence::name_op_v_c_t WcclFile::gen_name_untyped_op_pairs() const
{
	typedef WcclFileOpSections<UntypedOpSequence> untyped_base_t;
	return untyped_base_t::gen_name_op_pairs();
}
/**
 * Forwards to gen_name_op_pairs() of the
 * WcclFileOpSections<OpSequence<T> > base.
 */
template<class T> inline
typename OpSequence<T>::name_op_v_t WcclFile::gen_name_op_pairs()
{
	typedef WcclFileOpSections<OpSequence<T> > typed_base_t;
	return typed_base_t::gen_name_op_pairs();
}
/**
 * Const variant: forwards to gen_name_op_pairs() of the
 * WcclFileOpSections<OpSequence<T> > base on a const object.
 */
template<class T> inline
typename OpSequence<T>::name_op_v_c_t WcclFile::gen_name_op_pairs() const
{
	typedef WcclFileOpSections<OpSequence<T> > typed_base_t;
	return typed_base_t::gen_name_op_pairs();
}
/**
 * Hands the given lexicon to insert() of the lexicon collection
 * held in lexicons_.
 */
inline
void WcclFile::import_lexicon(const boost::shared_ptr<Lexicon>& lexicon)
{
	(*lexicons_).insert(lexicon);
}
/**
 * Asks the lexicon collection held in lexicons_ whether it
 * has a lexicon of the given name.
 */
inline
bool WcclFile::has_lexicon(const std::string& name) const
{
	return (*lexicons_).has_lexicon(name);
}
/**
 * @returns True if the lexicons_ pointer is non-null.
 *
 * The conversion to bool is spelled out because shared_ptr's bool
 * conversion is explicit in newer Boost/C++11 builds, where a bare
 * `return lexicons_;` does not compile.
 *
 * NOTE(review): the constructor always allocates lexicons_, so this looks
 * like it is always true rather than "true if any Lexicons were imported".
 * An emptiness check on the collection is probably intended - confirm
 * against the Lexicons interface.
 */
inline
bool WcclFile::has_lexicons() const
{
	return static_cast<bool>(lexicons_);
}
/**
 * Forwards the name to get_ptr() of the lexicon collection held in lexicons_.
 */
inline
boost::shared_ptr<const Lexicon> WcclFile::get_lexicon_ptr(const std::string& name) const
{
	return (*lexicons_).get_ptr(name);
}
/**
 * Forwards the name to get() of the lexicon collection held in lexicons_.
 */
inline
const Lexicon& WcclFile::get_lexicon(const std::string &name) const
{
	return (*lexicons_).get(name);
}
/**
 * Returns the lexicons_ pointer, converted to a pointer-to-const.
 */
inline
boost::shared_ptr<const Lexicons> WcclFile::get_lexicons_ptr() const
{
	return boost::shared_ptr<const Lexicons>(lexicons_);
}
/**
 * Dereferences lexicons_ and returns the collection as a const reference.
 */
inline
const Lexicons& WcclFile::get_lexicons() const
{
	const Lexicons& all_lexicons = *lexicons_;
	return all_lexicons;
}
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
/**
 * Appends the section to the WcclFileOpSections<UntypedOpSequence> base
 * and then records the same pointer in all_sections_.
 */
inline
void WcclFile::add_untyped_section(const boost::shared_ptr<UntypedOpSequence>& section)
{
	typedef WcclFileOpSections<UntypedOpSequence> untyped_base_t;
	// Order matters: all_sections_ is only touched once append() has returned.
	untyped_base_t::append(section);
	this->all_sections_.push_back(section);
}
/**
 * Overload for a pointer-to-const section: clone()s the section and
 * adds the resulting mutable copy via the shared_ptr overload.
 */
inline
void WcclFile::add_untyped_section(const boost::shared_ptr<const UntypedOpSequence>& section)
{
	boost::shared_ptr<UntypedOpSequence> cloned = section->clone();
	this->add_untyped_section(cloned);
}
inline
void WcclFile::add_untyped_section(const UntypedOpSequence& section)
{
boost::shared_ptr<UntypedOpSequence> s = section.clone();
add_untyped_section(s);
}
/**
 * Appends the section to the WcclFileOpSections<OpSequence<T> > base
 * and then records the same pointer in all_sections_.
 */
template<class T> inline
void WcclFile::add_section(const boost::shared_ptr<OpSequence<T> >& section)
{
	typedef WcclFileOpSections<OpSequence<T> > typed_base_t;
	// Order matters: all_sections_ is only touched once append() has returned.
	typed_base_t::append(section);
	this->all_sections_.push_back(section);
}
/**
 * Overload for a pointer-to-const section: clone()s the section and
 * adds the resulting mutable copy via the shared_ptr overload.
 */
template<class T> inline
void WcclFile::add_section(const boost::shared_ptr<const OpSequence<T> >& section)
{
	boost::shared_ptr<OpSequence<T> > cloned = section->clone();
	this->add_section(cloned);
}
template<class T> inline
void WcclFile::add_section(const OpSequence<T>& section)
{
boost::shared_ptr<OpSequence<T> > s = section.clone();
add_section(s);
}
/**
 * @returns True if tag_rules_ holds a non-null pointer, i.e. a tag rules
 * section has been set. False otherwise.
 *
 * The conversion to bool is spelled out because shared_ptr's bool
 * conversion is explicit in newer Boost/C++11 builds, where a bare
 * `return tag_rules_;` does not compile.
 */
inline
bool WcclFile::has_tag_rules() const
{
	return static_cast<bool>(tag_rules_);
}
/**
 * Obtains the tag rules through the const get_tag_rules_ptr() and
 * returns a reference to the pointed-to sequence.
 */
inline
const TagRuleSequence& WcclFile::get_tag_rules() const
{
	const boost::shared_ptr<const TagRuleSequence> rules = get_tag_rules_ptr();
	return *rules;
}
/**
 * Stores the given tag rules section in tag_rules_.
 * @throws WcclError if has_tag_rules() already reports a section.
 */
inline
void WcclFile::set_tag_rules(const boost::shared_ptr<TagRuleSequence>& tag_rules)
{
	const bool already_set = has_tag_rules();
	if (already_set) {
		throw WcclError("Tag rules already added.");
	}
	this->tag_rules_ = tag_rules;
}
inline
bool WcclFile::has_match_rules() const
{
return match_rules_;
ilor
committed
const Matching::MatchRuleSequence& WcclFile::get_match_rules() const
return *get_match_rules_ptr();
ilor
committed
void WcclFile::set_match_rules(const boost::shared_ptr<Matching::MatchRuleSequence>& match_rules)
if (has_match_rules()) {
throw WcclError("Tag rules already added.");
}
match_rules_ = match_rules;
/**
 * Stream insertion for WcclFile: hands the stream to the file's
 * write_to() and returns whatever stream reference that call yields.
 */
inline
std::ostream& operator <<(std::ostream& ostream, const WcclFile& wccl_file)
{
	std::ostream& written = wccl_file.write_to(ostream);
	return written;
}
/**
 * @returns The tagset reference stored in tagset_.
 */
inline
const Corpus2::Tagset& WcclFile::tagset() const
{
	return this->tagset_;
}
} /* end ns Wccl */
#endif // LIBWCCL_WCCLFILE_H