Something went wrong on our end
-
marco authored
git-svn-id: https://dng.biozentrum.unibas.ch/svn/openstructure/trunk@1742 5a81b35b-ba03-0410-adc8-b2c5c5119f08
marco authored git-svn-id: https://dng.biozentrum.unibas.ch/svn/openstructure/trunk@1742 5a81b35b-ba03-0410-adc8-b2c5c5119f08
python_tokenizer.cc 9.20 KiB
//------------------------------------------------------------------------------
// This file is part of the OpenStructure project <www.openstructure.org>
//
// Copyright (C) 2008-2010 by the OpenStructure authors
// Copyright (C) 2003-2010 by the IPLT authors
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3.0 of the License, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
// details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//------------------------------------------------------------------------------
/*
Authors: Marco Biasini
*/
#include "python_tokenizer.hh"
namespace ost { namespace gui {
// Default constructor: produces a token of type END whose range is
// (String::npos, 0) and whose value is the empty string.
PythonToken::PythonToken()
  : type_(END),
    range_(String::npos, 0),
    value_("")
{
}
// Creates a token from its three parts.
//   type  - kind of token
//   range - range stored with the token
//   value - text stored with the token
PythonToken::PythonToken(Type type, const Range& range, const QString& value)
  : type_(type),
    range_(range),
    value_(value)
{
}
// Returns the type stored in this token.
PythonToken::Type PythonToken::GetType() const
{
  return this->type_;
}
// Returns a human-readable name for the token type. Types without an entry
// in the switch below are reported as "unknown".
QString PythonToken::GetTypeAsString() const
{
  const char* label = "unknown";
  switch (type_) {
    case STRING_LITERAL:
      label = "String literal";
      break;
    case STRING_DELIM:
      label = "String delimiter";
      break;
    case NUMBER:
      label = "number";
      break;
    case OPERATOR:
      label = "operator";
      break;
    case IDENTIFIER:
      label = "primary expression";
      break;
    case KEYWORD:
      label = "keyword";
      break;
    case END:
      label = "EOF";
      break;
    case GROUPING:
      label = "grouping character";
      break;
  }
  return label;
}
// Returns a reference to the text stored in this token.
const QString& PythonToken::GetValue() const
{
  return this->value_;
}
// Returns a reference to the range stored in this token.
const Range& PythonToken::GetRange() const
{
  return this->range_;
}
namespace {

// Returns a copy of a function-local token of type END with a
// default-constructed Range and an empty value. The tokenizer functions in
// this file return it when nothing matched or the input is exhausted.
PythonToken NullToken()
{
  static const PythonToken null_token(PythonToken::END, Range(), "");
  return null_token;
}

} // anon ns
// Creates a tokenizer with an empty command, positioned at offset 0 and not
// inside a string.
PythonTokenizer::PythonTokenizer():
  command_(""),
  current_pos_(0),
  // NextToken() branches on in_string_; leaving it uninitialised would make
  // the first call after SetCommand() read an indeterminate value.
  in_string_(false)
{
}
// Creates a tokenizer for `command`, starting at offset 0.
//   string_state - any value greater than zero marks the tokenizer as being
//                  inside a string when tokenizing starts:
//                    1 selects """ as the active string delimiter,
//                    2 selects ''' as the active string delimiter.
//                  Other values leave the delimiter untouched.
PythonTokenizer::PythonTokenizer(const QString& command, int string_state)
  : command_(command),
    current_pos_(0),
    in_string_(string_state > 0)
{
  switch (string_state) {
    case 1:
      string_delim_ = "\"\"\"";
      break;
    case 2:
      string_delim_ = "'''";
      break;
    default:
      break;
  }
}
// Returns a reference to the token most recently stored by NextToken() or
// SetCommand().
const PythonToken& PythonTokenizer::CurrentToken() const
{
  return this->current_token_;
}
void PythonTokenizer::SetInString(bool in_string)
{
in_string_=in_string;
}
// Returns the part of the command that starts at range.location and spans
// range.length characters, as extracted by QString::mid().
QString PythonTokenizer::GetSubString(const Range& range)
{
  const QString piece = command_.mid(range.location, range.length);
  return piece;
}
void PythonTokenizer::SetCommand(const QString& command)
{
command_ = command;
current_pos_ = 0;
current_token_ = NullToken();
}
// Advances the current position past any run of whitespace characters
// (QChar::isSpace()), stopping at the first other character or at the end
// of the command.
void PythonTokenizer::EatWhities()
{
  for (; current_pos_ < command_.size(); current_pos_++) {
    if (!command_[current_pos_].isSpace()) {
      break;
    }
  }
}
// Tries to match one of the known operators at the current position.
// On success the operator is consumed and returned as an OPERATOR token.
// Otherwise the position is left untouched and NullToken() (type END) is
// returned.
PythonToken PythonTokenizer::GetOperatorToken()
{
  // The first matching entry wins, so a multi-character operator has to be
  // listed before any entry that is a prefix of it ("+=" before "+", "<="
  // before "<", ">=" before ">"). "==" has no entry of its own and is
  // therefore matched as "=" on two consecutive calls.
  //
  // ">" must be present: without it NextToken() finds no token for a bare
  // '>' and reports END in the middle of the command.
  static const QString operators[] = {
    QString("+="), QString("-="), QString("*="),
    QString("/="), QString("+"),  QString("-"),
    QString("/"),  QString("*"),  QString("."),
    QString("%"),  QString("="),  QString("!="),
    QString("!"),  QString("<="), QString(">="),
    QString("<"),  QString(">"),  QString(":"),
    QString("?")
  };
  static const size_t operator_count = sizeof(operators)/sizeof(operators[0]);

  for (size_t i = 0; i < operator_count; ++i) {
    const QString& op = operators[i];
    // Only a hit exactly at the current position counts as a match.
    if (command_.indexOf(op, current_pos_) != current_pos_) {
      continue;
    }
    Range range(current_pos_, op.size());
    current_pos_ += op.size();
    return PythonToken(PythonToken::OPERATOR, range, op);
  }
  return NullToken();
}
// Tries to match one of the listed keywords at the current position. A
// keyword only counts when it is not directly followed by a letter, a digit
// or an underscore, i.e. when it is not merely the prefix of a longer name.
// On success the keyword is consumed and returned as a KEYWORD token.
// Otherwise the position is left untouched and NullToken() (type END) is
// returned.
PythonToken PythonTokenizer::GetKeywordToken()
{
  static const QString keywords[] = {
    QString("for"),   QString("while"), QString("def"),    QString("pass"),
    QString("class"), QString("if"),    QString("else"),   QString("print"),
    QString("self"),  QString("float"), QString("import"), QString("from"),
    QString("and"),   QString("or"),    QString("not"),    QString("str"),
    QString("False"), QString("True"),  QString("None"),
    QString("in"),    QString("elif")
  };
  static const size_t keyword_count = sizeof(keywords)/sizeof(keywords[0]);

  for (size_t i = 0; i < keyword_count; ++i) {
    const QString& candidate = keywords[i];
    // Only a hit exactly at the current position counts as a match.
    if (command_.indexOf(candidate, current_pos_) != current_pos_) {
      continue;
    }
    // A keyword that runs up to the end of the command is always accepted;
    // otherwise inspect the character that follows it.
    bool at_word_end = true;
    if (current_pos_ + candidate.size() < command_.size()) {
      const QChar next_char = command_[current_pos_ + candidate.size()];
      at_word_end = !(next_char.isLetterOrNumber() || next_char == '_');
    }
    if (!at_word_end) {
      continue;
    }
    Range range(current_pos_, candidate.size());
    current_pos_ += candidate.size();
    return PythonToken(PythonToken::KEYWORD, range, candidate);
  }
  return NullToken();
}
// Consumes the run of letters, digits and underscores that starts at the
// current position and returns it as an IDENTIFIER token. The run may be
// empty, in which case the token has length 0.
PythonToken PythonTokenizer::GetIdentifierSeq()
{
  Range range(current_pos_, 0);
  for (; current_pos_ < command_.size(); current_pos_++) {
    const QChar ch = command_[current_pos_];
    if (!ch.isLetterOrNumber() && !(ch == '_')) {
      break;
    }
  }
  range.length = current_pos_ - range.location;
  return PythonToken(PythonToken::IDENTIFIER, range, GetSubString(range));
}
// Consumes the string delimiter at the current position, records it as the
// active delimiter (string_delim_) and returns it as a STRING_DELIM token.
//
// Recognised delimiters are <">, <'>, <"""> and <'''>. The triple-quoted
// forms are detected for both quote characters, so the ''' state accepted
// by the constructor can also be produced while tokenizing. If the current
// character is not a quote at all, a one-character <"> delimiter is
// assumed.
PythonToken PythonTokenizer::GetStringDelim()
{
  Range range(current_pos_, 0);
  const QChar quote = command_[current_pos_];

  QString delimiter = "\"";
  if (quote == '\'') {
    delimiter = "'";
  }
  if (quote == '"' || quote == '\'') {
    // Upgrade to the triple-quoted form when the same quote character is
    // repeated three times in a row.
    if (current_pos_ + 2 < command_.size()) {
      const QString triple(3, quote);
      if (this->GetSubString(Range(current_pos_, 3)) == triple) {
        delimiter = triple;
      }
    }
  }

  range.length = delimiter.length();
  current_pos_ = range.End();
  string_delim_ = delimiter;
  return PythonToken(PythonToken::STRING_DELIM,
                     range, string_delim_);
}
// Consumes string content starting at the current position and returns it
// as a STRING_LITERAL token. The literal ends right before the next
// occurrence of the active delimiter (string_delim_), or at the end of the
// command when the delimiter does not occur again.
PythonToken PythonTokenizer::GetStringLiteral()
{
  Range range(current_pos_, 0);
  int delim_pos = command_.indexOf(string_delim_, current_pos_);
  if (delim_pos == -1) {
    // Unterminated on this command: the literal takes everything left.
    range.length = command_.size() - range.location;
  } else {
    range.length = delim_pos - range.location;
  }
  current_pos_ = range.End();
  return PythonToken(PythonToken::STRING_LITERAL,
                     range, GetSubString(range));
}
// Tries to match a grouping character -- one of ( ) { } [ ] , ; -- at the
// current position. On success the character is consumed and returned as a
// GROUPING token. Otherwise the position is left untouched and NullToken()
// (type END) is returned.
PythonToken PythonTokenizer::GetGroupingToken()
{
  // Static like the operator and keyword tables, so the strings are built
  // once instead of on every call.
  static const QString group_characters[] = {
    QString("("), QString(")"),
    QString("{"), QString("}"),
    QString("["), QString("]"),
    QString(","), QString(";")
  };
  static const size_t group_count =
      sizeof(group_characters)/sizeof(group_characters[0]);

  Range range(current_pos_, 0);
  for (size_t i = 0; i < group_count; ++i) {
    const QString& group_character = group_characters[i];
    // Only a hit exactly at the current position counts as a match.
    if (command_.indexOf(group_character, current_pos_) != current_pos_) {
      continue;
    }
    current_pos_ += group_character.size();
    range.length = group_character.size();
    return PythonToken(PythonToken::GROUPING, range, group_character);
  }
  return NullToken();
}
// Reads the next token from the command, stores it as the current token and
// returns a copy of it.
//
// While inside a string, the result is either the closing STRING_DELIM or a
// STRING_LITERAL. Outside of a string, leading whitespace is skipped and
// the following are tried in order: number, keyword/identifier, operator,
// opening string delimiter, grouping character. A token of type END is
// returned when the command is exhausted, and also when the character at
// the current position is not recognised by any of these.
PythonToken PythonTokenizer::NextToken()
{
  if (command_.length()>current_pos_ && in_string_) {
    if (string_delim_.isEmpty()) {
      // in_string_ can be set without a delimiter having been recorded
      // (SetInString() and constructor states other than 1 or 2 do not
      // assign one). With nothing to match against, any quote character
      // ends the string.
      QChar current_char = command_[current_pos_];
      if (current_char == '"' || current_char == '\'') {
        current_token_ = GetStringDelim();
        in_string_ = false;
        return current_token_;
      }
    } else if (command_.indexOf(string_delim_, current_pos_) == current_pos_) {
      // Only the delimiter that opened the string closes it. A different
      // quote character at this position is string content: in "'a'" the
      // single quotes belong to the literal.
      Range range(current_pos_, string_delim_.size());
      current_pos_ += string_delim_.size();
      in_string_ = false;
      current_token_ = PythonToken(PythonToken::STRING_DELIM,
                                   range, string_delim_);
      return current_token_;
    }
    current_token_ = GetStringLiteral();
    return current_token_;
  }

  this->EatWhities();
  if (current_pos_ >= command_.size()) {
    current_token_ = NullToken();
    return current_token_;
  }

  QChar current_char = command_[current_pos_];
  if (current_char.isDigit()) {
    current_token_ = GetNumberToken();
    return current_token_;
  }
  if (current_char.isLetterOrNumber() || current_char == '_') {
    // Keywords take precedence; anything else that starts like a name is an
    // identifier.
    PythonToken keywordToken = this->GetKeywordToken();
    if (keywordToken.GetType() == PythonToken::END) {
      current_token_ = this->GetIdentifierSeq();
    } else {
      current_token_ = keywordToken;
    }
    return current_token_;
  }
  PythonToken opToken = this->GetOperatorToken();
  if (opToken.GetType() != PythonToken::END) {
    current_token_ = opToken;
    return current_token_;
  }
  if (current_char == '"' || current_char == '\'') {
    current_token_ = this->GetStringDelim();
    in_string_ = true;
    return current_token_;
  }
  // Last resort: yields the END token when the character is not a grouping
  // character either.
  current_token_ = this->GetGroupingToken();
  return current_token_;
}
// Consumes a number starting at the current position and returns it as a
// NUMBER token. A number is a run of digits that may contain a single '.'.
//
// A second '.' ends the number and is left in place for the next call.
// Returning the END token here instead would make malformed input such as
// "1.2.3" look like the end of the command to callers that loop until END.
PythonToken PythonTokenizer::GetNumberToken()
{
  Range range(current_pos_, 0);
  bool dot_present = false;
  for (; current_pos_ < command_.size(); current_pos_++) {
    const QChar ch = command_[current_pos_];
    if (ch.isDigit()) {
      continue;
    }
    if (ch == '.' && !dot_present) {
      dot_present = true;
      continue;
    }
    break;
  }
  range.length = current_pos_ - range.location;
  return PythonToken(PythonToken::NUMBER, range, this->GetSubString(range));
}
}}