Lexical analyzer for Ruby source
# File rdoc/parser/ruby.rb, line 450 def initialize(content, options) lex_init @options = options @reader = BufferedReader.new content, @options @exp_line_no = @line_no = 1 @base_char_no = 0 @indent = 0 @ltype = nil @quoted = nil @lex_state = EXPR_BEG @space_seen = false @continue = false @line = "" @skip_space = false @read_auto_clean_up = false @exception_on_syntax_error = true end
# File rdoc/parser/ruby.rb, line 484 def char_no @reader.column end
# File rdoc/parser/ruby.rb, line 488 def get_read @reader.get_read end
# File rdoc/parser/ruby.rb, line 496 def getc_of_rests @reader.getc_already_read end
# File rdoc/parser/ruby.rb, line 500 def gets c = getc or return l = "" begin l.concat c unless c == "\r" break if c == "\n" end while c = getc l end
# File rdoc/parser/ruby.rb, line 1256 def identify_comment @ltype = "#" comment = "#" while ch = getc if ch == "\\" ch = getc if ch == "\n" ch = " " else comment << "\\" end else if ch == "\n" @ltype = nil ungetc break end end comment << ch end return Token(TkCOMMENT).set_text(comment) end
# File rdoc/parser/ruby.rb, line 951 def identify_gvar @lex_state = EXPR_END str = "$" tk = case ch = getc when /[~_*$?!@\/\\;,=:<>".]/ #" str << ch Token(TkGVAR, str) when "-" str << "-" << getc Token(TkGVAR, str) when "&", "`", "'", "+" str << ch Token(TkBACK_REF, str) when /[1-9]/ str << ch while (ch = getc) =~ /[0-9]/ str << ch end ungetc Token(TkNTH_REF) when /\w/ ungetc ungetc return identify_identifier else ungetc Token("$") end tk.set_text(str) end
# File rdoc/parser/ruby.rb, line 1061 def identify_here_document ch = getc if ch == "-" ch = getc indent = true end if /['"`]/ =~ ch # ' lt = ch quoted = "" while (c = getc) && c != lt quoted.concat c end else lt = '"' quoted = ch.dup while (c = getc) && c =~ /\w/ quoted.concat c end ungetc end ltback, @ltype = @ltype, lt reserve = "" while ch = getc reserve << ch if ch == "\\" #" ch = getc reserve << ch elsif ch == "\n" break end end str = "" while (l = gets) l.chomp! l.strip! if indent break if l == quoted str << l.chomp << "\n" end @reader.divert_read_from(reserve) @ltype = ltback @lex_state = EXPR_END Token(Ltype2Token[lt], str).set_text(str.dump) end
# File rdoc/parser/ruby.rb, line 986 def identify_identifier token = "" token.concat getc if peek(0) =~ /[$@]/ token.concat getc if peek(0) == "@" while (ch = getc) =~ /\w|_/ print ":", ch, ":" if RDoc::RubyLex.debug? token.concat ch end ungetc if ch == "!" or ch == "?" token.concat getc end # fix token # $stderr.puts "identifier - #{token}, state = #@lex_state" case token when /^\$/ return Token(TkGVAR, token).set_text(token) when /^\@/ @lex_state = EXPR_END return Token(TkIVAR, token).set_text(token) end if @lex_state != EXPR_DOT print token, "\n" if RDoc::RubyLex.debug? token_c, *trans = TkReading2Token[token] if token_c # reserved word? if (@lex_state != EXPR_BEG && @lex_state != EXPR_FNAME && trans[1]) # modifiers token_c = TkSymbol2Token[trans[1]] @lex_state = trans[0] else if @lex_state != EXPR_FNAME if ENINDENT_CLAUSE.include?(token) @indent += 1 elsif DEINDENT_CLAUSE.include?(token) @indent -= 1 end @lex_state = trans[0] else @lex_state = EXPR_END end end return Token(token_c, token).set_text(token) end end if @lex_state == EXPR_FNAME @lex_state = EXPR_END if peek(0) == '=' token.concat getc end elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT @lex_state = EXPR_ARG else @lex_state = EXPR_END end if token[0, 1] =~ /[A-Z]/ return Token(TkCONSTANT, token).set_text(token) elsif token[token.size - 1, 1] =~ /[!?]/ return Token(TkFID, token).set_text(token) else return Token(TkIDENTIFIER, token).set_text(token) end end
# File rdoc/parser/ruby.rb, line 1129 def identify_number(start) str = start.dup if start == "+" or start == "-" or start == "" start = getc str << start end @lex_state = EXPR_END if start == "0" if peek(0) == "x" ch = getc str << ch match = /[0-9a-f_]/ else match = /[0-7_]/ end while ch = getc if ch !~ match ungetc break else str << ch end end return Token(TkINTEGER).set_text(str) end type = TkINTEGER allow_point = TRUE allow_e = TRUE while ch = getc case ch when /[0-9_]/ str << ch when allow_point && "." type = TkFLOAT if peek(0) !~ /[0-9]/ ungetc break end str << ch allow_point = false when allow_e && "e", allow_e && "E" str << ch type = TkFLOAT if peek(0) =~ /[+-]/ str << getc end allow_e = false allow_point = false else ungetc break end end Token(type).set_text(str) end
# File rdoc/parser/ruby.rb, line 1110 def identify_quotation(initial_char) ch = getc if lt = PERCENT_LTYPE[ch] initial_char += ch ch = getc elsif ch =~ /\W/ lt = "\"" else fail SyntaxError, "unknown type of %string ('#{ch}')" end # if ch !~ /\W/ # ungetc # next # end #@ltype = lt @quoted = ch unless @quoted = PERCENT_PAREN[ch] identify_string(lt, @quoted, ch, initial_char) end
# File rdoc/parser/ruby.rb, line 1191 def identify_string(ltype, quoted = ltype, opener=nil, initial_char = nil) @ltype = ltype @quoted = quoted subtype = nil str = "" str << initial_char if initial_char str << (opener||quoted) nest = 0 begin while ch = getc str << ch if @quoted == ch if nest == 0 break else nest -= 1 end elsif opener == ch nest += 1 elsif @ltype != "'" && @ltype != "]" and ch == "#" ch = getc if ch == "{" subtype = true str << ch << skip_inner_expression else ungetc(ch) end elsif ch == '\\' #' str << read_escape end end if @ltype == "/" if peek(0) =~ /i|o|n|e|s/ str << getc end end if subtype Token(DLtype2Token[ltype], str) else Token(Ltype2Token[ltype], str) end.set_text(str) ensure @ltype = nil @quoted = nil @lex_state = EXPR_END end end
# File rdoc/parser/ruby.rb, line 523 def lex until (TkNL === (tk = token) or TkEND_OF_SCRIPT === tk) and not @continue or tk.nil? end line = get_read if line == "" and TkEND_OF_SCRIPT === tk or tk.nil? then nil else line end end
# File rdoc/parser/ruby.rb, line 594 def lex_init() @OP = IRB::SLex.new @OP.def_rules("\0", "\004", "\032") do |chars, io| Token(TkEND_OF_SCRIPT).set_text(chars) end @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |chars, io| @space_seen = TRUE while (ch = getc) =~ /[ \t\f\r\13]/ chars << ch end ungetc Token(TkSPACE).set_text(chars) end @OP.def_rule("#") do |op, io| identify_comment end @OP.def_rule("=begin", proc{@prev_char_no == 0 && peek(0) =~ /\s/}) do |op, io| str = op @ltype = "=" begin line = "" begin ch = getc line << ch end until ch == "\n" str << line end until line =~ /^=end/ ungetc @ltype = nil if str =~ /\A=begin\s+rdoc/i str.sub!(/\A=begin.*\n/, '') str.sub!(/^=end.*/m, '') Token(TkCOMMENT).set_text(str) else Token(TkRD_COMMENT)#.set_text(str) end end @OP.def_rule("\n") do print "\\n\n" if RDoc::RubyLex.debug? case @lex_state when EXPR_BEG, EXPR_FNAME, EXPR_DOT @continue = TRUE else @continue = FALSE @lex_state = EXPR_BEG end Token(TkNL).set_text("\n") end @OP.def_rules("*", "**", "!", "!=", "!~", "=", "==", "===", "=~", "<=>", "<", "<=", ">", ">=", ">>") do |op, io| @lex_state = EXPR_BEG Token(op).set_text(op) end @OP.def_rules("<<") do |op, io| tk = nil if @lex_state != EXPR_END && @lex_state != EXPR_CLASS && (@lex_state != EXPR_ARG || @space_seen) c = peek(0) if /[-\w_\"\'\`]/ =~ c tk = identify_here_document end end if !tk @lex_state = EXPR_BEG tk = Token(op).set_text(op) end tk end @OP.def_rules("'", '"') do |op, io| identify_string(op) end @OP.def_rules("`") do |op, io| if @lex_state == EXPR_FNAME Token(op).set_text(op) else identify_string(op) end end @OP.def_rules('?') do |op, io| if @lex_state == EXPR_END @lex_state = EXPR_BEG Token(TkQUESTION).set_text(op) else ch = getc if @lex_state == EXPR_ARG && ch !~ /\s/ ungetc @lex_state = EXPR_BEG Token(TkQUESTION).set_text(op) else str = op str << ch if (ch == '\\') #' str << read_escape end @lex_state = EXPR_END Token(TkINTEGER).set_text(str) end end end @OP.def_rules("&", "&&", "|", "||") do |op, io| @lex_state = EXPR_BEG Token(op).set_text(op) end @OP.def_rules("+=", "-=", "*=", "**=", "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do |op, io| @lex_state = EXPR_BEG op =~ /^(.*)=$/ Token(TkOPASGN, $1).set_text(op) end @OP.def_rule("+@", proc{@lex_state == EXPR_FNAME}) do |op, io| Token(TkUPLUS).set_text(op) end @OP.def_rule("-@", proc{@lex_state == EXPR_FNAME}) do |op, io| Token(TkUMINUS).set_text(op) end @OP.def_rules("+", "-") do |op, io| catch(:RET) do if @lex_state == EXPR_ARG if @space_seen and peek(0) =~ /[0-9]/ throw :RET, identify_number(op) else @lex_state = EXPR_BEG end elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/ throw :RET, identify_number(op) else @lex_state = EXPR_BEG end Token(op).set_text(op) end end @OP.def_rule(".") do @lex_state = EXPR_BEG if peek(0) =~ /[0-9]/ ungetc identify_number("") else # for obj.if @lex_state = EXPR_DOT Token(TkDOT).set_text(".") end end @OP.def_rules("..", "...") do |op, io| @lex_state = EXPR_BEG Token(op).set_text(op) end lex_int2 end
# File rdoc/parser/ruby.rb, line 780 def lex_int2 @OP.def_rules("]", "}", ")") do |op, io| @lex_state = EXPR_END @indent -= 1 Token(op).set_text(op) end @OP.def_rule(":") do if @lex_state == EXPR_END || peek(0) =~ /\s/ @lex_state = EXPR_BEG tk = Token(TkCOLON) else @lex_state = EXPR_FNAME tk = Token(TkSYMBEG) end tk.set_text(":") end @OP.def_rule("::") do if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen @lex_state = EXPR_BEG tk = Token(TkCOLON3) else @lex_state = EXPR_DOT tk = Token(TkCOLON2) end tk.set_text("::") end @OP.def_rule("/") do |op, io| if @lex_state == EXPR_BEG || @lex_state == EXPR_MID identify_string(op) elsif peek(0) == '=' getc @lex_state = EXPR_BEG Token(TkOPASGN, :/).set_text("/=") #") elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/ identify_string(op) else @lex_state = EXPR_BEG Token("/").set_text(op) end end @OP.def_rules("^") do @lex_state = EXPR_BEG Token("^").set_text("^") end @OP.def_rules(",", ";") do |op, io| @lex_state = EXPR_BEG Token(op).set_text(op) end @OP.def_rule("~") do @lex_state = EXPR_BEG Token("~").set_text("~") end @OP.def_rule("~@", proc{@lex_state = EXPR_FNAME}) do @lex_state = EXPR_BEG Token("~").set_text("~@") end @OP.def_rule("(") do @indent += 1 if @lex_state == EXPR_BEG || @lex_state == EXPR_MID @lex_state = EXPR_BEG tk = Token(TkfLPAREN) else @lex_state = EXPR_BEG tk = Token(TkLPAREN) end tk.set_text("(") end @OP.def_rule("[]", proc{@lex_state == EXPR_FNAME}) do Token("[]").set_text("[]") end @OP.def_rule("[]=", proc{@lex_state == EXPR_FNAME}) do Token("[]=").set_text("[]=") end @OP.def_rule("[") do @indent += 1 if @lex_state == EXPR_FNAME t = Token(TkfLBRACK) else if @lex_state == EXPR_BEG || @lex_state == EXPR_MID t = Token(TkLBRACK) elsif @lex_state == EXPR_ARG && @space_seen t = Token(TkLBRACK) else t = Token(TkfLBRACK) end @lex_state = EXPR_BEG end t.set_text("[") end @OP.def_rule("{") do @indent += 1 if @lex_state != EXPR_END && @lex_state != EXPR_ARG t = Token(TkLBRACE) else t = Token(TkfLBRACE) end @lex_state = EXPR_BEG t.set_text("{") end @OP.def_rule('\\') do #' if getc == "\n" @space_seen = true @continue = true Token(TkSPACE).set_text("\\\n") else ungetc Token("\\").set_text("\\") #" end end @OP.def_rule('%') do |op, io| if @lex_state == EXPR_BEG || @lex_state == EXPR_MID identify_quotation('%') elsif peek(0) == '=' getc Token(TkOPASGN, "%").set_text("%=") elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/ identify_quotation('%') else @lex_state = EXPR_BEG Token("%").set_text("%") end end @OP.def_rule('$') do #' identify_gvar end @OP.def_rule('@') do if peek(0) =~ /[@\w_]/ ungetc identify_identifier else Token("@").set_text("@") end end @OP.def_rule("__END__", proc{@prev_char_no == 0 && peek(0) =~ /[\r\n]/}) do throw :eof end @OP.def_rule("") do |op, io| printf "MATCH: start %s: %s\n", op, io.inspect if RDoc::RubyLex.debug? if peek(0) =~ /[0-9]/ t = identify_number("") elsif peek(0) =~ /[\w_]/ t = identify_identifier end printf "MATCH: end %s: %s\n", op, io.inspect if RDoc::RubyLex.debug? t end end
io functions
# File rdoc/parser/ruby.rb, line 480 def line_no @reader.line_num end
# File rdoc/parser/ruby.rb, line 519 def peek(i = 0) @reader.peek(i) end
# File rdoc/parser/ruby.rb, line 515 def peek_equal?(str) @reader.peek_equal(str) end
# File rdoc/parser/ruby.rb, line 1279 def read_escape res = "" case ch = getc when /[0-7]/ ungetc ch 3.times do case ch = getc when /[0-7]/ when nil break else ungetc break end res << ch end when "x" res << ch 2.times do case ch = getc when /[0-9a-fA-F]/ when nil break else ungetc break end res << ch end when "M" res << ch if (ch = getc) != '-' ungetc else res << ch if (ch = getc) == "\\" #" res << ch res << read_escape else res << ch end end when "C", "c" #, "^" res << ch if ch == "C" and (ch = getc) != "-" ungetc else res << ch if (ch = getc) == "\\" #" res << ch res << read_escape else res << ch end end else res << ch end res end
# File rdoc/parser/ruby.rb, line 1241 def skip_inner_expression res = "" nest = 0 while (ch = getc) res << ch if ch == '}' break if nest.zero? nest -= 1 elsif ch == '{' nest += 1 end end res end
# File rdoc/parser/ruby.rb, line 537 def token set_token_position(line_no, char_no) begin begin tk = @OP.match(self) @space_seen = TkSPACE === tk rescue SyntaxError => e raise RDoc::Error, "syntax error: #{e.message}" if @exception_on_syntax_error tk = TkError.new(line_no, char_no) end end while @skip_space and TkSPACE === tk if @read_auto_clean_up get_read end # throw :eof unless tk tk end