Documentation
¶
Overview ¶
Package prattle implements a general purpose, unicode-aware lexical scanner and top down operator precedence parser suitable for parsing LL(1) grammars. The scanner and parser can be used independently from each other if desired.
Example (Interpreter) ¶
This example demonstrates parsing a simple programming language that consists of a sequence of statements.
package main
import (
"fmt"
"strconv"
"unicode"
"github.com/askeladdk/prattle"
)
const (
ksemicolon = 1 + iota
kassign
kplus
kident
knumber
)
func testScan(s *prattle.Scanner) int {
s.ExpectAny(unicode.IsSpace)
s.Skip()
switch {
case s.Done():
return 0
case s.ExpectOne(unicode.IsLetter):
s.ExpectAny(unicode.IsLetter)
return kident
case s.ExpectOne(unicode.IsDigit):
s.ExpectAny(unicode.IsDigit)
return knumber
case s.Expect('='):
return kassign
case s.Expect('+'):
return kplus
case s.Expect(';'):
return ksemicolon
}
s.Advance()
return -1
}
type testDriver struct {
stack []prattle.Token
idents map[string]int
}
func (d *testDriver) pop() (v prattle.Token) {
if n := len(d.stack); n > 0 {
v, d.stack = d.stack[n-1], d.stack[:n-1]
}
return v
}
func (d *testDriver) push(v prattle.Token) {
d.stack = append(d.stack, v)
}
func (d *testDriver) Precedence(kind int) int {
return kind
}
func (d *testDriver) tonumber(t prattle.Token) int {
if t.Kind == kident {
return d.idents[t.Text]
}
num, _ := strconv.Atoi(t.Text)
return num
}
func (d *testDriver) plus(p *prattle.Parser, t prattle.Token) error {
if err := p.Parse(d.Precedence(t.Kind)); err != nil {
return err
}
right := d.tonumber(d.pop())
left := d.tonumber(d.pop())
d.push(prattle.Token{
Kind: knumber,
Text: strconv.Itoa(left + right),
})
return nil
}
func (d *testDriver) assign(p *prattle.Parser, t prattle.Token) error {
if err := p.Parse(d.Precedence(kassign)); err != nil {
return err
}
right := d.pop()
left := d.pop()
d.idents[left.Text] = d.tonumber(right)
return nil
}
func (d *testDriver) primitive(p *prattle.Parser, t prattle.Token) error {
d.push(t)
return nil
}
func (d *testDriver) Prefix(kind int) prattle.ParseFunc {
switch kind {
default:
return nil
case kident, knumber:
return d.primitive
}
}
func (d *testDriver) Infix(kind int) prattle.ParseFunc {
switch kind {
default:
return nil
case kplus:
return d.plus
case kassign:
return d.assign
}
}
func (d *testDriver) ParseError(t prattle.Token) error {
return fmt.Errorf("%s: unexpected '%s'", t.Position, t.Text)
}
// This example demonstrates parsing a simple programming language that consists of a sequence of statements.
func main() {
c := testDriver{
idents: make(map[string]int),
}
source := "a = 1;\nb = 2;\nc = a+b+b+a;\n"
s := prattle.Scanner{Scan: testScan}
p := prattle.Parser{Driver: &c}
p.Init(s.InitWithString(source))
// Parse expressions separated by semicolons.
for p.Peek().Kind != 0 {
if err := p.Parse(ksemicolon); err != nil {
fmt.Println(err)
return
} else if !p.Expect(ksemicolon) {
fmt.Println("expected semicolon")
return
}
}
fmt.Printf("c = %d\n", c.idents["c"])
}
Output: c = 6
Example (Readme) ¶
This example is described in the readme.
package main
import (
"fmt"
"strconv"
"unicode"
"github.com/askeladdk/prattle"
)
type driver struct {
stack []int
}
func (d *driver) push(i int) {
d.stack = append(d.stack, i)
}
func (d *driver) pop() (i int) {
n := len(d.stack)
i, d.stack = d.stack[n-1], d.stack[:n-1]
return
}
func (d *driver) number(p *prattle.Parser, t prattle.Token) error {
n, _ := strconv.Atoi(t.Text)
d.push(n)
return nil
}
func (d *driver) add(p *prattle.Parser, t prattle.Token) error {
// First parse the right hand operator.
if err := p.Parse(d.Precedence(t.Kind)); err != nil {
return err
}
right := d.pop()
left := d.pop()
acc := left + right
fmt.Printf("%d + %d = %d\n", left, right, acc)
d.push(acc)
return nil
}
func (d *driver) Prefix(k int) prattle.ParseFunc {
if k == 2 {
return d.number
}
return nil
}
func (d *driver) Infix(k int) prattle.ParseFunc {
if k == 1 {
return d.add
}
return nil
}
func (d *driver) Precedence(k int) int {
return k
}
func (d *driver) ParseError(t prattle.Token) error {
return fmt.Errorf("%s", t)
}
// This example is described in the readme.
func main() {
scanner := prattle.Scanner{
Scan: func(s *prattle.Scanner) int {
// Skip whitespaces.
s.ExpectAny(unicode.IsSpace)
s.Skip()
switch {
case s.Done(): // Stop if the entire input has been consumed.
return 0
case s.Expect('+'):
return 1
case s.ExpectOne(unicode.IsDigit): // Read a number consisting of one or more digits.
s.ExpectAny(unicode.IsDigit)
return 2
}
s.Advance()
return -1
},
}
parser := prattle.Parser{
Driver: &driver{},
}
source := "1 + 23 + 456 + 7890"
scanner.InitWithString(source)
if err := parser.Init(&scanner).Parse(0); err != nil {
fmt.Println(err)
}
}
Output: 1 + 23 = 24 24 + 456 = 480 480 + 7890 = 8370
Example (Sentence) ¶
This example demonstrates tokenising a sentence into words and punctuation.
package main
import (
"fmt"
"unicode"
"github.com/askeladdk/prattle"
)
func main() {
scan := func(s *prattle.Scanner) int {
// Skip any whitespace.
s.ExpectAny(unicode.IsSpace)
s.Skip()
// Return an appropriate token kind.
switch {
case s.Done(): // EOF
return 0
case s.ExpectOne(unicode.IsLetter): // Word
s.ExpectAny(unicode.IsLetter)
return 1
case s.ExpectOne(unicode.IsPunct): // Punctuation
return 2
}
// Unrecognized character.
s.Advance()
return -1
}
sentence := "I love it when a plan comes together!"
s := (&prattle.Scanner{
Scan: scan,
}).InitWithString(sentence)
for tok, ok := s.Next(); ok; tok, ok = s.Next() {
fmt.Printf("[%d] %s\n", tok.Kind, tok.Text)
}
}
Output: [1] I [1] love [1] it [1] when [1] a [1] plan [1] comes [1] together [2] !
Index ¶
- Variables
- type AcceptFunc
- type Driver
- type Iterator
- type ParseFunc
- type Parser
- type Position
- type ScanFunc
- type Scanner
- func (s *Scanner) Advance()
- func (s *Scanner) Done() bool
- func (s *Scanner) Expect(r rune) bool
- func (s *Scanner) ExpectAny(accept AcceptFunc)
- func (s *Scanner) ExpectOne(accept AcceptFunc) bool
- func (s *Scanner) InitWithReader(r io.RuneReader) *Scanner
- func (s *Scanner) InitWithString(source string) *Scanner
- func (s *Scanner) Next() (Token, bool)
- func (s *Scanner) Peek() rune
- func (s *Scanner) Skip()
- func (s *Scanner) Text() string
- type Token
Examples ¶
Constants ¶
This section is empty.
Variables ¶
var NonAssoc error = errors.New("non-associative operator")
NonAssoc is returned by infix ParseFuncs to indicate that an operator is non-associative.
Functions ¶
This section is empty.
Types ¶
type AcceptFunc ¶
AcceptFunc accepts a rune.
func OneOf ¶
func OneOf(chars string) AcceptFunc
OneOf returns an AcceptFunc that reports whether a rune appears in chars.
type Driver ¶ added in v0.0.3
type Driver interface {
// Infix associates an infix ParseFunc with a token.
// Returning nil is a parse error.
Infix(kind int) ParseFunc
// Prefix associates a prefix ParseFunc with a token.
// Returning nil is a parse error.
Prefix(kind int) ParseFunc
// Precedence associates an operator precedence with a token.
// The higher the precedence, the tighter the token binds.
Precedence(kind int) (precedence int)
// ParseError is called by the Parser when it encounters a token that it cannot parse.
ParseError(Token) error
}
Driver drives the parsing algorithm by associating tokens to parser functions. It is expected to hold the parse state and results like the syntax tree.
type Iterator ¶ added in v0.0.6
type Iterator interface {
// Next returns the next token in the stream
// and yields true until the stream ends.
Next() (token Token, ok bool)
}
Iterator represents a stream of tokens.
type Parser ¶
type Parser struct {
// Driver drives the Parser.
Driver
// contains filtered or unexported fields
}
Parser implements the Pratt parsing algorithm, also known as the top down operator precedence (TDOP) algorithm. This is a recursive descent algorithm that handles operator precedence in a simple and flexible manner.
Parser consumes tokens from an Iterator and uses a Driver to determine precedence and executing parsing functions.
type Position ¶
type Position struct {
// Filename is the filename of the input, if any.
Filename string
// Offset is the byte offset, starting at 0.
Offset int
// Line is the line number, starting at 1.
Line int
// Column is the column number, starting at 1 (character count per line).
Column int
}
Position represents the position of a Token in the input string.
type ScanFunc ¶
ScanFunc scans the next token and returns its kind. Zero is reserved to signal end-of-input.
type Scanner ¶
type Scanner struct {
// Position of the last read token.
// The Filename field will never be modified by Scanner.
Position
// Scan scans tokens.
Scan ScanFunc
// contains filtered or unexported fields
}
Scanner produces a stream of tokens from a string or an io.RuneReader.
func (*Scanner) ExpectAny ¶
func (s *Scanner) ExpectAny(accept AcceptFunc)
ExpectAny advances the cursor zero or more times.
func (*Scanner) ExpectOne ¶
func (s *Scanner) ExpectOne(accept AcceptFunc) bool
ExpectOne advances the cursor if the current rune is accepted.
func (*Scanner) InitWithReader ¶ added in v0.0.4
func (s *Scanner) InitWithReader(r io.RuneReader) *Scanner
Init initializes a Scanner with an input reader and returns it. When initialized this way, the Scanner tokenizes unbounded streams but must allocate.
func (*Scanner) InitWithString ¶ added in v0.0.4
Init initializes a Scanner with an input string and returns it. When initialized this way, the Scanner tokenizes without allocations.
