Giant cleanup of the automata files. Fixes #1992.

git-svn-id: http://lampsvn.epfl.ch/svn-repos/scala/scala/trunk@19842 5e8d7ff9-d8ef-0310-90f0-a4852d11357a
This commit is contained in:
extempore 2009-11-24 18:59:58 +00:00
parent f30fb0e47d
commit a3862c56d2
5 changed files with 145 additions and 350 deletions

View File

@ -12,28 +12,26 @@
package scala.util.automata
import scala.util.regexp.Base
import scala.util.regexp.{ Base }
import scala.collection.mutable
import scala.collection.immutable
import collection.immutable.{List, Nil}
import collection.{Seq, Iterator}
import scala.collection.{ mutable, immutable }
import mutable.{ HashMap }
import immutable.{ Set }
// todo: replace global variable pos with acc
/** this turns a regexp over A into a NondetWordAutom over A using the
* celebrated position automata construction (also called Berry-Sethi or
* Glushkov)
*/
abstract class BaseBerrySethi {
val lang: Base
import lang.{Alt,Eps,Meta,RegExp,Sequ,Star}
import lang.{ Alt, Eps, Meta, RegExp, Sequ, Star }
protected var pos = 0
protected var globalFirst: immutable.Set[Int] = _
// results which hold all info for the NondetWordAutomaton
protected var follow: mutable.HashMap[Int, immutable.Set[Int]] = _
protected var follow: HashMap[Int, Set[Int]] = _
protected var finalTag: Int = _
@ -41,63 +39,24 @@ abstract class BaseBerrySethi {
// constants --------------------------
final val emptySet:immutable.Set[Int] = immutable.Set[Int]()
final val emptySet: Set[Int] = Set()
/** Shared worker for the set computations that recurse structurally over a
 *  regexp.
 *
 *  @param r            the expression to analyse
 *  @param compFunction the computation applied to sub-expressions; callers
 *                      pass themselves so that recursion goes through any
 *                      override of the calling method
 *  @return the union of the sets computed for the relevant sub-expressions
 *  @throws IllegalArgumentException if `r` is a pattern not handled here
 */
private def doComp(r: RegExp, compFunction: RegExp => Set[Int]): Set[Int] = r match {
  // Every branch of an alternation contributes. Recurse with compFunction
  // (previously hard-wired to compFirst, which gave the wrong sets whenever
  // the caller was not compFirst).
  case x: Alt   => (x.rs map compFunction).foldLeft(emptySet)(_ ++ _)
  case Eps      => emptySet
  case x: Meta  => compFunction(x.r)
  case x: Sequ  =>
    // Scans from the left: the leading nullable elements plus the first
    // non-nullable element contribute.
    val (nullables, rest) = x.rs span (_.isNullable)
    ((nullables ++ (rest take 1)) map compFunction).foldLeft(emptySet)(_ ++ _)
  case Star(t)  => compFunction(t)
  case _        => throw new IllegalArgumentException("unexpected pattern " + r.getClass())
}
/** computes first( r ) for the word regexp r */
// Single definition: delegates to doComp and passes itself so that recursion
// on sub-expressions goes through subclass overrides of compFirst.
protected def compFirst(r: RegExp): Set[Int] = doComp(r, compFirst)
/** computes last( r ) for the regexp r */
// last(r) mirrors first(r): alternations union the last-sets of every branch,
// and sequences are scanned from the RIGHT, collecting the trailing nullable
// elements plus the right-most non-nullable one. The remaining shapes are
// direction-independent and go through doComp.
protected def compLast(r: RegExp): Set[Int] = r match {
  case x: Alt  =>
    (x.rs map compLast).foldLeft(emptySet)(_ ++ _)
  case x: Sequ =>
    val (trailingNullables, rest) = x.rs.reverse span (_.isNullable)
    ((trailingNullables ++ (rest take 1)) map compLast).foldLeft(emptySet)(_ ++ _)
  case _       =>
    doComp(r, compLast)
}
/** Starts from the right-to-left
* precondition: pos is final
@ -106,24 +65,17 @@ abstract class BaseBerrySethi {
* @param r ...
* @return ...
*/
// Walks the sequence right-to-left, threading the follow set through
// compFollow1, and records the result as the follow set of state 0.
// `pos` is only read here, never modified.
protected def compFollow(rs: Seq[RegExp]): Set[Int] = {
  val initialFollow: Set[Int] =
    if (rs.isEmpty) emptySet
    else rs.foldRight(Set(pos)) { (p, fol) =>
      val first = compFollow1(fol, p)
      // a nullable element lets whatever follows it show through
      if (p.isNullable) fol ++ first
      else first
    }
  follow(0) = initialFollow
  initialFollow
}
/** returns the first set of an expression, setting the follow set along
@ -133,45 +85,20 @@ abstract class BaseBerrySethi {
* @param r ...
* @return ...
*/
// Returns the set computed for `r` given the follow set `fol1` of whatever
// comes after it; subclasses record follow sets at the leaves.
protected def compFollow1(fol1: Set[Int], r: RegExp): Set[Int] = r match {
  // branches are visited right-to-left, each with the same follow set
  case x: Alt   => Set((x.rs reverseMap (compFollow1(fol1, _))).flatten: _*)
  case x: Meta  => compFollow1(fol1, x.r)
  // a starred expression may be followed by its own first positions
  case x: Star  => compFollow1(fol1 ++ compFirst(x.r), x.r)
  case x: Sequ  =>
    // The fold result IS the answer. A separately declared `first` that is
    // shadowed inside the lambda would stay empty and be returned instead.
    x.rs.foldRight(fol1) { (p, fol) =>
      val first = compFollow1(fol, p)
      if (p.isNullable) fol ++ first
      else first
    }
  case _        => throw new IllegalArgumentException("unexpected pattern: " + r.getClass())
}
/** returns "Sethi-length" of a pattern, creating the set of position
@ -179,21 +106,12 @@ abstract class BaseBerrySethi {
*
* @param r ...
*/
// todo: replace global variable pos with acc
// Visits every sub-expression; subclasses override this to handle leaves.
// One case per shape (duplicated cases would leave the later ones unreachable).
protected def traverse(r: RegExp): Unit = r match {
  // (is tree automaton stuff, more than Berry-Sethi)
  case x: Alt   => x.rs foreach traverse
  case x: Sequ  => x.rs foreach traverse
  case x: Meta  => traverse(x.r)
  case Star(t)  => traverse(t)
  case _        => throw new IllegalArgumentException("unexp pattern " + r.getClass())
}
}

View File

@ -11,7 +11,8 @@
package scala.util.automata
import scala.collection.{immutable, mutable, Set, Seq, Map}
import scala.collection.{ immutable, mutable, Set, Seq, Map }
import immutable.{ BitSet }
/** A nondeterministic automaton. States are integers, where
* 0 is always the only initial state. Transitions are represented
@ -26,8 +27,8 @@ abstract class NondetWordAutom[T <: AnyRef]
val labels: Seq[T]
val finals: Array[Int] // 0 means not final
// per-state transition table: label -> set of successor states
val delta: Array[Map[T, BitSet]]
// per-state successor set used when no entry in delta matches the label
val default: Array[BitSet]
/** returns true if the state is final */
final def isFinal(state: Int) = finals(state) > 0
@ -36,25 +37,27 @@ abstract class NondetWordAutom[T <: AnyRef]
final def finalTag(state: Int) = finals(state)
/** returns true if the set of states contains at least one final state */
final def containsFinal(Q: BitSet): Boolean = Q exists isFinal
/** returns true if there are no accepting states */
final def isEmpty = (0 until nstates) forall (x => !isFinal(x))
/** returns a bitset with the next states for given state and label */
// falls back to the state's default successors when `a` has no entry
def next(q: Int, a: T): BitSet = delta(q).getOrElse(a, default(q))
/** returns a bitset with the next states for given state and label */
// set-level versions: union of the per-state results over all states in Q
def next(Q: BitSet, a: T): BitSet = next(Q, next(_, a))
def nextDefault(Q: BitSet): BitSet = next(Q, default)
// applies f to every state in Q and unions the resulting bitsets
private def next(Q: BitSet, f: (Int) => BitSet): BitSet =
  (Q map f).foldLeft(BitSet.empty)(_ ++ _)
private def finalStates = 0 until nstates filter isFinal
/** Renders the number of states, the final states with their tags, and for
 *  every state its labelled and default transitions.
 */
override def toString = {
  val finalString = Map(finalStates map (j => j -> finals(j)) : _*).toString
  // Explicit `.mkString`: a postfix `mkString` directly followed by the next
  // line's expression would be parsed as an infix call taking that expression.
  val deltaString = (0 until nstates).
    map(i => " %d->%s\n _>%s\n".format(i, delta(i), default(i))).mkString
  "[NondetWordAutom nstates=%d finals=%s delta=\n%s".format(nstates, finalString, deltaString)
}

View File

@ -11,10 +11,10 @@
package scala.util.automata
import scala.collection.{immutable, mutable, Map}
import scala.collection.{ immutable, mutable, Map }
import immutable.{ Set }
import mutable.{ HashSet, HashMap }
import scala.util.regexp.WordExp
import collection.immutable.{List, Nil}
import collection.{Seq, Iterator}
/** This class turns a regexp into a NondetWordAutom using the
* celebrated position automata construction (also called Berry-Sethi or
@ -24,38 +24,28 @@ import collection.{Seq, Iterator}
* @version 1.0
*/
abstract class WordBerrySethi extends BaseBerrySethi {
override val lang: WordExp
type _labelT = this.lang._labelT
import lang.{ Alt, Eps, Letter, Meta, RegExp, Sequ, Star }
import lang.{Alt, Eps, Letter, Meta, RegExp, Sequ, Star}
protected var labels:mutable.HashSet[_labelT] = _
protected var labels: HashSet[lang._labelT] = _
// don't let this fool you, only labelAt is a real, surjective mapping
protected var labelAt: immutable.Map[Int, _labelT] = _ // new alphabet "gamma"
protected var labelAt: immutable.Map[Int, lang._labelT] = _ // new alphabet "gamma"
protected var deltaq: Array[mutable.HashMap[_labelT,List[Int]]] = _ // delta
protected var deltaq: Array[HashMap[lang._labelT, List[Int]]] = _ // delta
protected var defaultq: Array[List[Int]] = _ // default transitions
protected var initials:immutable.Set[Int] = _
//NondetWordAutom revNfa
// maps a letter to an Integer ( the position )
// is not *really* needed (preorder determines position!)
//protected var posMap: mutable.HashMap[RegExp, Int] = _;
protected var initials: Set[Int] = _
/** Computes <code>first(r)</code> where the word regexp <code>r</code>.
*
* @param r the regular expression
* @return the computed set <code>first(r)</code>
*/
protected override def compFirst(r: RegExp): Set[Int] = r match {
  // a letter contributes exactly its own position
  case x: Letter  => Set(x.pos)
  // everything else is left to the superclass
  case _          => super.compFirst(r)
}
/** Computes <code>last(r)</code> where the word regexp <code>r</code>.
@ -63,10 +53,9 @@ abstract class WordBerrySethi extends BaseBerrySethi {
* @param r the regular expression
* @return the computed set <code>last(r)</code>
*/
protected override def compLast(r: RegExp): Set[Int] = r match {
  // a letter contributes exactly its own position
  case x: Letter  => Set(x.pos)
  // everything else is left to the superclass
  case _          => super.compLast(r)
}
/** Returns the first set of an expression, setting the follow set along
@ -76,38 +65,25 @@ abstract class WordBerrySethi extends BaseBerrySethi {
* @param r the regular expression
* @return the computed set
*/
protected override def compFollow1(fol1: Set[Int], r: RegExp): Set[Int] = r match {
  case x: Letter  =>
    // record the follow set of this position, then report the position itself
    follow(x.pos) = fol1
    Set(x.pos)
  case Eps        => emptySet
  case _          => super.compFollow1(fol1, r)
}
/** returns "Sethi-length" of a pattern, creating the set of position
* along the way
*/
/** called at the leaves of the regexp */
// Registers `label` at position `i` and adds it to the set of seen labels.
// `r` is not used by this implementation.
protected def seenLabel(r: RegExp, i: Int, label: lang._labelT): Unit = {
  labelAt = labelAt.updated(i, label)
  this.labels += label
}
// overridden in BindingBerrySethi
// Allocates the next position for a leaf, registers its label there, and
// returns the position that was assigned.
protected def seenLabel(r: RegExp, label: lang._labelT): Int = {
  pos += 1
  seenLabel(r, pos, label)
  pos
}
@ -115,151 +91,74 @@ abstract class WordBerrySethi extends BaseBerrySethi {
// todo: replace global variable pos with acc
override def traverse(r: RegExp): Unit = r match {
  // leaves get a fresh position, stored back on the letter
  case a @ Letter(label)  => a.pos = seenLabel(r, label)
  case Eps                => // ignore
  case _                  => super.traverse(r)
}
// Adds `dest` to the front of the successor list of `src` under `label`,
// starting a new list if the label had no entry yet.
protected def makeTransition(src: Int, dest: Int, label: lang._labelT): Unit = {
  val q = deltaq(src)
  q.update(label, dest :: q.getOrElse(label, Nil))
}
// Resets the construction state, then traverses every sub-expression so that
// positions and labels are assigned.
protected def initialize(subexpr: Seq[RegExp]): Unit = {
  this.labelAt = immutable.Map()
  this.follow = HashMap()
  this.labels = HashSet()
  this.pos = 0

  // determine "Sethi-length" of the regexp
  subexpr foreach traverse

  this.initials = Set(0)
}
// Allocates empty transition tables for states 0 until pos and clears the
// map of final states.
protected def initializeAutom(): Unit = {
  finals   = immutable.Map.empty[Int, Int]                     // final states
  deltaq   = new Array[HashMap[lang._labelT, List[Int]]](pos)  // delta
  defaultq = new Array[List[Int]](pos)                         // default transitions

  for (j <- 0 until pos) {
    deltaq(j) = HashMap[lang._labelT, List[Int]]()
    defaultq(j) = Nil
  }
}
// Turns the follow sets into transitions: for every state j and every k in
// follow(j), k == pos marks j as final (tagged with finalTag); any other k
// becomes a transition j -> k on the label stored at k.
protected def collectTransitions(): Unit =
  for (j <- 0 until pos; k <- follow(j)) {
    if (pos == k) finals = finals.updated(j, finalTag)
    else makeTransition(j, k, labelAt(k))
  }
def automatonFrom(pat: RegExp, finalTag: Int): NondetWordAutom[_labelT] = {
def automatonFrom(pat: RegExp, finalTag: Int): NondetWordAutom[lang._labelT] = {
this.finalTag = finalTag
pat match {
case x:Sequ =>
case x: Sequ =>
// (1,2) compute follow + first
initialize(x.rs)
pos = pos + 1
globalFirst = compFollow(x.rs)
pos += 1
compFollow(x.rs) // this used to be assigned to var globalFirst and then never used.
//System.out.print("someFirst:");debugPrint(someFirst);
// (3) make automaton from follow sets
initializeAutom()
collectTransitions()
if (x.isNullable) // initial state is final
finals = finals.updated(0, finalTag)
val delta1 = immutable.Map(deltaq.zipWithIndex map (_.swap): _*)
val finalsArr = 0 until pos map (k => finals.getOrElse(k, 0)) toArray // 0 == not final
val initialsArr = initials.toArray
var delta1: immutable.Map[Int, Map[_labelT, List[Int]]] =
immutable.Map[Int, Map[_labelT, List[Int]]]()
val deltaArr: Array[Map[lang._labelT, immutable.BitSet]] =
(0 until pos map { x =>
HashMap(delta1(x).toSeq map { case (k, v) => k -> immutable.BitSet(v: _*) } : _*)
}) toArray
var i = 0
while (i < deltaq.length) {
delta1 = delta1.updated(i, deltaq(i))
i += 1
}
val finalsArr = new Array[Int](pos)
val defaultArr = 0 until pos map (k => immutable.BitSet(defaultq(k): _*)) toArray
{
var k = 0; while (k < pos) {
finalsArr(k) = finals.get(k) match {
case Some(z) => z
case None => 0 // 0 == not final
};
k += 1
}
}
val initialsArr = new Array[Int](initials.size)
val it = initials.iterator
{
var k = 0; while (k < initials.size) {
initialsArr(k) = it.next
k += 1
}
}
val deltaArr = new Array[Map[_labelT, immutable.BitSet]](pos)
{
var k = 0; while(k < pos) {
val labels = delta1(k).keysIterator
val hmap =
new mutable.HashMap[_labelT, immutable.BitSet]
for (lab <- labels) {
val trans = delta1(k)
val x = new mutable.BitSet(pos)
for (q <- trans(lab))
x += q
hmap.update(lab, x.toImmutable)
}
deltaArr(k) = hmap
k += 1
}
}
val defaultArr = new Array[immutable.BitSet](pos)
{
var k = 0; while(k < pos) {
val x = new mutable.BitSet(pos)
for (q <- defaultq(k))
x += q
defaultArr(k) = x.toImmutable
k += 1
}
}
new NondetWordAutom[_labelT] {
type _labelT = WordBerrySethi.this._labelT
new NondetWordAutom[lang._labelT] {
val nstates = pos
val labels = WordBerrySethi.this.labels.toList
val initials = initialsArr
@ -268,35 +167,7 @@ abstract class WordBerrySethi extends BaseBerrySethi {
val default = defaultArr
}
case z =>
val z1 = z.asInstanceOf[this.lang._regexpT]
automatonFrom(Sequ(z1), finalTag)
automatonFrom(Sequ(z.asInstanceOf[this.lang._regexpT]), finalTag)
}
}
/*
void print1() {
System.out.println("after sethi-style processing");
System.out.println("#positions:" + pos);
System.out.println("posMap:");
for (Iterator it = this.posMap.keySet().iterator();
it.hasNext(); ) {
Tree t = (Tree) it.next();
switch(t) {
case Literal( _ ):
System.out.print( "(" + t.toString() + " -> ");
String s2 = ((Integer) posMap.get(t)).toString();
System.out.print( s2 +") ");
}
}
System.out.println("\nfollow: ");
for (int j = 1; j < pos; j++ ) {
TreeSet fol = (TreeSet) this.follow.get(new Integer(j));
System.out.print("("+j+" -> "+fol.toString()+") ");
//debugPrint( fol );
System.out.println();
}
}
*/
}
}

View File

@ -23,23 +23,28 @@ abstract class Base
abstract class RegExp {
val isNullable: Boolean
}
object Alt {
  /** Alt( R,R,R* )
   *
   *  @throws SyntaxError if fewer than two branches are given
   */
  def apply(rs: _regexpT*): Alt =
    if (rs.size < 2) throw new SyntaxError("need at least 2 branches in Alt")
    else new Alt(rs: _*)
  // Can't enforce that statically without changing the interface
  // def apply(r1: _regexpT, r2: _regexpT, rs: _regexpT*) = new Alt(Seq(r1, r2) ++ rs: _*)
  def unapplySeq(x: Alt) = Some(x.rs)
}
class Alt private (val rs: _regexpT*) extends RegExp {
  // an alternation is nullable as soon as ONE branch is nullable
  final val isNullable = rs exists (_.isNullable)
}
object Sequ {
  /** Sequ( R,R* ); an empty argument list yields Eps instead of a Sequ. */
  def apply(rs: _regexpT*): RegExp = if (rs.isEmpty) Eps else new Sequ(rs: _*)
  def unapplySeq(x: Sequ) = Some(x.rs)
}
class Sequ private (val rs: _regexpT*) extends RegExp {
  // a sequence is nullable only if EVERY element is nullable
  final val isNullable = rs forall (_.isNullable)
}
@ -47,6 +52,7 @@ abstract class Base
final lazy val isNullable = true
}
// The empty Sequ.
case object Eps extends RegExp {
final lazy val isNullable = true
override def toString() = "Eps"
@ -57,7 +63,4 @@ abstract class Base
final val isNullable = r1.isNullable
def r = r1
}
final def mkSequ(rs: _regexpT *): RegExp =
if (rs.isEmpty) Eps else Sequ(rs : _*)
}

View File

@ -114,7 +114,7 @@ class ElementValidator() extends Function1[Node,Boolean] {
case _: ELEMENTS =>
dfa isFinal {
getIterable(nodes, false).foldLeft(0) { (q, e) =>
(dfa delta q get e) getOrElse (throw ValidationException("element %s not allowed here" format e))
(dfa delta q).getOrElse(e, throw ValidationException("element %s not allowed here" format e))
}
}
}