Giant cleanup of the automata files. Fixes #1992.
git-svn-id: http://lampsvn.epfl.ch/svn-repos/scala/scala/trunk@19842 5e8d7ff9-d8ef-0310-90f0-a4852d11357a
This commit is contained in:
parent
f30fb0e47d
commit
a3862c56d2
|
@ -12,28 +12,26 @@
|
||||||
package scala.util.automata
|
package scala.util.automata
|
||||||
|
|
||||||
|
|
||||||
import scala.util.regexp.Base
|
import scala.util.regexp.{ Base }
|
||||||
|
|
||||||
import scala.collection.mutable
|
import scala.collection.{ mutable, immutable }
|
||||||
import scala.collection.immutable
|
import mutable.{ HashMap }
|
||||||
import collection.immutable.{List, Nil}
|
import immutable.{ Set }
|
||||||
import collection.{Seq, Iterator}
|
|
||||||
|
// todo: replace global variable pos with acc
|
||||||
|
|
||||||
/** this turns a regexp over A into a NondetWordAutom over A using the
|
/** this turns a regexp over A into a NondetWordAutom over A using the
|
||||||
* celebrated position automata construction (also called Berry-Sethi or
|
* celebrated position automata construction (also called Berry-Sethi or
|
||||||
* Glushkov)
|
* Glushkov)
|
||||||
*/
|
*/
|
||||||
abstract class BaseBerrySethi {
|
abstract class BaseBerrySethi {
|
||||||
|
|
||||||
val lang: Base
|
val lang: Base
|
||||||
import lang.{Alt,Eps,Meta,RegExp,Sequ,Star}
|
import lang.{ Alt, Eps, Meta, RegExp, Sequ, Star }
|
||||||
|
|
||||||
protected var pos = 0
|
protected var pos = 0
|
||||||
|
|
||||||
protected var globalFirst: immutable.Set[Int] = _
|
|
||||||
|
|
||||||
// results which hold all info for the NondetWordAutomaton
|
// results which hold all info for the NondetWordAutomaton
|
||||||
protected var follow: mutable.HashMap[Int, immutable.Set[Int]] = _
|
protected var follow: HashMap[Int, Set[Int]] = _
|
||||||
|
|
||||||
protected var finalTag: Int = _
|
protected var finalTag: Int = _
|
||||||
|
|
||||||
|
@ -41,63 +39,24 @@ abstract class BaseBerrySethi {
|
||||||
|
|
||||||
// constants --------------------------
|
// constants --------------------------
|
||||||
|
|
||||||
final val emptySet:immutable.Set[Int] = immutable.Set[Int]()
|
final val emptySet: Set[Int] = Set()
|
||||||
|
|
||||||
|
private def doComp(r: RegExp, compFunction: RegExp => Set[Int]) = r match {
|
||||||
|
case x: Alt => (x.rs map compFirst).foldLeft(emptySet)(_ ++ _)
|
||||||
|
case Eps => emptySet
|
||||||
|
case x: Meta => compFunction(x.r)
|
||||||
|
case x: Sequ =>
|
||||||
|
val (l1, l2) = x.rs span (_.isNullable)
|
||||||
|
((l1 ++ (l2 take 1)) map compFunction).foldLeft(emptySet)(_ ++ _)
|
||||||
|
case Star(t) => compFunction(t)
|
||||||
|
case _ => throw new IllegalArgumentException("unexpected pattern " + r.getClass())
|
||||||
|
}
|
||||||
|
|
||||||
/** computes first( r ) for the word regexp r */
|
/** computes first( r ) for the word regexp r */
|
||||||
protected def compFirst(r: RegExp): immutable.Set[Int] = r match {
|
protected def compFirst(r: RegExp): Set[Int] = doComp(r, compFirst)
|
||||||
case x:Alt =>
|
|
||||||
var tmp = emptySet
|
|
||||||
val it = x.rs.iterator // union
|
|
||||||
while (it.hasNext) { tmp = tmp ++ compFirst(it.next) }
|
|
||||||
tmp
|
|
||||||
case Eps =>
|
|
||||||
emptySet
|
|
||||||
//case x:Letter => emptySet + posMap(x); // singleton set
|
|
||||||
case x:Meta =>
|
|
||||||
compFirst(x.r)
|
|
||||||
case x:Sequ =>
|
|
||||||
var tmp = emptySet;
|
|
||||||
val it = x.rs.iterator; // union
|
|
||||||
while (it.hasNext) {
|
|
||||||
val z = it.next
|
|
||||||
tmp = tmp ++ compFirst(z)
|
|
||||||
if (!z.isNullable)
|
|
||||||
return tmp
|
|
||||||
}
|
|
||||||
tmp
|
|
||||||
case Star(t) =>
|
|
||||||
compFirst(t)
|
|
||||||
case _ =>
|
|
||||||
throw new IllegalArgumentException("unexpected pattern " + r.getClass())
|
|
||||||
}
|
|
||||||
|
|
||||||
/** computes last( r ) for the regexp r */
|
/** computes last( r ) for the regexp r */
|
||||||
protected def compLast(r: RegExp): immutable.Set[Int] = r match {
|
protected def compLast(r: RegExp): Set[Int] = doComp(r, compLast)
|
||||||
case x:Alt =>
|
|
||||||
var tmp = emptySet
|
|
||||||
val it = x.rs.iterator // union
|
|
||||||
while (it.hasNext) { tmp = tmp ++ compFirst(it.next) }
|
|
||||||
tmp
|
|
||||||
case Eps =>
|
|
||||||
emptySet
|
|
||||||
//case x:Letter => emptySet + posMap(x) // singleton set
|
|
||||||
case x:Meta =>
|
|
||||||
compLast(x.r)
|
|
||||||
case x:Sequ =>
|
|
||||||
var tmp = emptySet
|
|
||||||
val it = x.rs.iterator.toList.reverse.iterator // union
|
|
||||||
while (it.hasNext) {
|
|
||||||
val z = it.next
|
|
||||||
tmp = tmp ++ compLast(z)
|
|
||||||
if (!z.isNullable)
|
|
||||||
return tmp
|
|
||||||
}
|
|
||||||
tmp
|
|
||||||
case Star(t) =>
|
|
||||||
compLast(t)
|
|
||||||
case _ =>
|
|
||||||
throw new IllegalArgumentException("unexpected pattern " + r.getClass())
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Proceeds from right to left
|
/** Proceeds from right to left
|
||||||
* precondition: pos is final
|
* precondition: pos is final
|
||||||
|
@ -106,24 +65,17 @@ abstract class BaseBerrySethi {
|
||||||
* @param r ...
|
* @param r ...
|
||||||
* @return ...
|
* @return ...
|
||||||
*/
|
*/
|
||||||
protected def compFollow(r: Seq[RegExp]): immutable.Set[Int] = {
|
protected def compFollow(rs: Seq[RegExp]): Set[Int] = {
|
||||||
var first = emptySet
|
follow(0) =
|
||||||
var fol = emptySet
|
if (rs.isEmpty) emptySet
|
||||||
if (r.length > 0) {//non-empty expr
|
else rs.foldRight(Set(pos))((p, fol) => {
|
||||||
|
val first = compFollow1(fol, p)
|
||||||
|
|
||||||
val it = r.iterator.toList.reverse.iterator
|
if (p.isNullable) fol ++ first
|
||||||
|
else first
|
||||||
|
})
|
||||||
|
|
||||||
fol = fol + pos // don't modify pos !
|
follow(0)
|
||||||
while (it.hasNext) {
|
|
||||||
val p = it.next
|
|
||||||
first = compFollow1(fol, p)
|
|
||||||
fol =
|
|
||||||
if (p.isNullable) fol ++ first
|
|
||||||
else first
|
|
||||||
}
|
|
||||||
}
|
|
||||||
this.follow.update(0, fol /*first*/)
|
|
||||||
fol
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** returns the first set of an expression, setting the follow set along
|
/** returns the first set of an expression, setting the follow set along
|
||||||
|
@ -133,45 +85,20 @@ abstract class BaseBerrySethi {
|
||||||
* @param r ...
|
* @param r ...
|
||||||
* @return ...
|
* @return ...
|
||||||
*/
|
*/
|
||||||
protected def compFollow1(fol1: immutable.Set[Int], r: RegExp): immutable.Set[Int] = {
|
protected def compFollow1(fol1: Set[Int], r: RegExp): Set[Int] = r match {
|
||||||
var fol = fol1
|
case x: Alt => Set(x.rs reverseMap (compFollow1(fol1, _)) flatten: _*)
|
||||||
r match {
|
case x: Meta => compFollow1(fol1, x.r)
|
||||||
|
case x: Star => compFollow1(fol1 ++ compFirst(x.r), x.r)
|
||||||
|
case x: Sequ =>
|
||||||
|
var first = emptySet
|
||||||
|
x.rs.foldRight(fol1) { (p, fol) =>
|
||||||
|
val first = compFollow1(fol, p)
|
||||||
|
|
||||||
case x:Alt =>
|
if (p.isNullable) fol ++ first
|
||||||
var first = emptySet
|
else first
|
||||||
val it = x.rs.iterator.toList.reverse.iterator
|
}
|
||||||
while (it.hasNext)
|
first
|
||||||
first = first ++ compFollow1(fol, it.next);
|
case _ => throw new IllegalArgumentException("unexpected pattern: " + r.getClass())
|
||||||
first
|
|
||||||
|
|
||||||
/*
|
|
||||||
case x:Letter =>
|
|
||||||
val i = posMap( x );
|
|
||||||
this.follow.update( i, fol );
|
|
||||||
emptySet + i;
|
|
||||||
*/
|
|
||||||
case x:Meta =>
|
|
||||||
compFollow1(fol1, x.r)
|
|
||||||
|
|
||||||
case x:Star =>
|
|
||||||
fol = fol ++ compFirst(x.r)
|
|
||||||
compFollow1(fol, x.r)
|
|
||||||
|
|
||||||
case x:Sequ =>
|
|
||||||
var first = emptySet
|
|
||||||
val it = x.rs.iterator.toList.reverse.iterator
|
|
||||||
while (it.hasNext) {
|
|
||||||
val p = it.next
|
|
||||||
first = compFollow1(fol, p)
|
|
||||||
fol =
|
|
||||||
if (p.isNullable) fol ++ first
|
|
||||||
else first
|
|
||||||
}
|
|
||||||
first
|
|
||||||
|
|
||||||
case _ =>
|
|
||||||
throw new IllegalArgumentException("unexpected pattern: " + r.getClass())
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** returns "Sethi-length" of a pattern, creating the set of positions
|
/** returns "Sethi-length" of a pattern, creating the set of positions
|
||||||
|
@ -179,21 +106,12 @@ abstract class BaseBerrySethi {
|
||||||
*
|
*
|
||||||
* @param r ...
|
* @param r ...
|
||||||
*/
|
*/
|
||||||
// todo: replace global variable pos with acc
|
|
||||||
protected def traverse(r: RegExp): Unit = r match {
|
protected def traverse(r: RegExp): Unit = r match {
|
||||||
// (is tree automaton stuff, more than Berry-Sethi)
|
// (is tree automaton stuff, more than Berry-Sethi)
|
||||||
case x:Alt =>
|
case x: Alt => x.rs foreach traverse
|
||||||
val it = x.rs.iterator
|
case x: Sequ => x.rs foreach traverse
|
||||||
while (it.hasNext) traverse(it.next)
|
case x: Meta => traverse(x.r)
|
||||||
case x:Sequ =>
|
case Star(t) => traverse(t)
|
||||||
val it = x.rs.iterator
|
case _ => throw new IllegalArgumentException("unexp pattern " + r.getClass())
|
||||||
while (it.hasNext) traverse(it.next)
|
|
||||||
case x:Meta =>
|
|
||||||
traverse(x.r)
|
|
||||||
case Star(t) =>
|
|
||||||
traverse(t)
|
|
||||||
case _ =>
|
|
||||||
throw new IllegalArgumentException("unexp pattern " + r.getClass())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,7 +11,8 @@
|
||||||
|
|
||||||
package scala.util.automata
|
package scala.util.automata
|
||||||
|
|
||||||
import scala.collection.{immutable, mutable, Set, Seq, Map}
|
import scala.collection.{ immutable, mutable, Set, Seq, Map }
|
||||||
|
import immutable.{ BitSet }
|
||||||
|
|
||||||
/** A nondeterministic automaton. States are integers, where
|
/** A nondeterministic automaton. States are integers, where
|
||||||
* 0 is always the only initial state. Transitions are represented
|
* 0 is always the only initial state. Transitions are represented
|
||||||
|
@ -26,8 +27,8 @@ abstract class NondetWordAutom[T <: AnyRef]
|
||||||
val labels: Seq[T]
|
val labels: Seq[T]
|
||||||
|
|
||||||
val finals: Array[Int] // 0 means not final
|
val finals: Array[Int] // 0 means not final
|
||||||
val delta: Array[Map[T, immutable.BitSet]]
|
val delta: Array[Map[T, BitSet]]
|
||||||
val default: Array[immutable.BitSet]
|
val default: Array[BitSet]
|
||||||
|
|
||||||
/** returns true if the state is final */
|
/** returns true if the state is final */
|
||||||
final def isFinal(state: Int) = finals(state) > 0
|
final def isFinal(state: Int) = finals(state) > 0
|
||||||
|
@ -36,25 +37,27 @@ abstract class NondetWordAutom[T <: AnyRef]
|
||||||
final def finalTag(state: Int) = finals(state)
|
final def finalTag(state: Int) = finals(state)
|
||||||
|
|
||||||
/** returns true if the set of states contains at least one final state */
|
/** returns true if the set of states contains at least one final state */
|
||||||
final def containsFinal(Q: immutable.BitSet): Boolean = Q exists isFinal
|
final def containsFinal(Q: BitSet): Boolean = Q exists isFinal
|
||||||
|
|
||||||
/** returns true if there are no accepting states */
|
/** returns true if there are no accepting states */
|
||||||
final def isEmpty = (0 until nstates) forall (x => !isFinal(x))
|
final def isEmpty = (0 until nstates) forall (x => !isFinal(x))
|
||||||
|
|
||||||
/** returns a bitset with the next states for given state and label */
|
/** returns a bitset with the next states for given state and label */
|
||||||
def next(q: Int, a: T): immutable.BitSet = delta(q).get(a) getOrElse default(q)
|
def next(q: Int, a: T): BitSet = delta(q).getOrElse(a, default(q))
|
||||||
|
|
||||||
/** returns a bitset with the next states for given set of states and label */
|
/** returns a bitset with the next states for given set of states and label */
|
||||||
def next(Q: immutable.BitSet, a: T): immutable.BitSet = next(Q, next(_, a))
|
def next(Q: BitSet, a: T): BitSet = next(Q, next(_, a))
|
||||||
def nextDefault(Q: immutable.BitSet): immutable.BitSet = next(Q, default)
|
def nextDefault(Q: BitSet): BitSet = next(Q, default)
|
||||||
|
|
||||||
private def next(Q: immutable.BitSet, f: (Int) => immutable.BitSet): immutable.BitSet =
|
private def next(Q: BitSet, f: (Int) => BitSet): BitSet =
|
||||||
(Q map f).foldLeft(immutable.BitSet.empty)(_ ++ _)
|
(Q map f).foldLeft(BitSet.empty)(_ ++ _)
|
||||||
|
|
||||||
|
private def finalStates = 0 until nstates filter isFinal
|
||||||
override def toString = {
|
override def toString = {
|
||||||
val finalString = Map(0 until nstates filter isFinal map (j => j -> finals(j)) : _*).toString
|
|
||||||
|
val finalString = Map(finalStates map (j => j -> finals(j)) : _*).toString
|
||||||
val deltaString = (0 until nstates) .
|
val deltaString = (0 until nstates) .
|
||||||
map (i => " %d->%s\n _>%s\n".format(i, delta(i).toString, default(i).toString)) mkString
|
map (i => " %d->%s\n _>%s\n".format(i, delta(i), default(i))) mkString
|
||||||
|
|
||||||
"[NondetWordAutom nstates=%d finals=%s delta=\n%s".format(nstates, finalString, deltaString)
|
"[NondetWordAutom nstates=%d finals=%s delta=\n%s".format(nstates, finalString, deltaString)
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,10 +11,10 @@
|
||||||
|
|
||||||
package scala.util.automata
|
package scala.util.automata
|
||||||
|
|
||||||
import scala.collection.{immutable, mutable, Map}
|
import scala.collection.{ immutable, mutable, Map }
|
||||||
|
import immutable.{ Set }
|
||||||
|
import mutable.{ HashSet, HashMap }
|
||||||
import scala.util.regexp.WordExp
|
import scala.util.regexp.WordExp
|
||||||
import collection.immutable.{List, Nil}
|
|
||||||
import collection.{Seq, Iterator}
|
|
||||||
|
|
||||||
/** This class turns a regexp into a NondetWordAutom using the
|
/** This class turns a regexp into a NondetWordAutom using the
|
||||||
* celebrated position automata construction (also called Berry-Sethi or
|
* celebrated position automata construction (also called Berry-Sethi or
|
||||||
|
@ -24,38 +24,28 @@ import collection.{Seq, Iterator}
|
||||||
* @version 1.0
|
* @version 1.0
|
||||||
*/
|
*/
|
||||||
abstract class WordBerrySethi extends BaseBerrySethi {
|
abstract class WordBerrySethi extends BaseBerrySethi {
|
||||||
|
|
||||||
override val lang: WordExp
|
override val lang: WordExp
|
||||||
|
|
||||||
type _labelT = this.lang._labelT
|
import lang.{ Alt, Eps, Letter, Meta, RegExp, Sequ, Star }
|
||||||
|
|
||||||
import lang.{Alt, Eps, Letter, Meta, RegExp, Sequ, Star}
|
protected var labels: HashSet[lang._labelT] = _
|
||||||
|
|
||||||
|
|
||||||
protected var labels:mutable.HashSet[_labelT] = _
|
|
||||||
// don't let this fool you, only labelAt is a real, surjective mapping
|
// don't let this fool you, only labelAt is a real, surjective mapping
|
||||||
protected var labelAt: immutable.Map[Int, _labelT] = _ // new alphabet "gamma"
|
protected var labelAt: immutable.Map[Int, lang._labelT] = _ // new alphabet "gamma"
|
||||||
|
|
||||||
protected var deltaq: Array[mutable.HashMap[_labelT,List[Int]]] = _ // delta
|
protected var deltaq: Array[HashMap[lang._labelT, List[Int]]] = _ // delta
|
||||||
|
|
||||||
protected var defaultq: Array[List[Int]] = _ // default transitions
|
protected var defaultq: Array[List[Int]] = _ // default transitions
|
||||||
|
|
||||||
protected var initials:immutable.Set[Int] = _
|
protected var initials: Set[Int] = _
|
||||||
//NondetWordAutom revNfa
|
|
||||||
|
|
||||||
// maps a letter to an Integer ( the position )
|
|
||||||
// is not *really* needed (preorder determines position!)
|
|
||||||
//protected var posMap: mutable.HashMap[RegExp, Int] = _;
|
|
||||||
|
|
||||||
/** Computes <code>first(r)</code> for the word regexp <code>r</code>.
|
/** Computes <code>first(r)</code> for the word regexp <code>r</code>.
|
||||||
*
|
*
|
||||||
* @param r the regular expression
|
* @param r the regular expression
|
||||||
* @return the computed set <code>first(r)</code>
|
* @return the computed set <code>first(r)</code>
|
||||||
*/
|
*/
|
||||||
protected override def compFirst(r: RegExp): immutable.Set[Int] = r match {
|
protected override def compFirst(r: RegExp): Set[Int] = r match {
|
||||||
case x:Letter => emptySet + x.pos //posMap(x); // singleton set
|
case x: Letter => Set(x.pos)
|
||||||
case Eps => emptySet /*ignore*/
|
case _ => super.compFirst(r)
|
||||||
case _ => super.compFirst(r)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Computes <code>last(r)</code> for the word regexp <code>r</code>.
|
/** Computes <code>last(r)</code> for the word regexp <code>r</code>.
|
||||||
|
@ -63,10 +53,9 @@ abstract class WordBerrySethi extends BaseBerrySethi {
|
||||||
* @param r the regular expression
|
* @param r the regular expression
|
||||||
* @return the computed set <code>last(r)</code>
|
* @return the computed set <code>last(r)</code>
|
||||||
*/
|
*/
|
||||||
protected override def compLast(r: RegExp): immutable.Set[Int] = r match {
|
protected override def compLast(r: RegExp): Set[Int] = r match {
|
||||||
case x:Letter => emptySet + x.pos //posMap(x) // singleton set
|
case x: Letter => Set(x.pos)
|
||||||
case Eps => emptySet /*ignore*/
|
case _ => super.compLast(r)
|
||||||
case _ => super.compLast(r)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the first set of an expression, setting the follow set along
|
/** Returns the first set of an expression, setting the follow set along
|
||||||
|
@ -76,38 +65,25 @@ abstract class WordBerrySethi extends BaseBerrySethi {
|
||||||
* @param r the regular expression
|
* @param r the regular expression
|
||||||
* @return the computed set
|
* @return the computed set
|
||||||
*/
|
*/
|
||||||
protected override def compFollow1(fol1: immutable.Set[Int], r: RegExp): immutable.Set[Int] =
|
protected override def compFollow1(fol1: Set[Int], r: RegExp): Set[Int] = r match {
|
||||||
r match {
|
case x: Letter => follow(x.pos) = fol1 ; Set(x.pos)
|
||||||
case x:Letter =>
|
case Eps => emptySet
|
||||||
//val i = posMap(x)
|
case _ => super.compFollow1(fol1, r)
|
||||||
val i = x.pos
|
|
||||||
this.follow.update(i, fol1)
|
|
||||||
emptySet + i
|
|
||||||
case Eps =>
|
|
||||||
emptySet /*ignore*/
|
|
||||||
case _ =>
|
|
||||||
super.compFollow1(fol1, r)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** returns "Sethi-length" of a pattern, creating the set of positions
|
/** returns "Sethi-length" of a pattern, creating the set of positions
|
||||||
* along the way
|
* along the way
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
/** called at the leaves of the regexp */
|
/** called at the leaves of the regexp */
|
||||||
protected def seenLabel(r: RegExp, i: Int, label: _labelT) {
|
protected def seenLabel(r: RegExp, i: Int, label: lang._labelT) {
|
||||||
//Console.println("seenLabel (1)");
|
labelAt = labelAt.updated(i, label)
|
||||||
//this.posMap.add(r, i)
|
this.labels += label
|
||||||
this.labelAt = this.labelAt.updated(i, label)
|
|
||||||
//@ifdef if( label != Wildcard ) {
|
|
||||||
this.labels += label
|
|
||||||
//@ifdef }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// overridden in BindingBerrySethi
|
// overridden in BindingBerrySethi
|
||||||
protected def seenLabel(r: RegExp, label: _labelT): Int = {
|
protected def seenLabel(r: RegExp, label: lang._labelT): Int = {
|
||||||
//Console.println("seenLabel (2)");
|
pos += 1
|
||||||
pos = pos + 1
|
|
||||||
seenLabel(r, pos, label)
|
seenLabel(r, pos, label)
|
||||||
pos
|
pos
|
||||||
}
|
}
|
||||||
|
@ -115,82 +91,55 @@ abstract class WordBerrySethi extends BaseBerrySethi {
|
||||||
// todo: replace global variable pos with acc
|
// todo: replace global variable pos with acc
|
||||||
override def traverse(r: RegExp): Unit = r match {
|
override def traverse(r: RegExp): Unit = r match {
|
||||||
case a @ Letter(label) => a.pos = seenLabel(r, label)
|
case a @ Letter(label) => a.pos = seenLabel(r, label)
|
||||||
case Eps => /*ignore*/
|
case Eps => // ignore
|
||||||
case _ => super.traverse(r)
|
case _ => super.traverse(r)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected def makeTransition(src: Int, dest: Int, label: _labelT ) {
|
protected def makeTransition(src: Int, dest: Int, label: lang._labelT) {
|
||||||
//@ifdef compiler if( label == Wildcard )
|
|
||||||
//@ifdef compiler defaultq.add(src, dest::defaultq( src ))
|
|
||||||
//@ifdef compiler else
|
|
||||||
val q = deltaq(src)
|
val q = deltaq(src)
|
||||||
q.update(label, dest::(q.get(label) match {
|
q.update(label, dest :: q.getOrElse(label, Nil))
|
||||||
case Some(x) => x
|
|
||||||
case _ => Nil
|
|
||||||
}))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected def initialize(subexpr: Seq[RegExp]): Unit = {
|
protected def initialize(subexpr: Seq[RegExp]): Unit = {
|
||||||
//this.posMap = new mutable.HashMap[RegExp,Int]()
|
this.labelAt = immutable.Map()
|
||||||
this.labelAt = immutable.Map[Int, _labelT]()
|
this.follow = HashMap()
|
||||||
this.follow = new mutable.HashMap[Int, immutable.Set[Int]]()
|
this.labels = HashSet()
|
||||||
this.labels = new mutable.HashSet[_labelT]()
|
|
||||||
|
|
||||||
this.pos = 0
|
this.pos = 0
|
||||||
|
|
||||||
// determine "Sethi-length" of the regexp
|
// determine "Sethi-length" of the regexp
|
||||||
//activeBinders = new IndexedSeq()
|
subexpr foreach traverse
|
||||||
var it = subexpr.iterator
|
|
||||||
while (it.hasNext)
|
|
||||||
traverse(it.next)
|
|
||||||
|
|
||||||
//assert(activeBinders.isEmpty())
|
this.initials = Set(0)
|
||||||
this.initials = emptySet + 0
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected def initializeAutom() {
|
protected def initializeAutom() {
|
||||||
finals = immutable.Map.empty[Int, Int] // final states
|
finals = immutable.Map.empty[Int, Int] // final states
|
||||||
deltaq = new Array[mutable.HashMap[_labelT, List[Int]]](pos) // delta
|
deltaq = new Array[HashMap[lang._labelT, List[Int]]](pos) // delta
|
||||||
defaultq = new Array[List[Int]](pos) // default transitions
|
defaultq = new Array[List[Int]](pos) // default transitions
|
||||||
|
|
||||||
var j = 0
|
for (j <- 0 until pos) {
|
||||||
while (j < pos) {
|
deltaq(j) = HashMap[lang._labelT, List[Int]]()
|
||||||
deltaq(j) = new mutable.HashMap[_labelT,List[Int]]()
|
|
||||||
defaultq(j) = Nil
|
defaultq(j) = Nil
|
||||||
j += 1
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected def collectTransitions(): Unit = { // make transitions
|
protected def collectTransitions(): Unit = // make transitions
|
||||||
//Console.println("WBS.collectTrans, this.follow.keys = "+this.follow.keys)
|
for (j <- 0 until pos ; val fol = follow(j) ; k <- fol) {
|
||||||
//Console.println("WBS.collectTrans, pos = "+this.follow.keys)
|
if (pos == k) finals = finals.updated(j, finalTag)
|
||||||
var j = 0; while (j < pos) {
|
else makeTransition(j, k, labelAt(k))
|
||||||
//Console.println("WBS.collectTrans, j = "+j)
|
|
||||||
val fol = this.follow(j)
|
|
||||||
val it = fol.iterator
|
|
||||||
while (it.hasNext) {
|
|
||||||
val k = it.next
|
|
||||||
if (pos == k)
|
|
||||||
finals = finals.updated(j, finalTag)
|
|
||||||
else
|
|
||||||
makeTransition( j, k, labelAt(k))
|
|
||||||
}
|
|
||||||
j += 1
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
def automatonFrom(pat: RegExp, finalTag: Int): NondetWordAutom[_labelT] = {
|
def automatonFrom(pat: RegExp, finalTag: Int): NondetWordAutom[lang._labelT] = {
|
||||||
this.finalTag = finalTag
|
this.finalTag = finalTag
|
||||||
|
|
||||||
pat match {
|
pat match {
|
||||||
case x:Sequ =>
|
case x: Sequ =>
|
||||||
// (1,2) compute follow + first
|
// (1,2) compute follow + first
|
||||||
initialize(x.rs)
|
initialize(x.rs)
|
||||||
pos = pos + 1
|
pos += 1
|
||||||
globalFirst = compFollow(x.rs)
|
compFollow(x.rs) // this used to be assigned to var globalFirst and then never used.
|
||||||
|
|
||||||
//System.out.print("someFirst:");debugPrint(someFirst);
|
|
||||||
// (3) make automaton from follow sets
|
// (3) make automaton from follow sets
|
||||||
initializeAutom()
|
initializeAutom()
|
||||||
collectTransitions()
|
collectTransitions()
|
||||||
|
@ -198,68 +147,18 @@ abstract class WordBerrySethi extends BaseBerrySethi {
|
||||||
if (x.isNullable) // initial state is final
|
if (x.isNullable) // initial state is final
|
||||||
finals = finals.updated(0, finalTag)
|
finals = finals.updated(0, finalTag)
|
||||||
|
|
||||||
var delta1: immutable.Map[Int, Map[_labelT, List[Int]]] =
|
val delta1 = immutable.Map(deltaq.zipWithIndex map (_.swap): _*)
|
||||||
immutable.Map[Int, Map[_labelT, List[Int]]]()
|
val finalsArr = 0 until pos map (k => finals.getOrElse(k, 0)) toArray // 0 == not final
|
||||||
|
val initialsArr = initials.toArray
|
||||||
|
|
||||||
var i = 0
|
val deltaArr: Array[Map[lang._labelT, immutable.BitSet]] =
|
||||||
while (i < deltaq.length) {
|
(0 until pos map { x =>
|
||||||
delta1 = delta1.updated(i, deltaq(i))
|
HashMap(delta1(x).toSeq map { case (k, v) => k -> immutable.BitSet(v: _*) } : _*)
|
||||||
i += 1
|
}) toArray
|
||||||
}
|
|
||||||
val finalsArr = new Array[Int](pos)
|
|
||||||
|
|
||||||
{
|
val defaultArr = 0 until pos map (k => immutable.BitSet(defaultq(k): _*)) toArray
|
||||||
var k = 0; while (k < pos) {
|
|
||||||
finalsArr(k) = finals.get(k) match {
|
|
||||||
case Some(z) => z
|
|
||||||
case None => 0 // 0 == not final
|
|
||||||
};
|
|
||||||
k += 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
val initialsArr = new Array[Int](initials.size)
|
new NondetWordAutom[lang._labelT] {
|
||||||
val it = initials.iterator
|
|
||||||
|
|
||||||
{
|
|
||||||
var k = 0; while (k < initials.size) {
|
|
||||||
initialsArr(k) = it.next
|
|
||||||
k += 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
val deltaArr = new Array[Map[_labelT, immutable.BitSet]](pos)
|
|
||||||
|
|
||||||
{
|
|
||||||
var k = 0; while(k < pos) {
|
|
||||||
val labels = delta1(k).keysIterator
|
|
||||||
val hmap =
|
|
||||||
new mutable.HashMap[_labelT, immutable.BitSet]
|
|
||||||
for (lab <- labels) {
|
|
||||||
val trans = delta1(k)
|
|
||||||
val x = new mutable.BitSet(pos)
|
|
||||||
for (q <- trans(lab))
|
|
||||||
x += q
|
|
||||||
hmap.update(lab, x.toImmutable)
|
|
||||||
}
|
|
||||||
deltaArr(k) = hmap
|
|
||||||
k += 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
val defaultArr = new Array[immutable.BitSet](pos)
|
|
||||||
|
|
||||||
{
|
|
||||||
var k = 0; while(k < pos) {
|
|
||||||
val x = new mutable.BitSet(pos)
|
|
||||||
for (q <- defaultq(k))
|
|
||||||
x += q
|
|
||||||
defaultArr(k) = x.toImmutable
|
|
||||||
k += 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
new NondetWordAutom[_labelT] {
|
|
||||||
type _labelT = WordBerrySethi.this._labelT
|
|
||||||
val nstates = pos
|
val nstates = pos
|
||||||
val labels = WordBerrySethi.this.labels.toList
|
val labels = WordBerrySethi.this.labels.toList
|
||||||
val initials = initialsArr
|
val initials = initialsArr
|
||||||
|
@ -268,35 +167,7 @@ abstract class WordBerrySethi extends BaseBerrySethi {
|
||||||
val default = defaultArr
|
val default = defaultArr
|
||||||
}
|
}
|
||||||
case z =>
|
case z =>
|
||||||
val z1 = z.asInstanceOf[this.lang._regexpT]
|
automatonFrom(Sequ(z.asInstanceOf[this.lang._regexpT]), finalTag)
|
||||||
automatonFrom(Sequ(z1), finalTag)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
void print1() {
|
|
||||||
System.out.println("after sethi-style processing");
|
|
||||||
System.out.println("#positions:" + pos);
|
|
||||||
System.out.println("posMap:");
|
|
||||||
|
|
||||||
for (Iterator it = this.posMap.keySet().iterator();
|
|
||||||
it.hasNext(); ) {
|
|
||||||
Tree t = (Tree) it.next();
|
|
||||||
switch(t) {
|
|
||||||
case Literal( _ ):
|
|
||||||
System.out.print( "(" + t.toString() + " -> ");
|
|
||||||
String s2 = ((Integer) posMap.get(t)).toString();
|
|
||||||
System.out.print( s2 +") ");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
System.out.println("\nfollow: ");
|
|
||||||
for (int j = 1; j < pos; j++ ) {
|
|
||||||
TreeSet fol = (TreeSet) this.follow.get(new Integer(j));
|
|
||||||
System.out.print("("+j+" -> "+fol.toString()+") ");
|
|
||||||
//debugPrint( fol );
|
|
||||||
System.out.println();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
}
|
|
@ -24,22 +24,27 @@ abstract class Base
|
||||||
val isNullable: Boolean
|
val isNullable: Boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Alt( R,R,R* ) */
|
object Alt {
|
||||||
case class Alt(rs: _regexpT*) extends RegExp {
|
/** Alt( R,R,R* ) */
|
||||||
// check rs \in R,R,R*
|
def apply(rs: _regexpT*) =
|
||||||
// @todo: flattening
|
if (rs.size < 2) throw new SyntaxError("need at least 2 branches in Alt")
|
||||||
if (rs.size < 2)
|
else new Alt(rs: _*)
|
||||||
throw new SyntaxError("need at least 2 branches in Alt")
|
// Can't enforce that statically without changing the interface
|
||||||
|
// def apply(r1: _regexpT, r2: _regexpT, rs: _regexpT*) = new Alt(Seq(r1, r2) ++ rs: _*)
|
||||||
final val isNullable = rs forall (_.isNullable)
|
def unapplySeq(x: Alt) = Some(x.rs)
|
||||||
}
|
}
|
||||||
|
|
||||||
case class Sequ(rs: _regexpT*) extends RegExp {
|
class Alt private (val rs: _regexpT*) extends RegExp {
|
||||||
// @todo: flattening
|
final val isNullable = rs exists (_.isNullable)
|
||||||
// check rs \in R,R*
|
}
|
||||||
if (rs.isEmpty)
|
|
||||||
throw new SyntaxError("need at least 1 item in Sequ")
|
|
||||||
|
|
||||||
|
object Sequ {
|
||||||
|
/** Sequ( R,R* ) */
|
||||||
|
def apply(rs: _regexpT*) = if (rs.isEmpty) Eps else new Sequ(rs: _*)
|
||||||
|
def unapplySeq(x: Sequ) = Some(x.rs)
|
||||||
|
}
|
||||||
|
|
||||||
|
class Sequ private (val rs: _regexpT*) extends RegExp {
|
||||||
final val isNullable = rs forall (_.isNullable)
|
final val isNullable = rs forall (_.isNullable)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -47,6 +52,7 @@ abstract class Base
|
||||||
final lazy val isNullable = true
|
final lazy val isNullable = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The empty Sequ.
|
||||||
case object Eps extends RegExp {
|
case object Eps extends RegExp {
|
||||||
final lazy val isNullable = true
|
final lazy val isNullable = true
|
||||||
override def toString() = "Eps"
|
override def toString() = "Eps"
|
||||||
|
@ -57,7 +63,4 @@ abstract class Base
|
||||||
final val isNullable = r1.isNullable
|
final val isNullable = r1.isNullable
|
||||||
def r = r1
|
def r = r1
|
||||||
}
|
}
|
||||||
|
|
||||||
final def mkSequ(rs: _regexpT *): RegExp =
|
|
||||||
if (rs.isEmpty) Eps else Sequ(rs : _*)
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -114,7 +114,7 @@ class ElementValidator() extends Function1[Node,Boolean] {
|
||||||
case _: ELEMENTS =>
|
case _: ELEMENTS =>
|
||||||
dfa isFinal {
|
dfa isFinal {
|
||||||
getIterable(nodes, false).foldLeft(0) { (q, e) =>
|
getIterable(nodes, false).foldLeft(0) { (q, e) =>
|
||||||
(dfa delta q get e) getOrElse (throw ValidationException("element %s not allowed here" format e))
|
(dfa delta q).getOrElse(e, throw ValidationException("element %s not allowed here" format e))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue