\documentclass[11pt]{article}

\usepackage{graphicx}
\usepackage{colacl06}
\usepackage{times}
\usepackage{latexsym}
\setlength\titlebox{6.5cm}    % Expanding the titlebox
\usepackage{amsmath,amstext}
\usepackage{url}
\usepackage{color}
\usepackage{fancybox}
\usepackage{texnames}
\usepackage{xspace}

%% Resizing legends
\setlength\abovecaptionskip{0.25ex}
\setlength\belowcaptionskip{0.25ex}

%% Commands
\newcommand{\natlang}{\textit}
\newcommand{\FIXME}[1]{\{\textbf{FIXME:} \textit{#1}\}}
%% Keywords
\newcommand{\HPSG}{\textsc{hpsg}\xspace}
\newcommand{\Tag}{\textsc{tag}\xspace}
\newcommand{\LFG}{\textsc{lfg}\xspace}
\newcommand{\xmg}{\texttt{XMG}\xspace}
\newcommand{\geni}{\texttt{GenI}\xspace}
\newcommand{\gtester}{\texttt{gtester}\xspace}
\newcommand{\LKB}{\texttt{LKB}\xspace}
\newcommand{\xle}{\texttt{XLE}\xspace}
\newcommand{\xtag}{\texttt{XTAG}\xspace}
\newcommand{\dyalog}{\texttt{DyALog}\xspace}
\newcommand{\llp}{\texttt{LLP2}\xspace}

\title{SemTAG, the LORIA toolbox for TAG-based Parsing and Generation}
\author{Eric Kow \\
  INRIA / LORIA \\
  Universit\'e Henri Poincar\'e \\
  615, rue du Jardin Botanique \\
  F-54 600 Villers-L\`es-Nancy\\
  {\tt kow@loria.fr}  \And
  \hspace{1cm}
  Yannick Parmentier \\
  \hspace{1cm}
  INRIA / LORIA \\
  \hspace{1cm}
  Universit\'e Henri Poincar\'e \\
  \hspace{1cm}
  615, rue du Jardin Botanique \\
  \hspace{1cm}
  F-54 600 Villers-L\`es-Nancy\\
  \hspace{1cm}
  {\tt parmenti@loria.fr} \And
  \hspace{1cm}
  Claire Gardent \\
  \hspace{1cm}
  CNRS / LORIA \\
  \hspace{1cm}
  615, rue du Jardin Botanique \\
  \hspace{1cm}
  F-54 600 Villers-L\`es-Nancy\\
  \hspace{1cm}
  {\tt gardent@loria.fr}
}

\date{}

\begin{document}
\maketitle
\begin{abstract}
In this paper, we introduce {\sc SemTAG}, a toolbox for TAG-based
parsing and generation. This environment supports the development of
wide-coverage grammars and differs from existing environments for
\Tag\ such as \xtag~\cite{xtag01lexicalized} in that it includes a
semantic dimension. {\sc SemTAG} is open-source and freely available.
\end{abstract}

\section{Introduction}
\label{sec:introduction}
% yannick

In this paper we introduce a toolbox that allows for both parsing and
generation with \Tag.  This toolbox combines existing software and
aims at facilitating grammar development. More precisely, this toolbox
includes\footnote{All these tools are freely available; more
information and links can be found at \url{http://trac.loria.fr/~semtag}.}:

\begin{itemize}
  \item \xmg : a grammar compiler which supports the generation of a \Tag from a factorised \Tag  \cite{crabbe04a},
  \item \llp and \dyalog : two chart parsers, one with a friendly
  user interface \cite{lopez00extended} and the other optimised for
  efficient parsing \cite{DyALog:CSLP05}\footnote{Note that \dyalog
    refers in fact to a logic programming language, and a tabular
    compiler for this language. The \dyalog system is well-adapted to
    the compilation of efficient tabular parsers.}
%     In the rest of this
%     paper, we well call "\dyalog parser" a parser compiled by the \dyalog
%     system for a given grammar.}
\item \geni : a chart generator which has been tested on a medium-sized grammar for French \cite{gardentKow05}.
\end{itemize}


%\begin{tabular}{lp{6.5cm}}
%  $\bullet$ & \xmg - a metagrammar compiler allowing to describe the \Tag in a
%  highly factorized way \cite{crabbe04a}, \\
%  $\bullet$ & \geni - a chart generator which has been tested on realistic
%  grammars \cite{gardentKow05}, \\
%  $\bullet$ & \llp - a parser based on an original bottom-up algorithm and
%  with a friendly user interface \cite{lopez00extended}, and\\
%  $\bullet$ & \dyalog - an alternative parser based on a tabular algorithm
%  increasing parsing efficiency \cite{DyALog:CSLP05}. \\
%\end{tabular}

%% Commented for the extended abstract submission:
%
% While developing / integrating these tools, one of our goals is to
% maintain a unique and common resource (in our case, a unique
% {\it metagrammar} with associated lexicons, cf {\it infra}) for both
% parsing and generation. Having a unique resource presents several
% advantages: maintenance costs are reduced and the bidirectionality thus
% provided allows us to perform cross-testing\footnote{Indeed, we can
%   use the output of the generator to check if the coverage of the
%   grammar as given by the parser is correct and {\it vice versa}.}. As
% we will see in \S \ref{sec:testing}, performing such tests facilitates
% grammar development.

% Our toolbox can be compared with the \LKB system, which constitutes the
% reference grammar development environment for \HPSG, as it shares some
% of its features:

% \paragraph{Declarativity and expressivity} the reduced description of
% the grammar, called the metagrammar, is represented using a
% metagrammatical framework (namely \xmg) whose description language
% is highly expressive and declarative \cite{gardent06xmg}.
% \paragraph{Bidirectionality} as mentioned above, the toolbox can be used
% both for parsing and generation with the same metagrammar.
% \paragraph{Scale} the use of a metagrammar allows to produce
% semi-automatically a wide coverage \Tag, this has been done for
% French \cite{crabbe05representation}.
% \paragraph{Efficiency} the DyALog system which pre-compiles the
% grammar to produce the parser is one of the most efficient \Tag\
% parsers \cite{Lisbon04}.
% \paragraph{Semantic representation} the grammar which is produced by
% the \xmg system associates a flat semantic representation to the
% trees such as in \cite{gardent03semantic}.
% \paragraph{Availability} eventually, all the tools of our toolbox are
% open source and freely available (more details in \S \ref{sec:tools}).
%
%% End commented for extended abstract submission

% long version
%The paper is structured as follows.  We introduce the individual components
%of the toolbox in \S \ref{sec:tools}, discuss the issues raised by
%sharing resources for parsing and generation in \S \ref{sec:resources},
%and evaluate the correctness and performance of our components in
%\S \ref{sec:evaluation}.

%In this section, we will introduce each of the tools composing our
%toolbox, highlighting their features in the context of grammar
%development.

\section{XMG, a grammar writing environment for Tree Based Grammars}
\label{sec:xmg}
% yannick

\xmg provides a grammar writing environment for tree based
grammars\footnote{Although in this paper we only mention \Tag, the
  \xmg framework is also used to develop so called Interaction
  Grammars i.e., grammars whose basic units are tree descriptions
  rather than trees \cite{parmentier05a}.}
with three distinctive features. First, \xmg supports a highly
factorised and fully declarative description of tree based
grammars. Second, \xmg permits the integration in a \Tag of a
semantic dimension. Third, \xmg is based on well understood and
efficient logic programming techniques. Moreover, it offers a
graphical interface for exploring the resulting grammar (see
Figure~\ref{fig:xmg}).

\begin{figure}%[htbp]
\begin{center}
\includegraphics[scale=0.4]{images/xmg.jpg}
\caption{XMG's graphical interface}
\label{fig:xmg}
\end{center}
\end{figure}

\paragraph{Factorising information.} In the \xmg framework, a \Tag
is defined by a set of classes organised in an inheritance hierarchy
where classes define tree fragments (using a tree logic) and tree
fragment combinations (by conjunction or disjunction). \xmg
furthermore integrates a sophisticated treatment of names
whereby variable scope can be local, global or user defined (i.e.,
local to part of the hierarchy).

In practice, the resulting framework supports a very high degree of
factorisation. For instance, a first core grammar ({\sc FraG}) for
French comprising 4 200 trees was produced from roughly 300 \xmg
classes.

\paragraph{Integrating semantic information.} In \xmg , classes can be
multi-dimensional.  That is, they can be used to describe several
levels of linguistic knowledge such as for instance, syntax,
semantics or prosody. At present, \xmg supports classes including
both a syntactic and a semantic dimension. As mentioned above, the
syntactic dimension is based on a tree logic and can be used to
describe (partial) tree fragments. The semantic dimension, on the other
hand, can be used to associate with each tree a flat semantic
formula. Such a formula can furthermore include identifiers which
corefer with identifiers occurring in the associated syntactic
tree. In other words, \xmg also provides support for the interface
between semantic formulae and tree decorations. 
Note that the inclusion of semantic information remains
optional. That is, it is possible to use \xmg to define a purely
syntactic \Tag.

\xmg was used to develop a core grammar for French ({\sc FraG}) which
was evaluated to have 75\% coverage\footnote{This means that for 75 \% of
  the sentences, a \Tag parser can build at least one derivation.} on
the Test Suite for Natural
Language Processing (TSNLP, \cite{lehmann96tsnlp}).  The {\sc FraG}
grammar was furthermore enriched with semantic information using another 50
classes describing the semantic dimension
\cite{gardent06integration}. The resulting grammar ({\sc SemFraG})
describes both the syntax and the semantics of the French core
constructions.

\paragraph{Compiling an \xmg specification.}
By building on efficient techniques from logic programming and in
particular, on the idea of the Warren Abstract Machine \cite{aitkaci91warren},
the \xmg compiler allows for very reasonable compilation times
\cite{dlp04}.  For instance, the compilation of a \Tag containing
6~000 trees takes about 15 minutes on a 2.6 GHz Pentium 4 processor
with 1 GB of RAM.

\section{Two TAG parsers}
\label{sec:parsers}
% yannick

The toolbox includes two parsing systems: the
\llp parser and the \dyalog system. Both of them can be used in
conjunction with \xmg. First we will briefly introduce both of them,
and then show that they can be used with a semantic grammar (e.g.,
{\sc SemFraG})  to perform not only syntactic parsing but also
semantic construction.

\paragraph{LLP2} The \llp parser is based on a bottom-up algorithm
described in \cite{lopez99analyse}. It has relatively high parsing
times but provides a user friendly graphical parsing environment with
much statistical information (see Figure~\ref{fig:llp2a}). It is well suited
for teaching or for small scale projects.

\begin{figure}%[htbp]
\begin{center}
\includegraphics[scale=0.3]{images/llp2a.jpg}
\caption{The LLP2 parser.}
\label{fig:llp2a}
\end{center}
\end{figure}

\paragraph{DyALog} The \dyalog system, on the other hand, is a highly
optimised parsing system based on tabulation and automata techniques
\cite{DyALog:CSLP05}. It 
is implemented using the \dyalog programming language ({\it i.e.}, it
is bootstrapped) and is also used to compile parsers for other types
of grammars such as {\it Tree Insertion Grammars}.

The \dyalog system is coupled with a semantic construction module
whose aim is to associate with each parsed string a semantic
representation\footnote{The corresponding system is called SemConst
  ({\it cf.}\ Section~\ref{sec:availability}).}.
%% The LORIA toolbox aims not at gathering parsers and generators
%% for a given formalism but rather at providing an efficient system for
%% parsing and generation using common resources. In this context, we
%% want to be able to produce semantic representations that would be used
%% as input for generation\footnote{And vice-versa, we would use the
%%   generated sentences for parsing and thus cross-check the tools (and
%%   the resources).}. To perform this, we can use the results of
%% syntactic parsing with a semantic grammar using 
This module assumes a \Tag of the type described in
\cite{gardent03semantic,gardent06integration} where initial trees are
associated with semantic information and unification is used to
combine semantic representations. In such a grammar, the semantic
representation of a derived tree is the union of the semantic
representations of the trees entering in the derivation of that
derived tree modulo the unifications entailed by analysis.  As
detailed in \cite{gardent05large}, such grammars support two
strategies for semantic construction.

The first possible strategy is to use the full grammar and to perform
semantic construction during derivation. In this case the parser must
manipulate both syntactic trees and semantic representations. The
advantage is that the approach is simple (the semantic representations
can simply be an added feature on the anchor node of each tree). The
drawback is that the presence of semantic information might reduce
chart sharing. 

The second possibility involves extracting the semantic information
contained in the grammar and storing it into a semantic
lexicon. Parsing then proceeds with a purely syntactic grammar and
semantic construction is done after parsing on the basis of the parser
output and of the extracted semantic lexicon.  This latter technique
is more suitable for large scale semantic construction as it supports
better sharing in the derivation forests. It is implemented in the
LORIA toolbox where a module permits both extracting a semantic
lexicon from a semantic \Tag and constructing a semantic representation
based on this lexicon and on the derivation forests output by \dyalog
(see Figure~\ref{fig:semconst}).

\begin{figure}%[htbp]
\begin{center}
\includegraphics[scale=0.25]{images/semconst3.jpg}
\caption{The SemConst system}
\label{fig:semconst}
\end{center}
\end{figure}

The integration of the \dyalog system into the toolbox is relatively
new so that parsing evaluation is still in progress. So far,
evaluation has been restricted to parsing the TSNLP with \dyalog with
the following preliminary results. On sentences ranging from 1 to 18
words, with an average of 7 words per sentence, and with a grammar
containing 5 069 trees, the average parsing time of \dyalog is 0.38 sec
with a P4 processor 2.6 GHz and 1 GB of RAM\footnote{These figures
  only concern classic syntactic parsing as the semantic construction
  module has not been tested on real grammars yet.}.

\section{A TAG-based surface realiser}
\label{sec:geni}
The surface realiser \geni takes a TAG and a flat semantic logical
form as input, and produces all the sentences that are associated with
that logical form by the grammar.  It implements two bottom up
algorithms, one which manipulates derived trees as items and one which
is based on Earley for \Tag. Both of these algorithms integrate a
number of optimisations such as delayed adjunction and polarity
filtering \cite{kow05a,gardentKow05}.  

\geni is written in Haskell and includes a graphical debugger to
inspect the state of the generator at any point in the surface
realisation process (see Figure~\ref{fig:geni_debugger}). It also
integrates a test harness for automated
regression testing and benchmarking of the surface realiser and the
grammar.  The harness \gtester is written in Python.  It runs the
surface realiser on a test suite, outputting a single document with a
table of passes and failures and various performance charts (see Figures
\ref{fig:testharness} and \ref{fig:testharness2}).

\begin{figure}%[htbp]
\begin{center}
\includegraphics[scale=0.33]{images/geni-debugger-screenshot}
\caption{The GenI debugger}
\label{fig:geni_debugger}
\end{center}
\end{figure}

\begin{figure}%[htbp]
\begin{center}
{\small
\begin{tabular}{|r|l|l|l|}
\hline
\textbf{test} & \textbf{expected}& \textbf{simple}& \textbf{earley}\\
\hline
t1 &il le accepter & pass & pass\\
\hline
t32 &il nous accepter & pass & pass\\
\hline
t83 &le ing\'enieur le lui apprendre & pass & \colorbox{red}{DIED}\\
\hline
t114 &le ing\'enieur nous le pr\'esenter & pass & pass\\
\hline
t145 &le ing\'enieur vous le apprendre & pass & pass\\
\hline
t180 &vous venir & pass & pass\\
\hline
\end{tabular}
}
\caption{Fragment of test harness output - The Earley algorithm timed out.}
\label{fig:testharness}
\end{center}
\end{figure}

\begin{figure}%[htbp]
\begin{center}
\includegraphics[scale=0.6]{images/gtester_graph}
\caption{Automatically generated graph of performance data by the test
harness.}
\label{fig:testharness2}
\end{center}
\end{figure}

%The use of \gtester is made
%especially convenient when combined with a Makefile which automatically
%recompiles a subset of the grammar needed for the tests being
%run\footnote{One could also compile the entire grammar, but this is not
%conducive to debugging as this entails frequent modifications to grammar
%and much recompilation}, but only when the grammar or test suite have
%changed.  
%Similar techniques are also used for testing the parser.

% more on polarity filtering
% ambiguity via number of results
% 
\paragraph{Test suite and performance}
The test suite is built with an emphasis on testing the surface
realiser's performance in the face of increasing paraphrastic power
i.e., ambiguity.  The suite consists of semantic inputs that select for
and combine verbs with different valencies.
For example, given a hypothetical English grammar, a valency (2,1)
semantics might be realised as \natlang{Martin thinks Faye drinks}
(\natlang{thinks} takes 2 arguments and \natlang{drinks} takes 1),
whereas a valency (2,3,2) one would be \natlang{Dora says that Martin
tells Bob that Faye likes music}.  The suite also adds a varying
number of intersective modifiers into the mix, giving us for instance,
\natlang{The girl likes music}, \natlang{The pretty scary girl likes
indie music}.

The sentences in the suite range from 2 to 15 words (8 average).
Realisation times for the core suite range from 0.7 to 2.84 seconds CPU
time (average 1.6 seconds).

We estimate the ambiguity for each test case in two ways.  The first
is to count the number of paraphrases.  Given our current grammar, the
test cases in our suite have up to 669 paraphrases (average 41).  The
second estimate for ambiguity is the number of combinations of lexical
items covering the input semantics.

This second measure is based on an optimisation known as polarity filtering
\cite{gardentKow05}.  This optimisation detects and eliminates
combinations of lexical items that cannot be used to build a result.  It
associates the syntactic resources (root nodes) and requirements
(substitution nodes) of the lexical items to polarities, which are then
used to build ``polarity automata''.  The automata are minimised to
eliminate lexical combinations where the polarities do not cancel out,
that is those for which the number of root and substitution nodes for
any given category do not equal each other.

Once built, the polarity automata can also serve to estimate ambiguity.
The number of paths in the automaton represents the number of possible
combinations of lexical items.  To determine how effective polarity
filtering is with respect to ambiguity, we compare the combinations
before and after polarity filtering.  Before filtering, we start with
an initial polarity automaton in which all items are
associated with a zero polarity.  This gives us the lexical ambiguity
before filtering.  The polarity filter then builds upon this to form a
final automaton where all polarities are taken into account.  Counting
the paths on this automaton gives us the ambiguity after filtering, and
comparing this number with the lexical initial ambiguity provides an
estimate on the usefulness of the polarity filter.  In our suite, the
initial automata for each case have 1 to 800 000 paths (76 000 average).
The final automata have 1 to 6000 paths (192 average).  This can
represent quite a large reduction in search space, 4000 times in the
case of the largest automaton.  The effect of this search space
reduction is most pronounced on the larger sentences or those with the
most modifiers.  Indeed, realisation times with and without filtering
are comparable for most of the test suite, but for the most complicated
sentence in the core suite, polarity filtering makes surface realisation
94\% faster, producing a result in 2.35 seconds instead of 37.38.

\section{Benefits of an integrated toolset}
\label{sec:benefits}

As described above, the LORIA toolbox for TAG based semantic
processing includes a lexicon, a grammar, a parser, a semantic
construction module and a surface realiser. Integrating these into
a single platform provides some accrued benefits which we now discuss
in more detail.

\paragraph{Simplified resource management}
\label{par:simplification}
The first advantage of an integrated toolkit is that it facilitates
the management of the linguistic resources used, namely the grammar and
the lexicon. Indeed it is common that each NLP tool (parser or
generator) has its own representation format. Thus, managing the
resources gets tiresome as one has to deal with several versions of a
single resource. When one version is updated, the others have to be
recomputed. Using an integrated toolset avoids such a drawback as the
intermediate formats are hidden and the user can focus on
linguistic description.

\paragraph{Better support for grammar development}
\label{par:mgdev}

When developing parsers or surface realisers, it is useful to test
them out by running them on large, realistic grammars.  Such
grammars can explore nooks and crannies in our implementations
that would otherwise have been overlooked by a toy grammar.  For
example, it was only when we ran \geni on our French grammar that
we realised our implementation did not account for auxiliary
trees with substitution nodes (this has been rectified).  In this
respect, one could argue that \xmg could almost be seen as a
parser/realiser debugging utility because it helps us to build and
extend the large grammars that are crucial for testing.

This perspective can also be inverted; parsers and surface realisers make
for excellent grammar-debugging devices.  For example, one possible
regression test is to run the parser on a suite of known sentences to
make sure that the modified grammar still parses them correctly.  The
exact reverse is useful as well; we could also run the surface realiser
over a suite of known semantic inputs and make sure that sentences are
generated for each one.  This is useful for two reasons.  First, reading
surface realiser output (sentences) is arguably easier for human beings
than reading parser output (semantic formulas).   Second, the surface
realiser can tell us if the grammar overgenerates because it would
output nonsense sentences.  Parsers, on the other hand, are much better
adapted for testing for undergeneration because it is easier to write
sentences than semantic formulas, which makes it easier to test
phenomena which might not already be in the suite.

\paragraph{Towards a reversible grammar}
\label{par:reversible}
%% intro de section
Another advantage of using such a toolset lies in the fact that we
can manage a common resource for both parsing and generation, and thus
avoid inconsistency and redundancy, and offer better flexibility, as
advocated in \cite{neumann94uniform}.
%% Fin intro de section

% throwaway sentence? - depends on what Yannick writes
% It is conceivable to use the LORIA toolbox to simulate the
% interleaved
% parsing/realisation architecture described by
% \cite{neumann94uniform}.

On top of these practical questions, having a unique reversible
resource can lead us further. 
For instance, \cite{neumann94uniform} proposes an interleaved
parsing/realisation architecture where the parser is used to choose
among a set of paraphrases proposed by the generator; paraphrases
which are ambiguous (that have multiple parses) are discarded in
favour of those whose meaning is most explicit.  Concretely, we could
do this with a simple
pipeline using \geni to produce the paraphrases, \dyalog to parse them,
and a small shell script to pick the best result. This would only be a
simulation, of course. \cite{neumann94uniform} goes as far as to
interleave the processes, keeping the shared chart and using the
parser to iteratively prune the search space as it is being explored by
the generator.  The version we propose would not have such niceties
as a shared chart, but the point is that having all the tools at our
disposal makes such experimentation possible in the first place.
%Moving from conceivable to practical is likely just a matter of
%implementation.

Moreover, there are several other interesting applications of the
combined toolbox. We could use the surface realiser to build
artificial corpora.
These can in turn be parsed to semi-automatically create rich treebanks
containing syntactico-semantic analyses \`a la Redwoods
\cite{oepen02lingo}.  

Finally, another use for the toolbox might be in components of
standard NLP applications such as machine translation, question
answering, or interactive dialogue systems.

\section{Availability}
\label{sec:availability}
%% say something about the availability of the system (including
%% platform)
The toolbox presented here is open-source and freely available under
the terms of the GPL\footnote{Note that \xmg is released under the
  terms of the CeCILL license
  (\url{http://www.cecill.info/index.en.html}), which is
  compatible with the GPL.}. More information about the requirements
and installation procedure is available at
\url{http://trac.loria.fr/~semtag}. Note that this toolbox is
made of two main components: the
GenI\footnote{\url{http://trac.loria.fr/~geni}} system and the
SemConst\footnote{\url{http://trac.loria.fr/~semconst}} system, which
respectively perform generation and parsing from common linguistic
resources. The first is written in 
Haskell (except the \xmg part written in Oz) and is multi-platform
(Linux, Windows, Mac OS). The latter is
written in Oz (except the \dyalog part which is bootstrapped and
contains some Intel assembler code) and is available on Unix-like
platforms only.


\section{Conclusion}
\label{sec:conclusion}
%all

The LORIA toolbox provides an integrated environment for TAG based
semantic processing: either to construct the semantic representation
of a given sentence (parsing) or to generate a sentence verbalising a
given semantic content (generation).

Importantly, both the generator and the parsers use the same grammar
({\sc SemFraG}) so that both tools can be used jointly to improve
grammar precision.  All the sentences outputted by the surface
realiser should be parsed to have at least the semantic representation
given by the test suite, and all parses of a sentence should be
realised into at least the same sentence.

Current and future work concentrates on developing an automated error
mining environment for both parsing and generation; on extending the
grammar coverage; on integrating further optimisations both in the
parser (through parsing with factorised trees) and in the generator
(through packing and accessibility filtering, cf.\ \cite{carrollOepen05});
and on experimenting with different semantic construction strategies
\cite{gardent05large}.


%\section{Acknowledgements}

%{\small
  \bibliographystyle{acl}
  \bibliography{garkowpar-tag-06}
%}

%\appendix

%\section*{Appendix A. Features of eXtensible MetaGrammar.}
% \begin{figure*}
% {\bf Appendix A. Features of eXtensible MetaGrammar.}
% \end{figure*}

% \begin{figure*}%[htbp]
% \begin{center}
% \includegraphics[scale=0.6]{images/xmg.jpg}
% \caption{XMG's output}
% \label{fig:xmg}
% \end{center}
% \end{figure*}

%\section*{Appendix B. Features of the GenI generator.}
% \begin{figure*}
% {\bf Appendix B. Features of the GenI generator.}
% \end{figure*}

% \begin{figure*}%[htbp]
% \begin{center}
% \includegraphics[scale=0.33]{images/geni-debugger-screenshot}
% \caption{The GenI debugger}
% \label{fig:geni_debugger}
% \end{center}
% \end{figure*}

% \begin{figure*}%[htbp]
% \begin{center}
% {\small
% \begin{tabular}{|r|l|l|l|}
% \hline
% \textbf{test} & \textbf{expected}& \textbf{simple}& \textbf{cky2}\\
% \hline
% t1 &il le accepter & pass & pass\\
% \hline
% t32 &il nous accepter & pass & pass\\
% \hline
% t83 &le ingénieur le lui apprendre & pass & \colorbox{red}{DIED}\\
% \hline
% t114 &le ingénieur nous le présenter & pass & pass\\
% \hline
% t145 &le ingénieur vous le apprendre & pass & pass\\
% \hline
% t180 &vous venir & pass & pass\\
% \hline
% \end{tabular}
% }
% \caption{Fragment of test harness output - The Earley algorithm timed out.}
% \label{fig:testharness}
% \end{center}
% \end{figure*}

% \begin{figure*}%[htbp]
% \begin{center}
% \includegraphics[scale=0.9]{images/gtester_graph}
% \caption{Automatically generated graph of performance data by the test
% harness.}
% \label{fig:testharness2}
% \end{center}
% \end{figure*}


%\section*{Appendix C. Features of the LLP2 parser.}
% \begin{figure*}
% {\bf Appendix C. Features of the LLP2 parser.}
% \end{figure*}

% \begin{figure*}%[htbp]
% \begin{center}
% \includegraphics[scale=0.5]{images/llp2a.jpg}
% \caption{The LLP2 parser.}
% \label{fig:llp2a}
% \end{center}
% \end{figure*}

%% removed for final version coz 6 pages limit.
%%

% \begin{figure*}%[htbp]
% \begin{center}
% \includegraphics[scale=0.6]{images/llp2b.jpg}
% \caption{The LLP2 parser (displaying features).}
% \label{fig:llp2b}
% \end{center}
% \end{figure*}

% \begin{figure*}%[htbp]
% {\small
% \begin{verbatim}
% Morpho loading time : 0ms
% Resource loading time : 0ms
% Lemmatizing time : 10645ms
% Chart initializing time : 45ms
% Parsing time : 67ms
% Derivation Trees building time : 32ms
% Nb of words :3
% Nb of segment :7
% Nb of elementary trees : 141
% Nb of items at the beginning: 242
% Nb of items at the end: 272
% Nb of parsed sentences : 2
% \end{verbatim}
% }
% \caption{Statistical information given by the LLP2 parser.}
% \label{fig:stats}
% \end{figure*}

\end{document}