\documentclass[letterpaper]{article}
\usepackage{ijcai05}
\usepackage{times}
\usepackage{latexsym}
\usepackage{lingmacros}
\usepackage{pstricks,pst-tree,pst-node}
\usepackage{tree-dvips}
\usepackage{genistuff}
\usepackage{color}
\usepackage{graphicx}
\usepackage{algorithm}
\usepackage{algpseudocode}
\usepackage[latin1]{inputenc}

% we use a more standard notation for comments
\renewcommand\algorithmiccomment[1]{\hfill\(\#\) #1}%

\newcommand{\todo}[1]{{\bf ** #1 **}}
\newcommand{\RECENTDIFF}[1]{\{\textbf{NEW STUFF:} #1 \textbf{END NEW
STUFF}\}}


%% my own commands
\newcommand{\nat}[0]{\mbox{\rm I} \! \mbox{\rm N}}
\setlength{\unitlength}{0.3cm}
\newcommand{\dotted}[0]{\makedash{2pt}}
\newcommand{\const}[1]{\mbox{\sf #1}}
\newcommand{\sem}[2]
             {\fbox{\hspace{-0.3em}
              \begin{tabular}{l}
             #1\\[0.2em] \hline
             $\quad$\\[-1.2em]
             arg: $#2$ 
             \end{tabular}\hspace{-0.3em}}}%}\\
\newcommand{\tb}[2]{\mbox{$\begin{array}{l}
                       {t: \scriptstyle \mbox{#1}}\\[-0.5ex]
                       {b: \scriptstyle \mbox{#2}}
                          \end{array}
                          $}}
\newcounter{sitem-zaehler}
\newenvironment{sitem}{\begin{list}{$\bullet$}
{\usecounter{sitem-zaehler}
\setlength{\itemindent}{0pt}
\setlength{\labelwidth}{3ex}
\setlength{\leftmargin}{3ex}
\setlength{\topsep}{0ex plus0ex minus0ex}
\setlength{\parsep}{0.3ex plus0ex minus0ex}
\setlength{\itemsep}{0ex plus0ex minus0ex}
}}{\end{list}}
%% end of my own commands

\setlength\titlebox{6.5cm}    % Expanding the titlebox

\title{Generating and selecting grammatical paraphrases}

\author{Claire Gardent\\
  CNRS/LORIA\\
  Nancy, France\\
  {\tt claire.gardent@loria.fr} \And
  Eric Kow\\ 
  INRIA/LORIA\\
  Nancy, France\\
  {\tt eric.kow@loria.fr}}

\date{\today}

\begin{document}

\maketitle
\begin{abstract}
Natural language has a high paraphrastic power yet not all paraphrases
are appropriate for all contexts. In this paper, we present a TAG
based surface realiser which supports both the generation and the
selection of paraphrases. To deal with the combinatorial explosion
typical of such an NP-complete task, we introduce a number of new
optimisations in a tabular, bottom-up surface realisation
algorithm. We then show that one of these optimisations supports
paraphrase selection.
\end{abstract}

\section{Introduction}

As is well known, natural language has a very high paraphrastic power
so that the same core meaning can be expressed in many different ways
\cite{gross75,melcuk88}. Yet not all paraphrases are appropriate for
all contexts. So for instance, a sentence and its converse
(\ref{ex:converse1}) express the same core meaning and so can be
considered paraphrases of each other. Yet as example
(\ref{ex:control1}) illustrates, they are not interchangeable in the
context of a control verb:

{\small
\eenumsentence{
\item \label{ex:converse1}
John  borrowed a book from Mary. \\
$\equiv$ Mary lent a book to John
\item \label{ex:control1}
Peter persuaded John to borrow a book from Mary. \\
$\not \equiv$ Peter persuaded Mary to lend a book to John
}}

Similarly, a canonical and a cleft sentence (\ref{ex:cleft2})
communicate the same core meaning yet a contrastive context
(\ref{ex:contrast2}) only admits the cleft version.

{\small
\eenumsentence{
\item \label{ex:cleft2}
John looks at Mary. \\
$\equiv$ It is Mary that John looks at
\item \label{ex:contrast2}
* It is not Sarah, John looks at Mary. \\
It is not Sarah, it is Mary that John looks at}
}

More generally, the anaphoric potential (that is, the discourse status
of the entities being talked about) of the preceding discourse, its
structure, the presence of an embedding verb or of a given
subordinating or coordinating conjunction are all factors which may
restrict the use of paraphrases. To preserve completeness, it is
therefore important that a generator be able to produce paraphrases in
a systematic fashion. 

On the other hand, it is also well known that surface realisation (the
task of producing the set of sentences associated by a grammar with a
given semantic representation) is NP-complete \cite{brew92}.

In this paper, we present a TAG based surface realiser which supports
both the generation and the selection of {\it grammatical} paraphrases
(section \ref{sec:grammar} and \ref{sec:algo}). To deal with the
resulting combinatorics, we introduce a number of new optimisations
(section \ref{sec:optimisations}). We then show how one of these
optimisations can be used to support the selection of contextually
appropriate paraphrases (section \ref{sec:selection}).  Finally, we
relate our approach to similar proposals and show that it compares
favourably in terms of efficiency (section \ref{sec:experiment} and
\ref{sec:discussion}). 

\section{The grammar}
\label{sec:grammar}

The grammar used by the surface realiser is Feature-based TAG, a
unification based version of Tree Adjoining
Grammar. Briefly\footnote{For more details on FTAG see
\cite{VijJos88}.}, a Feature-based TAG consists of a set of (auxiliary
or initial) elementary trees and of two tree composition operations:
substitution and adjunction. Substitution inserts a tree onto a leaf
node of another tree\footnote{Leaf nodes where substitution can take
place are graphically distinguished by a down arrow.} while adjunction
inserts an auxiliary tree into a derived tree (i.e., either an
elementary tree or a tree resulting from the combination of two
trees).  In an FTAG, each tree node which is not a substitution node
is associated with two feature structures called \semexpr{top} and
\semexpr{bottom} and during derivation, the following unifications
take place.

\begin{itemize}
\item
The adjunction at some node X  with \semexpr{top}
features $t_X$ and \semexpr{bottom} features $b_X$, of an auxiliary
tree with root \semexpr{top} features $r$ and foot
\semexpr{bottom} features $f$ entails the unification of $t_X$
with $r$ and of $b_X$ with $f$.
\item
The substitution at some node X with \semexpr{top} features $t_X$ of a
tree with root \semexpr{top} features $t$ entails the unification of
$t_X$ with $t$. 
\item
At the end of a derivation, the \semexpr{top} and \semexpr{bottom}
features of all nodes in the derived tree are unified. 
\end{itemize}

In the FTAG used by the surface realisation algorithm, linguistic
expressions are associated with semantic representations as advocated
in \cite{gardentKallmeyer03}. The semantic representations used
are flat semantic representations in the sense of \cite{CopLasFli01}
and the {\bf semantic parameters} (that is, the semantic indices
representing the missing arguments of the semantic functors) are
represented by unification variables.

Further, each elementary tree is associated with a semantic
representation of the type just described and the appropriate nodes of
the elementary trees are decorated with semantic indices or
parameters.  

More precisely, the substitution nodes of the tree associated with a
semantic functor will be associated with semantic parameters whilst
root nodes and certain adjunction nodes will be labelled with semantic
indices. As trees are combined, semantic parameters and semantic
indices are unified by the FTAG unification mechanism thus specifying
which semantic index provides the value for which semantic
parameter. 

Generally, the idea is that the association between tree nodes
and unification variables encodes the syn\-tax/se\-man\-tics
interface: it specifies which node in the tree provides the value for
which semantic parameter in the semantic representation of a semantic
functor. So for instance, the trees for \natlang{John, loves} and
\natlang{Mary} will be as given in Figure
\ref{fig:jon-likes-mary}. The tree for \natlang{loves} is associated
with a semantic representation including the two semantic parameters
$x$ and $y$. These parameters also label the subject and the object
substitution nodes of this tree. Conversely, the root node of the tree
for \natlang{John} is labelled with the semantic index $j$. If the
string parsed is \natlang{John loves Mary}, this tree will be
substituted at the subject substitution node of the \natlang{loves}
tree thus instantiating the semantic parameter $x$ to $j$. And
similarly, for the \natlang{Mary} tree.

\begin{figure}[htbp]
\begin{center}
{\small
\vspace{-1em}
\begin{tabular}{ccccc}
\multicolumn{2}{c}{\node{s}{S}}&\\[2ex]
\node{np1}{NP$\downarrow^{x}$}
&\multicolumn{2}{c}{\node{vp1}{VP}}
\\[2ex]
\node{np3}{NP$_{j}$}
&\node{v1}{V}
&\node{np2}{NP$\downarrow^{y}$} 
&\node{np4}{NP$_{m}$}
\\[2ex]
\node{jon}{John} 
&\node{love}{loves} 
&&
\node{mary}{Mary} 
\\[2ex]
{\small {\it name(j,john)}}
&\multicolumn{2}{l}{{\small {\it love(x,y)}}}
&{\small {\it name(m,mary)}}
\end{tabular}
\nodeconnect{s}{np1} \nodeconnect{s}{vp1}
\nodeconnect{vp1}{v1} \nodeconnect{vp1}{np2}
\nodeconnect{v1}{love} \nodeconnect{np3}{jon}
\nodeconnect{np4}{mary}
}
{\dotted
\anodecurve[br]{np3}[tr]{np1}{0.4in}
\anodecurve[br]{np4}[r]{np2}{0.4in}
}
\vspace{0.5cm}

$\Rightarrow$ {\small {\it love(j,m),name(j,john),name(m,mary)}}
\caption{\label{fig:jon-likes-mary} \natlang{John loves Mary}}

\end{center} 
\end{figure} 

\paragraph{Coverage.} The grammar used describes a core fragment for French and
contains around 4 000 trees. It covers some 35 basic subcategorisation
frames and for each of these frames, the set of argument
redistributions (active, passive, middle, reflexivisation,
impersonal, passive impersonal) and of argument realisations
(cliticisation, extraction, omission, permutations, etc.) possible for
this frame. As a result, it captures most grammatical paraphrases that
is, paraphrases due to diverging argument realisations or to different
meaning preserving alternation (e.g., active/passive or clefted/non
clefted sentence).

\section{The basic algorithm}
\label{sec:algo}

The basic surface realisation algorithm used is summarised in Figure 
\ref{fig:algo} (appendix). It is a bottom up, tabular algorithm \cite{kay96}
optimised for TAGs. Its workings can be illustrated by the following
example. Suppose that the input semantics is the following :

{\small
$$
 \semexpr{\{camp(s,j),john(j),in(s,l),paris(l)\}}
$$}

Then the algorithm proceeds as follows. In a first step ({\bf lexical
selection}), the elementary trees whose semantics
subsumes\footnote{Subsumption is here taken to denote term
unification. Hence lexical selection is done on a very ``syntactic''
basis: only these lexical entries whose semantics representation
matches part of the input semantics are selected. This is partly
alleviated by taking lexical synonymy into account while developing
the grammar so that two (intra- or inter-categorical) synonyms are
assigned the same semantic representation. A more complete treatment
would require the integration either of a richer lexical semantics or
of a lexical selection module permitting inference so that for
instance ``adult(x) male(x) human(x)'' can be inferred to be denoted
by the word ``man''.}  part of the input semantics are retrieved and
added to the agenda. In our simple example, the selected trees are the
trees for \natlang{Jean, campe, dans} and \natlang{paris}.

The second step (the {\bf substitution phase}) consists in
systematically exploring the possibility of combining two trees by
substitution. It is summarised for our example by the table in figure
\ref{fig:substphase} where each line corresponds to a processing
step. The words in each column indicate the trees present at each step
in the chart, the agenda and the agenda for auxiliary trees
(AgendaA). The combination column indicates which tree combines with
which tree by means of which operation ($\downarrow$ indicates a
substitution, $\star$ an adjunction). The trees resulting from such a
combination are represented using the concatenation of the names of
the combined trees (jeanCampe is the tree resulting from the
combination of the tree anchored by \natlang{Jean} with that anchored
by \natlang{campe}). Thus, the first line indicates that the trees
anchored by \natlang{Jean}, \natlang{campe}, \natlang{dans} and 
\natlang{Paris} are in the agenda and that the chart is empty. The
second line shows that the next state is a state where the tree
anchored by \natlang{Jean} has been retrieved from the agenda and
added to the chart. The third line indicates that when the trees
anchored by \natlang{campe} and \natlang{Jean} are in the chart, they
can be combined using substitution. The result is added to the agenda
etc.


\begin{figure*}
\begin{center}
  % In to chart
{\footnotesize
\begin{tabular}{|l||l|l|l|}
\hline
Agenda & Chart & Combination & AgendaA  \\ 
\hline
Jean,campe,dans,Paris & &  & \\
campe,dans,Paris & Jean &  & \\
dans,Paris & campe,Jean & $\downarrow$(campe,Jean)  &\\
Paris,JeanCampe & campe,Jean,dans & & \\
JeanCampe & campe,Jean,dans,Paris & $\downarrow$(dans,Paris) &\\
dansParis & campe,Jean,dans,Paris,JeanCampe  & &\\
 & campe,Jean,dans,Paris,JeanCampe & & dansParis  \\
&&&\\
\hline
\end{tabular}}
\end{center}
\caption{Sample run of the substitution phase}
\label{fig:substphase}
\end{figure*}

%\vspace{0.3cm}
More generally, the items are retrieved one by one from the agenda to
be added either to the chart or to the auxiliary agenda (in the case of
an auxiliary tree devoid of substitution node). For each item added to
the chart, all possible substitutions are carried out and the
resulting derived trees are added to the agenda. The loop ends when the
agenda is empty.

At this stage, all the items containing an empty substitution node are
erased from the chart (here, the trees anchored by \natlang{campe} and
\natlang{dans} are erased). The agenda is then reinitialised to the
content of the chart and the chart to the content of the auxiliary
agenda. The third step (the {\bf adjunction phase}) occurs then in
which all possible adjunctions are performed (figure
\ref{fig:adjphase}). Finally ({\bf retrieval phase}), the strings
labelling the items in the chart whose semantics is the input
semantics are printed out, which in this case yields the sentence
\natlang{Jean campe dans Paris}.


\begin{figure*}
\begin{center}
{\footnotesize
\begin{tabular}{|l||l|l|l|}
\hline
Agenda & Chart & Combination & AgendaA  \\ 
\hline
Jean,Paris,JeanCampe & dansParis & & \\
Paris,JeanCampe & dansParis,Jean & & \\
JeanCampe & dansParis,Jean,Paris & & \\
 & dansParis,Jean,Paris,JeanCampe & $\star$(JeanCampe,dansParis) & \\
JeanCampeDansParis & dansParis,Jean,Paris,JeanCampe & & \\
 & dansParis,Jean,Paris,JeanCampe, &&\\
&JeanCampeDansParis & & \\
&&&\\
\hline
\end{tabular}}
\end{center}
\caption{Sample run of the adjunction phase}
\label{fig:adjphase}
\end{figure*}

%\vspace{0.3cm}
\section{Optimisations}
\label{sec:optimisations}

Surface realisation is NP complete \cite{brew92}. Moreover the
paraphrastic power of natural language is enormous
\cite{gross75,melcuk88}. Hence optimisation is a key issue and so is
the possibility to select a given paraphrase on demand. We now present
a number of optimisations we added to the algorithm just described in
order to reduce the combinatorics.

\subsection{Tabulation and ordered combinations}
\label{sec:adjunction}
Tabulation serves to avoid redundant computations. In analysis, the
use of the chart to store intermediate constituents and avoid multiple
computation of the same structure renders an exponential task
polynomial. In generation however, tabulation increases efficiency by
avoiding duplicate computations but the complexity remains exponential
because in particular of multiple modifiers \cite{brew92}. Suppose for
instance that the input semantic representation is the following:
\\

\begin{center}
\semexpr{fierce(x),little(x),cat(x),black(x)}
\end{center}

For this input, a naive bottom-up realisation algorithm will generate
all intermediate structures that is, $n!$ intermediate structures with
$n$ the number of modifiers.
These $n!$ structures will furthermore be multiplied by the context so
that for instance given the input for \natlang{the fierce little black
cat runs}, the following structures will all be generated.\\

\enumsentence{\label{ex:fiercecat}
a. \natlang{fierce cat, fierce black cat, little cat,little black cat,
  fierce little cat, black cat}\\
b. \natlang{{\bf the} fierce cat, the fierce black cat, the little cat, the little black cat, the fierce little cat, the black cat}\\
c. \natlang{{\bf the} fierce cat {\bf runs}, the fierce black cat
  runs, the little cat runs, the little black cat runs, the fierce
  little cat runs, the black cat runs} 
}

To minimise the impact of multiple modifiers, the algorithm presented
here performs all substitutions before considering adjunctions. In
effect, this means that adjunction only applies to syntactically
complete trees and so that the many intermediate structures induced by
the modifiers do not multiply out with other incomplete structures. In
the above example for instance, (\ref{ex:fiercecat}c) will be
computed but neither (\ref{ex:fiercecat}a) nor (\ref{ex:fiercecat}b).

\subsection{Avoiding spurious derivations}

Categorical grammars often allow so called spurious derivations in that
one and the same syntactic structure can be derived in several different
ways \cite{hepple91}. TAGs also induce spurious derivations due to the
fact that substitutions and adjunctions on different nodes of the same
tree can be carried out in different relative orders all of which result
in one and the same structure. Thus for instance, given the trees
\koweytree{{\small np(Marie)}}, \koweytree{{\small np(Jean)}},
\koweytree{{\small s(np$\downarrow$, v(aime), np$\downarrow$)}} and the
semantic \semexpr{{\small aime(j,m),jean(j), marie(m)}}, two derivations
are possibles, one where \koweytree{{\small np(Jean)}} is first
substituted in \koweytree{{\small s(np$\downarrow$, v(aime),
np$\downarrow$)}} before the tree for \koweytree{{\small np(Marie)}} is
; and the other where \koweytree{{\small np(Marie)}} is first
substituted before \koweytree{{\small np(Jean)}} is added. More
generally, for a tree containing $n$ substitution nodes, there will be
$n!$ possible derivations. For instance given the sentence\\

\enumsentence{
Jean persuade Marie de promettre � Claire de donner un livre
� Marie.\\
\textit{Jean persuades Mary to promise Claire to give Mary a book}
}

there will be $3! \times 2! \times 2! = 24$ possible derivations all of
them produce the same syntactic tree and hence the same sentence.

Adjunction suffers from the same shortcoming. Given a TAG tree and
$n$ auxiliary trees that can adjoin to different nodes of that tree,
there are $n!$ possible ways of deriving the tree resulting from these
$n$ adjunctions.

To avoid these spurious derivations, we impose a unique order (from
left to right) on the sequences of substitutions and adjunctions done
within a given tree.  Because the algorithm systematically examines
all pairs of items, this restriction does not affect completeness :
the unique derivation supported by the imposed constraints will be
taken into consideration by the algorithm and will therefore be produced.

A third source of spurious derivations come from the possibility of
having multiple adjunctions on the same node of a given tree for
instance in the case of the \natlang{little black cat}. The auxiliary
trees anchored by \natlang{little} and \natlang{black} can adjoin in
two different orders on the tree anchored by \natlang{cat}: either
\natlang{little} is adjoined to \natlang{cat} and \natlang{black} to
the foot node of \natlang{little} in the resulting tree or
\natlang{black} is adjoined to \natlang{cat} and \natlang{little} to
the root node of the resulting derived tree. To eliminate this type of
spurious derivations, adjunction on a foot node is ruled out -- which
is usual in TAG parsers.

\subsection{Filtering of valid lexical sequences}
\label{sec:polarities}

The most efficient optimisation takes place between the lexical
selection phase and that of combination by substitution and
adjunction. At this stage, the number of combinations that are {\it a
  priori} possible is 
$\prod_{1 \leq i \leq n}{a_i}$ with $a_i$ the degree of lexical
ambiguity of the  $i$-th literal  and $n$, the
number of literals in the input semantic. That is, the search
space is exponential in the number of literals. To reduce the
combinatorics, we use a technique introduced for parsing by
\cite{perrier03} called \natlang{polarity based filtering}.

Polarity based filtering is based on the observation that many of the
combinations of lexical items which cover the input semantics are in
fact syntactically invalid either because a syntactic requirement is
not fulfilled or because a syntactic resource is not
used. Accordingly, polarity based filtering detects
and eliminates such combinations by:

\begin{enumerate}
\item assigning each lexical item
a polarity signature reflecting its syntactic requirements and
resources
\item computing for each possible combination of lexical
items the net sum of its syntactic requirements and resources and
\item eliminating all combinations of lexical items that do not have a
net sum of zero (because such combinations cannot possibly lead to a
syntactically valid sentence)
\end{enumerate}

As we shall see below, polarity based filtering is implemented using
finite state techniques.

Let us see how this works by running through a simple example. Suppose
that the input semantic representation is:
\\

\enumsentence{\label{ex:polex}
 \semexpr{buy(e,t,j),
annoying(e), field(t),john(j)}
}

and that the TAG trees selected for this input are the ones given in
Figure \ref{fig:trees} (appendix).

In this figure, the literals following the tree name give the
polarities that are automatically assigned to each of these trees on
the basis of their root and substitution nodes (for instance, the
\natlang{v\_achete} has polarity $(+p -2n)$ meaning that it provides a
sentence and requires two NPs). Since in a TAG, substitution nodes
indicates syntactic requirements whilst an initial tree permits
fulfilling a syntactic requirement, polarity signatures can be
automatically computed as follows:

\begin{itemize}
\item a polarity of the form $+C$ is added to the tree polarity
  signature of each initial tree with root node category $C$.
\item a polarity of the form $-S$ is added to the tree polarity
  signature of each initial tree for each substitution node with
  category $S$ in that tree.
\end{itemize}

Now we need to compute the polarity of all possible combinations of
lexical items. This is done by:

\begin{enumerate}
\item building a polarity automaton for
each polarity category occurring in the set of possible combinations
(in this case, $n$ and $s$),
\item computing the intersection of these
automata and
\item minimising the resulting automaton. 
\end{enumerate}

In the final automaton, only the combinations that have a null
polarity are represented. These will be the combinations actually
explored by the realiser.

For the above example, the final automaton is that given in figure
 \ref{fig:automaton} where each state is labelled with the cumulated
 polarity of the path(s) leading to that state and where the
 transitions are labelled with the lexical item covered. As can be
 seen, the combinations that are syntactically invalid (in grey in the
 automaton) have been eliminated. Thus in particular, the combination
 of the predicative tree {\it n0Vadj} with the verb {\it ach\`ete} and
 its two complements is ruled out (as the $n$ requirement of {\it
 n0Vadj} cannot be satisfied) and conversely, the combination of the
 predicative tree {\it p0Vadj} with the relational noun {\it achat}
 (because the $p$ requirement of {\it p0Vadj} cannot be
 satisfied)\footnote{For lack of space, we ignore here functional
 words (determiners, prepositions).  In the full algorithm, their
 treatment is implemented either by means of co-anchors (a verb whose
 compl�ment requires a given preposition for instance, will be
 assigned a tree with multiple anchors, one for the verb, the other
 for the preposition) or by means of a richer semantic (contrary to
 what is shown here, a quantifier will have a non nul semantics). Note
 further that lexical items with multi-literal semantics are also
 handled as well as items whose semantics is reduced to an index
 (pronouns, control verb subject, modifiers, etc.). } .

\subsection{Combining polarity based filtering and tabulation}

To preserve the factorisation supported by the use of a chart,
polarity filtering must furthermore be integrated with the realisation
algorithm. Indeed, each path through a polarity automaton represents a
combination of lexical items whose total semantics is the input
semantics and which may lead to a syntactically valid expression. But
some of these paths may share some subpath(s). To avoid computing
these shared subpaths several times, each selected elementary tree is
annotated with the set of automaton paths it occurs in. During
realisation, two items will be compared only if the intersection of
their path sets is not empty (they appear in the same automaton
path). The result of a combination is labelled with the intersection
of the labels of the combined constituents. In this way, the
elementary items appearing in several paths of the automaton are only
introduced once in the chart and the factorisation of both elementary
and derived items that are common to several automaton path is
ensured.


\section{Paraphrase selection}
\label{sec:selection}

As pointed out in the introduction, not all paraphrases are
appropriate in all contexts. To test the ability to generate
contextually appropriate sentences, we augmented the realiser with a
paraphrase selection mechanism based on the polarity filtering system
described in section (\ref{sec:polarities}). For instance, it is
possible to select from among the possible realisations for
\semexpr{regarde(j,m), jean(j), marie(m)}, the variant where
\natlang{jean} is verbalised as a cleft subject namely, \natlang{C'est
Jean qui regarde Marie (It is John who is looking at Mary)}. 

More generally, the selection constraints allowed are
syntactico-semantic constraints of the form \semexpr{Synt:SemIndex}
where \semexpr{Synt} is a morpho-syntactic feature (declarative,
interrogative, cleft, pronoun, etc.) and \semexpr{SemIndex} is an
index occurring in the input semantics.

Intuitively, a selection constraint supports the selection, for a
given semantic index, of the variant(s) obeying the
syntactico-semantic constraint set by the selection constraint for
that index. 

Formally, these constraints are imposed during the polarity filtering
phase as follows. The syntactic \jargon{properties} supported by the
selection constraints are automatically associated during grammar
compilation to the elementary trees of the grammar by means of
so-called hypertags \cite{kinyon00}. This is made possible by the fact
that the TAG used is derived from a metagrammar \cite{crabbeDuchier04}
that is, from a highly factorised way of representing the linguistic
concepts encoded in the TAG trees. Roughly, the metagrammar formalism
is used (i) to define abstractions over these concepts and (ii) to
combine these abstractions so as to produce the elementary trees of a
TAG. During the metagrammar compilation process, a so-called {\it
hypertag} is built for each tree which records the
abstractions used to produce that tree. Thus hypertags contain
detailed information about the linguistic content of the TAG
elementary trees. In particular, the hypertag of the tree with clefted
subject of the n0vn1 family (i.e., the set of verbs taking two nominal
arguments) will contain the property \semexpr{+cleft:X} where
\semexpr{X} is the semantic index associated with the subject node.

During lexical selection, this index is instantiated by unification
with the input so that the selected elementary tree for
\natlang{regarde} will have the property \semexpr{+cleft:j}.

Conversely, a \jargon{restrictor} is a property that a lexical item
intervening in the production of the generated paraphrases {\it
must} have. In the above example, the restrictor is \semexpr{-cleft:j}
meaning that the \semexpr{j} index must be realised by a clefted
structure.

Paraphrase selection is implemented by parameterising the realiser
with a restrictor (for instance, \semexpr{-cleft:j}). This restrictor
is then used to initialise the polarity automaton and eliminate (by
polarity filtering) all these combinations which do not contain the
\semexpr{+cleft:j} charge (since the negative charge introduced during
initialisation must be cancelled). As a result, the realiser will only
produce the variant:

\enumsentence{
\natlang{C'est Jean qui regarde Marie}.
}

More generally, the polarity mechanism permits selecting paraphrases
on the basis of the information contained in the grammar hypertags or
in the TAG tree features. This information, which is decided upon by
the grammar writer, can be both fine grained and of different
natures. 

Feature values can be used to control the feature values associated
with the root node of the constructed tree, typically requiring that
it is of interrogative, declarative or imperative mood.

Hypertags can be used more generally to control the selection of the
lexical items entering in the generated construct. Importantly, the
information they contain can be specified both at the grammar and at the
lexical level so that paraphrase selection can then operate both on
features determined by syntax and on lexically determined
characteristics (level of speech, modality, type of semantic relation,
thematic and fore/backgrounding structure, etc;).

\section{Implementation and Experimentation}
\label{sec:experiment}

The realiser described here has been implemented in Haskell. It
includes a graphical interface as well as a debugger so that the user
can inspect the content of the chart and of the agenda at each step of
the algorithm. It also supports batch processing thus permitting a
systematic evaluation of the impact of various optimisations
combinations. In what follows, we discuss the effect of polarity
filtering and of paraphrase selection in that system.

\subsection{The effect of polarity filtering}

To get an estimate of how our realiser compares with existing
published results, we revisited the test cases discussed in
\cite{carrollEtAl99} and \cite{kollerStriegnitz02} by producing
similar sentences in French namely (\ref{ex:carroll_1}) and
(\ref{ex:carroll_2}).  

{\small \eenumsentence{
\item \label{ex:carroll_1} 
Le directeur de ce bureau
auditionne un nouveau consultant d'Allemagne ({\it The manager in that
office interviews a new consultant from Germany})
\item \label{ex:carroll_2}
Le directeur organise un nouveau seminaire d'equipe hebdomadaire special ({\it The manager organises an unusual additional weekly departmental
conference}).
}}

The grammar used contains \geniresult{2063} trees. In this grammar,
the verb {\it organiser} is associated with \geniresult{107} trees and
adjectives with \geniresult{8} trees. For the purpose of efficiency
testing, we furthermore treated the PP {\it d'\'equipe} as an
adjective. As a result, there are \geniresult{$107 \times 8$} (856)
combinations of lexical items covering the input semantics for example
(\ref{ex:carroll_1}) while for example (\ref{ex:carroll_2}), this
number is \geniresult{$107 \times 8^4$}.  The effect of polarity filtering
for these two examples is summarised in the following table.

\begin{figure}[htbp]
\begin{center}
{\footnotesize
\begin{tabular}{|r|r|r|}
\hline
      & Example \ref{ex:carroll_1} &  Example \ref{ex:carroll_2} \\
\hline
Possible combinations       &  856 & 438 272 \\
Combinations explored       &   55 &     232 \\
Sentences (w/o selection)   &    9 &     216 \\
\hline
\end{tabular}\\
}
\end{center}
\caption{Filtering out combinations}
\end{figure}

That is, polarity filtering reduces the number of lexical items
combinations actually explored from \geniresult{856 to 55} in the first
case and from \geniresult{438 272 to 232} in the second.  

Note furthermore that despite the overhead introduced by the
construction of the polarity automaton, polarity filtering reduces
overall processing times (cf. Figure \ref{fig:overhead}).

\begin{figure}[htbp]
\begin{center}
{\footnotesize
\begin{tabular}{|r||r|r|}
\hline
      Optimisations 
      & Example \ref{ex:carroll_1} 
      & Example \ref{ex:carroll_2} \\
\hline
            none    &  14.8 s  &   93.8 s \\
             pol    &   0.8 s  &   14.7 s \\
\hline
    Carroll         &   1.8 s  &    4.3 s \\
    Koller          &   1.4 s  &    0.8 s \\
\hline
\end{tabular}\\
}
\end{center}
\caption{Processing times}
\label{fig:overhead}
\end{figure}


Thus, for the examples considered, processing times are reduced by
\geniresult{95\% and 84\%} respectively.  The processing times for 
(\ref{ex:carroll_1}) compares favourably with those published for
both the Carroll et al. and the Koller and Striegnitz realisers.  
This comparison is not all that meaningful, however, since we are using
different grammars and significantly faster computers, a 3 Ghz Pentium
IV to the 700 Mhz Pentium III in \cite{kollerStriegnitz02}.

Indeed, the poor performance of our surface realiser in example
(\ref{ex:carroll_2}) is directly related to the degree of lexical
ambiguity in our grammar.  As illustrated in section
\ref{sec:adjunction}, input semantics with multiple modifiers pose a
problem for surface realisers.  Although performing adjunction
separately from substitution prevents this problem from spilling over
into incomplete structures, the fact remains that $n$ translate to
$n!$ structures.  Further aggravating the situation is that
our grammar provides 8 trees for every adjective, leading to 
$8^5 \times 5!$, or 3.9 million possible structures.  When we modified
our grammar to only have one tree per adjective, our realisation times
dropped to 9s without filtering and 2.7s with.  This example calls to
attention the fact that polarity filtering does not account for lexical
ambiguity in modifiers.  In section \ref{sec:discussion}, we suggest
some potential mechanisms for dealing with modifiers, which we expect to
be complementary to the filtering technique.


\subsection{Paraphrase selection}


Paraphrase selection permits reducing the combinatorics one step
further. Thus introducing a cleft restrictor for examples
(\ref{ex:carroll_1}) and (\ref{ex:carroll_2}), causes the generator to
produce fewer results, \geniresult{2} sentences instead of
\geniresult{9} in the first example, and \geniresult{18} instead of
\geniresult{54} in the second.

These figures can be explained as follows. The grammar allows 9
syntactic structures for the input considered namely:

\enumsentence{
a. 
C'est par le directeur de ce bureau qu'un nouveau consultant d'Allemagne
est auditionn\'e \\
b. C'est le directeur de ce bureau qui auditionne un nouveau
consultant d'Allemagne \\  
c. C'est un nouveau consultant d'Allemagne qu'auditionne le directeur
de ce bureau \\  
d. C'est un nouveau consultant d'Allemagne que le directeur de ce
bureau auditionne \\ 
e. C'est un nouveau consultant d'Allemagne qui est auditionn\'e par le
directeur de ce bureau \\ 
f. Le directeur de ce bureau auditionne un nouveau consultant
d'Allemagne \\ 
g. Un nouveau consultant d'Allemagne est auditionn\'e par le directeur
de ce bureau } 

Since for the moment the grammar places no constraints on the
respective order of modifiers, there are 9 possible realisations for
example (\ref{ex:carroll_1}) and $9 \times 3!$ for example
(\ref{ex:carroll_2}). With the object cleft restrictions on
``consultant'', these numbers drop to 2 for the first example and to
$2 \times 3!$ for the second.  


\begin{figure}[htbp]
\begin{center}
{\footnotesize
\begin{tabular}{|r|r|r|}
\hline
      & Example \ref{ex:carroll_1} &  Example \ref{ex:carroll_2} \\
\hline
Sentences (w/o selection)   &    9 &     54 \\
Sentences (with selection)   &   2 &      18 \\
\hline
\end{tabular}\\
}
\end{center}
\caption{Selection}
\end{figure}


Accordingly, the processing time drops by \geniresult{63\% and 88\%}
with respect to simple polarities (cf. Figure \ref{fig:overheadsel}).

\begin{figure}[htbp]                                                 
\begin{center}
{\footnotesize
\begin{tabular}{|r||r|r|}
\hline
      Optimisations & Example \ref{ex:carroll_1} & Example \ref{ex:carroll_2} \\
\hline
            none    &  14.8 s  &   93.8 s \\
             pol    &   0.8 s  &   14.7 s \\
    pol + select    &   0.3 s  &    1.8 s \\
\hline
\end{tabular}\\
}
\end{center}
\caption{Polarity + Selection}
\label{fig:overheadsel}
\end{figure}

\section{Related approaches}
\label{sec:discussion}

Several recent papers focus on improving the efficiency of surface
realisation. In this section, we relate our approach to the HPSG based
approach presented in \cite{carrollEtAl99}, to the statistical and
semi-statistical strategies used in \cite{bangalore00} and in
\cite{white04} and to the constraint based approach described in
\cite{kollerStriegnitz02}. We also briefly relate it to the greedy
strategy used in \cite{stoneEtAl03}.

\subsection{Copestake et al.'s HPSG approach}

As mentioned in section \ref{sec:adjunction}, multiple modifiers may
trigger an exponential number of intermediate structures. The
``adjunction after substitution'' idea is inspired from the proposal
made in \cite{carrollEtAl99} that a complete syntactic skeleton be
built before modifiers be inserted into that tree. Because the Carroll
et al. proposal is set within the HPSG framework however, extracted
modifiers as in {\it Which office did John work in?} need specific
treatment. In contrast, in TAG, all modifiers are treated using
adjunction so that no specific treatment is required. All that is
needed is that adjunction only be applied after all possible
substitutions have been carried out. A second, more meaningful
difference is that no such {\it global} optimisation as polarity
filtering is proposed to filter out on the basis of global information
about the sets of possible combinations, a priori invalid ones.

\subsection{Statistical approaches}

Interestingly, \cite{white04} proposes a treatment of modifiers which
is in some sense the converse of the ``adjunction after substitution''
treatment and where complete NPs are first built before they are
combined with the verb. This second option is also feasible in TAG
(adjunction would then apply on specific sets of lexical entries and
the results combined with the verb) and it would be interesting to
experiment and compare the relative efficiency of both approaches
within the TAG framework.  

Both approaches isolate the addition of modifiers to a constituent,
thereby avoiding spurious combinations with unrelated constituents; but
neither directly address the fact that are still an exponential $n!$
ways to combine any $n$ modifiers for a single constituent.
\cite{white04} and \cite{bangalore00} propose statistical solutions to
this problem based on a linear n-gram language model.  In White's
approach the statistical knowledge is used to prune the chart of
identical edges representing different modifier permutations, e.g., to
choose between \natlang{fierce black cat} and \natlang{black fierce
cat}.  Bangalore assumes a single derivation tree that encodes a word
lattice (\natlang{a \{fierce black, black fierce\} cat}), and uses
statistical knowledge to select the best linearilisation.  Our framework
does not currently implement either approach, but we hope to adopt an
approach similar to Bangalore's.  Rather than directly performing
adjunction, we associate each node with the set of auxiliary trees
(modifiers) that are to be adjoined to that node.  The order in which
these modifiers are adjoined can be decided through statistical methods.

There are three other uses for probabilistic techniques: for lexical
selection, optimisation and ranking.  Such techniques are useful for
guiding the surface realiser towards a single best result (or a
relatively small number thereof).  On the other hand, we aim to
produce \emph{all} possible paraphrases, that is explore the entire
search space of syntactic variants, and so with the exception of
modifier ordering, we eschew the use of probabilities in favour of an
``exact method'' \cite{bonfante04}.  While Bangalore uses a tree model
to produce a single most probable lexical selection, we use polarities
to filter out all the definitely impossible ones.  While in White's
system, the best paraphrase is determined on the basis of n-gram
scores that is, on the basis of frequency, in our approach ``best''
means ``most contextually appropriate''.  Indeed, the restrictors we
use to select a paraphrase, although they are here given by hand,
could equally be set by the context and so permit modelling the effect
of contextual constraints on paraphrases. We believe that our
approach, modulo statistical handling of modifiers, would be roughly
equivalent to White's with anytime-searching disabled.

%Our approach further differs from White's approach in the main
%mechanisms used for optimisation namely, n-gram scores versus
%polarities or more generally, statistic based versus symbolic.  

\subsection{Koller et al.'s constraint-based approach}

Finally, our approach has interesting connections to the
constraint-based approach proposed by \cite{kollerStriegnitz02}. In
this approach, the subset of the TAG grammar which is used for a given
realisation task is translated into a set of lexical entries in a
dependency grammar defining well formed TAG derivation trees. This
set of entries is then parsed by an efficient constraint-based
dependency parser thus producing the derivation trees associated by
the grammar with the set of input lexical entries. A post processing
phase produces the derived trees on the basis of the derivation
trees output by the first step. 

The main similarity between this and our approach is that they both
use a {\it global} mechanism for filtering out combinations of lexical
entries that cannot possibly lead to a syntactically valid sequences.
In the Koller et al. approach, this filtering is based on well formed
derivation trees (only these combinations of lexical entries that form
a valid derivation tree are considered) whereas in ours, it is based on
polarities and on the cancelling out of syntactic resources and
requirements. As a preliminary evaluation shows, such a global
optimisation is very efficient in pruning the search space.

There are differences though. In particular, while Koller et
al. explicitly ignores feature information, our algorithm handles a
TAG with fully specified feature structures. Further while in our
approach, the processing of the valid combinations is done using a
tabular algorithm optimised to avoid spurious derivations, the
postprocessing step producing derived trees from derivation trees is
left undefined in the Koller et al. approach. Finally, while the
Koller et al. approach is based on constraint propagation, ours is
based on finite state techniques. These differences open up the door
for interesting comparisons and combinations.  It would be interesting
for instance to combine the Koller et al approach with the tabular
surface realisation algorithm presented in this paper, or to compare
run times once feature structures are taken into account.

\subsection{Stone's greedy approach}

\cite{stoneEtAl03} presents a greedy approach to TAG based surface
realisation. The greedy search applies iteratively to update a {\it
single} state in the search space. On each iteration, all neighbours of
the current state are produced but only one state is chosen at the
next current state, based on a heuristic evaluation.

\cite{stoneEtAl03}'s search strategy is therefore markedly different
from ours. While we explore the entire search space and use polarities
to control the combinatorics, Stone's greedy strategy is a best first
strategy which incrementally trims the search space using
heuristics. In terms of efficiency, the greedy strategy is of course
better. The goals behind the two approaches are distinct however. Thus
while Stone's approach aims at modelling the interaction of the
various mechanisms involved in microplanning, the present proposal is
directed towards generating and selecting paraphrases. In particular,
we are interested in using the realiser to debug a paraphrastic
grammar that is, a grammar which alleviates the inference task by
assigning paraphrases the same semantics -- this can only be done by
adopting an exhaustive search strategy. More generally, ``exhaustive
surface realisation'' provides a natural way to debug grammars and
reduce their degree of overgeneration. Since the combinatorics is not
only theoretically (worse case analysis) but also practically very
high, it is worth investigating ways of optimising surface realisers
which perform an exhaustive search.

\section{Conclusion}
\label{sec:conclusion}

We have presented a surface realiser for TAG which is optimised to
support the generation of grammatical paraphrases while also
permitting the selection, on the basis of syntactico semantic
constraints, of a particular paraphrase. The most efficient
optimisation proposed concerns polarity filtering, a global technique
that permits the elimination of combinations of lexical items which
cannot possibly lead to a syntactically valid sentence. While used
here for generating with TAG, the technique is fully general and can
be used for parsing \cite{perrier03} but also for generating with
other grammatical frameworks.

Future work will concentrate on extending the grammar and the lexicon
to other types of paraphrases (in particular, morphoderivational or
cross categorical paraphrases), on providing a systematic evaluation of
the paraphrase selection mechanism and on using the realiser for the
debugging of an existing TAG for French.

{
\bibliographystyle{named}
\bibliography{enlg05}
}

\appendix

\section{Appendix}

\begin{algorithm}[h]
\caption{The GenI algorithm}
\label{fig:algo}
{\small
\begin{algorithmic}[1]
\Procedure{Generate}{$Gram$,$Sem$}
%\State{\# Input: $Gram$ - A TAG Grammar, $Sem$ - an input semantics}
%\State{\# Output: the set of all sentences associated by $Gram$ to $Sem$ }
%  \Statex
  \State $AgendaA \leftarrow \emptyset$; $Agenda  \leftarrow \emptyset$; $Chart   \leftarrow \emptyset$

% \Statex
  \ForAll { trees $t \in Gram$ such that $t$'s semantics
            subsumes $Sem$ }
  \State $Agenda \leftarrow Agenda + t$
  \EndFor
  
  %\Statex
  \While {$Agenda \neq \emptyset$}
    \State $t \leftarrow$ any tree $\in Agenda$
    \State delete $t$ from $Agenda$ 
    \If {$t$ has a foot node and no substitution nodes}
       \State $AgendaA \leftarrow AgendaA + t$
    \Else
       \ForAll {trees $c \in Chart$ which can combine with
        $t$ via substitution into a new tree $ct$}
         \State $Agenda \leftarrow Agenda + ct$
       \EndFor
       \State $Chart \leftarrow Chart + t$
    \EndIf
  \EndWhile

%  \Statex
  \State {delete from $Chart$ any tree with a substitution node}
  \State {$Agenda \leftarrow Chart$}
  \State {$Chart  \leftarrow AgendaA$}

  %\Statex
  \While {$Agenda \neq \emptyset$}
    \State $t \leftarrow$ any tree $\in Agenda$
    \State delete $t$ from $Agenda$ 
    \If {$t$'s semantics is $Sem$}  
       \State \Return { the string corresponding to $t$ }
    \Else
       \ForAll {trees $c \in Chart$ which can combine with
        $t$ via adjunction into a new tree $ct$}
         \State $Agenda \leftarrow Agenda + ct$
       \EndFor
    \EndIf
  \EndWhile
\EndProcedure
\end{algorithmic}
}
\end{algorithm}

\begin{figure*}[htbp]
{\small
\begin{tabular}{lll}
\multicolumn{1}{l}{\semexpr{buy(e,j,f)}}  &
\multicolumn{1}{l}{\semexpr{annoying(e)}} & 
\multicolumn{1}{l}{\semexpr{field(f)}}    \\
~\\
% ----------------------------------------------------------------------
% trees in row 1  
% ----------------------------------------------------------------------
\multicolumn{1}{l}{v\_ach\`ete (+p -2n)} &
\multicolumn{1}{l}{n0Vadj\_ennuyeux (+p -n)} & 
\multicolumn{1}{l}{n\_field (+n)} \\ 

% tree for v_achete
% -----------------
\begin{minipage}{5cm}
\begin{tabular}{ccc}
\multicolumn{3}{c}{\node{1}{P$^{e}$}}\\[2ex]
% 
\node{1.1}{N$\downarrow^j$} &
\node{1.2}{V$^e$} &
\node{1.3}{N$\downarrow^f$} \\[2ex] 
%
&
\node{1.2.1}{ach\`ete} & \\[2ex]
\end{tabular}
\nodeconnect{1}{1.1} \nodeconnect{1}{1.2} \nodeconnect{1}{1.3} 
\nodeconnect{1.2}{1.2.1}
\end{minipage} & 

% tree for n0Vadj_ennuyeux
% ------------------------
\begin{minipage}{5cm}
\begin{tabular}{ccc}
\multicolumn{3}{c}{\node{1}{P}}\\[2ex] 
%
\node{1.1}{N$\downarrow^e$} &
\node{1.2}{V} &
\node{1.3}{Adj} \\[2ex] 
%
&
\node{1.2.1}{est} &
\node{1.3.1}{ennuyeux} \\[2ex]
\end{tabular} % tree
\nodeconnect{1}{1.1} \nodeconnect{1}{1.2} \nodeconnect{1}{1.3} 
\nodeconnect{1.2}{1.2.1}
\nodeconnect{1.3}{1.3.1}
\end{minipage} &

% tree for n_terrain
% ------------------
\begin{minipage}{5cm}
\begin{tabular}{c}
\node{1}{N$^{f}$} \\[2ex] 
%
\node{1.1}{terrain} \\[2ex] 
\end{tabular}
\nodeconnect{1}{1.1}
\end{minipage} \\ 

% ----------------------------------------------------------------------
% trees in row 2  
% ----------------------------------------------------------------------
& & \multicolumn{1}{l}{\semexpr{john(j)}}\\
~ \\

\multicolumn{1}{l}{n\_achat   (+n -2n)} &
\multicolumn{1}{l}{p0Vadj\_ennuyeux (+p -p)} &
\multicolumn{1}{l}{n\_jean (+n)}\\

% tree for n_achat
% ----------------
\begin{minipage}{7cm}
\begin{tabular}{cccccc}
&\multicolumn{5}{c}{\node{1}{N$^{e}$}} \\[2ex]
%
%\node{1.1}{D$\downarrow$} &
\node{1.2}{N}  &
\multicolumn{2}{c}{\node{1.3}{GP}} &
\multicolumn{2}{c}{\node{1.4}{GP}} \\[2ex]
%
%&
\node{1.2.1}{achat}  &
\node{1.3.1}{P} &
\node{1.3.2}{N$\downarrow^{f}$} &
\node{1.4.1}{P} &
\node{1.4.2}{N$\downarrow^{j}$} \\[2ex]
%
&
\node{1.3.1.1}{par} &
&
\node{1.4.1.1}{de} &
\\[2ex]
\end{tabular}
\vspace{0.5cm}
%\nodeconnect{1}{1.1} 
\nodeconnect{1}{1.2} \nodeconnect{1}{1.3} \nodeconnect{1}{1.4} 
\nodeconnect{1.2}{1.2.1}  
\nodeconnect{1.3}{1.3.1} \nodeconnect{1.3}{1.3.2}  
\nodeconnect{1.4}{1.4.1} \nodeconnect{1.4}{1.4.2}  
\nodeconnect{1.3.1}{1.3.1.1} 
\nodeconnect{1.4.1}{1.4.1.1}  
\end{minipage} & 

% tree for p0Vadj_ennuyeux
% ------------------------
\begin{minipage}{5cm}
\begin{tabular}{ccc}
\multicolumn{3}{c}{\node{1}{P}}\\[2ex] 
%
\node{1.1}{P$\downarrow^e$} &
\node{1.2}{V} &
\node{1.3}{Adj} \\[2ex] 
%
&
\node{1.2.1}{est} &
\node{1.3.1}{ennuyeux} \\[2ex]
\end{tabular} % tree
\nodeconnect{1}{1.1} \nodeconnect{1}{1.2} \nodeconnect{1}{1.3} 
\nodeconnect{1.2}{1.2.1}
\nodeconnect{1.3}{1.3.1}
\end{minipage} & 

% tree for n_jean 
% ---------------
\begin{minipage}{5cm}
\begin{tabular}{c}
\node{1}{N$^{j}$} \\[2ex] 
%
\node{1.1}{jean} \\[2ex] 
\end{tabular}
\nodeconnect{1}{1.1} 
\end{minipage} \\ 

\end{tabular} % tree positioning table
}
\caption{\label{fig:trees}Grammar for example \ref{ex:polex}}
\end{figure*} 

\begin{figure*}[htpb]
\begin{center}
\includegraphics[scale=0.5]{images/terrain-aut.eps}
\end{center}
\caption{A minimised polarity automaton}
\label{fig:automaton}
\end{figure*}


\end{document}