\documentclass[10pt]{article}
\usepackage{times}
\usepackage{epsfig}
\usepackage{amssymb}
\usepackage{amsmath}
\usepackage{setspace}
%%\doublespacing
\setlength{\textheight}{8.875in}
\setlength{\textwidth}{6.875in}
\setlength{\columnsep}{0.3125in}
\setlength{\topmargin}{0in}
\setlength{\headheight}{0in}
\setlength{\headsep}{0in}
\setlength{\parindent}{1pc}
\setlength{\oddsidemargin}{-.1875in}  % Centers text.
\setlength{\evensidemargin}{-.1875in}

\newcommand{\Section}[1]{\vspace{-8pt}\section{\hskip -1em.~~#1}\vspace{-3pt}}
\newcommand{\SubSection}[1]{\vspace{-3pt}\subsection{\hskip -1em.~~#1}
        \vspace{-3pt}}
\newtheorem{thm}{Theorem}
\newtheorem{lem}{Lemma}
\newtheorem{prop}{Proposition}
\newtheorem{coro}{Corollary}
\newcommand{\bprf}{\par{}\smallskip\noindent{\bf Proof}:\ }
\newcommand{\eprf}{\hfill\rule{.2cm}{.2cm}}
\newcommand{\Z}{\mathbb{Z}}
\begin{document}
% Don't want date printed
\date{}
% Make title bold and 14 apt font (Latex default is non-bold, 16apt)
\title{\bf The Non-Communicative Goulden-Jackson Cluster Method}
% For single author (just remove % characters)
% For two authors (default example)
\author{
%\begin{tabular}[t]{c@{\extracolsep{8em}}c}
Xiangdong Wen
%\end{tabular}
}
\maketitle
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\Section{Introduction} \label{sec-intro}
Let $V$ be a finite alphabet with $|V|\,=\, d$ letters $a_1,a_2,\cdots,a_d$. 
Let $B$ be a finite set of $words$ (on $V$) and let ${\cal L}(B)$ denote the set 
of words (language) over $V$ that avoid the members of $B$ as factors. 
Suppose  $q(n)$ is the total number of words with length $n$ that avoid 
the words in $B$ as factors. Then using the Goulden-Jackson cluster method 
(\cite{GoJ1},\cite{GoJ2} \cite{NZ}), we could efficiently
find the generating function 
\begin{equation}
f(s)\,=\, \sum_{n=0}^{\infty}q(n)s^n\,.
\label{gene1}
\end{equation} 
However the function (\ref{gene1}) lost the information of the letters and thus the information of the order of the letters. In order to keep track of these informations, we set up the general generating function
\begin{equation}
g(s_{a_1},s_{a_2},\cdots,s_{a_d})\,=\, \sum_{n=0}^{\infty}\sum_{ a_1a_2\cdots a_n \in {\cal L}(B)} s_{a_1}\cdot s_{a_2}\cdot s_{a_3} \cdot \cdots \cdot s_{a_n}\,,
\label{gene2}
\end{equation} 
where the operand ``$\cdot$'' is non-communicative.
The generating function (\ref{gene2}) preserves not only the information 
of letters in a word but also the order of the letters. Actually it contains all the information of the language ${\cal L}(B)$. In this paper, we use the {\it non-communicative Goulden-Jackson method} to find a rational form of the function (\ref{gene2}).
\bigskip\\
{\bf Keywords:}\\
Goulden-Jackson cluster method, marked word, cluster, self-avoiding walks.


\Section{The non-communicative Goulden-Jackson cluster method}\label{sec-gj}
The non-communicative Goulden-Jackson cluster method
has the same analog as the  normal Goulden-Jackson cluster method except that
the definitions of the weight of a word are different. 
Amazingly the procedures developed in \cite{NZ} and \cite{wen} 
could all be applied to the non-communicative case. Here we provide an analog version to make it self-contained.
\bigskip\\
A {\bf factor} of the word $a_1a_2\cdots a_n$ is one of the words
$a_ia_{i+1}\cdots a_{j-1} a_j$, $1\leq i \leq j \leq n,$ that we shall
denote by $[i,j]$. 
Two factors $[i,j]$ and $[i',j']$ {\bf overlap} if they have at least 
one common letter.
The {\bf weight}
of a word $w\,=\,a_1\,a_2\,a_3\,\cdots \,a_n$ is the non-communicative product $W(w)\,=\,s_{a_1}\cdot s_{a_2}\cdot \cdots\cdot s_{a_n}$.  
Summing weights of all the words in ${\cal L}(B), $ 
\begin{equation}
g(s_{a_1},s_{a_2},\cdots,s_{a_d})\,=\sum_{w \in {\cal L}(B) } W(w),
\label{weight}
\end{equation}  
gives us the general generating function (\ref{gene2}).
\bigskip \\
A word with some factors marked is called a {\bf marked word}. 
A marked word could be written in the form: 
$$
(w;[i_1,j_1],[i_2,j_2],\cdots,[i_l,j_l]), \mbox{ where }
[i_r,j_r]\, , 1\leq r\leq l, \mbox{ are marked factors.}
$$
A {\bf cluster } is a marked word 
$$(w_1w_2\cdots w_n;[i_1(=1),j_1],[i_2,j_2],\cdots,[i_l,j_l(=n)]),$$
where $[i_k,j_k]$ overlaps with $[i_{k+1},j_{k+1}]$ for all $k=1,2,\cdots,l-1$. 
The set of clusters ${\cal C}$ can be partitioned into
$$
{\cal C}= \bigcup_{u \in B} {\cal C}[u] \, ,$$
 where ${\cal C}[u],$  $ u \in B,$ is the set
of clusters whose first entry is $u$.



Define the {\bf weight},${\overline{W}}$, of a marked word 
$w$ with marked factors $S$, $ S \subset B,$ as 
$$
\overline{W}(w,S)=(-1)^{|S|} s_{a_1}\cdot s_{a_2}\cdot s_{a_3} \cdot \cdots\,\cdot s_{a_n}=(-1)^{|S|}\,W(w),
$$
where $|S|$ is the cardinality of $S$.
\bigskip \\
Let $B(w)$ be a subset of $B$ whose members are factors of $w$. And let $N_B(w)$ denote the number of marked factors of $w$ that belong to $B$. 
Using the inclusion-exclusion principle, we have: 
\begin{displaymath}
\begin{array}{rcl}
f(s) &=& \sum_{w \in {\cal L}(B)} W(w)\\
&=&\sum_{w \in V^*} W(w)0^{N_B(w)}\quad \quad \quad \quad (\mbox{  Define } 0^0=1)\\  
&=&\sum_{w \in V^*} W(w)[1+(-1)]^{N_B(w)}\\
&=&\sum_{w \in V^*} W(w) \sum_{S \subset B(w)} (-1)^{|S|} \\
&=&\sum_{w \in V^*} \sum_{S \subset B(w)} (-1)^{|S|}W(w)\\
&=&\sum_{w \in V^*} \sum_{S \subset B(w)}\overline{W}(w,S).
\end{array}
\end{displaymath}
Hence the generating function (\ref{gene2}) is exactly the same as the 
generating function for the weighted marked word, 
\bigskip\\
A non-empty marked word either starts with a letter that is not
part of a cluster, or starts with a cluster. Peeling-off the maximal cluster, we
get a shorter marked word. Thus we have the following decomposition
$${\cal M}=\{empty\_word\} \cup {\cal M}V \cup {\cal M}{\cal C}.$$
Taking weights on both sides and solving it for $\overline{W}({\cal M})$, we have
\begin{equation}
g(s_1,s_2,\cdots,s_d)= \overline{W}({\cal M}) 
=\frac{1}{1-(s_1+s_2+\cdots+s_d)-\overline{W}({\cal C})}\quad .
\label{main}
\end{equation}
\bigskip \\
For a given word $w=a_1a_2\cdots a_n$, let 
$HEAD(w)$ be the set of all proper prefixes:
 $$HEAD(w)\, := \,\{a_1a_2\cdots a_k|k=1,2,\cdots,n-1\},$$
and $TAIL(w)$ be the set of all proper suffixes 
$$
TAIL(w)\, := \, \{a_ka_{k+1}\cdots a_n|k=2,3,\cdots,n\},
$$  
and 
$$
OVERLAP(u,v)\,:= \,TAIL(u)\cap HEAD(v).
$$ 
Let $u/r$ denote the operation of the
word $u$  chopping off its tail $r$, 
and let
$$
(u:v)\, := \, \sum_{x\in OVERLAP(u,v)}\overline{W}(u/x).
$$


Given a cluster in ${\cal C}[u]$, $ u \in B,$ it either consists of 
just $u$  or
chopping its head $u$ results in a shorter cluster in ${\cal C}[v]$, $ v \in B$ if
$OVERLAP(u,v)$ is not empty. On the other hand, given a cluster in ${\cal C }[v]$,
we could always reconstitute the bigger cluster in ${\cal C}[u]$ 
by adding some words in $OVERLAP(u,v).$  
Hence, there exists a bijection:
$$ 
{\cal C}[u] \leftrightarrow \{(u;[1,|u|])\}\bigcup_{v\in B} {\cal C}[u] OVERLAP(u,v).
$$
Taking weights on both sides, we have:
\begin{equation}
\overline{W}({\cal C}[u])=(-1)\,\overline{W}(u)-\sum_{v \in
  B}\,(u:v)\cdot \overline{W}({\cal C}[v]).
\label{mainsys}
\end{equation}
This is a system of $|B|$ linear equations with $|B|$ unknowns 
$\overline{W}({\cal C}[v]), v\in B.$ The classical method can not be used to solve the system for the product ``$\cdot$'' in the equation is non-communicative,  Fortunately all the 
unknowns in the equations are at the rightmost part of the operand ``$\cdot$'' . We can use 
multiplication on the left to extract out the unknowns and then use the 
Gaussion elimination method to solve the system.  This
procedure could automatically be done by computer.
\bigskip \\ 
After solving these equations we could obtain 
$\overline{W} ({\cal C})$ by:
$\overline{W}({\cal C})=\sum_{v\in B}\,\overline{W}({\cal C}[v])$ 
\bigskip \\

The symbolic method (\cite{wen}) could also be implemented because
 $\overline{W}({\cal C})$ is independent of
the cardinality of the alphabet. 

Example 1:
Let $V=\{1,2\}$, find the generating function for the language 
which does not have words in $B=\{111\}$ as factors.
Introduce the non-communicative indeterminate $s_1,s_2,s_3$. Using (\ref{mainsys})  we set up a system of one equation:
$$ 
\overline{W}({\cal C}[111])  =  -s_1^3-s_1 \cdot \overline{W}({\cal C}[111])
-s_1^2 \cdot \overline{W}({\cal C}[111]).
$$
After solving it, we have 
$$
\overline{W}({\cal C}[111])=-\frac{1}{1+s_1+s_1^2}\cdot s_1^3,
$$
and
$$
g(s_1,s_2)=\frac{1}{1-s_1-s_2+\frac{1}{1+s_1+s_1^2}\cdot s_1^3}.
$$
This could be simplified further as
$$ g(s_1,s_2) =\frac{1}{(1+s_1+s_1^2)\cdot (1-s_1-s_2)+s_1^3}\cdot (1+s_1+s_1^2)$$

Example 2:
Let $V=\{1,2\}$ and $B=\{12,22\}$. Find the generating function 
of the language which does not have words in $B$ as factors. 
Using (\ref{mainsys})  we set up a linear system of two equations:
\begin{equation}\nonumber
 \left\{
\begin{array}{lcl} 
\overline{W}({\cal C}[12]) & = & -s_1 \cdot s_2 -s_1\cdot \overline{W}({\cal C}[22])
\\
\overline{W}({\cal C}[22]) & = & -s_2^2 - s_2\cdot \overline{W}({\cal C}[22])
\end{array} \right. . 
\end{equation}
After solving it, we have
\begin{equation}\nonumber
 \left\{
\begin{array}{lcl} 
\overline{W}({\cal C}[22]) & = & -\frac{1}{1+s_2}\cdot s_2^2 \\
\overline{W}({\cal C}[12]) & = & -s_1\cdot s_2 +s_1\cdot\frac{1}{1+s_2} \cdot s_2^2
\end{array} \right. . 
\end{equation}
And finally we get
$$
g(s_1,s_2)=\frac{1}{1-s_1-s_2+\frac{1}{1+s_2}\cdot s_2^2 + s_1\cdot s_2 -s_1\cdot\frac{1}{1+s_2} \cdot s_2^2}.
$$

\Section{Avoid Pairs}
One of the most interesting problems is to find the generating function for the language which avoid some pair of the letters as factors. It is a special case for using of the Goulden-Jackson method. 
But there also exist a direct way to set up equations and thus to calculate the generating function: Divide the whole language into $d$ sub-languages in which the words start with the same letter. The generating functions for each sub-group satisfy a system of linear equations which could be solved by revised Gaussion elimination method.
And hence we could get the generating function for the language by summing up the generating functions for the sub-languages.\\
Here we give an example to illustrate how it works.\\

Example 3: Given an alphabet $V=\{1,2\}$, and a set of pairs $B=\{11,22\}.$
Find the generating function of the language that avoids words in $B$ as 
factors.\\
Let $C[i]$ be the sub-language in which each word starts with the letter $i$,$i=1,2$.
then
$$C[1]=\{[1]\}\bigcup \{[1]\cdot C[2]\} .$$
Taking weight on both sides we have 
$$ \overline{W}({\cal C}[1])  =  s_1+s_1\cdot \overline{W}({\cal C}[2]).$$
By the same way we could get another equation:
$$ \overline{W}({\cal C}[2])  =  s_2+s_2\cdot \overline{W}({\cal C}[1]).$$
Thus we can see all the unknowns are at the rightmost part of the operand ``$\cdot$''. Using multiplication on the left and also the Gaussion elimination method, we could solve the system:
\begin{equation}\nonumber
\left\{
\begin{array}{lcl}
\overline{W}({\cal C}[1])&=& s_1\cdot [1-\frac{1}{x_1-\frac{1}{s_2}}\cdot(1+s_1)]\\
\overline{W}({\cal C}[2])&=& -\frac{1}{s_1-\frac{1}{s_2}}\cdot(1+s_1)
\end{array} \right. . 
\end{equation}
Finally the generating function could be calculated by 
$$f(s_1,s_2)=1+\overline{W}({\cal C}[1])+\overline{W}({\cal C}[2]).$$

\Section{The Maple package}

All the procedures are included in the package ``NONCOMM\_GJ", downloadable 
from the web address: 
$$ http://www.math.temple.edu/\sim wen/gj/noncomm/ .$$
Users could get the detailed online help when opened the package by maple.
The direct way to set up and solve the linear system to get the generating function for avoiding pairs are also included in the package.
\Section{Acknowledgment}

This work will be a part of the author's Ph.D. dissertation, written
under the direction of Professor Doron Zeilberger (Rutgers University).
I wish to thank Dr. Zeilberger for his generous support and encouragement.


\begin{thebibliography}{9}
\small  % Use 9 point text.


\bibitem{NZ}
John Noonan and Doron Zeilberger,
`` The Goulden-Jackson Cluster Method: Extensions, 
Applications, and Implementations'',
{\em J. Difference Eq. Appl.}, 5(1999), 355-377. 

\bibitem{Noonan}
John Noonan,
`` New Upper Bounds for The Connective Constants of Self-Avoiding Walks'',
{\em J. Stat. Phys.}, 91(1998), 871-888. 

\bibitem{MS}
N. Madras,and G. Slade, 
``The Self avoiding Walk'',
{\em } Birkhauser, Boston (1993).

\bibitem{GoJ1} I. Goulden  and D.M. Jackson,
{\em An inversion theorem for cluster decompositions of
sequences with distinguished subsequences},
J. London Math. Soc.(2){\bf 20} (1979), 567-576.
 
\bibitem{GoJ2} I. Goulden  and D.M. Jackson,
{\em "Combinatorial Enumeration"}, John  Wiley, 1983, New York.
 
 \bibitem{wen} X. Wen, 
 ``The symbolic Goulden-Jackson cluster method''
{\em J. Difference Eq. Appl.}, vol. 11, No. 2(2005),173-179.
 
\end{thebibliography}
  
\noindent
{\bf Xiangdong Wen; 638 Wachman Hall(038-16); 1805 N. Broad Street}\\
{\bf Department of Mathematics; Temple University; Philadelphia,PA 19122 }\\
{\bf wen@math.temple.edu; 215-204-1655;}\\
{\bf AMS Classification codes:05A15}\\
{\bf Electronic version: http://www.math.temple.edu/\~\,wen/gj}\\
\end{document}

% LocalWords:  symetric

