mirror of https://gitee.com/openkylin/po4a.git
217 lines
7.8 KiB
Bash
Executable File
217 lines
7.8 KiB
Bash
Executable File
#! /bin/sh
#
# Compare how an original man page and its po4a-normalized version are
# rendered, and sort the checked pages into LISTE.* files according to how
# closely the two renderings match.  Run with -h for the usage text.

# Document format handled by this run ("pod" branches also exist below).
module=man

# Line-based comparison: unified diff ignoring blank-line and whitespace-amount
# changes.  Kept as a plain string because it is word-split when invoked and
# also interpolated into the help text.
DIFF="diff -uBb"
# Word-based comparison, used once the line-based one still reports changes.
WDIFF="wdiff -3 -n"

# "-h" as the only argument: print the help (man module only) and stop.
if [ $# = 1 ] && [ "$1" = "-h" ]; then
  if [ "$module" = "man" ]; then
    # Unquoted here-document: $0, $DIFF and $WDIFF are expanded on purpose.
    # The two backquotes are escaped; unescaped they form an empty command
    # substitution and vanish from the output.
    cat <<EOF
Compare how are displayed an original man page and the po4a-normalized one.
Usage: $0
Check all man pages found in /usr/share/man/man[1-9]
$0 <space separated list of man pages>
Check the specified man pages.
$0 -f <file>
Check the man pages specified in <file> (one page per line).
Man pages can be gzipped.
$0 erases all LISTE.* files in the current directory and creates
the following files:
LISTE.TOTAL List of all the checked man pages
LISTE.OK List of the man pages for which $DIFF does not see a difference.
LISTE.OK2 $DIFF does not see a difference after converting hyphens
to minus signs, \`\` to ", and '' to " (in both pages).
LISTE.WOK1 $WDIFF doesn't see any difference after the same modifications
LISTE.WOK2 This tries to detect changes in the hyphenation of words.
LISTE.WOK3 This removes minus signs, and thus detects more changes in
hyphenation. It also masks font changes.
LISTE.PBS po4a preferred to stop processing the man page (non
supported,...)
LISTE.WDIFF These are probably bugs in po4a or in the man page
LISTE.IGN man pages po4a refused to operate on (e.g. were generated by
Pod::Man)
You may wish to use stats.sh to compare two runs of $0.
EOF
  fi

  exit 0
fi
# Start from a clean slate: drop the result lists of any previous run and
# create every list empty, so each one exists even if nothing is appended.
rm -f LISTE.*
# Names are spelled out: brace expansion is not part of POSIX sh, and this
# script runs under /bin/sh.
touch LISTE.TOTAL LISTE.OK LISTE.OK2 LISTE.WDIFF LISTE.PBS LISTE.IGN \
  LISTE.WOK1 LISTE.WOK2 LISTE.WOK3

# Build LISTE.TOTAL, the list of pages to check:
#   -f <file>   copy the list from <file>
#   <pages...>  use the pages given on the command line, one per line
#   (nothing)   scan the system for every page of the current module
if [ $# = 2 ] && [ "$1" = "-f" ]; then
  # An unreadable list is fatal rather than silently checking nothing.
  cat "$2" > LISTE.TOTAL || exit 1
elif [ $# != 0 ]; then
  printf '%s\n' "$@" > LISTE.TOTAL
else
  if [ "$module" = "man" ]; then
    # The globs are deliberately unquoted so the shell expands the sections.
    find /usr/share/man/man[1-9] /usr/X11R6/man/man[1-9] -type f | sort > LISTE.TOTAL
  elif [ "$module" = "pod" ]; then
    locate pod | grep -E '\.pod$' | sort > LISTE.TOTAL
  fi
fi
# Private scratch directory for the per-page working files.  mktemp picks an
# unpredictable name and creates the directory itself, unlike a fixed
# /tmp/...-$$ path combined with "mkdir -p".
tmp=$(mktemp -d "${TMPDIR:-/tmp}/po4a-check-$module-XXXXXX") || exit 1
# Remove the scratch directory however the script ends; the signal trap turns
# an interruption into a normal exit so the EXIT trap runs.
trap 'rm -rf -- "$tmp"' EXIT
trap 'exit 1' HUP INT TERM

# Name of the function that renders a page of the current module as text.
# It is left unset for any other module value.
case $module in
  man) cmdtotxt="mantotxt" ;;
  pod) cmdtotxt="podtotxt" ;;
esac
# Render a man page as text.
#   $1  path of the roff source
# Output: $1.txt
# grog is asked for the command line suited to the page (with -Tutf8), and
# that command line is then executed with its stdout sent to $1.txt.
mantotxt() {
  # The substitution is intentionally unquoted: grog's output is a whole
  # command line that must be split into words.  As a consequence, paths
  # containing whitespace are not supported here.
  # shellcheck disable=SC2046
  $(grog -Tutf8 "$1") > "$1.txt"
}
# Render a POD file as text through pod2man and man.
#   $1  file to convert
#   $2  name to pretend to have while converting
# Output: $1.txt (and $2.man, which is left in place).
# When $1 and $2 differ, the file currently named $2 is moved aside to $2.old,
# $1 temporarily takes its name for the conversion, and both are put back
# afterwards.  NOTE(review): presumably this makes pod2man derive the same
# page name for both versions -- confirm.
podtotxt() { # $1 file to convert ; $2 name to pretend to have
  if [ "$1" != "$2" ]; then
    mv -- "$2" "$2.old"
    mv -- "$1" "$2"
  fi

  pod2man "$2" > "$2.man"
  man -Pcat -l "$2.man" > "$2.txt"

  if [ "$1" != "$2" ]; then
    mv -- "$2.txt" "$1.txt"
    mv -- "$2" "$1"
    mv -- "$2.old" "$2"
  fi
}
# ---------------------------------------------------------------------------
# Text filters.  Each reads the file $1 (or $2) and writes a new file; they
# are applied identically to the original rendering and to the normalized
# one so that benign differences disappear before the next comparison.
# ---------------------------------------------------------------------------

# Replace the UTF-8 hyphen (E2 80 90) and minus sign (E2 88 92) with "-", and
# two consecutive single curly quotes with the matching double curly quote.
# Records are separated on spaces, so the output ends with one extra space.
normalize_punct() { # $1 input file ; $2 output file
  awk 'BEGIN { RS = " "; ORS = " " }
       {
         gsub("\xE2\x80\x90", "-")   # this is safe
         gsub("\xE2\x88\x92", "-")   # the minus sign in eqn
         # The quote rewrites can be troublesome: two single quotes become
         # one double quote instead of two.
         gsub("\xE2\x80\x98\xE2\x80\x98", "\xE2\x80\x9C")
         gsub("\xE2\x80\x99\xE2\x80\x99", "\xE2\x80\x9D")
         print
       }' "$1" > "$2"
}

# Undo end-of-line hyphenation: "-", newline and the following indentation
# are replaced by $1.  With "" this detects differences due to word
# rewrapping (it fails if a word containing a hyphen was wrapped); with "-"
# it covers words that were wrapped on an existing hyphen.
join_hyphenated() { # $1 replacement ; $2 input file ; $3 output file
  awk -v repl="$1" 'BEGIN { RS = ";"; ORS = ";" }
       { gsub("-\n[[:blank:]]+", repl); print }' "$2" > "$3"
}

# Drop every "character + backspace" pair (this hides font differences?).
strip_overstrike() { # $1 input file ; $2 output file
  awk '{ gsub(".\x08", ""); print }' "$1" > "$2"
}

# Join words hyphenated at end of line, then remove every remaining "-".
strip_hyphens() { # $1 input file ; $2 output file
  awk 'BEGIN { RS = ";"; ORS = ";" }
       { gsub("-\n[[:blank:]]+", ""); gsub("-", ""); print }' "$1" > "$2"
}

# ---------------------------------------------------------------------------
# Classification helpers.
# ---------------------------------------------------------------------------

# Print the ">tag" marker and append the page to the matching result list.
record() { # $1 marker (ok, wok1, ...) ; $2 list suffix (OK, WOK1, ...) ; $3 page
  printf '>%s\n' "$1"
  printf '%s\n' "$3" >> "LISTE.$2"
}

# Succeed when the file is a single line that is a roff "so" request
# (control character "." or "'"), i.e. a page that only includes another one.
is_so_stub() { # $1 file
  [ "$(wc -l < "$1" | tr -d ' ')" = "1" ] && grep -q "^[.']so " "$1"
}

# Classify a page po4a-normalize failed on.  Exit status 254 and one-line
# "so" stubs go to LISTE.IGN; every other failure goes to LISTE.PBS.
classify_failure() { # $1 page ; $2 working copy of the page ; $3 exit status
  if [ "$3" = 254 ] || is_so_stub "$2"; then
    record ign IGN "$1"
  else
    record pbs PBS "$1"
  fi
}

# Render the original page and its normalized version as text, then compare
# them with increasingly tolerant filters.  The first comparison that finds
# no difference decides the list the page is recorded in; if none does, the
# page goes to LISTE.WDIFF.  Reads the globals tmp, cmdtotxt, DIFF and WDIFF.
# $DIFF and $WDIFF are expanded unquoted on purpose (command plus options).
compare_rendered() { # $1 page as listed ; $2 name of its working copy in $tmp
  page=$1
  orig=$tmp/$2
  norm=$tmp/po4a-normalize

  "$cmdtotxt" "$norm.output" "$orig"
  "$cmdtotxt" "$orig" "$orig"

  # 1. Plain line-based comparison.
  printf '%s\n' "$page"
  printf '%s\n' "$DIFF $orig.txt $norm.output.txt"
  if $DIFF "$orig.txt" "$norm.output.txt"; then
    record ok OK "$page"
    return
  fi

  # 2. Same, after unifying hyphens, minus signs and doubled quotes.
  normalize_punct "$orig.txt" "${orig}_ignore"
  normalize_punct "$norm.output.txt" "$norm.ignore"
  printf '%s\n' "$page"
  printf '%s\n' "$DIFF ${orig}_ignore $norm.ignore"
  if $DIFF "${orig}_ignore" "$norm.ignore"; then
    record ok2 OK2 "$page"
    return
  fi

  # 3. Word-based comparison of the same files.
  printf '%s\n' "$page"
  printf '%s\n' "$WDIFF ${orig}_ignore $norm.ignore"
  if $WDIFF "${orig}_ignore" "$norm.ignore"; then
    record wok1 WOK1 "$page"
    return
  fi

  # 4. Ignore hyphenation changes.  The "2b" variant is useful only if words
  #    are either all wrapped on a hyphen or none is, but it may still help
  #    to classify the difference.  Both comparisons always run.
  join_hyphenated "" "${orig}_ignore" "${orig}_ignore2"
  join_hyphenated "" "$norm.ignore" "$norm.ignore2"
  join_hyphenated "-" "${orig}_ignore" "${orig}_ignore2b"
  join_hyphenated "-" "$norm.ignore" "$norm.ignore2b"
  printf '%s\n' "$page"
  printf '%s\n' "$WDIFF ${orig}_ignore2 $norm.ignore2"
  $WDIFF "${orig}_ignore2" "$norm.ignore2"
  ret1=$?
  printf '%s\n' "$page"
  printf '%s\n' "$WDIFF ${orig}_ignore2b $norm.ignore2b"
  $WDIFF "${orig}_ignore2b" "$norm.ignore2b"
  ret2=$?
  if [ "$ret1" = 0 ] || [ "$ret2" = 0 ]; then
    record wok2 WOK2 "$page"
    return
  fi

  # 5. Additionally drop overstrike sequences and every hyphen.
  strip_overstrike "${orig}_ignore" "${orig}_ignore3_tmp"
  strip_hyphens "${orig}_ignore3_tmp" "${orig}_ignore3"
  strip_overstrike "$norm.ignore" "$norm.ignore3_tmp"
  strip_hyphens "$norm.ignore3_tmp" "$norm.ignore3"
  printf '%s\n' "$page"
  printf '%s\n' "$WDIFF ${orig}_ignore3 $norm.ignore3"
  if $WDIFF "${orig}_ignore3" "$norm.ignore3"; then
    record wok3 WOK3 "$page"
  else
    record wdiff WDIFF "$page"
  fi
}

# ---------------------------------------------------------------------------
# Main loop: one iteration per page listed in LISTE.TOTAL.
# ---------------------------------------------------------------------------

# The list is word-split on purpose: it may hold one page per line or several
# space-separated pages on one line.
for fich in $(cat LISTE.TOTAL); do
  # Work on an uncompressed copy inside the scratch directory.
  case $fich in
    *.gz)
      newfich=$(basename "$fich" .gz)
      zcat "$fich" > "$tmp/$newfich"
      ;;
    *)
      newfich=$(basename "$fich")
      cat "$fich" > "$tmp/$newfich"
      ;;
  esac

  printf '####### %s\n' "$fich"
  # po4a-normalize writes its results in the current directory.
  rm -f po4a-normalize.*
  PERLLIB=../lib ../po4a-normalize -f "$module" -M iso-8859-1 "$tmp/$newfich" \
    -o groff_code=translate \
    -o noarg=ny0,Sp,Pp,BS,BE,VE,ES,zZ,zY \
    -o translate_each=TA,IN,hN,AS,OP,Tp,Ip,TQ \
    -o no_wrap=Ds:De,Ex:Ee,CS:CE,Vb:Ve \
    -o untranslated=Id,VS,rm \
    -o translate_joined=FN,Sh,iX \
    -o inline=ZN,Pn \
    -o generated 2>&1
  status=$?

  if [ "$status" = 0 ]; then
    mv po4a-normalize.output "$tmp/po4a-normalize.output"
    compare_rendered "$fich" "$newfich"
  else
    classify_failure "$fich" "$tmp/$newfich" "$status"
  fi

  rm -f "$tmp/$newfich"* "$tmp"/po4a-normalize.*
  echo '-------------------'
done

# Never run a recursive rm on an empty path.
if [ -n "$tmp" ]; then
  rm -r -- "$tmp"
fi