From: John Darrington Date: Sun, 25 Oct 2020 19:46:00 +0000 (+0100) Subject: Add a TeX driver X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=300d3db698d3ac82a0a302a91ee62bcaad6337b6;p=pspp Add a TeX driver --- diff --git a/NEWS b/NEWS index c30e558cb2..2b5cfd4ef7 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,8 @@ Please send PSPP bug reports to bug-gnu-pspp@gnu.org. Changes from 1.4.1 to 1.5.2: + * PSPP can now write its results directly to a TeX source file. + * Added Drag-N-Drop in output view. * The Explore GUI dialog supports the "Plots" subdialog. Boxplots, Q-Q Plots diff --git a/configure.ac b/configure.ac index 096825a08b..4b72f7f771 100644 --- a/configure.ac +++ b/configure.ac @@ -46,6 +46,8 @@ AM_CONDITIONAL(cc_is_gcc, test x"$GCC" = x"yes" ) PSPP_CC_FOR_BUILD PSPP_PERL PSPP_PYTHON +AC_CHECK_PROGS([TEX], [tex], [no]) +AC_SUBST([TEX]) # This suppresses an error in gl/careadlinkat.c that otherwise can't be # avoided in GCC 10.x: diff --git a/doc/invoking.texi b/doc/invoking.texi index b1a299720f..22754c9d12 100644 --- a/doc/invoking.texi +++ b/doc/invoking.texi @@ -24,6 +24,7 @@ interface. * Main Options:: * PDF PostScript and SVG Output Options:: * Plain Text Output Options:: +* TeX Output Options:: * HTML Output Options:: * OpenDocument Output Options:: * Comma-Separated Value Output Options:: @@ -342,6 +343,21 @@ overstriking, which may not be supported by all the software to which you might pass the output. Default: @code{none}. @end table +@node TeX Output Options +@section TeX Output Options +@cindex @TeX{} +@cindex tex + +If you want to publish statistical results in professional or academic +journals, you will probably want to provide results in @TeX{} format. +To do this, specify @option{-o @var{file}} on the @pspp{} command line where +@var{file} is a file name ending in @file{.tex}, or you can specify +@option{-O format=tex}. 
+ +The resulting file can be directly processed using @TeX{} or you can manually +edit the file to add commentary text. +Alternatively, you can cut and paste desired sections to another @TeX{} file. + @node HTML Output Options @section HTML Output Options @cindex HTML diff --git a/doc/org.fsf.pspp.metainfo.xml.in b/doc/org.fsf.pspp.metainfo.xml.in index c0ae21f1ca..91e9406942 100644 --- a/doc/org.fsf.pspp.metainfo.xml.in +++ b/doc/org.fsf.pspp.metainfo.xml.in @@ -15,7 +15,7 @@
  • Support for over 1 billion variables
  • Syntax and data files which are compatible with those of SPSS
  • A choice of terminal or graphical user interface
  • -
  • A choice of text, postscript, pdf, opendocument or html output formats
  • +
  • A choice of text, PostScript, PDF, OpenDocument, HTML or TeX output formats
  • Inter-operability with: LibreOffice, Apache OpenOffice, Gnumeric and other free software
  • Easy data import from spreadsheets, text files and database sources
  • The capability to open, analyse and edit two or more datasets concurrently
  • diff --git a/src/output/automake.mk b/src/output/automake.mk index dd9f3fc9cd..eb1b438a53 100644 --- a/src/output/automake.mk +++ b/src/output/automake.mk @@ -48,6 +48,12 @@ src_output_liboutput_la_SOURCES = \ src/output/driver-provider.h \ src/output/driver.c \ src/output/driver.h \ + src/output/tex-glyphs.c \ + src/output/tex-glyphs.h \ + src/output/tex-parsing.c \ + src/output/tex-parsing.h \ + src/output/tex-rendering.c \ + src/output/tex-rendering.h \ src/output/group-item.c \ src/output/group-item.h \ src/output/html.c \ @@ -78,6 +84,7 @@ src_output_liboutput_la_SOURCES = \ src/output/table-provider.h \ src/output/table.c \ src/output/table.h \ + src/output/tex.c \ src/output/text-item.c \ src/output/text-item.h if HAVE_CAIRO diff --git a/src/output/driver.c b/src/output/driver.c index 6204e335ca..067bf831ae 100644 --- a/src/output/driver.c +++ b/src/output/driver.c @@ -441,6 +441,7 @@ extern const struct output_driver_factory pdf_driver_factory; extern const struct output_driver_factory ps_driver_factory; extern const struct output_driver_factory svg_driver_factory; #endif +extern const struct output_driver_factory tex_driver_factory; static const struct output_driver_factory *factories[] = { @@ -455,6 +456,7 @@ static const struct output_driver_factory *factories[] = &ps_driver_factory, &svg_driver_factory, #endif + &tex_driver_factory, NULL }; diff --git a/src/output/tex-glyphs.c b/src/output/tex-glyphs.c new file mode 100644 index 0000000000..5f343a3f40 --- /dev/null +++ b/src/output/tex-glyphs.c @@ -0,0 +1,650 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +#include "tex-glyphs.h" + +const char *tex_macro[] = + { + /* TEX_NONE */ + "", + /* TEX_VULGAR_FRAC */ + "%% Typset a vulgar fraction (without a /). The lack\n" + "%% of / is common in many typefaces (e.g. `Transport') and is easier to read.\n" + "\\def\\vulgarfrac#1/#2{\\leavevmode" + "\\raise.5ex\\hbox{\\the\\scriptfont0 #1}\\kern-.1em" + "\\lower.25ex\\hbox{\\the\\scriptfont0 #2}}", + /* TEX_OGONEK */ + "\\def\\ogonekx#1#2{#1\\hskip -#2\\llap{\\smash{\\lower1ex\\hbox{\\the\\textfont1 \\char\"2C}}}}\n" + "\\def\\ogonek#1{\\ogonekx{#1}{0pt}}", + /* TEX_THORN_UC */ + "\\def\\Thorn{{\\font\\xx=cmr7 \\xx \\rlap{\\raise 0.74ex\\hbox{I}}P}}", + /* TEX_THORN_LC */ + "\\def\\thorn{{\\rlap {\\lower 0.7ex \\hbox{l}}b}}", + /* TEX_GUILLEMET_LEFT */ + "\\def\\lguillemet{{\\raise0.5ex\\hbox{\\font\\xx=cmsy5 \\xx \\char\"1C}}}", + /* TEX_GUILLEMET_RIGHT */ + "\\def\\rguillemet{{\\raise0.5ex\\hbox{\\font\\xx=cmsy5 \\xx \\char\"1D}}}", + /* TEX_ETH */ + "\\def\\eth{\\rlap{\\hskip 0.08em\\raise 0.5ex\\hbox{\\the\\textfont0 \\char\"20}}" + "\\rlap{\\raise 1.5ex\\hbox{\\hskip -0.04em\\vbox to 0pt{\\hbox{\\font\\xx=cmr17 \\xx \\`\\ }\\vss}}}o}", + /* TEX_DOT */ + "\\def\\dotabove#1{{\\ifnum\\fam=7 \\raise1.5ex\\rlap{.}#1\\else\\.#1\\fi}}", + /* TEX_DOUBLE_ACUTE */ + "\\def\\doubleacute#1{\\ifnum\\fam=7 {\\setbox0=\\hbox{#1}\\setbox1=\\hbox{o}\\dimen0=\\ht0\\advance\\dimen0 -\\ht1" + " \\raise\\dimen0\\rlap{\\kern -0.25ex\\char\"13\\kern -0.8ex\\char\"13}#1}\\else\\H{#1}\\fi}" +}; + + + +const char *unsupported_glyph = "{\\tt\\char\"20}"; + + +static const struct glyph control_codes [] = + { + {0x0009, "TAB", 
TEX_NONE, " "}, + {0x000A, "LINE FEED", TEX_NONE, "{\\hfil\\break}"} + }; + +static const struct glyph basic_latin [] = + { + {0x0020, "SPACE", TEX_NONE, " "}, + {0x0021, "EXCLAMATION MARK", TEX_NONE, "!"}, + {0x0022, "QUOTATION MARK", TEX_NONE, "``"}, + {0x0023, "NUMBER SIGN", TEX_NONE, "\\#"}, + /* In the italic family, $ shows up as pound sterling. So use + the slanted typeface which is close enough. */ + {0x0024, "DOLLAR SIGN", TEX_NONE, "{\\ifnum\\fam=4{\\sl\\$}\\else\\$\\fi}"}, + {0x0025, "PERCENT SIGN", TEX_NONE, "\\%"}, + {0x0026, "AMPERSAND", TEX_NONE, "\\&"}, + {0x0027, "APOSTROPHE", TEX_NONE, "'"}, + {0x0028, "LEFT PARENTHESIS", TEX_NONE, "("}, + {0x0029, "RIGHT PARENTHESIS", TEX_NONE, ")"}, + {0x002A, "ASTERISK", TEX_NONE, "*"}, + {0x002B, "PLUS SIGN", TEX_NONE, "+"}, + {0x002C, "COMMA", TEX_NONE, ","}, + {0x002D, "HYPHEN-MINUS", TEX_NONE, "-"}, + {0x002E, "FULL STOP", TEX_NONE, "."}, + {0x002F, "SOLIDUS", TEX_NONE, "/"}, + {0x0030, "DIGIT ZERO", TEX_NONE, "0"}, + {0x0031, "DIGIT ONE", TEX_NONE, "1"}, + {0x0032, "DIGIT TWO", TEX_NONE, "2"}, + {0x0033, "DIGIT THREE", TEX_NONE, "3"}, + {0x0034, "DIGIT FOUR", TEX_NONE, "4"}, + {0x0035, "DIGIT FIVE", TEX_NONE, "5"}, + {0x0036, "DIGIT SIX", TEX_NONE, "6"}, + {0x0037, "DIGIT SEVEN", TEX_NONE, "7"}, + {0x0038, "DIGIT EIGHT", TEX_NONE, "8"}, + {0x0039, "DIGIT NINE", TEX_NONE, "9"}, + {0x003A, "COLON", TEX_NONE, ":"}, + {0x003B, "SEMICOLON", TEX_NONE, ";"}, + {0x003C, "LESS-THAN SIGN", TEX_NONE, "{\\ifnum\\fam=7 \\char\"3C\\else $<$\\fi}"}, + {0x003D, "EQUALS SIGN", TEX_NONE, "="}, + {0x003E, "GREATER-THAN SIGN", TEX_NONE, "{\\ifnum\\fam=7 \\char\"3E\\else $>$\\fi}"}, + {0x003F, "QUESTION MARK", TEX_NONE, "?"}, + {0x0040, "COMMERCIAL AT", TEX_NONE, "@"}, + {0x0041, "LATIN CAPITAL LETTER A", TEX_NONE, "A"}, + {0X0042, "LATIN CAPITAL LETTER B", TEX_NONE, "B"}, + {0X0043, "LATIN CAPITAL LETTER C", TEX_NONE, "C"}, + {0X0044, "LATIN CAPITAL LETTER D", TEX_NONE, "D"}, + {0X0045, "LATIN CAPITAL LETTER E", TEX_NONE, 
"E"}, + {0X0046, "LATIN CAPITAL LETTER F", TEX_NONE, "F"}, + {0X0047, "LATIN CAPITAL LETTER G", TEX_NONE, "G"}, + {0X0048, "LATIN CAPITAL LETTER H", TEX_NONE, "H"}, + {0X0049, "LATIN CAPITAL LETTER I", TEX_NONE, "I"}, + {0X004A, "LATIN CAPITAL LETTER J", TEX_NONE, "J"}, + {0X004B, "LATIN CAPITAL LETTER K", TEX_NONE, "K"}, + {0X004C, "LATIN CAPITAL LETTER L", TEX_NONE, "L"}, + {0X004D, "LATIN CAPITAL LETTER M", TEX_NONE, "M"}, + {0X004E, "LATIN CAPITAL LETTER N", TEX_NONE, "N"}, + {0X004F, "LATIN CAPITAL LETTER O", TEX_NONE, "O"}, + {0X0050, "LATIN CAPITAL LETTER P", TEX_NONE, "P"}, + {0X0051, "LATIN CAPITAL LETTER Q", TEX_NONE, "Q"}, + {0X0052, "LATIN CAPITAL LETTER R", TEX_NONE, "R"}, + {0X0053, "LATIN CAPITAL LETTER S", TEX_NONE, "S"}, + {0X0054, "LATIN CAPITAL LETTER T", TEX_NONE, "T"}, + {0X0055, "LATIN CAPITAL LETTER U", TEX_NONE, "U"}, + {0X0056, "LATIN CAPITAL LETTER V", TEX_NONE, "V"}, + {0X0057, "LATIN CAPITAL LETTER W", TEX_NONE, "W"}, + {0X0058, "LATIN CAPITAL LETTER X", TEX_NONE, "X"}, + {0X0059, "LATIN CAPITAL LETTER Y", TEX_NONE, "Y"}, + {0X005A, "LATIN CAPITAL LETTER Z", TEX_NONE, "Z"}, + {0x005B, "LEFT SQUARE BRACKET", TEX_NONE, "["}, + {0x005C, "REVERSE SOLIDUS", TEX_NONE, "{\\ifnum\\fam=7 \\char\"5C\\else $\\backslash$\\fi}" }, + {0x005D, "RIGHT SQUARE BRACKET", TEX_NONE, "]"}, + {0x005E, "CIRCUMFLEX ACCENT", TEX_NONE, "\\^{}"}, + {0x005F, "LOW LINE", TEX_NONE, "\\_"}, + {0x0060, "GRAVE ACCENT", TEX_NONE, "\\`{}"}, + {0x0061, "LATIN SMALL LETTER A", TEX_NONE, "a"}, + {0x0062, "LATIN SMALL LETTER B", TEX_NONE, "b"}, + {0x0063, "LATIN SMALL LETTER C", TEX_NONE, "c"}, + {0x0064, "LATIN SMALL LETTER D", TEX_NONE, "d"}, + {0x0065, "LATIN SMALL LETTER E", TEX_NONE, "e"}, + {0x0066, "LATIN SMALL LETTER F", TEX_NONE, "f"}, + {0x0067, "LATIN SMALL LETTER G", TEX_NONE, "g"}, + {0x0068, "LATIN SMALL LETTER H", TEX_NONE, "h"}, + {0x0069, "LATIN SMALL LETTER I", TEX_NONE, "i"}, + {0x006A, "LATIN SMALL LETTER J", TEX_NONE, "j"}, + {0x006B, "LATIN SMALL LETTER 
K", TEX_NONE, "k"}, + {0x006C, "LATIN SMALL LETTER L", TEX_NONE, "l"}, + {0x006D, "LATIN SMALL LETTER M", TEX_NONE, "m"}, + {0x006E, "LATIN SMALL LETTER N", TEX_NONE, "n"}, + {0x006F, "LATIN SMALL LETTER O", TEX_NONE, "o"}, + {0x0070, "LATIN SMALL LETTER P", TEX_NONE, "p"}, + {0x0071, "LATIN SMALL LETTER Q", TEX_NONE, "q"}, + {0x0072, "LATIN SMALL LETTER R", TEX_NONE, "r"}, + {0x0073, "LATIN SMALL LETTER S", TEX_NONE, "s"}, + {0x0074, "LATIN SMALL LETTER T", TEX_NONE, "t"}, + {0x0075, "LATIN SMALL LETTER U", TEX_NONE, "u"}, + {0x0076, "LATIN SMALL LETTER V", TEX_NONE, "v"}, + {0x0077, "LATIN SMALL LETTER W", TEX_NONE, "w"}, + {0x0078, "LATIN SMALL LETTER X", TEX_NONE, "x"}, + {0x0079, "LATIN SMALL LETTER Y", TEX_NONE, "y"}, + {0x007A, "LATIN SMALL LETTER Z", TEX_NONE, "z"}, + {0x007B, "LEFT CURLY BRACKET", TEX_NONE, "{\\ifnum\\fam=7 \\char\"7B\\else $\\{$\\fi}" }, + {0x007C, "VERTICAL LINE", TEX_NONE, "{\\ifnum\\fam=7 \\char\"7C\\else {\\the\\textfont2 \\char\"6A}\\fi}" }, + {0x007D, "RIGHT CURLY BRACKET", TEX_NONE, "{\\ifnum\\fam=7 \\char\"7D\\else $\\}$\\fi}" }, + {0x007E, "TILDE", TEX_NONE, "{\\ifnum\\fam=7 \\char\"7E\\else {\\the\\textfont2 \\char\"18}\\fi}" }, + }; + + +static const struct glyph extended_latin [] = + { + { 0x00A0, "NO-BREAK SPACE", TEX_NONE, "~" }, + { 0x00A1, "INVERTED EXCLAMATION MARK", TEX_NONE, "!`" }, + { 0x00A2, "CENT SIGN", TEX_NONE, "\\rlap /c" }, + { 0x00A3, "POUND SIGN", TEX_NONE, "{\\it \\$}" }, + { 0x00A4, "CURRENCY SIGN", TEX_NONE, + "\\rlap{\\kern 0.028em\\raise 0.2ex\\hbox{\\the\\textfont2\\char\"0E}}" + "{\\ifnum\\fam=7\\kern -0.3ex\\fi" + "\\rlap{\\raise 1.05ex\\hbox{.}}\\rlap{\\kern 0.28em\\raise 1.05ex\\hbox{.}}" + "\\rlap{\\raise 0.28ex\\hbox{.}}{\\kern 0.28em\\raise 0.28ex\\hbox{.}}" + "}" }, + { 0x00A5, "YEN SIGN", TEX_NONE, "\\rlap Y=" }, + { 0x00A6, "BROKEN BAR", TEX_NONE, "{\\thinspace\\rlap{\\hbox{\\vrule height 0.7ex depth 0pt}}{\\raise 0.9ex\\hbox{\\vrule height 0.7ex depth 0pt}}}" }, + { 0x00A7, "SECTION SIGN", 
TEX_NONE, "{\\S}" }, + { 0x00A8, "DIAERESIS", TEX_NONE, "\\\"{}" }, + { 0x00A9, "COPYRIGHT SIGN", TEX_NONE, "{\\copyright}" }, + { 0x00AA, "FEMININE ORDINAL INDICATOR", TEX_NONE, "$\\rm ^{\\b a}$" }, + { 0x00AB, "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK", TEX_GUILLEMET_LEFT, "{\\lguillemet}" }, + { 0x00AC, "NOT SIGN", TEX_NONE, "$\\neg$" }, + { 0x00AD, "SOFT HYPHEN", TEX_NONE, "\\-" }, + { 0x00AE, "REGISTERED SIGN", TEX_NONE, "{\\font\\sc=cmr7 \\rlap {\\sc \\hskip 2pt\\relax R}$\\bigcirc$}" }, + { 0x00AF, "MACRON", TEX_NONE, "\\={}" }, + { 0x00B0, "DEGREE SIGN", TEX_NONE, "$^\\circ$" }, + { 0x00B1, "PLUS-MINUS SIGN", TEX_NONE, "$\\pm$" }, + { 0x00B2, "SUPERSCRIPT TWO", TEX_NONE, "$^2$" }, + { 0x00B3, "SUPERSCRIPT THREE", TEX_NONE, "$^3$" }, + { 0x00B4, "ACUTE ACCENT", TEX_NONE, "\\'{}" }, + { 0x00B5, "MICRO SIGN", TEX_NONE, "{\\the\\textfont1\\char\"16}" }, + { 0x00B6, "PILCROW SIGN", TEX_NONE, "{\\P}" }, + { 0x00B7, "MIDDLE DOT", TEX_NONE, "$\\cdot$" }, + { 0x00B8, "CEDILLA", TEX_NONE, "\\c{}" }, + { 0x00B9, "SUPERSCRIPT ONE", TEX_NONE, "$^1$" }, + { 0x00BA, "MASCULINE ORDINAL INDICATOR", TEX_NONE, "$\\rm ^{\\b o}$" }, + { 0x00BB, "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK", TEX_GUILLEMET_RIGHT, "{\\rguillemet}" }, + { 0x00BC, "VULGAR FRACTION ONE QUARTER", TEX_VULGAR_FRAC, "\\vulgarfrac 1/4" }, + { 0x00BD, "VULGAR FRACTION ONE HALF", TEX_VULGAR_FRAC, "\\vulgarfrac 1/2" }, + { 0x00BE, "VULGAR FRACTION THREE QUARTERS", TEX_VULGAR_FRAC, "\\vulgarfrac 3/4" }, + { 0x00BF, "INVERTED QUESTION MARK", TEX_NONE, "?`" }, + { 0x00C0, "LATIN CAPITAL LETTER A WITH GRAVE", TEX_NONE, "\\`A" }, + { 0x00C1, "LATIN CAPITAL LETTER A WITH ACUTE", TEX_NONE, "\\'A" }, + { 0x00C2, "LATIN CAPITAL LETTER A WITH CIRCUMFLEX", TEX_NONE, "\\^A" }, + { 0x00C3, "LATIN CAPITAL LETTER A WITH TILDE", TEX_NONE, "\\~A" }, + { 0x00C4, "LATIN CAPITAL LETTER A WITH DIAERESIS", TEX_NONE, "\\\"A" }, + { 0x00C5, "LATIN CAPITAL LETTER A WITH RING ABOVE", TEX_NONE, "{\\AA}" }, + { 0x00C6, "LATIN CAPITAL 
LETTER AE", TEX_NONE, "{\\AE}" }, + { 0x00C7, "LATIN CAPITAL LETTER C WITH CEDILLA", TEX_NONE, "\\c C" }, + { 0x00C8, "LATIN CAPITAL LETTER E WITH GRAVE", TEX_NONE, "\\`E" }, + { 0x00C9, "LATIN CAPITAL LETTER E WITH ACUTE", TEX_NONE, "\\'E" }, + { 0x00CA, "LATIN CAPITAL LETTER E WITH CIRCUMFLEX", TEX_NONE, "\\^E" }, + { 0x00CB, "LATIN CAPITAL LETTER E WITH DIAERESIS", TEX_NONE, "\\\"E" }, + { 0x00CC, "LATIN CAPITAL LETTER I WITH GRAVE", TEX_NONE, "\\`I" }, + { 0x00CD, "LATIN CAPITAL LETTER I WITH ACUTE", TEX_NONE, "\\'I" }, + { 0x00CE, "LATIN CAPITAL LETTER I WITH CIRCUMFLEX", TEX_NONE, "\\^I" }, + { 0x00CF, "LATIN CAPITAL LETTER I WITH DIAERESIS", TEX_NONE, "\\\"I" }, + /* 0x00D0 and 0x0110 are indistinguishable */ + { 0x00D0, "LATIN CAPITAL LETTER ETH", TEX_NONE, "\\rlap{\\raise0.4ex\\hbox{-}}D" }, + { 0x00D1, "LATIN CAPITAL LETTER N WITH TILDE", TEX_NONE, "\\~N" }, + { 0x00D2, "LATIN CAPITAL LETTER O WITH GRAVE", TEX_NONE, "\\`O" }, + { 0x00D3, "LATIN CAPITAL LETTER O WITH ACUTE", TEX_NONE, "\\'O" }, + { 0x00D4, "LATIN CAPITAL LETTER O WITH CIRCUMFLEX", TEX_NONE, "\\^O" }, + { 0x00D5, "LATIN CAPITAL LETTER O WITH TILDE", TEX_NONE, "\\~O" }, + { 0x00D6, "LATIN CAPITAL LETTER O WITH DIAERESIS", TEX_NONE, "\\\"O" }, + { 0x00D7, "MULTIPLICATION SIGN", TEX_NONE, "{\\the\\textfont2\\char\"02}" }, + { 0x00D8, "LATIN CAPITAL LETTER O WITH STROKE", TEX_NONE, "{\\O}" }, + { 0x00D9, "LATIN CAPITAL LETTER U WITH GRAVE", TEX_NONE, "\\`U" }, + { 0x00DA, "LATIN CAPITAL LETTER U WITH ACUTE", TEX_NONE, "\\'U" }, + { 0x00DB, "LATIN CAPITAL LETTER U WITH CIRCUMFLEX", TEX_NONE, "\\^U" }, + { 0x00DC, "LATIN CAPITAL LETTER U WITH DIAERESIS", TEX_NONE, "\\\"U" }, + { 0x00DD, "LATIN CAPITAL LETTER Y WITH ACUTE", TEX_NONE, "\\'Y" }, + { 0x00DE, "LATIN CAPITAL LETTER THORN", TEX_THORN_UC, "{\\Thorn}" }, + { 0x00DF, "LATIN SMALL LETTER SHARP S", TEX_NONE, "{\\ss}" }, + { 0x00E0, "LATIN SMALL LETTER A WITH GRAVE", TEX_NONE, "\\`a" }, + { 0x00E1, "LATIN SMALL LETTER A WITH ACUTE", TEX_NONE, 
"\\'a" }, + { 0x00E2, "LATIN SMALL LETTER A WITH CIRCUMFLEX", TEX_NONE, "\\^a" }, + { 0x00E3, "LATIN SMALL LETTER A WITH TILDE", TEX_NONE, "\\~a" }, + { 0x00E4, "LATIN SMALL LETTER A WITH DIAERESIS", TEX_NONE, "\\\"a" }, + { 0x00E5, "LATIN SMALL LETTER A WITH RING ABOVE", TEX_NONE, "{\\aa}" }, + { 0x00E6, "LATIN SMALL LETTER AE", TEX_NONE, "{\\ae}" }, + { 0x00E7, "LATIN SMALL LETTER C WITH CEDILLA", TEX_NONE, "\\c c" }, + { 0x00E8, "LATIN SMALL LETTER E WITH GRAVE", TEX_NONE, "\\`e" }, + { 0x00E9, "LATIN SMALL LETTER E WITH ACUTE", TEX_NONE, "\\'e" }, + { 0x00EA, "LATIN SMALL LETTER E WITH CIRCUMFLEX", TEX_NONE, "\\^e" }, + { 0x00EB, "LATIN SMALL LETTER E WITH DIAERESIS", TEX_NONE, "\\\"e" }, + { 0x00EC, "LATIN SMALL LETTER I WITH GRAVE", TEX_NONE, "{\\`\\i}" }, + { 0x00ED, "LATIN SMALL LETTER I WITH ACUTE", TEX_NONE, "{\\'\\i}" }, + { 0x00EE, "LATIN SMALL LETTER I WITH CIRCUMFLEX", TEX_NONE, "{\\^\\i}" }, + { 0x00EF, "LATIN SMALL LETTER I WITH DIAERESIS", TEX_NONE, "{\\\"\\i}" }, + { 0x00F0, "LATIN SMALL LETTER ETH", TEX_ETH, "{\\eth}" }, + { 0x00F1, "LATIN SMALL LETTER N WITH TILDE", TEX_NONE, "\\~n" }, + { 0x00F2, "LATIN SMALL LETTER O WITH GRAVE", TEX_NONE, "\\`o" }, + { 0x00F3, "LATIN SMALL LETTER O WITH ACUTE", TEX_NONE, "\\'o" }, + { 0x00F4, "LATIN SMALL LETTER O WITH CIRCUMFLEX", TEX_NONE, "\\^o" }, + { 0x00F5, "LATIN SMALL LETTER O WITH TILDE", TEX_NONE, "\\~o" }, + { 0x00F6, "LATIN SMALL LETTER O WITH DIAERESIS", TEX_NONE, "\\\"o" }, + { 0x00F7, "DIVISION SIGN", TEX_NONE, "{\\the\\textfont2\\char\"04}" }, + { 0x00F8, "LATIN SMALL LETTER O WITH STROKE", TEX_NONE, "{\\o}" }, + { 0x00F9, "LATIN SMALL LETTER U WITH GRAVE", TEX_NONE, "\\`u" }, + { 0x00FA, "LATIN SMALL LETTER U WITH ACUTE", TEX_NONE, "\\'u" }, + { 0x00FB, "LATIN SMALL LETTER U WITH CIRCUMFLEX", TEX_NONE, "\\^u" }, + { 0x00FC, "LATIN SMALL LETTER U WITH DIAERESIS", TEX_NONE, "\\\"u" }, + { 0x00FD, "LATIN SMALL LETTER Y WITH ACUTE", TEX_NONE, "\\'y" }, + { 0x00FE, "LATIN SMALL LETTER THORN", 
TEX_THORN_LC, "{\\thorn}" }, + { 0x00FF, "LATIN SMALL LETTER Y WITH DIAERESIS", TEX_NONE, "\\\"y" }, + { 0x0100, "LATIN CAPITAL LETTER A WITH MACRON", TEX_NONE, "\\=A" }, + { 0x0101, "LATIN SMALL LETTER A WITH MACRON", TEX_NONE, "\\=a" }, + { 0x0102, "LATIN CAPITAL LETTER A WITH BREVE", TEX_NONE, "\\u A" }, + { 0x0103, "LATIN SMALL LETTER A WITH BREVE", TEX_NONE, "\\u a" }, + { 0x0104, "LATIN CAPITAL LETTER A WITH OGONEK", TEX_OGONEK, "\\ogonek{A}" }, + { 0x0105, "LATIN SMALL LETTER A WITH OGONEK", TEX_OGONEK, "\\ogonek{a}" }, + { 0x0106, "LATIN CAPITAL LETTER C WITH ACUTE", TEX_NONE, "\\'C" }, + { 0x0107, "LATIN SMALL LETTER C WITH ACUTE", TEX_NONE, "\\'c" }, + { 0x0108, "LATIN CAPITAL LETTER C WITH CIRCUMFLEX", TEX_NONE, "\\^C" }, + { 0x0109, "LATIN SMALL LETTER C WITH CIRCUMFLEX", TEX_NONE, "\\^c" }, + { 0x010A, "LATIN CAPITAL LETTER C WITH DOT ABOVE", TEX_DOT, "\\dotabove{C}" }, + { 0x010B, "LATIN SMALL LETTER C WITH DOT ABOVE", TEX_DOT, "\\dotabove{c}" }, + { 0x010C, "LATIN CAPITAL LETTER C WITH CARON", TEX_NONE, "\\v C" }, + { 0x010D, "LATIN SMALL LETTER C WITH CARON", TEX_NONE, "\\v c" }, + { 0x010E, "LATIN CAPITAL LETTER D WITH CARON", TEX_NONE, "\\v D" }, + { 0x010F, "LATIN SMALL LETTER D WITH CARON", TEX_NONE, "\\v d" }, + { 0x0110, "LATIN CAPITAL LETTER D WITH STROKE", TEX_NONE, "\\rlap{\\raise0.4ex\\hbox{-}}D" }, + { 0x0111, "LATIN SMALL LETTER D WITH STROKE", TEX_NONE, "\\hbox{d\\kern-0.32em{\\raise0.8ex\\hbox{-}}}" }, + { 0x0112, "LATIN CAPITAL LETTER E WITH MACRON", TEX_NONE, "\\=E" }, + { 0x0113, "LATIN SMALL LETTER E WITH MACRON", TEX_NONE, "\\=e" }, + { 0x0114, "LATIN CAPITAL LETTER E WITH BREVE", TEX_NONE, "\\u E" }, + { 0x0115, "LATIN SMALL LETTER E WITH BREVE", TEX_NONE, "\\u e" }, + { 0x0116, "LATIN CAPITAL LETTER E WITH DOT ABOVE", TEX_DOT, "\\dotabove{E}" }, + { 0x0117, "LATIN SMALL LETTER E WITH DOT ABOVE", TEX_DOT, "\\dotabove{e}" }, + { 0x0118, "LATIN CAPITAL LETTER E WITH OGONEK", TEX_OGONEK, "\\ogonek{E}" }, + { 0x0119, "LATIN SMALL 
LETTER E WITH OGONEK", TEX_OGONEK, "\\ogonek{e}" }, + { 0x011A, "LATIN CAPITAL LETTER E WITH CARON", TEX_NONE, "\\v E" }, + { 0x011B, "LATIN SMALL LETTER E WITH CARON", TEX_NONE, "\\v e" }, + { 0x011C, "LATIN CAPITAL LETTER G WITH CIRCUMFLEX", TEX_NONE, "\\^G" }, + { 0x011D, "LATIN SMALL LETTER G WITH CIRCUMFLEX", TEX_NONE, "\\^g" }, + { 0x011E, "LATIN CAPITAL LETTER G WITH BREVE", TEX_NONE, "\\u G" }, + { 0x011F, "LATIN SMALL LETTER G WITH BREVE", TEX_NONE, "\\u g" }, + { 0x0120, "LATIN CAPITAL LETTER G WITH DOT ABOVE", TEX_DOT, "\\dotabove{G}" }, + { 0x0121, "LATIN SMALL LETTER G WITH DOT ABOVE", TEX_DOT, "\\dotabove{g}" }, + { 0x0122, "LATIN CAPITAL LETTER G WITH CEDILLA", TEX_NONE, "\\c G" }, + /* Because of the descender, a conventional cedilla on a 'g' looks ugly. */ + { 0x0123, "LATIN SMALL LETTER G WITH CEDILLA", TEX_NONE, "{\\accent96 g}" }, + { 0x0124, "LATIN CAPITAL LETTER H WITH CIRCUMFLEX", TEX_NONE, "\\^H" }, + { 0x0125, "LATIN SMALL LETTER H WITH CIRCUMFLEX", TEX_NONE, "\\^h" }, + { 0x0126, "LATIN CAPITAL LETTER H WITH STROKE", TEX_NONE, "\\rlap{\\raise 1.1ex\\vbox{\\hrule width 0.77em\\vss}}H" }, + { 0x0127, "LATIN SMALL LETTER H WITH STROKE", TEX_NONE, "\\hbox{{\\raise0.8ex\\hbox{-}}\\kern-0.35em h}" }, + { 0x0128, "LATIN CAPITAL LETTER I WITH TILDE", TEX_NONE, "\\~I" }, + { 0x0129, "LATIN SMALL LETTER I WITH TILDE", TEX_NONE, "{\\~\\i}" }, + { 0x012A, "LATIN CAPITAL LETTER I WITH MACRON", TEX_NONE, "\\=I" }, + { 0x012B, "LATIN SMALL LETTER I WITH MACRON", TEX_NONE, "{\\=\\i}" }, + { 0x012C, "LATIN CAPITAL LETTER I WITH BREVE", TEX_NONE, "\\u I" }, + { 0x012D, "LATIN SMALL LETTER I WITH BREVE", TEX_NONE, "{\\u \\i}" }, + { 0x012E, "LATIN CAPITAL LETTER I WITH OGONEK", TEX_OGONEK, "\\ogonek{I}" }, + { 0x012F, "LATIN SMALL LETTER I WITH OGONEK", TEX_OGONEK, "\\ogonek{i}" }, + { 0x0130, "LATIN CAPITAL LETTER I WITH DOT ABOVE", TEX_DOT, "\\dotabove{I}" }, + { 0x0131, "LATIN SMALL LETTER DOTLESS I", TEX_NONE, "{\\i}" }, + { 0x0132, "LATIN CAPITAL 
LIGATURE IJ", TEX_NONE, "\\hbox{I\\kern -0.05em J}" }, + { 0x0133, "LATIN SMALL LIGATURE IJ", TEX_NONE, "\\hbox{i\\kern -0.1em j}" }, + { 0x0134, "LATIN CAPITAL LETTER J WITH CIRCUMFLEX", TEX_NONE, "\\^J" }, + { 0x0135, "LATIN SMALL LETTER J WITH CIRCUMFLEX", TEX_NONE, "{\\^\\j}" }, + { 0x0136, "LATIN CAPITAL LETTER K WITH CEDILLA", TEX_NONE, "\\c K" }, + { 0x0137, "LATIN SMALL LETTER K WITH CEDILLA", TEX_NONE, "\\c k" }, + { 0x0138, "LATIN SMALL LETTER KRA", TEX_NONE, "{\\font\\xx=cmr7\\xx K}" }, + { 0x0139, "LATIN CAPITAL LETTER L WITH ACUTE", TEX_NONE, "\\'L" }, + { 0x013A, "LATIN SMALL LETTER L WITH ACUTE", TEX_NONE, "\\'l" }, + { 0x013B, "LATIN CAPITAL LETTER L WITH CEDILLA", TEX_NONE, "\\c L" }, + { 0x013C, "LATIN SMALL LETTER L WITH CEDILLA", TEX_NONE, "\\c l" }, + { 0x013D, "LATIN CAPITAL LETTER L WITH CARON", TEX_NONE, "\\v L" }, + { 0x013E, "LATIN SMALL LETTER L WITH CARON", TEX_NONE, "\\v l" }, + { 0x013F, "LATIN CAPITAL LETTER L WITH MIDDLE DOT", TEX_NONE, "\\hbox{\\rlap{\\kern0.27em\\raise0.3ex\\hbox{$\\cdot$}}L}" }, + { 0x0140, "LATIN SMALL LETTER L WITH MIDDLE DOT", TEX_NONE, "\\hbox{l\\kern-0.12em\\raise0.3ex\\hbox{$\\cdot$}}" }, + { 0x0141, "LATIN CAPITAL LETTER L WITH STROKE", TEX_NONE, "{\\ifnum\\fam=7 \\lower 0.4ex\\rlap{\\kern -0.13em\\'{}}L\\else\\L\\fi}" }, + { 0x0142, "LATIN SMALL LETTER L WITH STROKE", TEX_NONE, "{\\ifnum\\fam=7 \\lower 0.4ex\\rlap{\\kern -0.05em\\'{}}l\\else\\l\\fi}" }, + { 0x0143, "LATIN CAPITAL LETTER N WITH ACUTE", TEX_NONE, "\\'N" }, + { 0x0144, "LATIN SMALL LETTER N WITH ACUTE", TEX_NONE, "\\'n" }, + { 0x0145, "LATIN CAPITAL LETTER N WITH CEDILLA", TEX_NONE, "\\c N" }, + { 0x0146, "LATIN SMALL LETTER N WITH CEDILLA", TEX_NONE, "\\c n" }, + { 0x0147, "LATIN CAPITAL LETTER N WITH CARON", TEX_NONE, "\\v N" }, + { 0x0148, "LATIN SMALL LETTER N WITH CARON", TEX_NONE, "\\v n" }, + { 0x0149, "LATIN SMALL LETTER N PRECEDED BY APOSTROPHE", TEX_NONE, "\\hbox{'\\kern -0.1em n}" }, + { 0x014A, "LATIN CAPITAL LETTER ENG", 
TEX_NONE, 0 }, + { 0x014B, "LATIN SMALL LETTER ENG", TEX_NONE, 0 }, + { 0x014C, "LATIN CAPITAL LETTER O WITH MACRON", TEX_NONE, "\\=O" }, + { 0x014D, "LATIN SMALL LETTER O WITH MACRON", TEX_NONE, "\\=o" }, + { 0x014E, "LATIN CAPITAL LETTER O WITH BREVE", TEX_NONE, "\\u O" }, + { 0x014F, "LATIN SMALL LETTER O WITH BREVE", TEX_NONE, "\\u o" }, + { 0x0150, "LATIN CAPITAL LETTER O WITH DOUBLE ACUTE", TEX_DOUBLE_ACUTE, "\\doubleacute{O}" }, + { 0x0151, "LATIN SMALL LETTER O WITH DOUBLE ACUTE", TEX_DOUBLE_ACUTE, "\\doubleacute{o}" }, + { 0x0152, "LATIN CAPITAL LIGATURE OE", TEX_NONE, "{\\OE}" }, + { 0x0153, "LATIN SMALL LIGATURE OE", TEX_NONE, "{\\oe}" }, + { 0x0154, "LATIN CAPITAL LETTER R WITH ACUTE", TEX_NONE, "\\'R" }, + { 0x0155, "LATIN SMALL LETTER R WITH ACUTE", TEX_NONE, "\\'r" }, + { 0x0156, "LATIN CAPITAL LETTER R WITH CEDILLA", TEX_NONE, "\\c R" }, + { 0x0157, "LATIN SMALL LETTER R WITH CEDILLA", TEX_NONE, "\\c r" }, + { 0x0158, "LATIN CAPITAL LETTER R WITH CARON", TEX_NONE, "\\v R" }, + { 0x0159, "LATIN SMALL LETTER R WITH CARON", TEX_NONE, "\\v r" }, + { 0x015A, "LATIN CAPITAL LETTER S WITH ACUTE", TEX_NONE, "\\'S" }, + { 0x015B, "LATIN SMALL LETTER S WITH ACUTE", TEX_NONE, "\\'s" }, + { 0x015C, "LATIN CAPITAL LETTER S WITH CIRCUMFLEX", TEX_NONE, "\\^S" }, + { 0x015D, "LATIN SMALL LETTER S WITH CIRCUMFLEX", TEX_NONE, "\\^s" }, + { 0x015E, "LATIN CAPITAL LETTER S WITH CEDILLA", TEX_NONE, "\\c S" }, + { 0x015F, "LATIN SMALL LETTER S WITH CEDILLA", TEX_NONE, "\\c s" }, + { 0x0160, "LATIN CAPITAL LETTER S WITH CARON", TEX_NONE, "\\v S" }, + { 0x0161, "LATIN SMALL LETTER S WITH CARON", TEX_NONE, "\\v s" }, + { 0x0162, "LATIN CAPITAL LETTER T WITH CEDILLA", TEX_NONE, "\\c T" }, + { 0x0163, "LATIN SMALL LETTER T WITH CEDILLA", TEX_NONE, "\\c t" }, + { 0x0164, "LATIN CAPITAL LETTER T WITH CARON", TEX_NONE, "\\v T" }, + { 0x0165, "LATIN SMALL LETTER T WITH CARON", TEX_NONE, "\\v t" }, + { 0x0166, "LATIN CAPITAL LETTER T WITH STROKE", TEX_NONE, "\\rlap{\\raise 
0.35ex\\hbox{\\kern0.22em -}}T" }, + { 0x0167, "LATIN SMALL LETTER T WITH STROKE", TEX_NONE, "\\hbox{{\\raise0.16ex\\hbox{-}}\\kern-0.35em t}" }, + { 0x0168, "LATIN CAPITAL LETTER U WITH TILDE", TEX_NONE, "\\~U" }, + { 0x0169, "LATIN SMALL LETTER U WITH TILDE", TEX_NONE, "\\~u" }, + { 0x016A, "LATIN CAPITAL LETTER U WITH MACRON", TEX_NONE, "\\=U" }, + { 0x016B, "LATIN SMALL LETTER U WITH MACRON", TEX_NONE, "\\=u" }, + { 0x016C, "LATIN CAPITAL LETTER U WITH BREVE", TEX_NONE, "\\u U" }, + { 0x016D, "LATIN SMALL LETTER U WITH BREVE", TEX_NONE, "\\u u" }, + { 0x016E, "LATIN CAPITAL LETTER U WITH RING ABOVE", TEX_NONE, "\\accent23 U" }, + { 0x016F, "LATIN SMALL LETTER U WITH RING ABOVE", TEX_NONE, "\\accent23 u" }, + { 0x0170, "LATIN CAPITAL LETTER U WITH DOUBLE ACUTE", TEX_DOUBLE_ACUTE, "\\doubleacute{U}" }, + { 0x0171, "LATIN SMALL LETTER U WITH DOUBLE ACUTE", TEX_DOUBLE_ACUTE, "\\doubleacute{u}" }, + { 0x0172, "LATIN CAPITAL LETTER U WITH OGONEK", TEX_OGONEK, "\\ogonekx{U}{0.08em}" }, + { 0x0173, "LATIN SMALL LETTER U WITH OGONEK", TEX_OGONEK, "\\ogonek{u}" }, + { 0x0174, "LATIN CAPITAL LETTER W WITH CIRCUMFLEX", TEX_NONE, "\\^W" }, + { 0x0175, "LATIN SMALL LETTER W WITH CIRCUMFLEX", TEX_NONE, "\\^w" }, + { 0x0176, "LATIN CAPITAL LETTER Y WITH CIRCUMFLEX", TEX_NONE, "\\^Y" }, + { 0x0177, "LATIN SMALL LETTER Y WITH CIRCUMFLEX", TEX_NONE, "\\^y" }, + { 0x0178, "LATIN CAPITAL LETTER Y WITH DIAERESIS", TEX_NONE, "\\\"Y" }, + { 0x0179, "LATIN CAPITAL LETTER Z WITH ACUTE", TEX_NONE, "\\'Z" }, + { 0x017A, "LATIN SMALL LETTER Z WITH ACUTE", TEX_NONE, "\\'z" }, + { 0x017B, "LATIN CAPITAL LETTER Z WITH DOT ABOVE", TEX_DOT, "\\dotabove{Z}" }, + { 0x017C, "LATIN SMALL LETTER Z WITH DOT ABOVE", TEX_DOT, "\\dotabove{z}" }, + { 0x017D, "LATIN CAPITAL LETTER Z WITH CARON", TEX_NONE, "\\v Z" }, + { 0x017E, "LATIN SMALL LETTER Z WITH CARON", TEX_NONE, "\\v z" }, + { 0x017F, "LATIN SMALL LETTER LONG S", TEX_NONE, 0 }, + }; + + + +static const struct glyph punctuation [] = +{ + {0x2000, 
"EN QUAD", TEX_NONE, "\\kern.5em" }, + {0x2001, "EM QUAD", TEX_NONE, "\\kern1em" }, + {0x2002, "EN SPACE", TEX_NONE, "\\kern.5em" }, + {0x2003, "EM SPACE", TEX_NONE, "\\kern1em" }, + {0x2004, "THREE-PER-EM SPACE", TEX_NONE, "\\kern0.333em" }, + {0x2005, "FOUR-PER-EM SPACE", TEX_NONE, "\\kern0.250em" }, + {0x2006, "SIX-PER-EM SPACE", TEX_NONE, "\\kern0.166em" }, + {0x2007, "FIGURE SPACE", TEX_NONE, "\\kern1ex" }, + {0x2008, "PUNCTUATION SPACE", TEX_NONE, "{\\thinspace}" }, + {0x2009, "THIN SPACE", TEX_NONE, "{\\thinspace}" }, + {0x200A, "HAIR SPACE", TEX_NONE, "{\\hskip 1pt}" }, + {0x200B, "ZERO WIDTH SPACE", TEX_NONE, "{}" }, + {0x200C, "ZERO WIDTH NON-JOINER", TEX_NONE, "{}" }, + {0x200D, "ZERO WIDTH JOINER", TEX_NONE, "{}" }, + {0x200E, "LEFT-TO-RIGHT MARK", TEX_NONE, 0 }, + {0x200F, "RIGHT-TO-LEFT MARK", TEX_NONE, 0 }, + {0x2010, "HYPHEN", TEX_NONE, "-" }, + {0x2011, "NON-BREAKING HYPHEN", TEX_NONE, "\\hbox{-}" }, + {0x2012, "FIGURE DASH", TEX_NONE, "--" }, + {0x2013, "EN DASH", TEX_NONE, "--" }, + {0x2014, "EM DASH", TEX_NONE, "---" }, + {0x2015, "HORIZONTAL BAR", TEX_NONE, "---" }, + {0x2016, "DOUBLE VERTICAL LINE", TEX_NONE, "{\\the\\textfont2 \\char\"6B}" }, + {0x2017, "DOUBLE LOW LINE", TEX_NONE, "{\\the\\textfont2 \\lower0.4ex\\rlap{\\char\"00}\\lower0.8ex\\hbox{\\char\"00}}" }, + {0x2018, "LEFT SINGLE QUOTATION MARK", TEX_NONE, "`" }, + {0x2019, "RIGHT SINGLE QUOTATION MARK", TEX_NONE, "'" }, + {0x201A, "SINGLE LOW-9 QUOTATION MARK", TEX_NONE, "," }, + {0x201B, "SINGLE HIGH-REVERSED-9 QUOTATION MARK", TEX_NONE, 0 }, + {0x201C, "LEFT DOUBLE QUOTATION MARK", TEX_NONE, "``" }, + {0x201D, "RIGHT DOUBLE QUOTATION MARK", TEX_NONE, "''" }, + {0x201E, "DOUBLE LOW-9 QUOTATION MARK", TEX_NONE, ",," }, + {0x201F, "DOUBLE HIGH-REVERSED-9 QUOTATION MARK", TEX_NONE, 0 }, + {0x2020, "DAGGER", TEX_NONE, "{\\dag}" }, + {0x2021, "DOUBLE DAGGER", TEX_NONE, "{\\ddag}" }, + {0x2022, "BULLET", TEX_NONE, "{\\the\\textfont2 \\char\"0F}" }, + {0x2023, "TRIANGULAR BULLET", 
TEX_NONE, "{\\the\\textfont1 \\char\"2E}" }, + {0x2024, "ONE DOT LEADER", TEX_NONE, "\\hbox{.}" }, + {0x2025, "TWO DOT LEADER", TEX_NONE, "\\hbox{.\\kern 0.15em.}" }, + /* Ellipsis could be done with $\dots$ but that means a font change which we + want to avoid if possible. */ + {0x2026, "HORIZONTAL ELLIPSIS", TEX_NONE, "\\hbox{.\\kern 0.15em.\\kern 0.15em.}" }, + {0x2027, "HYPHENATION POINT", TEX_NONE, "$\\cdot$" }, + {0x2028, "LINE SEPARATOR", TEX_NONE, "{\\break}" }, + {0x2029, "PARAGRAPH SEPARATOR", TEX_NONE, "{\\par}" }, + {0x202A, "LEFT-TO-RIGHT EMBEDDING", TEX_NONE, 0 }, + {0x202B, "RIGHT-TO-LEFT EMBEDDING", TEX_NONE, 0 }, + {0x202C, "POP DIRECTIONAL FORMATTING", TEX_NONE, 0 }, + {0x202D, "LEFT-TO-RIGHT OVERRIDE", TEX_NONE, 0 }, + {0x202E, "RIGHT-TO-LEFT OVERRIDE", TEX_NONE, 0 }, + {0x202F, "NARROW NO-BREAK SPACE", TEX_NONE, "\\hbox{\\thinspace}" }, + {0x2030, "PER MILLE SIGN", TEX_NONE, "{\\font\\xx=\\ifnum\\fam=6 wasyb10\\else wasy10\\fi \\xx \\char\"68}" }, + {0x2031, "PER TEN THOUSAND SIGN", TEX_NONE, 0 }, + {0x2032, "PRIME", TEX_NONE, "$'$" }, + {0x2033, "DOUBLE PRIME", TEX_NONE, "$''$" }, + {0x2034, "TRIPLE PRIME", TEX_NONE, "$'''$" }, + {0x2035, "REVERSED PRIME", TEX_NONE, 0 }, + {0x2036, "REVERSED DOUBLE PRIME", TEX_NONE, 0 }, + {0x2037, "REVERSED TRIPLE PRIME", TEX_NONE, 0 }, + {0x2038, "CARET", TEX_NONE, "\\^{ }" }, + {0x2039, "SINGLE LEFT-POINTING ANGLE QUOTATION MARK", TEX_NONE, "{\\raise0.5ex\\hbox{\\font\\xx=cmmi5 \\xx \\char\"3C}}" }, + {0x203A, "SINGLE RIGHT-POINTING ANGLE QUOTATION MARK", TEX_NONE, "{\\raise0.5ex\\hbox{\\font\\xx=cmmi5 \\xx \\char\"3E}}" }, + {0x203B, "REFERENCE MARK", TEX_NONE, + "\\rlap{\\ifnum\\fam=7\\kern -0.3ex\\fi" + "\\rlap{\\raise 1.2ex\\hbox{\\kern 1ex.}}" + "\\rlap{\\raise 0.2ex\\hbox{\\kern 1ex.}}" + "\\rlap{\\raise 0.7ex\\hbox{\\kern 1.5ex.}}" + "\\rlap{\\raise 0.7ex\\hbox{\\kern 0.5ex.}}" + "}" + "{\\font\\xx=cmsy10 scaled\\magstep2\\xx\\char\"02}" + }, + {0x203C, "DOUBLE EXCLAMATION MARK", TEX_NONE, 
"\\hbox{!\\kern -0.1em!}" }, + {0x203D, "INTERROBANG", TEX_NONE, "\\rlap{\\ifnum\\fam=7 \\else\\kern 0.1em\\fi!}?" }, + {0x203E, "OVERLINE", TEX_NONE, "\\raise 1ex \\hbox{\\the\\textfont0 \\char\"7B}"}, + {0x203F, "UNDERTIE", TEX_NONE, "{\\the\\textfont1 \\char\"05E}" }, + {0x2040, "CHARACTER TIE", TEX_NONE, "{\\the\\textfont1 \\char\"05F}" }, + {0x2041, "CARET INSERTION POINT", TEX_NONE, 0 }, + {0x2042, "ASTERISM", TEX_NONE, "\\vtop to 0pt{\\hbox{\\lower .8ex\\hbox{*}}\\vss}\\kern-0.55ex" + "*\\kern-0.55ex\\vtop to 0pt{\\hbox{\\lower .8ex\\hbox{*}}\\vss}" }, + {0x2043, "HYPHEN BULLET", TEX_NONE, "\\raise 0.6ex\\hbox to 0.3em{\\leaders\\hrule height 1pt\\hfil}" }, + {0x2044, "FRACTION SLASH", TEX_NONE, "{\\it /\\/}" }, + {0x2045, "LEFT SQUARE BRACKET WITH QUILL", TEX_NONE, "\\rlap{[}{\\raise 0.1ex\\hbox{-}}" }, + {0x2046, "RIGHT SQUARE BRACKET WITH QUILL", TEX_NONE, "\\rlap{]}{\\raise 0.1ex\\hbox{-}}" }, + {0x2047, "DOUBLE QUESTION MARK", TEX_NONE, "?\\kern-0.2ex?" }, + {0x2048, "QUESTION EXCLAMATION MARK", TEX_NONE, "?\\kern-0.2ex!" }, + {0x2049, "EXCLAMATION QUESTION MARK", TEX_NONE, "!\\kern-0.2ex?" 
}, + {0x204A, "TIRONIAN SIGN ET", TEX_NONE, "\\raise 1ex\\rlap{\\the\\textfont3 \\char\"7D}/" }, + {0x204B, "REVERSED PILCROW SIGN", TEX_NONE, 0 }, + {0x204C, "BLACK LEFTWARDS BULLET", TEX_NONE, 0 }, + {0x204D, "BLACK RIGHTWARDS BULLET", TEX_NONE, 0 }, + {0x204E, "LOW ASTERISK", TEX_NONE, "\\lower 0.8ex\\hbox{*}" }, + {0x204F, "REVERSED SEMICOLON", TEX_NONE, 0 }, + {0x2050, "CLOSE UP", TEX_NONE, "\\rlap{\\lower 0.8ex\\hbox{\\the\\textfont1 \\char\"05E}}\\raise 1.ex\\hbox{\\the\\textfont1 \\char\"05F}"}, + {0x2051, "TWO ASTERISKS ALIGNED VERTICALLY", TEX_NONE, "\\vtop to 0pt{\\rlap{\\lower 0.8ex\\hbox{*}}\\vss}*" }, + {0x2052, "COMMERCIAL MINUS SIGN", TEX_NONE, "{\\raise 1.3ex\\hbox{.}\\rlap{\\raise 0.2ex\\hbox{\\kern-0.25em/}}.}" }, + {0x2053, "SWUNG DASH", TEX_NONE, "\\lower 0.5ex\\hbox{\\the\\textfont3 \\char\"65}" }, + {0x2054, "INVERTED UNDERTIE", TEX_NONE, "\\lower 0.3ex\\hbox{\\the\\textfont1 \\char\"05F}" }, + {0x2055, "FLOWER PUNCTUATION MARK", TEX_NONE, 0 }, + {0x2056, "THREE DOT PUNCTUATION", TEX_NONE, "\\raise 0.25\\baselineskip\\hbox{.}\\raise 0.5\\baselineskip\\rlap{.}." }, + {0x2057, "QUADRUPLE PRIME", TEX_NONE, "$''''$" }, + {0x2058, "FOUR DOT PUNCTUATION", TEX_NONE, + "\\raise 0.2\\baselineskip\\rlap{.}" + "\\kern 0.22\\baselineskip" + "\\lower 0.0\\baselineskip\\rlap{.}" + "\\raise 0.4\\baselineskip\\rlap{.}" + "\\kern 0.22\\baselineskip" + "\\raise 0.2\\baselineskip\\hbox{.}" + }, + {0x2059, "FIVE DOT PUNCTUATION", TEX_NONE, + "\\lower 0.0\\baselineskip\\rlap{.}" + "\\raise 0.4\\baselineskip\\rlap{.}" + "\\kern 0.2\\baselineskip" + "\\raise 0.2\\baselineskip\\rlap{.}" + "\\kern 0.2\\baselineskip" + "\\lower 0.0\\baselineskip\\rlap{.}" + "\\raise 0.4\\baselineskip\\hbox{.}" + }, + {0x205A, "TWO DOT PUNCTUATION", TEX_NONE, "\\raise 0.5\\baselineskip\\rlap{.}." 
}, + {0x205B, "FOUR DOT MARK", TEX_NONE, + "\\raise 0.3\\baselineskip\\hbox{.}" + "\\lower 0.1\\baselineskip\\rlap{.}" + "\\raise 0.7\\baselineskip\\hbox{.}" + "\\raise 0.3\\baselineskip\\hbox{.}" + }, + {0x205C, "DOTTED CROSS", TEX_NONE, + "\\rlap{\\ifnum\\fam=7\\kern -0.3ex\\fi" + "\\raise 0.07\\baselineskip\\rlap{.}" + "\\raise 0.31\\baselineskip\\rlap{.}" + "\\kern 0.25\\baselineskip" + "\\raise 0.07\\baselineskip\\rlap{.}" + "\\raise 0.31\\baselineskip\\rlap{.}" + "}\\kern 0.01\\baselineskip" + "\\hbox to 0.5\\baselineskip{" + "\\rlap{\\raise 0.225\\baselineskip\\hbox to 0.5\\baselineskip{\\leaders\\hrule height 0.5pt\\hfil}}" + "\\kern 0.225\\baselineskip" + "\\vbox to 0.5\\baselineskip{\\leaders\\vrule width 0.5pt\\vfil}" + "\\hss}" + }, + /* According to https://unicode.org/charts/PDF/U2000.pdf the vertical extent + of the next two is the whole height of the line. */ + {0x205D, "TRICOLON", TEX_NONE, + "\\smash{" + "\\setbox0=\\hbox{.}" + "\\dimen255=\\baselineskip \\advance\\dimen255 by -\\lineskip \\advance\\dimen255 by -\\ht255" + "\\rlap{\\raise 1.0\\dimen255\\hbox{.}}" + "\\rlap{\\raise 0.5\\dimen255\\hbox{.}}" + "\\raise 0\\dimen255\\hbox{.}}" }, + {0x205E, "VERTICAL FOUR DOTS", TEX_NONE, + "\\smash{" + "\\setbox0=\\hbox{.}" + "\\dimen255=\\baselineskip \\advance\\dimen255 by -\\lineskip \\advance\\dimen255 by -\\ht255" + "\\rlap{\\raise 1.0\\dimen255\\hbox{.}}" + "\\rlap{\\raise 0.666666\\dimen255\\hbox{.}}" + "\\rlap{\\raise 0.333333\\dimen255\\hbox{.}}" + "\\raise 0\\dimen255\\hbox{.}}" }, + {0x205F, "MEDIUM MATHEMATICAL SPACE", TEX_NONE, "{\\hskip 0.2222222em}" }, + {0x2060, "WORD JOINER", TEX_NONE, "{}" }, + {0x2061, "FUNCTION APPLICATION", TEX_NONE, "$$" }, + {0x2062, "INVISIBLE TIMES", TEX_NONE, "$$" }, + {0x2063, "INVISIBLE SEPARATOR", TEX_NONE, "$$" }, + {0x2064, "INVISIBLE PLUS", TEX_NONE, "$$" }, + }; + +static const struct glyph mathematical [] = + { + {0x2264, "LESS-THAN OR EQUAL TO", TEX_NONE, "$\\leq$" }, + {0x2265, "GREATER-THAN OR 
EQUAL TO", TEX_NONE, "$\\geq$" }, + }; + + +static const struct glyph greek [] = + { + {0x0391, "GREEK CAPITAL LETTER ALPHA", TEX_NONE, "{\\the\\textfont1 \\char\"41}" }, + {0x0392, "GREEK CAPITAL LETTER BETA", TEX_NONE, "{\\the\\textfont1 \\char\"42}" }, + {0x0393, "GREEK CAPITAL LETTER GAMMA", TEX_NONE,"{\\the\\textfont1 \\char\"00}" }, + {0x0394, "GREEK CAPITAL LETTER DELTA", TEX_NONE, "{\\the\\textfont1 \\char\"01}" }, + {0x0395, "GREEK CAPITAL LETTER EPSILON", TEX_NONE, "{\\the\\textfont1 \\char\"45}" }, + {0x0396, "GREEK CAPITAL LETTER ZETA", TEX_NONE, "{\\the\\textfont1 \\char\"5A}" }, + {0x0397, "GREEK CAPITAL LETTER ETA", TEX_NONE, "{\\the\\textfont1 \\char\"48}" }, + {0x0398, "GREEK CAPITAL LETTER THETA", TEX_NONE, "{\\the\\textfont1 \\char\"02}" }, + {0x0399, "GREEK CAPITAL LETTER IOTA", TEX_NONE, "{\\the\\textfont1 \\char\"49}" }, + {0x039A, "GREEK CAPITAL LETTER KAPPA", TEX_NONE, "{\\the\\textfont1 \\char\"4B}" }, + {0x039B, "GREEK CAPITAL LETTER LAMDA", TEX_NONE, "{\\the\\textfont1 \\char\"03}" }, + {0x039C, "GREEK CAPITAL LETTER MU", TEX_NONE, "{\\the\\textfont1 \\char\"4D}" }, + {0x039D, "GREEK CAPITAL LETTER NU", TEX_NONE, "{\\the\\textfont1 \\char\"4E}" }, + {0x039E, "GREEK CAPITAL LETTER XI", TEX_NONE, "{\\the\\textfont1 \\char\"04}" }, + {0x039F, "GREEK CAPITAL LETTER OMICRON", TEX_NONE, "{\\the\\textfont1 \\char\"4F}" }, + {0x03A0, "GREEK CAPITAL LETTER PI", TEX_NONE, "{\\the\\textfont1 \\char\"05}" }, + {0x03A1, "GREEK CAPITAL LETTER RHO", TEX_NONE, "{\\the\\textfont1 \\char\"50}" }, + {0x03A2, "reserved", TEX_NONE, 0 }, + {0x03A3, "GREEK CAPITAL LETTER SIGMA", TEX_NONE, "{\\the\\textfont1 \\char\"06}" }, + {0x03A4, "GREEK CAPITAL LETTER TAU", TEX_NONE, "{\\the\\textfont1 \\char\"54}" }, + {0x03A5, "GREEK CAPITAL LETTER UPSILON", TEX_NONE, "{\\the\\textfont1 \\char\"59}" }, + {0x03A6, "GREEK CAPITAL LETTER PHI", TEX_NONE, "{\\the\\textfont1 \\char\"08}" }, + {0x03A7, "GREEK CAPITAL LETTER CHI", TEX_NONE, "{\\the\\textfont1 \\char\"58}" }, + 
{0x03A8, "GREEK CAPITAL LETTER PSI", TEX_NONE, "{\\the\\textfont1 \\char\"09}" }, + {0x03A9, "GREEK CAPITAL LETTER OMEGA", TEX_NONE, "{\\the\\textfont1 \\char\"0A}" }, + {0x03AA, "GREEK CAPITAL LETTER IOTA WITH DIALYTIKA", TEX_NONE, 0 }, + {0x03AB, "GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA", TEX_NONE, 0 }, + {0x03AC, "GREEK SMALL LETTER ALPHA WITH TONOS", TEX_NONE, "\\rlap{\\kern -0.25ex\\it \\char\"13}{\\the\\textfont1 \\char\"0B}"}, + {0x03AD, "GREEK SMALL LETTER EPSILON WITH TONOS", TEX_NONE, "\\rlap{\\kern -0.25ex\\it \\char\"13}{\\the\\textfont1 \\char\"22}"}, + {0x03AE, "GREEK SMALL LETTER ETA WITH TONOS", TEX_NONE, "\\rlap{\\kern -0.25ex\\it \\char\"13}{\\the\\textfont1 \\char\"11}"}, + {0x03AF, "GREEK SMALL LETTER IOTA WITH TONOS", TEX_NONE, "\\rlap{\\the\\textfont1 \\char\"13}{\\kern -0.35ex\\it \\char\"13\\kern 0.1ex}" }, + {0x03B0, "GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS", TEX_NONE, 0 }, + {0x03B1, "GREEK SMALL LETTER ALPHA", TEX_NONE, "{\\the\\textfont1 \\char\"0B}" }, + {0x03B2, "GREEK SMALL LETTER BETA", TEX_NONE, "{\\the\\textfont1 \\char\"0C}" }, + {0x03B3, "GREEK SMALL LETTER GAMMA", TEX_NONE, "{\\the\\textfont1 \\char\"0D}" }, + {0x03B4, "GREEK SMALL LETTER DELTA", TEX_NONE, "{\\the\\textfont1 \\char\"0E}" }, + /* Unicode prefers the squiggly epsilon */ + {0x03B5, "GREEK SMALL LETTER EPSILON", TEX_NONE, "{\\the\\textfont1 \\char\"22}" }, + {0x03B6, "GREEK SMALL LETTER ZETA", TEX_NONE, "{\\the\\textfont1 \\char\"10}" }, + {0x03B7, "GREEK SMALL LETTER ETA", TEX_NONE, "{\\the\\textfont1 \\char\"11}" }, + {0x03B8, "GREEK SMALL LETTER THETA", TEX_NONE, "{\\the\\textfont1 \\char\"12}" }, + {0x03B9, "GREEK SMALL LETTER IOTA", TEX_NONE, "{\\the\\textfont1 \\char\"13}" }, + {0x03BA, "GREEK SMALL LETTER KAPPA", TEX_NONE, "{\\the\\textfont1 \\char\"14}" }, + {0x03BB, "GREEK SMALL LETTER LAMDA", TEX_NONE, "{\\the\\textfont1 \\char\"15}" }, + {0x03BC, "GREEK SMALL LETTER MU", TEX_NONE, "{\\the\\textfont1 \\char\"16}" }, + {0x03BD, "GREEK SMALL 
LETTER NU", TEX_NONE, "{\\the\\textfont1 \\char\"17}" }, + {0x03BE, "GREEK SMALL LETTER XI", TEX_NONE, "{\\the\\textfont1 \\char\"18}" }, + {0x03BF, "GREEK SMALL LETTER OMICRON", TEX_NONE, "{\\the\\textfont1 \\char\"6F}" }, + {0x03C0, "GREEK SMALL LETTER PI", TEX_NONE, "{\\the\\textfont1 \\char\"19}" }, + {0x03C1, "GREEK SMALL LETTER RHO", TEX_NONE, "{\\the\\textfont1 \\char\"1A}" }, + {0x03C2, "GREEK SMALL LETTER FINAL SIGMA", TEX_NONE, "{\\the\\textfont1 \\char\"26}" }, + {0x03C3, "GREEK SMALL LETTER SIGMA", TEX_NONE, "{\\the\\textfont1 \\char\"1B}" }, + {0x03C4, "GREEK SMALL LETTER TAU", TEX_NONE, "{\\the\\textfont1 \\char\"1C}" }, + {0x03C5, "GREEK SMALL LETTER UPSILON", TEX_NONE, "{\\the\\textfont1 \\char\"1D}" }, + {0x03C6, "GREEK SMALL LETTER PHI", TEX_NONE, "{\\the\\textfont1 \\char\"27}" }, + {0x03C7, "GREEK SMALL LETTER CHI", TEX_NONE, "{\\the\\textfont1 \\char\"1F}" }, + {0x03C8, "GREEK SMALL LETTER PSI", TEX_NONE, "{\\the\\textfont1 \\char\"20}" }, + {0x03C9, "GREEK SMALL LETTER OMEGA", TEX_NONE, "{\\the\\textfont1 \\char\"21}" }, + {0x03CA, "GREEK SMALL LETTER IOTA WITH DIALYTIKA", TEX_NONE, 0 }, + {0x03CB, "GREEK SMALL LETTER UPSILON WITH DIALYTIKA", TEX_NONE, 0 }, + {0x03CC, "GREEK SMALL LETTER OMICRON WITH TONOS", TEX_NONE, "\\rlap{\\kern -0.25ex\\it \\char\"13}{\\the\\textfont1 \\char\"6F}"}, + {0x03CD, "GREEK SMALL LETTER UPSILON WITH TONOS", TEX_NONE, "\\rlap{\\kern -0.25ex\\it \\char\"13}{\\the\\textfont1 \\char\"1D}"}, + {0x03CE, "GREEK SMALL LETTER OMEGA WITH TONOS", TEX_NONE, "\\rlap{\\kern -0.25ex\\it \\char\"13}{\\the\\textfont1 \\char\"21}"}, + {0x03CF, "GREEK CAPITAL KAI SYMBOL", TEX_NONE, 0 } + }; + +const struct glyph_block defined_blocks[] = + { + { control_codes, 2 }, + { basic_latin, 0x7F - 0x20 }, + { extended_latin, 0x180 - 0xA0 }, + { greek, 0x3D0 - 0x391}, + { punctuation, 0x65}, + { mathematical, 2}, + { 0, 0} + }; diff --git a/src/output/tex-glyphs.h b/src/output/tex-glyphs.h new file mode 100644 index 0000000000..d0bab4a1fe 
--- /dev/null +++ b/src/output/tex-glyphs.h @@ -0,0 +1,67 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef TEX_GLYPHS_H +#define TEX_GLYPHS_H 1 + +#include "gl/unitypes.h" + +#include "libpspp/hmap.h" + + +enum tex_ancilliary + { + TEX_NONE, + TEX_VULGAR_FRAC, + TEX_OGONEK, + TEX_THORN_UC, + TEX_THORN_LC, + TEX_GUILLEMET_LEFT, + TEX_GUILLEMET_RIGHT, + TEX_ETH, + TEX_DOT, + TEX_DOUBLE_ACUTE + }; + +extern const char *tex_macro[]; + +struct tex_macro +{ + struct hmap_node node; + enum tex_ancilliary index; +}; + + +struct glyph +{ + ucs4_t code_point; + const char *name; + enum tex_ancilliary macro; + const char *tex_rendering; +}; + +struct glyph_block +{ + const struct glyph *start; + int n_glyphs; +}; + + +extern const char *unsupported_glyph; + +extern const struct glyph_block defined_blocks[] ; + +#endif diff --git a/src/output/tex-parsing.c b/src/output/tex-parsing.c new file mode 100644 index 0000000000..7c4283514e --- /dev/null +++ b/src/output/tex-parsing.c @@ -0,0 +1,158 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2020 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include "gl/xalloc.h" + +#include "tex-parsing.h" +#include "libpspp/ll.h" + +#include +#include +#include + +enum state + { + STATE_INITIAL, + STATE_CS, + STATE_COMMENT + }; + + +/* Return the category of C. + These are the default TeX categories as defined in Chapter 7 of + The TeXbook */ +static enum tex_cat +category (const char c) +{ + if (c >= 'A' && c <= 'Z') + return CAT_LETTER; + + if (c >= 'a' && c <= 'z') + return CAT_LETTER; + + switch (c) + { + case '\\': + return CAT_ESCAPE; + case '{': + return CAT_BEGIN_GROUP; + case '}': + return CAT_END_GROUP; + case '$': + return CAT_MATH_MODE; + case '&': + return CAT_ALIGNMENT; + case '#': + return CAT_PARAMETER; + case '^': + return CAT_SUPERSCRIPT; + case '_': + return CAT_SUBSCRIPT; + case '~': + return CAT_ACTIVE; + case ' ': + case '\t': + return CAT_SPACE; + case '\n': + case '\r': + return CAT_EOL; + case '%': + return CAT_COMMENT; + case 127: + return CAT_INVALID; + case 0: + return CAT_IGNORED; + } + + return CAT_OTHER; +} + + +/* Parse the TeX fragment STR into TeX tokens and push them + on to LIST. 
*/ +void +tex_parse (const char *str, struct ll_list *list) +{ + enum state state = STATE_INITIAL; + struct tex_token *token = NULL; + int c; + while ((c = *str++) != '\0') + { + enum tex_cat cat = category (c); + + if (state == STATE_COMMENT) + { + ds_put_byte (&token->str, c); + if (cat == CAT_EOL) + { + token->cat = CAT_COMMENT; + ll_push_tail (list, &token->ll); + state = STATE_INITIAL; + } + } + else if (state == STATE_INITIAL) + { + token = XZALLOC (struct tex_token); + ds_init_empty (&token->str); + if (cat == CAT_COMMENT) + { + ds_put_byte (&token->str, c); + state = STATE_COMMENT; + } + else if (cat == CAT_ESCAPE) + { + ds_put_byte (&token->str, c); + state = STATE_CS; + } + else + { + ds_put_byte (&token->str, c); + token->cat = category (c); + ll_push_tail (list, &token->ll); + } + } + else if (state == STATE_CS) + { + ds_put_byte (&token->str, c); + if (cat != CAT_LETTER) + { + if (ds_length (&token->str) > 2) + { + ds_truncate (&token->str, ds_length (&token->str) - 1); + str--; + } + token->cat = CAT_CONTROL_SEQ; + ll_push_tail (list, &token->ll); + state = STATE_INITIAL; + } + } + } + if (state == STATE_CS) + { + /* The end of the string was encountered whilst processing + a control sequence. */ + + /* A \ at the end of the string must be erroneous. */ + assert (ds_length (&token->str) > 1); + token->cat = CAT_CONTROL_SEQ; + ll_push_tail (list, &token->ll); + state = STATE_INITIAL; + } + + assert (state == STATE_INITIAL); +} diff --git a/src/output/tex-parsing.h b/src/output/tex-parsing.h new file mode 100644 index 0000000000..5a1097fa6b --- /dev/null +++ b/src/output/tex-parsing.h @@ -0,0 +1,61 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef TEX_PARSING_H +#define TEX_PARSING_H + + +#include "libpspp/str.h" +#include "libpspp/ll.h" + +/* These are the default TeX categories as defined in Chapter 7 of + The TeXbook, plus a new one: CAT_CONTROL_SEQ. */ +enum tex_cat + { + CAT_ESCAPE = 0, + CAT_BEGIN_GROUP, // 1 + CAT_END_GROUP, // 2 + CAT_MATH_MODE, // 3 + CAT_ALIGNMENT, // 4 + CAT_PARAMETER, // 5 + CAT_SUPERSCRIPT, // 6 + CAT_SUBSCRIPT, // 7 + CAT_IGNORED, // 8 + CAT_EOL, // 9 + CAT_SPACE, // 10 + CAT_LETTER, // 11 + CAT_OTHER, // 12 + CAT_ACTIVE, // 13 + CAT_COMMENT, // 14 + CAT_INVALID, // 15 + CAT_CONTROL_SEQ, + }; + + +struct tex_token +{ + struct ll ll; + struct string str; + enum tex_cat cat; +}; + + +/* Parse the TeX fragment STR into TeX tokens and push them + on to LIST. */ +void tex_parse (const char *str, struct ll_list *list); + + +#endif //TEX_PARSING_H diff --git a/src/output/tex-rendering.c b/src/output/tex-rendering.c new file mode 100644 index 0000000000..9b68fde5ff --- /dev/null +++ b/src/output/tex-rendering.c @@ -0,0 +1,89 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include "tex-rendering.h" +#include "tex-glyphs.h" + +#include "libpspp/hmap.h" +#include "libpspp/hash-functions.h" + +#include "gl/mbiter.h" +#include "gl/mbchar.h" +#include "gl/unistr.h" +#include "gl/xalloc.h" + +/* Return a string containing TeX code which can be used to typeset + Unicode code point CP. As a side effect, insert any needed macro indeces + into the hash table MACROS. */ +const char * +code_point_to_tex (ucs4_t cp, struct hmap *macros) +{ + const char *what = 0; + + for (const struct glyph_block *gb = defined_blocks; gb->start; gb++) + { + if (cp < gb->start->code_point) + break; + + if (cp < gb->start->code_point + gb->n_glyphs) + { + what = gb->start[cp - gb->start->code_point].tex_rendering; + enum tex_ancilliary macro = gb->start[cp - gb->start->code_point].macro; + if (macro != TEX_NONE) + { + struct tex_macro *a_macro = NULL; + HMAP_FOR_EACH_WITH_HASH (a_macro, struct tex_macro, node, hash_int (0, macro), macros) + { + if (a_macro->index == macro) + break; + } + + if (a_macro == NULL) + { + a_macro = XMALLOC (struct tex_macro); + a_macro->index = macro; + hmap_insert (macros, &a_macro->node, hash_int (0, macro)); + } + } + break; + } + } + + if (!what) + fprintf (stderr, "Unsupported code point U+%04X\n", cp); + return what ? what : unsupported_glyph; +} + +/* Convert the first character of the utf8 string S, into a TeX fragment. + LEN must be the length of S (in bytes). After this function returns, S + will have been incremented by the length of the first character in S, + and LEN will have been decremented by the same amount. 
*/ +const char * +u8_to_tex_fragments (const char **s, size_t *len, struct hmap *macros) +{ + const uint8_t *u = (const uint8_t *) *s; + size_t clen = u8_mblen (u, *len); + + ucs4_t puc; + u8_mbtouc (&puc, u, clen); + + *len -= clen; + *s += clen; + + return code_point_to_tex (puc, macros); +} diff --git a/src/output/tex-rendering.h b/src/output/tex-rendering.h new file mode 100644 index 0000000000..571046fac6 --- /dev/null +++ b/src/output/tex-rendering.h @@ -0,0 +1,37 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef TEX_RENDERING_H +#define TEX_RENDERING_H + +#include "gl/unitypes.h" +#include + +struct hmap; + +/* Return a string containing TeX code which can be used to typeset + Unicode code point CP. */ +const char * code_point_to_tex (ucs4_t cp, struct hmap *macros); + + +/* Convert the first character of the utf8 string S, into a TeX fragment. + LEN must be the length of S (in bytes). After this function returns, S + will have been incremented by the length of the first character in S, + and LEN will have been decremented by the same amount. 
*/ +const char * u8_to_tex_fragments (const char **s, size_t *len, struct hmap *macros); + + +#endif diff --git a/src/output/tex.c b/src/output/tex.c new file mode 100644 index 0000000000..8fad562311 --- /dev/null +++ b/src/output/tex.c @@ -0,0 +1,548 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "gl/mbiter.h" +#include "data/file-name.h" +#include "data/file-handle-def.h" +#include "libpspp/assertion.h" +#include "libpspp/cast.h" +#include "libpspp/compiler.h" +#include "libpspp/hmap.h" +#include "libpspp/ll.h" +#include "libpspp/i18n.h" +#include "libpspp/message.h" +#include "libpspp/temp-file.h" +#include "libpspp/version.h" +#include "output/cairo.h" +#include "output/chart-item.h" +#include "output/driver-provider.h" +#include "output/message-item.h" +#include "output/options.h" +#include "output/output-item-provider.h" +#include "output/table-provider.h" +#include "output/table-item.h" +#include "output/text-item.h" +#include "output/tex-rendering.h" +#include "output/tex-parsing.h" + + +#include "tex-glyphs.h" + +#include "gl/minmax.h" +#include "gl/xalloc.h" +#include "gl/c-vasnprintf.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) + +/* The desired maximum line length in the TeX file. 
*/ +#define TEX_LINE_MAX 80 + +struct tex_driver + { + struct output_driver driver; + /* A hash table containing any Tex macros which need to be emitted. */ + struct hmap macros; +#ifdef HAVE_CAIRO + struct cell_color fg; + struct cell_color bg; +#endif + struct file_handle *handle; + char *chart_file_name; + + FILE *file; + size_t chart_cnt; + + struct ll_list preamble_list; + struct ll_list token_list; + }; + +/* Ships the string STR to the driver. */ +static void +shipout (struct ll_list *list, const char *str, ...) +{ + va_list args; + va_start (args, str); + + size_t length; + char *s = c_vasnprintf (NULL, &length, str, args); + + tex_parse (s, list); + + va_end (args); + free (s); +} + +static const struct output_driver_class tex_driver_class; + +static void tex_output_table (struct tex_driver *, const struct table_item *); + +static struct tex_driver * +tex_driver_cast (struct output_driver *driver) +{ + assert (driver->class == &tex_driver_class); + return UP_CAST (driver, struct tex_driver, driver); +} + +static struct driver_option * +opt (struct output_driver *d, struct string_map *options, const char *key, + const char *default_value) +{ + return driver_option_get (d, options, key, default_value); +} + +static struct output_driver * +tex_create (struct file_handle *fh, enum settings_output_devices device_type, + struct string_map *o) +{ + struct output_driver *d; + struct tex_driver *tex = XZALLOC (struct tex_driver); + hmap_init (&tex->macros); + ll_init (&tex->preamble_list); + ll_init (&tex->token_list); + + d = &tex->driver; + output_driver_init (&tex->driver, &tex_driver_class, fh_get_file_name (fh), + device_type); + tex->handle = fh; + tex->chart_file_name = parse_chart_file_name (opt (d, o, "charts", + fh_get_file_name (fh))); + tex->chart_cnt = 1; +#ifdef HAVE_CAIRO + parse_color (d, o, "background-color", "#FFFFFFFFFFFF", &tex->bg); + parse_color (d, o, "foreground-color", "#000000000000", &tex->fg); +#endif + + tex->file = fn_open 
(tex->handle, "w"); + if (tex->file == NULL) + { + msg_error (errno, _("error opening output file `%s'"), + fh_get_file_name (tex->handle)); + goto error; + } + + return d; + + error: + output_driver_destroy (d); + return NULL; +} + + +/* Emit all the tokens in LIST to FILE. + Then destroy LIST and its contents. */ +static void +post_process_tokens (FILE *file, struct ll_list *list) +{ + size_t line_len = 0; + struct tex_token *tt; + struct tex_token *ttnext; + ll_for_each_safe (tt, ttnext, struct tex_token, ll, list) + { + if (tt->cat == CAT_SPACE) + { + /* Count the number of characters up to the next space, + and if it'll not fit on to the line, then make a line + break here. */ + size_t word_len = 0; + struct tex_token *prev_x = NULL; + for (struct ll *x = ll_next (&tt->ll); x != ll_null (list); + x = ll_next (x)) + { + struct tex_token *nt = ll_data (x, struct tex_token, ll); + if (nt->cat == CAT_SPACE || nt->cat == CAT_EOL) + break; + if (prev_x && (prev_x->cat == CAT_COMMENT) && (nt->cat != CAT_COMMENT)) + break; + word_len += ds_length (&nt->str); + prev_x = nt; + } + + if ((word_len < TEX_LINE_MAX) && (line_len + word_len >= TEX_LINE_MAX - 1)) + { + fputs ("\n", file); + line_len = 0; + continue; + } + } + + line_len += ds_length (&tt->str); + if (tt->cat == CAT_EOL) + line_len = 0; + if (line_len >= TEX_LINE_MAX) + { + fputs ("%\n", file); + line_len = ds_length (&tt->str); + } + if (tt->cat == CAT_COMMENT) + line_len = 0; + fputs (ds_cstr (&tt->str), file); + ds_destroy (&tt->str); + free (tt); + } +} + + +static void +tex_destroy (struct output_driver *driver) +{ + struct tex_driver *tex = tex_driver_cast (driver); + + shipout (&tex->preamble_list, "%%%% TeX output of pspp\n\n"); + shipout (&tex->preamble_list, "%%%% Define the horizontal space between table columns\n"); + shipout (&tex->preamble_list, "\\def\\psppcolumnspace{1mm}\n\n"); + + shipout (&tex->preamble_list, "\\input graphicx\n\n"); + + char *ln = get_language (); + if (ln) + shipout 
(&tex->preamble_list, "%%%% Language is \"%s\"\n", ln); + free (ln); + shipout (&tex->preamble_list, "\n"); + + shipout (&tex->preamble_list, "%%%% Sets the environment for rendering material in table cell\n"); + shipout (&tex->preamble_list, "%%%% The parameter is the number of columns in the table\n"); + shipout (&tex->preamble_list, + "\\def\\cell#1{\\normalbaselines\\advance\\hsize by -#1.0\\psppcolumnspace" + "\\advance\\hsize by \\psppcolumnspace" + "\\divide\\hsize by #1" + "\\noindent\\raggedright\\hskip0pt}\n\n"); + + /* centre macro */ + shipout (&tex->preamble_list, + "%%%% Render the text centre justified\n" + "\\def\\startcentre{\\begingroup\\leftskip=0pt plus 1fil\n" + "\\rightskip=\\leftskip\\parfillskip=0pt}\n"); + shipout (&tex->preamble_list, "\\def\\stopcentre{\\par\\endgroup}\n"); + shipout (&tex->preamble_list, "\\long\\def\\centre#1{\\startcentre#1\\stopcentre}\n\n"); + + + /* right macro */ + shipout (&tex->preamble_list, + "%%%% Render the text right justified\n" + "\\def\\startright{\\begingroup\\leftskip=0pt plus 1fil\n" + "\\parfillskip=0pt}\n"); + shipout (&tex->preamble_list, "\\def\\stopright{\\par\\endgroup}\n"); + shipout (&tex->preamble_list, "\\long\\def\\right#1{\\startright#1\\stopright}\n\n"); + + + /* Emit all the macro defintions. */ + struct tex_macro *m; + struct tex_macro *next; + HMAP_FOR_EACH_SAFE (m, next, struct tex_macro, node, &tex->macros) + { + shipout (&tex->preamble_list, "%s", tex_macro[m->index]); + shipout (&tex->preamble_list, "\n\n"); + free (m); + } + hmap_destroy (&tex->macros); + + post_process_tokens (tex->file, &tex->preamble_list); + + shipout (&tex->token_list, "\n\\bye\n"); + + post_process_tokens (tex->file, &tex->token_list); + + fn_close (tex->handle, tex->file); + + free (tex->chart_file_name); + fh_unref (tex->handle); + free (tex); +} + +/* Ship out TEXT (which must be a UTF-8 encoded string to the driver's output. + if TABULAR is true, then this text is within a table. 
*/
+static void
+tex_escape_string (struct tex_driver *tex, const char *text,
+                   bool tabular)
+{
+  size_t n = strlen (text);
+  while (n > 0)
+    {
+      /* TEXT and N are passed by reference; the loop relies on
+         u8_to_tex_fragments consuming input on every call. */
+      const char *frag = u8_to_tex_fragments (&text, &n, &tex->macros);
+      shipout (&tex->token_list, "%s", frag);
+      if (text[0] != '\0' && tabular && 0 == strcmp (frag, "."))
+        {
+          /* Peek ahead to the next code sequence, working on copies so
+             that the position in TEXT is not disturbed. */
+          size_t nn = n;
+          const char *t = text;
+          const char *next = u8_to_tex_fragments (&t, &nn, &tex->macros);
+          /* If a period followed by whitespace is encountered within
+             tabular material, then it is reasonable to assume that it is
+             an abbreviation (like "Sig." or "Std. Deviation") rather than
+             the end of a sentence, so emit a control space. */
+          if (next && 0 == strcmp (" ", next))
+            shipout (&tex->token_list, "\\ ");
+        }
+    }
+}
+
+/* Renders OUTPUT_ITEM into DRIVER's pending token list.
+
+   Tables, charts (only when built with HAVE_CAIRO and a chart file name
+   is configured), text items and messages are handled.  Any other kind
+   of item is silently ignored. */
+static void
+tex_submit (struct output_driver *driver,
+            const struct output_item *output_item)
+{
+  struct tex_driver *tex = tex_driver_cast (driver);
+
+  if (is_table_item (output_item))
+    {
+      struct table_item *table_item = to_table_item (output_item);
+      tex_output_table (tex, table_item);
+    }
+#ifdef HAVE_CAIRO
+  else if (is_chart_item (output_item) && tex->chart_file_name != NULL)
+    {
+      struct chart_item *chart_item = to_chart_item (output_item);
+      char *file_name = xr_draw_png_chart (chart_item, tex->chart_file_name,
+                                           tex->chart_cnt++,
+                                           &tex->fg,
+                                           &tex->bg);
+      if (file_name != NULL)
+        {
+          shipout (&tex->token_list, "\\includegraphics{%s}\n", file_name);
+          free (file_name);
+        }
+    }
+#endif  /* HAVE_CAIRO */
+  else if (is_text_item (output_item))
+    {
+      struct text_item *text_item = to_text_item (output_item);
+      const char *s = text_item_get_text (text_item);
+
+      switch (text_item_get_type (text_item))
+        {
+        case TEXT_ITEM_PAGE_TITLE:
+          shipout (&tex->token_list, "\\headline={\\bf ");
+          tex_escape_string (tex, s, false);
+          shipout (&tex->token_list, "\\hfil}\n");
+          break;
+
+        case TEXT_ITEM_LOG:
+          shipout (&tex->token_list, "{\\tt ");
+          tex_escape_string (tex, s, false);
+          shipout (&tex->token_list, "}\\par\n\n");
+          break;
+
+        case TEXT_ITEM_EJECT_PAGE:
+          /* Nothing to do. */
+          break;
+
+        case TEXT_ITEM_SYNTAX:
+          /* So far as I'm aware, this can never happen. */
+        default:
+          /* This is a diagnostic, so it goes to stderr rather than being
+             mixed into whatever is being written to stdout. */
+          fprintf (stderr, "Unhandled type %d\n",
+                   text_item_get_type (text_item));
+          break;
+        }
+    }
+  else if (is_message_item (output_item))
+    {
+      const struct message_item *message_item = to_message_item (output_item);
+      char *s = msg_to_string (message_item_get_msg (message_item));
+      tex_escape_string (tex, s, false);
+      shipout (&tex->token_list, "\\par\n");
+      free (s);
+    }
+}
+
+/* Emits the markers of the N_FOOTNOTES footnotes in FOOTNOTES as a single
+   math-mode superscript.  Emits nothing at all if N_FOOTNOTES is zero. */
+static void
+tex_put_footnote_markers (struct tex_driver *tex,
+                          const struct footnote **footnotes,
+                          size_t n_footnotes)
+{
+  if (n_footnotes == 0)
+    return;
+
+  shipout (&tex->token_list, "$^{");
+  for (size_t i = 0; i < n_footnotes; i++)
+    tex_escape_string (tex, footnotes[i]->marker, true);
+  shipout (&tex->token_list, "}$");
+}
+
+/* Emits the escaped content of TEXT followed by its footnote markers. */
+static void
+tex_put_table_item_text (struct tex_driver *tex,
+                         const struct table_item_text *text)
+{
+  tex_escape_string (tex, text->content, false);
+  tex_put_footnote_markers (tex, text->footnotes, text->n_footnotes);
+}
+
+/* Renders the table in ITEM, preceded by its caption and title and
+   followed by its footnotes.
+
+   Tables are rendered in TeX with the \halign command.
+   This is described in the TeXbook Ch. 22.
+
+   Layers are not implemented: the function aborts if ITEM has any. */
+static void
+tex_output_table (struct tex_driver *tex, const struct table_item *item)
+{
+  const struct table *t = table_item_get_table (item);
+  const int n_cols = table_nc (t);
+  const int n_rows = table_nr (t);
+
+  shipout (&tex->token_list, "\n{\\parindent=0pt\n");
+
+  const struct table_item_text *caption = table_item_get_caption (item);
+  if (caption)
+    {
+      shipout (&tex->token_list, "{\\sl ");
+      /* Use the same helper as for the title so that footnote markers
+         attached to the caption are not dropped. */
+      tex_put_table_item_text (tex, caption);
+      shipout (&tex->token_list, "}\n\n");
+    }
+  const struct footnote **f;
+  size_t n_footnotes = table_collect_footnotes (item, &f);
+
+  const struct table_item_text *title = table_item_get_title (item);
+  const struct table_item_layers *layers = table_item_get_layers (item);
+  if (title || layers)
+    {
+      if (title)
+        {
+          shipout (&tex->token_list, "{\\bf ");
+          tex_put_table_item_text (tex, title);
+          shipout (&tex->token_list, "}");
+        }
+      if (layers)
+        abort ();
+      shipout (&tex->token_list, "\\par\n");
+    }
+
+  shipout (&tex->token_list, "\\offinterlineskip\\halign{\\strut%%\n");
+
+  /* Generate the preamble: one template per column, separated by rules. */
+  for (int x = 0; x < n_cols; ++x)
+    {
+      shipout (&tex->token_list, "{\\vbox{\\cell{%d}#}}", n_cols);
+
+      if (x < n_cols - 1)
+        {
+          shipout (&tex->token_list, "\\hskip\\psppcolumnspace\\hfil");
+          shipout (&tex->token_list, "&\\vrule\n");
+        }
+      else
+        shipout (&tex->token_list, "\\cr\n");
+    }
+
+  /* Emit the row data */
+  for (int y = 0; y < n_rows; y++)
+    {
+      bool is_column_header = (y < table_ht (t)
+                               || y >= n_rows - table_hb (t));
+      for (int x = 0; x < n_cols;)
+        {
+          struct table_cell cell;
+
+          table_get_cell (t, x, y, &cell);
+
+          /* Every cell after the first in a row starts a new column. */
+          if (x > 0)
+            shipout (&tex->token_list, "&");
+
+          /* A cell is rendered only at its top-left corner.  Row spans
+             get no special treatment: on the continuation rows of a
+             spanning cell the column is simply left empty. */
+          if (x == cell.d[TABLE_HORZ][0] && y == cell.d[TABLE_VERT][0])
+            {
+              int colspan = table_cell_colspan (&cell);
+              enum table_halign halign =
+                table_halign_interpret (cell.style->cell_style.halign,
+                                        cell.options & TAB_NUMERIC);
+
+              if (colspan > 1)
+                {
+                  /* Merge the spanned columns and widen \hsize to their
+                     combined width, including the gaps between them. */
+                  shipout (&tex->token_list, "\\multispan{%d}\\span",
+                           colspan - 1);
+                  shipout (&tex->token_list, "\\hsize=%d.0\\hsize", colspan);
+                  shipout (&tex->token_list,
+                           "\\advance\\hsize%d.0\\psppcolumnspace ",
+                           colspan - 1);
+                }
+
+              if (halign == TABLE_HALIGN_CENTER)
+                shipout (&tex->token_list, "\\centre{");
+
+              if (halign == TABLE_HALIGN_RIGHT)
+                shipout (&tex->token_list, "\\right{");
+
+              /* Output cell contents. */
+              tex_escape_string (tex, cell.text, true);
+              tex_put_footnote_markers (tex, cell.footnotes,
+                                        cell.n_footnotes);
+              if (halign == TABLE_HALIGN_CENTER
+                  || halign == TABLE_HALIGN_RIGHT)
+                shipout (&tex->token_list, "}");
+            }
+
+          /* Continue with the column just past this cell. */
+          x = cell.d[TABLE_HORZ][1];
+        }
+      shipout (&tex->token_list, "\\cr\n");
+      if (is_column_header)
+        shipout (&tex->token_list,
+                 "\\noalign{\\hrule\\vskip -\\normalbaselineskip}\\cr\n");
+    }
+
+  shipout (&tex->token_list, "}%% End of \\halign\n");
+
+  /* Shipout any footnotes. */
+  if (n_footnotes > 0)
+    shipout (&tex->token_list, "\\vskip 0.5ex\n");
+
+  for (size_t i = 0; i < n_footnotes; ++i)
+    {
+      shipout (&tex->token_list, "$^{");
+      tex_escape_string (tex, f[i]->marker, false);
+      shipout (&tex->token_list, "}$");
+      tex_escape_string (tex, f[i]->content, false);
+    }
+  free (f);
+
+  shipout (&tex->token_list, "}\n\\vskip 3ex\n\n");
+}
+
+/* Factory for this driver.  The initializers are positional; presumably
+   the format name, the default output file name and the constructor --
+   TODO confirm against struct output_driver_factory. */
+struct output_driver_factory tex_driver_factory =
+  { "tex", "pspp.tex", tex_create };
+
+/* Driver class.  The initializers are positional.  NOTE(review): the
+   final slot is left NULL; confirm which optional hook it is against
+   struct output_driver_class. */
+static const struct output_driver_class tex_driver_class =
+  {
+    "tex",
+    tex_destroy,
+    tex_submit,
+    NULL,
+  };
diff --git a/tests/atlocal.in b/tests/atlocal.in index df10bef95c..162b832dfd 100644 --- a/tests/atlocal.in +++ b/tests/atlocal.in @@ -22,6 +22,7 @@ EXEEXT='@EXEEXT@' PERL='@PERL@' PYTHON='@PYTHON@' +TEX='@TEX@' WITH_PERL_MODULE='@WITH_PERL_MODULE@' host='@host@' PACKAGE_STRING='@PACKAGE_STRING@' diff --git a/tests/automake.mk b/tests/automake.mk index 241f0afd4d..ea0efb8569 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -51,6 +51,8 @@ check_PROGRAMS += \ tests/math/chart-get-ticks-format-test \ tests/math/chart-get-scale-test \ tests/output/render-test \ + tests/output/tex-glyphs \ + tests/output/tex-strings \ tests/ui/syntax-gen-test @@ -262,6 +264,16 @@ tests_ui_syntax_gen_test_LDADD = \ src/libpspp-core.la \ $(CAIRO_LIBS) +tests_output_tex_glyphs_SOURCES = \ + tests/output/tex-glyphs.c +tests_output_tex_glyphs_LDADD = src/libpspp-core.la src/output/liboutput.la +tests_output_tex_glyphs_CFLAGS = $(AM_CFLAGS) -I $(top_srcdir)/src/output + +tests_output_tex_strings_SOURCES = \ + tests/output/tex-strings.c +tests_output_tex_strings_LDADD = src/libpspp-core.la src/output/liboutput.la +tests_output_tex_strings_CFLAGS = $(AM_CFLAGS) -I $(top_srcdir)/src/output + EXTRA_DIST += \ tests/coverage.sh \ @@ -445,6 +457,7 @@ TESTSUITE_AT = \ tests/output/paper-size.at \ tests/output/render.at \ tests/output/tables.at \ + tests/output/tex.at \ tests/ui/terminal/main.at \ tests/ui/syntax-gen.at \
tests/utilities/pspp-convert.at \ diff --git a/tests/output/tex-glyphs.c b/tests/output/tex-glyphs.c new file mode 100644 index 0000000000..f0e3891e97 --- /dev/null +++ b/tests/output/tex-glyphs.c @@ -0,0 +1,112 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+/* NOTE(review): the header names on the angle-bracket #include lines were
+   lost when this patch was extracted.  They are reconstructed here from
+   the identifiers this file uses -- confirm against the upstream file. */
+#include <config.h>
+
+#include "libpspp/hmap.h"
+#include "tex-rendering.h"
+#include "tex-glyphs.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+/* Number of bytes left blank at the start of the output file.  The macro
+   definitions are written into this area once it is known which ones the
+   document needs. */
+enum { PREAMBLE_RESERVE = 4096 };
+
+/* Number of table columns that show each glyph (one per font). */
+enum { N_FONT_COLUMNS = 5 };
+
+/* Writes STR followed by a newline to FP. */
+static void
+tex_render (FILE *fp, const char *str)
+{
+  fputs (str, fp);
+  fputc ('\n', fp);
+}
+
+/* Offset in the output file at which the next macro definition goes. */
+static long macro_insertion_point = 0;
+
+/* Writes the macro definition STR, followed by a blank line, into the
+   reserved area at the start of FP, then restores the file position.
+   Exits with an error if the definitions no longer fit in the reserved
+   area, since they would then have overwritten the document body. */
+static void
+tex_preamble (FILE *fp, const char *str)
+{
+  long where = ftell (fp);
+  fseek (fp, macro_insertion_point, SEEK_SET);
+  tex_render (fp, str);
+  fputc ('\n', fp);
+  macro_insertion_point = ftell (fp);
+  if (macro_insertion_point > PREAMBLE_RESERVE)
+    {
+      fprintf (stderr, "tex-glyphs: macro definitions exceed the %d bytes "
+               "reserved for them\n", PREAMBLE_RESERVE);
+      exit (EXIT_FAILURE);
+    }
+  fseek (fp, where, SEEK_SET);
+}
+
+
+/* Writes a TeX document to the file named by the first argument.  The
+   document is a table with one row per glyph in defined_blocks, showing
+   the glyph's code point, its name, and its rendering in five fonts. */
+int
+main (int argc, char **argv)
+{
+  if (argc < 2)
+    {
+      fprintf (stderr, "Usage: tex-glyphs <output-file>\n");
+      return 1;
+    }
+
+  FILE *fp = fopen (argv[1], "w");
+  if (!fp)
+    {
+      perror ("Cannot open output file");
+      return 1;
+    }
+
+  struct hmap macros;
+  hmap_init (&macros);
+
+  /* Leave room for the macro definitions; see tex_preamble. */
+  fseek (fp, PREAMBLE_RESERVE, SEEK_SET);
+
+  tex_render (fp, "\\raggedbottom");
+
+  tex_render (fp, "\\halign{{\\tt #}\\qquad&{\\font\\xx=cmr7 \\xx #}\\hfil&\\quad{\\rm #}");
+  tex_render (fp, "\\hfil&\\quad{\\sl #}");
+  tex_render (fp, "\\hfil&\\quad{\\it #}");
+  tex_render (fp, "\\hfil&\\quad{\\bf #}");
+  tex_render (fp, "\\hfil&\\quad{\\tt #}\\cr");
+
+  for (const struct glyph_block *gb = defined_blocks; gb->start; ++gb)
+    {
+      ucs4_t x = gb->start->code_point;
+      for (const struct glyph *g = gb->start;
+           x < gb->n_glyphs + gb->start->code_point; ++g)
+        {
+          /* The glyphs of a block must have consecutive code points.
+             The increment is kept out of the assertion so that the loop
+             still terminates when assertions are compiled out. */
+          assert (g->code_point == x);
+          x++;
+
+          fprintf (fp, "U+%04X&%s", g->code_point, g->name);
+          for (int col = 0; col < N_FONT_COLUMNS; col++)
+            fprintf (fp, "&M%sM",
+                     code_point_to_tex (g->code_point, &macros));
+          fputs ("\\cr\n", fp);
+        }
+    }
+
+  {
+    /* Emit the definition of every macro that was recorded in MACROS,
+       freeing the records as we go. */
+    struct tex_macro *m;
+    struct tex_macro *next;
+    HMAP_FOR_EACH_SAFE (m, next, struct tex_macro, node, &macros)
+      {
+        tex_preamble (fp, tex_macro[m->index]);
+        free (m);
+      }
+  }
+  hmap_destroy (&macros);
+
+  tex_render (fp, "}");
+  tex_render (fp, "\\bye");
+
+  fclose (fp);
+  return 0;
+}
diff --git a/tests/output/tex-strings.c b/tests/output/tex-strings.c new file mode 100644 index 0000000000..71aed14307 --- /dev/null +++ b/tests/output/tex-strings.c @@ -0,0 +1,174 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+
+/* NOTE(review): the header names on the angle-bracket #include lines were
+   lost when this patch was extracted.  They are reconstructed here from
+   the identifiers this file uses; in particular the header that declares
+   xrealloc is a guess -- confirm against the upstream file. */
+#include <config.h>
+
+#include "tex-rendering.h"
+#include "tex-glyphs.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <gl/xalloc.h>
+
+/* Writes STR followed by a newline to FP. */
+static void
+tex_render (FILE *fp, const char *str)
+{
+  fputs (str, fp);
+  fputc ('\n', fp);
+}
+
+#define BLOCK_SIZE 16
+
+/* Number of bytes left blank at the start of the output file.  The macro
+   definitions are written into this area once it is known which ones the
+   document needs. */
+enum { PREAMBLE_RESERVE = 4096 };
+
+/* Reads an entire file FP and returns it as a null-terminated string,
+   which the caller must free.
+   Any single instance of newline will be mutated to a space.
+   However multiple consecutive newlines will be mutated to
+   a single newline.  Newlines at the very end of the file are dropped.
+   An empty file yields an empty string, never a null pointer. */
+static char *
+read_whole_file (FILE *fp)
+{
+  char *result = NULL;
+  size_t bytes = 0;             /* Bytes stored in RESULT so far. */
+  size_t len = 0;               /* Bytes allocated for RESULT. */
+  int consecutive_nl = 0;       /* Newlines seen since the last other byte. */
+  int c;
+
+  while ((c = fgetc (fp)) != EOF)
+    {
+      /* One input byte can append two output bytes (a pending separator
+         plus the byte itself), and one more byte must stay free for the
+         terminating null.  New space is zero-filled, so the string is
+         null-terminated at all times. */
+      if (len < bytes + 3)
+        {
+          result = xrealloc (result, len + BLOCK_SIZE);
+          memset (result + len, 0, BLOCK_SIZE);
+          len += BLOCK_SIZE;
+        }
+
+      if (c == '\n')
+        {
+          consecutive_nl++;
+          continue;
+        }
+
+      if (consecutive_nl > 1)
+        result[bytes++] = '\n';
+      else if (consecutive_nl == 1)
+        result[bytes++] = ' ';
+      consecutive_nl = 0;
+
+      result[bytes++] = c;
+    }
+
+  if (result == NULL)
+    {
+      /* Nothing was read at all. */
+      result = xrealloc (NULL, 1);
+      result[0] = '\0';
+    }
+
+  return result;
+}
+
+/* Offset in the output file at which the next macro definition goes. */
+static long macro_insertion_point = 0;
+
+/* Writes the macro definition STR, followed by a blank line, into the
+   reserved area at the start of FP, then restores the file position.
+   Exits with an error if the definitions no longer fit in the reserved
+   area, since they would then have overwritten the document body. */
+static void
+tex_preamble (FILE *fp, const char *str)
+{
+  long where = ftell (fp);
+  fseek (fp, macro_insertion_point, SEEK_SET);
+  tex_render (fp, str);
+  fputc ('\n', fp);
+  macro_insertion_point = ftell (fp);
+  if (macro_insertion_point > PREAMBLE_RESERVE)
+    {
+      fprintf (stderr, "tex-strings: macro definitions exceed the %d bytes "
+               "reserved for them\n", PREAMBLE_RESERVE);
+      exit (EXIT_FAILURE);
+    }
+  fseek (fp, where, SEEK_SET);
+}
+
+/* Prints a usage message to stderr. */
+static void
+usage (void)
+{
+  fprintf (stderr,
+           "Usage: tex-strings -o <output-file> <input-file> ...\n");
+}
+
+
+/* Converts each input file named on the command line to TeX and writes
+   the results, one paragraph group per file, to the file given by -o. */
+int
+main (int argc, char **argv)
+{
+  char *outfile = NULL;
+  int opt;
+  while ((opt = getopt (argc, argv, "o:")) != -1)
+    {
+      switch (opt)
+        {
+        case 'o':
+          /* optarg is correct for both "-o FILE" and "-oFILE". */
+          outfile = optarg;
+          break;
+        default:
+          usage ();
+          return 1;
+        }
+    }
+
+  /* Both an output file and at least one input file are required. */
+  if (outfile == NULL || optind >= argc)
+    {
+      usage ();
+      return 1;
+    }
+
+  FILE *fpout = fopen (outfile, "w");
+  if (!fpout)
+    {
+      int err = errno;
+      fprintf (stderr, "Cannot open output file %s: %s\n", outfile, strerror (err));
+      return 1;
+    }
+
+  struct hmap macros;
+  hmap_init (&macros);
+
+  /* Leave room for the macro definitions; see tex_preamble. */
+  fseek (fpout, PREAMBLE_RESERVE, SEEK_SET);
+
+  for (int arg = optind; arg < argc; ++arg)
+    {
+      FILE *fpin = fopen (argv[arg], "r");
+      if (!fpin)
+        {
+          int err = errno;
+          fprintf (stderr, "Cannot open input file %s: %s\n", argv[arg], strerror (err));
+          fclose (fpout);
+          return 1;
+        }
+
+      tex_render (fpout, "\\noindent");
+
+      char *str = read_whole_file (fpin);
+      const char *s = str;
+      size_t n = strlen (str);
+      while (n > 0)
+        {
+          const char *frag = u8_to_tex_fragments (&s, &n, &macros);
+          fputs (frag, fpout);
+        }
+      free (str);
+
+      fclose (fpin);
+
+      tex_render (fpout, "\\par\\vskip 1em");
+    }
+
+  {
+    /* Emit the definition of every macro that was recorded in MACROS,
+       freeing the records as we go. */
+    struct tex_macro *m;
+    struct tex_macro *next;
+    HMAP_FOR_EACH_SAFE (m, next, struct tex_macro, node, &macros)
+      {
+        tex_preamble (fpout, tex_macro[m->index]);
+        free (m);
+      }
+  }
+  hmap_destroy (&macros);
+
+  tex_render (fpout, "\\bye");
+
+  fclose (fpout);
+  return 0;
+}
diff --git a/tests/output/tex.at b/tests/output/tex.at new file mode 100644 index 0000000000..0044e14df7 --- /dev/null +++ b/tests/output/tex.at @@ -0,0 +1,326 @@ +dnl PSPP - a program for statistical analysis. +dnl Copyright (C) 2020 Free Software Foundation, Inc. +dnl +dnl This program is free software: you can redistribute it and/or modify +dnl it under the terms of the GNU General Public License as published by +dnl the Free Software Foundation, either version 3 of the License, or +dnl (at your option) any later version. +dnl +dnl This program is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +dnl GNU General Public License for more details. +dnl +dnl You should have received a copy of the GNU General Public License +dnl along with this program.
If not, see <http://www.gnu.org/licenses/>. +dnl + +AT_BANNER([tex driver]) + +dnl By passing --xdvi=xdvi to TESTSUITEFLAGS, one can view the rendered dvi from +dnl tests defined in this file. +AT_ARG_OPTION_ARG([xdvi],[AS_HELP_STRING([--xdvi], + [the xdvi program to show TeX output (default none)])]) + +AT_SETUP([tex glyphs]) + +AT_CHECK([tex-glyphs pspp.tex], [0], [ignore], [ignore]) + +AT_SKIP_IF([test "$TEX" = no]) +AT_CHECK([$TEX --halt-on-error pspp.tex], [0], [ignore]) + +dnl There should be no overfull boxes +AT_CHECK([grep '^Overfull ' pspp.log], [1]) +dnl nor any underfull ones +AT_CHECK([grep '^Underfull ' pspp.log], [1]) + +test -z "$at_arg_xdvi" || AT_CHECK([$at_arg_xdvi -s 3 pspp.dvi], [0], [ignore]) + +AT_CLEANUP + + + +AT_SETUP([tex utf8-strings]) + +mkdir sample-texts + +dnl The following UTF-8 texts are a collection of snippets harvested +dnl from various Wikipedia sites. Wikipedia articles are +dnl copyright and licenced under the GFDL (unversioned) +AT_DATA([sample-texts/en.txt], [[The +Battle of Artaza (Acción de Artaza) was +a battle that occurred on April 20–22, 1835 during the First +Carlist War. + +Jerónimo Valdés, at the time Minister of War, arrived at the area known as +Las Amescoas with 22,000 men with the intention of +definitively destroying the Carlist forces.[1] Zumalacárregui had 5,000 men, +but had the advantage of knowing the terrain and practicing guerrilla tactics. +]]) + +AT_DATA([sample-texts/pl.txt], [[The Giszowiec składa się z budynków +wielorodzinnych z wielkiej płyty oraz +zabytkowych domów, przeważnie jedno- i dwurodzinnych, stanowiących +niegdyś zwarte osiedle górnicze, które do dziś jest unikatowe w skali +europejskiej i znajduje się na Szlaku Zabytków Techniki Województwa +Śląskiego. Zostało ono wybudowane dla górników pracujących w kopalni +Giesche (późniejszej kopalni Wieczorek) przez koncern Georg von +Giesches Erben na zlecenie dyrektora spółki Antona Uthemanna w latach +1907–1910.
Projektantami osiedla byli Zillmannowie: Georg i +go dwa szlaki komunikacyjne: droga krajowa nr 86 oraz autostrada A4, a +także rozpoczyna swój bieg droga krajowa nr 81. Dzielnica ma +powierzchnię 12,03 km² (7,30% powierzchni miasta) i liczyła w 2007 +roku 18 475 mieszkańców (5,9% ludności Katowic). +]]) + +AT_DATA([sample-texts/el.txt], [[Η πόλη των Σερρών υπήρξε η πιο +οχυρωμένη πόλη της περιοχής λόγω της στρατηγικής γεωγραφικής της +θέσης. Στα βόρεια της πόλης των Σερρών, πάνω σε λόφο γνωστό με την +επωνυμία Κουλάς, βρίσκεται η αρχαία και βυζαντινή ακρόπολη. Η ακρόπολη +όπως σώζεται σήμερα αποτελεί έργο των βυζαντινών χρόνων, αλλά +θεμελιώθηκε πάνω σε αρχαίο φρούριο του 7ου και 6ου π. Χ. αιώνα. Η +κατασκευή της ακρόπολης χρονολογείται στον 9ο μ. Χ. αιώνα, πιθανότατα, +επί Αυτοκράτορα Νικηφόρου Φωκά και αναφέρεται σε πολλά βυζαντινά +χρυσόβουλα ως «κάστρο» με γνωστότερο καστροφύλακα τον Λέοντα τον +Αζανίτη. [1] Με την πάροδο των χρόνων ολόκληρη η πόλη χαρακτηριζόταν +ως «κάστρο» και οι Σερραίοι, «καστρινοί». +]]) + +AT_DATA([sample-texts/is.txt], [[Saloth Sar, betur þekktur sem Pol Pot, +var fæddur 19. maí 1925 og dó +15. apríl 1998. Hann var leiðtogi Rauðu khmeranna í Kambódíu frá 1963 +til 1979 og er þekktastur fyrir dauða óhemjumargs fólks í stjórnartíð +sinni, sem var frá 1975 til 1979. Rauðu khmerarnir reyndu að +framfylgja sýn sinni um eins konar samyrkjuvæðingu, en meðal þess sem +hún átti að fela í sér var að borgarbúar flyttu út í sveitir og ynnu +þar við landbúnað eða í betrunarvinnu. Þeir töldu sig geta byrjað +siðmenninguna upp á nýtt og tóku því upp tímatal sem átti að hefjast +með valdatíð þeirra. Sú valdatíð var ekki löng, en því +mannskæðari. Þrælkunarvinna, vannæring, hrun í heilbrigðiskerfinu og +beinar aftökur kostuðu á bilinu 750.000 - 1.700.000 manns lífið (sumir +segja á bilinu). +]]) + +AT_DATA([sample-texts/hr.txt], [[Nakon što je film završen početkom +1984., autori su krenuli tražiti distributera.
Odnijeli su film u Los +Angeles i pokazali ga velikim studijima. Svi su odbili izuzev Oriona +koji je prihvatio distribuciju, ali uz uvjet da ne bi ništa platio +unaprijed, što je investitorima bilo neprihvatljivo. Joel Coen +napominje da se dogovorio sa agentom za prodaju prava u inozemstvu +kako bi se napokon film počeo prikazivati negdje, no to je bio +riskantan potez jer to ubija zanimanje domaćih distributera. +]]) + +AT_DATA([sample-texts/cz.txt], [[Volarský dům alpského typu (německy +Wallerer Haus) je na území České +republiky ojedinělý typ venkovského dřevěného domu, jenž se vyskytuje +v šumavském městě Volary, které proto někdy bylo nazýváno „alpským +městečkem na Šumavě“. Mimo Volary se vyskytuje ještě v sousední obci +Dobrá a další jeden dům se nachází v Železné Rudě.[1] Další, do +současnosti nedochované domy, se nacházely v Českých Žlebech, Lenoře, +Hůrce a Vimperku. V širším okolí Šumavy (Chodsko, Klatovsko, Plánicko) +se vyskytují domy volarským stylem více nebo méně ovlivněné.[2] +]]) + +AT_DATA([sample-texts/de.txt], [[Urkundlich wurde Rehme 753 erstmals +in den fränkischen Reichsannalen erwähnt, als sich die fränkischen +Könige Pippin der Jüngere und sein Nachfolger Karl der Große auf +Feldzügen dorthin begaben.[2] Seit ca. 800 ist Rehme Sitz einer +Kirchengemeinde. Die romanische Laurentiuskirche stammt aus dem +12. Jahrhundert und wurde 1890–1892 im neoromanischen Stil erweitert. +]]) + +AT_DATA([sample-texts/ang.txt], [[Regn is ƿǣt æðmhēap, nā heard cynn +æðmhēapes sƿilce snāƿ, hægel, and īsregn. Tō regnenne, hit is nȳdmicel +þæt þicce dǣl þæs lyfthelmes sīe ƿearmra þonne his hāt meltunge, nēah +þǣm andƿlitan þǣre eorðan and ēac ofer him. Hē is sēo gæderung þæs +lyfthelmes æðmes on lyfte oþþæt hē is sƿā hefig þæt hē fealle, oft +full tō þǣre eorðan þǣm andƿlitan. Tū ƿeorc cunnon fyllan þā lyfte +oþþæt hire ƿæter beflōƿe: sēo acēlung þǣre lyfte and sēo settung +ƿæteres æðmes on þā lyfte.
+]]) + +AT_DATA([sample-texts/sv.txt], [[Årsmedeltemperaturen i trakten är 18 +°C. Den varmaste månaden är januari, då medeltemperaturen är 20 °C, +och den kallaste är juni, med 16 °C.[5] Genomsnittlig årsnederbörd är +2 874 millimeter. Den regnigaste månaden är februari, med i genomsnitt +421 mm nederbörd, och den torraste är augusti, med 79 mm nederbörd.[6] +]]) + + +AT_CHECK([tex-strings -o pspp.tex sample-texts/*], [0], [ignore], [ignore]) + +AT_SKIP_IF([test "$TEX" = no]) +AT_CHECK([$TEX --halt-on-error pspp.tex], [0], [ignore]) + +test -z "$at_arg_xdvi" || AT_CHECK([$at_arg_xdvi -s 3 pspp.dvi], [0], [ignore]) + +AT_CLEANUP + + + +AT_SETUP([tex simple example]) + +AT_DATA([simple.sps], [dnl +data list list /name (a16) score *. +begin data +alfred 34.0 +bertram 12.2 +charlie 12.1 +david 54.1 +end data. + +descriptives /score. +]) + + +AT_CHECK([pspp -O format=tex simple.sps], [0], []) +AT_CHECK([test $(wc -L pspp.tex | awk '{print $1}') -le 80], [0], [ignore]) + +AT_SKIP_IF([test "$TEX" = no]) +AT_CHECK([$TEX --halt-on-error pspp.tex], [0], [ignore]) + +dnl There should be no overfull boxes +AT_CHECK([grep '^Overfull ' pspp.log], [1]) +dnl nor any underfull ones +AT_CHECK([grep '^Underfull ' pspp.log], [1]) + +test -z "$at_arg_xdvi" || AT_CHECK([$at_arg_xdvi -s 3 pspp.dvi], [0], [ignore]) + +AT_CLEANUP + + + +AT_SETUP([tex another example]) + +AT_CHECK([pspp -O format=tex $abs_top_srcdir/examples/regress.sps], [0], []) +AT_CHECK([test $(wc -L pspp.tex | awk '{print $1}') -le 80], [0], [ignore]) + +AT_SKIP_IF([test "$TEX" = no]) +AT_CHECK([$TEX --halt-on-error pspp.tex], [0], [ignore]) + +test -z "$at_arg_xdvi" || AT_CHECK([$at_arg_xdvi -s 3 pspp.dvi], [0], [ignore]) + +AT_CLEANUP + + +dnl This example emits a table footnote +AT_SETUP([tex table footnotes]) + +AT_DATA([footnote.sps], [dnl +set format = F11.3. +data list notable list /foo * bar * wiz * bang *. +begin data. +1 0 3 1 +3 9 -50 5 +3 4 3 203 +4 -9 0 -4 +98 78 104 2 +3 50 -49 200 +. 4 4 4 +5 3 0 .
+end data. + +correlations + variables = foo bar wiz bang + /print nosig + . +]) + +AT_CHECK([pspp -O format=tex footnote.sps], [0], []) +AT_CHECK([test $(wc -L pspp.tex | awk '{print $1}') -le 80], [0], [ignore]) + +AT_SKIP_IF([test "$TEX" = no]) +AT_CHECK([$TEX --halt-on-error pspp.tex], [0], [ignore]) + +dnl There should be no overfull boxes +AT_CHECK([grep '^Overfull ' pspp.log], [1]) +dnl nor any underfull ones +AT_CHECK([grep '^Underfull ' pspp.log], [1]) + +test -z "$at_arg_xdvi" || AT_CHECK([$at_arg_xdvi -s 3 pspp.dvi], [0], [ignore]) + + +AT_CLEANUP + + +dnl An example with non-ascii characters +AT_SETUP([tex non-ascii]) + +AT_DATA([utf8.sps], [dnl +set format = F11.3. +data list notable list /items (a16). +begin data. +Äpfeln +Öl +Üter +Tschüß +école +côte +français +þekktastur +landbúnað +16°C +powierzchnię +12.03km² +end data. + +frequencies items. +]) + +AT_CHECK([LC_ALL=C.UTF-8 pspp -O format=tex utf8.sps], [0], []) +AT_CHECK([test $(wc -L pspp.tex | awk '{print $1}') -le 80], [0], [ignore]) + +dnl The string "??" should not be present in pspp.tex +AT_CHECK([grep -F '??' pspp.tex], [1]) + +AT_SKIP_IF([test "$TEX" = no]) +AT_CHECK([$TEX --halt-on-error pspp.tex], [0], [ignore]) + +dnl There should be no overfull boxes +AT_CHECK([grep '^Overfull ' pspp.log], [1]) +dnl nor any underfull ones +AT_CHECK([grep '^Underfull ' pspp.log], [1]) + +test -z "$at_arg_xdvi" || AT_CHECK([$at_arg_xdvi -s 3 pspp.dvi], [0], [ignore]) + +AT_CLEANUP + + +dnl This example emits a table which is rather more complicated +dnl It has cells which span multiple columns +AT_SETUP([tex multispan]) + +AT_DATA([t-test.sps], [dnl +set format = F11.3. +data list notable list /score * group (a20). +begin data. +1 this +2 this +3 this +4 this +5 that +6 that +7 that +8 that +end data. + +t-test /variables=score /group=group("this", "that").
+]) + +AT_CHECK([pspp -O format=tex t-test.sps], [0], []) +AT_CHECK([test $(wc -L pspp.tex | awk '{print $1}') -le 80], [0], [ignore]) + +AT_SKIP_IF([test "$TEX" = no]) +AT_CHECK([$TEX --halt-on-error pspp.tex], [0], [ignore]) + +dnl There should be no overfull boxes +AT_CHECK([grep '^Overfull ' pspp.log], [1]) +dnl nor any underfull ones +AT_CHECK([grep '^Underfull ' pspp.log], [1]) + +test -z "$at_arg_xdvi" || AT_CHECK([$at_arg_xdvi -s 3 pspp.dvi], [0], [ignore]) + + +AT_CLEANUP +