#!/usr/bin/tclsh
##
## GOSH - a script by Norman Feske written in July'2003
##
## This script converts plain ASCII text to a more eye-candy textual format
## such as tex. It was written with expandability in mind. Thus, it should
## be a child's play to add more backends. The only thing a child has to
## know about are regular expressions.
##
## This file is released under the terms of the GNU General Public Licence
##
## Data model used throughout this file: a "text line" is a two-element
## list of the form "linenum text" (see 'linenum' and 'linetxt'), and a
## "text block" is a list of such text lines.
##

#############################
#                           #
# LATEX BACKEND DEFINITIONS #
#                           #
#############################

### FILTER TEXTUAL OUTPUT ###
#
# Translate the inline markup of one piece of text to LaTeX and escape
# characters that are special to LaTeX.
#
# Handled markup: _emphasized_, *bold*, 'monospace', #monospace#
# (deprecated, triggers a warning on stderr) and [bracketed] tags.
# A bracketed tag becomes a \ref if it names a known entry of the
# global 'references' array, a \texttt if it starts with 'http://',
# a literal '[...]' gap if it is exactly '...', and a \cite otherwise.
#
# If a procedure called 'out_latex_private' exists (for example, defined
# by a style file), the result is passed through it before it is returned.
#
proc out_latex {string} {
	global references

	# the markup patterns below need a character in front of and behind
	# the marked text, so pad the string (the padding is removed later)
	set string " $string "

	# italic style
	while {[regexp {([ \"\(])_(.+?)_([ \)\.\",!?\-])} $string dummy head_char emph_text tail_char]} {
		regsub -all {_} $emph_text " " emph_text
		regsub {([ \"\(])_(.+?)_([ \)\.\",!?\-])} $string "$head_char\\emph{$emph_text}$tail_char" string
	}

	# bold style
	while {[regexp {([ \"\(])\*(.+?)\*([ \)\.\",!?])} $string dummy head_char bf_text tail_char]} {
		regsub -all {\*} $bf_text " " bf_text
		regsub {([ \"\(])\*(.+?)\*([ \)\.\",!?])} $string "$head_char\\textbf{$bf_text}$tail_char" string
	}

	# monospace style
	while {[regexp {([ \"\(])\'(.+?)\'([ \-\)\.\"\',!?])} $string dummy head_char code_text tail_char]} {
		regsub {([ \"\(])\'(.+?)\'([ \-\)\.\"\',!?])} $string "$head_char\\texttt{$code_text}$tail_char" string
	}

	# FIXME: kick out monospace style via hashes
	while {[regexp {\#([^#]+)\#} $string dummy code_text]} {
		regsub {\#([^#]+)\#} $string "\\texttt{$code_text}" string
		puts stderr "Warning: Monospace using #hashes# is deprecated because it looks ugly."
		puts stderr " Please use 'apostrophes' instead. Thanks, your GOSH maintainer."
	}

	# hexadecimal numbers
#	regsub -all {0x(([a-fA-F0-9]+)[\+\-\*\/]?(0x)?)+} $string "\\texttt{&}" string

	# insert references and citations
	while {[regexp {\[([^\]]+)\]} $string dummy ref_text]} {
		if {[info exists references($ref_text,type)]} {
			regsub {\[([^\]]+)\]} $string "\\ref{[label_latex $ref_text]}" string
		} else {
			if {[regexp {^http://} $ref_text dummy]} {
				set url ""
				regexp {^(\w+:[^ ]+)} $ref_text url
				# '&' is special in a regsub replacement, so hide it
				# behind a placeholder that is resolved further below
				regsub -all {&} $url "_§%and%§_" url
				regsub {\[([^\]]+)\]} $string "\\texttt{$url}" string
			} elseif {[regexp {^\.\.\.$} $ref_text dummy]} {
				regsub {\[([^\]]+)\]} $string "_citation_gap_" string
			} else {
				# protect underscores of citation keys from the
				# underscore escaping further below
				regsub -all {_} $ref_text {adamndunderlineshite!} ref_text
				regsub {\[([^\]]+)\]} $string "\\cite{$ref_text}" string
			}
		}
	}

	# resolve placeholders, convert quotes, and escape special characters
	regsub -all {_§%and%§_} $string {\&} string
	regsub -all {_citation_gap_} $string "\[\\ldots\{\}\]" string
	regsub -all {"([\w\\])} $string "``\\1" string
	regsub -all {([\.\?\!\w\}])"} $string "\\1''" string
	regsub -all {\^} $string "\\^{ }" string
	regsub -all {_} $string "\\_" string
	regsub -all {#} $string "\\#" string
	regsub -all {%} $string "\\%" string
	regsub -all {\$} $string "\\$" string
	regsub -all {&} $string {\\&} string

	# remove the padding again
	regsub -all {^ *} $string "" string
	regsub -all { *$} $string "" string

	# symbols and arrows (longer arrows first, so that their parts are
	# not consumed by the shorter patterns)
	regsub -all {~} $string {\\textasciitilde{}} string
	regsub -all {µ} $string "\$\\mu\$" string
	regsub -all {<->} $string "\$\\leftrightarrow\$" string
	regsub -all -- {->} $string "\$\\rightarrow\$" string
	regsub -all {<-} $string "\$\\leftarrow\$" string
	regsub -all {<=>} $string "\$\\Leftrightarrow\$" string
	regsub -all {=>} $string "\$\\Rightarrow\$" string
	regsub -all {<=} $string "\$\\Leftarrow\$" string
	regsub -all {<} $string "\\mbox{\$<\$}" string
	regsub -all {>} $string "\\mbox{\$>\$}" string
	regsub -all {e\.g\.} $string "e.\\,g." string
	regsub -all {i\.e\.} $string "i.\\,e." string

	# bring back the underscores of citation keys
	regsub -all {adamndunderlineshite!} $string "_" string

	# optional post-processing hook
	set priv_function out_latex_private
	if {[info procs $priv_function] == $priv_function} {
		set string [$priv_function $string]
	}
	return $string
}


### PRINT STRING WITHOUT TRAILING NEWLINE ###
proc print {string} {
	puts -nonewline $string
}


### PRINT ONE LINE OF OUTPUT ###
#
# The line is prefixed with the current indentation if the global
# 'config_indent' is true. A line that consists only of blanks is
# printed as an empty line.
#
proc printline {string} {
	global config_indent
	if {$config_indent} {
		set string "[indent]$string"
	}
	regsub {^ *$} $string "" string
	puts $string
}


### FILTER LABEL ###
#
# Turn an arbitrary string into a label: German umlauts are transcribed
# and every character other than letters, digits, and blanks is dropped.
#
proc label_latex {string} {
	regsub -all {ä} $string "ae" string
	regsub -all {ö} $string "oe" string
	regsub -all {ü} $string "ue" string
	regsub -all {Ä} $string "Ae" string
	regsub -all {Ö} $string "Oe" string
	regsub -all {Ü} $string "Ue" string
	regsub -all {ß} $string "ss" string
	regsub -all {[^a-zA-Z0-9 ]} $string "" string
	return $string
}


### WRITE HEADER OF TEX FILE ###
#
# Prints the preamble and, if the global 'title' is not empty, the
# title (plus the global 'authors' if not empty) followed by \maketitle.
#
proc produce_head_latex {} {
	global title authors

	printline {\documentclass[11pt,ngerman,a4paper,normalheadings,DIV14]{scrartcl}}
	printline {\usepackage[T1]{fontenc}}
	printline {\usepackage[latin1]{inputenc}}
	printline {\usepackage[small,bf,hang]{caption2}}
	printline {\usepackage[ngerman]{babel}}
	printline {\usepackage{epsfig}}
	printline {\usepackage{mathptmx}}
	printline {\usepackage{helvet}}
	printline {\usepackage{courier}}
	printline {\emergencystretch = 10pt}
	printline {\clubpenalty = 10000}
	printline {\widowpenalty = 10000}
	printline {\displaywidowpenalty = 10000}
	printline {\usepackage{amsmath}}
	printline {\usepackage{amssymb}}
	printline {\begin{document}}
	if {$title != ""} {
		printline "\\title{[out_latex $title]}"
		if {$authors != ""} {
			printline "\\author{[out_latex $authors]}"
		}
		printline {\maketitle}
	}
}


### WRITE TAIL OF TEX FILE ###
proc produce_tail_latex {} {
	printline "\\newpage"
	printline "\\bibliographystyle{plain}"
	printline "\\bibliography{custom,master}"
	printline "\\end{document}"
}


### ANNOTATION ###
#
# Strips the leading '|' (and one optional blank) from each line and
# processes the remaining text as content of a footnotesize group.
#
proc process_annotation_latex {txtblock} {
	set new_txtblock {}
	foreach txtline $txtblock {
		set txtline [lineregsub {^\| ?} $txtline ""]
		lappend new_txtblock $txtline
	}
	# NOTE: the opening brace in the first string and the closing brace
	# in the last string balance each other within this procedure body
	printline "{ \\footnotesize \\it"
	handle_txtblock annotation $new_txtblock
	printline "}"
}


### VERBATIM ###
#
# Prints the block within a verbatim environment. The leading '!' of
# each line is removed and tabs are replaced. The output bypasses
# 'printline', so no indentation is added.
#
proc process_verbatim_latex {txtblock} {

	# drop empty trailing elements; the length check keeps the loop
	# from spinning forever once the list has become empty
	while {([llength $txtblock] > 0) && ([lindex $txtblock end] == "")} {
		set txtblock [lrange $txtblock 0 [expr {[llength $txtblock] - 2}]]
	}
	puts "\\begin{verbatim}"
	foreach txtline $txtblock {
		set txt [linetxt $txtline]
		regsub {^\!} $txt "" txt
		regsub -all {\t} $txt " " txt
		puts "$txt"
	}
	puts "\\end{verbatim}"
}


### ITEMIZE ###
proc process_itemize_latex {txtblock} {
	printline "\\begin{itemize}"
	handle_txtblock itemize $txtblock
	printline "\\end{itemize}"
}


### UTILITY: EXTRACT CONTENT OF AN ITEM ###
#
# Removes the item marker ('* ' or '# ') from the first line and the
# two-blank indentation from all following lines. Returns the resulting
# text block.
#
proc extract_item_text {itemtxtblock} {
	set txtline [lindex $itemtxtblock 0]
	set txtline [lineregsub {^\*\ } $txtline ""]
	set txtline [lineregsub {^\#\ } $txtline ""]
	lappend txtblock $txtline
	foreach txtline [lrange $itemtxtblock 1 end] {
		set txtline [lineregsub {^\ \ } $txtline ""]
		lappend txtblock $txtline
	}
	return $txtblock
}


### ITEM ###
proc process_item_latex {itemtxtblock} {
	printline "\\item"
	handle_txtblock item [extract_item_text $itemtxtblock]
}


### DESCRIPTION ###
proc process_description_latex {txtblock} {
	printline "\\begin{description}"
	handle_txtblock description $txtblock
	printline "\\end{description}"
}


### DESCRIPTION ITEM ###
#
# The first line has the form ':name: text'. The name becomes the
# optional argument of \item, the remaining text plus the un-indented
# following lines are processed as content of the item.
#
proc process_descitem_latex {itemtxtblock} {
	set txtline [lindex $itemtxtblock 0]
	set desc_name ""
	regexp {^\:([^\:]+)\:} [linetxt $txtline] dummy desc_name
	set txtline [lineregsub {^\:([^\:]+)\: *} $txtline ""]
	printline "\\item\[[out_latex $desc_name]\]"

	# if no text follows the name on the first line, emit an empty box
	# in its place
	if {[linetxt $txtline] == ""} {
		set txtline [lineset $txtline "\\mbox{}[linetxt $txtline]"]
	}
	lappend txtblock $txtline
	foreach txtline [lrange $itemtxtblock 1 end] {
		lappend txtblock [lineregsub {^\ \ } $txtline ""]
	}
	handle_txtblock descitem $txtblock
}


### ENUMERATION ###
proc process_enumeration_latex {txtblock} {
	printline "\\begin{enumerate}"
	handle_txtblock enumeration $txtblock
	printline "\\end{enumerate}"
}


### ENUM ITEM ###
proc process_enum_latex {itemtxtblock} {
	process_item_latex $itemtxtblock
}


### PLAIN ###
proc process_plain_latex {plaintxtblock} {
	foreach txtline $plaintxtblock {
		printline [out_latex [linetxt $txtline]]
	}
}


### EMPTY ###
proc process_empty_latex {emptytxtblock} {
	foreach txtline $emptytxtblock {
		printline "[linetxt $txtline]"
	}
}


### ABSTRACT ###
#
# The first line of the block is the title, the second line (the
# underline) is skipped, the rest is the content of the abstract.
#
proc process_abstract_latex {txtblock} {
	set title [linetxt [lindex $txtblock 0]]
	printline ""
	printline "% -+*|\[ [string toupper $title] \]|*+-\n"
	printline "\\begin{abstract} \\label{$title}"
	handle_txtblock abstract [lrange $txtblock 2 end]
	printline "\\end{abstract}"
}


### GENERATE SECTION ENVIRONMENT ###
#
# A section can be excluded from the table of contents
# by prefixing its title with a '*'. This function
# returns the proper section environment and a
# corresponding label for cross-referencing.
#
proc section_env_latex {title} {
	set out ""
	if {[regexp {^\*(.*)$} $title dummy title]} {
		append out "*"
	}
	append out "{[out_latex $title]} \\label{[label_latex $title]}"
	return $out
}


### CHAPTER ###
#
# A GOSH chapter is emitted as LaTeX \section. The first line of the
# block is the title, the second line (the underline) is skipped.
#
proc process_chapter_latex {txtblock} {
	set title [linetxt [lindex $txtblock 0]]
	printline ""
	printline "% -+*|\[ [string toupper $title] \]|*+-\n"
	printline "\\section[section_env_latex $title]"
	handle_txtblock chapter [lrange $txtblock 2 end]
}


### SECTION ###
#
# A GOSH section is emitted as LaTeX \subsection.
#
proc process_section_latex {txtblock} {
	set title [linetxt [lindex $txtblock 0]]
	printline ""
	printline "% -+*|\[ [string toupper $title] \]|*+-\n"
	printline "\\subsection[section_env_latex $title]"
	handle_txtblock section [lrange $txtblock 2 end]
}


### SUBSECTION ###
#
# A GOSH subsection is emitted as LaTeX \subsubsection.
#
proc process_subsection_latex {txtblock} {
	set title [linetxt [lindex $txtblock 0]]
	printline ""
	printline "% -+*|\[ [string toupper $title] \]|*+-\n"
	printline "\\subsubsection[section_env_latex $title]"
	handle_txtblock subsection [lrange $txtblock 2 end]
}


### PARAGRAPH ###
proc process_paragraph_latex {txtblock} {
	set title [linetxt [lindex $txtblock 0]]
	printline ""
	printline "% -+*|\[ [string toupper $title] \]|*+-\n"
	printline "\\paragraph{[out_latex $title]}"
	handle_txtblock paragraph [lrange $txtblock 2 end]
}


### IMAGE ###
#
# The first line carries a tag of the form '[image name ...]'. The
# tag may contain a size in percent, a rotation angle in degrees, and
# the keyword 'full-span'. The text of the block with the tag removed
# is used as caption.
#
proc process_image_latex {txtblock} {
	set img_info ""
	set img_size 80
	set img_angle "0"
	set img_star ""
	set img_relw "columnwidth"

	# match against the text of the line rather than the
	# linenum/text pair, consistent with 'collect_references'
	regexp {\[(image \w+.*)\]} [linetxt [lindex $txtblock 0]] dummy img_info
	if {$img_info == ""} return

	set img_name [lindex $img_info 1]
	regexp { (\d+)%} $img_info dummy img_size
	regexp { (\d+)°} $img_info dummy img_angle
	if {[regexp {full-span} $img_info dummy]} {
		set img_star "*"
		set img_relw "textwidth"
	}

	set img_cap ""
	foreach img_capline $txtblock {
		set txt [linetxt $img_capline]
		regsub {^\[.*\]} $txt "" txt
		regsub {^ *} $txt "" txt
		append img_cap $txt " "
	}
	regsub { *$} $img_cap "" img_cap

	printline ""
	printline "\\begin{figure$img_star}\[tbp\]\n[indent] \\begin{center}"
	# The expression is deliberately left unbraced: the digits of
	# img_size are pasted in front of '.0' to form a floating-point
	# literal, which also keeps sizes with leading zeros from being
	# read as octal numbers.
	printline " \\epsfig{file=$img_name,angle=$img_angle,width=[expr $img_size.0/100]\\$img_relw}"
	printline " \\caption{[out_latex $img_cap]}"
	printline " \\label{[label_latex $img_name]}"
	printline " \\end{center}\n[indent]\\end{figure$img_star}\n"
}


### TABLE ###
#
# colattr - list with one alignment ('left' or anything else, which is
#           treated as right-aligned) per column
# rows    - list of 'style lines' pairs as returned by 'style_split',
#           only the styles 'tabrow' and 'tabhline' produce output
# caption - text block of the caption, starting with a '[table ...]' tag
#
proc output_table_latex {colattr rows caption} {
	global config_tex_table_floating

	set tabenv "table"

	# determine additional table attributes that are specified after the table label
	if {[regexp {\[table +[^ ]+ +([^\]]+)} [linetxt [lindex $caption 0]] dummy attr]} {
		if {[regexp {full-span} $attr]} {
			set tabenv "table*"
		}
	}

	if {$config_tex_table_floating} {
		printline "\\begin\{$tabenv\}\[ht\]"
	} else {
		printline "\\begin\{$tabenv\}\[ht!]"
	}
	printline "\{\\center"

	# column specification of the tabular environment
	print "[indent] \\begin\{tabular\}\{|"
	foreach attr $colattr {
		if {$attr == "left"} {
			print "l|"
		} else {
			print "r|"
		}
	}
	print "\}"
	printline ""
	printline " \\hline"

	set firstrow 1
	foreach row $rows {
		set rowtype [lindex $row 0]
		set rowlines [lindex $row 1]

		if {$rowtype == "tabrow"} {
			foreach rowline $rowlines {
				set cells {}
				foreach txt [split [linetxt $rowline] "|"] {
					lappend cells [out_latex $txt]
				}
				printline " [join $cells " & "] \\\\"
			}
		}

		if {$rowtype == "tabhline"} {
			# the first separator (below the table head) is doubled
			if {$firstrow} {
				printline " \\hline"
				set firstrow 0
			}
			printline " \\hline"
		}
	}
	printline " \\hline"
	printline " \\end\{tabular\}"

	# join the caption lines to one string
	set cap ""
	foreach capline $caption {
		set txt [linetxt $capline]
		regsub {^ +} $txt " " txt
		append cap $txt
	}
	if {[regexp {^\[table ([^\]]+)\](.*)$} $cap dummy caplab captxt]} {
		regsub {^ +} $captxt "" captxt
		printline " \\caption\{[out_latex $captxt]\}"
		printline " \\label\{[label_latex $caplab]\}"
	}
	printline "\}"
	printline "\\end\{$tabenv\}"
}


#
# Process command line arguments for the Latex backend
#
set config_tex_table_floating [regexp {\--tex-table-floating} $argv dummy]


##############################
#                            #
# DOCUMENT STRUCTURE BACKEND #
#                            #
##############################

proc process_header_struct {txtblock} {
	printline "HEADER"
	foreach txtline $txtblock {
		printline " $txtline"
	}
}


proc process_verbatim_struct {txtblock} {
	printline "VERBATIM"
}


proc process_itemize_struct {txtblock} {
	printline "ITEMIZE"
	handle_txtblock itemize $txtblock
}


proc process_description_struct {txtblock} {
	# the pattern is anchored, so it must be applied to the text of the
	# line and not to the linenum/text pair
	if {[regexp {^\:([^\:]+)\:} [linetxt [lindex $txtblock 0]] dummy identifier]} {
		printline "DESCRIPTION $identifier"
	}
}


proc process_item_struct {itemtxtblock} {
	printline "ITEM"
	# The item marker must be stripped from the text of the line via
	# 'extract_item_text' as done by the Latex backend. Otherwise, the
	# first line is detected as nested itemize again and the recursion
	# never ends.
	handle_txtblock item [extract_item_text $itemtxtblock]
}


proc process_enumeration_struct {txtblock} {
	printline "ENUMERATION"
	handle_txtblock enumeration $txtblock
}


proc process_enum_struct {itemtxtblock} {
	printline "ENUMERATION ITEM"
	# see 'process_item_struct' for why the marker must be stripped
	handle_txtblock enum [extract_item_text $itemtxtblock]
}


proc process_plain_struct {txtblock} {
	printline "PLAIN"
	foreach txtline $txtblock {
		printline " $txtline"
	}
}


proc process_abstract_struct {txtblock} {
	set title [lindex $txtblock 0]
	printline "ABSTRACT \"$title\""
	handle_txtblock abstract [lrange $txtblock 2 end]
}


proc process_bibliography_struct {txtblock} {
	set title [lindex $txtblock 0]
	printline "BIBLIOGRAPHY \"$title\""
	handle_txtblock bibliography [lrange $txtblock 2 end]
}


proc process_bibitem_struct {txtblock} {
	printline "BIBITEM"
	foreach txtline $txtblock {
		printline " $txtline"
	}
}


proc process_chapter_struct {txtblock} {
	set title [lindex $txtblock 0]
	printline "CHAPTER \"$title\""
	handle_txtblock chapter [lrange $txtblock 2 end]
}


proc process_section_struct {txtblock} {
	set title [lindex $txtblock 0]
	printline "SECTION \"$title\""
	handle_txtblock section [lrange $txtblock 2 end]
}


proc process_subsection_struct {txtblock} {
	set title [lindex $txtblock 0]
	printline "SUBSECTION \"$title\""
	handle_txtblock subsection [lrange $txtblock 2 end]
}


proc process_paragraph_struct {txtblock} {
	set title [lindex $txtblock 0]
	printline "PARAGRAPH \"$title\""
	handle_txtblock paragraph [lrange $txtblock 2 end]
}


proc process_image_struct {txtblock} {
	printline "IMAGE"
	foreach txtline $txtblock {
		printline " $txtline"
	}
}


proc output_table_struct {head cells caption} {
	printline "TABLE"
}


### TABLE (BACKEND INDEPENDENT) ###
#
# The text block holds a '[tabref N]' token that was inserted by
# 'extract_tables'. The lines of the table are fetched from the global
# 'tables' array, split into rows and caption, and handed over to the
# procedure 'output_table_<outmode>' if such a procedure exists.
#
proc process_table {txtblock} {
	global outmode tables

	# read table index from '[tabref..]' tag
	set table_idx ""
	regexp {\[tabref ([0-9]+)} [linetxt [lindex $txtblock 0]] dummy table_idx

	# a tag without a valid index (for example, one typed in by hand)
	# is reported like any other text that cannot be interpreted
	if {![info exists tables($table_idx)]} {
		process_undefined $txtblock
	}
	set txtblock $tables($table_idx)

	# collect the caption lines of the table as a proper list
	set cap {}
	foreach tabpart [style_split {tabmain tabcap empty undefined} $txtblock] {
		if {[lindex $tabpart 0] == "tabcap"} {
			foreach capline [lindex $tabpart 1] {
				lappend cap $capline
			}
		}
	}

	set rows [style_split {tabrow tabhline undefined} $txtblock]

	# determine alignment of table columns based on the first
	# line of the table head
	set tabhead [linetxt [lindex [lindex [lindex $rows 0] 1] 0]]
	set headcells [split $tabhead "|"]
	set colattr {}
	foreach headcell $headcells {
		set align "left"
		if {[regexp {[^ ] $} $headcell]} { set align "right" }
		if {[regexp {^ [^ ]} $headcell]} { set align "left" }
		lappend colattr $align
	}

	set tabout_function "output_table_"
	append tabout_function $outmode
	if {[info procs $tabout_function] == $tabout_function} {
		$tabout_function $colattr $rows $cap
	}
}


### UNDEFINED - REPORT ERROR AND EXIT ###
proc process_undefined {txtblock} {
	puts stderr "Error at line [linenum [lindex $txtblock 0]]: cannot figure out what you mean with"
	foreach txtline $txtblock {
		puts stderr " \"[linetxt $txtline]\""
	}
	exit 1
}


### HEADER - FIND OUT ABOUT TITLE AND AUTHORS ###
#
# Consecutive non-empty lines are joined to one block. Each empty line
# terminates the current block. The first block becomes the global
# 'title', each later block is assigned to the global 'authors'.
#
proc process_header {txtblock} {
	global title authors
	set block ""
	foreach txtline $txtblock {
		set txtline [lineregsub {^\ +} $txtline ""]
		if {[linetxt $txtline] != ""} {
			append block " " [linetxt $txtline]
		} else {
			regsub {^\ +} $block "" block
			if {$block != ""} {
				if {$title == ""} {
					set title $block
				} else {
					set authors $block
				}
			}
			set block ""
		}
	}
}


### RAW OUTPUT ###
#
# Prints each line without the leading ':' (and one optional blank),
# bypassing any output filtering and indentation.
#
proc process_raw {txtblock} {
	foreach txtline $txtblock {
		puts [linetxt [lineregsub {^\: ?} $txtline ""]]
	}
}


##############################
#                            #
# TEXT STRUCTURE DEFINITIONS #
#                            #
##############################
#
# For each style, the following regular expressions can be defined:
#
#   style_begin     - must match the first line of a block
#   style_next      - if defined, must match the line after the first line
#   style_continue  - must match a further line (with a newline appended)
#                     for the block to continue with this line
#   style_contnext  - if defined, must additionally match the line after
#                     that line (with a newline appended)
#   style_substyles - list of styles that may occur within the block
#

### HEADER ###
set style_begin(header) {^\ +.+}
set style_continue(header) {(^\ +.+)|(^\n)}

### EVERYTHING AFTER HEADER ###
set style_begin(afterheader) {^[^\ ]}
set style_continue(afterheader) {()}

### VERBATIM ###
set style_begin(verbatim) {^\!}
set style_continue(verbatim) {^\!}

### ANNOTATION ###
set style_begin(annotation) {^\|}
set style_continue(annotation) {^\|}
set style_substyles(annotation) {itemize enumeration description verbatim plain}

### RAW ###
set style_begin(raw) {(^\: )|(^\:$)}
set style_continue(raw) {(^\: )|(^\:$)}

### ITEMIZE ###
set style_begin(itemize) {^\*\ .+}
set style_continue(itemize) {(^\ \ +[^\ ])|(^\n)|(^\*\ .+)}
set style_contnext(itemize) {^.*$}
set style_substyles(itemize) {item}

### ITEM ###
set style_begin(item) {^\*\ .+}
set style_continue(item) {(^\ \ .+)|(^\n)}
set style_substyles(item) {annotation raw itemize enumeration description verbatim plain}

### DESCRIPTION ###
set style_begin(description) {^\:[^\:]+\:}
set style_continue(description) {(^\ \ +[^\ ])|(^\n)|(^\:[\w ]+\:)}
#set style_contnext(description) {^[^|]*$}
set style_substyles(description) {descitem}

### DESCRIPTION ITEM ###
set style_begin(descitem) {^\:[^\:]+\:}
set style_continue(descitem) {(^\ \ .+)|(^\n)}
set style_substyles(descitem) {annotation raw itemize enumeration description verbatim plain}

### ENUMERATION ###
set style_begin(enumeration) {^\#\ .+}
set style_continue(enumeration) {(^\ \ +[^\ ])|(^\n)|(^\#\ .+)}
#set style_contnext(enumeration) {^[^|]*$}
set style_substyles(enumeration) {enum}

### ENUM ITEM ###
set style_begin(enum) {^\#\ .+}
set style_continue(enum) {(^\ \ .+)|(^\n)}
set style_substyles(enum) {annotation raw itemize enumeration description verbatim plain}

### PLAIN ###
set style_begin(plain) {^[^\ \n;].*}
set style_continue(plain) {^[\w\('\"\[]}
#set style_contnext(plain) {^[^-=~#]}

### ABSTRACT ###
set style_begin(abstract) {^Abstract.*}
set style_next(abstract) {^\#\#\#}
set style_continue(abstract) {.*}
set style_contnext(abstract) {(^[^(\#\#\#)])|(^\n)}
set style_substyles(abstract) {itemize enumeration plain}

### BIBLIOGRAPHY ###
set style_begin(bibliography) {^Bibliography}
set style_next(bibliography) {^\#\#\#}
set style_continue(bibliography) {.*}
set style_contnext(bibliography) {(^[^(\#\#\#)])|(^\n)}
set style_substyles(bibliography) {bibitem}

### BIBLIOGRAPHY ITEM ###
set style_begin(bibitem) {^\[\w+]}
set style_continue(bibitem) {^.+\n}

### CHAPTER ###
set style_begin(chapter) $style_begin(plain)
set style_next(chapter) {^\#{3}}
set style_continue(chapter) {.*}
set style_contnext(chapter) {^#{0,2}[^#]}
set style_substyles(chapter) {section paragraph image table annotation raw itemize enumeration description verbatim plain}

### SECTION ###
set style_begin(section) $style_begin(plain)
set style_next(section) {^\={3}}
set style_continue(section) {.*}
set style_contnext(section) {^\={0,2}[^\=]}
set style_substyles(section) {subsection paragraph image table annotation raw itemize enumeration description verbatim plain}

### SUBSECTION ###
set style_begin(subsection) $style_begin(plain)
set style_next(subsection) {^\~{3}}
set style_continue(subsection) {.*}
set style_contnext(subsection) {^\~{0,2}[^\~]}
set style_substyles(subsection) {paragraph image table annotation raw itemize enumeration description verbatim plain}

### PARAGRAPH ###
set style_begin(paragraph) $style_begin(plain)
set style_next(paragraph) {^\-{3}}
set style_continue(paragraph) {.*}
set style_contnext(paragraph) {^\-{0,2}[^\-]}
set style_substyles(paragraph) {image table annotation raw itemize enumeration description verbatim plain}

### IMAGE ###
set style_begin(image) {^\[image .+\]}
set style_continue(image) {(^ .+)|(^$)}

### TABLE ###
set style_begin(table) {\[tabref[^\]]*\]}
set style_continue(table) {^$}
#set style_continue(table) {(^\n)|(^\[table[^\]]*\])|(^ .+\n)}
#set style_begin(table) {\|}
#set style_next(table) {\-\-\-}
#set style_continue(table) {(\|)|(\-\-\-)|(^\n)|(^\[table.*\])|(^ .+\n)}

### TABLE CAPTION ###
set style_begin(tabcap) {^\[table .+\]}
set style_continue(tabcap) {^.+\n}

### MAIN PART OF TABLE ###
set style_begin(tabmain) {\|}
set style_next(tabmain) {\-\-\-}
set style_continue(tabmain) {(\|)|(\-\-\-)}

### TABLE ROW ###
set style_begin(tabrow) {\|}
set style_continue(tabrow) {\|}
#set style_next(tabrow) {\|}

### TABLE HLINE ###
set style_begin(tabhline) {\-\-\-}
set style_continue(tabhline) {^$}

### EMPTY ###
set style_begin(empty) {^ *$}
set style_continue(empty) {^ *$}

### UNDEFINED ###
set style_begin(undefined) {.*}
set style_continue(undefined) {\\{100}}

### DOCUMENT HEAD ###
set style_substyles(documenthead) {header afterheader}

### DOCUMENT MAIN ###
set style_substyles(documentmain) {header abstract bibliography chapter paragraph image table annotation raw itemize enumeration description verbatim plain}


####################
#                  #
# PARSER FUNCTIONS #
#                  #
####################

### DETERMINE STYLE OF SPECIFIED LINE ###
#
# Returns the first style of 'styles' whose 'style_begin' pattern
# matches the line and whose 'style_next' pattern, if defined, matches
# the next line. Returns 'nostyle' if no style applies.
#
proc get_style {styles txtline next_txtline} {
	global style_begin style_next
	foreach style $styles {
		if {[regexp $style_begin($style) [linetxt $txtline]]} {
			if {![info exists style_next($style)]} {
				return $style
			}
			if {[regexp $style_next($style) [linetxt $next_txtline]]} {
				return $style
			}
		}
	}
	return nostyle
}


### DETERMINE IF THE GIVEN STYLE IS STILL VALID ###
#
# Returns 1 if the line continues a block of the given style, 0
# otherwise. The patterns are applied to the text with a newline
# appended.
#
proc style_continues {style txtline next_txtline} {
	global style_continue style_contnext
	if {[regexp $style_continue($style) "[linetxt $txtline]\n"]} {
		if {![info exists style_contnext($style)]} {
			return 1
		}
		if {[regexp $style_contnext($style) "[linetxt $next_txtline]\n"]} {
			return 1
		}
	}
	return 0
}


### SPLIT A TEXTUAL BLOCK INTO A LIST OF DIFFERENT STYLES ###
#
# Returns a list of 'style lines' pairs, one pair per consecutive run
# of lines that belongs to the same style.
#
proc style_split {styles txtblock} {
	set style_block_list {}
	set i 0
	set txtlen [llength $txtblock]
	set curr_txtline [lindex $txtblock 0]
	set next_txtline [lindex $txtblock 1]
	while {$i < $txtlen} {
		set style [get_style $styles $curr_txtline $next_txtline]

		# read current style block until its end
		set style_block [list $curr_txtline]
		incr i
		set curr_txtline [lindex $txtblock $i]
		set next_txtline [lindex $txtblock [expr {$i + 1}]]

		# check the bounds first, so that 'style_continues' is never
		# asked about a line behind the end of the block
		while {($i < $txtlen) && [style_continues $style $curr_txtline $next_txtline]} {
			lappend style_block $curr_txtline
			incr i
			set curr_txtline [lindex $txtblock $i]
			set next_txtline [lindex $txtblock [expr {$i + 1}]]
		}
		lappend style_block_list [list $style $style_block]
	}
	return $style_block_list
}


### APPLY THE GIVEN STYLES TO THE SPECIFIED TEXT BLOCK ###
#
# The text is split by using the substyles of 'txtstyle' (plus 'empty'
# and 'undefined'). Each resulting block is passed to the procedure
# 'process_<style>_<outmode>' or, if that one does not exist, to
# 'process_<style>'. Blocks without any matching procedure are skipped.
# The global 'depth' is incremented while a block is processed.
#
proc handle_txtblock {txtstyle txtcontent} {
	global style_substyles outmode depth

	set styles $style_substyles($txtstyle)
	lappend styles empty undefined

	foreach style_block [style_split $styles $txtcontent] {
		set style [lindex $style_block 0]
		set content [lindex $style_block 1]

		incr depth
		set gen_function "process_$style"
		set spec_function "${gen_function}_$outmode"
		if {[info procs $spec_function] == $spec_function} {
			$spec_function $content
		} elseif {[info procs $gen_function] == $gen_function} {
			$gen_function $content
		}
		incr depth -1
	}
}


### RETURN LINE NUMBER OF SPECIFIED LINE ###
proc linenum {line} {
	return [lindex $line 0]
}


### RETURN TEXT STRING OF SPECIFIED LINE ###
proc linetxt {line} {
	return [lindex $line 1]
}


### APPLY REGEXP SUBSTITUTION TO A LINE ###
#
# Returns a new line with the same line number and the text after the
# first match of 'pattern' has been replaced by 'replacement'.
#
proc lineregsub {pattern txtline replacement} {
	set txt [linetxt $txtline]
	regsub $pattern $txt $replacement txt
	return [list [linenum $txtline] $txt]
}


### ASSIGN NEW TEXT TO THE SPECIFIED LINE ###
proc lineset {txtline newtxt} {
	return [list [lindex $txtline 0] $newtxt]
}


### RETURN A STRING WITH CURRENT NUMBER OF INDENTATION BLANKS ###
proc indent {} {
	global depth
	set result ""
	for {set i 0} {$i < $depth} {incr i} {
		append result " "
	}
	return $result
}


### FIND OUT ABOUT REFERENCES PROVIDED BY THE DOCUMENT ###
#
# Fills the global 'references' array with the type and index of each
# chapter, section, subsection, image, and table, and appends the
# titles of chapters, sections, and subsections to the global
# 'toc_refs' list.
#
proc collect_references {txtblock} {
	global references toc_refs

	foreach var {chapter_cnt section_cnt subsection_cnt image_cnt table_cnt} {set $var 0}
	set curr_chapter ""
	set curr_section ""

	# Each line is examined together with its successor. An empty
	# successor is added for the last line, so that an image tag or a
	# table caption at the very end of the text is registered, too.
	set following_lines [lrange $txtblock 1 end]
	lappend following_lines {}

	set curr_line [lindex $txtblock 0]
	foreach next_line $following_lines {
		set style [get_style [list chapter section subsection image tabcap] $curr_line $next_line]
		set txt [linetxt $curr_line]

		if {(($style == "chapter") || ($style == "section") || ($style == "subsection"))} {
			set references($txt,type) $style
			lappend toc_refs $txt
			if {$style == "chapter"} {
				incr chapter_cnt
				set curr_chapter $txt
				set section_cnt 0
				set subsection_cnt 0
				set references($txt,index) $chapter_cnt
			} elseif {$style == "section"} {
				incr section_cnt
				set curr_section $txt
				set subsection_cnt 0
				set references($txt,chapter) $curr_chapter
				set references($txt,index) $section_cnt
			} elseif {$style == "subsection"} {
				incr subsection_cnt
				set references($txt,section) $curr_section
				set references($txt,index) $subsection_cnt
			}
		} elseif {$style == "image"} {
			if {[regexp {\[(image \w+.*)\]} $txt dummy img_info]} {
				incr image_cnt
				set img_name [lindex $img_info 1]
				set references($img_name,type) $style
				set references($img_name,index) $image_cnt
			}
		} elseif {$style == "tabcap"} {
			if {[regexp {\[(table \w+.*)\]} $txt dummy tab_info]} {
				incr table_cnt
				set tab_name [lindex $tab_info 1]
				set references($tab_name,type) $style
				set references($tab_name,index) $table_cnt
			}
		}
		set curr_line $next_line
	}
}


### DETECT PRESENCE OF A TABLE AT SPECIFIED LINE INDEX ###
#
# Returns number of text lines that make up the table.
# A number of less than 3 indicates an invalid table.
#
proc detect_table {start_idx max_idx} {
	global txtcontent

	# first line must contain a blank that is followed, not necessarily
	# directly, by a pipe symbol
	set line [linetxt [lindex $txtcontent $start_idx]]
	if {![regexp { .*[|]} $line]} { return 0 }

	# further lines must contain either '---' or pipes
	set idx [expr {$start_idx + 1}]
	set valid 0
	while {($idx < $max_idx) && [regexp {(---)|([|])} [linetxt [lindex $txtcontent $idx]]]} {

		# a valid table must feature at least one horizontal separator
		if {[regexp -- {---} [linetxt [lindex $txtcontent $idx]]]} { set valid 1 }
		incr idx
	}
	if {$valid == 0} { return 0 }

	# right after the table, we accept empty lines
	while {($idx < $max_idx) && [regexp {^ *$} [linetxt [lindex $txtcontent $idx]]]} {
		incr idx
	}

	# Detect caption of the table.
	# The caption begins with a '[table' tag and is followed
	# by any number of indented continuation lines.
	if {[regexp {^\[table} [linetxt [lindex $txtcontent $idx]]]} {
		incr idx
		while {($idx < $max_idx) && [regexp {^ [^ ]} [linetxt [lindex $txtcontent $idx]]]} {
			incr idx
		}
	}

	# eat empty lines after caption
	while {($idx < $max_idx) && [regexp {^ *$} [linetxt [lindex $txtcontent $idx]]]} {
		incr idx
	}

	return [expr {$idx - $start_idx}]
}


### EXTRACT TABLES FROM TEXT ###
#
# Each table gets inserted into the 'tables' array.
# The corresponding array index is kept within the
# text as attribute of the table label.
#
proc extract_tables {} {
	global txtcontent tables num_tables

	set num_tables 0
	set max_idx [llength $txtcontent]
	for {set idx 0} {$idx < $max_idx} {incr idx} {
		set table_len [detect_table $idx $max_idx]

		# a valid table consumes at least 3 lines of text
		if {$table_len >= 3} {
			incr num_tables
			set table_end [expr {$idx + $table_len - 1}]

			# store text lines of the table in 'tables' array
			set tables($num_tables) [lrange $txtcontent $idx $table_end]

			# replace table in text by a reference token pointing to the
			# corresponding 'tables' index
			set txtcontent [lreplace $txtcontent $idx $table_end [list "-1" "\[tabref $num_tables\]"]]

			# the text became shorter, keep the upper bound in sync
			set max_idx [llength $txtcontent]
		}
	}
}


################
#              #
# MAIN PROGRAM #
#              #
################

# Global configuration and document state. This must be set up before
# the first call of 'printline', which reads 'config_indent' and (via
# 'indent') 'depth'.
set outmode latex
set depth 0
set title ""
set authors ""
set toc_refs {}
set config_indent 1

# assist the user a bit
if {([llength $argv] == 0) || ([regexp {\--help} $argv])} {
	printline "Convert ASCII to Latex"
	printline " usage: gosh <textfile> > <texfile>"
	printline "        gosh --style <style> <textfile> > <outfile>"
	exit
}

# read text file and kick out any comments
set txtfilename [lindex $argv end]
if {[catch {open $txtfilename r} txtfile]} {
	# on failure, 'txtfile' holds the error message of 'open', which
	# already names the file
	puts stderr "Error: $txtfile"
	exit 1
}
set txtlines [split [read -nonewline $txtfile] "\n"]
close $txtfile

# lines starting with ';' are comments, the line numbers of the
# remaining lines still refer to the original file
set txtcontent {}
set cnt 1
foreach txtline $txtlines {
	if {![regexp {^;} $txtline]} {lappend txtcontent [list $cnt $txtline]}
	incr cnt
}


### FIND FILE FOR SPECIFIED STYLE NAME ###
#
# The suffix '.gosh' is appended to the name if missing. The file is
# searched at the specified location first and in the directory of the
# gosh script second. If it cannot be found, an error is printed and
# the program exits.
#
proc get_style_file {style_name} {
	global argv0

	if {![string match "*.gosh" $style_name]} {
		append style_name ".gosh"
	}

	# If gosh is started via a symbolic link, look next to the link
	# target. A relative link target is relative to the directory of
	# the link. 'file join' discards that directory if the target is
	# an absolute path.
	set gosh_path $argv0
	if {[file type $gosh_path] == "link"} {
		set gosh_path [file join [file dirname $gosh_path] [file readlink $gosh_path]]
	}

	if {![file exists $style_name]} {
		set try_file "[file dirname $gosh_path]/$style_name"
		if {[file exists $try_file]} {
			set style_name $try_file
		}
	}

	if {![file exists $style_name]} {
		puts stderr "Error: style file $style_name does not exist"
		exit 1
	}
	return $style_name
}


# process arguments
while {[regexp {\--style ([^ ]+)} $argv dummy style_name]} {
	# a broken style file does not abort the conversion, but the user
	# gets to know about it
	if {[catch { source [get_style_file $style_name] } style_error]} {
		puts stderr "Warning: error while loading style $style_name: $style_error"
	}
	regsub {\--style [^ ]+} $argv "" argv
}

# find out about internal references
collect_references $txtcontent

# parse text for tables
extract_tables

# find out about title and authors
handle_txtblock documenthead $txtcontent


### DETERMINE IF A CHAPTER CONTAINS AT LEAST ONE SECTION ###
proc chapter_has_section {chapter} {
	global toc_refs references
	foreach ref $toc_refs {
		if {[info exists references($ref,chapter)]} {
			if {"$references($ref,chapter)" == "$chapter"} {
				return 1
			}
		}
	}
	return 0
}


# generate output head
set produce_function "produce_head_$outmode"
if {[info procs $produce_function] == $produce_function} {
	$produce_function
}

# generate main output
handle_txtblock documentmain $txtcontent

# generate output tail
set produce_function "produce_tail_$outmode"
if {[info procs $produce_function] == $produce_function} {
	$produce_function
}