/[armedbear]/public_html/toctool.py
ViewVC logotype

Contents of /public_html/toctool.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 12008 - (show annotations)
Sun Jun 7 21:25:48 2009 UTC (4 years, 10 months ago) by ehuelsmann
File MIME type: text/x-python
File size: 6932 byte(s)
Send in the new site.
1 #!/usr/bin/env python
2
3 """\
4 This tool regenerates and replaces the ToC in an HTML file from the actual
5 structure of <div>s and <h[2345]>s present in the body of the document.
6 The section to be overwritten is identified as the XML subtree
7 rooted at <ol id="toc">.
8
9 Usage: ./toctool.py filename...
10
11 This file is a copy of r37798 from the Subversion repository.
12
13 """
14
15 import sys
16 import os
17 import xml.parsers.expat
18
19
class Index:
    """Nested table of contents built up while scanning a document.

    ``tree`` is a list of levels.  A level is a list whose first item is an
    ``(id, title)`` tuple and whose remaining items are the levels nested
    beneath that heading.  ``title`` holds the document title once known.
    """

    def __init__(self):
        self.title = None
        self.tree = []
        # Stack of the lists currently open for appending; the last entry
        # is the list that receives the next new level.
        self._ptr_stack = [self.tree]

    def addLevel(self, id, title):
        """Open a new level for the heading (id, title) inside the current one."""
        entry = [(id, title)]
        parent = self._ptr_stack[-1]
        parent.append(entry)
        self._ptr_stack.append(entry)

    def upLevel(self):
        """Close the innermost open level."""
        self._ptr_stack.pop()

    def prettyString(self):
        """Return the tree as plain text, one (id, title) tuple per line."""
        lines = []

        def walk(depth, item):
            # Anything that is not a list is a leaf: an (id, title) tuple.
            if not isinstance(item, list):
                lines.append("%s%s" % (" " * depth, item))
                return
            for child in item:
                walk(depth + 1, child)

        # Starting at -2 puts the top-level headings at depth 0: one step
        # for the tree list itself and one for each level list.
        walk(-2, self.tree)
        return "\n".join(lines)

    def renderXML(self):
        """Return the tree as the markup of an <ol id="toc"> element."""
        lines = ['<ol id="toc">']

        def emit(depth, level):
            pad = ' ' * depth
            anchor, caption = level[0][0], level[0][1]
            children = level[1:]
            if not children:
                # Leaf heading: the whole item fits on one line.
                lines.append('%s<li><a href="#%s">%s</a></li>'
                             % (pad, anchor, caption))
                return
            lines.append('%s<li><a href="#%s">%s</a>'
                         % (pad, anchor, caption))
            lines.append('%s<ol>' % pad)
            for child in children:
                emit(depth + 1, child)
            lines.append('%s</ol>' % pad)
            # The trailing comment names the item this </li> closes.
            lines.append('%s</li> <!-- %s -->' % (pad, anchor))

        for top in self.tree:
            emit(1, top)
        lines.append('</ol>')
        return "\n".join(lines)
64
65
class ExpatParseJob:
    """Base class that runs an expat parse using the instance as handler set.

    Every attribute of the instance whose name ends in ``Handler`` is
    installed on the parser under the same name.  A subclass may set
    ``_ordered_attributes`` to choose whether start-element handlers get
    attributes as a flat [name, value, ...] list (true) or as a dict
    (false, the default).
    """

    def parse(self, file):
        """Parse ``file`` with expat, dispatching events to this instance.

        ``file`` is handed to the parser's ParseFile(), so it only needs a
        read(nbytes) method.  Parse errors raised by expat propagate.
        """
        p = xml.parsers.expat.ParserCreate()
        p.ordered_attributes = getattr(self, '_ordered_attributes', False)
        try:
            # Python 2 only: ask for byte strings instead of unicode.
            p.returns_unicode = False
        except AttributeError:
            # Python 3 parsers have no such attribute and reject the
            # assignment; handlers always receive str there.
            pass
        p.specified_attributes = True
        for name in dir(self):
            if name.endswith('Handler'):
                setattr(p, name, getattr(self, name))
        p.ParseFile(file)
76
77
class IndexBuildParse(ExpatParseJob):
    """First pass over the document: collect its title and heading tree.

    A section is recognised as a <div> whose ``class`` is one of ``keys``
    (h2..h5); the next element whose tag name equals that class supplies
    the section title, and the div's ``id`` becomes the link target.  The
    result is available as ``self.index`` after parse().
    """

    # Used as a set: div classes that mark a ToC section.
    keys = {'h2':None, 'h3':None, 'h4':None, 'h5':None}

    def __init__(self):
        self.index = Index()
        self.keyptr = 0
        # False, or the tag name whose text is currently being gathered.
        self.collecting_text = False
        self.text = ''
        # Tag name of the heading expected inside the most recent section div.
        self.waiting_for_elt = None
        self.saved_id = None
        # (tag name, opened-a-section?) for every element currently open.
        self.elt_stack = []
        self._ordered_attributes = False

    def StartElementHandler(self, name, attrs):
        """Track section divs and start gathering title/heading text."""
        if name == 'div':
            cl = attrs.get('class')
            if cl in self.keys:
                self.waiting_for_elt = cl
                self.saved_id = attrs.get('id')
                # True marks this div so its end tag closes the index level.
                self.elt_stack.append((name, True))
                return
        elif name == 'title':
            self.collecting_text = name
            self.text = ''
        elif name == self.waiting_for_elt:
            self.waiting_for_elt = None
            self.collecting_text = name
            self.text = ''
        self.elt_stack.append((name, False))

    def EndElementHandler(self, name):
        """Finish a title/heading, and close the index level of a section div.

        Raises RuntimeError if any other element ends while text is being
        gathered (a nested element ended inside the title or heading).
        """
        if self.collecting_text:
            if name == self.collecting_text:
                if name == 'title':
                    self.index.title = self.text
                else:
                    self.index.addLevel(self.saved_id, self.text)
                    self.saved_id = None
                self.collecting_text = False
            else:
                raise RuntimeError(
                    'unexpected </%s> while collecting the text of <%s>'
                    % (name, self.collecting_text))
        eltinfo = self.elt_stack.pop(-1)
        assert eltinfo[0] == name
        if eltinfo[1]:
            self.index.upLevel()

    def DefaultHandler(self, data):
        """Accumulate data passed to the default handler while collecting."""
        if self.collecting_text:
            self.text += data
127
128
def attrlist_to_dict(l):
    """Turn a flat [name, value, name, value, ...] list into a dict.

    A name that occurs more than once keeps its last value.  A list of odd
    length raises IndexError.
    """
    return {l[pos]: l[pos + 1] for pos in range(0, len(l), 2)}
134
135
def escape_entities(s):
    """Return ``s`` escaped for use inside a double-quoted attribute value.

    '&' is replaced first so the entities produced by the later
    replacements are not escaped a second time.  The double quote is
    escaped as well, since an unescaped one would end the attribute early.
    """
    return (s.replace('&', '&amp;')
             .replace('<', '&lt;')
             .replace('>', '&gt;')
             .replace('"', '&quot;'))
138
139
class IndexInsertParse(ExpatParseJob):
    """Second pass: copy the document to ``outfp`` with a regenerated ToC.

    The document is re-serialised from parser events.  Output is queued as
    (token, tag name) pairs for the current line and written by done_line()
    whenever a newline is seen.  The subtree rooted at <ol id="toc"> is
    dropped and replaced by ``index.renderXML()``.
    """

    def __init__(self, index, outfp):
        self._ordered_attributes = True
        self.index = index
        self.outfp = outfp
        # (tag name, is-the-toc-element?) for every element currently open.
        self.elt_stack = []
        self.skipping_toc = False

        self._line_in_progress = []
        # Name of a start tag whose closing '>' or ' />' is still pending.
        self._element_open = None
        self.linepos = 0
        self.indentpos = 0

        # The dicts below are used as sets of tag names.
        self.do_not_minimize = {'script':None}
        self.do_not_indent = {'div':None, 'a':None, 'strong':None, 'em':None}
        self.do_not_wrap = {'div':None, 'strong':None, 'em':None, 'li':None}

        if self.index.title == 'Subversion Design':
            self.do_not_wrap['a'] = None

    def parse(self, file):
        """Run the parse, then flush a final line lacking a trailing newline.

        Lines are only written when a newline is seen, so without this the
        tokens after the last newline of the input would never reach outfp.
        """
        ExpatParseJob.parse(self, file)
        if self._line_in_progress:
            self.done_line()

    def put_token(self, token, tag_name):
        """Queue ``token`` for the current line.

        ``tag_name`` is the element the token belongs to, or None when the
        token is not part of a tag.
        """
        self._line_in_progress.append((token, tag_name))

    def done_line(self):
        """Write the queued line plus a newline, wrapping it where allowed.

        A line break is inserted before a token only when the running
        length exceeds 79 and both this token and the previous one belong
        to tags that are not listed in do_not_wrap.
        """
        linepos = 0
        last_was_tag = False
        outq = []
        for token, tag_name in self._line_in_progress:
            is_tag = tag_name is not None and tag_name not in self.do_not_wrap
            no_indent_if_wrap = tag_name in self.do_not_indent
            linepos += len(token)
            if linepos > 79 and is_tag and last_was_tag:
                token = token.lstrip(' ')
                if no_indent_if_wrap:
                    linepos = len(token)
                    outq.append('\n')
                else:
                    # NOTE(review): linepos counts an indent of 2 here but
                    # the string below holds a single space -- confirm
                    # which of the two is intended.
                    linepos = len(token) + 2
                    outq.append('\n ')
            outq.append(token)
            last_was_tag = is_tag
        outq.append('\n')
        for i in outq:
            self.outfp.write(i)
        del self._line_in_progress[:]

    def _finish_pending(self, minimized_form):
        """Close the start tag left open by StartElementHandler, if any.

        Queues ' />' when ``minimized_form`` is true, '>' otherwise.
        Returns True only when the tag was closed in minimized form, in
        which case no separate end tag must be written.
        """
        if self._element_open is not None:
            name = self._element_open
            self._element_open = None
            if minimized_form:
                self.put_token(' />', name)
                return True
            else:
                self.put_token('>', name)
        return False

    def StartElementHandler(self, name, attrs):
        """Queue a start tag, or swap in the new ToC at <ol id="toc">."""
        self._finish_pending(False)
        if name == 'ol' and attrlist_to_dict(attrs).get('id') == 'toc':
            # Queue the ToC instead of writing it straight to outfp, so it
            # stays behind whatever is already queued for this line.
            self.put_token(self.index.renderXML(), None)
            self.skipping_toc = True
            self.elt_stack.append((name, True))
            return
        if not self.skipping_toc:
            self.put_token("<%s" % name, name)
            # attrs is a flat [name, value, name, value, ...] list.
            while attrs:
                aname = attrs.pop(0)
                aval = escape_entities(attrs.pop(0))
                self.put_token(' %s="%s"' % (aname, aval), name)
            # Leave the tag open: whether it ends in '>' or ' />' depends
            # on what comes next.
            self._element_open = name
        self.elt_stack.append((name, False))

    def EndElementHandler(self, name):
        """Queue an end tag (or minimize the open tag); leave ToC skipping."""
        if not self.skipping_toc:
            # An element with no content at all is written as <name />,
            # unless its name is listed in do_not_minimize.
            if not self._finish_pending(name not in self.do_not_minimize):
                self.put_token("</%s>" % name, name)
        eltinfo = self.elt_stack.pop(-1)
        assert eltinfo[0] == name
        if eltinfo[1]:
            self.skipping_toc = False

    def DefaultHandler(self, data):
        """Queue data passed to the default handler; newlines end the line."""
        if self.skipping_toc:
            return
        self._finish_pending(False)
        # Split on newlines rather than relying on the parser to deliver
        # each '\n' in a call of its own.
        pieces = data.split('\n')
        for piece in pieces[:-1]:
            if piece:
                self.put_token(piece, None)
            self.done_line()
        if pieces[-1]:
            self.put_token(pieces[-1], None)
233
234
def process(fn):
    """Regenerate the ToC of the file ``fn`` in place.

    The file is parsed twice: once to build the index, once to write a
    copy with the new ToC to ``fn + '.new'``.  On success the original is
    renamed to ``fn + '.toctool-backup~'`` and the copy takes its place.
    If parsing raises, both files are closed and ``fn`` is left untouched.
    """
    newfn = fn + '.new'
    # Binary mode: the parser's ParseFile() does its own decoding, and on
    # Python 3 it rejects a file whose read() returns text.
    with open(fn, 'rb') as infp:
        builder = IndexBuildParse()
        builder.parse(infp)

        # Rewind for the second pass over the same file.
        infp.seek(0)
        with open(newfn, 'w') as outfp:
            inserter = IndexInsertParse(builder.index, outfp)
            inserter.parse(infp)

    # Both files are closed by now; swap the new file in.
    os.rename(fn, fn + '.toctool-backup~')
    os.rename(newfn, fn)
249
250
def main():
    """Run process() on every filename given on the command line."""
    filenames = sys.argv[1:]
    for filename in filenames:
        process(filename)


# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    main()

  ViewVC Help
Powered by ViewVC 1.1.5