Skip to content
This repository was archived by the owner on Feb 16, 2023. It is now read-only.

Commit 8522d62

Browse files
authored
Merge pull request #11 from ian-r-rose/parse_rendered_html
Parse rendered html
2 parents a0be47f + 7a6dca4 commit 8522d62

File tree

4 files changed

+130
-18
lines changed

4 files changed

+130
-18
lines changed

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
"dependencies": {
3333
"@jupyterlab/application": "^0.15.0",
3434
"@jupyterlab/apputils": "^0.15.5",
35+
"@jupyterlab/cells": "^0.15.4",
3536
"@jupyterlab/coreutils": "^1.0.10",
3637
"@jupyterlab/docmanager": "^0.15.5",
3738
"@jupyterlab/fileeditor": "^0.15.4",

src/extension.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,10 @@ function activateTOC(
7070
restorer.add(toc, 'juputerlab-toc');
7171

7272
// Create a notebook TableOfContentsRegistry.IGenerator
73-
const notebookGenerator = createNotebookGenerator(notebookTracker);
73+
const notebookGenerator = createNotebookGenerator(
74+
notebookTracker,
75+
rendermime.sanitizer,
76+
);
7477
registry.addGenerator(notebookGenerator);
7578

7679
// Create a markdown editor TableOfContentsRegistry.IGenerator

src/generators.ts

Lines changed: 101 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
// Copyright (c) Jupyter Development Team.
22
// Distributed under the terms of the Modified BSD License.
33

4+
import {ISanitizer} from '@jupyterlab/apputils';
5+
46
import {FileEditor, IEditorTracker} from '@jupyterlab/fileeditor';
57

8+
import {MarkdownCell} from '@jupyterlab/cells';
9+
610
import {INotebookTracker, NotebookPanel} from '@jupyterlab/notebook';
711

812
import {each} from '@phosphor/algorithm';
@@ -20,6 +24,7 @@ import {IHeading} from './toc';
2024
*/
2125
export function createNotebookGenerator(
2226
tracker: INotebookTracker,
27+
sanitizer: ISanitizer,
2328
): TableOfContentsRegistry.IGenerator<NotebookPanel> {
2429
return {
2530
tracker,
@@ -33,14 +38,35 @@ export function createNotebookGenerator(
3338
return;
3439
}
3540

36-
const onClickFactory = () => {
37-
return () => {
38-
cell.node.scrollIntoView();
41+
// If the cell is rendered, generate the ToC items from
42+
// the HTML. If it is not rendered, generate them from
43+
// the text of the cell.
44+
if ((cell as MarkdownCell).rendered) {
45+
const onClickFactory = (el: Element) => {
46+
return () => {
47+
if (!(cell as MarkdownCell).rendered) {
48+
cell.node.scrollIntoView();
49+
} else {
50+
el.scrollIntoView();
51+
}
52+
};
3953
};
40-
};
41-
headings = headings.concat(
42-
Private.getMarkdownHeadings(model.value.text, onClickFactory),
43-
);
54+
headings = headings.concat(
55+
Private.getRenderedHTMLHeadings(cell.node, onClickFactory, sanitizer),
56+
);
57+
} else {
58+
const onClickFactory = (line: number) => {
59+
return () => {
60+
cell.node.scrollIntoView();
61+
if (!(cell as MarkdownCell).rendered) {
62+
cell.editor.setCursorPosition({line, column: 0});
63+
}
64+
};
65+
};
66+
headings = headings.concat(
67+
Private.getMarkdownHeadings(model.value.text, onClickFactory),
68+
);
69+
}
4470
});
4571
return headings;
4672
},
@@ -120,7 +146,7 @@ export function createLatexGenerator(
120146
/^\s*\\(section|subsection|subsubsection){(.+)}/,
121147
);
122148
if (match) {
123-
const level = Private.LatexLevels[match[1]];
149+
const level = Private.latexLevels[match[1]];
124150
const text = match[2];
125151
const onClick = () => {
126152
editor.editor.setCursorPosition({line: line.idx, column: 0});
@@ -137,10 +163,14 @@ export function createLatexGenerator(
137163
* A private namespace for miscellaneous things.
138164
*/
139165
namespace Private {
166+
/**
167+
* Given a string of markdown, get the markdown headings
168+
* in that string.
169+
*/
140170
export function getMarkdownHeadings(
141171
text: string,
142172
onClickFactory: (line: number) => (() => void),
143-
) {
173+
): IHeading[] {
144174
// Split the text into lines.
145175
const lines = text.split('\n');
146176
let headings: IHeading[] = [];
@@ -174,7 +204,7 @@ namespace Private {
174204
// Finally test for HTML headers. This will not catch multiline
175205
// headers, nor will it catch multiple headers on the same line.
176206
// It should do a decent job of catching many, though.
177-
match = line.match(/<h([1-6])>(.*)<\/h\1>/);
207+
match = line.match(/<h([1-6])>(.*)<\/h\1>/i);
178208
if (match) {
179209
const level = parseInt(match[1]);
180210
const text = match[2];
@@ -183,12 +213,37 @@ namespace Private {
183213
});
184214
return headings;
185215
}
216+
217+
/**
218+
* Given an HTML element, generate ToC headings
219+
* by finding all the headers and making IHeading objects for them.
220+
*/
221+
export function getRenderedHTMLHeadings(
222+
node: HTMLElement,
223+
onClickFactory: (el: Element) => (() => void),
224+
sanitizer: ISanitizer,
225+
): IHeading[] {
226+
let headings: IHeading[] = [];
227+
let headingNodes = node.querySelectorAll('h1, h2, h3, h4, h5, h6');
228+
for (let i = 0; i < headingNodes.length; i++) {
229+
const heading = headingNodes[i];
230+
const level = parseInt(heading.tagName[1]);
231+
const text = heading.textContent;
232+
let html = sanitizer.sanitize(heading.innerHTML, sanitizerOptions);
233+
html = html.replace('¶', ''); // Remove the anchor symbol.
234+
235+
const onClick = onClickFactory(heading);
236+
headings.push({level, text, html, onClick});
237+
}
238+
return headings;
239+
}
240+
186241
/**
187242
* A mapping from LaTeX section headers to HTML header
188243
* levels. `part` and `chapter` are less common in my experience,
189244
* so assign them to header level 1.
190245
*/
191-
export const LatexLevels: {[label: string]: number} = {
246+
export const latexLevels: {[label: string]: number} = {
192247
part: 1, // Only available for report and book classes
193248
chapter: 1, // Only available for report and book classes
194249
section: 1,
@@ -197,4 +252,39 @@ namespace Private {
197252
paragraph: 4,
198253
subparagraph: 5,
199254
};
255+
256+
/**
257+
* Allowed HTML tags for the ToC entries. We use this to
258+
* sanitize HTML headings, if they are given. We specifically
259+
* disallow anchor tags, since we are adding our own.
260+
*/
261+
const sanitizerOptions = {
262+
allowedTags: [
263+
'p',
264+
'blockquote',
265+
'b',
266+
'i',
267+
'strong',
268+
'em',
269+
'strike',
270+
'code',
271+
'br',
272+
'div',
273+
'span',
274+
'pre',
275+
'del',
276+
],
277+
allowedAttributes: {
278+
// Allow "class" attribute for <code> tags.
279+
code: ['class'],
280+
// Allow "class" attribute for <span> tags.
281+
span: ['class'],
282+
// Allow "class" attribute for <div> tags.
283+
div: ['class'],
284+
// Allow "class" attribute for <p> tags.
285+
p: ['class'],
286+
// Allow "class" attribute for <pre> tags.
287+
pre: ['class'],
288+
},
289+
};
200290
}

src/toc.tsx

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ export namespace TableOfContents {
151151
}
152152

153153
/**
154-
* An object that represents a markdown heading.
154+
* An object that represents a heading.
155155
*/
156156
export interface IHeading {
157157
/**
@@ -170,6 +170,16 @@ export interface IHeading {
170170
* the parent widget to this item.
171171
*/
172172
onClick: () => void;
173+
174+
/**
175+
* If there is special markup, we can instead
176+
* render the heading using a raw HTML string. This
177+
* HTML *should be properly sanitized!*
178+
*
179+
* For instance, this can be used to render
180+
* already-rendered-to-html markdown headings.
181+
*/
182+
html?: string;
173183
}
174184

175185
/**
@@ -219,11 +229,19 @@ export class TOCItem extends React.Component<ITOCItemProps, {}> {
219229
heading.onClick();
220230
};
221231

222-
return React.createElement(
223-
`h${level}`,
224-
{onClick: clickHandler},
225-
<a href="">{heading.text}</a>,
226-
);
232+
if (heading.html) {
233+
const el = React.createElement(`h${level}`, {
234+
onClick: clickHandler,
235+
dangerouslySetInnerHTML: {__html: heading.html},
236+
});
237+
return <a href="">{el}</a>;
238+
} else {
239+
return React.createElement(
240+
`h${level}`,
241+
{onClick: clickHandler},
242+
<a href="">{heading.text}</a>,
243+
);
244+
}
227245
}
228246
}
229247

0 commit comments

Comments
 (0)