1
-
2
1
//
3
2
// wiki-registry.c
4
3
//
@@ -96,31 +95,37 @@ wiki_registry_parse(const char *html) {
96
95
97
96
GumboNode * body = gumbo_get_element_by_id ("wiki-body" , output -> root );
98
97
if (body ) {
99
- // grab all category `<h2 />`s
100
- list_t * h2s = gumbo_get_elements_by_tag_name ("h2" , body );
101
- list_node_t * heading_node ;
102
- list_iterator_t * heading_iterator = list_iterator_new (h2s , LIST_HEAD );
103
- while ((heading_node = list_iterator_next (heading_iterator ))) {
104
- GumboNode * heading = (GumboNode * ) heading_node -> val ;
105
- char * category = gumbo_text_content (heading );
106
- // die if we failed to parse a category, as it's
107
- // almost certainly a malloc error
108
- if (!category ) break ;
109
- trim (case_lower (category ));
110
- GumboVector * siblings = & heading -> parent -> v .element .children ;
111
- size_t pos = heading -> index_within_parent ;
112
-
113
- // skip elements until the UL
114
- // TODO: don't hardcode position here
115
- // 2:
116
- // 1 - whitespace
117
- // 2 - actual node
118
- GumboNode * ul = siblings -> data [pos + 2 ];
119
- if (GUMBO_TAG_UL != ul -> v .element .tag ) {
120
- free (category );
98
+ GumboNode * markdown_body = ((GumboNode * )((GumboVector )body -> v .element .children ).data [1 ]);
99
+ GumboVector children = (GumboVector )markdown_body -> v .element .children ;
100
+
101
+ size_t count = children .length - 1 ;
102
+
103
+ for (size_t index = 0 ; index < count ; index ++ ) {
104
+ GumboNode * heading = (GumboNode * )children .data [index ];
105
+ GumboNode * ul = NULL ;
106
+
107
+ if (heading -> v .element .tag != GUMBO_TAG_DIV ) {
108
+ continue ;
109
+ }
110
+
111
+ GumboAttribute * node_id = gumbo_get_attribute (& heading -> v .element .attributes , "class" );
112
+ if (node_id == NULL || strncmp (node_id -> value , "markdown-heading" , 16 ) != 0 ) {
121
113
continue ;
122
114
}
123
115
116
+ for (; index < count ; index ++ ) {
117
+ ul = (GumboNode * )children .data [index ];
118
+
119
+ if (ul -> v .element .tag == GUMBO_TAG_UL ) {
120
+ break ;
121
+ }
122
+ }
123
+
124
+ list_t * h2 = gumbo_get_elements_by_tag_name ("h2" , heading );
125
+ char * category = gumbo_text_content (h2 -> head -> val );
126
+ if (!category ) break ;
127
+ trim (case_lower (category ));
128
+
124
129
list_t * lis = gumbo_get_elements_by_tag_name ("li" , ul );
125
130
list_iterator_t * li_iterator = list_iterator_new (lis , LIST_HEAD );
126
131
list_node_t * li_node ;
@@ -138,8 +143,6 @@ wiki_registry_parse(const char *html) {
138
143
list_destroy (lis );
139
144
free (category );
140
145
}
141
- list_iterator_destroy (heading_iterator );
142
- list_destroy (h2s );
143
146
}
144
147
145
148
gumbo_destroy_output (& kGumboDefaultOptions , output );
0 commit comments