/**
	D syntax highlighting.

	Copyright: © 2015 RejectedSoftware e.K.
	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
	Authors: Sönke Ludwig
*/
module ddox.highlight;

import std.algorithm : any;
import std.array : Appender, appender, replace;
import std.range;
import std.string : strip;
import std.uni : isLower, isUpper;


/**
	Takes a piece of D code and outputs a sequence of HTML elements useful for syntax highlighting.

	The output will contain $(LT)span$(GT) elements with the class attribute
	set to the kind of entity that it contains. The class names are kept
	compatible with the ones used for Google's prettify library: "typ", "kwd",
	"com", "str", "lit", "pun", "pln", "spc"

	The only addition is "spc", which denotes a special token sequence starting
	with a "#", such as "#line" or "#!/bin/sh".

	The characters "&", "<" and ">" occurring in the source code are written
	as HTML character references, so that the output can be embedded directly
	into an HTML document.

	Note that this function will only perform actual syntax highlighting if
	the libdparse package is available as a DUB dependency.

	---
	void main(string[] args)
	{
		#line 2
		import std.stdio; // yeah
		writefln("Hello, "~"World!");
		Package pack;
		ddox.entities.Module mod;
	}
	---

	Params:
		dst = Output range where to write the HTML output
		code = The D source code to process
		ident_render = Optional delegate to customize how (qualified)
			identifiers are rendered
*/
void highlightDCode(R)(ref R dst, string code, scope IdentifierRenderCallback ident_render = null)
	if (isOutputRange!(R, char))
{
	string last_class;
	dst.highlightDCodeImpl(code, ident_render, last_class);
	// The implementation leaves the last span open so that adjacent tokens
	// of the same class can share it - close it here.
	if (last_class.length) dst.put("</span>");
}

/// ditto
string highlightDCode(string str, IdentifierRenderCallback ident_render = null)
{
	auto dst = appender!string();
	dst.highlightDCode(str, ident_render);
	return dst.data;
}

unittest {
	void ident_render(string ident, scope void delegate(IdentifierRenderMode, size_t) insert) { insert(IdentifierRenderMode.normal, 0); }
	assert(highlightDCode("@safe", &ident_render) == `<span class="kwd">@safe</span>`);
	assert(highlightDCode("@safe foo", &ident_render) == `<span class="kwd">@safe </span><span class="pln">foo</span>`);
	assert(highlightDCode("@path", &ident_render) == `<span class="pun">@</span><span class="pln">path</span>`);
	assert(highlightDCode("@path foo", &ident_render) == `<span class="pun">@</span><span class="pln">path foo</span>`);
	assert(highlightDCode("@path(", &ident_render) == `<span class="pun">@</span><span class="pln">path</span><span class="pun">(</span>`);
	assert(highlightDCode("@.path", &ident_render) == `<span class="pun">@<wbr/>.</span><span class="pln">path</span>`);
	assert(highlightDCode("@ path", &ident_render) == `<span class="pun">@ </span><span class="pln">path</span>`);

	assert(highlightDCode("@safe") == `<span class="kwd">@safe</span>`);
	assert(highlightDCode("@safe foo") == `<span class="kwd">@safe </span><span class="pln">foo</span>`);
	assert(highlightDCode("@path") == `<span class="pun">@</span><span class="pln">path</span>`);
	assert(highlightDCode("@path foo") == `<span class="pun">@</span><span class="pln">path foo</span>`);
	assert(highlightDCode("@path(") == `<span class="pun">@</span><span class="pln">path</span><span class="pun">(</span>`);
	assert(highlightDCode("@.path") == `<span class="pun">@<wbr/>.</span><span class="pln">path</span>`);
	assert(highlightDCode("@ path") == `<span class="pun">@ </span><span class="pln">path</span>`);

	// HTML special characters must be emitted as character references
	assert(highlightDCode("a < b") == `<span class="pln">a </span><span class="pun">&lt; </span><span class="pln">b</span>`);
	assert(highlightDCode("a && b") == `<span class="pln">a </span><span class="pun">&amp;&amp; </span><span class="pln">b</span>`);
}


/**
	Callback used to customize the rendering of (qualified) identifiers.

	The callback receives the identifier, with its parts joined by single
	dots, and a delegate `insert_ident`. Calling `insert_ident` writes the
	identifier as it appeared in the source to the output, after removing
	the first `nskip` dot-separated parts from it.
*/
alias IdentifierRenderCallback = void delegate(string ident, scope void delegate(IdentifierRenderMode mode, size_t nskip) insert_ident);

/**
	Determines how an identifier inserted from within an
	`IdentifierRenderCallback` interacts with the surrounding span elements.
*/
enum IdentifierRenderMode {
	/// Continue with the currently open span, if its class matches
	normal,
	/// Close the currently open span and emit the identifier using its own,
	/// self-contained sequence of spans
	nested
}

/*
	Performs the actual highlighting.

	Lexes `code` using libdparse and writes one span per run of equally
	classified tokens to `dst`. `last_class` holds the class of the span
	that is currently open (empty if none is open) and is updated on return -
	the caller is responsible for writing the final closing tag.

	If `ident_render` is non-null, sequences of identifiers and dots are
	collected and handed to the callback instead of being written directly.
*/
private void highlightDCodeImpl(R)(ref R dst, string code, scope IdentifierRenderCallback ident_render, ref string last_class)
	if (isOutputRange!(R, char))
{
	import dparse.lexer : DLexer, LexerConfig, StringBehavior, StringCache, WhitespaceBehavior,
		isBasicType, isKeyword, isStringLiteral, isNumberLiteral,
		isOperator, str, tok;
	import std.algorithm : endsWith;
	import std.string : indexOf, stripRight;

	StringCache cache = StringCache(1024 * 4);

	LexerConfig config;
	config.stringBehavior = StringBehavior.source;
	config.whitespaceBehavior = WhitespaceBehavior.include;

	// Writes `text` HTML-escaped, opening a new span only if the class
	// differs from the one of the currently open span.
	void writeWithClass(string text, string cls)
	{
		import std.format : formattedWrite;
		if (last_class != cls) {
			if (last_class.length) dst.put("</span>");
			dst.formattedWrite("<span class=\"%s\">", cls);
			last_class = cls;
		}

		foreach (char ch; text) {
			switch (ch) {
				default: dst.put(ch); break;
				case '&': dst.put("&amp;"); break;
				case '<': dst.put("&lt;"); break;
				case '>': dst.put("&gt;"); break;
			}
		}
	}


	// `symbol` holds the pending identifier with its parts joined by dots,
	// `verbatim_symbol` additionally contains any whitespace tokens that
	// were encountered while the identifier was being collected.
	auto symbol = appender!string;
	auto verbatim_symbol = appender!string;

	// Emits the pending identifier via `ident_render` and resets the buffers.
	void flushSymbol()
	{
		string vsym = verbatim_symbol.data.stripRight();
		if (vsym == ".") {
			// a lone dot is not an identifier - treat it as punctuation
			dst.put("<wbr/>");
			writeWithClass(".", "pun");
		} else {
			ident_render(symbol.data, (IdentifierRenderMode mode, size_t nskip) {
				// drop the first `nskip` dot-separated parts
				string dsym = vsym;
				while (nskip-- > 0) {
					auto idx = dsym.indexOf('.');
					if (idx < 0) break; // nothing left to skip
					dsym = dsym[idx+1 .. $];
				}
				final switch (mode) with (IdentifierRenderMode) {
					case normal:
						highlightDCodeImpl(dst, dsym, null, last_class);
						break;
					case nested:
						if (last_class.length) dst.put("</span>");
						last_class = null;
						string internal_class;
						highlightDCodeImpl(dst, dsym, null, internal_class);
						if (internal_class.length) dst.put("</span>");
						break;
				}
			});
		}
		// write out the trailing whitespace that was stripped above
		if (vsym.length < verbatim_symbol.data.length)
			writeWithClass(verbatim_symbol.data[vsym.length .. $], last_class.length ? last_class : "pln");
		symbol = appender!string();
		verbatim_symbol = appender!string();
	}

	// The "@" token is held back until the following token is known, so
	// that built-in attributes can be written as a single keyword.
	bool last_was_at = false;

	foreach (t; DLexer(cast(ubyte[])code, config, &cache)) {
		if (last_was_at) {
			last_was_at = false;
			switch (t.text) {
				default: writeWithClass("@", "pun"); break;
				case "property", "safe", "trusted", "system", "disable", "nogc":
					writeWithClass("@", "kwd");
					writeWithClass(t.text, "kwd");
					continue;
			}
		}

		if (t.type == tok!"whitespace") {
			// whitespace never starts a new span
			if (symbol.data.length) verbatim_symbol ~= t.text;
			else writeWithClass(t.text, last_class.length ? last_class : "pln");
			continue;
		}


		if (ident_render) {
			if (t.type == tok!"." && !symbol.data.endsWith(".")) {
				symbol ~= ".";
				verbatim_symbol ~= ".";
				continue;
			} else if (t.type == tok!"identifier" && (symbol.data.empty || symbol.data.endsWith("."))) {
				symbol ~= t.text;
				verbatim_symbol ~= t.text;
				continue;
			} else if (symbol.data.length) flushSymbol();
		}

		if (t.type == tok!".") dst.put("<wbr/>");

		// Whitespace tokens never reach this point (see above), so no
		// case for them is needed here.
		if (t.type == tok!"@") last_was_at = true;
		else if (isBasicType(t.type)) writeWithClass(str(t.type), "typ");
		else if (isKeyword(t.type)) writeWithClass(str(t.type), "kwd");
		else if (t.type == tok!"comment") writeWithClass(t.text, "com");
		else if (isStringLiteral(t.type) || t.type == tok!"characterLiteral") writeWithClass(t.text, "str");
		else if (isNumberLiteral(t.type)) writeWithClass(t.text, "lit");
		else if (isOperator(t.type)) writeWithClass(str(t.type), "pun");
		else if (t.type == tok!"specialTokenSequence" || t.type == tok!"scriptLine") writeWithClass(t.text, "spc");
		else if (t.text.strip == "string") writeWithClass(t.text, "typ");
		else if (t.type == tok!"identifier" && t.text.isCamelCase) writeWithClass(t.text, "typ");
		else if (t.type == tok!"identifier") writeWithClass(t.text, "pln");
		else writeWithClass(t.text, "pun");
	}

	// a trailing "@" that was not followed by any token
	if (last_was_at) writeWithClass("@", "pun");

	if (symbol.data.length) flushSymbol();
}

/*
	Heuristic used to classify an identifier as a type name.

	Returns `true` if `text`, after stripping surrounding whitespace, is at
	least two code units long, starts with an upper case character and
	contains at least one lower case character.
*/
private bool isCamelCase(string text)
{
	text = text.strip();
	if (text.length < 2) return false;
	if (!text[0].isUpper) return false;
	if (!text.any!(ch => ch.isLower)) return false;
	return true;
}