module sitemap;

import std.string;
import std.conv : to;
import std.uri : uriLength;
import std.datetime : DateTime;
import std.typecons : Nullable;
import std.exception : enforce;

/// Escapes the five characters that the sitemap protocol requires to be
/// entity-escaped inside XML values: `&`, `'`, `"`, `>` and `<`.
///
/// Params:
///     url = raw URL text
/// Returns: `url` with every reserved character replaced by its XML entity.
private string escapeUrl(string url)
{
    // The table is typed explicitly as string[dchar] so that the
    // character-to-string overload of std.string.translate is selected.
    string[dchar] entities = [
        '&': "&amp;",
        '\'': "&apos;",
        '"': "&quot;",
        '>': "&gt;",
        '<': "&lt;"
    ];
    return translate(url, entities);
}

/// Sitemap builder
/// Reference: https://www.sitemaps.org/protocol.html
class Sitemap
{
    /// Entries that `build` serializes, in order.
    SitemapUrl[] urls;

    /// Formats `d` as an ISO extended date-time string with a trailing "Z".
    /// Reference: https://www.w3.org/TR/NOTE-datetime
    ///
    /// NOTE(review): `DateTime` carries no time zone, so the "Z" suffix
    /// presumes the caller supplies UTC values - confirm against callers.
    string toW3CDatetimeString(DateTime d)
    {
        return d.toISOExtString() ~ "Z";
    }

    /// Serializes `urls` into a sitemap XML document.
    ///
    /// Params:
    ///     pretty = when true, each element is placed on its own line and
    ///              indented with tabs; when false the document is emitted
    ///              without any whitespace between elements.
    /// Returns: the XML document as a string (no trailing newline).
    /// Throws: `Exception` (via `enforce`) when `urls` is empty, when an entry
    ///         is null, when an entry's `loc` is not recognised as a URL by
    ///         `std.uri.uriLength`, or when an entry's `priority` lies outside
    ///         0.0 .. 1.0 (NaN is rejected as well).
    string build(bool pretty = false)
    {
        import std.array : appender;

        // Fail fast, before any output is assembled.
        enforce(urls.length > 0, "Sitemap must have at least one URL");

        // Resolve the formatting once instead of testing `pretty` at every step.
        immutable string newline = pretty ? "\n" : "";
        immutable string urlIndent = pretty ? "\t" : "";
        immutable string fieldIndent = pretty ? "\t\t" : "";

        auto xml = appender!string();

        // Writes one `<tag>value</tag>` child of a <url> element.
        void putField(string tag, string value)
        {
            xml.put(fieldIndent);
            xml.put("<");
            xml.put(tag);
            xml.put(">");
            xml.put(value);
            xml.put("</");
            xml.put(tag);
            xml.put(">");
            xml.put(newline);
        }

        xml.put("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
        xml.put(newline);
        xml.put("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");
        xml.put(newline);

        foreach (u; urls)
        {
            // Validate the whole entry before emitting any part of it.
            enforce(u !is null, "Sitemap URL entry must not be null");
            enforce(uriLength(u.loc) > 0, "Sitemap location must be a URL");
            enforce(u.priority >= 0 && u.priority <= 1, "Sitemap priority must be between 0.0 and 1.0");

            xml.put(urlIndent);
            xml.put("<url>");
            xml.put(newline);

            putField("loc", escapeUrl(u.loc));

            if (!u.lastmod.isNull)
                putField("lastmod", toW3CDatetimeString(u.lastmod.get));

            if (u.changefreq != ChangeFrequency.NotSet)
                putField("changefreq", u.changefreq.to!string().toLower);

            // 0.5 is the protocol default, so it is omitted from the output.
            if (u.priority != 0.5f)
                putField("priority", u.priority.to!string());

            xml.put(urlIndent);
            xml.put("</url>");
            xml.put(newline);
        }

        xml.put("</urlset>");

        return xml.data;
    }
}

/// Entry in the sitemap
class SitemapUrl
{
    /// Required.
    /// URL of the page. This URL must begin with the protocol (such as http) and end with a trailing slash, if your web server requires it. This value must be less than 2,048 characters.
    string loc;
    /// Optional.
    /// The date of last modification of the file.
    Nullable!DateTime lastmod;
    /// Optional.
    /// How frequently the page is likely to change. This value provides general information to search engines and may not correlate exactly to how often they crawl the page.
    /// The value Always should be used to describe documents that change each time they are accessed. The value Never should be used to describe archived URLs.
    ChangeFrequency changefreq = ChangeFrequency.NotSet;
    /// Optional.
    /// The priority of this URL relative to other URLs on your site. Valid values range from 0.0 to 1.0. This value does not affect how your pages are compared to pages on other sites; it only lets the search engines know which pages you deem most important for the crawlers.
    /// The default priority of a page is 0.5.
    float priority = 0.5f;
}

/// Values for the `<changefreq>` element. `Sitemap.build` writes the member
/// name in lower case and omits the element entirely for `NotSet`.
enum ChangeFrequency
{
    NotSet,
    Always,
    Hourly,
    Daily,
    Weekly,
    Monthly,
    Yearly,
    Never
}