1 module sitemap;
2 import std.string;
3 import std.conv : to;
4 import std.uri : uriLength;
5 import std.datetime : DateTime;
6 import std.typecons : Nullable;
7 import std.exception : enforce;
8 
/// Entity-escapes a URL so it can be embedded as XML character data.
///
/// Replaces each of the five characters that are special in XML
/// (`&`, `'`, `"`, `>`, `<`) with its predefined entity. Every other
/// character, including non-ASCII text, is copied through unchanged.
///
/// Note: every `&` is replaced, so input that is already entity-escaped
/// will be escaped a second time.
///
/// Params:
///     url = the raw (unescaped) URL text
/// Returns: a copy of `url` with the XML special characters replaced
private string escapeUrl(string url)
{
	// Explicitly typed so the table matches translate's `S[dchar]` parameter.
	string[dchar] entities = [
		'&': "&amp;",
		'\'': "&apos;",
		'"': "&quot;",
		'>': "&gt;",
		'<': "&lt;"
	];
	return translate(url, entities);
}
19 
/// Collects URL entries and renders them as a sitemap XML document.
/// Protocol reference: https://www.sitemaps.org/protocol.html
class Sitemap
{
	/// Entries written by `build`, in array order.
	SitemapUrl[] urls;

	/// Formats `d` as its ISO extended string followed by a literal "Z".
	/// Format reference: https://www.w3.org/TR/NOTE-datetime
	string toW3CDatetimeString(DateTime d)
	{
		string stamp = d.toISOExtString();
		stamp ~= "Z";
		return stamp;
	}

	/// Renders the sitemap as an XML string.
	///
	/// Params:
	///     pretty = when true, every element is placed on its own line and
	///              indented with tabs; when false, the output has no
	///              whitespace between elements
	/// Returns: the complete XML document
	/// Throws: an exception (via `enforce`) when `urls` is empty, when an
	///         entry's `loc` is not recognised by `uriLength`, or when an
	///         entry's `priority` lies outside 0.0 .. 1.0
	string build(bool pretty = false)
	{
		enforce(urls.length > 0, "Sitemap must have at least one URL");

		string xml;

		// Appends one fragment; in pretty mode it is preceded by `depth`
		// tabs and followed by a newline.
		void emit(int depth, string text)
		{
			if (!pretty)
			{
				xml ~= text;
				return;
			}
			foreach (i; 0 .. depth)
				xml ~= "\t";
			xml ~= text;
			xml ~= "\n";
		}

		emit(0, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
		emit(0, "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");

		foreach (entry; urls)
		{
			enforce(uriLength(entry.loc) > 0, "Sitemap location must be a URL");

			emit(1, "<url>");
			emit(2, "<loc>" ~ escapeUrl(entry.loc) ~ "</loc>");

			// Optional elements are written only when they carry a value.
			if (!entry.lastmod.isNull)
				emit(2, "<lastmod>" ~ toW3CDatetimeString(entry.lastmod.get) ~ "</lastmod>");

			if (entry.changefreq != ChangeFrequency.NotSet)
				emit(2, "<changefreq>" ~ toLower(to!string(entry.changefreq)) ~ "</changefreq>");

			enforce(entry.priority >= 0 && entry.priority <= 1, "Sitemap priority must be between 0.0 and 1.0");

			// A priority of exactly 0.5 is omitted from the output.
			if (entry.priority != 0.5f)
				emit(2, "<priority>" ~ to!string(entry.priority) ~ "</priority>");

			emit(1, "</url>");
		}

		// The closing tag is never followed by a newline, even in pretty mode.
		xml ~= "</urlset>";
		return xml;
	}
}
101 
/// A single `<url>` entry of a sitemap.
class SitemapUrl
{
	/// Required. Address of the page.
	/// It must begin with the protocol (such as http) and end with a
	/// trailing slash if your web server requires one. The value must be
	/// shorter than 2,048 characters.
	string loc;

	/// Optional. Date the file was last modified. Null by default.
	Nullable!DateTime lastmod;

	/// Optional. How often the page is likely to change.
	/// This is only general information for search engines and may not
	/// correlate exactly with how often they crawl the page. Use Always for
	/// documents that change on every access and Never for archived URLs.
	ChangeFrequency changefreq = ChangeFrequency.NotSet;

	/// Optional. Priority of this URL relative to the other URLs on the same
	/// site, from 0.0 to 1.0. It has no effect on how the pages compare to
	/// pages on other sites; it only tells crawlers which pages you consider
	/// most important.
	/// The default priority of a page is 0.5.
	float priority = 0.5f;
}
120 
/// How frequently a page is expected to change.
enum ChangeFrequency
{
	/// No frequency given. This is the first member and therefore the type's `.init` value.
	NotSet,
	/// The document changes each time it is accessed.
	Always,
	/// Changes about once an hour.
	Hourly,
	/// Changes about once a day.
	Daily,
	/// Changes about once a week.
	Weekly,
	/// Changes about once a month.
	Monthly,
	/// Changes about once a year.
	Yearly,
	/// Archived URL that is not expected to change.
	Never
}