(* ::Package:: *)
(* ::Title:: *)
(*WebUtils*)
(* ::Author:: *)
(*Rob Raguet-Schofield*)
(*packages@ragfield.com*)
(* ::Section:: *)
(*Initialization*)
BeginPackage["WebUtils`", {"JLink`"}];
(* ::Subsection::Closed:: *)
(*HTTP utilities*)
(* Messages issued by HTTPExecute when a request finishes with a status other than 200. *)
(* In HTTP::err the first slot is the status code and the second is the URL; *)
(* in every other message the only slot is the URL. *)
HTTP::err = "Unknown Error (`1`) for URL `2`";
HTTP::e401 = "Not Authorized for URL `1`";
HTTP::e403 = "Forbidden for URL `1`";
HTTP::e404 = "Not Found for URL `1`";
HTTP::e500 = "Internal Server Error for URL `1`";
HTTP::e502 = "Bad Gateway (server is down) for URL `1`";
HTTP::e503 = "Service Unavailable (server is busy) for URL `1`";
(* Usage messages. Assigning them here also creates the symbols in the public WebUtils` context. *)
HTTPClient::usage = "HTTPClient is the object returned by HTTPOpen.";
HTTPOpen::usage = "HTTPOpen[] returns a new HTTPClient object.";
HTTPClose::usage = "HTTPClose[client] frees resources associated with client when it is no longer needed.";
HTTPAuthorize::usage = "HTTPAuthorize[client, opts] stores a user name and password in client for use by later requests. The options are \"User\", \"Password\", \"Server\" and \"Port\"; if the user name or the password is empty a login dialog is shown. HTTPAuthorize returns True, or $Canceled if the dialog is canceled.";
HTTPGet::usage = "HTTPGet[client, url] sends an HTTP GET request for url using client and returns the response body as a string. If the response status is not 200 a message is issued and $Failed is returned.";
HTTPPost::usage = "HTTPPost[client, url] sends an HTTP POST request (without a request body) to url using client and returns the response body as a string. If the response status is not 200 a message is issued and $Failed is returned.";
HTTPDateString::usage = "HTTPDateString[] gives the current date and time in GMT in the form \"Sun, 06 Nov 1994 08:49:37 GMT\". HTTPDateString[{y, m, d, h, m, s}] formats a date list, which is taken to be in GMT already. HTTPDateString[time] formats an absolute time.";
HTTPDateList::usage = "HTTPDateList[str] converts a date string such as \"Mon Feb 04 16:11:54 +0000 2008\" to a date list.";
(* ::Subsection:: *)
(*URL utilities*)
(* Usage messages for the URL helpers. Assigning them also creates the public symbols. *)
URLEncode::usage = "URLEncode[s] percent-encodes every byte of the UTF-8 representation of the string s.";
URLDecode::usage = "URLDecode[s] decodes the percent escapes (%XX) in the string s.";
URLQueryString::usage = "URLQueryString[args] joins a list of name -> value pairs into a query string of the form \"name1=value1&name2=value2\". URLQueryString[url, args] appends that query string to url after a \"?\". Names and values are not percent-encoded; apply URLEncode to them first where needed.";
URLShorten::usage = "URLShorten[url] shortens url with a service such as bit.ly or TinyURL.";
URLExpand::usage = "URLExpand[url] expands url if it has previously been shortened by a service such as bit.ly or TinyURL.";
(* ::Subsection:: *)
(*JSON*)
JSONImportString::usage = "JSONImportString[json] converts the JSON text json to an expression: objects become lists of rules, arrays become lists, and null, true and false become Null, True and False.";
(* ::Subsection:: *)
(*Private*)
Begin["`Private`"];
(* ::Section:: *)
(*Implementation*)
(* ::Subsection::Closed:: *)
(*HTTP utilities*)
LoadJavaClass["org.apache.commons.httpclient.auth.AuthScope"];
(* Display any HTTPClient[...] as a fixed placeholder in StandardForm instead of showing its contents. *)
Format[client_HTTPClient, StandardForm] := "HTTPClient[<...>]";

(* Private accessor: the first argument of the HTTPClient wrapper, i.e. the Java HttpClient reference stored by HTTPOpen. *)
httpClient[client_HTTPClient] := First @ client;

(* Create a new Java HttpClient and wrap it. *)
HTTPOpen[] := HTTPClient @ JavaNew["org.apache.commons.httpclient.HttpClient"];

(* Release the Java reference held by the wrapper. *)
HTTPClose[client_HTTPClient] := ReleaseJavaObject @ httpClient @ client;
(* ::Input:: *)
(*client=HTTPOpen[]*)
(* ::Input:: *)
(*HTTPClose[client]*)
(* Default options for HTTPAuthorize. *)
(* AuthScope`ANYUHOST and AuthScope`ANYUPORT are the J/Link names of the static fields *)
(* AuthScope.ANY_HOST and AuthScope.ANY_PORT (J/Link maps "_" in Java names to "U"). *)
Options[HTTPAuthorize] = {
	"User" -> "",
	"Password" -> "",
	"Server" -> AuthScope`ANYUHOST,
	"Port" -> AuthScope`ANYUPORT
};

(* HTTPAuthorize[client, opts] registers user name / password credentials with the *)
(* Java HttpClient wrapped by client, scoped to the given "Server" and "Port". *)
(* If the user name or the password is empty, a dialog asks for both. *)
(* Returns True once the credentials are stored, or $Canceled if the dialog was canceled. *)
HTTPAuthorize[client_HTTPClient, opts___?OptionQ] := Module[
	{user, pass, res = True, creds, scope, server, port, hostLabel},
	(* Explicit options take precedence over the defaults. *)
	user = "User" /. {opts} /. Options[HTTPAuthorize];
	pass = "Password" /. {opts} /. Options[HTTPAuthorize];
	server = "Server" /. {opts} /. Options[HTTPAuthorize];
	port = "Port" /. {opts} /. Options[HTTPAuthorize];
	If[StringLength[user] === 0 || StringLength[pass] === 0,
		(* The default "Server" is AuthScope.ANY_HOST, which is null in Commons HttpClient 3.x *)
		(* and therefore Null in J/Link; joining it with <> would fail, so use a fixed label *)
		(* whenever the server is not a string. *)
		hostLabel = If[StringQ[server], server, "any host"];
		DynamicModule[{dynUser = user, dynPass = pass},
			res = ChoiceDialog[Column[{
				TextCell["Login to " <> hostLabel <> ":" <> ToString[port],
					FontSize->CurrentValue[{"ControlsFontSize", Large}]],
				TextCell["User name:"],
				InputField[Dynamic[dynUser], String],
				TextCell["Password:"],
				(* Fully transparent font so the typed password is not visible. *)
				InputField[Dynamic[dynPass], String,
					BaseStyle->{FontOpacity->0}]
			}]];
			(* Copy the dialog values back into the Module variables. *)
			user = dynUser;
			pass = dynPass;
		]
	];
	(* The dialog result is False when the user cancels. *)
	If[!res, pass = ""; Return[$Canceled]];
	(* JavaBlock releases the temporary credentials and scope objects created inside it. *)
	JavaBlock[
		creds = JavaNew[
			"org.apache.commons.httpclient.UsernamePasswordCredentials",
			user, pass];
		scope = JavaNew[
			"org.apache.commons.httpclient.auth.AuthScope",
			server, port, AuthScope`ANYUREALM];
		httpClient[client]@getState[]@setCredentials[scope, creds];
		pass = "" (* Null out module variable *)
	];
	True
];
(* ::Input:: *)
(*HTTPAuthorize[client,"User"->"ragfield","Server"->"twitter.com","Port"->80]*)
(* ::Input:: *)
(*httpClient[client]@getState[]@getCredentials[]*)
(* HTTPExecute[client, method, url] runs the prepared HttpClient method object on client. *)
(* A 200 status yields the response body as a string; any other status issues the *)
(* matching HTTP:: message (url is used only in that message) and yields $Failed. *)
HTTPExecute[client_HTTPClient, method_?JavaObjectQ, url_String] := Module[
	{status},
	status = httpClient[client]@executeMethod[method];
	If[MatchQ[status, 200],
		(* Success: hand back the body; JavaBlock frees any Java temporaries. *)
		JavaBlock[method@getResponseBodyAsString[]],
	(*else*)
		Switch[status,
			401, Message[HTTP::e401, url],
			403, Message[HTTP::e403, url],
			404, Message[HTTP::e404, url],
			500, Message[HTTP::e500, url],
			502, Message[HTTP::e502, url],
			503, Message[HTTP::e503, url],
			_, Message[HTTP::err, status, url]
		];
		$Failed
	]
];
(* HTTPGet[client, url] performs an HTTP GET of url through client. *)
(* Returns whatever HTTPExecute returns: the response body as a string for a 200 status, *)
(* otherwise $Failed after an HTTP:: message. *)
HTTPGet[client_HTTPClient, url_String] := Module[
	{method, res},
	method = JavaNew[
		"org.apache.commons.httpclient.methods.GetMethod",
		url];
	res = HTTPExecute[client, method, url];
	(* Hand the connection back to the client's connection manager. Without this the *)
	(* connection stays checked out whenever the body was not read (non-200 responses). *)
	method@releaseConnection[];
	ReleaseJavaObject[method];
	res
];
(* ::Input:: *)
(*HTTPGet[client,"http://twitter.com/account/verify_credentials.xml"]*)
(* ::Input:: *)
(*HTTPGet[client, "http://google.com/doesnotexist"]*)
(* HTTPPost[client, url] performs an HTTP POST to url through client; no request body *)
(* or parameters are attached. Returns whatever HTTPExecute returns: the response body *)
(* as a string for a 200 status, otherwise $Failed after an HTTP:: message. *)
HTTPPost[client_HTTPClient, url_String] := Module[
	{method, res},
	method = JavaNew[
		"org.apache.commons.httpclient.methods.PostMethod",
		url];
	res = HTTPExecute[client, method, url];
	(* Hand the connection back to the client's connection manager. Without this the *)
	(* connection stays checked out whenever the body was not read (non-200 responses). *)
	method@releaseConnection[];
	ReleaseJavaObject[method];
	res
];
(* ::Text:: *)
(*Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123*)
(* Format a date list as "Sun, 06 Nov 1994 08:49:37 GMT". *)
(* The list is formatted as given and the literal " GMT" suffix is appended; *)
(* no time zone conversion takes place here. *)
httpDateString[gmt_List] := With[
	{elements = {
		"DayNameShort", ", ", "Day", " ", "MonthNameShort", " ",
		"Year", " ", "Time", " GMT"}},
	DateString[gmt, elements]
];
(* ::Input:: *)
(*httpDateString[Date[0]]*)
(* No argument: format the current date in time zone 0 (GMT). *)
HTTPDateString[] := httpDateString @ Date[0];
(* ::Input:: *)
(*HTTPDateString[]*)
(* Six-element date list {year, month, day, hour, minute, second}: formatted as given. *)
HTTPDateString[
	date : {_Integer, _Integer, _Integer, _Integer, _Integer, _?NumericQ}
] := httpDateString[date];
(* ::Input:: *)
(*HTTPDateString[Date[]]*)
(* ::Input:: *)
(*HTTPDateString[Date[0]]*)
(* Absolute time: convert to a date list first, then use the date-list form above. *)
HTTPDateString[time_?NumericQ] := HTTPDateString @ ToDate @ time;
(* ::Input:: *)
(*HTTPDateString[AbsoluteTime[]]*)
(* ::Input:: *)
(*HTTPDateString[AbsoluteTime[0]]*)
(* String patterns for the three date layouts combined in httpdate below. *)
(* Building blocks: month and weekday names, digit groups, time of day. *)
pmonth = "Jan" | "Feb" | "Mar" | "Apr" | "May" | "Jun" |
	"Jul" | "Aug" | "Sep" | "Oct" | "Nov" | "Dec";
pweekday = "Monday" | "Tuesday" | "Wednesday" |
	"Thursday" | "Friday" | "Saturday" | "Sunday";
pwkday = "Mon" | "Tue" | "Wed" | "Thu" | "Fri" | "Sat" | "Sun";
ptwodigit = StringExpression[DigitCharacter, DigitCharacter];
pfourdigit = StringExpression[ptwodigit, ptwodigit];
(* hh:mm:ss *)
ptime = StringExpression[ptwodigit, ":", ptwodigit, ":", ptwodigit];
(* "06 Nov 1994", "06-Nov-94" and "Nov  6" / "Nov 06" respectively *)
pdate1 = StringExpression[ptwodigit, " ", pmonth, " ", pfourdigit];
pdate2 = StringExpression[ptwodigit, "-", pmonth, "-", ptwodigit];
pdate3 = StringExpression[pmonth, " ",
	ptwodigit | StringExpression[" ", DigitCharacter]];
(* Complete date strings in each layout, and their union. *)
rfc1123 = StringExpression[pwkday, ", ", pdate1, " ", ptime, " GMT"];
rfc850 = StringExpression[pweekday, ", ", pdate2, " ", ptime, " GMT"];
asctime = StringExpression[pwkday, " ", pdate3, " ", ptime, " ", pfourdigit];
httpdate = rfc1123 | rfc850 | asctime;
(* ::Input:: *)
(*str = HTTPDateString[]*)
(* HTTPDateList[str] turns a date string into a date list. *)
(* Strings shaped like "Mon Feb 04 16:11:54 +0000 2008" are first rewritten to *)
(* "Feb 04 2008 16:11:54", dropping the weekday and the +0000 time zone, because *)
(* DateList does not understand the time zone; other strings reach DateList unchanged. *)
HTTPDateList[str_String] := Module[
	{layout, normalized},
	layout = RegularExpression[
		"(Mon|Tue|Wed|Thu|Fri|Sat|Sun),?\\s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\s(\\d\\d)\\s(\\d\\d):(\\d\\d):(\\d\\d)\\s([+-]\\d*\\s)?(\\d*)"];
	(* $2 = month, $3 = day, $8 = year, $4:$5:$6 = time of day *)
	normalized = StringReplace[str, layout :> "$2 $3 $8 $4:$5:$6"];
	DateList[normalized]
];
(* ::Input:: *)
(*HTTPDateList["Mon Feb 04 16:11:54 +0000 2008"]*)
(* ::Subsection:: *)
(*URL utilities*)
(* URLEncode[s] percent-encodes the string s: every byte of its UTF-8 encoding is *)
(* written as "%" followed by two lowercase hexadecimal digits. No character is left *)
(* unescaped, not even letters and digits. *)
URLEncode[s_String] := Module[
	{hexBytes},
	hexBytes = IntegerString[ToCharacterCode[s, "UTF-8"], 16, 2];
	(* Put a "%" at every odd position, i.e. in front of each two-digit byte. *)
	StringJoin[Riffle[hexBytes, "%", {1, -2, 2}]]
];
(* URLDecode[s] replaces percent escapes in the string s by the characters they encode. *)
(* A maximal run of consecutive %XX escapes is decoded together as one UTF-8 byte *)
(* sequence, so multi-byte characters produced by URLEncode round-trip correctly *)
(* ("%c3%a9" becomes a single e-acute rather than two separate characters). *)
(* Text outside the escapes, including a stray "%", is left unchanged. *)
(* NOTE(review): a run that is not valid UTF-8 is passed to FromCharacterCode as is; *)
(* its handling of invalid byte sequences then applies. *)
URLDecode[s_String] := StringReplace[s,
	run : (("%" ~~ HexadecimalCharacter ~~ HexadecimalCharacter) ..) :>
		FromCharacterCode[
			(* Byte values of the run, in order. *)
			StringCases[run,
				"%" ~~ hex : (HexadecimalCharacter ~~ HexadecimalCharacter) :>
					FromDigits[hex, 16]],
			"UTF-8"]
];
(* URLQueryString[args] builds "name1=value1&name2=value2..." from a list of pairs. *)
(* For each element its first part is the name and its last part the value; both go *)
(* through ToString and are NOT percent-encoded. *)
URLQueryString[args_List] := Module[
	{pairs},
	pairs = Map[
		StringJoin[ToString[First[#]], "=", ToString[Last[#]]] &,
		args];
	Apply[StringJoin, Riffle[pairs, "&"]]
];
(* URLQueryString[url, args] appends the query string to url after "?"; *)
(* when the query string is empty, url is returned unchanged. *)
URLQueryString[url_String, args_List] := With[
	{query = URLQueryString[args]},
	If[StringLength[query] === 0,
		url,
		url <> "?" <> query
	]
];
(* The shortening service used when no "Service" option is given. *)
Options[URLShorten] = {"Service" -> "bit.ly"};

(* URLShorten[url, opts] shortens url with the service named by the "Service" option: *)
(* "bit.ly", "TinyURL", "tr.im" or "is.gd". Any other service name falls back to bit.ly. *)
URLShorten[url_String, opts___?OptionQ] := With[
	{chosen = "Service" /. {opts} /. Options[URLShorten]},
	Which[
		StringMatchQ[chosen, "bit.ly"], BitlyShorten[url],
		StringMatchQ[chosen, "TinyURL"], TinyURLShorten[url],
		StringMatchQ[chosen, "tr.im"], TrimShorten[url],
		StringMatchQ[chosen, "is.gd"], ISGDShorten[url],
		(* unrecognized service name *)
		True, BitlyShorten[url]
	]
];
(* ::Input:: *)
(*URLShorten["http://www.google.com"]*)
(* ::Input:: *)
(*URLShorten["http://www.google.com","Service"->"bit.ly"]*)
(* ::Input:: *)
(*URLShorten["http://www.google.com","Service"->"TinyURL"]*)
(* ::Input:: *)
(*URLShorten["http://www.google.com","Service"->"tr.im"]*)
(* ::Input:: *)
(*URLShorten["http://www.google.com","Service"->"is.gd"]*)
(* URLExpand[url] dispatches on the prefix of url to the expander for the matching *)
(* shortening service and returns that expander's result. The first matching branch wins; *)
(* a url that matches none of the patterns is returned unchanged. *)
(* Only "http://" prefixes are listed in the patterns below. *)
URLExpand[url_String] := Which[
StringMatchQ[url, "http://bit.ly/*"],
BitlyExpand[url],
(* tinyurl.com, digg.com and ow.ly links all go through TinyURLExpand *)
StringMatchQ[url, "http://tinyurl.com/*" | "http://digg.com/*" | "http://ow.ly/*"],
TinyURLExpand[url],
StringMatchQ[url, "http://*hungh.com/*"],
HunghExpand[url],
StringMatchQ[url, "http://tr.im/*"],
TrimExpand[url],
StringMatchQ[url, "http://is.gd/*"],
ISGDExpand[url],
StringMatchQ[url, "http://tcrn.ch/*"],
TCRNExpand[url],
(* no known shortener: hand the url back as is *)
True,
url
];
(* ::Input:: *)
(*URLExpand["http://bit.ly/dE56x"]*)
(* ::Input:: *)
(*URLExpand["http://digg.com/d1r7n3"]*)
(* ::Input:: *)
(*URLExpand["http://tinyurl.com/ohngma"]*)
(* ::Input:: *)
(*URLExpand["http://www.hungh.com/link292.html"]*)
(* ::Input:: *)
(*URLExpand["http://tr.im/ltxU"]*)
(* ::Input:: *)
(*URLExpand["http://is.gd/2"]*)
(* ::Input:: *)
(*URLExpand["http://tcrn.ch/21f"]*)
(* ::Subsubsection::Closed:: *)
(*bit.ly*)
(* Request parameters sent by BitlyShorten and BitlyExpand as "version", "login" and "apiKey". *)
(* NOTE(review): the login and API key are hard-coded in the source; confirm that this *)
(* is intended before distributing the package. *)
$BitlyVersion = "2.0.1";
$BitlyLogin = "ragfieldpackages";
$BitlyAPIKey = "R_f306ee0b481a5df3be0a94cee6a398d0";
(* When True, BitlyExpand prints the short URL and the API URL it is about to request. *)
$BitlyDebug = False;
(* BitlyShorten[url] asks the bit.ly "shorten" API for a short form of url. *)
(* Returns the text of the shortUrl element when the XML reply contains exactly one, *)
(* otherwise $Failed. *)
BitlyShorten[url_String] := Module[
	{request, reply, matches},
	request = URLQueryString["http://api.bit.ly/shorten", {
		"version" -> $BitlyVersion,
		"login" -> $BitlyLogin,
		"apiKey" -> $BitlyAPIKey,
		"format" -> "xml",
		"longUrl" -> URLEncode[url]
	}];
	reply = Import[request, "XML"];
	matches = Cases[reply,
		XMLElement["shortUrl", _, {short_String}] :> short, \[Infinity]];
	(* Exactly one hit is a success; zero or several hits count as failure. *)
	Replace[matches, {{only_} :> only, _ -> $Failed}]
];
(* ::Input:: *)
(*BitlyShorten["http://www.google.com"]*)
(* BitlyExpand[url] asks the bit.ly "expand" API for the long URL behind the short url. *)
(* Returns the text of the longUrl element when exactly one is found, otherwise url itself. *)
BitlyExpand[url_String] := Module[
	{u, xml, res},
	u = URLQueryString["http://api.bit.ly/expand", {
		"version" -> $BitlyVersion,
		"login" -> $BitlyLogin,
		"apiKey" -> $BitlyAPIKey,
		"format" -> "xml",
		"shortUrl" -> url
	}];
	If[$BitlyDebug, Print["Expanding ", Hyperlink[url], " with ", Hyperlink[u]]];
	(* Quiet: an unparsable reply is expected in some cases and handled below. *)
	xml = Quiet[Import[u, "XML"]];
	If[xml =!= $Failed,
		res = Cases[xml, XMLElement["longUrl", _, {long_String}] :> long, \[Infinity]],
	(*else*)
		(* Sigh, bit.ly returns invalid XML if the hash starts with a number. *)
		(* Fall back to pulling the longUrl element out of the raw text. The previous *)
		(* regular expression here was malformed (unbalanced parenthesis and a trailing *)
		(* backslash), so this branch could never produce a result. *)
		xml = Import[u, "String"];
		res = If[StringQ[xml],
			StringCases[xml,
				"<longUrl>" ~~ Shortest[long__] ~~ "</longUrl>" :> long],
			(* the second download failed as well *)
			{}
		]
	];
	If[Length[res] === 1, First[res], url]
];
(* ::Input:: *)
(*BitlyExpand["http://bit.ly/8ttn"]*)
(* ::Input:: *)
(*BitlyExpand["http://bit.ly/dE56x"]*)
(* ::Input:: *)
(*BitlyExpand["http://bit.ly/BTvGk"]*)
(* ::Subsubsection::Closed:: *)
(*TinyURL & Digg & ow.ly*)
(* TinyURLShorten[url] requests tinyurl.com's api-create.php with the percent-encoded *)
(* url and returns the reply imported as a string. *)
TinyURLShorten[url_String] := Import[
	URLQueryString["http://tinyurl.com/api-create.php", {"url" -> URLEncode[url]}],
	"String"
];
(* ::Input:: *)
(*TinyURLShorten["http://www.google.com"]*)
(* TinyURLExpand[url] requests url without following redirects and, if the server *)
(* answers with a redirect, returns the URL-decoded value of its Location header. *)
(* In every other case (status 200, an error status, a failed request, or a redirect *)
(* without a Location header) url is returned unchanged. *)
TinyURLExpand[url_String] := JavaBlock@Module[
	{client, method, status, location, expanded = url},
	client = HTTPOpen[];
	method = JavaNew[
		"org.apache.commons.httpclient.methods.GetMethod",
		url];
	method@setFollowRedirects[False];
	status = httpClient[client]@executeMethod[method];
	(* Accept temporary redirects (302, 303, 307) as well as the permanent 301. *)
	If[MatchQ[status, 301 | 302 | 303 | 307],
		location = method@getResponseHeader["Location"];
		(* A missing header is Java null, i.e. Null here. Compare against Null directly: *)
		(* JavaObjectQ also gives True for Null, so it cannot be used for this check. *)
		If[location =!= Null,
			expanded = URLDecode[location@getValue[]]
		]
	];
	(* Return the connection to the client's connection manager before dropping the client. *)
	method@releaseConnection[];
	HTTPClose[client];
	expanded
];
(* ::Input:: *)
(*WebUtils`Private`TinyURLExpand["http://tinyurl.com/ohngma"]*)
(* ::Input:: *)
(*WebUtils`Private`TinyURLExpand["http://digg.com/d1r7n3"]*)
(* ::Input:: *)
(*WebUtils`Private`TinyURLExpand["http://ow.ly/7bay"]*)
(* ::Subsubsection::Closed:: *)
(*hungh*)
(* HunghExpand[url] imports the page at url as an XML object and returns the "src" *)
(* attribute of the first iframe element found in it, or url itself if there is none. *)
HunghExpand[url_String] := Module[
	{page, frames},
	page = Import[url, "XMLObject"];
	frames = Cases[page,
		XMLElement["iframe", {___, "src" -> target_, ___}, _] :> target,
		\[Infinity]];
	If[frames === {}, url, First[frames]]
];
(* ::Input:: *)
(*HunghExpand["http://www.hungh.com/link292.html"]*)
(* ::Subsubsection::Closed:: *)
(*tr.im*)
(* TrimShorten[url] asks the tr.im trim_url API for a short form of url. *)
(* Returns the text of the first "url" element of the XML reply, or $Failed if there is none. *)
TrimShorten[url_String] := Module[
	{res},
	res = Import[
		URLQueryString[
			"http://api.tr.im/api/trim_url.xml",
			(* Percent-encode the long URL, as the other shorteners in this file do; *)
			(* sent raw, any "&", "#" or "?" in it would corrupt the query string. *)
			{"url" -> URLEncode[url]}
		],
		(* State the format explicitly rather than relying on it being inferred. *)
		"XML"
	];
	res = Cases[res, XMLElement["url", _, {s_String}] :> s, \[Infinity]];
	If[Length[res] > 0, First[res], $Failed]
];
(* ::Input:: *)
(*TrimShorten["http://www.google.com"]*)
(* TrimExpand[url] asks the tr.im trim_destination API where the short url points. *)
(* The "trimpath" parameter is url with the "http://tr.im/" prefix removed. *)
(* Returns the text of the first "destination" element of the reply, or url if there is none. *)
TrimExpand[url_String] := Module[
	{path, reply, destinations},
	path = StringReplace[url, "http://tr.im/" -> ""];
	reply = Import[URLQueryString[
		"http://api.tr.im/api/trim_destination.xml",
		{"trimpath" -> path}
	]];
	destinations = Cases[reply,
		XMLElement["destination", _, {s_String}] :> s, \[Infinity]];
	If[destinations === {}, url, First[destinations]]
];
(* ::Input:: *)
(*TrimExpand["http://tr.im/ltwi"]*)
(* ::Subsubsection::Closed:: *)
(*is.gd*)
(* ISGDShorten[url] requests is.gd's api.php with the percent-encoded url as "longurl" *)
(* and returns the reply imported as a string. *)
ISGDShorten[url_String] := Import[
	StringJoin["http://is.gd/api.php?longurl=", URLEncode[url]],
	"String"
];
(* ::Input:: *)
(*ISGDShorten["http://www.google.com"]*)
(* is.gd links are expanded by the same redirect lookup that TinyURLExpand performs. *)
ISGDExpand[url_String] := TinyURLExpand @ url;
(* ::Input:: *)
(*WebUtils`Private`ISGDExpand["http://is.gd/2"]*)
(* ::Subsubsection:: *)
(*tcrn.ch*)
(* ::Text:: *)
(*TechCrunch.m URLs.*)
(*Expand service URL: http://api.longurl.org/v1/expand?url=...*)
(*Reply:*)
(* *)
(* *)
(* *)
(* *)
(* *)
(* *)
(* *)
(* *)
(**)
(* TCRNExpand[url] asks the api.longurl.org expand service for the long form of url. *)
(* Returns the content of the first attribute-less long_url element of the reply, *)
(* or url itself if none is found. *)
TCRNExpand[url_String] := Module[{xml, longUrl},
	(* Percent-encode the short URL so that its own ":", "/", "?" and "&" characters *)
	(* cannot be mistaken for part of the API request. *)
	xml = Import[
		"http://api.longurl.org/v1/expand?url=" <> URLEncode[url],
		{"XML", "XMLElement"}];
	longUrl = Cases[xml, XMLElement["long_url", {}, {u_}] :> u, Infinity];
	If[Length[longUrl] > 0, First[longUrl], url]
];
(* ::Input:: *)
(*TCRNExpand["http://tcrn.ch/21f"]*)
(* ::Subsection::Closed:: *)
(*JSON*)
(* String patterns for JSON tokens. *)
(* Numbers: the patterns are deliberately looser than strict JSON and also accept *)
(* forms such as "+1", ".5" and "1." *)
jsonE = ("e"|"E") ~~ ("+"|"-"|"");
jsonDigits = DigitCharacter..;
jsonExp = jsonE ~~ jsonDigits;
jsonFrac = "." ~~ DigitCharacter...;
jsonInt = Longest@Alternatives[
	("+" | "-" | "") ~~ CharacterRange["1", "9"] ~~ jsonDigits,
	("+" | "-" | "") ~~ DigitCharacter
];
jsonNumber = Longest@Alternatives[
	jsonInt ~~ (jsonFrac | "") ~~ (jsonExp | ""),
	("+" | "-" | "") ~~ "." ~~ DigitCharacter.. ~~ (jsonExp | "")
];
(* One unit of a string body: a backslash escape (backslash plus the following *)
(* character), or any single character other than a double quote or a backslash. *)
(* Recognizing escapes lets a string contain \" without being cut short. *)
jsonChar = RegularExpression["\\\\.|[^\"\\\\]"];
jsonString = "\"" ~~ jsonChar... ~~ "\"";

(* JSONImportString[json] converts JSON text to an expression by rewriting its tokens *)
(* into Wolfram Language syntax and evaluating the result: objects become lists of *)
(* rules, arrays become lists, and null/true/false become Null/True/False. *)
(* *)
(* SECURITY NOTE(review): the rewritten text is handed to ToExpression, and any input *)
(* that does not match one of the token patterns below is passed through unchanged. *)
(* Input that is not well-formed JSON can therefore evaluate arbitrary code. Do not *)
(* call this on untrusted data without validating it first. *)
JSONImportString[json_String] := Module[
	{vals},
	vals = StringSplit[json, {
		(* 1e5 -> 1`*^5: machine-precision number in scientific notation *)
		num:jsonNumber :> Hold[StringReplace[num, "e"|"E"->"`*^"]],
		(* Translate JSON escapes that differ from Wolfram Language string syntax. *)
		(* The first rule consumes an escaped backslash unchanged, so that a "u" or "/" *)
		(* following it is not mistaken for the start of another escape. *)
		str:jsonString :> Hold[StringReplace[str, {
			"\\\\" -> "\\\\",
			"\\u" -> "\\:",
			"\\/" -> "/"
		}]],
		"null" :> "Null",
		"true" :> "True",
		"false" :> "False",
		"," :> ",",
		":" :> "->",
		"{" :> "{",
		"}" :> "}",
		"[" :> "{",
		"]" :> "}"
	}];
	(* Drop the empty and whitespace-only pieces left between the tokens. *)
	vals = DeleteCases[vals,
		Alternatives["", x_String/;StringMatchQ[x, Whitespace]]];
	ToExpression[StringJoin@@ReleaseHold[vals]]
];
(* ::Input:: *)
(*JSONImportString["[1, 2, 3, 10, 01, 1., .1, 1e-4, 1E+4, 1.E-4, 1.e+4, .1e-4, .1E+4, 00.001, 0.025e3]"]//InputForm*)
(* ::Input:: *)
(*JSONImportString["[\"rob\", \"[{}]\", \"'\\\"\", \"\\u2010\"]"]//InputForm*)
(* ::Input:: *)
(*JSONImportString["{\"rob\" : 123, \"rob2\": [1, 2.23 ,3e-8 ,.19, 19.], \"fofalseo\": true, \"foo\":\"[]{}\", \"bar\":{\"a\":null, \"b\":[1, 2,[1, 2, 3]]}}"]//InputForm*)
(* ::Section:: *)
(*Finalization*)
End[]; (* `Private` *)
EndPackage[]; (* WebUtils` *)