How do I extract the HTTP link from inside an anchor tag? Only links that point to WMV files (a .wmv extension) should be extracted.
Regex:
<a\b[^>]*?\shref\s*=\s*(?<quote>["'])(?<link>[^"']*\.wmv)\k<quote>[^>]*>\s*(?<name>.*?)\s*</a>
[Note: \s* is used in several places to match the extra white space characters that can occur in the html.]
Sample C# code:
// Matches an anchor tag whose quoted href value ends in ".wmv".
//   quote - the opening quote character; the same character must close the value
//   link  - the href value
//   name  - the anchor's inner text, without surrounding white space
// \s* / \s appear in several places to tolerate extra white space in the HTML.
// Built once and reused so the Compiled option is paid for only one time.
private static readonly Regex WmvAnchorRegex = new Regex(
    @"<a\b[^>]*?\shref\s*=\s*(?<quote>[""'])(?<link>[^""']*\.wmv)\k<quote>[^>]*>\s*(?<name>.*?)\s*</a>",
    RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled);

/// <summary>
/// Assigns proper values to <paramref name="wmvLink"/> and <paramref name="name"/>
/// if <paramref name="htmlATag"/> contains an anchor tag that matches the pattern.
/// Matches only links to .wmv files (the extension is compared case-insensitively).
/// </summary>
/// <param name="htmlATag">HTML text holding the anchor tag to inspect.</param>
/// <param name="wmvLink">Receives the href value on success; otherwise <c>null</c>.</param>
/// <param name="name">
/// Receives the anchor's inner text, trimmed of surrounding white space, on success;
/// otherwise <c>null</c>.
/// </param>
/// <returns><c>true</c> if success, <c>false</c> otherwise.</returns>
public static bool TryGetHrefDetailsWMV(string htmlATag, out string wmvLink, out string name)
{
    wmvLink = null;
    name = null;

    // A Try-method reports failure instead of throwing on null or empty input.
    if (string.IsNullOrEmpty(htmlATag))
    {
        return false;
    }

    // Match once and read both groups from the same Match object.
    Match match = WmvAnchorRegex.Match(htmlATag);
    if (!match.Success)
    {
        return false;
    }

    wmvLink = match.Groups["link"].Value;
    name = match.Groups["name"].Value;
    return true;
}
// Sample calls. The anchor tags below are representative placeholders (example.com URLs).
// The link points to an .avi file, so it is rejected.
MyRegEx.TryGetHrefDetailsWMV("<a href=\"http://example.com/video.avi\">Name of File </a>",
    out wmvLink, out name); // No match
// Double-quoted link to a .wmv file.
MyRegEx.TryGetHrefDetailsWMV("<a href=\"http://example.com/video.wmv\">Name of File </a>",
    out wmvLink, out name); // Match
// Single-quoted link to a .wmv file.
MyRegEx.TryGetHrefDetailsWMV("<a href='http://example.com/video.wmv'>Name of File </a>", out wmvLink, out name); // Match