Async Screen Scraping

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Navigation;
using System.Windows.Shapes;
using HtmlAgilityPack;
using Org.Mentalis.Network.ProxySocket;
using AwaitSample1.Proxy;

/// <summary>
/// Downloads the page at a fixed URI, hands the HTML to <c>getlinks</c> so the
/// anchors it contains are listed in the status label, and returns the raw HTML.
/// </summary>
/// <returns>The response body of the requested page.</returns>
async Task<string> AccessTheWebAsync()
{
    string uri = "http://www.some~data.somewhere.com/2014/ClinicalData";

    // The client is only needed for this one request, so release it (and the
    // underlying handler) as soon as the body has been read.
    using (HttpClient client = new HttpClient())
    {
        // Start the request without awaiting it yet.
        Task<string> getStringTask = client.GetStringAsync(uri);

        // You can do work here that doesn't rely on the string from GetStringAsync.
        // DoIndependentWork();

        // Control returns to the caller until the download completes.
        string urlContents = await getStringTask;

        // getlinks writes the list of anchors to the XAML label.
        getlinks(urlContents);

        return urlContents;
    }
}

/// <summary>
/// Click handler: shows a loading message, then downloads the page and lets
/// <c>AccessTheWebAsync</c> fill the status label with the links it finds.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data supplied by the framework.</param>
private async void Button_Click(object sender, RoutedEventArgs e)
{
    StatusLabel.Content = "Loading ….";

    try
    {
        // Await the download so a failure surfaces here instead of being
        // lost on a discarded task while the label stays on "Loading".
        await AccessTheWebAsync();
    }
    catch (HttpRequestException ex)
    {
        StatusLabel.Content = "Request failed: " + ex.Message;
    }
    catch (TaskCanceledException)
    {
        // HttpClient reports a timed-out request as a cancelled task.
        StatusLabel.Content = "Request timed out.";
    }
}

/// <summary>
/// Uses HTML Agility Pack to find every anchor element in the supplied HTML
/// and writes a numbered list of them (inner HTML and href) to the status label.
/// </summary>
/// <param name="input">HTML markup to scan for anchor elements.</param>
/// <returns>Always <see cref="String.Empty"/>; the listing itself goes to <c>StatusLabel</c>.</returns>
private string getlinks(string input)
{
    // Clear the previous message before parsing.
    StatusLabel.Content = String.Empty;

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(input);

    // A null result means the document has no anchors to list.
    var aTags = doc.DocumentNode.SelectNodes("//a");
    if (aTags == null)
    {
        return String.Empty;
    }

    // Build the whole listing once instead of re-concatenating the label
    // content on every iteration.
    StringBuilder listing = new StringBuilder();
    int counter = 1;
    foreach (var aTag in aTags)
    {
        // Anchors without an href (e.g. <a name="top">) have no such
        // attribute; fall back to an empty string instead of dereferencing null.
        string href = aTag.GetAttributeValue("href", String.Empty);

        listing.Append(counter)
               .Append(". ")
               .Append(aTag.InnerHtml)
               .Append(" – ")
               .Append(href)
               .Append("\t")
               .Append(Environment.NewLine);
        counter++;
    }

    StatusLabel.Content = listing.ToString();
    return String.Empty;
}

Advertisements

Leave a Reply

Please log in using one of these methods to post your comment:

WordPress.com Logo

You are commenting using your WordPress.com account. ( Log Out / Change )

Twitter picture

You are commenting using your Twitter account. ( Log Out / Change )

Facebook photo

You are commenting using your Facebook account. ( Log Out / Change )

Google+ photo

You are commenting using your Google+ account. ( Log Out / Change )

Connecting to %s