Job finder:
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Shapes;
using System.Windows.Threading;
namespace CraiglistReader
{
/// <summary>
/// Interaction logic for WindowUS.xaml
/// </summary>
public partial class WindowUS : Window
{
BackgroundWorker worker = new BackgroundWorker();
List<sites> sbtext = new List<sites>();
List<Cities> cities = new List<Cities>();
List<CitiesData> CitiesData = new List<CitiesData>();
List<Data> ltsData = null;
/// <summary>
/// Creates the job-finder window. The only work done here is the call to
/// InitializeComponent; the search state fields keep their declared initial values.
/// </summary>
public WindowUS()
{
InitializeComponent();
}
/// <summary>
/// Handles the search button. Downloads the craigslist US site index, records every
/// link on that page, writes the collected URLs and names to "urls.txt" and
/// "cities.txt", and then requests the "/search/sof" listing of every collected site
/// on a background worker. When the worker finishes, the postings are shown in the
/// grid and the button is enabled again.
/// </summary>
/// <param name="sender">Control that raised the click (not used).</param>
/// <param name="e">Click event data (not used).</param>
private void Button_Click_1(object sender, RoutedEventArgs e)
{
    try
    {
        List<string> links = new List<string>();
        List<string> names = new List<string>();

        // Start from an empty site list. The field lives as long as the window, so
        // without this every further click re-adds all sites and queries each of
        // them one more time.
        CitiesData.Clear();

        HtmlDocument doc = new HtmlDocument();
        // Release the client and the response stream as soon as the page is parsed.
        using (WebClient client = new WebClient())
        using (Stream indexStream = client.OpenRead("http://geo.craigslist.org/iso/us"))
        {
            doc.Load(indexStream);
        }

        HtmlNodeCollection linkNodes = doc.DocumentNode.SelectNodes("//a[@href]");
        if (linkNodes != null)
        {
            foreach (HtmlNode link in linkNodes)
            {
                string href = link.GetAttributeValue("href", "");
                CitiesData entry = new CitiesData();
                entry.Url = "";
                if (href.Contains("http://"))
                {
                    links.Add(href);
                    entry.Url = href;
                }
                else
                {
                    // Links without an absolute URL only contribute their text.
                    entry.Country = link.InnerText;
                }
                // InnerText carries no HTML tags.
                names.Add(link.InnerText);
                entry.Cities = link.InnerText;
                CitiesData.Add(entry);
            }
        }

        File.WriteAllText("urls.txt", string.Join(Environment.NewLine, links.ToArray()));
        File.WriteAllText("cities.txt", string.Join(Environment.NewLine, names.ToArray()));

        ProgressBar1.Minimum = 0;
        ProgressBar1.Maximum = short.MaxValue;
        ProgressBar1.Value = 0;

        // Build the filter from BOTH checkboxes. The previous conditions compared
        // chktelecommute with itself and therefore could never select the
        // telecommute-only filter. With neither box checked the part-time filter
        // remains the default, as before.
        bool partTime = chkparttime.IsChecked == true;
        bool telecommute = chktelecommute.IsChecked == true;
        string search = "is_parttime=1";
        if (telecommute)
        {
            search = partTime ? "is_parttime=1&is_telecommuting=1" : "is_telecommuting=1";
        }

        // Snapshot the sites to query on the UI thread so the worker never touches
        // the shared field.
        List<CitiesData> targets = CitiesData.Where(p => p.Url != "" && p.Cities.Length > 2).ToList();

        ltsData = new List<Data>();
        btnSearch.IsEnabled = false;

        worker = new BackgroundWorker();
        worker.DoWork += delegate(object workSender, DoWorkEventArgs work)
        {
            List<Data> found = new List<Data>();
            foreach (CitiesData target in targets)
            {
                try
                {
                    found.AddRange(readhtml(target.Url + "/search/sof?" + search, search, target.Url));
                }
                catch (Exception ex)
                {
                    // Append so earlier failures are not overwritten by later ones.
                    File.AppendAllText("Error.txt", ex.Message + Environment.NewLine);
                }
            }
            // Hand the result to the completion handler instead of mutating the
            // shared list from the worker thread.
            work.Result = found;
        };
        worker.RunWorkerCompleted += delegate(object doneSender, RunWorkerCompletedEventArgs done)
        {
            if (done.Error != null)
            {
                MessageBox.Show(done.Error.Message);
            }
            else
            {
                ltsData = (List<Data>)done.Result;
                dg.ItemsSource = ltsData.GroupBy(p => p.Title).ToList();
            }
            btnSearch.IsEnabled = true;
        };
        worker.RunWorkerAsync();
    }
    catch (Exception ex)
    {
        // Never leave the button disabled when the search could not be started.
        btnSearch.IsEnabled = true;
        MessageBox.Show(ex.Message);
    }
}
/// <summary>
/// Opens the target of a clicked grid hyperlink through <see cref="Process.Start(string)"/>.
/// Clicks that do not originate from a hyperlink, or from one without a target, are ignored.
/// </summary>
/// <param name="sender">Element the handler is attached to (not used).</param>
/// <param name="e">Routed event data; its OriginalSource is expected to be the hyperlink.</param>
private void DG_Hyperlink_Click(object sender, RoutedEventArgs e)
{
    try
    {
        Hyperlink link = e.OriginalSource as Hyperlink;
        if (link == null || link.NavigateUri == null)
        {
            return;
        }
        Process.Start(link.NavigateUri.ToString());
    }
    catch (Exception ex)
    {
        // Same reporting style as the other handlers of this window; a link that
        // cannot be opened must not take the application down.
        MessageBox.Show(ex.Message);
    }
}
/// <summary>
/// Handler wired to the Checked and Unchecked events of both filter checkboxes in
/// the window markup. It is currently empty: the checkbox states are only read when
/// the search button is clicked.
/// </summary>
private void CheckBox_Checked(object sender, RoutedEventArgs e)
{
}
/// <summary>
/// Downloads one listing page and extracts its postings with <c>FindRow</c>.
/// </summary>
/// <param name="urlAddress">Full URL of the page to request.</param>
/// <param name="search">Not used by this method; kept so existing callers compile.</param>
/// <param name="rawurl">Site URL; "/sof/" is appended and the result is passed to FindRow as link base.</param>
/// <returns>
/// The postings found on the page. The list is empty when the response status is
/// not OK or when any error occurs (the error message is shown in a message box).
/// </returns>
List<Data> readhtml(string urlAddress, string search, string rawurl)
{
    List<Data> _listdata = new List<Data>();
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(urlAddress);
        // "using" releases the connection on every path, including a non-OK status
        // and an exception while reading.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            if (response.StatusCode == HttpStatusCode.OK)
            {
                // StreamReader's own default is UTF-8, so UTF-8 is the fallback when
                // the server names no charset or one this runtime does not know.
                Encoding encoding = Encoding.UTF8;
                if (!string.IsNullOrEmpty(response.CharacterSet))
                {
                    try
                    {
                        encoding = Encoding.GetEncoding(response.CharacterSet);
                    }
                    catch (ArgumentException)
                    {
                        // Unknown charset name: keep the UTF-8 fallback.
                    }
                }

                using (Stream receiveStream = response.GetResponseStream())
                using (StreamReader readStream = new StreamReader(receiveStream, encoding))
                {
                    string data = readStream.ReadToEnd();
                    _listdata = FindRow(rawurl + "/sof/", data);
                }
            }
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
    return _listdata;
}
/// <summary>
/// Extracts postings from the HTML of a listing page. Every paragraph element with
/// a class attribute is treated as one row; inside a row, each anchor whose markup
/// contains "hdrlnk" yields one <c>Data</c> item (title, absolute link, posted date).
/// </summary>
/// <param name="url">Absolute URL of the listing; relative links are resolved against it.</param>
/// <param name="input">HTML text of the page.</param>
/// <returns>
/// The postings found; empty when <paramref name="input"/> is null or empty. If an
/// unexpected error occurs its message is shown and the items collected so far are returned.
/// </returns>
static List<Data> FindRow(string url, string input)
{
    List<Data> _data = new List<Data>();
    if (string.IsNullOrEmpty(input))
    {
        return _data;
    }
    try
    {
        // Resolve links with Uri instead of trimming a fixed number of characters
        // off the base; this also covers protocol-relative and https links.
        Uri baseUri;
        bool hasBase = Uri.TryCreate(url, UriKind.Absolute, out baseUri);

        MatchCollection matches = Regex.Matches(input, "<p class=.*?>(.*?)<\\/p>");
        Console.WriteLine("Matches found: {0}", matches.Count);
        foreach (Match m in matches)
        {
            string row = m.Value;

            // Sentinel used when the row carries no parsable timestamp.
            DateTime posteddt = DateTime.Now.AddYears(-100);
            foreach (Match match in Regex.Matches(row, "<time datetime=.*?>(.*?)</time>"))
            {
                Console.WriteLine("Found '{0}' at position {1}",
                    match.Value, match.Index);
                // Skip the 16 characters of the tag opening up to and including the
                // quote, then read the next 16 characters as the timestamp
                // (presumably "yyyy-MM-dd HH:mm").
                const int prefixLength = 16;
                const int stampLength = 16;
                if (match.Value.Length < prefixLength + stampLength)
                {
                    continue;
                }
                string stamp = match.Value.Substring(prefixLength, stampLength);
                DateTime parsed;
                // Invariant culture: the markup is machine-written, not user text.
                if (DateTime.TryParse(stamp, System.Globalization.CultureInfo.InvariantCulture,
                        System.Globalization.DateTimeStyles.None, out parsed))
                {
                    posteddt = parsed;
                }
            }

            foreach (Match mi in Regex.Matches(row, "<a href=.*?>(.*?)<\\/a>"))
            {
                if (!mi.Value.Contains("hdrlnk"))
                {
                    continue;
                }

                // Title: anchor text up to the first nested tag, if any.
                string title = mi.Groups[1].Value;
                int nestedTag = title.IndexOf('<');
                if (nestedTag >= 0)
                {
                    title = title.Substring(0, nestedTag);
                }

                // Read the href value itself rather than cutting characters off the
                // attribute text and re-adding ".html".
                Match hrefMatch = Regex.Match(mi.Value, "href\\s*=\\s*[\"']([^\"']*)[\"']");
                if (!hrefMatch.Success)
                {
                    continue;
                }
                string href = hrefMatch.Groups[1].Value;

                string link = href;
                Uri resolved;
                if (hasBase && Uri.TryCreate(baseUri, href, out resolved))
                {
                    link = resolved.AbsoluteUri;
                }

                _data.Add(new Data() { Title = title, Link = link, PostedDate = posteddt });
            }
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
    return _data;
}
/// <summary>
/// Opens a new resume-finder window next to this one. Any failure while creating or
/// showing it is reported in a message box.
/// </summary>
/// <param name="sender">Button that was clicked (not used).</param>
/// <param name="e">Click event data (not used).</param>
private void btnResume_Click(object sender, RoutedEventArgs e)
{
    try
    {
        ResumeWindow resumeFinder = new ResumeWindow();
        resumeFinder.Show();
    }
    catch (Exception failure)
    {
        string message = failure.Message;
        MessageBox.Show(message);
    }
}
/// <summary>
/// Filters the postings of the last search when Enter is pressed in the search box:
/// only items whose title contains the typed text (ignoring case) stay in the grid.
/// </summary>
/// <param name="sender">Text box that raised the event (not used).</param>
/// <param name="e">Key event data; only <see cref="Key.Return"/> triggers the filter.</param>
private void OnKeyDownHandler(object sender, KeyEventArgs e)
{
    try
    {
        if (e.Key != Key.Return)
        {
            return;
        }
        // ltsData stays null until the first search has been started; there is
        // nothing to filter then, and dereferencing it would only produce a
        // "null reference" message box.
        if (ltsData == null)
        {
            return;
        }

        // Read the text once instead of once per item.
        string filter = txtSearch.Text ?? string.Empty;
        dg.ItemsSource = ltsData
            .Where(p => p.Title != null && p.Title.IndexOf(filter, StringComparison.OrdinalIgnoreCase) >= 0)
            .GroupBy(p => p.Title)
            .ToList();
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
}
}
}
<Window x:Class="CraiglistReader.WindowUS"
        xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
        xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
        Title="Job finder" >
    <!-- Layout: four fixed 35-pixel rows for the controls, the result grid fills the rest. -->
    <Grid >
        <Grid.RowDefinitions>
            <RowDefinition Height="35"/>
            <RowDefinition Height="35"/>
            <RowDefinition Height="35"/>
            <RowDefinition Height="35"/>
            <RowDefinition Height="*"/>
        </Grid.RowDefinitions>
        <!-- Filter checkboxes; their state is read by Button_Click_1 when a search starts. -->
        <CheckBox Content="telecommute" x:Name="chktelecommute" Grid.Row="0" Checked="CheckBox_Checked" Unchecked="CheckBox_Checked" HorizontalAlignment="Left"/>
        <CheckBox Content="part-time" x:Name="chkparttime" Grid.Row="1" IsChecked="True" Checked="CheckBox_Checked" Unchecked="CheckBox_Checked" HorizontalAlignment="Left"/>
        <!-- Starts the search. The caption was the designer placeholder "Button". -->
        <Button x:Name="btnSearch" Content="Search" HorizontalAlignment="Left" Grid.Row="2" Width="75" Click="Button_Click_1"/>
        <Label x:Name="lblSearch" HorizontalAlignment="Right" Grid.Row="2" Width="475" />
        <ProgressBar Grid.Row="3" Height="20" Width="200" Margin="0,4,0,0" Name="ProgressBar1" HorizontalAlignment="Center" VerticalAlignment="top" />
        <!-- Result grid; columns are declared explicitly and bound by property name. -->
        <DataGrid x:Name="dg" Grid.Row="4" AutoGenerateColumns="False">
            <DataGrid.Columns>
                <DataGridTextColumn Header="Title"
                                    Binding="{Binding Path=Title}"/>
                <DataGridHyperlinkColumn Header="Link"
                                         Binding="{Binding Path=Link}" >
                    <DataGridHyperlinkColumn.ElementStyle>
                        <Style>
                            <!-- Clicks on a link cell are routed to DG_Hyperlink_Click. -->
                            <EventSetter Event="Hyperlink.Click" Handler="DG_Hyperlink_Click"/>
                        </Style>
                    </DataGridHyperlinkColumn.ElementStyle>
                </DataGridHyperlinkColumn>
                <DataGridTextColumn Header="Type"
                                    Binding="{Binding Path=Type}"/>
                <DataGridTextColumn Header="PostedDate"
                                    Binding="{Binding Path=PostedDate}"/>
                <DataGridTextColumn Header="Description"
                                    Binding="{Binding Path=Description}"/>
            </DataGrid.Columns>
        </DataGrid>
        <!-- Country selector; only referenced from commented-out code in the code-behind shown. -->
        <ComboBox x:Name="cmbCountry" HorizontalAlignment="Left" Margin="536,10,0,0" Grid.Row="1" VerticalAlignment="Top" Width="120" SelectedIndex="0">
            <ComboBoxItem Content="US" />
            <ComboBoxItem Content="Canada"/>
            <ComboBoxItem Content=""/>
        </ComboBox>
        <!-- No Grid.Row on the next two controls, so both sit in row 0 and are placed by their margins. -->
        <Button x:Name="btnResume" Content="Resume" HorizontalAlignment="Left" Height="23" Margin="452,7,0,0" VerticalAlignment="Top" Width="130" Click="btnResume_Click"/>
        <!-- Pressing Enter here filters the current results (OnKeyDownHandler). -->
        <TextBox x:Name="txtSearch" HorizontalAlignment="Left" Height="23" Margin="587,10,0,0" TextWrapping="Wrap" Text="Marketing" VerticalAlignment="Top" Width="158" KeyDown="OnKeyDownHandler"/>
    </Grid>
</Window>
Resume Finder
<Window x:Class="CraiglistReader.ResumeWindow"
        xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
        xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
        Title="Resume finder" >
    <!-- Layout: four fixed 35-pixel rows for the controls, the result grid fills the rest. -->
    <Grid >
        <Grid.RowDefinitions>
            <RowDefinition Height="35"/>
            <RowDefinition Height="35"/>
            <RowDefinition Height="35"/>
            <RowDefinition Height="35"/>
            <RowDefinition Height="*"/>
        </Grid.RowDefinitions>
        <!-- Starts the search. The caption was the designer placeholder "Button". -->
        <Button x:Name="btnSearch" Content="Search" HorizontalAlignment="Left" Grid.Row="2" Width="75" Click="Button_Click_1"/>
        <Label x:Name="lblSearch" HorizontalAlignment="Right" Grid.Row="2" Width="475" />
        <ProgressBar Grid.Row="3" Height="20" Width="200" Margin="0,4,0,0" Name="ProgressBar1" HorizontalAlignment="Center" VerticalAlignment="top" />
        <!-- Result grid; columns are declared explicitly and bound by property name. -->
        <DataGrid x:Name="dg" Grid.Row="4" AutoGenerateColumns="False">
            <DataGrid.Columns>
                <DataGridTextColumn Header="Title"
                                    Binding="{Binding Path=Title}"/>
                <DataGridHyperlinkColumn Header="Link"
                                         Binding="{Binding Path=Link}" >
                    <DataGridHyperlinkColumn.ElementStyle>
                        <Style>
                            <!-- Clicks on a link cell are routed to DG_Hyperlink_Click. -->
                            <EventSetter Event="Hyperlink.Click" Handler="DG_Hyperlink_Click"/>
                        </Style>
                    </DataGridHyperlinkColumn.ElementStyle>
                </DataGridHyperlinkColumn>
                <DataGridTextColumn Header="Type"
                                    Binding="{Binding Path=Type}"/>
                <DataGridTextColumn Header="PostedDate"
                                    Binding="{Binding Path=PostedDate}"/>
                <DataGridTextColumn Header="Description"
                                    Binding="{Binding Path=Description}"/>
            </DataGrid.Columns>
        </DataGrid>
        <!-- Country selector; only referenced from commented-out code in the code-behind shown. -->
        <ComboBox x:Name="cmbCountry" HorizontalAlignment="Left" Margin="536,10,0,0" Grid.Row="1" VerticalAlignment="Top" Width="120" SelectedIndex="0">
            <ComboBoxItem Content="US" />
            <ComboBoxItem Content="Canada"/>
            <ComboBoxItem Content=""/>
        </ComboBox>
        <!-- No Grid.Row on the next two controls, so both sit in row 0 and are placed by their margins. -->
        <Button x:Name="btnResume" Content="Jobs" HorizontalAlignment="Left" Height="23" Margin="452,7,0,0" Click="btnResume_Click" VerticalAlignment="Top" Width="130"/>
        <!-- Text used as the search term; pressing Enter filters the current results (OnKeyDownHandler). -->
        <TextBox x:Name="txtSearch" HorizontalAlignment="Left" Height="23" Margin="587,10,0,0" TextWrapping="Wrap" Text="Marketing" VerticalAlignment="Top" Width="158" KeyDown="OnKeyDownHandler"/>
    </Grid>
</Window>
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Shapes;
using System.Windows.Threading;
namespace CraiglistReader
{
/// <summary>
/// Interaction logic for ResumeWindow.xaml
/// </summary>
public partial class ResumeWindow : Window
{
BackgroundWorker worker = new BackgroundWorker();
List<sites> sbtext = new List<sites>();
List<Cities> cities = new List<Cities>();
List<CitiesData> CitiesData = new List<CitiesData>();
List<Data> ltsData = null;
/// <summary>
/// Creates the resume-finder window. The only work done here is the call to
/// InitializeComponent; the search state fields keep their declared initial values.
/// </summary>
public ResumeWindow()
{
InitializeComponent();
}
/// <summary>
/// Handles the search button. Downloads the craigslist US site index, records every
/// link on that page, writes the collected URLs and names to "urls.txt" and
/// "cities.txt", and then requests the "/search/rrr" listing of every collected site
/// with the text of the search box as query, on a background worker. When the worker
/// finishes, the results are shown in the grid and the button is enabled again.
/// </summary>
/// <param name="sender">Control that raised the click (not used).</param>
/// <param name="e">Click event data (not used).</param>
private void Button_Click_1(object sender, RoutedEventArgs e)
{
    try
    {
        List<string> links = new List<string>();
        List<string> names = new List<string>();

        // Start from an empty site list. The field lives as long as the window, so
        // without this every further click re-adds all sites and queries each of
        // them one more time.
        CitiesData.Clear();

        HtmlDocument doc = new HtmlDocument();
        // Release the client and the response stream as soon as the page is parsed.
        using (WebClient client = new WebClient())
        using (Stream indexStream = client.OpenRead("http://geo.craigslist.org/iso/us"))
        {
            doc.Load(indexStream);
        }

        HtmlNodeCollection linkNodes = doc.DocumentNode.SelectNodes("//a[@href]");
        if (linkNodes != null)
        {
            foreach (HtmlNode link in linkNodes)
            {
                string href = link.GetAttributeValue("href", "");
                CitiesData entry = new CitiesData();
                entry.Url = "";
                if (href.Contains("http://"))
                {
                    links.Add(href);
                    entry.Url = href;
                }
                else
                {
                    // Links without an absolute URL only contribute their text.
                    entry.Country = link.InnerText;
                }
                // InnerText carries no HTML tags.
                names.Add(link.InnerText);
                entry.Cities = link.InnerText;
                CitiesData.Add(entry);
            }
        }

        File.WriteAllText("urls.txt", string.Join(Environment.NewLine, links.ToArray()));
        File.WriteAllText("cities.txt", string.Join(Environment.NewLine, names.ToArray()));

        ProgressBar1.Minimum = 0;
        ProgressBar1.Maximum = short.MaxValue;
        ProgressBar1.Value = 0;

        // Read the search text on the UI thread and escape it for use in a query
        // string. Previously the text was passed along but never appended to the
        // URL, so every request was sent with an empty query.
        string search = txtSearch.Text ?? string.Empty;
        string query = Uri.EscapeDataString(search);

        // Snapshot the sites to query on the UI thread so the worker never touches
        // the shared field.
        List<CitiesData> targets = CitiesData.Where(p => p.Url != "" && p.Cities.Length > 2).ToList();

        ltsData = new List<Data>();
        btnSearch.IsEnabled = false;

        worker = new BackgroundWorker();
        worker.DoWork += delegate(object workSender, DoWorkEventArgs work)
        {
            List<Data> found = new List<Data>();
            foreach (CitiesData target in targets)
            {
                try
                {
                    found.AddRange(readhtml(target.Url + "/search/rrr?query=" + query, search, target.Url));
                }
                catch (Exception ex)
                {
                    // Append so earlier failures are not overwritten by later ones.
                    File.AppendAllText("Error.txt", ex.Message + Environment.NewLine);
                }
            }
            // Hand the result to the completion handler instead of mutating the
            // shared list from the worker thread.
            work.Result = found;
        };
        worker.RunWorkerCompleted += delegate(object doneSender, RunWorkerCompletedEventArgs done)
        {
            if (done.Error != null)
            {
                MessageBox.Show(done.Error.Message);
            }
            else
            {
                ltsData = (List<Data>)done.Result;
                dg.ItemsSource = ltsData.GroupBy(p => p.Title).ToList();
            }
            btnSearch.IsEnabled = true;
        };
        worker.RunWorkerAsync();
    }
    catch (Exception ex)
    {
        // Never leave the button disabled when the search could not be started.
        btnSearch.IsEnabled = true;
        MessageBox.Show(ex.Message);
    }
}
/// <summary>
/// Opens the target of a clicked grid hyperlink through <see cref="Process.Start(string)"/>.
/// Clicks that do not originate from a hyperlink, or from one without a target, are ignored.
/// </summary>
/// <param name="sender">Element the handler is attached to (not used).</param>
/// <param name="e">Routed event data; its OriginalSource is expected to be the hyperlink.</param>
private void DG_Hyperlink_Click(object sender, RoutedEventArgs e)
{
    try
    {
        Hyperlink link = e.OriginalSource as Hyperlink;
        if (link == null || link.NavigateUri == null)
        {
            return;
        }
        Process.Start(link.NavigateUri.ToString());
    }
    catch (Exception ex)
    {
        // Same reporting style as the other handlers of this window; a link that
        // cannot be opened must not take the application down.
        MessageBox.Show(ex.Message);
    }
}
/// <summary>
/// Empty checkbox event handler. NOTE(review): the ResumeWindow markup shown with
/// this class declares no checkbox, so this looks like a leftover copied from
/// WindowUS - confirm before removing.
/// </summary>
private void CheckBox_Checked(object sender, RoutedEventArgs e)
{
}
/// <summary>
/// Downloads one listing page and extracts its entries with <c>FindRow</c>.
/// </summary>
/// <param name="urlAddress">Full URL of the page to request.</param>
/// <param name="search">Not used by this method; kept so existing callers compile.</param>
/// <param name="rawurl">Site URL; "/search/" is appended and the result is passed to FindRow as link base.</param>
/// <returns>
/// The entries found on the page. The list is empty when the response status is
/// not OK or when any error occurs (the error message is shown in a message box).
/// </returns>
List<Data> readhtml(string urlAddress, string search, string rawurl)
{
    List<Data> _listdata = new List<Data>();
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(urlAddress);
        // "using" releases the connection on every path, including a non-OK status
        // and an exception while reading.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            if (response.StatusCode == HttpStatusCode.OK)
            {
                // StreamReader's own default is UTF-8, so UTF-8 is the fallback when
                // the server names no charset or one this runtime does not know.
                Encoding encoding = Encoding.UTF8;
                if (!string.IsNullOrEmpty(response.CharacterSet))
                {
                    try
                    {
                        encoding = Encoding.GetEncoding(response.CharacterSet);
                    }
                    catch (ArgumentException)
                    {
                        // Unknown charset name: keep the UTF-8 fallback.
                    }
                }

                using (Stream receiveStream = response.GetResponseStream())
                using (StreamReader readStream = new StreamReader(receiveStream, encoding))
                {
                    string data = readStream.ReadToEnd();
                    _listdata = FindRow(rawurl + "/search/", data);
                }
            }
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
    return _listdata;
}
/// <summary>
/// Extracts entries from the HTML of a listing page. Every paragraph element with
/// a class attribute is treated as one row; inside a row, each anchor whose markup
/// contains "hdrlnk" yields one <c>Data</c> item (title, absolute link, posted date).
/// </summary>
/// <param name="url">Absolute URL of the listing; relative links are resolved against it.</param>
/// <param name="input">HTML text of the page.</param>
/// <returns>
/// The entries found; empty when <paramref name="input"/> is null or empty. If an
/// unexpected error occurs its message is shown and the items collected so far are returned.
/// </returns>
static List<Data> FindRow(string url, string input)
{
    List<Data> _data = new List<Data>();
    if (string.IsNullOrEmpty(input))
    {
        return _data;
    }
    try
    {
        // Resolve links with Uri instead of cutting a fixed five characters off the
        // base. The base handed in by this window ends in "/search/", so the fixed
        // cut left part of that segment in every generated link.
        Uri baseUri;
        bool hasBase = Uri.TryCreate(url, UriKind.Absolute, out baseUri);

        MatchCollection matches = Regex.Matches(input, "<p class=.*?>(.*?)<\\/p>");
        Console.WriteLine("Matches found: {0}", matches.Count);
        foreach (Match m in matches)
        {
            string row = m.Value;

            // Sentinel used when the row carries no parsable timestamp.
            DateTime posteddt = DateTime.Now.AddYears(-100);
            foreach (Match match in Regex.Matches(row, "<time datetime=.*?>(.*?)</time>"))
            {
                Console.WriteLine("Found '{0}' at position {1}",
                    match.Value, match.Index);
                // Skip the 16 characters of the tag opening up to and including the
                // quote, then read the next 16 characters as the timestamp
                // (presumably "yyyy-MM-dd HH:mm").
                const int prefixLength = 16;
                const int stampLength = 16;
                if (match.Value.Length < prefixLength + stampLength)
                {
                    continue;
                }
                string stamp = match.Value.Substring(prefixLength, stampLength);
                DateTime parsed;
                // Invariant culture: the markup is machine-written, not user text.
                if (DateTime.TryParse(stamp, System.Globalization.CultureInfo.InvariantCulture,
                        System.Globalization.DateTimeStyles.None, out parsed))
                {
                    posteddt = parsed;
                }
            }

            foreach (Match mi in Regex.Matches(row, "<a href=.*?>(.*?)<\\/a>"))
            {
                if (!mi.Value.Contains("hdrlnk"))
                {
                    continue;
                }

                // Title: anchor text up to the first nested tag, if any.
                string title = mi.Groups[1].Value;
                int nestedTag = title.IndexOf('<');
                if (nestedTag >= 0)
                {
                    title = title.Substring(0, nestedTag);
                }

                // Read the href value itself rather than cutting characters off the
                // attribute text and re-adding ".html".
                Match hrefMatch = Regex.Match(mi.Value, "href\\s*=\\s*[\"']([^\"']*)[\"']");
                if (!hrefMatch.Success)
                {
                    continue;
                }
                string href = hrefMatch.Groups[1].Value;

                string link = href;
                Uri resolved;
                if (hasBase && Uri.TryCreate(baseUri, href, out resolved))
                {
                    link = resolved.AbsoluteUri;
                }

                _data.Add(new Data() { Title = title, Link = link, PostedDate = posteddt });
            }
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
    return _data;
}
/// <summary>
/// Opens the job-finder window. The button that raises this event is captioned
/// "Jobs" in the window markup; it used to open a second resume finder instead.
/// </summary>
/// <param name="sender">Button that was clicked (not used).</param>
/// <param name="e">Click event data (not used).</param>
private void btnResume_Click(object sender, RoutedEventArgs e)
{
    try
    {
        new WindowUS().Show();
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
}
/// <summary>
/// Filters the results of the last search when Enter is pressed in the search box:
/// only items whose title contains the typed text (ignoring case) stay in the grid.
/// </summary>
/// <param name="sender">Text box that raised the event (not used).</param>
/// <param name="e">Key event data; only <see cref="Key.Return"/> triggers the filter.</param>
private void OnKeyDownHandler(object sender, KeyEventArgs e)
{
    try
    {
        if (e.Key != Key.Return)
        {
            return;
        }
        // ltsData stays null until the first search has been started; there is
        // nothing to filter then, and dereferencing it would only produce a
        // "null reference" message box.
        if (ltsData == null)
        {
            return;
        }

        // Read the text once instead of once per item.
        string filter = txtSearch.Text ?? string.Empty;
        dg.ItemsSource = ltsData
            .Where(p => p.Title != null && p.Title.IndexOf(filter, StringComparison.OrdinalIgnoreCase) >= 0)
            .GroupBy(p => p.Title)
            .ToList();
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
}
}
}
Supporting classes
/// <summary>
/// One row of the result grid. The windows' FindRow methods fill Title, Link and
/// PostedDate; Description and Type are bound by grid columns but are not assigned
/// anywhere in the code shown here.
/// </summary>
public class Data
{
// Text of the listing's headline anchor.
public string Title { get; set; }
// URL of the listing, shown as a hyperlink in the grid.
public string Link { get; set; }
// Not populated by the visible code.
public string Description { get; set; }
// Not populated by the visible code.
public string Type { get; set; }
// Timestamp parsed from the listing row; FindRow starts from "now minus 100 years"
// and overwrites it when a timestamp is parsed.
public DateTime PostedDate { get; set; }
}
/// <summary>
/// Holder for a single link string. The windows declare a list of these (sbtext)
/// but the code shown never adds to or reads from it.
/// </summary>
public class sites
{
public string link { get; set; }
}
/// <summary>
/// Holder for a single URL string. The windows declare a list of these (cities)
/// but the code shown never adds to or reads from it; the searches use the separate
/// CitiesData type, which is not defined in this listing.
/// </summary>
public class Cities
{
public string Url { get; set; }
}
/// <summary>
/// Pair of public string fields (Pid, title). Not referenced by any code shown in
/// this listing. NOTE(review): presumably a posting id and its title - confirm
/// against callers elsewhere before relying on that.
/// </summary>
public class DataPid
{
public string Pid;
public string title;
}