using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace ConsoleApp1
{
/// <summary>
/// Console crawler: downloads a page several times, extracts every anchor that
/// matches <see cref="LinkPattern"/>, and prints the captured category/title pairs.
/// </summary>
class Program
{
    /// <summary>
    /// One extracted link: the text captured by the <c>category</c> and
    /// <c>title</c> regex groups.
    /// </summary>
    class Corection
    {
        /// <summary>Text captured by the <c>category</c> group.</summary>
        public string Category { get; set; }

        /// <summary>Text captured by the <c>title</c> group.</summary>
        public string Title { get; set; }
    }

    // Built once instead of once per downloaded page.
    // NOTE(review): the two lazy groups are adjacent with no delimiter between
    // them, so on any successful match "category" captures the empty string and
    // "title" captures everything up to "</a>". The pattern text is left as
    // written because the target markup is not visible here; add the real
    // delimiter (e.g. the closing quote of href) once the page format is known.
    private static readonly Regex LinkPattern =
        new Regex("<a href=\"(?<category>.*?)(?<title>.*?)</a>");

    /// <summary>
    /// Downloads the page for each loop iteration, collects every link match,
    /// and writes the results to standard output as <c>category:title</c>.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        // A list keeps every match; a dictionary keyed by category would keep
        // only one title per category (or throw on Add for a repeated key).
        var found = new List<Corection>();

        // The client is disposed once, after the last download, rather than
        // inside the loop while it is still needed.
        using (var wc = new WebClient { Encoding = Encoding.GetEncoding("UTF-8") })
        {
            for (int i = 1; i < 5; i++)
            {
                // NOTE(review): "url" is a placeholder and does not depend on i,
                // so the same address is requested on every iteration - confirm
                // the real paged URL and substitute i into it.
                string html = wc.DownloadString("url");

                // Walk all matches, starting with the first one.
                for (Match m = LinkPattern.Match(html); m.Success; m = m.NextMatch())
                {
                    found.Add(new Corection
                    {
                        Category = m.Groups["category"].Value,
                        Title = m.Groups["title"].Value
                    });
                }
            }
        }

        foreach (Corection item in found)
        {
            Console.WriteLine("{0}:{1}", item.Category, item.Title);
        }
    }
}
}
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Net;
using System.Text.RegularExpressions;
using System.Threading;

namespace Crawlertest1
{
    /// <summary>
    /// Console crawler: downloads a run of consecutive teratail question pages,
    /// extracts category, title and number from each page's &lt;title&gt; element,
    /// and prints them sorted by category and then by number.
    /// </summary>
    class Program
    {
        /// <summary>One parsed question page.</summary>
        class Question
        {
            /// <summary>Text captured by the <c>category</c> group.</summary>
            public string Category { get; set; }

            /// <summary>Text captured by the <c>title</c> group.</summary>
            public string Title { get; set; }

            /// <summary>Integer parsed from the <c>num</c> group.</summary>
            public int Number { get; set; }
        }

        /// <summary>Id of the first question requested; later requests count down from it.</summary>
        const int NewestQuestionId = 79594;

        /// <summary>How many consecutive question ids are requested.</summary>
        const int QuestionCount = 40;

        /// <summary>Pause after each request, in milliseconds.</summary>
        const int DelayMilliseconds = 4000;

        // Built once instead of once per page.
        // NOTE(review): the '|' is unescaped, so this is an alternation between
        // "<title>... - ...(num)" and the bare text "teratail</title>". The second
        // branch matches with all named groups empty, which is why the groups are
        // checked individually in TryParseQuestion. The pattern text is left
        // unchanged because the real page title format is not visible here; if a
        // literal pipe was intended it should be written as "\|".
        static readonly Regex TitlePattern =
            new Regex(@"<title>(?<category>.*?) - (?<title>.*?)\((?<num>\d+?)\)|teratail</title>");

        /// <summary>
        /// Tries to build a <see cref="Question"/> from a page's HTML.
        /// </summary>
        /// <param name="html">Downloaded page source.</param>
        /// <param name="question">The parsed question, or <c>null</c> when parsing fails.</param>
        /// <returns>
        /// <c>true</c> when the pattern matched with a numeric <c>num</c> group;
        /// otherwise <c>false</c>.
        /// </returns>
        static bool TryParseQuestion(string html, out Question question)
        {
            question = null;

            Match m = TitlePattern.Match(html);
            Group num = m.Groups["num"];
            if (!m.Success || !num.Success)
            {
                return false;
            }

            int number;
            if (!int.TryParse(num.Value, NumberStyles.None, CultureInfo.InvariantCulture, out number))
            {
                return false;
            }

            question = new Question
            {
                Category = m.Groups["category"].Value,
                Title = m.Groups["title"].Value,
                Number = number
            };
            return true;
        }

        /// <summary>
        /// Crawls the configured range of question pages and prints the result.
        /// Pages that cannot be downloaded or parsed are reported on standard
        /// error and skipped instead of aborting the whole run.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var list = new List<Question>();

            using (var wc = new WebClient())
            {
                wc.Encoding = System.Text.Encoding.UTF8;

                for (int i = 0; i < QuestionCount; i++)
                {
                    string url = $"https://teratail.com/questions/{NewestQuestionId - i}";

                    try
                    {
                        string data = wc.DownloadString(url);

                        Question parsed;
                        if (TryParseQuestion(data, out parsed))
                        {
                            list.Add(parsed);
                        }
                        else
                        {
                            Console.Error.WriteLine($"Skipped {url}: title did not match the expected pattern.");
                        }
                    }
                    catch (WebException ex)
                    {
                        // One failed request must not discard the pages already collected.
                        Console.Error.WriteLine($"Skipped {url}: {ex.Message}");
                    }

                    // Wait between requests, including after a failed one.
                    Thread.Sleep(DelayMilliseconds);
                }
            }

            var questionSort = list.OrderBy(o => o.Category).ThenBy(o => o.Number);

            foreach (var q in questionSort)
            {
                Console.WriteLine($"{q.Category}:{q.Title}:{q.Number}");
            }

            Console.ReadKey();
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using System.Net;
using System.Threading;
using Newtonsoft.Json;

namespace Crawltest2
{
    /// <summary>
    /// Console crawler: downloads several listing pages, extracts category, title,
    /// date and description text from each entry, sorts the entries, and writes
    /// them as indented JSON to <c>test.txt</c>.
    /// </summary>
    class Program
    {
        /// <summary>One listing entry assembled from the four extracted columns.</summary>
        class Data
        {
            /// <summary>Text captured by the <c>category</c> group.</summary>
            public string Category { get; set; }

            /// <summary>Text captured by the <c>title</c> group.</summary>
            public string Title { get; set; }

            /// <summary>Text captured by the <c>date</c> group (kept as a string).</summary>
            public string Date { get; set; }

            /// <summary>Text captured by the <c>text</c> group.</summary>
            public string Text { get; set; }
        }

        // Patterns are built once instead of once per downloaded page.
        static readonly Regex CategoryPattern =
            new Regex(@"<li class=""category"" style="".*?"">(?<category>.*?)</li>");

        static readonly Regex TitlePattern =
            new Regex(@"<li class=""title""><a href="".*?"">(?<title>.*?)</a></li>");

        static readonly Regex DatePattern =
            new Regex(@"<li class=""date"">(?<date>.*?)</li>");

        static readonly Regex TextPattern =
            new Regex(@"<li class=""description(|Single)"">(?<text>.*?)<");

        /// <summary>
        /// Collects the value of one named group from every match of a pattern.
        /// </summary>
        /// <param name="source">Text to search.</param>
        /// <param name="re">Pattern to apply.</param>
        /// <param name="group">Name of the capture group to read from each match.</param>
        /// <returns>The captured values in match order; empty when nothing matches.</returns>
        static List<string> Get_Data(string source, Regex re, string group)
        {
            var values = new List<string>();

            for (Match m = re.Match(source); m.Success; m = m.NextMatch())
            {
                values.Add(m.Groups[group].Value);
            }

            return values;
        }

        /// <summary>
        /// Downloads pages 1 to 5, extracts their entries, and writes the sorted
        /// result to <c>test.txt</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var for_show = new List<Data>();

            using (var wc = new WebClient())
            {
                wc.Encoding = System.Text.Encoding.UTF8;

                for (int i = 1; i < 6; i++)
                {
                    // NOTE(review): "xxxxxxx" is the redacted base address from the
                    // original listing; replace it with the real URL prefix.
                    string url = $"xxxxxxx{i}";

                    string html = wc.DownloadString(url);

                    List<string> tmp_categorys = Get_Data(html, CategoryPattern, "category");
                    List<string> tmp_titles = Get_Data(html, TitlePattern, "title");
                    List<string> tmp_date = Get_Data(html, DatePattern, "date");
                    List<string> tmp_text = Get_Data(html, TextPattern, "text");

                    // The first description match is discarded, as in the original
                    // (presumably it is not part of the entry list - confirm against
                    // the page). Guarded so a page without descriptions does not throw.
                    if (tmp_text.Count > 0)
                    {
                        tmp_text.RemoveAt(0);
                    }

                    // The four lists are read in parallel, so only rows present in
                    // all of them can be combined safely.
                    int rows = new[] { tmp_categorys.Count, tmp_titles.Count, tmp_date.Count, tmp_text.Count }.Min();
                    if (rows != tmp_categorys.Count)
                    {
                        Console.Error.WriteLine($"{url}: column counts differ; keeping the first {rows} of {tmp_categorys.Count} entries.");
                    }

                    for (int n = 0; n < rows; n++)
                    {
                        for_show.Add(new Data
                        {
                            Category = tmp_categorys[n],
                            Title = tmp_titles[n],
                            Date = tmp_date[n],
                            Text = tmp_text[n]
                        });
                    }

                    // System.Threading.Thread.Sleep(4000);
                }
            }

            // NOTE(review): Date is compared as a string, so this ordering is only
            // chronological if the page's date format sorts lexically - confirm.
            var for_showSort = for_show.OrderBy(o => o.Category).ThenByDescending(o => o.Date);

            string jsonstring = JsonConvert.SerializeObject(for_showSort, Formatting.Indented);

            // NOTE(review): on .NET Core / .NET 5+ "shift_jis" is only available
            // after registering CodePagesEncodingProvider - confirm the target runtime.
            using (var sw = new System.IO.StreamWriter("test.txt", false, System.Text.Encoding.GetEncoding("shift_jis")))
            {
                sw.WriteLine(jsonstring);
            }
        }
    }
}
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。
退会済みユーザー
2017/06/10 12:30
2017/06/10 19:57
退会済みユーザー
2017/06/11 07:44