C#编程之.net开发的当当网信息查询工具
白羽 2018-07-10 来源 :网络 阅读 1104 评论 0

摘要:本文将带你了解如何用 C#/.NET 开发一个当当网图书信息查询工具,希望对大家学习 C#/.NET 有所帮助。


学生时代的小玩具

这是一个用 C#/.NET 开发的小工具,用来抓取当当网计算机类图书的信息。

 

Program.cs

using System;using System.Collections.Generic;using System.Linq;using System.Windows.Forms;namespace spider

{

    static class Program

    {

        /// <summary>

        /// 应用程序的主入口点。

        /// </summary>

        [STAThread]

        static void Main()

        {

            Application.EnableVisualStyles();

            Application.SetCompatibleTextRenderingDefault(false);

            Application.Run(new Form1());

        }

    }

}


Form1.cs

using System;using System.Collections.Generic;using System.ComponentModel;using System.Data;using System.Drawing;using System.Linq;using System.Text;using System.Windows.Forms;using System.IO;namespace spider

{

    public partial class Form1 : Form

    {

        private string url = @"//category.dangdang.com/all/?category_path=01.54.26.00.00.00&page_index=";

        private static int page = 1;

        private Parse p;

        public Form1()

        {

            InitializeComponent();

        }

        private void buttonstart_Click(object sender, EventArgs e)

        {

            page = 1;

            Execute();

        }

        private void buttonprev_Click(object sender, EventArgs e)

        {

            page--;

            Execute();

        }

        private void buttonnext_Click(object sender, EventArgs e)

        {

            page++;

            Execute();

        }

        private void buttonjump_Click(object sender, EventArgs e)

        {

            page = int.Parse(textBox2.Text);

            Execute();

        }

        private void Execute()

        {

            webBrowser1.Navigate(url + page.ToString());

            textBox1.Text = url + page.ToString();

            Cursor.Current = Cursors.WaitCursor;

        }

        private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)

        {

            HtmlDocument doc = webBrowser1.Document;

            p = new Parse(doc);

            DataTable dt = p.dt;

            dataGridView1.DataSource = dt;

            dataGridView1.Columns[0].Width = 150;

            dataGridView1.Columns[1].Width = 150;

            dataGridView1.Columns[2].Width = 150;

            dataGridView1.Columns[3].Width = 80;

            dataGridView1.Columns[4].Width = 450;

            Cursor.Current = Cursors.Default;

            MessageBox.Show("解析完成");

        }

        private void buttonsave_Click(object sender, EventArgs e)

        {

            SaveFileDialog sfd = new SaveFileDialog();

            sfd.DefaultExt = "txt";

            if (sfd.ShowDialog() == DialogResult.OK)

            {

                string path = sfd.FileName;

                StringBuilder sb = new StringBuilder();

                List<Book> list = p.list;

                foreach (Book book in list)

                {

                    sb.Append(book.ToString());

                }

                string text = sb.ToString();

                File.AppendAllText(path, text, Encoding.Default);

                MessageBox.Show("保存成功\n" + path);

            }

        }

    }

}

 

Book.cs

using System;using System.Collections.Generic;using System.Linq;using System.Text;namespace spider

{

    class Book

    {

        public string name { get; set; }

        public string author { get; set; }

        public string pub { get; set; }

        public string time { get; set; }

        public string describ { get; set; }

        public Book()

        {

        }

        public Book(string name, string author, string pub, string time, string describ)

        {

            this.name = name;

            this.author = author;

            this.pub = pub;

            this.time = time;

            this.describ = describ;

        }

        public override string ToString()

        {

            return "书名:" + name + "\r\n"

                + "作者:" + author + "\r\n"

                + "出版商:" + pub + "\r\n"

                + "出版时间:" + time + "\r\n"

                + "描述:" + describ + "\r\n\r\n";

        }

    }

}


Parse.cs

using System;using System.Collections.Generic;using System.Linq;using System.Text;using System.Windows.Forms;using System.Data;using System.Text.RegularExpressions;namespace spider

{

    class Parse

    {

        private HtmlDocument dom;

        public DataTable dt { get; set; }

        public List<Book> list { get; set; }

        public Parse(HtmlDocument dom)

        {

            this.dom = dom;

            dt = new DataTable();

            list = new List<Book>();

            dt.Columns.Add("书名");

            dt.Columns.Add("作者");

            dt.Columns.Add("出版社");

            dt.Columns.Add("出版时间");

            dt.Columns.Add("描述");

            Execute();

        }

        public void Execute()

        {

            HtmlElementCollection els = dom.GetElementsByTagName("div");

            foreach (HtmlElement el in els)

            {

                if (el.GetAttribute("classname") == "listitem detail")//图书信息

                {

                    Book book = new Book();

                    HtmlElementCollection els2 = el.GetElementsByTagName("li");

                    foreach (HtmlElement el2 in els2)

                    {

                        if (el2.GetAttribute("classname") == "maintitle")//书名

                        {

                            book.name = el2.OuterText;

                        }

                        if (el2.GetAttribute("classname") == "publisher_info")

                        {

                            HtmlElementCollection els3 = el2.GetElementsByTagName("a");

                            StringBuilder sb = new StringBuilder();

                            foreach (HtmlElement el3 in els3)

                            {

                                if (el3.GetAttribute("name") == "Author")//作者

                                {

                                    if (sb.Length==0)

                                    {

                                        sb.Append(el3.OuterText);

                                    }

                                    else

                                    {

                                        sb.Append("," + el3.OuterText);

                                    }

                                }

                                if (el3.GetAttribute("name") == "Pub")//出版商

                                {

                                    book.pub = el3.OuterText;

                                }

                            }

                            book.author = sb.ToString();

                            Regex r = new Regex(@"(\d{4})\-(\d{2})\-(\d{2})");

                            Match m = r.Match(el2.OuterText);

                            if (m.Success)//出版时间

                            {

                                book.time = m.Value;

                            }

                        }

                        if (el2.GetAttribute("classname") == "describ")//描述

                        {

                            book.describ = el2.OuterText;

                        }

                    }

                    DataRow dr = dt.NewRow();

                    dr["书名"] = book.name;

                    dr["作者"] = book.author;

                    dr["出版社"] = book.pub;

                    dr["出版时间"] = book.time;

                    dr["描述"] = book.describ;

                    dt.Rows.Add(dr);

                    list.Add(book);

                }

            }

        }

    }

}

 

 


以上就介绍了C#.NET的相关知识,希望对C#.NET有兴趣的朋友有所帮助。了解更多内容,请关注职坐标编程语言C#.NET频道!


本文由 @白羽 发布于职坐标。未经许可,禁止转载。
喜欢 | 0 不喜欢 | 0
看完这篇文章有何感觉?已经有0人表态,0%的人喜欢 快给朋友分享吧~
评论(0)
后参与评论

您输入的评论内容中包含违禁敏感词

我知道了

助您圆梦职场 匹配合适岗位
验证码手机号,获得海同独家IT培训资料
选择就业方向:
人工智能物联网
大数据开发/分析
人工智能Python
Java全栈开发
WEB前端+H5

请输入正确的手机号码

请输入正确的验证码

获取验证码

您今天的短信下发次数太多了,明天再试试吧!

提交

我们会在第一时间安排职业规划师联系您!

您也可以联系我们的职业规划师咨询:

小职老师的微信号:z_zhizuobiao
小职老师的微信号:z_zhizuobiao

版权所有 职坐标-一站式IT培训就业服务领导者 沪ICP备13042190号-4
上海海同信息科技有限公司 Copyright ©2015 www.zhizuobiao.com,All Rights Reserved.
 沪公网安备 31011502005948号    

©2015 www.zhizuobiao.com All Rights Reserved

208小时内训课程