✅ Lập trình website dùng Laravel bằng ngôn ngữ gì?

Bạn có thể sử dụng ngôn ngữ PHP trong Laravel Framework để xây dựng website.

✅ Sử dụng Vue.js trong Laravel Framework như thế nào?

Bạn có thể tích hợp giữa Laravel và Vue.js để phát triển website, Laravel 5.7 + Vue SPA

✅ Kết hợp Laravel Framework và Angular như thế nào?

Bạn có thể kết hợp giữa Laravel và Angular xây dựng website SPA

✅ Làm sao để xây dựng website Single Page Application?

Có thể kết hợp Laravel + Vue.js, Laravel + Angular, Node.js + Vue.js hoặc Node.js + Angular

✅ Xây dựng website ASP.NET MVC với Angular Framework được không?

Bạn hoàn toàn có thể kết hợp hai công nghệ này: Build Angular + ASP.NET MVC 5

✅ Lập trình website bằng ASP.NET Core như thế nào?

Bạn có thể lập trình website bằng ASP.NET Core

Crawl Data Website Using NodeJS

Hôm nay mình chia sẻ với mọi người cách crawl dữ liệu từ website về bằng NodeJS. Có rất nhiều cách làm, chẳng hạn có người dùng PHP, C#, Java,... Nhưng trong hôm nay mình sẽ làm demo về cách lấy dữ liệu từ website về bằng NodeJS xem sao nhé.

Đầu tiên các bạn cài các thư viện để hỗ trợ crawl. Okay, hãy mở CMD lên và chạy lệnh sau:


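npm install request-promise cheerio request

+ request-promise : dùng thư viện này giúp ta gọi tới một URL nào đó, để có thể nhận dữ liệu trả về
+ request : thư viện mà request-promise cần cài kèm theo (peer dependency), trong bài còn dùng để tải hình ảnh về máy
+ cheerio : dùng để parse HTML thành DOM, ta có thể dùng cú pháp giống jQuery trỏ tới từng (id, class) để lấy giá trị
+ fs : module có sẵn của Node.js (không cần cài bằng npm), dùng để đọc và ghi file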

Okay, sau khi tải các thư viện cần thiết rồi, ta tiến hành cài đặt project như sau
+ Đầu tiên mình tạo file dslink.txt, chứa các URL mà ta muốn chạy lấy dữ liệu về máy tính (mỗi URL một dòng)
+ Tạo thư mục images trong project, dùng để lưu hình ảnh download về máy tính
+ Tạo file data.json để lưu các thông tin data

Tạo file index.js như sau :


const rp = require("request-promise");
const cheerio = require("cheerio");
const request = require('request');
const fs = require("fs");

/**
 * Pause for a given amount of time.
 *
 * @param {number} ms - Delay in milliseconds, handed to setTimeout.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep(ms) {
    return new Promise(function (wake) {
        setTimeout(wake, ms);
    });
}

// File that holds the list of links to crawl, one URL per line.
const dslink = "dslink.txt";

// Load the list of links into an array.
// - A leading byte-order mark (added by some Windows editors) is removed so
//   it does not end up inside the first URL.
// - Splitting on /\r?\n/ accepts both Unix (LF) and Windows (CRLF) line
//   endings; splitting on "\n" alone would leave a "\r" on every URL.
// - Blank lines (including the one produced by a trailing newline) are
//   dropped, because an empty URL makes the request fail.
const arrayLink = fs
    .readFileSync(dslink, "utf8")
    .replace(/^\uFEFF/, "")
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter((line) => line !== "");

/**
 * Build the local file name for an image from its `src` value.
 * The query string and fragment are dropped first, so "a/pic.png?v=2"
 * becomes "pic.png" instead of "pic.png?v=2".
 *
 * @param {string} src - Value of the image's `src` attribute.
 * @returns {string} Text after the last "/" (may be empty).
 */
function imageFileName(src) {
    const withoutQuery = src.split(/[?#]/)[0];
    return withoutQuery.substring(withoutQuery.lastIndexOf("/") + 1);
}

/**
 * Resolve an image `src` (absolute or relative) against the page it was found on.
 *
 * @param {string} src - Value of the image's `src` attribute.
 * @param {string} pageUrl - URL of the page being crawled.
 * @returns {string|null} Absolute http(s) URL, or null when `src` cannot be
 *   parsed or uses another scheme (for example an inline `data:` image).
 */
function resolveImageUrl(src, pageUrl) {
    try {
        const url = new URL(src, pageUrl);
        return url.protocol === "http:" || url.protocol === "https:" ? url.href : null;
    } catch (error) {
        return null;
    }
}

/**
 * Crawl every link in `arrayLink`, one after another.
 *
 * For each page: fetch and parse it, read the text of the paragraphs inside
 * `.info_content`, start a download for every image found in those
 * paragraphs, and rewrite `data.json` with the results of all pages crawled
 * so far.
 *
 * @returns {Promise<Error|undefined>} Resolves with the request error of the
 *   first link that could not be fetched (the crawl stops there), otherwise
 *   with undefined once every link has been processed.
 */
async function crawler() {
    // Initial pause before the first request.
    await sleep(1000);

    // Shared by all links: declaring it inside the loop would make data.json
    // contain only the last crawled page.
    const data = [];

    // for...of with a block-scoped variable; a bare `for (i in ...)` would
    // create an implicit global and throws in strict mode / ES modules.
    for (const linkchay of arrayLink) {
        let $;
        try {
            $ = await rp({
                uri: linkchay,
                // Parse the response body with cheerio so it can be queried with selectors.
                transform: (body) => cheerio.load(body),
            });
        } catch (error) {
            // Report the link where the crawl stopped and abort; the pages
            // collected before it have already been written to data.json.
            console.log("Link dang dung:" + linkchay);
            return error;
        }

        // Parent element (class or id) that contains the page content.
        const tableContent = $(".info_content");
        const paragraphs = tableContent.find("p");
        // Text of the paragraphs, stored as the chapter title.
        const chaperTitle = paragraphs.text().trim();

        // Images inside the article paragraphs.
        const chaperData = [];
        const images = paragraphs.find("img");
        for (let j = 0; j < images.length; j++) {
            const post = $(images[j]);
            const src = post.attr("src");
            // An <img> without a src attribute has nothing to download.
            if (!src) {
                continue;
            }

            const imageUrl = resolveImageUrl(src, linkchay);
            const filename = imageFileName(src);
            // Skip sources that are not downloadable or yield no file name.
            if (imageUrl === null || filename === "") {
                continue;
            }

            // Start the download; it completes in the background.
            download(imageUrl, filename, function () {});
            chaperData.push({
                postTitle: post.text().trim(),
                linkchay,
                filename,
            });
        }
        data.push({
            chaperTitle,
            chaperData,
        });

        // Persist after every page so a later failure keeps the progress made so far.
        fs.writeFileSync('data.json', JSON.stringify(data));
        console.log(linkchay + "------------->done");

        // Delay between two pages.
        await sleep(1000);
    }
}
// Start the crawl. crawler() returns a promise, so a rejection handler is
// attached: an unexpected failure (for example data.json cannot be written)
// is reported and reflected in the exit code instead of ending up as an
// unhandled promise rejection.
crawler().catch(function (error) {
    console.error("Crawler failed:", error);
    process.exitCode = 1;
});

/**
 * Download a remote file into the ./images/ folder.
 *
 * A HEAD request is sent first and the reported content type and length are
 * logged; the body is then streamed into ./images/<filename>.
 *
 * Written as a function declaration so it is hoisted and can safely be
 * called from code that appears earlier in the file.
 *
 * @param {string} uri - URL of the file to download.
 * @param {string} filename - Name of the file to create inside ./images/.
 * @param {function(Error=): void} callback - Called exactly once: without an
 *   argument when the file has been written, or with the error when the HEAD
 *   request, the download or the file write fails.
 */
function download(uri, filename, callback) {
    let finished = false;
    // Several stream events may fire for one download; report only the first outcome.
    const finish = function (error) {
        if (finished) {
            return;
        }
        finished = true;
        if (typeof callback === 'function') {
            callback(error);
        }
    };

    request.head(uri, function (err, res) {
        // On a failed request `res` is undefined, so bail out before reading headers.
        if (err) {
            console.log('download error:', uri, err.message);
            finish(err);
            return;
        }
        console.log('content-type:', res.headers['content-type']);
        console.log('content-length:', res.headers['content-length']);

        const file = fs.createWriteStream('./images/' + filename);
        // For example the images folder is missing or not writable.
        file.on('error', finish);
        file.on('close', function () {
            finish();
        });

        request(uri)
            .on('error', function (downloadError) {
                // Release the file handle of the partially written file.
                file.destroy();
                finish(downloadError);
            })
            .pipe(file);
    });
}

Bạn nhìn đoạn code bên trên mình tạo function sleep để delay nó một khoảng thời gian khi request URL


/**
 * Wait for `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds, handed to setTimeout.
 * @returns {Promise<void>} Promise resolved when the timer expires.
 */
function sleep(ms) {
    const timerPromise = new Promise((done) => {
        setTimeout(done, ms);
    });
    return timerPromise;
}

Đọc danh sách file dslink.txt


//dslink.txt
//https://100daysofcode.hoanguyenit.com/create-project-laravel-58-using-composer.html

const dslink = "dslink.txt";
// Read the links, one URL per line. A leading byte-order mark is removed,
// /\r?\n/ accepts both LF and CRLF line endings (splitting on "\n" alone
// would leave a "\r" on every URL), and blank lines are dropped because an
// empty URL makes the request fail.
const arrayLink = fs
    .readFileSync(dslink, "utf8")
    .replace(/^\uFEFF/, "")
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter((line) => line !== "");

Request URL để nhận dữ liệu trả về, sau đó gọi cheerio.load(body), để sau đó ta có thể dùng cú pháp jQuery trỏ tới (id, class) để lấy dữ liệu


  // Excerpt from the body of crawler(): fetch and parse the page for the current link.
  const linkchay = arrayLink[i];
        try {
            const options = {
                uri: linkchay,
                transform: function (body) {
                    // When the page has been fetched successfully, its body is passed to cheerio.load
                    // and the result of that call becomes the value returned by rp().
                    return cheerio.load(body);
                },
            };
            var $ = await rp(options);
        } catch (error) {
            // Log the link that failed, then return the error from the enclosing function.
            console.log("Link dang dung:" + arrayLink[i]);
            return error;
        }

Chúng ta cần lấy đúng tên của hình ảnh khi download xuống


 // Excerpt from the body of crawler(): value of the image's "src" attribute.
 const postLink = post.attr("src");
            // Index of the last "/" so we know where to cut to get the image name.
            const n = postLink.lastIndexOf("/");
            // Image name: everything after the last "/".
            const filename = postLink.substring(n + 1, postLink.length);

Gọi hàm download để tải hình ảnh về , save vào folder images


// Excerpt from the body of crawler(): pass the image URL and the image name to download().
// The completion callback is intentionally empty.
download(postLink, filename, function () {
   //console.log("Link:"+linkchay);
});
/**
 * Download a remote file into the ./images/ folder.
 *
 * A HEAD request is sent first and the reported content type and length are
 * logged; the body is then streamed into ./images/<filename>.
 *
 * Written as a function declaration so it is hoisted and can safely be
 * called from code that appears earlier in the file.
 *
 * @param {string} uri - URL of the file to download.
 * @param {string} filename - Name of the file to create inside ./images/.
 * @param {function(Error=): void} callback - Called exactly once: without an
 *   argument when the file has been written, or with the error when the HEAD
 *   request, the download or the file write fails.
 */
function download(uri, filename, callback) {
    let finished = false;
    // Several stream events may fire for one download; report only the first outcome.
    const finish = function (error) {
        if (finished) {
            return;
        }
        finished = true;
        if (typeof callback === 'function') {
            callback(error);
        }
    };

    request.head(uri, function (err, res) {
        // On a failed request `res` is undefined, so bail out before reading headers.
        if (err) {
            console.log('download error:', uri, err.message);
            finish(err);
            return;
        }
        console.log('content-type:', res.headers['content-type']);
        console.log('content-length:', res.headers['content-length']);

        const file = fs.createWriteStream('./images/' + filename);
        // For example the images folder is missing or not writable.
        file.on('error', finish);
        file.on('close', function () {
            finish();
        });

        request(uri)
            .on('error', function (downloadError) {
                // Release the file handle of the partially written file.
                file.destroy();
                finish(downloadError);
            })
            .pipe(file);
    });
}

Okay vậy là xong, các bạn có thể tìm hiểu thêm nữa nhé!

Nếu bạn thấy đem lại kiến thức bổ ích, hãy Click xem quảng cáo trên trang website của mình nhé! 🚀