Kouhei Sutou
null+****@clear*****
Sun Feb 19 23:40:10 JST 2017
Kouhei Sutou 2017-02-19 23:40:10 +0900 (Sun, 19 Feb 2017) New Revision: 701b5e5463e5d0945b36383658ac649b761be074 https://github.com/ranguba/ranguba-server/commit/701b5e5463e5d0945b36383658ac649b761be074 Message: Support scraping Added files: app/assets/javascripts/scrapings.coffee app/assets/stylesheets/scrapings.scss app/controllers/application_api_controller.rb app/controllers/scrapings_controller.rb app/helpers/scrapings_helper.rb app/models/scraping.rb bin/ranguba config/initializers/chupa_text.rb test/controllers/scrapings_controller_test.rb test/factories/scrapings.rb test/models/scraping_test.rb Modified files: Gemfile Gemfile.lock app/views/entries/_entry.json.jbuilder config/routes.rb Modified: Gemfile (+3 -0) =================================================================== --- Gemfile 2017-02-19 22:18:11 +0900 (88f3b53) +++ Gemfile 2017-02-19 23:40:10 +0900 (90b4cb7) @@ -51,3 +51,6 @@ end gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby] gem 'groonga-client-model' +gem 'chupa-text', path: '../chupa-text' +gem 'chupa-text-decomposer-html', path: '../chupa-text-decomposer-html' +gem 'chupa-text-decomposer-mail', path: '../chupa-text-decomposer-mail' Modified: Gemfile.lock (+22 -0) =================================================================== --- Gemfile.lock 2017-02-19 22:18:11 +0900 (966f6e5) +++ Gemfile.lock 2017-02-19 23:40:10 +0900 (1b7f6d3) @@ -1,3 +1,22 @@ +PATH + remote: ../chupa-text-decomposer-html + specs: + chupa-text-decomposer-html (1.0.2) + chupa-text + nokogiri + +PATH + remote: ../chupa-text-decomposer-mail + specs: + chupa-text-decomposer-mail (1.0.0) + chupa-text + mail + +PATH + remote: ../chupa-text + specs: + chupa-text (1.0.5) + GEM remote: https://rubygems.org/ specs: @@ -209,6 +228,9 @@ PLATFORMS DEPENDENCIES byebug + chupa-text! + chupa-text-decomposer-html! + chupa-text-decomposer-mail! 
coffee-rails (~> 4.2) factory_girl_rails groonga-client-model Added: app/assets/javascripts/scrapings.coffee (+3 -0) 100644 =================================================================== --- /dev/null +++ app/assets/javascripts/scrapings.coffee 2017-02-19 23:40:10 +0900 (24f83d1) @@ -0,0 +1,3 @@ +# Place all the behaviors and hooks related to the matching controller here. +# All this logic will automatically be available in application.js. +# You can use CoffeeScript in this file: http://coffeescript.org/ Added: app/assets/stylesheets/scrapings.scss (+3 -0) 100644 =================================================================== --- /dev/null +++ app/assets/stylesheets/scrapings.scss 2017-02-19 23:40:10 +0900 (ea8de25) @@ -0,0 +1,3 @@ +// Place all the styles related to the scrapings controller here. +// They will automatically be included in application.css. +// You can use Sass (SCSS) here: http://sass-lang.com/ Added: app/controllers/application_api_controller.rb (+3 -0) 100644 =================================================================== --- /dev/null +++ app/controllers/application_api_controller.rb 2017-02-19 23:40:10 +0900 (f8dedc1) @@ -0,0 +1,3 @@ +class ApplicationApiController < ActionController::API + include ActionController::MimeResponds +end Added: app/controllers/scrapings_controller.rb (+22 -0) 100644 =================================================================== --- /dev/null +++ app/controllers/scrapings_controller.rb 2017-02-19 23:40:10 +0900 (45f829c) @@ -0,0 +1,22 @@ +class ScrapingsController < ApplicationApiController + # POST /scraping.json + def create + @scraping = Scraping.new(scraping_params) + + respond_to do |format| + if @scraping.scrape + @entry = @scraping.entry + format.html { redirect_to @entry, notice: 'Scraped successfully.' 
} + format.json { render "entries/show", status: :created, location: @entry } + else + format.html { render :new } + format.json { render json: @scraping.errors, status: :unprocessable_entity } + end + end + end + + private + def scraping_params + params.permit(:uri, :mime_type, :body) + end +end Added: app/helpers/scrapings_helper.rb (+2 -0) 100644 =================================================================== --- /dev/null +++ app/helpers/scrapings_helper.rb 2017-02-19 23:40:10 +0900 (c52d30c) @@ -0,0 +1,2 @@ +module ScrapingsHelper +end Added: app/models/scraping.rb (+27 -0) 100644 =================================================================== --- /dev/null +++ app/models/scraping.rb 2017-02-19 23:40:10 +0900 (2b2bd44) @@ -0,0 +1,27 @@ +class Scraping + include ActiveModel::Model + + attr_accessor :uri + attr_accessor :mime_type + attr_accessor :body + + attr_reader :entry + + def scrape + @entry = Entry.new + extractor = ChupaText::Extractor.new + extractor.apply_configuration(ChupaText::Configuration.default) + data = ChupaText::TextData.new(body) + data.uri = uri + data.mime_type = mime_type + texts = [] + extractor.extract(data) do |extracted_data| + texts << extracted_data.body + end + @entry._key = data.uri.to_s + @entry.mime_type = data.mime_type + @entry.body = texts.join("\n") + @entry.size =****@entry***** + @entry.save + end +end Modified: app/views/entries/_entry.json.jbuilder (+2 -2) =================================================================== --- app/views/entries/_entry.json.jbuilder 2017-02-19 22:18:11 +0900 (f60ac0a) +++ app/views/entries/_entry.json.jbuilder 2017-02-19 23:40:10 +0900 (8bc949f) @@ -1,2 +1,2 @@ -json.extract! entry, :id, :_key, :title, :body, :size, :created_at, :updated_at -json.url entry_url(entry, format: :json) \ No newline at end of file +json.extract! 
entry, :id, :_key, :title, :body, :size#, :created_at, :updated_at +json.url entry_url(entry, format: :json) Added: bin/ranguba (+27 -0) 100644 =================================================================== --- /dev/null +++ bin/ranguba 2017-02-19 23:40:10 +0900 (ffdf001) @@ -0,0 +1,27 @@ +# -*- ruby -*- + +require "net/http" +require "uri" +require "pathname" +require "json" +require "pp" + +def path_to_uri(path) + components = path.expand_path.to_s.split(Pathname::SEPARATOR_PAT) + escaped_components = components.collect do |component| + CGI.escape(component) + end + "file://" + File.join(*escaped_components) +end + +api_uri = URI("http://localhost:3000/scraping.json") +ARGV.each do |path| + response = Net::HTTP.post_form(api_uri, + { + "uri" => path_to_uri(Pathname(path)), + "mime_type" => "message/rfc822", + "body" => File.read(path), + }) + p response + pp JSON.parse(response.body) +end Added: config/initializers/chupa_text.rb (+1 -0) 100644 =================================================================== --- /dev/null +++ config/initializers/chupa_text.rb 2017-02-19 23:40:10 +0900 (b949701) @@ -0,0 +1 @@ +ChupaText::Decomposers.load Modified: config/routes.rb (+1 -0) =================================================================== --- config/routes.rb 2017-02-19 22:18:11 +0900 (b69de7e) +++ config/routes.rb 2017-02-19 23:40:10 +0900 (ffa876a) @@ -1,4 +1,5 @@ Rails.application.routes.draw do + resource :scraping, only: ["create"] resources :entries # For details on the DSL available within this file, see http://guides.rubyonrails.org/routing.html end Added: test/controllers/scrapings_controller_test.rb (+7 -0) 100644 =================================================================== --- /dev/null +++ test/controllers/scrapings_controller_test.rb 2017-02-19 23:40:10 +0900 (cdb6793) @@ -0,0 +1,7 @@ +require 'test_helper' + +class ScrapingsControllerTest < ActionDispatch::IntegrationTest + # test "the truth" do + # assert true + # end +end Added: 
test/factories/scrapings.rb (+7 -0) 100644 =================================================================== --- /dev/null +++ test/factories/scrapings.rb 2017-02-19 23:40:10 +0900 (b4c5d9f) @@ -0,0 +1,7 @@ +FactoryGirl.define do + factory :scraping do + uri "MyString" + mime_type "MyString" + body "MyText" + end +end Added: test/models/scraping_test.rb (+7 -0) 100644 =================================================================== --- /dev/null +++ test/models/scraping_test.rb 2017-02-19 23:40:10 +0900 (dfe068f) @@ -0,0 +1,7 @@ +require 'test_helper' + +class ScrapingTest < ActiveSupport::TestCase + # test "the truth" do + # assert true + # end +end -------------- next part -------------- [An HTML attachment was scrubbed...] Télécharger