My Sites


Sunday, June 11, 2017

Text Pre-processing with Python Natural Language Toolkit (NLTK)

Text Preprocessing steps
  1. Tokenization
  2. Stemming and Lemmatization
  3. Stop Word Removal
  4. POS-tagging or Part-of-Speech tagging (https://nlp.stanford.edu/software/tagger.shtml)
Play Session
python
>>> import nltk
>>> nltk.download('all')

Reference: http://www.nltk.org/

#!/usr/bin/python
# -*- coding: utf-8 -*-
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer
import json





How to simply host a Python RESTful web service on Amazon EC2 instance? (Ubuntu)

Log in to your AWS Console.


Create and launch an EC2 instance.

Select the EC2 instance type.

Create an SSH key pair so that you can log in to the instance over SSH.

Log in to the EC2 instance and install Python on it.

python --version
sudo apt-get install python3
python -m pip install pymongo

Python Flask is a microframework used to create simple RESTful web services.
python -m pip install Flask
Reference: http://flask.pocoo.org/

Treat app_controller.py as the main Python file (the entry point that starts the service).

app_controller.py

#!flask/bin/python
# -*- coding: utf-8 -*-
from __future__ import division
from pymongo import MongoClient
from flask import Flask, jsonify
from flask import request
from my_first_class import MyFirstClass
from flask_api import FlaskAPI, status, exceptions
import threading
import json

# Flask application object; the @app.route decorators in this file register their handlers on it.
app = Flask(__name__)

# Simple GET route: /status
@app.route('/status')
def check_status():
    """Return the fixed text "OK" for requests routed to /status."""
    response_body = "OK"
    return response_body

# Basic POST route
@app.route('/profile', methods=['POST'])
def create_user():
    """Handle POST /profile: read the submitted form fields and return them as JSON.

    Reads the ``fname`` and ``lname`` form fields from the current request,
    prints them to stdout, and returns a ``(body, status)`` pair where the
    body is a JSON string and the status is ``status.HTTP_200_OK``.
    """
    fname = request.form.get('fname')
    lname = request.form.get('lname')

    # print() calls instead of Python 2 print statements, so this module
    # parses under Python 3 as well (single-argument output is unchanged).
    print(fname)
    print(lname)

    # Do the real processing here. ``data`` must be defined before it is
    # serialized below; this placeholder simply echoes the submitted fields.
    data = {'fname': fname, 'lname': lname}
    return json.dumps(data), status.HTTP_200_OK

if __name__ == '__main__':
    # Options for the built-in Flask server. For local development only,
    # leave out 'host' (the "[dev localhost]" variant of this call).
    server_options = {
        'host': '0.0.0.0',
        'threaded': True,
        'use_reloader': True,
    }
    app.run(**server_options)

my_first_class.py

#!flask/bin/python
# -*- coding: utf-8 -*-
import json
from pymongo import MongoClient

class MyFirstClass:
    """Small MongoDB helper that tags documents by their title.

    Connects to the MongoDB server at ``mongodb://localhost:27017/`` and
    works against the database named ``database``.
    """

    # Lower-case substrings; a title containing any of them (compared
    # case-insensitively) is tagged with the name "MANUAL_USER".
    MANUAL_USER_MARKERS = ("user1", "user2")

    def __init__(self):
        self.client = MongoClient('mongodb://localhost:27017/')
        self.icps_db = self.client['database']
        # create_user() reads ``self.database``; ``icps_db`` is kept as an
        # alias so code that already uses that attribute keeps working.
        self.database = self.icps_db

    def create_user(self, data_collection="data_collection"):
        """Create one tagged record for every document in a collection.

        For each document, a new record holding the document's ``title`` and
        a ``name`` is inserted. ``name`` is ``"MANUAL_USER"`` when the title
        contains one of ``MANUAL_USER_MARKERS`` and ``None`` otherwise. The
        cursor and the client connection are closed when the method
        finishes, including when it fails part-way.

        Args:
            data_collection: Name of the collection to read from and insert
                into. The original code referenced this name without
                defining it, so the default is only a placeholder; pass the
                real collection name.

        Raises:
            KeyError: If a document has no ``title`` field.
        """
        collection = self.database[data_collection]
        # NOTE(review): records are inserted into the same collection the
        # cursor is reading from; confirm this is intended, since the cursor
        # may then also return the newly inserted documents.
        cursor = collection.find({}, no_cursor_timeout=True)
        try:
            for idx, document in enumerate(cursor):
                # Keep the title as text: encoding it to bytes and then
                # calling str() on it yields "b'...'" on Python 3.
                raw_title = document['title']
                lowered_title = raw_title.lower()

                # Always bind ``name`` so a non-matching title neither raises
                # nor reuses the value from the previous iteration.
                if any(marker in lowered_title for marker in self.MANUAL_USER_MARKERS):
                    name = "MANUAL_USER"
                else:
                    name = None

                data_record = {
                    "title": raw_title,
                    "name": name,
                }
                # insert_one() replaces the deprecated Collection.insert().
                record_id = collection.insert_one(data_record).inserted_id
                print("Record created. ", record_id, "  ", idx)
        finally:
            # A no_cursor_timeout cursor must be closed explicitly, and the
            # client is closed once after the loop rather than inside it.
            cursor.close()
            self.client.close()

Run the Python web service in the background with nohup
nohup python app_controller.py & 

Stop the service
ps -ef | grep app_controller.py
kill -9 <pid>

Simple must-know SQL and NoSQL hacks

PostgreSQL Hacks
Install

sudo apt-get update
sudo apt-get install postgresql postgresql-contrib
sudo -i -u postgres
psql
Exit out of the PostgreSQL prompt by typing: \q
createdb test1
 \connect test1
CREATE TABLE table_name (
    column_name1 col_type (field_length) column_constraints,
    column_name2 col_type (field_length),
    column_name3 col_type (field_length)
);

MySQL Hacks
mysql -u root -p myDatabase
show databases;
use myDatabase;
show tables;
select * from table;

Take a MySQL dump
mysqldump -u [uname] -p db_name > db_backup.sql

Restore a data dump
mysql -u root -p devengoDev < db_backup.sql

Reset MySQL Password
https://help.ubuntu.com/community/MysqlPasswordReset

sudo /etc/init.d/mysql stop
sudo /usr/sbin/mysqld --skip-grant-tables --skip-networking &
mysql -u root
FLUSH PRIVILEGES;
SET PASSWORD FOR root@'localhost' = PASSWORD('password');
UPDATE mysql.user SET Password=PASSWORD('newpwd') WHERE User='root';
FLUSH PRIVILEGES;
sudo /etc/init.d/mysql stop
sudo /etc/init.d/mysql start

MongoDB Hacks
Setup
https://docs.mongodb.com/v3.0/tutorial/install-mongodb-on-ubuntu/

sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv EA312927
echo "deb http://repo.mongodb.org/apt/ubuntu trusty/mongodb-org/3.2 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.2.list
sudo apt-get update
sudo apt-get install -y mongodb-org
sudo service mongod start
sudo service mongod stop

mongodump -d myDatabase -o ~/backups/first_backup
mongorestore -d myDatabase ~/backups/first_backup

use mydb
show dbs
db.dropDatabase()
db.createCollection("mycollection")
show collections
db.createCollection("mycol", { capped : true, autoIndexId : true, size : 
   6142800, max : 10000 } )
db.mycol.insert({
   title: 'MongoDB Overview', 
   description: 'MongoDB is no sql database',
   by: 'tutorials point',
   url: 'http://www.tutorialspoint.com',
   tags: ['mongodb', 'database', 'NoSQL'],
   likes: 100
})
db.movie.insert({"name":"tutorials point"})
db.COLLECTION_NAME.drop()
db.mycol.find().pretty()
db.mycol.find(
   {
      $and: [
         {key1: value1}, {key2:value2}
      ]
   }
).pretty()
db.mycol.find(
   {
      $or: [
         {key1: value1}, {key2:value2}
      ]
   }
).pretty()
db.mycol.find({"likes": {$gt:10}, $or: [{"by": "tutorials point"},
   {"title": "MongoDB Overview"}]}).pretty()
{
   "_id": ObjectId(7df78ad8902c),
   "title": "MongoDB Overview", 
   "description": "MongoDB is no sql database",
   "by": "tutorials point",
   "url": "http://www.tutorialspoint.com",
   "tags": ["mongodb", "database", "NoSQL"],
   "likes": "100"
}
db.mycol.update({'title':'MongoDB Overview'},{$set:{'title':'New MongoDB Tutorial'}})
db.mycol.update({'title':'MongoDB Overview'},{$set:{'title':'New MongoDB Tutorial'}},{multi:true})
db.mycol.remove({'title':'MongoDB Overview'})
db.mycol.find({},{"title":1,_id:0}).limit(2)
db.mycol.find({},{"title":1,_id:0}).sort({"title":-1})
db.mycol.ensureIndex({"title":1})

db.mycol.aggregate([{$group : {_id : "$by_user", num_tutorial : {$sum : 1}}}])
{
   "result" : [
      {
         "_id" : "tutorials point",
         "num_tutorial" : 2
      },
      {
         "_id" : "Neo4j",
         "num_tutorial" : 1
      }
   ],
   "ok" : 1
}

Mongo Clustering

Mongo Sharding