Skip to content

added code for preprocessing_data #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 40 additions & 1 deletion twitter.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
#import numpy as np
import re
import nltk
import pandas as pd
from textblob import TextBlob
from nltk import word_tokenize,sent_tokenize
from typing import Counter, List
from nltk.corpus import stopwords
from nltk.util import filestring, pr
"""
def processTweet(self,tweet):

Expand All @@ -20,6 +26,39 @@ def processTweet(self,tweet):

# Load 'twitter.csv' (path relative to the working directory) into a
# DataFrame, using pandas' pure-Python parsing engine.
dataset=pd.read_csv('twitter.csv' , engine='python')

# Case 1: the dataset contains only the tweet text, with no additional columns.

"""
Case 2: the dataset contains additional columns, for example:
tweet_id,sentiment,content
1956967341,empty,@tiffanylue i know i was listenin to bad habit earlier and i started freakin at his part =[
1956967666,sadness,Layin n bed with a headache ughhhh...waitin on your call...
1956967696,sadness,Funeral ceremony...gloomy friday...
1956967789,enthusiasm,wants to hang out with friends SOON!
1956968416,neutral,"@dannycastillo We want to trade with someone who has Houston tickets, but no one will."
1956968477,worry,Re-pinging @ghostridah14: why didn't you go to prom? BC my bf didn't like my friends
1956968487,sadness,"I should be sleep, but im not! thinking about an old friend who I want. but he's married now. damn, & he wants me 2! scandalous!"
1956968636,worry,Hmmm. http://www.djhero.com/ is down
1956969035,sadness,@charviray Charlene my love. I miss you
1956969172,sadness,@kelcouch I'm sorry at least it's Friday?
"""
# ...then read the tweets from the 'content' column, i.e. dataset['content'].

# Collect the raw text entries that will be cleaned below.
# NOTE(review): iterating a DataFrame directly yields its column labels, not
# its rows. If the tweets live in a column, iterate that column instead
# (e.g. dataset['content']) -- confirm against the actual CSV layout.
csv_text = []
for entry in dataset:  # dataset['content']
    csv_text.append(entry)

csv_words = []
countsm = 1  # kept as-is: other parts of the script may read this name

# Build the stop-word set once; it is invariant across the loops below and
# was previously rebuilt for every single token.
english_stopwords = set(stopwords.words('english'))

for text in csv_text:
    # Replace everything that is not an ASCII letter with a space so that
    # punctuation, digits and symbols act as word separators.
    letters_only = re.sub('[^a-zA-Z]', ' ', text)
    # str.split() with no argument already collapses runs of whitespace.
    # The former replace(' ', '') removed every separator, gluing each entry
    # into a single token and defeating the stop-word filter.
    for word in letters_only.split():
        if word not in english_stopwords:
            csv_words.append(word)

print(csv_words)

# Select every row of the column at positional index 5 (the sixth column).
x=dataset.iloc[:,5]
#df=pd.DataFrame(columns=[6])
Expand Down Expand Up @@ -58,4 +97,4 @@ def processTweet(self,tweet):
print(neg*100/y)

print("percentage of neutral tweets")
print(neu*100/y)
print(neu*100/y)