In [19]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sqlite3
In [20]:
# =======================================================
# Laden der Daten aus SQLite-Datenbank
# -- Verbindung zur Datenbank und Abfrage der Reviews-Tabelle
# =======================================================
In [21]:
con = sqlite3.connect(r'C:\Users\Miso\Desktop\Data Analysis 3/database.sqlite')
In [22]:
type(con)
Out[22]:
sqlite3.Connection
In [23]:
df=pd.read_sql_query('SELECT * FROM Reviews', con)
In [24]:
df.head()
Out[24]:
Id ProductId UserId ProfileName HelpfulnessNumerator HelpfulnessDenominator Score Time Summary Text
0 1 B001E4KFG0 A3SGXH7AUHU8GW delmartian 1 1 5 1303862400 Good Quality Dog Food I have bought several of the Vitality canned d...
1 2 B00813GRG4 A1D87F6ZCVE5NK dll pa 0 0 1 1346976000 Not as Advertised Product arrived labeled as Jumbo Salted Peanut...
2 3 B000LQOCH0 ABXLMWJIXXAIN Natalia Corres "Natalia Corres" 1 1 4 1219017600 "Delight" says it all This is a confection that has been around a fe...
3 4 B000UA0QIQ A395BORC6FGVXV Karl 3 3 2 1307923200 Cough Medicine If you are looking for the secret ingredient i...
4 5 B006K2ZZ7K A1UQRSCLF8GW1T Michael D. Bigham "M. Wassir" 0 0 5 1350777600 Great taffy Great taffy at a great price. There was a wid...
In [25]:
df.shape
Out[25]:
(568454, 10)
In [26]:
pd.read_csv(r'C:\Users\Miso\Desktop\Data Analysis 3\Reviews.csv')
Out[26]:
Id ProductId UserId ProfileName HelpfulnessNumerator HelpfulnessDenominator Score Time Summary Text
0 1 B001E4KFG0 A3SGXH7AUHU8GW delmartian 1 1 5 1303862400 Good Quality Dog Food I have bought several of the Vitality canned d...
1 2 B00813GRG4 A1D87F6ZCVE5NK dll pa 0 0 1 1346976000 Not as Advertised Product arrived labeled as Jumbo Salted Peanut...
2 3 B000LQOCH0 ABXLMWJIXXAIN Natalia Corres "Natalia Corres" 1 1 4 1219017600 "Delight" says it all This is a confection that has been around a fe...
3 4 B000UA0QIQ A395BORC6FGVXV Karl 3 3 2 1307923200 Cough Medicine If you are looking for the secret ingredient i...
4 5 B006K2ZZ7K A1UQRSCLF8GW1T Michael D. Bigham "M. Wassir" 0 0 5 1350777600 Great taffy Great taffy at a great price. There was a wid...
... ... ... ... ... ... ... ... ... ... ...
568449 568450 B001EO7N10 A28KG5XORO54AY Lettie D. Carter 0 0 5 1299628800 Will not do without Great for sesame chicken..this is a good if no...
568450 568451 B003S1WTCU A3I8AFVPEE8KI5 R. Sawyer 0 0 2 1331251200 disappointed I'm disappointed with the flavor. The chocolat...
568451 568452 B004I613EE A121AA1GQV751Z pksd "pk_007" 2 2 5 1329782400 Perfect for our maltipoo These stars are small, so you can give 10-15 o...
568452 568453 B004I613EE A3IBEVCTXKNOH Kathy A. Welch "katwel" 1 1 5 1331596800 Favorite Training and reward treat These are the BEST treats for training and rew...
568453 568454 B001LR2CU2 A3LGQPJCZVL9UC srfell17 0 0 5 1338422400 Great Honey I am very satisfied ,product is as advertised,...

568454 rows × 10 columns

In [27]:
!pip install textblob
from textblob import TextBlob
Requirement already satisfied: textblob in c:\users\miso\anaconda3\lib\site-packages (0.19.0)
Requirement already satisfied: nltk>=3.9 in c:\users\miso\anaconda3\lib\site-packages (from textblob) (3.9.1)
Requirement already satisfied: click in c:\users\miso\anaconda3\lib\site-packages (from nltk>=3.9->textblob) (8.1.8)
Requirement already satisfied: joblib in c:\users\miso\anaconda3\lib\site-packages (from nltk>=3.9->textblob) (1.4.2)
Requirement already satisfied: regex>=2021.8.3 in c:\users\miso\anaconda3\lib\site-packages (from nltk>=3.9->textblob) (2024.11.6)
Requirement already satisfied: tqdm in c:\users\miso\anaconda3\lib\site-packages (from nltk>=3.9->textblob) (4.67.1)
Requirement already satisfied: colorama in c:\users\miso\anaconda3\lib\site-packages (from click->nltk>=3.9->textblob) (0.4.6)
In [28]:
text=df['Summary'][0]
text
Out[28]:
'Good Quality Dog Food'
In [29]:
TextBlob(text).sentiment.polarity
Out[29]:
0.7
In [30]:
# =======================================================
# Sentimentanalyse auf Zusammenfassungen (Summary)
# -- Berechnung der Polarität für jede Zusammenfassung
# =======================================================
In [31]:
polarity=[]
for i in df['Summary']:
    try:
        polarity.append(TextBlob(i).sentiment.polarity)
    except:
        polarity.append(0)
In [32]:
len(polarity)
Out[32]:
568454
In [33]:
# =======================================================
# Hinzufügen der Polarität zum DataFrame
# =======================================================
In [34]:
data=df.copy()
In [35]:
data['Polarity']=polarity
In [36]:
data.head()
Out[36]:
Id ProductId UserId ProfileName HelpfulnessNumerator HelpfulnessDenominator Score Time Summary Text Polarity
0 1 B001E4KFG0 A3SGXH7AUHU8GW delmartian 1 1 5 1303862400 Good Quality Dog Food I have bought several of the Vitality canned d... 0.7
1 2 B00813GRG4 A1D87F6ZCVE5NK dll pa 0 0 1 1346976000 Not as Advertised Product arrived labeled as Jumbo Salted Peanut... 0.0
2 3 B000LQOCH0 ABXLMWJIXXAIN Natalia Corres "Natalia Corres" 1 1 4 1219017600 "Delight" says it all This is a confection that has been around a fe... 0.0
3 4 B000UA0QIQ A395BORC6FGVXV Karl 3 3 2 1307923200 Cough Medicine If you are looking for the secret ingredient i... 0.0
4 5 B006K2ZZ7K A1UQRSCLF8GW1T Michael D. Bigham "M. Wassir" 0 0 5 1350777600 Great taffy Great taffy at a great price. There was a wid... 0.8
In [37]:
# =======================================================
# Positive Zusammenfassungen extrahieren
# =======================================================
In [38]:
data_positive=data[data['Polarity']>0]
In [39]:
data_positive.shape
Out[39]:
(331661, 11)
In [40]:
# =======================================================
# Wordcloud für positive Zusammenfassungen
# =======================================================
In [41]:
!pip install wordcloud
from wordcloud import WordCloud, STOPWORDS
Requirement already satisfied: wordcloud in c:\users\miso\anaconda3\lib\site-packages (1.9.4)
Requirement already satisfied: numpy>=1.6.1 in c:\users\miso\anaconda3\lib\site-packages (from wordcloud) (2.1.3)
Requirement already satisfied: pillow in c:\users\miso\anaconda3\lib\site-packages (from wordcloud) (11.1.0)
Requirement already satisfied: matplotlib in c:\users\miso\anaconda3\lib\site-packages (from wordcloud) (3.10.0)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\miso\anaconda3\lib\site-packages (from matplotlib->wordcloud) (1.3.1)
Requirement already satisfied: cycler>=0.10 in c:\users\miso\anaconda3\lib\site-packages (from matplotlib->wordcloud) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\miso\anaconda3\lib\site-packages (from matplotlib->wordcloud) (4.55.3)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\miso\anaconda3\lib\site-packages (from matplotlib->wordcloud) (1.4.8)
Requirement already satisfied: packaging>=20.0 in c:\users\miso\anaconda3\lib\site-packages (from matplotlib->wordcloud) (24.2)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\miso\anaconda3\lib\site-packages (from matplotlib->wordcloud) (3.2.0)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\miso\anaconda3\lib\site-packages (from matplotlib->wordcloud) (2.9.0.post0)
Requirement already satisfied: six>=1.5 in c:\users\miso\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib->wordcloud) (1.17.0)
In [42]:
stopwords=set(STOPWORDS)
In [43]:
data_positive.head()
Out[43]:
Id ProductId UserId ProfileName HelpfulnessNumerator HelpfulnessDenominator Score Time Summary Text Polarity
0 1 B001E4KFG0 A3SGXH7AUHU8GW delmartian 1 1 5 1303862400 Good Quality Dog Food I have bought several of the Vitality canned d... 0.700000
4 5 B006K2ZZ7K A1UQRSCLF8GW1T Michael D. Bigham "M. Wassir" 0 0 5 1350777600 Great taffy Great taffy at a great price. There was a wid... 0.800000
5 6 B006K2ZZ7K ADT0SRK1MGOEU Twoapennything 0 0 4 1342051200 Nice Taffy I got a wild hair for taffy and ordered this f... 0.600000
6 7 B006K2ZZ7K A1SP2KVKFXXRU1 David C. Sullivan 0 0 5 1340150400 Great! Just as good as the expensive brands! This saltwater taffy had great flavors and was... 0.358333
7 8 B006K2ZZ7K A3JRGQVEQN31IQ Pamela G. Williams 0 0 5 1336003200 Wonderful, tasty taffy This taffy is so good. It is very soft and ch... 1.000000
In [44]:
total_text=(' '.join(data_positive['Summary']))
In [45]:
len(total_text)
Out[45]:
8464853
In [46]:
total_text[0:1000]
Out[46]:
'Good Quality Dog Food Great taffy Nice Taffy Great!  Just as good as the expensive brands! Wonderful, tasty taffy Healthy Dog Food The Best Hot Sauce in the World My cats LOVE this "diet" food better than their regular food My Cats Are Not Fans of the New Food fresh and greasy! Love it! GREAT SWEET CANDY! Always fresh Delicious product! Great Bargain for the Price The Best Hot Sauce in the World Great machine! Best of the Instant Oatmeals Good Instant Great Irish oatmeal for those in a hurry! satisfying Love Gluten Free Oatmeal!!! GOOD WAY TO START THE DAY.... Wife\'s favorite Breakfast Why wouldn\'t you buy oatmeal from Mcanns? Tastes great! Good Hot Breakfast Great taste and convenience good Very good but next time I won\'t order the Variety Pack HOT!  And good!  Came back for more  :) Roasts up a smooth brew Our guests love it! Awesome Deal! Awsome - Kids in neighborhood loved us! great deal. Better price for this at Target great source of electrolytes Great for preventing cramps Taste'
In [47]:
import re
total_text=re.sub('[^a-zA-Z]',' ',total_text)
In [48]:
total_text[0:2000]
Out[48]:
'Good Quality Dog Food Great taffy Nice Taffy Great   Just as good as the expensive brands  Wonderful  tasty taffy Healthy Dog Food The Best Hot Sauce in the World My cats LOVE this  diet  food better than their regular food My Cats Are Not Fans of the New Food fresh and greasy  Love it  GREAT SWEET CANDY  Always fresh Delicious product  Great Bargain for the Price The Best Hot Sauce in the World Great machine  Best of the Instant Oatmeals Good Instant Great Irish oatmeal for those in a hurry  satisfying Love Gluten Free Oatmeal    GOOD WAY TO START THE DAY     Wife s favorite Breakfast Why wouldn t you buy oatmeal from Mcanns  Tastes great  Good Hot Breakfast Great taste and convenience good Very good but next time I won t order the Variety Pack HOT   And good   Came back for more     Roasts up a smooth brew Our guests love it  Awesome Deal  Awsome   Kids in neighborhood loved us  great deal  Better price for this at Target great source of electrolytes Great for preventing cramps Taste is not so good  How much would you pay for a bag of chocolate pretzels  Great Gummi  Best ever latice tart nothing special Good Taste great this is the best Delicious  Great Natural Balance Lamb and Rice Great food  Great for my dogs allergies Great for stomach problems  Better life for you dog  Great Food Great food for my my dog who has a sensitive stomach  Great dog food Mmmmm  Mmmmm good  Great Dog Food  Good healthy dog food Great dog food Great allergy sensitive dog food  dogs love it Perfect for our English Bulldog with Allergies Taste wise it is a   star item Great Support Loved these Tartlets The best My Idea of a Good Diet Food  Delicious tea the best tea ever    freah bright clean Wonderful Tea Great cookies Best everyday cookie  So Far So Good Best Cat Food Great food  Perfect Cat Food For Older Cats  Good for Feline UTI Palatable and healthy Healthy   They LOVE It  Wonderful food   perfect for allergic kitties Tastes great  Love Hot   Spicy  Bad price here  My favorite ra'
In [49]:
total_text=re.sub(' +',' ', total_text)
In [50]:
total_text[0:10000]
Out[50]:
'Good Quality Dog Food Great taffy Nice Taffy Great Just as good as the expensive brands Wonderful tasty taffy Healthy Dog Food The Best Hot Sauce in the World My cats LOVE this diet food better than their regular food My Cats Are Not Fans of the New Food fresh and greasy Love it GREAT SWEET CANDY Always fresh Delicious product Great Bargain for the Price The Best Hot Sauce in the World Great machine Best of the Instant Oatmeals Good Instant Great Irish oatmeal for those in a hurry satisfying Love Gluten Free Oatmeal GOOD WAY TO START THE DAY Wife s favorite Breakfast Why wouldn t you buy oatmeal from Mcanns Tastes great Good Hot Breakfast Great taste and convenience good Very good but next time I won t order the Variety Pack HOT And good Came back for more Roasts up a smooth brew Our guests love it Awesome Deal Awsome Kids in neighborhood loved us great deal Better price for this at Target great source of electrolytes Great for preventing cramps Taste is not so good How much would you pay for a bag of chocolate pretzels Great Gummi Best ever latice tart nothing special Good Taste great this is the best Delicious Great Natural Balance Lamb and Rice Great food Great for my dogs allergies Great for stomach problems Better life for you dog Great Food Great food for my my dog who has a sensitive stomach Great dog food Mmmmm Mmmmm good Great Dog Food Good healthy dog food Great dog food Great allergy sensitive dog food dogs love it Perfect for our English Bulldog with Allergies Taste wise it is a star item Great Support Loved these Tartlets The best My Idea of a Good Diet Food Delicious tea the best tea ever freah bright clean Wonderful Tea Great cookies Best everyday cookie So Far So Good Best Cat Food Great food Perfect Cat Food For Older Cats Good for Feline UTI Palatable and healthy Healthy They LOVE It Wonderful food perfect for allergic kitties Tastes great Love Hot Spicy Bad price here My favorite ramen Amazing to the last bite Great spicy flavor Great value and convenient ramen great flavor Tastes great but is cheaper locally Tastes awesome looks beautiful Happy Face Simply the BEST Excellent Product Life Saver Nice snack Good Licorice I love these Great for the kids Sweet with a nice kick Love the salsa awesome cornmeal GREAT marinade Awesome stuff tastes good Great flavor of Jell o Great Deal Great tasting sea salt WITH iodine tastes very fresh Simple but good Not the greatest tasting Not Bad Right size taste This stuff is sooooo good Best Stuff Ever Worked great Delicious Fluffy Soft Delicious and Sugary Sweet Great but not as good as it was back in the day as a teen EXCELLENT LEMON JUICE Great Product Handy Never paid that much Great product to help you sleep Perfect for gluten free chocolate chip cookies Make a fresh fruit tart light and beautiful not bad for instant healthy coffee It s ok I love it great taste and has health benefits Tastes Great Arrived in days Great for after lunch Nice little mints but pricey These mints are awesome Love these And reusable containers A huge hit at the office Love em they re great Love these fresh better than average more expensive than average Great For Fat Cats and Senior Citizens Best by the case More Hot Spicy than McCormick s Brand Ahmad Loose Imperial Blend Tea is great for the price Nice tea Best Ahmad Tea My favorite tea Best tea ever DELICIOUS Best Bloody Mary mixer The Best Love this tea Really Nice Taste High Quality But it gave my dog wicked gas Great tasting green tea and such a great deal OMG best chocolate jelly belly Excellent loose tea Good anytime hot tea Wonderful Best way to buy kcups delicious Super SuperFoods are Super easy Best Energy Shot For Me Great for Gluten free lifestyle Excellent but not perfect Good product Thanks for the review Scott great Awesome Sugar Great product weak packaging Excellent Excellent for G F Amazing Very tasty chips Excellent Taste it s fabulous but not from amazon Not mild enough for me lol Great Natural Energy Great Energy The best energy shot out there smooth and organic Fantastic natural energy Way better than Guayaki Doesn t taste that good but provides you the energy Favorite energy shot and all natural too natural energy boost Best energy shot I have ever tasted The Best Good Stuff Great energy drink without artificial ingredients Flavor getting better energy is great Fantastic WOW Very Dissapointed Very Good Coffee Very Tasty Excellent coffee Hot Hot and delicious ABSOLUTELY DELICIOUS Great gag gift arrived FAST No broken creamers Shipped great Better Packaging Perfect for work Yes this is real excellent coffee Does not taste very good Love Love Love These great for eating whole foods clean with veggie brush Absolutely LOVE IT Only good for ice Great for teething Wonderful idea difficult to clean I wasn t that impressed Love the Fresh Food Feeder Great Beans Good stuff excellent exactly what I expected These are the Best Love Love Love The product is great but the price is out of line Perfect great taste Excellent Everyday Olive Oil Love Weavers I am a fan Treat yourself to the best coffee Drinking it now love the latin america aroma GREAT SNACK Best Bar My New Granola Bar Another Husband Favorite Very Smooth Coffee Highly Recommended My favorite Good Coffee Greatest Oil since slice bread Best Ever Deliciously scrumptious This is really good stuff Porcini Mushrooms an excellent product Excellent flavor mostly large pieces The Best Good for the money not the highest quality but good for the price Fresh Whole perfect Fresh and Tasty Cat won t go near it Simply WIld Chick Brown RIce for Cats A Great All Around Mix Great mix Perfect mix for egg allergic Arrowhead Mills whole grain buttermilk Pancakes are easy Good for Egg Allergy Great Healthy Snack Sweet and Soothing A Fantastic Healthy Product Great product Excellent tea best roast ever Franch s is the best VERY GOOD Great taste and easy for a single guy Look elsewhere for your whole grains These are Famous for a reason Wow God I love these cookies Fresh Lightly Spiced Crunchy Kettle Chips Good Value Good Product glad to find them in oz size pretty good could be better Best chips ever Kettle potato chips Sweet onion Ridiculously Good Delicious I love these chips They are thick and crunchy Quite good Delicious WOW Best gluten free dairy free chips A unique flavor for fans of Thai food Honey Dijon leaves bad aftertaste NY Cheddar are pretty good Very good Excellent A delicious crisp chip with good flavor BEST BUY in BBQ Chips Love Them Best deal ever Excellent Thai flavored chip Best Kettle Chips Delicious as always Not quite the best One of Their Best Flavors Love these chips Highly addicitive chips These are AWESOME By far my favorite chips Good chips more cheese Pretty good tasting chip Best sour cream onion chip I ve had Great chips Fabulous Great Chip Excellent balance of taste crunchiness and moisture Very good chips at a great price Great chips Good and tangy the best chips ever I do not even like kettle chips and I love these You have to love sea salt and vinegar already amazing chips Best Chip Ever Tangy spicy and sweet oh my The best I ve had Excellent chip Delicious Love Kettle Chips Best unsalted chips Love Kettle Chips but not this flavor Crisp Delicious what else did you expect Great Value I have had better Jalapeno Kettle Chips Spicy but good boulder salt and malt vinegar chips are way better Lightly Salted Heavily Delicious Too Much Flavor Love at first bite Tongue puckering tang and crunch The Best Chips PERIOD Delicious Extra Crunchy Best Salt Vinegar Gourmet powerful Salt Vinegar chips Great deal Best Chips out there Great price but not as tangy as I expected Absotively Posilutely Delicious completely ripped off C H I P C H I P H O O R A Y B A C K Y A R D B A R B E C U E Not the Best THESE ARE VERY GOOD USED to be my favorite chips Not as good as the English sell Not so good delicious Kettle Chips Make Great Mouse Food great hot new flavor Favorite Kettle flavor and a great value Kettle Brand Potato Chips New York Cheddar My favorite flavor So much flavor your farts will smell like sweet onions Great Chip Awesome and delicious Good chips YUM If you want a snack have something REALLY good a good buy Good chips Awesome Great chips with very low sodium My favorite Kettle Chip Best salt vinegar chips out there Amazing Service Pretty tasty and decently spiced Great Tasting Chips Good deal but close expiration date GREAT TASTING CHIPS Buy These Eat These Be Happy Fantastic Sweet salty tangy the way a snack should be Some of the best chips anywhere Delicious Lightly salted yet tasty Crunch Wow Great strong flavor Best Chips Out There These chips tasted good awesome chips Delicious Tangy and delicious snack Best Chip Best Chips I ve Ever Tasted Love the smaller bags Ok but Miss Vickie s Are Better These chips are awesome if not best but GREAT DEAL What a great tea at this price delicious Great well balanced Earl Grey Best Earl Grey ever favorite Earl Grey tea Delicious The best A HUGE Success Do not taste from bottle Mix with vanilla for true flavor The oldest soft drink is still the best Caramel flavor excellent for baking and toppings tips for using agave too Great buy excellent sweetner Good but container could be better Great stuff Healthy Sweetener Great way replacing the sugar Great substitute sweetener The Best Healthy Stuff Sweet success great product Best price on agave nectar that I ve found How this could be good Best tea I ever had Who needs salsa when chips taste this good Delicious Organic yummy chips what more can you ask for These are the best widely available bbq chips My favorite chips from Kettle Amazing Taste Best Chip Ever Plocky s Sweet Smokey Chipotle Whole Grain Tortill The best tortilla chips I have ever eaten Such an excellent chip Great Huge fan of these chips Plocky s tortilla chips tasty and healthy Tasty but make sure you have gum delicious and healthy Unique schrumshist and tasty Tortilla Chips Best kept secret Delicious'
In [51]:
# =======================================================
# Negative Zusammenfassungen analysieren & Wordcloud
# =======================================================
In [52]:
wordcloud=WordCloud(width=1000,height=500,stopwords=stopwords).generate(total_text)
plt.figure(figsize=(15,5))
plt.imshow(wordcloud)
plt.axis('off')
Out[52]:
(np.float64(-0.5), np.float64(999.5), np.float64(499.5), np.float64(-0.5))
No description has been provided for this image
In [53]:
data_negative=data[data['Polarity']<0]
In [54]:
total_text2=(' '.join(data_negative['Summary']))
In [55]:
total_text2=re.sub('[^a-zA-Z]',' ',total_text2)
In [56]:
total_text2=re.sub(' +',' ', total_text2)
In [57]:
wordcloud2=WordCloud(width=1000,height=500,stopwords=stopwords).generate(total_text2)
plt.figure(figsize=(15,5))
plt.imshow(wordcloud2)
plt.axis('off')
Out[57]:
(np.float64(-0.5), np.float64(999.5), np.float64(499.5), np.float64(-0.5))
No description has been provided for this image
In [58]:
df.head()
Out[58]:
Id ProductId UserId ProfileName HelpfulnessNumerator HelpfulnessDenominator Score Time Summary Text
0 1 B001E4KFG0 A3SGXH7AUHU8GW delmartian 1 1 5 1303862400 Good Quality Dog Food I have bought several of the Vitality canned d...
1 2 B00813GRG4 A1D87F6ZCVE5NK dll pa 0 0 1 1346976000 Not as Advertised Product arrived labeled as Jumbo Salted Peanut...
2 3 B000LQOCH0 ABXLMWJIXXAIN Natalia Corres "Natalia Corres" 1 1 4 1219017600 "Delight" says it all This is a confection that has been around a fe...
3 4 B000UA0QIQ A395BORC6FGVXV Karl 3 3 2 1307923200 Cough Medicine If you are looking for the secret ingredient i...
4 5 B006K2ZZ7K A1UQRSCLF8GW1T Michael D. Bigham "M. Wassir" 0 0 5 1350777600 Great taffy Great taffy at a great price. There was a wid...
In [59]:
# =======================================================
# Nutzeranalyse – Top-User nach Bewertungen
# =======================================================
In [60]:
df['UserId'].nunique()
Out[60]:
256059
In [61]:
raw=df.groupby('UserId').agg({'Summary':'count','Text':'count','Score':'mean','ProductId':'count'}).sort_values(by='Text',ascending=False)
In [62]:
raw
Out[62]:
Summary Text Score ProductId
UserId
A3OXHLG6DIBRW8 448 448 4.535714 448
A1YUL9PCJR3JTY 421 421 4.494062 421
AY12DBB0U420B 389 389 4.647815 389
A281NPSIMI1C2R 365 365 4.841096 365
A1Z54EM24Y40LL 256 256 4.453125 256
... ... ... ... ...
AZZQLMNX239VT 1 1 5.000000 1
AZZP14UZ813US 1 1 5.000000 1
AZZOMF6HZYFL7 1 1 2.000000 1
AZZV61COVM8CA 1 1 5.000000 1
AZZUQYE2C1LNI 1 1 4.000000 1

256059 rows × 4 columns

In [63]:
raw.columns=['Number_of_summaries','num_text','Avg_Score','no_of_products_purchased']
raw
Out[63]:
Number_of_summaries num_text Avg_Score no_of_products_purchased
UserId
A3OXHLG6DIBRW8 448 448 4.535714 448
A1YUL9PCJR3JTY 421 421 4.494062 421
AY12DBB0U420B 389 389 4.647815 389
A281NPSIMI1C2R 365 365 4.841096 365
A1Z54EM24Y40LL 256 256 4.453125 256
... ... ... ... ...
AZZQLMNX239VT 1 1 5.000000 1
AZZP14UZ813US 1 1 5.000000 1
AZZOMF6HZYFL7 1 1 2.000000 1
AZZV61COVM8CA 1 1 5.000000 1
AZZUQYE2C1LNI 1 1 4.000000 1

256059 rows × 4 columns

In [64]:
# =======================================================
# Visualisierung der 10 aktivsten Nutzer
# =======================================================
In [65]:
user_10=raw.index[0:10]
In [66]:
number_10=raw['no_of_products_purchased'][0:10]
In [67]:
plt.bar(user_10,number_10,label='most recommended users')
plt.xlabel('User_Id')
plt.ylabel('Number of products purchased')
plt.xticks(rotation='vertical')
Out[67]:
([0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
 [Text(0, 0, 'A3OXHLG6DIBRW8'),
  Text(1, 0, 'A1YUL9PCJR3JTY'),
  Text(2, 0, 'AY12DBB0U420B'),
  Text(3, 0, 'A281NPSIMI1C2R'),
  Text(4, 0, 'A1Z54EM24Y40LL'),
  Text(5, 0, 'A1TMAVN4CEM8U8'),
  Text(6, 0, 'A2MUGFV2TDQ47K'),
  Text(7, 0, 'A3TVZM3ZIXG8YW'),
  Text(8, 0, 'A3PJZ8TU8FDQ1K'),
  Text(9, 0, 'AQQLWCMRNDFGI')])
No description has been provided for this image
In [68]:
# =======================================================
# Stichprobe und Datenbereinigung
# =======================================================
In [69]:
df.sample(n=2000)
Out[69]:
Id ProductId UserId ProfileName HelpfulnessNumerator HelpfulnessDenominator Score Time Summary Text
546982 546983 B000AYGXKC A309XRV5MKT6ZS Happy Customer 0 0 5 1246233600 Very Good Sugar Free Cookies Murray's Sugar Free Peanut Butter cookies are ...
125594 125595 B0029NJ16K A2GGBMCEGBMYQ5 JOI "*PUMKIN*" 0 0 5 1231027200 THE BEST EVER THIS IS THE BEST DOGIE FOOD EVER, MY POODLE OD...
201808 201809 B001EQ59AK AEIALC4KRI314 Dr. Cheryl Ann Dusty "Cherokee Angel" 2 2 5 1314921600 A must for every cook This product is a must have amoung your kitche...
5908 5909 B001DIM8K8 AV5PTOA4JL8TG Sissy Jollie "Practical Gourmet" 3 3 5 1237680000 Best Oatmeal Ever! This is the only oatmeal that my family will e...
11947 11948 B001CD1VI4 A1HWCQGV41JRYV M. Hudson 0 1 5 1254096000 Yummy! I love these cookies! They are just perfect f...
... ... ... ... ... ... ... ... ... ... ...
284003 284004 B000PKMMQU A1Y8PGTWFL5D0V Customer 3 3 1 1324684800 No taste Very unhappy with this purchase. There was es...
450964 450965 B001OCKIBY A7T1NIWZHTC4P Seven Kitties "7kitties" 0 0 4 1245801600 Yummy but.... The packaging on the box compares this to a tr...
175719 175720 B003EJ9KLO A3I0Z04OY8PFNF Missi 0 0 5 1285027200 Love this mix! This is an easy-to-use, healthy pancake mix. ...
264918 264919 B005O8BJU8 ARA0UAUUO4B6X Pauline N. Borderies "Pauline" 0 0 5 1349481600 We love Ella's Kitchen The brekkie series are great because they are ...
179439 179440 B000CQC050 AP1PTF85IH674 Wendi A. Pilling 3 3 5 1276646400 Best Peppermint Tea We Have Found My husband has IBS and peppermint tea is essen...

2000 rows × 10 columns

In [70]:
final=df[0:2000]
In [71]:
final.head()
Out[71]:
Id ProductId UserId ProfileName HelpfulnessNumerator HelpfulnessDenominator Score Time Summary Text
0 1 B001E4KFG0 A3SGXH7AUHU8GW delmartian 1 1 5 1303862400 Good Quality Dog Food I have bought several of the Vitality canned d...
1 2 B00813GRG4 A1D87F6ZCVE5NK dll pa 0 0 1 1346976000 Not as Advertised Product arrived labeled as Jumbo Salted Peanut...
2 3 B000LQOCH0 ABXLMWJIXXAIN Natalia Corres "Natalia Corres" 1 1 4 1219017600 "Delight" says it all This is a confection that has been around a fe...
3 4 B000UA0QIQ A395BORC6FGVXV Karl 3 3 2 1307923200 Cough Medicine If you are looking for the secret ingredient i...
4 5 B006K2ZZ7K A1UQRSCLF8GW1T Michael D. Bigham "M. Wassir" 0 0 5 1350777600 Great taffy Great taffy at a great price. There was a wid...
In [72]:
final.isnull().sum()
Out[72]:
Id                        0
ProductId                 0
UserId                    0
ProfileName               0
HelpfulnessNumerator      0
HelpfulnessDenominator    0
Score                     0
Time                      0
Summary                   0
Text                      0
dtype: int64
In [73]:
final.duplicated().sum()
Out[73]:
np.int64(0)
In [74]:
final['Text'][0].split(' ')
Out[74]:
['I',
 'have',
 'bought',
 'several',
 'of',
 'the',
 'Vitality',
 'canned',
 'dog',
 'food',
 'products',
 'and',
 'have',
 'found',
 'them',
 'all',
 'to',
 'be',
 'of',
 'good',
 'quality.',
 'The',
 'product',
 'looks',
 'more',
 'like',
 'a',
 'stew',
 'than',
 'a',
 'processed',
 'meat',
 'and',
 'it',
 'smells',
 'better.',
 'My',
 'Labrador',
 'is',
 'finicky',
 'and',
 'she',
 'appreciates',
 'this',
 'product',
 'better',
 'than',
 '',
 'most.']
In [75]:
# =======================================================
# Berechnung der Textlänge in Wörtern
# =======================================================
In [76]:
def calc_len(text):
    return len(text.split(' '))
In [77]:
final['Text_length']=final['Text'].apply(calc_len)
In [78]:
!pip install plotly
Requirement already satisfied: plotly in c:\users\miso\anaconda3\lib\site-packages (5.24.1)
Requirement already satisfied: tenacity>=6.2.0 in c:\users\miso\anaconda3\lib\site-packages (from plotly) (9.0.0)
Requirement already satisfied: packaging in c:\users\miso\anaconda3\lib\site-packages (from plotly) (24.2)
In [79]:
import plotly.express as px
In [80]:
# =======================================================
# Plotly Boxplot zur Verteilung der Textlängen
# =======================================================
In [81]:
px.box(final,y='Text_length')
In [82]:
sns.countplot(final['Score'])
Out[82]:
<Axes: ylabel='count'>
No description has been provided for this image
In [83]:
final.head()
Out[83]:
Id ProductId UserId ProfileName HelpfulnessNumerator HelpfulnessDenominator Score Time Summary Text Text_length
0 1 B001E4KFG0 A3SGXH7AUHU8GW delmartian 1 1 5 1303862400 Good Quality Dog Food I have bought several of the Vitality canned d... 49
1 2 B00813GRG4 A1D87F6ZCVE5NK dll pa 0 0 1 1346976000 Not as Advertised Product arrived labeled as Jumbo Salted Peanut... 31
2 3 B000LQOCH0 ABXLMWJIXXAIN Natalia Corres "Natalia Corres" 1 1 4 1219017600 "Delight" says it all This is a confection that has been around a fe... 99
3 4 B000UA0QIQ A395BORC6FGVXV Karl 3 3 2 1307923200 Cough Medicine If you are looking for the secret ingredient i... 43
4 5 B006K2ZZ7K A1UQRSCLF8GW1T Michael D. Bigham "M. Wassir" 0 0 5 1350777600 Great taffy Great taffy at a great price. There was a wid... 30
In [84]:
# =======================================================
# Textvorverarbeitung – Kleinschreibung & Zeichen filtern
# =======================================================
In [85]:
final['Text']=final['Text'].str.lower()
In [86]:
final['Text'][164]
Out[86]:
'seriously this product was as tasteless as they come. there are much better tasting products out there but at 100 calories its better than a special k bar or cookie snack pack. you just have to season it or combine it with something else to share the flavor.'
In [87]:
import re
re.sub('[^a-zA-Z]',' ',final['Text'][164])
Out[87]:
'seriously this product was as tasteless as they come  there are much better tasting products out there but at     calories its better than a special k bar or cookie snack pack  you just have to season it or combine it with something else to share the flavor '
In [88]:
# =======================================================
# Entfernen von Satzzeichen mit Python string.punctuation
# =======================================================
In [89]:
punctuations='''!()[]{}:;'"\<>.?/@#$%*_-'''

data=final['Text'][164]
no_punc=''
for char in data:
    if char not in punctuations:
        no_punc=no_punc + char
no_punc
Out[89]:
'seriously this product was as tasteless as they come there are much better tasting products out there but at 100 calories its better than a special k bar or cookie snack pack you just have to season it or combine it with something else to share the flavor'
In [90]:
import string
punctuations = string.punctuation

def remove_punc(review):
    no_punc=''
    for char in review:
        if char not in punctuations:
            no_punc=no_punc + char
    return no_punc
In [91]:
final['Text']=final['Text'].apply(remove_punc)
In [92]:
final.head()
Out[92]:
Id ProductId UserId ProfileName HelpfulnessNumerator HelpfulnessDenominator Score Time Summary Text Text_length
0 1 B001E4KFG0 A3SGXH7AUHU8GW delmartian 1 1 5 1303862400 Good Quality Dog Food i have bought several of the vitality canned d... 49
1 2 B00813GRG4 A1D87F6ZCVE5NK dll pa 0 0 1 1346976000 Not as Advertised product arrived labeled as jumbo salted peanut... 31
2 3 B000LQOCH0 ABXLMWJIXXAIN Natalia Corres "Natalia Corres" 1 1 4 1219017600 "Delight" says it all this is a confection that has been around a fe... 99
3 4 B000UA0QIQ A395BORC6FGVXV Karl 3 3 2 1307923200 Cough Medicine if you are looking for the secret ingredient i... 43
4 5 B006K2ZZ7K A1UQRSCLF8GW1T Michael D. Bigham "M. Wassir" 0 0 5 1350777600 Great taffy great taffy at a great price there was a wide... 30
In [93]:
data=final['Text'][164]
data
Out[93]:
'seriously this product was as tasteless as they come there are much better tasting products out there but at 100 calories its better than a special k bar or cookie snack pack you just have to season it or combine it with something else to share the flavor'
In [94]:
!pip install nltk
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
Requirement already satisfied: nltk in c:\users\miso\anaconda3\lib\site-packages (3.9.1)
Requirement already satisfied: click in c:\users\miso\anaconda3\lib\site-packages (from nltk) (8.1.8)
Requirement already satisfied: joblib in c:\users\miso\anaconda3\lib\site-packages (from nltk) (1.4.2)
Requirement already satisfied: regex>=2021.8.3 in c:\users\miso\anaconda3\lib\site-packages (from nltk) (2024.11.6)
Requirement already satisfied: tqdm in c:\users\miso\anaconda3\lib\site-packages (from nltk) (4.67.1)
Requirement already satisfied: colorama in c:\users\miso\anaconda3\lib\site-packages (from click->nltk) (0.4.6)
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Miso\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
Out[94]:
True
In [95]:
# =======================================================
# Entfernen von Stopwörtern mit nltk
# =======================================================
In [96]:
re=[word for word in data.split(' ') if word not in set(stopwords.words('english'))]

str=''
for wd in re:
    str=str+wd
    str=str+' '
str
Out[96]:
'seriously product tasteless come much better tasting products 100 calories better special k bar cookie snack pack season combine something else share flavor '
In [97]:
def remove_stopwords(review):
    return ' '.join([word for word in review.split(' ') if word not in set(stopwords.words('english'))])
In [98]:
final['Text']=final['Text'].apply(remove_stopwords)
In [99]:
final['Text'][45]
Out[99]:
'seems little wholesome supermarket brands somewhat mushy doesnt quite much flavor either  didnt pass muster kids probably wont buy'
In [100]:
final['Text'].str.contains('http?').sum()
Out[100]:
np.int64(35)
In [117]:
pd.set_option('display.max_rows',2000)
final['Text'].str.contains('http?');
In [102]:
review=final['Text'][21]
review
Out[102]:
'bought husband currently overseas loves apparently staff likes alsobr generous amounts twizzlers 16ounce bag well worth price hrefhttpwwwamazoncomgpproductb001gvisjmtwizzlers strawberry 16ounce bags pack 6a'
In [103]:
import re
In [104]:
url_pattern = re.compile(r'href\S*|http\S+')
url_pattern.sub(r'',review)
Out[104]:
'bought husband currently overseas loves apparently staff likes alsobr generous amounts twizzlers 16ounce bag well worth price  strawberry 16ounce bags pack 6a'
In [105]:
# =======================================================
# Entfernen von URLs in den Texten
# =======================================================
In [106]:
def remove_urls(review):
    url_pattern=re.compile(r'href|http.\w+')
    return url_pattern.sub(r'',review)
In [107]:
final['Text']=final['Text'].apply(remove_urls)
In [108]:
final['Text'][21]
Out[108]:
'bought husband currently overseas loves apparently staff likes alsobr generous amounts twizzlers 16ounce bag well worth price  strawberry 16ounce bags pack 6a'
In [109]:
final['Text'].str.contains('http?').sum()
Out[109]:
np.int64(0)
In [110]:
final['Text'][34].replace('br','')
Out[110]:
'instant oatmeal become soggy minute water hits bowl mccanns instant oatmeal holds texture excellent flavor good time mccanns regular oat meal excellent may take bit longer prepare time morning best instant and ive ever eaten close second noninstant variety  mccanns instant irish oatmeal variety pack regular apples  cinnamon maple  own sugar 10count boxes pack 6'
In [111]:
import warnings
from warnings import filterwarnings
filterwarnings('ignore')
In [112]:
for i in range(len(final['Text'])):
    final['Text'][i]=final['Text'][i].replace('br','')
In [113]:
comment_words=' '.join(final['Text'])
In [114]:
# =======================================================
# Wordcloud für bereinigten Text
# =======================================================
In [115]:
stopwords=set(STOPWORDS)
In [116]:
wordcloud=WordCloud(width=800,height=800,stopwords=stopwords).generate(comment_words)
plt.figure(figsize=(8,8))
plt.imshow(wordcloud)
plt.axis('off')
Out[116]:
(np.float64(-0.5), np.float64(799.5), np.float64(799.5), np.float64(-0.5))
No description has been provided for this image